fix(cli): prefer curses over simple_term_menu in setup.py

fix(terminal): add SSH preflight check (#1486 )
fix(agent): skip reasoning extra_body for unsupported OpenRouter models (#1485 )
2026-03-15 21:14:08 -07:00 · 2026-03-15 21:09:07 -07:00 · 2026-03-15 20:42:07 -07:00 · 2026-03-15 20:35:24 -07:00 · 2026-03-15 20:21:21 -07:00 · 2026-03-15 20:09:50 -07:00
197 changed files with 14509 additions and 2297 deletions
--- a/.github/workflows/docs-site-checks.yml
+++ b/.github/workflows/docs-site-checks.yml
@@ -0,0 +1,39 @@
+name: Docs Site Checks
+
+on:
+  pull_request:
+    paths:
+      - 'website/**'
+      - '.github/workflows/docs-site-checks.yml'
+  workflow_dispatch:
+
+jobs:
+  docs-site-checks:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-node@v4
+        with:
+          node-version: 20
+          cache: npm
+          cache-dependency-path: website/package-lock.json
+
+      - name: Install website dependencies
+        run: npm ci
+        working-directory: website
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+
+      - name: Install ascii-guard
+        run: python -m pip install ascii-guard
+
+      - name: Lint docs diagrams
+        run: npm run lint:diagrams
+        working-directory: website
+
+      - name: Build Docusaurus
+        run: npm run build
+        working-directory: website
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -235,6 +235,7 @@ hermes_cli/skin_engine.py    # SkinConfig dataclass, built-in skins, YAML loader
 | Spinner verbs | `spinner.thinking_verbs` | `display.py` |
 | Spinner wings (optional) | `spinner.wings` | `display.py` |
 | Tool output prefix | `tool_prefix` | `display.py` |
+| Per-tool emojis | `tool_emojis` | `display.py` → `get_tool_emoji()` |
 | Agent name | `branding.agent_name` | `banner.py`, `cli.py` |
 | Welcome message | `branding.welcome` | `cli.py` |
 | Response box label | `branding.response_label` | `cli.py` |
--- a/acp_adapter/entry.py
+++ b/acp_adapter/entry.py
@@ -42,19 +42,16 @@ def _setup_logging() -> None:

 def _load_env() -> None:
    """Load .env from HERMES_HOME (default ``~/.hermes``)."""
-    from dotenv import load_dotenv
+    from hermes_cli.env_loader import load_hermes_dotenv

    hermes_home = Path(os.getenv("HERMES_HOME", Path.home() / ".hermes"))
-    env_file = hermes_home / ".env"
-    if env_file.exists():
-        try:
-            load_dotenv(dotenv_path=env_file, encoding="utf-8")
-        except UnicodeDecodeError:
-            load_dotenv(dotenv_path=env_file, encoding="latin-1")
-        logging.getLogger(__name__).info("Loaded env from %s", env_file)
+    loaded = load_hermes_dotenv(hermes_home=hermes_home)
+    if loaded:
+        for env_file in loaded:
+            logging.getLogger(__name__).info("Loaded env from %s", env_file)
    else:
        logging.getLogger(__name__).info(
-            "No .env found at %s, using system env", env_file
+            "No .env found at %s, using system env", hermes_home / ".env"
        )


--- a/agent/anthropic_adapter.py
+++ b/agent/anthropic_adapter.py
@@ -102,30 +102,15 @@ def build_anthropic_client(api_key: str, base_url: str = None):


 def read_claude_code_credentials() -> Optional[Dict[str, Any]]:
-    """Read credentials from Claude Code's config files.
+    """Read refreshable Claude Code OAuth credentials from ~/.claude/.credentials.json.

-    Checks two locations (in order):
-      1. ~/.claude.json — top-level primaryApiKey (native binary, v2.x)
-      2. ~/.claude/.credentials.json — claudeAiOauth block (npm/legacy installs)
+    This intentionally excludes ~/.claude.json primaryApiKey. Opencode's
+    subscription flow is OAuth/setup-token based with refreshable credentials,
+    and native direct Anthropic provider usage should follow that path rather
+    than auto-detecting Claude's first-party managed key.

    Returns dict with {accessToken, refreshToken?, expiresAt?} or None.
    """
-    # 1. Native binary (v2.x): ~/.claude.json with top-level primaryApiKey
-    claude_json = Path.home() / ".claude.json"
-    if claude_json.exists():
-        try:
-            data = json.loads(claude_json.read_text(encoding="utf-8"))
-            primary_key = data.get("primaryApiKey", "")
-            if primary_key:
-                return {
-                    "accessToken": primary_key,
-                    "refreshToken": "",
-                    "expiresAt": 0,  # Managed keys don't have a user-visible expiry
-                }
-        except (json.JSONDecodeError, OSError, IOError) as e:
-            logger.debug("Failed to read ~/.claude.json: %s", e)
-
-    # 2. Legacy/npm installs: ~/.claude/.credentials.json
    cred_path = Path.home() / ".claude" / ".credentials.json"
    if cred_path.exists():
        try:
@@ -138,6 +123,7 @@ def read_claude_code_credentials() -> Optional[Dict[str, Any]]:
                        "accessToken": access_token,
                        "refreshToken": oauth_data.get("refreshToken", ""),
                        "expiresAt": oauth_data.get("expiresAt", 0),
+                        "source": "claude_code_credentials_file",
                    }
        except (json.JSONDecodeError, OSError, IOError) as e:
            logger.debug("Failed to read ~/.claude/.credentials.json: %s", e)
@@ -145,6 +131,20 @@ def read_claude_code_credentials() -> Optional[Dict[str, Any]]:
    return None


+def read_claude_managed_key() -> Optional[str]:
+    """Read Claude's native managed key from ~/.claude.json for diagnostics only."""
+    claude_json = Path.home() / ".claude.json"
+    if claude_json.exists():
+        try:
+            data = json.loads(claude_json.read_text(encoding="utf-8"))
+            primary_key = data.get("primaryApiKey", "")
+            if isinstance(primary_key, str) and primary_key.strip():
+                return primary_key.strip()
+        except (json.JSONDecodeError, OSError, IOError) as e:
+            logger.debug("Failed to read ~/.claude.json: %s", e)
+    return None
+
+
 def is_claude_code_token_valid(creds: Dict[str, Any]) -> bool:
    """Check if Claude Code credentials have a non-expired access token."""
    import time
@@ -236,6 +236,72 @@ def _write_claude_code_credentials(access_token: str, refresh_token: str, expire
        logger.debug("Failed to write refreshed credentials: %s", e)


+def _resolve_claude_code_token_from_credentials(creds: Optional[Dict[str, Any]] = None) -> Optional[str]:
+    """Resolve a token from Claude Code credential files, refreshing if needed."""
+    creds = creds or read_claude_code_credentials()
+    if creds and is_claude_code_token_valid(creds):
+        logger.debug("Using Claude Code credentials (auto-detected)")
+        return creds["accessToken"]
+    if creds:
+        logger.debug("Claude Code credentials expired — attempting refresh")
+        refreshed = _refresh_oauth_token(creds)
+        if refreshed:
+            return refreshed
+        logger.debug("Token refresh failed — re-run 'claude setup-token' to reauthenticate")
+    return None
+
+
+def _prefer_refreshable_claude_code_token(env_token: str, creds: Optional[Dict[str, Any]]) -> Optional[str]:
+    """Prefer Claude Code creds when a persisted env OAuth token would shadow refresh.
+
+    Hermes historically persisted setup tokens into ANTHROPIC_TOKEN. That makes
+    later refresh impossible because the static env token wins before we ever
+    inspect Claude Code's refreshable credential file. If we have a refreshable
+    Claude Code credential record, prefer it over the static env OAuth token.
+    """
+    if not env_token or not _is_oauth_token(env_token) or not isinstance(creds, dict):
+        return None
+    if not creds.get("refreshToken"):
+        return None
+
+    resolved = _resolve_claude_code_token_from_credentials(creds)
+    if resolved and resolved != env_token:
+        logger.debug(
+            "Preferring Claude Code credential file over static env OAuth token so refresh can proceed"
+        )
+        return resolved
+    return None
+
+
+def get_anthropic_token_source(token: Optional[str] = None) -> str:
+    """Best-effort source classification for an Anthropic credential token."""
+    token = (token or "").strip()
+    if not token:
+        return "none"
+
+    env_token = os.getenv("ANTHROPIC_TOKEN", "").strip()
+    if env_token and env_token == token:
+        return "anthropic_token_env"
+
+    cc_env_token = os.getenv("CLAUDE_CODE_OAUTH_TOKEN", "").strip()
+    if cc_env_token and cc_env_token == token:
+        return "claude_code_oauth_token_env"
+
+    creds = read_claude_code_credentials()
+    if creds and creds.get("accessToken") == token:
+        return str(creds.get("source") or "claude_code_credentials")
+
+    managed_key = read_claude_managed_key()
+    if managed_key and managed_key == token:
+        return "claude_json_primary_api_key"
+
+    api_key = os.getenv("ANTHROPIC_API_KEY", "").strip()
+    if api_key and api_key == token:
+        return "anthropic_api_key_env"
+
+    return "unknown"
+
+
 def resolve_anthropic_token() -> Optional[str]:
    """Resolve an Anthropic token from all available sources.

@@ -248,28 +314,28 @@ def resolve_anthropic_token() -> Optional[str]:

    Returns the token string or None.
    """
+    creds = read_claude_code_credentials()
+
    # 1. Hermes-managed OAuth/setup token env var
    token = os.getenv("ANTHROPIC_TOKEN", "").strip()
    if token:
+        preferred = _prefer_refreshable_claude_code_token(token, creds)
+        if preferred:
+            return preferred
        return token

    # 2. CLAUDE_CODE_OAUTH_TOKEN (used by Claude Code for setup-tokens)
    cc_token = os.getenv("CLAUDE_CODE_OAUTH_TOKEN", "").strip()
    if cc_token:
+        preferred = _prefer_refreshable_claude_code_token(cc_token, creds)
+        if preferred:
+            return preferred
        return cc_token

    # 3. Claude Code credential file
-    creds = read_claude_code_credentials()
-    if creds and is_claude_code_token_valid(creds):
-        logger.debug("Using Claude Code credentials (auto-detected)")
-        return creds["accessToken"]
-    elif creds:
-        # Token expired — attempt to refresh
-        logger.debug("Claude Code credentials expired — attempting refresh")
-        refreshed = _refresh_oauth_token(creds)
-        if refreshed:
-            return refreshed
-        logger.debug("Token refresh failed — re-run 'claude setup-token' to reauthenticate")
+    resolved_claude_token = _resolve_claude_code_token_from_credentials(creds)
+    if resolved_claude_token:
+        return resolved_claude_token

    # 4. Regular API key, or a legacy OAuth token saved in ANTHROPIC_API_KEY.
    # This remains as a compatibility fallback for pre-migration Hermes configs.
@@ -354,6 +420,68 @@ def _sanitize_tool_id(tool_id: str) -> str:
    return sanitized or "tool_0"


+def _convert_openai_image_part_to_anthropic(part: Dict[str, Any]) -> Optional[Dict[str, Any]]:
+    """Convert an OpenAI-style image block to Anthropic's image source format."""
+    image_data = part.get("image_url", {})
+    url = image_data.get("url", "") if isinstance(image_data, dict) else str(image_data)
+    if not isinstance(url, str) or not url.strip():
+        return None
+    url = url.strip()
+
+    if url.startswith("data:"):
+        header, sep, data = url.partition(",")
+        if sep and ";base64" in header:
+            media_type = header[5:].split(";", 1)[0] or "image/png"
+            return {
+                "type": "image",
+                "source": {
+                    "type": "base64",
+                    "media_type": media_type,
+                    "data": data,
+                },
+            }
+
+    if url.startswith("http://") or url.startswith("https://"):
+        return {
+            "type": "image",
+            "source": {
+                "type": "url",
+                "url": url,
+            },
+        }
+
+    return None
+
+
+def _convert_user_content_part_to_anthropic(part: Any) -> Optional[Dict[str, Any]]:
+    if isinstance(part, dict):
+        ptype = part.get("type")
+        if ptype == "text":
+            block = {"type": "text", "text": part.get("text", "")}
+            if isinstance(part.get("cache_control"), dict):
+                block["cache_control"] = dict(part["cache_control"])
+            return block
+        if ptype == "image_url":
+            return _convert_openai_image_part_to_anthropic(part)
+        if ptype == "image" and part.get("source"):
+            return dict(part)
+        if ptype == "image" and part.get("data"):
+            media_type = part.get("mimeType") or part.get("media_type") or "image/png"
+            return {
+                "type": "image",
+                "source": {
+                    "type": "base64",
+                    "media_type": media_type,
+                    "data": part.get("data", ""),
+                },
+            }
+        if ptype == "tool_result":
+            return dict(part)
+    elif part is not None:
+        return {"type": "text", "text": str(part)}
+    return None
+
+
 def convert_tools_to_anthropic(tools: List[Dict]) -> List[Dict]:
    """Convert OpenAI tool definitions to Anthropic format."""
    if not tools:
@@ -369,6 +497,66 @@ def convert_tools_to_anthropic(tools: List[Dict]) -> List[Dict]:
    return result


+def _image_source_from_openai_url(url: str) -> Dict[str, str]:
+    """Convert an OpenAI-style image URL/data URL into Anthropic image source."""
+    url = str(url or "").strip()
+    if not url:
+        return {"type": "url", "url": ""}
+
+    if url.startswith("data:"):
+        header, _, data = url.partition(",")
+        media_type = "image/jpeg"
+        if header.startswith("data:"):
+            mime_part = header[len("data:"):].split(";", 1)[0].strip()
+            if mime_part.startswith("image/"):
+                media_type = mime_part
+        return {
+            "type": "base64",
+            "media_type": media_type,
+            "data": data,
+        }
+
+    return {"type": "url", "url": url}
+
+
+def _convert_content_part_to_anthropic(part: Any) -> Optional[Dict[str, Any]]:
+    """Convert a single OpenAI-style content part to Anthropic format."""
+    if part is None:
+        return None
+    if isinstance(part, str):
+        return {"type": "text", "text": part}
+    if not isinstance(part, dict):
+        return {"type": "text", "text": str(part)}
+
+    ptype = part.get("type")
+
+    if ptype == "input_text":
+        block: Dict[str, Any] = {"type": "text", "text": part.get("text", "")}
+    elif ptype in {"image_url", "input_image"}:
+        image_value = part.get("image_url", {})
+        url = image_value.get("url", "") if isinstance(image_value, dict) else str(image_value or "")
+        block = {"type": "image", "source": _image_source_from_openai_url(url)}
+    else:
+        block = dict(part)
+
+    if isinstance(part.get("cache_control"), dict) and "cache_control" not in block:
+        block["cache_control"] = dict(part["cache_control"])
+    return block
+
+
+def _convert_content_to_anthropic(content: Any) -> Any:
+    """Convert OpenAI-style multimodal content arrays to Anthropic blocks."""
+    if not isinstance(content, list):
+        return content
+
+    converted = []
+    for part in content:
+        block = _convert_content_part_to_anthropic(part)
+        if block is not None:
+            converted.append(block)
+    return converted
+
+
 def convert_messages_to_anthropic(
    messages: List[Dict],
 ) -> Tuple[Optional[Any], List[Dict]]:
@@ -405,11 +593,9 @@ def convert_messages_to_anthropic(
            blocks = []
            if content:
                if isinstance(content, list):
-                    for part in content:
-                        if isinstance(part, dict):
-                            blocks.append(dict(part))
-                        elif part is not None:
-                            blocks.append({"type": "text", "text": str(part)})
+                    converted_content = _convert_content_to_anthropic(content)
+                    if isinstance(converted_content, list):
+                        blocks.extend(converted_content)
                else:
                    blocks.append({"type": "text", "text": str(content)})
            for tc in m.get("tool_calls", []):
@@ -458,7 +644,14 @@ def convert_messages_to_anthropic(
            continue

        # Regular user message
-        result.append({"role": "user", "content": content})
+        if isinstance(content, list):
+            converted_blocks = _convert_content_to_anthropic(content)
+            result.append({
+                "role": "user",
+                "content": converted_blocks or [{"type": "text", "text": ""}],
+            })
+        else:
+            result.append({"role": "user", "content": content})

    # Strip orphaned tool_use blocks (no matching tool_result follows)
    tool_result_ids = set()
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@@ -1,4 +1,4 @@
-"""Shared auxiliary OpenAI client for cheap/fast side tasks.
+"""Shared auxiliary client router for side tasks.

 Provides a single resolution chain so every consumer (context compression,
 session search, web extraction, vision analysis, browser vision) picks up
@@ -10,26 +10,30 @@ Resolution order for text tasks (auto mode):
  3. Custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY)
  4. Codex OAuth (Responses API via chatgpt.com with gpt-5.3-codex,
     wrapped to look like a chat.completions client)
-  5. Direct API-key providers (z.ai/GLM, Kimi/Moonshot, MiniMax, MiniMax-CN)
-     — checked via PROVIDER_REGISTRY entries with auth_type='api_key'
-  6. None
+  5. Native Anthropic
+  6. Direct API-key providers (z.ai/GLM, Kimi/Moonshot, MiniMax, MiniMax-CN)
+  7. None

 Resolution order for vision/multimodal tasks (auto mode):
-  1. OpenRouter
-  2. Nous Portal
-  3. Codex OAuth (gpt-5.3-codex supports vision via Responses API)
-  4. Custom endpoint (for local vision models: Qwen-VL, LLaVA, Pixtral, etc.)
-  5. None  (API-key providers like z.ai/Kimi/MiniMax are skipped —
-     they may not support multimodal)
+  1. Selected main provider, if it is one of the supported vision backends below
+  2. OpenRouter
+  3. Nous Portal
+  4. Codex OAuth (gpt-5.3-codex supports vision via Responses API)
+  5. Native Anthropic
+  6. Custom endpoint (for local vision models: Qwen-VL, LLaVA, Pixtral, etc.)
+  7. None

 Per-task provider overrides (e.g. AUXILIARY_VISION_PROVIDER,
-CONTEXT_COMPRESSION_PROVIDER) can force a specific provider for each task:
-"openrouter", "nous", "codex", or "main" (= steps 3-5).
+CONTEXT_COMPRESSION_PROVIDER) can force a specific provider for each task.
 Default "auto" follows the chains above.

 Per-task model overrides (e.g. AUXILIARY_VISION_MODEL,
 AUXILIARY_WEB_EXTRACT_MODEL) let callers use a different model slug
 than the provider's default.
+
+Per-task direct endpoint overrides (e.g. AUXILIARY_VISION_BASE_URL,
+AUXILIARY_VISION_API_KEY) let callers route a specific auxiliary task to a
+custom OpenAI-compatible endpoint without touching the main model settings.
 """

 import json
@@ -74,11 +78,15 @@ auxiliary_is_nous: bool = False
 _OPENROUTER_MODEL = "google/gemini-3-flash-preview"
 _NOUS_MODEL = "gemini-3-flash"
 _NOUS_DEFAULT_BASE_URL = "https://inference-api.nousresearch.com/v1"
+_ANTHROPIC_DEFAULT_BASE_URL = "https://api.anthropic.com"
 _AUTH_JSON_PATH = get_hermes_home() / "auth.json"

 # Codex fallback: uses the Responses API (the only endpoint the Codex
 # OAuth token can access) with a fast model for auxiliary tasks.
-_CODEX_AUX_MODEL = "gpt-5.3-codex"
+# ChatGPT-backed Codex accounts currently reject gpt-5.3-codex for these
+# auxiliary flows, while gpt-5.2-codex remains broadly available and supports
+# vision via Responses.
+_CODEX_AUX_MODEL = "gpt-5.2-codex"
 _CODEX_AUX_BASE_URL = "https://chatgpt.com/backend-api/codex"


@@ -309,6 +317,114 @@ class AsyncCodexAuxiliaryClient:
        self.base_url = sync_wrapper.base_url


+class _AnthropicCompletionsAdapter:
+    """OpenAI-client-compatible adapter for Anthropic Messages API."""
+
+    def __init__(self, real_client: Any, model: str):
+        self._client = real_client
+        self._model = model
+
+    def create(self, **kwargs) -> Any:
+        from agent.anthropic_adapter import build_anthropic_kwargs, normalize_anthropic_response
+
+        messages = kwargs.get("messages", [])
+        model = kwargs.get("model", self._model)
+        tools = kwargs.get("tools")
+        tool_choice = kwargs.get("tool_choice")
+        max_tokens = kwargs.get("max_tokens") or kwargs.get("max_completion_tokens") or 2000
+        temperature = kwargs.get("temperature")
+
+        normalized_tool_choice = None
+        if isinstance(tool_choice, str):
+            normalized_tool_choice = tool_choice
+        elif isinstance(tool_choice, dict):
+            choice_type = str(tool_choice.get("type", "")).lower()
+            if choice_type == "function":
+                normalized_tool_choice = tool_choice.get("function", {}).get("name")
+            elif choice_type in {"auto", "required", "none"}:
+                normalized_tool_choice = choice_type
+
+        anthropic_kwargs = build_anthropic_kwargs(
+            model=model,
+            messages=messages,
+            tools=tools,
+            max_tokens=max_tokens,
+            reasoning_config=None,
+            tool_choice=normalized_tool_choice,
+        )
+        if temperature is not None:
+            anthropic_kwargs["temperature"] = temperature
+
+        response = self._client.messages.create(**anthropic_kwargs)
+        assistant_message, finish_reason = normalize_anthropic_response(response)
+
+        usage = None
+        if hasattr(response, "usage") and response.usage:
+            prompt_tokens = getattr(response.usage, "input_tokens", 0) or 0
+            completion_tokens = getattr(response.usage, "output_tokens", 0) or 0
+            total_tokens = getattr(response.usage, "total_tokens", 0) or (prompt_tokens + completion_tokens)
+            usage = SimpleNamespace(
+                prompt_tokens=prompt_tokens,
+                completion_tokens=completion_tokens,
+                total_tokens=total_tokens,
+            )
+
+        choice = SimpleNamespace(
+            index=0,
+            message=assistant_message,
+            finish_reason=finish_reason,
+        )
+        return SimpleNamespace(
+            choices=[choice],
+            model=model,
+            usage=usage,
+        )
+
+
+class _AnthropicChatShim:
+    def __init__(self, adapter: _AnthropicCompletionsAdapter):
+        self.completions = adapter
+
+
+class AnthropicAuxiliaryClient:
+    """OpenAI-client-compatible wrapper over a native Anthropic client."""
+
+    def __init__(self, real_client: Any, model: str, api_key: str, base_url: str):
+        self._real_client = real_client
+        adapter = _AnthropicCompletionsAdapter(real_client, model)
+        self.chat = _AnthropicChatShim(adapter)
+        self.api_key = api_key
+        self.base_url = base_url
+
+    def close(self):
+        close_fn = getattr(self._real_client, "close", None)
+        if callable(close_fn):
+            close_fn()
+
+
+class _AsyncAnthropicCompletionsAdapter:
+    def __init__(self, sync_adapter: _AnthropicCompletionsAdapter):
+        self._sync = sync_adapter
+
+    async def create(self, **kwargs) -> Any:
+        import asyncio
+        return await asyncio.to_thread(self._sync.create, **kwargs)
+
+
+class _AsyncAnthropicChatShim:
+    def __init__(self, adapter: _AsyncAnthropicCompletionsAdapter):
+        self.completions = adapter
+
+
+class AsyncAnthropicAuxiliaryClient:
+    def __init__(self, sync_wrapper: "AnthropicAuxiliaryClient"):
+        sync_adapter = sync_wrapper.chat.completions
+        async_adapter = _AsyncAnthropicCompletionsAdapter(sync_adapter)
+        self.chat = _AsyncAnthropicChatShim(async_adapter)
+        self.api_key = sync_wrapper.api_key
+        self.base_url = sync_wrapper.base_url
+
+
 def _read_nous_auth() -> Optional[dict]:
    """Read and validate ~/.hermes/auth.json for an active Nous provider.

@@ -380,6 +496,9 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
                break
        if not api_key:
            continue
+        if provider_id == "anthropic":
+            return _try_anthropic()
+
        # Resolve base URL (with optional env-var override)
        # Kimi Code keys (sk-kimi-) need api.kimi.com/coding/v1
        env_url = ""
@@ -418,6 +537,17 @@ def _get_auxiliary_provider(task: str = "") -> str:
    return "auto"


+def _get_auxiliary_env_override(task: str, suffix: str) -> Optional[str]:
+    """Read an auxiliary env override from AUXILIARY_* or CONTEXT_* prefixes."""
+    if not task:
+        return None
+    for prefix in ("AUXILIARY_", "CONTEXT_"):
+        val = os.getenv(f"{prefix}{task.upper()}_{suffix}", "").strip()
+        if val:
+            return val
+    return None
+
+
 def _try_openrouter() -> Tuple[Optional[OpenAI], Optional[str]]:
    or_key = os.getenv("OPENROUTER_API_KEY")
    if not or_key:
@@ -465,9 +595,44 @@ def _read_main_model() -> str:
    return ""


+def _resolve_custom_runtime() -> Tuple[Optional[str], Optional[str]]:
+    """Resolve the active custom/main endpoint the same way the main CLI does.
+
+    This covers both env-driven OPENAI_BASE_URL setups and config-saved custom
+    endpoints where the base URL lives in config.yaml instead of the live
+    environment.
+    """
+    try:
+        from hermes_cli.runtime_provider import resolve_runtime_provider
+
+        runtime = resolve_runtime_provider(requested="custom")
+    except Exception as exc:
+        logger.debug("Auxiliary client: custom runtime resolution failed: %s", exc)
+        return None, None
+
+    custom_base = runtime.get("base_url")
+    custom_key = runtime.get("api_key")
+    if not isinstance(custom_base, str) or not custom_base.strip():
+        return None, None
+    if not isinstance(custom_key, str) or not custom_key.strip():
+        return None, None
+
+    custom_base = custom_base.strip().rstrip("/")
+    if "openrouter.ai" in custom_base.lower():
+        # requested='custom' falls back to OpenRouter when no custom endpoint is
+        # configured. Treat that as "no custom endpoint" for auxiliary routing.
+        return None, None
+
+    return custom_base, custom_key.strip()
+
+
+def _current_custom_base_url() -> str:
+    custom_base, _ = _resolve_custom_runtime()
+    return custom_base or ""
+
+
 def _try_custom_endpoint() -> Tuple[Optional[OpenAI], Optional[str]]:
-    custom_base = os.getenv("OPENAI_BASE_URL")
-    custom_key = os.getenv("OPENAI_API_KEY")
+    custom_base, custom_key = _resolve_custom_runtime()
    if not custom_base or not custom_key:
        return None, None
    model = _read_main_model() or "gpt-4o-mini"
@@ -484,6 +649,22 @@ def _try_codex() -> Tuple[Optional[Any], Optional[str]]:
    return CodexAuxiliaryClient(real_client, _CODEX_AUX_MODEL), _CODEX_AUX_MODEL


+def _try_anthropic() -> Tuple[Optional[Any], Optional[str]]:
+    try:
+        from agent.anthropic_adapter import build_anthropic_client, resolve_anthropic_token
+    except ImportError:
+        return None, None
+
+    token = resolve_anthropic_token()
+    if not token:
+        return None, None
+
+    model = _API_KEY_PROVIDER_AUX_MODELS.get("anthropic", "claude-haiku-4-5-20251001")
+    logger.debug("Auxiliary client: Anthropic native (%s)", model)
+    real_client = build_anthropic_client(token, _ANTHROPIC_DEFAULT_BASE_URL)
+    return AnthropicAuxiliaryClient(real_client, model, token, _ANTHROPIC_DEFAULT_BASE_URL), model
+
+
 def _resolve_forced_provider(forced: str) -> Tuple[Optional[OpenAI], Optional[str]]:
    """Resolve a specific forced provider.  Returns (None, None) if creds missing."""
    if forced == "openrouter":
@@ -546,6 +727,8 @@ def _to_async_client(sync_client, model: str):

    if isinstance(sync_client, CodexAuxiliaryClient):
        return AsyncCodexAuxiliaryClient(sync_client), model
+    if isinstance(sync_client, AnthropicAuxiliaryClient):
+        return AsyncAnthropicAuxiliaryClient(sync_client), model

    async_kwargs = {
        "api_key": sync_client.api_key,
@@ -564,6 +747,8 @@ def resolve_provider_client(
    model: str = None,
    async_mode: bool = False,
    raw_codex: bool = False,
+    explicit_base_url: str = None,
+    explicit_api_key: str = None,
 ) -> Tuple[Optional[Any], Optional[str]]:
    """Central router: given a provider name and optional model, return a
    configured client with the correct auth, base URL, and API format.
@@ -585,6 +770,8 @@ def resolve_provider_client(
            instead of wrapping in CodexAuxiliaryClient.  Use this when
            the caller needs direct access to responses.stream() (e.g.,
            the main agent loop).
+        explicit_base_url: Optional direct OpenAI-compatible endpoint.
+        explicit_api_key: Optional API key paired with explicit_base_url.

    Returns:
        (client, resolved_model) or (None, None) if auth is unavailable.
@@ -661,6 +848,22 @@ def resolve_provider_client(

    # ── Custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY) ───────────
    if provider == "custom":
+        if explicit_base_url:
+            custom_base = explicit_base_url.strip()
+            custom_key = (
+                (explicit_api_key or "").strip()
+                or os.getenv("OPENAI_API_KEY", "").strip()
+            )
+            if not custom_base or not custom_key:
+                logger.warning(
+                    "resolve_provider_client: explicit custom endpoint requested "
+                    "but no API key was found (set explicit_api_key or OPENAI_API_KEY)"
+                )
+                return None, None
+            final_model = model or _read_main_model() or "gpt-4o-mini"
+            client = OpenAI(api_key=custom_key, base_url=custom_base)
+            return (_to_async_client(client, final_model) if async_mode
+                    else (client, final_model))
        # Try custom first, then codex, then API-key providers
        for try_fn in (_try_custom_endpoint, _try_codex,
                       _resolve_api_key_provider):
@@ -686,6 +889,14 @@ def resolve_provider_client(
        return None, None

    if pconfig.auth_type == "api_key":
+        if provider == "anthropic":
+            client, default_model = _try_anthropic()
+            if client is None:
+                logger.warning("resolve_provider_client: anthropic requested but no Anthropic credentials found")
+                return None, None
+            final_model = model or default_model
+            return (_to_async_client(client, final_model) if async_mode else (client, final_model))
+
        # Find the first configured API key
        api_key = ""
        for env_var in pconfig.api_key_env_vars:
@@ -749,10 +960,13 @@ def get_text_auxiliary_client(task: str = "") -> Tuple[Optional[OpenAI], Optiona
    Callers may override the returned model with a per-task env var
    (e.g. CONTEXT_COMPRESSION_MODEL, AUXILIARY_WEB_EXTRACT_MODEL).
    """
-    forced = _get_auxiliary_provider(task)
-    if forced != "auto":
-        return resolve_provider_client(forced)
-    return resolve_provider_client("auto")
+    provider, model, base_url, api_key = _resolve_task_provider_model(task or None)
+    return resolve_provider_client(
+        provider,
+        model=model,
+        explicit_base_url=base_url,
+        explicit_api_key=api_key,
+    )


 def get_async_text_auxiliary_client(task: str = ""):
@@ -762,54 +976,154 @@ def get_async_text_auxiliary_client(task: str = ""):
    (AsyncCodexAuxiliaryClient, model) which wraps the Responses API.
    Returns (None, None) when no provider is available.
    """
-    forced = _get_auxiliary_provider(task)
-    if forced != "auto":
-        return resolve_provider_client(forced, async_mode=True)
-    return resolve_provider_client("auto", async_mode=True)
+    provider, model, base_url, api_key = _resolve_task_provider_model(task or None)
+    return resolve_provider_client(
+        provider,
+        model=model,
+        async_mode=True,
+        explicit_base_url=base_url,
+        explicit_api_key=api_key,
+    )


-def get_vision_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]:
-    """Return (client, default_model_slug) for vision/multimodal auxiliary tasks.
+_VISION_AUTO_PROVIDER_ORDER = (
+    "openrouter",
+    "nous",
+    "openai-codex",
+    "anthropic",
+    "custom",
+)

-    Checks AUXILIARY_VISION_PROVIDER for a forced provider, otherwise
-    auto-detects.  Callers may override the returned model with
-    AUXILIARY_VISION_MODEL.

-    In auto mode, only providers known to support multimodal are tried:
-    OpenRouter, Nous Portal, and Codex OAuth (gpt-5.3-codex supports
-    vision via the Responses API).  Custom endpoints and API-key
-    providers are skipped — they may not handle vision input.  To use
-    them, set AUXILIARY_VISION_PROVIDER explicitly.
-    """
-    forced = _get_auxiliary_provider("vision")
-    if forced != "auto":
-        return resolve_provider_client(forced)
-    # Auto: try providers known to support multimodal first, then fall
-    # back to the user's custom endpoint.  Many local models (Qwen-VL,
-    # LLaVA, Pixtral, etc.) support vision — skipping them entirely
-    # caused silent failures for local-only users.
-    for try_fn in (_try_openrouter, _try_nous, _try_codex,
-                   _try_custom_endpoint):
-        client, model = try_fn()
-        if client is not None:
-            return client, model
-    logger.debug("Auxiliary vision client: none available")
+def _normalize_vision_provider(provider: Optional[str]) -> str:
+    provider = (provider or "auto").strip().lower()
+    if provider == "codex":
+        return "openai-codex"
+    if provider == "main":
+        return "custom"
+    return provider
+
+
+def _resolve_strict_vision_backend(provider: str) -> Tuple[Optional[Any], Optional[str]]:
+    provider = _normalize_vision_provider(provider)
+    if provider == "openrouter":
+        return _try_openrouter()
+    if provider == "nous":
+        return _try_nous()
+    if provider == "openai-codex":
+        return _try_codex()
+    if provider == "anthropic":
+        return _try_anthropic()
+    if provider == "custom":
+        return _try_custom_endpoint()
    return None, None


-def get_async_vision_auxiliary_client():
-    """Return (async_client, model_slug) for async vision consumers.
+def _strict_vision_backend_available(provider: str) -> bool:
+    return _resolve_strict_vision_backend(provider)[0] is not None

-    Properly handles Codex routing — unlike manually constructing
-    AsyncOpenAI from a sync client, this preserves the Responses API
-    adapter for Codex providers.

-    Returns (None, None) when no provider is available.
+def _preferred_main_vision_provider() -> Optional[str]:
+    """Return the selected main provider when it is also a supported vision backend."""
+    try:
+        from hermes_cli.config import load_config
+
+        config = load_config()
+        model_cfg = config.get("model", {})
+        if isinstance(model_cfg, dict):
+            provider = _normalize_vision_provider(model_cfg.get("provider", ""))
+            if provider in _VISION_AUTO_PROVIDER_ORDER:
+                return provider
+    except Exception:
+        pass
+    return None
+
+
+def get_available_vision_backends() -> List[str]:
+    """Return the currently available vision backends in auto-selection order.
+
+    This is the single source of truth for setup, tool gating, and runtime
+    auto-routing of vision tasks. The selected main provider is preferred when
+    it is also a known-good vision backend; otherwise Hermes falls back through
+    the standard conservative order.
    """
-    sync_client, model = get_vision_auxiliary_client()
-    if sync_client is None:
-        return None, None
-    return _to_async_client(sync_client, model)
+    ordered = list(_VISION_AUTO_PROVIDER_ORDER)
+    preferred = _preferred_main_vision_provider()
+    if preferred in ordered:
+        ordered.remove(preferred)
+        ordered.insert(0, preferred)
+    return [provider for provider in ordered if _strict_vision_backend_available(provider)]
+
+
+def resolve_vision_provider_client(
+    provider: Optional[str] = None,
+    model: Optional[str] = None,
+    *,
+    base_url: Optional[str] = None,
+    api_key: Optional[str] = None,
+    async_mode: bool = False,
+) -> Tuple[Optional[str], Optional[Any], Optional[str]]:
+    """Resolve the client actually used for vision tasks.
+
+    Direct endpoint overrides take precedence over provider selection. Explicit
+    provider overrides still use the generic provider router for non-standard
+    backends, so users can intentionally force experimental providers. Auto mode
+    stays conservative and only tries vision backends known to work today.
+    """
+    requested, resolved_model, resolved_base_url, resolved_api_key = _resolve_task_provider_model(
+        "vision", provider, model, base_url, api_key
+    )
+    requested = _normalize_vision_provider(requested)
+
+    def _finalize(resolved_provider: str, sync_client: Any, default_model: Optional[str]):
+        if sync_client is None:
+            return resolved_provider, None, None
+        final_model = resolved_model or default_model
+        if async_mode:
+            async_client, async_model = _to_async_client(sync_client, final_model)
+            return resolved_provider, async_client, async_model
+        return resolved_provider, sync_client, final_model
+
+    if resolved_base_url:
+        client, final_model = resolve_provider_client(
+            "custom",
+            model=resolved_model,
+            async_mode=async_mode,
+            explicit_base_url=resolved_base_url,
+            explicit_api_key=resolved_api_key,
+        )
+        if client is None:
+            return "custom", None, None
+        return "custom", client, final_model
+
+    if requested == "auto":
+        for candidate in get_available_vision_backends():
+            sync_client, default_model = _resolve_strict_vision_backend(candidate)
+            if sync_client is not None:
+                return _finalize(candidate, sync_client, default_model)
+        logger.debug("Auxiliary vision client: none available")
+        return None, None, None
+
+    if requested in _VISION_AUTO_PROVIDER_ORDER:
+        sync_client, default_model = _resolve_strict_vision_backend(requested)
+        return _finalize(requested, sync_client, default_model)
+
+    client, final_model = _get_cached_client(requested, resolved_model, async_mode)
+    if client is None:
+        return requested, None, None
+    return requested, client, final_model
+
+
+def get_vision_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]:
+    """Return (client, default_model_slug) for vision/multimodal auxiliary tasks."""
+    _, client, final_model = resolve_vision_provider_client(async_mode=False)
+    return client, final_model
+
+
+def get_async_vision_auxiliary_client():
+    """Return (async_client, model_slug) for async vision consumers."""
+    _, client, final_model = resolve_vision_provider_client(async_mode=True)
+    return client, final_model


 def get_auxiliary_extra_body() -> dict:
@@ -829,7 +1143,7 @@ def auxiliary_max_tokens_param(value: int) -> dict:
    The Codex adapter translates max_tokens internally, so we use max_tokens
    for it as well.
    """
-    custom_base = os.getenv("OPENAI_BASE_URL", "")
+    custom_base = _current_custom_base_url()
    or_key = os.getenv("OPENROUTER_API_KEY")
    # Only use max_completion_tokens for direct OpenAI custom endpoints
    if (not or_key
@@ -851,19 +1165,29 @@ def auxiliary_max_tokens_param(value: int) -> dict:
 # Every auxiliary LLM consumer should use these instead of manually
 # constructing clients and calling .chat.completions.create().

-# Client cache: (provider, async_mode) -> (client, default_model)
+# Client cache: (provider, async_mode, base_url, api_key) -> (client, default_model)
 _client_cache: Dict[tuple, tuple] = {}


 def _get_cached_client(
-    provider: str, model: str = None, async_mode: bool = False,
+    provider: str,
+    model: str = None,
+    async_mode: bool = False,
+    base_url: str = None,
+    api_key: str = None,
 ) -> Tuple[Optional[Any], Optional[str]]:
    """Get or create a cached client for the given provider."""
-    cache_key = (provider, async_mode)
+    cache_key = (provider, async_mode, base_url or "", api_key or "")
    if cache_key in _client_cache:
        cached_client, cached_default = _client_cache[cache_key]
        return cached_client, model or cached_default
-    client, default_model = resolve_provider_client(provider, model, async_mode)
+    client, default_model = resolve_provider_client(
+        provider,
+        model,
+        async_mode,
+        explicit_base_url=base_url,
+        explicit_api_key=api_key,
+    )
    if client is not None:
        _client_cache[cache_key] = (client, default_model)
    return client, model or default_model
@@ -873,57 +1197,75 @@ def _resolve_task_provider_model(
    task: str = None,
    provider: str = None,
    model: str = None,
-) -> Tuple[str, Optional[str]]:
+    base_url: str = None,
+    api_key: str = None,
+) -> Tuple[str, Optional[str], Optional[str], Optional[str]]:
    """Determine provider + model for a call.

    Priority:
-      1. Explicit provider/model args (always win)
-      2. Env var overrides (AUXILIARY_{TASK}_PROVIDER, etc.)
-      3. Config file (auxiliary.{task}.provider/model or compression.*)
+      1. Explicit provider/model/base_url/api_key args (always win)
+      2. Env var overrides (AUXILIARY_{TASK}_*, CONTEXT_{TASK}_*)
+      3. Config file (auxiliary.{task}.* or compression.*)
      4. "auto" (full auto-detection chain)

-    Returns (provider, model) where model may be None (use provider default).
+    Returns (provider, model, base_url, api_key) where model may be None
+    (use provider default). When base_url is set, provider is forced to
+    "custom" and the task uses that direct endpoint.
    """
-    if provider:
-        return provider, model
+    config = {}
+    cfg_provider = None
+    cfg_model = None
+    cfg_base_url = None
+    cfg_api_key = None

    if task:
-        # Check env var overrides first
-        env_provider = _get_auxiliary_provider(task)
-        if env_provider != "auto":
-            # Check for env var model override too
-            env_model = None
-            for prefix in ("AUXILIARY_", "CONTEXT_"):
-                val = os.getenv(f"{prefix}{task.upper()}_MODEL", "").strip()
-                if val:
-                    env_model = val
-                    break
-            return env_provider, model or env_model
-
-        # Read from config file
        try:
            from hermes_cli.config import load_config
            config = load_config()
        except ImportError:
-            return "auto", model
+            config = {}

-        # Check auxiliary.{task} section
-        aux = config.get("auxiliary", {})
-        task_config = aux.get(task, {})
-        cfg_provider = task_config.get("provider", "").strip() or None
-        cfg_model = task_config.get("model", "").strip() or None
+        aux = config.get("auxiliary", {}) if isinstance(config, dict) else {}
+        task_config = aux.get(task, {}) if isinstance(aux, dict) else {}
+        if not isinstance(task_config, dict):
+            task_config = {}
+        cfg_provider = str(task_config.get("provider", "")).strip() or None
+        cfg_model = str(task_config.get("model", "")).strip() or None
+        cfg_base_url = str(task_config.get("base_url", "")).strip() or None
+        cfg_api_key = str(task_config.get("api_key", "")).strip() or None

        # Backwards compat: compression section has its own keys
        if task == "compression" and not cfg_provider:
-            comp = config.get("compression", {})
-            cfg_provider = comp.get("summary_provider", "").strip() or None
-            cfg_model = cfg_model or comp.get("summary_model", "").strip() or None
+            comp = config.get("compression", {}) if isinstance(config, dict) else {}
+            if isinstance(comp, dict):
+                cfg_provider = comp.get("summary_provider", "").strip() or None
+                cfg_model = cfg_model or comp.get("summary_model", "").strip() or None

+    env_model = _get_auxiliary_env_override(task, "MODEL") if task else None
+    resolved_model = model or env_model or cfg_model
+
+    if base_url:
+        return "custom", resolved_model, base_url, api_key
+    if provider:
+        return provider, resolved_model, base_url, api_key
+
+    if task:
+        env_base_url = _get_auxiliary_env_override(task, "BASE_URL")
+        env_api_key = _get_auxiliary_env_override(task, "API_KEY")
+        if env_base_url:
+            return "custom", resolved_model, env_base_url, env_api_key or cfg_api_key
+
+        env_provider = _get_auxiliary_provider(task)
+        if env_provider != "auto":
+            return env_provider, resolved_model, None, None
+
+        if cfg_base_url:
+            return "custom", resolved_model, cfg_base_url, cfg_api_key
        if cfg_provider and cfg_provider != "auto":
-            return cfg_provider, model or cfg_model
-        return "auto", model or cfg_model
+            return cfg_provider, resolved_model, None, None
+        return "auto", resolved_model, None, None

-    return "auto", model
+    return "auto", resolved_model, None, None


 def _build_call_kwargs(
@@ -935,6 +1277,7 @@ def _build_call_kwargs(
    tools: Optional[list] = None,
    timeout: float = 30.0,
    extra_body: Optional[dict] = None,
+    base_url: Optional[str] = None,
 ) -> dict:
    """Build kwargs for .chat.completions.create() with model/provider adjustments."""
    kwargs: Dict[str, Any] = {
@@ -950,7 +1293,7 @@ def _build_call_kwargs(
        # Codex adapter handles max_tokens internally; OpenRouter/Nous use max_tokens.
        # Direct OpenAI api.openai.com with newer models needs max_completion_tokens.
        if provider == "custom":
-            custom_base = os.getenv("OPENAI_BASE_URL", "")
+            custom_base = base_url or _current_custom_base_url()
            if "api.openai.com" in custom_base.lower():
                kwargs["max_completion_tokens"] = max_tokens
            else:
@@ -976,6 +1319,8 @@ def call_llm(
    *,
    provider: str = None,
    model: str = None,
+    base_url: str = None,
+    api_key: str = None,
    messages: list,
    temperature: float = None,
    max_tokens: int = None,
@@ -1007,26 +1352,57 @@ def call_llm(
    Raises:
        RuntimeError: If no provider is configured.
    """
-    resolved_provider, resolved_model = _resolve_task_provider_model(
-        task, provider, model)
+    resolved_provider, resolved_model, resolved_base_url, resolved_api_key = _resolve_task_provider_model(
+        task, provider, model, base_url, api_key)

-    client, final_model = _get_cached_client(resolved_provider, resolved_model)
-    if client is None:
-        # Fallback: try openrouter
-        if resolved_provider != "openrouter":
-            logger.warning("Provider %s unavailable, falling back to openrouter",
-                           resolved_provider)
-            client, final_model = _get_cached_client(
-                "openrouter", resolved_model or _OPENROUTER_MODEL)
-    if client is None:
-        raise RuntimeError(
-            f"No LLM provider configured for task={task} provider={resolved_provider}. "
-            f"Run: hermes setup")
+    if task == "vision":
+        effective_provider, client, final_model = resolve_vision_provider_client(
+            provider=provider,
+            model=model,
+            base_url=base_url,
+            api_key=api_key,
+            async_mode=False,
+        )
+        if client is None and resolved_provider != "auto" and not resolved_base_url:
+            logger.warning(
+                "Vision provider %s unavailable, falling back to auto vision backends",
+                resolved_provider,
+            )
+            effective_provider, client, final_model = resolve_vision_provider_client(
+                provider="auto",
+                model=resolved_model,
+                async_mode=False,
+            )
+        if client is None:
+            raise RuntimeError(
+                f"No LLM provider configured for task={task} provider={resolved_provider}. "
+                f"Run: hermes setup"
+            )
+        resolved_provider = effective_provider or resolved_provider
+    else:
+        client, final_model = _get_cached_client(
+            resolved_provider,
+            resolved_model,
+            base_url=resolved_base_url,
+            api_key=resolved_api_key,
+        )
+        if client is None:
+            # Fallback: try openrouter
+            if resolved_provider != "openrouter" and not resolved_base_url:
+                logger.warning("Provider %s unavailable, falling back to openrouter",
+                               resolved_provider)
+                client, final_model = _get_cached_client(
+                    "openrouter", resolved_model or _OPENROUTER_MODEL)
+        if client is None:
+            raise RuntimeError(
+                f"No LLM provider configured for task={task} provider={resolved_provider}. "
+                f"Run: hermes setup")

    kwargs = _build_call_kwargs(
        resolved_provider, final_model, messages,
        temperature=temperature, max_tokens=max_tokens,
-        tools=tools, timeout=timeout, extra_body=extra_body)
+        tools=tools, timeout=timeout, extra_body=extra_body,
+        base_url=resolved_base_url)

    # Handle max_tokens vs max_completion_tokens retry
    try:
@@ -1045,6 +1421,8 @@ async def async_call_llm(
    *,
    provider: str = None,
    model: str = None,
+    base_url: str = None,
+    api_key: str = None,
    messages: list,
    temperature: float = None,
    max_tokens: int = None,
@@ -1056,27 +1434,58 @@ async def async_call_llm(

    Same as call_llm() but async. See call_llm() for full documentation.
    """
-    resolved_provider, resolved_model = _resolve_task_provider_model(
-        task, provider, model)
+    resolved_provider, resolved_model, resolved_base_url, resolved_api_key = _resolve_task_provider_model(
+        task, provider, model, base_url, api_key)

-    client, final_model = _get_cached_client(
-        resolved_provider, resolved_model, async_mode=True)
-    if client is None:
-        if resolved_provider != "openrouter":
-            logger.warning("Provider %s unavailable, falling back to openrouter",
-                           resolved_provider)
-            client, final_model = _get_cached_client(
-                "openrouter", resolved_model or _OPENROUTER_MODEL,
-                async_mode=True)
-    if client is None:
-        raise RuntimeError(
-            f"No LLM provider configured for task={task} provider={resolved_provider}. "
-            f"Run: hermes setup")
+    if task == "vision":
+        effective_provider, client, final_model = resolve_vision_provider_client(
+            provider=provider,
+            model=model,
+            base_url=base_url,
+            api_key=api_key,
+            async_mode=True,
+        )
+        if client is None and resolved_provider != "auto" and not resolved_base_url:
+            logger.warning(
+                "Vision provider %s unavailable, falling back to auto vision backends",
+                resolved_provider,
+            )
+            effective_provider, client, final_model = resolve_vision_provider_client(
+                provider="auto",
+                model=resolved_model,
+                async_mode=True,
+            )
+        if client is None:
+            raise RuntimeError(
+                f"No LLM provider configured for task={task} provider={resolved_provider}. "
+                f"Run: hermes setup"
+            )
+        resolved_provider = effective_provider or resolved_provider
+    else:
+        client, final_model = _get_cached_client(
+            resolved_provider,
+            resolved_model,
+            async_mode=True,
+            base_url=resolved_base_url,
+            api_key=resolved_api_key,
+        )
+        if client is None:
+            if resolved_provider != "openrouter" and not resolved_base_url:
+                logger.warning("Provider %s unavailable, falling back to openrouter",
+                               resolved_provider)
+                client, final_model = _get_cached_client(
+                    "openrouter", resolved_model or _OPENROUTER_MODEL,
+                    async_mode=True)
+        if client is None:
+            raise RuntimeError(
+                f"No LLM provider configured for task={task} provider={resolved_provider}. "
+                f"Run: hermes setup")

    kwargs = _build_call_kwargs(
        resolved_provider, final_model, messages,
        temperature=temperature, max_tokens=max_tokens,
-        tools=tools, timeout=timeout, extra_body=extra_body)
+        tools=tools, timeout=timeout, extra_body=extra_body,
+        base_url=resolved_base_url)

    try:
        return await client.chat.completions.create(**kwargs)
--- a/agent/display.py
+++ b/agent/display.py
@@ -59,6 +59,32 @@ def get_skin_tool_prefix() -> str:
    return "┊"


+def get_tool_emoji(tool_name: str, default: str = "⚡") -> str:
+    """Get the display emoji for a tool.
+
+    Resolution order:
+    1. Active skin's ``tool_emojis`` overrides (if a skin is loaded)
+    2. Tool registry's per-tool ``emoji`` field
+    3. *default* fallback
+    """
+    # 1. Skin override
+    skin = _get_skin()
+    if skin and skin.tool_emojis:
+        override = skin.tool_emojis.get(tool_name)
+        if override:
+            return override
+    # 2. Registry default
+    try:
+        from tools.registry import registry
+        emoji = registry.get_emoji(tool_name, default="")
+        if emoji:
+            return emoji
+    except Exception:
+        pass
+    # 3. Hardcoded fallback
+    return default
+
+
 # =========================================================================
 # Tool preview (one-line summary of a tool call's primary argument)
 # =========================================================================
@@ -80,7 +106,7 @@ def build_tool_preview(tool_name: str, args: dict, max_len: int = 40) -> str | N
        "image_generate": "prompt", "text_to_speech": "text",
        "vision_analyze": "question", "mixture_of_agents": "user_prompt",
        "skill_view": "name", "skills_list": "category",
-        "schedule_cronjob": "name",
+        "cronjob": "action",
        "execute_code": "code", "delegate_task": "goal",
        "clarify": "question", "skill_manage": "name",
    }
@@ -513,12 +539,15 @@ def get_cute_tool_message(
        return _wrap(f"┊ 🧠 reason    {_trunc(args.get('user_prompt', ''), 30)}  {dur}")
    if tool_name == "send_message":
        return _wrap(f"┊ 📨 send      {args.get('target', '?')}: \"{_trunc(args.get('message', ''), 25)}\"  {dur}")
-    if tool_name == "schedule_cronjob":
-        return _wrap(f"┊ ⏰ schedule  {_trunc(args.get('name', args.get('prompt', 'task')), 30)}  {dur}")
-    if tool_name == "list_cronjobs":
-        return _wrap(f"┊ ⏰ jobs      listing  {dur}")
-    if tool_name == "remove_cronjob":
-        return _wrap(f"┊ ⏰ remove    job {args.get('job_id', '?')}  {dur}")
+    if tool_name == "cronjob":
+        action = args.get("action", "?")
+        if action == "create":
+            skills = args.get("skills") or ([] if not args.get("skill") else [args.get("skill")])
+            label = args.get("name") or (skills[0] if skills else None) or args.get("prompt", "task")
+            return _wrap(f"┊ ⏰ cron      create {_trunc(label, 24)}  {dur}")
+        if action == "list":
+            return _wrap(f"┊ ⏰ cron      listing  {dur}")
+        return _wrap(f"┊ ⏰ cron      {action} {args.get('job_id', '')}  {dur}")
    if tool_name.startswith("rl_"):
        rl = {
            "rl_list_environments": "list envs", "rl_select_environment": f"select {args.get('name', '')}",
--- a/agent/skill_commands.py
+++ b/agent/skill_commands.py
@@ -1,17 +1,151 @@
-"""Skill slash commands — scan installed skills and build invocation messages.
+"""Shared slash command helpers for skills and built-in prompt-style modes.

 Shared between CLI (cli.py) and gateway (gateway/run.py) so both surfaces
-can invoke skills via /skill-name commands.
+can invoke skills via /skill-name commands and prompt-only built-ins like
+/plan.
 """

 import json
 import logging
+import re
+from datetime import datetime
 from pathlib import Path
 from typing import Any, Dict, Optional

 logger = logging.getLogger(__name__)

 _skill_commands: Dict[str, Dict[str, Any]] = {}
+_PLAN_SLUG_RE = re.compile(r"[^a-z0-9]+")
+
+
+def build_plan_path(
+    user_instruction: str = "",
+    *,
+    now: datetime | None = None,
+) -> Path:
+    """Return the default workspace-relative markdown path for a /plan invocation.
+
+    Relative paths are intentional: file tools are task/backend-aware and resolve
+    them against the active working directory for local, docker, ssh, modal,
+    daytona, and similar terminal backends. That keeps the plan with the active
+    workspace instead of the Hermes host's global home directory.
+    """
+    slug_source = (user_instruction or "").strip().splitlines()[0] if user_instruction else ""
+    slug = _PLAN_SLUG_RE.sub("-", slug_source.lower()).strip("-")
+    if slug:
+        slug = "-".join(part for part in slug.split("-")[:8] if part)[:48].strip("-")
+    slug = slug or "conversation-plan"
+    timestamp = (now or datetime.now()).strftime("%Y-%m-%d_%H%M%S")
+    return Path(".hermes") / "plans" / f"{timestamp}-{slug}.md"
+
+
+def _load_skill_payload(skill_identifier: str, task_id: str | None = None) -> tuple[dict[str, Any], Path | None, str] | None:
+    """Load a skill by name/path and return (loaded_payload, skill_dir, display_name)."""
+    raw_identifier = (skill_identifier or "").strip()
+    if not raw_identifier:
+        return None
+
+    try:
+        from tools.skills_tool import SKILLS_DIR, skill_view
+
+        identifier_path = Path(raw_identifier).expanduser()
+        if identifier_path.is_absolute():
+            try:
+                normalized = str(identifier_path.resolve().relative_to(SKILLS_DIR.resolve()))
+            except Exception:
+                normalized = raw_identifier
+        else:
+            normalized = raw_identifier.lstrip("/")
+
+        loaded_skill = json.loads(skill_view(normalized, task_id=task_id))
+    except Exception:
+        return None
+
+    if not loaded_skill.get("success"):
+        return None
+
+    skill_name = str(loaded_skill.get("name") or normalized)
+    skill_path = str(loaded_skill.get("path") or "")
+    skill_dir = None
+    if skill_path:
+        try:
+            skill_dir = SKILLS_DIR / Path(skill_path).parent
+        except Exception:
+            skill_dir = None
+
+    return loaded_skill, skill_dir, skill_name
+
+
+def _build_skill_message(
+    loaded_skill: dict[str, Any],
+    skill_dir: Path | None,
+    activation_note: str,
+    user_instruction: str = "",
+    runtime_note: str = "",
+) -> str:
+    """Format a loaded skill into a user/system message payload."""
+    from tools.skills_tool import SKILLS_DIR
+
+    content = str(loaded_skill.get("content") or "")
+
+    parts = [activation_note, "", content.strip()]
+
+    if loaded_skill.get("setup_skipped"):
+        parts.extend(
+            [
+                "",
+                "[Skill setup note: Required environment setup was skipped. Continue loading the skill and explain any reduced functionality if it matters.]",
+            ]
+        )
+    elif loaded_skill.get("gateway_setup_hint"):
+        parts.extend(
+            [
+                "",
+                f"[Skill setup note: {loaded_skill['gateway_setup_hint']}]",
+            ]
+        )
+    elif loaded_skill.get("setup_needed") and loaded_skill.get("setup_note"):
+        parts.extend(
+            [
+                "",
+                f"[Skill setup note: {loaded_skill['setup_note']}]",
+            ]
+        )
+
+    supporting = []
+    linked_files = loaded_skill.get("linked_files") or {}
+    for entries in linked_files.values():
+        if isinstance(entries, list):
+            supporting.extend(entries)
+
+    if not supporting and skill_dir:
+        for subdir in ("references", "templates", "scripts", "assets"):
+            subdir_path = skill_dir / subdir
+            if subdir_path.exists():
+                for f in sorted(subdir_path.rglob("*")):
+                    if f.is_file():
+                        rel = str(f.relative_to(skill_dir))
+                        supporting.append(rel)
+
+    if supporting and skill_dir:
+        skill_view_target = str(skill_dir.relative_to(SKILLS_DIR))
+        parts.append("")
+        parts.append("[This skill has supporting files you can load with the skill_view tool:]")
+        for sf in supporting:
+            parts.append(f"- {sf}")
+        parts.append(
+            f'\nTo view any of these, use: skill_view(name="{skill_view_target}", file_path="<path>")'
+        )
+
+    if user_instruction:
+        parts.append("")
+        parts.append(f"The user has provided the following instruction alongside the skill invocation: {user_instruction}")
+
+    if runtime_note:
+        parts.append("")
+        parts.append(f"[Runtime note: {runtime_note}]")
+
+    return "\n".join(parts)


 def scan_skill_commands() -> Dict[str, Dict[str, Any]]:
@@ -68,6 +202,7 @@ def build_skill_invocation_message(
    cmd_key: str,
    user_instruction: str = "",
    task_id: str | None = None,
+    runtime_note: str = "",
 ) -> Optional[str]:
    """Build the user message content for a skill slash command invocation.

@@ -83,77 +218,61 @@ def build_skill_invocation_message(
    if not skill_info:
        return None

-    skill_name = skill_info["name"]
-    skill_path = skill_info["skill_dir"]
+    loaded = _load_skill_payload(skill_info["skill_dir"], task_id=task_id)
+    if not loaded:
+        return f"[Failed to load skill: {skill_info['name']}]"

-    try:
-        from tools.skills_tool import SKILLS_DIR, skill_view
+    loaded_skill, skill_dir, skill_name = loaded
+    activation_note = (
+        f'[SYSTEM: The user has invoked the "{skill_name}" skill, indicating they want '
+        "you to follow its instructions. The full skill content is loaded below.]"
+    )
+    return _build_skill_message(
+        loaded_skill,
+        skill_dir,
+        activation_note,
+        user_instruction=user_instruction,
+        runtime_note=runtime_note,
+    )

-        loaded_skill = json.loads(skill_view(skill_path, task_id=task_id))
-    except Exception:
-        return f"[Failed to load skill: {skill_name}]"

-    if not loaded_skill.get("success"):
-        return f"[Failed to load skill: {skill_name}]"
+def build_preloaded_skills_prompt(
+    skill_identifiers: list[str],
+    task_id: str | None = None,
+) -> tuple[str, list[str], list[str]]:
+    """Load one or more skills for session-wide CLI preloading.

-    content = str(loaded_skill.get("content") or "")
-    skill_dir = Path(skill_info["skill_dir"])
+    Returns (prompt_text, loaded_skill_names, missing_identifiers).
+    """
+    prompt_parts: list[str] = []
+    loaded_names: list[str] = []
+    missing: list[str] = []

-    parts = [
-        f'[SYSTEM: The user has invoked the "{skill_name}" skill, indicating they want you to follow its instructions. The full skill content is loaded below.]',
-        "",
-        content.strip(),
-    ]
+    seen: set[str] = set()
+    for raw_identifier in skill_identifiers:
+        identifier = (raw_identifier or "").strip()
+        if not identifier or identifier in seen:
+            continue
+        seen.add(identifier)

-    if loaded_skill.get("setup_skipped"):
-        parts.extend(
-            [
-                "",
-                "[Skill setup note: Required environment setup was skipped. Continue loading the skill and explain any reduced functionality if it matters.]",
-            ]
+        loaded = _load_skill_payload(identifier, task_id=task_id)
+        if not loaded:
+            missing.append(identifier)
+            continue
+
+        loaded_skill, skill_dir, skill_name = loaded
+        activation_note = (
+            f'[SYSTEM: The user launched this CLI session with the "{skill_name}" skill '
+            "preloaded. Treat its instructions as active guidance for the duration of this "
+            "session unless the user overrides them.]"
        )
-    elif loaded_skill.get("gateway_setup_hint"):
-        parts.extend(
-            [
-                "",
-                f"[Skill setup note: {loaded_skill['gateway_setup_hint']}]",
-            ]
-        )
-    elif loaded_skill.get("setup_needed") and loaded_skill.get("setup_note"):
-        parts.extend(
-            [
-                "",
-                f"[Skill setup note: {loaded_skill['setup_note']}]",
-            ]
+        prompt_parts.append(
+            _build_skill_message(
+                loaded_skill,
+                skill_dir,
+                activation_note,
+            )
        )
+        loaded_names.append(skill_name)

-    supporting = []
-    linked_files = loaded_skill.get("linked_files") or {}
-    for entries in linked_files.values():
-        if isinstance(entries, list):
-            supporting.extend(entries)
-
-    if not supporting:
-        for subdir in ("references", "templates", "scripts", "assets"):
-            subdir_path = skill_dir / subdir
-            if subdir_path.exists():
-                for f in sorted(subdir_path.rglob("*")):
-                    if f.is_file():
-                        rel = str(f.relative_to(skill_dir))
-                        supporting.append(rel)
-
-    if supporting:
-        skill_view_target = str(Path(skill_path).relative_to(SKILLS_DIR))
-        parts.append("")
-        parts.append("[This skill has supporting files you can load with the skill_view tool:]")
-        for sf in supporting:
-            parts.append(f"- {sf}")
-        parts.append(
-            f'\nTo view any of these, use: skill_view(name="{skill_view_target}", file_path="<path>")'
-        )
-
-    if user_instruction:
-        parts.append("")
-        parts.append(f"The user has provided the following instruction alongside the skill invocation: {user_instruction}")
-
-    return "\n".join(parts)
+    return "\n\n".join(prompt_parts), loaded_names, missing
--- a/cli-config.yaml.example
+++ b/cli-config.yaml.example
@@ -456,7 +456,7 @@ platform_toolsets:
 #   moa          - mixture_of_agents  (requires OPENROUTER_API_KEY)
 #   todo         - todo (in-memory task planning, no deps)
 #   tts          - text_to_speech  (Edge TTS free, or ELEVENLABS/OPENAI key)
-#   cronjob      - schedule_cronjob, list_cronjobs, remove_cronjob
+#   cronjob      - cronjob (create/list/update/pause/resume/run/remove scheduled tasks)
 #   rl           - rl_list_environments, rl_start_training, etc. (requires TINKER_API_KEY)
 #
 # PRESETS (curated bundles):
--- a/cli.py
+++ b/cli.py
--- a/cron/init.py
+++ b/cron/init.py
@@ -7,7 +7,8 @@ This module provides scheduled task execution, allowing the agent to:
 - Execute tasks in isolated sessions (no prior context)

 Cron jobs are executed automatically by the gateway daemon:
-    hermes gateway install    # Install as system service (recommended)
+    hermes gateway install    # Install as a user service
+    sudo hermes gateway install --system  # Linux servers: boot-time system service
    hermes gateway            # Or run in foreground

 The gateway ticks the scheduler every 60 seconds. A file lock prevents
@@ -20,6 +21,9 @@ from cron.jobs import (
    list_jobs,
    remove_job,
    update_job,
+    pause_job,
+    resume_job,
+    trigger_job,
    JOBS_FILE,
 )
 from cron.scheduler import tick
@@ -30,6 +34,9 @@ __all__ = [
    "list_jobs",
    "remove_job",
    "update_job",
+    "pause_job",
+    "resume_job",
+    "trigger_job",
    "tick",
    "JOBS_FILE",
 ]
--- a/cron/jobs.py
+++ b/cron/jobs.py
@@ -32,6 +32,32 @@ JOBS_FILE = CRON_DIR / "jobs.json"
 OUTPUT_DIR = CRON_DIR / "output"


+def _normalize_skill_list(skill: Optional[str] = None, skills: Optional[Any] = None) -> List[str]:
+    """Normalize legacy/single-skill and multi-skill inputs into a unique ordered list."""
+    if skills is None:
+        raw_items = [skill] if skill else []
+    elif isinstance(skills, str):
+        raw_items = [skills]
+    else:
+        raw_items = list(skills)
+
+    normalized: List[str] = []
+    for item in raw_items:
+        text = str(item or "").strip()
+        if text and text not in normalized:
+            normalized.append(text)
+    return normalized
+
+
+def _apply_skill_fields(job: Dict[str, Any]) -> Dict[str, Any]:
+    """Return a job dict with canonical `skills` and legacy `skill` fields aligned."""
+    normalized = dict(job)
+    skills = _normalize_skill_list(normalized.get("skill"), normalized.get("skills"))
+    normalized["skills"] = skills
+    normalized["skill"] = skills[0] if skills else None
+    return normalized
+
+
 def _secure_dir(path: Path):
    """Set directory to owner-only access (0700). No-op on Windows."""
    try:
@@ -263,39 +289,63 @@ def create_job(
    name: Optional[str] = None,
    repeat: Optional[int] = None,
    deliver: Optional[str] = None,
-    origin: Optional[Dict[str, Any]] = None
+    origin: Optional[Dict[str, Any]] = None,
+    skill: Optional[str] = None,
+    skills: Optional[List[str]] = None,
+    model: Optional[str] = None,
+    provider: Optional[str] = None,
+    base_url: Optional[str] = None,
 ) -> Dict[str, Any]:
    """
    Create a new cron job.
-    
+
    Args:
-        prompt: The prompt to run (must be self-contained)
+        prompt: The prompt to run (must be self-contained, or a task instruction when skill is set)
        schedule: Schedule string (see parse_schedule)
        name: Optional friendly name
        repeat: How many times to run (None = forever, 1 = once)
        deliver: Where to deliver output ("origin", "local", "telegram", etc.)
        origin: Source info where job was created (for "origin" delivery)
-    
+        skill: Optional legacy single skill name to load before running the prompt
+        skills: Optional ordered list of skills to load before running the prompt
+        model: Optional per-job model override
+        provider: Optional per-job provider override
+        base_url: Optional per-job base URL override
+
    Returns:
        The created job dict
    """
    parsed_schedule = parse_schedule(schedule)
-    
+
    # Auto-set repeat=1 for one-shot schedules if not specified
    if parsed_schedule["kind"] == "once" and repeat is None:
        repeat = 1
-    
+
    # Default delivery to origin if available, otherwise local
    if deliver is None:
        deliver = "origin" if origin else "local"
-    
+
    job_id = uuid.uuid4().hex[:12]
    now = _hermes_now().isoformat()
-    
+
+    normalized_skills = _normalize_skill_list(skill, skills)
+    normalized_model = str(model).strip() if isinstance(model, str) else None
+    normalized_provider = str(provider).strip() if isinstance(provider, str) else None
+    normalized_base_url = str(base_url).strip().rstrip("/") if isinstance(base_url, str) else None
+    normalized_model = normalized_model or None
+    normalized_provider = normalized_provider or None
+    normalized_base_url = normalized_base_url or None
+
+    label_source = (prompt or (normalized_skills[0] if normalized_skills else None)) or "cron job"
    job = {
        "id": job_id,
-        "name": name or prompt[:50].strip(),
+        "name": name or label_source[:50].strip(),
        "prompt": prompt,
+        "skills": normalized_skills,
+        "skill": normalized_skills[0] if normalized_skills else None,
+        "model": normalized_model,
+        "provider": normalized_provider,
+        "base_url": normalized_base_url,
        "schedule": parsed_schedule,
        "schedule_display": parsed_schedule.get("display", schedule),
        "repeat": {
@@ -303,6 +353,9 @@ def create_job(
            "completed": 0
        },
        "enabled": True,
+        "state": "scheduled",
+        "paused_at": None,
+        "paused_reason": None,
        "created_at": now,
        "next_run_at": compute_next_run(parsed_schedule),
        "last_run_at": None,
@@ -312,11 +365,11 @@ def create_job(
        "deliver": deliver,
        "origin": origin,  # Tracks where job was created for "origin" delivery
    }
-    
+
    jobs = load_jobs()
    jobs.append(job)
    save_jobs(jobs)
-    
+
    return job


@@ -325,29 +378,100 @@ def get_job(job_id: str) -> Optional[Dict[str, Any]]:
    jobs = load_jobs()
    for job in jobs:
        if job["id"] == job_id:
-            return job
+            return _apply_skill_fields(job)
    return None


 def list_jobs(include_disabled: bool = False) -> List[Dict[str, Any]]:
    """List all jobs, optionally including disabled ones."""
-    jobs = load_jobs()
+    jobs = [_apply_skill_fields(j) for j in load_jobs()]
    if not include_disabled:
        jobs = [j for j in jobs if j.get("enabled", True)]
    return jobs


 def update_job(job_id: str, updates: Dict[str, Any]) -> Optional[Dict[str, Any]]:
-    """Update a job by ID."""
+    """Update a job by ID, refreshing derived schedule fields when needed."""
    jobs = load_jobs()
    for i, job in enumerate(jobs):
-        if job["id"] == job_id:
-            jobs[i] = {**job, **updates}
-            save_jobs(jobs)
-            return jobs[i]
+        if job["id"] != job_id:
+            continue
+
+        updated = _apply_skill_fields({**job, **updates})
+        schedule_changed = "schedule" in updates
+
+        if "skills" in updates or "skill" in updates:
+            normalized_skills = _normalize_skill_list(updated.get("skill"), updated.get("skills"))
+            updated["skills"] = normalized_skills
+            updated["skill"] = normalized_skills[0] if normalized_skills else None
+
+        if schedule_changed:
+            updated_schedule = updated["schedule"]
+            updated["schedule_display"] = updates.get(
+                "schedule_display",
+                updated_schedule.get("display", updated.get("schedule_display")),
+            )
+            if updated.get("state") != "paused":
+                updated["next_run_at"] = compute_next_run(updated_schedule)
+
+        if updated.get("enabled", True) and updated.get("state") != "paused" and not updated.get("next_run_at"):
+            updated["next_run_at"] = compute_next_run(updated["schedule"])
+
+        jobs[i] = updated
+        save_jobs(jobs)
+        return _apply_skill_fields(jobs[i])
    return None


+def pause_job(job_id: str, reason: Optional[str] = None) -> Optional[Dict[str, Any]]:
+    """Pause a job without deleting it."""
+    return update_job(
+        job_id,
+        {
+            "enabled": False,
+            "state": "paused",
+            "paused_at": _hermes_now().isoformat(),
+            "paused_reason": reason,
+        },
+    )
+
+
+def resume_job(job_id: str) -> Optional[Dict[str, Any]]:
+    """Resume a paused job and compute the next future run from now."""
+    job = get_job(job_id)
+    if not job:
+        return None
+
+    next_run_at = compute_next_run(job["schedule"])
+    return update_job(
+        job_id,
+        {
+            "enabled": True,
+            "state": "scheduled",
+            "paused_at": None,
+            "paused_reason": None,
+            "next_run_at": next_run_at,
+        },
+    )
+
+
+def trigger_job(job_id: str) -> Optional[Dict[str, Any]]:
+    """Schedule a job to run on the next scheduler tick."""
+    job = get_job(job_id)
+    if not job:
+        return None
+    return update_job(
+        job_id,
+        {
+            "enabled": True,
+            "state": "scheduled",
+            "paused_at": None,
+            "paused_reason": None,
+            "next_run_at": _hermes_now().isoformat(),
+        },
+    )
+
+
 def remove_job(job_id: str) -> bool:
    """Remove a job by ID."""
    jobs = load_jobs()
@@ -389,11 +513,14 @@ def mark_job_run(job_id: str, success: bool, error: Optional[str] = None):
            
            # Compute next run
            job["next_run_at"] = compute_next_run(job["schedule"], now)
-            
+
            # If no next run (one-shot completed), disable
            if job["next_run_at"] is None:
                job["enabled"] = False
-            
+                job["state"] = "completed"
+            elif job.get("state") != "paused":
+                job["state"] = "scheduled"
+
            save_jobs(jobs)
            return
    
@@ -403,21 +530,21 @@ def mark_job_run(job_id: str, success: bool, error: Optional[str] = None):
 def get_due_jobs() -> List[Dict[str, Any]]:
    """Get all jobs that are due to run now."""
    now = _hermes_now()
-    jobs = load_jobs()
+    jobs = [_apply_skill_fields(j) for j in load_jobs()]
    due = []
-    
+
    for job in jobs:
        if not job.get("enabled", True):
            continue
-        
+
        next_run = job.get("next_run_at")
        if not next_run:
            continue
-        
+
        next_run_dt = _ensure_aware(datetime.fromisoformat(next_run))
        if next_run_dt <= now:
            due.append(job)
-    
+
    return due


--- a/cron/scheduler.py
+++ b/cron/scheduler.py
@@ -9,6 +9,7 @@ runs at a time if multiple processes overlap.
 """

 import asyncio
+import json
 import logging
 import os
 import sys
@@ -174,6 +175,43 @@ def _deliver_result(job: dict, content: str) -> None:
            logger.warning("Job '%s': mirror_to_session failed: %s", job["id"], e)


+def _build_job_prompt(job: dict) -> str:
+    """Build the effective prompt for a cron job, optionally loading one or more skills first."""
+    prompt = job.get("prompt", "")
+    skills = job.get("skills")
+    if skills is None:
+        legacy = job.get("skill")
+        skills = [legacy] if legacy else []
+
+    skill_names = [str(name).strip() for name in skills if str(name).strip()]
+    if not skill_names:
+        return prompt
+
+    from tools.skills_tool import skill_view
+
+    parts = []
+    for skill_name in skill_names:
+        loaded = json.loads(skill_view(skill_name))
+        if not loaded.get("success"):
+            error = loaded.get("error") or f"Failed to load skill '{skill_name}'"
+            raise RuntimeError(error)
+
+        content = str(loaded.get("content") or "").strip()
+        if parts:
+            parts.append("")
+        parts.extend(
+            [
+                f'[SYSTEM: The user has invoked the "{skill_name}" skill, indicating they want you to follow its instructions. The full skill content is loaded below.]',
+                "",
+                content,
+            ]
+        )
+
+    if prompt:
+        parts.extend(["", f"The user has provided the following instruction alongside the skill invocation: {prompt}"])
+    return "\n".join(parts)
+
+
 def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
    """
    Execute a single cron job.
@@ -194,10 +232,9 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
    
    job_id = job["id"]
    job_name = job["name"]
-    prompt = job["prompt"]
+    prompt = _build_job_prompt(job)
    origin = _resolve_origin(job)
-    delivery_target = _resolve_delivery_target(job)
-    
+
    logger.info("Running job '%s' (ID: %s)", job_name, job_id)
    logger.info("Prompt: %s", prompt[:100])

@@ -207,11 +244,6 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
        os.environ["HERMES_SESSION_CHAT_ID"] = str(origin["chat_id"])
        if origin.get("chat_name"):
            os.environ["HERMES_SESSION_CHAT_NAME"] = origin["chat_name"]
-    if delivery_target:
-        os.environ["HERMES_CRON_AUTO_DELIVER_PLATFORM"] = delivery_target["platform"]
-        os.environ["HERMES_CRON_AUTO_DELIVER_CHAT_ID"] = str(delivery_target["chat_id"])
-        if delivery_target.get("thread_id") is not None:
-            os.environ["HERMES_CRON_AUTO_DELIVER_THREAD_ID"] = str(delivery_target["thread_id"])

    try:
        # Re-read .env and config.yaml fresh every run so provider/key
@@ -222,7 +254,14 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
        except UnicodeDecodeError:
            load_dotenv(str(_hermes_home / ".env"), override=True, encoding="latin-1")

-        model = os.getenv("HERMES_MODEL") or "anthropic/claude-opus-4.6"
+        delivery_target = _resolve_delivery_target(job)
+        if delivery_target:
+            os.environ["HERMES_CRON_AUTO_DELIVER_PLATFORM"] = delivery_target["platform"]
+            os.environ["HERMES_CRON_AUTO_DELIVER_CHAT_ID"] = str(delivery_target["chat_id"])
+            if delivery_target.get("thread_id") is not None:
+                os.environ["HERMES_CRON_AUTO_DELIVER_THREAD_ID"] = str(delivery_target["thread_id"])
+
+        model = job.get("model") or os.getenv("HERMES_MODEL") or "anthropic/claude-opus-4.6"

        # Load config.yaml for model, reasoning, prefill, toolsets, provider routing
        _cfg = {}
@@ -233,10 +272,11 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
                with open(_cfg_path) as _f:
                    _cfg = yaml.safe_load(_f) or {}
                _model_cfg = _cfg.get("model", {})
-                if isinstance(_model_cfg, str):
-                    model = _model_cfg
-                elif isinstance(_model_cfg, dict):
-                    model = _model_cfg.get("default", model)
+                if not job.get("model"):
+                    if isinstance(_model_cfg, str):
+                        model = _model_cfg
+                    elif isinstance(_model_cfg, dict):
+                        model = _model_cfg.get("default", model)
        except Exception as e:
            logger.warning("Job '%s': failed to load config.yaml, using defaults: %s", job_id, e)

@@ -281,9 +321,12 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
            format_runtime_provider_error,
        )
        try:
-            runtime = resolve_runtime_provider(
-                requested=os.getenv("HERMES_INFERENCE_PROVIDER"),
-            )
+            runtime_kwargs = {
+                "requested": job.get("provider") or os.getenv("HERMES_INFERENCE_PROVIDER"),
+            }
+            if job.get("base_url"):
+                runtime_kwargs["explicit_base_url"] = job.get("base_url")
+            runtime = resolve_runtime_provider(**runtime_kwargs)
        except Exception as exc:
            message = format_runtime_provider_error(exc)
            raise RuntimeError(message) from exc
@@ -301,6 +344,7 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
            providers_ignored=pr.get("ignore"),
            providers_order=pr.get("order"),
            provider_sort=pr.get("sort"),
+            disabled_toolsets=["cronjob"],
            quiet_mode=True,
            platform="cron",
            session_id=f"cron_{job_id}_{_hermes_now().strftime('%Y%m%d_%H%M%S')}",
--- a/environments/tool_call_parsers/deepseek_v3_parser.py
+++ b/environments/tool_call_parsers/deepseek_v3_parser.py
@@ -10,12 +10,13 @@ Format uses special unicode tokens:
    <｜tool▁call▁end｜>
    <｜tool▁calls▁end｜>

-Based on VLLM's DeepSeekV3ToolParser.extract_tool_calls()
+Fixes Issue #989: Support for multiple simultaneous tool calls.
 """

 import re
 import uuid
-from typing import List, Optional
+import logging
+from typing import List, Optional, Tuple

 from openai.types.chat.chat_completion_message_tool_call import (
    ChatCompletionMessageToolCall,
@@ -24,6 +25,7 @@ from openai.types.chat.chat_completion_message_tool_call import (

 from environments.tool_call_parsers import ParseResult, ToolCallParser, register_parser

+logger = logging.getLogger(__name__)

@register_parser("deepseek_v3")
 class DeepSeekV3ToolCallParser(ToolCallParser):
@@ -32,45 +34,56 @@ class DeepSeekV3ToolCallParser(ToolCallParser):

    Uses special unicode tokens with fullwidth angle brackets and block elements.
    Extracts type, function name, and JSON arguments from the structured format.
+    Ensures all tool calls are captured when the model executes multiple actions.
    """

    START_TOKEN = "<｜tool▁calls▁begin｜>"

-    # Regex captures: type, function_name, function_arguments
+    # Updated PATTERN: Using \s* instead of literal \n for increased robustness
+    # against variations in model formatting (Issue #989).
    PATTERN = re.compile(
-        r"<｜tool▁call▁begin｜>(?P<type>.*?)<｜tool▁sep｜>(?P<function_name>.*?)\n```json\n(?P<function_arguments>.*?)\n```<｜tool▁call▁end｜>",
+        r"<｜tool▁call▁begin｜>(?P<type>.*?)<｜tool▁sep｜>(?P<function_name>.*?)\s*```json\s*(?P<function_arguments>.*?)\s*```\s*<｜tool▁call▁end｜>",
        re.DOTALL,
    )

    def parse(self, text: str) -> ParseResult:
+        """
+        Parses the input text and extracts all available tool calls.
+        """
        if self.START_TOKEN not in text:
            return text, None

        try:
-            matches = self.PATTERN.findall(text)
+            # Using finditer to capture ALL tool calls in the sequence
+            matches = list(self.PATTERN.finditer(text))
            if not matches:
                return text, None

            tool_calls: List[ChatCompletionMessageToolCall] = []
+            
            for match in matches:
-                tc_type, func_name, func_args = match
+                func_name = match.group("function_name").strip()
+                func_args = match.group("function_arguments").strip()
+                
                tool_calls.append(
                    ChatCompletionMessageToolCall(
                        id=f"call_{uuid.uuid4().hex[:8]}",
                        type="function",
                        function=Function(
-                            name=func_name.strip(),
-                            arguments=func_args.strip(),
+                            name=func_name,
+                            arguments=func_args,
                        ),
                    )
                )

-            if not tool_calls:
-                return text, None
+            if tool_calls:
+                # Content is text before the first tool call block
+                content_index = text.find(self.START_TOKEN)
+                content = text[:content_index].strip()
+                return content if content else None, tool_calls

-            # Content is everything before the tool calls section
-            content = text[: text.find(self.START_TOKEN)].strip()
-            return content if content else None, tool_calls
-
-        except Exception:
+            return text, None
+
+        except Exception as e:
+            logger.error(f"Error parsing DeepSeek V3 tool calls: {e}")
            return text, None
--- a/gateway/config.py
+++ b/gateway/config.py
@@ -21,6 +21,17 @@ from hermes_cli.config import get_hermes_home
 logger = logging.getLogger(__name__)


+def _coerce_bool(value: Any, default: bool = True) -> bool:
+    """Coerce bool-ish config values, preserving a caller-provided default."""
+    if value is None:
+        return default
+    if isinstance(value, bool):
+        return value
+    if isinstance(value, str):
+        return value.strip().lower() in ("true", "1", "yes", "on")
+    return bool(value)
+
+
 class Platform(Enum):
    """Supported messaging platforms."""
    LOCAL = "local"
@@ -160,6 +171,9 @@ class GatewayConfig:
    
    # Delivery settings
    always_log_local: bool = True  # Always save cron outputs to local files
+
+    # STT settings
+    stt_enabled: bool = True  # Whether to auto-transcribe inbound voice messages
    
    def get_connected_platforms(self) -> List[Platform]:
        """Return list of platforms that are enabled and configured."""
@@ -224,6 +238,7 @@ class GatewayConfig:
            "quick_commands": self.quick_commands,
            "sessions_dir": str(self.sessions_dir),
            "always_log_local": self.always_log_local,
+            "stt_enabled": self.stt_enabled,
        }
    
    @classmethod
@@ -260,6 +275,10 @@ class GatewayConfig:
        if not isinstance(quick_commands, dict):
            quick_commands = {}

+        stt_enabled = data.get("stt_enabled")
+        if stt_enabled is None:
+            stt_enabled = data.get("stt", {}).get("enabled") if isinstance(data.get("stt"), dict) else None
+
        return cls(
            platforms=platforms,
            default_reset_policy=default_policy,
@@ -269,6 +288,7 @@ class GatewayConfig:
            quick_commands=quick_commands,
            sessions_dir=sessions_dir,
            always_log_local=data.get("always_log_local", True),
+            stt_enabled=_coerce_bool(stt_enabled, True),
        )


@@ -318,6 +338,12 @@ def load_gateway_config() -> GatewayConfig:
                else:
                    logger.warning("Ignoring invalid quick_commands in config.yaml (expected mapping, got %s)", type(qc).__name__)

+            # Bridge STT enable/disable from config.yaml into gateway runtime.
+            # This keeps the gateway aligned with the user-facing config source.
+            stt_cfg = yaml_cfg.get("stt")
+            if isinstance(stt_cfg, dict) and "enabled" in stt_cfg:
+                config.stt_enabled = _coerce_bool(stt_cfg.get("enabled"), True)
+
            # Bridge discord settings from config.yaml to env vars
            # (env vars take precedence — only set if not already defined)
            discord_cfg = yaml_cfg.get("discord", {})
--- a/gateway/delivery.py
+++ b/gateway/delivery.py
@@ -315,7 +315,7 @@ def build_delivery_context_for_tool(
    origin: Optional[SessionSource] = None
 ) -> Dict[str, Any]:
    """
-    Build context for the schedule_cronjob tool to understand delivery options.
+    Build context for the unified cronjob tool to understand delivery options.
    
    This is passed to the tool so it can validate and explain delivery targets.
    """
--- a/gateway/platforms/ADDING_A_PLATFORM.md
+++ b/gateway/platforms/ADDING_A_PLATFORM.md
@@ -173,7 +173,7 @@ platform_map = {
 }
 ```

-Without this, `schedule_cronjob(deliver="your_platform")` silently fails.
+Without this, `cronjob(action="create", deliver="your_platform", ...)` silently fails.

 ---

--- a/gateway/platforms/base.py
+++ b/gateway/platforms/base.py
@@ -288,6 +288,7 @@ class MessageEvent:
    message_id: Optional[str] = None
    
    # Media attachments
+    # media_urls: local file paths (for vision tool access)
    media_urls: List[str] = field(default_factory=list)
    media_types: List[str] = field(default_factory=list)
    
@@ -355,6 +356,10 @@ class BasePlatformAdapter(ABC):
        # Key: session_key (e.g., chat_id), Value: (event, asyncio.Event for interrupt)
        self._active_sessions: Dict[str, asyncio.Event] = {}
        self._pending_messages: Dict[str, MessageEvent] = {}
+        # Background message-processing tasks spawned by handle_message().
+        # Gateway shutdown cancels these so an old gateway instance doesn't keep
+        # working on a task after --replace or manual restarts.
+        self._background_tasks: set[asyncio.Task] = set()
        # Chats where auto-TTS on voice input is disabled (set by /voice off)
        self._auto_tts_disabled_chats: set = set()

@@ -751,7 +756,25 @@ class BasePlatformAdapter(ABC):
        
        # Check if there's already an active handler for this session
        if session_key in self._active_sessions:
-            # Store this as a pending message - it will interrupt the running agent
+            # Special case: photo bursts/albums frequently arrive as multiple near-
+            # simultaneous messages. Queue them without interrupting the active run,
+            # then process them immediately after the current task finishes.
+            if event.message_type == MessageType.PHOTO:
+                print(f"[{self.name}] 🖼️ Queuing photo follow-up for session {session_key} without interrupt")
+                existing = self._pending_messages.get(session_key)
+                if existing and existing.message_type == MessageType.PHOTO:
+                    existing.media_urls.extend(event.media_urls)
+                    existing.media_types.extend(event.media_types)
+                    if event.text:
+                        if not existing.text:
+                            existing.text = event.text
+                        elif event.text not in existing.text:
+                            existing.text = f"{existing.text}\n\n{event.text}".strip()
+                else:
+                    self._pending_messages[session_key] = event
+                return  # Don't interrupt now - will run after current task completes
+
+            # Default behavior for non-photo follow-ups: interrupt the running agent
            print(f"[{self.name}] ⚡ New message while session {session_key} is active - triggering interrupt")
            self._pending_messages[session_key] = event
            # Signal the interrupt (the processing task checks this)
@@ -759,7 +782,15 @@ class BasePlatformAdapter(ABC):
            return  # Don't process now - will be handled after current task finishes
        
        # Spawn background task to process this message
-        asyncio.create_task(self._process_message_background(event, session_key))
+        task = asyncio.create_task(self._process_message_background(event, session_key))
+        try:
+            self._background_tasks.add(task)
+        except TypeError:
+            # Some tests stub create_task() with lightweight sentinels that are not
+            # hashable and do not support lifecycle callbacks.
+            return
+        if hasattr(task, "add_done_callback"):
+            task.add_done_callback(self._background_tasks.discard)
    
    @staticmethod
    def _get_human_delay() -> float:
@@ -969,6 +1000,21 @@ class BasePlatformAdapter(ABC):
            if session_key in self._active_sessions:
                del self._active_sessions[session_key]
    
+    async def cancel_background_tasks(self) -> None:
+        """Cancel any in-flight background message-processing tasks.
+
+        Used during gateway shutdown/replacement so active sessions from the old
+        process do not keep running after adapters are being torn down.
+        """
+        tasks = [task for task in self._background_tasks if not task.done()]
+        for task in tasks:
+            task.cancel()
+        if tasks:
+            await asyncio.gather(*tasks, return_exceptions=True)
+        self._background_tasks.clear()
+        self._pending_messages.clear()
+        self._active_sessions.clear()
+
    def has_pending_interrupt(self, session_key: str) -> bool:
        """Check if there's a pending interrupt for a session."""
        return session_key in self._active_sessions and self._active_sessions[session_key].is_set()
--- a/gateway/platforms/discord.py
+++ b/gateway/platforms/discord.py
@@ -87,8 +87,9 @@ class VoiceReceiver:
    SAMPLE_RATE = 48000        # Discord native rate
    CHANNELS = 2               # Discord sends stereo

-    def __init__(self, voice_client):
+    def __init__(self, voice_client, allowed_user_ids: set = None):
        self._vc = voice_client
+        self._allowed_user_ids = allowed_user_ids or set()
        self._running = False

        # Decryption
@@ -274,19 +275,21 @@ class VoiceReceiver:
        if self._dave_session:
            with self._lock:
                user_id = self._ssrc_to_user.get(ssrc, 0)
-            if user_id == 0:
-                if self._packet_debug_count <= 10:
-                    logger.warning("DAVE skip: unknown user for ssrc=%d", ssrc)
-                return  # unknown user, can't DAVE-decrypt
-            try:
-                import davey
-                decrypted = self._dave_session.decrypt(
-                    user_id, davey.MediaType.audio, decrypted
-                )
-            except Exception as e:
-                if self._packet_debug_count <= 10:
-                    logger.warning("DAVE decrypt failed for ssrc=%d: %s", ssrc, e)
-                return
+            if user_id:
+                try:
+                    import davey
+                    decrypted = self._dave_session.decrypt(
+                        user_id, davey.MediaType.audio, decrypted
+                    )
+                except Exception as e:
+                    # Unencrypted passthrough — use NaCl-decrypted data as-is
+                    if "Unencrypted" not in str(e):
+                        if self._packet_debug_count <= 10:
+                            logger.warning("DAVE decrypt failed for ssrc=%d: %s", ssrc, e)
+                        return
+            # If SSRC unknown (no SPEAKING event yet), skip DAVE and try
+            # Opus decode directly — audio may be in passthrough mode.
+            # Buffer will get a user_id when SPEAKING event arrives later.

        # --- Opus decode -> PCM ---
        try:
@@ -304,6 +307,32 @@ class VoiceReceiver:
    # Silence detection
    # ------------------------------------------------------------------

+    def _infer_user_for_ssrc(self, ssrc: int) -> int:
+        """Try to infer user_id for an unmapped SSRC.
+
+        When the bot rejoins a voice channel, Discord may not resend
+        SPEAKING events for users already speaking.  If exactly one
+        allowed user is in the channel, map the SSRC to them.
+        """
+        try:
+            channel = self._vc.channel
+            if not channel:
+                return 0
+            bot_id = self._vc.user.id if self._vc.user else 0
+            allowed = self._allowed_user_ids
+            candidates = [
+                m.id for m in channel.members
+                if m.id != bot_id and (not allowed or str(m.id) in allowed)
+            ]
+            if len(candidates) == 1:
+                uid = candidates[0]
+                self._ssrc_to_user[ssrc] = uid
+                logger.info("Auto-mapped ssrc=%d -> user=%d (sole allowed member)", ssrc, uid)
+                return uid
+        except Exception:
+            pass
+        return 0
+
    def check_silence(self) -> list:
        """Return list of (user_id, pcm_bytes) for completed utterances."""
        now = time.monotonic()
@@ -322,6 +351,10 @@ class VoiceReceiver:

                if silence_duration >= self.SILENCE_THRESHOLD and buf_duration >= self.MIN_SPEECH_DURATION:
                    user_id = ssrc_user_map.get(ssrc, 0)
+                    if not user_id:
+                        # SSRC not mapped (SPEAKING event missing after bot rejoin).
+                        # Infer from allowed users in the voice channel.
+                        user_id = self._infer_user_for_ssrc(ssrc)
                    if user_id:
                        completed.append((user_id, bytes(buf)))
                    self._buffers[ssrc] = bytearray()
@@ -400,6 +433,9 @@ class DiscordAdapter(BasePlatformAdapter):
        self._voice_listen_tasks: Dict[int, asyncio.Task] = {}  # guild_id -> listen loop
        self._voice_input_callback: Optional[Callable] = None  # set by run.py
        self._on_voice_disconnect: Optional[Callable] = None  # set by run.py
+        # Track threads where the bot has participated so follow-up messages
+        # in those threads don't require @mention.
+        self._bot_participated_threads: set = set()
    
    async def connect(self) -> bool:
        """Connect to Discord and start receiving events."""
@@ -580,7 +616,7 @@ class DiscordAdapter(BasePlatformAdapter):
        """Send a message to a Discord channel."""
        if not self._client:
            return SendResult(success=False, error="Not connected")
-        
+
        try:
            # Get the channel
            channel = self._client.get_channel(int(chat_id))
@@ -605,10 +641,30 @@ class DiscordAdapter(BasePlatformAdapter):
                    logger.debug("Could not fetch reply-to message: %s", e)
            
            for i, chunk in enumerate(chunks):
-                msg = await channel.send(
-                    content=chunk,
-                    reference=reference if i == 0 else None,
-                )
+                chunk_reference = reference if i == 0 else None
+                try:
+                    msg = await channel.send(
+                        content=chunk,
+                        reference=chunk_reference,
+                    )
+                except Exception as e:
+                    err_text = str(e)
+                    if (
+                        chunk_reference is not None
+                        and "error code: 50035" in err_text
+                        and "Cannot reply to a system message" in err_text
+                    ):
+                        logger.warning(
+                            "[%s] Reply target %s is a Discord system message; retrying send without reply reference",
+                            self.name,
+                            reply_to,
+                        )
+                        msg = await channel.send(
+                            content=chunk,
+                            reference=None,
+                        )
+                    else:
+                        raise
                message_ids.append(str(msg.id))
            
            return SendResult(
@@ -649,6 +705,7 @@ class DiscordAdapter(BasePlatformAdapter):
        chat_id: str,
        file_path: str,
        caption: Optional[str] = None,
+        file_name: Optional[str] = None,
    ) -> SendResult:
        """Send a local file as a Discord attachment."""
        if not self._client:
@@ -660,7 +717,7 @@ class DiscordAdapter(BasePlatformAdapter):
        if not channel:
            return SendResult(success=False, error=f"Channel {chat_id} not found")

-        filename = os.path.basename(file_path)
+        filename = file_name or os.path.basename(file_path)
        with open(file_path, "rb") as fh:
            file = discord.File(fh, filename=filename)
            msg = await channel.send(content=caption if caption else None, file=file)
@@ -674,13 +731,14 @@ class DiscordAdapter(BasePlatformAdapter):
    ) -> SendResult:
        """Play auto-TTS audio.

-        When the bot is in a voice channel for this chat's guild, skip the
-        file attachment — the gateway runner plays audio in the VC instead.
+        When the bot is in a voice channel for this chat's guild, play
+        directly in the VC instead of sending as a file attachment.
        """
        for gid, text_ch_id in self._voice_text_channels.items():
            if str(text_ch_id) == str(chat_id) and self.is_in_voice_channel(gid):
-                logger.debug("[%s] Skipping play_tts for %s — VC playback handled by runner", self.name, chat_id)
-                return SendResult(success=True)
+                logger.info("[%s] Playing TTS in voice channel (guild=%d)", self.name, gid)
+                success = await self.play_in_voice_channel(gid, audio_path)
+                return SendResult(success=success)
        return await self.send_voice(chat_id=chat_id, audio_path=audio_path, **kwargs)

    async def send_voice(
@@ -784,7 +842,7 @@ class DiscordAdapter(BasePlatformAdapter):

        # Start voice receiver (Phase 2: listen to users)
        try:
-            receiver = VoiceReceiver(vc)
+            receiver = VoiceReceiver(vc, allowed_user_ids=self._allowed_user_ids)
            receiver.start()
            self._voice_receivers[guild_id] = receiver
            self._voice_listen_tasks[guild_id] = asyncio.ensure_future(
@@ -980,14 +1038,32 @@ class DiscordAdapter(BasePlatformAdapter):
    # Voice listening (Phase 2)
    # ------------------------------------------------------------------

+    # UDP keepalive interval in seconds — prevents Discord from dropping
+    # the UDP route after ~60s of silence.
+    _KEEPALIVE_INTERVAL = 15
+
    async def _voice_listen_loop(self, guild_id: int):
        """Periodically check for completed utterances and process them."""
        receiver = self._voice_receivers.get(guild_id)
        if not receiver:
            return
+        last_keepalive = time.monotonic()
        try:
            while receiver._running:
                await asyncio.sleep(0.2)
+
+                # Send periodic UDP keepalive to prevent Discord from
+                # dropping the UDP session after ~60s of silence.
+                now = time.monotonic()
+                if now - last_keepalive >= self._KEEPALIVE_INTERVAL:
+                    last_keepalive = now
+                    try:
+                        vc = self._voice_clients.get(guild_id)
+                        if vc and vc.is_connected():
+                            vc._connection.send_packet(b'\xf8\xff\xfe')
+                    except Exception:
+                        pass
+
                completed = receiver.check_silence()
                for user_id, pcm_data in completed:
                    if not self._is_allowed_user(str(user_id)):
@@ -1121,6 +1197,41 @@ class DiscordAdapter(BasePlatformAdapter):
                exc_info=True,
            )
            return await super().send_image(chat_id, image_url, caption, reply_to)
+
+    async def send_video(
+        self,
+        chat_id: str,
+        video_path: str,
+        caption: Optional[str] = None,
+        reply_to: Optional[str] = None,
+        metadata: Optional[Dict[str, Any]] = None,
+    ) -> SendResult:
+        """Send a local video file natively as a Discord attachment."""
+        try:
+            return await self._send_file_attachment(chat_id, video_path, caption)
+        except FileNotFoundError:
+            return SendResult(success=False, error=f"Video file not found: {video_path}")
+        except Exception as e:  # pragma: no cover - defensive logging
+            logger.error("[%s] Failed to send local video, falling back to base adapter: %s", self.name, e, exc_info=True)
+            return await super().send_video(chat_id, video_path, caption, reply_to, metadata=metadata)
+
+    async def send_document(
+        self,
+        chat_id: str,
+        file_path: str,
+        caption: Optional[str] = None,
+        file_name: Optional[str] = None,
+        reply_to: Optional[str] = None,
+        metadata: Optional[Dict[str, Any]] = None,
+    ) -> SendResult:
+        """Send an arbitrary file natively as a Discord attachment."""
+        try:
+            return await self._send_file_attachment(chat_id, file_path, caption, file_name=file_name)
+        except FileNotFoundError:
+            return SendResult(success=False, error=f"File not found: {file_path}")
+        except Exception as e:  # pragma: no cover - defensive logging
+            logger.error("[%s] Failed to send document, falling back to base adapter: %s", self.name, e, exc_info=True)
+            return await super().send_document(chat_id, file_path, caption, file_name, reply_to, metadata=metadata)
    
    async def send_typing(self, chat_id: str, metadata=None) -> None:
        """Send typing indicator."""
@@ -1690,14 +1801,13 @@ class DiscordAdapter(BasePlatformAdapter):
    async def _handle_message(self, message: DiscordMessage) -> None:
        """Handle incoming Discord messages."""
        # In server channels (not DMs), require the bot to be @mentioned
-        # UNLESS the channel is in the free-response list.
+        # UNLESS the channel is in the free-response list or the message is
+        # in a thread where the bot has already participated.
        #
-        # Config:
-        #   DISCORD_FREE_RESPONSE_CHANNELS: Comma-separated channel IDs where the
-        #       bot responds to every message without needing a mention.
-        #   DISCORD_REQUIRE_MENTION: Set to "false" to disable mention requirement
-        #       globally (all channels become free-response). Default: "true".
-        #       Can also be set via discord.require_mention in config.yaml.
+        # Config (all settable via discord.* in config.yaml):
+        #   discord.require_mention: Require @mention in server channels (default: true)
+        #   discord.free_response_channels: Channel IDs where bot responds without mention
+        #   discord.auto_thread: Auto-create thread on @mention in channels (default: true)

        thread_id = None
        parent_channel_id = None
@@ -1716,7 +1826,11 @@ class DiscordAdapter(BasePlatformAdapter):
            require_mention = os.getenv("DISCORD_REQUIRE_MENTION", "true").lower() not in ("false", "0", "no")
            is_free_channel = bool(channel_ids & free_channels)

-            if require_mention and not is_free_channel:
+            # Skip the mention check if the message is in a thread where
+            # the bot has previously participated (auto-created or replied in).
+            in_bot_thread = is_thread and thread_id in self._bot_participated_threads
+
+            if require_mention and not is_free_channel and not in_bot_thread:
                if self._client.user not in message.mentions:
                    return

@@ -1725,17 +1839,18 @@ class DiscordAdapter(BasePlatformAdapter):
                message.content = message.content.replace(f"<@!{self._client.user.id}>", "").strip()

        # Auto-thread: when enabled, automatically create a thread for every
-        # new message in a text channel so each conversation is isolated.
+        # @mention in a text channel so each conversation is isolated (like Slack).
        # Messages already inside threads or DMs are unaffected.
        auto_threaded_channel = None
        if not is_thread and not isinstance(message.channel, discord.DMChannel):
-            auto_thread = os.getenv("DISCORD_AUTO_THREAD", "").lower() in ("true", "1", "yes")
+            auto_thread = os.getenv("DISCORD_AUTO_THREAD", "true").lower() in ("true", "1", "yes")
            if auto_thread:
                thread = await self._auto_create_thread(message)
                if thread:
                    is_thread = True
                    thread_id = str(thread.id)
                    auto_threaded_channel = thread
+                    self._bot_participated_threads.add(thread_id)

        # Determine message type
        msg_type = MessageType.TEXT
@@ -1835,7 +1950,12 @@ class DiscordAdapter(BasePlatformAdapter):
            reply_to_message_id=str(message.reference.message_id) if message.reference else None,
            timestamp=message.created_at,
        )
-        
+
+        # Track thread participation so the bot won't require @mention for
+        # follow-up messages in threads it has already engaged in.
+        if thread_id:
+            self._bot_participated_threads.add(thread_id)
+
        await self.handle_message(event)


--- a/gateway/platforms/telegram.py
+++ b/gateway/platforms/telegram.py
@@ -111,6 +111,11 @@ class TelegramAdapter(BasePlatformAdapter):
        super().__init__(config, Platform.TELEGRAM)
        self._app: Optional[Application] = None
        self._bot: Optional[Bot] = None
+        # Buffer rapid/album photo updates so Telegram image bursts are handled
+        # as a single MessageEvent instead of self-interrupting multiple turns.
+        self._media_batch_delay_seconds = float(os.getenv("HERMES_TELEGRAM_MEDIA_BATCH_DELAY_SECONDS", "0.8"))
+        self._pending_photo_batches: Dict[str, MessageEvent] = {}
+        self._pending_photo_batch_tasks: Dict[str, asyncio.Task] = {}
        self._media_group_events: Dict[str, MessageEvent] = {}
        self._media_group_tasks: Dict[str, asyncio.Task] = {}
        self._token_lock_identity: Optional[str] = None
@@ -275,8 +280,11 @@ class TelegramAdapter(BasePlatformAdapter):

        if self._app:
            try:
-                await self._app.updater.stop()
-                await self._app.stop()
+                # Only stop the updater if it's running
+                if self._app.updater and self._app.updater.running:
+                    await self._app.updater.stop()
+                if self._app.running:
+                    await self._app.stop()
                await self._app.shutdown()
            except Exception as e:
                logger.warning("[%s] Error during Telegram disconnect: %s", self.name, e, exc_info=True)
@@ -286,13 +294,19 @@ class TelegramAdapter(BasePlatformAdapter):
                release_scoped_lock("telegram-bot-token", self._token_lock_identity)
            except Exception as e:
                logger.warning("[%s] Error releasing Telegram token lock: %s", self.name, e, exc_info=True)
-        
+
+        for task in self._pending_photo_batch_tasks.values():
+            if task and not task.done():
+                task.cancel()
+        self._pending_photo_batch_tasks.clear()
+        self._pending_photo_batches.clear()
+
        self._mark_disconnected()
        self._app = None
        self._bot = None
        self._token_lock_identity = None
        logger.info("[%s] Disconnected from Telegram", self.name)
-    
+
    async def send(
        self,
        chat_id: str,
@@ -308,6 +322,14 @@ class TelegramAdapter(BasePlatformAdapter):
            # Format and split message if needed
            formatted = self.format_message(content)
            chunks = self.truncate_message(formatted, self.MAX_MESSAGE_LENGTH)
+            if len(chunks) > 1:
+                # truncate_message appends a raw " (1/2)" suffix. Escape the
+                # MarkdownV2-special parentheses so Telegram doesn't reject the
+                # chunk and fall back to plain text.
+                chunks = [
+                    re.sub(r" \((\d+)/(\d+)\)$", r" \\(\1/\2\\)", chunk)
+                    for chunk in chunks
+                ]
            
            message_ids = []
            thread_id = metadata.get("thread_id") if metadata else None
@@ -804,6 +826,49 @@ class TelegramAdapter(BasePlatformAdapter):
        event.text = "\n".join(parts)
        await self.handle_message(event)

+    def _photo_batch_key(self, event: MessageEvent, msg: Message) -> str:
+        """Return a batching key for Telegram photos/albums."""
+        from gateway.session import build_session_key
+        session_key = build_session_key(event.source)
+        media_group_id = getattr(msg, "media_group_id", None)
+        if media_group_id:
+            return f"{session_key}:album:{media_group_id}"
+        return f"{session_key}:photo-burst"
+
+    async def _flush_photo_batch(self, batch_key: str) -> None:
+        """Send a buffered photo burst/album as a single MessageEvent."""
+        current_task = asyncio.current_task()
+        try:
+            await asyncio.sleep(self._media_batch_delay_seconds)
+            event = self._pending_photo_batches.pop(batch_key, None)
+            if not event:
+                return
+            logger.info("[Telegram] Flushing photo batch %s with %d image(s)", batch_key, len(event.media_urls))
+            await self.handle_message(event)
+        finally:
+            if self._pending_photo_batch_tasks.get(batch_key) is current_task:
+                self._pending_photo_batch_tasks.pop(batch_key, None)
+
+    def _enqueue_photo_event(self, batch_key: str, event: MessageEvent) -> None:
+        """Merge photo events into a pending batch and schedule flush."""
+        existing = self._pending_photo_batches.get(batch_key)
+        if existing is None:
+            self._pending_photo_batches[batch_key] = event
+        else:
+            existing.media_urls.extend(event.media_urls)
+            existing.media_types.extend(event.media_types)
+            if event.text:
+                if not existing.text:
+                    existing.text = event.text
+                elif event.text not in existing.text:
+                    existing.text = f"{existing.text}\n\n{event.text}".strip()
+
+        prior_task = self._pending_photo_batch_tasks.get(batch_key)
+        if prior_task and not prior_task.done():
+            prior_task.cancel()
+
+        self._pending_photo_batch_tasks[batch_key] = asyncio.create_task(self._flush_photo_batch(batch_key))
+
    async def _handle_media_message(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
        """Handle incoming media messages, downloading images to local cache."""
        if not update.message:
@@ -855,14 +920,22 @@ class TelegramAdapter(BasePlatformAdapter):
                        if file_obj.file_path.lower().endswith(candidate):
                            ext = candidate
                            break
-                # Save to cache and populate media_urls with the local path
+                # Save to local cache (for vision tool access)
                cached_path = cache_image_from_bytes(bytes(image_bytes), ext=ext)
                event.media_urls = [cached_path]
-                event.media_types = [f"image/{ext.lstrip('.')}"]
+                event.media_types = [f"image/{ext.lstrip('.')}" ]
                logger.info("[Telegram] Cached user photo at %s", cached_path)
+                media_group_id = getattr(msg, "media_group_id", None)
+                if media_group_id:
+                    await self._queue_media_group_event(str(media_group_id), event)
+                else:
+                    batch_key = self._photo_batch_key(event, msg)
+                    self._enqueue_photo_event(batch_key, event)
+                return
+
            except Exception as e:
                logger.warning("[Telegram] Failed to cache photo: %s", e, exc_info=True)
-        
+
        # Download voice/audio messages to cache for STT transcription
        if msg.voice:
            try:
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -35,16 +35,12 @@ sys.path.insert(0, str(Path(__file__).parent.parent))
 # Resolve Hermes home directory (respects HERMES_HOME override)
 _hermes_home = Path(os.getenv("HERMES_HOME", Path.home() / ".hermes"))

-# Load environment variables from ~/.hermes/.env first
-from dotenv import load_dotenv
+# Load environment variables from ~/.hermes/.env first.
+# User-managed env files should override stale shell exports on restart.
+from dotenv import load_dotenv  # backward-compat for tests that monkeypatch this symbol
+from hermes_cli.env_loader import load_hermes_dotenv
 _env_path = _hermes_home / '.env'
-if _env_path.exists():
-    try:
-        load_dotenv(_env_path, encoding="utf-8")
-    except UnicodeDecodeError:
-        load_dotenv(_env_path, encoding="latin-1")
-# Also try project .env as fallback
-load_dotenv()
+load_hermes_dotenv(hermes_home=_hermes_home, project_env=Path(__file__).resolve().parents[1] / '.env')

 # Bridge config.yaml values into the environment so os.getenv() picks them up.
 # config.yaml is authoritative for terminal settings — overrides .env.
@@ -100,24 +96,40 @@ if _config_path.exists():
            for _cfg_key, _env_var in _compression_env_map.items():
                if _cfg_key in _compression_cfg:
                    os.environ[_env_var] = str(_compression_cfg[_cfg_key])
-        # Auxiliary model overrides (vision, web_extract).
-        # Each task has provider + model; bridge non-default values to env vars.
+        # Auxiliary model/direct-endpoint overrides (vision, web_extract).
+        # Each task has provider/model/base_url/api_key; bridge non-default values to env vars.
        _auxiliary_cfg = _cfg.get("auxiliary", {})
        if _auxiliary_cfg and isinstance(_auxiliary_cfg, dict):
            _aux_task_env = {
-                "vision":      ("AUXILIARY_VISION_PROVIDER",      "AUXILIARY_VISION_MODEL"),
-                "web_extract": ("AUXILIARY_WEB_EXTRACT_PROVIDER",  "AUXILIARY_WEB_EXTRACT_MODEL"),
+                "vision": {
+                    "provider": "AUXILIARY_VISION_PROVIDER",
+                    "model": "AUXILIARY_VISION_MODEL",
+                    "base_url": "AUXILIARY_VISION_BASE_URL",
+                    "api_key": "AUXILIARY_VISION_API_KEY",
+                },
+                "web_extract": {
+                    "provider": "AUXILIARY_WEB_EXTRACT_PROVIDER",
+                    "model": "AUXILIARY_WEB_EXTRACT_MODEL",
+                    "base_url": "AUXILIARY_WEB_EXTRACT_BASE_URL",
+                    "api_key": "AUXILIARY_WEB_EXTRACT_API_KEY",
+                },
            }
-            for _task_key, (_prov_env, _model_env) in _aux_task_env.items():
+            for _task_key, _env_map in _aux_task_env.items():
                _task_cfg = _auxiliary_cfg.get(_task_key, {})
                if not isinstance(_task_cfg, dict):
                    continue
                _prov = str(_task_cfg.get("provider", "")).strip()
                _model = str(_task_cfg.get("model", "")).strip()
+                _base_url = str(_task_cfg.get("base_url", "")).strip()
+                _api_key = str(_task_cfg.get("api_key", "")).strip()
                if _prov and _prov != "auto":
-                    os.environ[_prov_env] = _prov
+                    os.environ[_env_map["provider"]] = _prov
                if _model:
-                    os.environ[_model_env] = _model
+                    os.environ[_env_map["model"]] = _model
+                if _base_url:
+                    os.environ[_env_map["base_url"]] = _base_url
+                if _api_key:
+                    os.environ[_env_map["api_key"]] = _api_key
        _agent_cfg = _cfg.get("agent", {})
        if _agent_cfg and isinstance(_agent_cfg, dict):
            if "max_turns" in _agent_cfg:
@@ -293,7 +305,7 @@ class GatewayRunner:
        # Ensure tirith security scanner is available (downloads if needed)
        try:
            from tools.tirith_security import ensure_installed
-            ensure_installed()
+            ensure_installed(log_failures=False)
        except Exception:
            pass  # Non-fatal — fail-open at scan time if unavailable
        
@@ -884,8 +896,19 @@ class GatewayRunner:
        """Stop the gateway and disconnect all adapters."""
        logger.info("Stopping gateway...")
        self._running = False
-        
+
+        for session_key, agent in list(self._running_agents.items()):
+            try:
+                agent.interrupt("Gateway shutting down")
+                logger.debug("Interrupted running agent for session %s during shutdown", session_key[:20])
+            except Exception as e:
+                logger.debug("Failed interrupting agent during shutdown: %s", e)
+
        for platform, adapter in list(self.adapters.items()):
+            try:
+                await adapter.cancel_background_tasks()
+            except Exception as e:
+                logger.debug("✗ %s background-task cancel error: %s", platform.value, e)
            try:
                await adapter.disconnect()
                logger.info("✓ %s disconnected", platform.value)
@@ -893,6 +916,9 @@ class GatewayRunner:
                logger.error("✗ %s disconnect error: %s", platform.value, e)

        self.adapters.clear()
+        self._running_agents.clear()
+        self._pending_messages.clear()
+        self._pending_approvals.clear()
        self._shutdown_all_gateway_honcho()
        self._shutdown_event.set()
        
@@ -1079,11 +1105,39 @@ class GatewayRunner:
                        )
            return None
        
-        # PRIORITY: If an agent is already running for this session, interrupt it
-        # immediately. This is before command parsing to minimize latency -- the
-        # user's "stop" message reaches the agent as fast as possible.
+        # PRIORITY handling when an agent is already running for this session.
+        # Default behavior is to interrupt immediately so user text/stop messages
+        # are handled with minimal latency.
+        #
+        # Special case: Telegram/photo bursts often arrive as multiple near-
+        # simultaneous updates. Do NOT interrupt for photo-only follow-ups here;
+        # let the adapter-level batching/queueing logic absorb them.
        _quick_key = build_session_key(source)
        if _quick_key in self._running_agents:
+            if event.get_command() == "status":
+                return await self._handle_status_command(event)
+
+            if event.message_type == MessageType.PHOTO:
+                logger.debug("PRIORITY photo follow-up for session %s — queueing without interrupt", _quick_key[:20])
+                adapter = self.adapters.get(source.platform)
+                if adapter:
+                    # Reuse adapter queue semantics so photo bursts merge cleanly.
+                    if _quick_key in adapter._pending_messages:
+                        existing = adapter._pending_messages[_quick_key]
+                        if getattr(existing, "message_type", None) == MessageType.PHOTO:
+                            existing.media_urls.extend(event.media_urls)
+                            existing.media_types.extend(event.media_types)
+                            if event.text:
+                                if not existing.text:
+                                    existing.text = event.text
+                                elif event.text not in existing.text:
+                                    existing.text = f"{existing.text}\n\n{event.text}".strip()
+                        else:
+                            adapter._pending_messages[_quick_key] = event
+                    else:
+                        adapter._pending_messages[_quick_key] = event
+                return None
+
            running_agent = self._running_agents[_quick_key]
            logger.debug("PRIORITY interrupt for session %s", _quick_key[:20])
            running_agent.interrupt(event.text)
@@ -1098,7 +1152,7 @@ class GatewayRunner:
        
        # Emit command:* hook for any recognized slash command
        _known_commands = {"new", "reset", "help", "status", "stop", "model", "reasoning",
-                          "personality", "retry", "undo", "sethome", "set-home",
+                          "personality", "plan", "retry", "undo", "sethome", "set-home",
                          "compress", "usage", "insights", "reload-mcp", "reload_mcp",
                          "update", "title", "resume", "provider", "rollback",
                          "background", "reasoning", "voice"}
@@ -1133,6 +1187,28 @@ class GatewayRunner:
        
        if command == "personality":
            return await self._handle_personality_command(event)
+
+        if command == "plan":
+            try:
+                from agent.skill_commands import build_plan_path, build_skill_invocation_message
+
+                user_instruction = event.get_command_args().strip()
+                plan_path = build_plan_path(user_instruction)
+                event.text = build_skill_invocation_message(
+                    "/plan",
+                    user_instruction,
+                    task_id=_quick_key,
+                    runtime_note=(
+                        "Save the markdown plan with write_file to this exact relative path "
+                        f"inside the active workspace/backend cwd: {plan_path}"
+                    ),
+                )
+                if not event.text:
+                    return "Failed to load the bundled /plan skill."
+                command = None
+            except Exception as e:
+                logger.exception("Failed to prepare /plan command")
+                return f"Failed to enter plan mode: {e}"
        
        if command == "retry":
            return await self._handle_retry_command(event)
@@ -1749,6 +1825,8 @@ class GatewayRunner:
            # Update session with actual prompt token count and model from the agent
            self.session_store.update_session(
                session_entry.session_key,
+                input_tokens=agent_result.get("input_tokens", 0),
+                output_tokens=agent_result.get("output_tokens", 0),
                last_prompt_tokens=agent_result.get("last_prompt_tokens", 0),
                model=agent_result.get("model"),
            )
@@ -2358,6 +2436,13 @@ class GatewayRunner:
        except Exception as e:
            logger.warning("Failed to join voice channel: %s", e)
            adapter._voice_input_callback = None
+            err_lower = str(e).lower()
+            if "pynacl" in err_lower or "nacl" in err_lower or "davey" in err_lower:
+                return (
+                    "Voice dependencies are missing (PyNaCl / davey). "
+                    "Install or reinstall Hermes with the messaging extra, e.g. "
+                    "`pip install hermes-agent[messaging]`."
+                )
            return f"Failed to join voice channel: {e}"

        if success:
@@ -2498,18 +2583,9 @@ class GatewayRunner:
        if has_agent_tts:
            return False

-        # Dedup: base adapter auto-TTS already handles voice input.
-        # Exception: Discord voice channel — play_tts override is a no-op,
-        # so the runner must handle VC playback.
-        skip_double = is_voice_input
-        if skip_double:
-            adapter = self.adapters.get(event.source.platform)
-            guild_id = self._get_guild_id(event)
-            if (guild_id and adapter
-                    and hasattr(adapter, "is_in_voice_channel")
-                    and adapter.is_in_voice_channel(guild_id)):
-                skip_double = False
-        if skip_double:
+        # Dedup: base adapter auto-TTS already handles voice input
+        # (play_tts plays in VC when connected, so runner can skip).
+        if is_voice_input:
            return False

        return True
@@ -3431,10 +3507,12 @@ class GatewayRunner:
        os.environ["HERMES_SESSION_CHAT_ID"] = context.source.chat_id
        if context.source.chat_name:
            os.environ["HERMES_SESSION_CHAT_NAME"] = context.source.chat_name
+        if context.source.thread_id:
+            os.environ["HERMES_SESSION_THREAD_ID"] = str(context.source.thread_id)
    
    def _clear_session_env(self) -> None:
        """Clear session environment variables."""
-        for var in ["HERMES_SESSION_PLATFORM", "HERMES_SESSION_CHAT_ID", "HERMES_SESSION_CHAT_NAME"]:
+        for var in ["HERMES_SESSION_PLATFORM", "HERMES_SESSION_CHAT_ID", "HERMES_SESSION_CHAT_NAME", "HERMES_SESSION_THREAD_ID"]:
            if var in os.environ:
                del os.environ[var]
    
@@ -3452,9 +3530,13 @@ class GatewayRunner:
          1. Immediately understand what the user sent (no extra tool call).
          2. Re-examine the image with vision_analyze if it needs more detail.

+        Athabasca persistence should happen through Athabasca's own POST
+        /api/uploads flow, using the returned asset.publicUrl rather than local
+        cache paths.
+
        Args:
-            user_text:   The user's original caption / message text.
-            image_paths: List of local file paths to cached images.
+            user_text:      The user's original caption / message text.
+            image_paths:    List of local file paths to cached images.

        Returns:
            The enriched message string with vision descriptions prepended.
@@ -3479,10 +3561,16 @@ class GatewayRunner:
                result = _json.loads(result_json)
                if result.get("success"):
                    description = result.get("analysis", "")
+                    athabasca_note = (
+                        "\n[If this image needs to persist in Athabasca state, upload the cached file "
+                        "through Athabasca POST /api/uploads and use the returned asset.publicUrl. "
+                        "Do not store the local cache path as the canonical imageUrl.]"
+                    )
                    enriched_parts.append(
                        f"[The user sent an image~ Here's what I can see:\n{description}]\n"
                        f"[If you need a closer look, use vision_analyze with "
                        f"image_url: {path} ~]"
+                        f"{athabasca_note}"
                    )
                else:
                    enriched_parts.append(
@@ -3512,7 +3600,7 @@ class GatewayRunner:
        audio_paths: List[str],
    ) -> str:
        """
-        Auto-transcribe user voice/audio messages using OpenAI Whisper API
+        Auto-transcribe user voice/audio messages using the configured STT provider
        and prepend the transcript to the message text.

        Args:
@@ -3522,6 +3610,12 @@ class GatewayRunner:
        Returns:
            The enriched message string with transcriptions prepended.
        """
+        if not getattr(self.config, "stt_enabled", True):
+            disabled_note = "[The user sent voice message(s), but transcription is disabled in config.]"
+            if user_text:
+                return f"{disabled_note}\n\n{user_text}"
+            return disabled_note
+
        from tools.transcription_tools import transcribe_audio, get_stt_model_from_config
        import asyncio

@@ -3762,47 +3856,8 @@ class GatewayRunner:
            last_tool[0] = tool_name
            
            # Build progress message with primary argument preview
-            tool_emojis = {
-                "terminal": "💻",
-                "process": "⚙️",
-                "web_search": "🔍",
-                "web_extract": "📄",
-                "read_file": "📖",
-                "write_file": "✍️",
-                "patch": "🔧",
-                "search": "🔎",
-                "search_files": "🔎",
-                "list_directory": "📂",
-                "image_generate": "🎨",
-                "text_to_speech": "🔊",
-                "browser_navigate": "🌐",
-                "browser_click": "👆",
-                "browser_type": "⌨️",
-                "browser_snapshot": "📸",
-                "browser_scroll": "📜",
-                "browser_back": "◀️",
-                "browser_press": "⌨️",
-                "browser_close": "🚪",
-                "browser_get_images": "🖼️",
-                "browser_vision": "👁️",
-                "moa_query": "🧠",
-                "mixture_of_agents": "🧠",
-                "vision_analyze": "👁️",
-                "skill_view": "📚",
-                "skills_list": "📋",
-                "todo": "📋",
-                "memory": "🧠",
-                "session_search": "🔍",
-                "send_message": "📨",
-                "schedule_cronjob": "⏰",
-                "list_cronjobs": "⏰",
-                "remove_cronjob": "⏰",
-                "execute_code": "🐍",
-                "delegate_task": "🔀",
-                "clarify": "❓",
-                "skill_manage": "📝",
-            }
-            emoji = tool_emojis.get(tool_name, "⚙️")
+            from agent.display import get_tool_emoji
+            emoji = get_tool_emoji(tool_name, default="⚙️")
            
            # Verbose mode: show detailed arguments
            if progress_mode == "verbose" and args:
@@ -4084,11 +4139,15 @@ class GatewayRunner:
            # Return final response, or a message if something went wrong
            final_response = result.get("final_response")

-            # Extract last actual prompt token count from the agent's compressor
+            # Extract actual token counts from the agent instance used for this run
            _last_prompt_toks = 0
+            _input_toks = 0
+            _output_toks = 0
            _agent = agent_holder[0]
            if _agent and hasattr(_agent, "context_compressor"):
                _last_prompt_toks = getattr(_agent.context_compressor, "last_prompt_tokens", 0)
+                _input_toks = getattr(_agent, "session_prompt_tokens", 0)
+                _output_toks = getattr(_agent, "session_completion_tokens", 0)
            _resolved_model = getattr(_agent, "model", None) if _agent else None

            if not final_response:
@@ -4100,6 +4159,8 @@ class GatewayRunner:
                    "tools": tools_holder[0] or [],
                    "history_offset": len(agent_history),
                    "last_prompt_tokens": _last_prompt_toks,
+                    "input_tokens": _input_toks,
+                    "output_tokens": _output_toks,
                    "model": _resolved_model,
                }
            
@@ -4163,6 +4224,8 @@ class GatewayRunner:
                "tools": tools_holder[0] or [],
                "history_offset": len(agent_history),
                "last_prompt_tokens": _last_prompt_toks,
+                "input_tokens": _input_toks,
+                "output_tokens": _output_toks,
                "model": _resolved_model,
                "session_id": effective_session_id,
            }
--- a/gateway/session.py
+++ b/gateway/session.py
@@ -321,25 +321,32 @@ def build_session_key(source: SessionSource) -> str:
    This is the single source of truth for session key construction.

    DM rules:
-      - WhatsApp DMs include chat_id (multi-user support).
-      - Other DMs include thread_id when present (e.g. Slack threaded DMs),
-        so each DM thread gets its own session while top-level DMs share one.
-      - Without thread_id or chat_id, all DMs share a single session.
+      - DMs include chat_id when present, so each private conversation is isolated.
+      - thread_id further differentiates threaded DMs within the same DM chat.
+      - Without chat_id, thread_id is used as a best-effort fallback.
+      - Without thread_id or chat_id, DMs share a single session.

    Group/channel rules:
-      - thread_id differentiates threads within a channel.
-      - Without thread_id, all messages in a channel share one session.
+      - chat_id identifies the parent group/channel.
+      - thread_id differentiates threads within that parent chat.
+      - Without identifiers, messages fall back to one session per platform/chat_type.
    """
    platform = source.platform.value
    if source.chat_type == "dm":
+        if source.chat_id:
+            if source.thread_id:
+                return f"agent:main:{platform}:dm:{source.chat_id}:{source.thread_id}"
+            return f"agent:main:{platform}:dm:{source.chat_id}"
        if source.thread_id:
            return f"agent:main:{platform}:dm:{source.thread_id}"
-        if platform == "whatsapp" and source.chat_id:
-            return f"agent:main:{platform}:dm:{source.chat_id}"
        return f"agent:main:{platform}:dm"
+    if source.chat_id:
+        if source.thread_id:
+            return f"agent:main:{platform}:{source.chat_type}:{source.chat_id}:{source.thread_id}"
+        return f"agent:main:{platform}:{source.chat_type}:{source.chat_id}"
    if source.thread_id:
-        return f"agent:main:{platform}:{source.chat_type}:{source.chat_id}:{source.thread_id}"
-    return f"agent:main:{platform}:{source.chat_type}:{source.chat_id}"
+        return f"agent:main:{platform}:{source.chat_type}:{source.thread_id}"
+    return f"agent:main:{platform}:{source.chat_type}"


 class SessionStore:
--- a/hermes_cli/banner.py
+++ b/hermes_cli/banner.py
@@ -6,7 +6,9 @@ Pure display functions with no HermesCLI state dependency.
 import json
 import logging
 import os
+import shutil
 import subprocess
+import threading
 import time
 from pathlib import Path
 from typing import Dict, List, Any, Optional
@@ -143,7 +145,9 @@ def check_for_updates() -> Optional[int]:
    repo_dir = hermes_home / "hermes-agent"
    cache_file = hermes_home / ".update_check"

-    # Must be a git repo
+    # Must be a git repo — fall back to project root for dev installs
+    if not (repo_dir / ".git").exists():
+        repo_dir = Path(__file__).parent.parent.resolve()
    if not (repo_dir / ".git").exists():
        return None

@@ -190,6 +194,30 @@ def check_for_updates() -> Optional[int]:
    return behind


+# =========================================================================
+# Non-blocking update check
+# =========================================================================
+
+_update_result: Optional[int] = None
+_update_check_done = threading.Event()
+
+
+def prefetch_update_check():
+    """Kick off update check in a background daemon thread."""
+    def _run():
+        global _update_result
+        _update_result = check_for_updates()
+        _update_check_done.set()
+    t = threading.Thread(target=_run, daemon=True)
+    t.start()
+
+
+def get_update_result(timeout: float = 0.5) -> Optional[int]:
+    """Get result of prefetched check. Returns None if not ready."""
+    _update_check_done.wait(timeout=timeout)
+    return _update_result
+
+
 # =========================================================================
 # Welcome banner
 # =========================================================================
@@ -245,7 +273,15 @@ def build_welcome_banner(console: Console, model: str, cwd: str,
    text = _skin_color("banner_text", "#FFF8DC")
    session_color = _skin_color("session_border", "#8B8682")

-    left_lines = ["", HERMES_CADUCEUS, ""]
+    # Use skin's custom caduceus art if provided
+    try:
+        from hermes_cli.skin_engine import get_active_skin
+        _bskin = get_active_skin()
+        _hero = _bskin.banner_hero if hasattr(_bskin, 'banner_hero') and _bskin.banner_hero else HERMES_CADUCEUS
+    except Exception:
+        _bskin = None
+        _hero = HERMES_CADUCEUS
+    left_lines = ["", _hero, ""]
    model_short = model.split("/")[-1] if "/" in model else model
    if len(model_short) > 28:
        model_short = model_short[:25] + "..."
@@ -360,9 +396,9 @@ def build_welcome_banner(console: Console, model: str, cwd: str,
    summary_parts.append("/help for commands")
    right_lines.append(f"[dim {dim}]{' · '.join(summary_parts)}[/]")

-    # Update check — show if behind origin/main
+    # Update check — use prefetched result if available
    try:
-        behind = check_for_updates()
+        behind = get_update_result(timeout=0.5)
        if behind and behind > 0:
            commits_word = "commit" if behind == 1 else "commits"
            right_lines.append(
@@ -386,6 +422,9 @@ def build_welcome_banner(console: Console, model: str, cwd: str,
    )

    console.print()
-    console.print(HERMES_AGENT_LOGO)
-    console.print()
+    term_width = shutil.get_terminal_size().columns
+    if term_width >= 95:
+        _logo = _bskin.banner_logo if _bskin and hasattr(_bskin, 'banner_logo') and _bskin.banner_logo else HERMES_AGENT_LOGO
+        console.print(_logo)
+        console.print()
    console.print(outer_panel)
--- a/hermes_cli/commands.py
+++ b/hermes_cli/commands.py
@@ -43,7 +43,7 @@ COMMANDS_BY_CATEGORY = {
        "/tools": "List available tools",
        "/toolsets": "List available toolsets",
        "/skills": "Search, install, inspect, or manage skills from online registries",
-        "/cron": "Manage scheduled tasks (list, add, remove)",
+        "/cron": "Manage scheduled tasks (list, add/create, edit, pause, resume, run, remove)",
        "/reload-mcp": "Reload MCP servers from config.yaml",
    },
    "Info": {
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@@ -150,30 +150,44 @@ DEFAULT_CONFIG = {
        "vision": {
            "provider": "auto",    # auto | openrouter | nous | codex | custom
            "model": "",           # e.g. "google/gemini-2.5-flash", "gpt-4o"
+            "base_url": "",        # direct OpenAI-compatible endpoint (takes precedence over provider)
+            "api_key": "",         # API key for base_url (falls back to OPENAI_API_KEY)
        },
        "web_extract": {
            "provider": "auto",
            "model": "",
+            "base_url": "",
+            "api_key": "",
        },
        "compression": {
            "provider": "auto",
            "model": "",
+            "base_url": "",
+            "api_key": "",
        },
        "session_search": {
            "provider": "auto",
            "model": "",
+            "base_url": "",
+            "api_key": "",
        },
        "skills_hub": {
            "provider": "auto",
            "model": "",
+            "base_url": "",
+            "api_key": "",
        },
        "mcp": {
            "provider": "auto",
            "model": "",
+            "base_url": "",
+            "api_key": "",
        },
        "flush_memories": {
            "provider": "auto",
            "model": "",
+            "base_url": "",
+            "api_key": "",
        },
    },
    
@@ -205,7 +219,8 @@ DEFAULT_CONFIG = {
    },
    
    "stt": {
-        "provider": "local",  # "local" (free, faster-whisper) | "openai" (Whisper API)
+        "enabled": True,
+        "provider": "local",  # "local" (free, faster-whisper) | "groq" | "openai" (Whisper API)
        "local": {
            "model": "base",  # tiny, base, small, medium, large-v3
        },
@@ -243,6 +258,8 @@ DEFAULT_CONFIG = {
    "delegation": {
        "model": "",       # e.g. "google/gemini-3-flash-preview" (empty = inherit parent model)
        "provider": "",    # e.g. "openrouter" (empty = inherit parent provider + credentials)
+        "base_url": "",    # direct OpenAI-compatible endpoint for subagents
+        "api_key": "",     # API key for delegation.base_url (falls back to OPENAI_API_KEY)
    },

    # Ephemeral prefill messages file — JSON list of {role, content} dicts
@@ -263,6 +280,7 @@ DEFAULT_CONFIG = {
    "discord": {
        "require_mention": True,       # Require @mention to respond in server channels
        "free_response_channels": "",  # Comma-separated channel IDs where bot responds without mention
+        "auto_thread": True,           # Auto-create threads on @mention in channels (like Slack)
    },

    # Permanently allowed dangerous command patterns (added via "always" approval)
@@ -284,7 +302,7 @@ DEFAULT_CONFIG = {
    },

    # Config schema version - bump this when adding new required fields
-    "_config_version": 7,
+    "_config_version": 8,
 }

 # =============================================================================
@@ -1092,6 +1110,13 @@ def save_anthropic_oauth_token(value: str, save_fn=None):
    writer("ANTHROPIC_API_KEY", "")


+def use_anthropic_claude_code_credentials(save_fn=None):
+    """Use Claude Code's own credential files instead of persisting env tokens."""
+    writer = save_fn or save_env_value
+    writer("ANTHROPIC_TOKEN", "")
+    writer("ANTHROPIC_API_KEY", "")
+
+
 def save_anthropic_api_key(value: str, save_fn=None):
    """Persist an Anthropic API key and clear the OAuth/setup-token slot."""
    writer = save_fn or save_env_value
--- a/hermes_cli/cron.py
+++ b/hermes_cli/cron.py
@@ -1,15 +1,14 @@
 """
 Cron subcommand for hermes CLI.

-Handles: hermes cron [list|status|tick]
-
-Cronjobs are executed automatically by the gateway daemon (hermes gateway).
-Install the gateway as a service for background execution:
-    hermes gateway install
+Handles standalone cron management commands like list, create, edit,
+pause/resume/run/remove, status, and tick.
 """

+import json
 import sys
 from pathlib import Path
+from typing import Iterable, List, Optional

 PROJECT_ROOT = Path(__file__).parent.parent.resolve()
 sys.path.insert(0, str(PROJECT_ROOT))
@@ -17,62 +16,87 @@ sys.path.insert(0, str(PROJECT_ROOT))
 from hermes_cli.colors import Colors, color


+def _normalize_skills(single_skill=None, skills: Optional[Iterable[str]] = None) -> Optional[List[str]]:
+    if skills is None:
+        if single_skill is None:
+            return None
+        raw_items = [single_skill]
+    else:
+        raw_items = list(skills)
+
+    normalized: List[str] = []
+    for item in raw_items:
+        text = str(item or "").strip()
+        if text and text not in normalized:
+            normalized.append(text)
+    return normalized
+
+
+def _cron_api(**kwargs):
+    from tools.cronjob_tools import cronjob as cronjob_tool
+
+    return json.loads(cronjob_tool(**kwargs))
+
+
 def cron_list(show_all: bool = False):
    """List all scheduled jobs."""
    from cron.jobs import list_jobs
-    
+
    jobs = list_jobs(include_disabled=show_all)
-    
+
    if not jobs:
        print(color("No scheduled jobs.", Colors.DIM))
-        print(color("Create one with the /cron add command in chat, or via Telegram.", Colors.DIM))
+        print(color("Create one with 'hermes cron create ...' or the /cron command in chat.", Colors.DIM))
        return
-    
+
    print()
    print(color("┌─────────────────────────────────────────────────────────────────────────┐", Colors.CYAN))
    print(color("│                         Scheduled Jobs                                  │", Colors.CYAN))
    print(color("└─────────────────────────────────────────────────────────────────────────┘", Colors.CYAN))
    print()
-    
+
    for job in jobs:
        job_id = job.get("id", "?")[:8]
        name = job.get("name", "(unnamed)")
        schedule = job.get("schedule_display", job.get("schedule", {}).get("value", "?"))
-        enabled = job.get("enabled", True)
+        state = job.get("state", "scheduled" if job.get("enabled", True) else "paused")
        next_run = job.get("next_run_at", "?")
-        
+
        repeat_info = job.get("repeat", {})
        repeat_times = repeat_info.get("times")
        repeat_completed = repeat_info.get("completed", 0)
-        
-        if repeat_times:
-            repeat_str = f"{repeat_completed}/{repeat_times}"
-        else:
-            repeat_str = "∞"
-        
+        repeat_str = f"{repeat_completed}/{repeat_times}" if repeat_times else "∞"
+
        deliver = job.get("deliver", ["local"])
        if isinstance(deliver, str):
            deliver = [deliver]
        deliver_str = ", ".join(deliver)
-        
-        if not enabled:
-            status = color("[disabled]", Colors.RED)
-        else:
+
+        skills = job.get("skills") or ([job["skill"]] if job.get("skill") else [])
+        if state == "paused":
+            status = color("[paused]", Colors.YELLOW)
+        elif state == "completed":
+            status = color("[completed]", Colors.BLUE)
+        elif job.get("enabled", True):
            status = color("[active]", Colors.GREEN)
-        
+        else:
+            status = color("[disabled]", Colors.RED)
+
        print(f"  {color(job_id, Colors.YELLOW)} {status}")
        print(f"    Name:      {name}")
        print(f"    Schedule:  {schedule}")
        print(f"    Repeat:    {repeat_str}")
        print(f"    Next run:  {next_run}")
        print(f"    Deliver:   {deliver_str}")
+        if skills:
+            print(f"    Skills:    {', '.join(skills)}")
        print()
-    
-    # Warn if gateway isn't running
+
    from hermes_cli.gateway import find_gateway_pids
    if not find_gateway_pids():
        print(color("  ⚠  Gateway is not running — jobs won't fire automatically.", Colors.YELLOW))
        print(color("     Start it with: hermes gateway install", Colors.DIM))
+        print(color("                    sudo hermes gateway install --system  # Linux servers", Colors.DIM))
        print()


@@ -86,9 +110,9 @@ def cron_status():
    """Show cron execution status."""
    from cron.jobs import list_jobs
    from hermes_cli.gateway import find_gateway_pids
-    
+
    print()
-    
+
    pids = find_gateway_pids()
    if pids:
        print(color("✓ Gateway is running — cron jobs will fire automatically", Colors.GREEN))
@@ -97,11 +121,12 @@ def cron_status():
        print(color("✗ Gateway is not running — cron jobs will NOT fire", Colors.RED))
        print()
        print("  To enable automatic execution:")
-        print("    hermes gateway install    # Install as system service (recommended)")
+        print("    hermes gateway install    # Install as a user service")
+        print("    sudo hermes gateway install --system  # Linux servers: boot-time system service")
        print("    hermes gateway            # Or run in foreground")
-    
+
    print()
-    
+
    jobs = list_jobs(include_disabled=False)
    if jobs:
        next_runs = [j.get("next_run_at") for j in jobs if j.get("next_run_at")]
@@ -110,25 +135,131 @@ def cron_status():
            print(f"  Next run: {min(next_runs)}")
    else:
        print("  No active jobs")
-    
+
    print()


+def cron_create(args):
+    result = _cron_api(
+        action="create",
+        schedule=args.schedule,
+        prompt=args.prompt,
+        name=getattr(args, "name", None),
+        deliver=getattr(args, "deliver", None),
+        repeat=getattr(args, "repeat", None),
+        skill=getattr(args, "skill", None),
+        skills=_normalize_skills(getattr(args, "skill", None), getattr(args, "skills", None)),
+    )
+    if not result.get("success"):
+        print(color(f"Failed to create job: {result.get('error', 'unknown error')}", Colors.RED))
+        return 1
+    print(color(f"Created job: {result['job_id']}", Colors.GREEN))
+    print(f"  Name: {result['name']}")
+    print(f"  Schedule: {result['schedule']}")
+    if result.get("skills"):
+        print(f"  Skills: {', '.join(result['skills'])}")
+    print(f"  Next run: {result['next_run_at']}")
+    return 0
+
+
+def cron_edit(args):
+    from cron.jobs import get_job
+
+    job = get_job(args.job_id)
+    if not job:
+        print(color(f"Job not found: {args.job_id}", Colors.RED))
+        return 1
+
+    existing_skills = list(job.get("skills") or ([] if not job.get("skill") else [job.get("skill")]))
+    replacement_skills = _normalize_skills(getattr(args, "skill", None), getattr(args, "skills", None))
+    add_skills = _normalize_skills(None, getattr(args, "add_skills", None)) or []
+    remove_skills = set(_normalize_skills(None, getattr(args, "remove_skills", None)) or [])
+
+    final_skills = None
+    if getattr(args, "clear_skills", False):
+        final_skills = []
+    elif replacement_skills is not None:
+        final_skills = replacement_skills
+    elif add_skills or remove_skills:
+        final_skills = [skill for skill in existing_skills if skill not in remove_skills]
+        for skill in add_skills:
+            if skill not in final_skills:
+                final_skills.append(skill)
+
+    result = _cron_api(
+        action="update",
+        job_id=args.job_id,
+        schedule=getattr(args, "schedule", None),
+        prompt=getattr(args, "prompt", None),
+        name=getattr(args, "name", None),
+        deliver=getattr(args, "deliver", None),
+        repeat=getattr(args, "repeat", None),
+        skills=final_skills,
+    )
+    if not result.get("success"):
+        print(color(f"Failed to update job: {result.get('error', 'unknown error')}", Colors.RED))
+        return 1
+
+    updated = result["job"]
+    print(color(f"Updated job: {updated['job_id']}", Colors.GREEN))
+    print(f"  Name: {updated['name']}")
+    print(f"  Schedule: {updated['schedule']}")
+    if updated.get("skills"):
+        print(f"  Skills: {', '.join(updated['skills'])}")
+    else:
+        print("  Skills: none")
+    return 0
+
+
+def _job_action(action: str, job_id: str, success_verb: str) -> int:
+    result = _cron_api(action=action, job_id=job_id)
+    if not result.get("success"):
+        print(color(f"Failed to {action} job: {result.get('error', 'unknown error')}", Colors.RED))
+        return 1
+    job = result.get("job") or result.get("removed_job") or {}
+    print(color(f"{success_verb} job: {job.get('name', job_id)} ({job_id})", Colors.GREEN))
+    if action in {"resume", "run"} and result.get("job", {}).get("next_run_at"):
+        print(f"  Next run: {result['job']['next_run_at']}")
+    if action == "run":
+        print("  It will run on the next scheduler tick.")
+    return 0
+
+
 def cron_command(args):
    """Handle cron subcommands."""
    subcmd = getattr(args, 'cron_command', None)
-    
+
    if subcmd is None or subcmd == "list":
        show_all = getattr(args, 'all', False)
        cron_list(show_all)
-    
-    elif subcmd == "tick":
-        cron_tick()
-    
-    elif subcmd == "status":
+        return 0
+
+    if subcmd == "status":
        cron_status()
-    
-    else:
-        print(f"Unknown cron command: {subcmd}")
-        print("Usage: hermes cron [list|status|tick]")
-        sys.exit(1)
+        return 0
+
+    if subcmd == "tick":
+        cron_tick()
+        return 0
+
+    if subcmd in {"create", "add"}:
+        return cron_create(args)
+
+    if subcmd == "edit":
+        return cron_edit(args)
+
+    if subcmd == "pause":
+        return _job_action("pause", args.job_id, "Paused")
+
+    if subcmd == "resume":
+        return _job_action("resume", args.job_id, "Resumed")
+
+    if subcmd == "run":
+        return _job_action("run", args.job_id, "Triggered")
+
+    if subcmd in {"remove", "rm", "delete"}:
+        return _job_action("remove", args.job_id, "Removed")
+
+    print(f"Unknown cron command: {subcmd}")
+    print("Usage: hermes cron [list|create|edit|pause|resume|run|remove|status|tick]")
+    sys.exit(1)
--- a/hermes_cli/env_loader.py
+++ b/hermes_cli/env_loader.py
@@ -0,0 +1,46 @@
+"""Helpers for loading Hermes .env files consistently across entrypoints."""
+
+from __future__ import annotations
+
+import os
+from pathlib import Path
+from typing import Iterable
+
+from dotenv import load_dotenv
+
+
+def _load_dotenv_with_fallback(path: Path, *, override: bool) -> None:
+    try:
+        load_dotenv(dotenv_path=path, override=override, encoding="utf-8")
+    except UnicodeDecodeError:
+        load_dotenv(dotenv_path=path, override=override, encoding="latin-1")
+
+
+def load_hermes_dotenv(
+    *,
+    hermes_home: str | os.PathLike | None = None,
+    project_env: str | os.PathLike | None = None,
+) -> list[Path]:
+    """Load Hermes environment files with user config taking precedence.
+
+    Behavior:
+    - `~/.hermes/.env` overrides stale shell-exported values when present.
+    - project `.env` acts as a dev fallback and only fills missing values when
+      the user env exists.
+    - if no user env exists, the project `.env` also overrides stale shell vars.
+    """
+    loaded: list[Path] = []
+
+    home_path = Path(hermes_home or os.getenv("HERMES_HOME", Path.home() / ".hermes"))
+    user_env = home_path / ".env"
+    project_env_path = Path(project_env) if project_env else None
+
+    if user_env.exists():
+        _load_dotenv_with_fallback(user_env, override=True)
+        loaded.append(user_env)
+
+    if project_env_path and project_env_path.exists():
+        _load_dotenv_with_fallback(project_env_path, override=not loaded)
+        loaded.append(project_env_path)
+
+    return loaded
--- a/hermes_cli/gateway.py
+++ b/hermes_cli/gateway.py
@@ -123,10 +123,143 @@ SERVICE_NAME = "hermes-gateway"
 SERVICE_DESCRIPTION = "Hermes Agent Gateway - Messaging Platform Integration"


-def get_systemd_unit_path() -> Path:
+def get_systemd_unit_path(system: bool = False) -> Path:
+    if system:
+        return Path("/etc/systemd/system") / f"{SERVICE_NAME}.service"
    return Path.home() / ".config" / "systemd" / "user" / f"{SERVICE_NAME}.service"


+def _systemctl_cmd(system: bool = False) -> list[str]:
+    return ["systemctl"] if system else ["systemctl", "--user"]
+
+
+def _journalctl_cmd(system: bool = False) -> list[str]:
+    return ["journalctl"] if system else ["journalctl", "--user"]
+
+
+def _service_scope_label(system: bool = False) -> str:
+    return "system" if system else "user"
+
+
+def get_installed_systemd_scopes() -> list[str]:
+    scopes = []
+    seen_paths: set[Path] = set()
+    for system, label in ((False, "user"), (True, "system")):
+        unit_path = get_systemd_unit_path(system=system)
+        if unit_path in seen_paths:
+            continue
+        if unit_path.exists():
+            scopes.append(label)
+            seen_paths.add(unit_path)
+    return scopes
+
+
+def has_conflicting_systemd_units() -> bool:
+    return len(get_installed_systemd_scopes()) > 1
+
+
+def print_systemd_scope_conflict_warning() -> None:
+    scopes = get_installed_systemd_scopes()
+    if len(scopes) < 2:
+        return
+
+    rendered_scopes = " + ".join(scopes)
+    print_warning(f"Both user and system gateway services are installed ({rendered_scopes}).")
+    print_info("  This is confusing and can make start/stop/status behavior ambiguous.")
+    print_info("  Default gateway commands target the user service unless you pass --system.")
+    print_info("  Keep one of these:")
+    print_info("    hermes gateway uninstall")
+    print_info("    sudo hermes gateway uninstall --system")
+
+
+def _require_root_for_system_service(action: str) -> None:
+    if os.geteuid() != 0:
+        print(f"System gateway {action} requires root. Re-run with sudo.")
+        sys.exit(1)
+
+
+def _system_service_identity(run_as_user: str | None = None) -> tuple[str, str, str]:
+    import getpass
+    import grp
+    import pwd
+
+    username = (run_as_user or os.getenv("SUDO_USER") or os.getenv("USER") or os.getenv("LOGNAME") or getpass.getuser()).strip()
+    if not username:
+        raise ValueError("Could not determine which user the gateway service should run as")
+    if username == "root":
+        raise ValueError("Refusing to install the gateway system service as root; pass --run-as USER")
+
+    try:
+        user_info = pwd.getpwnam(username)
+    except KeyError as e:
+        raise ValueError(f"Unknown user: {username}") from e
+
+    group_name = grp.getgrgid(user_info.pw_gid).gr_name
+    return username, group_name, user_info.pw_dir
+
+
+def _read_systemd_user_from_unit(unit_path: Path) -> str | None:
+    if not unit_path.exists():
+        return None
+
+    for line in unit_path.read_text(encoding="utf-8").splitlines():
+        if line.startswith("User="):
+            value = line.split("=", 1)[1].strip()
+            return value or None
+    return None
+
+
+def _default_system_service_user() -> str | None:
+    for candidate in (os.getenv("SUDO_USER"), os.getenv("USER"), os.getenv("LOGNAME")):
+        if candidate and candidate.strip() and candidate.strip() != "root":
+            return candidate.strip()
+    return None
+
+
+def prompt_linux_gateway_install_scope() -> str | None:
+    choice = prompt_choice(
+        "  Choose how the gateway should run in the background:",
+        [
+            "User service (no sudo; best for laptops/dev boxes; may need linger after logout)",
+            "System service (starts on boot; requires sudo; still runs as your user)",
+            "Skip service install for now",
+        ],
+        default=0,
+    )
+    return {0: "user", 1: "system", 2: None}[choice]
+
+
+def install_linux_gateway_from_setup(force: bool = False) -> tuple[str | None, bool]:
+    scope = prompt_linux_gateway_install_scope()
+    if scope is None:
+        return None, False
+
+    if scope == "system":
+        run_as_user = _default_system_service_user()
+        if os.geteuid() != 0:
+            print_warning("  System service install requires sudo, so Hermes can't create it from this user session.")
+            if run_as_user:
+                print_info(f"  After setup, run: sudo hermes gateway install --system --run-as-user {run_as_user}")
+            else:
+                print_info("  After setup, run: sudo hermes gateway install --system --run-as-user <your-user>")
+            print_info("  Then start it with: sudo hermes gateway start --system")
+            return scope, False
+
+        if not run_as_user:
+            while True:
+                run_as_user = prompt("  Run the system gateway service as which user?", default="")
+                run_as_user = (run_as_user or "").strip()
+                if run_as_user and run_as_user != "root":
+                    break
+                print_error("  Enter a non-root username.")
+
+        systemd_install(force=force, system=True, run_as_user=run_as_user)
+        return scope, True
+
+    systemd_install(force=force, system=False)
+    return scope, True
+
+
 def get_systemd_linger_status() -> tuple[bool | None, str]:
    """Return whether systemd user lingering is enabled for the current user.

@@ -216,8 +349,9 @@ def get_hermes_cli_path() -> str:
 # Systemd (Linux)
 # =============================================================================

-def generate_systemd_unit() -> str:
+def generate_systemd_unit(system: bool = False, run_as_user: str | None = None) -> str:
    import shutil
+
    python_path = get_python_path()
    working_dir = str(PROJECT_ROOT)
    venv_dir = str(PROJECT_ROOT / "venv")
@@ -226,8 +360,38 @@ def generate_systemd_unit() -> str:

    # Build a PATH that includes the venv, node_modules, and standard system dirs
    sane_path = f"{venv_bin}:{node_bin}:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
-    
    hermes_cli = shutil.which("hermes") or f"{python_path} -m hermes_cli.main"
+
+    if system:
+        username, group_name, home_dir = _system_service_identity(run_as_user)
+        return f"""[Unit]
+Description={SERVICE_DESCRIPTION}
+After=network-online.target
+Wants=network-online.target
+
+[Service]
+Type=simple
+User={username}
+Group={group_name}
+ExecStart={python_path} -m hermes_cli.main gateway run --replace
+WorkingDirectory={working_dir}
+Environment="HOME={home_dir}"
+Environment="USER={username}"
+Environment="LOGNAME={username}"
+Environment="PATH={sane_path}"
+Environment="VIRTUAL_ENV={venv_dir}"
+Restart=on-failure
+RestartSec=10
+KillMode=mixed
+KillSignal=SIGTERM
+TimeoutStopSec=15
+StandardOutput=journal
+StandardError=journal
+
+[Install]
+WantedBy=multi-user.target
+"""
+
    return f"""[Unit]
 Description={SERVICE_DESCRIPTION}
 After=network.target
@@ -255,26 +419,28 @@ def _normalize_service_definition(text: str) -> str:
    return "\n".join(line.rstrip() for line in text.strip().splitlines())


-def systemd_unit_is_current() -> bool:
-    unit_path = get_systemd_unit_path()
+def systemd_unit_is_current(system: bool = False) -> bool:
+    unit_path = get_systemd_unit_path(system=system)
    if not unit_path.exists():
        return False

    installed = unit_path.read_text(encoding="utf-8")
-    expected = generate_systemd_unit()
+    expected_user = _read_systemd_user_from_unit(unit_path) if system else None
+    expected = generate_systemd_unit(system=system, run_as_user=expected_user)
    return _normalize_service_definition(installed) == _normalize_service_definition(expected)



-def refresh_systemd_unit_if_needed() -> bool:
-    """Rewrite the installed user unit when the generated definition has changed."""
-    unit_path = get_systemd_unit_path()
-    if not unit_path.exists() or systemd_unit_is_current():
+def refresh_systemd_unit_if_needed(system: bool = False) -> bool:
+    """Rewrite the installed systemd unit when the generated definition has changed."""
+    unit_path = get_systemd_unit_path(system=system)
+    if not unit_path.exists() or systemd_unit_is_current(system=system):
        return False

-    unit_path.write_text(generate_systemd_unit(), encoding="utf-8")
-    subprocess.run(["systemctl", "--user", "daemon-reload"], check=True)
-    print("↻ Updated gateway service definition to match the current Hermes install")
+    expected_user = _read_systemd_user_from_unit(unit_path) if system else None
+    unit_path.write_text(generate_systemd_unit(system=system, run_as_user=expected_user), encoding="utf-8")
+    subprocess.run(_systemctl_cmd(system) + ["daemon-reload"], check=True)
+    print(f"↻ Updated gateway {_service_scope_label(system)} service definition to match the current Hermes install")
    return True


@@ -337,93 +503,137 @@ def _ensure_linger_enabled() -> None:
    _print_linger_enable_warning(username, detail or linger_detail)


-def systemd_install(force: bool = False):
-    unit_path = get_systemd_unit_path()
-    
+def _select_systemd_scope(system: bool = False) -> bool:
+    if system:
+        return True
+    return get_systemd_unit_path(system=True).exists() and not get_systemd_unit_path(system=False).exists()
+
+
+def systemd_install(force: bool = False, system: bool = False, run_as_user: str | None = None):
+    if system:
+        _require_root_for_system_service("install")
+
+    unit_path = get_systemd_unit_path(system=system)
+    scope_flag = " --system" if system else ""
+
    if unit_path.exists() and not force:
        print(f"Service already installed at: {unit_path}")
        print("Use --force to reinstall")
        return
-    
+
    unit_path.parent.mkdir(parents=True, exist_ok=True)
-    print(f"Installing systemd service to: {unit_path}")
-    unit_path.write_text(generate_systemd_unit())
-    
-    subprocess.run(["systemctl", "--user", "daemon-reload"], check=True)
-    subprocess.run(["systemctl", "--user", "enable", SERVICE_NAME], check=True)
-    
+    print(f"Installing {_service_scope_label(system)} systemd service to: {unit_path}")
+    unit_path.write_text(generate_systemd_unit(system=system, run_as_user=run_as_user), encoding="utf-8")
+
+    subprocess.run(_systemctl_cmd(system) + ["daemon-reload"], check=True)
+    subprocess.run(_systemctl_cmd(system) + ["enable", SERVICE_NAME], check=True)
+
    print()
-    print("✓ Service installed and enabled!")
+    print(f"✓ {_service_scope_label(system).capitalize()} service installed and enabled!")
    print()
    print("Next steps:")
-    print(f"  hermes gateway start              # Start the service")
-    print(f"  hermes gateway status             # Check status")
-    print(f"  journalctl --user -u {SERVICE_NAME} -f  # View logs")
+    print(f"  {'sudo ' if system else ''}hermes gateway start{scope_flag}              # Start the service")
+    print(f"  {'sudo ' if system else ''}hermes gateway status{scope_flag}             # Check status")
+    print(f"  {'journalctl' if system else 'journalctl --user'} -u {SERVICE_NAME} -f  # View logs")
    print()
-    _ensure_linger_enabled()

-def systemd_uninstall():
-    subprocess.run(["systemctl", "--user", "stop", SERVICE_NAME], check=False)
-    subprocess.run(["systemctl", "--user", "disable", SERVICE_NAME], check=False)
-    
-    unit_path = get_systemd_unit_path()
+    if system:
+        configured_user = _read_systemd_user_from_unit(unit_path)
+        if configured_user:
+            print(f"Configured to run as: {configured_user}")
+    else:
+        _ensure_linger_enabled()
+
+    print_systemd_scope_conflict_warning()
+
+
+def systemd_uninstall(system: bool = False):
+    system = _select_systemd_scope(system)
+    if system:
+        _require_root_for_system_service("uninstall")
+
+    subprocess.run(_systemctl_cmd(system) + ["stop", SERVICE_NAME], check=False)
+    subprocess.run(_systemctl_cmd(system) + ["disable", SERVICE_NAME], check=False)
+
+    unit_path = get_systemd_unit_path(system=system)
    if unit_path.exists():
        unit_path.unlink()
        print(f"✓ Removed {unit_path}")
-    
-    subprocess.run(["systemctl", "--user", "daemon-reload"], check=True)
-    print("✓ Service uninstalled")

-def systemd_start():
-    refresh_systemd_unit_if_needed()
-    subprocess.run(["systemctl", "--user", "start", SERVICE_NAME], check=True)
-    print("✓ Service started")
+    subprocess.run(_systemctl_cmd(system) + ["daemon-reload"], check=True)
+    print(f"✓ {_service_scope_label(system).capitalize()} service uninstalled")


-def systemd_stop():
-    subprocess.run(["systemctl", "--user", "stop", SERVICE_NAME], check=True)
-    print("✓ Service stopped")
+def systemd_start(system: bool = False):
+    system = _select_systemd_scope(system)
+    if system:
+        _require_root_for_system_service("start")
+    refresh_systemd_unit_if_needed(system=system)
+    subprocess.run(_systemctl_cmd(system) + ["start", SERVICE_NAME], check=True)
+    print(f"✓ {_service_scope_label(system).capitalize()} service started")


-def systemd_restart():
-    refresh_systemd_unit_if_needed()
-    subprocess.run(["systemctl", "--user", "restart", SERVICE_NAME], check=True)
-    print("✓ Service restarted")
+
+def systemd_stop(system: bool = False):
+    system = _select_systemd_scope(system)
+    if system:
+        _require_root_for_system_service("stop")
+    subprocess.run(_systemctl_cmd(system) + ["stop", SERVICE_NAME], check=True)
+    print(f"✓ {_service_scope_label(system).capitalize()} service stopped")


-def systemd_status(deep: bool = False):
-    # Check if service unit file exists
-    unit_path = get_systemd_unit_path()
+
+def systemd_restart(system: bool = False):
+    system = _select_systemd_scope(system)
+    if system:
+        _require_root_for_system_service("restart")
+    refresh_systemd_unit_if_needed(system=system)
+    subprocess.run(_systemctl_cmd(system) + ["restart", SERVICE_NAME], check=True)
+    print(f"✓ {_service_scope_label(system).capitalize()} service restarted")
+
+
+
+def systemd_status(deep: bool = False, system: bool = False):
+    system = _select_systemd_scope(system)
+    unit_path = get_systemd_unit_path(system=system)
+    scope_flag = " --system" if system else ""
+
    if not unit_path.exists():
        print("✗ Gateway service is not installed")
-        print("  Run: hermes gateway install")
+        print(f"  Run: {'sudo ' if system else ''}hermes gateway install{scope_flag}")
        return

-    if not systemd_unit_is_current():
-        print("⚠ Installed gateway service definition is outdated")
-        print("  Run: hermes gateway restart  # auto-refreshes the unit")
+    if has_conflicting_systemd_units():
+        print_systemd_scope_conflict_warning()
        print()
-    
-    # Show detailed status first
+
+    if not systemd_unit_is_current(system=system):
+        print("⚠ Installed gateway service definition is outdated")
+        print(f"  Run: {'sudo ' if system else ''}hermes gateway restart{scope_flag}  # auto-refreshes the unit")
+        print()
+
    subprocess.run(
-        ["systemctl", "--user", "status", SERVICE_NAME, "--no-pager"],
-        capture_output=False
+        _systemctl_cmd(system) + ["status", SERVICE_NAME, "--no-pager"],
+        capture_output=False,
    )

-    # Check if service is active
    result = subprocess.run(
-        ["systemctl", "--user", "is-active", SERVICE_NAME],
+        _systemctl_cmd(system) + ["is-active", SERVICE_NAME],
        capture_output=True,
-        text=True
+        text=True,
    )

    status = result.stdout.strip()

    if status == "active":
-        print("✓ Gateway service is running")
+        print(f"✓ {_service_scope_label(system).capitalize()} gateway service is running")
    else:
-        print("✗ Gateway service is stopped")
-        print("  Run: hermes gateway start")
+        print(f"✗ {_service_scope_label(system).capitalize()} gateway service is stopped")
+        print(f"  Run: {'sudo ' if system else ''}hermes gateway start{scope_flag}")
+
+    configured_user = _read_systemd_user_from_unit(unit_path) if system else None
+    if configured_user:
+        print(f"Configured to run as: {configured_user}")

    runtime_lines = _runtime_health_lines()
    if runtime_lines:
@@ -432,7 +642,9 @@ def systemd_status(deep: bool = False):
        for line in runtime_lines:
            print(f"  {line}")

-    if deep:
+    if system:
+        print("✓ System service starts at boot without requiring systemd linger")
+    elif deep:
        print_systemd_linger_guidance()
    else:
        linger_enabled, _ = get_systemd_linger_status()
@@ -445,10 +657,7 @@ def systemd_status(deep: bool = False):
    if deep:
        print()
        print("Recent logs:")
-        subprocess.run([
-            "journalctl", "--user", "-u", SERVICE_NAME,
-            "-n", "20", "--no-pager"
-        ])
+        subprocess.run(_journalctl_cmd(system) + ["-u", SERVICE_NAME, "-n", "20", "--no-pager"])


 # =============================================================================
@@ -895,7 +1104,7 @@ def _setup_whatsapp():
 def _is_service_installed() -> bool:
    """Check if the gateway is installed as a system service."""
    if is_linux():
-        return get_systemd_unit_path().exists()
+        return get_systemd_unit_path(system=False).exists() or get_systemd_unit_path(system=True).exists()
    elif is_macos():
        return get_launchd_plist_path().exists()
    return False
@@ -903,12 +1112,27 @@ def _is_service_installed() -> bool:

 def _is_service_running() -> bool:
    """Check if the gateway service is currently running."""
-    if is_linux() and get_systemd_unit_path().exists():
-        result = subprocess.run(
-            ["systemctl", "--user", "is-active", SERVICE_NAME],
-            capture_output=True, text=True
-        )
-        return result.stdout.strip() == "active"
+    if is_linux():
+        user_unit_exists = get_systemd_unit_path(system=False).exists()
+        system_unit_exists = get_systemd_unit_path(system=True).exists()
+
+        if user_unit_exists:
+            result = subprocess.run(
+                _systemctl_cmd(False) + ["is-active", SERVICE_NAME],
+                capture_output=True, text=True
+            )
+            if result.stdout.strip() == "active":
+                return True
+
+        if system_unit_exists:
+            result = subprocess.run(
+                _systemctl_cmd(True) + ["is-active", SERVICE_NAME],
+                capture_output=True, text=True
+            )
+            if result.stdout.strip() == "active":
+                return True
+
+        return False
    elif is_macos() and get_launchd_plist_path().exists():
        result = subprocess.run(
            ["launchctl", "list", "ai.hermes.gateway"],
@@ -1050,6 +1274,10 @@ def gateway_setup():
    service_installed = _is_service_installed()
    service_running = _is_service_running()

+    if is_linux() and has_conflicting_systemd_units():
+        print_systemd_scope_conflict_warning()
+        print()
+
    if service_installed and service_running:
        print_success("Gateway service is installed and running.")
    elif service_installed:
@@ -1131,16 +1359,18 @@ def gateway_setup():
                platform_name = "systemd" if is_linux() else "launchd"
                if prompt_yes_no(f"  Install the gateway as a {platform_name} service? (runs in background, starts on boot)", True):
                    try:
-                        force = False
+                        installed_scope = None
+                        did_install = False
                        if is_linux():
-                            systemd_install(force)
+                            installed_scope, did_install = install_linux_gateway_from_setup(force=False)
                        else:
-                            launchd_install(force)
+                            launchd_install(force=False)
+                            did_install = True
                        print()
-                        if prompt_yes_no("  Start the service now?", True):
+                        if did_install and prompt_yes_no("  Start the service now?", True):
                            try:
                                if is_linux():
-                                    systemd_start()
+                                    systemd_start(system=installed_scope == "system")
                                else:
                                    launchd_start()
                            except subprocess.CalledProcessError as e:
@@ -1150,6 +1380,8 @@ def gateway_setup():
                        print_info("  You can try manually: hermes gateway install")
                else:
                    print_info("  You can install later: hermes gateway install")
+                    if is_linux():
+                        print_info("  Or as a boot-time service: sudo hermes gateway install --system")
                    print_info("  Or run in foreground:  hermes gateway")
            else:
                print_info("  Service install not supported on this platform.")
@@ -1183,8 +1415,10 @@ def gateway_command(args):
    # Service management commands
    if subcmd == "install":
        force = getattr(args, 'force', False)
+        system = getattr(args, 'system', False)
+        run_as_user = getattr(args, 'run_as_user', None)
        if is_linux():
-            systemd_install(force)
+            systemd_install(force=force, system=system, run_as_user=run_as_user)
        elif is_macos():
            launchd_install(force)
        else:
@@ -1193,8 +1427,9 @@ def gateway_command(args):
            sys.exit(1)
    
    elif subcmd == "uninstall":
+        system = getattr(args, 'system', False)
        if is_linux():
-            systemd_uninstall()
+            systemd_uninstall(system=system)
        elif is_macos():
            launchd_uninstall()
        else:
@@ -1202,8 +1437,9 @@ def gateway_command(args):
            sys.exit(1)
    
    elif subcmd == "start":
+        system = getattr(args, 'system', False)
        if is_linux():
-            systemd_start()
+            systemd_start(system=system)
        elif is_macos():
            launchd_start()
        else:
@@ -1213,10 +1449,11 @@ def gateway_command(args):
    elif subcmd == "stop":
        # Try service first, then sweep any stray/manual gateway processes.
        service_available = False
+        system = getattr(args, 'system', False)
        
-        if is_linux() and get_systemd_unit_path().exists():
+        if is_linux() and (get_systemd_unit_path(system=False).exists() or get_systemd_unit_path(system=True).exists()):
            try:
-                systemd_stop()
+                systemd_stop(system=system)
                service_available = True
            except subprocess.CalledProcessError:
                pass  # Fall through to process kill
@@ -1239,10 +1476,11 @@ def gateway_command(args):
    elif subcmd == "restart":
        # Try service first, fall back to killing and restarting
        service_available = False
+        system = getattr(args, 'system', False)
        
-        if is_linux() and get_systemd_unit_path().exists():
+        if is_linux() and (get_systemd_unit_path(system=False).exists() or get_systemd_unit_path(system=True).exists()):
            try:
-                systemd_restart()
+                systemd_restart(system=system)
                service_available = True
            except subprocess.CalledProcessError:
                pass
@@ -1268,10 +1506,11 @@ def gateway_command(args):
    
    elif subcmd == "status":
        deep = getattr(args, 'deep', False)
+        system = getattr(args, 'system', False)
        
        # Check for service first
-        if is_linux() and get_systemd_unit_path().exists():
-            systemd_status(deep)
+        if is_linux() and (get_systemd_unit_path(system=False).exists() or get_systemd_unit_path(system=True).exists()):
+            systemd_status(deep, system=system)
        elif is_macos() and get_launchd_plist_path().exists():
            launchd_status(deep)
        else:
@@ -1289,6 +1528,7 @@ def gateway_command(args):
                print()
                print("To install as a service:")
                print("  hermes gateway install")
+                print("  sudo hermes gateway install --system")
            else:
                print("✗ Gateway is not running")
                runtime_lines = _runtime_health_lines()
@@ -1300,4 +1540,5 @@ def gateway_command(args):
                print()
                print("To start:")
                print("  hermes gateway          # Run in foreground")
-                print("  hermes gateway install  # Install as service")
+                print("  hermes gateway install  # Install as user service")
+                print("  sudo hermes gateway install --system  # Install as boot-time system service")
--- a/hermes_cli/main.py
+++ b/hermes_cli/main.py
@@ -54,16 +54,11 @@ from typing import Optional
 PROJECT_ROOT = Path(__file__).parent.parent.resolve()
 sys.path.insert(0, str(PROJECT_ROOT))

-# Load .env from ~/.hermes/.env first, then project root as dev fallback
-from dotenv import load_dotenv
-from hermes_cli.config import get_env_path, get_hermes_home
-_user_env = get_env_path()
-if _user_env.exists():
-    try:
-        load_dotenv(dotenv_path=_user_env, encoding="utf-8")
-    except UnicodeDecodeError:
-        load_dotenv(dotenv_path=_user_env, encoding="latin-1")
-load_dotenv(dotenv_path=PROJECT_ROOT / '.env', override=False)
+# Load .env from ~/.hermes/.env first, then project root as dev fallback.
+# User-managed env files should override stale shell exports on restart.
+from hermes_cli.config import get_hermes_home
+from hermes_cli.env_loader import load_hermes_dotenv
+load_hermes_dotenv(project_env=PROJECT_ROOT / '.env')

 # Point mini-swe-agent at ~/.hermes/ so it shares our config
 os.environ.setdefault("MSWEA_GLOBAL_CONFIG_DIR", str(get_hermes_home()))
@@ -480,6 +475,13 @@ def cmd_chat(args):
        print("You can run 'hermes setup' at any time to configure.")
        sys.exit(1)

+    # Start update check in background (runs while other init happens)
+    try:
+        from hermes_cli.banner import prefetch_update_check
+        prefetch_update_check()
+    except Exception:
+        pass
+
    # Sync bundled skills on every CLI launch (fast -- skips unchanged skills)
    try:
        from tools.skills_sync import sync_skills
@@ -499,6 +501,7 @@ def cmd_chat(args):
        "model": args.model,
        "provider": getattr(args, "provider", None),
        "toolsets": args.toolsets,
+        "skills": getattr(args, "skills", None),
        "verbose": args.verbose,
        "quiet": getattr(args, "quiet", False),
        "query": args.query,
@@ -510,7 +513,11 @@ def cmd_chat(args):
    # Filter out None values
    kwargs = {k: v for k, v in kwargs.items() if v is not None}
    
-    cli_main(**kwargs)
+    try:
+        cli_main(**kwargs)
+    except ValueError as e:
+        print(f"Error: {e}")
+        sys.exit(1)


 def cmd_gateway(args):
@@ -1105,8 +1112,32 @@ def _model_flow_custom(config):

    effective_key = api_key or current_key

+    from hermes_cli.models import probe_api_models
+
+    probe = probe_api_models(effective_key, effective_url)
+    if probe.get("used_fallback") and probe.get("resolved_base_url"):
+        print(
+            f"Warning: endpoint verification worked at {probe['resolved_base_url']}/models, "
+            f"not the exact URL you entered. Saving the working base URL instead."
+        )
+        effective_url = probe["resolved_base_url"]
+        if base_url:
+            base_url = effective_url
+    elif probe.get("models") is not None:
+        print(
+            f"Verified endpoint via {probe.get('probed_url')} "
+            f"({len(probe.get('models') or [])} model(s) visible)"
+        )
+    else:
+        print(
+            f"Warning: could not verify this endpoint via {probe.get('probed_url')}. "
+            f"Hermes will still save it."
+        )
+        if probe.get("suggested_base_url"):
+            print(f"  If this server expects /v1, try base URL: {probe['suggested_base_url']}")
+
    if base_url:
-        save_env_value("OPENAI_BASE_URL", base_url)
+        save_env_value("OPENAI_BASE_URL", effective_url)
    if api_key:
        save_env_value("OPENAI_API_KEY", api_key)

@@ -1368,6 +1399,12 @@ _PROVIDER_MODELS = {
        "kimi-k2-turbo-preview",
        "kimi-k2-0905-preview",
    ],
+    "moonshot": [
+        "kimi-k2.5",
+        "kimi-k2-thinking",
+        "kimi-k2-turbo-preview",
+        "kimi-k2-0905-preview",
+    ],
    "minimax": [
        "MiniMax-M2.5",
        "MiniMax-M2.5-highspeed",
@@ -1449,8 +1486,8 @@ def _model_flow_kimi(config, current_model=""):
            "kimi-k2-thinking-turbo",
        ]
    else:
-        # Legacy Moonshot models
-        model_list = _PROVIDER_MODELS.get(provider_id, [])
+        # Legacy Moonshot models (excludes Coding Plan-only models)
+        model_list = _PROVIDER_MODELS.get("moonshot", [])

    if model_list:
        selected = _prompt_model_selection(model_list, current_model=current_model)
@@ -1586,8 +1623,30 @@ def _model_flow_api_key_provider(config, provider_id, current_model=""):

 def _run_anthropic_oauth_flow(save_env_value):
    """Run the Claude OAuth setup-token flow. Returns True if credentials were saved."""
-    from agent.anthropic_adapter import run_oauth_setup_token
-    from hermes_cli.config import save_anthropic_oauth_token
+    from agent.anthropic_adapter import (
+        run_oauth_setup_token,
+        read_claude_code_credentials,
+        is_claude_code_token_valid,
+    )
+    from hermes_cli.config import (
+        save_anthropic_oauth_token,
+        use_anthropic_claude_code_credentials,
+    )
+
+    def _activate_claude_code_credentials_if_available() -> bool:
+        try:
+            creds = read_claude_code_credentials()
+        except Exception:
+            creds = None
+        if creds and (
+            is_claude_code_token_valid(creds)
+            or bool(creds.get("refreshToken"))
+        ):
+            use_anthropic_claude_code_credentials(save_fn=save_env_value)
+            print("  ✓ Claude Code credentials linked.")
+            print("    Hermes will use Claude's credential store directly instead of copying a setup-token into ~/.hermes/.env.")
+            return True
+        return False

    try:
        print()
@@ -1596,6 +1655,8 @@ def _run_anthropic_oauth_flow(save_env_value):
        print()
        token = run_oauth_setup_token()
        if token:
+            if _activate_claude_code_credentials_if_available():
+                return True
            save_anthropic_oauth_token(token, save_fn=save_env_value)
            print("  ✓ OAuth credentials saved.")
            return True
@@ -1828,6 +1889,18 @@ def cmd_version(args):
    except ImportError:
        print("OpenAI SDK: Not installed")

+    # Show update status (synchronous — acceptable since user asked for version info)
+    try:
+        from hermes_cli.banner import check_for_updates
+        behind = check_for_updates()
+        if behind and behind > 0:
+            commits_word = "commit" if behind == 1 else "commits"
+            print(f"Update available: {behind} {commits_word} behind — run 'hermes update'")
+        elif behind == 0:
+            print("Up to date")
+    except Exception:
+        pass
+

 def cmd_uninstall(args):
    """Uninstall Hermes Agent."""
@@ -1962,6 +2035,32 @@ def _stash_local_changes_if_needed(git_cmd: list[str], cwd: Path) -> Optional[st



+def _resolve_stash_selector(git_cmd: list[str], cwd: Path, stash_ref: str) -> Optional[str]:
+    stash_list = subprocess.run(
+        git_cmd + ["stash", "list", "--format=%gd %H"],
+        cwd=cwd,
+        capture_output=True,
+        text=True,
+        check=True,
+    )
+    for line in stash_list.stdout.splitlines():
+        selector, _, commit = line.partition(" ")
+        if commit.strip() == stash_ref:
+            return selector.strip()
+    return None
+
+
+
+def _print_stash_cleanup_guidance(stash_ref: str, stash_selector: Optional[str] = None) -> None:
+    print("  Check `git status` first so you don't accidentally reapply the same change twice.")
+    print("  Find the saved entry with: git stash list --format='%gd %H %s'")
+    if stash_selector:
+        print(f"  Remove it with: git stash drop {stash_selector}")
+    else:
+        print(f"  Look for commit {stash_ref}, then drop its selector with: git stash drop stash@{{N}}")
+
+
+
 def _restore_stashed_changes(
    git_cmd: list[str],
    cwd: Path,
@@ -1998,7 +2097,27 @@ def _restore_stashed_changes(
        print(f"Resolve manually with: git stash apply {stash_ref}")
        sys.exit(1)

-    subprocess.run(git_cmd + ["stash", "drop", stash_ref], cwd=cwd, check=True)
+    stash_selector = _resolve_stash_selector(git_cmd, cwd, stash_ref)
+    if stash_selector is None:
+        print("⚠ Local changes were restored, but Hermes couldn't find the stash entry to drop.")
+        print("  The stash was left in place. You can remove it manually after checking the result.")
+        _print_stash_cleanup_guidance(stash_ref)
+    else:
+        drop = subprocess.run(
+            git_cmd + ["stash", "drop", stash_selector],
+            cwd=cwd,
+            capture_output=True,
+            text=True,
+        )
+        if drop.returncode != 0:
+            print("⚠ Local changes were restored, but Hermes couldn't drop the saved stash entry.")
+            if drop.stdout.strip():
+                print(drop.stdout.strip())
+            if drop.stderr.strip():
+                print(drop.stderr.strip())
+            print("  The stash was left in place. You can remove it manually after checking the result.")
+            _print_stash_cleanup_guidance(stash_ref, stash_selector)
+
    print("⚠ Local changes were restored on top of the updated codebase.")
    print("  Review `git diff` / `git status` if Hermes behaves unexpectedly.")
    return True
@@ -2276,8 +2395,9 @@ Examples:
    hermes config edit            Edit config in $EDITOR
    hermes config set model gpt-4 Set a config value
    hermes gateway                Run messaging gateway
+    hermes -s hermes-agent-dev,github-auth
    hermes -w                     Start in isolated git worktree
-    hermes gateway install        Install as system service
+    hermes gateway install        Install gateway background service
    hermes sessions list          List past sessions
    hermes sessions browse        Interactive session picker
    hermes sessions rename ID T   Rename/title a session
@@ -2314,6 +2434,12 @@ For more help on a command:
        default=False,
        help="Run in an isolated git worktree (for parallel agents)"
    )
+    parser.add_argument(
+        "--skills", "-s",
+        action="append",
+        default=None,
+        help="Preload one or more skills for the session (repeat flag or comma-separate)"
+    )
    parser.add_argument(
        "--yolo",
        action="store_true",
@@ -2349,6 +2475,12 @@ For more help on a command:
        "-t", "--toolsets",
        help="Comma-separated toolsets to enable"
    )
+    chat_parser.add_argument(
+        "-s", "--skills",
+        action="append",
+        default=None,
+        help="Preload one or more skills for the session (repeat flag or comma-separate)"
+    )
    chat_parser.add_argument(
        "--provider",
        choices=["auto", "openrouter", "nous", "openai-codex", "anthropic", "zai", "kimi-coding", "minimax", "minimax-cn"],
@@ -2433,23 +2565,30 @@ For more help on a command:
    
    # gateway start
    gateway_start = gateway_subparsers.add_parser("start", help="Start gateway service")
+    gateway_start.add_argument("--system", action="store_true", help="Target the Linux system-level gateway service")
    
    # gateway stop
    gateway_stop = gateway_subparsers.add_parser("stop", help="Stop gateway service")
+    gateway_stop.add_argument("--system", action="store_true", help="Target the Linux system-level gateway service")
    
    # gateway restart
    gateway_restart = gateway_subparsers.add_parser("restart", help="Restart gateway service")
+    gateway_restart.add_argument("--system", action="store_true", help="Target the Linux system-level gateway service")
    
    # gateway status
    gateway_status = gateway_subparsers.add_parser("status", help="Show gateway status")
    gateway_status.add_argument("--deep", action="store_true", help="Deep status check")
+    gateway_status.add_argument("--system", action="store_true", help="Target the Linux system-level gateway service")
    
    # gateway install
    gateway_install = gateway_subparsers.add_parser("install", help="Install gateway as service")
    gateway_install.add_argument("--force", action="store_true", help="Force reinstall")
+    gateway_install.add_argument("--system", action="store_true", help="Install as a Linux system-level service (starts at boot)")
+    gateway_install.add_argument("--run-as-user", dest="run_as_user", help="User account the Linux system service should run as")
    
    # gateway uninstall
    gateway_uninstall = gateway_subparsers.add_parser("uninstall", help="Uninstall gateway service")
+    gateway_uninstall.add_argument("--system", action="store_true", help="Target the Linux system-level gateway service")

    # gateway setup
    gateway_setup = gateway_subparsers.add_parser("setup", help="Configure messaging platforms")
@@ -2597,13 +2736,48 @@ For more help on a command:
    # cron list
    cron_list = cron_subparsers.add_parser("list", help="List scheduled jobs")
    cron_list.add_argument("--all", action="store_true", help="Include disabled jobs")
-    
+
+    # cron create/add
+    cron_create = cron_subparsers.add_parser("create", aliases=["add"], help="Create a scheduled job")
+    cron_create.add_argument("schedule", help="Schedule like '30m', 'every 2h', or '0 9 * * *'")
+    cron_create.add_argument("prompt", nargs="?", help="Optional self-contained prompt or task instruction")
+    cron_create.add_argument("--name", help="Optional human-friendly job name")
+    cron_create.add_argument("--deliver", help="Delivery target: origin, local, telegram, discord, signal, or platform:chat_id")
+    cron_create.add_argument("--repeat", type=int, help="Optional repeat count")
+    cron_create.add_argument("--skill", dest="skills", action="append", help="Attach a skill. Repeat to add multiple skills.")
+
+    # cron edit
+    cron_edit = cron_subparsers.add_parser("edit", help="Edit an existing scheduled job")
+    cron_edit.add_argument("job_id", help="Job ID to edit")
+    cron_edit.add_argument("--schedule", help="New schedule")
+    cron_edit.add_argument("--prompt", help="New prompt/task instruction")
+    cron_edit.add_argument("--name", help="New job name")
+    cron_edit.add_argument("--deliver", help="New delivery target")
+    cron_edit.add_argument("--repeat", type=int, help="New repeat count")
+    cron_edit.add_argument("--skill", dest="skills", action="append", help="Replace the job's skills with this set. Repeat to attach multiple skills.")
+    cron_edit.add_argument("--add-skill", dest="add_skills", action="append", help="Append a skill without replacing the existing list. Repeatable.")
+    cron_edit.add_argument("--remove-skill", dest="remove_skills", action="append", help="Remove a specific attached skill. Repeatable.")
+    cron_edit.add_argument("--clear-skills", action="store_true", help="Remove all attached skills from the job")
+
+    # lifecycle actions
+    cron_pause = cron_subparsers.add_parser("pause", help="Pause a scheduled job")
+    cron_pause.add_argument("job_id", help="Job ID to pause")
+
+    cron_resume = cron_subparsers.add_parser("resume", help="Resume a paused job")
+    cron_resume.add_argument("job_id", help="Job ID to resume")
+
+    cron_run = cron_subparsers.add_parser("run", help="Run a job on the next scheduler tick")
+    cron_run.add_argument("job_id", help="Job ID to trigger")
+
+    cron_remove = cron_subparsers.add_parser("remove", aliases=["rm", "delete"], help="Remove a scheduled job")
+    cron_remove.add_argument("job_id", help="Job ID to remove")
+
    # cron status
    cron_subparsers.add_parser("status", help="Check if cron scheduler is running")
-    
+
    # cron tick (mostly for debugging)
    cron_subparsers.add_parser("tick", help="Run due jobs once and exit")
-    
+
    cron_parser.set_defaults(func=cmd_cron)
    
    # =========================================================================
@@ -2948,7 +3122,11 @@ For more help on a command:

        elif action == "export":
            if args.session_id:
-                data = db.export_session(args.session_id)
+                resolved_session_id = db.resolve_session_id(args.session_id)
+                if not resolved_session_id:
+                    print(f"Session '{args.session_id}' not found.")
+                    return
+                data = db.export_session(resolved_session_id)
                if not data:
                    print(f"Session '{args.session_id}' not found.")
                    return
@@ -2963,13 +3141,17 @@ For more help on a command:
                print(f"Exported {len(sessions)} sessions to {args.output}")

        elif action == "delete":
+            resolved_session_id = db.resolve_session_id(args.session_id)
+            if not resolved_session_id:
+                print(f"Session '{args.session_id}' not found.")
+                return
            if not args.yes:
-                confirm = input(f"Delete session '{args.session_id}' and all its messages? [y/N] ")
+                confirm = input(f"Delete session '{resolved_session_id}' and all its messages? [y/N] ")
                if confirm.lower() not in ("y", "yes"):
                    print("Cancelled.")
                    return
-            if db.delete_session(args.session_id):
-                print(f"Deleted session '{args.session_id}'.")
+            if db.delete_session(resolved_session_id):
+                print(f"Deleted session '{resolved_session_id}'.")
            else:
                print(f"Session '{args.session_id}' not found.")

@@ -2985,10 +3167,14 @@ For more help on a command:
            print(f"Pruned {count} session(s).")

        elif action == "rename":
+            resolved_session_id = db.resolve_session_id(args.session_id)
+            if not resolved_session_id:
+                print(f"Session '{args.session_id}' not found.")
+                return
            title = " ".join(args.title)
            try:
-                if db.set_session_title(args.session_id, title):
-                    print(f"Session '{args.session_id}' renamed to: {title}")
+                if db.set_session_title(resolved_session_id, title):
+                    print(f"Session '{resolved_session_id}' renamed to: {title}")
                else:
                    print(f"Session '{args.session_id}' not found.")
            except ValueError as e:
--- a/hermes_cli/models.py
+++ b/hermes_cli/models.py
@@ -308,6 +308,62 @@ def _fetch_anthropic_models(timeout: float = 5.0) -> Optional[list[str]]:
        return None


+def probe_api_models(
+    api_key: Optional[str],
+    base_url: Optional[str],
+    timeout: float = 5.0,
+) -> dict[str, Any]:
+    """Probe an OpenAI-compatible ``/models`` endpoint with light URL heuristics."""
+    normalized = (base_url or "").strip().rstrip("/")
+    if not normalized:
+        return {
+            "models": None,
+            "probed_url": None,
+            "resolved_base_url": "",
+            "suggested_base_url": None,
+            "used_fallback": False,
+        }
+
+    if normalized.endswith("/v1"):
+        alternate_base = normalized[:-3].rstrip("/")
+    else:
+        alternate_base = normalized + "/v1"
+
+    candidates: list[tuple[str, bool]] = [(normalized, False)]
+    if alternate_base and alternate_base != normalized:
+        candidates.append((alternate_base, True))
+
+    tried: list[str] = []
+    headers: dict[str, str] = {}
+    if api_key:
+        headers["Authorization"] = f"Bearer {api_key}"
+
+    for candidate_base, is_fallback in candidates:
+        url = candidate_base.rstrip("/") + "/models"
+        tried.append(url)
+        req = urllib.request.Request(url, headers=headers)
+        try:
+            with urllib.request.urlopen(req, timeout=timeout) as resp:
+                data = json.loads(resp.read().decode())
+                return {
+                    "models": [m.get("id", "") for m in data.get("data", [])],
+                    "probed_url": url,
+                    "resolved_base_url": candidate_base.rstrip("/"),
+                    "suggested_base_url": alternate_base if alternate_base != candidate_base else normalized,
+                    "used_fallback": is_fallback,
+                }
+        except Exception:
+            continue
+
+    return {
+        "models": None,
+        "probed_url": tried[-1] if tried else normalized.rstrip("/") + "/models",
+        "resolved_base_url": normalized,
+        "suggested_base_url": alternate_base if alternate_base != normalized else None,
+        "used_fallback": False,
+    }
+
+
 def fetch_api_models(
    api_key: Optional[str],
    base_url: Optional[str],
@@ -318,22 +374,7 @@ def fetch_api_models(
    Returns a list of model ID strings, or ``None`` if the endpoint could not
    be reached (network error, timeout, auth failure, etc.).
    """
-    if not base_url:
-        return None
-
-    url = base_url.rstrip("/") + "/models"
-    headers: dict[str, str] = {}
-    if api_key:
-        headers["Authorization"] = f"Bearer {api_key}"
-
-    req = urllib.request.Request(url, headers=headers)
-    try:
-        with urllib.request.urlopen(req, timeout=timeout) as resp:
-            data = json.loads(resp.read().decode())
-            # Standard OpenAI format: {"data": [{"id": "model-name", ...}, ...]}
-            return [m.get("id", "") for m in data.get("data", [])]
-    except Exception:
-        return None
+    return probe_api_models(api_key, base_url, timeout=timeout).get("models")


 def validate_requested_model(
@@ -376,13 +417,53 @@ def validate_requested_model(
            "message": "Model names cannot contain spaces.",
        }

-    # Custom endpoints can serve any model — skip validation
    if normalized == "custom":
+        probe = probe_api_models(api_key, base_url)
+        api_models = probe.get("models")
+        if api_models is not None:
+            if requested in set(api_models):
+                return {
+                    "accepted": True,
+                    "persist": True,
+                    "recognized": True,
+                    "message": None,
+                }
+
+            suggestions = get_close_matches(requested, api_models, n=3, cutoff=0.5)
+            suggestion_text = ""
+            if suggestions:
+                suggestion_text = "\n  Similar models: " + ", ".join(f"`{s}`" for s in suggestions)
+
+            message = (
+                f"Note: `{requested}` was not found in this custom endpoint's model listing "
+                f"({probe.get('probed_url')}). It may still work if the server supports hidden or aliased models."
+                f"{suggestion_text}"
+            )
+            if probe.get("used_fallback"):
+                message += (
+                    f"\n  Endpoint verification succeeded after trying `{probe.get('resolved_base_url')}`. "
+                    f"Consider saving that as your base URL."
+                )
+
+            return {
+                "accepted": True,
+                "persist": True,
+                "recognized": False,
+                "message": message,
+            }
+
+        message = (
+            f"Note: could not reach this custom endpoint's model listing at `{probe.get('probed_url')}`. "
+            f"Hermes will still save `{requested}`, but the endpoint should expose `/models` for verification."
+        )
+        if probe.get("suggested_base_url"):
+            message += f"\n  If this server expects `/v1`, try base URL: `{probe.get('suggested_base_url')}`"
+
        return {
            "accepted": True,
            "persist": True,
            "recognized": False,
-            "message": None,
+            "message": message,
        }

    # Probe the live API to check if the model actually exists
--- a/hermes_cli/runtime_provider.py
+++ b/hermes_cli/runtime_provider.py
@@ -144,10 +144,16 @@ def _resolve_openrouter_runtime(
    env_openrouter_base_url = os.getenv("OPENROUTER_BASE_URL", "").strip()

    use_config_base_url = False
-    if requested_norm == "auto":
-        if cfg_base_url.strip() and not explicit_base_url and not env_openai_base_url:
+    if cfg_base_url.strip() and not explicit_base_url and not env_openai_base_url:
+        if requested_norm == "auto":
            if not cfg_provider or cfg_provider == "auto":
                use_config_base_url = True
+        elif requested_norm == "custom":
+            # Persisted custom endpoints store their base URL in config.yaml.
+            # If OPENAI_BASE_URL is not currently set in the environment, keep
+            # honoring that saved endpoint instead of falling back to OpenRouter.
+            if cfg_provider == "custom":
+                use_config_base_url = True

    # When the user explicitly requested the openrouter provider, skip
    # OPENAI_BASE_URL — it typically points to a custom / non-OpenRouter
--- a/hermes_cli/setup.py
+++ b/hermes_cli/setup.py
@@ -227,54 +227,86 @@ def prompt(question: str, default: str = None, password: bool = False) -> str:
        sys.exit(1)


+def _curses_prompt_choice(question: str, choices: list, default: int = 0) -> int:
+    """Single-select menu using curses to avoid simple_term_menu rendering bugs."""
+    try:
+        import curses
+        result_holder = [default]
+
+        def _curses_menu(stdscr):
+            curses.curs_set(0)
+            if curses.has_colors():
+                curses.start_color()
+                curses.use_default_colors()
+                curses.init_pair(1, curses.COLOR_GREEN, -1)
+                curses.init_pair(2, curses.COLOR_YELLOW, -1)
+            cursor = default
+
+            while True:
+                stdscr.clear()
+                max_y, max_x = stdscr.getmaxyx()
+                try:
+                    stdscr.addnstr(
+                        0,
+                        0,
+                        question,
+                        max_x - 1,
+                        curses.A_BOLD | (curses.color_pair(2) if curses.has_colors() else 0),
+                    )
+                except curses.error:
+                    pass
+
+                for i, choice in enumerate(choices):
+                    y = i + 2
+                    if y >= max_y - 1:
+                        break
+                    arrow = "→" if i == cursor else " "
+                    line = f" {arrow}  {choice}"
+                    attr = curses.A_NORMAL
+                    if i == cursor:
+                        attr = curses.A_BOLD
+                        if curses.has_colors():
+                            attr |= curses.color_pair(1)
+                    try:
+                        stdscr.addnstr(y, 0, line, max_x - 1, attr)
+                    except curses.error:
+                        pass
+
+                stdscr.refresh()
+                key = stdscr.getch()
+                if key in (curses.KEY_UP, ord("k")):
+                    cursor = (cursor - 1) % len(choices)
+                elif key in (curses.KEY_DOWN, ord("j")):
+                    cursor = (cursor + 1) % len(choices)
+                elif key in (curses.KEY_ENTER, 10, 13):
+                    result_holder[0] = cursor
+                    return
+                elif key in (27, ord("q")):
+                    return
+
+        curses.wrapper(_curses_menu)
+        return result_holder[0]
+    except Exception:
+        return -1
+
+
+
 def prompt_choice(question: str, choices: list, default: int = 0) -> int:
    """Prompt for a choice from a list with arrow key navigation.

    Escape keeps the current default (skips the question).
    Ctrl+C exits the wizard.
    """
-    print(color(question, Colors.YELLOW))
-
-    # Try to use interactive menu if available
-    try:
-        from simple_term_menu import TerminalMenu
-        import re
-
-        # Strip emoji characters — simple_term_menu miscalculates visual
-        # width of emojis, causing duplicated/garbled lines on redraw.
-        _emoji_re = re.compile(
-            "[\U0001f300-\U0001f9ff\U00002600-\U000027bf\U0000fe00-\U0000fe0f"
-            "\U0001fa00-\U0001fa6f\U0001fa70-\U0001faff\u200d]+",
-            flags=re.UNICODE,
-        )
-        menu_choices = [f"  {_emoji_re.sub('', choice).strip()}" for choice in choices]
-
-        print_info("  ↑/↓ Navigate  Enter Select  Esc Skip  Ctrl+C Exit")
-
-        terminal_menu = TerminalMenu(
-            menu_choices,
-            cursor_index=default,
-            menu_cursor="→ ",
-            menu_cursor_style=("fg_green", "bold"),
-            menu_highlight_style=("fg_green",),
-            cycle_cursor=True,
-            clear_screen=False,
-        )
-
-        idx = terminal_menu.show()
-        if idx is None:  # User pressed Escape — keep current value
-            print_info(f"  Skipped (keeping current)")
+    idx = _curses_prompt_choice(question, choices, default)
+    if idx >= 0:
+        if idx == default:
+            print_info("  Skipped (keeping current)")
            print()
            return default
-        print()  # Add newline after selection
+        print()
        return idx

-    except (ImportError, NotImplementedError):
-        pass
-    except Exception as e:
-        print(f"  (Interactive menu unavailable: {e})")
-
-    # Fallback to number-based selection (simple_term_menu doesn't support Windows)
+    print(color(question, Colors.YELLOW))
    for i, choice in enumerate(choices):
        marker = "●" if i == default else "○"
        if i == default:
@@ -344,84 +376,15 @@ def prompt_checklist(title: str, items: list, pre_selected: list = None) -> list
    if pre_selected is None:
        pre_selected = []

-    print(color(title, Colors.YELLOW))
-    print_info("  SPACE Toggle  ENTER Confirm  ESC Skip  Ctrl+C Exit")
-    print()
+    from hermes_cli.curses_ui import curses_checklist

-    try:
-        from simple_term_menu import TerminalMenu
-        import re
-
-        # Strip emoji characters from menu labels — simple_term_menu miscalculates
-        # visual width of emojis on macOS, causing duplicated/garbled lines.
-        _emoji_re = re.compile(
-            "[\U0001f300-\U0001f9ff\U00002600-\U000027bf\U0000fe00-\U0000fe0f"
-            "\U0001fa00-\U0001fa6f\U0001fa70-\U0001faff\u200d]+",
-            flags=re.UNICODE,
-        )
-        menu_items = [f"  {_emoji_re.sub('', item).strip()}" for item in items]
-
-        # Map pre-selected indices to the actual menu entry strings
-        preselected = [menu_items[i] for i in pre_selected if i < len(menu_items)]
-
-        terminal_menu = TerminalMenu(
-            menu_items,
-            multi_select=True,
-            show_multi_select_hint=False,
-            multi_select_cursor="[✓] ",
-            multi_select_select_on_accept=False,
-            multi_select_empty_ok=True,
-            preselected_entries=preselected if preselected else None,
-            menu_cursor="→ ",
-            menu_cursor_style=("fg_green", "bold"),
-            menu_highlight_style=("fg_green",),
-            cycle_cursor=True,
-            clear_screen=False,
-        )
-
-        terminal_menu.show()
-
-        if terminal_menu.chosen_menu_entries is None:
-            print_info("  Skipped (keeping current)")
-            return list(pre_selected)
-
-        selected = list(terminal_menu.chosen_menu_indices or [])
-        return selected
-
-    except (ImportError, NotImplementedError):
-        # Fallback: numbered toggle interface (simple_term_menu doesn't support Windows)
-        selected = set(pre_selected)
-
-        while True:
-            for i, item in enumerate(items):
-                marker = color("[✓]", Colors.GREEN) if i in selected else "[ ]"
-                print(f"  {marker} {i + 1}. {item}")
-            print()
-
-            try:
-                value = input(
-                    color("  Toggle # (or Enter to confirm): ", Colors.DIM)
-                ).strip()
-                if not value:
-                    break
-                idx = int(value) - 1
-                if 0 <= idx < len(items):
-                    if idx in selected:
-                        selected.discard(idx)
-                    else:
-                        selected.add(idx)
-                else:
-                    print_error(f"Enter a number between 1 and {len(items)}")
-            except ValueError:
-                print_error("Enter a number")
-            except (KeyboardInterrupt, EOFError):
-                print()
-                return []
-
-            # Clear and redraw (simple approach)
-            print()
-
-        return sorted(selected)
+    chosen = curses_checklist(
+        title,
+        items,
+        set(pre_selected),
+        cancel_returns=set(pre_selected),
+    )
+    return sorted(chosen)


 def _prompt_api_key(var: dict):
@@ -460,33 +423,15 @@ def _print_setup_summary(config: dict, hermes_home):

    tool_status = []

-    # Vision — works with OpenRouter, Nous OAuth, Codex OAuth, or OpenAI endpoint
-    _has_vision = False
-    if get_env_value("OPENROUTER_API_KEY"):
-        _has_vision = True
-    else:
-        try:
-            _vauth_path = Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes")) / "auth.json"
-            if _vauth_path.is_file():
-                import json as _vjson
+    # Vision — use the same runtime resolver as the actual vision tools
+    try:
+        from agent.auxiliary_client import get_available_vision_backends

-                _vauth = _vjson.loads(_vauth_path.read_text())
-                if _vauth.get("active_provider") == "nous":
-                    _np = _vauth.get("providers", {}).get("nous", {})
-                    if _np.get("agent_key") or _np.get("access_token"):
-                        _has_vision = True
-                elif _vauth.get("active_provider") == "openai-codex":
-                    _cp = _vauth.get("providers", {}).get("openai-codex", {})
-                    if _cp.get("tokens", {}).get("access_token"):
-                        _has_vision = True
-        except Exception:
-            pass
-    if not _has_vision:
-        _oai_base = get_env_value("OPENAI_BASE_URL") or ""
-        if get_env_value("OPENAI_API_KEY") and "api.openai.com" in _oai_base.lower():
-            _has_vision = True
+        _vision_backends = get_available_vision_backends()
+    except Exception:
+        _vision_backends = []

-    if _has_vision:
+    if _vision_backends:
        tool_status.append(("Vision (image analysis)", True, None))
    else:
        tool_status.append(("Vision (image analysis)", False, "run 'hermes setup' to configure"))
@@ -951,11 +896,35 @@ def setup_model_provider(config: dict):

        base_url = prompt(
            "  API base URL (e.g., https://api.example.com/v1)", current_url
-        )
+        ).strip()
        api_key = prompt("  API key", password=True)
        model_name = prompt("  Model name (e.g., gpt-4, claude-3-opus)", current_model)

        if base_url:
+            from hermes_cli.models import probe_api_models
+
+            probe = probe_api_models(api_key, base_url)
+            if probe.get("used_fallback") and probe.get("resolved_base_url"):
+                print_warning(
+                    f"Endpoint verification worked at {probe['resolved_base_url']}/models, "
+                    f"not the exact URL you entered. Saving the working base URL instead."
+                )
+                base_url = probe["resolved_base_url"]
+            elif probe.get("models") is not None:
+                print_success(
+                    f"Verified endpoint via {probe.get('probed_url')} "
+                    f"({len(probe.get('models') or [])} model(s) visible)"
+                )
+            else:
+                print_warning(
+                    f"Could not verify this endpoint via {probe.get('probed_url')}. "
+                    f"Hermes will still save it."
+                )
+                if probe.get("suggested_base_url"):
+                    print_info(
+                        f"  If this server expects /v1, try base URL: {probe['suggested_base_url']}"
+                    )
+
            save_env_value("OPENAI_BASE_URL", base_url)
        if api_key:
            save_env_value("OPENAI_API_KEY", api_key)
@@ -1276,58 +1245,20 @@ def setup_model_provider(config: dict):
            selected_provider = "openrouter"

    # ── Vision & Image Analysis Setup ──
-    # Vision requires a multimodal-capable provider. Check whether the user's
-    # chosen provider already covers it — if so, skip the prompt entirely.
-    _vision_needs_setup = True
+    # Keep setup aligned with the actual runtime resolver the vision tools use.
+    try:
+        from agent.auxiliary_client import get_available_vision_backends

-    if selected_provider == "openrouter":
-        # OpenRouter → Gemini for vision, already configured
-        _vision_needs_setup = False
-    elif selected_provider == "nous":
-        # Nous Portal OAuth → Gemini via Nous, already configured
-        _vision_needs_setup = False
-    elif selected_provider == "openai-codex":
-        # Codex OAuth → gpt-5.3-codex supports vision
-        _vision_needs_setup = False
-    elif selected_provider == "custom":
-        _custom_base = (get_env_value("OPENAI_BASE_URL") or "").lower()
-        if "api.openai.com" in _custom_base:
-            # Direct OpenAI endpoint — show vision model picker
-            print()
-            print_header("Vision Model")
-            print_info("Your OpenAI endpoint supports vision. Pick a model for image analysis:")
-            _oai_vision_models = ["gpt-4o", "gpt-4o-mini", "gpt-4.1", "gpt-4.1-mini", "gpt-4.1-nano"]
-            _vm_choices = _oai_vision_models + ["Keep default (gpt-4o-mini)"]
-            _vm_idx = prompt_choice("Select vision model:", _vm_choices, len(_vm_choices) - 1)
-            _selected_vision_model = (
-                _oai_vision_models[_vm_idx]
-                if _vm_idx < len(_oai_vision_models)
-                else "gpt-4o-mini"
-            )
-            save_env_value("AUXILIARY_VISION_MODEL", _selected_vision_model)
-            print_success(f"Vision model set to {_selected_vision_model}")
-            _vision_needs_setup = False
+        _vision_backends = set(get_available_vision_backends())
+    except Exception:
+        _vision_backends = set()

-    # Even for providers without native vision, check if existing credentials
-    # from a previous setup already cover it (e.g. user had OpenRouter before
-    # switching to z.ai)
-    if _vision_needs_setup:
-        if get_env_value("OPENROUTER_API_KEY"):
-            _vision_needs_setup = False
-        else:
-            # Check for Nous Portal OAuth in auth.json
-            try:
-                _auth_path = Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes")) / "auth.json"
-                if _auth_path.is_file():
-                    import json as _json
+    _vision_needs_setup = not bool(_vision_backends)

-                    _auth_data = _json.loads(_auth_path.read_text())
-                    if _auth_data.get("active_provider") == "nous":
-                        _nous_p = _auth_data.get("providers", {}).get("nous", {})
-                        if _nous_p.get("agent_key") or _nous_p.get("access_token"):
-                            _vision_needs_setup = False
-            except Exception:
-                pass
+    if selected_provider in _vision_backends:
+        # If the user just selected a backend Hermes can already use for
+        # vision, treat it as covered. Auth/setup failure returns earlier.
+        _vision_needs_setup = False

    if _vision_needs_setup:
        _prov_names = {
@@ -1343,44 +1274,54 @@ def setup_model_provider(config: dict):

        print()
        print_header("Vision & Image Analysis (optional)")
-        print_info(f"Vision requires a multimodal-capable provider. {_prov_display}")
-        print_info("doesn't natively support it. Choose how to enable vision,")
-        print_info("or skip to configure later.")
+        print_info(f"Vision uses a separate multimodal backend. {_prov_display}")
+        print_info("doesn't currently provide one Hermes can auto-use for vision,")
+        print_info("so choose a backend now or skip and configure later.")
        print()

        _vision_choices = [
            "OpenRouter — uses Gemini (free tier at openrouter.ai/keys)",
-            "OpenAI — enter API key & choose a vision model",
+            "OpenAI-compatible endpoint — base URL, API key, and vision model",
            "Skip for now",
        ]
        _vision_idx = prompt_choice("Configure vision:", _vision_choices, 2)

        if _vision_idx == 0:  # OpenRouter
-            _or_key = prompt("  OpenRouter API key", password=True)
+            _or_key = prompt("  OpenRouter API key", password=True).strip()
            if _or_key:
                save_env_value("OPENROUTER_API_KEY", _or_key)
                print_success("OpenRouter key saved — vision will use Gemini")
            else:
                print_info("Skipped — vision won't be available")
-        elif _vision_idx == 1:  # OpenAI
-            _oai_key = prompt("  OpenAI API key", password=True)
+        elif _vision_idx == 1:  # OpenAI-compatible endpoint
+            _base_url = prompt("  Base URL (blank for OpenAI)").strip() or "https://api.openai.com/v1"
+            _api_key_label = "  API key"
+            if "api.openai.com" in _base_url.lower():
+                _api_key_label = "  OpenAI API key"
+            _oai_key = prompt(_api_key_label, password=True).strip()
            if _oai_key:
                save_env_value("OPENAI_API_KEY", _oai_key)
-                save_env_value("OPENAI_BASE_URL", "https://api.openai.com/v1")
-                _oai_vision_models = ["gpt-4o", "gpt-4o-mini", "gpt-4.1", "gpt-4.1-mini", "gpt-4.1-nano"]
-                _vm_choices = _oai_vision_models + ["Use default (gpt-4o-mini)"]
-                _vm_idx = prompt_choice("Select vision model:", _vm_choices, 0)
-                _selected_vision_model = (
-                    _oai_vision_models[_vm_idx]
-                    if _vm_idx < len(_oai_vision_models)
-                    else "gpt-4o-mini"
-                )
+                save_env_value("OPENAI_BASE_URL", _base_url)
+                if "api.openai.com" in _base_url.lower():
+                    _oai_vision_models = ["gpt-4o", "gpt-4o-mini", "gpt-4.1", "gpt-4.1-mini", "gpt-4.1-nano"]
+                    _vm_choices = _oai_vision_models + ["Use default (gpt-4o-mini)"]
+                    _vm_idx = prompt_choice("Select vision model:", _vm_choices, 0)
+                    _selected_vision_model = (
+                        _oai_vision_models[_vm_idx]
+                        if _vm_idx < len(_oai_vision_models)
+                        else "gpt-4o-mini"
+                    )
+                else:
+                    _selected_vision_model = prompt("  Vision model (blank = use main/custom default)").strip()
                save_env_value("AUXILIARY_VISION_MODEL", _selected_vision_model)
-                print_success(f"Vision configured with OpenAI ({_selected_vision_model})")
+                print_success(
+                    f"Vision configured with {_base_url}"
+                    + (f" ({_selected_vision_model})" if _selected_vision_model else "")
+                )
            else:
                print_info("Skipped — vision won't be available")
        else:
-            print_info("Skipped — add later with 'hermes config set OPENROUTER_API_KEY ...'")
+            print_info("Skipped — add later with 'hermes setup' or configure AUXILIARY_VISION_* settings")

    # ── Model Selection (adapts based on provider) ──
    if selected_provider != "custom":  # Custom already prompted for model name
@@ -2186,20 +2127,22 @@ def setup_gateway(config: dict):
        print_info("      • Create an App-Level Token with 'connections:write' scope")
        print_info("   3. Add Bot Token Scopes: Features → OAuth & Permissions")
        print_info("      Required scopes: chat:write, app_mentions:read,")
-        print_info("      channels:history, channels:read, groups:history,")
-        print_info("      im:history, im:read, im:write, users:read, files:write")
+        print_info("      channels:history, channels:read, im:history,")
+        print_info("      im:read, im:write, users:read, files:write")
+        print_info("      Optional for private channels: groups:history")
        print_info("   4. Subscribe to Events: Features → Event Subscriptions → Enable")
-        print_info("      Required events: message.im, message.channels,")
-        print_info("      message.groups, app_mention")
-        print_warning("   ⚠ Without message.channels/message.groups events,")
-        print_warning("     the bot will ONLY work in DMs, not channels!")
+        print_info("      Required events: message.im, message.channels, app_mention")
+        print_info("      Optional for private channels: message.groups")
+        print_warning("   ⚠ Without message.channels the bot will ONLY work in DMs,")
+        print_warning("     not public channels.")
        print_info("   5. Install to Workspace: Settings → Install App")
+        print_info("   6. Reinstall the app after any scope or event changes")
        print_info(
-            "   6. After installing, invite the bot to channels: /invite @YourBot"
+            "   7. After installing, invite the bot to channels: /invite @YourBot"
        )
        print()
        print_info(
-            "   Full guide: https://hermes-agent.ai/docs/user-guide/messaging/slack"
+            "   Full guide: https://hermes-agent.nousresearch.com/docs/user-guide/messaging/slack/"
        )
        print()
        bot_token = prompt("Slack Bot Token (xoxb-...)", password=True)
@@ -2217,14 +2160,17 @@ def setup_gateway(config: dict):
            )
            print()
            allowed_users = prompt(
-                "Allowed user IDs (comma-separated, leave empty for open access)"
+                "Allowed user IDs (comma-separated, leave empty to deny everyone except paired users)"
            )
            if allowed_users:
                save_env_value("SLACK_ALLOWED_USERS", allowed_users.replace(" ", ""))
                print_success("Slack allowlist configured")
            else:
+                print_warning(
+                    "⚠️  No Slack allowlist set - unpaired users will be denied by default."
+                )
                print_info(
-                    "⚠️  No allowlist set - anyone in your workspace can use the bot!"
+                    "   Set SLACK_ALLOW_ALL_USERS=true or GATEWAY_ALLOW_ALL_USERS=true only if you intentionally want open workspace access."
                )

    # ── WhatsApp ──
@@ -2284,7 +2230,9 @@ def setup_gateway(config: dict):
        from hermes_cli.gateway import (
            _is_service_installed,
            _is_service_running,
-            systemd_install,
+            has_conflicting_systemd_units,
+            install_linux_gateway_from_setup,
+            print_systemd_scope_conflict_warning,
            systemd_start,
            systemd_restart,
            launchd_install,
@@ -2296,6 +2244,10 @@ def setup_gateway(config: dict):
        service_running = _is_service_running()

        print()
+        if _is_linux and has_conflicting_systemd_units():
+            print_systemd_scope_conflict_warning()
+            print()
+
        if service_running:
            if prompt_yes_no("  Restart the gateway to pick up changes?", True):
                try:
@@ -2321,15 +2273,18 @@ def setup_gateway(config: dict):
                True,
            ):
                try:
+                    installed_scope = None
+                    did_install = False
                    if _is_linux:
-                        systemd_install(force=False)
+                        installed_scope, did_install = install_linux_gateway_from_setup(force=False)
                    else:
                        launchd_install(force=False)
+                        did_install = True
                    print()
-                    if prompt_yes_no("  Start the service now?", True):
+                    if did_install and prompt_yes_no("  Start the service now?", True):
                        try:
                            if _is_linux:
-                                systemd_start()
+                                systemd_start(system=installed_scope == "system")
                            elif _is_macos:
                                launchd_start()
                        except Exception as e:
@@ -2339,6 +2294,8 @@ def setup_gateway(config: dict):
                    print_info("  You can try manually: hermes gateway install")
            else:
                print_info("  You can install later: hermes gateway install")
+                if _is_linux:
+                    print_info("  Or as a boot-time service: sudo hermes gateway install --system")
                print_info("  Or run in foreground:  hermes gateway")
        else:
            print_info("Start the gateway to bring your bots online:")
--- a/hermes_cli/skin_engine.py
+++ b/hermes_cli/skin_engine.py
@@ -60,6 +60,12 @@ All fields are optional. Missing values inherit from the ``default`` skin.
    # Tool prefix: character for tool output lines (default: ┊)
    tool_prefix: "┊"

+    # Tool emojis: override the default emoji for any tool (used in spinners & progress)
+    tool_emojis:
+      terminal: "⚔"           # Override terminal tool emoji
+      web_search: "🔮"        # Override web_search tool emoji
+      # Any tool not listed here uses its registry default
+
 USAGE
 =====

@@ -111,6 +117,7 @@ class SkinConfig:
    spinner: Dict[str, Any] = field(default_factory=dict)
    branding: Dict[str, str] = field(default_factory=dict)
    tool_prefix: str = "┊"
+    tool_emojis: Dict[str, str] = field(default_factory=dict)  # per-tool emoji overrides
    banner_logo: str = ""    # Rich-markup ASCII art logo (replaces HERMES_AGENT_LOGO)
    banner_hero: str = ""    # Rich-markup hero art (replaces HERMES_CADUCEUS)

@@ -541,6 +548,7 @@ def _build_skin_config(data: Dict[str, Any]) -> SkinConfig:
        spinner=spinner,
        branding=branding,
        tool_prefix=data.get("tool_prefix", default.get("tool_prefix", "┊")),
+        tool_emojis=data.get("tool_emojis", {}),
        banner_logo=data.get("banner_logo", ""),
        banner_hero=data.get("banner_hero", ""),
    )
--- a/hermes_cli/tools_config.py
+++ b/hermes_cli/tools_config.py
@@ -91,7 +91,7 @@ CONFIGURABLE_TOOLSETS = [
    ("session_search",  "🔎 Session Search",            "search past conversations"),
    ("clarify",         "❓ Clarifying Questions",      "clarify"),
    ("delegation",      "👥 Task Delegation",           "delegate_task"),
-    ("cronjob",         "⏰ Cron Jobs",                 "schedule, list, remove"),
+    ("cronjob",         "⏰ Cron Jobs",                 "create/list/update/pause/resume/run, with optional attached skills"),
    ("rl",              "🧪 RL Training",               "Tinker-Atropos training tools"),
    ("homeassistant",    "🏠 Home Assistant",           "smart home device control"),
 ]
@@ -354,22 +354,49 @@ def _get_platform_tools(config: dict, platform: str) -> Set[str]:


 def _save_platform_tools(config: dict, platform: str, enabled_toolset_keys: Set[str]):
-    """Save the selected toolset keys for a platform to config."""
+    """Save the selected toolset keys for a platform to config.
+
+    Preserves any non-configurable toolset entries (like MCP server names)
+    that were already in the config for this platform.
+    """
    config.setdefault("platform_toolsets", {})
-    config["platform_toolsets"][platform] = sorted(enabled_toolset_keys)
+
+    # Get the set of all configurable toolset keys
+    configurable_keys = {ts_key for ts_key, _, _ in CONFIGURABLE_TOOLSETS}
+
+    # Get existing toolsets for this platform
+    existing_toolsets = config.get("platform_toolsets", {}).get(platform, [])
+    if not isinstance(existing_toolsets, list):
+        existing_toolsets = []
+
+    # Preserve any entries that are NOT configurable toolsets (i.e. MCP server names)
+    preserved_entries = {
+        entry for entry in existing_toolsets
+        if entry not in configurable_keys
+    }
+
+    # Merge preserved entries with new enabled toolsets
+    config["platform_toolsets"][platform] = sorted(enabled_toolset_keys | preserved_entries)
    save_config(config)


 def _toolset_has_keys(ts_key: str) -> bool:
    """Check if a toolset's required API keys are configured."""
+    if ts_key == "vision":
+        try:
+            from agent.auxiliary_client import resolve_vision_provider_client
+
+            _provider, client, _model = resolve_vision_provider_client()
+            return client is not None
+        except Exception:
+            return False
+
    # Check TOOL_CATEGORIES first (provider-aware)
    cat = TOOL_CATEGORIES.get(ts_key)
    if cat:
-        for provider in cat["providers"]:
+        for provider in cat.get("providers", []):
            env_vars = provider.get("env_vars", [])
-            if not env_vars:
-                return True  # Free provider (e.g., Edge TTS)
-            if all(get_env_value(v["key"]) for v in env_vars):
+            if env_vars and all(get_env_value(e["key"]) for e in env_vars):
                return True
        return False

@@ -628,6 +655,39 @@ def _configure_provider(provider: dict, config: dict):

 def _configure_simple_requirements(ts_key: str):
    """Simple fallback for toolsets that just need env vars (no provider selection)."""
+    if ts_key == "vision":
+        if _toolset_has_keys("vision"):
+            return
+        print()
+        print(color("  Vision / Image Analysis requires a multimodal backend:", Colors.YELLOW))
+        choices = [
+            "OpenRouter — uses Gemini",
+            "OpenAI-compatible endpoint — base URL, API key, and vision model",
+            "Skip",
+        ]
+        idx = _prompt_choice("  Configure vision backend", choices, 2)
+        if idx == 0:
+            _print_info("  Get key at: https://openrouter.ai/keys")
+            value = _prompt("    OPENROUTER_API_KEY", password=True)
+            if value and value.strip():
+                save_env_value("OPENROUTER_API_KEY", value.strip())
+                _print_success("    Saved")
+            else:
+                _print_warning("    Skipped")
+        elif idx == 1:
+            base_url = _prompt("    OPENAI_BASE_URL (blank for OpenAI)").strip() or "https://api.openai.com/v1"
+            key_label = "    OPENAI_API_KEY" if "api.openai.com" in base_url.lower() else "    API key"
+            api_key = _prompt(key_label, password=True)
+            if api_key and api_key.strip():
+                save_env_value("OPENAI_BASE_URL", base_url)
+                save_env_value("OPENAI_API_KEY", api_key.strip())
+                if "api.openai.com" in base_url.lower():
+                    save_env_value("AUXILIARY_VISION_MODEL", "gpt-4o-mini")
+                _print_success("    Saved")
+            else:
+                _print_warning("    Skipped")
+        return
+
    requirements = TOOLSET_ENV_REQUIREMENTS.get(ts_key, [])
    if not requirements:
        return
--- a/hermes_state.py
+++ b/hermes_state.py
@@ -249,6 +249,32 @@ class SessionDB:
        row = cursor.fetchone()
        return dict(row) if row else None

+    def resolve_session_id(self, session_id_or_prefix: str) -> Optional[str]:
+        """Resolve an exact or uniquely prefixed session ID to the full ID.
+
+        Returns the exact ID when it exists. Otherwise treats the input as a
+        prefix and returns the single matching session ID if the prefix is
+        unambiguous. Returns None for no matches or ambiguous prefixes.
+        """
+        exact = self.get_session(session_id_or_prefix)
+        if exact:
+            return exact["id"]
+
+        escaped = (
+            session_id_or_prefix
+            .replace("\\", "\\\\")
+            .replace("%", "\\%")
+            .replace("_", "\\_")
+        )
+        cursor = self._conn.execute(
+            "SELECT id FROM sessions WHERE id LIKE ? ESCAPE '\\' ORDER BY started_at DESC LIMIT 2",
+            (f"{escaped}%",),
+        )
+        matches = [row["id"] for row in cursor.fetchall()]
+        if len(matches) == 1:
+            return matches[0]
+        return None
+
    # Maximum length for session titles
    MAX_TITLE_LENGTH = 100

--- a/honcho_integration/session.py
+++ b/honcho_integration/session.py
@@ -927,6 +927,11 @@ class HonchoSessionManager:
            return False

        assistant_peer = self._get_or_create_peer(session.assistant_peer_id)
+        honcho_session = self._sessions_cache.get(session.honcho_session_id)
+        if not honcho_session:
+            logger.warning("No Honcho session cached for '%s', skipping AI seed", session_key)
+            return False
+
        try:
            wrapped = (
                f"<ai_identity_seed>\n"
@@ -935,7 +940,7 @@ class HonchoSessionManager:
                f"{content.strip()}\n"
                f"</ai_identity_seed>"
            )
-            assistant_peer.add_message("assistant", wrapped)
+            honcho_session.add_messages([assistant_peer.message(wrapped)])
            logger.info("Seeded AI identity from '%s' into %s", source, session_key)
            return True
        except Exception as e:
--- a/model_tools.py
+++ b/model_tools.py
@@ -144,7 +144,7 @@ _LEGACY_TOOLSET_MAP = {
        "browser_press", "browser_close", "browser_get_images",
        "browser_vision"
    ],
-    "cronjob_tools": ["schedule_cronjob", "list_cronjobs", "remove_cronjob"],
+    "cronjob_tools": ["cronjob"],
    "rl_tools": [
        "rl_list_environments", "rl_select_environment",
        "rl_get_current_config", "rl_edit_config",
--- a/rl_cli.py
+++ b/rl_cli.py
@@ -27,25 +27,16 @@ from pathlib import Path
 import fire
 import yaml

-# Load .env from ~/.hermes/.env first, then project root as dev fallback
-from dotenv import load_dotenv
-
+# Load .env from ~/.hermes/.env first, then project root as dev fallback.
+# User-managed env files should override stale shell exports on restart.
 _hermes_home = Path(os.getenv("HERMES_HOME", Path.home() / ".hermes"))
-_user_env = _hermes_home / ".env"
 _project_env = Path(__file__).parent / '.env'

-if _user_env.exists():
-    try:
-        load_dotenv(dotenv_path=_user_env, encoding="utf-8")
-    except UnicodeDecodeError:
-        load_dotenv(dotenv_path=_user_env, encoding="latin-1")
-    print(f"✅ Loaded environment variables from {_user_env}")
-elif _project_env.exists():
-    try:
-        load_dotenv(dotenv_path=_project_env, encoding="utf-8")
-    except UnicodeDecodeError:
-        load_dotenv(dotenv_path=_project_env, encoding="latin-1")
-    print(f"✅ Loaded environment variables from {_project_env}")
+from hermes_cli.env_loader import load_hermes_dotenv
+
+_loaded_env_paths = load_hermes_dotenv(hermes_home=_hermes_home, project_env=_project_env)
+for _env_path in _loaded_env_paths:
+    print(f"✅ Loaded environment variables from {_env_path}")

 # Set terminal working directory to tinker-atropos submodule
 # This ensures terminal commands run in the right context for RL work
--- a/run_agent.py
+++ b/run_agent.py
@@ -21,6 +21,8 @@ Usage:
 """

 import atexit
+import asyncio
+import base64
 import concurrent.futures
 import copy
 import hashlib
@@ -31,6 +33,7 @@ import os
 import random
 import re
 import sys
+import tempfile
 import time
 import threading
 import weakref
@@ -42,24 +45,16 @@ import fire
 from datetime import datetime
 from pathlib import Path

-# Load .env from ~/.hermes/.env first, then project root as dev fallback
-from dotenv import load_dotenv
+# Load .env from ~/.hermes/.env first, then project root as dev fallback.
+# User-managed env files should override stale shell exports on restart.
+from hermes_cli.env_loader import load_hermes_dotenv

 _hermes_home = Path(os.getenv("HERMES_HOME", Path.home() / ".hermes"))
-_user_env = _hermes_home / ".env"
 _project_env = Path(__file__).parent / '.env'
-if _user_env.exists():
-    try:
-        load_dotenv(dotenv_path=_user_env, encoding="utf-8")
-    except UnicodeDecodeError:
-        load_dotenv(dotenv_path=_user_env, encoding="latin-1")
-    logger.info("Loaded environment variables from %s", _user_env)
-elif _project_env.exists():
-    try:
-        load_dotenv(dotenv_path=_project_env, encoding="utf-8")
-    except UnicodeDecodeError:
-        load_dotenv(dotenv_path=_project_env, encoding="latin-1")
-    logger.info("Loaded environment variables from %s", _project_env)
+_loaded_env_paths = load_hermes_dotenv(hermes_home=_hermes_home, project_env=_project_env)
+if _loaded_env_paths:
+    for _env_path in _loaded_env_paths:
+        logger.info("Loaded environment variables from %s", _env_path)
 else:
    logger.info("No .env file found. Using system environment variables.")

@@ -95,6 +90,7 @@ from agent.display import (
    KawaiiSpinner, build_tool_preview as _build_tool_preview,
    get_cute_tool_message as _get_cute_tool_message_impl,
    _detect_tool_failure,
+    get_tool_emoji as _get_tool_emoji,
 )
 from agent.trajectory import (
    convert_scratchpad_to_think, has_incomplete_scratchpad,
@@ -377,6 +373,7 @@ class AIAgent:
        # Interrupt mechanism for breaking out of tool loops
        self._interrupt_requested = False
        self._interrupt_message = None  # Optional message that triggered interrupt
+        self._client_lock = threading.RLock()
        
        # Subagent delegation state
        self._delegate_depth = 0        # 0 = top-level agent, incremented for children
@@ -503,6 +500,11 @@ class AIAgent:
        self._persist_user_message_idx = None
        self._persist_user_message_override = None

+        # Cache anthropic image-to-text fallbacks per image payload/URL so a
+        # single tool loop does not repeatedly re-run auxiliary vision on the
+        # same image history.
+        self._anthropic_image_fallback_cache: Dict[str, str] = {}
+
        # Initialize LLM client via centralized provider router.
        # The router handles auth resolution, base URL, headers, and
        # Codex/Anthropic wrapping for all known providers.
@@ -566,7 +568,7 @@ class AIAgent:
            
            self._client_kwargs = client_kwargs  # stored for rebuilding after interrupt
            try:
-                self.client = OpenAI(**client_kwargs)
+                self.client = self._create_openai_client(client_kwargs, reason="agent_init", shared=True)
                if not self.quiet_mode:
                    print(f"🤖 AI Agent initialized with model: {self.model}")
                    if base_url:
@@ -2406,7 +2408,7 @@ class AIAgent:
                fn_name = getattr(item, "name", "") or ""
                arguments = getattr(item, "arguments", "{}")
                if not isinstance(arguments, str):
-                    arguments = str(arguments)
+                    arguments = json.dumps(arguments, ensure_ascii=False)
                raw_call_id = getattr(item, "call_id", None)
                raw_item_id = getattr(item, "id", None)
                embedded_call_id, _ = self._split_responses_tool_id(raw_item_id)
@@ -2427,7 +2429,7 @@ class AIAgent:
                fn_name = getattr(item, "name", "") or ""
                arguments = getattr(item, "input", "{}")
                if not isinstance(arguments, str):
-                    arguments = str(arguments)
+                    arguments = json.dumps(arguments, ensure_ascii=False)
                raw_call_id = getattr(item, "call_id", None)
                raw_item_id = getattr(item, "id", None)
                embedded_call_id, _ = self._split_responses_tool_id(raw_item_id)
@@ -2468,12 +2470,118 @@ class AIAgent:
            finish_reason = "stop"
        return assistant_message, finish_reason

-    def _run_codex_stream(self, api_kwargs: dict):
+    def _thread_identity(self) -> str:
+        thread = threading.current_thread()
+        return f"{thread.name}:{thread.ident}"
+
+    def _client_log_context(self) -> str:
+        provider = getattr(self, "provider", "unknown")
+        base_url = getattr(self, "base_url", "unknown")
+        model = getattr(self, "model", "unknown")
+        return (
+            f"thread={self._thread_identity()} provider={provider} "
+            f"base_url={base_url} model={model}"
+        )
+
+    def _openai_client_lock(self) -> threading.RLock:
+        lock = getattr(self, "_client_lock", None)
+        if lock is None:
+            lock = threading.RLock()
+            self._client_lock = lock
+        return lock
+
+    @staticmethod
+    def _is_openai_client_closed(client: Any) -> bool:
+        from unittest.mock import Mock
+
+        if isinstance(client, Mock):
+            return False
+        http_client = getattr(client, "_client", None)
+        return bool(getattr(http_client, "is_closed", False))
+
+    def _create_openai_client(self, client_kwargs: dict, *, reason: str, shared: bool) -> Any:
+        client = OpenAI(**client_kwargs)
+        logger.info(
+            "OpenAI client created (%s, shared=%s) %s",
+            reason,
+            shared,
+            self._client_log_context(),
+        )
+        return client
+
+    def _close_openai_client(self, client: Any, *, reason: str, shared: bool) -> None:
+        if client is None:
+            return
+        try:
+            client.close()
+            logger.info(
+                "OpenAI client closed (%s, shared=%s) %s",
+                reason,
+                shared,
+                self._client_log_context(),
+            )
+        except Exception as exc:
+            logger.debug(
+                "OpenAI client close failed (%s, shared=%s) %s error=%s",
+                reason,
+                shared,
+                self._client_log_context(),
+                exc,
+            )
+
+    def _replace_primary_openai_client(self, *, reason: str) -> bool:
+        with self._openai_client_lock():
+            old_client = getattr(self, "client", None)
+            try:
+                new_client = self._create_openai_client(self._client_kwargs, reason=reason, shared=True)
+            except Exception as exc:
+                logger.warning(
+                    "Failed to rebuild shared OpenAI client (%s) %s error=%s",
+                    reason,
+                    self._client_log_context(),
+                    exc,
+                )
+                return False
+            self.client = new_client
+        self._close_openai_client(old_client, reason=f"replace:{reason}", shared=True)
+        return True
+
+    def _ensure_primary_openai_client(self, *, reason: str) -> Any:
+        with self._openai_client_lock():
+            client = getattr(self, "client", None)
+            if client is not None and not self._is_openai_client_closed(client):
+                return client
+
+        logger.warning(
+            "Detected closed shared OpenAI client; recreating before use (%s) %s",
+            reason,
+            self._client_log_context(),
+        )
+        if not self._replace_primary_openai_client(reason=f"recreate_closed:{reason}"):
+            raise RuntimeError("Failed to recreate closed OpenAI client")
+        with self._openai_client_lock():
+            return self.client
+
+    def _create_request_openai_client(self, *, reason: str) -> Any:
+        from unittest.mock import Mock
+
+        primary_client = self._ensure_primary_openai_client(reason=reason)
+        if isinstance(primary_client, Mock):
+            return primary_client
+        with self._openai_client_lock():
+            request_kwargs = dict(self._client_kwargs)
+        return self._create_openai_client(request_kwargs, reason=reason, shared=False)
+
+    def _close_request_openai_client(self, client: Any, *, reason: str) -> None:
+        self._close_openai_client(client, reason=reason, shared=False)
+
+    def _run_codex_stream(self, api_kwargs: dict, client: Any = None):
        """Execute one streaming Responses API request and return the final response."""
+        active_client = client or self._ensure_primary_openai_client(reason="codex_stream_direct")
        max_stream_retries = 1
        for attempt in range(max_stream_retries + 1):
            try:
-                with self.client.responses.stream(**api_kwargs) as stream:
+                with active_client.responses.stream(**api_kwargs) as stream:
                    for _ in stream:
                        pass
                    return stream.get_final_response()
@@ -2482,24 +2590,27 @@ class AIAgent:
                missing_completed = "response.completed" in err_text
                if missing_completed and attempt < max_stream_retries:
                    logger.debug(
-                        "Responses stream closed before completion (attempt %s/%s); retrying.",
+                        "Responses stream closed before completion (attempt %s/%s); retrying. %s",
                        attempt + 1,
                        max_stream_retries + 1,
+                        self._client_log_context(),
                    )
                    continue
                if missing_completed:
                    logger.debug(
-                        "Responses stream did not emit response.completed; falling back to create(stream=True)."
+                        "Responses stream did not emit response.completed; falling back to create(stream=True). %s",
+                        self._client_log_context(),
                    )
-                    return self._run_codex_create_stream_fallback(api_kwargs)
+                    return self._run_codex_create_stream_fallback(api_kwargs, client=active_client)
                raise

-    def _run_codex_create_stream_fallback(self, api_kwargs: dict):
+    def _run_codex_create_stream_fallback(self, api_kwargs: dict, client: Any = None):
        """Fallback path for stream completion edge cases on Codex-style Responses backends."""
+        active_client = client or self._ensure_primary_openai_client(reason="codex_create_stream_fallback")
        fallback_kwargs = dict(api_kwargs)
        fallback_kwargs["stream"] = True
        fallback_kwargs = self._preflight_codex_api_kwargs(fallback_kwargs, allow_stream=True)
-        stream_or_response = self.client.responses.create(**fallback_kwargs)
+        stream_or_response = active_client.responses.create(**fallback_kwargs)

        # Compatibility shim for mocks or providers that still return a concrete response.
        if hasattr(stream_or_response, "output"):
@@ -2557,15 +2668,7 @@ class AIAgent:
        self._client_kwargs["api_key"] = self.api_key
        self._client_kwargs["base_url"] = self.base_url

-        try:
-            self.client.close()
-        except Exception:
-            pass
-
-        try:
-            self.client = OpenAI(**self._client_kwargs)
-        except Exception as exc:
-            logger.warning("Failed to rebuild OpenAI client after Codex refresh: %s", exc)
+        if not self._replace_primary_openai_client(reason="codex_credential_refresh"):
            return False

        return True
@@ -2600,61 +2703,101 @@ class AIAgent:
        # Nous requests should not inherit OpenRouter-only attribution headers.
        self._client_kwargs.pop("default_headers", None)

+        if not self._replace_primary_openai_client(reason="nous_credential_refresh"):
+            return False
+
+        return True
+
+    def _try_refresh_anthropic_client_credentials(self) -> bool:
+        if self.api_mode != "anthropic_messages" or not hasattr(self, "_anthropic_api_key"):
+            return False
+
        try:
-            self.client.close()
+            from agent.anthropic_adapter import resolve_anthropic_token, build_anthropic_client
+
+            new_token = resolve_anthropic_token()
+        except Exception as exc:
+            logger.debug("Anthropic credential refresh failed: %s", exc)
+            return False
+
+        if not isinstance(new_token, str) or not new_token.strip():
+            return False
+        new_token = new_token.strip()
+        if new_token == self._anthropic_api_key:
+            return False
+
+        try:
+            self._anthropic_client.close()
        except Exception:
            pass

        try:
-            self.client = OpenAI(**self._client_kwargs)
+            self._anthropic_client = build_anthropic_client(new_token, getattr(self, "_anthropic_base_url", None))
        except Exception as exc:
-            logger.warning("Failed to rebuild OpenAI client after Nous refresh: %s", exc)
+            logger.warning("Failed to rebuild Anthropic client after credential refresh: %s", exc)
            return False

+        self._anthropic_api_key = new_token
        return True

+    def _anthropic_messages_create(self, api_kwargs: dict):
+        if self.api_mode == "anthropic_messages":
+            self._try_refresh_anthropic_client_credentials()
+        return self._anthropic_client.messages.create(**api_kwargs)
+
    def _interruptible_api_call(self, api_kwargs: dict):
        """
        Run the API call in a background thread so the main conversation loop
        can detect interrupts without waiting for the full HTTP round-trip.

-        On interrupt, closes the HTTP client to cancel the in-flight request
-        (stops token generation and avoids wasting money), then rebuilds the
-        client for future calls.
+        Each worker thread gets its own OpenAI client instance. Interrupts only
+        close that worker-local client, so retries and other requests never
+        inherit a closed transport.
        """
        result = {"response": None, "error": None}
+        request_client_holder = {"client": None}

        def _call():
            try:
                if self.api_mode == "codex_responses":
-                    result["response"] = self._run_codex_stream(api_kwargs)
+                    request_client_holder["client"] = self._create_request_openai_client(reason="codex_stream_request")
+                    result["response"] = self._run_codex_stream(
+                        api_kwargs,
+                        client=request_client_holder["client"],
+                    )
                elif self.api_mode == "anthropic_messages":
-                    result["response"] = self._anthropic_client.messages.create(**api_kwargs)
+                    result["response"] = self._anthropic_messages_create(api_kwargs)
                else:
-                    result["response"] = self.client.chat.completions.create(**api_kwargs)
+                    request_client_holder["client"] = self._create_request_openai_client(reason="chat_completion_request")
+                    result["response"] = request_client_holder["client"].chat.completions.create(**api_kwargs)
            except Exception as e:
                result["error"] = e
+            finally:
+                request_client = request_client_holder.get("client")
+                if request_client is not None:
+                    self._close_request_openai_client(request_client, reason="request_complete")

        t = threading.Thread(target=_call, daemon=True)
        t.start()
        while t.is_alive():
            t.join(timeout=0.3)
            if self._interrupt_requested:
-                # Force-close the HTTP connection to stop token generation
-                try:
-                    if self.api_mode == "anthropic_messages":
-                        self._anthropic_client.close()
-                    else:
-                        self.client.close()
-                except Exception:
-                    pass
-                # Rebuild the client for future calls (cheap, no network)
+                # Force-close the in-flight worker-local HTTP connection to stop
+                # token generation without poisoning the shared client used to
+                # seed future retries.
                try:
                    if self.api_mode == "anthropic_messages":
                        from agent.anthropic_adapter import build_anthropic_client
-                        self._anthropic_client = build_anthropic_client(self._anthropic_api_key, getattr(self, "_anthropic_base_url", None))
+
+                        self._anthropic_client.close()
+                        self._anthropic_client = build_anthropic_client(
+                            self._anthropic_api_key,
+                            getattr(self, "_anthropic_base_url", None),
+                        )
                    else:
-                        self.client = OpenAI(**self._client_kwargs)
+                        request_client = request_client_holder.get("client")
+                        if request_client is not None:
+                            self._close_request_openai_client(request_client, reason="interrupt_abort")
                except Exception:
                    pass
                raise InterruptedError("Agent interrupted during API call")
@@ -2673,11 +2816,15 @@ class AIAgent:
        core agent loop untouched for non-voice users.
        """
        result = {"response": None, "error": None}
+        request_client_holder = {"client": None}

        def _call():
            try:
                stream_kwargs = {**api_kwargs, "stream": True}
-                stream = self.client.chat.completions.create(**stream_kwargs)
+                request_client_holder["client"] = self._create_request_openai_client(
+                    reason="chat_completion_stream_request"
+                )
+                stream = request_client_holder["client"].chat.completions.create(**stream_kwargs)

                content_parts: list[str] = []
                tool_calls_acc: dict[int, dict] = {}
@@ -2768,25 +2915,29 @@ class AIAgent:

            except Exception as e:
                result["error"] = e
+            finally:
+                request_client = request_client_holder.get("client")
+                if request_client is not None:
+                    self._close_request_openai_client(request_client, reason="stream_request_complete")

        t = threading.Thread(target=_call, daemon=True)
        t.start()
        while t.is_alive():
            t.join(timeout=0.3)
            if self._interrupt_requested:
-                try:
-                    if self.api_mode == "anthropic_messages":
-                        self._anthropic_client.close()
-                    else:
-                        self.client.close()
-                except Exception:
-                    pass
                try:
                    if self.api_mode == "anthropic_messages":
                        from agent.anthropic_adapter import build_anthropic_client
-                        self._anthropic_client = build_anthropic_client(self._anthropic_api_key, getattr(self, "_anthropic_base_url", None))
+
+                        self._anthropic_client.close()
+                        self._anthropic_client = build_anthropic_client(
+                            self._anthropic_api_key,
+                            getattr(self, "_anthropic_base_url", None),
+                        )
                    else:
-                        self.client = OpenAI(**self._client_kwargs)
+                        request_client = request_client_holder.get("client")
+                        if request_client is not None:
+                            self._close_request_openai_client(request_client, reason="stream_interrupt_abort")
                except Exception:
                    pass
                raise InterruptedError("Agent interrupted during API call")
@@ -2884,13 +3035,156 @@ class AIAgent:

    # ── End provider fallback ──────────────────────────────────────────────

+    @staticmethod
+    def _content_has_image_parts(content: Any) -> bool:
+        if not isinstance(content, list):
+            return False
+        for part in content:
+            if isinstance(part, dict) and part.get("type") in {"image_url", "input_image"}:
+                return True
+        return False
+
+    @staticmethod
+    def _materialize_data_url_for_vision(image_url: str) -> tuple[str, Optional[Path]]:
+        header, _, data = str(image_url or "").partition(",")
+        mime = "image/jpeg"
+        if header.startswith("data:"):
+            mime_part = header[len("data:"):].split(";", 1)[0].strip()
+            if mime_part.startswith("image/"):
+                mime = mime_part
+        suffix = {
+            "image/png": ".png",
+            "image/gif": ".gif",
+            "image/webp": ".webp",
+            "image/jpeg": ".jpg",
+            "image/jpg": ".jpg",
+        }.get(mime, ".jpg")
+        tmp = tempfile.NamedTemporaryFile(prefix="anthropic_image_", suffix=suffix, delete=False)
+        with tmp:
+            tmp.write(base64.b64decode(data))
+        path = Path(tmp.name)
+        return str(path), path
+
+    def _describe_image_for_anthropic_fallback(self, image_url: str, role: str) -> str:
+        cache_key = hashlib.sha256(str(image_url or "").encode("utf-8")).hexdigest()
+        cached = self._anthropic_image_fallback_cache.get(cache_key)
+        if cached:
+            return cached
+
+        role_label = {
+            "assistant": "assistant",
+            "tool": "tool result",
+        }.get(role, "user")
+        analysis_prompt = (
+            "Describe everything visible in this image in thorough detail. "
+            "Include any text, code, UI, data, objects, people, layout, colors, "
+            "and any other notable visual information."
+        )
+
+        vision_source = str(image_url or "")
+        cleanup_path: Optional[Path] = None
+        if vision_source.startswith("data:"):
+            vision_source, cleanup_path = self._materialize_data_url_for_vision(vision_source)
+
+        description = ""
+        try:
+            from tools.vision_tools import vision_analyze_tool
+
+            result_json = asyncio.run(
+                vision_analyze_tool(image_url=vision_source, user_prompt=analysis_prompt)
+            )
+            result = json.loads(result_json) if isinstance(result_json, str) else {}
+            description = (result.get("analysis") or "").strip()
+        except Exception as e:
+            description = f"Image analysis failed: {e}"
+        finally:
+            if cleanup_path and cleanup_path.exists():
+                try:
+                    cleanup_path.unlink()
+                except OSError:
+                    pass
+
+        if not description:
+            description = "Image analysis failed."
+
+        note = f"[The {role_label} attached an image. Here's what it contains:\n{description}]"
+        if vision_source and not str(image_url or "").startswith("data:"):
+            note += (
+                f"\n[If you need a closer look, use vision_analyze with image_url: {vision_source}]"
+            )
+
+        self._anthropic_image_fallback_cache[cache_key] = note
+        return note
+
+    def _preprocess_anthropic_content(self, content: Any, role: str) -> Any:
+        if not self._content_has_image_parts(content):
+            return content
+
+        text_parts: List[str] = []
+        image_notes: List[str] = []
+        for part in content:
+            if isinstance(part, str):
+                if part.strip():
+                    text_parts.append(part.strip())
+                continue
+            if not isinstance(part, dict):
+                continue
+
+            ptype = part.get("type")
+            if ptype in {"text", "input_text"}:
+                text = str(part.get("text", "") or "").strip()
+                if text:
+                    text_parts.append(text)
+                continue
+
+            if ptype in {"image_url", "input_image"}:
+                image_data = part.get("image_url", {})
+                image_url = image_data.get("url", "") if isinstance(image_data, dict) else str(image_data or "")
+                if image_url:
+                    image_notes.append(self._describe_image_for_anthropic_fallback(image_url, role))
+                else:
+                    image_notes.append("[An image was attached but no image source was available.]")
+                continue
+
+            text = str(part.get("text", "") or "").strip()
+            if text:
+                text_parts.append(text)
+
+        prefix = "\n\n".join(note for note in image_notes if note).strip()
+        suffix = "\n".join(text for text in text_parts if text).strip()
+        if prefix and suffix:
+            return f"{prefix}\n\n{suffix}"
+        if prefix:
+            return prefix
+        if suffix:
+            return suffix
+        return "[A multimodal message was converted to text for Anthropic compatibility.]"
+
+    def _prepare_anthropic_messages_for_api(self, api_messages: list) -> list:
+        if not any(
+            isinstance(msg, dict) and self._content_has_image_parts(msg.get("content"))
+            for msg in api_messages
+        ):
+            return api_messages
+
+        transformed = copy.deepcopy(api_messages)
+        for msg in transformed:
+            if not isinstance(msg, dict):
+                continue
+            msg["content"] = self._preprocess_anthropic_content(
+                msg.get("content"),
+                str(msg.get("role", "user") or "user"),
+            )
+        return transformed
+
    def _build_api_kwargs(self, api_messages: list) -> dict:
        """Build the keyword arguments dict for the active API mode."""
        if self.api_mode == "anthropic_messages":
            from agent.anthropic_adapter import build_anthropic_kwargs
+            anthropic_messages = self._prepare_anthropic_messages_for_api(api_messages)
            return build_anthropic_kwargs(
                model=self.model,
-                messages=api_messages,
+                messages=anthropic_messages,
                tools=self.tools,
                max_tokens=self.max_tokens,
                reasoning_config=self.reasoning_config,
@@ -3008,8 +3302,7 @@ class AIAgent:
            extra_body["provider"] = provider_preferences
        _is_nous = "nousresearch" in self.base_url.lower()

-        _is_mistral = "api.mistral.ai" in self.base_url.lower()
-        if (_is_openrouter or _is_nous) and not _is_mistral:
+        if self._supports_reasoning_extra_body():
            if self.reasoning_config is not None:
                rc = dict(self.reasoning_config)
                # Nous Portal requires reasoning enabled — don't send
@@ -3033,6 +3326,32 @@ class AIAgent:

        return api_kwargs

+    def _supports_reasoning_extra_body(self) -> bool:
+        """Return True when reasoning extra_body is safe to send for this route/model.
+
+        OpenRouter forwards unknown extra_body fields to upstream providers.
+        Some providers/routes reject `reasoning` with 400s, so gate it to
+        known reasoning-capable model families and direct Nous Portal.
+        """
+        base_url = (self.base_url or "").lower()
+        if "nousresearch" in base_url:
+            return True
+        if "openrouter" not in base_url:
+            return False
+        if "api.mistral.ai" in base_url:
+            return False
+
+        model = (self.model or "").lower()
+        reasoning_model_prefixes = (
+            "deepseek/",
+            "anthropic/",
+            "openai/",
+            "x-ai/",
+            "google/gemini-2",
+            "qwen/qwen3",
+        )
+        return any(model.startswith(prefix) for prefix in reasoning_model_prefixes)
+
    def _build_assistant_message(self, assistant_message, finish_reason: str) -> dict:
        """Build a normalized assistant message dict from an API response message.

@@ -3052,8 +3371,7 @@ class AIAgent:
                reasoning_text = combined or None

        if reasoning_text and self.verbose_logging:
-            preview = reasoning_text[:100] + "..." if len(reasoning_text) > 100 else reasoning_text
-            logging.debug(f"Captured reasoning ({len(reasoning_text)} chars): {preview}")
+            logging.debug(f"Captured reasoning ({len(reasoning_text)} chars): {reasoning_text}")

        if reasoning_text and self.reasoning_callback:
            try:
@@ -3267,7 +3585,7 @@ class AIAgent:
                    tools=[memory_tool_def], max_tokens=5120,
                    reasoning_config=None,
                )
-                response = self._anthropic_client.messages.create(**ant_kwargs)
+                response = self._anthropic_messages_create(ant_kwargs)
            elif not _aux_available:
                api_kwargs = {
                    "model": self.model,
@@ -3276,7 +3594,7 @@ class AIAgent:
                    "temperature": 0.3,
                    **self._max_tokens_param(5120),
                }
-                response = self.client.chat.completions.create(**api_kwargs, timeout=30.0)
+                response = self._ensure_primary_openai_client(reason="flush_memories").chat.completions.create(**api_kwargs, timeout=30.0)

            # Extract tool calls from the response, handling all API formats
            tool_calls = []
@@ -3530,8 +3848,12 @@ class AIAgent:
            print(f"  ⚡ Concurrent: {num_tools} tool calls — {tool_names_str}")
            for i, (tc, name, args) in enumerate(parsed_calls, 1):
                args_str = json.dumps(args, ensure_ascii=False)
-                args_preview = args_str[:self.log_prefix_chars] + "..." if len(args_str) > self.log_prefix_chars else args_str
-                print(f"  📞 Tool {i}: {name}({list(args.keys())}) - {args_preview}")
+                if self.verbose_logging:
+                    print(f"  📞 Tool {i}: {name}({list(args.keys())})")
+                    print(f"     Args: {args_str}")
+                else:
+                    args_preview = args_str[:self.log_prefix_chars] + "..." if len(args_str) > self.log_prefix_chars else args_str
+                    print(f"  📞 Tool {i}: {name}({list(args.keys())}) - {args_preview}")

        for _, name, args in parsed_calls:
            if self.tool_progress_callback:
@@ -3596,17 +3918,20 @@ class AIAgent:
                    logger.warning("Tool %s returned error (%.2fs): %s", function_name, tool_duration, result_preview)

                if self.verbose_logging:
-                    result_preview = function_result[:200] if len(function_result) > 200 else function_result
                    logging.debug(f"Tool {function_name} completed in {tool_duration:.2f}s")
-                    logging.debug(f"Tool result preview: {result_preview}...")
+                    logging.debug(f"Tool result ({len(function_result)} chars): {function_result}")

            # Print cute message per tool
            if self.quiet_mode:
                cute_msg = _get_cute_tool_message_impl(name, args, tool_duration, result=function_result)
                print(f"  {cute_msg}")
            elif not self.quiet_mode:
-                response_preview = function_result[:self.log_prefix_chars] + "..." if len(function_result) > self.log_prefix_chars else function_result
-                print(f"  ✅ Tool {i+1} completed in {tool_duration:.2f}s - {response_preview}")
+                if self.verbose_logging:
+                    print(f"  ✅ Tool {i+1} completed in {tool_duration:.2f}s")
+                    print(f"     Result: {function_result}")
+                else:
+                    response_preview = function_result[:self.log_prefix_chars] + "..." if len(function_result) > self.log_prefix_chars else function_result
+                    print(f"  ✅ Tool {i+1} completed in {tool_duration:.2f}s - {response_preview}")

            # Truncate oversized results
            MAX_TOOL_RESULT_CHARS = 100_000
@@ -3682,8 +4007,12 @@ class AIAgent:

            if not self.quiet_mode:
                args_str = json.dumps(function_args, ensure_ascii=False)
-                args_preview = args_str[:self.log_prefix_chars] + "..." if len(args_str) > self.log_prefix_chars else args_str
-                print(f"  📞 Tool {i}: {function_name}({list(function_args.keys())}) - {args_preview}")
+                if self.verbose_logging:
+                    print(f"  📞 Tool {i}: {function_name}({list(function_args.keys())})")
+                    print(f"     Args: {args_str}")
+                else:
+                    args_preview = args_str[:self.log_prefix_chars] + "..." if len(args_str) > self.log_prefix_chars else args_str
+                    print(f"  📞 Tool {i}: {function_name}({list(function_args.keys())}) - {args_preview}")

            if self.tool_progress_callback:
                try:
@@ -3792,23 +4121,7 @@ class AIAgent:
                        self._vprint(f"  {cute_msg}")
            elif self.quiet_mode and self._stream_callback is None:
                face = random.choice(KawaiiSpinner.KAWAII_WAITING)
-                tool_emoji_map = {
-                    'web_search': '🔍', 'web_extract': '📄', 'web_crawl': '🕸️',
-                    'terminal': '💻', 'process': '⚙️',
-                    'read_file': '📖', 'write_file': '✍️', 'patch': '🔧', 'search_files': '🔎',
-                    'browser_navigate': '🌐', 'browser_snapshot': '📸',
-                    'browser_click': '👆', 'browser_type': '⌨️',
-                    'browser_scroll': '📜', 'browser_back': '◀️',
-                    'browser_press': '⌨️', 'browser_close': '🚪',
-                    'browser_get_images': '🖼️', 'browser_vision': '👁️',
-                    'image_generate': '🎨', 'text_to_speech': '🔊',
-                    'vision_analyze': '👁️', 'mixture_of_agents': '🧠',
-                    'skills_list': '📚', 'skill_view': '📚',
-                    'schedule_cronjob': '⏰', 'list_cronjobs': '⏰', 'remove_cronjob': '⏰',
-                    'send_message': '📨', 'todo': '📋', 'memory': '🧠', 'session_search': '🔍',
-                    'clarify': '❓', 'execute_code': '🐍', 'delegate_task': '🔀',
-                }
-                emoji = tool_emoji_map.get(function_name, '⚡')
+                emoji = _get_tool_emoji(function_name)
                preview = _build_tool_preview(function_name, function_args) or function_name
                if len(preview) > 30:
                    preview = preview[:27] + "..."
@@ -3839,7 +4152,9 @@ class AIAgent:
                    logger.error("handle_function_call raised for %s: %s", function_name, tool_error, exc_info=True)
                tool_duration = time.time() - tool_start_time

-            result_preview = function_result[:200] if len(function_result) > 200 else function_result
+            result_preview = function_result if self.verbose_logging else (
+                function_result[:200] if len(function_result) > 200 else function_result
+            )

            # Log tool errors to the persistent error log so [error] tags
            # in the UI always have a corresponding detailed entry on disk.
@@ -3849,7 +4164,7 @@ class AIAgent:

            if self.verbose_logging:
                logging.debug(f"Tool {function_name} completed in {tool_duration:.2f}s")
-                logging.debug(f"Tool result preview: {result_preview}...")
+                logging.debug(f"Tool result ({len(function_result)} chars): {function_result}")

            # Guard against tools returning absurdly large content that would
            # blow up the context window. 100K chars ≈ 25K tokens — generous
@@ -3872,8 +4187,12 @@ class AIAgent:
            messages.append(tool_msg)

            if not self.quiet_mode:
-                response_preview = function_result[:self.log_prefix_chars] + "..." if len(function_result) > self.log_prefix_chars else function_result
-                print(f"  ✅ Tool {i} completed in {tool_duration:.2f}s - {response_preview}")
+                if self.verbose_logging:
+                    print(f"  ✅ Tool {i} completed in {tool_duration:.2f}s")
+                    print(f"     Result: {function_result}")
+                else:
+                    response_preview = function_result[:self.log_prefix_chars] + "..." if len(function_result) > self.log_prefix_chars else function_result
+                    print(f"  ✅ Tool {i} completed in {tool_duration:.2f}s - {response_preview}")

            if self._interrupt_requested and i < len(assistant_message.tool_calls):
                remaining = len(assistant_message.tool_calls) - i
@@ -3971,9 +4290,8 @@ class AIAgent:
                    api_messages.insert(sys_offset + idx, pfm.copy())

            summary_extra_body = {}
-            _is_openrouter = "openrouter" in self.base_url.lower()
            _is_nous = "nousresearch" in self.base_url.lower()
-            if _is_openrouter or _is_nous:
+            if self._supports_reasoning_extra_body():
                if self.reasoning_config is not None:
                    summary_extra_body["reasoning"] = self.reasoning_config
                else:
@@ -4018,11 +4336,11 @@ class AIAgent:
                    from agent.anthropic_adapter import build_anthropic_kwargs as _bak, normalize_anthropic_response as _nar
                    _ant_kw = _bak(model=self.model, messages=api_messages, tools=None,
                                   max_tokens=self.max_tokens, reasoning_config=self.reasoning_config)
-                    summary_response = self._anthropic_client.messages.create(**_ant_kw)
+                    summary_response = self._anthropic_messages_create(_ant_kw)
                    _msg, _ = _nar(summary_response)
                    final_response = (_msg.content or "").strip()
                else:
-                    summary_response = self.client.chat.completions.create(**summary_kwargs)
+                    summary_response = self._ensure_primary_openai_client(reason="iteration_limit_summary").chat.completions.create(**summary_kwargs)

                    if summary_response.choices and summary_response.choices[0].message.content:
                        final_response = summary_response.choices[0].message.content
@@ -4048,7 +4366,7 @@ class AIAgent:
                    from agent.anthropic_adapter import build_anthropic_kwargs as _bak2, normalize_anthropic_response as _nar2
                    _ant_kw2 = _bak2(model=self.model, messages=api_messages, tools=None,
                                     max_tokens=self.max_tokens, reasoning_config=self.reasoning_config)
-                    retry_response = self._anthropic_client.messages.create(**_ant_kw2)
+                    retry_response = self._anthropic_messages_create(_ant_kw2)
                    _retry_msg, _ = _nar2(retry_response)
                    final_response = (_retry_msg.content or "").strip()
                else:
@@ -4061,7 +4379,7 @@ class AIAgent:
                    if summary_extra_body:
                        summary_kwargs["extra_body"] = summary_extra_body

-                    summary_response = self.client.chat.completions.create(**summary_kwargs)
+                    summary_response = self._ensure_primary_openai_client(reason="iteration_limit_summary_retry").chat.completions.create(**summary_kwargs)

                    if summary_response.choices and summary_response.choices[0].message.content:
                        final_response = summary_response.choices[0].message.content
@@ -4822,12 +5140,8 @@ class AIAgent:
                        and not anthropic_auth_retry_attempted
                    ):
                        anthropic_auth_retry_attempted = True
-                        # Try re-reading Claude Code credentials (they may have been refreshed)
-                        from agent.anthropic_adapter import resolve_anthropic_token, build_anthropic_client, _is_oauth_token
-                        new_token = resolve_anthropic_token()
-                        if new_token and new_token != self._anthropic_api_key:
-                            self._anthropic_api_key = new_token
-                            self._anthropic_client = build_anthropic_client(new_token, getattr(self, "_anthropic_base_url", None))
+                        from agent.anthropic_adapter import _is_oauth_token
+                        if self._try_refresh_anthropic_client_credentials():
                            print(f"{self.log_prefix}🔐 Anthropic credentials refreshed after 401. Retrying request...")
                            continue
                        # Credential refresh didn't help — show diagnostic info
@@ -4850,7 +5164,15 @@ class AIAgent:
                    # Enhanced error logging
                    error_type = type(api_error).__name__
                    error_msg = str(api_error).lower()
-                    
+                    logger.warning(
+                        "API call failed (attempt %s/%s) error_type=%s %s error=%s",
+                        retry_count,
+                        max_retries,
+                        error_type,
+                        self._client_log_context(),
+                        api_error,
+                    )
+
                    self._vprint(f"{self.log_prefix}⚠️  API call failed (attempt {retry_count}/{max_retries}): {error_type}", force=True)
                    self._vprint(f"{self.log_prefix}   ⏱️  Time elapsed before failure: {elapsed_time:.2f}s")
                    self._vprint(f"{self.log_prefix}   📝 Error: {str(api_error)[:200]}", force=True)
@@ -5040,7 +5362,14 @@ class AIAgent:
                        raise api_error

                    wait_time = min(2 ** retry_count, 60)  # Exponential backoff: 2s, 4s, 8s, 16s, 32s, 60s, 60s
-                    logging.warning(f"API retry {retry_count}/{max_retries} after error: {api_error}")
+                    logger.warning(
+                        "Retrying API call in %ss (attempt %s/%s) %s error=%s",
+                        wait_time,
+                        retry_count,
+                        max_retries,
+                        self._client_log_context(),
+                        api_error,
+                    )
                    if retry_count >= max_retries:
                        self._vprint(f"{self.log_prefix}⚠️  API call failed after {retry_count} attempts: {str(api_error)[:100]}")
                        self._vprint(f"{self.log_prefix}⏳ Final retry in {wait_time}s...")
@@ -5114,7 +5443,10 @@ class AIAgent:

                # Handle assistant response
                if assistant_message.content and not self.quiet_mode:
-                    self._vprint(f"{self.log_prefix}🤖 Assistant: {assistant_message.content[:100]}{'...' if len(assistant_message.content) > 100 else ''}")
+                    if self.verbose_logging:
+                        self._vprint(f"{self.log_prefix}🤖 Assistant: {assistant_message.content}")
+                    else:
+                        self._vprint(f"{self.log_prefix}🤖 Assistant: {assistant_message.content[:100]}{'...' if len(assistant_message.content) > 100 else ''}")

                # Notify progress callback of model's thinking (used by subagent
                # delegation to relay the child's reasoning to the parent display).
@@ -5278,6 +5610,12 @@ class AIAgent:
                    invalid_json_args = []
                    for tc in assistant_message.tool_calls:
                        args = tc.function.arguments
+                        if isinstance(args, (dict, list)):
+                            tc.function.arguments = json.dumps(args)
+                            continue
+                        if args is not None and not isinstance(args, str):
+                            tc.function.arguments = str(args)
+                            args = tc.function.arguments
                        # Treat empty/whitespace strings as empty object
                        if not args or not args.strip():
                            tc.function.arguments = "{}"
--- a/scripts/discord-voice-doctor.py
+++ b/scripts/discord-voice-doctor.py
@@ -0,0 +1,389 @@
+#!/usr/bin/env python3
+"""Discord Voice Doctor — diagnostic tool for voice channel support.
+
+Checks all dependencies, configuration, and bot permissions needed
+for Discord voice mode to work correctly.
+
+Usage:
+    python scripts/discord-voice-doctor.py
+    .venv/bin/python scripts/discord-voice-doctor.py
+"""
+
+import os
+import sys
+import shutil
+from pathlib import Path
+
+# Resolve project root
+SCRIPT_DIR = Path(__file__).resolve().parent
+PROJECT_ROOT = SCRIPT_DIR.parent
+sys.path.insert(0, str(PROJECT_ROOT))
+
+HERMES_HOME = Path(os.getenv("HERMES_HOME", Path.home() / ".hermes"))
+ENV_FILE = HERMES_HOME / ".env"
+
+OK = "\033[92m\u2713\033[0m"
+FAIL = "\033[91m\u2717\033[0m"
+WARN = "\033[93m!\033[0m"
+
+# Track whether discord.py is available for later sections
+_discord_available = False
+
+
+def mask(value):
+    """Mask sensitive value: show only first 4 chars."""
+    if not value or len(value) < 8:
+        return "****"
+    return f"{value[:4]}{'*' * (len(value) - 4)}"
+
+
+def check(label, ok, detail=""):
+    symbol = OK if ok else FAIL
+    msg = f"  {symbol} {label}"
+    if detail:
+        msg += f"  ({detail})"
+    print(msg)
+    return ok
+
+
+def warn(label, detail=""):
+    msg = f"  {WARN} {label}"
+    if detail:
+        msg += f"  ({detail})"
+    print(msg)
+
+
+def section(title):
+    print(f"\n\033[1m{title}\033[0m")
+
+
+def check_packages():
+    """Check Python package dependencies. Returns True if all critical deps OK."""
+    global _discord_available
+    section("Python Packages")
+    ok = True
+
+    # discord.py
+    try:
+        import discord
+        _discord_available = True
+        check("discord.py", True, f"v{discord.__version__}")
+    except ImportError:
+        check("discord.py", False, "pip install discord.py[voice]")
+        ok = False
+
+    # PyNaCl
+    try:
+        import nacl
+        ver = getattr(nacl, "__version__", "unknown")
+        try:
+            import nacl.secret
+            nacl.secret.Aead(bytes(32))
+            check("PyNaCl", True, f"v{ver}")
+        except (AttributeError, Exception):
+            check("PyNaCl (Aead)", False, f"v{ver} — need >=1.5.0")
+            ok = False
+    except ImportError:
+        check("PyNaCl", False, "pip install PyNaCl>=1.5.0")
+        ok = False
+
+    # davey (DAVE E2EE)
+    try:
+        import davey
+        check("davey (DAVE E2EE)", True, f"v{getattr(davey, '__version__', '?')}")
+    except ImportError:
+        check("davey (DAVE E2EE)", False, "pip install davey")
+        ok = False
+
+    # Optional: local STT
+    try:
+        import faster_whisper
+        check("faster-whisper (local STT)", True)
+    except ImportError:
+        warn("faster-whisper (local STT)", "not installed — local STT unavailable")
+
+    # Optional: TTS providers
+    try:
+        import edge_tts
+        check("edge-tts", True)
+    except ImportError:
+        warn("edge-tts", "not installed — edge TTS unavailable")
+
+    try:
+        import elevenlabs
+        check("elevenlabs SDK", True)
+    except ImportError:
+        warn("elevenlabs SDK", "not installed — premium TTS unavailable")
+
+    return ok
+
+
+def check_system_tools():
+    """Check system-level tools (opus, ffmpeg). Returns True if all OK."""
+    section("System Tools")
+    ok = True
+
+    # Opus codec
+    if _discord_available:
+        try:
+            import discord
+            opus_loaded = discord.opus.is_loaded()
+            if not opus_loaded:
+                import ctypes.util
+                opus_path = ctypes.util.find_library("opus")
+                if not opus_path:
+                    # Platform-specific fallback paths
+                    candidates = [
+                        "/opt/homebrew/lib/libopus.dylib",   # macOS Apple Silicon
+                        "/usr/local/lib/libopus.dylib",      # macOS Intel
+                        "/usr/lib/x86_64-linux-gnu/libopus.so.0",  # Debian/Ubuntu x86
+                        "/usr/lib/aarch64-linux-gnu/libopus.so.0", # Debian/Ubuntu ARM
+                        "/usr/lib/libopus.so",               # Arch Linux
+                        "/usr/lib64/libopus.so",             # RHEL/Fedora
+                    ]
+                    for p in candidates:
+                        if os.path.isfile(p):
+                            opus_path = p
+                            break
+                if opus_path:
+                    discord.opus.load_opus(opus_path)
+                    opus_loaded = discord.opus.is_loaded()
+            if opus_loaded:
+                check("Opus codec", True)
+            else:
+                check("Opus codec", False, "brew install opus / apt install libopus0")
+                ok = False
+        except Exception as e:
+            check("Opus codec", False, str(e))
+            ok = False
+    else:
+        warn("Opus codec", "skipped — discord.py not installed")
+
+    # ffmpeg
+    ffmpeg_path = shutil.which("ffmpeg")
+    if ffmpeg_path:
+        check("ffmpeg", True, ffmpeg_path)
+    else:
+        check("ffmpeg", False, "brew install ffmpeg / apt install ffmpeg")
+        ok = False
+
+    return ok
+
+
+def check_env_vars():
+    """Check environment variables. Returns (ok, token, groq_key, eleven_key)."""
+    section("Environment Variables")
+
+    # Load .env
+    try:
+        from dotenv import load_dotenv
+        if ENV_FILE.exists():
+            load_dotenv(ENV_FILE)
+    except ImportError:
+        pass
+
+    ok = True
+
+    token = os.getenv("DISCORD_BOT_TOKEN", "")
+    if token:
+        check("DISCORD_BOT_TOKEN", True, mask(token))
+    else:
+        check("DISCORD_BOT_TOKEN", False, "not set")
+        ok = False
+
+    # Allowed users — resolve usernames if possible
+    allowed = os.getenv("DISCORD_ALLOWED_USERS", "")
+    if allowed:
+        users = [u.strip() for u in allowed.split(",") if u.strip()]
+        user_labels = []
+        for uid in users:
+            label = mask(uid)
+            if token and uid.isdigit():
+                try:
+                    import requests
+                    r = requests.get(
+                        f"https://discord.com/api/v10/users/{uid}",
+                        headers={"Authorization": f"Bot {token}"},
+                        timeout=3,
+                    )
+                    if r.status_code == 200:
+                        label = f"{r.json().get('username', '?')} ({mask(uid)})"
+                except Exception:
+                    pass
+            user_labels.append(label)
+        check("DISCORD_ALLOWED_USERS", True, f"{len(users)} user(s): {', '.join(user_labels)}")
+    else:
+        warn("DISCORD_ALLOWED_USERS", "not set — all users can use voice")
+
+    groq_key = os.getenv("GROQ_API_KEY", "")
+    eleven_key = os.getenv("ELEVENLABS_API_KEY", "")
+
+    if groq_key:
+        check("GROQ_API_KEY (STT)", True, mask(groq_key))
+    else:
+        warn("GROQ_API_KEY", "not set — Groq STT unavailable")
+
+    if eleven_key:
+        check("ELEVENLABS_API_KEY (TTS)", True, mask(eleven_key))
+    else:
+        warn("ELEVENLABS_API_KEY", "not set — ElevenLabs TTS unavailable")
+
+    return ok, token, groq_key, eleven_key
+
+
+def check_config(groq_key, eleven_key):
+    """Check hermes config.yaml."""
+    section("Configuration")
+
+    config_path = HERMES_HOME / "config.yaml"
+    if config_path.exists():
+        try:
+            import yaml
+            with open(config_path) as f:
+                cfg = yaml.safe_load(f) or {}
+
+            stt_provider = cfg.get("stt", {}).get("provider", "local")
+            tts_provider = cfg.get("tts", {}).get("provider", "edge")
+            check("STT provider", True, stt_provider)
+            check("TTS provider", True, tts_provider)
+
+            if stt_provider == "groq" and not groq_key:
+                warn("STT config says groq but GROQ_API_KEY is missing")
+            if tts_provider == "elevenlabs" and not eleven_key:
+                warn("TTS config says elevenlabs but ELEVENLABS_API_KEY is missing")
+        except Exception as e:
+            warn("config.yaml", f"parse error: {e}")
+    else:
+        warn("config.yaml", "not found — using defaults")
+
+    # Voice mode state
+    voice_mode_path = HERMES_HOME / "gateway_voice_mode.json"
+    if voice_mode_path.exists():
+        try:
+            import json
+            modes = json.loads(voice_mode_path.read_text())
+            off_count = sum(1 for v in modes.values() if v == "off")
+            all_count = sum(1 for v in modes.values() if v == "all")
+            check("Voice mode state", True, f"{all_count} on, {off_count} off, {len(modes)} total")
+        except Exception:
+            warn("Voice mode state", "parse error")
+    else:
+        check("Voice mode state", True, "no saved state (fresh)")
+
+
+def check_bot_permissions(token):
+    """Check bot permissions via Discord API. Returns True if all OK."""
+    section("Bot Permissions")
+
+    if not token:
+        warn("Bot permissions", "no token — skipping")
+        return True
+
+    try:
+        import requests
+    except ImportError:
+        warn("Bot permissions", "requests not installed — skipping")
+        return True
+
+    VOICE_PERMS = {
+        "Priority Speaker":      8,
+        "Stream":                9,
+        "View Channel":         10,
+        "Send Messages":        11,
+        "Embed Links":          14,
+        "Attach Files":         15,
+        "Read Message History": 16,
+        "Connect":              20,
+        "Speak":                21,
+        "Mute Members":         22,
+        "Deafen Members":       23,
+        "Move Members":         24,
+        "Use VAD":              25,
+        "Send Voice Messages":  46,
+    }
+    REQUIRED_PERMS = {"Connect", "Speak", "View Channel", "Send Messages"}
+    ok = True
+
+    try:
+        headers = {"Authorization": f"Bot {token}"}
+        r = requests.get("https://discord.com/api/v10/users/@me", headers=headers, timeout=5)
+
+        if r.status_code == 401:
+            check("Bot login", False, "invalid token (401)")
+            return False
+        if r.status_code != 200:
+            check("Bot login", False, f"HTTP {r.status_code}")
+            return False
+
+        bot = r.json()
+        bot_name = bot.get("username", "?")
+        check("Bot login", True, f"{bot_name[:3]}{'*' * (len(bot_name) - 3)}")
+
+        # Check guilds
+        r2 = requests.get("https://discord.com/api/v10/users/@me/guilds", headers=headers, timeout=5)
+        if r2.status_code != 200:
+            warn("Guilds", f"HTTP {r2.status_code}")
+            return ok
+
+        guilds = r2.json()
+        check("Guilds", True, f"{len(guilds)} guild(s)")
+
+        for g in guilds[:5]:
+            perms = int(g.get("permissions", 0))
+            is_admin = bool(perms & (1 << 3))
+
+            if is_admin:
+                print(f"    {OK} {g['name']}: Administrator (all permissions)")
+                continue
+
+            has = []
+            missing = []
+            for name, bit in sorted(VOICE_PERMS.items(), key=lambda x: x[1]):
+                if perms & (1 << bit):
+                    has.append(name)
+                elif name in REQUIRED_PERMS:
+                    missing.append(name)
+
+            if missing:
+                print(f"    {FAIL} {g['name']}: missing {', '.join(missing)}")
+                ok = False
+            else:
+                print(f"    {OK} {g['name']}: {', '.join(has)}")
+
+    except requests.exceptions.Timeout:
+        warn("Bot permissions", "Discord API timeout")
+    except requests.exceptions.ConnectionError:
+        warn("Bot permissions", "cannot reach Discord API")
+    except Exception as e:
+        warn("Bot permissions", f"check failed: {e}")
+
+    return ok
+
+
+def main():
+    print()
+    print("\033[1m" + "=" * 50 + "\033[0m")
+    print("\033[1m  Discord Voice Doctor\033[0m")
+    print("\033[1m" + "=" * 50 + "\033[0m")
+
+    all_ok = True
+
+    all_ok &= check_packages()
+    all_ok &= check_system_tools()
+    env_ok, token, groq_key, eleven_key = check_env_vars()
+    all_ok &= env_ok
+    check_config(groq_key, eleven_key)
+    all_ok &= check_bot_permissions(token)
+
+    # Summary
+    print()
+    print("\033[1m" + "-" * 50 + "\033[0m")
+    if all_ok:
+        print(f"  {OK} \033[92mAll checks passed — voice mode ready!\033[0m")
+    else:
+        print(f"  {FAIL} \033[91mSome checks failed — fix issues above.\033[0m")
+    print()
+
+
+if __name__ == "__main__":
+    main()
--- a/skills/autonomous-ai-agents/hermes-agent/SKILL.md
+++ b/skills/autonomous-ai-agents/hermes-agent/SKILL.md
@@ -155,7 +155,7 @@ terminal(command="hermes chat -q 'Summarize this codebase' --model google/gemini

 ## Gateway Cron Integration

-For scheduled autonomous tasks, use the `schedule_cronjob` tool instead of spawning processes — cron jobs handle delivery, retry, and persistence automatically.
+For scheduled autonomous tasks, use the unified `cronjob` tool instead of spawning processes — cron jobs handle delivery, retry, and persistence automatically.

 ## Key Differences Between Modes

--- a/skills/productivity/google-workspace/SKILL.md
+++ b/skills/productivity/google-workspace/SKILL.md
@@ -102,7 +102,9 @@ This prints a URL. **Send the URL to the user** and tell them:
 ### Step 4: Exchange the code

 The user will paste back either a URL like `http://localhost:1/?code=4/0A...&scope=...`
-or just the code string. Either works:
+or just the code string. Either works. The `--auth-url` step stores a temporary
+pending OAuth session locally so `--auth-code` can complete the PKCE exchange
+later, even on headless systems:

 ```bash
 $GSETUP --auth-code "THE_URL_OR_CODE_THE_USER_PASTED"
@@ -119,6 +121,7 @@ Should print `AUTHENTICATED`. Setup is complete — token refreshes automaticall
 ### Notes

 - Token is stored at `~/.hermes/google_token.json` and auto-refreshes.
+- Pending OAuth session state/verifier are stored temporarily at `~/.hermes/google_oauth_pending.json` until exchange completes.
 - To revoke: `$GSETUP --revoke`

 ## Usage
--- a/skills/productivity/google-workspace/scripts/setup.py
+++ b/skills/productivity/google-workspace/scripts/setup.py
@@ -31,6 +31,7 @@ from pathlib import Path
 HERMES_HOME = Path(os.getenv("HERMES_HOME", Path.home() / ".hermes"))
 TOKEN_PATH = HERMES_HOME / "google_token.json"
 CLIENT_SECRET_PATH = HERMES_HOME / "google_client_secret.json"
+PENDING_AUTH_PATH = HERMES_HOME / "google_oauth_pending.json"

 SCOPES = [
    "https://www.googleapis.com/auth/gmail.readonly",
@@ -141,6 +142,58 @@ def store_client_secret(path: str):
    print(f"OK: Client secret saved to {CLIENT_SECRET_PATH}")


+def _save_pending_auth(*, state: str, code_verifier: str):
+    """Persist the OAuth session bits needed for a later token exchange."""
+    PENDING_AUTH_PATH.write_text(
+        json.dumps(
+            {
+                "state": state,
+                "code_verifier": code_verifier,
+                "redirect_uri": REDIRECT_URI,
+            },
+            indent=2,
+        )
+    )
+
+
+def _load_pending_auth() -> dict:
+    """Load the pending OAuth session created by get_auth_url()."""
+    if not PENDING_AUTH_PATH.exists():
+        print("ERROR: No pending OAuth session found. Run --auth-url first.")
+        sys.exit(1)
+
+    try:
+        data = json.loads(PENDING_AUTH_PATH.read_text())
+    except Exception as e:
+        print(f"ERROR: Could not read pending OAuth session: {e}")
+        print("Run --auth-url again to start a fresh OAuth session.")
+        sys.exit(1)
+
+    if not data.get("state") or not data.get("code_verifier"):
+        print("ERROR: Pending OAuth session is missing PKCE data.")
+        print("Run --auth-url again to start a fresh OAuth session.")
+        sys.exit(1)
+
+    return data
+
+
+def _extract_code_and_state(code_or_url: str) -> tuple[str, str | None]:
+    """Accept either a raw auth code or the full redirect URL pasted by the user."""
+    if not code_or_url.startswith("http"):
+        return code_or_url, None
+
+    from urllib.parse import parse_qs, urlparse
+
+    parsed = urlparse(code_or_url)
+    params = parse_qs(parsed.query)
+    if "code" not in params:
+        print("ERROR: No 'code' parameter found in URL.")
+        sys.exit(1)
+
+    state = params.get("state", [None])[0]
+    return params["code"][0], state
+
+
 def get_auth_url():
    """Print the OAuth authorization URL. User visits this in a browser."""
    if not CLIENT_SECRET_PATH.exists():
@@ -154,11 +207,13 @@ def get_auth_url():
        str(CLIENT_SECRET_PATH),
        scopes=SCOPES,
        redirect_uri=REDIRECT_URI,
+        autogenerate_code_verifier=True,
    )
-    auth_url, _ = flow.authorization_url(
+    auth_url, state = flow.authorization_url(
        access_type="offline",
        prompt="consent",
    )
+    _save_pending_auth(state=state, code_verifier=flow.code_verifier)
    # Print just the URL so the agent can extract it cleanly
    print(auth_url)

@@ -169,26 +224,23 @@ def exchange_auth_code(code: str):
        print("ERROR: No client secret stored. Run --client-secret first.")
        sys.exit(1)

+    pending_auth = _load_pending_auth()
+    code, returned_state = _extract_code_and_state(code)
+    if returned_state and returned_state != pending_auth["state"]:
+        print("ERROR: OAuth state mismatch. Run --auth-url again to start a fresh session.")
+        sys.exit(1)
+
    _ensure_deps()
    from google_auth_oauthlib.flow import Flow

    flow = Flow.from_client_secrets_file(
        str(CLIENT_SECRET_PATH),
        scopes=SCOPES,
-        redirect_uri=REDIRECT_URI,
+        redirect_uri=pending_auth.get("redirect_uri", REDIRECT_URI),
+        state=pending_auth["state"],
+        code_verifier=pending_auth["code_verifier"],
    )

-    # The code might come as a full redirect URL or just the code itself
-    if code.startswith("http"):
-        # Extract code from redirect URL: http://localhost:1/?code=CODE&scope=...
-        from urllib.parse import urlparse, parse_qs
-        parsed = urlparse(code)
-        params = parse_qs(parsed.query)
-        if "code" not in params:
-            print("ERROR: No 'code' parameter found in URL.")
-            sys.exit(1)
-        code = params["code"][0]
-
    try:
        flow.fetch_token(code=code)
    except Exception as e:
@@ -198,6 +250,7 @@ def exchange_auth_code(code: str):

    creds = flow.credentials
    TOKEN_PATH.write_text(creds.to_json())
+    PENDING_AUTH_PATH.unlink(missing_ok=True)
    print(f"OK: Authenticated. Token saved to {TOKEN_PATH}")


@@ -229,6 +282,7 @@ def revoke():
        print(f"Remote revocation failed (token may already be invalid): {e}")

    TOKEN_PATH.unlink(missing_ok=True)
+    PENDING_AUTH_PATH.unlink(missing_ok=True)
    print(f"Deleted {TOKEN_PATH}")


--- a/skills/software-development/plan/SKILL.md
+++ b/skills/software-development/plan/SKILL.md
@@ -0,0 +1,57 @@
+---
+name: plan
+description: Plan mode for Hermes — inspect context, write a markdown plan into the active workspace's `.hermes/plans/` directory, and do not execute the work.
+version: 1.0.0
+author: Hermes Agent
+license: MIT
+metadata:
+  hermes:
+    tags: [planning, plan-mode, implementation, workflow]
+    related_skills: [writing-plans, subagent-driven-development]
+---
+
+# Plan Mode
+
+Use this skill when the user wants a plan instead of execution.
+
+## Core behavior
+
+For this turn, you are planning only.
+
+- Do not implement code.
+- Do not edit project files except the plan markdown file.
+- Do not run mutating terminal commands, commit, push, or perform external actions.
+- You may inspect the repo or other context with read-only commands/tools when needed.
+- Your deliverable is a markdown plan saved inside the active workspace under `.hermes/plans/`.
+
+## Output requirements
+
+Write a markdown plan that is concrete and actionable.
+
+Include, when relevant:
+- Goal
+- Current context / assumptions
+- Proposed approach
+- Step-by-step plan
+- Files likely to change
+- Tests / validation
+- Risks, tradeoffs, and open questions
+
+If the task is code-related, include exact file paths, likely test targets, and verification steps.
+
+## Save location
+
+Save the plan with `write_file` under:
+- `.hermes/plans/YYYY-MM-DD_HHMMSS-<slug>.md`
+
+Treat that as relative to the active working directory / backend workspace. Hermes file tools are backend-aware, so using this relative path keeps the plan with the workspace on local, docker, ssh, modal, and daytona backends.
+
+If the runtime provides a specific target path, use that exact path.
+If not, create a sensible timestamped filename yourself under `.hermes/plans/`.
+
+## Interaction style
+
+- If the request is clear enough, write the plan directly.
+- If no explicit instruction accompanies `/plan`, infer the task from the current conversation context.
+- If it is genuinely underspecified, ask a brief clarifying question instead of guessing.
+- After saving the plan, reply briefly with what you planned and the saved path.
--- a/tests/agent/test_auxiliary_client.py
+++ b/tests/agent/test_auxiliary_client.py
@@ -10,6 +10,8 @@ import pytest
 from agent.auxiliary_client import (
    get_text_auxiliary_client,
    get_vision_auxiliary_client,
+    get_available_vision_backends,
+    resolve_provider_client,
    auxiliary_max_tokens_param,
    _read_codex_access_token,
    _get_auxiliary_provider,
@@ -24,9 +26,12 @@ def _clean_env(monkeypatch):
    for key in (
        "OPENROUTER_API_KEY", "OPENAI_BASE_URL", "OPENAI_API_KEY",
        "OPENAI_MODEL", "LLM_MODEL", "NOUS_INFERENCE_BASE_URL",
-        # Per-task provider/model overrides
+        "ANTHROPIC_API_KEY", "ANTHROPIC_TOKEN", "CLAUDE_CODE_OAUTH_TOKEN",
+        # Per-task provider/model/direct-endpoint overrides
        "AUXILIARY_VISION_PROVIDER", "AUXILIARY_VISION_MODEL",
+        "AUXILIARY_VISION_BASE_URL", "AUXILIARY_VISION_API_KEY",
        "AUXILIARY_WEB_EXTRACT_PROVIDER", "AUXILIARY_WEB_EXTRACT_MODEL",
+        "AUXILIARY_WEB_EXTRACT_BASE_URL", "AUXILIARY_WEB_EXTRACT_API_KEY",
        "CONTEXT_COMPRESSION_PROVIDER", "CONTEXT_COMPRESSION_MODEL",
    ):
        monkeypatch.delenv(key, raising=False)
@@ -142,11 +147,55 @@ class TestGetTextAuxiliaryClient:
        call_kwargs = mock_openai.call_args
        assert call_kwargs.kwargs["base_url"] == "http://localhost:1234/v1"

+    def test_task_direct_endpoint_override(self, monkeypatch):
+        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
+        monkeypatch.setenv("AUXILIARY_WEB_EXTRACT_BASE_URL", "http://localhost:2345/v1")
+        monkeypatch.setenv("AUXILIARY_WEB_EXTRACT_API_KEY", "task-key")
+        monkeypatch.setenv("AUXILIARY_WEB_EXTRACT_MODEL", "task-model")
+        with patch("agent.auxiliary_client.OpenAI") as mock_openai:
+            client, model = get_text_auxiliary_client("web_extract")
+        assert model == "task-model"
+        assert mock_openai.call_args.kwargs["base_url"] == "http://localhost:2345/v1"
+        assert mock_openai.call_args.kwargs["api_key"] == "task-key"
+
+    def test_task_direct_endpoint_without_openai_key_does_not_fall_back(self, monkeypatch):
+        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
+        monkeypatch.setenv("AUXILIARY_WEB_EXTRACT_BASE_URL", "http://localhost:2345/v1")
+        monkeypatch.setenv("AUXILIARY_WEB_EXTRACT_MODEL", "task-model")
+        with patch("agent.auxiliary_client.OpenAI") as mock_openai:
+            client, model = get_text_auxiliary_client("web_extract")
+        assert client is None
+        assert model is None
+        mock_openai.assert_not_called()
+
+    def test_custom_endpoint_uses_config_saved_base_url(self, monkeypatch):
+        config = {
+            "model": {
+                "provider": "custom",
+                "base_url": "http://localhost:1234/v1",
+                "default": "my-local-model",
+            }
+        }
+        monkeypatch.setenv("OPENAI_API_KEY", "lm-studio-key")
+        monkeypatch.setattr("hermes_cli.config.load_config", lambda: config)
+        monkeypatch.setattr("hermes_cli.runtime_provider.load_config", lambda: config)
+
+        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
+             patch("agent.auxiliary_client._read_codex_access_token", return_value=None), \
+             patch("agent.auxiliary_client._resolve_api_key_provider", return_value=(None, None)), \
+             patch("agent.auxiliary_client.OpenAI") as mock_openai:
+            client, model = get_text_auxiliary_client()
+
+        assert client is not None
+        assert model == "my-local-model"
+        call_kwargs = mock_openai.call_args
+        assert call_kwargs.kwargs["base_url"] == "http://localhost:1234/v1"
+
    def test_codex_fallback_when_nothing_else(self, codex_auth_dir):
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
             patch("agent.auxiliary_client.OpenAI") as mock_openai:
            client, model = get_text_auxiliary_client()
-        assert model == "gpt-5.3-codex"
+        assert model == "gpt-5.2-codex"
        # Returns a CodexAuxiliaryClient wrapper, not a raw OpenAI client
        from agent.auxiliary_client import CodexAuxiliaryClient
        assert isinstance(client, CodexAuxiliaryClient)
@@ -164,14 +213,74 @@ class TestGetTextAuxiliaryClient:


 class TestVisionClientFallback:
-    """Vision client auto mode only tries OpenRouter + Nous (multimodal-capable)."""
+    """Vision client auto mode resolves known-good multimodal backends."""

    def test_vision_returns_none_without_any_credentials(self):
-        with patch("agent.auxiliary_client._read_nous_auth", return_value=None):
+        with (
+            patch("agent.auxiliary_client._read_nous_auth", return_value=None),
+            patch("agent.auxiliary_client._try_anthropic", return_value=(None, None)),
+        ):
            client, model = get_vision_auxiliary_client()
        assert client is None
        assert model is None

+    def test_vision_auto_includes_anthropic_when_configured(self, monkeypatch):
+        monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-ant-api03-key")
+        with (
+            patch("agent.auxiliary_client._read_nous_auth", return_value=None),
+            patch("agent.anthropic_adapter.build_anthropic_client", return_value=MagicMock()),
+            patch("agent.anthropic_adapter.resolve_anthropic_token", return_value="sk-ant-api03-key"),
+        ):
+            backends = get_available_vision_backends()
+
+        assert "anthropic" in backends
+
+    def test_resolve_provider_client_returns_native_anthropic_wrapper(self, monkeypatch):
+        monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-ant-api03-key")
+        with (
+            patch("agent.auxiliary_client._read_nous_auth", return_value=None),
+            patch("agent.anthropic_adapter.build_anthropic_client", return_value=MagicMock()),
+            patch("agent.anthropic_adapter.resolve_anthropic_token", return_value="sk-ant-api03-key"),
+        ):
+            client, model = resolve_provider_client("anthropic")
+
+        assert client is not None
+        assert client.__class__.__name__ == "AnthropicAuxiliaryClient"
+        assert model == "claude-haiku-4-5-20251001"
+
+    def test_vision_auto_uses_anthropic_when_no_higher_priority_backend(self, monkeypatch):
+        monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-ant-api03-key")
+        with (
+            patch("agent.auxiliary_client._read_nous_auth", return_value=None),
+            patch("agent.anthropic_adapter.build_anthropic_client", return_value=MagicMock()),
+            patch("agent.anthropic_adapter.resolve_anthropic_token", return_value="sk-ant-api03-key"),
+        ):
+            client, model = get_vision_auxiliary_client()
+
+        assert client is not None
+        assert client.__class__.__name__ == "AnthropicAuxiliaryClient"
+        assert model == "claude-haiku-4-5-20251001"
+
+    def test_selected_anthropic_provider_is_preferred_for_vision_auto(self, monkeypatch):
+        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
+        monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-ant-api03-key")
+
+        def fake_load_config():
+            return {"model": {"provider": "anthropic", "default": "claude-sonnet-4-6"}}
+
+        with (
+            patch("agent.auxiliary_client._read_nous_auth", return_value=None),
+            patch("agent.anthropic_adapter.build_anthropic_client", return_value=MagicMock()),
+            patch("agent.anthropic_adapter.resolve_anthropic_token", return_value="sk-ant-api03-key"),
+            patch("agent.auxiliary_client.OpenAI") as mock_openai,
+            patch("hermes_cli.config.load_config", fake_load_config),
+        ):
+            client, model = get_vision_auxiliary_client()
+
+        assert client is not None
+        assert client.__class__.__name__ == "AnthropicAuxiliaryClient"
+        assert model == "claude-haiku-4-5-20251001"
+
    def test_vision_auto_includes_codex(self, codex_auth_dir):
        """Codex supports vision (gpt-5.3-codex), so auto mode should use it."""
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
@@ -179,7 +288,7 @@ class TestVisionClientFallback:
            client, model = get_vision_auxiliary_client()
        from agent.auxiliary_client import CodexAuxiliaryClient
        assert isinstance(client, CodexAuxiliaryClient)
-        assert model == "gpt-5.3-codex"
+        assert model == "gpt-5.2-codex"

    def test_vision_auto_falls_back_to_custom_endpoint(self, monkeypatch):
        """Custom endpoint is used as fallback in vision auto mode.
@@ -194,6 +303,27 @@ class TestVisionClientFallback:
            client, model = get_vision_auxiliary_client()
        assert client is not None  # Custom endpoint picked up as fallback

+    def test_vision_direct_endpoint_override(self, monkeypatch):
+        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
+        monkeypatch.setenv("AUXILIARY_VISION_BASE_URL", "http://localhost:4567/v1")
+        monkeypatch.setenv("AUXILIARY_VISION_API_KEY", "vision-key")
+        monkeypatch.setenv("AUXILIARY_VISION_MODEL", "vision-model")
+        with patch("agent.auxiliary_client.OpenAI") as mock_openai:
+            client, model = get_vision_auxiliary_client()
+        assert model == "vision-model"
+        assert mock_openai.call_args.kwargs["base_url"] == "http://localhost:4567/v1"
+        assert mock_openai.call_args.kwargs["api_key"] == "vision-key"
+
+    def test_vision_direct_endpoint_requires_openai_api_key(self, monkeypatch):
+        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
+        monkeypatch.setenv("AUXILIARY_VISION_BASE_URL", "http://localhost:4567/v1")
+        monkeypatch.setenv("AUXILIARY_VISION_MODEL", "vision-model")
+        with patch("agent.auxiliary_client.OpenAI") as mock_openai:
+            client, model = get_vision_auxiliary_client()
+        assert client is None
+        assert model is None
+        mock_openai.assert_not_called()
+
    def test_vision_uses_openrouter_when_available(self, monkeypatch):
        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
        with patch("agent.auxiliary_client.OpenAI") as mock_openai:
@@ -241,7 +371,7 @@ class TestVisionClientFallback:
            client, model = get_vision_auxiliary_client()
        from agent.auxiliary_client import CodexAuxiliaryClient
        assert isinstance(client, CodexAuxiliaryClient)
-        assert model == "gpt-5.3-codex"
+        assert model == "gpt-5.2-codex"


 class TestGetAuxiliaryProvider:
@@ -320,6 +450,27 @@ class TestResolveForcedProvider:
            client, model = _resolve_forced_provider("main")
        assert model == "my-local-model"

+    def test_forced_main_uses_config_saved_custom_endpoint(self, monkeypatch):
+        config = {
+            "model": {
+                "provider": "custom",
+                "base_url": "http://local:8080/v1",
+                "default": "my-local-model",
+            }
+        }
+        monkeypatch.setenv("OPENAI_API_KEY", "local-key")
+        monkeypatch.setattr("hermes_cli.config.load_config", lambda: config)
+        monkeypatch.setattr("hermes_cli.runtime_provider.load_config", lambda: config)
+        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
+             patch("agent.auxiliary_client._read_codex_access_token", return_value=None), \
+             patch("agent.auxiliary_client._resolve_api_key_provider", return_value=(None, None)), \
+             patch("agent.auxiliary_client.OpenAI") as mock_openai:
+            client, model = _resolve_forced_provider("main")
+        assert client is not None
+        assert model == "my-local-model"
+        call_kwargs = mock_openai.call_args
+        assert call_kwargs.kwargs["base_url"] == "http://local:8080/v1"
+
    def test_forced_main_skips_openrouter_nous(self, monkeypatch):
        """Even if OpenRouter key is set, 'main' skips it."""
        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
@@ -338,7 +489,7 @@ class TestResolveForcedProvider:
            client, model = _resolve_forced_provider("main")
        from agent.auxiliary_client import CodexAuxiliaryClient
        assert isinstance(client, CodexAuxiliaryClient)
-        assert model == "gpt-5.3-codex"
+        assert model == "gpt-5.2-codex"

    def test_forced_codex(self, codex_auth_dir, monkeypatch):
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
@@ -346,7 +497,7 @@ class TestResolveForcedProvider:
            client, model = _resolve_forced_provider("codex")
        from agent.auxiliary_client import CodexAuxiliaryClient
        assert isinstance(client, CodexAuxiliaryClient)
-        assert model == "gpt-5.3-codex"
+        assert model == "gpt-5.2-codex"

    def test_forced_codex_no_token(self, monkeypatch):
        with patch("agent.auxiliary_client._read_codex_access_token", return_value=None):
@@ -390,6 +541,24 @@ class TestTaskSpecificOverrides:
            client, model = get_text_auxiliary_client("web_extract")
        assert model == "google/gemini-3-flash-preview"

+    def test_task_direct_endpoint_from_config(self, monkeypatch, tmp_path):
+        hermes_home = tmp_path / "hermes"
+        hermes_home.mkdir(parents=True, exist_ok=True)
+        (hermes_home / "config.yaml").write_text(
+            """auxiliary:
+  web_extract:
+    base_url: http://localhost:3456/v1
+    api_key: config-key
+    model: config-model
+"""
+        )
+        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+        with patch("agent.auxiliary_client.OpenAI") as mock_openai:
+            client, model = get_text_auxiliary_client("web_extract")
+        assert model == "config-model"
+        assert mock_openai.call_args.kwargs["base_url"] == "http://localhost:3456/v1"
+        assert mock_openai.call_args.kwargs["api_key"] == "config-key"
+
    def test_task_without_override_uses_auto(self, monkeypatch):
        """A task with no provider env var falls through to auto chain."""
        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
--- a/tests/agent/test_display_emoji.py
+++ b/tests/agent/test_display_emoji.py
@@ -0,0 +1,123 @@
+"""Tests for get_tool_emoji in agent/display.py — skin + registry integration."""
+
+from unittest.mock import patch as mock_patch, MagicMock
+
+from agent.display import get_tool_emoji
+
+
+class TestGetToolEmoji:
+    """Verify the skin → registry → fallback resolution chain."""
+
+    def test_returns_registry_emoji_when_no_skin(self):
+        """Registry-registered emoji is used when no skin is active."""
+        mock_registry = MagicMock()
+        mock_registry.get_emoji.return_value = "🎨"
+        with mock_patch("agent.display._get_skin", return_value=None), \
+             mock_patch("agent.display.registry", mock_registry, create=True):
+            # Need to patch the import inside get_tool_emoji
+            pass
+        # Direct test: patch the lazy import path
+        with mock_patch("agent.display._get_skin", return_value=None):
+            # get_tool_emoji will try to import registry — mock that
+            mock_reg = MagicMock()
+            mock_reg.get_emoji.return_value = "📖"
+            with mock_patch.dict("sys.modules", {}):
+                import sys
+                # Patch tools.registry module
+                mock_module = MagicMock()
+                mock_module.registry = mock_reg
+                with mock_patch.dict(sys.modules, {"tools.registry": mock_module}):
+                    result = get_tool_emoji("read_file")
+                    assert result == "📖"
+
+    def test_skin_override_takes_precedence(self):
+        """Skin tool_emojis override registry defaults."""
+        skin = MagicMock()
+        skin.tool_emojis = {"terminal": "⚔"}
+        with mock_patch("agent.display._get_skin", return_value=skin):
+            result = get_tool_emoji("terminal")
+            assert result == "⚔"
+
+    def test_skin_empty_dict_falls_through(self):
+        """Empty skin tool_emojis falls through to registry."""
+        skin = MagicMock()
+        skin.tool_emojis = {}
+        mock_reg = MagicMock()
+        mock_reg.get_emoji.return_value = "💻"
+        import sys
+        mock_module = MagicMock()
+        mock_module.registry = mock_reg
+        with mock_patch("agent.display._get_skin", return_value=skin), \
+             mock_patch.dict(sys.modules, {"tools.registry": mock_module}):
+            result = get_tool_emoji("terminal")
+            assert result == "💻"
+
+    def test_fallback_default(self):
+        """When neither skin nor registry has an emoji, use the default."""
+        skin = MagicMock()
+        skin.tool_emojis = {}
+        mock_reg = MagicMock()
+        mock_reg.get_emoji.return_value = ""
+        import sys
+        mock_module = MagicMock()
+        mock_module.registry = mock_reg
+        with mock_patch("agent.display._get_skin", return_value=skin), \
+             mock_patch.dict(sys.modules, {"tools.registry": mock_module}):
+            result = get_tool_emoji("unknown_tool")
+            assert result == "⚡"
+
+    def test_custom_default(self):
+        """Custom default is returned when nothing matches."""
+        with mock_patch("agent.display._get_skin", return_value=None):
+            mock_reg = MagicMock()
+            mock_reg.get_emoji.return_value = ""
+            import sys
+            mock_module = MagicMock()
+            mock_module.registry = mock_reg
+            with mock_patch.dict(sys.modules, {"tools.registry": mock_module}):
+                result = get_tool_emoji("x", default="⚙️")
+                assert result == "⚙️"
+
+    def test_skin_override_only_for_matching_tool(self):
+        """Skin override for one tool doesn't affect others."""
+        skin = MagicMock()
+        skin.tool_emojis = {"terminal": "⚔"}
+        mock_reg = MagicMock()
+        mock_reg.get_emoji.return_value = "🔍"
+        import sys
+        mock_module = MagicMock()
+        mock_module.registry = mock_reg
+        with mock_patch("agent.display._get_skin", return_value=skin), \
+             mock_patch.dict(sys.modules, {"tools.registry": mock_module}):
+            assert get_tool_emoji("terminal") == "⚔"  # skin override
+            assert get_tool_emoji("web_search") == "🔍"  # registry fallback
+
+
+class TestSkinConfigToolEmojis:
+    """Verify SkinConfig handles tool_emojis field correctly."""
+
+    def test_skin_config_has_tool_emojis_field(self):
+        from hermes_cli.skin_engine import SkinConfig
+        skin = SkinConfig(name="test")
+        assert skin.tool_emojis == {}
+
+    def test_skin_config_accepts_tool_emojis(self):
+        from hermes_cli.skin_engine import SkinConfig
+        emojis = {"terminal": "⚔", "web_search": "🔮"}
+        skin = SkinConfig(name="test", tool_emojis=emojis)
+        assert skin.tool_emojis == emojis
+
+    def test_build_skin_config_includes_tool_emojis(self):
+        from hermes_cli.skin_engine import _build_skin_config
+        data = {
+            "name": "custom",
+            "tool_emojis": {"terminal": "🗡️", "patch": "⚒️"},
+        }
+        skin = _build_skin_config(data)
+        assert skin.tool_emojis == {"terminal": "🗡️", "patch": "⚒️"}
+
+    def test_build_skin_config_empty_tool_emojis_default(self):
+        from hermes_cli.skin_engine import _build_skin_config
+        data = {"name": "minimal"}
+        skin = _build_skin_config(data)
+        assert skin.tool_emojis == {}
--- a/tests/agent/test_skill_commands.py
+++ b/tests/agent/test_skill_commands.py
@@ -1,10 +1,17 @@
 """Tests for agent/skill_commands.py — skill slash command scanning and platform filtering."""

 import os
+from datetime import datetime
+from pathlib import Path
 from unittest.mock import patch

 import tools.skills_tool as skills_tool_module
-from agent.skill_commands import scan_skill_commands, build_skill_invocation_message
+from agent.skill_commands import (
+    build_plan_path,
+    build_preloaded_skills_prompt,
+    build_skill_invocation_message,
+    scan_skill_commands,
+)


 def _make_skill(
@@ -79,6 +86,33 @@ class TestScanSkillCommands:
        assert "/generic-tool" in result


+class TestBuildPreloadedSkillsPrompt:
+    def test_builds_prompt_for_multiple_named_skills(self, tmp_path):
+        with patch("tools.skills_tool.SKILLS_DIR", tmp_path):
+            _make_skill(tmp_path, "first-skill")
+            _make_skill(tmp_path, "second-skill")
+            prompt, loaded, missing = build_preloaded_skills_prompt(
+                ["first-skill", "second-skill"]
+            )
+
+        assert missing == []
+        assert loaded == ["first-skill", "second-skill"]
+        assert "first-skill" in prompt
+        assert "second-skill" in prompt
+        assert "preloaded" in prompt.lower()
+
+    def test_reports_missing_named_skills(self, tmp_path):
+        with patch("tools.skills_tool.SKILLS_DIR", tmp_path):
+            _make_skill(tmp_path, "present-skill")
+            prompt, loaded, missing = build_preloaded_skills_prompt(
+                ["present-skill", "missing-skill"]
+            )
+
+        assert "present-skill" in prompt
+        assert loaded == ["present-skill"]
+        assert missing == ["missing-skill"]
+
+
 class TestBuildSkillInvocationMessage:
    def test_loads_skill_by_stored_path_when_frontmatter_name_differs(self, tmp_path):
        skill_dir = tmp_path / "mlops" / "audiocraft"
@@ -241,3 +275,37 @@ Generate some audio.

        assert msg is not None
        assert 'file_path="<path>"' in msg
+
+
+class TestPlanSkillHelpers:
+    def test_build_plan_path_uses_workspace_relative_dir_and_slugifies_request(self):
+        path = build_plan_path(
+            "Implement OAuth login + refresh tokens!",
+            now=datetime(2026, 3, 15, 9, 30, 45),
+        )
+
+        assert path == Path(".hermes") / "plans" / "2026-03-15_093045-implement-oauth-login-refresh-tokens.md"
+
+    def test_plan_skill_message_can_include_runtime_save_path_note(self, tmp_path):
+        with patch("tools.skills_tool.SKILLS_DIR", tmp_path):
+            _make_skill(
+                tmp_path,
+                "plan",
+                body="Save plans under .hermes/plans in the active workspace and do not execute the work.",
+            )
+            scan_skill_commands()
+            msg = build_skill_invocation_message(
+                "/plan",
+                "Add a /plan command",
+                runtime_note=(
+                    "Save the markdown plan with write_file to this exact relative path inside "
+                    "the active workspace/backend cwd: .hermes/plans/plan.md"
+                ),
+            )
+
+        assert msg is not None
+        assert "Save plans under $HERMES_HOME/plans" not in msg
+        assert ".hermes/plans" in msg
+        assert "Add a /plan command" in msg
+        assert ".hermes/plans/plan.md" in msg
+        assert "Runtime note:" in msg
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -26,6 +26,12 @@ def _isolate_hermes_home(tmp_path, monkeypatch):
    (fake_home / "memories").mkdir()
    (fake_home / "skills").mkdir()
    monkeypatch.setenv("HERMES_HOME", str(fake_home))
+    # Tests should not inherit the agent's current gateway/messaging surface.
+    # Individual tests that need gateway behavior set these explicitly.
+    monkeypatch.delenv("HERMES_SESSION_PLATFORM", raising=False)
+    monkeypatch.delenv("HERMES_SESSION_CHAT_ID", raising=False)
+    monkeypatch.delenv("HERMES_SESSION_CHAT_NAME", raising=False)
+    monkeypatch.delenv("HERMES_GATEWAY_SESSION", raising=False)


@pytest.fixture()
--- a/tests/cron/test_jobs.py
+++ b/tests/cron/test_jobs.py
@@ -16,6 +16,8 @@ from cron.jobs import (
    get_job,
    list_jobs,
    update_job,
+    pause_job,
+    resume_job,
    remove_job,
    mark_job_run,
    get_due_jobs,
@@ -233,14 +235,18 @@ class TestUpdateJob:
        job = create_job(prompt="Daily report", schedule="every 1h")
        assert job["schedule"]["kind"] == "interval"
        assert job["schedule"]["minutes"] == 60
+        old_next_run = job["next_run_at"]
        new_schedule = parse_schedule("every 2h")
-        updated = update_job(job["id"], {"schedule": new_schedule})
+        updated = update_job(job["id"], {"schedule": new_schedule, "schedule_display": new_schedule["display"]})
        assert updated is not None
        assert updated["schedule"]["kind"] == "interval"
        assert updated["schedule"]["minutes"] == 120
+        assert updated["schedule_display"] == "every 120m"
+        assert updated["next_run_at"] != old_next_run
        # Verify persisted to disk
        fetched = get_job(job["id"])
        assert fetched["schedule"]["minutes"] == 120
+        assert fetched["schedule_display"] == "every 120m"

    def test_update_enable_disable(self, tmp_cron_dir):
        job = create_job(prompt="Toggle me", schedule="every 1h")
@@ -255,6 +261,26 @@ class TestUpdateJob:
        assert result is None


+class TestPauseResumeJob:
+    def test_pause_sets_state(self, tmp_cron_dir):
+        job = create_job(prompt="Pause me", schedule="every 1h")
+        paused = pause_job(job["id"], reason="user paused")
+        assert paused is not None
+        assert paused["enabled"] is False
+        assert paused["state"] == "paused"
+        assert paused["paused_reason"] == "user paused"
+
+    def test_resume_reenables_job(self, tmp_cron_dir):
+        job = create_job(prompt="Resume me", schedule="every 1h")
+        pause_job(job["id"], reason="user paused")
+        resumed = resume_job(job["id"])
+        assert resumed is not None
+        assert resumed["enabled"] is True
+        assert resumed["state"] == "scheduled"
+        assert resumed["paused_at"] is None
+        assert resumed["paused_reason"] is None
+
+
 class TestMarkJobRun:
    def test_increments_completed(self, tmp_cron_dir):
        job = create_job(prompt="Test", schedule="every 1h")
--- a/tests/cron/test_scheduler.py
+++ b/tests/cron/test_scheduler.py
@@ -2,7 +2,8 @@

 import json
 import logging
-from unittest.mock import patch, MagicMock
+import os
+from unittest.mock import AsyncMock, patch, MagicMock

 import pytest

@@ -107,7 +108,7 @@ class TestDeliverResultMirrorLogging:
        mock_cfg.platforms = {Platform.TELEGRAM: pconfig}

        with patch("gateway.config.load_gateway_config", return_value=mock_cfg), \
-             patch("asyncio.run", return_value=None), \
+             patch("tools.send_message_tool._send_to_platform", new=AsyncMock(return_value={"success": True})), \
             patch("gateway.mirror.mirror_to_session", side_effect=ConnectionError("network down")):
            job = {
                "id": "test-job",
@@ -140,9 +141,8 @@ class TestDeliverResultMirrorLogging:
        }

        with patch("gateway.config.load_gateway_config", return_value=mock_cfg), \
-             patch("tools.send_message_tool._send_to_platform", return_value={"success": True}) as send_mock, \
-             patch("gateway.mirror.mirror_to_session") as mirror_mock, \
-             patch("asyncio.run", side_effect=lambda coro: None):
+             patch("tools.send_message_tool._send_to_platform", new=AsyncMock(return_value={"success": True})) as send_mock, \
+             patch("gateway.mirror.mirror_to_session") as mirror_mock:
            _deliver_result(job, "hello")

        send_mock.assert_called_once()
@@ -196,6 +196,60 @@ class TestRunJobSessionPersistence:
        assert kwargs["session_id"].startswith("cron_test-job_")
        fake_db.close.assert_called_once()

+    def test_run_job_sets_auto_delivery_env_from_dotenv_home_channel(self, tmp_path, monkeypatch):
+        job = {
+            "id": "test-job",
+            "name": "test",
+            "prompt": "hello",
+            "deliver": "telegram",
+        }
+        fake_db = MagicMock()
+        seen = {}
+
+        (tmp_path / ".env").write_text("TELEGRAM_HOME_CHANNEL=-2002\n")
+        monkeypatch.delenv("TELEGRAM_HOME_CHANNEL", raising=False)
+        monkeypatch.delenv("HERMES_CRON_AUTO_DELIVER_PLATFORM", raising=False)
+        monkeypatch.delenv("HERMES_CRON_AUTO_DELIVER_CHAT_ID", raising=False)
+        monkeypatch.delenv("HERMES_CRON_AUTO_DELIVER_THREAD_ID", raising=False)
+
+        class FakeAgent:
+            def __init__(self, *args, **kwargs):
+                pass
+
+            def run_conversation(self, *args, **kwargs):
+                seen["platform"] = os.getenv("HERMES_CRON_AUTO_DELIVER_PLATFORM")
+                seen["chat_id"] = os.getenv("HERMES_CRON_AUTO_DELIVER_CHAT_ID")
+                seen["thread_id"] = os.getenv("HERMES_CRON_AUTO_DELIVER_THREAD_ID")
+                return {"final_response": "ok"}
+
+        with patch("cron.scheduler._hermes_home", tmp_path), \
+             patch("hermes_state.SessionDB", return_value=fake_db), \
+             patch(
+                 "hermes_cli.runtime_provider.resolve_runtime_provider",
+                 return_value={
+                     "api_key": "***",
+                     "base_url": "https://example.invalid/v1",
+                     "provider": "openrouter",
+                     "api_mode": "chat_completions",
+                 },
+             ), \
+             patch("run_agent.AIAgent", FakeAgent):
+            success, output, final_response, error = run_job(job)
+
+        assert success is True
+        assert error is None
+        assert final_response == "ok"
+        assert "ok" in output
+        assert seen == {
+            "platform": "telegram",
+            "chat_id": "-2002",
+            "thread_id": None,
+        }
+        assert os.getenv("HERMES_CRON_AUTO_DELIVER_PLATFORM") is None
+        assert os.getenv("HERMES_CRON_AUTO_DELIVER_CHAT_ID") is None
+        assert os.getenv("HERMES_CRON_AUTO_DELIVER_THREAD_ID") is None
+        fake_db.close.assert_called_once()
+

 class TestRunJobConfigLogging:
    """Verify that config.yaml parse failures are logged, not silently swallowed."""
@@ -253,3 +307,145 @@ class TestRunJobConfigLogging:

        assert any("failed to parse prefill messages" in r.message for r in caplog.records), \
            f"Expected 'failed to parse prefill messages' warning in logs, got: {[r.message for r in caplog.records]}"
+
+
+class TestRunJobPerJobOverrides:
+    def test_job_level_model_provider_and_base_url_overrides_are_used(self, tmp_path):
+        config_yaml = tmp_path / "config.yaml"
+        config_yaml.write_text(
+            "model:\n"
+            "  default: gpt-5.4\n"
+            "  provider: openai-codex\n"
+            "  base_url: https://chatgpt.com/backend-api/codex\n"
+        )
+
+        job = {
+            "id": "briefing-job",
+            "name": "briefing",
+            "prompt": "hello",
+            "model": "perplexity/sonar-pro",
+            "provider": "custom",
+            "base_url": "http://127.0.0.1:4000/v1",
+        }
+
+        fake_db = MagicMock()
+        fake_runtime = {
+            "provider": "openrouter",
+            "api_mode": "chat_completions",
+            "base_url": "http://127.0.0.1:4000/v1",
+            "api_key": "***",
+        }
+
+        with patch("cron.scheduler._hermes_home", tmp_path), \
+             patch("cron.scheduler._resolve_origin", return_value=None), \
+             patch("dotenv.load_dotenv"), \
+             patch("hermes_state.SessionDB", return_value=fake_db), \
+             patch("hermes_cli.runtime_provider.resolve_runtime_provider", return_value=fake_runtime) as runtime_mock, \
+             patch("run_agent.AIAgent") as mock_agent_cls:
+            mock_agent = MagicMock()
+            mock_agent.run_conversation.return_value = {"final_response": "ok"}
+            mock_agent_cls.return_value = mock_agent
+
+            success, output, final_response, error = run_job(job)
+
+        assert success is True
+        assert error is None
+        assert final_response == "ok"
+        assert "ok" in output
+        runtime_mock.assert_called_once_with(
+            requested="custom",
+            explicit_base_url="http://127.0.0.1:4000/v1",
+        )
+        assert mock_agent_cls.call_args.kwargs["model"] == "perplexity/sonar-pro"
+        fake_db.close.assert_called_once()
+
+
+class TestRunJobSkillBacked:
+    def test_run_job_loads_skill_and_disables_recursive_cron_tools(self, tmp_path):
+        job = {
+            "id": "skill-job",
+            "name": "skill test",
+            "prompt": "Check the feeds and summarize anything new.",
+            "skill": "blogwatcher",
+        }
+
+        fake_db = MagicMock()
+
+        with patch("cron.scheduler._hermes_home", tmp_path), \
+             patch("cron.scheduler._resolve_origin", return_value=None), \
+             patch("dotenv.load_dotenv"), \
+             patch("hermes_state.SessionDB", return_value=fake_db), \
+             patch(
+                 "hermes_cli.runtime_provider.resolve_runtime_provider",
+                 return_value={
+                     "api_key": "***",
+                     "base_url": "https://example.invalid/v1",
+                     "provider": "openrouter",
+                     "api_mode": "chat_completions",
+                 },
+             ), \
+             patch("tools.skills_tool.skill_view", return_value=json.dumps({"success": True, "content": "# Blogwatcher\nFollow this skill."})), \
+             patch("run_agent.AIAgent") as mock_agent_cls:
+            mock_agent = MagicMock()
+            mock_agent.run_conversation.return_value = {"final_response": "ok"}
+            mock_agent_cls.return_value = mock_agent
+
+            success, output, final_response, error = run_job(job)
+
+        assert success is True
+        assert error is None
+        assert final_response == "ok"
+
+        kwargs = mock_agent_cls.call_args.kwargs
+        assert "cronjob" in (kwargs["disabled_toolsets"] or [])
+
+        prompt_arg = mock_agent.run_conversation.call_args.args[0]
+        assert "blogwatcher" in prompt_arg
+        assert "Follow this skill" in prompt_arg
+        assert "Check the feeds and summarize anything new." in prompt_arg
+
+    def test_run_job_loads_multiple_skills_in_order(self, tmp_path):
+        job = {
+            "id": "multi-skill-job",
+            "name": "multi skill test",
+            "prompt": "Combine the results.",
+            "skills": ["blogwatcher", "find-nearby"],
+        }
+
+        fake_db = MagicMock()
+
+        def _skill_view(name):
+            return json.dumps({"success": True, "content": f"# {name}\nInstructions for {name}."})
+
+        with patch("cron.scheduler._hermes_home", tmp_path), \
+             patch("cron.scheduler._resolve_origin", return_value=None), \
+             patch("dotenv.load_dotenv"), \
+             patch("hermes_state.SessionDB", return_value=fake_db), \
+             patch(
+                 "hermes_cli.runtime_provider.resolve_runtime_provider",
+                 return_value={
+                     "api_key": "***",
+                     "base_url": "https://example.invalid/v1",
+                     "provider": "openrouter",
+                     "api_mode": "chat_completions",
+                 },
+             ), \
+             patch("tools.skills_tool.skill_view", side_effect=_skill_view) as skill_view_mock, \
+             patch("run_agent.AIAgent") as mock_agent_cls:
+            mock_agent = MagicMock()
+            mock_agent.run_conversation.return_value = {"final_response": "ok"}
+            mock_agent_cls.return_value = mock_agent
+
+            success, output, final_response, error = run_job(job)
+
+        assert success is True
+        assert error is None
+        assert final_response == "ok"
+        assert skill_view_mock.call_count == 2
+        assert [call.args[0] for call in skill_view_mock.call_args_list] == ["blogwatcher", "find-nearby"]
+
+        prompt_arg = mock_agent.run_conversation.call_args.args[0]
+        assert prompt_arg.index("blogwatcher") < prompt_arg.index("find-nearby")
+        assert "Instructions for blogwatcher." in prompt_arg
+        assert "Instructions for find-nearby." in prompt_arg
+        assert "Combine the results." in prompt_arg
--- a/tests/gateway/test_discord_free_response.py
+++ b/tests/gateway/test_discord_free_response.py
@@ -252,3 +252,109 @@ async def test_discord_dms_ignore_mention_requirement(adapter, monkeypatch):
    event = adapter.handle_message.await_args.args[0]
    assert event.text == "dm without mention"
    assert event.source.chat_type == "dm"
+
+
+@pytest.mark.asyncio
+async def test_discord_auto_thread_enabled_by_default(adapter, monkeypatch):
+    """Auto-threading should be enabled by default (DISCORD_AUTO_THREAD defaults to 'true')."""
+    monkeypatch.delenv("DISCORD_AUTO_THREAD", raising=False)
+    monkeypatch.setenv("DISCORD_REQUIRE_MENTION", "false")
+
+    # Patch _auto_create_thread to return a fake thread
+    fake_thread = FakeThread(channel_id=999, name="auto-thread")
+    adapter._auto_create_thread = AsyncMock(return_value=fake_thread)
+
+    message = make_message(channel=FakeTextChannel(channel_id=123), content="hello")
+
+    await adapter._handle_message(message)
+
+    adapter._auto_create_thread.assert_awaited_once()
+    adapter.handle_message.assert_awaited_once()
+    event = adapter.handle_message.await_args.args[0]
+    assert event.source.chat_type == "thread"
+    assert event.source.thread_id == "999"
+
+
+@pytest.mark.asyncio
+async def test_discord_auto_thread_can_be_disabled(adapter, monkeypatch):
+    """Setting auto_thread to false skips thread creation."""
+    monkeypatch.setenv("DISCORD_AUTO_THREAD", "false")
+    monkeypatch.setenv("DISCORD_REQUIRE_MENTION", "false")
+
+    adapter._auto_create_thread = AsyncMock()
+
+    message = make_message(channel=FakeTextChannel(channel_id=123), content="hello")
+
+    await adapter._handle_message(message)
+
+    adapter._auto_create_thread.assert_not_awaited()
+    adapter.handle_message.assert_awaited_once()
+    event = adapter.handle_message.await_args.args[0]
+    assert event.source.chat_type == "group"
+
+
+@pytest.mark.asyncio
+async def test_discord_bot_thread_skips_mention_requirement(adapter, monkeypatch):
+    """Messages in a thread the bot has participated in should not require @mention."""
+    monkeypatch.setenv("DISCORD_REQUIRE_MENTION", "true")
+    monkeypatch.delenv("DISCORD_FREE_RESPONSE_CHANNELS", raising=False)
+    monkeypatch.setenv("DISCORD_AUTO_THREAD", "false")
+
+    # Simulate bot having previously participated in thread 456
+    adapter._bot_participated_threads.add("456")
+
+    thread = FakeThread(channel_id=456, name="existing thread")
+    message = make_message(channel=thread, content="follow-up without mention")
+
+    await adapter._handle_message(message)
+
+    adapter.handle_message.assert_awaited_once()
+    event = adapter.handle_message.await_args.args[0]
+    assert event.text == "follow-up without mention"
+    assert event.source.chat_type == "thread"
+
+
+@pytest.mark.asyncio
+async def test_discord_unknown_thread_still_requires_mention(adapter, monkeypatch):
+    """Messages in a thread the bot hasn't participated in should still require @mention."""
+    monkeypatch.setenv("DISCORD_REQUIRE_MENTION", "true")
+    monkeypatch.delenv("DISCORD_FREE_RESPONSE_CHANNELS", raising=False)
+    monkeypatch.setenv("DISCORD_AUTO_THREAD", "false")
+
+    # Bot has NOT participated in thread 789
+    thread = FakeThread(channel_id=789, name="some thread")
+    message = make_message(channel=thread, content="hello from unknown thread")
+
+    await adapter._handle_message(message)
+
+    adapter.handle_message.assert_not_awaited()
+
+
+@pytest.mark.asyncio
+async def test_discord_auto_thread_tracks_participation(adapter, monkeypatch):
+    """Auto-created threads should be tracked for future mention-free replies."""
+    monkeypatch.delenv("DISCORD_AUTO_THREAD", raising=False)
+    monkeypatch.setenv("DISCORD_REQUIRE_MENTION", "false")
+
+    fake_thread = FakeThread(channel_id=555, name="auto-thread")
+    adapter._auto_create_thread = AsyncMock(return_value=fake_thread)
+
+    message = make_message(channel=FakeTextChannel(channel_id=123), content="start a thread")
+
+    await adapter._handle_message(message)
+
+    assert "555" in adapter._bot_participated_threads
+
+
+@pytest.mark.asyncio
+async def test_discord_thread_participation_tracked_on_dispatch(adapter, monkeypatch):
+    """When the bot processes a message in a thread, it tracks participation."""
+    monkeypatch.setenv("DISCORD_REQUIRE_MENTION", "false")
+    monkeypatch.setenv("DISCORD_AUTO_THREAD", "false")
+
+    thread = FakeThread(channel_id=777, name="manually created thread")
+    message = make_message(channel=thread, content="hello in thread")
+
+    await adapter._handle_message(message)
+
+    assert "777" in adapter._bot_participated_threads
--- a/tests/gateway/test_discord_send.py
+++ b/tests/gateway/test_discord_send.py
@@ -0,0 +1,80 @@
+from types import SimpleNamespace
+from unittest.mock import AsyncMock, MagicMock
+import sys
+
+import pytest
+
+from gateway.config import PlatformConfig
+
+
+def _ensure_discord_mock():
+    if "discord" in sys.modules and hasattr(sys.modules["discord"], "__file__"):
+        return
+
+    discord_mod = MagicMock()
+    discord_mod.Intents.default.return_value = MagicMock()
+    discord_mod.Client = MagicMock
+    discord_mod.File = MagicMock
+    discord_mod.DMChannel = type("DMChannel", (), {})
+    discord_mod.Thread = type("Thread", (), {})
+    discord_mod.ForumChannel = type("ForumChannel", (), {})
+    discord_mod.ui = SimpleNamespace(View=object, button=lambda *a, **k: (lambda fn: fn), Button=object)
+    discord_mod.ButtonStyle = SimpleNamespace(success=1, primary=2, danger=3, green=1, blurple=2, red=3)
+    discord_mod.Color = SimpleNamespace(orange=lambda: 1, green=lambda: 2, blue=lambda: 3, red=lambda: 4)
+    discord_mod.Interaction = object
+    discord_mod.Embed = MagicMock
+    discord_mod.app_commands = SimpleNamespace(
+        describe=lambda **kwargs: (lambda fn: fn),
+        choices=lambda **kwargs: (lambda fn: fn),
+        Choice=lambda **kwargs: SimpleNamespace(**kwargs),
+    )
+
+    ext_mod = MagicMock()
+    commands_mod = MagicMock()
+    commands_mod.Bot = MagicMock
+    ext_mod.commands = commands_mod
+
+    sys.modules.setdefault("discord", discord_mod)
+    sys.modules.setdefault("discord.ext", ext_mod)
+    sys.modules.setdefault("discord.ext.commands", commands_mod)
+
+
+_ensure_discord_mock()
+
+from gateway.platforms.discord import DiscordAdapter  # noqa: E402
+
+
+@pytest.mark.asyncio
+async def test_send_retries_without_reference_when_reply_target_is_system_message():
+    adapter = DiscordAdapter(PlatformConfig(enabled=True, token="***"))
+
+    ref_msg = SimpleNamespace(id=99)
+    sent_msg = SimpleNamespace(id=1234)
+    send_calls = []
+
+    async def fake_send(*, content, reference=None):
+        send_calls.append({"content": content, "reference": reference})
+        if len(send_calls) == 1:
+            raise RuntimeError(
+                "400 Bad Request (error code: 50035): Invalid Form Body\n"
+                "In message_reference: Cannot reply to a system message"
+            )
+        return sent_msg
+
+    channel = SimpleNamespace(
+        fetch_message=AsyncMock(return_value=ref_msg),
+        send=AsyncMock(side_effect=fake_send),
+    )
+    adapter._client = SimpleNamespace(
+        get_channel=lambda _chat_id: channel,
+        fetch_channel=AsyncMock(),
+    )
+
+    result = await adapter.send("555", "hello", reply_to="99")
+
+    assert result.success is True
+    assert result.message_id == "1234"
+    assert channel.fetch_message.await_count == 1
+    assert channel.send.await_count == 2
+    assert send_calls[0]["reference"] is ref_msg
+    assert send_calls[1]["reference"] is None
--- a/tests/gateway/test_discord_slash_commands.py
+++ b/tests/gateway/test_discord_slash_commands.py
@@ -363,11 +363,37 @@ async def test_auto_thread_creates_thread_and_redirects(adapter, monkeypatch):


@pytest.mark.asyncio
-async def test_auto_thread_disabled_by_default(adapter, monkeypatch):
-    """Without DISCORD_AUTO_THREAD, messages stay in the channel."""
+async def test_auto_thread_enabled_by_default_slash_commands(adapter, monkeypatch):
+    """Without DISCORD_AUTO_THREAD env var, auto-threading is enabled (default: true)."""
    monkeypatch.delenv("DISCORD_AUTO_THREAD", raising=False)
    monkeypatch.setenv("DISCORD_REQUIRE_MENTION", "false")

+    fake_thread = _FakeThreadChannel(channel_id=999, name="auto-thread")
+    adapter._auto_create_thread = AsyncMock(return_value=fake_thread)
+
+    captured_events = []
+
+    async def capture_handle(event):
+        captured_events.append(event)
+
+    adapter.handle_message = capture_handle
+
+    msg = _fake_message(_FakeTextChannel())
+
+    await adapter._handle_message(msg)
+
+    adapter._auto_create_thread.assert_awaited_once()
+    assert len(captured_events) == 1
+    assert captured_events[0].source.chat_id == "999"  # redirected to thread
+    assert captured_events[0].source.chat_type == "thread"
+
+
+@pytest.mark.asyncio
+async def test_auto_thread_can_be_disabled(adapter, monkeypatch):
+    """Setting DISCORD_AUTO_THREAD=false keeps messages in the channel."""
+    monkeypatch.setenv("DISCORD_AUTO_THREAD", "false")
+    monkeypatch.setenv("DISCORD_REQUIRE_MENTION", "false")
+
    adapter._auto_create_thread = AsyncMock()

    captured_events = []
--- a/tests/gateway/test_gateway_shutdown.py
+++ b/tests/gateway/test_gateway_shutdown.py
@@ -0,0 +1,106 @@
+import asyncio
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+from gateway.config import GatewayConfig, Platform, PlatformConfig
+from gateway.platforms.base import BasePlatformAdapter, MessageEvent, SendResult
+from gateway.run import GatewayRunner
+from gateway.session import SessionSource, build_session_key
+
+
+class StubAdapter(BasePlatformAdapter):
+    def __init__(self):
+        super().__init__(PlatformConfig(enabled=True, token="***"), Platform.TELEGRAM)
+
+    async def connect(self):
+        return True
+
+    async def disconnect(self):
+        return None
+
+    async def send(self, chat_id, content, reply_to=None, metadata=None):
+        return SendResult(success=True, message_id="1")
+
+    async def send_typing(self, chat_id, metadata=None):
+        return None
+
+    async def get_chat_info(self, chat_id):
+        return {"id": chat_id}
+
+
+def _source(chat_id="123456", chat_type="dm"):
+    return SessionSource(
+        platform=Platform.TELEGRAM,
+        chat_id=chat_id,
+        chat_type=chat_type,
+    )
+
+
+@pytest.mark.asyncio
+async def test_cancel_background_tasks_cancels_inflight_message_processing():
+    adapter = StubAdapter()
+    release = asyncio.Event()
+
+    async def block_forever(_event):
+        await release.wait()
+        return None
+
+    adapter.set_message_handler(block_forever)
+    event = MessageEvent(text="work", source=_source(), message_id="1")
+
+    await adapter.handle_message(event)
+    await asyncio.sleep(0)
+
+    session_key = build_session_key(event.source)
+    assert session_key in adapter._active_sessions
+    assert adapter._background_tasks
+
+    await adapter.cancel_background_tasks()
+
+    assert adapter._background_tasks == set()
+    assert adapter._active_sessions == {}
+    assert adapter._pending_messages == {}
+
+
+@pytest.mark.asyncio
+async def test_gateway_stop_interrupts_running_agents_and_cancels_adapter_tasks():
+    runner = object.__new__(GatewayRunner)
+    runner.config = GatewayConfig(platforms={Platform.TELEGRAM: PlatformConfig(enabled=True, token="***")})
+    runner._running = True
+    runner._shutdown_event = asyncio.Event()
+    runner._exit_reason = None
+    runner._pending_messages = {"session": "pending text"}
+    runner._pending_approvals = {"session": {"command": "rm -rf /tmp/x"}}
+    runner._shutdown_all_gateway_honcho = lambda: None
+
+    adapter = StubAdapter()
+    release = asyncio.Event()
+
+    async def block_forever(_event):
+        await release.wait()
+        return None
+
+    adapter.set_message_handler(block_forever)
+    event = MessageEvent(text="work", source=_source(), message_id="1")
+    await adapter.handle_message(event)
+    await asyncio.sleep(0)
+
+    disconnect_mock = AsyncMock()
+    adapter.disconnect = disconnect_mock
+
+    session_key = build_session_key(event.source)
+    running_agent = MagicMock()
+    runner._running_agents = {session_key: running_agent}
+    runner.adapters = {Platform.TELEGRAM: adapter}
+
+    with patch("gateway.status.remove_pid_file"), patch("gateway.status.write_runtime_status"):
+        await runner.stop()
+
+    running_agent.interrupt.assert_called_once_with("Gateway shutting down")
+    disconnect_mock.assert_awaited_once()
+    assert runner.adapters == {}
+    assert runner._running_agents == {}
+    assert runner._pending_messages == {}
+    assert runner._pending_approvals == {}
+    assert runner._shutdown_event.is_set() is True
--- a/tests/gateway/test_image_enrichment.py
+++ b/tests/gateway/test_image_enrichment.py
@@ -0,0 +1,25 @@
+from unittest.mock import patch
+
+import pytest
+
+
+@pytest.mark.asyncio
+async def test_image_enrichment_uses_athabasca_upload_guidance_without_stale_r2_warning():
+    from gateway.run import GatewayRunner
+
+    runner = object.__new__(GatewayRunner)
+
+    with patch(
+        "tools.vision_tools.vision_analyze_tool",
+        return_value='{"success": true, "analysis": "A painted serpent warrior."}',
+    ):
+        enriched = await runner._enrich_message_with_vision(
+            "caption",
+            ["/tmp/test.jpg"],
+        )
+
+    assert "R2 not configured" not in enriched
+    assert "Gateway media URL available for reference" not in enriched
+    assert "POST /api/uploads" in enriched
+    assert "Do not store the local cache path" in enriched
+    assert "caption" in enriched
--- a/tests/gateway/test_interrupt_key_match.py
+++ b/tests/gateway/test_interrupt_key_match.py
@@ -11,7 +11,7 @@ import asyncio
 import pytest

 from gateway.config import Platform, PlatformConfig
-from gateway.platforms.base import BasePlatformAdapter, MessageEvent, SendResult
+from gateway.platforms.base import BasePlatformAdapter, MessageEvent, MessageType, SendResult
 from gateway.session import SessionSource, build_session_key


@@ -50,11 +50,11 @@ class TestInterruptKeyConsistency:
    """Ensure adapter interrupt methods are queried with session_key, not chat_id."""

    def test_session_key_differs_from_chat_id_for_dm(self):
-        """Session key for a DM is NOT the same as chat_id."""
+        """Session key for a DM is namespaced and includes the DM chat_id."""
        source = _source("123456", "dm")
        session_key = build_session_key(source)
        assert session_key != source.chat_id
-        assert session_key == "agent:main:telegram:dm"
+        assert session_key == "agent:main:telegram:dm:123456"

    def test_session_key_differs_from_chat_id_for_group(self):
        """Session key for a group chat includes prefix, unlike raw chat_id."""
@@ -122,3 +122,29 @@ class TestInterruptKeyConsistency:

        # Interrupt event was set
        assert adapter._active_sessions[session_key].is_set()
+
+    @pytest.mark.asyncio
+    async def test_photo_followup_is_queued_without_interrupt(self):
+        """Photo follow-ups should queue behind the active run instead of interrupting it."""
+        adapter = StubAdapter()
+        adapter.set_message_handler(lambda event: asyncio.sleep(0, result=None))
+
+        source = _source("-1001234", "group")
+        session_key = build_session_key(source)
+        interrupt_event = asyncio.Event()
+        adapter._active_sessions[session_key] = interrupt_event
+
+        event = MessageEvent(
+            text="caption",
+            source=source,
+            message_type=MessageType.PHOTO,
+            message_id="2",
+            media_urls=["/tmp/photo-a.jpg"],
+            media_types=["image/jpeg"],
+        )
+        await adapter.handle_message(event)
+
+        queued = adapter._pending_messages[session_key]
+        assert queued is event
+        assert queued.media_urls == ["/tmp/photo-a.jpg"]
+        assert interrupt_event.is_set() is False
--- a/tests/gateway/test_plan_command.py
+++ b/tests/gateway/test_plan_command.py
@@ -0,0 +1,129 @@
+"""Tests for the /plan gateway slash command."""
+
+from datetime import datetime
+from types import SimpleNamespace
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+from agent.skill_commands import scan_skill_commands
+from gateway.config import GatewayConfig, Platform, PlatformConfig
+from gateway.platforms.base import MessageEvent
+from gateway.session import SessionEntry, SessionSource
+
+
+def _make_runner():
+    from gateway.run import GatewayRunner
+
+    runner = object.__new__(GatewayRunner)
+    runner.config = GatewayConfig(
+        platforms={Platform.TELEGRAM: PlatformConfig(enabled=True, token="***")}
+    )
+    runner.adapters = {}
+    runner._voice_mode = {}
+    runner.hooks = SimpleNamespace(emit=AsyncMock(), loaded_hooks=False)
+    runner.session_store = MagicMock()
+    runner.session_store.get_or_create_session.return_value = SessionEntry(
+        session_key="agent:main:telegram:dm:c1:u1",
+        session_id="sess-1",
+        created_at=datetime.now(),
+        updated_at=datetime.now(),
+        platform=Platform.TELEGRAM,
+        chat_type="dm",
+    )
+    runner.session_store.load_transcript.return_value = []
+    runner.session_store.has_any_sessions.return_value = True
+    runner.session_store.append_to_transcript = MagicMock()
+    runner.session_store.rewrite_transcript = MagicMock()
+    runner._running_agents = {}
+    runner._pending_messages = {}
+    runner._pending_approvals = {}
+    runner._session_db = None
+    runner._reasoning_config = None
+    runner._provider_routing = {}
+    runner._fallback_model = None
+    runner._show_reasoning = False
+    runner._is_user_authorized = lambda _source: True
+    runner._set_session_env = lambda _context: None
+    runner._run_agent = AsyncMock(
+        return_value={
+            "final_response": "planned",
+            "messages": [],
+            "tools": [],
+            "history_offset": 0,
+            "last_prompt_tokens": 0,
+        }
+    )
+    return runner
+
+
+def _make_event(text="/plan"):
+    return MessageEvent(
+        text=text,
+        source=SessionSource(
+            platform=Platform.TELEGRAM,
+            user_id="u1",
+            chat_id="c1",
+            user_name="tester",
+            chat_type="dm",
+        ),
+        message_id="m1",
+    )
+
+
+def _make_plan_skill(skills_dir):
+    skill_dir = skills_dir / "plan"
+    skill_dir.mkdir(parents=True, exist_ok=True)
+    (skill_dir / "SKILL.md").write_text(
+        """---
+name: plan
+description: Plan mode skill.
+---
+
+# Plan
+
+Use the current conversation context when no explicit instruction is provided.
+Save plans under the active workspace's .hermes/plans directory.
+"""
+    )
+
+
+class TestGatewayPlanCommand:
+    @pytest.mark.asyncio
+    async def test_plan_command_loads_skill_and_runs_agent(self, monkeypatch, tmp_path):
+        import gateway.run as gateway_run
+
+        runner = _make_runner()
+        event = _make_event("/plan Add OAuth login")
+
+        monkeypatch.setattr(gateway_run, "_resolve_runtime_agent_kwargs", lambda: {"api_key": "***"})
+        monkeypatch.setattr(
+            "agent.model_metadata.get_model_context_length",
+            lambda *_args, **_kwargs: 100_000,
+        )
+
+        with patch("tools.skills_tool.SKILLS_DIR", tmp_path):
+            _make_plan_skill(tmp_path)
+            scan_skill_commands()
+            result = await runner._handle_message(event)
+
+        assert result == "planned"
+        forwarded = runner._run_agent.call_args.kwargs["message"]
+        assert "Plan mode skill" in forwarded
+        assert "Add OAuth login" in forwarded
+        assert ".hermes/plans" in forwarded
+        assert str(tmp_path / "plans") not in forwarded
+        assert "active workspace/backend cwd" in forwarded
+        assert "Runtime note:" in forwarded
+
+    @pytest.mark.asyncio
+    async def test_plan_command_appears_in_help_output_via_skill_listing(self, tmp_path):
+        runner = _make_runner()
+        event = _make_event("/help")
+
+        with patch("tools.skills_tool.SKILLS_DIR", tmp_path):
+            _make_plan_skill(tmp_path)
+            scan_skill_commands()
+            result = await runner._handle_help_command(event)
+
+        assert "/plan" in result
--- a/tests/gateway/test_retry_replacement.py
+++ b/tests/gateway/test_retry_replacement.py
@@ -0,0 +1,97 @@
+"""Regression tests for /retry replacement semantics."""
+
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+from gateway.config import GatewayConfig
+from gateway.platforms.base import MessageEvent, MessageType
+from gateway.run import GatewayRunner
+from gateway.session import SessionStore
+
+
+@pytest.mark.asyncio
+async def test_gateway_retry_replaces_last_user_turn_in_transcript(tmp_path):
+    config = GatewayConfig()
+    with patch("gateway.session.SessionStore._ensure_loaded"):
+        store = SessionStore(sessions_dir=tmp_path, config=config)
+    store._db = None
+    store._loaded = True
+
+    session_id = "retry_session"
+    for msg in [
+        {"role": "session_meta", "tools": []},
+        {"role": "user", "content": "first question"},
+        {"role": "assistant", "content": "first answer"},
+        {"role": "user", "content": "retry me"},
+        {"role": "assistant", "content": "old answer"},
+    ]:
+        store.append_to_transcript(session_id, msg)
+
+    gw = GatewayRunner.__new__(GatewayRunner)
+    gw.config = config
+    gw.session_store = store
+
+    session_entry = MagicMock(session_id=session_id)
+    session_entry.last_prompt_tokens = 111
+    gw.session_store.get_or_create_session = MagicMock(return_value=session_entry)
+
+    async def fake_handle_message(event):
+        assert event.text == "retry me"
+        transcript_before = store.load_transcript(session_id)
+        assert [m.get("content") for m in transcript_before if m.get("role") == "user"] == [
+            "first question"
+        ]
+        store.append_to_transcript(session_id, {"role": "user", "content": event.text})
+        store.append_to_transcript(session_id, {"role": "assistant", "content": "new answer"})
+        return "new answer"
+
+    gw._handle_message = AsyncMock(side_effect=fake_handle_message)
+
+    result = await gw._handle_retry_command(
+        MessageEvent(text="/retry", message_type=MessageType.TEXT, source=MagicMock())
+    )
+
+    assert result == "new answer"
+    transcript_after = store.load_transcript(session_id)
+    assert [m.get("content") for m in transcript_after if m.get("role") == "user"] == [
+        "first question",
+        "retry me",
+    ]
+    assert [m.get("content") for m in transcript_after if m.get("role") == "assistant"] == [
+        "first answer",
+        "new answer",
+    ]
+
+
+@pytest.mark.asyncio
+async def test_gateway_retry_replays_original_text_not_retry_command(tmp_path):
+    config = MagicMock()
+    config.sessions_dir = tmp_path
+    config.max_context_messages = 20
+    gw = GatewayRunner.__new__(GatewayRunner)
+    gw.config = config
+    gw.session_store = MagicMock()
+
+    session_entry = MagicMock(session_id="test-session")
+    session_entry.last_prompt_tokens = 55
+    gw.session_store.get_or_create_session.return_value = session_entry
+    gw.session_store.load_transcript.return_value = [
+        {"role": "user", "content": "real message"},
+        {"role": "assistant", "content": "answer"},
+    ]
+    gw.session_store.rewrite_transcript = MagicMock()
+
+    captured = {}
+
+    async def fake_handle_message(event):
+        captured["text"] = event.text
+        return "ok"
+
+    gw._handle_message = AsyncMock(side_effect=fake_handle_message)
+
+    await gw._handle_retry_command(
+        MessageEvent(text="/retry", message_type=MessageType.TEXT, source=MagicMock())
+    )
+
+    assert captured["text"] == "real message"
--- a/tests/gateway/test_send_image_file.py
+++ b/tests/gateway/test_send_image_file.py
@@ -199,6 +199,57 @@ class TestDiscordSendImageFile:
        assert result.message_id == "99"
        mock_channel.send.assert_awaited_once()

+    def test_send_document_uploads_file_attachment(self, adapter, tmp_path):
+        """send_document should upload a native Discord attachment."""
+        pdf = tmp_path / "sample.pdf"
+        pdf.write_bytes(b"%PDF-1.4\n%\xe2\xe3\xcf\xd3\n")
+
+        mock_channel = MagicMock()
+        mock_msg = MagicMock()
+        mock_msg.id = 100
+        mock_channel.send = AsyncMock(return_value=mock_msg)
+        adapter._client.get_channel = MagicMock(return_value=mock_channel)
+
+        with patch.object(discord_mod_ref, "File", MagicMock()) as file_cls:
+            result = _run(
+                adapter.send_document(
+                    chat_id="67890",
+                    file_path=str(pdf),
+                    file_name="renamed.pdf",
+                    metadata={"thread_id": "123"},
+                )
+            )
+
+        assert result.success
+        assert result.message_id == "100"
+        assert "file" in mock_channel.send.call_args.kwargs
+        assert file_cls.call_args.kwargs["filename"] == "renamed.pdf"
+
+    def test_send_video_uploads_file_attachment(self, adapter, tmp_path):
+        """send_video should upload a native Discord attachment."""
+        video = tmp_path / "clip.mp4"
+        video.write_bytes(b"\x00\x00\x00\x18ftypmp42" + b"\x00" * 50)
+
+        mock_channel = MagicMock()
+        mock_msg = MagicMock()
+        mock_msg.id = 101
+        mock_channel.send = AsyncMock(return_value=mock_msg)
+        adapter._client.get_channel = MagicMock(return_value=mock_channel)
+
+        with patch.object(discord_mod_ref, "File", MagicMock()) as file_cls:
+            result = _run(
+                adapter.send_video(
+                    chat_id="67890",
+                    video_path=str(video),
+                    metadata={"thread_id": "123"},
+                )
+            )
+
+        assert result.success
+        assert result.message_id == "101"
+        assert "file" in mock_channel.send.call_args.kwargs
+        assert file_cls.call_args.kwargs["filename"] == "clip.mp4"
+
    def test_returns_error_when_file_missing(self, adapter):
        result = _run(
            adapter.send_image_file(chat_id="67890", image_path="/nonexistent.png")
--- a/tests/gateway/test_session.py
+++ b/tests/gateway/test_session.py
@@ -338,7 +338,7 @@ class TestSessionStoreRewriteTranscript:

 class TestWhatsAppDMSessionKeyConsistency:
    """Regression: all session-key construction must go through build_session_key
-    so WhatsApp DMs include chat_id while other DMs do not."""
+    so DMs are isolated by chat_id across platforms."""

    @pytest.fixture()
    def store(self, tmp_path):
@@ -369,15 +369,24 @@ class TestWhatsAppDMSessionKeyConsistency:
        )
        assert store._generate_session_key(source) == build_session_key(source)

-    def test_telegram_dm_omits_chat_id(self):
-        """Non-WhatsApp DMs should still omit chat_id (single owner DM)."""
+    def test_telegram_dm_includes_chat_id(self):
+        """Non-WhatsApp DMs should also include chat_id to separate users."""
        source = SessionSource(
            platform=Platform.TELEGRAM,
            chat_id="99",
            chat_type="dm",
        )
        key = build_session_key(source)
-        assert key == "agent:main:telegram:dm"
+        assert key == "agent:main:telegram:dm:99"
+
+    def test_distinct_dm_chat_ids_get_distinct_session_keys(self):
+        """Different DM chats must not collapse into one shared session."""
+        first = SessionSource(platform=Platform.TELEGRAM, chat_id="99", chat_type="dm")
+        second = SessionSource(platform=Platform.TELEGRAM, chat_id="100", chat_type="dm")
+
+        assert build_session_key(first) == "agent:main:telegram:dm:99"
+        assert build_session_key(second) == "agent:main:telegram:dm:100"
+        assert build_session_key(first) != build_session_key(second)

    def test_discord_group_includes_chat_id(self):
        """Group/channel keys include chat_type and chat_id."""
--- a/tests/gateway/test_session_env.py
+++ b/tests/gateway/test_session_env.py
@@ -0,0 +1,45 @@
+import os
+
+from gateway.config import Platform
+from gateway.run import GatewayRunner
+from gateway.session import SessionContext, SessionSource
+
+
+def test_set_session_env_includes_thread_id(monkeypatch):
+    runner = object.__new__(GatewayRunner)
+    source = SessionSource(
+        platform=Platform.TELEGRAM,
+        chat_id="-1001",
+        chat_name="Group",
+        chat_type="group",
+        thread_id="17585",
+    )
+    context = SessionContext(source=source, connected_platforms=[], home_channels={})
+
+    monkeypatch.delenv("HERMES_SESSION_PLATFORM", raising=False)
+    monkeypatch.delenv("HERMES_SESSION_CHAT_ID", raising=False)
+    monkeypatch.delenv("HERMES_SESSION_CHAT_NAME", raising=False)
+    monkeypatch.delenv("HERMES_SESSION_THREAD_ID", raising=False)
+
+    runner._set_session_env(context)
+
+    assert os.getenv("HERMES_SESSION_PLATFORM") == "telegram"
+    assert os.getenv("HERMES_SESSION_CHAT_ID") == "-1001"
+    assert os.getenv("HERMES_SESSION_CHAT_NAME") == "Group"
+    assert os.getenv("HERMES_SESSION_THREAD_ID") == "17585"
+
+
+def test_clear_session_env_removes_thread_id(monkeypatch):
+    runner = object.__new__(GatewayRunner)
+
+    monkeypatch.setenv("HERMES_SESSION_PLATFORM", "telegram")
+    monkeypatch.setenv("HERMES_SESSION_CHAT_ID", "-1001")
+    monkeypatch.setenv("HERMES_SESSION_CHAT_NAME", "Group")
+    monkeypatch.setenv("HERMES_SESSION_THREAD_ID", "17585")
+
+    runner._clear_session_env()
+
+    assert os.getenv("HERMES_SESSION_PLATFORM") is None
+    assert os.getenv("HERMES_SESSION_CHAT_ID") is None
+    assert os.getenv("HERMES_SESSION_CHAT_NAME") is None
+    assert os.getenv("HERMES_SESSION_THREAD_ID") is None
--- a/tests/gateway/test_status_command.py
+++ b/tests/gateway/test_status_command.py
@@ -0,0 +1,133 @@
+"""Tests for gateway /status behavior and token persistence."""
+
+from datetime import datetime
+from types import SimpleNamespace
+from unittest.mock import AsyncMock, MagicMock
+
+import pytest
+
+from gateway.config import GatewayConfig, Platform, PlatformConfig
+from gateway.platforms.base import MessageEvent
+from gateway.session import SessionEntry, SessionSource, build_session_key
+
+
+def _make_source() -> SessionSource:
+    return SessionSource(
+        platform=Platform.TELEGRAM,
+        user_id="u1",
+        chat_id="c1",
+        user_name="tester",
+        chat_type="dm",
+    )
+
+
+def _make_event(text: str) -> MessageEvent:
+    return MessageEvent(
+        text=text,
+        source=_make_source(),
+        message_id="m1",
+    )
+
+
+def _make_runner(session_entry: SessionEntry):
+    from gateway.run import GatewayRunner
+
+    runner = object.__new__(GatewayRunner)
+    runner.config = GatewayConfig(
+        platforms={Platform.TELEGRAM: PlatformConfig(enabled=True, token="***")}
+    )
+    adapter = MagicMock()
+    adapter.send = AsyncMock()
+    runner.adapters = {Platform.TELEGRAM: adapter}
+    runner._voice_mode = {}
+    runner.hooks = SimpleNamespace(emit=AsyncMock(), loaded_hooks=False)
+    runner.session_store = MagicMock()
+    runner.session_store.get_or_create_session.return_value = session_entry
+    runner.session_store.load_transcript.return_value = []
+    runner.session_store.has_any_sessions.return_value = True
+    runner.session_store.append_to_transcript = MagicMock()
+    runner.session_store.rewrite_transcript = MagicMock()
+    runner.session_store.update_session = MagicMock()
+    runner._running_agents = {}
+    runner._pending_messages = {}
+    runner._pending_approvals = {}
+    runner._session_db = None
+    runner._reasoning_config = None
+    runner._provider_routing = {}
+    runner._fallback_model = None
+    runner._show_reasoning = False
+    runner._is_user_authorized = lambda _source: True
+    runner._set_session_env = lambda _context: None
+    runner._should_send_voice_reply = lambda *_args, **_kwargs: False
+    runner._send_voice_reply = AsyncMock()
+    runner._capture_gateway_honcho_if_configured = lambda *args, **kwargs: None
+    runner._emit_gateway_run_progress = AsyncMock()
+    return runner
+
+
+@pytest.mark.asyncio
+async def test_status_command_reports_running_agent_without_interrupt(monkeypatch):
+    session_entry = SessionEntry(
+        session_key=build_session_key(_make_source()),
+        session_id="sess-1",
+        created_at=datetime.now(),
+        updated_at=datetime.now(),
+        platform=Platform.TELEGRAM,
+        chat_type="dm",
+        total_tokens=321,
+    )
+    runner = _make_runner(session_entry)
+    running_agent = MagicMock()
+    runner._running_agents[build_session_key(_make_source())] = running_agent
+
+    result = await runner._handle_message(_make_event("/status"))
+
+    assert "**Tokens:** 321" in result
+    assert "**Agent Running:** Yes ⚡" in result
+    running_agent.interrupt.assert_not_called()
+    assert runner._pending_messages == {}
+
+
+@pytest.mark.asyncio
+async def test_handle_message_persists_agent_token_counts(monkeypatch):
+    import gateway.run as gateway_run
+
+    session_entry = SessionEntry(
+        session_key=build_session_key(_make_source()),
+        session_id="sess-1",
+        created_at=datetime.now(),
+        updated_at=datetime.now(),
+        platform=Platform.TELEGRAM,
+        chat_type="dm",
+    )
+    runner = _make_runner(session_entry)
+    runner.session_store.load_transcript.return_value = [{"role": "user", "content": "earlier"}]
+    runner._run_agent = AsyncMock(
+        return_value={
+            "final_response": "ok",
+            "messages": [],
+            "tools": [],
+            "history_offset": 0,
+            "last_prompt_tokens": 80,
+            "input_tokens": 120,
+            "output_tokens": 45,
+            "model": "openai/test-model",
+        }
+    )
+
+    monkeypatch.setattr(gateway_run, "_resolve_runtime_agent_kwargs", lambda: {"api_key": "***"})
+    monkeypatch.setattr(
+        "agent.model_metadata.get_model_context_length",
+        lambda *_args, **_kwargs: 100000,
+    )
+
+    result = await runner._handle_message(_make_event("hello"))
+
+    assert result == "ok"
+    runner.session_store.update_session.assert_called_once_with(
+        session_entry.session_key,
+        input_tokens=120,
+        output_tokens=45,
+        last_prompt_tokens=80,
+        model="openai/test-model",
+    )
--- a/tests/gateway/test_stt_config.py
+++ b/tests/gateway/test_stt_config.py
@@ -0,0 +1,53 @@
+"""Gateway STT config tests — honor stt.enabled: false from config.yaml."""
+
+from pathlib import Path
+from unittest.mock import AsyncMock, patch
+
+import pytest
+import yaml
+
+from gateway.config import GatewayConfig, load_gateway_config
+
+
+def test_gateway_config_stt_disabled_from_dict_nested():
+    config = GatewayConfig.from_dict({"stt": {"enabled": False}})
+    assert config.stt_enabled is False
+
+
+def test_load_gateway_config_bridges_stt_enabled_from_config_yaml(tmp_path, monkeypatch):
+    hermes_home = tmp_path / ".hermes"
+    hermes_home.mkdir()
+    (hermes_home / "config.yaml").write_text(
+        yaml.dump({"stt": {"enabled": False}}),
+        encoding="utf-8",
+    )
+
+    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+    monkeypatch.setattr(Path, "home", lambda: tmp_path)
+
+    config = load_gateway_config()
+
+    assert config.stt_enabled is False
+
+
+@pytest.mark.asyncio
+async def test_enrich_message_with_transcription_skips_when_stt_disabled():
+    from gateway.run import GatewayRunner
+
+    runner = GatewayRunner.__new__(GatewayRunner)
+    runner.config = GatewayConfig(stt_enabled=False)
+
+    with patch(
+        "tools.transcription_tools.transcribe_audio",
+        side_effect=AssertionError("transcribe_audio should not be called when STT is disabled"),
+    ), patch(
+        "tools.transcription_tools.get_stt_model_from_config",
+        return_value=None,
+    ):
+        result = await runner._enrich_message_with_transcription(
+            "caption",
+            ["/tmp/voice.ogg"],
+        )
+
+    assert "transcription is disabled" in result.lower()
+    assert "caption" in result
--- a/tests/gateway/test_telegram_conflict.py
+++ b/tests/gateway/test_telegram_conflict.py
@@ -98,3 +98,27 @@ async def test_polling_conflict_stops_polling_and_notifies_handler(monkeypatch):
    assert adapter.has_fatal_error is True
    updater.stop.assert_awaited()
    fatal_handler.assert_awaited_once()
+
+
+@pytest.mark.asyncio
+async def test_disconnect_skips_inactive_updater_and_app(monkeypatch):
+    adapter = TelegramAdapter(PlatformConfig(enabled=True, token="***"))
+
+    updater = SimpleNamespace(running=False, stop=AsyncMock())
+    app = SimpleNamespace(
+        updater=updater,
+        running=False,
+        stop=AsyncMock(),
+        shutdown=AsyncMock(),
+    )
+    adapter._app = app
+
+    warning = MagicMock()
+    monkeypatch.setattr("gateway.platforms.telegram.logger.warning", warning)
+
+    await adapter.disconnect()
+
+    updater.stop.assert_not_awaited()
+    app.stop.assert_not_awaited()
+    app.shutdown.assert_awaited_once()
+    warning.assert_not_called()
--- a/tests/gateway/test_telegram_documents.py
+++ b/tests/gateway/test_telegram_documents.py
@@ -12,6 +12,7 @@ import asyncio
 import importlib
 import os
 import sys
+from types import SimpleNamespace
 from unittest.mock import AsyncMock, MagicMock, patch

 import pytest
@@ -351,6 +352,26 @@ class TestDocumentDownloadBlock:
 # ---------------------------------------------------------------------------

 class TestMediaGroups:
+    @pytest.mark.asyncio
+    async def test_non_album_photo_burst_is_buffered_and_combined(self, adapter):
+        first_photo = _make_photo(_make_file_obj(b"first"))
+        second_photo = _make_photo(_make_file_obj(b"second"))
+
+        msg1 = _make_message(caption="two images", photo=[first_photo])
+        msg2 = _make_message(photo=[second_photo])
+
+        with patch("gateway.platforms.telegram.cache_image_from_bytes", side_effect=["/tmp/burst-one.jpg", "/tmp/burst-two.jpg"]):
+            await adapter._handle_media_message(_make_update(msg1), MagicMock())
+            await adapter._handle_media_message(_make_update(msg2), MagicMock())
+            assert adapter.handle_message.await_count == 0
+            await asyncio.sleep(adapter.MEDIA_GROUP_WAIT_SECONDS + 0.05)
+
+        adapter.handle_message.assert_awaited_once()
+        event = adapter.handle_message.await_args.args[0]
+        assert event.text == "two images"
+        assert event.media_urls == ["/tmp/burst-one.jpg", "/tmp/burst-two.jpg"]
+        assert len(event.media_types) == 2
+
    @pytest.mark.asyncio
    async def test_photo_album_is_buffered_and_combined(self, adapter):
        first_photo = _make_photo(_make_file_obj(b"first"))
@@ -537,6 +558,51 @@ class TestSendDocument:
        assert call_kwargs["reply_to_message_id"] == 50


+class TestTelegramPhotoBatching:
+    @pytest.mark.asyncio
+    async def test_flush_photo_batch_does_not_drop_newer_scheduled_task(self, adapter):
+        old_task = MagicMock()
+        new_task = MagicMock()
+        batch_key = "session:photo-burst"
+        adapter._pending_photo_batch_tasks[batch_key] = new_task
+        adapter._pending_photo_batches[batch_key] = MessageEvent(
+            text="",
+            message_type=MessageType.PHOTO,
+            source=SimpleNamespace(channel_id="chat-1"),
+            media_urls=["/tmp/a.jpg"],
+            media_types=["image/jpeg"],
+        )
+
+        with (
+            patch("gateway.platforms.telegram.asyncio.current_task", return_value=old_task),
+            patch("gateway.platforms.telegram.asyncio.sleep", new=AsyncMock()),
+        ):
+            await adapter._flush_photo_batch(batch_key)
+
+        assert adapter._pending_photo_batch_tasks[batch_key] is new_task
+
+    @pytest.mark.asyncio
+    async def test_disconnect_cancels_pending_photo_batch_tasks(self, adapter):
+        task = MagicMock()
+        task.done.return_value = False
+        adapter._pending_photo_batch_tasks["session:photo-burst"] = task
+        adapter._pending_photo_batches["session:photo-burst"] = MessageEvent(
+            text="",
+            message_type=MessageType.PHOTO,
+            source=SimpleNamespace(channel_id="chat-1"),
+        )
+        adapter._app = MagicMock()
+        adapter._app.updater.stop = AsyncMock()
+        adapter._app.stop = AsyncMock()
+        adapter._app.shutdown = AsyncMock()
+
+        await adapter.disconnect()
+
+        task.cancel.assert_called_once()
+        assert adapter._pending_photo_batch_tasks == {}
+        assert adapter._pending_photo_batches == {}
+
+
 # ---------------------------------------------------------------------------
 # TestSendVideo — outbound video delivery
 # ---------------------------------------------------------------------------
--- a/tests/gateway/test_telegram_format.py
+++ b/tests/gateway/test_telegram_format.py
@@ -7,7 +7,7 @@ or corrupt user-visible content.

 import re
 import sys
-from unittest.mock import MagicMock
+from unittest.mock import AsyncMock, MagicMock

 import pytest

@@ -392,3 +392,27 @@ class TestStripMdv2:

    def test_empty_string(self):
        assert _strip_mdv2("") == ""
+
+
+@pytest.mark.asyncio
+async def test_send_escapes_chunk_indicator_for_markdownv2(adapter):
+    adapter.MAX_MESSAGE_LENGTH = 80
+    adapter._bot = MagicMock()
+
+    sent_texts = []
+
+    async def _fake_send_message(**kwargs):
+        sent_texts.append(kwargs["text"])
+        msg = MagicMock()
+        msg.message_id = len(sent_texts)
+        return msg
+
+    adapter._bot.send_message = AsyncMock(side_effect=_fake_send_message)
+
+    content = ("**bold** chunk content " * 12).strip()
+    result = await adapter.send("123", content)
+
+    assert result.success is True
+    assert len(sent_texts) > 1
+    assert re.search(r" \\\([0-9]+/[0-9]+\\\)$", sent_texts[0])
+    assert re.search(r" \\\([0-9]+/[0-9]+\\\)$", sent_texts[-1])
--- a/tests/gateway/test_telegram_photo_interrupts.py
+++ b/tests/gateway/test_telegram_photo_interrupts.py
@@ -0,0 +1,49 @@
+import asyncio
+from unittest.mock import MagicMock
+
+import pytest
+
+from gateway.config import GatewayConfig, Platform, PlatformConfig
+from gateway.platforms.base import MessageEvent, MessageType
+from gateway.session import SessionSource, build_session_key
+from gateway.run import GatewayRunner
+
+
+class _PendingAdapter:
+    def __init__(self):
+        self._pending_messages = {}
+
+
+def _make_runner():
+    runner = object.__new__(GatewayRunner)
+    runner.config = GatewayConfig(platforms={Platform.TELEGRAM: PlatformConfig(enabled=True, token="***")})
+    runner.adapters = {Platform.TELEGRAM: _PendingAdapter()}
+    runner._running_agents = {}
+    runner._pending_messages = {}
+    runner._pending_approvals = {}
+    runner._voice_mode = {}
+    runner._is_user_authorized = lambda _source: True
+    return runner
+
+
+@pytest.mark.asyncio
+async def test_handle_message_does_not_priority_interrupt_photo_followup():
+    runner = _make_runner()
+    source = SessionSource(platform=Platform.TELEGRAM, chat_id="12345", chat_type="dm")
+    session_key = build_session_key(source)
+    running_agent = MagicMock()
+    runner._running_agents[session_key] = running_agent
+
+    event = MessageEvent(
+        text="caption",
+        message_type=MessageType.PHOTO,
+        source=source,
+        media_urls=["/tmp/photo-a.jpg"],
+        media_types=["image/jpeg"],
+    )
+
+    result = await runner._handle_message(event)
+
+    assert result is None
+    running_agent.interrupt.assert_not_called()
+    assert runner.adapters[Platform.TELEGRAM]._pending_messages[session_key] is event
--- a/tests/gateway/test_voice_command.py
+++ b/tests/gateway/test_voice_command.py
@@ -1,5 +1,6 @@
 """Tests for the /voice command and auto voice reply in the gateway."""

+import importlib.util
 import json
 import os
 import queue
@@ -206,9 +207,11 @@ class TestAutoVoiceReply:
      2. gateway _send_voice_reply: fires based on voice_mode setting

    To prevent double audio, _send_voice_reply is skipped when voice input
-    already triggered base adapter auto-TTS (skip_double = is_voice_input).
-    Exception: Discord voice channel — both auto-TTS and Discord play_tts
-    override skip, so the runner must handle it via play_in_voice_channel.
+    already triggered base adapter auto-TTS.
+
+    For Discord voice channels, the base adapter now routes play_tts directly
+    into VC playback, so the runner should still skip voice-input follow-ups to
+    avoid double playback.
    """

    @pytest.fixture
@@ -292,14 +295,14 @@ class TestAutoVoiceReply:

    # -- Discord VC exception: runner must handle --------------------------

-    def test_discord_vc_voice_input_runner_fires(self, runner):
-        """Discord VC + voice input: base play_tts skips (VC override),
-        so runner must handle via play_in_voice_channel."""
-        assert self._call(runner, "all", MessageType.VOICE, in_voice_channel=True) is True
+    def test_discord_vc_voice_input_base_handles(self, runner):
+        """Discord VC + voice input: base adapter play_tts plays in VC,
+        so runner skips to avoid double playback."""
+        assert self._call(runner, "all", MessageType.VOICE, in_voice_channel=True) is False

-    def test_discord_vc_voice_only_runner_fires(self, runner):
-        """Discord VC + voice_only + voice: runner must handle."""
-        assert self._call(runner, "voice_only", MessageType.VOICE, in_voice_channel=True) is True
+    def test_discord_vc_voice_only_base_handles(self, runner):
+        """Discord VC + voice_only + voice: base adapter handles."""
+        assert self._call(runner, "voice_only", MessageType.VOICE, in_voice_channel=True) is False

    # -- Edge cases --------------------------------------------------------

@@ -422,17 +425,23 @@ class TestDiscordPlayTtsSkip:
        return adapter

    @pytest.mark.asyncio
-    async def test_play_tts_skipped_when_in_vc(self):
+    async def test_play_tts_plays_in_vc_when_connected(self):
        adapter = self._make_discord_adapter()
        # Simulate bot in voice channel for guild 111, text channel 123
        mock_vc = MagicMock()
        mock_vc.is_connected.return_value = True
+        mock_vc.is_playing.return_value = False
        adapter._voice_clients[111] = mock_vc
        adapter._voice_text_channels[111] = 123

+        # Mock play_in_voice_channel to avoid actual ffmpeg call
+        async def fake_play(gid, path):
+            return True
+        adapter.play_in_voice_channel = fake_play
+
        result = await adapter.play_tts(chat_id="123", audio_path="/tmp/test.ogg")
+        # play_tts now plays in VC instead of being a no-op
        assert result.success is True
-        # send_voice should NOT have been called (no client, would fail)

    @pytest.mark.asyncio
    async def test_play_tts_not_skipped_when_not_in_vc(self):
@@ -728,6 +737,24 @@ class TestVoiceChannelCommands:
        result = await runner._handle_voice_channel_join(event)
        assert "failed" in result.lower()

+    @pytest.mark.asyncio
+    async def test_join_missing_voice_dependencies(self, runner):
+        """Missing PyNaCl/davey should return a user-actionable install hint."""
+        mock_channel = MagicMock()
+        mock_channel.name = "General"
+        mock_adapter = AsyncMock()
+        mock_adapter.join_voice_channel = AsyncMock(
+            side_effect=RuntimeError("PyNaCl library needed in order to use voice")
+        )
+        mock_adapter.get_user_voice_channel = AsyncMock(return_value=mock_channel)
+        event = self._make_discord_event()
+        runner.adapters[event.source.platform] = mock_adapter
+
+        result = await runner._handle_voice_channel_join(event)
+
+        assert "voice dependencies are missing" in result.lower()
+        assert "hermes-agent[messaging]" in result
+
    # -- _handle_voice_channel_leave --

    @pytest.mark.asyncio
@@ -2031,3 +2058,534 @@ class TestDisconnectVoiceCleanup:
        assert len(adapter._voice_receivers) == 0
        assert len(adapter._voice_listen_tasks) == 0
        assert len(adapter._voice_timeout_tasks) == 0
+
+
+# =====================================================================
+# Discord Voice Channel Flow Tests
+# =====================================================================
+
+
+@pytest.mark.skipif(
+    importlib.util.find_spec("nacl") is None,
+    reason="PyNaCl not installed",
+)
+class TestVoiceReception:
+    """Audio reception: SSRC mapping, DAVE passthrough, buffer lifecycle."""
+
+    @staticmethod
+    def _make_receiver(allowed_ids=None, members=None, dave=False, bot_id=9999):
+        from gateway.platforms.discord import VoiceReceiver
+        vc = MagicMock()
+        vc._connection.secret_key = [0] * 32
+        vc._connection.dave_session = MagicMock() if dave else None
+        vc._connection.ssrc = bot_id
+        vc._connection.add_socket_listener = MagicMock()
+        vc._connection.remove_socket_listener = MagicMock()
+        vc._connection.hook = None
+        vc.user = SimpleNamespace(id=bot_id)
+        vc.channel = MagicMock()
+        vc.channel.members = members or []
+        receiver = VoiceReceiver(vc, allowed_user_ids=allowed_ids)
+        return receiver
+
+    @staticmethod
+    def _fill_buffer(receiver, ssrc, duration_s=1.0, age_s=3.0):
+        """Add PCM data to buffer. 48kHz stereo 16-bit = 192000 bytes/sec."""
+        size = int(192000 * duration_s)
+        receiver._buffers[ssrc] = bytearray(b"\x00" * size)
+        receiver._last_packet_time[ssrc] = time.monotonic() - age_s
+
+    # -- Known SSRC (normal flow) --
+
+    def test_known_ssrc_returns_completed(self):
+        receiver = self._make_receiver()
+        receiver.start()
+        receiver.map_ssrc(100, 42)
+        self._fill_buffer(receiver, 100)
+        completed = receiver.check_silence()
+        assert len(completed) == 1
+        assert completed[0][0] == 42
+        assert len(receiver._buffers[100]) == 0  # cleared
+
+    def test_known_ssrc_short_buffer_ignored(self):
+        receiver = self._make_receiver()
+        receiver.start()
+        receiver.map_ssrc(100, 42)
+        self._fill_buffer(receiver, 100, duration_s=0.1)  # too short
+        completed = receiver.check_silence()
+        assert len(completed) == 0
+
+    def test_known_ssrc_recent_audio_waits(self):
+        receiver = self._make_receiver()
+        receiver.start()
+        receiver.map_ssrc(100, 42)
+        self._fill_buffer(receiver, 100, age_s=0.0)  # just arrived
+        completed = receiver.check_silence()
+        assert len(completed) == 0
+
+    # -- Unknown SSRC + DAVE passthrough --
+
+    def test_unknown_ssrc_no_automap_no_completed(self):
+        """Unknown SSRC, no members to infer — buffer cleared, not returned."""
+        receiver = self._make_receiver(dave=True, members=[])
+        receiver.start()
+        self._fill_buffer(receiver, 100)
+        completed = receiver.check_silence()
+        assert len(completed) == 0
+        assert len(receiver._buffers[100]) == 0
+
+    def test_unknown_ssrc_late_speaking_event(self):
+        """Audio buffered before SPEAKING → SPEAKING maps → next check returns it."""
+        receiver = self._make_receiver(dave=True)
+        receiver.start()
+        self._fill_buffer(receiver, 100, age_s=0.0)  # still receiving
+        # No user yet
+        assert receiver.check_silence() == []
+        # SPEAKING event arrives
+        receiver.map_ssrc(100, 42)
+        # Silence kicks in
+        receiver._last_packet_time[100] = time.monotonic() - 3.0
+        completed = receiver.check_silence()
+        assert len(completed) == 1
+        assert completed[0][0] == 42
+
+    # -- SSRC auto-mapping --
+
+    def test_automap_single_allowed_user(self):
+        members = [
+            SimpleNamespace(id=9999, name="Bot"),
+            SimpleNamespace(id=42, name="Alice"),
+        ]
+        receiver = self._make_receiver(allowed_ids={"42"}, members=members)
+        receiver.start()
+        self._fill_buffer(receiver, 100)
+        completed = receiver.check_silence()
+        assert len(completed) == 1
+        assert completed[0][0] == 42
+        assert receiver._ssrc_to_user[100] == 42
+
+    def test_automap_multiple_allowed_users_no_map(self):
+        members = [
+            SimpleNamespace(id=9999, name="Bot"),
+            SimpleNamespace(id=42, name="Alice"),
+            SimpleNamespace(id=43, name="Bob"),
+        ]
+        receiver = self._make_receiver(allowed_ids={"42", "43"}, members=members)
+        receiver.start()
+        self._fill_buffer(receiver, 100)
+        completed = receiver.check_silence()
+        assert len(completed) == 0
+
+    def test_automap_no_allowlist_single_member(self):
+        """No allowed_user_ids → sole non-bot member inferred."""
+        members = [
+            SimpleNamespace(id=9999, name="Bot"),
+            SimpleNamespace(id=42, name="Alice"),
+        ]
+        receiver = self._make_receiver(allowed_ids=None, members=members)
+        receiver.start()
+        self._fill_buffer(receiver, 100)
+        completed = receiver.check_silence()
+        assert len(completed) == 1
+        assert completed[0][0] == 42
+
+    def test_automap_unallowed_user_rejected(self):
+        """User in channel but not in allowed list — not mapped."""
+        members = [
+            SimpleNamespace(id=9999, name="Bot"),
+            SimpleNamespace(id=42, name="Alice"),
+        ]
+        receiver = self._make_receiver(allowed_ids={"99"}, members=members)
+        receiver.start()
+        self._fill_buffer(receiver, 100)
+        completed = receiver.check_silence()
+        assert len(completed) == 0
+
+    def test_automap_only_bot_in_channel(self):
+        """Only bot in channel — no one to map to."""
+        members = [SimpleNamespace(id=9999, name="Bot")]
+        receiver = self._make_receiver(allowed_ids=None, members=members)
+        receiver.start()
+        self._fill_buffer(receiver, 100)
+        completed = receiver.check_silence()
+        assert len(completed) == 0
+
+    def test_automap_persists_across_calls(self):
+        """Auto-mapped SSRC stays mapped for subsequent checks."""
+        members = [
+            SimpleNamespace(id=9999, name="Bot"),
+            SimpleNamespace(id=42, name="Alice"),
+        ]
+        receiver = self._make_receiver(allowed_ids={"42"}, members=members)
+        receiver.start()
+        self._fill_buffer(receiver, 100)
+        receiver.check_silence()
+        assert receiver._ssrc_to_user[100] == 42
+        # Second utterance — should use cached mapping
+        self._fill_buffer(receiver, 100)
+        completed = receiver.check_silence()
+        assert len(completed) == 1
+        assert completed[0][0] == 42
+
+    # -- Stale buffer cleanup --
+
+    def test_stale_unknown_buffer_discarded(self):
+        """Buffer with no user and very old timestamp is discarded."""
+        receiver = self._make_receiver()
+        receiver.start()
+        receiver._buffers[200] = bytearray(b"\x00" * 100)
+        receiver._last_packet_time[200] = time.monotonic() - 10.0
+        receiver.check_silence()
+        assert 200 not in receiver._buffers
+
+    # -- Pause / resume (echo prevention) --
+
+    def test_paused_receiver_ignores_packets(self):
+        receiver = self._make_receiver()
+        receiver.start()
+        receiver.pause()
+        receiver._on_packet(b"\x00" * 100)
+        assert len(receiver._buffers) == 0
+
+    def test_resumed_receiver_accepts_packets(self):
+        receiver = self._make_receiver()
+        receiver.start()
+        receiver.pause()
+        receiver.resume()
+        assert receiver._paused is False
+
+    # -- _on_packet DAVE passthrough behavior --
+
+    def _make_receiver_with_nacl(self, dave_session=None, mapped_ssrcs=None):
+        """Create a receiver that can process _on_packet with mocked NaCl + Opus."""
+        from gateway.platforms.discord import VoiceReceiver
+        vc = MagicMock()
+        vc._connection.secret_key = [0] * 32
+        vc._connection.dave_session = dave_session
+        vc._connection.ssrc = 9999
+        vc._connection.add_socket_listener = MagicMock()
+        vc._connection.remove_socket_listener = MagicMock()
+        vc._connection.hook = None
+        vc.user = SimpleNamespace(id=9999)
+        vc.channel = MagicMock()
+        vc.channel.members = []
+        receiver = VoiceReceiver(vc)
+        receiver.start()
+        # Pre-map SSRCs if provided
+        if mapped_ssrcs:
+            for ssrc, uid in mapped_ssrcs.items():
+                receiver.map_ssrc(ssrc, uid)
+        return receiver
+
+    @staticmethod
+    def _build_rtp_packet(ssrc=100, seq=1, timestamp=960):
+        """Build a minimal valid RTP packet for _on_packet.
+
+        We need: RTP header (12 bytes) + encrypted payload + 4-byte nonce.
+        NaCl decrypt is mocked so payload content doesn't matter.
+        """
+        import struct
+        # RTP header: version=2, payload_type=0x78, no extension, no CSRC
+        header = struct.pack(">BBHII", 0x80, 0x78, seq, timestamp, ssrc)
+        # Fake encrypted payload (NaCl will be mocked) + 4 byte nonce
+        payload = b"\x00" * 20 + b"\x00\x00\x00\x01"
+        return header + payload
+
+    def _inject_mock_decoder(self, receiver, ssrc):
+        """Pre-inject a mock Opus decoder for the given SSRC."""
+        mock_decoder = MagicMock()
+        mock_decoder.decode.return_value = b"\x00" * 3840
+        receiver._decoders[ssrc] = mock_decoder
+        return mock_decoder
+
+    def test_on_packet_dave_known_user_decrypt_ok(self):
+        """Known SSRC + DAVE decrypt success → audio buffered."""
+        dave = MagicMock()
+        dave.decrypt.return_value = b"\xf8\xff\xfe"
+        receiver = self._make_receiver_with_nacl(
+            dave_session=dave, mapped_ssrcs={100: 42}
+        )
+        self._inject_mock_decoder(receiver, 100)
+
+        with patch("nacl.secret.Aead") as mock_aead:
+            mock_aead.return_value.decrypt.return_value = b"\xf8\xff\xfe"
+            receiver._on_packet(self._build_rtp_packet(ssrc=100))
+
+        assert 100 in receiver._buffers
+        assert len(receiver._buffers[100]) > 0
+        dave.decrypt.assert_called_once()
+
+    def test_on_packet_dave_unknown_ssrc_passthrough(self):
+        """Unknown SSRC + DAVE → skip DAVE, attempt Opus decode (passthrough)."""
+        dave = MagicMock()
+        receiver = self._make_receiver_with_nacl(dave_session=dave)
+        self._inject_mock_decoder(receiver, 100)
+
+        with patch("nacl.secret.Aead") as mock_aead:
+            mock_aead.return_value.decrypt.return_value = b"\xf8\xff\xfe"
+            receiver._on_packet(self._build_rtp_packet(ssrc=100))
+
+        dave.decrypt.assert_not_called()
+        assert 100 in receiver._buffers
+        assert len(receiver._buffers[100]) > 0
+
+    def test_on_packet_dave_unencrypted_error_passthrough(self):
+        """DAVE decrypt 'Unencrypted' error → use data as-is, don't drop."""
+        dave = MagicMock()
+        dave.decrypt.side_effect = Exception(
+            "Failed to decrypt: DecryptionFailed(UnencryptedWhenPassthroughDisabled)"
+        )
+        receiver = self._make_receiver_with_nacl(
+            dave_session=dave, mapped_ssrcs={100: 42}
+        )
+        self._inject_mock_decoder(receiver, 100)
+
+        with patch("nacl.secret.Aead") as mock_aead:
+            mock_aead.return_value.decrypt.return_value = b"\xf8\xff\xfe"
+            receiver._on_packet(self._build_rtp_packet(ssrc=100))
+
+        assert 100 in receiver._buffers
+        assert len(receiver._buffers[100]) > 0
+
+    def test_on_packet_dave_other_error_drops(self):
+        """DAVE decrypt non-Unencrypted error → packet dropped."""
+        dave = MagicMock()
+        dave.decrypt.side_effect = Exception("KeyRotationFailed")
+        receiver = self._make_receiver_with_nacl(
+            dave_session=dave, mapped_ssrcs={100: 42}
+        )
+
+        with patch("nacl.secret.Aead") as mock_aead:
+            mock_aead.return_value.decrypt.return_value = b"\xf8\xff\xfe"
+            receiver._on_packet(self._build_rtp_packet(ssrc=100))
+
+        assert len(receiver._buffers.get(100, b"")) == 0
+
+    def test_on_packet_no_dave_direct_decode(self):
+        """No DAVE session → decode directly."""
+        receiver = self._make_receiver_with_nacl(dave_session=None)
+        self._inject_mock_decoder(receiver, 100)
+
+        with patch("nacl.secret.Aead") as mock_aead:
+            mock_aead.return_value.decrypt.return_value = b"\xf8\xff\xfe"
+            receiver._on_packet(self._build_rtp_packet(ssrc=100))
+
+        assert 100 in receiver._buffers
+        assert len(receiver._buffers[100]) > 0
+
+    def test_on_packet_bot_own_ssrc_ignored(self):
+        """Bot's own SSRC → dropped (echo prevention)."""
+        receiver = self._make_receiver_with_nacl()
+        with patch("nacl.secret.Aead"):
+            receiver._on_packet(self._build_rtp_packet(ssrc=9999))
+        assert len(receiver._buffers) == 0
+
+    def test_on_packet_multiple_ssrcs_separate_buffers(self):
+        """Different SSRCs → separate buffers."""
+        receiver = self._make_receiver_with_nacl(dave_session=None)
+        self._inject_mock_decoder(receiver, 100)
+        self._inject_mock_decoder(receiver, 200)
+
+        with patch("nacl.secret.Aead") as mock_aead:
+            mock_aead.return_value.decrypt.return_value = b"\xf8\xff\xfe"
+            receiver._on_packet(self._build_rtp_packet(ssrc=100))
+            receiver._on_packet(self._build_rtp_packet(ssrc=200))
+
+        assert 100 in receiver._buffers
+        assert 200 in receiver._buffers
+
+
+class TestVoiceTTSPlayback:
+    """TTS playback: play_tts in VC, dedup, fallback."""
+
+    @staticmethod
+    def _make_discord_adapter():
+        from gateway.platforms.discord import DiscordAdapter
+        from gateway.config import PlatformConfig, Platform
+        config = PlatformConfig(enabled=True, extra={})
+        config.token = "fake-token"
+        adapter = object.__new__(DiscordAdapter)
+        adapter.platform = Platform.DISCORD
+        adapter.config = config
+        adapter._voice_clients = {}
+        adapter._voice_text_channels = {}
+        adapter._voice_receivers = {}
+        return adapter
+
+    # -- play_tts behavior --
+
+    @pytest.mark.asyncio
+    async def test_play_tts_plays_in_vc(self):
+        """play_tts calls play_in_voice_channel when bot is in VC."""
+        adapter = self._make_discord_adapter()
+        mock_vc = MagicMock()
+        mock_vc.is_connected.return_value = True
+        adapter._voice_clients[111] = mock_vc
+        adapter._voice_text_channels[111] = 123
+
+        played = []
+        async def fake_play(gid, path):
+            played.append((gid, path))
+            return True
+        adapter.play_in_voice_channel = fake_play
+
+        result = await adapter.play_tts(chat_id="123", audio_path="/tmp/tts.ogg")
+        assert result.success is True
+        assert played == [(111, "/tmp/tts.ogg")]
+
+    @pytest.mark.asyncio
+    async def test_play_tts_fallback_when_not_in_vc(self):
+        """play_tts sends as file attachment when bot is not in VC."""
+        adapter = self._make_discord_adapter()
+        from gateway.platforms.base import SendResult
+        adapter.send_voice = AsyncMock(return_value=SendResult(success=False, error="no client"))
+        result = await adapter.play_tts(chat_id="123", audio_path="/tmp/tts.ogg")
+        assert result.success is False
+        adapter.send_voice.assert_called_once()
+
+    @pytest.mark.asyncio
+    async def test_play_tts_wrong_channel_no_match(self):
+        """play_tts doesn't match if chat_id is for a different channel."""
+        adapter = self._make_discord_adapter()
+        mock_vc = MagicMock()
+        mock_vc.is_connected.return_value = True
+        adapter._voice_clients[111] = mock_vc
+        adapter._voice_text_channels[111] = 123
+
+        from gateway.platforms.base import SendResult
+        adapter.send_voice = AsyncMock(return_value=SendResult(success=True))
+        # Different chat_id — shouldn't match VC
+        result = await adapter.play_tts(chat_id="999", audio_path="/tmp/tts.ogg")
+        adapter.send_voice.assert_called_once()
+
+    # -- Runner dedup --
+
+    @staticmethod
+    def _make_runner():
+        from gateway.run import GatewayRunner
+        runner = object.__new__(GatewayRunner)
+        runner._voice_mode = {}
+        runner.adapters = {}
+        return runner
+
+    def _call_should_reply(self, runner, voice_mode, msg_type, response="Hello", agent_msgs=None):
+        from gateway.platforms.base import MessageType, MessageEvent, SessionSource
+        from gateway.config import Platform
+        runner._voice_mode["ch1"] = voice_mode
+        source = SessionSource(
+            platform=Platform.DISCORD, chat_id="ch1",
+            user_id="1", user_name="test", chat_type="channel",
+        )
+        event = MessageEvent(source=source, text="test", message_type=msg_type)
+        return runner._should_send_voice_reply(event, response, agent_msgs or [])
+
+    def test_voice_input_runner_skips(self):
+        """Voice input: runner skips — base adapter handles via play_tts."""
+        from gateway.platforms.base import MessageType
+        runner = self._make_runner()
+        assert self._call_should_reply(runner, "all", MessageType.VOICE) is False
+
+    def test_text_input_voice_all_runner_fires(self):
+        """Text input + voice_mode=all: runner generates TTS."""
+        from gateway.platforms.base import MessageType
+        runner = self._make_runner()
+        assert self._call_should_reply(runner, "all", MessageType.TEXT) is True
+
+    def test_text_input_voice_off_no_tts(self):
+        """Text input + voice_mode=off: no TTS."""
+        from gateway.platforms.base import MessageType
+        runner = self._make_runner()
+        assert self._call_should_reply(runner, "off", MessageType.TEXT) is False
+
+    def test_text_input_voice_only_no_tts(self):
+        """Text input + voice_mode=voice_only: no TTS for text."""
+        from gateway.platforms.base import MessageType
+        runner = self._make_runner()
+        assert self._call_should_reply(runner, "voice_only", MessageType.TEXT) is False
+
+    def test_error_response_no_tts(self):
+        """Error response: no TTS regardless of voice_mode."""
+        from gateway.platforms.base import MessageType
+        runner = self._make_runner()
+        assert self._call_should_reply(runner, "all", MessageType.TEXT, response="Error: boom") is False
+
+    def test_empty_response_no_tts(self):
+        """Empty response: no TTS."""
+        from gateway.platforms.base import MessageType
+        runner = self._make_runner()
+        assert self._call_should_reply(runner, "all", MessageType.TEXT, response="") is False
+
+    def test_agent_tts_tool_dedup(self):
+        """Agent already called text_to_speech tool: runner skips."""
+        from gateway.platforms.base import MessageType
+        runner = self._make_runner()
+        agent_msgs = [{"role": "assistant", "tool_calls": [
+            {"id": "1", "type": "function", "function": {"name": "text_to_speech", "arguments": "{}"}}
+        ]}]
+        assert self._call_should_reply(runner, "all", MessageType.TEXT, agent_msgs=agent_msgs) is False
+
+
+class TestUDPKeepalive:
+    """UDP keepalive prevents Discord from dropping the voice session."""
+
+    def test_keepalive_interval_is_reasonable(self):
+        from gateway.platforms.discord import DiscordAdapter
+        interval = DiscordAdapter._KEEPALIVE_INTERVAL
+        assert 5 <= interval <= 30, f"Keepalive interval {interval}s should be between 5-30s"
+
+    @pytest.mark.asyncio
+    async def test_keepalive_sends_silence_frame(self):
+        """Listen loop sends silence frame via send_packet after interval."""
+        from gateway.platforms.discord import DiscordAdapter
+        from gateway.config import PlatformConfig, Platform
+
+        config = PlatformConfig(enabled=True, extra={})
+        config.token = "fake"
+        adapter = object.__new__(DiscordAdapter)
+        adapter.platform = Platform.DISCORD
+        adapter.config = config
+        adapter._voice_clients = {}
+        adapter._voice_text_channels = {}
+        adapter._voice_receivers = {}
+        adapter._voice_listen_tasks = {}
+
+        # Mock VC and receiver
+        mock_vc = MagicMock()
+        mock_vc.is_connected.return_value = True
+        mock_conn = MagicMock()
+        adapter._voice_clients[111] = mock_vc
+        mock_vc._connection = mock_conn
+
+        from gateway.platforms.discord import VoiceReceiver
+        mock_receiver_vc = MagicMock()
+        mock_receiver_vc._connection.secret_key = [0] * 32
+        mock_receiver_vc._connection.dave_session = None
+        mock_receiver_vc._connection.ssrc = 9999
+        mock_receiver_vc._connection.add_socket_listener = MagicMock()
+        mock_receiver_vc._connection.remove_socket_listener = MagicMock()
+        mock_receiver_vc._connection.hook = None
+        receiver = VoiceReceiver(mock_receiver_vc)
+        receiver.start()
+        adapter._voice_receivers[111] = receiver
+
+        # Set keepalive interval very short for test
+        original_interval = DiscordAdapter._KEEPALIVE_INTERVAL
+        DiscordAdapter._KEEPALIVE_INTERVAL = 0.1
+
+        try:
+            # Run listen loop briefly
+            import asyncio
+            loop_task = asyncio.create_task(adapter._voice_listen_loop(111))
+            await asyncio.sleep(0.3)
+            receiver._running = False  # stop loop
+            await asyncio.sleep(0.1)
+            loop_task.cancel()
+            try:
+                await loop_task
+            except asyncio.CancelledError:
+                pass
+
+            # send_packet should have been called with silence frame
+            mock_conn.send_packet.assert_called_with(b'\xf8\xff\xfe')
+        finally:
+            DiscordAdapter._KEEPALIVE_INTERVAL = original_interval
--- a/tests/hermes_cli/test_chat_skills_flag.py
+++ b/tests/hermes_cli/test_chat_skills_flag.py
@@ -0,0 +1,77 @@
+import sys
+
+
+def test_top_level_skills_flag_defaults_to_chat(monkeypatch):
+    import hermes_cli.main as main_mod
+
+    captured = {}
+
+    def fake_cmd_chat(args):
+        captured["skills"] = args.skills
+        captured["command"] = args.command
+
+    monkeypatch.setattr(main_mod, "cmd_chat", fake_cmd_chat)
+    monkeypatch.setattr(
+        sys,
+        "argv",
+        ["hermes", "-s", "hermes-agent-dev,github-auth"],
+    )
+
+    main_mod.main()
+
+    assert captured == {
+        "skills": ["hermes-agent-dev,github-auth"],
+        "command": None,
+    }
+
+
+def test_chat_subcommand_accepts_skills_flag(monkeypatch):
+    import hermes_cli.main as main_mod
+
+    captured = {}
+
+    def fake_cmd_chat(args):
+        captured["skills"] = args.skills
+        captured["query"] = args.query
+
+    monkeypatch.setattr(main_mod, "cmd_chat", fake_cmd_chat)
+    monkeypatch.setattr(
+        sys,
+        "argv",
+        ["hermes", "chat", "-s", "github-auth", "-q", "hello"],
+    )
+
+    main_mod.main()
+
+    assert captured == {
+        "skills": ["github-auth"],
+        "query": "hello",
+    }
+
+
+def test_continue_worktree_and_skills_flags_work_together(monkeypatch):
+    import hermes_cli.main as main_mod
+
+    captured = {}
+
+    def fake_cmd_chat(args):
+        captured["continue_last"] = args.continue_last
+        captured["worktree"] = args.worktree
+        captured["skills"] = args.skills
+        captured["command"] = args.command
+
+    monkeypatch.setattr(main_mod, "cmd_chat", fake_cmd_chat)
+    monkeypatch.setattr(
+        sys,
+        "argv",
+        ["hermes", "-c", "-w", "-s", "hermes-agent-dev"],
+    )
+
+    main_mod.main()
+
+    assert captured == {
+        "continue_last": True,
+        "worktree": True,
+        "skills": ["hermes-agent-dev"],
+        "command": "chat",
+    }
--- a/tests/hermes_cli/test_cron.py
+++ b/tests/hermes_cli/test_cron.py
@@ -0,0 +1,107 @@
+"""Tests for hermes_cli.cron command handling."""
+
+from argparse import Namespace
+
+import pytest
+
+from cron.jobs import create_job, get_job, list_jobs
+from hermes_cli.cron import cron_command
+
+
+@pytest.fixture()
+def tmp_cron_dir(tmp_path, monkeypatch):
+    monkeypatch.setattr("cron.jobs.CRON_DIR", tmp_path / "cron")
+    monkeypatch.setattr("cron.jobs.JOBS_FILE", tmp_path / "cron" / "jobs.json")
+    monkeypatch.setattr("cron.jobs.OUTPUT_DIR", tmp_path / "cron" / "output")
+    return tmp_path
+
+
+class TestCronCommandLifecycle:
+    def test_pause_resume_run(self, tmp_cron_dir, capsys):
+        job = create_job(prompt="Check server status", schedule="every 1h")
+
+        cron_command(Namespace(cron_command="pause", job_id=job["id"]))
+        paused = get_job(job["id"])
+        assert paused["state"] == "paused"
+
+        cron_command(Namespace(cron_command="resume", job_id=job["id"]))
+        resumed = get_job(job["id"])
+        assert resumed["state"] == "scheduled"
+
+        cron_command(Namespace(cron_command="run", job_id=job["id"]))
+        triggered = get_job(job["id"])
+        assert triggered["state"] == "scheduled"
+
+        out = capsys.readouterr().out
+        assert "Paused job" in out
+        assert "Resumed job" in out
+        assert "Triggered job" in out
+
+    def test_edit_can_replace_and_clear_skills(self, tmp_cron_dir, capsys):
+        job = create_job(
+            prompt="Combine skill outputs",
+            schedule="every 1h",
+            skill="blogwatcher",
+        )
+
+        cron_command(
+            Namespace(
+                cron_command="edit",
+                job_id=job["id"],
+                schedule="every 2h",
+                prompt="Revised prompt",
+                name="Edited Job",
+                deliver=None,
+                repeat=None,
+                skill=None,
+                skills=["find-nearby", "blogwatcher"],
+                clear_skills=False,
+            )
+        )
+        updated = get_job(job["id"])
+        assert updated["skills"] == ["find-nearby", "blogwatcher"]
+        assert updated["name"] == "Edited Job"
+        assert updated["prompt"] == "Revised prompt"
+        assert updated["schedule_display"] == "every 120m"
+
+        cron_command(
+            Namespace(
+                cron_command="edit",
+                job_id=job["id"],
+                schedule=None,
+                prompt=None,
+                name=None,
+                deliver=None,
+                repeat=None,
+                skill=None,
+                skills=None,
+                clear_skills=True,
+            )
+        )
+        cleared = get_job(job["id"])
+        assert cleared["skills"] == []
+        assert cleared["skill"] is None
+
+        out = capsys.readouterr().out
+        assert "Updated job" in out
+
+    def test_create_with_multiple_skills(self, tmp_cron_dir, capsys):
+        cron_command(
+            Namespace(
+                cron_command="create",
+                schedule="every 1h",
+                prompt="Use both skills",
+                name="Skill combo",
+                deliver=None,
+                repeat=None,
+                skill=None,
+                skills=["blogwatcher", "find-nearby"],
+            )
+        )
+        out = capsys.readouterr().out
+        assert "Created job" in out
+
+        jobs = list_jobs()
+        assert len(jobs) == 1
+        assert jobs[0]["skills"] == ["blogwatcher", "find-nearby"]
+        assert jobs[0]["name"] == "Skill combo"
--- a/tests/hermes_cli/test_env_loader.py
+++ b/tests/hermes_cli/test_env_loader.py
@@ -0,0 +1,70 @@
+import importlib
+import os
+import sys
+from pathlib import Path
+
+from hermes_cli.env_loader import load_hermes_dotenv
+
+
+def test_user_env_overrides_stale_shell_values(tmp_path, monkeypatch):
+    home = tmp_path / "hermes"
+    home.mkdir()
+    env_file = home / ".env"
+    env_file.write_text("OPENAI_BASE_URL=https://new.example/v1\n", encoding="utf-8")
+
+    monkeypatch.setenv("OPENAI_BASE_URL", "https://old.example/v1")
+
+    loaded = load_hermes_dotenv(hermes_home=home)
+
+    assert loaded == [env_file]
+    assert os.getenv("OPENAI_BASE_URL") == "https://new.example/v1"
+
+
+def test_project_env_overrides_stale_shell_values_when_user_env_missing(tmp_path, monkeypatch):
+    home = tmp_path / "hermes"
+    project_env = tmp_path / ".env"
+    project_env.write_text("OPENAI_BASE_URL=https://project.example/v1\n", encoding="utf-8")
+
+    monkeypatch.setenv("OPENAI_BASE_URL", "https://old.example/v1")
+
+    loaded = load_hermes_dotenv(hermes_home=home, project_env=project_env)
+
+    assert loaded == [project_env]
+    assert os.getenv("OPENAI_BASE_URL") == "https://project.example/v1"
+
+
+def test_user_env_takes_precedence_over_project_env(tmp_path, monkeypatch):
+    home = tmp_path / "hermes"
+    home.mkdir()
+    user_env = home / ".env"
+    project_env = tmp_path / ".env"
+    user_env.write_text("OPENAI_BASE_URL=https://user.example/v1\n", encoding="utf-8")
+    project_env.write_text("OPENAI_BASE_URL=https://project.example/v1\nOPENAI_API_KEY=project-key\n", encoding="utf-8")
+
+    monkeypatch.setenv("OPENAI_BASE_URL", "https://old.example/v1")
+    monkeypatch.delenv("OPENAI_API_KEY", raising=False)
+
+    loaded = load_hermes_dotenv(hermes_home=home, project_env=project_env)
+
+    assert loaded == [user_env, project_env]
+    assert os.getenv("OPENAI_BASE_URL") == "https://user.example/v1"
+    assert os.getenv("OPENAI_API_KEY") == "project-key"
+
+
+def test_main_import_applies_user_env_over_shell_values(tmp_path, monkeypatch):
+    home = tmp_path / "hermes"
+    home.mkdir()
+    (home / ".env").write_text(
+        "OPENAI_BASE_URL=https://new.example/v1\nHERMES_INFERENCE_PROVIDER=custom\n",
+        encoding="utf-8",
+    )
+
+    monkeypatch.setenv("HERMES_HOME", str(home))
+    monkeypatch.setenv("OPENAI_BASE_URL", "https://old.example/v1")
+    monkeypatch.setenv("HERMES_INFERENCE_PROVIDER", "openrouter")
+
+    sys.modules.pop("hermes_cli.main", None)
+    importlib.import_module("hermes_cli.main")
+
+    assert os.getenv("OPENAI_BASE_URL") == "https://new.example/v1"
+    assert os.getenv("HERMES_INFERENCE_PROVIDER") == "custom"
--- a/tests/hermes_cli/test_gateway.py
+++ b/tests/hermes_cli/test_gateway.py
@@ -35,7 +35,7 @@ def test_systemd_status_warns_when_linger_disabled(monkeypatch, tmp_path, capsys
    unit_path = tmp_path / "hermes-gateway.service"
    unit_path.write_text("[Unit]\n")

-    monkeypatch.setattr(gateway, "get_systemd_unit_path", lambda: unit_path)
+    monkeypatch.setattr(gateway, "get_systemd_unit_path", lambda system=False: unit_path)
    monkeypatch.setattr(gateway, "get_systemd_linger_status", lambda: (False, ""))

    def fake_run(cmd, capture_output=False, text=False, check=False):
@@ -50,7 +50,7 @@ def test_systemd_status_warns_when_linger_disabled(monkeypatch, tmp_path, capsys
    gateway.systemd_status(deep=False)

    out = capsys.readouterr().out
-    assert "Gateway service is running" in out
+    assert "gateway service is running" in out
    assert "Systemd linger is disabled" in out
    assert "loginctl enable-linger" in out

@@ -58,7 +58,7 @@ def test_systemd_status_warns_when_linger_disabled(monkeypatch, tmp_path, capsys
 def test_systemd_install_checks_linger_status(monkeypatch, tmp_path, capsys):
    unit_path = tmp_path / "systemd" / "user" / "hermes-gateway.service"

-    monkeypatch.setattr(gateway, "get_systemd_unit_path", lambda: unit_path)
+    monkeypatch.setattr(gateway, "get_systemd_unit_path", lambda system=False: unit_path)

    calls = []
    helper_calls = []
@@ -79,4 +79,93 @@ def test_systemd_install_checks_linger_status(monkeypatch, tmp_path, capsys):
        ["systemctl", "--user", "enable", gateway.SERVICE_NAME],
    ]
    assert helper_calls == [True]
-    assert "Service installed and enabled" in out
+    assert "User service installed and enabled" in out
+
+
+def test_systemd_install_system_scope_skips_linger_and_uses_systemctl(monkeypatch, tmp_path, capsys):
+    unit_path = tmp_path / "etc" / "systemd" / "system" / "hermes-gateway.service"
+
+    monkeypatch.setattr(gateway, "get_systemd_unit_path", lambda system=False: unit_path)
+    monkeypatch.setattr(
+        gateway,
+        "generate_systemd_unit",
+        lambda system=False, run_as_user=None: f"scope={system} user={run_as_user}\n",
+    )
+    monkeypatch.setattr(gateway, "_require_root_for_system_service", lambda action: None)
+
+    calls = []
+    helper_calls = []
+
+    def fake_run(cmd, check=False, **kwargs):
+        calls.append((cmd, check))
+        return SimpleNamespace(returncode=0, stdout="", stderr="")
+
+    monkeypatch.setattr(gateway.subprocess, "run", fake_run)
+    monkeypatch.setattr(gateway, "_ensure_linger_enabled", lambda: helper_calls.append(True))
+
+    gateway.systemd_install(force=False, system=True, run_as_user="alice")
+
+    out = capsys.readouterr().out
+    assert unit_path.exists()
+    assert unit_path.read_text(encoding="utf-8") == "scope=True user=alice\n"
+    assert [cmd for cmd, _ in calls] == [
+        ["systemctl", "daemon-reload"],
+        ["systemctl", "enable", gateway.SERVICE_NAME],
+    ]
+    assert helper_calls == []
+    assert "Configured to run as: alice" not in out  # generated test unit has no User= line
+    assert "System service installed and enabled" in out
+
+
+def test_conflicting_systemd_units_warning(monkeypatch, tmp_path, capsys):
+    user_unit = tmp_path / "user" / "hermes-gateway.service"
+    system_unit = tmp_path / "system" / "hermes-gateway.service"
+    user_unit.parent.mkdir(parents=True)
+    system_unit.parent.mkdir(parents=True)
+    user_unit.write_text("[Unit]\n", encoding="utf-8")
+    system_unit.write_text("[Unit]\n", encoding="utf-8")
+
+    monkeypatch.setattr(
+        gateway,
+        "get_systemd_unit_path",
+        lambda system=False: system_unit if system else user_unit,
+    )
+
+    gateway.print_systemd_scope_conflict_warning()
+
+    out = capsys.readouterr().out
+    assert "Both user and system gateway services are installed" in out
+    assert "hermes gateway uninstall" in out
+    assert "--system" in out
+
+
+def test_install_linux_gateway_from_setup_system_choice_without_root_prints_followup(monkeypatch, capsys):
+    monkeypatch.setattr(gateway, "prompt_linux_gateway_install_scope", lambda: "system")
+    monkeypatch.setattr(gateway.os, "geteuid", lambda: 1000)
+    monkeypatch.setattr(gateway, "_default_system_service_user", lambda: "alice")
+    monkeypatch.setattr(gateway, "systemd_install", lambda *args, **kwargs: (_ for _ in ()).throw(AssertionError("should not install")))
+
+    scope, did_install = gateway.install_linux_gateway_from_setup(force=False)
+
+    out = capsys.readouterr().out
+    assert (scope, did_install) == ("system", False)
+    assert "sudo hermes gateway install --system --run-as-user alice" in out
+    assert "sudo hermes gateway start --system" in out
+
+
+def test_install_linux_gateway_from_setup_system_choice_as_root_installs(monkeypatch):
+    monkeypatch.setattr(gateway, "prompt_linux_gateway_install_scope", lambda: "system")
+    monkeypatch.setattr(gateway.os, "geteuid", lambda: 0)
+    monkeypatch.setattr(gateway, "_default_system_service_user", lambda: "alice")
+
+    calls = []
+    monkeypatch.setattr(
+        gateway,
+        "systemd_install",
+        lambda force=False, system=False, run_as_user=None: calls.append((force, system, run_as_user)),
+    )
+
+    scope, did_install = gateway.install_linux_gateway_from_setup(force=True)
+
+    assert (scope, did_install) == ("system", True)
+    assert calls == [(True, True, "alice")]
--- a/tests/hermes_cli/test_gateway_linger.py
+++ b/tests/hermes_cli/test_gateway_linger.py
@@ -96,7 +96,7 @@ class TestEnsureLingerEnabled:
 def test_systemd_install_calls_linger_helper(monkeypatch, tmp_path, capsys):
    unit_path = tmp_path / "systemd" / "user" / "hermes-gateway.service"

-    monkeypatch.setattr(gateway, "get_systemd_unit_path", lambda: unit_path)
+    monkeypatch.setattr(gateway, "get_systemd_unit_path", lambda system=False: unit_path)

    calls = []

@@ -117,4 +117,4 @@ def test_systemd_install_calls_linger_helper(monkeypatch, tmp_path, capsys):
        ["systemctl", "--user", "enable", gateway.SERVICE_NAME],
    ]
    assert helper_calls == [True]
-    assert "Service installed and enabled" in out
+    assert "User service installed and enabled" in out
--- a/tests/hermes_cli/test_gateway_service.py
+++ b/tests/hermes_cli/test_gateway_service.py
@@ -10,8 +10,8 @@ class TestSystemdServiceRefresh:
        unit_path = tmp_path / "hermes-gateway.service"
        unit_path.write_text("old unit\n", encoding="utf-8")

-        monkeypatch.setattr(gateway_cli, "get_systemd_unit_path", lambda: unit_path)
-        monkeypatch.setattr(gateway_cli, "generate_systemd_unit", lambda: "new unit\n")
+        monkeypatch.setattr(gateway_cli, "get_systemd_unit_path", lambda system=False: unit_path)
+        monkeypatch.setattr(gateway_cli, "generate_systemd_unit", lambda system=False, run_as_user=None: "new unit\n")

        calls = []

@@ -33,8 +33,8 @@ class TestSystemdServiceRefresh:
        unit_path = tmp_path / "hermes-gateway.service"
        unit_path.write_text("old unit\n", encoding="utf-8")

-        monkeypatch.setattr(gateway_cli, "get_systemd_unit_path", lambda: unit_path)
-        monkeypatch.setattr(gateway_cli, "generate_systemd_unit", lambda: "new unit\n")
+        monkeypatch.setattr(gateway_cli, "get_systemd_unit_path", lambda system=False: unit_path)
+        monkeypatch.setattr(gateway_cli, "generate_systemd_unit", lambda system=False, run_as_user=None: "new unit\n")

        calls = []

@@ -60,12 +60,12 @@ class TestGatewayStopCleanup:

        monkeypatch.setattr(gateway_cli, "is_linux", lambda: True)
        monkeypatch.setattr(gateway_cli, "is_macos", lambda: False)
-        monkeypatch.setattr(gateway_cli, "get_systemd_unit_path", lambda: unit_path)
+        monkeypatch.setattr(gateway_cli, "get_systemd_unit_path", lambda system=False: unit_path)

        service_calls = []
        kill_calls = []

-        monkeypatch.setattr(gateway_cli, "systemd_stop", lambda: service_calls.append("stop"))
+        monkeypatch.setattr(gateway_cli, "systemd_stop", lambda system=False: service_calls.append("stop"))
        monkeypatch.setattr(
            gateway_cli,
            "kill_gateway_processes",
@@ -76,3 +76,66 @@ class TestGatewayStopCleanup:

        assert service_calls == ["stop"]
        assert kill_calls == [False]
+
+
+class TestGatewayServiceDetection:
+    def test_is_service_running_checks_system_scope_when_user_scope_is_inactive(self, monkeypatch):
+        user_unit = SimpleNamespace(exists=lambda: True)
+        system_unit = SimpleNamespace(exists=lambda: True)
+
+        monkeypatch.setattr(gateway_cli, "is_linux", lambda: True)
+        monkeypatch.setattr(gateway_cli, "is_macos", lambda: False)
+        monkeypatch.setattr(
+            gateway_cli,
+            "get_systemd_unit_path",
+            lambda system=False: system_unit if system else user_unit,
+        )
+
+        def fake_run(cmd, capture_output=True, text=True, **kwargs):
+            if cmd == ["systemctl", "--user", "is-active", gateway_cli.SERVICE_NAME]:
+                return SimpleNamespace(returncode=0, stdout="inactive\n", stderr="")
+            if cmd == ["systemctl", "is-active", gateway_cli.SERVICE_NAME]:
+                return SimpleNamespace(returncode=0, stdout="active\n", stderr="")
+            raise AssertionError(f"Unexpected command: {cmd}")
+
+        monkeypatch.setattr(gateway_cli.subprocess, "run", fake_run)
+
+        assert gateway_cli._is_service_running() is True
+
+
+class TestGatewaySystemServiceRouting:
+    def test_gateway_install_passes_system_flags(self, monkeypatch):
+        monkeypatch.setattr(gateway_cli, "is_linux", lambda: True)
+        monkeypatch.setattr(gateway_cli, "is_macos", lambda: False)
+
+        calls = []
+        monkeypatch.setattr(
+            gateway_cli,
+            "systemd_install",
+            lambda force=False, system=False, run_as_user=None: calls.append((force, system, run_as_user)),
+        )
+
+        gateway_cli.gateway_command(
+            SimpleNamespace(gateway_command="install", force=True, system=True, run_as_user="alice")
+        )
+
+        assert calls == [(True, True, "alice")]
+
+    def test_gateway_status_prefers_system_service_when_only_system_unit_exists(self, monkeypatch):
+        user_unit = SimpleNamespace(exists=lambda: False)
+        system_unit = SimpleNamespace(exists=lambda: True)
+
+        monkeypatch.setattr(gateway_cli, "is_linux", lambda: True)
+        monkeypatch.setattr(gateway_cli, "is_macos", lambda: False)
+        monkeypatch.setattr(
+            gateway_cli,
+            "get_systemd_unit_path",
+            lambda system=False: system_unit if system else user_unit,
+        )
+
+        calls = []
+        monkeypatch.setattr(gateway_cli, "systemd_status", lambda deep=False, system=False: calls.append((deep, system)))
+
+        gateway_cli.gateway_command(SimpleNamespace(gateway_command="status", deep=False, system=False))
+
+        assert calls == [(False, False)]
--- a/tests/hermes_cli/test_model_validation.py
+++ b/tests/hermes_cli/test_model_validation.py
@@ -7,6 +7,7 @@ from hermes_cli.models import (
    fetch_api_models,
    normalize_provider,
    parse_model_input,
+    probe_api_models,
    provider_label,
    provider_model_ids,
    validate_requested_model,
@@ -26,7 +27,15 @@ FAKE_API_MODELS = [

 def _validate(model, provider="openrouter", api_models=FAKE_API_MODELS, **kw):
    """Shortcut: call validate_requested_model with mocked API."""
-    with patch("hermes_cli.models.fetch_api_models", return_value=api_models):
+    probe_payload = {
+        "models": api_models,
+        "probed_url": "http://localhost:11434/v1/models",
+        "resolved_base_url": kw.get("base_url", "") or "http://localhost:11434/v1",
+        "suggested_base_url": None,
+        "used_fallback": False,
+    }
+    with patch("hermes_cli.models.fetch_api_models", return_value=api_models), \
+         patch("hermes_cli.models.probe_api_models", return_value=probe_payload):
        return validate_requested_model(model, provider, **kw)


@@ -147,6 +156,33 @@ class TestFetchApiModels:
        with patch("hermes_cli.models.urllib.request.urlopen", side_effect=Exception("timeout")):
            assert fetch_api_models("key", "https://example.com/v1") is None

+    def test_probe_api_models_tries_v1_fallback(self):
+        class _Resp:
+            def __enter__(self):
+                return self
+
+            def __exit__(self, exc_type, exc, tb):
+                return False
+
+            def read(self):
+                return b'{"data": [{"id": "local-model"}]}'
+
+        calls = []
+
+        def _fake_urlopen(req, timeout=5.0):
+            calls.append(req.full_url)
+            if req.full_url.endswith("/v1/models"):
+                return _Resp()
+            raise Exception("404")
+
+        with patch("hermes_cli.models.urllib.request.urlopen", side_effect=_fake_urlopen):
+            probe = probe_api_models("key", "http://localhost:8000")
+
+        assert calls == ["http://localhost:8000/models", "http://localhost:8000/v1/models"]
+        assert probe["models"] == ["local-model"]
+        assert probe["resolved_base_url"] == "http://localhost:8000/v1"
+        assert probe["used_fallback"] is True
+

 # -- validate — format checks -----------------------------------------------

@@ -191,6 +227,7 @@ class TestValidateApiFound:
        )
        assert result["accepted"] is True
        assert result["persist"] is True
+        assert result["recognized"] is True


 # -- validate — API not found ------------------------------------------------
@@ -232,3 +269,26 @@ class TestValidateApiFallback:
        result = _validate("some-model", provider="totally-unknown", api_models=None)
        assert result["accepted"] is True
        assert result["persist"] is True
+
+    def test_custom_endpoint_warns_with_probed_url_and_v1_hint(self):
+        with patch(
+            "hermes_cli.models.probe_api_models",
+            return_value={
+                "models": None,
+                "probed_url": "http://localhost:8000/v1/models",
+                "resolved_base_url": "http://localhost:8000",
+                "suggested_base_url": "http://localhost:8000/v1",
+                "used_fallback": False,
+            },
+        ):
+            result = validate_requested_model(
+                "qwen3",
+                "custom",
+                api_key="local-key",
+                base_url="http://localhost:8000",
+            )
+
+        assert result["accepted"] is True
+        assert result["persist"] is True
+        assert "http://localhost:8000/v1/models" in result["message"]
+        assert "http://localhost:8000/v1" in result["message"]
--- a/tests/hermes_cli/test_sessions_delete.py
+++ b/tests/hermes_cli/test_sessions_delete.py
@@ -0,0 +1,64 @@
+import sys
+
+
+def test_sessions_delete_accepts_unique_id_prefix(monkeypatch, capsys):
+    import hermes_cli.main as main_mod
+    import hermes_state
+
+    captured = {}
+
+    class FakeDB:
+        def resolve_session_id(self, session_id):
+            captured["resolved_from"] = session_id
+            return "20260315_092437_c9a6ff"
+
+        def delete_session(self, session_id):
+            captured["deleted"] = session_id
+            return True
+
+        def close(self):
+            captured["closed"] = True
+
+    monkeypatch.setattr(hermes_state, "SessionDB", lambda: FakeDB())
+    monkeypatch.setattr(
+        sys,
+        "argv",
+        ["hermes", "sessions", "delete", "20260315_092437_c9a6", "--yes"],
+    )
+
+    main_mod.main()
+
+    output = capsys.readouterr().out
+    assert captured == {
+        "resolved_from": "20260315_092437_c9a6",
+        "deleted": "20260315_092437_c9a6ff",
+        "closed": True,
+    }
+    assert "Deleted session '20260315_092437_c9a6ff'." in output
+
+
+def test_sessions_delete_reports_not_found_when_prefix_is_unknown(monkeypatch, capsys):
+    import hermes_cli.main as main_mod
+    import hermes_state
+
+    class FakeDB:
+        def resolve_session_id(self, session_id):
+            return None
+
+        def delete_session(self, session_id):
+            raise AssertionError("delete_session should not be called when resolution fails")
+
+        def close(self):
+            pass
+
+    monkeypatch.setattr(hermes_state, "SessionDB", lambda: FakeDB())
+    monkeypatch.setattr(
+        sys,
+        "argv",
+        ["hermes", "sessions", "delete", "missing-prefix", "--yes"],
+    )
+
+    main_mod.main()
+
+    output = capsys.readouterr().out
+    assert "Session 'missing-prefix' not found." in output
--- a/tests/hermes_cli/test_setup.py
+++ b/tests/hermes_cli/test_setup.py
@@ -25,7 +25,11 @@ def test_nous_oauth_setup_keeps_current_model_when_syncing_disk_provider(

    config = load_config()

-    prompt_choices = iter([0, 2])
+    # Provider selection always comes first. Depending on available vision
+    # backends, setup may either skip the optional vision step or prompt for
+    # it before the default-model choice. Provide enough selections for both
+    # paths while still ending on "keep current model".
+    prompt_choices = iter([0, 2, 2])
    monkeypatch.setattr(
        "hermes_cli.setup.prompt_choice",
        lambda *args, **kwargs: next(prompt_choices),
--- a/tests/hermes_cli/test_setup_model_provider.py
+++ b/tests/hermes_cli/test_setup_model_provider.py
@@ -39,6 +39,8 @@ def test_setup_keep_current_custom_from_config_does_not_fall_through(tmp_path, m
    """Keep-current custom should not fall through to the generic model menu."""
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    _clear_provider_env(monkeypatch)
+    save_env_value("OPENAI_BASE_URL", "https://example.invalid/v1")
+    save_env_value("OPENAI_API_KEY", "custom-key")

    config = load_config()
    config["model"] = {
@@ -55,10 +57,6 @@ def test_setup_keep_current_custom_from_config_does_not_fall_through(tmp_path, m
        if calls["count"] == 1:
            assert choices[-1] == "Keep current (Custom: https://example.invalid/v1)"
            return len(choices) - 1
-        if calls["count"] == 2:
-            assert question == "Configure vision:"
-            assert choices[-1] == "Skip for now"
-            return len(choices) - 1
        raise AssertionError("Model menu should not appear for keep-current custom")

    monkeypatch.setattr("hermes_cli.setup.prompt_choice", fake_prompt_choice)
@@ -74,7 +72,59 @@ def test_setup_keep_current_custom_from_config_does_not_fall_through(tmp_path, m
    assert reloaded["model"]["provider"] == "custom"
    assert reloaded["model"]["default"] == "custom/model"
    assert reloaded["model"]["base_url"] == "https://example.invalid/v1"
-    assert calls["count"] == 2
+    assert calls["count"] == 1
+
+
+def test_setup_custom_endpoint_saves_working_v1_base_url(tmp_path, monkeypatch):
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+    _clear_provider_env(monkeypatch)
+
+    config = load_config()
+
+    def fake_prompt_choice(question, choices, default=0):
+        if question == "Select your inference provider:":
+            return 3  # Custom endpoint
+        if question == "Configure vision:":
+            return len(choices) - 1  # Skip
+        raise AssertionError(f"Unexpected prompt_choice call: {question}")
+
+    def fake_prompt(message, current=None, **kwargs):
+        if "API base URL" in message:
+            return "http://localhost:8000"
+        if "API key" in message:
+            return "local-key"
+        if "Model name" in message:
+            return "llm"
+        return ""
+
+    monkeypatch.setattr("hermes_cli.setup.prompt_choice", fake_prompt_choice)
+    monkeypatch.setattr("hermes_cli.setup.prompt", fake_prompt)
+    monkeypatch.setattr("hermes_cli.setup.prompt_yes_no", lambda *args, **kwargs: False)
+    monkeypatch.setattr("hermes_cli.auth.get_active_provider", lambda: None)
+    monkeypatch.setattr("hermes_cli.auth.detect_external_credentials", lambda: [])
+    monkeypatch.setattr("agent.auxiliary_client.get_available_vision_backends", lambda: [])
+    monkeypatch.setattr(
+        "hermes_cli.models.probe_api_models",
+        lambda api_key, base_url: {
+            "models": ["llm"],
+            "probed_url": "http://localhost:8000/v1/models",
+            "resolved_base_url": "http://localhost:8000/v1",
+            "suggested_base_url": "http://localhost:8000/v1",
+            "used_fallback": True,
+        },
+    )
+
+    setup_model_provider(config)
+    save_config(config)
+
+    env = _read_env(tmp_path)
+    reloaded = load_config()
+
+    assert env.get("OPENAI_BASE_URL") == "http://localhost:8000/v1"
+    assert env.get("OPENAI_API_KEY") == "local-key"
+    assert reloaded["model"]["provider"] == "custom"
+    assert reloaded["model"]["base_url"] == "http://localhost:8000/v1"
+    assert reloaded["model"]["default"] == "llm"


 def test_setup_keep_current_config_provider_uses_provider_specific_model_menu(tmp_path, monkeypatch):
@@ -113,6 +163,7 @@ def test_setup_keep_current_config_provider_uses_provider_specific_model_menu(tm
    monkeypatch.setattr("hermes_cli.auth.get_active_provider", lambda: None)
    monkeypatch.setattr("hermes_cli.auth.detect_external_credentials", lambda: [])
    monkeypatch.setattr("hermes_cli.models.provider_model_ids", lambda provider: [])
+    monkeypatch.setattr("agent.auxiliary_client.get_available_vision_backends", lambda: [])

    setup_model_provider(config)
    save_config(config)
@@ -151,6 +202,7 @@ def test_setup_keep_current_anthropic_can_configure_openai_vision_default(tmp_pa
    monkeypatch.setattr("hermes_cli.auth.get_active_provider", lambda: None)
    monkeypatch.setattr("hermes_cli.auth.detect_external_credentials", lambda: [])
    monkeypatch.setattr("hermes_cli.models.provider_model_ids", lambda provider: [])
+    monkeypatch.setattr("agent.auxiliary_client.get_available_vision_backends", lambda: [])

    setup_model_provider(config)
    env = _read_env(tmp_path)
@@ -214,7 +266,7 @@ def test_setup_summary_marks_codex_auth_as_vision_available(tmp_path, monkeypatc
    _clear_provider_env(monkeypatch)

    (tmp_path / "auth.json").write_text(
-        '{"active_provider":"openai-codex","providers":{"openai-codex":{"tokens":{"access_token":"tok"}}}}'
+        '{"active_provider":"openai-codex","providers":{"openai-codex":{"tokens":{"access_token": "***", "refresh_token": "***"}}}}'
    )

    monkeypatch.setattr("shutil.which", lambda _name: None)
@@ -226,3 +278,17 @@ def test_setup_summary_marks_codex_auth_as_vision_available(tmp_path, monkeypatc
    assert "missing run 'hermes setup' to configure" not in output
    assert "Mixture of Agents" in output
    assert "missing OPENROUTER_API_KEY" in output
+
+
+def test_setup_summary_marks_anthropic_auth_as_vision_available(tmp_path, monkeypatch, capsys):
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+    _clear_provider_env(monkeypatch)
+    monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-ant-api03-key")
+    monkeypatch.setattr("shutil.which", lambda _name: None)
+    monkeypatch.setattr("agent.auxiliary_client.get_available_vision_backends", lambda: ["anthropic"])
+
+    _print_setup_summary(load_config(), tmp_path)
+    output = capsys.readouterr().out
+
+    assert "Vision (image analysis)" in output
+    assert "missing run 'hermes setup' to configure" not in output
--- a/tests/hermes_cli/test_setup_prompt_menus.py
+++ b/tests/hermes_cli/test_setup_prompt_menus.py
@@ -0,0 +1,29 @@
+from hermes_cli import setup as setup_mod
+
+
+def test_prompt_choice_uses_curses_helper(monkeypatch):
+    monkeypatch.setattr(setup_mod, "_curses_prompt_choice", lambda question, choices, default=0: 1)
+
+    idx = setup_mod.prompt_choice("Pick one", ["a", "b", "c"], default=0)
+
+    assert idx == 1
+
+
+def test_prompt_choice_falls_back_to_numbered_input(monkeypatch):
+    monkeypatch.setattr(setup_mod, "_curses_prompt_choice", lambda question, choices, default=0: -1)
+    monkeypatch.setattr("builtins.input", lambda _prompt="": "2")
+
+    idx = setup_mod.prompt_choice("Pick one", ["a", "b", "c"], default=0)
+
+    assert idx == 1
+
+
+def test_prompt_checklist_uses_shared_curses_checklist(monkeypatch):
+    monkeypatch.setattr(
+        "hermes_cli.curses_ui.curses_checklist",
+        lambda title, items, selected, cancel_returns=None: {0, 2},
+    )
+
+    selected = setup_mod.prompt_checklist("Pick tools", ["one", "two", "three"], pre_selected=[1])
+
+    assert selected == [0, 2]
--- a/tests/hermes_cli/test_tools_config.py
+++ b/tests/hermes_cli/test_tools_config.py
@@ -1,6 +1,13 @@
 """Tests for hermes_cli.tools_config platform tool persistence."""

-from hermes_cli.tools_config import _get_platform_tools, _platform_toolset_summary
+from unittest.mock import patch
+
+from hermes_cli.tools_config import (
+    _get_platform_tools,
+    _platform_toolset_summary,
+    _save_platform_tools,
+    _toolset_has_keys,
+)


 def test_get_platform_tools_uses_default_when_platform_not_configured():
@@ -26,3 +33,70 @@ def test_platform_toolset_summary_uses_explicit_platform_list():

    assert set(summary.keys()) == {"cli"}
    assert summary["cli"] == _get_platform_tools(config, "cli")
+
+
+def test_toolset_has_keys_for_vision_accepts_codex_auth(tmp_path, monkeypatch):
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+    (tmp_path / "auth.json").write_text(
+        '{"active_provider":"openai-codex","providers":{"openai-codex":{"tokens":{"access_token": "codex-...oken","refresh_token": "codex-...oken"}}}}'
+    )
+    monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
+    monkeypatch.delenv("OPENAI_BASE_URL", raising=False)
+    monkeypatch.delenv("OPENAI_API_KEY", raising=False)
+    monkeypatch.delenv("AUXILIARY_VISION_PROVIDER", raising=False)
+    monkeypatch.delenv("CONTEXT_VISION_PROVIDER", raising=False)
+
+    assert _toolset_has_keys("vision") is True
+
+
+def test_save_platform_tools_preserves_mcp_server_names():
+    """Ensure MCP server names are preserved when saving platform tools.
+
+    Regression test for https://github.com/NousResearch/hermes-agent/issues/1247
+    """
+    config = {
+        "platform_toolsets": {
+            "cli": ["web", "terminal", "time", "github", "custom-mcp-server"]
+        }
+    }
+
+    new_selection = {"web", "browser"}
+
+    with patch("hermes_cli.tools_config.save_config"):
+        _save_platform_tools(config, "cli", new_selection)
+
+    saved_toolsets = config["platform_toolsets"]["cli"]
+
+    assert "time" in saved_toolsets
+    assert "github" in saved_toolsets
+    assert "custom-mcp-server" in saved_toolsets
+    assert "web" in saved_toolsets
+    assert "browser" in saved_toolsets
+    assert "terminal" not in saved_toolsets
+
+
+def test_save_platform_tools_handles_empty_existing_config():
+    """Saving platform tools works when no existing config exists."""
+    config = {}
+
+    with patch("hermes_cli.tools_config.save_config"):
+        _save_platform_tools(config, "telegram", {"web", "terminal"})
+
+    saved_toolsets = config["platform_toolsets"]["telegram"]
+    assert "web" in saved_toolsets
+    assert "terminal" in saved_toolsets
+
+
+def test_save_platform_tools_handles_invalid_existing_config():
+    """Saving platform tools works when existing config is not a list."""
+    config = {
+        "platform_toolsets": {
+            "cli": "invalid-string-value"
+        }
+    }
+
+    with patch("hermes_cli.tools_config.save_config"):
+        _save_platform_tools(config, "cli", {"web"})
+
+    saved_toolsets = config["platform_toolsets"]["cli"]
+    assert "web" in saved_toolsets
--- a/tests/hermes_cli/test_update_autostash.py
+++ b/tests/hermes_cli/test_update_autostash.py
@@ -46,6 +46,20 @@ def test_stash_local_changes_if_needed_returns_specific_stash_commit(monkeypatch
    assert calls[2][0][-3:] == ["rev-parse", "--verify", "refs/stash"]


+def test_resolve_stash_selector_returns_matching_entry(monkeypatch, tmp_path):
+    def fake_run(cmd, **kwargs):
+        assert cmd == ["git", "stash", "list", "--format=%gd %H"]
+        return SimpleNamespace(
+            stdout="stash@{0} def456\nstash@{1} abc123\n",
+            returncode=0,
+        )
+
+    monkeypatch.setattr(hermes_main.subprocess, "run", fake_run)
+
+    assert hermes_main._resolve_stash_selector(["git"], tmp_path, "abc123") == "stash@{1}"
+
+
+
 def test_restore_stashed_changes_prompts_before_applying(monkeypatch, tmp_path, capsys):
    calls = []

@@ -53,6 +67,8 @@ def test_restore_stashed_changes_prompts_before_applying(monkeypatch, tmp_path,
        calls.append((cmd, kwargs))
        if cmd[1:3] == ["stash", "apply"]:
            return SimpleNamespace(stdout="applied\n", stderr="", returncode=0)
+        if cmd[1:3] == ["stash", "list"]:
+            return SimpleNamespace(stdout="stash@{1} abc123\n", stderr="", returncode=0)
        if cmd[1:3] == ["stash", "drop"]:
            return SimpleNamespace(stdout="dropped\n", stderr="", returncode=0)
        raise AssertionError(f"unexpected command: {cmd}")
@@ -64,7 +80,8 @@ def test_restore_stashed_changes_prompts_before_applying(monkeypatch, tmp_path,

    assert restored is True
    assert calls[0][0] == ["git", "stash", "apply", "abc123"]
-    assert calls[1][0] == ["git", "stash", "drop", "abc123"]
+    assert calls[1][0] == ["git", "stash", "list", "--format=%gd %H"]
+    assert calls[2][0] == ["git", "stash", "drop", "stash@{1}"]
    out = capsys.readouterr().out
    assert "Restore local changes now? [Y/n]" in out
    assert "restored on top of the updated codebase" in out
@@ -99,6 +116,8 @@ def test_restore_stashed_changes_applies_without_prompt_when_disabled(monkeypatc
        calls.append((cmd, kwargs))
        if cmd[1:3] == ["stash", "apply"]:
            return SimpleNamespace(stdout="applied\n", stderr="", returncode=0)
+        if cmd[1:3] == ["stash", "list"]:
+            return SimpleNamespace(stdout="stash@{0} abc123\n", stderr="", returncode=0)
        if cmd[1:3] == ["stash", "drop"]:
            return SimpleNamespace(stdout="dropped\n", stderr="", returncode=0)
        raise AssertionError(f"unexpected command: {cmd}")
@@ -109,9 +128,78 @@ def test_restore_stashed_changes_applies_without_prompt_when_disabled(monkeypatc

    assert restored is True
    assert calls[0][0] == ["git", "stash", "apply", "abc123"]
+    assert calls[1][0] == ["git", "stash", "list", "--format=%gd %H"]
+    assert calls[2][0] == ["git", "stash", "drop", "stash@{0}"]
    assert "Restore local changes now?" not in capsys.readouterr().out


+
+def test_print_stash_cleanup_guidance_with_selector(capsys):
+    hermes_main._print_stash_cleanup_guidance("abc123", "stash@{2}")
+
+    out = capsys.readouterr().out
+    assert "Check `git status` first" in out
+    assert "git stash list --format='%gd %H %s'" in out
+    assert "git stash drop stash@{2}" in out
+
+
+
+def test_restore_stashed_changes_keeps_going_when_stash_entry_cannot_be_resolved(monkeypatch, tmp_path, capsys):
+    calls = []
+
+    def fake_run(cmd, **kwargs):
+        calls.append((cmd, kwargs))
+        if cmd[1:3] == ["stash", "apply"]:
+            return SimpleNamespace(stdout="applied\n", stderr="", returncode=0)
+        if cmd[1:3] == ["stash", "list"]:
+            return SimpleNamespace(stdout="stash@{0} def456\n", stderr="", returncode=0)
+        raise AssertionError(f"unexpected command: {cmd}")
+
+    monkeypatch.setattr(hermes_main.subprocess, "run", fake_run)
+
+    restored = hermes_main._restore_stashed_changes(["git"], tmp_path, "abc123", prompt_user=False)
+
+    assert restored is True
+    assert calls == [
+        (["git", "stash", "apply", "abc123"], {"cwd": tmp_path, "capture_output": True, "text": True}),
+        (["git", "stash", "list", "--format=%gd %H"], {"cwd": tmp_path, "capture_output": True, "text": True, "check": True}),
+    ]
+    out = capsys.readouterr().out
+    assert "couldn't find the stash entry to drop" in out
+    assert "stash was left in place" in out
+    assert "Check `git status` first" in out
+    assert "git stash list --format='%gd %H %s'" in out
+    assert "Look for commit abc123" in out
+
+
+
+def test_restore_stashed_changes_keeps_going_when_drop_fails(monkeypatch, tmp_path, capsys):
+    calls = []
+
+    def fake_run(cmd, **kwargs):
+        calls.append((cmd, kwargs))
+        if cmd[1:3] == ["stash", "apply"]:
+            return SimpleNamespace(stdout="applied\n", stderr="", returncode=0)
+        if cmd[1:3] == ["stash", "list"]:
+            return SimpleNamespace(stdout="stash@{0} abc123\n", stderr="", returncode=0)
+        if cmd[1:3] == ["stash", "drop"]:
+            return SimpleNamespace(stdout="", stderr="drop failed\n", returncode=1)
+        raise AssertionError(f"unexpected command: {cmd}")
+
+    monkeypatch.setattr(hermes_main.subprocess, "run", fake_run)
+
+    restored = hermes_main._restore_stashed_changes(["git"], tmp_path, "abc123", prompt_user=False)
+
+    assert restored is True
+    assert calls[2][0] == ["git", "stash", "drop", "stash@{0}"]
+    out = capsys.readouterr().out
+    assert "couldn't drop the saved stash entry" in out
+    assert "drop failed" in out
+    assert "Check `git status` first" in out
+    assert "git stash list --format='%gd %H %s'" in out
+    assert "git stash drop stash@{0}" in out
+
+
 def test_restore_stashed_changes_exits_cleanly_when_apply_fails(monkeypatch, tmp_path, capsys):
    calls = []

--- a/tests/hermes_cli/test_update_check.py
+++ b/tests/hermes_cli/test_update_check.py
@@ -0,0 +1,135 @@
+"""Tests for the update check mechanism in hermes_cli.banner."""
+
+import json
+import threading
+import time
+from pathlib import Path
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+
+def test_version_string_no_v_prefix():
+    """__version__ should be bare semver without a 'v' prefix."""
+    from hermes_cli import __version__
+    assert not __version__.startswith("v"), f"__version__ should not start with 'v', got {__version__!r}"
+
+
+def test_check_for_updates_uses_cache(tmp_path):
+    """When cache is fresh, check_for_updates should return cached value without calling git."""
+    from hermes_cli.banner import check_for_updates
+
+    # Create a fake git repo and fresh cache
+    repo_dir = tmp_path / "hermes-agent"
+    repo_dir.mkdir()
+    (repo_dir / ".git").mkdir()
+
+    cache_file = tmp_path / ".update_check"
+    cache_file.write_text(json.dumps({"ts": time.time(), "behind": 3}))
+
+    with patch("hermes_cli.banner.os.getenv", return_value=str(tmp_path)):
+        with patch("hermes_cli.banner.subprocess.run") as mock_run:
+            result = check_for_updates()
+
+    assert result == 3
+    mock_run.assert_not_called()
+
+
+def test_check_for_updates_expired_cache(tmp_path):
+    """When cache is expired, check_for_updates should call git fetch."""
+    from hermes_cli.banner import check_for_updates
+
+    repo_dir = tmp_path / "hermes-agent"
+    repo_dir.mkdir()
+    (repo_dir / ".git").mkdir()
+
+    # Write an expired cache (timestamp far in the past)
+    cache_file = tmp_path / ".update_check"
+    cache_file.write_text(json.dumps({"ts": 0, "behind": 1}))
+
+    mock_result = MagicMock(returncode=0, stdout="5\n")
+
+    with patch("hermes_cli.banner.os.getenv", return_value=str(tmp_path)):
+        with patch("hermes_cli.banner.subprocess.run", return_value=mock_result) as mock_run:
+            result = check_for_updates()
+
+    assert result == 5
+    assert mock_run.call_count == 2  # git fetch + git rev-list
+
+
+def test_check_for_updates_no_git_dir(tmp_path):
+    """Returns None when .git directory doesn't exist anywhere."""
+    import hermes_cli.banner as banner
+
+    # Create a fake banner.py so the fallback path also has no .git
+    fake_banner = tmp_path / "hermes_cli" / "banner.py"
+    fake_banner.parent.mkdir(parents=True, exist_ok=True)
+    fake_banner.touch()
+
+    original = banner.__file__
+    try:
+        banner.__file__ = str(fake_banner)
+        with patch("hermes_cli.banner.os.getenv", return_value=str(tmp_path)):
+            with patch("hermes_cli.banner.subprocess.run") as mock_run:
+                result = banner.check_for_updates()
+        assert result is None
+        mock_run.assert_not_called()
+    finally:
+        banner.__file__ = original
+
+
+def test_check_for_updates_fallback_to_project_root():
+    """Dev install: falls back to Path(__file__).parent.parent when HERMES_HOME has no git repo."""
+    import hermes_cli.banner as banner
+
+    project_root = Path(banner.__file__).parent.parent.resolve()
+    if not (project_root / ".git").exists():
+        pytest.skip("Not running from a git checkout")
+
+    # Point HERMES_HOME at a temp dir with no hermes-agent/.git
+    import tempfile
+    with tempfile.TemporaryDirectory() as td:
+        with patch("hermes_cli.banner.os.getenv", return_value=td):
+            with patch("hermes_cli.banner.subprocess.run") as mock_run:
+                mock_run.return_value = MagicMock(returncode=0, stdout="0\n")
+                result = banner.check_for_updates()
+        # Should have fallen back to project root and run git commands
+        assert mock_run.call_count >= 1
+
+
+def test_prefetch_non_blocking():
+    """prefetch_update_check() should return immediately without blocking."""
+    import hermes_cli.banner as banner
+
+    # Reset module state
+    banner._update_result = None
+    banner._update_check_done = threading.Event()
+
+    with patch.object(banner, "check_for_updates", return_value=5):
+        start = time.monotonic()
+        banner.prefetch_update_check()
+        elapsed = time.monotonic() - start
+
+        # Should return almost immediately (well under 1 second)
+        assert elapsed < 1.0
+
+        # Wait for the background thread to finish
+        banner._update_check_done.wait(timeout=5)
+        assert banner._update_result == 5
+
+
+def test_get_update_result_timeout():
+    """get_update_result() returns None when check hasn't completed within timeout."""
+    import hermes_cli.banner as banner
+
+    # Reset module state — don't set the event
+    banner._update_result = None
+    banner._update_check_done = threading.Event()
+
+    start = time.monotonic()
+    result = banner.get_update_result(timeout=0.1)
+    elapsed = time.monotonic() - start
+
+    # Should have waited ~0.1s and returned None
+    assert result is None
+    assert elapsed < 0.5
--- a/tests/integration/test_voice_channel_flow.py
+++ b/tests/integration/test_voice_channel_flow.py
@@ -0,0 +1,611 @@
+"""Integration tests for Discord voice channel audio flow.
+
+Uses real NaCl encryption and Opus codec (no mocks for crypto/codec).
+Does NOT require a Discord connection — tests the VoiceReceiver
+packet processing pipeline end-to-end.
+
+Requires: PyNaCl>=1.5.0, discord.py[voice] (opus codec)
+"""
+
+import struct
+import time
+import pytest
+
+pytestmark = pytest.mark.integration
+
+# Skip entire module if voice deps are missing
+pytest.importorskip("nacl.secret", reason="PyNaCl required for voice integration tests")
+discord = pytest.importorskip("discord", reason="discord.py required for voice integration tests")
+
+import nacl.secret
+
+try:
+    if not discord.opus.is_loaded():
+        import ctypes.util
+        opus_path = ctypes.util.find_library("opus")
+        if not opus_path:
+            import sys
+            for p in ("/opt/homebrew/lib/libopus.dylib", "/usr/local/lib/libopus.dylib"):
+                import os
+                if os.path.isfile(p):
+                    opus_path = p
+                    break
+        if opus_path:
+            discord.opus.load_opus(opus_path)
+    OPUS_AVAILABLE = discord.opus.is_loaded()
+except Exception:
+    OPUS_AVAILABLE = False
+
+from types import SimpleNamespace
+from unittest.mock import MagicMock
+from gateway.platforms.discord import VoiceReceiver
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def _make_secret_key():
+    """Generate a random 32-byte key."""
+    import os
+    return os.urandom(32)
+
+
+def _build_encrypted_rtp_packet(secret_key, opus_payload, ssrc=100, seq=1, timestamp=960):
+    """Build a real NaCl-encrypted RTP packet matching Discord's format.
+
+    Format: RTP header (12 bytes) + encrypted(opus) + 4-byte nonce
+    Encryption: aead_xchacha20_poly1305 with RTP header as AAD.
+    """
+    # RTP header: version=2, payload_type=0x78, no extension, no CSRC
+    header = struct.pack(">BBHII", 0x80, 0x78, seq, timestamp, ssrc)
+
+    # Encrypt with NaCl AEAD
+    box = nacl.secret.Aead(secret_key)
+    nonce_counter = struct.pack(">I", seq)  # 4-byte counter as nonce seed
+    # Full 24-byte nonce: counter in first 4 bytes, rest zeros
+    full_nonce = nonce_counter + b'\x00' * 20
+
+    enc_msg = box.encrypt(opus_payload, header, full_nonce)
+    ciphertext = enc_msg.ciphertext  # without nonce prefix
+
+    # Discord format: header + ciphertext + 4-byte nonce
+    return header + ciphertext + nonce_counter
+
+
+def _make_voice_receiver(secret_key, dave_session=None, bot_ssrc=9999,
+                         allowed_user_ids=None, members=None):
+    """Create a VoiceReceiver with real secret key."""
+    vc = MagicMock()
+    vc._connection.secret_key = list(secret_key)
+    vc._connection.dave_session = dave_session
+    vc._connection.ssrc = bot_ssrc
+    vc._connection.add_socket_listener = MagicMock()
+    vc._connection.remove_socket_listener = MagicMock()
+    vc._connection.hook = None
+    vc.user = SimpleNamespace(id=bot_ssrc)
+    vc.channel = MagicMock()
+    vc.channel.members = members or []
+    receiver = VoiceReceiver(vc, allowed_user_ids=allowed_user_ids)
+    receiver.start()
+    return receiver
+
+
+# ---------------------------------------------------------------------------
+# Tests
+# ---------------------------------------------------------------------------
+
+
+class TestRealNaClDecrypt:
+    """End-to-end: real NaCl encrypt → _on_packet decrypt → buffer."""
+
+    def test_valid_encrypted_packet_buffered(self):
+        """Real NaCl encrypted packet → decrypted → buffered."""
+        key = _make_secret_key()
+        opus_silence = b'\xf8\xff\xfe'
+        receiver = _make_voice_receiver(key)
+
+        packet = _build_encrypted_rtp_packet(key, opus_silence, ssrc=100)
+        receiver._on_packet(packet)
+
+        assert 100 in receiver._buffers
+        assert len(receiver._buffers[100]) > 0
+
+    def test_wrong_key_packet_dropped(self):
+        """Packet encrypted with wrong key → NaCl fails → not buffered."""
+        real_key = _make_secret_key()
+        wrong_key = _make_secret_key()
+        opus_silence = b'\xf8\xff\xfe'
+        receiver = _make_voice_receiver(real_key)
+
+        packet = _build_encrypted_rtp_packet(wrong_key, opus_silence, ssrc=100)
+        receiver._on_packet(packet)
+
+        assert len(receiver._buffers.get(100, b"")) == 0
+
+    def test_bot_ssrc_ignored(self):
+        """Packet from bot's own SSRC → ignored."""
+        key = _make_secret_key()
+        receiver = _make_voice_receiver(key, bot_ssrc=9999)
+
+        packet = _build_encrypted_rtp_packet(key, b'\xf8\xff\xfe', ssrc=9999)
+        receiver._on_packet(packet)
+
+        assert len(receiver._buffers) == 0
+
+    def test_multiple_packets_accumulate(self):
+        """Multiple valid packets → buffer grows."""
+        key = _make_secret_key()
+        receiver = _make_voice_receiver(key)
+
+        for seq in range(1, 6):
+            packet = _build_encrypted_rtp_packet(
+                key, b'\xf8\xff\xfe', ssrc=100, seq=seq, timestamp=960 * seq
+            )
+            receiver._on_packet(packet)
+
+        assert 100 in receiver._buffers
+        buf_size = len(receiver._buffers[100])
+        assert buf_size > 0, "Multiple packets should accumulate in buffer"
+
+    def test_different_ssrcs_separate_buffers(self):
+        """Packets from different SSRCs → separate buffers."""
+        key = _make_secret_key()
+        receiver = _make_voice_receiver(key)
+
+        for ssrc in [100, 200, 300]:
+            packet = _build_encrypted_rtp_packet(key, b'\xf8\xff\xfe', ssrc=ssrc)
+            receiver._on_packet(packet)
+
+        assert len(receiver._buffers) == 3
+        for ssrc in [100, 200, 300]:
+            assert ssrc in receiver._buffers
+
+
+class TestRealNaClWithDAVE:
+    """NaCl decrypt + DAVE passthrough scenarios with real crypto."""
+
+    def test_dave_unknown_ssrc_passthrough(self):
+        """DAVE enabled but SSRC unknown → skip DAVE, buffer audio."""
+        key = _make_secret_key()
+        dave = MagicMock()  # DAVE session present but SSRC not mapped
+        receiver = _make_voice_receiver(key, dave_session=dave)
+
+        packet = _build_encrypted_rtp_packet(key, b'\xf8\xff\xfe', ssrc=100)
+        receiver._on_packet(packet)
+
+        # DAVE decrypt not called (SSRC unknown)
+        dave.decrypt.assert_not_called()
+        # Audio still buffered via passthrough
+        assert 100 in receiver._buffers
+        assert len(receiver._buffers[100]) > 0
+
+    def test_dave_unencrypted_error_passthrough(self):
+        """DAVE raises 'Unencrypted' → use NaCl-decrypted data as-is."""
+        key = _make_secret_key()
+        dave = MagicMock()
+        dave.decrypt.side_effect = Exception(
+            "DecryptionFailed(UnencryptedWhenPassthroughDisabled)"
+        )
+        receiver = _make_voice_receiver(key, dave_session=dave)
+        receiver.map_ssrc(100, 42)
+
+        packet = _build_encrypted_rtp_packet(key, b'\xf8\xff\xfe', ssrc=100)
+        receiver._on_packet(packet)
+
+        # DAVE was called but failed → passthrough
+        dave.decrypt.assert_called_once()
+        assert 100 in receiver._buffers
+        assert len(receiver._buffers[100]) > 0
+
+    def test_dave_real_error_drops(self):
+        """DAVE raises non-Unencrypted error → packet dropped."""
+        key = _make_secret_key()
+        dave = MagicMock()
+        dave.decrypt.side_effect = Exception("KeyRotationFailed")
+        receiver = _make_voice_receiver(key, dave_session=dave)
+        receiver.map_ssrc(100, 42)
+
+        packet = _build_encrypted_rtp_packet(key, b'\xf8\xff\xfe', ssrc=100)
+        receiver._on_packet(packet)
+
+        assert len(receiver._buffers.get(100, b"")) == 0
+
+
+class TestFullVoiceFlow:
+    """End-to-end: encrypt → receive → buffer → silence detect → complete."""
+
+    def test_single_utterance_flow(self):
+        """Encrypt packets → buffer → silence → check_silence returns utterance."""
+        key = _make_secret_key()
+        receiver = _make_voice_receiver(key)
+        receiver.map_ssrc(100, 42)
+
+        # Send enough packets to exceed MIN_SPEECH_DURATION (0.5s)
+        # At 48kHz stereo 16-bit, each Opus silence frame decodes to ~3840 bytes
+        # Need 96000 bytes = ~25 frames
+        for seq in range(1, 30):
+            packet = _build_encrypted_rtp_packet(
+                key, b'\xf8\xff\xfe', ssrc=100, seq=seq, timestamp=960 * seq
+            )
+            receiver._on_packet(packet)
+
+        # Simulate silence by setting last_packet_time in the past
+        receiver._last_packet_time[100] = time.monotonic() - 3.0
+
+        completed = receiver.check_silence()
+        assert len(completed) == 1
+        user_id, pcm_data = completed[0]
+        assert user_id == 42
+        assert len(pcm_data) > 0
+
+    def test_utterance_with_ssrc_automap(self):
+        """No SPEAKING event → auto-map sole allowed user → utterance processed."""
+        key = _make_secret_key()
+        members = [
+            SimpleNamespace(id=9999, name="Bot"),
+            SimpleNamespace(id=42, name="Alice"),
+        ]
+        receiver = _make_voice_receiver(
+            key, allowed_user_ids={"42"}, members=members
+        )
+        # No map_ssrc call — simulating missing SPEAKING event
+
+        for seq in range(1, 30):
+            packet = _build_encrypted_rtp_packet(
+                key, b'\xf8\xff\xfe', ssrc=100, seq=seq, timestamp=960 * seq
+            )
+            receiver._on_packet(packet)
+
+        receiver._last_packet_time[100] = time.monotonic() - 3.0
+
+        completed = receiver.check_silence()
+        assert len(completed) == 1
+        assert completed[0][0] == 42  # auto-mapped to sole allowed user
+
+    def test_pause_blocks_during_playback(self):
+        """Pause receiver → packets ignored → resume → packets accepted."""
+        key = _make_secret_key()
+        receiver = _make_voice_receiver(key)
+
+        # Pause (echo prevention during TTS playback)
+        receiver.pause()
+        packet = _build_encrypted_rtp_packet(key, b'\xf8\xff\xfe', ssrc=100)
+        receiver._on_packet(packet)
+        assert len(receiver._buffers.get(100, b"")) == 0
+
+        # Resume
+        receiver.resume()
+        receiver._on_packet(packet)
+        assert 100 in receiver._buffers
+        assert len(receiver._buffers[100]) > 0
+
+    def test_corrupted_packet_ignored(self):
+        """Corrupted/truncated packet → silently ignored."""
+        key = _make_secret_key()
+        receiver = _make_voice_receiver(key)
+
+        # Too short
+        receiver._on_packet(b"\x00" * 5)
+        assert len(receiver._buffers) == 0
+
+        # Wrong RTP version
+        bad_header = struct.pack(">BBHII", 0x00, 0x78, 1, 960, 100)
+        receiver._on_packet(bad_header + b"\x00" * 20)
+        assert len(receiver._buffers) == 0
+
+        # Wrong payload type
+        bad_pt = struct.pack(">BBHII", 0x80, 0x00, 1, 960, 100)
+        receiver._on_packet(bad_pt + b"\x00" * 20)
+        assert len(receiver._buffers) == 0
+
+    def test_stop_cleans_everything(self):
+        """stop() clears all state cleanly."""
+        key = _make_secret_key()
+        receiver = _make_voice_receiver(key)
+        receiver.map_ssrc(100, 42)
+
+        for seq in range(1, 10):
+            packet = _build_encrypted_rtp_packet(
+                key, b'\xf8\xff\xfe', ssrc=100, seq=seq, timestamp=960 * seq
+            )
+            receiver._on_packet(packet)
+
+        assert len(receiver._buffers[100]) > 0
+
+        receiver.stop()
+        assert receiver._running is False
+        assert len(receiver._buffers) == 0
+        assert len(receiver._ssrc_to_user) == 0
+        assert len(receiver._decoders) == 0
+
+
+class TestSPEAKINGHook:
+    """SPEAKING event hook correctly maps SSRC to user_id."""
+
+    def test_speaking_hook_installed(self):
+        """start() installs speaking hook on connection."""
+        key = _make_secret_key()
+        receiver = _make_voice_receiver(key)
+        conn = receiver._vc._connection
+        # hook should be set (wrapped)
+        assert conn.hook is not None
+
+    def test_map_ssrc_via_speaking(self):
+        """SPEAKING op 5 event maps SSRC to user_id."""
+        key = _make_secret_key()
+        receiver = _make_voice_receiver(key)
+        receiver.map_ssrc(500, 12345)
+        assert receiver._ssrc_to_user[500] == 12345
+
+    def test_map_ssrc_overwrites(self):
+        """New SPEAKING event for same SSRC overwrites old mapping."""
+        key = _make_secret_key()
+        receiver = _make_voice_receiver(key)
+        receiver.map_ssrc(500, 111)
+        receiver.map_ssrc(500, 222)
+        assert receiver._ssrc_to_user[500] == 222
+
+    def test_speaking_mapped_audio_processed(self):
+        """After SSRC is mapped, audio from that SSRC gets correct user_id."""
+        key = _make_secret_key()
+        receiver = _make_voice_receiver(key)
+        receiver.map_ssrc(100, 42)
+
+        for seq in range(1, 30):
+            packet = _build_encrypted_rtp_packet(
+                key, b'\xf8\xff\xfe', ssrc=100, seq=seq, timestamp=960 * seq
+            )
+            receiver._on_packet(packet)
+
+        receiver._last_packet_time[100] = time.monotonic() - 3.0
+        completed = receiver.check_silence()
+        assert len(completed) == 1
+        assert completed[0][0] == 42
+
+
+class TestAuthFiltering:
+    """Only allowed users' audio should be processed."""
+
+    def test_allowed_user_audio_processed(self):
+        """Allowed user's utterance is returned by check_silence."""
+        key = _make_secret_key()
+        members = [
+            SimpleNamespace(id=9999, name="Bot"),
+            SimpleNamespace(id=42, name="Alice"),
+        ]
+        receiver = _make_voice_receiver(
+            key, allowed_user_ids={"42"}, members=members,
+        )
+        receiver.map_ssrc(100, 42)
+
+        for seq in range(1, 30):
+            packet = _build_encrypted_rtp_packet(
+                key, b'\xf8\xff\xfe', ssrc=100, seq=seq, timestamp=960 * seq
+            )
+            receiver._on_packet(packet)
+
+        receiver._last_packet_time[100] = time.monotonic() - 3.0
+        completed = receiver.check_silence()
+        assert len(completed) == 1
+        assert completed[0][0] == 42
+
+    def test_automap_rejects_unallowed_user(self):
+        """Auto-map refuses to map SSRC to user not in allowed list."""
+        key = _make_secret_key()
+        members = [
+            SimpleNamespace(id=9999, name="Bot"),
+            SimpleNamespace(id=42, name="Alice"),
+        ]
+        receiver = _make_voice_receiver(
+            key, allowed_user_ids={"99"},  # Alice not allowed
+            members=members,
+        )
+        # No map_ssrc — SSRC unknown, auto-map should reject
+
+        for seq in range(1, 30):
+            packet = _build_encrypted_rtp_packet(
+                key, b'\xf8\xff\xfe', ssrc=100, seq=seq, timestamp=960 * seq
+            )
+            receiver._on_packet(packet)
+
+        receiver._last_packet_time[100] = time.monotonic() - 3.0
+        completed = receiver.check_silence()
+        assert len(completed) == 0
+
+    def test_empty_allowlist_allows_all(self):
+        """Empty allowed_user_ids means no restriction."""
+        key = _make_secret_key()
+        members = [
+            SimpleNamespace(id=9999, name="Bot"),
+            SimpleNamespace(id=42, name="Alice"),
+        ]
+        receiver = _make_voice_receiver(
+            key, allowed_user_ids=None, members=members,
+        )
+
+        for seq in range(1, 30):
+            packet = _build_encrypted_rtp_packet(
+                key, b'\xf8\xff\xfe', ssrc=100, seq=seq, timestamp=960 * seq
+            )
+            receiver._on_packet(packet)
+
+        receiver._last_packet_time[100] = time.monotonic() - 3.0
+        completed = receiver.check_silence()
+        # Auto-mapped to sole non-bot member
+        assert len(completed) == 1
+        assert completed[0][0] == 42
+
+
+class TestRejoinFlow:
+    """Leave and rejoin: state cleanup and fresh receiver."""
+
+    def test_stop_then_new_receiver_clean_state(self):
+        """After stop(), a new receiver starts with empty state."""
+        key = _make_secret_key()
+        receiver1 = _make_voice_receiver(key)
+        receiver1.map_ssrc(100, 42)
+
+        for seq in range(1, 10):
+            packet = _build_encrypted_rtp_packet(
+                key, b'\xf8\xff\xfe', ssrc=100, seq=seq, timestamp=960 * seq
+            )
+            receiver1._on_packet(packet)
+
+        assert len(receiver1._buffers[100]) > 0
+        receiver1.stop()
+
+        # New receiver (simulates rejoin)
+        receiver2 = _make_voice_receiver(key)
+        assert len(receiver2._buffers) == 0
+        assert len(receiver2._ssrc_to_user) == 0
+        assert len(receiver2._decoders) == 0
+
+    def test_rejoin_new_ssrc_works(self):
+        """After rejoin, user may get new SSRC — still works."""
+        key = _make_secret_key()
+        receiver1 = _make_voice_receiver(key)
+        receiver1.map_ssrc(100, 42)  # old SSRC
+        receiver1.stop()
+
+        receiver2 = _make_voice_receiver(key)
+        receiver2.map_ssrc(200, 42)  # new SSRC after rejoin
+
+        for seq in range(1, 30):
+            packet = _build_encrypted_rtp_packet(
+                key, b'\xf8\xff\xfe', ssrc=200, seq=seq, timestamp=960 * seq
+            )
+            receiver2._on_packet(packet)
+
+        receiver2._last_packet_time[200] = time.monotonic() - 3.0
+        completed = receiver2.check_silence()
+        assert len(completed) == 1
+        assert completed[0][0] == 42
+
+    def test_rejoin_without_speaking_event_automap(self):
+        """Rejoin without SPEAKING event — auto-map sole allowed user."""
+        key = _make_secret_key()
+        members = [
+            SimpleNamespace(id=9999, name="Bot"),
+            SimpleNamespace(id=42, name="Alice"),
+        ]
+
+        # First session
+        receiver1 = _make_voice_receiver(
+            key, allowed_user_ids={"42"}, members=members,
+        )
+        receiver1.stop()
+
+        # Rejoin — new key (Discord may assign new secret_key)
+        new_key = _make_secret_key()
+        receiver2 = _make_voice_receiver(
+            new_key, allowed_user_ids={"42"}, members=members,
+        )
+        # No map_ssrc — simulating missing SPEAKING event
+
+        for seq in range(1, 30):
+            packet = _build_encrypted_rtp_packet(
+                new_key, b'\xf8\xff\xfe', ssrc=300, seq=seq, timestamp=960 * seq
+            )
+            receiver2._on_packet(packet)
+
+        receiver2._last_packet_time[300] = time.monotonic() - 3.0
+        completed = receiver2.check_silence()
+        assert len(completed) == 1
+        assert completed[0][0] == 42
+
+
+class TestMultiGuildIsolation:
+    """Each guild has independent voice state."""
+
+    def test_separate_receivers_independent(self):
+        """Two receivers (different guilds) don't interfere."""
+        key1 = _make_secret_key()
+        key2 = _make_secret_key()
+
+        receiver1 = _make_voice_receiver(key1, bot_ssrc=1111)
+        receiver2 = _make_voice_receiver(key2, bot_ssrc=2222)
+
+        receiver1.map_ssrc(100, 42)
+        receiver2.map_ssrc(200, 99)
+
+        # Send to receiver1
+        for seq in range(1, 10):
+            packet = _build_encrypted_rtp_packet(
+                key1, b'\xf8\xff\xfe', ssrc=100, seq=seq, timestamp=960 * seq
+            )
+            receiver1._on_packet(packet)
+
+        # receiver2 should be empty
+        assert len(receiver2._buffers) == 0
+        assert 100 in receiver1._buffers
+
+    def test_stop_one_doesnt_affect_other(self):
+        """Stopping one receiver doesn't affect another."""
+        key1 = _make_secret_key()
+        key2 = _make_secret_key()
+
+        receiver1 = _make_voice_receiver(key1)
+        receiver2 = _make_voice_receiver(key2)
+
+        receiver1.map_ssrc(100, 42)
+        receiver2.map_ssrc(200, 99)
+
+        for seq in range(1, 10):
+            packet = _build_encrypted_rtp_packet(
+                key2, b'\xf8\xff\xfe', ssrc=200, seq=seq, timestamp=960 * seq
+            )
+            receiver2._on_packet(packet)
+
+        receiver1.stop()
+
+        # receiver2 still has data
+        assert receiver2._running is True
+        assert len(receiver2._buffers[200]) > 0
+
+
+class TestEchoPreventionFlow:
+    """Receiver pause/resume during TTS playback prevents echo."""
+
+    def test_audio_during_pause_ignored(self):
+        """Audio arriving while paused is completely ignored."""
+        key = _make_secret_key()
+        receiver = _make_voice_receiver(key)
+        receiver.map_ssrc(100, 42)
+        receiver.pause()
+
+        for seq in range(1, 30):
+            packet = _build_encrypted_rtp_packet(
+                key, b'\xf8\xff\xfe', ssrc=100, seq=seq, timestamp=960 * seq
+            )
+            receiver._on_packet(packet)
+
+        assert len(receiver._buffers.get(100, b"")) == 0
+
+    def test_audio_after_resume_processed(self):
+        """Audio arriving after resume is processed normally."""
+        key = _make_secret_key()
+        receiver = _make_voice_receiver(key)
+        receiver.map_ssrc(100, 42)
+
+        # Pause → send packets → resume → send more packets
+        receiver.pause()
+        for seq in range(1, 5):
+            packet = _build_encrypted_rtp_packet(
+                key, b'\xf8\xff\xfe', ssrc=100, seq=seq, timestamp=960 * seq
+            )
+            receiver._on_packet(packet)
+        assert len(receiver._buffers.get(100, b"")) == 0
+
+        receiver.resume()
+        for seq in range(5, 35):
+            packet = _build_encrypted_rtp_packet(
+                key, b'\xf8\xff\xfe', ssrc=100, seq=seq, timestamp=960 * seq
+            )
+            receiver._on_packet(packet)
+
+        assert len(receiver._buffers[100]) > 0
+        receiver._last_packet_time[100] = time.monotonic() - 3.0
+        completed = receiver.check_silence()
+        assert len(completed) == 1
+        assert completed[0][0] == 42
--- a/tests/skills/test_google_oauth_setup.py
+++ b/tests/skills/test_google_oauth_setup.py
@@ -0,0 +1,203 @@
+"""Regression tests for Google Workspace OAuth setup.
+
+These tests cover the headless/manual auth-code flow where the browser step and
+code exchange happen in separate process invocations.
+"""
+
+import importlib.util
+import json
+import sys
+import types
+from pathlib import Path
+
+import pytest
+
+
+SCRIPT_PATH = (
+    Path(__file__).resolve().parents[2]
+    / "skills/productivity/google-workspace/scripts/setup.py"
+)
+
+
+class FakeCredentials:
+    def __init__(self, payload=None):
+        self._payload = payload or {
+            "token": "access-token",
+            "refresh_token": "refresh-token",
+            "token_uri": "https://oauth2.googleapis.com/token",
+            "client_id": "client-id",
+            "client_secret": "client-secret",
+            "scopes": ["scope-a"],
+        }
+
+    def to_json(self):
+        return json.dumps(self._payload)
+
+
+class FakeFlow:
+    created = []
+    default_state = "generated-state"
+    default_verifier = "generated-code-verifier"
+    credentials_payload = None
+    fetch_error = None
+
+    def __init__(
+        self,
+        client_secrets_file,
+        scopes,
+        *,
+        redirect_uri=None,
+        state=None,
+        code_verifier=None,
+        autogenerate_code_verifier=False,
+    ):
+        self.client_secrets_file = client_secrets_file
+        self.scopes = scopes
+        self.redirect_uri = redirect_uri
+        self.state = state
+        self.code_verifier = code_verifier
+        self.autogenerate_code_verifier = autogenerate_code_verifier
+        self.authorization_kwargs = None
+        self.fetch_token_calls = []
+        self.credentials = FakeCredentials(self.credentials_payload)
+
+        if autogenerate_code_verifier and not self.code_verifier:
+            self.code_verifier = self.default_verifier
+        if not self.state:
+            self.state = self.default_state
+
+    @classmethod
+    def reset(cls):
+        cls.created = []
+        cls.default_state = "generated-state"
+        cls.default_verifier = "generated-code-verifier"
+        cls.credentials_payload = None
+        cls.fetch_error = None
+
+    @classmethod
+    def from_client_secrets_file(cls, client_secrets_file, scopes, **kwargs):
+        inst = cls(client_secrets_file, scopes, **kwargs)
+        cls.created.append(inst)
+        return inst
+
+    def authorization_url(self, **kwargs):
+        self.authorization_kwargs = kwargs
+        return f"https://auth.example/authorize?state={self.state}", self.state
+
+    def fetch_token(self, **kwargs):
+        self.fetch_token_calls.append(kwargs)
+        if self.fetch_error:
+            raise self.fetch_error
+
+
+@pytest.fixture
+def setup_module(monkeypatch, tmp_path):
+    FakeFlow.reset()
+
+    google_auth_module = types.ModuleType("google_auth_oauthlib")
+    flow_module = types.ModuleType("google_auth_oauthlib.flow")
+    flow_module.Flow = FakeFlow
+    google_auth_module.flow = flow_module
+    monkeypatch.setitem(sys.modules, "google_auth_oauthlib", google_auth_module)
+    monkeypatch.setitem(sys.modules, "google_auth_oauthlib.flow", flow_module)
+
+    spec = importlib.util.spec_from_file_location("google_workspace_setup_test", SCRIPT_PATH)
+    module = importlib.util.module_from_spec(spec)
+    assert spec.loader is not None
+    spec.loader.exec_module(module)
+
+    monkeypatch.setattr(module, "_ensure_deps", lambda: None)
+    monkeypatch.setattr(module, "CLIENT_SECRET_PATH", tmp_path / "google_client_secret.json")
+    monkeypatch.setattr(module, "TOKEN_PATH", tmp_path / "google_token.json")
+    monkeypatch.setattr(module, "PENDING_AUTH_PATH", tmp_path / "google_oauth_pending.json", raising=False)
+
+    client_secret = {
+        "installed": {
+            "client_id": "client-id",
+            "client_secret": "client-secret",
+            "auth_uri": "https://accounts.google.com/o/oauth2/auth",
+            "token_uri": "https://oauth2.googleapis.com/token",
+        }
+    }
+    module.CLIENT_SECRET_PATH.write_text(json.dumps(client_secret))
+    return module
+
+
+class TestGetAuthUrl:
+    def test_persists_state_and_code_verifier_for_later_exchange(self, setup_module, capsys):
+        setup_module.get_auth_url()
+
+        out = capsys.readouterr().out.strip()
+        assert out == "https://auth.example/authorize?state=generated-state"
+
+        saved = json.loads(setup_module.PENDING_AUTH_PATH.read_text())
+        assert saved["state"] == "generated-state"
+        assert saved["code_verifier"] == "generated-code-verifier"
+
+        flow = FakeFlow.created[-1]
+        assert flow.autogenerate_code_verifier is True
+        assert flow.authorization_kwargs == {"access_type": "offline", "prompt": "consent"}
+
+
+class TestExchangeAuthCode:
+    def test_reuses_saved_pkce_material_for_plain_code(self, setup_module):
+        setup_module.PENDING_AUTH_PATH.write_text(
+            json.dumps({"state": "saved-state", "code_verifier": "saved-verifier"})
+        )
+
+        setup_module.exchange_auth_code("4/test-auth-code")
+
+        flow = FakeFlow.created[-1]
+        assert flow.state == "saved-state"
+        assert flow.code_verifier == "saved-verifier"
+        assert flow.fetch_token_calls == [{"code": "4/test-auth-code"}]
+        assert json.loads(setup_module.TOKEN_PATH.read_text())["token"] == "access-token"
+        assert not setup_module.PENDING_AUTH_PATH.exists()
+
+    def test_extracts_code_from_redirect_url_and_checks_state(self, setup_module):
+        setup_module.PENDING_AUTH_PATH.write_text(
+            json.dumps({"state": "saved-state", "code_verifier": "saved-verifier"})
+        )
+
+        setup_module.exchange_auth_code(
+            "http://localhost:1/?code=4/extracted-code&state=saved-state&scope=gmail"
+        )
+
+        flow = FakeFlow.created[-1]
+        assert flow.fetch_token_calls == [{"code": "4/extracted-code"}]
+
+    def test_rejects_state_mismatch(self, setup_module, capsys):
+        setup_module.PENDING_AUTH_PATH.write_text(
+            json.dumps({"state": "saved-state", "code_verifier": "saved-verifier"})
+        )
+
+        with pytest.raises(SystemExit):
+            setup_module.exchange_auth_code(
+                "http://localhost:1/?code=4/extracted-code&state=wrong-state"
+            )
+
+        out = capsys.readouterr().out
+        assert "state mismatch" in out.lower()
+        assert not setup_module.TOKEN_PATH.exists()
+
+    def test_requires_pending_auth_session(self, setup_module, capsys):
+        with pytest.raises(SystemExit):
+            setup_module.exchange_auth_code("4/test-auth-code")
+
+        out = capsys.readouterr().out
+        assert "run --auth-url first" in out.lower()
+        assert not setup_module.TOKEN_PATH.exists()
+
+    def test_keeps_pending_auth_session_when_exchange_fails(self, setup_module, capsys):
+        setup_module.PENDING_AUTH_PATH.write_text(
+            json.dumps({"state": "saved-state", "code_verifier": "saved-verifier"})
+        )
+        FakeFlow.fetch_error = Exception("invalid_grant: Missing code verifier")
+
+        with pytest.raises(SystemExit):
+            setup_module.exchange_auth_code("4/test-auth-code")
+
+        out = capsys.readouterr().out
+        assert "token exchange failed" in out.lower()
+        assert setup_module.PENDING_AUTH_PATH.exists()
+        assert not setup_module.TOKEN_PATH.exists()
--- a/tests/test_anthropic_adapter.py
+++ b/tests/test_anthropic_adapter.py
@@ -16,6 +16,7 @@ from agent.anthropic_adapter import (
    build_anthropic_kwargs,
    convert_messages_to_anthropic,
    convert_tools_to_anthropic,
+    get_anthropic_token_source,
    is_claude_code_token_valid,
    normalize_anthropic_response,
    normalize_model_name,
@@ -87,16 +88,25 @@ class TestReadClaudeCodeCredentials:
        cred_file.parent.mkdir(parents=True)
        cred_file.write_text(json.dumps({
            "claudeAiOauth": {
-                "accessToken": "sk-ant-oat01-test-token",
-                "refreshToken": "sk-ant-ort01-refresh",
+                "accessToken": "sk-ant-oat01-token",
+                "refreshToken": "sk-ant-oat01-refresh",
                "expiresAt": int(time.time() * 1000) + 3600_000,
            }
        }))
        monkeypatch.setattr("agent.anthropic_adapter.Path.home", lambda: tmp_path)
        creds = read_claude_code_credentials()
        assert creds is not None
-        assert creds["accessToken"] == "sk-ant-oat01-test-token"
-        assert creds["refreshToken"] == "sk-ant-ort01-refresh"
+        assert creds["accessToken"] == "sk-ant-oat01-token"
+        assert creds["refreshToken"] == "sk-ant-oat01-refresh"
+        assert creds["source"] == "claude_code_credentials_file"
+
+    def test_ignores_primary_api_key_for_native_anthropic_resolution(self, tmp_path, monkeypatch):
+        claude_json = tmp_path / ".claude.json"
+        claude_json.write_text(json.dumps({"primaryApiKey": "sk-ant-api03-primary"}))
+        monkeypatch.setattr("agent.anthropic_adapter.Path.home", lambda: tmp_path)
+
+        creds = read_claude_code_credentials()
+        assert creds is None

    def test_returns_none_for_missing_file(self, tmp_path, monkeypatch):
        monkeypatch.setattr("agent.anthropic_adapter.Path.home", lambda: tmp_path)
@@ -139,6 +149,24 @@ class TestResolveAnthropicToken:
        monkeypatch.setenv("ANTHROPIC_TOKEN", "sk-ant-oat01-mytoken")
        assert resolve_anthropic_token() == "sk-ant-oat01-mytoken"

+    def test_reports_claude_json_primary_key_source(self, monkeypatch, tmp_path):
+        monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)
+        monkeypatch.delenv("ANTHROPIC_TOKEN", raising=False)
+        monkeypatch.delenv("CLAUDE_CODE_OAUTH_TOKEN", raising=False)
+        (tmp_path / ".claude.json").write_text(json.dumps({"primaryApiKey": "sk-ant-api03-primary"}))
+        monkeypatch.setattr("agent.anthropic_adapter.Path.home", lambda: tmp_path)
+
+        assert get_anthropic_token_source("sk-ant-api03-primary") == "claude_json_primary_api_key"
+
+    def test_does_not_resolve_primary_api_key_as_native_anthropic_token(self, monkeypatch, tmp_path):
+        monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)
+        monkeypatch.delenv("ANTHROPIC_TOKEN", raising=False)
+        monkeypatch.delenv("CLAUDE_CODE_OAUTH_TOKEN", raising=False)
+        (tmp_path / ".claude.json").write_text(json.dumps({"primaryApiKey": "sk-ant-api03-primary"}))
+        monkeypatch.setattr("agent.anthropic_adapter.Path.home", lambda: tmp_path)
+
+        assert resolve_anthropic_token() is None
+
    def test_falls_back_to_api_key_when_no_oauth_sources_exist(self, monkeypatch, tmp_path):
        monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-ant-api03-mykey")
        monkeypatch.delenv("ANTHROPIC_TOKEN", raising=False)
@@ -181,6 +209,33 @@ class TestResolveAnthropicToken:
        monkeypatch.setattr("agent.anthropic_adapter.Path.home", lambda: tmp_path)
        assert resolve_anthropic_token() == "cc-auto-token"

+    def test_prefers_refreshable_claude_code_credentials_over_static_anthropic_token(self, monkeypatch, tmp_path):
+        monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)
+        monkeypatch.setenv("ANTHROPIC_TOKEN", "sk-ant-oat01-static-token")
+        monkeypatch.delenv("CLAUDE_CODE_OAUTH_TOKEN", raising=False)
+        cred_file = tmp_path / ".claude" / ".credentials.json"
+        cred_file.parent.mkdir(parents=True)
+        cred_file.write_text(json.dumps({
+            "claudeAiOauth": {
+                "accessToken": "cc-auto-token",
+                "refreshToken": "refresh-token",
+                "expiresAt": int(time.time() * 1000) + 3600_000,
+            }
+        }))
+        monkeypatch.setattr("agent.anthropic_adapter.Path.home", lambda: tmp_path)
+
+        assert resolve_anthropic_token() == "cc-auto-token"
+
+    def test_keeps_static_anthropic_token_when_only_non_refreshable_claude_key_exists(self, monkeypatch, tmp_path):
+        monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)
+        monkeypatch.setenv("ANTHROPIC_TOKEN", "sk-ant-oat01-static-token")
+        monkeypatch.delenv("CLAUDE_CODE_OAUTH_TOKEN", raising=False)
+        claude_json = tmp_path / ".claude.json"
+        claude_json.write_text(json.dumps({"primaryApiKey": "sk-ant-api03-managed-key"}))
+        monkeypatch.setattr("agent.anthropic_adapter.Path.home", lambda: tmp_path)
+
+        assert resolve_anthropic_token() == "sk-ant-oat01-static-token"
+

 class TestRefreshOauthToken:
    def test_returns_none_without_refresh_token(self):
@@ -279,6 +334,27 @@ class TestResolveWithRefresh:

        assert result == "refreshed-token"

+    def test_static_env_oauth_token_does_not_block_refreshable_claude_creds(self, monkeypatch, tmp_path):
+        monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)
+        monkeypatch.setenv("ANTHROPIC_TOKEN", "sk-ant-oat01-expired-env-token")
+        monkeypatch.delenv("CLAUDE_CODE_OAUTH_TOKEN", raising=False)
+
+        cred_file = tmp_path / ".claude" / ".credentials.json"
+        cred_file.parent.mkdir(parents=True)
+        cred_file.write_text(json.dumps({
+            "claudeAiOauth": {
+                "accessToken": "expired-claude-creds-token",
+                "refreshToken": "valid-refresh",
+                "expiresAt": int(time.time() * 1000) - 3600_000,
+            }
+        }))
+        monkeypatch.setattr("agent.anthropic_adapter.Path.home", lambda: tmp_path)
+
+        with patch("agent.anthropic_adapter._refresh_oauth_token", return_value="refreshed-token"):
+            result = resolve_anthropic_token()
+
+        assert result == "refreshed-token"
+

 class TestRunOauthSetupToken:
    def test_raises_when_claude_not_installed(self, monkeypatch):
@@ -419,6 +495,59 @@ class TestConvertMessages:
        assert len(result) == 1
        assert result[0]["role"] == "user"

+    def test_converts_user_image_url_blocks_to_anthropic_image_blocks(self):
+        messages = [
+            {
+                "role": "user",
+                "content": [
+                    {"type": "text", "text": "Can you see this?"},
+                    {"type": "image_url", "image_url": {"url": "https://example.com/cat.png"}},
+                ],
+            }
+        ]
+
+        _, result = convert_messages_to_anthropic(messages)
+
+        assert result == [
+            {
+                "role": "user",
+                "content": [
+                    {"type": "text", "text": "Can you see this?"},
+                    {"type": "image", "source": {"type": "url", "url": "https://example.com/cat.png"}},
+                ],
+            }
+        ]
+
+    def test_converts_data_url_image_blocks_to_base64_anthropic_image_blocks(self):
+        messages = [
+            {
+                "role": "user",
+                "content": [
+                    {"type": "input_text", "text": "What is in this screenshot?"},
+                    {"type": "input_image", "image_url": "data:image/png;base64,AAAA"},
+                ],
+            }
+        ]
+
+        _, result = convert_messages_to_anthropic(messages)
+
+        assert result == [
+            {
+                "role": "user",
+                "content": [
+                    {"type": "text", "text": "What is in this screenshot?"},
+                    {
+                        "type": "image",
+                        "source": {
+                            "type": "base64",
+                            "media_type": "image/png",
+                            "data": "AAAA",
+                        },
+                    },
+                ],
+            }
+        ]
+
    def test_converts_tool_calls(self):
        messages = [
            {
@@ -519,6 +648,56 @@ class TestConvertMessages:
        assert tool_block["content"] == "result"
        assert tool_block["cache_control"] == {"type": "ephemeral"}

+    def test_converts_data_url_image_to_anthropic_image_block(self):
+        messages = [
+            {
+                "role": "user",
+                "content": [
+                    {"type": "text", "text": "Describe this image"},
+                    {
+                        "type": "image_url",
+                        "image_url": {"url": "data:image/png;base64,ZmFrZQ=="},
+                    },
+                ],
+            }
+        ]
+
+        _, result = convert_messages_to_anthropic(messages)
+        blocks = result[0]["content"]
+        assert blocks[0] == {"type": "text", "text": "Describe this image"}
+        assert blocks[1] == {
+            "type": "image",
+            "source": {
+                "type": "base64",
+                "media_type": "image/png",
+                "data": "ZmFrZQ==",
+            },
+        }
+
+    def test_converts_remote_image_url_to_anthropic_image_block(self):
+        messages = [
+            {
+                "role": "user",
+                "content": [
+                    {"type": "text", "text": "Describe this image"},
+                    {
+                        "type": "image_url",
+                        "image_url": {"url": "https://example.com/cat.png"},
+                    },
+                ],
+            }
+        ]
+
+        _, result = convert_messages_to_anthropic(messages)
+        blocks = result[0]["content"]
+        assert blocks[1] == {
+            "type": "image",
+            "source": {
+                "type": "url",
+                "url": "https://example.com/cat.png",
+            },
+        }
+
    def test_empty_cached_assistant_tool_turn_converts_without_empty_text_block(self):
        messages = apply_anthropic_cache_control([
            {"role": "system", "content": "System prompt"},
--- a/tests/test_anthropic_oauth_flow.py
+++ b/tests/test_anthropic_oauth_flow.py
@@ -0,0 +1,51 @@
+"""Tests for Anthropic OAuth setup flow behavior."""
+
+from hermes_cli.config import load_env, save_env_value
+
+
+def test_run_anthropic_oauth_flow_prefers_claude_code_credentials(tmp_path, monkeypatch, capsys):
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+    monkeypatch.setattr(
+        "agent.anthropic_adapter.run_oauth_setup_token",
+        lambda: "sk-ant-oat01-from-claude-setup",
+    )
+    monkeypatch.setattr(
+        "agent.anthropic_adapter.read_claude_code_credentials",
+        lambda: {
+            "accessToken": "cc-access-token",
+            "refreshToken": "cc-refresh-token",
+            "expiresAt": 9999999999999,
+        },
+    )
+    monkeypatch.setattr(
+        "agent.anthropic_adapter.is_claude_code_token_valid",
+        lambda creds: True,
+    )
+
+    from hermes_cli.main import _run_anthropic_oauth_flow
+
+    save_env_value("ANTHROPIC_TOKEN", "stale-env-token")
+    assert _run_anthropic_oauth_flow(save_env_value) is True
+
+    env_vars = load_env()
+    assert env_vars["ANTHROPIC_TOKEN"] == ""
+    assert env_vars["ANTHROPIC_API_KEY"] == ""
+    output = capsys.readouterr().out
+    assert "Claude Code credentials linked" in output
+
+
+def test_run_anthropic_oauth_flow_manual_token_still_persists(tmp_path, monkeypatch, capsys):
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+    monkeypatch.setattr("agent.anthropic_adapter.run_oauth_setup_token", lambda: None)
+    monkeypatch.setattr("agent.anthropic_adapter.read_claude_code_credentials", lambda: None)
+    monkeypatch.setattr("agent.anthropic_adapter.is_claude_code_token_valid", lambda creds: False)
+    monkeypatch.setattr("builtins.input", lambda _prompt="": "sk-ant-oat01-manual-token")
+
+    from hermes_cli.main import _run_anthropic_oauth_flow
+
+    assert _run_anthropic_oauth_flow(save_env_value) is True
+
+    env_vars = load_env()
+    assert env_vars["ANTHROPIC_TOKEN"] == "sk-ant-oat01-manual-token"
+    output = capsys.readouterr().out
+    assert "Setup-token saved" in output
--- a/tests/test_anthropic_provider_persistence.py
+++ b/tests/test_anthropic_provider_persistence.py
@@ -17,6 +17,21 @@ def test_save_anthropic_oauth_token_uses_token_slot_and_clears_api_key(tmp_path,
    assert env_vars["ANTHROPIC_API_KEY"] == ""


+def test_use_anthropic_claude_code_credentials_clears_env_slots(tmp_path, monkeypatch):
+    home = tmp_path / "hermes"
+    home.mkdir()
+    monkeypatch.setenv("HERMES_HOME", str(home))
+
+    from hermes_cli.config import save_anthropic_oauth_token, use_anthropic_claude_code_credentials
+
+    save_anthropic_oauth_token("sk-ant-oat01-token")
+    use_anthropic_claude_code_credentials()
+
+    env_vars = load_env()
+    assert env_vars["ANTHROPIC_TOKEN"] == ""
+    assert env_vars["ANTHROPIC_API_KEY"] == ""
+
+
 def test_save_anthropic_api_key_uses_api_key_slot_and_clears_token(tmp_path, monkeypatch):
    home = tmp_path / "hermes"
    home.mkdir()
@@ -24,8 +39,8 @@ def test_save_anthropic_api_key_uses_api_key_slot_and_clears_token(tmp_path, mon

    from hermes_cli.config import save_anthropic_api_key

-    save_anthropic_api_key("sk-ant-api03-test-key")
+    save_anthropic_api_key("sk-ant-api03-key")

    env_vars = load_env()
-    assert env_vars["ANTHROPIC_API_KEY"] == "sk-ant-api03-test-key"
+    assert env_vars["ANTHROPIC_API_KEY"] == "sk-ant-api03-key"
    assert env_vars["ANTHROPIC_TOKEN"] == ""
--- a/tests/test_api_key_providers.py
+++ b/tests/test_api_key_providers.py
@@ -426,3 +426,30 @@ class TestKimiCodeCredentialAutoDetect:
        monkeypatch.setenv("GLM_API_KEY", "sk-kimi-looks-like-kimi-but-isnt")
        creds = resolve_api_key_provider_credentials("zai")
        assert creds["base_url"] == "https://api.z.ai/api/paas/v4"
+
+
+# =============================================================================
+# Kimi / Moonshot model list isolation tests
+# =============================================================================
+
+class TestKimiMoonshotModelListIsolation:
+    """Moonshot (legacy) users must not see Coding Plan-only models."""
+
+    def test_moonshot_list_excludes_coding_plan_only_models(self):
+        from hermes_cli.main import _PROVIDER_MODELS
+        moonshot_models = _PROVIDER_MODELS["moonshot"]
+        coding_plan_only = {"kimi-for-coding", "kimi-k2-thinking-turbo"}
+        leaked = set(moonshot_models) & coding_plan_only
+        assert not leaked, f"Moonshot list contains Coding Plan-only models: {leaked}"
+
+    def test_moonshot_list_contains_shared_models(self):
+        from hermes_cli.main import _PROVIDER_MODELS
+        moonshot_models = _PROVIDER_MODELS["moonshot"]
+        assert "kimi-k2.5" in moonshot_models
+        assert "kimi-k2-thinking" in moonshot_models
+
+    def test_coding_plan_list_contains_plan_specific_models(self):
+        from hermes_cli.main import _PROVIDER_MODELS
+        coding_models = _PROVIDER_MODELS["kimi-coding"]
+        assert "kimi-for-coding" in coding_models
+        assert "kimi-k2-thinking-turbo" in coding_models
--- a/tests/test_atomic_json_write.py
+++ b/tests/test_atomic_json_write.py
@@ -68,6 +68,22 @@ class TestAtomicJsonWrite:
        tmp_files = [f for f in tmp_path.iterdir() if ".tmp" in f.name]
        assert len(tmp_files) == 0

+    def test_cleans_up_temp_file_on_baseexception(self, tmp_path):
+        class SimulatedAbort(BaseException):
+            pass
+
+        target = tmp_path / "data.json"
+        original = {"preserved": True}
+        target.write_text(json.dumps(original), encoding="utf-8")
+
+        with patch("utils.json.dump", side_effect=SimulatedAbort):
+            with pytest.raises(SimulatedAbort):
+                atomic_json_write(target, {"new": True})
+
+        tmp_files = [f for f in tmp_path.iterdir() if ".tmp" in f.name]
+        assert len(tmp_files) == 0
+        assert json.loads(target.read_text(encoding="utf-8")) == original
+
    def test_accepts_string_path(self, tmp_path):
        target = str(tmp_path / "string_path.json")
        atomic_json_write(target, {"string": True})
--- a/tests/test_atomic_yaml_write.py
+++ b/tests/test_atomic_yaml_write.py
@@ -0,0 +1,44 @@
+"""Tests for utils.atomic_yaml_write — crash-safe YAML file writes."""
+
+from pathlib import Path
+from unittest.mock import patch
+
+import pytest
+import yaml
+
+from utils import atomic_yaml_write
+
+
+class TestAtomicYamlWrite:
+    def test_writes_valid_yaml(self, tmp_path):
+        target = tmp_path / "data.yaml"
+        data = {"key": "value", "nested": {"a": 1}}
+
+        atomic_yaml_write(target, data)
+
+        assert yaml.safe_load(target.read_text(encoding="utf-8")) == data
+
+    def test_cleans_up_temp_file_on_baseexception(self, tmp_path):
+        class SimulatedAbort(BaseException):
+            pass
+
+        target = tmp_path / "data.yaml"
+        original = {"preserved": True}
+        target.write_text(yaml.safe_dump(original), encoding="utf-8")
+
+        with patch("utils.yaml.dump", side_effect=SimulatedAbort):
+            with pytest.raises(SimulatedAbort):
+                atomic_yaml_write(target, {"new": True})
+
+        tmp_files = [f for f in tmp_path.iterdir() if ".tmp" in f.name]
+        assert len(tmp_files) == 0
+        assert yaml.safe_load(target.read_text(encoding="utf-8")) == original
+
+    def test_appends_extra_content(self, tmp_path):
+        target = tmp_path / "data.yaml"
+
+        atomic_yaml_write(target, {"key": "value"}, extra_content="\n# comment\n")
+
+        text = target.read_text(encoding="utf-8")
+        assert "key: value" in text
+        assert "# comment" in text
--- a/tests/test_auxiliary_config_bridge.py
+++ b/tests/test_auxiliary_config_bridge.py
@@ -25,7 +25,9 @@ def _run_auxiliary_bridge(config_dict, monkeypatch):
    # Clear env vars
    for key in (
        "AUXILIARY_VISION_PROVIDER", "AUXILIARY_VISION_MODEL",
+        "AUXILIARY_VISION_BASE_URL", "AUXILIARY_VISION_API_KEY",
        "AUXILIARY_WEB_EXTRACT_PROVIDER", "AUXILIARY_WEB_EXTRACT_MODEL",
+        "AUXILIARY_WEB_EXTRACT_BASE_URL", "AUXILIARY_WEB_EXTRACT_API_KEY",
        "CONTEXT_COMPRESSION_PROVIDER", "CONTEXT_COMPRESSION_MODEL",
    ):
        monkeypatch.delenv(key, raising=False)
@@ -47,19 +49,35 @@ def _run_auxiliary_bridge(config_dict, monkeypatch):
    auxiliary_cfg = config_dict.get("auxiliary", {})
    if auxiliary_cfg and isinstance(auxiliary_cfg, dict):
        aux_task_env = {
-            "vision":      ("AUXILIARY_VISION_PROVIDER",      "AUXILIARY_VISION_MODEL"),
-            "web_extract": ("AUXILIARY_WEB_EXTRACT_PROVIDER",  "AUXILIARY_WEB_EXTRACT_MODEL"),
+            "vision": {
+                "provider": "AUXILIARY_VISION_PROVIDER",
+                "model": "AUXILIARY_VISION_MODEL",
+                "base_url": "AUXILIARY_VISION_BASE_URL",
+                "api_key": "AUXILIARY_VISION_API_KEY",
+            },
+            "web_extract": {
+                "provider": "AUXILIARY_WEB_EXTRACT_PROVIDER",
+                "model": "AUXILIARY_WEB_EXTRACT_MODEL",
+                "base_url": "AUXILIARY_WEB_EXTRACT_BASE_URL",
+                "api_key": "AUXILIARY_WEB_EXTRACT_API_KEY",
+            },
        }
-        for task_key, (prov_env, model_env) in aux_task_env.items():
+        for task_key, env_map in aux_task_env.items():
            task_cfg = auxiliary_cfg.get(task_key, {})
            if not isinstance(task_cfg, dict):
                continue
            prov = str(task_cfg.get("provider", "")).strip()
            model = str(task_cfg.get("model", "")).strip()
+            base_url = str(task_cfg.get("base_url", "")).strip()
+            api_key = str(task_cfg.get("api_key", "")).strip()
            if prov and prov != "auto":
-                os.environ[prov_env] = prov
+                os.environ[env_map["provider"]] = prov
            if model:
-                os.environ[model_env] = model
+                os.environ[env_map["model"]] = model
+            if base_url:
+                os.environ[env_map["base_url"]] = base_url
+            if api_key:
+                os.environ[env_map["api_key"]] = api_key


 # ── Config bridging tests ────────────────────────────────────────────────────
@@ -101,6 +119,21 @@ class TestAuxiliaryConfigBridge:
        assert os.environ.get("AUXILIARY_WEB_EXTRACT_PROVIDER") == "nous"
        assert os.environ.get("AUXILIARY_WEB_EXTRACT_MODEL") == "gemini-2.5-flash"

+    def test_direct_endpoint_bridged(self, monkeypatch):
+        config = {
+            "auxiliary": {
+                "vision": {
+                    "base_url": "http://localhost:1234/v1",
+                    "api_key": "local-key",
+                    "model": "qwen2.5-vl",
+                }
+            }
+        }
+        _run_auxiliary_bridge(config, monkeypatch)
+        assert os.environ.get("AUXILIARY_VISION_BASE_URL") == "http://localhost:1234/v1"
+        assert os.environ.get("AUXILIARY_VISION_API_KEY") == "local-key"
+        assert os.environ.get("AUXILIARY_VISION_MODEL") == "qwen2.5-vl"
+
    def test_compression_provider_bridged(self, monkeypatch):
        config = {
            "compression": {
@@ -200,8 +233,12 @@ class TestGatewayBridgeCodeParity:
        # Check for key patterns that indicate the bridge is present
        assert "AUXILIARY_VISION_PROVIDER" in content
        assert "AUXILIARY_VISION_MODEL" in content
+        assert "AUXILIARY_VISION_BASE_URL" in content
+        assert "AUXILIARY_VISION_API_KEY" in content
        assert "AUXILIARY_WEB_EXTRACT_PROVIDER" in content
        assert "AUXILIARY_WEB_EXTRACT_MODEL" in content
+        assert "AUXILIARY_WEB_EXTRACT_BASE_URL" in content
+        assert "AUXILIARY_WEB_EXTRACT_API_KEY" in content

    def test_gateway_has_compression_provider(self):
        """Gateway must bridge compression.summary_provider."""
--- a/tests/test_cli_mcp_config_watch.py
+++ b/tests/test_cli_mcp_config_watch.py
@@ -0,0 +1,103 @@
+"""Tests for automatic MCP reload when config.yaml mcp_servers section changes."""
+import time
+from pathlib import Path
+from unittest.mock import MagicMock, patch
+
+
+def _make_cli(tmp_path, mcp_servers=None):
+    """Create a minimal HermesCLI instance with mocked config."""
+    import cli as cli_mod
+    obj = object.__new__(cli_mod.HermesCLI)
+    obj.config = {"mcp_servers": mcp_servers or {}}
+    obj._agent_running = False
+    obj._last_config_check = 0.0
+    obj._config_mcp_servers = mcp_servers or {}
+
+    cfg_file = tmp_path / "config.yaml"
+    cfg_file.write_text("mcp_servers: {}\n")
+    obj._config_mtime = cfg_file.stat().st_mtime
+
+    obj._reload_mcp = MagicMock()
+    obj._busy_command = MagicMock()
+    obj._busy_command.return_value.__enter__ = MagicMock(return_value=None)
+    obj._busy_command.return_value.__exit__ = MagicMock(return_value=False)
+    obj._slow_command_status = MagicMock(return_value="reloading...")
+
+    return obj, cfg_file
+
+
+class TestMCPConfigWatch:
+
+    def test_no_change_does_not_reload(self, tmp_path):
+        """If mtime and mcp_servers unchanged, _reload_mcp is NOT called."""
+        obj, cfg_file = _make_cli(tmp_path)
+
+        with patch("hermes_cli.config.get_config_path", return_value=cfg_file):
+            obj._check_config_mcp_changes()
+
+        obj._reload_mcp.assert_not_called()
+
+    def test_mtime_change_with_same_mcp_servers_does_not_reload(self, tmp_path):
+        """If file mtime changes but mcp_servers is identical, no reload."""
+        import yaml
+        obj, cfg_file = _make_cli(tmp_path, mcp_servers={"fs": {"command": "npx"}})
+
+        # Write same mcp_servers but touch the file
+        cfg_file.write_text(yaml.dump({"mcp_servers": {"fs": {"command": "npx"}}}))
+        # Force mtime to appear changed
+        obj._config_mtime = 0.0
+
+        with patch("hermes_cli.config.get_config_path", return_value=cfg_file):
+            obj._check_config_mcp_changes()
+
+        obj._reload_mcp.assert_not_called()
+
+    def test_new_mcp_server_triggers_reload(self, tmp_path):
+        """Adding a new MCP server to config triggers auto-reload."""
+        import yaml
+        obj, cfg_file = _make_cli(tmp_path, mcp_servers={})
+
+        # Simulate user adding a new MCP server to config.yaml
+        cfg_file.write_text(yaml.dump({"mcp_servers": {"github": {"url": "https://mcp.github.com"}}}))
+        obj._config_mtime = 0.0  # force stale mtime
+
+        with patch("hermes_cli.config.get_config_path", return_value=cfg_file):
+            obj._check_config_mcp_changes()
+
+        obj._reload_mcp.assert_called_once()
+
+    def test_removed_mcp_server_triggers_reload(self, tmp_path):
+        """Removing an MCP server from config triggers auto-reload."""
+        import yaml
+        obj, cfg_file = _make_cli(tmp_path, mcp_servers={"github": {"url": "https://mcp.github.com"}})
+
+        # Simulate user removing the server
+        cfg_file.write_text(yaml.dump({"mcp_servers": {}}))
+        obj._config_mtime = 0.0
+
+        with patch("hermes_cli.config.get_config_path", return_value=cfg_file):
+            obj._check_config_mcp_changes()
+
+        obj._reload_mcp.assert_called_once()
+
+    def test_interval_throttle_skips_check(self, tmp_path):
+        """If called within CONFIG_WATCH_INTERVAL, stat() is skipped."""
+        obj, cfg_file = _make_cli(tmp_path)
+        obj._last_config_check = time.monotonic()  # just checked
+
+        with patch("hermes_cli.config.get_config_path", return_value=cfg_file), \
+             patch.object(Path, "stat") as mock_stat:
+            obj._check_config_mcp_changes()
+            mock_stat.assert_not_called()
+
+        obj._reload_mcp.assert_not_called()
+
+    def test_missing_config_file_does_not_crash(self, tmp_path):
+        """If config.yaml doesn't exist, _check_config_mcp_changes is a no-op."""
+        obj, cfg_file = _make_cli(tmp_path)
+        missing = tmp_path / "nonexistent.yaml"
+
+        with patch("hermes_cli.config.get_config_path", return_value=missing):
+            obj._check_config_mcp_changes()  # should not raise
+
+        obj._reload_mcp.assert_not_called()
--- a/tests/test_cli_plan_command.py
+++ b/tests/test_cli_plan_command.py
@@ -0,0 +1,67 @@
+"""Tests for the /plan CLI slash command."""
+
+from unittest.mock import MagicMock, patch
+
+from agent.skill_commands import scan_skill_commands
+from cli import HermesCLI
+
+
+def _make_cli():
+    cli_obj = HermesCLI.__new__(HermesCLI)
+    cli_obj.config = {}
+    cli_obj.console = MagicMock()
+    cli_obj.agent = None
+    cli_obj.conversation_history = []
+    cli_obj.session_id = "sess-123"
+    cli_obj._pending_input = MagicMock()
+    return cli_obj
+
+
+def _make_plan_skill(skills_dir):
+    skill_dir = skills_dir / "plan"
+    skill_dir.mkdir(parents=True, exist_ok=True)
+    (skill_dir / "SKILL.md").write_text(
+        """---
+name: plan
+description: Plan mode skill.
+---
+
+# Plan
+
+Use the current conversation context when no explicit instruction is provided.
+Save plans under the active workspace's .hermes/plans directory.
+"""
+    )
+
+
+class TestCLIPlanCommand:
+    def test_plan_command_queues_plan_skill_message(self, tmp_path, monkeypatch):
+        cli_obj = _make_cli()
+
+        with patch("tools.skills_tool.SKILLS_DIR", tmp_path):
+            _make_plan_skill(tmp_path)
+            scan_skill_commands()
+            result = cli_obj.process_command("/plan Add OAuth login")
+
+        assert result is True
+        cli_obj._pending_input.put.assert_called_once()
+        queued = cli_obj._pending_input.put.call_args[0][0]
+        assert "Plan mode skill" in queued
+        assert "Add OAuth login" in queued
+        assert ".hermes/plans" in queued
+        assert str(tmp_path / "plans") not in queued
+        assert "active workspace/backend cwd" in queued
+        assert "Runtime note:" in queued
+
+    def test_plan_without_args_uses_skill_context_guidance(self, tmp_path, monkeypatch):
+        cli_obj = _make_cli()
+
+        with patch("tools.skills_tool.SKILLS_DIR", tmp_path):
+            _make_plan_skill(tmp_path)
+            scan_skill_commands()
+            cli_obj.process_command("/plan")
+
+        queued = cli_obj._pending_input.put.call_args[0][0]
+        assert "current conversation context" in queued
+        assert ".hermes/plans/" in queued
+        assert "conversation-plan.md" in queued
--- a/tests/test_cli_preloaded_skills.py
+++ b/tests/test_cli_preloaded_skills.py
@@ -0,0 +1,130 @@
+from __future__ import annotations
+
+import importlib
+import os
+import sys
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+
+def _make_real_cli(**kwargs):
+    clean_config = {
+        "model": {
+            "default": "anthropic/claude-opus-4.6",
+            "base_url": "https://openrouter.ai/api/v1",
+            "provider": "auto",
+        },
+        "display": {"compact": False, "tool_progress": "all"},
+        "agent": {},
+        "terminal": {"env_type": "local"},
+    }
+    clean_env = {"LLM_MODEL": "", "HERMES_MAX_ITERATIONS": ""}
+    prompt_toolkit_stubs = {
+        "prompt_toolkit": MagicMock(),
+        "prompt_toolkit.history": MagicMock(),
+        "prompt_toolkit.styles": MagicMock(),
+        "prompt_toolkit.patch_stdout": MagicMock(),
+        "prompt_toolkit.application": MagicMock(),
+        "prompt_toolkit.layout": MagicMock(),
+        "prompt_toolkit.layout.processors": MagicMock(),
+        "prompt_toolkit.filters": MagicMock(),
+        "prompt_toolkit.layout.dimension": MagicMock(),
+        "prompt_toolkit.layout.menus": MagicMock(),
+        "prompt_toolkit.widgets": MagicMock(),
+        "prompt_toolkit.key_binding": MagicMock(),
+        "prompt_toolkit.completion": MagicMock(),
+        "prompt_toolkit.formatted_text": MagicMock(),
+    }
+    with patch.dict(sys.modules, prompt_toolkit_stubs), patch.dict(
+        "os.environ", clean_env, clear=False
+    ):
+        import cli as cli_mod
+
+        cli_mod = importlib.reload(cli_mod)
+        with patch.object(cli_mod, "get_tool_definitions", return_value=[]), patch.dict(
+            cli_mod.__dict__, {"CLI_CONFIG": clean_config}
+        ):
+            return cli_mod.HermesCLI(**kwargs)
+
+
+class _DummyCLI:
+    def __init__(self, **kwargs):
+        self.kwargs = kwargs
+        self.session_id = "session-123"
+        self.system_prompt = "base prompt"
+        self.preloaded_skills = []
+
+    def show_banner(self):
+        return None
+
+    def show_tools(self):
+        return None
+
+    def show_toolsets(self):
+        return None
+
+    def run(self):
+        return None
+
+
+def test_main_applies_preloaded_skills_to_system_prompt(monkeypatch):
+    import cli as cli_mod
+
+    created = {}
+
+    def fake_cli(**kwargs):
+        created["cli"] = _DummyCLI(**kwargs)
+        return created["cli"]
+
+    monkeypatch.setattr(cli_mod, "HermesCLI", fake_cli)
+    monkeypatch.setattr(
+        cli_mod,
+        "build_preloaded_skills_prompt",
+        lambda skills, task_id=None: ("skill prompt", ["hermes-agent-dev", "github-auth"], []),
+    )
+
+    with pytest.raises(SystemExit):
+        cli_mod.main(skills="hermes-agent-dev,github-auth", list_tools=True)
+
+    cli_obj = created["cli"]
+    assert cli_obj.system_prompt == "base prompt\n\nskill prompt"
+    assert cli_obj.preloaded_skills == ["hermes-agent-dev", "github-auth"]
+
+
+def test_main_raises_for_unknown_preloaded_skill(monkeypatch):
+    import cli as cli_mod
+
+    monkeypatch.setattr(cli_mod, "HermesCLI", lambda **kwargs: _DummyCLI(**kwargs))
+    monkeypatch.setattr(
+        cli_mod,
+        "build_preloaded_skills_prompt",
+        lambda skills, task_id=None: ("", [], ["missing-skill"]),
+    )
+
+    with pytest.raises(ValueError, match=r"Unknown skill\(s\): missing-skill"):
+        cli_mod.main(skills="missing-skill", list_tools=True)
+
+
+def test_show_banner_prints_preloaded_skills_once_before_banner():
+    cli_obj = _make_real_cli(compact=False)
+    cli_obj.preloaded_skills = ["hermes-agent-dev", "github-auth"]
+    cli_obj.console = MagicMock()
+
+    with patch("cli.build_welcome_banner") as mock_banner, patch(
+        "shutil.get_terminal_size", return_value=os.terminal_size((120, 40))
+    ):
+        cli_obj.show_banner()
+        cli_obj.show_banner()
+
+    print_calls = [
+        call.args[0]
+        for call in cli_obj.console.print.call_args_list
+        if call.args and isinstance(call.args[0], str)
+    ]
+    startup_lines = [line for line in print_calls if "Activated skills:" in line]
+
+    assert len(startup_lines) == 1
+    assert "Activated skills:" in startup_lines[0]
+    assert "hermes-agent-dev, github-auth" in startup_lines[0]
+    assert mock_banner.call_count == 2
--- a/tests/test_cli_provider_resolution.py
+++ b/tests/test_cli_provider_resolution.py
@@ -336,4 +336,42 @@ def test_cmd_model_falls_back_to_auto_on_invalid_provider(monkeypatch, capsys):

    assert "Warning:" in output
    assert "falling back to auto provider detection" in output.lower()
-    assert "No change." in output
+    assert "No change." in output
+
+
+def test_model_flow_custom_saves_verified_v1_base_url(monkeypatch, capsys):
+    monkeypatch.setattr(
+        "hermes_cli.config.get_env_value",
+        lambda key: "" if key in {"OPENAI_BASE_URL", "OPENAI_API_KEY"} else "",
+    )
+    saved_env = {}
+    monkeypatch.setattr("hermes_cli.config.save_env_value", lambda key, value: saved_env.__setitem__(key, value))
+    monkeypatch.setattr("hermes_cli.auth._save_model_choice", lambda model: saved_env.__setitem__("MODEL", model))
+    monkeypatch.setattr("hermes_cli.auth.deactivate_provider", lambda: None)
+    monkeypatch.setattr("hermes_cli.main._save_custom_provider", lambda *args, **kwargs: None)
+    monkeypatch.setattr(
+        "hermes_cli.models.probe_api_models",
+        lambda api_key, base_url: {
+            "models": ["llm"],
+            "probed_url": "http://localhost:8000/v1/models",
+            "resolved_base_url": "http://localhost:8000/v1",
+            "suggested_base_url": "http://localhost:8000/v1",
+            "used_fallback": True,
+        },
+    )
+    monkeypatch.setattr(
+        "hermes_cli.config.load_config",
+        lambda: {"model": {"default": "", "provider": "custom", "base_url": ""}},
+    )
+    monkeypatch.setattr("hermes_cli.config.save_config", lambda cfg: None)
+
+    answers = iter(["http://localhost:8000", "local-key", "llm"])
+    monkeypatch.setattr("builtins.input", lambda _prompt="": next(answers))
+
+    hermes_main._model_flow_custom({})
+    output = capsys.readouterr().out
+
+    assert "Saving the working base URL instead" in output
+    assert saved_env["OPENAI_BASE_URL"] == "http://localhost:8000/v1"
+    assert saved_env["OPENAI_API_KEY"] == "local-key"
+    assert saved_env["MODEL"] == "llm"
--- a/tests/test_cli_retry.py
+++ b/tests/test_cli_retry.py
@@ -0,0 +1,49 @@
+"""Regression tests for CLI /retry history replacement semantics."""
+
+from tests.test_cli_init import _make_cli
+
+
+def test_retry_last_truncates_history_before_requeueing_message():
+    cli = _make_cli()
+    cli.conversation_history = [
+        {"role": "user", "content": "first"},
+        {"role": "assistant", "content": "one"},
+        {"role": "user", "content": "retry me"},
+        {"role": "assistant", "content": "old answer"},
+    ]
+
+    retry_msg = cli.retry_last()
+
+    assert retry_msg == "retry me"
+    assert cli.conversation_history == [
+        {"role": "user", "content": "first"},
+        {"role": "assistant", "content": "one"},
+    ]
+
+    cli.conversation_history.append({"role": "user", "content": retry_msg})
+    cli.conversation_history.append({"role": "assistant", "content": "new answer"})
+
+    assert [m["content"] for m in cli.conversation_history if m["role"] == "user"] == [
+        "first",
+        "retry me",
+    ]
+
+
+def test_process_command_retry_requeues_original_message_not_retry_command():
+    cli = _make_cli()
+    queued = []
+
+    class _Queue:
+        def put(self, value):
+            queued.append(value)
+
+    cli._pending_input = _Queue()
+    cli.conversation_history = [
+        {"role": "user", "content": "retry me"},
+        {"role": "assistant", "content": "old answer"},
+    ]
+
+    cli.process_command("/retry")
+
+    assert queued == ["retry me"]
+    assert cli.conversation_history == []
--- a/tests/test_dict_tool_call_args.py
+++ b/tests/test_dict_tool_call_args.py
@@ -0,0 +1,72 @@
+import json
+from types import SimpleNamespace
+
+
+def _tool_call(name: str, arguments):
+    return SimpleNamespace(
+        id="call_1",
+        type="function",
+        function=SimpleNamespace(name=name, arguments=arguments),
+    )
+
+
+def _response_with_tool_call(arguments):
+    assistant = SimpleNamespace(
+        content=None,
+        reasoning=None,
+        tool_calls=[_tool_call("read_file", arguments)],
+    )
+    choice = SimpleNamespace(message=assistant, finish_reason="tool_calls")
+    return SimpleNamespace(choices=[choice], usage=None)
+
+
+class _FakeChatCompletions:
+    def __init__(self):
+        self.calls = 0
+
+    def create(self, **kwargs):
+        self.calls += 1
+        if self.calls == 1:
+            return _response_with_tool_call({"path": "README.md"})
+        return SimpleNamespace(
+            choices=[
+                SimpleNamespace(
+                    message=SimpleNamespace(content="done", reasoning=None, tool_calls=[]),
+                    finish_reason="stop",
+                )
+            ],
+            usage=None,
+        )
+
+
+class _FakeClient:
+    def __init__(self):
+        self.chat = SimpleNamespace(completions=_FakeChatCompletions())
+
+
+def test_tool_call_validation_accepts_dict_arguments(monkeypatch):
+    from run_agent import AIAgent
+
+    monkeypatch.setattr("run_agent.OpenAI", lambda **kwargs: _FakeClient())
+    monkeypatch.setattr(
+        "run_agent.get_tool_definitions",
+        lambda *args, **kwargs: [{"function": {"name": "read_file"}}],
+    )
+    monkeypatch.setattr(
+        "run_agent.handle_function_call",
+        lambda name, args, task_id=None, **kwargs: json.dumps({"ok": True, "args": args}),
+    )
+
+    agent = AIAgent(
+        model="test-model",
+        api_key="test-key",
+        base_url="http://localhost:8080/v1",
+        platform="cli",
+        max_iterations=3,
+        quiet_mode=True,
+        skip_memory=True,
+    )
+
+    result = agent.run_conversation("read the file")
+
+    assert result["final_response"] == "done"
--- a/tests/test_hermes_state.py
+++ b/tests/test_hermes_state.py
@@ -361,6 +361,24 @@ class TestDeleteAndExport:
    def test_delete_nonexistent(self, db):
        assert db.delete_session("nope") is False

+    def test_resolve_session_id_exact(self, db):
+        db.create_session(session_id="20260315_092437_c9a6ff", source="cli")
+        assert db.resolve_session_id("20260315_092437_c9a6ff") == "20260315_092437_c9a6ff"
+
+    def test_resolve_session_id_unique_prefix(self, db):
+        db.create_session(session_id="20260315_092437_c9a6ff", source="cli")
+        assert db.resolve_session_id("20260315_092437_c9a6") == "20260315_092437_c9a6ff"
+
+    def test_resolve_session_id_ambiguous_prefix_returns_none(self, db):
+        db.create_session(session_id="20260315_092437_c9a6aa", source="cli")
+        db.create_session(session_id="20260315_092437_c9a6bb", source="cli")
+        assert db.resolve_session_id("20260315_092437_c9a6") is None
+
+    def test_resolve_session_id_escapes_like_wildcards(self, db):
+        db.create_session(session_id="20260315_092437_c9a6ff", source="cli")
+        db.create_session(session_id="20260315X092437_c9a6ff", source="cli")
+        assert db.resolve_session_id("20260315_092437") == "20260315_092437_c9a6ff"
+
    def test_export_session(self, db):
        db.create_session(session_id="s1", source="cli", model="test")
        db.append_message("s1", role="user", content="Hello")
--- a/tests/test_openai_client_lifecycle.py
+++ b/tests/test_openai_client_lifecycle.py
@@ -0,0 +1,181 @@
+import sys
+import threading
+import types
+from types import SimpleNamespace
+
+import httpx
+import pytest
+from openai import APIConnectionError
+
+sys.modules.setdefault("fire", types.SimpleNamespace(Fire=lambda *a, **k: None))
+sys.modules.setdefault("firecrawl", types.SimpleNamespace(Firecrawl=object))
+sys.modules.setdefault("fal_client", types.SimpleNamespace())
+
+import run_agent
+
+
+class FakeRequestClient:
+    def __init__(self, responder):
+        self._responder = responder
+        self._client = SimpleNamespace(is_closed=False)
+        self.chat = SimpleNamespace(
+            completions=SimpleNamespace(create=self._create)
+        )
+        self.responses = SimpleNamespace()
+        self.close_calls = 0
+
+    def _create(self, **kwargs):
+        return self._responder(**kwargs)
+
+    def close(self):
+        self.close_calls += 1
+        self._client.is_closed = True
+
+
+class FakeSharedClient(FakeRequestClient):
+    pass
+
+
+class OpenAIFactory:
+    def __init__(self, clients):
+        self._clients = list(clients)
+        self.calls = []
+
+    def __call__(self, **kwargs):
+        self.calls.append(dict(kwargs))
+        if not self._clients:
+            raise AssertionError("OpenAI factory exhausted")
+        return self._clients.pop(0)
+
+
+def _build_agent(shared_client=None):
+    agent = run_agent.AIAgent.__new__(run_agent.AIAgent)
+    agent.api_mode = "chat_completions"
+    agent.provider = "openai-codex"
+    agent.base_url = "https://chatgpt.com/backend-api/codex"
+    agent.model = "gpt-5-codex"
+    agent.log_prefix = ""
+    agent.quiet_mode = True
+    agent._interrupt_requested = False
+    agent._interrupt_message = None
+    agent._client_lock = threading.RLock()
+    agent._client_kwargs = {"api_key": "test-key", "base_url": agent.base_url}
+    agent.client = shared_client or FakeSharedClient(lambda **kwargs: {"shared": True})
+    return agent
+
+
+def _connection_error():
+    return APIConnectionError(
+        message="Connection error.",
+        request=httpx.Request("POST", "https://example.com/v1/chat/completions"),
+    )
+
+
+def test_retry_after_api_connection_error_recreates_request_client(monkeypatch):
+    first_request = FakeRequestClient(lambda **kwargs: (_ for _ in ()).throw(_connection_error()))
+    second_request = FakeRequestClient(lambda **kwargs: {"ok": True})
+    factory = OpenAIFactory([first_request, second_request])
+    monkeypatch.setattr(run_agent, "OpenAI", factory)
+
+    agent = _build_agent()
+
+    with pytest.raises(APIConnectionError):
+        agent._interruptible_api_call({"model": agent.model, "messages": []})
+
+    result = agent._interruptible_api_call({"model": agent.model, "messages": []})
+
+    assert result == {"ok": True}
+    assert len(factory.calls) == 2
+    assert first_request.close_calls >= 1
+    assert second_request.close_calls >= 1
+
+
+def test_closed_shared_client_is_recreated_before_request(monkeypatch):
+    stale_shared = FakeSharedClient(lambda **kwargs: (_ for _ in ()).throw(AssertionError("stale shared client used")))
+    stale_shared._client.is_closed = True
+
+    replacement_shared = FakeSharedClient(lambda **kwargs: {"replacement": True})
+    request_client = FakeRequestClient(lambda **kwargs: {"ok": "fresh-request-client"})
+    factory = OpenAIFactory([replacement_shared, request_client])
+    monkeypatch.setattr(run_agent, "OpenAI", factory)
+
+    agent = _build_agent(shared_client=stale_shared)
+    result = agent._interruptible_api_call({"model": agent.model, "messages": []})
+
+    assert result == {"ok": "fresh-request-client"}
+    assert agent.client is replacement_shared
+    assert stale_shared.close_calls >= 1
+    assert replacement_shared.close_calls == 0
+    assert len(factory.calls) == 2
+
+
+def test_concurrent_requests_do_not_break_each_other_when_one_client_closes(monkeypatch):
+    first_started = threading.Event()
+    first_closed = threading.Event()
+
+    def first_responder(**kwargs):
+        first_started.set()
+        first_client.close()
+        first_closed.set()
+        raise _connection_error()
+
+    def second_responder(**kwargs):
+        assert first_started.wait(timeout=2)
+        assert first_closed.wait(timeout=2)
+        return {"ok": "second"}
+
+    first_client = FakeRequestClient(first_responder)
+    second_client = FakeRequestClient(second_responder)
+    factory = OpenAIFactory([first_client, second_client])
+    monkeypatch.setattr(run_agent, "OpenAI", factory)
+
+    agent = _build_agent()
+    results = {}
+
+    def run_call(name):
+        try:
+            results[name] = agent._interruptible_api_call({"model": agent.model, "messages": []})
+        except Exception as exc:  # noqa: BLE001 - asserting exact type below
+            results[name] = exc
+
+    thread_one = threading.Thread(target=run_call, args=("first",), daemon=True)
+    thread_two = threading.Thread(target=run_call, args=("second",), daemon=True)
+    thread_one.start()
+    thread_two.start()
+    thread_one.join(timeout=5)
+    thread_two.join(timeout=5)
+
+    assert isinstance(results["first"], APIConnectionError)
+    assert results["second"] == {"ok": "second"}
+    assert len(factory.calls) == 2
+
+
+
+def test_streaming_call_recreates_closed_shared_client_before_request(monkeypatch):
+    chunks = iter([
+        SimpleNamespace(
+            model="gpt-5-codex",
+            choices=[SimpleNamespace(delta=SimpleNamespace(content="Hello", tool_calls=None), finish_reason=None)],
+        ),
+        SimpleNamespace(
+            model="gpt-5-codex",
+            choices=[SimpleNamespace(delta=SimpleNamespace(content=" world", tool_calls=None), finish_reason="stop")],
+        ),
+    ])
+
+    stale_shared = FakeSharedClient(lambda **kwargs: (_ for _ in ()).throw(AssertionError("stale shared client used")))
+    stale_shared._client.is_closed = True
+
+    replacement_shared = FakeSharedClient(lambda **kwargs: {"replacement": True})
+    request_client = FakeRequestClient(lambda **kwargs: chunks)
+    factory = OpenAIFactory([replacement_shared, request_client])
+    monkeypatch.setattr(run_agent, "OpenAI", factory)
+
+    agent = _build_agent(shared_client=stale_shared)
+    response = agent._streaming_api_call({"model": agent.model, "messages": []}, lambda _delta: None)
+
+    assert response.choices[0].message.content == "Hello world"
+    assert agent.client is replacement_shared
+    assert stale_shared.close_calls >= 1
+    assert request_client.close_calls >= 1
+    assert len(factory.calls) == 2
--- a/tests/test_provider_parity.py
+++ b/tests/test_provider_parity.py
@@ -543,7 +543,7 @@ class TestAuxiliaryClientProviderPriority:
             patch("agent.auxiliary_client._read_codex_access_token", return_value="codex-tok"), \
             patch("agent.auxiliary_client.OpenAI"):
            client, model = get_text_auxiliary_client()
-        assert model == "gpt-5.3-codex"
+        assert model == "gpt-5.2-codex"
        assert isinstance(client, CodexAuxiliaryClient)


--- a/tests/test_run_agent.py
+++ b/tests/test_run_agent.py
@@ -12,7 +12,7 @@ import uuid
 from logging.handlers import RotatingFileHandler
 from pathlib import Path
 from types import SimpleNamespace
-from unittest.mock import MagicMock, patch
+from unittest.mock import AsyncMock, MagicMock, patch

 import pytest

@@ -612,6 +612,25 @@ class TestBuildApiKwargs:
        kwargs = agent._build_api_kwargs(messages)
        assert kwargs["extra_body"]["reasoning"] == {"enabled": False}

+    def test_reasoning_not_sent_for_unsupported_openrouter_model(self, agent):
+        agent.model = "minimax/minimax-m2.5"
+        messages = [{"role": "user", "content": "hi"}]
+        kwargs = agent._build_api_kwargs(messages)
+        assert "reasoning" not in kwargs.get("extra_body", {})
+
+    def test_reasoning_sent_for_supported_openrouter_model(self, agent):
+        agent.model = "qwen/qwen3.5-plus-02-15"
+        messages = [{"role": "user", "content": "hi"}]
+        kwargs = agent._build_api_kwargs(messages)
+        assert kwargs["extra_body"]["reasoning"]["effort"] == "medium"
+
+    def test_reasoning_sent_for_nous_route(self, agent):
+        agent.base_url = "https://inference-api.nousresearch.com/v1"
+        agent.model = "minimax/minimax-m2.5"
+        messages = [{"role": "user", "content": "hi"}]
+        kwargs = agent._build_api_kwargs(messages)
+        assert kwargs["extra_body"]["reasoning"]["effort"] == "medium"
+
    def test_max_tokens_injected(self, agent):
        agent.max_tokens = 4096
        messages = [{"role": "user", "content": "hi"}]
@@ -942,6 +961,19 @@ class TestHandleMaxIterations:
        assert "error" in result.lower()
        assert "API down" in result

+    def test_summary_skips_reasoning_for_unsupported_openrouter_model(self, agent):
+        agent.model = "minimax/minimax-m2.5"
+        resp = _mock_response(content="Summary")
+        agent.client.chat.completions.create.return_value = resp
+        agent._cached_system_prompt = "You are helpful."
+        messages = [{"role": "user", "content": "do stuff"}]
+
+        result = agent._handle_max_iterations(messages, 60)
+
+        assert result == "Summary"
+        kwargs = agent.client.chat.completions.create.call_args.kwargs
+        assert "reasoning" not in kwargs.get("extra_body", {})
+

 class TestRunConversation:
    """Tests for the main run_conversation method.
@@ -1986,6 +2018,69 @@ class TestBuildApiKwargsAnthropicMaxTokens:
                assert call_args[0][3] is None


+class TestAnthropicImageFallback:
+    def test_build_api_kwargs_converts_multimodal_user_image_to_text(self, agent):
+        agent.api_mode = "anthropic_messages"
+        agent.reasoning_config = None
+
+        api_messages = [{
+            "role": "user",
+            "content": [
+                {"type": "text", "text": "Can you see this now?"},
+                {"type": "image_url", "image_url": {"url": "https://example.com/cat.png"}},
+            ],
+        }]
+
+        with (
+            patch("tools.vision_tools.vision_analyze_tool", new=AsyncMock(return_value=json.dumps({"success": True, "analysis": "A cat sitting on a chair."}))),
+            patch("agent.anthropic_adapter.build_anthropic_kwargs") as mock_build,
+        ):
+            mock_build.return_value = {"model": "claude-sonnet-4-20250514", "messages": [], "max_tokens": 4096}
+            agent._build_api_kwargs(api_messages)
+
+        kwargs = mock_build.call_args.kwargs or dict(zip(
+            ["model", "messages", "tools", "max_tokens", "reasoning_config"],
+            mock_build.call_args.args,
+        ))
+        transformed = kwargs["messages"]
+        assert isinstance(transformed[0]["content"], str)
+        assert "A cat sitting on a chair." in transformed[0]["content"]
+        assert "Can you see this now?" in transformed[0]["content"]
+        assert "vision_analyze with image_url: https://example.com/cat.png" in transformed[0]["content"]
+
+    def test_build_api_kwargs_reuses_cached_image_analysis_for_duplicate_images(self, agent):
+        agent.api_mode = "anthropic_messages"
+        agent.reasoning_config = None
+        data_url = "data:image/png;base64,QUFBQQ=="
+
+        api_messages = [
+            {
+                "role": "user",
+                "content": [
+                    {"type": "text", "text": "first"},
+                    {"type": "input_image", "image_url": data_url},
+                ],
+            },
+            {
+                "role": "user",
+                "content": [
+                    {"type": "text", "text": "second"},
+                    {"type": "input_image", "image_url": data_url},
+                ],
+            },
+        ]
+
+        mock_vision = AsyncMock(return_value=json.dumps({"success": True, "analysis": "A small test image."}))
+        with (
+            patch("tools.vision_tools.vision_analyze_tool", new=mock_vision),
+            patch("agent.anthropic_adapter.build_anthropic_kwargs") as mock_build,
+        ):
+            mock_build.return_value = {"model": "claude-sonnet-4-20250514", "messages": [], "max_tokens": 4096}
+            agent._build_api_kwargs(api_messages)
+
+        assert mock_vision.await_count == 1
+
+
 class TestFallbackAnthropicProvider:
    """Bug fix: _try_activate_fallback had no case for anthropic provider."""

@@ -2085,6 +2180,92 @@ class TestAnthropicBaseUrlPassthrough:
            assert not passed_url or passed_url is None


+class TestAnthropicCredentialRefresh:
+    def test_try_refresh_anthropic_client_credentials_rebuilds_client(self):
+        with (
+            patch("run_agent.get_tool_definitions", return_value=_make_tool_defs("web_search")),
+            patch("run_agent.check_toolset_requirements", return_value={}),
+            patch("agent.anthropic_adapter.build_anthropic_client") as mock_build,
+        ):
+            old_client = MagicMock()
+            new_client = MagicMock()
+            mock_build.side_effect = [old_client, new_client]
+            agent = AIAgent(
+                api_key="sk-ant-oat01-stale-token",
+                api_mode="anthropic_messages",
+                quiet_mode=True,
+                skip_context_files=True,
+                skip_memory=True,
+            )
+
+        agent._anthropic_client = old_client
+        agent._anthropic_api_key = "sk-ant-oat01-stale-token"
+        agent._anthropic_base_url = "https://api.anthropic.com"
+
+        with (
+            patch("agent.anthropic_adapter.resolve_anthropic_token", return_value="sk-ant-oat01-fresh-token"),
+            patch("agent.anthropic_adapter.build_anthropic_client", return_value=new_client) as rebuild,
+        ):
+            assert agent._try_refresh_anthropic_client_credentials() is True
+
+        old_client.close.assert_called_once()
+        rebuild.assert_called_once_with("sk-ant-oat01-fresh-token", "https://api.anthropic.com")
+        assert agent._anthropic_client is new_client
+        assert agent._anthropic_api_key == "sk-ant-oat01-fresh-token"
+
+    def test_try_refresh_anthropic_client_credentials_returns_false_when_token_unchanged(self):
+        with (
+            patch("run_agent.get_tool_definitions", return_value=_make_tool_defs("web_search")),
+            patch("run_agent.check_toolset_requirements", return_value={}),
+            patch("agent.anthropic_adapter.build_anthropic_client", return_value=MagicMock()),
+        ):
+            agent = AIAgent(
+                api_key="sk-ant-oat01-same-token",
+                api_mode="anthropic_messages",
+                quiet_mode=True,
+                skip_context_files=True,
+                skip_memory=True,
+            )
+
+        old_client = MagicMock()
+        agent._anthropic_client = old_client
+        agent._anthropic_api_key = "sk-ant-oat01-same-token"
+
+        with (
+            patch("agent.anthropic_adapter.resolve_anthropic_token", return_value="sk-ant-oat01-same-token"),
+            patch("agent.anthropic_adapter.build_anthropic_client") as rebuild,
+        ):
+            assert agent._try_refresh_anthropic_client_credentials() is False
+
+        old_client.close.assert_not_called()
+        rebuild.assert_not_called()
+
+    def test_anthropic_messages_create_preflights_refresh(self):
+        with (
+            patch("run_agent.get_tool_definitions", return_value=_make_tool_defs("web_search")),
+            patch("run_agent.check_toolset_requirements", return_value={}),
+            patch("agent.anthropic_adapter.build_anthropic_client", return_value=MagicMock()),
+        ):
+            agent = AIAgent(
+                api_key="sk-ant-oat01-current-token",
+                api_mode="anthropic_messages",
+                quiet_mode=True,
+                skip_context_files=True,
+                skip_memory=True,
+            )
+
+        response = SimpleNamespace(content=[])
+        agent._anthropic_client = MagicMock()
+        agent._anthropic_client.messages.create.return_value = response
+
+        with patch.object(agent, "_try_refresh_anthropic_client_credentials", return_value=True) as refresh:
+            result = agent._anthropic_messages_create({"model": "claude-sonnet-4-20250514"})
+
+        refresh.assert_called_once_with()
+        agent._anthropic_client.messages.create.assert_called_once_with(model="claude-sonnet-4-20250514")
+        assert result is response
+
+
 # ===================================================================
 # _streaming_api_call tests
 # ===================================================================
@@ -2447,3 +2628,56 @@ class TestVprintForceOnErrors:
            agent._vprint("debug")
            agent._vprint("error", force=True)
        assert len(printed) == 2
+
+
+class TestNormalizeCodexDictArguments:
+    """_normalize_codex_response must produce valid JSON strings for tool
+    call arguments, even when the Responses API returns them as dicts."""
+
+    def _make_codex_response(self, item_type, arguments, item_status="completed"):
+        """Build a minimal Responses API response with a single tool call."""
+        item = SimpleNamespace(
+            type=item_type,
+            status=item_status,
+        )
+        if item_type == "function_call":
+            item.name = "web_search"
+            item.arguments = arguments
+            item.call_id = "call_abc123"
+            item.id = "fc_abc123"
+        elif item_type == "custom_tool_call":
+            item.name = "web_search"
+            item.input = arguments
+            item.call_id = "call_abc123"
+            item.id = "fc_abc123"
+        return SimpleNamespace(
+            output=[item],
+            status="completed",
+        )
+
+    def test_function_call_dict_arguments_produce_valid_json(self, agent):
+        """dict arguments from function_call must be serialised with
+        json.dumps, not str(), so downstream json.loads() succeeds."""
+        args_dict = {"query": "weather in NYC", "units": "celsius"}
+        response = self._make_codex_response("function_call", args_dict)
+        msg, _ = agent._normalize_codex_response(response)
+        tc = msg.tool_calls[0]
+        parsed = json.loads(tc.function.arguments)
+        assert parsed == args_dict
+
+    def test_custom_tool_call_dict_arguments_produce_valid_json(self, agent):
+        """dict arguments from custom_tool_call must also use json.dumps."""
+        args_dict = {"path": "/tmp/test.txt", "content": "hello"}
+        response = self._make_codex_response("custom_tool_call", args_dict)
+        msg, _ = agent._normalize_codex_response(response)
+        tc = msg.tool_calls[0]
+        parsed = json.loads(tc.function.arguments)
+        assert parsed == args_dict
+
+    def test_string_arguments_unchanged(self, agent):
+        """String arguments must pass through without modification."""
+        args_str = '{"query": "test"}'
+        response = self._make_codex_response("function_call", args_str)
+        msg, _ = agent._normalize_codex_response(response)
+        tc = msg.tool_calls[0]
+        assert tc.function.arguments == args_str
--- a/Show More
+++ b/Show More