fix: Anthropic OAuth compatibility — Claude Code identity fingerprinting

Anthropic routes OAuth/subscription requests based on Claude Code's identity markers. Without them, requests get intermittent 500 errors (~25% failure rate observed). This matches what pi-ai (clawdbot) and OpenCode both implement for OAuth compatibility. Changes (OAuth tokens only — API key users unaffected): 1. Headers: user-agent 'claude-cli/2.1.2 (external, cli)' + x-app 'cli' 2. System prompt: prepend 'You are Claude Code, Anthropic's official CLI' 3. System prompt sanitization: replace Hermes/Nous references 4. Tool names: prefix with 'mcp_' (Claude Code convention for non-native tools) 5. Tool name stripping: remove 'mcp_' prefix from response tool calls Before: 9/12 OK, 1 hard fail, 4 needed retries (~25% error rate) After: 16/16 OK, 0 failures, 0 retries (0% error rate)
Merge pull request #1585 from 0xbyt4/fix/anthropic-error-handling
2026-03-16 17:07:25 -07:00 · 2026-03-16 15:46:06 -07:00 · 2026-03-17 01:07:11 +03:00 · 2026-03-16 14:22:57 -07:00 · 2026-03-16 14:22:38 -07:00 · 2026-03-16 12:53:49 -07:00
181 changed files with 17378 additions and 1620 deletions
@@ -235,6 +235,7 @@ hermes_cli/skin_engine.py    # SkinConfig dataclass, built-in skins, YAML loader
 | Spinner verbs | `spinner.thinking_verbs` | `display.py` |
 | Spinner wings (optional) | `spinner.wings` | `display.py` |
 | Tool output prefix | `tool_prefix` | `display.py` |
+| Per-tool emojis | `tool_emojis` | `display.py` → `get_tool_emoji()` |
 | Agent name | `branding.agent_name` | `banner.py`, `cli.py` |
 | Welcome message | `branding.welcome` | `cli.py` |
 | Response box label | `branding.response_label` | `cli.py` |
@@ -62,6 +62,24 @@ hermes doctor       # Diagnose any issues

 📖 **[Full documentation →](https://hermes-agent.nousresearch.com/docs/)**

+## CLI vs Messaging Quick Reference
+
+Hermes has two entry points: start the terminal UI with `hermes`, or run the gateway and talk to it from Telegram, Discord, Slack, WhatsApp, Signal, or Email. Once you're in a conversation, many slash commands are shared across both interfaces.
+
+| Action | CLI | Messaging platforms |
+|---------|-----|---------------------|
+| Start chatting | `hermes` | Run `hermes gateway setup` + `hermes gateway start`, then send the bot a message |
+| Start fresh conversation | `/new` or `/reset` | `/new` or `/reset` |
+| Change model | `/model [provider:model]` | `/model [provider:model]` |
+| Set a personality | `/personality [name]` | `/personality [name]` |
+| Retry or undo the last turn | `/retry`, `/undo` | `/retry`, `/undo` |
+| Compress context / check usage | `/compress`, `/usage`, `/insights [--days N]` | `/compress`, `/usage`, `/insights [days]` |
+| Browse skills | `/skills` or `/<skill-name>` | `/skills` or `/<skill-name>` |
+| Interrupt current work | `Ctrl+C` or send a new message | `/stop` or send a new message |
+| Platform-specific status | `/platforms` | `/status`, `/sethome` |
+
+For the full command lists, see the [CLI guide](https://hermes-agent.nousresearch.com/docs/user-guide/cli) and the [Messaging Gateway guide](https://hermes-agent.nousresearch.com/docs/user-guide/messaging).
+
 ---

 ## Documentation
@@ -42,19 +42,16 @@ def _setup_logging() -> None:

 def _load_env() -> None:
    """Load .env from HERMES_HOME (default ``~/.hermes``)."""
-    from dotenv import load_dotenv
+    from hermes_cli.env_loader import load_hermes_dotenv

    hermes_home = Path(os.getenv("HERMES_HOME", Path.home() / ".hermes"))
-    env_file = hermes_home / ".env"
-    if env_file.exists():
-        try:
-            load_dotenv(dotenv_path=env_file, encoding="utf-8")
-        except UnicodeDecodeError:
-            load_dotenv(dotenv_path=env_file, encoding="latin-1")
-        logging.getLogger(__name__).info("Loaded env from %s", env_file)
+    loaded = load_hermes_dotenv(hermes_home=hermes_home)
+    if loaded:
+        for env_file in loaded:
+            logging.getLogger(__name__).info("Loaded env from %s", env_file)
    else:
        logging.getLogger(__name__).info(
-            "No .env found at %s, using system env", env_file
+            "No .env found at %s, using system env", hermes_home / ".env"
        )


@@ -42,7 +42,7 @@ from acp_adapter.events import (
    make_tool_progress_cb,
 )
 from acp_adapter.permissions import make_approval_callback
-from acp_adapter.session import SessionManager
+from acp_adapter.session import SessionManager, SessionState

 logger = logging.getLogger(__name__)

@@ -226,10 +226,19 @@ class HermesACPAgent(acp.Agent):
            logger.error("prompt: session %s not found", session_id)
            return PromptResponse(stop_reason="refusal")

-        user_text = _extract_text(prompt)
-        if not user_text.strip():
+        user_text = _extract_text(prompt).strip()
+        if not user_text:
            return PromptResponse(stop_reason="end_turn")

+        # Intercept slash commands — handle locally without calling the LLM
+        if user_text.startswith("/"):
+            response_text = self._handle_slash_command(user_text, state)
+            if response_text is not None:
+                if self._conn:
+                    update = acp.update_agent_message_text(response_text)
+                    await self._conn.session_update(session_id, update)
+                return PromptResponse(stop_reason="end_turn")
+
        logger.info("Prompt on session %s: %s", session_id, user_text[:100])

        conn = self._conn
@@ -315,12 +324,149 @@ class HermesACPAgent(acp.Agent):
        stop_reason = "cancelled" if state.cancel_event and state.cancel_event.is_set() else "end_turn"
        return PromptResponse(stop_reason=stop_reason, usage=usage)

-    # ---- Model switching ----------------------------------------------------
+    # ---- Slash commands (headless) -------------------------------------------
+
+    _SLASH_COMMANDS = {
+        "help": "Show available commands",
+        "model": "Show or change current model",
+        "tools": "List available tools",
+        "context": "Show conversation context info",
+        "reset": "Clear conversation history",
+        "compact": "Compress conversation context",
+        "version": "Show Hermes version",
+    }
+
+    def _handle_slash_command(self, text: str, state: SessionState) -> str | None:
+        """Dispatch a slash command and return the response text.
+
+        Returns ``None`` for unrecognized commands so they fall through
+        to the LLM (the user may have typed ``/something`` as prose).
+        """
+        parts = text.split(maxsplit=1)
+        cmd = parts[0].lstrip("/").lower()
+        args = parts[1].strip() if len(parts) > 1 else ""
+
+        handler = {
+            "help": self._cmd_help,
+            "model": self._cmd_model,
+            "tools": self._cmd_tools,
+            "context": self._cmd_context,
+            "reset": self._cmd_reset,
+            "compact": self._cmd_compact,
+            "version": self._cmd_version,
+        }.get(cmd)
+
+        if handler is None:
+            return None  # not a known command — let the LLM handle it
+
+        try:
+            return handler(args, state)
+        except Exception as e:
+            logger.error("Slash command /%s error: %s", cmd, e, exc_info=True)
+            return f"Error executing /{cmd}: {e}"
+
+    def _cmd_help(self, args: str, state: SessionState) -> str:
+        lines = ["Available commands:", ""]
+        for cmd, desc in self._SLASH_COMMANDS.items():
+            lines.append(f"  /{cmd:10s}  {desc}")
+        lines.append("")
+        lines.append("Unrecognized /commands are sent to the model as normal messages.")
+        return "\n".join(lines)
+
+    def _cmd_model(self, args: str, state: SessionState) -> str:
+        if not args:
+            model = state.model or getattr(state.agent, "model", "unknown")
+            provider = getattr(state.agent, "provider", None) or "auto"
+            return f"Current model: {model}\nProvider: {provider}"
+
+        new_model = args.strip()
+        target_provider = None
+
+        # Auto-detect provider for the requested model
+        try:
+            from hermes_cli.models import parse_model_input, detect_provider_for_model
+            current_provider = getattr(state.agent, "provider", None) or "openrouter"
+            target_provider, new_model = parse_model_input(new_model, current_provider)
+            if target_provider == current_provider:
+                detected = detect_provider_for_model(new_model, current_provider)
+                if detected:
+                    target_provider, new_model = detected
+        except Exception:
+            logger.debug("Provider detection failed, using model as-is", exc_info=True)
+
+        state.model = new_model
+        state.agent = self.session_manager._make_agent(
+            session_id=state.session_id,
+            cwd=state.cwd,
+            model=new_model,
+        )
+        provider_label = target_provider or getattr(state.agent, "provider", "auto")
+        logger.info("Session %s: model switched to %s", state.session_id, new_model)
+        return f"Model switched to: {new_model}\nProvider: {provider_label}"
+
+    def _cmd_tools(self, args: str, state: SessionState) -> str:
+        try:
+            from model_tools import get_tool_definitions
+            toolsets = getattr(state.agent, "enabled_toolsets", None) or ["hermes-acp"]
+            tools = get_tool_definitions(enabled_toolsets=toolsets, quiet_mode=True)
+            if not tools:
+                return "No tools available."
+            lines = [f"Available tools ({len(tools)}):"]
+            for t in tools:
+                name = t.get("function", {}).get("name", "?")
+                desc = t.get("function", {}).get("description", "")
+                # Truncate long descriptions
+                if len(desc) > 80:
+                    desc = desc[:77] + "..."
+                lines.append(f"  {name}: {desc}")
+            return "\n".join(lines)
+        except Exception as e:
+            return f"Could not list tools: {e}"
+
+    def _cmd_context(self, args: str, state: SessionState) -> str:
+        n_messages = len(state.history)
+        if n_messages == 0:
+            return "Conversation is empty (no messages yet)."
+        # Count by role
+        roles: dict[str, int] = {}
+        for msg in state.history:
+            role = msg.get("role", "unknown")
+            roles[role] = roles.get(role, 0) + 1
+        lines = [
+            f"Conversation: {n_messages} messages",
+            f"  user: {roles.get('user', 0)}, assistant: {roles.get('assistant', 0)}, "
+            f"tool: {roles.get('tool', 0)}, system: {roles.get('system', 0)}",
+        ]
+        model = state.model or getattr(state.agent, "model", "")
+        if model:
+            lines.append(f"Model: {model}")
+        return "\n".join(lines)
+
+    def _cmd_reset(self, args: str, state: SessionState) -> str:
+        state.history.clear()
+        return "Conversation history cleared."
+
+    def _cmd_compact(self, args: str, state: SessionState) -> str:
+        if not state.history:
+            return "Nothing to compress — conversation is empty."
+        try:
+            agent = state.agent
+            if hasattr(agent, "compress_context"):
+                agent.compress_context(state.history)
+                return f"Context compressed. Messages: {len(state.history)}"
+            return "Context compression not available for this agent."
+        except Exception as e:
+            return f"Compression failed: {e}"
+
+    def _cmd_version(self, args: str, state: SessionState) -> str:
+        return f"Hermes Agent v{HERMES_VERSION}"
+
+    # ---- Model switching (ACP protocol method) -------------------------------

    async def set_session_model(
        self, model_id: str, session_id: str, **kwargs: Any
    ):
-        """Switch the model for a session."""
+        """Switch the model for a session (called by ACP protocol)."""
        state = self.session_manager.get_session(session_id)
        if state:
            state.model = model_id
@@ -45,14 +45,19 @@ _COMMON_BETAS = [
    "fine-grained-tool-streaming-2025-05-14",
 ]

-# Additional beta headers required for OAuth/subscription auth
-# Both clawdbot and OpenCode include claude-code-20250219 alongside oauth-2025-04-20.
-# Without claude-code-20250219, Anthropic's API rejects OAuth tokens with 401.
+# Additional beta headers required for OAuth/subscription auth.
+# Matches what Claude Code (and pi-ai / OpenCode) send.
 _OAUTH_ONLY_BETAS = [
    "claude-code-20250219",
    "oauth-2025-04-20",
 ]

+# Claude Code identity — required for OAuth requests to be routed correctly.
+# Without these, Anthropic's infrastructure intermittently 500s OAuth traffic.
+_CLAUDE_CODE_VERSION = "2.1.2"
+_CLAUDE_CODE_SYSTEM_PREFIX = "You are Claude Code, Anthropic's official CLI for Claude."
+_MCP_TOOL_PREFIX = "mcp_"
+

 def _is_oauth_token(key: str) -> bool:
    """Check if the key is an OAuth/setup token (not a regular Console API key).
@@ -88,10 +93,16 @@ def build_anthropic_client(api_key: str, base_url: str = None):
        kwargs["base_url"] = base_url

    if _is_oauth_token(api_key):
-        # OAuth access token / setup-token → Bearer auth + beta headers
+        # OAuth access token / setup-token → Bearer auth + Claude Code identity.
+        # Anthropic routes OAuth requests based on user-agent and headers;
+        # without Claude Code's fingerprint, requests get intermittent 500s.
        all_betas = _COMMON_BETAS + _OAUTH_ONLY_BETAS
        kwargs["auth_token"] = api_key
-        kwargs["default_headers"] = {"anthropic-beta": ",".join(all_betas)}
+        kwargs["default_headers"] = {
+            "anthropic-beta": ",".join(all_betas),
+            "user-agent": f"claude-cli/{_CLAUDE_CODE_VERSION} (external, cli)",
+            "x-app": "cli",
+        }
    else:
        # Regular API key → x-api-key header + common betas
        kwargs["api_key"] = api_key
@@ -497,6 +508,66 @@ def convert_tools_to_anthropic(tools: List[Dict]) -> List[Dict]:
    return result


+def _image_source_from_openai_url(url: str) -> Dict[str, str]:
+    """Convert an OpenAI-style image URL/data URL into Anthropic image source."""
+    url = str(url or "").strip()
+    if not url:
+        return {"type": "url", "url": ""}
+
+    if url.startswith("data:"):
+        header, _, data = url.partition(",")
+        media_type = "image/jpeg"
+        if header.startswith("data:"):
+            mime_part = header[len("data:"):].split(";", 1)[0].strip()
+            if mime_part.startswith("image/"):
+                media_type = mime_part
+        return {
+            "type": "base64",
+            "media_type": media_type,
+            "data": data,
+        }
+
+    return {"type": "url", "url": url}
+
+
+def _convert_content_part_to_anthropic(part: Any) -> Optional[Dict[str, Any]]:
+    """Convert a single OpenAI-style content part to Anthropic format."""
+    if part is None:
+        return None
+    if isinstance(part, str):
+        return {"type": "text", "text": part}
+    if not isinstance(part, dict):
+        return {"type": "text", "text": str(part)}
+
+    ptype = part.get("type")
+
+    if ptype == "input_text":
+        block: Dict[str, Any] = {"type": "text", "text": part.get("text", "")}
+    elif ptype in {"image_url", "input_image"}:
+        image_value = part.get("image_url", {})
+        url = image_value.get("url", "") if isinstance(image_value, dict) else str(image_value or "")
+        block = {"type": "image", "source": _image_source_from_openai_url(url)}
+    else:
+        block = dict(part)
+
+    if isinstance(part.get("cache_control"), dict) and "cache_control" not in block:
+        block["cache_control"] = dict(part["cache_control"])
+    return block
+
+
+def _convert_content_to_anthropic(content: Any) -> Any:
+    """Convert OpenAI-style multimodal content arrays to Anthropic blocks."""
+    if not isinstance(content, list):
+        return content
+
+    converted = []
+    for part in content:
+        block = _convert_content_part_to_anthropic(part)
+        if block is not None:
+            converted.append(block)
+    return converted
+
+
 def convert_messages_to_anthropic(
    messages: List[Dict],
 ) -> Tuple[Optional[Any], List[Dict]]:
@@ -533,11 +604,9 @@ def convert_messages_to_anthropic(
            blocks = []
            if content:
                if isinstance(content, list):
-                    for part in content:
-                        if isinstance(part, dict):
-                            blocks.append(dict(part))
-                        elif part is not None:
-                            blocks.append({"type": "text", "text": str(part)})
+                    converted_content = _convert_content_to_anthropic(content)
+                    if isinstance(converted_content, list):
+                        blocks.extend(converted_content)
                else:
                    blocks.append({"type": "text", "text": str(content)})
            for tc in m.get("tool_calls", []):
@@ -587,12 +656,11 @@ def convert_messages_to_anthropic(

        # Regular user message
        if isinstance(content, list):
-            converted_blocks = []
-            for part in content:
-                converted = _convert_user_content_part_to_anthropic(part)
-                if converted is not None:
-                    converted_blocks.append(converted)
-            result.append({"role": "user", "content": converted_blocks or [{"type": "text", "text": ""}]})
+            converted_blocks = _convert_content_to_anthropic(content)
+            result.append({
+                "role": "user",
+                "content": converted_blocks or [{"type": "text", "text": ""}],
+            })
        else:
            result.append({"role": "user", "content": content})

@@ -657,14 +725,59 @@ def build_anthropic_kwargs(
    max_tokens: Optional[int],
    reasoning_config: Optional[Dict[str, Any]],
    tool_choice: Optional[str] = None,
+    is_oauth: bool = False,
 ) -> Dict[str, Any]:
-    """Build kwargs for anthropic.messages.create()."""
+    """Build kwargs for anthropic.messages.create().
+
+    When *is_oauth* is True, applies Claude Code compatibility transforms:
+    system prompt prefix, tool name prefixing, and prompt sanitization.
+    """
    system, anthropic_messages = convert_messages_to_anthropic(messages)
    anthropic_tools = convert_tools_to_anthropic(tools) if tools else []

    model = normalize_model_name(model)
    effective_max_tokens = max_tokens or 16384

+    # ── OAuth: Claude Code identity ──────────────────────────────────
+    if is_oauth:
+        # 1. Prepend Claude Code system prompt identity
+        cc_block = {"type": "text", "text": _CLAUDE_CODE_SYSTEM_PREFIX}
+        if isinstance(system, list):
+            system = [cc_block] + system
+        elif isinstance(system, str) and system:
+            system = [cc_block, {"type": "text", "text": system}]
+        else:
+            system = [cc_block]
+
+        # 2. Sanitize system prompt — replace product name references
+        #    to avoid Anthropic's server-side content filters.
+        for block in system:
+            if isinstance(block, dict) and block.get("type") == "text":
+                text = block.get("text", "")
+                text = text.replace("Hermes Agent", "Claude Code")
+                text = text.replace("Hermes agent", "Claude Code")
+                text = text.replace("hermes-agent", "claude-code")
+                text = text.replace("Nous Research", "Anthropic")
+                block["text"] = text
+
+        # 3. Prefix tool names with mcp_ (Claude Code convention)
+        if anthropic_tools:
+            for tool in anthropic_tools:
+                if "name" in tool:
+                    tool["name"] = _MCP_TOOL_PREFIX + tool["name"]
+
+        # 4. Prefix tool names in message history (tool_use and tool_result blocks)
+        for msg in anthropic_messages:
+            content = msg.get("content")
+            if isinstance(content, list):
+                for block in content:
+                    if isinstance(block, dict):
+                        if block.get("type") == "tool_use" and "name" in block:
+                            if not block["name"].startswith(_MCP_TOOL_PREFIX):
+                                block["name"] = _MCP_TOOL_PREFIX + block["name"]
+                        elif block.get("type") == "tool_result" and "tool_use_id" in block:
+                            pass  # tool_result uses ID, not name
+
    kwargs: Dict[str, Any] = {
        "model": model,
        "messages": anthropic_messages,
@@ -711,11 +824,15 @@ def build_anthropic_kwargs(

 def normalize_anthropic_response(
    response,
+    strip_tool_prefix: bool = False,
 ) -> Tuple[SimpleNamespace, str]:
    """Normalize Anthropic response to match the shape expected by AIAgent.

    Returns (assistant_message, finish_reason) where assistant_message has
    .content, .tool_calls, and .reasoning attributes.
+
+    When *strip_tool_prefix* is True, removes the ``mcp_`` prefix that was
+    added to tool names for OAuth Claude Code compatibility.
    """
    text_parts = []
    reasoning_parts = []
@@ -727,12 +844,15 @@ def normalize_anthropic_response(
        elif block.type == "thinking":
            reasoning_parts.append(block.thinking)
        elif block.type == "tool_use":
+            name = block.name
+            if strip_tool_prefix and name.startswith(_MCP_TOOL_PREFIX):
+                name = name[len(_MCP_TOOL_PREFIX):]
            tool_calls.append(
                SimpleNamespace(
                    id=block.id,
                    type="function",
                    function=SimpleNamespace(
-                        name=block.name,
+                        name=name,
                        arguments=json.dumps(block.input),
                    ),
                )
@@ -83,7 +83,10 @@ _AUTH_JSON_PATH = get_hermes_home() / "auth.json"

 # Codex fallback: uses the Responses API (the only endpoint the Codex
 # OAuth token can access) with a fast model for auxiliary tasks.
-_CODEX_AUX_MODEL = "gpt-5.3-codex"
+# ChatGPT-backed Codex accounts currently reject gpt-5.3-codex for these
+# auxiliary flows, while gpt-5.2-codex remains broadly available and supports
+# vision via Responses.
+_CODEX_AUX_MODEL = "gpt-5.2-codex"
 _CODEX_AUX_BASE_URL = "https://chatgpt.com/backend-api/codex"


@@ -59,6 +59,32 @@ def get_skin_tool_prefix() -> str:
    return "┊"


+def get_tool_emoji(tool_name: str, default: str = "⚡") -> str:
+    """Get the display emoji for a tool.
+
+    Resolution order:
+    1. Active skin's ``tool_emojis`` overrides (if a skin is loaded)
+    2. Tool registry's per-tool ``emoji`` field
+    3. *default* fallback
+    """
+    # 1. Skin override
+    skin = _get_skin()
+    if skin and skin.tool_emojis:
+        override = skin.tool_emojis.get(tool_name)
+        if override:
+            return override
+    # 2. Registry default
+    try:
+        from tools.registry import registry
+        emoji = registry.get_emoji(tool_name, default="")
+        if emoji:
+            return emoji
+    except Exception:
+        pass
+    # 3. Hardcoded fallback
+    return default
+
+
 # =========================================================================
 # Tool preview (one-line summary of a tool call's primary argument)
 # =========================================================================
@@ -20,65 +20,16 @@ import json
 import time
 from collections import Counter, defaultdict
 from datetime import datetime
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List

-# =========================================================================
-# Model pricing (USD per million tokens) — approximate as of early 2026
-# =========================================================================
-MODEL_PRICING = {
-    # OpenAI
-    "gpt-4o": {"input": 2.50, "output": 10.00},
-    "gpt-4o-mini": {"input": 0.15, "output": 0.60},
-    "gpt-4.1": {"input": 2.00, "output": 8.00},
-    "gpt-4.1-mini": {"input": 0.40, "output": 1.60},
-    "gpt-4.1-nano": {"input": 0.10, "output": 0.40},
-    "gpt-4.5-preview": {"input": 75.00, "output": 150.00},
-    "gpt-5": {"input": 10.00, "output": 30.00},
-    "gpt-5.4": {"input": 10.00, "output": 30.00},
-    "o3": {"input": 10.00, "output": 40.00},
-    "o3-mini": {"input": 1.10, "output": 4.40},
-    "o4-mini": {"input": 1.10, "output": 4.40},
-    # Anthropic
-    "claude-opus-4-20250514": {"input": 15.00, "output": 75.00},
-    "claude-sonnet-4-20250514": {"input": 3.00, "output": 15.00},
-    "claude-3-5-sonnet-20241022": {"input": 3.00, "output": 15.00},
-    "claude-3-5-haiku-20241022": {"input": 0.80, "output": 4.00},
-    "claude-3-opus-20240229": {"input": 15.00, "output": 75.00},
-    "claude-3-haiku-20240307": {"input": 0.25, "output": 1.25},
-    # DeepSeek
-    "deepseek-chat": {"input": 0.14, "output": 0.28},
-    "deepseek-reasoner": {"input": 0.55, "output": 2.19},
-    # Google
-    "gemini-2.5-pro": {"input": 1.25, "output": 10.00},
-    "gemini-2.5-flash": {"input": 0.15, "output": 0.60},
-    "gemini-2.0-flash": {"input": 0.10, "output": 0.40},
-    # Meta (via providers)
-    "llama-4-maverick": {"input": 0.50, "output": 0.70},
-    "llama-4-scout": {"input": 0.20, "output": 0.30},
-    # Z.AI / GLM (direct provider — pricing not published externally, treat as local)
-    "glm-5": {"input": 0.0, "output": 0.0},
-    "glm-4.7": {"input": 0.0, "output": 0.0},
-    "glm-4.5": {"input": 0.0, "output": 0.0},
-    "glm-4.5-flash": {"input": 0.0, "output": 0.0},
-    # Kimi / Moonshot (direct provider — pricing not published externally, treat as local)
-    "kimi-k2.5": {"input": 0.0, "output": 0.0},
-    "kimi-k2-thinking": {"input": 0.0, "output": 0.0},
-    "kimi-k2-turbo-preview": {"input": 0.0, "output": 0.0},
-    "kimi-k2-0905-preview": {"input": 0.0, "output": 0.0},
-    # MiniMax (direct provider — pricing not published externally, treat as local)
-    "MiniMax-M2.5": {"input": 0.0, "output": 0.0},
-    "MiniMax-M2.5-highspeed": {"input": 0.0, "output": 0.0},
-    "MiniMax-M2.1": {"input": 0.0, "output": 0.0},
-}
+from agent.usage_pricing import DEFAULT_PRICING, estimate_cost_usd, format_duration_compact, get_pricing, has_known_pricing

-# Fallback: unknown/custom models get zero cost (we can't assume pricing
-# for self-hosted models, custom OAI endpoints, local inference, etc.)
-_DEFAULT_PRICING = {"input": 0.0, "output": 0.0}
+_DEFAULT_PRICING = DEFAULT_PRICING


 def _has_known_pricing(model_name: str) -> bool:
    """Check if a model has known pricing (vs unknown/custom endpoint)."""
-    return _get_pricing(model_name) is not _DEFAULT_PRICING
+    return has_known_pricing(model_name)


 def _get_pricing(model_name: str) -> Dict[str, float]:
@@ -87,67 +38,17 @@ def _get_pricing(model_name: str) -> Dict[str, float]:
    Returns _DEFAULT_PRICING (zero cost) for unknown/custom models —
    we can't assume costs for self-hosted endpoints, local inference, etc.
    """
-    if not model_name:
-        return _DEFAULT_PRICING
-
-    # Strip provider prefix (e.g., "anthropic/claude-..." -> "claude-...")
-    bare = model_name.split("/")[-1].lower()
-
-    # Exact match first
-    if bare in MODEL_PRICING:
-        return MODEL_PRICING[bare]
-
-    # Fuzzy prefix match — prefer the LONGEST matching key to avoid
-    # e.g. "gpt-4o" matching before "gpt-4o-mini" for "gpt-4o-mini-2024-07-18"
-    best_match = None
-    best_len = 0
-    for key, price in MODEL_PRICING.items():
-        if bare.startswith(key) and len(key) > best_len:
-            best_match = price
-            best_len = len(key)
-    if best_match:
-        return best_match
-
-    # Keyword heuristics (checked in most-specific-first order)
-    if "opus" in bare:
-        return {"input": 15.00, "output": 75.00}
-    if "sonnet" in bare:
-        return {"input": 3.00, "output": 15.00}
-    if "haiku" in bare:
-        return {"input": 0.80, "output": 4.00}
-    if "gpt-4o-mini" in bare:
-        return {"input": 0.15, "output": 0.60}
-    if "gpt-4o" in bare:
-        return {"input": 2.50, "output": 10.00}
-    if "gpt-5" in bare:
-        return {"input": 10.00, "output": 30.00}
-    if "deepseek" in bare:
-        return {"input": 0.14, "output": 0.28}
-    if "gemini" in bare:
-        return {"input": 0.15, "output": 0.60}
-
-    return _DEFAULT_PRICING
+    return get_pricing(model_name)


 def _estimate_cost(model: str, input_tokens: int, output_tokens: int) -> float:
    """Estimate the USD cost for a given model and token counts."""
-    pricing = _get_pricing(model)
-    return (input_tokens * pricing["input"] + output_tokens * pricing["output"]) / 1_000_000
+    return estimate_cost_usd(model, input_tokens, output_tokens)


 def _format_duration(seconds: float) -> str:
    """Format seconds into a human-readable duration string."""
-    if seconds < 60:
-        return f"{seconds:.0f}s"
-    minutes = seconds / 60
-    if minutes < 60:
-        return f"{minutes:.0f}m"
-    hours = minutes / 60
-    if hours < 24:
-        remaining_min = int(minutes % 60)
-        return f"{int(hours)}h {remaining_min}m" if remaining_min else f"{int(hours)}h"
-    days = hours / 24
-    return f"{days:.1f}d"
+    return format_duration_compact(seconds)


 def _bar_chart(values: List[int], max_width: int = 20) -> List[str]:
@@ -73,9 +73,15 @@ DEFAULT_AGENT_IDENTITY = (
 MEMORY_GUIDANCE = (
    "You have persistent memory across sessions. Save durable facts using the memory "
    "tool: user preferences, environment details, tool quirks, and stable conventions. "
-    "Memory is injected into every turn, so keep it compact. Do NOT save task progress, "
-    "session outcomes, or completed-work logs to memory; use session_search to recall "
-    "those from past transcripts."
+    "Memory is injected into every turn, so keep it compact and focused on facts that "
+    "will still matter later.\n"
+    "Prioritize what reduces future user steering — the most valuable memory is one "
+    "that prevents the user from having to correct or remind you again. "
+    "User preferences and recurring corrections matter more than procedural task details.\n"
+    "Do NOT save task progress, session outcomes, completed-work logs, or temporary TODO "
+    "state to memory; use session_search to recall those from past transcripts. "
+    "If you've discovered a new way to do something, solved a problem that could be "
+    "necessary later, save it as a skill with the skill tool."
 )

 SESSION_SEARCH_GUIDANCE = (
@@ -86,8 +92,11 @@ SESSION_SEARCH_GUIDANCE = (

 SKILLS_GUIDANCE = (
    "After completing a complex task (5+ tool calls), fixing a tricky error, "
-    "or discovering a non-trivial workflow, consider saving the approach as a "
-    "skill with skill_manage so you can reuse it next time."
+    "or discovering a non-trivial workflow, save the approach as a "
+    "skill with skill_manage so you can reuse it next time.\n"
+    "When using a skill and finding it outdated, incomplete, or wrong, "
+    "patch it immediately with skill_manage(action='patch') — don't wait to be asked. "
+    "Skills that aren't maintained become liabilities."
 )

 PLATFORM_HINTS = {
@@ -326,6 +335,9 @@ def build_skills_system_prompt(
        "Before replying, scan the skills below. If one clearly matches your task, "
        "load it with skill_view(name) and follow its instructions. "
        "If a skill has issues, fix it with skill_manage(action='patch').\n"
+        "After difficult/iterative tasks, offer to save as a skill. "
+        "If a skill you loaded was missing steps, had wrong commands, or needed "
+        "pitfalls you discovered, update it before finishing.\n"
        "\n"
        "<available_skills>\n"
        + "\n".join(index_lines) + "\n"
@@ -0,0 +1,184 @@
+"""Helpers for optional cheap-vs-strong model routing."""
+
+from __future__ import annotations
+
+import os
+import re
+from typing import Any, Dict, Optional
+
+_COMPLEX_KEYWORDS = {
+    "debug",
+    "debugging",
+    "implement",
+    "implementation",
+    "refactor",
+    "patch",
+    "traceback",
+    "stacktrace",
+    "exception",
+    "error",
+    "analyze",
+    "analysis",
+    "investigate",
+    "architecture",
+    "design",
+    "compare",
+    "benchmark",
+    "optimize",
+    "optimise",
+    "review",
+    "terminal",
+    "shell",
+    "tool",
+    "tools",
+    "pytest",
+    "test",
+    "tests",
+    "plan",
+    "planning",
+    "delegate",
+    "subagent",
+    "cron",
+    "docker",
+    "kubernetes",
+}
+
+_URL_RE = re.compile(r"https?://|www\.", re.IGNORECASE)
+
+
+def _coerce_bool(value: Any, default: bool = False) -> bool:
+    if value is None:
+        return default
+    if isinstance(value, bool):
+        return value
+    if isinstance(value, str):
+        return value.strip().lower() in {"1", "true", "yes", "on"}
+    return bool(value)
+
+
+def _coerce_int(value: Any, default: int) -> int:
+    try:
+        return int(value)
+    except (TypeError, ValueError):
+        return default
+
+
+def choose_cheap_model_route(user_message: str, routing_config: Optional[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
+    """Return the configured cheap-model route when a message looks simple.
+
+    Conservative by design: if the message has signs of code/tool/debugging/
+    long-form work, keep the primary model.
+    """
+    cfg = routing_config or {}
+    if not _coerce_bool(cfg.get("enabled"), False):
+        return None
+
+    cheap_model = cfg.get("cheap_model") or {}
+    if not isinstance(cheap_model, dict):
+        return None
+    provider = str(cheap_model.get("provider") or "").strip().lower()
+    model = str(cheap_model.get("model") or "").strip()
+    if not provider or not model:
+        return None
+
+    text = (user_message or "").strip()
+    if not text:
+        return None
+
+    max_chars = _coerce_int(cfg.get("max_simple_chars"), 160)
+    max_words = _coerce_int(cfg.get("max_simple_words"), 28)
+
+    if len(text) > max_chars:
+        return None
+    if len(text.split()) > max_words:
+        return None
+    if text.count("\n") > 1:
+        return None
+    if "```" in text or "`" in text:
+        return None
+    if _URL_RE.search(text):
+        return None
+
+    lowered = text.lower()
+    words = {token.strip(".,:;!?()[]{}\"'`") for token in lowered.split()}
+    if words & _COMPLEX_KEYWORDS:
+        return None
+
+    route = dict(cheap_model)
+    route["provider"] = provider
+    route["model"] = model
+    route["routing_reason"] = "simple_turn"
+    return route
+
+
+def resolve_turn_route(user_message: str, routing_config: Optional[Dict[str, Any]], primary: Dict[str, Any]) -> Dict[str, Any]:
+    """Resolve the effective model/runtime for one turn.
+
+    Returns a dict with model/runtime/signature/label fields.
+    """
+    route = choose_cheap_model_route(user_message, routing_config)
+    if not route:
+        return {
+            "model": primary.get("model"),
+            "runtime": {
+                "api_key": primary.get("api_key"),
+                "base_url": primary.get("base_url"),
+                "provider": primary.get("provider"),
+                "api_mode": primary.get("api_mode"),
+            },
+            "label": None,
+            "signature": (
+                primary.get("model"),
+                primary.get("provider"),
+                primary.get("base_url"),
+                primary.get("api_mode"),
+            ),
+        }
+
+    from hermes_cli.runtime_provider import resolve_runtime_provider
+
+    explicit_api_key = None
+    api_key_env = str(route.get("api_key_env") or "").strip()
+    if api_key_env:
+        explicit_api_key = os.getenv(api_key_env) or None
+
+    try:
+        runtime = resolve_runtime_provider(
+            requested=route.get("provider"),
+            explicit_api_key=explicit_api_key,
+            explicit_base_url=route.get("base_url"),
+        )
+    except Exception:
+        return {
+            "model": primary.get("model"),
+            "runtime": {
+                "api_key": primary.get("api_key"),
+                "base_url": primary.get("base_url"),
+                "provider": primary.get("provider"),
+                "api_mode": primary.get("api_mode"),
+            },
+            "label": None,
+            "signature": (
+                primary.get("model"),
+                primary.get("provider"),
+                primary.get("base_url"),
+                primary.get("api_mode"),
+            ),
+        }
+
+    return {
+        "model": route.get("model"),
+        "runtime": {
+            "api_key": runtime.get("api_key"),
+            "base_url": runtime.get("base_url"),
+            "provider": runtime.get("provider"),
+            "api_mode": runtime.get("api_mode"),
+        },
+        "label": f"smart route → {route.get('model')} ({runtime.get('provider')})",
+        "signature": (
+            route.get("model"),
+            runtime.get("provider"),
+            runtime.get("base_url"),
+            runtime.get("api_mode"),
+        ),
+    }
@@ -0,0 +1,134 @@
+from __future__ import annotations
+
+from decimal import Decimal
+from typing import Dict
+
+
+MODEL_PRICING = {
+    "gpt-4o": {"input": 2.50, "output": 10.00},
+    "gpt-4o-mini": {"input": 0.15, "output": 0.60},
+    "gpt-4.1": {"input": 2.00, "output": 8.00},
+    "gpt-4.1-mini": {"input": 0.40, "output": 1.60},
+    "gpt-4.1-nano": {"input": 0.10, "output": 0.40},
+    "gpt-4.5-preview": {"input": 75.00, "output": 150.00},
+    "gpt-5": {"input": 10.00, "output": 30.00},
+    "gpt-5.4": {"input": 10.00, "output": 30.00},
+    "o3": {"input": 10.00, "output": 40.00},
+    "o3-mini": {"input": 1.10, "output": 4.40},
+    "o4-mini": {"input": 1.10, "output": 4.40},
+    "claude-opus-4-20250514": {"input": 15.00, "output": 75.00},
+    "claude-sonnet-4-20250514": {"input": 3.00, "output": 15.00},
+    "claude-3-5-sonnet-20241022": {"input": 3.00, "output": 15.00},
+    "claude-3-5-haiku-20241022": {"input": 0.80, "output": 4.00},
+    "claude-3-opus-20240229": {"input": 15.00, "output": 75.00},
+    "claude-3-haiku-20240307": {"input": 0.25, "output": 1.25},
+    "deepseek-chat": {"input": 0.14, "output": 0.28},
+    "deepseek-reasoner": {"input": 0.55, "output": 2.19},
+    "gemini-2.5-pro": {"input": 1.25, "output": 10.00},
+    "gemini-2.5-flash": {"input": 0.15, "output": 0.60},
+    "gemini-2.0-flash": {"input": 0.10, "output": 0.40},
+    "llama-4-maverick": {"input": 0.50, "output": 0.70},
+    "llama-4-scout": {"input": 0.20, "output": 0.30},
+    "glm-5": {"input": 0.0, "output": 0.0},
+    "glm-4.7": {"input": 0.0, "output": 0.0},
+    "glm-4.5": {"input": 0.0, "output": 0.0},
+    "glm-4.5-flash": {"input": 0.0, "output": 0.0},
+    "kimi-k2.5": {"input": 0.0, "output": 0.0},
+    "kimi-k2-thinking": {"input": 0.0, "output": 0.0},
+    "kimi-k2-turbo-preview": {"input": 0.0, "output": 0.0},
+    "kimi-k2-0905-preview": {"input": 0.0, "output": 0.0},
+    "MiniMax-M2.5": {"input": 0.0, "output": 0.0},
+    "MiniMax-M2.5-highspeed": {"input": 0.0, "output": 0.0},
+    "MiniMax-M2.1": {"input": 0.0, "output": 0.0},
+}
+
+DEFAULT_PRICING = {"input": 0.0, "output": 0.0}
+
+
+def get_pricing(model_name: str) -> Dict[str, float]:
+    if not model_name:
+        return DEFAULT_PRICING
+
+    bare = model_name.split("/")[-1].lower()
+    if bare in MODEL_PRICING:
+        return MODEL_PRICING[bare]
+
+    best_match = None
+    best_len = 0
+    for key, price in MODEL_PRICING.items():
+        if bare.startswith(key) and len(key) > best_len:
+            best_match = price
+            best_len = len(key)
+    if best_match:
+        return best_match
+
+    if "opus" in bare:
+        return {"input": 15.00, "output": 75.00}
+    if "sonnet" in bare:
+        return {"input": 3.00, "output": 15.00}
+    if "haiku" in bare:
+        return {"input": 0.80, "output": 4.00}
+    if "gpt-4o-mini" in bare:
+        return {"input": 0.15, "output": 0.60}
+    if "gpt-4o" in bare:
+        return {"input": 2.50, "output": 10.00}
+    if "gpt-5" in bare:
+        return {"input": 10.00, "output": 30.00}
+    if "deepseek" in bare:
+        return {"input": 0.14, "output": 0.28}
+    if "gemini" in bare:
+        return {"input": 0.15, "output": 0.60}
+
+    return DEFAULT_PRICING
+
+
+def has_known_pricing(model_name: str) -> bool:
+    pricing = get_pricing(model_name)
+    return pricing is not DEFAULT_PRICING and any(
+        float(value) > 0 for value in pricing.values()
+    )
+
+
+def estimate_cost_usd(model: str, input_tokens: int, output_tokens: int) -> float:
+    pricing = get_pricing(model)
+    total = (
+        Decimal(input_tokens) * Decimal(str(pricing["input"]))
+        + Decimal(output_tokens) * Decimal(str(pricing["output"]))
+    ) / Decimal("1000000")
+    return float(total)
+
+
+def format_duration_compact(seconds: float) -> str:
+    if seconds < 60:
+        return f"{seconds:.0f}s"
+    minutes = seconds / 60
+    if minutes < 60:
+        return f"{minutes:.0f}m"
+    hours = minutes / 60
+    if hours < 24:
+        remaining_min = int(minutes % 60)
+        return f"{int(hours)}h {remaining_min}m" if remaining_min else f"{int(hours)}h"
+    days = hours / 24
+    return f"{days:.1f}d"
+
+
+def format_token_count_compact(value: int) -> str:
+    abs_value = abs(int(value))
+    if abs_value < 1_000:
+        return str(int(value))
+
+    sign = "-" if value < 0 else ""
+    units = ((1_000_000_000, "B"), (1_000_000, "M"), (1_000, "K"))
+    for threshold, suffix in units:
+        if abs_value >= threshold:
+            scaled = abs_value / threshold
+            if scaled < 10:
+                text = f"{scaled:.2f}"
+            elif scaled < 100:
+                text = f"{scaled:.1f}"
+            else:
+                text = f"{scaled:.0f}"
+            text = text.rstrip("0").rstrip(".")
+            return f"{sign}{text}{suffix}"
+
+    return f"{value:,}"
@@ -51,6 +51,20 @@ model:
 #   # Data policy: "allow" (default) or "deny" to exclude providers that may store data
 #   # data_collection: "deny"

+# =============================================================================
+# Smart Model Routing (optional)
+# =============================================================================
+# Use a cheaper model for short/simple turns while keeping your main model for
+# more complex requests. Disabled by default.
+#
+# smart_model_routing:
+#   enabled: true
+#   max_simple_chars: 160
+#   max_simple_words: 28
+#   cheap_model:
+#     provider: openrouter
+#     model: google/gemini-2.5-flash
+
 # =============================================================================
 # Git Worktree Isolation
 # =============================================================================
@@ -76,8 +90,9 @@ model:
 #   - Messaging (Telegram/Discord): Uses MESSAGING_CWD from .env (default: home)
 terminal:
  backend: "local"
-  cwd: "."  # For local backend: "." = current directory. Ignored for remote backends.
+  cwd: "."  # For local backend: "." = current directory. Ignored for remote backends unless a backend documents otherwise.
  timeout: 180
+  docker_mount_cwd_to_workspace: false  # SECURITY: off by default. Opt in to mount the launch cwd into Docker /workspace.
  lifetime_seconds: 300
  # sudo_password: ""  # Enable sudo commands (pipes via sudo -S) - SECURITY WARNING: plaintext!

@@ -107,6 +122,7 @@ terminal:
 #   timeout: 180
 #   lifetime_seconds: 300
 #   docker_image: "nikolaik/python-nodejs:python3.11-nodejs20"
+#   docker_mount_cwd_to_workspace: true   # Explicit opt-in: mount your launch cwd into /workspace

 # -----------------------------------------------------------------------------
 # OPTION 4: Singularity/Apptainer container
@@ -333,6 +349,25 @@ session_reset:
  idle_minutes: 1440   # Inactivity timeout in minutes (default: 1440 = 24 hours)
  at_hour: 4           # Daily reset hour, 0-23 local time (default: 4 AM)

+# When true, group/channel chats use one session per participant when the platform
+# provides a user ID. This is the secure default and prevents users in the same
+# room from sharing context, interrupts, and token costs. Set false only if you
+# explicitly want one shared "room brain" per group/channel.
+group_sessions_per_user: true
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Gateway Streaming
+# ─────────────────────────────────────────────────────────────────────────────
+# Stream tokens to messaging platforms in real-time. The bot sends a message
+# on first token, then progressively edits it as more tokens arrive.
+# Disabled by default — enable to try the streaming UX on Telegram/Discord/Slack.
+streaming:
+  enabled: false
+  # transport: edit           # "edit" = progressive editMessageText
+  # edit_interval: 0.3        # seconds between message edits
+  # buffer_threshold: 40      # chars before forcing an edit flush
+  # cursor: " ▉"              # cursor shown during streaming
+
 # =============================================================================
 # Skills Configuration
 # =============================================================================
@@ -694,6 +729,12 @@ display:
  # Toggle at runtime with /reasoning show or /reasoning hide.
  show_reasoning: false

+  # Stream tokens to the terminal as they arrive instead of waiting for the
+  # full response. The response box opens on first token and text appears
+  # line-by-line. Tool calls are still captured silently.
+  # Disabled by default — enable to try the streaming UX.
+  streaming: false
+
  # ───────────────────────────────────────────────────────────────────────────
  # Skin / Theme
  # ───────────────────────────────────────────────────────────────────────────
@@ -734,3 +775,14 @@ display:
  #   tool_prefix: "╎"                       # Tool output line prefix (default: ┊)
  #
  skin: default
+
+# =============================================================================
+# Privacy
+# =============================================================================
+# privacy:
+#   # Redact PII from the LLM context prompt.
+#   # When true, phone numbers are stripped and user/chat IDs are replaced
+#   # with deterministic hashes before being sent to the model.
+#   # Names and usernames are NOT affected (user-chosen, publicly visible).
+#   # Routing/delivery still uses the original values internally.
+#   redact_pii: false
@@ -315,6 +315,7 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:

        # Provider routing
        pr = _cfg.get("provider_routing", {})
+        smart_routing = _cfg.get("smart_model_routing", {}) or {}

        from hermes_cli.runtime_provider import (
            resolve_runtime_provider,
@@ -331,12 +332,25 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
            message = format_runtime_provider_error(exc)
            raise RuntimeError(message) from exc

+        from agent.smart_model_routing import resolve_turn_route
+        turn_route = resolve_turn_route(
+            prompt,
+            smart_routing,
+            {
+                "model": model,
+                "api_key": runtime.get("api_key"),
+                "base_url": runtime.get("base_url"),
+                "provider": runtime.get("provider"),
+                "api_mode": runtime.get("api_mode"),
+            },
+        )
+
        agent = AIAgent(
-            model=model,
-            api_key=runtime.get("api_key"),
-            base_url=runtime.get("base_url"),
-            provider=runtime.get("provider"),
-            api_mode=runtime.get("api_mode"),
+            model=turn_route["model"],
+            api_key=turn_route["runtime"].get("api_key"),
+            base_url=turn_route["runtime"].get("base_url"),
+            provider=turn_route["runtime"].get("provider"),
+            api_mode=turn_route["runtime"].get("api_mode"),
            max_iterations=max_iterations,
            reasoning_config=reasoning_config,
            prefill_messages=prefill_messages,
@@ -97,10 +97,11 @@ class SessionResetPolicy:
    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "SessionResetPolicy":
        # Handle both missing keys and explicit null values (YAML null → None)
+        mode = data.get("mode")
        at_hour = data.get("at_hour")
        idle_minutes = data.get("idle_minutes")
        return cls(
-            mode=data.get("mode", "both"),
+            mode=mode if mode is not None else "both",
            at_hour=at_hour if at_hour is not None else 4,
            idle_minutes=idle_minutes if idle_minutes is not None else 1440,
        )
@@ -145,6 +146,37 @@ class PlatformConfig:
        )


+@dataclass
+class StreamingConfig:
+    """Configuration for real-time token streaming to messaging platforms."""
+    enabled: bool = False
+    transport: str = "edit"       # "edit" (progressive editMessageText) or "off"
+    edit_interval: float = 0.3    # Seconds between message edits
+    buffer_threshold: int = 40    # Chars before forcing an edit
+    cursor: str = " ▉"           # Cursor shown during streaming
+
+    def to_dict(self) -> Dict[str, Any]:
+        return {
+            "enabled": self.enabled,
+            "transport": self.transport,
+            "edit_interval": self.edit_interval,
+            "buffer_threshold": self.buffer_threshold,
+            "cursor": self.cursor,
+        }
+
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> "StreamingConfig":
+        if not data:
+            return cls()
+        return cls(
+            enabled=data.get("enabled", False),
+            transport=data.get("transport", "edit"),
+            edit_interval=float(data.get("edit_interval", 0.3)),
+            buffer_threshold=int(data.get("buffer_threshold", 40)),
+            cursor=data.get("cursor", " ▉"),
+        )
+
+
@dataclass
 class GatewayConfig:
    """
@@ -174,7 +206,13 @@ class GatewayConfig:

    # STT settings
    stt_enabled: bool = True  # Whether to auto-transcribe inbound voice messages
-    
+
+    # Session isolation in shared chats
+    group_sessions_per_user: bool = True  # Isolate group/channel sessions per participant when user IDs are available
+
+    # Streaming configuration
+    streaming: StreamingConfig = field(default_factory=StreamingConfig)
+
    def get_connected_platforms(self) -> List[Platform]:
        """Return list of platforms that are enabled and configured."""
        connected = []
@@ -239,6 +277,8 @@ class GatewayConfig:
            "sessions_dir": str(self.sessions_dir),
            "always_log_local": self.always_log_local,
            "stt_enabled": self.stt_enabled,
+            "group_sessions_per_user": self.group_sessions_per_user,
+            "streaming": self.streaming.to_dict(),
        }
    
    @classmethod
@@ -279,6 +319,8 @@ class GatewayConfig:
        if stt_enabled is None:
            stt_enabled = data.get("stt", {}).get("enabled") if isinstance(data.get("stt"), dict) else None

+        group_sessions_per_user = data.get("group_sessions_per_user")
+
        return cls(
            platforms=platforms,
            default_reset_policy=default_policy,
@@ -289,6 +331,8 @@ class GatewayConfig:
            sessions_dir=sessions_dir,
            always_log_local=data.get("always_log_local", True),
            stt_enabled=_coerce_bool(stt_enabled, True),
+            group_sessions_per_user=_coerce_bool(group_sessions_per_user, True),
+            streaming=StreamingConfig.from_dict(data.get("streaming", {})),
        )


@@ -344,6 +388,14 @@ def load_gateway_config() -> GatewayConfig:
            if isinstance(stt_cfg, dict) and "enabled" in stt_cfg:
                config.stt_enabled = _coerce_bool(stt_cfg.get("enabled"), True)

+            # Bridge group session isolation from config.yaml into gateway runtime.
+            # Secure default is per-user isolation in shared chats.
+            if "group_sessions_per_user" in yaml_cfg:
+                config.group_sessions_per_user = _coerce_bool(
+                    yaml_cfg.get("group_sessions_per_user"),
+                    True,
+                )
+
            # Bridge discord settings from config.yaml to env vars
            # (env vars take precedence — only set if not already defined)
            discord_cfg = yaml_cfg.get("discord", {})
@@ -288,6 +288,7 @@ class MessageEvent:
    message_id: Optional[str] = None
    
    # Media attachments
+    # media_urls: local file paths (for vision tool access)
    media_urls: List[str] = field(default_factory=list)
    media_types: List[str] = field(default_factory=list)
    
@@ -355,6 +356,10 @@ class BasePlatformAdapter(ABC):
        # Key: session_key (e.g., chat_id), Value: (event, asyncio.Event for interrupt)
        self._active_sessions: Dict[str, asyncio.Event] = {}
        self._pending_messages: Dict[str, MessageEvent] = {}
+        # Background message-processing tasks spawned by handle_message().
+        # Gateway shutdown cancels these so an old gateway instance doesn't keep
+        # working on a task after --replace or manual restarts.
+        self._background_tasks: set[asyncio.Task] = set()
        # Chats where auto-TTS on voice input is disabled (set by /voice off)
        self._auto_tts_disabled_chats: set = set()

@@ -747,11 +752,32 @@ class BasePlatformAdapter(ABC):
        if not self._message_handler:
            return
        
-        session_key = build_session_key(event.source)
+        session_key = build_session_key(
+            event.source,
+            group_sessions_per_user=self.config.extra.get("group_sessions_per_user", True),
+        )
        
        # Check if there's already an active handler for this session
        if session_key in self._active_sessions:
-            # Store this as a pending message - it will interrupt the running agent
+            # Special case: photo bursts/albums frequently arrive as multiple near-
+            # simultaneous messages. Queue them without interrupting the active run,
+            # then process them immediately after the current task finishes.
+            if event.message_type == MessageType.PHOTO:
+                print(f"[{self.name}] 🖼️ Queuing photo follow-up for session {session_key} without interrupt")
+                existing = self._pending_messages.get(session_key)
+                if existing and existing.message_type == MessageType.PHOTO:
+                    existing.media_urls.extend(event.media_urls)
+                    existing.media_types.extend(event.media_types)
+                    if event.text:
+                        if not existing.text:
+                            existing.text = event.text
+                        elif event.text not in existing.text:
+                            existing.text = f"{existing.text}\n\n{event.text}".strip()
+                else:
+                    self._pending_messages[session_key] = event
+                return  # Don't interrupt now - will run after current task completes
+
+            # Default behavior for non-photo follow-ups: interrupt the running agent
            print(f"[{self.name}] ⚡ New message while session {session_key} is active - triggering interrupt")
            self._pending_messages[session_key] = event
            # Signal the interrupt (the processing task checks this)
@@ -759,7 +785,15 @@ class BasePlatformAdapter(ABC):
            return  # Don't process now - will be handled after current task finishes
        
        # Spawn background task to process this message
-        asyncio.create_task(self._process_message_background(event, session_key))
+        task = asyncio.create_task(self._process_message_background(event, session_key))
+        try:
+            self._background_tasks.add(task)
+        except TypeError:
+            # Some tests stub create_task() with lightweight sentinels that are not
+            # hashable and do not support lifecycle callbacks.
+            return
+        if hasattr(task, "add_done_callback"):
+            task.add_done_callback(self._background_tasks.discard)
    
    @staticmethod
    def _get_human_delay() -> float:
@@ -969,6 +1003,21 @@ class BasePlatformAdapter(ABC):
            if session_key in self._active_sessions:
                del self._active_sessions[session_key]
    
+    async def cancel_background_tasks(self) -> None:
+        """Cancel any in-flight background message-processing tasks.
+
+        Used during gateway shutdown/replacement so active sessions from the old
+        process do not keep running after adapters are being torn down.
+        """
+        tasks = [task for task in self._background_tasks if not task.done()]
+        for task in tasks:
+            task.cancel()
+        if tasks:
+            await asyncio.gather(*tasks, return_exceptions=True)
+        self._background_tasks.clear()
+        self._pending_messages.clear()
+        self._active_sessions.clear()
+
    def has_pending_interrupt(self, session_key: str) -> bool:
        """Check if there's a pending interrupt for a session."""
        return session_key in self._active_sessions and self._active_sessions[session_key].is_set()
@@ -87,8 +87,9 @@ class VoiceReceiver:
    SAMPLE_RATE = 48000        # Discord native rate
    CHANNELS = 2               # Discord sends stereo

-    def __init__(self, voice_client):
+    def __init__(self, voice_client, allowed_user_ids: set = None):
        self._vc = voice_client
+        self._allowed_user_ids = allowed_user_ids or set()
        self._running = False

        # Decryption
@@ -274,19 +275,21 @@ class VoiceReceiver:
        if self._dave_session:
            with self._lock:
                user_id = self._ssrc_to_user.get(ssrc, 0)
-            if user_id == 0:
-                if self._packet_debug_count <= 10:
-                    logger.warning("DAVE skip: unknown user for ssrc=%d", ssrc)
-                return  # unknown user, can't DAVE-decrypt
-            try:
-                import davey
-                decrypted = self._dave_session.decrypt(
-                    user_id, davey.MediaType.audio, decrypted
-                )
-            except Exception as e:
-                if self._packet_debug_count <= 10:
-                    logger.warning("DAVE decrypt failed for ssrc=%d: %s", ssrc, e)
-                return
+            if user_id:
+                try:
+                    import davey
+                    decrypted = self._dave_session.decrypt(
+                        user_id, davey.MediaType.audio, decrypted
+                    )
+                except Exception as e:
+                    # Unencrypted passthrough — use NaCl-decrypted data as-is
+                    if "Unencrypted" not in str(e):
+                        if self._packet_debug_count <= 10:
+                            logger.warning("DAVE decrypt failed for ssrc=%d: %s", ssrc, e)
+                        return
+            # If SSRC unknown (no SPEAKING event yet), skip DAVE and try
+            # Opus decode directly — audio may be in passthrough mode.
+            # Buffer will get a user_id when SPEAKING event arrives later.

        # --- Opus decode -> PCM ---
        try:
@@ -304,6 +307,32 @@ class VoiceReceiver:
    # Silence detection
    # ------------------------------------------------------------------

+    def _infer_user_for_ssrc(self, ssrc: int) -> int:
+        """Try to infer user_id for an unmapped SSRC.
+
+        When the bot rejoins a voice channel, Discord may not resend
+        SPEAKING events for users already speaking.  If exactly one
+        allowed user is in the channel, map the SSRC to them.
+        """
+        try:
+            channel = self._vc.channel
+            if not channel:
+                return 0
+            bot_id = self._vc.user.id if self._vc.user else 0
+            allowed = self._allowed_user_ids
+            candidates = [
+                m.id for m in channel.members
+                if m.id != bot_id and (not allowed or str(m.id) in allowed)
+            ]
+            if len(candidates) == 1:
+                uid = candidates[0]
+                self._ssrc_to_user[ssrc] = uid
+                logger.info("Auto-mapped ssrc=%d -> user=%d (sole allowed member)", ssrc, uid)
+                return uid
+        except Exception:
+            pass
+        return 0
+
    def check_silence(self) -> list:
        """Return list of (user_id, pcm_bytes) for completed utterances."""
        now = time.monotonic()
@@ -322,6 +351,10 @@ class VoiceReceiver:

                if silence_duration >= self.SILENCE_THRESHOLD and buf_duration >= self.MIN_SPEECH_DURATION:
                    user_id = ssrc_user_map.get(ssrc, 0)
+                    if not user_id:
+                        # SSRC not mapped (SPEAKING event missing after bot rejoin).
+                        # Infer from allowed users in the voice channel.
+                        user_id = self._infer_user_for_ssrc(ssrc)
                    if user_id:
                        completed.append((user_id, bytes(buf)))
                    self._buffers[ssrc] = bytearray()
@@ -400,6 +433,9 @@ class DiscordAdapter(BasePlatformAdapter):
        self._voice_listen_tasks: Dict[int, asyncio.Task] = {}  # guild_id -> listen loop
        self._voice_input_callback: Optional[Callable] = None  # set by run.py
        self._on_voice_disconnect: Optional[Callable] = None  # set by run.py
+        # Track threads where the bot has participated so follow-up messages
+        # in those threads don't require @mention.
+        self._bot_participated_threads: set = set()
    
    async def connect(self) -> bool:
        """Connect to Discord and start receiving events."""
@@ -580,7 +616,7 @@ class DiscordAdapter(BasePlatformAdapter):
        """Send a message to a Discord channel."""
        if not self._client:
            return SendResult(success=False, error="Not connected")
-        
+
        try:
            # Get the channel
            channel = self._client.get_channel(int(chat_id))
@@ -695,13 +731,14 @@ class DiscordAdapter(BasePlatformAdapter):
    ) -> SendResult:
        """Play auto-TTS audio.

-        When the bot is in a voice channel for this chat's guild, skip the
-        file attachment — the gateway runner plays audio in the VC instead.
+        When the bot is in a voice channel for this chat's guild, play
+        directly in the VC instead of sending as a file attachment.
        """
        for gid, text_ch_id in self._voice_text_channels.items():
            if str(text_ch_id) == str(chat_id) and self.is_in_voice_channel(gid):
-                logger.debug("[%s] Skipping play_tts for %s — VC playback handled by runner", self.name, chat_id)
-                return SendResult(success=True)
+                logger.info("[%s] Playing TTS in voice channel (guild=%d)", self.name, gid)
+                success = await self.play_in_voice_channel(gid, audio_path)
+                return SendResult(success=success)
        return await self.send_voice(chat_id=chat_id, audio_path=audio_path, **kwargs)

    async def send_voice(
@@ -805,7 +842,7 @@ class DiscordAdapter(BasePlatformAdapter):

        # Start voice receiver (Phase 2: listen to users)
        try:
-            receiver = VoiceReceiver(vc)
+            receiver = VoiceReceiver(vc, allowed_user_ids=self._allowed_user_ids)
            receiver.start()
            self._voice_receivers[guild_id] = receiver
            self._voice_listen_tasks[guild_id] = asyncio.ensure_future(
@@ -1001,14 +1038,32 @@ class DiscordAdapter(BasePlatformAdapter):
    # Voice listening (Phase 2)
    # ------------------------------------------------------------------

+    # UDP keepalive interval in seconds — prevents Discord from dropping
+    # the UDP route after ~60s of silence.
+    _KEEPALIVE_INTERVAL = 15
+
    async def _voice_listen_loop(self, guild_id: int):
        """Periodically check for completed utterances and process them."""
        receiver = self._voice_receivers.get(guild_id)
        if not receiver:
            return
+        last_keepalive = time.monotonic()
        try:
            while receiver._running:
                await asyncio.sleep(0.2)
+
+                # Send periodic UDP keepalive to prevent Discord from
+                # dropping the UDP session after ~60s of silence.
+                now = time.monotonic()
+                if now - last_keepalive >= self._KEEPALIVE_INTERVAL:
+                    last_keepalive = now
+                    try:
+                        vc = self._voice_clients.get(guild_id)
+                        if vc and vc.is_connected():
+                            vc._connection.send_packet(b'\xf8\xff\xfe')
+                    except Exception:
+                        pass
+
                completed = receiver.check_silence()
                for user_id, pcm_data in completed:
                    if not self._is_allowed_user(str(user_id)):
@@ -1746,14 +1801,13 @@ class DiscordAdapter(BasePlatformAdapter):
    async def _handle_message(self, message: DiscordMessage) -> None:
        """Handle incoming Discord messages."""
        # In server channels (not DMs), require the bot to be @mentioned
-        # UNLESS the channel is in the free-response list.
+        # UNLESS the channel is in the free-response list or the message is
+        # in a thread where the bot has already participated.
        #
-        # Config:
-        #   DISCORD_FREE_RESPONSE_CHANNELS: Comma-separated channel IDs where the
-        #       bot responds to every message without needing a mention.
-        #   DISCORD_REQUIRE_MENTION: Set to "false" to disable mention requirement
-        #       globally (all channels become free-response). Default: "true".
-        #       Can also be set via discord.require_mention in config.yaml.
+        # Config (all settable via discord.* in config.yaml):
+        #   discord.require_mention: Require @mention in server channels (default: true)
+        #   discord.free_response_channels: Channel IDs where bot responds without mention
+        #   discord.auto_thread: Auto-create thread on @mention in channels (default: true)

        thread_id = None
        parent_channel_id = None
@@ -1772,7 +1826,11 @@ class DiscordAdapter(BasePlatformAdapter):
            require_mention = os.getenv("DISCORD_REQUIRE_MENTION", "true").lower() not in ("false", "0", "no")
            is_free_channel = bool(channel_ids & free_channels)

-            if require_mention and not is_free_channel:
+            # Skip the mention check if the message is in a thread where
+            # the bot has previously participated (auto-created or replied in).
+            in_bot_thread = is_thread and thread_id in self._bot_participated_threads
+
+            if require_mention and not is_free_channel and not in_bot_thread:
                if self._client.user not in message.mentions:
                    return

@@ -1781,17 +1839,18 @@ class DiscordAdapter(BasePlatformAdapter):
                message.content = message.content.replace(f"<@!{self._client.user.id}>", "").strip()

        # Auto-thread: when enabled, automatically create a thread for every
-        # new message in a text channel so each conversation is isolated.
+        # @mention in a text channel so each conversation is isolated (like Slack).
        # Messages already inside threads or DMs are unaffected.
        auto_threaded_channel = None
        if not is_thread and not isinstance(message.channel, discord.DMChannel):
-            auto_thread = os.getenv("DISCORD_AUTO_THREAD", "").lower() in ("true", "1", "yes")
+            auto_thread = os.getenv("DISCORD_AUTO_THREAD", "true").lower() in ("true", "1", "yes")
            if auto_thread:
                thread = await self._auto_create_thread(message)
                if thread:
                    is_thread = True
                    thread_id = str(thread.id)
                    auto_threaded_channel = thread
+                    self._bot_participated_threads.add(thread_id)

        # Determine message type
        msg_type = MessageType.TEXT
@@ -1891,7 +1950,12 @@ class DiscordAdapter(BasePlatformAdapter):
            reply_to_message_id=str(message.reference.message_id) if message.reference else None,
            timestamp=message.created_at,
        )
-        
+
+        # Track thread participation so the bot won't require @mention for
+        # follow-up messages in threads it has already engaged in.
+        if thread_id:
+            self._bot_participated_threads.add(thread_id)
+
        await self.handle_message(event)


@@ -135,14 +135,23 @@ def _extract_email_address(raw: str) -> str:
    return raw.strip().lower()


-def _extract_attachments(msg: email_lib.message.Message) -> List[Dict[str, Any]]:
-    """Extract attachment metadata and cache files locally."""
+def _extract_attachments(
+    msg: email_lib.message.Message,
+    skip_attachments: bool = False,
+) -> List[Dict[str, Any]]:
+    """Extract attachment metadata and cache files locally.
+
+    When *skip_attachments* is True, all attachment/inline parts are ignored
+    (useful for malware protection or bandwidth savings).
+    """
    attachments = []
    if not msg.is_multipart():
        return attachments

    for part in msg.walk():
        disposition = str(part.get("Content-Disposition", ""))
+        if skip_attachments and ("attachment" in disposition or "inline" in disposition):
+            continue
        if "attachment" not in disposition and "inline" not in disposition:
            continue
        # Skip text/plain and text/html body parts
@@ -196,6 +205,13 @@ class EmailAdapter(BasePlatformAdapter):
        self._smtp_port = int(os.getenv("EMAIL_SMTP_PORT", "587"))
        self._poll_interval = int(os.getenv("EMAIL_POLL_INTERVAL", "15"))

+        # Skip attachments — configured via config.yaml:
+        #   platforms:
+        #     email:
+        #       skip_attachments: true
+        extra = config.extra or {}
+        self._skip_attachments = extra.get("skip_attachments", False)
+
        # Track message IDs we've already processed to avoid duplicates
        self._seen_uids: set = set()
        self._poll_task: Optional[asyncio.Task] = None
@@ -306,7 +322,7 @@ class EmailAdapter(BasePlatformAdapter):
                message_id = msg.get("Message-ID", "")
                in_reply_to = msg.get("In-Reply-To", "")
                body = _extract_text_body(msg)
-                attachments = _extract_attachments(msg)
+                attachments = _extract_attachments(msg, skip_attachments=self._skip_attachments)

                results.append({
                    "uid": uid,
@@ -111,6 +111,11 @@ class TelegramAdapter(BasePlatformAdapter):
        super().__init__(config, Platform.TELEGRAM)
        self._app: Optional[Application] = None
        self._bot: Optional[Bot] = None
+        # Buffer rapid/album photo updates so Telegram image bursts are handled
+        # as a single MessageEvent instead of self-interrupting multiple turns.
+        self._media_batch_delay_seconds = float(os.getenv("HERMES_TELEGRAM_MEDIA_BATCH_DELAY_SECONDS", "0.8"))
+        self._pending_photo_batches: Dict[str, MessageEvent] = {}
+        self._pending_photo_batch_tasks: Dict[str, asyncio.Task] = {}
        self._media_group_events: Dict[str, MessageEvent] = {}
        self._media_group_tasks: Dict[str, asyncio.Task] = {}
        self._token_lock_identity: Optional[str] = None
@@ -197,8 +202,26 @@ class TelegramAdapter(BasePlatformAdapter):
                self._handle_media_message
            ))
            
-            # Start polling in background
-            await self._app.initialize()
+            # Start polling — retry initialize() for transient TLS resets
+            try:
+                from telegram.error import NetworkError, TimedOut
+            except ImportError:
+                NetworkError = TimedOut = OSError  # type: ignore[misc,assignment]
+            _max_connect = 3
+            for _attempt in range(_max_connect):
+                try:
+                    await self._app.initialize()
+                    break
+                except (NetworkError, TimedOut, OSError) as init_err:
+                    if _attempt < _max_connect - 1:
+                        wait = 2 ** _attempt
+                        logger.warning(
+                            "[%s] Connect attempt %d/%d failed: %s — retrying in %ds",
+                            self.name, _attempt + 1, _max_connect, init_err, wait,
+                        )
+                        await asyncio.sleep(wait)
+                    else:
+                        raise
            await self._app.start()
            loop = asyncio.get_running_loop()

@@ -260,6 +283,8 @@ class TelegramAdapter(BasePlatformAdapter):
                    release_scoped_lock("telegram-bot-token", self._token_lock_identity)
                except Exception:
                    pass
+            message = f"Telegram startup failed: {e}"
+            self._set_fatal_error("telegram_connect_error", message, retryable=True)
            logger.error("[%s] Failed to connect to Telegram: %s", self.name, e, exc_info=True)
            return False
    
@@ -289,13 +314,19 @@ class TelegramAdapter(BasePlatformAdapter):
                release_scoped_lock("telegram-bot-token", self._token_lock_identity)
            except Exception as e:
                logger.warning("[%s] Error releasing Telegram token lock: %s", self.name, e, exc_info=True)
-        
+
+        for task in self._pending_photo_batch_tasks.values():
+            if task and not task.done():
+                task.cancel()
+        self._pending_photo_batch_tasks.clear()
+        self._pending_photo_batches.clear()
+
        self._mark_disconnected()
        self._app = None
        self._bot = None
        self._token_lock_identity = None
        logger.info("[%s] Disconnected from Telegram", self.name)
-    
+
    async def send(
        self,
        chat_id: str,
@@ -311,36 +342,59 @@ class TelegramAdapter(BasePlatformAdapter):
            # Format and split message if needed
            formatted = self.format_message(content)
            chunks = self.truncate_message(formatted, self.MAX_MESSAGE_LENGTH)
+            if len(chunks) > 1:
+                # truncate_message appends a raw " (1/2)" suffix. Escape the
+                # MarkdownV2-special parentheses so Telegram doesn't reject the
+                # chunk and fall back to plain text.
+                chunks = [
+                    re.sub(r" \((\d+)/(\d+)\)$", r" \\(\1/\2\\)", chunk)
+                    for chunk in chunks
+                ]
            
            message_ids = []
            thread_id = metadata.get("thread_id") if metadata else None
            
+            try:
+                from telegram.error import NetworkError as _NetErr
+            except ImportError:
+                _NetErr = OSError  # type: ignore[misc,assignment]
+
            for i, chunk in enumerate(chunks):
-                # Try Markdown first, fall back to plain text if it fails
-                try:
-                    msg = await self._bot.send_message(
-                        chat_id=int(chat_id),
-                        text=chunk,
-                        parse_mode=ParseMode.MARKDOWN_V2,
-                        reply_to_message_id=int(reply_to) if reply_to and i == 0 else None,
-                        message_thread_id=int(thread_id) if thread_id else None,
-                    )
-                except Exception as md_error:
-                    # Markdown parsing failed, try plain text
-                    if "parse" in str(md_error).lower() or "markdown" in str(md_error).lower():
-                        logger.warning("[%s] MarkdownV2 parse failed, falling back to plain text: %s", self.name, md_error)
-                        # Strip MDV2 escape backslashes so the user doesn't
-                        # see raw backslashes littered through the message.
-                        plain_chunk = _strip_mdv2(chunk)
-                        msg = await self._bot.send_message(
-                            chat_id=int(chat_id),
-                            text=plain_chunk,
-                            parse_mode=None,  # Plain text
-                            reply_to_message_id=int(reply_to) if reply_to and i == 0 else None,
-                            message_thread_id=int(thread_id) if thread_id else None,
-                        )
-                    else:
-                        raise  # Re-raise if not a parse error
+                msg = None
+                for _send_attempt in range(3):
+                    try:
+                        # Try Markdown first, fall back to plain text if it fails
+                        try:
+                            msg = await self._bot.send_message(
+                                chat_id=int(chat_id),
+                                text=chunk,
+                                parse_mode=ParseMode.MARKDOWN_V2,
+                                reply_to_message_id=int(reply_to) if reply_to and i == 0 else None,
+                                message_thread_id=int(thread_id) if thread_id else None,
+                            )
+                        except Exception as md_error:
+                            # Markdown parsing failed, try plain text
+                            if "parse" in str(md_error).lower() or "markdown" in str(md_error).lower():
+                                logger.warning("[%s] MarkdownV2 parse failed, falling back to plain text: %s", self.name, md_error)
+                                plain_chunk = _strip_mdv2(chunk)
+                                msg = await self._bot.send_message(
+                                    chat_id=int(chat_id),
+                                    text=plain_chunk,
+                                    parse_mode=None,
+                                    reply_to_message_id=int(reply_to) if reply_to and i == 0 else None,
+                                    message_thread_id=int(thread_id) if thread_id else None,
+                                )
+                            else:
+                                raise
+                        break  # success
+                    except _NetErr as send_err:
+                        if _send_attempt < 2:
+                            wait = 2 ** _send_attempt
+                            logger.warning("[%s] Network error on send (attempt %d/3), retrying in %ds: %s",
+                                           self.name, _send_attempt + 1, wait, send_err)
+                            await asyncio.sleep(wait)
+                        else:
+                            raise
                message_ids.append(str(msg.message_id))
            
            return SendResult(
@@ -807,6 +861,52 @@ class TelegramAdapter(BasePlatformAdapter):
        event.text = "\n".join(parts)
        await self.handle_message(event)

+    def _photo_batch_key(self, event: MessageEvent, msg: Message) -> str:
+        """Return a batching key for Telegram photos/albums."""
+        from gateway.session import build_session_key
+        session_key = build_session_key(
+            event.source,
+            group_sessions_per_user=self.config.extra.get("group_sessions_per_user", True),
+        )
+        media_group_id = getattr(msg, "media_group_id", None)
+        if media_group_id:
+            return f"{session_key}:album:{media_group_id}"
+        return f"{session_key}:photo-burst"
+
+    async def _flush_photo_batch(self, batch_key: str) -> None:
+        """Send a buffered photo burst/album as a single MessageEvent."""
+        current_task = asyncio.current_task()
+        try:
+            await asyncio.sleep(self._media_batch_delay_seconds)
+            event = self._pending_photo_batches.pop(batch_key, None)
+            if not event:
+                return
+            logger.info("[Telegram] Flushing photo batch %s with %d image(s)", batch_key, len(event.media_urls))
+            await self.handle_message(event)
+        finally:
+            if self._pending_photo_batch_tasks.get(batch_key) is current_task:
+                self._pending_photo_batch_tasks.pop(batch_key, None)
+
+    def _enqueue_photo_event(self, batch_key: str, event: MessageEvent) -> None:
+        """Merge photo events into a pending batch and schedule flush."""
+        existing = self._pending_photo_batches.get(batch_key)
+        if existing is None:
+            self._pending_photo_batches[batch_key] = event
+        else:
+            existing.media_urls.extend(event.media_urls)
+            existing.media_types.extend(event.media_types)
+            if event.text:
+                if not existing.text:
+                    existing.text = event.text
+                elif event.text not in existing.text:
+                    existing.text = f"{existing.text}\n\n{event.text}".strip()
+
+        prior_task = self._pending_photo_batch_tasks.get(batch_key)
+        if prior_task and not prior_task.done():
+            prior_task.cancel()
+
+        self._pending_photo_batch_tasks[batch_key] = asyncio.create_task(self._flush_photo_batch(batch_key))
+
    async def _handle_media_message(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
        """Handle incoming media messages, downloading images to local cache."""
        if not update.message:
@@ -858,14 +958,22 @@ class TelegramAdapter(BasePlatformAdapter):
                        if file_obj.file_path.lower().endswith(candidate):
                            ext = candidate
                            break
-                # Save to cache and populate media_urls with the local path
+                # Save to local cache (for vision tool access)
                cached_path = cache_image_from_bytes(bytes(image_bytes), ext=ext)
                event.media_urls = [cached_path]
-                event.media_types = [f"image/{ext.lstrip('.')}"]
+                event.media_types = [f"image/{ext.lstrip('.')}" ]
                logger.info("[Telegram] Cached user photo at %s", cached_path)
+                media_group_id = getattr(msg, "media_group_id", None)
+                if media_group_id:
+                    await self._queue_media_group_event(str(media_group_id), event)
+                else:
+                    batch_key = self._photo_batch_key(event, msg)
+                    self._enqueue_photo_event(batch_key, event)
+                return
+
            except Exception as e:
                logger.warning("[Telegram] Failed to cache photo: %s", e, exc_info=True)
-        
+
        # Download voice/audio messages to cache for STT transcription
        if msg.voice:
            try:
@@ -29,22 +29,61 @@ from pathlib import Path
 from datetime import datetime
 from typing import Dict, Optional, Any, List

+# ---------------------------------------------------------------------------
+# SSL certificate auto-detection for NixOS and other non-standard systems.
+# Must run BEFORE any HTTP library (discord, aiohttp, etc.) is imported.
+# ---------------------------------------------------------------------------
+def _ensure_ssl_certs() -> None:
+    """Set SSL_CERT_FILE if the system doesn't expose CA certs to Python."""
+    if "SSL_CERT_FILE" in os.environ:
+        return  # user already configured it
+
+    import ssl
+
+    # 1. Python's compiled-in defaults
+    paths = ssl.get_default_verify_paths()
+    for candidate in (paths.cafile, paths.openssl_cafile):
+        if candidate and os.path.exists(candidate):
+            os.environ["SSL_CERT_FILE"] = candidate
+            return
+
+    # 2. certifi (ships its own Mozilla bundle)
+    try:
+        import certifi
+        os.environ["SSL_CERT_FILE"] = certifi.where()
+        return
+    except ImportError:
+        pass
+
+    # 3. Common distro / macOS locations
+    for candidate in (
+        "/etc/ssl/certs/ca-certificates.crt",               # Debian/Ubuntu/Gentoo
+        "/etc/pki/tls/certs/ca-bundle.crt",                 # RHEL/CentOS 7
+        "/etc/pki/ca-trust/extracted/pem/tls-ca-bundle.pem", # RHEL/CentOS 8+
+        "/etc/ssl/ca-bundle.pem",                            # SUSE/OpenSUSE
+        "/etc/ssl/cert.pem",                                 # Alpine / macOS
+        "/etc/pki/tls/cert.pem",                             # Fedora
+        "/usr/local/etc/openssl@1.1/cert.pem",               # macOS Homebrew Intel
+        "/opt/homebrew/etc/openssl@1.1/cert.pem",            # macOS Homebrew ARM
+    ):
+        if os.path.exists(candidate):
+            os.environ["SSL_CERT_FILE"] = candidate
+            return
+
+_ensure_ssl_certs()
+
 # Add parent directory to path
 sys.path.insert(0, str(Path(__file__).parent.parent))

 # Resolve Hermes home directory (respects HERMES_HOME override)
 _hermes_home = Path(os.getenv("HERMES_HOME", Path.home() / ".hermes"))

-# Load environment variables from ~/.hermes/.env first
-from dotenv import load_dotenv
+# Load environment variables from ~/.hermes/.env first.
+# User-managed env files should override stale shell exports on restart.
+from dotenv import load_dotenv  # backward-compat for tests that monkeypatch this symbol
+from hermes_cli.env_loader import load_hermes_dotenv
 _env_path = _hermes_home / '.env'
-if _env_path.exists():
-    try:
-        load_dotenv(_env_path, encoding="utf-8")
-    except UnicodeDecodeError:
-        load_dotenv(_env_path, encoding="latin-1")
-# Also try project .env as fallback
-load_dotenv()
+load_hermes_dotenv(hermes_home=_hermes_home, project_env=Path(__file__).resolve().parents[1] / '.env')

 # Bridge config.yaml values into the environment so os.getenv() picks them up.
 # config.yaml is authoritative for terminal settings — overrides .env.
@@ -81,6 +120,7 @@ if _config_path.exists():
                "container_persistent": "TERMINAL_CONTAINER_PERSISTENT",
                "docker_volumes": "TERMINAL_DOCKER_VOLUMES",
                "sandbox_dir": "TERMINAL_SANDBOX_DIR",
+                "persistent_shell": "TERMINAL_PERSISTENT_SHELL",
            }
            for _cfg_key, _env_var in _terminal_env_map.items():
                if _cfg_key in _terminal_cfg:
@@ -117,6 +157,12 @@ if _config_path.exists():
                    "base_url": "AUXILIARY_WEB_EXTRACT_BASE_URL",
                    "api_key": "AUXILIARY_WEB_EXTRACT_API_KEY",
                },
+                "approval": {
+                    "provider": "AUXILIARY_APPROVAL_PROVIDER",
+                    "model": "AUXILIARY_APPROVAL_MODEL",
+                    "base_url": "AUXILIARY_APPROVAL_BASE_URL",
+                    "api_key": "AUXILIARY_APPROVAL_API_KEY",
+                },
            }
            for _task_key, _env_map in _aux_task_env.items():
                _task_cfg = _auxiliary_cfg.get(_task_key, {})
@@ -278,6 +324,7 @@ class GatewayRunner:
        self._show_reasoning = self._load_show_reasoning()
        self._provider_routing = self._load_provider_routing()
        self._fallback_model = self._load_fallback_model()
+        self._smart_model_routing = self._load_smart_model_routing()

        # Wire process registry into session store for reset protection
        from tools.process_registry import process_registry
@@ -309,7 +356,7 @@ class GatewayRunner:
        # Ensure tirith security scanner is available (downloads if needed)
        try:
            from tools.tirith_security import ensure_installed
-            ensure_installed()
+            ensure_installed(log_failures=False)
        except Exception:
            pass  # Non-fatal — fail-open at scan time if unavailable
        
@@ -438,7 +485,11 @@ class GatewayRunner:

    # -----------------------------------------------------------------

-    def _flush_memories_for_session(self, old_session_id: str):
+    def _flush_memories_for_session(
+        self,
+        old_session_id: str,
+        honcho_session_key: Optional[str] = None,
+    ):
        """Prompt the agent to save memories/skills before context is lost.

        Synchronous worker — meant to be called via run_in_executor from
@@ -466,6 +517,7 @@ class GatewayRunner:
                quiet_mode=True,
                enabled_toolsets=["memory", "skills"],
                session_id=old_session_id,
+                honcho_session_key=honcho_session_key,
            )

            # Build conversation history from transcript
@@ -493,6 +545,7 @@ class GatewayRunner:
            tmp_agent.run_conversation(
                user_message=flush_prompt,
                conversation_history=msgs,
+                sync_honcho=False,
            )
            logger.info("Pre-reset memory flush completed for session %s", old_session_id)
            # Flush any queued Honcho writes before the session is dropped
@@ -504,10 +557,19 @@ class GatewayRunner:
        except Exception as e:
            logger.debug("Pre-reset memory flush failed for session %s: %s", old_session_id, e)

-    async def _async_flush_memories(self, old_session_id: str):
+    async def _async_flush_memories(
+        self,
+        old_session_id: str,
+        honcho_session_key: Optional[str] = None,
+    ):
        """Run the sync memory flush in a thread pool so it won't block the event loop."""
        loop = asyncio.get_event_loop()
-        await loop.run_in_executor(None, self._flush_memories_for_session, old_session_id)
+        await loop.run_in_executor(
+            None,
+            self._flush_memories_for_session,
+            old_session_id,
+            honcho_session_key,
+        )

    @property
    def should_exit_cleanly(self) -> bool:
@@ -517,6 +579,33 @@ class GatewayRunner:
    def exit_reason(self) -> Optional[str]:
        return self._exit_reason

+    def _session_key_for_source(self, source: SessionSource) -> str:
+        """Resolve the current session key for a source, honoring gateway config when available."""
+        if hasattr(self, "session_store") and self.session_store is not None:
+            try:
+                session_key = self.session_store._generate_session_key(source)
+                if isinstance(session_key, str) and session_key:
+                    return session_key
+            except Exception:
+                pass
+        config = getattr(self, "config", None)
+        return build_session_key(
+            source,
+            group_sessions_per_user=getattr(config, "group_sessions_per_user", True),
+        )
+
+    def _resolve_turn_agent_config(self, user_message: str, model: str, runtime_kwargs: dict) -> dict:
+        from agent.smart_model_routing import resolve_turn_route
+
+        primary = {
+            "model": model,
+            "api_key": runtime_kwargs.get("api_key"),
+            "base_url": runtime_kwargs.get("base_url"),
+            "provider": runtime_kwargs.get("provider"),
+            "api_mode": runtime_kwargs.get("api_mode"),
+        }
+        return resolve_turn_route(user_message, getattr(self, "_smart_model_routing", {}), primary)
+
    async def _handle_adapter_fatal_error(self, adapter: BasePlatformAdapter) -> None:
        """React to a non-retryable adapter failure after startup."""
        logger.error(
@@ -719,6 +808,20 @@ class GatewayRunner:
            pass
        return None

+    @staticmethod
+    def _load_smart_model_routing() -> dict:
+        """Load optional smart cheap-vs-strong model routing config."""
+        try:
+            import yaml as _y
+            cfg_path = _hermes_home / "config.yaml"
+            if cfg_path.exists():
+                with open(cfg_path, encoding="utf-8") as _f:
+                    cfg = _y.safe_load(_f) or {}
+                return cfg.get("smart_model_routing", {}) or {}
+        except Exception:
+            pass
+        return {}
+
    async def start(self) -> bool:
        """
        Start the gateway and all configured platform adapters.
@@ -761,12 +864,15 @@ class GatewayRunner:
            logger.warning("Process checkpoint recovery: %s", e)
        
        connected_count = 0
+        enabled_platform_count = 0
        startup_nonretryable_errors: list[str] = []
+        startup_retryable_errors: list[str] = []
        
        # Initialize and connect each configured platform
        for platform, platform_config in self.config.platforms.items():
            if not platform_config.enabled:
                continue
+            enabled_platform_count += 1
            
            adapter = self._create_adapter(platform, platform_config)
            if not adapter:
@@ -788,12 +894,22 @@ class GatewayRunner:
                    logger.info("✓ %s connected", platform.value)
                else:
                    logger.warning("✗ %s failed to connect", platform.value)
-                    if adapter.has_fatal_error and not adapter.fatal_error_retryable:
-                        startup_nonretryable_errors.append(
+                    if adapter.has_fatal_error:
+                        target = (
+                            startup_retryable_errors
+                            if adapter.fatal_error_retryable
+                            else startup_nonretryable_errors
+                        )
+                        target.append(
                            f"{platform.value}: {adapter.fatal_error_message}"
                        )
+                    else:
+                        startup_retryable_errors.append(
+                            f"{platform.value}: failed to connect"
+                        )
            except Exception as e:
                logger.error("✗ %s error: %s", platform.value, e)
+                startup_retryable_errors.append(f"{platform.value}: {e}")
        
        if connected_count == 0:
            if startup_nonretryable_errors:
@@ -806,7 +922,16 @@ class GatewayRunner:
                    pass
                self._request_clean_exit(reason)
                return True
-            logger.warning("No messaging platforms connected.")
+            if enabled_platform_count > 0:
+                reason = "; ".join(startup_retryable_errors) or "all configured messaging platforms failed to connect"
+                logger.error("Gateway failed to connect any configured messaging platform: %s", reason)
+                try:
+                    from gateway.status import write_runtime_status
+                    write_runtime_status(gateway_state="startup_failed", exit_reason=reason)
+                except Exception:
+                    pass
+                return False
+            logger.warning("No messaging platforms enabled.")
            logger.info("Gateway will continue running for cron job execution.")
        
        # Update delivery router with adapters
@@ -883,7 +1008,7 @@ class GatewayRunner:
                        entry.session_id, key,
                    )
                    try:
-                        await self._async_flush_memories(entry.session_id)
+                        await self._async_flush_memories(entry.session_id, key)
                        self._shutdown_gateway_honcho(key)
                        self.session_store._pre_flushed_sessions.add(entry.session_id)
                    except Exception as e:
@@ -900,8 +1025,19 @@ class GatewayRunner:
        """Stop the gateway and disconnect all adapters."""
        logger.info("Stopping gateway...")
        self._running = False
-        
+
+        for session_key, agent in list(self._running_agents.items()):
+            try:
+                agent.interrupt("Gateway shutting down")
+                logger.debug("Interrupted running agent for session %s during shutdown", session_key[:20])
+            except Exception as e:
+                logger.debug("Failed interrupting agent during shutdown: %s", e)
+
        for platform, adapter in list(self.adapters.items()):
+            try:
+                await adapter.cancel_background_tasks()
+            except Exception as e:
+                logger.debug("✗ %s background-task cancel error: %s", platform.value, e)
            try:
                await adapter.disconnect()
                logger.info("✓ %s disconnected", platform.value)
@@ -909,6 +1045,9 @@ class GatewayRunner:
                logger.error("✗ %s disconnect error: %s", platform.value, e)

        self.adapters.clear()
+        self._running_agents.clear()
+        self._pending_messages.clear()
+        self._pending_approvals.clear()
        self._shutdown_all_gateway_honcho()
        self._shutdown_event.set()
        
@@ -931,6 +1070,12 @@ class GatewayRunner:
        config: Any
    ) -> Optional[BasePlatformAdapter]:
        """Create the appropriate adapter for a platform."""
+        if hasattr(config, "extra") and isinstance(config.extra, dict):
+            config.extra.setdefault(
+                "group_sessions_per_user",
+                self.config.group_sessions_per_user,
+            )
+
        if platform == Platform.TELEGRAM:
            from gateway.platforms.telegram import TelegramAdapter, check_telegram_requirements
            if not check_telegram_requirements():
@@ -1095,11 +1240,39 @@ class GatewayRunner:
                        )
            return None
        
-        # PRIORITY: If an agent is already running for this session, interrupt it
-        # immediately. This is before command parsing to minimize latency -- the
-        # user's "stop" message reaches the agent as fast as possible.
-        _quick_key = build_session_key(source)
+        # PRIORITY handling when an agent is already running for this session.
+        # Default behavior is to interrupt immediately so user text/stop messages
+        # are handled with minimal latency.
+        #
+        # Special case: Telegram/photo bursts often arrive as multiple near-
+        # simultaneous updates. Do NOT interrupt for photo-only follow-ups here;
+        # let the adapter-level batching/queueing logic absorb them.
+        _quick_key = self._session_key_for_source(source)
        if _quick_key in self._running_agents:
+            if event.get_command() == "status":
+                return await self._handle_status_command(event)
+
+            if event.message_type == MessageType.PHOTO:
+                logger.debug("PRIORITY photo follow-up for session %s — queueing without interrupt", _quick_key[:20])
+                adapter = self.adapters.get(source.platform)
+                if adapter:
+                    # Reuse adapter queue semantics so photo bursts merge cleanly.
+                    if _quick_key in adapter._pending_messages:
+                        existing = adapter._pending_messages[_quick_key]
+                        if getattr(existing, "message_type", None) == MessageType.PHOTO:
+                            existing.media_urls.extend(event.media_urls)
+                            existing.media_types.extend(event.media_types)
+                            if event.text:
+                                if not existing.text:
+                                    existing.text = event.text
+                                elif event.text not in existing.text:
+                                    existing.text = f"{existing.text}\n\n{event.text}".strip()
+                        else:
+                            adapter._pending_messages[_quick_key] = event
+                    else:
+                        adapter._pending_messages[_quick_key] = event
+                return None
+
            running_agent = self._running_agents[_quick_key]
            logger.debug("PRIORITY interrupt for session %s", _quick_key[:20])
            running_agent.interrupt(event.text)
@@ -1263,7 +1436,7 @@ class GatewayRunner:
                logger.debug("Skill command check failed (non-fatal): %s", e)
        
        # Check for pending exec approval responses
-        session_key_preview = build_session_key(source)
+        session_key_preview = self._session_key_for_source(source)
        if session_key_preview in self._pending_approvals:
            user_text = event.text.strip().lower()
            if user_text in ("yes", "y", "approve", "ok", "go", "do it"):
@@ -1312,8 +1485,17 @@ class GatewayRunner:
        # Set environment variables for tools
        self._set_session_env(context)
        
+        # Read privacy.redact_pii from config (re-read per message)
+        _redact_pii = False
+        try:
+            with open(_config_path, encoding="utf-8") as _pf:
+                _pcfg = yaml.safe_load(_pf) or {}
+            _redact_pii = bool((_pcfg.get("privacy") or {}).get("redact_pii", False))
+        except Exception:
+            pass
+
        # Build the context prompt to inject
-        context_prompt = build_session_context_prompt(context)
+        context_prompt = build_session_context_prompt(context, redact_pii=_redact_pii)
        
        # If the previous session expired and was auto-reset, prepend a notice
        # so the agent knows this is a fresh conversation (not an intentional /reset).
@@ -1682,9 +1864,17 @@ class GatewayRunner:
                session_key=session_key
            )
            
-            response = agent_result.get("final_response", "")
+            response = agent_result.get("final_response") or ""
            agent_messages = agent_result.get("messages", [])

+            # Surface error details when the agent failed silently (final_response=None)
+            if not response and agent_result.get("failed"):
+                error_detail = agent_result.get("error", "unknown error")
+                response = (
+                    f"The request failed: {str(error_detail)[:300]}\n"
+                    "Try again or use /reset to start a fresh session."
+                )
+
            # If the agent's session_id changed during compression, update
            # session_entry so transcript writes below go to the right session.
            if agent_result.get("session_id") and agent_result["session_id"] != session_entry.session_id:
@@ -1787,6 +1977,8 @@ class GatewayRunner:
            # Update session with actual prompt token count and model from the agent
            self.session_store.update_session(
                session_entry.session_key,
+                input_tokens=agent_result.get("input_tokens", 0),
+                output_tokens=agent_result.get("output_tokens", 0),
                last_prompt_tokens=agent_result.get("last_prompt_tokens", 0),
                model=agent_result.get("model"),
            )
@@ -1795,13 +1987,29 @@ class GatewayRunner:
            if self._should_send_voice_reply(event, response, agent_messages):
                await self._send_voice_reply(event, response)

+            # If streaming already delivered the response, return None so
+            # _process_message_background doesn't send it again.
+            if agent_result.get("already_sent"):
+                return None
+
            return response
            
        except Exception as e:
            logger.exception("Agent error in session %s", session_key)
+            error_type = type(e).__name__
+            error_detail = str(e)[:300] if str(e) else "no details available"
+            status_hint = ""
+            status_code = getattr(e, "status_code", None)
+            if status_code == 401:
+                status_hint = " Check your API key or run `claude /login` to refresh OAuth credentials."
+            elif status_code == 429:
+                status_hint = " You are being rate-limited. Please wait a moment and try again."
+            elif status_code == 529:
+                status_hint = " The API is temporarily overloaded. Please try again shortly."
            return (
-                "Sorry, I encountered an unexpected error. "
-                "The details have been logged for debugging. "
+                f"Sorry, I encountered an error ({error_type}).\n"
+                f"{error_detail}\n"
+                f"{status_hint}"
                "Try again or use /reset to start a fresh session."
            )
        finally:
@@ -1813,14 +2021,16 @@ class GatewayRunner:
        source = event.source
        
        # Get existing session key
-        session_key = self.session_store._generate_session_key(source)
+        session_key = self._session_key_for_source(source)
        
        # Flush memories in the background (fire-and-forget) so the user
        # gets the "Session reset!" response immediately.
        try:
            old_entry = self.session_store._entries.get(session_key)
            if old_entry:
-                asyncio.create_task(self._async_flush_memories(old_entry.session_id))
+                asyncio.create_task(
+                    self._async_flush_memories(old_entry.session_id, session_key)
+                )
        except Exception as e:
            logger.debug("Gateway memory flush on reset failed: %s", e)

@@ -1984,6 +2194,12 @@ class GatewayRunner:

        # Parse provider:model syntax
        target_provider, new_model = parse_model_input(args, current_provider)
+        # Auto-detect provider when no explicit provider:model syntax was used
+        if target_provider == current_provider:
+            from hermes_cli.models import detect_provider_for_model
+            detected = detect_provider_for_model(new_model, current_provider)
+            if detected:
+                target_provider, new_model = detected
        provider_changed = target_provider != current_provider

        # Resolve credentials for the target provider (for API probe)
@@ -2396,6 +2612,13 @@ class GatewayRunner:
        except Exception as e:
            logger.warning("Failed to join voice channel: %s", e)
            adapter._voice_input_callback = None
+            err_lower = str(e).lower()
+            if "pynacl" in err_lower or "nacl" in err_lower or "davey" in err_lower:
+                return (
+                    "Voice dependencies are missing (PyNaCl / davey). "
+                    "Install or reinstall Hermes with the messaging extra, e.g. "
+                    "`pip install hermes-agent[messaging]`."
+                )
            return f"Failed to join voice channel: {e}"

        if success:
@@ -2536,18 +2759,9 @@ class GatewayRunner:
        if has_agent_tts:
            return False

-        # Dedup: base adapter auto-TTS already handles voice input.
-        # Exception: Discord voice channel — play_tts override is a no-op,
-        # so the runner must handle VC playback.
-        skip_double = is_voice_input
-        if skip_double:
-            adapter = self.adapters.get(event.source.platform)
-            guild_id = self._get_guild_id(event)
-            if (guild_id and adapter
-                    and hasattr(adapter, "is_in_voice_channel")
-                    and adapter.is_in_voice_channel(guild_id)):
-                skip_double = False
-        if skip_double:
+        # Dedup: base adapter auto-TTS already handles voice input
+        # (play_tts plays in VC when connected, so runner can skip).
+        if is_voice_input:
            return False

        return True
@@ -2768,11 +2982,12 @@ class GatewayRunner:
            max_iterations = int(os.getenv("HERMES_MAX_ITERATIONS", "90"))
            reasoning_config = self._load_reasoning_config()
            self._reasoning_config = reasoning_config
+            turn_route = self._resolve_turn_agent_config(prompt, model, runtime_kwargs)

            def run_sync():
                agent = AIAgent(
-                    model=model,
-                    **runtime_kwargs,
+                    model=turn_route["model"],
+                    **turn_route["runtime"],
                    max_iterations=max_iterations,
                    quiet_mode=True,
                    verbose_logging=False,
@@ -3045,7 +3260,7 @@ class GatewayRunner:
            return "Session database not available."

        source = event.source
-        session_key = build_session_key(source)
+        session_key = self._session_key_for_source(source)
        name = event.get_command_args().strip()

        if not name:
@@ -3089,7 +3304,9 @@ class GatewayRunner:

        # Flush memories for current session before switching
        try:
-            asyncio.create_task(self._async_flush_memories(current_entry.session_id))
+            asyncio.create_task(
+                self._async_flush_memories(current_entry.session_id, session_key)
+            )
        except Exception as e:
            logger.debug("Memory flush on resume failed: %s", e)

@@ -3117,7 +3334,7 @@ class GatewayRunner:
    async def _handle_usage_command(self, event: MessageEvent) -> str:
        """Handle /usage command -- show token usage for the session's last agent run."""
        source = event.source
-        session_key = build_session_key(source)
+        session_key = self._session_key_for_source(source)

        agent = self._running_agents.get(session_key)
        if agent and hasattr(agent, "session_total_tokens") and agent.session_api_calls > 0:
@@ -3469,10 +3686,12 @@ class GatewayRunner:
        os.environ["HERMES_SESSION_CHAT_ID"] = context.source.chat_id
        if context.source.chat_name:
            os.environ["HERMES_SESSION_CHAT_NAME"] = context.source.chat_name
+        if context.source.thread_id:
+            os.environ["HERMES_SESSION_THREAD_ID"] = str(context.source.thread_id)
    
    def _clear_session_env(self) -> None:
        """Clear session environment variables."""
-        for var in ["HERMES_SESSION_PLATFORM", "HERMES_SESSION_CHAT_ID", "HERMES_SESSION_CHAT_NAME"]:
+        for var in ["HERMES_SESSION_PLATFORM", "HERMES_SESSION_CHAT_ID", "HERMES_SESSION_CHAT_NAME", "HERMES_SESSION_THREAD_ID"]:
            if var in os.environ:
                del os.environ[var]
    
@@ -3584,7 +3803,10 @@ class GatewayRunner:
                    )
                else:
                    error = result.get("error", "unknown error")
-                    if "No STT provider" in error or "not set" in error:
+                    if (
+                        "No STT provider" in error
+                        or error.startswith("Neither VOICE_TOOLS_OPENAI_KEY nor OPENAI_API_KEY is set")
+                    ):
                        enriched_parts.append(
                            "[The user sent a voice message but I can't listen "
                            "to it right now~ No STT provider is configured "
@@ -3629,6 +3851,7 @@ class GatewayRunner:
        session_key = watcher.get("session_key", "")
        platform_name = watcher.get("platform", "")
        chat_id = watcher.get("chat_id", "")
+        thread_id = watcher.get("thread_id", "")
        notify_mode = self._load_background_notifications_mode()

        logger.debug("Process watcher started: %s (every %ss, notify=%s)",
@@ -3676,7 +3899,8 @@ class GatewayRunner:
                            break
                    if adapter and chat_id:
                        try:
-                            await adapter.send(chat_id, message_text)
+                            send_meta = {"thread_id": thread_id} if thread_id else None
+                            await adapter.send(chat_id, message_text, metadata=send_meta)
                        except Exception as e:
                            logger.error("Watcher delivery error: %s", e)
                break
@@ -3695,7 +3919,8 @@ class GatewayRunner:
                        break
                if adapter and chat_id:
                    try:
-                        await adapter.send(chat_id, message_text)
+                        send_meta = {"thread_id": thread_id} if thread_id else None
+                        await adapter.send(chat_id, message_text, metadata=send_meta)
                    except Exception as e:
                        logger.error("Watcher delivery error: %s", e)

@@ -3806,45 +4031,8 @@ class GatewayRunner:
            last_tool[0] = tool_name
            
            # Build progress message with primary argument preview
-            tool_emojis = {
-                "terminal": "💻",
-                "process": "⚙️",
-                "web_search": "🔍",
-                "web_extract": "📄",
-                "read_file": "📖",
-                "write_file": "✍️",
-                "patch": "🔧",
-                "search": "🔎",
-                "search_files": "🔎",
-                "list_directory": "📂",
-                "image_generate": "🎨",
-                "text_to_speech": "🔊",
-                "browser_navigate": "🌐",
-                "browser_click": "👆",
-                "browser_type": "⌨️",
-                "browser_snapshot": "📸",
-                "browser_scroll": "📜",
-                "browser_back": "◀️",
-                "browser_press": "⌨️",
-                "browser_close": "🚪",
-                "browser_get_images": "🖼️",
-                "browser_vision": "👁️",
-                "moa_query": "🧠",
-                "mixture_of_agents": "🧠",
-                "vision_analyze": "👁️",
-                "skill_view": "📚",
-                "skills_list": "📋",
-                "todo": "📋",
-                "memory": "🧠",
-                "session_search": "🔍",
-                "send_message": "📨",
-                "cronjob": "⏰",
-                "execute_code": "🐍",
-                "delegate_task": "🔀",
-                "clarify": "❓",
-                "skill_manage": "📝",
-            }
-            emoji = tool_emojis.get(tool_name, "⚙️")
+            from agent.display import get_tool_emoji
+            emoji = get_tool_emoji(tool_name, default="⚙️")
            
            # Verbose mode: show detailed arguments
            if progress_mode == "verbose" and args:
@@ -3971,6 +4159,7 @@ class GatewayRunner:
        agent_holder = [None]  # Mutable container for the agent instance
        result_holder = [None]  # Mutable container for the result
        tools_holder = [None]   # Mutable container for the tool definitions
+        stream_consumer_holder = [None]  # Mutable container for stream consumer
        
        # Bridge sync step_callback → async hooks.emit for agent:step events
        _loop_for_step = asyncio.get_event_loop()
@@ -4033,9 +4222,39 @@ class GatewayRunner:
            honcho_manager, honcho_config = self._get_or_create_gateway_honcho(session_key)
            reasoning_config = self._load_reasoning_config()
            self._reasoning_config = reasoning_config
+            # Set up streaming consumer if enabled
+            _stream_consumer = None
+            _stream_delta_cb = None
+            _scfg = getattr(getattr(self, 'config', None), 'streaming', None)
+            if _scfg is None:
+                from gateway.config import StreamingConfig
+                _scfg = StreamingConfig()
+
+            if _scfg.enabled and _scfg.transport != "off":
+                try:
+                    from gateway.stream_consumer import GatewayStreamConsumer, StreamConsumerConfig
+                    _adapter = self.adapters.get(source.platform)
+                    if _adapter:
+                        _consumer_cfg = StreamConsumerConfig(
+                            edit_interval=_scfg.edit_interval,
+                            buffer_threshold=_scfg.buffer_threshold,
+                            cursor=_scfg.cursor,
+                        )
+                        _stream_consumer = GatewayStreamConsumer(
+                            adapter=_adapter,
+                            chat_id=source.chat_id,
+                            config=_consumer_cfg,
+                            metadata={"thread_id": source.thread_id} if source.thread_id else None,
+                        )
+                        _stream_delta_cb = _stream_consumer.on_delta
+                        stream_consumer_holder[0] = _stream_consumer
+                except Exception as _sc_err:
+                    logger.debug("Could not set up stream consumer: %s", _sc_err)
+
+            turn_route = self._resolve_turn_agent_config(message, model, runtime_kwargs)
            agent = AIAgent(
-                model=model,
-                **runtime_kwargs,
+                model=turn_route["model"],
+                **turn_route["runtime"],
                max_iterations=max_iterations,
                quiet_mode=True,
                verbose_logging=False,
@@ -4052,6 +4271,7 @@ class GatewayRunner:
                session_id=session_id,
                tool_progress_callback=progress_callback if tool_progress_enabled else None,
                step_callback=_step_callback_sync if _hooks_ref.loaded_hooks else None,
+                stream_delta_callback=_stream_delta_cb,
                platform=platform_key,
                honcho_session_key=session_key,
                honcho_manager=honcho_manager,
@@ -4122,15 +4342,23 @@ class GatewayRunner:
            
            result = agent.run_conversation(message, conversation_history=agent_history, task_id=session_id)
            result_holder[0] = result
+
+            # Signal the stream consumer that the agent is done
+            if _stream_consumer is not None:
+                _stream_consumer.finish()
            
            # Return final response, or a message if something went wrong
            final_response = result.get("final_response")

-            # Extract last actual prompt token count from the agent's compressor
+            # Extract actual token counts from the agent instance used for this run
            _last_prompt_toks = 0
+            _input_toks = 0
+            _output_toks = 0
            _agent = agent_holder[0]
            if _agent and hasattr(_agent, "context_compressor"):
                _last_prompt_toks = getattr(_agent.context_compressor, "last_prompt_tokens", 0)
+                _input_toks = getattr(_agent, "session_prompt_tokens", 0)
+                _output_toks = getattr(_agent, "session_completion_tokens", 0)
            _resolved_model = getattr(_agent, "model", None) if _agent else None

            if not final_response:
@@ -4142,6 +4370,8 @@ class GatewayRunner:
                    "tools": tools_holder[0] or [],
                    "history_offset": len(agent_history),
                    "last_prompt_tokens": _last_prompt_toks,
+                    "input_tokens": _input_toks,
+                    "output_tokens": _output_toks,
                    "model": _resolved_model,
                }
            
@@ -4205,6 +4435,8 @@ class GatewayRunner:
                "tools": tools_holder[0] or [],
                "history_offset": len(agent_history),
                "last_prompt_tokens": _last_prompt_toks,
+                "input_tokens": _input_toks,
+                "output_tokens": _output_toks,
                "model": _resolved_model,
                "session_id": effective_session_id,
            }
@@ -4213,6 +4445,20 @@ class GatewayRunner:
        progress_task = None
        if tool_progress_enabled:
            progress_task = asyncio.create_task(send_progress_messages())
+
+        # Start stream consumer task — polls for consumer creation since it
+        # happens inside run_sync (thread pool) after the agent is constructed.
+        stream_task = None
+
+        async def _start_stream_consumer():
+            """Wait for the stream consumer to be created, then run it."""
+            for _ in range(200):  # Up to 10s wait
+                if stream_consumer_holder[0] is not None:
+                    await stream_consumer_holder[0].run()
+                    return
+                await asyncio.sleep(0.05)
+
+        stream_task = asyncio.create_task(_start_stream_consumer())
        
        # Track this agent as running for this session (for interrupt support)
        # We do this in a callback after the agent is created
@@ -4295,6 +4541,17 @@ class GatewayRunner:
            if progress_task:
                progress_task.cancel()
            interrupt_monitor.cancel()
+
+            # Wait for stream consumer to finish its final edit
+            if stream_task:
+                try:
+                    await asyncio.wait_for(stream_task, timeout=5.0)
+                except (asyncio.TimeoutError, asyncio.CancelledError):
+                    stream_task.cancel()
+                    try:
+                        await stream_task
+                    except asyncio.CancelledError:
+                        pass
            
            # Clean up tracking
            tracking_task.cancel()
@@ -4308,6 +4565,12 @@ class GatewayRunner:
                        await task
                    except asyncio.CancelledError:
                        pass
+
+        # If streaming already delivered the response, mark it so the
+        # caller's send() is skipped (avoiding duplicate messages).
+        _sc = stream_consumer_holder[0]
+        if _sc and _sc.already_sent and isinstance(response, dict):
+            response["already_sent"] = True
        
        return response

@@ -8,9 +8,11 @@ Handles:
 - Dynamic system prompt injection (agent knows its context)
 """

+import hashlib
 import logging
 import os
 import json
+import re
 import uuid
 from pathlib import Path
 from datetime import datetime, timedelta
@@ -19,6 +21,41 @@ from typing import Dict, List, Optional, Any

 logger = logging.getLogger(__name__)

+
+# ---------------------------------------------------------------------------
+# PII redaction helpers
+# ---------------------------------------------------------------------------
+
+_PHONE_RE = re.compile(r"^\+?\d[\d\-\s]{6,}$")
+
+
+def _hash_id(value: str) -> str:
+    """Deterministic 12-char hex hash of an identifier."""
+    return hashlib.sha256(value.encode("utf-8")).hexdigest()[:12]
+
+
+def _hash_sender_id(value: str) -> str:
+    """Hash a sender ID to ``user_<12hex>``."""
+    return f"user_{_hash_id(value)}"
+
+
+def _hash_chat_id(value: str) -> str:
+    """Hash the numeric portion of a chat ID, preserving platform prefix.
+
+    ``telegram:12345`` → ``telegram:<hash>``
+    ``12345``          → ``<hash>``
+    """
+    colon = value.find(":")
+    if colon > 0:
+        prefix = value[:colon]
+        return f"{prefix}:{_hash_id(value[colon + 1:])}"
+    return _hash_id(value)
+
+
+def _looks_like_phone(value: str) -> bool:
+    """Return True if *value* looks like a phone number (E.164 or similar)."""
+    return bool(_PHONE_RE.match(value.strip()))
+
 from .config import (
    Platform,
    GatewayConfig,
@@ -146,7 +183,21 @@ class SessionContext:
        }


-def build_session_context_prompt(context: SessionContext) -> str:
+_PII_SAFE_PLATFORMS = frozenset({
+    Platform.WHATSAPP,
+    Platform.SIGNAL,
+    Platform.TELEGRAM,
+})
+"""Platforms where user IDs can be safely redacted (no in-message mention system
+that requires raw IDs).  Discord is excluded because mentions use ``<@user_id>``
+and the LLM needs the real ID to tag users."""
+
+
+def build_session_context_prompt(
+    context: SessionContext,
+    *,
+    redact_pii: bool = False,
+) -> str:
    """
    Build the dynamic system prompt section that tells the agent about its context.
    
@@ -154,7 +205,15 @@ def build_session_context_prompt(context: SessionContext) -> str:
    - Where messages are coming from
    - What platforms are connected
    - Where it can deliver scheduled task outputs
+
+    When *redact_pii* is True **and** the source platform is in
+    ``_PII_SAFE_PLATFORMS``, phone numbers are stripped and user/chat IDs
+    are replaced with deterministic hashes before being sent to the LLM.
+    Platforms like Discord are excluded because mentions need real IDs.
+    Routing still uses the original values (they stay in SessionSource).
    """
+    # Only apply redaction on platforms where IDs aren't needed for mentions
+    redact_pii = redact_pii and context.source.platform in _PII_SAFE_PLATFORMS
    lines = [
        "## Current Session Context",
        "",
@@ -165,7 +224,25 @@ def build_session_context_prompt(context: SessionContext) -> str:
    if context.source.platform == Platform.LOCAL:
        lines.append(f"**Source:** {platform_name} (the machine running this agent)")
    else:
-        lines.append(f"**Source:** {platform_name} ({context.source.description})")
+        # Build a description that respects PII redaction
+        src = context.source
+        if redact_pii:
+            # Build a safe description without raw IDs
+            _uname = src.user_name or (
+                _hash_sender_id(src.user_id) if src.user_id else "user"
+            )
+            _cname = src.chat_name or _hash_chat_id(src.chat_id)
+            if src.chat_type == "dm":
+                desc = f"DM with {_uname}"
+            elif src.chat_type == "group":
+                desc = f"group: {_cname}"
+            elif src.chat_type == "channel":
+                desc = f"channel: {_cname}"
+            else:
+                desc = _cname
+        else:
+            desc = src.description
+        lines.append(f"**Source:** {platform_name} ({desc})")
    
    # Channel topic (if available - provides context about the channel's purpose)
    if context.source.chat_topic:
@@ -175,7 +252,10 @@ def build_session_context_prompt(context: SessionContext) -> str:
    if context.source.user_name:
        lines.append(f"**User:** {context.source.user_name}")
    elif context.source.user_id:
-        lines.append(f"**User ID:** {context.source.user_id}")
+        uid = context.source.user_id
+        if redact_pii:
+            uid = _hash_sender_id(uid)
+        lines.append(f"**User ID:** {uid}")
    
    # Platform-specific behavioral notes
    if context.source.platform == Platform.SLACK:
@@ -210,7 +290,8 @@ def build_session_context_prompt(context: SessionContext) -> str:
        lines.append("")
        lines.append("**Home Channels (default destinations):**")
        for platform, home in context.home_channels.items():
-            lines.append(f"  - {platform.value}: {home.name} (ID: {home.chat_id})")
+            hc_id = _hash_chat_id(home.chat_id) if redact_pii else home.chat_id
+            lines.append(f"  - {platform.value}: {home.name} (ID: {hc_id})")
    
    # Delivery options for scheduled tasks
    lines.append("")
@@ -220,7 +301,10 @@ def build_session_context_prompt(context: SessionContext) -> str:
    if context.source.platform == Platform.LOCAL:
        lines.append("- `\"origin\"` → Local output (saved to files)")
    else:
-        lines.append(f"- `\"origin\"` → Back to this chat ({context.source.chat_name or context.source.chat_id})")
+        _origin_label = context.source.chat_name or (
+            _hash_chat_id(context.source.chat_id) if redact_pii else context.source.chat_id
+        )
+        lines.append(f"- `\"origin\"` → Back to this chat ({_origin_label})")
    
    # Local always available
    lines.append("- `\"local\"` → Save to local files only (~/.hermes/cron/output/)")
@@ -315,31 +399,47 @@ class SessionEntry:
        )


-def build_session_key(source: SessionSource) -> str:
+def build_session_key(source: SessionSource, group_sessions_per_user: bool = True) -> str:
    """Build a deterministic session key from a message source.

    This is the single source of truth for session key construction.

    DM rules:
-      - WhatsApp DMs include chat_id (multi-user support).
-      - Other DMs include thread_id when present (e.g. Slack threaded DMs),
-        so each DM thread gets its own session while top-level DMs share one.
-      - Without thread_id or chat_id, all DMs share a single session.
+      - DMs include chat_id when present, so each private conversation is isolated.
+      - thread_id further differentiates threaded DMs within the same DM chat.
+      - Without chat_id, thread_id is used as a best-effort fallback.
+      - Without thread_id or chat_id, DMs share a single session.

    Group/channel rules:
-      - thread_id differentiates threads within a channel.
-      - Without thread_id, all messages in a channel share one session.
+      - chat_id identifies the parent group/channel.
+      - user_id/user_id_alt isolates participants within that parent chat when available when
+        ``group_sessions_per_user`` is enabled.
+      - thread_id differentiates threads within that parent chat.
+      - Without participant identifiers, or when isolation is disabled, messages fall back to one
+        shared session per chat.
+      - Without identifiers, messages fall back to one session per platform/chat_type.
    """
    platform = source.platform.value
    if source.chat_type == "dm":
+        if source.chat_id:
+            if source.thread_id:
+                return f"agent:main:{platform}:dm:{source.chat_id}:{source.thread_id}"
+            return f"agent:main:{platform}:dm:{source.chat_id}"
        if source.thread_id:
            return f"agent:main:{platform}:dm:{source.thread_id}"
-        if platform == "whatsapp" and source.chat_id:
-            return f"agent:main:{platform}:dm:{source.chat_id}"
        return f"agent:main:{platform}:dm"
+
+    participant_id = source.user_id_alt or source.user_id
+    key_parts = ["agent:main", platform, source.chat_type]
+
+    if source.chat_id:
+        key_parts.append(source.chat_id)
    if source.thread_id:
-        return f"agent:main:{platform}:{source.chat_type}:{source.chat_id}:{source.thread_id}"
-    return f"agent:main:{platform}:{source.chat_type}:{source.chat_id}"
+        key_parts.append(source.thread_id)
+    if group_sessions_per_user and participant_id:
+        key_parts.append(str(participant_id))
+
+    return ":".join(key_parts)


 class SessionStore:
@@ -418,7 +518,10 @@ class SessionStore:
    
    def _generate_session_key(self, source: SessionSource) -> str:
        """Generate a session key from a source."""
-        return build_session_key(source)
+        return build_session_key(
+            source,
+            group_sessions_per_user=getattr(self.config, "group_sessions_per_user", True),
+        )
    
    def _is_session_expired(self, entry: SessionEntry) -> bool:
        """Check if a session has expired based on its reset policy.
@@ -83,8 +83,7 @@ def _looks_like_gateway_process(pid: int) -> bool:
    """Return True when the live PID still looks like the Hermes gateway."""
    cmdline = _read_process_cmdline(pid)
    if not cmdline:
-        # If we cannot inspect the process, fall back to the liveness check.
-        return True
+        return False

    patterns = (
        "hermes_cli.main gateway",
@@ -94,6 +93,24 @@ def _looks_like_gateway_process(pid: int) -> bool:
    return any(pattern in cmdline for pattern in patterns)


+def _record_looks_like_gateway(record: dict[str, Any]) -> bool:
+    """Validate gateway identity from PID-file metadata when cmdline is unavailable."""
+    if record.get("kind") != _GATEWAY_KIND:
+        return False
+
+    argv = record.get("argv")
+    if not isinstance(argv, list) or not argv:
+        return False
+
+    cmdline = " ".join(str(part) for part in argv)
+    patterns = (
+        "hermes_cli.main gateway",
+        "hermes gateway",
+        "gateway/run.py",
+    )
+    return any(pattern in cmdline for pattern in patterns)
+
+
 def _build_pid_record() -> dict:
    return {
        "pid": os.getpid(),
@@ -325,8 +342,9 @@ def get_running_pid() -> Optional[int]:
        return None

    if not _looks_like_gateway_process(pid):
-        remove_pid_file()
-        return None
+        if not _record_looks_like_gateway(record):
+            remove_pid_file()
+            return None

    return pid

@@ -0,0 +1,177 @@
+"""Gateway streaming consumer — bridges sync agent callbacks to async platform delivery.
+
+The agent fires stream_delta_callback(text) synchronously from its worker thread.
+GatewayStreamConsumer:
+  1. Receives deltas via on_delta() (thread-safe, sync)
+  2. Queues them to an asyncio task via queue.Queue
+  3. The async run() task buffers, rate-limits, and progressively edits
+     a single message on the target platform
+
+Design: Uses the edit transport (send initial message, then editMessageText).
+This is universally supported across Telegram, Discord, and Slack.
+
+Credit: jobless0x (#774, #1312), OutThisLife (#798), clicksingh (#697).
+"""
+
+from __future__ import annotations
+
+import asyncio
+import logging
+import queue
+import time
+from dataclasses import dataclass
+from typing import Any, Optional
+
+logger = logging.getLogger("gateway.stream_consumer")
+
+# Sentinel to signal the stream is complete
+_DONE = object()
+
+
+@dataclass
+class StreamConsumerConfig:
+    """Runtime config for a single stream consumer instance."""
+    edit_interval: float = 0.3
+    buffer_threshold: int = 40
+    cursor: str = " ▉"
+
+
+class GatewayStreamConsumer:
+    """Async consumer that progressively edits a platform message with streamed tokens.
+
+    Usage::
+
+        consumer = GatewayStreamConsumer(adapter, chat_id, config, metadata=metadata)
+        # Pass consumer.on_delta as stream_delta_callback to AIAgent
+        agent = AIAgent(..., stream_delta_callback=consumer.on_delta)
+        # Start the consumer as an asyncio task
+        task = asyncio.create_task(consumer.run())
+        # ... run agent in thread pool ...
+        consumer.finish()  # signal completion
+        await task         # wait for final edit
+    """
+
+    def __init__(
+        self,
+        adapter: Any,
+        chat_id: str,
+        config: Optional[StreamConsumerConfig] = None,
+        metadata: Optional[dict] = None,
+    ):
+        self.adapter = adapter
+        self.chat_id = chat_id
+        self.cfg = config or StreamConsumerConfig()
+        self.metadata = metadata
+        self._queue: queue.Queue = queue.Queue()
+        self._accumulated = ""
+        self._message_id: Optional[str] = None
+        self._already_sent = False
+        self._edit_supported = True  # Disabled on first edit failure (Signal/Email/HA)
+        self._last_edit_time = 0.0
+
+    @property
+    def already_sent(self) -> bool:
+        """True if at least one message was sent/edited — signals the base
+        adapter to skip re-sending the final response."""
+        return self._already_sent
+
+    def on_delta(self, text: str) -> None:
+        """Thread-safe callback — called from the agent's worker thread."""
+        if text:
+            self._queue.put(text)
+
+    def finish(self) -> None:
+        """Signal that the stream is complete."""
+        self._queue.put(_DONE)
+
+    async def run(self) -> None:
+        """Async task that drains the queue and edits the platform message."""
+        try:
+            while True:
+                # Drain all available items from the queue
+                got_done = False
+                while True:
+                    try:
+                        item = self._queue.get_nowait()
+                        if item is _DONE:
+                            got_done = True
+                            break
+                        self._accumulated += item
+                    except queue.Empty:
+                        break
+
+                # Decide whether to flush an edit
+                now = time.monotonic()
+                elapsed = now - self._last_edit_time
+                should_edit = (
+                    got_done
+                    or (elapsed >= self.cfg.edit_interval
+                        and len(self._accumulated) > 0)
+                    or len(self._accumulated) >= self.cfg.buffer_threshold
+                )
+
+                if should_edit and self._accumulated:
+                    display_text = self._accumulated
+                    if not got_done:
+                        display_text += self.cfg.cursor
+
+                    await self._send_or_edit(display_text)
+                    self._last_edit_time = time.monotonic()
+
+                if got_done:
+                    # Final edit without cursor
+                    if self._accumulated and self._message_id:
+                        await self._send_or_edit(self._accumulated)
+                    return
+
+                await asyncio.sleep(0.05)  # Small yield to not busy-loop
+
+        except asyncio.CancelledError:
+            # Best-effort final edit on cancellation
+            if self._accumulated and self._message_id:
+                try:
+                    await self._send_or_edit(self._accumulated)
+                except Exception:
+                    pass
+        except Exception as e:
+            logger.error("Stream consumer error: %s", e)
+
+    async def _send_or_edit(self, text: str) -> None:
+        """Send or edit the streaming message."""
+        try:
+            if self._message_id is not None:
+                if self._edit_supported:
+                    # Edit existing message
+                    result = await self.adapter.edit_message(
+                        chat_id=self.chat_id,
+                        message_id=self._message_id,
+                        content=text,
+                    )
+                    if result.success:
+                        self._already_sent = True
+                    else:
+                        # Edit not supported by this adapter — stop streaming,
+                        # let the normal send path handle the final response.
+                        # Without this guard, adapters like Signal/Email would
+                        # flood the chat with a new message every edit_interval.
+                        logger.debug("Edit failed, disabling streaming for this adapter")
+                        self._edit_supported = False
+                else:
+                    # Editing not supported — skip intermediate updates.
+                    # The final response will be sent by the normal path.
+                    pass
+            else:
+                # First message — send new
+                result = await self.adapter.send(
+                    chat_id=self.chat_id,
+                    content=text,
+                    metadata=self.metadata,
+                )
+                if result.success and result.message_id:
+                    self._message_id = result.message_id
+                    self._already_sent = True
+                else:
+                    # Initial send failed — disable streaming for this session
+                    self._edit_supported = False
+        except Exception as e:
+            logger.error("Stream send/edit error: %s", e)
@@ -147,6 +147,14 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
        api_key_env_vars=("MINIMAX_CN_API_KEY",),
        base_url_env_var="MINIMAX_CN_BASE_URL",
    ),
+    "deepseek": ProviderConfig(
+        id="deepseek",
+        name="DeepSeek",
+        auth_type="api_key",
+        inference_base_url="https://api.deepseek.com/v1",
+        api_key_env_vars=("DEEPSEEK_API_KEY",),
+        base_url_env_var="DEEPSEEK_BASE_URL",
+    ),
 }


@@ -7,7 +7,9 @@ interactive CLI.

 from __future__ import annotations

+import os
 from collections.abc import Callable, Mapping
+from pathlib import Path
 from typing import Any

 from prompt_toolkit.completion import Completer, Completion
@@ -26,6 +28,7 @@ COMMANDS_BY_CATEGORY = {
        "/title": "Set a title for the current session (usage: /title My Session Name)",
        "/compress": "Manually compress conversation context (flush memories + summarize)",
        "/rollback": "List or restore filesystem checkpoints (usage: /rollback [number])",
+        "/stop": "Kill all running background processes",
        "/background": "Run a prompt in the background (usage: /background <prompt>)",
    },
    "Configuration": {
@@ -45,6 +48,8 @@ COMMANDS_BY_CATEGORY = {
        "/skills": "Search, install, inspect, or manage skills from online registries",
        "/cron": "Manage scheduled tasks (list, add/create, edit, pause, resume, run, remove)",
        "/reload-mcp": "Reload MCP servers from config.yaml",
+        "/browser": "Connect browser tools to your live Chrome (usage: /browser connect|disconnect|status)",
+        "/plugins": "List installed plugins and their status",
    },
    "Info": {
        "/help": "Show this help message",
@@ -92,9 +97,88 @@ class SlashCommandCompleter(Completer):
        """
        return f"{cmd_name} " if cmd_name == word else cmd_name

+    @staticmethod
+    def _extract_path_word(text: str) -> str | None:
+        """Extract the current word if it looks like a file path.
+
+        Returns the path-like token under the cursor, or None if the
+        current word doesn't look like a path.  A word is path-like when
+        it starts with ``./``, ``../``, ``~/``, ``/``, or contains a
+        ``/`` separator (e.g. ``src/main.py``).
+        """
+        if not text:
+            return None
+        # Walk backwards to find the start of the current "word".
+        # Words are delimited by spaces, but paths can contain almost anything.
+        i = len(text) - 1
+        while i >= 0 and text[i] != " ":
+            i -= 1
+        word = text[i + 1:]
+        if not word:
+            return None
+        # Only trigger path completion for path-like tokens
+        if word.startswith(("./", "../", "~/", "/")) or "/" in word:
+            return word
+        return None
+
+    @staticmethod
+    def _path_completions(word: str, limit: int = 30):
+        """Yield Completion objects for file paths matching *word*."""
+        expanded = os.path.expanduser(word)
+        # Split into directory part and prefix to match inside it
+        if expanded.endswith("/"):
+            search_dir = expanded
+            prefix = ""
+        else:
+            search_dir = os.path.dirname(expanded) or "."
+            prefix = os.path.basename(expanded)
+
+        try:
+            entries = os.listdir(search_dir)
+        except OSError:
+            return
+
+        count = 0
+        prefix_lower = prefix.lower()
+        for entry in sorted(entries):
+            if prefix and not entry.lower().startswith(prefix_lower):
+                continue
+            if count >= limit:
+                break
+
+            full_path = os.path.join(search_dir, entry)
+            is_dir = os.path.isdir(full_path)
+
+            # Build the completion text (what replaces the typed word)
+            if word.startswith("~"):
+                display_path = "~/" + os.path.relpath(full_path, os.path.expanduser("~"))
+            elif os.path.isabs(word):
+                display_path = full_path
+            else:
+                # Keep relative
+                display_path = os.path.relpath(full_path)
+
+            if is_dir:
+                display_path += "/"
+
+            suffix = "/" if is_dir else ""
+            meta = "dir" if is_dir else _file_size_label(full_path)
+
+            yield Completion(
+                display_path,
+                start_position=-len(word),
+                display=entry + suffix,
+                display_meta=meta,
+            )
+            count += 1
+
    def get_completions(self, document, complete_event):
        text = document.text_before_cursor
        if not text.startswith("/"):
+            # Try file path completion for non-slash input
+            path_word = self._extract_path_word(text)
+            if path_word is not None:
+                yield from self._path_completions(path_word)
            return

        word = text[1:]
@@ -120,3 +204,18 @@ class SlashCommandCompleter(Completer):
                    display=cmd,
                    display_meta=f"⚡ {short_desc}",
                )
+
+
+def _file_size_label(path: str) -> str:
+    """Return a compact human-readable file size, or '' on error."""
+    try:
+        size = os.path.getsize(path)
+    except OSError:
+        return ""
+    if size < 1024:
+        return f"{size}B"
+    if size < 1024 * 1024:
+        return f"{size / 1024:.0f}K"
+    if size < 1024 * 1024 * 1024:
+        return f"{size / (1024 * 1024):.1f}M"
+    return f"{size / (1024 * 1024 * 1024):.1f}G"
@@ -118,6 +118,14 @@ DEFAULT_CONFIG = {
        # Each entry is "host_path:container_path" (standard Docker -v syntax).
        # Example: ["/home/user/projects:/workspace/projects", "/data:/data"]
        "docker_volumes": [],
+        # Explicit opt-in: mount the host cwd into /workspace for Docker sessions.
+        # Default off because passing host directories into a sandbox weakens isolation.
+        "docker_mount_cwd_to_workspace": False,
+        # Persistent shell — keep a long-lived bash shell across execute() calls
+        # so cwd/env vars/shell variables survive between commands.
+        # Enabled by default for non-local backends (SSH); local is always opt-in
+        # via TERMINAL_LOCAL_PERSISTENT env var.
+        "persistent_shell": True,
    },
    
    "browser": {
@@ -129,7 +137,7 @@ DEFAULT_CONFIG = {
    # When enabled, the agent takes a snapshot of the working directory once per
    # conversation turn (on first write_file/patch call).  Use /rollback to restore.
    "checkpoints": {
-        "enabled": False,
+        "enabled": True,
        "max_snapshots": 50,  # Max checkpoints to keep per directory
    },
    
@@ -139,6 +147,12 @@ DEFAULT_CONFIG = {
        "summary_model": "google/gemini-3-flash-preview",
        "summary_provider": "auto",
    },
+    "smart_model_routing": {
+        "enabled": False,
+        "max_simple_chars": 160,
+        "max_simple_words": 28,
+        "cheap_model": {},
+    },
    
    # Auxiliary model config — provider:model for each side task.
    # Format: provider is the provider name, model is the model slug.
@@ -177,6 +191,12 @@ DEFAULT_CONFIG = {
            "base_url": "",
            "api_key": "",
        },
+        "approval": {
+            "provider": "auto",
+            "model": "",           # fast/cheap model recommended (e.g. gemini-flash, haiku)
+            "base_url": "",
+            "api_key": "",
+        },
        "mcp": {
            "provider": "auto",
            "model": "",
@@ -197,8 +217,15 @@ DEFAULT_CONFIG = {
        "resume_display": "full",
        "bell_on_complete": False,
        "show_reasoning": False,
+        "streaming": False,
+        "show_cost": False,       # Show $ cost in the status bar (off by default)
        "skin": "default",
    },
+
+    # Privacy settings
+    "privacy": {
+        "redact_pii": False,  # When True, hash user IDs and strip phone numbers from LLM context
+    },
    
    # Text-to-speech configuration
    "tts": {
@@ -280,6 +307,15 @@ DEFAULT_CONFIG = {
    "discord": {
        "require_mention": True,       # Require @mention to respond in server channels
        "free_response_channels": "",  # Comma-separated channel IDs where bot responds without mention
+        "auto_thread": True,           # Auto-create threads on @mention in channels (like Slack)
+    },
+
+    # Approval mode for dangerous commands:
+    #   manual — always prompt the user (default)
+    #   smart  — use auxiliary LLM to auto-approve low-risk commands, prompt for high-risk
+    #   off    — skip all approval prompts (equivalent to --yolo)
+    "approvals": {
+        "mode": "manual",
    },

    # Permanently allowed dangerous command patterns (added via "always" approval)
@@ -423,6 +459,20 @@ OPTIONAL_ENV_VARS = {
        "category": "provider",
        "advanced": True,
    },
+    "DEEPSEEK_API_KEY": {
+        "description": "DeepSeek API key for direct DeepSeek access",
+        "prompt": "DeepSeek API Key",
+        "url": "https://platform.deepseek.com/api_keys",
+        "password": True,
+        "category": "provider",
+    },
+    "DEEPSEEK_BASE_URL": {
+        "description": "Custom DeepSeek API base URL (advanced)",
+        "prompt": "DeepSeek Base URL",
+        "url": "",
+        "password": False,
+        "category": "provider",
+    },

    # ── Tool API keys ──
    "FIRECRAWL_API_KEY": {
@@ -967,6 +1017,19 @@ _FALLBACK_COMMENT = """
 # fallback_model:
 #   provider: openrouter
 #   model: anthropic/claude-sonnet-4
+#
+# ── Smart Model Routing ────────────────────────────────────────────────
+# Optional cheap-vs-strong routing for simple turns.
+# Keeps the primary model for complex work, but can route short/simple
+# messages to a cheaper model across providers.
+#
+# smart_model_routing:
+#   enabled: true
+#   max_simple_chars: 160
+#   max_simple_words: 28
+#   cheap_model:
+#     provider: openrouter
+#     model: google/gemini-2.5-flash
 """


@@ -997,6 +1060,19 @@ _COMMENTED_SECTIONS = """
 # fallback_model:
 #   provider: openrouter
 #   model: anthropic/claude-sonnet-4
+#
+# ── Smart Model Routing ────────────────────────────────────────────────
+# Optional cheap-vs-strong routing for simple turns.
+# Keeps the primary model for complex work, but can route short/simple
+# messages to a cheaper model across providers.
+#
+# smart_model_routing:
+#   enabled: true
+#   max_simple_chars: 160
+#   max_simple_words: 28
+#   cheap_model:
+#     provider: openrouter
+#     model: google/gemini-2.5-flash
 """


@@ -1387,9 +1463,11 @@ def set_config_value(key: str, value: str):
        "terminal.singularity_image": "TERMINAL_SINGULARITY_IMAGE",
        "terminal.modal_image": "TERMINAL_MODAL_IMAGE",
        "terminal.daytona_image": "TERMINAL_DAYTONA_IMAGE",
+        "terminal.docker_mount_cwd_to_workspace": "TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE",
        "terminal.cwd": "TERMINAL_CWD",
        "terminal.timeout": "TERMINAL_TIMEOUT",
        "terminal.sandbox_dir": "TERMINAL_SANDBOX_DIR",
+        "terminal.persistent_shell": "TERMINAL_PERSISTENT_SHELL",
    }
    if key in _config_to_env_sync:
        save_env_value(_config_to_env_sync[key], str(value))
@@ -0,0 +1,46 @@
+"""Helpers for loading Hermes .env files consistently across entrypoints."""
+
+from __future__ import annotations
+
+import os
+from pathlib import Path
+from typing import Iterable
+
+from dotenv import load_dotenv
+
+
+def _load_dotenv_with_fallback(path: Path, *, override: bool) -> None:
+    try:
+        load_dotenv(dotenv_path=path, override=override, encoding="utf-8")
+    except UnicodeDecodeError:
+        load_dotenv(dotenv_path=path, override=override, encoding="latin-1")
+
+
+def load_hermes_dotenv(
+    *,
+    hermes_home: str | os.PathLike | None = None,
+    project_env: str | os.PathLike | None = None,
+) -> list[Path]:
+    """Load Hermes environment files with user config taking precedence.
+
+    Behavior:
+    - `~/.hermes/.env` overrides stale shell-exported values when present.
+    - project `.env` acts as a dev fallback and only fills missing values when
+      the user env exists.
+    - if no user env exists, the project `.env` also overrides stale shell vars.
+    """
+    loaded: list[Path] = []
+
+    home_path = Path(hermes_home or os.getenv("HERMES_HOME", Path.home() / ".hermes"))
+    user_env = home_path / ".env"
+    project_env_path = Path(project_env) if project_env else None
+
+    if user_env.exists():
+        _load_dotenv_with_fallback(user_env, override=True)
+        loaded.append(user_env)
+
+    if project_env_path and project_env_path.exists():
+        _load_dotenv_with_fallback(project_env_path, override=not loaded)
+        loaded.append(project_env_path)
+
+    return loaded
@@ -119,14 +119,35 @@ def is_windows() -> bool:
 # Service Configuration
 # =============================================================================

-SERVICE_NAME = "hermes-gateway"
+_SERVICE_BASE = "hermes-gateway"
 SERVICE_DESCRIPTION = "Hermes Agent Gateway - Messaging Platform Integration"


+def get_service_name() -> str:
+    """Derive a systemd service name scoped to this HERMES_HOME.
+
+    Default ``~/.hermes`` returns ``hermes-gateway`` (backward compatible).
+    Any other HERMES_HOME appends a short hash so multiple installations
+    can each have their own systemd service without conflicting.
+    """
+    import hashlib
+    from pathlib import Path as _Path  # local import to avoid monkeypatch interference
+    home = _Path(os.getenv("HERMES_HOME", _Path.home() / ".hermes")).resolve()
+    default = (_Path.home() / ".hermes").resolve()
+    if home == default:
+        return _SERVICE_BASE
+    suffix = hashlib.sha256(str(home).encode()).hexdigest()[:8]
+    return f"{_SERVICE_BASE}-{suffix}"
+
+
+SERVICE_NAME = _SERVICE_BASE  # backward-compat for external importers; prefer get_service_name()
+
+
 def get_systemd_unit_path(system: bool = False) -> Path:
+    name = get_service_name()
    if system:
-        return Path("/etc/systemd/system") / f"{SERVICE_NAME}.service"
-    return Path.home() / ".config" / "systemd" / "user" / f"{SERVICE_NAME}.service"
+        return Path("/etc/systemd/system") / f"{name}.service"
+    return Path.home() / ".config" / "systemd" / "user" / f"{name}.service"


 def _systemctl_cmd(system: bool = False) -> list[str]:
@@ -350,8 +371,6 @@ def get_hermes_cli_path() -> str:
 # =============================================================================

 def generate_systemd_unit(system: bool = False, run_as_user: str | None = None) -> str:
-    import shutil
-
    python_path = get_python_path()
    working_dir = str(PROJECT_ROOT)
    venv_dir = str(PROJECT_ROOT / "venv")
@@ -360,7 +379,8 @@ def generate_systemd_unit(system: bool = False, run_as_user: str | None = None)

    # Build a PATH that includes the venv, node_modules, and standard system dirs
    sane_path = f"{venv_bin}:{node_bin}:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
-    hermes_cli = shutil.which("hermes") or f"{python_path} -m hermes_cli.main"
+
+    hermes_home = str(Path(os.getenv("HERMES_HOME", Path.home() / ".hermes")).resolve())

    if system:
        username, group_name, home_dir = _system_service_identity(run_as_user)
@@ -380,11 +400,12 @@ Environment="USER={username}"
 Environment="LOGNAME={username}"
 Environment="PATH={sane_path}"
 Environment="VIRTUAL_ENV={venv_dir}"
+Environment="HERMES_HOME={hermes_home}"
 Restart=on-failure
 RestartSec=10
 KillMode=mixed
 KillSignal=SIGTERM
-TimeoutStopSec=15
+TimeoutStopSec=60
 StandardOutput=journal
 StandardError=journal

@@ -399,15 +420,15 @@ After=network.target
 [Service]
 Type=simple
 ExecStart={python_path} -m hermes_cli.main gateway run --replace
-ExecStop={hermes_cli} gateway stop
 WorkingDirectory={working_dir}
 Environment="PATH={sane_path}"
 Environment="VIRTUAL_ENV={venv_dir}"
+Environment="HERMES_HOME={hermes_home}"
 Restart=on-failure
 RestartSec=10
 KillMode=mixed
 KillSignal=SIGTERM
-TimeoutStopSec=15
+TimeoutStopSec=60
 StandardOutput=journal
 StandardError=journal

@@ -455,7 +476,7 @@ def _print_linger_enable_warning(username: str, detail: str | None = None) -> No
    print(f"    sudo loginctl enable-linger {username}")
    print()
    print("  Then restart the gateway:")
-    print(f"    systemctl --user restart {SERVICE_NAME}.service")
+    print(f"    systemctl --user restart {get_service_name()}.service")
    print()


@@ -526,7 +547,7 @@ def systemd_install(force: bool = False, system: bool = False, run_as_user: str
    unit_path.write_text(generate_systemd_unit(system=system, run_as_user=run_as_user), encoding="utf-8")

    subprocess.run(_systemctl_cmd(system) + ["daemon-reload"], check=True)
-    subprocess.run(_systemctl_cmd(system) + ["enable", SERVICE_NAME], check=True)
+    subprocess.run(_systemctl_cmd(system) + ["enable", get_service_name()], check=True)

    print()
    print(f"✓ {_service_scope_label(system).capitalize()} service installed and enabled!")
@@ -534,7 +555,7 @@ def systemd_install(force: bool = False, system: bool = False, run_as_user: str
    print("Next steps:")
    print(f"  {'sudo ' if system else ''}hermes gateway start{scope_flag}              # Start the service")
    print(f"  {'sudo ' if system else ''}hermes gateway status{scope_flag}             # Check status")
-    print(f"  {'journalctl' if system else 'journalctl --user'} -u {SERVICE_NAME} -f  # View logs")
+    print(f"  {'journalctl' if system else 'journalctl --user'} -u {get_service_name()} -f  # View logs")
    print()

    if system:
@@ -552,8 +573,8 @@ def systemd_uninstall(system: bool = False):
    if system:
        _require_root_for_system_service("uninstall")

-    subprocess.run(_systemctl_cmd(system) + ["stop", SERVICE_NAME], check=False)
-    subprocess.run(_systemctl_cmd(system) + ["disable", SERVICE_NAME], check=False)
+    subprocess.run(_systemctl_cmd(system) + ["stop", get_service_name()], check=False)
+    subprocess.run(_systemctl_cmd(system) + ["disable", get_service_name()], check=False)

    unit_path = get_systemd_unit_path(system=system)
    if unit_path.exists():
@@ -569,7 +590,7 @@ def systemd_start(system: bool = False):
    if system:
        _require_root_for_system_service("start")
    refresh_systemd_unit_if_needed(system=system)
-    subprocess.run(_systemctl_cmd(system) + ["start", SERVICE_NAME], check=True)
+    subprocess.run(_systemctl_cmd(system) + ["start", get_service_name()], check=True)
    print(f"✓ {_service_scope_label(system).capitalize()} service started")


@@ -578,7 +599,7 @@ def systemd_stop(system: bool = False):
    system = _select_systemd_scope(system)
    if system:
        _require_root_for_system_service("stop")
-    subprocess.run(_systemctl_cmd(system) + ["stop", SERVICE_NAME], check=True)
+    subprocess.run(_systemctl_cmd(system) + ["stop", get_service_name()], check=True)
    print(f"✓ {_service_scope_label(system).capitalize()} service stopped")


@@ -588,7 +609,7 @@ def systemd_restart(system: bool = False):
    if system:
        _require_root_for_system_service("restart")
    refresh_systemd_unit_if_needed(system=system)
-    subprocess.run(_systemctl_cmd(system) + ["restart", SERVICE_NAME], check=True)
+    subprocess.run(_systemctl_cmd(system) + ["restart", get_service_name()], check=True)
    print(f"✓ {_service_scope_label(system).capitalize()} service restarted")


@@ -613,12 +634,12 @@ def systemd_status(deep: bool = False, system: bool = False):
        print()

    subprocess.run(
-        _systemctl_cmd(system) + ["status", SERVICE_NAME, "--no-pager"],
+        _systemctl_cmd(system) + ["status", get_service_name(), "--no-pager"],
        capture_output=False,
    )

    result = subprocess.run(
-        _systemctl_cmd(system) + ["is-active", SERVICE_NAME],
+        _systemctl_cmd(system) + ["is-active", get_service_name()],
        capture_output=True,
        text=True,
    )
@@ -657,7 +678,7 @@ def systemd_status(deep: bool = False, system: bool = False):
    if deep:
        print()
        print("Recent logs:")
-        subprocess.run(_journalctl_cmd(system) + ["-u", SERVICE_NAME, "-n", "20", "--no-pager"])
+        subprocess.run(_journalctl_cmd(system) + ["-u", get_service_name(), "-n", "20", "--no-pager"])


 # =============================================================================
@@ -684,6 +705,7 @@ def generate_launchd_plist() -> str:
        <string>hermes_cli.main</string>
        <string>gateway</string>
        <string>run</string>
+        <string>--replace</string>
    </array>
    
    <key>WorkingDirectory</key>
@@ -707,6 +729,36 @@ def generate_launchd_plist() -> str:
 </plist>
 """

+def launchd_plist_is_current() -> bool:
+    """Check if the installed launchd plist matches the currently generated one."""
+    plist_path = get_launchd_plist_path()
+    if not plist_path.exists():
+        return False
+
+    installed = plist_path.read_text(encoding="utf-8")
+    expected = generate_launchd_plist()
+    return _normalize_service_definition(installed) == _normalize_service_definition(expected)
+
+
+def refresh_launchd_plist_if_needed() -> bool:
+    """Rewrite the installed launchd plist when the generated definition has changed.
+
+    Unlike systemd, launchd picks up plist changes on the next ``launchctl stop``/
+    ``launchctl start`` cycle — no daemon-reload is needed.  We still unload/reload
+    to make launchd re-read the updated plist immediately.
+    """
+    plist_path = get_launchd_plist_path()
+    if not plist_path.exists() or launchd_plist_is_current():
+        return False
+
+    plist_path.write_text(generate_launchd_plist(), encoding="utf-8")
+    # Unload/reload so launchd picks up the new definition
+    subprocess.run(["launchctl", "unload", str(plist_path)], check=False)
+    subprocess.run(["launchctl", "load", str(plist_path)], check=False)
+    print("↻ Updated gateway launchd service definition to match the current Hermes install")
+    return True
+
+
 def launchd_install(force: bool = False):
    plist_path = get_launchd_plist_path()
    
@@ -739,6 +791,7 @@ def launchd_uninstall():
    print("✓ Service uninstalled")

 def launchd_start():
+    refresh_launchd_plist_if_needed()
    subprocess.run(["launchctl", "start", "ai.hermes.gateway"], check=True)
    print("✓ Service started")

@@ -747,6 +800,7 @@ def launchd_stop():
    print("✓ Service stopped")

 def launchd_restart():
+    refresh_launchd_plist_if_needed()
    launchd_stop()
    launchd_start()

@@ -1118,7 +1172,7 @@ def _is_service_running() -> bool:

        if user_unit_exists:
            result = subprocess.run(
-                _systemctl_cmd(False) + ["is-active", SERVICE_NAME],
+                _systemctl_cmd(False) + ["is-active", get_service_name()],
                capture_output=True, text=True
            )
            if result.stdout.strip() == "active":
@@ -1126,7 +1180,7 @@ def _is_service_running() -> bool:

        if system_unit_exists:
            result = subprocess.run(
-                _systemctl_cmd(True) + ["is-active", SERVICE_NAME],
+                _systemctl_cmd(True) + ["is-active", get_service_name()],
                capture_output=True, text=True
            )
            if result.stdout.strip() == "active":
@@ -54,16 +54,11 @@ from typing import Optional
 PROJECT_ROOT = Path(__file__).parent.parent.resolve()
 sys.path.insert(0, str(PROJECT_ROOT))

-# Load .env from ~/.hermes/.env first, then project root as dev fallback
-from dotenv import load_dotenv
-from hermes_cli.config import get_env_path, get_hermes_home
-_user_env = get_env_path()
-if _user_env.exists():
-    try:
-        load_dotenv(dotenv_path=_user_env, encoding="utf-8")
-    except UnicodeDecodeError:
-        load_dotenv(dotenv_path=_user_env, encoding="latin-1")
-load_dotenv(dotenv_path=PROJECT_ROOT / '.env', override=False)
+# Load .env from ~/.hermes/.env first, then project root as dev fallback.
+# User-managed env files should override stale shell exports on restart.
+from hermes_cli.config import get_hermes_home
+from hermes_cli.env_loader import load_hermes_dotenv
+load_hermes_dotenv(project_env=PROJECT_ROOT / '.env')

 # Point mini-swe-agent at ~/.hermes/ so it shares our config
 os.environ.setdefault("MSWEA_GLOBAL_CONFIG_DIR", str(get_hermes_home()))
@@ -1117,8 +1112,32 @@ def _model_flow_custom(config):

    effective_key = api_key or current_key

+    from hermes_cli.models import probe_api_models
+
+    probe = probe_api_models(effective_key, effective_url)
+    if probe.get("used_fallback") and probe.get("resolved_base_url"):
+        print(
+            f"Warning: endpoint verification worked at {probe['resolved_base_url']}/models, "
+            f"not the exact URL you entered. Saving the working base URL instead."
+        )
+        effective_url = probe["resolved_base_url"]
+        if base_url:
+            base_url = effective_url
+    elif probe.get("models") is not None:
+        print(
+            f"Verified endpoint via {probe.get('probed_url')} "
+            f"({len(probe.get('models') or [])} model(s) visible)"
+        )
+    else:
+        print(
+            f"Warning: could not verify this endpoint via {probe.get('probed_url')}. "
+            f"Hermes will still save it."
+        )
+        if probe.get("suggested_base_url"):
+            print(f"  If this server expects /v1, try base URL: {probe['suggested_base_url']}")
+
    if base_url:
-        save_env_value("OPENAI_BASE_URL", base_url)
+        save_env_value("OPENAI_BASE_URL", effective_url)
    if api_key:
        save_env_value("OPENAI_API_KEY", api_key)

@@ -2032,6 +2051,16 @@ def _resolve_stash_selector(git_cmd: list[str], cwd: Path, stash_ref: str) -> Op



+def _print_stash_cleanup_guidance(stash_ref: str, stash_selector: Optional[str] = None) -> None:
+    print("  Check `git status` first so you don't accidentally reapply the same change twice.")
+    print("  Find the saved entry with: git stash list --format='%gd %H %s'")
+    if stash_selector:
+        print(f"  Remove it with: git stash drop {stash_selector}")
+    else:
+        print(f"  Look for commit {stash_ref}, then drop its selector with: git stash drop stash@{{N}}")
+
+
+
 def _restore_stashed_changes(
    git_cmd: list[str],
    cwd: Path,
@@ -2072,7 +2101,7 @@ def _restore_stashed_changes(
    if stash_selector is None:
        print("⚠ Local changes were restored, but Hermes couldn't find the stash entry to drop.")
        print("  The stash was left in place. You can remove it manually after checking the result.")
-        print(f"  Look for commit {stash_ref} in `git stash list --format='%gd %H'` and drop that selector.")
+        _print_stash_cleanup_guidance(stash_ref)
    else:
        drop = subprocess.run(
            git_cmd + ["stash", "drop", stash_selector],
@@ -2087,7 +2116,7 @@ def _restore_stashed_changes(
            if drop.stderr.strip():
                print(drop.stderr.strip())
            print("  The stash was left in place. You can remove it manually after checking the result.")
-            print(f"  If needed: git stash drop {stash_selector}")
+            _print_stash_cleanup_guidance(stash_ref, stash_selector)

    print("⚠ Local changes were restored on top of the updated codebase.")
    print("  Review `git diff` / `git status` if Hermes behaves unexpectedly.")
@@ -2272,26 +2301,106 @@ def cmd_update(args):
        print()
        print("✓ Update complete!")
        
-        # Auto-restart gateway if it's running as a systemd service
+        # Auto-restart gateway if it's running.
+        # Uses the PID file (scoped to HERMES_HOME) to find this
+        # installation's gateway — safe with multiple installations.
        try:
-            check = subprocess.run(
-                ["systemctl", "--user", "is-active", "hermes-gateway"],
-                capture_output=True, text=True, timeout=5,
+            from gateway.status import get_running_pid, remove_pid_file
+            from hermes_cli.gateway import (
+                get_service_name, get_launchd_plist_path, is_macos,
+                refresh_launchd_plist_if_needed,
            )
-            if check.stdout.strip() == "active":
-                print()
-                print("→ Gateway service is running — restarting to pick up changes...")
-                restart = subprocess.run(
-                    ["systemctl", "--user", "restart", "hermes-gateway"],
-                    capture_output=True, text=True, timeout=15,
+            import signal as _signal
+
+            _gw_service_name = get_service_name()
+            existing_pid = get_running_pid()
+            has_systemd_service = False
+            has_launchd_service = False
+
+            try:
+                check = subprocess.run(
+                    ["systemctl", "--user", "is-active", _gw_service_name],
+                    capture_output=True, text=True, timeout=5,
                )
-                if restart.returncode == 0:
-                    print("✓ Gateway restarted.")
-                else:
-                    print(f"⚠ Gateway restart failed: {restart.stderr.strip()}")
-                    print("  Try manually: hermes gateway restart")
-        except (FileNotFoundError, subprocess.TimeoutExpired):
-            pass  # No systemd (macOS, WSL1, etc.) — skip silently
+                has_systemd_service = check.stdout.strip() == "active"
+            except (FileNotFoundError, subprocess.TimeoutExpired):
+                pass
+
+            # Check for macOS launchd service
+            if is_macos():
+                try:
+                    plist_path = get_launchd_plist_path()
+                    if plist_path.exists():
+                        check = subprocess.run(
+                            ["launchctl", "list", "ai.hermes.gateway"],
+                            capture_output=True, text=True, timeout=5,
+                        )
+                        has_launchd_service = check.returncode == 0
+                except (FileNotFoundError, subprocess.TimeoutExpired):
+                    pass
+
+            if existing_pid or has_systemd_service or has_launchd_service:
+                print()
+
+                # When a service manager is handling the gateway, let it
+                # manage the lifecycle — don't manually SIGTERM the PID
+                # (launchd KeepAlive would respawn immediately, causing races).
+                if has_systemd_service:
+                    import time as _time
+                    if existing_pid:
+                        try:
+                            os.kill(existing_pid, _signal.SIGTERM)
+                            print(f"→ Stopped gateway process (PID {existing_pid})")
+                        except ProcessLookupError:
+                            pass
+                        except PermissionError:
+                            print(f"⚠ Permission denied killing gateway PID {existing_pid}")
+                        remove_pid_file()
+                    _time.sleep(1)  # Brief pause for port/socket release
+                    print("→ Restarting gateway service...")
+                    restart = subprocess.run(
+                        ["systemctl", "--user", "restart", _gw_service_name],
+                        capture_output=True, text=True, timeout=15,
+                    )
+                    if restart.returncode == 0:
+                        print("✓ Gateway restarted.")
+                    else:
+                        print(f"⚠ Gateway restart failed: {restart.stderr.strip()}")
+                        print("  Try manually: hermes gateway restart")
+                elif has_launchd_service:
+                    # Refresh the plist first (picks up --replace and other
+                    # changes from the update we just pulled).
+                    refresh_launchd_plist_if_needed()
+                    # Explicit stop+start — don't rely on KeepAlive respawn
+                    # after a manual SIGTERM, which would race with the
+                    # PID file cleanup.
+                    print("→ Restarting gateway service...")
+                    stop = subprocess.run(
+                        ["launchctl", "stop", "ai.hermes.gateway"],
+                        capture_output=True, text=True, timeout=10,
+                    )
+                    start = subprocess.run(
+                        ["launchctl", "start", "ai.hermes.gateway"],
+                        capture_output=True, text=True, timeout=10,
+                    )
+                    if start.returncode == 0:
+                        print("✓ Gateway restarted via launchd.")
+                    else:
+                        print(f"⚠ Gateway restart failed: {start.stderr.strip()}")
+                        print("  Try manually: hermes gateway restart")
+                elif existing_pid:
+                    try:
+                        os.kill(existing_pid, _signal.SIGTERM)
+                        print(f"→ Stopped gateway process (PID {existing_pid})")
+                    except ProcessLookupError:
+                        pass  # Already gone
+                    except PermissionError:
+                        print(f"⚠ Permission denied killing gateway PID {existing_pid}")
+                    remove_pid_file()
+                    print("  ℹ️  Gateway was running manually (not as a service).")
+                    print("  Restart it with: hermes gateway run")
+        except Exception as e:
+            logger.debug("Gateway restart during update failed: %s", e)
        
        print()
        print("Tip: You can now select a provider and model:")
@@ -3093,7 +3202,11 @@ For more help on a command:

        elif action == "export":
            if args.session_id:
-                data = db.export_session(args.session_id)
+                resolved_session_id = db.resolve_session_id(args.session_id)
+                if not resolved_session_id:
+                    print(f"Session '{args.session_id}' not found.")
+                    return
+                data = db.export_session(resolved_session_id)
                if not data:
                    print(f"Session '{args.session_id}' not found.")
                    return
@@ -3108,13 +3221,17 @@ For more help on a command:
                print(f"Exported {len(sessions)} sessions to {args.output}")

        elif action == "delete":
+            resolved_session_id = db.resolve_session_id(args.session_id)
+            if not resolved_session_id:
+                print(f"Session '{args.session_id}' not found.")
+                return
            if not args.yes:
-                confirm = input(f"Delete session '{args.session_id}' and all its messages? [y/N] ")
+                confirm = input(f"Delete session '{resolved_session_id}' and all its messages? [y/N] ")
                if confirm.lower() not in ("y", "yes"):
                    print("Cancelled.")
                    return
-            if db.delete_session(args.session_id):
-                print(f"Deleted session '{args.session_id}'.")
+            if db.delete_session(resolved_session_id):
+                print(f"Deleted session '{resolved_session_id}'.")
            else:
                print(f"Session '{args.session_id}' not found.")

@@ -3130,10 +3247,14 @@ For more help on a command:
            print(f"Pruned {count} session(s).")

        elif action == "rename":
+            resolved_session_id = db.resolve_session_id(args.session_id)
+            if not resolved_session_id:
+                print(f"Session '{args.session_id}' not found.")
+                return
            title = " ".join(args.title)
            try:
-                if db.set_session_title(args.session_id, title):
-                    print(f"Session '{args.session_id}' renamed to: {title}")
+                if db.set_session_title(resolved_session_id, title):
+                    print(f"Session '{resolved_session_id}' renamed to: {title}")
                else:
                    print(f"Session '{args.session_id}' not found.")
            except ValueError as e:
@@ -78,6 +78,10 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "claude-sonnet-4-20250514",
        "claude-haiku-4-5-20251001",
    ],
+    "deepseek": [
+        "deepseek-chat",
+        "deepseek-reasoner",
+    ],
 }

 _PROVIDER_LABELS = {
@@ -89,6 +93,7 @@ _PROVIDER_LABELS = {
    "minimax": "MiniMax",
    "minimax-cn": "MiniMax (China)",
    "anthropic": "Anthropic",
+    "deepseek": "DeepSeek",
    "custom": "Custom endpoint",
 }

@@ -103,6 +108,7 @@ _PROVIDER_ALIASES = {
    "minimax_cn": "minimax-cn",
    "claude": "anthropic",
    "claude-code": "anthropic",
+    "deep-seek": "deepseek",
 }


@@ -136,7 +142,7 @@ def list_available_providers() -> list[dict[str, str]]:
    # Canonical providers in display order
    _PROVIDER_ORDER = [
        "openrouter", "nous", "openai-codex",
-        "zai", "kimi-coding", "minimax", "minimax-cn", "anthropic",
+        "zai", "kimi-coding", "minimax", "minimax-cn", "anthropic", "deepseek",
    ]
    # Build reverse alias map
    aliases_for: dict[str, list[str]] = {}
@@ -212,6 +218,111 @@ def curated_models_for_provider(provider: Optional[str]) -> list[tuple[str, str]
    return [(m, "") for m in models]


+def detect_provider_for_model(
+    model_name: str,
+    current_provider: str,
+) -> Optional[tuple[str, str]]:
+    """Auto-detect the best provider for a model name.
+
+    Returns ``(provider_id, model_name)`` — the model name may be remapped
+    (e.g. bare ``deepseek-chat`` → ``deepseek/deepseek-chat`` for OpenRouter).
+    Returns ``None`` when no confident match is found.
+
+    Priority:
+    1. Direct provider with credentials (highest)
+    2. Direct provider without credentials → remap to OpenRouter slug
+    3. OpenRouter catalog match
+    """
+    name = (model_name or "").strip()
+    if not name:
+        return None
+
+    name_lower = name.lower()
+
+    # Aggregators list other providers' models — never auto-switch TO them
+    _AGGREGATORS = {"nous", "openrouter"}
+
+    # If the model belongs to the current provider's catalog, don't suggest switching
+    current_models = _PROVIDER_MODELS.get(current_provider, [])
+    if any(name_lower == m.lower() for m in current_models):
+        return None
+
+    # --- Step 1: check static provider catalogs for a direct match ---
+    direct_match: Optional[str] = None
+    for pid, models in _PROVIDER_MODELS.items():
+        if pid == current_provider or pid in _AGGREGATORS:
+            continue
+        if any(name_lower == m.lower() for m in models):
+            direct_match = pid
+            break
+
+    if direct_match:
+        # Check if we have credentials for this provider
+        has_creds = False
+        try:
+            from hermes_cli.auth import PROVIDER_REGISTRY
+            pconfig = PROVIDER_REGISTRY.get(direct_match)
+            if pconfig:
+                import os
+                for env_var in pconfig.api_key_env_vars:
+                    if os.getenv(env_var, "").strip():
+                        has_creds = True
+                        break
+        except Exception:
+            pass
+
+        if has_creds:
+            return (direct_match, name)
+
+        # No direct creds — try to find this model on OpenRouter instead
+        or_slug = _find_openrouter_slug(name)
+        if or_slug:
+            return ("openrouter", or_slug)
+        # Still return the direct provider — credential resolution will
+        # give a clear error rather than silently using the wrong provider
+        return (direct_match, name)
+
+    # --- Step 2: check OpenRouter catalog ---
+    # First try exact match (handles provider/model format)
+    or_slug = _find_openrouter_slug(name)
+    if or_slug:
+        if current_provider != "openrouter":
+            return ("openrouter", or_slug)
+        # Already on openrouter, just return the resolved slug
+        if or_slug != name:
+            return ("openrouter", or_slug)
+        return None  # already on openrouter with matching name
+
+    return None
+
+
+def _find_openrouter_slug(model_name: str) -> Optional[str]:
+    """Find the full OpenRouter model slug for a bare or partial model name.
+
+    Handles:
+    - Exact match: ``anthropic/claude-opus-4.6`` → as-is
+    - Bare name: ``deepseek-chat`` → ``deepseek/deepseek-chat``
+    - Bare name: ``claude-opus-4.6`` → ``anthropic/claude-opus-4.6``
+    """
+    name_lower = model_name.strip().lower()
+    if not name_lower:
+        return None
+
+    # Exact match (already has provider/ prefix)
+    for mid, _ in OPENROUTER_MODELS:
+        if name_lower == mid.lower():
+            return mid
+
+    # Try matching just the model part (after the /)
+    for mid, _ in OPENROUTER_MODELS:
+        if "/" in mid:
+            _, model_part = mid.split("/", 1)
+            if name_lower == model_part.lower():
+                return mid
+
+    return None
+
+
 def normalize_provider(provider: Optional[str]) -> str:
    """Normalize provider aliases to Hermes' canonical provider ids.

@@ -308,6 +419,62 @@ def _fetch_anthropic_models(timeout: float = 5.0) -> Optional[list[str]]:
        return None


+def probe_api_models(
+    api_key: Optional[str],
+    base_url: Optional[str],
+    timeout: float = 5.0,
+) -> dict[str, Any]:
+    """Probe an OpenAI-compatible ``/models`` endpoint with light URL heuristics."""
+    normalized = (base_url or "").strip().rstrip("/")
+    if not normalized:
+        return {
+            "models": None,
+            "probed_url": None,
+            "resolved_base_url": "",
+            "suggested_base_url": None,
+            "used_fallback": False,
+        }
+
+    if normalized.endswith("/v1"):
+        alternate_base = normalized[:-3].rstrip("/")
+    else:
+        alternate_base = normalized + "/v1"
+
+    candidates: list[tuple[str, bool]] = [(normalized, False)]
+    if alternate_base and alternate_base != normalized:
+        candidates.append((alternate_base, True))
+
+    tried: list[str] = []
+    headers: dict[str, str] = {}
+    if api_key:
+        headers["Authorization"] = f"Bearer {api_key}"
+
+    for candidate_base, is_fallback in candidates:
+        url = candidate_base.rstrip("/") + "/models"
+        tried.append(url)
+        req = urllib.request.Request(url, headers=headers)
+        try:
+            with urllib.request.urlopen(req, timeout=timeout) as resp:
+                data = json.loads(resp.read().decode())
+                return {
+                    "models": [m.get("id", "") for m in data.get("data", [])],
+                    "probed_url": url,
+                    "resolved_base_url": candidate_base.rstrip("/"),
+                    "suggested_base_url": alternate_base if alternate_base != candidate_base else normalized,
+                    "used_fallback": is_fallback,
+                }
+        except Exception:
+            continue
+
+    return {
+        "models": None,
+        "probed_url": tried[-1] if tried else normalized.rstrip("/") + "/models",
+        "resolved_base_url": normalized,
+        "suggested_base_url": alternate_base if alternate_base != normalized else None,
+        "used_fallback": False,
+    }
+
+
 def fetch_api_models(
    api_key: Optional[str],
    base_url: Optional[str],
@@ -318,22 +485,7 @@ def fetch_api_models(
    Returns a list of model ID strings, or ``None`` if the endpoint could not
    be reached (network error, timeout, auth failure, etc.).
    """
-    if not base_url:
-        return None
-
-    url = base_url.rstrip("/") + "/models"
-    headers: dict[str, str] = {}
-    if api_key:
-        headers["Authorization"] = f"Bearer {api_key}"
-
-    req = urllib.request.Request(url, headers=headers)
-    try:
-        with urllib.request.urlopen(req, timeout=timeout) as resp:
-            data = json.loads(resp.read().decode())
-            # Standard OpenAI format: {"data": [{"id": "model-name", ...}, ...]}
-            return [m.get("id", "") for m in data.get("data", [])]
-    except Exception:
-        return None
+    return probe_api_models(api_key, base_url, timeout=timeout).get("models")


 def validate_requested_model(
@@ -376,13 +528,53 @@ def validate_requested_model(
            "message": "Model names cannot contain spaces.",
        }

-    # Custom endpoints can serve any model — skip validation
    if normalized == "custom":
+        probe = probe_api_models(api_key, base_url)
+        api_models = probe.get("models")
+        if api_models is not None:
+            if requested in set(api_models):
+                return {
+                    "accepted": True,
+                    "persist": True,
+                    "recognized": True,
+                    "message": None,
+                }
+
+            suggestions = get_close_matches(requested, api_models, n=3, cutoff=0.5)
+            suggestion_text = ""
+            if suggestions:
+                suggestion_text = "\n  Similar models: " + ", ".join(f"`{s}`" for s in suggestions)
+
+            message = (
+                f"Note: `{requested}` was not found in this custom endpoint's model listing "
+                f"({probe.get('probed_url')}). It may still work if the server supports hidden or aliased models."
+                f"{suggestion_text}"
+            )
+            if probe.get("used_fallback"):
+                message += (
+                    f"\n  Endpoint verification succeeded after trying `{probe.get('resolved_base_url')}`. "
+                    f"Consider saving that as your base URL."
+                )
+
+            return {
+                "accepted": True,
+                "persist": True,
+                "recognized": False,
+                "message": message,
+            }
+
+        message = (
+            f"Note: could not reach this custom endpoint's model listing at `{probe.get('probed_url')}`. "
+            f"Hermes will still save `{requested}`, but the endpoint should expose `/models` for verification."
+        )
+        if probe.get("suggested_base_url"):
+            message += f"\n  If this server expects `/v1`, try base URL: `{probe.get('suggested_base_url')}`"
+
        return {
            "accepted": True,
            "persist": True,
            "recognized": False,
-            "message": None,
+            "message": message,
        }

    # Probe the live API to check if the model actually exists
@@ -0,0 +1,449 @@
+"""
+Hermes Plugin System
+====================
+
+Discovers, loads, and manages plugins from three sources:
+
+1. **User plugins**   – ``~/.hermes/plugins/<name>/``
+2. **Project plugins** – ``./.hermes/plugins/<name>/``
+3. **Pip plugins**     – packages that expose the ``hermes_agent.plugins``
+   entry-point group.
+
+Each directory plugin must contain a ``plugin.yaml`` manifest **and** an
+``__init__.py`` with a ``register(ctx)`` function.
+
+Lifecycle hooks
+---------------
+Plugins may register callbacks for any of the hooks in ``VALID_HOOKS``.
+The agent core calls ``invoke_hook(name, **kwargs)`` at the appropriate
+points.
+
+Tool registration
+-----------------
+``PluginContext.register_tool()`` delegates to ``tools.registry.register()``
+so plugin-defined tools appear alongside the built-in tools.
+"""
+
+from __future__ import annotations
+
+import importlib
+import importlib.metadata
+import importlib.util
+import logging
+import os
+import sys
+import types
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Any, Callable, Dict, List, Optional, Set
+
+try:
+    import yaml
+except ImportError:  # pragma: no cover – yaml is optional at import time
+    yaml = None  # type: ignore[assignment]
+
+logger = logging.getLogger(__name__)
+
+# ---------------------------------------------------------------------------
+# Constants
+# ---------------------------------------------------------------------------
+
+VALID_HOOKS: Set[str] = {
+    "pre_tool_call",
+    "post_tool_call",
+    "pre_llm_call",
+    "post_llm_call",
+    "on_session_start",
+    "on_session_end",
+}
+
+ENTRY_POINTS_GROUP = "hermes_agent.plugins"
+
+_NS_PARENT = "hermes_plugins"
+
+
+# ---------------------------------------------------------------------------
+# Data classes
+# ---------------------------------------------------------------------------
+
+@dataclass
+class PluginManifest:
+    """Parsed representation of a plugin.yaml manifest."""
+
+    name: str
+    version: str = ""
+    description: str = ""
+    author: str = ""
+    requires_env: List[str] = field(default_factory=list)
+    provides_tools: List[str] = field(default_factory=list)
+    provides_hooks: List[str] = field(default_factory=list)
+    source: str = ""        # "user", "project", or "entrypoint"
+    path: Optional[str] = None
+
+
+@dataclass
+class LoadedPlugin:
+    """Runtime state for a single loaded plugin."""
+
+    manifest: PluginManifest
+    module: Optional[types.ModuleType] = None
+    tools_registered: List[str] = field(default_factory=list)
+    hooks_registered: List[str] = field(default_factory=list)
+    enabled: bool = False
+    error: Optional[str] = None
+
+
+# ---------------------------------------------------------------------------
+# PluginContext  – handed to each plugin's ``register()`` function
+# ---------------------------------------------------------------------------
+
+class PluginContext:
+    """Facade given to plugins so they can register tools and hooks."""
+
+    def __init__(self, manifest: PluginManifest, manager: "PluginManager"):
+        self.manifest = manifest
+        self._manager = manager
+
+    # -- tool registration --------------------------------------------------
+
+    def register_tool(
+        self,
+        name: str,
+        toolset: str,
+        schema: dict,
+        handler: Callable,
+        check_fn: Callable | None = None,
+        requires_env: list | None = None,
+        is_async: bool = False,
+        description: str = "",
+        emoji: str = "",
+    ) -> None:
+        """Register a tool in the global registry **and** track it as plugin-provided."""
+        from tools.registry import registry
+
+        registry.register(
+            name=name,
+            toolset=toolset,
+            schema=schema,
+            handler=handler,
+            check_fn=check_fn,
+            requires_env=requires_env,
+            is_async=is_async,
+            description=description,
+            emoji=emoji,
+        )
+        self._manager._plugin_tool_names.add(name)
+        logger.debug("Plugin %s registered tool: %s", self.manifest.name, name)
+
+    # -- hook registration --------------------------------------------------
+
+    def register_hook(self, hook_name: str, callback: Callable) -> None:
+        """Register a lifecycle hook callback.
+
+        Unknown hook names produce a warning but are still stored so
+        forward-compatible plugins don't break.
+        """
+        if hook_name not in VALID_HOOKS:
+            logger.warning(
+                "Plugin '%s' registered unknown hook '%s' "
+                "(valid: %s)",
+                self.manifest.name,
+                hook_name,
+                ", ".join(sorted(VALID_HOOKS)),
+            )
+        self._manager._hooks.setdefault(hook_name, []).append(callback)
+        logger.debug("Plugin %s registered hook: %s", self.manifest.name, hook_name)
+
+
+# ---------------------------------------------------------------------------
+# PluginManager
+# ---------------------------------------------------------------------------
+
+class PluginManager:
+    """Central manager that discovers, loads, and invokes plugins."""
+
+    def __init__(self) -> None:
+        self._plugins: Dict[str, LoadedPlugin] = {}
+        self._hooks: Dict[str, List[Callable]] = {}
+        self._plugin_tool_names: Set[str] = set()
+        self._discovered: bool = False
+
+    # -----------------------------------------------------------------------
+    # Public
+    # -----------------------------------------------------------------------
+
+    def discover_and_load(self) -> None:
+        """Scan all plugin sources and load each plugin found."""
+        if self._discovered:
+            return
+        self._discovered = True
+
+        manifests: List[PluginManifest] = []
+
+        # 1. User plugins (~/.hermes/plugins/)
+        hermes_home = os.environ.get("HERMES_HOME", os.path.expanduser("~/.hermes"))
+        user_dir = Path(hermes_home) / "plugins"
+        manifests.extend(self._scan_directory(user_dir, source="user"))
+
+        # 2. Project plugins (./.hermes/plugins/)
+        project_dir = Path.cwd() / ".hermes" / "plugins"
+        manifests.extend(self._scan_directory(project_dir, source="project"))
+
+        # 3. Pip / entry-point plugins
+        manifests.extend(self._scan_entry_points())
+
+        # Load each manifest
+        for manifest in manifests:
+            self._load_plugin(manifest)
+
+        if manifests:
+            logger.info(
+                "Plugin discovery complete: %d found, %d enabled",
+                len(self._plugins),
+                sum(1 for p in self._plugins.values() if p.enabled),
+            )
+
+    # -----------------------------------------------------------------------
+    # Directory scanning
+    # -----------------------------------------------------------------------
+
+    def _scan_directory(self, path: Path, source: str) -> List[PluginManifest]:
+        """Read ``plugin.yaml`` manifests from subdirectories of *path*."""
+        manifests: List[PluginManifest] = []
+        if not path.is_dir():
+            return manifests
+
+        for child in sorted(path.iterdir()):
+            if not child.is_dir():
+                continue
+            manifest_file = child / "plugin.yaml"
+            if not manifest_file.exists():
+                manifest_file = child / "plugin.yml"
+            if not manifest_file.exists():
+                logger.debug("Skipping %s (no plugin.yaml)", child)
+                continue
+
+            try:
+                if yaml is None:
+                    logger.warning("PyYAML not installed – cannot load %s", manifest_file)
+                    continue
+                data = yaml.safe_load(manifest_file.read_text()) or {}
+                manifest = PluginManifest(
+                    name=data.get("name", child.name),
+                    version=str(data.get("version", "")),
+                    description=data.get("description", ""),
+                    author=data.get("author", ""),
+                    requires_env=data.get("requires_env", []),
+                    provides_tools=data.get("provides_tools", []),
+                    provides_hooks=data.get("provides_hooks", []),
+                    source=source,
+                    path=str(child),
+                )
+                manifests.append(manifest)
+            except Exception as exc:
+                logger.warning("Failed to parse %s: %s", manifest_file, exc)
+
+        return manifests
+
+    # -----------------------------------------------------------------------
+    # Entry-point scanning
+    # -----------------------------------------------------------------------
+
+    def _scan_entry_points(self) -> List[PluginManifest]:
+        """Check ``importlib.metadata`` for pip-installed plugins."""
+        manifests: List[PluginManifest] = []
+        try:
+            eps = importlib.metadata.entry_points()
+            # Python 3.12+ returns a SelectableGroups; earlier returns dict
+            if hasattr(eps, "select"):
+                group_eps = eps.select(group=ENTRY_POINTS_GROUP)
+            elif isinstance(eps, dict):
+                group_eps = eps.get(ENTRY_POINTS_GROUP, [])
+            else:
+                group_eps = [ep for ep in eps if ep.group == ENTRY_POINTS_GROUP]
+
+            for ep in group_eps:
+                manifest = PluginManifest(
+                    name=ep.name,
+                    source="entrypoint",
+                    path=ep.value,
+                )
+                manifests.append(manifest)
+        except Exception as exc:
+            logger.debug("Entry-point scan failed: %s", exc)
+
+        return manifests
+
+    # -----------------------------------------------------------------------
+    # Loading
+    # -----------------------------------------------------------------------
+
+    def _load_plugin(self, manifest: PluginManifest) -> None:
+        """Import a plugin module and call its ``register(ctx)`` function."""
+        loaded = LoadedPlugin(manifest=manifest)
+
+        try:
+            if manifest.source in ("user", "project"):
+                module = self._load_directory_module(manifest)
+            else:
+                module = self._load_entrypoint_module(manifest)
+
+            loaded.module = module
+
+            # Call register()
+            register_fn = getattr(module, "register", None)
+            if register_fn is None:
+                loaded.error = "no register() function"
+                logger.warning("Plugin '%s' has no register() function", manifest.name)
+            else:
+                ctx = PluginContext(manifest, self)
+                register_fn(ctx)
+                loaded.tools_registered = [
+                    t for t in self._plugin_tool_names
+                    if t not in {
+                        n
+                        for name, p in self._plugins.items()
+                        for n in p.tools_registered
+                    }
+                ]
+                loaded.hooks_registered = list(
+                    {
+                        h
+                        for h, cbs in self._hooks.items()
+                        if cbs  # non-empty
+                    }
+                    - {
+                        h
+                        for name, p in self._plugins.items()
+                        for h in p.hooks_registered
+                    }
+                )
+                loaded.enabled = True
+
+        except Exception as exc:
+            loaded.error = str(exc)
+            logger.warning("Failed to load plugin '%s': %s", manifest.name, exc)
+
+        self._plugins[manifest.name] = loaded
+
+    def _load_directory_module(self, manifest: PluginManifest) -> types.ModuleType:
+        """Import a directory-based plugin as ``hermes_plugins.<name>``."""
+        plugin_dir = Path(manifest.path)  # type: ignore[arg-type]
+        init_file = plugin_dir / "__init__.py"
+        if not init_file.exists():
+            raise FileNotFoundError(f"No __init__.py in {plugin_dir}")
+
+        # Ensure the namespace parent package exists
+        if _NS_PARENT not in sys.modules:
+            ns_pkg = types.ModuleType(_NS_PARENT)
+            ns_pkg.__path__ = []  # type: ignore[attr-defined]
+            ns_pkg.__package__ = _NS_PARENT
+            sys.modules[_NS_PARENT] = ns_pkg
+
+        module_name = f"{_NS_PARENT}.{manifest.name.replace('-', '_')}"
+        spec = importlib.util.spec_from_file_location(
+            module_name,
+            init_file,
+            submodule_search_locations=[str(plugin_dir)],
+        )
+        if spec is None or spec.loader is None:
+            raise ImportError(f"Cannot create module spec for {init_file}")
+
+        module = importlib.util.module_from_spec(spec)
+        module.__package__ = module_name
+        module.__path__ = [str(plugin_dir)]  # type: ignore[attr-defined]
+        sys.modules[module_name] = module
+        spec.loader.exec_module(module)
+        return module
+
+    def _load_entrypoint_module(self, manifest: PluginManifest) -> types.ModuleType:
+        """Load a pip-installed plugin via its entry-point reference."""
+        eps = importlib.metadata.entry_points()
+        if hasattr(eps, "select"):
+            group_eps = eps.select(group=ENTRY_POINTS_GROUP)
+        elif isinstance(eps, dict):
+            group_eps = eps.get(ENTRY_POINTS_GROUP, [])
+        else:
+            group_eps = [ep for ep in eps if ep.group == ENTRY_POINTS_GROUP]
+
+        for ep in group_eps:
+            if ep.name == manifest.name:
+                return ep.load()
+
+        raise ImportError(
+            f"Entry point '{manifest.name}' not found in group '{ENTRY_POINTS_GROUP}'"
+        )
+
+    # -----------------------------------------------------------------------
+    # Hook invocation
+    # -----------------------------------------------------------------------
+
+    def invoke_hook(self, hook_name: str, **kwargs: Any) -> None:
+        """Call all registered callbacks for *hook_name*.
+
+        Each callback is wrapped in its own try/except so a misbehaving
+        plugin cannot break the core agent loop.
+        """
+        callbacks = self._hooks.get(hook_name, [])
+        for cb in callbacks:
+            try:
+                cb(**kwargs)
+            except Exception as exc:
+                logger.warning(
+                    "Hook '%s' callback %s raised: %s",
+                    hook_name,
+                    getattr(cb, "__name__", repr(cb)),
+                    exc,
+                )
+
+    # -----------------------------------------------------------------------
+    # Introspection
+    # -----------------------------------------------------------------------
+
+    def list_plugins(self) -> List[Dict[str, Any]]:
+        """Return a list of info dicts for all discovered plugins."""
+        result: List[Dict[str, Any]] = []
+        for name, loaded in sorted(self._plugins.items()):
+            result.append(
+                {
+                    "name": name,
+                    "version": loaded.manifest.version,
+                    "description": loaded.manifest.description,
+                    "source": loaded.manifest.source,
+                    "enabled": loaded.enabled,
+                    "tools": len(loaded.tools_registered),
+                    "hooks": len(loaded.hooks_registered),
+                    "error": loaded.error,
+                }
+            )
+        return result
+
+
+# ---------------------------------------------------------------------------
+# Module-level singleton & convenience functions
+# ---------------------------------------------------------------------------
+
+_plugin_manager: Optional[PluginManager] = None
+
+
+def get_plugin_manager() -> PluginManager:
+    """Return (and lazily create) the global PluginManager singleton."""
+    global _plugin_manager
+    if _plugin_manager is None:
+        _plugin_manager = PluginManager()
+    return _plugin_manager
+
+
+def discover_plugins() -> None:
+    """Discover and load all plugins (idempotent)."""
+    get_plugin_manager().discover_and_load()
+
+
+def invoke_hook(hook_name: str, **kwargs: Any) -> None:
+    """Invoke a lifecycle hook on all loaded plugins."""
+    get_plugin_manager().invoke_hook(hook_name, **kwargs)
+
+
+def get_plugin_tool_names() -> Set[str]:
+    """Return the set of tool names registered by plugins."""
+    return get_plugin_manager()._plugin_tool_names
@@ -227,54 +227,86 @@ def prompt(question: str, default: str = None, password: bool = False) -> str:
        sys.exit(1)


+def _curses_prompt_choice(question: str, choices: list, default: int = 0) -> int:
+    """Single-select menu using curses to avoid simple_term_menu rendering bugs."""
+    try:
+        import curses
+        result_holder = [default]
+
+        def _curses_menu(stdscr):
+            curses.curs_set(0)
+            if curses.has_colors():
+                curses.start_color()
+                curses.use_default_colors()
+                curses.init_pair(1, curses.COLOR_GREEN, -1)
+                curses.init_pair(2, curses.COLOR_YELLOW, -1)
+            cursor = default
+
+            while True:
+                stdscr.clear()
+                max_y, max_x = stdscr.getmaxyx()
+                try:
+                    stdscr.addnstr(
+                        0,
+                        0,
+                        question,
+                        max_x - 1,
+                        curses.A_BOLD | (curses.color_pair(2) if curses.has_colors() else 0),
+                    )
+                except curses.error:
+                    pass
+
+                for i, choice in enumerate(choices):
+                    y = i + 2
+                    if y >= max_y - 1:
+                        break
+                    arrow = "→" if i == cursor else " "
+                    line = f" {arrow}  {choice}"
+                    attr = curses.A_NORMAL
+                    if i == cursor:
+                        attr = curses.A_BOLD
+                        if curses.has_colors():
+                            attr |= curses.color_pair(1)
+                    try:
+                        stdscr.addnstr(y, 0, line, max_x - 1, attr)
+                    except curses.error:
+                        pass
+
+                stdscr.refresh()
+                key = stdscr.getch()
+                if key in (curses.KEY_UP, ord("k")):
+                    cursor = (cursor - 1) % len(choices)
+                elif key in (curses.KEY_DOWN, ord("j")):
+                    cursor = (cursor + 1) % len(choices)
+                elif key in (curses.KEY_ENTER, 10, 13):
+                    result_holder[0] = cursor
+                    return
+                elif key in (27, ord("q")):
+                    return
+
+        curses.wrapper(_curses_menu)
+        return result_holder[0]
+    except Exception:
+        return -1
+
+
+
 def prompt_choice(question: str, choices: list, default: int = 0) -> int:
    """Prompt for a choice from a list with arrow key navigation.

    Escape keeps the current default (skips the question).
    Ctrl+C exits the wizard.
    """
-    print(color(question, Colors.YELLOW))
-
-    # Try to use interactive menu if available
-    try:
-        from simple_term_menu import TerminalMenu
-        import re
-
-        # Strip emoji characters — simple_term_menu miscalculates visual
-        # width of emojis, causing duplicated/garbled lines on redraw.
-        _emoji_re = re.compile(
-            "[\U0001f300-\U0001f9ff\U00002600-\U000027bf\U0000fe00-\U0000fe0f"
-            "\U0001fa00-\U0001fa6f\U0001fa70-\U0001faff\u200d]+",
-            flags=re.UNICODE,
-        )
-        menu_choices = [f"  {_emoji_re.sub('', choice).strip()}" for choice in choices]
-
-        print_info("  ↑/↓ Navigate  Enter Select  Esc Skip  Ctrl+C Exit")
-
-        terminal_menu = TerminalMenu(
-            menu_choices,
-            cursor_index=default,
-            menu_cursor="→ ",
-            menu_cursor_style=("fg_green", "bold"),
-            menu_highlight_style=("fg_green",),
-            cycle_cursor=True,
-            clear_screen=False,
-        )
-
-        idx = terminal_menu.show()
-        if idx is None:  # User pressed Escape — keep current value
-            print_info(f"  Skipped (keeping current)")
+    idx = _curses_prompt_choice(question, choices, default)
+    if idx >= 0:
+        if idx == default:
+            print_info("  Skipped (keeping current)")
            print()
            return default
-        print()  # Add newline after selection
+        print()
        return idx

-    except (ImportError, NotImplementedError):
-        pass
-    except Exception as e:
-        print(f"  (Interactive menu unavailable: {e})")
-
-    # Fallback to number-based selection (simple_term_menu doesn't support Windows)
+    print(color(question, Colors.YELLOW))
    for i, choice in enumerate(choices):
        marker = "●" if i == default else "○"
        if i == default:
@@ -344,84 +376,15 @@ def prompt_checklist(title: str, items: list, pre_selected: list = None) -> list
    if pre_selected is None:
        pre_selected = []

-    print(color(title, Colors.YELLOW))
-    print_info("  SPACE Toggle  ENTER Confirm  ESC Skip  Ctrl+C Exit")
-    print()
+    from hermes_cli.curses_ui import curses_checklist

-    try:
-        from simple_term_menu import TerminalMenu
-        import re
-
-        # Strip emoji characters from menu labels — simple_term_menu miscalculates
-        # visual width of emojis on macOS, causing duplicated/garbled lines.
-        _emoji_re = re.compile(
-            "[\U0001f300-\U0001f9ff\U00002600-\U000027bf\U0000fe00-\U0000fe0f"
-            "\U0001fa00-\U0001fa6f\U0001fa70-\U0001faff\u200d]+",
-            flags=re.UNICODE,
-        )
-        menu_items = [f"  {_emoji_re.sub('', item).strip()}" for item in items]
-
-        # Map pre-selected indices to the actual menu entry strings
-        preselected = [menu_items[i] for i in pre_selected if i < len(menu_items)]
-
-        terminal_menu = TerminalMenu(
-            menu_items,
-            multi_select=True,
-            show_multi_select_hint=False,
-            multi_select_cursor="[✓] ",
-            multi_select_select_on_accept=False,
-            multi_select_empty_ok=True,
-            preselected_entries=preselected if preselected else None,
-            menu_cursor="→ ",
-            menu_cursor_style=("fg_green", "bold"),
-            menu_highlight_style=("fg_green",),
-            cycle_cursor=True,
-            clear_screen=False,
-        )
-
-        terminal_menu.show()
-
-        if terminal_menu.chosen_menu_entries is None:
-            print_info("  Skipped (keeping current)")
-            return list(pre_selected)
-
-        selected = list(terminal_menu.chosen_menu_indices or [])
-        return selected
-
-    except (ImportError, NotImplementedError):
-        # Fallback: numbered toggle interface (simple_term_menu doesn't support Windows)
-        selected = set(pre_selected)
-
-        while True:
-            for i, item in enumerate(items):
-                marker = color("[✓]", Colors.GREEN) if i in selected else "[ ]"
-                print(f"  {marker} {i + 1}. {item}")
-            print()
-
-            try:
-                value = input(
-                    color("  Toggle # (or Enter to confirm): ", Colors.DIM)
-                ).strip()
-                if not value:
-                    break
-                idx = int(value) - 1
-                if 0 <= idx < len(items):
-                    if idx in selected:
-                        selected.discard(idx)
-                    else:
-                        selected.add(idx)
-                else:
-                    print_error(f"Enter a number between 1 and {len(items)}")
-            except ValueError:
-                print_error("Enter a number")
-            except (KeyboardInterrupt, EOFError):
-                print()
-                return []
-
-            # Clear and redraw (simple approach)
-            print()
-
-        return sorted(selected)
+    chosen = curses_checklist(
+        title,
+        items,
+        set(pre_selected),
+        cancel_returns=set(pre_selected),
+    )
+    return sorted(chosen)


 def _prompt_api_key(var: dict):
@@ -780,6 +743,7 @@ def setup_model_provider(config: dict):
    selected_provider = (
        None  # "nous", "openai-codex", "openrouter", "custom", or None (keep)
    )
+    selected_base_url = None  # deferred until after model selection
    nous_models = []  # populated if Nous login succeeds

    if provider_idx == 0:  # Nous Portal (OAuth)
@@ -933,11 +897,35 @@ def setup_model_provider(config: dict):

        base_url = prompt(
            "  API base URL (e.g., https://api.example.com/v1)", current_url
-        )
+        ).strip()
        api_key = prompt("  API key", password=True)
        model_name = prompt("  Model name (e.g., gpt-4, claude-3-opus)", current_model)

        if base_url:
+            from hermes_cli.models import probe_api_models
+
+            probe = probe_api_models(api_key, base_url)
+            if probe.get("used_fallback") and probe.get("resolved_base_url"):
+                print_warning(
+                    f"Endpoint verification worked at {probe['resolved_base_url']}/models, "
+                    f"not the exact URL you entered. Saving the working base URL instead."
+                )
+                base_url = probe["resolved_base_url"]
+            elif probe.get("models") is not None:
+                print_success(
+                    f"Verified endpoint via {probe.get('probed_url')} "
+                    f"({len(probe.get('models') or [])} model(s) visible)"
+                )
+            else:
+                print_warning(
+                    f"Could not verify this endpoint via {probe.get('probed_url')}. "
+                    f"Hermes will still save it."
+                )
+                if probe.get("suggested_base_url"):
+                    print_info(
+                        f"  If this server expects /v1, try base URL: {probe['suggested_base_url']}"
+                    )
+
            save_env_value("OPENAI_BASE_URL", base_url)
        if api_key:
            save_env_value("OPENAI_API_KEY", api_key)
@@ -1038,8 +1026,8 @@ def setup_model_provider(config: dict):
        if existing_custom:
            save_env_value("OPENAI_BASE_URL", "")
            save_env_value("OPENAI_API_KEY", "")
-        _update_config_for_provider("zai", zai_base_url, default_model="glm-5")
        _set_model_provider(config, "zai", zai_base_url)
+        selected_base_url = zai_base_url

    elif provider_idx == 5:  # Kimi / Moonshot
        selected_provider = "kimi-coding"
@@ -1071,8 +1059,8 @@ def setup_model_provider(config: dict):
        if existing_custom:
            save_env_value("OPENAI_BASE_URL", "")
            save_env_value("OPENAI_API_KEY", "")
-        _update_config_for_provider("kimi-coding", pconfig.inference_base_url, default_model="kimi-k2.5")
        _set_model_provider(config, "kimi-coding", pconfig.inference_base_url)
+        selected_base_url = pconfig.inference_base_url

    elif provider_idx == 6:  # MiniMax
        selected_provider = "minimax"
@@ -1104,8 +1092,8 @@ def setup_model_provider(config: dict):
        if existing_custom:
            save_env_value("OPENAI_BASE_URL", "")
            save_env_value("OPENAI_API_KEY", "")
-        _update_config_for_provider("minimax", pconfig.inference_base_url, default_model="MiniMax-M2.5")
        _set_model_provider(config, "minimax", pconfig.inference_base_url)
+        selected_base_url = pconfig.inference_base_url

    elif provider_idx == 7:  # MiniMax China
        selected_provider = "minimax-cn"
@@ -1137,8 +1125,8 @@ def setup_model_provider(config: dict):
        if existing_custom:
            save_env_value("OPENAI_BASE_URL", "")
            save_env_value("OPENAI_API_KEY", "")
-        _update_config_for_provider("minimax-cn", pconfig.inference_base_url, default_model="MiniMax-M2.5")
        _set_model_provider(config, "minimax-cn", pconfig.inference_base_url)
+        selected_base_url = pconfig.inference_base_url

    elif provider_idx == 8:  # Anthropic
        selected_provider = "anthropic"
@@ -1241,8 +1229,8 @@ def setup_model_provider(config: dict):
            save_env_value("OPENAI_API_KEY", "")
        # Don't save base_url for Anthropic — resolve_runtime_provider()
        # always hardcodes it. Stale base_urls contaminate other providers.
-        _update_config_for_provider("anthropic", "", default_model="claude-opus-4-6")
        _set_model_provider(config, "anthropic")
+        selected_base_url = ""

    # else: provider_idx == 9 (Keep current) — only shown when a provider already exists
    # Normalize "keep current" to an explicit provider so downstream logic
@@ -1472,6 +1460,12 @@ def setup_model_provider(config: dict):
            )
            print_success(f"Model set to: {_display}")

+    # Write provider+base_url to config.yaml only after model selection is complete.
+    # This prevents a race condition where the gateway picks up a new provider
+    # before the model name has been updated to match.
+    if selected_provider in ("zai", "kimi-coding", "minimax", "minimax-cn", "anthropic") and selected_base_url is not None:
+        _update_config_for_provider(selected_provider, selected_base_url)
+
    save_config(config)


@@ -60,6 +60,12 @@ All fields are optional. Missing values inherit from the ``default`` skin.
    # Tool prefix: character for tool output lines (default: ┊)
    tool_prefix: "┊"

+    # Tool emojis: override the default emoji for any tool (used in spinners & progress)
+    tool_emojis:
+      terminal: "⚔"           # Override terminal tool emoji
+      web_search: "🔮"        # Override web_search tool emoji
+      # Any tool not listed here uses its registry default
+
 USAGE
 =====

@@ -111,6 +117,7 @@ class SkinConfig:
    spinner: Dict[str, Any] = field(default_factory=dict)
    branding: Dict[str, str] = field(default_factory=dict)
    tool_prefix: str = "┊"
+    tool_emojis: Dict[str, str] = field(default_factory=dict)  # per-tool emoji overrides
    banner_logo: str = ""    # Rich-markup ASCII art logo (replaces HERMES_AGENT_LOGO)
    banner_hero: str = ""    # Rich-markup hero art (replaces HERMES_CADUCEUS)

@@ -541,6 +548,7 @@ def _build_skin_config(data: Dict[str, Any]) -> SkinConfig:
        spinner=spinner,
        branding=branding,
        tool_prefix=data.get("tool_prefix", default.get("tool_prefix", "┊")),
+        tool_emojis=data.get("tool_emojis", {}),
        banner_logo=data.get("banner_logo", ""),
        banner_hero=data.get("banner_hero", ""),
    )
@@ -275,8 +275,13 @@ def show_status(args):
    print(color("◆ Gateway Service", Colors.CYAN, Colors.BOLD))
    
    if sys.platform.startswith('linux'):
+        try:
+            from hermes_cli.gateway import get_service_name
+            _gw_svc = get_service_name()
+        except Exception:
+            _gw_svc = "hermes-gateway"
        result = subprocess.run(
-            ["systemctl", "--user", "is-active", "hermes-gateway"],
+            ["systemctl", "--user", "is-active", _gw_svc],
            capture_output=True,
            text=True
        )
@@ -354,9 +354,29 @@ def _get_platform_tools(config: dict, platform: str) -> Set[str]:


 def _save_platform_tools(config: dict, platform: str, enabled_toolset_keys: Set[str]):
-    """Save the selected toolset keys for a platform to config."""
+    """Save the selected toolset keys for a platform to config.
+
+    Preserves any non-configurable toolset entries (like MCP server names)
+    that were already in the config for this platform.
+    """
    config.setdefault("platform_toolsets", {})
-    config["platform_toolsets"][platform] = sorted(enabled_toolset_keys)
+
+    # Get the set of all configurable toolset keys
+    configurable_keys = {ts_key for ts_key, _, _ in CONFIGURABLE_TOOLSETS}
+
+    # Get existing toolsets for this platform
+    existing_toolsets = config.get("platform_toolsets", {}).get(platform, [])
+    if not isinstance(existing_toolsets, list):
+        existing_toolsets = []
+
+    # Preserve any entries that are NOT configurable toolsets (i.e. MCP server names)
+    preserved_entries = {
+        entry for entry in existing_toolsets
+        if entry not in configurable_keys
+    }
+
+    # Merge preserved entries with new enabled toolsets
+    config["platform_toolsets"][platform] = sorted(enabled_toolset_keys | preserved_entries)
    save_config(config)


@@ -133,7 +133,13 @@ def uninstall_gateway_service():
    if platform.system() != "Linux":
        return False
    
-    service_file = Path.home() / ".config" / "systemd" / "user" / "hermes-gateway.service"
+    try:
+        from hermes_cli.gateway import get_service_name
+        svc_name = get_service_name()
+    except Exception:
+        svc_name = "hermes-gateway"
+
+    service_file = Path.home() / ".config" / "systemd" / "user" / f"{svc_name}.service"
    
    if not service_file.exists():
        return False
@@ -141,14 +147,14 @@ def uninstall_gateway_service():
    try:
        # Stop the service
        subprocess.run(
-            ["systemctl", "--user", "stop", "hermes-gateway"],
+            ["systemctl", "--user", "stop", svc_name],
            capture_output=True,
            check=False
        )
        
        # Disable the service
        subprocess.run(
-            ["systemctl", "--user", "disable", "hermes-gateway"],
+            ["systemctl", "--user", "disable", svc_name],
            capture_output=True,
            check=False
        )
@@ -249,6 +249,32 @@ class SessionDB:
        row = cursor.fetchone()
        return dict(row) if row else None

+    def resolve_session_id(self, session_id_or_prefix: str) -> Optional[str]:
+        """Resolve an exact or uniquely prefixed session ID to the full ID.
+
+        Returns the exact ID when it exists. Otherwise treats the input as a
+        prefix and returns the single matching session ID if the prefix is
+        unambiguous. Returns None for no matches or ambiguous prefixes.
+        """
+        exact = self.get_session(session_id_or_prefix)
+        if exact:
+            return exact["id"]
+
+        escaped = (
+            session_id_or_prefix
+            .replace("\\", "\\\\")
+            .replace("%", "\\%")
+            .replace("_", "\\_")
+        )
+        cursor = self._conn.execute(
+            "SELECT id FROM sessions WHERE id LIKE ? ESCAPE '\\' ORDER BY started_at DESC LIMIT 2",
+            (f"{escaped}%",),
+        )
+        matches = [row["id"] for row in cursor.fetchall()]
+        if len(matches) == 1:
+            return matches[0]
+        return None
+
    # Maximum length for session titles
    MAX_TITLE_LENGTH = 100

@@ -927,6 +927,11 @@ class HonchoSessionManager:
            return False

        assistant_peer = self._get_or_create_peer(session.assistant_peer_id)
+        honcho_session = self._sessions_cache.get(session.honcho_session_id)
+        if not honcho_session:
+            logger.warning("No Honcho session cached for '%s', skipping AI seed", session_key)
+            return False
+
        try:
            wrapped = (
                f"<ai_identity_seed>\n"
@@ -935,7 +940,7 @@ class HonchoSessionManager:
                f"{content.strip()}\n"
                f"</ai_identity_seed>"
            )
-            assistant_peer.add_message("assistant", wrapped)
+            honcho_session.add_messages([assistant_peer.message(wrapped)])
            logger.info("Seeded AI identity from '%s' into %s", source, session_key)
            return True
        except Exception as e:
@@ -113,6 +113,13 @@ try:
 except Exception as e:
    logger.debug("MCP tool discovery failed: %s", e)

+# Plugin tool discovery (user/project/pip plugins)
+try:
+    from hermes_cli.plugins import discover_plugins
+    discover_plugins()
+except Exception as e:
+    logger.debug("Plugin discovery failed: %s", e)
+

 # =============================================================================
 # Backward-compat constants  (built once after discovery)
@@ -222,6 +229,16 @@ def get_tool_definitions(
        for ts_name in get_all_toolsets():
            tools_to_include.update(resolve_toolset(ts_name))

+    # Always include plugin-registered tools — they bypass the toolset filter
+    # because their toolsets are dynamic (created at plugin load time).
+    try:
+        from hermes_cli.plugins import get_plugin_tool_names
+        plugin_tools = get_plugin_tool_names()
+        if plugin_tools:
+            tools_to_include.update(plugin_tools)
+    except Exception:
+        pass
+
    # Ask the registry for schemas (only returns tools whose check_fn passes)
    filtered_tools = registry.get_definitions(tools_to_include, quiet=quiet_mode)

@@ -267,6 +284,8 @@ def handle_function_call(
    task_id: Optional[str] = None,
    user_task: Optional[str] = None,
    enabled_tools: Optional[List[str]] = None,
+    honcho_manager: Optional[Any] = None,
+    honcho_session_key: Optional[str] = None,
 ) -> str:
    """
    Main function call dispatcher that routes calls to the tool registry.
@@ -298,21 +317,39 @@ def handle_function_call(
        if function_name in _AGENT_LOOP_TOOLS:
            return json.dumps({"error": f"{function_name} must be handled by the agent loop"})

+        try:
+            from hermes_cli.plugins import invoke_hook
+            invoke_hook("pre_tool_call", tool_name=function_name, args=function_args, task_id=task_id or "")
+        except Exception:
+            pass
+
        if function_name == "execute_code":
            # Prefer the caller-provided list so subagents can't overwrite
            # the parent's tool set via the process-global.
            sandbox_enabled = enabled_tools if enabled_tools is not None else _last_resolved_tool_names
-            return registry.dispatch(
+            result = registry.dispatch(
                function_name, function_args,
                task_id=task_id,
                enabled_tools=sandbox_enabled,
+                honcho_manager=honcho_manager,
+                honcho_session_key=honcho_session_key,
+            )
+        else:
+            result = registry.dispatch(
+                function_name, function_args,
+                task_id=task_id,
+                user_task=user_task,
+                honcho_manager=honcho_manager,
+                honcho_session_key=honcho_session_key,
            )

-        return registry.dispatch(
-            function_name, function_args,
-            task_id=task_id,
-            user_task=user_task,
-        )
+        try:
+            from hermes_cli.plugins import invoke_hook
+            invoke_hook("post_tool_call", tool_name=function_name, args=function_args, result=result, task_id=task_id or "")
+        except Exception:
+            pass
+
+        return result

    except Exception as e:
        error_msg = f"Error executing {function_name}: {str(e)}"
@@ -0,0 +1,116 @@
+---
+name: blender-mcp
+description: Control Blender directly from Hermes via socket connection to the blender-mcp addon. Create 3D objects, materials, animations, and run arbitrary Blender Python (bpy) code. Use when user wants to create or modify anything in Blender.
+version: 1.0.0
+requires: Blender 4.3+ (desktop instance required, headless not supported)
+author: alireza78a
+tags: [blender, 3d, animation, modeling, bpy, mcp]
+---
+
+# Blender MCP
+
+Control a running Blender instance from Hermes via socket on TCP port 9876.
+
+## Setup (one-time)
+
+### 1. Install the Blender addon
+
+    curl -sL https://raw.githubusercontent.com/ahujasid/blender-mcp/main/addon.py -o ~/Desktop/blender_mcp_addon.py
+
+In Blender:
+    Edit > Preferences > Add-ons > Install > select blender_mcp_addon.py
+    Enable "Interface: Blender MCP"
+
+### 2. Start the socket server in Blender
+
+Press N in Blender viewport to open sidebar.
+Find "BlenderMCP" tab and click "Start Server".
+
+### 3. Verify connection
+
+    nc -z -w2 localhost 9876 && echo "OPEN" || echo "CLOSED"
+
+## Protocol
+
+Plain UTF-8 JSON over TCP -- no length prefix.
+
+Send:     {"type": "<command>", "params": {<kwargs>}}
+Receive:  {"status": "success", "result": <value>}
+          {"status": "error",   "message": "<reason>"}
+
+## Available Commands
+
+| type                    | params            | description                     |
+|-------------------------|-------------------|---------------------------------|
+| execute_code            | code (str)        | Run arbitrary bpy Python code   |
+| get_scene_info          | (none)            | List all objects in scene       |
+| get_object_info         | object_name (str) | Details on a specific object    |
+| get_viewport_screenshot | (none)            | Screenshot of current viewport  |
+
+## Python Helper
+
+Use this inside execute_code tool calls:
+
+    import socket, json
+
+    def blender_exec(code: str, host="localhost", port=9876, timeout=15):
+        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+        s.connect((host, port))
+        s.settimeout(timeout)
+        payload = json.dumps({"type": "execute_code", "params": {"code": code}})
+        s.sendall(payload.encode("utf-8"))
+        buf = b""
+        while True:
+            try:
+                chunk = s.recv(4096)
+                if not chunk:
+                    break
+                buf += chunk
+                try:
+                    json.loads(buf.decode("utf-8"))
+                    break
+                except json.JSONDecodeError:
+                    continue
+            except socket.timeout:
+                break
+        s.close()
+        return json.loads(buf.decode("utf-8"))
+
+## Common bpy Patterns
+
+### Clear scene
+    bpy.ops.object.select_all(action='SELECT')
+    bpy.ops.object.delete()
+
+### Add mesh objects
+    bpy.ops.mesh.primitive_uv_sphere_add(radius=1, location=(0, 0, 0))
+    bpy.ops.mesh.primitive_cube_add(size=2, location=(3, 0, 0))
+    bpy.ops.mesh.primitive_cylinder_add(radius=0.5, depth=2, location=(-3, 0, 0))
+
+### Create and assign material
+    mat = bpy.data.materials.new(name="MyMat")
+    mat.use_nodes = True
+    bsdf = mat.node_tree.nodes.get("Principled BSDF")
+    bsdf.inputs["Base Color"].default_value = (R, G, B, 1.0)
+    bsdf.inputs["Roughness"].default_value = 0.3
+    bsdf.inputs["Metallic"].default_value = 0.0
+    obj.data.materials.append(mat)
+
+### Keyframe animation
+    obj.location = (0, 0, 0)
+    obj.keyframe_insert(data_path="location", frame=1)
+    obj.location = (0, 0, 3)
+    obj.keyframe_insert(data_path="location", frame=60)
+
+### Render to file
+    bpy.context.scene.render.filepath = "/tmp/render.png"
+    bpy.context.scene.render.engine = 'CYCLES'
+    bpy.ops.render.render(write_still=True)
+
+## Pitfalls
+
+- Must check socket is open before running (nc -z localhost 9876)
+- Addon server must be started inside Blender each session (N-panel > BlenderMCP > Connect)
+- Break complex scenes into multiple smaller execute_code calls to avoid timeouts
+- Render output path must be absolute (/tmp/...) not relative
+- shade_smooth() requires object to be selected and in object mode
@@ -0,0 +1,422 @@
+---
+name: oss-forensics
+description: |
+  Supply chain investigation, evidence recovery, and forensic analysis for GitHub repositories.
+  Covers deleted commit recovery, force-push detection, IOC extraction, multi-source evidence
+  collection, hypothesis formation/validation, and structured forensic reporting.
+  Inspired by RAPTOR's 1800+ line OSS Forensics system.
+category: security
+triggers:
+  - "investigate this repository"
+  - "investigate [owner/repo]"
+  - "check for supply chain compromise"
+  - "recover deleted commits"
+  - "forensic analysis of [owner/repo]"
+  - "was this repo compromised"
+  - "supply chain attack"
+  - "suspicious commit"
+  - "force push detected"
+  - "IOC extraction"
+toolsets:
+  - terminal
+  - web
+  - file
+  - delegation
+---
+
+# OSS Security Forensics Skill
+
+A 7-phase multi-agent investigation framework for researching open-source supply chain attacks.
+Adapted from RAPTOR's forensics system. Covers GitHub Archive, Wayback Machine, GitHub API,
+local git analysis, IOC extraction, evidence-backed hypothesis formation and validation,
+and final forensic report generation.
+
+---
+
+## ⚠️ Anti-Hallucination Guardrails
+
+Read these before every investigation step. Violating them invalidates the report.
+
+1. **Evidence-First Rule**: Every claim in any report, hypothesis, or summary MUST cite at least one evidence ID (`EV-XXXX`). Assertions without citations are forbidden.
+2. **STAY IN YOUR LANE**: Each sub-agent (investigator) has a single data source. Do NOT mix sources. The GH Archive investigator does not query the GitHub API, and vice versa. Role boundaries are hard.
+3. **Fact vs. Hypothesis Separation**: Mark all unverified inferences with `[HYPOTHESIS]`. Only statements verified against original sources may be stated as facts.
+4. **No Evidence Fabrication**: The hypothesis validator MUST mechanically check that every cited evidence ID actually exists in the evidence store before accepting a hypothesis.
+5. **Proof-Required Disproval**: A hypothesis cannot be dismissed without a specific, evidence-backed counter-argument. "No evidence found" is not sufficient to disprove—it only makes a hypothesis inconclusive.
+6. **SHA/URL Double-Verification**: Any commit SHA, URL, or external identifier cited as evidence must be independently confirmed from at least two sources before being marked as verified.
+7. **Suspicious Code Rule**: Never run code found inside the investigated repository locally. Analyze statically only, or use `execute_code` in a sandboxed environment.
+8. **Secret Redaction**: Any API keys, tokens, or credentials discovered during investigation must be redacted in the final report. Log them internally only.
+
+---
+
+## Example Scenarios
+
+- **Scenario A: Dependency Confusion**: A malicious package `internal-lib-v2` is uploaded to NPM with a higher version than the internal one. The investigator must track when this package was first seen and if any PushEvents in the target repo updated `package.json` to this version.
+- **Scenario B: Maintainer Takeover**: A long-term contributor's account is used to push a backdoored `.github/workflows/build.yml`. The investigator looks for PushEvents from this user after a long period of inactivity or from a new IP/location (if detectable via BigQuery).
+- **Scenario C: Force-Push Hide**: A developer accidentally commits a production secret, then force-pushes to "fix" it. The investigator uses `git fsck` and GH Archive to recover the original commit SHA and verify what was leaked.
+
+---
+
+> **Path convention**: Throughout this skill, `SKILL_DIR` refers to the root of this skill's
+> installation directory (the folder containing this `SKILL.md`). When the skill is loaded,
+> resolve `SKILL_DIR` to the actual path — e.g. `~/.hermes/skills/security/oss-forensics/`
+> or the `optional-skills/` equivalent. All script and template references are relative to it.
+
+## Phase 0: Initialization
+
+1. Create investigation working directory:
+   ```bash
+   mkdir investigation_$(echo "REPO_NAME" | tr '/' '_')
+   cd investigation_$(echo "REPO_NAME" | tr '/' '_')
+   ```
+2. Initialize the evidence store:
+   ```bash
+   python3 SKILL_DIR/scripts/evidence-store.py --store evidence.json list
+   ```
+3. Copy the forensic report template:
+   ```bash
+   cp SKILL_DIR/templates/forensic-report.md ./investigation-report.md
+   ```
+4. Create an `iocs.md` file to track Indicators of Compromise as they are discovered.
+5. Record the investigation start time, target repository, and stated investigation goal.
+
+---
+
+## Phase 1: Prompt Parsing and IOC Extraction
+
+**Goal**: Extract all structured investigative targets from the user's request.
+
+**Actions**:
+- Parse the user prompt and extract:
+  - Target repository (`owner/repo`)
+  - Target actors (GitHub handles, email addresses)
+  - Time window of interest (commit date ranges, PR timestamps)
+  - Provided Indicators of Compromise: commit SHAs, file paths, package names, IP addresses, domains, API keys/tokens, malicious URLs
+  - Any linked vendor security reports or blog posts
+
+**Tools**: Reasoning only, or `execute_code` for regex extraction from large text blocks.
+
+**Output**: Populate `iocs.md` with extracted IOCs. Each IOC must have:
+- Type (from: COMMIT_SHA, FILE_PATH, API_KEY, SECRET, IP_ADDRESS, DOMAIN, PACKAGE_NAME, ACTOR_USERNAME, MALICIOUS_URL, OTHER)
+- Value
+- Source (user-provided, inferred)
+
+**Reference**: See [evidence-types.md](./references/evidence-types.md) for IOC taxonomy.
+
+---
+
+## Phase 2: Parallel Evidence Collection
+
+Spawn up to 5 specialist investigator sub-agents using `delegate_task` (batch mode, max 3 concurrent). Each investigator has a **single data source** and must not mix sources.
+
+> **Orchestrator note**: Pass the IOC list from Phase 1 and the investigation time window in the `context` field of each delegated task.
+
+---
+
+### Investigator 1: Local Git Investigator
+
+**ROLE BOUNDARY**: You query the LOCAL GIT REPOSITORY ONLY. Do not call any external APIs.
+
+**Actions**:
+```bash
+# Clone repository
+git clone https://github.com/OWNER/REPO.git target_repo && cd target_repo
+
+# Full commit log with stats
+git log --all --full-history --stat --format="%H|%ae|%an|%ai|%s" > ../git_log.txt
+
+# Detect force-push evidence (orphaned/dangling commits)
+git fsck --lost-found --unreachable 2>&1 | grep commit > ../dangling_commits.txt
+
+# Check reflog for rewritten history
+git reflog --all > ../reflog.txt
+
+# List ALL branches including deleted remote refs
+git branch -a -v > ../branches.txt
+
+# Find suspicious large binary additions
+git log --all --diff-filter=A --name-only --format="%H %ai" -- "*.so" "*.dll" "*.exe" "*.bin" > ../binary_additions.txt
+
+# Check for GPG signature anomalies
+git log --show-signature --format="%H %ai %aN" > ../signature_check.txt 2>&1
+```
+
+**Evidence to collect** (add via `python3 SKILL_DIR/scripts/evidence-store.py add`):
+- Each dangling commit SHA → type: `git`
+- Force-push evidence (reflog showing history rewrite) → type: `git`
+- Unsigned commits from verified contributors → type: `git`
+- Suspicious binary file additions → type: `git`
+
+**Reference**: See [recovery-techniques.md](./references/recovery-techniques.md) for accessing force-pushed commits.
+
+---
+
+### Investigator 2: GitHub API Investigator
+
+**ROLE BOUNDARY**: You query the GITHUB REST API ONLY. Do not run git commands locally.
+
+**Actions**:
+```bash
+# Commits (paginated)
+curl -s "https://api.github.com/repos/OWNER/REPO/commits?per_page=100" > api_commits.json
+
+# Pull Requests including closed/deleted
+curl -s "https://api.github.com/repos/OWNER/REPO/pulls?state=all&per_page=100" > api_prs.json
+
+# Issues
+curl -s "https://api.github.com/repos/OWNER/REPO/issues?state=all&per_page=100" > api_issues.json
+
+# Contributors and collaborator changes
+curl -s "https://api.github.com/repos/OWNER/REPO/contributors" > api_contributors.json
+
+# Repository events (last 300)
+curl -s "https://api.github.com/repos/OWNER/REPO/events?per_page=100" > api_events.json
+
+# Check specific suspicious commit SHA details
+curl -s "https://api.github.com/repos/OWNER/REPO/git/commits/SHA" > commit_detail.json
+
+# Releases
+curl -s "https://api.github.com/repos/OWNER/REPO/releases?per_page=100" > api_releases.json
+
+# Check if a specific commit exists (force-pushed commits may 404 on commits/ but succeed on git/commits/)
+curl -s "https://api.github.com/repos/OWNER/REPO/commits/SHA" | jq .sha
+```
+
+**Cross-reference targets** (flag discrepancies as evidence):
+- PR exists in archive but missing from API → evidence of deletion
+- Contributor in archive events but not in contributors list → evidence of permission revocation
+- Commit in archive PushEvents but not in API commit list → evidence of force-push/deletion
+
+**Reference**: See [evidence-types.md](./references/evidence-types.md) for GH event types.
+
+---
+
+### Investigator 3: Wayback Machine Investigator
+
+**ROLE BOUNDARY**: You query the WAYBACK MACHINE CDX API ONLY. Do not use the GitHub API.
+
+**Goal**: Recover deleted GitHub pages (READMEs, issues, PRs, releases, wiki pages).
+
+**Actions**:
+```bash
+# Search for archived snapshots of the repo main page
+curl -s "https://web.archive.org/cdx/search/cdx?url=github.com/OWNER/REPO&output=json&limit=100&from=YYYYMMDD&to=YYYYMMDD" > wayback_main.json
+
+# Search for a specific deleted issue
+curl -s "https://web.archive.org/cdx/search/cdx?url=github.com/OWNER/REPO/issues/NUM&output=json&limit=50" > wayback_issue_NUM.json
+
+# Search for a specific deleted PR
+curl -s "https://web.archive.org/cdx/search/cdx?url=github.com/OWNER/REPO/pull/NUM&output=json&limit=50" > wayback_pr_NUM.json
+
+# Fetch the best snapshot of a page
+# Use the Wayback Machine URL: https://web.archive.org/web/TIMESTAMP/ORIGINAL_URL
+# Example: https://web.archive.org/web/20240101000000*/github.com/OWNER/REPO
+
+# Advanced: Search for deleted releases/tags
+curl -s "https://web.archive.org/cdx/search/cdx?url=github.com/OWNER/REPO/releases/tag/*&output=json" > wayback_tags.json
+
+# Advanced: Search for historical wiki changes
+curl -s "https://web.archive.org/cdx/search/cdx?url=github.com/OWNER/REPO/wiki/*&output=json" > wayback_wiki.json
+```
+
+**Evidence to collect**:
+- Archived snapshots of deleted issues/PRs with their content
+- Historical README versions showing changes
+- Evidence of content present in archive but missing from current GitHub state
+
+**Reference**: See [github-archive-guide.md](./references/github-archive-guide.md) for CDX API parameters.
+
+---
+
+### Investigator 4: GH Archive / BigQuery Investigator
+
+**ROLE BOUNDARY**: You query GITHUB ARCHIVE via BIGQUERY ONLY. This is a tamper-proof record of all public GitHub events.
+
+> **Prerequisites**: Requires Google Cloud credentials with BigQuery access (`gcloud auth application-default login`). If unavailable, skip this investigator and note it in the report.
+
+**Cost Optimization Rules** (MANDATORY):
+1. ALWAYS run a `--dry_run` before every query to estimate cost.
+2. Use `_TABLE_SUFFIX` to filter by date range and minimize scanned data.
+3. Only SELECT the columns you need.
+4. Add a LIMIT unless aggregating.
+
+```bash
+# Template: safe BigQuery query for PushEvents to OWNER/REPO
+bq query --use_legacy_sql=false --dry_run "
+SELECT created_at, actor.login, payload.commits, payload.before, payload.head,
+       payload.size, payload.distinct_size
+FROM \`githubarchive.month.*\`
+WHERE _TABLE_SUFFIX BETWEEN 'YYYYMM' AND 'YYYYMM'
+  AND type = 'PushEvent'
+  AND repo.name = 'OWNER/REPO'
+LIMIT 1000
+"
+# If cost is acceptable, re-run without --dry_run
+
+# Detect force-pushes: zero-distinct_size PushEvents mean commits were force-erased
+# payload.distinct_size = 0 AND payload.size > 0 → force push indicator
+
+# Check for deleted branch events
+bq query --use_legacy_sql=false "
+SELECT created_at, actor.login, payload.ref, payload.ref_type
+FROM \`githubarchive.month.*\`
+WHERE _TABLE_SUFFIX BETWEEN 'YYYYMM' AND 'YYYYMM'
+  AND type = 'DeleteEvent'
+  AND repo.name = 'OWNER/REPO'
+LIMIT 200
+"
+```
+
+**Evidence to collect**:
+- Force-push events (payload.size > 0, payload.distinct_size = 0)
+- DeleteEvents for branches/tags
+- WorkflowRunEvents for suspicious CI/CD automation
+- PushEvents that precede a "gap" in the git log (evidence of rewrite)
+
+**Reference**: See [github-archive-guide.md](./references/github-archive-guide.md) for all 12 event types and query patterns.
+
+---
+
+### Investigator 5: IOC Enrichment Investigator
+
+**ROLE BOUNDARY**: You enrich EXISTING IOCs from Phase 1 using passive public sources ONLY. Do not execute any code from the target repository.
+
+**Actions**:
+- For each commit SHA: attempt recovery via direct GitHub URL (`github.com/OWNER/REPO/commit/SHA.patch`)
+- For each domain/IP: check passive DNS, WHOIS records (via `web_extract` on public WHOIS services)
+- For each package name: check npm/PyPI for matching malicious package reports
+- For each actor username: check GitHub profile, contribution history, account age
+- Recover force-pushed commits using 3 methods (see [recovery-techniques.md](./references/recovery-techniques.md))
+
+---
+
+## Phase 3: Evidence Consolidation
+
+After all investigators complete:
+
+1. Run `python3 SKILL_DIR/scripts/evidence-store.py --store evidence.json list` to see all collected evidence.
+2. For each piece of evidence, verify the `content_sha256` hash matches the original source.
+3. Group evidence by:
+   - **Timeline**: Sort all timestamped evidence chronologically
+   - **Actor**: Group by GitHub handle or email
+   - **IOC**: Link evidence to the IOC it relates to
+4. Identify **discrepancies**: items present in one source but absent in another (key deletion indicators).
+5. Flag evidence as `[VERIFIED]` (confirmed from 2+ independent sources) or `[UNVERIFIED]` (single source only).
+
+---
+
+## Phase 4: Hypothesis Formation
+
+A hypothesis must:
+- State a specific claim (e.g., "Actor X force-pushed to BRANCH on DATE to erase commit SHA")
+- Cite at least 2 evidence IDs that support it (`EV-XXXX`, `EV-YYYY`)
+- Identify what evidence would disprove it
+- Be labeled `[HYPOTHESIS]` until validated
+
+**Common hypothesis templates** (see [investigation-templates.md](./references/investigation-templates.md)):
+- Maintainer Compromise: legitimate account used post-takeover to inject malicious code
+- Dependency Confusion: package name squatting to intercept installs
+- CI/CD Injection: malicious workflow changes to run code during builds
+- Typosquatting: near-identical package name targeting misspellers
+- Credential Leak: token/key accidentally committed then force-pushed to erase
+
+For each hypothesis, spawn a `delegate_task` sub-agent to attempt to find disconfirming evidence before confirming.
+
+---
+
+## Phase 5: Hypothesis Validation
+
+The validator sub-agent MUST mechanically check:
+
+1. For each hypothesis, extract all cited evidence IDs.
+2. Verify each ID exists in `evidence.json` (hard failure if any ID is missing → hypothesis rejected as potentially fabricated).
+3. Verify each `[VERIFIED]` piece of evidence was confirmed from 2+ sources.
+4. Check logical consistency: does the timeline depicted by the evidence support the hypothesis?
+5. Check for alternative explanations: could the same evidence pattern arise from a benign cause?
+
+**Output**:
+- `VALIDATED`: All evidence cited, verified, logically consistent, no plausible alternative explanation.
+- `INCONCLUSIVE`: Evidence supports hypothesis but alternative explanations exist or evidence is insufficient.
+- `REJECTED`: Missing evidence IDs, unverified evidence cited as fact, logical inconsistency detected.
+
+Rejected hypotheses feed back into Phase 4 for refinement (max 3 iterations).
+
+---
+
+## Phase 6: Final Report Generation
+
+Populate `investigation-report.md` using the template in [forensic-report.md](./templates/forensic-report.md).
+
+**Mandatory sections**:
+- Executive Summary: one-paragraph verdict (Compromised / Clean / Inconclusive) with confidence level
+- Timeline: chronological reconstruction of all significant events with evidence citations
+- Validated Hypotheses: each with status and supporting evidence IDs
+- Evidence Registry: table of all `EV-XXXX` entries with source, type, and verification status
+- IOC List: all extracted and enriched Indicators of Compromise
+- Chain of Custody: how evidence was collected, from what sources, at what timestamps
+- Recommendations: immediate mitigations if compromise detected; monitoring recommendations
+
+**Report rules**:
+- Every factual claim must have at least one `[EV-XXXX]` citation
+- Executive Summary must state confidence level (High / Medium / Low)
+- All secrets/credentials must be redacted to `[REDACTED]`
+
+---
+
+## Phase 7: Completion
+
+1. Run final evidence count: `python3 SKILL_DIR/scripts/evidence-store.py --store evidence.json list`
+2. Archive the full investigation directory.
+3. If compromise is confirmed:
+   - List immediate mitigations (rotate credentials, pin dependency hashes, notify affected users)
+   - Identify affected versions/packages
+   - Note disclosure obligations (if a public package: coordinate with the package registry)
+4. Present the final `investigation-report.md` to the user.
+
+---
+
+## Ethical Use Guidelines
+
+This skill is designed for **defensive security investigation** — protecting open-source software from supply chain attacks. It must not be used for:
+
+- **Harassment or stalking** of contributors or maintainers
+- **Doxing** — correlating GitHub activity to real identities for malicious purposes
+- **Competitive intelligence** — investigating proprietary or internal repositories without authorization
+- **False accusations** — publishing investigation results without validated evidence (see anti-hallucination guardrails)
+
+Investigations should be conducted with the principle of **minimal intrusion**: collect only the evidence necessary to validate or refute the hypothesis. When publishing results, follow responsible disclosure practices and coordinate with affected maintainers before public disclosure.
+
+If the investigation reveals a genuine compromise, follow the coordinated vulnerability disclosure process:
+1. Notify the repository maintainers privately first
+2. Allow reasonable time for remediation (typically 90 days)
+3. Coordinate with package registries (npm, PyPI, etc.) if published packages are affected
+4. File a CVE if appropriate
+
+---
+
+## API Rate Limiting
+
+GitHub REST API enforces rate limits that will interrupt large investigations if not managed.
+
+**Authenticated requests**: 5,000/hour (requires `GITHUB_TOKEN` env var or `gh` CLI auth)
+**Unauthenticated requests**: 60/hour (unusable for investigations)
+
+**Best practices**:
+- Always authenticate: `export GITHUB_TOKEN=ghp_...` or use `gh` CLI (auto-authenticates)
+- Use conditional requests (`If-None-Match` / `If-Modified-Since` headers) to avoid consuming quota on unchanged data
+- For paginated endpoints, fetch all pages in sequence — don't parallelize against the same endpoint
+- Check `X-RateLimit-Remaining` header; if below 100, pause for `X-RateLimit-Reset` timestamp
+- BigQuery has its own quotas (10 TiB/day free tier) — always dry-run first
+- Wayback Machine CDX API: no formal rate limit, but be courteous (1-2 req/sec max)
+
+If rate-limited mid-investigation, record the partial results in the evidence store and note the limitation in the report.
+
+---
+
+## Reference Materials
+
+- [github-archive-guide.md](./references/github-archive-guide.md) — BigQuery queries, CDX API, 12 event types
+- [evidence-types.md](./references/evidence-types.md) — IOC taxonomy, evidence source types, observation types
+- [recovery-techniques.md](./references/recovery-techniques.md) — Recovering deleted commits, PRs, issues
+- [investigation-templates.md](./references/investigation-templates.md) — Pre-built hypothesis templates per attack type
+- [evidence-store.py](./scripts/evidence-store.py) — CLI tool for managing the evidence JSON store
+- [forensic-report.md](./templates/forensic-report.md) — Structured report template
@@ -0,0 +1,89 @@
+# Evidence Types Reference
+
+Taxonomy of all evidence types, IOC types, GitHub event types, and observation types
+used in OSS forensic investigations.
+
+---
+
+## Evidence Source Types
+
+| Type | Description | Example Sources |
+|------|-------------|-----------------|
+| `git` | Data from local git repository analysis | `git log`, `git fsck`, `git reflog`, `git blame` |
+| `gh_api` | Data from GitHub REST API responses | `/repos/.../commits`, `/repos/.../pulls`, `/repos/.../events` |
+| `gh_archive` | Data from GitHub Archive (BigQuery) | `githubarchive.month.*` BigQuery tables |
+| `web_archive` | Archived web pages from Wayback Machine | CDX API results, `web.archive.org/web/...` snapshots |
+| `ioc` | Indicator of Compromise from any source | Extracted from vendor reports, git history, network traces |
+| `analysis` | Derived insight from cross-source correlation | "SHA present in archive but absent from API" |
+| `vendor_report` | External security vendor or researcher report | CVE advisories, blog posts, NVD records |
+| `manual` | Manually recorded observation by investigator | Notes on behavioral patterns, timeline gaps |
+
+---
+
+## IOC Types
+
+| Type | Description | Example |
+|------|-------------|---------|
+| `COMMIT_SHA` | A git commit hash linked to malicious activity | `abc123def456...` |
+| `FILE_PATH` | A suspicious file inside the repository | `src/utils/crypto.js`, `dist/index.min.js` |
+| `API_KEY` | An API key accidentally committed | `AKIA...` (AWS), `ghp_...` (GitHub PAT) |
+| `SECRET` | A generic secret / credential | Database password, private key blob |
+| `IP_ADDRESS` | A C2 server or attacker IP | `192.0.2.1` |
+| `DOMAIN` | A malicious or suspicious domain | `evil-cdn.io`, typosquatted package registry domain |
+| `PACKAGE_NAME` | A malicious or squatted package name | `colo-rs` (typosquatting `color`), `lodash-utils` |
+| `ACTOR_USERNAME` | A GitHub handle linked to the attack | `malicious-bot-account` |
+| `MALICIOUS_URL` | A URL to a malicious resource | `https://evil.example.com/payload.sh` |
+| `WORKFLOW_FILE` | A suspicious CI/CD workflow file | `.github/workflows/release.yml` |
+| `BRANCH_NAME` | A suspicious branch | `refs/heads/temp-fix-do-not-merge` |
+| `TAG_NAME` | A suspicious git tag | `v1.0.0-security-patch` |
+| `RELEASE_NAME` | A suspicious release | Release with no associated tag or changelog |
+| `OTHER` | Catch-all for unclassified IOCs | — |
+
+---
+
+## GitHub Archive Event Types (12 Types)
+
+| Event Type | Forensic Relevance |
+|------------|-------------------|
+| `PushEvent` | Core: `payload.distinct_size=0` with `payload.size>0` → force push. `payload.before`/`payload.head` shows rewritten history. |
+| `PullRequestEvent` | Detects deleted PRs, rapid open→close patterns, PRs from new accounts |
+| `IssueEvent` | Detects deleted issues, coordinated labeling, rapid closure of vulnerability reports |
+| `IssueCommentEvent` | Deleted comments, rapid activity bursts |
+| `WatchEvent` | Star-farming campaigns (coordinated starring from new accounts) |
+| `ForkEvent` | Unusual fork patterns before malicious commit |
+| `CreateEvent` | Branch/tag creation: signals new release or code injection point |
+| `DeleteEvent` | Branch/tag deletion: critical — often used to hide traces |
+| `ReleaseEvent` | Unauthorized releases, release artifacts modified post-publish |
+| `MemberEvent` | Collaborator added/removed: maintainer compromise indicator |
+| `PublicEvent` | Repository made public (sometimes to drop malicious code briefly) |
+| `WorkflowRunEvent` | CI/CD pipeline executions: workflow injection, secret exfiltration |
+
+---
+
+## Evidence Verification States
+
+| State | Meaning |
+|-------|---------|
+| `unverified` | Collected from a single source, not cross-referenced |
+| `single_source` | The primary source has been confirmed directly (e.g., SHA resolves on GitHub), but no second source |
+| `multi_source_verified` | Confirmed from 2+ independent sources (e.g., GH Archive AND GitHub API both show the same event) |
+
+Only `multi_source_verified` evidence may be cited as fact in validated hypotheses.
+`unverified` and `single_source` evidence must be labeled `[UNVERIFIED]` or `[SINGLE-SOURCE]`.
+
+---
+
+## Observation Types (Patterned after RAPTOR)
+
+| Type | Description |
+|------|-------------|
+| `CommitObservation` | Specific commit SHA with metadata (author, date, files changed) |
+| `ForceWashObservation` | Evidence that commits were force-erased from a branch |
+| `DanglingCommitObservation` | SHA present in git object store but unreachable from any ref |
+| `IssueObservation` | A GitHub issue (current or archived) with title, body, timestamp |
+| `PRObservation` | A GitHub PR (current or archived) with diff summary, reviewers |
+| `IOC` | A single Indicator of Compromise with context |
+| `TimelineGap` | A period with unusual absence of expected activity |
+| `ActorAnomalyObservation` | Behavioral anomaly for a specific GitHub actor |
+| `WorkflowAnomalyObservation` | Suspicious CI/CD workflow change or unexpected run |
+| `CrossSourceDiscrepancy` | Item present in one source but absent in another (strong deletion indicator) |
@@ -0,0 +1,184 @@
+# GitHub Archive Query Guide (BigQuery)
+
+GitHub Archive records every public event on GitHub as immutable JSON records. This data is accessible via Google BigQuery and is the most reliable source for forensic investigation — events cannot be deleted or modified after recording.
+
+## Public Dataset
+
+- **Project**: `githubarchive`
+- **Tables**: `day.YYYYMMDD`, `month.YYYYMM`, `year.YYYY`
+- **Cost**: $6.25 per TiB scanned. Always run dry runs first.
+- **Access**: Requires a Google Cloud account with BigQuery enabled. Free tier includes 1 TiB/month of queries.
+
+---
+
+## The 12 GitHub Event Types
+
+| Event Type | What It Records | Forensic Value |
+|------------|-----------------|----------------|
+| `PushEvent` | Commits pushed to a branch | Force-push detection, commit timeline, author attribution |
+| `PullRequestEvent` | PR opened, closed, merged, reopened | Deleted PR recovery, review timeline |
+| `IssuesEvent` | Issue opened, closed, reopened, labeled | Deleted issue recovery, social engineering traces |
+| `IssueCommentEvent` | Comments on issues and PRs | Deleted comment recovery, communication patterns |
+| `CreateEvent` | Branch, tag, or repository creation | Suspicious branch creation, tag timing |
+| `DeleteEvent` | Branch or tag deletion | Evidence of cleanup after compromise |
+| `MemberEvent` | Collaborator added or removed | Permission changes, access escalation |
+| `PublicEvent` | Repository made public | Accidental exposure of private repos |
+| `WatchEvent` | User stars a repository | Actor reconnaissance patterns |
+| `ForkEvent` | Repository forked | Exfiltration of code before cleanup |
+| `ReleaseEvent` | Release published, edited, deleted | Malicious release injection, deleted release recovery |
+| `WorkflowRunEvent` | GitHub Actions workflow triggered | CI/CD abuse, unauthorized workflow runs |
+
+---
+
+## Query Templates
+
+### Basic: All Events for a Repository
+
+```sql
+SELECT
+  created_at,
+  type,
+  actor.login,
+  repo.name,
+  payload
+FROM
+  `githubarchive.day.20240101`  -- Adjust date
+WHERE
+  repo.name = 'owner/repo'
+  AND type IN ('PushEvent', 'DeleteEvent', 'MemberEvent')
+ORDER BY
+  created_at ASC
+```
+
+### Force-Push Detection
+
+Force-pushes produce PushEvents where commits are overwritten. Key indicators:
+- `payload.distinct_size = 0` with `payload.size > 0` → commits were erased
+- `payload.before` contains the SHA before the rewrite (recoverable)
+
+```sql
+SELECT
+  created_at,
+  actor.login,
+  JSON_EXTRACT_SCALAR(payload, '$.before') AS before_sha,
+  JSON_EXTRACT_SCALAR(payload, '$.head') AS after_sha,
+  JSON_EXTRACT_SCALAR(payload, '$.size') AS total_commits,
+  JSON_EXTRACT_SCALAR(payload, '$.distinct_size') AS distinct_commits,
+  JSON_EXTRACT_SCALAR(payload, '$.ref') AS branch_ref
+FROM
+  `githubarchive.month.*`
+WHERE
+  _TABLE_SUFFIX BETWEEN '202401' AND '202403'
+  AND type = 'PushEvent'
+  AND repo.name = 'owner/repo'
+  AND CAST(JSON_EXTRACT_SCALAR(payload, '$.distinct_size') AS INT64) = 0
+ORDER BY
+  created_at ASC
+```
+
+### Deleted Branch/Tag Detection
+
+```sql
+SELECT
+  created_at,
+  actor.login,
+  JSON_EXTRACT_SCALAR(payload, '$.ref') AS deleted_ref,
+  JSON_EXTRACT_SCALAR(payload, '$.ref_type') AS ref_type
+FROM
+  `githubarchive.month.*`
+WHERE
+  _TABLE_SUFFIX BETWEEN '202401' AND '202403'
+  AND type = 'DeleteEvent'
+  AND repo.name = 'owner/repo'
+ORDER BY
+  created_at ASC
+```
+
+### Collaborator Permission Changes
+
+```sql
+SELECT
+  created_at,
+  actor.login,
+  JSON_EXTRACT_SCALAR(payload, '$.action') AS action,
+  JSON_EXTRACT_SCALAR(payload, '$.member.login') AS member
+FROM
+  `githubarchive.month.*`
+WHERE
+  _TABLE_SUFFIX BETWEEN '202401' AND '202403'
+  AND type = 'MemberEvent'
+  AND repo.name = 'owner/repo'
+ORDER BY
+  created_at ASC
+```
+
+### CI/CD Workflow Activity
+
+```sql
+SELECT
+  created_at,
+  actor.login,
+  JSON_EXTRACT_SCALAR(payload, '$.action') AS action,
+  JSON_EXTRACT_SCALAR(payload, '$.workflow_run.name') AS workflow_name,
+  JSON_EXTRACT_SCALAR(payload, '$.workflow_run.conclusion') AS conclusion,
+  JSON_EXTRACT_SCALAR(payload, '$.workflow_run.head_sha') AS head_sha
+FROM
+  `githubarchive.month.*`
+WHERE
+  _TABLE_SUFFIX BETWEEN '202401' AND '202403'
+  AND type = 'WorkflowRunEvent'
+  AND repo.name = 'owner/repo'
+ORDER BY
+  created_at ASC
+```
+
+### Actor Activity Profiling
+
+```sql
+SELECT
+  type,
+  COUNT(*) AS event_count,
+  MIN(created_at) AS first_event,
+  MAX(created_at) AS last_event
+FROM
+  `githubarchive.month.*`
+WHERE
+  _TABLE_SUFFIX BETWEEN '202301' AND '202412'
+  AND actor.login = 'suspicious-username'
+GROUP BY type
+ORDER BY event_count DESC
+```
+
+---
+
+## Cost Optimization (MANDATORY)
+
+1. **Always dry run first**: Add `--dry_run` flag to `bq query` to see estimated bytes scanned before executing.
+2. **Use `_TABLE_SUFFIX`**: Narrow the date range as much as possible. `day.*` tables are cheapest for narrow windows; `month.*` for broader sweeps.
+3. **Select only needed columns**: Avoid `SELECT *`. The `payload` column is large — only select specific JSON paths.
+4. **Add LIMIT**: Use `LIMIT 1000` during exploration. Remove only for final exhaustive queries.
+5. **Column filtering in WHERE**: Filter on indexed columns (`type`, `repo.name`, `actor.login`) before payload extraction.
+
+**Cost estimation**: A single month of GH Archive data is ~1-2 TiB uncompressed. Querying a specific repo + event type with `_TABLE_SUFFIX` typically scans 1-10 GiB ($0.006-$0.06).
+
+---
+
+## Accessing via Hermes
+
+**Option A: BigQuery CLI** (if `gcloud` is installed)
+```bash
+bq query --use_legacy_sql=false --format=json "YOUR QUERY"
+```
+
+**Option B: Python** (via `execute_code`)
+```python
+from google.cloud import bigquery
+client = bigquery.Client()
+query = "YOUR QUERY"
+results = client.query(query).result()
+for row in results:
+    print(dict(row))
+```
+
+**Option C: No GCP credentials available**
+If BigQuery is unavailable, document this limitation in the report. Use the other 4 investigators (Git, GitHub API, Wayback Machine, IOC Enrichment) — they cover most investigation needs without BigQuery.
@@ -0,0 +1,131 @@
+# Investigation Templates
+
+Pre-built hypothesis and investigation templates for common supply chain attack scenarios.
+Each template includes: attack pattern, key evidence to collect, and hypothesis starters.
+
+---
+
+## Template 1: Maintainer Account Compromise
+
+**Pattern**: Attacker gains access to a legitimate maintainer account (phishing, credential stuffing)
+and uses it to push malicious code, create backdoored releases, or exfiltrate CI secrets.
+
+**Real-world examples**: XZ Utils (2024), Codecov (2021), event-stream (2018)
+
+**Key Evidence to Collect**:
+- [ ] Push events from maintainer account outside normal working hours/timezone
+- [ ] Commits adding new dependencies, obfuscated code, or modified build scripts
+- [ ] Release creation immediately after suspicious push (to maximize package distribution)
+- [ ] MemberEvent adding unknown collaborators (attacker adding backup access)
+- [ ] WorkflowRunEvent with unexpected secret access or exfiltration-like behavior
+- [ ] Account login location changes (check social media, conference talks for corroboration)
+
+**Hypothesis Starters**:
+```
+[HYPOTHESIS] Actor <HANDLE>'s account was compromised on or around <DATE>, 
+based on anomalous commit timing [EV-XXXX] and geographic access patterns [EV-YYYY].
+```
+```
+[HYPOTHESIS] Release <VERSION> was published by the compromised account to push 
+malicious code to downstream users, evidenced by the malicious commit [EV-XXXX] 
+being added <N> hours before the release [EV-YYYY].
+```
+
+---
+
+## Template 2: Malicious Dependency Injection
+
+**Pattern**: A trusted package is modified to include malicious code in a dependency,
+or a new malicious dependency is injected into an existing package.
+
+**Key Evidence to Collect**:
+- [ ] Diff of `package.json`/`requirements.txt`/`go.mod` before and after suspicious commit
+- [ ] The new dependency's publication timestamp vs. the injection commit timestamp
+- [ ] Whether the new dependency exists on npm/PyPI and who owns it
+- [ ] Any obfuscation patterns in the injected dependency code
+- [ ] Install-time scripts (`postinstall`, `setup.py`, etc.) that execute code on install
+
+**Hypothesis Starters**:
+```
+[HYPOTHESIS] Commit <SHA> [EV-XXXX] introduced dependency <PACKAGE@VERSION> 
+which appears to be a malicious package published by actor <HANDLE> [EV-YYYY], 
+designed to execute <BEHAVIOR> during installation.
+```
+
+---
+
+## Template 3: CI/CD Pipeline Injection
+
+**Pattern**: Attacker modifies GitHub Actions workflows to steal secrets, exfiltrate code,
+or inject malicious artifacts into the build output.
+
+**Key Evidence to Collect**:
+- [ ] Diff of all `.github/workflows/*.yml` files before/after suspicious period
+- [ ] WorkflowRunEvents triggered by the modified workflows
+- [ ] Any `curl`, `wget`, or network calls added to workflow steps
+- [ ] New or modified `env:` sections referencing `secrets.*`
+- [ ] Artifacts produced by modified workflow runs
+
+**Hypothesis Starters**:
+```
+[HYPOTHESIS] Workflow file <FILE> was modified in commit <SHA> [EV-XXXX] to 
+exfiltrate repository secrets via <METHOD>, as evidenced by the added network 
+call pattern [EV-YYYY].
+```
+
+---
+
+## Template 4: Typosquatting / Dependency Confusion
+
+**Pattern**: Attacker registers a package with a name similar to a popular package
+(or an internal package name) to intercept installs from users who mistype.
+
+**Key Evidence to Collect**:
+- [ ] Registration timestamp of the suspicious package on the registry
+- [ ] Package content: does it contain malicious code or is it a stub?
+- [ ] Download statistics for the suspicious package
+- [ ] Names of internal packages that could be targeted (if private repo scope)
+- [ ] Any references to the legitimate package in the malicious one's metadata
+
+**Hypothesis Starters**:
+```
+[HYPOTHESIS] Package <MALICIOUS_NAME> was registered on <DATE> [EV-XXXX] to 
+typosquat on <LEGITIMATE_NAME>, targeting users who misspell the package name. 
+The package contains <BEHAVIOR> [EV-YYYY].
+```
+
+---
+
+## Template 5: Force-Push History Rewrite (Evidence Erasure)
+
+**Pattern**: After a malicious commit is detected (or before wider notice), the attacker
+force-pushes to remove the malicious commit from branch history.
+
+**Detection is key** — this template focuses on proving the erasure happened.
+
+**Key Evidence to Collect**:
+- [ ] GH Archive PushEvent with `distinct_size=0` (force push indicator) [EV-XXXX]
+- [ ] The SHA of the commit BEFORE the force push (from GH Archive `payload.before`)
+- [ ] Recovery of the erased commit via direct URL or `git fetch origin SHA`
+- [ ] Wayback Machine snapshot of the commit page before erasure
+- [ ] Timeline gap in git log (N commits visible in archive but M < N in current repo)
+
+**Hypothesis Starters**:
+```
+[HYPOTHESIS] Actor <HANDLE> force-pushed branch <BRANCH> on <DATE> [EV-XXXX] 
+to erase commit <SHA> [EV-YYYY], which contained <MALICIOUS_CONTENT>. 
+The erased commit was recovered via <METHOD> [EV-ZZZZ].
+```
+
+---
+
+## Cross-Cutting Investigation Checklist
+
+Apply to every investigation regardless of template:
+
+- [ ] Check all contributors for newly created accounts (< 30 days old at time of malicious activity)
+- [ ] Check if any maintainer account changed email in the period (sign of account takeover)
+- [ ] Verify GPG signatures on suspicious commits match known maintainer keys
+- [ ] Check if the repository changed ownership or transferred orgs near the incident
+- [ ] Look for "cleanup" commits immediately after the malicious commit (cover-up pattern)
+- [ ] Check related packages/repos by the same author for similar patterns
@@ -0,0 +1,164 @@
+# Deleted Content Recovery Techniques
+
+## Key Insight: GitHub Never Fully Deletes Force-Pushed Commits
+
+Force-pushed commits are removed from the branch history but REMAIN on GitHub's servers until garbage collection runs (which can take weeks to months). This is the foundation of deleted commit recovery.
+
+---
+
+## Method 1: Direct GitHub URL (Fastest — No Auth Required)
+
+If you have a commit SHA, access it directly even if it was force-pushed off a branch:
+
+```bash
+# View commit metadata
+curl -s "https://github.com/OWNER/REPO/commit/SHA"
+
+# Download as patch (includes full diff)
+curl -s "https://github.com/OWNER/REPO/commit/SHA.patch" > recovered_commit.patch
+
+# Download as diff
+curl -s "https://github.com/OWNER/REPO/commit/SHA.diff" > recovered_commit.diff
+
+# Example (Istio credential leak - real incident):
+curl -s "https://github.com/istio/istio/commit/FORCE_PUSHED_SHA.patch"
+```
+
+**When this works**: SHA is known (from GH Archive, Wayback Machine, or `git fsck`)
+**When this fails**: GitHub has already garbage-collected the object (rare, typically 30–90 days post-force-push)
+
+---
+
+## Method 2: GitHub REST API
+
+```bash
+# Works for commits force-pushed off branches but still on server
+# Note: /commits/SHA may 404, but /git/commits/SHA often succeeds for orphaned commits
+curl -s "https://api.github.com/repos/OWNER/REPO/git/commits/SHA" | jq .
+
+# Get the tree (file listing) of a force-pushed commit
+curl -s "https://api.github.com/repos/OWNER/REPO/git/trees/SHA?recursive=1" | jq .
+
+# Get a specific file from a force-pushed commit
+curl -s "https://api.github.com/repos/OWNER/REPO/contents/PATH?ref=SHA" | jq .content | base64 -d
+```
+
+---
+
+## Method 3: Git Fetch by SHA (Local — Requires Clone)
+
+```bash
+# Fetch an orphaned commit directly by SHA into local repo
+cd target_repo
+git fetch origin SHA
+git log FETCH_HEAD -1   # view the commit
+git diff FETCH_HEAD~1 FETCH_HEAD  # view the diff
+
+# If the SHA was recently force-pushed it will still be fetchable
+# This stops working once GitHub GC runs
+```
+
+---
+
+## Method 4: Dangling Commits via git fsck
+
+```bash
+cd target_repo
+
+# Find all unreachable objects (includes force-pushed commits)
+git fsck --unreachable --no-reflogs 2>&1 | grep "unreachable commit" | awk '{print $3}' > dangling_shas.txt
+
+# For each dangling commit, get its metadata
+while read sha; do
+  echo "=== $sha ===" >> dangling_details.txt
+  git show --stat "$sha" >> dangling_details.txt 2>&1
+done < dangling_shas.txt
+
+# Note: dangling objects only exist in LOCAL clone — not the same as GitHub's copies
+# GitHub's copies are accessible via Methods 1-3 until GC runs
+```
+
+---
+
+## Recovering Deleted GitHub Issues and PRs
+
+### Via Wayback Machine CDX API
+
+```bash
+# Find all archived snapshots of a specific issue
+curl -s "https://web.archive.org/cdx/search/cdx?url=github.com/OWNER/REPO/issues/NUMBER&output=json&limit=50&fl=timestamp,statuscode,original" | python3 -m json.tool
+
+# Fetch the best snapshot
+# Use the timestamp from the CDX result:
+# https://web.archive.org/web/TIMESTAMP/https://github.com/OWNER/REPO/issues/NUMBER
+curl -s "https://web.archive.org/web/TIMESTAMP/https://github.com/OWNER/REPO/issues/NUMBER" > issue_NUMBER_archived.html
+
+# Find all snapshots of the repo in a date range
+curl -s "https://web.archive.org/cdx/search/cdx?url=github.com/OWNER/REPO*&output=json&from=20240101&to=20240201&limit=200&fl=timestamp,urlkey,statuscode" | python3 -m json.tool
+```
+
+### Via GitHub API (Limited — Only Non-Deleted Content)
+
+```bash
+# Closed issues (not deleted) are retrievable
+curl -s "https://api.github.com/repos/OWNER/REPO/issues?state=closed&per_page=100" | jq '.[].number'
+
+# Note: DELETED issues/PRs do NOT appear in the API. Use Wayback Machine or GH Archive for those.
+```
+
+### Via GitHub Archive (For Event History — Not Content)
+
+```sql
+-- Find all IssueEvents for a repo in a date range
+SELECT created_at, actor.login, payload.action, payload.issue.number, payload.issue.title
+FROM `githubarchive.day.*`
+WHERE _TABLE_SUFFIX BETWEEN '20240101' AND '20240201'
+  AND type = 'IssuesEvent'
+  AND repo.name = 'OWNER/REPO'
+ORDER BY created_at
+```
+
+---
+
+## Recovering Deleted Files from a Known Commit
+
+```bash
+# If you have the commit SHA (even force-pushed):
+git show SHA:path/to/file.py > recovered_file.py
+
+# Or via API (base64 encoded content):
+curl -s "https://api.github.com/repos/OWNER/REPO/contents/path/to/file.py?ref=SHA" | python3 -c "
+import sys, json, base64
+d = json.load(sys.stdin)
+print(base64.b64decode(d['content']).decode())
+"
+```
+
+---
+
+## Evidence Recording
+
+After recovering any deleted content, immediately record it:
+
+```bash
+python3 SKILL_DIR/scripts/evidence-store.py --store evidence.json add \
+  --source "git fetch origin FORCE_PUSHED_SHA" \
+  --content "Recovered commit: FORCE_PUSHED_SHA | Author: attacker@example.com | Date: 2024-01-15 | Added file: malicious.sh" \
+  --type git \
+  --actor "attacker-handle" \
+  --url "https://github.com/OWNER/REPO/commit/FORCE_PUSHED_SHA.patch" \
+  --timestamp "2024-01-15T00:00:00Z" \
+  --verification single_source \
+  --notes "Commit force-pushed off main branch on 2024-01-16. Recovered via direct fetch."
+```
+
+---
+
+## Recovery Failure Modes
+
+| Failure | Cause | Workaround |
+|---------|-------|------------|
+| `git fetch origin SHA` returns "not our ref" | GitHub GC already ran | Try Method 1/2, search Wayback Machine |
+| `github.com/OWNER/REPO/commit/SHA` returns 404 | GC ran or SHA is wrong | Verify SHA via GH Archive; try partial SHA search |
+| Wayback Machine has no snapshots | Page was never crawled by IA | Check `commoncrawl.org`, check Google Cache |
+| BigQuery shows event but no content | GH Archive stores event metadata, not file contents | Recovery only reveals the event occurred, not the content |
@@ -0,0 +1,313 @@
+#!/usr/bin/env python3
+"""
+OSS Forensics Evidence Store Manager
+Manages a JSON-based evidence store for forensic investigations.
+
+Commands:
+  add      - Add a piece of evidence
+  list     - List all evidence (optionally filter by type or actor)
+  verify   - Re-check SHA-256 hashes for integrity
+  query    - Search evidence by keyword
+  export   - Export evidence as a Markdown table
+  summary  - Print investigation statistics
+
+Usage example:
+  python3 evidence-store.py --store evidence.json add \
+    --source "git fsck output" --content "dangling commit abc123" \
+    --type git --actor "malicious-user" --url "https://github.com/owner/repo/commit/abc123"
+
+  python3 evidence-store.py --store evidence.json list --type git
+  python3 evidence-store.py --store evidence.json verify
+  python3 evidence-store.py --store evidence.json export > evidence-table.md
+"""
+
+import json
+import argparse
+import os
+import datetime
+import hashlib
+import sys
+
+EVIDENCE_TYPES = [
+    "git",           # Local git repository data (commits, reflog, fsck)
+    "gh_api",        # GitHub REST API responses
+    "gh_archive",    # GitHub Archive / BigQuery query results
+    "web_archive",   # Wayback Machine snapshots
+    "ioc",           # Indicator of Compromise (SHA, domain, IP, package name, etc.)
+    "analysis",      # Derived analysis / cross-source correlation result
+    "manual",        # Manually noted observation
+    "vendor_report", # External security vendor report excerpt
+]
+
+VERIFICATION_STATES = ["unverified", "single_source", "multi_source_verified"]
+
+IOC_TYPES = [
+    "COMMIT_SHA", "FILE_PATH", "API_KEY", "SECRET", "IP_ADDRESS",
+    "DOMAIN", "PACKAGE_NAME", "ACTOR_USERNAME", "MALICIOUS_URL",
+    "WORKFLOW_FILE", "BRANCH_NAME", "TAG_NAME", "RELEASE_NAME", "OTHER",
+]
+
+
+def _now_iso():
+    return datetime.datetime.now(datetime.timezone.utc).isoformat(timespec="seconds") + "Z"
+
+
+def _sha256(content: str) -> str:
+    return hashlib.sha256(content.encode("utf-8")).hexdigest()
+
+
+class EvidenceStore:
+    def __init__(self, filepath: str):
+        self.filepath = filepath
+        self.data = {
+            "metadata": {
+                "version": "2.0",
+                "created_at": _now_iso(),
+                "last_updated": _now_iso(),
+                "investigation": "",
+                "target_repo": "",
+            },
+            "evidence": [],
+            "chain_of_custody": [],
+        }
+        if os.path.exists(filepath):
+            try:
+                with open(filepath, "r", encoding="utf-8") as f:
+                    self.data = json.load(f)
+            except (json.JSONDecodeError, IOError) as e:
+                print(f"Error loading evidence store '{filepath}': {e}", file=sys.stderr)
+                print("Hint: The file might be corrupted. Check for manual edits or syntax errors.", file=sys.stderr)
+                sys.exit(1)
+
+    def _save(self):
+        self.data["metadata"]["last_updated"] = _now_iso()
+        with open(self.filepath, "w", encoding="utf-8") as f:
+            json.dump(self.data, f, indent=2, ensure_ascii=False)
+
+    def _next_id(self) -> str:
+        return f"EV-{len(self.data['evidence']) + 1:04d}"
+
+    def add(
+        self,
+        source: str,
+        content: str,
+        evidence_type: str,
+        actor: str = None,
+        url: str = None,
+        timestamp: str = None,
+        ioc_type: str = None,
+        verification: str = "unverified",
+        notes: str = None,
+    ) -> str:
+        evidence_id = self._next_id()
+        entry = {
+            "id": evidence_id,
+            "type": evidence_type,
+            "source": source,
+            "content": content,
+            "content_sha256": _sha256(content),
+            "actor": actor,
+            "url": url,
+            "event_timestamp": timestamp,
+            "collected_at": _now_iso(),
+            "ioc_type": ioc_type,
+            "verification": verification,
+            "notes": notes,
+        }
+        self.data["evidence"].append(entry)
+        self.data["chain_of_custody"].append({
+            "action": "add",
+            "evidence_id": evidence_id,
+            "timestamp": _now_iso(),
+            "source": source,
+        })
+        self._save()
+        return evidence_id
+
+    def list_evidence(self, filter_type: str = None, filter_actor: str = None):
+        results = self.data["evidence"]
+        if filter_type:
+            results = [e for e in results if e.get("type") == filter_type]
+        if filter_actor:
+            results = [e for e in results if e.get("actor") == filter_actor]
+        return results
+
+    def verify_integrity(self):
+        """Re-compute SHA-256 for all entries and report mismatches."""
+        issues = []
+        for entry in self.data["evidence"]:
+            expected = _sha256(entry["content"])
+            stored = entry.get("content_sha256", "")
+            if expected != stored:
+                issues.append({
+                    "id": entry["id"],
+                    "stored_sha256": stored,
+                    "computed_sha256": expected,
+                })
+        return issues
+
+    def query(self, keyword: str):
+        """Search for keyword in content, source, actor, or url."""
+        keyword_lower = keyword.lower()
+        return [
+            e for e in self.data["evidence"]
+            if keyword_lower in (e.get("content", "") or "").lower()
+            or keyword_lower in (e.get("source", "") or "").lower()
+            or keyword_lower in (e.get("actor", "") or "").lower()
+            or keyword_lower in (e.get("url", "") or "").lower()
+        ]
+
+    def export_markdown(self) -> str:
+        lines = [
+            "# Evidence Registry",
+            "",
+            f"**Store**: `{self.filepath}`",
+            f"**Last Updated**: {self.data['metadata'].get('last_updated', 'N/A')}",
+            f"**Total Evidence Items**: {len(self.data['evidence'])}",
+            "",
+            "| ID | Type | Source | Actor | Verification | Event Timestamp | URL |",
+            "|----|------|--------|-------|--------------|-----------------|-----|",
+        ]
+        for e in self.data["evidence"]:
+            url = e.get("url") or ""
+            url_display = f"[link]({url})" if url else ""
+            lines.append(
+                f"| {e['id']} | {e.get('type','')} | {e.get('source','')} "
+                f"| {e.get('actor') or ''} | {e.get('verification','')} "
+                f"| {e.get('event_timestamp') or ''} | {url_display} |"
+            )
+        lines.append("")
+        lines.append("## Chain of Custody")
+        lines.append("")
+        lines.append("| Evidence ID | Action | Timestamp | Source |")
+        lines.append("|-------------|--------|-----------|--------|")
+        for c in self.data["chain_of_custody"]:
+            lines.append(
+                f"| {c.get('evidence_id','')} | {c.get('action','')} "
+                f"| {c.get('timestamp','')} | {c.get('source','')} |"
+            )
+        return "\n".join(lines)
+
+    def summary(self) -> dict:
+        by_type = {}
+        by_verification = {}
+        actors = set()
+        for e in self.data["evidence"]:
+            t = e.get("type", "unknown")
+            by_type[t] = by_type.get(t, 0) + 1
+            v = e.get("verification", "unverified")
+            by_verification[v] = by_verification.get(v, 0) + 1
+            if e.get("actor"):
+                actors.add(e["actor"])
+        return {
+            "total": len(self.data["evidence"]),
+            "by_type": by_type,
+            "by_verification": by_verification,
+            "unique_actors": sorted(actors),
+        }
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="OSS Forensics Evidence Store Manager v2.0",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+    )
+    parser.add_argument("--store", default="evidence.json", help="Path to evidence JSON file (default: evidence.json)")
+
+    subparsers = parser.add_subparsers(dest="command", metavar="COMMAND")
+
+    # --- add ---
+    add_p = subparsers.add_parser("add", help="Add a new evidence entry")
+    add_p.add_argument("--source", required=True, help="Where this evidence came from (e.g. 'git fsck', 'GH API /commits')")
+    add_p.add_argument("--content", required=True, help="The evidence content (commit SHA, API response excerpt, etc.)")
+    add_p.add_argument("--type", required=True, choices=EVIDENCE_TYPES, dest="evidence_type", help="Evidence type")
+    add_p.add_argument("--actor", help="GitHub handle or email of associated actor")
+    add_p.add_argument("--url", help="URL to original source")
+    add_p.add_argument("--timestamp", help="When the event occurred (ISO 8601)")
+    add_p.add_argument("--ioc-type", choices=IOC_TYPES, help="IOC subtype (for --type ioc)")
+    add_p.add_argument("--verification", choices=VERIFICATION_STATES, default="unverified")
+    add_p.add_argument("--notes", help="Additional investigator notes")
+    add_p.add_argument("--quiet", action="store_true", help="Suppress success message")
+
+    # --- list ---
+    list_p = subparsers.add_parser("list", help="List all evidence entries")
+    list_p.add_argument("--type", dest="filter_type", choices=EVIDENCE_TYPES, help="Filter by type")
+    list_p.add_argument("--actor", dest="filter_actor", help="Filter by actor")
+
+    # --- verify ---
+    subparsers.add_parser("verify", help="Verify SHA-256 integrity of all evidence content")
+
+    # --- query ---
+    query_p = subparsers.add_parser("query", help="Search evidence by keyword")
+    query_p.add_argument("keyword", help="Keyword to search for")
+
+    # --- export ---
+    subparsers.add_parser("export", help="Export evidence as a Markdown table (stdout)")
+
+    # --- summary ---
+    subparsers.add_parser("summary", help="Print investigation statistics")
+
+    args = parser.parse_args()
+
+    if not args.command:
+        parser.print_help()
+        sys.exit(0)
+
+    store = EvidenceStore(args.store)
+
+    if args.command == "add":
+        eid = store.add(
+            source=args.source,
+            content=args.content,
+            evidence_type=args.evidence_type,
+            actor=args.actor,
+            url=args.url,
+            timestamp=args.timestamp,
+            ioc_type=args.ioc_type,
+            verification=args.verification,
+            notes=args.notes,
+        )
+        if not getattr(args, "quiet", False):
+            print(f"✓ Added evidence: {eid}")
+
+    elif args.command == "list":
+        items = store.list_evidence(
+            filter_type=getattr(args, "filter_type", None),
+            filter_actor=getattr(args, "filter_actor", None),
+        )
+        if not items:
+            print("No evidence found.")
+        for e in items:
+            actor_str = f" | actor: {e['actor']}" if e.get("actor") else ""
+            url_str = f" | {e['url']}" if e.get("url") else ""
+            print(f"[{e['id']}] {e['type']:12s} | {e['verification']:20s} | {e['source']}{actor_str}{url_str}")
+
+    elif args.command == "verify":
+        issues = store.verify_integrity()
+        if not issues:
+            print(f"✓ All {len(store.data['evidence'])} evidence entries passed SHA-256 integrity check.")
+        else:
+            print(f"✗ {len(issues)} integrity issue(s) detected:")
+            for i in issues:
+                print(f"  [{i['id']}] stored={i['stored_sha256'][:16]}... computed={i['computed_sha256'][:16]}...")
+            sys.exit(1)
+
+    elif args.command == "query":
+        results = store.query(args.keyword)
+        print(f"Found {len(results)} result(s) for '{args.keyword}':")
+        for e in results:
+            print(f"  [{e['id']}] {e['type']} | {e['source']} | {e['content'][:80]}")
+
+    elif args.command == "export":
+        print(store.export_markdown())
+
+    elif args.command == "summary":
+        s = store.summary()
+        print(f"Total evidence items : {s['total']}")
+        print(f"By type              : {json.dumps(s['by_type'], indent=2)}")
+        print(f"By verification      : {json.dumps(s['by_verification'], indent=2)}")
+        print(f"Unique actors        : {s['unique_actors']}")
+
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,151 @@
+# Forensic Investigation Report
+
+> **Instructions**: Fill in all sections. Every factual claim must cite at least one `[EV-XXXX]` evidence ID.
+> Remove placeholder text and instruction notes before finalizing. Redact all secrets to `[REDACTED]`.
+
+---
+
+## Executive Summary
+
+**Target Repository**: `OWNER/REPO`
+**Investigation Period**: YYYY-MM-DD to YYYY-MM-DD
+**Verdict**: <!-- Compromised / Clean / Inconclusive -->
+**Confidence Level**: <!-- High / Medium / Low -->
+**Report Date**: YYYY-MM-DD
+**Investigator**: <!-- Agent session ID or analyst name -->
+
+<!-- One paragraph: what was investigated, what was found, what is recommended. -->
+
+---
+
+## Timeline of Events
+
+> All timestamps in UTC. Each event must cite at least one evidence ID.
+
+| Timestamp (UTC) | Event | Evidence IDs | Source |
+|-----------------|-------|--------------|--------|
+| YYYY-MM-DDTHH:MM:SSZ | _Describe event_ | [EV-XXXX] | git / gh_api / gh_archive / web_archive |
+| | | | |
+
+---
+
+## Validated Hypotheses
+
+### Hypothesis 1: <!-- Short title -->
+
+**Status**: <!-- VALIDATED / INCONCLUSIVE / REJECTED -->
+
+**Claim**: _Full statement of the hypothesis._
+
+**Supporting Evidence**:
+- [EV-XXXX]: _What this evidence shows_
+- [EV-YYYY]: _What this evidence shows_
+
+**Counter-Evidence Considered**: _What might disprove this, and why it was ruled out or not._
+
+**Confidence**: <!-- High / Medium / Low, and why -->
+
+---
+
+## Indicators of Compromise (IOC List)
+
+| Type | Value | Status | Evidence |
+|------|-------|--------|----------|
+| COMMIT_SHA | `abc123...` | Confirmed malicious | [EV-XXXX] |
+| ACTOR_USERNAME | `handle` | Suspected compromised | [EV-YYYY] |
+| FILE_PATH | `src/evil.js` | Confirmed malicious | [EV-ZZZZ] |
+| DOMAIN | `evil-cdn.io` | Confirmed C2 | [EV-WWWW] |
+
+---
+
+## Affected Versions
+
+| Version / Tag | Published | Contains Malicious Code | Evidence |
+|---------------|-----------|------------------------|----------|
+| `v1.2.3` | YYYY-MM-DD | Yes / No / Unknown | [EV-XXXX] |
+
+---
+
+## Evidence Registry
+
+> Generated by: `python3 SKILL_DIR/scripts/evidence-store.py --store evidence.json export`
+
+<!-- Paste the Markdown table output from the evidence-store.py export command here -->
+
+| ID | Type | Source | Actor | Verification | Event Timestamp | URL |
+|----|------|--------|-------|--------------|-----------------|-----|
+| EV-0001 | | | | | | |
+
+---
+
+## Chain of Custody
+
+> Generated by: `python3 SKILL_DIR/scripts/evidence-store.py --store evidence.json export`
+
+<!-- Paste the chain of custody section from the export output here -->
+
+| Evidence ID | Action | Timestamp | Source |
+|-------------|--------|-----------|--------|
+| EV-0001 | add | | |
+
+---
+
+## Technical Findings
+
+### Git History Analysis
+
+_Summarize findings from local git analysis: dangling commits, reflog anomalies, unsigned commits, binary additions, etc._
+
+### GitHub API Analysis
+
+_Summarize findings from GitHub REST API: deleted PRs/issues, contributor changes, release anomalies, etc._
+
+### GitHub Archive Analysis
+
+_Summarize findings from BigQuery: force-push events, delete events, workflow anomalies, member changes, etc._
+_Note: If BigQuery was unavailable, state this explicitly._
+
+### Wayback Machine Analysis
+
+_Summarize findings from archive.org: recovered deleted pages, historical content differences, etc._
+
+### IOC Enrichment
+
+_Summarize enrichment results: WHOIS data for domains, recovered commit content, actor account analysis, etc._
+
+---
+
+## Recommendations
+
+### Immediate Actions (If Compromise Confirmed)
+
+- [ ] Rotate all GitHub tokens, API keys, and credentials that may have been exposed
+- [ ] Pin dependency versions to hashes in all affected packages
+- [ ] Publish a security advisory / CVE if applicable
+- [ ] Notify downstream users/package registries (npm, PyPI, etc.)
+- [ ] Revoke access for the compromised account and re-secure with hardware 2FA
+- [ ] Audit all CI/CD workflow files for unauthorized modifications
+- [ ] Review all releases published during the compromise window
+
+### Monitoring Recommendations
+
+- [ ] Enable branch protection on `main`/`master` (require code review, disallow force-push)
+- [ ] Enable required commit signing (GPG/SSH)
+- [ ] Set up GitHub audit log streaming for future monitoring
+- [ ] Pin critical dependencies to known-good SHAs in lock files
+
+---
+
+## Limitations and Caveats
+
+- _List any data sources that were unavailable (e.g., no BigQuery access)_
+- _Note any evidence that is single-source only (not independently verified)_
+- _Note any hypotheses that could not be confirmed or denied_
+
+---
+
+## References
+
+- Evidence store: `evidence.json` (SHA-256 integrity: run `python3 SKILL_DIR/scripts/evidence-store.py --store evidence.json verify`)
+- Related issues: <!-- Link to GitHub issues, CVEs, security advisories -->
+- RAPTOR framework: https://github.com/gadievron/raptor
@@ -0,0 +1,43 @@
+# Malicious Package Investigation Report
+
+---
+
+## 📦 Package Metadata
+- **Package Name**: 
+- **Registry**: [NPM / PyPI / RubyGems / etc.]
+- **Affected Versions**: 
+- **Malicious Version(s)**: 
+- **Downloads at Time of Detection**: 
+- **Package URL**: 
+
+---
+
+## 🚩 Indicators of Compromise (IOCs)
+- **Malicious URL(s)**: 
+- **Exfiltrated Data Types**: [Environment variables, ~/.ssh/id_rsa, /etc/shadow, etc.]
+- **Exfiltration Method**: [DNS tunneling, HTTP POST to C2, etc.]
+- **C2 IP/Domain**: 
+
+---
+
+## 🛠️ Analysis Summary
+- **Primary Mechanism**: [Typosquatting / Dependency Confusion / Maintainer Takeover]
+- **Behavior Description**: 
+  - [Example: Installs a postinstall script that exfiltrates environment variables.]
+  - [Example: Patches `setup.py` to download a secondary payload.]
+
+---
+
+## 🔍 Evidence Registry
+| Evidence ID | Type | Source | Description |
+|-------------|------|--------|-------------|
+| EV-XXXX     | ioc  | NPM    | Package install script snapshot |
+| EV-YYYY     | web  | Wayback| Historical version comparison |
+
+---
+
+## 🛡️ Recommended Mitigations
+1. [ ] Unpublish/Report the package to the registry.
+2. [ ] Audit `package-lock.json` or `requirements.txt` across all projects.
+3. [ ] Rotate secrets exfiltrated via environment variables.
+4. [ ] Pin specific hashes (SHASUM) for mission-critical dependencies.
@@ -27,25 +27,16 @@ from pathlib import Path
 import fire
 import yaml

-# Load .env from ~/.hermes/.env first, then project root as dev fallback
-from dotenv import load_dotenv
-
+# Load .env from ~/.hermes/.env first, then project root as dev fallback.
+# User-managed env files should override stale shell exports on restart.
 _hermes_home = Path(os.getenv("HERMES_HOME", Path.home() / ".hermes"))
-_user_env = _hermes_home / ".env"
 _project_env = Path(__file__).parent / '.env'

-if _user_env.exists():
-    try:
-        load_dotenv(dotenv_path=_user_env, encoding="utf-8")
-    except UnicodeDecodeError:
-        load_dotenv(dotenv_path=_user_env, encoding="latin-1")
-    print(f"✅ Loaded environment variables from {_user_env}")
-elif _project_env.exists():
-    try:
-        load_dotenv(dotenv_path=_project_env, encoding="utf-8")
-    except UnicodeDecodeError:
-        load_dotenv(dotenv_path=_project_env, encoding="latin-1")
-    print(f"✅ Loaded environment variables from {_project_env}")
+from hermes_cli.env_loader import load_hermes_dotenv
+
+_loaded_env_paths = load_hermes_dotenv(hermes_home=_hermes_home, project_env=_project_env)
+for _env_path in _loaded_env_paths:
+    print(f"✅ Loaded environment variables from {_env_path}")

 # Set terminal working directory to tinker-atropos submodule
 # This ensures terminal commands run in the right context for RL work
@@ -0,0 +1,389 @@
+#!/usr/bin/env python3
+"""Discord Voice Doctor — diagnostic tool for voice channel support.
+
+Checks all dependencies, configuration, and bot permissions needed
+for Discord voice mode to work correctly.
+
+Usage:
+    python scripts/discord-voice-doctor.py
+    .venv/bin/python scripts/discord-voice-doctor.py
+"""
+
+import os
+import sys
+import shutil
+from pathlib import Path
+
+# Resolve project root
+SCRIPT_DIR = Path(__file__).resolve().parent
+PROJECT_ROOT = SCRIPT_DIR.parent
+sys.path.insert(0, str(PROJECT_ROOT))
+
+HERMES_HOME = Path(os.getenv("HERMES_HOME", Path.home() / ".hermes"))
+ENV_FILE = HERMES_HOME / ".env"
+
+OK = "\033[92m\u2713\033[0m"
+FAIL = "\033[91m\u2717\033[0m"
+WARN = "\033[93m!\033[0m"
+
+# Track whether discord.py is available for later sections
+_discord_available = False
+
+
+def mask(value):
+    """Mask sensitive value: show only first 4 chars."""
+    if not value or len(value) < 8:
+        return "****"
+    return f"{value[:4]}{'*' * (len(value) - 4)}"
+
+
+def check(label, ok, detail=""):
+    symbol = OK if ok else FAIL
+    msg = f"  {symbol} {label}"
+    if detail:
+        msg += f"  ({detail})"
+    print(msg)
+    return ok
+
+
+def warn(label, detail=""):
+    msg = f"  {WARN} {label}"
+    if detail:
+        msg += f"  ({detail})"
+    print(msg)
+
+
+def section(title):
+    print(f"\n\033[1m{title}\033[0m")
+
+
+def check_packages():
+    """Check Python package dependencies. Returns True if all critical deps OK."""
+    global _discord_available
+    section("Python Packages")
+    ok = True
+
+    # discord.py
+    try:
+        import discord
+        _discord_available = True
+        check("discord.py", True, f"v{discord.__version__}")
+    except ImportError:
+        check("discord.py", False, "pip install discord.py[voice]")
+        ok = False
+
+    # PyNaCl
+    try:
+        import nacl
+        ver = getattr(nacl, "__version__", "unknown")
+        try:
+            import nacl.secret
+            nacl.secret.Aead(bytes(32))
+            check("PyNaCl", True, f"v{ver}")
+        except (AttributeError, Exception):
+            check("PyNaCl (Aead)", False, f"v{ver} — need >=1.5.0")
+            ok = False
+    except ImportError:
+        check("PyNaCl", False, "pip install PyNaCl>=1.5.0")
+        ok = False
+
+    # davey (DAVE E2EE)
+    try:
+        import davey
+        check("davey (DAVE E2EE)", True, f"v{getattr(davey, '__version__', '?')}")
+    except ImportError:
+        check("davey (DAVE E2EE)", False, "pip install davey")
+        ok = False
+
+    # Optional: local STT
+    try:
+        import faster_whisper
+        check("faster-whisper (local STT)", True)
+    except ImportError:
+        warn("faster-whisper (local STT)", "not installed — local STT unavailable")
+
+    # Optional: TTS providers
+    try:
+        import edge_tts
+        check("edge-tts", True)
+    except ImportError:
+        warn("edge-tts", "not installed — edge TTS unavailable")
+
+    try:
+        import elevenlabs
+        check("elevenlabs SDK", True)
+    except ImportError:
+        warn("elevenlabs SDK", "not installed — premium TTS unavailable")
+
+    return ok
+
+
+def check_system_tools():
+    """Check system-level tools (opus, ffmpeg). Returns True if all OK."""
+    section("System Tools")
+    ok = True
+
+    # Opus codec
+    if _discord_available:
+        try:
+            import discord
+            opus_loaded = discord.opus.is_loaded()
+            if not opus_loaded:
+                import ctypes.util
+                opus_path = ctypes.util.find_library("opus")
+                if not opus_path:
+                    # Platform-specific fallback paths
+                    candidates = [
+                        "/opt/homebrew/lib/libopus.dylib",   # macOS Apple Silicon
+                        "/usr/local/lib/libopus.dylib",      # macOS Intel
+                        "/usr/lib/x86_64-linux-gnu/libopus.so.0",  # Debian/Ubuntu x86
+                        "/usr/lib/aarch64-linux-gnu/libopus.so.0", # Debian/Ubuntu ARM
+                        "/usr/lib/libopus.so",               # Arch Linux
+                        "/usr/lib64/libopus.so",             # RHEL/Fedora
+                    ]
+                    for p in candidates:
+                        if os.path.isfile(p):
+                            opus_path = p
+                            break
+                if opus_path:
+                    discord.opus.load_opus(opus_path)
+                    opus_loaded = discord.opus.is_loaded()
+            if opus_loaded:
+                check("Opus codec", True)
+            else:
+                check("Opus codec", False, "brew install opus / apt install libopus0")
+                ok = False
+        except Exception as e:
+            check("Opus codec", False, str(e))
+            ok = False
+    else:
+        warn("Opus codec", "skipped — discord.py not installed")
+
+    # ffmpeg
+    ffmpeg_path = shutil.which("ffmpeg")
+    if ffmpeg_path:
+        check("ffmpeg", True, ffmpeg_path)
+    else:
+        check("ffmpeg", False, "brew install ffmpeg / apt install ffmpeg")
+        ok = False
+
+    return ok
+
+
+def check_env_vars():
+    """Check environment variables. Returns (ok, token, groq_key, eleven_key)."""
+    section("Environment Variables")
+
+    # Load .env
+    try:
+        from dotenv import load_dotenv
+        if ENV_FILE.exists():
+            load_dotenv(ENV_FILE)
+    except ImportError:
+        pass
+
+    ok = True
+
+    token = os.getenv("DISCORD_BOT_TOKEN", "")
+    if token:
+        check("DISCORD_BOT_TOKEN", True, mask(token))
+    else:
+        check("DISCORD_BOT_TOKEN", False, "not set")
+        ok = False
+
+    # Allowed users — resolve usernames if possible
+    allowed = os.getenv("DISCORD_ALLOWED_USERS", "")
+    if allowed:
+        users = [u.strip() for u in allowed.split(",") if u.strip()]
+        user_labels = []
+        for uid in users:
+            label = mask(uid)
+            if token and uid.isdigit():
+                try:
+                    import requests
+                    r = requests.get(
+                        f"https://discord.com/api/v10/users/{uid}",
+                        headers={"Authorization": f"Bot {token}"},
+                        timeout=3,
+                    )
+                    if r.status_code == 200:
+                        label = f"{r.json().get('username', '?')} ({mask(uid)})"
+                except Exception:
+                    pass
+            user_labels.append(label)
+        check("DISCORD_ALLOWED_USERS", True, f"{len(users)} user(s): {', '.join(user_labels)}")
+    else:
+        warn("DISCORD_ALLOWED_USERS", "not set — all users can use voice")
+
+    groq_key = os.getenv("GROQ_API_KEY", "")
+    eleven_key = os.getenv("ELEVENLABS_API_KEY", "")
+
+    if groq_key:
+        check("GROQ_API_KEY (STT)", True, mask(groq_key))
+    else:
+        warn("GROQ_API_KEY", "not set — Groq STT unavailable")
+
+    if eleven_key:
+        check("ELEVENLABS_API_KEY (TTS)", True, mask(eleven_key))
+    else:
+        warn("ELEVENLABS_API_KEY", "not set — ElevenLabs TTS unavailable")
+
+    return ok, token, groq_key, eleven_key
+
+
+def check_config(groq_key, eleven_key):
+    """Check hermes config.yaml."""
+    section("Configuration")
+
+    config_path = HERMES_HOME / "config.yaml"
+    if config_path.exists():
+        try:
+            import yaml
+            with open(config_path) as f:
+                cfg = yaml.safe_load(f) or {}
+
+            stt_provider = cfg.get("stt", {}).get("provider", "local")
+            tts_provider = cfg.get("tts", {}).get("provider", "edge")
+            check("STT provider", True, stt_provider)
+            check("TTS provider", True, tts_provider)
+
+            if stt_provider == "groq" and not groq_key:
+                warn("STT config says groq but GROQ_API_KEY is missing")
+            if tts_provider == "elevenlabs" and not eleven_key:
+                warn("TTS config says elevenlabs but ELEVENLABS_API_KEY is missing")
+        except Exception as e:
+            warn("config.yaml", f"parse error: {e}")
+    else:
+        warn("config.yaml", "not found — using defaults")
+
+    # Voice mode state
+    voice_mode_path = HERMES_HOME / "gateway_voice_mode.json"
+    if voice_mode_path.exists():
+        try:
+            import json
+            modes = json.loads(voice_mode_path.read_text())
+            off_count = sum(1 for v in modes.values() if v == "off")
+            all_count = sum(1 for v in modes.values() if v == "all")
+            check("Voice mode state", True, f"{all_count} on, {off_count} off, {len(modes)} total")
+        except Exception:
+            warn("Voice mode state", "parse error")
+    else:
+        check("Voice mode state", True, "no saved state (fresh)")
+
+
+def check_bot_permissions(token):
+    """Check bot permissions via Discord API. Returns True if all OK."""
+    section("Bot Permissions")
+
+    if not token:
+        warn("Bot permissions", "no token — skipping")
+        return True
+
+    try:
+        import requests
+    except ImportError:
+        warn("Bot permissions", "requests not installed — skipping")
+        return True
+
+    VOICE_PERMS = {
+        "Priority Speaker":      8,
+        "Stream":                9,
+        "View Channel":         10,
+        "Send Messages":        11,
+        "Embed Links":          14,
+        "Attach Files":         15,
+        "Read Message History": 16,
+        "Connect":              20,
+        "Speak":                21,
+        "Mute Members":         22,
+        "Deafen Members":       23,
+        "Move Members":         24,
+        "Use VAD":              25,
+        "Send Voice Messages":  46,
+    }
+    REQUIRED_PERMS = {"Connect", "Speak", "View Channel", "Send Messages"}
+    ok = True
+
+    try:
+        headers = {"Authorization": f"Bot {token}"}
+        r = requests.get("https://discord.com/api/v10/users/@me", headers=headers, timeout=5)
+
+        if r.status_code == 401:
+            check("Bot login", False, "invalid token (401)")
+            return False
+        if r.status_code != 200:
+            check("Bot login", False, f"HTTP {r.status_code}")
+            return False
+
+        bot = r.json()
+        bot_name = bot.get("username", "?")
+        check("Bot login", True, f"{bot_name[:3]}{'*' * (len(bot_name) - 3)}")
+
+        # Check guilds
+        r2 = requests.get("https://discord.com/api/v10/users/@me/guilds", headers=headers, timeout=5)
+        if r2.status_code != 200:
+            warn("Guilds", f"HTTP {r2.status_code}")
+            return ok
+
+        guilds = r2.json()
+        check("Guilds", True, f"{len(guilds)} guild(s)")
+
+        for g in guilds[:5]:
+            perms = int(g.get("permissions", 0))
+            is_admin = bool(perms & (1 << 3))
+
+            if is_admin:
+                print(f"    {OK} {g['name']}: Administrator (all permissions)")
+                continue
+
+            has = []
+            missing = []
+            for name, bit in sorted(VOICE_PERMS.items(), key=lambda x: x[1]):
+                if perms & (1 << bit):
+                    has.append(name)
+                elif name in REQUIRED_PERMS:
+                    missing.append(name)
+
+            if missing:
+                print(f"    {FAIL} {g['name']}: missing {', '.join(missing)}")
+                ok = False
+            else:
+                print(f"    {OK} {g['name']}: {', '.join(has)}")
+
+    except requests.exceptions.Timeout:
+        warn("Bot permissions", "Discord API timeout")
+    except requests.exceptions.ConnectionError:
+        warn("Bot permissions", "cannot reach Discord API")
+    except Exception as e:
+        warn("Bot permissions", f"check failed: {e}")
+
+    return ok
+
+
+def main():
+    print()
+    print("\033[1m" + "=" * 50 + "\033[0m")
+    print("\033[1m  Discord Voice Doctor\033[0m")
+    print("\033[1m" + "=" * 50 + "\033[0m")
+
+    all_ok = True
+
+    all_ok &= check_packages()
+    all_ok &= check_system_tools()
+    env_ok, token, groq_key, eleven_key = check_env_vars()
+    all_ok &= env_ok
+    check_config(groq_key, eleven_key)
+    all_ok &= check_bot_permissions(token)
+
+    # Summary
+    print()
+    print("\033[1m" + "-" * 50 + "\033[0m")
+    if all_ok:
+        print(f"  {OK} \033[92mAll checks passed — voice mode ready!\033[0m")
+    else:
+        print(f"  {FAIL} \033[91mSome checks failed — fix issues above.\033[0m")
+    print()
+
+
+if __name__ == "__main__":
+    main()
@@ -114,6 +114,7 @@ curl -s "https://export.arxiv.org/api/query?id_list=2402.03300,2401.12345,2403.0

 After fetching metadata for a paper, generate a BibTeX entry:

+{% raw %}
 ```bash
 curl -s "https://export.arxiv.org/api/query?id_list=1706.03762" | python3 -c "
 import sys, xml.etree.ElementTree as ET
@@ -139,6 +140,7 @@ print(f'  url       = {{https://arxiv.org/abs/{raw_id}}}')
 print('}')
 "
 ```
+{% endraw %}

 ## Reading Paper Content

@@ -215,6 +215,7 @@ def generate_citation_key(bibtex: str) -> str:

 ### Complete Citation Manager Class

+{% raw %}
 ```python
 """
 Citation Manager - Verified citation workflow for ML papers.
@@ -377,6 +378,7 @@ if __name__ == "__main__":
    if bibtex:
        print(bibtex)
 ```
+{% endraw %}

 ### Quick Functions

@@ -295,3 +295,97 @@ class TestOnConnect:
        mock_conn = MagicMock(spec=acp.Client)
        agent.on_connect(mock_conn)
        assert agent._conn is mock_conn
+
+
+# ---------------------------------------------------------------------------
+# Slash commands
+# ---------------------------------------------------------------------------
+
+
+class TestSlashCommands:
+    """Test slash command dispatch in the ACP adapter."""
+
+    def _make_state(self, mock_manager):
+        state = mock_manager.create_session(cwd="/tmp")
+        state.agent.model = "test-model"
+        state.agent.provider = "openrouter"
+        state.model = "test-model"
+        return state
+
+    def test_help_lists_commands(self, agent, mock_manager):
+        state = self._make_state(mock_manager)
+        result = agent._handle_slash_command("/help", state)
+        assert result is not None
+        assert "/help" in result
+        assert "/model" in result
+        assert "/tools" in result
+        assert "/reset" in result
+
+    def test_model_shows_current(self, agent, mock_manager):
+        state = self._make_state(mock_manager)
+        result = agent._handle_slash_command("/model", state)
+        assert "test-model" in result
+
+    def test_context_empty(self, agent, mock_manager):
+        state = self._make_state(mock_manager)
+        state.history = []
+        result = agent._handle_slash_command("/context", state)
+        assert "empty" in result.lower()
+
+    def test_context_with_messages(self, agent, mock_manager):
+        state = self._make_state(mock_manager)
+        state.history = [
+            {"role": "user", "content": "hello"},
+            {"role": "assistant", "content": "hi"},
+        ]
+        result = agent._handle_slash_command("/context", state)
+        assert "2 messages" in result
+        assert "user: 1" in result
+
+    def test_reset_clears_history(self, agent, mock_manager):
+        state = self._make_state(mock_manager)
+        state.history = [{"role": "user", "content": "hello"}]
+        result = agent._handle_slash_command("/reset", state)
+        assert "cleared" in result.lower()
+        assert len(state.history) == 0
+
+    def test_version(self, agent, mock_manager):
+        state = self._make_state(mock_manager)
+        result = agent._handle_slash_command("/version", state)
+        assert HERMES_VERSION in result
+
+    def test_unknown_command_returns_none(self, agent, mock_manager):
+        state = self._make_state(mock_manager)
+        result = agent._handle_slash_command("/nonexistent", state)
+        assert result is None
+
+    @pytest.mark.asyncio
+    async def test_slash_command_intercepted_in_prompt(self, agent, mock_manager):
+        """Slash commands should be handled without calling the LLM."""
+        new_resp = await agent.new_session(cwd="/tmp")
+        mock_conn = AsyncMock(spec=acp.Client)
+        agent._conn = mock_conn
+
+        prompt = [TextContentBlock(type="text", text="/help")]
+        resp = await agent.prompt(prompt=prompt, session_id=new_resp.session_id)
+
+        assert resp.stop_reason == "end_turn"
+        mock_conn.session_update.assert_called_once()
+
+    @pytest.mark.asyncio
+    async def test_unknown_slash_falls_through_to_llm(self, agent, mock_manager):
+        """Unknown /commands should be sent to the LLM, not intercepted."""
+        new_resp = await agent.new_session(cwd="/tmp")
+        mock_conn = AsyncMock(spec=acp.Client)
+        agent._conn = mock_conn
+
+        # Mock run_in_executor to avoid actually running the agent
+        with patch("asyncio.get_running_loop") as mock_loop:
+            mock_loop.return_value.run_in_executor = AsyncMock(return_value={
+                "final_response": "I processed /foo",
+                "messages": [],
+            })
+            prompt = [TextContentBlock(type="text", text="/foo bar")]
+            resp = await agent.prompt(prompt=prompt, session_id=new_resp.session_id)
+
+        assert resp.stop_reason == "end_turn"
@@ -195,7 +195,7 @@ class TestGetTextAuxiliaryClient:
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
             patch("agent.auxiliary_client.OpenAI") as mock_openai:
            client, model = get_text_auxiliary_client()
-        assert model == "gpt-5.3-codex"
+        assert model == "gpt-5.2-codex"
        # Returns a CodexAuxiliaryClient wrapper, not a raw OpenAI client
        from agent.auxiliary_client import CodexAuxiliaryClient
        assert isinstance(client, CodexAuxiliaryClient)
@@ -288,7 +288,7 @@ class TestVisionClientFallback:
            client, model = get_vision_auxiliary_client()
        from agent.auxiliary_client import CodexAuxiliaryClient
        assert isinstance(client, CodexAuxiliaryClient)
-        assert model == "gpt-5.3-codex"
+        assert model == "gpt-5.2-codex"

    def test_vision_auto_falls_back_to_custom_endpoint(self, monkeypatch):
        """Custom endpoint is used as fallback in vision auto mode.
@@ -371,7 +371,7 @@ class TestVisionClientFallback:
            client, model = get_vision_auxiliary_client()
        from agent.auxiliary_client import CodexAuxiliaryClient
        assert isinstance(client, CodexAuxiliaryClient)
-        assert model == "gpt-5.3-codex"
+        assert model == "gpt-5.2-codex"


 class TestGetAuxiliaryProvider:
@@ -489,7 +489,7 @@ class TestResolveForcedProvider:
            client, model = _resolve_forced_provider("main")
        from agent.auxiliary_client import CodexAuxiliaryClient
        assert isinstance(client, CodexAuxiliaryClient)
-        assert model == "gpt-5.3-codex"
+        assert model == "gpt-5.2-codex"

    def test_forced_codex(self, codex_auth_dir, monkeypatch):
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
@@ -497,7 +497,7 @@ class TestResolveForcedProvider:
            client, model = _resolve_forced_provider("codex")
        from agent.auxiliary_client import CodexAuxiliaryClient
        assert isinstance(client, CodexAuxiliaryClient)
-        assert model == "gpt-5.3-codex"
+        assert model == "gpt-5.2-codex"

    def test_forced_codex_no_token(self, monkeypatch):
        with patch("agent.auxiliary_client._read_codex_access_token", return_value=None):
@@ -0,0 +1,123 @@
+"""Tests for get_tool_emoji in agent/display.py — skin + registry integration."""
+
+from unittest.mock import patch as mock_patch, MagicMock
+
+from agent.display import get_tool_emoji
+
+
+class TestGetToolEmoji:
+    """Verify the skin → registry → fallback resolution chain."""
+
+    def test_returns_registry_emoji_when_no_skin(self):
+        """Registry-registered emoji is used when no skin is active."""
+        mock_registry = MagicMock()
+        mock_registry.get_emoji.return_value = "🎨"
+        with mock_patch("agent.display._get_skin", return_value=None), \
+             mock_patch("agent.display.registry", mock_registry, create=True):
+            # Need to patch the import inside get_tool_emoji
+            pass
+        # Direct test: patch the lazy import path
+        with mock_patch("agent.display._get_skin", return_value=None):
+            # get_tool_emoji will try to import registry — mock that
+            mock_reg = MagicMock()
+            mock_reg.get_emoji.return_value = "📖"
+            with mock_patch.dict("sys.modules", {}):
+                import sys
+                # Patch tools.registry module
+                mock_module = MagicMock()
+                mock_module.registry = mock_reg
+                with mock_patch.dict(sys.modules, {"tools.registry": mock_module}):
+                    result = get_tool_emoji("read_file")
+                    assert result == "📖"
+
+    def test_skin_override_takes_precedence(self):
+        """Skin tool_emojis override registry defaults."""
+        skin = MagicMock()
+        skin.tool_emojis = {"terminal": "⚔"}
+        with mock_patch("agent.display._get_skin", return_value=skin):
+            result = get_tool_emoji("terminal")
+            assert result == "⚔"
+
+    def test_skin_empty_dict_falls_through(self):
+        """Empty skin tool_emojis falls through to registry."""
+        skin = MagicMock()
+        skin.tool_emojis = {}
+        mock_reg = MagicMock()
+        mock_reg.get_emoji.return_value = "💻"
+        import sys
+        mock_module = MagicMock()
+        mock_module.registry = mock_reg
+        with mock_patch("agent.display._get_skin", return_value=skin), \
+             mock_patch.dict(sys.modules, {"tools.registry": mock_module}):
+            result = get_tool_emoji("terminal")
+            assert result == "💻"
+
+    def test_fallback_default(self):
+        """When neither skin nor registry has an emoji, use the default."""
+        skin = MagicMock()
+        skin.tool_emojis = {}
+        mock_reg = MagicMock()
+        mock_reg.get_emoji.return_value = ""
+        import sys
+        mock_module = MagicMock()
+        mock_module.registry = mock_reg
+        with mock_patch("agent.display._get_skin", return_value=skin), \
+             mock_patch.dict(sys.modules, {"tools.registry": mock_module}):
+            result = get_tool_emoji("unknown_tool")
+            assert result == "⚡"
+
+    def test_custom_default(self):
+        """Custom default is returned when nothing matches."""
+        with mock_patch("agent.display._get_skin", return_value=None):
+            mock_reg = MagicMock()
+            mock_reg.get_emoji.return_value = ""
+            import sys
+            mock_module = MagicMock()
+            mock_module.registry = mock_reg
+            with mock_patch.dict(sys.modules, {"tools.registry": mock_module}):
+                result = get_tool_emoji("x", default="⚙️")
+                assert result == "⚙️"
+
+    def test_skin_override_only_for_matching_tool(self):
+        """Skin override for one tool doesn't affect others."""
+        skin = MagicMock()
+        skin.tool_emojis = {"terminal": "⚔"}
+        mock_reg = MagicMock()
+        mock_reg.get_emoji.return_value = "🔍"
+        import sys
+        mock_module = MagicMock()
+        mock_module.registry = mock_reg
+        with mock_patch("agent.display._get_skin", return_value=skin), \
+             mock_patch.dict(sys.modules, {"tools.registry": mock_module}):
+            assert get_tool_emoji("terminal") == "⚔"  # skin override
+            assert get_tool_emoji("web_search") == "🔍"  # registry fallback
+
+
+class TestSkinConfigToolEmojis:
+    """Verify SkinConfig handles tool_emojis field correctly."""
+
+    def test_skin_config_has_tool_emojis_field(self):
+        from hermes_cli.skin_engine import SkinConfig
+        skin = SkinConfig(name="test")
+        assert skin.tool_emojis == {}
+
+    def test_skin_config_accepts_tool_emojis(self):
+        from hermes_cli.skin_engine import SkinConfig
+        emojis = {"terminal": "⚔", "web_search": "🔮"}
+        skin = SkinConfig(name="test", tool_emojis=emojis)
+        assert skin.tool_emojis == emojis
+
+    def test_build_skin_config_includes_tool_emojis(self):
+        from hermes_cli.skin_engine import _build_skin_config
+        data = {
+            "name": "custom",
+            "tool_emojis": {"terminal": "🗡️", "patch": "⚒️"},
+        }
+        skin = _build_skin_config(data)
+        assert skin.tool_emojis == {"terminal": "🗡️", "patch": "⚒️"}
+
+    def test_build_skin_config_empty_tool_emojis_default(self):
+        from hermes_cli.skin_engine import _build_skin_config
+        data = {"name": "minimal"}
+        skin = _build_skin_config(data)
+        assert skin.tool_emojis == {}
@@ -0,0 +1,61 @@
+from agent.smart_model_routing import choose_cheap_model_route
+
+
+_BASE_CONFIG = {
+    "enabled": True,
+    "cheap_model": {
+        "provider": "openrouter",
+        "model": "google/gemini-2.5-flash",
+    },
+}
+
+
+def test_returns_none_when_disabled():
+    cfg = {**_BASE_CONFIG, "enabled": False}
+    assert choose_cheap_model_route("what time is it in tokyo?", cfg) is None
+
+
+def test_routes_short_simple_prompt():
+    result = choose_cheap_model_route("what time is it in tokyo?", _BASE_CONFIG)
+    assert result is not None
+    assert result["provider"] == "openrouter"
+    assert result["model"] == "google/gemini-2.5-flash"
+    assert result["routing_reason"] == "simple_turn"
+
+
+def test_skips_long_prompt():
+    prompt = "please summarize this carefully " * 20
+    assert choose_cheap_model_route(prompt, _BASE_CONFIG) is None
+
+
+def test_skips_code_like_prompt():
+    prompt = "debug this traceback: ```python\nraise ValueError('bad')\n```"
+    assert choose_cheap_model_route(prompt, _BASE_CONFIG) is None
+
+
+def test_skips_tool_heavy_prompt_keywords():
+    prompt = "implement a patch for this docker error"
+    assert choose_cheap_model_route(prompt, _BASE_CONFIG) is None
+
+
+def test_resolve_turn_route_falls_back_to_primary_when_route_runtime_cannot_be_resolved(monkeypatch):
+    from agent.smart_model_routing import resolve_turn_route
+
+    monkeypatch.setattr(
+        "hermes_cli.runtime_provider.resolve_runtime_provider",
+        lambda **kwargs: (_ for _ in ()).throw(RuntimeError("bad route")),
+    )
+    result = resolve_turn_route(
+        "what time is it in tokyo?",
+        _BASE_CONFIG,
+        {
+            "model": "anthropic/claude-sonnet-4",
+            "provider": "openrouter",
+            "base_url": "https://openrouter.ai/api/v1",
+            "api_mode": "chat_completions",
+            "api_key": "sk-primary",
+        },
+    )
+    assert result["model"] == "anthropic/claude-sonnet-4"
+    assert result["runtime"]["provider"] == "openrouter"
+    assert result["label"] is None
@@ -26,6 +26,12 @@ def _isolate_hermes_home(tmp_path, monkeypatch):
    (fake_home / "memories").mkdir()
    (fake_home / "skills").mkdir()
    monkeypatch.setenv("HERMES_HOME", str(fake_home))
+    # Reset plugin singleton so tests don't leak plugins from ~/.hermes/plugins/
+    try:
+        import hermes_cli.plugins as _plugins_mod
+        monkeypatch.setattr(_plugins_mod, "_plugin_manager", None)
+    except Exception:
+        pass
    # Tests should not inherit the agent's current gateway/messaging surface.
    # Individual tests that need gateway behavior set these explicitly.
    monkeypatch.delenv("HERMES_SESSION_PLATFORM", raising=False)
@@ -83,6 +83,14 @@ class TestSessionResetPolicy:
        assert policy.at_hour == 4
        assert policy.idle_minutes == 1440

+    def test_from_dict_treats_null_values_as_defaults(self):
+        restored = SessionResetPolicy.from_dict(
+            {"mode": None, "at_hour": None, "idle_minutes": None}
+        )
+        assert restored.mode == "both"
+        assert restored.at_hour == 4
+        assert restored.idle_minutes == 1440
+

 class TestGatewayConfigRoundtrip:
    def test_full_roundtrip(self):
@@ -96,6 +104,7 @@ class TestGatewayConfigRoundtrip:
            },
            reset_triggers=["/new"],
            quick_commands={"limits": {"type": "exec", "command": "echo ok"}},
+            group_sessions_per_user=False,
        )
        d = config.to_dict()
        restored = GatewayConfig.from_dict(d)
@@ -104,6 +113,7 @@ class TestGatewayConfigRoundtrip:
        assert restored.platforms[Platform.TELEGRAM].token == "tok_123"
        assert restored.reset_triggers == ["/new"]
        assert restored.quick_commands == {"limits": {"type": "exec", "command": "echo ok"}}
+        assert restored.group_sessions_per_user is False


 class TestLoadGatewayConfig:
@@ -125,6 +135,18 @@ class TestLoadGatewayConfig:

        assert config.quick_commands == {"limits": {"type": "exec", "command": "echo ok"}}

+    def test_bridges_group_sessions_per_user_from_config_yaml(self, tmp_path, monkeypatch):
+        hermes_home = tmp_path / ".hermes"
+        hermes_home.mkdir()
+        config_path = hermes_home / "config.yaml"
+        config_path.write_text("group_sessions_per_user: false\n", encoding="utf-8")
+
+        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+
+        config = load_gateway_config()
+
+        assert config.group_sessions_per_user is False
+
    def test_invalid_quick_commands_in_config_yaml_are_ignored(self, tmp_path, monkeypatch):
        hermes_home = tmp_path / ".hermes"
        hermes_home.mkdir()
@@ -252,3 +252,109 @@ async def test_discord_dms_ignore_mention_requirement(adapter, monkeypatch):
    event = adapter.handle_message.await_args.args[0]
    assert event.text == "dm without mention"
    assert event.source.chat_type == "dm"
+
+
+@pytest.mark.asyncio
+async def test_discord_auto_thread_enabled_by_default(adapter, monkeypatch):
+    """Auto-threading should be enabled by default (DISCORD_AUTO_THREAD defaults to 'true')."""
+    monkeypatch.delenv("DISCORD_AUTO_THREAD", raising=False)
+    monkeypatch.setenv("DISCORD_REQUIRE_MENTION", "false")
+
+    # Patch _auto_create_thread to return a fake thread
+    fake_thread = FakeThread(channel_id=999, name="auto-thread")
+    adapter._auto_create_thread = AsyncMock(return_value=fake_thread)
+
+    message = make_message(channel=FakeTextChannel(channel_id=123), content="hello")
+
+    await adapter._handle_message(message)
+
+    adapter._auto_create_thread.assert_awaited_once()
+    adapter.handle_message.assert_awaited_once()
+    event = adapter.handle_message.await_args.args[0]
+    assert event.source.chat_type == "thread"
+    assert event.source.thread_id == "999"
+
+
+@pytest.mark.asyncio
+async def test_discord_auto_thread_can_be_disabled(adapter, monkeypatch):
+    """Setting auto_thread to false skips thread creation."""
+    monkeypatch.setenv("DISCORD_AUTO_THREAD", "false")
+    monkeypatch.setenv("DISCORD_REQUIRE_MENTION", "false")
+
+    adapter._auto_create_thread = AsyncMock()
+
+    message = make_message(channel=FakeTextChannel(channel_id=123), content="hello")
+
+    await adapter._handle_message(message)
+
+    adapter._auto_create_thread.assert_not_awaited()
+    adapter.handle_message.assert_awaited_once()
+    event = adapter.handle_message.await_args.args[0]
+    assert event.source.chat_type == "group"
+
+
+@pytest.mark.asyncio
+async def test_discord_bot_thread_skips_mention_requirement(adapter, monkeypatch):
+    """Messages in a thread the bot has participated in should not require @mention."""
+    monkeypatch.setenv("DISCORD_REQUIRE_MENTION", "true")
+    monkeypatch.delenv("DISCORD_FREE_RESPONSE_CHANNELS", raising=False)
+    monkeypatch.setenv("DISCORD_AUTO_THREAD", "false")
+
+    # Simulate bot having previously participated in thread 456
+    adapter._bot_participated_threads.add("456")
+
+    thread = FakeThread(channel_id=456, name="existing thread")
+    message = make_message(channel=thread, content="follow-up without mention")
+
+    await adapter._handle_message(message)
+
+    adapter.handle_message.assert_awaited_once()
+    event = adapter.handle_message.await_args.args[0]
+    assert event.text == "follow-up without mention"
+    assert event.source.chat_type == "thread"
+
+
+@pytest.mark.asyncio
+async def test_discord_unknown_thread_still_requires_mention(adapter, monkeypatch):
+    """Messages in a thread the bot hasn't participated in should still require @mention."""
+    monkeypatch.setenv("DISCORD_REQUIRE_MENTION", "true")
+    monkeypatch.delenv("DISCORD_FREE_RESPONSE_CHANNELS", raising=False)
+    monkeypatch.setenv("DISCORD_AUTO_THREAD", "false")
+
+    # Bot has NOT participated in thread 789
+    thread = FakeThread(channel_id=789, name="some thread")
+    message = make_message(channel=thread, content="hello from unknown thread")
+
+    await adapter._handle_message(message)
+
+    adapter.handle_message.assert_not_awaited()
+
+
+@pytest.mark.asyncio
+async def test_discord_auto_thread_tracks_participation(adapter, monkeypatch):
+    """Auto-created threads should be tracked for future mention-free replies."""
+    monkeypatch.delenv("DISCORD_AUTO_THREAD", raising=False)
+    monkeypatch.setenv("DISCORD_REQUIRE_MENTION", "false")
+
+    fake_thread = FakeThread(channel_id=555, name="auto-thread")
+    adapter._auto_create_thread = AsyncMock(return_value=fake_thread)
+
+    message = make_message(channel=FakeTextChannel(channel_id=123), content="start a thread")
+
+    await adapter._handle_message(message)
+
+    assert "555" in adapter._bot_participated_threads
+
+
+@pytest.mark.asyncio
+async def test_discord_thread_participation_tracked_on_dispatch(adapter, monkeypatch):
+    """When the bot processes a message in a thread, it tracks participation."""
+    monkeypatch.setenv("DISCORD_REQUIRE_MENTION", "false")
+    monkeypatch.setenv("DISCORD_AUTO_THREAD", "false")
+
+    thread = FakeThread(channel_id=777, name="manually created thread")
+    message = make_message(channel=thread, content="hello in thread")
+
+    await adapter._handle_message(message)
+
+    assert "777" in adapter._bot_participated_threads
@@ -363,11 +363,37 @@ async def test_auto_thread_creates_thread_and_redirects(adapter, monkeypatch):


@pytest.mark.asyncio
-async def test_auto_thread_disabled_by_default(adapter, monkeypatch):
-    """Without DISCORD_AUTO_THREAD, messages stay in the channel."""
+async def test_auto_thread_enabled_by_default_slash_commands(adapter, monkeypatch):
+    """Without DISCORD_AUTO_THREAD env var, auto-threading is enabled (default: true)."""
    monkeypatch.delenv("DISCORD_AUTO_THREAD", raising=False)
    monkeypatch.setenv("DISCORD_REQUIRE_MENTION", "false")

+    fake_thread = _FakeThreadChannel(channel_id=999, name="auto-thread")
+    adapter._auto_create_thread = AsyncMock(return_value=fake_thread)
+
+    captured_events = []
+
+    async def capture_handle(event):
+        captured_events.append(event)
+
+    adapter.handle_message = capture_handle
+
+    msg = _fake_message(_FakeTextChannel())
+
+    await adapter._handle_message(msg)
+
+    adapter._auto_create_thread.assert_awaited_once()
+    assert len(captured_events) == 1
+    assert captured_events[0].source.chat_id == "999"  # redirected to thread
+    assert captured_events[0].source.chat_type == "thread"
+
+
+@pytest.mark.asyncio
+async def test_auto_thread_can_be_disabled(adapter, monkeypatch):
+    """Setting DISCORD_AUTO_THREAD=false keeps messages in the channel."""
+    monkeypatch.setenv("DISCORD_AUTO_THREAD", "false")
+    monkeypatch.setenv("DISCORD_REQUIRE_MENTION", "false")
+
    adapter._auto_create_thread = AsyncMock()

    captured_events = []
@@ -0,0 +1,106 @@
+import asyncio
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+from gateway.config import GatewayConfig, Platform, PlatformConfig
+from gateway.platforms.base import BasePlatformAdapter, MessageEvent, SendResult
+from gateway.run import GatewayRunner
+from gateway.session import SessionSource, build_session_key
+
+
+class StubAdapter(BasePlatformAdapter):
+    def __init__(self):
+        super().__init__(PlatformConfig(enabled=True, token="***"), Platform.TELEGRAM)
+
+    async def connect(self):
+        return True
+
+    async def disconnect(self):
+        return None
+
+    async def send(self, chat_id, content, reply_to=None, metadata=None):
+        return SendResult(success=True, message_id="1")
+
+    async def send_typing(self, chat_id, metadata=None):
+        return None
+
+    async def get_chat_info(self, chat_id):
+        return {"id": chat_id}
+
+
+def _source(chat_id="123456", chat_type="dm"):
+    return SessionSource(
+        platform=Platform.TELEGRAM,
+        chat_id=chat_id,
+        chat_type=chat_type,
+    )
+
+
+@pytest.mark.asyncio
+async def test_cancel_background_tasks_cancels_inflight_message_processing():
+    adapter = StubAdapter()
+    release = asyncio.Event()
+
+    async def block_forever(_event):
+        await release.wait()
+        return None
+
+    adapter.set_message_handler(block_forever)
+    event = MessageEvent(text="work", source=_source(), message_id="1")
+
+    await adapter.handle_message(event)
+    await asyncio.sleep(0)
+
+    session_key = build_session_key(event.source)
+    assert session_key in adapter._active_sessions
+    assert adapter._background_tasks
+
+    await adapter.cancel_background_tasks()
+
+    assert adapter._background_tasks == set()
+    assert adapter._active_sessions == {}
+    assert adapter._pending_messages == {}
+
+
+@pytest.mark.asyncio
+async def test_gateway_stop_interrupts_running_agents_and_cancels_adapter_tasks():
+    runner = object.__new__(GatewayRunner)
+    runner.config = GatewayConfig(platforms={Platform.TELEGRAM: PlatformConfig(enabled=True, token="***")})
+    runner._running = True
+    runner._shutdown_event = asyncio.Event()
+    runner._exit_reason = None
+    runner._pending_messages = {"session": "pending text"}
+    runner._pending_approvals = {"session": {"command": "rm -rf /tmp/x"}}
+    runner._shutdown_all_gateway_honcho = lambda: None
+
+    adapter = StubAdapter()
+    release = asyncio.Event()
+
+    async def block_forever(_event):
+        await release.wait()
+        return None
+
+    adapter.set_message_handler(block_forever)
+    event = MessageEvent(text="work", source=_source(), message_id="1")
+    await adapter.handle_message(event)
+    await asyncio.sleep(0)
+
+    disconnect_mock = AsyncMock()
+    adapter.disconnect = disconnect_mock
+
+    session_key = build_session_key(event.source)
+    running_agent = MagicMock()
+    runner._running_agents = {session_key: running_agent}
+    runner.adapters = {Platform.TELEGRAM: adapter}
+
+    with patch("gateway.status.remove_pid_file"), patch("gateway.status.write_runtime_status"):
+        await runner.stop()
+
+    running_agent.interrupt.assert_called_once_with("Gateway shutting down")
+    disconnect_mock.assert_awaited_once()
+    assert runner.adapters == {}
+    assert runner._running_agents == {}
+    assert runner._pending_messages == {}
+    assert runner._pending_approvals == {}
+    assert runner._shutdown_event.is_set() is True
@@ -90,6 +90,7 @@ class TestGatewayHonchoLifecycle:
        runner = _make_runner()
        event = _make_event()
        runner._shutdown_gateway_honcho = MagicMock()
+        runner._async_flush_memories = AsyncMock()
        runner.session_store = MagicMock()
        runner.session_store._generate_session_key.return_value = "gateway-key"
        runner.session_store._entries = {
@@ -100,4 +101,31 @@ class TestGatewayHonchoLifecycle:
        result = await runner._handle_reset_command(event)

        runner._shutdown_gateway_honcho.assert_called_once_with("gateway-key")
+        runner._async_flush_memories.assert_called_once_with("old-session", "gateway-key")
        assert "Session reset" in result
+
+    def test_flush_memories_reuses_gateway_session_key_and_skips_honcho_sync(self):
+        runner = _make_runner()
+        runner.session_store = MagicMock()
+        runner.session_store.load_transcript.return_value = [
+            {"role": "user", "content": "a"},
+            {"role": "assistant", "content": "b"},
+            {"role": "user", "content": "c"},
+            {"role": "assistant", "content": "d"},
+        ]
+        tmp_agent = MagicMock()
+
+        with (
+            patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "test-key"}),
+            patch("gateway.run._resolve_gateway_model", return_value="model-name"),
+            patch("run_agent.AIAgent", return_value=tmp_agent) as mock_agent_cls,
+        ):
+            runner._flush_memories_for_session("old-session", "gateway-key")
+
+        mock_agent_cls.assert_called_once()
+        _, kwargs = mock_agent_cls.call_args
+        assert kwargs["session_id"] == "old-session"
+        assert kwargs["honcho_session_key"] == "gateway-key"
+        tmp_agent.run_conversation.assert_called_once()
+        _, run_kwargs = tmp_agent.run_conversation.call_args
+        assert run_kwargs["sync_honcho"] is False
@@ -11,7 +11,7 @@ import asyncio
 import pytest

 from gateway.config import Platform, PlatformConfig
-from gateway.platforms.base import BasePlatformAdapter, MessageEvent, SendResult
+from gateway.platforms.base import BasePlatformAdapter, MessageEvent, MessageType, SendResult
 from gateway.session import SessionSource, build_session_key


@@ -50,11 +50,11 @@ class TestInterruptKeyConsistency:
    """Ensure adapter interrupt methods are queried with session_key, not chat_id."""

    def test_session_key_differs_from_chat_id_for_dm(self):
-        """Session key for a DM is NOT the same as chat_id."""
+        """Session key for a DM is namespaced and includes the DM chat_id."""
        source = _source("123456", "dm")
        session_key = build_session_key(source)
        assert session_key != source.chat_id
-        assert session_key == "agent:main:telegram:dm"
+        assert session_key == "agent:main:telegram:dm:123456"

    def test_session_key_differs_from_chat_id_for_group(self):
        """Session key for a group chat includes prefix, unlike raw chat_id."""
@@ -122,3 +122,29 @@ class TestInterruptKeyConsistency:

        # Interrupt event was set
        assert adapter._active_sessions[session_key].is_set()
+
+    @pytest.mark.asyncio
+    async def test_photo_followup_is_queued_without_interrupt(self):
+        """Photo follow-ups should queue behind the active run instead of interrupting it."""
+        adapter = StubAdapter()
+        adapter.set_message_handler(lambda event: asyncio.sleep(0, result=None))
+
+        source = _source("-1001234", "group")
+        session_key = build_session_key(source)
+        interrupt_event = asyncio.Event()
+        adapter._active_sessions[session_key] = interrupt_event
+
+        event = MessageEvent(
+            text="caption",
+            source=source,
+            message_type=MessageType.PHOTO,
+            message_id="2",
+            media_urls=["/tmp/photo-a.jpg"],
+            media_types=["image/jpeg"],
+        )
+        await adapter.handle_message(event)
+
+        queued = adapter._pending_messages[session_key]
+        assert queued is event
+        assert queued.media_urls == ["/tmp/photo-a.jpg"]
+        assert interrupt_event.is_set() is False
@@ -0,0 +1,156 @@
+"""Tests for PII redaction in gateway session context prompts."""
+
+from gateway.session import (
+    SessionContext,
+    SessionSource,
+    build_session_context_prompt,
+    _hash_id,
+    _hash_sender_id,
+    _hash_chat_id,
+    _looks_like_phone,
+)
+from gateway.config import Platform, HomeChannel
+
+
+# ---------------------------------------------------------------------------
+# Low-level helpers
+# ---------------------------------------------------------------------------
+
+class TestHashHelpers:
+    def test_hash_id_deterministic(self):
+        assert _hash_id("12345") == _hash_id("12345")
+
+    def test_hash_id_12_hex_chars(self):
+        h = _hash_id("user-abc")
+        assert len(h) == 12
+        assert all(c in "0123456789abcdef" for c in h)
+
+    def test_hash_sender_id_prefix(self):
+        assert _hash_sender_id("12345").startswith("user_")
+        assert len(_hash_sender_id("12345")) == 17  # "user_" + 12
+
+    def test_hash_chat_id_preserves_prefix(self):
+        result = _hash_chat_id("telegram:12345")
+        assert result.startswith("telegram:")
+        assert "12345" not in result
+
+    def test_hash_chat_id_no_prefix(self):
+        result = _hash_chat_id("12345")
+        assert len(result) == 12
+        assert "12345" not in result
+
+    def test_looks_like_phone(self):
+        assert _looks_like_phone("+15551234567")
+        assert _looks_like_phone("15551234567")
+        assert _looks_like_phone("+1-555-123-4567")
+        assert not _looks_like_phone("alice")
+        assert not _looks_like_phone("user-123")
+        assert not _looks_like_phone("")
+
+
+# ---------------------------------------------------------------------------
+# Integration: build_session_context_prompt
+# ---------------------------------------------------------------------------
+
+def _make_context(
+    user_id="user-123",
+    user_name=None,
+    chat_id="telegram:99999",
+    platform=Platform.TELEGRAM,
+    home_channels=None,
+):
+    source = SessionSource(
+        platform=platform,
+        chat_id=chat_id,
+        chat_type="dm",
+        user_id=user_id,
+        user_name=user_name,
+    )
+    return SessionContext(
+        source=source,
+        connected_platforms=[platform],
+        home_channels=home_channels or {},
+    )
+
+
+class TestBuildSessionContextPromptRedaction:
+    def test_no_redaction_by_default(self):
+        ctx = _make_context(user_id="user-123")
+        prompt = build_session_context_prompt(ctx)
+        assert "user-123" in prompt
+
+    def test_user_id_hashed_when_redact_pii(self):
+        ctx = _make_context(user_id="user-123")
+        prompt = build_session_context_prompt(ctx, redact_pii=True)
+        assert "user-123" not in prompt
+        assert "user_" in prompt  # hashed ID present
+
+    def test_user_name_not_redacted(self):
+        ctx = _make_context(user_id="user-123", user_name="Alice")
+        prompt = build_session_context_prompt(ctx, redact_pii=True)
+        assert "Alice" in prompt
+        # user_id should not appear when user_name is present (name takes priority)
+        assert "user-123" not in prompt
+
+    def test_home_channel_id_hashed(self):
+        hc = {
+            Platform.TELEGRAM: HomeChannel(
+                platform=Platform.TELEGRAM,
+                chat_id="telegram:99999",
+                name="Home Chat",
+            )
+        }
+        ctx = _make_context(home_channels=hc)
+        prompt = build_session_context_prompt(ctx, redact_pii=True)
+        assert "99999" not in prompt
+        assert "telegram:" in prompt  # prefix preserved
+        assert "Home Chat" in prompt  # name not redacted
+
+    def test_home_channel_id_preserved_without_redaction(self):
+        hc = {
+            Platform.TELEGRAM: HomeChannel(
+                platform=Platform.TELEGRAM,
+                chat_id="telegram:99999",
+                name="Home Chat",
+            )
+        }
+        ctx = _make_context(home_channels=hc)
+        prompt = build_session_context_prompt(ctx, redact_pii=False)
+        assert "99999" in prompt
+
+    def test_redaction_is_deterministic(self):
+        ctx = _make_context(user_id="+15551234567")
+        prompt1 = build_session_context_prompt(ctx, redact_pii=True)
+        prompt2 = build_session_context_prompt(ctx, redact_pii=True)
+        assert prompt1 == prompt2
+
+    def test_different_ids_produce_different_hashes(self):
+        ctx1 = _make_context(user_id="user-A")
+        ctx2 = _make_context(user_id="user-B")
+        p1 = build_session_context_prompt(ctx1, redact_pii=True)
+        p2 = build_session_context_prompt(ctx2, redact_pii=True)
+        assert p1 != p2
+
+    def test_discord_ids_not_redacted_even_with_flag(self):
+        """Discord needs real IDs for <@user_id> mentions."""
+        ctx = _make_context(user_id="123456789", platform=Platform.DISCORD)
+        prompt = build_session_context_prompt(ctx, redact_pii=True)
+        assert "123456789" in prompt
+
+    def test_whatsapp_ids_redacted(self):
+        ctx = _make_context(user_id="+15551234567", platform=Platform.WHATSAPP)
+        prompt = build_session_context_prompt(ctx, redact_pii=True)
+        assert "+15551234567" not in prompt
+        assert "user_" in prompt
+
+    def test_signal_ids_redacted(self):
+        ctx = _make_context(user_id="+15551234567", platform=Platform.SIGNAL)
+        prompt = build_session_context_prompt(ctx, redact_pii=True)
+        assert "+15551234567" not in prompt
+        assert "user_" in prompt
+
+    def test_slack_ids_not_redacted(self):
+        """Slack may need IDs for mentions too."""
+        ctx = _make_context(user_id="U12345ABC", platform=Platform.SLACK)
+        prompt = build_session_context_prompt(ctx, redact_pii=True)
+        assert "U12345ABC" in prompt
@@ -199,3 +199,28 @@ class TestHandleResumeCommand:

        assert real_key not in runner._running_agents
        db.close()
+
+    @pytest.mark.asyncio
+    async def test_resume_flushes_memories_with_gateway_session_key(self, tmp_path):
+        """Resume should preserve the gateway session key for Honcho flushes."""
+        from hermes_state import SessionDB
+
+        db = SessionDB(db_path=tmp_path / "state.db")
+        db.create_session("old_session", "telegram")
+        db.set_session_title("old_session", "Old Work")
+        db.create_session("current_session_001", "telegram")
+
+        event = _make_event(text="/resume Old Work")
+        runner = _make_runner(
+            session_db=db,
+            current_session_id="current_session_001",
+            event=event,
+        )
+
+        await runner._handle_resume_command(event)
+
+        runner._async_flush_memories.assert_called_once_with(
+            "current_session_001",
+            _session_key_for_event(event),
+        )
+        db.close()
@@ -0,0 +1,89 @@
+import pytest
+
+from gateway.config import GatewayConfig, Platform, PlatformConfig
+from gateway.platforms.base import BasePlatformAdapter
+from gateway.run import GatewayRunner
+from gateway.status import read_runtime_status
+
+
+class _RetryableFailureAdapter(BasePlatformAdapter):
+    def __init__(self):
+        super().__init__(PlatformConfig(enabled=True, token="***"), Platform.TELEGRAM)
+
+    async def connect(self) -> bool:
+        self._set_fatal_error(
+            "telegram_connect_error",
+            "Telegram startup failed: temporary DNS resolution failure.",
+            retryable=True,
+        )
+        return False
+
+    async def disconnect(self) -> None:
+        self._mark_disconnected()
+
+    async def send(self, chat_id, content, reply_to=None, metadata=None):
+        raise NotImplementedError
+
+    async def get_chat_info(self, chat_id):
+        return {"id": chat_id}
+
+
+class _DisabledAdapter(BasePlatformAdapter):
+    def __init__(self):
+        super().__init__(PlatformConfig(enabled=False, token="***"), Platform.TELEGRAM)
+
+    async def connect(self) -> bool:
+        raise AssertionError("connect should not be called for disabled platforms")
+
+    async def disconnect(self) -> None:
+        self._mark_disconnected()
+
+    async def send(self, chat_id, content, reply_to=None, metadata=None):
+        raise NotImplementedError
+
+    async def get_chat_info(self, chat_id):
+        return {"id": chat_id}
+
+
+@pytest.mark.asyncio
+async def test_runner_returns_failure_for_retryable_startup_errors(monkeypatch, tmp_path):
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+    config = GatewayConfig(
+        platforms={
+            Platform.TELEGRAM: PlatformConfig(enabled=True, token="***")
+        },
+        sessions_dir=tmp_path / "sessions",
+    )
+    runner = GatewayRunner(config)
+
+    monkeypatch.setattr(runner, "_create_adapter", lambda platform, platform_config: _RetryableFailureAdapter())
+
+    ok = await runner.start()
+
+    assert ok is False
+    assert runner.should_exit_cleanly is False
+    state = read_runtime_status()
+    assert state["gateway_state"] == "startup_failed"
+    assert "temporary DNS resolution failure" in state["exit_reason"]
+    assert state["platforms"]["telegram"]["state"] == "fatal"
+    assert state["platforms"]["telegram"]["error_code"] == "telegram_connect_error"
+
+
+@pytest.mark.asyncio
+async def test_runner_allows_cron_only_mode_when_no_platforms_are_enabled(monkeypatch, tmp_path):
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+    config = GatewayConfig(
+        platforms={
+            Platform.TELEGRAM: PlatformConfig(enabled=False, token="***")
+        },
+        sessions_dir=tmp_path / "sessions",
+    )
+    runner = GatewayRunner(config)
+
+    ok = await runner.start()
+
+    assert ok is True
+    assert runner.should_exit_cleanly is False
+    assert runner.adapters == {}
+    state = read_runtime_status()
+    assert state["gateway_state"] == "running"
@@ -338,7 +338,7 @@ class TestSessionStoreRewriteTranscript:

 class TestWhatsAppDMSessionKeyConsistency:
    """Regression: all session-key construction must go through build_session_key
-    so WhatsApp DMs include chat_id while other DMs do not."""
+    so DMs are isolated by chat_id across platforms."""

    @pytest.fixture()
    def store(self, tmp_path):
@@ -369,15 +369,72 @@ class TestWhatsAppDMSessionKeyConsistency:
        )
        assert store._generate_session_key(source) == build_session_key(source)

-    def test_telegram_dm_omits_chat_id(self):
-        """Non-WhatsApp DMs should still omit chat_id (single owner DM)."""
+    def test_store_creates_distinct_group_sessions_per_user(self, store):
+        first = SessionSource(
+            platform=Platform.DISCORD,
+            chat_id="guild-123",
+            chat_type="group",
+            user_id="alice",
+            user_name="Alice",
+        )
+        second = SessionSource(
+            platform=Platform.DISCORD,
+            chat_id="guild-123",
+            chat_type="group",
+            user_id="bob",
+            user_name="Bob",
+        )
+
+        first_entry = store.get_or_create_session(first)
+        second_entry = store.get_or_create_session(second)
+
+        assert first_entry.session_key == "agent:main:discord:group:guild-123:alice"
+        assert second_entry.session_key == "agent:main:discord:group:guild-123:bob"
+        assert first_entry.session_id != second_entry.session_id
+
+    def test_store_shares_group_sessions_when_disabled_in_config(self, store):
+        store.config.group_sessions_per_user = False
+
+        first = SessionSource(
+            platform=Platform.DISCORD,
+            chat_id="guild-123",
+            chat_type="group",
+            user_id="alice",
+            user_name="Alice",
+        )
+        second = SessionSource(
+            platform=Platform.DISCORD,
+            chat_id="guild-123",
+            chat_type="group",
+            user_id="bob",
+            user_name="Bob",
+        )
+
+        first_entry = store.get_or_create_session(first)
+        second_entry = store.get_or_create_session(second)
+
+        assert first_entry.session_key == "agent:main:discord:group:guild-123"
+        assert second_entry.session_key == "agent:main:discord:group:guild-123"
+        assert first_entry.session_id == second_entry.session_id
+
+    def test_telegram_dm_includes_chat_id(self):
+        """Non-WhatsApp DMs should also include chat_id to separate users."""
        source = SessionSource(
            platform=Platform.TELEGRAM,
            chat_id="99",
            chat_type="dm",
        )
        key = build_session_key(source)
-        assert key == "agent:main:telegram:dm"
+        assert key == "agent:main:telegram:dm:99"
+
+    def test_distinct_dm_chat_ids_get_distinct_session_keys(self):
+        """Different DM chats must not collapse into one shared session."""
+        first = SessionSource(platform=Platform.TELEGRAM, chat_id="99", chat_type="dm")
+        second = SessionSource(platform=Platform.TELEGRAM, chat_id="100", chat_type="dm")
+
+        assert build_session_key(first) == "agent:main:telegram:dm:99"
+        assert build_session_key(second) == "agent:main:telegram:dm:100"
+        assert build_session_key(first) != build_session_key(second)

    def test_discord_group_includes_chat_id(self):
        """Group/channel keys include chat_type and chat_id."""
@@ -389,6 +446,41 @@ class TestWhatsAppDMSessionKeyConsistency:
        key = build_session_key(source)
        assert key == "agent:main:discord:group:guild-123"

+    def test_group_sessions_are_isolated_per_user_when_user_id_present(self):
+        first = SessionSource(
+            platform=Platform.DISCORD,
+            chat_id="guild-123",
+            chat_type="group",
+            user_id="alice",
+        )
+        second = SessionSource(
+            platform=Platform.DISCORD,
+            chat_id="guild-123",
+            chat_type="group",
+            user_id="bob",
+        )
+
+        assert build_session_key(first) == "agent:main:discord:group:guild-123:alice"
+        assert build_session_key(second) == "agent:main:discord:group:guild-123:bob"
+        assert build_session_key(first) != build_session_key(second)
+
+    def test_group_sessions_can_be_shared_when_isolation_disabled(self):
+        first = SessionSource(
+            platform=Platform.DISCORD,
+            chat_id="guild-123",
+            chat_type="group",
+            user_id="alice",
+        )
+        second = SessionSource(
+            platform=Platform.DISCORD,
+            chat_id="guild-123",
+            chat_type="group",
+            user_id="bob",
+        )
+
+        assert build_session_key(first, group_sessions_per_user=False) == "agent:main:discord:group:guild-123"
+        assert build_session_key(second, group_sessions_per_user=False) == "agent:main:discord:group:guild-123"
+
    def test_group_thread_includes_thread_id(self):
        """Forum-style threads need a distinct session key within one group."""
        source = SessionSource(
@@ -400,6 +492,17 @@ class TestWhatsAppDMSessionKeyConsistency:
        key = build_session_key(source)
        assert key == "agent:main:telegram:group:-1002285219667:17585"

+    def test_group_thread_sessions_are_isolated_per_user(self):
+        source = SessionSource(
+            platform=Platform.TELEGRAM,
+            chat_id="-1002285219667",
+            chat_type="group",
+            thread_id="17585",
+            user_id="42",
+        )
+        key = build_session_key(source)
+        assert key == "agent:main:telegram:group:-1002285219667:17585:42"
+

 class TestSessionStoreEntriesAttribute:
    """Regression: /reset must access _entries, not _sessions."""
@@ -0,0 +1,45 @@
+import os
+
+from gateway.config import Platform
+from gateway.run import GatewayRunner
+from gateway.session import SessionContext, SessionSource
+
+
+def test_set_session_env_includes_thread_id(monkeypatch):
+    runner = object.__new__(GatewayRunner)
+    source = SessionSource(
+        platform=Platform.TELEGRAM,
+        chat_id="-1001",
+        chat_name="Group",
+        chat_type="group",
+        thread_id="17585",
+    )
+    context = SessionContext(source=source, connected_platforms=[], home_channels={})
+
+    monkeypatch.delenv("HERMES_SESSION_PLATFORM", raising=False)
+    monkeypatch.delenv("HERMES_SESSION_CHAT_ID", raising=False)
+    monkeypatch.delenv("HERMES_SESSION_CHAT_NAME", raising=False)
+    monkeypatch.delenv("HERMES_SESSION_THREAD_ID", raising=False)
+
+    runner._set_session_env(context)
+
+    assert os.getenv("HERMES_SESSION_PLATFORM") == "telegram"
+    assert os.getenv("HERMES_SESSION_CHAT_ID") == "-1001"
+    assert os.getenv("HERMES_SESSION_CHAT_NAME") == "Group"
+    assert os.getenv("HERMES_SESSION_THREAD_ID") == "17585"
+
+
+def test_clear_session_env_removes_thread_id(monkeypatch):
+    runner = object.__new__(GatewayRunner)
+
+    monkeypatch.setenv("HERMES_SESSION_PLATFORM", "telegram")
+    monkeypatch.setenv("HERMES_SESSION_CHAT_ID", "-1001")
+    monkeypatch.setenv("HERMES_SESSION_CHAT_NAME", "Group")
+    monkeypatch.setenv("HERMES_SESSION_THREAD_ID", "17585")
+
+    runner._clear_session_env()
+
+    assert os.getenv("HERMES_SESSION_PLATFORM") is None
+    assert os.getenv("HERMES_SESSION_CHAT_ID") is None
+    assert os.getenv("HERMES_SESSION_CHAT_NAME") is None
+    assert os.getenv("HERMES_SESSION_THREAD_ID") is None
@@ -0,0 +1,81 @@
+"""Tests for SSL certificate auto-detection in gateway/run.py."""
+
+import importlib
+import os
+from unittest.mock import patch, MagicMock
+
+
+def _load_ensure_ssl():
+    """Import _ensure_ssl_certs fresh (gateway/run.py has heavy deps, so we
+    extract just the function source to avoid importing the whole gateway)."""
+    # We can test via the actual module since conftest isolates HERMES_HOME,
+    # but we need to be careful about side effects.  Instead, replicate the
+    # logic in a controlled way.
+    from types import ModuleType
+    import textwrap, ssl as _ssl  # noqa: F401
+
+    code = textwrap.dedent("""\
+    import os, ssl
+
+    def _ensure_ssl_certs():
+        if "SSL_CERT_FILE" in os.environ:
+            return
+        paths = ssl.get_default_verify_paths()
+        for candidate in (paths.cafile, paths.openssl_cafile):
+            if candidate and os.path.exists(candidate):
+                os.environ["SSL_CERT_FILE"] = candidate
+                return
+        try:
+            import certifi
+            os.environ["SSL_CERT_FILE"] = certifi.where()
+            return
+        except ImportError:
+            pass
+        for candidate in (
+            "/etc/ssl/certs/ca-certificates.crt",
+            "/etc/ssl/cert.pem",
+        ):
+            if os.path.exists(candidate):
+                os.environ["SSL_CERT_FILE"] = candidate
+                return
+    """)
+    mod = ModuleType("_ssl_helper")
+    exec(code, mod.__dict__)
+    return mod._ensure_ssl_certs
+
+
+class TestEnsureSslCerts:
+    def test_respects_existing_env_var(self):
+        fn = _load_ensure_ssl()
+        with patch.dict(os.environ, {"SSL_CERT_FILE": "/custom/ca.pem"}):
+            fn()
+            assert os.environ["SSL_CERT_FILE"] == "/custom/ca.pem"
+
+    def test_sets_from_ssl_default_paths(self, tmp_path):
+        fn = _load_ensure_ssl()
+        cert = tmp_path / "ca.crt"
+        cert.write_text("FAKE CERT")
+
+        mock_paths = MagicMock()
+        mock_paths.cafile = str(cert)
+        mock_paths.openssl_cafile = None
+
+        env = {k: v for k, v in os.environ.items() if k != "SSL_CERT_FILE"}
+        with patch.dict(os.environ, env, clear=True), \
+             patch("ssl.get_default_verify_paths", return_value=mock_paths):
+            fn()
+            assert os.environ.get("SSL_CERT_FILE") == str(cert)
+
+    def test_no_op_when_nothing_found(self):
+        fn = _load_ensure_ssl()
+        mock_paths = MagicMock()
+        mock_paths.cafile = None
+        mock_paths.openssl_cafile = None
+
+        env = {k: v for k, v in os.environ.items() if k != "SSL_CERT_FILE"}
+        with patch.dict(os.environ, env, clear=True), \
+             patch("ssl.get_default_verify_paths", return_value=mock_paths), \
+             patch("os.path.exists", return_value=False), \
+             patch.dict("sys.modules", {"certifi": None}):
+            fn()
+            assert "SSL_CERT_FILE" not in os.environ
@@ -26,6 +26,22 @@ class TestGatewayPidState:
        assert status.get_running_pid() is None
        assert not pid_path.exists()

+    def test_get_running_pid_accepts_gateway_metadata_when_cmdline_unavailable(self, tmp_path, monkeypatch):
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+        pid_path = tmp_path / "gateway.pid"
+        pid_path.write_text(json.dumps({
+            "pid": os.getpid(),
+            "kind": "hermes-gateway",
+            "argv": ["python", "-m", "hermes_cli.main", "gateway"],
+            "start_time": 123,
+        }))
+
+        monkeypatch.setattr(status.os, "kill", lambda pid, sig: None)
+        monkeypatch.setattr(status, "_get_process_start_time", lambda pid: 123)
+        monkeypatch.setattr(status, "_read_process_cmdline", lambda pid: None)
+
+        assert status.get_running_pid() == os.getpid()
+

 class TestGatewayRuntimeStatus:
    def test_write_runtime_status_records_platform_failure(self, tmp_path, monkeypatch):
@@ -0,0 +1,133 @@
+"""Tests for gateway /status behavior and token persistence."""
+
+from datetime import datetime
+from types import SimpleNamespace
+from unittest.mock import AsyncMock, MagicMock
+
+import pytest
+
+from gateway.config import GatewayConfig, Platform, PlatformConfig
+from gateway.platforms.base import MessageEvent
+from gateway.session import SessionEntry, SessionSource, build_session_key
+
+
+def _make_source() -> SessionSource:
+    return SessionSource(
+        platform=Platform.TELEGRAM,
+        user_id="u1",
+        chat_id="c1",
+        user_name="tester",
+        chat_type="dm",
+    )
+
+
+def _make_event(text: str) -> MessageEvent:
+    return MessageEvent(
+        text=text,
+        source=_make_source(),
+        message_id="m1",
+    )
+
+
+def _make_runner(session_entry: SessionEntry):
+    from gateway.run import GatewayRunner
+
+    runner = object.__new__(GatewayRunner)
+    runner.config = GatewayConfig(
+        platforms={Platform.TELEGRAM: PlatformConfig(enabled=True, token="***")}
+    )
+    adapter = MagicMock()
+    adapter.send = AsyncMock()
+    runner.adapters = {Platform.TELEGRAM: adapter}
+    runner._voice_mode = {}
+    runner.hooks = SimpleNamespace(emit=AsyncMock(), loaded_hooks=False)
+    runner.session_store = MagicMock()
+    runner.session_store.get_or_create_session.return_value = session_entry
+    runner.session_store.load_transcript.return_value = []
+    runner.session_store.has_any_sessions.return_value = True
+    runner.session_store.append_to_transcript = MagicMock()
+    runner.session_store.rewrite_transcript = MagicMock()
+    runner.session_store.update_session = MagicMock()
+    runner._running_agents = {}
+    runner._pending_messages = {}
+    runner._pending_approvals = {}
+    runner._session_db = None
+    runner._reasoning_config = None
+    runner._provider_routing = {}
+    runner._fallback_model = None
+    runner._show_reasoning = False
+    runner._is_user_authorized = lambda _source: True
+    runner._set_session_env = lambda _context: None
+    runner._should_send_voice_reply = lambda *_args, **_kwargs: False
+    runner._send_voice_reply = AsyncMock()
+    runner._capture_gateway_honcho_if_configured = lambda *args, **kwargs: None
+    runner._emit_gateway_run_progress = AsyncMock()
+    return runner
+
+
+@pytest.mark.asyncio
+async def test_status_command_reports_running_agent_without_interrupt(monkeypatch):
+    session_entry = SessionEntry(
+        session_key=build_session_key(_make_source()),
+        session_id="sess-1",
+        created_at=datetime.now(),
+        updated_at=datetime.now(),
+        platform=Platform.TELEGRAM,
+        chat_type="dm",
+        total_tokens=321,
+    )
+    runner = _make_runner(session_entry)
+    running_agent = MagicMock()
+    runner._running_agents[build_session_key(_make_source())] = running_agent
+
+    result = await runner._handle_message(_make_event("/status"))
+
+    assert "**Tokens:** 321" in result
+    assert "**Agent Running:** Yes ⚡" in result
+    running_agent.interrupt.assert_not_called()
+    assert runner._pending_messages == {}
+
+
+@pytest.mark.asyncio
+async def test_handle_message_persists_agent_token_counts(monkeypatch):
+    import gateway.run as gateway_run
+
+    session_entry = SessionEntry(
+        session_key=build_session_key(_make_source()),
+        session_id="sess-1",
+        created_at=datetime.now(),
+        updated_at=datetime.now(),
+        platform=Platform.TELEGRAM,
+        chat_type="dm",
+    )
+    runner = _make_runner(session_entry)
+    runner.session_store.load_transcript.return_value = [{"role": "user", "content": "earlier"}]
+    runner._run_agent = AsyncMock(
+        return_value={
+            "final_response": "ok",
+            "messages": [],
+            "tools": [],
+            "history_offset": 0,
+            "last_prompt_tokens": 80,
+            "input_tokens": 120,
+            "output_tokens": 45,
+            "model": "openai/test-model",
+        }
+    )
+
+    monkeypatch.setattr(gateway_run, "_resolve_runtime_agent_kwargs", lambda: {"api_key": "***"})
+    monkeypatch.setattr(
+        "agent.model_metadata.get_model_context_length",
+        lambda *_args, **_kwargs: 100000,
+    )
+
+    result = await runner._handle_message(_make_event("hello"))
+
+    assert result == "ok"
+    runner.session_store.update_session.assert_called_once_with(
+        session_entry.session_key,
+        input_tokens=120,
+        output_tokens=45,
+        last_prompt_tokens=80,
+        model="openai/test-model",
+    )
@@ -51,3 +51,27 @@ async def test_enrich_message_with_transcription_skips_when_stt_disabled():

    assert "transcription is disabled" in result.lower()
    assert "caption" in result
+
+
+@pytest.mark.asyncio
+async def test_enrich_message_with_transcription_avoids_bogus_no_provider_message_for_backend_key_errors():
+    from gateway.run import GatewayRunner
+
+    runner = GatewayRunner.__new__(GatewayRunner)
+    runner.config = GatewayConfig(stt_enabled=True)
+
+    with patch(
+        "tools.transcription_tools.transcribe_audio",
+        return_value={"success": False, "error": "VOICE_TOOLS_OPENAI_KEY not set"},
+    ), patch(
+        "tools.transcription_tools.get_stt_model_from_config",
+        return_value=None,
+    ):
+        result = await runner._enrich_message_with_transcription(
+            "caption",
+            ["/tmp/voice.ogg"],
+        )
+
+    assert "No STT provider is configured" not in result
+    assert "trouble transcribing" in result
+    assert "caption" in result
@@ -100,6 +100,39 @@ async def test_polling_conflict_stops_polling_and_notifies_handler(monkeypatch):
    fatal_handler.assert_awaited_once()


+@pytest.mark.asyncio
+async def test_connect_marks_retryable_fatal_error_for_startup_network_failure(monkeypatch):
+    adapter = TelegramAdapter(PlatformConfig(enabled=True, token="***"))
+
+    monkeypatch.setattr(
+        "gateway.status.acquire_scoped_lock",
+        lambda scope, identity, metadata=None: (True, None),
+    )
+    monkeypatch.setattr(
+        "gateway.status.release_scoped_lock",
+        lambda scope, identity: None,
+    )
+
+    builder = MagicMock()
+    builder.token.return_value = builder
+    app = SimpleNamespace(
+        bot=SimpleNamespace(),
+        updater=SimpleNamespace(),
+        add_handler=MagicMock(),
+        initialize=AsyncMock(side_effect=RuntimeError("Temporary failure in name resolution")),
+        start=AsyncMock(),
+    )
+    builder.build.return_value = app
+    monkeypatch.setattr("gateway.platforms.telegram.Application", SimpleNamespace(builder=MagicMock(return_value=builder)))
+
+    ok = await adapter.connect()
+
+    assert ok is False
+    assert adapter.fatal_error_code == "telegram_connect_error"
+    assert adapter.fatal_error_retryable is True
+    assert "Temporary failure in name resolution" in adapter.fatal_error_message
+
+
@pytest.mark.asyncio
 async def test_disconnect_skips_inactive_updater_and_app(monkeypatch):
    adapter = TelegramAdapter(PlatformConfig(enabled=True, token="***"))
@@ -12,6 +12,7 @@ import asyncio
 import importlib
 import os
 import sys
+from types import SimpleNamespace
 from unittest.mock import AsyncMock, MagicMock, patch

 import pytest
@@ -351,6 +352,26 @@ class TestDocumentDownloadBlock:
 # ---------------------------------------------------------------------------

 class TestMediaGroups:
+    @pytest.mark.asyncio
+    async def test_non_album_photo_burst_is_buffered_and_combined(self, adapter):
+        first_photo = _make_photo(_make_file_obj(b"first"))
+        second_photo = _make_photo(_make_file_obj(b"second"))
+
+        msg1 = _make_message(caption="two images", photo=[first_photo])
+        msg2 = _make_message(photo=[second_photo])
+
+        with patch("gateway.platforms.telegram.cache_image_from_bytes", side_effect=["/tmp/burst-one.jpg", "/tmp/burst-two.jpg"]):
+            await adapter._handle_media_message(_make_update(msg1), MagicMock())
+            await adapter._handle_media_message(_make_update(msg2), MagicMock())
+            assert adapter.handle_message.await_count == 0
+            await asyncio.sleep(adapter.MEDIA_GROUP_WAIT_SECONDS + 0.05)
+
+        adapter.handle_message.assert_awaited_once()
+        event = adapter.handle_message.await_args.args[0]
+        assert event.text == "two images"
+        assert event.media_urls == ["/tmp/burst-one.jpg", "/tmp/burst-two.jpg"]
+        assert len(event.media_types) == 2
+
    @pytest.mark.asyncio
    async def test_photo_album_is_buffered_and_combined(self, adapter):
        first_photo = _make_photo(_make_file_obj(b"first"))
@@ -537,6 +558,51 @@ class TestSendDocument:
        assert call_kwargs["reply_to_message_id"] == 50


+class TestTelegramPhotoBatching:
+    @pytest.mark.asyncio
+    async def test_flush_photo_batch_does_not_drop_newer_scheduled_task(self, adapter):
+        old_task = MagicMock()
+        new_task = MagicMock()
+        batch_key = "session:photo-burst"
+        adapter._pending_photo_batch_tasks[batch_key] = new_task
+        adapter._pending_photo_batches[batch_key] = MessageEvent(
+            text="",
+            message_type=MessageType.PHOTO,
+            source=SimpleNamespace(channel_id="chat-1"),
+            media_urls=["/tmp/a.jpg"],
+            media_types=["image/jpeg"],
+        )
+
+        with (
+            patch("gateway.platforms.telegram.asyncio.current_task", return_value=old_task),
+            patch("gateway.platforms.telegram.asyncio.sleep", new=AsyncMock()),
+        ):
+            await adapter._flush_photo_batch(batch_key)
+
+        assert adapter._pending_photo_batch_tasks[batch_key] is new_task
+
+    @pytest.mark.asyncio
+    async def test_disconnect_cancels_pending_photo_batch_tasks(self, adapter):
+        task = MagicMock()
+        task.done.return_value = False
+        adapter._pending_photo_batch_tasks["session:photo-burst"] = task
+        adapter._pending_photo_batches["session:photo-burst"] = MessageEvent(
+            text="",
+            message_type=MessageType.PHOTO,
+            source=SimpleNamespace(channel_id="chat-1"),
+        )
+        adapter._app = MagicMock()
+        adapter._app.updater.stop = AsyncMock()
+        adapter._app.stop = AsyncMock()
+        adapter._app.shutdown = AsyncMock()
+
+        await adapter.disconnect()
+
+        task.cancel.assert_called_once()
+        assert adapter._pending_photo_batch_tasks == {}
+        assert adapter._pending_photo_batches == {}
+
+
 # ---------------------------------------------------------------------------
 # TestSendVideo — outbound video delivery
 # ---------------------------------------------------------------------------
@@ -7,7 +7,7 @@ or corrupt user-visible content.

 import re
 import sys
-from unittest.mock import MagicMock
+from unittest.mock import AsyncMock, MagicMock

 import pytest

@@ -392,3 +392,27 @@ class TestStripMdv2:

    def test_empty_string(self):
        assert _strip_mdv2("") == ""
+
+
+@pytest.mark.asyncio
+async def test_send_escapes_chunk_indicator_for_markdownv2(adapter):
+    adapter.MAX_MESSAGE_LENGTH = 80
+    adapter._bot = MagicMock()
+
+    sent_texts = []
+
+    async def _fake_send_message(**kwargs):
+        sent_texts.append(kwargs["text"])
+        msg = MagicMock()
+        msg.message_id = len(sent_texts)
+        return msg
+
+    adapter._bot.send_message = AsyncMock(side_effect=_fake_send_message)
+
+    content = ("**bold** chunk content " * 12).strip()
+    result = await adapter.send("123", content)
+
+    assert result.success is True
+    assert len(sent_texts) > 1
+    assert re.search(r" \\\([0-9]+/[0-9]+\\\)$", sent_texts[0])
+    assert re.search(r" \\\([0-9]+/[0-9]+\\\)$", sent_texts[-1])
@@ -0,0 +1,49 @@
+import asyncio
+from unittest.mock import MagicMock
+
+import pytest
+
+from gateway.config import GatewayConfig, Platform, PlatformConfig
+from gateway.platforms.base import MessageEvent, MessageType
+from gateway.session import SessionSource, build_session_key
+from gateway.run import GatewayRunner
+
+
+class _PendingAdapter:
+    def __init__(self):
+        self._pending_messages = {}
+
+
+def _make_runner():
+    runner = object.__new__(GatewayRunner)
+    runner.config = GatewayConfig(platforms={Platform.TELEGRAM: PlatformConfig(enabled=True, token="***")})
+    runner.adapters = {Platform.TELEGRAM: _PendingAdapter()}
+    runner._running_agents = {}
+    runner._pending_messages = {}
+    runner._pending_approvals = {}
+    runner._voice_mode = {}
+    runner._is_user_authorized = lambda _source: True
+    return runner
+
+
+@pytest.mark.asyncio
+async def test_handle_message_does_not_priority_interrupt_photo_followup():
+    runner = _make_runner()
+    source = SessionSource(platform=Platform.TELEGRAM, chat_id="12345", chat_type="dm")
+    session_key = build_session_key(source)
+    running_agent = MagicMock()
+    runner._running_agents[session_key] = running_agent
+
+    event = MessageEvent(
+        text="caption",
+        message_type=MessageType.PHOTO,
+        source=source,
+        media_urls=["/tmp/photo-a.jpg"],
+        media_types=["image/jpeg"],
+    )
+
+    result = await runner._handle_message(event)
+
+    assert result is None
+    running_agent.interrupt.assert_not_called()
+    assert runner.adapters[Platform.TELEGRAM]._pending_messages[session_key] is event
@@ -1,5 +1,6 @@
 """Tests for the /voice command and auto voice reply in the gateway."""

+import importlib.util
 import json
 import os
 import queue
@@ -206,9 +207,11 @@ class TestAutoVoiceReply:
      2. gateway _send_voice_reply: fires based on voice_mode setting

    To prevent double audio, _send_voice_reply is skipped when voice input
-    already triggered base adapter auto-TTS (skip_double = is_voice_input).
-    Exception: Discord voice channel — both auto-TTS and Discord play_tts
-    override skip, so the runner must handle it via play_in_voice_channel.
+    already triggered base adapter auto-TTS.
+
+    For Discord voice channels, the base adapter now routes play_tts directly
+    into VC playback, so the runner should still skip voice-input follow-ups to
+    avoid double playback.
    """

    @pytest.fixture
@@ -292,14 +295,14 @@ class TestAutoVoiceReply:

    # -- Discord VC exception: runner must handle --------------------------

-    def test_discord_vc_voice_input_runner_fires(self, runner):
-        """Discord VC + voice input: base play_tts skips (VC override),
-        so runner must handle via play_in_voice_channel."""
-        assert self._call(runner, "all", MessageType.VOICE, in_voice_channel=True) is True
+    def test_discord_vc_voice_input_base_handles(self, runner):
+        """Discord VC + voice input: base adapter play_tts plays in VC,
+        so runner skips to avoid double playback."""
+        assert self._call(runner, "all", MessageType.VOICE, in_voice_channel=True) is False

-    def test_discord_vc_voice_only_runner_fires(self, runner):
-        """Discord VC + voice_only + voice: runner must handle."""
-        assert self._call(runner, "voice_only", MessageType.VOICE, in_voice_channel=True) is True
+    def test_discord_vc_voice_only_base_handles(self, runner):
+        """Discord VC + voice_only + voice: base adapter handles."""
+        assert self._call(runner, "voice_only", MessageType.VOICE, in_voice_channel=True) is False

    # -- Edge cases --------------------------------------------------------

@@ -422,17 +425,23 @@ class TestDiscordPlayTtsSkip:
        return adapter

    @pytest.mark.asyncio
-    async def test_play_tts_skipped_when_in_vc(self):
+    async def test_play_tts_plays_in_vc_when_connected(self):
        adapter = self._make_discord_adapter()
        # Simulate bot in voice channel for guild 111, text channel 123
        mock_vc = MagicMock()
        mock_vc.is_connected.return_value = True
+        mock_vc.is_playing.return_value = False
        adapter._voice_clients[111] = mock_vc
        adapter._voice_text_channels[111] = 123

+        # Mock play_in_voice_channel to avoid actual ffmpeg call
+        async def fake_play(gid, path):
+            return True
+        adapter.play_in_voice_channel = fake_play
+
        result = await adapter.play_tts(chat_id="123", audio_path="/tmp/test.ogg")
+        # play_tts now plays in VC instead of being a no-op
        assert result.success is True
-        # send_voice should NOT have been called (no client, would fail)

    @pytest.mark.asyncio
    async def test_play_tts_not_skipped_when_not_in_vc(self):
@@ -728,6 +737,24 @@ class TestVoiceChannelCommands:
        result = await runner._handle_voice_channel_join(event)
        assert "failed" in result.lower()

+    @pytest.mark.asyncio
+    async def test_join_missing_voice_dependencies(self, runner):
+        """Missing PyNaCl/davey should return a user-actionable install hint."""
+        mock_channel = MagicMock()
+        mock_channel.name = "General"
+        mock_adapter = AsyncMock()
+        mock_adapter.join_voice_channel = AsyncMock(
+            side_effect=RuntimeError("PyNaCl library needed in order to use voice")
+        )
+        mock_adapter.get_user_voice_channel = AsyncMock(return_value=mock_channel)
+        event = self._make_discord_event()
+        runner.adapters[event.source.platform] = mock_adapter
+
+        result = await runner._handle_voice_channel_join(event)
+
+        assert "voice dependencies are missing" in result.lower()
+        assert "hermes-agent[messaging]" in result
+
    # -- _handle_voice_channel_leave --

    @pytest.mark.asyncio
@@ -2031,3 +2058,534 @@ class TestDisconnectVoiceCleanup:
        assert len(adapter._voice_receivers) == 0
        assert len(adapter._voice_listen_tasks) == 0
        assert len(adapter._voice_timeout_tasks) == 0
+
+
+# =====================================================================
+# Discord Voice Channel Flow Tests
+# =====================================================================
+
+
+@pytest.mark.skipif(
+    importlib.util.find_spec("nacl") is None,
+    reason="PyNaCl not installed",
+)
+class TestVoiceReception:
+    """Audio reception: SSRC mapping, DAVE passthrough, buffer lifecycle."""
+
+    @staticmethod
+    def _make_receiver(allowed_ids=None, members=None, dave=False, bot_id=9999):
+        from gateway.platforms.discord import VoiceReceiver
+        vc = MagicMock()
+        vc._connection.secret_key = [0] * 32
+        vc._connection.dave_session = MagicMock() if dave else None
+        vc._connection.ssrc = bot_id
+        vc._connection.add_socket_listener = MagicMock()
+        vc._connection.remove_socket_listener = MagicMock()
+        vc._connection.hook = None
+        vc.user = SimpleNamespace(id=bot_id)
+        vc.channel = MagicMock()
+        vc.channel.members = members or []
+        receiver = VoiceReceiver(vc, allowed_user_ids=allowed_ids)
+        return receiver
+
+    @staticmethod
+    def _fill_buffer(receiver, ssrc, duration_s=1.0, age_s=3.0):
+        """Add PCM data to buffer. 48kHz stereo 16-bit = 192000 bytes/sec."""
+        size = int(192000 * duration_s)
+        receiver._buffers[ssrc] = bytearray(b"\x00" * size)
+        receiver._last_packet_time[ssrc] = time.monotonic() - age_s
+
+    # -- Known SSRC (normal flow) --
+
+    def test_known_ssrc_returns_completed(self):
+        receiver = self._make_receiver()
+        receiver.start()
+        receiver.map_ssrc(100, 42)
+        self._fill_buffer(receiver, 100)
+        completed = receiver.check_silence()
+        assert len(completed) == 1
+        assert completed[0][0] == 42
+        assert len(receiver._buffers[100]) == 0  # cleared
+
+    def test_known_ssrc_short_buffer_ignored(self):
+        receiver = self._make_receiver()
+        receiver.start()
+        receiver.map_ssrc(100, 42)
+        self._fill_buffer(receiver, 100, duration_s=0.1)  # too short
+        completed = receiver.check_silence()
+        assert len(completed) == 0
+
+    def test_known_ssrc_recent_audio_waits(self):
+        receiver = self._make_receiver()
+        receiver.start()
+        receiver.map_ssrc(100, 42)
+        self._fill_buffer(receiver, 100, age_s=0.0)  # just arrived
+        completed = receiver.check_silence()
+        assert len(completed) == 0
+
+    # -- Unknown SSRC + DAVE passthrough --
+
+    def test_unknown_ssrc_no_automap_no_completed(self):
+        """Unknown SSRC, no members to infer — buffer cleared, not returned."""
+        receiver = self._make_receiver(dave=True, members=[])
+        receiver.start()
+        self._fill_buffer(receiver, 100)
+        completed = receiver.check_silence()
+        assert len(completed) == 0
+        assert len(receiver._buffers[100]) == 0
+
+    def test_unknown_ssrc_late_speaking_event(self):
+        """Audio buffered before SPEAKING → SPEAKING maps → next check returns it."""
+        receiver = self._make_receiver(dave=True)
+        receiver.start()
+        self._fill_buffer(receiver, 100, age_s=0.0)  # still receiving
+        # No user yet
+        assert receiver.check_silence() == []
+        # SPEAKING event arrives
+        receiver.map_ssrc(100, 42)
+        # Silence kicks in
+        receiver._last_packet_time[100] = time.monotonic() - 3.0
+        completed = receiver.check_silence()
+        assert len(completed) == 1
+        assert completed[0][0] == 42
+
+    # -- SSRC auto-mapping --
+
+    def test_automap_single_allowed_user(self):
+        members = [
+            SimpleNamespace(id=9999, name="Bot"),
+            SimpleNamespace(id=42, name="Alice"),
+        ]
+        receiver = self._make_receiver(allowed_ids={"42"}, members=members)
+        receiver.start()
+        self._fill_buffer(receiver, 100)
+        completed = receiver.check_silence()
+        assert len(completed) == 1
+        assert completed[0][0] == 42
+        assert receiver._ssrc_to_user[100] == 42
+
+    def test_automap_multiple_allowed_users_no_map(self):
+        members = [
+            SimpleNamespace(id=9999, name="Bot"),
+            SimpleNamespace(id=42, name="Alice"),
+            SimpleNamespace(id=43, name="Bob"),
+        ]
+        receiver = self._make_receiver(allowed_ids={"42", "43"}, members=members)
+        receiver.start()
+        self._fill_buffer(receiver, 100)
+        completed = receiver.check_silence()
+        assert len(completed) == 0
+
+    def test_automap_no_allowlist_single_member(self):
+        """No allowed_user_ids → sole non-bot member inferred."""
+        members = [
+            SimpleNamespace(id=9999, name="Bot"),
+            SimpleNamespace(id=42, name="Alice"),
+        ]
+        receiver = self._make_receiver(allowed_ids=None, members=members)
+        receiver.start()
+        self._fill_buffer(receiver, 100)
+        completed = receiver.check_silence()
+        assert len(completed) == 1
+        assert completed[0][0] == 42
+
+    def test_automap_unallowed_user_rejected(self):
+        """User in channel but not in allowed list — not mapped."""
+        members = [
+            SimpleNamespace(id=9999, name="Bot"),
+            SimpleNamespace(id=42, name="Alice"),
+        ]
+        receiver = self._make_receiver(allowed_ids={"99"}, members=members)
+        receiver.start()
+        self._fill_buffer(receiver, 100)
+        completed = receiver.check_silence()
+        assert len(completed) == 0
+
+    def test_automap_only_bot_in_channel(self):
+        """Only bot in channel — no one to map to."""
+        members = [SimpleNamespace(id=9999, name="Bot")]
+        receiver = self._make_receiver(allowed_ids=None, members=members)
+        receiver.start()
+        self._fill_buffer(receiver, 100)
+        completed = receiver.check_silence()
+        assert len(completed) == 0
+
+    def test_automap_persists_across_calls(self):
+        """Auto-mapped SSRC stays mapped for subsequent checks."""
+        members = [
+            SimpleNamespace(id=9999, name="Bot"),
+            SimpleNamespace(id=42, name="Alice"),
+        ]
+        receiver = self._make_receiver(allowed_ids={"42"}, members=members)
+        receiver.start()
+        self._fill_buffer(receiver, 100)
+        receiver.check_silence()
+        assert receiver._ssrc_to_user[100] == 42
+        # Second utterance — should use cached mapping
+        self._fill_buffer(receiver, 100)
+        completed = receiver.check_silence()
+        assert len(completed) == 1
+        assert completed[0][0] == 42
+
+    # -- Stale buffer cleanup --
+
+    def test_stale_unknown_buffer_discarded(self):
+        """Buffer with no user and very old timestamp is discarded."""
+        receiver = self._make_receiver()
+        receiver.start()
+        receiver._buffers[200] = bytearray(b"\x00" * 100)
+        receiver._last_packet_time[200] = time.monotonic() - 10.0
+        receiver.check_silence()
+        assert 200 not in receiver._buffers
+
+    # -- Pause / resume (echo prevention) --
+
+    def test_paused_receiver_ignores_packets(self):
+        receiver = self._make_receiver()
+        receiver.start()
+        receiver.pause()
+        receiver._on_packet(b"\x00" * 100)
+        assert len(receiver._buffers) == 0
+
+    def test_resumed_receiver_accepts_packets(self):
+        receiver = self._make_receiver()
+        receiver.start()
+        receiver.pause()
+        receiver.resume()
+        assert receiver._paused is False
+
+    # -- _on_packet DAVE passthrough behavior --
+
+    def _make_receiver_with_nacl(self, dave_session=None, mapped_ssrcs=None):
+        """Create a receiver that can process _on_packet with mocked NaCl + Opus."""
+        from gateway.platforms.discord import VoiceReceiver
+        vc = MagicMock()
+        vc._connection.secret_key = [0] * 32
+        vc._connection.dave_session = dave_session
+        vc._connection.ssrc = 9999
+        vc._connection.add_socket_listener = MagicMock()
+        vc._connection.remove_socket_listener = MagicMock()
+        vc._connection.hook = None
+        vc.user = SimpleNamespace(id=9999)
+        vc.channel = MagicMock()
+        vc.channel.members = []
+        receiver = VoiceReceiver(vc)
+        receiver.start()
+        # Pre-map SSRCs if provided
+        if mapped_ssrcs:
+            for ssrc, uid in mapped_ssrcs.items():
+                receiver.map_ssrc(ssrc, uid)
+        return receiver
+
+    @staticmethod
+    def _build_rtp_packet(ssrc=100, seq=1, timestamp=960):
+        """Build a minimal valid RTP packet for _on_packet.
+
+        We need: RTP header (12 bytes) + encrypted payload + 4-byte nonce.
+        NaCl decrypt is mocked so payload content doesn't matter.
+        """
+        import struct
+        # RTP header: version=2, payload_type=0x78, no extension, no CSRC
+        header = struct.pack(">BBHII", 0x80, 0x78, seq, timestamp, ssrc)
+        # Fake encrypted payload (NaCl will be mocked) + 4 byte nonce
+        payload = b"\x00" * 20 + b"\x00\x00\x00\x01"
+        return header + payload
+
+    def _inject_mock_decoder(self, receiver, ssrc):
+        """Pre-inject a mock Opus decoder for the given SSRC."""
+        mock_decoder = MagicMock()
+        mock_decoder.decode.return_value = b"\x00" * 3840
+        receiver._decoders[ssrc] = mock_decoder
+        return mock_decoder
+
+    def test_on_packet_dave_known_user_decrypt_ok(self):
+        """Known SSRC + DAVE decrypt success → audio buffered."""
+        dave = MagicMock()
+        dave.decrypt.return_value = b"\xf8\xff\xfe"
+        receiver = self._make_receiver_with_nacl(
+            dave_session=dave, mapped_ssrcs={100: 42}
+        )
+        self._inject_mock_decoder(receiver, 100)
+
+        with patch("nacl.secret.Aead") as mock_aead:
+            mock_aead.return_value.decrypt.return_value = b"\xf8\xff\xfe"
+            receiver._on_packet(self._build_rtp_packet(ssrc=100))
+
+        assert 100 in receiver._buffers
+        assert len(receiver._buffers[100]) > 0
+        dave.decrypt.assert_called_once()
+
+    def test_on_packet_dave_unknown_ssrc_passthrough(self):
+        """Unknown SSRC + DAVE → skip DAVE, attempt Opus decode (passthrough)."""
+        dave = MagicMock()
+        receiver = self._make_receiver_with_nacl(dave_session=dave)
+        self._inject_mock_decoder(receiver, 100)
+
+        with patch("nacl.secret.Aead") as mock_aead:
+            mock_aead.return_value.decrypt.return_value = b"\xf8\xff\xfe"
+            receiver._on_packet(self._build_rtp_packet(ssrc=100))
+
+        dave.decrypt.assert_not_called()
+        assert 100 in receiver._buffers
+        assert len(receiver._buffers[100]) > 0
+
+    def test_on_packet_dave_unencrypted_error_passthrough(self):
+        """DAVE decrypt 'Unencrypted' error → use data as-is, don't drop."""
+        dave = MagicMock()
+        dave.decrypt.side_effect = Exception(
+            "Failed to decrypt: DecryptionFailed(UnencryptedWhenPassthroughDisabled)"
+        )
+        receiver = self._make_receiver_with_nacl(
+            dave_session=dave, mapped_ssrcs={100: 42}
+        )
+        self._inject_mock_decoder(receiver, 100)
+
+        with patch("nacl.secret.Aead") as mock_aead:
+            mock_aead.return_value.decrypt.return_value = b"\xf8\xff\xfe"
+            receiver._on_packet(self._build_rtp_packet(ssrc=100))
+
+        assert 100 in receiver._buffers
+        assert len(receiver._buffers[100]) > 0
+
+    def test_on_packet_dave_other_error_drops(self):
+        """DAVE decrypt non-Unencrypted error → packet dropped."""
+        dave = MagicMock()
+        dave.decrypt.side_effect = Exception("KeyRotationFailed")
+        receiver = self._make_receiver_with_nacl(
+            dave_session=dave, mapped_ssrcs={100: 42}
+        )
+
+        with patch("nacl.secret.Aead") as mock_aead:
+            mock_aead.return_value.decrypt.return_value = b"\xf8\xff\xfe"
+            receiver._on_packet(self._build_rtp_packet(ssrc=100))
+
+        assert len(receiver._buffers.get(100, b"")) == 0
+
+    def test_on_packet_no_dave_direct_decode(self):
+        """No DAVE session → decode directly."""
+        receiver = self._make_receiver_with_nacl(dave_session=None)
+        self._inject_mock_decoder(receiver, 100)
+
+        with patch("nacl.secret.Aead") as mock_aead:
+            mock_aead.return_value.decrypt.return_value = b"\xf8\xff\xfe"
+            receiver._on_packet(self._build_rtp_packet(ssrc=100))
+
+        assert 100 in receiver._buffers
+        assert len(receiver._buffers[100]) > 0
+
+    def test_on_packet_bot_own_ssrc_ignored(self):
+        """Bot's own SSRC → dropped (echo prevention)."""
+        receiver = self._make_receiver_with_nacl()
+        with patch("nacl.secret.Aead"):
+            receiver._on_packet(self._build_rtp_packet(ssrc=9999))
+        assert len(receiver._buffers) == 0
+
+    def test_on_packet_multiple_ssrcs_separate_buffers(self):
+        """Different SSRCs → separate buffers."""
+        receiver = self._make_receiver_with_nacl(dave_session=None)
+        self._inject_mock_decoder(receiver, 100)
+        self._inject_mock_decoder(receiver, 200)
+
+        with patch("nacl.secret.Aead") as mock_aead:
+            mock_aead.return_value.decrypt.return_value = b"\xf8\xff\xfe"
+            receiver._on_packet(self._build_rtp_packet(ssrc=100))
+            receiver._on_packet(self._build_rtp_packet(ssrc=200))
+
+        assert 100 in receiver._buffers
+        assert 200 in receiver._buffers
+
+
+class TestVoiceTTSPlayback:
+    """TTS playback: play_tts in VC, dedup, fallback."""
+
+    @staticmethod
+    def _make_discord_adapter():
+        from gateway.platforms.discord import DiscordAdapter
+        from gateway.config import PlatformConfig, Platform
+        config = PlatformConfig(enabled=True, extra={})
+        config.token = "fake-token"
+        adapter = object.__new__(DiscordAdapter)
+        adapter.platform = Platform.DISCORD
+        adapter.config = config
+        adapter._voice_clients = {}
+        adapter._voice_text_channels = {}
+        adapter._voice_receivers = {}
+        return adapter
+
+    # -- play_tts behavior --
+
+    @pytest.mark.asyncio
+    async def test_play_tts_plays_in_vc(self):
+        """play_tts calls play_in_voice_channel when bot is in VC."""
+        adapter = self._make_discord_adapter()
+        mock_vc = MagicMock()
+        mock_vc.is_connected.return_value = True
+        adapter._voice_clients[111] = mock_vc
+        adapter._voice_text_channels[111] = 123
+
+        played = []
+        async def fake_play(gid, path):
+            played.append((gid, path))
+            return True
+        adapter.play_in_voice_channel = fake_play
+
+        result = await adapter.play_tts(chat_id="123", audio_path="/tmp/tts.ogg")
+        assert result.success is True
+        assert played == [(111, "/tmp/tts.ogg")]
+
+    @pytest.mark.asyncio
+    async def test_play_tts_fallback_when_not_in_vc(self):
+        """play_tts sends as file attachment when bot is not in VC."""
+        adapter = self._make_discord_adapter()
+        from gateway.platforms.base import SendResult
+        adapter.send_voice = AsyncMock(return_value=SendResult(success=False, error="no client"))
+        result = await adapter.play_tts(chat_id="123", audio_path="/tmp/tts.ogg")
+        assert result.success is False
+        adapter.send_voice.assert_called_once()
+
+    @pytest.mark.asyncio
+    async def test_play_tts_wrong_channel_no_match(self):
+        """play_tts doesn't match if chat_id is for a different channel."""
+        adapter = self._make_discord_adapter()
+        mock_vc = MagicMock()
+        mock_vc.is_connected.return_value = True
+        adapter._voice_clients[111] = mock_vc
+        adapter._voice_text_channels[111] = 123
+
+        from gateway.platforms.base import SendResult
+        adapter.send_voice = AsyncMock(return_value=SendResult(success=True))
+        # Different chat_id — shouldn't match VC
+        result = await adapter.play_tts(chat_id="999", audio_path="/tmp/tts.ogg")
+        adapter.send_voice.assert_called_once()
+
+    # -- Runner dedup --
+
+    @staticmethod
+    def _make_runner():
+        from gateway.run import GatewayRunner
+        runner = object.__new__(GatewayRunner)
+        runner._voice_mode = {}
+        runner.adapters = {}
+        return runner
+
+    def _call_should_reply(self, runner, voice_mode, msg_type, response="Hello", agent_msgs=None):
+        from gateway.platforms.base import MessageType, MessageEvent, SessionSource
+        from gateway.config import Platform
+        runner._voice_mode["ch1"] = voice_mode
+        source = SessionSource(
+            platform=Platform.DISCORD, chat_id="ch1",
+            user_id="1", user_name="test", chat_type="channel",
+        )
+        event = MessageEvent(source=source, text="test", message_type=msg_type)
+        return runner._should_send_voice_reply(event, response, agent_msgs or [])
+
+    def test_voice_input_runner_skips(self):
+        """Voice input: runner skips — base adapter handles via play_tts."""
+        from gateway.platforms.base import MessageType
+        runner = self._make_runner()
+        assert self._call_should_reply(runner, "all", MessageType.VOICE) is False
+
+    def test_text_input_voice_all_runner_fires(self):
+        """Text input + voice_mode=all: runner generates TTS."""
+        from gateway.platforms.base import MessageType
+        runner = self._make_runner()
+        assert self._call_should_reply(runner, "all", MessageType.TEXT) is True
+
+    def test_text_input_voice_off_no_tts(self):
+        """Text input + voice_mode=off: no TTS."""
+        from gateway.platforms.base import MessageType
+        runner = self._make_runner()
+        assert self._call_should_reply(runner, "off", MessageType.TEXT) is False
+
+    def test_text_input_voice_only_no_tts(self):
+        """Text input + voice_mode=voice_only: no TTS for text."""
+        from gateway.platforms.base import MessageType
+        runner = self._make_runner()
+        assert self._call_should_reply(runner, "voice_only", MessageType.TEXT) is False
+
+    def test_error_response_no_tts(self):
+        """Error response: no TTS regardless of voice_mode."""
+        from gateway.platforms.base import MessageType
+        runner = self._make_runner()
+        assert self._call_should_reply(runner, "all", MessageType.TEXT, response="Error: boom") is False
+
+    def test_empty_response_no_tts(self):
+        """Empty response: no TTS."""
+        from gateway.platforms.base import MessageType
+        runner = self._make_runner()
+        assert self._call_should_reply(runner, "all", MessageType.TEXT, response="") is False
+
+    def test_agent_tts_tool_dedup(self):
+        """Agent already called text_to_speech tool: runner skips."""
+        from gateway.platforms.base import MessageType
+        runner = self._make_runner()
+        agent_msgs = [{"role": "assistant", "tool_calls": [
+            {"id": "1", "type": "function", "function": {"name": "text_to_speech", "arguments": "{}"}}
+        ]}]
+        assert self._call_should_reply(runner, "all", MessageType.TEXT, agent_msgs=agent_msgs) is False
+
+
+class TestUDPKeepalive:
+    """UDP keepalive prevents Discord from dropping the voice session."""
+
+    def test_keepalive_interval_is_reasonable(self):
+        from gateway.platforms.discord import DiscordAdapter
+        interval = DiscordAdapter._KEEPALIVE_INTERVAL
+        assert 5 <= interval <= 30, f"Keepalive interval {interval}s should be between 5-30s"
+
+    @pytest.mark.asyncio
+    async def test_keepalive_sends_silence_frame(self):
+        """Listen loop sends silence frame via send_packet after interval."""
+        from gateway.platforms.discord import DiscordAdapter
+        from gateway.config import PlatformConfig, Platform
+
+        config = PlatformConfig(enabled=True, extra={})
+        config.token = "fake"
+        adapter = object.__new__(DiscordAdapter)
+        adapter.platform = Platform.DISCORD
+        adapter.config = config
+        adapter._voice_clients = {}
+        adapter._voice_text_channels = {}
+        adapter._voice_receivers = {}
+        adapter._voice_listen_tasks = {}
+
+        # Mock VC and receiver
+        mock_vc = MagicMock()
+        mock_vc.is_connected.return_value = True
+        mock_conn = MagicMock()
+        adapter._voice_clients[111] = mock_vc
+        mock_vc._connection = mock_conn
+
+        from gateway.platforms.discord import VoiceReceiver
+        mock_receiver_vc = MagicMock()
+        mock_receiver_vc._connection.secret_key = [0] * 32
+        mock_receiver_vc._connection.dave_session = None
+        mock_receiver_vc._connection.ssrc = 9999
+        mock_receiver_vc._connection.add_socket_listener = MagicMock()
+        mock_receiver_vc._connection.remove_socket_listener = MagicMock()
+        mock_receiver_vc._connection.hook = None
+        receiver = VoiceReceiver(mock_receiver_vc)
+        receiver.start()
+        adapter._voice_receivers[111] = receiver
+
+        # Set keepalive interval very short for test
+        original_interval = DiscordAdapter._KEEPALIVE_INTERVAL
+        DiscordAdapter._KEEPALIVE_INTERVAL = 0.1
+
+        try:
+            # Run listen loop briefly
+            import asyncio
+            loop_task = asyncio.create_task(adapter._voice_listen_loop(111))
+            await asyncio.sleep(0.3)
+            receiver._running = False  # stop loop
+            await asyncio.sleep(0.1)
+            loop_task.cancel()
+            try:
+                await loop_task
+            except asyncio.CancelledError:
+                pass
+
+            # send_packet should have been called with silence frame
+            mock_conn.send_packet.assert_called_with(b'\xf8\xff\xfe')
+        finally:
+            DiscordAdapter._KEEPALIVE_INTERVAL = original_interval
@@ -12,7 +12,8 @@ EXPECTED_COMMANDS = {
    "/personality", "/clear", "/history", "/new", "/reset", "/retry",
    "/undo", "/save", "/config", "/cron", "/skills", "/platforms",
    "/verbose", "/reasoning", "/compress", "/title", "/usage", "/insights", "/paste",
-    "/reload-mcp", "/rollback", "/background", "/skin", "/voice", "/quit",
+    "/reload-mcp", "/rollback", "/stop", "/background", "/skin", "/voice", "/browser", "/quit",
+    "/plugins",
 }


@@ -0,0 +1,70 @@
+import importlib
+import os
+import sys
+from pathlib import Path
+
+from hermes_cli.env_loader import load_hermes_dotenv
+
+
+def test_user_env_overrides_stale_shell_values(tmp_path, monkeypatch):
+    home = tmp_path / "hermes"
+    home.mkdir()
+    env_file = home / ".env"
+    env_file.write_text("OPENAI_BASE_URL=https://new.example/v1\n", encoding="utf-8")
+
+    monkeypatch.setenv("OPENAI_BASE_URL", "https://old.example/v1")
+
+    loaded = load_hermes_dotenv(hermes_home=home)
+
+    assert loaded == [env_file]
+    assert os.getenv("OPENAI_BASE_URL") == "https://new.example/v1"
+
+
+def test_project_env_overrides_stale_shell_values_when_user_env_missing(tmp_path, monkeypatch):
+    home = tmp_path / "hermes"
+    project_env = tmp_path / ".env"
+    project_env.write_text("OPENAI_BASE_URL=https://project.example/v1\n", encoding="utf-8")
+
+    monkeypatch.setenv("OPENAI_BASE_URL", "https://old.example/v1")
+
+    loaded = load_hermes_dotenv(hermes_home=home, project_env=project_env)
+
+    assert loaded == [project_env]
+    assert os.getenv("OPENAI_BASE_URL") == "https://project.example/v1"
+
+
+def test_user_env_takes_precedence_over_project_env(tmp_path, monkeypatch):
+    home = tmp_path / "hermes"
+    home.mkdir()
+    user_env = home / ".env"
+    project_env = tmp_path / ".env"
+    user_env.write_text("OPENAI_BASE_URL=https://user.example/v1\n", encoding="utf-8")
+    project_env.write_text("OPENAI_BASE_URL=https://project.example/v1\nOPENAI_API_KEY=project-key\n", encoding="utf-8")
+
+    monkeypatch.setenv("OPENAI_BASE_URL", "https://old.example/v1")
+    monkeypatch.delenv("OPENAI_API_KEY", raising=False)
+
+    loaded = load_hermes_dotenv(hermes_home=home, project_env=project_env)
+
+    assert loaded == [user_env, project_env]
+    assert os.getenv("OPENAI_BASE_URL") == "https://user.example/v1"
+    assert os.getenv("OPENAI_API_KEY") == "project-key"
+
+
+def test_main_import_applies_user_env_over_shell_values(tmp_path, monkeypatch):
+    home = tmp_path / "hermes"
+    home.mkdir()
+    (home / ".env").write_text(
+        "OPENAI_BASE_URL=https://new.example/v1\nHERMES_INFERENCE_PROVIDER=custom\n",
+        encoding="utf-8",
+    )
+
+    monkeypatch.setenv("HERMES_HOME", str(home))
+    monkeypatch.setenv("OPENAI_BASE_URL", "https://old.example/v1")
+    monkeypatch.setenv("HERMES_INFERENCE_PROVIDER", "openrouter")
+
+    sys.modules.pop("hermes_cli.main", None)
+    importlib.import_module("hermes_cli.main")
+
+    assert os.getenv("OPENAI_BASE_URL") == "https://new.example/v1"
+    assert os.getenv("HERMES_INFERENCE_PROVIDER") == "custom"
@@ -39,7 +39,7 @@ def test_systemd_status_warns_when_linger_disabled(monkeypatch, tmp_path, capsys
    monkeypatch.setattr(gateway, "get_systemd_linger_status", lambda: (False, ""))

    def fake_run(cmd, capture_output=False, text=False, check=False):
-        if cmd[:4] == ["systemctl", "--user", "status", gateway.SERVICE_NAME]:
+        if cmd[:4] == ["systemctl", "--user", "status", gateway.get_service_name()]:
            return SimpleNamespace(returncode=0, stdout="", stderr="")
        if cmd[:3] == ["systemctl", "--user", "is-active"]:
            return SimpleNamespace(returncode=0, stdout="active\n", stderr="")
@@ -76,7 +76,7 @@ def test_systemd_install_checks_linger_status(monkeypatch, tmp_path, capsys):
    assert unit_path.exists()
    assert [cmd for cmd, _ in calls] == [
        ["systemctl", "--user", "daemon-reload"],
-        ["systemctl", "--user", "enable", gateway.SERVICE_NAME],
+        ["systemctl", "--user", "enable", gateway.get_service_name()],
    ]
    assert helper_calls == [True]
    assert "User service installed and enabled" in out
@@ -110,7 +110,7 @@ def test_systemd_install_system_scope_skips_linger_and_uses_systemctl(monkeypatc
    assert unit_path.read_text(encoding="utf-8") == "scope=True user=alice\n"
    assert [cmd for cmd, _ in calls] == [
        ["systemctl", "daemon-reload"],
-        ["systemctl", "enable", gateway.SERVICE_NAME],
+        ["systemctl", "enable", gateway.get_service_name()],
    ]
    assert helper_calls == []
    assert "Configured to run as: alice" not in out  # generated test unit has no User= line
@@ -114,7 +114,7 @@ def test_systemd_install_calls_linger_helper(monkeypatch, tmp_path, capsys):
    assert unit_path.exists()
    assert [cmd for cmd, _ in calls] == [
        ["systemctl", "--user", "daemon-reload"],
-        ["systemctl", "--user", "enable", gateway.SERVICE_NAME],
+        ["systemctl", "--user", "enable", gateway.get_service_name()],
    ]
    assert helper_calls == [True]
    assert "User service installed and enabled" in out
@@ -26,7 +26,7 @@ class TestSystemdServiceRefresh:
        assert unit_path.read_text(encoding="utf-8") == "new unit\n"
        assert calls[:2] == [
            ["systemctl", "--user", "daemon-reload"],
-            ["systemctl", "--user", "start", gateway_cli.SERVICE_NAME],
+            ["systemctl", "--user", "start", gateway_cli.get_service_name()],
        ]

    def test_systemd_restart_refreshes_outdated_unit(self, tmp_path, monkeypatch):
@@ -49,10 +49,27 @@ class TestSystemdServiceRefresh:
        assert unit_path.read_text(encoding="utf-8") == "new unit\n"
        assert calls[:2] == [
            ["systemctl", "--user", "daemon-reload"],
-            ["systemctl", "--user", "restart", gateway_cli.SERVICE_NAME],
+            ["systemctl", "--user", "restart", gateway_cli.get_service_name()],
        ]


+class TestGeneratedSystemdUnits:
+    def test_user_unit_avoids_recursive_execstop_and_uses_extended_stop_timeout(self):
+        unit = gateway_cli.generate_systemd_unit(system=False)
+
+        assert "ExecStart=" in unit
+        assert "ExecStop=" not in unit
+        assert "TimeoutStopSec=60" in unit
+
+    def test_system_unit_avoids_recursive_execstop_and_uses_extended_stop_timeout(self):
+        unit = gateway_cli.generate_systemd_unit(system=True)
+
+        assert "ExecStart=" in unit
+        assert "ExecStop=" not in unit
+        assert "TimeoutStopSec=60" in unit
+        assert "WantedBy=multi-user.target" in unit
+
+
 class TestGatewayStopCleanup:
    def test_stop_sweeps_manual_gateway_processes_after_service_stop(self, tmp_path, monkeypatch):
        unit_path = tmp_path / "hermes-gateway.service"
@@ -92,9 +109,9 @@ class TestGatewayServiceDetection:
        )

        def fake_run(cmd, capture_output=True, text=True, **kwargs):
-            if cmd == ["systemctl", "--user", "is-active", gateway_cli.SERVICE_NAME]:
+            if cmd == ["systemctl", "--user", "is-active", gateway_cli.get_service_name()]:
                return SimpleNamespace(returncode=0, stdout="inactive\n", stderr="")
-            if cmd == ["systemctl", "is-active", gateway_cli.SERVICE_NAME]:
+            if cmd == ["systemctl", "is-active", gateway_cli.get_service_name()]:
                return SimpleNamespace(returncode=0, stdout="active\n", stderr="")
            raise AssertionError(f"Unexpected command: {cmd}")

@@ -7,6 +7,7 @@ from hermes_cli.models import (
    fetch_api_models,
    normalize_provider,
    parse_model_input,
+    probe_api_models,
    provider_label,
    provider_model_ids,
    validate_requested_model,
@@ -26,7 +27,15 @@ FAKE_API_MODELS = [

 def _validate(model, provider="openrouter", api_models=FAKE_API_MODELS, **kw):
    """Shortcut: call validate_requested_model with mocked API."""
-    with patch("hermes_cli.models.fetch_api_models", return_value=api_models):
+    probe_payload = {
+        "models": api_models,
+        "probed_url": "http://localhost:11434/v1/models",
+        "resolved_base_url": kw.get("base_url", "") or "http://localhost:11434/v1",
+        "suggested_base_url": None,
+        "used_fallback": False,
+    }
+    with patch("hermes_cli.models.fetch_api_models", return_value=api_models), \
+         patch("hermes_cli.models.probe_api_models", return_value=probe_payload):
        return validate_requested_model(model, provider, **kw)


@@ -147,6 +156,33 @@ class TestFetchApiModels:
        with patch("hermes_cli.models.urllib.request.urlopen", side_effect=Exception("timeout")):
            assert fetch_api_models("key", "https://example.com/v1") is None

+    def test_probe_api_models_tries_v1_fallback(self):
+        class _Resp:
+            def __enter__(self):
+                return self
+
+            def __exit__(self, exc_type, exc, tb):
+                return False
+
+            def read(self):
+                return b'{"data": [{"id": "local-model"}]}'
+
+        calls = []
+
+        def _fake_urlopen(req, timeout=5.0):
+            calls.append(req.full_url)
+            if req.full_url.endswith("/v1/models"):
+                return _Resp()
+            raise Exception("404")
+
+        with patch("hermes_cli.models.urllib.request.urlopen", side_effect=_fake_urlopen):
+            probe = probe_api_models("key", "http://localhost:8000")
+
+        assert calls == ["http://localhost:8000/models", "http://localhost:8000/v1/models"]
+        assert probe["models"] == ["local-model"]
+        assert probe["resolved_base_url"] == "http://localhost:8000/v1"
+        assert probe["used_fallback"] is True
+

 # -- validate — format checks -----------------------------------------------

@@ -191,6 +227,7 @@ class TestValidateApiFound:
        )
        assert result["accepted"] is True
        assert result["persist"] is True
+        assert result["recognized"] is True


 # -- validate — API not found ------------------------------------------------
@@ -232,3 +269,26 @@ class TestValidateApiFallback:
        result = _validate("some-model", provider="totally-unknown", api_models=None)
        assert result["accepted"] is True
        assert result["persist"] is True
+
+    def test_custom_endpoint_warns_with_probed_url_and_v1_hint(self):
+        with patch(
+            "hermes_cli.models.probe_api_models",
+            return_value={
+                "models": None,
+                "probed_url": "http://localhost:8000/v1/models",
+                "resolved_base_url": "http://localhost:8000",
+                "suggested_base_url": "http://localhost:8000/v1",
+                "used_fallback": False,
+            },
+        ):
+            result = validate_requested_model(
+                "qwen3",
+                "custom",
+                api_key="local-key",
+                base_url="http://localhost:8000",
+            )
+
+        assert result["accepted"] is True
+        assert result["persist"] is True
+        assert "http://localhost:8000/v1/models" in result["message"]
+        assert "http://localhost:8000/v1" in result["message"]
@@ -1,6 +1,6 @@
 """Tests for the hermes_cli models module."""

-from hermes_cli.models import OPENROUTER_MODELS, menu_labels, model_ids
+from hermes_cli.models import OPENROUTER_MODELS, menu_labels, model_ids, detect_provider_for_model


 class TestModelIds:
@@ -54,3 +54,66 @@ class TestOpenRouterModels:
    def test_at_least_5_models(self):
        """Sanity check that the models list hasn't been accidentally truncated."""
        assert len(OPENROUTER_MODELS) >= 5
+
+
+class TestFindOpenrouterSlug:
+    def test_exact_match(self):
+        from hermes_cli.models import _find_openrouter_slug
+        assert _find_openrouter_slug("anthropic/claude-opus-4.6") == "anthropic/claude-opus-4.6"
+
+    def test_bare_name_match(self):
+        from hermes_cli.models import _find_openrouter_slug
+        result = _find_openrouter_slug("claude-opus-4.6")
+        assert result == "anthropic/claude-opus-4.6"
+
+    def test_case_insensitive(self):
+        from hermes_cli.models import _find_openrouter_slug
+        result = _find_openrouter_slug("Anthropic/Claude-Opus-4.6")
+        assert result is not None
+
+    def test_unknown_returns_none(self):
+        from hermes_cli.models import _find_openrouter_slug
+        assert _find_openrouter_slug("totally-fake-model-xyz") is None
+
+
+class TestDetectProviderForModel:
+    def test_anthropic_model_detected(self):
+        """claude-opus-4-6 should resolve to anthropic provider."""
+        result = detect_provider_for_model("claude-opus-4-6", "openai-codex")
+        assert result is not None
+        assert result[0] == "anthropic"
+
+    def test_deepseek_model_detected(self):
+        """deepseek-chat should resolve to deepseek provider."""
+        result = detect_provider_for_model("deepseek-chat", "openai-codex")
+        assert result is not None
+        # Provider is deepseek (direct) or openrouter (fallback) depending on creds
+        assert result[0] in ("deepseek", "openrouter")
+
+    def test_current_provider_model_returns_none(self):
+        """Models belonging to the current provider should not trigger a switch."""
+        assert detect_provider_for_model("gpt-5.3-codex", "openai-codex") is None
+
+    def test_openrouter_slug_match(self):
+        """Models in the OpenRouter catalog should be found."""
+        result = detect_provider_for_model("anthropic/claude-opus-4.6", "openai-codex")
+        assert result is not None
+        assert result[0] == "openrouter"
+        assert result[1] == "anthropic/claude-opus-4.6"
+
+    def test_bare_name_gets_openrouter_slug(self):
+        """Bare model names should get mapped to full OpenRouter slugs."""
+        result = detect_provider_for_model("claude-opus-4.6", "openai-codex")
+        assert result is not None
+        # Should find it on OpenRouter with full slug
+        assert result[1] == "anthropic/claude-opus-4.6"
+
+    def test_unknown_model_returns_none(self):
+        """Completely unknown model names should return None."""
+        assert detect_provider_for_model("nonexistent-model-xyz", "openai-codex") is None
+
+    def test_aggregator_not_suggested(self):
+        """nous/openrouter should never be auto-suggested as target provider."""
+        result = detect_provider_for_model("claude-opus-4-6", "openai-codex")
+        assert result is not None
+        assert result[0] not in ("nous",)  # nous has claude models but shouldn't be suggested
@@ -0,0 +1,184 @@
+"""Tests for file path autocomplete in the CLI completer."""
+
+import os
+from unittest.mock import MagicMock
+
+import pytest
+from prompt_toolkit.document import Document
+from prompt_toolkit.formatted_text import to_plain_text
+
+from hermes_cli.commands import SlashCommandCompleter, _file_size_label
+
+
+def _display_names(completions):
+    """Extract plain-text display names from a list of Completion objects."""
+    return [to_plain_text(c.display) for c in completions]
+
+
+def _display_metas(completions):
+    """Extract plain-text display_meta from a list of Completion objects."""
+    return [to_plain_text(c.display_meta) if c.display_meta else "" for c in completions]
+
+
+@pytest.fixture
+def completer():
+    return SlashCommandCompleter()
+
+
+class TestExtractPathWord:
+    def test_relative_path(self):
+        assert SlashCommandCompleter._extract_path_word("look at ./src/main.py") == "./src/main.py"
+
+    def test_home_path(self):
+        assert SlashCommandCompleter._extract_path_word("edit ~/docs/") == "~/docs/"
+
+    def test_absolute_path(self):
+        assert SlashCommandCompleter._extract_path_word("read /etc/hosts") == "/etc/hosts"
+
+    def test_parent_path(self):
+        assert SlashCommandCompleter._extract_path_word("check ../config.yaml") == "../config.yaml"
+
+    def test_path_with_slash_in_middle(self):
+        assert SlashCommandCompleter._extract_path_word("open src/utils/helpers.py") == "src/utils/helpers.py"
+
+    def test_plain_word_not_path(self):
+        assert SlashCommandCompleter._extract_path_word("hello world") is None
+
+    def test_empty_string(self):
+        assert SlashCommandCompleter._extract_path_word("") is None
+
+    def test_single_word_no_slash(self):
+        assert SlashCommandCompleter._extract_path_word("README.md") is None
+
+    def test_word_after_space(self):
+        assert SlashCommandCompleter._extract_path_word("fix the bug in ./tools/") == "./tools/"
+
+    def test_just_dot_slash(self):
+        assert SlashCommandCompleter._extract_path_word("./") == "./"
+
+    def test_just_tilde_slash(self):
+        assert SlashCommandCompleter._extract_path_word("~/") == "~/"
+
+
+class TestPathCompletions:
+    def test_lists_current_directory(self, tmp_path):
+        (tmp_path / "file_a.py").touch()
+        (tmp_path / "file_b.txt").touch()
+        (tmp_path / "subdir").mkdir()
+
+        old_cwd = os.getcwd()
+        os.chdir(tmp_path)
+        try:
+            completions = list(SlashCommandCompleter._path_completions("./"))
+            names = _display_names(completions)
+            assert "file_a.py" in names
+            assert "file_b.txt" in names
+            assert "subdir/" in names
+        finally:
+            os.chdir(old_cwd)
+
+    def test_filters_by_prefix(self, tmp_path):
+        (tmp_path / "alpha.py").touch()
+        (tmp_path / "beta.py").touch()
+        (tmp_path / "alpha_test.py").touch()
+
+        completions = list(SlashCommandCompleter._path_completions(f"{tmp_path}/alpha"))
+        names = _display_names(completions)
+        assert "alpha.py" in names
+        assert "alpha_test.py" in names
+        assert "beta.py" not in names
+
+    def test_directories_have_trailing_slash(self, tmp_path):
+        (tmp_path / "mydir").mkdir()
+        (tmp_path / "myfile.txt").touch()
+
+        completions = list(SlashCommandCompleter._path_completions(f"{tmp_path}/"))
+        names = _display_names(completions)
+        metas = _display_metas(completions)
+        assert "mydir/" in names
+        idx = names.index("mydir/")
+        assert metas[idx] == "dir"
+
+    def test_home_expansion(self, tmp_path, monkeypatch):
+        monkeypatch.setenv("HOME", str(tmp_path))
+        (tmp_path / "testfile.md").touch()
+
+        completions = list(SlashCommandCompleter._path_completions("~/test"))
+        names = _display_names(completions)
+        assert "testfile.md" in names
+
+    def test_nonexistent_dir_returns_empty(self):
+        completions = list(SlashCommandCompleter._path_completions("/nonexistent_dir_xyz/"))
+        assert completions == []
+
+    def test_respects_limit(self, tmp_path):
+        for i in range(50):
+            (tmp_path / f"file_{i:03d}.txt").touch()
+
+        completions = list(SlashCommandCompleter._path_completions(f"{tmp_path}/", limit=10))
+        assert len(completions) == 10
+
+    def test_case_insensitive_prefix(self, tmp_path):
+        (tmp_path / "README.md").touch()
+
+        completions = list(SlashCommandCompleter._path_completions(f"{tmp_path}/read"))
+        names = _display_names(completions)
+        assert "README.md" in names
+
+
+class TestIntegration:
+    """Test the completer produces path completions via the prompt_toolkit API."""
+
+    def test_slash_commands_still_work(self, completer):
+        doc = Document("/hel", cursor_position=4)
+        event = MagicMock()
+        completions = list(completer.get_completions(doc, event))
+        names = _display_names(completions)
+        assert "/help" in names
+
+    def test_path_completion_triggers_on_dot_slash(self, completer, tmp_path):
+        (tmp_path / "test.py").touch()
+        old_cwd = os.getcwd()
+        os.chdir(tmp_path)
+        try:
+            doc = Document("edit ./te", cursor_position=9)
+            event = MagicMock()
+            completions = list(completer.get_completions(doc, event))
+            names = _display_names(completions)
+            assert "test.py" in names
+        finally:
+            os.chdir(old_cwd)
+
+    def test_no_completion_for_plain_words(self, completer):
+        doc = Document("hello world", cursor_position=11)
+        event = MagicMock()
+        completions = list(completer.get_completions(doc, event))
+        assert completions == []
+
+    def test_absolute_path_triggers_completion(self, completer):
+        doc = Document("check /etc/hos", cursor_position=14)
+        event = MagicMock()
+        completions = list(completer.get_completions(doc, event))
+        names = _display_names(completions)
+        # /etc/hosts should exist on Linux
+        assert any("host" in n.lower() for n in names)
+
+
+class TestFileSizeLabel:
+    def test_bytes(self, tmp_path):
+        f = tmp_path / "small.txt"
+        f.write_text("hi")
+        assert _file_size_label(str(f)) == "2B"
+
+    def test_kilobytes(self, tmp_path):
+        f = tmp_path / "medium.txt"
+        f.write_bytes(b"x" * 2048)
+        assert _file_size_label(str(f)) == "2K"
+
+    def test_megabytes(self, tmp_path):
+        f = tmp_path / "large.bin"
+        f.write_bytes(b"x" * (2 * 1024 * 1024))
+        assert _file_size_label(str(f)) == "2.0M"
+
+    def test_nonexistent(self):
+        assert _file_size_label("/nonexistent_xyz") == ""
@@ -0,0 +1,64 @@
+import sys
+
+
+def test_sessions_delete_accepts_unique_id_prefix(monkeypatch, capsys):
+    import hermes_cli.main as main_mod
+    import hermes_state
+
+    captured = {}
+
+    class FakeDB:
+        def resolve_session_id(self, session_id):
+            captured["resolved_from"] = session_id
+            return "20260315_092437_c9a6ff"
+
+        def delete_session(self, session_id):
+            captured["deleted"] = session_id
+            return True
+
+        def close(self):
+            captured["closed"] = True
+
+    monkeypatch.setattr(hermes_state, "SessionDB", lambda: FakeDB())
+    monkeypatch.setattr(
+        sys,
+        "argv",
+        ["hermes", "sessions", "delete", "20260315_092437_c9a6", "--yes"],
+    )
+
+    main_mod.main()
+
+    output = capsys.readouterr().out
+    assert captured == {
+        "resolved_from": "20260315_092437_c9a6",
+        "deleted": "20260315_092437_c9a6ff",
+        "closed": True,
+    }
+    assert "Deleted session '20260315_092437_c9a6ff'." in output
+
+
+def test_sessions_delete_reports_not_found_when_prefix_is_unknown(monkeypatch, capsys):
+    import hermes_cli.main as main_mod
+    import hermes_state
+
+    class FakeDB:
+        def resolve_session_id(self, session_id):
+            return None
+
+        def delete_session(self, session_id):
+            raise AssertionError("delete_session should not be called when resolution fails")
+
+        def close(self):
+            pass
+
+    monkeypatch.setattr(hermes_state, "SessionDB", lambda: FakeDB())
+    monkeypatch.setattr(
+        sys,
+        "argv",
+        ["hermes", "sessions", "delete", "missing-prefix", "--yes"],
+    )
+
+    main_mod.main()
+
+    output = capsys.readouterr().out
+    assert "Session 'missing-prefix' not found." in output
@@ -115,3 +115,13 @@ class TestConfigYamlRouting:
        set_config_value("terminal.docker_image", "python:3.12")
        config = _read_config(_isolated_hermes_home)
        assert "python:3.12" in config
+
+    def test_terminal_docker_cwd_mount_flag_goes_to_config_and_env(self, _isolated_hermes_home):
+        set_config_value("terminal.docker_mount_cwd_to_workspace", "true")
+        config = _read_config(_isolated_hermes_home)
+        env_content = _read_env(_isolated_hermes_home)
+        assert "docker_mount_cwd_to_workspace: 'true'" in config or "docker_mount_cwd_to_workspace: true" in config
+        assert (
+            "TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE=true" in env_content
+            or "TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE=True" in env_content
+        )
@@ -75,6 +75,58 @@ def test_setup_keep_current_custom_from_config_does_not_fall_through(tmp_path, m
    assert calls["count"] == 1


+def test_setup_custom_endpoint_saves_working_v1_base_url(tmp_path, monkeypatch):
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+    _clear_provider_env(monkeypatch)
+
+    config = load_config()
+
+    def fake_prompt_choice(question, choices, default=0):
+        if question == "Select your inference provider:":
+            return 3  # Custom endpoint
+        if question == "Configure vision:":
+            return len(choices) - 1  # Skip
+        raise AssertionError(f"Unexpected prompt_choice call: {question}")
+
+    def fake_prompt(message, current=None, **kwargs):
+        if "API base URL" in message:
+            return "http://localhost:8000"
+        if "API key" in message:
+            return "local-key"
+        if "Model name" in message:
+            return "llm"
+        return ""
+
+    monkeypatch.setattr("hermes_cli.setup.prompt_choice", fake_prompt_choice)
+    monkeypatch.setattr("hermes_cli.setup.prompt", fake_prompt)
+    monkeypatch.setattr("hermes_cli.setup.prompt_yes_no", lambda *args, **kwargs: False)
+    monkeypatch.setattr("hermes_cli.auth.get_active_provider", lambda: None)
+    monkeypatch.setattr("hermes_cli.auth.detect_external_credentials", lambda: [])
+    monkeypatch.setattr("agent.auxiliary_client.get_available_vision_backends", lambda: [])
+    monkeypatch.setattr(
+        "hermes_cli.models.probe_api_models",
+        lambda api_key, base_url: {
+            "models": ["llm"],
+            "probed_url": "http://localhost:8000/v1/models",
+            "resolved_base_url": "http://localhost:8000/v1",
+            "suggested_base_url": "http://localhost:8000/v1",
+            "used_fallback": True,
+        },
+    )
+
+    setup_model_provider(config)
+    save_config(config)
+
+    env = _read_env(tmp_path)
+    reloaded = load_config()
+
+    assert env.get("OPENAI_BASE_URL") == "http://localhost:8000/v1"
+    assert env.get("OPENAI_API_KEY") == "local-key"
+    assert reloaded["model"]["provider"] == "custom"
+    assert reloaded["model"]["base_url"] == "http://localhost:8000/v1"
+    assert reloaded["model"]["default"] == "llm"
+
+
 def test_setup_keep_current_config_provider_uses_provider_specific_model_menu(tmp_path, monkeypatch):
    """Keep-current should respect config-backed providers, not fall back to OpenRouter."""
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
@@ -0,0 +1,29 @@
+from hermes_cli import setup as setup_mod
+
+
+def test_prompt_choice_uses_curses_helper(monkeypatch):
+    monkeypatch.setattr(setup_mod, "_curses_prompt_choice", lambda question, choices, default=0: 1)
+
+    idx = setup_mod.prompt_choice("Pick one", ["a", "b", "c"], default=0)
+
+    assert idx == 1
+
+
+def test_prompt_choice_falls_back_to_numbered_input(monkeypatch):
+    monkeypatch.setattr(setup_mod, "_curses_prompt_choice", lambda question, choices, default=0: -1)
+    monkeypatch.setattr("builtins.input", lambda _prompt="": "2")
+
+    idx = setup_mod.prompt_choice("Pick one", ["a", "b", "c"], default=0)
+
+    assert idx == 1
+
+
+def test_prompt_checklist_uses_shared_curses_checklist(monkeypatch):
+    monkeypatch.setattr(
+        "hermes_cli.curses_ui.curses_checklist",
+        lambda title, items, selected, cancel_returns=None: {0, 2},
+    )
+
+    selected = setup_mod.prompt_checklist("Pick tools", ["one", "two", "three"], pre_selected=[1])
+
+    assert selected == [0, 2]
@@ -1,6 +1,13 @@
 """Tests for hermes_cli.tools_config platform tool persistence."""

-from hermes_cli.tools_config import _get_platform_tools, _platform_toolset_summary, _toolset_has_keys
+from unittest.mock import patch
+
+from hermes_cli.tools_config import (
+    _get_platform_tools,
+    _platform_toolset_summary,
+    _save_platform_tools,
+    _toolset_has_keys,
+)


 def test_get_platform_tools_uses_default_when_platform_not_configured():
@@ -31,7 +38,7 @@ def test_platform_toolset_summary_uses_explicit_platform_list():
 def test_toolset_has_keys_for_vision_accepts_codex_auth(tmp_path, monkeypatch):
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    (tmp_path / "auth.json").write_text(
-        '{"active_provider":"openai-codex","providers":{"openai-codex":{"tokens":{"access_token":"codex-access-token","refresh_token":"codex-refresh-token"}}}}'
+        '{"active_provider":"openai-codex","providers":{"openai-codex":{"tokens":{"access_token": "codex-...oken","refresh_token": "codex-...oken"}}}}'
    )
    monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
    monkeypatch.delenv("OPENAI_BASE_URL", raising=False)
@@ -40,3 +47,56 @@ def test_toolset_has_keys_for_vision_accepts_codex_auth(tmp_path, monkeypatch):
    monkeypatch.delenv("CONTEXT_VISION_PROVIDER", raising=False)

    assert _toolset_has_keys("vision") is True
+
+
+def test_save_platform_tools_preserves_mcp_server_names():
+    """Ensure MCP server names are preserved when saving platform tools.
+
+    Regression test for https://github.com/NousResearch/hermes-agent/issues/1247
+    """
+    config = {
+        "platform_toolsets": {
+            "cli": ["web", "terminal", "time", "github", "custom-mcp-server"]
+        }
+    }
+
+    new_selection = {"web", "browser"}
+
+    with patch("hermes_cli.tools_config.save_config"):
+        _save_platform_tools(config, "cli", new_selection)
+
+    saved_toolsets = config["platform_toolsets"]["cli"]
+
+    assert "time" in saved_toolsets
+    assert "github" in saved_toolsets
+    assert "custom-mcp-server" in saved_toolsets
+    assert "web" in saved_toolsets
+    assert "browser" in saved_toolsets
+    assert "terminal" not in saved_toolsets
+
+
+def test_save_platform_tools_handles_empty_existing_config():
+    """Saving platform tools works when no existing config exists."""
+    config = {}
+
+    with patch("hermes_cli.tools_config.save_config"):
+        _save_platform_tools(config, "telegram", {"web", "terminal"})
+
+    saved_toolsets = config["platform_toolsets"]["telegram"]
+    assert "web" in saved_toolsets
+    assert "terminal" in saved_toolsets
+
+
+def test_save_platform_tools_handles_invalid_existing_config():
+    """Saving platform tools works when existing config is not a list."""
+    config = {
+        "platform_toolsets": {
+            "cli": "invalid-string-value"
+        }
+    }
+
+    with patch("hermes_cli.tools_config.save_config"):
+        _save_platform_tools(config, "cli", {"web"})
+
+    saved_toolsets = config["platform_toolsets"]["cli"]
+    assert "web" in saved_toolsets
@@ -134,6 +134,16 @@ def test_restore_stashed_changes_applies_without_prompt_when_disabled(monkeypatc



+def test_print_stash_cleanup_guidance_with_selector(capsys):
+    hermes_main._print_stash_cleanup_guidance("abc123", "stash@{2}")
+
+    out = capsys.readouterr().out
+    assert "Check `git status` first" in out
+    assert "git stash list --format='%gd %H %s'" in out
+    assert "git stash drop stash@{2}" in out
+
+
+
 def test_restore_stashed_changes_keeps_going_when_stash_entry_cannot_be_resolved(monkeypatch, tmp_path, capsys):
    calls = []

@@ -157,6 +167,8 @@ def test_restore_stashed_changes_keeps_going_when_stash_entry_cannot_be_resolved
    out = capsys.readouterr().out
    assert "couldn't find the stash entry to drop" in out
    assert "stash was left in place" in out
+    assert "Check `git status` first" in out
+    assert "git stash list --format='%gd %H %s'" in out
    assert "Look for commit abc123" in out


@@ -183,6 +195,8 @@ def test_restore_stashed_changes_keeps_going_when_drop_fails(monkeypatch, tmp_pa
    out = capsys.readouterr().out
    assert "couldn't drop the saved stash entry" in out
    assert "drop failed" in out
+    assert "Check `git status` first" in out
+    assert "git stash list --format='%gd %H %s'" in out
    assert "git stash drop stash@{0}" in out


@@ -0,0 +1,305 @@
+"""Tests for cmd_update gateway auto-restart — systemd + launchd coverage.
+
+Ensures ``hermes update`` correctly detects running gateways managed by
+systemd (Linux) or launchd (macOS) and restarts/informs the user properly,
+rather than leaving zombie processes or telling users to manually restart
+when launchd will auto-respawn.
+"""
+
+import subprocess
+from types import SimpleNamespace
+from unittest.mock import patch, MagicMock
+
+import pytest
+
+import hermes_cli.gateway as gateway_cli
+from hermes_cli.main import cmd_update
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def _make_run_side_effect(
+    branch="main",
+    verify_ok=True,
+    commit_count="3",
+    systemd_active=False,
+    launchctl_loaded=False,
+):
+    """Build a subprocess.run side_effect that simulates git + service commands."""
+
+    def side_effect(cmd, **kwargs):
+        joined = " ".join(str(c) for c in cmd)
+
+        # git rev-parse --abbrev-ref HEAD
+        if "rev-parse" in joined and "--abbrev-ref" in joined:
+            return subprocess.CompletedProcess(cmd, 0, stdout=f"{branch}\n", stderr="")
+
+        # git rev-parse --verify origin/{branch}
+        if "rev-parse" in joined and "--verify" in joined:
+            rc = 0 if verify_ok else 128
+            return subprocess.CompletedProcess(cmd, rc, stdout="", stderr="")
+
+        # git rev-list HEAD..origin/{branch} --count
+        if "rev-list" in joined:
+            return subprocess.CompletedProcess(cmd, 0, stdout=f"{commit_count}\n", stderr="")
+
+        # systemctl --user is-active
+        if "systemctl" in joined and "is-active" in joined:
+            if systemd_active:
+                return subprocess.CompletedProcess(cmd, 0, stdout="active\n", stderr="")
+            return subprocess.CompletedProcess(cmd, 3, stdout="inactive\n", stderr="")
+
+        # systemctl --user restart
+        if "systemctl" in joined and "restart" in joined:
+            return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="")
+
+        # launchctl list ai.hermes.gateway
+        if "launchctl" in joined and "list" in joined:
+            if launchctl_loaded:
+                return subprocess.CompletedProcess(cmd, 0, stdout="PID\tStatus\tLabel\n123\t0\tai.hermes.gateway\n", stderr="")
+            return subprocess.CompletedProcess(cmd, 113, stdout="", stderr="Could not find service")
+
+        return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="")
+
+    return side_effect
+
+
+@pytest.fixture
+def mock_args():
+    return SimpleNamespace()
+
+
+# ---------------------------------------------------------------------------
+# Launchd plist includes --replace
+# ---------------------------------------------------------------------------
+
+
+class TestLaunchdPlistReplace:
+    """The generated launchd plist must include --replace so respawned
+    gateways kill stale instances."""
+
+    def test_plist_contains_replace_flag(self):
+        plist = gateway_cli.generate_launchd_plist()
+        assert "--replace" in plist
+
+    def test_plist_program_arguments_order(self):
+        """--replace comes after 'run' in the ProgramArguments."""
+        plist = gateway_cli.generate_launchd_plist()
+        lines = [line.strip() for line in plist.splitlines()]
+        # Find 'run' and '--replace' in the string entries
+        string_values = [
+            line.replace("<string>", "").replace("</string>", "")
+            for line in lines
+            if "<string>" in line and "</string>" in line
+        ]
+        assert "run" in string_values
+        assert "--replace" in string_values
+        run_idx = string_values.index("run")
+        replace_idx = string_values.index("--replace")
+        assert replace_idx == run_idx + 1
+
+
+# ---------------------------------------------------------------------------
+# cmd_update — macOS launchd detection
+# ---------------------------------------------------------------------------
+
+
+class TestLaunchdPlistRefresh:
+    """refresh_launchd_plist_if_needed rewrites stale plists (like systemd's
+    refresh_systemd_unit_if_needed)."""
+
+    def test_refresh_rewrites_stale_plist(self, tmp_path, monkeypatch):
+        plist_path = tmp_path / "ai.hermes.gateway.plist"
+        plist_path.write_text("<plist>old content</plist>")
+
+        monkeypatch.setattr(gateway_cli, "get_launchd_plist_path", lambda: plist_path)
+
+        calls = []
+        def fake_run(cmd, check=False, **kwargs):
+            calls.append(cmd)
+            return SimpleNamespace(returncode=0, stdout="", stderr="")
+
+        monkeypatch.setattr(gateway_cli.subprocess, "run", fake_run)
+
+        result = gateway_cli.refresh_launchd_plist_if_needed()
+
+        assert result is True
+        # Plist should now contain the generated content (which includes --replace)
+        assert "--replace" in plist_path.read_text()
+        # Should have unloaded then reloaded
+        assert any("unload" in str(c) for c in calls)
+        assert any("load" in str(c) for c in calls)
+
+    def test_refresh_skips_when_current(self, tmp_path, monkeypatch):
+        plist_path = tmp_path / "ai.hermes.gateway.plist"
+        monkeypatch.setattr(gateway_cli, "get_launchd_plist_path", lambda: plist_path)
+
+        # Write the current expected content
+        plist_path.write_text(gateway_cli.generate_launchd_plist())
+
+        calls = []
+        monkeypatch.setattr(
+            gateway_cli.subprocess, "run",
+            lambda cmd, **kw: calls.append(cmd) or SimpleNamespace(returncode=0),
+        )
+
+        result = gateway_cli.refresh_launchd_plist_if_needed()
+
+        assert result is False
+        assert len(calls) == 0  # No launchctl calls needed
+
+    def test_refresh_skips_when_no_plist(self, tmp_path, monkeypatch):
+        plist_path = tmp_path / "nonexistent.plist"
+        monkeypatch.setattr(gateway_cli, "get_launchd_plist_path", lambda: plist_path)
+
+        result = gateway_cli.refresh_launchd_plist_if_needed()
+        assert result is False
+
+    def test_launchd_start_calls_refresh(self, tmp_path, monkeypatch):
+        """launchd_start refreshes the plist before starting."""
+        plist_path = tmp_path / "ai.hermes.gateway.plist"
+        plist_path.write_text("<plist>old</plist>")
+        monkeypatch.setattr(gateway_cli, "get_launchd_plist_path", lambda: plist_path)
+
+        calls = []
+        def fake_run(cmd, check=False, **kwargs):
+            calls.append(cmd)
+            return SimpleNamespace(returncode=0, stdout="", stderr="")
+
+        monkeypatch.setattr(gateway_cli.subprocess, "run", fake_run)
+
+        gateway_cli.launchd_start()
+
+        # First calls should be refresh (unload/load), then start
+        cmd_strs = [" ".join(c) for c in calls]
+        assert any("unload" in s for s in cmd_strs)
+        assert any("start" in s for s in cmd_strs)
+
+
+class TestCmdUpdateLaunchdRestart:
+    """cmd_update correctly detects and handles launchd on macOS."""
+
+    @patch("shutil.which", return_value=None)
+    @patch("subprocess.run")
+    def test_update_detects_launchd_and_skips_manual_restart_message(
+        self, mock_run, _mock_which, mock_args, capsys, tmp_path, monkeypatch,
+    ):
+        """When launchd is running the gateway, update should print
+        'auto-restart via launchd' instead of 'Restart it with: hermes gateway run'."""
+        # Create a fake launchd plist so is_macos + plist.exists() passes
+        plist_path = tmp_path / "ai.hermes.gateway.plist"
+        plist_path.write_text("<plist/>")
+
+        monkeypatch.setattr(
+            gateway_cli, "is_macos", lambda: True,
+        )
+        monkeypatch.setattr(
+            gateway_cli, "get_launchd_plist_path", lambda: plist_path,
+        )
+
+        mock_run.side_effect = _make_run_side_effect(
+            commit_count="3",
+            launchctl_loaded=True,
+        )
+
+        # Mock get_running_pid to return a PID
+        with patch("gateway.status.get_running_pid", return_value=12345), \
+             patch("gateway.status.remove_pid_file"):
+            cmd_update(mock_args)
+
+        captured = capsys.readouterr().out
+        assert "Gateway restarted via launchd" in captured
+        assert "Restart it with: hermes gateway run" not in captured
+        # Verify launchctl stop + start were called (not manual SIGTERM)
+        launchctl_calls = [
+            c for c in mock_run.call_args_list
+            if len(c.args[0]) > 0 and c.args[0][0] == "launchctl"
+        ]
+        stop_calls = [c for c in launchctl_calls if "stop" in c.args[0]]
+        start_calls = [c for c in launchctl_calls if "start" in c.args[0]]
+        assert len(stop_calls) >= 1
+        assert len(start_calls) >= 1
+
+    @patch("shutil.which", return_value=None)
+    @patch("subprocess.run")
+    def test_update_without_launchd_shows_manual_restart(
+        self, mock_run, _mock_which, mock_args, capsys, tmp_path, monkeypatch,
+    ):
+        """When no service manager is running, update should show the manual restart hint."""
+        monkeypatch.setattr(
+            gateway_cli, "is_macos", lambda: True,
+        )
+        plist_path = tmp_path / "ai.hermes.gateway.plist"
+        # plist does NOT exist — no launchd service
+        monkeypatch.setattr(
+            gateway_cli, "get_launchd_plist_path", lambda: plist_path,
+        )
+
+        mock_run.side_effect = _make_run_side_effect(
+            commit_count="3",
+            launchctl_loaded=False,
+        )
+
+        with patch("gateway.status.get_running_pid", return_value=12345), \
+             patch("gateway.status.remove_pid_file"), \
+             patch("os.kill"):
+            cmd_update(mock_args)
+
+        captured = capsys.readouterr().out
+        assert "Restart it with: hermes gateway run" in captured
+        assert "Gateway restarted via launchd" not in captured
+
+    @patch("shutil.which", return_value=None)
+    @patch("subprocess.run")
+    def test_update_with_systemd_still_restarts_via_systemd(
+        self, mock_run, _mock_which, mock_args, capsys, monkeypatch,
+    ):
+        """On Linux with systemd active, update should restart via systemctl."""
+        monkeypatch.setattr(
+            gateway_cli, "is_macos", lambda: False,
+        )
+
+        mock_run.side_effect = _make_run_side_effect(
+            commit_count="3",
+            systemd_active=True,
+        )
+
+        with patch("gateway.status.get_running_pid", return_value=12345), \
+             patch("gateway.status.remove_pid_file"), \
+             patch("os.kill"):
+            cmd_update(mock_args)
+
+        captured = capsys.readouterr().out
+        assert "Gateway restarted" in captured
+        # Verify systemctl restart was called
+        restart_calls = [
+            c for c in mock_run.call_args_list
+            if "restart" in " ".join(str(a) for a in c.args[0])
+            and "systemctl" in " ".join(str(a) for a in c.args[0])
+        ]
+        assert len(restart_calls) == 1
+
+    @patch("shutil.which", return_value=None)
+    @patch("subprocess.run")
+    def test_update_no_gateway_running_skips_restart(
+        self, mock_run, _mock_which, mock_args, capsys, monkeypatch,
+    ):
+        """When no gateway is running, update should skip the restart section entirely."""
+        monkeypatch.setattr(
+            gateway_cli, "is_macos", lambda: False,
+        )
+
+        mock_run.side_effect = _make_run_side_effect(
+            commit_count="3",
+            systemd_active=False,
+        )
+
+        with patch("gateway.status.get_running_pid", return_value=None):
+            cmd_update(mock_args)
+
+        captured = capsys.readouterr().out
+        assert "Stopped gateway" not in captured
+        assert "Gateway restarted" not in captured
+        assert "Gateway restarted via launchd" not in captured
@@ -0,0 +1,611 @@
+"""Integration tests for Discord voice channel audio flow.
+
+Uses real NaCl encryption and Opus codec (no mocks for crypto/codec).
+Does NOT require a Discord connection — tests the VoiceReceiver
+packet processing pipeline end-to-end.
+
+Requires: PyNaCl>=1.5.0, discord.py[voice] (opus codec)
+"""
+
+import struct
+import time
+import pytest
+
+pytestmark = pytest.mark.integration
+
+# Skip entire module if voice deps are missing
+pytest.importorskip("nacl.secret", reason="PyNaCl required for voice integration tests")
+discord = pytest.importorskip("discord", reason="discord.py required for voice integration tests")
+
+import nacl.secret
+
+try:
+    if not discord.opus.is_loaded():
+        import ctypes.util
+        opus_path = ctypes.util.find_library("opus")
+        if not opus_path:
+            import sys
+            for p in ("/opt/homebrew/lib/libopus.dylib", "/usr/local/lib/libopus.dylib"):
+                import os
+                if os.path.isfile(p):
+                    opus_path = p
+                    break
+        if opus_path:
+            discord.opus.load_opus(opus_path)
+    OPUS_AVAILABLE = discord.opus.is_loaded()
+except Exception:
+    OPUS_AVAILABLE = False
+
+from types import SimpleNamespace
+from unittest.mock import MagicMock
+from gateway.platforms.discord import VoiceReceiver
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def _make_secret_key():
+    """Generate a random 32-byte key."""
+    import os
+    return os.urandom(32)
+
+
+def _build_encrypted_rtp_packet(secret_key, opus_payload, ssrc=100, seq=1, timestamp=960):
+    """Build a real NaCl-encrypted RTP packet matching Discord's format.
+
+    Format: RTP header (12 bytes) + encrypted(opus) + 4-byte nonce
+    Encryption: aead_xchacha20_poly1305 with RTP header as AAD.
+    """
+    # RTP header: version=2, payload_type=0x78, no extension, no CSRC
+    header = struct.pack(">BBHII", 0x80, 0x78, seq, timestamp, ssrc)
+
+    # Encrypt with NaCl AEAD
+    box = nacl.secret.Aead(secret_key)
+    nonce_counter = struct.pack(">I", seq)  # 4-byte counter as nonce seed
+    # Full 24-byte nonce: counter in first 4 bytes, rest zeros
+    full_nonce = nonce_counter + b'\x00' * 20
+
+    enc_msg = box.encrypt(opus_payload, header, full_nonce)
+    ciphertext = enc_msg.ciphertext  # without nonce prefix
+
+    # Discord format: header + ciphertext + 4-byte nonce
+    return header + ciphertext + nonce_counter
+
+
+def _make_voice_receiver(secret_key, dave_session=None, bot_ssrc=9999,
+                         allowed_user_ids=None, members=None):
+    """Create a VoiceReceiver with real secret key."""
+    vc = MagicMock()
+    vc._connection.secret_key = list(secret_key)
+    vc._connection.dave_session = dave_session
+    vc._connection.ssrc = bot_ssrc
+    vc._connection.add_socket_listener = MagicMock()
+    vc._connection.remove_socket_listener = MagicMock()
+    vc._connection.hook = None
+    vc.user = SimpleNamespace(id=bot_ssrc)
+    vc.channel = MagicMock()
+    vc.channel.members = members or []
+    receiver = VoiceReceiver(vc, allowed_user_ids=allowed_user_ids)
+    receiver.start()
+    return receiver
+
+
+# ---------------------------------------------------------------------------
+# Tests
+# ---------------------------------------------------------------------------
+
+
+class TestRealNaClDecrypt:
+    """End-to-end: real NaCl encrypt → _on_packet decrypt → buffer."""
+
+    def test_valid_encrypted_packet_buffered(self):
+        """Real NaCl encrypted packet → decrypted → buffered."""
+        key = _make_secret_key()
+        opus_silence = b'\xf8\xff\xfe'
+        receiver = _make_voice_receiver(key)
+
+        packet = _build_encrypted_rtp_packet(key, opus_silence, ssrc=100)
+        receiver._on_packet(packet)
+
+        assert 100 in receiver._buffers
+        assert len(receiver._buffers[100]) > 0
+
+    def test_wrong_key_packet_dropped(self):
+        """Packet encrypted with wrong key → NaCl fails → not buffered."""
+        real_key = _make_secret_key()
+        wrong_key = _make_secret_key()
+        opus_silence = b'\xf8\xff\xfe'
+        receiver = _make_voice_receiver(real_key)
+
+        packet = _build_encrypted_rtp_packet(wrong_key, opus_silence, ssrc=100)
+        receiver._on_packet(packet)
+
+        assert len(receiver._buffers.get(100, b"")) == 0
+
+    def test_bot_ssrc_ignored(self):
+        """Packet from bot's own SSRC → ignored."""
+        key = _make_secret_key()
+        receiver = _make_voice_receiver(key, bot_ssrc=9999)
+
+        packet = _build_encrypted_rtp_packet(key, b'\xf8\xff\xfe', ssrc=9999)
+        receiver._on_packet(packet)
+
+        assert len(receiver._buffers) == 0
+
+    def test_multiple_packets_accumulate(self):
+        """Multiple valid packets → buffer grows."""
+        key = _make_secret_key()
+        receiver = _make_voice_receiver(key)
+
+        for seq in range(1, 6):
+            packet = _build_encrypted_rtp_packet(
+                key, b'\xf8\xff\xfe', ssrc=100, seq=seq, timestamp=960 * seq
+            )
+            receiver._on_packet(packet)
+
+        assert 100 in receiver._buffers
+        buf_size = len(receiver._buffers[100])
+        assert buf_size > 0, "Multiple packets should accumulate in buffer"
+
+    def test_different_ssrcs_separate_buffers(self):
+        """Packets from different SSRCs → separate buffers."""
+        key = _make_secret_key()
+        receiver = _make_voice_receiver(key)
+
+        for ssrc in [100, 200, 300]:
+            packet = _build_encrypted_rtp_packet(key, b'\xf8\xff\xfe', ssrc=ssrc)
+            receiver._on_packet(packet)
+
+        assert len(receiver._buffers) == 3
+        for ssrc in [100, 200, 300]:
+            assert ssrc in receiver._buffers
+
+
+class TestRealNaClWithDAVE:
+    """NaCl decrypt + DAVE passthrough scenarios with real crypto."""
+
+    def test_dave_unknown_ssrc_passthrough(self):
+        """DAVE enabled but SSRC unknown → skip DAVE, buffer audio."""
+        key = _make_secret_key()
+        dave = MagicMock()  # DAVE session present but SSRC not mapped
+        receiver = _make_voice_receiver(key, dave_session=dave)
+
+        packet = _build_encrypted_rtp_packet(key, b'\xf8\xff\xfe', ssrc=100)
+        receiver._on_packet(packet)
+
+        # DAVE decrypt not called (SSRC unknown)
+        dave.decrypt.assert_not_called()
+        # Audio still buffered via passthrough
+        assert 100 in receiver._buffers
+        assert len(receiver._buffers[100]) > 0
+
+    def test_dave_unencrypted_error_passthrough(self):
+        """DAVE raises 'Unencrypted' → use NaCl-decrypted data as-is."""
+        key = _make_secret_key()
+        dave = MagicMock()
+        dave.decrypt.side_effect = Exception(
+            "DecryptionFailed(UnencryptedWhenPassthroughDisabled)"
+        )
+        receiver = _make_voice_receiver(key, dave_session=dave)
+        receiver.map_ssrc(100, 42)
+
+        packet = _build_encrypted_rtp_packet(key, b'\xf8\xff\xfe', ssrc=100)
+        receiver._on_packet(packet)
+
+        # DAVE was called but failed → passthrough
+        dave.decrypt.assert_called_once()
+        assert 100 in receiver._buffers
+        assert len(receiver._buffers[100]) > 0
+
+    def test_dave_real_error_drops(self):
+        """DAVE raises non-Unencrypted error → packet dropped."""
+        key = _make_secret_key()
+        dave = MagicMock()
+        dave.decrypt.side_effect = Exception("KeyRotationFailed")
+        receiver = _make_voice_receiver(key, dave_session=dave)
+        receiver.map_ssrc(100, 42)
+
+        packet = _build_encrypted_rtp_packet(key, b'\xf8\xff\xfe', ssrc=100)
+        receiver._on_packet(packet)
+
+        assert len(receiver._buffers.get(100, b"")) == 0
+
+
+class TestFullVoiceFlow:
+    """End-to-end: encrypt → receive → buffer → silence detect → complete."""
+
+    def test_single_utterance_flow(self):
+        """Encrypt packets → buffer → silence → check_silence returns utterance."""
+        key = _make_secret_key()
+        receiver = _make_voice_receiver(key)
+        receiver.map_ssrc(100, 42)
+
+        # Send enough packets to exceed MIN_SPEECH_DURATION (0.5s)
+        # At 48kHz stereo 16-bit, each Opus silence frame decodes to ~3840 bytes
+        # Need 96000 bytes = ~25 frames
+        for seq in range(1, 30):
+            packet = _build_encrypted_rtp_packet(
+                key, b'\xf8\xff\xfe', ssrc=100, seq=seq, timestamp=960 * seq
+            )
+            receiver._on_packet(packet)
+
+        # Simulate silence by setting last_packet_time in the past
+        receiver._last_packet_time[100] = time.monotonic() - 3.0
+
+        completed = receiver.check_silence()
+        assert len(completed) == 1
+        user_id, pcm_data = completed[0]
+        assert user_id == 42
+        assert len(pcm_data) > 0
+
+    def test_utterance_with_ssrc_automap(self):
+        """No SPEAKING event → auto-map sole allowed user → utterance processed."""
+        key = _make_secret_key()
+        members = [
+            SimpleNamespace(id=9999, name="Bot"),
+            SimpleNamespace(id=42, name="Alice"),
+        ]
+        receiver = _make_voice_receiver(
+            key, allowed_user_ids={"42"}, members=members
+        )
+        # No map_ssrc call — simulating missing SPEAKING event
+
+        for seq in range(1, 30):
+            packet = _build_encrypted_rtp_packet(
+                key, b'\xf8\xff\xfe', ssrc=100, seq=seq, timestamp=960 * seq
+            )
+            receiver._on_packet(packet)
+
+        receiver._last_packet_time[100] = time.monotonic() - 3.0
+
+        completed = receiver.check_silence()
+        assert len(completed) == 1
+        assert completed[0][0] == 42  # auto-mapped to sole allowed user
+
+    def test_pause_blocks_during_playback(self):
+        """Pause receiver → packets ignored → resume → packets accepted."""
+        key = _make_secret_key()
+        receiver = _make_voice_receiver(key)
+
+        # Pause (echo prevention during TTS playback)
+        receiver.pause()
+        packet = _build_encrypted_rtp_packet(key, b'\xf8\xff\xfe', ssrc=100)
+        receiver._on_packet(packet)
+        assert len(receiver._buffers.get(100, b"")) == 0
+
+        # Resume
+        receiver.resume()
+        receiver._on_packet(packet)
+        assert 100 in receiver._buffers
+        assert len(receiver._buffers[100]) > 0
+
+    def test_corrupted_packet_ignored(self):
+        """Corrupted/truncated packet → silently ignored."""
+        key = _make_secret_key()
+        receiver = _make_voice_receiver(key)
+
+        # Too short
+        receiver._on_packet(b"\x00" * 5)
+        assert len(receiver._buffers) == 0
+
+        # Wrong RTP version
+        bad_header = struct.pack(">BBHII", 0x00, 0x78, 1, 960, 100)
+        receiver._on_packet(bad_header + b"\x00" * 20)
+        assert len(receiver._buffers) == 0
+
+        # Wrong payload type
+        bad_pt = struct.pack(">BBHII", 0x80, 0x00, 1, 960, 100)
+        receiver._on_packet(bad_pt + b"\x00" * 20)
+        assert len(receiver._buffers) == 0
+
+    def test_stop_cleans_everything(self):
+        """stop() clears all state cleanly."""
+        key = _make_secret_key()
+        receiver = _make_voice_receiver(key)
+        receiver.map_ssrc(100, 42)
+
+        for seq in range(1, 10):
+            packet = _build_encrypted_rtp_packet(
+                key, b'\xf8\xff\xfe', ssrc=100, seq=seq, timestamp=960 * seq
+            )
+            receiver._on_packet(packet)
+
+        assert len(receiver._buffers[100]) > 0
+
+        receiver.stop()
+        assert receiver._running is False
+        assert len(receiver._buffers) == 0
+        assert len(receiver._ssrc_to_user) == 0
+        assert len(receiver._decoders) == 0
+
+
+class TestSPEAKINGHook:
+    """SPEAKING event hook correctly maps SSRC to user_id."""
+
+    def test_speaking_hook_installed(self):
+        """start() installs speaking hook on connection."""
+        key = _make_secret_key()
+        receiver = _make_voice_receiver(key)
+        conn = receiver._vc._connection
+        # hook should be set (wrapped)
+        assert conn.hook is not None
+
+    def test_map_ssrc_via_speaking(self):
+        """SPEAKING op 5 event maps SSRC to user_id."""
+        key = _make_secret_key()
+        receiver = _make_voice_receiver(key)
+        receiver.map_ssrc(500, 12345)
+        assert receiver._ssrc_to_user[500] == 12345
+
+    def test_map_ssrc_overwrites(self):
+        """New SPEAKING event for same SSRC overwrites old mapping."""
+        key = _make_secret_key()
+        receiver = _make_voice_receiver(key)
+        receiver.map_ssrc(500, 111)
+        receiver.map_ssrc(500, 222)
+        assert receiver._ssrc_to_user[500] == 222
+
+    def test_speaking_mapped_audio_processed(self):
+        """After SSRC is mapped, audio from that SSRC gets correct user_id."""
+        key = _make_secret_key()
+        receiver = _make_voice_receiver(key)
+        receiver.map_ssrc(100, 42)
+
+        for seq in range(1, 30):
+            packet = _build_encrypted_rtp_packet(
+                key, b'\xf8\xff\xfe', ssrc=100, seq=seq, timestamp=960 * seq
+            )
+            receiver._on_packet(packet)
+
+        receiver._last_packet_time[100] = time.monotonic() - 3.0
+        completed = receiver.check_silence()
+        assert len(completed) == 1
+        assert completed[0][0] == 42
+
+
+class TestAuthFiltering:
+    """Only allowed users' audio should be processed."""
+
+    def test_allowed_user_audio_processed(self):
+        """Allowed user's utterance is returned by check_silence."""
+        key = _make_secret_key()
+        members = [
+            SimpleNamespace(id=9999, name="Bot"),
+            SimpleNamespace(id=42, name="Alice"),
+        ]
+        receiver = _make_voice_receiver(
+            key, allowed_user_ids={"42"}, members=members,
+        )
+        receiver.map_ssrc(100, 42)
+
+        for seq in range(1, 30):
+            packet = _build_encrypted_rtp_packet(
+                key, b'\xf8\xff\xfe', ssrc=100, seq=seq, timestamp=960 * seq
+            )
+            receiver._on_packet(packet)
+
+        receiver._last_packet_time[100] = time.monotonic() - 3.0
+        completed = receiver.check_silence()
+        assert len(completed) == 1
+        assert completed[0][0] == 42
+
+    def test_automap_rejects_unallowed_user(self):
+        """Auto-map refuses to map SSRC to user not in allowed list."""
+        key = _make_secret_key()
+        members = [
+            SimpleNamespace(id=9999, name="Bot"),
+            SimpleNamespace(id=42, name="Alice"),
+        ]
+        receiver = _make_voice_receiver(
+            key, allowed_user_ids={"99"},  # Alice not allowed
+            members=members,
+        )
+        # No map_ssrc — SSRC unknown, auto-map should reject
+
+        for seq in range(1, 30):
+            packet = _build_encrypted_rtp_packet(
+                key, b'\xf8\xff\xfe', ssrc=100, seq=seq, timestamp=960 * seq
+            )
+            receiver._on_packet(packet)
+
+        receiver._last_packet_time[100] = time.monotonic() - 3.0
+        completed = receiver.check_silence()
+        assert len(completed) == 0
+
+    def test_empty_allowlist_allows_all(self):
+        """Empty allowed_user_ids means no restriction."""
+        key = _make_secret_key()
+        members = [
+            SimpleNamespace(id=9999, name="Bot"),
+            SimpleNamespace(id=42, name="Alice"),
+        ]
+        receiver = _make_voice_receiver(
+            key, allowed_user_ids=None, members=members,
+        )
+
+        for seq in range(1, 30):
+            packet = _build_encrypted_rtp_packet(
+                key, b'\xf8\xff\xfe', ssrc=100, seq=seq, timestamp=960 * seq
+            )
+            receiver._on_packet(packet)
+
+        receiver._last_packet_time[100] = time.monotonic() - 3.0
+        completed = receiver.check_silence()
+        # Auto-mapped to sole non-bot member
+        assert len(completed) == 1
+        assert completed[0][0] == 42
+
+
+class TestRejoinFlow:
+    """Leave and rejoin: state cleanup and fresh receiver."""
+
+    def test_stop_then_new_receiver_clean_state(self):
+        """After stop(), a new receiver starts with empty state."""
+        key = _make_secret_key()
+        receiver1 = _make_voice_receiver(key)
+        receiver1.map_ssrc(100, 42)
+
+        for seq in range(1, 10):
+            packet = _build_encrypted_rtp_packet(
+                key, b'\xf8\xff\xfe', ssrc=100, seq=seq, timestamp=960 * seq
+            )
+            receiver1._on_packet(packet)
+
+        assert len(receiver1._buffers[100]) > 0
+        receiver1.stop()
+
+        # New receiver (simulates rejoin)
+        receiver2 = _make_voice_receiver(key)
+        assert len(receiver2._buffers) == 0
+        assert len(receiver2._ssrc_to_user) == 0
+        assert len(receiver2._decoders) == 0
+
+    def test_rejoin_new_ssrc_works(self):
+        """After rejoin, user may get new SSRC — still works."""
+        key = _make_secret_key()
+        receiver1 = _make_voice_receiver(key)
+        receiver1.map_ssrc(100, 42)  # old SSRC
+        receiver1.stop()
+
+        receiver2 = _make_voice_receiver(key)
+        receiver2.map_ssrc(200, 42)  # new SSRC after rejoin
+
+        for seq in range(1, 30):
+            packet = _build_encrypted_rtp_packet(
+                key, b'\xf8\xff\xfe', ssrc=200, seq=seq, timestamp=960 * seq
+            )
+            receiver2._on_packet(packet)
+
+        receiver2._last_packet_time[200] = time.monotonic() - 3.0
+        completed = receiver2.check_silence()
+        assert len(completed) == 1
+        assert completed[0][0] == 42
+
+    def test_rejoin_without_speaking_event_automap(self):
+        """Rejoin without SPEAKING event — auto-map sole allowed user."""
+        key = _make_secret_key()
+        members = [
+            SimpleNamespace(id=9999, name="Bot"),
+            SimpleNamespace(id=42, name="Alice"),
+        ]
+
+        # First session
+        receiver1 = _make_voice_receiver(
+            key, allowed_user_ids={"42"}, members=members,
+        )
+        receiver1.stop()
+
+        # Rejoin — new key (Discord may assign new secret_key)
+        new_key = _make_secret_key()
+        receiver2 = _make_voice_receiver(
+            new_key, allowed_user_ids={"42"}, members=members,
+        )
+        # No map_ssrc — simulating missing SPEAKING event
+
+        for seq in range(1, 30):
+            packet = _build_encrypted_rtp_packet(
+                new_key, b'\xf8\xff\xfe', ssrc=300, seq=seq, timestamp=960 * seq
+            )
+            receiver2._on_packet(packet)
+
+        receiver2._last_packet_time[300] = time.monotonic() - 3.0
+        completed = receiver2.check_silence()
+        assert len(completed) == 1
+        assert completed[0][0] == 42
+
+
+class TestMultiGuildIsolation:
+    """Each guild has independent voice state."""
+
+    def test_separate_receivers_independent(self):
+        """Two receivers (different guilds) don't interfere."""
+        key1 = _make_secret_key()
+        key2 = _make_secret_key()
+
+        receiver1 = _make_voice_receiver(key1, bot_ssrc=1111)
+        receiver2 = _make_voice_receiver(key2, bot_ssrc=2222)
+
+        receiver1.map_ssrc(100, 42)
+        receiver2.map_ssrc(200, 99)
+
+        # Send to receiver1
+        for seq in range(1, 10):
+            packet = _build_encrypted_rtp_packet(
+                key1, b'\xf8\xff\xfe', ssrc=100, seq=seq, timestamp=960 * seq
+            )
+            receiver1._on_packet(packet)
+
+        # receiver2 should be empty
+        assert len(receiver2._buffers) == 0
+        assert 100 in receiver1._buffers
+
+    def test_stop_one_doesnt_affect_other(self):
+        """Stopping one receiver doesn't affect another."""
+        key1 = _make_secret_key()
+        key2 = _make_secret_key()
+
+        receiver1 = _make_voice_receiver(key1)
+        receiver2 = _make_voice_receiver(key2)
+
+        receiver1.map_ssrc(100, 42)
+        receiver2.map_ssrc(200, 99)
+
+        for seq in range(1, 10):
+            packet = _build_encrypted_rtp_packet(
+                key2, b'\xf8\xff\xfe', ssrc=200, seq=seq, timestamp=960 * seq
+            )
+            receiver2._on_packet(packet)
+
+        receiver1.stop()
+
+        # receiver2 still has data
+        assert receiver2._running is True
+        assert len(receiver2._buffers[200]) > 0
+
+
+class TestEchoPreventionFlow:
+    """Receiver pause/resume during TTS playback prevents echo."""
+
+    def test_audio_during_pause_ignored(self):
+        """Audio arriving while paused is completely ignored."""
+        key = _make_secret_key()
+        receiver = _make_voice_receiver(key)
+        receiver.map_ssrc(100, 42)
+        receiver.pause()
+
+        for seq in range(1, 30):
+            packet = _build_encrypted_rtp_packet(
+                key, b'\xf8\xff\xfe', ssrc=100, seq=seq, timestamp=960 * seq
+            )
+            receiver._on_packet(packet)
+
+        assert len(receiver._buffers.get(100, b"")) == 0
+
+    def test_audio_after_resume_processed(self):
+        """Audio arriving after resume is processed normally."""
+        key = _make_secret_key()
+        receiver = _make_voice_receiver(key)
+        receiver.map_ssrc(100, 42)
+
+        # Pause → send packets → resume → send more packets
+        receiver.pause()
+        for seq in range(1, 5):
+            packet = _build_encrypted_rtp_packet(
+                key, b'\xf8\xff\xfe', ssrc=100, seq=seq, timestamp=960 * seq
+            )
+            receiver._on_packet(packet)
+        assert len(receiver._buffers.get(100, b"")) == 0
+
+        receiver.resume()
+        for seq in range(5, 35):
+            packet = _build_encrypted_rtp_packet(
+                key, b'\xf8\xff\xfe', ssrc=100, seq=seq, timestamp=960 * seq
+            )
+            receiver._on_packet(packet)
+
+        assert len(receiver._buffers[100]) > 0
+        receiver._last_packet_time[100] = time.monotonic() - 3.0
+        completed = receiver.check_silence()
+        assert len(completed) == 1
+        assert completed[0][0] == 42
@@ -16,126 +16,131 @@ from run_agent import AIAgent, IterationBudget
 from tools.delegate_tool import _run_single_child
 from tools.interrupt import set_interrupt, is_interrupted

-set_interrupt(False)
+def main() -> int:
+    set_interrupt(False)

-# Create parent agent (minimal)
-parent = AIAgent.__new__(AIAgent)
-parent._interrupt_requested = False
-parent._interrupt_message = None
-parent._active_children = []
-parent.quiet_mode = True
-parent.model = "test/model"
-parent.base_url = "http://localhost:1"
-parent.api_key = "test"
-parent.provider = "test"
-parent.api_mode = "chat_completions"
-parent.platform = "cli"
-parent.enabled_toolsets = ["terminal", "file"]
-parent.providers_allowed = None
-parent.providers_ignored = None
-parent.providers_order = None
-parent.provider_sort = None
-parent.max_tokens = None
-parent.reasoning_config = None
-parent.prefill_messages = None
-parent._session_db = None
-parent._delegate_depth = 0
-parent._delegate_spinner = None
-parent.tool_progress_callback = None
-parent.iteration_budget = IterationBudget(max_total=100)
-parent._client_kwargs = {"api_key": "test", "base_url": "http://localhost:1"}
+    # Create parent agent (minimal)
+    parent = AIAgent.__new__(AIAgent)
+    parent._interrupt_requested = False
+    parent._interrupt_message = None
+    parent._active_children = []
+    parent.quiet_mode = True
+    parent.model = "test/model"
+    parent.base_url = "http://localhost:1"
+    parent.api_key = "test"
+    parent.provider = "test"
+    parent.api_mode = "chat_completions"
+    parent.platform = "cli"
+    parent.enabled_toolsets = ["terminal", "file"]
+    parent.providers_allowed = None
+    parent.providers_ignored = None
+    parent.providers_order = None
+    parent.provider_sort = None
+    parent.max_tokens = None
+    parent.reasoning_config = None
+    parent.prefill_messages = None
+    parent._session_db = None
+    parent._delegate_depth = 0
+    parent._delegate_spinner = None
+    parent.tool_progress_callback = None
+    parent.iteration_budget = IterationBudget(max_total=100)
+    parent._client_kwargs = {"api_key": "test", "base_url": "http://localhost:1"}

-child_started = threading.Event()
-result_holder = [None]
+    child_started = threading.Event()
+    result_holder = [None]

+    def run_delegate():
+        with patch("run_agent.OpenAI") as MockOpenAI:
+            mock_client = MagicMock()

-def run_delegate():
-    with patch("run_agent.OpenAI") as MockOpenAI:
-        mock_client = MagicMock()
+            def slow_create(**kwargs):
+                time.sleep(3)
+                resp = MagicMock()
+                resp.choices = [MagicMock()]
+                resp.choices[0].message.content = "Done"
+                resp.choices[0].message.tool_calls = None
+                resp.choices[0].message.refusal = None
+                resp.choices[0].finish_reason = "stop"
+                resp.usage.prompt_tokens = 100
+                resp.usage.completion_tokens = 10
+                resp.usage.total_tokens = 110
+                resp.usage.prompt_tokens_details = None
+                return resp

-        def slow_create(**kwargs):
-            time.sleep(3)
-            resp = MagicMock()
-            resp.choices = [MagicMock()]
-            resp.choices[0].message.content = "Done"
-            resp.choices[0].message.tool_calls = None
-            resp.choices[0].message.refusal = None
-            resp.choices[0].finish_reason = "stop"
-            resp.usage.prompt_tokens = 100
-            resp.usage.completion_tokens = 10
-            resp.usage.total_tokens = 110
-            resp.usage.prompt_tokens_details = None
-            return resp
+            mock_client.chat.completions.create = slow_create
+            mock_client.close = MagicMock()
+            MockOpenAI.return_value = mock_client

-        mock_client.chat.completions.create = slow_create
-        mock_client.close = MagicMock()
-        MockOpenAI.return_value = mock_client
+            original_init = AIAgent.__init__

-        original_init = AIAgent.__init__
+            def patched_init(self_agent, *a, **kw):
+                original_init(self_agent, *a, **kw)
+                child_started.set()

-        def patched_init(self_agent, *a, **kw):
-            original_init(self_agent, *a, **kw)
-            child_started.set()
+            with patch.object(AIAgent, "__init__", patched_init):
+                try:
+                    result = _run_single_child(
+                        task_index=0,
+                        goal="Test slow task",
+                        context=None,
+                        toolsets=["terminal"],
+                        model="test/model",
+                        max_iterations=5,
+                        parent_agent=parent,
+                        task_count=1,
+                        override_provider="test",
+                        override_base_url="http://localhost:1",
+                        override_api_key="test",
+                        override_api_mode="chat_completions",
+                    )
+                    result_holder[0] = result
+                except Exception as e:
+                    print(f"ERROR in delegate: {e}")
+                    import traceback
+                    traceback.print_exc()

-        with patch.object(AIAgent, "__init__", patched_init):
-            try:
-                result = _run_single_child(
-                    task_index=0,
-                    goal="Test slow task",
-                    context=None,
-                    toolsets=["terminal"],
-                    model="test/model",
-                    max_iterations=5,
-                    parent_agent=parent,
-                    task_count=1,
-                    override_provider="test",
-                    override_base_url="http://localhost:1",
-                    override_api_key="test",
-                    override_api_mode="chat_completions",
-                )
-                result_holder[0] = result
-            except Exception as e:
-                print(f"ERROR in delegate: {e}")
-                import traceback
-                traceback.print_exc()
+    print("Starting agent thread...")
+    agent_thread = threading.Thread(target=run_delegate, daemon=True)
+    agent_thread.start()

+    started = child_started.wait(timeout=10)
+    if not started:
+        print("ERROR: Child never started")
+        set_interrupt(False)
+        return 1

-print("Starting agent thread...")
-agent_thread = threading.Thread(target=run_delegate, daemon=True)
-agent_thread.start()
+    time.sleep(0.5)

-started = child_started.wait(timeout=10)
-if not started:
-    print("ERROR: Child never started")
-    sys.exit(1)
+    print(f"Active children: {len(parent._active_children)}")
+    for i, c in enumerate(parent._active_children):
+        print(f"  Child {i}: _interrupt_requested={c._interrupt_requested}")

-time.sleep(0.5)
+    t0 = time.monotonic()
+    parent.interrupt("User typed a new message")
+    print("Called parent.interrupt()")

-print(f"Active children: {len(parent._active_children)}")
-for i, c in enumerate(parent._active_children):
-    print(f"  Child {i}: _interrupt_requested={c._interrupt_requested}")
+    for i, c in enumerate(parent._active_children):
+        print(f"  Child {i} after interrupt: _interrupt_requested={c._interrupt_requested}")
+    print(f"Global is_interrupted: {is_interrupted()}")

-t0 = time.monotonic()
-parent.interrupt("User typed a new message")
-print(f"Called parent.interrupt()")
+    agent_thread.join(timeout=10)
+    elapsed = time.monotonic() - t0
+    print(f"Agent thread finished in {elapsed:.2f}s")

-for i, c in enumerate(parent._active_children):
-    print(f"  Child {i} after interrupt: _interrupt_requested={c._interrupt_requested}")
-print(f"Global is_interrupted: {is_interrupted()}")
-
-agent_thread.join(timeout=10)
-elapsed = time.monotonic() - t0
-print(f"Agent thread finished in {elapsed:.2f}s")
-
-result = result_holder[0]
-if result:
-    print(f"Status: {result['status']}")
-    print(f"Duration: {result['duration_seconds']}s")
-    if elapsed < 2.0:
-        print("✅ PASS: Interrupt detected quickly!")
+    result = result_holder[0]
+    if result:
+        print(f"Status: {result['status']}")
+        print(f"Duration: {result['duration_seconds']}s")
+        if elapsed < 2.0:
+            print("✅ PASS: Interrupt detected quickly!")
+        else:
+            print(f"❌ FAIL: Took {elapsed:.2f}s — interrupt was too slow or not detected")
    else:
-        print(f"❌ FAIL: Took {elapsed:.2f}s — interrupt was too slow or not detected")
-else:
-    print("❌ FAIL: No result!")
+        print("❌ FAIL: No result!")

-set_interrupt(False)
+    set_interrupt(False)
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
@@ -495,6 +495,59 @@ class TestConvertMessages:
        assert len(result) == 1
        assert result[0]["role"] == "user"

+    def test_converts_user_image_url_blocks_to_anthropic_image_blocks(self):
+        messages = [
+            {
+                "role": "user",
+                "content": [
+                    {"type": "text", "text": "Can you see this?"},
+                    {"type": "image_url", "image_url": {"url": "https://example.com/cat.png"}},
+                ],
+            }
+        ]
+
+        _, result = convert_messages_to_anthropic(messages)
+
+        assert result == [
+            {
+                "role": "user",
+                "content": [
+                    {"type": "text", "text": "Can you see this?"},
+                    {"type": "image", "source": {"type": "url", "url": "https://example.com/cat.png"}},
+                ],
+            }
+        ]
+
+    def test_converts_data_url_image_blocks_to_base64_anthropic_image_blocks(self):
+        messages = [
+            {
+                "role": "user",
+                "content": [
+                    {"type": "input_text", "text": "What is in this screenshot?"},
+                    {"type": "input_image", "image_url": "data:image/png;base64,AAAA"},
+                ],
+            }
+        ]
+
+        _, result = convert_messages_to_anthropic(messages)
+
+        assert result == [
+            {
+                "role": "user",
+                "content": [
+                    {"type": "text", "text": "What is in this screenshot?"},
+                    {
+                        "type": "image",
+                        "source": {
+                            "type": "base64",
+                            "media_type": "image/png",
+                            "data": "AAAA",
+                        },
+                    },
+                ],
+            }
+        ]
+
    def test_converts_tool_calls(self):
        messages = [
            {
@@ -0,0 +1,480 @@
+"""Tests for Anthropic error handling in the agent retry loop.
+
+Covers all error paths in run_agent.py's run_conversation() for api_mode=anthropic_messages:
+- 429 rate limit → retried with backoff
+- 529 overloaded → retried with backoff
+- 400 bad request → non-retryable, immediate fail
+- 401 unauthorized → credential refresh + retry
+- 500 server error → retried with backoff
+- "prompt is too long" → context length error triggers compression
+"""
+
+import asyncio
+import sys
+import types
+from types import SimpleNamespace
+from unittest.mock import MagicMock, AsyncMock
+
+import pytest
+
+sys.modules.setdefault("fire", types.SimpleNamespace(Fire=lambda *a, **k: None))
+sys.modules.setdefault("firecrawl", types.SimpleNamespace(Firecrawl=object))
+sys.modules.setdefault("fal_client", types.SimpleNamespace())
+
+import gateway.run as gateway_run
+import run_agent
+from gateway.config import Platform
+from gateway.session import SessionSource
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _patch_agent_bootstrap(monkeypatch):
+    monkeypatch.setattr(
+        run_agent,
+        "get_tool_definitions",
+        lambda **kwargs: [
+            {
+                "type": "function",
+                "function": {
+                    "name": "terminal",
+                    "description": "Run shell commands.",
+                    "parameters": {"type": "object", "properties": {}},
+                },
+            }
+        ],
+    )
+    monkeypatch.setattr(run_agent, "check_toolset_requirements", lambda: {})
+
+
+def _anthropic_response(text: str):
+    """Simulate an Anthropic messages.create() response object."""
+    return SimpleNamespace(
+        content=[SimpleNamespace(type="text", text=text)],
+        stop_reason="end_turn",
+        usage=SimpleNamespace(input_tokens=10, output_tokens=5),
+        model="claude-sonnet-4-6-20250514",
+    )
+
+
+class _RateLimitError(Exception):
+    """Simulates Anthropic 429 rate limit error."""
+    def __init__(self):
+        super().__init__("Error code: 429 - Rate limit exceeded. Please retry after 30s.")
+        self.status_code = 429
+
+
+class _OverloadedError(Exception):
+    """Simulates Anthropic 529 overloaded error."""
+    def __init__(self):
+        super().__init__("Error code: 529 - API is temporarily overloaded.")
+        self.status_code = 529
+
+
+class _BadRequestError(Exception):
+    """Simulates Anthropic 400 bad request error (non-retryable)."""
+    def __init__(self):
+        super().__init__("Error code: 400 - Invalid model specified.")
+        self.status_code = 400
+
+
+class _UnauthorizedError(Exception):
+    """Simulates Anthropic 401 unauthorized error."""
+    def __init__(self):
+        super().__init__("Error code: 401 - Unauthorized. Invalid API key.")
+        self.status_code = 401
+
+
+class _ServerError(Exception):
+    """Simulates Anthropic 500 internal server error."""
+    def __init__(self):
+        super().__init__("Error code: 500 - Internal server error.")
+        self.status_code = 500
+
+
+class _PromptTooLongError(Exception):
+    """Simulates Anthropic prompt-too-long error (triggers context compression)."""
+    def __init__(self):
+        super().__init__("prompt is too long: 250000 tokens > 200000 maximum")
+        self.status_code = 400
+
+
+class _FakeAnthropicClient:
+    def close(self):
+        pass
+
+
+def _fake_build_anthropic_client(key, base_url=None):
+    return _FakeAnthropicClient()
+
+
+def _make_agent_cls(error_cls, recover_after=None):
+    """Create an AIAgent subclass that raises error_cls on API calls.
+
+    If recover_after is set, the agent succeeds after that many failures.
+    """
+
+    class _Agent(run_agent.AIAgent):
+        def __init__(self, *args, **kwargs):
+            kwargs.setdefault("skip_context_files", True)
+            kwargs.setdefault("skip_memory", True)
+            kwargs.setdefault("max_iterations", 4)
+            super().__init__(*args, **kwargs)
+            self._cleanup_task_resources = lambda task_id: None
+            self._persist_session = lambda messages, history=None: None
+            self._save_trajectory = lambda messages, user_message, completed: None
+            self._save_session_log = lambda messages: None
+
+        def run_conversation(self, user_message, conversation_history=None, task_id=None):
+            calls = {"n": 0}
+
+            def _fake_api_call(api_kwargs):
+                calls["n"] += 1
+                if recover_after is not None and calls["n"] > recover_after:
+                    return _anthropic_response("Recovered")
+                raise error_cls()
+
+            self._interruptible_api_call = _fake_api_call
+            return super().run_conversation(
+                user_message, conversation_history=conversation_history, task_id=task_id
+            )
+
+    return _Agent
+
+
+def _run_with_agent(monkeypatch, agent_cls):
+    """Run _run_agent through the gateway with the given agent class."""
+    _patch_agent_bootstrap(monkeypatch)
+    monkeypatch.setattr(
+        "agent.anthropic_adapter.build_anthropic_client", _fake_build_anthropic_client
+    )
+    monkeypatch.setattr(run_agent, "AIAgent", agent_cls)
+    monkeypatch.setattr(
+        gateway_run,
+        "_resolve_runtime_agent_kwargs",
+        lambda: {
+            "provider": "anthropic",
+            "api_mode": "anthropic_messages",
+            "base_url": "https://api.anthropic.com",
+            "api_key": "sk-ant-api03-test-key",
+        },
+    )
+    monkeypatch.setenv("HERMES_TOOL_PROGRESS", "false")
+
+    runner = gateway_run.GatewayRunner.__new__(gateway_run.GatewayRunner)
+    runner.adapters = {}
+    runner._ephemeral_system_prompt = ""
+    runner._prefill_messages = []
+    runner._reasoning_config = None
+    runner._provider_routing = {}
+    runner._fallback_model = None
+    runner._running_agents = {}
+    runner.hooks = MagicMock()
+    runner.hooks.emit = AsyncMock()
+    runner.hooks.loaded_hooks = []
+    runner._session_db = None
+
+    source = SessionSource(
+        platform=Platform.LOCAL,
+        chat_id="cli",
+        chat_name="CLI",
+        chat_type="dm",
+        user_id="test-user-1",
+    )
+
+    return asyncio.run(
+        runner._run_agent(
+            message="hello",
+            context_prompt="",
+            history=[],
+            source=source,
+            session_id="test-session",
+            session_key="agent:main:local:dm",
+        )
+    )
+
+
+# ---------------------------------------------------------------------------
+# Tests
+# ---------------------------------------------------------------------------
+
+
+def test_429_rate_limit_is_retried_and_recovers(monkeypatch):
+    """429 should be retried with backoff. First call fails, second succeeds."""
+    agent_cls = _make_agent_cls(_RateLimitError, recover_after=1)
+    result = _run_with_agent(monkeypatch, agent_cls)
+    assert result["final_response"] == "Recovered"
+
+
+def test_529_overloaded_is_retried_and_recovers(monkeypatch):
+    """529 should be retried with backoff. First call fails, second succeeds."""
+    agent_cls = _make_agent_cls(_OverloadedError, recover_after=1)
+    result = _run_with_agent(monkeypatch, agent_cls)
+    assert result["final_response"] == "Recovered"
+
+
+def test_429_exhausts_all_retries_before_raising(monkeypatch):
+    """429 must retry max_retries times, not abort on first attempt."""
+    agent_cls = _make_agent_cls(_RateLimitError)  # always fails
+    with pytest.raises(_RateLimitError):
+        _run_with_agent(monkeypatch, agent_cls)
+
+
+def test_400_bad_request_is_non_retryable(monkeypatch):
+    """400 should fail immediately with only 1 API call (regression guard)."""
+    agent_cls = _make_agent_cls(_BadRequestError)
+    result = _run_with_agent(monkeypatch, agent_cls)
+    assert result["api_calls"] == 1
+    assert "400" in str(result.get("final_response", ""))
+
+
+def test_500_server_error_is_retried_and_recovers(monkeypatch):
+    """500 should be retried with backoff. First call fails, second succeeds."""
+    agent_cls = _make_agent_cls(_ServerError, recover_after=1)
+    result = _run_with_agent(monkeypatch, agent_cls)
+    assert result["final_response"] == "Recovered"
+
+
+def test_401_credential_refresh_recovers(monkeypatch):
+    """401 should trigger credential refresh and retry once."""
+    _patch_agent_bootstrap(monkeypatch)
+    monkeypatch.setattr(
+        "agent.anthropic_adapter.build_anthropic_client", _fake_build_anthropic_client
+    )
+    monkeypatch.setenv("HERMES_TOOL_PROGRESS", "false")
+
+    refresh_count = {"n": 0}
+
+    class _Auth401ThenSuccessAgent(run_agent.AIAgent):
+        def __init__(self, *args, **kwargs):
+            kwargs.setdefault("skip_context_files", True)
+            kwargs.setdefault("skip_memory", True)
+            kwargs.setdefault("max_iterations", 4)
+            super().__init__(*args, **kwargs)
+            self._cleanup_task_resources = lambda task_id: None
+            self._persist_session = lambda messages, history=None: None
+            self._save_trajectory = lambda messages, user_message, completed: None
+            self._save_session_log = lambda messages: None
+
+        def _try_refresh_anthropic_client_credentials(self) -> bool:
+            refresh_count["n"] += 1
+            return True  # Simulate successful credential refresh
+
+        def run_conversation(self, user_message, conversation_history=None, task_id=None):
+            calls = {"n": 0}
+
+            def _fake_api_call(api_kwargs):
+                calls["n"] += 1
+                if calls["n"] == 1:
+                    raise _UnauthorizedError()
+                return _anthropic_response("Auth refreshed")
+
+            self._interruptible_api_call = _fake_api_call
+            return super().run_conversation(
+                user_message, conversation_history=conversation_history, task_id=task_id
+            )
+
+    monkeypatch.setattr(run_agent, "AIAgent", _Auth401ThenSuccessAgent)
+    monkeypatch.setattr(
+        gateway_run,
+        "_resolve_runtime_agent_kwargs",
+        lambda: {
+            "provider": "anthropic",
+            "api_mode": "anthropic_messages",
+            "base_url": "https://api.anthropic.com",
+            "api_key": "sk-ant-api03-test-key",
+        },
+    )
+
+    runner = gateway_run.GatewayRunner.__new__(gateway_run.GatewayRunner)
+    runner.adapters = {}
+    runner._ephemeral_system_prompt = ""
+    runner._prefill_messages = []
+    runner._reasoning_config = None
+    runner._provider_routing = {}
+    runner._fallback_model = None
+    runner._running_agents = {}
+    runner.hooks = MagicMock()
+    runner.hooks.emit = AsyncMock()
+    runner.hooks.loaded_hooks = []
+    runner._session_db = None
+
+    source = SessionSource(
+        platform=Platform.LOCAL, chat_id="cli", chat_name="CLI",
+        chat_type="dm", user_id="test-user-1",
+    )
+
+    result = asyncio.run(
+        runner._run_agent(
+            message="hello", context_prompt="", history=[],
+            source=source, session_id="session-401",
+            session_key="agent:main:local:dm",
+        )
+    )
+
+    assert result["final_response"] == "Auth refreshed"
+    assert refresh_count["n"] == 1
+
+
+def test_401_refresh_fails_is_non_retryable(monkeypatch):
+    """401 with failed credential refresh should be treated as non-retryable."""
+    _patch_agent_bootstrap(monkeypatch)
+    monkeypatch.setattr(
+        "agent.anthropic_adapter.build_anthropic_client", _fake_build_anthropic_client
+    )
+    monkeypatch.setenv("HERMES_TOOL_PROGRESS", "false")
+
+    class _Auth401AlwaysFailAgent(run_agent.AIAgent):
+        def __init__(self, *args, **kwargs):
+            kwargs.setdefault("skip_context_files", True)
+            kwargs.setdefault("skip_memory", True)
+            kwargs.setdefault("max_iterations", 4)
+            super().__init__(*args, **kwargs)
+            self._cleanup_task_resources = lambda task_id: None
+            self._persist_session = lambda messages, history=None: None
+            self._save_trajectory = lambda messages, user_message, completed: None
+            self._save_session_log = lambda messages: None
+
+        def _try_refresh_anthropic_client_credentials(self) -> bool:
+            return False  # Simulate failed credential refresh
+
+        def run_conversation(self, user_message, conversation_history=None, task_id=None):
+            def _fake_api_call(api_kwargs):
+                raise _UnauthorizedError()
+
+            self._interruptible_api_call = _fake_api_call
+            return super().run_conversation(
+                user_message, conversation_history=conversation_history, task_id=task_id
+            )
+
+    monkeypatch.setattr(run_agent, "AIAgent", _Auth401AlwaysFailAgent)
+    monkeypatch.setattr(
+        gateway_run,
+        "_resolve_runtime_agent_kwargs",
+        lambda: {
+            "provider": "anthropic",
+            "api_mode": "anthropic_messages",
+            "base_url": "https://api.anthropic.com",
+            "api_key": "sk-ant-api03-test-key",
+        },
+    )
+
+    runner = gateway_run.GatewayRunner.__new__(gateway_run.GatewayRunner)
+    runner.adapters = {}
+    runner._ephemeral_system_prompt = ""
+    runner._prefill_messages = []
+    runner._reasoning_config = None
+    runner._provider_routing = {}
+    runner._fallback_model = None
+    runner._running_agents = {}
+    runner.hooks = MagicMock()
+    runner.hooks.emit = AsyncMock()
+    runner.hooks.loaded_hooks = []
+    runner._session_db = None
+
+    source = SessionSource(
+        platform=Platform.LOCAL, chat_id="cli", chat_name="CLI",
+        chat_type="dm", user_id="test-user-1",
+    )
+
+    result = asyncio.run(
+        runner._run_agent(
+            message="hello", context_prompt="", history=[],
+            source=source, session_id="session-401-fail",
+            session_key="agent:main:local:dm",
+        )
+    )
+
+    # 401 after failed refresh → non-retryable (falls through to is_client_error)
+    assert result["api_calls"] == 1
+    assert "401" in str(result.get("final_response", "")) or "unauthorized" in str(result.get("final_response", "")).lower()
+
+
+def test_prompt_too_long_triggers_compression(monkeypatch):
+    """Anthropic 'prompt is too long' error should trigger context compression, not immediate fail."""
+    _patch_agent_bootstrap(monkeypatch)
+    monkeypatch.setattr(
+        "agent.anthropic_adapter.build_anthropic_client", _fake_build_anthropic_client
+    )
+    monkeypatch.setenv("HERMES_TOOL_PROGRESS", "false")
+
+    class _PromptTooLongThenSuccessAgent(run_agent.AIAgent):
+        compress_called = 0
+
+        def __init__(self, *args, **kwargs):
+            kwargs.setdefault("skip_context_files", True)
+            kwargs.setdefault("skip_memory", True)
+            kwargs.setdefault("max_iterations", 4)
+            super().__init__(*args, **kwargs)
+            self._cleanup_task_resources = lambda task_id: None
+            self._persist_session = lambda messages, history=None: None
+            self._save_trajectory = lambda messages, user_message, completed: None
+            self._save_session_log = lambda messages: None
+
+        def _compress_context(self, messages, system_message, approx_tokens=0, task_id=None):
+            type(self).compress_called += 1
+            # Simulate compression by dropping oldest non-system message
+            if len(messages) > 2:
+                compressed = [messages[0]] + messages[2:]
+            else:
+                compressed = messages
+            return compressed, system_message
+
+        def run_conversation(self, user_message, conversation_history=None, task_id=None):
+            calls = {"n": 0}
+
+            def _fake_api_call(api_kwargs):
+                calls["n"] += 1
+                if calls["n"] == 1:
+                    raise _PromptTooLongError()
+                return _anthropic_response("Compressed and recovered")
+
+            self._interruptible_api_call = _fake_api_call
+            return super().run_conversation(
+                user_message, conversation_history=conversation_history, task_id=task_id
+            )
+
+    _PromptTooLongThenSuccessAgent.compress_called = 0
+    monkeypatch.setattr(run_agent, "AIAgent", _PromptTooLongThenSuccessAgent)
+    monkeypatch.setattr(
+        gateway_run,
+        "_resolve_runtime_agent_kwargs",
+        lambda: {
+            "provider": "anthropic",
+            "api_mode": "anthropic_messages",
+            "base_url": "https://api.anthropic.com",
+            "api_key": "sk-ant-api03-test-key",
+        },
+    )
+
+    runner = gateway_run.GatewayRunner.__new__(gateway_run.GatewayRunner)
+    runner.adapters = {}
+    runner._ephemeral_system_prompt = ""
+    runner._prefill_messages = []
+    runner._reasoning_config = None
+    runner._provider_routing = {}
+    runner._fallback_model = None
+    runner._running_agents = {}
+    runner.hooks = MagicMock()
+    runner.hooks.emit = AsyncMock()
+    runner.hooks.loaded_hooks = []
+    runner._session_db = None
+
+    source = SessionSource(
+        platform=Platform.LOCAL, chat_id="cli", chat_name="CLI",
+        chat_type="dm", user_id="test-user-1",
+    )
+
+    result = asyncio.run(
+        runner._run_agent(
+            message="hello", context_prompt="", history=[],
+            source=source, session_id="session-prompt-long",
+            session_key="agent:main:local:dm",
+        )
+    )
+
+    assert result["final_response"] == "Compressed and recovered"
+    assert _PromptTooLongThenSuccessAgent.compress_called >= 1
@@ -68,6 +68,22 @@ class TestAtomicJsonWrite:
        tmp_files = [f for f in tmp_path.iterdir() if ".tmp" in f.name]
        assert len(tmp_files) == 0

+    def test_cleans_up_temp_file_on_baseexception(self, tmp_path):
+        class SimulatedAbort(BaseException):
+            pass
+
+        target = tmp_path / "data.json"
+        original = {"preserved": True}
+        target.write_text(json.dumps(original), encoding="utf-8")
+
+        with patch("utils.json.dump", side_effect=SimulatedAbort):
+            with pytest.raises(SimulatedAbort):
+                atomic_json_write(target, {"new": True})
+
+        tmp_files = [f for f in tmp_path.iterdir() if ".tmp" in f.name]
+        assert len(tmp_files) == 0
+        assert json.loads(target.read_text(encoding="utf-8")) == original
+
    def test_accepts_string_path(self, tmp_path):
        target = str(tmp_path / "string_path.json")
        atomic_json_write(target, {"string": True})
@@ -0,0 +1,44 @@
+"""Tests for utils.atomic_yaml_write — crash-safe YAML file writes."""
+
+from pathlib import Path
+from unittest.mock import patch
+
+import pytest
+import yaml
+
+from utils import atomic_yaml_write
+
+
+class TestAtomicYamlWrite:
+    def test_writes_valid_yaml(self, tmp_path):
+        target = tmp_path / "data.yaml"
+        data = {"key": "value", "nested": {"a": 1}}
+
+        atomic_yaml_write(target, data)
+
+        assert yaml.safe_load(target.read_text(encoding="utf-8")) == data
+
+    def test_cleans_up_temp_file_on_baseexception(self, tmp_path):
+        class SimulatedAbort(BaseException):
+            pass
+
+        target = tmp_path / "data.yaml"
+        original = {"preserved": True}
+        target.write_text(yaml.safe_dump(original), encoding="utf-8")
+
+        with patch("utils.yaml.dump", side_effect=SimulatedAbort):
+            with pytest.raises(SimulatedAbort):
+                atomic_yaml_write(target, {"new": True})
+
+        tmp_files = [f for f in tmp_path.iterdir() if ".tmp" in f.name]
+        assert len(tmp_files) == 0
+        assert yaml.safe_load(target.read_text(encoding="utf-8")) == original
+
+    def test_appends_extra_content(self, tmp_path):
+        target = tmp_path / "data.yaml"
+
+        atomic_yaml_write(target, {"key": "value"}, extra_content="\n# comment\n")
+
+        text = target.read_text(encoding="utf-8")
+        assert "key: value" in text
+        assert "# comment" in text
@@ -0,0 +1,103 @@
+"""Tests for automatic MCP reload when config.yaml mcp_servers section changes."""
+import time
+from pathlib import Path
+from unittest.mock import MagicMock, patch
+
+
+def _make_cli(tmp_path, mcp_servers=None):
+    """Create a minimal HermesCLI instance with mocked config."""
+    import cli as cli_mod
+    obj = object.__new__(cli_mod.HermesCLI)
+    obj.config = {"mcp_servers": mcp_servers or {}}
+    obj._agent_running = False
+    obj._last_config_check = 0.0
+    obj._config_mcp_servers = mcp_servers or {}
+
+    cfg_file = tmp_path / "config.yaml"
+    cfg_file.write_text("mcp_servers: {}\n")
+    obj._config_mtime = cfg_file.stat().st_mtime
+
+    obj._reload_mcp = MagicMock()
+    obj._busy_command = MagicMock()
+    obj._busy_command.return_value.__enter__ = MagicMock(return_value=None)
+    obj._busy_command.return_value.__exit__ = MagicMock(return_value=False)
+    obj._slow_command_status = MagicMock(return_value="reloading...")
+
+    return obj, cfg_file
+
+
+class TestMCPConfigWatch:
+
+    def test_no_change_does_not_reload(self, tmp_path):
+        """If mtime and mcp_servers unchanged, _reload_mcp is NOT called."""
+        obj, cfg_file = _make_cli(tmp_path)
+
+        with patch("hermes_cli.config.get_config_path", return_value=cfg_file):
+            obj._check_config_mcp_changes()
+
+        obj._reload_mcp.assert_not_called()
+
+    def test_mtime_change_with_same_mcp_servers_does_not_reload(self, tmp_path):
+        """If file mtime changes but mcp_servers is identical, no reload."""
+        import yaml
+        obj, cfg_file = _make_cli(tmp_path, mcp_servers={"fs": {"command": "npx"}})
+
+        # Write same mcp_servers but touch the file
+        cfg_file.write_text(yaml.dump({"mcp_servers": {"fs": {"command": "npx"}}}))
+        # Force mtime to appear changed
+        obj._config_mtime = 0.0
+
+        with patch("hermes_cli.config.get_config_path", return_value=cfg_file):
+            obj._check_config_mcp_changes()
+
+        obj._reload_mcp.assert_not_called()
+
+    def test_new_mcp_server_triggers_reload(self, tmp_path):
+        """Adding a new MCP server to config triggers auto-reload."""
+        import yaml
+        obj, cfg_file = _make_cli(tmp_path, mcp_servers={})
+
+        # Simulate user adding a new MCP server to config.yaml
+        cfg_file.write_text(yaml.dump({"mcp_servers": {"github": {"url": "https://mcp.github.com"}}}))
+        obj._config_mtime = 0.0  # force stale mtime
+
+        with patch("hermes_cli.config.get_config_path", return_value=cfg_file):
+            obj._check_config_mcp_changes()
+
+        obj._reload_mcp.assert_called_once()
+
+    def test_removed_mcp_server_triggers_reload(self, tmp_path):
+        """Removing an MCP server from config triggers auto-reload."""
+        import yaml
+        obj, cfg_file = _make_cli(tmp_path, mcp_servers={"github": {"url": "https://mcp.github.com"}})
+
+        # Simulate user removing the server
+        cfg_file.write_text(yaml.dump({"mcp_servers": {}}))
+        obj._config_mtime = 0.0
+
+        with patch("hermes_cli.config.get_config_path", return_value=cfg_file):
+            obj._check_config_mcp_changes()
+
+        obj._reload_mcp.assert_called_once()
+
+    def test_interval_throttle_skips_check(self, tmp_path):
+        """If called within CONFIG_WATCH_INTERVAL, stat() is skipped."""
+        obj, cfg_file = _make_cli(tmp_path)
+        obj._last_config_check = time.monotonic()  # just checked
+
+        with patch("hermes_cli.config.get_config_path", return_value=cfg_file), \
+             patch.object(Path, "stat") as mock_stat:
+            obj._check_config_mcp_changes()
+            mock_stat.assert_not_called()
+
+        obj._reload_mcp.assert_not_called()
+
+    def test_missing_config_file_does_not_crash(self, tmp_path):
+        """If config.yaml doesn't exist, _check_config_mcp_changes is a no-op."""
+        obj, cfg_file = _make_cli(tmp_path)
+        missing = tmp_path / "nonexistent.yaml"
+
+        with patch("hermes_cli.config.get_config_path", return_value=missing):
+            obj._check_config_mcp_changes()  # should not raise
+
+        obj._reload_mcp.assert_not_called()
--- a/Show More
+++ b/Show More