refactor: remove config TypedDicts and fix ImportError propagation in clipboard

Remove 44 TypedDict classes from config.py — they were already stale (11 missing keys) and load_config() still returns Dict[str, Any], so they provided zero type-checking value. Keep the int() coercions and Dict[str, Any] annotations which are real fixes. Fix _wayland_save() swallowing ImportError at DEBUG level by adding an explicit except ImportError: raise before the broad except Exception.
Clean up TODO comment in auxiliary_client.py
2026-04-23 17:41:25 +05:30 · 2026-04-23 17:41:18 +05:30 · 2026-04-23 17:41:18 +05:30 · 2026-04-23 17:41:18 +05:30 · 2026-04-23 17:41:18 +05:30 · 2026-04-23 17:41:10 +05:30
249 changed files with 18525 additions and 5969 deletions
@@ -14,3 +14,6 @@ node_modules
 .env

 *.md
+
+# Runtime data (bind-mounted at /opt/data; must not leak into build context)
+data/
@@ -1,3 +1,4 @@
+.DS_Store
 /venv/
 /_pycache/
 *.pyc*
@@ -55,10 +55,10 @@ If your skill is specialized, community-contributed, or niche, it's better suite

 | Requirement | Notes |
 |-------------|-------|
-| **Git** | With `--recurse-submodules` support |
+| **Git** | With `--recurse-submodules` support, and the `git-lfs` extension installed |
 | **Python 3.11+** | uv will install it if missing |
 | **uv** | Fast Python package manager ([install](https://docs.astral.sh/uv/)) |
-| **Node.js 18+** | Optional — needed for browser tools and WhatsApp bridge |
+| **Node.js 20+** | Optional — needed for browser tools and WhatsApp bridge (matches root `package.json` engines) |

 ### Clone and install

@@ -88,7 +88,7 @@ cp cli-config.yaml.example ~/.hermes/config.yaml
 touch ~/.hermes/.env

 # Add at minimum an LLM provider key:
-echo 'OPENROUTER_API_KEY=sk-or-v1-your-key' >> ~/.hermes/.env
+echo "OPENROUTER_API_KEY=***" >> ~/.hermes/.env
 ```

 ### Run
@@ -12,7 +12,7 @@ ENV PLAYWRIGHT_BROWSERS_PATH=/opt/hermes/.playwright
 # Install system dependencies in one layer, clear APT cache
 RUN apt-get update && \
    apt-get install -y --no-install-recommends \
-        build-essential nodejs npm python3 ripgrep ffmpeg gcc python3-dev libffi-dev procps git && \
+        build-essential nodejs npm python3 ripgrep ffmpeg gcc python3-dev libffi-dev procps git openssh-client docker-cli && \
    rm -rf /var/lib/apt/lists/*

 # Non-root user for runtime; UID can be overridden via HERMES_UID at runtime
@@ -50,5 +50,6 @@ RUN uv venv && \
 # ---------- Runtime ----------
 ENV HERMES_WEB_DIST=/opt/hermes/hermes_cli/web_dist
 ENV HERMES_HOME=/opt/data
+ENV PATH="/opt/data/.local/bin:${PATH}"
 VOLUME [ "/opt/data" ]
 ENTRYPOINT [ "/opt/hermes/docker/entrypoint.sh" ]
@@ -173,7 +173,6 @@ python -m pytest tests/ -q
 - 💬 [Discord](https://discord.gg/NousResearch)
 - 📚 [Skills Hub](https://agentskills.io)
 - 🐛 [Issues](https://github.com/NousResearch/hermes-agent/issues)
- 💡 [Discussions](https://github.com/NousResearch/hermes-agent/discussions)
 - 🔌 [HermesClaw](https://github.com/AaronWong1999/hermesclaw) — Community WeChat bridge: Run Hermes Agent and OpenClaw on the same WeChat account.

 ---
@@ -17,7 +17,6 @@ import os
 from pathlib import Path

 from hermes_constants import get_hermes_home
-from types import SimpleNamespace
 from typing import Any, Dict, List, Optional, Tuple
 from utils import normalize_proxy_env_vars

@@ -117,6 +116,63 @@ def _get_anthropic_max_output(model: str) -> int:
    return best_val


+def _resolve_positive_anthropic_max_tokens(value) -> Optional[int]:
+    """Return ``value`` floored to a positive int, or ``None`` if it is not a
+    finite positive number. Ported from openclaw/openclaw#66664.
+
+    Anthropic's Messages API rejects ``max_tokens`` values that are 0,
+    negative, non-integer, or non-finite with HTTP 400. Python's ``or``
+    idiom (``max_tokens or fallback``) correctly catches ``0`` but lets
+    negative ints and fractional floats (``-1``, ``0.5``) through to the
+    API, producing a user-visible failure instead of a local error.
+    """
+    # Booleans are a subclass of int — exclude explicitly so ``True`` doesn't
+    # silently become 1 and ``False`` doesn't become 0.
+    if isinstance(value, bool):
+        return None
+    if not isinstance(value, (int, float)):
+        return None
+    try:
+        import math
+        if not math.isfinite(value):
+            return None
+    except Exception:
+        return None
+    floored = int(value)  # truncates toward zero for floats
+    return floored if floored > 0 else None
+
+
+def _resolve_anthropic_messages_max_tokens(
+    requested,
+    model: str,
+    context_length: Optional[int] = None,
+) -> int:
+    """Resolve the ``max_tokens`` budget for an Anthropic Messages call.
+
+    Prefers ``requested`` when it is a positive finite number; otherwise
+    falls back to the model's output ceiling. Raises ``ValueError`` if no
+    positive budget can be resolved (should not happen with current model
+    table defaults, but guards against a future regression where
+    ``_get_anthropic_max_output`` could return ``0``).
+
+    Separately, callers apply a context-window clamp — this resolver does
+    not, to keep the positive-value contract independent of endpoint
+    specifics.
+
+    Ported from openclaw/openclaw#66664 (resolveAnthropicMessagesMaxTokens).
+    """
+    resolved = _resolve_positive_anthropic_max_tokens(requested)
+    if resolved is not None:
+        return resolved
+    fallback = _get_anthropic_max_output(model)
+    if fallback > 0:
+        return fallback
+    raise ValueError(
+        f"Anthropic Messages adapter requires a positive max_tokens value for "
+        f"model {model!r}; got {requested!r} and no model default resolved."
+    )
+
+
 def _supports_adaptive_thinking(model: str) -> bool:
    """Return True for Claude 4.6+ models that support adaptive thinking."""
    return any(v in model for v in _ADAPTIVE_THINKING_SUBSTRINGS)
@@ -301,7 +357,7 @@ def _common_betas_for_base_url(base_url: str | None) -> list[str]:
    return _COMMON_BETAS


-def build_anthropic_client(api_key: str, base_url: str = None, timeout: float = None):
+def build_anthropic_client(api_key: str, base_url: str = None, timeout: Optional[float] = None):
    """Create an Anthropic client, auto-detecting setup-tokens vs API keys.

    If *timeout* is provided it overrides the default 900s read timeout.  The
@@ -1083,6 +1139,31 @@ def convert_messages_to_anthropic(
                    "name": fn.get("name", ""),
                    "input": parsed_args,
                })
+            # Kimi's /coding endpoint (Anthropic protocol) requires assistant
+            # tool-call messages to carry reasoning_content when thinking is
+            # enabled server-side.  Preserve it as a thinking block so Kimi
+            # can validate the message history.  See hermes-agent#13848.
+            #
+            # Accept empty string "" — _copy_reasoning_content_for_api()
+            # injects "" as a tier-3 fallback for Kimi tool-call messages
+            # that had no reasoning.  Kimi requires the field to exist, even
+            # if empty.
+            #
+            # Prepend (not append): Anthropic protocol requires thinking
+            # blocks before text and tool_use blocks.
+            #
+            # Guard: only add when reasoning_details didn't already contribute
+            # thinking blocks.  On native Anthropic, reasoning_details produces
+            # signed thinking blocks — adding another unsigned one from
+            # reasoning_content would create a duplicate (same text) that gets
+            # downgraded to a spurious text block on the last assistant message.
+            reasoning_content = m.get("reasoning_content")
+            _already_has_thinking = any(
+                isinstance(b, dict) and b.get("type") in ("thinking", "redacted_thinking")
+                for b in blocks
+            )
+            if isinstance(reasoning_content, str) and not _already_has_thinking:
+                blocks.insert(0, {"type": "thinking", "thinking": reasoning_content})
            # Anthropic rejects empty assistant content
            effective = blocks or content
            if not effective or effective == "":
@@ -1238,6 +1319,7 @@ def convert_messages_to_anthropic(
    #    cache markers can interfere with signature validation.
    _THINKING_TYPES = frozenset(("thinking", "redacted_thinking"))
    _is_third_party = _is_third_party_anthropic_endpoint(base_url)
+    _is_kimi = _is_kimi_coding_endpoint(base_url)

    last_assistant_idx = None
    for i in range(len(result) - 1, -1, -1):
@@ -1249,7 +1331,25 @@ def convert_messages_to_anthropic(
        if m.get("role") != "assistant" or not isinstance(m.get("content"), list):
            continue

-        if _is_third_party or idx != last_assistant_idx:
+        if _is_kimi:
+            # Kimi's /coding endpoint enables thinking server-side and
+            # requires unsigned thinking blocks on replayed assistant
+            # tool-call messages.  Strip signed Anthropic blocks (Kimi
+            # can't validate signatures) but preserve the unsigned ones
+            # we synthesised from reasoning_content above.
+            new_content = []
+            for b in m["content"]:
+                if not isinstance(b, dict) or b.get("type") not in _THINKING_TYPES:
+                    new_content.append(b)
+                    continue
+                if b.get("signature") or b.get("data"):
+                    # Anthropic-signed block — Kimi can't validate, strip
+                    continue
+                # Unsigned thinking (synthesised from reasoning_content) —
+                # keep it: Kimi needs it for message-history validation.
+                new_content.append(b)
+            m["content"] = new_content or [{"type": "text", "text": "(empty)"}]
+        elif _is_third_party or idx != last_assistant_idx:
            # Third-party endpoint: strip ALL thinking blocks from every
            # assistant message — signatures are Anthropic-proprietary.
            # Direct Anthropic: strip from non-latest assistant messages only.
@@ -1347,7 +1447,12 @@ def build_anthropic_kwargs(

    model = normalize_model_name(model, preserve_dots=preserve_dots)
    # effective_max_tokens = output cap for this call (≠ total context window)
-    effective_max_tokens = max_tokens or _get_anthropic_max_output(model)
+    # Use the resolver helper so non-positive values (negative ints,
+    # fractional floats, NaN, non-numeric) fail locally with a clear error
+    # rather than 400-ing at the Anthropic API. See openclaw/openclaw#66664.
+    effective_max_tokens = _resolve_anthropic_messages_max_tokens(
+        max_tokens, model, context_length=context_length
+    )

    # Clamp output cap to fit inside the total context window.
    # Only matters for small custom endpoints where context_length < native
@@ -1493,109 +1598,4 @@ def build_anthropic_kwargs(
    return kwargs


-def normalize_anthropic_response(
-    response,
-    strip_tool_prefix: bool = False,
-) -> Tuple[SimpleNamespace, str]:
-    """Normalize Anthropic response to match the shape expected by AIAgent.

-    Returns (assistant_message, finish_reason) where assistant_message has
-    .content, .tool_calls, and .reasoning attributes.
-
-    When *strip_tool_prefix* is True, removes the ``mcp_`` prefix that was
-    added to tool names for OAuth Claude Code compatibility.
-    """
-    text_parts = []
-    reasoning_parts = []
-    reasoning_details = []
-    tool_calls = []
-
-    for block in response.content:
-        if block.type == "text":
-            text_parts.append(block.text)
-        elif block.type == "thinking":
-            reasoning_parts.append(block.thinking)
-            block_dict = _to_plain_data(block)
-            if isinstance(block_dict, dict):
-                reasoning_details.append(block_dict)
-        elif block.type == "tool_use":
-            name = block.name
-            if strip_tool_prefix and name.startswith(_MCP_TOOL_PREFIX):
-                name = name[len(_MCP_TOOL_PREFIX):]
-            tool_calls.append(
-                SimpleNamespace(
-                    id=block.id,
-                    type="function",
-                    function=SimpleNamespace(
-                        name=name,
-                        arguments=json.dumps(block.input),
-                    ),
-                )
-            )
-
-    # Map Anthropic stop_reason to OpenAI finish_reason.
-    # Newer stop reasons added in Claude 4.5+ / 4.7:
-    #   - refusal: the model declined to answer (cyber safeguards, CSAM, etc.)
-    #   - model_context_window_exceeded: hit context limit (not max_tokens)
-    # Both need distinct handling upstream — a refusal should surface to the
-    # user with a clear message, and a context-window overflow should trigger
-    # compression/truncation rather than be treated as normal end-of-turn.
-    stop_reason_map = {
-        "end_turn": "stop",
-        "tool_use": "tool_calls",
-        "max_tokens": "length",
-        "stop_sequence": "stop",
-        "refusal": "content_filter",
-        "model_context_window_exceeded": "length",
-    }
-    finish_reason = stop_reason_map.get(response.stop_reason, "stop")
-
-    return (
-        SimpleNamespace(
-            content="\n".join(text_parts) if text_parts else None,
-            tool_calls=tool_calls or None,
-            reasoning="\n\n".join(reasoning_parts) if reasoning_parts else None,
-            reasoning_content=None,
-            reasoning_details=reasoning_details or None,
-        ),
-        finish_reason,
-    )
-
-
-def normalize_anthropic_response_v2(
-    response,
-    strip_tool_prefix: bool = False,
-) -> "NormalizedResponse":
-    """Normalize Anthropic response to NormalizedResponse.
-
-    Wraps the existing normalize_anthropic_response() and maps its output
-    to the shared transport types.  This allows incremental migration —
-    one call site at a time — without changing the original function.
-    """
-    from agent.transports.types import NormalizedResponse, build_tool_call
-
-    assistant_msg, finish_reason = normalize_anthropic_response(response, strip_tool_prefix)
-
-    tool_calls = None
-    if assistant_msg.tool_calls:
-        tool_calls = [
-            build_tool_call(
-                id=tc.id,
-                name=tc.function.name,
-                arguments=tc.function.arguments,
-            )
-            for tc in assistant_msg.tool_calls
-        ]
-
-    provider_data = {}
-    if getattr(assistant_msg, "reasoning_details", None):
-        provider_data["reasoning_details"] = assistant_msg.reasoning_details
-
-    return NormalizedResponse(
-        content=assistant_msg.content,
-        tool_calls=tool_calls,
-        finish_reason=finish_reason,
-        reasoning=getattr(assistant_msg, "reasoning", None),
-        usage=None,  # Anthropic usage is on the raw response, not the normaliser
-        provider_data=provider_data or None,
-    )
@@ -41,10 +41,13 @@ import threading
 import time
 from pathlib import Path  # noqa: F401 — used by test mocks
 from types import SimpleNamespace
-from typing import Any, Dict, List, Optional, Tuple
+from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union

 from openai import OpenAI

+if TYPE_CHECKING:
+    from agent.gemini_native_adapter import GeminiNativeClient
+
 from agent.credential_pool import load_pool
 from hermes_cli.config import get_hermes_home
 from hermes_constants import OPENROUTER_BASE_URL
@@ -134,6 +137,7 @@ _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
    "gemini": "gemini-3-flash-preview",
    "zai": "glm-4.5-flash",
    "kimi-coding": "kimi-k2-turbo-preview",
+    "stepfun": "step-3.5-flash",
    "kimi-coding-cn": "kimi-k2-turbo-preview",
    "minimax": "MiniMax-M2.7",
    "minimax-cn": "MiniMax-M2.7",
@@ -572,7 +576,8 @@ class _AnthropicCompletionsAdapter:
        self._is_oauth = is_oauth

    def create(self, **kwargs) -> Any:
-        from agent.anthropic_adapter import build_anthropic_kwargs, normalize_anthropic_response
+        from agent.anthropic_adapter import build_anthropic_kwargs
+        from agent.transports import get_transport

        messages = kwargs.get("messages", [])
        model = kwargs.get("model", self._model)
@@ -609,7 +614,19 @@ class _AnthropicCompletionsAdapter:
                anthropic_kwargs["temperature"] = temperature

        response = self._client.messages.create(**anthropic_kwargs)
-        assistant_message, finish_reason = normalize_anthropic_response(response)
+        _transport = get_transport("anthropic_messages")
+        _nr = _transport.normalize_response(
+            response, strip_tool_prefix=self._is_oauth
+        )
+
+        # ToolCall already duck-types as OpenAI shape (.type, .function.name,
+        # .function.arguments) via properties, so no wrapping needed.
+        assistant_message = SimpleNamespace(
+            content=_nr.content,
+            tool_calls=_nr.tool_calls,
+            reasoning=_nr.reasoning,
+        )
+        finish_reason = _nr.finish_reason

        usage = None
        if hasattr(response, "usage") and response.usage:
@@ -796,7 +813,11 @@ def _read_codex_access_token() -> Optional[str]:
        return None


-def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
+# TODO(refactor): This function has messy types and duplicated logic (pool vs direct creds).
+#     Ideal fix: (1) define an AuxiliaryClient Protocol both OpenAI/GeminiNativeClient satisfy,
+#     (2) return a NamedTuple or dataclass instead of raw tuple, (3) extract the repeated
+#     Gemini/Kimi/Copilot client-building into a helper.
+def _resolve_api_key_provider() -> Tuple[Optional[Union[OpenAI, "GeminiNativeClient"]], Optional[str]]:
    """Try each API-key provider in PROVIDER_REGISTRY order.

    Returns (client, model) for the first provider with usable runtime
@@ -64,6 +64,47 @@ _CHARS_PER_TOKEN = 4
 _SUMMARY_FAILURE_COOLDOWN_SECONDS = 600


+def _content_text_for_contains(content: Any) -> str:
+    """Return a best-effort text view of message content.
+
+    Used only for substring checks when we need to know whether we've already
+    appended a note to a message. Keeps multimodal lists intact elsewhere.
+    """
+    if content is None:
+        return ""
+    if isinstance(content, str):
+        return content
+    if isinstance(content, list):
+        parts: list[str] = []
+        for item in content:
+            if isinstance(item, str):
+                parts.append(item)
+            elif isinstance(item, dict):
+                text = item.get("text")
+                if isinstance(text, str):
+                    parts.append(text)
+        return "\n".join(part for part in parts if part)
+    return str(content)
+
+
+def _append_text_to_content(content: Any, text: str, *, prepend: bool = False) -> Any:
+    """Append or prepend plain text to message content safely.
+
+    Compression sometimes needs to add a note or merge a summary into an
+    existing message. Message content may be plain text or a multimodal list of
+    blocks, so direct string concatenation is not always safe.
+    """
+    if content is None:
+        return text
+    if isinstance(content, str):
+        return text + content if prepend else content + text
+    if isinstance(content, list):
+        text_block = {"type": "text", "text": text}
+        return [text_block, *content] if prepend else [*content, text_block]
+    rendered = str(content)
+    return text + rendered if prepend else rendered + text
+
+
 def _truncate_tool_call_args_json(args: str, head_chars: int = 200) -> str:
    """Shrink long string values inside a tool-call arguments JSON blob while
    preserving JSON validity.
@@ -807,7 +848,7 @@ The user has requested that this compaction PRIORITISE preserving all informatio
                )
                self.summary_model = ""  # empty = use main model
                self._summary_failure_cooldown_until = 0.0  # no cooldown
-                return self._generate_summary(turns_to_summarize)  # retry immediately
+                return self._generate_summary(turns_to_summarize, focus_topic=focus_topic)  # retry immediately

            # Transient errors (timeout, rate limit, network) — shorter cooldown
            _transient_cooldown = 60
@@ -1144,10 +1185,13 @@ The user has requested that this compaction PRIORITISE preserving all informatio
        for i in range(compress_start):
            msg = messages[i].copy()
            if i == 0 and msg.get("role") == "system":
-                existing = msg.get("content") or ""
+                existing = msg.get("content")
                _compression_note = "[Note: Some earlier conversation turns have been compacted into a handoff summary to preserve context space. The current session state may still reflect earlier work, so build on that summary and state rather than re-doing work.]"
-                if _compression_note not in existing:
-                    msg["content"] = existing + "\n\n" + _compression_note
+                if _compression_note not in _content_text_for_contains(existing):
+                    msg["content"] = _append_text_to_content(
+                        existing,
+                        "\n\n" + _compression_note if isinstance(existing, str) and existing else _compression_note,
+                    )
            compressed.append(msg)

        # If LLM summary failed, insert a static fallback so the model
@@ -1191,12 +1235,15 @@ The user has requested that this compaction PRIORITISE preserving all informatio
        for i in range(compress_end, n_messages):
            msg = messages[i].copy()
            if _merge_summary_into_tail and i == compress_end:
-                original = msg.get("content") or ""
-                msg["content"] = (
+                merged_prefix = (
                    summary
                    + "\n\n--- END OF CONTEXT SUMMARY — "
                    "respond to the message below, not the summary above ---\n\n"
-                    + original
+                )
+                msg["content"] = _append_text_to_content(
+                    msg.get("content"),
+                    merged_prefix,
+                    prepend=True,
                )
                _merge_summary_into_tail = False
            compressed.append(msg)
@@ -29,6 +29,7 @@ from hermes_cli.auth import (
    _save_auth_store,
    _save_provider_state,
    read_credential_pool,
+    read_provider_credentials,
    write_credential_pool,
 )

@@ -321,7 +322,7 @@ def get_custom_provider_pool_key(base_url: str) -> Optional[str]:

 def list_custom_pool_providers() -> List[str]:
    """Return all 'custom:*' pool keys that have entries in auth.json."""
-    pool_data = read_credential_pool(None)
+    pool_data = read_credential_pool()
    return sorted(
        key for key in pool_data
        if key.startswith(CUSTOM_POOL_PREFIX)
@@ -875,6 +876,20 @@ class CredentialPool:
            self._current_id = None
        return removed

+    def remove_entry(self, entry_id: str) -> Optional[PooledCredential]:
+        for idx, entry in enumerate(self._entries):
+            if entry.id == entry_id:
+                removed = self._entries.pop(idx)
+                self._entries = [
+                    replace(e, priority=new_priority)
+                    for new_priority, e in enumerate(self._entries)
+                ]
+                self._persist()
+                if self._current_id == removed.id:
+                    self._current_id = None
+                return removed
+        return None
+
    def resolve_target(self, target: Any) -> Tuple[Optional[int], Optional[PooledCredential], Optional[str]]:
        raw = str(target or "").strip()
        if not raw:
@@ -1325,7 +1340,7 @@ def _seed_custom_pool(pool_key: str, entries: List[PooledCredential]) -> Tuple[b

 def load_pool(provider: str) -> CredentialPool:
    provider = (provider or "").strip().lower()
-    raw_entries = read_credential_pool(provider)
+    raw_entries = read_provider_credentials(provider)
    entries = [PooledCredential.from_dict(provider, payload) for payload in raw_entries]

    if provider.startswith(CUSTOM_POOL_PREFIX):
@@ -729,6 +729,7 @@ class KawaiiSpinner:
                time.sleep(0.1)
                continue
            frame = self.spinner_frames[self.frame_idx % len(self.spinner_frames)]
+            assert self.start_time is not None  # start() sets it before thread starts
            elapsed = time.time() - self.start_time
            if wings:
                left, right = wings[self.frame_idx % len(wings)]
@@ -220,12 +220,25 @@ _TRANSPORT_ERROR_TYPES = frozenset({
    "ConnectionAbortedError", "BrokenPipeError",
    "TimeoutError", "ReadError",
    "ServerDisconnectedError",
+    # SSL/TLS transport errors — transient mid-stream handshake/record
+    # failures that should retry rather than surface as a stalled session.
+    # ssl.SSLError subclasses OSError (caught by isinstance) but we list
+    # the type names here so provider-wrapped SSL errors (e.g. when the
+    # SDK re-raises without preserving the exception chain) still classify
+    # as transport rather than falling through to the unknown bucket.
+    "SSLError", "SSLZeroReturnError", "SSLWantReadError",
+    "SSLWantWriteError", "SSLEOFError", "SSLSyscallError",
    # OpenAI SDK errors (not subclasses of Python builtins)
    "APIConnectionError",
    "APITimeoutError",
 })

-# Server disconnect patterns (no status code, but transport-level)
+# Server disconnect patterns (no status code, but transport-level).
+# These are the "ambiguous" patterns — a plain connection close could be
+# transient transport hiccup OR server-side context overflow rejection
+# (common when the API gateway disconnects instead of returning an HTTP
+# error for oversized requests).  A large session + one of these patterns
+# triggers the context-overflow-with-compression recovery path.
 _SERVER_DISCONNECT_PATTERNS = [
    "server disconnected",
    "peer closed connection",
@@ -236,6 +249,40 @@ _SERVER_DISCONNECT_PATTERNS = [
    "incomplete chunked read",
 ]

+# SSL/TLS transient failure patterns — intentionally distinct from
+# _SERVER_DISCONNECT_PATTERNS above.
+#
+# An SSL alert mid-stream is almost always a transport-layer hiccup
+# (flaky network, mid-session TLS renegotiation failure, load balancer
+# dropping the connection) — NOT a server-side context overflow signal.
+# So we want the retry path but NOT the compression path; lumping these
+# into _SERVER_DISCONNECT_PATTERNS would trigger unnecessary (and
+# expensive) context compression on any large-session SSL hiccup.
+#
+# The OpenSSL library constructs error codes by prepending a format string
+# to the uppercased alert reason; OpenSSL 3.x changed the separator
+# (e.g. `SSLV3_ALERT_BAD_RECORD_MAC` → `SSL/TLS_ALERT_BAD_RECORD_MAC`),
+# which silently stopped matching anything explicit.  Matching on the
+# stable substrings (`bad record mac`, `ssl alert`, `tls alert`, etc.)
+# survives future OpenSSL format churn without code changes.
+_SSL_TRANSIENT_PATTERNS = [
+    # Space-separated (human-readable form, Python ssl module, most SDKs)
+    "bad record mac",
+    "ssl alert",
+    "tls alert",
+    "ssl handshake failure",
+    "tlsv1 alert",
+    "sslv3 alert",
+    # Underscore-separated (OpenSSL error code tokens, e.g.
+    # `ERR_SSL_SSL/TLS_ALERT_BAD_RECORD_MAC`, `SSLV3_ALERT_BAD_RECORD_MAC`)
+    "bad_record_mac",
+    "ssl_alert",
+    "tls_alert",
+    "tls_alert_internal_error",
+    # Python ssl module prefix, e.g. "[SSL: BAD_RECORD_MAC]"
+    "[ssl:",
+]
+

 # ── Classification pipeline ─────────────────────────────────────────────

@@ -255,9 +302,10 @@ def classify_api_error(
      2. HTTP status code + message-aware refinement
      3. Error code classification (from body)
      4. Message pattern matching (billing vs rate_limit vs context vs auth)
-      5. Transport error heuristics
+      5. SSL/TLS transient alert patterns → retry as timeout
      6. Server disconnect + large session → context overflow
-      7. Fallback: unknown (retryable with backoff)
+      7. Transport error heuristics
+      8. Fallback: unknown (retryable with backoff)

    Args:
        error: The exception from the API call.
@@ -388,7 +436,18 @@ def classify_api_error(
    if classified is not None:
        return classified

-    # ── 5. Server disconnect + large session → context overflow ─────
+    # ── 5. SSL/TLS transient errors → retry as timeout (not compression) ──
+    # SSL alerts mid-stream are transport hiccups, not server-side context
+    # overflow signals.  Classify before the disconnect check so a large
+    # session doesn't incorrectly trigger context compression when the real
+    # cause is a flaky TLS handshake.  Also matches when the error is
+    # wrapped in a generic exception whose message string carries the SSL
+    # alert text but the type isn't ssl.SSLError (happens with some SDKs
+    # that re-raise without chaining).
+    if any(p in error_msg for p in _SSL_TRANSIENT_PATTERNS):
+        return _result(FailoverReason.timeout, retryable=True)
+
+    # ── 6. Server disconnect + large session → context overflow ─────
    # Must come BEFORE generic transport error catch — a disconnect on
    # a large session is more likely context overflow than a transient
    # transport hiccup.  Without this ordering, RemoteProtocolError
@@ -405,12 +464,12 @@ def classify_api_error(
            )
        return _result(FailoverReason.timeout, retryable=True)

-    # ── 6. Transport / timeout heuristics ───────────────────────────
+    # ── 7. Transport / timeout heuristics ───────────────────────────

    if error_type in _TRANSPORT_ERROR_TYPES or isinstance(error, (TimeoutError, ConnectionError, OSError)):
        return _result(FailoverReason.timeout, retryable=True)

-    # ── 7. Fallback: unknown ────────────────────────────────────────
+    # ── 8. Fallback: unknown ────────────────────────────────────────

    return _result(FailoverReason.unknown, retryable=True)

@@ -470,11 +529,16 @@ def _classify_by_status(
                retryable=False,
                should_fallback=True,
            )
-        # Generic 404 — could be model or endpoint
+        # Generic 404 with no "model not found" signal — could be a wrong
+        # endpoint path (common with local llama.cpp / Ollama / vLLM when
+        # the URL is slightly misconfigured), a proxy routing glitch, or
+        # a transient backend issue.  Classifying these as model_not_found
+        # silently falls back to a different provider and tells the model
+        # the model is missing, which is wrong and wastes a turn.  Treat
+        # as unknown so the retry loop surfaces the real error instead.
        return result_fn(
-            FailoverReason.model_not_found,
-            retryable=False,
-            should_fallback=True,
+            FailoverReason.unknown,
+            retryable=True,
        )

    if status_code == 413:
@@ -4,6 +4,7 @@ Pure utility functions with no AIAgent dependency. Used by ContextCompressor
 and run_agent.py for pre-flight context checks.
 """

+import ipaddress
 import logging
 import re
 import time
@@ -25,7 +26,7 @@ logger = logging.getLogger(__name__)
 # are preserved so the full model name reaches cache lookups and server queries.
 _PROVIDER_PREFIXES: frozenset[str] = frozenset({
    "openrouter", "nous", "openai-codex", "copilot", "copilot-acp",
-    "gemini", "ollama-cloud", "zai", "kimi-coding", "kimi-coding-cn", "minimax", "minimax-cn", "anthropic", "deepseek",
+    "gemini", "ollama-cloud", "zai", "kimi-coding", "kimi-coding-cn", "stepfun", "minimax", "minimax-cn", "anthropic", "deepseek",
    "opencode-zen", "opencode-go", "ai-gateway", "kilocode", "alibaba",
    "qwen-oauth",
    "xiaomi",
@@ -36,7 +37,7 @@ _PROVIDER_PREFIXES: frozenset[str] = frozenset({
    "glm", "z-ai", "z.ai", "zhipu", "github", "github-copilot",
    "github-models", "kimi", "moonshot", "kimi-cn", "moonshot-cn", "claude", "deep-seek",
    "ollama",
-    "opencode", "zen", "go", "vercel", "kilo", "dashscope", "aliyun", "qwen",
+    "stepfun", "opencode", "zen", "go", "vercel", "kilo", "dashscope", "aliyun", "qwen",
    "mimo", "xiaomi-mimo",
    "arcee-ai", "arceeai",
    "xai", "x-ai", "x.ai", "grok",
@@ -51,6 +52,13 @@ _OLLAMA_TAG_PATTERN = re.compile(
 )


+# Tailscale's CGNAT range (RFC 6598). `ipaddress.is_private` excludes this
+# block, so without an explicit check Ollama reached over Tailscale (e.g.
+# `http://100.77.243.5:11434`) wouldn't be treated as local and its stream
+# read / stale timeouts wouldn't get auto-bumped. Built once at import time.
+_TAILSCALE_CGNAT = ipaddress.IPv4Network("100.64.0.0/10")
+
+
 def _strip_provider_prefix(model: str) -> str:
    """Strip a recognised provider prefix from a model string.

@@ -125,6 +133,8 @@ DEFAULT_CONTEXT_LENGTHS = {
    # Google
    "gemini": 1048576,
    # Gemma (open models served via AI Studio)
+    "gemma-4": 256000,  # Gemma 4 family
+    "gemma4": 256000,  # Ollama-style naming (e.g. gemma4:31b-cloud)
    "gemma-4-31b": 256000,
    "gemma-3": 131072,
    "gemma": 8192,  # fallback for older gemma models
@@ -177,6 +187,8 @@ DEFAULT_CONTEXT_LENGTHS = {
    "mimo-v2-pro": 1000000,
    "mimo-v2-omni": 256000,
    "mimo-v2-flash": 256000,
+    "mimo-v2.5-pro": 1000000,
+    "mimo-v2.5": 1000000,
    "zai-org/GLM-5": 202752,
 }

@@ -191,6 +203,7 @@ _CONTEXT_LENGTH_KEYS = (
    "max_seq_len",
    "n_ctx_train",
    "n_ctx",
+    "ctx_size",
 )

 _MAX_COMPLETION_KEYS = (
@@ -234,9 +247,12 @@ _URL_TO_PROVIDER: Dict[str, str] = {
    "chatgpt.com": "openai",
    "api.anthropic.com": "anthropic",
    "api.z.ai": "zai",
+    "open.bigmodel.cn": "zai",
    "api.moonshot.ai": "kimi-coding",
    "api.moonshot.cn": "kimi-coding-cn",
    "api.kimi.com": "kimi-coding",
+    "api.stepfun.ai": "stepfun",
+    "api.stepfun.com": "stepfun",
    "api.arcee.ai": "arcee",
    "api.minimax": "minimax",
    "dashscope.aliyuncs.com": "alibaba",
@@ -281,7 +297,15 @@ def _is_known_provider_base_url(base_url: str) -> bool:


 def is_local_endpoint(base_url: str) -> bool:
-    """Return True if base_url points to a local machine (localhost / RFC-1918 / WSL)."""
+    """Return True if base_url points to a local machine.
+
+    Recognises loopback (``localhost``, ``127.0.0.0/8``, ``::1``),
+    container-internal DNS names (``host.docker.internal`` et al.),
+    RFC-1918 private ranges (``10/8``, ``172.16/12``, ``192.168/16``),
+    link-local, and Tailscale CGNAT (``100.64.0.0/10``). Tailscale CGNAT
+    is included so remote-but-trusted Ollama boxes reached over a
+    Tailscale mesh get the same timeout auto-bumps as localhost Ollama.
+    """
    normalized = _normalize_base_url(base_url)
    if not normalized:
        return False
@@ -296,14 +320,17 @@ def is_local_endpoint(base_url: str) -> bool:
    # Docker / Podman / Lima internal DNS names (e.g. host.docker.internal)
    if any(host.endswith(suffix) for suffix in _CONTAINER_LOCAL_SUFFIXES):
        return True
-    # RFC-1918 private ranges and link-local
-    import ipaddress
+    # RFC-1918 private ranges, link-local, and Tailscale CGNAT
    try:
        addr = ipaddress.ip_address(host)
-        return addr.is_private or addr.is_loopback or addr.is_link_local
+        if addr.is_private or addr.is_loopback or addr.is_link_local:
+            return True
+        if isinstance(addr, ipaddress.IPv4Address) and addr in _TAILSCALE_CGNAT:
+            return True
    except ValueError:
        pass
    # Bare IP that looks like a private range (e.g. 172.26.x.x for WSL)
+    # or Tailscale CGNAT (100.64.x.x–100.127.x.x).
    parts = host.split(".")
    if len(parts) == 4:
        try:
@@ -314,6 +341,8 @@ def is_local_endpoint(base_url: str) -> bool:
                return True
            if first == 192 and second == 168:
                return True
+            if first == 100 and 64 <= second <= 127:
+                return True
        except ValueError:
            pass
    return False
@@ -146,6 +146,7 @@ PROVIDER_TO_MODELS_DEV: Dict[str, str] = {
    "openai-codex": "openai",
    "zai": "zai",
    "kimi-coding": "kimi-for-coding",
+    "stepfun": "stepfun",
    "kimi-coding-cn": "kimi-for-coding",
    "minimax": "minimax",
    "minimax-cn": "minimax-cn",
@@ -417,6 +418,9 @@ def list_provider_models(provider: str) -> List[str]:

    Returns an empty list if the provider is unknown or has no data.
    """
+    from hermes_cli.models import normalize_provider
+    provider = normalize_provider(provider) or provider
+    
    models = _get_provider_models(provider)
    if models is None:
        return []
@@ -370,6 +370,32 @@ PLATFORM_HINTS = {
        "MEDIA:/absolute/path/to/file in your response. Images (.jpg, .png, "
        ".heic) appear as photos and other files arrive as attachments."
    ),
+    "mattermost": (
+        "You are in a Mattermost workspace communicating with your user. "
+        "Mattermost renders standard Markdown — headings, bold, italic, code "
+        "blocks, and tables all work. "
+        "You can send media files natively: include MEDIA:/absolute/path/to/file "
+        "in your response. Images (.jpg, .png, .webp) are uploaded as photo "
+        "attachments, audio and video as file attachments. "
+        "Image URLs in markdown format ![alt](url) are rendered as inline previews automatically."
+    ),
+    "matrix": (
+        "You are in a Matrix room communicating with your user. "
+        "Matrix renders Markdown — bold, italic, code blocks, and links work; "
+        "the adapter converts your Markdown to HTML for rich display. "
+        "You can send media files natively: include MEDIA:/absolute/path/to/file "
+        "in your response. Images (.jpg, .png, .webp) are sent as inline photos, "
+        "audio (.ogg, .mp3) as voice/audio messages, video (.mp4) inline, "
+        "and other files as downloadable attachments."
+    ),
+    "feishu": (
+        "You are in a Feishu (Lark) workspace communicating with your user. "
+        "Feishu renders Markdown in messages — bold, italic, code blocks, and "
+        "links are supported. "
+        "You can send media files natively: include MEDIA:/absolute/path/to/file "
+        "in your response. Images (.jpg, .png, .webp) are uploaded and displayed "
+        "inline, audio files as voice messages, and other files as attachments."
+    ),
    "weixin": (
        "You are on Weixin/WeChat. Markdown formatting is supported, so you may use it when "
        "it improves readability, but keep the message compact and chat-friendly. You can send media files natively: "
@@ -435,7 +435,7 @@ def iter_skill_index_files(skills_dir: Path, filename: str):
    Excludes ``.git``, ``.github``, ``.hub`` directories.
    """
    matches = []
-    for root, dirs, files in os.walk(skills_dir):
+    for root, dirs, files in os.walk(skills_dir, followlinks=True):
        dirs[:] = [d for d in dirs if d not in EXCLUDED_SKILL_DIRS]
        if filename in files:
            matches.append(Path(root) / filename)
@@ -455,7 +455,8 @@ def parse_qualified_name(name: str) -> Tuple[Optional[str], str]:
    """
    if ":" not in name:
        return None, name
-    return tuple(name.split(":", 1))  # type: ignore[return-value]
+    ns, bare = name.split(":", 1)
+    return ns, bare


 def is_valid_namespace(candidate: Optional[str]) -> bool:
@@ -38,7 +38,7 @@ def generate_title(user_message: str, assistant_response: str, timeout: float =
        response = call_llm(
            task="title_generation",
            messages=messages,
-            max_tokens=30,
+            max_tokens=500,
            temperature=0.3,
            timeout=timeout,
        )
@@ -78,23 +78,71 @@ class AnthropicTransport(ProviderTransport):
    def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
        """Normalize Anthropic response to NormalizedResponse.

-        kwargs:
-            strip_tool_prefix: bool — strip 'mcp_mcp_' prefixes from tool names.
+        Parses content blocks (text, thinking, tool_use), maps stop_reason
+        to OpenAI finish_reason, and collects reasoning_details in provider_data.
        """
-        from agent.anthropic_adapter import normalize_anthropic_response_v2
+        import json
+        from agent.anthropic_adapter import _to_plain_data
+        from agent.transports.types import ToolCall

        strip_tool_prefix = kwargs.get("strip_tool_prefix", False)
-        return normalize_anthropic_response_v2(response, strip_tool_prefix=strip_tool_prefix)
+        _MCP_PREFIX = "mcp_"
+
+        text_parts = []
+        reasoning_parts = []
+        reasoning_details = []
+        tool_calls = []
+
+        for block in response.content:
+            if block.type == "text":
+                text_parts.append(block.text)
+            elif block.type == "thinking":
+                reasoning_parts.append(block.thinking)
+                block_dict = _to_plain_data(block)
+                if isinstance(block_dict, dict):
+                    reasoning_details.append(block_dict)
+            elif block.type == "tool_use":
+                name = block.name
+                if strip_tool_prefix and name.startswith(_MCP_PREFIX):
+                    name = name[len(_MCP_PREFIX):]
+                tool_calls.append(
+                    ToolCall(
+                        id=block.id,
+                        name=name,
+                        arguments=json.dumps(block.input),
+                    )
+                )
+
+        finish_reason = self._STOP_REASON_MAP.get(response.stop_reason, "stop")
+
+        provider_data = {}
+        if reasoning_details:
+            provider_data["reasoning_details"] = reasoning_details
+
+        return NormalizedResponse(
+            content="\n".join(text_parts) if text_parts else None,
+            tool_calls=tool_calls or None,
+            finish_reason=finish_reason,
+            reasoning="\n\n".join(reasoning_parts) if reasoning_parts else None,
+            usage=None,
+            provider_data=provider_data or None,
+        )

    def validate_response(self, response: Any) -> bool:
-        """Check Anthropic response structure is valid."""
+        """Check Anthropic response structure is valid.
+
+        An empty content list is legitimate when ``stop_reason == "end_turn"``
+        — the model's canonical way of signalling "nothing more to add" after
+        a tool turn that already delivered the user-facing text. Treating it
+        as invalid falsely retries a completed response.
+        """
        if response is None:
            return False
        content_blocks = getattr(response, "content", None)
        if not isinstance(content_blocks, list):
            return False
        if not content_blocks:
-            return False
+            return getattr(response, "stop_reason", None) == "end_turn"
        return True

    def extract_cache_stats(self, response: Any) -> Optional[Dict[str, int]]:
@@ -37,6 +37,30 @@ class ToolCall:
    arguments: str  # JSON string
    provider_data: Optional[Dict[str, Any]] = field(default=None, repr=False)

+    # ── Backward compatibility ──────────────────────────────────
+    # The agent loop reads tc.function.name / tc.function.arguments
+    # throughout run_agent.py (45+ sites).  These properties let
+    # NormalizedResponse pass through without the _nr_to_assistant_message
+    # shim, while keeping ToolCall's canonical fields flat.
+    @property
+    def type(self) -> str:
+        return "function"
+
+    @property
+    def function(self) -> "ToolCall":
+        """Return self so tc.function.name / tc.function.arguments work."""
+        return self
+
+    @property
+    def call_id(self) -> Optional[str]:
+        """Codex call_id from provider_data, accessed via getattr by _build_assistant_message."""
+        return (self.provider_data or {}).get("call_id")
+
+    @property
+    def response_item_id(self) -> Optional[str]:
+        """Codex response_item_id from provider_data."""
+        return (self.provider_data or {}).get("response_item_id")
+

@dataclass
 class Usage:
@@ -70,6 +94,24 @@ class NormalizedResponse:
    usage: Optional[Usage] = None
    provider_data: Optional[Dict[str, Any]] = field(default=None, repr=False)

+    # ── Backward compatibility ──────────────────────────────────
+    # The shim _nr_to_assistant_message() mapped these from provider_data.
+    # These properties let NormalizedResponse pass through directly.
+    @property
+    def reasoning_content(self) -> Optional[str]:
+        pd = self.provider_data or {}
+        return pd.get("reasoning_content")
+
+    @property
+    def reasoning_details(self):
+        pd = self.provider_data or {}
+        return pd.get("reasoning_details")
+
+    @property
+    def codex_reasoning_items(self):
+        pd = self.provider_data or {}
+        return pd.get("codex_reasoning_items")
+

 # ---------------------------------------------------------------------------
 # Factory helpers
@@ -533,10 +533,22 @@ def normalize_usage(
        prompt_total = _to_int(getattr(response_usage, "prompt_tokens", 0))
        output_tokens = _to_int(getattr(response_usage, "completion_tokens", 0))
        details = getattr(response_usage, "prompt_tokens_details", None)
+        # Primary: OpenAI-style prompt_tokens_details. Fallback: Anthropic-style
+        # top-level fields that some OpenAI-compatible proxies (OpenRouter, Vercel
+        # AI Gateway, Cline) expose when routing Claude models — without this
+        # fallback, cache writes are undercounted as 0 and cache reads can be
+        # missed when the proxy only surfaces them at the top level.
+        # Port of cline/cline#10266.
        cache_read_tokens = _to_int(getattr(details, "cached_tokens", 0) if details else 0)
+        if not cache_read_tokens:
+            cache_read_tokens = _to_int(getattr(response_usage, "cache_read_input_tokens", 0))
        cache_write_tokens = _to_int(
            getattr(details, "cache_write_tokens", 0) if details else 0
        )
+        if not cache_write_tokens:
+            cache_write_tokens = _to_int(
+                getattr(response_usage, "cache_creation_input_tokens", 0)
+            )
        input_tokens = max(0, prompt_total - cache_read_tokens - cache_write_tokens)

    reasoning_tokens = 0
@@ -776,6 +776,7 @@ delegation:
  # max_concurrent_children: 3                # Max parallel child agents (default: 3)
  # max_spawn_depth: 1                        # Tree depth cap (1-3, default: 1 = flat). Raise to 2 or 3 to allow orchestrator children to spawn their own workers.
  # orchestrator_enabled: true                # Kill switch for role="orchestrator" children (default: true).
+  # inherit_mcp_toolsets: true                # When explicit child toolsets are narrowed, also keep the parent's MCP toolsets (default: true). Set false for strict intersection.
  # model: "google/gemini-3-flash-preview"    # Override model for subagents (empty = inherit parent)
  # provider: "openrouter"                    # Override provider for subagents (empty = inherit parent)
  #                                           # Resolves full credentials (base_url, api_key) automatically.
@@ -30,7 +30,7 @@ from urllib.parse import unquote, urlparse
 from contextlib import contextmanager
 from pathlib import Path
 from datetime import datetime
-from typing import List, Dict, Any, Optional
+from typing import List, Dict, Any, Optional, TypedDict

 logger = logging.getLogger(__name__)

@@ -84,6 +84,34 @@ _project_env = Path(__file__).parent / '.env'
 load_hermes_dotenv(hermes_home=_hermes_home, project_env=_project_env)


+class _ModelPickerState(TypedDict, total=False):
+    stage: str
+    providers: List[Dict[str, Any]]
+    selected: int
+    current_model: str
+    current_provider: str
+    user_provs: Optional[Dict[str, Any]]
+    custom_provs: Optional[Dict[str, Any]]
+    provider_data: Dict[str, Any]
+    model_list: List[str]
+
+
+class _ApprovalState(TypedDict, total=False):
+    command: str
+    description: str
+    choices: List[str]
+    selected: int
+    response_queue: "queue.Queue[str]"
+    show_full: bool
+
+
+class _ClarifyState(TypedDict, total=False):
+    question: str
+    choices: List[str]
+    selected: int
+    response_queue: "queue.Queue[str]"
+
+
 _REASONING_TAGS = (
    "REASONING_SCRATCHPAD",
    "think",
@@ -108,6 +136,11 @@ def _strip_reasoning_tags(text: str) -> str:
    ``<thought>`` (Gemma 4).  Must stay in sync with
    ``run_agent.py::_strip_think_blocks`` and the stream consumer's
    ``_OPEN_THINK_TAGS`` / ``_CLOSE_THINK_TAGS`` tuples.
+
+    Also strips tool-call XML blocks some open models leak into visible
+    content (``<tool_call>``, ``<function_calls>``, Gemma-style
+    ``<function name="…">…</function>``). Ported from
+    openclaw/openclaw#67318.
    """
    cleaned = text
    for tag in _REASONING_TAGS:
@@ -132,6 +165,31 @@ def _strip_reasoning_tags(text: str) -> str:
            cleaned,
            flags=re.IGNORECASE,
        )
+    # Tool-call XML blocks (openclaw/openclaw#67318).
+    for tc_tag in ("tool_call", "tool_calls", "tool_result",
+                   "function_call", "function_calls"):
+        cleaned = re.sub(
+            rf"<{tc_tag}\b[^>]*>.*?</{tc_tag}>\s*",
+            "",
+            cleaned,
+            flags=re.DOTALL | re.IGNORECASE,
+        )
+    # <function name="..."> — boundary + attribute gated to avoid prose FPs.
+    cleaned = re.sub(
+        r'(?:(?<=^)|(?<=[\n\r.!?:]))[ \t]*'
+        r'<function\b[^>]*\bname\s*=[^>]*>'
+        r'(?:(?:(?!</function>).)*)</function>\s*',
+        '',
+        cleaned,
+        flags=re.DOTALL | re.IGNORECASE,
+    )
+    # Stray tool-call close tags.
+    cleaned = re.sub(
+        r'</(?:tool_call|tool_calls|tool_result|function_call|function_calls|function)>\s*',
+        '',
+        cleaned,
+        flags=re.IGNORECASE,
+    )
    return cleaned.strip()


@@ -275,13 +333,23 @@ def load_cli_config() -> Dict[str, Any]:
    
    Environment variables take precedence over config file values.
    Returns default values if no config file exists.
+
+    If HERMES_IGNORE_USER_CONFIG=1 is set (via ``hermes chat --ignore-user-config``),
+    the user config at ``~/.hermes/config.yaml`` is skipped entirely and only the
+    built-in defaults plus the project-level ``cli-config.yaml`` (if any) are used.
+    Credentials in ``.env`` are still loaded — this flag only suppresses
+    behavioral/config settings.
    """
    # Check user config first ({HERMES_HOME}/config.yaml)
    user_config_path = _hermes_home / 'config.yaml'
    project_config_path = Path(__file__).parent / 'cli-config.yaml'

+    # --ignore-user-config: force-skip the user config.yaml (still honor project
+    # config as a fallback so defaults stay sensible).
+    ignore_user_config = os.environ.get("HERMES_IGNORE_USER_CONFIG") == "1"
+
    # Use user config if it exists, otherwise project config
-    if user_config_path.exists():
+    if user_config_path.exists() and not ignore_user_config:
        config_path = user_config_path
    else:
        config_path = project_config_path
@@ -914,6 +982,32 @@ def _cleanup_worktree(info: Dict[str, str] = None) -> None:
    print(f"\033[32m✓ Worktree cleaned up: {wt_path}\033[0m")


+def _run_state_db_auto_maintenance(session_db) -> None:
+    """Call ``SessionDB.maybe_auto_prune_and_vacuum`` using current config.
+
+    Reads the ``sessions:`` section from config.yaml via
+    :func:`hermes_cli.config.load_config` (the authoritative loader that
+    deep-merges DEFAULT_CONFIG, so unmigrated configs still get default
+    values). Honours ``auto_prune`` / ``retention_days`` /
+    ``vacuum_after_prune`` / ``min_interval_hours``, and delegates to the
+    DB. Never raises — maintenance must never block interactive startup.
+    """
+    if session_db is None:
+        return
+    try:
+        from hermes_cli.config import load_config as _load_full_config
+        cfg = (_load_full_config().get("sessions") or {})
+        if not cfg.get("auto_prune", False):
+            return
+        session_db.maybe_auto_prune_and_vacuum(
+            retention_days=int(cfg.get("retention_days", 90)),
+            min_interval_hours=int(cfg.get("min_interval_hours", 24)),
+            vacuum=bool(cfg.get("vacuum_after_prune", True)),
+        )
+    except Exception as exc:
+        logger.debug("state.db auto-maintenance skipped: %s", exc)
+
+
 def _prune_stale_worktrees(repo_root: str, max_age_hours: int = 24) -> None:
    """Remove stale worktrees and orphaned branches on startup.

@@ -1662,7 +1756,7 @@ def _parse_skills_argument(skills: str | list[str] | tuple[str, ...] | None) ->
    return parsed


-def save_config_value(key_path: str, value: any) -> bool:
+def save_config_value(key_path: str, value: Any) -> bool:
    """
    Save a value to the active config file at the specified key path.
    
@@ -1746,6 +1840,7 @@ class HermesCLI:
        resume: str = None,
        checkpoints: bool = False,
        pass_session_id: bool = False,
+        ignore_rules: bool = False,
    ):
        """
        Initialize the Hermes CLI.
@@ -1899,6 +1994,11 @@ class HermesCLI:
        self.checkpoints_enabled = checkpoints or cp_cfg.get("enabled", False)
        self.checkpoint_max_snapshots = cp_cfg.get("max_snapshots", 50)
        self.pass_session_id = pass_session_id
+        # --ignore-rules: honor either the constructor flag or the env var set
+        # by `hermes chat --ignore-rules` in hermes_cli/main.py. When true we
+        # pass skip_context_files=True and skip_memory=True to AIAgent so
+        # AGENTS.md/SOUL.md/.cursorrules and persistent memory are not loaded.
+        self.ignore_rules = ignore_rules or os.environ.get("HERMES_IGNORE_RULES") == "1"
        
        # Ephemeral system prompt: env var takes precedence, then config
        self.system_prompt = (
@@ -1961,7 +2061,13 @@ class HermesCLI:
            self._session_db = SessionDB()
        except Exception as e:
            logger.warning("Failed to initialize SessionDB — session will NOT be indexed for search: %s", e)
-        
+
+        # Opportunistic state.db maintenance — runs at most once per
+        # min_interval_hours, tracked via state_meta in state.db itself so
+        # it's shared across all Hermes processes for this HERMES_HOME.
+        # Never blocks startup on failure.
+        _run_state_db_auto_maintenance(self._session_db)
+
        # Deferred title: stored in memory until the session is created in the DB
        self._pending_title: Optional[str] = None
        
@@ -1987,16 +2093,16 @@ class HermesCLI:
        self._interrupt_queue = queue.Queue()
        self._should_exit = False
        self._last_ctrl_c_time = 0
-        self._clarify_state = None
+        self._clarify_state: Optional[_ClarifyState] = None
        self._clarify_freetext = False
        self._clarify_deadline = 0
        self._sudo_state = None
        self._sudo_deadline = 0
        self._modal_input_snapshot = None
-        self._approval_state = None
+        self._approval_state: Optional[_ApprovalState] = None
        self._approval_deadline = 0
        self._approval_lock = threading.Lock()
-        self._model_picker_state = None
+        self._model_picker_state: Optional[_ModelPickerState] = None
        self._secret_state = None
        self._secret_deadline = 0
        self._spinner_text: str = ""  # thinking spinner text for TUI
@@ -3250,6 +3356,8 @@ class HermesCLI:
                checkpoints_enabled=self.checkpoints_enabled,
                checkpoint_max_snapshots=self.checkpoint_max_snapshots,
                pass_session_id=self.pass_session_id,
+                skip_context_files=self.ignore_rules,
+                skip_memory=self.ignore_rules,
                tool_progress_callback=self._on_tool_progress,
                tool_start_callback=self._on_tool_start if self._inline_diffs_enabled else None,
                tool_complete_callback=self._on_tool_complete if self._inline_diffs_enabled else None,
@@ -7076,7 +7184,7 @@ class HermesCLI:
                logging.getLogger(noisy).setLevel(logging.WARNING)
        else:
            logging.getLogger().setLevel(logging.INFO)
-            for quiet_logger in ('tools', 'run_agent', 'trajectory_compressor', 'cron', 'hermes_cli'):
+            for quiet_logger in ('tools', 'run_agent', 'scripts.trajectory_compressor', 'cron', 'hermes_cli'):
                logging.getLogger(quiet_logger).setLevel(logging.ERROR)

    def _show_insights(self, command: str = "/insights"):
@@ -10754,6 +10862,8 @@ def main(
    w: bool = False,
    checkpoints: bool = False,
    pass_session_id: bool = False,
+    ignore_user_config: bool = False,
+    ignore_rules: bool = False,
 ):
    """
    Hermes Agent CLI - Interactive AI Assistant
@@ -10863,6 +10973,7 @@ def main(
        resume=resume,
        checkpoints=checkpoints,
        pass_session_id=pass_session_id,
+        ignore_rules=ignore_rules,
    )

    if parsed_skills:
@@ -439,8 +439,9 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option
                delivery_errors.append(msg)
                continue

-            if result and result.get("error"):
-                msg = f"delivery error: {result['error']}"
+            error = result.get("error") if result else None
+            if error:
+                msg = f"delivery error: {error}"
                logger.error("Job '%s': %s", job["id"], msg)
                delivery_errors.append(msg)
                continue
@@ -972,6 +973,12 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
                f"— last activity: {_last_desc}"
            )

+        # Guard against non-dict returns from run_conversation under error conditions
+        if not isinstance(result, dict):
+            raise RuntimeError(
+                f"agent.run_conversation returned {type(result).__name__} instead of dict: {result!r}"
+            )
+
        final_response = result.get("final_response", "") or ""
        # Strip leaked placeholder text that upstream may inject on empty completions.
        if final_response.strip() == "(No response generated)":
@@ -29,7 +29,7 @@ echo "📝 Logging to: $LOG_FILE"
 # Point to the example dataset in this directory
 SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"

-python batch_runner.py \
+python scripts/batch_runner.py \
  --dataset_file="$SCRIPT_DIR/example_browser_tasks.jsonl" \
  --batch_size=5 \
  --run_name="browser_tasks_example" \
@@ -4,7 +4,7 @@
 # Generates tool-calling trajectories for multi-step web research tasks.
 #
 # Usage:
-#   python batch_runner.py \
+#   python scripts/batch_runner.py \
 #     --config datagen-config-examples/web_research.yaml \
 #     --run_name web_research_v1

@@ -58,6 +58,13 @@ if [ ! -f "$HERMES_HOME/config.yaml" ]; then
    cp "$INSTALL_DIR/cli-config.yaml.example" "$HERMES_HOME/config.yaml"
 fi

+# Ensure the main config file remains accessible to the hermes runtime user
+# even if it was edited on the host after initial ownership setup.
+if [ -f "$HERMES_HOME/config.yaml" ]; then
+    chown hermes:hermes "$HERMES_HOME/config.yaml"
+    chmod 640 "$HERMES_HOME/config.yaml"
+fi
+
 # SOUL.md
 if [ ! -f "$HERMES_HOME/SOUL.md" ]; then
    cp "$INSTALL_DIR/docker/SOUL.md" "$HERMES_HOME/SOUL.md"
@@ -68,4 +75,19 @@ if [ -d "$INSTALL_DIR/skills" ]; then
    python3 "$INSTALL_DIR/tools/skills_sync.py"
 fi

+# Final exec: two supported invocation patterns.
+#
+#   docker run <image>                 -> exec `hermes` with no args (legacy default)
+#   docker run <image> chat -q "..."   -> exec `hermes chat -q "..."` (legacy wrap)
+#   docker run <image> sleep infinity  -> exec `sleep infinity` directly
+#   docker run <image> bash            -> exec `bash` directly
+#
+# If the first positional arg resolves to an executable on PATH, we assume the
+# caller wants to run it directly (needed by the launcher which runs long-lived
+# `sleep infinity` sandbox containers — see tools/environments/docker.py).
+# Otherwise we treat the args as a hermes subcommand and wrap with `hermes`,
+# preserving the documented `docker run <image> <subcommand>` behavior.
+if [ $# -gt 0 ] && command -v "$1" >/dev/null 2>&1; then
+    exec "$@"
+fi
 exec hermes "$@"
@@ -18,7 +18,10 @@ import logging
 import os
 import uuid
 from dataclasses import dataclass, field
-from typing import Any, Dict, List, Optional, Set
+from typing import Any, Dict, List, Optional, Set, TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from tools.budget_config import BudgetConfig

 from model_tools import handle_function_call
 from tools.terminal_tool import get_active_env
@@ -616,6 +616,8 @@ def load_gateway_config() -> GatewayConfig:
                    if isinstance(frc, list):
                        frc = ",".join(str(v) for v in frc)
                    os.environ["SLACK_FREE_RESPONSE_CHANNELS"] = str(frc)
+                if "reactions" in slack_cfg and not os.getenv("SLACK_REACTIONS"):
+                    os.environ["SLACK_REACTIONS"] = str(slack_cfg["reactions"]).lower()

            # Discord settings → env vars (env vars take precedence)
            discord_cfg = yaml_cfg.get("discord", {})
@@ -135,9 +135,22 @@ class HookRegistry:
            except Exception as e:
                print(f"[hooks] Error loading hook {hook_dir.name}: {e}", flush=True)

+    def _resolve_handlers(self, event_type: str) -> List[Callable]:
+        """Return all handlers that should fire for ``event_type``.
+
+        Exact matches fire first, followed by wildcard matches (e.g.
+        ``command:*`` matches ``command:reset``).
+        """
+        handlers = list(self._handlers.get(event_type, []))
+        if ":" in event_type:
+            base = event_type.split(":")[0]
+            wildcard_key = f"{base}:*"
+            handlers.extend(self._handlers.get(wildcard_key, []))
+        return handlers
+
    async def emit(self, event_type: str, context: Optional[Dict[str, Any]] = None) -> None:
        """
-        Fire all handlers registered for an event.
+        Fire all handlers registered for an event, discarding return values.

        Supports wildcard matching: handlers registered for "command:*" will
        fire for any "command:..." event. Handlers registered for a base type
@@ -151,16 +164,7 @@ class HookRegistry:
        if context is None:
            context = {}

-        # Collect handlers: exact match + wildcard match
-        handlers = list(self._handlers.get(event_type, []))
-
-        # Check for wildcard patterns (e.g., "command:*" matches "command:reset")
-        if ":" in event_type:
-            base = event_type.split(":")[0]
-            wildcard_key = f"{base}:*"
-            handlers.extend(self._handlers.get(wildcard_key, []))
-
-        for fn in handlers:
+        for fn in self._resolve_handlers(event_type):
            try:
                result = fn(event_type, context)
                # Support both sync and async handlers
@@ -168,3 +172,32 @@ class HookRegistry:
                    await result
            except Exception as e:
                print(f"[hooks] Error in handler for '{event_type}': {e}", flush=True)
+
+    async def emit_collect(
+        self,
+        event_type: str,
+        context: Optional[Dict[str, Any]] = None,
+    ) -> List[Any]:
+        """Fire handlers and return their non-None return values in order.
+
+        Like :meth:`emit` but captures each handler's return value. Used for
+        decision-style hooks (e.g. ``command:<name>`` policies that want to
+        allow/deny/rewrite the command before normal dispatch).
+
+        Exceptions from individual handlers are logged but do not abort the
+        remaining handlers.
+        """
+        if context is None:
+            context = {}
+
+        results: List[Any] = []
+        for fn in self._resolve_handlers(event_type):
+            try:
+                result = fn(event_type, context)
+                if asyncio.iscoroutine(result):
+                    result = await result
+                if result is not None:
+                    results.append(result)
+            except Exception as e:
+                print(f"[hooks] Error in handler for '{event_type}': {e}", flush=True)
+        return results
@@ -32,14 +32,7 @@ import sqlite3
 import time
 import uuid
 from typing import Any, Dict, List, Optional
-
-try:
-    from aiohttp import web
-    AIOHTTP_AVAILABLE = True
-except ImportError:
-    AIOHTTP_AVAILABLE = False
-    web = None  # type: ignore[assignment]
-
+from aiohttp import web
 from gateway.config import Platform, PlatformConfig
 from gateway.platforms.base import (
    BasePlatformAdapter,
@@ -270,12 +263,6 @@ def _multimodal_validation_error(exc: ValueError, *, param: str) -> "web.Respons
        status=400,
    )

-
-def check_api_server_requirements() -> bool:
-    """Check if API server dependencies are available."""
-    return AIOHTTP_AVAILABLE
-
-
 class ResponseStore:
    """
    SQLite-backed LRU store for Responses API state.
@@ -391,30 +378,26 @@ _CORS_HEADERS = {
 }


-if AIOHTTP_AVAILABLE:
-    @web.middleware
-    async def cors_middleware(request, handler):
-        """Add CORS headers for explicitly allowed origins; handle OPTIONS preflight."""
-        adapter = request.app.get("api_server_adapter")
-        origin = request.headers.get("Origin", "")
-        cors_headers = None
-        if adapter is not None:
-            if not adapter._origin_allowed(origin):
-                return web.Response(status=403)
-            cors_headers = adapter._cors_headers_for_origin(origin)
+@web.middleware
+async def cors_middleware(request, handler):
+    """Add CORS headers for explicitly allowed origins; handle OPTIONS preflight."""
+    adapter = request.app.get("api_server_adapter")
+    origin = request.headers.get("Origin", "")
+    cors_headers = None
+    if adapter is not None:
+        if not adapter._origin_allowed(origin):
+            return web.Response(status=403)
+        cors_headers = adapter._cors_headers_for_origin(origin)

-        if request.method == "OPTIONS":
-            if cors_headers is None:
-                return web.Response(status=403)
-            return web.Response(status=200, headers=cors_headers)
-
-        response = await handler(request)
-        if cors_headers is not None:
-            response.headers.update(cors_headers)
-        return response
-else:
-    cors_middleware = None  # type: ignore[assignment]
+    if request.method == "OPTIONS":
+        if cors_headers is None:
+            return web.Response(status=403)
+        return web.Response(status=200, headers=cors_headers)

+    response = await handler(request)
+    if cors_headers is not None:
+        response.headers.update(cors_headers)
+    return response

 def _openai_error(message: str, err_type: str = "invalid_request_error", param: str = None, code: str = None) -> Dict[str, Any]:
    """OpenAI-style error envelope."""
@@ -428,21 +411,18 @@ def _openai_error(message: str, err_type: str = "invalid_request_error", param:
    }


-if AIOHTTP_AVAILABLE:
-    @web.middleware
-    async def body_limit_middleware(request, handler):
-        """Reject overly large request bodies early based on Content-Length."""
-        if request.method in ("POST", "PUT", "PATCH"):
-            cl = request.headers.get("Content-Length")
-            if cl is not None:
-                try:
-                    if int(cl) > MAX_REQUEST_BYTES:
-                        return web.json_response(_openai_error("Request body too large.", code="body_too_large"), status=413)
-                except ValueError:
-                    return web.json_response(_openai_error("Invalid Content-Length header.", code="invalid_content_length"), status=400)
-        return await handler(request)
-else:
-    body_limit_middleware = None  # type: ignore[assignment]
+@web.middleware
+async def body_limit_middleware(request, handler):
+    """Reject overly large request bodies early based on Content-Length."""
+    if request.method in ("POST", "PUT", "PATCH"):
+        cl = request.headers.get("Content-Length")
+        if cl is not None:
+            try:
+                if int(cl) > MAX_REQUEST_BYTES:
+                    return web.json_response(_openai_error("Request body too large.", code="body_too_large"), status=413)
+            except ValueError:
+                return web.json_response(_openai_error("Invalid Content-Length header.", code="invalid_content_length"), status=400)
+    return await handler(request)

 _SECURITY_HEADERS = {
    "X-Content-Type-Options": "nosniff",
@@ -450,16 +430,13 @@ _SECURITY_HEADERS = {
 }


-if AIOHTTP_AVAILABLE:
-    @web.middleware
-    async def security_headers_middleware(request, handler):
-        """Add security headers to all responses (including errors)."""
-        response = await handler(request)
-        for k, v in _SECURITY_HEADERS.items():
-            response.headers.setdefault(k, v)
-        return response
-else:
-    security_headers_middleware = None  # type: ignore[assignment]
+@web.middleware
+async def security_headers_middleware(request, handler):
+    """Add security headers to all responses (including errors)."""
+    response = await handler(request)
+    for k, v in _SECURITY_HEADERS.items():
+        response.headers.setdefault(k, v)
+    return response


 class _IdempotencyCache:
@@ -804,7 +781,7 @@ class APIServerAdapter(BasePlatformAdapter):
            ],
        })

-    async def _handle_chat_completions(self, request: "web.Request") -> "web.Response":
+    async def _handle_chat_completions(self, request: "web.Request") -> "web.StreamResponse":
        """POST /v1/chat/completions — OpenAI Chat Completions format."""
        auth_err = self._check_auth(request)
        if auth_err:
@@ -1588,7 +1565,7 @@ class APIServerAdapter(BasePlatformAdapter):

        return response

-    async def _handle_responses(self, request: "web.Request") -> "web.Response":
+    async def _handle_responses(self, request: "web.Request") -> "web.StreamResponse":
        """POST /v1/responses — OpenAI Responses API format."""
        auth_err = self._check_auth(request)
        if auth_err:
@@ -2482,10 +2459,6 @@ class APIServerAdapter(BasePlatformAdapter):

    async def connect(self) -> bool:
        """Start the aiohttp web server."""
-        if not AIOHTTP_AVAILABLE:
-            logger.warning("[%s] aiohttp not installed", self.name)
-            return False
-
        try:
            mws = [mw for mw in (cors_middleware, body_limit_middleware, security_headers_middleware) if mw is not None]
            self._app = web.Application(middlewares=mws)
@@ -187,16 +187,14 @@ def proxy_kwargs_for_bot(proxy_url: str | None) -> dict:
    if proxy_url.lower().startswith("socks"):
        try:
            from aiohttp_socks import ProxyConnector
-
-            connector = ProxyConnector.from_url(proxy_url, rdns=True)
-            return {"connector": connector}
        except ImportError:
-            logger.warning(
-                "aiohttp_socks not installed — SOCKS proxy %s ignored. "
-                "Run: pip install aiohttp-socks",
-                proxy_url,
-            )
-            return {}
+            raise ImportError(
+                "aiohttp-socks is required for SOCKS proxy support. "
+                "Install with: pip install hermes-agent[messaging]"
+            ) from None
+
+        connector = ProxyConnector.from_url(proxy_url, rdns=True)
+        return {"connector": connector}
    return {"proxy": proxy_url}


@@ -220,16 +218,14 @@ def proxy_kwargs_for_aiohttp(proxy_url: str | None) -> tuple[dict, dict]:
    if proxy_url.lower().startswith("socks"):
        try:
            from aiohttp_socks import ProxyConnector
-
-            connector = ProxyConnector.from_url(proxy_url, rdns=True)
-            return {"connector": connector}, {}
        except ImportError:
-            logger.warning(
-                "aiohttp_socks not installed — SOCKS proxy %s ignored. "
-                "Run: pip install aiohttp-socks",
-                proxy_url,
-            )
-            return {}, {}
+            raise ImportError(
+                "aiohttp-socks is required for SOCKS proxy support. "
+                "Install with: pip install hermes-agent[messaging]"
+            ) from None
+
+        connector = ProxyConnector.from_url(proxy_url, rdns=True)
+        return {"connector": connector}, {}
    return {}, {"proxy": proxy_url}


@@ -428,6 +424,7 @@ async def cache_image_from_url(url: str, ext: str = ".jpg", retries: int = 2) ->
                    await asyncio.sleep(wait)
                    continue
                raise
+    raise AssertionError("unreachable: retry loop exhausted")


 def cleanup_image_cache(max_age_hours: int = 24) -> int:
@@ -542,6 +539,7 @@ async def cache_audio_from_url(url: str, ext: str = ".ogg", retries: int = 2) ->
                    await asyncio.sleep(wait)
                    continue
                raise
+    raise AssertionError("unreachable: retry loop exhausted")


 # ---------------------------------------------------------------------------
@@ -752,7 +750,10 @@ class MessageEvent:
        if not self.is_command():
            return self.text
        parts = self.text.split(maxsplit=1)
-        return parts[1] if len(parts) > 1 else ""
+        args = parts[1] if len(parts) > 1 else ""
+        # iOS auto-corrects -- to — (em dash) and - to – (en dash)
+        args = args.replace("\u2014\u2014", "--").replace("\u2014", "--").replace("\u2013", "-")
+        return args


@dataclass 
@@ -1343,7 +1344,7 @@ class BasePlatformAdapter(ABC):
        # Extract MEDIA:<path> tags, allowing optional whitespace after the colon
        # and quoted/backticked paths for LLM-formatted outputs.
        media_pattern = re.compile(
-            r'''[`"']?MEDIA:\s*(?P<path>`[^`\n]+`|"[^"\n]+"|'[^'\n]+'|(?:~/|/)\S+(?:[^\S\n]+\S+)*?\.(?:png|jpe?g|gif|webp|mp4|mov|avi|mkv|webm|ogg|opus|mp3|wav|m4a|pdf)(?=[\s`"',;:)\]}]|$)|\S+)[`"']?'''
+            r'''[`"']?MEDIA:\s*(?P<path>`[^`\n]+`|"[^"\n]+"|'[^'\n]+'|(?:~/|/)\S+(?:[^\S\n]+\S+)*?\.(?:png|jpe?g|gif|webp|mp4|mov|avi|mkv|webm|ogg|opus|mp3|wav|m4a|epub|pdf|zip|rar|7z|docx?|xlsx?|pptx?|txt|csv|apk|ipa)(?=[\s`"',;:)\]}]|$)|\S+)[`"']?'''
        )
        for match in media_pattern.finditer(content):
            path = match.group("path").strip()
@@ -1828,8 +1829,11 @@ class BasePlatformAdapter(ABC):
        try:
            await self._run_processing_hook("on_processing_start", event)

-            # Call the handler (this can take a while with tool calls)
-            response = await self._message_handler(event)
+            handler = self._message_handler
+            if handler is None:
+                return
+
+            response = await handler(event)
            
            # Send response if any.  A None/empty response is normal when
            # streaming already delivered the text (already_sent=True) or
@@ -14,7 +14,7 @@ import logging
 import os
 import re
 import uuid
-from datetime import datetime
+from datetime import datetime, timezone
 from typing import Any, Dict, List, Optional
 from urllib.parse import quote

@@ -377,7 +377,7 @@ class BlueBubblesAdapter(BasePlatformAdapter):
        payload = {
            "addresses": [address],
            "message": message,
-            "tempGuid": f"temp-{datetime.utcnow().timestamp()}",
+            "tempGuid": f"temp-{datetime.now(timezone.utc).timestamp()}",
        }
        try:
            res = await self._api_post("/api/v1/chat/new", payload)
@@ -417,7 +417,7 @@ class BlueBubblesAdapter(BasePlatformAdapter):
                )
            payload: Dict[str, Any] = {
                "chatGuid": guid,
-                "tempGuid": f"temp-{datetime.utcnow().timestamp()}",
+                "tempGuid": f"temp-{datetime.now(timezone.utc).timestamp()}",
                "message": chunk,
            }
            if reply_to and self._private_api_enabled and self._helper_connected:
@@ -527,6 +527,7 @@ class DiscordAdapter(BasePlatformAdapter):
        # Reply threading mode: "off" (no replies), "first" (reply on first
        # chunk only, default), "all" (reply-reference on every chunk).
        self._reply_to_mode: str = getattr(config, 'reply_to_mode', 'first') or 'first'
+        self._slash_commands: bool = self.config.extra.get("slash_commands", True)

    async def connect(self) -> bool:
        """Connect to Discord and start receiving events."""
@@ -744,7 +745,8 @@ class DiscordAdapter(BasePlatformAdapter):
                    )

            # Register slash commands
-            self._register_slash_commands()
+            if self._slash_commands:
+                self._register_slash_commands()

            # Start the bot in background
            self._bot_task = asyncio.create_task(self._client.start(self.config.token))
@@ -1194,9 +1196,16 @@ class DiscordAdapter(BasePlatformAdapter):
            try:
                import base64

-                duration_secs = 5.0
                try:
                    from mutagen.oggopus import OggOpus
+                except ImportError:
+                    raise ImportError(
+                        "mutagen is required for Discord voice messages. "
+                        "Install with: pip install hermes-agent[messaging]"
+                    ) from None
+
+                duration_secs = 5.0
+                try:
                    info = OggOpus(audio_path)
                    duration_secs = info.info.length
                except Exception:
@@ -1889,7 +1898,7 @@ class DiscordAdapter(BasePlatformAdapter):
            # Fetch full member list (requires members intent)
            try:
                members = guild.members
-                if len(members) < guild.member_count:
+                if guild.member_count is not None and len(members) < guild.member_count:
                    members = [m async for m in guild.fetch_members(limit=None)]
            except Exception as e:
                logger.warning("Failed to fetch members for guild %s: %s", guild.name, e)
@@ -2129,10 +2138,42 @@ class DiscordAdapter(BasePlatformAdapter):
        # This ensures new commands added to COMMAND_REGISTRY in
        # hermes_cli/commands.py automatically appear as Discord slash
        # commands without needing a manual entry here.
+        def _build_auto_slash_command(_name: str, _description: str, _args_hint: str = ""):
+            """Build a discord.app_commands.Command that proxies to _run_simple_slash."""
+            discord_name = _name.lower()[:32]
+            desc = (_description or f"Run /{_name}")[:100]
+            has_args = bool(_args_hint)
+
+            if has_args:
+                def _make_args_handler(__name: str, __hint: str):
+                    @discord.app_commands.describe(args=f"Arguments: {__hint}"[:100])
+                    async def _handler(interaction: discord.Interaction, args: str = ""):
+                        await self._run_simple_slash(
+                            interaction, f"/{__name} {args}".strip()
+                        )
+                    _handler.__name__ = f"auto_slash_{__name.replace('-', '_')}"
+                    return _handler
+
+                handler = _make_args_handler(_name, _args_hint)
+            else:
+                def _make_simple_handler(__name: str):
+                    async def _handler(interaction: discord.Interaction):
+                        await self._run_simple_slash(interaction, f"/{__name}")
+                    _handler.__name__ = f"auto_slash_{__name.replace('-', '_')}"
+                    return _handler
+
+                handler = _make_simple_handler(_name)
+
+            return discord.app_commands.Command(
+                name=discord_name,
+                description=desc,
+                callback=handler,
+            )
+
+        already_registered: set[str] = set()
        try:
            from hermes_cli.commands import COMMAND_REGISTRY, _is_gateway_available, _resolve_config_gates

-            already_registered = set()
            try:
                already_registered = {cmd.name for cmd in tree.get_commands()}
            except Exception:
@@ -2147,38 +2188,10 @@ class DiscordAdapter(BasePlatformAdapter):
                discord_name = cmd_def.name.lower()[:32]
                if discord_name in already_registered:
                    continue
-                # Skip aliases that overlap with already-registered names
-                # (aliases for explicitly registered commands are handled above).
-                desc = (cmd_def.description or f"Run /{cmd_def.name}")[:100]
-                has_args = bool(cmd_def.args_hint)
-
-                if has_args:
-                    # Command takes optional arguments — create handler with
-                    # an optional ``args`` string parameter.
-                    def _make_args_handler(_name: str, _hint: str):
-                        @discord.app_commands.describe(args=f"Arguments: {_hint}"[:100])
-                        async def _handler(interaction: discord.Interaction, args: str = ""):
-                            await self._run_simple_slash(
-                                interaction, f"/{_name} {args}".strip()
-                            )
-                        _handler.__name__ = f"auto_slash_{_name.replace('-', '_')}"
-                        return _handler
-
-                    handler = _make_args_handler(cmd_def.name, cmd_def.args_hint)
-                else:
-                    # Parameterless command.
-                    def _make_simple_handler(_name: str):
-                        async def _handler(interaction: discord.Interaction):
-                            await self._run_simple_slash(interaction, f"/{_name}")
-                        _handler.__name__ = f"auto_slash_{_name.replace('-', '_')}"
-                        return _handler
-
-                    handler = _make_simple_handler(cmd_def.name)
-
-                auto_cmd = discord.app_commands.Command(
-                    name=discord_name,
-                    description=desc,
-                    callback=handler,
+                auto_cmd = _build_auto_slash_command(
+                    cmd_def.name,
+                    cmd_def.description,
+                    cmd_def.args_hint,
                )
                try:
                    tree.add_command(auto_cmd)
@@ -2195,6 +2208,35 @@ class DiscordAdapter(BasePlatformAdapter):
        except Exception as e:
            logger.warning("Discord auto-register from COMMAND_REGISTRY failed: %s", e)

+        # ── Plugin-registered slash commands ──
+        # Plugins register via PluginContext.register_command(); we mirror
+        # those into Discord's native slash picker so users get the same
+        # autocomplete UX as for built-in commands. No per-platform plugin
+        # API needed — plugin commands are platform-agnostic.
+        try:
+            from hermes_cli.commands import _iter_plugin_command_entries
+
+            for plugin_name, plugin_desc, plugin_args_hint in _iter_plugin_command_entries():
+                discord_name = plugin_name.lower()[:32]
+                if discord_name in already_registered:
+                    continue
+                auto_cmd = _build_auto_slash_command(
+                    plugin_name,
+                    plugin_desc,
+                    plugin_args_hint,
+                )
+                try:
+                    tree.add_command(auto_cmd)
+                    already_registered.add(discord_name)
+                except Exception:
+                    # Silently skip commands that fail registration (e.g.
+                    # name conflict with a subcommand group).
+                    pass
+        except Exception as e:
+            logger.warning(
+                "Discord auto-register from plugin commands failed: %s", e
+            )
+
        # Register skills under a single /skill command group with category
        # subcommand groups.  This uses 1 top-level slot instead of N,
        # supporting up to 25 categories × 25 skills = 625 skills.
@@ -2469,7 +2511,7 @@ class DiscordAdapter(BasePlatformAdapter):
                if isinstance(skills, str):
                    return [skills]
                if isinstance(skills, list) and skills:
-                    return list(dict.fromkeys(skills))  # dedup, preserve order
+                    return list(dict.fromkeys(skills))  # ty: ignore[invalid-return-type]  # dedup, preserve order
        return None

    def _resolve_channel_prompt(self, channel_id: str, parent_id: str | None = None) -> str | None:
@@ -3005,7 +3047,7 @@ class DiscordAdapter(BasePlatformAdapter):

            # Skip the mention check if the message is in a thread where
            # the bot has previously participated (auto-created or replied in).
-            in_bot_thread = is_thread and thread_id in self._threads
+            in_bot_thread = is_thread and thread_id is not None and thread_id in self._threads

            if require_mention and not is_free_channel and not in_bot_thread:
                if self._client.user not in message.mentions and not mention_prefix:
@@ -3598,7 +3640,9 @@ if DISCORD_AVAILABLE:
                )
                return

-            provider_slug = interaction.data["values"][0]
+            if interaction.data is None:
+                return
+            provider_slug = interaction.data["values"][0]  # ty: ignore[invalid-key]
            self._selected_provider = provider_slug
            provider = next(
                (p for p in self.providers if p["slug"] == provider_slug), None
@@ -3632,8 +3676,10 @@ if DISCORD_AVAILABLE:
                )
                return

+            if interaction.data is None:
+                return
            self.resolved = True
-            model_id = interaction.data["values"][0]
+            model_id = interaction.data["values"][0]  # ty: ignore[invalid-key]

            try:
                result_text = await self.on_model_selected(
@@ -532,6 +532,7 @@ class EmailAdapter(BasePlatformAdapter):
        image_url: str,
        caption: Optional[str] = None,
        reply_to: Optional[str] = None,
+        metadata: Optional[Dict[str, Any]] = None,
    ) -> SendResult:
        """Send an image URL as part of an email body."""
        text = caption or ""
@@ -545,6 +546,7 @@ class EmailAdapter(BasePlatformAdapter):
        caption: Optional[str] = None,
        file_name: Optional[str] = None,
        reply_to: Optional[str] = None,
+        **kwargs,
    ) -> SendResult:
        """Send a file as an email attachment."""
        try:
@@ -14,6 +14,35 @@ Supports:
 - Interactive card button-click events routed as synthetic COMMAND events
 - Webhook anomaly tracking (matches openclaw createWebhookAnomalyTracker)
 - Verification token validation as second auth layer (matches openclaw)
+
+Feishu identity model
+---------------------
+Feishu uses three user-ID tiers (official docs:
+https://open.feishu.cn/document/home/user-identity-introduction/introduction):
+
+  open_id  (ou_xxx)  — **App-scoped**.  The same person gets a different
+                        open_id under each Feishu app.  Always available in
+                        event payloads without extra permissions.
+  user_id  (u_xxx)   — **Tenant-scoped**.  Stable within a company but
+                        requires the ``contact:user.employee_id:readonly``
+                        scope.  May not be present.
+  union_id (on_xxx)  — **Developer-scoped**.  Same across all apps owned by
+                        one developer/ISV.  Best cross-app stable ID.
+
+For bots specifically:
+
+  app_id              — The application's canonical credential identifier.
+  bot open_id         — Returned by ``/bot/v3/info``.  This is the bot's own
+                        open_id *within its app context* and is what Feishu
+                        puts in ``mentions[].id.open_id`` when someone
+                        @-mentions the bot.  Used for mention gating only.
+
+In single-bot mode (what Hermes currently supports), open_id works as a
+de-facto unique user identifier since there is only one app context.
+
+Session-key participant isolation prefers ``union_id`` (via user_id_alt)
+over ``open_id`` (via user_id) so that sessions stay stable if the same
+user is seen through different apps in the future.
 """

 from __future__ import annotations
@@ -35,7 +64,7 @@ from dataclasses import dataclass, field
 from datetime import datetime
 from pathlib import Path
 from types import SimpleNamespace
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional, Sequence
 from urllib.error import HTTPError, URLError
 from urllib.parse import urlencode
 from urllib.request import Request, urlopen
@@ -73,7 +102,9 @@ try:
        UpdateMessageRequest,
        UpdateMessageRequestBody,
    )
+    from lark_oapi.core import AccessTokenType, HttpMethod
    from lark_oapi.core.const import FEISHU_DOMAIN, LARK_DOMAIN
+    from lark_oapi.core.model import BaseRequest
    from lark_oapi.event.callback.model.p2_card_action_trigger import (
        CallBackCard,
        P2CardActionTriggerResponse,
@@ -234,6 +265,8 @@ FALLBACK_ATTACHMENT_TEXT = "[Attachment]"
 _PREFERRED_LOCALES = ("zh_cn", "en_us")
 _MARKDOWN_SPECIAL_CHARS_RE = re.compile(r"([\\`*_{}\[\]()#+\-!|>~])")
 _MENTION_PLACEHOLDER_RE = re.compile(r"@_user_\d+")
+_MENTION_BOUNDARY_CHARS = frozenset(" \t\n\r.,;:!?、，。；：！？()[]{}<>\"'`")
+_TRAILING_TERMINAL_PUNCT = frozenset(" \t\n\r.!?。！？")
 _WHITESPACE_RE = re.compile(r"\s+")
 _SUPPORTED_CARD_TEXT_KEYS = (
    "title",
@@ -277,12 +310,36 @@ class FeishuPostMediaRef:
    resource_type: str = "file"


+@dataclass(frozen=True)
+class FeishuMentionRef:
+    name: str = ""
+    open_id: str = ""
+    is_all: bool = False
+    is_self: bool = False
+
+
+@dataclass(frozen=True)
+class _FeishuBotIdentity:
+    open_id: str = ""
+    user_id: str = ""
+    name: str = ""
+
+    def matches(self, *, open_id: str, user_id: str, name: str) -> bool:
+        # Precedence: open_id > user_id > name. IDs are authoritative when both
+        # sides have them; the next tier is only considered when either side
+        # lacks the current one.
+        if open_id and self.open_id:
+            return open_id == self.open_id
+        if user_id and self.user_id:
+            return user_id == self.user_id
+        return bool(self.name) and name == self.name
+
+
@dataclass(frozen=True)
 class FeishuPostParseResult:
    text_content: str
    image_keys: List[str] = field(default_factory=list)
    media_refs: List[FeishuPostMediaRef] = field(default_factory=list)
-    mentioned_ids: List[str] = field(default_factory=list)


@dataclass(frozen=True)
@@ -292,14 +349,14 @@ class FeishuNormalizedMessage:
    preferred_message_type: str = "text"
    image_keys: List[str] = field(default_factory=list)
    media_refs: List[FeishuPostMediaRef] = field(default_factory=list)
-    mentioned_ids: List[str] = field(default_factory=list)
+    mentions: List[FeishuMentionRef] = field(default_factory=list)
    relation_kind: str = "plain"
    metadata: Dict[str, Any] = field(default_factory=dict)


@dataclass(frozen=True)
 class FeishuAdapterSettings:
-    app_id: str
+    app_id: str  # Canonical bot/app identifier (credential, not from event payloads)
    app_secret: str
    domain_name: str
    connection_mode: str
@@ -307,7 +364,11 @@ class FeishuAdapterSettings:
    verification_token: str
    group_policy: str
    allowed_group_users: frozenset[str]
+    # Bot's own open_id (app-scoped) — returned by /bot/v3/info.  Used only for
+    # @mention matching: Feishu puts this value in mentions[].id.open_id when
+    # a user @-mentions the bot in a group chat.
    bot_open_id: str
+    # Bot's user_id (tenant-scoped) — optional, used as fallback mention match.
    bot_user_id: str
    bot_name: str
    dedup_cache_size: int
@@ -505,14 +566,17 @@ def _build_markdown_post_rows(content: str) -> List[List[Dict[str, str]]]:
    return rows or [[{"tag": "md", "text": content}]]


-def parse_feishu_post_payload(payload: Any) -> FeishuPostParseResult:
+def parse_feishu_post_payload(
+    payload: Any,
+    *,
+    mentions_map: Optional[Dict[str, FeishuMentionRef]] = None,
+) -> FeishuPostParseResult:
    resolved = _resolve_post_payload(payload)
    if not resolved:
        return FeishuPostParseResult(text_content=FALLBACK_POST_TEXT)

    image_keys: List[str] = []
    media_refs: List[FeishuPostMediaRef] = []
-    mentioned_ids: List[str] = []
    parts: List[str] = []

    title = _normalize_feishu_text(str(resolved.get("title", "")).strip())
@@ -523,7 +587,10 @@ def parse_feishu_post_payload(payload: Any) -> FeishuPostParseResult:
        if not isinstance(row, list):
            continue
        row_text = _normalize_feishu_text(
-            "".join(_render_post_element(item, image_keys, media_refs, mentioned_ids) for item in row)
+            "".join(
+                _render_post_element(item, image_keys, media_refs, mentions_map)
+                for item in row
+            )
        )
        if row_text:
            parts.append(row_text)
@@ -532,7 +599,6 @@ def parse_feishu_post_payload(payload: Any) -> FeishuPostParseResult:
        text_content="\n".join(parts).strip() or FALLBACK_POST_TEXT,
        image_keys=image_keys,
        media_refs=media_refs,
-        mentioned_ids=mentioned_ids,
    )


@@ -584,7 +650,7 @@ def _render_post_element(
    element: Any,
    image_keys: List[str],
    media_refs: List[FeishuPostMediaRef],
-    mentioned_ids: List[str],
+    mentions_map: Optional[Dict[str, FeishuMentionRef]] = None,
 ) -> str:
    if isinstance(element, str):
        return element
@@ -602,19 +668,21 @@ def _render_post_element(
        escaped_label = _escape_markdown_text(label)
        return f"[{escaped_label}]({href})" if href else escaped_label
    if tag == "at":
-        mentioned_id = (
-            str(element.get("open_id", "")).strip()
-            or str(element.get("user_id", "")).strip()
-        )
-        if mentioned_id and mentioned_id not in mentioned_ids:
-            mentioned_ids.append(mentioned_id)
-        display_name = (
-            str(element.get("user_name", "")).strip()
-            or str(element.get("name", "")).strip()
-            or str(element.get("text", "")).strip()
-            or mentioned_id
-        )
-        return f"@{_escape_markdown_text(display_name)}" if display_name else "@"
+        # Post <at>.user_id is a placeholder ("@_user_N" or "@_all"); look up
+        # the real ref in mentions_map for the display name.
+        placeholder = str(element.get("user_id", "")).strip()
+        if placeholder == "@_all":
+            # Feishu SDK sometimes omits @_all from the top-level mentions
+            # payload; record it here so the caller's mention list stays complete.
+            if mentions_map is not None and "@_all" not in mentions_map:
+                mentions_map["@_all"] = FeishuMentionRef(is_all=True)
+            return "@all"
+        ref = (mentions_map or {}).get(placeholder)
+        if ref is not None:
+            display_name = ref.name or ref.open_id or "user"
+        else:
+            display_name = str(element.get("user_name", "")).strip() or "user"
+        return f"@{_escape_markdown_text(display_name)}"
    if tag in {"img", "image"}:
        image_key = str(element.get("image_key", "")).strip()
        if image_key and image_key not in image_keys:
@@ -652,8 +720,7 @@ def _render_post_element(

    nested_parts: List[str] = []
    for key in ("text", "title", "content", "children", "elements"):
-        value = element.get(key)
-        extracted = _render_nested_post(value, image_keys, media_refs, mentioned_ids)
+        extracted = _render_nested_post(element.get(key), image_keys, media_refs, mentions_map)
        if extracted:
            nested_parts.append(extracted)
    return " ".join(part for part in nested_parts if part)
@@ -663,7 +730,7 @@ def _render_nested_post(
    value: Any,
    image_keys: List[str],
    media_refs: List[FeishuPostMediaRef],
-    mentioned_ids: List[str],
+    mentions_map: Optional[Dict[str, FeishuMentionRef]] = None,
 ) -> str:
    if isinstance(value, str):
        return _escape_markdown_text(value)
@@ -671,17 +738,17 @@ def _render_nested_post(
        return " ".join(
            part
            for item in value
-            for part in [_render_nested_post(item, image_keys, media_refs, mentioned_ids)]
+            for part in [_render_nested_post(item, image_keys, media_refs, mentions_map)]
            if part
        )
    if isinstance(value, dict):
-        direct = _render_post_element(value, image_keys, media_refs, mentioned_ids)
+        direct = _render_post_element(value, image_keys, media_refs, mentions_map)
        if direct:
            return direct
        return " ".join(
            part
            for item in value.values()
-            for part in [_render_nested_post(item, image_keys, media_refs, mentioned_ids)]
+            for part in [_render_nested_post(item, image_keys, media_refs, mentions_map)]
            if part
        )
    return ""
@@ -692,31 +759,48 @@ def _render_nested_post(
 # ---------------------------------------------------------------------------


-def normalize_feishu_message(*, message_type: str, raw_content: str) -> FeishuNormalizedMessage:
+def normalize_feishu_message(
+    *,
+    message_type: str,
+    raw_content: str,
+    mentions: Optional[Sequence[Any]] = None,
+    bot: _FeishuBotIdentity = _FeishuBotIdentity(),
+) -> FeishuNormalizedMessage:
    normalized_type = str(message_type or "").strip().lower()
    payload = _load_feishu_payload(raw_content)
+    mentions_map = _build_mentions_map(mentions, bot)

    if normalized_type == "text":
+        text = str(payload.get("text", "") or "")
+        # Feishu SDK sometimes omits @_all from the mentions payload even when
+        # the text literal contains it (confirmed via im.v1.message.get).
+        if "@_all" in text and "@_all" not in mentions_map:
+            mentions_map["@_all"] = FeishuMentionRef(is_all=True)
        return FeishuNormalizedMessage(
            raw_type=normalized_type,
-            text_content=_normalize_feishu_text(str(payload.get("text", "") or "")),
+            text_content=_normalize_feishu_text(text, mentions_map),
+            mentions=list(mentions_map.values()),
        )
    if normalized_type == "post":
-        parsed_post = parse_feishu_post_payload(payload)
+        # The walker writes back to mentions_map if it encounters
+        # <at user_id="@_all">, so reading .values() after parsing is enough.
+        parsed_post = parse_feishu_post_payload(payload, mentions_map=mentions_map)
        return FeishuNormalizedMessage(
            raw_type=normalized_type,
            text_content=parsed_post.text_content,
            image_keys=list(parsed_post.image_keys),
            media_refs=list(parsed_post.media_refs),
-            mentioned_ids=list(parsed_post.mentioned_ids),
+            mentions=list(mentions_map.values()),
            relation_kind="post",
        )
+    mention_refs = list(mentions_map.values())
    if normalized_type == "image":
        image_key = str(payload.get("image_key", "") or "").strip()
        alt_text = _normalize_feishu_text(
            str(payload.get("text", "") or "")
            or str(payload.get("alt", "") or "")
-            or FALLBACK_IMAGE_TEXT
+            or FALLBACK_IMAGE_TEXT,
+            mentions_map,
        )
        return FeishuNormalizedMessage(
            raw_type=normalized_type,
@@ -724,6 +808,7 @@ def normalize_feishu_message(*, message_type: str, raw_content: str) -> FeishuNo
            preferred_message_type="photo",
            image_keys=[image_key] if image_key else [],
            relation_kind="image",
+            mentions=mention_refs,
        )
    if normalized_type in {"file", "audio", "media"}:
        media_ref = _build_media_ref_from_payload(payload, resource_type=normalized_type)
@@ -735,6 +820,7 @@ def normalize_feishu_message(*, message_type: str, raw_content: str) -> FeishuNo
            media_refs=[media_ref] if media_ref.file_key else [],
            relation_kind=normalized_type,
            metadata={"placeholder_text": placeholder},
+            mentions=mention_refs,
        )
    if normalized_type == "merge_forward":
        return _normalize_merge_forward_message(payload)
@@ -1009,8 +1095,20 @@ def _first_non_empty_text(*values: Any) -> str:
 # ---------------------------------------------------------------------------


-def _normalize_feishu_text(text: str) -> str:
-    cleaned = _MENTION_PLACEHOLDER_RE.sub(" ", text or "")
+def _normalize_feishu_text(
+    text: str,
+    mentions_map: Optional[Dict[str, FeishuMentionRef]] = None,
+) -> str:
+    def _sub(match: "re.Match[str]") -> str:
+        key = match.group(0)
+        ref = (mentions_map or {}).get(key)
+        if ref is None:
+            return " "
+        name = ref.name or ref.open_id or "user"
+        return f"@{name}"
+
+    cleaned = _MENTION_PLACEHOLDER_RE.sub(_sub, text or "")
+    cleaned = cleaned.replace("@_all", "@all")
    cleaned = cleaned.replace("\r\n", "\n").replace("\r", "\n")
    cleaned = "\n".join(_WHITESPACE_RE.sub(" ", line).strip() for line in cleaned.split("\n"))
    cleaned = "\n".join(line for line in cleaned.split("\n") if line)
@@ -1029,6 +1127,117 @@ def _unique_lines(lines: List[str]) -> List[str]:
    return unique


+# ---------------------------------------------------------------------------
+# Mention helpers
+# ---------------------------------------------------------------------------
+
+
+def _extract_mention_ids(mention: Any) -> tuple[str, str]:
+    # Returns (open_id, user_id). im.v1.message.get hands back id as a string
+    # plus id_type discriminator; event payloads hand back a nested UserId
+    # object carrying both fields.
+    mention_id = getattr(mention, "id", None)
+    if isinstance(mention_id, str):
+        id_type = str(getattr(mention, "id_type", "") or "").lower()
+        if id_type == "open_id":
+            return mention_id, ""
+        if id_type == "user_id":
+            return "", mention_id
+        return "", ""
+    if mention_id is None:
+        return "", ""
+    return (
+        str(getattr(mention_id, "open_id", "") or ""),
+        str(getattr(mention_id, "user_id", "") or ""),
+    )
+
+
+def _build_mentions_map(
+    mentions: Optional[Sequence[Any]],
+    bot: _FeishuBotIdentity,
+) -> Dict[str, FeishuMentionRef]:
+    result: Dict[str, FeishuMentionRef] = {}
+    for mention in mentions or []:
+        key = str(getattr(mention, "key", "") or "")
+        if not key:
+            continue
+        if key == "@_all":
+            result[key] = FeishuMentionRef(is_all=True)
+            continue
+        open_id, user_id = _extract_mention_ids(mention)
+        name = str(getattr(mention, "name", "") or "").strip()
+        result[key] = FeishuMentionRef(
+            name=name,
+            open_id=open_id,
+            is_self=bot.matches(open_id=open_id, user_id=user_id, name=name),
+        )
+    return result
+
+
+def _build_mention_hint(mentions: Sequence[FeishuMentionRef]) -> str:
+    parts: List[str] = []
+    seen: set = set()
+    for ref in mentions:
+        if ref.is_self:
+            continue
+        signature = (ref.is_all, ref.open_id, ref.name)
+        if signature in seen:
+            continue
+        seen.add(signature)
+        if ref.is_all:
+            parts.append("@all")
+        elif ref.open_id:
+            parts.append(f"{ref.name or 'unknown'} (open_id={ref.open_id})")
+        else:
+            parts.append(ref.name or "unknown")
+    return f"[Mentioned: {', '.join(parts)}]" if parts else ""
+
+
+def _strip_edge_self_mentions(
+    text: str,
+    mentions: Sequence[FeishuMentionRef],
+) -> str:
+    # Leading: strip consecutive self-mentions unconditionally.
+    # Trailing: strip only when followed by whitespace/terminal punct, so
+    # mid-sentence references ("don't @Bot again") stay intact.
+    # Leading word-boundary prevents @Al from eating @Alice.
+    if not text:
+        return text
+    self_names = [
+        f"@{ref.name or ref.open_id or 'user'}"
+        for ref in mentions
+        if ref.is_self
+    ]
+    if not self_names:
+        return text
+
+    remaining = text.lstrip()
+    while True:
+        for nm in self_names:
+            if not remaining.startswith(nm):
+                continue
+            after = remaining[len(nm):]
+            if after and after[0] not in _MENTION_BOUNDARY_CHARS:
+                continue
+            remaining = after.lstrip()
+            break
+        else:
+            break
+
+    while True:
+        i = len(remaining)
+        while i > 0 and remaining[i - 1] in _TRAILING_TERMINAL_PUNCT:
+            i -= 1
+        body = remaining[:i]
+        tail = remaining[i:]
+        for nm in self_names:
+            if body.endswith(nm):
+                remaining = body[: -len(nm)].rstrip() + tail
+                break
+        else:
+            return remaining
+
+
 def _run_official_feishu_ws_client(ws_client: Any, adapter: Any) -> None:
    """Run the official Lark WS client in its own thread-local event loop."""
    import lark_oapi.ws.client as ws_client_module
@@ -1491,6 +1700,7 @@ class FeishuAdapter(BasePlatformAdapter):
        if not self._client:
            return SendResult(success=False, error="Not connected")

+        content = self.format_message(content)
        try:
            msg_type, payload = self._build_outbound_payload(content)
            body = self._build_update_message_body(msg_type=msg_type, content=payload)
@@ -2470,13 +2680,22 @@ class FeishuAdapter(BasePlatformAdapter):
        chat_type: str,
        message_id: str,
    ) -> None:
-        text, inbound_type, media_urls, media_types = await self._extract_message_content(message)
+        text, inbound_type, media_urls, media_types, mentions = await self._extract_message_content(message)
+
+        if inbound_type == MessageType.TEXT:
+            text = _strip_edge_self_mentions(text, mentions)
+            if text.startswith("/"):
+                inbound_type = MessageType.COMMAND
+
+        # Guard runs post-strip so a pure "@Bot" message (stripped to "") is dropped.
        if inbound_type == MessageType.TEXT and not text and not media_urls:
-            logger.debug("[Feishu] Ignoring unsupported or empty message type: %s", getattr(message, "message_type", ""))
+            logger.debug("[Feishu] Ignoring empty text message id=%s", message_id)
            return

-        if inbound_type == MessageType.TEXT and text.startswith("/"):
-            inbound_type = MessageType.COMMAND
+        if inbound_type != MessageType.COMMAND:
+            hint = _build_mention_hint(mentions)
+            if hint:
+                text = f"{hint}\n\n{text}" if text else hint

        reply_to_message_id = (
            getattr(message, "parent_id", None)
@@ -2935,14 +3154,20 @@ class FeishuAdapter(BasePlatformAdapter):
    # Message content extraction and resource download
    # =========================================================================

-    async def _extract_message_content(self, message: Any) -> tuple[str, MessageType, List[str], List[str]]:
-        """Extract text and cached media from a normalized Feishu message."""
+    async def _extract_message_content(
+        self, message: Any
+    ) -> tuple[str, MessageType, List[str], List[str], List[FeishuMentionRef]]:
        raw_content = getattr(message, "content", "") or ""
        raw_type = getattr(message, "message_type", "") or ""
        message_id = str(getattr(message, "message_id", "") or "")
        logger.info("[Feishu] Received raw message type=%s message_id=%s", raw_type, message_id)

-        normalized = normalize_feishu_message(message_type=raw_type, raw_content=raw_content)
+        normalized = normalize_feishu_message(
+            message_type=raw_type,
+            raw_content=raw_content,
+            mentions=getattr(message, "mentions", None),
+            bot=self._bot_identity(),
+        )
        media_urls, media_types = await self._download_feishu_message_resources(
            message_id=message_id,
            normalized=normalized,
@@ -2959,7 +3184,7 @@ class FeishuAdapter(BasePlatformAdapter):
            if injected:
                text = injected

-        return text, inbound_type, media_urls, media_types
+        return text, inbound_type, media_urls, media_types, list(normalized.mentions)

    async def _download_feishu_message_resources(
        self,
@@ -3223,10 +3448,22 @@ class FeishuAdapter(BasePlatformAdapter):
        return "group"

    async def _resolve_sender_profile(self, sender_id: Any) -> Dict[str, Optional[str]]:
+        """Map Feishu's three-tier user IDs onto Hermes' SessionSource fields.
+
+        Preference order for the primary ``user_id`` field:
+          1. user_id  (tenant-scoped, most stable — requires permission scope)
+          2. open_id  (app-scoped, always available — different per bot app)
+
+        ``user_id_alt`` carries the union_id (developer-scoped, stable across
+        all apps by the same developer).  Session-key generation prefers
+        user_id_alt when present, so participant isolation stays stable even
+        if the primary ID is the app-scoped open_id.
+        """
        open_id = getattr(sender_id, "open_id", None) or None
        user_id = getattr(sender_id, "user_id", None) or None
        union_id = getattr(sender_id, "union_id", None) or None
-        primary_id = open_id or user_id
+        # Prefer tenant-scoped user_id; fall back to app-scoped open_id.
+        primary_id = user_id or open_id
        display_name = await self._resolve_sender_name_from_api(primary_id or union_id)
        return {
            "user_id": primary_id,
@@ -3308,15 +3545,31 @@ class FeishuAdapter(BasePlatformAdapter):
            body = getattr(parent, "body", None)
            msg_type = getattr(parent, "msg_type", "") or ""
            raw_content = getattr(body, "content", "") or ""
-            text = self._extract_text_from_raw_content(msg_type=msg_type, raw_content=raw_content)
+            parent_mentions = getattr(parent, "mentions", None) if parent else None
+            text = self._extract_text_from_raw_content(
+                msg_type=msg_type,
+                raw_content=raw_content,
+                mentions=parent_mentions,
+            )
            self._message_text_cache[message_id] = text
            return text
        except Exception:
            logger.warning("[Feishu] Failed to fetch parent message %s", message_id, exc_info=True)
            return None

-    def _extract_text_from_raw_content(self, *, msg_type: str, raw_content: str) -> Optional[str]:
-        normalized = normalize_feishu_message(message_type=msg_type, raw_content=raw_content)
+    def _extract_text_from_raw_content(
+        self,
+        *,
+        msg_type: str,
+        raw_content: str,
+        mentions: Optional[Sequence[Any]] = None,
+    ) -> Optional[str]:
+        normalized = normalize_feishu_message(
+            message_type=msg_type,
+            raw_content=raw_content,
+            mentions=mentions,
+            bot=self._bot_identity(),
+        )
        if normalized.text_content:
            return normalized.text_content
        placeholder = normalized.metadata.get("placeholder_text") if isinstance(normalized.metadata, dict) else None
@@ -3386,10 +3639,10 @@ class FeishuAdapter(BasePlatformAdapter):
        normalized = normalize_feishu_message(
            message_type=getattr(message, "message_type", "") or "",
            raw_content=raw_content,
+            mentions=getattr(message, "mentions", None),
+            bot=self._bot_identity(),
        )
-        if normalized.mentioned_ids:
-            return self._post_mentions_bot(normalized.mentioned_ids)
-        return False
+        return self._post_mentions_bot(normalized.mentions)

    def _is_self_sent_bot_message(self, event: Any) -> bool:
        """Return True only for Feishu events emitted by this Hermes bot."""
@@ -3409,30 +3662,37 @@ class FeishuAdapter(BasePlatformAdapter):
        return False

    def _message_mentions_bot(self, mentions: List[Any]) -> bool:
-        """Check whether any mention targets the configured or inferred bot identity."""
+        # IDs trump names: when both sides have open_id (or both user_id),
+        # match requires equal IDs. Name fallback only when either side
+        # lacks an ID.
        for mention in mentions:
            mention_id = getattr(mention, "id", None)
-            mention_open_id = getattr(mention_id, "open_id", None)
-            mention_user_id = getattr(mention_id, "user_id", None)
+            mention_open_id = (getattr(mention_id, "open_id", None) or "").strip()
+            mention_user_id = (getattr(mention_id, "user_id", None) or "").strip()
            mention_name = (getattr(mention, "name", None) or "").strip()

-            if self._bot_open_id and mention_open_id == self._bot_open_id:
-                return True
-            if self._bot_user_id and mention_user_id == self._bot_user_id:
-                return True
+            if mention_open_id and self._bot_open_id:
+                if mention_open_id == self._bot_open_id:
+                    return True
+                continue  # IDs differ — not the bot; skip name fallback.
+            if mention_user_id and self._bot_user_id:
+                if mention_user_id == self._bot_user_id:
+                    return True
+                continue
            if self._bot_name and mention_name == self._bot_name:
                return True

        return False

-    def _post_mentions_bot(self, mentioned_ids: List[str]) -> bool:
-        if not mentioned_ids:
-            return False
-        if self._bot_open_id and self._bot_open_id in mentioned_ids:
-            return True
-        if self._bot_user_id and self._bot_user_id in mentioned_ids:
-            return True
-        return False
+    def _post_mentions_bot(self, mentions: List[FeishuMentionRef]) -> bool:
+        return any(m.is_self for m in mentions)
+
+    def _bot_identity(self) -> _FeishuBotIdentity:
+        return _FeishuBotIdentity(
+            open_id=self._bot_open_id,
+            user_id=self._bot_user_id,
+            name=self._bot_name,
+        )

    async def _hydrate_bot_identity(self) -> None:
        """Best-effort discovery of bot identity for precise group mention gating
@@ -3457,14 +3717,15 @@ class FeishuAdapter(BasePlatformAdapter):
        # uses via probe_bot().
        if not self._bot_open_id or not self._bot_name:
            try:
-                resp = await asyncio.to_thread(
-                    self._client.request,
-                    method="GET",
-                    url="/open-apis/bot/v3/info",
-                    body=None,
-                    raw_response=True,
+                req = (
+                    BaseRequest.builder()
+                    .http_method(HttpMethod.GET)
+                    .uri("/open-apis/bot/v3/info")
+                    .token_types({AccessTokenType.TENANT})
+                    .build()
                )
-                content = getattr(resp, "content", None)
+                resp = await asyncio.to_thread(self._client.request, req)
+                content = getattr(getattr(resp, "raw", None), "content", None)
                if content:
                    payload = json.loads(content)
                    parsed = _parse_bot_response(payload) or {}
@@ -4212,6 +4473,9 @@ def probe_bot(app_id: str, app_secret: str, domain: str) -> Optional[dict]:

    Uses lark_oapi SDK when available, falls back to raw HTTP otherwise.
    Returns {"bot_name": ..., "bot_open_id": ...} on success, None on failure.
+
+    Note: ``bot_open_id`` here is the bot's app-scoped open_id — the same ID
+    that Feishu puts in @mention payloads.  It is NOT the app_id.
    """
    if FEISHU_AVAILABLE:
        return _probe_bot_sdk(app_id, app_secret, domain)
@@ -4232,12 +4496,12 @@ def _build_onboard_client(app_id: str, app_secret: str, domain: str) -> Any:


 def _parse_bot_response(data: dict) -> Optional[dict]:
-    """Extract bot_name and bot_open_id from a /bot/v3/info response."""
+    # /bot/v3/info returns bot.app_name; legacy paths used bot_name — accept both.
    if data.get("code") != 0:
        return None
    bot = data.get("bot") or data.get("data", {}).get("bot") or {}
    return {
-        "bot_name": bot.get("bot_name"),
+        "bot_name": bot.get("app_name") or bot.get("bot_name"),
        "bot_open_id": bot.get("open_id"),
    }

@@ -4246,13 +4510,18 @@ def _probe_bot_sdk(app_id: str, app_secret: str, domain: str) -> Optional[dict]:
    """Probe bot info using lark_oapi SDK."""
    try:
        client = _build_onboard_client(app_id, app_secret, domain)
-        resp = client.request(
-            method="GET",
-            url="/open-apis/bot/v3/info",
-            body=None,
-            raw_response=True,
+        req = (
+            BaseRequest.builder()
+            .http_method(HttpMethod.GET)
+            .uri("/open-apis/bot/v3/info")
+            .token_types({AccessTokenType.TENANT})
+            .build()
        )
-        return _parse_bot_response(json.loads(resp.content))
+        resp = client.request(req)
+        content = getattr(getattr(resp, "raw", None), "content", None)
+        if content is None:
+            return None
+        return _parse_bot_response(json.loads(content))
    except Exception as exc:
        logger.debug("[Feishu onboard] SDK probe failed: %s", exc)
        return None
@@ -2170,8 +2170,8 @@ class MatrixAdapter(BasePlatformAdapter):
            ul_match = re.match(r"^[\s]*[-*+]\s+(.+)$", line)
            if ul_match:
                items = []
-                while i < len(lines) and re.match(r"^[\s]*[-*+]\s+(.+)$", lines[i]):
-                    items.append(re.match(r"^[\s]*[-*+]\s+(.+)$", lines[i]).group(1))
+                while i < len(lines) and (m := re.match(r"^[\s]*[-*+]\s+(.+)$", lines[i])):
+                    items.append(m.group(1))
                    i += 1
                li = "".join(f"<li>{item}</li>" for item in items)
                out_lines.append(f"<ul>{li}</ul>")
@@ -2181,8 +2181,8 @@ class MatrixAdapter(BasePlatformAdapter):
            ol_match = re.match(r"^[\s]*\d+[.)]\s+(.+)$", line)
            if ol_match:
                items = []
-                while i < len(lines) and re.match(r"^[\s]*\d+[.)]\s+(.+)$", lines[i]):
-                    items.append(re.match(r"^[\s]*\d+[.)]\s+(.+)$", lines[i]).group(1))
+                while i < len(lines) and (m := re.match(r"^[\s]*\d+[.)]\s+(.+)$", lines[i])):
+                    items.append(m.group(1))
                    i += 1
                li = "".join(f"<li>{item}</li>" for item in items)
                out_lines.append(f"<ol>{li}</ol>")
@@ -26,9 +26,8 @@ from .adapter import (  # noqa: F401
 # -- Onboard (QR-code scan-to-configure) -----------------------------------
 from .onboard import (  # noqa: F401
    BindStatus,
-    create_bind_task,
-    poll_bind_result,
    build_connect_url,
+    qr_register,
 )
 from .crypto import decrypt_secret, generate_bind_key  # noqa: F401

@@ -44,9 +43,8 @@ __all__ = [
    "_ssrf_redirect_guard",
    # onboard
    "BindStatus",
-    "create_bind_task",
-    "poll_bind_result",
    "build_connect_url",
+    "qr_register",
    # crypto
    "decrypt_secret",
    "generate_bind_key",
@@ -535,6 +535,9 @@ class QQAdapter(BasePlatformAdapter):
                    quick_disconnect_count = 0
                else:
                    backoff_idx += 1
+                    if backoff_idx >= MAX_RECONNECT_ATTEMPTS:
+                        logger.error("[%s] Max reconnect attempts reached (QQCloseError)", self._log_tag)
+                        return

            except Exception as exc:
                if not self._running:
@@ -1839,6 +1842,7 @@ class QQAdapter(BasePlatformAdapter):
                    await asyncio.sleep(1.5 * (attempt + 1))
                else:
                    raise
+        raise AssertionError("unreachable: retry loop exhausted")

    # Maximum time (seconds) to wait for reconnection before giving up on send.
    _RECONNECT_WAIT_SECONDS = 15.0
@@ -1,6 +1,10 @@
 """
 QQBot scan-to-configure (QR code onboard) module.

+Mirrors the Feishu onboarding pattern: synchronous HTTP + a single public
+entry-point ``qr_register()`` that handles the full flow (create task →
+display QR code → poll → decrypt credentials).
+
 Calls the ``q.qq.com`` ``create_bind_task`` / ``poll_bind_result`` APIs to
 generate a QR-code URL and poll for scan completion.  On success the caller
 receives the bot's *app_id*, *client_secret* (decrypted locally), and the
@@ -12,18 +16,20 @@ Reference: https://bot.q.qq.com/wiki/develop/api-v2/
 from __future__ import annotations

 import logging
+import time
 from enum import IntEnum
-from typing import Tuple
+from typing import Optional, Tuple
 from urllib.parse import quote

 from .constants import (
    ONBOARD_API_TIMEOUT,
    ONBOARD_CREATE_PATH,
+    ONBOARD_POLL_INTERVAL,
    ONBOARD_POLL_PATH,
    PORTAL_HOST,
    QR_URL_TEMPLATE,
 )
-from .crypto import generate_bind_key
+from .crypto import decrypt_secret, generate_bind_key
 from .utils import get_api_headers

 logger = logging.getLogger(__name__)
@@ -35,7 +41,7 @@ logger = logging.getLogger(__name__)


 class BindStatus(IntEnum):
-    """Status codes returned by ``poll_bind_result``."""
+    """Status codes returned by ``_poll_bind_result``."""

    NONE = 0
    PENDING = 1
@@ -44,18 +50,40 @@ class BindStatus(IntEnum):


 # ---------------------------------------------------------------------------
-# Public API
+# QR rendering
+# ---------------------------------------------------------------------------
+
+try:
+    import qrcode as _qrcode_mod
+except (ImportError, TypeError):
+    _qrcode_mod = None  # type: ignore[assignment]
+
+
+def _render_qr(url: str) -> bool:
+    """Try to render a QR code in the terminal. Returns True if successful."""
+    if _qrcode_mod is None:
+        return False
+    try:
+        qr = _qrcode_mod.QRCode(
+            error_correction=_qrcode_mod.constants.ERROR_CORRECT_M,
+            border=2,
+        )
+        qr.add_data(url)
+        qr.make(fit=True)
+        qr.print_ascii(invert=True)
+        return True
+    except Exception:
+        return False
+
+
+# ---------------------------------------------------------------------------
+# Synchronous HTTP helpers (mirrors Feishu _post_registration pattern)
 # ---------------------------------------------------------------------------


-async def create_bind_task(
-    timeout: float = ONBOARD_API_TIMEOUT,
-) -> Tuple[str, str]:
+def _create_bind_task(timeout: float = ONBOARD_API_TIMEOUT) -> Tuple[str, str]:
    """Create a bind task and return *(task_id, aes_key_base64)*.

-    The AES key is generated locally and sent to the server so it can
-    encrypt the bot credentials before returning them.
-
    Raises:
        RuntimeError: If the API returns a non-zero ``retcode``.
    """
@@ -64,8 +92,8 @@ async def create_bind_task(
    url = f"https://{PORTAL_HOST}{ONBOARD_CREATE_PATH}"
    key = generate_bind_key()

-    async with httpx.AsyncClient(timeout=timeout, follow_redirects=True) as client:
-        resp = await client.post(url, json={"key": key}, headers=get_api_headers())
+    with httpx.Client(timeout=timeout, follow_redirects=True) as client:
+        resp = client.post(url, json={"key": key}, headers=get_api_headers())
        resp.raise_for_status()
        data = resp.json()

@@ -80,7 +108,7 @@ async def create_bind_task(
    return task_id, key


-async def poll_bind_result(
+def _poll_bind_result(
    task_id: str,
    timeout: float = ONBOARD_API_TIMEOUT,
 ) -> Tuple[BindStatus, str, str, str]:
@@ -89,12 +117,6 @@ async def poll_bind_result(
    Returns:
        A 4-tuple of ``(status, bot_appid, bot_encrypt_secret, user_openid)``.

-        * ``bot_encrypt_secret`` is AES-256-GCM encrypted — decrypt it with
-          :func:`~gateway.platforms.qqbot.crypto.decrypt_secret` using the
-          key from :func:`create_bind_task`.
-        * ``user_openid`` is the OpenID of the person who scanned the code
-          (available when ``status == COMPLETED``).
-
    Raises:
        RuntimeError: If the API returns a non-zero ``retcode``.
    """
@@ -102,8 +124,8 @@ async def poll_bind_result(

    url = f"https://{PORTAL_HOST}{ONBOARD_POLL_PATH}"

-    async with httpx.AsyncClient(timeout=timeout, follow_redirects=True) as client:
-        resp = await client.post(url, json={"task_id": task_id}, headers=get_api_headers())
+    with httpx.Client(timeout=timeout, follow_redirects=True) as client:
+        resp = client.post(url, json={"task_id": task_id}, headers=get_api_headers())
        resp.raise_for_status()
        data = resp.json()

@@ -122,3 +144,77 @@ async def poll_bind_result(
 def build_connect_url(task_id: str) -> str:
    """Build the QR-code target URL for a given *task_id*."""
    return QR_URL_TEMPLATE.format(task_id=quote(task_id))
+
+
+# ---------------------------------------------------------------------------
+# Public entry-point
+# ---------------------------------------------------------------------------
+
+_MAX_REFRESHES = 3
+
+
+def qr_register(timeout_seconds: int = 600) -> Optional[dict]:
+    """Run the QQBot scan-to-configure QR registration flow.
+
+    Mirrors ``feishu.qr_register()``: handles create → display → poll →
+    decrypt in one call.  Unexpected errors propagate to the caller.
+
+    :returns:
+        ``{"app_id": ..., "client_secret": ..., "user_openid": ...}`` on
+        success, or ``None`` on failure / expiry / cancellation.
+    """
+    deadline = time.monotonic() + timeout_seconds
+
+    for refresh_count in range(_MAX_REFRESHES + 1):
+        # ── Create bind task ──
+        try:
+            task_id, aes_key = _create_bind_task()
+        except Exception as exc:
+            logger.warning("[QQBot onboard] Failed to create bind task: %s", exc)
+            return None
+
+        url = build_connect_url(task_id)
+
+        # ── Display QR code + URL ──
+        print()
+        if _render_qr(url):
+            print(f"  Scan the QR code above, or open this URL directly:\n  {url}")
+        else:
+            print(f"  Open this URL in QQ on your phone:\n  {url}")
+            print("  Tip: pip install qrcode  to display a scannable QR code here")
+        print()
+
+        # ── Poll loop ──
+        while time.monotonic() < deadline:
+            try:
+                status, app_id, encrypted_secret, user_openid = _poll_bind_result(task_id)
+            except Exception:
+                time.sleep(ONBOARD_POLL_INTERVAL)
+                continue
+
+            if status == BindStatus.COMPLETED:
+                client_secret = decrypt_secret(encrypted_secret, aes_key)
+                print()
+                print(f"  QR scan complete! (App ID: {app_id})")
+                if user_openid:
+                    print(f"  Scanner's OpenID: {user_openid}")
+                return {
+                    "app_id": app_id,
+                    "client_secret": client_secret,
+                    "user_openid": user_openid,
+                }
+
+            if status == BindStatus.EXPIRED:
+                if refresh_count >= _MAX_REFRESHES:
+                    logger.warning("[QQBot onboard] QR code expired %d times — giving up", _MAX_REFRESHES)
+                    return None
+                print(f"\n  QR code expired, refreshing... ({refresh_count + 1}/{_MAX_REFRESHES})")
+                break  # next for-loop iteration creates a new task
+
+            time.sleep(ONBOARD_POLL_INTERVAL)
+        else:
+            # deadline reached without completing
+            logger.warning("[QQBot onboard] Poll timed out after %ds", timeout_seconds)
+            return None
+
+    return None
@@ -38,6 +38,7 @@ from gateway.platforms.base import (
    BasePlatformAdapter,
    MessageEvent,
    MessageType,
+    ProcessingOutcome,
    SendResult,
    SUPPORTED_DOCUMENT_TYPES,
    safe_url_for_log,
@@ -113,6 +114,11 @@ class SlackAdapter(BasePlatformAdapter):
        # Cache for _fetch_thread_context results: cache_key → _ThreadContextCache
        self._thread_context_cache: Dict[str, _ThreadContextCache] = {}
        self._THREAD_CACHE_TTL = 60.0
+        # Track message IDs that should get reaction lifecycle (DMs / @mentions).
+        self._reacting_message_ids: set = set()
+        # Track active assistant thread status indicators so stop_typing can
+        # clear them (chat_id → thread_ts).
+        self._active_status_threads: Dict[str, str] = {}

    async def connect(self) -> bool:
        """Connect to Slack via Socket Mode."""
@@ -362,6 +368,7 @@ class SlackAdapter(BasePlatformAdapter):
        if not thread_ts:
            return  # Can only set status in a thread context

+        self._active_status_threads[chat_id] = thread_ts
        try:
            await self._get_client(chat_id).assistant_threads_setStatus(
                channel_id=chat_id,
@@ -373,6 +380,22 @@ class SlackAdapter(BasePlatformAdapter):
            # in an assistant-enabled context. Falls back to reactions.
            logger.debug("[Slack] assistant.threads.setStatus failed: %s", e)

+    async def stop_typing(self, chat_id: str) -> None:
+        """Clear the assistant thread status indicator."""
+        if not self._app:
+            return
+        thread_ts = self._active_status_threads.pop(chat_id, None)
+        if not thread_ts:
+            return
+        try:
+            await self._get_client(chat_id).assistant_threads_setStatus(
+                channel_id=chat_id,
+                thread_ts=thread_ts,
+                status="",
+            )
+        except Exception as e:
+            logger.debug("[Slack] assistant.threads.setStatus clear failed: %s", e)
+
    def _dm_top_level_threads_as_sessions(self) -> bool:
        """Whether top-level Slack DMs get per-message session threads.

@@ -584,6 +607,38 @@ class SlackAdapter(BasePlatformAdapter):
            logger.debug("[Slack] reactions.remove failed (%s): %s", emoji, e)
            return False

+    def _reactions_enabled(self) -> bool:
+        """Check if message reactions are enabled via config/env."""
+        return os.getenv("SLACK_REACTIONS", "true").lower() not in ("false", "0", "no")
+
+    async def on_processing_start(self, event: MessageEvent) -> None:
+        """Add an in-progress reaction when message processing begins."""
+        if not self._reactions_enabled():
+            return
+        ts = getattr(event, "message_id", None)
+        if not ts or ts not in self._reacting_message_ids:
+            return
+        channel_id = getattr(event.source, "chat_id", None)
+        if channel_id:
+            await self._add_reaction(channel_id, ts, "eyes")
+
+    async def on_processing_complete(self, event: MessageEvent, outcome: ProcessingOutcome) -> None:
+        """Swap the in-progress reaction for a final success/failure reaction."""
+        if not self._reactions_enabled():
+            return
+        ts = getattr(event, "message_id", None)
+        if not ts or ts not in self._reacting_message_ids:
+            return
+        self._reacting_message_ids.discard(ts)
+        channel_id = getattr(event.source, "chat_id", None)
+        if not channel_id:
+            return
+        await self._remove_reaction(channel_id, ts, "eyes")
+        if outcome == ProcessingOutcome.SUCCESS:
+            await self._add_reaction(channel_id, ts, "white_check_mark")
+        elif outcome == ProcessingOutcome.FAILURE:
+            await self._add_reaction(channel_id, ts, "x")
+
    # ----- User identity resolution -----

    async def _resolve_user_name(self, user_id: str, chat_id: str = "") -> str:
@@ -1213,17 +1268,12 @@ class SlackAdapter(BasePlatformAdapter):
        # Only react when bot is directly addressed (DM or @mention).
        # In listen-all channels (require_mention=false), reacting to every
        # casual message would be noisy.
-        _should_react = is_dm or is_mentioned
-
+        _should_react = (is_dm or is_mentioned) and self._reactions_enabled()
        if _should_react:
-            await self._add_reaction(channel_id, ts, "eyes")
+            self._reacting_message_ids.add(ts)

        await self.handle_message(msg_event)

-        if _should_react:
-            await self._remove_reaction(channel_id, ts, "eyes")
-            await self._add_reaction(channel_id, ts, "white_check_mark")
-
    # ----- Approval button support (Block Kit) -----

    async def send_exec_approval(
@@ -1640,6 +1690,7 @@ class SlackAdapter(BasePlatformAdapter):
                        await asyncio.sleep(1.5 * (attempt + 1))
                        continue
                    raise
+        raise AssertionError("unreachable: retry loop exhausted")

    async def _download_slack_file_bytes(self, url: str, team_id: str = "") -> bytes:
        """Download a Slack file and return raw bytes, with retry."""
@@ -1665,6 +1716,7 @@ class SlackAdapter(BasePlatformAdapter):
                        await asyncio.sleep(1.5 * (attempt + 1))
                        continue
                    raise
+        raise AssertionError("unreachable: retry loop exhausted")

    # ── Channel mention gating ─────────────────────────────────────────────

@@ -25,7 +25,10 @@ import hmac
 import logging
 import os
 import urllib.parse
-from typing import Any, Dict, Optional
+from typing import Any, Dict, Optional, TYPE_CHECKING
+
+if TYPE_CHECKING:
+    import aiohttp

 from gateway.config import Platform, PlatformConfig
 from gateway.platforms.base import (
@@ -2820,6 +2820,8 @@ class TelegramAdapter(BasePlatformAdapter):
        )

        sticker = msg.sticker
+        if sticker is None:
+            return
        emoji = sticker.emoji or ""
        set_name = sticker.set_name or ""

@@ -151,7 +151,7 @@ def _resolve_system_dns() -> set[str]:
    """Return the IPv4 addresses that the OS resolver gives for api.telegram.org."""
    try:
        results = socket.getaddrinfo(_TELEGRAM_API_HOST, 443, socket.AF_INET)
-        return {addr[4][0] for addr in results}
+        return {str(addr[4][0]) for addr in results}
    except Exception:
        return set()

@@ -508,6 +508,11 @@ class WeComAdapter(BasePlatformAdapter):
        self._remember_chat_req_id(chat_id, self._payload_req_id(payload))

        text, reply_text = self._extract_text(body)
+        # Strip leading @mention in group chats so slash commands like
+        # "@BotName /approve" are correctly recognized as "/approve".
+        # Mirrors what the Telegram adapter does (re.sub @botname).
+        if is_group and text:
+            text = re.sub(r"^@\S+\s*", "", text).strip()
        media_urls, media_types = await self._extract_media(body)
        message_type = self._derive_message_type(body, text, media_types)
        has_reply_context = bool(reply_text and (text or media_urls))
@@ -698,7 +703,8 @@ class WeComAdapter(BasePlatformAdapter):
                elif isinstance(appmsg.get("image"), dict):
                    refs.append(("image", appmsg["image"]))

-        quote = body.get("quote") if isinstance(body.get("quote"), dict) else {}
+        raw_quote = body.get("quote")
+        quote = raw_quote if isinstance(raw_quote, dict) else {}
        quote_type = str(quote.get("msgtype") or "").lower()
        if quote_type == "image" and isinstance(quote.get("image"), dict):
            refs.append(("image", quote["image"]))
@@ -1464,3 +1470,134 @@ class WeComAdapter(BasePlatformAdapter):
            "name": chat_id,
            "type": "group" if chat_id and chat_id.lower().startswith("group") else "dm",
        }
+
+
+# ------------------------------------------------------------------
+# QR code scan flow for obtaining bot credentials
+# ------------------------------------------------------------------
+
+_QR_GENERATE_URL = "https://work.weixin.qq.com/ai/qc/generate"
+_QR_QUERY_URL = "https://work.weixin.qq.com/ai/qc/query_result"
+_QR_CODE_PAGE = "https://work.weixin.qq.com/ai/qc/gen?source=hermes&scode="
+_QR_POLL_INTERVAL = 3  # seconds
+_QR_POLL_TIMEOUT = 300  # 5 minutes
+
+
+def qr_scan_for_bot_info(
+    *,
+    timeout_seconds: int = _QR_POLL_TIMEOUT,
+) -> Optional[Dict[str, str]]:
+    """Run the WeCom QR scan flow to obtain bot_id and secret.
+
+    Fetches a QR code from WeCom, renders it in the terminal, and polls
+    until the user scans it or the timeout expires.
+
+    Returns ``{"bot_id": ..., "secret": ...}`` on success, ``None`` on
+    failure or timeout.
+
+    Note: the ``work.weixin.qq.com/ai/qc/{generate,query_result}`` endpoints
+    used here are not part of WeCom's public developer API — they back the
+    admin-console web UI's bot-creation flow and may change without notice.
+    The same pattern is used by the feishu/dingtalk QR setup wizards.
+    """
+    try:
+        import urllib.request
+        import urllib.parse
+    except ImportError:  # pragma: no cover
+        logger.error("urllib is required for WeCom QR scan")
+        return None
+
+    generate_url = f"{_QR_GENERATE_URL}?source=hermes"
+
+    # ── Step 1: Fetch QR code ──
+    print("  Connecting to WeCom...", end="", flush=True)
+    try:
+        req = urllib.request.Request(generate_url, headers={"User-Agent": "HermesAgent/1.0"})
+        with urllib.request.urlopen(req, timeout=15) as resp:
+            raw = json.loads(resp.read().decode("utf-8"))
+    except Exception as exc:
+        logger.error("WeCom QR: failed to fetch QR code: %s", exc)
+        print(f" failed: {exc}")
+        return None
+
+    data = raw.get("data") or {}
+    scode = str(data.get("scode") or "").strip()
+    auth_url = str(data.get("auth_url") or "").strip()
+
+    if not scode or not auth_url:
+        logger.error("WeCom QR: unexpected response format: %s", raw)
+        print(" failed: unexpected response format")
+        return None
+
+    print(" done.")
+
+    # ── Step 2: Render QR code in terminal ──
+    print()
+    qr_rendered = False
+    try:
+        import qrcode as _qrcode
+        qr = _qrcode.QRCode()
+        qr.add_data(auth_url)
+        qr.make(fit=True)
+        qr.print_ascii(invert=True)
+        qr_rendered = True
+    except ImportError:
+        pass
+    except Exception:
+        pass
+
+    page_url = f"{_QR_CODE_PAGE}{urllib.parse.quote(scode)}"
+    if qr_rendered:
+        print(f"\n  Scan the QR code above, or open this URL directly:\n  {page_url}")
+    else:
+        print(f"  Open this URL in WeCom on your phone:\n\n  {page_url}\n")
+        print("  Tip: pip install qrcode  to display a scannable QR code here next time")
+    print()
+    print("  Fetching configuration results...", end="", flush=True)
+
+    # ── Step 3: Poll for result ──
+    import time
+    deadline = time.time() + timeout_seconds
+    query_url = f"{_QR_QUERY_URL}?scode={urllib.parse.quote(scode)}"
+    poll_count = 0
+
+    while time.time() < deadline:
+        try:
+            req = urllib.request.Request(query_url, headers={"User-Agent": "HermesAgent/1.0"})
+            with urllib.request.urlopen(req, timeout=10) as resp:
+                result = json.loads(resp.read().decode("utf-8"))
+        except Exception as exc:
+            logger.debug("WeCom QR poll error: %s", exc)
+            time.sleep(_QR_POLL_INTERVAL)
+            continue
+
+        poll_count += 1
+        # Print a dot on every poll so progress is visible within 3s.
+        print(".", end="", flush=True)
+
+        result_data = result.get("data") or {}
+        status = str(result_data.get("status") or "").lower()
+
+        if status == "success":
+            print()  # newline after "Fetching configuration results..." dots
+            bot_info = result_data.get("bot_info") or {}
+            bot_id = str(bot_info.get("botid") or bot_info.get("bot_id") or "").strip()
+            secret = str(bot_info.get("secret") or "").strip()
+            if bot_id and secret:
+                return {"bot_id": bot_id, "secret": secret}
+            logger.warning(
+                "WeCom QR: scan reported success but bot_info missing or incomplete: %s",
+                result_data,
+            )
+            print(
+                "  QR scan reported success but no bot credentials were returned.\n"
+                "  This usually means the bot was not actually created on the WeCom side.\n"
+                "  Falling back to manual credential entry."
+            )
+            return None
+
+        time.sleep(_QR_POLL_INTERVAL)
+
+    print()  # newline after dots
+    print(f"  QR scan timed out ({timeout_seconds // 60} minutes). Please try again.")
+    return None
@@ -25,7 +25,10 @@ import subprocess

 _IS_WINDOWS = platform.system() == "Windows"
 from pathlib import Path
-from typing import Dict, Optional, Any
+from typing import Dict, Optional, Any, TYPE_CHECKING
+
+if TYPE_CHECKING:
+    import aiohttp

 from hermes_constants import get_hermes_dir

@@ -710,7 +710,26 @@ class GatewayRunner:
            self._session_db = SessionDB()
        except Exception as e:
            logger.debug("SQLite session store not available: %s", e)
-        
+
+        # Opportunistic state.db maintenance: prune ended sessions older
+        # than sessions.retention_days + optional VACUUM. Tracks last-run
+        # in state_meta so it only actually executes once per
+        # sessions.min_interval_hours.  Gateway is long-lived so blocking
+        # a few seconds once per day is acceptable; failures are logged
+        # but never raised.
+        if self._session_db is not None:
+            try:
+                from hermes_cli.config import load_config as _load_full_config
+                _sess_cfg = (_load_full_config().get("sessions") or {})
+                if _sess_cfg.get("auto_prune", False):
+                    self._session_db.maybe_auto_prune_and_vacuum(
+                        retention_days=int(_sess_cfg.get("retention_days", 90)),
+                        min_interval_hours=int(_sess_cfg.get("min_interval_hours", 24)),
+                        vacuum=bool(_sess_cfg.get("vacuum_after_prune", True)),
+                    )
+            except Exception as exc:
+                logger.debug("state.db auto-maintenance skipped: %s", exc)
+
        # DM pairing store for code-based user authorization
        from gateway.pairing import PairingStore
        self.pairing_store = PairingStore()
@@ -2668,8 +2687,9 @@ class GatewayRunner:
                except Exception as _e:
                    logger.debug("SessionDB close error: %s", _e)

-            from gateway.status import remove_pid_file
+            from gateway.status import remove_pid_file, release_gateway_runtime_lock
            remove_pid_file()
+            release_gateway_runtime_lock()

            # Write a clean-shutdown marker so the next startup knows this
            # wasn't a crash.  suspend_recently_active() only needs to run
@@ -2839,10 +2859,12 @@ class GatewayRunner:
            return MatrixAdapter(config)

        elif platform == Platform.API_SERVER:
-            from gateway.platforms.api_server import APIServerAdapter, check_api_server_requirements
-            if not check_api_server_requirements():
+            try:
+                import aiohttp  # noqa: F401
+            except ImportError:
                logger.warning("API Server: aiohttp not installed")
                return None
+            from gateway.platforms.api_server import APIServerAdapter
            return APIServerAdapter(config)

        elif platform == Platform.WEBHOOK:
@@ -3466,23 +3488,73 @@ class GatewayRunner:

        # Check for commands
        command = event.get_command()
-        
-        # Emit command:* hook for any recognized slash command.
-        # GATEWAY_KNOWN_COMMANDS is derived from the central COMMAND_REGISTRY
-        # in hermes_cli/commands.py — no hardcoded set to maintain here.
-        from hermes_cli.commands import GATEWAY_KNOWN_COMMANDS, resolve_command as _resolve_cmd
-        if command and command in GATEWAY_KNOWN_COMMANDS:
-            await self.hooks.emit(f"command:{command}", {
-                "platform": source.platform.value if source.platform else "",
-                "user_id": source.user_id,
-                "command": command,
-                "args": event.get_command_args().strip(),
-            })

-        # Resolve aliases to canonical name so dispatch only checks canonicals.
+        from hermes_cli.commands import (
+            GATEWAY_KNOWN_COMMANDS,
+            is_gateway_known_command,
+            resolve_command as _resolve_cmd,
+        )
+
+        # Resolve aliases to canonical name so dispatch and hook names
+        # don't depend on the exact alias the user typed.
        _cmd_def = _resolve_cmd(command) if command else None
        canonical = _cmd_def.name if _cmd_def else command

+        # Fire the ``command:<canonical>`` hook for any recognized slash
+        # command — built-in OR plugin-registered. Handlers can return a
+        # dict with ``{"decision": "deny" | "handled" | "rewrite", ...}``
+        # to intercept dispatch before core handling runs. This replaces
+        # the previous fire-and-forget emit(): return values are now
+        # honored, but handlers that return nothing behave exactly as
+        # before (telemetry-style hooks keep working).
+        if command and is_gateway_known_command(canonical):
+            raw_args = event.get_command_args().strip()
+            hook_ctx = {
+                "platform": source.platform.value if source.platform else "",
+                "user_id": source.user_id,
+                "command": canonical,
+                "raw_command": command,
+                "args": raw_args,
+                "raw_args": raw_args,
+            }
+            try:
+                hook_results = await self.hooks.emit_collect(
+                    f"command:{canonical}", hook_ctx
+                )
+            except Exception as _hook_err:
+                logger.debug(
+                    "command:%s hook dispatch failed (non-fatal): %s",
+                    canonical, _hook_err,
+                )
+                hook_results = []
+
+            for hook_result in hook_results:
+                if not isinstance(hook_result, dict):
+                    continue
+                decision = str(hook_result.get("decision", "")).strip().lower()
+                if not decision or decision == "allow":
+                    continue
+                if decision == "deny":
+                    message = hook_result.get("message")
+                    if isinstance(message, str) and message:
+                        return message
+                    return f"Command `/{command}` was blocked by a hook."
+                if decision == "handled":
+                    message = hook_result.get("message")
+                    return message if isinstance(message, str) and message else None
+                if decision == "rewrite":
+                    new_command = str(
+                        hook_result.get("command_name", "")
+                    ).strip().lstrip("/")
+                    if not new_command:
+                        continue
+                    new_args = str(hook_result.get("raw_args", "")).strip()
+                    event.text = f"/{new_command} {new_args}".strip()
+                    command = event.get_command()
+                    _cmd_def = _resolve_cmd(command) if command else None
+                    canonical = _cmd_def.name if _cmd_def else command
+                    break
+
        if canonical == "new":
            return await self._handle_reset_command(event)
        
@@ -4359,9 +4431,10 @@ class GatewayRunner:
        # is speaking, without needing a separate tool call.
        # -----------------------------------------------------------------
        if source.platform == Platform.DISCORD:
+            from gateway.platforms.discord import DiscordAdapter
            adapter = self.adapters.get(Platform.DISCORD)
            guild_id = self._get_guild_id(event)
-            if guild_id and adapter and hasattr(adapter, "get_voice_channel_context"):
+            if guild_id and isinstance(adapter, DiscordAdapter):
                vc_context = adapter.get_voice_channel_context(guild_id)
                if vc_context:
                    context_prompt += f"\n\n{vc_context}"
@@ -4901,6 +4974,11 @@ class GatewayRunner:
        # the configured default instead of the previously switched model.
        self._session_model_overrides.pop(session_key, None)

+        # Clear session-scoped dangerous-command approvals and /yolo state.
+        # /new is a conversation-boundary operation — approval state from the
+        # previous conversation must not survive the reset.
+        self._clear_session_boundary_security_state(session_key)
+
        # Fire plugin on_session_finalize hook (session boundary)
        try:
            from hermes_cli.plugins import invoke_hook as _invoke_hook
@@ -5409,6 +5487,7 @@ class GatewayRunner:
                try:
                    providers = list_authenticated_providers(
                        current_provider=current_provider,
+                        current_base_url=current_base_url,
                        user_providers=user_provs,
                        custom_providers=custom_provs,
                        max_models=50,
@@ -5520,6 +5599,7 @@ class GatewayRunner:
            try:
                providers = list_authenticated_providers(
                    current_provider=current_provider,
+                    current_base_url=current_base_url,
                    user_providers=user_provs,
                    custom_providers=custom_provs,
                    max_models=5,
@@ -5797,7 +5877,7 @@ class GatewayRunner:
        available = "`none`, " + ", ".join(f"`{n}`" for n in personalities)
        return f"Unknown personality: `{args}`\n\nAvailable: {available}"
    
-    async def _handle_retry_command(self, event: MessageEvent) -> str:
+    async def _handle_retry_command(self, event: MessageEvent) -> Optional[str]:
        """Handle /retry command - re-send the last user message."""
        source = event.source
        session_entry = self.session_store.get_or_create_session(source)
@@ -5947,9 +6027,10 @@ class GatewayRunner:
                "all": "TTS (voice reply to all messages)",
            }
            # Append voice channel info if connected
+            from gateway.platforms.discord import DiscordAdapter
            adapter = self.adapters.get(event.source.platform)
            guild_id = self._get_guild_id(event)
-            if guild_id and hasattr(adapter, "get_voice_channel_info"):
+            if guild_id and isinstance(adapter, DiscordAdapter):
                info = adapter.get_voice_channel_info(guild_id)
                if info:
                    lines = [
@@ -5980,8 +6061,9 @@ class GatewayRunner:

    async def _handle_voice_channel_join(self, event: MessageEvent) -> str:
        """Join the user's current Discord voice channel."""
+        from gateway.platforms.discord import DiscordAdapter
        adapter = self.adapters.get(event.source.platform)
-        if not hasattr(adapter, "join_voice_channel"):
+        if not isinstance(adapter, DiscordAdapter):
            return "Voice channels are not supported on this platform."

        guild_id = self._get_guild_id(event)
@@ -5996,10 +6078,8 @@ class GatewayRunner:

        # Wire callbacks BEFORE join so voice input arriving immediately
        # after connection is not lost.
-        if hasattr(adapter, "_voice_input_callback"):
-            adapter._voice_input_callback = self._handle_voice_channel_input
-        if hasattr(adapter, "_on_voice_disconnect"):
-            adapter._on_voice_disconnect = self._handle_voice_timeout_cleanup
+        adapter._voice_input_callback = self._handle_voice_channel_input
+        adapter._on_voice_disconnect = self._handle_voice_timeout_cleanup

        try:
            success = await adapter.join_voice_channel(voice_channel)
@@ -6016,8 +6096,7 @@ class GatewayRunner:

        if success:
            adapter._voice_text_channels[guild_id] = int(event.source.chat_id)
-            if hasattr(adapter, "_voice_sources"):
-                adapter._voice_sources[guild_id] = event.source.to_dict()
+            adapter._voice_sources[guild_id] = event.source.to_dict()
            self._voice_mode[self._voice_key(event.source.platform, event.source.chat_id)] = "all"
            self._save_voice_modes()
            self._set_adapter_auto_tts_disabled(adapter, event.source.chat_id, disabled=False)
@@ -6031,13 +6110,14 @@ class GatewayRunner:

    async def _handle_voice_channel_leave(self, event: MessageEvent) -> str:
        """Leave the Discord voice channel."""
+        from gateway.platforms.discord import DiscordAdapter
        adapter = self.adapters.get(event.source.platform)
        guild_id = self._get_guild_id(event)

-        if not guild_id or not hasattr(adapter, "leave_voice_channel"):
+        if not guild_id or not isinstance(adapter, DiscordAdapter):
            return "Not in a voice channel."

-        if not hasattr(adapter, "is_in_voice_channel") or not adapter.is_in_voice_channel(guild_id):
+        if not adapter.is_in_voice_channel(guild_id):
            return "Not in a voice channel."

        try:
@@ -6048,8 +6128,7 @@ class GatewayRunner:
        self._voice_mode[self._voice_key(event.source.platform, event.source.chat_id)] = "off"
        self._save_voice_modes()
        self._set_adapter_auto_tts_disabled(adapter, event.source.chat_id, disabled=True)
-        if hasattr(adapter, "_voice_input_callback"):
-            adapter._voice_input_callback = None
+        adapter._voice_input_callback = None
        return "Left voice channel."

    def _handle_voice_timeout_cleanup(self, chat_id: str) -> None:
@@ -6209,13 +6288,13 @@ class GatewayRunner:
            adapter = self.adapters.get(event.source.platform)

            # If connected to a voice channel, play there instead of sending a file
+            from gateway.platforms.discord import DiscordAdapter
            guild_id = self._get_guild_id(event)
            if (guild_id
-                    and hasattr(adapter, "play_in_voice_channel")
-                    and hasattr(adapter, "is_in_voice_channel")
+                    and isinstance(adapter, DiscordAdapter)
                    and adapter.is_in_voice_channel(guild_id)):
                await adapter.play_in_voice_channel(guild_id, actual_path)
-            elif adapter and hasattr(adapter, "send_voice"):
+            elif adapter:
                send_kwargs: Dict[str, Any] = {
                    "chat_id": event.source.chat_id,
                    "audio_path": actual_path,
@@ -6456,6 +6535,11 @@ class GatewayRunner:
                    session_id=task_id,
                    platform=platform_key,
                    user_id=source.user_id,
+                    user_name=source.user_name,
+                    chat_id=source.chat_id,
+                    chat_name=source.chat_name,
+                    chat_type=source.chat_type,
+                    thread_id=source.thread_id,
                    session_db=self._session_db,
                    fallback_model=self._fallback_model,
                )
@@ -7142,6 +7226,7 @@ class GatewayRunner:
        new_entry = self.session_store.switch_session(session_key, target_id)
        if not new_entry:
            return "Failed to switch session."
+        self._clear_session_boundary_security_state(session_key)

        # Get the title for confirmation
        title = self._session_db.get_session_title(target_id) or name
@@ -7216,6 +7301,7 @@ class GatewayRunner:
                    tool_calls=msg.get("tool_calls"),
                    tool_call_id=msg.get("tool_call_id"),
                    reasoning=msg.get("reasoning"),
+                    reasoning_content=msg.get("reasoning_content"),
                )
            except Exception:
                pass  # Best-effort copy
@@ -7230,6 +7316,7 @@ class GatewayRunner:
        new_entry = self.session_store.switch_session(session_key, new_session_id)
        if not new_entry:
            return "Branch created but failed to switch to it."
+        self._clear_session_boundary_security_state(session_key)

        # Evict any cached agent for this session
        self._evict_cached_agent(session_key)
@@ -7620,13 +7707,14 @@ class GatewayRunner:
        from hermes_cli.debug import (
            _capture_dump, collect_debug_report,
            upload_to_pastebin, _schedule_auto_delete,
-            _GATEWAY_PRIVACY_NOTICE,
+            _GATEWAY_PRIVACY_NOTICE, _best_effort_sweep_expired_pastes,
        )

        loop = asyncio.get_running_loop()

        # Run blocking I/O (dump capture, log reads, uploads) in a thread.
        def _collect_and_upload():
+            _best_effort_sweep_expired_pastes()
            dump_text = _capture_dump()
            report = collect_debug_report(log_lines=200, dump_text=dump_text)

@@ -8603,6 +8691,29 @@ class GatewayRunner:
        if hasattr(self, "_busy_ack_ts"):
            self._busy_ack_ts.pop(session_key, None)

+    def _clear_session_boundary_security_state(self, session_key: str) -> None:
+        """Clear approval state that must not survive a real conversation switch."""
+        if not session_key:
+            return
+
+        pending_approvals = getattr(self, "_pending_approvals", None)
+        if isinstance(pending_approvals, dict):
+            pending_approvals.pop(session_key, None)
+
+        try:
+            from tools.approval import clear_session as _clear_approval_session
+        except Exception:
+            return
+
+        try:
+            _clear_approval_session(session_key)
+        except Exception as e:
+            logger.debug(
+                "Failed to clear approval state for session boundary %s: %s",
+                session_key,
+                e,
+            )
+
    def _begin_session_run_generation(self, session_key: str) -> int:
        """Claim a fresh run generation token for ``session_key``.

@@ -9698,6 +9809,11 @@ class GatewayRunner:
                    session_id=session_id,
                    platform=platform_key,
                    user_id=source.user_id,
+                    user_name=source.user_name,
+                    chat_id=source.chat_id,
+                    chat_name=source.chat_name,
+                    chat_type=source.chat_type,
+                    thread_id=source.thread_id,
                    gateway_session_key=session_key,
                    session_db=self._session_db,
                    fallback_model=self._fallback_model,
@@ -10374,6 +10490,7 @@ class GatewayRunner:
                if _timed_out_agent and hasattr(_timed_out_agent, "interrupt"):
                    _timed_out_agent.interrupt(_INTERRUPT_REASON_TIMEOUT)

+                assert _agent_timeout is not None  # narrowed by _idle_secs >= _agent_timeout above
                _timeout_mins = int(_agent_timeout // 60) or 1

                # Construct a user-facing message with diagnostic context.
@@ -10492,7 +10609,7 @@ class GatewayRunner:
                pending = None

            if pending_event or pending:
-                logger.debug("Processing pending message: '%s...'", pending[:40])
+                logger.debug("Processing pending message: '%s...'", (pending or "")[:40])

                # Clear the adapter's interrupt event so the next _run_agent call
                # doesn't immediately re-trigger the interrupt before the new agent
@@ -10511,8 +10628,6 @@ class GatewayRunner:
                    adapter = self.adapters.get(source.platform)
                    if adapter and pending_event:
                        merge_pending_message_event(adapter._pending_messages, session_key, pending_event)
-                    elif adapter and hasattr(adapter, 'queue_message'):
-                        adapter.queue_message(session_key, pending)
                    return result_holder[0] or {"final_response": response, "messages": history}

                was_interrupted = result.get("interrupted")
@@ -10594,7 +10709,7 @@ class GatewayRunner:
                        history=updated_history,
                    )
                    if next_message is None:
-                        return result
+                        return result  # ty: ignore[invalid-return-type]
                    next_message_id = getattr(pending_event, "message_id", None)
                    next_channel_prompt = getattr(pending_event, "channel_prompt", None)

@@ -10764,7 +10879,13 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool =
    # The PID file is scoped to HERMES_HOME, so future multi-profile
    # setups (each profile using a distinct HERMES_HOME) will naturally
    # allow concurrent instances without tripping this guard.
-    from gateway.status import get_running_pid, remove_pid_file, terminate_pid
+    from gateway.status import (
+        acquire_gateway_runtime_lock,
+        get_running_pid,
+        release_gateway_runtime_lock,
+        remove_pid_file,
+        terminate_pid,
+    )
    existing_pid = get_running_pid()
    if existing_pid is not None and existing_pid != os.getpid():
        if replace:
@@ -10977,14 +11098,21 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool =
            "Exiting to avoid double-running.", _current_pid
        )
        return False
+    if not acquire_gateway_runtime_lock():
+        logger.error(
+            "Gateway runtime lock is already held by another instance. Exiting."
+        )
+        return False
    try:
        write_pid_file()
    except FileExistsError:
+        release_gateway_runtime_lock()
        logger.error(
            "PID file race lost to another gateway instance. Exiting."
        )
        return False
    atexit.register(remove_pid_file)
+    atexit.register(release_gateway_runtime_lock)

    # Start the gateway
    success = await runner.start()
@@ -80,7 +80,7 @@ class SessionSource:
    user_name: Optional[str] = None
    thread_id: Optional[str] = None  # For forum topics, Discord threads, etc.
    chat_topic: Optional[str] = None  # Channel topic/description (Discord, Slack)
-    user_id_alt: Optional[str] = None  # Signal UUID (alternative to phone number)
+    user_id_alt: Optional[str] = None  # Platform-specific stable alt ID (Signal UUID, Feishu union_id)
    chat_id_alt: Optional[str] = None  # Signal group internal ID
    is_bot: bool = False  # True when the message author is a bot/webhook (Discord)
    
@@ -1147,6 +1147,10 @@ class SessionStore:
                    tool_name=message.get("tool_name"),
                    tool_calls=message.get("tool_calls"),
                    tool_call_id=message.get("tool_call_id"),
+                    reasoning=message.get("reasoning") if message.get("role") == "assistant" else None,
+                    reasoning_content=message.get("reasoning_content") if message.get("role") == "assistant" else None,
+                    reasoning_details=message.get("reasoning_details") if message.get("role") == "assistant" else None,
+                    codex_reasoning_items=message.get("codex_reasoning_items") if message.get("role") == "assistant" else None,
                )
            except Exception as e:
                logger.debug("Session DB operation failed: %s", e)
@@ -1176,6 +1180,7 @@ class SessionStore:
                        tool_calls=msg.get("tool_calls"),
                        tool_call_id=msg.get("tool_call_id"),
                        reasoning=msg.get("reasoning") if role == "assistant" else None,
+                        reasoning_content=msg.get("reasoning_content") if role == "assistant" else None,
                        reasoning_details=msg.get("reasoning_details") if role == "assistant" else None,
                        codex_reasoning_items=msg.get("codex_reasoning_items") if role == "assistant" else None,
                    )
@@ -22,11 +22,18 @@ from pathlib import Path
 from hermes_constants import get_hermes_home
 from typing import Any, Optional

+if sys.platform == "win32":
+    import msvcrt
+else:
+    import fcntl
+
 _GATEWAY_KIND = "hermes-gateway"
 _RUNTIME_STATUS_FILE = "gateway_state.json"
 _LOCKS_DIRNAME = "gateway-locks"
 _IS_WINDOWS = sys.platform == "win32"
 _UNSET = object()
+_GATEWAY_LOCK_FILENAME = "gateway.lock"
+_gateway_lock_handle = None


 def _get_pid_path() -> Path:
@@ -35,6 +42,14 @@ def _get_pid_path() -> Path:
    return home / "gateway.pid"


+def _get_gateway_lock_path(pid_path: Optional[Path] = None) -> Path:
+    """Return the path to the runtime gateway lock file."""
+    if pid_path is not None:
+        return pid_path.with_name(_GATEWAY_LOCK_FILENAME)
+    home = get_hermes_home()
+    return home / _GATEWAY_LOCK_FILENAME
+
+
 def _get_runtime_status_path() -> Path:
    """Return the persisted runtime health/status file path."""
    return _get_pid_path().with_name(_RUNTIME_STATUS_FILE)
@@ -121,6 +136,7 @@ def _looks_like_gateway_process(pid: int) -> bool:
        "hermes_cli.main gateway",
        "hermes_cli/main.py gateway",
        "hermes gateway",
+        "hermes-gateway",
        "gateway/run.py",
    )
    return any(pattern in cmdline for pattern in patterns)
@@ -212,16 +228,135 @@ def _read_pid_record(pid_path: Optional[Path] = None) -> Optional[dict]:
    return None


+def _read_gateway_lock_record(lock_path: Optional[Path] = None) -> Optional[dict[str, Any]]:
+    return _read_pid_record(lock_path or _get_gateway_lock_path())
+
+
+def _pid_from_record(record: Optional[dict[str, Any]]) -> Optional[int]:
+    if not record:
+        return None
+    try:
+        return int(record["pid"])
+    except (KeyError, TypeError, ValueError):
+        return None
+
+
 def _cleanup_invalid_pid_path(pid_path: Path, *, cleanup_stale: bool) -> None:
+    """Delete a stale gateway PID file (and its sibling lock metadata).
+
+    Called from ``get_running_pid()`` after the runtime lock has already been
+    confirmed inactive, so the on-disk metadata is known to belong to a dead
+    process.  Unlike ``remove_pid_file()`` (which defensively refuses to delete
+    a PID file whose ``pid`` field differs from ``os.getpid()`` to protect
+    ``--replace`` handoffs), this path force-unlinks both files so the next
+    startup sees a clean slate.
+    """
    if not cleanup_stale:
        return
    try:
-        if pid_path == _get_pid_path():
-            remove_pid_file()
-        else:
-            pid_path.unlink(missing_ok=True)
+        pid_path.unlink(missing_ok=True)
    except Exception:
        pass
+    try:
+        _get_gateway_lock_path(pid_path).unlink(missing_ok=True)
+    except Exception:
+        pass
+
+
+def _write_gateway_lock_record(handle) -> None:
+    handle.seek(0)
+    handle.truncate()
+    json.dump(_build_pid_record(), handle)
+    handle.flush()
+    try:
+        os.fsync(handle.fileno())
+    except OSError:
+        pass
+
+
+def _try_acquire_file_lock(handle) -> bool:
+    try:
+        if _IS_WINDOWS:
+            handle.seek(0, os.SEEK_END)
+            if handle.tell() == 0:
+                handle.write("\n")
+                handle.flush()
+            handle.seek(0)
+            msvcrt.locking(handle.fileno(), msvcrt.LK_NBLCK, 1)
+        else:
+            fcntl.flock(handle.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
+        return True
+    except (BlockingIOError, OSError):
+        return False
+
+
+def _release_file_lock(handle) -> None:
+    try:
+        if _IS_WINDOWS:
+            handle.seek(0)
+            msvcrt.locking(handle.fileno(), msvcrt.LK_UNLCK, 1)
+        else:
+            fcntl.flock(handle.fileno(), fcntl.LOCK_UN)
+    except OSError:
+        pass
+
+
+def acquire_gateway_runtime_lock() -> bool:
+    """Claim the cross-process runtime lock for the gateway.
+
+    Unlike the PID file, the lock is owned by the live process itself. If the
+    process dies abruptly, the OS releases the lock automatically.
+    """
+    global _gateway_lock_handle
+    if _gateway_lock_handle is not None:
+        return True
+
+    path = _get_gateway_lock_path()
+    path.parent.mkdir(parents=True, exist_ok=True)
+    handle = open(path, "a+", encoding="utf-8")
+    if not _try_acquire_file_lock(handle):
+        handle.close()
+        return False
+    _write_gateway_lock_record(handle)
+    _gateway_lock_handle = handle
+    return True
+
+
+def release_gateway_runtime_lock() -> None:
+    """Release the gateway runtime lock when owned by this process."""
+    global _gateway_lock_handle
+    handle = _gateway_lock_handle
+    if handle is None:
+        return
+    _gateway_lock_handle = None
+    _release_file_lock(handle)
+    try:
+        handle.close()
+    except OSError:
+        pass
+
+
+def is_gateway_runtime_lock_active(lock_path: Optional[Path] = None) -> bool:
+    """Return True when some process currently owns the gateway runtime lock."""
+    global _gateway_lock_handle
+    resolved_lock_path = lock_path or _get_gateway_lock_path()
+    if _gateway_lock_handle is not None and resolved_lock_path == _get_gateway_lock_path():
+        return True
+
+    if not resolved_lock_path.exists():
+        return False
+
+    handle = open(resolved_lock_path, "a+", encoding="utf-8")
+    try:
+        if _try_acquire_file_lock(handle):
+            _release_file_lock(handle)
+            return False
+        return True
+    finally:
+        try:
+            handle.close()
+        except OSError:
+            pass


 def write_pid_file() -> None:
@@ -361,7 +496,8 @@ def acquire_scoped_lock(scope: str, identity: str, metadata: Optional[dict[str,
        if not stale:
            try:
                os.kill(existing_pid, 0)
-            except (ProcessLookupError, PermissionError):
+            except (ProcessLookupError, PermissionError, OSError):
+                # Windows raises OSError with WinError 87 for invalid pid check
                stale = True
            else:
                current_start = _get_process_start_time(existing_pid)
@@ -583,35 +719,46 @@ def get_running_pid(
    Cleans up stale PID files automatically.
    """
    resolved_pid_path = pid_path or _get_pid_path()
-    record = _read_pid_record(resolved_pid_path)
-    if not record:
+    resolved_lock_path = _get_gateway_lock_path(resolved_pid_path)
+    lock_active = is_gateway_runtime_lock_active(resolved_lock_path)
+    if not lock_active:
        _cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale)
        return None

-    try:
-        pid = int(record["pid"])
-    except (KeyError, TypeError, ValueError):
-        _cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale)
-        return None
+    primary_record = _read_pid_record(resolved_pid_path)
+    fallback_record = _read_gateway_lock_record(resolved_lock_path)

-    try:
-        os.kill(pid, 0)  # signal 0 = existence check, no actual signal sent
-    except (ProcessLookupError, PermissionError):
-        _cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale)
-        return None
+    for record in (primary_record, fallback_record):
+        pid = _pid_from_record(record)
+        if pid is None:
+            continue

-    recorded_start = record.get("start_time")
-    current_start = _get_process_start_time(pid)
-    if recorded_start is not None and current_start is not None and current_start != recorded_start:
-        _cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale)
-        return None
+        try:
+            os.kill(pid, 0)  # signal 0 = existence check, no actual signal sent
+        except ProcessLookupError:
+            continue
+        except PermissionError:
+            # The process exists but belongs to another user/service scope.
+            # With the runtime lock still held, prefer keeping it visible
+            # rather than deleting the PID file as "stale".
+            if _record_looks_like_gateway(record):
+                return pid
+            continue
+        except OSError:
+            # Windows raises OSError with WinError 87 for an invalid pid
+            # (process is definitely gone). Treat as "process doesn't exist".
+            continue

-    if not _looks_like_gateway_process(pid):
-        if not _record_looks_like_gateway(record):
-            _cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale)
-            return None
+        recorded_start = record.get("start_time")
+        current_start = _get_process_start_time(pid)
+        if recorded_start is not None and current_start is not None and current_start != recorded_start:
+            continue

-    return pid
+        if _looks_like_gateway_process(pid) or _record_looks_like_gateway(record):
+            return pid
+
+    _cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale)
+    return None


 def is_gateway_running(
@@ -72,6 +72,8 @@ DEFAULT_QWEN_BASE_URL = "https://portal.qwen.ai/v1"
 DEFAULT_GITHUB_MODELS_BASE_URL = "https://api.githubcopilot.com"
 DEFAULT_COPILOT_ACP_BASE_URL = "acp://copilot"
 DEFAULT_OLLAMA_CLOUD_BASE_URL = "https://ollama.com/v1"
+STEPFUN_STEP_PLAN_INTL_BASE_URL = "https://api.stepfun.ai/step_plan/v1"
+STEPFUN_STEP_PLAN_CN_BASE_URL = "https://api.stepfun.com/step_plan/v1"
 CODEX_OAUTH_CLIENT_ID = "app_EMoamEEZ73f0CkXaXp7hrann"
 CODEX_OAUTH_TOKEN_URL = "https://auth.openai.com/oauth/token"
 CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 120
@@ -182,6 +184,14 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
        inference_base_url="https://api.moonshot.cn/v1",
        api_key_env_vars=("KIMI_CN_API_KEY",),
    ),
+    "stepfun": ProviderConfig(
+        id="stepfun",
+        name="StepFun Step Plan",
+        auth_type="api_key",
+        inference_base_url=STEPFUN_STEP_PLAN_INTL_BASE_URL,
+        api_key_env_vars=("STEPFUN_API_KEY",),
+        base_url_env_var="STEPFUN_BASE_URL",
+    ),
    "arcee": ProviderConfig(
        id="arcee",
        name="Arcee AI",
@@ -204,6 +214,7 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
        auth_type="api_key",
        inference_base_url="https://api.anthropic.com",
        api_key_env_vars=("ANTHROPIC_API_KEY", "ANTHROPIC_TOKEN", "CLAUDE_CODE_OAUTH_TOKEN"),
+        base_url_env_var="ANTHROPIC_BASE_URL",
    ),
    "alibaba": ProviderConfig(
        id="alibaba",
@@ -757,16 +768,20 @@ def _save_provider_state(auth_store: Dict[str, Any], provider_id: str, state: Di
    auth_store["active_provider"] = provider_id


-def read_credential_pool(provider_id: Optional[str] = None) -> Dict[str, Any]:
-    """Return the persisted credential pool, or one provider slice."""
+def read_credential_pool() -> Dict[str, Any]:
+    """Return the entire persisted credential pool."""
    auth_store = _load_auth_store()
    pool = auth_store.get("credential_pool")
    if not isinstance(pool, dict):
        pool = {}
-    if provider_id is None:
-        return dict(pool)
-    provider_entries = pool.get(provider_id)
-    return list(provider_entries) if isinstance(provider_entries, list) else []
+    return dict(pool)
+
+
+def read_provider_credentials(provider_id: str) -> List[Dict[str, Any]]:
+    """Return credential entries for a single provider."""
+    pool = read_credential_pool()
+    entries = pool.get(provider_id)
+    return list(entries) if isinstance(entries, list) else []


 def write_credential_pool(provider_id: str, entries: List[Dict[str, Any]]) -> Path:
@@ -992,6 +1007,7 @@ def resolve_provider(
        "x-ai": "xai", "x.ai": "xai", "grok": "xai",
        "kimi": "kimi-coding", "kimi-for-coding": "kimi-coding", "moonshot": "kimi-coding",
        "kimi-cn": "kimi-coding-cn", "moonshot-cn": "kimi-coding-cn",
+        "step": "stepfun", "stepfun-coding-plan": "stepfun",
        "arcee-ai": "arcee", "arceeai": "arcee",
        "minimax-china": "minimax-cn", "minimax_cn": "minimax-cn",
        "claude": "anthropic", "claude-code": "anthropic",
@@ -249,7 +249,7 @@ def _scan_workspace_state(source_dir: Path) -> list[tuple[Path, str]]:
            state_path = child / state_name
            if state_path.exists():
                kind = "directory" if state_path.is_dir() else "file"
-                rel = state_path.relative_to(source_dir)
+                rel = state_path.relative_to(source_dir).as_posix()
                findings.append((state_path, f"Workspace {kind}: {rel}"))

    return findings
@@ -276,7 +276,7 @@ def _get_ps_exe() -> str | None:
    global _ps_exe
    if _ps_exe is False:
        _ps_exe = _find_powershell()
-    return _ps_exe
+    return _ps_exe if isinstance(_ps_exe, str) else None


 def _windows_has_image() -> bool:
@@ -387,6 +387,8 @@ def _wayland_save(dest: Path) -> bool:

    except FileNotFoundError:
        logger.debug("wl-paste not installed — Wayland clipboard unavailable")
+    except ImportError:
+        raise
    except Exception as e:
        logger.debug("wl-paste clipboard extraction failed: %s", e)
        dest.unlink(missing_ok=True)
@@ -395,14 +397,17 @@ def _wayland_save(dest: Path) -> bool:

 def _convert_to_png(path: Path) -> bool:
    """Convert an image file to PNG in-place (requires Pillow or ImageMagick)."""
-    # Try Pillow first (likely installed in the venv)
    try:
        from PIL import Image
+    except ImportError:
+        raise ImportError(
+            "Pillow is required for clipboard image conversion. "
+            "Install with: pip install hermes-agent[cli]"
+        ) from None
+    try:
        img = Image.open(path)
        img.save(path, "PNG")
        return True
-    except ImportError:
-        pass
    except Exception as e:
        logger.debug("Pillow BMP→PNG conversion failed: %s", e)

@@ -260,6 +260,26 @@ GATEWAY_KNOWN_COMMANDS: frozenset[str] = frozenset(
 )


+def is_gateway_known_command(name: str | None) -> bool:
+    """Return True if ``name`` resolves to a gateway-dispatchable slash command.
+
+    This covers both built-in commands (``GATEWAY_KNOWN_COMMANDS`` derived
+    from ``COMMAND_REGISTRY``) and plugin-registered commands, which are
+    looked up lazily so importing this module never forces plugin
+    discovery. Gateway code uses this to decide whether to emit
+    ``command:<name>`` hooks — plugin commands get the same lifecycle
+    events as built-ins.
+    """
+    if not name:
+        return False
+    if name in GATEWAY_KNOWN_COMMANDS:
+        return True
+    for plugin_name, _description, _args_hint in _iter_plugin_command_entries():
+        if plugin_name == name:
+            return True
+    return False
+
+
 # Commands with explicit Level-2 running-agent handlers in gateway/run.py.
 # Listed here for introspection / tests; semantically a subset of
 # "all resolvable commands" — which is the real bypass set (see
@@ -371,12 +391,47 @@ def gateway_help_lines() -> list[str]:
    return lines


+def _iter_plugin_command_entries() -> list[tuple[str, str, str]]:
+    """Yield (name, description, args_hint) tuples for all plugin slash commands.
+
+    Plugin commands are registered via
+    :func:`hermes_cli.plugins.PluginContext.register_command`. They behave
+    like ``CommandDef`` entries for gateway surfacing: they appear in the
+    Telegram command menu, in Slack's ``/hermes`` subcommand mapping, and
+    (via :func:`gateway.platforms.discord._register_slash_commands`) in
+    Discord's native slash command picker.
+
+    Lookup is lazy so importing this module never forces plugin discovery
+    (which can trigger filesystem scans and environment-dependent
+    behavior).
+    """
+    try:
+        from hermes_cli.plugins import get_plugin_commands
+    except Exception:
+        return []
+    try:
+        commands = get_plugin_commands() or {}
+    except Exception:
+        return []
+    entries: list[tuple[str, str, str]] = []
+    for name, meta in commands.items():
+        if not isinstance(name, str) or not isinstance(meta, dict):
+            continue
+        description = str(meta.get("description") or f"Run /{name}")
+        args_hint = str(meta.get("args_hint") or "").strip()
+        entries.append((name, description, args_hint))
+    return entries
+
+
 def telegram_bot_commands() -> list[tuple[str, str]]:
    """Return (command_name, description) pairs for Telegram setMyCommands.

    Telegram command names cannot contain hyphens, so they are replaced with
    underscores.  Aliases are skipped -- Telegram shows one menu entry per
    canonical command.
+
+    Plugin-registered slash commands are included so plugins get native
+    autocomplete in Telegram without touching core code.
    """
    overrides = _resolve_config_gates()
    result: list[tuple[str, str]] = []
@@ -386,6 +441,10 @@ def telegram_bot_commands() -> list[tuple[str, str]]:
        tg_name = _sanitize_telegram_name(cmd.name)
        if tg_name:
            result.append((tg_name, cmd.description))
+    for name, description, _args_hint in _iter_plugin_command_entries():
+        tg_name = _sanitize_telegram_name(name)
+        if tg_name:
+            result.append((tg_name, description))
    return result


@@ -750,6 +809,9 @@ def slack_subcommand_map() -> dict[str, str]:

    Maps both canonical names and aliases so /hermes bg do stuff works
    the same as /hermes background do stuff.
+
+    Plugin-registered slash commands are included so ``/hermes <plugin-cmd>``
+    routes through the plugin handler.
    """
    overrides = _resolve_config_gates()
    mapping: dict[str, str] = {}
@@ -759,6 +821,9 @@ def slack_subcommand_map() -> dict[str, str]:
        mapping[cmd.name] = f"/{cmd.name}"
        for alias in cmd.aliases:
            mapping[alias] = f"/{alias}"
+    for name, _description, _args_hint in _iter_plugin_command_entries():
+        if name not in mapping:
+            mapping[name] = f"/{name}"
    return mapping


@@ -712,6 +712,12 @@ DEFAULT_CONFIG = {
        "provider": "",    # e.g. "openrouter" (empty = inherit parent provider + credentials)
        "base_url": "",    # direct OpenAI-compatible endpoint for subagents
        "api_key": "",     # API key for delegation.base_url (falls back to OPENAI_API_KEY)
+        # When delegate_task narrows child toolsets explicitly, preserve any
+        # MCP toolsets the parent already has enabled. On by default so
+        # narrowing (e.g. toolsets=["web","browser"]) expresses "I want these
+        # extras" without silently stripping MCP tools the parent already has.
+        # Set to false for strict intersection.
+        "inherit_mcp_toolsets": True,
        "max_iterations": 50,  # per-subagent iteration cap (each subagent gets its own budget,
                               # independent of the parent's max_iterations)
        "reasoning_effort": "",  # reasoning effort for subagents: "xhigh", "high", "medium",
@@ -840,6 +846,7 @@ DEFAULT_CONFIG = {

    # Pre-exec security scanning via tirith
    "security": {
+        "allow_private_urls": False,  # Allow requests to private/internal IPs (for OpenWrt, proxies, VPNs)
        "redact_secrets": True,
        "tirith_enabled": True,
        "tirith_path": "tirith",
@@ -893,6 +900,34 @@ DEFAULT_CONFIG = {
        "force_ipv4": False,
    },

+    # Session storage — controls automatic cleanup of ~/.hermes/state.db.
+    # state.db accumulates every session, message, tool call, and FTS5 index
+    # entry forever.  Without auto-pruning, a heavy user (gateway + cron)
+    # reports 384MB+ databases with 68K+ messages, which slows down FTS5
+    # inserts, /resume listing, and insights queries.
+    "sessions": {
+        # When true, prune ended sessions older than retention_days once
+        # per (roughly) min_interval_hours at CLI/gateway/cron startup.
+        # Only touches ended sessions — active sessions are always preserved.
+        # Default false: session history is valuable for search recall, and
+        # silently deleting it could surprise users.  Opt in explicitly.
+        "auto_prune": False,
+        # How many days of ended-session history to keep.  Matches the
+        # default of ``hermes sessions prune``.
+        "retention_days": 90,
+        # VACUUM after a prune that actually deleted rows.  SQLite does not
+        # reclaim disk space on DELETE — freed pages are just reused on
+        # subsequent INSERTs — so without VACUUM the file stays bloated
+        # even after pruning.  VACUUM blocks writes for a few seconds per
+        # 100MB, so it only runs at startup, and only when prune deleted
+        # ≥1 session.
+        "vacuum_after_prune": True,
+        # Minimum hours between auto-maintenance runs (avoids repeating
+        # the sweep on every CLI invocation).  Tracked via state_meta in
+        # state.db itself, so it's shared across all processes.
+        "min_interval_hours": 24,
+    },
+
    # Config schema version - bump this when adding new required fields
    "_config_version": 22,
 }
@@ -1050,6 +1085,22 @@ OPTIONAL_ENV_VARS = {
        "category": "provider",
        "advanced": True,
    },
+    "STEPFUN_API_KEY": {
+        "description": "StepFun Step Plan API key",
+        "prompt": "StepFun Step Plan API key",
+        "url": "https://platform.stepfun.com/",
+        "password": True,
+        "category": "provider",
+        "advanced": True,
+    },
+    "STEPFUN_BASE_URL": {
+        "description": "StepFun Step Plan base URL override",
+        "prompt": "StepFun Step Plan base URL (leave empty for default)",
+        "url": None,
+        "password": False,
+        "category": "provider",
+        "advanced": True,
+    },
    "ARCEEAI_API_KEY": {
        "description": "Arcee AI API key",
        "prompt": "Arcee AI API key",
@@ -1853,7 +1904,7 @@ def get_missing_config_fields() -> List[Dict[str, Any]]:
    config = load_config()
    missing = []

-    def _check(defaults: dict, current: dict, prefix: str = ""):
+    def _check(defaults: Dict[str, Any], current: Dict[str, Any], prefix: str = ""):
        for key, default_value in defaults.items():
            if key.startswith('_'):
                continue
@@ -2004,6 +2055,14 @@ def _normalize_custom_provider_entry(
    models = entry.get("models")
    if isinstance(models, dict) and models:
        normalized["models"] = models
+    elif isinstance(models, list) and models:
+        # Hand-edited configs (and older Hermes versions) write ``models`` as
+        # a plain list of model ids. Preserve them by converting to the dict
+        # shape downstream code expects; otherwise normalize silently drops
+        # the list and /model shows the provider with (0) models.
+        normalized["models"] = {
+            str(m): {} for m in models if isinstance(m, str) and m.strip()
+        }

    context_length = entry.get("context_length")
    if isinstance(context_length, int) and context_length > 0:
@@ -2087,8 +2146,8 @@ def check_config_version() -> Tuple[int, int]:
    Returns (current_version, latest_version).
    """
    config = load_config()
-    current = config.get("_config_version", 0)
-    latest = DEFAULT_CONFIG.get("_config_version", 1)
+    current = int(config.get("_config_version", 0))
+    latest = int(DEFAULT_CONFIG.get("_config_version", 1))
    return current, latest


@@ -2102,6 +2161,7 @@ _KNOWN_ROOT_KEYS = {
    "fallback_providers", "credential_pool_strategies", "toolsets",
    "agent", "terminal", "display", "compression", "delegation",
    "auxiliary", "custom_providers", "context", "memory", "gateway",
+    "sessions",
 }

 # Valid fields inside a custom_providers list entry
@@ -2807,7 +2867,7 @@ def migrate_config(interactive: bool = True, quiet: bool = False) -> Dict[str, A
    return results


-def _deep_merge(base: dict, override: dict) -> dict:
+def _deep_merge(base: Dict[str, Any], override: Dict[str, Any]) -> Dict[str, Any]:
    """Recursively merge *override* into *base*, preserving nested defaults.

    Keys in *override* take precedence. If both values are dicts the merge
@@ -3117,7 +3177,7 @@ def save_config(config: Dict[str, Any]):
    if not sec or sec.get("redact_secrets") is None:
        parts.append(_SECURITY_COMMENT)
    fb = normalized.get("fallback_model", {})
-    if not fb or not (fb.get("provider") and fb.get("model")):
+    if not fb or not isinstance(fb, dict) or not (fb.get("provider") and fb.get("model")):
        parts.append(_FALLBACK_COMMENT)

    atomic_yaml_write(
@@ -13,6 +13,7 @@ import time
 import urllib.error
 import urllib.parse
 import urllib.request
+from dataclasses import dataclass
 from pathlib import Path
 from typing import Optional

@@ -147,6 +148,14 @@ def _sweep_expired_pastes(now: Optional[float] = None) -> tuple[int, int]:
    return (deleted, len(remaining))


+def _best_effort_sweep_expired_pastes() -> None:
+    """Attempt pending-paste cleanup without letting /debug fail offline."""
+    try:
+        _sweep_expired_pastes()
+    except Exception:
+        pass
+
+
 # ---------------------------------------------------------------------------
 # Privacy / delete helpers
 # ---------------------------------------------------------------------------
@@ -314,72 +323,128 @@ def upload_to_pastebin(content: str, expiry_days: int = 7) -> str:
 # Log file reading
 # ---------------------------------------------------------------------------

-def _resolve_log_path(log_name: str) -> Optional[Path]:
-    """Find the log file for *log_name*, falling back to the .1 rotation.

-    Returns the path if found, or None.
-    """
+@dataclass
+class LogSnapshot:
+    """Single-read snapshot of a log file used by debug-share."""
+
+    path: Optional[Path]
+    tail_text: str
+    full_text: Optional[str]
+
+
+def _primary_log_path(log_name: str) -> Optional[Path]:
+    """Where *log_name* would live if present. Doesn't check existence."""
    from hermes_cli.logs import LOG_FILES

    filename = LOG_FILES.get(log_name)
-    if not filename:
+    return (get_hermes_home() / "logs" / filename) if filename else None
+
+
+def _resolve_log_path(log_name: str) -> Optional[Path]:
+    """Find the log file for *log_name*, falling back to the .1 rotation.
+
+    Returns the first non-empty candidate (primary, then .1), or None.
+    Callers distinguish 'empty primary' from 'truly missing' via
+    :func:`_primary_log_path`.
+    """
+    primary = _primary_log_path(log_name)
+    if primary is None:
        return None

-    log_dir = get_hermes_home() / "logs"
-    primary = log_dir / filename
    if primary.exists() and primary.stat().st_size > 0:
        return primary

-    # Fall back to the most recent rotated file (.1).
-    rotated = log_dir / f"{filename}.1"
+    rotated = primary.parent / f"{primary.name}.1"
    if rotated.exists() and rotated.stat().st_size > 0:
        return rotated

    return None


-def _read_log_tail(log_name: str, num_lines: int) -> str:
-    """Read the last *num_lines* from a log file, or return a placeholder."""
-    from hermes_cli.logs import _read_last_n_lines
+def _capture_log_snapshot(
+    log_name: str,
+    *,
+    tail_lines: int,
+    max_bytes: int = _MAX_LOG_BYTES,
+) -> LogSnapshot:
+    """Capture a log once and derive summary/full-log views from it.

-    log_path = _resolve_log_path(log_name)
-    if log_path is None:
-        return "(file not found)"
-
-    try:
-        lines = _read_last_n_lines(log_path, num_lines)
-        return "".join(lines).rstrip("\n")
-    except Exception as exc:
-        return f"(error reading: {exc})"
-
-
-def _read_full_log(log_name: str, max_bytes: int = _MAX_LOG_BYTES) -> Optional[str]:
-    """Read a log file for standalone upload.
-
-    Returns the file content (last *max_bytes* if truncated), or None if the
-    file doesn't exist or is empty.
+    The report tail and standalone log upload must come from the same file
+    snapshot. Otherwise a rotation/truncate between reads can make the report
+    look newer than the uploaded ``agent.log`` paste.
    """
    log_path = _resolve_log_path(log_name)
    if log_path is None:
-        return None
+        primary = _primary_log_path(log_name)
+        tail = "(file empty)" if primary and primary.exists() else "(file not found)"
+        return LogSnapshot(path=None, tail_text=tail, full_text=None)

    try:
        size = log_path.stat().st_size
        if size == 0:
-            return None
+            # race: file was truncated between _resolve_log_path and stat
+            return LogSnapshot(path=log_path, tail_text="(file empty)", full_text=None)

-        if size <= max_bytes:
-            return log_path.read_text(encoding="utf-8", errors="replace")
-
-        # File is larger than max_bytes — read the tail.
        with open(log_path, "rb") as f:
-            f.seek(size - max_bytes)
-            # Skip partial line at the seek point.
-            f.readline()
-            content = f.read().decode("utf-8", errors="replace")
-        return f"[... truncated — showing last ~{max_bytes // 1024}KB ...]\n{content}"
-    except Exception:
-        return None
+            if size <= max_bytes:
+                raw = f.read()
+                truncated = False
+            else:
+                # Read from the end until we have enough bytes for the
+                # standalone upload and enough newline context to render the
+                # summary tail from the same snapshot.
+                chunk_size = 8192
+                pos = size
+                chunks: list[bytes] = []
+                total = 0
+                newline_count = 0
+
+                while pos > 0 and (total < max_bytes or newline_count <= tail_lines + 1) and total < max_bytes * 2:
+                    read_size = min(chunk_size, pos)
+                    pos -= read_size
+                    f.seek(pos)
+                    chunk = f.read(read_size)
+                    chunks.insert(0, chunk)
+                    total += len(chunk)
+                    newline_count += chunk.count(b"\n")
+                    chunk_size = min(chunk_size * 2, 65536)
+
+                raw = b"".join(chunks)
+                truncated = pos > 0
+
+        full_raw = raw
+        if truncated and len(full_raw) > max_bytes:
+            cut = len(full_raw) - max_bytes
+            # Check whether the cut lands exactly on a line boundary.  If the
+            # byte just before the cut position is a newline the first retained
+            # byte starts a complete line and we should keep it.  Only drop a
+            # partial first line when we're genuinely mid-line.
+            on_boundary = cut > 0 and full_raw[cut - 1 : cut] == b"\n"
+            full_raw = full_raw[cut:]
+            if not on_boundary and b"\n" in full_raw:
+                full_raw = full_raw.split(b"\n", 1)[1]
+
+        all_text = raw.decode("utf-8", errors="replace")
+        tail_text = "".join(all_text.splitlines(keepends=True)[-tail_lines:]).rstrip("\n")
+
+        full_text = full_raw.decode("utf-8", errors="replace")
+        if truncated:
+            full_text = f"[... truncated — showing last ~{max_bytes // 1024}KB ...]\n{full_text}"
+
+        return LogSnapshot(path=log_path, tail_text=tail_text, full_text=full_text)
+    except Exception as exc:
+        return LogSnapshot(path=log_path, tail_text=f"(error reading: {exc})", full_text=None)
+
+
+def _capture_default_log_snapshots(log_lines: int) -> dict[str, LogSnapshot]:
+    """Capture all logs used by debug-share exactly once."""
+    errors_lines = min(log_lines, 100)
+    return {
+        "agent": _capture_log_snapshot("agent", tail_lines=log_lines),
+        "errors": _capture_log_snapshot("errors", tail_lines=errors_lines),
+        "gateway": _capture_log_snapshot("gateway", tail_lines=errors_lines),
+    }


 # ---------------------------------------------------------------------------
@@ -405,7 +470,12 @@ def _capture_dump() -> str:
    return capture.getvalue()


-def collect_debug_report(*, log_lines: int = 200, dump_text: str = "") -> str:
+def collect_debug_report(
+    *,
+    log_lines: int = 200,
+    dump_text: str = "",
+    log_snapshots: Optional[dict[str, LogSnapshot]] = None,
+) -> str:
    """Build the summary debug report: system dump + log tails.

    Parameters
@@ -424,19 +494,22 @@ def collect_debug_report(*, log_lines: int = 200, dump_text: str = "") -> str:
        dump_text = _capture_dump()
    buf.write(dump_text)

+    if log_snapshots is None:
+        log_snapshots = _capture_default_log_snapshots(log_lines)
+
    # ── Recent log tails (summary only) ──────────────────────────────────
    buf.write("\n\n")
    buf.write(f"--- agent.log (last {log_lines} lines) ---\n")
-    buf.write(_read_log_tail("agent", log_lines))
+    buf.write(log_snapshots["agent"].tail_text)
    buf.write("\n\n")

    errors_lines = min(log_lines, 100)
    buf.write(f"--- errors.log (last {errors_lines} lines) ---\n")
-    buf.write(_read_log_tail("errors", errors_lines))
+    buf.write(log_snapshots["errors"].tail_text)
    buf.write("\n\n")

    buf.write(f"--- gateway.log (last {errors_lines} lines) ---\n")
-    buf.write(_read_log_tail("gateway", errors_lines))
+    buf.write(log_snapshots["gateway"].tail_text)
    buf.write("\n")

    return buf.getvalue()
@@ -448,6 +521,8 @@ def collect_debug_report(*, log_lines: int = 200, dump_text: str = "") -> str:

 def run_debug_share(args):
    """Collect debug report + full logs, upload each, print URLs."""
+    _best_effort_sweep_expired_pastes()
+
    log_lines = getattr(args, "lines", 200)
    expiry = getattr(args, "expire", 7)
    local_only = getattr(args, "local", False)
@@ -459,10 +534,15 @@ def run_debug_share(args):

    # Capture dump once — prepended to every paste for context.
    dump_text = _capture_dump()
+    log_snapshots = _capture_default_log_snapshots(log_lines)

-    report = collect_debug_report(log_lines=log_lines, dump_text=dump_text)
-    agent_log = _read_full_log("agent")
-    gateway_log = _read_full_log("gateway")
+    report = collect_debug_report(
+        log_lines=log_lines,
+        dump_text=dump_text,
+        log_snapshots=log_snapshots,
+    )
+    agent_log = log_snapshots["agent"].full_text
+    gateway_log = log_snapshots["gateway"].full_text

    # Prepend dump header to each full log so every paste is self-contained.
    if agent_log:
@@ -18,7 +18,7 @@ import os
 import sys
 import time
 import logging
-from typing import Optional, Tuple
+from typing import Any, Callable, Optional, Tuple

 import requests

@@ -108,7 +108,7 @@ def wait_for_registration_success(
    device_code: str,
    interval: int = 3,
    expires_in: int = 7200,
-    on_waiting: Optional[callable] = None,
+    on_waiting: Optional[Callable[..., Any]] = None,
 ) -> Tuple[str, str]:
    """Block until the registration succeeds or times out.

@@ -912,6 +912,7 @@ def run_doctor(args):
    _apikey_providers = [
        ("Z.AI / GLM",      ("GLM_API_KEY", "ZAI_API_KEY", "Z_AI_API_KEY"), "https://api.z.ai/api/paas/v4/models", "GLM_BASE_URL", True),
        ("Kimi / Moonshot",  ("KIMI_API_KEY",),                              "https://api.moonshot.ai/v1/models",   "KIMI_BASE_URL", True),
+        ("StepFun Step Plan",   ("STEPFUN_API_KEY",),                           "https://api.stepfun.ai/step_plan/v1/models", "STEPFUN_BASE_URL", True),
        ("Kimi / Moonshot (China)", ("KIMI_CN_API_KEY",),                    "https://api.moonshot.cn/v1/models",   None, True),
        ("Arcee AI",         ("ARCEEAI_API_KEY",),                            "https://api.arcee.ai/api/v1/models",  "ARCEE_BASE_URL", True),
        ("DeepSeek",         ("DEEPSEEK_API_KEY",),                           "https://api.deepseek.com/v1/models",  "DEEPSEEK_BASE_URL", True),
@@ -160,6 +160,8 @@ def load_hermes_dotenv(
    # Fix corrupted .env files before python-dotenv parses them (#8908).
    if user_env.exists():
        _sanitize_env_file_if_needed(user_env)
+    if project_env_path and project_env_path.exists():
+        _sanitize_env_file_if_needed(project_env_path)

    if user_env.exists():
        _load_dotenv_with_fallback(user_env, override=True)
@@ -333,6 +333,147 @@ def _probe_systemd_service_running(system: bool = False) -> tuple[bool, bool]:
    return selected_system, result.stdout.strip() == "active"


+def _read_systemd_unit_properties(
+    system: bool = False,
+    properties: tuple[str, ...] = (
+        "ActiveState",
+        "SubState",
+        "Result",
+        "ExecMainStatus",
+    ),
+) -> dict[str, str]:
+    """Return selected ``systemctl show`` properties for the gateway unit."""
+    selected_system = _select_systemd_scope(system)
+    try:
+        result = _run_systemctl(
+            [
+                "show",
+                get_service_name(),
+                "--no-pager",
+                "--property",
+                ",".join(properties),
+            ],
+            system=selected_system,
+            capture_output=True,
+            text=True,
+            timeout=10,
+        )
+    except (RuntimeError, subprocess.TimeoutExpired, OSError):
+        return {}
+
+    if result.returncode != 0:
+        return {}
+
+    parsed: dict[str, str] = {}
+    for line in result.stdout.splitlines():
+        if "=" not in line:
+            continue
+        key, value = line.split("=", 1)
+        parsed[key] = value.strip()
+    return parsed
+
+
+def _wait_for_systemd_service_restart(
+    *,
+    system: bool = False,
+    previous_pid: int | None = None,
+    timeout: float = 60.0,
+) -> bool:
+    """Wait for the gateway service to become active after a restart handoff."""
+    import time
+
+    svc = get_service_name()
+    scope_label = _service_scope_label(system).capitalize()
+    deadline = time.time() + timeout
+
+    while time.time() < deadline:
+        props = _read_systemd_unit_properties(system=system)
+        active_state = props.get("ActiveState", "")
+        sub_state = props.get("SubState", "")
+        new_pid = None
+        try:
+            from gateway.status import get_running_pid
+
+            new_pid = get_running_pid()
+        except Exception:
+            new_pid = None
+
+        if active_state == "active":
+            if new_pid and (previous_pid is None or new_pid != previous_pid):
+                print(f"✓ {scope_label} service restarted (PID {new_pid})")
+                return True
+            if previous_pid is None:
+                print(f"✓ {scope_label} service restarted")
+                return True
+
+        if active_state == "activating" and sub_state == "auto-restart":
+            time.sleep(1)
+            continue
+
+        time.sleep(2)
+
+    print(
+        f"⚠ {scope_label} service did not become active within {int(timeout)}s.\n"
+        f"  Check status: {'sudo ' if system else ''}hermes gateway status\n"
+        f"  Check logs:   journalctl {'--user ' if not system else ''}-u {svc} -l --since '2 min ago'"
+    )
+    return False
+
+
+def _recover_pending_systemd_restart(system: bool = False, previous_pid: int | None = None) -> bool:
+    """Recover a planned service restart that is stuck in systemd state."""
+    props = _read_systemd_unit_properties(system=system)
+    if not props:
+        return False
+
+    try:
+        from gateway.status import read_runtime_status
+    except Exception:
+        return False
+
+    runtime_state = read_runtime_status() or {}
+    if not runtime_state.get("restart_requested"):
+        return False
+
+    active_state = props.get("ActiveState", "")
+    sub_state = props.get("SubState", "")
+    exec_main_status = props.get("ExecMainStatus", "")
+    result = props.get("Result", "")
+
+    if active_state == "activating" and sub_state == "auto-restart":
+        print("⏳ Service restart already pending — waiting for systemd relaunch...")
+        return _wait_for_systemd_service_restart(
+            system=system,
+            previous_pid=previous_pid,
+        )
+
+    if active_state == "failed" and (
+        exec_main_status == str(GATEWAY_SERVICE_RESTART_EXIT_CODE)
+        or result == "exit-code"
+    ):
+        svc = get_service_name()
+        scope_label = _service_scope_label(system).capitalize()
+        print(f"↻ Clearing failed state for pending {scope_label.lower()} service restart...")
+        _run_systemctl(
+            ["reset-failed", svc],
+            system=system,
+            check=False,
+            timeout=30,
+        )
+        _run_systemctl(
+            ["start", svc],
+            system=system,
+            check=False,
+            timeout=90,
+        )
+        return _wait_for_systemd_service_restart(
+            system=system,
+            previous_pid=previous_pid,
+        )
+
+    return False
+
+
 def _probe_launchd_service_running() -> bool:
    if not get_launchd_plist_path().exists():
        return False
@@ -470,7 +611,8 @@ def stop_profile_gateway() -> bool:
        except (ProcessLookupError, PermissionError):
            break

-    remove_pid_file()
+    if get_running_pid() is None:
+        remove_pid_file()
    return True


@@ -619,6 +761,21 @@ def get_systemd_unit_path(system: bool = False) -> Path:
    return Path.home() / ".config" / "systemd" / "user" / f"{name}.service"


+class UserSystemdUnavailableError(RuntimeError):
+    """Raised when ``systemctl --user`` cannot reach the user D-Bus session.
+
+    Typically hit on fresh RHEL/Debian SSH sessions where linger is disabled
+    and no user@.service is running, so ``/run/user/$UID/bus`` never exists.
+    Carries a user-facing remediation message in ``args[0]``.
+    """
+
+
+def _user_dbus_socket_path() -> Path:
+    """Return the expected per-user D-Bus socket path (regardless of existence)."""
+    xdg = os.environ.get("XDG_RUNTIME_DIR") or f"/run/user/{os.getuid()}"
+    return Path(xdg) / "bus"
+
+
 def _ensure_user_systemd_env() -> None:
    """Ensure DBUS_SESSION_BUS_ADDRESS and XDG_RUNTIME_DIR are set for systemctl --user.

@@ -641,6 +798,126 @@ def _ensure_user_systemd_env() -> None:
            os.environ["DBUS_SESSION_BUS_ADDRESS"] = f"unix:path={bus_path}"


+def _wait_for_user_dbus_socket(timeout: float = 3.0) -> bool:
+    """Poll for the user D-Bus socket to appear, up to ``timeout`` seconds.
+
+    Linger-enabled user@.service can take a second or two to spawn the socket
+    after ``loginctl enable-linger`` runs.  Returns True once the socket exists.
+    """
+    import time
+
+    deadline = time.monotonic() + timeout
+    while time.monotonic() < deadline:
+        if _user_dbus_socket_path().exists():
+            _ensure_user_systemd_env()
+            return True
+        time.sleep(0.2)
+    return _user_dbus_socket_path().exists()
+
+
+def _preflight_user_systemd(*, auto_enable_linger: bool = True) -> None:
+    """Ensure ``systemctl --user`` will reach the user D-Bus session bus.
+
+    No-op when the bus socket is already there (the common case on desktops
+    and linger-enabled servers).  On fresh SSH sessions where the socket is
+    missing:
+
+    * If linger is already enabled, wait briefly for user@.service to spawn
+      the socket.
+    * If linger is disabled and ``auto_enable_linger`` is True, try
+      ``loginctl enable-linger $USER`` (works as non-root when polkit permits
+      it, otherwise needs sudo).
+    * If the socket is still missing afterwards, raise
+      :class:`UserSystemdUnavailableError` with a precise remediation message.
+
+    Callers should treat the exception as a terminal condition for user-scope
+    systemd operations and surface the message to the user.
+    """
+    _ensure_user_systemd_env()
+    bus_path = _user_dbus_socket_path()
+    if bus_path.exists():
+        return
+
+    import getpass
+
+    username = getpass.getuser()
+    linger_enabled, linger_detail = get_systemd_linger_status()
+
+    if linger_enabled is True:
+        if _wait_for_user_dbus_socket(timeout=3.0):
+            return
+        # Linger is on but socket still missing — unusual; fall through to error.
+        _raise_user_systemd_unavailable(
+            username,
+            reason="User D-Bus socket is missing even though linger is enabled.",
+            fix_hint=(
+                f"  systemctl start user@{os.getuid()}.service\n"
+                "  (may require sudo; try again after the command succeeds)"
+            ),
+        )
+
+    if auto_enable_linger and shutil.which("loginctl"):
+        try:
+            result = subprocess.run(
+                ["loginctl", "enable-linger", username],
+                capture_output=True,
+                text=True,
+                check=False,
+                timeout=30,
+            )
+        except Exception as exc:
+            _raise_user_systemd_unavailable(
+                username,
+                reason=f"loginctl enable-linger failed ({exc}).",
+                fix_hint=f"  sudo loginctl enable-linger {username}",
+            )
+        else:
+            if result.returncode == 0:
+                if _wait_for_user_dbus_socket(timeout=5.0):
+                    print(f"✓ Enabled linger for {username} — user D-Bus now available")
+                    return
+                # enable-linger succeeded but the socket never appeared.
+                _raise_user_systemd_unavailable(
+                    username,
+                    reason="Linger was enabled, but the user D-Bus socket did not appear.",
+                    fix_hint=(
+                        "  Log out and log back in, then re-run the command.\n"
+                        f"  Or reboot and run: systemctl --user start {get_service_name()}"
+                    ),
+                )
+            detail = (result.stderr or result.stdout or f"exit {result.returncode}").strip()
+            _raise_user_systemd_unavailable(
+                username,
+                reason=f"loginctl enable-linger was denied: {detail}",
+                fix_hint=f"  sudo loginctl enable-linger {username}",
+            )
+
+    _raise_user_systemd_unavailable(
+        username,
+        reason=(
+            "User D-Bus session is not available "
+            f"({linger_detail or 'linger disabled'})."
+        ),
+        fix_hint=f"  sudo loginctl enable-linger {username}",
+    )
+
+
+def _raise_user_systemd_unavailable(username: str, *, reason: str, fix_hint: str) -> None:
+    """Build a user-facing error message and raise UserSystemdUnavailableError."""
+    msg = (
+        f"{reason}\n"
+        "  systemctl --user cannot reach the user D-Bus session in this shell.\n"
+        "\n"
+        "  To fix:\n"
+        f"{fix_hint}\n"
+        "\n"
+        "  Alternative: run the gateway in the foreground (stays up until\n"
+        "  you exit / close the terminal):\n"
+        "    hermes gateway run"
+    )
+    raise UserSystemdUnavailableError(msg)
+
+
 def _systemctl_cmd(system: bool = False) -> list[str]:
    if not system:
        _ensure_user_systemd_env()
@@ -1481,6 +1758,11 @@ def systemd_start(system: bool = False):
    system = _select_systemd_scope(system)
    if system:
        _require_root_for_system_service("start")
+    else:
+        # Fail fast with actionable guidance if the user D-Bus session is not
+        # reachable (common on fresh RHEL/Debian SSH sessions without linger).
+        # Raises UserSystemdUnavailableError with a remediation message.
+        _preflight_user_systemd()
    refresh_systemd_unit_if_needed(system=system)
    _run_systemctl(["start", get_service_name()], system=system, check=True, timeout=30)
    print(f"✓ {_service_scope_label(system).capitalize()} service started")
@@ -1500,19 +1782,16 @@ def systemd_restart(system: bool = False):
    system = _select_systemd_scope(system)
    if system:
        _require_root_for_system_service("restart")
+    else:
+        _preflight_user_systemd()
    refresh_systemd_unit_if_needed(system=system)
    from gateway.status import get_running_pid

    pid = get_running_pid()
    if pid is not None and _request_gateway_self_restart(pid):
-        # SIGUSR1 sent — the gateway will drain active agents, exit with
-        # code 75, and systemd will restart it after RestartSec (30s).
-        # Wait for the old process to die and the new one to become active
-        # so the CLI doesn't return while the service is still restarting.
        import time
        scope_label = _service_scope_label(system).capitalize()
        svc = get_service_name()
-        scope_cmd = _systemctl_cmd(system)

        # Phase 1: wait for old process to exit (drain + shutdown)
        print(f"⏳ {scope_label} service draining active work...")
@@ -1526,48 +1805,41 @@ def systemd_restart(system: bool = False):
        else:
            print(f"⚠ Old process (PID {pid}) still alive after 90s")

-        # Phase 2: wait for systemd to start the new process
-        print(f"⏳ Waiting for {svc} to restart...")
-        deadline = time.time() + 60
-        while time.time() < deadline:
-            try:
-                result = subprocess.run(
-                    scope_cmd + ["is-active", svc],
-                    capture_output=True, text=True, timeout=5,
-                )
-                if result.stdout.strip() == "active":
-                    # Verify it's a NEW process, not the old one somehow
-                    new_pid = get_running_pid()
-                    if new_pid and new_pid != pid:
-                        print(f"✓ {scope_label} service restarted (PID {new_pid})")
-                        return
-            except (subprocess.TimeoutExpired, FileNotFoundError):
-                pass
-            time.sleep(2)
-
-        # Timed out — check final state
-        try:
-            result = subprocess.run(
-                scope_cmd + ["is-active", svc],
-                capture_output=True, text=True, timeout=5,
-            )
-            if result.stdout.strip() == "active":
-                print(f"✓ {scope_label} service restarted")
-                return
-        except Exception:
-            pass
-        print(
-            f"⚠ {scope_label} service did not become active within 60s.\n"
-            f"  Check status: {'sudo ' if system else ''}hermes gateway status\n"
-            f"  Check logs:   journalctl {'--user ' if not system else ''}-u {svc} --since '2 min ago'"
+        # The gateway exits with code 75 for a planned service restart.
+        # systemd can sit in the RestartSec window or even wedge itself into a
+        # failed/rate-limited state if the operator asks for another restart in
+        # the middle of that handoff. Clear any stale failed state and kick the
+        # unit immediately so `hermes gateway restart` behaves idempotently.
+        _run_systemctl(
+            ["reset-failed", svc],
+            system=system,
+            check=False,
+            timeout=30,
        )
+        _run_systemctl(
+            ["start", svc],
+            system=system,
+            check=False,
+            timeout=90,
+        )
+        _wait_for_systemd_service_restart(system=system, previous_pid=pid)
        return
+
+    if _recover_pending_systemd_restart(system=system, previous_pid=pid):
+        return
+
+    _run_systemctl(
+        ["reset-failed", get_service_name()],
+        system=system,
+        check=False,
+        timeout=30,
+    )
    _run_systemctl(["reload-or-restart", get_service_name()], system=system, check=True, timeout=90)
    print(f"✓ {_service_scope_label(system).capitalize()} service restarted")



-def systemd_status(deep: bool = False, system: bool = False):
+def systemd_status(deep: bool = False, system: bool = False, full: bool = False):
    system = _select_systemd_scope(system)
    unit_path = get_systemd_unit_path(system=system)
    scope_flag = " --system" if system else ""
@@ -1590,8 +1862,12 @@ def systemd_status(deep: bool = False, system: bool = False):
        print(f"  Run: {'sudo ' if system else ''}hermes gateway restart{scope_flag}  # auto-refreshes the unit")
        print()

+    status_cmd = ["status", get_service_name(), "--no-pager"]
+    if full:
+        status_cmd.append("-l")
+
    _run_systemctl(
-        ["status", get_service_name(), "--no-pager"],
+        status_cmd,
        system=system,
        capture_output=False,
        timeout=10,
@@ -1624,6 +1900,19 @@ def systemd_status(deep: bool = False, system: bool = False):
        for line in runtime_lines:
            print(f"  {line}")

+    unit_props = _read_systemd_unit_properties(system=system)
+    active_state = unit_props.get("ActiveState", "")
+    sub_state = unit_props.get("SubState", "")
+    exec_main_status = unit_props.get("ExecMainStatus", "")
+    result_code = unit_props.get("Result", "")
+    if active_state == "activating" and sub_state == "auto-restart":
+        print("  ⏳ Restart pending: systemd is waiting to relaunch the gateway")
+    elif active_state == "failed" and exec_main_status == str(GATEWAY_SERVICE_RESTART_EXIT_CODE):
+        print("  ⚠ Planned restart is stuck in systemd failed state (exit 75)")
+        print(f"  Run: systemctl {'--user ' if not system else ''}reset-failed {get_service_name()} && {'sudo ' if system else ''}hermes gateway start{scope_flag}")
+    elif active_state == "failed" and result_code:
+        print(f"  ⚠ Systemd unit result: {result_code}")
+
    if system:
        print("✓ System service starts at boot without requiring systemd linger")
    elif deep:
@@ -1639,7 +1928,10 @@ def systemd_status(deep: bool = False, system: bool = False):
    if deep:
        print()
        print("Recent logs:")
-        subprocess.run(_journalctl_cmd(system) + ["-u", get_service_name(), "-n", "20", "--no-pager"], timeout=10)
+        log_cmd = _journalctl_cmd(system) + ["-u", get_service_name(), "-n", "20", "--no-pager"]
+        if full:
+            log_cmd.append("-l")
+        subprocess.run(log_cmd, timeout=10)


 # =============================================================================
@@ -2639,8 +2931,125 @@ def _setup_dingtalk():


 def _setup_wecom():
-    """Configure WeCom (Enterprise WeChat) via the standard platform setup."""
-    wecom_platform = next(p for p in _PLATFORMS if p["key"] == "wecom")
+    """Interactive setup for WeCom — scan QR code or manual credential input."""
+    print()
+    print(color("  ─── 💬 WeCom (Enterprise WeChat) Setup ───", Colors.CYAN))
+
+    existing_bot_id = get_env_value("WECOM_BOT_ID")
+    existing_secret = get_env_value("WECOM_SECRET")
+    if existing_bot_id and existing_secret:
+        print()
+        print_success("WeCom is already configured.")
+        if not prompt_yes_no("  Reconfigure WeCom?", False):
+            return
+
+    # ── Choose setup method ──
+    print()
+    method_choices = [
+        "Scan QR code to obtain Bot ID and Secret automatically (recommended)",
+        "Enter existing Bot ID and Secret manually",
+    ]
+    method_idx = prompt_choice("  How would you like to set up WeCom?", method_choices, 0)
+
+    bot_id = None
+    secret = None
+
+    if method_idx == 0:
+        # ── QR scan flow ──
+        try:
+            from gateway.platforms.wecom import qr_scan_for_bot_info
+        except Exception as exc:
+            print_error(f"  WeCom QR scan import failed: {exc}")
+            qr_scan_for_bot_info = None
+
+        if qr_scan_for_bot_info is not None:
+            try:
+                credentials = qr_scan_for_bot_info()
+            except KeyboardInterrupt:
+                print()
+                print_warning("  WeCom setup cancelled.")
+                return
+            except Exception as exc:
+                print_warning(f"  QR scan failed: {exc}")
+                credentials = None
+            if credentials:
+                bot_id = credentials.get("bot_id", "")
+                secret = credentials.get("secret", "")
+                print_success("  ✔ QR scan successful! Bot ID and Secret obtained.")
+
+        if not bot_id or not secret:
+            print_info("  QR scan did not complete. Continuing with manual input.")
+            bot_id = None
+            secret = None
+
+    # ── Manual credential input ──
+    if not bot_id or not secret:
+        print()
+        print_info("  1. Go to WeCom Application → Workspace → Smart Robot -> Create smart robots")
+        print_info("  2. Select API Mode")
+        print_info("  3. Copy the Bot ID and Secret from the bot's credentials info")
+        print_info("  4. The bot connects via WebSocket — no public endpoint needed")
+        print()
+        bot_id = prompt("  Bot ID", password=False)
+        if not bot_id:
+            print_warning("  Skipped — WeCom won't work without a Bot ID.")
+            return
+        secret = prompt("  Secret", password=True)
+        if not secret:
+            print_warning("  Skipped — WeCom won't work without a Secret.")
+            return
+
+    # ── Save core credentials ──
+    save_env_value("WECOM_BOT_ID", bot_id)
+    save_env_value("WECOM_SECRET", secret)
+
+    # ── Allowed users (deny-by-default security) ──
+    print()
+    print_info("  The gateway DENIES all users by default for security.")
+    print_info("  Enter user IDs to create an allowlist, or leave empty.")
+    allowed = prompt("  Allowed user IDs (comma-separated, or empty)", password=False)
+    if allowed:
+        cleaned = allowed.replace(" ", "")
+        save_env_value("WECOM_ALLOWED_USERS", cleaned)
+        print_success("  Saved — only these users can interact with the bot.")
+    else:
+        print()
+        access_choices = [
+            "Enable open access (anyone can message the bot)",
+            "Use DM pairing (unknown users request access, you approve with 'hermes pairing approve')",
+            "Disable direct messages",
+            "Skip for now (bot will deny all users until configured)",
+        ]
+        access_idx = prompt_choice("  How should unauthorized users be handled?", access_choices, 1)
+        if access_idx == 0:
+            save_env_value("WECOM_DM_POLICY", "open")
+            save_env_value("GATEWAY_ALLOW_ALL_USERS", "true")
+            print_warning("  Open access enabled — anyone can use your bot!")
+        elif access_idx == 1:
+            save_env_value("WECOM_DM_POLICY", "pairing")
+            print_success("  DM pairing mode — users will receive a code to request access.")
+            print_info("  Approve with: hermes pairing approve <platform> <code>")
+        elif access_idx == 2:
+            save_env_value("WECOM_DM_POLICY", "disabled")
+            print_warning("  Direct messages disabled.")
+        else:
+            print_info("  Skipped — configure later with 'hermes gateway setup'")
+
+    # ── Home channel (optional) ──
+    print()
+    print_info("  Chat ID for scheduled results and notifications.")
+    home = prompt("  Home chat ID (optional, for cron/notifications)", password=False)
+    if home:
+        save_env_value("WECOM_HOME_CHANNEL", home)
+        print_success(f"  Home channel set to {home}")
+
+    print()
+    print_success("💬 WeCom configured!")
+
+
+def _setup_wecom_callback():
+    """Configure WeCom Callback (self-built app) via the standard platform setup."""
+    wecom_platform = next(p for p in _PLATFORMS if p["key"] == "wecom_callback")
    _setup_standard_platform(wecom_platform)


@@ -3021,7 +3430,8 @@ def _setup_qqbot():
    if method_idx == 0:
        # ── QR scan-to-configure ──
        try:
-            credentials = _qqbot_qr_flow()
+            from gateway.platforms.qqbot import qr_register
+            credentials = qr_register()
        except KeyboardInterrupt:
            print()
            print_warning("  QQ Bot setup cancelled.")
@@ -3103,106 +3513,6 @@ def _setup_qqbot():
    print_info(f"  App ID: {credentials['app_id']}")


-def _qqbot_render_qr(url: str) -> bool:
-    """Try to render a QR code in the terminal. Returns True if successful."""
-    try:
-        import qrcode as _qr
-        qr = _qr.QRCode(border=1,error_correction=_qr.constants.ERROR_CORRECT_L)
-        qr.add_data(url)
-        qr.make(fit=True)
-        qr.print_ascii(invert=True)
-        return True
-    except Exception:
-        return False
-
-
-def _qqbot_qr_flow():
-    """Run the QR-code scan-to-configure flow.
-
-    Returns a dict with app_id, client_secret, user_openid on success,
-    or None on failure/cancel.
-    """
-    try:
-        from gateway.platforms.qqbot import (
-            create_bind_task, poll_bind_result, build_connect_url,
-            decrypt_secret, BindStatus,
-        )
-        from gateway.platforms.qqbot.constants import ONBOARD_POLL_INTERVAL
-    except Exception as exc:
-        print_error(f"  QQBot onboard import failed: {exc}")
-        return None
-
-    import asyncio
-    import time
-
-    MAX_REFRESHES = 3
-    refresh_count = 0
-
-    while refresh_count <= MAX_REFRESHES:
-        loop = asyncio.new_event_loop()
-
-        # ── Create bind task ──
-        try:
-            task_id, aes_key = loop.run_until_complete(create_bind_task())
-        except Exception as e:
-            print_warning(f"  Failed to create bind task: {e}")
-            loop.close()
-            return None
-
-        url = build_connect_url(task_id)
-
-        # ── Display QR code + URL ──
-        print()
-        if _qqbot_render_qr(url):
-            print(f"  Scan the QR code above, or open this URL directly:\n  {url}")
-        else:
-            print(f"  Open this URL in QQ on your phone:\n  {url}")
-            print_info("  Tip: pip install qrcode  to show a scannable QR code here")
-
-        # ── Poll loop (silent — keep QR visible at bottom) ──
-        try:
-            while True:
-                try:
-                    status, app_id, encrypted_secret, user_openid = loop.run_until_complete(
-                        poll_bind_result(task_id)
-                    )
-                except Exception:
-                    time.sleep(ONBOARD_POLL_INTERVAL)
-                    continue
-
-                if status == BindStatus.COMPLETED:
-                    client_secret = decrypt_secret(encrypted_secret, aes_key)
-                    print()
-                    print_success(f"  QR scan complete! (App ID: {app_id})")
-                    if user_openid:
-                        print_info(f"  Scanner's OpenID: {user_openid}")
-                    return {
-                        "app_id": app_id,
-                        "client_secret": client_secret,
-                        "user_openid": user_openid,
-                    }
-
-                if status == BindStatus.EXPIRED:
-                    refresh_count += 1
-                    if refresh_count > MAX_REFRESHES:
-                        print()
-                        print_warning(f"  QR code expired {MAX_REFRESHES} times — giving up.")
-                        return None
-                    print()
-                    print_warning(f"  QR code expired, refreshing... ({refresh_count}/{MAX_REFRESHES})")
-                    loop.close()
-                    break  # outer while creates a new task
-
-                time.sleep(ONBOARD_POLL_INTERVAL)
-        except KeyboardInterrupt:
-            loop.close()
-            raise
-        finally:
-            loop.close()
-
-    return None
-
-
 def _setup_signal():
    """Interactive setup for Signal messenger."""
    import shutil
@@ -3354,6 +3664,10 @@ def gateway_setup():
                    systemd_start()
                elif is_macos():
                    launchd_start()
+            except UserSystemdUnavailableError as e:
+                print_error("  Failed to start — user systemd not reachable:")
+                for line in str(e).splitlines():
+                    print(f"  {line}")
            except subprocess.CalledProcessError as e:
                print_error(f"  Failed to start: {e}")
    else:
@@ -3390,6 +3704,8 @@ def gateway_setup():
            _setup_feishu()
        elif platform["key"] == "qqbot":
            _setup_qqbot()
+        elif platform["key"] == "wecom":
+            _setup_wecom()
        else:
            _setup_standard_platform(platform)

@@ -3416,6 +3732,10 @@ def gateway_setup():
                    else:
                        stop_profile_gateway()
                        print_info("Start manually: hermes gateway")
+                except UserSystemdUnavailableError as e:
+                    print_error("  Restart failed — user systemd not reachable:")
+                    for line in str(e).splitlines():
+                        print(f"  {line}")
                except subprocess.CalledProcessError as e:
                    print_error(f"  Restart failed: {e}")
        elif service_installed:
@@ -3425,6 +3745,10 @@ def gateway_setup():
                        systemd_start()
                    elif is_macos():
                        launchd_start()
+                except UserSystemdUnavailableError as e:
+                    print_error("  Start failed — user systemd not reachable:")
+                    for line in str(e).splitlines():
+                        print(f"  {line}")
                except subprocess.CalledProcessError as e:
                    print_error(f"  Start failed: {e}")
        else:
@@ -3448,6 +3772,10 @@ def gateway_setup():
                                    systemd_start(system=installed_scope == "system")
                                else:
                                    launchd_start()
+                            except UserSystemdUnavailableError as e:
+                                print_error("  Start failed — user systemd not reachable:")
+                                for line in str(e).splitlines():
+                                    print(f"  {line}")
                            except subprocess.CalledProcessError as e:
                                print_error(f"  Start failed: {e}")
                    except subprocess.CalledProcessError as e:
@@ -3485,6 +3813,18 @@ def gateway_setup():

 def gateway_command(args):
    """Handle gateway subcommands."""
+    try:
+        return _gateway_command_inner(args)
+    except UserSystemdUnavailableError as e:
+        # Clean, actionable message instead of a traceback when the user D-Bus
+        # session is unreachable (fresh SSH shell, no linger, container, etc.).
+        print_error("User systemd not reachable:")
+        for line in str(e).splitlines():
+            print(f"  {line}")
+        sys.exit(1)
+
+
+def _gateway_command_inner(args):
    subcmd = getattr(args, 'gateway_command', None)
    
    # Default to run if no subcommand
@@ -3748,12 +4088,13 @@ def gateway_command(args):
    
    elif subcmd == "status":
        deep = getattr(args, 'deep', False)
+        full = getattr(args, 'full', False)
        system = getattr(args, 'system', False)
        snapshot = get_gateway_runtime_snapshot(system=system)
        
        # Check for service first
        if supports_systemd_services() and (get_systemd_unit_path(system=False).exists() or get_systemd_unit_path(system=True).exists()):
-            systemd_status(deep, system=system)
+            systemd_status(deep, system=system, full=full)
            _print_gateway_process_mismatch(snapshot)
        elif is_macos() and get_launchd_plist_path().exists():
            launchd_status(deep)
@@ -1131,6 +1131,20 @@ def cmd_chat(args):
    if getattr(args, "yolo", False):
        os.environ["HERMES_YOLO_MODE"] = "1"

+    # --ignore-user-config: make load_cli_config() / load_config() skip the
+    # user's ~/.hermes/config.yaml and return built-in defaults. Set BEFORE
+    # importing cli (which runs `CLI_CONFIG = load_cli_config()` at module
+    # import time). Credentials in .env are still loaded — this flag only
+    # ignores behavioral/config settings.
+    if getattr(args, "ignore_user_config", False):
+        os.environ["HERMES_IGNORE_USER_CONFIG"] = "1"
+
+    # --ignore-rules: skip auto-injection of AGENTS.md/SOUL.md/.cursorrules
+    # (rules), memory entries, and any preloaded skills coming from user config.
+    # Maps to AIAgent(skip_context_files=True, skip_memory=True).
+    if getattr(args, "ignore_rules", False):
+        os.environ["HERMES_IGNORE_RULES"] = "1"
+
    # --source: tag session source for filtering (e.g. 'tool' for third-party integrations)
    if getattr(args, "source", None):
        os.environ["HERMES_SESSION_SOURCE"] = args.source
@@ -1159,6 +1173,8 @@ def cmd_chat(args):
        "checkpoints": getattr(args, "checkpoints", False),
        "pass_session_id": getattr(args, "pass_session_id", False),
        "max_turns": getattr(args, "max_turns", None),
+        "ignore_rules": getattr(args, "ignore_rules", False),
+        "ignore_user_config": getattr(args, "ignore_user_config", False),
    }
    # Filter out None values
    kwargs = {k: v for k, v in kwargs.items() if v is not None}
@@ -1566,6 +1582,8 @@ def select_provider_and_model(args=None):
        _model_flow_anthropic(config, current_model)
    elif selected_provider == "kimi-coding":
        _model_flow_kimi(config, current_model)
+    elif selected_provider == "stepfun":
+        _model_flow_stepfun(config, current_model)
    elif selected_provider == "bedrock":
        _model_flow_bedrock(config, current_model)
    elif selected_provider in (
@@ -3462,6 +3480,140 @@ def _model_flow_kimi(config, current_model=""):
        print("No change.")


+def _infer_stepfun_region(base_url: str) -> str:
+    """Infer the current StepFun region from the configured endpoint."""
+    normalized = (base_url or "").strip().lower()
+    if "api.stepfun.com" in normalized:
+        return "china"
+    return "international"
+
+
+def _stepfun_base_url_for_region(region: str) -> str:
+    from hermes_cli.auth import (
+        STEPFUN_STEP_PLAN_CN_BASE_URL,
+        STEPFUN_STEP_PLAN_INTL_BASE_URL,
+    )
+
+    return (
+        STEPFUN_STEP_PLAN_CN_BASE_URL
+        if region == "china"
+        else STEPFUN_STEP_PLAN_INTL_BASE_URL
+    )
+
+
+def _model_flow_stepfun(config, current_model=""):
+    """StepFun Step Plan flow with region-specific endpoints."""
+    from hermes_cli.auth import (
+        PROVIDER_REGISTRY,
+        _prompt_model_selection,
+        _save_model_choice,
+        deactivate_provider,
+    )
+    from hermes_cli.config import get_env_value, save_env_value, load_config, save_config
+    from hermes_cli.models import fetch_api_models
+
+    provider_id = "stepfun"
+    pconfig = PROVIDER_REGISTRY[provider_id]
+    key_env = pconfig.api_key_env_vars[0] if pconfig.api_key_env_vars else ""
+    base_url_env = pconfig.base_url_env_var or ""
+
+    existing_key = ""
+    for ev in pconfig.api_key_env_vars:
+        existing_key = get_env_value(ev) or os.getenv(ev, "")
+        if existing_key:
+            break
+
+    if not existing_key:
+        print(f"No {pconfig.name} API key configured.")
+        if key_env:
+            try:
+                import getpass
+                new_key = getpass.getpass(f"{key_env} (or Enter to cancel): ").strip()
+            except (KeyboardInterrupt, EOFError):
+                print()
+                return
+            if not new_key:
+                print("Cancelled.")
+                return
+            save_env_value(key_env, new_key)
+            existing_key = new_key
+            print("API key saved.")
+            print()
+    else:
+        print(f"  {pconfig.name} API key: {existing_key[:8]}... ✓")
+        print()
+
+    current_base = ""
+    if base_url_env:
+        current_base = get_env_value(base_url_env) or os.getenv(base_url_env, "")
+    if not current_base:
+        model_cfg = config.get("model")
+        if isinstance(model_cfg, dict):
+            current_base = str(model_cfg.get("base_url") or "").strip()
+    current_region = _infer_stepfun_region(current_base or pconfig.inference_base_url)
+
+    region_choices = [
+        ("international", f"International ({_stepfun_base_url_for_region('international')})"),
+        ("china", f"China ({_stepfun_base_url_for_region('china')})"),
+    ]
+    ordered_regions = []
+    for region_key, label in region_choices:
+        if region_key == current_region:
+            ordered_regions.insert(0, (region_key, f"{label}  ← currently active"))
+        else:
+            ordered_regions.append((region_key, label))
+    ordered_regions.append(("cancel", "Cancel"))
+
+    region_idx = _prompt_provider_choice([label for _, label in ordered_regions])
+    if region_idx is None or ordered_regions[region_idx][0] == "cancel":
+        print("No change.")
+        return
+
+    selected_region = ordered_regions[region_idx][0]
+    effective_base = _stepfun_base_url_for_region(selected_region)
+    if base_url_env:
+        save_env_value(base_url_env, effective_base)
+
+    live_models = fetch_api_models(existing_key, effective_base)
+    if live_models:
+        model_list = live_models
+        print(f"  Found {len(model_list)} model(s) from {pconfig.name} API")
+    else:
+        model_list = _PROVIDER_MODELS.get(provider_id, [])
+        if model_list:
+            print(
+                f"  Could not auto-detect models from {pconfig.name} API — "
+                "showing Step Plan fallback catalog."
+            )
+
+    if model_list:
+        selected = _prompt_model_selection(model_list, current_model=current_model)
+    else:
+        try:
+            selected = input("Model name: ").strip()
+        except (KeyboardInterrupt, EOFError):
+            selected = None
+
+    if selected:
+        _save_model_choice(selected)
+
+        cfg = load_config()
+        model = cfg.get("model")
+        if not isinstance(model, dict):
+            model = {"default": model} if model else {}
+            cfg["model"] = model
+        model["provider"] = provider_id
+        model["base_url"] = effective_base
+        model.pop("api_mode", None)
+        save_config(cfg)
+        deactivate_provider()
+
+        config["model"] = dict(model)
+        print(f"Default model set to: {selected} (via {pconfig.name})")
+    else:
+        print("No change.")
+
+
 def _model_flow_bedrock_api_key(config, region, current_model=""):
    """Bedrock API Key mode — uses the OpenAI-compatible bedrock-mantle endpoint.

@@ -6470,6 +6622,18 @@ For more help on a command:
        default=False,
        help="Include the session ID in the agent's system prompt",
    )
+    parser.add_argument(
+        "--ignore-user-config",
+        action="store_true",
+        default=False,
+        help="Ignore ~/.hermes/config.yaml and fall back to built-in defaults (credentials in .env are still loaded)",
+    )
+    parser.add_argument(
+        "--ignore-rules",
+        action="store_true",
+        default=False,
+        help="Skip auto-injection of AGENTS.md, SOUL.md, .cursorrules, memory, and preloaded skills",
+    )
    parser.add_argument(
        "--tui",
        action="store_true",
@@ -6530,6 +6694,7 @@ For more help on a command:
            "zai",
            "kimi-coding",
            "kimi-coding-cn",
+            "stepfun",
            "minimax",
            "minimax-cn",
            "kilocode",
@@ -6608,6 +6773,18 @@ For more help on a command:
        default=argparse.SUPPRESS,
        help="Include the session ID in the agent's system prompt",
    )
+    chat_parser.add_argument(
+        "--ignore-user-config",
+        action="store_true",
+        default=argparse.SUPPRESS,
+        help="Ignore ~/.hermes/config.yaml and fall back to built-in defaults (credentials in .env are still loaded). Useful for isolated CI runs, reproduction, and third-party integrations.",
+    )
+    chat_parser.add_argument(
+        "--ignore-rules",
+        action="store_true",
+        default=argparse.SUPPRESS,
+        help="Skip auto-injection of AGENTS.md, SOUL.md, .cursorrules, memory, and preloaded skills. Combine with --ignore-user-config for a fully isolated run.",
+    )
    chat_parser.add_argument(
        "--source",
        default=None,
@@ -6751,6 +6928,12 @@ For more help on a command:
    # gateway status
    gateway_status = gateway_subparsers.add_parser("status", help="Show gateway status")
    gateway_status.add_argument("--deep", action="store_true", help="Deep status check")
+    gateway_status.add_argument(
+        "-l",
+        "--full",
+        action="store_true",
+        help="Show full, untruncated service/log output where supported",
+    )
    gateway_status.add_argument(
        "--system",
        action="store_true",
@@ -143,7 +143,7 @@ MODEL_ALIASES: dict[str, ModelIdentity] = {
    # Z.AI / GLM
    "glm":       ModelIdentity("z-ai", "glm"),

-    # StepFun
+    # Step Plan (StepFun)
    "step":      ModelIdentity("stepfun", "step"),

    # Xiaomi
@@ -782,6 +782,7 @@ def switch_model(

 def list_authenticated_providers(
    current_provider: str = "",
+    current_base_url: str = "",
    user_providers: dict = None,
    custom_providers: list | None = None,
    max_models: int = 8,
@@ -810,7 +811,10 @@ def list_authenticated_providers(
        get_provider_info as _mdev_pinfo,
    )
    from hermes_cli.auth import PROVIDER_REGISTRY
-    from hermes_cli.models import OPENROUTER_MODELS, _PROVIDER_MODELS
+    from hermes_cli.models import (
+        OPENROUTER_MODELS, _PROVIDER_MODELS,
+        _MODELS_DEV_PREFERRED, _merge_with_models_dev,
+    )

    results: List[dict] = []
    seen_slugs: set = set()  # lowercase-normalized to catch case variants (#9545)
@@ -844,6 +848,10 @@ def list_authenticated_providers(
        # source of truth.  models.dev can have wrong mappings (e.g.
        # minimax-cn → MINIMAX_API_KEY instead of MINIMAX_CN_API_KEY).
        pconfig = PROVIDER_REGISTRY.get(hermes_id)
+        # Skip non-API-key auth providers here — they are handled in
+        # section 2 (HERMES_OVERLAYS) with proper auth store checking.
+        if pconfig and pconfig.auth_type != "api_key":
+            continue
        if pconfig and pconfig.api_key_env_vars:
            env_vars = list(pconfig.api_key_env_vars)
        else:
@@ -856,8 +864,13 @@ def list_authenticated_providers(
        if not has_creds:
            continue

-        # Use curated list, falling back to models.dev if no curated list
+        # Use curated list, falling back to models.dev if no curated list.
+        # For preferred providers, merge models.dev entries into the curated
+        # catalog so newly released models (e.g. mimo-v2.5-pro on opencode-go)
+        # show up in the picker without requiring a Hermes release.
        model_ids = curated.get(hermes_id, [])
+        if hermes_id in _MODELS_DEV_PREFERRED:
+            model_ids = _merge_with_models_dev(hermes_id, model_ids)
        total = len(model_ids)
        top = model_ids[:max_models]

@@ -961,6 +974,9 @@ def list_authenticated_providers(

        # Use curated list — look up by Hermes slug, fall back to overlay key
        model_ids = curated.get(hermes_slug, []) or curated.get(pid, [])
+        # Merge with models.dev for preferred providers (same rationale as above).
+        if hermes_slug in _MODELS_DEV_PREFERRED:
+            model_ids = _merge_with_models_dev(hermes_slug, model_ids)
        total = len(model_ids)
        top = model_ids[:max_models]

@@ -1106,66 +1122,113 @@ def list_authenticated_providers(

    # --- 4. Saved custom providers from config ---
    # Each ``custom_providers`` entry represents one model under a named
-    # provider. Entries sharing the same provider name are grouped into a
-    # single picker row so that e.g. four Ollama Cloud entries
-    # (qwen3-coder, glm-5.1, kimi-k2, minimax-m2.7) appear as one
-    # "Ollama Cloud" row with four models inside instead of four
-    # duplicate "Ollama Cloud" rows. Entries with distinct provider names
-    # still produce separate rows (e.g. Ollama Cloud vs Moonshot).
+    # provider. Entries sharing the same endpoint (``base_url`` + ``api_key``)
+    # are grouped into a single picker row, so e.g. four Ollama entries
+    # pointing at ``http://localhost:11434/v1`` with per-model display names
+    # ("Ollama — GLM 5.1", "Ollama — Qwen3-coder", ...) appear as one
+    # "Ollama" row with four models inside instead of four near-duplicates
+    # that differ only by suffix. Entries with distinct endpoints still
+    # produce separate rows.
+    #
+    # When the grouped endpoint matches ``current_base_url`` the group's
+    # slug becomes ``current_provider`` so that selecting a model from the
+    # picker flows back through the runtime provider that already holds
+    # valid credentials — no re-resolution needed.
    if custom_providers and isinstance(custom_providers, list):
        from collections import OrderedDict

-        groups: "OrderedDict[str, dict]" = OrderedDict()
+        # Key by (base_url, api_key) instead of slug: names frequently
+        # differ per model ("Ollama — X") while the endpoint stays the
+        # same. Slug-based grouping left them as separate rows.
+        groups: "OrderedDict[tuple, dict]" = OrderedDict()
        for entry in custom_providers:
            if not isinstance(entry, dict):
                continue

-            display_name = (entry.get("name") or "").strip()
+            raw_name = (entry.get("name") or "").strip()
            api_url = (
                entry.get("base_url", "")
                or entry.get("url", "")
                or entry.get("api", "")
                or ""
-            ).strip()
-            if not display_name or not api_url:
+            ).strip().rstrip("/")
+            if not raw_name or not api_url:
                continue
+            api_key = (entry.get("api_key") or "").strip()

-            slug = custom_provider_slug(display_name)
-            if slug not in groups:
-                groups[slug] = {
+            group_key = (api_url, api_key)
+            if group_key not in groups:
+                # Strip per-model suffix so "Ollama — GLM 5.1" becomes
+                # "Ollama" for the grouped row. Em dash is the convention
+                # Hermes's own writer uses; a hyphen variant is accepted
+                # for hand-edited configs.
+                display_name = raw_name
+                for sep in ("—", " - "):
+                    if sep in display_name:
+                        display_name = display_name.split(sep)[0].strip()
+                        break
+                if not display_name:
+                    display_name = raw_name
+                # If this endpoint matches the currently active one, use
+                # ``current_provider`` as the slug so picker-driven switches
+                # route through the live credential pipeline.
+                if (
+                    current_base_url
+                    and api_url == current_base_url.strip().rstrip("/")
+                ):
+                    slug = current_provider or custom_provider_slug(display_name)
+                else:
+                    slug = custom_provider_slug(display_name)
+                groups[group_key] = {
+                    "slug": slug,
                    "name": display_name,
                    "api_url": api_url,
                    "models": [],
                }
+
            # The singular ``model:`` field only holds the currently
            # active model. Hermes's own writer (main.py::_save_custom_provider)
            # stores every configured model as a dict under ``models:``;
            # downstream readers (agent/models_dev.py, gateway/run.py,
            # run_agent.py, hermes_cli/config.py) already consume that dict.
-            # The /model picker previously ignored it, so multi-model
-            # custom providers appeared to have only the active model.
            default_model = (entry.get("model") or "").strip()
-            if default_model and default_model not in groups[slug]["models"]:
-                groups[slug]["models"].append(default_model)
+            if default_model and default_model not in groups[group_key]["models"]:
+                groups[group_key]["models"].append(default_model)

            cfg_models = entry.get("models", {})
            if isinstance(cfg_models, dict):
                for m in cfg_models:
-                    if m and m not in groups[slug]["models"]:
-                        groups[slug]["models"].append(m)
+                    if m and m not in groups[group_key]["models"]:
+                        groups[group_key]["models"].append(m)
            elif isinstance(cfg_models, list):
                for m in cfg_models:
-                    if m and m not in groups[slug]["models"]:
-                        groups[slug]["models"].append(m)
+                    if m and m not in groups[group_key]["models"]:
+                        groups[group_key]["models"].append(m)

-        for slug, grp in groups.items():
-            if slug.lower() in seen_slugs:
+        _section4_emitted_slugs: set = set()
+        for grp in groups.values():
+            slug = grp["slug"]
+            # If the slug is already claimed by a built-in / overlay /
+            # user-provider row (sections 1-3), skip this custom group
+            # to avoid shadowing a real provider.
+            if slug.lower() in seen_slugs and slug.lower() not in _section4_emitted_slugs:
                continue
+            # If a prior section-4 group already used this slug (two custom
+            # endpoints with the same cleaned name — e.g. two OpenAI-
+            # compatible gateways named identically with different keys),
+            # append a counter so both rows stay visible in the picker.
+            if slug.lower() in _section4_emitted_slugs:
+                base_slug = slug
+                n = 2
+                while f"{base_slug}-{n}".lower() in seen_slugs:
+                    n += 1
+                slug = f"{base_slug}-{n}"
+                grp["slug"] = slug
            # Skip if section 3 already emitted this endpoint under its
-            # ``providers:`` dict key — matches on (display_name, base_url),
-            # the tuple section 4 groups by.  Prevents two picker rows
-            # labelled identically when callers pass both ``user_providers``
-            # and a compatibility-merged ``custom_providers`` list.
+            # ``providers:`` dict key — matches on (display_name, base_url).
+            # Prevents two picker rows labelled identically when callers
+            # pass both ``user_providers`` and a compatibility-merged
+            # ``custom_providers`` list.
            _pair_key = (
                str(grp["name"]).strip().lower(),
                str(grp["api_url"]).strip().rstrip("/").lower(),
@@ -1183,6 +1246,7 @@ def list_authenticated_providers(
                "api_url": grp["api_url"],
            })
            seen_slugs.add(slug.lower())
+            _section4_emitted_slugs.add(slug.lower())

    # Sort: current provider first, then by model count descending
    results.sort(key=lambda r: (not r["is_current"], -r["total_models"]))
@@ -42,7 +42,8 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [
    ("openrouter/elephant-alpha",       "free"),
    ("openai/gpt-5.4",                  ""),
    ("openai/gpt-5.4-mini",             ""),
-    ("xiaomi/mimo-v2-pro",               ""),
+    ("xiaomi/mimo-v2.5-pro",             ""),
+    ("xiaomi/mimo-v2.5",                 ""),
    ("openai/gpt-5.3-codex",            ""),
    ("google/gemini-3-pro-image-preview", ""),
    ("google/gemini-3-flash-preview",   ""),
@@ -108,7 +109,8 @@ def _codex_curated_models() -> list[str]:
 _PROVIDER_MODELS: dict[str, list[str]] = {
    "nous": [
        "moonshotai/kimi-k2.6",
-        "xiaomi/mimo-v2-pro",
+        "xiaomi/mimo-v2.5-pro",
+        "xiaomi/mimo-v2.5",
        "anthropic/claude-opus-4.7",
        "anthropic/claude-opus-4.6",
        "anthropic/claude-sonnet-4.6",
@@ -210,6 +212,10 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "kimi-k2-turbo-preview",
        "kimi-k2-0905-preview",
    ],
+    "stepfun": [
+        "step-3.5-flash",
+        "step-3.5-flash-2603",
+    ],
    "moonshot": [
        "kimi-k2.6",
        "kimi-k2.5",
@@ -699,6 +705,7 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
    ProviderEntry("zai",            "Z.AI / GLM",               "Z.AI / GLM (Zhipu AI direct API)"),
    ProviderEntry("kimi-coding",    "Kimi / Kimi Coding Plan",  "Kimi Coding Plan (api.kimi.com) & Moonshot API"),
    ProviderEntry("kimi-coding-cn", "Kimi / Moonshot (China)",  "Kimi / Moonshot China (Moonshot CN direct API)"),
+    ProviderEntry("stepfun",        "StepFun Step Plan",       "StepFun Step Plan (agent/coding models via Step Plan API)"),
    ProviderEntry("minimax",        "MiniMax",                  "MiniMax (global direct API)"),
    ProviderEntry("minimax-cn",     "MiniMax (China)",          "MiniMax China (domestic direct API)"),
    ProviderEntry("alibaba",        "Alibaba Cloud (DashScope)","Alibaba Cloud / DashScope Coding (Qwen + multi-provider)"),
@@ -733,6 +740,8 @@ _PROVIDER_ALIASES = {
    "moonshot": "kimi-coding",
    "kimi-cn": "kimi-coding-cn",
    "moonshot-cn": "kimi-coding-cn",
+    "step": "stepfun",
+    "stepfun-coding-plan": "stepfun",
    "arcee-ai": "arcee",
    "arceeai": "arcee",
    "minimax-china": "minimax-cn",
@@ -1580,11 +1589,84 @@ def _resolve_copilot_catalog_api_key() -> str:
        return ""


+# Providers where models.dev is treated as authoritative: curated static
+# lists are kept only as an offline fallback and to capture custom additions
+# the registry doesn't publish yet. Adding a provider here causes its
+# curated list to be merged with fresh models.dev entries (fresh first, any
+# curated-only names appended) for both the CLI and the gateway /model picker.
+#
+# DELIBERATELY EXCLUDED:
+#   - "openrouter": curated list is already a hand-picked agentic subset of
+#     OpenRouter's 400+ catalog. Blindly merging would dump everything.
+#   - "nous": curated list and Portal /models endpoint are the source of
+#     truth for the subscription tier.
+# Also excluded: providers that already have dedicated live-endpoint
+# branches below (copilot, anthropic, ai-gateway, ollama-cloud, custom,
+# stepfun, openai-codex) — those paths handle freshness themselves.
+_MODELS_DEV_PREFERRED: frozenset[str] = frozenset({
+    "opencode-go",
+    "opencode-zen",
+    "deepseek",
+    "kilocode",
+    "fireworks",
+    "mistral",
+    "togetherai",
+    "cohere",
+    "perplexity",
+    "groq",
+    "nvidia",
+    "huggingface",
+    "zai",
+    "gemini",
+    "google",
+})
+
+
+def _merge_with_models_dev(provider: str, curated: list[str]) -> list[str]:
+    """Merge curated list with fresh models.dev entries for a preferred provider.
+
+    Returns models.dev entries first (in models.dev order), then any
+    curated-only entries appended. Preserves case for curated fallbacks
+    (e.g. ``MiniMax-M2.7``) while trusting models.dev for newer variants.
+
+    If models.dev is unreachable or returns nothing, the curated list is
+    returned unchanged — this is the offline/CI fallback path.
+    """
+    try:
+        from agent.models_dev import list_agentic_models
+        mdev = list_agentic_models(provider)
+    except Exception:
+        mdev = []
+
+    if not mdev:
+        return list(curated)
+
+    # Case-insensitive dedup while preserving order and curated casing.
+    seen_lower: set[str] = set()
+    merged: list[str] = []
+    for mid in mdev:
+        key = str(mid).lower()
+        if key in seen_lower:
+            continue
+        seen_lower.add(key)
+        merged.append(mid)
+    for mid in curated:
+        key = str(mid).lower()
+        if key in seen_lower:
+            continue
+        seen_lower.add(key)
+        merged.append(mid)
+    return merged
+
+
 def provider_model_ids(provider: Optional[str], *, force_refresh: bool = False) -> list[str]:
    """Return the best known model catalog for a provider.

    Tries live API endpoints for providers that support them (Codex, Nous),
-    falling back to static lists.
+    falling back to static lists. For providers in ``_MODELS_DEV_PREFERRED``
+    (opencode-go/zen, xiaomi, deepseek, smaller inference providers, etc.),
+    models.dev entries are merged on top of curated so new models released
+    on the platform appear in ``/model`` without a Hermes release.
    """
    normalized = normalize_provider(provider)
    if normalized == "openrouter":
@@ -1613,6 +1695,19 @@ def provider_model_ids(provider: Optional[str], *, force_refresh: bool = False)
                    return live
        except Exception:
            pass
+    if normalized == "stepfun":
+        try:
+            from hermes_cli.auth import resolve_api_key_provider_credentials
+
+            creds = resolve_api_key_provider_credentials("stepfun")
+            api_key = str(creds.get("api_key") or "").strip()
+            base_url = str(creds.get("base_url") or "").strip()
+            if api_key and base_url:
+                live = fetch_api_models(api_key, base_url)
+                if live:
+                    return live
+        except Exception:
+            pass
    if normalized == "anthropic":
        live = _fetch_anthropic_models()
        if live:
@@ -1637,7 +1732,10 @@ def provider_model_ids(provider: Optional[str], *, force_refresh: bool = False)
            live = fetch_api_models(api_key, base_url)
            if live:
                return live
-    return list(_PROVIDER_MODELS.get(normalized, []))
+    curated_static = list(_PROVIDER_MODELS.get(normalized, []))
+    if normalized in _MODELS_DEV_PREFERRED:
+        return _merge_with_models_dev(normalized, curated_static)
+    return curated_static


 def _fetch_anthropic_models(timeout: float = 5.0) -> Optional[list[str]]:
@@ -44,7 +44,7 @@ def _cmd_list(store):
        for p in pending:
            print(
                f"  {p['platform']:<12} {p['code']:<10} {p['user_id']:<20} "
-                f"{p.get('user_name', ''):<20} {p['age_minutes']}m ago"
+                f"{(p.get('user_name') or ''):<20} {p['age_minutes']}m ago"
            )
    else:
        print("\n  No pending pairing requests.")
@@ -54,7 +54,7 @@ def _cmd_list(store):
        print(f"  {'Platform':<12} {'User ID':<20} {'Name':<20}")
        print(f"  {'--------':<12} {'-------':<20} {'----':<20}")
        for a in approved:
-            print(f"  {a['platform']:<12} {a['user_id']:<20} {a.get('user_name', ''):<20}")
+            print(f"  {a['platform']:<12} {a['user_id']:<20} {(a.get('user_name') or ''):<20}")
    else:
        print("\n  No approved users.")

@@ -69,7 +69,7 @@ def _cmd_approve(store, platform: str, code: str):
    result = store.approve_code(platform, code)
    if result:
        uid = result["user_id"]
-        name = result.get("user_name", "")
+        name = result.get("user_name") or ""
        display = f"{name} ({uid})" if name else uid
        print(f"\n  Approved! User {display} on {platform} can now use the bot~")
        print("  They'll be recognized automatically on their next message.\n")
@@ -283,6 +283,7 @@ class PluginContext:
        name: str,
        handler: Callable,
        description: str = "",
+        args_hint: str = "",
    ) -> None:
        """Register a slash command (e.g. ``/lcm``) available in CLI and gateway sessions.

@@ -293,6 +294,13 @@ class PluginContext:
        terminal commands), this registers in-session slash commands that users
        invoke during a conversation.

+        ``args_hint`` is an optional short string (e.g. ``"<file>"`` or
+        ``"dias:7 formato:json"``) used by gateway adapters to surface the
+        command with an argument field — for example Discord's native slash
+        command picker. Plugin commands without ``args_hint`` register as
+        parameterless in Discord and still accept trailing text when invoked
+        as free-form chat.
+
        Names conflicting with built-in commands are rejected with a warning.
        """
        clean = name.lower().strip().lstrip("/").replace(" ", "-")
@@ -320,6 +328,7 @@ class PluginContext:
            "handler": handler,
            "description": description or "Plugin command",
            "plugin": self.manifest.name,
+            "args_hint": (args_hint or "").strip(),
        }
        logger.debug("Plugin %s registered command: /%s", self.manifest.name, clean)

@@ -503,10 +512,23 @@ class PluginManager:
    # Public
    # -----------------------------------------------------------------------

-    def discover_and_load(self) -> None:
-        """Scan all plugin sources and load each plugin found."""
-        if self._discovered:
+    def discover_and_load(self, force: bool = False) -> None:
+        """Scan all plugin sources and load each plugin found.
+
+        When ``force`` is true, clear cached discovery state first so config
+        changes or newly-added bundled backends become visible in long-lived
+        sessions without requiring a full agent restart.
+        """
+        if self._discovered and not force:
            return
+        if force:
+            self._plugins.clear()
+            self._hooks.clear()
+            self._plugin_tool_names.clear()
+            self._cli_commands.clear()
+            self._plugin_commands.clear()
+            self._plugin_skills.clear()
+            self._context_engine = None
        self._discovered = True

        manifests: List[PluginManifest] = []
@@ -734,6 +756,30 @@ class PluginManager:
                )
                kind = "standalone"

+            # Auto-coerce user-installed memory providers to kind="exclusive"
+            # so they're routed to plugins/memory discovery instead of being
+            # loaded by the general PluginManager (which has no
+            # register_memory_provider on PluginContext). Mirrors the
+            # heuristic in plugins/memory/__init__.py:_is_memory_provider_dir.
+            # Bundled memory providers are already skipped via skip_names.
+            if kind == "standalone" and "kind" not in data:
+                init_file = plugin_dir / "__init__.py"
+                if init_file.exists():
+                    try:
+                        source_text = init_file.read_text(errors="replace")[:8192]
+                        if (
+                            "register_memory_provider" in source_text
+                            or "MemoryProvider" in source_text
+                        ):
+                            kind = "exclusive"
+                            logger.debug(
+                                "Plugin %s: detected memory provider, "
+                                "treating as kind='exclusive'",
+                                key,
+                            )
+                    except Exception:
+                        pass
+
            return PluginManifest(
                name=name,
                version=str(data.get("version", "")),
@@ -996,9 +1042,13 @@ def get_plugin_manager() -> PluginManager:
    return _plugin_manager


-def discover_plugins() -> None:
-    """Discover and load all plugins (idempotent)."""
-    get_plugin_manager().discover_and_load()
+def discover_plugins(force: bool = False) -> None:
+    """Discover and load all plugins.
+
+    Default behavior is idempotent. Pass ``force=True`` to rescan plugin
+    manifests and reload state in the current process.
+    """
+    get_plugin_manager().discover_and_load(force=force)


 def invoke_hook(hook_name: str, **kwargs: Any) -> List[Any]:
@@ -1049,10 +1099,13 @@ def get_pre_tool_call_block_message(
    return None


-def _ensure_plugins_discovered() -> PluginManager:
-    """Return the global manager after running idempotent plugin discovery."""
+def _ensure_plugins_discovered(force: bool = False) -> PluginManager:
+    """Return the global manager after ensuring plugin discovery has run.
+
+    Pass ``force=True`` to rescan in the current process.
+    """
    manager = get_plugin_manager()
-    manager.discover_and_load()
+    manager.discover_and_load(force=force)
    return manager


@@ -863,19 +863,15 @@ def _safe_extract_profile_archive(archive: Path, destination: Path) -> None:
                pass


-def import_profile(archive_path: str, name: Optional[str] = None) -> Path:
-    """Import a profile from a tar.gz archive.
+def _inspect_profile_archive_roots(archive: Path) -> set[str]:
+    """Return the archive's top-level directory names.

-    If *name* is not given, infers it from the archive's top-level directory.
-    Returns the imported profile directory.
+    Profile imports expect exactly one root directory. Inspecting the archive
+    before extraction lets us stage the import safely instead of mutating a
+    live profile tree first and reconciling names later.
    """
    import tarfile

-    archive = Path(archive_path)
-    if not archive.exists():
-        raise FileNotFoundError(f"Archive not found: {archive}")
-
-    # Peek at the archive to find the top-level directory name
    with tarfile.open(archive, "r:gz") as tf:
        top_dirs = {
            parts[0]
@@ -889,13 +885,33 @@ def import_profile(archive_path: str, name: Optional[str] = None) -> Path:
                for member in tf.getmembers()
                if member.isdir()
            }
+    return top_dirs

-    inferred_name = name or (top_dirs.pop() if len(top_dirs) == 1 else None)
+
+def import_profile(archive_path: str, name: Optional[str] = None) -> Path:
+    """Import a profile from a tar.gz archive.
+
+    If *name* is not given, infers it from the archive's top-level directory.
+    Returns the imported profile directory.
+    """
+    import tempfile
+
+    archive = Path(archive_path)
+    if not archive.exists():
+        raise FileNotFoundError(f"Archive not found: {archive}")
+
+    top_dirs = _inspect_profile_archive_roots(archive)
+    archive_root = top_dirs.pop() if len(top_dirs) == 1 else None
+    inferred_name = name or archive_root
    if not inferred_name:
        raise ValueError(
            "Cannot determine profile name from archive. "
            "Specify it explicitly: hermes profile import <archive> --name <name>"
        )
+    if archive_root is None:
+        raise ValueError(
+            "Profile archive must contain exactly one top-level directory."
+        )

    # Archives exported from the default profile have "default/" as top-level
    # dir.  Importing as "default" would target ~/.hermes itself — disallow
@@ -914,12 +930,22 @@ def import_profile(archive_path: str, name: Optional[str] = None) -> Path:
    profiles_root = _get_profiles_root()
    profiles_root.mkdir(parents=True, exist_ok=True)

-    _safe_extract_profile_archive(archive, profiles_root)
+    with tempfile.TemporaryDirectory(prefix="hermes_profile_import_") as tmpdir:
+        staging_root = Path(tmpdir)
+        _safe_extract_profile_archive(archive, staging_root)

-    # If the archive extracted under a different name, rename
-    extracted = profiles_root / (top_dirs.pop() if top_dirs else inferred_name)
-    if extracted != profile_dir and extracted.exists():
-        extracted.rename(profile_dir)
+        extracted = staging_root / archive_root
+        if not extracted.is_dir():
+            raise ValueError(
+                f"Profile archive root is missing or invalid: {archive_root}"
+            )
+
+        final_source = extracted
+        if archive_root != inferred_name:
+            final_source = staging_root / inferred_name
+            extracted.rename(final_source)
+
+        shutil.move(str(final_source), str(profile_dir))

    return profile_dir

@@ -94,6 +94,12 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = {
        transport="openai_chat",
        base_url_env_var="KIMI_BASE_URL",
    ),
+    "stepfun": HermesOverlay(
+        transport="openai_chat",
+        extra_env_vars=("STEPFUN_API_KEY",),
+        base_url_override="https://api.stepfun.ai/step_plan/v1",
+        base_url_env_var="STEPFUN_BASE_URL",
+    ),
    "minimax": HermesOverlay(
        transport="anthropic_messages",
        base_url_env_var="MINIMAX_BASE_URL",
@@ -210,6 +216,10 @@ ALIASES: Dict[str, str] = {
    "kimi-coding-cn": "kimi-for-coding",
    "moonshot": "kimi-for-coding",

+    # stepfun
+    "step": "stepfun",
+    "stepfun-coding-plan": "stepfun",
+
    # minimax-cn
    "minimax-china": "minimax-cn",
    "minimax_cn": "minimax-cn",
@@ -294,6 +304,7 @@ _LABEL_OVERRIDES: Dict[str, str] = {
    "nous": "Nous Portal",
    "openai-codex": "OpenAI Codex",
    "copilot-acp": "GitHub Copilot ACP",
+    "stepfun": "StepFun Step Plan",
    "xiaomi": "Xiaomi MiMo",
    "local": "Local endpoint",
    "bedrock": "AWS Bedrock",
@@ -96,6 +96,7 @@ _DEFAULT_PROVIDER_MODELS = {
    "zai": ["glm-5.1", "glm-5", "glm-4.7", "glm-4.5", "glm-4.5-flash"],
    "kimi-coding": ["kimi-k2.6", "kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview"],
    "kimi-coding-cn": ["kimi-k2.6", "kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview"],
+    "stepfun": ["step-3.5-flash", "step-3.5-flash-2603"],
    "arcee": ["trinity-large-thinking", "trinity-large-preview", "trinity-mini"],
    "minimax": ["MiniMax-M2.7", "MiniMax-M2.5", "MiniMax-M2.1", "MiniMax-M2"],
    "minimax-cn": ["MiniMax-M2.7", "MiniMax-M2.5", "MiniMax-M2.1", "MiniMax-M2"],
@@ -804,6 +805,7 @@ def setup_model_provider(config: dict, *, quick: bool = False):
            "zai": "Z.AI / GLM",
            "kimi-coding": "Kimi / Moonshot",
            "kimi-coding-cn": "Kimi / Moonshot (China)",
+            "stepfun": "StepFun Step Plan",
            "minimax": "MiniMax",
            "minimax-cn": "MiniMax CN",
            "anthropic": "Anthropic",
@@ -2332,6 +2334,7 @@ def setup_gateway(config: dict):
            launchd_install,
            launchd_start,
            launchd_restart,
+            UserSystemdUnavailableError,
        )

        service_installed = _is_service_installed()
@@ -2355,6 +2358,10 @@ def setup_gateway(config: dict):
                        systemd_restart()
                    elif _is_macos:
                        launchd_restart()
+                except UserSystemdUnavailableError as e:
+                    print_error("  Restart failed — user systemd not reachable:")
+                    for line in str(e).splitlines():
+                        print(f"  {line}")
                except Exception as e:
                    print_error(f"  Restart failed: {e}")
        elif service_installed:
@@ -2364,6 +2371,10 @@ def setup_gateway(config: dict):
                        systemd_start()
                    elif _is_macos:
                        launchd_start()
+                except UserSystemdUnavailableError as e:
+                    print_error("  Start failed — user systemd not reachable:")
+                    for line in str(e).splitlines():
+                        print(f"  {line}")
                except Exception as e:
                    print_error(f"  Start failed: {e}")
        elif supports_service_manager:
@@ -2387,6 +2398,10 @@ def setup_gateway(config: dict):
                                systemd_start(system=installed_scope == "system")
                            elif _is_macos:
                                launchd_start()
+                        except UserSystemdUnavailableError as e:
+                            print_error("  Start failed — user systemd not reachable:")
+                            for line in str(e).splitlines():
+                                print(f"  {line}")
                        except Exception as e:
                            print_error(f"  Start failed: {e}")
                except Exception as e:
@@ -30,6 +30,14 @@ All fields are optional. Missing values inherit from the ``default`` skin.
      prompt: "#FFF8DC"                  # Prompt text color
      input_rule: "#CD7F32"              # Input area horizontal rule
      response_border: "#FFD700"         # Response box border (ANSI)
+      status_bar_bg: "#1a1a2e"           # Status bar background
+      status_bar_text: "#C0C0C0"         # Status bar default text
+      status_bar_strong: "#FFD700"       # Status bar highlighted text
+      status_bar_dim: "#8B8682"          # Status bar separators/muted text
+      status_bar_good: "#8FBC8F"         # Healthy context usage
+      status_bar_warn: "#FFD700"         # Warning context usage
+      status_bar_bad: "#FF8C00"          # High context usage
+      status_bar_critical: "#FF6B6B"     # Critical context usage
      session_label: "#DAA520"           # Session label color
      session_border: "#8B8682"          # Session ID dim color
      status_bar_bg: "#1a1a2e"          # TUI status/usage bar background
@@ -170,6 +178,7 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
            "prompt": "#FFF8DC",
            "input_rule": "#CD7F32",
            "response_border": "#FFD700",
+            "status_bar_bg": "#1a1a2e",
            "session_label": "#DAA520",
            "session_border": "#8B8682",
        },
@@ -203,6 +212,14 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
            "prompt": "#F1E6CF",
            "input_rule": "#9F1C1C",
            "response_border": "#C7A96B",
+            "status_bar_bg": "#2A1212",
+            "status_bar_text": "#F1E6CF",
+            "status_bar_strong": "#C7A96B",
+            "status_bar_dim": "#6E584B",
+            "status_bar_good": "#7BC96F",
+            "status_bar_warn": "#C7A96B",
+            "status_bar_bad": "#DD4A3A",
+            "status_bar_critical": "#EF5350",
            "session_label": "#C7A96B",
            "session_border": "#6E584B",
        },
@@ -267,6 +284,14 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
            "prompt": "#c9d1d9",
            "input_rule": "#444444",
            "response_border": "#aaaaaa",
+            "status_bar_bg": "#1F1F1F",
+            "status_bar_text": "#C9D1D9",
+            "status_bar_strong": "#E6EDF3",
+            "status_bar_dim": "#777777",
+            "status_bar_good": "#B5B5B5",
+            "status_bar_warn": "#AAAAAA",
+            "status_bar_bad": "#D0D0D0",
+            "status_bar_critical": "#F0F0F0",
            "session_label": "#888888",
            "session_border": "#555555",
        },
@@ -298,6 +323,14 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
            "prompt": "#c9d1d9",
            "input_rule": "#4169e1",
            "response_border": "#7eb8f6",
+            "status_bar_bg": "#151C2F",
+            "status_bar_text": "#C9D1D9",
+            "status_bar_strong": "#7EB8F6",
+            "status_bar_dim": "#4B5563",
+            "status_bar_good": "#63D0A6",
+            "status_bar_warn": "#E6A855",
+            "status_bar_bad": "#F7A072",
+            "status_bar_critical": "#FF7A7A",
            "session_label": "#7eb8f6",
            "session_border": "#4b5563",
        },
@@ -403,6 +436,14 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
            "prompt": "#EAF7FF",
            "input_rule": "#2A6FB9",
            "response_border": "#5DB8F5",
+            "status_bar_bg": "#0F2440",
+            "status_bar_text": "#EAF7FF",
+            "status_bar_strong": "#A9DFFF",
+            "status_bar_dim": "#496884",
+            "status_bar_good": "#6ED7B0",
+            "status_bar_warn": "#5DB8F5",
+            "status_bar_bad": "#2A6FB9",
+            "status_bar_critical": "#D94F4F",
            "session_label": "#A9DFFF",
            "session_border": "#496884",
        },
@@ -467,6 +508,14 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
            "prompt": "#F5F5F5",
            "input_rule": "#656565",
            "response_border": "#B7B7B7",
+            "status_bar_bg": "#202020",
+            "status_bar_text": "#D3D3D3",
+            "status_bar_strong": "#F5F5F5",
+            "status_bar_dim": "#656565",
+            "status_bar_good": "#B7B7B7",
+            "status_bar_warn": "#D3D3D3",
+            "status_bar_bad": "#E7E7E7",
+            "status_bar_critical": "#F5F5F5",
            "session_label": "#919191",
            "session_border": "#656565",
        },
@@ -532,6 +581,14 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
            "prompt": "#FFF0D4",
            "input_rule": "#C75B1D",
            "response_border": "#F29C38",
+            "status_bar_bg": "#2B160E",
+            "status_bar_text": "#FFF0D4",
+            "status_bar_strong": "#FFD39A",
+            "status_bar_dim": "#6C4724",
+            "status_bar_good": "#6BCB77",
+            "status_bar_warn": "#F29C38",
+            "status_bar_bad": "#E2832B",
+            "status_bar_critical": "#EF5350",
            "session_label": "#FFD39A",
            "session_border": "#6C4724",
        },
@@ -770,6 +827,13 @@ def get_prompt_toolkit_style_overrides() -> Dict[str, str]:
    warn = skin.get_color("ui_warn", "#FF8C00")
    error = skin.get_color("ui_error", "#FF6B6B")
    status_bg = skin.get_color("status_bar_bg", "#1a1a2e")
+    status_text = skin.get_color("status_bar_text", text)
+    status_strong = skin.get_color("status_bar_strong", title)
+    status_dim = skin.get_color("status_bar_dim", dim)
+    status_good = skin.get_color("status_bar_good", skin.get_color("ui_ok", "#8FBC8F"))
+    status_warn = skin.get_color("status_bar_warn", warn)
+    status_bad = skin.get_color("status_bar_bad", skin.get_color("banner_accent", warn))
+    status_critical = skin.get_color("status_bar_critical", error)
    voice_bg = skin.get_color("voice_status_bg", status_bg)
    menu_bg = skin.get_color("completion_menu_bg", "#1a1a2e")
    menu_current_bg = skin.get_color("completion_menu_current_bg", "#333355")
@@ -782,13 +846,13 @@ def get_prompt_toolkit_style_overrides() -> Dict[str, str]:
        "prompt": prompt,
        "prompt-working": f"{dim} italic",
        "hint": f"{dim} italic",
-        "status-bar": f"bg:{status_bg} {text}",
-        "status-bar-strong": f"bg:{status_bg} {title} bold",
-        "status-bar-dim": f"bg:{status_bg} {dim}",
-        "status-bar-good": f"bg:{status_bg} {skin.get_color('ui_ok', '#8FBC8F')} bold",
-        "status-bar-warn": f"bg:{status_bg} {warn} bold",
-        "status-bar-bad": f"bg:{status_bg} {skin.get_color('banner_accent', warn)} bold",
-        "status-bar-critical": f"bg:{status_bg} {error} bold",
+        "status-bar": f"bg:{status_bg} {status_text}",
+        "status-bar-strong": f"bg:{status_bg} {status_strong} bold",
+        "status-bar-dim": f"bg:{status_bg} {status_dim}",
+        "status-bar-good": f"bg:{status_bg} {status_good} bold",
+        "status-bar-warn": f"bg:{status_bg} {status_warn} bold",
+        "status-bar-bad": f"bg:{status_bg} {status_bad} bold",
+        "status-bar-critical": f"bg:{status_bg} {status_critical} bold",
        "input-rule": input_rule,
        "image-badge": f"{label} bold",
        "completion-menu": f"bg:{menu_bg} {text}",
@@ -122,6 +122,7 @@ def show_status(args):
        "OpenAI": "OPENAI_API_KEY",
        "Z.AI/GLM": "GLM_API_KEY",
        "Kimi": "KIMI_API_KEY",
+        "StepFun Step Plan": "STEPFUN_API_KEY",
        "MiniMax": "MINIMAX_API_KEY",
        "MiniMax-CN": "MINIMAX_CN_API_KEY",
        "Firecrawl": "FIRECRAWL_API_KEY",
@@ -252,6 +253,7 @@ def show_status(args):
    apikey_providers = {
        "Z.AI / GLM":       ("GLM_API_KEY", "ZAI_API_KEY", "Z_AI_API_KEY"),
        "Kimi / Moonshot":  ("KIMI_API_KEY",),
+        "StepFun Step Plan": ("STEPFUN_API_KEY",),
        "MiniMax":          ("MINIMAX_API_KEY",),
        "MiniMax (China)":  ("MINIMAX_CN_API_KEY",),
    }
@@ -13,7 +13,7 @@ import json as _json
 import logging
 import sys
 from pathlib import Path
-from typing import Dict, List, Optional, Set
+from typing import Any, Callable, Dict, List, Optional, Set, Tuple, TypedDict


 from hermes_cli.config import (
@@ -748,7 +748,7 @@ def _estimate_tool_tokens() -> Dict[str, int]:
    OpenAI-format tool schema.  Triggers tool discovery on first call,
    then caches the result for the rest of the process.

-    Returns an empty dict when tiktoken or the registry is unavailable.
+    Returns an empty dict when the registry is unavailable.
    """
    global _tool_token_cache
    if _tool_token_cache is not None:
@@ -756,11 +756,12 @@ def _estimate_tool_tokens() -> Dict[str, int]:

    try:
        import tiktoken
-        enc = tiktoken.get_encoding("cl100k_base")
-    except Exception:
-        logger.debug("tiktoken unavailable; skipping tool token estimation")
-        _tool_token_cache = {}
-        return _tool_token_cache
+    except ImportError:
+        raise ImportError(
+            "tiktoken is required for tool token estimation. "
+            "Install with: pip install hermes-agent[cli]"
+        ) from None
+    enc = tiktoken.get_encoding("cl100k_base")

    try:
        # Trigger full tool discovery (imports all tool modules).
@@ -1019,6 +1020,11 @@ def _configure_tool_category(ts_key: str, cat: dict, config: dict):

 def _is_provider_active(provider: dict, config: dict) -> bool:
    """Check if a provider entry matches the currently active config."""
+    plugin_name = provider.get("image_gen_plugin_name")
+    if plugin_name:
+        image_cfg = config.get("image_gen", {})
+        return isinstance(image_cfg, dict) and image_cfg.get("provider") == plugin_name
+
    managed_feature = provider.get("managed_nous_feature")
    if managed_feature:
        features = get_nous_subscription_features(config)
@@ -1026,6 +1032,13 @@ def _is_provider_active(provider: dict, config: dict) -> bool:
        if feature is None:
            return False
        if managed_feature == "image_gen":
+            image_cfg = config.get("image_gen", {})
+            if isinstance(image_cfg, dict):
+                configured_provider = image_cfg.get("provider")
+                if configured_provider not in (None, "", "fal"):
+                    return False
+                if image_cfg.get("use_gateway") is False:
+                    return False
            return feature.managed_by_nous
        if provider.get("tts_provider"):
            return (
@@ -1048,6 +1061,16 @@ def _is_provider_active(provider: dict, config: dict) -> bool:
    if provider.get("web_backend"):
        current = config.get("web", {}).get("backend")
        return current == provider["web_backend"]
+    if provider.get("imagegen_backend"):
+        image_cfg = config.get("image_gen", {})
+        if not isinstance(image_cfg, dict):
+            return False
+        configured_provider = image_cfg.get("provider")
+        return (
+            provider["imagegen_backend"] == "fal"
+            and configured_provider in (None, "", "fal")
+            and not image_cfg.get("use_gateway")
+        )
    return False


@@ -1076,13 +1099,19 @@ def _detect_active_provider_index(providers: list, config: dict) -> int:
 # right catalog at picker time.


-def _fal_model_catalog():
+class _ImagegenBackend(TypedDict):
+    display: str
+    config_key: str
+    catalog_fn: Callable[[], Tuple[Dict[str, Dict[str, Any]], str]]
+
+
+def _fal_model_catalog() -> Tuple[Dict[str, Dict[str, Any]], str]:
    """Lazy-load the FAL model catalog from the tool module."""
    from tools.image_generation_tool import FAL_MODELS, DEFAULT_MODEL
    return FAL_MODELS, DEFAULT_MODEL


-IMAGEGEN_BACKENDS = {
+IMAGEGEN_BACKENDS: Dict[str, _ImagegenBackend] = {
    "fal": {
        "display": "FAL.ai",
        "config_key": "image_gen",
@@ -1245,6 +1274,18 @@ def _configure_imagegen_model_for_plugin(plugin_name: str, config: dict) -> None
    _print_success(f"  Model set to: {chosen}")


+def _select_plugin_image_gen_provider(plugin_name: str, config: dict) -> None:
+    """Persist a plugin-backed image generation provider selection."""
+    img_cfg = config.setdefault("image_gen", {})
+    if not isinstance(img_cfg, dict):
+        img_cfg = {}
+        config["image_gen"] = img_cfg
+    img_cfg["provider"] = plugin_name
+    img_cfg["use_gateway"] = False
+    _print_success(f"  image_gen.provider set to: {plugin_name}")
+    _configure_imagegen_model_for_plugin(plugin_name, config)
+
+
 def _configure_provider(provider: dict, config: dict):
    """Configure a single provider - prompt for API keys and set config."""
    env_vars = provider.get("env_vars", [])
@@ -1305,13 +1346,7 @@ def _configure_provider(provider: dict, config: dict):
        # and route model selection to the plugin's own catalog.
        plugin_name = provider.get("image_gen_plugin_name")
        if plugin_name:
-            img_cfg = config.setdefault("image_gen", {})
-            if not isinstance(img_cfg, dict):
-                img_cfg = {}
-                config["image_gen"] = img_cfg
-            img_cfg["provider"] = plugin_name
-            _print_success(f"  image_gen.provider set to: {plugin_name}")
-            _configure_imagegen_model_for_plugin(plugin_name, config)
+            _select_plugin_image_gen_provider(plugin_name, config)
            return
        # Imagegen backends prompt for model selection after backend pick.
        backend = provider.get("imagegen_backend")
@@ -1359,13 +1394,7 @@ def _configure_provider(provider: dict, config: dict):
        _print_success(f"  {provider['name']} configured!")
        plugin_name = provider.get("image_gen_plugin_name")
        if plugin_name:
-            img_cfg = config.setdefault("image_gen", {})
-            if not isinstance(img_cfg, dict):
-                img_cfg = {}
-                config["image_gen"] = img_cfg
-            img_cfg["provider"] = plugin_name
-            _print_success(f"  image_gen.provider set to: {plugin_name}")
-            _configure_imagegen_model_for_plugin(plugin_name, config)
+            _select_plugin_image_gen_provider(plugin_name, config)
            return
        # Imagegen backends prompt for model selection after env vars are in.
        backend = provider.get("imagegen_backend")
@@ -1539,16 +1568,39 @@ def _reconfigure_provider(provider: dict, config: dict):
        config.setdefault("web", {})["backend"] = provider["web_backend"]
        _print_success(f"  Web backend set to: {provider['web_backend']}")

+    if managed_feature and managed_feature not in ("web", "tts", "browser"):
+        section = config.setdefault(managed_feature, {})
+        if not isinstance(section, dict):
+            section = {}
+            config[managed_feature] = section
+        section["use_gateway"] = True
+    elif not managed_feature:
+        for cat_key, cat in TOOL_CATEGORIES.items():
+            if provider in cat.get("providers", []):
+                section = config.get(cat_key)
+                if isinstance(section, dict) and section.get("use_gateway"):
+                    section["use_gateway"] = False
+                break
+
    if not env_vars:
        if provider.get("post_setup"):
            _run_post_setup(provider["post_setup"])
        _print_success(f"  {provider['name']} - no configuration needed!")
        if managed_feature:
            _print_info("  Requests for this tool will be billed to your Nous subscription.")
+        plugin_name = provider.get("image_gen_plugin_name")
+        if plugin_name:
+            _select_plugin_image_gen_provider(plugin_name, config)
+            return
        # Imagegen backends prompt for model selection on reconfig too.
        backend = provider.get("imagegen_backend")
        if backend:
            _configure_imagegen_model(backend, config)
+            if backend == "fal":
+                img_cfg = config.setdefault("image_gen", {})
+                if isinstance(img_cfg, dict):
+                    img_cfg["provider"] = "fal"
+                    img_cfg["use_gateway"] = False
        return

    for var in env_vars:
@@ -1567,9 +1619,19 @@ def _reconfigure_provider(provider: dict, config: dict):
            _print_info("    Kept current")

    # Imagegen backends prompt for model selection on reconfig too.
+    plugin_name = provider.get("image_gen_plugin_name")
+    if plugin_name:
+        _select_plugin_image_gen_provider(plugin_name, config)
+        return
+
    backend = provider.get("imagegen_backend")
    if backend:
        _configure_imagegen_model(backend, config)
+        if backend == "fal":
+            img_cfg = config.setdefault("image_gen", {})
+            if isinstance(img_cfg, dict):
+                img_cfg["provider"] = "fal"
+                img_cfg["use_gateway"] = False


 def _reconfigure_simple_requirements(ts_key: str):
@@ -2189,7 +2189,8 @@ async def get_usage_analytics(days: int = 30):
                   SUM(reasoning_tokens) as reasoning_tokens,
                   COALESCE(SUM(estimated_cost_usd), 0) as estimated_cost,
                   COALESCE(SUM(actual_cost_usd), 0) as actual_cost,
-                   COUNT(*) as sessions
+                   COUNT(*) as sessions,
+                   SUM(COALESCE(api_call_count, 0)) as api_calls
            FROM sessions WHERE started_at > ?
            GROUP BY day ORDER BY day
        """, (cutoff,))
@@ -2200,7 +2201,8 @@ async def get_usage_analytics(days: int = 30):
                   SUM(input_tokens) as input_tokens,
                   SUM(output_tokens) as output_tokens,
                   COALESCE(SUM(estimated_cost_usd), 0) as estimated_cost,
-                   COUNT(*) as sessions
+                   COUNT(*) as sessions,
+                   SUM(COALESCE(api_call_count, 0)) as api_calls
            FROM sessions WHERE started_at > ? AND model IS NOT NULL
            GROUP BY model ORDER BY SUM(input_tokens) + SUM(output_tokens) DESC
        """, (cutoff,))
@@ -2213,7 +2215,8 @@ async def get_usage_analytics(days: int = 30):
                   SUM(reasoning_tokens) as total_reasoning,
                   COALESCE(SUM(estimated_cost_usd), 0) as total_estimated_cost,
                   COALESCE(SUM(actual_cost_usd), 0) as total_actual_cost,
-                   COUNT(*) as total_sessions
+                   COUNT(*) as total_sessions,
+                   SUM(COALESCE(api_call_count, 0)) as total_api_calls
            FROM sessions WHERE started_at > ?
        """, (cutoff,))
        totals = dict(cur3.fetchone())
@@ -142,7 +142,7 @@ class _ComponentFilter(logging.Filter):
 # Used by _ComponentFilter and exposed for ``hermes logs --component``.
 COMPONENT_PREFIXES = {
    "gateway": ("gateway",),
-    "agent": ("agent", "run_agent", "model_tools", "batch_runner"),
+    "agent": ("agent", "run_agent", "model_tools", "scripts.batch_runner"),
    "tools": ("tools",),
    "cli": ("hermes_cli", "cli"),
    "cron": ("cron",),
@@ -31,7 +31,7 @@ T = TypeVar("T")

 DEFAULT_DB_PATH = get_hermes_home() / "state.db"

-SCHEMA_VERSION = 6
+SCHEMA_VERSION = 8

 SCHEMA_SQL = """
 CREATE TABLE IF NOT EXISTS schema_version (
@@ -65,6 +65,7 @@ CREATE TABLE IF NOT EXISTS sessions (
    cost_source TEXT,
    pricing_version TEXT,
    title TEXT,
+    api_call_count INTEGER DEFAULT 0,
    FOREIGN KEY (parent_session_id) REFERENCES sessions(id)
 );

@@ -80,10 +81,16 @@ CREATE TABLE IF NOT EXISTS messages (
    token_count INTEGER,
    finish_reason TEXT,
    reasoning TEXT,
+    reasoning_content TEXT,
    reasoning_details TEXT,
    codex_reasoning_items TEXT
 );

+CREATE TABLE IF NOT EXISTS state_meta (
+    key TEXT PRIMARY KEY,
+    value TEXT
+);
+
 CREATE INDEX IF NOT EXISTS idx_sessions_source ON sessions(source);
 CREATE INDEX IF NOT EXISTS idx_sessions_parent ON sessions(parent_session_id);
 CREATE INDEX IF NOT EXISTS idx_sessions_started ON sessions(started_at DESC);
@@ -329,6 +336,26 @@ class SessionDB:
                    except sqlite3.OperationalError:
                        pass  # Column already exists
                cursor.execute("UPDATE schema_version SET version = 6")
+            if current_version < 7:
+                # v7: preserve provider-native reasoning_content separately from
+                # normalized reasoning text. Kimi/Moonshot replay can require
+                # this field on assistant tool-call messages when thinking is on.
+                try:
+                    cursor.execute('ALTER TABLE messages ADD COLUMN "reasoning_content" TEXT')
+                except sqlite3.OperationalError:
+                    pass  # Column already exists
+                cursor.execute("UPDATE schema_version SET version = 7")
+            if current_version < 8:
+                # v8: add api_call_count column to sessions — tracks the number
+                # of individual LLM API calls made within a session (as opposed
+                # to the session count itself).
+                try:
+                    cursor.execute(
+                        'ALTER TABLE sessions ADD COLUMN "api_call_count" INTEGER DEFAULT 0'
+                    )
+                except sqlite3.OperationalError:
+                    pass  # Column already exists
+                cursor.execute("UPDATE schema_version SET version = 8")

        # Unique title index — always ensure it exists (safe to run after migrations
        # since the title column is guaranteed to exist at this point)
@@ -435,6 +462,7 @@ class SessionDB:
        billing_provider: Optional[str] = None,
        billing_base_url: Optional[str] = None,
        billing_mode: Optional[str] = None,
+        api_call_count: int = 0,
        absolute: bool = False,
    ) -> None:
        """Update token counters and backfill model if not already set.
@@ -464,7 +492,8 @@ class SessionDB:
                   billing_provider = COALESCE(billing_provider, ?),
                   billing_base_url = COALESCE(billing_base_url, ?),
                   billing_mode = COALESCE(billing_mode, ?),
-                   model = COALESCE(model, ?)
+                   model = COALESCE(model, ?),
+                   api_call_count = ?
                   WHERE id = ?"""
        else:
            sql = """UPDATE sessions SET
@@ -484,7 +513,8 @@ class SessionDB:
                   billing_provider = COALESCE(billing_provider, ?),
                   billing_base_url = COALESCE(billing_base_url, ?),
                   billing_mode = COALESCE(billing_mode, ?),
-                   model = COALESCE(model, ?)
+                   model = COALESCE(model, ?),
+                   api_call_count = COALESCE(api_call_count, 0) + ?
                   WHERE id = ?"""
        params = (
            input_tokens,
@@ -502,6 +532,7 @@ class SessionDB:
            billing_base_url,
            billing_mode,
            model,
+            api_call_count,
            session_id,
        )
        def _do(conn):
@@ -922,6 +953,7 @@ class SessionDB:
        token_count: int = None,
        finish_reason: str = None,
        reasoning: str = None,
+        reasoning_content: str = None,
        reasoning_details: Any = None,
        codex_reasoning_items: Any = None,
    ) -> int:
@@ -951,8 +983,8 @@ class SessionDB:
            cursor = conn.execute(
                """INSERT INTO messages (session_id, role, content, tool_call_id,
                   tool_calls, tool_name, timestamp, token_count, finish_reason,
-                   reasoning, reasoning_details, codex_reasoning_items)
-                   VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
+                   reasoning, reasoning_content, reasoning_details, codex_reasoning_items)
+                   VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
                (
                    session_id,
                    role,
@@ -964,6 +996,7 @@ class SessionDB:
                    token_count,
                    finish_reason,
                    reasoning,
+                    reasoning_content,
                    reasoning_details_json,
                    codex_items_json,
                ),
@@ -1014,7 +1047,7 @@ class SessionDB:
        with self._lock:
            cursor = self._conn.execute(
                "SELECT role, content, tool_call_id, tool_calls, tool_name, "
-                "reasoning, reasoning_details, codex_reasoning_items "
+                "reasoning, reasoning_content, reasoning_details, codex_reasoning_items "
                "FROM messages WHERE session_id = ? ORDER BY timestamp, id",
                (session_id,),
            )
@@ -1038,6 +1071,8 @@ class SessionDB:
            if row["role"] == "assistant":
                if row["reasoning"]:
                    msg["reasoning"] = row["reasoning"]
+                if row["reasoning_content"] is not None:
+                    msg["reasoning_content"] = row["reasoning_content"]
                if row["reasoning_details"]:
                    try:
                        msg["reasoning_details"] = json.loads(row["reasoning_details"])
@@ -1441,3 +1476,116 @@ class SessionDB:
            return len(session_ids)

        return self._execute_write(_do)
+
+    # ── Meta key/value (for scheduler bookkeeping) ──
+
+    def get_meta(self, key: str) -> Optional[str]:
+        """Read a value from the state_meta key/value store."""
+        with self._lock:
+            row = self._conn.execute(
+                "SELECT value FROM state_meta WHERE key = ?", (key,)
+            ).fetchone()
+        if row is None:
+            return None
+        return row["value"] if isinstance(row, sqlite3.Row) else row[0]
+
+    def set_meta(self, key: str, value: str) -> None:
+        """Write a value to the state_meta key/value store."""
+        def _do(conn):
+            conn.execute(
+                "INSERT INTO state_meta (key, value) VALUES (?, ?) "
+                "ON CONFLICT(key) DO UPDATE SET value = excluded.value",
+                (key, value),
+            )
+        self._execute_write(_do)
+
+    # ── Space reclamation ──
+
+    def vacuum(self) -> None:
+        """Run VACUUM to reclaim disk space after large deletes.
+
+        SQLite does not shrink the database file when rows are deleted —
+        freed pages just get reused on the next insert. After a prune that
+        removed hundreds of sessions, the file stays bloated unless we
+        explicitly VACUUM.
+
+        VACUUM rewrites the entire DB, so it's expensive (seconds per
+        100MB) and cannot run inside a transaction. It also acquires an
+        exclusive lock, so callers must ensure no other writers are
+        active. Safe to call at startup before the gateway/CLI starts
+        serving traffic.
+        """
+        # VACUUM cannot be executed inside a transaction.
+        with self._lock:
+            # Best-effort WAL checkpoint first, then VACUUM.
+            try:
+                self._conn.execute("PRAGMA wal_checkpoint(TRUNCATE)")
+            except Exception:
+                pass
+            self._conn.execute("VACUUM")
+
+    def maybe_auto_prune_and_vacuum(
+        self,
+        retention_days: int = 90,
+        min_interval_hours: int = 24,
+        vacuum: bool = True,
+    ) -> Dict[str, Any]:
+        """Idempotent auto-maintenance: prune old sessions + optional VACUUM.
+
+        Records the last run timestamp in state_meta so subsequent calls
+        within ``min_interval_hours`` no-op. Designed to be called once at
+        startup from long-lived entrypoints (CLI, gateway, cron scheduler).
+
+        Never raises. On any failure, logs a warning and returns a dict
+        with ``"error"`` set.
+
+        Returns a dict with keys:
+          - ``"skipped"`` (bool) — true if within min_interval_hours of last run
+          - ``"pruned"`` (int)   — number of sessions deleted
+          - ``"vacuumed"`` (bool) — true if VACUUM ran
+          - ``"error"`` (str, optional) — present only on failure
+        """
+        result: Dict[str, Any] = {"skipped": False, "pruned": 0, "vacuumed": False}
+        try:
+            # Skip if another process/call did maintenance recently.
+            last_raw = self.get_meta("last_auto_prune")
+            now = time.time()
+            if last_raw:
+                try:
+                    last_ts = float(last_raw)
+                    if now - last_ts < min_interval_hours * 3600:
+                        result["skipped"] = True
+                        return result
+                except (TypeError, ValueError):
+                    pass  # corrupt meta; treat as no prior run
+
+            pruned = self.prune_sessions(older_than_days=retention_days)
+            result["pruned"] = pruned
+
+            # Only VACUUM if we actually freed rows — VACUUM on a tight DB
+            # is wasted I/O. Threshold keeps small DBs from paying the cost.
+            if vacuum and pruned > 0:
+                try:
+                    self.vacuum()
+                    result["vacuumed"] = True
+                except Exception as exc:
+                    logger.warning("state.db VACUUM failed: %s", exc)
+
+            # Record the attempt even if pruned == 0, so we don't retry
+            # every startup within the min_interval_hours window.
+            self.set_meta("last_auto_prune", str(now))
+
+            if pruned > 0:
+                logger.info(
+                    "state.db auto-maintenance: pruned %d session(s) older than %d days%s",
+                    pruned,
+                    retention_days,
+                    " + VACUUM" if result["vacuumed"] else "",
+                )
+        except Exception as exc:
+            # Maintenance must never block startup. Log and return error marker.
+            logger.warning("state.db auto-maintenance failed: %s", exc)
+            result["error"] = str(exc)
+
+        return result
+
@@ -108,9 +108,15 @@ def _run_async(coro):
    if loop and loop.is_running():
        # Inside an async context (gateway, RL env) — run in a fresh thread.
        import concurrent.futures
-        with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
-            future = pool.submit(asyncio.run, coro)
+        pool = concurrent.futures.ThreadPoolExecutor(max_workers=1)
+        future = pool.submit(asyncio.run, coro)
+        try:
            return future.result(timeout=300)
+        except concurrent.futures.TimeoutError:
+            future.cancel()
+            raise
+        finally:
+            pool.shutdown(wait=False, cancel_futures=True)

    # If we're on a worker thread (e.g., parallel tool execution in
    # delegate_task), use a per-thread persistent loop.  This avoids
@@ -28,7 +28,7 @@

  let
    cfg = config.services.hermes-agent;
-    hermes-agent = inputs.self.packages.${pkgs.system}.default;
+    hermes-agent = inputs.self.packages.${pkgs.stdenv.hostPlatform.system}.default;

    # Deep-merge config type (from 0xrsydn/nix-hermes-agent)
    deepConfigType = lib.types.mkOptionType {
@@ -777,7 +777,10 @@ HERMES_NIX_ENV_EOF
            NoNewPrivileges = true;
            ProtectSystem = "strict";
            ProtectHome = false;
-            ReadWritePaths = [ cfg.stateDir ];
+            ReadWritePaths = [
+              cfg.stateDir
+              cfg.workingDirectory
+            ];
            PrivateTmp = true;
          };

@@ -0,0 +1,5 @@
+# Web Development
+
+Optional skills for client-side web development workflows — embedding agents, copilots, and AI-native UX patterns into user-facing web apps.
+
+These are distinct from Hermes' own browser automation (Browserbase, Camofox), which operate *on* websites from outside. Web-development skills here help users build *into* their own websites.
@@ -0,0 +1,189 @@
+---
+name: page-agent
+description: Embed alibaba/page-agent into your own web application — a pure-JavaScript in-page GUI agent that ships as a single <script> tag or npm package and lets end-users of your site drive the UI with natural language ("click login, fill username as John"). No Python, no headless browser, no extension required. Use this skill when the user is a web developer who wants to add an AI copilot to their SaaS / admin panel / B2B tool, make a legacy web app accessible via natural language, or evaluate page-agent against a local (Ollama) or cloud (Qwen / OpenAI / OpenRouter) LLM. NOT for server-side browser automation — point those users to Hermes' built-in browser tool instead.
+version: 1.0.0
+author: Hermes Agent
+license: MIT
+metadata:
+  hermes:
+    tags: [web, javascript, agent, browser, gui, alibaba, embed, copilot, saas]
+    category: web-development
+---
+
+# page-agent
+
+alibaba/page-agent (https://github.com/alibaba/page-agent, 17k+ stars, MIT) is an in-page GUI agent written in TypeScript. It lives inside a webpage, reads the DOM as text (no screenshots, no multi-modal LLM), and executes natural-language instructions like "click the login button, then fill username as John" against the current page. Pure client-side — the host site just includes a script and passes an OpenAI-compatible LLM endpoint.
+
+## When to use this skill
+
+Load this skill when a user wants to:
+
+- **Ship an AI copilot inside their own web app** (SaaS, admin panel, B2B tool, ERP, CRM) — "users on my dashboard should be able to type 'create invoice for Acme Corp and email it' instead of clicking through five screens"
+- **Modernize a legacy web app** without rewriting the frontend — page-agent drops on top of existing DOM
+- **Add accessibility via natural language** — voice / screen-reader users drive the UI by describing what they want
+- **Demo or evaluate page-agent** against a local (Ollama) or hosted (Qwen, OpenAI, OpenRouter) LLM
+- **Build interactive training / product demos** — let an AI walk a user through "how to submit an expense report" live in the real UI
+
+## When NOT to use this skill
+
+- User wants **Hermes itself to drive a browser** → use Hermes' built-in browser tool (Browserbase / Camofox). page-agent is the *opposite* direction.
+- User wants **cross-tab automation without embedding** → use Playwright, browser-use, or the page-agent Chrome extension
+- User needs **visual grounding / screenshots** → page-agent is text-DOM only; use a multimodal browser agent instead
+
+## Prerequisites
+
+- Node 22.13+ or 24+, npm 10+ (docs claim 11+ but 10.9 works fine)
+- An OpenAI-compatible LLM endpoint: Qwen (DashScope), OpenAI, Ollama, OpenRouter, or anything speaking `/v1/chat/completions`
+- Browser with devtools (for debugging)
+
+## Path 1 — 30-second demo via CDN (no install)
+
+Fastest way to see it work. Uses alibaba's free testing LLM proxy — **for evaluation only**, subject to their terms.
+
+Add to any HTML page (or paste into the devtools console as a bookmarklet):
+
+```html
+<script src="https://cdn.jsdelivr.net/npm/page-agent@1.8.0/dist/iife/page-agent.demo.js" crossorigin="true"></script>
+```
+
+A panel appears. Type an instruction. Done.
+
+Bookmarklet form (drop into bookmarks bar, click on any page):
+
+```javascript
+javascript:(function(){var s=document.createElement('script');s.src='https://cdn.jsdelivr.net/npm/page-agent@1.8.0/dist/iife/page-agent.demo.js';document.head.appendChild(s);})();
+```
+
+## Path 2 — npm install into your own web app (production use)
+
+Inside an existing web project (React / Vue / Svelte / plain):
+
+```bash
+npm install page-agent
+```
+
+Wire it up with your own LLM endpoint — **never ship the demo CDN to real users**:
+
+```javascript
+import { PageAgent } from 'page-agent'
+
+const agent = new PageAgent({
+    model: 'qwen3.5-plus',
+    baseURL: 'https://dashscope.aliyuncs.com/compatible-mode/v1',
+    apiKey: process.env.LLM_API_KEY,   // never hardcode
+    language: 'en-US',
+})
+
+// Show the panel for end users:
+agent.panel.show()
+
+// Or drive it programmatically:
+await agent.execute('Click submit button, then fill username as John')
+```
+
+Provider examples (any OpenAI-compatible endpoint works):
+
+| Provider | `baseURL` | `model` |
+|----------|-----------|---------|
+| Qwen / DashScope | `https://dashscope.aliyuncs.com/compatible-mode/v1` | `qwen3.5-plus` |
+| OpenAI | `https://api.openai.com/v1` | `gpt-4o-mini` |
+| Ollama (local) | `http://localhost:11434/v1` | `qwen3:14b` |
+| OpenRouter | `https://openrouter.ai/api/v1` | `anthropic/claude-sonnet-4.6` |
+
+**Key config fields** (passed to `new PageAgent({...})`):
+
+- `model`, `baseURL`, `apiKey` — LLM connection
+- `language` — UI language (`en-US`, `zh-CN`, etc.)
+- Allowlist and data-masking hooks exist for locking down what the agent can touch — see https://alibaba.github.io/page-agent/ for the full option list
+
+**Security.** Don't put your `apiKey` in client-side code for a real deployment — proxy LLM calls through your backend and point `baseURL` at your proxy. The demo CDN exists because alibaba runs that proxy for evaluation.
+
+## Path 3 — clone the source repo (contributing, or hacking on it)
+
+Use this when the user wants to modify page-agent itself, test it against arbitrary sites via a local IIFE bundle, or develop the browser extension.
+
+```bash
+git clone https://github.com/alibaba/page-agent.git
+cd page-agent
+npm ci              # exact lockfile install (or `npm i` to allow updates)
+```
+
+Create `.env` in the repo root with an LLM endpoint. Example:
+
+```
+LLM_MODEL_NAME=gpt-4o-mini
+LLM_API_KEY=sk-...
+LLM_BASE_URL=https://api.openai.com/v1
+```
+
+Ollama flavor:
+
+```
+LLM_BASE_URL=http://localhost:11434/v1
+LLM_API_KEY=NA
+LLM_MODEL_NAME=qwen3:14b
+```
+
+Common commands:
+
+```bash
+npm start           # docs/website dev server
+npm run build       # build every package
+npm run dev:demo    # serve IIFE bundle at http://localhost:5174/page-agent.demo.js
+npm run dev:ext     # develop the browser extension (WXT + React)
+npm run build:ext   # build the extension
+```
+
+**Test on any website** using the local IIFE bundle. Add this bookmarklet:
+
+```javascript
+javascript:(function(){var s=document.createElement('script');s.src=`http://localhost:5174/page-agent.demo.js?t=${Math.random()}`;s.onload=()=>console.log('PageAgent ready!');document.head.appendChild(s);})();
+```
+
+Then: `npm run dev:demo`, click the bookmarklet on any page, and the local build injects. Auto-rebuilds on save.
+
+**Warning:** your `.env` `LLM_API_KEY` is inlined into the IIFE bundle during dev builds. Don't share the bundle. Don't commit it. Don't paste the URL into Slack. (Verified: grepping the public dev bundle returns the literal values from `.env`.)
+
+## Repo layout (Path 3)
+
+Monorepo with npm workspaces. Key packages:
+
+| Package | Path | Purpose |
+|---------|------|---------|
+| `page-agent` | `packages/page-agent/` | Main entry with UI panel |
+| `@page-agent/core` | `packages/core/` | Core agent logic, no UI |
+| `@page-agent/mcp` | `packages/mcp/` | MCP server (beta) |
+| — | `packages/llms/` | LLM client |
+| — | `packages/page-controller/` | DOM ops + visual feedback |
+| — | `packages/ui/` | Panel + i18n |
+| — | `packages/extension/` | Chrome/Firefox extension |
+| — | `packages/website/` | Docs + landing site |
+
+## Verifying it works
+
+After Path 1 or Path 2:
+1. Open the page in a browser with devtools open
+2. You should see a floating panel. If not, check the console for errors (most common: CORS on the LLM endpoint, wrong `baseURL`, or a bad API key)
+3. Type a simple instruction matching something visible on the page ("click the Login link")
+4. Watch the Network tab — you should see a request to your `baseURL`
+
+After Path 3:
+1. `npm run dev:demo` prints `Accepting connections at http://localhost:5174`
+2. `curl -I http://localhost:5174/page-agent.demo.js` returns `HTTP/1.1 200 OK` with `Content-Type: application/javascript`
+3. Click the bookmarklet on any site; panel appears
+
+## Pitfalls
+
+- **Demo CDN in production** — don't. It's rate-limited, uses alibaba's free proxy, and their terms forbid production use.
+- **API key exposure** — any key passed to `new PageAgent({apiKey: ...})` ships in your JS bundle. Always proxy through your own backend for real deployments.
+- **Non-OpenAI-compatible endpoints** fail silently or with cryptic errors. If your provider needs native Anthropic/Gemini formatting, use an OpenAI-compatibility proxy (LiteLLM, OpenRouter) in front.
+- **CSP blocks** — sites with strict Content-Security-Policy may refuse to load the CDN script or disallow inline eval. In that case, self-host from your origin.
+- **Restart dev server** after editing `.env` in Path 3 — Vite only reads env at startup.
+- **Node version** — the repo declares `^22.13.0 || >=24`. Node 20 will fail `npm ci` with engine errors.
+- **npm 10 vs 11** — docs say npm 11+; npm 10.9 actually works fine.
+
+## Reference
+
+- Repo: https://github.com/alibaba/page-agent
+- Docs: https://alibaba.github.io/page-agent/
+- License: MIT (built on browser-use's DOM processing internals, Copyright 2024 Gregor Zunic)
@@ -16,8 +16,8 @@
  },
  "homepage": "https://github.com/NousResearch/Hermes-Agent#readme",
  "dependencies": {
-    "agent-browser": "^0.13.0",
-    "@askjo/camofox-browser": "^1.5.2"
+    "@askjo/camofox-browser": "^1.5.2",
+    "agent-browser": "^0.26.0"
  },
  "overrides": {
    "lodash": "4.18.1"
@@ -0,0 +1,378 @@
+"""OpenAI image generation backend — ChatGPT/Codex OAuth variant.
+
+Identical model catalog and tier semantics to the ``openai`` image-gen plugin
+(``gpt-image-2`` at low/medium/high quality), but routes the request through
+the Codex Responses API ``image_generation`` tool instead of the
+``images.generate`` REST endpoint. This lets users who are already
+authenticated with Codex/ChatGPT generate images without configuring a
+separate ``OPENAI_API_KEY``.
+
+Selection precedence for the tier (first hit wins):
+
+1. ``OPENAI_IMAGE_MODEL`` env var (escape hatch for scripts / tests)
+2. ``image_gen.openai-codex.model`` in ``config.yaml``
+3. ``image_gen.model`` in ``config.yaml`` (when it's one of our tier IDs)
+4. :data:`DEFAULT_MODEL` — ``gpt-image-2-medium``
+
+Output is saved as PNG under ``$HERMES_HOME/cache/images/``.
+"""
+
+from __future__ import annotations
+
+import logging
+from typing import Any, Dict, List, Optional, Tuple
+
+from agent.image_gen_provider import (
+    DEFAULT_ASPECT_RATIO,
+    ImageGenProvider,
+    error_response,
+    resolve_aspect_ratio,
+    save_b64_image,
+    success_response,
+)
+
+logger = logging.getLogger(__name__)
+
+
+# ---------------------------------------------------------------------------
+# Model catalog — mirrors the ``openai`` plugin so the picker UX is identical.
+# ---------------------------------------------------------------------------
+
+API_MODEL = "gpt-image-2"
+
+_MODELS: Dict[str, Dict[str, Any]] = {
+    "gpt-image-2-low": {
+        "display": "GPT Image 2 (Low)",
+        "speed": "~15s",
+        "strengths": "Fast iteration, lowest cost",
+        "quality": "low",
+    },
+    "gpt-image-2-medium": {
+        "display": "GPT Image 2 (Medium)",
+        "speed": "~40s",
+        "strengths": "Balanced — default",
+        "quality": "medium",
+    },
+    "gpt-image-2-high": {
+        "display": "GPT Image 2 (High)",
+        "speed": "~2min",
+        "strengths": "Highest fidelity, strongest prompt adherence",
+        "quality": "high",
+    },
+}
+
+DEFAULT_MODEL = "gpt-image-2-medium"
+
+_SIZES = {
+    "landscape": "1536x1024",
+    "square": "1024x1024",
+    "portrait": "1024x1536",
+}
+
+# Codex Responses surface used for the request. The chat model itself is only
+# the host that calls the ``image_generation`` tool; the actual image work is
+# done by ``API_MODEL``.
+_CODEX_CHAT_MODEL = "gpt-5.4"
+_CODEX_BASE_URL = "https://chatgpt.com/backend-api/codex"
+_CODEX_INSTRUCTIONS = (
+    "You are an assistant that must fulfill image generation requests by "
+    "using the image_generation tool when provided."
+)
+
+
+# ---------------------------------------------------------------------------
+# Config + auth helpers
+# ---------------------------------------------------------------------------
+
+
+def _load_image_gen_config() -> Dict[str, Any]:
+    """Read ``image_gen`` from config.yaml (returns {} on any failure)."""
+    try:
+        from hermes_cli.config import load_config
+
+        cfg = load_config()
+        section = cfg.get("image_gen") if isinstance(cfg, dict) else None
+        return section if isinstance(section, dict) else {}
+    except Exception as exc:
+        logger.debug("Could not load image_gen config: %s", exc)
+        return {}
+
+
+def _resolve_model() -> Tuple[str, Dict[str, Any]]:
+    """Decide which tier to use and return ``(model_id, meta)``."""
+    import os
+
+    env_override = os.environ.get("OPENAI_IMAGE_MODEL")
+    if env_override and env_override in _MODELS:
+        return env_override, _MODELS[env_override]
+
+    cfg = _load_image_gen_config()
+    sub = cfg.get("openai-codex") if isinstance(cfg.get("openai-codex"), dict) else {}
+    candidate: Optional[str] = None
+    if isinstance(sub, dict):
+        value = sub.get("model")
+        if isinstance(value, str) and value in _MODELS:
+            candidate = value
+    if candidate is None:
+        top = cfg.get("model")
+        if isinstance(top, str) and top in _MODELS:
+            candidate = top
+
+    if candidate is not None:
+        return candidate, _MODELS[candidate]
+
+    return DEFAULT_MODEL, _MODELS[DEFAULT_MODEL]
+
+
+def _read_codex_access_token() -> Optional[str]:
+    """Return a usable Codex OAuth token, or None.
+
+    Delegates to the canonical reader in ``agent.auxiliary_client`` so token
+    expiry, credential pool selection, and JWT decoding stay in one place.
+    """
+    try:
+        from agent.auxiliary_client import _read_codex_access_token as _reader
+
+        token = _reader()
+        if isinstance(token, str) and token.strip():
+            return token.strip()
+        return None
+    except Exception as exc:
+        logger.debug("Could not resolve Codex access token: %s", exc)
+        return None
+
+
+def _build_codex_client():
+    """Return an OpenAI client pointed at the ChatGPT/Codex backend, or None."""
+    token = _read_codex_access_token()
+    if not token:
+        return None
+    try:
+        import openai
+        from agent.auxiliary_client import _codex_cloudflare_headers
+
+        return openai.OpenAI(
+            api_key=token,
+            base_url=_CODEX_BASE_URL,
+            default_headers=_codex_cloudflare_headers(token),
+        )
+    except Exception as exc:
+        logger.debug("Could not build Codex image client: %s", exc)
+        return None
+
+
+def _collect_image_b64(client: Any, *, prompt: str, size: str, quality: str) -> Optional[str]:
+    """Stream a Codex Responses image_generation call and return the b64 image."""
+    image_b64: Optional[str] = None
+
+    with client.responses.stream(
+        model=_CODEX_CHAT_MODEL,
+        store=False,
+        instructions=_CODEX_INSTRUCTIONS,
+        input=[{
+            "type": "message",
+            "role": "user",
+            "content": [{"type": "input_text", "text": prompt}],
+        }],
+        tools=[{
+            "type": "image_generation",
+            "model": API_MODEL,
+            "size": size,
+            "quality": quality,
+            "output_format": "png",
+            "background": "opaque",
+            "partial_images": 1,
+        }],
+        tool_choice={
+            "type": "allowed_tools",
+            "mode": "required",
+            "tools": [{"type": "image_generation"}],
+        },
+    ) as stream:
+        for event in stream:
+            event_type = getattr(event, "type", "")
+            if event_type == "response.output_item.done":
+                item = getattr(event, "item", None)
+                if getattr(item, "type", None) == "image_generation_call":
+                    result = getattr(item, "result", None)
+                    if isinstance(result, str) and result:
+                        image_b64 = result
+            elif event_type == "response.image_generation_call.partial_image":
+                partial = getattr(event, "partial_image_b64", None)
+                if isinstance(partial, str) and partial:
+                    image_b64 = partial
+        final = stream.get_final_response()
+
+    # Final-response sweep covers the case where the stream finished before
+    # we observed the ``output_item.done`` event for the image call.
+    for item in getattr(final, "output", None) or []:
+        if getattr(item, "type", None) == "image_generation_call":
+            result = getattr(item, "result", None)
+            if isinstance(result, str) and result:
+                image_b64 = result
+
+    return image_b64
+
+
+# ---------------------------------------------------------------------------
+# Provider
+# ---------------------------------------------------------------------------
+
+
+class OpenAICodexImageGenProvider(ImageGenProvider):
+    """gpt-image-2 routed through ChatGPT/Codex OAuth instead of an API key."""
+
+    @property
+    def name(self) -> str:
+        return "openai-codex"
+
+    @property
+    def display_name(self) -> str:
+        return "OpenAI (Codex auth)"
+
+    def is_available(self) -> bool:
+        if not _read_codex_access_token():
+            return False
+        try:
+            import openai  # noqa: F401
+        except ImportError:
+            return False
+        return True
+
+    def list_models(self) -> List[Dict[str, Any]]:
+        return [
+            {
+                "id": model_id,
+                "display": meta["display"],
+                "speed": meta["speed"],
+                "strengths": meta["strengths"],
+                "price": "varies",
+            }
+            for model_id, meta in _MODELS.items()
+        ]
+
+    def default_model(self) -> Optional[str]:
+        return DEFAULT_MODEL
+
+    def get_setup_schema(self) -> Dict[str, Any]:
+        return {
+            "name": "OpenAI (Codex auth)",
+            "badge": "free",
+            "tag": "gpt-image-2 via ChatGPT/Codex OAuth — no API key required",
+            "env_vars": [],
+            "post_setup_hint": (
+                "Sign in with `hermes auth codex` (or `hermes setup` → Codex) "
+                "if you haven't already. No API key needed."
+            ),
+        }
+
+    def generate(
+        self,
+        prompt: str,
+        aspect_ratio: str = DEFAULT_ASPECT_RATIO,
+        **kwargs: Any,
+    ) -> Dict[str, Any]:
+        prompt = (prompt or "").strip()
+        aspect = resolve_aspect_ratio(aspect_ratio)
+
+        if not prompt:
+            return error_response(
+                error="Prompt is required and must be a non-empty string",
+                error_type="invalid_argument",
+                provider="openai-codex",
+                aspect_ratio=aspect,
+            )
+
+        if not _read_codex_access_token():
+            return error_response(
+                error=(
+                    "No Codex/ChatGPT OAuth credentials available. Run "
+                    "`hermes auth codex` (or `hermes setup` → Codex) to sign in."
+                ),
+                error_type="auth_required",
+                provider="openai-codex",
+                aspect_ratio=aspect,
+            )
+
+        try:
+            import openai  # noqa: F401
+        except ImportError:
+            return error_response(
+                error="openai Python package not installed (pip install openai)",
+                error_type="missing_dependency",
+                provider="openai-codex",
+                aspect_ratio=aspect,
+            )
+
+        tier_id, meta = _resolve_model()
+        size = _SIZES.get(aspect, _SIZES["square"])
+
+        client = _build_codex_client()
+        if client is None:
+            return error_response(
+                error="Could not initialize Codex image client",
+                error_type="auth_required",
+                provider="openai-codex",
+                model=tier_id,
+                prompt=prompt,
+                aspect_ratio=aspect,
+            )
+
+        try:
+            b64 = _collect_image_b64(
+                client,
+                prompt=prompt,
+                size=size,
+                quality=meta["quality"],
+            )
+        except Exception as exc:
+            logger.debug("Codex image generation failed", exc_info=True)
+            return error_response(
+                error=f"OpenAI image generation via Codex auth failed: {exc}",
+                error_type="api_error",
+                provider="openai-codex",
+                model=tier_id,
+                prompt=prompt,
+                aspect_ratio=aspect,
+            )
+
+        if not b64:
+            return error_response(
+                error="Codex response contained no image_generation_call result",
+                error_type="empty_response",
+                provider="openai-codex",
+                model=tier_id,
+                prompt=prompt,
+                aspect_ratio=aspect,
+            )
+
+        try:
+            saved_path = save_b64_image(b64, prefix=f"openai_codex_{tier_id}")
+        except Exception as exc:
+            return error_response(
+                error=f"Could not save image to cache: {exc}",
+                error_type="io_error",
+                provider="openai-codex",
+                model=tier_id,
+                prompt=prompt,
+                aspect_ratio=aspect,
+            )
+
+        return success_response(
+            image=str(saved_path),
+            model=tier_id,
+            prompt=prompt,
+            aspect_ratio=aspect,
+            provider="openai-codex",
+            extra={"size": size, "quality": meta["quality"]},
+        )
+
+
+# ---------------------------------------------------------------------------
+# Plugin entry point
+# ---------------------------------------------------------------------------
+
+
+def register(ctx) -> None:
+    """Plugin entry point — register the Codex-backed image-gen provider."""
+    ctx.register_image_gen_provider(OpenAICodexImageGenProvider())
@@ -0,0 +1,5 @@
+name: openai-codex
+version: 1.0.0
+description: "OpenAI image generation backed by ChatGPT/Codex OAuth (gpt-image-2 via the Responses image_generation tool). Saves generated images to $HERMES_HOME/cache/images/."
+author: NousResearch
+kind: backend
@@ -84,7 +84,10 @@ Config file: `~/.hermes/hindsight/config.json`
 | `retain_async` | `true` | Process retain asynchronously on the Hindsight server |
 | `retain_every_n_turns` | `1` | Retain every N turns (1 = every turn) |
 | `retain_context` | `conversation between Hermes Agent and the User` | Context label for retained memories |
-| `tags` | — | Tags applied when storing memories |
+| `retain_tags` | — | Default tags applied to retained memories; merged with per-call tool tags |
+| `retain_source` | — | Optional `metadata.source` attached to retained memories |
+| `retain_user_prefix` | `User` | Label used before user turns in auto-retained transcripts |
+| `retain_assistant_prefix` | `Assistant` | Label used before assistant turns in auto-retained transcripts |

 ### Integration

@@ -113,7 +116,7 @@ Available in `hybrid` and `tools` memory modes:

 | Tool | Description |
 |------|-------------|
-| `hindsight_retain` | Store information with auto entity extraction |
+| `hindsight_retain` | Store information with auto entity extraction; supports optional per-call `tags` |
 | `hindsight_recall` | Multi-strategy search (semantic + entity graph) |
 | `hindsight_reflect` | Cross-memory synthesis (LLM-powered) |

@@ -6,11 +6,15 @@ retrieval. Supports cloud (API key) and local modes.
 Original PR #1811 by benfrank241, adapted to MemoryProvider ABC.

 Config via environment variables:
-  HINDSIGHT_API_KEY   — API key for Hindsight Cloud
-  HINDSIGHT_BANK_ID   — memory bank identifier (default: hermes)
-  HINDSIGHT_BUDGET    — recall budget: low/mid/high (default: mid)
-  HINDSIGHT_API_URL   — API endpoint
-  HINDSIGHT_MODE      — cloud or local (default: cloud)
+  HINDSIGHT_API_KEY                — API key for Hindsight Cloud
+  HINDSIGHT_BANK_ID                — memory bank identifier (default: hermes)
+  HINDSIGHT_BUDGET                 — recall budget: low/mid/high (default: mid)
+  HINDSIGHT_API_URL                — API endpoint
+  HINDSIGHT_MODE                   — cloud or local (default: cloud)
+  HINDSIGHT_RETAIN_TAGS            — comma-separated tags attached to retained memories
+  HINDSIGHT_RETAIN_SOURCE          — metadata source value attached to retained memories
+  HINDSIGHT_RETAIN_USER_PREFIX     — label used before user turns in retained transcripts
+  HINDSIGHT_RETAIN_ASSISTANT_PREFIX — label used before assistant turns in retained transcripts

 Or via $HERMES_HOME/hindsight/config.json (profile-scoped), falling back to
 ~/.hindsight/config.json (legacy, shared) for backward compatibility.
@@ -24,7 +28,7 @@ import logging
 import os
 import threading

-from hermes_constants import get_hermes_home
+from datetime import datetime, timezone
 from typing import Any, Dict, List

 from agent.memory_provider import MemoryProvider
@@ -99,6 +103,11 @@ RETAIN_SCHEMA = {
        "properties": {
            "content": {"type": "string", "description": "The information to store."},
            "context": {"type": "string", "description": "Short label (e.g. 'user preference', 'project decision')."},
+            "tags": {
+                "type": "array",
+                "items": {"type": "string"},
+                "description": "Optional per-call tags to merge with configured default retain tags.",
+            },
        },
        "required": ["content"],
    },
@@ -168,6 +177,10 @@ def _load_config() -> dict:
    return {
        "mode": os.environ.get("HINDSIGHT_MODE", "cloud"),
        "apiKey": os.environ.get("HINDSIGHT_API_KEY", ""),
+        "retain_tags": os.environ.get("HINDSIGHT_RETAIN_TAGS", ""),
+        "retain_source": os.environ.get("HINDSIGHT_RETAIN_SOURCE", ""),
+        "retain_user_prefix": os.environ.get("HINDSIGHT_RETAIN_USER_PREFIX", "User"),
+        "retain_assistant_prefix": os.environ.get("HINDSIGHT_RETAIN_ASSISTANT_PREFIX", "Assistant"),
        "banks": {
            "hermes": {
                "bankId": os.environ.get("HINDSIGHT_BANK_ID", "hermes"),
@@ -178,6 +191,48 @@ def _load_config() -> dict:
    }


+def _normalize_retain_tags(value: Any) -> List[str]:
+    """Normalize tag config/tool values to a deduplicated list of strings."""
+    if value is None:
+        return []
+
+    raw_items: list[Any]
+    if isinstance(value, list):
+        raw_items = value
+    elif isinstance(value, str):
+        text = value.strip()
+        if not text:
+            return []
+        if text.startswith("["):
+            try:
+                parsed = json.loads(text)
+            except Exception:
+                parsed = None
+            if isinstance(parsed, list):
+                raw_items = parsed
+            else:
+                raw_items = text.split(",")
+        else:
+            raw_items = text.split(",")
+    else:
+        raw_items = [value]
+
+    normalized = []
+    seen = set()
+    for item in raw_items:
+        tag = str(item).strip()
+        if not tag or tag in seen:
+            continue
+        seen.add(tag)
+        normalized.append(tag)
+    return normalized
+
+
+def _utc_timestamp() -> str:
+    """Return current UTC timestamp in ISO-8601 with milliseconds and Z suffix."""
+    return datetime.now(timezone.utc).isoformat(timespec="milliseconds").replace("+00:00", "Z")
+
+
 # ---------------------------------------------------------------------------
 # MemoryProvider implementation
 # ---------------------------------------------------------------------------
@@ -195,6 +250,19 @@ class HindsightMemoryProvider(MemoryProvider):
        self._llm_base_url = ""
        self._memory_mode = "hybrid"  # "context", "tools", or "hybrid"
        self._prefetch_method = "recall"  # "recall" or "reflect"
+        self._retain_tags: List[str] = []
+        self._retain_source = ""
+        self._retain_user_prefix = "User"
+        self._retain_assistant_prefix = "Assistant"
+        self._platform = ""
+        self._user_id = ""
+        self._user_name = ""
+        self._chat_id = ""
+        self._chat_name = ""
+        self._chat_type = ""
+        self._thread_id = ""
+        self._agent_identity = ""
+        self._turn_index = 0
        self._client = None
        self._prefetch_result = ""
        self._prefetch_lock = threading.Lock()
@@ -210,6 +278,7 @@ class HindsightMemoryProvider(MemoryProvider):
        # Retain controls
        self._auto_retain = True
        self._retain_every_n_turns = 1
+        self._retain_async = True
        self._retain_context = "conversation between Hermes Agent and the User"
        self._turn_counter = 0
        self._session_turns: list[str] = []  # accumulates ALL turns for the session
@@ -224,7 +293,6 @@ class HindsightMemoryProvider(MemoryProvider):
        # Bank
        self._bank_mission = ""
        self._bank_retain_mission: str | None = None
-        self._retain_async = True

    @property
    def name(self) -> str:
@@ -423,7 +491,10 @@ class HindsightMemoryProvider(MemoryProvider):
            {"key": "recall_budget", "description": "Recall thoroughness", "default": "mid", "choices": ["low", "mid", "high"]},
            {"key": "memory_mode", "description": "Memory integration mode", "default": "hybrid", "choices": ["hybrid", "context", "tools"]},
            {"key": "recall_prefetch_method", "description": "Auto-recall method", "default": "recall", "choices": ["recall", "reflect"]},
-            {"key": "tags", "description": "Tags applied when storing memories (comma-separated)", "default": ""},
+            {"key": "retain_tags", "description": "Default tags applied to retained memories (comma-separated)", "default": ""},
+            {"key": "retain_source", "description": "Metadata source value attached to retained memories", "default": ""},
+            {"key": "retain_user_prefix", "description": "Label used before user turns in retained transcripts", "default": "User"},
+            {"key": "retain_assistant_prefix", "description": "Label used before assistant turns in retained transcripts", "default": "Assistant"},
            {"key": "recall_tags", "description": "Tags to filter when searching memories (comma-separated)", "default": ""},
            {"key": "recall_tags_match", "description": "Tag matching mode for recall", "default": "any", "choices": ["any", "all", "any_strict", "all_strict"]},
            {"key": "auto_recall", "description": "Automatically recall memories before each turn", "default": True},
@@ -467,7 +538,7 @@ class HindsightMemoryProvider(MemoryProvider):
        return self._client

    def initialize(self, session_id: str, **kwargs) -> None:
-        self._session_id = session_id
+        self._session_id = str(session_id or "").strip()

        # Check client version and auto-upgrade if needed
        try:
@@ -496,6 +567,16 @@ class HindsightMemoryProvider(MemoryProvider):
            pass  # packaging not available or other issue — proceed anyway

        self._config = _load_config()
+        self._platform = str(kwargs.get("platform") or "").strip()
+        self._user_id = str(kwargs.get("user_id") or "").strip()
+        self._user_name = str(kwargs.get("user_name") or "").strip()
+        self._chat_id = str(kwargs.get("chat_id") or "").strip()
+        self._chat_name = str(kwargs.get("chat_name") or "").strip()
+        self._chat_type = str(kwargs.get("chat_type") or "").strip()
+        self._thread_id = str(kwargs.get("thread_id") or "").strip()
+        self._agent_identity = str(kwargs.get("agent_identity") or "").strip()
+        self._turn_index = 0
+        self._session_turns = []
        self._mode = self._config.get("mode", "cloud")
        # "local" is a legacy alias for "local_embedded"
        if self._mode == "local":
@@ -513,7 +594,7 @@ class HindsightMemoryProvider(MemoryProvider):
        memory_mode = self._config.get("memory_mode", "hybrid")
        self._memory_mode = memory_mode if memory_mode in ("context", "tools", "hybrid") else "hybrid"

-        prefetch_method = self._config.get("recall_prefetch_method", "recall")
+        prefetch_method = self._config.get("recall_prefetch_method") or self._config.get("prefetch_method", "recall")
        self._prefetch_method = prefetch_method if prefetch_method in ("recall", "reflect") else "recall"

        # Bank options
@@ -521,9 +602,22 @@ class HindsightMemoryProvider(MemoryProvider):
        self._bank_retain_mission = self._config.get("bank_retain_mission") or None

        # Tags
-        self._tags = self._config.get("tags") or None
+        self._retain_tags = _normalize_retain_tags(
+            self._config.get("retain_tags")
+            or os.environ.get("HINDSIGHT_RETAIN_TAGS", "")
+        )
+        self._tags = self._retain_tags or None
        self._recall_tags = self._config.get("recall_tags") or None
        self._recall_tags_match = self._config.get("recall_tags_match", "any")
+        self._retain_source = str(
+            self._config.get("retain_source") or os.environ.get("HINDSIGHT_RETAIN_SOURCE", "")
+        ).strip()
+        self._retain_user_prefix = str(
+            self._config.get("retain_user_prefix") or os.environ.get("HINDSIGHT_RETAIN_USER_PREFIX", "User")
+        ).strip() or "User"
+        self._retain_assistant_prefix = str(
+            self._config.get("retain_assistant_prefix") or os.environ.get("HINDSIGHT_RETAIN_ASSISTANT_PREFIX", "Assistant")
+        ).strip() or "Assistant"

        # Retain controls
        self._auto_retain = self._config.get("auto_retain", True)
@@ -547,11 +641,9 @@ class HindsightMemoryProvider(MemoryProvider):
        logger.info("Hindsight initialized: mode=%s, api_url=%s, bank=%s, budget=%s, memory_mode=%s, prefetch_method=%s, client=%s",
                     self._mode, self._api_url, self._bank_id, self._budget, self._memory_mode, self._prefetch_method, _client_version)
        logger.debug("Hindsight config: auto_retain=%s, auto_recall=%s, retain_every_n=%d, "
-                     "retain_async=%s, retain_context=%s, "
-                     "recall_max_tokens=%d, recall_max_input_chars=%d, tags=%s, recall_tags=%s",
+                     "retain_async=%s, retain_context=%s, recall_max_tokens=%d, recall_max_input_chars=%d, tags=%s, recall_tags=%s",
                     self._auto_retain, self._auto_recall, self._retain_every_n_turns,
-                     self._retain_async, self._retain_context,
-                     self._recall_max_tokens, self._recall_max_input_chars,
+                     self._retain_async, self._retain_context, self._recall_max_tokens, self._recall_max_input_chars,
                     self._tags, self._recall_tags)

        # For local mode, start the embedded daemon in the background so it
@@ -712,6 +804,78 @@ class HindsightMemoryProvider(MemoryProvider):
        self._prefetch_thread = threading.Thread(target=_run, daemon=True, name="hindsight-prefetch")
        self._prefetch_thread.start()

+    def _build_turn_messages(self, user_content: str, assistant_content: str) -> List[Dict[str, str]]:
+        now = datetime.now(timezone.utc).isoformat()
+        return [
+            {
+                "role": "user",
+                "content": f"{self._retain_user_prefix}: {user_content}",
+                "timestamp": now,
+            },
+            {
+                "role": "assistant",
+                "content": f"{self._retain_assistant_prefix}: {assistant_content}",
+                "timestamp": now,
+            },
+        ]
+
+    def _build_metadata(self, *, message_count: int, turn_index: int) -> Dict[str, str]:
+        metadata: Dict[str, str] = {
+            "retained_at": _utc_timestamp(),
+            "message_count": str(message_count),
+            "turn_index": str(turn_index),
+        }
+        if self._retain_source:
+            metadata["source"] = self._retain_source
+        if self._session_id:
+            metadata["session_id"] = self._session_id
+        if self._platform:
+            metadata["platform"] = self._platform
+        if self._user_id:
+            metadata["user_id"] = self._user_id
+        if self._user_name:
+            metadata["user_name"] = self._user_name
+        if self._chat_id:
+            metadata["chat_id"] = self._chat_id
+        if self._chat_name:
+            metadata["chat_name"] = self._chat_name
+        if self._chat_type:
+            metadata["chat_type"] = self._chat_type
+        if self._thread_id:
+            metadata["thread_id"] = self._thread_id
+        if self._agent_identity:
+            metadata["agent_identity"] = self._agent_identity
+        return metadata
+
+    def _build_retain_kwargs(
+        self,
+        content: str,
+        *,
+        context: str | None = None,
+        document_id: str | None = None,
+        metadata: Dict[str, str] | None = None,
+        tags: List[str] | None = None,
+        retain_async: bool | None = None,
+    ) -> Dict[str, Any]:
+        kwargs: Dict[str, Any] = {
+            "bank_id": self._bank_id,
+            "content": content,
+            "metadata": metadata or self._build_metadata(message_count=1, turn_index=self._turn_index),
+        }
+        if context is not None:
+            kwargs["context"] = context
+        if document_id:
+            kwargs["document_id"] = document_id
+        if retain_async is not None:
+            kwargs["retain_async"] = retain_async
+        merged_tags = _normalize_retain_tags(self._retain_tags)
+        for tag in _normalize_retain_tags(tags):
+            if tag not in merged_tags:
+                merged_tags.append(tag)
+        if merged_tags:
+            kwargs["tags"] = merged_tags
+        return kwargs
+
    def sync_turn(self, user_content: str, assistant_content: str, *, session_id: str = "") -> None:
        """Retain conversation turn in background (non-blocking).

@@ -721,19 +885,14 @@ class HindsightMemoryProvider(MemoryProvider):
            logger.debug("sync_turn: skipped (auto_retain disabled)")
            return

-        from datetime import datetime, timezone
-        now = datetime.now(timezone.utc).isoformat()
+        if session_id:
+            self._session_id = str(session_id).strip()

-        messages = [
-            {"role": "user", "content": user_content, "timestamp": now},
-            {"role": "assistant", "content": assistant_content, "timestamp": now},
-        ]
-
-        turn = json.dumps(messages)
+        turn = json.dumps(self._build_turn_messages(user_content, assistant_content))
        self._session_turns.append(turn)
        self._turn_counter += 1
+        self._turn_index = self._turn_counter

-        # Only retain every N turns
        if self._turn_counter % self._retain_every_n_turns != 0:
            logger.debug("sync_turn: buffered turn %d (will retain at turn %d)",
                         self._turn_counter, self._turn_counter + (self._retain_every_n_turns - self._turn_counter % self._retain_every_n_turns))
@@ -741,19 +900,21 @@ class HindsightMemoryProvider(MemoryProvider):

        logger.debug("sync_turn: retaining %d turns, total session content %d chars",
                     len(self._session_turns), sum(len(t) for t in self._session_turns))
-        # Send the ENTIRE session as a single JSON array (document_id deduplicates).
-        # Each element in _session_turns is a JSON string of that turn's messages.
        content = "[" + ",".join(self._session_turns) + "]"

        def _sync():
            try:
                client = self._get_client()
-                item: dict = {
-                    "content": content,
-                    "context": self._retain_context,
-                }
-                if self._tags:
-                    item["tags"] = self._tags
+                item = self._build_retain_kwargs(
+                    content,
+                    context=self._retain_context,
+                    metadata=self._build_metadata(
+                        message_count=len(self._session_turns) * 2,
+                        turn_index=self._turn_index,
+                    ),
+                )
+                item.pop("bank_id", None)
+                item.pop("retain_async", None)
                logger.debug("Hindsight retain: bank=%s, doc=%s, async=%s, content_len=%d, num_turns=%d",
                             self._bank_id, self._session_id, self._retain_async, len(content), len(self._session_turns))
                _run_sync(client.aretain_batch(
@@ -789,11 +950,11 @@ class HindsightMemoryProvider(MemoryProvider):
                return tool_error("Missing required parameter: content")
            context = args.get("context")
            try:
-                retain_kwargs: dict = {
-                    "bank_id": self._bank_id, "content": content, "context": context,
-                }
-                if self._tags:
-                    retain_kwargs["tags"] = self._tags
+                retain_kwargs = self._build_retain_kwargs(
+                    content,
+                    context=context,
+                    tags=args.get("tags"),
+                )
                logger.debug("Tool hindsight_retain: bank=%s, content_len=%d, context=%s",
                             self._bank_id, len(content), context)
                _run_sync(client.aretain(**retain_kwargs))
@@ -39,12 +39,12 @@ dependencies = [
 [project.optional-dependencies]
 modal = ["modal>=1.0.0,<2"]
 daytona = ["daytona>=0.148.0,<1"]
-dev = ["debugpy>=1.8.0,<2", "pytest>=9.0.2,<10", "pytest-asyncio>=1.3.0,<2", "pytest-xdist>=3.0,<4", "mcp>=1.2.0,<2"]
-messaging = ["python-telegram-bot[webhooks]>=22.6,<23", "discord.py[voice]>=2.7.1,<3", "aiohttp>=3.13.3,<4", "slack-bolt>=1.18.0,<2", "slack-sdk>=3.27.0,<4", "qrcode>=7.0,<8"]
+dev = ["debugpy>=1.8.0,<2", "pytest>=9.0.2,<10", "pytest-asyncio>=1.3.0,<2", "pytest-xdist>=3.0,<4", "mcp>=1.2.0,<2", "ty>=0.0.1a29,<0.0.22", "ruff"]
+messaging = ["python-telegram-bot[webhooks]>=22.6,<23", "discord.py[voice]>=2.7.1,<3", "aiohttp>=3.13.3,<4", "slack-bolt>=1.18.0,<2", "slack-sdk>=3.27.0,<4", "qrcode>=7.0,<8", "mutagen>=1.45,<2", "aiohttp-socks>=0.9,<1"]
 cron = ["croniter>=6.0.0,<7"]
 slack = ["slack-bolt>=1.18.0,<2", "slack-sdk>=3.27.0,<4"]
 matrix = ["mautrix[encryption]>=0.20,<1", "Markdown>=3.6,<4", "aiosqlite>=0.20", "asyncpg>=0.29"]
-cli = ["simple-term-menu>=1.0,<2"]
+cli = ["simple-term-menu>=1.0,<2", "tiktoken>=0.7,<1", "Pillow>=10,<12"]
 tts-premium = ["elevenlabs>=1.0,<2"]
 voice = [
  # Local STT pulls in wheel-only transitive deps (ctranslate2, onnxruntime),
@@ -58,7 +58,7 @@ pty = [
  "pywinpty>=2.0.0,<3; sys_platform == 'win32'",
 ]
 honcho = ["honcho-ai>=2.0.1,<3"]
-mcp = ["mcp>=1.2.0,<2"]
+mcp = ["mcp>=1.2.0,<2", "psutil>=5.9,<7"]
 homeassistant = ["aiohttp>=3.9.0,<4"]
 sms = ["aiohttp>=3.9.0,<4"]
 acp = ["agent-client-protocol>=0.9.0,<1.0"]
@@ -85,7 +85,9 @@ rl = [
  "fastapi>=0.104.0,<1",
  "uvicorn[standard]>=0.24.0,<1",
  "wandb>=0.15.0,<1",
+  "datasets>=2.14,<3",
 ]
+tts-local = ["neutts[all]", "soundfile>=0.12,<1"]
 yc-bench = ["yc-bench @ git+https://github.com/collinear-ai/yc-bench.git@bfb0c88062450f46341bd9a5298903fc2e952a5c ; python_version >= '3.12'"]
 all = [
  "hermes-agent[modal]",
@@ -120,13 +122,13 @@ hermes-agent = "run_agent:main"
 hermes-acp = "acp_adapter.entry:main"

 [tool.setuptools]
-py-modules = ["run_agent", "model_tools", "toolsets", "batch_runner", "trajectory_compressor", "toolset_distributions", "cli", "hermes_constants", "hermes_state", "hermes_time", "hermes_logging", "rl_cli", "utils"]
+py-modules = ["run_agent", "model_tools", "toolsets", "toolset_distributions", "cli", "hermes_constants", "hermes_state", "hermes_time", "hermes_logging", "utils"]

 [tool.setuptools.package-data]
 hermes_cli = ["web_dist/**/*"]

 [tool.setuptools.packages.find]
-include = ["agent", "tools", "tools.*", "hermes_cli", "gateway", "gateway.*", "tui_gateway", "tui_gateway.*", "cron", "acp_adapter", "plugins", "plugins.*"]
+include = ["agent", "agent.*", "tools", "tools.*", "hermes_cli", "gateway", "gateway.*", "tui_gateway", "tui_gateway.*", "cron", "acp_adapter", "plugins", "plugins.*", "scripts"]

 [tool.pytest.ini_options]
 testpaths = ["tests"]
@@ -134,3 +136,28 @@ markers = [
    "integration: marks tests requiring external services (API keys, Modal, etc.)",
 ]
 addopts = "-m 'not integration' -n auto"
+
+[tool.ty.environment]
+python-version = "3.13"
+
+[tool.ty.rules]
+unknown-argument = "warn"
+redundant-cast = "ignore"
+
+[tool.ty.src]
+exclude = ["**"]
+
+[[tool.ty.overrides]]
+include = ["**"]
+
+[tool.ty.overrides.rules]
+unresolved-import = "ignore"
+invalid-method-override = "ignore"
+invalid-assignment = "ignore"
+not-iterable = "ignore"
+
+[tool.ruff]
+exclude = ["*"]
+
+[tool.uv]
+exclude-newer = "7 days"
@@ -37,7 +37,10 @@ import time
 import threading
 from types import SimpleNamespace
 import uuid
-from typing import List, Dict, Any, Optional
+from typing import Callable, List, Dict, Any, Optional, TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from agent.rate_limit_tracker import RateLimitState
 from openai import OpenAI
 import fire
 from datetime import datetime
@@ -76,8 +79,6 @@ from tools.interrupt import set_interrupt as _set_interrupt
 from tools.browser_tool import cleanup_browser


-from hermes_constants import OPENROUTER_BASE_URL
-
 # Agent internals extracted to agent/ package for modularity
 from agent.memory_manager import build_memory_context_block, sanitize_context
 from agent.retry_utils import jittered_backoff
@@ -98,19 +99,11 @@ from agent.model_metadata import (
 from agent.context_compressor import ContextCompressor
 from agent.subdirectory_hints import SubdirectoryHintTracker
 from agent.prompt_caching import apply_anthropic_cache_control
-from agent.prompt_builder import build_skills_system_prompt, build_context_files_prompt, build_environment_hints, load_soul_md, TOOL_USE_ENFORCEMENT_GUIDANCE, TOOL_USE_ENFORCEMENT_MODELS, DEVELOPER_ROLE_MODELS, GOOGLE_MODEL_OPERATIONAL_GUIDANCE, OPENAI_MODEL_EXECUTION_GUIDANCE
+from agent.prompt_builder import build_skills_system_prompt, build_context_files_prompt, build_environment_hints, load_soul_md, TOOL_USE_ENFORCEMENT_GUIDANCE, TOOL_USE_ENFORCEMENT_MODELS, GOOGLE_MODEL_OPERATIONAL_GUIDANCE, OPENAI_MODEL_EXECUTION_GUIDANCE
 from agent.usage_pricing import estimate_usage_cost, normalize_usage
 from agent.codex_responses_adapter import (
-    _chat_content_to_responses_parts,
-    _chat_messages_to_responses_input as _codex_chat_messages_to_responses_input,
    _derive_responses_function_call_id as _codex_derive_responses_function_call_id,
    _deterministic_call_id as _codex_deterministic_call_id,
-    _extract_responses_message_text as _codex_extract_responses_message_text,
-    _extract_responses_reasoning_text as _codex_extract_responses_reasoning_text,
-    _normalize_codex_response as _codex_normalize_codex_response,
-    _preflight_codex_api_kwargs as _codex_preflight_codex_api_kwargs,
-    _preflight_codex_input_items as _codex_preflight_codex_input_items,
-    _responses_tools as _codex_responses_tools,
    _split_responses_tool_id as _codex_split_responses_tool_id,
    _summarize_user_message_for_log,
 )
@@ -385,9 +378,8 @@ def _sanitize_surrogates(text: str) -> str:
    return text


-# _chat_content_to_responses_parts and _summarize_user_message_for_log are
-# imported from agent.codex_responses_adapter (see import block above).
-# They remain importable from run_agent for backward compatibility.
+# _summarize_user_message_for_log is imported from agent.codex_responses_adapter
+# (see import block above). Remains importable from run_agent for backward compat.


 def _sanitize_structure_surrogates(payload: Any) -> bool:
@@ -733,17 +725,17 @@ class AIAgent:
        provider_require_parameters: bool = False,
        provider_data_collection: str = None,
        session_id: str = None,
-        tool_progress_callback: callable = None,
-        tool_start_callback: callable = None,
-        tool_complete_callback: callable = None,
-        thinking_callback: callable = None,
-        reasoning_callback: callable = None,
-        clarify_callback: callable = None,
-        step_callback: callable = None,
-        stream_delta_callback: callable = None,
-        interim_assistant_callback: callable = None,
-        tool_gen_callback: callable = None,
-        status_callback: callable = None,
+        tool_progress_callback: Callable[..., Any] = None,
+        tool_start_callback: Callable[..., Any] = None,
+        tool_complete_callback: Callable[..., Any] = None,
+        thinking_callback: Callable[..., Any] = None,
+        reasoning_callback: Callable[..., Any] = None,
+        clarify_callback: Callable[..., Any] = None,
+        step_callback: Callable[..., Any] = None,
+        stream_delta_callback: Callable[..., Any] = None,
+        interim_assistant_callback: Callable[..., Any] = None,
+        tool_gen_callback: Callable[..., Any] = None,
+        status_callback: Callable[..., Any] = None,
        max_tokens: int = None,
        reasoning_config: Dict[str, Any] = None,
        service_tier: str = None,
@@ -751,6 +743,11 @@ class AIAgent:
        prefill_messages: List[Dict[str, Any]] = None,
        platform: str = None,
        user_id: str = None,
+        user_name: str = None,
+        chat_id: str = None,
+        chat_name: str = None,
+        chat_type: str = None,
+        thread_id: str = None,
        gateway_session_key: str = None,
        skip_context_files: bool = False,
        skip_memory: bool = False,
@@ -820,6 +817,11 @@ class AIAgent:
        self.ephemeral_system_prompt = ephemeral_system_prompt
        self.platform = platform  # "cli", "telegram", "discord", "whatsapp", etc.
        self._user_id = user_id  # Platform user identifier (gateway sessions)
+        self._user_name = user_name
+        self._chat_id = chat_id
+        self._chat_name = chat_name
+        self._chat_type = chat_type
+        self._thread_id = thread_id
        self._gateway_session_key = gateway_session_key  # Stable per-chat key (e.g. agent:main:telegram:dm:123)
        # Pluggable print function — CLI replaces this with _cprint so that
        # raw ANSI status lines are routed through prompt_toolkit's renderer
@@ -872,6 +874,13 @@ class AIAgent:
        else:
            self.api_mode = "chat_completions"

+        # Eagerly warm the transport cache so import errors surface at init,
+        # not mid-conversation.  Also validates the api_mode is registered.
+        try:
+            self._get_transport()
+        except Exception:
+            pass  # Non-fatal — transport may not exist for all modes yet
+
        try:
            from hermes_cli.model_normalize import (
                _AGGREGATOR_PROVIDERS,
@@ -907,6 +916,10 @@ class AIAgent:
            )
        ):
            self.api_mode = "codex_responses"
+            # Invalidate the eager-warmed transport cache — api_mode changed
+            # from chat_completions to codex_responses after the warm at __init__.
+            if hasattr(self, "_transport_cache"):
+                self._transport_cache.clear()

        # Pre-warm OpenRouter model metadata cache in a background thread.
        # fetch_model_metadata() is cached for 1 hour; this avoids a blocking
@@ -1038,7 +1051,7 @@ class AIAgent:
                for quiet_logger in [
                    'tools',               # all tools.* (terminal, browser, web, file, etc.)
                    'run_agent',            # agent runner internals
-                    'trajectory_compressor',
+                    'scripts.trajectory_compressor',
                    'cron',                 # scheduler (only relevant in daemon mode)
                    'hermes_cli',           # CLI helpers
                ]:
@@ -1471,6 +1484,16 @@ class AIAgent:
                        # Thread gateway user identity for per-user memory scoping
                        if self._user_id:
                            _init_kwargs["user_id"] = self._user_id
+                        if self._user_name:
+                            _init_kwargs["user_name"] = self._user_name
+                        if self._chat_id:
+                            _init_kwargs["chat_id"] = self._chat_id
+                        if self._chat_name:
+                            _init_kwargs["chat_name"] = self._chat_name
+                        if self._chat_type:
+                            _init_kwargs["chat_type"] = self._chat_type
+                        if self._thread_id:
+                            _init_kwargs["thread_id"] = self._thread_id
                        # Thread gateway session key for stable per-chat Honcho session isolation
                        if self._gateway_session_key:
                            _init_kwargs["gateway_session_key"] = self._gateway_session_key
@@ -1903,6 +1926,9 @@ class AIAgent:
        self.provider = new_provider
        self.base_url = base_url or self.base_url
        self.api_mode = api_mode
+        # Invalidate transport cache — new api_mode may need a different transport
+        if hasattr(self, "_transport_cache"):
+            self._transport_cache.clear()
        if api_key:
            self.api_key = api_key

@@ -2503,6 +2529,20 @@ class AIAgent:
          4. Tag variants: ``<think>``, ``<thinking>``, ``<reasoning>``,
             ``<REASONING_SCRATCHPAD>``, ``<thought>`` (Gemma 4), all
             case-insensitive.
+
+        Additionally strips standalone tool-call XML blocks that some open
+        models (notably Gemma variants on OpenRouter) emit inside assistant
+        content instead of via the structured ``tool_calls`` field:
+          * ``<tool_call>…</tool_call>``
+          * ``<tool_calls>…</tool_calls>``
+          * ``<tool_result>…</tool_result>``
+          * ``<function_call>…</function_call>``
+          * ``<function_calls>…</function_calls>``
+          * ``<function name="…">…</function>`` (Gemma style)
+        Ported from openclaw/openclaw#67318. The ``<function>`` variant is
+        boundary-gated (only strips when the tag sits at start-of-line or
+        after punctuation and carries a ``name="..."`` attribute) so prose
+        mentions like "Use <function> in JavaScript" are preserved.
        """
        if not content:
            return ""
@@ -2514,6 +2554,30 @@ class AIAgent:
        content = re.sub(r'<reasoning>.*?</reasoning>', '', content, flags=re.DOTALL | re.IGNORECASE)
        content = re.sub(r'<REASONING_SCRATCHPAD>.*?</REASONING_SCRATCHPAD>', '', content, flags=re.DOTALL | re.IGNORECASE)
        content = re.sub(r'<thought>.*?</thought>', '', content, flags=re.DOTALL | re.IGNORECASE)
+        # 1b. Tool-call XML blocks (openclaw/openclaw#67318). Handle the
+        #     generic tag names first — they have no attribute gating since
+        #     a literal <tool_call> in prose is already vanishingly rare.
+        for _tc_name in ("tool_call", "tool_calls", "tool_result",
+                          "function_call", "function_calls"):
+            content = re.sub(
+                rf'<{_tc_name}\b[^>]*>.*?</{_tc_name}>',
+                '',
+                content,
+                flags=re.DOTALL | re.IGNORECASE,
+            )
+        # 1c. <function name="...">...</function> — Gemma-style standalone
+        #     tool call. Only strip when the tag sits at a block boundary
+        #     (start of text, after a newline, or after sentence-ending
+        #     punctuation) AND carries a name="..." attribute. This keeps
+        #     prose mentions like "Use <function> to declare" safe.
+        content = re.sub(
+            r'(?:(?<=^)|(?<=[\n\r.!?:]))[ \t]*'
+            r'<function\b[^>]*\bname\s*=[^>]*>'
+            r'(?:(?:(?!</function>).)*)</function>',
+            '',
+            content,
+            flags=re.DOTALL | re.IGNORECASE,
+        )
        # 2. Unterminated reasoning block — open tag at a block boundary
        #    (start of text, or after a newline) with no matching close.
        #    Strip from the tag to end of string.  Fixes #8878 / #9568
@@ -2531,6 +2595,16 @@ class AIAgent:
            content,
            flags=re.IGNORECASE,
        )
+        # 3b. Stray tool-call closers. (We do NOT strip bare <function> or
+        #     unterminated <function name="..."> because a truncated tail
+        #     during streaming may still be valuable to the user; matches
+        #     OpenClaw's intentional asymmetry.)
+        content = re.sub(
+            r'</(?:tool_call|tool_calls|tool_result|function_call|function_calls|function)>\s*',
+            '',
+            content,
+            flags=re.IGNORECASE,
+        )
        return content

    @staticmethod
@@ -2966,6 +3040,7 @@ class AIAgent:
                    tool_call_id=msg.get("tool_call_id"),
                    finish_reason=msg.get("finish_reason"),
                    reasoning=msg.get("reasoning") if role == "assistant" else None,
+                    reasoning_content=msg.get("reasoning_content") if role == "assistant" else None,
                    reasoning_details=msg.get("reasoning_details") if role == "assistant" else None,
                    codex_reasoning_items=msg.get("codex_reasoning_items") if role == "assistant" else None,
                )
@@ -4695,7 +4770,7 @@ class AIAgent:
    def _close_request_openai_client(self, client: Any, *, reason: str) -> None:
        self._close_openai_client(client, reason=reason, shared=False)

-    def _run_codex_stream(self, api_kwargs: dict, client: Any = None, on_first_delta: callable = None):
+    def _run_codex_stream(self, api_kwargs: dict, client: Any = None, on_first_delta: Callable[..., Any] = None):
        """Execute one streaming Responses API request and return the final response."""
        import httpx as _httpx

@@ -4823,7 +4898,7 @@ class AIAgent:
        active_client = client or self._ensure_primary_openai_client(reason="codex_create_stream_fallback")
        fallback_kwargs = dict(api_kwargs)
        fallback_kwargs["stream"] = True
-        fallback_kwargs = self._get_codex_transport().preflight_kwargs(fallback_kwargs, allow_stream=True)
+        fallback_kwargs = self._get_transport().preflight_kwargs(fallback_kwargs, allow_stream=True)
        stream_or_response = active_client.responses.create(**fallback_kwargs)

        # Compatibility shim for mocks or providers that still return a concrete response.
@@ -5178,6 +5253,9 @@ class AIAgent:
                    result["response"] = self._anthropic_messages_create(api_kwargs)
                elif self.api_mode == "bedrock_converse":
                    # Bedrock uses boto3 directly — no OpenAI client needed.
+                    # normalize_converse_response produces an OpenAI-compatible
+                    # SimpleNamespace so the rest of the agent loop can treat
+                    # bedrock responses like chat_completions responses.
                    from agent.bedrock_adapter import (
                        _get_bedrock_runtime_client,
                        normalize_converse_response,
@@ -5391,7 +5469,7 @@ class AIAgent:
        )

    def _interruptible_streaming_api_call(
-        self, api_kwargs: dict, *, on_first_delta: callable = None
+        self, api_kwargs: dict, *, on_first_delta: Callable[..., Any] = None
    ):
        """Streaming variant of _interruptible_api_call for real-time token delivery.

@@ -5805,16 +5883,6 @@ class AIAgent:
                            result["response"] = _call_chat_completions()
                        return  # success
                    except Exception as e:
-                        if deltas_were_sent["yes"]:
-                            # Streaming failed AFTER some tokens were already
-                            # delivered.  Don't retry or fall back — partial
-                            # content already reached the user.
-                            logger.warning(
-                                "Streaming failed after partial delivery, not retrying: %s", e
-                            )
-                            result["error"] = e
-                            return
-
                        _is_timeout = isinstance(
                            e, (_httpx.ReadTimeout, _httpx.ConnectTimeout, _httpx.PoolTimeout)
                        )
@@ -5822,6 +5890,123 @@ class AIAgent:
                            e, (_httpx.ConnectError, _httpx.RemoteProtocolError, ConnectionError)
                        )

+                        # If the stream died AFTER some tokens were delivered:
+                        # normally we don't retry (the user already saw text,
+                        # retrying would duplicate it).  BUT: if a tool call
+                        # was in-flight when the stream died, silently aborting
+                        # discards the tool call entirely.  In that case we
+                        # prefer to retry — the user sees a brief
+                        # "reconnecting" marker + duplicated preamble text,
+                        # which is strictly better than a failed action with
+                        # a "retry manually" message.  Limit this to transient
+                        # connection errors (Clawdbot-style narrow gate): no
+                        # tool has executed yet within this API call, so
+                        # silent retry is safe wrt side-effects.
+                        if deltas_were_sent["yes"]:
+                            _partial_tool_in_flight = bool(
+                                result.get("partial_tool_names")
+                            )
+                            _is_sse_conn_err_preview = False
+                            if not _is_timeout and not _is_conn_err:
+                                from openai import APIError as _APIError
+                                if isinstance(e, _APIError) and not getattr(e, "status_code", None):
+                                    _err_lower_preview = str(e).lower()
+                                    _SSE_PREVIEW_PHRASES = (
+                                        "connection lost",
+                                        "connection reset",
+                                        "connection closed",
+                                        "connection terminated",
+                                        "network error",
+                                        "network connection",
+                                        "terminated",
+                                        "peer closed",
+                                        "broken pipe",
+                                        "upstream connect error",
+                                    )
+                                    _is_sse_conn_err_preview = any(
+                                        phrase in _err_lower_preview
+                                        for phrase in _SSE_PREVIEW_PHRASES
+                                    )
+                            _is_transient = (
+                                _is_timeout or _is_conn_err or _is_sse_conn_err_preview
+                            )
+                            _can_silent_retry = (
+                                _partial_tool_in_flight
+                                and _is_transient
+                                and _stream_attempt < _max_stream_retries
+                            )
+                            if not _can_silent_retry:
+                                # Either no tool call was in-flight (so the
+                                # turn was a pure text response — current
+                                # stub-with-recovered-text behaviour is
+                                # correct), or retries are exhausted, or the
+                                # error isn't transient.  Fall through to the
+                                # stub path.
+                                logger.warning(
+                                    "Streaming failed after partial delivery, not retrying: %s", e
+                                )
+                                result["error"] = e
+                                return
+                            # Tool call was in-flight AND error is transient:
+                            # retry silently.  Clear per-attempt state so the
+                            # next stream starts clean.  Fire a "reconnecting"
+                            # marker so the user sees why the preamble is
+                            # about to be re-streamed.
+                            logger.info(
+                                "Streaming attempt %s/%s died mid tool-call "
+                                "(%s: %s) after user-visible text; retrying "
+                                "silently to avoid losing the action. "
+                                "Preamble will re-stream.",
+                                _stream_attempt + 1,
+                                _max_stream_retries + 1,
+                                type(e).__name__,
+                                e,
+                            )
+                            try:
+                                self._fire_stream_delta(
+                                    "\n\n⚠ Connection dropped mid tool-call; "
+                                    "reconnecting…\n\n"
+                                )
+                            except Exception:
+                                pass
+                            # Reset the streamed-text buffer so the retry's
+                            # fresh preamble doesn't get double-recorded in
+                            # _current_streamed_assistant_text (which would
+                            # pollute the interim-visible-text comparison).
+                            try:
+                                self._reset_stream_delivery_tracking()
+                            except Exception:
+                                pass
+                            # Reset in-memory accumulators so the next
+                            # attempt's chunks don't concat onto the dead
+                            # stream's partial JSON.
+                            result["partial_tool_names"] = []
+                            deltas_were_sent["yes"] = False
+                            first_delta_fired["done"] = False
+                            self._emit_status(
+                                f"⚠️ Connection dropped mid tool-call "
+                                f"({type(e).__name__}). Reconnecting… "
+                                f"(attempt {_stream_attempt + 2}/{_max_stream_retries + 1})"
+                            )
+                            self._touch_activity(
+                                f"stream retry {_stream_attempt + 2}/{_max_stream_retries + 1} "
+                                f"mid tool-call after {type(e).__name__}"
+                            )
+                            stale = request_client_holder.get("client")
+                            if stale is not None:
+                                self._close_request_openai_client(
+                                    stale, reason="stream_mid_tool_retry_cleanup"
+                                )
+                                request_client_holder["client"] = None
+                            try:
+                                self._replace_primary_openai_client(
+                                    reason="stream_mid_tool_retry_pool_cleanup"
+                                )
+                            except Exception:
+                                pass
+                            self._emit_status("🔄 Reconnected — resuming…")
+                            continue
+
                        # SSE error events from proxies (e.g. OpenRouter sends
                        # {"error":{"message":"Network connection lost."}}) are
                        # raised as APIError by the OpenAI SDK.  These are
@@ -6132,6 +6317,10 @@ class AIAgent:
            # falling through to OpenRouter defaults.
            fb_base_url_hint = (fb.get("base_url") or "").strip() or None
            fb_api_key_hint = (fb.get("api_key") or "").strip() or None
+            if not fb_api_key_hint:
+                fb_key_env = (fb.get("key_env") or "").strip()
+                if fb_key_env:
+                    fb_api_key_hint = os.getenv(fb_key_env, "").strip() or None
            # For Ollama Cloud endpoints, pull OLLAMA_API_KEY from env
            # when no explicit key is in the fallback config. Host match
            # (not substring) — see GHSA-76xc-57q6-vm5m.
@@ -6181,6 +6370,8 @@ class AIAgent:
            self.provider = fb_provider
            self.base_url = fb_base_url
            self.api_mode = fb_api_mode
+            if hasattr(self, "_transport_cache"):
+                self._transport_cache.clear()
            self._fallback_activated = True

            # Honor per-provider / per-model request_timeout_seconds for the
@@ -6292,6 +6483,8 @@ class AIAgent:
            self.provider = rt["provider"]
            self.base_url = rt["base_url"]           # setter updates _base_url_lower
            self.api_mode = rt["api_mode"]
+            if hasattr(self, "_transport_cache"):
+                self._transport_cache.clear()
            self.api_key = rt["api_key"]
            self._client_kwargs = dict(rt["client_kwargs"])
            self._use_prompt_caching = rt["use_prompt_caching"]
@@ -6398,6 +6591,8 @@ class AIAgent:
            self.provider = rt["provider"]
            self.base_url = rt["base_url"]
            self.api_mode = rt["api_mode"]
+            if hasattr(self, "_transport_cache"):
+                self._transport_cache.clear()
            self.api_key = rt["api_key"]

            if self.api_mode == "anthropic_messages":
@@ -6556,40 +6751,22 @@ class AIAgent:
            return suffix
        return "[A multimodal message was converted to text for Anthropic compatibility.]"

-    def _get_anthropic_transport(self):
-        """Return the cached AnthropicTransport instance (lazy singleton)."""
-        t = getattr(self, "_anthropic_transport", None)
-        if t is None:
-            from agent.transports import get_transport
-            t = get_transport("anthropic_messages")
-            self._anthropic_transport = t
-        return t
+    def _get_transport(self, api_mode: str = None):
+        """Return the cached transport for the given (or current) api_mode.

-    def _get_codex_transport(self):
-        """Return the cached ResponsesApiTransport instance (lazy singleton)."""
-        t = getattr(self, "_codex_transport", None)
+        Lazy-initializes on first call per api_mode. Returns None if no
+        transport is registered for the mode.
+        """
+        mode = api_mode or self.api_mode
+        cache = getattr(self, "_transport_cache", None)
+        if cache is None:
+            cache = {}
+            self._transport_cache = cache
+        t = cache.get(mode)
        if t is None:
            from agent.transports import get_transport
-            t = get_transport("codex_responses")
-            self._codex_transport = t
-        return t
-
-    def _get_chat_completions_transport(self):
-        """Return the cached ChatCompletionsTransport instance (lazy singleton)."""
-        t = getattr(self, "_chat_completions_transport", None)
-        if t is None:
-            from agent.transports import get_transport
-            t = get_transport("chat_completions")
-            self._chat_completions_transport = t
-        return t
-
-    def _get_bedrock_transport(self):
-        """Return the cached BedrockTransport instance (lazy singleton)."""
-        t = getattr(self, "_bedrock_transport", None)
-        if t is None:
-            from agent.transports import get_transport
-            t = get_transport("bedrock_converse")
-            self._bedrock_transport = t
+            t = get_transport(mode)
+            cache[mode] = t
        return t

    def _prepare_anthropic_messages_for_api(self, api_messages: list) -> list:
@@ -6708,7 +6885,7 @@ class AIAgent:
    def _build_api_kwargs(self, api_messages: list) -> dict:
        """Build the keyword arguments dict for the active API mode."""
        if self.api_mode == "anthropic_messages":
-            _transport = self._get_anthropic_transport()
+            _transport = self._get_transport()
            anthropic_messages = self._prepare_anthropic_messages_for_api(api_messages)
            ctx_len = getattr(self, "context_compressor", None)
            ctx_len = ctx_len.context_length if ctx_len else None
@@ -6731,7 +6908,7 @@ class AIAgent:
        # AWS Bedrock native Converse API — bypasses the OpenAI client entirely.
        # The adapter handles message/tool conversion and boto3 calls directly.
        if self.api_mode == "bedrock_converse":
-            _bt = self._get_bedrock_transport()
+            _bt = self._get_transport()
            region = getattr(self, "_bedrock_region", None) or "us-east-1"
            guardrail = getattr(self, "_bedrock_guardrail_config", None)
            return _bt.build_kwargs(
@@ -6744,7 +6921,7 @@ class AIAgent:
            )

        if self.api_mode == "codex_responses":
-            _ct = self._get_codex_transport()
+            _ct = self._get_transport()
            is_github_responses = (
                base_url_host_matches(self.base_url, "models.github.ai")
                or base_url_host_matches(self.base_url, "api.githubcopilot.com")
@@ -6772,7 +6949,7 @@ class AIAgent:
            )

        # ── chat_completions (default) ─────────────────────────────────────
-        _ct = self._get_chat_completions_transport()
+        _ct = self._get_transport()

        # Provider detection flags
        _is_qwen = self._is_qwen_portal()
@@ -7003,6 +7180,11 @@ class AIAgent:
            "finish_reason": finish_reason,
        }

+        if hasattr(assistant_message, "reasoning_content"):
+            raw_reasoning_content = getattr(assistant_message, "reasoning_content", None)
+            if raw_reasoning_content is not None:
+                msg["reasoning_content"] = _sanitize_surrogates(raw_reasoning_content)
+
        if hasattr(assistant_message, 'reasoning_details') and assistant_message.reasoning_details:
            # Pass reasoning_details back unmodified so providers (OpenRouter,
            # Anthropic, OpenAI) can maintain reasoning continuity across turns.
@@ -7077,6 +7259,30 @@ class AIAgent:

        return msg

+    def _copy_reasoning_content_for_api(self, source_msg: dict, api_msg: dict) -> None:
+        """Copy provider-facing reasoning fields onto an API replay message."""
+        if source_msg.get("role") != "assistant":
+            return
+
+        explicit_reasoning = source_msg.get("reasoning_content")
+        if isinstance(explicit_reasoning, str):
+            api_msg["reasoning_content"] = explicit_reasoning
+            return
+
+        normalized_reasoning = source_msg.get("reasoning")
+        if isinstance(normalized_reasoning, str) and normalized_reasoning:
+            api_msg["reasoning_content"] = normalized_reasoning
+            return
+
+        kimi_requires_reasoning = (
+            self.provider in {"kimi-coding", "kimi-coding-cn"}
+            or base_url_host_matches(self.base_url, "api.kimi.com")
+            or base_url_host_matches(self.base_url, "moonshot.ai")
+            or base_url_host_matches(self.base_url, "moonshot.cn")
+        )
+        if kimi_requires_reasoning and source_msg.get("tool_calls"):
+            api_msg["reasoning_content"] = ""
+
    @staticmethod
    def _sanitize_tool_calls_for_strict_api(api_msg: dict) -> dict:
        """Strip Codex Responses API fields from tool_calls for strict providers.
@@ -7160,10 +7366,7 @@ class AIAgent:
            api_messages = []
            for msg in messages:
                api_msg = msg.copy()
-                if msg.get("role") == "assistant":
-                    reasoning = msg.get("reasoning")
-                    if reasoning:
-                        api_msg["reasoning_content"] = reasoning
+                self._copy_reasoning_content_for_api(msg, api_msg)
                api_msg.pop("reasoning", None)
                api_msg.pop("finish_reason", None)
                api_msg.pop("_flush_sentinel", None)
@@ -7205,12 +7408,15 @@ class AIAgent:
                _flush_temperature = _fixed_temp
            else:
                _flush_temperature = 0.3
+            _flush_llm_kwargs: dict = {}
+            if _flush_temperature is not None:
+                _flush_llm_kwargs["temperature"] = _flush_temperature
            try:
                response = _call_llm(
                    task="flush_memories",
                    messages=api_messages,
                    tools=[memory_tool_def],
-                    temperature=_flush_temperature,
+                    **_flush_llm_kwargs,
                    max_tokens=5120,
                    # timeout resolved from auxiliary.flush_memories.timeout config
                )
@@ -7221,7 +7427,7 @@ class AIAgent:
            if not _aux_available and self.api_mode == "codex_responses":
                # No auxiliary client -- use the Codex Responses path directly
                codex_kwargs = self._build_api_kwargs(api_messages)
-                codex_kwargs["tools"] = self._get_codex_transport().convert_tools([memory_tool_def])
+                codex_kwargs["tools"] = self._get_transport().convert_tools([memory_tool_def])
                if _flush_temperature is not None:
                    codex_kwargs["temperature"] = _flush_temperature
                else:
@@ -7231,7 +7437,7 @@ class AIAgent:
                response = self._run_codex_stream(codex_kwargs)
            elif not _aux_available and self.api_mode == "anthropic_messages":
                # Native Anthropic — use the transport for kwargs
-                _tflush = self._get_anthropic_transport()
+                _tflush = self._get_transport()
                ant_kwargs = _tflush.build_kwargs(
                    model=self.model, messages=api_messages,
                    tools=[memory_tool_def], max_tokens=5120,
@@ -7256,7 +7462,7 @@ class AIAgent:
            # Extract tool calls from the response, handling all API formats
            tool_calls = []
            if self.api_mode == "codex_responses" and not _aux_available:
-                _ct_flush = self._get_codex_transport()
+                _ct_flush = self._get_transport()
                _cnr_flush = _ct_flush.normalize_response(response)
                if _cnr_flush and _cnr_flush.tool_calls:
                    tool_calls = [
@@ -7266,19 +7472,26 @@ class AIAgent:
                        ) for tc in _cnr_flush.tool_calls
                    ]
            elif self.api_mode == "anthropic_messages" and not _aux_available:
-                _tfn = self._get_anthropic_transport()
-                _flush_nr = _tfn.normalize_response(response, strip_tool_prefix=self._is_anthropic_oauth)
-                if _flush_nr and _flush_nr.tool_calls:
+                _tfn = self._get_transport()
+                _flush_result = _tfn.normalize_response(response, strip_tool_prefix=self._is_anthropic_oauth)
+                if _flush_result and _flush_result.tool_calls:
                    tool_calls = [
                        SimpleNamespace(
                            id=tc.id, type="function",
                            function=SimpleNamespace(name=tc.name, arguments=tc.arguments),
-                        ) for tc in _flush_nr.tool_calls
+                        ) for tc in _flush_result.tool_calls
                    ]
-            elif hasattr(response, "choices") and response.choices:
-                assistant_message = response.choices[0].message
-                if assistant_message.tool_calls:
-                    tool_calls = assistant_message.tool_calls
+            elif self.api_mode in ("chat_completions", "bedrock_converse"):
+                # chat_completions / bedrock — normalize through transport
+                _flush_result = self._get_transport().normalize_response(response)
+                if _flush_result.tool_calls:
+                    tool_calls = _flush_result.tool_calls
+            elif _aux_available and hasattr(response, "choices") and response.choices:
+                # Auxiliary client returned OpenAI-shaped response while main
+                # api_mode is codex/anthropic — extract tool_calls from .choices
+                _aux_msg = response.choices[0].message
+                if hasattr(_aux_msg, "tool_calls") and _aux_msg.tool_calls:
+                    tool_calls = _aux_msg.tool_calls

            for tc in tool_calls:
                if tc.function.name == "memory":
@@ -8308,7 +8521,7 @@ class AIAgent:
                codex_kwargs = self._build_api_kwargs(api_messages)
                codex_kwargs.pop("tools", None)
                summary_response = self._run_codex_stream(codex_kwargs)
-                _ct_sum = self._get_codex_transport()
+                _ct_sum = self._get_transport()
                _cnr_sum = _ct_sum.normalize_response(summary_response)
                final_response = (_cnr_sum.content or "").strip()
            else:
@@ -8338,21 +8551,18 @@ class AIAgent:
                    summary_kwargs["extra_body"] = summary_extra_body

                if self.api_mode == "anthropic_messages":
-                    _tsum = self._get_anthropic_transport()
+                    _tsum = self._get_transport()
                    _ant_kw = _tsum.build_kwargs(model=self.model, messages=api_messages, tools=None,
                                   max_tokens=self.max_tokens, reasoning_config=self.reasoning_config,
                                   is_oauth=self._is_anthropic_oauth,
                                   preserve_dots=self._anthropic_preserve_dots())
                    summary_response = self._anthropic_messages_create(_ant_kw)
-                    _sum_nr = _tsum.normalize_response(summary_response, strip_tool_prefix=self._is_anthropic_oauth)
-                    final_response = (_sum_nr.content or "").strip()
+                    _summary_result = _tsum.normalize_response(summary_response, strip_tool_prefix=self._is_anthropic_oauth)
+                    final_response = (_summary_result.content or "").strip()
                else:
                    summary_response = self._ensure_primary_openai_client(reason="iteration_limit_summary").chat.completions.create(**summary_kwargs)
-
-                    if summary_response.choices and summary_response.choices[0].message.content:
-                        final_response = summary_response.choices[0].message.content
-                    else:
-                        final_response = ""
+                    _summary_result = self._get_transport().normalize_response(summary_response)
+                    final_response = (_summary_result.content or "").strip()

            if final_response:
                if "<think>" in final_response:
@@ -8367,18 +8577,18 @@ class AIAgent:
                    codex_kwargs = self._build_api_kwargs(api_messages)
                    codex_kwargs.pop("tools", None)
                    retry_response = self._run_codex_stream(codex_kwargs)
-                    _ct_retry = self._get_codex_transport()
+                    _ct_retry = self._get_transport()
                    _cnr_retry = _ct_retry.normalize_response(retry_response)
                    final_response = (_cnr_retry.content or "").strip()
                elif self.api_mode == "anthropic_messages":
-                    _tretry = self._get_anthropic_transport()
+                    _tretry = self._get_transport()
                    _ant_kw2 = _tretry.build_kwargs(model=self.model, messages=api_messages, tools=None,
                                    is_oauth=self._is_anthropic_oauth,
                                    max_tokens=self.max_tokens, reasoning_config=self.reasoning_config,
                                    preserve_dots=self._anthropic_preserve_dots())
                    retry_response = self._anthropic_messages_create(_ant_kw2)
-                    _retry_nr = _tretry.normalize_response(retry_response, strip_tool_prefix=self._is_anthropic_oauth)
-                    final_response = (_retry_nr.content or "").strip()
+                    _retry_result = _tretry.normalize_response(retry_response, strip_tool_prefix=self._is_anthropic_oauth)
+                    final_response = (_retry_result.content or "").strip()
                else:
                    summary_kwargs = {
                        "model": self.model,
@@ -8392,11 +8602,8 @@ class AIAgent:
                        summary_kwargs["extra_body"] = summary_extra_body

                    summary_response = self._ensure_primary_openai_client(reason="iteration_limit_summary_retry").chat.completions.create(**summary_kwargs)
-
-                    if summary_response.choices and summary_response.choices[0].message.content:
-                        final_response = summary_response.choices[0].message.content
-                    else:
-                        final_response = ""
+                    _retry_result = self._get_transport().normalize_response(summary_response)
+                    final_response = (_retry_result.content or "").strip()

                if final_response:
                    if "<think>" in final_response:
@@ -8418,9 +8625,9 @@ class AIAgent:
        self,
        user_message: str,
        system_message: str = None,
-        conversation_history: List[Dict[str, Any]] = None,
+        conversation_history: List[Dict[str, Any]] | None = None,
        task_id: str = None,
-        stream_callback: Optional[callable] = None,
+        stream_callback: Optional[Callable[..., Any]] = None,
        persist_user_message: Optional[str] = None,
    ) -> Dict[str, Any]:
        """
@@ -8923,11 +9130,7 @@ class AIAgent:

                # For ALL assistant messages, pass reasoning back to the API
                # This ensures multi-turn reasoning context is preserved
-                if msg.get("role") == "assistant":
-                    reasoning_text = msg.get("reasoning")
-                    if reasoning_text:
-                        # Add reasoning_content for API compatibility (Moonshot AI, Novita, OpenRouter)
-                        api_msg["reasoning_content"] = reasoning_text
+                self._copy_reasoning_content_for_api(msg, api_msg)

                # Remove 'reasoning' field - it's for trajectory storage only
                # We've copied it to 'reasoning_content' for the API above
@@ -9131,7 +9334,7 @@ class AIAgent:
                    if self._force_ascii_payload:
                        _sanitize_structure_non_ascii(api_kwargs)
                    if self.api_mode == "codex_responses":
-                        api_kwargs = self._get_codex_transport().preflight_kwargs(api_kwargs, allow_stream=False)
+                        api_kwargs = self._get_transport().preflight_kwargs(api_kwargs, allow_stream=False)

                    try:
                        from hermes_cli.plugins import invoke_hook as _invoke_hook
@@ -9219,7 +9422,7 @@ class AIAgent:
                    response_invalid = False
                    error_details = []
                    if self.api_mode == "codex_responses":
-                        _ct_v = self._get_codex_transport()
+                        _ct_v = self._get_transport()
                        if not _ct_v.validate_response(response):
                            if response is None:
                                response_invalid = True
@@ -9248,7 +9451,7 @@ class AIAgent:
                                    response_invalid = True
                                    error_details.append("response.output is empty")
                    elif self.api_mode == "anthropic_messages":
-                        _tv = self._get_anthropic_transport()
+                        _tv = self._get_transport()
                        if not _tv.validate_response(response):
                            response_invalid = True
                            if response is None:
@@ -9256,7 +9459,7 @@ class AIAgent:
                            else:
                                error_details.append("response.content invalid (not a non-empty list)")
                    elif self.api_mode == "bedrock_converse":
-                        _btv = self._get_bedrock_transport()
+                        _btv = self._get_transport()
                        if not _btv.validate_response(response):
                            response_invalid = True
                            if response is None:
@@ -9264,7 +9467,7 @@ class AIAgent:
                            else:
                                error_details.append("Bedrock response invalid (no output or choices)")
                    else:
-                        _ctv = self._get_chat_completions_transport()
+                        _ctv = self._get_transport()
                        if not _ctv.validate_response(response):
                            response_invalid = True
                            if response is None:
@@ -9424,15 +9627,18 @@ class AIAgent:
                        else:
                            finish_reason = "stop"
                    elif self.api_mode == "anthropic_messages":
-                        _tfr = self._get_anthropic_transport()
+                        _tfr = self._get_transport()
                        finish_reason = _tfr.map_finish_reason(response.stop_reason)
                    elif self.api_mode == "bedrock_converse":
-                        # Bedrock response is already normalized at dispatch — finish_reason
-                        # is already in OpenAI format via normalize_converse_response()
-                        finish_reason = response.choices[0].finish_reason if hasattr(response, "choices") and response.choices else "stop"
+                        # Bedrock response already normalized at dispatch — use transport
+                        _bt_fr = self._get_transport()
+                        _bedrock_result = _bt_fr.normalize_response(response)
+                        finish_reason = _bedrock_result.finish_reason
                    else:
-                        finish_reason = response.choices[0].finish_reason
-                        assistant_message = response.choices[0].message
+                        _cc_fr = self._get_transport()
+                        _finish_result = _cc_fr.normalize_response(response)
+                        finish_reason = _finish_result.finish_reason
+                        assistant_message = _finish_result
                        if self._should_treat_stop_as_truncated(
                            finish_reason,
                            assistant_message,
@@ -9455,27 +9661,14 @@ class AIAgent:
                        # interim assistant message is byte-identical to what
                        # would have been appended in the non-truncated path.
                        _trunc_msg = None
-                        if self.api_mode in ("chat_completions", "bedrock_converse"):
-                            _trunc_msg = response.choices[0].message if (hasattr(response, "choices") and response.choices) else None
-                        elif self.api_mode == "anthropic_messages":
-                            _trunc_nr = self._get_anthropic_transport().normalize_response(
+                        _trunc_transport = self._get_transport()
+                        if self.api_mode == "anthropic_messages":
+                            _trunc_result = _trunc_transport.normalize_response(
                                response, strip_tool_prefix=self._is_anthropic_oauth
                            )
-                            _trunc_msg = SimpleNamespace(
-                                content=_trunc_nr.content,
-                                tool_calls=[
-                                    SimpleNamespace(
-                                        id=tc.id, type="function",
-                                        function=SimpleNamespace(name=tc.name, arguments=tc.arguments),
-                                    ) for tc in (_trunc_nr.tool_calls or [])
-                                ] or None,
-                                reasoning=_trunc_nr.reasoning,
-                                reasoning_content=None,
-                                reasoning_details=(
-                                    _trunc_nr.provider_data.get("reasoning_details")
-                                    if _trunc_nr.provider_data else None
-                                ),
-                            )
+                        else:
+                            _trunc_result = _trunc_transport.normalize_response(response)
+                        _trunc_msg = _trunc_result

                        _trunc_content = getattr(_trunc_msg, "content", None) if _trunc_msg else None
                        _trunc_has_tool_calls = bool(getattr(_trunc_msg, "tool_calls", None)) if _trunc_msg else False
@@ -9724,6 +9917,7 @@ class AIAgent:
                                    billing_mode="subscription_included"
                                    if cost_result.status == "included" else None,
                                    model=self.model,
+                                    api_call_count=1,
                                )
                            except Exception:
                                pass  # never block the agent loop
@@ -10000,6 +10194,27 @@ class AIAgent:
                        if self._try_refresh_nous_client_credentials(force=True):
                            print(f"{self.log_prefix}🔐 Nous agent key refreshed after 401. Retrying request...")
                            continue
+                        # Credential refresh didn't help — show diagnostic info.
+                        # Most common causes: Portal OAuth expired/revoked,
+                        # account out of credits, or agent key blocked.
+                        from hermes_constants import display_hermes_home as _dhh_fn
+                        _dhh = _dhh_fn()
+                        _body_text = ""
+                        try:
+                            _body = getattr(api_error, "body", None) or getattr(api_error, "response", None)
+                            if _body is not None:
+                                _body_text = str(_body)[:200]
+                        except Exception:
+                            pass
+                        print(f"{self.log_prefix}🔐 Nous 401 — Portal authentication failed.")
+                        if _body_text:
+                            print(f"{self.log_prefix}   Response: {_body_text}")
+                        print(f"{self.log_prefix}   Most likely: Portal OAuth expired, account out of credits, or agent key revoked.")
+                        print(f"{self.log_prefix}   Troubleshooting:")
+                        print(f"{self.log_prefix}     • Re-authenticate: hermes login --provider nous")
+                        print(f"{self.log_prefix}     • Check credits / billing: https://portal.nousresearch.com")
+                        print(f"{self.log_prefix}     • Verify stored credentials: {_dhh}/auth.json")
+                        print(f"{self.log_prefix}     • Switch providers temporarily: /model <model> --provider openrouter")
                    if (
                        self.api_mode == "anthropic_messages"
                        and status_code == 401
@@ -10016,7 +10231,7 @@ class AIAgent:
                        auth_method = "Bearer (OAuth/setup-token)" if _is_oauth_token(key) else "x-api-key (API key)"
                        print(f"{self.log_prefix}🔐 Anthropic 401 — authentication failed.")
                        print(f"{self.log_prefix}   Auth method: {auth_method}")
-                        print(f"{self.log_prefix}   Token prefix: {key[:12]}..." if key and len(key) > 12 else f"{self.log_prefix}   Token: (empty or short)")
+                        print(f"{self.log_prefix}   Token prefix: {str(key)[:12]}..." if key and len(str(key)) > 12 else f"{self.log_prefix}   Token: (empty or short)")
                        print(f"{self.log_prefix}   Troubleshooting:")
                        from hermes_constants import display_hermes_home as _dhh_fn
                        _dhh = _dhh_fn()
@@ -10684,69 +10899,13 @@ class AIAgent:
                break

            try:
-                if self.api_mode == "codex_responses":
-                    _ct = self._get_codex_transport()
-                    _cnr = _ct.normalize_response(response)
-                    # Back-compat shim: downstream expects SimpleNamespace with
-                    # codex-specific fields (.codex_reasoning_items, .reasoning_details,
-                    # and .call_id/.response_item_id on tool calls).
-                    _tc_list = None
-                    if _cnr.tool_calls:
-                        _tc_list = []
-                        for tc in _cnr.tool_calls:
-                            _tc_ns = SimpleNamespace(
-                                id=tc.id, type="function",
-                                function=SimpleNamespace(name=tc.name, arguments=tc.arguments),
-                            )
-                            if tc.provider_data:
-                                if tc.provider_data.get("call_id"):
-                                    _tc_ns.call_id = tc.provider_data["call_id"]
-                                if tc.provider_data.get("response_item_id"):
-                                    _tc_ns.response_item_id = tc.provider_data["response_item_id"]
-                            _tc_list.append(_tc_ns)
-                    assistant_message = SimpleNamespace(
-                        content=_cnr.content,
-                        tool_calls=_tc_list or None,
-                        reasoning=_cnr.reasoning,
-                        reasoning_content=None,
-                        codex_reasoning_items=(
-                            _cnr.provider_data.get("codex_reasoning_items")
-                            if _cnr.provider_data else None
-                        ),
-                        reasoning_details=(
-                            _cnr.provider_data.get("reasoning_details")
-                            if _cnr.provider_data else None
-                        ),
-                    )
-                    finish_reason = _cnr.finish_reason
-                elif self.api_mode == "anthropic_messages":
-                    _transport = self._get_anthropic_transport()
-                    _nr = _transport.normalize_response(
-                        response, strip_tool_prefix=self._is_anthropic_oauth
-                    )
-                    # Back-compat shim: downstream code expects SimpleNamespace with
-                    # .content, .tool_calls, .reasoning, .reasoning_content,
-                    # .reasoning_details attributes.
-                    assistant_message = SimpleNamespace(
-                        content=_nr.content,
-                        tool_calls=[
-                            SimpleNamespace(
-                                id=tc.id,
-                                type="function",
-                                function=SimpleNamespace(name=tc.name, arguments=tc.arguments),
-                            )
-                            for tc in (_nr.tool_calls or [])
-                        ] or None,
-                        reasoning=_nr.reasoning,
-                        reasoning_content=None,
-                        reasoning_details=(
-                            _nr.provider_data.get("reasoning_details")
-                            if _nr.provider_data else None
-                        ),
-                    )
-                    finish_reason = _nr.finish_reason
-                else:
-                    assistant_message = response.choices[0].message
+                _transport = self._get_transport()
+                _normalize_kwargs = {}
+                if self.api_mode == "anthropic_messages":
+                    _normalize_kwargs["strip_tool_prefix"] = self._is_anthropic_oauth
+                normalized = _transport.normalize_response(response, **_normalize_kwargs)
+                assistant_message = normalized
+                finish_reason = normalized.finish_reason
                
                # Normalize content to string — some OpenAI-compatible servers
                # (llama-server, etc.) return content as a dict or list instead
@@ -11416,7 +11575,7 @@ class AIAgent:
                        messages.append(assistant_msg)

                        if reasoning_text:
-                            reasoning_preview = reasoning_text[:500] + "..." if len(reasoning_text) > 500 else reasoning_text
+                            reasoning_preview = str(reasoning_text)[:500] + "..." if len(str(reasoning_text)) > 500 else reasoning_text
                            logger.warning(
                                "Reasoning-only response (no visible content) "
                                "after exhausting retries and fallback. "
@@ -11755,7 +11914,7 @@ class AIAgent:

        return result

-    def chat(self, message: str, stream_callback: Optional[callable] = None) -> str:
+    def chat(self, message: str, stream_callback: Optional[Callable[..., Any]] = None) -> str:
        """
        Simple chat interface that returns just the final response.

@@ -20,9 +20,13 @@ Usage:
    python batch_runner.py --dataset_file=data.jsonl --batch_size=10 --run_name=my_run --distribution=image_gen
 """

+import os
+import sys
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
 import json
 import logging
-import os
 import time
 from pathlib import Path
 from typing import List, Dict, Any, Optional, Tuple
@@ -1126,7 +1130,7 @@ def main(
    num_workers: int = 4,
    resume: bool = False,
    verbose: bool = False,
-    list_distributions: bool = False,
+    show_distributions: bool = False,
    ephemeral_system_prompt: str = None,
    log_prefix_chars: int = 100,
    providers_allowed: str = None,
@@ -1154,7 +1158,7 @@ def main(
        num_workers (int): Number of parallel worker processes (default: 4)
        resume (bool): Resume from checkpoint if run was interrupted (default: False)
        verbose (bool): Enable verbose logging (default: False)
-        list_distributions (bool): List available toolset distributions and exit
+        show_distributions (bool): List available toolset distributions and exit
        ephemeral_system_prompt (str): System prompt used during agent execution but NOT saved to trajectories (optional)
        log_prefix_chars (int): Number of characters to show in log previews for tool calls/responses (default: 20)
        providers_allowed (str): Comma-separated list of OpenRouter providers to allow (e.g. "anthropic,openai")
@@ -1186,10 +1190,10 @@ def main(
                               --prefill_messages_file=configs/prefill_opus.json
        
        # List available distributions
-        python batch_runner.py --list_distributions
+        python batch_runner.py --show_distributions
    """
    # Handle list distributions
-    if list_distributions:
+    if show_distributions:
        from toolset_distributions import print_distribution_info

        print("📊 Available Toolset Distributions")
@@ -265,7 +265,7 @@ def check_config(groq_key, eleven_key):
    if voice_mode_path.exists():
        try:
            import json
-            modes = json.loads(voice_mode_path.read_text())
+            modes = json.loads(voice_mode_path.read_text(encoding="utf-8"))
            off_count = sum(1 for v in modes.values() if v == "off")
            all_count = sum(1 for v in modes.values() if v == "all")
            check("Voice mode state", True, f"{all_count} on, {off_count} off, {len(modes)} total")
@@ -26,10 +26,13 @@ Usage:
    python mini_swe_runner.py --prompts_file prompts.jsonl --output_file trajectories.jsonl --env docker
 """

-import json
-import logging
 import os
 import sys
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+import json
+import logging
 import time
 import uuid
 from datetime import datetime
@@ -26,6 +26,7 @@ import shutil
 import subprocess
 import sys
 from collections import defaultdict
+from typing import Optional
 from datetime import datetime
 from pathlib import Path

@@ -43,18 +44,24 @@ AUTHOR_MAP = {
    "teknium1@gmail.com": "teknium1",
    "teknium@nousresearch.com": "teknium1",
    "127238744+teknium1@users.noreply.github.com": "teknium1",
+    "343873859@qq.com": "DrStrangerUJN",
    # contributors (from noreply pattern)
+    "david.vv@icloud.com": "davidvv",
    "wangqiang@wangqiangdeMac-mini.local": "xiaoqiang243",
    "snreynolds2506@gmail.com": "snreynolds",
    "35742124+0xbyt4@users.noreply.github.com": "0xbyt4",
    "71184274+MassiveMassimo@users.noreply.github.com": "MassiveMassimo",
    "massivemassimo@users.noreply.github.com": "MassiveMassimo",
    "82637225+kshitijk4poor@users.noreply.github.com": "kshitijk4poor",
+    "keifergu@tencent.com": "keifergu",
    "kshitijk4poor@users.noreply.github.com": "kshitijk4poor",
+    "abner.the.foreman@agentmail.to": "Abnertheforeman",
+    "harryykyle1@gmail.com": "hharry11",
    "kshitijk4poor@gmail.com": "kshitijk4poor",
    "16443023+stablegenius49@users.noreply.github.com": "stablegenius49",
    "185121704+stablegenius49@users.noreply.github.com": "stablegenius49",
    "101283333+batuhankocyigit@users.noreply.github.com": "batuhankocyigit",
+    "255305877+ismell0992-afk@users.noreply.github.com": "ismell0992-afk",
    "valdi.jorge@gmail.com": "jvcl",
    "francip@gmail.com": "francip",
    "omni@comelse.com": "omnissiah-comelse",
@@ -92,6 +99,9 @@ AUTHOR_MAP = {
    "135070653+sgaofen@users.noreply.github.com": "sgaofen",
    "nocoo@users.noreply.github.com": "nocoo",
    "30841158+n-WN@users.noreply.github.com": "n-WN",
+    "tsuijinglei@gmail.com": "hiddenpuppy",
+    "jerome@clawwork.ai": "HiddenPuppy",
+    "wysie@users.noreply.github.com": "Wysie",
    "leoyuan0099@gmail.com": "keyuyuan",
    "bxzt2006@163.com": "Only-Code-A",
    "i@troy-y.org": "TroyMitchell911",
@@ -99,6 +109,8 @@ AUTHOR_MAP = {
    "hansnow@users.noreply.github.com": "hansnow",
    "134848055+UNLINEARITY@users.noreply.github.com": "UNLINEARITY",
    "ben.burtenshaw@gmail.com": "burtenshaw",
+    "roopaknijhara@gmail.com": "rnijhara",
+    "josephzcan@gmail.com": "j0sephz",
    # contributors (manual mapping from git names)
    "ahmedsherif95@gmail.com": "asheriif",
    "liujinkun@bytedance.com": "liujinkun2025",
@@ -134,6 +146,7 @@ AUTHOR_MAP = {
    "331214+counterposition@users.noreply.github.com": "counterposition",
    "blspear@gmail.com": "BrennerSpear",
    "akhater@gmail.com": "akhater",
+    "Cos_Admin@PTG-COS.lodluvup4uaudnm3ycd14giyug.xx.internal.cloudapp.net": "akhater",
    "239876380+handsdiff@users.noreply.github.com": "handsdiff",
    "hesapacicam112@gmail.com": "etherman-os",
    "mark.ramsell@rivermounts.com": "mark-ramsell",
@@ -175,6 +188,7 @@ AUTHOR_MAP = {
    "adavyasharma@gmail.com": "adavyas",
    "acaayush1111@gmail.com": "aayushchaudhary",
    "jason@outland.art": "jasonoutland",
+    "73175452+Magaav@users.noreply.github.com": "Magaav",
    "mrflu1918@proton.me": "SPANISHFLU",
    "morganemoss@gmai.com": "mormio",
    "kopjop926@gmail.com": "cesareth",
@@ -279,6 +293,7 @@ AUTHOR_MAP = {
    "srhtsrht17@gmail.com": "Sertug17",
    "stephenschoettler@gmail.com": "stephenschoettler",
    "tanishq231003@gmail.com": "yyovil",
+    "taosiyuan163@153.com": "taosiyuan163",
    "tesseracttars@gmail.com": "tesseracttars-creator",
    "tianliangjay@gmail.com": "xingkongliang",
    "tranquil_flow@protonmail.com": "Tranquil-Flow",
@@ -335,6 +350,64 @@ AUTHOR_MAP = {
    "shalompmc0505@naver.com": "pinion05",
    "105142614+VTRiot@users.noreply.github.com": "VTRiot",
    "vivien000812@gmail.com": "iamagenius00",
+    "89228157+Feranmi10@users.noreply.github.com": "Feranmi10",
+    "simon@gtcl.us": "simon-gtcl",
+    "suzukaze.haduki@gmail.com": "houko",
+    "cliff@cigii.com": "cgarwood82",
+    "anna@oa.ke": "anna-oake",
+    "jaffarkeikei@gmail.com": "jaffarkeikei",
+    "hxp@hxp.plus": "hxp-plus",
+    "3580442280@qq.com": "Tianworld",
+    "wujianxu91@gmail.com": "wujhsu",
+    "zhrh120@gmail.com": "niyoh120",
+    "vrinek@hey.com": "vrinek",
+    "268198004+xandersbell@users.noreply.github.com": "xandersbell",
+    "somme4096@gmail.com": "Somme4096",
+    "brian@tiuxo.com": "brianclemens",
+    "25944632+yudaiyan@users.noreply.github.com": "yudaiyan",
+    "chayton@sina.com": "ycbai",
+    "longsizhuo@gmail.com": "longsizhuo",
+    "chenb19870707@gmail.com": "ms-alan",
+    "276886827+WuTianyi123@users.noreply.github.com": "WuTianyi123",
+    "22549957+li0near@users.noreply.github.com": "li0near",
+    "23434080+sicnuyudidi@users.noreply.github.com": "sicnuyudidi",
+    "haimu0x0@proton.me": "haimu0x",
+    "abdelmajidnidnasser1@gmail.com": "NIDNASSER-Abdelmajid",
+    "projectadmin@wit.id": "projectadmin-dev",
+    "mrigankamondal10@gmail.com": "Dev-Mriganka",
+    "132275809+shushuzn@users.noreply.github.com": "shushuzn",
+    "ibrahimozsarac@gmail.com": "iborazzi",
+    "130149563+A-afflatus@users.noreply.github.com": "A-afflatus",
+    "huangkwell@163.com": "huangke19",
+    "tanishq@exa.ai": "10ishq",
+    "363708+christopherwoodall@users.noreply.github.com": "christopherwoodall",
+    "zhang9w0v5@qq.com": "zhang9w0v5",
+    "fuleinist@outlook.com": "fuleinist",
+    "43494187+Llugaes@users.noreply.github.com": "Llugaes",
+    "fengtianyu88@users.noreply.github.com": "fengtianyu88",
+    "l.moncany@gmail.com": "lmoncany",
+    "fatinghenji@users.noreply.github.com": "fatinghenji",
+    "xin.peng.dr@gmail.com": "xinpengdr",
+    "mike@mikewaters.net": "mikewaters",
+    "65117428+WadydX@users.noreply.github.com": "WadydX",
+    "216480837+isaachuangGMICLOUD@users.noreply.github.com": "isaachuangGMICLOUD",
+    "nukuom976228@gmail.com": "hsy5571616",
+    "11462216+Nan93@users.noreply.github.com": "Nan93",
+    "l973401489@126.com": "zhouxiaoya12",
+    "373119611@qq.com": "roytian1217",
+    "brett@brettbrewer.com": "minorgod",
+    "67779267+wenhao7@users.noreply.github.com": "wenhao7",
+    "git@yzx9.xyz": "yzx9",
+    "nilesh@cloudgeni.us": "lvnilesh",
+    "63502660+azhengbot@users.noreply.github.com": "azhengbot",
+    "sharvil.saxena@gmail.com": "sharziki",
+    "yuanhe@minimaxi.com": "RyanLee-Dev",
+    "curtis992250@gmail.com": "TaroballzChen",
+    "92638503+Lind3ey@users.noreply.github.com": "Lind3ey",
+    "1352808998@qq.com": "phpoh",
+    "caliberoviv@gmail.com": "vivganes",
+    "michaelfackerell@gmail.com": "MikeFac",
+    "18024642@qq.com": "GuyCui",
 }


@@ -613,7 +686,7 @@ def get_commits(since_tag=None):
    return commits


-def get_pr_number(subject: str) -> str:
+def get_pr_number(subject: str) -> Optional[str]:
    """Extract PR number from commit subject if present."""
    match = re.search(r"#(\d+)", subject)
    if match:
@@ -19,18 +19,23 @@ Environment Variables:
    OPENROUTER_API_KEY: API key for OpenRouter (required for agent)
 """

-import asyncio
 import os
 import sys
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+import asyncio
 from pathlib import Path

 import fire
 import yaml

+from hermes_constants import get_hermes_home, OPENROUTER_BASE_URL
+
 # Load .env from ~/.hermes/.env first, then project root as dev fallback.
 # User-managed env files should override stale shell exports on restart.
 _hermes_home = get_hermes_home()
-_project_env = Path(__file__).parent / '.env'
+_project_env = Path(__file__).parent.parent / '.env'

 from hermes_cli.env_loader import load_hermes_dotenv

@@ -60,8 +65,6 @@ from tools.rl_training_tool import get_missing_keys
 # Config Loading
 # ============================================================================

-from hermes_constants import get_hermes_home, OPENROUTER_BASE_URL
-
 DEFAULT_MODEL = "anthropic/claude-opus-4.5"
 DEFAULT_BASE_URL = OPENROUTER_BASE_URL

@@ -267,7 +267,7 @@ def run_compression(input_dir: Path, output_dir: Path, config_path: str):
    # Import the compressor
    import sys
    sys.path.insert(0, str(Path(__file__).parent.parent))
-    from trajectory_compressor import TrajectoryCompressor, CompressionConfig
+    from scripts.trajectory_compressor import TrajectoryCompressor, CompressionConfig
    
    print(f"\n🗜️  Running trajectory compression...")
    print(f"   Input: {input_dir}")
@@ -30,14 +30,18 @@ Usage:
    python trajectory_compressor.py --input=data/my_run --sample_percent=10
 """

-import json
 import os
+import sys
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+import json
 import time
 import yaml
 import logging
 import asyncio
 from pathlib import Path
-from typing import List, Dict, Any, Optional, Tuple, Callable
+from typing import List, Dict, Any, Optional, Tuple, Callable, cast
 from dataclasses import dataclass, field
 from datetime import datetime

@@ -52,7 +56,7 @@ from agent.retry_utils import jittered_backoff
 from hermes_cli.env_loader import load_hermes_dotenv

 _hermes_home = get_hermes_home()
-_project_env = Path(__file__).parent / ".env"
+_project_env = Path(__file__).parent.parent / ".env"
 load_hermes_dotenv(hermes_home=_hermes_home, project_env=_project_env)


@@ -75,7 +79,7 @@ def _effective_temperature_for_model(
    if fixed_temperature is OMIT_TEMPERATURE:
        return None  # caller must omit temperature
    if fixed_temperature is not None:
-        return fixed_temperature
+        return cast(float, fixed_temperature)
    return requested_temperature


@@ -607,11 +611,14 @@ Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""
                
                if getattr(self, '_use_call_llm', False):
                    from agent.auxiliary_client import call_llm
+                    _call_llm_kwargs: dict = {}
+                    if summary_temperature is not None:
+                        _call_llm_kwargs["temperature"] = summary_temperature
                    response = call_llm(
                        provider=self._llm_provider,
                        model=self.config.summarization_model,
                        messages=[{"role": "user", "content": prompt}],
-                        temperature=summary_temperature,
+                        **_call_llm_kwargs,
                        max_tokens=self.config.summary_target_tokens * 2,
                    )
                else:
@@ -623,20 +630,21 @@ Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""
                    if summary_temperature is not None:
                        _create_kwargs["temperature"] = summary_temperature
                    response = self.client.chat.completions.create(**_create_kwargs)
-                
+
                summary = self._coerce_summary_content(response.choices[0].message.content)
                return self._ensure_summary_prefix(summary)
-                
+
            except Exception as e:
                metrics.summarization_errors += 1
                self.logger.warning(f"Summarization attempt {attempt + 1} failed: {e}")
-                
+
                if attempt < self.config.max_retries - 1:
                    time.sleep(jittered_backoff(attempt + 1, base_delay=self.config.retry_delay, max_delay=30.0))
                else:
                    # Fallback: create a basic summary
                    return "[CONTEXT SUMMARY]: [Summary generation failed - previous turns contained tool calls and responses that have been compressed to save context space.]"
-    
+        raise AssertionError("unreachable: retry loop exhausted")
+
    async def _generate_summary_async(self, content: str, metrics: TrajectoryMetrics) -> str:
        """
        Generate a summary of the compressed turns using OpenRouter (async version).
@@ -676,11 +684,14 @@ Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""
                
                if getattr(self, '_use_call_llm', False):
                    from agent.auxiliary_client import async_call_llm
+                    _async_llm_kwargs: dict = {}
+                    if summary_temperature is not None:
+                        _async_llm_kwargs["temperature"] = summary_temperature
                    response = await async_call_llm(
                        provider=self._llm_provider,
                        model=self.config.summarization_model,
                        messages=[{"role": "user", "content": prompt}],
-                        temperature=summary_temperature,
+                        **_async_llm_kwargs,
                        max_tokens=self.config.summary_target_tokens * 2,
                    )
                else:
@@ -692,20 +703,21 @@ Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""
                    if summary_temperature is not None:
                        _create_kwargs["temperature"] = summary_temperature
                    response = await self._get_async_client().chat.completions.create(**_create_kwargs)
-                
+
                summary = self._coerce_summary_content(response.choices[0].message.content)
                return self._ensure_summary_prefix(summary)
-                
+
            except Exception as e:
                metrics.summarization_errors += 1
                self.logger.warning(f"Summarization attempt {attempt + 1} failed: {e}")
-                
+
                if attempt < self.config.max_retries - 1:
                    await asyncio.sleep(jittered_backoff(attempt + 1, base_delay=self.config.retry_delay, max_delay=30.0))
                else:
                    # Fallback: create a basic summary
                    return "[CONTEXT SUMMARY]: [Summary generation failed - previous turns contained tool calls and responses that have been compressed to save context space.]"
-    
+        raise AssertionError("unreachable: retry loop exhausted")
+
    def compress_trajectory(
        self,
        trajectory: List[Dict[str, str]]
@@ -8,7 +8,7 @@
      "name": "hermes-whatsapp-bridge",
      "version": "1.0.0",
      "dependencies": {
-        "@whiskeysockets/baileys": "WhiskeySockets/Baileys#fix/abprops-abt-fetch",
+        "@whiskeysockets/baileys": "WhiskeySockets/Baileys#01047debd81beb20da7b7779b08edcb06aa03770",
        "express": "^4.21.0",
        "pino": "^9.0.0",
        "qrcode-terminal": "^0.12.0"
@@ -8,7 +8,7 @@ metadata:
  hermes:
    tags: [wiki, knowledge-base, research, notes, markdown, rag-alternative]
    category: research
-    related_skills: [obsidian, arxiv, agentic-research-ideas]
+    related_skills: [obsidian, arxiv]
 ---

 # Karpathy's LLM Wiki
@@ -18,12 +18,12 @@ from agent.anthropic_adapter import (
    convert_messages_to_anthropic,
    convert_tools_to_anthropic,
    is_claude_code_token_valid,
-    normalize_anthropic_response,
    normalize_model_name,
    read_claude_code_credentials,
    resolve_anthropic_token,
    run_oauth_setup_token,
 )
+from agent.transports import get_transport


 # ---------------------------------------------------------------------------
@@ -1242,10 +1242,10 @@ class TestNormalizeResponse:

    def test_text_response(self):
        block = SimpleNamespace(type="text", text="Hello world")
-        msg, reason = normalize_anthropic_response(self._make_response([block]))
-        assert msg.content == "Hello world"
-        assert reason == "stop"
-        assert msg.tool_calls is None
+        nr = get_transport("anthropic_messages").normalize_response(self._make_response([block]))
+        assert nr.content == "Hello world"
+        assert nr.finish_reason == "stop"
+        assert nr.tool_calls is None

    def test_tool_use_response(self):
        blocks = [
@@ -1257,24 +1257,24 @@ class TestNormalizeResponse:
                input={"query": "test"},
            ),
        ]
-        msg, reason = normalize_anthropic_response(
+        nr = get_transport("anthropic_messages").normalize_response(
            self._make_response(blocks, "tool_use")
        )
-        assert msg.content == "Searching..."
-        assert reason == "tool_calls"
-        assert len(msg.tool_calls) == 1
-        assert msg.tool_calls[0].function.name == "search"
-        assert json.loads(msg.tool_calls[0].function.arguments) == {"query": "test"}
+        assert nr.content == "Searching..."
+        assert nr.finish_reason == "tool_calls"
+        assert len(nr.tool_calls) == 1
+        assert nr.tool_calls[0].name == "search"
+        assert json.loads(nr.tool_calls[0].arguments) == {"query": "test"}

    def test_thinking_response(self):
        blocks = [
            SimpleNamespace(type="thinking", thinking="Let me reason about this..."),
            SimpleNamespace(type="text", text="The answer is 42."),
        ]
-        msg, reason = normalize_anthropic_response(self._make_response(blocks))
-        assert msg.content == "The answer is 42."
-        assert msg.reasoning == "Let me reason about this..."
-        assert msg.reasoning_details == [{"type": "thinking", "thinking": "Let me reason about this..."}]
+        nr = get_transport("anthropic_messages").normalize_response(self._make_response(blocks))
+        assert nr.content == "The answer is 42."
+        assert nr.reasoning == "Let me reason about this..."
+        assert nr.provider_data["reasoning_details"] == [{"type": "thinking", "thinking": "Let me reason about this..."}]

    def test_thinking_response_preserves_signature(self):
        blocks = [
@@ -1285,24 +1285,24 @@ class TestNormalizeResponse:
                redacted=False,
            ),
        ]
-        msg, _ = normalize_anthropic_response(self._make_response(blocks))
-        assert msg.reasoning_details[0]["signature"] == "opaque_signature"
-        assert msg.reasoning_details[0]["thinking"] == "Let me reason about this..."
+        nr = get_transport("anthropic_messages").normalize_response(self._make_response(blocks))
+        assert nr.provider_data["reasoning_details"][0]["signature"] == "opaque_signature"
+        assert nr.provider_data["reasoning_details"][0]["thinking"] == "Let me reason about this..."

    def test_stop_reason_mapping(self):
        block = SimpleNamespace(type="text", text="x")
-        _, r1 = normalize_anthropic_response(
+        nr1 = get_transport("anthropic_messages").normalize_response(
            self._make_response([block], "end_turn")
        )
-        _, r2 = normalize_anthropic_response(
+        nr2 = get_transport("anthropic_messages").normalize_response(
            self._make_response([block], "tool_use")
        )
-        _, r3 = normalize_anthropic_response(
+        nr3 = get_transport("anthropic_messages").normalize_response(
            self._make_response([block], "max_tokens")
        )
-        assert r1 == "stop"
-        assert r2 == "tool_calls"
-        assert r3 == "length"
+        assert nr1.finish_reason == "stop"
+        assert nr2.finish_reason == "tool_calls"
+        assert nr3.finish_reason == "length"

    def test_stop_reason_refusal_and_context_exceeded(self):
        # Claude 4.5+ introduced two new stop_reason values the Messages API
@@ -1310,24 +1310,24 @@ class TestNormalizeResponse:
        # handlers already understand, instead of silently collapsing to
        # "stop" (old behavior).
        block = SimpleNamespace(type="text", text="")
-        _, refusal_reason = normalize_anthropic_response(
+        nr_refusal = get_transport("anthropic_messages").normalize_response(
            self._make_response([block], "refusal")
        )
-        _, overflow_reason = normalize_anthropic_response(
+        nr_overflow = get_transport("anthropic_messages").normalize_response(
            self._make_response([block], "model_context_window_exceeded")
        )
-        assert refusal_reason == "content_filter"
-        assert overflow_reason == "length"
+        assert nr_refusal.finish_reason == "content_filter"
+        assert nr_overflow.finish_reason == "length"

    def test_no_text_content(self):
        block = SimpleNamespace(
            type="tool_use", id="tc_1", name="search", input={"q": "hi"}
        )
-        msg, reason = normalize_anthropic_response(
+        nr = get_transport("anthropic_messages").normalize_response(
            self._make_response([block], "tool_use")
        )
-        assert msg.content is None
-        assert len(msg.tool_calls) == 1
+        assert nr.content is None
+        assert len(nr.tool_calls) == 1


 # ---------------------------------------------------------------------------
@@ -1659,3 +1659,91 @@ class TestToolChoice:
            tool_choice="search",
        )
        assert kwargs["tool_choice"] == {"type": "tool", "name": "search"}
+
+
+
+# ---------------------------------------------------------------------------
+# max_tokens resolver — openclaw/openclaw#66664 port
+# ---------------------------------------------------------------------------
+
+from agent.anthropic_adapter import (
+    _resolve_positive_anthropic_max_tokens,
+    _resolve_anthropic_messages_max_tokens,
+)
+
+
+class TestResolvePositiveMaxTokens:
+    """Unit tests for the positive-int resolver helper."""
+
+    def test_positive_int_passes_through(self):
+        assert _resolve_positive_anthropic_max_tokens(8192) == 8192
+
+    def test_zero_returns_none(self):
+        assert _resolve_positive_anthropic_max_tokens(0) is None
+
+    def test_negative_int_returns_none(self):
+        assert _resolve_positive_anthropic_max_tokens(-1) is None
+        assert _resolve_positive_anthropic_max_tokens(-500) is None
+
+    def test_fractional_float_floored_and_kept_if_positive(self):
+        # 8192.7 -> 8192, still positive
+        assert _resolve_positive_anthropic_max_tokens(8192.7) == 8192
+
+    def test_small_positive_float_below_one_returns_none(self):
+        # 0.5 floors to 0, which is not positive
+        assert _resolve_positive_anthropic_max_tokens(0.5) is None
+
+    def test_negative_float_returns_none(self):
+        assert _resolve_positive_anthropic_max_tokens(-1.5) is None
+
+    def test_nan_returns_none(self):
+        assert _resolve_positive_anthropic_max_tokens(float("nan")) is None
+
+    def test_infinity_returns_none(self):
+        assert _resolve_positive_anthropic_max_tokens(float("inf")) is None
+        assert _resolve_positive_anthropic_max_tokens(float("-inf")) is None
+
+    def test_bool_true_returns_none(self):
+        # True is an int subclass but semantically never a real max_tokens value
+        assert _resolve_positive_anthropic_max_tokens(True) is None
+        assert _resolve_positive_anthropic_max_tokens(False) is None
+
+    def test_string_returns_none(self):
+        assert _resolve_positive_anthropic_max_tokens("8192") is None
+
+    def test_none_returns_none(self):
+        assert _resolve_positive_anthropic_max_tokens(None) is None
+
+
+class TestResolveMessagesMaxTokens:
+    """Integration tests for the full Messages resolver."""
+
+    def test_positive_requested_wins(self):
+        assert _resolve_anthropic_messages_max_tokens(
+            8192, "claude-opus-4-6"
+        ) == 8192
+
+    def test_zero_falls_back_to_model_default(self):
+        # Should use _get_anthropic_max_output(model), not crash
+        result = _resolve_anthropic_messages_max_tokens(0, "claude-opus-4-6")
+        assert result > 0
+
+    def test_none_falls_back_to_model_default(self):
+        result = _resolve_anthropic_messages_max_tokens(None, "claude-opus-4-6")
+        assert result > 0
+
+    def test_negative_falls_back_to_model_default(self):
+        # Previously leaked -1 to the API; now falls back safely
+        result = _resolve_anthropic_messages_max_tokens(-1, "claude-opus-4-6")
+        assert result > 0
+
+    def test_fractional_positive_floored(self):
+        assert _resolve_anthropic_messages_max_tokens(
+            8192.5, "claude-opus-4-6"
+        ) == 8192
+
+    def test_sub_one_float_falls_back(self):
+        # 0.5 floors to 0 -> not positive -> falls back to model ceiling
+        result = _resolve_anthropic_messages_max_tokens(0.5, "claude-opus-4-6")
+        assert result > 0
+        assert result != 0
@@ -1,238 +0,0 @@
-"""Regression tests: normalize_anthropic_response_v2 vs v1.
-
-Constructs mock Anthropic responses and asserts that the v2 function
-(returning NormalizedResponse) produces identical field values to the
-original v1 function (returning SimpleNamespace + finish_reason).
-"""
-
-import json
-import pytest
-from types import SimpleNamespace
-
-from agent.anthropic_adapter import (
-    normalize_anthropic_response,
-    normalize_anthropic_response_v2,
-)
-from agent.transports.types import NormalizedResponse, ToolCall
-
-
-# ---------------------------------------------------------------------------
-# Helpers to build mock Anthropic SDK responses
-# ---------------------------------------------------------------------------
-
-def _text_block(text: str):
-    return SimpleNamespace(type="text", text=text)
-
-
-def _thinking_block(thinking: str, signature: str = "sig_abc"):
-    return SimpleNamespace(type="thinking", thinking=thinking, signature=signature)
-
-
-def _tool_use_block(id: str, name: str, input: dict):
-    return SimpleNamespace(type="tool_use", id=id, name=name, input=input)
-
-
-def _response(content_blocks, stop_reason="end_turn"):
-    return SimpleNamespace(
-        content=content_blocks,
-        stop_reason=stop_reason,
-        usage=SimpleNamespace(
-            input_tokens=10,
-            output_tokens=5,
-        ),
-    )
-
-
-# ---------------------------------------------------------------------------
-# Tests
-# ---------------------------------------------------------------------------
-
-class TestTextOnly:
-    """Text-only response — no tools, no thinking."""
-
-    def setup_method(self):
-        self.resp = _response([_text_block("Hello world")])
-        self.v1_msg, self.v1_finish = normalize_anthropic_response(self.resp)
-        self.v2 = normalize_anthropic_response_v2(self.resp)
-
-    def test_type(self):
-        assert isinstance(self.v2, NormalizedResponse)
-
-    def test_content_matches(self):
-        assert self.v2.content == self.v1_msg.content
-
-    def test_finish_reason_matches(self):
-        assert self.v2.finish_reason == self.v1_finish
-
-    def test_no_tool_calls(self):
-        assert self.v2.tool_calls is None
-        assert self.v1_msg.tool_calls is None
-
-    def test_no_reasoning(self):
-        assert self.v2.reasoning is None
-        assert self.v1_msg.reasoning is None
-
-
-class TestWithToolCalls:
-    """Response with tool calls."""
-
-    def setup_method(self):
-        self.resp = _response(
-            [
-                _text_block("I'll check that"),
-                _tool_use_block("toolu_abc", "terminal", {"command": "ls"}),
-                _tool_use_block("toolu_def", "read_file", {"path": "/tmp"}),
-            ],
-            stop_reason="tool_use",
-        )
-        self.v1_msg, self.v1_finish = normalize_anthropic_response(self.resp)
-        self.v2 = normalize_anthropic_response_v2(self.resp)
-
-    def test_finish_reason(self):
-        assert self.v2.finish_reason == "tool_calls"
-        assert self.v1_finish == "tool_calls"
-
-    def test_tool_call_count(self):
-        assert len(self.v2.tool_calls) == 2
-        assert len(self.v1_msg.tool_calls) == 2
-
-    def test_tool_call_ids_match(self):
-        for i in range(2):
-            assert self.v2.tool_calls[i].id == self.v1_msg.tool_calls[i].id
-
-    def test_tool_call_names_match(self):
-        assert self.v2.tool_calls[0].name == "terminal"
-        assert self.v2.tool_calls[1].name == "read_file"
-        for i in range(2):
-            assert self.v2.tool_calls[i].name == self.v1_msg.tool_calls[i].function.name
-
-    def test_tool_call_arguments_match(self):
-        for i in range(2):
-            assert self.v2.tool_calls[i].arguments == self.v1_msg.tool_calls[i].function.arguments
-
-    def test_content_preserved(self):
-        assert self.v2.content == self.v1_msg.content
-        assert "check that" in self.v2.content
-
-
-class TestWithThinking:
-    """Response with thinking blocks (Claude 3.5+ extended thinking)."""
-
-    def setup_method(self):
-        self.resp = _response([
-            _thinking_block("Let me think about this carefully..."),
-            _text_block("The answer is 42."),
-        ])
-        self.v1_msg, self.v1_finish = normalize_anthropic_response(self.resp)
-        self.v2 = normalize_anthropic_response_v2(self.resp)
-
-    def test_reasoning_matches(self):
-        assert self.v2.reasoning == self.v1_msg.reasoning
-        assert "think about this" in self.v2.reasoning
-
-    def test_reasoning_details_in_provider_data(self):
-        v1_details = self.v1_msg.reasoning_details
-        v2_details = self.v2.provider_data.get("reasoning_details") if self.v2.provider_data else None
-        assert v1_details is not None
-        assert v2_details is not None
-        assert len(v2_details) == len(v1_details)
-
-    def test_content_excludes_thinking(self):
-        assert self.v2.content == "The answer is 42."
-
-
-class TestMixed:
-    """Response with thinking + text + tool calls."""
-
-    def setup_method(self):
-        self.resp = _response(
-            [
-                _thinking_block("Planning my approach..."),
-                _text_block("I'll run the command"),
-                _tool_use_block("toolu_xyz", "terminal", {"command": "pwd"}),
-            ],
-            stop_reason="tool_use",
-        )
-        self.v1_msg, self.v1_finish = normalize_anthropic_response(self.resp)
-        self.v2 = normalize_anthropic_response_v2(self.resp)
-
-    def test_all_fields_present(self):
-        assert self.v2.content is not None
-        assert self.v2.tool_calls is not None
-        assert self.v2.reasoning is not None
-        assert self.v2.finish_reason == "tool_calls"
-
-    def test_content_matches(self):
-        assert self.v2.content == self.v1_msg.content
-
-    def test_reasoning_matches(self):
-        assert self.v2.reasoning == self.v1_msg.reasoning
-
-    def test_tool_call_matches(self):
-        assert self.v2.tool_calls[0].id == self.v1_msg.tool_calls[0].id
-        assert self.v2.tool_calls[0].name == self.v1_msg.tool_calls[0].function.name
-
-
-class TestStopReasons:
-    """Verify finish_reason mapping matches between v1 and v2."""
-
-    @pytest.mark.parametrize("stop_reason,expected", [
-        ("end_turn", "stop"),
-        ("tool_use", "tool_calls"),
-        ("max_tokens", "length"),
-        ("stop_sequence", "stop"),
-        ("refusal", "content_filter"),
-        ("model_context_window_exceeded", "length"),
-        ("unknown_future_reason", "stop"),
-    ])
-    def test_stop_reason_mapping(self, stop_reason, expected):
-        resp = _response([_text_block("x")], stop_reason=stop_reason)
-        v1_msg, v1_finish = normalize_anthropic_response(resp)
-        v2 = normalize_anthropic_response_v2(resp)
-        assert v2.finish_reason == v1_finish == expected
-
-
-class TestStripToolPrefix:
-    """Verify mcp_ prefix stripping works identically."""
-
-    def test_prefix_stripped(self):
-        resp = _response(
-            [_tool_use_block("toolu_1", "mcp_terminal", {"cmd": "ls"})],
-            stop_reason="tool_use",
-        )
-        v1_msg, _ = normalize_anthropic_response(resp, strip_tool_prefix=True)
-        v2 = normalize_anthropic_response_v2(resp, strip_tool_prefix=True)
-        assert v1_msg.tool_calls[0].function.name == "terminal"
-        assert v2.tool_calls[0].name == "terminal"
-
-    def test_prefix_kept(self):
-        resp = _response(
-            [_tool_use_block("toolu_1", "mcp_terminal", {"cmd": "ls"})],
-            stop_reason="tool_use",
-        )
-        v1_msg, _ = normalize_anthropic_response(resp, strip_tool_prefix=False)
-        v2 = normalize_anthropic_response_v2(resp, strip_tool_prefix=False)
-        assert v1_msg.tool_calls[0].function.name == "mcp_terminal"
-        assert v2.tool_calls[0].name == "mcp_terminal"
-
-
-class TestEdgeCases:
-    """Edge cases: empty content, no blocks, etc."""
-
-    def test_empty_content_blocks(self):
-        resp = _response([])
-        v1_msg, v1_finish = normalize_anthropic_response(resp)
-        v2 = normalize_anthropic_response_v2(resp)
-        assert v2.content == v1_msg.content
-        assert v2.content is None
-
-    def test_no_reasoning_details_means_none_provider_data(self):
-        resp = _response([_text_block("hi")])
-        v2 = normalize_anthropic_response_v2(resp)
-        assert v2.provider_data is None
-
-    def test_v2_returns_dataclass_not_namespace(self):
-        resp = _response([_text_block("hi")])
-        v2 = normalize_anthropic_response_v2(resp)
-        assert isinstance(v2, NormalizedResponse)
-        assert not isinstance(v2, SimpleNamespace)
@@ -253,6 +253,35 @@ class TestSummaryPrefixNormalization:


 class TestCompressWithClient:
+    def test_system_content_list_gets_compression_note_without_crashing(self):
+        mock_response = MagicMock()
+        mock_response.choices = [MagicMock()]
+        mock_response.choices[0].message.content = "summary text"
+
+        with patch("agent.context_compressor.get_model_context_length", return_value=100000):
+            c = ContextCompressor(model="test", quiet_mode=True, protect_first_n=2, protect_last_n=2)
+
+        msgs = [
+            {"role": "system", "content": [{"type": "text", "text": "system prompt"}]},
+            {"role": "user", "content": "msg 1"},
+            {"role": "assistant", "content": "msg 2"},
+            {"role": "user", "content": "msg 3"},
+            {"role": "assistant", "content": "msg 4"},
+            {"role": "user", "content": "msg 5"},
+            {"role": "assistant", "content": "msg 6"},
+            {"role": "user", "content": "msg 7"},
+        ]
+
+        with patch("agent.context_compressor.call_llm", return_value=mock_response):
+            result = c.compress(msgs)
+
+        assert isinstance(result[0]["content"], list)
+        assert any(
+            isinstance(block, dict)
+            and "compacted into a handoff summary" in block.get("text", "")
+            for block in result[0]["content"]
+        )
+
    def test_summarization_path(self):
        mock_client = MagicMock()
        mock_response = MagicMock()
@@ -460,6 +489,41 @@ class TestCompressWithClient:
        assert len(first_tail) == 1
        assert "summary text" in first_tail[0]["content"]

+    def test_double_collision_merges_summary_into_list_tail_content(self):
+        """Structured tail content should accept a merged summary without TypeError."""
+        mock_response = MagicMock()
+        mock_response.choices = [MagicMock()]
+        mock_response.choices[0].message.content = "summary text"
+
+        with patch("agent.context_compressor.get_model_context_length", return_value=100000):
+            c = ContextCompressor(model="test", quiet_mode=True, protect_first_n=3, protect_last_n=3)
+
+        msgs = [
+            {"role": "system", "content": "system prompt"},
+            {"role": "user", "content": "msg 1"},
+            {"role": "assistant", "content": "msg 2"},
+            {"role": "user", "content": "msg 3"},
+            {"role": "assistant", "content": "msg 4"},
+            {"role": "user", "content": "msg 5"},
+            {"role": "user", "content": [{"type": "text", "text": "msg 6"}]},
+            {"role": "assistant", "content": "msg 7"},
+            {"role": "user", "content": "msg 8"},
+        ]
+
+        with patch("agent.context_compressor.call_llm", return_value=mock_response):
+            result = c.compress(msgs)
+
+        merged_tail = next(
+            m for m in result
+            if m.get("role") == "user" and isinstance(m.get("content"), list)
+        )
+        assert isinstance(merged_tail["content"], list)
+        assert "summary text" in merged_tail["content"][0]["text"]
+        assert any(
+            isinstance(block, dict) and block.get("text") == "msg 6"
+            for block in merged_tail["content"]
+        )
+
    def test_double_collision_user_head_assistant_tail(self):
        """Reverse double collision: head ends with 'user', tail starts with 'assistant'.
        summary='assistant' collides with tail, 'user' collides with head → merge."""
@@ -1162,3 +1162,75 @@ def test_load_pool_does_not_seed_qwen_oauth_when_no_token(tmp_path, monkeypatch)

    assert not pool.has_credentials()
    assert pool.entries() == []
+
+
+def _build_pool_with_entries(tmp_path, monkeypatch, provider="openrouter", entries=None):
+    """Helper: build a CredentialPool directly without seeding side-effects."""
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
+    monkeypatch.setattr("agent.credential_pool._seed_from_singletons", lambda p, e: (False, set()))
+    monkeypatch.setattr("agent.credential_pool._seed_from_env", lambda p, e: (False, set()))
+    if entries is None:
+        entries = [
+            {
+                "id": "cred-1",
+                "label": "primary",
+                "auth_type": "api_key",
+                "priority": 0,
+                "source": "manual",
+                "access_token": "tok-1",
+            },
+            {
+                "id": "cred-2",
+                "label": "secondary",
+                "auth_type": "api_key",
+                "priority": 1,
+                "source": "manual",
+                "access_token": "tok-2",
+            },
+        ]
+    _write_auth_store(tmp_path, {"version": 1, "credential_pool": {provider: entries}})
+    from agent.credential_pool import load_pool
+    return load_pool(provider)
+
+
+def test_remove_entry_removes_by_id(tmp_path, monkeypatch):
+    """remove_entry should remove the entry with matching id and return it."""
+    pool = _build_pool_with_entries(tmp_path, monkeypatch)
+
+    removed = pool.remove_entry("cred-1")
+
+    assert removed is not None
+    assert removed.id == "cred-1"
+    remaining_ids = [e.id for e in pool.entries()]
+    assert "cred-1" not in remaining_ids
+    assert "cred-2" in remaining_ids
+
+
+def test_remove_entry_returns_none_for_unknown_id(tmp_path, monkeypatch):
+    """remove_entry returns None when no entry matches the given id."""
+    pool = _build_pool_with_entries(tmp_path, monkeypatch)
+
+    result = pool.remove_entry("nonexistent-id")
+
+    assert result is None
+    # Pool should still have both original entries
+    assert len(pool.entries()) == 2
+
+
+def test_remove_entry_renumbers_priorities(tmp_path, monkeypatch):
+    """After remove_entry, remaining entries receive sequential priorities 0, 1, ..."""
+    pool = _build_pool_with_entries(
+        tmp_path,
+        monkeypatch,
+        entries=[
+            {"id": "cred-1", "label": "a", "auth_type": "api_key", "priority": 0, "source": "manual", "access_token": "tok-1"},
+            {"id": "cred-2", "label": "b", "auth_type": "api_key", "priority": 1, "source": "manual", "access_token": "tok-2"},
+            {"id": "cred-3", "label": "c", "auth_type": "api_key", "priority": 2, "source": "manual", "access_token": "tok-3"},
+        ],
+    )
+
+    pool.remove_entry("cred-2")
+
+    remaining = sorted(pool.entries(), key=lambda e: e.priority)
+    assert [e.priority for e in remaining] == [0, 1]
+    assert [e.id for e in remaining] == ["cred-1", "cred-3"]
@@ -298,9 +298,15 @@ class TestClassifyApiError:
        assert result.retryable is False

    def test_404_generic(self):
+        # Generic 404 with no "model not found" signal — common for local
+        # llama.cpp/Ollama/vLLM endpoints with slightly wrong paths.  Treat
+        # as unknown (retryable) so the real error surfaces, rather than
+        # claiming the model is missing and silently falling back.
        e = MockAPIError("Not Found", status_code=404)
        result = classify_api_error(e)
-        assert result.reason == FailoverReason.model_not_found
+        assert result.reason == FailoverReason.unknown
+        assert result.retryable is True
+        assert result.should_fallback is False

    # ── Payload too large ──

@@ -943,3 +949,94 @@ class TestAdversarialEdgeCases:
        e = MockAPIError("server error", status_code=500, body={"message": None})
        result = classify_api_error(e)
        assert result is not None
+
+
+# ── Test: SSL/TLS transient errors ─────────────────────────────────────
+
+class TestSSLTransientPatterns:
+    """SSL/TLS alerts mid-stream should retry as timeout, not unknown, and
+    should NOT trigger context compression even on a large session.
+
+    Motivation: OpenSSL 3.x changed TLS alert error code format
+    (`SSLV3_ALERT_BAD_RECORD_MAC` → `SSL/TLS_ALERT_BAD_RECORD_MAC`),
+    breaking string-exact matching in downstream retry logic.  We match
+    stable substrings instead.
+    """
+
+    def test_bad_record_mac_classifies_as_timeout(self):
+        """OpenSSL 3.x mid-stream bad record mac alert."""
+        e = Exception("[SSL: BAD_RECORD_MAC] sslv3 alert bad record mac (_ssl.c:2580)")
+        result = classify_api_error(e)
+        assert result.reason == FailoverReason.timeout
+        assert result.retryable is True
+        assert result.should_compress is False
+
+    def test_openssl_3x_format_classifies_as_timeout(self):
+        """New format `ERR_SSL_SSL/TLS_ALERT_BAD_RECORD_MAC` still matches
+        because we key on both space- and underscore-separated forms of
+        the stable `bad_record_mac` token."""
+        e = Exception("ERR_SSL_SSL/TLS_ALERT_BAD_RECORD_MAC during streaming")
+        result = classify_api_error(e)
+        assert result.reason == FailoverReason.timeout
+        assert result.retryable is True
+        assert result.should_compress is False
+
+    def test_tls_alert_internal_error_classifies_as_timeout(self):
+        e = Exception("[SSL: TLSV1_ALERT_INTERNAL_ERROR] tlsv1 alert internal error")
+        result = classify_api_error(e)
+        assert result.reason == FailoverReason.timeout
+        assert result.retryable is True
+        assert result.should_compress is False
+
+    def test_ssl_handshake_failure_classifies_as_timeout(self):
+        e = Exception("ssl handshake failure during mid-stream")
+        result = classify_api_error(e)
+        assert result.reason == FailoverReason.timeout
+        assert result.retryable is True
+
+    def test_ssl_prefix_classifies_as_timeout(self):
+        """Python's generic '[SSL: XYZ]' prefix from the ssl module."""
+        e = Exception("[SSL: UNEXPECTED_EOF_WHILE_READING] EOF occurred in violation of protocol")
+        result = classify_api_error(e)
+        assert result.reason == FailoverReason.timeout
+        assert result.retryable is True
+
+    def test_ssl_alert_on_large_session_does_not_compress(self):
+        """Critical: SSL alerts on big contexts must NOT trigger context
+        compression — compression is expensive and won't fix a transport
+        hiccup.  This is why _SSL_TRANSIENT_PATTERNS is separate from
+        _SERVER_DISCONNECT_PATTERNS.
+        """
+        e = Exception("[SSL: BAD_RECORD_MAC] sslv3 alert bad record mac")
+        result = classify_api_error(
+            e,
+            approx_tokens=180000,      # 90% of a 200k-context window
+            context_length=200000,
+            num_messages=300,
+        )
+        assert result.reason == FailoverReason.timeout
+        assert result.should_compress is False
+
+    def test_plain_disconnect_on_large_session_still_compresses(self):
+        """Regression guard: the context-overflow-via-disconnect path
+        (non-SSL disconnects on large sessions) must still trigger
+        compression.  Only SSL-specific disconnects skip it.
+        """
+        e = Exception("Server disconnected without sending a response")
+        result = classify_api_error(
+            e,
+            approx_tokens=180000,
+            context_length=200000,
+            num_messages=300,
+        )
+        assert result.reason == FailoverReason.context_overflow
+        assert result.should_compress is True
+
+    def test_real_ssl_error_type_classifies_as_timeout(self):
+        """Real ssl.SSLError instance — the type name alone (not message)
+        should route to the transport bucket."""
+        import ssl
+        e = ssl.SSLError("arbitrary ssl error")
+        result = classify_api_error(e)
+        assert result.reason == FailoverReason.timeout
+        assert result.retryable is True
@@ -106,3 +106,25 @@ class TestIsLocalEndpoint:
    ])
    def test_remote_endpoints(self, url):
        assert is_local_endpoint(url) is False
+
+    @pytest.mark.parametrize("url", [
+        "http://100.64.0.0:11434",            # lower bound of CGNAT block
+        "http://100.64.0.1:11434/v1",         # lower bound +1
+        "http://100.77.243.5:11434",          # representative Tailscale host
+        "https://100.100.100.100:443",        # Tailscale MagicDNS anchor
+        "https://100.127.255.254:443",        # upper bound -1
+        "http://100.127.255.255:11434",       # upper bound of CGNAT block
+    ])
+    def test_tailscale_cgnat_is_local(self, url):
+        """Tailscale 100.64.0.0/10 should be treated as local for timeout bumps."""
+        assert is_local_endpoint(url) is True
+
+    @pytest.mark.parametrize("url", [
+        "http://100.63.255.255:11434",        # just below CGNAT block
+        "http://100.128.0.1:11434",           # just above CGNAT block
+        "http://100.200.0.1:11434",           # well outside CGNAT
+        "http://99.64.0.1:11434",             # first octet wrong
+    ])
+    def test_near_but_not_cgnat_is_remote(self, url):
+        """Hosts adjacent to but outside 100.64.0.0/10 must not match."""
+        assert is_local_endpoint(url) is False
@@ -79,6 +79,28 @@ class TestMemoryManagerUserIdThreading:
        assert p._init_kwargs.get("platform") == "telegram"
        assert p._init_session_id == "sess-123"

+    def test_chat_context_forwarded_to_provider(self):
+        mgr = MemoryManager()
+        p = RecordingProvider()
+        mgr.add_provider(p)
+
+        mgr.initialize_all(
+            session_id="sess-chat",
+            platform="discord",
+            user_id="discord_u_7",
+            user_name="fakeusername",
+            chat_id="1485316232612941897",
+            chat_name="fakeassistantname-forums",
+            chat_type="thread",
+            thread_id="1491249007475949698",
+        )
+
+        assert p._init_kwargs.get("user_name") == "fakeusername"
+        assert p._init_kwargs.get("chat_id") == "1485316232612941897"
+        assert p._init_kwargs.get("chat_name") == "fakeassistantname-forums"
+        assert p._init_kwargs.get("chat_type") == "thread"
+        assert p._init_kwargs.get("thread_id") == "1491249007475949698"
+
    def test_no_user_id_when_cli(self):
        """CLI sessions should not have user_id in kwargs."""
        mgr = MemoryManager()
@@ -334,3 +356,4 @@ class TestAIAgentUserIdPropagation:
            agent = object.__new__(AIAgent)
            agent._user_id = None
            assert agent._user_id is None
+
--- a/Show More
+++ b/Show More