fix: use consistent fallback for pickle_key derivation

Address review: _pickle_key now uses _acct_id (which has the 'hermes' fallback) instead of raw self._user_id, so both values stay consistent when user_id is empty.
fix(matrix): pass required args to MemoryCryptoStore for mautrix ≥0.21
2026-04-11 17:26:17 +00:00 · 2026-04-11 17:03:38 +00:00 · 2026-04-11 03:29:31 -07:00 · 2026-04-11 03:13:23 -07:00 · 2026-04-11 03:11:34 -07:00 · 2026-04-11 03:09:46 -07:00
214 changed files with 18248 additions and 3512 deletions
@@ -6,7 +6,7 @@ ENV PYTHONUNBUFFERED=1
 # Install system dependencies in one layer, clear APT cache
 RUN apt-get update && \
    apt-get install -y --no-install-recommends \
-        build-essential nodejs npm python3 python3-pip ripgrep ffmpeg gcc python3-dev libffi-dev && \
+        build-essential nodejs npm python3 python3-pip ripgrep ffmpeg gcc python3-dev libffi-dev procps && \
    rm -rf /var/lib/apt/lists/*

 COPY . /opt/hermes
@@ -60,6 +60,8 @@ _ANTHROPIC_OUTPUT_LIMITS = {
    "claude-3-opus":       4_096,
    "claude-3-sonnet":     4_096,
    "claude-3-haiku":      4_096,
+    # Third-party Anthropic-compatible providers
+    "minimax":            131_072,
 }

 # For any model not in the table, assume the highest current limit.
@@ -161,18 +163,27 @@ def _get_claude_code_version() -> str:


 def _is_oauth_token(key: str) -> bool:
-    """Check if the key is an OAuth/setup token (not a regular Console API key).
+    """Check if the key is an Anthropic OAuth/setup token.

-    Regular API keys start with 'sk-ant-api'. Everything else (setup-tokens
-    starting with 'sk-ant-oat', managed keys, JWTs, etc.) needs Bearer auth.
+    Positively identifies Anthropic OAuth tokens by their key format:
+    - ``sk-ant-`` prefix (but NOT ``sk-ant-api``) → setup tokens, managed keys
+    - ``eyJ`` prefix → JWTs from the Anthropic OAuth flow
+
+    Non-Anthropic keys (MiniMax, Alibaba, etc.) don't match either pattern
+    and correctly return False.
    """
    if not key:
        return False
-    # Regular Console API keys use x-api-key header
+    # Regular Anthropic Console API keys — x-api-key auth, never OAuth
    if key.startswith("sk-ant-api"):
        return False
-    # Everything else (setup-tokens, managed keys, JWTs) uses Bearer auth
-    return True
+    # Anthropic-issued tokens (setup-tokens sk-ant-oat-*, managed keys)
+    if key.startswith("sk-ant-"):
+        return True
+    # JWTs from Anthropic OAuth flow
+    if key.startswith("eyJ"):
+        return True
+    return False


 def _normalize_base_url_text(base_url) -> str:
@@ -1304,9 +1315,10 @@ def build_anthropic_kwargs(
    # Map reasoning_config to Anthropic's thinking parameter.
    # Claude 4.6 models use adaptive thinking + output_config.effort.
    # Older models use manual thinking with budget_tokens.
-    # Haiku and MiniMax models do NOT support extended thinking — skip entirely.
+    # MiniMax Anthropic-compat endpoints support thinking (manual mode only,
+    # not adaptive).  Haiku does NOT support extended thinking — skip entirely.
    if reasoning_config and isinstance(reasoning_config, dict):
-        if reasoning_config.get("enabled") is not False and "haiku" not in model.lower() and "minimax" not in model.lower():
+        if reasoning_config.get("enabled") is not False and "haiku" not in model.lower():
            effort = str(reasoning_config.get("effort", "medium")).lower()
            budget = THINKING_BUDGET.get(effort, 8000)
            if _supports_adaptive_thinking(model):
@@ -59,6 +59,9 @@ from hermes_constants import OPENROUTER_BASE_URL

 logger = logging.getLogger(__name__)

+# Module-level flag: only warn once per process about stale OPENAI_BASE_URL.
+_stale_base_url_warned = False
+
 _PROVIDER_ALIASES = {
    "google": "gemini",
    "google-gemini": "gemini",
@@ -687,6 +690,15 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
        if pconfig.auth_type != "api_key":
            continue
        if provider_id == "anthropic":
+            # Only try anthropic when the user has explicitly configured it.
+            # Without this gate, Claude Code credentials get silently used
+            # as auxiliary fallback when the user's primary provider fails.
+            try:
+                from hermes_cli.auth import is_provider_explicitly_configured
+                if not is_provider_explicitly_configured("anthropic"):
+                    continue
+            except ImportError:
+                pass
            return _try_anthropic()

        pool_present, entry = _select_pool_entry(provider_id)
@@ -698,7 +710,9 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
            base_url = _to_openai_base_url(
                _pool_runtime_base_url(entry, pconfig.inference_base_url) or pconfig.inference_base_url
            )
-            model = _API_KEY_PROVIDER_AUX_MODELS.get(provider_id, "default")
+            model = _API_KEY_PROVIDER_AUX_MODELS.get(provider_id)
+            if model is None:
+                continue  # skip provider if we don't know a valid aux model
            logger.debug("Auxiliary text client: %s (%s) via pool", pconfig.name, model)
            extra = {}
            if "api.kimi.com" in base_url.lower():
@@ -717,7 +731,9 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
        base_url = _to_openai_base_url(
            str(creds.get("base_url", "")).strip().rstrip("/") or pconfig.inference_base_url
        )
-        model = _API_KEY_PROVIDER_AUX_MODELS.get(provider_id, "default")
+        model = _API_KEY_PROVIDER_AUX_MODELS.get(provider_id)
+        if model is None:
+            continue  # skip provider if we don't know a valid aux model
        logger.debug("Auxiliary text client: %s (%s)", pconfig.name, model)
        extra = {}
        if "api.kimi.com" in base_url.lower():
@@ -848,7 +864,7 @@ def _read_main_provider() -> str:
    return ""


-def _resolve_custom_runtime() -> Tuple[Optional[str], Optional[str]]:
+def _resolve_custom_runtime() -> Tuple[Optional[str], Optional[str], Optional[str]]:
    """Resolve the active custom/main endpoint the same way the main CLI does.

    This covers both env-driven OPENAI_BASE_URL setups and config-saved custom
@@ -861,18 +877,29 @@ def _resolve_custom_runtime() -> Tuple[Optional[str], Optional[str]]:
        runtime = resolve_runtime_provider(requested="custom")
    except Exception as exc:
        logger.debug("Auxiliary client: custom runtime resolution failed: %s", exc)
-        return None, None
+        runtime = None
+
+    if not isinstance(runtime, dict):
+        openai_base = os.getenv("OPENAI_BASE_URL", "").strip().rstrip("/")
+        openai_key = os.getenv("OPENAI_API_KEY", "").strip()
+        if not openai_base:
+            return None, None, None
+        runtime = {
+            "base_url": openai_base,
+            "api_key": openai_key,
+        }

    custom_base = runtime.get("base_url")
    custom_key = runtime.get("api_key")
+    custom_mode = runtime.get("api_mode")
    if not isinstance(custom_base, str) or not custom_base.strip():
-        return None, None
+        return None, None, None

    custom_base = custom_base.strip().rstrip("/")
    if "openrouter.ai" in custom_base.lower():
        # requested='custom' falls back to OpenRouter when no custom endpoint is
        # configured. Treat that as "no custom endpoint" for auxiliary routing.
-        return None, None
+        return None, None, None

    # Local servers (Ollama, llama.cpp, vLLM, LM Studio) don't require auth.
    # Use a placeholder key — the OpenAI SDK requires a non-empty string but
@@ -881,20 +908,33 @@ def _resolve_custom_runtime() -> Tuple[Optional[str], Optional[str]]:
    if not isinstance(custom_key, str) or not custom_key.strip():
        custom_key = "no-key-required"

-    return custom_base, custom_key.strip()
+    if not isinstance(custom_mode, str) or not custom_mode.strip():
+        custom_mode = None
+
+    return custom_base, custom_key.strip(), custom_mode


 def _current_custom_base_url() -> str:
-    custom_base, _ = _resolve_custom_runtime()
+    custom_base, _, _ = _resolve_custom_runtime()
    return custom_base or ""


 def _try_custom_endpoint() -> Tuple[Optional[OpenAI], Optional[str]]:
-    custom_base, custom_key = _resolve_custom_runtime()
+    runtime = _resolve_custom_runtime()
+    if len(runtime) == 2:
+        custom_base, custom_key = runtime
+        custom_mode = None
+    else:
+        custom_base, custom_key, custom_mode = runtime
    if not custom_base or not custom_key:
        return None, None
+    if custom_base.lower().startswith(_CODEX_AUX_BASE_URL.lower()):
+        return None, None
    model = _read_main_model() or "gpt-4o-mini"
-    logger.debug("Auxiliary client: custom endpoint (%s)", model)
+    logger.debug("Auxiliary client: custom endpoint (%s, api_mode=%s)", model, custom_mode or "chat_completions")
+    if custom_mode == "codex_responses":
+        real_client = OpenAI(api_key=custom_key, base_url=custom_base)
+        return CodexAuxiliaryClient(real_client, model), model
    return OpenAI(api_key=custom_key, base_url=custom_base), model


@@ -1042,11 +1082,12 @@ def _is_connection_error(exc: Exception) -> bool:
 def _try_payment_fallback(
    failed_provider: str,
    task: str = None,
+    reason: str = "payment error",
 ) -> Tuple[Optional[Any], Optional[str], str]:
-    """Try alternative providers after a payment/credit error.
+    """Try alternative providers after a payment/credit or connection error.

    Iterates the standard auto-detection chain, skipping the provider that
-    returned a payment error.
+    failed.

    Returns:
        (client, model, provider_label) or (None, None, "") if no fallback.
@@ -1072,15 +1113,15 @@ def _try_payment_fallback(
        client, model = try_fn()
        if client is not None:
            logger.info(
-                "Auxiliary %s: payment error on %s — falling back to %s (%s)",
-                task or "call", failed_provider, label, model or "default",
+                "Auxiliary %s: %s on %s — falling back to %s (%s)",
+                task or "call", reason, failed_provider, label, model or "default",
            )
            return client, model, label
        tried.append(label)

    logger.warning(
-        "Auxiliary %s: payment error on %s and no fallback available (tried: %s)",
-        task or "call", failed_provider, ", ".join(tried),
+        "Auxiliary %s: %s on %s and no fallback available (tried: %s)",
+        task or "call", reason, failed_provider, ", ".join(tried),
    )
    return None, None, ""

@@ -1095,9 +1136,28 @@ def _resolve_auto() -> Tuple[Optional[OpenAI], Optional[str]]:
         provider they already have credentials for — no OpenRouter key needed.
      2. OpenRouter → Nous → custom → Codex → API-key providers (original chain).
    """
-    global auxiliary_is_nous
+    global auxiliary_is_nous, _stale_base_url_warned
    auxiliary_is_nous = False  # Reset — _try_nous() will set True if it wins

+    # ── Warn once if OPENAI_BASE_URL is set but config.yaml uses a named
+    #    provider (not 'custom').  This catches the common "env poisoning"
+    #    scenario where a user switches providers via `hermes model` but the
+    #    old OPENAI_BASE_URL lingers in ~/.hermes/.env. ──
+    if not _stale_base_url_warned:
+        _env_base = os.getenv("OPENAI_BASE_URL", "").strip()
+        _cfg_provider = _read_main_provider()
+        if (_env_base and _cfg_provider
+                and _cfg_provider != "custom"
+                and not _cfg_provider.startswith("custom:")):
+            logger.warning(
+                "OPENAI_BASE_URL is set (%s) but model.provider is '%s'. "
+                "Auxiliary clients may route to the wrong endpoint. "
+                "Run: hermes model to reconfigure, or remove "
+                "OPENAI_BASE_URL from ~/.hermes/.env",
+                _env_base, _cfg_provider,
+            )
+            _stale_base_url_warned = True
+
    # ── Step 1: non-aggregator main provider → use main model directly ──
    main_provider = _read_main_provider()
    main_model = _read_main_model()
@@ -1165,6 +1225,18 @@ def _to_async_client(sync_client, model: str):
    return AsyncOpenAI(**async_kwargs), model


+def _normalize_resolved_model(model_name: Optional[str], provider: str) -> Optional[str]:
+    """Normalize a resolved model for the provider that will receive it."""
+    if not model_name:
+        return model_name
+    try:
+        from hermes_cli.model_normalize import normalize_model_for_provider
+
+        return normalize_model_for_provider(model_name, provider)
+    except Exception:
+        return model_name
+
+
 def resolve_provider_client(
    provider: str,
    model: str = None,
@@ -1172,6 +1244,7 @@ def resolve_provider_client(
    raw_codex: bool = False,
    explicit_base_url: str = None,
    explicit_api_key: str = None,
+    api_mode: str = None,
 ) -> Tuple[Optional[Any], Optional[str]]:
    """Central router: given a provider name and optional model, return a
    configured client with the correct auth, base URL, and API format.
@@ -1195,6 +1268,10 @@ def resolve_provider_client(
            the main agent loop).
        explicit_base_url: Optional direct OpenAI-compatible endpoint.
        explicit_api_key: Optional API key paired with explicit_base_url.
+        api_mode: API mode override.  One of "chat_completions",
+            "codex_responses", or None (auto-detect).  When set to
+            "codex_responses", the client is wrapped in
+            CodexAuxiliaryClient to route through the Responses API.

    Returns:
        (client, resolved_model) or (None, None) if auth is unavailable.
@@ -1202,6 +1279,40 @@ def resolve_provider_client(
    # Normalise aliases
    provider = _normalize_aux_provider(provider)

+    def _needs_codex_wrap(client_obj, base_url_str: str, model_str: str) -> bool:
+        """Decide if a plain OpenAI client should be wrapped for Responses API.
+
+        Returns True when api_mode is explicitly "codex_responses", or when
+        auto-detection (api.openai.com + codex-family model) suggests it.
+        Already-wrapped clients (CodexAuxiliaryClient) are skipped.
+        """
+        if isinstance(client_obj, CodexAuxiliaryClient):
+            return False
+        if raw_codex:
+            return False
+        if api_mode == "codex_responses":
+            return True
+        # Auto-detect: api.openai.com + codex model name pattern
+        if api_mode and api_mode != "codex_responses":
+            return False  # explicit non-codex mode
+        normalized_base = (base_url_str or "").strip().lower()
+        if "api.openai.com" in normalized_base and "openrouter" not in normalized_base:
+            model_lower = (model_str or "").lower()
+            if "codex" in model_lower:
+                return True
+        return False
+
+    def _wrap_if_needed(client_obj, final_model_str: str, base_url_str: str = ""):
+        """Wrap a plain OpenAI client in CodexAuxiliaryClient if Responses API is needed."""
+        if _needs_codex_wrap(client_obj, base_url_str, final_model_str):
+            logger.debug(
+                "resolve_provider_client: wrapping client in CodexAuxiliaryClient "
+                "(api_mode=%s, model=%s, base_url=%s)",
+                api_mode or "auto-detected", final_model_str,
+                base_url_str[:60] if base_url_str else "")
+            return CodexAuxiliaryClient(client_obj, final_model_str)
+        return client_obj
+
    # ── Auto: try all providers in priority order ────────────────────
    if provider == "auto":
        client, resolved = _resolve_auto()
@@ -1227,7 +1338,7 @@ def resolve_provider_client(
            logger.warning("resolve_provider_client: openrouter requested "
                           "but OPENROUTER_API_KEY not set")
            return None, None
-        final_model = model or default
+        final_model = _normalize_resolved_model(model or default, provider)
        return (_to_async_client(client, final_model) if async_mode
                else (client, final_model))

@@ -1238,7 +1349,7 @@ def resolve_provider_client(
            logger.warning("resolve_provider_client: nous requested "
                           "but Nous Portal not configured (run: hermes auth)")
            return None, None
-        final_model = model or default
+        final_model = _normalize_resolved_model(model or default, provider)
        return (_to_async_client(client, final_model) if async_mode
                else (client, final_model))

@@ -1252,7 +1363,7 @@ def resolve_provider_client(
                logger.warning("resolve_provider_client: openai-codex requested "
                               "but no Codex OAuth token found (run: hermes model)")
                return None, None
-            final_model = model or _CODEX_AUX_MODEL
+            final_model = _normalize_resolved_model(model or _CODEX_AUX_MODEL, provider)
            raw_client = OpenAI(api_key=codex_token, base_url=_CODEX_AUX_BASE_URL)
            return (raw_client, final_model)
        # Standard path: wrap in CodexAuxiliaryClient adapter
@@ -1261,7 +1372,7 @@ def resolve_provider_client(
            logger.warning("resolve_provider_client: openai-codex requested "
                           "but no Codex OAuth token found (run: hermes model)")
            return None, None
-        final_model = model or default
+        final_model = _normalize_resolved_model(model or default, provider)
        return (_to_async_client(client, final_model) if async_mode
                else (client, final_model))

@@ -1280,7 +1391,10 @@ def resolve_provider_client(
                    "but base_url is empty"
                )
                return None, None
-            final_model = model or _read_main_model() or "gpt-4o-mini"
+            final_model = _normalize_resolved_model(
+                model or _read_main_model() or "gpt-4o-mini",
+                provider,
+            )
            extra = {}
            if "api.kimi.com" in custom_base.lower():
                extra["default_headers"] = {"User-Agent": "KimiCLI/1.30.0"}
@@ -1288,6 +1402,7 @@ def resolve_provider_client(
                from hermes_cli.models import copilot_default_headers
                extra["default_headers"] = copilot_default_headers()
            client = OpenAI(api_key=custom_key, base_url=custom_base, **extra)
+            client = _wrap_if_needed(client, final_model, custom_base)
            return (_to_async_client(client, final_model) if async_mode
                    else (client, final_model))
        # Try custom first, then codex, then API-key providers
@@ -1295,7 +1410,9 @@ def resolve_provider_client(
                       _resolve_api_key_provider):
            client, default = try_fn()
            if client is not None:
-                final_model = model or default
+                final_model = _normalize_resolved_model(model or default, provider)
+                _cbase = str(getattr(client, "base_url", "") or "")
+                client = _wrap_if_needed(client, final_model, _cbase)
                return (_to_async_client(client, final_model) if async_mode
                        else (client, final_model))
        logger.warning("resolve_provider_client: custom/main requested "
@@ -1310,8 +1427,12 @@ def resolve_provider_client(
            custom_base = custom_entry.get("base_url", "").strip()
            custom_key = custom_entry.get("api_key", "").strip() or "no-key-required"
            if custom_base:
-                final_model = model or _read_main_model() or "gpt-4o-mini"
+                final_model = _normalize_resolved_model(
+                    model or _read_main_model() or "gpt-4o-mini",
+                    provider,
+                )
                client = OpenAI(api_key=custom_key, base_url=custom_base)
+                client = _wrap_if_needed(client, final_model, custom_base)
                logger.debug(
                    "resolve_provider_client: named custom provider %r (%s)",
                    provider, final_model)
@@ -1342,7 +1463,7 @@ def resolve_provider_client(
            if client is None:
                logger.warning("resolve_provider_client: anthropic requested but no Anthropic credentials found")
                return None, None
-            final_model = model or default_model
+            final_model = _normalize_resolved_model(model or default_model, provider)
            return (_to_async_client(client, final_model) if async_mode else (client, final_model))

        creds = resolve_api_key_provider_credentials(provider)
@@ -1361,7 +1482,7 @@ def resolve_provider_client(
        )

        default_model = _API_KEY_PROVIDER_AUX_MODELS.get(provider, "")
-        final_model = model or default_model
+        final_model = _normalize_resolved_model(model or default_model, provider)

        # Provider-specific headers
        headers = {}
@@ -1374,6 +1495,28 @@ def resolve_provider_client(

        client = OpenAI(api_key=api_key, base_url=base_url,
                        **({"default_headers": headers} if headers else {}))
+
+        # Copilot GPT-5+ models (except gpt-5-mini) require the Responses
+        # API — they are not accessible via /chat/completions.  Wrap the
+        # plain client in CodexAuxiliaryClient so call_llm() transparently
+        # routes through responses.stream().
+        if provider == "copilot" and final_model and not raw_codex:
+            try:
+                from hermes_cli.models import _should_use_copilot_responses_api
+                if _should_use_copilot_responses_api(final_model):
+                    logger.debug(
+                        "resolve_provider_client: copilot model %s needs "
+                        "Responses API — wrapping with CodexAuxiliaryClient",
+                        final_model)
+                    client = CodexAuxiliaryClient(client, final_model)
+            except ImportError:
+                pass
+
+        # Honor api_mode for any API-key provider (e.g. direct OpenAI with
+        # codex-family models).  The copilot-specific wrapping above handles
+        # copilot; this covers the general case (#6800).
+        client = _wrap_if_needed(client, final_model, base_url)
+
        logger.debug("resolve_provider_client: %s (%s)", provider, final_model)
        return (_to_async_client(client, final_model) if async_mode
                else (client, final_model))
@@ -1406,12 +1549,13 @@ def get_text_auxiliary_client(task: str = "") -> Tuple[Optional[OpenAI], Optiona
    Callers may override the returned model with a per-task env var
    (e.g. CONTEXT_COMPRESSION_MODEL, AUXILIARY_WEB_EXTRACT_MODEL).
    """
-    provider, model, base_url, api_key = _resolve_task_provider_model(task or None)
+    provider, model, base_url, api_key, api_mode = _resolve_task_provider_model(task or None)
    return resolve_provider_client(
        provider,
        model=model,
        explicit_base_url=base_url,
        explicit_api_key=api_key,
+        api_mode=api_mode,
    )


@@ -1422,13 +1566,14 @@ def get_async_text_auxiliary_client(task: str = ""):
    (AsyncCodexAuxiliaryClient, model) which wraps the Responses API.
    Returns (None, None) when no provider is available.
    """
-    provider, model, base_url, api_key = _resolve_task_provider_model(task or None)
+    provider, model, base_url, api_key, api_mode = _resolve_task_provider_model(task or None)
    return resolve_provider_client(
        provider,
        model=model,
        async_mode=True,
        explicit_base_url=base_url,
        explicit_api_key=api_key,
+        api_mode=api_mode,
    )


@@ -1501,7 +1646,7 @@ def resolve_vision_provider_client(
    backends, so users can intentionally force experimental providers. Auto mode
    stays conservative and only tries vision backends known to work today.
    """
-    requested, resolved_model, resolved_base_url, resolved_api_key = _resolve_task_provider_model(
+    requested, resolved_model, resolved_base_url, resolved_api_key, resolved_api_mode = _resolve_task_provider_model(
        "vision", provider, model, base_url, api_key
    )
    requested = _normalize_vision_provider(requested)
@@ -1717,12 +1862,30 @@ def cleanup_stale_async_clients() -> None:
            del _client_cache[key]


+def _is_openrouter_client(client: Any) -> bool:
+    for obj in (client, getattr(client, "_client", None), getattr(client, "client", None)):
+        if obj and "openrouter" in str(getattr(obj, "base_url", "") or "").lower():
+            return True
+    return False
+
+
+def _compat_model(client: Any, model: Optional[str], cached_default: Optional[str]) -> Optional[str]:
+    """Drop OpenRouter-format model slugs (with '/') for non-OpenRouter clients.
+
+    Mirrors the guard in resolve_provider_client() which is skipped on cache hits.
+    """
+    if model and "/" in model and not _is_openrouter_client(client):
+        return cached_default
+    return model or cached_default
+
+
 def _get_cached_client(
    provider: str,
    model: str = None,
    async_mode: bool = False,
    base_url: str = None,
    api_key: str = None,
+    api_mode: str = None,
 ) -> Tuple[Optional[Any], Optional[str]]:
    """Get or create a cached client for the given provider.

@@ -1746,7 +1909,7 @@ def _get_cached_client(
            loop_id = id(current_loop)
        except RuntimeError:
            pass
-    cache_key = (provider, async_mode, base_url or "", api_key or "", loop_id)
+    cache_key = (provider, async_mode, base_url or "", api_key or "", api_mode or "", loop_id)
    with _client_cache_lock:
        if cache_key in _client_cache:
            cached_client, cached_default, cached_loop = _client_cache[cache_key]
@@ -1758,9 +1921,11 @@ def _get_cached_client(
                    _force_close_async_httpx(cached_client)
                    del _client_cache[cache_key]
                else:
-                    return cached_client, model or cached_default
+                    effective = _compat_model(cached_client, model, cached_default)
+                    return cached_client, effective
            else:
-                return cached_client, model or cached_default
+                effective = _compat_model(cached_client, model, cached_default)
+                return cached_client, effective
    # Build outside the lock
    client, default_model = resolve_provider_client(
        provider,
@@ -1768,6 +1933,7 @@ def _get_cached_client(
        async_mode,
        explicit_base_url=base_url,
        explicit_api_key=api_key,
+        api_mode=api_mode,
    )
    if client is not None:
        # For async clients, remember which loop they were created on so we
@@ -1787,7 +1953,7 @@ def _resolve_task_provider_model(
    model: str = None,
    base_url: str = None,
    api_key: str = None,
-) -> Tuple[str, Optional[str], Optional[str], Optional[str]]:
+) -> Tuple[str, Optional[str], Optional[str], Optional[str], Optional[str]]:
    """Determine provider + model for a call.

    Priority:
@@ -1796,15 +1962,17 @@ def _resolve_task_provider_model(
      3. Config file (auxiliary.{task}.* or compression.*)
      4. "auto" (full auto-detection chain)

-    Returns (provider, model, base_url, api_key) where model may be None
-    (use provider default). When base_url is set, provider is forced to
-    "custom" and the task uses that direct endpoint.
+    Returns (provider, model, base_url, api_key, api_mode) where model may
+    be None (use provider default). When base_url is set, provider is forced
+    to "custom" and the task uses that direct endpoint. api_mode is one of
+    "chat_completions", "codex_responses", or None (auto-detect).
    """
    config = {}
    cfg_provider = None
    cfg_model = None
    cfg_base_url = None
    cfg_api_key = None
+    cfg_api_mode = None

    if task:
        try:
@@ -1821,6 +1989,7 @@ def _resolve_task_provider_model(
        cfg_model = str(task_config.get("model", "")).strip() or None
        cfg_base_url = str(task_config.get("base_url", "")).strip() or None
        cfg_api_key = str(task_config.get("api_key", "")).strip() or None
+        cfg_api_mode = str(task_config.get("api_mode", "")).strip() or None

        # Backwards compat: compression section has its own keys.
        # The auxiliary.compression defaults to provider="auto", so treat
@@ -1834,30 +2003,32 @@ def _resolve_task_provider_model(
                cfg_base_url = cfg_base_url or _sbu.strip() or None

    env_model = _get_auxiliary_env_override(task, "MODEL") if task else None
+    env_api_mode = _get_auxiliary_env_override(task, "API_MODE") if task else None
    resolved_model = model or env_model or cfg_model
+    resolved_api_mode = env_api_mode or cfg_api_mode

    if base_url:
-        return "custom", resolved_model, base_url, api_key
+        return "custom", resolved_model, base_url, api_key, resolved_api_mode
    if provider:
-        return provider, resolved_model, base_url, api_key
+        return provider, resolved_model, base_url, api_key, resolved_api_mode

    if task:
        env_base_url = _get_auxiliary_env_override(task, "BASE_URL")
        env_api_key = _get_auxiliary_env_override(task, "API_KEY")
        if env_base_url:
-            return "custom", resolved_model, env_base_url, env_api_key or cfg_api_key
+            return "custom", resolved_model, env_base_url, env_api_key or cfg_api_key, resolved_api_mode

        env_provider = _get_auxiliary_provider(task)
        if env_provider != "auto":
-            return env_provider, resolved_model, None, None
+            return env_provider, resolved_model, None, None, resolved_api_mode

        if cfg_base_url:
-            return "custom", resolved_model, cfg_base_url, cfg_api_key
+            return "custom", resolved_model, cfg_base_url, cfg_api_key, resolved_api_mode
        if cfg_provider and cfg_provider != "auto":
-            return cfg_provider, resolved_model, None, None
-        return "auto", resolved_model, None, None
+            return cfg_provider, resolved_model, None, None, resolved_api_mode
+        return "auto", resolved_model, None, None, resolved_api_mode

-    return "auto", resolved_model, None, None
+    return "auto", resolved_model, None, None, resolved_api_mode


 _DEFAULT_AUX_TIMEOUT = 30.0
@@ -1929,6 +2100,37 @@ def _build_call_kwargs(
    return kwargs


+def _validate_llm_response(response: Any, task: str = None) -> Any:
+    """Validate that an LLM response has the expected .choices[0].message shape.
+
+    Fails fast with a clear error instead of letting malformed payloads
+    propagate to downstream consumers where they crash with misleading
+    AttributeError (e.g. "'str' object has no attribute 'choices'").
+
+    See #7264.
+    """
+    if response is None:
+        raise RuntimeError(
+            f"Auxiliary {task or 'call'}: LLM returned None response"
+        )
+    # Allow SimpleNamespace responses from adapters (CodexAuxiliaryClient,
+    # AnthropicAuxiliaryClient) — they have .choices[0].message.
+    try:
+        choices = response.choices
+        if not choices or not hasattr(choices[0], "message"):
+            raise AttributeError("missing choices[0].message")
+    except (AttributeError, TypeError, IndexError) as exc:
+        response_type = type(response).__name__
+        response_preview = str(response)[:120]
+        raise RuntimeError(
+            f"Auxiliary {task or 'call'}: LLM returned invalid response "
+            f"(type={response_type}): {response_preview!r}. "
+            f"Expected object with .choices[0].message — check provider "
+            f"adapter or custom endpoint compatibility."
+        ) from exc
+    return response
+
+
 def call_llm(
    task: str = None,
    *,
@@ -1967,7 +2169,7 @@ def call_llm(
    Raises:
        RuntimeError: If no provider is configured.
    """
-    resolved_provider, resolved_model, resolved_base_url, resolved_api_key = _resolve_task_provider_model(
+    resolved_provider, resolved_model, resolved_base_url, resolved_api_key, resolved_api_mode = _resolve_task_provider_model(
        task, provider, model, base_url, api_key)

    if task == "vision":
@@ -2000,6 +2202,7 @@ def call_llm(
            resolved_model,
            base_url=resolved_base_url,
            api_key=resolved_api_key,
+            api_mode=resolved_api_mode,
        )
        if client is None:
            # When the user explicitly chose a non-OpenRouter provider but no
@@ -2043,18 +2246,20 @@ def call_llm(

    # Handle max_tokens vs max_completion_tokens retry, then payment fallback.
    try:
-        return client.chat.completions.create(**kwargs)
+        return _validate_llm_response(
+            client.chat.completions.create(**kwargs), task)
    except Exception as first_err:
        err_str = str(first_err)
        if "max_tokens" in err_str or "unsupported_parameter" in err_str:
            kwargs.pop("max_tokens", None)
            kwargs["max_completion_tokens"] = max_tokens
            try:
-                return client.chat.completions.create(**kwargs)
+                return _validate_llm_response(
+                    client.chat.completions.create(**kwargs), task)
            except Exception as retry_err:
-                # If the max_tokens retry also hits a payment error,
-                # fall through to the payment fallback below.
-                if not _is_payment_error(retry_err):
+                # If the max_tokens retry also hits a payment or connection
+                # error, fall through to the fallback chain below.
+                if not (_is_payment_error(retry_err) or _is_connection_error(retry_err)):
                    raise
                first_err = retry_err

@@ -2071,19 +2276,24 @@ def call_llm(
        # and providers the user never configured that got picked up by
        # the auto-detection chain.
        should_fallback = _is_payment_error(first_err) or _is_connection_error(first_err)
-        if should_fallback:
+        # Only try alternative providers when the user didn't explicitly
+        # configure this task's provider.  Explicit provider = hard constraint;
+        # auto (the default) = best-effort fallback chain.  (#7559)
+        is_auto = resolved_provider in ("auto", "", None)
+        if should_fallback and is_auto:
            reason = "payment error" if _is_payment_error(first_err) else "connection error"
            logger.info("Auxiliary %s: %s on %s (%s), trying fallback",
                        task or "call", reason, resolved_provider, first_err)
            fb_client, fb_model, fb_label = _try_payment_fallback(
-                resolved_provider, task)
+                resolved_provider, task, reason=reason)
            if fb_client is not None:
                fb_kwargs = _build_call_kwargs(
                    fb_label, fb_model, messages,
                    temperature=temperature, max_tokens=max_tokens,
                    tools=tools, timeout=effective_timeout,
                    extra_body=extra_body)
-                return fb_client.chat.completions.create(**fb_kwargs)
+                return _validate_llm_response(
+                    fb_client.chat.completions.create(**fb_kwargs), task)
        raise


@@ -2161,7 +2371,7 @@ async def async_call_llm(

    Same as call_llm() but async. See call_llm() for full documentation.
    """
-    resolved_provider, resolved_model, resolved_base_url, resolved_api_key = _resolve_task_provider_model(
+    resolved_provider, resolved_model, resolved_base_url, resolved_api_key, resolved_api_mode = _resolve_task_provider_model(
        task, provider, model, base_url, api_key)

    if task == "vision":
@@ -2195,6 +2405,7 @@ async def async_call_llm(
            async_mode=True,
            base_url=resolved_base_url,
            api_key=resolved_api_key,
+            api_mode=resolved_api_mode,
        )
        if client is None:
            _explicit = (resolved_provider or "").strip().lower()
@@ -2205,11 +2416,9 @@ async def async_call_llm(
                    f"variable, or switch to a different provider with `hermes model`."
                )
            if not resolved_base_url:
-                logger.warning("Provider %s unavailable, falling back to openrouter",
-                               resolved_provider)
-                client, final_model = _get_cached_client(
-                    "openrouter", resolved_model or _OPENROUTER_MODEL,
-                    async_mode=True)
+                logger.info("Auxiliary %s: provider %s unavailable, trying auto-detection chain",
+                            task or "call", resolved_provider)
+                client, final_model = _get_cached_client("auto", async_mode=True)
        if client is None:
            raise RuntimeError(
                f"No LLM provider configured for task={task} provider={resolved_provider}. "
@@ -2224,11 +2433,42 @@ async def async_call_llm(
        base_url=resolved_base_url)

    try:
-        return await client.chat.completions.create(**kwargs)
+        return _validate_llm_response(
+            await client.chat.completions.create(**kwargs), task)
    except Exception as first_err:
        err_str = str(first_err)
        if "max_tokens" in err_str or "unsupported_parameter" in err_str:
            kwargs.pop("max_tokens", None)
            kwargs["max_completion_tokens"] = max_tokens
-            return await client.chat.completions.create(**kwargs)
+            try:
+                return _validate_llm_response(
+                    await client.chat.completions.create(**kwargs), task)
+            except Exception as retry_err:
+                # If the max_tokens retry also hits a payment or connection
+                # error, fall through to the fallback chain below.
+                if not (_is_payment_error(retry_err) or _is_connection_error(retry_err)):
+                    raise
+                first_err = retry_err
+
+        # ── Payment / connection fallback (mirrors sync call_llm) ─────
+        should_fallback = _is_payment_error(first_err) or _is_connection_error(first_err)
+        is_auto = resolved_provider in ("auto", "", None)
+        if should_fallback and is_auto:
+            reason = "payment error" if _is_payment_error(first_err) else "connection error"
+            logger.info("Auxiliary %s (async): %s on %s (%s), trying fallback",
+                        task or "call", reason, resolved_provider, first_err)
+            fb_client, fb_model, fb_label = _try_payment_fallback(
+                resolved_provider, task, reason=reason)
+            if fb_client is not None:
+                fb_kwargs = _build_call_kwargs(
+                    fb_label, fb_model, messages,
+                    temperature=temperature, max_tokens=max_tokens,
+                    tools=tools, timeout=effective_timeout,
+                    extra_body=extra_body)
+                # Convert sync fallback client to async
+                async_fb, async_fb_model = _to_async_client(fb_client, fb_model or "")
+                if async_fb_model and async_fb_model != fb_kwargs.get("model"):
+                    fb_kwargs["model"] = async_fb_model
+                return _validate_llm_response(
+                    await async_fb.chat.completions.create(**fb_kwargs), task)
        raise
@@ -18,6 +18,7 @@ import time
 from typing import Any, Dict, List, Optional

 from agent.auxiliary_client import call_llm
+from agent.context_engine import ContextEngine
 from agent.model_metadata import (
    get_model_context_length,
    estimate_messages_tokens_rough,
@@ -50,8 +51,8 @@ _CHARS_PER_TOKEN = 4
 _SUMMARY_FAILURE_COOLDOWN_SECONDS = 600


-class ContextCompressor:
-    """Compresses conversation context when approaching the model's context limit.
+class ContextCompressor(ContextEngine):
+    """Default context engine — compresses conversation context via lossy summarization.

    Algorithm:
      1. Prune old tool results (cheap, no LLM call)
@@ -61,6 +62,33 @@ class ContextCompressor:
      5. On subsequent compactions, iteratively update the previous summary
    """

+    @property
+    def name(self) -> str:
+        return "compressor"
+
+    def on_session_reset(self) -> None:
+        """Reset all per-session state for /new or /reset."""
+        super().on_session_reset()
+        self._context_probed = False
+        self._context_probe_persistable = False
+        self._previous_summary = None
+
+    def update_model(
+        self,
+        model: str,
+        context_length: int,
+        base_url: str = "",
+        api_key: str = "",
+        provider: str = "",
+    ) -> None:
+        """Update model info after a model switch or fallback activation."""
+        self.model = model
+        self.base_url = base_url
+        self.api_key = api_key
+        self.provider = provider
+        self.context_length = context_length
+        self.threshold_tokens = int(context_length * self.threshold_percent)
+
    def __init__(
        self,
        model: str,
@@ -0,0 +1,184 @@
+"""Abstract base class for pluggable context engines.
+
+A context engine controls how conversation context is managed when
+approaching the model's token limit. The built-in ContextCompressor
+is the default implementation. Third-party engines (e.g. LCM) can
+replace it via the plugin system or by being placed in the
+``plugins/context_engine/<name>/`` directory.
+
+Selection is config-driven: ``context.engine`` in config.yaml.
+Default is ``"compressor"`` (the built-in). Only one engine is active.
+
+The engine is responsible for:
+  - Deciding when compaction should fire
+  - Performing compaction (summarization, DAG construction, etc.)
+  - Optionally exposing tools the agent can call (e.g. lcm_grep)
+  - Tracking token usage from API responses
+
+Lifecycle:
+  1. Engine is instantiated and registered (plugin register() or default)
+  2. on_session_start() called when a conversation begins
+  3. update_from_response() called after each API response with usage data
+  4. should_compress() checked after each turn
+  5. compress() called when should_compress() returns True
+  6. on_session_end() called at real session boundaries (CLI exit, /reset,
+     gateway session expiry) — NOT per-turn
+"""
+
+from abc import ABC, abstractmethod
+from typing import Any, Dict, List, Optional
+
+
+class ContextEngine(ABC):
+    """Base class all context engines must implement."""
+
+    # -- Identity ----------------------------------------------------------
+
+    @property
+    @abstractmethod
+    def name(self) -> str:
+        """Short identifier (e.g. 'compressor', 'lcm')."""
+
+    # -- Token state (read by run_agent.py for display/logging) ------------
+    #
+    # Engines MUST maintain these. run_agent.py reads them directly.
+
+    last_prompt_tokens: int = 0
+    last_completion_tokens: int = 0
+    last_total_tokens: int = 0
+    threshold_tokens: int = 0
+    context_length: int = 0
+    compression_count: int = 0
+
+    # -- Compaction parameters (read by run_agent.py for preflight) --------
+    #
+    # These control the preflight compression check.  Subclasses may
+    # override via __init__ or property; defaults are sensible for most
+    # engines.
+
+    threshold_percent: float = 0.75
+    protect_first_n: int = 3
+    protect_last_n: int = 6
+
+    # -- Core interface ----------------------------------------------------
+
+    @abstractmethod
+    def update_from_response(self, usage: Dict[str, Any]) -> None:
+        """Update tracked token usage from an API response.
+
+        Called after every LLM call with the usage dict from the response.
+        """
+
+    @abstractmethod
+    def should_compress(self, prompt_tokens: int = None) -> bool:
+        """Return True if compaction should fire this turn."""
+
+    @abstractmethod
+    def compress(
+        self,
+        messages: List[Dict[str, Any]],
+        current_tokens: int = None,
+    ) -> List[Dict[str, Any]]:
+        """Compact the message list and return the new message list.
+
+        This is the main entry point. The engine receives the full message
+        list and returns a (possibly shorter) list that fits within the
+        context budget. The implementation is free to summarize, build a
+        DAG, or do anything else — as long as the returned list is a valid
+        OpenAI-format message sequence.
+        """
+
+    # -- Optional: pre-flight check ----------------------------------------
+
+    def should_compress_preflight(self, messages: List[Dict[str, Any]]) -> bool:
+        """Quick rough check before the API call (no real token count yet).
+
+        Default returns False (skip pre-flight). Override if your engine
+        can do a cheap estimate.
+        """
+        return False
+
+    # -- Optional: session lifecycle ---------------------------------------
+
+    def on_session_start(self, session_id: str, **kwargs) -> None:
+        """Called when a new conversation session begins.
+
+        Use this to load persisted state (DAG, store) for the session.
+        kwargs may include hermes_home, platform, model, etc.
+        """
+
+    def on_session_end(self, session_id: str, messages: List[Dict[str, Any]]) -> None:
+        """Called at real session boundaries (CLI exit, /reset, gateway expiry).
+
+        Use this to flush state, close DB connections, etc.
+        NOT called per-turn — only when the session truly ends.
+        """
+
+    def on_session_reset(self) -> None:
+        """Called on /new or /reset. Reset per-session state.
+
+        Default resets compression_count and token tracking.
+        """
+        self.last_prompt_tokens = 0
+        self.last_completion_tokens = 0
+        self.last_total_tokens = 0
+        self.compression_count = 0
+
+    # -- Optional: tools ---------------------------------------------------
+
+    def get_tool_schemas(self) -> List[Dict[str, Any]]:
+        """Return tool schemas this engine provides to the agent.
+
+        Default returns empty list (no tools). LCM would return schemas
+        for lcm_grep, lcm_describe, lcm_expand here.
+        """
+        return []
+
+    def handle_tool_call(self, name: str, args: Dict[str, Any], **kwargs) -> str:
+        """Handle a tool call from the agent.
+
+        Only called for tool names returned by get_tool_schemas().
+        Must return a JSON string.
+
+        kwargs may include:
+          messages: the current in-memory message list (for live ingestion)
+        """
+        import json
+        return json.dumps({"error": f"Unknown context engine tool: {name}"})
+
+    # -- Optional: status / display ----------------------------------------
+
+    def get_status(self) -> Dict[str, Any]:
+        """Return status dict for display/logging.
+
+        Default returns the standard fields run_agent.py expects.
+        """
+        return {
+            "last_prompt_tokens": self.last_prompt_tokens,
+            "threshold_tokens": self.threshold_tokens,
+            "context_length": self.context_length,
+            "usage_percent": (
+                min(100, self.last_prompt_tokens / self.context_length * 100)
+                if self.context_length else 0
+            ),
+            "compression_count": self.compression_count,
+        }
+
+    # -- Optional: model switch support ------------------------------------
+
+    def update_model(
+        self,
+        model: str,
+        context_length: int,
+        base_url: str = "",
+        api_key: str = "",
+        provider: str = "",
+    ) -> None:
+        """Called when the user switches models or on fallback activation.
+
+        Default updates context_length and recalculates threshold_tokens
+        from threshold_percent. Override if your engine needs more
+        (e.g. recalculate DAG budgets, switch summary models).
+        """
+        self.context_length = context_length
+        self.threshold_tokens = int(context_length * self.threshold_percent)
@@ -13,8 +13,9 @@ from typing import Awaitable, Callable

 from agent.model_metadata import estimate_tokens_rough

+_QUOTED_REFERENCE_VALUE = r'(?:`[^`\n]+`|"[^"\n]+"|\'[^\'\n]+\')'
 REFERENCE_PATTERN = re.compile(
-    r"(?<![\w/])@(?:(?P<simple>diff|staged)\b|(?P<kind>file|folder|git|url):(?P<value>\S+))"
+    rf"(?<![\w/])@(?:(?P<simple>diff|staged)\b|(?P<kind>file|folder|git|url):(?P<value>{_QUOTED_REFERENCE_VALUE}(?::\d+(?:-\d+)?)?|\S+))"
 )
 TRAILING_PUNCTUATION = ",.;!?"
 _SENSITIVE_HOME_DIRS = (".ssh", ".aws", ".gnupg", ".kube", ".docker", ".azure", ".config/gh")
@@ -81,14 +82,10 @@ def parse_context_references(message: str) -> list[ContextReference]:
        value = _strip_trailing_punctuation(match.group("value") or "")
        line_start = None
        line_end = None
-        target = value
+        target = _strip_reference_wrappers(value)

        if kind == "file":
-            range_match = re.match(r"^(?P<path>.+?):(?P<start>\d+)(?:-(?P<end>\d+))?$", value)
-            if range_match:
-                target = range_match.group("path")
-                line_start = int(range_match.group("start"))
-                line_end = int(range_match.group("end") or range_match.group("start"))
+            target, line_start, line_end = _parse_file_reference_value(value)

        refs.append(
            ContextReference(
@@ -375,6 +372,38 @@ def _strip_trailing_punctuation(value: str) -> str:
    return stripped


+def _strip_reference_wrappers(value: str) -> str:
+    if len(value) >= 2 and value[0] == value[-1] and value[0] in "`\"'":
+        return value[1:-1]
+    return value
+
+
+def _parse_file_reference_value(value: str) -> tuple[str, int | None, int | None]:
+    quoted_match = re.match(
+        r'^(?P<quote>`|"|\')(?P<path>.+?)(?P=quote)(?::(?P<start>\d+)(?:-(?P<end>\d+))?)?$',
+        value,
+    )
+    if quoted_match:
+        line_start = quoted_match.group("start")
+        line_end = quoted_match.group("end")
+        return (
+            quoted_match.group("path"),
+            int(line_start) if line_start is not None else None,
+            int(line_end or line_start) if line_start is not None else None,
+        )
+
+    range_match = re.match(r"^(?P<path>.+?):(?P<start>\d+)(?:-(?P<end>\d+))?$", value)
+    if range_match:
+        line_start = int(range_match.group("start"))
+        return (
+            range_match.group("path"),
+            line_start,
+            int(range_match.group("end") or range_match.group("start")),
+        )
+
+    return _strip_reference_wrappers(value), None, None
+
+
 def _remove_reference_tokens(message: str, refs: list[ContextReference]) -> str:
    pieces: list[str] = []
    cursor = 0
@@ -1059,6 +1059,17 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
    auth_store = _load_auth_store()

    if provider == "anthropic":
+        # Only auto-discover external credentials (Claude Code, Hermes PKCE)
+        # when the user has explicitly configured anthropic as their provider.
+        # Without this gate, auxiliary client fallback chains silently read
+        # ~/.claude/.credentials.json without user consent.  See PR #4210.
+        try:
+            from hermes_cli.auth import is_provider_explicitly_configured
+            if not is_provider_explicitly_configured("anthropic"):
+                return changed, active_sources
+        except ImportError:
+            pass
+
        from agent.anthropic_adapter import read_claude_code_credentials, read_hermes_oauth_credentials

        for source_name, creds in (
@@ -1066,6 +1077,13 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
            ("claude_code", read_claude_code_credentials()),
        ):
            if creds and creds.get("accessToken"):
+                # Check if user explicitly removed this source
+                try:
+                    from hermes_cli.auth import is_source_suppressed
+                    if is_source_suppressed(provider, source_name):
+                        continue
+                except ImportError:
+                    pass
                active_sources.add(source_name)
                changed |= _upsert_entry(
                    entries,
@@ -21,11 +21,73 @@ _RESET = "\033[0m"
 logger = logging.getLogger(__name__)

 _ANSI_RESET = "\033[0m"
-_ANSI_DIM = "\033[38;2;150;150;150m"
-_ANSI_FILE = "\033[38;2;180;160;255m"
-_ANSI_HUNK = "\033[38;2;120;120;140m"
-_ANSI_MINUS = "\033[38;2;255;255;255;48;2;120;20;20m"
-_ANSI_PLUS = "\033[38;2;255;255;255;48;2;20;90;20m"
+
+# Diff colors — resolved lazily from the skin engine so they adapt
+# to light/dark themes.  Falls back to sensible defaults on import
+# failure.  We cache after first resolution for performance.
+_diff_colors_cached: dict[str, str] | None = None
+
+
+def _diff_ansi() -> dict[str, str]:
+    """Return ANSI escapes for diff display, resolved from the active skin."""
+    global _diff_colors_cached
+    if _diff_colors_cached is not None:
+        return _diff_colors_cached
+
+    # Defaults that work on dark terminals
+    dim = "\033[38;2;150;150;150m"
+    file_c = "\033[38;2;180;160;255m"
+    hunk = "\033[38;2;120;120;140m"
+    minus = "\033[38;2;255;255;255;48;2;120;20;20m"
+    plus = "\033[38;2;255;255;255;48;2;20;90;20m"
+
+    try:
+        from hermes_cli.skin_engine import get_active_skin
+        skin = get_active_skin()
+
+        def _hex_fg(key: str, fallback_rgb: tuple[int, int, int]) -> str:
+            h = skin.get_color(key, "")
+            if h and len(h) == 7 and h[0] == "#":
+                r, g, b = int(h[1:3], 16), int(h[3:5], 16), int(h[5:7], 16)
+                return f"\033[38;2;{r};{g};{b}m"
+            r, g, b = fallback_rgb
+            return f"\033[38;2;{r};{g};{b}m"
+
+        dim = _hex_fg("banner_dim", (150, 150, 150))
+        file_c = _hex_fg("session_label", (180, 160, 255))
+        hunk = _hex_fg("session_border", (120, 120, 140))
+        # minus/plus use background colors — derive from ui_error/ui_ok
+        err_h = skin.get_color("ui_error", "#ef5350")
+        ok_h = skin.get_color("ui_ok", "#4caf50")
+        if err_h and len(err_h) == 7:
+            er, eg, eb = int(err_h[1:3], 16), int(err_h[3:5], 16), int(err_h[5:7], 16)
+            # Use a dark tinted version as background
+            minus = f"\033[38;2;255;255;255;48;2;{max(er//2,20)};{max(eg//4,10)};{max(eb//4,10)}m"
+        if ok_h and len(ok_h) == 7:
+            or_, og, ob = int(ok_h[1:3], 16), int(ok_h[3:5], 16), int(ok_h[5:7], 16)
+            plus = f"\033[38;2;255;255;255;48;2;{max(or_//4,10)};{max(og//2,20)};{max(ob//4,10)}m"
+    except Exception:
+        pass
+
+    _diff_colors_cached = {
+        "dim": dim, "file": file_c, "hunk": hunk,
+        "minus": minus, "plus": plus,
+    }
+    return _diff_colors_cached
+
+
+def reset_diff_colors() -> None:
+    """Reset cached diff colors (call after /skin switch)."""
+    global _diff_colors_cached
+    _diff_colors_cached = None
+
+
+# Module-level helpers — each call resolves from the active skin lazily.
+def _diff_dim():   return _diff_ansi()["dim"]
+def _diff_file():  return _diff_ansi()["file"]
+def _diff_hunk():  return _diff_ansi()["hunk"]
+def _diff_minus(): return _diff_ansi()["minus"]
+def _diff_plus():  return _diff_ansi()["plus"]
 _MAX_INLINE_DIFF_FILES = 6
 _MAX_INLINE_DIFF_LINES = 80

@@ -403,19 +465,19 @@ def _render_inline_unified_diff(diff: str) -> list[str]:
        if raw_line.startswith("+++ "):
            to_file = raw_line[4:].strip()
            if from_file or to_file:
-                rendered.append(f"{_ANSI_FILE}{from_file or 'a/?'} → {to_file or 'b/?'}{_ANSI_RESET}")
+                rendered.append(f"{_diff_file()}{from_file or 'a/?'} → {to_file or 'b/?'}{_ANSI_RESET}")
            continue
        if raw_line.startswith("@@"):
-            rendered.append(f"{_ANSI_HUNK}{raw_line}{_ANSI_RESET}")
+            rendered.append(f"{_diff_hunk()}{raw_line}{_ANSI_RESET}")
            continue
        if raw_line.startswith("-"):
-            rendered.append(f"{_ANSI_MINUS}{raw_line}{_ANSI_RESET}")
+            rendered.append(f"{_diff_minus()}{raw_line}{_ANSI_RESET}")
            continue
        if raw_line.startswith("+"):
-            rendered.append(f"{_ANSI_PLUS}{raw_line}{_ANSI_RESET}")
+            rendered.append(f"{_diff_plus()}{raw_line}{_ANSI_RESET}")
            continue
        if raw_line.startswith(" "):
-            rendered.append(f"{_ANSI_DIM}{raw_line}{_ANSI_RESET}")
+            rendered.append(f"{_diff_dim()}{raw_line}{_ANSI_RESET}")
            continue
        if raw_line:
            rendered.append(raw_line)
@@ -481,7 +543,7 @@ def _summarize_rendered_diff_sections(
        summary = f"… omitted {omitted_lines} diff line(s)"
        if omitted_files:
            summary += f" across {omitted_files} additional file(s)/section(s)"
-        rendered.append(f"{_ANSI_HUNK}{summary}{_ANSI_RESET}")
+        rendered.append(f"{_diff_hunk()}{summary}{_ANSI_RESET}")

    return rendered

@@ -112,6 +112,7 @@ _RATE_LIMIT_PATTERNS = [
    "try again in",
    "please retry after",
    "resource_exhausted",
+    "rate increased too quickly",  # Alibaba/DashScope throttling
 ]

 # Usage-limit patterns that need disambiguation (could be billing OR rate_limit)
@@ -0,0 +1,49 @@
+"""User-facing summaries for manual compression commands."""
+
+from __future__ import annotations
+
+from typing import Any, Sequence
+
+
+def summarize_manual_compression(
+    before_messages: Sequence[dict[str, Any]],
+    after_messages: Sequence[dict[str, Any]],
+    before_tokens: int,
+    after_tokens: int,
+) -> dict[str, Any]:
+    """Return consistent user-facing feedback for manual compression."""
+    before_count = len(before_messages)
+    after_count = len(after_messages)
+    noop = list(after_messages) == list(before_messages)
+
+    if noop:
+        headline = f"No changes from compression: {before_count} messages"
+        if after_tokens == before_tokens:
+            token_line = (
+                f"Rough transcript estimate: ~{before_tokens:,} tokens (unchanged)"
+            )
+        else:
+            token_line = (
+                f"Rough transcript estimate: ~{before_tokens:,} → "
+                f"~{after_tokens:,} tokens"
+            )
+    else:
+        headline = f"Compressed: {before_count} → {after_count} messages"
+        token_line = (
+            f"Rough transcript estimate: ~{before_tokens:,} → "
+            f"~{after_tokens:,} tokens"
+        )
+
+    note = None
+    if not noop and after_count < before_count and after_tokens > before_tokens:
+        note = (
+            "Note: fewer messages can still raise this rough transcript estimate "
+            "when compression rewrites the transcript into denser summaries."
+        )
+
+    return {
+        "noop": noop,
+        "headline": headline,
+        "token_line": token_line,
+        "note": note,
+    }
@@ -113,17 +113,14 @@ DEFAULT_CONTEXT_LENGTHS = {
    "deepseek": 128000,
    # Meta
    "llama": 131072,
-    # Qwen
+    # Qwen — specific model families before the catch-all.
+    # Official docs: https://help.aliyun.com/zh/model-studio/developer-reference/
+    "qwen3-coder-plus": 1000000,  # 1M context
+    "qwen3-coder": 262144,        # 256K context
    "qwen": 131072,
-    # MiniMax (lowercase — lookup lowercases model names at line 973)
-    "minimax-m1-256k": 1000000,
-    "minimax-m1-128k": 1000000,
-    "minimax-m1-80k": 1000000,
-    "minimax-m1-40k": 1000000,
-    "minimax-m1": 1000000,
-    "minimax-m2.5": 1048576,
-    "minimax-m2.7": 1048576,
-    "minimax": 1048576,
+    # MiniMax — official docs: 204,800 context for all models
+    # https://platform.minimax.io/docs/api-reference/text-anthropic-api
+    "minimax": 204800,
    # GLM
    "glm": 202752,
    # xAI Grok — xAI /v1/models does not return context_length metadata,
@@ -151,7 +148,7 @@ DEFAULT_CONTEXT_LENGTHS = {
    "deepseek-ai/DeepSeek-V3.2": 65536,
    "moonshotai/Kimi-K2.5": 262144,
    "moonshotai/Kimi-K2-Thinking": 262144,
-    "MiniMaxAI/MiniMax-M2.5": 1048576,
+    "MiniMaxAI/MiniMax-M2.5": 204800,
    "XiaomiMiMo/MiMo-V2-Flash": 32768,
    "mimo-v2-pro": 1048576,
    "mimo-v2-omni": 1048576,
@@ -213,6 +210,7 @@ _URL_TO_PROVIDER: Dict[str, str] = {
    "models.github.ai": "copilot",
    "api.fireworks.ai": "fireworks",
    "opencode.ai": "opencode-go",
+    "api.x.ai": "xai",
 }


@@ -356,6 +356,14 @@ PLATFORM_HINTS = {
        "MEDIA:/absolute/path/to/file in your response. Images (.jpg, .png, "
        ".heic) appear as photos and other files arrive as attachments."
    ),
+    "weixin": (
+        "You are on Weixin/WeChat. Markdown formatting is supported, so you may use it when "
+        "it improves readability, but keep the message compact and chat-friendly. You can send media files natively: "
+        "include MEDIA:/absolute/path/to/file in your response. Images are sent as native "
+        "photos, videos play inline when supported, and other files arrive as downloadable "
+        "documents. You can also include image URLs in markdown format ![alt](url) and they "
+        "will be downloaded and sent as native media when possible."
+    ),
 }

 CONTEXT_FILE_MAX_CHARS = 20_000
@@ -479,7 +487,7 @@ def _parse_skill_file(skill_file: Path) -> tuple[bool, dict, str]:
    (True, {}, "") to err on the side of showing the skill.
    """
    try:
-        raw = skill_file.read_text(encoding="utf-8")[:2000]
+        raw = skill_file.read_text(encoding="utf-8")
        frontmatter, _ = parse_frontmatter(raw)

        if not skill_matches_platform(frontmatter):
@@ -487,7 +495,7 @@ def _parse_skill_file(skill_file: Path) -> tuple[bool, dict, str]:

        return True, frontmatter, extract_skill_description(frontmatter)
    except Exception as e:
-        logger.debug("Failed to parse skill file %s: %s", skill_file, e)
+        logger.warning("Failed to parse skill file %s: %s", skill_file, e)
        return True, {}, ""


@@ -550,9 +558,10 @@ def build_skills_system_prompt(
    # ── Layer 1: in-process LRU cache ─────────────────────────────────
    # Include the resolved platform so per-platform disabled-skill lists
    # produce distinct cache entries (gateway serves multiple platforms).
+    from gateway.session_context import get_session_env
    _platform_hint = (
        os.environ.get("HERMES_PLATFORM")
-        or os.environ.get("HERMES_SESSION_PLATFORM")
+        or get_session_env("HERMES_SESSION_PLATFORM")
        or ""
    )
    cache_key = (
@@ -97,8 +97,12 @@ def parse_rate_limit_headers(

    Returns None if no rate limit headers are present.
    """
+    # Normalize to lowercase so lookups work regardless of how the server
+    # capitalises headers (HTTP header names are case-insensitive per RFC 7230).
+    lowered = {k.lower(): v for k, v in headers.items()}
+
    # Quick check: at least one rate limit header must exist
-    has_any = any(k.lower().startswith("x-ratelimit-") for k in headers)
+    has_any = any(k.startswith("x-ratelimit-") for k in lowered)
    if not has_any:
        return None

@@ -109,9 +113,9 @@ def parse_rate_limit_headers(
        #      resource="tokens", suffix="-1h" -> per-hour
        tag = f"{resource}{suffix}"
        return RateLimitBucket(
-            limit=_safe_int(headers.get(f"x-ratelimit-limit-{tag}")),
-            remaining=_safe_int(headers.get(f"x-ratelimit-remaining-{tag}")),
-            reset_seconds=_safe_float(headers.get(f"x-ratelimit-reset-{tag}")),
+            limit=_safe_int(lowered.get(f"x-ratelimit-limit-{tag}")),
+            remaining=_safe_int(lowered.get(f"x-ratelimit-remaining-{tag}")),
+            reset_seconds=_safe_float(lowered.get(f"x-ratelimit-reset-{tag}")),
            captured_at=now,
        )

@@ -168,7 +168,7 @@ def _build_skill_message(
            subdir_path = skill_dir / subdir
            if subdir_path.exists():
                for f in sorted(subdir_path.rglob("*")):
-                    if f.is_file():
+                    if f.is_file() and not f.is_symlink():
                        rel = str(f.relative_to(skill_dir))
                        supporting.append(rel)

@@ -145,10 +145,11 @@ def get_disabled_skill_names(platform: str | None = None) -> Set[str]:
    if not isinstance(skills_cfg, dict):
        return set()

+    from gateway.session_context import get_session_env
    resolved_platform = (
        platform
        or os.getenv("HERMES_PLATFORM")
-        or os.getenv("HERMES_SESSION_PLATFORM")
+        or get_session_env("HERMES_SESSION_PLATFORM")
    )
    if resolved_platform:
        platform_disabled = (skills_cfg.get("platform_disabled") or {}).get(
@@ -181,6 +181,7 @@ def resolve_turn_route(user_message: str, routing_config: Optional[Dict[str, Any
            "api_mode": runtime.get("api_mode"),
            "command": runtime.get("command"),
            "args": list(runtime.get("args") or []),
+            "credential_pool": runtime.get("credential_pool"),
        },
        "label": f"smart route → {route.get('model')} ({runtime.get('provider')})",
        "signature": (
@@ -480,6 +480,12 @@ agent:
  # Fires once per run when inactivity reaches this threshold (seconds).
  # Set to 0 to disable the warning.
  # gateway_timeout_warning: 900
+
+  # Graceful drain timeout for gateway stop/restart (seconds).
+  # The gateway stops accepting new work, waits for in-flight agents to
+  # finish, then interrupts anything still running after this timeout.
+  # 0 = no drain, interrupt immediately.
+  # restart_drain_timeout: 60
  
  # Enable verbose logging
  verbose: false
@@ -582,7 +588,7 @@ platform_toolsets:
 #   skills_hub   - skill_hub (search/install/manage from online registries — user-driven only)
 #   moa          - mixture_of_agents  (requires OPENROUTER_API_KEY)
 #   todo         - todo (in-memory task planning, no deps)
-#   tts          - text_to_speech  (Edge TTS free, or ELEVENLABS/OPENAI/MINIMAX key)
+#   tts          - text_to_speech  (Edge TTS free, or ELEVENLABS/OPENAI/MINIMAX/MISTRAL key)
 #   cronjob      - cronjob (create/list/update/pause/resume/run/remove scheduled tasks)
 #   rl           - rl_list_environments, rl_start_training, etc. (requires TINKER_API_KEY)
 #
@@ -611,7 +617,7 @@ platform_toolsets:
 #   todo         - Task planning and tracking for multi-step work
 #   memory       - Persistent memory across sessions (personal notes + user profile)
 #   session_search - Search and recall past conversations (FTS5 + Gemini Flash summarization)
-#   tts          - Text-to-speech (Edge TTS free, ElevenLabs, OpenAI, MiniMax)
+#   tts          - Text-to-speech (Edge TTS free, ElevenLabs, OpenAI, MiniMax, Mistral)
 #   cronjob      - Schedule and manage automated tasks (CLI-only)
 #   rl           - RL training tools (Tinker-Atropos)
 #
@@ -319,7 +319,7 @@ def load_cli_config() -> Dict[str, Any]:
    # Load from file if exists
    if config_path.exists():
        try:
-            with open(config_path, "r") as f:
+            with open(config_path, "r", encoding="utf-8") as f:
                file_config = yaml.safe_load(f) or {}
            
            _file_has_terminal_config = "terminal" in file_config
@@ -987,11 +987,60 @@ def _prune_orphaned_branches(repo_root: str) -> None:
 # - Dim: #B8860B (muted text)

 # ANSI building blocks for conversation display
-_GOLD = "\033[1;38;2;255;215;0m"  # True-color #FFD700 bold — matches Rich Panel gold
+_ACCENT_ANSI_DEFAULT = "\033[1;38;2;255;215;0m"  # True-color #FFD700 bold — fallback
 _BOLD = "\033[1m"
 _DIM = "\033[2m"
 _RST = "\033[0m"

+
+def _hex_to_ansi_bold(hex_color: str) -> str:
+    """Convert a hex color like '#268bd2' to a bold true-color ANSI escape."""
+    try:
+        r = int(hex_color[1:3], 16)
+        g = int(hex_color[3:5], 16)
+        b = int(hex_color[5:7], 16)
+        return f"\033[1;38;2;{r};{g};{b}m"
+    except (ValueError, IndexError):
+        return _ACCENT_ANSI_DEFAULT
+
+
+class _SkinAwareAnsi:
+    """Lazy ANSI escape that resolves from the skin engine on first use.
+
+    Acts as a string in f-strings and concatenation.  Call ``.reset()`` to
+    force re-resolution after a ``/skin`` switch.
+    """
+
+    def __init__(self, skin_key: str, fallback_hex: str = "#FFD700"):
+        self._skin_key = skin_key
+        self._fallback_hex = fallback_hex
+        self._cached: str | None = None
+
+    def __str__(self) -> str:
+        if self._cached is None:
+            try:
+                from hermes_cli.skin_engine import get_active_skin
+                self._cached = _hex_to_ansi_bold(
+                    get_active_skin().get_color(self._skin_key, self._fallback_hex)
+                )
+            except Exception:
+                self._cached = _hex_to_ansi_bold(self._fallback_hex)
+        return self._cached
+
+    def __add__(self, other: str) -> str:
+        return str(self) + other
+
+    def __radd__(self, other: str) -> str:
+        return other + str(self)
+
+    def reset(self) -> None:
+        """Clear cache so the next access re-reads the skin."""
+        self._cached = None
+
+
+_ACCENT = _SkinAwareAnsi("response_border", "#FFD700")
+
+
 def _accent_hex() -> str:
    """Return the active skin accent color for legacy CLI output lines."""
    try:
@@ -1048,7 +1097,7 @@ def _termux_example_image_path(filename: str = "cat.png") -> str:


 def _split_path_input(raw: str) -> tuple[str, str]:
-    """Split a leading file path token from trailing free-form text.
+    r"""Split a leading file path token from trailing free-form text.

    Supports quoted paths and backslash-escaped spaces so callers can accept
    inputs like:
@@ -1122,6 +1171,45 @@ def _resolve_attachment_path(raw_path: str) -> Path | None:
    return resolved


+def _format_process_notification(evt: dict) -> "str | None":
+    """Format a process notification event into a [SYSTEM: ...] message.
+
+    Handles both completion events (notify_on_complete) and watch pattern
+    match events from the unified completion_queue.
+    """
+    evt_type = evt.get("type", "completion")
+    _sid = evt.get("session_id", "unknown")
+    _cmd = evt.get("command", "unknown")
+
+    if evt_type == "watch_disabled":
+        return f"[SYSTEM: {evt.get('message', '')}]"
+
+    if evt_type == "watch_match":
+        _pat = evt.get("pattern", "?")
+        _out = evt.get("output", "")
+        _sup = evt.get("suppressed", 0)
+        text = (
+            f"[SYSTEM: Background process {_sid} matched "
+            f"watch pattern \"{_pat}\".\n"
+            f"Command: {_cmd}\n"
+            f"Matched output:\n{_out}"
+        )
+        if _sup:
+            text += f"\n({_sup} earlier matches were suppressed by rate limit)"
+        text += "]"
+        return text
+
+    # Default: completion event
+    _exit = evt.get("exit_code", "?")
+    _out = evt.get("output", "")
+    return (
+        f"[SYSTEM: Background process {_sid} completed "
+        f"(exit code {_exit}).\n"
+        f"Command: {_cmd}\n"
+        f"Output:\n{_out}]"
+    )
+
+
 def _detect_file_drop(user_input: str) -> "dict | None":
    """Detect if *user_input* starts with a real local file path.

@@ -1719,6 +1807,7 @@ class HermesCLI:
        self._secret_state = None
        self._secret_deadline = 0
        self._spinner_text: str = ""  # thinking spinner text for TUI
+        self._tool_start_time: float = 0.0  # monotonic timestamp when current tool started (for live elapsed)
        self._command_running = False
        self._command_status = ""
        self._attached_images: list[Path] = []
@@ -2027,6 +2116,25 @@ class HermesCLI:
        current_model = (self.model or "").strip()
        changed = False

+        try:
+            from hermes_cli.model_normalize import (
+                _AGGREGATOR_PROVIDERS,
+                normalize_model_for_provider,
+            )
+
+            if resolved_provider not in _AGGREGATOR_PROVIDERS:
+                normalized_model = normalize_model_for_provider(current_model, resolved_provider)
+                if normalized_model and normalized_model != current_model:
+                    if not self._model_is_default:
+                        self.console.print(
+                            f"[yellow]⚠️  Normalized model '{current_model}' to '{normalized_model}' for {resolved_provider}.[/]"
+                        )
+                    self.model = normalized_model
+                    current_model = normalized_model
+                    changed = True
+        except Exception:
+            pass
+
        if resolved_provider == "copilot":
            try:
                from hermes_cli.models import copilot_model_api_mode, normalize_copilot_model_id
@@ -2072,7 +2180,7 @@ class HermesCLI:
            return changed

        if resolved_provider != "openai-codex":
-            return False
+            return changed

        # 1. Strip provider prefix ("openai/gpt-5.4" → "gpt-5.4")
        if "/" in current_model:
@@ -2111,6 +2219,7 @@ class HermesCLI:
        if not text:
            self._flush_reasoning_preview(force=True)
        self._spinner_text = text or ""
+        self._tool_start_time = 0.0  # clear tool timer when switching to thinking
        self._invalidate()

    # ── Streaming display ────────────────────────────────────────────────
@@ -2445,7 +2554,7 @@ class HermesCLI:
                self._stream_text_ansi = ""
            w = shutil.get_terminal_size().columns
            fill = w - 2 - len(label)
-            _cprint(f"\n{_GOLD}╭─{label}{'─' * max(fill - 1, 0)}╮{_RST}")
+            _cprint(f"\n{_ACCENT}╭─{label}{'─' * max(fill - 1, 0)}╮{_RST}")

        self._stream_buf += text

@@ -2476,7 +2585,7 @@ class HermesCLI:
        # Close the response box
        if self._stream_box_opened:
            w = shutil.get_terminal_size().columns
-            _cprint(f"{_GOLD}╰{'─' * (w - 2)}╯{_RST}")
+            _cprint(f"{_ACCENT}╰{'─' * (w - 2)}╯{_RST}")

    def _reset_stream_state(self) -> None:
        """Reset streaming state before each agent invocation."""
@@ -2899,15 +3008,17 @@ class HermesCLI:
            title_part = ""
            if session_meta.get("title"):
                title_part = f' "{session_meta["title"]}"'
+            accent_color = _accent_hex()
            self.console.print(
-                f"[#DAA520]↻ Resumed session [bold]{self.session_id}[/bold]"
+                f"[{accent_color}]↻ Resumed session [bold]{self.session_id}[/bold]"
                f"{title_part} "
                f"({msg_count} user message{'s' if msg_count != 1 else ''}, "
                f"{len(restored)} total messages)[/]"
            )
        else:
+            accent_color = _accent_hex()
            self.console.print(
-                f"[#DAA520]Session {self.session_id} found but has no "
+                f"[{accent_color}]Session {self.session_id} found but has no "
                f"messages. Starting fresh.[/]"
            )
            return False
@@ -3360,36 +3471,97 @@ class HermesCLI:
            pass  # Don't crash on import errors
    
    def _show_status(self):
-        """Show current status bar."""
+        """Show compact startup status line."""
        # Get tool count
        tools = get_tool_definitions(enabled_toolsets=self.enabled_toolsets, quiet_mode=True)
        tool_count = len(tools) if tools else 0
-        
+
        # Format model name (shorten if needed)
        model_short = self.model.split("/")[-1] if "/" in self.model else self.model
        if len(model_short) > 30:
            model_short = model_short[:27] + "..."
-        
+
        # Get API status indicator
        if self.api_key:
            api_indicator = "[green bold]●[/]"
        else:
            api_indicator = "[red bold]●[/]"
-        
-        # Build status line with proper markup
+
+        # Build status line with proper markup — skin-aware colors
+        try:
+            from hermes_cli.skin_engine import get_active_skin
+            skin = get_active_skin()
+            separator_color = skin.get_color("banner_dim", "#B8860B")
+            accent_color = skin.get_color("ui_accent", "#FFBF00")
+            label_color = skin.get_color("ui_label", "#4dd0e1")
+        except Exception:
+            separator_color, accent_color, label_color = "#B8860B", "#FFBF00", "cyan"
        toolsets_info = ""
        if self.enabled_toolsets and "all" not in self.enabled_toolsets:
-            toolsets_info = f" [dim #B8860B]·[/] [#CD7F32]toolsets: {', '.join(self.enabled_toolsets)}[/]"
+            toolsets_info = f" [dim {separator_color}]·[/] [{label_color}]toolsets: {', '.join(self.enabled_toolsets)}[/]"

-        provider_info = f" [dim #B8860B]·[/] [dim]provider: {self.provider}[/]"
+        provider_info = f" [dim {separator_color}]·[/] [dim]provider: {self.provider}[/]"
        if self._provider_source:
-            provider_info += f" [dim #B8860B]·[/] [dim]auth: {self._provider_source}[/]"
+            provider_info += f" [dim {separator_color}]·[/] [dim]auth: {self._provider_source}[/]"

        self.console.print(
-            f"  {api_indicator} [#FFBF00]{model_short}[/] "
-            f"[dim #B8860B]·[/] [bold cyan]{tool_count} tools[/]"
+            f"  {api_indicator} [{accent_color}]{model_short}[/] "
+            f"[dim {separator_color}]·[/] [bold {label_color}]{tool_count} tools[/]"
            f"{toolsets_info}{provider_info}"
        )
+
+    def _show_session_status(self):
+        """Show gateway-style status for the current CLI session."""
+        session_meta = {}
+        if self._session_db:
+            try:
+                session_meta = self._session_db.get_session(self.session_id) or {}
+            except Exception:
+                session_meta = {}
+
+        title = (session_meta.get("title") or "").strip()
+
+        created_at = self.session_start
+        started_at = session_meta.get("started_at")
+        if started_at:
+            try:
+                created_at = datetime.fromtimestamp(float(started_at))
+            except Exception:
+                created_at = self.session_start
+
+        updated_at = created_at
+        for field in ("updated_at", "last_updated_at", "last_activity_at"):
+            value = session_meta.get(field)
+            if not value:
+                continue
+            try:
+                updated_at = datetime.fromtimestamp(float(value))
+                break
+            except Exception:
+                pass
+
+        agent = getattr(self, "agent", None)
+        total_tokens = getattr(agent, "session_total_tokens", 0) or 0
+        provider = getattr(self, "provider", None) or "unknown"
+        model = getattr(self, "model", None) or "(unknown)"
+        is_running = bool(getattr(self, "_agent_running", False))
+
+        lines = [
+            "Hermes CLI Status",
+            "",
+            f"Session ID: {self.session_id}",
+            f"Path: {display_hermes_home()}",
+        ]
+        if title:
+            lines.append(f"Title: {title}")
+        lines.extend([
+            f"Model: {model} ({provider})",
+            f"Created: {created_at.strftime('%Y-%m-%d %H:%M')}",
+            f"Last Activity: {updated_at.strftime('%Y-%m-%d %H:%M')}",
+            f"Tokens: {total_tokens:,}",
+            f"Agent Running: {'Yes' if is_running else 'No'}",
+        ])
+        self.console.print("\n".join(lines), highlight=False, markup=False)
    
    def _fast_command_available(self) -> bool:
        try:
@@ -3525,7 +3697,7 @@ class HermesCLI:
        # TUI event loop (known pitfall).
        verb = "Disabling" if subcommand == "disable" else "Enabling"
        label = ", ".join(names)
-        _cprint(f"{_GOLD}{verb} {label}...{_RST}")
+        _cprint(f"{_ACCENT}{verb} {label}...{_RST}")

        tools_disable_enable_command(
            Namespace(tools_action=subcommand, names=names, platform="cli"))
@@ -4873,6 +5045,8 @@ class HermesCLI:
                self._handle_skills_command(cmd_original)
        elif canonical == "platforms":
            self._show_gateway_status()
+        elif canonical == "status":
+            self._show_session_status()
        elif canonical == "statusbar":
            self._status_bar_visible = not self._status_bar_visible
            state = "visible" if self._status_bar_visible else "hidden"
@@ -5036,17 +5210,17 @@ class HermesCLI:
                    if full_name == typed_base:
                        # Already an exact token — no expansion possible; fall through
                        _cprint(f"\033[1;31mUnknown command: {cmd_lower}{_RST}")
-                        _cprint(f"{_DIM}{_GOLD}Type /help for available commands{_RST}")
+                        _cprint(f"{_DIM}{_ACCENT}Type /help for available commands{_RST}")
                    else:
                        remainder = cmd_original.strip()[len(typed_base):]
                        full_cmd = full_name + remainder
                        return self.process_command(full_cmd)
                elif len(matches) > 1:
-                    _cprint(f"{_GOLD}Ambiguous command: {cmd_lower}{_RST}")
+                    _cprint(f"{_ACCENT}Ambiguous command: {cmd_lower}{_RST}")
                    _cprint(f"{_DIM}Did you mean: {', '.join(sorted(matches))}?{_RST}")
                else:
                    _cprint(f"\033[1;31mUnknown command: {cmd_lower}{_RST}")
-                    _cprint(f"{_DIM}{_GOLD}Type /help for available commands{_RST}")
+                    _cprint(f"{_DIM}{_ACCENT}Type /help for available commands{_RST}")
        
        return True
    
@@ -5584,6 +5758,7 @@ class HermesCLI:
            return

        set_active_skin(new_skin)
+        _ACCENT.reset()  # Re-resolve ANSI color for the new skin
        if save_config_value("display.skin", new_skin):
            print(f"  Skin set to: {new_skin} (saved)")
        else:
@@ -5652,8 +5827,8 @@ class HermesCLI:
            else:
                level = rc.get("effort", "medium")
            display_state = "on ✓" if self.show_reasoning else "off"
-            _cprint(f"  {_GOLD}Reasoning effort:  {level}{_RST}")
-            _cprint(f"  {_GOLD}Reasoning display: {display_state}{_RST}")
+            _cprint(f"  {_ACCENT}Reasoning effort:  {level}{_RST}")
+            _cprint(f"  {_ACCENT}Reasoning display: {display_state}{_RST}")
            _cprint(f"  {_DIM}Usage: /reasoning <none|minimal|low|medium|high|xhigh|show|hide>{_RST}")
            return

@@ -5665,7 +5840,7 @@ class HermesCLI:
            if self.agent:
                self.agent.reasoning_callback = self._current_reasoning_callback()
            save_config_value("display.show_reasoning", True)
-            _cprint(f"  {_GOLD}✓ Reasoning display: ON (saved){_RST}")
+            _cprint(f"  {_ACCENT}✓ Reasoning display: ON (saved){_RST}")
            _cprint(f"  {_DIM}  Model thinking will be shown during and after each response.{_RST}")
            return
        if arg in ("hide", "off"):
@@ -5673,7 +5848,7 @@ class HermesCLI:
            if self.agent:
                self.agent.reasoning_callback = self._current_reasoning_callback()
            save_config_value("display.show_reasoning", False)
-            _cprint(f"  {_GOLD}✓ Reasoning display: OFF (saved){_RST}")
+            _cprint(f"  {_ACCENT}✓ Reasoning display: OFF (saved){_RST}")
            return

        # Effort level change
@@ -5688,9 +5863,9 @@ class HermesCLI:
        self.agent = None  # Force agent re-init with new reasoning config

        if save_config_value("agent.reasoning_effort", arg):
-            _cprint(f"  {_GOLD}✓ Reasoning effort set to '{arg}' (saved to config){_RST}")
+            _cprint(f"  {_ACCENT}✓ Reasoning effort set to '{arg}' (saved to config){_RST}")
        else:
-            _cprint(f"  {_GOLD}✓ Reasoning effort set to '{arg}' (session only){_RST}")
+            _cprint(f"  {_ACCENT}✓ Reasoning effort set to '{arg}' (session only){_RST}")

    def _handle_fast_command(self, cmd: str):
        """Handle /fast — toggle fast mode (OpenAI Priority Processing / Anthropic Fast Mode)."""
@@ -5710,7 +5885,7 @@ class HermesCLI:
        parts = cmd.strip().split(maxsplit=1)
        if len(parts) < 2 or parts[1].strip().lower() == "status":
            status = "fast" if self.service_tier == "priority" else "normal"
-            _cprint(f"  {_GOLD}{feature_name}: {status}{_RST}")
+            _cprint(f"  {_ACCENT}{feature_name}: {status}{_RST}")
            _cprint(f"  {_DIM}Usage: /fast [normal|fast|status]{_RST}")
            return

@@ -5731,9 +5906,9 @@ class HermesCLI:

        self.agent = None  # Force agent re-init with new service-tier config
        if save_config_value("agent.service_tier", saved_value):
-            _cprint(f"  {_GOLD}✓ {feature_name} set to {label} (saved to config){_RST}")
+            _cprint(f"  {_ACCENT}✓ {feature_name} set to {label} (saved to config){_RST}")
        else:
-            _cprint(f"  {_GOLD}✓ {feature_name} set to {label} (session only){_RST}")
+            _cprint(f"  {_ACCENT}✓ {feature_name} set to {label} (session only){_RST}")

    def _on_reasoning(self, reasoning_text: str):
        """Callback for intermediate reasoning display during tool-call loops."""
@@ -5759,21 +5934,29 @@ class HermesCLI:
        original_count = len(self.conversation_history)
        try:
            from agent.model_metadata import estimate_messages_tokens_rough
-            approx_tokens = estimate_messages_tokens_rough(self.conversation_history)
+            from agent.manual_compression_feedback import summarize_manual_compression
+            original_history = list(self.conversation_history)
+            approx_tokens = estimate_messages_tokens_rough(original_history)
            print(f"🗜️  Compressing {original_count} messages (~{approx_tokens:,} tokens)...")

-            compressed, _new_system = self.agent._compress_context(
-                self.conversation_history,
+            compressed, _ = self.agent._compress_context(
+                original_history,
                self.agent._cached_system_prompt or "",
                approx_tokens=approx_tokens,
            )
            self.conversation_history = compressed
-            new_count = len(self.conversation_history)
            new_tokens = estimate_messages_tokens_rough(self.conversation_history)
-            print(
-                f"  ✅ Compressed: {original_count} → {new_count} messages "
-                f"(~{approx_tokens:,} → ~{new_tokens:,} tokens)"
+            summary = summarize_manual_compression(
+                original_history,
+                self.conversation_history,
+                approx_tokens,
+                new_tokens,
            )
+            icon = "🗜️" if summary["noop"] else "✅"
+            print(f"  {icon} {summary['headline']}")
+            print(f"     {summary['token_line']}")
+            if summary["note"]:
+                print(f"     {summary['note']}")

        except Exception as e:
            print(f"  ❌ Compression failed: {e}")
@@ -6071,11 +6254,20 @@ class HermesCLI:
        Updates the TUI spinner widget so the user can see what the agent
        is doing during tool execution (fills the gap between thinking
        spinner and next response).  Also plays audio cue in voice mode.
+
+        On tool.started, records a monotonic timestamp so get_spinner_text()
+        can show a live elapsed timer (the TUI poll loop already invalidates
+        every ~0.15s, so the counter updates automatically).
        """
-        # Only act on tool.started; ignore tool.completed, reasoning.available, etc.
+        if event_type == "tool.completed":
+            import time as _time
+            self._tool_start_time = 0.0
+            self._invalidate()
+            return
        if event_type != "tool.started":
            return
        if function_name and not function_name.startswith("_"):
+            import time as _time
            from agent.display import get_tool_emoji
            emoji = get_tool_emoji(function_name)
            label = preview or function_name
@@ -6084,6 +6276,7 @@ class HermesCLI:
            if _pl > 0 and len(label) > _pl:
                label = label[:_pl - 3] + "..."
            self._spinner_text = f"{emoji} {label}"
+            self._tool_start_time = _time.monotonic()
            self._invalidate()

        if not self._voice_mode:
@@ -6215,7 +6408,7 @@ class HermesCLI:
            _recording_hint = "Termux:API capture | Ctrl+B to stop"
        else:
            _recording_hint = "Ctrl+B to stop"
-        _cprint(f"\n{_GOLD}● Recording...{_RST} {_DIM}({_recording_hint}){_RST}")
+        _cprint(f"\n{_ACCENT}● Recording...{_RST} {_DIM}({_recording_hint}){_RST}")

        # Periodically refresh prompt to update audio level indicator
        def _refresh_level():
@@ -6415,14 +6608,14 @@ class HermesCLI:
        # Environment detection -- warn and block in incompatible environments
        env_check = detect_audio_environment()
        if not env_check["available"]:
-            _cprint(f"\n{_GOLD}Voice mode unavailable in this environment:{_RST}")
+            _cprint(f"\n{_ACCENT}Voice mode unavailable in this environment:{_RST}")
            for warning in env_check["warnings"]:
                _cprint(f"  {_DIM}{warning}{_RST}")
            return

        reqs = check_voice_requirements()
        if not reqs["available"]:
-            _cprint(f"\n{_GOLD}Voice mode requirements not met:{_RST}")
+            _cprint(f"\n{_ACCENT}Voice mode requirements not met:{_RST}")
            for line in reqs["details"].split("\n"):
                _cprint(f"  {_DIM}{line}{_RST}")
            if reqs["missing_packages"]:
@@ -6460,7 +6653,7 @@ class HermesCLI:
        except Exception:
            _ptt_key = "c-b"
        _ptt_display = _ptt_key.replace("c-", "Ctrl+").upper()
-        _cprint(f"\n{_GOLD}Voice mode enabled{tts_status}{_RST}")
+        _cprint(f"\n{_ACCENT}Voice mode enabled{tts_status}{_RST}")
        _cprint(f"  {_DIM}{_ptt_display} to start/stop recording{_RST}")
        _cprint(f"  {_DIM}/voice tts  to toggle speech output{_RST}")
        _cprint(f"  {_DIM}/voice off  to disable voice mode{_RST}")
@@ -6512,7 +6705,7 @@ class HermesCLI:
            if not check_tts_requirements():
                _cprint(f"{_DIM}Warning: No TTS provider available. Install edge-tts or set API keys.{_RST}")

-        _cprint(f"{_GOLD}Voice TTS {status}.{_RST}")
+        _cprint(f"{_ACCENT}Voice TTS {status}.{_RST}")

    def _show_voice_status(self):
        """Show current voice mode status."""
@@ -6997,7 +7190,7 @@ class HermesCLI:
                        w = self.console.width
                        label = " ⚕ Hermes "
                        fill = w - 2 - len(label)
-                        _cprint(f"\n{_GOLD}╭─{label}{'─' * max(fill - 1, 0)}╮{_RST}")
+                        _cprint(f"\n{_ACCENT}╭─{label}{'─' * max(fill - 1, 0)}╮{_RST}")
                    _cprint(sentence.rstrip())

                tts_thread = threading.Thread(
@@ -7213,7 +7406,7 @@ class HermesCLI:
                if use_streaming_tts and _streaming_box_opened and not is_error_response:
                    # Text was already printed sentence-by-sentence; just close the box
                    w = shutil.get_terminal_size().columns
-                    _cprint(f"\n{_GOLD}╰{'─' * (w - 2)}╯{_RST}")
+                    _cprint(f"\n{_ACCENT}╰{'─' * (w - 2)}╯{_RST}")
                elif already_streamed:
                    # Response was already streamed token-by-token with box framing;
                    # _flush_stream() already closed the box. Skip Rich Panel.
@@ -7925,7 +8118,7 @@ class HermesCLI:
            agent_name = get_active_skin().get_branding("agent_name", "Hermes Agent")
            msg = f"\n{agent_name} has been suspended. Run `fg` to bring {agent_name} back."
            def _suspend():
-                os.write(1, msg.encode())
+                os.write(1, msg.encode("utf-8", errors="replace"))
                os.kill(0, _sig.SIGTSTP)
            run_in_terminal(_suspend)

@@ -8285,6 +8478,17 @@ class HermesCLI:
            txt = cli_ref._spinner_text
            if not txt:
                return []
+            # Append live elapsed timer when a tool is running
+            t0 = cli_ref._tool_start_time
+            if t0 > 0:
+                import time as _time
+                elapsed = _time.monotonic() - t0
+                if elapsed >= 60:
+                    _m, _s = int(elapsed // 60), int(elapsed % 60)
+                    elapsed_str = f"{_m}m {_s}s"
+                else:
+                    elapsed_str = f"{elapsed:.1f}s"
+                return [('class:hint', f'  {txt}  ({elapsed_str})')]
            return [('class:hint', f'  {txt}')]

        def get_spinner_height():
@@ -8705,23 +8909,15 @@ class HermesCLI:
                        # Periodic config watcher — auto-reload MCP on mcp_servers change
                        if not self._agent_running:
                            self._check_config_mcp_changes()
-                            # Check for background process completion notifications
-                            # while the agent is idle (user hasn't typed anything yet).
+                            # Check for background process notifications (completions
+                            # and watch pattern matches) while agent is idle.
                            try:
                                from tools.process_registry import process_registry
                                if not process_registry.completion_queue.empty():
-                                    completion = process_registry.completion_queue.get_nowait()
-                                    _exit = completion.get("exit_code", "?")
-                                    _cmd = completion.get("command", "unknown")
-                                    _sid = completion.get("session_id", "unknown")
-                                    _out = completion.get("output", "")
-                                    _synth = (
-                                        f"[SYSTEM: Background process {_sid} completed "
-                                        f"(exit code {_exit}).\n"
-                                        f"Command: {_cmd}\n"
-                                        f"Output:\n{_out}]"
-                                    )
-                                    self._pending_input.put(_synth)
+                                    evt = process_registry.completion_queue.get_nowait()
+                                    _synth = _format_process_notification(evt)
+                                    if _synth:
+                                        self._pending_input.put(_synth)
                            except Exception:
                                pass
                        continue
@@ -8819,6 +9015,7 @@ class HermesCLI:
                    finally:
                        self._agent_running = False
                        self._spinner_text = ""
+                        self._tool_start_time = 0.0

                        app.invalidate()  # Refresh status line

@@ -8838,25 +9035,15 @@ class HermesCLI:
                                    _cprint(f"{_DIM}Voice auto-restart failed: {e}{_RST}")
                            threading.Thread(target=_restart_recording, daemon=True).start()

-                        # Drain process completion notifications — any background
-                        # process that finished with notify_on_complete while the
-                        # agent was running (or before) gets auto-injected as a
-                        # new user message so the agent can react to it.
+                        # Drain process notifications (completions + watch matches)
+                        # that arrived while the agent was running.
                        try:
                            from tools.process_registry import process_registry
                            while not process_registry.completion_queue.empty():
-                                completion = process_registry.completion_queue.get_nowait()
-                                _exit = completion.get("exit_code", "?")
-                                _cmd = completion.get("command", "unknown")
-                                _sid = completion.get("session_id", "unknown")
-                                _out = completion.get("output", "")
-                                _synth = (
-                                    f"[SYSTEM: Background process {_sid} completed "
-                                    f"(exit code {_exit}).\n"
-                                    f"Command: {_cmd}\n"
-                                    f"Output:\n{_out}]"
-                                )
-                                self._pending_input.put(_synth)
+                                evt = process_registry.completion_queue.get_nowait()
+                                _synth = _format_process_notification(evt)
+                                if _synth:
+                                    self._pending_input.put(_synth)
                        except Exception:
                            pass  # Non-fatal — don't break the main loop

@@ -31,7 +31,7 @@ except ImportError:
 # Configuration
 # =============================================================================

-HERMES_DIR = get_hermes_home()
+HERMES_DIR = get_hermes_home().resolve()
 CRON_DIR = HERMES_DIR / "cron"
 JOBS_FILE = CRON_DIR / "jobs.json"
 OUTPUT_DIR = CRON_DIR / "output"
@@ -338,10 +338,12 @@ def load_jobs() -> List[Dict[str, Any]]:
                    save_jobs(jobs)
                    logger.warning("Auto-repaired jobs.json (had invalid control characters)")
                return jobs
-        except Exception:
-            return []
-    except IOError:
-        return []
+        except Exception as e:
+            logger.error("Failed to auto-repair jobs.json: %s", e)
+            raise RuntimeError(f"Cron database corrupted and unrepairable: {e}") from e
+    except IOError as e:
+        logger.error("IOError reading jobs.json: %s", e)
+        raise RuntimeError(f"Failed to read cron database: {e}") from e


 def save_jobs(jobs: List[Dict[str, Any]]):
@@ -452,6 +454,7 @@ def create_job(
        "last_run_at": None,
        "last_status": None,
        "last_error": None,
+        "last_delivery_error": None,
        # Delivery configuration
        "deliver": deliver,
        "origin": origin,  # Tracks where job was created for "origin" delivery
@@ -620,8 +623,8 @@ def mark_job_run(job_id: str, success: bool, error: Optional[str] = None,

            save_jobs(jobs)
            return
-    
-    save_jobs(jobs)
+
+    logger.warning("mark_job_run: job_id %s not found, skipping save", job_id)


 def advance_next_run(job_id: str) -> bool:
@@ -44,7 +44,7 @@ logger = logging.getLogger(__name__)
 _KNOWN_DELIVERY_PLATFORMS = frozenset({
    "telegram", "discord", "slack", "whatsapp", "signal",
    "matrix", "mattermost", "homeassistant", "dingtalk", "feishu",
-    "wecom", "sms", "email", "webhook", "bluebubbles",
+    "wecom", "weixin", "sms", "email", "webhook", "bluebubbles",
 })

 from cron.jobs import get_due_jobs, mark_job_run, save_job_output, advance_next_run
@@ -234,6 +234,7 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option
        "dingtalk": Platform.DINGTALK,
        "feishu": Platform.FEISHU,
        "wecom": Platform.WECOM,
+        "weixin": Platform.WEIXIN,
        "email": Platform.EMAIL,
        "sms": Platform.SMS,
        "bluebubbles": Platform.BLUEBUBBLES,
@@ -441,6 +442,14 @@ def _run_job_script(script_path: str) -> tuple[bool, str]:
        stdout = (result.stdout or "").strip()
        stderr = (result.stderr or "").strip()

+        # Redact secrets from both stdout and stderr before any return path.
+        try:
+            from agent.redact import redact_sensitive_text
+            stdout = redact_sensitive_text(stdout)
+            stderr = redact_sensitive_text(stderr)
+        except Exception:
+            pass
+
        if result.returncode != 0:
            parts = [f"Script exited with code {result.returncode}"]
            if stderr:
@@ -449,13 +458,6 @@ def _run_job_script(script_path: str) -> tuple[bool, str]:
                parts.append(f"stdout:\n{stdout}")
            return False, "\n".join(parts)

-        # Redact any secrets that may appear in script output before
-        # they are injected into the LLM prompt context.
-        try:
-            from agent.redact import redact_sensitive_text
-            stdout = redact_sensitive_text(stdout)
-        except Exception:
-            pass
        return True, stdout

    except subprocess.TimeoutExpired:
@@ -768,7 +770,7 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
            _cron_pool.shutdown(wait=False, cancel_futures=True)
            raise
        finally:
-            _cron_pool.shutdown(wait=False)
+            _cron_pool.shutdown(wait=False, cancel_futures=True)

        if _inactivity_timeout:
            # Build diagnostic summary from the agent's activity tracker.
@@ -9,7 +9,10 @@ INSTALL_DIR="/opt/hermes"
 # (cache/images, cache/audio, platforms/whatsapp, etc.) are created on
 # demand by the application — don't pre-create them here so new installs
 # get the consolidated layout from get_hermes_dir().
-mkdir -p "$HERMES_HOME"/{cron,sessions,logs,hooks,memories,skills}
+# The "home/" subdirectory is a per-profile HOME for subprocesses (git,
+# ssh, gh, npm …).  Without it those tools write to /root which is
+# ephemeral and shared across profiles.  See issue #4426.
+mkdir -p "$HERMES_HOME"/{cron,sessions,logs,hooks,memories,skills,skins,plans,workspace,home}

 # .env
 if [ ! -f "$HERMES_HOME/.env" ]; then
@@ -49,6 +49,8 @@ class HermesToolCallParser(ToolCallParser):
                    continue

                tc_data = json.loads(raw_json)
+                if "name" not in tc_data:
+                    continue
                tool_calls.append(
                    ChatCompletionMessageToolCall(
                        id=f"call_{uuid.uuid4().hex[:8]}",
@@ -89,6 +89,8 @@ class MistralToolCallParser(ToolCallParser):
                        parsed = [parsed]

                    for tc in parsed:
+                        if "name" not in tc:
+                            continue
                        args = tc.get("arguments", {})
                        if isinstance(args, dict):
                            args = json.dumps(args, ensure_ascii=False)
@@ -76,10 +76,15 @@ def build_channel_directory(adapters: Dict[Any, Any]) -> Dict[str, Any]:
        except Exception as e:
            logger.warning("Channel directory: failed to build %s: %s", platform.value, e)

-    # Telegram, WhatsApp & Signal can't enumerate chats -- pull from session history
-    for plat_name in ("telegram", "whatsapp", "signal", "email", "sms", "bluebubbles"):
-        if plat_name not in platforms:
-            platforms[plat_name] = _build_from_sessions(plat_name)
+    # Platforms that don't support direct channel enumeration get session-based
+    # discovery automatically.  Skip infrastructure entries that aren't messaging
+    # platforms — everything else falls through to _build_from_sessions().
+    _SKIP_SESSION_DISCOVERY = frozenset({"local", "api_server", "webhook"})
+    for plat in Platform:
+        plat_name = plat.value
+        if plat_name in _SKIP_SESSION_DISCOVERY or plat_name in platforms:
+            continue
+        platforms[plat_name] = _build_from_sessions(plat_name)

    directory = {
        "updated_at": datetime.now().isoformat(),
@@ -63,6 +63,7 @@ class Platform(Enum):
    WEBHOOK = "webhook"
    FEISHU = "feishu"
    WECOM = "wecom"
+    WEIXIN = "weixin"
    BLUEBUBBLES = "bluebubbles"


@@ -261,6 +262,11 @@ class GatewayConfig:
        for platform, config in self.platforms.items():
            if not config.enabled:
                continue
+            # Weixin requires both a token and an account_id
+            if platform == Platform.WEIXIN:
+                if config.extra.get("account_id") and (config.token or config.extra.get("token")):
+                    connected.append(platform)
+                continue
            # Platforms that use token/api_key auth
            if config.token or config.api_key:
                connected.append(platform)
@@ -536,6 +542,8 @@ def load_gateway_config() -> GatewayConfig:
                    bridged["free_response_channels"] = platform_cfg["free_response_channels"]
                if "mention_patterns" in platform_cfg:
                    bridged["mention_patterns"] = platform_cfg["mention_patterns"]
+                if plat == Platform.DISCORD and "channel_skill_bindings" in platform_cfg:
+                    bridged["channel_skill_bindings"] = platform_cfg["channel_skill_bindings"]
                if not bridged:
                    continue
                plat_data = platforms_data.setdefault(plat.value, {})
@@ -634,6 +642,8 @@ def load_gateway_config() -> GatewayConfig:
                    os.environ["MATRIX_FREE_RESPONSE_ROOMS"] = str(frc)
                if "auto_thread" in matrix_cfg and not os.getenv("MATRIX_AUTO_THREAD"):
                    os.environ["MATRIX_AUTO_THREAD"] = str(matrix_cfg["auto_thread"]).lower()
+                if "dm_mention_threads" in matrix_cfg and not os.getenv("MATRIX_DM_MENTION_THREADS"):
+                    os.environ["MATRIX_DM_MENTION_THREADS"] = str(matrix_cfg["dm_mention_threads"]).lower()

    except Exception as e:
        logger.warning(
@@ -672,6 +682,7 @@ def load_gateway_config() -> GatewayConfig:
        Platform.SLACK: "SLACK_BOT_TOKEN",
        Platform.MATTERMOST: "MATTERMOST_TOKEN",
        Platform.MATRIX: "MATRIX_ACCESS_TOKEN",
+        Platform.WEIXIN: "WEIXIN_TOKEN",
    }
    for platform, pconfig in config.platforms.items():
        if not pconfig.enabled:
@@ -976,6 +987,44 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
                name=os.getenv("WECOM_HOME_CHANNEL_NAME", "Home"),
            )

+    # Weixin (personal WeChat via iLink Bot API)
+    weixin_token = os.getenv("WEIXIN_TOKEN")
+    weixin_account_id = os.getenv("WEIXIN_ACCOUNT_ID")
+    if weixin_token or weixin_account_id:
+        if Platform.WEIXIN not in config.platforms:
+            config.platforms[Platform.WEIXIN] = PlatformConfig()
+        config.platforms[Platform.WEIXIN].enabled = True
+        if weixin_token:
+            config.platforms[Platform.WEIXIN].token = weixin_token
+        extra = config.platforms[Platform.WEIXIN].extra
+        if weixin_account_id:
+            extra["account_id"] = weixin_account_id
+        weixin_base_url = os.getenv("WEIXIN_BASE_URL", "").strip()
+        if weixin_base_url:
+            extra["base_url"] = weixin_base_url.rstrip("/")
+        weixin_cdn_base_url = os.getenv("WEIXIN_CDN_BASE_URL", "").strip()
+        if weixin_cdn_base_url:
+            extra["cdn_base_url"] = weixin_cdn_base_url.rstrip("/")
+        weixin_dm_policy = os.getenv("WEIXIN_DM_POLICY", "").strip().lower()
+        if weixin_dm_policy:
+            extra["dm_policy"] = weixin_dm_policy
+        weixin_group_policy = os.getenv("WEIXIN_GROUP_POLICY", "").strip().lower()
+        if weixin_group_policy:
+            extra["group_policy"] = weixin_group_policy
+        weixin_allowed_users = os.getenv("WEIXIN_ALLOWED_USERS", "").strip()
+        if weixin_allowed_users:
+            extra["allow_from"] = weixin_allowed_users
+        weixin_group_allowed_users = os.getenv("WEIXIN_GROUP_ALLOWED_USERS", "").strip()
+        if weixin_group_allowed_users:
+            extra["group_allow_from"] = weixin_group_allowed_users
+        weixin_home = os.getenv("WEIXIN_HOME_CHANNEL", "").strip()
+        if weixin_home:
+            config.platforms[Platform.WEIXIN].home_channel = HomeChannel(
+                platform=Platform.WEIXIN,
+                chat_id=weixin_home,
+                name=os.getenv("WEIXIN_HOME_CHANNEL_NAME", "Home"),
+            )
+
    # BlueBubbles (iMessage)
    bluebubbles_server_url = os.getenv("BLUEBUBBLES_SERVER_URL")
    bluebubbles_password = os.getenv("BLUEBUBBLES_PASSWORD")
@@ -25,6 +25,7 @@ import hmac
 import json
 import logging
 import os
+import socket as _socket
 import re
 import sqlite3
 import time
@@ -42,6 +43,7 @@ from gateway.config import Platform, PlatformConfig
 from gateway.platforms.base import (
    BasePlatformAdapter,
    SendResult,
+    is_network_accessible,
 )

 logger = logging.getLogger(__name__)
@@ -406,7 +408,8 @@ class APIServerAdapter(BasePlatformAdapter):
        Validate Bearer token from Authorization header.

        Returns None if auth is OK, or a 401 web.Response on failure.
-        If no API key is configured, all requests are allowed.
+        If no API key is configured, all requests are allowed (only when API
+        server is local).
        """
        if not self._api_key:
            return None  # No key configured — allow all (local-only use)
@@ -641,15 +644,35 @@ class APIServerAdapter(BasePlatformAdapter):
                    _stream_q.put(delta)

            def _on_tool_progress(event_type, name, preview, args, **kwargs):
-                """Inject tool progress into the SSE stream for Open WebUI."""
+                """Send tool progress as a separate SSE event.
+
+                Previously, progress markers like ``⏰ list`` were injected
+                directly into ``delta.content``.  OpenAI-compatible frontends
+                (Open WebUI, LobeChat, …) store ``delta.content`` verbatim as
+                the assistant message and send it back on subsequent requests.
+                After enough turns the model learns to *emit* the markers as
+                plain text instead of issuing real tool calls — silently
+                hallucinating tool results.  See #6972.
+
+                The fix: push a tagged tuple ``("__tool_progress__", payload)``
+                onto the stream queue.  The SSE writer emits it as a custom
+                ``event: hermes.tool.progress`` line that compliant frontends
+                can render for UX but will *not* persist into conversation
+                history.  Clients that don't understand the custom event type
+                silently ignore it per the SSE specification.
+                """
                if event_type != "tool.started":
-                    return  # Only show tool start events in chat stream
+                    return
                if name.startswith("_"):
-                    return  # Skip internal events (_thinking)
+                    return
                from agent.display import get_tool_emoji
                emoji = get_tool_emoji(name)
                label = preview or name
-                _stream_q.put(f"\n`{emoji} {label}`\n")
+                _stream_q.put(("__tool_progress__", {
+                    "tool": name,
+                    "emoji": emoji,
+                    "label": label,
+                }))

            # Start agent in background.  agent_ref is a mutable container
            # so the SSE writer can interrupt the agent on client disconnect.
@@ -760,6 +783,29 @@ class APIServerAdapter(BasePlatformAdapter):
            }
            await response.write(f"data: {json.dumps(role_chunk)}\n\n".encode())

+            # Helper — route a queue item to the correct SSE event.
+            async def _emit(item):
+                """Write a single queue item to the SSE stream.
+
+                Plain strings are sent as normal ``delta.content`` chunks.
+                Tagged tuples ``("__tool_progress__", payload)`` are sent
+                as a custom ``event: hermes.tool.progress`` SSE event so
+                frontends can display them without storing the markers in
+                conversation history.  See #6972.
+                """
+                if isinstance(item, tuple) and len(item) == 2 and item[0] == "__tool_progress__":
+                    event_data = json.dumps(item[1])
+                    await response.write(
+                        f"event: hermes.tool.progress\ndata: {event_data}\n\n".encode()
+                    )
+                else:
+                    content_chunk = {
+                        "id": completion_id, "object": "chat.completion.chunk",
+                        "created": created, "model": model,
+                        "choices": [{"index": 0, "delta": {"content": item}, "finish_reason": None}],
+                    }
+                    await response.write(f"data: {json.dumps(content_chunk)}\n\n".encode())
+
            # Stream content chunks as they arrive from the agent
            loop = asyncio.get_event_loop()
            while True:
@@ -773,12 +819,7 @@ class APIServerAdapter(BasePlatformAdapter):
                                delta = stream_q.get_nowait()
                                if delta is None:
                                    break
-                                content_chunk = {
-                                    "id": completion_id, "object": "chat.completion.chunk",
-                                    "created": created, "model": model,
-                                    "choices": [{"index": 0, "delta": {"content": delta}, "finish_reason": None}],
-                                }
-                                await response.write(f"data: {json.dumps(content_chunk)}\n\n".encode())
+                                await _emit(delta)
                            except _q.Empty:
                                break
                        break
@@ -787,12 +828,7 @@ class APIServerAdapter(BasePlatformAdapter):
                if delta is None:  # End of stream sentinel
                    break

-                content_chunk = {
-                    "id": completion_id, "object": "chat.completion.chunk",
-                    "created": created, "model": model,
-                    "choices": [{"index": 0, "delta": {"content": delta}, "finish_reason": None}],
-                }
-                await response.write(f"data: {json.dumps(content_chunk)}\n\n".encode())
+                await _emit(delta)

            # Get usage from completed agent
            usage = {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}
@@ -1713,8 +1749,16 @@ class APIServerAdapter(BasePlatformAdapter):
            if hasattr(sweep_task, "add_done_callback"):
                sweep_task.add_done_callback(self._background_tasks.discard)

+            # Refuse to start network-accessible without authentication
+            if is_network_accessible(self._host) and not self._api_key:
+                logger.error(
+                    "[%s] Refusing to start: binding to %s requires API_SERVER_KEY. "
+                    "Set API_SERVER_KEY or use the default 127.0.0.1.",
+                    self.name, self._host,
+                )
+                return False
+
            # Port conflict detection — fail fast if port is already in use
-            import socket as _socket
            try:
                with _socket.socket(_socket.AF_INET, _socket.SOCK_STREAM) as _s:
                    _s.settimeout(1)
@@ -6,10 +6,12 @@ and implement the required methods.
 """

 import asyncio
+import ipaddress
 import logging
 import os
 import random
 import re
+import socket as _socket
 import subprocess
 import sys
 import uuid
@@ -19,6 +21,41 @@ from urllib.parse import urlsplit
 logger = logging.getLogger(__name__)


+def is_network_accessible(host: str) -> bool:
+    """Return True if *host* would expose the server beyond loopback.
+
+    Loopback addresses (127.0.0.1, ::1, IPv4-mapped ::ffff:127.0.0.1)
+    are local-only.  Unspecified addresses (0.0.0.0, ::) bind all
+    interfaces.  Hostnames are resolved; DNS failure fails closed.
+    """
+    try:
+        addr = ipaddress.ip_address(host)
+        if addr.is_loopback:
+            return False
+        # ::ffff:127.0.0.1 — Python reports is_loopback=False for mapped
+        # addresses, so check the underlying IPv4 explicitly.
+        if getattr(addr, "ipv4_mapped", None) and addr.ipv4_mapped.is_loopback:
+            return False
+        return True
+    except ValueError:
+        # when host variable is a hostname, we should try to resolve below
+        pass
+
+    try:
+        resolved = _socket.getaddrinfo(
+            host, None, _socket.AF_UNSPEC, _socket.SOCK_STREAM,
+        )
+        # if the hostname resolves into at least one non-loopback address,
+        # then we consider it to be network accessible
+        for _family, _type, _proto, _canonname, sockaddr in resolved:
+            addr = ipaddress.ip_address(sockaddr[0])
+            if not addr.is_loopback:
+                return True
+        return False
+    except (_socket.gaierror, OSError):
+        return True
+
+
 def _detect_macos_system_proxy() -> str | None:
    """Read the macOS system HTTP(S) proxy via ``scutil --proxy``.

@@ -160,7 +197,7 @@ GATEWAY_SECRET_CAPTURE_UNSUPPORTED_MESSAGE = (
 )


-def _safe_url_for_log(url: str, max_len: int = 80) -> str:
+def safe_url_for_log(url: str, max_len: int = 80) -> str:
    """Return a URL string safe for logs (no query/fragment/userinfo)."""
    if max_len <= 0:
        return ""
@@ -197,6 +234,23 @@ def _safe_url_for_log(url: str, max_len: int = 80) -> str:
    return f"{safe[:max_len - 3]}..."


+async def _ssrf_redirect_guard(response):
+    """Re-validate each redirect target to prevent redirect-based SSRF.
+
+    Without this, an attacker can host a public URL that 302-redirects to
+    http://169.254.169.254/ and bypass the pre-flight is_safe_url() check.
+
+    Must be async because httpx.AsyncClient awaits response event hooks.
+    """
+    if response.is_redirect and response.next_request:
+        redirect_url = str(response.next_request.url)
+        from tools.url_safety import is_safe_url
+        if not is_safe_url(redirect_url):
+            raise ValueError(
+                f"Blocked redirect to private/internal address: {safe_url_for_log(redirect_url)}"
+            )
+
+
 # ---------------------------------------------------------------------------
 # Image cache utilities
 #
@@ -281,7 +335,7 @@ async def cache_image_from_url(url: str, ext: str = ".jpg", retries: int = 2) ->
    """
    from tools.url_safety import is_safe_url
    if not is_safe_url(url):
-        raise ValueError(f"Blocked unsafe URL (SSRF protection): {_safe_url_for_log(url)}")
+        raise ValueError(f"Blocked unsafe URL (SSRF protection): {safe_url_for_log(url)}")

    import asyncio
    import httpx
@@ -289,7 +343,11 @@ async def cache_image_from_url(url: str, ext: str = ".jpg", retries: int = 2) ->
    _log = _logging.getLogger(__name__)

    last_exc = None
-    async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
+    async with httpx.AsyncClient(
+        timeout=30.0,
+        follow_redirects=True,
+        event_hooks={"response": [_ssrf_redirect_guard]},
+    ) as client:
        for attempt in range(retries + 1):
            try:
                response = await client.get(
@@ -311,7 +369,7 @@ async def cache_image_from_url(url: str, ext: str = ".jpg", retries: int = 2) ->
                        "Media cache retry %d/%d for %s (%.1fs): %s",
                        attempt + 1,
                        retries,
-                        _safe_url_for_log(url),
+                        safe_url_for_log(url),
                        wait,
                        exc,
                    )
@@ -396,7 +454,7 @@ async def cache_audio_from_url(url: str, ext: str = ".ogg", retries: int = 2) ->
    """
    from tools.url_safety import is_safe_url
    if not is_safe_url(url):
-        raise ValueError(f"Blocked unsafe URL (SSRF protection): {_safe_url_for_log(url)}")
+        raise ValueError(f"Blocked unsafe URL (SSRF protection): {safe_url_for_log(url)}")

    import asyncio
    import httpx
@@ -404,7 +462,11 @@ async def cache_audio_from_url(url: str, ext: str = ".ogg", retries: int = 2) ->
    _log = _logging.getLogger(__name__)

    last_exc = None
-    async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
+    async with httpx.AsyncClient(
+        timeout=30.0,
+        follow_redirects=True,
+        event_hooks={"response": [_ssrf_redirect_guard]},
+    ) as client:
        for attempt in range(retries + 1):
            try:
                response = await client.get(
@@ -426,7 +488,7 @@ async def cache_audio_from_url(url: str, ext: str = ".ogg", retries: int = 2) ->
                        "Audio cache retry %d/%d for %s (%.1fs): %s",
                        attempt + 1,
                        retries,
-                        _safe_url_for_log(url),
+                        safe_url_for_log(url),
                        wait,
                        exc,
                    )
@@ -564,8 +626,9 @@ class MessageEvent:
    reply_to_message_id: Optional[str] = None
    reply_to_text: Optional[str] = None  # Text of the replied-to message (for context injection)
    
-    # Auto-loaded skill for topic/channel bindings (e.g., Telegram DM Topics)
-    auto_skill: Optional[str] = None
+    # Auto-loaded skill(s) for topic/channel bindings (e.g., Telegram DM Topics,
+    # Discord channel_skill_bindings).  A single name or ordered list.
+    auto_skill: Optional[str | list[str]] = None
    
    # Internal flag — set for synthetic events (e.g. background process
    # completion notifications) that must bypass user authorization checks.
@@ -587,6 +650,9 @@ class MessageEvent:
        raw = parts[0][1:].lower() if parts else None
        if raw and "@" in raw:
            raw = raw.split("@", 1)[0]
+        # Reject file paths: valid command names never contain /
+        if raw and "/" in raw:
+            return None
        return raw
    
    def get_command_args(self) -> str:
@@ -607,6 +673,32 @@ class SendResult:
    retryable: bool = False  # True for transient connection errors — base will retry automatically


+def merge_pending_message_event(
+    pending_messages: Dict[str, MessageEvent],
+    session_key: str,
+    event: MessageEvent,
+) -> None:
+    """Store or merge a pending event for a session.
+
+    Photo bursts/albums often arrive as multiple near-simultaneous PHOTO
+    events. Merge those into the existing queued event so the next turn sees
+    the whole burst, while non-photo follow-ups still replace the pending
+    event normally.
+    """
+    existing = pending_messages.get(session_key)
+    if (
+        existing
+        and getattr(existing, "message_type", None) == MessageType.PHOTO
+        and event.message_type == MessageType.PHOTO
+    ):
+        existing.media_urls.extend(event.media_urls)
+        existing.media_types.extend(event.media_types)
+        if event.text:
+            existing.text = BasePlatformAdapter._merge_caption(existing.text, event.text)
+        return
+    pending_messages[session_key] = event
+
+
 # Error substrings that indicate a transient *connection* failure worth retrying.
 # "timeout" / "timed out" / "readtimeout" / "writetimeout" are intentionally
 # excluded: a read/write timeout on a non-idempotent call (e.g. send_message)
@@ -661,6 +753,7 @@ class BasePlatformAdapter(ABC):
        # working on a task after --replace or manual restarts.
        self._background_tasks: set[asyncio.Task] = set()
        self._expected_cancelled_tasks: set[asyncio.Task] = set()
+        self._busy_session_handler: Optional[Callable[[MessageEvent, str], Awaitable[bool]]] = None
        # Chats where auto-TTS on voice input is disabled (set by /voice off)
        self._auto_tts_disabled_chats: set = set()
        # Chats where typing indicator is paused (e.g. during approval waits).
@@ -749,6 +842,10 @@ class BasePlatformAdapter(ABC):
        an optional response string.
        """
        self._message_handler = handler
+
+    def set_busy_session_handler(self, handler: Optional[Callable[[MessageEvent, str], Awaitable[bool]]]) -> None:
+        """Set an optional handler for messages arriving during active sessions."""
+        self._busy_session_handler = handler
    
    def set_session_store(self, session_store: Any) -> None:
        """
@@ -1330,7 +1427,7 @@ class BasePlatformAdapter(ABC):
            # session lifecycle and its cleanup races with the running task
            # (see PR #4926).
            cmd = event.get_command()
-            if cmd in ("approve", "deny", "status", "stop", "new", "reset", "background"):
+            if cmd in ("approve", "deny", "status", "stop", "new", "reset", "background", "restart"):
                logger.debug(
                    "[%s] Command '/%s' bypassing active-session guard for %s",
                    self.name, cmd, session_key,
@@ -1349,19 +1446,19 @@ class BasePlatformAdapter(ABC):
                    logger.error("[%s] Command '/%s' dispatch failed: %s", self.name, cmd, e, exc_info=True)
                return

+            if self._busy_session_handler is not None:
+                try:
+                    if await self._busy_session_handler(event, session_key):
+                        return
+                except Exception as e:
+                    logger.error("[%s] Busy-session handler failed: %s", self.name, e, exc_info=True)
+
            # Special case: photo bursts/albums frequently arrive as multiple near-
            # simultaneous messages. Queue them without interrupting the active run,
            # then process them immediately after the current task finishes.
            if event.message_type == MessageType.PHOTO:
                logger.debug("[%s] Queuing photo follow-up for session %s without interrupt", self.name, session_key)
-                existing = self._pending_messages.get(session_key)
-                if existing and existing.message_type == MessageType.PHOTO:
-                    existing.media_urls.extend(event.media_urls)
-                    existing.media_types.extend(event.media_types)
-                    if event.text:
-                        existing.text = self._merge_caption(existing.text, event.text)
-                else:
-                    self._pending_messages[session_key] = event
+                merge_pending_message_event(self._pending_messages, session_key, event)
                return  # Don't interrupt now - will run after current task completes

            # Default behavior for non-photo follow-ups: interrupt the running agent
@@ -1525,7 +1622,7 @@ class BasePlatformAdapter(ABC):
                        logger.info(
                            "[%s] Sending image: %s (alt=%s)",
                            self.name,
-                            _safe_url_for_log(image_url),
+                            safe_url_for_log(image_url),
                            alt_text[:30] if alt_text else "",
                        )
                        # Route animated GIFs through send_animation for proper playback
@@ -606,22 +606,35 @@ class DiscordAdapter(BasePlatformAdapter):
                        if not self._client.user or self._client.user not in message.mentions:
                            return
                    # "all" falls through to handle_message
-
-                # If the message @mentions other users but NOT the bot, the
-                # sender is talking to someone else — stay silent.  Only
-                # applies in server channels; in DMs the user is always
-                # talking to the bot (mentions are just references).
-                # Controlled by DISCORD_IGNORE_NO_MENTION (default: true).
-                _ignore_no_mention = os.getenv(
-                    "DISCORD_IGNORE_NO_MENTION", "true"
-                ).lower() in ("true", "1", "yes")
-                if _ignore_no_mention and message.mentions and not isinstance(message.channel, discord.DMChannel):
-                    _bot_mentioned = (
+                
+                # Multi-agent filtering: if the message mentions specific bots
+                # but NOT this bot, the sender is talking to another agent —
+                # stay silent.  Messages with no bot mentions (general chat)
+                # still fall through to _handle_message for the existing
+                # DISCORD_REQUIRE_MENTION check.
+                #
+                # This replaces the older DISCORD_IGNORE_NO_MENTION logic
+                # with bot-aware filtering that works correctly when multiple
+                # agents share a channel.
+                if not isinstance(message.channel, discord.DMChannel) and message.mentions:
+                    _self_mentioned = (
                        self._client.user is not None
                        and self._client.user in message.mentions
                    )
-                    if not _bot_mentioned:
-                        return  # Talking to someone else, don't interrupt
+                    _other_bots_mentioned = any(
+                        m.bot and m != self._client.user
+                        for m in message.mentions
+                    )
+                    # If other bots are mentioned but we're not → not for us
+                    if _other_bots_mentioned and not _self_mentioned:
+                        return
+                    # If humans are mentioned but we're not → not for us
+                    # (preserves old DISCORD_IGNORE_NO_MENTION=true behavior)
+                    _ignore_no_mention = os.getenv(
+                        "DISCORD_IGNORE_NO_MENTION", "true"
+                    ).lower() in ("true", "1", "yes")
+                    if _ignore_no_mention and not _self_mentioned and not _other_bots_mentioned:
+                        return

                await self._handle_message(message)

@@ -1892,14 +1905,42 @@ class DiscordAdapter(BasePlatformAdapter):
            chat_topic=chat_topic,
        )

+        _parent_id = str(getattr(getattr(interaction, "channel", None), "parent_id", "") or "")
+        _skills = self._resolve_channel_skills(thread_id, _parent_id or None)
        event = MessageEvent(
            text=text,
            message_type=MessageType.TEXT,
            source=source,
            raw_message=interaction,
+            auto_skill=_skills,
        )
        await self.handle_message(event)

+    def _resolve_channel_skills(self, channel_id: str, parent_id: str | None = None) -> list[str] | None:
+        """Look up auto-skill bindings for a Discord channel/forum thread.
+
+        Config format (in platform extra):
+            channel_skill_bindings:
+              - id: "123456"
+                skills: ["skill-a", "skill-b"]
+        Also checks parent_id so forum threads inherit the forum's bindings.
+        """
+        bindings = self.config.extra.get("channel_skill_bindings", [])
+        if not bindings:
+            return None
+        ids_to_check = {channel_id}
+        if parent_id:
+            ids_to_check.add(parent_id)
+        for entry in bindings:
+            entry_id = str(entry.get("id", ""))
+            if entry_id in ids_to_check:
+                skills = entry.get("skills") or entry.get("skill")
+                if isinstance(skills, str):
+                    return [skills]
+                if isinstance(skills, list) and skills:
+                    return list(dict.fromkeys(skills))  # dedup, preserve order
+        return None
+
    def _thread_parent_channel(self, channel: Any) -> Any:
        """Return the parent text channel when invoked from a thread."""
        return getattr(channel, "parent", None) or channel
@@ -2484,6 +2525,10 @@ class DiscordAdapter(BasePlatformAdapter):
        if not event_text or not event_text.strip():
            event_text = "(The user sent a message with no text content)"

+        _chan = message.channel
+        _parent_id = str(getattr(_chan, "parent_id", "") or "")
+        _chan_id = str(getattr(_chan, "id", ""))
+        _skills = self._resolve_channel_skills(_chan_id, _parent_id or None)
        event = MessageEvent(
            text=event_text,
            message_type=msg_type,
@@ -2494,6 +2539,7 @@ class DiscordAdapter(BasePlatformAdapter):
            media_types=media_types,
            reply_to_message_id=str(message.reference.message_id) if message.reference else None,
            timestamp=message.created_at,
+            auto_skill=_skills,
        )

        # Track thread participation so the bot won't require @mention for
@@ -1190,6 +1190,8 @@ class FeishuAdapter(BasePlatformAdapter):
                lambda data: self._on_reaction_event("im.message.reaction.deleted_v1", data)
            )
            .register_p2_card_action_trigger(self._on_card_action_trigger)
+            .register_p2_im_chat_member_bot_added_v1(self._on_bot_added_to_chat)
+            .register_p2_im_chat_member_bot_deleted_v1(self._on_bot_removed_from_chat)
            .build()
        )

@@ -1580,13 +1582,18 @@ class FeishuAdapter(BasePlatformAdapter):
            return SendResult(success=False, error=f"Image file not found: {image_path}")

        try:
-            with open(image_path, "rb") as image_file:
-                body = self._build_image_upload_body(
-                    image_type=_FEISHU_IMAGE_UPLOAD_TYPE,
-                    image=image_file,
-                )
-                request = self._build_image_upload_request(body)
-                upload_response = await asyncio.to_thread(self._client.im.v1.image.create, request)
+            import io as _io
+            with open(image_path, "rb") as f:
+                image_bytes = f.read()
+            # Wrap in BytesIO so lark SDK's MultipartEncoder can read .name and .tell()
+            image_file = _io.BytesIO(image_bytes)
+            image_file.name = os.path.basename(image_path)
+            body = self._build_image_upload_body(
+                image_type=_FEISHU_IMAGE_UPLOAD_TYPE,
+                image=image_file,
+            )
+            request = self._build_image_upload_request(body)
+            upload_response = await asyncio.to_thread(self._client.im.v1.image.create, request)
            image_key = self._extract_response_field(upload_response, "image_key")
            if not image_key:
                return self._response_error_result(
@@ -39,6 +39,7 @@ from gateway.platforms.base import (
    MessageType,
    SendResult,
    SUPPORTED_DOCUMENT_TYPES,
+    safe_url_for_log,
    cache_document_from_bytes,
 )

@@ -656,8 +657,19 @@ class SlackAdapter(BasePlatformAdapter):
        try:
            import httpx

+            async def _ssrf_redirect_guard(response):
+                """Re-check redirect targets so public URLs cannot bounce into private IPs."""
+                if response.is_redirect and response.next_request:
+                    redirect_url = str(response.next_request.url)
+                    if not is_safe_url(redirect_url):
+                        raise ValueError("Blocked redirect to private/internal address")
+
            # Download the image first
-            async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
+            async with httpx.AsyncClient(
+                timeout=30.0,
+                follow_redirects=True,
+                event_hooks={"response": [_ssrf_redirect_guard]},
+            ) as client:
                response = await client.get(image_url)
                response.raise_for_status()

@@ -674,7 +686,7 @@ class SlackAdapter(BasePlatformAdapter):
        except Exception as e:  # pragma: no cover - defensive logging
            logger.warning(
                "[Slack] Failed to upload image from URL %s, falling back to text: %s",
-                image_url,
+                safe_url_for_log(image_url),
                e,
                exc_info=True,
            )
@@ -518,6 +518,16 @@ class TelegramAdapter(BasePlatformAdapter):

            # Build the application
            builder = Application.builder().token(self.config.token)
+            custom_base_url = self.config.extra.get("base_url")
+            if custom_base_url:
+                builder = builder.base_url(custom_base_url)
+                builder = builder.base_file_url(
+                    self.config.extra.get("base_file_url", custom_base_url)
+                )
+                logger.info(
+                    "[%s] Using custom Telegram base_url: %s",
+                    self.name, custom_base_url,
+                )

            # PTB defaults (pool_timeout=1s) are too aggressive on flaky networks and
            # can trigger "Pool timeout: All connections in the connection pool are occupied"
@@ -547,7 +557,6 @@ class TelegramAdapter(BasePlatformAdapter):
                for k in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY", "https_proxy", "http_proxy", "all_proxy")
            )
            disable_fallback = (os.getenv("HERMES_TELEGRAM_DISABLE_FALLBACK_IPS", "").strip().lower() in ("1", "true", "yes", "on"))
-
            fallback_ips = self._fallback_ips()
            if not fallback_ips:
                fallback_ips = await discover_fallback_ips()
@@ -2793,5 +2802,5 @@ class TelegramAdapter(BasePlatformAdapter):
            await self._set_reaction(
                chat_id,
                message_id,
-                "\u2705" if outcome == ProcessingOutcome.SUCCESS else "\u274c",
+                "\U0001f44d" if outcome == ProcessingOutcome.SUCCESS else "\U0001f44e",
            )
@@ -201,6 +201,7 @@ class WebhookAdapter(BasePlatformAdapter):
            "dingtalk",
            "feishu",
            "wecom",
+            "weixin",
            "bluebubbles",
        ):
            return await self._deliver_cross_platform(
@@ -0,0 +1,20 @@
+"""Shared gateway restart constants and parsing helpers."""
+
+from hermes_cli.config import DEFAULT_CONFIG
+
+# EX_TEMPFAIL from sysexits.h — used to ask the service manager to restart
+# the gateway after a graceful drain/reload path completes.
+GATEWAY_SERVICE_RESTART_EXIT_CODE = 75
+
+DEFAULT_GATEWAY_RESTART_DRAIN_TIMEOUT = float(
+    DEFAULT_CONFIG["agent"]["restart_drain_timeout"]
+)
+
+
+def parse_restart_drain_timeout(raw: object) -> float:
+    """Parse a configured drain timeout, falling back to the shared default."""
+    try:
+        value = float(raw) if str(raw or "").strip() else DEFAULT_GATEWAY_RESTART_DRAIN_TIMEOUT
+    except (TypeError, ValueError):
+        return DEFAULT_GATEWAY_RESTART_DRAIN_TIMEOUT
+    return max(0.0, value)
@@ -0,0 +1,113 @@
+"""
+Session-scoped context variables for the Hermes gateway.
+
+Replaces the previous ``os.environ``-based session state
+(``HERMES_SESSION_PLATFORM``, ``HERMES_SESSION_CHAT_ID``, etc.) with
+Python's ``contextvars.ContextVar``.
+
+**Why this matters**
+
+The gateway processes messages concurrently via ``asyncio``.  When two
+messages arrive at the same time the old code did:
+
+    os.environ["HERMES_SESSION_THREAD_ID"] = str(context.source.thread_id)
+
+Because ``os.environ`` is *process-global*, Message A's value was
+silently overwritten by Message B before Message A's agent finished
+running.  Background-task notifications and tool calls therefore routed
+to the wrong thread.
+
+``contextvars.ContextVar`` values are *task-local*: each ``asyncio``
+task (and any ``run_in_executor`` thread it spawns) gets its own copy,
+so concurrent messages never interfere.
+
+**Backward compatibility**
+
+The public helper ``get_session_env(name, default="")`` mirrors the old
+``os.getenv("HERMES_SESSION_*", ...)`` calls.  Existing tool code only
+needs to replace the import + call site:
+
+    # before
+    import os
+    platform = os.getenv("HERMES_SESSION_PLATFORM", "")
+
+    # after
+    from gateway.session_context import get_session_env
+    platform = get_session_env("HERMES_SESSION_PLATFORM", "")
+"""
+
+from contextvars import ContextVar
+
+# ---------------------------------------------------------------------------
+# Per-task session variables
+# ---------------------------------------------------------------------------
+
+_SESSION_PLATFORM: ContextVar[str] = ContextVar("HERMES_SESSION_PLATFORM", default="")
+_SESSION_CHAT_ID: ContextVar[str] = ContextVar("HERMES_SESSION_CHAT_ID", default="")
+_SESSION_CHAT_NAME: ContextVar[str] = ContextVar("HERMES_SESSION_CHAT_NAME", default="")
+_SESSION_THREAD_ID: ContextVar[str] = ContextVar("HERMES_SESSION_THREAD_ID", default="")
+
+_VAR_MAP = {
+    "HERMES_SESSION_PLATFORM": _SESSION_PLATFORM,
+    "HERMES_SESSION_CHAT_ID": _SESSION_CHAT_ID,
+    "HERMES_SESSION_CHAT_NAME": _SESSION_CHAT_NAME,
+    "HERMES_SESSION_THREAD_ID": _SESSION_THREAD_ID,
+}
+
+
+def set_session_vars(
+    platform: str = "",
+    chat_id: str = "",
+    chat_name: str = "",
+    thread_id: str = "",
+) -> list:
+    """Set all session context variables and return reset tokens.
+
+    Call ``clear_session_vars(tokens)`` in a ``finally`` block to restore
+    the previous values when the handler exits.
+
+    Returns a list of ``Token`` objects (one per variable) that can be
+    passed to ``clear_session_vars``.
+    """
+    tokens = [
+        _SESSION_PLATFORM.set(platform),
+        _SESSION_CHAT_ID.set(chat_id),
+        _SESSION_CHAT_NAME.set(chat_name),
+        _SESSION_THREAD_ID.set(thread_id),
+    ]
+    return tokens
+
+
+def clear_session_vars(tokens: list) -> None:
+    """Restore session context variables to their pre-handler values."""
+    if not tokens:
+        return
+    vars_in_order = [
+        _SESSION_PLATFORM,
+        _SESSION_CHAT_ID,
+        _SESSION_CHAT_NAME,
+        _SESSION_THREAD_ID,
+    ]
+    for var, token in zip(vars_in_order, tokens):
+        var.reset(token)
+
+
+def get_session_env(name: str, default: str = "") -> str:
+    """Read a session context variable by its legacy ``HERMES_SESSION_*`` name.
+
+    Drop-in replacement for ``os.getenv("HERMES_SESSION_*", default)``.
+
+    Resolution order:
+    1. Context variable (set by the gateway for concurrency-safe access)
+    2. ``os.environ`` (used by CLI, cron scheduler, and tests)
+    3. *default*
+    """
+    import os
+
+    var = _VAR_MAP.get(name)
+    if var is not None:
+        value = var.get()
+        if value:
+            return value
+    # Fall back to os.environ for CLI, cron, and test compatibility
+    return os.getenv(name, default)
@@ -158,6 +158,8 @@ def _build_runtime_status_record() -> dict[str, Any]:
    payload.update({
        "gateway_state": "starting",
        "exit_reason": None,
+        "restart_requested": False,
+        "active_agents": 0,
        "platforms": {},
        "updated_at": _utc_now_iso(),
    })
@@ -218,6 +220,8 @@ def write_runtime_status(
    *,
    gateway_state: Optional[str] = None,
    exit_reason: Optional[str] = None,
+    restart_requested: Optional[bool] = None,
+    active_agents: Optional[int] = None,
    platform: Optional[str] = None,
    platform_state: Optional[str] = None,
    error_code: Optional[str] = None,
@@ -236,6 +240,10 @@ def write_runtime_status(
        payload["gateway_state"] = gateway_state
    if exit_reason is not None:
        payload["exit_reason"] = exit_reason
+    if restart_requested is not None:
+        payload["restart_requested"] = bool(restart_requested)
+    if active_agents is not None:
+        payload["active_agents"] = max(0, int(active_agents))

    if platform is not None:
        platform_payload = payload["platforms"].get(platform, {})
@@ -198,6 +198,14 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
        api_key_env_vars=("DEEPSEEK_API_KEY",),
        base_url_env_var="DEEPSEEK_BASE_URL",
    ),
+    "xai": ProviderConfig(
+        id="xai",
+        name="xAI",
+        auth_type="api_key",
+        inference_base_url="https://api.x.ai/v1",
+        api_key_env_vars=("XAI_API_KEY",),
+        base_url_env_var="XAI_BASE_URL",
+    ),
    "ai-gateway": ProviderConfig(
        id="ai-gateway",
        name="AI Gateway",
@@ -704,6 +712,27 @@ def write_credential_pool(provider_id: str, entries: List[Dict[str, Any]]) -> Pa
        return _save_auth_store(auth_store)


+def suppress_credential_source(provider_id: str, source: str) -> None:
+    """Mark a credential source as suppressed so it won't be re-seeded."""
+    with _auth_store_lock():
+        auth_store = _load_auth_store()
+        suppressed = auth_store.setdefault("suppressed_sources", {})
+        provider_list = suppressed.setdefault(provider_id, [])
+        if source not in provider_list:
+            provider_list.append(source)
+        _save_auth_store(auth_store)
+
+
+def is_source_suppressed(provider_id: str, source: str) -> bool:
+    """Check if a credential source has been suppressed by the user."""
+    try:
+        auth_store = _load_auth_store()
+        suppressed = auth_store.get("suppressed_sources", {})
+        return source in suppressed.get(provider_id, [])
+    except Exception:
+        return False
+
+
 def get_provider_auth_state(provider_id: str) -> Optional[Dict[str, Any]]:
    """Return persisted auth state for a provider, or None."""
    auth_store = _load_auth_store()
@@ -716,6 +745,57 @@ def get_active_provider() -> Optional[str]:
    return auth_store.get("active_provider")


+def is_provider_explicitly_configured(provider_id: str) -> bool:
+    """Return True only if the user has explicitly configured this provider.
+
+    Checks:
+      1. active_provider in auth.json matches
+      2. model.provider in config.yaml matches
+      3. Provider-specific env vars are set (e.g. ANTHROPIC_API_KEY)
+
+    This is used to gate auto-discovery of external credentials (e.g.
+    Claude Code's ~/.claude/.credentials.json) so they are never used
+    without the user's explicit choice.  See PR #4210 for the same
+    pattern applied to the setup wizard gate.
+    """
+    normalized = (provider_id or "").strip().lower()
+
+    # 1. Check auth.json active_provider
+    try:
+        auth_store = _load_auth_store()
+        active = (auth_store.get("active_provider") or "").strip().lower()
+        if active and active == normalized:
+            return True
+    except Exception:
+        pass
+
+    # 2. Check config.yaml model.provider
+    try:
+        from hermes_cli.config import load_config
+        cfg = load_config()
+        model_cfg = cfg.get("model")
+        if isinstance(model_cfg, dict):
+            cfg_provider = (model_cfg.get("provider") or "").strip().lower()
+            if cfg_provider == normalized:
+                return True
+    except Exception:
+        pass
+
+    # 3. Check provider-specific env vars
+    # Exclude CLAUDE_CODE_OAUTH_TOKEN — it's set by Claude Code itself,
+    # not by the user explicitly configuring anthropic in Hermes.
+    _IMPLICIT_ENV_VARS = {"CLAUDE_CODE_OAUTH_TOKEN"}
+    pconfig = PROVIDER_REGISTRY.get(normalized)
+    if pconfig and pconfig.auth_type == "api_key":
+        for env_var in pconfig.api_key_env_vars:
+            if env_var in _IMPLICIT_ENV_VARS:
+                continue
+            if has_usable_secret(os.getenv(env_var, "")):
+                return True
+
+    return False
+
+
 def clear_provider_auth(provider_id: Optional[str] = None) -> bool:
    """
    Clear auth state for a provider. Used by `hermes logout`.
@@ -818,7 +898,7 @@ def resolve_provider(
    _PROVIDER_ALIASES = {
        "glm": "zai", "z-ai": "zai", "z.ai": "zai", "zhipu": "zai",
        "google": "gemini", "google-gemini": "gemini", "google-ai-studio": "gemini",
-        "kimi": "kimi-coding", "moonshot": "kimi-coding",
+        "kimi": "kimi-coding", "kimi-for-coding": "kimi-coding", "moonshot": "kimi-coding",
        "minimax-china": "minimax-cn", "minimax_cn": "minimax-cn",
        "claude": "anthropic", "claude-code": "anthropic",
        "github": "copilot", "github-copilot": "copilot",
@@ -1441,7 +1521,15 @@ def _resolve_verify(
    if effective_insecure:
        return False
    if effective_ca:
-        return str(effective_ca)
+        ca_path = str(effective_ca)
+        if not os.path.isfile(ca_path):
+            import logging
+            logging.getLogger("hermes.auth").warning(
+                "CA bundle path does not exist: %s — falling back to default certificates",
+                ca_path,
+            )
+            return True
+        return ca_path
    return True


@@ -2544,6 +2632,8 @@ def _prompt_model_selection(
            title=effective_title,
        )
        idx = menu.show()
+        from hermes_cli.curses_ui import flush_stdin
+        flush_stdin()
        if idx is None:
            return None
        print()
@@ -347,8 +347,11 @@ def auth_remove_command(args) -> None:
            print("Cleared Hermes Anthropic OAuth credentials")

    elif removed.source == "claude_code" and provider == "anthropic":
-        print("Note: Claude Code credentials live in ~/.claude/.credentials.json")
-        print("      Remove them manually if you want to deauthorize Claude Code.")
+        from hermes_cli.auth import suppress_credential_source
+        suppress_credential_source(provider, "claude_code")
+        print("Suppressed claude_code credential — it will not be re-seeded.")
+        print("Note: Claude Code credentials still live in ~/.claude/.credentials.json")
+        print("Run `hermes auth add anthropic` to re-enable if needed.")


 def auth_reset_command(args) -> None:
@@ -19,10 +19,9 @@ import subprocess
 import sys
 from pathlib import Path

-logger = logging.getLogger(__name__)
+from hermes_constants import is_wsl as _is_wsl

-# Cache WSL detection (checked once per process)
-_wsl_detected: bool | None = None
+logger = logging.getLogger(__name__)


 def save_clipboard_image(dest: Path) -> bool:
@@ -217,19 +216,6 @@ def _windows_save(dest: Path) -> bool:

 # ── Linux ────────────────────────────────────────────────────────────────

-def _is_wsl() -> bool:
-    """Detect if running inside WSL (1 or 2)."""
-    global _wsl_detected
-    if _wsl_detected is not None:
-        return _wsl_detected
-    try:
-        with open("/proc/version", "r") as f:
-            _wsl_detected = "microsoft" in f.read().lower()
-    except Exception:
-        _wsl_detected = False
-    return _wsl_detected
-
-
 def _linux_save(dest: Path) -> bool:
    """Try clipboard backends in priority order: WSL → Wayland → X11."""
    if _is_wsl():
@@ -83,8 +83,7 @@ COMMAND_REGISTRY: list[CommandDef] = [
               args_hint="<question>"),
    CommandDef("queue", "Queue a prompt for the next turn (doesn't interrupt)", "Session",
               aliases=("q",), args_hint="<prompt>"),
-    CommandDef("status", "Show session info", "Session",
-               gateway_only=True),
+    CommandDef("status", "Show session info", "Session"),
    CommandDef("profile", "Show active profile name and home directory", "Info"),
    CommandDef("sethome", "Set this chat as the home channel", "Session",
               gateway_only=True, aliases=("set-home",)),
@@ -111,7 +110,7 @@ COMMAND_REGISTRY: list[CommandDef] = [
               args_hint="[level|show|hide]",
               subcommands=("none", "minimal", "low", "medium", "high", "xhigh", "show", "hide", "on", "off")),
    CommandDef("fast", "Toggle fast mode — OpenAI Priority Processing / Anthropic Fast Mode (Normal/Fast)", "Configuration",
-               cli_only=True, args_hint="[normal|fast|status]",
+               args_hint="[normal|fast|status]",
               subcommands=("normal", "fast", "status", "on", "off")),
    CommandDef("skin", "Show or change the display skin/theme", "Configuration",
               cli_only=True, args_hint="[name]"),
@@ -141,6 +140,8 @@ COMMAND_REGISTRY: list[CommandDef] = [
    CommandDef("commands", "Browse all commands and skills (paginated)", "Info",
               gateway_only=True, args_hint="[page]"),
    CommandDef("help", "Show available commands", "Info"),
+    CommandDef("restart", "Gracefully restart the gateway after draining active runs", "Session",
+               gateway_only=True),
    CommandDef("usage", "Show token usage and rate limits for the current session", "Info"),
    CommandDef("insights", "Show usage insights and analytics", "Info",
               args_hint="[days]"),
@@ -39,6 +39,9 @@ _EXTRA_ENV_KEYS = frozenset({
    "DINGTALK_CLIENT_ID", "DINGTALK_CLIENT_SECRET",
    "FEISHU_APP_ID", "FEISHU_APP_SECRET", "FEISHU_ENCRYPT_KEY", "FEISHU_VERIFICATION_TOKEN",
    "WECOM_BOT_ID", "WECOM_SECRET",
+    "WEIXIN_ACCOUNT_ID", "WEIXIN_TOKEN", "WEIXIN_BASE_URL", "WEIXIN_CDN_BASE_URL",
+    "WEIXIN_HOME_CHANNEL", "WEIXIN_HOME_CHANNEL_NAME", "WEIXIN_DM_POLICY", "WEIXIN_GROUP_POLICY",
+    "WEIXIN_ALLOWED_USERS", "WEIXIN_GROUP_ALLOWED_USERS", "WEIXIN_ALLOW_ALL_USERS",
    "BLUEBUBBLES_SERVER_URL", "BLUEBUBBLES_PASSWORD",
    "TERMINAL_ENV", "TERMINAL_SSH_KEY", "TERMINAL_SSH_PORT",
    "WHATSAPP_MODE", "WHATSAPP_ENABLED",
@@ -266,6 +269,11 @@ DEFAULT_CONFIG = {
        # tools or receiving API responses.  Only fires when the agent has
        # been completely idle for this duration.  0 = unlimited.
        "gateway_timeout": 1800,
+        # Graceful drain timeout for gateway stop/restart (seconds).
+        # The gateway stops accepting new work, waits for running agents
+        # to finish, then interrupts any remaining runs after the timeout.
+        # 0 = no drain, interrupt immediately.
+        "restart_drain_timeout": 60,
        "service_tier": "",
        # Tool-use enforcement: injects system prompt guidance that tells the
        # model to actually call tools instead of describing intended actions.
@@ -450,7 +458,7 @@ DEFAULT_CONFIG = {
    
    # Text-to-speech configuration
    "tts": {
-        "provider": "edge",  # "edge" (free) | "elevenlabs" (premium) | "openai" | "neutts" (local)
+        "provider": "edge",  # "edge" (free) | "elevenlabs" (premium) | "openai" | "minimax" | "mistral" | "neutts" (local)
        "edge": {
            "voice": "en-US-AriaNeural",
            # Popular: AriaNeural, JennyNeural, AndrewNeural, BrianNeural, SoniaNeural
@@ -464,6 +472,10 @@ DEFAULT_CONFIG = {
            "voice": "alloy",
            # Voices: alloy, echo, fable, onyx, nova, shimmer
        },
+        "mistral": {
+            "model": "voxtral-mini-tts-2603",
+            "voice_id": "c69964a6-ab8b-4f8a-9465-ec0925096ec8",  # Paul - Neutral
+        },
        "neutts": {
            "ref_audio": "",  # Path to reference voice audio (empty = bundled default)
            "ref_text": "",   # Path to reference voice transcript (empty = bundled default)
@@ -501,6 +513,16 @@ DEFAULT_CONFIG = {
        "max_ms": 2500,
    },
    
+    # Context engine -- controls how the context window is managed when
+    # approaching the model's token limit.
+    # "compressor" = built-in lossy summarization (default).
+    # Set to a plugin name to activate an alternative engine (e.g. "lcm"
+    # for Lossless Context Management).  The engine must be installed as
+    # a plugin in plugins/context_engine/<name>/ or ~/.hermes/plugins/.
+    "context": {
+        "engine": "compressor",
+    },
+
    # Persistent memory -- bounded curated memory injected into system prompt
    "memory": {
        "memory_enabled": True,
@@ -525,6 +547,8 @@ DEFAULT_CONFIG = {
        "api_key": "",     # API key for delegation.base_url (falls back to OPENAI_API_KEY)
        "max_iterations": 50,  # per-subagent iteration cap (each subagent gets its own budget,
                               # independent of the parent's max_iterations)
+        "reasoning_effort": "",  # reasoning effort for subagents: "xhigh", "high", "medium",
+                                 # "low", "minimal", "none" (empty = inherit parent's level)
    },

    # Ephemeral prefill messages file — JSON list of {role, content} dicts
@@ -996,6 +1020,13 @@ OPTIONAL_ENV_VARS = {
        "password": True,
        "category": "tool",
    },
+    "MISTRAL_API_KEY": {
+        "description": "Mistral API key for Voxtral TTS and transcription (STT)",
+        "prompt": "Mistral API key",
+        "url": "https://console.mistral.ai/",
+        "password": True,
+        "category": "tool",
+    },
    "GITHUB_TOKEN": {
        "description": "GitHub token for Skills Hub (higher API rate limits, skill publish)",
        "prompt": "GitHub Token",
@@ -1206,8 +1237,8 @@ OPTIONAL_ENV_VARS = {
        "advanced": True,
    },
    "API_SERVER_KEY": {
-        "description": "Bearer token for API server authentication. If empty, all requests are allowed (local use only).",
-        "prompt": "API server auth key (optional)",
+        "description": "Bearer token for API server authentication. Required for non-loopback binding; server refuses to start without it. On loopback (127.0.0.1), all requests are allowed if empty.",
+        "prompt": "API server auth key (required for network access)",
        "url": None,
        "password": True,
        "category": "messaging",
@@ -1222,7 +1253,7 @@ OPTIONAL_ENV_VARS = {
        "advanced": True,
    },
    "API_SERVER_HOST": {
-        "description": "Host/bind address for the API server (default: 127.0.0.1). Use 0.0.0.0 for network access — requires API_SERVER_KEY for security.",
+        "description": "Host/bind address for the API server (default: 127.0.0.1). Use 0.0.0.0 for network access — server refuses to start without API_SERVER_KEY.",
        "prompt": "API server host",
        "url": None,
        "password": False,
@@ -1447,7 +1478,7 @@ _KNOWN_ROOT_KEYS = {
    "_config_version", "model", "providers", "fallback_model",
    "fallback_providers", "credential_pool_strategies", "toolsets",
    "agent", "terminal", "display", "compression", "delegation",
-    "auxiliary", "custom_providers", "memory", "gateway",
+    "auxiliary", "custom_providers", "context", "memory", "gateway",
 }

 # Valid fields inside a custom_providers list entry
@@ -2770,6 +2801,10 @@ def set_config_value(key: str, value: str):
        "terminal.timeout": "TERMINAL_TIMEOUT",
        "terminal.sandbox_dir": "TERMINAL_SANDBOX_DIR",
        "terminal.persistent_shell": "TERMINAL_PERSISTENT_SHELL",
+        "terminal.container_cpu": "TERMINAL_CONTAINER_CPU",
+        "terminal.container_memory": "TERMINAL_CONTAINER_MEMORY",
+        "terminal.container_disk": "TERMINAL_CONTAINER_DISK",
+        "terminal.container_persistent": "TERMINAL_CONTAINER_PERSISTENT",
    }
    if key in _config_to_env_sync:
        save_env_value(_config_to_env_sync[key], str(value))
@@ -10,6 +10,28 @@ from typing import Callable, List, Optional, Set
 from hermes_cli.colors import Colors, color


+def flush_stdin() -> None:
+    """Flush any stray bytes from the stdin input buffer.
+
+    Must be called after ``curses.wrapper()`` (or any terminal-mode library
+    like simple_term_menu) returns, **before** the next ``input()`` /
+    ``getpass.getpass()`` call.  ``curses.endwin()`` restores the terminal
+    but does NOT drain the OS input buffer — leftover escape-sequence bytes
+    (from arrow keys, terminal mode-switch responses, or rapid keypresses)
+    remain buffered and silently get consumed by the next ``input()`` call,
+    corrupting user data (e.g. writing ``^[^[`` into .env files).
+
+    On non-TTY stdin (piped, redirected) or Windows, this is a no-op.
+    """
+    try:
+        if not sys.stdin.isatty():
+            return
+        import termios
+        termios.tcflush(sys.stdin, termios.TCIFLUSH)
+    except Exception:
+        pass
+
+
 def curses_checklist(
    title: str,
    items: List[str],
@@ -131,12 +153,140 @@ def curses_checklist(
                    return

        curses.wrapper(_draw)
+        flush_stdin()
        return result_holder[0] if result_holder[0] is not None else cancel_returns

    except Exception:
        return _numbered_fallback(title, items, selected, cancel_returns, status_fn)


+def curses_radiolist(
+    title: str,
+    items: List[str],
+    selected: int = 0,
+    *,
+    cancel_returns: int | None = None,
+) -> int:
+    """Curses single-select radio list. Returns the selected index.
+
+    Args:
+        title: Header line displayed above the list.
+        items: Display labels for each row.
+        selected: Index that starts selected (pre-selected).
+        cancel_returns: Returned on ESC/q. Defaults to the original *selected*.
+    """
+    if cancel_returns is None:
+        cancel_returns = selected
+
+    if not sys.stdin.isatty():
+        return cancel_returns
+
+    try:
+        import curses
+        result_holder: list = [None]
+
+        def _draw(stdscr):
+            curses.curs_set(0)
+            if curses.has_colors():
+                curses.start_color()
+                curses.use_default_colors()
+                curses.init_pair(1, curses.COLOR_GREEN, -1)
+                curses.init_pair(2, curses.COLOR_YELLOW, -1)
+            cursor = selected
+            scroll_offset = 0
+
+            while True:
+                stdscr.clear()
+                max_y, max_x = stdscr.getmaxyx()
+
+                # Header
+                try:
+                    hattr = curses.A_BOLD
+                    if curses.has_colors():
+                        hattr |= curses.color_pair(2)
+                    stdscr.addnstr(0, 0, title, max_x - 1, hattr)
+                    stdscr.addnstr(
+                        1, 0,
+                        "  \u2191\u2193 navigate  ENTER/SPACE select  ESC cancel",
+                        max_x - 1, curses.A_DIM,
+                    )
+                except curses.error:
+                    pass
+
+                # Scrollable item list
+                visible_rows = max_y - 4
+                if cursor < scroll_offset:
+                    scroll_offset = cursor
+                elif cursor >= scroll_offset + visible_rows:
+                    scroll_offset = cursor - visible_rows + 1
+
+                for draw_i, i in enumerate(
+                    range(scroll_offset, min(len(items), scroll_offset + visible_rows))
+                ):
+                    y = draw_i + 3
+                    if y >= max_y - 1:
+                        break
+                    radio = "\u25cf" if i == selected else "\u25cb"
+                    arrow = "\u2192" if i == cursor else " "
+                    line = f" {arrow} ({radio}) {items[i]}"
+                    attr = curses.A_NORMAL
+                    if i == cursor:
+                        attr = curses.A_BOLD
+                        if curses.has_colors():
+                            attr |= curses.color_pair(1)
+                    try:
+                        stdscr.addnstr(y, 0, line, max_x - 1, attr)
+                    except curses.error:
+                        pass
+
+                stdscr.refresh()
+                key = stdscr.getch()
+
+                if key in (curses.KEY_UP, ord("k")):
+                    cursor = (cursor - 1) % len(items)
+                elif key in (curses.KEY_DOWN, ord("j")):
+                    cursor = (cursor + 1) % len(items)
+                elif key in (ord(" "), curses.KEY_ENTER, 10, 13):
+                    result_holder[0] = cursor
+                    return
+                elif key in (27, ord("q")):
+                    result_holder[0] = cancel_returns
+                    return
+
+        curses.wrapper(_draw)
+        flush_stdin()
+        return result_holder[0] if result_holder[0] is not None else cancel_returns
+
+    except Exception:
+        return _radio_numbered_fallback(title, items, selected, cancel_returns)
+
+
+def _radio_numbered_fallback(
+    title: str,
+    items: List[str],
+    selected: int,
+    cancel_returns: int,
+) -> int:
+    """Text-based numbered fallback for radio selection."""
+    print(color(f"\n  {title}", Colors.YELLOW))
+    print(color("  Select by number, Enter to confirm.\n", Colors.DIM))
+
+    for i, label in enumerate(items):
+        marker = color("(\u25cf)", Colors.GREEN) if i == selected else "(\u25cb)"
+        print(f"  {marker} {i + 1:>2}. {label}")
+    print()
+    try:
+        val = input(color(f"  Choice [default {selected + 1}]: ", Colors.DIM)).strip()
+        if not val:
+            return selected
+        idx = int(val) - 1
+        if 0 <= idx < len(items):
+            return idx
+        return selected
+    except (ValueError, KeyboardInterrupt, EOFError):
+        return cancel_returns
+
+
 def _numbered_fallback(
    title: str,
    items: List[str],
@@ -722,9 +722,9 @@ def run_doctor(args):
        ("DeepSeek",         ("DEEPSEEK_API_KEY",),                           "https://api.deepseek.com/v1/models",  "DEEPSEEK_BASE_URL", True),
        ("Hugging Face",     ("HF_TOKEN",),                                   "https://router.huggingface.co/v1/models", "HF_BASE_URL", True),
        ("Alibaba/DashScope", ("DASHSCOPE_API_KEY",),                         "https://dashscope-intl.aliyuncs.com/compatible-mode/v1/models", "DASHSCOPE_BASE_URL", True),
-        # MiniMax APIs don't support /models endpoint — https://github.com/NousResearch/hermes-agent/issues/811
-        ("MiniMax",          ("MINIMAX_API_KEY",),                            None,                                  "MINIMAX_BASE_URL", False),
-        ("MiniMax (China)",  ("MINIMAX_CN_API_KEY",),                         None,                                  "MINIMAX_CN_BASE_URL", False),
+        # MiniMax: the /anthropic endpoint doesn't support /models, but the /v1 endpoint does.
+        ("MiniMax",          ("MINIMAX_API_KEY",),                            "https://api.minimax.io/v1/models",    "MINIMAX_BASE_URL", True),
+        ("MiniMax (China)",  ("MINIMAX_CN_API_KEY",),                         "https://api.minimaxi.com/v1/models",  "MINIMAX_CN_BASE_URL", True),
        ("AI Gateway",       ("AI_GATEWAY_API_KEY",),                          "https://ai-gateway.vercel.sh/v1/models", "AI_GATEWAY_BASE_URL", True),
        ("Kilo Code",        ("KILOCODE_API_KEY",),                            "https://api.kilo.ai/api/gateway/models",  "KILOCODE_BASE_URL", True),
        ("OpenCode Zen",     ("OPENCODE_ZEN_API_KEY",),                        "https://opencode.ai/zen/v1/models",  "OPENCODE_ZEN_BASE_URL", True),
@@ -749,6 +749,11 @@ def run_doctor(args):
                # Auto-detect Kimi Code keys (sk-kimi-) → api.kimi.com
                if not _base and _key.startswith("sk-kimi-"):
                    _base = "https://api.kimi.com/coding/v1"
+                # Anthropic-compat endpoints (/anthropic) don't support /models.
+                # Rewrite to the OpenAI-compat /v1 surface for health checks.
+                if _base and _base.rstrip("/").endswith("/anthropic"):
+                    from agent.auxiliary_client import _to_openai_base_url
+                    _base = _to_openai_base_url(_base)
                _url = (_base.rstrip("/") + "/models") if _base else _default_url
                _headers = {"Authorization": f"Bearer {_key}"}
                if "api.kimi.com" in _url.lower():
@@ -119,6 +119,7 @@ def _configured_platforms() -> list[str]:
        "dingtalk": "DINGTALK_CLIENT_ID",
        "feishu": "FEISHU_APP_ID",
        "wecom": "WECOM_BOT_ID",
+        "weixin": "WEIXIN_ACCOUNT_ID",
    }
    return [name for name, env in checks.items() if os.getenv(env)]

@@ -15,7 +15,19 @@ from pathlib import Path
 PROJECT_ROOT = Path(__file__).parent.parent.resolve()

 from gateway.status import terminate_pid
-from hermes_cli.config import get_env_value, get_hermes_home, save_env_value, is_managed, managed_error
+from gateway.restart import (
+    DEFAULT_GATEWAY_RESTART_DRAIN_TIMEOUT,
+    GATEWAY_SERVICE_RESTART_EXIT_CODE,
+    parse_restart_drain_timeout,
+)
+from hermes_cli.config import (
+    get_env_value,
+    get_hermes_home,
+    is_managed,
+    managed_error,
+    read_raw_config,
+    save_env_value,
+)
 # display_hermes_home is imported lazily at call sites to avoid ImportError
 # when hermes_constants is cached from a pre-update version during `hermes update`.
 from hermes_cli.setup import (
@@ -92,6 +104,59 @@ def _get_service_pids() -> set:
    return pids


+def _get_parent_pid(pid: int) -> int | None:
+    """Return the parent PID for ``pid``, or ``None`` when unavailable."""
+    if pid <= 1:
+        return None
+    try:
+        result = subprocess.run(
+            ["ps", "-o", "ppid=", "-p", str(pid)],
+            capture_output=True,
+            text=True,
+            timeout=5,
+        )
+    except (FileNotFoundError, subprocess.TimeoutExpired):
+        return None
+    if result.returncode != 0:
+        return None
+    raw = result.stdout.strip()
+    if not raw:
+        return None
+    try:
+        parent_pid = int(raw.splitlines()[-1].strip())
+    except ValueError:
+        return None
+    return parent_pid if parent_pid > 0 else None
+
+
+def _is_pid_ancestor_of_current_process(target_pid: int) -> bool:
+    """Return True when ``target_pid`` is this process or one of its ancestors."""
+    if target_pid <= 0:
+        return False
+
+    pid = os.getpid()
+    seen: set[int] = set()
+    while pid and pid not in seen:
+        if pid == target_pid:
+            return True
+        seen.add(pid)
+        pid = _get_parent_pid(pid) or 0
+    return False
+
+
+def _request_gateway_self_restart(pid: int) -> bool:
+    """Ask a running gateway ancestor to restart itself asynchronously."""
+    if not hasattr(signal, "SIGUSR1"):
+        return False
+    if not _is_pid_ancestor_of_current_process(pid):
+        return False
+    try:
+        os.kill(pid, signal.SIGUSR1)
+    except (ProcessLookupError, PermissionError, OSError):
+        return False
+    return True
+
+
 def find_gateway_pids(exclude_pids: set | None = None) -> list:
    """Find PIDs of running gateway processes.

@@ -226,11 +291,33 @@ def is_linux() -> bool:
    return sys.platform.startswith('linux')


-from hermes_constants import is_termux
+from hermes_constants import is_termux, is_wsl
+
+
+def _wsl_systemd_operational() -> bool:
+    """Check if systemd is actually running as PID 1 on WSL.
+
+    WSL2 with ``systemd=true`` in wsl.conf has working systemd.
+    WSL2 without it (or WSL1) does not — systemctl commands fail.
+    """
+    try:
+        result = subprocess.run(
+            ["systemctl", "is-system-running"],
+            capture_output=True, text=True, timeout=5,
+        )
+        # "running", "degraded", "starting" all mean systemd is PID 1
+        status = result.stdout.strip().lower()
+        return status in ("running", "degraded", "starting", "initializing")
+    except (FileNotFoundError, subprocess.TimeoutExpired, OSError):
+        return False


 def supports_systemd_services() -> bool:
-    return is_linux() and not is_termux()
+    if not is_linux() or is_termux():
+        return False
+    if is_wsl():
+        return _wsl_systemd_operational()
+    return True


 def is_macos() -> bool:
@@ -251,18 +338,18 @@ SERVICE_DESCRIPTION = "Hermes Agent Gateway - Messaging Platform Integration"
 def _profile_suffix() -> str:
    """Derive a service-name suffix from the current HERMES_HOME.

-    Returns ``""`` for the default ``~/.hermes``, the profile name for
-    ``~/.hermes/profiles/<name>``, or a short hash for any other custom
-    HERMES_HOME path.
+    Returns ``""`` for the default root, the profile name for
+    ``<root>/profiles/<name>``, or a short hash for any other path.
+    Works correctly in Docker (HERMES_HOME=/opt/data) and standard deployments.
    """
    import hashlib
    import re
-    from pathlib import Path as _Path
+    from hermes_constants import get_default_hermes_root
    home = get_hermes_home().resolve()
-    default = (_Path.home() / ".hermes").resolve()
+    default = get_default_hermes_root().resolve()
    if home == default:
        return ""
-    # Detect ~/.hermes/profiles/<name> pattern → use the profile name
+    # Detect <root>/profiles/<name> pattern → use the profile name
    profiles_root = (default / "profiles").resolve()
    try:
        rel = home.relative_to(profiles_root)
@@ -287,9 +374,9 @@ def _profile_arg(hermes_home: str | None = None) -> str:
            service definition for a different user (e.g. system service).
    """
    import re
-    from pathlib import Path as _Path
+    from hermes_constants import get_default_hermes_root
    home = Path(hermes_home or str(get_hermes_home())).resolve()
-    default = (_Path.home() / ".hermes").resolve()
+    default = get_default_hermes_root().resolve()
    if home == default:
        return ""
    profiles_root = (default / "profiles").resolve()
@@ -665,6 +752,7 @@ def generate_systemd_unit(system: bool = False, run_as_user: str | None = None)
            path_entries.append(resolved_node_dir)

    common_bin_paths = ["/usr/local/sbin", "/usr/local/bin", "/usr/sbin", "/usr/bin", "/sbin", "/bin"]
+    restart_timeout = max(60, int(_get_restart_drain_timeout() or 0))

    if system:
        username, group_name, home_dir = _system_service_identity(run_as_user)
@@ -703,9 +791,11 @@ Environment="VIRTUAL_ENV={venv_dir}"
 Environment="HERMES_HOME={hermes_home}"
 Restart=on-failure
 RestartSec=30
+RestartForceExitStatus={GATEWAY_SERVICE_RESTART_EXIT_CODE}
 KillMode=mixed
 KillSignal=SIGTERM
-TimeoutStopSec=60
+ExecReload=/bin/kill -USR1 $MAINPID
+TimeoutStopSec={restart_timeout}
 StandardOutput=journal
 StandardError=journal

@@ -733,9 +823,11 @@ Environment="VIRTUAL_ENV={venv_dir}"
 Environment="HERMES_HOME={hermes_home}"
 Restart=on-failure
 RestartSec=30
+RestartForceExitStatus={GATEWAY_SERVICE_RESTART_EXIT_CODE}
 KillMode=mixed
 KillSignal=SIGTERM
-TimeoutStopSec=60
+ExecReload=/bin/kill -USR1 $MAINPID
+TimeoutStopSec={restart_timeout}
 StandardOutput=journal
 StandardError=journal

@@ -838,6 +930,20 @@ def _select_systemd_scope(system: bool = False) -> bool:
    return get_systemd_unit_path(system=True).exists() and not get_systemd_unit_path(system=False).exists()


+def _get_restart_drain_timeout() -> float:
+    """Return the configured gateway restart drain timeout in seconds."""
+    raw = os.getenv("HERMES_RESTART_DRAIN_TIMEOUT", "").strip()
+    if not raw:
+        cfg = read_raw_config()
+        agent_cfg = cfg.get("agent", {}) if isinstance(cfg, dict) else {}
+        raw = str(
+            agent_cfg.get(
+                "restart_drain_timeout", DEFAULT_GATEWAY_RESTART_DRAIN_TIMEOUT
+            )
+        )
+    return parse_restart_drain_timeout(raw)
+
+
 def systemd_install(force: bool = False, system: bool = False, run_as_user: str | None = None):
    if system:
        _require_root_for_system_service("install")
@@ -923,7 +1029,13 @@ def systemd_restart(system: bool = False):
    if system:
        _require_root_for_system_service("restart")
    refresh_systemd_unit_if_needed(system=system)
-    subprocess.run(_systemctl_cmd(system) + ["restart", get_service_name()], check=True, timeout=90)
+    from gateway.status import get_running_pid
+
+    pid = get_running_pid()
+    if pid is not None and _request_gateway_self_restart(pid):
+        print(f"✓ {_service_scope_label(system).capitalize()} service restart requested")
+        return
+    subprocess.run(_systemctl_cmd(system) + ["reload-or-restart", get_service_name()], check=True, timeout=90)
    print(f"✓ {_service_scope_label(system).capitalize()} service restarted")


@@ -1211,7 +1323,7 @@ def launchd_stop():
    _wait_for_gateway_exit(timeout=10.0, force_after=5.0)
    print("✓ Service stopped")

-def _wait_for_gateway_exit(timeout: float = 10.0, force_after: float = 5.0):
+def _wait_for_gateway_exit(timeout: float = 10.0, force_after: float | None = 5.0) -> bool:
    """Wait for the gateway process (by saved PID) to exit.

    Uses the PID from the gateway.pid file — not launchd labels — so this
@@ -1226,21 +1338,21 @@ def _wait_for_gateway_exit(timeout: float = 10.0, force_after: float = 5.0):
    from gateway.status import get_running_pid

    deadline = time.monotonic() + timeout
-    force_deadline = time.monotonic() + force_after
+    force_deadline = (time.monotonic() + force_after) if force_after is not None else None
    force_sent = False

    while time.monotonic() < deadline:
        pid = get_running_pid()
        if pid is None:
-            return  # Process exited cleanly.
+            return True  # Process exited cleanly.

-        if not force_sent and time.monotonic() >= force_deadline:
+        if force_after is not None and not force_sent and time.monotonic() >= force_deadline:
            # Grace period expired — force-kill the specific PID.
            try:
                terminate_pid(pid, force=True)
                print(f"⚠ Gateway PID {pid} did not exit gracefully; sent SIGKILL")
            except (ProcessLookupError, PermissionError, OSError):
-                return  # Already gone or we can't touch it.
+                return True  # Already gone or we can't touch it.
            force_sent = True

        time.sleep(0.3)
@@ -1249,15 +1361,30 @@ def _wait_for_gateway_exit(timeout: float = 10.0, force_after: float = 5.0):
    remaining_pid = get_running_pid()
    if remaining_pid is not None:
        print(f"⚠ Gateway PID {remaining_pid} still running after {timeout}s — restart may fail")
+        return False
+    return True


 def launchd_restart():
    label = get_launchd_label()
    target = f"{_launchd_domain()}/{label}"
-    # Use kickstart -k so launchd performs an atomic kill+restart.
-    # A two-step stop/start from inside the gateway's own process tree
-    # would kill the shell before the start command is reached.
+    drain_timeout = _get_restart_drain_timeout()
+    from gateway.status import get_running_pid
+
    try:
+        pid = get_running_pid()
+        if pid is not None and _request_gateway_self_restart(pid):
+            print("✓ Service restart requested")
+            return
+        if pid is not None:
+            try:
+                terminate_pid(pid, force=False)
+            except (ProcessLookupError, PermissionError, OSError):
+                pid = None
+            if pid is not None:
+                exited = _wait_for_gateway_exit(timeout=drain_timeout, force_after=None)
+                if not exited:
+                    print(f"⚠ Gateway drain timed out after {drain_timeout:.0f}s — forcing launchd restart")
        subprocess.run(["launchctl", "kickstart", "-k", target], check=True, timeout=90)
        print("✓ Service restarted")
    except subprocess.CalledProcessError as e:
@@ -1442,7 +1569,7 @@ _PLATFORMS = [
            "   Or via API: curl -X POST https://your-server/_matrix/client/v3/login \\",
            "     -d '{\"type\":\"m.login.password\",\"user\":\"@bot:server\",\"password\":\"...\"}'",
            "4. Alternatively, provide user ID + password and Hermes will log in directly",
-            "5. For E2EE: set MATRIX_ENCRYPTION=true (requires pip install 'matrix-nio[e2e]')",
+            "5. For E2EE: set MATRIX_ENCRYPTION=true (requires pip install 'mautrix[encryption]')",
            "6. To find your user ID: it's @username:your-server (shown in Element profile)",
        ],
        "vars": [
@@ -1624,6 +1751,12 @@ _PLATFORMS = [
             "help": "Chat ID for scheduled results and notifications."},
        ],
    },
+    {
+        "key": "weixin",
+        "label": "Weixin / WeChat",
+        "emoji": "💬",
+        "token_var": "WEIXIN_ACCOUNT_ID",
+    },
    {
        "key": "bluebubbles",
        "label": "BlueBubbles (iMessage)",
@@ -1696,6 +1829,13 @@ def _platform_status(platform: dict) -> str:
        if val or password or homeserver:
            return "partially configured"
        return "not configured"
+    if platform.get("key") == "weixin":
+        token = get_env_value("WEIXIN_TOKEN")
+        if val and token:
+            return "configured"
+        if val or token:
+            return "partially configured"
+        return "not configured"
    if val:
        return "configured"
    return "not configured"
@@ -1715,6 +1855,8 @@ def _runtime_health_lines() -> list[str]:
    lines: list[str] = []
    gateway_state = state.get("gateway_state")
    exit_reason = state.get("exit_reason")
+    active_agents = state.get("active_agents")
+    restart_requested = state.get("restart_requested")
    platforms = state.get("platforms", {}) or {}

    for platform, pdata in platforms.items():
@@ -1724,6 +1866,10 @@ def _runtime_health_lines() -> list[str]:

    if gateway_state == "startup_failed" and exit_reason:
        lines.append(f"⚠ Last startup issue: {exit_reason}")
+    elif gateway_state == "draining":
+        action = "restart" if restart_requested else "shutdown"
+        count = int(active_agents or 0)
+        lines.append(f"⏳ Gateway draining for {action} ({count} active agent(s))")
    elif gateway_state == "stopped" and exit_reason:
        lines.append(f"⚠ Last shutdown reason: {exit_reason}")

@@ -1799,7 +1945,7 @@ def _setup_standard_platform(platform: dict):
                    print_warning("  Open access enabled — anyone can use your bot!")
                elif access_idx == 1:
                    print_success("  DM pairing mode — users will receive a code to request access.")
-                    print_info("  Approve with: hermes pairing approve {platform} {code}")
+                    print_info("  Approve with: hermes pairing approve <platform> <code>")
                else:
                    print_info("  Skipped — configure later with 'hermes gateway setup'")
            continue
@@ -1886,6 +2032,133 @@ def _is_service_running() -> bool:
    return len(find_gateway_pids()) > 0


+def _setup_weixin():
+    """Interactive setup for Weixin / WeChat personal accounts."""
+    print()
+    print(color("  ─── 💬 Weixin / WeChat Setup ───", Colors.CYAN))
+    print()
+    print_info("  1. Hermes will open Tencent iLink QR login in this terminal.")
+    print_info("  2. Use WeChat to scan and confirm the QR code.")
+    print_info("  3. Hermes will store the returned account_id/token in ~/.hermes/.env.")
+    print_info("  4. This adapter supports native text, image, video, and document delivery.")
+
+    existing_account = get_env_value("WEIXIN_ACCOUNT_ID")
+    existing_token = get_env_value("WEIXIN_TOKEN")
+    if existing_account and existing_token:
+        print()
+        print_success("Weixin is already configured.")
+        if not prompt_yes_no("  Reconfigure Weixin?", False):
+            return
+
+    try:
+        from gateway.platforms.weixin import check_weixin_requirements, qr_login
+    except Exception as exc:
+        print_error(f"  Weixin adapter import failed: {exc}")
+        print_info("  Install gateway dependencies first, then retry.")
+        return
+
+    if not check_weixin_requirements():
+        print_error("  Missing dependencies: Weixin needs aiohttp and cryptography.")
+        print_info("  Install them, then rerun `hermes gateway setup`.")
+        return
+
+    print()
+    if not prompt_yes_no("  Start QR login now?", True):
+        print_info("  Cancelled.")
+        return
+
+    import asyncio
+    try:
+        credentials = asyncio.run(qr_login(str(get_hermes_home())))
+    except KeyboardInterrupt:
+        print()
+        print_warning("  Weixin setup cancelled.")
+        return
+    except Exception as exc:
+        print_error(f"  QR login failed: {exc}")
+        return
+
+    if not credentials:
+        print_warning("  QR login did not complete.")
+        return
+
+    account_id = credentials.get("account_id", "")
+    token = credentials.get("token", "")
+    base_url = credentials.get("base_url", "")
+    user_id = credentials.get("user_id", "")
+
+    save_env_value("WEIXIN_ACCOUNT_ID", account_id)
+    save_env_value("WEIXIN_TOKEN", token)
+    if base_url:
+        save_env_value("WEIXIN_BASE_URL", base_url)
+    save_env_value("WEIXIN_CDN_BASE_URL", get_env_value("WEIXIN_CDN_BASE_URL") or "https://novac2c.cdn.weixin.qq.com/c2c")
+
+    print()
+    access_choices = [
+        "Use DM pairing approval (recommended)",
+        "Allow all direct messages",
+        "Only allow listed user IDs",
+        "Disable direct messages",
+    ]
+    access_idx = prompt_choice("  How should direct messages be authorized?", access_choices, 0)
+    if access_idx == 0:
+        save_env_value("WEIXIN_DM_POLICY", "pairing")
+        save_env_value("WEIXIN_ALLOW_ALL_USERS", "false")
+        save_env_value("WEIXIN_ALLOWED_USERS", "")
+        print_success("  DM pairing enabled.")
+        print_info("  Unknown DM users can request access and you approve them with `hermes pairing approve`.")
+    elif access_idx == 1:
+        save_env_value("WEIXIN_DM_POLICY", "open")
+        save_env_value("WEIXIN_ALLOW_ALL_USERS", "true")
+        save_env_value("WEIXIN_ALLOWED_USERS", "")
+        print_warning("  Open DM access enabled for Weixin.")
+    elif access_idx == 2:
+        default_allow = user_id or ""
+        allowlist = prompt("  Allowed Weixin user IDs (comma-separated)", default_allow, password=False).replace(" ", "")
+        save_env_value("WEIXIN_DM_POLICY", "allowlist")
+        save_env_value("WEIXIN_ALLOW_ALL_USERS", "false")
+        save_env_value("WEIXIN_ALLOWED_USERS", allowlist)
+        print_success("  Weixin allowlist saved.")
+    else:
+        save_env_value("WEIXIN_DM_POLICY", "disabled")
+        save_env_value("WEIXIN_ALLOW_ALL_USERS", "false")
+        save_env_value("WEIXIN_ALLOWED_USERS", "")
+        print_warning("  Direct messages disabled.")
+
+    print()
+    group_choices = [
+        "Disable group chats (recommended)",
+        "Allow all group chats",
+        "Only allow listed group chat IDs",
+    ]
+    group_idx = prompt_choice("  How should group chats be handled?", group_choices, 0)
+    if group_idx == 0:
+        save_env_value("WEIXIN_GROUP_POLICY", "disabled")
+        save_env_value("WEIXIN_GROUP_ALLOWED_USERS", "")
+        print_info("  Group chats disabled.")
+    elif group_idx == 1:
+        save_env_value("WEIXIN_GROUP_POLICY", "open")
+        save_env_value("WEIXIN_GROUP_ALLOWED_USERS", "")
+        print_warning("  All group chats enabled.")
+    else:
+        allow_groups = prompt("  Allowed group chat IDs (comma-separated)", "", password=False).replace(" ", "")
+        save_env_value("WEIXIN_GROUP_POLICY", "allowlist")
+        save_env_value("WEIXIN_GROUP_ALLOWED_USERS", allow_groups)
+        print_success("  Group allowlist saved.")
+
+    if user_id:
+        print()
+        if prompt_yes_no(f"  Use your Weixin user ID ({user_id}) as the home channel?", True):
+            save_env_value("WEIXIN_HOME_CHANNEL", user_id)
+            print_success(f"  Home channel set to {user_id}")
+
+    print()
+    print_success("Weixin configured!")
+    print_info(f"  Account ID: {account_id}")
+    if user_id:
+        print_info(f"  User ID: {user_id}")
+
+
 def _setup_signal():
    """Interactive setup for Signal messenger."""
    import shutil
@@ -2061,6 +2334,8 @@ def gateway_setup():
            _setup_whatsapp()
        elif platform["key"] == "signal":
            _setup_signal()
+        elif platform["key"] == "weixin":
+            _setup_weixin()
        else:
            _setup_standard_platform(platform)

@@ -2102,7 +2377,8 @@ def gateway_setup():
            print()
            if supports_systemd_services() or is_macos():
                platform_name = "systemd" if supports_systemd_services() else "launchd"
-                if prompt_yes_no(f"  Install the gateway as a {platform_name} service? (runs in background, starts on boot)", True):
+                wsl_note = " (note: services may not survive WSL restarts)" if is_wsl() else ""
+                if prompt_yes_no(f"  Install the gateway as a {platform_name} service?{wsl_note} (runs in background, starts on boot)", True):
                    try:
                        installed_scope = None
                        did_install = False
@@ -2127,16 +2403,21 @@ def gateway_setup():
                    print_info("  You can install later: hermes gateway install")
                    if supports_systemd_services():
                        print_info("  Or as a boot-time service: sudo hermes gateway install --system")
-                    print_info("  Or run in foreground:  hermes gateway")
+                    print_info("  Or run in foreground:  hermes gateway run")
+            elif is_wsl():
+                print_info("  WSL detected but systemd is not running.")
+                print_info("  Run in foreground: hermes gateway run")
+                print_info("  For persistence:   tmux new -s hermes 'hermes gateway run'")
+                print_info("  To enable systemd: add systemd=true to /etc/wsl.conf, then 'wsl --shutdown'")
            else:
                if is_termux():
                    from hermes_constants import display_hermes_home as _dhh
                    print_info("  Termux does not use systemd/launchd services.")
-                    print_info("  Run in foreground: hermes gateway")
-                    print_info(f"  Or start it manually in the background (best effort): nohup hermes gateway >{_dhh()}/logs/gateway.log 2>&1 &")
+                    print_info("  Run in foreground: hermes gateway run")
+                    print_info(f"  Or start it manually in the background (best effort): nohup hermes gateway run >{_dhh()}/logs/gateway.log 2>&1 &")
                else:
                    print_info("  Service install not supported on this platform.")
-                    print_info("  Run in foreground: hermes gateway")
+                    print_info("  Run in foreground: hermes gateway run")
    else:
        print()
        print_info("No platforms configured. Run 'hermes gateway setup' when ready.")
@@ -2177,9 +2458,23 @@ def gateway_command(args):
            print("Run manually: hermes gateway")
            sys.exit(1)
        if supports_systemd_services():
+            if is_wsl():
+                print_warning("WSL detected — systemd services may not survive WSL restarts.")
+                print_info("  Consider running in foreground instead: hermes gateway run")
+                print_info("  Or use tmux/screen for persistence: tmux new -s hermes 'hermes gateway run'")
+                print()
            systemd_install(force=force, system=system, run_as_user=run_as_user)
        elif is_macos():
            launchd_install(force)
+        elif is_wsl():
+            print("WSL detected but systemd is not running.")
+            print("Either enable systemd (add systemd=true to /etc/wsl.conf and restart WSL)")
+            print("or run the gateway in foreground mode:")
+            print()
+            print("  hermes gateway run                              # direct foreground")
+            print("  tmux new -s hermes 'hermes gateway run'         # persistent via tmux")
+            print("  nohup hermes gateway run > ~/.hermes/logs/gateway.log 2>&1 &  # background")
+            sys.exit(1)
        else:
            print("Service installation not supported on this platform.")
            print("Run manually: hermes gateway run")
@@ -2212,6 +2507,16 @@ def gateway_command(args):
            systemd_start(system=system)
        elif is_macos():
            launchd_start()
+        elif is_wsl():
+            print("WSL detected but systemd is not available.")
+            print("Run the gateway in foreground mode instead:")
+            print()
+            print("  hermes gateway run                              # direct foreground")
+            print("  tmux new -s hermes 'hermes gateway run'         # persistent via tmux")
+            print("  nohup hermes gateway run > ~/.hermes/logs/gateway.log 2>&1 &  # background")
+            print()
+            print("To enable systemd: add systemd=true to /etc/wsl.conf and run 'wsl --shutdown' from PowerShell.")
+            sys.exit(1)
        else:
            print("Not supported on this platform.")
            sys.exit(1)
@@ -2346,6 +2651,10 @@ def gateway_command(args):
                if is_termux():
                    print("Termux note:")
                    print("  Android may stop background jobs when Termux is suspended")
+                elif is_wsl():
+                    print("WSL note:")
+                    print("  The gateway is running in foreground/manual mode (recommended for WSL).")
+                    print("  Use tmux or screen for persistence across terminal closes.")
                else:
                    print("To install as a service:")
                    print("  hermes gateway install")
@@ -2360,9 +2669,12 @@ def gateway_command(args):
                        print(f"  {line}")
                print()
                print("To start:")
-                print("  hermes gateway          # Run in foreground")
+                print("  hermes gateway run      # Run in foreground")
                if is_termux():
-                    print("  nohup hermes gateway > ~/.hermes/logs/gateway.log 2>&1 &  # Best-effort background start")
+                    print("  nohup hermes gateway run > ~/.hermes/logs/gateway.log 2>&1 &  # Best-effort background start")
+                elif is_wsl():
+                    print("  tmux new -s hermes 'hermes gateway run'         # persistent via tmux")
+                    print("  nohup hermes gateway run > ~/.hermes/logs/gateway.log 2>&1 &  # background")
                else:
                    print("  hermes gateway install  # Install as user service")
                    print("  sudo hermes gateway install --system  # Install as boot-time system service")
@@ -97,10 +97,11 @@ def _apply_profile_override() -> None:
            consume = 1
            break

-    # 2. If no flag, check ~/.hermes/active_profile
+    # 2. If no flag, check active_profile in the hermes root
    if profile_name is None:
        try:
-            active_path = Path.home() / ".hermes" / "active_profile"
+            from hermes_constants import get_default_hermes_root
+            active_path = get_default_hermes_root() / "active_profile"
            if active_path.exists():
                name = active_path.read_text().strip()
                if name and name != "default":
@@ -1079,6 +1080,42 @@ def select_provider_and_model(args=None):
    elif selected_provider in ("gemini", "zai", "minimax", "minimax-cn", "kilocode", "opencode-zen", "opencode-go", "ai-gateway", "alibaba", "huggingface"):
        _model_flow_api_key_provider(config, selected_provider, current_model)

+    # ── Post-switch cleanup: clear stale OPENAI_BASE_URL ──────────────
+    # When the user switches to a named provider (anything except "custom"),
+    # a leftover OPENAI_BASE_URL in ~/.hermes/.env can poison auxiliary
+    # clients that use provider:auto. Clear it proactively.  (#5161)
+    if selected_provider not in ("custom", "cancel", "remove-custom") \
+            and not selected_provider.startswith("custom:"):
+        _clear_stale_openai_base_url()
+
+
+def _clear_stale_openai_base_url():
+    """Remove OPENAI_BASE_URL from ~/.hermes/.env if the active provider is not 'custom'.
+
+    After a provider switch, a leftover OPENAI_BASE_URL causes auxiliary
+    clients (compression, vision, delegation) with provider:auto to route
+    requests to the old custom endpoint instead of the newly selected
+    provider.  See issue #5161.
+    """
+    from hermes_cli.config import get_env_value, save_env_value, load_config
+
+    cfg = load_config()
+    model_cfg = cfg.get("model", {})
+    if isinstance(model_cfg, dict):
+        provider = (model_cfg.get("provider") or "").strip().lower()
+    else:
+        provider = ""
+
+    if provider == "custom" or not provider:
+        return  # custom provider legitimately uses OPENAI_BASE_URL
+
+    stale_url = get_env_value("OPENAI_BASE_URL")
+    if stale_url:
+        save_env_value("OPENAI_BASE_URL", "")
+        print(f"Cleared stale OPENAI_BASE_URL from .env (was: {stale_url[:40]}...)"
+              if len(stale_url) > 40
+              else f"Cleared stale OPENAI_BASE_URL from .env (was: {stale_url})")
+

 def _prompt_provider_choice(choices, *, default=0):
    """Show provider selection menu with curses arrow-key navigation.
@@ -1672,6 +1709,8 @@ def _remove_custom_provider(config):
            title="Select provider to remove:",
        )
        idx = menu.show()
+        from hermes_cli.curses_ui import flush_stdin
+        flush_stdin()
        print()
    except (ImportError, NotImplementedError, OSError, subprocess.SubprocessError):
        for i, c in enumerate(choices, 1):
@@ -1697,8 +1736,9 @@ def _remove_custom_provider(config):
 def _model_flow_named_custom(config, provider_info):
    """Handle a named custom provider from config.yaml custom_providers list.

-    If the entry has a saved model name, activates it immediately.
-    Otherwise probes the endpoint's /models API to let the user pick one.
+    Always probes the endpoint's /models API to let the user pick a model.
+    If a model was previously saved, it is pre-selected in the menu.
+    Falls back to the saved model if probing fails.
    """
    from hermes_cli.auth import _save_model_choice, deactivate_provider
    from hermes_cli.config import load_config, save_config
@@ -1709,46 +1749,37 @@ def _model_flow_named_custom(config, provider_info):
    api_key = provider_info.get("api_key", "")
    saved_model = provider_info.get("model", "")

-    # If a model is saved, just activate immediately — no probing needed
-    if saved_model:
-        _save_model_choice(saved_model)
-
-        cfg = load_config()
-        model = cfg.get("model")
-        if not isinstance(model, dict):
-            model = {"default": model} if model else {}
-            cfg["model"] = model
-        model["provider"] = "custom"
-        model["base_url"] = base_url
-        if api_key:
-            model["api_key"] = api_key
-        save_config(cfg)
-        deactivate_provider()
-
-        print(f"✅ Switched to: {saved_model}")
-        print(f"   Provider: {name} ({base_url})")
-        return
-
-    # No saved model — probe endpoint and let user pick
    print(f"  Provider: {name}")
    print(f"  URL:      {base_url}")
+    if saved_model:
+        print(f"  Current:  {saved_model}")
    print()
-    print("No model saved for this provider. Fetching available models...")
+
+    print("Fetching available models...")
    models = fetch_api_models(api_key, base_url, timeout=8.0)

    if models:
+        default_idx = 0
+        if saved_model and saved_model in models:
+            default_idx = models.index(saved_model)
+
        print(f"Found {len(models)} model(s):\n")
        try:
            from simple_term_menu import TerminalMenu
-            menu_items = [f"  {m}" for m in models] + ["  Cancel"]
+            menu_items = [
+                f"  {m} (current)" if m == saved_model else f"  {m}"
+                for m in models
+            ] + ["  Cancel"]
            menu = TerminalMenu(
-                menu_items, cursor_index=0,
+                menu_items, cursor_index=default_idx,
                menu_cursor="-> ", menu_cursor_style=("fg_green", "bold"),
                menu_highlight_style=("fg_green",),
                cycle_cursor=True, clear_screen=False,
                title=f"Select model from {name}:",
            )
            idx = menu.show()
+            from hermes_cli.curses_ui import flush_stdin
+            flush_stdin()
            print()
            if idx is None or idx >= len(models):
                print("Cancelled.")
@@ -1756,7 +1787,8 @@ def _model_flow_named_custom(config, provider_info):
            model_name = models[idx]
        except (ImportError, NotImplementedError, OSError, subprocess.SubprocessError):
            for i, m in enumerate(models, 1):
-                print(f"  {i}. {m}")
+                suffix = " (current)" if m == saved_model else ""
+                print(f"  {i}. {m}{suffix}")
            print(f"  {len(models) + 1}. Cancel")
            print()
            try:
@@ -1772,6 +1804,13 @@ def _model_flow_named_custom(config, provider_info):
            except (ValueError, KeyboardInterrupt, EOFError):
                print("\nCancelled.")
                return
+    elif saved_model:
+        print("Could not fetch models from endpoint.")
+        try:
+            model_name = input(f"Model name [{saved_model}]: ").strip() or saved_model
+        except (KeyboardInterrupt, EOFError):
+            print("\nCancelled.")
+            return
    else:
        print("Could not fetch models from endpoint. Enter model name manually.")
        try:
@@ -1867,6 +1906,8 @@ def _prompt_reasoning_effort_selection(efforts, current_effort=""):
            title="Select reasoning effort:",
        )
        idx = menu.show()
+        from hermes_cli.curses_ui import flush_stdin
+        flush_stdin()
        if idx is None:
            return None
        print()
@@ -3309,10 +3350,11 @@ def _invalidate_update_cache():
    ``hermes update``, every profile is now current.
    """
    homes = []
-    # Default profile home
-    default_home = Path.home() / ".hermes"
+    # Default profile home (Docker-aware — uses /opt/data in Docker)
+    from hermes_constants import get_default_hermes_root
+    default_home = get_default_hermes_root()
    homes.append(default_home)
-    # Named profiles under ~/.hermes/profiles/
+    # Named profiles under <root>/profiles/
    profiles_root = default_home / "profiles"
    if profiles_root.is_dir():
        for entry in profiles_root.iterdir():
@@ -4049,7 +4091,10 @@ def cmd_profile(args):
            print(f"  {name} chat               Start chatting")
            print(f"  {name} gateway start      Start the messaging gateway")
            if clone or clone_all:
-                profile_dir_display = f"~/.hermes/profiles/{name}"
+                try:
+                    profile_dir_display = "~/" + str(profile_dir.relative_to(Path.home()))
+                except ValueError:
+                    profile_dir_display = str(profile_dir)
                print(f"\n  Edit {profile_dir_display}/.env for different API keys")
                print(f"  Edit {profile_dir_display}/SOUL.md for different personality")
            print()
@@ -4438,7 +4483,7 @@ For more help on a command:
    gateway_subparsers = gateway_parser.add_subparsers(dest="gateway_command")
    
    # gateway run (default)
-    gateway_run = gateway_subparsers.add_parser("run", help="Run gateway in foreground")
+    gateway_run = gateway_subparsers.add_parser("run", help="Run gateway in foreground (recommended for WSL, Docker, Termux)")
    gateway_run.add_argument("-v", "--verbose", action="count", default=0,
                             help="Increase stderr log verbosity (-v=INFO, -vv=DEBUG)")
    gateway_run.add_argument("-q", "--quiet", action="store_true",
@@ -4447,7 +4492,7 @@ For more help on a command:
                             help="Replace any existing gateway instance (useful for systemd)")
    
    # gateway start
-    gateway_start = gateway_subparsers.add_parser("start", help="Start gateway service")
+    gateway_start = gateway_subparsers.add_parser("start", help="Start the installed systemd/launchd background service")
    gateway_start.add_argument("--system", action="store_true", help="Target the Linux system-level gateway service")
    
    # gateway stop
@@ -4465,7 +4510,7 @@ For more help on a command:
    gateway_status.add_argument("--system", action="store_true", help="Target the Linux system-level gateway service")
    
    # gateway install
-    gateway_install = gateway_subparsers.add_parser("install", help="Install gateway as service")
+    gateway_install = gateway_subparsers.add_parser("install", help="Install gateway as a systemd/launchd background service")
    gateway_install.add_argument("--force", action="store_true", help="Force reinstall")
    gateway_install.add_argument("--system", action="store_true", help="Install as a Linux system-level service (starts at boot)")
    gateway_install.add_argument("--run-as-user", dest="run_as_user", help="User account the Linux system service should run as")
@@ -76,17 +76,22 @@ _STRIP_VENDOR_ONLY_PROVIDERS: frozenset[str] = frozenset({
    "copilot-acp",
 })

-# Providers whose own naming is authoritative -- pass through unchanged.
-_PASSTHROUGH_PROVIDERS: frozenset[str] = frozenset({
+# Providers whose native naming is authoritative -- pass through unchanged.
+_AUTHORITATIVE_NATIVE_PROVIDERS: frozenset[str] = frozenset({
    "gemini",
+    "huggingface",
+    "openai-codex",
+})
+
+# Direct providers that accept bare native names but should repair a matching
+# provider/ prefix when users copy the aggregator form into config.yaml.
+_MATCHING_PREFIX_STRIP_PROVIDERS: frozenset[str] = frozenset({
    "zai",
    "kimi-coding",
    "minimax",
    "minimax-cn",
    "alibaba",
    "qwen-oauth",
-    "huggingface",
-    "openai-codex",
    "custom",
 })

@@ -168,6 +173,40 @@ def _dots_to_hyphens(model_name: str) -> str:
    return model_name.replace(".", "-")


+def _normalize_provider_alias(provider_name: str) -> str:
+    """Resolve provider aliases to Hermes' canonical ids."""
+    raw = (provider_name or "").strip().lower()
+    if not raw:
+        return raw
+    try:
+        from hermes_cli.models import normalize_provider
+
+        return normalize_provider(raw)
+    except Exception:
+        return raw
+
+
+def _strip_matching_provider_prefix(model_name: str, target_provider: str) -> str:
+    """Strip ``provider/`` only when the prefix matches the target provider.
+
+    This prevents arbitrary slash-bearing model IDs from being mangled on
+    native providers while still repairing manual config values like
+    ``zai/glm-5.1`` for the ``zai`` provider.
+    """
+    if "/" not in model_name:
+        return model_name
+
+    prefix, remainder = model_name.split("/", 1)
+    if not prefix.strip() or not remainder.strip():
+        return model_name
+
+    normalized_prefix = _normalize_provider_alias(prefix)
+    normalized_target = _normalize_provider_alias(target_provider)
+    if normalized_prefix and normalized_prefix == normalized_target:
+        return remainder.strip()
+    return model_name
+
+
 def detect_vendor(model_name: str) -> Optional[str]:
    """Detect the vendor slug from a bare model name.

@@ -305,24 +344,37 @@ def normalize_model_for_provider(model_input: str, target_provider: str) -> str:
    if not name:
        return name

-    provider = (target_provider or "").strip().lower()
+    provider = _normalize_provider_alias(target_provider)

    # --- Aggregators: need vendor/model format ---
    if provider in _AGGREGATOR_PROVIDERS:
        return _prepend_vendor(name)

-    # --- Anthropic / OpenCode: strip vendor, dots -> hyphens ---
+    # --- Anthropic / OpenCode: strip matching provider prefix, dots -> hyphens ---
    if provider in _DOT_TO_HYPHEN_PROVIDERS:
-        bare = _strip_vendor_prefix(name)
+        bare = _strip_matching_provider_prefix(name, provider)
+        if "/" in bare:
+            return bare
        return _dots_to_hyphens(bare)

-    # --- Copilot: strip vendor, keep dots ---
+    # --- Copilot: strip matching provider prefix, keep dots ---
    if provider in _STRIP_VENDOR_ONLY_PROVIDERS:
-        return _strip_vendor_prefix(name)
+        return _strip_matching_provider_prefix(name, provider)

    # --- DeepSeek: map to one of two canonical names ---
    if provider == "deepseek":
-        return _normalize_for_deepseek(name)
+        bare = _strip_matching_provider_prefix(name, provider)
+        if "/" in bare:
+            return bare
+        return _normalize_for_deepseek(bare)
+
+    # --- Direct providers: repair matching provider prefixes only ---
+    if provider in _MATCHING_PREFIX_STRIP_PROVIDERS:
+        return _strip_matching_provider_prefix(name, provider)
+
+    # --- Authoritative native providers: preserve user-facing slugs as-is ---
+    if provider in _AUTHORITATIVE_NATIVE_PROVIDERS:
+        return name

    # --- Custom & all others: pass through as-is ---
    return name
@@ -809,42 +809,69 @@ def list_authenticated_providers(
        })
        seen_slugs.add(slug)

-    # --- 2. Check Hermes-only providers (nous, openai-codex, copilot) ---
+    # --- 2. Check Hermes-only providers (nous, openai-codex, copilot, opencode-go) ---
    from hermes_cli.providers import HERMES_OVERLAYS
+    from hermes_cli.auth import PROVIDER_REGISTRY as _auth_registry
+
+    # Build reverse mapping: models.dev ID → Hermes provider ID.
+    # HERMES_OVERLAYS keys may be models.dev IDs (e.g. "github-copilot")
+    # while _PROVIDER_MODELS and config.yaml use Hermes IDs ("copilot").
+    _mdev_to_hermes = {v: k for k, v in PROVIDER_TO_MODELS_DEV.items()}
+
    for pid, overlay in HERMES_OVERLAYS.items():
        if pid in seen_slugs:
            continue
+
+        # Resolve Hermes slug — e.g. "github-copilot" → "copilot"
+        hermes_slug = _mdev_to_hermes.get(pid, pid)
+        if hermes_slug in seen_slugs:
+            continue
+
        # Check if credentials exist
        has_creds = False
        if overlay.extra_env_vars:
            has_creds = any(os.environ.get(ev) for ev in overlay.extra_env_vars)
-        if overlay.auth_type in ("oauth_device_code", "oauth_external", "external_process"):
+        # Also check api_key_env_vars from PROVIDER_REGISTRY for api_key auth_type
+        if not has_creds and overlay.auth_type == "api_key":
+            for _key in (pid, hermes_slug):
+                pcfg = _auth_registry.get(_key)
+                if pcfg and pcfg.api_key_env_vars:
+                    if any(os.environ.get(ev) for ev in pcfg.api_key_env_vars):
+                        has_creds = True
+                        break
+        if not has_creds and overlay.auth_type in ("oauth_device_code", "oauth_external", "external_process"):
            # These use auth stores, not env vars — check for auth.json entries
            try:
                from hermes_cli.auth import _load_auth_store
                store = _load_auth_store()
-                if store and (pid in store.get("providers", {}) or pid in store.get("credential_pool", {})):
+                providers_store = store.get("providers", {})
+                pool_store = store.get("credential_pool", {})
+                if store and (
+                    pid in providers_store or hermes_slug in providers_store
+                    or pid in pool_store or hermes_slug in pool_store
+                ):
                    has_creds = True
            except Exception as exc:
                logger.debug("Auth store check failed for %s: %s", pid, exc)
        if not has_creds:
            continue

-        # Use curated list
-        model_ids = curated.get(pid, [])
+        # Use curated list — look up by Hermes slug, fall back to overlay key
+        model_ids = curated.get(hermes_slug, []) or curated.get(pid, [])
        total = len(model_ids)
        top = model_ids[:max_models]

        results.append({
-            "slug": pid,
-            "name": get_label(pid),
-            "is_current": pid == current_provider,
+            "slug": hermes_slug,
+            "name": get_label(hermes_slug),
+            "is_current": hermes_slug == current_provider or pid == current_provider,
            "is_user_defined": False,
            "models": top,
            "total_models": total,
            "source": "hermes",
        })
        seen_slugs.add(pid)
+        seen_slugs.add(hermes_slug)

    # --- 3. User-defined endpoints from config ---
    if user_providers and isinstance(user_providers, dict):
@@ -87,6 +87,8 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "openai/gpt-5.4-nano",
    ],
    "openai-codex": [
+        "gpt-5.4",
+        "gpt-5.4-mini",
        "gpt-5.3-codex",
        "gpt-5.2-codex",
        "gpt-5.1-codex-mini",
@@ -129,6 +131,19 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "glm-4.5",
        "glm-4.5-flash",
    ],
+    "xai": [
+        "grok-4.20-0309-reasoning",
+        "grok-4.20-0309-non-reasoning",
+        "grok-4.20-multi-agent-0309",
+        "grok-4-1-fast-reasoning",
+        "grok-4-1-fast-non-reasoning",
+        "grok-4-fast-reasoning",
+        "grok-4-fast-non-reasoning",
+        "grok-4-0709",
+        "grok-code-fast-1",
+        "grok-3",
+        "grok-3-mini",
+    ],
    "kimi-coding": [
        "kimi-for-coding",
        "kimi-k2.5",
@@ -144,22 +159,16 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "kimi-k2-0905-preview",
    ],
    "minimax": [
-        "MiniMax-M1",
-        "MiniMax-M1-40k",
-        "MiniMax-M1-80k",
-        "MiniMax-M1-128k",
-        "MiniMax-M1-256k",
-        "MiniMax-M2.5",
        "MiniMax-M2.7",
+        "MiniMax-M2.5",
+        "MiniMax-M2.1",
+        "MiniMax-M2",
    ],
    "minimax-cn": [
-        "MiniMax-M1",
-        "MiniMax-M1-40k",
-        "MiniMax-M1-80k",
-        "MiniMax-M1-128k",
-        "MiniMax-M1-256k",
-        "MiniMax-M2.5",
        "MiniMax-M2.7",
+        "MiniMax-M2.5",
+        "MiniMax-M2.1",
+        "MiniMax-M2",
    ],
    "anthropic": [
        "claude-opus-4-6",
@@ -143,6 +143,7 @@ def _tts_label(current_provider: str) -> str:
        "openai": "OpenAI TTS",
        "elevenlabs": "ElevenLabs",
        "edge": "Edge TTS",
+        "mistral": "Mistral Voxtral TTS",
        "neutts": "NeuTTS",
    }
    return mapping.get(current_provider or "edge", current_provider or "Edge TTS")
@@ -309,6 +310,7 @@ def get_nous_subscription_features(
        tts_current_provider in {"edge", "neutts"}
        or (tts_current_provider == "openai" and (managed_tts_available or direct_openai_tts))
        or (tts_current_provider == "elevenlabs" and direct_elevenlabs)
+        or (tts_current_provider == "mistral" and bool(get_env_value("MISTRAL_API_KEY")))
    )
    tts_active = bool(tts_tool_enabled and tts_available)

@@ -201,8 +201,7 @@ class PluginContext:

        The *setup_fn* receives an argparse subparser and should add any
        arguments/sub-subparsers.  If *handler_fn* is provided it is set
-        as the default dispatch function via ``set_defaults(func=...)``.
-        """
+        as the default dispatch function via ``set_defaults(func=...)``."""
        self._manager._cli_commands[name] = {
            "name": name,
            "help": help,
@@ -213,6 +212,38 @@ class PluginContext:
        }
        logger.debug("Plugin %s registered CLI command: %s", self.manifest.name, name)

+    # -- context engine registration -----------------------------------------
+
+    def register_context_engine(self, engine) -> None:
+        """Register a context engine to replace the built-in ContextCompressor.
+
+        Only one context engine plugin is allowed. If a second plugin tries
+        to register one, it is rejected with a warning.
+
+        The engine must be an instance of ``agent.context_engine.ContextEngine``.
+        """
+        if self._manager._context_engine is not None:
+            logger.warning(
+                "Plugin '%s' tried to register a context engine, but one is "
+                "already registered. Only one context engine plugin is allowed.",
+                self.manifest.name,
+            )
+            return
+        # Defer the import to avoid circular deps at module level
+        from agent.context_engine import ContextEngine
+        if not isinstance(engine, ContextEngine):
+            logger.warning(
+                "Plugin '%s' tried to register a context engine that does not "
+                "inherit from ContextEngine. Ignoring.",
+                self.manifest.name,
+            )
+            return
+        self._manager._context_engine = engine
+        logger.info(
+            "Plugin '%s' registered context engine: %s",
+            self.manifest.name, engine.name,
+        )
+
    # -- hook registration --------------------------------------------------

    def register_hook(self, hook_name: str, callback: Callable) -> None:
@@ -245,6 +276,7 @@ class PluginManager:
        self._hooks: Dict[str, List[Callable]] = {}
        self._plugin_tool_names: Set[str] = set()
        self._cli_commands: Dict[str, dict] = {}
+        self._context_engine = None  # Set by a plugin via register_context_engine()
        self._discovered: bool = False
        self._cli_ref = None  # Set by CLI after plugin discovery

@@ -566,6 +598,11 @@ def get_plugin_cli_commands() -> Dict[str, dict]:
    return dict(get_plugin_manager()._cli_commands)


+def get_plugin_context_engine():
+    """Return the plugin-registered context engine, or None."""
+    return get_plugin_manager()._context_engine
+
+
 def get_plugin_toolsets() -> List[tuple]:
    """Return plugin toolsets as ``(key, label, description)`` tuples.

@@ -531,7 +531,7 @@ def cmd_disable(name: str) -> None:

    disabled.add(name)
    _save_disabled_set(disabled)
-    console.print(f"[yellow]⊘[/yellow] Plugin [bold]{name}[/bold] disabled. Takes effect on next session.")
+    console.print(f"[yellow]\u2298[/yellow] Plugin [bold]{name}[/bold] disabled. Takes effect on next session.")


 def cmd_list() -> None:
@@ -594,8 +594,152 @@ def cmd_list() -> None:
    console.print("[dim]Enable/disable:[/dim] hermes plugins enable/disable <name>")


+# ---------------------------------------------------------------------------
+# Provider plugin discovery helpers
+# ---------------------------------------------------------------------------
+
+
+def _discover_memory_providers() -> list[tuple[str, str]]:
+    """Return [(name, description), ...] for available memory providers."""
+    try:
+        from plugins.memory import discover_memory_providers
+        return [(name, desc) for name, desc, _avail in discover_memory_providers()]
+    except Exception:
+        return []
+
+
+def _discover_context_engines() -> list[tuple[str, str]]:
+    """Return [(name, description), ...] for available context engines."""
+    try:
+        from plugins.context_engine import discover_context_engines
+        return [(name, desc) for name, desc, _avail in discover_context_engines()]
+    except Exception:
+        return []
+
+
+def _get_current_memory_provider() -> str:
+    """Return the current memory.provider from config (empty = built-in)."""
+    try:
+        from hermes_cli.config import load_config
+        config = load_config()
+        return config.get("memory", {}).get("provider", "") or ""
+    except Exception:
+        return ""
+
+
+def _get_current_context_engine() -> str:
+    """Return the current context.engine from config."""
+    try:
+        from hermes_cli.config import load_config
+        config = load_config()
+        return config.get("context", {}).get("engine", "compressor") or "compressor"
+    except Exception:
+        return "compressor"
+
+
+def _save_memory_provider(name: str) -> None:
+    """Persist memory.provider to config.yaml."""
+    from hermes_cli.config import load_config, save_config
+    config = load_config()
+    if "memory" not in config:
+        config["memory"] = {}
+    config["memory"]["provider"] = name
+    save_config(config)
+
+
+def _save_context_engine(name: str) -> None:
+    """Persist context.engine to config.yaml."""
+    from hermes_cli.config import load_config, save_config
+    config = load_config()
+    if "context" not in config:
+        config["context"] = {}
+    config["context"]["engine"] = name
+    save_config(config)
+
+
+def _configure_memory_provider() -> bool:
+    """Launch a radio picker for memory providers. Returns True if changed."""
+    from hermes_cli.curses_ui import curses_radiolist
+
+    current = _get_current_memory_provider()
+    providers = _discover_memory_providers()
+
+    # Build items: "built-in" first, then discovered providers
+    items = ["built-in (default)"]
+    names = [""]  # empty string = built-in
+    selected = 0
+
+    for name, desc in providers:
+        names.append(name)
+        label = f"{name} \u2014 {desc}" if desc else name
+        items.append(label)
+        if name == current:
+            selected = len(items) - 1
+
+    # If current provider isn't in discovered list, add it
+    if current and current not in names:
+        names.append(current)
+        items.append(f"{current} (not found)")
+        selected = len(items) - 1
+
+    choice = curses_radiolist(
+        title="Memory Provider (select one)",
+        items=items,
+        selected=selected,
+    )
+
+    new_provider = names[choice]
+    if new_provider != current:
+        _save_memory_provider(new_provider)
+        return True
+    return False
+
+
+def _configure_context_engine() -> bool:
+    """Launch a radio picker for context engines. Returns True if changed."""
+    from hermes_cli.curses_ui import curses_radiolist
+
+    current = _get_current_context_engine()
+    engines = _discover_context_engines()
+
+    # Build items: "compressor" first (built-in), then discovered engines
+    items = ["compressor (default)"]
+    names = ["compressor"]
+    selected = 0
+
+    for name, desc in engines:
+        names.append(name)
+        label = f"{name} \u2014 {desc}" if desc else name
+        items.append(label)
+        if name == current:
+            selected = len(items) - 1
+
+    # If current engine isn't in discovered list and isn't compressor, add it
+    if current != "compressor" and current not in names:
+        names.append(current)
+        items.append(f"{current} (not found)")
+        selected = len(items) - 1
+
+    choice = curses_radiolist(
+        title="Context Engine (select one)",
+        items=items,
+        selected=selected,
+    )
+
+    new_engine = names[choice]
+    if new_engine != current:
+        _save_context_engine(new_engine)
+        return True
+    return False
+
+
+# ---------------------------------------------------------------------------
+# Composite plugins UI
+# ---------------------------------------------------------------------------
+
+
 def cmd_toggle() -> None:
-    """Interactive curses checklist to enable/disable installed plugins."""
+    """Interactive composite UI — general plugins + provider plugin categories."""
    from rich.console import Console

    try:
@@ -606,18 +750,13 @@ def cmd_toggle() -> None:
    console = Console()
    plugins_dir = _plugins_dir()

+    # -- General plugins discovery --
    dirs = sorted(d for d in plugins_dir.iterdir() if d.is_dir())
-    if not dirs:
-        console.print("[dim]No plugins installed.[/dim]")
-        console.print("[dim]Install with:[/dim] hermes plugins install owner/repo")
-        return
-
    disabled = _get_disabled_set()

-    # Build items list: "name — description" for display
-    names = []
-    labels = []
-    selected = set()
+    plugin_names = []
+    plugin_labels = []
+    plugin_selected = set()

    for i, d in enumerate(dirs):
        manifest_file = d / "plugin.yaml"
@@ -633,36 +772,335 @@ def cmd_toggle() -> None:
            except Exception:
                pass

-        names.append(name)
-        label = f"{name} — {description}" if description else name
-        labels.append(label)
+        plugin_names.append(name)
+        label = f"{name} \u2014 {description}" if description else name
+        plugin_labels.append(label)

        if name not in disabled and d.name not in disabled:
-            selected.add(i)
+            plugin_selected.add(i)

-    from hermes_cli.curses_ui import curses_checklist
+    # -- Provider categories --
+    current_memory = _get_current_memory_provider() or "built-in"
+    current_context = _get_current_context_engine()
+    categories = [
+        ("Memory Provider", current_memory, _configure_memory_provider),
+        ("Context Engine", current_context, _configure_context_engine),
+    ]

-    result = curses_checklist(
-        title="Plugins — toggle enabled/disabled",
-        items=labels,
-        selected=selected,
-    )
+    has_plugins = bool(plugin_names)
+    has_categories = bool(categories)

-    # Compute new disabled set from deselected items
+    if not has_plugins and not has_categories:
+        console.print("[dim]No plugins installed and no provider categories available.[/dim]")
+        console.print("[dim]Install with:[/dim] hermes plugins install owner/repo")
+        return
+
+    # Non-TTY fallback
+    if not sys.stdin.isatty():
+        console.print("[dim]Interactive mode requires a terminal.[/dim]")
+        return
+
+    # Launch the composite curses UI
+    try:
+        import curses
+        _run_composite_ui(curses, plugin_names, plugin_labels, plugin_selected,
+                          disabled, categories, console)
+    except ImportError:
+        _run_composite_fallback(plugin_names, plugin_labels, plugin_selected,
+                                disabled, categories, console)
+
+
+def _run_composite_ui(curses, plugin_names, plugin_labels, plugin_selected,
+                      disabled, categories, console):
+    """Custom curses screen with checkboxes + category action rows."""
+    from hermes_cli.curses_ui import flush_stdin
+
+    chosen = set(plugin_selected)
+    n_plugins = len(plugin_names)
+    # Total rows: plugins + separator + categories
+    # separator is not navigable
+    n_categories = len(categories)
+    total_items = n_plugins + n_categories  # navigable items
+
+    result_holder = {"plugins_changed": False, "providers_changed": False}
+
+    def _draw(stdscr):
+        curses.curs_set(0)
+        if curses.has_colors():
+            curses.start_color()
+            curses.use_default_colors()
+            curses.init_pair(1, curses.COLOR_GREEN, -1)
+            curses.init_pair(2, curses.COLOR_YELLOW, -1)
+            curses.init_pair(3, curses.COLOR_CYAN, -1)
+            curses.init_pair(4, 8, -1)  # dim gray
+        cursor = 0
+        scroll_offset = 0
+
+        while True:
+            stdscr.clear()
+            max_y, max_x = stdscr.getmaxyx()
+
+            # Header
+            try:
+                hattr = curses.A_BOLD
+                if curses.has_colors():
+                    hattr |= curses.color_pair(2)
+                stdscr.addnstr(0, 0, "Plugins", max_x - 1, hattr)
+                stdscr.addnstr(
+                    1, 0,
+                    "  \u2191\u2193 navigate  SPACE toggle  ENTER configure/confirm  ESC done",
+                    max_x - 1, curses.A_DIM,
+                )
+            except curses.error:
+                pass
+
+            # Build display rows
+            # Row layout:
+            #   [plugins section header] (not navigable, skipped in scroll math)
+            #   plugin checkboxes (navigable, indices 0..n_plugins-1)
+            #   [separator] (not navigable)
+            #   [categories section header] (not navigable)
+            #   category action rows (navigable, indices n_plugins..total_items-1)
+
+            visible_rows = max_y - 4
+            if cursor < scroll_offset:
+                scroll_offset = cursor
+            elif cursor >= scroll_offset + visible_rows:
+                scroll_offset = cursor - visible_rows + 1
+
+            y = 3  # start drawing after header
+
+            # Determine which items are visible based on scroll
+            # We need to map logical cursor positions to screen rows
+            # accounting for non-navigable separator/headers
+
+            draw_row = 0  # tracks navigable item index
+
+            # --- General Plugins section ---
+            if n_plugins > 0:
+                # Section header
+                if y < max_y - 1:
+                    try:
+                        sattr = curses.A_BOLD
+                        if curses.has_colors():
+                            sattr |= curses.color_pair(2)
+                        stdscr.addnstr(y, 0, "  General Plugins", max_x - 1, sattr)
+                    except curses.error:
+                        pass
+                    y += 1
+
+                for i in range(n_plugins):
+                    if y >= max_y - 1:
+                        break
+                    check = "\u2713" if i in chosen else " "
+                    arrow = "\u2192" if i == cursor else " "
+                    line = f" {arrow} [{check}] {plugin_labels[i]}"
+                    attr = curses.A_NORMAL
+                    if i == cursor:
+                        attr = curses.A_BOLD
+                        if curses.has_colors():
+                            attr |= curses.color_pair(1)
+                    try:
+                        stdscr.addnstr(y, 0, line, max_x - 1, attr)
+                    except curses.error:
+                        pass
+                    y += 1
+
+            # --- Separator ---
+            if y < max_y - 1:
+                y += 1  # blank line
+
+            # --- Provider Plugins section ---
+            if n_categories > 0 and y < max_y - 1:
+                try:
+                    sattr = curses.A_BOLD
+                    if curses.has_colors():
+                        sattr |= curses.color_pair(2)
+                    stdscr.addnstr(y, 0, "  Provider Plugins", max_x - 1, sattr)
+                except curses.error:
+                    pass
+                y += 1
+
+                for ci, (cat_name, cat_current, _cat_fn) in enumerate(categories):
+                    if y >= max_y - 1:
+                        break
+                    cat_idx = n_plugins + ci
+                    arrow = "\u2192" if cat_idx == cursor else " "
+                    line = f" {arrow}   {cat_name:<24} \u25b8 {cat_current}"
+                    attr = curses.A_NORMAL
+                    if cat_idx == cursor:
+                        attr = curses.A_BOLD
+                        if curses.has_colors():
+                            attr |= curses.color_pair(3)
+                    try:
+                        stdscr.addnstr(y, 0, line, max_x - 1, attr)
+                    except curses.error:
+                        pass
+                    y += 1
+
+            stdscr.refresh()
+            key = stdscr.getch()
+
+            if key in (curses.KEY_UP, ord("k")):
+                if total_items > 0:
+                    cursor = (cursor - 1) % total_items
+            elif key in (curses.KEY_DOWN, ord("j")):
+                if total_items > 0:
+                    cursor = (cursor + 1) % total_items
+            elif key == ord(" "):
+                if cursor < n_plugins:
+                    # Toggle general plugin
+                    chosen.symmetric_difference_update({cursor})
+                else:
+                    # Provider category — launch sub-screen
+                    ci = cursor - n_plugins
+                    if 0 <= ci < n_categories:
+                        curses.endwin()
+                        _cat_name, _cat_cur, cat_fn = categories[ci]
+                        changed = cat_fn()
+                        if changed:
+                            result_holder["providers_changed"] = True
+                            # Refresh current values
+                            categories[ci] = (
+                                _cat_name,
+                                _get_current_memory_provider() or "built-in" if ci == 0
+                                else _get_current_context_engine(),
+                                cat_fn,
+                            )
+                        # Re-enter curses
+                        stdscr = curses.initscr()
+                        curses.noecho()
+                        curses.cbreak()
+                        stdscr.keypad(True)
+                        if curses.has_colors():
+                            curses.start_color()
+                            curses.use_default_colors()
+                            curses.init_pair(1, curses.COLOR_GREEN, -1)
+                            curses.init_pair(2, curses.COLOR_YELLOW, -1)
+                            curses.init_pair(3, curses.COLOR_CYAN, -1)
+                            curses.init_pair(4, 8, -1)
+                        curses.curs_set(0)
+            elif key in (curses.KEY_ENTER, 10, 13):
+                if cursor < n_plugins:
+                    # ENTER on a plugin checkbox — confirm and exit
+                    result_holder["plugins_changed"] = True
+                    return
+                else:
+                    # ENTER on a category — same as SPACE, launch sub-screen
+                    ci = cursor - n_plugins
+                    if 0 <= ci < n_categories:
+                        curses.endwin()
+                        _cat_name, _cat_cur, cat_fn = categories[ci]
+                        changed = cat_fn()
+                        if changed:
+                            result_holder["providers_changed"] = True
+                            categories[ci] = (
+                                _cat_name,
+                                _get_current_memory_provider() or "built-in" if ci == 0
+                                else _get_current_context_engine(),
+                                cat_fn,
+                            )
+                        stdscr = curses.initscr()
+                        curses.noecho()
+                        curses.cbreak()
+                        stdscr.keypad(True)
+                        if curses.has_colors():
+                            curses.start_color()
+                            curses.use_default_colors()
+                            curses.init_pair(1, curses.COLOR_GREEN, -1)
+                            curses.init_pair(2, curses.COLOR_YELLOW, -1)
+                            curses.init_pair(3, curses.COLOR_CYAN, -1)
+                            curses.init_pair(4, 8, -1)
+                        curses.curs_set(0)
+            elif key in (27, ord("q")):
+                # Save plugin changes on exit
+                result_holder["plugins_changed"] = True
+                return
+
+    curses.wrapper(_draw)
+    flush_stdin()
+
+    # Persist general plugin changes
    new_disabled = set()
-    for i, name in enumerate(names):
-        if i not in result:
+    for i, name in enumerate(plugin_names):
+        if i not in chosen:
            new_disabled.add(name)

    if new_disabled != disabled:
        _save_disabled_set(new_disabled)
-        enabled_count = len(names) - len(new_disabled)
+        enabled_count = len(plugin_names) - len(new_disabled)
        console.print(
-            f"\n[green]✓[/green] {enabled_count} enabled, {len(new_disabled)} disabled. "
-            f"Takes effect on next session."
+            f"\n[green]\u2713[/green] General plugins: {enabled_count} enabled, "
+            f"{len(new_disabled)} disabled."
        )
-    else:
-        console.print("\n[dim]No changes.[/dim]")
+    elif n_plugins > 0:
+        console.print("\n[dim]General plugins unchanged.[/dim]")
+
+    if result_holder["providers_changed"]:
+        new_memory = _get_current_memory_provider() or "built-in"
+        new_context = _get_current_context_engine()
+        console.print(
+            f"[green]\u2713[/green] Memory provider: [bold]{new_memory}[/bold]  "
+            f"Context engine: [bold]{new_context}[/bold]"
+        )
+
+    if n_plugins > 0 or result_holder["providers_changed"]:
+        console.print("[dim]Changes take effect on next session.[/dim]")
+    console.print()
+
+
+def _run_composite_fallback(plugin_names, plugin_labels, plugin_selected,
+                            disabled, categories, console):
+    """Text-based fallback for the composite plugins UI."""
+    from hermes_cli.colors import Colors, color
+
+    print(color("\n  Plugins", Colors.YELLOW))
+
+    # General plugins
+    if plugin_names:
+        chosen = set(plugin_selected)
+        print(color("\n  General Plugins", Colors.YELLOW))
+        print(color("  Toggle by number, Enter to confirm.\n", Colors.DIM))
+
+        while True:
+            for i, label in enumerate(plugin_labels):
+                marker = color("[\u2713]", Colors.GREEN) if i in chosen else "[ ]"
+                print(f"  {marker} {i + 1:>2}. {label}")
+            print()
+            try:
+                val = input(color("  Toggle # (or Enter to confirm): ", Colors.DIM)).strip()
+                if not val:
+                    break
+                idx = int(val) - 1
+                if 0 <= idx < len(plugin_names):
+                    chosen.symmetric_difference_update({idx})
+            except (ValueError, KeyboardInterrupt, EOFError):
+                return
+            print()
+
+        new_disabled = set()
+        for i, name in enumerate(plugin_names):
+            if i not in chosen:
+                new_disabled.add(name)
+        if new_disabled != disabled:
+            _save_disabled_set(new_disabled)
+
+    # Provider categories
+    if categories:
+        print(color("\n  Provider Plugins", Colors.YELLOW))
+        for ci, (cat_name, cat_current, cat_fn) in enumerate(categories):
+            print(f"  {ci + 1}. {cat_name} [{cat_current}]")
+        print()
+        try:
+            val = input(color("  Configure # (or Enter to skip): ", Colors.DIM)).strip()
+            if val:
+                ci = int(val) - 1
+                if 0 <= ci < len(categories):
+                    categories[ci][2]()  # call the configure function
+        except (ValueError, KeyboardInterrupt, EOFError):
+            pass
+
+    print()


 def plugins_command(args) -> None:
@@ -42,6 +42,11 @@ _PROFILE_DIRS = [
    "plans",
    "workspace",
    "cron",
+    # Per-profile HOME for subprocesses: isolates system tool configs (git,
+    # ssh, gh, npm …) so credentials don't bleed between profiles.  In Docker
+    # this also ensures tool configs land inside the persistent volume.
+    # See hermes_constants.get_subprocess_home() and issue #4426.
+    "home",
 ]

 # Files copied during --clone (if they exist in the source)
@@ -115,16 +120,26 @@ _HERMES_SUBCOMMANDS = frozenset({
 def _get_profiles_root() -> Path:
    """Return the directory where named profiles are stored.

-    Always ``~/.hermes/profiles/`` — anchored to the user's home,
-    NOT to the current HERMES_HOME (which may itself be a profile).
-    This ensures ``coder profile list`` can see all profiles.
+    Anchored to the hermes root, NOT to the current HERMES_HOME
+    (which may itself be a profile).  This ensures ``coder profile list``
+    can see all profiles.
+
+    In Docker/custom deployments where HERMES_HOME points outside
+    ``~/.hermes``, profiles live under ``HERMES_HOME/profiles/`` so
+    they persist on the mounted volume.
    """
-    return Path.home() / ".hermes" / "profiles"
+    return _get_default_hermes_home() / "profiles"


 def _get_default_hermes_home() -> Path:
-    """Return the default (pre-profile) HERMES_HOME path."""
-    return Path.home() / ".hermes"
+    """Return the default (pre-profile) HERMES_HOME path.
+
+    In standard deployments this is ``~/.hermes``.
+    In Docker/custom deployments where HERMES_HOME is outside ``~/.hermes``
+    (e.g. ``/opt/data``), returns HERMES_HOME directly.
+    """
+    from hermes_constants import get_default_hermes_root
+    return get_default_hermes_root()


 def _get_active_profile_path() -> Path:
@@ -88,11 +88,11 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = {
        base_url_env_var="KIMI_BASE_URL",
    ),
    "minimax": HermesOverlay(
-        transport="openai_chat",
+        transport="anthropic_messages",
        base_url_env_var="MINIMAX_BASE_URL",
    ),
    "minimax-cn": HermesOverlay(
-        transport="openai_chat",
+        transport="anthropic_messages",
        base_url_env_var="MINIMAX_CN_BASE_URL",
    ),
    "deepseek": HermesOverlay(
@@ -127,6 +127,11 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = {
        is_aggregator=True,
        base_url_env_var="HF_BASE_URL",
    ),
+    "xai": HermesOverlay(
+        transport="openai_chat",
+        base_url_override="https://api.x.ai/v1",
+        base_url_env_var="XAI_BASE_URL",
+    ),
 }


@@ -163,6 +168,10 @@ ALIASES: Dict[str, str] = {
    "z.ai": "zai",
    "zhipu": "zai",

+    # xai
+    "x-ai": "xai",
+    "x.ai": "xai",
+
    # kimi-for-coding (models.dev ID)
    "kimi": "kimi-for-coding",
    "kimi-coding": "kimi-for-coding",
@@ -341,6 +350,7 @@ def get_label(provider_id: str) -> str:



+
 def is_aggregator(provider: str) -> bool:
    """Return True when the provider is a multi-model aggregator."""
    pdef = get_provider(provider)
@@ -106,8 +106,8 @@ _DEFAULT_PROVIDER_MODELS = {
    ],
    "zai": ["glm-5", "glm-4.7", "glm-4.5", "glm-4.5-flash"],
    "kimi-coding": ["kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview"],
-    "minimax": ["MiniMax-M1", "MiniMax-M1-40k", "MiniMax-M1-80k", "MiniMax-M1-128k", "MiniMax-M1-256k", "MiniMax-M2.5", "MiniMax-M2.7"],
-    "minimax-cn": ["MiniMax-M1", "MiniMax-M1-40k", "MiniMax-M1-80k", "MiniMax-M1-128k", "MiniMax-M1-256k", "MiniMax-M2.5", "MiniMax-M2.7"],
+    "minimax": ["MiniMax-M2.7", "MiniMax-M2.5", "MiniMax-M2.1", "MiniMax-M2"],
+    "minimax-cn": ["MiniMax-M2.7", "MiniMax-M2.5", "MiniMax-M2.1", "MiniMax-M2"],
    "ai-gateway": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5", "google/gemini-3-flash"],
    "kilocode": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5.4", "google/gemini-3-pro-preview", "google/gemini-3-flash-preview"],
    "opencode-zen": ["gpt-5.4", "gpt-5.3-codex", "claude-sonnet-4-6", "gemini-3-flash", "glm-5", "kimi-k2.5", "minimax-m2.7"],
@@ -338,6 +338,8 @@ def _curses_prompt_choice(question: str, choices: list, default: int = 0) -> int
                    return

        curses.wrapper(_curses_menu)
+        from hermes_cli.curses_ui import flush_stdin
+        flush_stdin()
        return result_holder[0]
    except Exception:
        return -1
@@ -555,6 +557,8 @@ def _print_setup_summary(config: dict, hermes_home):
        tool_status.append(("Text-to-Speech (OpenAI)", True, None))
    elif tts_provider == "minimax" and get_env_value("MINIMAX_API_KEY"):
        tool_status.append(("Text-to-Speech (MiniMax)", True, None))
+    elif tts_provider == "mistral" and get_env_value("MISTRAL_API_KEY"):
+        tool_status.append(("Text-to-Speech (Mistral Voxtral)", True, None))
    elif tts_provider == "neutts":
        try:
            import importlib.util
@@ -1042,6 +1046,7 @@ def _setup_tts_provider(config: dict):
        "elevenlabs": "ElevenLabs",
        "openai": "OpenAI TTS",
        "minimax": "MiniMax TTS",
+        "mistral": "Mistral Voxtral TTS",
        "neutts": "NeuTTS",
    }
    current_label = provider_labels.get(current_provider, current_provider)
@@ -1062,10 +1067,11 @@ def _setup_tts_provider(config: dict):
            "ElevenLabs (premium quality, needs API key)",
            "OpenAI TTS (good quality, needs API key)",
            "MiniMax TTS (high quality with voice cloning, needs API key)",
+            "Mistral Voxtral TTS (multilingual, native Opus, needs API key)",
            "NeuTTS (local on-device, free, ~300MB model download)",
        ]
    )
-    providers.extend(["edge", "elevenlabs", "openai", "minimax", "neutts"])
+    providers.extend(["edge", "elevenlabs", "openai", "minimax", "mistral", "neutts"])
    choices.append(f"Keep current ({current_label})")
    keep_current_idx = len(choices) - 1
    idx = prompt_choice("Select TTS provider:", choices, keep_current_idx)
@@ -1143,6 +1149,18 @@ def _setup_tts_provider(config: dict):
                print_warning("No API key provided. Falling back to Edge TTS.")
                selected = "edge"

+    elif selected == "mistral":
+        existing = get_env_value("MISTRAL_API_KEY")
+        if not existing:
+            print()
+            api_key = prompt("Mistral API key for TTS", password=True)
+            if api_key:
+                save_env_value("MISTRAL_API_KEY", api_key)
+                print_success("Mistral TTS API key saved")
+            else:
+                print_warning("No API key provided. Falling back to Edge TTS.")
+                selected = "edge"
+
    # Save the selection
    if "tts" not in config:
        config["tts"] = {}
@@ -1923,9 +1941,9 @@ def _setup_matrix():
            save_env_value("MATRIX_ENCRYPTION", "true")
            print_success("E2EE enabled")

-        matrix_pkg = "matrix-nio[e2e]" if want_e2ee else "matrix-nio"
+        matrix_pkg = "mautrix[encryption]" if want_e2ee else "mautrix"
        try:
-            __import__("nio")
+            __import__("mautrix")
        except ImportError:
            print_info(f"Installing {matrix_pkg}...")
            import subprocess
@@ -2028,6 +2046,12 @@ def _setup_whatsapp():
        print_info("or personal self-chat) and pair via QR code.")


+def _setup_weixin():
+    """Configure Weixin (personal WeChat) via iLink Bot API QR login."""
+    from hermes_cli.gateway import _setup_weixin as _gateway_setup_weixin
+    _gateway_setup_weixin()
+
+
 def _setup_bluebubbles():
    """Configure BlueBubbles iMessage gateway."""
    print_header("BlueBubbles (iMessage)")
@@ -2147,6 +2171,7 @@ _GATEWAY_PLATFORMS = [
    ("Matrix", "MATRIX_ACCESS_TOKEN", _setup_matrix),
    ("Mattermost", "MATTERMOST_TOKEN", _setup_mattermost),
    ("WhatsApp", "WHATSAPP_ENABLED", _setup_whatsapp),
+    ("Weixin (WeChat)", "WEIXIN_ACCOUNT_ID", _setup_weixin),
    ("BlueBubbles (iMessage)", "BLUEBUBBLES_SERVER_URL", _setup_bluebubbles),
    ("Webhooks (GitHub, GitLab, etc.)", "WEBHOOK_ENABLED", _setup_webhooks),
 ]
@@ -2913,19 +2938,33 @@ def run_setup_wizard(args):
    _offer_launch_chat()


+def _resolve_hermes_chat_argv() -> Optional[list[str]]:
+    """Resolve argv for launching ``hermes chat`` in a fresh process."""
+    hermes_bin = shutil.which("hermes")
+    if hermes_bin:
+        return [hermes_bin, "chat"]
+
+    try:
+        if importlib.util.find_spec("hermes_cli") is not None:
+            return [sys.executable, "-m", "hermes_cli.main", "chat"]
+    except Exception:
+        pass
+
+    return None
+
+
 def _offer_launch_chat():
    """Prompt the user to jump straight into chat after setup."""
    print()
-    if prompt_yes_no("Launch hermes chat now?", True):
-        from hermes_cli.main import cmd_chat
-        from types import SimpleNamespace
-        cmd_chat(SimpleNamespace(
-            query=None, resume=None, continue_last=None, model=None,
-            provider=None, effort=None, skin=None, oneshot=False,
-            quiet=False, verbose=False, toolsets=None, skills=None,
-            yolo=False, source=None, worktree=False, checkpoints=False,
-            pass_session_id=False, max_turns=None,
-        ))
+    if not prompt_yes_no("Launch hermes chat now?", True):
+        return
+
+    chat_argv = _resolve_hermes_chat_argv()
+    if not chat_argv:
+        print_info("Could not relaunch Hermes automatically. Run 'hermes chat' manually.")
+        return
+
+    os.execvp(chat_argv[0], chat_argv)


 def _run_first_time_quick_setup(config: dict, hermes_home, is_existing: bool):
@@ -31,6 +31,7 @@ PLATFORMS = {
    "dingtalk": "💬 DingTalk",
    "feishu": "🪽 Feishu",
    "wecom": "💬 WeCom",
+    "weixin": "💬 Weixin",
    "webhook": "🔗 Webhook",
 }

@@ -151,7 +151,8 @@ def do_search(query: str, source: str = "all", limit: int = 10,

    auth = GitHubAuth()
    sources = create_source_router(auth)
-    results = unified_search(query, sources, source_filter=source, limit=limit)
+    with c.status("[bold]Searching registries..."):
+        results = unified_search(query, sources, source_filter=source, limit=limit)

    if not results:
        c.print("[dim]No skills found matching your query.[/]\n")
@@ -187,7 +188,7 @@ def do_browse(page: int = 1, page_size: int = 20, source: str = "all",
    Official skills are always shown first, regardless of source filter.
    """
    from tools.skills_hub import (
-        GitHubAuth, create_source_router,
+        GitHubAuth, create_source_router, parallel_search_sources,
    )

    # Clamp page_size to safe range
@@ -198,27 +199,23 @@ def do_browse(page: int = 1, page_size: int = 20, source: str = "all",
    auth = GitHubAuth()
    sources = create_source_router(auth)

-    # Collect results from all (or filtered) sources
-    # Use empty query to get everything; per-source limits prevent overload
+    # Collect results from all (or filtered) sources in parallel.
+    # Per-source limits are generous — parallelism + 30s timeout cap prevents hangs.
    _TRUST_RANK = {"builtin": 3, "trusted": 2, "community": 1}
-    _PER_SOURCE_LIMIT = {"official": 100, "skills-sh": 100, "well-known": 25, "github": 100, "clawhub": 50,
-                         "claude-marketplace": 50, "lobehub": 50}
+    _PER_SOURCE_LIMIT = {
+        "official": 200, "skills-sh": 200, "well-known": 50,
+        "github": 200, "clawhub": 500, "claude-marketplace": 100,
+        "lobehub": 500,
+    }

-    all_results: list = []
-    source_counts: dict = {}
-
-    for src in sources:
-        sid = src.source_id()
-        if source != "all" and sid != source and sid != "official":
-            # Always include official source for the "first" placement
-            continue
-        try:
-            limit = _PER_SOURCE_LIMIT.get(sid, 50)
-            results = src.search("", limit=limit)
-            source_counts[sid] = len(results)
-            all_results.extend(results)
-        except Exception:
-            continue
+    with c.status("[bold]Fetching skills from registries..."):
+        all_results, source_counts, timed_out = parallel_search_sources(
+            sources,
+            query="",
+            per_source_limits=_PER_SOURCE_LIMIT,
+            source_filter=source,
+            overall_timeout=30,
+        )

    if not all_results:
        c.print("[dim]No skills found in the Skills Hub.[/]\n")
@@ -252,8 +249,11 @@ def do_browse(page: int = 1, page_size: int = 20, source: str = "all",

    # Build header
    source_label = f"— {source}" if source != "all" else "— all sources"
+    loaded_label = f"{total} skills loaded"
+    if timed_out:
+        loaded_label += f", {len(timed_out)} source(s) still loading"
    c.print(f"\n[bold]Skills Hub — Browse {source_label}[/]"
-            f"  [dim]({total} skills, page {page}/{total_pages})[/]")
+            f"  [dim]({loaded_label}, page {page}/{total_pages})[/]")
    if official_count > 0 and page == 1:
        c.print(f"[bright_cyan]★ {official_count} official optional skill(s) from Nous Research[/]")
    c.print()
@@ -300,8 +300,11 @@ def do_browse(page: int = 1, page_size: int = 20, source: str = "all",
        parts = [f"{sid}: {ct}" for sid, ct in sorted(source_counts.items())]
        c.print(f"  [dim]Sources: {', '.join(parts)}[/]")

-    c.print("[dim]Use: hermes skills inspect <identifier> to preview, "
-            "hermes skills install <identifier> to install[/]\n")
+    if timed_out:
+        c.print(f"  [yellow]⚡ Slow sources skipped: {', '.join(timed_out)} "
+                f"— run again for cached results[/]")
+
+    c.print("[dim]Tip: 'hermes skills search <query>' searches deeper across all registries[/]\n")


 def do_install(identifier: str, category: str = "", force: bool = False,
@@ -305,6 +305,7 @@ def show_status(args):
        "DingTalk": ("DINGTALK_CLIENT_ID", None),
        "Feishu": ("FEISHU_APP_ID", "FEISHU_HOME_CHANNEL"),
        "WeCom": ("WECOM_BOT_ID", "WECOM_HOME_CHANNEL"),
+        "Weixin": ("WEIXIN_ACCOUNT_ID", "WEIXIN_HOME_CHANNEL"),
        "BlueBubbles": ("BLUEBUBBLES_SERVER_URL", "BLUEBUBBLES_HOME_CHANNEL"),
    }
    
@@ -133,6 +133,7 @@ PLATFORMS = {
 "dingtalk": {"label": "💬 DingTalk", "default_toolset": "hermes-dingtalk"},
    "feishu": {"label": "🪽 Feishu", "default_toolset": "hermes-feishu"},
    "wecom": {"label": "💬 WeCom", "default_toolset": "hermes-wecom"},
+    "weixin": {"label": "💬 Weixin", "default_toolset": "hermes-weixin"},
    "api_server": {"label": "🌐 API Server", "default_toolset": "hermes-api-server"},
    "mattermost": {"label": "💬 Mattermost", "default_toolset": "hermes-mattermost"},
    "webhook": {"label": "🔗 Webhook", "default_toolset": "hermes-webhook"},
@@ -180,6 +181,14 @@ TOOL_CATEGORIES = {
                ],
                "tts_provider": "elevenlabs",
            },
+            {
+                "name": "Mistral (Voxtral TTS)",
+                "tag": "Multilingual, native Opus, needs MISTRAL_API_KEY",
+                "env_vars": [
+                    {"key": "MISTRAL_API_KEY", "prompt": "Mistral API key", "url": "https://console.mistral.ai/"},
+                ],
+                "tts_provider": "mistral",
+            },
        ],
    },
    "web": {
@@ -500,6 +509,10 @@ def _get_platform_tools(
        default_ts = PLATFORMS[platform]["default_toolset"]
        toolset_names = [default_ts]

+    # YAML may parse bare numeric names (e.g. ``12306:``) as int.
+    # Normalise to str so downstream sorted() never mixes types.
+    toolset_names = [str(ts) for ts in toolset_names]
+
    configurable_keys = {ts_key for ts_key, _, _ in CONFIGURABLE_TOOLSETS}

    # If the saved list contains any configurable keys directly, the user
@@ -558,7 +571,7 @@ def _get_platform_tools(
    # Special sentinel: "no_mcp" in the toolset list disables all MCP servers.
    mcp_servers = config.get("mcp_servers") or {}
    enabled_mcp_servers = {
-        name
+        str(name)
        for name, server_cfg in mcp_servers.items()
        if isinstance(server_cfg, dict)
        and _parse_enabled_flag(server_cfg.get("enabled", True), default=True)
@@ -720,6 +733,8 @@ def _prompt_choice(question: str, choices: list, default: int = 0) -> int:
                    return

        curses.wrapper(_curses_menu)
+        from hermes_cli.curses_ui import flush_stdin
+        flush_stdin()
        return result_holder[0]

    except Exception:
@@ -17,6 +17,45 @@ def get_hermes_home() -> Path:
    return Path(os.getenv("HERMES_HOME", Path.home() / ".hermes"))


+def get_default_hermes_root() -> Path:
+    """Return the root Hermes directory for profile-level operations.
+
+    In standard deployments this is ``~/.hermes``.
+
+    In Docker or custom deployments where ``HERMES_HOME`` points outside
+    ``~/.hermes`` (e.g. ``/opt/data``), returns ``HERMES_HOME`` directly
+    — that IS the root.
+
+    In profile mode where ``HERMES_HOME`` is ``<root>/profiles/<name>``,
+    returns ``<root>`` so that ``profile list`` can see all profiles.
+    Works both for standard (``~/.hermes/profiles/coder``) and Docker
+    (``/opt/data/profiles/coder``) layouts.
+
+    Import-safe — no dependencies beyond stdlib.
+    """
+    native_home = Path.home() / ".hermes"
+    env_home = os.environ.get("HERMES_HOME", "")
+    if not env_home:
+        return native_home
+    env_path = Path(env_home)
+    try:
+        env_path.resolve().relative_to(native_home.resolve())
+        # HERMES_HOME is under ~/.hermes (normal or profile mode)
+        return native_home
+    except ValueError:
+        pass
+
+    # Docker / custom deployment.
+    # Check if this is a profile path: <root>/profiles/<name>
+    # If the immediate parent dir is named "profiles", the root is
+    # the grandparent — this covers Docker profiles correctly.
+    if env_path.parent.name == "profiles":
+        return env_path.parent.parent
+
+    # Not a profile path — HERMES_HOME itself is the root
+    return env_path
+
+
 def get_optional_skills_dir(default: Path | None = None) -> Path:
    """Return the optional-skills directory, honoring package-manager wrappers.

@@ -72,6 +111,32 @@ def display_hermes_home() -> str:
        return str(home)


+def get_subprocess_home() -> str | None:
+    """Return a per-profile HOME directory for subprocesses, or None.
+
+    When ``{HERMES_HOME}/home/`` exists on disk, subprocesses should use it
+    as ``HOME`` so system tools (git, ssh, gh, npm …) write their configs
+    inside the Hermes data directory instead of the OS-level ``/root`` or
+    ``~/``.  This provides:
+
+    * **Docker persistence** — tool configs land inside the persistent volume.
+    * **Profile isolation** — each profile gets its own git identity, SSH
+      keys, gh tokens, etc.
+
+    The Python process's own ``os.environ["HOME"]`` and ``Path.home()`` are
+    **never** modified — only subprocess environments should inject this value.
+    Activation is directory-based: if the ``home/`` subdirectory doesn't
+    exist, returns ``None`` and behavior is unchanged.
+    """
+    hermes_home = os.getenv("HERMES_HOME")
+    if not hermes_home:
+        return None
+    profile_home = os.path.join(hermes_home, "home")
+    if os.path.isdir(profile_home):
+        return profile_home
+    return None
+
+
 VALID_REASONING_EFFORTS = ("minimal", "low", "medium", "high", "xhigh")


@@ -103,6 +168,27 @@ def is_termux() -> bool:
    return bool(os.getenv("TERMUX_VERSION") or "com.termux/files/usr" in prefix)


+_wsl_detected: bool | None = None
+
+
+def is_wsl() -> bool:
+    """Return True when running inside WSL (Windows Subsystem for Linux).
+
+    Checks ``/proc/version`` for the ``microsoft`` marker that both WSL1
+    and WSL2 inject.  Result is cached for the process lifetime.
+    Import-safe — no heavy deps.
+    """
+    global _wsl_detected
+    if _wsl_detected is not None:
+        return _wsl_detected
+    try:
+        with open("/proc/version", "r") as f:
+            _wsl_detected = "microsoft" in f.read().lower()
+    except Exception:
+        _wsl_detected = False
+    return _wsl_detected
+
+
 OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1"
 OPENROUTER_MODELS_URL = f"{OPENROUTER_BASE_URL}/models"

@@ -0,0 +1,219 @@
+"""Context engine plugin discovery.
+
+Scans ``plugins/context_engine/<name>/`` directories for context engine
+plugins.  Each subdirectory must contain ``__init__.py`` with a class
+implementing the ContextEngine ABC.
+
+Context engines are separate from the general plugin system — they live
+in the repo and are always available without user installation.  Only ONE
+can be active at a time, selected via ``context.engine`` in config.yaml.
+The default engine is ``"compressor"`` (the built-in ContextCompressor).
+
+Usage:
+    from plugins.context_engine import discover_context_engines, load_context_engine
+
+    available = discover_context_engines()   # [(name, desc, available), ...]
+    engine = load_context_engine("lcm")      # ContextEngine instance
+"""
+
+from __future__ import annotations
+
+import importlib
+import importlib.util
+import logging
+import sys
+from pathlib import Path
+from typing import List, Optional, Tuple
+
+logger = logging.getLogger(__name__)
+
+_CONTEXT_ENGINE_PLUGINS_DIR = Path(__file__).parent
+
+
+def discover_context_engines() -> List[Tuple[str, str, bool]]:
+    """Scan plugins/context_engine/ for available engines.
+
+    Returns list of (name, description, is_available) tuples.
+    Does NOT import the engines — just reads plugin.yaml for metadata
+    and does a lightweight availability check.
+    """
+    results = []
+    if not _CONTEXT_ENGINE_PLUGINS_DIR.is_dir():
+        return results
+
+    for child in sorted(_CONTEXT_ENGINE_PLUGINS_DIR.iterdir()):
+        if not child.is_dir() or child.name.startswith(("_", ".")):
+            continue
+        init_file = child / "__init__.py"
+        if not init_file.exists():
+            continue
+
+        # Read description from plugin.yaml if available
+        desc = ""
+        yaml_file = child / "plugin.yaml"
+        if yaml_file.exists():
+            try:
+                import yaml
+                with open(yaml_file) as f:
+                    meta = yaml.safe_load(f) or {}
+                desc = meta.get("description", "")
+            except Exception:
+                pass
+
+        # Quick availability check — try loading and calling is_available()
+        available = True
+        try:
+            engine = _load_engine_from_dir(child)
+            if engine is None:
+                available = False
+            elif hasattr(engine, "is_available"):
+                available = engine.is_available()
+        except Exception:
+            available = False
+
+        results.append((child.name, desc, available))
+
+    return results
+
+
+def load_context_engine(name: str) -> Optional["ContextEngine"]:
+    """Load and return a ContextEngine instance by name.
+
+    Returns None if the engine is not found or fails to load.
+    """
+    engine_dir = _CONTEXT_ENGINE_PLUGINS_DIR / name
+    if not engine_dir.is_dir():
+        logger.debug("Context engine '%s' not found in %s", name, _CONTEXT_ENGINE_PLUGINS_DIR)
+        return None
+
+    try:
+        engine = _load_engine_from_dir(engine_dir)
+        if engine:
+            return engine
+        logger.warning("Context engine '%s' loaded but no engine instance found", name)
+        return None
+    except Exception as e:
+        logger.warning("Failed to load context engine '%s': %s", name, e)
+        return None
+
+
+def _load_engine_from_dir(engine_dir: Path) -> Optional["ContextEngine"]:
+    """Import an engine module and extract the ContextEngine instance.
+
+    The module must have either:
+    - A register(ctx) function (plugin-style) — we simulate a ctx
+    - A top-level class that extends ContextEngine — we instantiate it
+    """
+    name = engine_dir.name
+    module_name = f"plugins.context_engine.{name}"
+    init_file = engine_dir / "__init__.py"
+
+    if not init_file.exists():
+        return None
+
+    # Check if already loaded
+    if module_name in sys.modules:
+        mod = sys.modules[module_name]
+    else:
+        # Handle relative imports within the plugin
+        # First ensure the parent packages are registered
+        for parent in ("plugins", "plugins.context_engine"):
+            if parent not in sys.modules:
+                parent_path = Path(__file__).parent
+                if parent == "plugins":
+                    parent_path = parent_path.parent
+                parent_init = parent_path / "__init__.py"
+                if parent_init.exists():
+                    spec = importlib.util.spec_from_file_location(
+                        parent, str(parent_init),
+                        submodule_search_locations=[str(parent_path)]
+                    )
+                    if spec:
+                        parent_mod = importlib.util.module_from_spec(spec)
+                        sys.modules[parent] = parent_mod
+                        try:
+                            spec.loader.exec_module(parent_mod)
+                        except Exception:
+                            pass
+
+        # Now load the engine module
+        spec = importlib.util.spec_from_file_location(
+            module_name, str(init_file),
+            submodule_search_locations=[str(engine_dir)]
+        )
+        if not spec:
+            return None
+
+        mod = importlib.util.module_from_spec(spec)
+        sys.modules[module_name] = mod
+
+        # Register submodules so relative imports work
+        for sub_file in engine_dir.glob("*.py"):
+            if sub_file.name == "__init__.py":
+                continue
+            sub_name = sub_file.stem
+            full_sub_name = f"{module_name}.{sub_name}"
+            if full_sub_name not in sys.modules:
+                sub_spec = importlib.util.spec_from_file_location(
+                    full_sub_name, str(sub_file)
+                )
+                if sub_spec:
+                    sub_mod = importlib.util.module_from_spec(sub_spec)
+                    sys.modules[full_sub_name] = sub_mod
+                    try:
+                        sub_spec.loader.exec_module(sub_mod)
+                    except Exception as e:
+                        logger.debug("Failed to load submodule %s: %s", full_sub_name, e)
+
+        try:
+            spec.loader.exec_module(mod)
+        except Exception as e:
+            logger.debug("Failed to exec_module %s: %s", module_name, e)
+            sys.modules.pop(module_name, None)
+            return None
+
+    # Try register(ctx) pattern first (how plugins are written)
+    if hasattr(mod, "register"):
+        collector = _EngineCollector()
+        try:
+            mod.register(collector)
+            if collector.engine:
+                return collector.engine
+        except Exception as e:
+            logger.debug("register() failed for %s: %s", name, e)
+
+    # Fallback: find a ContextEngine subclass and instantiate it
+    from agent.context_engine import ContextEngine
+    for attr_name in dir(mod):
+        attr = getattr(mod, attr_name, None)
+        if (isinstance(attr, type) and issubclass(attr, ContextEngine)
+                and attr is not ContextEngine):
+            try:
+                return attr()
+            except Exception:
+                pass
+
+    return None
+
+
+class _EngineCollector:
+    """Fake plugin context that captures register_context_engine calls."""
+
+    def __init__(self):
+        self.engine = None
+
+    def register_context_engine(self, engine):
+        self.engine = engine
+
+    # No-op for other registration methods
+    def register_tool(self, *args, **kwargs):
+        pass
+
+    def register_hook(self, *args, **kwargs):
+        pass
+
+    def register_cli_command(self, *args, **kwargs):
+        pass
+
+    def register_memory_provider(self, *args, **kwargs):
+        pass
@@ -218,9 +218,11 @@ class HonchoMemoryProvider(MemoryProvider):
                return

            # Override peer_name with gateway user_id for per-user memory scoping.
-            # CLI sessions won't have user_id, so the config default is preserved.
+            # Only when no explicit peerName was configured — an explicit peerName
+            # means the user chose their identity; a raw user_id (e.g. Telegram
+            # chat ID) should not silently replace it.
            _gw_user_id = kwargs.get("user_id")
-            if _gw_user_id:
+            if _gw_user_id and not cfg.peer_name:
                cfg.peer_name = _gw_user_id

            self._config = cfg
@@ -248,6 +250,12 @@ class HonchoMemoryProvider(MemoryProvider):

            # ----- Port #1957: lazy session init for tools-only mode -----
            if self._recall_mode == "tools":
+                if cfg.init_on_session_start:
+                    # Eager init: create session now so sync_turn() works from turn 1.
+                    # Does NOT enable auto-injection — prefetch() still returns empty.
+                    logger.debug("Honcho tools-only mode — eager session init (initOnSessionStart=true)")
+                    self._do_session_init(cfg, session_id, **kwargs)
+                    return
                # Defer actual session creation until first tool call
                self._lazy_init_kwargs = kwargs
                self._lazy_init_session_id = session_id
@@ -189,6 +189,11 @@ class HonchoClientConfig:
    # "context" — auto-injected context only, Honcho tools removed
    # "tools"   — Honcho tools only, no auto-injected context
    recall_mode: str = "hybrid"
+    # When True and recallMode is "tools", create the Honcho session eagerly
+    # during initialize() instead of deferring to the first tool call.
+    # This ensures sync_turn() can write from the very first turn.
+    # Does NOT enable automatic context injection — only changes init timing.
+    init_on_session_start: bool = False
    # Observation mode: legacy string shorthand ("directional" or "unified").
    # Kept for backward compat; granular per-peer booleans below are preferred.
    observation_mode: str = "directional"
@@ -366,6 +371,11 @@ class HonchoClientConfig:
                or raw.get("recallMode")
                or "hybrid"
            ),
+            init_on_session_start=_resolve_bool(
+                host_block.get("initOnSessionStart"),
+                raw.get("initOnSessionStart"),
+                default=False,
+            ),
            # Migration guard: existing configs without an explicit
            # observationMode keep the old "unified" default so users
            # aren't silently switched to full bidirectional observation.
@@ -16,7 +16,7 @@ dependencies = [
  "anthropic>=0.39.0,<1",
  "python-dotenv>=1.2.1,<2",
  "fire>=0.7.1,<1",
-  "httpx>=0.28.1,<1",
+  "httpx[socks]>=0.28.1,<1",
  "rich>=14.3.3,<15",
  "tenacity>=9.1.4,<10",
  "pyyaml>=6.0.2,<7",
@@ -43,7 +43,7 @@ dev = ["debugpy>=1.8.0,<2", "pytest>=9.0.2,<10", "pytest-asyncio>=1.3.0,<2", "py
 messaging = ["python-telegram-bot[webhooks]>=22.6,<23", "discord.py[voice]>=2.7.1,<3", "aiohttp>=3.13.3,<4", "slack-bolt>=1.18.0,<2", "slack-sdk>=3.27.0,<4"]
 cron = ["croniter>=6.0.0,<7"]
 slack = ["slack-bolt>=1.18.0,<2", "slack-sdk>=3.27.0,<4"]
-matrix = ["matrix-nio[e2e]>=0.24.0,<1", "Markdown>=3.6,<4"]
+matrix = ["mautrix[encryption]>=0.20,<1", "Markdown>=3.6,<4"]
 cli = ["simple-term-menu>=1.0,<2"]
 tts-premium = ["elevenlabs>=1.0,<2"]
 voice = [
@@ -88,10 +88,10 @@ all = [
  "hermes-agent[modal]",
  "hermes-agent[daytona]",
  "hermes-agent[messaging]",
-  # matrix excluded: python-olm (required by matrix-nio[e2e]) is upstream-broken
-  # on modern macOS (archived libolm, C++ errors with Clang 21+). Including it
-  # here causes the entire [all] install to fail, dropping all other extras.
-  # Users who need Matrix can install manually: pip install 'hermes-agent[matrix]'
+  # matrix: python-olm (required by matrix-nio[e2e]) is upstream-broken on
+  # modern macOS (archived libolm, C++ errors with Clang 21+).  On Linux the
+  # [matrix] extra's own marker pulls in the [e2e] variant automatically.
+  "hermes-agent[matrix]; sys_platform == 'linux'",
  "hermes-agent[cron]",
  "hermes-agent[cli]",
  "hermes-agent[dev]",
@@ -359,8 +359,9 @@ def _sanitize_surrogates(text: str) -> str:
 def _sanitize_messages_surrogates(messages: list) -> bool:
    """Sanitize surrogate characters from all string content in a messages list.

-    Walks message dicts in-place.  Returns True if any surrogates were found
-    and replaced, False otherwise.
+    Walks message dicts in-place. Returns True if any surrogates were found
+    and replaced, False otherwise. Covers content/text, name, and tool call
+    metadata/arguments so retries don't fail on a non-content field.
    """
    found = False
    for msg in messages:
@@ -377,6 +378,88 @@ def _sanitize_messages_surrogates(messages: list) -> bool:
                    if isinstance(text, str) and _SURROGATE_RE.search(text):
                        part["text"] = _SURROGATE_RE.sub('\ufffd', text)
                        found = True
+        name = msg.get("name")
+        if isinstance(name, str) and _SURROGATE_RE.search(name):
+            msg["name"] = _SURROGATE_RE.sub('\ufffd', name)
+            found = True
+        tool_calls = msg.get("tool_calls")
+        if isinstance(tool_calls, list):
+            for tc in tool_calls:
+                if not isinstance(tc, dict):
+                    continue
+                tc_id = tc.get("id")
+                if isinstance(tc_id, str) and _SURROGATE_RE.search(tc_id):
+                    tc["id"] = _SURROGATE_RE.sub('\ufffd', tc_id)
+                    found = True
+                fn = tc.get("function")
+                if isinstance(fn, dict):
+                    fn_name = fn.get("name")
+                    if isinstance(fn_name, str) and _SURROGATE_RE.search(fn_name):
+                        fn["name"] = _SURROGATE_RE.sub('\ufffd', fn_name)
+                        found = True
+                    fn_args = fn.get("arguments")
+                    if isinstance(fn_args, str) and _SURROGATE_RE.search(fn_args):
+                        fn["arguments"] = _SURROGATE_RE.sub('\ufffd', fn_args)
+                        found = True
+    return found
+
+
+def _strip_non_ascii(text: str) -> str:
+    """Remove non-ASCII characters, replacing with closest ASCII equivalent or removing.
+
+    Used as a last resort when the system encoding is ASCII and can't handle
+    any non-ASCII characters (e.g. LANG=C on Chromebooks).
+    """
+    return text.encode('ascii', errors='ignore').decode('ascii')
+
+
+def _sanitize_messages_non_ascii(messages: list) -> bool:
+    """Strip non-ASCII characters from all string content in a messages list.
+
+    This is a last-resort recovery for systems with ASCII-only encoding
+    (LANG=C, Chromebooks, minimal containers).  Returns True if any
+    non-ASCII content was found and sanitized.
+    """
+    found = False
+    for msg in messages:
+        if not isinstance(msg, dict):
+            continue
+        # Sanitize content (string)
+        content = msg.get("content")
+        if isinstance(content, str):
+            sanitized = _strip_non_ascii(content)
+            if sanitized != content:
+                msg["content"] = sanitized
+                found = True
+        elif isinstance(content, list):
+            for part in content:
+                if isinstance(part, dict):
+                    text = part.get("text")
+                    if isinstance(text, str):
+                        sanitized = _strip_non_ascii(text)
+                        if sanitized != text:
+                            part["text"] = sanitized
+                            found = True
+        # Sanitize name field (can contain non-ASCII in tool results)
+        name = msg.get("name")
+        if isinstance(name, str):
+            sanitized = _strip_non_ascii(name)
+            if sanitized != name:
+                msg["name"] = sanitized
+                found = True
+        # Sanitize tool_calls
+        tool_calls = msg.get("tool_calls")
+        if isinstance(tool_calls, list):
+            for tc in tool_calls:
+                if isinstance(tc, dict):
+                    fn = tc.get("function", {})
+                    if isinstance(fn, dict):
+                        fn_args = fn.get("arguments")
+                        if isinstance(fn_args, str):
+                            sanitized = _strip_non_ascii(fn_args)
+                            if sanitized != fn_args:
+                                fn["arguments"] = sanitized
+                                found = True
    return found


@@ -606,6 +689,17 @@ class AIAgent:
        else:
            self.api_mode = "chat_completions"

+        try:
+            from hermes_cli.model_normalize import (
+                _AGGREGATOR_PROVIDERS,
+                normalize_model_for_provider,
+            )
+
+            if self.provider not in _AGGREGATOR_PROVIDERS:
+                self.model = normalize_model_for_provider(self.model, self.provider)
+        except Exception:
+            pass
+
        # Direct OpenAI sessions use the Responses API path.  GPT-5.x tool
        # calls with reasoning are rejected on /v1/chat/completions, and
        # Hermes is a tool-using client by default.
@@ -672,7 +766,7 @@ class AIAgent:
        # conversation prefix. Uses system_and_3 strategy (4 breakpoints).
        is_openrouter = self._is_openrouter_url()
        is_claude = "claude" in self.model.lower()
-        is_native_anthropic = self.api_mode == "anthropic_messages"
+        is_native_anthropic = self.api_mode == "anthropic_messages" and self.provider == "anthropic"
        self._use_prompt_caching = (is_openrouter and is_claude) or is_native_anthropic
        self._cache_ttl = "5m"  # Default 5-minute TTL (1.25x write cost)
        
@@ -853,6 +947,7 @@ class AIAgent:
                    client_kwargs["default_headers"] = headers

            self.api_key = client_kwargs.get("api_key", "")
+            self.base_url = client_kwargs.get("base_url", self.base_url)
            try:
                self.client = self._create_openai_client(client_kwargs, reason="agent_init", shared=True)
                if not self.quiet_mode:
@@ -1149,6 +1244,9 @@ class AIAgent:
            except (TypeError, ValueError):
                _config_context_length = None

+        # Store for reuse in switch_model (so config override persists across model switches)
+        self._config_context_length = _config_context_length
+
        # Check custom_providers per-model context_length
        if _config_context_length is None:
            _custom_providers = _agent_cfg.get("custom_providers")
@@ -1170,20 +1268,88 @@ class AIAgent:
                                        pass
                        break
        
-        self.context_compressor = ContextCompressor(
-            model=self.model,
-            threshold_percent=compression_threshold,
-            protect_first_n=3,
-            protect_last_n=compression_protect_last,
-            summary_target_ratio=compression_target_ratio,
-            summary_model_override=compression_summary_model,
-            quiet_mode=self.quiet_mode,
-            base_url=self.base_url,
-            api_key=getattr(self, "api_key", ""),
-            config_context_length=_config_context_length,
-            provider=self.provider,
-        )
+        # Select context engine: config-driven (like memory providers).
+        # 1. Check config.yaml context.engine setting
+        # 2. Check plugins/context_engine/<name>/ directory (repo-shipped)
+        # 3. Check general plugin system (user-installed plugins)
+        # 4. Fall back to built-in ContextCompressor
+        _selected_engine = None
+        _engine_name = "compressor"  # default
+        try:
+            _ctx_cfg = _agent_cfg.get("context", {}) if isinstance(_agent_cfg, dict) else {}
+            _engine_name = _ctx_cfg.get("engine", "compressor") or "compressor"
+        except Exception:
+            pass
+
+        if _engine_name != "compressor":
+            # Try loading from plugins/context_engine/<name>/
+            try:
+                from plugins.context_engine import load_context_engine
+                _selected_engine = load_context_engine(_engine_name)
+            except Exception as _ce_load_err:
+                logger.debug("Context engine load from plugins/context_engine/: %s", _ce_load_err)
+
+            # Try general plugin system as fallback
+            if _selected_engine is None:
+                try:
+                    from hermes_cli.plugins import get_plugin_context_engine
+                    _candidate = get_plugin_context_engine()
+                    if _candidate and _candidate.name == _engine_name:
+                        _selected_engine = _candidate
+                except Exception:
+                    pass
+
+            if _selected_engine is None:
+                logger.warning(
+                    "Context engine '%s' not found — falling back to built-in compressor",
+                    _engine_name,
+                )
+        # else: config says "compressor" — use built-in, don't auto-activate plugins
+
+        if _selected_engine is not None:
+            self.context_compressor = _selected_engine
+            if not self.quiet_mode:
+                logger.info("Using context engine: %s", _selected_engine.name)
+        else:
+            self.context_compressor = ContextCompressor(
+                model=self.model,
+                threshold_percent=compression_threshold,
+                protect_first_n=3,
+                protect_last_n=compression_protect_last,
+                summary_target_ratio=compression_target_ratio,
+                summary_model_override=compression_summary_model,
+                quiet_mode=self.quiet_mode,
+                base_url=self.base_url,
+                api_key=getattr(self, "api_key", ""),
+                config_context_length=_config_context_length,
+                provider=self.provider,
+            )
        self.compression_enabled = compression_enabled
+
+        # Inject context engine tool schemas (e.g. lcm_grep, lcm_describe, lcm_expand)
+        self._context_engine_tool_names: set = set()
+        if hasattr(self, "context_compressor") and self.context_compressor and self.tools is not None:
+            for _schema in self.context_compressor.get_tool_schemas():
+                _wrapped = {"type": "function", "function": _schema}
+                self.tools.append(_wrapped)
+                _tname = _schema.get("name", "")
+                if _tname:
+                    self.valid_tool_names.add(_tname)
+                    self._context_engine_tool_names.add(_tname)
+
+        # Notify context engine of session start
+        if hasattr(self, "context_compressor") and self.context_compressor:
+            try:
+                self.context_compressor.on_session_start(
+                    self.session_id,
+                    hermes_home=str(get_hermes_home()),
+                    platform=self.platform or "cli",
+                    model=self.model,
+                    context_length=getattr(self.context_compressor, "context_length", 0),
+                )
+            except Exception as _ce_err:
+                logger.debug("Context engine on_session_start: %s", _ce_err)
+
        self._subdirectory_hints = SubdirectoryHintTracker(
            working_dir=os.getenv("TERMINAL_CWD") or None,
        )
@@ -1249,11 +1415,13 @@ class AIAgent:
            "api_key": getattr(self, "api_key", ""),
            "client_kwargs": dict(self._client_kwargs),
            "use_prompt_caching": self._use_prompt_caching,
-            # Compressor state that _try_activate_fallback() overwrites
-            "compressor_model": _cc.model,
-            "compressor_base_url": _cc.base_url,
+            # Context engine state that _try_activate_fallback() overwrites.
+            # Use getattr for model/base_url/api_key/provider since plugin
+            # engines may not have these (they're ContextCompressor-specific).
+            "compressor_model": getattr(_cc, "model", self.model),
+            "compressor_base_url": getattr(_cc, "base_url", self.base_url),
            "compressor_api_key": getattr(_cc, "api_key", ""),
-            "compressor_provider": _cc.provider,
+            "compressor_provider": getattr(_cc, "provider", self.provider),
            "compressor_context_length": _cc.context_length,
            "compressor_threshold_tokens": _cc.threshold_tokens,
        }
@@ -1299,15 +1467,9 @@ class AIAgent:
        # Turn counter (added after reset_session_state was first written — #2635)
        self._user_turn_count = 0

-        # Context compressor internal counters (if present)
+        # Context engine reset (works for both built-in compressor and plugins)
        if hasattr(self, "context_compressor") and self.context_compressor:
-            self.context_compressor.last_prompt_tokens = 0
-            self.context_compressor.last_completion_tokens = 0
-            self.context_compressor.compression_count = 0
-            self.context_compressor._context_probed = False
-            self.context_compressor._context_probe_persistable = False
-            # Iterative summary from previous session must not bleed into new one (#2635)
-            self.context_compressor._previous_summary = None
+            self.context_compressor.on_session_reset()
    
    def switch_model(self, new_model, new_provider, api_key='', base_url='', api_mode=''):
        """Switch the model/provider in-place for a live agent.
@@ -1348,7 +1510,11 @@ class AIAgent:
                resolve_anthropic_token,
                _is_oauth_token,
            )
-            effective_key = api_key or self.api_key or resolve_anthropic_token() or ""
+            # Only fall back to ANTHROPIC_TOKEN when the provider is actually Anthropic.
+            # Other anthropic_messages providers (MiniMax, Alibaba, etc.) must use their own
+            # API key — falling back would send Anthropic credentials to third-party endpoints.
+            _is_native_anthropic = new_provider == "anthropic"
+            effective_key = (api_key or self.api_key or resolve_anthropic_token() or "") if _is_native_anthropic else (api_key or self.api_key or "")
            self.api_key = effective_key
            self._anthropic_api_key = effective_key
            self._anthropic_base_url = base_url or getattr(self, "_anthropic_base_url", None)
@@ -1372,7 +1538,7 @@ class AIAgent:
            )

        # ── Re-evaluate prompt caching ──
-        is_native_anthropic = api_mode == "anthropic_messages"
+        is_native_anthropic = api_mode == "anthropic_messages" and new_provider == "anthropic"
        self._use_prompt_caching = (
            ("openrouter" in (self.base_url or "").lower() and "claude" in new_model.lower())
            or is_native_anthropic
@@ -1386,14 +1552,14 @@ class AIAgent:
                base_url=self.base_url,
                api_key=self.api_key,
                provider=self.provider,
+                config_context_length=getattr(self, "_config_context_length", None),
            )
-            self.context_compressor.model = self.model
-            self.context_compressor.base_url = self.base_url
-            self.context_compressor.api_key = self.api_key
-            self.context_compressor.provider = self.provider
-            self.context_compressor.context_length = new_context_length
-            self.context_compressor.threshold_tokens = int(
-                new_context_length * self.context_compressor.threshold_percent
+            self.context_compressor.update_model(
+                model=self.model,
+                context_length=new_context_length,
+                base_url=self.base_url,
+                api_key=getattr(self, "api_key", ""),
+                provider=self.provider,
            )

        # ── Invalidate cached system prompt so it rebuilds next turn ──
@@ -1409,10 +1575,10 @@ class AIAgent:
            "api_key": getattr(self, "api_key", ""),
            "client_kwargs": dict(self._client_kwargs),
            "use_prompt_caching": self._use_prompt_caching,
-            "compressor_model": _cc.model if _cc else self.model,
-            "compressor_base_url": _cc.base_url if _cc else self.base_url,
+            "compressor_model": getattr(_cc, "model", self.model) if _cc else self.model,
+            "compressor_base_url": getattr(_cc, "base_url", self.base_url) if _cc else self.base_url,
            "compressor_api_key": getattr(_cc, "api_key", "") if _cc else "",
-            "compressor_provider": _cc.provider if _cc else self.provider,
+            "compressor_provider": getattr(_cc, "provider", self.provider) if _cc else self.provider,
            "compressor_context_length": _cc.context_length if _cc else 0,
            "compressor_threshold_tokens": _cc.threshold_tokens if _cc else 0,
        }
@@ -1878,19 +2044,14 @@ class AIAgent:
            except Exception as e:
                logger.debug("Background memory/skill review failed: %s", e)
            finally:
-                # Explicitly close the OpenAI/httpx client so GC doesn't
-                # try to clean it up on a dead asyncio event loop (which
-                # produces "Event loop is closed" errors in the terminal).
+                # Close all resources (httpx client, subprocesses, etc.) so
+                # GC doesn't try to clean them up on a dead asyncio event
+                # loop (which produces "Event loop is closed" errors).
                if review_agent is not None:
-                    client = getattr(review_agent, "client", None)
-                    if client is not None:
-                        try:
-                            review_agent._close_openai_client(
-                                client, reason="bg_review_done", shared=True
-                            )
-                            review_agent.client = None
-                        except Exception:
-                            pass
+                    try:
+                        review_agent.close()
+                    except Exception:
+                        pass

        t = threading.Thread(target=_run_review, daemon=True, name="bg-review")
        t.start()
@@ -2614,10 +2775,11 @@ class AIAgent:
        }

    def shutdown_memory_provider(self, messages: list = None) -> None:
-        """Shut down the memory provider — call at actual session boundaries.
+        """Shut down the memory provider and context engine — call at actual session boundaries.

        This calls on_session_end() then shutdown_all() on the memory
-        manager. NOT called per-turn — only at CLI exit, /reset, gateway
+        manager, and on_session_end() on the context engine.
+        NOT called per-turn — only at CLI exit, /reset, gateway
        session expiry, etc.
        """
        if self._memory_manager:
@@ -2629,7 +2791,74 @@ class AIAgent:
                self._memory_manager.shutdown_all()
            except Exception:
                pass
+        # Notify context engine of session end (flush DAG, close DBs, etc.)
+        if hasattr(self, "context_compressor") and self.context_compressor:
+            try:
+                self.context_compressor.on_session_end(
+                    self.session_id or "",
+                    messages or [],
+                )
+            except Exception:
+                pass
    
+    def close(self) -> None:
+        """Release all resources held by this agent instance.
+
+        Cleans up subprocess resources that would otherwise become orphans:
+        - Background processes tracked in ProcessRegistry
+        - Terminal sandbox environments
+        - Browser daemon sessions
+        - Active child agents (subagent delegation)
+        - OpenAI/httpx client connections
+
+        Safe to call multiple times (idempotent).  Each cleanup step is
+        independently guarded so a failure in one does not prevent the rest.
+        """
+        task_id = getattr(self, "session_id", None) or ""
+
+        # 1. Kill background processes for this task
+        try:
+            from tools.process_registry import process_registry
+            process_registry.kill_all(task_id=task_id)
+        except Exception:
+            pass
+
+        # 2. Clean terminal sandbox environments
+        try:
+            from tools.terminal_tool import cleanup_vm
+            cleanup_vm(task_id)
+        except Exception:
+            pass
+
+        # 3. Clean browser daemon sessions
+        try:
+            from tools.browser_tool import cleanup_browser
+            cleanup_browser(task_id)
+        except Exception:
+            pass
+
+        # 4. Close active child agents
+        try:
+            with self._active_children_lock:
+                children = list(self._active_children)
+                self._active_children.clear()
+            for child in children:
+                try:
+                    child.close()
+                except Exception:
+                    pass
+        except Exception:
+            pass
+
+        # 5. Close the OpenAI/httpx client
+        try:
+            client = getattr(self, "client", None)
+            if client is not None:
+                self._close_openai_client(client, reason="agent_close", shared=True)
+                self.client = None
+        except Exception:
+            pass
+
    def _hydrate_todo_store(self, history: List[Dict[str, Any]]) -> None:
        """
        Recover todo state from conversation history.
@@ -2922,7 +3151,7 @@ class AIAgent:

    @staticmethod
    def _cap_delegate_task_calls(tool_calls: list) -> list:
-        """Truncate excess delegate_task calls to MAX_CONCURRENT_CHILDREN.
+        """Truncate excess delegate_task calls to max_concurrent_children.

        The delegate_tool caps the task list inside a single call, but the
        model can emit multiple separate delegate_task tool_calls in one
@@ -2930,23 +3159,24 @@ class AIAgent:

        Returns the original list if no truncation was needed.
        """
-        from tools.delegate_tool import MAX_CONCURRENT_CHILDREN
+        from tools.delegate_tool import _get_max_concurrent_children
+        max_children = _get_max_concurrent_children()
        delegate_count = sum(1 for tc in tool_calls if tc.function.name == "delegate_task")
-        if delegate_count <= MAX_CONCURRENT_CHILDREN:
+        if delegate_count <= max_children:
            return tool_calls
        kept_delegates = 0
        truncated = []
        for tc in tool_calls:
            if tc.function.name == "delegate_task":
-                if kept_delegates < MAX_CONCURRENT_CHILDREN:
+                if kept_delegates < max_children:
                    truncated.append(tc)
                    kept_delegates += 1
            else:
                truncated.append(tc)
        logger.warning(
            "Truncated %d excess delegate_task call(s) to enforce "
-            "MAX_CONCURRENT_CHILDREN=%d limit",
-            delegate_count - MAX_CONCURRENT_CHILDREN, MAX_CONCURRENT_CHILDREN,
+            "max_concurrent_children=%d limit",
+            delegate_count - max_children, max_children,
        )
        return truncated

@@ -4199,7 +4429,7 @@ class AIAgent:
            self._anthropic_api_key = runtime_key
            self._anthropic_base_url = runtime_base
            self._anthropic_client = build_anthropic_client(runtime_key, runtime_base)
-            self._is_anthropic_oauth = _is_oauth_token(runtime_key) if self.provider == "anthropic" else False
+            self._is_anthropic_oauth = _is_oauth_token(runtime_key)
            self.api_key = runtime_key
            self.base_url = runtime_base
            return
@@ -5005,7 +5235,7 @@ class AIAgent:
            # when no explicit key is in the fallback config.
            if fb_base_url_hint and "ollama.com" in fb_base_url_hint.lower() and not fb_api_key_hint:
                fb_api_key_hint = os.getenv("OLLAMA_API_KEY") or None
-            fb_client, _ = resolve_provider_client(
+            fb_client, _resolved_fb_model = resolve_provider_client(
                fb_provider, model=fb_model, raw_codex=True,
                explicit_base_url=fb_base_url_hint,
                explicit_api_key=fb_api_key_hint)
@@ -5014,6 +5244,12 @@ class AIAgent:
                    "Fallback to %s failed: provider not configured",
                    fb_provider)
                return self._try_activate_fallback()  # try next in chain
+            try:
+                from hermes_cli.model_normalize import normalize_model_for_provider
+
+                fb_model = normalize_model_for_provider(fb_model, fb_provider)
+            except Exception:
+                pass

            # Determine api_mode from provider / base URL
            fb_api_mode = "chat_completions"
@@ -5065,7 +5301,7 @@ class AIAgent:
                }

            # Re-evaluate prompt caching for the new provider/model
-            is_native_anthropic = fb_api_mode == "anthropic_messages"
+            is_native_anthropic = fb_api_mode == "anthropic_messages" and fb_provider == "anthropic"
            self._use_prompt_caching = (
                ("openrouter" in fb_base_url.lower() and "claude" in fb_model.lower())
                or is_native_anthropic
@@ -5081,13 +5317,12 @@ class AIAgent:
                    self.model, base_url=self.base_url,
                    api_key=self.api_key, provider=self.provider,
                )
-                self.context_compressor.model = self.model
-                self.context_compressor.base_url = self.base_url
-                self.context_compressor.api_key = self.api_key
-                self.context_compressor.provider = self.provider
-                self.context_compressor.context_length = fb_context_length
-                self.context_compressor.threshold_tokens = int(
-                    fb_context_length * self.context_compressor.threshold_percent
+                self.context_compressor.update_model(
+                    model=self.model,
+                    context_length=fb_context_length,
+                    base_url=self.base_url,
+                    api_key=getattr(self, "api_key", ""),
+                    provider=self.provider,
                )

            self._emit_status(
@@ -5147,14 +5382,15 @@ class AIAgent:
                    shared=True,
                )

-            # ── Restore context compressor state ──
+            # ── Restore context engine state ──
            cc = self.context_compressor
-            cc.model = rt["compressor_model"]
-            cc.base_url = rt["compressor_base_url"]
-            cc.api_key = rt["compressor_api_key"]
-            cc.provider = rt["compressor_provider"]
-            cc.context_length = rt["compressor_context_length"]
-            cc.threshold_tokens = rt["compressor_threshold_tokens"]
+            cc.update_model(
+                model=rt["compressor_model"],
+                context_length=rt["compressor_context_length"],
+                base_url=rt["compressor_base_url"],
+                api_key=rt["compressor_api_key"],
+                provider=rt["compressor_provider"],
+            )

            # ── Reset fallback chain for the new turn ──
            self._fallback_activated = False
@@ -5401,11 +5637,12 @@ class AIAgent:
    def _anthropic_preserve_dots(self) -> bool:
        """True when using an anthropic-compatible endpoint that preserves dots in model names.
        Alibaba/DashScope keeps dots (e.g. qwen3.5-plus).
+        MiniMax keeps dots (e.g. MiniMax-M2.7).
        OpenCode Go keeps dots (e.g. minimax-m2.7)."""
-        if (getattr(self, "provider", "") or "").lower() in {"alibaba", "opencode-go"}:
+        if (getattr(self, "provider", "") or "").lower() in {"alibaba", "minimax", "minimax-cn", "opencode-go"}:
            return True
        base = (getattr(self, "base_url", "") or "").lower()
-        return "dashscope" in base or "aliyuncs" in base or "opencode.ai/zen/go" in base
+        return "dashscope" in base or "aliyuncs" in base or "minimax" in base or "opencode.ai/zen/go" in base

    def _is_qwen_portal(self) -> bool:
        """Return True when the base URL targets Qwen Portal."""
@@ -5498,7 +5735,7 @@ class AIAgent:
                preserve_dots=self._anthropic_preserve_dots(),
                context_length=ctx_len,
                base_url=getattr(self, "_anthropic_base_url", None),
-                fast_mode=self.request_overrides.get("speed") == "fast",
+                fast_mode=(self.request_overrides or {}).get("speed") == "fast",
            )

        if self.api_mode == "codex_responses":
@@ -5651,8 +5888,16 @@ class AIAgent:
            api_kwargs["tools"] = self.tools

        if self.max_tokens is not None:
-            if not self._is_qwen_portal():
-                api_kwargs.update(self._max_tokens_param(self.max_tokens))
+            api_kwargs.update(self._max_tokens_param(self.max_tokens))
+        elif self._is_qwen_portal():
+            # Qwen Portal defaults to a very low max_tokens when omitted.
+            # Reasoning models (qwen3-coder-plus) exhaust that budget on
+            # thinking tokens alone, causing the portal to return
+            # finish_reason="stop" with truncated output — the agent sees
+            # this as an intentional stop and exits the loop.  Send 65536
+            # (the documented max output for qwen3-coder models) so the
+            # model has adequate output budget for tool calls.
+            api_kwargs.update(self._max_tokens_param(65536))
        elif (self._is_openrouter_url() or "nousresearch" in self._base_url_lower) and "claude" in (self.model or "").lower():
            # OpenRouter and Nous Portal translate requests to Anthropic's
            # Messages API, which requires max_tokens as a mandatory field.
@@ -6719,6 +6964,29 @@ class AIAgent:
                        spinner.stop(cute_msg)
                    elif self._should_emit_quiet_tool_messages():
                        self._vprint(f"  {cute_msg}")
+            elif self._context_engine_tool_names and function_name in self._context_engine_tool_names:
+                # Context engine tools (lcm_grep, lcm_describe, lcm_expand, etc.)
+                spinner = None
+                if self.quiet_mode and not self.tool_progress_callback:
+                    face = random.choice(KawaiiSpinner.KAWAII_WAITING)
+                    emoji = _get_tool_emoji(function_name)
+                    preview = _build_tool_preview(function_name, function_args) or function_name
+                    spinner = KawaiiSpinner(f"{face} {emoji} {preview}", spinner_type='dots', print_fn=self._print_fn)
+                    spinner.start()
+                _ce_result = None
+                try:
+                    function_result = self.context_compressor.handle_tool_call(function_name, function_args, messages=messages)
+                    _ce_result = function_result
+                except Exception as tool_error:
+                    function_result = json.dumps({"error": f"Context engine tool '{function_name}' failed: {tool_error}"})
+                    logger.error("context_engine.handle_tool_call raised for %s: %s", function_name, tool_error, exc_info=True)
+                finally:
+                    tool_duration = time.time() - tool_start_time
+                    cute_msg = _get_cute_tool_message_impl(function_name, function_args, tool_duration, result=_ce_result)
+                    if spinner:
+                        spinner.stop(cute_msg)
+                    elif self.quiet_mode:
+                        self._vprint(f"  {cute_msg}")
            elif self._memory_manager and self._memory_manager.has_tool(function_name):
                # Memory provider tools (hindsight_retain, honcho_search, etc.)
                # These are not in the tool registry — route through MemoryManager.
@@ -7162,7 +7430,7 @@ class AIAgent:
        self._thinking_prefill_retries = 0
        self._last_content_with_tools = None
        self._mute_post_response = False
-        self._surrogate_sanitized = False
+        self._unicode_sanitization_passes = 0

        # Pre-turn connection health check: detect and clean up dead TCP
        # connections left over from provider outages or dropped streams.
@@ -7374,6 +7642,7 @@ class AIAgent:
                is_first_turn=(not bool(conversation_history)),
                model=self.model,
                platform=getattr(self, "platform", None) or "",
+                sender_id=getattr(self, "_user_id", None) or "",
            )
            _ctx_parts: list[str] = []
            for r in _pre_results:
@@ -7602,6 +7871,7 @@ class AIAgent:

            finish_reason = "stop"
            response = None  # Guard against UnboundLocalError if all retries fail
+            api_kwargs = None  # Guard against UnboundLocalError in except handler

            while retry_count < max_retries:
                try:
@@ -8032,7 +8302,7 @@ class AIAgent:
                        # Cache discovered context length after successful call.
                        # Only persist limits confirmed by the provider (parsed
                        # from the error message), not guessed probe tiers.
-                        if self.context_compressor._context_probed:
+                        if getattr(self.context_compressor, "_context_probed", False):
                            ctx = self.context_compressor.context_length
                            if getattr(self.context_compressor, "_context_probe_persistable", False):
                                save_context_length(self.model, self.base_url, ctx)
@@ -8147,22 +8417,40 @@ class AIAgent:
                        self.thinking_callback("")

                    # -----------------------------------------------------------
-                    # Surrogate character recovery.  UnicodeEncodeError happens
-                    # when the messages contain lone surrogates (U+D800..U+DFFF)
-                    # that are invalid UTF-8.  Common source: clipboard paste
-                    # from Google Docs or similar rich-text editors.  We sanitize
-                    # the entire messages list in-place and retry once.
+                    # UnicodeEncodeError recovery.  Two common causes:
+                    #   1. Lone surrogates (U+D800..U+DFFF) from clipboard paste
+                    #      (Google Docs, rich-text editors) — sanitize and retry.
+                    #   2. ASCII codec on systems with LANG=C or non-UTF-8 locale
+                    #      (e.g. Chromebooks) — any non-ASCII character fails.
+                    #      Detect via the error message mentioning 'ascii' codec.
+                    # We sanitize messages in-place and may retry twice:
+                    # first to strip surrogates, then once more for pure
+                    # ASCII-only locale sanitization if needed.
                    # -----------------------------------------------------------
-                    if isinstance(api_error, UnicodeEncodeError) and not getattr(self, '_surrogate_sanitized', False):
-                        self._surrogate_sanitized = True
-                        if _sanitize_messages_surrogates(messages):
+                    if isinstance(api_error, UnicodeEncodeError) and getattr(self, '_unicode_sanitization_passes', 0) < 2:
+                        _err_str = str(api_error).lower()
+                        _is_ascii_codec = "'ascii'" in _err_str or "ascii" in _err_str
+                        _surrogates_found = _sanitize_messages_surrogates(messages)
+                        if _surrogates_found:
+                            self._unicode_sanitization_passes += 1
                            self._vprint(
                                f"{self.log_prefix}⚠️  Stripped invalid surrogate characters from messages. Retrying...",
                                force=True,
                            )
                            continue
-                        # Surrogates weren't in messages — might be in system
-                        # prompt or prefill.  Fall through to normal error path.
+                        if _is_ascii_codec:
+                            # ASCII codec: the system encoding can't handle
+                            # non-ASCII characters at all. Sanitize all
+                            # non-ASCII content from messages and retry.
+                            if _sanitize_messages_non_ascii(messages):
+                                self._unicode_sanitization_passes += 1
+                                self._vprint(
+                                    f"{self.log_prefix}⚠️  System encoding is ASCII — stripped non-ASCII characters from messages. Retrying...",
+                                    force=True,
+                                )
+                                continue
+                        # Nothing to sanitize in messages — might be in system
+                        # prompt or prefill. Fall through to normal error path.

                    status_code = getattr(api_error, "status_code", None)
                    error_context = self._extract_api_error_context(api_error)
@@ -8353,16 +8641,22 @@ class AIAgent:
                        compressor = self.context_compressor
                        old_ctx = compressor.context_length
                        if old_ctx > _reduced_ctx:
-                            compressor.context_length = _reduced_ctx
-                            compressor.threshold_tokens = int(
-                                _reduced_ctx * compressor.threshold_percent
+                            compressor.update_model(
+                                model=self.model,
+                                context_length=_reduced_ctx,
+                                base_url=self.base_url,
+                                api_key=getattr(self, "api_key", ""),
+                                provider=self.provider,
                            )
-                            compressor._context_probed = True
-                            # Don't persist — this is a subscription-tier
-                            # limitation, not a model capability.  If the user
-                            # later enables extra usage the 1M limit should
-                            # come back automatically.
-                            compressor._context_probe_persistable = False
+                            # Context probing flags — only set on built-in
+                            # compressor (plugin engines manage their own).
+                            if hasattr(compressor, "_context_probed"):
+                                compressor._context_probed = True
+                                # Don't persist — this is a subscription-tier
+                                # limitation, not a model capability.  If the
+                                # user later enables extra usage the 1M limit
+                                # should come back automatically.
+                                compressor._context_probe_persistable = False
                            self._vprint(
                                f"{self.log_prefix}⚠️  Anthropic long-context tier "
                                f"requires extra usage — reducing context: "
@@ -8526,17 +8820,25 @@ class AIAgent:
                            new_ctx = get_next_probe_tier(old_ctx)

                        if new_ctx and new_ctx < old_ctx:
-                            compressor.context_length = new_ctx
-                            compressor.threshold_tokens = int(new_ctx * compressor.threshold_percent)
-                            compressor._context_probed = True
-                            # Only persist limits parsed from the provider's
-                            # error message (a real number).  Guessed fallback
-                            # tiers from get_next_probe_tier() should stay
-                            # in-memory only — persisting them pollutes the
-                            # cache with wrong values.
-                            compressor._context_probe_persistable = bool(
-                                parsed_limit and parsed_limit == new_ctx
+                            compressor.update_model(
+                                model=self.model,
+                                context_length=new_ctx,
+                                base_url=self.base_url,
+                                api_key=getattr(self, "api_key", ""),
+                                provider=self.provider,
                            )
+                            # Context probing flags — only set on built-in
+                            # compressor (plugin engines manage their own).
+                            if hasattr(compressor, "_context_probed"):
+                                compressor._context_probed = True
+                                # Only persist limits parsed from the provider's
+                                # error message (a real number).  Guessed fallback
+                                # tiers from get_next_probe_tier() should stay
+                                # in-memory only — persisting them pollutes the
+                                # cache with wrong values.
+                                compressor._context_probe_persistable = bool(
+                                    parsed_limit and parsed_limit == new_ctx
+                                )
                            self._vprint(f"{self.log_prefix}⚠️  Context length exceeded — stepping down: {old_ctx:,} → {new_ctx:,} tokens", force=True)
                        else:
                            self._vprint(f"{self.log_prefix}⚠️  Context length exceeded at minimum tier — attempting compression...", force=True)
@@ -8618,9 +8920,10 @@ class AIAgent:
                        if self._try_activate_fallback():
                            retry_count = 0
                            continue
-                        self._dump_api_request_debug(
-                            api_kwargs, reason="non_retryable_client_error", error=api_error,
-                        )
+                        if api_kwargs is not None:
+                            self._dump_api_request_debug(
+                                api_kwargs, reason="non_retryable_client_error", error=api_error,
+                            )
                        self._emit_status(
                            f"❌ Non-retryable error (HTTP {status_code}): "
                            f"{self._summarize_api_error(api_error)}"
@@ -8723,9 +9026,10 @@ class AIAgent:
                            self.log_prefix, max_retries, _final_summary,
                            _provider, _model, len(api_messages), f"{approx_tokens:,}",
                        )
-                        self._dump_api_request_debug(
-                            api_kwargs, reason="max_retries_exhausted", error=api_error,
-                        )
+                        if api_kwargs is not None:
+                            self._dump_api_request_debug(
+                                api_kwargs, reason="max_retries_exhausted", error=api_error,
+                            )
                        self._persist_session(messages, conversation_history)
                        _final_response = f"API call failed after {max_retries} retries: {_final_summary}"
                        if _is_stream_drop:
@@ -9279,7 +9583,8 @@ class AIAgent:
                        fallback = getattr(self, '_last_content_with_tools', None)
                        if fallback:
                            _turn_exit_reason = "fallback_prior_turn_content"
-                            logger.debug("Empty follow-up after tool calls — using prior turn content as final response")
+                            logger.info("Empty follow-up after tool calls — using prior turn content as final response")
+                            self._emit_status("↻ Empty response after tool calls — using earlier content as final answer")
                            self._last_content_with_tools = None
                            self._empty_content_retries = 0
                            for i in range(len(messages) - 1, -1, -1):
@@ -9310,9 +9615,13 @@ class AIAgent:
                        )
                        if _has_structured and self._thinking_prefill_retries < 2:
                            self._thinking_prefill_retries += 1
-                            self._vprint(
-                                f"{self.log_prefix}↻ Thinking-only response — "
-                                f"prefilling to continue "
+                            logger.info(
+                                "Thinking-only response (no visible content) — "
+                                "prefilling to continue (%d/2)",
+                                self._thinking_prefill_retries,
+                            )
+                            self._emit_status(
+                                f"↻ Thinking-only response — prefilling to continue "
                                f"({self._thinking_prefill_retries}/2)"
                            )
                            interim_msg = self._build_assistant_message(
@@ -9328,23 +9637,57 @@ class AIAgent:
                        # Model returned nothing — no content, no
                        # structured reasoning, no tool calls.  Common
                        # with open models (transient provider issues,
-                        # rate limits, sampling flukes).  Silently retry
-                        # up to 3 times before giving up.  Skip when
+                        # rate limits, sampling flukes).  Retry up to 3
+                        # times before attempting fallback.  Skip when
                        # content has inline <think> tags (model chose
                        # to reason, just no visible text).
                        _truly_empty = not final_response.strip()
                        if _truly_empty and not _has_structured and self._empty_content_retries < 3:
                            self._empty_content_retries += 1
-                            self._vprint(
-                                f"{self.log_prefix}↻ Empty response (no content or reasoning) "
-                                f"— retrying ({self._empty_content_retries}/3)",
-                                force=True,
+                            logger.warning(
+                                "Empty response (no content or reasoning) — "
+                                "retry %d/3 (model=%s)",
+                                self._empty_content_retries, self.model,
+                            )
+                            self._emit_status(
+                                f"⚠️ Empty response from model — retrying "
+                                f"({self._empty_content_retries}/3)"
                            )
                            continue

-                        # Exhausted prefill attempts, empty retries, or
-                        # structured reasoning with no content —
-                        # fall through to "(empty)" terminal.
+                        # ── Exhausted retries — try fallback provider ──
+                        # Before giving up with "(empty)", attempt to
+                        # switch to the next provider in the fallback
+                        # chain.  This covers the case where a model
+                        # (e.g. GLM-4.5-Air) consistently returns empty
+                        # due to context degradation or provider issues.
+                        if _truly_empty and self._fallback_chain:
+                            logger.warning(
+                                "Empty response after %d retries — "
+                                "attempting fallback (model=%s, provider=%s)",
+                                self._empty_content_retries, self.model,
+                                self.provider,
+                            )
+                            self._emit_status(
+                                "⚠️ Model returning empty responses — "
+                                "switching to fallback provider..."
+                            )
+                            if self._try_activate_fallback():
+                                self._empty_content_retries = 0
+                                self._emit_status(
+                                    f"↻ Switched to fallback: {self.model} "
+                                    f"({self.provider})"
+                                )
+                                logger.info(
+                                    "Fallback activated after empty responses: "
+                                    "now using %s on %s",
+                                    self.model, self.provider,
+                                )
+                                continue
+
+                        # Exhausted retries and fallback chain (or no
+                        # fallback configured).  Fall through to the
+                        # "(empty)" terminal.
                        _turn_exit_reason = "empty_response_exhausted"
                        reasoning_text = self._extract_reasoning(assistant_message)
                        assistant_msg = self._build_assistant_message(assistant_message, finish_reason)
@@ -9353,9 +9696,28 @@ class AIAgent:

                        if reasoning_text:
                            reasoning_preview = reasoning_text[:500] + "..." if len(reasoning_text) > 500 else reasoning_text
-                            self._vprint(f"{self.log_prefix}ℹ️  Reasoning-only response (no visible content). Reasoning: {reasoning_preview}")
+                            logger.warning(
+                                "Reasoning-only response (no visible content) "
+                                "after exhausting retries and fallback. "
+                                "Reasoning: %s", reasoning_preview,
+                            )
+                            self._emit_status(
+                                "⚠️ Model produced reasoning but no visible "
+                                "response after all retries. Returning empty."
+                            )
                        else:
-                            self._vprint(f"{self.log_prefix}ℹ️  Empty response (no content or reasoning) after 3 retries.")
+                            logger.warning(
+                                "Empty response (no content or reasoning) "
+                                "after %d retries. No fallback available. "
+                                "model=%s provider=%s",
+                                self._empty_content_retries, self.model,
+                                self.provider,
+                            )
+                            self._emit_status(
+                                "❌ Model returned no content after all retries"
+                                + (" and fallback attempts." if self._fallback_chain else
+                                   ". No fallback providers configured.")
+                            )

                        final_response = "(empty)"
                        break
@@ -249,8 +249,12 @@ def check_config(groq_key, eleven_key):

            if stt_provider == "groq" and not groq_key:
                warn("STT config says groq but GROQ_API_KEY is missing")
+            if stt_provider == "mistral" and not os.getenv("MISTRAL_API_KEY"):
+                warn("STT config says mistral but MISTRAL_API_KEY is missing")
            if tts_provider == "elevenlabs" and not eleven_key:
                warn("TTS config says elevenlabs but ELEVENLABS_API_KEY is missing")
+            if tts_provider == "mistral" and not os.getenv("MISTRAL_API_KEY"):
+                warn("TTS config says mistral but MISTRAL_API_KEY is missing")
        except Exception as e:
            warn("config.yaml", f"parse error: {e}")
    else:
@@ -1082,10 +1082,19 @@ install_node_deps() {
        log_success "Node.js dependencies installed"

        # Install Playwright browser + system dependencies.
-        # Playwright's install-deps only supports apt/dnf/zypper natively.
+        # Playwright's --with-deps only supports apt-based systems natively.
        # For Arch/Manjaro we install the system libs via pacman first.
+        # Other systems must install Chromium dependencies manually.
        log_info "Installing browser engine (Playwright Chromium)..."
        case "$DISTRO" in
+            ubuntu|debian|raspbian|pop|linuxmint|elementary|zorin|kali|parrot)
+                log_info "Playwright may request sudo to install browser system dependencies (shared libraries)."
+                log_info "This is standard Playwright setup — Hermes itself does not require root access."
+                cd "$INSTALL_DIR" && npx playwright install --with-deps chromium 2>/dev/null || {
+                    log_warn "Playwright browser installation failed — browser tools will not work."
+                    log_warn "Try running manually: cd $INSTALL_DIR && npx playwright install --with-deps chromium"
+                }
+                ;;
            arch|manjaro)
                if command -v pacman &> /dev/null; then
                    log_info "Arch/Manjaro detected — installing Chromium system dependencies via pacman..."
@@ -1100,15 +1109,35 @@ install_node_deps() {
                        log_warn "  sudo pacman -S nss atk at-spi2-core cups libdrm libxkbcommon mesa pango cairo alsa-lib"
                    fi
                fi
-                cd "$INSTALL_DIR" && npx playwright install chromium 2>/dev/null || true
+                cd "$INSTALL_DIR" && npx playwright install chromium 2>/dev/null || {
+                    log_warn "Playwright browser installation failed — browser tools will not work."
+                }
+                ;;
+            fedora|rhel|centos|rocky|alma)
+                log_warn "Playwright does not support automatic dependency installation on RPM-based systems."
+                log_info "Install Chromium system dependencies manually before using browser tools:"
+                log_info "  sudo dnf install nss atk at-spi2-core cups-libs libdrm libxkbcommon mesa-libgbm pango cairo alsa-lib"
+                cd "$INSTALL_DIR" && npx playwright install chromium 2>/dev/null || {
+                    log_warn "Playwright browser installation failed — install dependencies above and retry."
+                }
+                ;;
+            opensuse*|sles)
+                log_warn "Playwright does not support automatic dependency installation on zypper-based systems."
+                log_info "Install Chromium system dependencies manually before using browser tools:"
+                log_info "  sudo zypper install mozilla-nss libatk-1_0-0 at-spi2-core cups-libs libdrm2 libxkbcommon0 Mesa-libgbm1 pango cairo libasound2"
+                cd "$INSTALL_DIR" && npx playwright install chromium 2>/dev/null || {
+                    log_warn "Playwright browser installation failed — install dependencies above and retry."
+                }
                ;;
            *)
-                log_info "Playwright may request sudo to install browser system dependencies (shared libraries)."
-                log_info "This is standard Playwright setup — Hermes itself does not require root access."
-                cd "$INSTALL_DIR" && npx playwright install --with-deps chromium 2>/dev/null || true
+                log_warn "Playwright does not support automatic dependency installation on $DISTRO."
+                log_info "Install Chromium/browser system dependencies for your distribution, then run:"
+                log_info "  cd $INSTALL_DIR && npx playwright install chromium"
+                log_info "Browser tools will not work until dependencies are installed."
+                cd "$INSTALL_DIR" && npx playwright install chromium 2>/dev/null || true
                ;;
        esac
-        log_success "Browser engine installed"
+        log_success "Browser engine setup complete"
    fi

    # Install WhatsApp bridge dependencies
@@ -203,3 +203,30 @@ For segmented videos (quotes, scenes, chapters), render each as a separate clip
 | `references/inputs.md` | Audio analysis (FFT, bands, beats), video sampling, image conversion, text/lyrics, TTS integration (ElevenLabs, voice assignment, audio mixing) |
 | `references/optimization.md` | Hardware detection, quality profiles, vectorized patterns, parallel rendering, memory management, performance budgets |
 | `references/troubleshooting.md` | NumPy broadcasting traps, blend mode pitfalls, multiprocessing/pickling, brightness diagnostics, ffmpeg issues, font problems, common mistakes |
+
+---
+
+## Creative Divergence (use only when user requests experimental/creative/unique output)
+
+If the user asks for creative, experimental, surprising, or unconventional output, select the strategy that best fits and reason through its steps BEFORE generating code.
+
+- **Forced Connections** — when the user wants cross-domain inspiration ("make it look organic," "industrial aesthetic")
+- **Conceptual Blending** — when the user names two things to combine ("ocean meets music," "space + calligraphy")
+- **Oblique Strategies** — when the user is maximally open ("surprise me," "something I've never seen")
+
+### Forced Connections
+1. Pick a domain unrelated to the visual goal (weather systems, microbiology, architecture, fluid dynamics, textile weaving)
+2. List its core visual/structural elements (erosion → gradual reveal; mitosis → splitting duplication; weaving → interlocking patterns)
+3. Map those elements onto ASCII characters and animation patterns
+4. Synthesize — what does "erosion" or "crystallization" look like in a character grid?
+
+### Conceptual Blending
+1. Name two distinct visual/conceptual spaces (e.g., ocean waves + sheet music)
+2. Map correspondences (crests = high notes, troughs = rests, foam = staccato)
+3. Blend selectively — keep the most interesting mappings, discard forced ones
+4. Develop emergent properties that exist only in the blend
+
+### Oblique Strategies
+1. Draw one: "Honor thy error as a hidden intention" / "Use an old idea" / "What would your closest friend do?" / "Emphasize the flaws" / "Turn it upside down" / "Only a part, not the whole" / "Reverse"
+2. Interpret the directive against the current ASCII animation challenge
+3. Apply the lateral insight to the visual design before writing code
@@ -0,0 +1,147 @@
+---
+name: ideation
+title: Creative Ideation — Constraint-Driven Project Generation
+description: "Generate project ideas through creative constraints. Use when the user says 'I want to build something', 'give me a project idea', 'I'm bored', 'what should I make', 'inspire me', or any variant of 'I have tools but no direction'. Works for code, art, hardware, writing, tools, and anything that can be made."
+version: 1.0.0
+author: SHL0MS
+license: MIT
+metadata:
+  hermes:
+    tags: [Creative, Ideation, Projects, Brainstorming, Inspiration]
+    category: creative
+    requires_toolsets: []
+---
+
+# Creative Ideation
+
+Generate project ideas through creative constraints. Constraint + direction = creativity.
+
+## How It Works
+
+1. **Pick a constraint** from the library below — random, or matched to the user's domain/mood
+2. **Interpret it broadly** — a coding prompt can become a hardware project, an art prompt can become a CLI tool
+3. **Generate 3 concrete project ideas** that satisfy the constraint
+4. **If they pick one, build it** — create the project, write the code, ship it
+
+## The Rule
+
+Every prompt is interpreted as broadly as possible. "Does this include X?" → Yes. The prompts provide direction and mild constraint. Without either, there is no creativity.
+
+## Constraint Library
+
+### For Developers
+
+**Solve your own itch:**
+Build the tool you wished existed this week. Under 50 lines. Ship it today.
+
+**Automate the annoying thing:**
+What's the most tedious part of your workflow? Script it away. Two hours to fix a problem that costs you five minutes a day.
+
+**The CLI tool that should exist:**
+Think of a command you've wished you could type. `git undo-that-thing-i-just-did`. `docker why-is-this-broken`. `npm explain-yourself`. Now build it.
+
+**Nothing new except glue:**
+Make something entirely from existing APIs, libraries, and datasets. The only original contribution is how you connect them.
+
+**Frankenstein week:**
+Take something that does X and make it do Y. A git repo that plays music. A Dockerfile that generates poetry. A cron job that sends compliments.
+
+**Subtract:**
+How much can you remove from a codebase before it breaks? Strip a tool to its minimum viable function. Delete until only the essence remains.
+
+**High concept, low effort:**
+A deep idea, lazily executed. The concept should be brilliant. The implementation should take an afternoon. If it takes longer, you're overthinking it.
+
+### For Makers & Artists
+
+**Blatantly copy something:**
+Pick something you admire — a tool, an artwork, an interface. Recreate it from scratch. The learning is in the gap between your version and theirs.
+
+**One million of something:**
+One million is both a lot and not that much. One million pixels is a 1MB photo. One million API calls is a Tuesday. One million of anything becomes interesting at scale.
+
+**Make something that dies:**
+A website that loses a feature every day. A chatbot that forgets. A countdown to nothing. An exercise in rot, killing, or letting go.
+
+**Do a lot of math:**
+Generative geometry, shader golf, mathematical art, computational origami. Time to re-learn what an arcsin is.
+
+### For Anyone
+
+**Text is the universal interface:**
+Build something where text is the only interface. No buttons, no graphics, just words in and words out. Text can go in and out of almost anything.
+
+**Start at the punchline:**
+Think of something that would be a funny sentence. Work backwards to make it real. "I taught my thermostat to gaslight me" → now build it.
+
+**Hostile UI:**
+Make something intentionally painful to use. A password field that requires 47 conditions. A form where every label lies. A CLI that judges your commands.
+
+**Take two:**
+Remember an old project. Do it again from scratch. No looking at the original. See what changed about how you think.
+
+See `references/full-prompt-library.md` for 30+ additional constraints across communication, scale, philosophy, transformation, and more.
+
+## Matching Constraints to Users
+
+| User says | Pick from |
+|-----------|-----------|
+| "I want to build something" (no direction) | Random — any constraint |
+| "I'm learning [language]" | Blatantly copy something, Automate the annoying thing |
+| "I want something weird" | Hostile UI, Frankenstein week, Start at the punchline |
+| "I want something useful" | Solve your own itch, The CLI that should exist, Automate the annoying thing |
+| "I want something beautiful" | Do a lot of math, One million of something |
+| "I'm burned out" | High concept low effort, Make something that dies |
+| "Weekend project" | Nothing new except glue, Start at the punchline |
+| "I want a challenge" | One million of something, Subtract, Take two |
+
+## Output Format
+
+```
+## Constraint: [Name]
+> [The constraint, one sentence]
+
+### Ideas
+
+1. **[One-line pitch]**
+   [2-3 sentences: what you'd build and why it's interesting]
+   ⏱ [weekend / week / month] • 🔧 [stack]
+
+2. **[One-line pitch]**
+   [2-3 sentences]
+   ⏱ ... • 🔧 ...
+
+3. **[One-line pitch]**
+   [2-3 sentences]
+   ⏱ ... • 🔧 ...
+```
+
+## Example
+
+```
+## Constraint: The CLI tool that should exist
+> Think of a command you've wished you could type. Now build it.
+
+### Ideas
+
+1. **`git whatsup` — show what happened while you were away**
+   Compares your last active commit to HEAD and summarizes what changed,
+   who committed, and what PRs merged. Like a morning standup from your repo.
+   ⏱ weekend • 🔧 Python, GitPython, click
+
+2. **`explain 503` — HTTP status codes for humans**
+   Pipe any status code or error message and get a plain-English explanation
+   with common causes and fixes. Pulls from a curated database, not an LLM.
+   ⏱ weekend • 🔧 Rust or Go, static dataset
+
+3. **`deps why <package>` — why is this in my dependency tree**
+   Traces a transitive dependency back to the direct dependency that pulled
+   it in. Answers "why do I have 47 copies of lodash" in one command.
+   ⏱ weekend • 🔧 Node.js, npm/yarn lockfile parsing
+```
+
+After the user picks one, start building — create the project, write the code, iterate.
+
+## Attribution
+
+Constraint approach inspired by [wttdotm.com/prompts.html](https://wttdotm.com/prompts.html). Adapted and expanded for software development and general-purpose ideation.
@@ -0,0 +1,110 @@
+# Full Prompt Library
+
+Extended constraint library beyond the core set in SKILL.md. Load these when the user wants more variety or a specific category.
+
+## Communication & Connection
+
+**Create a means of distribution:**
+The project works when you can use what you made to give something to somebody else.
+
+**Make a way to communicate:**
+The project works when you can hold a conversation with someone else using what you created. Not chat — something weirder.
+
+**Write a love letter:**
+To a person, a programming language, a game, a place, a tool. On paper, in code, in music, in light. Mail it.
+
+**Mail chess / Asynchronous games:**
+Something turn-based played with no time limit. No requirement to be there at the same time. The game happens in the gaps.
+
+**Twitch plays X:**
+A group of people share control over something. Collective input, emergent behavior.
+
+## Screens & Interfaces
+
+**Something for your desktop:**
+You spend a lot of time there. Spruce it up. A custom clock, a pet that lives in your terminal, a wallpaper that changes based on your git activity.
+
+**One screen, two screen, old screen, new screen:**
+Take something you associate with one screen and put it on a very different one. DOOM on a smart fridge. A spreadsheet on a watch. A terminal in a painting.
+
+**Make a mirror:**
+Something that reflects the viewer back at themselves. A website that shows your browsing history. A CLI that prints your git sins.
+
+## Philosophy & Concept
+
+**Code as koan, koan as code:**
+What is the sound of one hand clapping? A program that answers a question it wasn't asked. A function that returns before it's called.
+
+**The useless tree:**
+Make something useless. Deliberately, completely, beautifully useless. No utility. No purpose. No point. That's the point.
+
+**Artificial stupidity:**
+Make fun of AI by showcasing its faults. Mistrain it. Lie to it. Build the opposite of what AI is supposed to be good at.
+
+**"I use technology in order to hate it properly":**
+Make something inspired by the tension between loving and hating your tools.
+
+**The more things change, the more they stay the same:**
+Reflect on time, difference, and similarity.
+
+## Transformation
+
+**Translate:**
+Take something meant for one audience and make it understandable by another. A research paper as a children's book. An API as a board game. A song as an architecture diagram.
+
+**I mean, I GUESS you could store something that way:**
+The project works when you can save and open something. Store data in DNS caches. Encode a novel in emoji. Write a file system on top of something that isn't a file system.
+
+**I mean, I GUESS those could be pixels:**
+The project works when you can display an image. Render anything visual in a medium that wasn't meant for rendering.
+
+## Identity & Reflection
+
+**Make a self-portrait:**
+Be yourself? Be fake? Be real? In code, in data, in sound, in a directory structure.
+
+**Make a pun:**
+The stupider the better. Physical, digital, linguistic, visual. The project IS the joke.
+
+**Doors, walls, borders, barriers, boundaries:**
+Things that intermediate two places: opening, closing, permeating, excluding, combining.
+
+## Scale & Repetition
+
+**Lists!:**
+Itemizations, taxonomies, exhaustive recountings, iterations. This one. A list of list of lists.
+
+**Did you mean *recursion*?**
+Did you mean recursion?
+
+**Animals:**
+Lions, and tigers, and bears. Crab logic gates. Fish plays the stock market.
+
+**Cats:**
+Where would the internet be without them.
+
+## Starting Points
+
+**An idea that comes from a book:**
+Read something. Make something inspired by it.
+
+**Go to a museum:**
+Project ensues.
+
+**NPC loot:**
+What do you drop when you die? What do you take on your journey? Build the item.
+
+**Mythological objects and entities:**
+Pandora's box, the ocarina of time, the palantir. Build the artifact.
+
+**69:**
+Nice. Make something with the joke being the number 69.
+
+**Office Space printer scene:**
+Capture the same energy. Channel the catharsis of destroying the thing that frustrates you.
+
+**Borges week:**
+Something inspired by the Argentine. The library of babel. The map that is the territory.
+
+**Lights!:**
+LED throwies, light installations, illuminated anything. Make something that glows.
@@ -239,3 +239,26 @@ Always iterate at `-ql`. Only render `-qh` for final output.
 | `references/paper-explainer.md` | Turning research papers into animations — workflow, templates, domain patterns |
 | `references/decorations.md` | SurroundingRectangle, Brace, arrows, DashedLine, Angle, annotation lifecycle |
 | `references/production-quality.md` | Pre-code, pre-render, post-render checklists, spatial layout, color, tempo |
+
+---
+
+## Creative Divergence (use only when user requests experimental/creative/unique output)
+
+If the user asks for creative, experimental, or unconventional explanatory approaches, select a strategy and reason through it BEFORE designing the animation.
+
+- **SCAMPER** — when the user wants a fresh take on a standard explanation
+- **Assumption Reversal** — when the user wants to challenge how something is typically taught
+
+### SCAMPER Transformation
+Take a standard mathematical/technical visualization and transform it:
+- **Substitute**: replace the standard visual metaphor (number line → winding path, matrix → city grid)
+- **Combine**: merge two explanation approaches (algebraic + geometric simultaneously)
+- **Reverse**: derive backward — start from the result and deconstruct to axioms
+- **Modify**: exaggerate a parameter to show why it matters (10x the learning rate, 1000x the sample size)
+- **Eliminate**: remove all notation — explain purely through animation and spatial relationships
+
+### Assumption Reversal
+1. List what's "standard" about how this topic is visualized (left-to-right, 2D, discrete steps, formal notation)
+2. Pick the most fundamental assumption
+3. Reverse it (right-to-left derivation, 3D embedding of a 2D concept, continuous morphing instead of steps, zero notation)
+4. Explore what the reversal reveals that the standard approach hides
@@ -511,3 +511,37 @@ When building p5.js sketches:
 | `references/export-pipeline.md` | `saveCanvas()`, `saveGif()`, `saveFrames()`, deterministic headless capture, ffmpeg frame-to-video, CCapture.js, SVG export, per-clip architecture, platform export (fxhash), video gotchas |
 | `references/troubleshooting.md` | Performance profiling, per-pixel budgets, common mistakes, browser compatibility, WebGL debugging, font loading issues, pixel density traps, memory leaks, CORS |
 | `templates/viewer.html` | Interactive viewer template: seed navigation (prev/next/random/jump), parameter sliders, download PNG, responsive canvas. Start from this for explorable generative art |
+
+---
+
+## Creative Divergence (use only when user requests experimental/creative/unique output)
+
+If the user asks for creative, experimental, surprising, or unconventional output, select the strategy that best fits and reason through its steps BEFORE generating code.
+
+- **Conceptual Blending** — when the user names two things to combine or wants hybrid aesthetics
+- **SCAMPER** — when the user wants a twist on a known generative art pattern
+- **Distance Association** — when the user gives a single concept and wants exploration ("make something about time")
+
+### Conceptual Blending
+1. Name two distinct visual systems (e.g., particle physics + handwriting)
+2. Map correspondences (particles = ink drops, forces = pen pressure, fields = letterforms)
+3. Blend selectively — keep mappings that produce interesting emergent visuals
+4. Code the blend as a unified system, not two systems side-by-side
+
+### SCAMPER Transformation
+Take a known generative pattern (flow field, particle system, L-system, cellular automata) and systematically transform it:
+- **Substitute**: replace circles with text characters, lines with gradients
+- **Combine**: merge two patterns (flow field + voronoi)
+- **Adapt**: apply a 2D pattern to a 3D projection
+- **Modify**: exaggerate scale, warp the coordinate space
+- **Purpose**: use a physics sim for typography, a sorting algorithm for color
+- **Eliminate**: remove the grid, remove color, remove symmetry
+- **Reverse**: run the simulation backward, invert the parameter space
+
+### Distance Association
+1. Anchor on the user's concept (e.g., "loneliness")
+2. Generate associations at three distances:
+   - Close (obvious): empty room, single figure, silence
+   - Medium (interesting): one fish in a school swimming the wrong way, a phone with no notifications, the gap between subway cars
+   - Far (abstract): prime numbers, asymptotic curves, the color of 3am
+3. Develop the medium-distance associations — they're specific enough to visualize but unexpected enough to be interesting
@@ -39,8 +39,13 @@ class TestIsOAuthToken:
        assert _is_oauth_token("sk-ant-api03-abcdef1234567890") is False

    def test_managed_key(self):
-        # Managed keys from ~/.claude.json are NOT regular API keys
-        assert _is_oauth_token("ou1R1z-ft0A-bDeZ9wAA") is True
+        # Managed keys from ~/.claude.json without a recognisable Anthropic
+        # prefix are not positively identified as OAuth.  They enter the system
+        # via diagnostics-only read_claude_managed_key(), not via
+        # resolve_anthropic_token(), so they don't reach the OAuth gate in
+        # practice.  Third-party provider keys (MiniMax, Alibaba) also lack
+        # the sk-ant- prefix and must NOT be treated as OAuth.
+        assert _is_oauth_token("ou1R1z-ft0A-bDeZ9wAA") is False

    def test_jwt_token(self):
        # JWTs from OAuth flow
@@ -1,9 +1,10 @@
 """Tests for agent.auxiliary_client resolution chain, provider overrides, and model overrides."""

 import json
+import logging
 import os
 from pathlib import Path
-from unittest.mock import patch, MagicMock
+from unittest.mock import patch, MagicMock, AsyncMock

 import pytest

@@ -14,6 +15,7 @@ from agent.auxiliary_client import (
    resolve_provider_client,
    auxiliary_max_tokens_param,
    call_llm,
+    async_call_llm,
    _read_codex_access_token,
    _get_auxiliary_provider,
    _get_provider_chain,
@@ -658,6 +660,19 @@ class TestGetTextAuxiliaryClient:
        assert client is None
        assert model is None

+    def test_custom_endpoint_uses_codex_wrapper_when_runtime_requests_responses_api(self):
+        with patch("agent.auxiliary_client._resolve_custom_runtime",
+                   return_value=("https://api.openai.com/v1", "sk-test", "codex_responses")), \
+             patch("agent.auxiliary_client._read_main_model", return_value="gpt-5.3-codex"), \
+             patch("agent.auxiliary_client.OpenAI") as mock_openai:
+            client, model = get_text_auxiliary_client()
+
+        from agent.auxiliary_client import CodexAuxiliaryClient
+        assert isinstance(client, CodexAuxiliaryClient)
+        assert model == "gpt-5.3-codex"
+        assert mock_openai.call_args.kwargs["base_url"] == "https://api.openai.com/v1"
+        assert mock_openai.call_args.kwargs["api_key"] == "sk-test"
+

 class TestVisionClientFallback:
    """Vision client auto mode resolves known-good multimodal backends."""
@@ -743,6 +758,69 @@ class TestAuxiliaryPoolAwareness:
        assert call_kwargs["base_url"] == "https://api.githubcopilot.com"
        assert call_kwargs["default_headers"]["Editor-Version"]

+    def test_copilot_responses_api_model_wrapped_in_codex_client(self, monkeypatch):
+        """Copilot GPT-5+ models (needing Responses API) are wrapped in CodexAuxiliaryClient."""
+        monkeypatch.delenv("GITHUB_TOKEN", raising=False)
+        monkeypatch.delenv("GH_TOKEN", raising=False)
+
+        with (
+            patch(
+                "hermes_cli.auth.resolve_api_key_provider_credentials",
+                return_value={
+                    "provider": "copilot",
+                    "api_key": "test-token",
+                    "base_url": "https://api.githubcopilot.com",
+                    "source": "gh auth token",
+                },
+            ),
+            patch("agent.auxiliary_client.OpenAI"),
+        ):
+            client, model = resolve_provider_client("copilot", model="gpt-5.4-mini")
+
+        from agent.auxiliary_client import CodexAuxiliaryClient
+        assert isinstance(client, CodexAuxiliaryClient)
+        assert model == "gpt-5.4-mini"
+
+    def test_copilot_chat_completions_model_not_wrapped(self, monkeypatch):
+        """Copilot models using Chat Completions are returned as plain OpenAI clients."""
+        monkeypatch.delenv("GITHUB_TOKEN", raising=False)
+        monkeypatch.delenv("GH_TOKEN", raising=False)
+
+        with (
+            patch(
+                "hermes_cli.auth.resolve_api_key_provider_credentials",
+                return_value={
+                    "provider": "copilot",
+                    "api_key": "test-token",
+                    "base_url": "https://api.githubcopilot.com",
+                    "source": "gh auth token",
+                },
+            ),
+            patch("agent.auxiliary_client.OpenAI") as mock_openai,
+        ):
+            client, model = resolve_provider_client("copilot", model="gpt-4.1-mini")
+
+        from agent.auxiliary_client import CodexAuxiliaryClient
+        assert not isinstance(client, CodexAuxiliaryClient)
+        assert model == "gpt-4.1-mini"
+        # Should be the raw mock OpenAI client
+        assert client is mock_openai.return_value
+
+    def test_vision_auto_uses_active_provider_as_fallback(self, monkeypatch):
+        """When no OpenRouter/Nous available, vision auto falls back to active provider."""
+        monkeypatch.setenv("ANTHROPIC_API_KEY", "***")
+        with (
+            patch("agent.auxiliary_client._read_nous_auth", return_value=None),
+            patch("agent.auxiliary_client._read_main_provider", return_value="anthropic"),
+            patch("agent.auxiliary_client._read_main_model", return_value="claude-sonnet-4"),
+            patch("agent.anthropic_adapter.build_anthropic_client", return_value=MagicMock()),
+            patch("agent.anthropic_adapter.resolve_anthropic_token", return_value="***"),
+        ):
+            client, model = get_vision_auxiliary_client()
+
+        assert client is not None
+        assert client.__class__.__name__ == "AnthropicAuxiliaryClient"
+
    def test_vision_auto_prefers_active_provider_over_openrouter(self, monkeypatch):
        """Active provider is tried before OpenRouter in vision auto."""
        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
@@ -1046,8 +1124,8 @@ class TestCallLlmPaymentFallback:
        exc.status_code = 402
        return exc

-    def test_402_triggers_fallback(self, monkeypatch):
-        """When the primary provider returns 402, call_llm tries the next one."""
+    def test_402_triggers_fallback_when_auto(self, monkeypatch):
+        """When provider is auto and returns 402, call_llm tries the next one."""
        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")

        primary_client = MagicMock()
@@ -1060,7 +1138,7 @@ class TestCallLlmPaymentFallback:
        with patch("agent.auxiliary_client._get_cached_client",
                    return_value=(primary_client, "google/gemini-3-flash-preview")), \
             patch("agent.auxiliary_client._resolve_task_provider_model",
-                    return_value=("openrouter", "google/gemini-3-flash-preview", None, None)), \
+                    return_value=("auto", "google/gemini-3-flash-preview", None, None, None)), \
             patch("agent.auxiliary_client._try_payment_fallback",
                    return_value=(fallback_client, "gpt-5.2-codex", "openai-codex")) as mock_fb:
            result = call_llm(
@@ -1069,13 +1147,62 @@ class TestCallLlmPaymentFallback:
            )

        assert result is fallback_response
-        mock_fb.assert_called_once_with("openrouter", "compression")
+        mock_fb.assert_called_once_with("auto", "compression", reason="payment error")
        # Fallback call should use the fallback model
        fb_kwargs = fallback_client.chat.completions.create.call_args.kwargs
        assert fb_kwargs["model"] == "gpt-5.2-codex"

+    def test_402_no_fallback_when_explicit_provider(self, monkeypatch):
+        """When provider is explicitly configured (not auto), 402 should NOT fallback (#7559)."""
+        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
+
+        primary_client = MagicMock()
+        primary_client.chat.completions.create.side_effect = self._make_402_error()
+
+        with patch("agent.auxiliary_client._get_cached_client",
+                    return_value=(primary_client, "local-model")), \
+             patch("agent.auxiliary_client._resolve_task_provider_model",
+                    return_value=("custom", "local-model", None, None, None)), \
+             patch("agent.auxiliary_client._try_payment_fallback") as mock_fb:
+            with pytest.raises(Exception, match="insufficient credits"):
+                call_llm(
+                    task="compression",
+                    messages=[{"role": "user", "content": "hello"}],
+                )
+
+        # Fallback should NOT be attempted when provider is explicit
+        mock_fb.assert_not_called()
+
+    def test_connection_error_triggers_fallback_when_auto(self, monkeypatch):
+        """Connection errors also trigger fallback when provider is auto."""
+        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
+
+        primary_client = MagicMock()
+        conn_err = Exception("Connection refused")
+        conn_err.status_code = None
+        primary_client.chat.completions.create.side_effect = conn_err
+
+        fallback_client = MagicMock()
+        fallback_response = MagicMock()
+        fallback_client.chat.completions.create.return_value = fallback_response
+
+        with patch("agent.auxiliary_client._get_cached_client",
+                    return_value=(primary_client, "model")), \
+             patch("agent.auxiliary_client._resolve_task_provider_model",
+                    return_value=("auto", "model", None, None, None)), \
+             patch("agent.auxiliary_client._is_connection_error", return_value=True), \
+             patch("agent.auxiliary_client._try_payment_fallback",
+                    return_value=(fallback_client, "fb-model", "nous")) as mock_fb:
+            result = call_llm(
+                task="compression",
+                messages=[{"role": "user", "content": "hello"}],
+            )
+
+        assert result is fallback_response
+        mock_fb.assert_called_once_with("auto", "compression", reason="connection error")
+
    def test_non_payment_error_not_caught(self, monkeypatch):
-        """Non-payment errors (500, connection, etc.) should NOT trigger fallback."""
+        """Non-payment/non-connection errors (500) should NOT trigger fallback."""
        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")

        primary_client = MagicMock()
@@ -1086,7 +1213,7 @@ class TestCallLlmPaymentFallback:
        with patch("agent.auxiliary_client._get_cached_client",
                    return_value=(primary_client, "google/gemini-3-flash-preview")), \
             patch("agent.auxiliary_client._resolve_task_provider_model",
-                    return_value=("openrouter", "google/gemini-3-flash-preview", None, None)):
+                    return_value=("auto", "google/gemini-3-flash-preview", None, None, None)):
            with pytest.raises(Exception, match="Internal Server Error"):
                call_llm(
                    task="compression",
@@ -1103,7 +1230,7 @@ class TestCallLlmPaymentFallback:
        with patch("agent.auxiliary_client._get_cached_client",
                    return_value=(primary_client, "google/gemini-3-flash-preview")), \
             patch("agent.auxiliary_client._resolve_task_provider_model",
-                    return_value=("openrouter", "google/gemini-3-flash-preview", None, None)), \
+                    return_value=("auto", "google/gemini-3-flash-preview", None, None, None)), \
             patch("agent.auxiliary_client._try_payment_fallback",
                    return_value=(None, None, "")):
            with pytest.raises(Exception, match="insufficient credits"):
@@ -1111,3 +1238,325 @@ class TestCallLlmPaymentFallback:
                    task="compression",
                    messages=[{"role": "user", "content": "hello"}],
                )
+
+
+# ---------------------------------------------------------------------------
+# Gate: _resolve_api_key_provider must skip anthropic when not configured
+# ---------------------------------------------------------------------------
+
+
+def test_resolve_api_key_provider_skips_unconfigured_anthropic(monkeypatch):
+    """_resolve_api_key_provider must not try anthropic when user never configured it."""
+    from collections import OrderedDict
+    from hermes_cli.auth import ProviderConfig
+
+    # Build a minimal registry with only "anthropic" so the loop is guaranteed
+    # to reach it without being short-circuited by earlier providers.
+    fake_registry = OrderedDict({
+        "anthropic": ProviderConfig(
+            id="anthropic",
+            name="Anthropic",
+            auth_type="api_key",
+            inference_base_url="https://api.anthropic.com",
+            api_key_env_vars=("ANTHROPIC_API_KEY",),
+        ),
+    })
+
+    called = []
+
+    def mock_try_anthropic():
+        called.append("anthropic")
+        return None, None
+
+    monkeypatch.setattr("agent.auxiliary_client._try_anthropic", mock_try_anthropic)
+    monkeypatch.setattr("hermes_cli.auth.PROVIDER_REGISTRY", fake_registry)
+    monkeypatch.setattr(
+        "hermes_cli.auth.is_provider_explicitly_configured",
+        lambda pid: False,
+    )
+
+    from agent.auxiliary_client import _resolve_api_key_provider
+    _resolve_api_key_provider()
+
+    assert "anthropic" not in called, \
+        "_try_anthropic() should not be called when anthropic is not explicitly configured"
+
+
+# ---------------------------------------------------------------------------
+# model="default" elimination (#7512)
+# ---------------------------------------------------------------------------
+
+
+class TestModelDefaultElimination:
+    """_resolve_api_key_provider must skip providers without known aux models."""
+
+    def test_unknown_provider_skipped(self, monkeypatch):
+        """Providers not in _API_KEY_PROVIDER_AUX_MODELS are skipped, not sent model='default'."""
+        from agent.auxiliary_client import _API_KEY_PROVIDER_AUX_MODELS
+
+        # Verify our known providers have entries
+        assert "gemini" in _API_KEY_PROVIDER_AUX_MODELS
+        assert "kimi-coding" in _API_KEY_PROVIDER_AUX_MODELS
+
+        # A random provider_id not in the dict should return None
+        assert _API_KEY_PROVIDER_AUX_MODELS.get("totally-unknown-provider") is None
+
+    def test_known_provider_gets_real_model(self):
+        """Known providers get a real model name, not 'default'."""
+        from agent.auxiliary_client import _API_KEY_PROVIDER_AUX_MODELS
+
+        for provider_id, model in _API_KEY_PROVIDER_AUX_MODELS.items():
+            assert model != "default", f"{provider_id} should not map to 'default'"
+            assert isinstance(model, str) and model.strip(), \
+                f"{provider_id} should have a non-empty model string"
+
+
+# ---------------------------------------------------------------------------
+# _try_payment_fallback reason parameter (#7512 bug 3)
+# ---------------------------------------------------------------------------
+
+
+class TestTryPaymentFallbackReason:
+    """_try_payment_fallback uses the reason parameter in log messages."""
+
+    def test_reason_parameter_passed_through(self, monkeypatch):
+        """The reason= parameter is accepted without error."""
+        from agent.auxiliary_client import _try_payment_fallback
+
+        # Mock the provider chain to return nothing
+        monkeypatch.setattr(
+            "agent.auxiliary_client._get_provider_chain",
+            lambda: [],
+        )
+        monkeypatch.setattr(
+            "agent.auxiliary_client._read_main_provider",
+            lambda: "",
+        )
+
+        client, model, label = _try_payment_fallback(
+            "openrouter", task="compression", reason="connection error"
+        )
+        assert client is None
+        assert label == ""
+
+
+# ---------------------------------------------------------------------------
+# _is_connection_error coverage
+# ---------------------------------------------------------------------------
+
+
+class TestIsConnectionError:
+    """Tests for _is_connection_error detection."""
+
+    def test_connection_refused(self):
+        from agent.auxiliary_client import _is_connection_error
+        err = Exception("Connection refused")
+        assert _is_connection_error(err) is True
+
+    def test_timeout(self):
+        from agent.auxiliary_client import _is_connection_error
+        err = Exception("Request timed out.")
+        assert _is_connection_error(err) is True
+
+    def test_dns_failure(self):
+        from agent.auxiliary_client import _is_connection_error
+        err = Exception("Name or service not known")
+        assert _is_connection_error(err) is True
+
+    def test_normal_api_error_not_connection(self):
+        from agent.auxiliary_client import _is_connection_error
+        err = Exception("Bad Request: invalid model")
+        err.status_code = 400
+        assert _is_connection_error(err) is False
+
+    def test_500_not_connection(self):
+        from agent.auxiliary_client import _is_connection_error
+        err = Exception("Internal Server Error")
+        err.status_code = 500
+        assert _is_connection_error(err) is False
+
+
+# ---------------------------------------------------------------------------
+# async_call_llm payment / connection fallback (#7512 bug 2)
+# ---------------------------------------------------------------------------
+
+
+class TestAsyncCallLlmFallback:
+    """async_call_llm mirrors call_llm fallback behavior."""
+
+    def _make_402_error(self, msg="Payment Required: insufficient credits"):
+        exc = Exception(msg)
+        exc.status_code = 402
+        return exc
+
+    @pytest.mark.asyncio
+    async def test_402_triggers_async_fallback_when_auto(self, monkeypatch):
+        """When provider is auto and returns 402, async_call_llm tries fallback."""
+        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
+
+        primary_client = MagicMock()
+        primary_client.chat.completions.create = AsyncMock(
+            side_effect=self._make_402_error())
+
+        # Fallback client (sync) returned by _try_payment_fallback
+        fb_sync_client = MagicMock()
+        fb_async_client = MagicMock()
+        fb_response = MagicMock()
+        fb_async_client.chat.completions.create = AsyncMock(return_value=fb_response)
+
+        with patch("agent.auxiliary_client._get_cached_client",
+                    return_value=(primary_client, "google/gemini-3-flash-preview")), \
+             patch("agent.auxiliary_client._resolve_task_provider_model",
+                    return_value=("auto", "google/gemini-3-flash-preview", None, None, None)), \
+             patch("agent.auxiliary_client._try_payment_fallback",
+                    return_value=(fb_sync_client, "gpt-5.2-codex", "openai-codex")) as mock_fb, \
+             patch("agent.auxiliary_client._to_async_client",
+                    return_value=(fb_async_client, "gpt-5.2-codex")):
+            result = await async_call_llm(
+                task="compression",
+                messages=[{"role": "user", "content": "hello"}],
+            )
+
+        assert result is fb_response
+        mock_fb.assert_called_once_with("auto", "compression", reason="payment error")
+
+    @pytest.mark.asyncio
+    async def test_402_no_async_fallback_when_explicit(self, monkeypatch):
+        """When provider is explicit, 402 should NOT trigger async fallback."""
+        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
+
+        primary_client = MagicMock()
+        primary_client.chat.completions.create = AsyncMock(
+            side_effect=self._make_402_error())
+
+        with patch("agent.auxiliary_client._get_cached_client",
+                    return_value=(primary_client, "local-model")), \
+             patch("agent.auxiliary_client._resolve_task_provider_model",
+                    return_value=("custom", "local-model", None, None, None)), \
+             patch("agent.auxiliary_client._try_payment_fallback") as mock_fb:
+            with pytest.raises(Exception, match="insufficient credits"):
+                await async_call_llm(
+                    task="compression",
+                    messages=[{"role": "user", "content": "hello"}],
+                )
+
+        mock_fb.assert_not_called()
+
+    @pytest.mark.asyncio
+    async def test_connection_error_triggers_async_fallback(self, monkeypatch):
+        """Connection errors trigger async fallback when provider is auto."""
+        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
+
+        primary_client = MagicMock()
+        conn_err = Exception("Connection refused")
+        conn_err.status_code = None
+        primary_client.chat.completions.create = AsyncMock(side_effect=conn_err)
+
+        fb_sync_client = MagicMock()
+        fb_async_client = MagicMock()
+        fb_response = MagicMock()
+        fb_async_client.chat.completions.create = AsyncMock(return_value=fb_response)
+
+        with patch("agent.auxiliary_client._get_cached_client",
+                    return_value=(primary_client, "model")), \
+             patch("agent.auxiliary_client._resolve_task_provider_model",
+                    return_value=("auto", "model", None, None, None)), \
+             patch("agent.auxiliary_client._is_connection_error", return_value=True), \
+             patch("agent.auxiliary_client._try_payment_fallback",
+                    return_value=(fb_sync_client, "fb-model", "nous")) as mock_fb, \
+             patch("agent.auxiliary_client._to_async_client",
+                    return_value=(fb_async_client, "fb-model")):
+            result = await async_call_llm(
+                task="compression",
+                messages=[{"role": "user", "content": "hello"}],
+            )
+
+        assert result is fb_response
+        mock_fb.assert_called_once_with("auto", "compression", reason="connection error")
+class TestStaleBaseUrlWarning:
+    """_resolve_auto() warns when OPENAI_BASE_URL conflicts with config provider (#5161)."""
+
+    def test_warns_when_openai_base_url_set_with_named_provider(self, monkeypatch, caplog):
+        """Warning fires when OPENAI_BASE_URL is set but provider is a named provider."""
+        import agent.auxiliary_client as mod
+        # Reset the module-level flag so the warning fires
+        monkeypatch.setattr(mod, "_stale_base_url_warned", False)
+        monkeypatch.setenv("OPENAI_BASE_URL", "http://localhost:11434/v1")
+        monkeypatch.setenv("OPENROUTER_API_KEY", "sk-or-test")
+
+        with patch("agent.auxiliary_client._read_main_provider", return_value="openrouter"), \
+             patch("agent.auxiliary_client._read_main_model", return_value="google/gemini-flash"), \
+             caplog.at_level(logging.WARNING, logger="agent.auxiliary_client"):
+            _resolve_auto()
+
+        assert any("OPENAI_BASE_URL is set" in rec.message for rec in caplog.records), \
+            "Expected a warning about stale OPENAI_BASE_URL"
+        assert mod._stale_base_url_warned is True
+
+    def test_no_warning_when_provider_is_custom(self, monkeypatch, caplog):
+        """No warning when the provider is 'custom' — OPENAI_BASE_URL is expected."""
+        import agent.auxiliary_client as mod
+        monkeypatch.setattr(mod, "_stale_base_url_warned", False)
+        monkeypatch.setenv("OPENAI_BASE_URL", "http://localhost:11434/v1")
+        monkeypatch.setenv("OPENAI_API_KEY", "test-key")
+
+        with patch("agent.auxiliary_client._read_main_provider", return_value="custom"), \
+             patch("agent.auxiliary_client._read_main_model", return_value="llama3"), \
+             patch("agent.auxiliary_client._resolve_custom_runtime",
+                   return_value=("http://localhost:11434/v1", "test-key", None)), \
+             patch("agent.auxiliary_client.OpenAI") as mock_openai, \
+             caplog.at_level(logging.WARNING, logger="agent.auxiliary_client"):
+            mock_openai.return_value = MagicMock()
+            _resolve_auto()
+
+        assert not any("OPENAI_BASE_URL is set" in rec.message for rec in caplog.records), \
+            "Should NOT warn when provider is 'custom'"
+
+    def test_no_warning_when_provider_is_named_custom(self, monkeypatch, caplog):
+        """No warning when the provider is 'custom:myname' — base_url comes from config."""
+        import agent.auxiliary_client as mod
+        monkeypatch.setattr(mod, "_stale_base_url_warned", False)
+        monkeypatch.setenv("OPENAI_BASE_URL", "http://localhost:11434/v1")
+        monkeypatch.setenv("OPENAI_API_KEY", "test-key")
+
+        with patch("agent.auxiliary_client._read_main_provider", return_value="custom:ollama-local"), \
+             patch("agent.auxiliary_client._read_main_model", return_value="llama3"), \
+             patch("agent.auxiliary_client.resolve_provider_client",
+                   return_value=(MagicMock(), "llama3")), \
+             caplog.at_level(logging.WARNING, logger="agent.auxiliary_client"):
+            _resolve_auto()
+
+        assert not any("OPENAI_BASE_URL is set" in rec.message for rec in caplog.records), \
+            "Should NOT warn when provider is 'custom:*'"
+
+    def test_no_warning_when_openai_base_url_not_set(self, monkeypatch, caplog):
+        """No warning when OPENAI_BASE_URL is absent."""
+        import agent.auxiliary_client as mod
+        monkeypatch.setattr(mod, "_stale_base_url_warned", False)
+        monkeypatch.delenv("OPENAI_BASE_URL", raising=False)
+        monkeypatch.setenv("OPENROUTER_API_KEY", "sk-or-test")
+
+        with patch("agent.auxiliary_client._read_main_provider", return_value="openrouter"), \
+             patch("agent.auxiliary_client._read_main_model", return_value="google/gemini-flash"), \
+             caplog.at_level(logging.WARNING, logger="agent.auxiliary_client"):
+            _resolve_auto()
+
+        assert not any("OPENAI_BASE_URL is set" in rec.message for rec in caplog.records), \
+            "Should NOT warn when OPENAI_BASE_URL is not set"
+
+    def test_warning_only_fires_once(self, monkeypatch, caplog):
+        """Warning is suppressed after the first invocation."""
+        import agent.auxiliary_client as mod
+        monkeypatch.setattr(mod, "_stale_base_url_warned", False)
+        monkeypatch.setenv("OPENAI_BASE_URL", "http://localhost:11434/v1")
+        monkeypatch.setenv("OPENROUTER_API_KEY", "sk-or-test")
+
+        with patch("agent.auxiliary_client._read_main_provider", return_value="openrouter"), \
+             patch("agent.auxiliary_client._read_main_model", return_value="google/gemini-flash"), \
+             caplog.at_level(logging.WARNING, logger="agent.auxiliary_client"):
+            _resolve_auto()
+            caplog.clear()
+            _resolve_auto()
+
+        assert not any("OPENAI_BASE_URL is set" in rec.message for rec in caplog.records), \
+            "Warning should not fire a second time"
@@ -12,6 +12,17 @@ def _isolate(tmp_path, monkeypatch):
    hermes_home = tmp_path / ".hermes"
    hermes_home.mkdir()
    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+    for env_var in (
+        "AUXILIARY_VISION_PROVIDER",
+        "AUXILIARY_VISION_MODEL",
+        "AUXILIARY_VISION_BASE_URL",
+        "AUXILIARY_VISION_API_KEY",
+        "CONTEXT_VISION_PROVIDER",
+        "CONTEXT_VISION_MODEL",
+        "CONTEXT_VISION_BASE_URL",
+        "CONTEXT_VISION_API_KEY",
+    ):
+        monkeypatch.delenv(env_var, raising=False)
    # Write a minimal config so load_config doesn't fail
    (hermes_home / "config.yaml").write_text("model:\n  default: test-model\n")

@@ -149,3 +160,83 @@ class TestResolveProviderClientNamedCustom:
        # "coffee" doesn't exist in custom_providers
        client, model = resolve_provider_client("coffee", "test")
        assert client is None
+
+
+class TestResolveProviderClientModelNormalization:
+    """Direct-provider auxiliary routing should normalize models like main runtime."""
+
+    def test_matching_native_prefix_is_stripped_for_main_provider(self, tmp_path):
+        _write_config(tmp_path, {
+            "model": {"default": "zai/glm-5.1", "provider": "zai"},
+        })
+        with (
+            patch("hermes_cli.auth.resolve_api_key_provider_credentials", return_value={
+                "api_key": "glm-key",
+                "base_url": "https://api.z.ai/api/paas/v4",
+            }),
+            patch("agent.auxiliary_client.OpenAI") as mock_openai,
+        ):
+            mock_openai.return_value = MagicMock()
+            from agent.auxiliary_client import resolve_provider_client
+
+            client, model = resolve_provider_client("main", "zai/glm-5.1")
+
+        assert client is not None
+        assert model == "glm-5.1"
+
+    def test_non_matching_prefix_is_preserved_for_direct_provider(self, tmp_path):
+        _write_config(tmp_path, {
+            "model": {"default": "zai/glm-5.1", "provider": "zai"},
+        })
+        with (
+            patch("hermes_cli.auth.resolve_api_key_provider_credentials", return_value={
+                "api_key": "glm-key",
+                "base_url": "https://api.z.ai/api/paas/v4",
+            }),
+            patch("agent.auxiliary_client.OpenAI") as mock_openai,
+        ):
+            mock_openai.return_value = MagicMock()
+            from agent.auxiliary_client import resolve_provider_client
+
+            client, model = resolve_provider_client("zai", "google/gemini-2.5-pro")
+
+        assert client is not None
+        assert model == "google/gemini-2.5-pro"
+
+    def test_aggregator_vendor_slug_is_preserved(self, monkeypatch):
+        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
+        with patch("agent.auxiliary_client.OpenAI") as mock_openai:
+            mock_openai.return_value = MagicMock()
+            from agent.auxiliary_client import resolve_provider_client
+
+            client, model = resolve_provider_client(
+                "openrouter", "anthropic/claude-sonnet-4.6"
+            )
+
+        assert client is not None
+        assert model == "anthropic/claude-sonnet-4.6"
+
+
+class TestResolveVisionProviderClientModelNormalization:
+    """Vision auto-routing should reuse the same provider-specific normalization."""
+
+    def test_vision_auto_strips_matching_main_provider_prefix(self, tmp_path):
+        _write_config(tmp_path, {
+            "model": {"default": "zai/glm-5.1", "provider": "zai"},
+        })
+        with (
+            patch("agent.auxiliary_client._read_nous_auth", return_value=None),
+            patch("hermes_cli.auth.resolve_api_key_provider_credentials", return_value={
+                "api_key": "glm-key",
+                "base_url": "https://api.z.ai/api/paas/v4",
+            }),
+            patch("agent.auxiliary_client.OpenAI") as mock_openai,
+        ):
+            mock_openai.return_value = MagicMock()
+            from agent.auxiliary_client import resolve_vision_provider_client
+
+            provider, client, model = resolve_vision_provider_client()
+
+        assert provider == "zai"
+        assert client is not None
+        assert model == "glm-5.1"
@@ -0,0 +1,250 @@
+"""Tests for the ContextEngine ABC and plugin slot."""
+
+import json
+import pytest
+from typing import Any, Dict, List
+
+from agent.context_engine import ContextEngine
+from agent.context_compressor import ContextCompressor
+
+
+# ---------------------------------------------------------------------------
+# A minimal concrete engine for testing the ABC
+# ---------------------------------------------------------------------------
+
+class StubEngine(ContextEngine):
+    """Minimal engine that satisfies the ABC without doing real work."""
+
+    def __init__(self, context_length=200000, threshold_pct=0.50):
+        self.context_length = context_length
+        self.threshold_tokens = int(context_length * threshold_pct)
+        self._compress_called = False
+        self._tools_called = []
+
+    @property
+    def name(self) -> str:
+        return "stub"
+
+    def update_from_response(self, usage: Dict[str, Any]) -> None:
+        self.last_prompt_tokens = usage.get("prompt_tokens", 0)
+        self.last_completion_tokens = usage.get("completion_tokens", 0)
+        self.last_total_tokens = usage.get("total_tokens", 0)
+
+    def should_compress(self, prompt_tokens: int = None) -> bool:
+        tokens = prompt_tokens if prompt_tokens is not None else self.last_prompt_tokens
+        return tokens >= self.threshold_tokens
+
+    def compress(self, messages: List[Dict[str, Any]], current_tokens: int = None) -> List[Dict[str, Any]]:
+        self._compress_called = True
+        self.compression_count += 1
+        # Trivial: just return as-is
+        return messages
+
+    def get_tool_schemas(self) -> List[Dict[str, Any]]:
+        return [
+            {
+                "name": "stub_search",
+                "description": "Search the stub engine",
+                "parameters": {"type": "object", "properties": {}},
+            }
+        ]
+
+    def handle_tool_call(self, name: str, args: Dict[str, Any]) -> str:
+        self._tools_called.append(name)
+        return json.dumps({"ok": True, "tool": name})
+
+
+# ---------------------------------------------------------------------------
+# ABC contract tests
+# ---------------------------------------------------------------------------
+
+class TestContextEngineABC:
+    """Verify the ABC enforces the required interface."""
+
+    def test_cannot_instantiate_abc_directly(self):
+        with pytest.raises(TypeError):
+            ContextEngine()
+
+    def test_missing_methods_raises(self):
+        """A subclass missing required methods cannot be instantiated."""
+        class Incomplete(ContextEngine):
+            @property
+            def name(self):
+                return "incomplete"
+        with pytest.raises(TypeError):
+            Incomplete()
+
+    def test_stub_engine_satisfies_abc(self):
+        engine = StubEngine()
+        assert isinstance(engine, ContextEngine)
+        assert engine.name == "stub"
+
+    def test_compressor_is_context_engine(self):
+        c = ContextCompressor(model="test", quiet_mode=True, config_context_length=200000)
+        assert isinstance(c, ContextEngine)
+        assert c.name == "compressor"
+
+
+# ---------------------------------------------------------------------------
+# Default method behavior
+# ---------------------------------------------------------------------------
+
+class TestDefaults:
+    """Verify ABC default implementations work correctly."""
+
+    def test_default_tool_schemas_empty(self):
+        engine = StubEngine()
+        # StubEngine overrides this, so test the base via super
+        assert ContextEngine.get_tool_schemas(engine) == []
+
+    def test_default_handle_tool_call_returns_error(self):
+        engine = StubEngine()
+        result = ContextEngine.handle_tool_call(engine, "unknown", {})
+        data = json.loads(result)
+        assert "error" in data
+
+    def test_default_get_status(self):
+        engine = StubEngine()
+        engine.last_prompt_tokens = 50000
+        status = engine.get_status()
+        assert status["last_prompt_tokens"] == 50000
+        assert status["context_length"] == 200000
+        assert status["threshold_tokens"] == 100000
+        assert 0 < status["usage_percent"] <= 100
+
+    def test_on_session_reset(self):
+        engine = StubEngine()
+        engine.last_prompt_tokens = 999
+        engine.compression_count = 3
+        engine.on_session_reset()
+        assert engine.last_prompt_tokens == 0
+        assert engine.compression_count == 0
+
+    def test_should_compress_preflight_default_false(self):
+        engine = StubEngine()
+        assert engine.should_compress_preflight([]) is False
+
+
+# ---------------------------------------------------------------------------
+# StubEngine behavior
+# ---------------------------------------------------------------------------
+
+class TestStubEngine:
+
+    def test_should_compress(self):
+        engine = StubEngine(context_length=100000, threshold_pct=0.50)
+        assert not engine.should_compress(40000)
+        assert engine.should_compress(50000)
+        assert engine.should_compress(60000)
+
+    def test_compress_tracks_count(self):
+        engine = StubEngine()
+        msgs = [{"role": "user", "content": "hello"}]
+        result = engine.compress(msgs)
+        assert result == msgs
+        assert engine._compress_called
+        assert engine.compression_count == 1
+
+    def test_tool_schemas(self):
+        engine = StubEngine()
+        schemas = engine.get_tool_schemas()
+        assert len(schemas) == 1
+        assert schemas[0]["name"] == "stub_search"
+
+    def test_handle_tool_call(self):
+        engine = StubEngine()
+        result = engine.handle_tool_call("stub_search", {})
+        assert json.loads(result)["ok"] is True
+        assert "stub_search" in engine._tools_called
+
+    def test_update_from_response(self):
+        engine = StubEngine()
+        engine.update_from_response({"prompt_tokens": 1000, "completion_tokens": 200, "total_tokens": 1200})
+        assert engine.last_prompt_tokens == 1000
+        assert engine.last_completion_tokens == 200
+
+
+# ---------------------------------------------------------------------------
+# ContextCompressor session reset via ABC
+# ---------------------------------------------------------------------------
+
+class TestCompressorSessionReset:
+    """Verify ContextCompressor.on_session_reset() clears all state."""
+
+    def test_reset_clears_state(self):
+        c = ContextCompressor(model="test", quiet_mode=True, config_context_length=200000)
+        c.last_prompt_tokens = 50000
+        c.compression_count = 3
+        c._previous_summary = "some old summary"
+        c._context_probed = True
+        c._context_probe_persistable = True
+
+        c.on_session_reset()
+
+        assert c.last_prompt_tokens == 0
+        assert c.last_completion_tokens == 0
+        assert c.last_total_tokens == 0
+        assert c.compression_count == 0
+        assert c._context_probed is False
+        assert c._context_probe_persistable is False
+        assert c._previous_summary is None
+
+
+# ---------------------------------------------------------------------------
+# Plugin slot (PluginManager integration)
+# ---------------------------------------------------------------------------
+
+class TestPluginContextEngineSlot:
+    """Test register_context_engine on PluginContext."""
+
+    def test_register_engine(self):
+        from hermes_cli.plugins import PluginManager, PluginContext, PluginManifest
+        mgr = PluginManager()
+        manifest = PluginManifest(name="test-lcm")
+        ctx = PluginContext(manifest, mgr)
+
+        engine = StubEngine()
+        ctx.register_context_engine(engine)
+
+        assert mgr._context_engine is engine
+        assert mgr._context_engine.name == "stub"
+
+    def test_reject_second_engine(self):
+        from hermes_cli.plugins import PluginManager, PluginContext, PluginManifest
+        mgr = PluginManager()
+        manifest = PluginManifest(name="test-lcm")
+        ctx = PluginContext(manifest, mgr)
+
+        engine1 = StubEngine()
+        engine2 = StubEngine()
+        ctx.register_context_engine(engine1)
+        ctx.register_context_engine(engine2)  # should be rejected
+
+        assert mgr._context_engine is engine1
+
+    def test_reject_non_engine(self):
+        from hermes_cli.plugins import PluginManager, PluginContext, PluginManifest
+        mgr = PluginManager()
+        manifest = PluginManifest(name="test-bad")
+        ctx = PluginContext(manifest, mgr)
+
+        ctx.register_context_engine("not an engine")
+        assert mgr._context_engine is None
+
+    def test_get_plugin_context_engine(self):
+        from hermes_cli.plugins import PluginManager, PluginContext, PluginManifest, get_plugin_context_engine, _plugin_manager
+        import hermes_cli.plugins as plugins_mod
+
+        # Inject a test manager
+        old_mgr = plugins_mod._plugin_manager
+        try:
+            mgr = PluginManager()
+            plugins_mod._plugin_manager = mgr
+
+            assert get_plugin_context_engine() is None
+
+            engine = StubEngine()
+            mgr._context_engine = engine
+            assert get_plugin_context_engine() is engine
+        finally:
+            plugins_mod._plugin_manager = old_mgr
@@ -83,6 +83,24 @@ def test_parse_references_strips_trailing_punctuation():
    assert refs[1].target == "https://example.com/docs"


+def test_parse_quoted_references_with_spaces_and_preserve_unquoted_ranges():
+    from agent.context_references import parse_context_references
+
+    refs = parse_context_references(
+        'review @file:"C:\\Users\\Simba\\My Project\\main.py":7-9 '
+        'and @folder:"docs and specs" plus @file:src/main.py:1-2'
+    )
+
+    assert [ref.kind for ref in refs] == ["file", "folder", "file"]
+    assert refs[0].target == r"C:\Users\Simba\My Project\main.py"
+    assert refs[0].line_start == 7
+    assert refs[0].line_end == 9
+    assert refs[1].target == "docs and specs"
+    assert refs[2].target == "src/main.py"
+    assert refs[2].line_start == 1
+    assert refs[2].line_end == 2
+
+
 def test_expand_file_range_and_folder_listing(sample_repo: Path):
    from agent.context_references import preprocess_context_references

@@ -106,6 +124,30 @@ def test_expand_file_range_and_folder_listing(sample_repo: Path):
    assert not result.warnings


+def test_expand_quoted_file_reference_with_spaces(tmp_path: Path):
+    from agent.context_references import preprocess_context_references
+
+    workspace = tmp_path / "repo"
+    folder = workspace / "docs and specs"
+    folder.mkdir(parents=True)
+    file_path = folder / "release notes.txt"
+    file_path.write_text("line 1\nline 2\nline 3\n", encoding="utf-8")
+
+    result = preprocess_context_references(
+        'Review @file:"docs and specs/release notes.txt":2-3',
+        cwd=workspace,
+        context_length=100_000,
+    )
+
+    assert result.expanded
+    assert result.message.startswith("Review")
+    assert "line 1" not in result.message
+    assert "line 2" in result.message
+    assert "line 3" in result.message
+    assert "release notes.txt" in result.message
+    assert not result.warnings
+
+
 def test_expand_git_diff_staged_and_log(sample_repo: Path):
    from agent.context_references import preprocess_context_references

@@ -567,6 +567,7 @@ def test_singleton_seed_does_not_clobber_manual_oauth_entry(tmp_path, monkeypatc
    monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)
    monkeypatch.delenv("ANTHROPIC_TOKEN", raising=False)
    monkeypatch.delenv("CLAUDE_CODE_OAUTH_TOKEN", raising=False)
+    monkeypatch.setattr("hermes_cli.auth.is_provider_explicitly_configured", lambda pid: True)
    _write_auth_store(
        tmp_path,
        {
@@ -1043,3 +1044,30 @@ def test_release_lease_decrements_counter(tmp_path, monkeypatch):

    pool.release_lease("cred-1")
    assert pool._active_leases.get("cred-1", 0) == 0
+
+
+def test_load_pool_does_not_seed_claude_code_when_anthropic_not_configured(tmp_path, monkeypatch):
+    """Claude Code credentials must not be auto-seeded when the user never selected anthropic."""
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
+    _write_auth_store(tmp_path, {"version": 1, "credential_pool": {}})
+
+    # Claude Code credentials exist on disk
+    monkeypatch.setattr(
+        "agent.anthropic_adapter.read_claude_code_credentials",
+        lambda: {"accessToken": "sk-ant...oken", "refreshToken": "rt", "expiresAt": 9999999999999},
+    )
+    monkeypatch.setattr(
+        "agent.anthropic_adapter.read_hermes_oauth_credentials",
+        lambda: None,
+    )
+    # User configured kimi-coding, NOT anthropic
+    monkeypatch.setattr(
+        "hermes_cli.auth.is_provider_explicitly_configured",
+        lambda pid: pid == "kimi-coding",
+    )
+
+    from agent.credential_pool import load_pool
+    pool = load_pool("anthropic")
+
+    # Should NOT have seeded the claude_code entry
+    assert pool.entries() == []
@@ -249,6 +249,22 @@ class TestClassifyApiError:
        assert result.reason == FailoverReason.rate_limit
        assert result.should_fallback is True

+    def test_alibaba_rate_increased_too_quickly(self):
+        """Alibaba/DashScope returns a unique throttling message.
+
+        Port from anomalyco/opencode#21355.
+        """
+        msg = (
+            "Upstream error from Alibaba: Request rate increased too quickly. "
+            "To ensure system stability, please adjust your client logic to "
+            "scale requests more smoothly over time."
+        )
+        e = MockAPIError(msg, status_code=400)
+        result = classify_api_error(e)
+        assert result.reason == FailoverReason.rate_limit
+        assert result.retryable is True
+        assert result.should_rotate_credential is True
+
    # ── Server errors ──

    def test_500_server_error(self):
@@ -1,37 +1,37 @@
-"""Tests for MiniMax provider hardening — context lengths, thinking guard, catalog, beta headers."""
+"""Tests for MiniMax provider hardening — context lengths, thinking, catalog, beta headers, transport."""

 from unittest.mock import patch


 class TestMinimaxContextLengths:
-    """Verify per-model context length entries for MiniMax models."""
+    """Verify context length entries match official docs (204,800 for all models).

-    def test_m1_variants_have_1m_context(self):
+    Source: https://platform.minimax.io/docs/api-reference/text-anthropic-api
+    """
+
+    def test_minimax_prefix_has_correct_context(self):
        from agent.model_metadata import DEFAULT_CONTEXT_LENGTHS
-        # Keys are lowercase because the lookup lowercases model names
-        for model in ("minimax-m1", "minimax-m1-40k", "minimax-m1-80k",
-                       "minimax-m1-128k", "minimax-m1-256k"):
-            assert model in DEFAULT_CONTEXT_LENGTHS, f"{model} missing from context lengths"
-            assert DEFAULT_CONTEXT_LENGTHS[model] == 1_000_000, f"{model} expected 1M"
+        assert DEFAULT_CONTEXT_LENGTHS["minimax"] == 204_800

-    def test_m2_variants_have_1m_context(self):
-        from agent.model_metadata import DEFAULT_CONTEXT_LENGTHS
-        # Keys are lowercase because the lookup lowercases model names
-        for model in ("minimax-m2.5", "minimax-m2.7"):
-            assert model in DEFAULT_CONTEXT_LENGTHS, f"{model} missing from context lengths"
-            assert DEFAULT_CONTEXT_LENGTHS[model] == 1_048_576, f"{model} expected 1048576"
-
-    def test_minimax_prefix_fallback(self):
-        from agent.model_metadata import DEFAULT_CONTEXT_LENGTHS
-        # The generic "minimax" prefix entry should be 1M for unknown models
-        assert DEFAULT_CONTEXT_LENGTHS["minimax"] == 1_048_576
+    def test_minimax_models_resolve_via_prefix(self):
+        from agent.model_metadata import get_model_context_length
+        # All MiniMax models should resolve to 204,800 via the "minimax" prefix
+        for model in ("MiniMax-M2.7", "MiniMax-M2.5", "MiniMax-M2.1", "MiniMax-M2"):
+            ctx = get_model_context_length(model, "")
+            assert ctx == 204_800, f"{model} expected 204800, got {ctx}"



-class TestMinimaxThinkingGuard:
-    """Verify that build_anthropic_kwargs does NOT add thinking params for MiniMax models."""
+class TestMinimaxThinkingSupport:
+    """Verify that MiniMax gets manual thinking (not adaptive).

-    def test_no_thinking_for_minimax_m27(self):
+    MiniMax's Anthropic-compat endpoint officially supports the thinking
+    parameter (https://platform.minimax.io/docs/api-reference/text-anthropic-api).
+    It should get manual thinking (type=enabled + budget_tokens), NOT adaptive
+    thinking (which is Claude 4.6-only).
+    """
+
+    def test_minimax_m27_gets_manual_thinking(self):
        from agent.anthropic_adapter import build_anthropic_kwargs
        kwargs = build_anthropic_kwargs(
            model="MiniMax-M2.7",
@@ -40,19 +40,23 @@ class TestMinimaxThinkingGuard:
            max_tokens=4096,
            reasoning_config={"enabled": True, "effort": "medium"},
        )
-        assert "thinking" not in kwargs
+        assert "thinking" in kwargs
+        assert kwargs["thinking"]["type"] == "enabled"
+        assert "budget_tokens" in kwargs["thinking"]
+        # MiniMax should NOT get adaptive thinking or output_config
        assert "output_config" not in kwargs

-    def test_no_thinking_for_minimax_m1(self):
+    def test_minimax_m25_gets_manual_thinking(self):
        from agent.anthropic_adapter import build_anthropic_kwargs
        kwargs = build_anthropic_kwargs(
-            model="MiniMax-M1-128k",
+            model="MiniMax-M2.5",
            messages=[{"role": "user", "content": "hello"}],
            tools=None,
            max_tokens=4096,
            reasoning_config={"enabled": True, "effort": "high"},
        )
-        assert "thinking" not in kwargs
+        assert "thinking" in kwargs
+        assert kwargs["thinking"]["type"] == "enabled"

    def test_thinking_still_works_for_claude(self):
        from agent.anthropic_adapter import build_anthropic_kwargs
@@ -81,25 +85,30 @@ class TestMinimaxAuxModel:


 class TestMinimaxModelCatalog:
-    """Verify the model catalog includes M1 family and excludes deprecated models."""
+    """Verify the model catalog matches official Anthropic-compat endpoint models.

-    def test_catalog_includes_m1_family(self):
+    Source: https://platform.minimax.io/docs/api-reference/text-anthropic-api
+    """
+
+    def test_catalog_includes_current_models(self):
        from hermes_cli.models import _PROVIDER_MODELS
        for provider in ("minimax", "minimax-cn"):
            models = _PROVIDER_MODELS[provider]
-            assert "MiniMax-M1" in models
-            assert "MiniMax-M1-40k" in models
-            assert "MiniMax-M1-80k" in models
-            assert "MiniMax-M1-128k" in models
-            assert "MiniMax-M1-256k" in models
+            assert "MiniMax-M2.7" in models
+            assert "MiniMax-M2.5" in models
+            assert "MiniMax-M2.1" in models
+            assert "MiniMax-M2" in models

-    def test_catalog_excludes_deprecated(self):
+    def test_catalog_excludes_m1_family(self):
+        """M1 models are not available on the /anthropic endpoint."""
        from hermes_cli.models import _PROVIDER_MODELS
        for provider in ("minimax", "minimax-cn"):
            models = _PROVIDER_MODELS[provider]
-            assert "MiniMax-M2.1" not in models
+            assert "MiniMax-M1" not in models

    def test_catalog_excludes_highspeed(self):
+        """Highspeed variants are available but not shown in default catalog
+        (users can still specify them manually)."""
        from hermes_cli.models import _PROVIDER_MODELS
        for provider in ("minimax", "minimax-cn"):
            models = _PROVIDER_MODELS[provider]
@@ -202,3 +211,154 @@ class TestMinimaxBetaHeaders:
    def test_common_betas_regular_url(self):
        from agent.anthropic_adapter import _common_betas_for_base_url, _COMMON_BETAS
        assert _common_betas_for_base_url("https://api.anthropic.com") == _COMMON_BETAS
+
+
+class TestMinimaxApiMode:
+    """Verify determine_api_mode returns anthropic_messages for MiniMax providers.
+
+    The MiniMax /anthropic endpoint speaks Anthropic Messages wire format,
+    not OpenAI chat completions.  The overlay transport must reflect this
+    so that code paths calling determine_api_mode() without a base_url
+    (e.g. /model switch) get the correct api_mode.
+    """
+
+    def test_minimax_returns_anthropic_messages(self):
+        from hermes_cli.providers import determine_api_mode
+        assert determine_api_mode("minimax") == "anthropic_messages"
+
+    def test_minimax_cn_returns_anthropic_messages(self):
+        from hermes_cli.providers import determine_api_mode
+        assert determine_api_mode("minimax-cn") == "anthropic_messages"
+
+    def test_minimax_with_url_also_works(self):
+        from hermes_cli.providers import determine_api_mode
+        # Even with explicit base_url, provider lookup takes priority
+        assert determine_api_mode("minimax", "https://api.minimax.io/anthropic") == "anthropic_messages"
+
+    def test_anthropic_still_returns_anthropic_messages(self):
+        from hermes_cli.providers import determine_api_mode
+        assert determine_api_mode("anthropic") == "anthropic_messages"
+
+    def test_openai_returns_chat_completions(self):
+        from hermes_cli.providers import determine_api_mode
+        # Sanity check: standard providers are unaffected
+        result = determine_api_mode("deepseek")
+        assert result == "chat_completions"
+
+
+class TestMinimaxMaxOutput:
+    """Verify _get_anthropic_max_output returns correct limits for MiniMax models.
+
+    MiniMax max output is 131,072 tokens (source: OpenClaw model definitions,
+    cross-referenced with MiniMax API behavior).
+    """
+
+    def test_minimax_m27_output_limit(self):
+        from agent.anthropic_adapter import _get_anthropic_max_output
+        assert _get_anthropic_max_output("MiniMax-M2.7") == 131_072
+
+    def test_minimax_m25_output_limit(self):
+        from agent.anthropic_adapter import _get_anthropic_max_output
+        assert _get_anthropic_max_output("MiniMax-M2.5") == 131_072
+
+    def test_minimax_m2_output_limit(self):
+        from agent.anthropic_adapter import _get_anthropic_max_output
+        assert _get_anthropic_max_output("MiniMax-M2") == 131_072
+
+    def test_claude_output_unaffected(self):
+        from agent.anthropic_adapter import _get_anthropic_max_output
+        # Sanity: Claude limits are not broken by the MiniMax entry
+        assert _get_anthropic_max_output("claude-sonnet-4-6") == 64_000
+
+
+class TestMinimaxPreserveDots:
+    """Verify that MiniMax model names preserve dots through the Anthropic adapter.
+
+    MiniMax model IDs like 'MiniMax-M2.7' must NOT have dots converted to
+    hyphens — the endpoint expects the exact name with dots.
+    """
+
+    def test_minimax_provider_preserves_dots(self):
+        from types import SimpleNamespace
+        agent = SimpleNamespace(provider="minimax", base_url="")
+        from run_agent import AIAgent
+        assert AIAgent._anthropic_preserve_dots(agent) is True
+
+    def test_minimax_cn_provider_preserves_dots(self):
+        from types import SimpleNamespace
+        agent = SimpleNamespace(provider="minimax-cn", base_url="")
+        from run_agent import AIAgent
+        assert AIAgent._anthropic_preserve_dots(agent) is True
+
+    def test_minimax_url_preserves_dots(self):
+        from types import SimpleNamespace
+        agent = SimpleNamespace(provider="custom", base_url="https://api.minimax.io/anthropic")
+        from run_agent import AIAgent
+        assert AIAgent._anthropic_preserve_dots(agent) is True
+
+    def test_minimax_cn_url_preserves_dots(self):
+        from types import SimpleNamespace
+        agent = SimpleNamespace(provider="custom", base_url="https://api.minimaxi.com/anthropic")
+        from run_agent import AIAgent
+        assert AIAgent._anthropic_preserve_dots(agent) is True
+
+    def test_anthropic_does_not_preserve_dots(self):
+        from types import SimpleNamespace
+        agent = SimpleNamespace(provider="anthropic", base_url="https://api.anthropic.com")
+        from run_agent import AIAgent
+        assert AIAgent._anthropic_preserve_dots(agent) is False
+
+    def test_normalize_preserves_m27_dot(self):
+        from agent.anthropic_adapter import normalize_model_name
+        assert normalize_model_name("MiniMax-M2.7", preserve_dots=True) == "MiniMax-M2.7"
+
+    def test_normalize_converts_without_preserve(self):
+        from agent.anthropic_adapter import normalize_model_name
+        # Without preserve_dots, dots become hyphens (broken for MiniMax)
+        assert normalize_model_name("MiniMax-M2.7", preserve_dots=False) == "MiniMax-M2-7"
+
+
+class TestMinimaxSwitchModelCredentialGuard:
+    """Verify switch_model() does not leak Anthropic credentials to MiniMax.
+
+    The __init__ path correctly guards against this (line 761), but switch_model()
+    must mirror that guard. Without it, /model switch to minimax with no explicit
+    api_key would fall back to resolve_anthropic_token() and send Anthropic creds
+    to the MiniMax endpoint.
+    """
+
+    def test_switch_to_minimax_does_not_resolve_anthropic_token(self):
+        """switch_model() should NOT call resolve_anthropic_token() for MiniMax."""
+        from unittest.mock import patch, MagicMock
+
+        with patch("run_agent.AIAgent.__init__", return_value=None):
+            from run_agent import AIAgent
+            agent = AIAgent.__new__(AIAgent)
+            agent.provider = "anthropic"
+            agent.model = "claude-sonnet-4"
+            agent.api_key = "sk-ant-fake"
+            agent.base_url = "https://api.anthropic.com"
+            agent.api_mode = "anthropic_messages"
+            agent._anthropic_base_url = "https://api.anthropic.com"
+            agent._anthropic_api_key = "sk-ant-fake"
+            agent._is_anthropic_oauth = False
+            agent._client_kwargs = {}
+            agent.client = None
+            agent._anthropic_client = MagicMock()
+
+        with patch("agent.anthropic_adapter.build_anthropic_client") as mock_build, \
+             patch("agent.anthropic_adapter.resolve_anthropic_token", return_value="sk-ant-leaked") as mock_resolve, \
+             patch("agent.anthropic_adapter._is_oauth_token", return_value=False):
+
+            agent.switch_model(
+                new_model="MiniMax-M2.7",
+                new_provider="minimax",
+                api_mode="anthropic_messages",
+                api_key="mm-key-123",
+                base_url="https://api.minimax.io/anthropic",
+            )
+            # resolve_anthropic_token should NOT be called for non-Anthropic providers
+            mock_resolve.assert_not_called()
+            # The key passed to build_anthropic_client should be the MiniMax key
+            build_args = mock_build.call_args
+            assert build_args[0][0] == "mm-key-123"
@@ -222,6 +222,24 @@ class TestGetModelContextLength:
        mock_fetch.return_value = {}
        assert get_model_context_length("openai/gpt-4o") == 128000

+    @patch("agent.model_metadata.fetch_model_metadata")
+    def test_qwen3_coder_plus_context_length(self, mock_fetch):
+        """qwen3-coder-plus has a 1M context window, not the generic 128K Qwen default."""
+        mock_fetch.return_value = {}
+        assert get_model_context_length("qwen3-coder-plus") == 1000000
+
+    @patch("agent.model_metadata.fetch_model_metadata")
+    def test_qwen3_coder_context_length(self, mock_fetch):
+        """qwen3-coder has a 256K context window, not the generic 128K Qwen default."""
+        mock_fetch.return_value = {}
+        assert get_model_context_length("qwen3-coder") == 262144
+
+    @patch("agent.model_metadata.fetch_model_metadata")
+    def test_qwen_generic_context_length(self, mock_fetch):
+        """Generic qwen models still get the 128K default."""
+        mock_fetch.return_value = {}
+        assert get_model_context_length("qwen3-plus") == 131072
+
    @patch("agent.model_metadata.fetch_model_metadata")
    def test_api_missing_context_length_key(self, mock_fetch):
        """Model in API but without context_length → defaults to 128000."""
@@ -0,0 +1,85 @@
+"""Tests for CLI /status command behavior."""
+from datetime import datetime
+from types import SimpleNamespace
+from unittest.mock import MagicMock, patch
+
+from cli import HermesCLI
+from hermes_cli.commands import resolve_command
+
+
+def _make_cli():
+    cli_obj = HermesCLI.__new__(HermesCLI)
+    cli_obj.config = {}
+    cli_obj.console = MagicMock()
+    cli_obj.agent = None
+    cli_obj.conversation_history = []
+    cli_obj.session_id = "session-123"
+    cli_obj._pending_input = MagicMock()
+    cli_obj._status_bar_visible = True
+    cli_obj.model = "openai/gpt-5.4"
+    cli_obj.provider = "openai"
+    cli_obj.session_start = datetime(2026, 4, 9, 19, 24)
+    cli_obj._agent_running = False
+    cli_obj._session_db = MagicMock()
+    cli_obj._session_db.get_session.return_value = None
+    return cli_obj
+
+
+def test_status_command_is_available_in_cli_registry():
+    cmd = resolve_command("status")
+    assert cmd is not None
+    assert cmd.gateway_only is False
+
+
+def test_process_command_status_dispatches_without_toggling_status_bar():
+    cli_obj = _make_cli()
+
+    with patch.object(cli_obj, "_show_session_status", create=True) as mock_status:
+        assert cli_obj.process_command("/status") is True
+
+    mock_status.assert_called_once_with()
+    assert cli_obj._status_bar_visible is True
+
+
+def test_statusbar_still_toggles_visibility():
+    cli_obj = _make_cli()
+
+    assert cli_obj.process_command("/statusbar") is True
+    assert cli_obj._status_bar_visible is False
+
+
+def test_status_prefix_prefers_status_command_over_statusbar_toggle():
+    cli_obj = _make_cli()
+
+    with patch.object(cli_obj, "_show_session_status") as mock_status:
+        assert cli_obj.process_command("/sta") is True
+
+    mock_status.assert_called_once_with()
+    assert cli_obj._status_bar_visible is True
+
+
+def test_show_session_status_prints_gateway_style_summary():
+    cli_obj = _make_cli()
+    cli_obj.agent = SimpleNamespace(
+        session_total_tokens=321,
+        session_api_calls=4,
+    )
+    cli_obj._session_db.get_session.return_value = {
+        "title": "My titled session",
+        "started_at": 1775791440,
+    }
+
+    with patch("cli.display_hermes_home", return_value="~/.hermes"):
+        cli_obj._show_session_status()
+
+    printed = "\n".join(str(call.args[0]) for call in cli_obj.console.print.call_args_list)
+    assert "Hermes CLI Status" in printed
+    assert "Session ID: session-123" in printed
+    assert "Path: ~/.hermes" in printed
+    assert "Title: My titled session" in printed
+    assert "Model: openai/gpt-5.4 (openai)" in printed
+    assert "Tokens: 321" in printed
+    assert "Agent Running: No" in printed
+    _, kwargs = cli_obj.console.print.call_args
+    assert kwargs.get("highlight") is False
+    assert kwargs.get("markup") is False
@@ -0,0 +1,66 @@
+"""Tests for CLI manual compression messaging."""
+
+from unittest.mock import MagicMock, patch
+
+from tests.cli.test_cli_init import _make_cli
+
+
+def _make_history() -> list[dict[str, str]]:
+    return [
+        {"role": "user", "content": "one"},
+        {"role": "assistant", "content": "two"},
+        {"role": "user", "content": "three"},
+        {"role": "assistant", "content": "four"},
+    ]
+
+
+def test_manual_compress_reports_noop_without_success_banner(capsys):
+    shell = _make_cli()
+    history = _make_history()
+    shell.conversation_history = history
+    shell.agent = MagicMock()
+    shell.agent.compression_enabled = True
+    shell.agent._cached_system_prompt = ""
+    shell.agent._compress_context.return_value = (list(history), "")
+
+    def _estimate(messages):
+        assert messages == history
+        return 100
+
+    with patch("agent.model_metadata.estimate_messages_tokens_rough", side_effect=_estimate):
+        shell._manual_compress()
+
+    output = capsys.readouterr().out
+    assert "No changes from compression" in output
+    assert "✅ Compressed" not in output
+    assert "Rough transcript estimate: ~100 tokens (unchanged)" in output
+
+
+def test_manual_compress_explains_when_token_estimate_rises(capsys):
+    shell = _make_cli()
+    history = _make_history()
+    compressed = [
+        history[0],
+        {"role": "assistant", "content": "Dense summary that still counts as more tokens."},
+        history[-1],
+    ]
+    shell.conversation_history = history
+    shell.agent = MagicMock()
+    shell.agent.compression_enabled = True
+    shell.agent._cached_system_prompt = ""
+    shell.agent._compress_context.return_value = (compressed, "")
+
+    def _estimate(messages):
+        if messages == history:
+            return 100
+        if messages == compressed:
+            return 120
+        raise AssertionError(f"unexpected transcript: {messages!r}")
+
+    with patch("agent.model_metadata.estimate_messages_tokens_rough", side_effect=_estimate):
+        shell._manual_compress()
+
+    output = capsys.readouterr().out
+    assert "✅ Compressed: 4 → 3 messages" in output
+    assert "Rough transcript estimate: ~100 → ~120 tokens" in output
+    assert "denser summaries" in output
@@ -1,4 +1,4 @@
-"""Shared fixtures for Telegram gateway e2e tests.
+"""Shared fixtures for gateway e2e tests (Telegram, Discord).

 These tests exercise the full async message flow:
    adapter.handle_message(event)
@@ -14,19 +14,22 @@ import sys
 import uuid
 from datetime import datetime
 from types import SimpleNamespace
-from unittest.mock import AsyncMock, MagicMock
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest

 from gateway.config import GatewayConfig, Platform, PlatformConfig
 from gateway.platforms.base import MessageEvent, SendResult
 from gateway.session import SessionEntry, SessionSource, build_session_key


-#Ensure telegram module is available (mock it if not installed)
+# Platform library mocks

+# Ensure telegram module is available (mock it if not installed)
 def _ensure_telegram_mock():
    """Install mock telegram modules so TelegramAdapter can be imported."""
    if "telegram" in sys.modules and hasattr(sys.modules["telegram"], "__file__"):
-        return  # Real library installed
+        return # Real library installed

    telegram_mod = MagicMock()
    telegram_mod.Update = MagicMock()
@@ -51,24 +54,118 @@ def _ensure_telegram_mock():
        sys.modules.setdefault(name, telegram_mod)


-_ensure_telegram_mock()
+# Ensure discord module is available (mock it if not installed)
+def _ensure_discord_mock():
+    """Install mock discord modules so DiscordAdapter can be imported."""
+    if "discord" in sys.modules and hasattr(sys.modules["discord"], "__file__"):
+        return # Real library installed

+    discord_mod = MagicMock()
+    discord_mod.Intents.default.return_value = MagicMock()
+    discord_mod.DMChannel = type("DMChannel", (), {})
+    discord_mod.Thread = type("Thread", (), {})
+    discord_mod.ForumChannel = type("ForumChannel", (), {})
+    discord_mod.Interaction = object
+    discord_mod.app_commands = SimpleNamespace(
+        describe=lambda **kwargs: (lambda fn: fn),
+        choices=lambda **kwargs: (lambda fn: fn),
+        Choice=lambda **kwargs: SimpleNamespace(**kwargs),
+    )
+    discord_mod.opus.is_loaded.return_value = True
+
+    ext_mod = MagicMock()
+    commands_mod = MagicMock()
+    commands_mod.Bot = MagicMock
+    ext_mod.commands = commands_mod
+
+    sys.modules.setdefault("discord", discord_mod)
+    sys.modules.setdefault("discord.ext", ext_mod)
+    sys.modules.setdefault("discord.ext.commands", commands_mod)
+    sys.modules.setdefault("discord.opus", discord_mod.opus)
+
+
+def _ensure_slack_mock():
+    """Install mock slack modules so SlackAdapter can be imported."""
+    if "slack_bolt" in sys.modules and hasattr(sys.modules["slack_bolt"], "__file__"):
+        return  # Real library installed
+
+    slack_bolt = MagicMock()
+    slack_bolt.async_app.AsyncApp = MagicMock
+    slack_bolt.adapter.socket_mode.async_handler.AsyncSocketModeHandler = MagicMock
+
+    slack_sdk = MagicMock()
+    slack_sdk.web.async_client.AsyncWebClient = MagicMock
+
+    for name, mod in [
+        ("slack_bolt", slack_bolt),
+        ("slack_bolt.async_app", slack_bolt.async_app),
+        ("slack_bolt.adapter", slack_bolt.adapter),
+        ("slack_bolt.adapter.socket_mode", slack_bolt.adapter.socket_mode),
+        ("slack_bolt.adapter.socket_mode.async_handler", slack_bolt.adapter.socket_mode.async_handler),
+        ("slack_sdk", slack_sdk),
+        ("slack_sdk.web", slack_sdk.web),
+        ("slack_sdk.web.async_client", slack_sdk.web.async_client),
+    ]:
+        sys.modules.setdefault(name, mod)
+
+
+_ensure_telegram_mock()
+_ensure_discord_mock()
+_ensure_slack_mock()
+
+from gateway.platforms.discord import DiscordAdapter   # noqa: E402
 from gateway.platforms.telegram import TelegramAdapter  # noqa: E402

+import gateway.platforms.slack as _slack_mod  # noqa: E402
+_slack_mod.SLACK_AVAILABLE = True
+from gateway.platforms.slack import SlackAdapter  # noqa: E402

-#GatewayRunner factory (based on tests/gateway/test_status_command.py)

-def make_runner(session_entry: SessionEntry) -> "GatewayRunner":
+# Platform-generic factories
+
+def make_source(platform: Platform, chat_id: str = "e2e-chat-1", user_id: str = "e2e-user-1") -> SessionSource:
+    return SessionSource(
+        platform=platform,
+        chat_id=chat_id,
+        user_id=user_id,
+        user_name="e2e_tester",
+        chat_type="dm",
+    )
+
+
+def make_session_entry(platform: Platform, source: SessionSource = None) -> SessionEntry:
+    source = source or make_source(platform)
+    return SessionEntry(
+        session_key=build_session_key(source),
+        session_id=f"sess-{uuid.uuid4().hex[:8]}",
+        created_at=datetime.now(),
+        updated_at=datetime.now(),
+        platform=platform,
+        chat_type="dm",
+    )
+
+
+def make_event(platform: Platform, text: str = "/help", chat_id: str = "e2e-chat-1", user_id: str = "e2e-user-1") -> MessageEvent:
+    return MessageEvent(
+        text=text,
+        source=make_source(platform, chat_id, user_id),
+        message_id=f"msg-{uuid.uuid4().hex[:8]}",
+    )
+
+
+def make_runner(platform: Platform, session_entry: SessionEntry = None) -> "GatewayRunner":
    """Create a GatewayRunner with mocked internals for e2e testing.

    Skips __init__ to avoid filesystem/network side effects.
-    All command-dispatch dependencies are wired manually.
    """
    from gateway.run import GatewayRunner

+    if session_entry is None:
+        session_entry = make_session_entry(platform)
+
    runner = object.__new__(GatewayRunner)
    runner.config = GatewayConfig(
-        platforms={Platform.TELEGRAM: PlatformConfig(enabled=True, token="e2e-test-token")}
+        platforms={platform: PlatformConfig(enabled=True, token="e2e-test-token")}
    )
    runner.adapters = {}
    runner._voice_mode = {}
@@ -99,7 +196,6 @@ def make_runner(session_entry: SessionEntry) -> "GatewayRunner":
    runner._capture_gateway_honcho_if_configured = lambda *a, **kw: None
    runner._emit_gateway_run_progress = AsyncMock()

-    # Pairing store (used by authorization rejection path)
    runner.pairing_store = MagicMock()
    runner.pairing_store._is_rate_limited = MagicMock(return_value=False)
    runner.pairing_store.generate_code = MagicMock(return_value="ABC123")
@@ -107,67 +203,63 @@ def make_runner(session_entry: SessionEntry) -> "GatewayRunner":
    return runner


-#TelegramAdapter factory
+def make_adapter(platform: Platform, runner=None):
+    """Create a platform adapter wired to *runner*, with send methods mocked."""
+    if runner is None:
+        runner = make_runner(platform)

-def make_adapter(runner) -> TelegramAdapter:
-    """Create a TelegramAdapter wired to *runner*, with send methods mocked.
-
-    connect() is NOT called — no polling, no token lock, no real HTTP.
-    """
    config = PlatformConfig(enabled=True, token="e2e-test-token")
-    adapter = TelegramAdapter(config)

-    # Mock outbound methods so tests can capture what was sent
+    if platform == Platform.DISCORD:
+        with patch.object(DiscordAdapter, "_load_participated_threads", return_value=set()):
+            adapter = DiscordAdapter(config)
+        platform_key = Platform.DISCORD
+    elif platform == Platform.SLACK:
+        adapter = SlackAdapter(config)
+        platform_key = Platform.SLACK
+    else:
+        adapter = TelegramAdapter(config)
+        platform_key = Platform.TELEGRAM
+
    adapter.send = AsyncMock(return_value=SendResult(success=True, message_id="e2e-resp-1"))
    adapter.send_typing = AsyncMock()

-    # Wire adapter ↔ runner
    adapter.set_message_handler(runner._handle_message)
-    runner.adapters[Platform.TELEGRAM] = adapter
+    runner.adapters[platform_key] = adapter

    return adapter


-#Helpers
-
-def make_source(chat_id: str = "e2e-chat-1", user_id: str = "e2e-user-1") -> SessionSource:
-    return SessionSource(
-        platform=Platform.TELEGRAM,
-        chat_id=chat_id,
-        user_id=user_id,
-        user_name="e2e_tester",
-        chat_type="dm",
-    )
-
-
-def make_event(text: str, chat_id: str = "e2e-chat-1", user_id: str = "e2e-user-1") -> MessageEvent:
-    return MessageEvent(
-        text=text,
-        source=make_source(chat_id, user_id),
-        message_id=f"msg-{uuid.uuid4().hex[:8]}",
-    )
-
-
-def make_session_entry(source: SessionSource = None) -> SessionEntry:
-    source = source or make_source()
-    return SessionEntry(
-        session_key=build_session_key(source),
-        session_id=f"sess-{uuid.uuid4().hex[:8]}",
-        created_at=datetime.now(),
-        updated_at=datetime.now(),
-        platform=Platform.TELEGRAM,
-        chat_type="dm",
-    )
-
-
-async def send_and_capture(adapter: TelegramAdapter, text: str, **event_kwargs) -> AsyncMock:
-    """Send a message through the full e2e flow and return the send mock.
-
-    Drives: adapter.handle_message → background task → runner dispatch → adapter.send.
-    """
-    event = make_event(text, **event_kwargs)
+async def send_and_capture(adapter, text: str, platform: Platform, **event_kwargs) -> AsyncMock:
+    """Send a message through the full e2e flow and return the send mock."""
+    event = make_event(platform, text, **event_kwargs)
    adapter.send.reset_mock()
    await adapter.handle_message(event)
-    # Let the background task complete
    await asyncio.sleep(0.3)
    return adapter.send
+
+
+# Parametrized fixtures for platform-generic tests
+@pytest.fixture(params=[Platform.TELEGRAM, Platform.DISCORD, Platform.SLACK], ids=["telegram", "discord", "slack"])
+def platform(request):
+    return request.param
+
+
+@pytest.fixture()
+def source(platform):
+    return make_source(platform)
+
+
+@pytest.fixture()
+def session_entry(platform, source):
+    return make_session_entry(platform, source)
+
+
+@pytest.fixture()
+def runner(platform, session_entry):
+    return make_runner(platform, session_entry)
+
+
+@pytest.fixture()
+def adapter(platform, runner):
+    return make_adapter(platform, runner)
@@ -1,4 +1,4 @@
-"""E2E tests for Telegram gateway slash commands.
+"""E2E tests for gateway slash commands (Telegram, Discord).

 Each test drives a message through the full async pipeline:
    adapter.handle_message(event)
@@ -7,6 +7,7 @@ Each test drives a message through the full async pipeline:
        → adapter.send() (captured for assertions)

 No LLM involved — only gateway-level commands are tested.
+Tests are parametrized over platforms via the ``platform`` fixture in conftest.
 """

 import asyncio
@@ -15,46 +16,15 @@ from unittest.mock import AsyncMock
 import pytest

 from gateway.platforms.base import SendResult
-from tests.e2e.conftest import (
-    make_adapter,
-    make_event,
-    make_runner,
-    make_session_entry,
-    make_source,
-    send_and_capture,
-)
+from tests.e2e.conftest import make_event, send_and_capture


-#Fixtures
-
-@pytest.fixture()
-def source():
-    return make_source()
-
-
-@pytest.fixture()
-def session_entry(source):
-    return make_session_entry(source)
-
-
-@pytest.fixture()
-def runner(session_entry):
-    return make_runner(session_entry)
-
-
-@pytest.fixture()
-def adapter(runner):
-    return make_adapter(runner)
-
-
-#Tests
-
-class TestTelegramSlashCommands:
+class TestSlashCommands:
    """Gateway slash commands dispatched through the full adapter pipeline."""

    @pytest.mark.asyncio
-    async def test_help_returns_command_list(self, adapter):
-        send = await send_and_capture(adapter, "/help")
+    async def test_help_returns_command_list(self, adapter, platform):
+        send = await send_and_capture(adapter, "/help", platform)

        send.assert_called_once()
        response_text = send.call_args[1].get("content") or send.call_args[0][1]
@@ -62,24 +32,23 @@ class TestTelegramSlashCommands:
        assert "/status" in response_text

    @pytest.mark.asyncio
-    async def test_status_shows_session_info(self, adapter):
-        send = await send_and_capture(adapter, "/status")
+    async def test_status_shows_session_info(self, adapter, platform):
+        send = await send_and_capture(adapter, "/status", platform)

        send.assert_called_once()
        response_text = send.call_args[1].get("content") or send.call_args[0][1]
-        # Status output includes session metadata
        assert "session" in response_text.lower() or "Session" in response_text

    @pytest.mark.asyncio
-    async def test_new_resets_session(self, adapter, runner):
-        send = await send_and_capture(adapter, "/new")
+    async def test_new_resets_session(self, adapter, runner, platform):
+        send = await send_and_capture(adapter, "/new", platform)

        send.assert_called_once()
        runner.session_store.reset_session.assert_called_once()

    @pytest.mark.asyncio
-    async def test_stop_when_no_agent_running(self, adapter):
-        send = await send_and_capture(adapter, "/stop")
+    async def test_stop_when_no_agent_running(self, adapter, platform):
+        send = await send_and_capture(adapter, "/stop", platform)

        send.assert_called_once()
        response_text = send.call_args[1].get("content") or send.call_args[0][1]
@@ -87,8 +56,8 @@ class TestTelegramSlashCommands:
        assert "no" in response_lower or "stop" in response_lower or "not running" in response_lower

    @pytest.mark.asyncio
-    async def test_commands_shows_listing(self, adapter):
-        send = await send_and_capture(adapter, "/commands")
+    async def test_commands_shows_listing(self, adapter, platform):
+        send = await send_and_capture(adapter, "/commands", platform)

        send.assert_called_once()
        response_text = send.call_args[1].get("content") or send.call_args[0][1]
@@ -96,29 +65,25 @@ class TestTelegramSlashCommands:
        assert "/" in response_text

    @pytest.mark.asyncio
-    async def test_sequential_commands_share_session(self, adapter):
+    async def test_sequential_commands_share_session(self, adapter, platform):
        """Two commands from the same chat_id should both succeed."""
-        send_help = await send_and_capture(adapter, "/help")
+        send_help = await send_and_capture(adapter, "/help", platform)
        send_help.assert_called_once()

-        send_status = await send_and_capture(adapter, "/status")
+        send_status = await send_and_capture(adapter, "/status", platform)
        send_status.assert_called_once()

    @pytest.mark.asyncio
-    @pytest.mark.xfail(
-        reason="Bug: _handle_provider_command references unbound model_cfg when config.yaml is absent",
-        strict=False,
-    )
-    async def test_provider_shows_current_provider(self, adapter):
-        send = await send_and_capture(adapter, "/provider")
+    async def test_provider_shows_current_provider(self, adapter, platform):
+        send = await send_and_capture(adapter, "/provider", platform)

        send.assert_called_once()
        response_text = send.call_args[1].get("content") or send.call_args[0][1]
        assert "provider" in response_text.lower()

    @pytest.mark.asyncio
-    async def test_verbose_responds(self, adapter):
-        send = await send_and_capture(adapter, "/verbose")
+    async def test_verbose_responds(self, adapter, platform):
+        send = await send_and_capture(adapter, "/verbose", platform)

        send.assert_called_once()
        response_text = send.call_args[1].get("content") or send.call_args[0][1]
@@ -126,42 +91,50 @@ class TestTelegramSlashCommands:
        assert "verbose" in response_text.lower() or "tool_progress" in response_text

    @pytest.mark.asyncio
-    async def test_personality_lists_options(self, adapter):
-        send = await send_and_capture(adapter, "/personality")
+    async def test_personality_lists_options(self, adapter, platform):
+        send = await send_and_capture(adapter, "/personality", platform)

        send.assert_called_once()
        response_text = send.call_args[1].get("content") or send.call_args[0][1]
        assert "personalit" in response_text.lower()  # matches "personality" or "personalities"

    @pytest.mark.asyncio
-    async def test_yolo_toggles_mode(self, adapter):
-        send = await send_and_capture(adapter, "/yolo")
+    async def test_yolo_toggles_mode(self, adapter, platform):
+        send = await send_and_capture(adapter, "/yolo", platform)

        send.assert_called_once()
        response_text = send.call_args[1].get("content") or send.call_args[0][1]
        assert "yolo" in response_text.lower()

+    @pytest.mark.asyncio
+    async def test_compress_command(self, adapter, platform):
+        send = await send_and_capture(adapter, "/compress", platform)
+
+        send.assert_called_once()
+        response_text = send.call_args[1].get("content") or send.call_args[0][1]
+        assert "compress" in response_text.lower() or "context" in response_text.lower()
+

 class TestSessionLifecycle:
    """Verify session state changes across command sequences."""

    @pytest.mark.asyncio
-    async def test_new_then_status_reflects_reset(self, adapter, runner, session_entry):
+    async def test_new_then_status_reflects_reset(self, adapter, runner, session_entry, platform):
        """After /new, /status should report the fresh session."""
-        await send_and_capture(adapter, "/new")
+        await send_and_capture(adapter, "/new", platform)
        runner.session_store.reset_session.assert_called_once()

-        send = await send_and_capture(adapter, "/status")
+        send = await send_and_capture(adapter, "/status", platform)
        send.assert_called_once()
        response_text = send.call_args[1].get("content") or send.call_args[0][1]
        # Session ID from the entry should appear in the status output
        assert session_entry.session_id[:8] in response_text

    @pytest.mark.asyncio
-    async def test_new_is_idempotent(self, adapter, runner):
+    async def test_new_is_idempotent(self, adapter, runner, platform):
        """/new called twice should not crash."""
-        await send_and_capture(adapter, "/new")
-        await send_and_capture(adapter, "/new")
+        await send_and_capture(adapter, "/new", platform)
+        await send_and_capture(adapter, "/new", platform)
        assert runner.session_store.reset_session.call_count == 2


@@ -169,11 +142,11 @@ class TestAuthorization:
    """Verify the pipeline handles unauthorized users."""

    @pytest.mark.asyncio
-    async def test_unauthorized_user_gets_pairing_response(self, adapter, runner):
+    async def test_unauthorized_user_gets_pairing_response(self, adapter, runner, platform):
        """Unauthorized DM should trigger pairing code, not a command response."""
        runner._is_user_authorized = lambda _source: False

-        event = make_event("/help")
+        event = make_event(platform, "/help")
        adapter.send.reset_mock()
        await adapter.handle_message(event)
        await asyncio.sleep(0.3)
@@ -185,11 +158,11 @@ class TestAuthorization:
        assert "recognize" in response_text.lower() or "pair" in response_text.lower() or "ABC123" in response_text

    @pytest.mark.asyncio
-    async def test_unauthorized_user_does_not_get_help(self, adapter, runner):
+    async def test_unauthorized_user_does_not_get_help(self, adapter, runner, platform):
        """Unauthorized user should NOT see the help command output."""
        runner._is_user_authorized = lambda _source: False

-        event = make_event("/help")
+        event = make_event(platform, "/help")
        adapter.send.reset_mock()
        await adapter.handle_message(event)
        await asyncio.sleep(0.3)
@@ -204,12 +177,12 @@ class TestSendFailureResilience:
    """Verify the pipeline handles send failures gracefully."""

    @pytest.mark.asyncio
-    async def test_send_failure_does_not_crash_pipeline(self, adapter):
+    async def test_send_failure_does_not_crash_pipeline(self, adapter, platform):
        """If send() returns failure, the pipeline should not raise."""
        adapter.send = AsyncMock(return_value=SendResult(success=False, error="network timeout"))
-        adapter.set_message_handler(adapter._message_handler)  # re-wire with same handler
+        adapter.set_message_handler(adapter._message_handler) # re-wire with same handler

-        event = make_event("/help")
+        event = make_event(platform, "/help")
        # Should not raise — pipeline handles send failures internally
        await adapter.handle_message(event)
        await asyncio.sleep(0.3)
@@ -0,0 +1,110 @@
+import asyncio
+from unittest.mock import AsyncMock, MagicMock
+
+from gateway.config import GatewayConfig, Platform, PlatformConfig
+from gateway.platforms.base import BasePlatformAdapter, MessageEvent, SendResult
+from gateway.restart import DEFAULT_GATEWAY_RESTART_DRAIN_TIMEOUT
+from gateway.run import GatewayRunner
+from gateway.session import SessionSource
+
+
+class RestartTestAdapter(BasePlatformAdapter):
+    def __init__(self):
+        super().__init__(PlatformConfig(enabled=True, token="***"), Platform.TELEGRAM)
+        self.sent: list[str] = []
+
+    async def connect(self):
+        return True
+
+    async def disconnect(self):
+        return None
+
+    async def send(self, chat_id, content, reply_to=None, metadata=None):
+        self.sent.append(content)
+        return SendResult(success=True, message_id="1")
+
+    async def send_typing(self, chat_id, metadata=None):
+        return None
+
+    async def get_chat_info(self, chat_id):
+        return {"id": chat_id}
+
+
+def make_restart_source(chat_id: str = "123456", chat_type: str = "dm") -> SessionSource:
+    return SessionSource(
+        platform=Platform.TELEGRAM,
+        chat_id=chat_id,
+        chat_type=chat_type,
+    )
+
+
+def make_restart_runner(
+    adapter: BasePlatformAdapter | None = None,
+) -> tuple[GatewayRunner, BasePlatformAdapter]:
+    runner = object.__new__(GatewayRunner)
+    runner.config = GatewayConfig(
+        platforms={Platform.TELEGRAM: PlatformConfig(enabled=True, token="***")}
+    )
+    runner._running = True
+    runner._shutdown_event = asyncio.Event()
+    runner._exit_reason = None
+    runner._exit_code = None
+    runner._running_agents = {}
+    runner._running_agents_ts = {}
+    runner._pending_messages = {}
+    runner._pending_approvals = {}
+    runner._pending_model_notes = {}
+    runner._background_tasks = set()
+    runner._draining = False
+    runner._restart_requested = False
+    runner._restart_task_started = False
+    runner._restart_detached = False
+    runner._restart_via_service = False
+    runner._restart_drain_timeout = DEFAULT_GATEWAY_RESTART_DRAIN_TIMEOUT
+    runner._stop_task = None
+    runner._busy_input_mode = "interrupt"
+    runner._update_prompt_pending = {}
+    runner._voice_mode = {}
+    runner._session_model_overrides = {}
+    runner._shutdown_all_gateway_honcho = lambda: None
+    runner._update_runtime_status = MagicMock()
+    runner._queue_or_replace_pending_event = GatewayRunner._queue_or_replace_pending_event.__get__(
+        runner, GatewayRunner
+    )
+    runner._session_key_for_source = GatewayRunner._session_key_for_source.__get__(
+        runner, GatewayRunner
+    )
+    runner._handle_active_session_busy_message = (
+        GatewayRunner._handle_active_session_busy_message.__get__(runner, GatewayRunner)
+    )
+    runner._handle_restart_command = GatewayRunner._handle_restart_command.__get__(
+        runner, GatewayRunner
+    )
+    runner._status_action_label = GatewayRunner._status_action_label.__get__(
+        runner, GatewayRunner
+    )
+    runner._status_action_gerund = GatewayRunner._status_action_gerund.__get__(
+        runner, GatewayRunner
+    )
+    runner._queue_during_drain_enabled = GatewayRunner._queue_during_drain_enabled.__get__(
+        runner, GatewayRunner
+    )
+    runner._running_agent_count = GatewayRunner._running_agent_count.__get__(
+        runner, GatewayRunner
+    )
+    runner._launch_detached_restart_command = GatewayRunner._launch_detached_restart_command.__get__(
+        runner, GatewayRunner
+    )
+    runner.request_restart = GatewayRunner.request_restart.__get__(runner, GatewayRunner)
+    runner._is_user_authorized = lambda _source: True
+    runner.hooks = MagicMock()
+    runner.hooks.emit = AsyncMock()
+    runner.pairing_store = MagicMock()
+    runner.session_store = MagicMock()
+    runner.delivery_router = MagicMock()
+
+    platform_adapter = adapter or RestartTestAdapter()
+    platform_adapter.set_message_handler(AsyncMock(return_value=None))
+    platform_adapter.set_busy_session_handler(runner._handle_active_session_busy_message)
+    runner.adapters = {Platform.TELEGRAM: platform_adapter}
+    return runner, platform_adapter
@@ -464,7 +464,7 @@ class TestChatCompletionsEndpoint:

    @pytest.mark.asyncio
    async def test_stream_includes_tool_progress(self, adapter):
-        """tool_progress_callback fires → progress appears in the SSE stream."""
+        """tool_progress_callback fires → progress appears as custom SSE event, not in delta.content."""
        import asyncio

        app = _create_app(adapter)
@@ -495,8 +495,26 @@ class TestChatCompletionsEndpoint:
                assert resp.status == 200
                body = await resp.text()
                assert "[DONE]" in body
-                # Tool progress message must appear in the stream
-                assert "ls -la" in body
+                # Tool progress must appear as a custom SSE event, not in
+                # delta.content — prevents model from learning to imitate
+                # markers instead of calling tools (#6972).
+                assert "event: hermes.tool.progress" in body
+                assert '"tool": "terminal"' in body
+                assert '"label": "ls -la"' in body
+                # The progress marker must NOT appear inside any
+                # chat.completion.chunk delta.content field.
+                import json as _json
+                for line in body.splitlines():
+                    if line.startswith("data: ") and line.strip() != "data: [DONE]":
+                        try:
+                            chunk = _json.loads(line[len("data: "):])
+                        except _json.JSONDecodeError:
+                            continue
+                        if chunk.get("object") == "chat.completion.chunk":
+                            for choice in chunk.get("choices", []):
+                                content = choice.get("delta", {}).get("content", "")
+                                # Tool emoji markers must never leak into content
+                                assert "ls -la" not in content or content == "Here are the files."
                # Final content must also be present
                assert "Here are the files." in body

@@ -532,10 +550,12 @@ class TestChatCompletionsEndpoint:
                )
                assert resp.status == 200
                body = await resp.text()
-                # Internal _thinking event should NOT appear
+                # Internal _thinking event should NOT appear anywhere
                assert "some internal state" not in body
-                # Real tool progress should appear
-                assert "Python docs" in body
+                # Real tool progress should appear as custom SSE event
+                assert "event: hermes.tool.progress" in body
+                assert '"tool": "web_search"' in body
+                assert '"label": "Python docs"' in body

    @pytest.mark.asyncio
    async def test_no_user_message_returns_400(self, adapter):
@@ -0,0 +1,132 @@
+"""Tests for the API server bind-address startup guard.
+
+Validates that is_network_accessible() correctly classifies addresses and
+that connect() refuses to start on non-loopback without API_SERVER_KEY.
+"""
+
+import socket
+from unittest.mock import AsyncMock, patch
+
+import pytest
+
+from gateway.config import PlatformConfig
+from gateway.platforms.api_server import APIServerAdapter
+from gateway.platforms.base import is_network_accessible
+
+
+# ---------------------------------------------------------------------------
+# Unit tests: is_network_accessible()
+# ---------------------------------------------------------------------------
+
+
+class TestIsNetworkAccessible:
+    """Direct tests for the address classification helper."""
+
+    # -- Loopback (safe, should return False) --
+
+    def test_ipv4_loopback(self):
+        assert is_network_accessible("127.0.0.1") is False
+
+    def test_ipv6_loopback(self):
+        assert is_network_accessible("::1") is False
+
+    def test_ipv4_mapped_loopback(self):
+        # ::ffff:127.0.0.1 — Python's is_loopback returns False for mapped
+        # addresses; the helper must unwrap and check ipv4_mapped.
+        assert is_network_accessible("::ffff:127.0.0.1") is False
+
+    # -- Network-accessible (should return True) --
+
+    def test_ipv4_wildcard(self):
+        assert is_network_accessible("0.0.0.0") is True
+
+    def test_ipv6_wildcard(self):
+        # This is the bypass vector that the string-based check missed.
+        assert is_network_accessible("::") is True
+
+    def test_ipv4_mapped_unspecified(self):
+        assert is_network_accessible("::ffff:0.0.0.0") is True
+
+    def test_private_ipv4(self):
+        assert is_network_accessible("10.0.0.1") is True
+
+    def test_private_ipv4_class_c(self):
+        assert is_network_accessible("192.168.1.1") is True
+
+    def test_public_ipv4(self):
+        assert is_network_accessible("8.8.8.8") is True
+
+    # -- Hostname resolution --
+
+    def test_localhost_resolves_to_loopback(self):
+        loopback_result = [
+            (socket.AF_INET, socket.SOCK_STREAM, 0, "", ("127.0.0.1", 0)),
+        ]
+        with patch("gateway.platforms.base._socket.getaddrinfo", return_value=loopback_result):
+            assert is_network_accessible("localhost") is False
+
+    def test_hostname_resolving_to_non_loopback(self):
+        non_loopback_result = [
+            (socket.AF_INET, socket.SOCK_STREAM, 0, "", ("10.0.0.1", 0)),
+        ]
+        with patch("gateway.platforms.base._socket.getaddrinfo", return_value=non_loopback_result):
+            assert is_network_accessible("my-server.local") is True
+
+    def test_hostname_mixed_resolution(self):
+        """If a hostname resolves to both loopback and non-loopback, it's
+        network-accessible (any non-loopback address is enough)."""
+        mixed_result = [
+            (socket.AF_INET, socket.SOCK_STREAM, 0, "", ("127.0.0.1", 0)),
+            (socket.AF_INET, socket.SOCK_STREAM, 0, "", ("10.0.0.1", 0)),
+        ]
+        with patch("gateway.platforms.base._socket.getaddrinfo", return_value=mixed_result):
+            assert is_network_accessible("dual-host.local") is True
+
+    def test_dns_failure_fails_closed(self):
+        """Unresolvable hostnames should require an API key (fail closed)."""
+        with patch(
+            "gateway.platforms.base._socket.getaddrinfo",
+            side_effect=socket.gaierror("Name resolution failed"),
+        ):
+            assert is_network_accessible("nonexistent.invalid") is True
+
+
+# ---------------------------------------------------------------------------
+# Integration tests: connect() startup guard
+# ---------------------------------------------------------------------------
+
+
+class TestConnectBindGuard:
+    """Verify that connect() refuses dangerous configurations."""
+
+    @pytest.mark.asyncio
+    async def test_refuses_ipv4_wildcard_without_key(self):
+        adapter = APIServerAdapter(PlatformConfig(enabled=True, extra={"host": "0.0.0.0"}))
+        result = await adapter.connect()
+        assert result is False
+
+    @pytest.mark.asyncio
+    async def test_refuses_ipv6_wildcard_without_key(self):
+        adapter = APIServerAdapter(PlatformConfig(enabled=True, extra={"host": "::"}))
+        result = await adapter.connect()
+        assert result is False
+
+    def test_allows_loopback_without_key(self):
+        """Loopback with no key should pass the guard."""
+        adapter = APIServerAdapter(PlatformConfig(enabled=True, extra={"host": "127.0.0.1"}))
+        assert adapter._api_key == ""
+        # The guard condition: is_network_accessible(host) AND NOT api_key
+        # For loopback, is_network_accessible is False so the guard does not block.
+        assert is_network_accessible(adapter._host) is False
+
+    @pytest.mark.asyncio
+    async def test_allows_wildcard_with_key(self):
+        """Non-loopback with a key should pass the guard."""
+        adapter = APIServerAdapter(
+            PlatformConfig(enabled=True, extra={"host": "0.0.0.0", "key": "sk-test"})
+        )
+        # The guard checks: is_network_accessible(host) AND NOT api_key
+        # With a key set, the guard should not block.
+        assert adapter._api_key == "sk-test"
+        assert is_network_accessible("0.0.0.0") is True
+        # Combined: the guard condition is False (key is set), so it passes
@@ -345,6 +345,11 @@ class TestBlockingApprovalE2E:

    def setup_method(self):
        _clear_approval_state()
+        os.environ.pop("HERMES_YOLO_MODE", None)
+        os.environ.pop("HERMES_INTERACTIVE", None)
+        os.environ.pop("HERMES_GATEWAY_SESSION", None)
+        os.environ.pop("HERMES_EXEC_ASK", None)
+        os.environ.pop("HERMES_SESSION_KEY", None)

    def test_blocking_approval_approve_once(self):
        """check_all_command_guards blocks until resolve_gateway_approval is called."""
@@ -364,6 +369,7 @@ class TestBlockingApprovalE2E:
            from tools.approval import reset_current_session_key, set_current_session_key

            token = set_current_session_key(session_key)
+            os.environ["HERMES_GATEWAY_SESSION"] = "1"
            os.environ["HERMES_EXEC_ASK"] = "1"
            os.environ["HERMES_SESSION_KEY"] = session_key
            try:
@@ -371,6 +377,7 @@ class TestBlockingApprovalE2E:
                    "rm -rf /important", "local"
                )
            finally:
+                os.environ.pop("HERMES_GATEWAY_SESSION", None)
                os.environ.pop("HERMES_EXEC_ASK", None)
                os.environ.pop("HERMES_SESSION_KEY", None)
                reset_current_session_key(token)
@@ -410,6 +417,7 @@ class TestBlockingApprovalE2E:
            from tools.approval import reset_current_session_key, set_current_session_key

            token = set_current_session_key(session_key)
+            os.environ["HERMES_GATEWAY_SESSION"] = "1"
            os.environ["HERMES_EXEC_ASK"] = "1"
            os.environ["HERMES_SESSION_KEY"] = session_key
            try:
@@ -417,6 +425,7 @@ class TestBlockingApprovalE2E:
                    "rm -rf /important", "local"
                )
            finally:
+                os.environ.pop("HERMES_GATEWAY_SESSION", None)
                os.environ.pop("HERMES_EXEC_ASK", None)
                os.environ.pop("HERMES_SESSION_KEY", None)
                reset_current_session_key(token)
@@ -451,6 +460,7 @@ class TestBlockingApprovalE2E:
            from tools.approval import reset_current_session_key, set_current_session_key

            token = set_current_session_key(session_key)
+            os.environ["HERMES_GATEWAY_SESSION"] = "1"
            os.environ["HERMES_EXEC_ASK"] = "1"
            os.environ["HERMES_SESSION_KEY"] = session_key
            try:
@@ -460,6 +470,7 @@ class TestBlockingApprovalE2E:
                        "rm -rf /important", "local"
                    )
            finally:
+                os.environ.pop("HERMES_GATEWAY_SESSION", None)
                os.environ.pop("HERMES_EXEC_ASK", None)
                os.environ.pop("HERMES_SESSION_KEY", None)
                reset_current_session_key(token)
@@ -491,11 +502,13 @@ class TestBlockingApprovalE2E:
                from tools.approval import reset_current_session_key, set_current_session_key

                token = set_current_session_key(session_key)
+                os.environ["HERMES_GATEWAY_SESSION"] = "1"
                os.environ["HERMES_EXEC_ASK"] = "1"
                os.environ["HERMES_SESSION_KEY"] = session_key
                try:
                    results[idx] = check_all_command_guards(cmd, "local")
                finally:
+                    os.environ.pop("HERMES_GATEWAY_SESSION", None)
                    os.environ.pop("HERMES_EXEC_ASK", None)
                    os.environ.pop("HERMES_SESSION_KEY", None)
                    reset_current_session_key(token)
@@ -546,11 +559,13 @@ class TestBlockingApprovalE2E:
                from tools.approval import reset_current_session_key, set_current_session_key

                token = set_current_session_key(session_key)
+                os.environ["HERMES_GATEWAY_SESSION"] = "1"
                os.environ["HERMES_EXEC_ASK"] = "1"
                os.environ["HERMES_SESSION_KEY"] = session_key
                try:
                    results[idx] = check_all_command_guards(cmd, "local")
                finally:
+                    os.environ.pop("HERMES_GATEWAY_SESSION", None)
                    os.environ.pop("HERMES_EXEC_ASK", None)
                    os.environ.pop("HERMES_SESSION_KEY", None)
                    reset_current_session_key(token)
@@ -0,0 +1,121 @@
+"""Tests for gateway /compress user-facing messaging."""
+
+from datetime import datetime
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from gateway.config import GatewayConfig, Platform, PlatformConfig
+from gateway.platforms.base import MessageEvent
+from gateway.session import SessionEntry, SessionSource, build_session_key
+
+
+def _make_source() -> SessionSource:
+    return SessionSource(
+        platform=Platform.TELEGRAM,
+        user_id="u1",
+        chat_id="c1",
+        user_name="tester",
+        chat_type="dm",
+    )
+
+
+def _make_event(text: str = "/compress") -> MessageEvent:
+    return MessageEvent(text=text, source=_make_source(), message_id="m1")
+
+
+def _make_history() -> list[dict[str, str]]:
+    return [
+        {"role": "user", "content": "one"},
+        {"role": "assistant", "content": "two"},
+        {"role": "user", "content": "three"},
+        {"role": "assistant", "content": "four"},
+    ]
+
+
+def _make_runner(history: list[dict[str, str]]):
+    from gateway.run import GatewayRunner
+
+    runner = object.__new__(GatewayRunner)
+    runner.config = GatewayConfig(
+        platforms={Platform.TELEGRAM: PlatformConfig(enabled=True, token="***")}
+    )
+    session_entry = SessionEntry(
+        session_key=build_session_key(_make_source()),
+        session_id="sess-1",
+        created_at=datetime.now(),
+        updated_at=datetime.now(),
+        platform=Platform.TELEGRAM,
+        chat_type="dm",
+    )
+    runner.session_store = MagicMock()
+    runner.session_store.get_or_create_session.return_value = session_entry
+    runner.session_store.load_transcript.return_value = history
+    runner.session_store.rewrite_transcript = MagicMock()
+    runner.session_store.update_session = MagicMock()
+    runner.session_store._save = MagicMock()
+    return runner
+
+
+@pytest.mark.asyncio
+async def test_compress_command_reports_noop_without_success_banner():
+    history = _make_history()
+    runner = _make_runner(history)
+    agent_instance = MagicMock()
+    agent_instance.context_compressor.protect_first_n = 0
+    agent_instance.context_compressor._align_boundary_forward.return_value = 0
+    agent_instance.context_compressor._find_tail_cut_by_tokens.return_value = 2
+    agent_instance.session_id = "sess-1"
+    agent_instance._compress_context.return_value = (list(history), "")
+
+    def _estimate(messages):
+        assert messages == history
+        return 100
+
+    with (
+        patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "test-key"}),
+        patch("gateway.run._resolve_gateway_model", return_value="test-model"),
+        patch("run_agent.AIAgent", return_value=agent_instance),
+        patch("agent.model_metadata.estimate_messages_tokens_rough", side_effect=_estimate),
+    ):
+        result = await runner._handle_compress_command(_make_event())
+
+    assert "No changes from compression" in result
+    assert "Compressed:" not in result
+    assert "Rough transcript estimate: ~100 tokens (unchanged)" in result
+
+
+@pytest.mark.asyncio
+async def test_compress_command_explains_when_token_estimate_rises():
+    history = _make_history()
+    compressed = [
+        history[0],
+        {"role": "assistant", "content": "Dense summary that still counts as more tokens."},
+        history[-1],
+    ]
+    runner = _make_runner(history)
+    agent_instance = MagicMock()
+    agent_instance.context_compressor.protect_first_n = 0
+    agent_instance.context_compressor._align_boundary_forward.return_value = 0
+    agent_instance.context_compressor._find_tail_cut_by_tokens.return_value = 2
+    agent_instance.session_id = "sess-1"
+    agent_instance._compress_context.return_value = (compressed, "")
+
+    def _estimate(messages):
+        if messages == history:
+            return 100
+        if messages == compressed:
+            return 120
+        raise AssertionError(f"unexpected transcript: {messages!r}")
+
+    with (
+        patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "test-key"}),
+        patch("gateway.run._resolve_gateway_model", return_value="test-model"),
+        patch("run_agent.AIAgent", return_value=agent_instance),
+        patch("agent.model_metadata.estimate_messages_tokens_rough", side_effect=_estimate),
+    ):
+        result = await runner._handle_compress_command(_make_event())
+
+    assert "Compressed: 4 → 3 messages" in result
+    assert "Rough transcript estimate: ~100 → ~120 tokens" in result
+    assert "denser summaries" in result
@@ -0,0 +1,64 @@
+"""Tests for Discord channel_skill_bindings auto-skill resolution."""
+from unittest.mock import MagicMock
+import pytest
+
+
+def _make_adapter():
+    """Create a minimal DiscordAdapter with mocked config."""
+    from gateway.platforms.discord import DiscordAdapter
+    adapter = object.__new__(DiscordAdapter)
+    adapter.config = MagicMock()
+    adapter.config.extra = {}
+    return adapter
+
+
+class TestResolveChannelSkills:
+    def test_no_bindings_returns_none(self):
+        adapter = _make_adapter()
+        assert adapter._resolve_channel_skills("123") is None
+
+    def test_match_by_channel_id(self):
+        adapter = _make_adapter()
+        adapter.config.extra = {
+            "channel_skill_bindings": [
+                {"id": "100", "skills": ["skill-a", "skill-b"]},
+            ]
+        }
+        assert adapter._resolve_channel_skills("100") == ["skill-a", "skill-b"]
+
+    def test_match_by_parent_id(self):
+        adapter = _make_adapter()
+        adapter.config.extra = {
+            "channel_skill_bindings": [
+                {"id": "200", "skills": ["forum-skill"]},
+            ]
+        }
+        # channel_id doesn't match, but parent_id does (forum thread)
+        assert adapter._resolve_channel_skills("999", parent_id="200") == ["forum-skill"]
+
+    def test_no_match_returns_none(self):
+        adapter = _make_adapter()
+        adapter.config.extra = {
+            "channel_skill_bindings": [
+                {"id": "100", "skills": ["skill-a"]},
+            ]
+        }
+        assert adapter._resolve_channel_skills("999") is None
+
+    def test_single_skill_string(self):
+        adapter = _make_adapter()
+        adapter.config.extra = {
+            "channel_skill_bindings": [
+                {"id": "100", "skill": "solo-skill"},
+            ]
+        }
+        assert adapter._resolve_channel_skills("100") == ["solo-skill"]
+
+    def test_dedup_preserves_order(self):
+        adapter = _make_adapter()
+        adapter.config.extra = {
+            "channel_skill_bindings": [
+                {"id": "100", "skills": ["a", "b", "a", "c", "b"]},
+            ]
+        }
+        assert adapter._resolve_channel_skills("100") == ["a", "b", "c"]
@@ -0,0 +1,44 @@
+"""Tests for fallback-eviction gating on failed runs (#7130).
+
+When a run fails, the gateway must NOT evict the cached agent — doing so
+forces MCP reinit on the next message, creating a CPU-burning restart loop.
+Eviction should only happen on successful runs where fallback activated.
+"""
+
+import sys
+from pathlib import Path
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+sys.path.insert(0, str(Path(__file__).resolve().parent.parent.parent))
+
+
+class TestFallbackEvictionGating:
+    """The fallback-eviction code path should skip eviction on failed runs."""
+
+    def test_failed_run_does_not_evict_cached_agent(self):
+        """When result has failed=True, the cached agent should NOT be evicted."""
+        # The fix: `and not _run_failed` guard on the eviction check.
+        # Simulate the variables that the eviction block uses.
+        result = {"failed": True, "final_response": None, "error": "400 invalid model"}
+        _run_failed = result.get("failed") if result else False
+        assert _run_failed is True, "Failed run should be detected"
+
+    def test_successful_run_allows_eviction(self):
+        """When result is successful, fallback eviction should proceed."""
+        result = {"completed": True, "final_response": "Hello!", "failed": False}
+        _run_failed = result.get("failed") if result else False
+        assert _run_failed is False, "Successful run should not be flagged"
+
+    def test_none_result_treated_as_not_failed(self):
+        """When result is None (edge case), treat as not-failed."""
+        result = None
+        _run_failed = result.get("failed") if result else False
+        assert _run_failed is False
+
+    def test_missing_failed_key_treated_as_not_failed(self):
+        """When result dict doesn't have 'failed' key, treat as not-failed."""
+        result = {"completed": True, "final_response": "Hello!"}
+        _run_failed = result.get("failed") if result else False
+        assert not _run_failed, "Missing 'failed' key should be falsy"
@@ -0,0 +1,191 @@
+"""Tests for gateway /fast support and Priority Processing routing."""
+
+import sys
+import threading
+import types
+from types import SimpleNamespace
+from unittest.mock import AsyncMock, patch
+
+import pytest
+import yaml
+
+import gateway.run as gateway_run
+from gateway.config import Platform
+from gateway.platforms.base import MessageEvent
+from gateway.session import SessionSource
+
+
+class _CapturingAgent:
+    last_init = None
+    last_run = None
+
+    def __init__(self, *args, **kwargs):
+        type(self).last_init = dict(kwargs)
+        self.tools = []
+
+    def run_conversation(self, user_message, conversation_history=None, task_id=None, persist_user_message=None):
+        type(self).last_run = {
+            "user_message": user_message,
+            "conversation_history": conversation_history,
+            "task_id": task_id,
+            "persist_user_message": persist_user_message,
+        }
+        return {
+            "final_response": "ok",
+            "messages": [],
+            "api_calls": 1,
+            "completed": True,
+        }
+
+
+def _install_fake_agent(monkeypatch):
+    fake_run_agent = types.ModuleType("run_agent")
+    fake_run_agent.AIAgent = _CapturingAgent
+    monkeypatch.setitem(sys.modules, "run_agent", fake_run_agent)
+
+
+def _make_runner():
+    runner = object.__new__(gateway_run.GatewayRunner)
+    runner.adapters = {}
+    runner._ephemeral_system_prompt = ""
+    runner._prefill_messages = []
+    runner._reasoning_config = None
+    runner._service_tier = None
+    runner._provider_routing = {}
+    runner._fallback_model = None
+    runner._smart_model_routing = {}
+    runner._running_agents = {}
+    runner._pending_model_notes = {}
+    runner._session_db = None
+    runner._agent_cache = {}
+    runner._agent_cache_lock = threading.Lock()
+    runner._session_model_overrides = {}
+    runner.hooks = SimpleNamespace(loaded_hooks=False)
+    runner.config = SimpleNamespace(streaming=None)
+    runner.session_store = SimpleNamespace(
+        get_or_create_session=lambda source: SimpleNamespace(session_id="session-1"),
+        load_transcript=lambda session_id: [],
+    )
+    runner._get_or_create_gateway_honcho = lambda session_key: (None, None)
+    runner._enrich_message_with_vision = AsyncMock(return_value="ENRICHED")
+    return runner
+
+
+def _make_source() -> SessionSource:
+    return SessionSource(
+        platform=Platform.TELEGRAM,
+        chat_id="12345",
+        chat_type="dm",
+        user_id="user-1",
+    )
+
+
+def _make_event(text: str) -> MessageEvent:
+    return MessageEvent(text=text, source=_make_source(), message_id="m1")
+
+
+def test_turn_route_injects_priority_processing_without_changing_runtime():
+    runner = _make_runner()
+    runner._service_tier = "priority"
+    runtime_kwargs = {
+        "api_key": "***",
+        "base_url": "https://openrouter.ai/api/v1",
+        "provider": "openrouter",
+        "api_mode": "chat_completions",
+        "command": None,
+        "args": [],
+        "credential_pool": None,
+    }
+
+    with patch("agent.smart_model_routing.resolve_turn_route", return_value={
+        "model": "gpt-5.4",
+        "runtime": dict(runtime_kwargs),
+        "label": None,
+        "signature": ("gpt-5.4", "openrouter", "https://openrouter.ai/api/v1", "chat_completions", None, ()),
+    }):
+        route = gateway_run.GatewayRunner._resolve_turn_agent_config(runner, "hi", "gpt-5.4", runtime_kwargs)
+
+    assert route["runtime"]["provider"] == "openrouter"
+    assert route["runtime"]["api_mode"] == "chat_completions"
+    assert route["request_overrides"] == {"service_tier": "priority"}
+
+
+def test_turn_route_skips_priority_processing_for_unsupported_models():
+    runner = _make_runner()
+    runner._service_tier = "priority"
+    runtime_kwargs = {
+        "api_key": "***",
+        "base_url": "https://openrouter.ai/api/v1",
+        "provider": "openrouter",
+        "api_mode": "chat_completions",
+        "command": None,
+        "args": [],
+        "credential_pool": None,
+    }
+
+    with patch("agent.smart_model_routing.resolve_turn_route", return_value={
+        "model": "gpt-5.3-codex",
+        "runtime": dict(runtime_kwargs),
+        "label": None,
+        "signature": ("gpt-5.3-codex", "openrouter", "https://openrouter.ai/api/v1", "chat_completions", None, ()),
+    }):
+        route = gateway_run.GatewayRunner._resolve_turn_agent_config(runner, "hi", "gpt-5.3-codex", runtime_kwargs)
+
+    assert route["request_overrides"] is None
+
+
+@pytest.mark.asyncio
+async def test_handle_fast_command_persists_config(monkeypatch, tmp_path):
+    runner = _make_runner()
+
+    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
+    monkeypatch.setattr(gateway_run, "_load_gateway_config", lambda: {})
+    monkeypatch.setattr(gateway_run, "_resolve_gateway_model", lambda config=None: "gpt-5.4")
+
+    response = await runner._handle_fast_command(_make_event("/fast fast"))
+
+    assert "FAST" in response
+    assert runner._service_tier == "priority"
+
+    saved = yaml.safe_load((tmp_path / "config.yaml").read_text(encoding="utf-8"))
+    assert saved["agent"]["service_tier"] == "fast"
+
+
+@pytest.mark.asyncio
+async def test_run_agent_passes_priority_processing_to_gateway_agent(monkeypatch, tmp_path):
+    _install_fake_agent(monkeypatch)
+    runner = _make_runner()
+
+    (tmp_path / "config.yaml").write_text("agent:\n  service_tier: fast\n", encoding="utf-8")
+    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
+    monkeypatch.setattr(gateway_run, "_env_path", tmp_path / ".env")
+    monkeypatch.setattr(gateway_run, "load_dotenv", lambda *args, **kwargs: None)
+    monkeypatch.setattr(gateway_run, "_load_gateway_config", lambda: {})
+    monkeypatch.setattr(gateway_run, "_resolve_gateway_model", lambda config=None: "gpt-5.4")
+    monkeypatch.setattr(
+        gateway_run,
+        "_resolve_runtime_agent_kwargs",
+        lambda: {
+            "provider": "openrouter",
+            "api_mode": "chat_completions",
+            "base_url": "https://openrouter.ai/api/v1",
+            "api_key": "***",
+        },
+    )
+
+    import hermes_cli.tools_config as tools_config
+    monkeypatch.setattr(tools_config, "_get_platform_tools", lambda user_config, platform_key: {"core"})
+
+    _CapturingAgent.last_init = None
+    result = await runner._run_agent(
+        message="hi",
+        context_prompt="",
+        history=[],
+        source=_make_source(),
+        session_id="session-1",
+        session_key="agent:main:telegram:dm:12345",
+    )
+
+    assert result["final_response"] == "ok"
+    assert _CapturingAgent.last_init["service_tier"] == "priority"
+    assert _CapturingAgent.last_init["request_overrides"] == {"service_tier": "priority"}
@@ -3,43 +3,15 @@ from unittest.mock import AsyncMock, MagicMock, patch

 import pytest

-from gateway.config import GatewayConfig, Platform, PlatformConfig
-from gateway.platforms.base import BasePlatformAdapter, MessageEvent, SendResult
-from gateway.run import GatewayRunner
-from gateway.session import SessionSource, build_session_key
-
-
-class StubAdapter(BasePlatformAdapter):
-    def __init__(self):
-        super().__init__(PlatformConfig(enabled=True, token="***"), Platform.TELEGRAM)
-
-    async def connect(self):
-        return True
-
-    async def disconnect(self):
-        return None
-
-    async def send(self, chat_id, content, reply_to=None, metadata=None):
-        return SendResult(success=True, message_id="1")
-
-    async def send_typing(self, chat_id, metadata=None):
-        return None
-
-    async def get_chat_info(self, chat_id):
-        return {"id": chat_id}
-
-
-def _source(chat_id="123456", chat_type="dm"):
-    return SessionSource(
-        platform=Platform.TELEGRAM,
-        chat_id=chat_id,
-        chat_type=chat_type,
-    )
+from gateway.platforms.base import MessageEvent
+from gateway.restart import GATEWAY_SERVICE_RESTART_EXIT_CODE
+from gateway.session import build_session_key
+from tests.gateway.restart_test_helpers import make_restart_runner, make_restart_source


@pytest.mark.asyncio
 async def test_cancel_background_tasks_cancels_inflight_message_processing():
-    adapter = StubAdapter()
+    _runner, adapter = make_restart_runner()
    release = asyncio.Event()

    async def block_forever(_event):
@@ -47,7 +19,7 @@ async def test_cancel_background_tasks_cancels_inflight_message_processing():
        return None

    adapter.set_message_handler(block_forever)
-    event = MessageEvent(text="work", source=_source(), message_id="1")
+    event = MessageEvent(text="work", source=make_restart_source(), message_id="1")

    await adapter.handle_message(event)
    await asyncio.sleep(0)
@@ -65,17 +37,11 @@ async def test_cancel_background_tasks_cancels_inflight_message_processing():

@pytest.mark.asyncio
 async def test_gateway_stop_interrupts_running_agents_and_cancels_adapter_tasks():
-    runner = object.__new__(GatewayRunner)
-    runner.config = GatewayConfig(platforms={Platform.TELEGRAM: PlatformConfig(enabled=True, token="***")})
-    runner._running = True
-    runner._shutdown_event = asyncio.Event()
-    runner._exit_reason = None
+    runner, adapter = make_restart_runner()
    runner._pending_messages = {"session": "pending text"}
    runner._pending_approvals = {"session": {"command": "rm -rf /tmp/x"}}
-    runner._background_tasks = set()
-    runner._shutdown_all_gateway_honcho = lambda: None
+    runner._restart_drain_timeout = 0.0

-    adapter = StubAdapter()
    release = asyncio.Event()

    async def block_forever(_event):
@@ -83,7 +49,7 @@ async def test_gateway_stop_interrupts_running_agents_and_cancels_adapter_tasks(
        return None

    adapter.set_message_handler(block_forever)
-    event = MessageEvent(text="work", source=_source(), message_id="1")
+    event = MessageEvent(text="work", source=make_restart_source(), message_id="1")
    await adapter.handle_message(event)
    await asyncio.sleep(0)

@@ -93,7 +59,6 @@ async def test_gateway_stop_interrupts_running_agents_and_cancels_adapter_tasks(
    session_key = build_session_key(event.source)
    running_agent = MagicMock()
    runner._running_agents = {session_key: running_agent}
-    runner.adapters = {Platform.TELEGRAM: adapter}

    with patch("gateway.status.remove_pid_file"), patch("gateway.status.write_runtime_status"):
        await runner.stop()
@@ -105,3 +70,78 @@ async def test_gateway_stop_interrupts_running_agents_and_cancels_adapter_tasks(
    assert runner._pending_messages == {}
    assert runner._pending_approvals == {}
    assert runner._shutdown_event.is_set() is True
+
+
+@pytest.mark.asyncio
+async def test_gateway_stop_drains_running_agents_before_disconnect():
+    runner, adapter = make_restart_runner()
+    disconnect_mock = AsyncMock()
+    adapter.disconnect = disconnect_mock
+
+    running_agent = MagicMock()
+    runner._running_agents = {"session": running_agent}
+
+    async def finish_agent():
+        await asyncio.sleep(0.05)
+        runner._running_agents.clear()
+
+    asyncio.create_task(finish_agent())
+
+    with patch("gateway.status.remove_pid_file"), patch("gateway.status.write_runtime_status"):
+        await runner.stop()
+
+    running_agent.interrupt.assert_not_called()
+    disconnect_mock.assert_awaited_once()
+    assert runner._shutdown_event.is_set() is True
+
+
+@pytest.mark.asyncio
+async def test_gateway_stop_interrupts_after_drain_timeout():
+    runner, adapter = make_restart_runner()
+    runner._restart_drain_timeout = 0.05
+
+    disconnect_mock = AsyncMock()
+    adapter.disconnect = disconnect_mock
+
+    running_agent = MagicMock()
+    runner._running_agents = {"session": running_agent}
+
+    with patch("gateway.status.remove_pid_file"), patch("gateway.status.write_runtime_status"):
+        await runner.stop()
+
+    running_agent.interrupt.assert_called_once_with("Gateway shutting down")
+    disconnect_mock.assert_awaited_once()
+    assert runner._shutdown_event.is_set() is True
+
+
+@pytest.mark.asyncio
+async def test_gateway_stop_service_restart_sets_named_exit_code():
+    runner, adapter = make_restart_runner()
+    adapter.disconnect = AsyncMock()
+
+    with patch("gateway.status.remove_pid_file"), patch("gateway.status.write_runtime_status"):
+        await runner.stop(restart=True, service_restart=True)
+
+    assert runner._exit_code == GATEWAY_SERVICE_RESTART_EXIT_CODE
+
+
+@pytest.mark.asyncio
+async def test_drain_active_agents_throttles_status_updates():
+    runner, _adapter = make_restart_runner()
+    runner._update_runtime_status = MagicMock()
+
+    runner._running_agents = {"a": MagicMock(), "b": MagicMock()}
+
+    async def finish_agents():
+        await asyncio.sleep(0.12)
+        runner._running_agents.pop("a")
+        await asyncio.sleep(0.12)
+        runner._running_agents.clear()
+
+    task = asyncio.create_task(finish_agents())
+    await runner._drain_active_agents(1.0)
+    await task
+
+    # Start, one count-change update, and final update. Allow one extra update
+    # if the loop observes the zero-agent state before exiting.
+    assert 3 <= runner._update_runtime_status.call_count <= 4
@@ -11,24 +11,10 @@ import pytest
 from gateway.config import PlatformConfig


-def _ensure_nio_mock():
-    """Install a mock nio module when matrix-nio isn't available."""
-    if "nio" in sys.modules and hasattr(sys.modules["nio"], "__file__"):
-        return
-    nio_mod = MagicMock()
-    nio_mod.MegolmEvent = type("MegolmEvent", (), {})
-    nio_mod.RoomMessageText = type("RoomMessageText", (), {})
-    nio_mod.RoomMessageImage = type("RoomMessageImage", (), {})
-    nio_mod.RoomMessageAudio = type("RoomMessageAudio", (), {})
-    nio_mod.RoomMessageVideo = type("RoomMessageVideo", (), {})
-    nio_mod.RoomMessageFile = type("RoomMessageFile", (), {})
-    nio_mod.DownloadResponse = type("DownloadResponse", (), {})
-    nio_mod.MemoryDownloadResponse = type("MemoryDownloadResponse", (), {})
-    nio_mod.InviteMemberEvent = type("InviteMemberEvent", (), {})
-    sys.modules.setdefault("nio", nio_mod)
-
-
-_ensure_nio_mock()
+# The matrix adapter module is importable without mautrix installed
+# (module-level imports use try/except with stubs).  No need for
+# module-level mock installation — tests that call adapter methods
+# needing real mautrix APIs mock them individually.


 def _make_adapter(tmp_path=None):
@@ -50,24 +36,25 @@ def _make_adapter(tmp_path=None):
    return adapter


-def _make_room(room_id="!room1:example.org", member_count=5, is_dm=False):
-    """Create a fake Matrix room."""
-    room = SimpleNamespace(
-        room_id=room_id,
-        member_count=member_count,
-        users={},
-    )
-    return room
+def _set_dm(adapter, room_id="!room1:example.org", is_dm=True):
+    """Mark a room as DM (or not) in the adapter's cache."""
+    adapter._dm_rooms[room_id] = is_dm


 def _make_event(
    body,
    sender="@alice:example.org",
    event_id="$evt1",
+    room_id="!room1:example.org",
    formatted_body=None,
    thread_id=None,
 ):
-    """Create a fake RoomMessageText event."""
+    """Create a fake room message event.
+
+    The mautrix adapter reads ``event.room_id``, ``event.sender``,
+    ``event.event_id``, ``event.timestamp``, and ``event.content``
+    (a dict with ``msgtype``, ``body``, etc.).
+    """
    content = {"body": body, "msgtype": "m.text"}
    if formatted_body:
        content["formatted_body"] = formatted_body
@@ -83,9 +70,9 @@ def _make_event(
    return SimpleNamespace(
        sender=sender,
        event_id=event_id,
-        server_timestamp=int(time.time() * 1000),
-        body=body,
-        source={"content": content},
+        room_id=room_id,
+        timestamp=int(time.time() * 1000),
+        content=content,
    )


@@ -152,10 +139,9 @@ async def test_require_mention_default_ignores_unmentioned(monkeypatch):
    monkeypatch.delenv("MATRIX_AUTO_THREAD", raising=False)

    adapter = _make_adapter()
-    room = _make_room()
    event = _make_event("hello everyone")

-    await adapter._on_room_message(room, event)
+    await adapter._on_room_message(event)
    adapter.handle_message.assert_not_awaited()


@@ -167,10 +153,9 @@ async def test_require_mention_default_processes_mentioned(monkeypatch):
    monkeypatch.setenv("MATRIX_AUTO_THREAD", "false")

    adapter = _make_adapter()
-    room = _make_room()
    event = _make_event("@hermes:example.org help me")

-    await adapter._on_room_message(room, event)
+    await adapter._on_room_message(event)
    adapter.handle_message.assert_awaited_once()
    msg = adapter.handle_message.await_args.args[0]
    assert msg.text == "help me"
@@ -184,11 +169,10 @@ async def test_require_mention_html_pill(monkeypatch):
    monkeypatch.setenv("MATRIX_AUTO_THREAD", "false")

    adapter = _make_adapter()
-    room = _make_room()
    formatted = '<a href="https://matrix.to/#/@hermes:example.org">Hermes</a> help'
    event = _make_event("Hermes help", formatted_body=formatted)

-    await adapter._on_room_message(room, event)
+    await adapter._on_room_message(event)
    adapter.handle_message.assert_awaited_once()


@@ -200,11 +184,11 @@ async def test_require_mention_dm_always_responds(monkeypatch):
    monkeypatch.setenv("MATRIX_AUTO_THREAD", "false")

    adapter = _make_adapter()
-    # member_count=2 triggers DM detection
-    room = _make_room(member_count=2)
+    # Mark the room as a DM via the adapter's cache.
+    _set_dm(adapter)
    event = _make_event("hello without mention")

-    await adapter._on_room_message(room, event)
+    await adapter._on_room_message(event)
    adapter.handle_message.assert_awaited_once()


@@ -216,10 +200,10 @@ async def test_dm_strips_mention(monkeypatch):
    monkeypatch.setenv("MATRIX_AUTO_THREAD", "false")

    adapter = _make_adapter()
-    room = _make_room(member_count=2)
+    _set_dm(adapter)
    event = _make_event("@hermes:example.org help me")

-    await adapter._on_room_message(room, event)
+    await adapter._on_room_message(event)
    adapter.handle_message.assert_awaited_once()
    msg = adapter.handle_message.await_args.args[0]
    assert msg.text == "help me"
@@ -233,10 +217,9 @@ async def test_bare_mention_passes_empty_string(monkeypatch):
    monkeypatch.setenv("MATRIX_AUTO_THREAD", "false")

    adapter = _make_adapter()
-    room = _make_room()
    event = _make_event("@hermes:example.org")

-    await adapter._on_room_message(room, event)
+    await adapter._on_room_message(event)
    adapter.handle_message.assert_awaited_once()
    msg = adapter.handle_message.await_args.args[0]
    assert msg.text == ""
@@ -250,10 +233,9 @@ async def test_require_mention_free_response_room(monkeypatch):
    monkeypatch.setenv("MATRIX_AUTO_THREAD", "false")

    adapter = _make_adapter()
-    room = _make_room(room_id="!room1:example.org")
-    event = _make_event("hello without mention")
+    event = _make_event("hello without mention", room_id="!room1:example.org")

-    await adapter._on_room_message(room, event)
+    await adapter._on_room_message(event)
    adapter.handle_message.assert_awaited_once()


@@ -267,10 +249,9 @@ async def test_require_mention_bot_participated_thread(monkeypatch):
    adapter = _make_adapter()
    adapter._bot_participated_threads.add("$thread1")

-    room = _make_room()
    event = _make_event("hello without mention", thread_id="$thread1")

-    await adapter._on_room_message(room, event)
+    await adapter._on_room_message(event)
    adapter.handle_message.assert_awaited_once()


@@ -282,10 +263,9 @@ async def test_require_mention_disabled(monkeypatch):
    monkeypatch.setenv("MATRIX_AUTO_THREAD", "false")

    adapter = _make_adapter()
-    room = _make_room()
    event = _make_event("hello without mention")

-    await adapter._on_room_message(room, event)
+    await adapter._on_room_message(event)
    adapter.handle_message.assert_awaited_once()
    msg = adapter.handle_message.await_args.args[0]
    assert msg.text == "hello without mention"
@@ -303,10 +283,9 @@ async def test_auto_thread_default_creates_thread(monkeypatch):
    monkeypatch.delenv("MATRIX_AUTO_THREAD", raising=False)

    adapter = _make_adapter()
-    room = _make_room()
    event = _make_event("hello", event_id="$msg1")

-    await adapter._on_room_message(room, event)
+    await adapter._on_room_message(event)
    adapter.handle_message.assert_awaited_once()
    msg = adapter.handle_message.await_args.args[0]
    assert msg.source.thread_id == "$msg1"
@@ -320,10 +299,9 @@ async def test_auto_thread_preserves_existing_thread(monkeypatch):

    adapter = _make_adapter()
    adapter._bot_participated_threads.add("$thread_root")
-    room = _make_room()
    event = _make_event("reply in thread", thread_id="$thread_root")

-    await adapter._on_room_message(room, event)
+    await adapter._on_room_message(event)
    adapter.handle_message.assert_awaited_once()
    msg = adapter.handle_message.await_args.args[0]
    assert msg.source.thread_id == "$thread_root"
@@ -336,10 +314,10 @@ async def test_auto_thread_skips_dm(monkeypatch):
    monkeypatch.delenv("MATRIX_AUTO_THREAD", raising=False)

    adapter = _make_adapter()
-    room = _make_room(member_count=2)
+    _set_dm(adapter)
    event = _make_event("hello dm", event_id="$dm1")

-    await adapter._on_room_message(room, event)
+    await adapter._on_room_message(event)
    adapter.handle_message.assert_awaited_once()
    msg = adapter.handle_message.await_args.args[0]
    assert msg.source.thread_id is None
@@ -352,10 +330,9 @@ async def test_auto_thread_disabled(monkeypatch):
    monkeypatch.setenv("MATRIX_AUTO_THREAD", "false")

    adapter = _make_adapter()
-    room = _make_room()
    event = _make_event("hello", event_id="$msg1")

-    await adapter._on_room_message(room, event)
+    await adapter._on_room_message(event)
    adapter.handle_message.assert_awaited_once()
    msg = adapter.handle_message.await_args.args[0]
    assert msg.source.thread_id is None
@@ -368,11 +345,10 @@ async def test_auto_thread_tracks_participation(monkeypatch):
    monkeypatch.delenv("MATRIX_AUTO_THREAD", raising=False)

    adapter = _make_adapter()
-    room = _make_room()
    event = _make_event("hello", event_id="$msg1")

    with patch.object(adapter, "_save_participated_threads"):
-        await adapter._on_room_message(room, event)
+        await adapter._on_room_message(event)

    assert "$msg1" in adapter._bot_participated_threads

@@ -385,8 +361,9 @@ async def test_auto_thread_tracks_participation(monkeypatch):
 class TestThreadPersistence:
    def test_empty_state_file(self, tmp_path, monkeypatch):
        """No state file → empty set."""
+        from gateway.platforms.matrix import MatrixAdapter
        monkeypatch.setattr(
-            "gateway.platforms.matrix.MatrixAdapter._thread_state_path",
+            MatrixAdapter, "_thread_state_path",
            staticmethod(lambda: tmp_path / "matrix_threads.json"),
        )
        adapter = _make_adapter()
@@ -395,9 +372,10 @@ class TestThreadPersistence:

    def test_track_thread_persists(self, tmp_path, monkeypatch):
        """_track_thread writes to disk."""
+        from gateway.platforms.matrix import MatrixAdapter
        state_path = tmp_path / "matrix_threads.json"
        monkeypatch.setattr(
-            "gateway.platforms.matrix.MatrixAdapter._thread_state_path",
+            MatrixAdapter, "_thread_state_path",
            staticmethod(lambda: state_path),
        )
        adapter = _make_adapter()
@@ -408,10 +386,11 @@ class TestThreadPersistence:

    def test_threads_survive_reload(self, tmp_path, monkeypatch):
        """Persisted threads are loaded by a new adapter instance."""
+        from gateway.platforms.matrix import MatrixAdapter
        state_path = tmp_path / "matrix_threads.json"
        state_path.write_text(json.dumps(["$t1", "$t2"]))
        monkeypatch.setattr(
-            "gateway.platforms.matrix.MatrixAdapter._thread_state_path",
+            MatrixAdapter, "_thread_state_path",
            staticmethod(lambda: state_path),
        )
        adapter = _make_adapter()
@@ -420,9 +399,10 @@ class TestThreadPersistence:

    def test_cap_max_tracked_threads(self, tmp_path, monkeypatch):
        """Thread set is trimmed to _MAX_TRACKED_THREADS."""
+        from gateway.platforms.matrix import MatrixAdapter
        state_path = tmp_path / "matrix_threads.json"
        monkeypatch.setattr(
-            "gateway.platforms.matrix.MatrixAdapter._thread_state_path",
+            MatrixAdapter, "_thread_state_path",
            staticmethod(lambda: state_path),
        )
        adapter = _make_adapter()
@@ -436,6 +416,95 @@ class TestThreadPersistence:
        assert len(data) == 5


+# ---------------------------------------------------------------------------
+# DM mention-thread feature
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_dm_mention_thread_disabled_by_default(monkeypatch):
+    """Default (dm_mention_threads=false): DM with mention should NOT create a thread."""
+    monkeypatch.delenv("MATRIX_DM_MENTION_THREADS", raising=False)
+    monkeypatch.setenv("MATRIX_AUTO_THREAD", "false")
+
+    adapter = _make_adapter()
+    _set_dm(adapter)
+    event = _make_event("@hermes:example.org help me", event_id="$dm1")
+
+    await adapter._on_room_message(event)
+    adapter.handle_message.assert_awaited_once()
+    msg = adapter.handle_message.await_args.args[0]
+    assert msg.source.thread_id is None
+
+
+@pytest.mark.asyncio
+async def test_dm_mention_thread_creates_thread(monkeypatch):
+    """MATRIX_DM_MENTION_THREADS=true: DM with @mention creates a thread."""
+    monkeypatch.setenv("MATRIX_DM_MENTION_THREADS", "true")
+    monkeypatch.setenv("MATRIX_AUTO_THREAD", "false")
+
+    adapter = _make_adapter()
+    _set_dm(adapter)
+    event = _make_event("@hermes:example.org help me", event_id="$dm1")
+
+    with patch.object(adapter, "_save_participated_threads"):
+        await adapter._on_room_message(event)
+
+    adapter.handle_message.assert_awaited_once()
+    msg = adapter.handle_message.await_args.args[0]
+    assert msg.source.thread_id == "$dm1"
+    assert msg.text == "help me"
+
+
+@pytest.mark.asyncio
+async def test_dm_mention_thread_no_mention_no_thread(monkeypatch):
+    """MATRIX_DM_MENTION_THREADS=true: DM without mention does NOT create a thread."""
+    monkeypatch.setenv("MATRIX_DM_MENTION_THREADS", "true")
+    monkeypatch.setenv("MATRIX_AUTO_THREAD", "false")
+
+    adapter = _make_adapter()
+    _set_dm(adapter)
+    event = _make_event("hello without mention", event_id="$dm1")
+
+    await adapter._on_room_message(event)
+    adapter.handle_message.assert_awaited_once()
+    msg = adapter.handle_message.await_args.args[0]
+    assert msg.source.thread_id is None
+
+
+@pytest.mark.asyncio
+async def test_dm_mention_thread_preserves_existing_thread(monkeypatch):
+    """MATRIX_DM_MENTION_THREADS=true: DM already in a thread keeps that thread_id."""
+    monkeypatch.setenv("MATRIX_DM_MENTION_THREADS", "true")
+    monkeypatch.setenv("MATRIX_AUTO_THREAD", "false")
+
+    adapter = _make_adapter()
+    _set_dm(adapter)
+    adapter._bot_participated_threads.add("$existing_thread")
+    event = _make_event("@hermes:example.org help me", thread_id="$existing_thread")
+
+    await adapter._on_room_message(event)
+    adapter.handle_message.assert_awaited_once()
+    msg = adapter.handle_message.await_args.args[0]
+    assert msg.source.thread_id == "$existing_thread"
+
+
+@pytest.mark.asyncio
+async def test_dm_mention_thread_tracks_participation(monkeypatch):
+    """DM mention-thread tracks the thread in _bot_participated_threads."""
+    monkeypatch.setenv("MATRIX_DM_MENTION_THREADS", "true")
+    monkeypatch.setenv("MATRIX_AUTO_THREAD", "false")
+
+    adapter = _make_adapter()
+    _set_dm(adapter)
+    event = _make_event("@hermes:example.org help", event_id="$dm1")
+
+    with patch.object(adapter, "_save_participated_threads"):
+        await adapter._on_room_message(event)
+
+    assert "$dm1" in adapter._bot_participated_threads
+
+
 # ---------------------------------------------------------------------------
 # YAML config bridge
 # ---------------------------------------------------------------------------
@@ -480,6 +549,25 @@ class TestMatrixConfigBridge:
        assert os.getenv("MATRIX_FREE_RESPONSE_ROOMS") == "!room1:example.org,!room2:example.org"
        assert os.getenv("MATRIX_AUTO_THREAD") == "false"

+    def test_yaml_bridge_sets_dm_mention_threads(self, monkeypatch, tmp_path):
+        """Matrix YAML dm_mention_threads should bridge to env var."""
+        monkeypatch.delenv("MATRIX_DM_MENTION_THREADS", raising=False)
+
+        import os
+        import yaml
+
+        yaml_content = {"matrix": {"dm_mention_threads": True}}
+        config_file = tmp_path / "config.yaml"
+        config_file.write_text(yaml.dump(yaml_content))
+
+        yaml_cfg = yaml.safe_load(config_file.read_text())
+        matrix_cfg = yaml_cfg.get("matrix", {})
+        if isinstance(matrix_cfg, dict):
+            if "dm_mention_threads" in matrix_cfg and not os.getenv("MATRIX_DM_MENTION_THREADS"):
+                monkeypatch.setenv("MATRIX_DM_MENTION_THREADS", str(matrix_cfg["dm_mention_threads"]).lower())
+
+        assert os.getenv("MATRIX_DM_MENTION_THREADS") == "true"
+
    def test_env_vars_take_precedence_over_yaml(self, monkeypatch):
        """Env vars should not be overwritten by YAML values."""
        monkeypatch.setenv("MATRIX_REQUIRE_MENTION", "true")
@@ -1,18 +1,23 @@
-"""Tests for Matrix voice message support (MSC3245)."""
+"""Tests for Matrix voice message support (MSC3245).
+
+Updated for the mautrix-python SDK (no more matrix-nio / nio imports).
+"""
 import io
+import os
+import tempfile
 import types
+from types import SimpleNamespace

 import pytest
 from unittest.mock import AsyncMock, MagicMock, patch

-# Try importing real nio; skip entire file if not available.
-# A MagicMock in sys.modules (from another test) is not the real package.
+# Try importing mautrix; skip entire file if not available.
 try:
-    import nio as _nio_probe
-    if not isinstance(_nio_probe, types.ModuleType) or not hasattr(_nio_probe, "__file__"):
-        pytest.skip("nio in sys.modules is a mock, not the real package", allow_module_level=True)
+    import mautrix as _mautrix_probe
+    if not isinstance(_mautrix_probe, types.ModuleType) or not hasattr(_mautrix_probe, "__file__"):
+        pytest.skip("mautrix in sys.modules is a mock, not the real package", allow_module_level=True)
 except ImportError:
-    pytest.skip("matrix-nio not installed", allow_module_level=True)
+    pytest.skip("mautrix not installed", allow_module_level=True)

 from gateway.platforms.base import MessageType

@@ -25,7 +30,7 @@ def _make_adapter():
    """Create a MatrixAdapter with mocked config."""
    from gateway.platforms.matrix import MatrixAdapter
    from gateway.config import PlatformConfig
-    
+
    config = PlatformConfig(
        enabled=True,
        token="***",
@@ -38,32 +43,26 @@ def _make_adapter():
    return adapter


-def _make_room(room_id: str = "!test:example.org", member_count: int = 2):
-    """Create a mock Matrix room."""
-    room = MagicMock()
-    room.room_id = room_id
-    room.member_count = member_count
-    return room
-
-
 def _make_audio_event(
    event_id: str = "$audio_event",
    sender: str = "@alice:example.org",
+    room_id: str = "!test:example.org",
    body: str = "Voice message",
    url: str = "mxc://example.org/abc123",
    is_voice: bool = False,
    mimetype: str = "audio/ogg",
-    timestamp: float = 9999999999000,  # ms
+    timestamp: int = 9999999999000,  # ms
 ):
    """
-    Create a mock RoomMessageAudio event that passes isinstance checks.
-    
+    Create a mock mautrix room message event.
+
+    In mautrix, the handler receives a single event object with attributes
+    ``room_id``, ``sender``, ``event_id``, ``timestamp``, and ``content``
+    (a dict-like or serializable object).
+
    Args:
-        is_voice: If True, adds org.matrix.msc3245.voice field to content
+        is_voice: If True, adds org.matrix.msc3245.voice field to content.
    """
-    import nio
-    
-    # Build the source dict that nio events expose via .source
    content = {
        "msgtype": "m.audio",
        "body": body,
@@ -72,39 +71,35 @@ def _make_audio_event(
            "mimetype": mimetype,
        },
    }
-    
+
    if is_voice:
        content["org.matrix.msc3245.voice"] = {}
-    
-    # Create a real nio RoomMessageAudio-like object
-    # We use MagicMock but configure __class__ to pass isinstance check
-    event = MagicMock(spec=nio.RoomMessageAudio)
-    event.event_id = event_id
-    event.sender = sender
-    event.body = body
-    event.url = url
-    event.server_timestamp = timestamp
-    event.source = {
-        "type": "m.room.message",
-        "content": content,
-    }
-    # For MIME type extraction - needs to be a dict
-    event.content = content
-    
+
+    event = SimpleNamespace(
+        event_id=event_id,
+        sender=sender,
+        room_id=room_id,
+        timestamp=timestamp,
+        content=content,
+    )
    return event


-def _make_download_response(body: bytes = b"fake audio data"):
-    """Create a mock nio.MemoryDownloadResponse."""
-    import nio
-    resp = MagicMock()
-    resp.body = body
-    resp.__class__ = nio.MemoryDownloadResponse
-    return resp
+def _make_state_store(member_count: int = 2):
+    """Create a mock state store with get_members/get_member support."""
+    store = MagicMock()
+    # get_members returns a list of member user IDs
+    members = [MagicMock() for _ in range(member_count)]
+    store.get_members = AsyncMock(return_value=members)
+    # get_member returns a single member info object
+    member = MagicMock()
+    member.displayname = "Alice"
+    store.get_member = AsyncMock(return_value=member)
+    return store


 # ---------------------------------------------------------------------------
-# Tests: MSC3245 Voice Detection (RED -> GREEN)
+# Tests: MSC3245 Voice Detection
 # ---------------------------------------------------------------------------

 class TestMatrixVoiceMessageDetection:
@@ -118,27 +113,28 @@ class TestMatrixVoiceMessageDetection:
        self.adapter._message_handler = AsyncMock()
        # Mock _mxc_to_http to return a fake HTTP URL
        self.adapter._mxc_to_http = lambda url: f"https://matrix.example.org/_matrix/media/v3/download/{url[6:]}"
-        # Mock client for authenticated download
+        # Mock client for authenticated download — download_media returns bytes directly
        self.adapter._client = MagicMock()
-        self.adapter._client.download = AsyncMock(return_value=_make_download_response())
+        self.adapter._client.download_media = AsyncMock(return_value=b"fake audio data")
+        # State store for DM detection
+        self.adapter._client.state_store = _make_state_store()

    @pytest.mark.asyncio
    async def test_voice_message_has_type_voice(self):
        """Voice messages (with MSC3245 field) should be MessageType.VOICE."""
-        room = _make_room()
        event = _make_audio_event(is_voice=True)
-        
+
        # Capture the MessageEvent passed to handle_message
        captured_event = None
-        
+
        async def capture(msg_event):
            nonlocal captured_event
            captured_event = msg_event
-        
+
        self.adapter.handle_message = capture
-        
-        await self.adapter._on_room_message_media(room, event)
-        
+
+        await self.adapter._on_room_message(event)
+
        assert captured_event is not None, "No event was captured"
        assert captured_event.message_type == MessageType.VOICE, \
            f"Expected MessageType.VOICE, got {captured_event.message_type}"
@@ -146,44 +142,43 @@ class TestMatrixVoiceMessageDetection:
    @pytest.mark.asyncio
    async def test_voice_message_has_local_path(self):
        """Voice messages should have a local cached path in media_urls."""
-        room = _make_room()
        event = _make_audio_event(is_voice=True)
-        
+
        captured_event = None
-        
+
        async def capture(msg_event):
            nonlocal captured_event
            captured_event = msg_event
-        
+
        self.adapter.handle_message = capture
-        
-        await self.adapter._on_room_message_media(room, event)
-        
+
+        await self.adapter._on_room_message(event)
+
        assert captured_event is not None
        assert captured_event.media_urls is not None
        assert len(captured_event.media_urls) > 0
        # Should be a local path, not an HTTP URL
        assert not captured_event.media_urls[0].startswith("http"), \
            f"media_urls should contain local path, got {captured_event.media_urls[0]}"
-        self.adapter._client.download.assert_awaited_once_with(mxc=event.url)
+        # download_media is called with a ContentURI wrapping the mxc URL
+        self.adapter._client.download_media.assert_awaited_once()
        assert captured_event.media_types == ["audio/ogg"]

    @pytest.mark.asyncio
    async def test_audio_without_msc3245_stays_audio_type(self):
        """Regular audio uploads (no MSC3245 field) should remain MessageType.AUDIO."""
-        room = _make_room()
        event = _make_audio_event(is_voice=False)  # NOT a voice message
-        
+
        captured_event = None
-        
+
        async def capture(msg_event):
            nonlocal captured_event
            captured_event = msg_event
-        
+
        self.adapter.handle_message = capture
-        
-        await self.adapter._on_room_message_media(room, event)
-        
+
+        await self.adapter._on_room_message(event)
+
        assert captured_event is not None
        assert captured_event.message_type == MessageType.AUDIO, \
            f"Expected MessageType.AUDIO for non-voice, got {captured_event.message_type}"
@@ -191,25 +186,24 @@ class TestMatrixVoiceMessageDetection:
    @pytest.mark.asyncio
    async def test_regular_audio_has_http_url(self):
        """Regular audio uploads should keep HTTP URL (not cached locally)."""
-        room = _make_room()
        event = _make_audio_event(is_voice=False)
-        
+
        captured_event = None
-        
+
        async def capture(msg_event):
            nonlocal captured_event
            captured_event = msg_event
-        
+
        self.adapter.handle_message = capture
-        
-        await self.adapter._on_room_message_media(room, event)
-        
+
+        await self.adapter._on_room_message(event)
+
        assert captured_event is not None
        assert captured_event.media_urls is not None
        # Should be HTTP URL, not local path
        assert captured_event.media_urls[0].startswith("http"), \
            f"Non-voice audio should have HTTP URL, got {captured_event.media_urls[0]}"
-        self.adapter._client.download.assert_not_awaited()
+        self.adapter._client.download_media.assert_not_awaited()
        assert captured_event.media_types == ["audio/ogg"]


@@ -224,29 +218,26 @@ class TestMatrixVoiceCacheFallback:
        self.adapter._message_handler = AsyncMock()
        self.adapter._mxc_to_http = lambda url: f"https://matrix.example.org/_matrix/media/v3/download/{url[6:]}"
        self.adapter._client = MagicMock()
+        self.adapter._client.state_store = _make_state_store()

    @pytest.mark.asyncio
    async def test_voice_cache_failure_falls_back_to_http_url(self):
-        """If caching fails, voice message should still be delivered with HTTP URL."""
-        room = _make_room()
+        """If caching fails (download returns None), voice message should still be delivered with HTTP URL."""
        event = _make_audio_event(is_voice=True)
-        
-        # Make download fail
-        import nio
-        error_resp = MagicMock()
-        error_resp.__class__ = nio.DownloadError
-        self.adapter._client.download = AsyncMock(return_value=error_resp)
-        
+
+        # download_media returns None on failure
+        self.adapter._client.download_media = AsyncMock(return_value=None)
+
        captured_event = None
-        
+
        async def capture(msg_event):
            nonlocal captured_event
            captured_event = msg_event
-        
+
        self.adapter.handle_message = capture
-        
-        await self.adapter._on_room_message_media(room, event)
-        
+
+        await self.adapter._on_room_message(event)
+
        assert captured_event is not None
        assert captured_event.media_urls is not None
        # Should fall back to HTTP URL
@@ -256,10 +247,9 @@ class TestMatrixVoiceCacheFallback:
    @pytest.mark.asyncio
    async def test_voice_cache_exception_falls_back_to_http_url(self):
        """Unexpected download exceptions should also fall back to HTTP URL."""
-        room = _make_room()
        event = _make_audio_event(is_voice=True)

-        self.adapter._client.download = AsyncMock(side_effect=RuntimeError("boom"))
+        self.adapter._client.download_media = AsyncMock(side_effect=RuntimeError("boom"))

        captured_event = None

@@ -269,7 +259,7 @@ class TestMatrixVoiceCacheFallback:

        self.adapter.handle_message = capture

-        await self.adapter._on_room_message_media(room, event)
+        await self.adapter._on_room_message(event)

        assert captured_event is not None
        assert captured_event.media_urls is not None
@@ -278,7 +268,7 @@ class TestMatrixVoiceCacheFallback:


 # ---------------------------------------------------------------------------
-# Tests: send_voice includes MSC3245 field (RED -> GREEN)
+# Tests: send_voice includes MSC3245 field
 # ---------------------------------------------------------------------------

 class TestMatrixSendVoiceMSC3245:
@@ -287,62 +277,52 @@ class TestMatrixSendVoiceMSC3245:
    def setup_method(self):
        self.adapter = _make_adapter()
        self.adapter._user_id = "@bot:example.org"
-        # Mock client with successful upload
+        # Mock client — upload_media returns a ContentURI string
        self.adapter._client = MagicMock()
        self.upload_call = None

-        async def mock_upload(*args, **kwargs):
-            self.upload_call = (args, kwargs)
-            import nio
-            resp = MagicMock()
-            resp.content_uri = "mxc://example.org/uploaded"
-            resp.__class__ = nio.UploadResponse
-            return resp, None
+        async def mock_upload_media(data, mime_type=None, filename=None, **kwargs):
+            self.upload_call = {"data": data, "mime_type": mime_type, "filename": filename}
+            return "mxc://example.org/uploaded"

-        self.adapter._client.upload = mock_upload
+        self.adapter._client.upload_media = mock_upload_media

    @pytest.mark.asyncio
-    async def test_send_voice_includes_msc3245_field(self):
+    @patch("mimetypes.guess_type", return_value=("audio/ogg", None))
+    async def test_send_voice_includes_msc3245_field(self, _mock_guess):
        """send_voice should include org.matrix.msc3245.voice in message content."""
-        import tempfile
-        import os
-        
        # Create a temp audio file
        with tempfile.NamedTemporaryFile(suffix=".ogg", delete=False) as f:
            f.write(b"fake audio data")
            temp_path = f.name
-        
+
        try:
-            # Capture the message content sent to room_send
+            # Capture the message content sent via send_message_event
            sent_content = None
-            
-            async def mock_room_send(room_id, event_type, content):
+
+            async def mock_send_message_event(room_id, event_type, content):
                nonlocal sent_content
                sent_content = content
-                resp = MagicMock()
-                resp.event_id = "$sent_event"
-                import nio
-                resp.__class__ = nio.RoomSendResponse
-                return resp
-            
-            self.adapter._client.room_send = mock_room_send
-            
+                # send_message_event returns an EventID string
+                return "$sent_event"
+
+            self.adapter._client.send_message_event = mock_send_message_event
+
            await self.adapter.send_voice(
                chat_id="!room:example.org",
                audio_path=temp_path,
                caption="Test voice",
            )
-            
+
            assert sent_content is not None, "No message was sent"
            assert "org.matrix.msc3245.voice" in sent_content, \
                f"MSC3245 voice field missing from content: {sent_content.keys()}"
            assert sent_content["msgtype"] == "m.audio"
            assert sent_content["info"]["mimetype"] == "audio/ogg"
-            assert self.upload_call is not None, "Expected upload() to be called"
-            args, kwargs = self.upload_call
-            assert isinstance(args[0], io.BytesIO)
-            assert kwargs["content_type"] == "audio/ogg"
-            assert kwargs["filename"].endswith(".ogg")
+            assert self.upload_call is not None, "Expected upload_media() to be called"
+            assert isinstance(self.upload_call["data"], bytes)
+            assert self.upload_call["mime_type"] == "audio/ogg"
+            assert self.upload_call["filename"].endswith(".ogg")

        finally:
            os.unlink(temp_path)
@@ -376,6 +376,134 @@ class TestCacheAudioFromUrl:
        mock_sleep.assert_not_called()


+# ---------------------------------------------------------------------------
+# SSRF redirect guard tests (base.py)
+# ---------------------------------------------------------------------------
+
+
+class TestSSRFRedirectGuard:
+    """cache_image_from_url / cache_audio_from_url must reject redirects
+    that land on private/internal hosts (e.g. cloud metadata endpoint)."""
+
+    def _make_redirect_response(self, target_url: str):
+        """Build a mock httpx response that looks like a redirect."""
+        resp = MagicMock()
+        resp.is_redirect = True
+        resp.next_request = MagicMock(url=target_url)
+        return resp
+
+    def _make_client_capturing_hooks(self):
+        """Return (mock_client, captured_kwargs dict) where captured_kwargs
+        will contain the kwargs passed to httpx.AsyncClient()."""
+        captured = {}
+        mock_client = AsyncMock()
+        mock_client.__aenter__ = AsyncMock(return_value=mock_client)
+        mock_client.__aexit__ = AsyncMock(return_value=False)
+
+        def factory(*args, **kwargs):
+            captured.update(kwargs)
+            return mock_client
+
+        return mock_client, captured, factory
+
+    def test_image_blocks_private_redirect(self, tmp_path, monkeypatch):
+        """cache_image_from_url rejects a redirect to a private IP."""
+        monkeypatch.setattr("gateway.platforms.base.IMAGE_CACHE_DIR", tmp_path / "img")
+
+        redirect_resp = self._make_redirect_response(
+            "http://169.254.169.254/latest/meta-data"
+        )
+        mock_client, captured, factory = self._make_client_capturing_hooks()
+
+        async def fake_get(_url, **kwargs):
+            # Simulate httpx calling the response event hooks
+            for hook in captured["event_hooks"]["response"]:
+                await hook(redirect_resp)
+
+        mock_client.get = AsyncMock(side_effect=fake_get)
+
+        def fake_safe(url):
+            return url == "https://public.example.com/image.png"
+
+        async def run():
+            with patch("tools.url_safety.is_safe_url", side_effect=fake_safe), \
+                 patch("httpx.AsyncClient", side_effect=factory):
+                from gateway.platforms.base import cache_image_from_url
+                await cache_image_from_url(
+                    "https://public.example.com/image.png", ext=".png"
+                )
+
+        with pytest.raises(ValueError, match="Blocked redirect"):
+            asyncio.run(run())
+
+    def test_audio_blocks_private_redirect(self, tmp_path, monkeypatch):
+        """cache_audio_from_url rejects a redirect to a private IP."""
+        monkeypatch.setattr("gateway.platforms.base.AUDIO_CACHE_DIR", tmp_path / "audio")
+
+        redirect_resp = self._make_redirect_response(
+            "http://10.0.0.1/internal/secrets"
+        )
+        mock_client, captured, factory = self._make_client_capturing_hooks()
+
+        async def fake_get(_url, **kwargs):
+            for hook in captured["event_hooks"]["response"]:
+                await hook(redirect_resp)
+
+        mock_client.get = AsyncMock(side_effect=fake_get)
+
+        def fake_safe(url):
+            return url == "https://public.example.com/voice.ogg"
+
+        async def run():
+            with patch("tools.url_safety.is_safe_url", side_effect=fake_safe), \
+                 patch("httpx.AsyncClient", side_effect=factory):
+                from gateway.platforms.base import cache_audio_from_url
+                await cache_audio_from_url(
+                    "https://public.example.com/voice.ogg", ext=".ogg"
+                )
+
+        with pytest.raises(ValueError, match="Blocked redirect"):
+            asyncio.run(run())
+
+    def test_safe_redirect_allowed(self, tmp_path, monkeypatch):
+        """A redirect to a public IP is allowed through."""
+        monkeypatch.setattr("gateway.platforms.base.IMAGE_CACHE_DIR", tmp_path / "img")
+
+        redirect_resp = self._make_redirect_response(
+            "https://cdn.example.com/real-image.png"
+        )
+
+        ok_response = MagicMock()
+        ok_response.content = b"\xff\xd8\xff fake jpeg"
+        ok_response.raise_for_status = MagicMock()
+        ok_response.is_redirect = False
+
+        mock_client, captured, factory = self._make_client_capturing_hooks()
+
+        call_count = 0
+
+        async def fake_get(_url, **kwargs):
+            nonlocal call_count
+            call_count += 1
+            # First call triggers redirect hook, second returns data
+            for hook in captured["event_hooks"]["response"]:
+                await hook(redirect_resp if call_count == 1 else ok_response)
+            return ok_response
+
+        mock_client.get = AsyncMock(side_effect=fake_get)
+
+        async def run():
+            with patch("tools.url_safety.is_safe_url", return_value=True), \
+                 patch("httpx.AsyncClient", side_effect=factory):
+                from gateway.platforms.base import cache_image_from_url
+                return await cache_image_from_url(
+                    "https://public.example.com/image.png", ext=".jpg"
+                )
+
+        path = asyncio.run(run())
+        assert path.endswith(".jpg")
+
+
 # ---------------------------------------------------------------------------
 # Slack mock setup (mirrors existing test_slack.py approach)
 # ---------------------------------------------------------------------------
--- a/Show More
+++ b/Show More