fix(model): preserve custom endpoint credentials and accept cloud models not in /v1/models

When switching models on a custom endpoint (ollama-launch):

- Same-provider switches no longer re-resolve credentials (fixes base_url being lost for the 'custom' provider on subsequent switches).
- Named providers (ollama-launch) are resolved via user_providers so switch_model can find their base_url from config.
- Models not in the /v1/models probe but present in the user's saved provider config are accepted with a warning instead of being rejected.
- CLI /model and TUI /model both pass user_providers/custom_providers to switch_model so the config model list is available for validation.

Closes #15088
2026-04-25 14:10:42 +05:30
565 changed files with 4802 additions and 59412 deletions
@@ -69,4 +69,3 @@ mini-swe-agent/
 .nix-stamps/
 result
 website/static/api/skills-index.json
-models-dev-upstream/
@@ -30,22 +30,18 @@ WORKDIR /opt/hermes
 # unless the lockfiles themselves change.
 COPY package.json package-lock.json ./
 COPY web/package.json web/package-lock.json web/
-COPY ui-tui/package.json ui-tui/package-lock.json ui-tui/
-COPY ui-tui/packages/hermes-ink/package.json ui-tui/packages/hermes-ink/package-lock.json ui-tui/packages/hermes-ink/

 RUN npm install --prefer-offline --no-audit && \
    npx playwright install --with-deps chromium --only-shell && \
    (cd web && npm install --prefer-offline --no-audit) && \
-    (cd ui-tui && npm install --prefer-offline --no-audit) && \
    npm cache clean --force

 # ---------- Source code ----------
 # .dockerignore excludes node_modules, so the installs above survive.
 COPY --chown=hermes:hermes . .

-# Build browser dashboard and terminal UI assets.
-RUN cd web && npm run build && \
-    cd ../ui-tui && npm run build
+# Build web dashboard (Vite outputs to hermes_cli/web_dist/)
+RUN cd web && npm run build

 # ---------- Permissions ----------
 # Make install dir world-readable so any HERMES_UID can read it at runtime.
@@ -390,16 +390,7 @@ def build_anthropic_client(api_key: str, base_url: str = None, timeout: float =
        "timeout": Timeout(timeout=float(_read_timeout), connect=10.0),
    }
    if normalized_base_url:
-        # Azure Anthropic endpoints require an ``api-version`` query parameter.
-        # Pass it via default_query so the SDK appends it to every request URL
-        # without corrupting the base_url (appending it directly produces
-        # malformed paths like /anthropic?api-version=.../v1/messages).
-        _is_azure_endpoint = "azure.com" in normalized_base_url.lower()
-        if _is_azure_endpoint and "api-version" not in normalized_base_url:
-            kwargs["base_url"] = normalized_base_url.rstrip("/")
-            kwargs["default_query"] = {"api-version": "2025-04-15"}
-        else:
-            kwargs["base_url"] = normalized_base_url
+        kwargs["base_url"] = normalized_base_url
    common_betas = _common_betas_for_base_url(normalized_base_url)

    if _is_kimi_coding_endpoint(base_url):
@@ -1689,9 +1680,9 @@ def build_anthropic_kwargs(

    # ── Strip sampling params on 4.7+ ─────────────────────────────────
    # Opus 4.7 rejects any non-default temperature/top_p/top_k with a 400.
-    # Callers (auxiliary_client, etc.) may set these for older models;
-    # drop them here as a safety net so upstream 4.6 → 4.7 migrations
-    # don't require coordinated edits everywhere.
+    # Callers (auxiliary_client, flush_memories, etc.) may set these for
+    # older models; drop them here as a safety net so upstream 4.6 → 4.7
+    # migrations don't require coordinated edits everywhere.
    if _forbids_sampling_params(model):
        for _sampling_key in ("temperature", "top_p", "top_k"):
            kwargs.pop(_sampling_key, None)
@@ -42,7 +42,6 @@ import time
 from pathlib import Path  # noqa: F401 — used by test mocks
 from types import SimpleNamespace
 from typing import Any, Dict, List, Optional, Tuple
-from urllib.parse import urlparse, parse_qs, urlunparse

 from openai import OpenAI

@@ -53,17 +52,6 @@ from utils import base_url_host_matches, base_url_hostname, normalize_proxy_env_

 logger = logging.getLogger(__name__)

-
-def _extract_url_query_params(url: str):
-    """Extract query params from URL, return (clean_url, default_query dict or None)."""
-    parsed = urlparse(url)
-    if parsed.query:
-        clean = urlunparse(parsed._replace(query=""))
-        params = {k: v[0] for k, v in parse_qs(parsed.query).items()}
-        return clean, params
-    return url, None
-
-
 # Module-level flag: only warn once per process about stale OPENAI_BASE_URL.
 _stale_base_url_warned = False

@@ -82,8 +70,6 @@ _PROVIDER_ALIASES = {
    "moonshot": "kimi-coding",
    "kimi-cn": "kimi-coding-cn",
    "moonshot-cn": "kimi-coding-cn",
-    "gmi-cloud": "gmi",
-    "gmicloud": "gmi",
    "minimax-china": "minimax-cn",
    "minimax_cn": "minimax-cn",
    "claude": "anthropic",
@@ -157,7 +143,6 @@ _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
    "kimi-coding": "kimi-k2-turbo-preview",
    "stepfun": "step-3.5-flash",
    "kimi-coding-cn": "kimi-k2-turbo-preview",
-    "gmi": "google/gemini-3.1-flash-lite-preview",
    "minimax": "MiniMax-M2.7",
    "minimax-cn": "MiniMax-M2.7",
    "anthropic": "claude-haiku-4-5-20251001",
@@ -405,7 +390,7 @@ class _CodexCompletionsAdapter:
        # Note: the Codex endpoint (chatgpt.com/backend-api/codex) does NOT
        # support max_output_tokens or temperature — omit to avoid 400 errors.

-        # Tools support for auxiliary callers (e.g. skills_hub) that pass function schemas
+        # Tools support for flush_memories and similar callers
        tools = kwargs.get("tools")
        if tools:
            converted = []
@@ -1172,10 +1157,8 @@ def _try_custom_endpoint() -> Tuple[Optional[Any], Optional[str]]:
        return None, None
    model = _read_main_model() or "gpt-4o-mini"
    logger.debug("Auxiliary client: custom endpoint (%s, api_mode=%s)", model, custom_mode or "chat_completions")
-    _clean_base, _dq = _extract_url_query_params(custom_base)
-    _extra = {"default_query": _dq} if _dq else {}
    if custom_mode == "codex_responses":
-        real_client = OpenAI(api_key=custom_key, base_url=_clean_base, **_extra)
+        real_client = OpenAI(api_key=custom_key, base_url=custom_base)
        return CodexAuxiliaryClient(real_client, model), model
    if custom_mode == "anthropic_messages":
        # Third-party Anthropic-compatible gateway (MiniMax, Zhipu GLM,
@@ -1189,12 +1172,12 @@ def _try_custom_endpoint() -> Tuple[Optional[Any], Optional[str]]:
                "Custom endpoint declares api_mode=anthropic_messages but the "
                "anthropic SDK is not installed — falling back to OpenAI-wire."
            )
-            return OpenAI(api_key=custom_key, base_url=_clean_base, **_extra), model
+            return OpenAI(api_key=custom_key, base_url=custom_base), model
        return (
            AnthropicAuxiliaryClient(real_client, model, custom_key, custom_base, is_oauth=False),
            model,
        )
-    return OpenAI(api_key=custom_key, base_url=_clean_base, **_extra), model
+    return OpenAI(api_key=custom_key, base_url=custom_base), model


 def _try_codex() -> Tuple[Optional[Any], Optional[str]]:
@@ -1366,49 +1349,6 @@ def _is_auth_error(exc: Exception) -> bool:
    return "error code: 401" in err_lower or "authenticationerror" in type(exc).__name__.lower()


-def _is_unsupported_parameter_error(exc: Exception, param: str) -> bool:
-    """Detect provider 400s for an unsupported request parameter.
-
-    Different OpenAI-compatible endpoints phrase the same class of error a few
-    ways: ``Unsupported parameter: X``, ``unsupported_parameter`` with a
-    ``param`` field, ``X is not supported``, ``unknown parameter: X``,
-    ``unrecognized request argument: X``.  We match on both the parameter
-    name and a generic "unsupported/unknown/unrecognized parameter" marker so
-    call sites can reactively retry without the offending key instead of
-    surfacing a noisy auxiliary failure.
-
-    Generalizes the temperature-specific detector that originally shipped
-    with PR #15621 so the same retry strategy can cover ``max_tokens``,
-    ``seed``, ``top_p``, and any future quirk. Credit @nicholasrae (PR #15416)
-    for the generalization pattern.
-    """
-    param_lower = (param or "").lower()
-    if not param_lower:
-        return False
-    err_lower = str(exc).lower()
-    if param_lower not in err_lower:
-        return False
-    return any(marker in err_lower for marker in (
-        "unsupported parameter",
-        "unsupported_parameter",
-        "not supported",
-        "does not support",
-        "unknown parameter",
-        "unrecognized request argument",
-        "unrecognized parameter",
-        "invalid parameter",
-    ))
-
-
-def _is_unsupported_temperature_error(exc: Exception) -> bool:
-    """Back-compat wrapper: detect API errors where the model rejects ``temperature``.
-
-    Delegates to :func:`_is_unsupported_parameter_error`; kept as a separate
-    public symbol because existing tests and call sites import it by name.
-    """
-    return _is_unsupported_parameter_error(exc, "temperature")
-
-
 def _evict_cached_clients(provider: str) -> None:
    """Drop cached auxiliary clients for a provider so fresh creds are used."""
    normalized = _normalize_aux_provider(provider)
@@ -1620,14 +1560,8 @@ def _resolve_auto(main_runtime: Optional[Dict[str, Any]] = None) -> Tuple[Option
 # below — never look up auth env vars ad-hoc.


-def _to_async_client(sync_client, model: str, is_vision: bool = False):
-    """Convert a sync client to its async counterpart, preserving Codex routing.
-
-    When ``is_vision=True`` and the underlying base URL is Copilot, the
-    resulting async client carries the ``Copilot-Vision-Request: true``
-    header so the request is routed to Copilot's vision-capable
-    infrastructure (otherwise vision payloads silently time out).
-    """
+def _to_async_client(sync_client, model: str):
+    """Convert a sync client to its async counterpart, preserving Codex routing."""
    from openai import AsyncOpenAI

    if isinstance(sync_client, CodexAuxiliaryClient):
@@ -1656,11 +1590,9 @@ def _to_async_client(sync_client, model: str, is_vision: bool = False):
    if base_url_host_matches(sync_base_url, "openrouter.ai"):
        async_kwargs["default_headers"] = dict(_OR_HEADERS)
    elif base_url_host_matches(sync_base_url, "api.githubcopilot.com"):
-        from hermes_cli.copilot_auth import copilot_request_headers
+        from hermes_cli.models import copilot_default_headers

-        async_kwargs["default_headers"] = copilot_request_headers(
-            is_agent_turn=True, is_vision=is_vision
-        )
+        async_kwargs["default_headers"] = copilot_default_headers()
    elif base_url_host_matches(sync_base_url, "api.kimi.com"):
        async_kwargs["default_headers"] = {"User-Agent": "claude-code/0.1.0"}
    return AsyncOpenAI(**async_kwargs), model
@@ -1687,7 +1619,6 @@ def resolve_provider_client(
    explicit_api_key: str = None,
    api_mode: str = None,
    main_runtime: Optional[Dict[str, Any]] = None,
-    is_vision: bool = False,
 ) -> Tuple[Optional[Any], Optional[str]]:
    """Central router: given a provider name and optional model, return a
    configured client with the correct auth, base URL, and API format.
@@ -1771,7 +1702,7 @@ def resolve_provider_client(
                "auxiliary provider (using %r instead)", model, resolved)
            model = None
        final_model = model or resolved
-        return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
+        return (_to_async_client(client, final_model) if async_mode
                else (client, final_model))

    # ── OpenRouter ───────────────────────────────────────────────────
@@ -1784,7 +1715,7 @@ def resolve_provider_client(
            )
            return None, None
        final_model = _normalize_resolved_model(model or default, provider)
-        return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
+        return (_to_async_client(client, final_model) if async_mode
                else (client, final_model))

    # ── Nous Portal (OAuth) ──────────────────────────────────────────
@@ -1801,7 +1732,7 @@ def resolve_provider_client(
                           "but Nous Portal not configured (run: hermes auth)")
            return None, None
        final_model = _normalize_resolved_model(model or default, provider)
-        return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
+        return (_to_async_client(client, final_model) if async_mode
                else (client, final_model))

    # ── OpenAI Codex (OAuth → Responses API) ─────────────────────────
@@ -1828,7 +1759,7 @@ def resolve_provider_client(
                           "but no Codex OAuth token found (run: hermes model)")
            return None, None
        final_model = _normalize_resolved_model(model or default, provider)
-        return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
+        return (_to_async_client(client, final_model) if async_mode
                else (client, final_model))

    # ── Custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY) ───────────
@@ -1851,19 +1782,14 @@ def resolve_provider_client(
                provider,
            )
            extra = {}
-            _clean_base, _dq = _extract_url_query_params(custom_base)
-            if _dq:
-                extra["default_query"] = _dq
            if base_url_host_matches(custom_base, "api.kimi.com"):
                extra["default_headers"] = {"User-Agent": "claude-code/0.1.0"}
            elif base_url_host_matches(custom_base, "api.githubcopilot.com"):
-                from hermes_cli.copilot_auth import copilot_request_headers
-                extra["default_headers"] = copilot_request_headers(
-                    is_agent_turn=True, is_vision=is_vision
-                )
-            client = OpenAI(api_key=custom_key, base_url=_clean_base, **extra)
+                from hermes_cli.models import copilot_default_headers
+                extra["default_headers"] = copilot_default_headers()
+            client = OpenAI(api_key=custom_key, base_url=custom_base, **extra)
            client = _wrap_if_needed(client, final_model, custom_base)
-            return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
+            return (_to_async_client(client, final_model) if async_mode
                    else (client, final_model))
        # Try custom first, then codex, then API-key providers
        for try_fn in (_try_custom_endpoint, _try_codex,
@@ -1873,7 +1799,7 @@ def resolve_provider_client(
                final_model = _normalize_resolved_model(model or default, provider)
                _cbase = str(getattr(client, "base_url", "") or "")
                client = _wrap_if_needed(client, final_model, _cbase)
-                return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
+                return (_to_async_client(client, final_model) if async_mode
                        else (client, final_model))
        logger.warning("resolve_provider_client: custom/main requested "
                       "but no endpoint credentials found")
@@ -1898,8 +1824,6 @@ def resolve_provider_client(
                    model or custom_entry.get("model") or _read_main_model() or "gpt-4o-mini",
                    provider,
                )
-                _clean_base2, _dq2 = _extract_url_query_params(custom_base)
-                _extra2 = {"default_query": _dq2} if _dq2 else {}
                logger.debug(
                    "resolve_provider_client: named custom provider %r (%s, api_mode=%s)",
                    provider, final_model, entry_api_mode or "chat_completions")
@@ -1917,8 +1841,8 @@ def resolve_provider_client(
                            "installed — falling back to OpenAI-wire.",
                            provider,
                        )
-                        client = OpenAI(api_key=custom_key, base_url=_clean_base2, **_extra2)
-                        return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
+                        client = OpenAI(api_key=custom_key, base_url=custom_base)
+                        return (_to_async_client(client, final_model) if async_mode
                                else (client, final_model))
                    sync_anthropic = AnthropicAuxiliaryClient(
                        real_client, final_model, custom_key, custom_base, is_oauth=False,
@@ -1926,7 +1850,7 @@ def resolve_provider_client(
                    if async_mode:
                        return AsyncAnthropicAuxiliaryClient(sync_anthropic), final_model
                    return sync_anthropic, final_model
-                client = OpenAI(api_key=custom_key, base_url=_clean_base2, **_extra2)
+                client = OpenAI(api_key=custom_key, base_url=custom_base)
                # codex_responses or inherited auto-detect (via _wrap_if_needed).
                # _wrap_if_needed reads the closed-over `api_mode` (the task-level
                # override). Named-provider entry api_mode=codex_responses also
@@ -1937,7 +1861,7 @@ def resolve_provider_client(
                    client = CodexAuxiliaryClient(client, final_model)
                else:
                    client = _wrap_if_needed(client, final_model, custom_base)
-                return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
+                return (_to_async_client(client, final_model) if async_mode
                        else (client, final_model))
            logger.warning(
                "resolve_provider_client: named custom provider %r has no base_url",
@@ -1969,7 +1893,7 @@ def resolve_provider_client(
                logger.warning("resolve_provider_client: anthropic requested but no Anthropic credentials found")
                return None, None
            final_model = _normalize_resolved_model(model or default_model, provider)
-            return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode else (client, final_model))
+            return (_to_async_client(client, final_model) if async_mode else (client, final_model))

        creds = resolve_api_key_provider_credentials(provider)
        api_key = str(creds.get("api_key", "")).strip()
@@ -1995,7 +1919,7 @@ def resolve_provider_client(
            if is_native_gemini_base_url(base_url):
                client = GeminiNativeClient(api_key=api_key, base_url=base_url)
                logger.debug("resolve_provider_client: %s (%s)", provider, final_model)
-                return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
+                return (_to_async_client(client, final_model) if async_mode
                        else (client, final_model))

        # Provider-specific headers
@@ -2003,11 +1927,9 @@ def resolve_provider_client(
        if base_url_host_matches(base_url, "api.kimi.com"):
            headers["User-Agent"] = "claude-code/0.1.0"
        elif base_url_host_matches(base_url, "api.githubcopilot.com"):
-            from hermes_cli.copilot_auth import copilot_request_headers
+            from hermes_cli.models import copilot_default_headers

-            headers.update(copilot_request_headers(
-                is_agent_turn=True, is_vision=is_vision
-            ))
+            headers.update(copilot_default_headers())
        client = OpenAI(api_key=api_key, base_url=base_url,
                        **({"default_headers": headers} if headers else {}))

@@ -2033,7 +1955,7 @@ def resolve_provider_client(
        client = _wrap_if_needed(client, final_model, base_url)

        logger.debug("resolve_provider_client: %s (%s)", provider, final_model)
-        return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
+        return (_to_async_client(client, final_model) if async_mode
                else (client, final_model))

    if pconfig.auth_type == "external_process":
@@ -2065,7 +1987,7 @@ def resolve_provider_client(
                args=args,
            )
            logger.debug("resolve_provider_client: %s (%s)", provider, final_model)
-            return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
+            return (_to_async_client(client, final_model) if async_mode
                    else (client, final_model))
        logger.warning("resolve_provider_client: external-process provider %s not "
                       "directly supported", provider)
@@ -2101,7 +2023,7 @@ def resolve_provider_client(
            base_url=f"https://bedrock-runtime.{region}.amazonaws.com",
        )
        logger.debug("resolve_provider_client: bedrock (%s, %s)", final_model, region)
-        return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
+        return (_to_async_client(client, final_model) if async_mode
                else (client, final_model))

    elif pconfig.auth_type in ("oauth_device_code", "oauth_external"):
@@ -2176,13 +2098,8 @@ def _normalize_vision_provider(provider: Optional[str]) -> str:
    return _normalize_aux_provider(provider)


-def _resolve_strict_vision_backend(
-    provider: str,
-    model: Optional[str] = None,
-) -> Tuple[Optional[Any], Optional[str]]:
+def _resolve_strict_vision_backend(provider: str) -> Tuple[Optional[Any], Optional[str]]:
    provider = _normalize_vision_provider(provider)
-    if provider == "copilot":
-        return resolve_provider_client("copilot", model, is_vision=True)
    if provider == "openrouter":
        return _try_openrouter()
    if provider == "nous":
@@ -2250,7 +2167,7 @@ def resolve_vision_provider_client(
            return resolved_provider, None, None
        final_model = resolved_model or default_model
        if async_mode:
-            async_client, async_model = _to_async_client(sync_client, final_model, is_vision=True)
+            async_client, async_model = _to_async_client(sync_client, final_model)
            return resolved_provider, async_client, async_model
        return resolved_provider, sync_client, final_model

@@ -2282,11 +2199,8 @@ def resolve_vision_provider_client(
        main_provider = _read_main_provider()
        main_model = _read_main_model()
        if main_provider and main_provider not in ("auto", ""):
-            vision_model = _PROVIDER_VISION_MODELS.get(main_provider, main_model)
            if main_provider == "nous":
-                sync_client, default_model = _resolve_strict_vision_backend(
-                    main_provider, vision_model
-                )
+                sync_client, default_model = _resolve_strict_vision_backend(main_provider)
                if sync_client is not None:
                    logger.info(
                        "Vision auto-detect: using main provider %s (%s)",
@@ -2294,10 +2208,10 @@ def resolve_vision_provider_client(
                    )
                    return _finalize(main_provider, sync_client, default_model)
            else:
+                vision_model = _PROVIDER_VISION_MODELS.get(main_provider, main_model)
                rpc_client, rpc_model = resolve_provider_client(
                    main_provider, vision_model,
-                    api_mode=resolved_api_mode,
-                    is_vision=True)
+                    api_mode=resolved_api_mode)
                if rpc_client is not None:
                    logger.info(
                        "Vision auto-detect: using main provider %s (%s)",
@@ -2319,14 +2233,11 @@ def resolve_vision_provider_client(
        return None, None, None

    if requested in _VISION_AUTO_PROVIDER_ORDER:
-        sync_client, default_model = _resolve_strict_vision_backend(
-            requested, resolved_model
-        )
+        sync_client, default_model = _resolve_strict_vision_backend(requested)
        return _finalize(requested, sync_client, default_model)

    client, final_model = _get_cached_client(requested, resolved_model, async_mode,
-                                             api_mode=resolved_api_mode,
-                                             is_vision=True)
+                                             api_mode=resolved_api_mode)
    if client is None:
        return requested, None, None
    return requested, client, final_model
@@ -2390,11 +2301,10 @@ def _client_cache_key(
    api_key: Optional[str] = None,
    api_mode: Optional[str] = None,
    main_runtime: Optional[Dict[str, Any]] = None,
-    is_vision: bool = False,
 ) -> tuple:
    runtime = _normalize_main_runtime(main_runtime)
    runtime_key = tuple(runtime.get(field, "") for field in _MAIN_RUNTIME_FIELDS) if provider == "auto" else ()
-    return (provider, async_mode, base_url or "", api_key or "", api_mode or "", runtime_key, is_vision)
+    return (provider, async_mode, base_url or "", api_key or "", api_mode or "", runtime_key)


 def _store_cached_client(cache_key: tuple, client: Any, default_model: Optional[str], *, bound_loop: Any = None) -> None:
@@ -2420,7 +2330,6 @@ def _refresh_nous_auxiliary_client(
    api_key: Optional[str] = None,
    api_mode: Optional[str] = None,
    main_runtime: Optional[Dict[str, Any]] = None,
-    is_vision: bool = False,
 ) -> Tuple[Optional[Any], Optional[str]]:
    """Refresh Nous runtime creds, rebuild the client, and replace the cache entry."""
    runtime = _resolve_nous_runtime_api(force_refresh=True)
@@ -2438,7 +2347,7 @@ def _refresh_nous_auxiliary_client(
            current_loop = _aio.get_event_loop()
        except RuntimeError:
            pass
-        client, final_model = _to_async_client(sync_client, final_model or "", is_vision=is_vision)
+        client, final_model = _to_async_client(sync_client, final_model or "")
    else:
        client = sync_client

@@ -2449,7 +2358,6 @@ def _refresh_nous_auxiliary_client(
        api_key=api_key,
        api_mode=api_mode,
        main_runtime=main_runtime,
-        is_vision=is_vision,
    )
    _store_cached_client(cache_key, client, final_model, bound_loop=current_loop)
    return client, final_model
@@ -2561,19 +2469,12 @@ def _is_openrouter_client(client: Any) -> bool:
    return False


-def _cached_client_accepts_slash_models(client: Any, cached_default: Optional[str]) -> bool:
-    """Best-effort check for cached clients that accept ``vendor/model`` IDs."""
-    if _is_openrouter_client(client):
-        return True
-    return bool(cached_default and "/" in cached_default)
-
-
 def _compat_model(client: Any, model: Optional[str], cached_default: Optional[str]) -> Optional[str]:
-    """Keep slash-bearing model IDs only for cached clients that support them.
+    """Drop OpenRouter-format model slugs (with '/') for non-OpenRouter clients.

    Mirrors the guard in resolve_provider_client() which is skipped on cache hits.
    """
-    if model and "/" in model and not _cached_client_accepts_slash_models(client, cached_default):
+    if model and "/" in model and not _is_openrouter_client(client):
        return cached_default
    return model or cached_default

@@ -2586,7 +2487,6 @@ def _get_cached_client(
    api_key: str = None,
    api_mode: str = None,
    main_runtime: Optional[Dict[str, Any]] = None,
-    is_vision: bool = False,
 ) -> Tuple[Optional[Any], Optional[str]]:
    """Get or create a cached client for the given provider.

@@ -2623,7 +2523,6 @@ def _get_cached_client(
        api_key=api_key,
        api_mode=api_mode,
        main_runtime=main_runtime,
-        is_vision=is_vision,
    )
    with _client_cache_lock:
        if cache_key in _client_cache:
@@ -2655,7 +2554,6 @@ def _get_cached_client(
        explicit_api_key=api_key,
        api_mode=api_mode,
        main_runtime=runtime,
-        is_vision=is_vision,
    )
    if client is not None:
        # For async clients, remember which loop they were created on so we
@@ -2862,8 +2760,8 @@ def _build_call_kwargs(
        temperature = fixed_temperature

    # Opus 4.7+ rejects any non-default temperature/top_p/top_k — silently
-    # drop here so auxiliary callers that hardcode temperature (e.g. 0 on
-    # structured-JSON extraction) don't 400 the moment
+    # drop here so auxiliary callers that hardcode temperature (e.g. 0.3 on
+    # flush_memories, 0 on structured-JSON extraction) don't 400 the moment
    # the aux model is flipped to 4.7.
    if temperature is not None:
        from agent.anthropic_adapter import _forbids_sampling_params
@@ -2951,7 +2849,7 @@ def call_llm(

    Args:
        task: Auxiliary task name ("compression", "vision", "web_extract",
-              "session_search", "skills_hub", "mcp", "title_generation").
+              "session_search", "skills_hub", "mcp", "flush_memories").
              Reads provider:model from config/env. Ignored if provider is set.
        provider: Explicit provider override.
        model: Explicit model override.
@@ -3054,45 +2952,13 @@ def call_llm(
    if _is_anthropic_compat_endpoint(resolved_provider, _client_base):
        kwargs["messages"] = _convert_openai_images_to_anthropic(kwargs["messages"])

-    # Handle unsupported temperature, max_tokens vs max_completion_tokens retry,
-    # then payment fallback.
+    # Handle max_tokens vs max_completion_tokens retry, then payment fallback.
    try:
        return _validate_llm_response(
            client.chat.completions.create(**kwargs), task)
    except Exception as first_err:
-        if "temperature" in kwargs and _is_unsupported_temperature_error(first_err):
-            retry_kwargs = dict(kwargs)
-            retry_kwargs.pop("temperature", None)
-            logger.info(
-                "Auxiliary %s: provider rejected temperature; retrying once without it",
-                task or "call",
-            )
-            try:
-                return _validate_llm_response(
-                    client.chat.completions.create(**retry_kwargs), task)
-            except Exception as retry_err:
-                retry_err_str = str(retry_err)
-                # If retry still fails, fall through to the max_tokens /
-                # payment / auth chains below using the temperature-stripped
-                # kwargs.  Re-raise only if the retry hit something those
-                # chains won't handle.
-                if not (
-                    _is_payment_error(retry_err)
-                    or _is_connection_error(retry_err)
-                    or _is_auth_error(retry_err)
-                    or "max_tokens" in retry_err_str
-                    or "unsupported_parameter" in retry_err_str
-                ):
-                    raise
-                first_err = retry_err
-                kwargs = retry_kwargs
-
        err_str = str(first_err)
-        if max_tokens is not None and (
-            "max_tokens" in err_str
-            or "unsupported_parameter" in err_str
-            or _is_unsupported_parameter_error(first_err, "max_tokens")
-        ):
+        if "max_tokens" in err_str or "unsupported_parameter" in err_str:
            kwargs.pop("max_tokens", None)
            kwargs["max_completion_tokens"] = max_tokens
            try:
@@ -3119,7 +2985,6 @@ def call_llm(
                api_key=resolved_api_key,
                api_mode=resolved_api_mode,
                main_runtime=main_runtime,
-                is_vision=(task == "vision"),
            )
            if refreshed_client is not None:
                logger.info("Auxiliary %s: refreshed Nous runtime credentials after 401, retrying",
@@ -3356,35 +3221,8 @@ async def async_call_llm(
        return _validate_llm_response(
            await client.chat.completions.create(**kwargs), task)
    except Exception as first_err:
-        if "temperature" in kwargs and _is_unsupported_temperature_error(first_err):
-            retry_kwargs = dict(kwargs)
-            retry_kwargs.pop("temperature", None)
-            logger.info(
-                "Auxiliary %s (async): provider rejected temperature; retrying once without it",
-                task or "call",
-            )
-            try:
-                return _validate_llm_response(
-                    await client.chat.completions.create(**retry_kwargs), task)
-            except Exception as retry_err:
-                retry_err_str = str(retry_err)
-                if not (
-                    _is_payment_error(retry_err)
-                    or _is_connection_error(retry_err)
-                    or _is_auth_error(retry_err)
-                    or "max_tokens" in retry_err_str
-                    or "unsupported_parameter" in retry_err_str
-                ):
-                    raise
-                first_err = retry_err
-                kwargs = retry_kwargs
-
        err_str = str(first_err)
-        if max_tokens is not None and (
-            "max_tokens" in err_str
-            or "unsupported_parameter" in err_str
-            or _is_unsupported_parameter_error(first_err, "max_tokens")
-        ):
+        if "max_tokens" in err_str or "unsupported_parameter" in err_str:
            kwargs.pop("max_tokens", None)
            kwargs["max_completion_tokens"] = max_tokens
            try:
@@ -3410,7 +3248,6 @@ async def async_call_llm(
                base_url=resolved_base_url,
                api_key=resolved_api_key,
                api_mode=resolved_api_mode,
-                is_vision=(task == "vision"),
            )
            if refreshed_client is not None:
                logger.info("Auxiliary %s (async): refreshed Nous runtime credentials after 401, retrying",
@@ -3479,9 +3316,7 @@ async def async_call_llm(
                    extra_body=effective_extra_body,
                    base_url=str(getattr(fb_client, "base_url", "") or ""))
                # Convert sync fallback client to async
-                async_fb, async_fb_model = _to_async_client(
-                    fb_client, fb_model or "", is_vision=(task == "vision")
-                )
+                async_fb, async_fb_model = _to_async_client(fb_client, fb_model or "")
                if async_fb_model and async_fb_model != fb_kwargs.get("model"):
                    fb_kwargs["model"] = async_fb_model
                return _validate_llm_response(
@@ -44,31 +44,22 @@ _TOOL_CALL_LEAK_PATTERN = re.compile(
 # Multimodal content helpers
 # ---------------------------------------------------------------------------

-def _chat_content_to_responses_parts(content: Any, *, role: str = "user") -> List[Dict[str, Any]]:
+def _chat_content_to_responses_parts(content: Any) -> List[Dict[str, Any]]:
    """Convert chat-style multimodal content to Responses API input parts.

    Input:  ``[{"type":"text"|"image_url", ...}]`` (native OpenAI Chat format)
-    Output: ``[{"type":"input_text"|"output_text"|"input_image", ...}]`` (Responses format)
-
-    The ``role`` parameter controls the text content type:
-    - ``"user"`` (default) → ``"input_text"``
-    - ``"assistant"`` → ``"output_text"``
-
-    The Responses API rejects ``input_text`` inside assistant messages and
-    ``output_text`` inside user messages, so callers MUST pass the correct
-    role for the message being converted.
+    Output: ``[{"type":"input_text"|"input_image", ...}]`` (Responses format)

    Returns an empty list when ``content`` is not a list or contains no
    recognized parts — callers fall back to the string path.
    """
-    text_type = "output_text" if role == "assistant" else "input_text"
    if not isinstance(content, list):
        return []
    converted: List[Dict[str, Any]] = []
    for part in content:
        if isinstance(part, str):
            if part:
-                converted.append({"type": text_type, "text": part})
+                converted.append({"type": "input_text", "text": part})
            continue
        if not isinstance(part, dict):
            continue
@@ -76,7 +67,7 @@ def _chat_content_to_responses_parts(content: Any, *, role: str = "user") -> Lis
        if ptype in {"text", "input_text", "output_text"}:
            text = part.get("text")
            if isinstance(text, str) and text:
-                converted.append({"type": text_type, "text": text})
+                converted.append({"type": "input_text", "text": text})
            continue
        if ptype in {"image_url", "input_image"}:
            image_ref = part.get("image_url")
@@ -227,23 +218,6 @@ def _responses_tools(tools: Optional[List[Dict[str, Any]]] = None) -> Optional[L
 # Message format conversion
 # ---------------------------------------------------------------------------

-_RESPONSE_MESSAGE_STATUSES = {"completed", "incomplete", "in_progress"}
-
-
-def _normalize_responses_message_status(value: Any, *, default: str = "completed") -> str:
-    """Normalize a Responses assistant message status for replay.
-
-    The API accepts completed/incomplete/in_progress on replayed assistant
-    output messages.  Preserve those exactly (modulo case/hyphen spelling) so
-    incomplete Codex continuation turns don't get falsely marked completed.
-    """
-    if isinstance(value, str):
-        status = value.strip().lower().replace("-", "_").replace(" ", "_")
-        if status in _RESPONSE_MESSAGE_STATUSES:
-            return status
-    return default
-
-
 def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Convert internal chat-style messages to Responses input items."""
    items: List[Dict[str, Any]] = []
@@ -259,10 +233,9 @@ def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Di
        if role in {"user", "assistant"}:
            content = msg.get("content", "")
            if isinstance(content, list):
-                content_parts = _chat_content_to_responses_parts(content, role=role)
-                text_type = "output_text" if role == "assistant" else "input_text"
+                content_parts = _chat_content_to_responses_parts(content)
                content_text = "".join(
-                    p.get("text", "") for p in content_parts if p.get("type") == text_type
+                    p.get("text", "") for p in content_parts if p.get("type") == "input_text"
                )
            else:
                content_parts = []
@@ -289,57 +262,7 @@ def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Di
                                seen_item_ids.add(item_id)
                            has_codex_reasoning = True

-                # Replay exact assistant message items (with id/phase) from
-                # previous turns so the API can maintain prefix-cache hits.
-                # OpenAI docs: "preserve and resend phase on all assistant
-                # messages — dropping it can degrade performance."
-                codex_message_items = msg.get("codex_message_items")
-                replayed_message_items = 0
-                if isinstance(codex_message_items, list):
-                    for raw_item in codex_message_items:
-                        if not isinstance(raw_item, dict):
-                            continue
-                        if raw_item.get("type") != "message" or raw_item.get("role") != "assistant":
-                            continue
-                        raw_content_parts = raw_item.get("content")
-                        if not isinstance(raw_content_parts, list):
-                            continue
-
-                        normalized_content_parts = []
-                        for part in raw_content_parts:
-                            if not isinstance(part, dict):
-                                continue
-                            part_type = str(part.get("type") or "").strip()
-                            if part_type not in {"output_text", "text"}:
-                                continue
-                            text = part.get("text", "")
-                            if text is None:
-                                text = ""
-                            if not isinstance(text, str):
-                                text = str(text)
-                            normalized_content_parts.append({"type": "output_text", "text": text})
-
-                        if not normalized_content_parts:
-                            continue
-
-                        replay_item = {
-                            "type": "message",
-                            "role": "assistant",
-                            "status": _normalize_responses_message_status(raw_item.get("status")),
-                            "content": normalized_content_parts,
-                        }
-                        item_id = raw_item.get("id")
-                        if isinstance(item_id, str) and item_id.strip():
-                            replay_item["id"] = item_id.strip()
-                        phase = raw_item.get("phase")
-                        if isinstance(phase, str) and phase.strip():
-                            replay_item["phase"] = phase.strip()
-                        items.append(replay_item)
-                        replayed_message_items += 1
-
-                if replayed_message_items > 0:
-                    pass
-                elif content_parts:
+                if content_parts:
                    items.append({"role": "assistant", "content": content_parts})
                elif content_text.strip():
                    items.append({"role": "assistant", "content": content_text})
@@ -499,47 +422,6 @@ def _preflight_codex_input_items(raw_items: Any) -> List[Dict[str, Any]]:
                normalized.append(reasoning_item)
            continue

-        if item_type == "message":
-            role = item.get("role")
-            if role != "assistant":
-                raise ValueError(f"Codex Responses input[{idx}] message items must have role='assistant'.")
-            content = item.get("content")
-            if not isinstance(content, list):
-                raise ValueError(f"Codex Responses input[{idx}] message item must have content list.")
-            normalized_content = []
-            for part_idx, part in enumerate(content):
-                if not isinstance(part, dict):
-                    raise ValueError(
-                        f"Codex Responses input[{idx}] message content[{part_idx}] must be an object."
-                    )
-                part_type = part.get("type")
-                if part_type not in {"output_text", "text"}:
-                    raise ValueError(
-                        f"Codex Responses input[{idx}] message content[{part_idx}] has unsupported type {part_type!r}."
-                    )
-                text = part.get("text", "")
-                if text is None:
-                    text = ""
-                if not isinstance(text, str):
-                    text = str(text)
-                normalized_content.append({"type": "output_text", "text": text})
-            if not normalized_content:
-                raise ValueError(f"Codex Responses input[{idx}] message item must contain at least one text part.")
-            normalized_item: Dict[str, Any] = {
-                "type": "message",
-                "role": "assistant",
-                "status": _normalize_responses_message_status(item.get("status")),
-                "content": normalized_content,
-            }
-            item_id = item.get("id")
-            if isinstance(item_id, str) and item_id.strip():
-                normalized_item["id"] = item_id.strip()
-            phase = item.get("phase")
-            if isinstance(phase, str) and phase.strip():
-                normalized_item["phase"] = phase.strip()
-            normalized.append(normalized_item)
-            continue
-
        role = item.get("role")
        if role in {"user", "assistant"}:
            content = item.get("content", "")
@@ -547,16 +429,13 @@ def _preflight_codex_input_items(raw_items: Any) -> List[Dict[str, Any]]:
                content = ""
            if isinstance(content, list):
                # Multimodal content from ``_chat_messages_to_responses_input``
-                # is already in Responses format (``input_text`` / ``output_text``
-                # / ``input_image``).  Validate each part and pass through.
-                # Use the correct text type for the role — ``output_text`` for
-                # assistant messages, ``input_text`` for user messages.
-                text_type = "output_text" if role == "assistant" else "input_text"
+                # is already in Responses format (``input_text`` / ``input_image``).
+                # Validate each part and pass through.
                validated: List[Dict[str, Any]] = []
                for part_idx, part in enumerate(content):
                    if isinstance(part, str):
                        if part:
-                            validated.append({"type": text_type, "text": part})
+                            validated.append({"type": "input_text", "text": part})
                        continue
                    if not isinstance(part, dict):
                        raise ValueError(
@@ -567,7 +446,7 @@ def _preflight_codex_input_items(raw_items: Any) -> List[Dict[str, Any]]:
                        text = part.get("text", "")
                        if not isinstance(text, str):
                            text = str(text or "")
-                        validated.append({"type": text_type, "text": text})
+                        validated.append({"type": "input_text", "text": text})
                    elif ptype in {"input_image", "image_url"}:
                        image_ref = part.get("image_url", "")
                        detail = part.get("detail")
@@ -824,7 +703,6 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]:
    content_parts: List[str] = []
    reasoning_parts: List[str] = []
    reasoning_items_raw: List[Dict[str, Any]] = []
-    message_items_raw: List[Dict[str, Any]] = []
    tool_calls: List[Any] = []
    has_incomplete_items = response_status in {"queued", "in_progress", "incomplete"}
    saw_commentary_phase = False
@@ -843,7 +721,6 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]:

        if item_type == "message":
            item_phase = getattr(item, "phase", None)
-            normalized_phase = None
            if isinstance(item_phase, str):
                normalized_phase = item_phase.strip().lower()
                if normalized_phase in {"commentary", "analysis"}:
@@ -853,18 +730,6 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]:
            message_text = _extract_responses_message_text(item)
            if message_text:
                content_parts.append(message_text)
-                raw_message_item: Dict[str, Any] = {
-                    "type": "message",
-                    "role": "assistant",
-                    "status": _normalize_responses_message_status(item_status),
-                    "content": [{"type": "output_text", "text": message_text}],
-                }
-                item_id = getattr(item, "id", None)
-                if isinstance(item_id, str) and item_id:
-                    raw_message_item["id"] = item_id
-                if normalized_phase:
-                    raw_message_item["phase"] = normalized_phase
-                message_items_raw.append(raw_message_item)
        elif item_type == "reasoning":
            reasoning_text = _extract_responses_reasoning_text(item)
            if reasoning_text:
@@ -977,7 +842,6 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]:
        reasoning_content=None,
        reasoning_details=None,
        codex_reasoning_items=reasoning_items_raw or None,
-        codex_message_items=message_items_raw or None,
    )

    if tool_calls:
@@ -61,52 +61,9 @@ _PRUNED_TOOL_PLACEHOLDER = "[Old tool output cleared to save context space]"

 # Chars per token rough estimate
 _CHARS_PER_TOKEN = 4
-# Flat token cost per attached image part.  Real cost varies by provider and
-# dimensions (Anthropic ≈ width×height/750, GPT-4o up to ~1700 for
-# high-detail 2048×2048, Gemini 258/tile), but 1600 is a realistic ceiling
-# that keeps compression budgeting honest for multi-image conversations.
-# Matches Claude Code's IMAGE_TOKEN_ESTIMATE constant.
-_IMAGE_TOKEN_ESTIMATE = 1600
-# Same figure expressed in the char-budget currency the rest of the
-# compressor speaks in.  Used when accumulating message "content length"
-# for tail-cut decisions.
-_IMAGE_CHAR_EQUIVALENT = _IMAGE_TOKEN_ESTIMATE * _CHARS_PER_TOKEN
 _SUMMARY_FAILURE_COOLDOWN_SECONDS = 600


-def _content_length_for_budget(raw_content: Any) -> int:
-    """Return the effective char-length of a message's content for token budgeting.
-
-    Plain strings: ``len(content)``. Multimodal lists: sum of text-part
-    ``len(text)`` plus a flat ``_IMAGE_CHAR_EQUIVALENT`` per image part
-    (``image_url`` / ``input_image`` / Anthropic-style ``image``). This
-    keeps the compressor from treating a turn with 5 attached images as
-    near-zero tokens just because the text part is empty.
-    """
-    if isinstance(raw_content, str):
-        return len(raw_content)
-    if not isinstance(raw_content, list):
-        return len(str(raw_content or ""))
-
-    total = 0
-    for p in raw_content:
-        if isinstance(p, str):
-            total += len(p)
-            continue
-        if not isinstance(p, dict):
-            total += len(str(p))
-            continue
-        ptype = p.get("type")
-        if ptype in {"image_url", "input_image", "image"}:
-            total += _IMAGE_CHAR_EQUIVALENT
-        else:
-            # text / input_text / tool_result-with-text / anything else with
-            # a text field.  Ignore the raw base64 payload inside image_url
-            # dicts — dimensions don't matter, only whether it's an image.
-            total += len(p.get("text", "") or "")
-    return total
-
-
 def _content_text_for_contains(content: Any) -> str:
    """Return a best-effort text view of message content.

@@ -338,8 +295,6 @@ class ContextCompressor(ContextEngine):
        self._context_probe_persistable = False
        self._previous_summary = None
        self._last_summary_error = None
-        self._last_summary_dropped_count = 0
-        self._last_summary_fallback_used = False
        self._last_compression_savings_pct = 100.0
        self._ineffective_compression_count = 0

@@ -363,13 +318,6 @@ class ContextCompressor(ContextEngine):
            int(context_length * self.threshold_percent),
            MINIMUM_CONTEXT_LENGTH,
        )
-        # Recalculate token budgets for the new context length so the
-        # compressor stays calibrated after a model switch (e.g. 200K → 32K).
-        target_tokens = int(self.threshold_tokens * self.summary_target_ratio)
-        self.tail_token_budget = target_tokens
-        self.max_summary_tokens = min(
-            int(context_length * 0.05), _SUMMARY_TOKENS_CEILING,
-        )

    def __init__(
        self,
@@ -443,11 +391,6 @@ class ContextCompressor(ContextEngine):
        self._ineffective_compression_count: int = 0
        self._summary_failure_cooldown_until: float = 0.0
        self._last_summary_error: Optional[str] = None
-        # When summary generation fails and a static fallback is inserted,
-        # record how many turns were unrecoverably dropped so callers
-        # (gateway hygiene, /compress) can surface a visible warning.
-        self._last_summary_dropped_count: int = 0
-        self._last_summary_fallback_used: bool = False

    def update_from_response(self, usage: Dict[str, Any]):
        """Update tracked token usage from API response."""
@@ -534,7 +477,7 @@ class ContextCompressor(ContextEngine):
            for i in range(len(result) - 1, -1, -1):
                msg = result[i]
                raw_content = msg.get("content") or ""
-                content_len = _content_length_for_budget(raw_content)
+                content_len = sum(len(p.get("text", "")) for p in raw_content) if isinstance(raw_content, list) else len(raw_content)
                msg_tokens = content_len // _CHARS_PER_TOKEN + 10
                for tc in msg.get("tool_calls") or []:
                    if isinstance(tc, dict):
@@ -1132,9 +1075,8 @@ The user has requested that this compaction PRIORITISE preserving all informatio

        for i in range(n - 1, head_end - 1, -1):
            msg = messages[i]
-            raw_content = msg.get("content") or ""
-            content_len = _content_length_for_budget(raw_content)
-            msg_tokens = content_len // _CHARS_PER_TOKEN + 10  # +10 for role/metadata
+            content = msg.get("content") or ""
+            msg_tokens = len(content) // _CHARS_PER_TOKEN + 10  # +10 for role/metadata
            # Include tool call arguments in estimate
            for tc in msg.get("tool_calls") or []:
                if isinstance(tc, dict):
@@ -1203,11 +1145,6 @@ The user has requested that this compaction PRIORITISE preserving all informatio
                related to this topic and be more aggressive about compressing
                everything else.  Inspired by Claude Code's ``/compact``.
        """
-        # Reset per-call summary failure state — callers inspect these fields
-        # after compress() returns to decide whether to surface a warning.
-        self._last_summary_dropped_count = 0
-        self._last_summary_fallback_used = False
-        self._last_summary_error = None
        n_messages = len(messages)
        # Only need head + 3 tail messages minimum (token budget decides the real tail size)
        _min_for_compress = self.protect_first_n + 3 + 1
@@ -1286,13 +1223,11 @@ The user has requested that this compaction PRIORITISE preserving all informatio
            if not self.quiet_mode:
                logger.warning("Summary generation failed — inserting static fallback context marker")
            n_dropped = compress_end - compress_start
-            self._last_summary_dropped_count = n_dropped
-            self._last_summary_fallback_used = True
            summary = (
                f"{SUMMARY_PREFIX}\n"
-                f"Summary generation was unavailable. {n_dropped} message(s) were "
+                f"Summary generation was unavailable. {n_dropped} conversation turns were "
                f"removed to free context space but could not be summarized. The removed "
-                f"messages contained earlier work in this session. Continue based on the "
+                f"turns contained earlier work in this session. Continue based on the "
                f"recent messages below and the current state of any files or resources."
            )

@@ -14,7 +14,6 @@ from datetime import datetime
 from typing import Any, Dict, List, Optional, Set, Tuple

 from hermes_constants import OPENROUTER_BASE_URL
-from hermes_cli.config import get_env_value
 import hermes_cli.auth as auth_mod
 from hermes_cli.auth import (
    CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS,
@@ -1274,8 +1273,7 @@ def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool
        def _is_source_suppressed(_p, _s):  # type: ignore[misc]
            return False
    if provider == "openrouter":
-        # Check both os.environ and ~/.hermes/.env file
-        token = (get_env_value("OPENROUTER_API_KEY") or "").strip()
+        token = os.getenv("OPENROUTER_API_KEY", "").strip()
        if token:
            source = "env:OPENROUTER_API_KEY"
            if _is_source_suppressed(provider, source):
@@ -1301,7 +1299,7 @@ def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool

    env_url = ""
    if pconfig.base_url_env_var:
-        env_url = (get_env_value(pconfig.base_url_env_var) or "").strip().rstrip("/")
+        env_url = os.getenv(pconfig.base_url_env_var, "").strip().rstrip("/")

    env_vars = list(pconfig.api_key_env_vars)
    if provider == "anthropic":
@@ -1312,8 +1310,7 @@ def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool
        ]

    for env_var in env_vars:
-        # Check both os.environ and ~/.hermes/.env file
-        token = (get_env_value(env_var) or "").strip()
+        token = os.getenv(env_var, "").strip()
        if not token:
            continue
        source = f"env:{env_var}"
@@ -42,7 +42,6 @@ class FailoverReason(enum.Enum):
    # Context / payload
    context_overflow = "context_overflow"  # Context too large — compress, not failover
    payload_too_large = "payload_too_large"  # 413 — compress payload
-    image_too_large = "image_too_large"   # Native image part exceeds provider's per-image limit — shrink and retry

    # Model
    model_not_found = "model_not_found"  # 404 or invalid model — fallback to different model
@@ -148,20 +147,6 @@ _PAYLOAD_TOO_LARGE_PATTERNS = [
    "error code: 413",
 ]

-# Image-size patterns.  Matched against 400 bodies (not 413) because most
-# providers return a 400 with a specific image-too-big message before the
-# whole request hits the 413 size limit.  Anthropic's wording is the most
-# important here (hard 5 MB per image, returned as
-# "messages.N.content.K.image.source.base64: image exceeds 5 MB maximum").
-_IMAGE_TOO_LARGE_PATTERNS = [
-    "image exceeds",        # Anthropic: "image exceeds 5 MB maximum"
-    "image too large",      # generic
-    "image_too_large",      # error_code variant
-    "image size exceeds",   # variant
-    # "request_too_large" on a request known to contain an image → image is
-    # the likely culprit; we still try the shrink path before giving up.
-]
-
 # Context overflow patterns
 _CONTEXT_OVERFLOW_PATTERNS = [
    "context length",
@@ -686,15 +671,6 @@ def _classify_400(
 ) -> ClassifiedError:
    """Classify 400 Bad Request — context overflow, format error, or generic."""

-    # Image-too-large from 400 (Anthropic's 5 MB per-image check fires this way).
-    # Must be checked BEFORE context_overflow because messages can trip both
-    # patterns ("exceeds" + "image") and image-shrink is a cheaper recovery.
-    if any(p in error_msg for p in _IMAGE_TOO_LARGE_PATTERNS):
-        return result_fn(
-            FailoverReason.image_too_large,
-            retryable=True,
-        )
-
    # Context overflow from 400
    if any(p in error_msg for p in _CONTEXT_OVERFLOW_PATTERNS):
        return result_fn(
@@ -822,13 +798,6 @@ def _classify_by_message(
            should_compress=True,
        )

-    # Image-too-large patterns (from message text when no status_code)
-    if any(p in error_msg for p in _IMAGE_TOO_LARGE_PATTERNS):
-        return result_fn(
-            FailoverReason.image_too_large,
-            retryable=True,
-        )
-
    # Usage-limit patterns need the same disambiguation as 402: some providers
    # surface "usage limit" errors without an HTTP status code.  A transient
    # signal ("try again", "resets at", …) means it's a periodic quota, not
@@ -1,236 +0,0 @@
-"""Routing helpers for inbound user-attached images.
-
-Two modes:
-
-  native  — attach images as OpenAI-style ``image_url`` content parts on the
-            user turn. Provider adapters (Anthropic, Gemini, Bedrock, Codex,
-            OpenAI chat.completions) already translate these into their
-            vendor-specific multimodal formats.
-
-  text    — run ``vision_analyze`` on each image up-front and prepend the
-            description to the user's text. The model never sees the pixels;
-            it only sees a lossy text summary. This is the pre-existing
-            behaviour and still the right choice for non-vision models.
-
-The decision is made once per message turn by :func:`decide_image_input_mode`.
-It reads ``agent.image_input_mode`` from config.yaml (``auto`` | ``native``
-| ``text``, default ``auto``) and the active model's capability metadata.
-
-In ``auto`` mode:
-  - If the user has explicitly configured ``auxiliary.vision.provider``
-    (i.e. not ``auto`` and not empty), we assume they want the text pipeline
-    regardless of the main model — they've opted in to a specific vision
-    backend for a reason (cost, quality, local-only, etc.).
-  - Otherwise, if the active model reports ``supports_vision=True`` in its
-    models.dev metadata, we attach natively.
-  - Otherwise (non-vision model, no explicit override), we fall back to text.
-
-This keeps ``vision_analyze`` surfaced as a tool in every session — skills
-and agent flows that chain it (browser screenshots, deeper inspection of
-URL-referenced images, style-gating loops) keep working. The routing only
-affects *how user-attached images on the current turn* are presented to the
-main model.
-"""
-
-from __future__ import annotations
-
-import base64
-import logging
-import mimetypes
-from pathlib import Path
-from typing import Any, Dict, List, Optional, Tuple
-
-logger = logging.getLogger(__name__)
-
-
-_VALID_MODES = frozenset({"auto", "native", "text"})
-
-
-def _coerce_mode(raw: Any) -> str:
-    """Normalize a config value into one of the valid modes."""
-    if not isinstance(raw, str):
-        return "auto"
-    val = raw.strip().lower()
-    if val in _VALID_MODES:
-        return val
-    return "auto"
-
-
-def _explicit_aux_vision_override(cfg: Optional[Dict[str, Any]]) -> bool:
-    """True when the user configured a specific auxiliary vision backend.
-
-    An explicit override means the user *wants* the text pipeline (they're
-    paying for a dedicated vision model), so we don't silently bypass it.
-    """
-    if not isinstance(cfg, dict):
-        return False
-    aux = cfg.get("auxiliary") or {}
-    if not isinstance(aux, dict):
-        return False
-    vision = aux.get("vision") or {}
-    if not isinstance(vision, dict):
-        return False
-
-    provider = str(vision.get("provider") or "").strip().lower()
-    model = str(vision.get("model") or "").strip()
-    base_url = str(vision.get("base_url") or "").strip()
-
-    # "auto" / "" / blank = not explicit
-    if provider in ("", "auto") and not model and not base_url:
-        return False
-    return True
-
-
-def _lookup_supports_vision(provider: str, model: str) -> Optional[bool]:
-    """Return True/False if we can resolve caps, None if unknown."""
-    if not provider or not model:
-        return None
-    try:
-        from agent.models_dev import get_model_capabilities
-        caps = get_model_capabilities(provider, model)
-    except Exception as exc:  # pragma: no cover - defensive
-        logger.debug("image_routing: caps lookup failed for %s:%s — %s", provider, model, exc)
-        return None
-    if caps is None:
-        return None
-    return bool(caps.supports_vision)
-
-
-def decide_image_input_mode(
-    provider: str,
-    model: str,
-    cfg: Optional[Dict[str, Any]],
-) -> str:
-    """Return ``"native"`` or ``"text"`` for the given turn.
-
-    Args:
-      provider: active inference provider ID (e.g. ``"anthropic"``, ``"openrouter"``).
-      model:    active model slug as it would be sent to the provider.
-      cfg:      loaded config.yaml dict, or None. When None, behaves as auto.
-    """
-    mode_cfg = "auto"
-    if isinstance(cfg, dict):
-        agent_cfg = cfg.get("agent") or {}
-        if isinstance(agent_cfg, dict):
-            mode_cfg = _coerce_mode(agent_cfg.get("image_input_mode"))
-
-    if mode_cfg == "native":
-        return "native"
-    if mode_cfg == "text":
-        return "text"
-
-    # auto
-    if _explicit_aux_vision_override(cfg):
-        return "text"
-
-    supports = _lookup_supports_vision(provider, model)
-    if supports is True:
-        return "native"
-    return "text"
-
-
-# Image size handling is REACTIVE rather than proactive: we attempt native
-# attachment at full size regardless of provider, and rely on
-# ``run_agent._try_shrink_image_parts_in_messages`` to shrink + retry if
-# the provider rejects the request (e.g. Anthropic's hard 5 MB per-image
-# ceiling returned as HTTP 400 "image exceeds 5 MB maximum").
-#
-# Why reactive: our knowledge of provider ceilings is partial and evolving
-# (OpenAI accepts 49 MB+, Anthropic 5 MB, Gemini 100 MB, others unknown).
-# A proactive per-provider table would be stale the moment a provider raises
-# or lowers its limit, and silently degrading quality for users on providers
-# that would have accepted the full image is the worse failure mode.
-# The shrink-on-reject path loses 1 API call + maybe 1s of Pillow work when
-# it fires, which is cheaper than permanent quality loss.
-
-
-def _guess_mime(path: Path) -> str:
-    mime, _ = mimetypes.guess_type(str(path))
-    if mime and mime.startswith("image/"):
-        return mime
-    # mimetypes on some Linux distros mis-maps .jpg; default to jpeg when
-    # the suffix looks imagey.
-    suffix = path.suffix.lower()
-    return {
-        ".jpg": "image/jpeg",
-        ".jpeg": "image/jpeg",
-        ".png": "image/png",
-        ".gif": "image/gif",
-        ".webp": "image/webp",
-        ".bmp": "image/bmp",
-    }.get(suffix, "image/jpeg")
-
-
-def _file_to_data_url(path: Path) -> Optional[str]:
-    """Encode a local image as a base64 data URL at its native size.
-
-    Size limits are NOT enforced here — the agent retry loop
-    (``run_agent._try_shrink_image_parts_in_messages``) shrinks on the
-    provider's first rejection. Keeping this simple means providers that
-    accept large images (OpenAI 49 MB+, Gemini 100 MB) don't pay a silent
-    quality tax just because one other provider is stricter.
-
-    Returns None only if the file can't be read (missing, permission
-    denied, etc.); the caller reports those paths in ``skipped``.
-    """
-    try:
-        raw = path.read_bytes()
-    except Exception as exc:
-        logger.warning("image_routing: failed to read %s — %s", path, exc)
-        return None
-    mime = _guess_mime(path)
-    b64 = base64.b64encode(raw).decode("ascii")
-    return f"data:{mime};base64,{b64}"
-
-
-def build_native_content_parts(
-    user_text: str,
-    image_paths: List[str],
-) -> Tuple[List[Dict[str, Any]], List[str]]:
-    """Build an OpenAI-style ``content`` list for a user turn.
-
-    Shape:
-      [{"type": "text", "text": "..."},
-       {"type": "image_url", "image_url": {"url": "data:image/png;base64,..."}},
-       ...]
-
-    Images are attached at their native size. If a provider rejects the
-    request because an image is too large (e.g. Anthropic's 5 MB per-image
-    ceiling), the agent's retry loop transparently shrinks and retries
-    once — see ``run_agent._try_shrink_image_parts_in_messages``.
-
-    Returns (content_parts, skipped_paths). Skipped paths are files that
-    couldn't be read from disk.
-    """
-    parts: List[Dict[str, Any]] = []
-    skipped: List[str] = []
-
-    text = (user_text or "").strip()
-    if text:
-        parts.append({"type": "text", "text": text})
-
-    for raw_path in image_paths:
-        p = Path(raw_path)
-        if not p.exists() or not p.is_file():
-            skipped.append(str(raw_path))
-            continue
-        data_url = _file_to_data_url(p)
-        if not data_url:
-            skipped.append(str(raw_path))
-            continue
-        parts.append({
-            "type": "image_url",
-            "image_url": {"url": data_url},
-        })
-
-    # If the text was empty, add a neutral prompt so the turn isn't just images.
-    if not text and any(p.get("type") == "image_url" for p in parts):
-        parts.insert(0, {"type": "text", "text": "What do you see in this image?"})
-
-    return parts, skipped
-
-
-__all__ = [
-    "decide_image_input_mode",
-    "build_native_content_parts",
-]
@@ -63,124 +63,15 @@ def sanitize_context(text: str) -> str:
    return text


-class StreamingContextScrubber:
-    """Stateful scrubber for streaming text that may contain split memory-context spans.
-
-    The one-shot ``sanitize_context`` regex cannot survive chunk boundaries:
-    a ``<memory-context>`` opened in one delta and closed in a later delta
-    leaks its payload to the UI because the non-greedy block regex needs
-    both tags in one string.  This scrubber runs a small state machine
-    across deltas, holding back partial-tag tails and discarding
-    everything inside a span (including the system-note line).
-
-    Usage::
-
-        scrubber = StreamingContextScrubber()
-        for delta in stream:
-            visible = scrubber.feed(delta)
-            if visible:
-                emit(visible)
-        trailing = scrubber.flush()  # at end of stream
-        if trailing:
-            emit(trailing)
-
-    The scrubber is re-entrant per agent instance.  Callers building new
-    top-level responses (new turn) should create a fresh scrubber or call
-    ``reset()``.
-    """
-
-    _OPEN_TAG = "<memory-context>"
-    _CLOSE_TAG = "</memory-context>"
-
-    def __init__(self) -> None:
-        self._in_span: bool = False
-        self._buf: str = ""
-
-    def reset(self) -> None:
-        self._in_span = False
-        self._buf = ""
-
-    def feed(self, text: str) -> str:
-        """Return the visible portion of ``text`` after scrubbing.
-
-        Any trailing fragment that could be the start of an open/close tag
-        is held back in the internal buffer and surfaced on the next
-        ``feed()`` call or discarded/emitted by ``flush()``.
-        """
-        if not text:
-            return ""
-        buf = self._buf + text
-        self._buf = ""
-        out: list[str] = []
-
-        while buf:
-            if self._in_span:
-                idx = buf.lower().find(self._CLOSE_TAG)
-                if idx == -1:
-                    # Hold back a potential partial close tag; drop the rest
-                    held = self._max_partial_suffix(buf, self._CLOSE_TAG)
-                    self._buf = buf[-held:] if held else ""
-                    return "".join(out)
-                # Found close — skip span content + tag, continue
-                buf = buf[idx + len(self._CLOSE_TAG):]
-                self._in_span = False
-            else:
-                idx = buf.lower().find(self._OPEN_TAG)
-                if idx == -1:
-                    # No open tag — hold back a potential partial open tag
-                    held = self._max_partial_suffix(buf, self._OPEN_TAG)
-                    if held:
-                        out.append(buf[:-held])
-                        self._buf = buf[-held:]
-                    else:
-                        out.append(buf)
-                    return "".join(out)
-                # Emit text before the tag, enter span
-                if idx > 0:
-                    out.append(buf[:idx])
-                buf = buf[idx + len(self._OPEN_TAG):]
-                self._in_span = True
-
-        return "".join(out)
-
-    def flush(self) -> str:
-        """Emit any held-back buffer at end-of-stream.
-
-        If we're still inside an unterminated span the remaining content is
-        discarded (safer: leaking partial memory context is worse than a
-        truncated answer).  Otherwise the held-back partial-tag tail is
-        emitted verbatim (it turned out not to be a real tag).
-        """
-        if self._in_span:
-            self._buf = ""
-            self._in_span = False
-            return ""
-        tail = self._buf
-        self._buf = ""
-        return tail
-
-    @staticmethod
-    def _max_partial_suffix(buf: str, tag: str) -> int:
-        """Return the length of the longest buf-suffix that is a tag-prefix.
-
-        Case-insensitive.  Returns 0 if no suffix could start the tag.
-        """
-        tag_lower = tag.lower()
-        buf_lower = buf.lower()
-        max_check = min(len(buf_lower), len(tag_lower) - 1)
-        for i in range(max_check, 0, -1):
-            if tag_lower.startswith(buf_lower[-i:]):
-                return i
-        return 0
-
-
 def build_memory_context_block(raw_context: str) -> str:
-    """Wrap prefetched memory in a fenced block with system note."""
+    """Wrap prefetched memory in a fenced block with system note.
+
+    The fence prevents the model from treating recalled context as user
+    discourse.  Injected at API-call time only — never persisted.
+    """
    if not raw_context or not raw_context.strip():
        return ""
    clean = sanitize_context(raw_context)
-    if clean != raw_context:
-        logger.warning("memory provider returned pre-wrapped context; stripped")
    return (
        "<memory-context>\n"
        "[System note: The following is recalled memory context, "
@@ -51,7 +51,6 @@ _PROVIDER_PREFIXES: frozenset[str] = frozenset({
    "qwen-oauth",
    "xiaomi",
    "arcee",
-    "gmi",
    "custom", "local",
    # Common aliases
    "google", "google-gemini", "google-ai-studio",
@@ -61,7 +60,6 @@ _PROVIDER_PREFIXES: frozenset[str] = frozenset({
    "stepfun", "opencode", "zen", "go", "vercel", "kilo", "dashscope", "aliyun", "qwen",
    "mimo", "xiaomi-mimo",
    "arcee-ai", "arceeai",
-    "gmi-cloud", "gmicloud",
    "xai", "x-ai", "x.ai", "grok",
    "nvidia", "nim", "nvidia-nim", "nemotron",
    "qwen-portal",
@@ -108,11 +106,9 @@ _endpoint_model_metadata_cache_time: Dict[str, float] = {}
 _ENDPOINT_MODEL_CACHE_TTL = 300

 # Descending tiers for context length probing when the model is unknown.
-# We start at 256K (covers GPT-5.x, many current large-context models) and
-# step down on context-length errors until one works.  Tier[0] is also the
-# default fallback when no detection method succeeds.
+# We start at 128K (a safe default for most modern models) and step down
+# on context-length errors until one works.
 CONTEXT_PROBE_TIERS = [
-    256_000,
    128_000,
    64_000,
    32_000,
@@ -147,11 +143,10 @@ DEFAULT_CONTEXT_LENGTHS = {
    "claude": 200000,
    # OpenAI — GPT-5 family (most have 400k; specific overrides first)
    # Source: https://developers.openai.com/api/docs/models
-    # GPT-5.5 (launched Apr 23 2026) is 1.05M on the direct OpenAI API and
-    # ChatGPT Codex OAuth caps it at 272K; both paths resolve via their own
-    # provider-aware branches (_resolve_codex_oauth_context_length + models.dev).
-    # This hardcoded value is only reached when every probe misses.
-    "gpt-5.5": 1050000,
+    # GPT-5.5 (launched Apr 23 2026). 400k is the fallback for providers we
+    # can't probe live. ChatGPT Codex OAuth actually caps lower (272k as of
+    # Apr 2026) and is resolved via _resolve_codex_oauth_context_length().
+    "gpt-5.5": 400000,
    "gpt-5.4-nano": 400000,           # 400k (not 1.05M like full 5.4)
    "gpt-5.4-mini": 400000,           # 400k (not 1.05M like full 5.4)
    "gpt-5.4": 1050000,               # GPT-5.4, GPT-5.4 Pro (1.05M context)
@@ -167,17 +162,7 @@ DEFAULT_CONTEXT_LENGTHS = {
    "gemma-4-31b": 256000,
    "gemma-3": 131072,
    "gemma": 8192,  # fallback for older gemma models
-    # DeepSeek — V4 family ships with a 1M context window. The legacy
-    # aliases ``deepseek-chat`` / ``deepseek-reasoner`` are server-side
-    # mapped to the non-thinking / thinking modes of ``deepseek-v4-flash``
-    # and inherit the same 1M window. The ``deepseek`` substring entry
-    # below remains as a 128K fallback for older / unknown DeepSeek model
-    # ids (e.g. via custom endpoints).
-    # https://api-docs.deepseek.com/zh-cn/quick_start/pricing
-    "deepseek-v4-pro": 1_000_000,
-    "deepseek-v4-flash": 1_000_000,
-    "deepseek-chat": 1_000_000,
-    "deepseek-reasoner": 1_000_000,
+    # DeepSeek
    "deepseek": 128000,
    # Meta
    "llama": 131072,
@@ -309,7 +294,6 @@ _URL_TO_PROVIDER: Dict[str, str] = {
    "integrate.api.nvidia.com": "nvidia",
    "api.xiaomimimo.com": "xiaomi",
    "xiaomimimo.com": "xiaomi",
-    "api.gmi-serving.com": "gmi",
    "ollama.com": "ollama-cloud",
 }

@@ -705,29 +689,6 @@ def fetch_endpoint_model_metadata(
    return {}


-def _resolve_endpoint_context_length(
-    model: str,
-    base_url: str,
-    api_key: str = "",
-) -> Optional[int]:
-    """Resolve context length from an endpoint's live ``/models`` metadata."""
-    endpoint_metadata = fetch_endpoint_model_metadata(base_url, api_key=api_key)
-    matched = endpoint_metadata.get(model)
-    if not matched:
-        if len(endpoint_metadata) == 1:
-            matched = next(iter(endpoint_metadata.values()))
-        else:
-            for key, entry in endpoint_metadata.items():
-                if model in key or key in model:
-                    matched = entry
-                    break
-    if matched:
-        context_length = matched.get("context_length")
-        if isinstance(context_length, int):
-            return context_length
-    return None
-
-
 def _get_context_cache_path() -> Path:
    """Return path to the persistent context length cache file."""
    from hermes_constants import get_hermes_home
@@ -1232,7 +1193,6 @@ def get_model_context_length(
    api_key: str = "",
    config_context_length: int | None = None,
    provider: str = "",
-    custom_providers: list | None = None,
 ) -> int:
    """Get the context length for a model.

@@ -1253,23 +1213,6 @@ def get_model_context_length(
    if config_context_length is not None and isinstance(config_context_length, int) and config_context_length > 0:
        return config_context_length

-    # 0b. custom_providers per-model override — check before any probe.
-    # This closes the gap where /model switch and display paths used to fall
-    # back to 128K despite the user having a per-model context_length set.
-    # See #15779.
-    if custom_providers and base_url and model:
-        try:
-            from hermes_cli.config import get_custom_provider_context_length
-            cp_ctx = get_custom_provider_context_length(
-                model=model,
-                base_url=base_url,
-                custom_providers=custom_providers,
-            )
-            if cp_ctx:
-                return cp_ctx
-        except Exception:
-            pass  # fall through to probing
-
    # Normalise provider-prefixed model names (e.g. "local:model-name" →
    # "model-name") so cache lookups and server queries use the bare ID that
    # local servers actually know about.  Ollama "model:tag" colons are preserved.
@@ -1321,9 +1264,22 @@ def get_model_context_length(
    # returns 128k) instead of the model's full context (400k).  models.dev
    # has the correct per-provider values and is checked at step 5+.
    if _is_custom_endpoint(base_url) and not _is_known_provider_base_url(base_url):
-        context_length = _resolve_endpoint_context_length(model, base_url, api_key=api_key)
-        if context_length is not None:
-            return context_length
+        endpoint_metadata = fetch_endpoint_model_metadata(base_url, api_key=api_key)
+        matched = endpoint_metadata.get(model)
+        if not matched:
+            # Single-model servers: if only one model is loaded, use it
+            if len(endpoint_metadata) == 1:
+                matched = next(iter(endpoint_metadata.values()))
+            else:
+                # Fuzzy match: substring in either direction
+                for key, entry in endpoint_metadata.items():
+                    if model in key or key in model:
+                        matched = entry
+                        break
+        if matched:
+            context_length = matched.get("context_length")
+            if isinstance(context_length, int):
+                return context_length
        if not _is_known_provider_base_url(base_url):
            # 3. Try querying local server directly
            if is_local_endpoint(base_url):
@@ -1387,12 +1343,6 @@ def get_model_context_length(
            if base_url:
                save_context_length(model, base_url, codex_ctx)
            return codex_ctx
-    if effective_provider == "gmi" and base_url:
-        # GMI exposes authoritative context_length via /models, but it is not
-        # in models.dev yet. Preserve that higher-fidelity endpoint lookup.
-        ctx = _resolve_endpoint_context_length(model, base_url, api_key=api_key)
-        if ctx is not None:
-            return ctx
    if effective_provider:
        from agent.models_dev import lookup_models_dev_context
        ctx = lookup_models_dev_context(effective_provider, model)
@@ -1402,7 +1352,7 @@ def get_model_context_length(
    # 6. OpenRouter live API metadata (provider-unaware fallback)
    metadata = fetch_model_metadata()
    if model in metadata:
-        return metadata[model].get("context_length", DEFAULT_FALLBACK_CONTEXT)
+        return metadata[model].get("context_length", 128000)

    # 8. Hardcoded defaults (fuzzy match — longest key first for specificity)
    # Only check `default_model in model` (is the key a substring of the input).
@@ -180,145 +180,3 @@ def format_remaining(seconds: float) -> str:
    h, remainder = divmod(s, 3600)
    m = remainder // 60
    return f"{h}h {m}m" if m else f"{h}h"
-
-
-# Buckets with reset windows shorter than this are treated as transient
-# (upstream jitter, secondary throttling) rather than a genuine quota
-# exhaustion worth a cross-session breaker trip.
-_MIN_RESET_FOR_BREAKER_SECONDS = 60.0
-
-
-def is_genuine_nous_rate_limit(
-    *,
-    headers: Optional[Mapping[str, str]] = None,
-    last_known_state: Optional[Any] = None,
-) -> bool:
-    """Decide whether a 429 from Nous Portal is a real account rate limit.
-
-    Nous Portal multiplexes multiple upstream providers (DeepSeek, Kimi,
-    MiMo, Hermes, ...) behind one endpoint.  A 429 can mean either:
-
-      (a) The caller's own RPM / RPH / TPM / TPH bucket on Nous is
-          exhausted — a genuine rate limit that will last until the
-          bucket resets.
-      (b) The upstream provider is out of capacity for a specific model
-          — transient, clears in seconds, and has nothing to do with
-          the caller's quota on Nous.
-
-    Tripping the cross-session breaker on (b) blocks ALL Nous requests
-    (and all models, since Nous is one provider key) for minutes even
-    though the caller's account is healthy and a different model would
-    have worked.  That's the bug users hit when DeepSeek V4 Pro 429s
-    trigger a breaker that then blocks Kimi 2.6 and MiMo V2.5 Pro.
-
-    We tell the two apart by looking at:
-
-      1. The 429 response's own ``x-ratelimit-*`` headers.  Nous emits
-         the full suite on every response including 429s.  An exhausted
-         bucket (``remaining == 0`` with a reset window >= 60s) is
-         proof of (a).
-      2. The last-known-good rate-limit state captured by
-         ``_capture_rate_limits()`` on the previous successful
-         response.  If any bucket there was already near-exhausted with
-         a substantial reset window, the current 429 is almost
-         certainly (a) continuing from that condition.
-
-    If neither signal fires, we treat the 429 as (b): fail the single
-    request, let the retry loop or model-switch proceed, and do NOT
-    write the cross-session breaker file.
-
-    Returns True when the evidence points at (a).
-    """
-    # Signal 1: current 429 response headers.
-    state = _parse_buckets_from_headers(headers)
-    if _has_exhausted_bucket(state):
-        return True
-
-    # Signal 2: last-known-good state from a recent successful response.
-    # Accepts either a RateLimitState (dataclass from rate_limit_tracker)
-    # or a dict of bucket snapshots.
-    if last_known_state is not None and _has_exhausted_bucket_in_object(last_known_state):
-        return True
-
-    return False
-
-
-def _parse_buckets_from_headers(
-    headers: Optional[Mapping[str, str]],
-) -> dict[str, tuple[Optional[int], Optional[float]]]:
-    """Extract (remaining, reset_seconds) per bucket from x-ratelimit-* headers.
-
-    Returns empty dict when no rate-limit headers are present.
-    """
-    if not headers:
-        return {}
-
-    lowered = {k.lower(): v for k, v in headers.items()}
-    if not any(k.startswith("x-ratelimit-") for k in lowered):
-        return {}
-
-    def _maybe_int(raw: Optional[str]) -> Optional[int]:
-        if raw is None:
-            return None
-        try:
-            return int(float(raw))
-        except (TypeError, ValueError):
-            return None
-
-    def _maybe_float(raw: Optional[str]) -> Optional[float]:
-        if raw is None:
-            return None
-        try:
-            return float(raw)
-        except (TypeError, ValueError):
-            return None
-
-    result: dict[str, tuple[Optional[int], Optional[float]]] = {}
-    for tag in ("requests", "requests-1h", "tokens", "tokens-1h"):
-        remaining = _maybe_int(lowered.get(f"x-ratelimit-remaining-{tag}"))
-        reset = _maybe_float(lowered.get(f"x-ratelimit-reset-{tag}"))
-        if remaining is not None or reset is not None:
-            result[tag] = (remaining, reset)
-    return result
-
-
-def _has_exhausted_bucket(
-    buckets: Mapping[str, tuple[Optional[int], Optional[float]]],
-) -> bool:
-    """Return True when any bucket has remaining == 0 AND a meaningful reset window."""
-    for remaining, reset in buckets.values():
-        if remaining is None or remaining > 0:
-            continue
-        if reset is None:
-            continue
-        if reset >= _MIN_RESET_FOR_BREAKER_SECONDS:
-            return True
-    return False
-
-
-def _has_exhausted_bucket_in_object(state: Any) -> bool:
-    """Check a RateLimitState-like object for an exhausted bucket.
-
-    Accepts the dataclass from ``agent.rate_limit_tracker`` (buckets
-    exposed as attributes ``requests_min``, ``requests_hour``,
-    ``tokens_min``, ``tokens_hour``) and falls back gracefully for any
-    object missing those attributes.
-    """
-    for attr in ("requests_min", "requests_hour", "tokens_min", "tokens_hour"):
-        bucket = getattr(state, attr, None)
-        if bucket is None:
-            continue
-        limit = getattr(bucket, "limit", 0) or 0
-        remaining = getattr(bucket, "remaining", 0) or 0
-        # Prefer the adjusted "remaining_seconds_now" property when present;
-        # fall back to raw reset_seconds.
-        reset = getattr(bucket, "remaining_seconds_now", None)
-        if reset is None:
-            reset = getattr(bucket, "reset_seconds", 0.0) or 0.0
-        if limit <= 0:
-            continue
-        if remaining > 0:
-            continue
-        if reset >= _MIN_RESET_FOR_BREAKER_SECONDS:
-            return True
-    return False
@@ -1,191 +0,0 @@
-"""
-Contextual first-touch onboarding hints.
-
-Instead of blocking first-run questionnaires, show a one-time hint the *first*
-time a user hits a behavior fork — message-while-running, first long-running
-tool, etc.  Each hint is shown once per install (tracked in ``config.yaml`` under
-``onboarding.seen.<flag>``) and then never again.
-
-Keep this module tiny and dependency-free so both the CLI and gateway can import
-it without pulling in heavy modules.
-"""
-
-from __future__ import annotations
-
-import logging
-from pathlib import Path
-from typing import Any, Mapping, Optional
-
-logger = logging.getLogger(__name__)
-
-
-# -------------------------------------------------------------------------
-# Flag names (stable — used as config.yaml keys under onboarding.seen)
-# -------------------------------------------------------------------------
-
-BUSY_INPUT_FLAG = "busy_input_prompt"
-TOOL_PROGRESS_FLAG = "tool_progress_prompt"
-OPENCLAW_RESIDUE_FLAG = "openclaw_residue_cleanup"
-
-
-# -------------------------------------------------------------------------
-# Hint content
-# -------------------------------------------------------------------------
-
-def busy_input_hint_gateway(mode: str) -> str:
-    """Hint shown the first time a user messages while the agent is busy.
-
-    ``mode`` is the effective busy_input_mode that was just applied, so the
-    message matches reality ("I just interrupted…" vs "I just queued…").
-    """
-    if mode == "queue":
-        return (
-            "💡 First-time tip — I queued your message instead of interrupting. "
-            "Send `/busy interrupt` to make new messages stop the current task "
-            "immediately, or `/busy status` to check. This notice won't appear again."
-        )
-    if mode == "steer":
-        return (
-            "💡 First-time tip — I steered your message into the current run; "
-            "it will arrive after the next tool call instead of interrupting. "
-            "Send `/busy interrupt` or `/busy queue` to change this, or "
-            "`/busy status` to check. This notice won't appear again."
-        )
-    return (
-        "💡 First-time tip — I just interrupted my current task to answer you. "
-        "Send `/busy queue` to queue follow-ups for after the current task instead, "
-        "`/busy steer` to inject them mid-run without interrupting, or "
-        "`/busy status` to check. This notice won't appear again."
-    )
-
-
-def busy_input_hint_cli(mode: str) -> str:
-    """CLI version of the busy-input hint (plain text, no markdown)."""
-    if mode == "queue":
-        return (
-            "(tip) Your message was queued for the next turn. "
-            "Use /busy interrupt to make Enter stop the current run instead, "
-            "or /busy steer to inject mid-run. This tip only shows once."
-        )
-    if mode == "steer":
-        return (
-            "(tip) Your message was steered into the current run; it arrives "
-            "after the next tool call. Use /busy interrupt or /busy queue to "
-            "change this. This tip only shows once."
-        )
-    return (
-        "(tip) Your message interrupted the current run. "
-        "Use /busy queue to queue messages for the next turn instead, "
-        "or /busy steer to inject mid-run. This tip only shows once."
-    )
-
-
-def tool_progress_hint_gateway() -> str:
-    return (
-        "💡 First-time tip — that tool took a while and I'm streaming every step. "
-        "If the progress messages feel noisy, send `/verbose` to cycle modes "
-        "(all → new → off). This notice won't appear again."
-    )
-
-
-def tool_progress_hint_cli() -> str:
-    return (
-        "(tip) That tool ran for a while. Use /verbose to cycle tool-progress "
-        "display modes (all -> new -> off -> verbose). This tip only shows once."
-    )
-
-
-def openclaw_residue_hint_cli() -> str:
-    """Banner shown the first time Hermes starts and finds ``~/.openclaw/``.
-
-    OpenClaw-era config, memory, and skill paths in ``~/.openclaw/`` will
-    otherwise attract the agent (memory entries like ``~/.openclaw/config.yaml``
-    get carried forward and the agent dutifully reads them). ``hermes claw
-    cleanup`` renames the directory so the agent stops finding it.
-    """
-    return (
-        "Heads up — an OpenClaw workspace was detected at ~/.openclaw/.\n"
-        "After migrating, the agent can still get confused and read that "
-        "directory's config/memory instead of Hermes's.\n"
-        "Run `hermes claw cleanup` to archive it (rename → .openclaw.pre-migration). "
-        "This tip only shows once; rerun it any time with `hermes claw cleanup`."
-    )
-
-
-def detect_openclaw_residue(home: Optional[Path] = None) -> bool:
-    """Return True if an OpenClaw workspace directory is present in ``$HOME``.
-
-    Pure filesystem check — no side effects. ``home`` override exists for tests.
-    """
-    base = home or Path.home()
-    try:
-        return (base / ".openclaw").is_dir()
-    except OSError:
-        return False
-
-
-# -------------------------------------------------------------------------
-# State read / write
-# -------------------------------------------------------------------------
-
-def _get_seen_dict(config: Mapping[str, Any]) -> Mapping[str, Any]:
-    onboarding = config.get("onboarding") if isinstance(config, Mapping) else None
-    if not isinstance(onboarding, Mapping):
-        return {}
-    seen = onboarding.get("seen")
-    return seen if isinstance(seen, Mapping) else {}
-
-
-def is_seen(config: Mapping[str, Any], flag: str) -> bool:
-    """Return True if the user has already been shown this first-touch hint."""
-    return bool(_get_seen_dict(config).get(flag))
-
-
-def mark_seen(config_path: Path, flag: str) -> bool:
-    """Persist ``onboarding.seen.<flag> = True`` to ``config_path``.
-
-    Uses the atomic YAML writer so a concurrent process can't observe a
-    partially-written file.  Returns True on success, False on any error
-    (including the config file being absent — onboarding is best-effort).
-    """
-    try:
-        import yaml
-        from utils import atomic_yaml_write
-    except Exception as e:  # pragma: no cover — dependency issue
-        logger.debug("onboarding: failed to import yaml/utils: %s", e)
-        return False
-
-    try:
-        cfg: dict = {}
-        if config_path.exists():
-            with open(config_path, encoding="utf-8") as f:
-                cfg = yaml.safe_load(f) or {}
-        if not isinstance(cfg.get("onboarding"), dict):
-            cfg["onboarding"] = {}
-        seen = cfg["onboarding"].get("seen")
-        if not isinstance(seen, dict):
-            seen = {}
-            cfg["onboarding"]["seen"] = seen
-        if seen.get(flag) is True:
-            return True  # already marked — nothing to do
-        seen[flag] = True
-        atomic_yaml_write(config_path, cfg)
-        return True
-    except Exception as e:
-        logger.debug("onboarding: failed to mark flag %s: %s", flag, e)
-        return False
-
-
-__all__ = [
-    "BUSY_INPUT_FLAG",
-    "TOOL_PROGRESS_FLAG",
-    "OPENCLAW_RESIDUE_FLAG",
-    "busy_input_hint_gateway",
-    "busy_input_hint_cli",
-    "tool_progress_hint_gateway",
-    "tool_progress_hint_cli",
-    "openclaw_residue_hint_cli",
-    "detect_openclaw_residue",
-    "is_seen",
-    "mark_seen",
-]
@@ -141,12 +141,6 @@ DEFAULT_AGENT_IDENTITY = (
    "Be targeted and efficient in your exploration and investigations."
 )

-HERMES_AGENT_HELP_GUIDANCE = (
-    "If the user asks about configuring, setting up, or using Hermes Agent "
-    "itself, load the `hermes-agent` skill with skill_view(name='hermes-agent') "
-    "before answering. Docs: https://hermes-agent.nousresearch.com/docs"
-)
-
 MEMORY_GUIDANCE = (
    "You have persistent memory across sessions. Save durable facts using the memory "
    "tool: user preferences, environment details, tool quirks, and stable conventions. "
@@ -428,29 +422,6 @@ PLATFORM_HINTS = {
        "your response. Images are sent as native photos, and other files arrive as downloadable "
        "documents."
    ),
-    "yuanbao": (
-        "You are on Yuanbao (腾讯元宝), a Chinese AI assistant platform. "
-        "Markdown formatting is supported (code blocks, tables, bold/italic). "
-        "You CAN send media files natively — to deliver a file to the user, include "
-        "MEDIA:/absolute/path/to/file in your response. The file will be sent as a native "
-        "Yuanbao attachment: images (.jpg, .png, .webp, .gif) are sent as photos, "
-        "and other files (.pdf, .docx, .txt, .zip, etc.) arrive as downloadable documents "
-        "(max 50 MB). You can also include image URLs in markdown format ![alt](url) and "
-        "they will be downloaded and sent as native photos. "
-        "Do NOT tell the user you lack file-sending capability — use MEDIA: syntax "
-        "whenever a file delivery is appropriate.\n\n"
-        "Stickers (贴纸 / 表情包 / TIM face): Yuanbao has a built-in sticker catalogue. "
-        "When the user sends a sticker (you see '[emoji: 名称]' in their message) or asks "
-        "you to send/reply-with a 贴纸/表情/表情包, you MUST use the sticker tools:\n"
-        "  1. Call yb_search_sticker with a Chinese keyword (e.g. '666', '比心', '吃瓜', "
-        "     '捂脸', '合十') to discover matching sticker_ids.\n"
-        "  2. Call yb_send_sticker with the chosen sticker_id or name — this sends a real "
-        "     TIMFaceElem that renders as a native sticker in the chat.\n"
-        "DO NOT draw sticker-like PNGs with execute_code/Pillow/matplotlib and then send "
-        "them via MEDIA: or send_image_file. That produces a fake low-quality 'sticker' "
-        "image and is the WRONG path. Bare Unicode emoji in text is also not a substitute "
-        "— when a sticker is the right response, use yb_send_sticker."
-    ),
 }

 # ---------------------------------------------------------------------------
@@ -854,11 +825,6 @@ def build_skills_system_prompt(
            "Skills also encode the user's preferred approach, conventions, and quality standards "
            "for tasks like code review, planning, and testing — load them even for tasks you "
            "already know how to do, because the skill defines how it should be done here.\n"
-            "Whenever the user asks you to configure, set up, install, enable, disable, modify, "
-            "or troubleshoot Hermes Agent itself — its CLI, config, models, providers, tools, "
-            "skills, voice, gateway, plugins, or any feature — load the `hermes-agent` skill "
-            "first. It has the actual commands (e.g. `hermes config set …`, `hermes tools`, "
-            "`hermes setup`) so you don't have to guess or invent workarounds.\n"
            "If a skill has issues, fix it with skill_manage(action='patch').\n"
            "After difficult/iterative tasks, offer to save as a skill. "
            "If a skill you loaded was missing steps, had wrong commands, or needed "
@@ -754,11 +754,7 @@ def _resolve_effective_accept(
    if env in ("1", "true", "yes", "on"):
        return True
    cfg_val = cfg.get("hooks_auto_accept", False)
-    if isinstance(cfg_val, bool):
-        return cfg_val
-    if isinstance(cfg_val, str):
-        return cfg_val.strip().lower() in ("1", "true", "yes", "on")
-    return False
+    return bool(cfg_val)


 # ---------------------------------------------------------------------------
@@ -329,7 +329,7 @@ def build_skill_invocation_message(

    loaded_skill, skill_dir, skill_name = loaded
    activation_note = (
-        f'[IMPORTANT: The user has invoked the "{skill_name}" skill, indicating they want '
+        f'[SYSTEM: The user has invoked the "{skill_name}" skill, indicating they want '
        "you to follow its instructions. The full skill content is loaded below.]"
    )
    return _build_skill_message(
@@ -368,7 +368,7 @@ def build_preloaded_skills_prompt(

        loaded_skill, skill_dir, skill_name = loaded
        activation_note = (
-            f'[IMPORTANT: The user launched this CLI session with the "{skill_name}" skill '
+            f'[SYSTEM: The user launched this CLI session with the "{skill_name}" skill '
            "preloaded. Treat its instructions as active guidance for the duration of this "
            "session unless the user overrides them.]"
        )
@@ -6,18 +6,12 @@ adds latency to the user-facing reply.

 import logging
 import threading
-from typing import Callable, Optional
+from typing import Optional

 from agent.auxiliary_client import call_llm

 logger = logging.getLogger(__name__)

-# Callback signature: (task_name, exception) -> None. Used to surface
-# auxiliary failures to the user through AIAgent._emit_auxiliary_failure
-# so silent-drops (e.g. OpenRouter 402 exhausting the fallback chain)
-# become visible instead of piling up as NULL session titles.
-FailureCallback = Callable[[str, BaseException], None]
-
 _TITLE_PROMPT = (
    "Generate a short, descriptive title (3-7 words) for a conversation that starts with the "
    "following exchange. The title should capture the main topic or intent. "
@@ -25,21 +19,11 @@ _TITLE_PROMPT = (
 )


-def generate_title(
-    user_message: str,
-    assistant_response: str,
-    timeout: float = 30.0,
-    failure_callback: Optional[FailureCallback] = None,
-) -> Optional[str]:
+def generate_title(user_message: str, assistant_response: str, timeout: float = 30.0) -> Optional[str]:
    """Generate a session title from the first exchange.

    Uses the auxiliary LLM client (cheapest/fastest available model).
    Returns the title string or None on failure.
-
-    ``failure_callback`` is invoked with ``(task, exception)`` when the
-    auxiliary call raises — the caller typically wires this to
-    ``AIAgent._emit_auxiliary_failure`` so the user sees a warning instead
-    of silently accumulating untitled sessions.
    """
    # Truncate long messages to keep the request small
    user_snippet = user_message[:500] if user_message else ""
@@ -68,15 +52,7 @@ def generate_title(
            title = title[:77] + "..."
        return title if title else None
    except Exception as e:
-        # Log at WARNING so this shows up in agent.log without debug mode.
-        # Full detail at debug level for operators who need the stack.
-        logger.warning("Title generation failed: %s", e)
-        logger.debug("Title generation traceback", exc_info=True)
-        if failure_callback is not None:
-            try:
-                failure_callback("title generation", e)
-            except Exception:
-                logger.debug("Title generation failure_callback raised", exc_info=True)
+        logger.debug("Title generation failed: %s", e)
        return None


@@ -85,7 +61,6 @@ def auto_title_session(
    session_id: str,
    user_message: str,
    assistant_response: str,
-    failure_callback: Optional[FailureCallback] = None,
 ) -> None:
    """Generate and set a session title if one doesn't already exist.

@@ -106,9 +81,7 @@ def auto_title_session(
    except Exception:
        return

-    title = generate_title(
-        user_message, assistant_response, failure_callback=failure_callback
-    )
+    title = generate_title(user_message, assistant_response)
    if not title:
        return

@@ -125,7 +98,6 @@ def maybe_auto_title(
    user_message: str,
    assistant_response: str,
    conversation_history: list,
-    failure_callback: Optional[FailureCallback] = None,
 ) -> None:
    """Fire-and-forget title generation after the first exchange.

@@ -147,7 +119,6 @@ def maybe_auto_title(
    thread = threading.Thread(
        target=auto_title_session,
        args=(session_db, session_id, user_message, assistant_response),
-        kwargs={"failure_callback": failure_callback},
        daemon=True,
        name="auto-title",
    )
@@ -23,14 +23,9 @@ def get_transport(api_mode: str):
    This allows gradual migration — call sites can check for None
    and fall back to the legacy code path.
    """
-    cls = _REGISTRY.get(api_mode)
-    if cls is None:
-        # The registry can be partially populated when a specific transport
-        # module was imported directly (for example chat_completions before
-        # codex).  Discover on misses, not only when the registry is empty, so
-        # test/order-dependent imports do not make valid api_modes unavailable.
+    if not _REGISTRY:
        _discover_transports()
-        cls = _REGISTRY.get(api_mode)
+    cls = _REGISTRY.get(api_mode)
    if cls is None:
        return None
    return cls()
@@ -31,15 +31,15 @@ class ChatCompletionsTransport(ProviderTransport):
    def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> List[Dict[str, Any]]:
        """Messages are already in OpenAI format — sanitize Codex leaks only.

-        Strips Codex Responses API fields (``codex_reasoning_items`` /
-        ``codex_message_items`` on the message, ``call_id``/``response_item_id``
-        on tool_calls) that strict chat-completions providers reject with 400/422.
+        Strips Codex Responses API fields (``codex_reasoning_items`` on the
+        message, ``call_id``/``response_item_id`` on tool_calls) that strict
+        chat-completions providers reject with 400/422.
        """
        needs_sanitize = False
        for msg in messages:
            if not isinstance(msg, dict):
                continue
-            if "codex_reasoning_items" in msg or "codex_message_items" in msg:
+            if "codex_reasoning_items" in msg:
                needs_sanitize = True
                break
            tool_calls = msg.get("tool_calls")
@@ -59,7 +59,6 @@ class ChatCompletionsTransport(ProviderTransport):
            if not isinstance(msg, dict):
                continue
            msg.pop("codex_reasoning_items", None)
-            msg.pop("codex_message_items", None)
            tool_calls = msg.get("tool_calls")
            if isinstance(tool_calls, list):
                for tc in tool_calls:
@@ -120,24 +120,6 @@ class ResponsesApiTransport(ProviderTransport):
        if request_overrides:
            kwargs.update(request_overrides)

-        if is_codex_backend:
-            prompt_cache_key = kwargs.get("prompt_cache_key")
-            cache_scope_id = str(prompt_cache_key or session_id or "").strip()
-            if cache_scope_id:
-                existing_extra_headers = kwargs.get("extra_headers")
-                merged_extra_headers: Dict[str, str] = {}
-                if isinstance(existing_extra_headers, dict):
-                    merged_extra_headers.update(
-                        {
-                            str(key): str(value)
-                            for key, value in existing_extra_headers.items()
-                            if key and value is not None
-                        }
-                    )
-                merged_extra_headers["session_id"] = cache_scope_id
-                merged_extra_headers["x-client-request-id"] = cache_scope_id
-                kwargs["extra_headers"] = merged_extra_headers
-
        max_tokens = params.get("max_tokens")
        if max_tokens is not None and not is_codex_backend:
            kwargs["max_output_tokens"] = max_tokens
@@ -178,8 +160,6 @@ class ResponsesApiTransport(ProviderTransport):
        provider_data = {}
        if msg and hasattr(msg, "codex_reasoning_items") and msg.codex_reasoning_items:
            provider_data["codex_reasoning_items"] = msg.codex_reasoning_items
-        if msg and hasattr(msg, "codex_message_items") and msg.codex_message_items:
-            provider_data["codex_message_items"] = msg.codex_message_items
        if msg and hasattr(msg, "reasoning_details") and msg.reasoning_details:
            provider_data["reasoning_details"] = msg.reasoning_details

@@ -97,7 +97,7 @@ class NormalizedResponse:
    Response-level ``provider_data`` examples:

    * Anthropic: ``{"reasoning_details": [...]}``
-    * Codex: ``{"codex_reasoning_items": [...], "codex_message_items": [...]}``
+    * Codex: ``{"codex_reasoning_items": [...]}``
    * Others: ``None``
    """

@@ -126,11 +126,6 @@ class NormalizedResponse:
        pd = self.provider_data or {}
        return pd.get("codex_reasoning_items")

-    @property
-    def codex_message_items(self):
-        pd = self.provider_data or {}
-        return pd.get("codex_message_items")
-

 # ---------------------------------------------------------------------------
 # Factory helpers
@@ -606,7 +606,6 @@ platform_toolsets:
  signal: [hermes-signal]
  homeassistant: [hermes-homeassistant]
  qqbot: [hermes-qqbot]
-  yuanbao: [hermes-yuanbao]

 # =============================================================================
 # Gateway Platform Settings
@@ -825,9 +824,7 @@ delegation:
 # Display
 # =============================================================================
 display:
-  # Use compact banner mode (hides the ASCII-art banner, shows a single line).
-  #   true:  Compact single-line banner
-  #   false: Full ASCII banner with tool/skill summary (default)
+  # Use compact banner mode
  compact: false

  # Tool progress display level (CLI and gateway)
@@ -841,19 +838,12 @@ display:
  # Gateway-only natural mid-turn assistant updates.
  # When true, completed assistant status messages are sent as separate chat
  # messages. This is independent of tool_progress and gateway streaming.
-  #   true:  Send mid-turn assistant updates as separate messages (default)
-  #   false: Only send the final response
  interim_assistant_messages: true

-  # What Enter does when Hermes is already busy (CLI and gateway platforms).
+  # What Enter does when Hermes is already busy in the CLI.
  #   interrupt: Interrupt the current run and redirect Hermes (default)
  #   queue:     Queue your message for the next turn
-  #   steer:     Inject your message mid-run via /steer, arriving at the agent
-  #              after the next tool call — no interrupt, no role violation.
-  #              Falls back to 'queue' if the agent isn't running yet or if
-  #              images are attached (steer only carries text).
-  # Ctrl+C (or /stop in gateway) always interrupts regardless of this setting.
-  # Toggle at runtime with /busy <interrupt|queue|steer>.
+  # Ctrl+C always interrupts regardless of this setting.
  busy_input_mode: interrupt

  # Background process notifications (gateway/messaging only).
@@ -869,22 +859,17 @@ display:
  # Play terminal bell when agent finishes a response.
  # Useful for long-running tasks — your terminal will ding when the agent is done.
  # Works over SSH. Most terminals can be configured to flash the taskbar or play a sound.
-  #   true:  Ring the terminal bell on each response
-  #   false: Silent (default)
  bell_on_complete: false

  # Show model reasoning/thinking before each response.
  # When enabled, a dim box shows the model's thought process above the response.
  # Toggle at runtime with /reasoning show or /reasoning hide.
-  #   true:  Show the reasoning box
-  #   false: Hide reasoning (default)
  show_reasoning: false

  # Stream tokens to the terminal as they arrive instead of waiting for the
  # full response. The response box opens on first token and text appears
  # line-by-line. Tool calls are still captured silently.
-  #   true:  Stream tokens as they arrive (default)
-  #   false: Wait for the full response before rendering
+  # Set to true to stream tokens in real time; set to false to wait for the full response.
  streaming: true

  # ───────────────────────────────────────────────────────────────────────────
@@ -894,15 +879,10 @@ display:
  # response box label, and branding text. Change at runtime with /skin <name>.
  #
  # Built-in skins:
-  #   default        — Classic Hermes gold/kawaii
-  #   ares           — Crimson/bronze war-god theme with spinner wings
-  #   mono           — Clean grayscale monochrome
-  #   slate          — Cool blue developer-focused
-  #   daylight       — Bright light-mode theme
-  #   warm-lightmode — Warm paper-tone light-mode theme
-  #   poseidon       — Sea-green/teal Olympian theme
-  #   sisyphus       — Earthy stone-and-moss theme
-  #   charizard      — Fiery orange dragon theme
+  #   default  — Classic Hermes gold/kawaii
+  #   ares     — Crimson/bronze war-god theme with spinner wings
+  #   mono     — Clean grayscale monochrome
+  #   slate    — Cool blue developer-focused
  #
  # Custom skins: drop a YAML file in ~/.hermes/skins/<name>.yaml
  # Schema (all fields optional, missing values inherit from default):
@@ -16,7 +16,7 @@ import uuid
 from datetime import datetime, timedelta
 from pathlib import Path
 from hermes_constants import get_hermes_home
-from typing import Optional, Dict, List, Any, Union
+from typing import Optional, Dict, List, Any

 logger = logging.getLogger(__name__)

@@ -311,12 +311,6 @@ def compute_next_run(schedule: Dict[str, Any], last_run_at: Optional[str] = None

    elif schedule["kind"] == "cron":
        if not HAS_CRONITER:
-            logger.warning(
-                "Cannot compute next run for cron schedule %r: 'croniter' "
-                "is not installed. Install the 'cron' extra (pip install "
-                "'hermes-agent[cron]') to re-enable recurring cron jobs.",
-                schedule.get("expr"),
-            )
            return None
        cron = croniter(schedule["expr"], now)
        next_run = cron.get_next(datetime)
@@ -423,7 +417,6 @@ def create_job(
    provider: Optional[str] = None,
    base_url: Optional[str] = None,
    script: Optional[str] = None,
-    context_from: Optional[Union[str, List[str]]] = None,
    enabled_toolsets: Optional[List[str]] = None,
    workdir: Optional[str] = None,
 ) -> Dict[str, Any]:
@@ -445,9 +438,6 @@ def create_job(
        script: Optional path to a Python script whose stdout is injected into the
                prompt each run.  The script runs before the agent turn, and its output
                is prepended as context.  Useful for data collection / change detection.
-        context_from: Optional job ID (or list of job IDs) whose most recent output
-                      is injected into the prompt as context before each run.
-                      Useful for chaining cron jobs: job A finds data, job B processes it.
        enabled_toolsets: Optional list of toolset names to restrict the agent to.
                          When set, only tools from these toolsets are loaded, reducing
                          token overhead. When omitted, all default tools are loaded.
@@ -491,14 +481,6 @@ def create_job(
    normalized_toolsets = normalized_toolsets or None
    normalized_workdir = _normalize_workdir(workdir)

-    # Normalize context_from: accept str or list of str, store as list or None
-    if isinstance(context_from, str):
-        context_from = [context_from.strip()] if context_from.strip() else None
-    elif isinstance(context_from, list):
-        context_from = [str(j).strip() for j in context_from if str(j).strip()] or None
-    else:
-        context_from = None
-
    label_source = (prompt or (normalized_skills[0] if normalized_skills else None)) or "cron job"
    job = {
        "id": job_id,
@@ -510,7 +492,6 @@ def create_job(
        "provider": normalized_provider,
        "base_url": normalized_base_url,
        "script": normalized_script,
-        "context_from": context_from,
        "schedule": parsed_schedule,
        "schedule_display": parsed_schedule.get("display", schedule),
        "repeat": {
@@ -704,32 +685,10 @@ def mark_job_run(job_id: str, success: bool, error: Optional[str] = None,
                # Compute next run
                job["next_run_at"] = compute_next_run(job["schedule"], now)

-                # If no next run, decide whether this is terminal completion
-                # (one-shot) or a transient failure (recurring schedule couldn't
-                # compute — e.g. 'croniter' missing from the runtime env).
-                # Recurring jobs must NEVER be silently disabled: that turns a
-                # missing runtime dep into "job completed" and the user's
-                # schedule quietly goes off. See issue #16265.
+                # If no next run could be computed (normally a completed one-shot), disable
                if job["next_run_at"] is None:
-                    kind = job.get("schedule", {}).get("kind")
-                    if kind in ("cron", "interval"):
-                        job["state"] = "error"
-                        if not job.get("last_error"):
-                            job["last_error"] = (
-                                "Failed to compute next run for recurring "
-                                "schedule (is the 'croniter' package "
-                                "installed in the gateway's Python env?)"
-                            )
-                        logger.error(
-                            "Job '%s' (%s) could not compute next_run_at; "
-                            "leaving enabled and marking state=error so the "
-                            "job is not silently disabled.",
-                            job.get("name", job["id"]),
-                            kind,
-                        )
-                    else:
-                        job["enabled"] = False
-                        job["state"] = "completed"
+                    job["enabled"] = False
+                    job["state"] = "completed"
                elif job.get("state") != "paused":
                    job["state"] = "scheduled"

@@ -77,7 +77,7 @@ _KNOWN_DELIVERY_PLATFORMS = frozenset({
    "telegram", "discord", "slack", "whatsapp", "signal",
    "matrix", "mattermost", "homeassistant", "dingtalk", "feishu",
    "wecom", "wecom_callback", "weixin", "sms", "email", "webhook", "bluebubbles",
-    "qqbot", "yuanbao",
+    "qqbot",
 })

 # Platforms that support a configured cron/notification home target, mapped to
@@ -337,7 +337,6 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option
        "sms": Platform.SMS,
        "bluebubbles": Platform.BLUEBUBBLES,
        "qqbot": Platform.QQBOT,
-        "yuanbao": Platform.YUANBAO,
    }

    # Optionally wrap the content with a header/footer so the user knows this
@@ -672,51 +671,10 @@ def _build_job_prompt(job: dict, prerun_script: Optional[tuple] = None) -> str:
                f"{prompt}"
            )

-    # Inject output from referenced cron jobs as context.
-    context_from = job.get("context_from")
-    if context_from:
-        from cron.jobs import OUTPUT_DIR
-        if isinstance(context_from, str):
-            context_from = [context_from]
-        for source_job_id in context_from:
-            # Guard against path traversal — valid job IDs are 12-char hex strings
-            if not source_job_id or not all(c in "0123456789abcdef" for c in source_job_id):
-                logger.warning("context_from: skipping invalid job_id %r", source_job_id)
-                continue
-            try:
-                job_output_dir = OUTPUT_DIR / source_job_id
-                if not job_output_dir.exists():
-                    continue  # silent skip — no output yet
-                output_files = sorted(
-                    job_output_dir.glob("*.md"),
-                    key=lambda f: f.stat().st_mtime,
-                    reverse=True,
-                )
-                if not output_files:
-                    continue  # silent skip — no output yet
-                latest_output = output_files[0].read_text(encoding="utf-8").strip()
-                # Truncate to 8K characters to avoid prompt bloat
-                _MAX_CONTEXT_CHARS = 8000
-                if len(latest_output) > _MAX_CONTEXT_CHARS:
-                    latest_output = latest_output[:_MAX_CONTEXT_CHARS] + "\n\n[... output truncated ...]"
-                if latest_output:
-                    prompt = (
-                        f"## Output from job '{source_job_id}'\n"
-                        "The following is the most recent output from a preceding "
-                        "cron job. Use it as context for your analysis.\n\n"
-                        f"```\n{latest_output}\n```\n\n"
-                        f"{prompt}"
-                    )
-                else:
-                    continue  # silent skip — empty output
-            except (OSError, PermissionError) as e:
-                logger.warning("context_from: failed to read output for job %r: %s", source_job_id, e)
-                # silent skip — do not pollute the prompt with error messages
-
    # Always prepend cron execution guidance so the agent knows how
    # delivery works and can suppress delivery when appropriate.
    cron_hint = (
-        "[IMPORTANT: You are running as a scheduled cron job. "
+        "[SYSTEM: You are running as a scheduled cron job. "
        "DELIVERY: Your final response will be automatically delivered "
        "to the user — do NOT use send_message or try to deliver "
        "the output yourself. Just produce your report/output as your "
@@ -752,7 +710,7 @@ def _build_job_prompt(job: dict, prerun_script: Optional[tuple] = None) -> str:
            parts.append("")
        parts.extend(
            [
-                f'[IMPORTANT: The user has invoked the "{skill_name}" skill, indicating they want you to follow its instructions. The full skill content is loaded below.]',
+                f'[SYSTEM: The user has invoked the "{skill_name}" skill, indicating they want you to follow its instructions. The full skill content is loaded below.]',
                "",
                content,
            ]
@@ -760,7 +718,7 @@ def _build_job_prompt(job: dict, prerun_script: Optional[tuple] = None) -> str:

    if skipped:
        notice = (
-            f"[IMPORTANT: The following skill(s) were listed for this job but could not be found "
+            f"[SYSTEM: The following skill(s) were listed for this job but could not be found "
            f"and were skipped: {', '.join(skipped)}. "
            f"Start your response with a brief notice so the user is aware, e.g.: "
            f"'⚠️ Skill(s) not found and skipped: {', '.join(skipped)}']"
@@ -822,8 +780,6 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
    logger.info("Running job '%s' (ID: %s)", job_name, job_id)
    logger.info("Prompt: %s", prompt[:100])

-    agent = None
-
    # Mark this as a cron session so the approval system can apply cron_mode.
    # This env var is process-wide and persists for the lifetime of the
    # scheduler process — every job this process runs is a cron job.
@@ -1172,24 +1128,6 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
                _session_db.close()
            except (Exception, KeyboardInterrupt) as e:
                logger.debug("Job '%s': failed to close SQLite session store: %s", job_id, e)
-        # Release subprocesses, terminal sandboxes, browser daemons, and the
-        # main OpenAI/httpx client held by this ephemeral cron agent. Without
-        # this, a gateway that ticks cron every N minutes leaks fds per job
-        # until it hits EMFILE (#10200 / "too many open files").
-        try:
-            if agent is not None:
-                agent.close()
-        except (Exception, KeyboardInterrupt) as e:
-            logger.debug("Job '%s': failed to close agent resources: %s", job_id, e)
-        # Each cron run spins up a short-lived worker thread whose event loop
-        # dies as soon as the ``ThreadPoolExecutor`` shuts down. Any async
-        # httpx clients cached under that loop are now unusable — reap them
-        # so their transports don't accumulate in the process-global cache.
-        try:
-            from agent.auxiliary_client import cleanup_stale_async_clients
-            cleanup_stale_async_clients()
-        except Exception as e:
-            logger.debug("Job '%s': failed to reap stale auxiliary clients: %s", job_id, e)


 def tick(verbose: bool = True, adapters=None, loop=None) -> int:
@@ -1329,17 +1267,6 @@ def tick(verbose: bool = True, adapters=None, loop=None) -> int:
                    _futures.append(_tick_pool.submit(_ctx.run, _process_job, job))
                _results.extend(f.result() for f in _futures)

-        # Best-effort sweep of MCP stdio subprocesses that survived their
-        # session teardown during this tick.  Runs AFTER every job has
-        # finished so active sessions (including live user chats) are
-        # never touched — only PIDs explicitly detected as orphans in
-        # tools.mcp_tool._run_stdio's finally block are reaped.
-        try:
-            from tools.mcp_tool import _kill_orphaned_mcp_children
-            _kill_orphaned_mcp_children()
-        except Exception as _e:
-            logger.debug("Post-tick MCP orphan cleanup failed: %s", _e)
-
        return sum(_results)
    finally:
        if fcntl:
@@ -41,15 +41,6 @@ if [ "$(id -u)" = "0" ]; then
            echo "Warning: chown failed (rootless container?) — continuing anyway"
    fi

-    # Ensure config.yaml is readable by the hermes runtime user even if it was
-    # edited on the host after initial ownership setup. Must run here (as root)
-    # rather than after the gosu drop, otherwise a non-root caller like
-    # `docker run -u $(id -u):$(id -g)` hits "Operation not permitted" (#15865).
-    if [ -f "$HERMES_HOME/config.yaml" ]; then
-        chown hermes:hermes "$HERMES_HOME/config.yaml" 2>/dev/null || true
-        chmod 640 "$HERMES_HOME/config.yaml" 2>/dev/null || true
-    fi
-
    echo "Dropping root privileges"
    exec gosu hermes "$0" "$@"
 fi
@@ -76,6 +67,13 @@ if [ ! -f "$HERMES_HOME/config.yaml" ]; then
    cp "$INSTALL_DIR/cli-config.yaml.example" "$HERMES_HOME/config.yaml"
 fi

+# Best-effort: keep config.yaml readable by the hermes runtime user even if it
+# was edited on the host. Non-fatal, since a non-root caller cannot chown.
+if [ -f "$HERMES_HOME/config.yaml" ]; then
+    chown hermes:hermes "$HERMES_HOME/config.yaml" 2>/dev/null || true
+    chmod 640 "$HERMES_HOME/config.yaml" 2>/dev/null || true
+fi
+
 # SOUL.md
 if [ ! -f "$HERMES_HOME/SOUL.md" ]; then
    cp "$INSTALL_DIR/docker/SOUL.md" "$HERMES_HOME/SOUL.md"
@@ -36,7 +36,6 @@

      imports = [
        ./nix/packages.nix
-        ./nix/overlays.nix
        ./nix/nixosModules.nix
        ./nix/checks.nix
        ./nix/devShell.nix
@@ -57,7 +57,7 @@ def _session_entry_name(origin: Dict[str, Any]) -> str:
 # Build / refresh
 # ---------------------------------------------------------------------------

-async def build_channel_directory(adapters: Dict[Any, Any]) -> Dict[str, Any]:
+def build_channel_directory(adapters: Dict[Any, Any]) -> Dict[str, Any]:
    """
    Build a channel directory from connected platform adapters and session data.

@@ -72,7 +72,7 @@ async def build_channel_directory(adapters: Dict[Any, Any]) -> Dict[str, Any]:
            if platform == Platform.DISCORD:
                platforms["discord"] = _build_discord(adapter)
            elif platform == Platform.SLACK:
-                platforms["slack"] = await _build_slack(adapter)
+                platforms["slack"] = _build_slack(adapter)
        except Exception as e:
            logger.warning("Channel directory: failed to build %s: %s", platform.value, e)

@@ -136,66 +136,21 @@ def _build_discord(adapter) -> List[Dict[str, str]]:
    return channels


-async def _build_slack(adapter) -> List[Dict[str, Any]]:
-    """List Slack channels the bot has joined across all workspaces.
-
-    Uses ``users.conversations`` against each workspace's web client. Pulls
-    public + private channels the bot is a member of, then merges in DMs
-    discovered from session history (IMs aren't useful to enumerate
-    proactively).
-    """
-    team_clients = getattr(adapter, "_team_clients", None) or {}
-    if not team_clients:
+def _build_slack(adapter) -> List[Dict[str, str]]:
+    """List Slack channels the bot has joined."""
+    # Slack adapter may expose a web client
+    client = getattr(adapter, "_app", None) or getattr(adapter, "_client", None)
+    if not client:
        return _build_from_sessions("slack")

-    channels: List[Dict[str, Any]] = []
-    seen_ids: set = set()
+    try:
+        from tools.send_message_tool import _send_slack  # noqa: F401
+        # Use the Slack Web API directly if available
+    except Exception:
+        pass

-    for team_id, client in team_clients.items():
-        try:
-            cursor: Optional[str] = None
-            for _page in range(20):  # safety cap on pagination
-                response = await client.users_conversations(
-                    types="public_channel,private_channel",
-                    exclude_archived=True,
-                    limit=200,
-                    cursor=cursor,
-                )
-                if not response.get("ok"):
-                    logger.warning(
-                        "Channel directory: users.conversations not ok for team %s: %s",
-                        team_id,
-                        response.get("error", "unknown"),
-                    )
-                    break
-                for ch in response.get("channels", []):
-                    cid = ch.get("id")
-                    name = ch.get("name")
-                    if not cid or not name or cid in seen_ids:
-                        continue
-                    seen_ids.add(cid)
-                    channels.append({
-                        "id": cid,
-                        "name": name,
-                        "type": "private" if ch.get("is_private") else "channel",
-                    })
-                cursor = (response.get("response_metadata") or {}).get("next_cursor")
-                if not cursor:
-                    break
-        except Exception as e:
-            logger.warning(
-                "Channel directory: failed to list Slack channels for team %s: %s",
-                team_id, e,
-            )
-            continue
-
-    # Merge in DM/group entries discovered from session history.
-    for entry in _build_from_sessions("slack"):
-        if entry.get("id") not in seen_ids:
-            channels.append(entry)
-            seen_ids.add(entry.get("id"))
-
-    return channels
+    # Fallback to session data
+    return _build_from_sessions("slack")


 def _build_from_sessions(platform_name: str) -> List[Dict[str, str]]:
@@ -268,14 +223,6 @@ def resolve_channel_name(platform_name: str, name: str) -> Optional[str]:
    if not channels:
        return None

-    # 0. Exact ID match — case-sensitive, no normalization. Lets callers pass
-    # raw platform IDs (e.g. Slack "C0B0QV5434G") even when the format guard
-    # in _parse_target_ref hasn't recognized them as explicit.
-    raw = name.strip()
-    for ch in channels:
-        if ch.get("id") == raw:
-            return ch["id"]
-
    query = _normalize_channel_query(name)

    # 1. Exact name match, including the display labels shown by send_message(action="list")
@@ -67,7 +67,6 @@ class Platform(Enum):
    WEIXIN = "weixin"
    BLUEBUBBLES = "bluebubbles"
    QQBOT = "qqbot"
-    YUANBAO = "yuanbao"


@dataclass
@@ -196,14 +195,6 @@ class StreamingConfig:
    edit_interval: float = 1.0    # Seconds between message edits (Telegram rate-limits at ~1/s)
    buffer_threshold: int = 40    # Chars before forcing an edit
    cursor: str = " ▉"           # Cursor shown during streaming
-    # Ported from openclaw/openclaw#72038.  When >0, the final edit for
-    # a long-running streamed response is delivered as a fresh message
-    # if the original preview has been visible for at least this many
-    # seconds, so the platform's visible timestamp reflects completion
-    # time instead of the preview creation time.  Currently applied to
-    # Telegram only (other platforms ignore the setting).  Default 60s
-    # matches the OpenClaw rollout.  Set to 0 to disable.
-    fresh_final_after_seconds: float = 60.0

    def to_dict(self) -> Dict[str, Any]:
        return {
@@ -212,7 +203,6 @@ class StreamingConfig:
            "edit_interval": self.edit_interval,
            "buffer_threshold": self.buffer_threshold,
            "cursor": self.cursor,
-            "fresh_final_after_seconds": self.fresh_final_after_seconds,
        }

    @classmethod
@@ -225,9 +215,6 @@ class StreamingConfig:
            edit_interval=float(data.get("edit_interval", 1.0)),
            buffer_threshold=int(data.get("buffer_threshold", 40)),
            cursor=data.get("cursor", " ▉"),
-            fresh_final_after_seconds=float(
-                data.get("fresh_final_after_seconds", 60.0)
-            ),
        )


@@ -327,9 +314,6 @@ class GatewayConfig:
            # QQBot uses extra dict for app credentials
            elif platform == Platform.QQBOT and config.extra.get("app_id") and config.extra.get("client_secret"):
                connected.append(platform)
-            # Yuanbao uses extra dict for app credentials
-            elif platform == Platform.YUANBAO and config.extra.get("app_id") and config.extra.get("app_secret"):
-                connected.append(platform)
            # DingTalk uses client_id/client_secret from config.extra or env vars
            elif platform == Platform.DINGTALK and (
                config.extra.get("client_id") or os.getenv("DINGTALK_CLIENT_ID")
@@ -566,8 +550,6 @@ def load_gateway_config() -> GatewayConfig:
                        existing = {}
                    # Deep-merge extra dicts so gateway.json defaults survive
                    merged_extra = {**existing.get("extra", {}), **plat_block.get("extra", {})}
-                    if plat_name == Platform.SLACK.value and "enabled" in plat_block:
-                        merged_extra["_enabled_explicit"] = True
                    merged = {**existing, **plat_block}
                    if merged_extra:
                        merged["extra"] = merged_extra
@@ -588,8 +570,6 @@ def load_gateway_config() -> GatewayConfig:
                    )
                if "reply_prefix" in platform_cfg:
                    bridged["reply_prefix"] = platform_cfg["reply_prefix"]
-                if "reply_in_thread" in platform_cfg:
-                    bridged["reply_in_thread"] = platform_cfg["reply_in_thread"]
                if "require_mention" in platform_cfg:
                    bridged["require_mention"] = platform_cfg["require_mention"]
                if "free_response_channels" in platform_cfg:
@@ -604,7 +584,7 @@ def load_gateway_config() -> GatewayConfig:
                    bridged["group_policy"] = platform_cfg["group_policy"]
                if "group_allow_from" in platform_cfg:
                    bridged["group_allow_from"] = platform_cfg["group_allow_from"]
-                if plat in (Platform.DISCORD, Platform.SLACK) and "channel_skill_bindings" in platform_cfg:
+                if plat == Platform.DISCORD and "channel_skill_bindings" in platform_cfg:
                    bridged["channel_skill_bindings"] = platform_cfg["channel_skill_bindings"]
                if "channel_prompts" in platform_cfg:
                    channel_prompts = platform_cfg["channel_prompts"]
@@ -612,21 +592,16 @@ def load_gateway_config() -> GatewayConfig:
                        bridged["channel_prompts"] = {str(k): v for k, v in channel_prompts.items()}
                    else:
                        bridged["channel_prompts"] = channel_prompts
-                enabled_was_explicit = "enabled" in platform_cfg
-                if not bridged and not enabled_was_explicit:
+                if not bridged:
                    continue
                plat_data = platforms_data.setdefault(plat.value, {})
                if not isinstance(plat_data, dict):
                    plat_data = {}
                    platforms_data[plat.value] = plat_data
-                if enabled_was_explicit:
-                    plat_data["enabled"] = platform_cfg["enabled"]
                extra = plat_data.setdefault("extra", {})
                if not isinstance(extra, dict):
                    extra = {}
                    plat_data["extra"] = extra
-                if plat == Platform.SLACK and enabled_was_explicit:
-                    extra["_enabled_explicit"] = True
                extra.update(bridged)

            # Slack settings → env vars (env vars take precedence)
@@ -634,8 +609,6 @@ def load_gateway_config() -> GatewayConfig:
            if isinstance(slack_cfg, dict):
                if "require_mention" in slack_cfg and not os.getenv("SLACK_REQUIRE_MENTION"):
                    os.environ["SLACK_REQUIRE_MENTION"] = str(slack_cfg["require_mention"]).lower()
-                if "strict_mention" in slack_cfg and not os.getenv("SLACK_STRICT_MENTION"):
-                    os.environ["SLACK_STRICT_MENTION"] = str(slack_cfg["strict_mention"]).lower()
                if "allow_bots" in slack_cfg and not os.getenv("SLACK_ALLOW_BOTS"):
                    os.environ["SLACK_ALLOW_BOTS"] = str(slack_cfg["allow_bots"]).lower()
                frc = slack_cfg.get("free_response_channels")
@@ -945,20 +918,8 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
    slack_token = os.getenv("SLACK_BOT_TOKEN")
    if slack_token:
        if Platform.SLACK not in config.platforms:
-            # No yaml config for Slack — env-only setup, enable it
            config.platforms[Platform.SLACK] = PlatformConfig()
-            config.platforms[Platform.SLACK].enabled = True
-        else:
-            slack_config = config.platforms[Platform.SLACK]
-            enabled_was_explicit = bool(slack_config.extra.pop("_enabled_explicit", False))
-            if not slack_config.enabled and not enabled_was_explicit:
-                # Top-level Slack settings such as channel prompts should not
-                # turn an env-token setup into a disabled platform. Only an
-                # explicit slack.enabled/platforms.slack.enabled false should.
-                slack_config.enabled = True
-        # If yaml config exists, respect its enabled flag (don't override
-        # explicit enabled: false). Token is still stored so skills that
-        # send Slack messages can use it without activating the gateway adapter.
+        config.platforms[Platform.SLACK].enabled = True
        config.platforms[Platform.SLACK].token = slack_token
    slack_home = os.getenv("SLACK_HOME_CHANNEL")
    if slack_home and Platform.SLACK in config.platforms:
@@ -1315,48 +1276,6 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
                name=os.getenv("QQBOT_HOME_CHANNEL_NAME") or os.getenv(qq_home_name_env, "Home"),
            )

-    # Yuanbao — YUANBAO_APP_ID preferred
-    yuanbao_app_id = os.getenv("YUANBAO_APP_ID") or os.getenv("YUANBAO_APP_KEY")
-    yuanbao_app_secret = os.getenv("YUANBAO_APP_SECRET")
-    if yuanbao_app_id and yuanbao_app_secret:
-        if Platform.YUANBAO not in config.platforms:
-            config.platforms[Platform.YUANBAO] = PlatformConfig()
-        config.platforms[Platform.YUANBAO].enabled = True
-        extra = config.platforms[Platform.YUANBAO].extra
-        extra["app_id"] = yuanbao_app_id
-        extra["app_secret"] = yuanbao_app_secret
-        yuanbao_bot_id = os.getenv("YUANBAO_BOT_ID")
-        if yuanbao_bot_id:
-            extra["bot_id"] = yuanbao_bot_id
-        yuanbao_ws_url = os.getenv("YUANBAO_WS_URL")
-        if yuanbao_ws_url:
-            extra["ws_url"] = yuanbao_ws_url
-        yuanbao_api_domain = os.getenv("YUANBAO_API_DOMAIN")
-        if yuanbao_api_domain:
-            extra["api_domain"] = yuanbao_api_domain
-        yuanbao_route_env = os.getenv("YUANBAO_ROUTE_ENV")
-        if yuanbao_route_env:
-            extra["route_env"] = yuanbao_route_env
-        yuanbao_home = os.getenv("YUANBAO_HOME_CHANNEL")
-        if yuanbao_home:
-            config.platforms[Platform.YUANBAO].home_channel = HomeChannel(
-                platform=Platform.YUANBAO,
-                chat_id=yuanbao_home,
-                name=os.getenv("YUANBAO_HOME_CHANNEL_NAME", "Home"),
-            )
-        yuanbao_dm_policy = os.getenv("YUANBAO_DM_POLICY")
-        if yuanbao_dm_policy:
-            extra["dm_policy"] = yuanbao_dm_policy.strip().lower()
-        yuanbao_dm_allow_from = os.getenv("YUANBAO_DM_ALLOW_FROM")
-        if yuanbao_dm_allow_from:
-            extra["dm_allow_from"] = yuanbao_dm_allow_from
-        yuanbao_group_policy = os.getenv("YUANBAO_GROUP_POLICY")
-        if yuanbao_group_policy:
-            extra["group_policy"] = yuanbao_group_policy.strip().lower()
-        yuanbao_group_allow_from = os.getenv("YUANBAO_GROUP_ALLOW_FROM")
-        if yuanbao_group_allow_from:
-            extra["group_allow_from"] = yuanbao_group_allow_from
-
    # Session settings
    idle_minutes = os.getenv("SESSION_IDLE_MINUTES")
    if idle_minutes:
@@ -79,9 +79,7 @@ _PLATFORM_DEFAULTS: dict[str, dict[str, Any]] = {
    "discord":     _TIER_HIGH,

    # Tier 2 — edit support, often customer/workspace channels
-    # Slack: tool_progress off by default — Bolt posts cannot be edited like CLI;
-    # "new"/"all" spam permanent lines in channels (hermes-agent#14663).
-    "slack":           {**_TIER_MEDIUM, "tool_progress": "off"},
+    "slack":           _TIER_MEDIUM,
    "mattermost":      _TIER_MEDIUM,
    "matrix":          _TIER_MEDIUM,
    "feishu":          _TIER_MEDIUM,
@@ -28,7 +28,6 @@ def mirror_to_session(
    message_text: str,
    source_label: str = "cli",
    thread_id: Optional[str] = None,
-    user_id: Optional[str] = None,
 ) -> bool:
    """
    Append a delivery-mirror message to the target session's transcript.
@@ -40,20 +39,9 @@ def mirror_to_session(
    All errors are caught -- this is never fatal.
    """
    try:
-        session_id = _find_session_id(
-            platform,
-            str(chat_id),
-            thread_id=thread_id,
-            user_id=user_id,
-        )
+        session_id = _find_session_id(platform, str(chat_id), thread_id=thread_id)
        if not session_id:
-            logger.debug(
-                "Mirror: no session found for %s:%s:%s:%s",
-                platform,
-                chat_id,
-                thread_id,
-                user_id,
-            )
+            logger.debug("Mirror: no session found for %s:%s:%s", platform, chat_id, thread_id)
            return False

        mirror_msg = {
@@ -71,33 +59,17 @@ def mirror_to_session(
        return True

    except Exception as e:
-        logger.debug(
-            "Mirror failed for %s:%s:%s:%s: %s",
-            platform,
-            chat_id,
-            thread_id,
-            user_id,
-            e,
-        )
+        logger.debug("Mirror failed for %s:%s:%s: %s", platform, chat_id, thread_id, e)
        return False


-def _find_session_id(
-    platform: str,
-    chat_id: str,
-    thread_id: Optional[str] = None,
-    user_id: Optional[str] = None,
-) -> Optional[str]:
+def _find_session_id(platform: str, chat_id: str, thread_id: Optional[str] = None) -> Optional[str]:
    """
    Find the active session_id for a platform + chat_id pair.

    Scans sessions.json entries and matches where origin.chat_id == chat_id
    on the right platform.  DM session keys don't embed the chat_id
    (e.g. "agent:main:telegram:dm"), so we check the origin dict.
-
-    When *user_id* is provided, prefer exact sender matches. If multiple
-    same-chat candidates exist and none matches the user, return None instead
-    of guessing and contaminating another participant's session.
    """
    if not _SESSIONS_INDEX.exists():
        return None
@@ -109,7 +81,8 @@ def _find_session_id(
        return None

    platform_lower = platform.lower()
-    candidates = []
+    best_match = None
+    best_updated = ""

    for _key, entry in data.items():
        origin = entry.get("origin") or {}
@@ -123,31 +96,12 @@ def _find_session_id(
            origin_thread_id = origin.get("thread_id")
            if thread_id is not None and str(origin_thread_id or "") != str(thread_id):
                continue
-            candidates.append(entry)
+            updated = entry.get("updated_at", "")
+            if updated > best_updated:
+                best_updated = updated
+                best_match = entry.get("session_id")

-    if not candidates:
-        return None
-
-    if user_id:
-        exact_user_matches = [
-            entry for entry in candidates
-            if str((entry.get("origin") or {}).get("user_id") or "") == str(user_id)
-        ]
-        if exact_user_matches:
-            candidates = exact_user_matches
-        elif len(candidates) > 1:
-            return None
-    elif len(candidates) > 1:
-        distinct_user_ids = {
-            str((entry.get("origin") or {}).get("user_id") or "").strip()
-            for entry in candidates
-            if str((entry.get("origin") or {}).get("user_id") or "").strip()
-        }
-        if len(distinct_user_ids) > 1:
-            return None
-
-    best_entry = max(candidates, key=lambda entry: entry.get("updated_at", ""))
-    return best_entry.get("session_id")
+    return best_match


 def _append_to_jsonl(session_id: str, message: dict) -> None:
@@ -10,12 +10,10 @@ Each adapter handles:

 from .base import BasePlatformAdapter, MessageEvent, SendResult
 from .qqbot import QQAdapter
-from .yuanbao import YuanbaoAdapter

 __all__ = [
    "BasePlatformAdapter",
    "MessageEvent",
    "SendResult",
    "QQAdapter",
-    "YuanbaoAdapter",
 ]
@@ -9,7 +9,6 @@ Exposes an HTTP server with endpoints:
 - GET  /v1/models                  — lists hermes-agent as an available model
 - POST /v1/runs                    — start a run, returns run_id immediately (202)
 - GET  /v1/runs/{run_id}/events    — SSE stream of structured lifecycle events
- POST /v1/runs/{run_id}/stop    — interrupt a running agent
 - GET  /health                     — health check
 - GET  /health/detailed            — rich status for cross-container dashboard probing

@@ -587,9 +586,6 @@ class APIServerAdapter(BasePlatformAdapter):
        self._run_streams: Dict[str, "asyncio.Queue[Optional[Dict]]"] = {}
        # Creation timestamps for orphaned-run TTL sweep
        self._run_streams_created: Dict[str, float] = {}
-        # Active run agent/task references for stop support
-        self._active_run_agents: Dict[str, Any] = {}
-        self._active_run_tasks: Dict[str, "asyncio.Task"] = {}
        self._session_db: Optional[Any] = None  # Lazy-init SessionDB for session continuity

    @staticmethod
@@ -2445,7 +2441,6 @@ class APIServerAdapter(BasePlatformAdapter):
                    stream_delta_callback=_text_cb,
                    tool_progress_callback=event_cb,
                )
-                self._active_run_agents[run_id] = agent
                def _run_sync():
                    r = agent.run_conversation(
                        user_message=user_message,
@@ -2485,11 +2480,8 @@ class APIServerAdapter(BasePlatformAdapter):
                    q.put_nowait(None)
                except Exception:
                    pass
-                self._active_run_agents.pop(run_id, None)
-                self._active_run_tasks.pop(run_id, None)

        task = asyncio.create_task(_run_and_close())
-        self._active_run_tasks[run_id] = task
        try:
            self._background_tasks.add(task)
        except TypeError:
@@ -2548,44 +2540,6 @@ class APIServerAdapter(BasePlatformAdapter):

        return response

-    async def _handle_stop_run(self, request: "web.Request") -> "web.Response":
-        """POST /v1/runs/{run_id}/stop — interrupt a running agent."""
-        auth_err = self._check_auth(request)
-        if auth_err:
-            return auth_err
-
-        run_id = request.match_info["run_id"]
-        agent = self._active_run_agents.get(run_id)
-        task = self._active_run_tasks.get(run_id)
-
-        if agent is None and task is None:
-            return web.json_response(_openai_error(f"Run not found: {run_id}", code="run_not_found"), status=404)
-
-        if agent is not None:
-            try:
-                agent.interrupt("Stop requested via API")
-            except Exception:
-                pass
-
-        if task is not None and not task.done():
-            task.cancel()
-            # Bounded wait: run_conversation() executes in the default
-            # executor thread which task.cancel() cannot preempt — we rely on
-            # agent.interrupt() above to break the loop. Cap the wait so a
-            # slow/unresponsive interrupt can't hang this handler.
-            try:
-                await asyncio.wait_for(asyncio.shield(task), timeout=5.0)
-            except asyncio.TimeoutError:
-                logger.warning(
-                    "[api_server] stop for run %s timed out after 5s; "
-                    "agent may still be finishing the current step",
-                    run_id,
-                )
-            except (asyncio.CancelledError, Exception):
-                pass
-
-        return web.json_response({"run_id": run_id, "status": "stopping"})
-
    async def _sweep_orphaned_runs(self) -> None:
        """Periodically clean up run streams that were never consumed."""
        while True:
@@ -2600,8 +2554,6 @@ class APIServerAdapter(BasePlatformAdapter):
                logger.debug("[api_server] sweeping orphaned run %s", run_id)
                self._run_streams.pop(run_id, None)
                self._run_streams_created.pop(run_id, None)
-                self._active_run_agents.pop(run_id, None)
-                self._active_run_tasks.pop(run_id, None)

    # ------------------------------------------------------------------
    # BasePlatformAdapter interface
@@ -2637,7 +2589,6 @@ class APIServerAdapter(BasePlatformAdapter):
            # Structured event streaming
            self._app.router.add_post("/v1/runs", self._handle_runs)
            self._app.router.add_get("/v1/runs/{run_id}/events", self._handle_run_events)
-            self._app.router.add_post("/v1/runs/{run_id}/stop", self._handle_stop_run)
            # Start background sweep to clean up orphaned (unconsumed) run streams
            sweep_task = asyncio.create_task(self._sweep_orphaned_runs())
            try:
@@ -336,39 +336,6 @@ def proxy_kwargs_for_aiohttp(proxy_url: str | None) -> tuple[dict, dict]:
    return {}, {"proxy": proxy_url}


-def is_host_excluded_by_no_proxy(hostname: str, no_proxy_value: str | None = None) -> bool:
-    """Return True when ``hostname`` matches a ``NO_PROXY`` entry.
-
-    Supports comma- or whitespace-separated entries with optional leading dots
-    and ``*.`` wildcards, which match both the apex domain and subdomains.
-    """
-    raw = no_proxy_value
-    if raw is None:
-        raw = os.environ.get("NO_PROXY") or os.environ.get("no_proxy") or ""
-
-    raw = raw.strip()
-    if not raw:
-        return False
-
-    lower_hostname = hostname.lower()
-    for entry in re.split(r"[\s,]+", raw):
-        normalized = entry.strip().lower()
-        if not normalized:
-            continue
-        if normalized == "*":
-            return True
-
-        if normalized.startswith("*."):
-            normalized = normalized[2:]
-        elif normalized.startswith("."):
-            normalized = normalized[1:]
-
-        if lower_hostname == normalized or lower_hostname.endswith(f".{normalized}"):
-            return True
-
-    return False
-
-
 from dataclasses import dataclass, field
 from datetime import datetime
 from pathlib import Path
@@ -726,15 +693,7 @@ SUPPORTED_DOCUMENT_TYPES = {
    ".pdf": "application/pdf",
    ".md": "text/markdown",
    ".txt": "text/plain",
-    ".csv": "text/csv",
    ".log": "text/plain",
-    ".json": "application/json",
-    ".xml": "application/xml",
-    ".yaml": "application/yaml",
-    ".yml": "application/yaml",
-    ".toml": "application/toml",
-    ".ini": "text/plain",
-    ".cfg": "text/plain",
    ".zip": "application/zip",
    ".docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    ".xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
@@ -1023,61 +982,6 @@ def resolve_channel_prompt(
    return None


-def resolve_channel_skills(
-    config_extra: dict,
-    channel_id: str,
-    parent_id: str | None = None,
-) -> list[str] | None:
-    """Resolve auto-loaded skill(s) for a channel/thread from platform config.
-
-    Looks up ``channel_skill_bindings`` in the adapter's ``config.extra`` dict.
-
-    Config format::
-
-        channel_skill_bindings:
-          - id: "C0123"          # Slack channel ID or Discord channel/forum ID
-            skills: ["skill-a", "skill-b"]
-          - id: "D0ABCDE"
-            skill: "solo-skill"  # single string also accepted
-
-    Prefers an exact match on *channel_id*; falls back to *parent_id*
-    (useful for forum threads / Slack threads inheriting the parent channel's
-    binding).
-
-    Returns a deduplicated list of skill names (order preserved), or None if
-    no match is found.
-    """
-    bindings = config_extra.get("channel_skill_bindings") or []
-    if not isinstance(bindings, list) or not bindings:
-        return None
-    ids_to_check: set[str] = set()
-    if channel_id:
-        ids_to_check.add(str(channel_id))
-    if parent_id:
-        ids_to_check.add(str(parent_id))
-    if not ids_to_check:
-        return None
-    for entry in bindings:
-        if not isinstance(entry, dict):
-            continue
-        entry_id = str(entry.get("id", ""))
-        if entry_id in ids_to_check:
-            skills = entry.get("skills") or entry.get("skill")
-            if isinstance(skills, str):
-                s = skills.strip()
-                return [s] if s else None
-            if isinstance(skills, list) and skills:
-                seen: list[str] = []
-                for name in skills:
-                    if not isinstance(name, str):
-                        continue
-                    nm = name.strip()
-                    if nm and nm not in seen:
-                        seen.append(nm)
-                return seen or None
-    return None
-
-
 class BasePlatformAdapter(ABC):
    """
    Base class for platform adapters.
@@ -1121,20 +1025,7 @@ class BasePlatformAdapter(ABC):
        self._post_delivery_callbacks: Dict[str, Any] = {}
        self._expected_cancelled_tasks: set[asyncio.Task] = set()
        self._busy_session_handler: Optional[Callable[[MessageEvent, str], Awaitable[bool]]] = None
-        # Auto-TTS on voice input: ``_auto_tts_default`` is the global default
-        # (``voice.auto_tts`` in config.yaml, pushed by GatewayRunner on connect).
-        # Per-chat overrides live in two sets populated from ``_voice_mode``:
-        #   - ``_auto_tts_enabled_chats``: chat explicitly opted in via ``/voice on``
-        #     or ``/voice tts`` (mode is ``voice_only`` or ``all``). Fires even when
-        #     the global default is False.
-        #   - ``_auto_tts_disabled_chats``: chat explicitly opted out via
-        #     ``/voice off`` (mode is ``off``). Suppresses auto-TTS even when the
-        #     global default is True.
-        # The gate in _process_message() is:
-        #   fire if chat in _auto_tts_enabled_chats
-        #     OR (_auto_tts_default and chat not in _auto_tts_disabled_chats)
-        self._auto_tts_default: bool = False
-        self._auto_tts_enabled_chats: set = set()
+        # Chats where auto-TTS on voice input is disabled (set by /voice off)
        self._auto_tts_disabled_chats: set = set()
        # Chats where typing indicator is paused (e.g. during approval waits).
        # _keep_typing skips send_typing when the chat_id is in this set.
@@ -1156,21 +1047,6 @@ class BasePlatformAdapter(ABC):
    def fatal_error_retryable(self) -> bool:
        return self._fatal_error_retryable

-    def _should_auto_tts_for_chat(self, chat_id: str) -> bool:
-        """Whether auto-TTS on voice input should fire for ``chat_id``.
-
-        Decision layers (Issue #16007):
-          1. Explicit ``/voice on`` or ``/voice tts`` → always fire (even if
-             ``voice.auto_tts`` is False).
-          2. Explicit ``/voice off`` → never fire.
-          3. Fall back to the global ``voice.auto_tts`` config default.
-        """
-        if chat_id in self._auto_tts_enabled_chats:
-            return True
-        if chat_id in self._auto_tts_disabled_chats:
-            return False
-        return bool(self._auto_tts_default)
-
    def set_fatal_error_handler(self, handler: Callable[["BasePlatformAdapter"], Awaitable[None] | None]) -> None:
        self._fatal_error_handler = handler

@@ -1354,27 +1230,6 @@ class BasePlatformAdapter(ABC):
        """
        return SendResult(success=False, error="Not supported")

-    async def delete_message(
-        self,
-        chat_id: str,
-        message_id: str,
-    ) -> bool:
-        """
-        Delete a previously sent message.  Optional — platforms that don't
-        support deletion return ``False`` and callers fall back to leaving
-        the message in place.
-
-        Used by the stream consumer's fresh-final cleanup path (see
-        openclaw/openclaw#72038) to remove long-lived preview messages
-        after sending the completed reply as a fresh message so the
-        platform's visible timestamp reflects completion time.
-
-        Returns ``True`` on successful deletion, ``False`` otherwise.
-        Subclasses should override for platforms with a deletion API
-        (e.g. Telegram ``deleteMessage``).
-        """
-        return False
-
    async def send_typing(self, chat_id: str, metadata=None) -> None:
        """
        Send a typing indicator.
@@ -1702,41 +1557,13 @@ class BasePlatformAdapter(ABC):
        the agent is waiting for dangerous-command approval).  This is critical
        for Slack's Assistant API where ``assistant_threads_setStatus`` disables
        the compose box — pausing lets the user type ``/approve`` or ``/deny``.
-
-        Each ``send_typing`` call is bounded by a ~1.5s timeout so a slow
-        network round-trip can't stall the refresh cadence.  Telegram- and
-        Discord-side typing expire after ~5s; if any individual send_typing
-        takes longer than the refresh interval, the bubble would die and
-        stay dead until that call returns.  Abandoning the slow call lets
-        the next tick fire a fresh send_typing on schedule — as long as
-        one of them succeeds within the 5s platform-side window, the bubble
-        stays visible across provider stalls / upstream API timeouts.
        """
-        # Bound each send_typing round-trip so the refresh cadence isn't
-        # gated on network health.  Must stay below ``interval`` so a slow
-        # call gets abandoned before the next scheduled tick.
-        _send_typing_timeout = max(0.25, min(1.5, interval - 0.25))
        try:
            while True:
                if stop_event is not None and stop_event.is_set():
                    return
                if chat_id not in self._typing_paused:
-                    try:
-                        await asyncio.wait_for(
-                            self.send_typing(chat_id, metadata=metadata),
-                            timeout=_send_typing_timeout,
-                        )
-                    except asyncio.TimeoutError:
-                        # Slow network — abandon this tick, keep the loop
-                        # on schedule so the next send_typing fires fresh.
-                        pass
-                    except asyncio.CancelledError:
-                        raise
-                    except Exception as typing_err:
-                        logger.debug(
-                            "[%s] send_typing error (non-fatal): %s",
-                            self.name, typing_err,
-                        )
+                    await self.send_typing(chat_id, metadata=metadata)
                if stop_event is None:
                    await asyncio.sleep(interval)
                    continue
@@ -2387,14 +2214,12 @@ class BasePlatformAdapter(ABC):
                    logger.info("[%s] extract_local_files found %d file(s) in response", self.name, len(local_files))
                
                # Auto-TTS: if voice message, generate audio FIRST (before sending text)
-                # Gated via ``_should_auto_tts_for_chat``: fires when the chat has
-                # an explicit ``/voice on|tts`` opt-in OR when ``voice.auto_tts`` is
-                # True globally and no ``/voice off`` has been issued.
+                # Skipped when the chat has voice mode disabled (/voice off)
                _tts_path = None
-                if (self._should_auto_tts_for_chat(event.source.chat_id)
-                        and event.message_type == MessageType.VOICE
+                if (event.message_type == MessageType.VOICE
                        and text_content
-                        and not media_files):
+                        and not media_files
+                        and event.source.chat_id not in self._auto_tts_disabled_chats):
                    try:
                        from tools.tts_tool import text_to_speech_tool, check_tts_requirements
                        if check_tts_requirements():
@@ -2718,9 +2543,6 @@ class BasePlatformAdapter(ABC):
        user_id_alt: Optional[str] = None,
        chat_id_alt: Optional[str] = None,
        is_bot: bool = False,
-        guild_id: Optional[str] = None,
-        parent_chat_id: Optional[str] = None,
-        message_id: Optional[str] = None,
    ) -> SessionSource:
        """Helper to build a SessionSource for this platform."""
        # Normalize empty topic to None
@@ -2738,9 +2560,6 @@ class BasePlatformAdapter(ABC):
            user_id_alt=user_id_alt,
            chat_id_alt=chat_id_alt,
            is_bot=is_bot,
-            guild_id=str(guild_id) if guild_id else None,
-            parent_chat_id=str(parent_chat_id) if parent_chat_id else None,
-            message_id=str(message_id) if message_id else None,
        )
    
    @abstractmethod
@@ -2315,6 +2315,11 @@ class DiscordAdapter(BasePlatformAdapter):
        async def slash_background(interaction: discord.Interaction, prompt: str):
            await self._run_simple_slash(interaction, f"/background {prompt}", "Background task started~")

+        @tree.command(name="btw", description="Ephemeral side question using session context")
+        @discord.app_commands.describe(question="Your side question (no tools, not persisted)")
+        async def slash_btw(interaction: discord.Interaction, question: str):
+            await self._run_simple_slash(interaction, f"/btw {question}")
+
        # ── Auto-register any gateway-available commands not yet on the tree ──
        # This ensures new commands added to COMMAND_REGISTRY in
        # hermes_cli/commands.py automatically appear as Discord slash
@@ -2679,8 +2684,21 @@ class DiscordAdapter(BasePlatformAdapter):
                skills: ["skill-a", "skill-b"]
        Also checks parent_id so forum threads inherit the forum's bindings.
        """
-        from gateway.platforms.base import resolve_channel_skills
-        return resolve_channel_skills(self.config.extra, channel_id, parent_id)
+        bindings = self.config.extra.get("channel_skill_bindings", [])
+        if not bindings:
+            return None
+        ids_to_check = {channel_id}
+        if parent_id:
+            ids_to_check.add(parent_id)
+        for entry in bindings:
+            entry_id = str(entry.get("id", ""))
+            if entry_id in ids_to_check:
+                skills = entry.get("skills") or entry.get("skill")
+                if isinstance(skills, str):
+                    return [skills]
+                if isinstance(skills, list) and skills:
+                    return list(dict.fromkeys(skills))  # dedup, preserve order
+        return None

    def _resolve_channel_prompt(self, channel_id: str, parent_id: str | None = None) -> str | None:
        """Resolve a Discord per-channel prompt, preferring the exact channel over its parent."""
@@ -3243,7 +3261,6 @@ class DiscordAdapter(BasePlatformAdapter):
            if auto_thread and not skip_thread and not is_voice_linked_channel and not is_reply_message:
                thread = await self._auto_create_thread(message)
                if thread:
-                    parent_channel_id = str(message.channel.id)
                    is_thread = True
                    thread_id = str(thread.id)
                    auto_threaded_channel = thread
@@ -3294,7 +3311,6 @@ class DiscordAdapter(BasePlatformAdapter):
        chat_topic = self._get_effective_topic(message.channel, is_thread=is_thread)

        # Build source
-        guild = getattr(message, "guild", None)
        source = self.build_source(
            chat_id=str(effective_channel.id),
            chat_name=chat_name,
@@ -3304,9 +3320,6 @@ class DiscordAdapter(BasePlatformAdapter):
            thread_id=thread_id,
            chat_topic=chat_topic,
            is_bot=getattr(message.author, "bot", False),
-            guild_id=str(guild.id) if guild else None,
-            parent_chat_id=parent_channel_id,
-            message_id=str(message.id),
        )

        # Build media URLs -- download image attachments to local cache so the
@@ -28,7 +28,6 @@ from email.header import decode_header
 from email.mime.multipart import MIMEMultipart
 from email.mime.text import MIMEText
 from email.mime.base import MIMEBase
-from email.utils import formatdate
 from email import encoders
 from pathlib import Path
 from typing import Any, Dict, List, Optional
@@ -505,7 +504,6 @@ class EmailAdapter(BasePlatformAdapter):
            msg["In-Reply-To"] = original_msg_id
            msg["References"] = original_msg_id

-        msg["Date"] = formatdate(localtime=True)
        msg_id = f"<hermes-{uuid.uuid4().hex[:12]}@{self._address.split('@')[1]}>"
        msg["Message-ID"] = msg_id

@@ -588,7 +586,6 @@ class EmailAdapter(BasePlatformAdapter):
            msg["In-Reply-To"] = original_msg_id
            msg["References"] = original_msg_id

-        msg["Date"] = formatdate(localtime=True)
        msg_id = f"<hermes-{uuid.uuid4().hex[:12]}@{self._address.split('@')[1]}>"
        msg["Message-ID"] = msg_id

@@ -57,15 +57,6 @@ class MessageDeduplicator:
        if len(self._seen) > self._max_size:
            cutoff = now - self._ttl
            self._seen = {k: v for k, v in self._seen.items() if v > cutoff}
-            if len(self._seen) > self._max_size:
-                # TTL pruning alone does not cap the cache when every entry is
-                # still fresh. Keep the newest entries so the helper's
-                # max_size bound is enforced under sustained traffic.
-                newest = sorted(
-                    self._seen.items(),
-                    key=lambda item: item[1],
-                )[-self._max_size:]
-                self._seen = dict(newest)
        return False

    def clear(self):
@@ -1178,83 +1178,13 @@ class MatrixAdapter(BasePlatformAdapter):
    # Event callbacks
    # ------------------------------------------------------------------

-    def _is_self_sender(self, sender: str) -> bool:
-        """Return True if the sender refers to the bot's own account.
-
-        Matrix user IDs are byte-compared after trimming whitespace and
-        lowercasing — some homeservers normalize the localpart case
-        differently at different API surfaces, and the reply-loop tail
-        of the "hall of mirrors" bug (#15763) has been observed with the
-        bot's own account bypassing a case-sensitive equality check.
-
-        When ``self._user_id`` is empty (whoami hasn't resolved yet, or
-        login failed), we cannot prove a sender is NOT us, so we return
-        True defensively — an unidentified bot dropping its own events
-        is always preferable to falling into an echo loop.
-        """
-        own = (self._user_id or "").strip().lower()
-        if not own:
-            return True
-        return sender.strip().lower() == own
-
-    @staticmethod
-    def _is_system_or_bridge_sender(sender: str) -> bool:
-        """Return True if the sender looks like a system / bridge / appservice
-        identity rather than a real user.
-
-        Appservice namespaces on Matrix conventionally prefix bot / puppet
-        user IDs with an underscore (e.g. ``@_telegram_12345:server``,
-        ``@_discord_999:server``, ``@_slack_...:server``).  Server-notices
-        bots and bridge-controller bots on many homeservers use the same
-        pattern.
-
-        We treat these as system identities for pairing purposes: they
-        should never be offered a pairing code, because an operator
-        approving the code would hand the bridge itself permanent
-        authorization — and every outbound message relayed by the bridge
-        would then loop back into the agent as an "authorized user
-        message", which is the root of issue #15763.
-
-        Matches:
-            ``@_something:server``   — appservice namespace convention
-            ``@:server``             — malformed / empty localpart
-            ``:server``              — malformed, no leading ``@``
-        """
-        s = (sender or "").strip()
-        if not s:
-            return True
-        # Localpart is everything between leading '@' and ':'
-        if s.startswith("@"):
-            s = s[1:]
-        if ":" in s:
-            localpart, _, _ = s.partition(":")
-        else:
-            localpart = s
-        if not localpart:
-            return True
-        return localpart.startswith("_")
-
    async def _on_room_message(self, event: Any) -> None:
        """Handle incoming room message events (text, media)."""
        room_id = str(getattr(event, "room_id", ""))
        sender = str(getattr(event, "sender", ""))

-        # Ignore own messages (case-insensitive; also drops when our own
-        # user_id hasn't been resolved yet — see _is_self_sender docstring
-        # and issue #15763).
-        if self._is_self_sender(sender):
-            return
-
-        # Ignore appservice / bridge / system identities so they never
-        # trigger the pairing flow.  Once a bridge user is paired, every
-        # outbound message it relays would loop back as an authorized
-        # user message (the "hall of mirrors" in #15763).
-        if self._is_system_or_bridge_sender(sender):
-            logger.debug(
-                "Matrix: ignoring system/bridge sender %s in %s",
-                sender,
-                room_id,
-            )
+        # Ignore own messages.
+        if sender == self._user_id:
            return

        # Deduplicate by event ID.
@@ -1724,7 +1654,7 @@ class MatrixAdapter(BasePlatformAdapter):
    async def _on_reaction(self, event: Any) -> None:
        """Handle incoming reaction events."""
        sender = str(getattr(event, "sender", ""))
-        if self._is_self_sender(sender):
+        if sender == self._user_id:
            return
        event_id = str(getattr(event, "event_id", ""))
        if self._is_duplicate_event(event_id):
@@ -1209,31 +1209,6 @@ class TelegramAdapter(BasePlatformAdapter):
            )
            return SendResult(success=False, error=str(e))

-    async def delete_message(self, chat_id: str, message_id: str) -> bool:
-        """Delete a previously sent Telegram message.
-
-        Used by the stream consumer's fresh-final cleanup path (ported
-        from openclaw/openclaw#72038) to remove long-lived preview
-        messages after sending the completed reply as a fresh message.
-        Telegram's Bot API ``deleteMessage`` works for bot-posted
-        messages in the last 48 hours.  Failures are non-fatal — the
-        caller leaves the preview in place and logs at debug level.
-        """
-        if not self._bot:
-            return False
-        try:
-            await self._bot.delete_message(
-                chat_id=int(chat_id),
-                message_id=int(message_id),
-            )
-            return True
-        except Exception as e:
-            logger.debug(
-                "[%s] Failed to delete Telegram message %s: %s",
-                self.name, message_id, e,
-            )
-            return False
-
    async def send_update_prompt(
        self, chat_id: str, prompt: str, default: str = "",
        session_key: str = "",
@@ -2353,26 +2328,6 @@ class TelegramAdapter(BasePlatformAdapter):
                    user = getattr(entity, "user", None)
                    if user and getattr(user, "id", None) == bot_id:
                        return True
-                elif entity_type == "bot_command" and expected:
-                    # Telegram's official group-disambiguation form for slash
-                    # commands (``/cmd@botname``) is emitted as a single
-                    # ``bot_command`` entity covering the whole span — there
-                    # is no accompanying ``mention`` entity. Treat it as a
-                    # direct address to this bot when the ``@botname`` suffix
-                    # matches. This is the form Telegram's own command menu
-                    # autocomplete produces in groups, so dropping it at the
-                    # mention gate would break /new, /reset, /help, ... for
-                    # every group that has ``require_mention`` enabled (#15415).
-                    offset = int(getattr(entity, "offset", -1))
-                    length = int(getattr(entity, "length", 0))
-                    if offset < 0 or length <= 0:
-                        continue
-                    command_text = source_text[offset:offset + length]
-                    at_index = command_text.find("@")
-                    if at_index < 0:
-                        continue
-                    if command_text[at_index:].strip().lower() == expected:
-                        return True
        return False

    def _message_matches_mention_patterns(self, message: Message) -> bool:
@@ -1,647 +0,0 @@
-"""
-yuanbao_media.py — 元宝平台媒体处理模块
-
-提供 COS 上传、文件下载、TIM 媒体消息构建等功能。
-移植自 TypeScript 版 media.ts（yuanbao-openclaw-plugin），
-使用 httpx 替代 cos-nodejs-sdk-v5，避免引入额外 SDK 依赖。
-
-COS 上传流程：
-  1. 调用 genUploadInfo 获取临时凭证（tmpSecretId/tmpSecretKey/sessionToken）
-  2. 用临时凭证通过 HMAC-SHA1 签名构建 Authorization 头
-  3. HTTP PUT 上传到 COS
-
-TIM 消息体构建：
-  - buildImageMsgBody() → TIMImageElem
-  - buildFileMsgBody()  → TIMFileElem
-"""
-
-from __future__ import annotations
-
-import hashlib
-import hmac
-import logging
-import os
-import re
-import secrets
-import struct
-import time
-import urllib.parse
-from datetime import datetime, timezone, timedelta
-from typing import Optional, Any
-
-import httpx
-
-logger = logging.getLogger(__name__)
-
-# ============ 常量 ============
-
-UPLOAD_INFO_PATH = "/api/resource/genUploadInfo"
-DEFAULT_API_DOMAIN = "yuanbao.tencent.com"
-DEFAULT_MAX_SIZE_MB = 50
-
-# COS 加速域名后缀（优先使用全球加速）
-COS_USE_ACCELERATE = True
-
-# ============ 类型映射 ============
-
-# MIME → image_format 数字（TIM 协议字段）
-_MIME_TO_IMAGE_FORMAT: dict[str, int] = {
-    "image/jpeg": 1,
-    "image/jpg": 1,
-    "image/gif": 2,
-    "image/png": 3,
-    "image/bmp": 4,
-    "image/webp": 255,
-    "image/heic": 255,
-    "image/tiff": 255,
-}
-
-# 文件扩展名 → MIME
-_EXT_TO_MIME: dict[str, str] = {
-    ".jpg": "image/jpeg",
-    ".jpeg": "image/jpeg",
-    ".png": "image/png",
-    ".gif": "image/gif",
-    ".webp": "image/webp",
-    ".bmp": "image/bmp",
-    ".heic": "image/heic",
-    ".tiff": "image/tiff",
-    ".ico": "image/x-icon",
-    ".pdf": "application/pdf",
-    ".doc": "application/msword",
-    ".docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
-    ".xls": "application/vnd.ms-excel",
-    ".xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
-    ".ppt": "application/vnd.ms-powerpoint",
-    ".pptx": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
-    ".txt": "text/plain",
-    ".zip": "application/zip",
-    ".tar": "application/x-tar",
-    ".gz": "application/gzip",
-    ".mp3": "audio/mpeg",
-    ".mp4": "video/mp4",
-    ".wav": "audio/wav",
-    ".ogg": "audio/ogg",
-    ".webm": "video/webm",
-}
-
-
-# ============ 工具函数 ============
-
-def guess_mime_type(filename: str) -> str:
-    """根据文件扩展名猜测 MIME 类型。"""
-    ext = os.path.splitext(filename)[-1].lower()
-    return _EXT_TO_MIME.get(ext, "application/octet-stream")
-
-
-def is_image(filename: str, mime_type: str = "") -> bool:
-    """判断是否为图片类型。"""
-    if mime_type.startswith("image/"):
-        return True
-    ext = os.path.splitext(filename)[-1].lower()
-    return ext in {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp", ".heic", ".tiff", ".ico"}
-
-
-def get_image_format(mime_type: str) -> int:
-    """获取 TIM 图片格式编号。"""
-    return _MIME_TO_IMAGE_FORMAT.get(mime_type.lower(), 255)
-
-
-def md5_hex(data: bytes) -> str:
-    """计算 MD5 十六进制摘要。"""
-    return hashlib.md5(data).hexdigest()
-
-
-def generate_file_id() -> str:
-    """生成随机文件 ID（32 位 hex）。"""
-    return secrets.token_hex(16)
-
-
-
-# ============ 图片尺寸解析（纯 Python，无需 Pillow） ============
-
-def parse_image_size(data: bytes) -> Optional[dict[str, int]]:
-    """
-    解析图片宽高（支持 JPEG/PNG/GIF/WebP），无需第三方依赖。
-    返回 {"width": w, "height": h} 或 None（无法识别）。
-    """
-    return (
-        _parse_png_size(data)
-        or _parse_jpeg_size(data)
-        or _parse_gif_size(data)
-        or _parse_webp_size(data)
-    )
-
-
-def _parse_png_size(buf: bytes) -> Optional[dict[str, int]]:
-    if len(buf) < 24:
-        return None
-    if buf[:4] != b"\x89PNG":
-        return None
-    w = struct.unpack(">I", buf[16:20])[0]
-    h = struct.unpack(">I", buf[20:24])[0]
-    return {"width": w, "height": h}
-
-
-def _parse_jpeg_size(buf: bytes) -> Optional[dict[str, int]]:
-    if len(buf) < 4 or buf[0] != 0xFF or buf[1] != 0xD8:
-        return None
-    i = 2
-    while i < len(buf) - 9:
-        if buf[i] != 0xFF:
-            i += 1
-            continue
-        marker = buf[i + 1]
-        if marker in (0xC0, 0xC2):
-            h = struct.unpack(">H", buf[i + 5: i + 7])[0]
-            w = struct.unpack(">H", buf[i + 7: i + 9])[0]
-            return {"width": w, "height": h}
-        if i + 3 < len(buf):
-            i += 2 + struct.unpack(">H", buf[i + 2: i + 4])[0]
-        else:
-            break
-    return None
-
-
-def _parse_gif_size(buf: bytes) -> Optional[dict[str, int]]:
-    if len(buf) < 10:
-        return None
-    sig = buf[:6].decode("ascii", errors="replace")
-    if sig not in ("GIF87a", "GIF89a"):
-        return None
-    w = struct.unpack("<H", buf[6:8])[0]
-    h = struct.unpack("<H", buf[8:10])[0]
-    return {"width": w, "height": h}
-
-
-def _parse_webp_size(buf: bytes) -> Optional[dict[str, int]]:
-    if len(buf) < 16:
-        return None
-    if buf[:4] != b"RIFF" or buf[8:12] != b"WEBP":
-        return None
-    chunk = buf[12:16].decode("ascii", errors="replace")
-    if chunk == "VP8 ":
-        if len(buf) >= 30 and buf[23] == 0x9D and buf[24] == 0x01 and buf[25] == 0x2A:
-            w = struct.unpack("<H", buf[26:28])[0] & 0x3FFF
-            h = struct.unpack("<H", buf[28:30])[0] & 0x3FFF
-            return {"width": w, "height": h}
-    elif chunk == "VP8L":
-        if len(buf) >= 25 and buf[20] == 0x2F:
-            bits = struct.unpack("<I", buf[21:25])[0]
-            w = (bits & 0x3FFF) + 1
-            h = ((bits >> 14) & 0x3FFF) + 1
-            return {"width": w, "height": h}
-    elif chunk == "VP8X":
-        if len(buf) >= 30:
-            w = (buf[24] | (buf[25] << 8) | (buf[26] << 16)) + 1
-            h = (buf[27] | (buf[28] << 8) | (buf[29] << 16)) + 1
-            return {"width": w, "height": h}
-    return None
-
-
-# ============ URL 下载 ============
-
-async def download_url(
-    url: str,
-    max_size_mb: int = DEFAULT_MAX_SIZE_MB,
-) -> tuple[bytes, str]:
-    """
-    下载 URL 内容，返回 (bytes, content_type)。
-
-    Args:
-        url:          HTTP(S) URL
-        max_size_mb:  最大允许大小（MB），超过则抛出异常
-
-    Returns:
-        (data_bytes, content_type_string)
-
-    Raises:
-        ValueError:  内容超过大小限制
-        httpx.HTTPError: 网络/HTTP 错误
-    """
-    max_bytes = max_size_mb * 1024 * 1024
-    async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
-        # 先 HEAD 检查大小
-        try:
-            head = await client.head(url)
-            content_length = int(head.headers.get("content-length", 0) or 0)
-            if content_length > 0 and content_length > max_bytes:
-                raise ValueError(
-                    f"文件过大: {content_length / 1024 / 1024:.1f} MB > {max_size_mb} MB"
-                )
-        except httpx.HTTPStatusError:
-            pass  # 部分服务器不支持 HEAD，忽略
-
-        # GET 下载（流式读取，防止超限）
-        async with client.stream("GET", url) as resp:
-            resp.raise_for_status()
-
-            content_type = resp.headers.get("content-type", "").split(";")[0].strip()
-
-            chunks: list[bytes] = []
-            downloaded = 0
-            async for chunk in resp.aiter_bytes(65536):
-                downloaded += len(chunk)
-                if downloaded > max_bytes:
-                    raise ValueError(
-                        f"文件过大: 已超过 {max_size_mb} MB 限制"
-                    )
-                chunks.append(chunk)
-
-        data = b"".join(chunks)
-        return data, content_type
-
-
-# ============ COS 鉴权（HMAC-SHA1） ============
-
-def _cos_sign(
-    method: str,
-    path: str,
-    params: dict[str, str],
-    headers: dict[str, str],
-    secret_id: str,
-    secret_key: str,
-    start_time: Optional[int] = None,
-    expire_seconds: int = 3600,
-) -> str:
-    """
-    构建 COS 请求签名（q-sign-algorithm=sha1 方案）。
-    参考：https://cloud.tencent.com/document/product/436/7778
-
-    Args:
-        method:         HTTP 方法（小写，如 "put"）
-        path:           URL 路径（URL encode 后的小写）
-        params:         URL 查询参数 dict（用于签名）
-        headers:        参与签名的请求头 dict（key 需小写）
-        secret_id:      临时 SecretId（tmpSecretId）
-        secret_key:     临时 SecretKey（tmpSecretKey）
-        start_time:     签名起始 Unix 时间戳（默认 now）
-        expire_seconds: 签名有效期（秒，默认 3600）
-
-    Returns:
-        Authorization header 值（完整字符串）
-    """
-    now = int(time.time())
-    q_sign_time = f"{start_time or now};{(start_time or now) + expire_seconds}"
-
-    # Step 1: SignKey = HMAC-SHA1(SecretKey, q-sign-time)
-    sign_key = hmac.new(
-        secret_key.encode("utf-8"),
-        q_sign_time.encode("utf-8"),
-        hashlib.sha1,
-    ).hexdigest()
-
-    # Step 2: HttpString
-    # 参数和头部需按字典序排列，key 小写
-    sorted_params = sorted((k.lower(), urllib.parse.quote(str(v), safe="") ) for k, v in params.items())
-    sorted_headers = sorted((k.lower(), urllib.parse.quote(str(v), safe="") ) for k, v in headers.items())
-
-    url_param_list = ";".join(k for k, _ in sorted_params)
-    url_params = "&".join(f"{k}={v}" for k, v in sorted_params)
-    header_list = ";".join(k for k, _ in sorted_headers)
-    header_str = "&".join(f"{k}={v}" for k, v in sorted_headers)
-
-    http_string = "\n".join([
-        method.lower(),
-        path,
-        url_params,
-        header_str,
-        "",
-    ])
-
-    # Step 3: StringToSign = sha1 hash of HttpString
-    sha1_of_http = hashlib.sha1(http_string.encode("utf-8")).hexdigest()
-    string_to_sign = "\n".join([
-        "sha1",
-        q_sign_time,
-        sha1_of_http,
-        "",
-    ])
-
-    # Step 4: Signature = HMAC-SHA1(SignKey, StringToSign)
-    signature = hmac.new(
-        sign_key.encode("utf-8"),
-        string_to_sign.encode("utf-8"),
-        hashlib.sha1,
-    ).hexdigest()
-
-    return (
-        f"q-sign-algorithm=sha1"
-        f"&q-ak={secret_id}"
-        f"&q-sign-time={q_sign_time}"
-        f"&q-key-time={q_sign_time}"
-        f"&q-header-list={header_list}"
-        f"&q-url-param-list={url_param_list}"
-        f"&q-signature={signature}"
-    )
-
-
-# ============ 主要公开 API ============
-
-async def get_cos_credentials(
-    app_key: str,
-    api_domain: str,
-    token: str,
-    filename: str = "file",
-    file_id: Optional[str] = None,
-    bot_id: str = "",
-    route_env: str = "",
-) -> dict:
-    """
-    调用 genUploadInfo 接口获取 COS 临时密钥及上传配置。
-
-    Args:
-        app_key:        应用 Key（用于 X-ID 头）
-        api_domain:     API 域名（如 https://bot.yuanbao.tencent.com）
-        token:          当前有效的签票 token（X-Token 头）
-        filename:       待上传的文件名（含扩展名）
-        file_id:        客户端生成的唯一文件 ID（不传则自动生成）
-        bot_id:         Bot 账号 ID（用于 X-ID 头）
-
-    Returns:
-        COS 上传配置 dict，包含以下字段：
-            bucketName         (str)  — COS Bucket 名称
-            region             (str)  — COS 地域
-            location           (str)  — 上传 Key（对象路径）
-            encryptTmpSecretId (str)  — 临时 SecretId
-            encryptTmpSecretKey(str)  — 临时 SecretKey
-            encryptToken       (str)  — SessionToken
-            startTime          (int)  — 凭证起始时间戳（Unix）
-            expiredTime        (int)  — 凭证过期时间戳（Unix）
-            resourceUrl        (str)  — 上传后的公网访问 URL
-            resourceID         (str)  — 资源 ID（可选）
-
-    Raises:
-        RuntimeError: 接口返回非 0 code 或字段缺失
-    """
-    if file_id is None:
-        file_id = generate_file_id()
-
-    upload_url = f"{api_domain.rstrip('/')}{UPLOAD_INFO_PATH}"
-
-    headers = {
-        "Content-Type": "application/json",
-        "X-Token": token,
-        "X-ID": bot_id or app_key,
-        "X-Source": "web",
-    }
-    if route_env:
-        headers["X-Route-Env"] = route_env
-    body = {
-        "fileName": filename,
-        "fileId": file_id,
-        "docFrom": "localDoc",
-        "docOpenId": "",
-    }
-
-    async with httpx.AsyncClient(timeout=15.0) as client:
-        resp = await client.post(upload_url, json=body, headers=headers)
-        resp.raise_for_status()
-        result: dict[str, Any] = resp.json()
-
-    code = result.get("code")
-    if code != 0 and code is not None:
-        raise RuntimeError(
-            f"genUploadInfo 失败: code={code}, msg={result.get('msg', '')}"
-        )
-
-    data = result.get("data") or result
-    required_fields = ["bucketName", "location"]
-    missing = [f for f in required_fields if not data.get(f)]
-    if missing:
-        raise RuntimeError(
-            f"genUploadInfo 返回字段不完整: 缺少字段 {missing}"
-        )
-
-    return data
-
-
-async def upload_to_cos(
-    file_bytes: bytes,
-    filename: str,
-    content_type: str,
-    credentials: dict,
-    bucket: str,
-    region: str,
-) -> dict:
-    """
-    通过 httpx PUT 请求将文件上传到 COS。
-    使用临时凭证（tmpSecretId/tmpSecretKey/sessionToken）构建 HMAC-SHA1 签名。
-
-    Args:
-        file_bytes:   文件二进制内容
-        filename:     文件名（用于辅助计算 MIME、UUID）
-        content_type: MIME 类型（如 "image/jpeg"）
-        credentials:  get_cos_credentials() 返回的 dict，包含：
-                        encryptTmpSecretId  → tmpSecretId
-                        encryptTmpSecretKey → tmpSecretKey
-                        encryptToken        → sessionToken
-                        location            → COS key（对象路径）
-                        resourceUrl         → 上传后公网 URL
-                        startTime           → 凭证起始时间（Unix）
-                        expiredTime         → 凭证过期时间（Unix）
-        bucket:       COS Bucket 名称（如 chatbot-1234567890）
-        region:       COS 地域（如 ap-guangzhou）
-
-    Returns:
-        上传结果 dict，包含：
-            url       (str)           — COS 公网访问 URL
-            uuid      (str)           — 文件内容 MD5
-            size      (int)           — 文件大小（字节）
-            width     (int, optional) — 图片宽度（仅图片）
-            height    (int, optional) — 图片高度（仅图片）
-
-    Raises:
-        httpx.HTTPStatusError: COS 返回非 2xx 状态
-        RuntimeError:          credentials 字段缺失
-    """
-    secret_id: str = credentials.get("encryptTmpSecretId", "")
-    secret_key: str = credentials.get("encryptTmpSecretKey", "")
-    session_token: str = credentials.get("encryptToken", "")
-    cos_key: str = credentials.get("location", "")
-    resource_url: str = credentials.get("resourceUrl", "")
-    start_time: Optional[int] = credentials.get("startTime")
-    expired_time: Optional[int] = credentials.get("expiredTime")
-
-    if not secret_id or not secret_key or not cos_key:
-        raise RuntimeError(
-            f"COS credentials 不完整: secretId={bool(secret_id)}, "
-            f"secretKey={bool(secret_key)}, location={bool(cos_key)}"
-        )
-
-    # 构建 COS 上传 URL（优先使用全球加速域名）
-    if COS_USE_ACCELERATE:
-        cos_host = f"{bucket}.cos.accelerate.myqcloud.com"
-    else:
-        cos_host = f"{bucket}.cos.{region}.myqcloud.com"
-
-    # URL encode cos_key（保留 /）
-    encoded_key = urllib.parse.quote(cos_key, safe="/")
-    cos_url = f"https://{cos_host}/{encoded_key.lstrip('/')}"
-
-    # 确定 Content-Type
-    if not content_type or content_type == "application/octet-stream":
-        if is_image(filename):
-            content_type = guess_mime_type(filename)
-        else:
-            content_type = "application/octet-stream"
-
-    # 计算文件 MD5 + size
-    file_uuid = md5_hex(file_bytes)
-    file_size = len(file_bytes)
-
-    # 参与签名的请求头
-    sign_headers = {
-        "host": cos_host,
-        "content-type": content_type,
-        "x-cos-security-token": session_token,
-    }
-
-    # 计算签名有效期
-    now = int(time.time())
-    sign_start = start_time if start_time else now
-    sign_expire = (expired_time - now) if expired_time and expired_time > now else 3600
-
-    authorization = _cos_sign(
-        method="put",
-        path=f"/{encoded_key.lstrip('/')}",
-        params={},
-        headers=sign_headers,
-        secret_id=secret_id,
-        secret_key=secret_key,
-        start_time=sign_start,
-        expire_seconds=sign_expire,
-    )
-
-    put_headers = {
-        "Authorization": authorization,
-        "Content-Type": content_type,
-        "x-cos-security-token": session_token,
-    }
-
-    logger.info(
-        "COS PUT: bucket=%s region=%s key=%s size=%d mime=%s",
-        bucket, region, cos_key, file_size, content_type,
-    )
-
-    async with httpx.AsyncClient(timeout=120.0) as client:
-        resp = await client.put(
-            cos_url,
-            content=file_bytes,
-            headers=put_headers,
-        )
-        resp.raise_for_status()
-
-    # 解析图片尺寸（仅图片类型）
-    result: dict[str, Any] = {
-        "url": resource_url or cos_url,
-        "uuid": file_uuid,
-        "size": file_size,
-    }
-
-    if content_type.startswith("image/"):
-        size_info = parse_image_size(file_bytes)
-        if size_info:
-            result["width"] = size_info["width"]
-            result["height"] = size_info["height"]
-
-    logger.info(
-        "COS 上传成功: url=%s size=%d",
-        result["url"], file_size,
-    )
-    return result
-
-
-# ============ TIM 媒体消息构建 ============
-
-def build_image_msg_body(
-    url: str,
-    uuid: Optional[str] = None,
-    filename: Optional[str] = None,
-    size: int = 0,
-    width: int = 0,
-    height: int = 0,
-    mime_type: str = "",
-) -> list[dict]:
-    """
-    构建腾讯 IM TIMImageElem 消息体。
-    参考：https://cloud.tencent.com/document/product/269/2720
-
-    Args:
-        url:       图片公网访问 URL（COS resourceUrl）
-        uuid:      文件 UUID（MD5 或其他唯一标识）
-        filename:  文件名（uuid 为空时作为备用）
-        size:      文件大小（字节）
-        width:     图片宽度（像素）
-        height:    图片高度（像素）
-        mime_type: MIME 类型（用于确定 image_format）
-
-    Returns:
-        TIMImageElem 消息体列表（适合直接放入 msg_body）
-    """
-    _uuid = uuid or filename or _basename_from_url(url) or "image"
-    image_format = get_image_format(mime_type) if mime_type else 255
-
-    return [
-        {
-            "msg_type": "TIMImageElem",
-            "msg_content": {
-                "uuid": _uuid,
-                "image_format": image_format,
-                "image_info_array": [
-                    {
-                        "type": 1,       # 1 = 原图
-                        "size": size,
-                        "width": width,
-                        "height": height,
-                        "url": url,
-                    }
-                ],
-            },
-        }
-    ]
-
-
-def build_file_msg_body(
-    url: str,
-    filename: str,
-    uuid: Optional[str] = None,
-    size: int = 0,
-) -> list[dict]:
-    """
-    构建腾讯 IM TIMFileElem 消息体。
-    参考：https://cloud.tencent.com/document/product/269/2720
-
-    Args:
-        url:      文件公网访问 URL（COS resourceUrl）
-        filename: 文件名（含扩展名）
-        uuid:     文件 UUID（MD5 或其他唯一标识，不传则使用 filename）
-        size:     文件大小（字节）
-
-    Returns:
-        TIMFileElem 消息体列表（适合直接放入 msg_body）
-    """
-    _uuid = uuid or filename
-
-    return [
-        {
-            "msg_type": "TIMFileElem",
-            "msg_content": {
-                "uuid": _uuid,
-                "file_name": filename,
-                "file_size": size,
-                "url": url,
-            },
-        }
-    ]
-
-
-# ============ 内部工具 ============
-
-def _basename_from_url(url: str) -> str:
-    """从 URL 提取文件名。"""
-    try:
-        parsed = urllib.parse.urlparse(url)
-        return os.path.basename(parsed.path)
-    except Exception:
-        return ""
@@ -1,558 +0,0 @@
-"""
-Yuanbao sticker (TIMFaceElem) support.
-
-Ported from yuanbao-openclaw-plugin/src/sticker/.
-
-TIMFaceElem wire format:
-    {
-        "msg_type": "TIMFaceElem",
-        "msg_content": {
-            "index": 0,          # always 0 per Yuanbao convention
-            "data": "<json>",    # serialised sticker metadata
-        }
-    }
-
-The `data` field carries a JSON string with the sticker's metadata so the
-receiver can look up the correct asset in the emoji pack.
-"""
-
-from __future__ import annotations
-
-import json
-import random
-import re
-import unicodedata
-from typing import Optional
-
-# ---------------------------------------------------------------------------
-# Sticker catalogue – ported from builtin-stickers.json
-# Key   : canonical name (Chinese)
-# Value : {sticker_id, package_id, name, description, width, height, formats}
-# ---------------------------------------------------------------------------
-STICKER_MAP: dict[str, dict] = {
-    "六六六": {
-        "sticker_id": "278", "package_id": "1003", "name": "六六六",
-        "description": "666 厉害 牛 棒 绝了 好强 awesome",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "我想开了": {
-        "sticker_id": "262", "package_id": "1003", "name": "我想开了",
-        "description": "想开 佛系 释怀 顿悟 看淡了 无所谓",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "害羞": {
-        "sticker_id": "130", "package_id": "1003", "name": "害羞",
-        "description": "腼腆 不好意思 脸红 娇羞 羞涩 捂脸",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "比心": {
-        "sticker_id": "252", "package_id": "1003", "name": "比心",
-        "description": "笔芯 爱你 爱心手势 love heart 喜欢你",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "委屈": {
-        "sticker_id": "125", "package_id": "1003", "name": "委屈",
-        "description": "难过 想哭 可怜巴巴 瘪嘴 受伤 被欺负",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "亲亲": {
-        "sticker_id": "146", "package_id": "1003", "name": "亲亲",
-        "description": "么么 mua 亲一下 kiss 飞吻 啵",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "酷": {
-        "sticker_id": "131", "package_id": "1003", "name": "酷",
-        "description": "帅 墨镜 cool 高冷 有型 swagger",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "睡": {
-        "sticker_id": "145", "package_id": "1003", "name": "睡",
-        "description": "睡觉 困 zzZ 打盹 躺平 休眠 sleepy",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "发呆": {
-        "sticker_id": "152", "package_id": "1003", "name": "发呆",
-        "description": "懵 愣住 放空 呆滞 出神 脑子空白",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "可怜": {
-        "sticker_id": "157", "package_id": "1003", "name": "可怜",
-        "description": "卖萌 求饶 委屈巴巴 弱小 拜托 眼巴巴",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "摊手": {
-        "sticker_id": "200", "package_id": "1003", "name": "摊手",
-        "description": "无奈 没办法 耸肩 随便 那咋整 whatever",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "头大": {
-        "sticker_id": "213", "package_id": "1003", "name": "头大",
-        "description": "头疼 烦恼 郁闷 难搞 崩溃 一团乱",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "吓": {
-        "sticker_id": "256", "package_id": "1003", "name": "吓",
-        "description": "害怕 惊恐 震惊 吓一跳 恐怖 怂",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "吐血": {
-        "sticker_id": "203", "package_id": "1003", "name": "吐血",
-        "description": "无语 崩溃 被雷 内伤 一口老血 屮",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "哼": {
-        "sticker_id": "185", "package_id": "1003", "name": "哼",
-        "description": "傲娇 生气 不满 撇嘴 不理 赌气",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "嘿嘿": {
-        "sticker_id": "220", "package_id": "1003", "name": "嘿嘿",
-        "description": "坏笑 猥琐笑 偷笑 憨笑 得意 你懂的",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "头秃": {
-        "sticker_id": "218", "package_id": "1003", "name": "头秃",
-        "description": "程序员 加班 焦虑 没头发 秃了 肝爆",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "暗中观察": {
-        "sticker_id": "221", "package_id": "1003", "name": "暗中观察",
-        "description": "窥屏 潜水 偷偷看 角落 围观 屏住呼吸",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "我酸了": {
-        "sticker_id": "224", "package_id": "1003", "name": "我酸了",
-        "description": "嫉妒 柠檬精 羡慕 吃柠檬 眼红 恰柠檬",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "打call": {
-        "sticker_id": "246", "package_id": "1003", "name": "打call",
-        "description": "应援 加油 支持 喝彩 助威 call",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "庆祝": {
-        "sticker_id": "251", "package_id": "1003", "name": "庆祝",
-        "description": "祝贺 开心 耶 party 胜利 干杯",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "奋斗": {
-        "sticker_id": "151", "package_id": "1003", "name": "奋斗",
-        "description": "努力 加油 拼搏 冲 干劲 卷起来",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "惊讶": {
-        "sticker_id": "143", "package_id": "1003", "name": "惊讶",
-        "description": "震惊 哇 不敢相信 OMG 居然 这么离谱",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "疑问": {
-        "sticker_id": "144", "package_id": "1003", "name": "疑问",
-        "description": "问号 不懂 啥 为什么 啥情况 懵逼问",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "仔细分析": {
-        "sticker_id": "248", "package_id": "1003", "name": "仔细分析",
-        "description": "思考 推敲 认真 研究 琢磨 让我想想",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "撅嘴": {
-        "sticker_id": "184", "package_id": "1003", "name": "撅嘴",
-        "description": "嘟嘴 卖萌 不高兴 撒娇 嘴翘",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "泪奔": {
-        "sticker_id": "199", "package_id": "1003", "name": "泪奔",
-        "description": "大哭 伤心 破防 感动哭 泪流满面 呜呜",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "尊嘟假嘟": {
-        "sticker_id": "276", "package_id": "1003", "name": "尊嘟假嘟",
-        "description": "真的假的 真假 可爱问 你骗我 是不是",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "略略略": {
-        "sticker_id": "113", "package_id": "1003", "name": "略略略",
-        "description": "调皮 吐舌 不服 略 气死你 鬼脸",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "困": {
-        "sticker_id": "180", "package_id": "1003", "name": "困",
-        "description": "想睡 倦 打哈欠 睁不开眼 好困啊 sleepy",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "折磨": {
-        "sticker_id": "181", "package_id": "1003", "name": "折磨",
-        "description": "难受 痛苦 煎熬 蚌埠住了 受不了 要命",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "抠鼻": {
-        "sticker_id": "182", "package_id": "1003", "name": "抠鼻",
-        "description": "不屑 无聊 淡定 无所谓 鄙视 挖鼻",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "鼓掌": {
-        "sticker_id": "183", "package_id": "1003", "name": "鼓掌",
-        "description": "拍手 叫好 赞同 666 喝彩 掌声",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "斜眼笑": {
-        "sticker_id": "204", "package_id": "1003", "name": "斜眼笑",
-        "description": "滑稽 坏笑 doge 意味深长 阴阳怪气 嘿嘿嘿",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "辣眼睛": {
-        "sticker_id": "216", "package_id": "1003", "name": "辣眼睛",
-        "description": "看不下去 cringe 毁三观 太丑了 瞎了",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "哦哟": {
-        "sticker_id": "217", "package_id": "1003", "name": "哦哟",
-        "description": "惊讶 起哄 哇哦 有戏 不简单 哟",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "吃瓜": {
-        "sticker_id": "222", "package_id": "1003", "name": "吃瓜",
-        "description": "围观 看戏 八卦 路人 看热闹 板凳",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "狗头": {
-        "sticker_id": "225", "package_id": "1003", "name": "狗头",
-        "description": "doge 保命 开玩笑 滑稽 反讽 懂的都懂",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "敬礼": {
-        "sticker_id": "227", "package_id": "1003", "name": "敬礼",
-        "description": "salute 尊重 收到 遵命 致敬 报告",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "哦": {
-        "sticker_id": "231", "package_id": "1003", "name": "哦",
-        "description": "知道了 明白 敷衍 嗯 这样啊 收到",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "拿到红包": {
-        "sticker_id": "236", "package_id": "1003", "name": "拿到红包",
-        "description": "红包 谢谢老板 发财 开心 抢到了 欧气",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "牛吖": {
-        "sticker_id": "239", "package_id": "1003", "name": "牛吖",
-        "description": "牛 厉害 强 666 佩服 大佬",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "贴贴": {
-        "sticker_id": "272", "package_id": "1003", "name": "贴贴",
-        "description": "抱抱 亲昵 蹭蹭 亲密 靠靠 撒娇贴",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "爱心": {
-        "sticker_id": "138", "package_id": "1003", "name": "爱心",
-        "description": "心 love 喜欢你 红心 示爱 么么哒",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "晚安": {
-        "sticker_id": "170", "package_id": "1003", "name": "晚安",
-        "description": "好梦 睡了 night 早点休息 安啦 moon",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "太阳": {
-        "sticker_id": "176", "package_id": "1003", "name": "太阳",
-        "description": "晴天 早上好 阳光 morning 好天气 日",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "柠檬": {
-        "sticker_id": "266", "package_id": "1003", "name": "柠檬",
-        "description": "酸 嫉妒 柠檬精 羡慕 我酸 恰柠檬",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "大冤种": {
-        "sticker_id": "267", "package_id": "1003", "name": "大冤种",
-        "description": "倒霉 吃亏 自嘲 好心没好报 背锅 工具人",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "吐了": {
-        "sticker_id": "132", "package_id": "1003", "name": "吐了",
-        "description": "恶心 yue 受不了 嫌弃 想吐 生理不适",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "怒": {
-        "sticker_id": "134", "package_id": "1003", "name": "怒",
-        "description": "生气 愤怒 火大 暴躁 气炸 怼",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "玫瑰": {
-        "sticker_id": "165", "package_id": "1003", "name": "玫瑰",
-        "description": "花 示爱 表白 浪漫 送你花 情人节",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "凋谢": {
-        "sticker_id": "119", "package_id": "1003", "name": "凋谢",
-        "description": "花谢 失恋 难过 枯萎 心碎 凉了",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "点赞": {
-        "sticker_id": "159", "package_id": "1003", "name": "点赞",
-        "description": "赞 认同 好棒 good like 大拇指 顶",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "握手": {
-        "sticker_id": "164", "package_id": "1003", "name": "握手",
-        "description": "合作 你好 商务 hello deal 成交 友好",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "抱拳": {
-        "sticker_id": "163", "package_id": "1003", "name": "抱拳",
-        "description": "谢谢 失敬 江湖 承让 拜托 有礼",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "ok": {
-        "sticker_id": "169", "package_id": "1003", "name": "ok",
-        "description": "好的 收到 没问题 okay 行 可以 懂了",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "拳头": {
-        "sticker_id": "174", "package_id": "1003", "name": "拳头",
-        "description": "加油 干 冲 fight 力量 击拳 硬气",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "鞭炮": {
-        "sticker_id": "191", "package_id": "1003", "name": "鞭炮",
-        "description": "过年 喜庆 爆竹 春节 噼里啪啦 红",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "烟花": {
-        "sticker_id": "258", "package_id": "1003", "name": "烟花",
-        "description": "庆典 漂亮 新年 嘭 绽放 节日快乐",
-        "width": 128, "height": 128, "formats": "png",
-    },
-}
-
-
-def get_sticker_by_name(name: str) -> Optional[dict]:
-    """
-    按名称查找贴纸，支持模糊匹配。
-
-    匹配优先级：
-      1. 完全相等（name）
-      2. name 包含查询词（前缀/子串）
-      3. description 包含查询词（同义词搜索）
-      4. 通用模糊评分（与 sticker-search 同算法），命中即返回得分最高的一条
-
-    返回 sticker dict，找不到返回 None。
-    """
-    if not name:
-        return None
-
-    query = name.strip()
-
-    if query in STICKER_MAP:
-        return STICKER_MAP[query]
-
-    for key, sticker in STICKER_MAP.items():
-        if query in key or key in query:
-            return sticker
-
-    for sticker in STICKER_MAP.values():
-        desc = sticker.get("description", "")
-        if query in desc:
-            return sticker
-
-    matches = search_stickers(query, limit=1)
-    return matches[0] if matches else None
-
-
-def get_random_sticker(category: str = None) -> dict:
-    """
-    随机返回一个贴纸。
-
-    若指定 category，则在 description 中含有该关键词的贴纸里随机选取；
-    category 为 None 时从全表随机。
-    """
-    if category:
-        candidates = [
-            s for s in STICKER_MAP.values()
-            if category in s.get("description", "") or category in s.get("name", "")
-        ]
-        if candidates:
-            return random.choice(candidates)
-    return random.choice(list(STICKER_MAP.values()))
-
-
-def get_sticker_by_id(sticker_id: str) -> Optional[dict]:
-    """按 sticker_id 精确查找贴纸。"""
-    if not sticker_id:
-        return None
-    sid = str(sticker_id).strip()
-    for sticker in STICKER_MAP.values():
-        if sticker.get("sticker_id") == sid:
-            return sticker
-    return None
-
-
-# ---------------------------------------------------------------------------
-# 模糊搜索（对齐 chatbot-web yuanbao-openclaw-plugin/sticker-cache.ts.searchStickers）
-# ---------------------------------------------------------------------------
-
-_PUNCT_RE = re.compile(r"[\s\u3000\-_·.,，。!！?？\"“”'‘’、/\\]+")
-
-
-def _normalize_text(raw: str) -> str:
-    return unicodedata.normalize("NFKC", str(raw or "")).strip().lower()
-
-
-def _compact_text(raw: str) -> str:
-    return _PUNCT_RE.sub("", _normalize_text(raw))
-
-
-def _multiset_char_hit_ratio(needle: str, haystack: str) -> float:
-    if not needle:
-        return 0.0
-    bag: dict[str, int] = {}
-    for ch in haystack:
-        bag[ch] = bag.get(ch, 0) + 1
-    hits = 0
-    for ch in needle:
-        n = bag.get(ch, 0)
-        if n > 0:
-            hits += 1
-            bag[ch] = n - 1
-    return hits / len(needle)
-
-
-def _bigram_jaccard(a: str, b: str) -> float:
-    if len(a) < 2 or len(b) < 2:
-        return 0.0
-    A = {a[i:i + 2] for i in range(len(a) - 1)}
-    B = {b[i:i + 2] for i in range(len(b) - 1)}
-    inter = len(A & B)
-    union = len(A) + len(B) - inter
-    return inter / union if union else 0.0
-
-
-def _longest_subsequence_ratio(needle: str, haystack: str) -> float:
-    if not needle:
-        return 0.0
-    j = 0
-    for ch in haystack:
-        if j >= len(needle):
-            break
-        if ch == needle[j]:
-            j += 1
-    return j / len(needle)
-
-
-def _score_field(haystack: str, query: str) -> float:
-    hay = _normalize_text(haystack)
-    q = _normalize_text(query)
-    if not hay or not q:
-        return 0.0
-    hay_c = _compact_text(haystack)
-    q_c = _compact_text(query)
-    best = 0.0
-    if hay == q:
-        best = max(best, 100.0)
-    if q in hay:
-        best = max(best, 92 + min(6, len(q)))
-    if len(q) >= 2 and hay.startswith(q):
-        best = max(best, 88.0)
-    if q_c and q_c in hay_c:
-        best = max(best, 86.0)
-    best = max(best, _multiset_char_hit_ratio(q_c, hay_c) * 62)
-    best = max(best, _bigram_jaccard(q_c, hay_c) * 58)
-    best = max(best, _longest_subsequence_ratio(q_c, hay_c) * 52)
-    if len(q) == 1 and q in hay:
-        best = max(best, 68.0)
-    return best
-
-
-def search_stickers(query: str, limit: int = 10) -> list[dict]:
-    """
-    在内置贴纸表中按模糊匹配排序返回前 N 条结果。
-
-    评分综合 name/description 字段的子串、字符多重集覆盖、bigram Jaccard、子序列比例。
-    name 权重略高于 description（×0.88）。空 query 时按字典顺序返回前 N 条。
-    """
-    safe_limit = max(1, min(500, int(limit) if limit else 10))
-    if not query or not _normalize_text(query):
-        return list(STICKER_MAP.values())[:safe_limit]
-
-    scored: list[tuple[float, dict]] = []
-    for sticker in STICKER_MAP.values():
-        name_s = _score_field(sticker.get("name", ""), query)
-        desc_s = _score_field(sticker.get("description", ""), query) * 0.88
-        sid = str(sticker.get("sticker_id", "")).strip()
-        q_norm = _normalize_text(query)
-        id_s = 0.0
-        if sid and q_norm:
-            sid_norm = _normalize_text(sid)
-            if sid_norm == q_norm:
-                id_s = 100.0
-            elif q_norm in sid_norm:
-                id_s = 84.0
-        scored.append((max(name_s, desc_s, id_s), sticker))
-
-    scored.sort(key=lambda x: x[0], reverse=True)
-    top = scored[0][0] if scored else 0
-    if top <= 0:
-        return [s for _, s in scored[:safe_limit]]
-
-    if top >= 22:
-        floor = 18.0
-    elif top >= 12:
-        floor = max(10.0, top * 0.5)
-    else:
-        floor = max(6.0, top * 0.35)
-
-    filtered = [pair for pair in scored if pair[0] >= floor]
-    out = filtered if filtered else scored
-    return [s for _, s in out[:safe_limit]]
-
-
-def build_face_msg_body(
-    face_index: int,
-    face_type: int = 1,
-    data: Optional[str] = None,
-) -> list:
-    """
-    构造 TIMFaceElem 消息体。
-
-    Yuanbao 约定：
-      - index 固定传 0（服务端通过 data 字段识别具体表情）
-      - data 为 JSON 字符串，包含 sticker_id / package_id 等字段
-
-    Args:
-        face_index: 保留字段，暂时不影响 wire format（Yuanbao 固定 index=0）。
-                    当 face_index > 0 时视为旧版 QQ 表情 ID，直接放入 index。
-        face_type:  保留字段（兼容旧接口，当前未使用）。
-        data:       已序列化的 JSON 字符串；为 None 时仅传 index。
-
-    Returns:
-        符合 Yuanbao TIM 协议的 msg_body list，如::
-
-            [{"msg_type": "TIMFaceElem", "msg_content": {"index": 0, "data": "..."}}]
-    """
-    msg_content: dict = {"index": face_index}
-    if data is not None:
-        msg_content["data"] = data
-    return [{"msg_type": "TIMFaceElem", "msg_content": msg_content}]
-
-
-def build_sticker_msg_body(sticker: dict) -> list:
-    """
-    从 STICKER_MAP 中的 sticker dict 直接构造 TIMFaceElem 消息体。
-
-    这是 send_sticker() 的内部辅助，确保 data 字段与原始 JS 插件一致。
-    """
-    data_payload = json.dumps(
-        {
-            "sticker_id": sticker["sticker_id"],
-            "package_id": sticker["package_id"],
-            "width": sticker.get("width", 128),
-            "height": sticker.get("height", 128),
-            "formats": sticker.get("formats", "png"),
-            "name": sticker["name"],
-        },
-        ensure_ascii=False,
-        separators=(",", ":"),
-    )
-    return build_face_msg_body(face_index=0, data=data_payload)
@@ -87,9 +87,6 @@ class SessionSource:
    user_id_alt: Optional[str] = None  # Platform-specific stable alt ID (Signal UUID, Feishu union_id)
    chat_id_alt: Optional[str] = None  # Signal group internal ID
    is_bot: bool = False  # True when the message author is a bot/webhook (Discord)
-    guild_id: Optional[str] = None  # Discord guild / Slack workspace / Matrix server scope
-    parent_chat_id: Optional[str] = None  # Parent channel when chat_id refers to a thread
-    message_id: Optional[str] = None  # ID of the triggering message (for pin/reply/react)
    
    @property
    def description(self) -> str:
@@ -127,14 +124,8 @@ class SessionSource:
            d["user_id_alt"] = self.user_id_alt
        if self.chat_id_alt:
            d["chat_id_alt"] = self.chat_id_alt
-        if self.guild_id:
-            d["guild_id"] = self.guild_id
-        if self.parent_chat_id:
-            d["parent_chat_id"] = self.parent_chat_id
-        if self.message_id:
-            d["message_id"] = self.message_id
        return d
-
+    
    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "SessionSource":
        return cls(
@@ -148,9 +139,6 @@ class SessionSource:
            chat_topic=data.get("chat_topic"),
            user_id_alt=data.get("user_id_alt"),
            chat_id_alt=data.get("chat_id_alt"),
-            guild_id=data.get("guild_id"),
-            parent_chat_id=data.get("parent_chat_id"),
-            message_id=data.get("message_id"),
        )
    

@@ -202,31 +190,6 @@ that requires raw IDs).  Discord is excluded because mentions use ``<@user_id>``
 and the LLM needs the real ID to tag users."""


-def _discord_tools_loaded() -> bool:
-    """True iff the agent will actually have Discord tools this session.
-
-    Two conditions must hold:
-      1. The `discord` or `discord_admin` toolset is enabled for the
-         Discord platform via `hermes tools` (opt-in, default OFF).
-      2. `DISCORD_BOT_TOKEN` is set — the tool's `check_fn` gates on it
-         at registry time, so the toolset being enabled in config is not
-         enough if the token isn't configured.
-
-    Returns False (safe default — keeps the stale-API disclaimer) on any
-    error so a bad config can't silently promise tools the agent lacks.
-    """
-    if not (os.environ.get("DISCORD_BOT_TOKEN") or "").strip():
-        return False
-    try:
-        from hermes_cli.config import load_config
-        from hermes_cli.tools_config import _get_platform_tools
-        cfg = load_config()
-        enabled = _get_platform_tools(cfg, "discord", include_default_mcp_servers=False)
-        return "discord" in enabled or "discord_admin" in enabled
-    except Exception:
-        return False
-
-
 def build_session_context_prompt(
    context: SessionContext,
    *,
@@ -310,38 +273,18 @@ def build_session_context_prompt(
            "**Platform notes:** You are running inside Slack. "
            "You do NOT have access to Slack-specific APIs — you cannot search "
            "channel history, pin/unpin messages, manage channels, or list users. "
-            "Do not promise to perform these actions. The gateway may inline the "
-            "current message's Slack block/attachment payload when available, but "
-            "you still cannot call Slack APIs yourself."
+            "Do not promise to perform these actions. If the user asks, explain "
+            "that you can only read messages sent directly to you and respond."
        )
    elif context.source.platform == Platform.DISCORD:
-        # Inject the Discord IDs block only when the agent actually has
-        # Discord tools loaded this session — i.e. the user opted into
-        # `discord` / `discord_admin` via `hermes tools` AND the bot
-        # token is configured.  Otherwise keep the stale-API disclaimer
-        # honest so we never promise tools the agent lacks.
-        if _discord_tools_loaded():
-            src = context.source
-            id_lines = ["", "**Discord IDs (for the `discord` / `discord_admin` tools):**"]
-            if src.guild_id:
-                id_lines.append(f"  - Guild: `{src.guild_id}`")
-            if src.thread_id and src.parent_chat_id:
-                id_lines.append(f"  - Parent channel: `{src.parent_chat_id}`")
-                id_lines.append(f"  - Thread: `{src.thread_id}` (use as `channel_id` for fetch_messages etc.)")
-            else:
-                id_lines.append(f"  - Channel: `{src.chat_id}`")
-            if src.message_id:
-                id_lines.append(f"  - Triggering message: `{src.message_id}`")
-            lines.extend(id_lines)
-        else:
-            lines.append("")
-            lines.append(
-                "**Platform notes:** You are running inside Discord. "
-                "You do NOT have access to Discord-specific APIs — you cannot search "
-                "channel history, pin messages, manage roles, or list server members. "
-                "Do not promise to perform these actions. If the user asks, explain "
-                "that you can only read messages sent directly to you and respond."
-            )
+        lines.append("")
+        lines.append(
+            "**Platform notes:** You are running inside Discord. "
+            "You do NOT have access to Discord-specific APIs — you cannot search "
+            "channel history, pin messages, manage roles, or list server members. "
+            "Do not promise to perform these actions. If the user asks, explain "
+            "that you can only read messages sent directly to you and respond."
+        )
    elif context.source.platform == Platform.BLUEBUBBLES:
        lines.append("")
        lines.append(
@@ -354,14 +297,6 @@ def build_session_context_prompt(
            "If the user needs a detailed answer, give the short version first "
            "and offer to elaborate."
        )
-    elif context.source.platform == Platform.YUANBAO:
-        lines.append("")
-        lines.append(
-            "**Platform notes:** You are running inside Yuanbao. "
-            "You CAN send private (DM) messages via the send_message tool. "
-            "Use target='yuanbao:direct:<account_id>' for DM "
-            "and target='yuanbao:group:<group_code>' for group chat."
-        )

    # Connected platforms
    platforms_list = ["local (files on this machine)"]
@@ -448,11 +383,11 @@ class SessionEntry:
    auto_reset_reason: Optional[str] = None  # "idle" or "daily"
    reset_had_activity: bool = False  # whether the expired session had any messages
    
-    # Set by the background expiry watcher after it finalizes an expired
-    # session (invoking on_session_finalize hooks and evicting the cached
-    # agent).  Persisted to sessions.json so the flag survives gateway
-    # restarts — prevents redundant finalization runs.
-    expiry_finalized: bool = False
+    # Set by the background expiry watcher after it successfully flushes
+    # memories for this session.  Persisted to sessions.json so the flag
+    # survives gateway restarts (the old in-memory _pre_flushed_sessions
+    # set was lost on restart, causing redundant re-flushes).
+    memory_flushed: bool = False

    # When True the next call to get_or_create_session() will auto-reset
    # this session (create a new session_id) so the user starts fresh.
@@ -488,7 +423,7 @@ class SessionEntry:
            "last_prompt_tokens": self.last_prompt_tokens,
            "estimated_cost_usd": self.estimated_cost_usd,
            "cost_status": self.cost_status,
-            "expiry_finalized": self.expiry_finalized,
+            "memory_flushed": self.memory_flushed,
            "suspended": self.suspended,
            "resume_pending": self.resume_pending,
            "resume_reason": self.resume_reason,
@@ -540,7 +475,7 @@ class SessionEntry:
            last_prompt_tokens=data.get("last_prompt_tokens", 0),
            estimated_cost_usd=data.get("estimated_cost_usd", 0.0),
            cost_status=data.get("cost_status", "unknown"),
-            expiry_finalized=data.get("expiry_finalized", data.get("memory_flushed", False)),
+            memory_flushed=data.get("memory_flushed", False),
            suspended=data.get("suspended", False),
            resume_pending=data.get("resume_pending", False),
            resume_reason=data.get("resume_reason"),
@@ -1241,7 +1176,6 @@ class SessionStore:
                    reasoning_content=message.get("reasoning_content") if message.get("role") == "assistant" else None,
                    reasoning_details=message.get("reasoning_details") if message.get("role") == "assistant" else None,
                    codex_reasoning_items=message.get("codex_reasoning_items") if message.get("role") == "assistant" else None,
-                    codex_message_items=message.get("codex_message_items") if message.get("role") == "assistant" else None,
                )
            except Exception as e:
                logger.debug("Session DB operation failed: %s", e)
@@ -1274,7 +1208,6 @@ class SessionStore:
                        reasoning_content=msg.get("reasoning_content") if role == "assistant" else None,
                        reasoning_details=msg.get("reasoning_details") if role == "assistant" else None,
                        codex_reasoning_items=msg.get("codex_reasoning_items") if role == "assistant" else None,
-                        codex_message_items=msg.get("codex_message_items") if role == "assistant" else None,
                    )
            except Exception as e:
                logger.debug("Failed to rewrite transcript in DB: %s", e)
@@ -44,14 +44,6 @@ class StreamConsumerConfig:
    buffer_threshold: int = 40
    cursor: str = " ▉"
    buffer_only: bool = False
-    # When >0, the final edit for a streamed response is delivered as a
-    # fresh message if the original preview has been visible for at least
-    # this many seconds.  This makes the platform's visible timestamp
-    # reflect completion time instead of first-token time for long-running
-    # responses (e.g. reasoning models that stream slowly).  Ported from
-    # openclaw/openclaw#72038.  Default 0 = always edit in place (legacy
-    # behavior).  The gateway enables this selectively per-platform.
-    fresh_final_after_seconds: float = 0.0


 class GatewayStreamConsumer:
@@ -99,12 +91,6 @@ class GatewayStreamConsumer:
        self._queue: queue.Queue = queue.Queue()
        self._accumulated = ""
        self._message_id: Optional[str] = None
-        # Wall-clock timestamp (time.monotonic) when ``_message_id`` was
-        # first assigned from a successful first-send.  Used by the
-        # fresh-final logic to detect long-lived previews whose edit
-        # timestamps would be stale by completion time.  Ported from
-        # openclaw/openclaw#72038.
-        self._message_created_ts: Optional[float] = None
        self._already_sent = False
        self._edit_supported = True  # Disabled when progressive edits are no longer usable
        self._last_edit_time = 0.0
@@ -150,7 +136,6 @@ class GatewayStreamConsumer:
        if preserve_no_edit and self._message_id == "__no_edit__":
            return
        self._message_id = None
-        self._message_created_ts = None
        self._accumulated = ""
        self._last_sent_text = ""
        self._fallback_final_send = False
@@ -749,81 +734,6 @@ class GatewayStreamConsumer:
            logger.error("Commentary send error: %s", e)
            return False

-    def _should_send_fresh_final(self) -> bool:
-        """Return True when a long-lived preview should be replaced with a
-        fresh final message instead of an edit.
-
-        Conditions:
-        - Fresh-final is enabled (``fresh_final_after_seconds > 0``).
-        - We have a real preview message id (not the ``__no_edit__`` sentinel
-          and not ``None``).
-        - The preview has been visible for at least the configured threshold.
-
-        Ported from openclaw/openclaw#72038.
-        """
-        threshold = getattr(self.cfg, "fresh_final_after_seconds", 0.0) or 0.0
-        if threshold <= 0:
-            return False
-        if not self._message_id or self._message_id == "__no_edit__":
-            return False
-        if self._message_created_ts is None:
-            return False
-        age = time.monotonic() - self._message_created_ts
-        return age >= threshold
-
-    async def _try_fresh_final(self, text: str) -> bool:
-        """Send ``text`` as a brand-new message (best-effort delete the old
-        preview) so the platform's visible timestamp reflects completion
-        time.  Returns True on successful delivery, False on any failure so
-        the caller falls back to the normal edit path.
-
-        Ported from openclaw/openclaw#72038.
-        """
-        old_message_id = self._message_id
-        try:
-            result = await self.adapter.send(
-                chat_id=self.chat_id,
-                content=text,
-                metadata=self.metadata,
-            )
-        except Exception as e:
-            logger.debug("Fresh-final send failed, falling back to edit: %s", e)
-            return False
-        if not getattr(result, "success", False):
-            return False
-        # Successful fresh send — try to delete the stale preview so the
-        # user doesn't see the old edit-stuck message underneath.  Cleanup
-        # is best-effort; platforms that don't implement ``delete_message``
-        # just leave the preview behind (still an acceptable outcome —
-        # the visible final timestamp is the important part).
-        if old_message_id and old_message_id != "__no_edit__":
-            delete_fn = getattr(self.adapter, "delete_message", None)
-            if delete_fn is not None:
-                try:
-                    await delete_fn(self.chat_id, old_message_id)
-                except Exception as e:
-                    logger.debug(
-                        "Fresh-final preview cleanup failed (%s): %s",
-                        old_message_id, e,
-                    )
-        # Adopt the new message id as the current message so subsequent
-        # callers (e.g. overflow split loops, finalize retries) see a
-        # consistent state.
-        new_message_id = getattr(result, "message_id", None)
-        if new_message_id:
-            self._message_id = new_message_id
-            self._message_created_ts = time.monotonic()
-        else:
-            # Send succeeded but platform didn't return an id — treat the
-            # delivery as final-only and fall back to "__no_edit__" so we
-            # don't try to edit something we can't address.
-            self._message_id = "__no_edit__"
-            self._message_created_ts = None
-        self._already_sent = True
-        self._last_sent_text = text
-        self._final_response_sent = True
-        return True
-
    async def _send_or_edit(self, text: str, *, finalize: bool = False) -> bool:
        """Send or edit the streaming message.

@@ -876,22 +786,6 @@ class GatewayStreamConsumer:
                        finalize and self._adapter_requires_finalize
                    ):
                        return True
-                    # Fresh-final for long-lived previews: when finalizing
-                    # the last edit in a streaming sequence, if the
-                    # original preview has been visible for at least
-                    # ``fresh_final_after_seconds``, send the completed
-                    # reply as a fresh message so the platform's visible
-                    # timestamp reflects completion time instead of the
-                    # preview creation time.  Best-effort cleanup of the
-                    # old preview follows.  Ported from
-                    # openclaw/openclaw#72038.  Gated by config so the
-                    # legacy edit-in-place path stays the default.
-                    if (
-                        finalize
-                        and self._should_send_fresh_final()
-                        and await self._try_fresh_final(text)
-                    ):
-                        return True
                    # Edit existing message
                    result = await self.adapter.edit_message(
                        chat_id=self.chat_id,
@@ -958,10 +852,6 @@ class GatewayStreamConsumer:
                if result.success:
                    if result.message_id:
                        self._message_id = result.message_id
-                        # Track when the preview first became visible to
-                        # the user so fresh-final logic can detect stale
-                        # preview timestamps on long-running responses.
-                        self._message_created_ts = time.monotonic()
                    else:
                        self._edit_supported = False
                    self._already_sent = True
@@ -31,17 +31,8 @@ Hermes' own session keys.
 from __future__ import annotations

 import json
-import logging
-import re
 from typing import Set

-logger = logging.getLogger(__name__)
-
-# WhatsApp JIDs are numeric (or plus-prefixed numeric) with optional
-# ``@``, ``.`` and ``:`` separators. ``\w`` is pinned to ASCII so
-# full-width digits / Unicode word chars can't sneak through.
-_SAFE_IDENTIFIER_RE = re.compile(r"^[A-Za-z0-9@.+\-]+$")
-
 from hermes_constants import get_hermes_home


@@ -90,16 +81,6 @@ def expand_whatsapp_aliases(identifier: str) -> Set[str]:
        current = queue.pop(0)
        if not current or current in resolved:
            continue
-        # Defense-in-depth: reject identifiers that could sneak path
-        # separators / traversal segments into the ``lid-mapping-{current}``
-        # filename below. The hardcoded ``lid-mapping-`` prefix already
-        # prevents escape via pathlib's component split (an attacker can't
-        # create ``lid-mapping-..`` as a real directory in session_dir), but
-        # this keeps the identifier space to the characters WhatsApp JIDs
-        # actually use and avoids depending on that filesystem-layout
-        # invariant.
-        if not _SAFE_IDENTIFIER_RE.match(current):
-            continue

        resolved.add(current)
        for suffix in ("", "_reverse"):
@@ -110,8 +91,7 @@ def expand_whatsapp_aliases(identifier: str) -> Set[str]:
                mapped = normalize_whatsapp_identifier(
                    json.loads(mapping_path.read_text(encoding="utf-8"))
                )
-            except (OSError, json.JSONDecodeError) as exc:
-                logger.debug("whatsapp_identity: failed to read %s: %s", mapping_path, exc)
+            except Exception:
                continue
            if mapped and mapped not in resolved:
                queue.append(mapped)
@@ -224,14 +224,6 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
        api_key_env_vars=("ARCEEAI_API_KEY",),
        base_url_env_var="ARCEE_BASE_URL",
    ),
-    "gmi": ProviderConfig(
-        id="gmi",
-        name="GMI Cloud",
-        auth_type="api_key",
-        inference_base_url="https://api.gmi-serving.com/v1",
-        api_key_env_vars=("GMI_API_KEY",),
-        base_url_env_var="GMI_BASE_URL",
-    ),
    "minimax": ProviderConfig(
        id="minimax",
        name="MiniMax",
@@ -364,14 +356,6 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
        api_key_env_vars=(),
        base_url_env_var="BEDROCK_BASE_URL",
    ),
-    "azure-foundry": ProviderConfig(
-        id="azure-foundry",
-        name="Azure Foundry",
-        auth_type="api_key",
-        inference_base_url="",  # User-provided endpoint
-        api_key_env_vars=("AZURE_FOUNDRY_API_KEY",),
-        base_url_env_var="AZURE_FOUNDRY_BASE_URL",
-    ),
 }


@@ -475,27 +459,11 @@ def _resolve_api_key_provider_secret(
            pass
        return "", ""

-    from hermes_cli.config import get_env_value
    for env_var in pconfig.api_key_env_vars:
-        # Check both os.environ and ~/.hermes/.env file
-        val = (get_env_value(env_var) or "").strip()
+        val = os.getenv(env_var, "").strip()
        if has_usable_secret(val):
            return val, env_var

-    # Fallback: try credential pool (e.g. zai key stored via auth.json)
-    try:
-        from agent.credential_pool import load_pool
-        pool = load_pool(provider_id)
-        if pool and pool.has_credentials():
-            entry = pool.peek()
-            if entry:
-                key = getattr(entry, "access_token", "") or getattr(entry, "runtime_api_key", "")
-                key = str(key).strip()
-                if has_usable_secret(key):
-                    return key, f"credential_pool:{provider_id}"
-    except Exception:
-        pass
-
    return "", ""


@@ -1128,7 +1096,6 @@ def resolve_provider(
        "kimi-cn": "kimi-coding-cn", "moonshot-cn": "kimi-coding-cn",
        "step": "stepfun", "stepfun-coding-plan": "stepfun",
        "arcee-ai": "arcee", "arceeai": "arcee",
-        "gmi-cloud": "gmi", "gmicloud": "gmi",
        "minimax-china": "minimax-cn", "minimax_cn": "minimax-cn",
        "alibaba_coding": "alibaba-coding-plan", "alibaba-coding": "alibaba-coding-plan",
        "alibaba_coding_plan": "alibaba-coding-plan",
@@ -4269,10 +4236,10 @@ def _login_nous(args, pconfig: ProviderConfig) -> None:
                )

            from hermes_cli.models import (
-                get_curated_nous_model_ids, get_pricing_for_provider,
+                _PROVIDER_MODELS, get_pricing_for_provider,
                check_nous_free_tier, partition_nous_models_by_tier,
            )
-            model_ids = get_curated_nous_model_ids()
+            model_ids = _PROVIDER_MODELS.get("nous", [])

            print()
            unavailable_models: list = []
@@ -1,300 +0,0 @@
-"""Azure Foundry endpoint auto-detection.
-
-Inspect an Azure AI Foundry / Azure OpenAI endpoint to determine:
-  - API transport (OpenAI-style ``chat_completions`` vs
-    Anthropic-style ``anthropic_messages``)
-  - Available models (best effort — Azure does not expose a deployment
-    listing via the inference API key, but Azure OpenAI v1 endpoints
-    return the resource's model catalog via ``GET /models``)
-  - Context length for each discovered/entered model, via the existing
-    :func:`agent.model_metadata.get_model_context_length` resolver.
-
-Rationale:
-
-Azure has no pure-API-key deployment-listing endpoint — per Microsoft,
-deployment enumeration requires ARM management-plane auth.  Azure
-OpenAI v1 endpoints ``{resource}.openai.azure.com/openai/v1`` do return
-a ``/models`` list, but it reflects the resource's *available* models
-rather than the user's *deployed* deployment names.  In practice it is
-still a useful hint — the user picks a familiar model name and we look
-up its context length from the catalog.
-
-The detector never crashes on errors (every HTTP call is wrapped in a
-broad try/except).  Callers get a :class:`DetectionResult` with whatever
-information could be gathered, and fall back to manual entry for the
-rest.
-"""
-
-from __future__ import annotations
-
-import json
-import logging
-import re
-from dataclasses import dataclass, field
-from typing import Optional
-from urllib import request as urllib_request
-from urllib.error import HTTPError, URLError
-from urllib.parse import urlparse, urlunparse
-
-logger = logging.getLogger(__name__)
-
-
-# Default Azure OpenAI ``api-version`` to probe with.  The v1 GA endpoint
-# accepts requests without ``api-version`` entirely, so this is only used
-# as a fallback for pre-v1 resources that still require it.
-_AZURE_OPENAI_PROBE_API_VERSIONS = (
-    "2025-04-01-preview",
-    "2024-10-21",  # oldest GA that supports /models
-)
-
-# Default Azure Anthropic ``api-version``.  Matches the value used by
-# ``agent/anthropic_adapter.py`` when building the Anthropic client.
-_AZURE_ANTHROPIC_API_VERSION = "2025-04-15"
-
-
-@dataclass
-class DetectionResult:
-    """Everything auto-detection could gather from a base URL + API key."""
-
-    #: Detected API transport: ``"chat_completions"``,
-    #: ``"anthropic_messages"``, or ``None`` when detection failed.
-    api_mode: Optional[str] = None
-
-    #: Deployment / model IDs returned by ``/models`` (best effort).
-    #: Empty when the endpoint doesn't expose the list with an API key.
-    models: list[str] = field(default_factory=list)
-
-    #: Lowercased host from the base URL (used for display messages).
-    hostname: str = ""
-
-    #: Human-readable reason the detector chose ``api_mode``.  Useful
-    #: for explaining auto-detection to the user in the wizard.
-    reason: str = ""
-
-    #: ``True`` when ``/models`` returned a valid OpenAI-shaped payload.
-    models_probe_ok: bool = False
-
-    #: ``True`` when the URL was determined to be an Anthropic-style
-    #: endpoint (from path suffix or live probe).
-    is_anthropic: bool = False
-
-
-def _http_get_json(url: str, api_key: str, timeout: float = 6.0) -> tuple[int, Optional[dict]]:
-    """GET a URL with ``api-key`` + ``Authorization`` headers.  Return
-    ``(status_code, parsed_json_or_None)``.  Never raises."""
-    req = urllib_request.Request(url, method="GET")
-    # Azure OpenAI uses ``api-key``.  Some Azure deployments (and
-    # Anthropic-style routes) use ``Authorization: Bearer``.  Send both
-    # so we probe once per URL rather than twice.
-    req.add_header("api-key", api_key)
-    req.add_header("Authorization", f"Bearer {api_key}")
-    req.add_header("User-Agent", "hermes-agent/azure-detect")
-    try:
-        with urllib_request.urlopen(req, timeout=timeout) as resp:
-            body = resp.read()
-            try:
-                return resp.status, json.loads(body.decode("utf-8", errors="replace"))
-            except Exception:
-                return resp.status, None
-    except HTTPError as exc:
-        return exc.code, None
-    except (URLError, TimeoutError, OSError) as exc:
-        logger.debug("azure_detect: GET %s failed: %s", url, exc)
-        return 0, None
-    except Exception as exc:  # pragma: no cover — defensive
-        logger.debug("azure_detect: GET %s unexpected error: %s", url, exc)
-        return 0, None
-
-
-def _strip_trailing_v1(url: str) -> str:
-    """Strip trailing ``/v1`` or ``/v1/`` so we can construct sub-paths."""
-    return re.sub(r"/v1/?$", "", url.rstrip("/"))
-
-
-def _looks_like_anthropic_path(url: str) -> bool:
-    """Return True when the URL's path ends in ``/anthropic`` or
-    contains a ``/anthropic/`` segment.  Used by Azure Foundry
-    resources that route Claude traffic through a dedicated path."""
-    try:
-        parsed = urlparse(url)
-        path = (parsed.path or "").lower().rstrip("/")
-        return path.endswith("/anthropic") or "/anthropic/" in path + "/"
-    except Exception:
-        return False
-
-
-def _extract_model_ids(payload: dict) -> list[str]:
-    """Extract a list of model IDs from an OpenAI-shaped ``/models``
-    response.  Returns ``[]`` on any shape mismatch."""
-    data = payload.get("data") if isinstance(payload, dict) else None
-    if not isinstance(data, list):
-        return []
-    ids: list[str] = []
-    for item in data:
-        if not isinstance(item, dict):
-            continue
-        # OpenAI shape: {"id": "gpt-5.4", "object": "model", ...}
-        mid = item.get("id") or item.get("model") or item.get("name")
-        if isinstance(mid, str) and mid:
-            ids.append(mid)
-    return ids
-
-
-def _probe_openai_models(base_url: str, api_key: str) -> tuple[bool, list[str]]:
-    """Probe ``<base>/models`` for an OpenAI-shaped response.
-
-    Returns ``(ok, models)``.  ``ok`` is True iff the endpoint accepted
-    us as an OpenAI-style caller (200 OK + OpenAI-shaped JSON body).
-    """
-    base_url = base_url.rstrip("/")
-
-    # Azure OpenAI v1: {resource}.openai.azure.com/openai/v1 — no
-    # api-version required for GA paths, so probe without first.
-    candidates = [f"{base_url}/models"]
-    # Fallback: explicit api-version for pre-v1 resources
-    for v in _AZURE_OPENAI_PROBE_API_VERSIONS:
-        candidates.append(f"{base_url}/models?api-version={v}")
-
-    for url in candidates:
-        status, body = _http_get_json(url, api_key)
-        if status == 200 and body is not None:
-            ids = _extract_model_ids(body)
-            if ids:
-                logger.info(
-                    "azure_detect: /models probe OK at %s (%d models)",
-                    url, len(ids),
-                )
-                return True, ids
-            # 200 + empty list still counts as "OpenAI shape, no models
-            # listed" — let the user proceed with manual entry.
-            if isinstance(body, dict) and "data" in body:
-                return True, []
-    return False, []
-
-
-def _probe_anthropic_messages(base_url: str, api_key: str) -> bool:
-    """Send a zero-token request to ``<base>/v1/messages`` and check
-    whether the endpoint at least *recognises* the Anthropic Messages
-    shape (any 4xx that mentions ``messages`` or ``model``, or a 400
-    ``invalid_request`` with an Anthropic error shape).  Never completes
-    a real chat.
-    """
-    base = _strip_trailing_v1(base_url)
-    url = f"{base}/v1/messages?api-version={_AZURE_ANTHROPIC_API_VERSION}"
-    payload = json.dumps({
-        "model": "probe",
-        "max_tokens": 1,
-        "messages": [{"role": "user", "content": "ping"}],
-    }).encode("utf-8")
-    req = urllib_request.Request(url, method="POST", data=payload)
-    req.add_header("api-key", api_key)
-    req.add_header("Authorization", f"Bearer {api_key}")
-    req.add_header("anthropic-version", "2023-06-01")
-    req.add_header("content-type", "application/json")
-    req.add_header("User-Agent", "hermes-agent/azure-detect")
-    try:
-        with urllib_request.urlopen(req, timeout=6.0) as resp:
-            # Should never 200 — "probe" isn't a real deployment.  But
-            # if it does, the endpoint definitely speaks Anthropic.
-            return resp.status < 500
-    except HTTPError as exc:
-        # 4xx with an Anthropic-shaped error body = Anthropic endpoint.
-        try:
-            body = exc.read().decode("utf-8", errors="replace")
-            lowered = body.lower()
-            if "anthropic" in lowered or '"type"' in lowered and '"error"' in lowered:
-                return True
-            # Pre-Azure-v1 Azure Foundry returns a plain 404 for
-            # Anthropic-style calls on non-Anthropic deployments.  A
-            # 400 "model not found" IS Anthropic though.
-            if exc.code == 400 and ("messages" in lowered or "model" in lowered):
-                return True
-            return False
-        except Exception:
-            return False
-    except (URLError, TimeoutError, OSError):
-        return False
-    except Exception:  # pragma: no cover
-        return False
-
-
-def detect(base_url: str, api_key: str) -> DetectionResult:
-    """Inspect an Azure endpoint and describe its transport + models.
-
-    Call this from the wizard before asking the user to pick an API
-    mode manually.  The caller should treat the returned
-    :class:`DetectionResult` as *advisory* — if ``api_mode`` is None,
-    fall back to asking the user.
-    """
-    result = DetectionResult()
-
-    try:
-        parsed = urlparse(base_url)
-        result.hostname = (parsed.hostname or "").lower()
-    except Exception:
-        result.hostname = ""
-
-    # 1. Path sniff.  Azure Foundry exposes Anthropic-style deployments
-    #    under a dedicated ``/anthropic`` path.
-    if _looks_like_anthropic_path(base_url):
-        result.is_anthropic = True
-        result.api_mode = "anthropic_messages"
-        result.reason = "URL path ends in /anthropic → Anthropic Messages API"
-        return result
-
-    # 2. Try the OpenAI-style /models probe.  If this works, the
-    #    endpoint definitely speaks OpenAI wire.
-    ok, models = _probe_openai_models(base_url, api_key)
-    if ok:
-        result.models_probe_ok = True
-        result.models = models
-        result.api_mode = "chat_completions"
-        result.reason = (
-            f"GET /models returned {len(models)} model(s) — OpenAI-style endpoint"
-            if models
-            else "GET /models returned an OpenAI-shaped empty list — OpenAI-style endpoint"
-        )
-        return result
-
-    # 3. Fallback: probe the Anthropic Messages shape.  Slower and more
-    #    intrusive than /models, so only run it when the OpenAI probe
-    #    failed.
-    if _probe_anthropic_messages(base_url, api_key):
-        result.is_anthropic = True
-        result.api_mode = "anthropic_messages"
-        result.reason = "Endpoint accepts Anthropic Messages shape"
-        return result
-
-    # Nothing matched.  Caller falls back to manual selection.
-    result.reason = (
-        "Could not probe endpoint (private network, missing model list, or "
-        "non-standard path) — falling back to manual API-mode selection"
-    )
-    return result
-
-
-def lookup_context_length(model: str, base_url: str, api_key: str) -> Optional[int]:
-    """Thin wrapper around :func:`agent.model_metadata.get_model_context_length`
-    that returns ``None`` when only the fallback default (128k) would
-    fire, so the wizard can distinguish "we actually know this" from
-    "we guessed."""
-    try:
-        from agent.model_metadata import (
-            DEFAULT_FALLBACK_CONTEXT,
-            get_model_context_length,
-        )
-    except Exception:
-        return None
-
-    try:
-        n = get_model_context_length(model, base_url=base_url, api_key=api_key)
-    except Exception as exc:
-        logger.debug("azure_detect: context length lookup failed: %s", exc)
-        return None
-
-    if isinstance(n, int) and n > 0 and n != DEFAULT_FALLBACK_CONTEXT:
-        return n
-    return None
-
-
-__all__ = ["DetectionResult", "detect", "lookup_context_length"]
@@ -36,23 +36,12 @@ _EXCLUDED_DIRS = {
    "__pycache__",      # bytecode caches — regenerated on import
    ".git",             # nested git dirs (profiles shouldn't have these, but safety)
    "node_modules",     # js deps if website/ somehow leaks in
-    "backups",          # prior auto-backups — don't nest backups exponentially
-    "checkpoints",      # session-local trajectory caches — regenerated per-session,
-                        # session-hash-keyed so they don't port to another machine anyway
 }

 # File-name suffixes to skip
 _EXCLUDED_SUFFIXES = (
    ".pyc",
    ".pyo",
-    # SQLite sidecar files — the backup takes a consistent snapshot of ``*.db``
-    # via ``sqlite3.backup()``, so shipping the live WAL / shared-memory /
-    # rollback-journal alongside would pair a fresh snapshot with stale sidecar
-    # state and produce a torn restore on the next open. They're transient and
-    # regenerated on first connection anyway.
-    ".db-wal",
-    ".db-shm",
-    ".db-journal",
 )

 # File names to skip (runtime state that's meaningless on another machine)
@@ -465,12 +454,6 @@ def run_import(args) -> None:
 # Critical state files to include in quick snapshots (relative to HERMES_HOME).
 # Everything else is either regeneratable (logs, cache) or managed separately
 # (skills, repo, sessions/).
-#
-# Entries may be individual files OR directories.  Directories are captured
-# recursively; missing entries are silently skipped.  Pairing data lives in
-# platform-specific JSON blobs outside state.db, so it's listed here explicitly
-# — `hermes update` snapshots this set before pulling so approved-user lists
-# are recoverable if anything goes wrong (issue #15733).
 _QUICK_STATE_FILES = (
    "state.db",
    "config.yaml",
@@ -480,10 +463,6 @@ _QUICK_STATE_FILES = (
    "gateway_state.json",
    "channel_directory.json",
    "processes.json",
-    # Pairing stores (generic + per-platform JSONs outside state.db)
-    "pairing",                          # legacy location (gateway/pairing.py)
-    "platforms/pairing",                # new location (gateway/pairing.py)
-    "feishu_comment_pairing.json",      # Feishu comment subscription pairings
 )

 _QUICK_SNAPSHOTS_DIR = "state-snapshots"
@@ -519,27 +498,7 @@ def create_quick_snapshot(

    for rel in _QUICK_STATE_FILES:
        src = home / rel
-        if not src.exists():
-            continue
-
-        if src.is_dir():
-            # Walk the directory and record each file individually in the
-            # manifest so restore can treat them uniformly.  Empty dirs are
-            # skipped (nothing to snapshot).
-            for sub in src.rglob("*"):
-                if not sub.is_file():
-                    continue
-                sub_rel = sub.relative_to(home).as_posix()
-                dst = snap_dir / sub_rel
-                dst.parent.mkdir(parents=True, exist_ok=True)
-                try:
-                    shutil.copy2(sub, dst)
-                    manifest[sub_rel] = dst.stat().st_size
-                except (OSError, PermissionError) as exc:
-                    logger.warning("Could not snapshot %s: %s", sub_rel, exc)
-            continue
-
-        if not src.is_file():
+        if not src.exists() or not src.is_file():
            continue

        dst = snap_dir / rel
@@ -694,138 +653,3 @@ def run_quick_backup(args) -> None:
        print(f"  Restore with: /snapshot restore {snap_id}")
    else:
        print("No state files found to snapshot.")
-
-
-# ---------------------------------------------------------------------------
-# Pre-update auto-backup
-# ---------------------------------------------------------------------------
-
-_PRE_UPDATE_BACKUPS_DIR = "backups"
-_PRE_UPDATE_PREFIX = "pre-update-"
-_PRE_UPDATE_DEFAULT_KEEP = 5
-
-
-def _pre_update_backup_dir(hermes_home: Optional[Path] = None) -> Path:
-    home = hermes_home or get_hermes_home()
-    return home / _PRE_UPDATE_BACKUPS_DIR
-
-
-def _prune_pre_update_backups(backup_dir: Path, keep: int) -> int:
-    """Remove oldest pre-update backups beyond the keep limit.
-
-    Returns the number of files deleted.  Only touches files matching
-    ``pre-update-*.zip`` so hand-made zips dropped in the same directory
-    are never touched.
-    """
-    if keep < 0:
-        keep = 0
-    if not backup_dir.exists():
-        return 0
-
-    backups = sorted(
-        (p for p in backup_dir.iterdir()
-         if p.is_file() and p.name.startswith(_PRE_UPDATE_PREFIX) and p.suffix.lower() == ".zip"),
-        key=lambda p: p.name,
-        reverse=True,
-    )
-
-    deleted = 0
-    for p in backups[keep:]:
-        try:
-            p.unlink()
-            deleted += 1
-        except OSError as exc:
-            logger.warning("Failed to prune backup %s: %s", p.name, exc)
-
-    return deleted
-
-
-def create_pre_update_backup(
-    hermes_home: Optional[Path] = None,
-    keep: int = _PRE_UPDATE_DEFAULT_KEEP,
-) -> Optional[Path]:
-    """Create a full zip backup of HERMES_HOME under ``backups/``.
-
-    Mirrors :func:`run_backup` (same exclusion rules, same SQLite safe-copy)
-    but writes to ``<HERMES_HOME>/backups/pre-update-<timestamp>.zip`` and
-    auto-prunes old pre-update backups.
-
-    Returns the path to the created zip, or ``None`` if no files were
-    found or the backup could not be created.  Never raises — the caller
-    (``hermes update``) should continue even if the backup fails.
-    """
-    hermes_root = hermes_home or get_default_hermes_root()
-    if not hermes_root.is_dir():
-        return None
-
-    backup_dir = _pre_update_backup_dir(hermes_root)
-    try:
-        backup_dir.mkdir(parents=True, exist_ok=True)
-    except OSError as exc:
-        logger.warning("Could not create pre-update backup dir %s: %s", backup_dir, exc)
-        return None
-
-    stamp = datetime.now().strftime("%Y-%m-%d-%H%M%S")
-    out_path = backup_dir / f"{_PRE_UPDATE_PREFIX}{stamp}.zip"
-
-    # Collect files (same logic as run_backup, minus the chatty progress prints)
-    files_to_add: list[tuple[Path, Path]] = []
-    try:
-        for dirpath, dirnames, filenames in os.walk(hermes_root, followlinks=False):
-            dp = Path(dirpath)
-            # Prune excluded directories in-place so os.walk doesn't descend
-            dirnames[:] = [d for d in dirnames if d not in _EXCLUDED_DIRS]
-
-            for fname in filenames:
-                fpath = dp / fname
-                try:
-                    rel = fpath.relative_to(hermes_root)
-                except ValueError:
-                    continue
-
-                if _should_exclude(rel):
-                    continue
-
-                # Skip the output zip itself if it already exists
-                try:
-                    if fpath.resolve() == out_path.resolve():
-                        continue
-                except (OSError, ValueError):
-                    pass
-
-                files_to_add.append((fpath, rel))
-    except OSError as exc:
-        logger.warning("Pre-update backup: walk failed: %s", exc)
-        return None
-
-    if not files_to_add:
-        return None
-
-    try:
-        with zipfile.ZipFile(out_path, "w", zipfile.ZIP_DEFLATED, compresslevel=6) as zf:
-            for abs_path, rel_path in files_to_add:
-                try:
-                    if abs_path.suffix == ".db":
-                        with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as tmp:
-                            tmp_db = Path(tmp.name)
-                        try:
-                            if _safe_copy_db(abs_path, tmp_db):
-                                zf.write(tmp_db, arcname=str(rel_path))
-                        finally:
-                            tmp_db.unlink(missing_ok=True)
-                    else:
-                        zf.write(abs_path, arcname=str(rel_path))
-                except (PermissionError, OSError, ValueError) as exc:
-                    logger.debug("Skipping %s in pre-update backup: %s", rel_path, exc)
-                    continue
-    except OSError as exc:
-        logger.warning("Pre-update backup: zip write failed: %s", exc)
-        # Best-effort cleanup of partial file
-        try:
-            out_path.unlink(missing_ok=True)
-        except OSError:
-            pass
-        return None
-
-    _prune_pre_update_backups(backup_dir, keep=keep)
-    return out_path
@@ -62,8 +62,6 @@ COMMAND_REGISTRY: list[CommandDef] = [
               aliases=("reset",)),
    CommandDef("clear", "Clear screen and start a new session", "Session",
               cli_only=True),
-    CommandDef("redraw", "Force a full UI repaint (recovers from terminal drift)", "Session",
-               cli_only=True),
    CommandDef("history", "Show conversation history", "Session",
               cli_only=True),
    CommandDef("save", "Save the current conversation", "Session",
@@ -86,7 +84,9 @@ COMMAND_REGISTRY: list[CommandDef] = [
    CommandDef("deny", "Deny a pending dangerous command", "Session",
               gateway_only=True),
    CommandDef("background", "Run a prompt in the background", "Session",
-               aliases=("bg", "btw"), args_hint="<prompt>"),
+               aliases=("bg",), args_hint="<prompt>"),
+    CommandDef("btw", "Ephemeral side question using session context (no tools, not persisted)", "Session",
+               args_hint="<question>"),
    CommandDef("agents", "Show active agents and running tasks", "Session",
               aliases=("tasks",)),
    CommandDef("queue", "Queue a prompt for the next turn (doesn't interrupt)", "Session",
@@ -103,8 +103,7 @@ COMMAND_REGISTRY: list[CommandDef] = [
    # Configuration
    CommandDef("config", "Show current configuration", "Configuration",
               cli_only=True),
-    CommandDef("model", "Switch model for this session", "Configuration",
-               aliases=("provider",), args_hint="[model] [--provider name] [--global]"),
+    CommandDef("model", "Switch model for this session", "Configuration", args_hint="[model] [--provider name] [--global]"),
    CommandDef("gquota", "Show Google Gemini Code Assist quota usage", "Info",
               cli_only=True),

@@ -128,8 +127,8 @@ COMMAND_REGISTRY: list[CommandDef] = [
    CommandDef("voice", "Toggle voice mode", "Configuration",
               args_hint="[on|off|tts|status]", subcommands=("on", "off", "tts", "status")),
    CommandDef("busy", "Control what Enter does while Hermes is working", "Configuration",
-               cli_only=True, args_hint="[queue|steer|interrupt|status]",
-               subcommands=("queue", "steer", "interrupt", "status")),
+               cli_only=True, args_hint="[queue|interrupt|status]",
+               subcommands=("queue", "interrupt", "status")),

    # Tools & Skills
    CommandDef("tools", "Manage tools: /tools [list|disable|enable] [name...]", "Tools & Skills",
@@ -808,114 +807,6 @@ def discord_skill_commands_by_category(
    return trimmed_categories, uncategorized, hidden


-# ---------------------------------------------------------------------------
-# Slack native slash commands
-# ---------------------------------------------------------------------------
-
-# Slack slash command name constraints: lowercase a-z, 0-9, hyphens,
-# underscores. Max 32 chars. Slack app manifest accepts up to 50 slash
-# commands per app.
-_SLACK_MAX_SLASH_COMMANDS = 50
-_SLACK_NAME_LIMIT = 32
-_SLACK_INVALID_CHARS = re.compile(r"[^a-z0-9_\-]")
-
-
-def _sanitize_slack_name(raw: str) -> str:
-    """Convert a command name to a valid Slack slash command name.
-
-    Slack allows lowercase a-z, digits, hyphens, and underscores. Max 32
-    chars. Uppercase is lowercased; invalid chars are stripped.
-    """
-    name = raw.lower()
-    name = _SLACK_INVALID_CHARS.sub("", name)
-    name = name.strip("-_")
-    return name[:_SLACK_NAME_LIMIT]
-
-
-def slack_native_slashes() -> list[tuple[str, str, str]]:
-    """Return (slash_name, description, usage_hint) triples for Slack.
-
-    Every gateway-available command in ``COMMAND_REGISTRY`` is surfaced as
-    a standalone Slack slash command (e.g. ``/btw``, ``/stop``, ``/model``),
-    matching Discord's and Telegram's model where every command is a
-    first-class slash and not a ``/hermes <verb>`` subcommand.
-
-    Both canonical names and aliases are included so users can type any
-    documented form (e.g. ``/background``, ``/bg``, and ``/btw`` all work).
-    Plugin-registered slash commands are included too.
-
-    Results are clamped to Slack's 50-command limit with duplicate-name
-    avoidance. ``/hermes`` is always reserved as the first entry so the
-    legacy ``/hermes <subcommand>`` form keeps working for anything that
-    gets dropped by the clamp or for free-form questions.
-    """
-    overrides = _resolve_config_gates()
-    entries: list[tuple[str, str, str]] = []
-    seen: set[str] = set()
-
-    # Reserve /hermes as the catch-all top-level command.
-    entries.append(("hermes", "Talk to Hermes or run a subcommand", "[subcommand] [args]"))
-    seen.add("hermes")
-
-    def _add(name: str, desc: str, hint: str) -> None:
-        slack_name = _sanitize_slack_name(name)
-        if not slack_name or slack_name in seen:
-            return
-        if len(entries) >= _SLACK_MAX_SLASH_COMMANDS:
-            return
-        # Slack description cap is 2000 chars; keep it short.
-        entries.append((slack_name, desc[:140], hint[:100]))
-        seen.add(slack_name)
-
-    # First pass: canonical names (so they win slots if we hit the cap).
-    for cmd in COMMAND_REGISTRY:
-        if not _is_gateway_available(cmd, overrides):
-            continue
-        _add(cmd.name, cmd.description, cmd.args_hint or "")
-
-    # Second pass: aliases.
-    for cmd in COMMAND_REGISTRY:
-        if not _is_gateway_available(cmd, overrides):
-            continue
-        for alias in cmd.aliases:
-            # Skip aliases that only differ from canonical by case/punctuation
-            # normalization (already covered by _add dedup).
-            _add(alias, f"Alias for /{cmd.name} — {cmd.description}", cmd.args_hint or "")
-
-    # Third pass: plugin commands.
-    for name, description, args_hint in _iter_plugin_command_entries():
-        _add(name, description, args_hint or "")
-
-    return entries
-
-
-def slack_app_manifest(request_url: str = "https://hermes-agent.local/slack/commands") -> dict[str, Any]:
-    """Generate a Slack app manifest with all gateway commands as slashes.
-
-    ``request_url`` is required by Slack's manifest schema for every slash
-    command, but in Socket Mode (which we use) Slack ignores it and routes
-    the command event through the WebSocket. A placeholder URL is fine.
-
-    The returned dict is the ``features.slash_commands`` portion only —
-    callers compose it into a full manifest (or merge into an existing
-    one). Keeping it narrow avoids coupling us to the rest of the manifest
-    schema (display_information, oauth_config, settings, etc.) which users
-    set up once in the Slack UI and rarely change.
-    """
-    slashes = []
-    for name, desc, usage in slack_native_slashes():
-        entry = {
-            "command": f"/{name}",
-            "description": desc or f"Run /{name}",
-            "should_escape": False,
-            "url": request_url,
-        }
-        if usage:
-            entry["usage_hint"] = usage
-        slashes.append(entry)
-    return {"features": {"slash_commands": slashes}}
-
-
 def slack_subcommand_map() -> dict[str, str]:
    """Return subcommand -> /command mapping for Slack /hermes handler.

@@ -389,20 +389,6 @@ DEFAULT_CONFIG = {
        # (60+ tool iterations with tiny output) before users assume the
        # bot is dead and /restart.
        "gateway_notify_interval": 180,
-        # How user-attached images are presented to the main model on each turn.
-        #   "auto"   — attach natively when the active model reports
-        #              supports_vision=True AND the user hasn't explicitly
-        #              configured auxiliary.vision.provider.  Otherwise fall
-        #              back to text (vision_analyze pre-analysis).
-        #   "native" — always attach natively; non-vision models will either
-        #              error at the provider or get a last-chance text fallback
-        #              (see run_agent._prepare_messages_for_api).
-        #   "text"   — always pre-analyze with vision_analyze and prepend the
-        #              description as text; the main model never sees pixels.
-        # Affects gateway platforms, the TUI, and CLI /attach.  vision_analyze
-        # remains available as a tool regardless of this setting — the routing
-        # only controls how inbound user images are presented.
-        "image_input_mode": "auto",
    },
    
    "terminal": {
@@ -479,7 +465,6 @@ DEFAULT_CONFIG = {
        "command_timeout": 30,  # Timeout for browser commands in seconds (screenshot, navigate, etc.)
        "record_sessions": False,  # Auto-record browser sessions as WebM videos
        "allow_private_urls": False,  # Allow navigating to private/internal IPs (localhost, 192.168.x.x, etc.)
-        "auto_local_for_private_urls": True,  # When a cloud provider is set, auto-spawn local Chromium for LAN/localhost URLs instead of sending them to the cloud
        "cdp_url": "",  # Optional persistent CDP endpoint for attaching to an existing Chromium/Chrome
        # CDP supervisor — dialog + frame detection via a persistent WebSocket.
        # Active only when a CDP-capable backend is attached (Browserbase or
@@ -501,19 +486,6 @@ DEFAULT_CONFIG = {
    "checkpoints": {
        "enabled": True,
        "max_snapshots": 50,  # Max checkpoints to keep per directory
-        # Auto-maintenance: shadow repos accumulate forever under
-        # ~/.hermes/checkpoints/ (one per cd'd working directory). Field
-        # reports put the typical offender at 1000+ repos / ~12 GB. When
-        # auto_prune is on, hermes sweeps at startup (at most once per
-        # min_interval_hours) and deletes:
-        #   * orphan repos: HERMES_WORKDIR no longer exists on disk
-        #   * stale repos:  newest mtime older than retention_days
-        # Opt-in so users who rely on /rollback against long-ago sessions
-        # never lose data silently.
-        "auto_prune": False,
-        "retention_days": 7,
-        "delete_orphans": True,
-        "min_interval_hours": 24,
    },

    # Maximum characters returned by a single read_file call.  Reads that
@@ -640,6 +612,14 @@ DEFAULT_CONFIG = {
            "timeout": 30,
            "extra_body": {},
        },
+        "flush_memories": {
+            "provider": "auto",
+            "model": "",
+            "base_url": "",
+            "api_key": "",
+            "timeout": 30,
+            "extra_body": {},
+        },
        "title_generation": {
            "provider": "auto",
            "model": "",
@@ -654,7 +634,7 @@ DEFAULT_CONFIG = {
        "compact": False,
        "personality": "kawaii",
        "resume_display": "full",
-        "busy_input_mode": "interrupt",  # interrupt | queue | steer
+        "busy_input_mode": "interrupt",
        "bell_on_complete": False,
        "show_reasoning": False,
        "streaming": False,
@@ -868,7 +848,7 @@ DEFAULT_CONFIG = {
        "auto_thread": True,           # Auto-create threads on @mention in channels (like Slack)
        "reactions": True,             # Add 👀/✅/❌ reactions to messages during processing
        "channel_prompts": {},         # Per-channel ephemeral system prompts (forum parents apply to child threads)
-        # discord / discord_admin tools: restrict which actions the agent may call.
+        # discord_server tool: restrict which actions the agent may call.
        # Default (empty) = all actions allowed (subject to bot privileged intents).
        # Accepts comma-separated string ("list_guilds,list_channels,fetch_messages")
        # or YAML list. Unknown names are dropped with a warning at load time.
@@ -987,27 +967,6 @@ DEFAULT_CONFIG = {
        "backup_count": 3,     # Number of rotated backup files to keep
    },

-    # Remotely-hosted model catalog manifest.  When enabled, the CLI fetches
-    # curated model lists for OpenRouter and Nous Portal from this URL,
-    # falling back to the in-repo snapshot on network failure.  Lets us
-    # update model picker lists without shipping a hermes-agent release.
-    # The default URL is served by the docs site GitHub Pages deploy.
-    "model_catalog": {
-        "enabled": True,
-        "url": "https://hermes-agent.nousresearch.com/docs/api/model-catalog.json",
-        # Disk cache TTL in hours.  Beyond this, the CLI refetches on the
-        # next /model or `hermes model` invocation; network failures
-        # silently fall back to the stale cache.
-        "ttl_hours": 24,
-        # Optional per-provider override URLs for third parties that want
-        # to self-host their own curation list using the same schema.
-        # Example:
-        #   providers:
-        #     openrouter:
-        #       url: https://example.com/my-curation.json
-        "providers": {},
-    },
-
    # Network settings — workarounds for connectivity issues.
    "network": {
        # Force IPv4 connections.  On servers with broken or unreachable IPv6,
@@ -1044,27 +1003,6 @@ DEFAULT_CONFIG = {
        "min_interval_hours": 24,
    },

-    # Contextual first-touch onboarding hints (see agent/onboarding.py).
-    # Each hint is shown once per install and then latched here so it
-    # never fires again.  Users can wipe the section to re-see all hints.
-    "onboarding": {
-        "seen": {},
-    },
-
-    # ``hermes update`` behaviour.
-    "updates": {
-        # Run a full ``hermes backup``-style zip of HERMES_HOME before every
-        # ``hermes update``.  Backups land in ``<HERMES_HOME>/backups/`` and
-        # can be restored with ``hermes import <path>``.  Off by default —
-        # on large HERMES_HOME directories the zip can add minutes to every
-        # update.  Set to true to re-enable, or pass ``--backup`` to opt in
-        # for a single update run.
-        "pre_update_backup": False,
-        # How many pre-update backup zips to retain.  Older ones are pruned
-        # automatically after each successful backup.
-        "backup_keep": 5,
-    },
-
    # Config schema version - bump this when adding new required fields
    "_config_version": 22,
 }
@@ -1254,22 +1192,6 @@ OPTIONAL_ENV_VARS = {
        "category": "provider",
        "advanced": True,
    },
-    "GMI_API_KEY": {
-        "description": "GMI Cloud API key",
-        "prompt": "GMI Cloud API key",
-        "url": "https://www.gmicloud.ai/",
-        "password": True,
-        "category": "provider",
-        "advanced": True,
-    },
-    "GMI_BASE_URL": {
-        "description": "GMI Cloud base URL override",
-        "prompt": "GMI Cloud base URL (leave empty for default)",
-        "url": None,
-        "password": False,
-        "category": "provider",
-        "advanced": True,
-    },
    "MINIMAX_API_KEY": {
        "description": "MiniMax API key (international)",
        "prompt": "MiniMax API key",
@@ -1457,21 +1379,6 @@ OPTIONAL_ENV_VARS = {
        "category": "provider",
        "advanced": True,
    },
-    "AZURE_FOUNDRY_API_KEY": {
-        "description": "Azure Foundry API key for custom Azure endpoints",
-        "prompt": "Azure Foundry API Key",
-        "url": "https://ai.azure.com/",
-        "password": True,
-        "category": "provider",
-    },
-    "AZURE_FOUNDRY_BASE_URL": {
-        "description": "Azure Foundry base URL (set via 'hermes model' for endpoint-specific config)",
-        "prompt": "Azure Foundry base URL",
-        "url": None,
-        "password": False,
-        "category": "provider",
-        "advanced": True,
-    },

    # ── Tool API keys ──
    "EXA_API_KEY": {
@@ -1639,44 +1546,6 @@ OPTIONAL_ENV_VARS = {
        "category": "tool",
    },

-    # ── Bundled skills (opt-in: only needed if the user uses that skill) ──
-    # These use category="skill" (distinct from "tool") so the sandbox
-    # env blocklist in tools/environments/local.py does NOT rewrite them —
-    # skills legitimately need these passed through to curl via
-    # tools/env_passthrough.py when the user's skill calls out.
-    "NOTION_API_KEY": {
-        "description": "Notion integration token (used by the `notion` skill)",
-        "prompt": "Notion API key",
-        "url": "https://www.notion.so/my-integrations",
-        "password": True,
-        "category": "skill",
-        "advanced": True,
-    },
-    "LINEAR_API_KEY": {
-        "description": "Linear personal API key (used by the `linear` skill)",
-        "prompt": "Linear API key",
-        "url": "https://linear.app/settings/api",
-        "password": True,
-        "category": "skill",
-        "advanced": True,
-    },
-    "AIRTABLE_API_KEY": {
-        "description": "Airtable personal access token (used by the `airtable` skill)",
-        "prompt": "Airtable API key",
-        "url": "https://airtable.com/create/tokens",
-        "password": True,
-        "category": "skill",
-        "advanced": True,
-    },
-    "TENOR_API_KEY": {
-        "description": "Tenor API key for GIF search (used by the `gif-search` skill)",
-        "prompt": "Tenor API key",
-        "url": "https://developers.google.com/tenor/guides/quickstart",
-        "password": True,
-        "category": "skill",
-        "advanced": True,
-    },
-
    # ── Honcho ──
    "HONCHO_API_KEY": {
        "description": "Honcho API key for AI-native persistent memory",
@@ -2345,71 +2214,6 @@ def get_compatible_custom_providers(
    return compatible


-def get_custom_provider_context_length(
-    model: str,
-    base_url: str,
-    custom_providers: Optional[List[Dict[str, Any]]] = None,
-    config: Optional[Dict[str, Any]] = None,
-) -> Optional[int]:
-    """Look up a per-model ``context_length`` override from ``custom_providers``.
-
-    Matches any entry whose ``base_url`` equals ``base_url`` (trailing-slash
-    insensitive) and returns ``custom_providers[i].models.<model>.context_length``
-    if present and valid.  Returns ``None`` when no override applies.
-
-    This is the single source of truth for custom-provider context overrides,
-    used by:
-      * ``AIAgent.__init__`` (startup resolution)
-      * ``AIAgent.switch_model`` (mid-session ``/model`` switch)
-      * ``hermes_cli.model_switch.resolve_display_context_length`` (``/model`` confirmation display)
-      * ``gateway.run._format_session_info`` (``/info`` display)
-      * ``agent.model_metadata.get_model_context_length`` (when custom_providers is threaded through)
-
-    Before this helper existed, the lookup was duplicated in ``run_agent.py``'s
-    startup path only; every other path (notably ``/model`` switch) fell back
-    to the 128K default.  See #15779.
-    """
-    if not model or not base_url:
-        return None
-    if custom_providers is None:
-        try:
-            custom_providers = get_compatible_custom_providers(config)
-        except Exception:
-            if config is None:
-                return None
-            raw = config.get("custom_providers")
-            custom_providers = raw if isinstance(raw, list) else []
-    if not isinstance(custom_providers, list):
-        return None
-
-    target_url = (base_url or "").rstrip("/")
-    if not target_url:
-        return None
-
-    for entry in custom_providers:
-        if not isinstance(entry, dict):
-            continue
-        entry_url = (entry.get("base_url") or "").rstrip("/")
-        if not entry_url or entry_url != target_url:
-            continue
-        models = entry.get("models")
-        if not isinstance(models, dict):
-            continue
-        model_cfg = models.get(model)
-        if not isinstance(model_cfg, dict):
-            continue
-        raw_ctx = model_cfg.get("context_length")
-        if raw_ctx is None:
-            continue
-        try:
-            ctx = int(raw_ctx)
-        except (TypeError, ValueError):
-            continue
-        if ctx > 0:
-            return ctx
-    return None
-
-
 def check_config_version() -> Tuple[int, int]:
    """
    Check config version.
@@ -45,13 +45,8 @@ def _pending_file() -> Path:
    Each entry: ``{"url": "...", "expire_at": <unix_ts>}``.  Scheduled
    DELETEs used to be handled by spawning a detached Python process per
    paste that slept for 6 hours; those accumulated forever if the user
-    ran ``hermes debug share`` repeatedly.
-
-    Deletion is now driven by the gateway's cron ticker
-    (``gateway/run.py::_start_cron_ticker``) which calls
-    ``_sweep_expired_pastes`` once per hour.  ``hermes debug share`` also
-    runs an opportunistic sweep on entry as a fallback for CLI-only users
-    who never start the gateway.
+    ran ``hermes debug share`` repeatedly.  We now persist the schedule
+    to disk and sweep expired entries on the next debug invocation.
    """
    return get_hermes_home() / "pastes" / "pending.json"

@@ -228,10 +223,9 @@ def _schedule_auto_delete(urls: list[str], delay_seconds: int = _AUTO_DELETE_SEC
    interpreters that never exited until the sleep completed.

    The replacement is stateless: we append to ``~/.hermes/pastes/pending.json``
-    and the gateway's cron ticker sweeps expired entries once per hour.
-    ``hermes debug share`` also runs an opportunistic sweep as a fallback
-    for CLI-only users.  If neither runs again, paste.rs's own retention
-    policy handles cleanup.
+    and rely on opportunistic sweeps (``_sweep_expired_pastes``) called from
+    every ``hermes debug`` invocation.  If the user never runs ``hermes debug``
+    again, paste.rs's own retention policy handles cleanup.
    """
    _record_pending(urls, delay_seconds=delay_seconds)

@@ -46,7 +46,6 @@ _PROVIDER_ENV_HINTS = (
    "Z_AI_API_KEY",
    "KIMI_API_KEY",
    "KIMI_CN_API_KEY",
-    "GMI_API_KEY",
    "MINIMAX_API_KEY",
    "MINIMAX_CN_API_KEY",
    "KILOCODE_API_KEY",
@@ -321,11 +320,7 @@ def run_doctor(args):
                    known_providers.add("custom:" + name.lower().replace(" ", "-"))

            canonical_provider = provider
-            if (
-                provider
-                and _resolve_provider_full is not None
-                and provider not in ("auto", "custom")
-            ):
+            if provider and _resolve_provider_full is not None and provider != "auto":
                provider_def = _resolve_provider_full(provider, user_providers, custom_providers)
                canonical_provider = provider_def.id if provider_def is not None else None

@@ -938,7 +933,6 @@ def run_doctor(args):
        ("StepFun Step Plan",   ("STEPFUN_API_KEY",),                           "https://api.stepfun.ai/step_plan/v1/models", "STEPFUN_BASE_URL", True),
        ("Kimi / Moonshot (China)", ("KIMI_CN_API_KEY",),                    "https://api.moonshot.cn/v1/models",   None, True),
        ("Arcee AI",         ("ARCEEAI_API_KEY",),                            "https://api.arcee.ai/api/v1/models",  "ARCEE_BASE_URL", True),
-        ("GMI Cloud",        ("GMI_API_KEY",),                                "https://api.gmi-serving.com/v1/models", "GMI_BASE_URL", True),
        ("DeepSeek",         ("DEEPSEEK_API_KEY",),                           "https://api.deepseek.com/v1/models",  "DEEPSEEK_BASE_URL", True),
        ("Hugging Face",     ("HF_TOKEN",),                                   "https://router.huggingface.co/v1/models", "HF_BASE_URL", True),
        ("NVIDIA NIM",       ("NVIDIA_API_KEY",),                             "https://integrate.api.nvidia.com/v1/models", "NVIDIA_BASE_URL", True),
@@ -1,361 +0,0 @@
-"""
-hermes fallback — manage the fallback provider chain.
-
-Fallback providers are tried in order when the primary model fails with
-rate-limit, overload, or connection errors. See:
-https://hermes-agent.nousresearch.com/docs/user-guide/features/fallback-providers
-
-Subcommands:
-  hermes fallback [list]   Show the current fallback chain (default when no subcommand)
-  hermes fallback add      Pick provider + model via the same picker as `hermes model`,
-                           then append the selection to the chain
-  hermes fallback remove   Pick an entry to delete from the chain
-  hermes fallback clear    Remove all fallback entries
-
-Storage: ``fallback_providers`` in ``~/.hermes/config.yaml`` (top-level, list of
-``{provider, model, base_url?, api_mode?}`` dicts).  The legacy single-dict
-``fallback_model`` format is migrated to the new list format on first add.
-"""
-from __future__ import annotations
-
-import copy
-from typing import Any, Dict, List, Optional
-
-
-# ---------------------------------------------------------------------------
-# Helpers
-# ---------------------------------------------------------------------------
-
-def _read_chain(config: Dict[str, Any]) -> List[Dict[str, Any]]:
-    """Return the normalized fallback chain as a list of dicts.
-
-    Accepts both the new list format (``fallback_providers``) and the legacy
-    single-dict format (``fallback_model``).  The returned list is always a
-    fresh copy — callers can mutate without touching the config dict.
-    """
-    chain = config.get("fallback_providers") or []
-    if isinstance(chain, list):
-        result = [dict(e) for e in chain if isinstance(e, dict) and e.get("provider") and e.get("model")]
-        if result:
-            return result
-    legacy = config.get("fallback_model")
-    if isinstance(legacy, dict) and legacy.get("provider") and legacy.get("model"):
-        return [dict(legacy)]
-    if isinstance(legacy, list):
-        return [dict(e) for e in legacy if isinstance(e, dict) and e.get("provider") and e.get("model")]
-    return []
-
-
-def _write_chain(config: Dict[str, Any], chain: List[Dict[str, Any]]) -> None:
-    """Persist the chain to ``fallback_providers`` and clear legacy key."""
-    config["fallback_providers"] = chain
-    # Drop the legacy single-dict key on write so there's only one source of truth.
-    if "fallback_model" in config:
-        config.pop("fallback_model", None)
-
-
-def _format_entry(entry: Dict[str, Any]) -> str:
-    """One-line human-readable rendering of a fallback entry."""
-    provider = entry.get("provider", "?")
-    model = entry.get("model", "?")
-    base = entry.get("base_url")
-    suffix = f"  [{base}]" if base else ""
-    return f"{model}  (via {provider}){suffix}"
-
-
-def _extract_fallback_from_model_cfg(model_cfg: Any) -> Optional[Dict[str, Any]]:
-    """Pull the ``{provider, model, base_url?, api_mode?}`` dict from a ``config["model"]`` snapshot."""
-    if not isinstance(model_cfg, dict):
-        return None
-    provider = (model_cfg.get("provider") or "").strip()
-    # The picker writes the selected model to ``model.default``.
-    model = (model_cfg.get("default") or model_cfg.get("model") or "").strip()
-    if not provider or not model:
-        return None
-    entry: Dict[str, Any] = {"provider": provider, "model": model}
-    base_url = (model_cfg.get("base_url") or "").strip()
-    if base_url:
-        entry["base_url"] = base_url
-    api_mode = (model_cfg.get("api_mode") or "").strip()
-    if api_mode:
-        entry["api_mode"] = api_mode
-    return entry
-
-
-def _snapshot_auth_active_provider() -> Any:
-    """Return the current ``active_provider`` in auth.json, or a sentinel if unavailable."""
-    try:
-        from hermes_cli.auth import _load_auth_store
-        store = _load_auth_store()
-        return store.get("active_provider")
-    except Exception:
-        return None
-
-
-def _restore_auth_active_provider(value: Any) -> None:
-    """Write back a previously snapshotted ``active_provider`` value."""
-    try:
-        from hermes_cli.auth import _auth_store_lock, _load_auth_store, _save_auth_store
-        with _auth_store_lock():
-            store = _load_auth_store()
-            store["active_provider"] = value
-            _save_auth_store(store)
-    except Exception:
-        # Best-effort — if auth.json can't be restored, the user's primary
-        # provider may have been deactivated by the picker.  They can re-run
-        # `hermes model` to fix it.  Don't fail the fallback add.
-        pass
-
-
-# ---------------------------------------------------------------------------
-# Subcommand handlers
-# ---------------------------------------------------------------------------
-
-def cmd_fallback_list(args) -> None:  # noqa: ARG001
-    """Print the current fallback chain."""
-    from hermes_cli.config import load_config
-
-    config = load_config()
-    chain = _read_chain(config)
-
-    print()
-    if not chain:
-        print("  No fallback providers configured.")
-        print()
-        print("  Add one with:  hermes fallback add")
-        print()
-        return
-
-    primary = _describe_primary(config)
-    if primary:
-        print(f"  Primary:   {primary}")
-        print()
-    print(f"  Fallback chain ({len(chain)} {'entry' if len(chain) == 1 else 'entries'}):")
-    for i, entry in enumerate(chain, 1):
-        print(f"    {i}. {_format_entry(entry)}")
-    print()
-    print("  Tried in order when the primary fails (rate-limit, 5xx, connection errors).")
-    print("  Docs: https://hermes-agent.nousresearch.com/docs/user-guide/features/fallback-providers")
-    print()
-
-
-def _describe_primary(config: Dict[str, Any]) -> Optional[str]:
-    """One-line description of the primary model for display purposes."""
-    model_cfg = config.get("model")
-    if isinstance(model_cfg, dict):
-        provider = (model_cfg.get("provider") or "?").strip() or "?"
-        model = (model_cfg.get("default") or model_cfg.get("model") or "?").strip() or "?"
-        return f"{model}  (via {provider})"
-    if isinstance(model_cfg, str) and model_cfg.strip():
-        return model_cfg.strip()
-    return None
-
-
-def cmd_fallback_add(args) -> None:
-    """Launch the same picker as `hermes model`, then append the selection to the chain."""
-    from hermes_cli.main import _require_tty, select_provider_and_model
-    from hermes_cli.config import load_config, save_config
-
-    _require_tty("fallback add")
-
-    # Snapshot BEFORE the picker runs so we can distinguish "user actually
-    # picked something" from "user cancelled" by comparing before/after.
-    before_cfg = load_config()
-    model_before = copy.deepcopy(before_cfg.get("model"))
-    active_provider_before = _snapshot_auth_active_provider()
-
-    print()
-    print("  Adding a fallback provider.  The picker below is the same one used by")
-    print("  `hermes model` — select the provider + model you want as a fallback.")
-    print()
-
-    try:
-        select_provider_and_model(args=args)
-    except SystemExit:
-        # Some provider flows exit on auth failure — restore state and re-raise.
-        _restore_model_cfg(model_before)
-        _restore_auth_active_provider(active_provider_before)
-        raise
-
-    # Read the post-picker state to see what the user selected.
-    after_cfg = load_config()
-    model_after = after_cfg.get("model")
-
-    new_entry = _extract_fallback_from_model_cfg(model_after)
-    if not new_entry:
-        # Picker didn't complete (user cancelled or flow bailed).  Nothing to do.
-        _restore_model_cfg(model_before)
-        _restore_auth_active_provider(active_provider_before)
-        print()
-        print("  No fallback added.")
-        return
-
-    # Picker picked the same thing that's already the primary → nothing changed,
-    # and there's nothing useful to add as a fallback to itself.
-    primary_entry = _extract_fallback_from_model_cfg(model_before)
-    if primary_entry and primary_entry["provider"] == new_entry["provider"] \
-            and primary_entry["model"] == new_entry["model"]:
-        _restore_model_cfg(model_before)
-        _restore_auth_active_provider(active_provider_before)
-        print()
-        print(f"  Selected model matches the current primary ({_format_entry(new_entry)}).")
-        print("  A provider cannot be a fallback for itself — no change.")
-        return
-
-    # Reload the config with the primary restored, then append the new entry
-    # to ``fallback_providers``.  We deliberately re-load (rather than mutating
-    # ``after_cfg``) because the picker may have touched other top-level keys
-    # (custom_providers, providers credentials) that we want to keep.
-    _restore_model_cfg(model_before)
-    _restore_auth_active_provider(active_provider_before)
-
-    final_cfg = load_config()
-    chain = _read_chain(final_cfg)
-
-    # Reject exact-duplicate fallback entries.
-    for existing in chain:
-        if existing.get("provider") == new_entry["provider"] \
-                and existing.get("model") == new_entry["model"]:
-            print()
-            print(f"  {_format_entry(new_entry)} is already in the fallback chain — skipped.")
-            return
-
-    chain.append(new_entry)
-    _write_chain(final_cfg, chain)
-    save_config(final_cfg)
-
-    print()
-    print(f"  Added fallback: {_format_entry(new_entry)}")
-    print(f"  Chain is now {len(chain)} {'entry' if len(chain) == 1 else 'entries'} long.")
-    print()
-    print("  Run `hermes fallback list` to view, or `hermes fallback remove` to delete.")
-
-
-def _restore_model_cfg(model_before: Any) -> None:
-    """Restore ``config["model"]`` to a previously-captured snapshot."""
-    from hermes_cli.config import load_config, save_config
-
-    cfg = load_config()
-    if model_before is None:
-        cfg.pop("model", None)
-    else:
-        cfg["model"] = copy.deepcopy(model_before)
-    save_config(cfg)
-
-
-def cmd_fallback_remove(args) -> None:  # noqa: ARG001
-    """Pick an entry from the chain and remove it."""
-    from hermes_cli.config import load_config, save_config
-
-    config = load_config()
-    chain = _read_chain(config)
-
-    if not chain:
-        print()
-        print("  No fallback providers configured — nothing to remove.")
-        print()
-        return
-
-    choices = [_format_entry(e) for e in chain]
-    choices.append("Cancel")
-
-    try:
-        from hermes_cli.setup import _curses_prompt_choice
-        idx = _curses_prompt_choice("Select a fallback to remove:", choices, 0)
-    except Exception:
-        idx = _numbered_pick("Select a fallback to remove:", choices)
-
-    if idx is None or idx < 0 or idx >= len(chain):
-        print()
-        print("  Cancelled — no change.")
-        return
-
-    removed = chain.pop(idx)
-    _write_chain(config, chain)
-    save_config(config)
-
-    print()
-    print(f"  Removed fallback: {_format_entry(removed)}")
-    if chain:
-        print(f"  Chain is now {len(chain)} {'entry' if len(chain) == 1 else 'entries'} long.")
-    else:
-        print("  Fallback chain is now empty.")
-    print()
-
-
-def cmd_fallback_clear(args) -> None:  # noqa: ARG001
-    """Remove all fallback entries (with confirmation)."""
-    from hermes_cli.config import load_config, save_config
-
-    config = load_config()
-    chain = _read_chain(config)
-
-    if not chain:
-        print()
-        print("  No fallback providers configured — nothing to clear.")
-        print()
-        return
-
-    print()
-    print(f"  Current fallback chain ({len(chain)} {'entry' if len(chain) == 1 else 'entries'}):")
-    for i, entry in enumerate(chain, 1):
-        print(f"    {i}. {_format_entry(entry)}")
-    print()
-    try:
-        resp = input("  Clear all entries? [y/N]: ").strip().lower()
-    except (KeyboardInterrupt, EOFError):
-        print()
-        print("  Cancelled.")
-        return
-    if resp not in ("y", "yes"):
-        print("  Cancelled — no change.")
-        return
-
-    _write_chain(config, [])
-    save_config(config)
-    print()
-    print("  Fallback chain cleared.")
-    print()
-
-
-def _numbered_pick(question: str, choices: List[str]) -> Optional[int]:
-    """Fallback numbered-list picker when curses is unavailable."""
-    print(question)
-    for i, c in enumerate(choices, 1):
-        print(f"  {i}. {c}")
-    print()
-    while True:
-        try:
-            val = input(f"Choice [1-{len(choices)}]: ").strip()
-            if not val:
-                return None
-            idx = int(val) - 1
-            if 0 <= idx < len(choices):
-                return idx
-            print(f"Please enter 1-{len(choices)}")
-        except ValueError:
-            print("Please enter a number")
-        except (KeyboardInterrupt, EOFError):
-            print()
-            return None
-
-
-# ---------------------------------------------------------------------------
-# Dispatch
-# ---------------------------------------------------------------------------
-
-def cmd_fallback(args) -> None:
-    """Top-level dispatcher for ``hermes fallback [subcommand]``."""
-    sub = getattr(args, "fallback_command", None)
-    if sub in (None, "", "list", "ls"):
-        cmd_fallback_list(args)
-    elif sub == "add":
-        cmd_fallback_add(args)
-    elif sub in ("remove", "rm"):
-        cmd_fallback_remove(args)
-    elif sub == "clear":
-        cmd_fallback_clear(args)
-    else:
-        print(f"Unknown fallback subcommand: {sub}")
-        print("Use one of: list, add, remove, clear")
-        raise SystemExit(2)
@@ -2724,24 +2724,6 @@ _PLATFORMS = [
             "help": "OpenID to deliver cron results and notifications to."},
        ],
    },
-    {
-        "key": "yuanbao",
-        "label": "Yuanbao",
-        "emoji": "💎",
-        "token_var": "YUANBAO_APP_ID",
-        "setup_instructions": [
-            "1. Download the Yuanbao app from https://yuanbao.tencent.com/",
-            "2. In the app, go to PAI → My Bot and create a new bot",
-            "3. After the bot is created, copy the App ID and App Secret",
-            "4. Enter them below and Hermes will connect automatically over WebSocket",
-        ],
-        "vars": [
-            {"name": "YUANBAO_APP_ID", "prompt": "App ID", "password": False,
-             "help": "The App ID from your Yuanbao IM Bot credentials."},
-            {"name": "YUANBAO_APP_SECRET", "prompt": "App Secret", "password": True,
-             "help": "The App Secret (used for HMAC signing) from your Yuanbao IM Bot."},
-        ],
-    },
 ]


@@ -3126,12 +3108,6 @@ def _setup_wecom():
    print_success("💬 WeCom configured!")


-def _setup_yuanbao():
-    """Configure Yuanbao via the standard platform setup."""
-    yuanbao_platform = next(p for p in _PLATFORMS if p["key"] == "yuanbao")
-    _setup_standard_platform(yuanbao_platform)
-
-
 def _is_service_installed() -> bool:
    """Check if the gateway is installed as a system service."""
    if supports_systemd_services():
@@ -125,7 +125,6 @@ _DEFAULT_PAYLOADS = {
        "task_id": "test-task",
        "tool_call_id": "test-call",
        "result": '{"output": "hello"}',
-        "duration_ms": 42,
    },
    "pre_llm_call": {
        "session_id": "test-session",
@@ -1,329 +0,0 @@
-"""Remote model catalog fetcher.
-
-The Hermes docs site hosts a JSON manifest of curated models for providers
-we want to update without shipping a release (currently OpenRouter and
-Nous Portal). This module fetches, validates, and caches that manifest,
-falling back to the in-repo hardcoded lists when the network is unavailable.
-
-Pipeline
--------
-1. ``get_catalog()`` — returns a parsed manifest dict.
-   - Checks in-process cache (invalidated by TTL).
-   - Reads disk cache at ``~/.hermes/cache/model_catalog.json``.
-   - Fetches the master URL if disk cache is stale or missing.
-   - On any fetch failure, keeps using the stale cache (or empty dict).
-
-2. ``get_curated_openrouter_models()`` / ``get_curated_nous_models()`` —
-   thin accessors returning the shapes existing callers expect. Each
-   falls back to the in-repo hardcoded list on any lookup failure.
-
-Schema (version 1)
------------------
-::
-
-    {
-      "version": 1,
-      "updated_at": "2026-04-25T22:00:00Z",
-      "metadata": {...},                # free-form
-      "providers": {
-        "openrouter": {
-          "metadata": {...},            # free-form
-          "models": [
-            {"id": "vendor/model", "description": "recommended",
-             "metadata": {...}}          # free-form, model-level
-          ]
-        },
-        "nous": {...}
-      }
-    }
-
-Unknown fields are ignored — extra metadata can be added at either level
-without bumping ``version``. ``version`` bumps are reserved for
-breaking changes (renaming ``providers``, changing ``models`` shape).
-"""
-
-from __future__ import annotations
-
-import json
-import logging
-import os
-import time
-import urllib.error
-import urllib.request
-from pathlib import Path
-from typing import Any
-
-from hermes_cli import __version__ as _HERMES_VERSION
-
-logger = logging.getLogger(__name__)
-
-# ---------------------------------------------------------------------------
-# Constants
-# ---------------------------------------------------------------------------
-
-DEFAULT_CATALOG_URL = (
-    "https://hermes-agent.nousresearch.com/docs/api/model-catalog.json"
-)
-DEFAULT_TTL_HOURS = 24
-DEFAULT_FETCH_TIMEOUT = 8.0
-SUPPORTED_SCHEMA_VERSION = 1
-
-_HERMES_USER_AGENT = f"hermes-cli/{_HERMES_VERSION}"
-
-# In-process cache to avoid repeated disk + parse work across multiple
-# calls within the same session. Invalidated by TTL against the disk file's
-# mtime, so calling code never has to think about this.
-_catalog_cache: dict[str, Any] | None = None
-_catalog_cache_source_mtime: float = 0.0
-
-
-# ---------------------------------------------------------------------------
-# Config
-# ---------------------------------------------------------------------------
-
-
-def _load_catalog_config() -> dict[str, Any]:
-    """Load the ``model_catalog`` config block with defaults filled in."""
-    try:
-        from hermes_cli.config import load_config
-        cfg = load_config() or {}
-    except Exception:
-        cfg = {}
-
-    raw = cfg.get("model_catalog")
-    if not isinstance(raw, dict):
-        raw = {}
-
-    return {
-        "enabled": bool(raw.get("enabled", True)),
-        "url": str(raw.get("url") or DEFAULT_CATALOG_URL),
-        "ttl_hours": float(raw.get("ttl_hours") or DEFAULT_TTL_HOURS),
-        "providers": raw.get("providers") if isinstance(raw.get("providers"), dict) else {},
-    }
-
-
-def _cache_path() -> Path:
-    """Return the disk cache path. Import lazily so tests can monkeypatch home."""
-    from hermes_constants import get_hermes_home
-    return get_hermes_home() / "cache" / "model_catalog.json"
-
-
-# ---------------------------------------------------------------------------
-# Fetch + validate + cache
-# ---------------------------------------------------------------------------
-
-
-def _fetch_manifest(url: str, timeout: float) -> dict[str, Any] | None:
-    """HTTP GET the manifest URL and return a parsed dict, or None on failure."""
-    try:
-        req = urllib.request.Request(
-            url,
-            headers={
-                "Accept": "application/json",
-                "User-Agent": _HERMES_USER_AGENT,
-            },
-        )
-        with urllib.request.urlopen(req, timeout=timeout) as resp:
-            data = json.loads(resp.read().decode())
-    except (urllib.error.URLError, TimeoutError, json.JSONDecodeError, OSError) as exc:
-        logger.info("model catalog fetch failed (%s): %s", url, exc)
-        return None
-    except Exception as exc:  # pragma: no cover — defensive
-        logger.info("model catalog fetch errored (%s): %s", url, exc)
-        return None
-
-    if not _validate_manifest(data):
-        logger.info("model catalog at %s failed schema validation", url)
-        return None
-
-    return data
-
-
-def _validate_manifest(data: Any) -> bool:
-    """Return True when ``data`` matches the minimum manifest shape."""
-    if not isinstance(data, dict):
-        return False
-    version = data.get("version")
-    if not isinstance(version, int) or version > SUPPORTED_SCHEMA_VERSION:
-        # Future schema version we don't understand — refuse rather than
-        # guess. Older schemas (version < 1) aren't supported either.
-        return False
-    providers = data.get("providers")
-    if not isinstance(providers, dict):
-        return False
-    for pname, pblock in providers.items():
-        if not isinstance(pname, str) or not isinstance(pblock, dict):
-            return False
-        models = pblock.get("models")
-        if not isinstance(models, list):
-            return False
-        for m in models:
-            if not isinstance(m, dict):
-                return False
-            if not isinstance(m.get("id"), str) or not m["id"].strip():
-                return False
-    return True
-
-
-def _read_disk_cache() -> tuple[dict[str, Any] | None, float]:
-    """Return ``(data_or_none, mtime)``. mtime is 0 if file is missing."""
-    path = _cache_path()
-    try:
-        mtime = path.stat().st_mtime
-    except (OSError, FileNotFoundError):
-        return (None, 0.0)
-    try:
-        with open(path) as fh:
-            data = json.load(fh)
-    except (OSError, json.JSONDecodeError):
-        return (None, 0.0)
-    if not _validate_manifest(data):
-        return (None, 0.0)
-    return (data, mtime)
-
-
-def _write_disk_cache(data: dict[str, Any]) -> None:
-    path = _cache_path()
-    try:
-        path.parent.mkdir(parents=True, exist_ok=True)
-        tmp = path.with_suffix(path.suffix + ".tmp")
-        with open(tmp, "w") as fh:
-            json.dump(data, fh, indent=2)
-            fh.write("\n")
-        os.replace(tmp, path)
-    except OSError as exc:
-        logger.info("model catalog cache write failed: %s", exc)
-
-
-# ---------------------------------------------------------------------------
-# Public API
-# ---------------------------------------------------------------------------
-
-
-def get_catalog(*, force_refresh: bool = False) -> dict[str, Any]:
-    """Return the parsed model catalog manifest, or an empty dict on failure.
-
-    Callers should treat a missing provider/model as "use the in-repo fallback"
-    — never raise from this function so the CLI keeps working offline.
-    """
-    global _catalog_cache, _catalog_cache_source_mtime
-
-    cfg = _load_catalog_config()
-    if not cfg["enabled"]:
-        return {}
-
-    ttl_seconds = max(0.0, cfg["ttl_hours"] * 3600.0)
-
-    disk_data, disk_mtime = _read_disk_cache()
-    now = time.time()
-    disk_fresh = disk_data is not None and (now - disk_mtime) < ttl_seconds
-
-    # In-process cache hit: disk hasn't changed since we loaded it and still fresh.
-    if (
-        not force_refresh
-        and _catalog_cache is not None
-        and disk_data is not None
-        and disk_mtime == _catalog_cache_source_mtime
-        and disk_fresh
-    ):
-        return _catalog_cache
-
-    # Disk is fresh enough — use it without a network hit.
-    if not force_refresh and disk_fresh and disk_data is not None:
-        _catalog_cache = disk_data
-        _catalog_cache_source_mtime = disk_mtime
-        return disk_data
-
-    # Need to (re)fetch. If it fails, fall back to any stale disk copy.
-    fetched = _fetch_manifest(cfg["url"], DEFAULT_FETCH_TIMEOUT)
-    if fetched is not None:
-        _write_disk_cache(fetched)
-        new_disk_data, new_mtime = _read_disk_cache()
-        if new_disk_data is not None:
-            _catalog_cache = new_disk_data
-            _catalog_cache_source_mtime = new_mtime
-            return new_disk_data
-        _catalog_cache = fetched
-        _catalog_cache_source_mtime = now
-        return fetched
-
-    if disk_data is not None:
-        _catalog_cache = disk_data
-        _catalog_cache_source_mtime = disk_mtime
-        return disk_data
-
-    return {}
-
-
-def _fetch_provider_override(provider: str) -> dict[str, Any] | None:
-    """If ``model_catalog.providers.<name>.url`` is set, fetch that instead."""
-    cfg = _load_catalog_config()
-    if not cfg["enabled"]:
-        return None
-    provider_cfg = cfg["providers"].get(provider)
-    if not isinstance(provider_cfg, dict):
-        return None
-    override_url = provider_cfg.get("url")
-    if not isinstance(override_url, str) or not override_url.strip():
-        return None
-    # Override fetches skip the disk cache because they're usually
-    # third-party self-hosted. Re-request on every call but with a short
-    # timeout so they don't block the picker.
-    return _fetch_manifest(override_url.strip(), DEFAULT_FETCH_TIMEOUT)
-
-
-def _get_provider_block(provider: str) -> dict[str, Any] | None:
-    """Return the provider's manifest block, respecting per-provider overrides."""
-    override = _fetch_provider_override(provider)
-    if override is not None:
-        block = override.get("providers", {}).get(provider)
-        if isinstance(block, dict):
-            return block
-
-    catalog = get_catalog()
-    if not catalog:
-        return None
-    block = catalog.get("providers", {}).get(provider)
-    return block if isinstance(block, dict) else None
-
-
-def get_curated_openrouter_models() -> list[tuple[str, str]] | None:
-    """Return OpenRouter's curated ``[(id, description), ...]`` from the manifest.
-
-    Returns ``None`` when the manifest is unavailable, so callers can fall
-    back to their hardcoded list.
-    """
-    block = _get_provider_block("openrouter")
-    if not block:
-        return None
-    out: list[tuple[str, str]] = []
-    for m in block.get("models", []):
-        mid = str(m.get("id") or "").strip()
-        if not mid:
-            continue
-        desc = str(m.get("description") or "")
-        out.append((mid, desc))
-    return out or None
-
-
-def get_curated_nous_models() -> list[str] | None:
-    """Return Nous Portal's curated list of model ids from the manifest.
-
-    Returns ``None`` when the manifest is unavailable.
-    """
-    block = _get_provider_block("nous")
-    if not block:
-        return None
-    out: list[str] = []
-    for m in block.get("models", []):
-        mid = str(m.get("id") or "").strip()
-        if mid:
-            out.append(mid)
-    return out or None
-
-
-def reset_cache() -> None:
-    """Clear the in-process cache. Used by tests and ``hermes model --refresh``."""
-    global _catalog_cache, _catalog_cache_source_mtime
-    _catalog_cache = None
-    _catalog_cache_source_mtime = 0.0
@@ -533,7 +533,6 @@ def resolve_display_context_length(
    base_url: str = "",
    api_key: str = "",
    model_info: Optional[ModelInfo] = None,
-    custom_providers: list | None = None,
 ) -> Optional[int]:
    """Resolve the context length to show in /model output.

@@ -544,11 +543,6 @@ def resolve_display_context_length(
    about Codex OAuth, Copilot, Nous, and falls back to models.dev for the
    rest.

-    When ``custom_providers`` is provided, per-model ``context_length``
-    overrides from ``custom_providers[].models.<id>.context_length`` are
-    honored — this closes #15779 where ``/model`` switch ignored user-set
-    overrides.
-
    Prefer the provider-aware value; fall back to ``model_info.context_window``
    only if the resolver returns nothing.
    """
@@ -559,7 +553,6 @@ def resolve_display_context_length(
            base_url=base_url or "",
            api_key=api_key or "",
            provider=provider or None,
-            custom_providers=custom_providers,
        )
        if ctx:
            return int(ctx)
@@ -33,6 +33,8 @@ COPILOT_REASONING_EFFORTS_O_SERIES = ["low", "medium", "high"]
 # (model_id, display description shown in menus)
 OPENROUTER_MODELS: list[tuple[str, str]] = [
    ("moonshotai/kimi-k2.6",            "recommended"),
+    ("deepseek/deepseek-v4-pro",        ""),
+    ("deepseek/deepseek-v4-flash",      ""),
    ("anthropic/claude-opus-4.7",       ""),
    ("anthropic/claude-opus-4.6",       ""),
    ("anthropic/claude-sonnet-4.6",     ""),
@@ -109,6 +111,8 @@ def _codex_curated_models() -> list[str]:
 _PROVIDER_MODELS: dict[str, list[str]] = {
    "nous": [
        "moonshotai/kimi-k2.6",
+        "deepseek/deepseek-v4-pro",
+        "deepseek/deepseek-v4-flash",
        "xiaomi/mimo-v2.5-pro",
        "xiaomi/mimo-v2.5",
        "anthropic/claude-opus-4.7",
@@ -278,14 +282,6 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "trinity-large-preview",
        "trinity-mini",
    ],
-    "gmi": [
-        "zai-org/GLM-5.1-FP8",
-        "deepseek-ai/DeepSeek-V3.2",
-        "moonshotai/Kimi-K2.5",
-        "google/gemini-3.1-flash-lite-preview",
-        "anthropic/claude-sonnet-4.6",
-        "openai/gpt-5.4",
-    ],
    "opencode-zen": [
        "kimi-k2.5",
        "gpt-5.4-pro",
@@ -387,9 +383,6 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "us.meta.llama4-maverick-17b-instruct-v1:0",
        "us.meta.llama4-scout-17b-instruct-v1:0",
    ],
-    # Azure Foundry: user-provided endpoint and model.
-    # Empty list because models depend on the endpoint configuration.
-    "azure-foundry": [],
 }

 # Vercel AI Gateway: derive the bare-model-id catalog from the curated
@@ -717,6 +710,7 @@ class ProviderEntry(NamedTuple):
    label: str
    tui_desc: str   # detailed description for `hermes model` TUI

+
 CANONICAL_PROVIDERS: list[ProviderEntry] = [
    ProviderEntry("nous",           "Nous Portal",              "Nous Portal (Nous Research subscription)"),
    ProviderEntry("openrouter",     "OpenRouter",               "OpenRouter (100+ models, pay-per-use)"),
@@ -742,12 +736,10 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
    ProviderEntry("alibaba",        "Alibaba Cloud (DashScope)","Alibaba Cloud / DashScope Coding (Qwen + multi-provider)"),
    ProviderEntry("ollama-cloud",   "Ollama Cloud",             "Ollama Cloud (cloud-hosted open models — ollama.com)"),
    ProviderEntry("arcee",          "Arcee AI",                 "Arcee AI (Trinity models — direct API)"),
-    ProviderEntry("gmi",            "GMI Cloud",                "GMI Cloud (multi-model direct API)"),
    ProviderEntry("kilocode",       "Kilo Code",                "Kilo Code (Kilo Gateway API)"),
    ProviderEntry("opencode-zen",   "OpenCode Zen",             "OpenCode Zen (35+ curated models, pay-as-you-go)"),
    ProviderEntry("opencode-go",    "OpenCode Go",              "OpenCode Go (open models, $10/month subscription)"),
    ProviderEntry("bedrock",        "AWS Bedrock",              "AWS Bedrock (Claude, Nova, Llama, DeepSeek — IAM or API key)"),
-    ProviderEntry("azure-foundry",  "Azure Foundry",            "Azure Foundry (OpenAI-style or Anthropic-style endpoint — your Azure AI deployment)"),
 ]

 # Derived dicts — used throughout the codebase
@@ -777,8 +769,6 @@ _PROVIDER_ALIASES = {
    "stepfun-coding-plan": "stepfun",
    "arcee-ai": "arcee",
    "arceeai": "arcee",
-    "gmi-cloud": "gmi",
-    "gmicloud": "gmi",
    "minimax-china": "minimax-cn",
    "minimax_cn": "minimax-cn",
    "claude": "anthropic",
@@ -882,16 +872,7 @@ def fetch_openrouter_models(
    if _openrouter_catalog_cache is not None and not force_refresh:
        return list(_openrouter_catalog_cache)

-    # Prefer the remotely-hosted catalog manifest; fall back to the in-repo
-    # snapshot when the manifest is unreachable. Both are curated lists that
-    # drive the picker; the OpenRouter live /v1/models filter (tool support,
-    # free pricing) is applied on top either way.
-    try:
-        from hermes_cli.model_catalog import get_curated_openrouter_models
-        remote = get_curated_openrouter_models()
-    except Exception:
-        remote = None
-    fallback = list(remote) if remote else list(OPENROUTER_MODELS)
+    fallback = list(OPENROUTER_MODELS)
    preferred_ids = [mid for mid, _ in fallback]

    try:
@@ -944,24 +925,6 @@ def model_ids(*, force_refresh: bool = False) -> list[str]:
    return [mid for mid, _ in fetch_openrouter_models(force_refresh=force_refresh)]


-def get_curated_nous_model_ids() -> list[str]:
-    """Return the curated Nous Portal model-id list.
-
-    Prefers the remotely-hosted catalog manifest (published under
-    ``website/static/api/model-catalog.json``); falls back to the in-repo
-    snapshot in ``_PROVIDER_MODELS["nous"]`` when the manifest is
-    unreachable. Always returns a list (never None).
-    """
-    try:
-        from hermes_cli.model_catalog import get_curated_nous_models
-        remote = get_curated_nous_models()
-    except Exception:
-        remote = None
-    if remote:
-        return list(remote)
-    return list(_PROVIDER_MODELS.get("nous", []))
-
-
 def _ai_gateway_model_is_free(pricing: Any) -> bool:
    """Return True if an AI Gateway model has $0 input AND output pricing."""
    if not isinstance(pricing, dict):
@@ -1416,124 +1379,6 @@ def curated_models_for_provider(
    return [(m, "") for m in models]


-def _provider_keys(provider: str) -> set[str]:
-    key = (provider or "").strip().lower()
-    normalized = normalize_provider(provider)
-    return {k for k in (key, normalized) if k}
-
-
-def _model_in_provider_catalog(name_lower: str, providers: set[str]) -> bool:
-    return any(
-        name_lower == model.lower()
-        for provider in providers
-        for model in _PROVIDER_MODELS.get(provider, [])
-    )
-
-
-_AGGREGATOR_PROVIDERS = frozenset(
-    {"nous", "openrouter", "ai-gateway", "copilot", "kilocode"}
-)
-
-
-def _resolve_static_model_alias(
-    name_lower: str,
-    current_keys: set[str],
-) -> Optional[tuple[str, str]]:
-    """Resolve short aliases (e.g. sonnet/opus) using static catalogs only."""
-    try:
-        from hermes_cli.model_switch import MODEL_ALIASES
-    except Exception:
-        return None
-
-    identity = MODEL_ALIASES.get(name_lower)
-    if identity is None:
-        return None
-
-    vendor = identity.vendor
-    family = identity.family
-
-    def _match(provider: str) -> Optional[str]:
-        models = _PROVIDER_MODELS.get(provider, [])
-        if not models:
-            return None
-        prefix = (
-            f"{vendor}/{family}"
-            if provider in _AGGREGATOR_PROVIDERS
-            else family
-        ).lower()
-        for model in models:
-            if model.lower().startswith(prefix):
-                return model
-        return None
-
-    for provider in current_keys:
-        if matched := _match(provider):
-            return provider, matched
-
-    for provider in _PROVIDER_MODELS:
-        if provider in current_keys or provider in _AGGREGATOR_PROVIDERS:
-            continue
-        if matched := _match(provider):
-            return provider, matched
-
-    for provider in _AGGREGATOR_PROVIDERS:
-        if provider in current_keys and (matched := _match(provider)):
-            return provider, matched
-
-    return None
-
-
-def detect_static_provider_for_model(
-    model_name: str,
-    current_provider: str,
-) -> Optional[tuple[str, str]]:
-    """Auto-detect a provider from static catalogs only.
-
-    Returns ``(provider_id, model_name)``. The model name may be remapped
-    when a static alias or bare provider name resolves to a catalog default.
-    Returns ``None`` when no confident match is found.
-    """
-    name = (model_name or "").strip()
-    if not name:
-        return None
-
-    name_lower = name.lower()
-    current_keys = _provider_keys(current_provider)
-
-    alias_match = _resolve_static_model_alias(name_lower, current_keys)
-    if alias_match:
-        return alias_match
-
-    # --- Step 0: bare provider name typed as model ---
-    # If someone types `/model nous` or `/model anthropic`, treat it as a
-    # provider switch and pick the first model from that provider's catalog.
-    # Skip "custom" and "openrouter" — custom has no model catalog, and
-    # openrouter requires an explicit model name to be useful.
-    resolved_provider = _PROVIDER_ALIASES.get(name_lower, name_lower)
-    if resolved_provider not in {"custom", "openrouter"}:
-        default_models = _PROVIDER_MODELS.get(resolved_provider, [])
-        if (
-            resolved_provider in _PROVIDER_LABELS
-            and default_models
-            and resolved_provider not in current_keys
-        ):
-            return (resolved_provider, default_models[0])
-
-    # Aggregators list other providers' models — never auto-switch TO them
-    # If the model belongs to the current provider's catalog, don't suggest switching
-    if _model_in_provider_catalog(name_lower, current_keys):
-        return None
-
-    # --- Step 1: check static provider catalogs for a direct match ---
-    for pid, models in _PROVIDER_MODELS.items():
-        if pid in current_keys or pid in _AGGREGATOR_PROVIDERS:
-            continue
-        if any(name_lower == m.lower() for m in models):
-            return (pid, name)
-
-    return None
-
-
 def detect_provider_for_model(
    model_name: str,
    current_provider: str,
@@ -1546,19 +1391,86 @@ def detect_provider_for_model(

    Priority:
    0. Bare provider name → switch to that provider's default model
-    1. Direct provider static catalog match
-    2. OpenRouter catalog match
+    1. Direct provider with credentials (highest)
+    2. Direct provider without credentials → remap to OpenRouter slug
+    3. OpenRouter catalog match
    """
    name = (model_name or "").strip()
    if not name:
        return None

-    static_match = detect_static_provider_for_model(name, current_provider)
-    if static_match:
-        return static_match
-    if _model_in_provider_catalog(name.lower(), _provider_keys(current_provider)):
+    name_lower = name.lower()
+
+    # --- Step 0: bare provider name typed as model ---
+    # If someone types `/model nous` or `/model anthropic`, treat it as a
+    # provider switch and pick the first model from that provider's catalog.
+    # Skip "custom" and "openrouter" — custom has no model catalog, and
+    # openrouter requires an explicit model name to be useful.
+    resolved_provider = _PROVIDER_ALIASES.get(name_lower, name_lower)
+    if resolved_provider not in {"custom", "openrouter"}:
+        default_models = _PROVIDER_MODELS.get(resolved_provider, [])
+        if (
+            resolved_provider in _PROVIDER_LABELS
+            and default_models
+            and resolved_provider != normalize_provider(current_provider)
+        ):
+            return (resolved_provider, default_models[0])
+
+    # Aggregators list other providers' models — never auto-switch TO them
+    _AGGREGATORS = {"nous", "openrouter", "ai-gateway", "copilot", "kilocode"}
+
+    # If the model belongs to the current provider's catalog, don't suggest switching
+    current_models = _PROVIDER_MODELS.get(current_provider, [])
+    if any(name_lower == m.lower() for m in current_models):
        return None

+    # --- Step 1: check static provider catalogs for a direct match ---
+    direct_match: Optional[str] = None
+    for pid, models in _PROVIDER_MODELS.items():
+        if pid == current_provider or pid in _AGGREGATORS:
+            continue
+        if any(name_lower == m.lower() for m in models):
+            direct_match = pid
+            break
+
+    if direct_match:
+        # Check if we have credentials for this provider — env vars,
+        # credential pool, or auth store entries.
+        has_creds = False
+        try:
+            from hermes_cli.auth import PROVIDER_REGISTRY
+            pconfig = PROVIDER_REGISTRY.get(direct_match)
+            if pconfig:
+                for env_var in pconfig.api_key_env_vars:
+                    if os.getenv(env_var, "").strip():
+                        has_creds = True
+                        break
+        except Exception:
+            pass
+        # Also check credential pool and auth store — covers OAuth,
+        # Claude Code tokens, and other non-env-var credentials (#10300).
+        if not has_creds:
+            try:
+                from agent.credential_pool import load_pool
+                pool = load_pool(direct_match)
+                if pool.has_credentials():
+                    has_creds = True
+            except Exception:
+                pass
+        if not has_creds:
+            try:
+                from hermes_cli.auth import _load_auth_store
+                store = _load_auth_store()
+                if direct_match in store.get("providers", {}) or direct_match in store.get("credential_pool", {}):
+                    has_creds = True
+            except Exception:
+                pass
+
+        # Always return the direct provider match.  If credentials are
+        # missing, the client init will give a clear error rather than
+        # silently routing through the wrong provider (#10300).
+        return (direct_match, name)
+
    # --- Step 2: check OpenRouter catalog ---
    # First try exact match (handles provider/model format)
    or_slug = _find_openrouter_slug(name)
@@ -1859,19 +1771,6 @@ def provider_model_ids(provider: Optional[str], *, force_refresh: bool = False)
                    return live
            except Exception:
                pass
-    if normalized == "gmi":
-        try:
-            from hermes_cli.auth import resolve_api_key_provider_credentials
-
-            creds = resolve_api_key_provider_credentials("gmi")
-            api_key = str(creds.get("api_key") or "").strip()
-            base_url = str(creds.get("base_url") or "").strip()
-            if api_key and base_url:
-                live = fetch_api_models(api_key, base_url)
-                if live:
-                    return live
-        except Exception:
-            pass
    if normalized == "custom":
        base_url = _get_custom_base_url()
        if base_url:
@@ -2249,52 +2148,6 @@ def copilot_model_api_mode(
    return "chat_completions"


-# Azure Foundry model families that require the Responses API.  Azure
-# rejects /chat/completions against these deployments with
-# ``400 "The requested operation is unsupported."`` — the same payload Bob
-# Dobolina hit in April 2026 on ``gpt-5.3-codex`` while ``gpt-4o-pure`` on
-# the same endpoint worked fine.  Keep the patterns broad enough to cover
-# vendor-renamed deployments (e.g. ``gpt-5.3-codex``, ``gpt-5-codex``,
-# ``gpt-5.4``, ``o1-preview``) but tight enough to leave GPT-4 / 3.5 / Llama /
-# Mistral / Grok deployments on chat completions.
-_AZURE_FOUNDRY_RESPONSES_PREFIXES = (
-    "codex",       # codex-*, codex-mini
-    "gpt-5",       # gpt-5, gpt-5.x, gpt-5-codex, gpt-5.x-codex
-    "o1",          # o1, o1-preview, o1-mini
-    "o3",          # o3, o3-mini
-    "o4",          # o4, o4-mini
-)
-
-
-def azure_foundry_model_api_mode(model_name: Optional[str]) -> Optional[str]:
-    """Infer Azure Foundry api_mode from a deployment/model name.
-
-    Returns ``"codex_responses"`` when the model name matches a family that
-    only accepts the Responses API on Azure Foundry (GPT-5.x, codex, o1/o3/o4
-    reasoning models).  Returns ``None`` otherwise — the caller should fall
-    back to the configured/default api_mode (typically ``chat_completions``)
-    so GPT-4o, GPT-4 Turbo, Llama, Mistral, etc. keep working.
-
-    Intentionally does NOT return ``anthropic_messages``; Anthropic-style
-    Azure endpoints are disambiguated by URL (``/anthropic`` suffix) in
-    ``runtime_provider._detect_api_mode_for_url`` and by the user setting
-    ``model.api_mode: anthropic_messages`` explicitly.
-    """
-    raw = str(model_name or "").strip().lower()
-    if not raw:
-        return None
-    # Strip any vendor/ prefix a user may have copied from OpenRouter / Copilot.
-    if "/" in raw:
-        raw = raw.rsplit("/", 1)[-1]
-    # gpt-5-mini speaks chat completions on Copilot but Azure Foundry deploys
-    # the full gpt-5 family uniformly on Responses API — don't carve an
-    # exception here.
-    for prefix in _AZURE_FOUNDRY_RESPONSES_PREFIXES:
-        if raw.startswith(prefix):
-            return "codex_responses"
-    return None
-
-
 def normalize_opencode_model_id(provider_id: Optional[str], model_id: Optional[str]) -> str:
    """Normalize OpenCode config IDs to the bare model slug used in API requests."""
    provider = normalize_provider(provider_id)
@@ -9,7 +9,6 @@ from typing import Dict, Iterable, Optional, Set
 from hermes_cli.auth import get_nous_auth_status
 from hermes_cli.config import get_env_value, load_config
 from tools.managed_tool_gateway import is_managed_tool_gateway_ready
-from utils import is_truthy_value
 from tools.tool_backend_helpers import (
    fal_key_is_configured,
    has_direct_modal_credentials,
@@ -26,13 +25,6 @@ _DEFAULT_PLATFORM_TOOLSETS = {
 }


-def _uses_gateway(section: object) -> bool:
-    """Return True when a config section explicitly opts into the gateway."""
-    if not isinstance(section, dict):
-        return False
-    return is_truthy_value(section.get("use_gateway"), default=False)
-
-
@dataclass(frozen=True)
 class NousFeatureState:
    key: str
@@ -270,11 +262,11 @@ def get_nous_subscription_features(
    # use_gateway flags — when True, the user explicitly opted into the
    # Tool Gateway via `hermes model`, so direct credentials should NOT
    # prevent gateway routing.
-    web_use_gateway = _uses_gateway(web_cfg)
-    tts_use_gateway = _uses_gateway(tts_cfg)
-    browser_use_gateway = _uses_gateway(browser_cfg)
+    web_use_gateway = bool(web_cfg.get("use_gateway"))
+    tts_use_gateway = bool(tts_cfg.get("use_gateway"))
+    browser_use_gateway = bool(browser_cfg.get("use_gateway"))
    image_gen_cfg = config.get("image_gen") if isinstance(config.get("image_gen"), dict) else {}
-    image_use_gateway = _uses_gateway(image_gen_cfg)
+    image_use_gateway = bool(image_gen_cfg.get("use_gateway"))

    direct_exa = bool(get_env_value("EXA_API_KEY"))
    direct_firecrawl = bool(get_env_value("FIRECRAWL_API_KEY") or get_env_value("FIRECRAWL_API_URL"))
@@ -609,10 +601,10 @@ def get_gateway_eligible_tools(
    # no direct keys exist — we only skip the prompt for tools where
    # use_gateway was explicitly set.
    opted_in = {
-        "web": _uses_gateway(config.get("web")),
-        "image_gen": _uses_gateway(config.get("image_gen")),
-        "tts": _uses_gateway(config.get("tts")),
-        "browser": _uses_gateway(config.get("browser")),
+        "web": bool((config.get("web") if isinstance(config.get("web"), dict) else {}).get("use_gateway")),
+        "image_gen": bool((config.get("image_gen") if isinstance(config.get("image_gen"), dict) else {}).get("use_gateway")),
+        "tts": bool((config.get("tts") if isinstance(config.get("tts"), dict) else {}).get("use_gateway")),
+        "browser": bool((config.get("browser") if isinstance(config.get("browser"), dict) else {}).get("use_gateway")),
    }

    unconfigured: list[str] = []
@@ -1,202 +0,0 @@
-"""Oneshot (-z) mode: send a prompt, get the final content block, exit.
-
-Bypasses cli.py entirely.  No banner, no spinner, no session_id line,
-no stderr chatter.  Just the agent's final text to stdout.
-
-Toolsets = whatever the user has configured for "cli" in `hermes tools`.
-Rules / memory / AGENTS.md / preloaded skills = same as a normal chat turn.
-Approvals = auto-bypassed (HERMES_YOLO_MODE=1 is set for the call).
-Working directory = the user's CWD (AGENTS.md etc. resolve from there as usual).
-
-Model / provider selection mirrors `hermes chat`:
-    - Both optional. If omitted, use the user's configured default.
-    - If both given, pair them exactly as given.
-    - If only --model given, auto-detect the provider that serves it.
-    - If only --provider given, error out (ambiguous — caller must pick a model).
-
-Env var fallbacks (used when the corresponding arg is not passed):
-    - HERMES_INFERENCE_MODEL
-    - HERMES_INFERENCE_PROVIDER  (already read by resolve_runtime_provider)
-"""
-
-from __future__ import annotations
-
-import logging
-import os
-import sys
-from contextlib import redirect_stderr, redirect_stdout
-from typing import Optional
-
-
-def run_oneshot(
-    prompt: str,
-    model: Optional[str] = None,
-    provider: Optional[str] = None,
-) -> int:
-    """Execute a single prompt and print only the final content block.
-
-    Args:
-        prompt: The user message to send.
-        model: Optional model override. Falls back to HERMES_INFERENCE_MODEL
-            env var, then config.yaml's model.default / model.model.
-        provider: Optional provider override. Falls back to
-            HERMES_INFERENCE_PROVIDER env var, then config.yaml's model.provider,
-            then "auto".
-
-    Returns the exit code.  Caller should sys.exit() with the return.
-    """
-    # Silence every stdlib logger for the duration.  AIAgent, tools, and
-    # provider adapters all log to stderr through the root logger; file
-    # handlers added by setup_logging() keep working (they're attached to
-    # the root logger's handler list, not affected by level), but no
-    # bytes reach the terminal.
-    logging.disable(logging.CRITICAL)
-
-    # --provider without --model is ambiguous: carrying the user's configured
-    # model across to a different provider is usually wrong (that provider may
-    # not host it), and silently picking the provider's catalog default hides
-    # the mismatch.  Require the caller to be explicit.  Validate BEFORE the
-    # stderr redirect so the message actually reaches the terminal.
-    env_model_early = os.getenv("HERMES_INFERENCE_MODEL", "").strip()
-    if provider and not ((model or "").strip() or env_model_early):
-        sys.stderr.write(
-            "hermes -z: --provider requires --model (or HERMES_INFERENCE_MODEL). "
-            "Pass both explicitly, or neither to use your configured defaults.\n"
-        )
-        return 2
-
-    # Auto-approve any shell / tool approvals.  Non-interactive by
-    # definition — a prompt would hang forever.
-    os.environ["HERMES_YOLO_MODE"] = "1"
-    os.environ["HERMES_ACCEPT_HOOKS"] = "1"
-
-    # Redirect stderr AND stdout to devnull for the entire call tree.
-    # We'll print the final response to the real stdout at the end.
-    real_stdout = sys.stdout
-    devnull = open(os.devnull, "w")
-
-    try:
-        with redirect_stdout(devnull), redirect_stderr(devnull):
-            response = _run_agent(prompt, model=model, provider=provider)
-    finally:
-        try:
-            devnull.close()
-        except Exception:
-            pass
-
-    if response:
-        real_stdout.write(response)
-        if not response.endswith("\n"):
-            real_stdout.write("\n")
-        real_stdout.flush()
-    return 0
-
-
-def _run_agent(
-    prompt: str,
-    model: Optional[str] = None,
-    provider: Optional[str] = None,
-) -> str:
-    """Build an AIAgent exactly like a normal CLI chat turn would, then
-    run a single conversation.  Returns the final response string."""
-    # Imports are local so they don't run when hermes is invoked for
-    # other commands (keeps top-level CLI startup cheap).
-    from hermes_cli.config import load_config
-    from hermes_cli.models import detect_provider_for_model
-    from hermes_cli.runtime_provider import resolve_runtime_provider
-    from hermes_cli.tools_config import _get_platform_tools
-    from run_agent import AIAgent
-
-    cfg = load_config()
-
-    # Resolve effective model: explicit arg → env var → config.
-    model_cfg = cfg.get("model") or {}
-    if isinstance(model_cfg, str):
-        cfg_model = model_cfg
-    else:
-        cfg_model = model_cfg.get("default") or model_cfg.get("model") or ""
-
-    env_model = os.getenv("HERMES_INFERENCE_MODEL", "").strip()
-    effective_model = (model or "").strip() or env_model or cfg_model
-
-    # Resolve effective provider: explicit arg → (auto-detect from model if
-    # model was explicit) → env / config (handled inside resolve_runtime_provider).
-    #
-    # When --model is given without --provider, auto-detect the provider that
-    # serves that model — same semantic as `/model <name>` in an interactive
-    # session.  Without this, resolve_runtime_provider() would fall back to
-    # the user's configured default provider, which may not host the model
-    # the caller just asked for.
-    effective_provider = (provider or "").strip() or None
-    if effective_provider is None and (model or env_model):
-        # Only auto-detect when the model was explicitly requested via arg or
-        # env var (not when it came from config — that's the "use my defaults"
-        # path and the configured provider is already correct).
-        explicit_model = (model or "").strip() or env_model
-        if explicit_model:
-            cfg_provider = ""
-            if isinstance(model_cfg, dict):
-                cfg_provider = str(model_cfg.get("provider") or "").strip().lower()
-            current_provider = (
-                cfg_provider
-                or os.getenv("HERMES_INFERENCE_PROVIDER", "").strip().lower()
-                or "auto"
-            )
-            detected = detect_provider_for_model(explicit_model, current_provider)
-            if detected:
-                effective_provider, effective_model = detected
-
-    runtime = resolve_runtime_provider(
-        requested=effective_provider,
-        target_model=effective_model or None,
-    )
-
-    # Pull in whatever toolsets the user has enabled for "cli".
-    # sorted() gives stable ordering; set→list for AIAgent's signature.
-    toolsets_list = sorted(_get_platform_tools(cfg, "cli"))
-
-    agent = AIAgent(
-        api_key=runtime.get("api_key"),
-        base_url=runtime.get("base_url"),
-        provider=runtime.get("provider"),
-        api_mode=runtime.get("api_mode"),
-        model=effective_model,
-        enabled_toolsets=toolsets_list,
-        quiet_mode=True,
-        platform="cli",
-        credential_pool=runtime.get("credential_pool"),
-        # Interactive callbacks are intentionally NOT wired beyond this
-        # one.  In oneshot mode there's no user sitting at a terminal:
-        #   - clarify  → returns a synthetic "pick a default" instruction
-        #                so the agent continues instead of stalling on
-        #                the tool's built-in "not available" error
-        #   - sudo password prompt → terminal_tool gates on
-        #                HERMES_INTERACTIVE which we never set
-        #   - shell-hook approval → auto-approved via HERMES_ACCEPT_HOOKS=1
-        #                (set above); also falls back to deny on non-tty
-        #   - dangerous-command approval → bypassed via HERMES_YOLO_MODE=1
-        #   - skill secret capture → returns gracefully when no callback set
-        clarify_callback=_oneshot_clarify_callback,
-    )
-
-    # Belt-and-braces: make sure AIAgent doesn't invoke any streaming
-    # display callbacks that would bypass our stdout capture.
-    agent.suppress_status_output = True
-    agent.stream_delta_callback = None
-    agent.tool_gen_callback = None
-
-    return agent.chat(prompt) or ""
-
-
-def _oneshot_clarify_callback(question: str, choices=None) -> str:
-    """Clarify is disabled in oneshot mode — tell the agent to pick a
-    default and proceed instead of stalling or erroring."""
-    if choices:
-        return (
-            f"[oneshot mode: no user available. Pick the best option from "
-            f"{choices} using your own judgment and continue.]"
-        )
-    return (
-        "[oneshot mode: no user available. Make the most reasonable "
-        "assumption you can and continue.]"
-    )
@@ -36,7 +36,6 @@ PLATFORMS: OrderedDict[str, PlatformInfo] = OrderedDict([
    ("wecom_callback", PlatformInfo(label="💬 WeCom Callback",  default_toolset="hermes-wecom-callback")),
    ("weixin",         PlatformInfo(label="💬 Weixin",          default_toolset="hermes-weixin")),
    ("qqbot",          PlatformInfo(label="💬 QQBot",           default_toolset="hermes-qqbot")),
-    ("yuanbao",        PlatformInfo(label="🤖 Yuanbao",         default_toolset="hermes-yuanbao")),
    ("webhook",        PlatformInfo(label="🔗 Webhook",         default_toolset="hermes-webhook")),
    ("api_server",     PlatformInfo(label="🌐 API Server",      default_toolset="hermes-api-server")),
    ("cron",           PlatformInfo(label="⏰ Cron",            default_toolset="hermes-cron")),
@@ -163,22 +163,10 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = {
        base_url_override="https://api.arcee.ai/api/v1",
        base_url_env_var="ARCEE_BASE_URL",
    ),
-    "gmi": HermesOverlay(
-        transport="openai_chat",
-        extra_env_vars=("GMI_API_KEY",),
-        base_url_override="https://api.gmi-serving.com/v1",
-        base_url_env_var="GMI_BASE_URL",
-    ),
    "ollama-cloud": HermesOverlay(
        transport="openai_chat",
        base_url_env_var="OLLAMA_BASE_URL",
    ),
-    # Azure Foundry: supports both OpenAI-style and Anthropic-style endpoints.
-    # The transport is determined at runtime from config.yaml model.api_mode.
-    "azure-foundry": HermesOverlay(
-        transport="openai_chat",  # default; overridden by api_mode in config
-        base_url_env_var="AZURE_FOUNDRY_BASE_URL",
-    ),
 }


@@ -303,10 +291,6 @@ ALIASES: Dict[str, str] = {
    "arcee-ai": "arcee",
    "arceeai": "arcee",

-    # gmi
-    "gmi-cloud": "gmi",
-    "gmicloud": "gmi",
-
    # Local server aliases → virtual "local" concept (resolved via user config)
    "lmstudio": "lmstudio",
    "lm-studio": "lmstudio",
@@ -329,7 +313,6 @@ _LABEL_OVERRIDES: Dict[str, str] = {
    "copilot-acp": "GitHub Copilot ACP",
    "stepfun": "StepFun Step Plan",
    "xiaomi": "Xiaomi MiMo",
-    "gmi": "GMI Cloud",
    "local": "Local endpoint",
    "bedrock": "AWS Bedrock",
    "ollama-cloud": "Ollama Cloud",
@@ -221,32 +221,6 @@ def _resolve_runtime_from_pool_entry(
    elif provider == "copilot":
        api_mode = _copilot_runtime_api_mode(model_cfg, getattr(entry, "runtime_api_key", ""))
        base_url = base_url or PROVIDER_REGISTRY["copilot"].inference_base_url
-    elif provider == "azure-foundry":
-        # Azure Foundry: read api_mode and base_url from config
-        cfg_provider = str(model_cfg.get("provider") or "").strip().lower()
-        if cfg_provider == "azure-foundry":
-            cfg_base_url = str(model_cfg.get("base_url") or "").strip().rstrip("/")
-            if cfg_base_url:
-                base_url = cfg_base_url
-            configured_mode = _parse_api_mode(model_cfg.get("api_mode"))
-            if configured_mode:
-                api_mode = configured_mode
-        # Model-family inference for GPT-5.x / codex / o1-o4: Azure rejects
-        # /chat/completions on these with 400 "operation unsupported" — see
-        # azure_foundry_model_api_mode() for rationale.  Skip when the user
-        # explicitly picked anthropic_messages (Anthropic-style endpoint).
-        if effective_model and api_mode != "anthropic_messages":
-            try:
-                from hermes_cli.models import azure_foundry_model_api_mode
-
-                inferred = azure_foundry_model_api_mode(effective_model)
-            except Exception:
-                inferred = None
-            if inferred:
-                api_mode = inferred
-        # For Anthropic-style endpoints, strip /v1 suffix
-        if api_mode == "anthropic_messages":
-            base_url = re.sub(r"/v1/?$", "", base_url)
    else:
        configured_provider = str(model_cfg.get("provider") or "").strip().lower()
        # Honour model.base_url from config.yaml when the configured provider
@@ -615,88 +589,6 @@ def _resolve_openrouter_runtime(
    }


-def _resolve_azure_foundry_runtime(
-    *,
-    requested_provider: str,
-    model_cfg: Dict[str, Any],
-    explicit_api_key: Optional[str] = None,
-    explicit_base_url: Optional[str] = None,
-    target_model: Optional[str] = None,
-) -> Dict[str, Any]:
-    """Resolve an Azure Foundry runtime entry.
-
-    Reads ``model.base_url`` + ``model.api_mode`` from config.yaml (or
-    explicit overrides), pulls the API key from ``.env`` / env var, and
-    strips a trailing ``/v1`` for Anthropic-style endpoints because the
-    Anthropic SDK appends ``/v1/messages`` internally.
-
-    Raises :class:`AuthError` when required values are missing.
-    """
-    explicit_api_key = str(explicit_api_key or "").strip()
-    explicit_base_url_clean = str(explicit_base_url or "").strip().rstrip("/")
-
-    cfg_provider = str(model_cfg.get("provider") or "").strip().lower()
-    cfg_base_url = ""
-    cfg_api_mode = "chat_completions"
-    if cfg_provider == "azure-foundry":
-        cfg_base_url = str(model_cfg.get("base_url") or "").strip().rstrip("/")
-        cfg_api_mode = _parse_api_mode(model_cfg.get("api_mode")) or "chat_completions"
-
-    # Model-family inference: Azure Foundry deploys GPT-5.x / codex / o1-o4
-    # reasoning models as Responses-API-only.  Calling /chat/completions
-    # against them returns 400 "The requested operation is unsupported."
-    # Upgrade api_mode when the model name matches, unless the user has
-    # explicitly chosen anthropic_messages (Anthropic-style endpoint).
-    effective_model = str(target_model or model_cfg.get("default") or "").strip()
-    if effective_model and cfg_api_mode != "anthropic_messages":
-        try:
-            from hermes_cli.models import azure_foundry_model_api_mode
-
-            inferred = azure_foundry_model_api_mode(effective_model)
-        except Exception:
-            inferred = None
-        if inferred:
-            cfg_api_mode = inferred
-
-    env_base_url = os.getenv("AZURE_FOUNDRY_BASE_URL", "").strip().rstrip("/")
-    base_url = explicit_base_url_clean or cfg_base_url or env_base_url
-    if not base_url:
-        raise AuthError(
-            "Azure Foundry requires a base URL. Set it via 'hermes model' or "
-            "the AZURE_FOUNDRY_BASE_URL environment variable."
-        )
-
-    api_key = explicit_api_key
-    if not api_key:
-        try:
-            from hermes_cli.config import get_env_value
-            api_key = get_env_value("AZURE_FOUNDRY_API_KEY") or ""
-        except Exception:
-            api_key = ""
-    if not api_key:
-        api_key = os.getenv("AZURE_FOUNDRY_API_KEY", "").strip()
-    if not api_key:
-        raise AuthError(
-            "Azure Foundry requires an API key. Set AZURE_FOUNDRY_API_KEY in "
-            "~/.hermes/.env or run 'hermes model' to configure."
-        )
-
-    # Anthropic SDK appends /v1/messages itself, so strip any trailing /v1
-    # we inherited from the configured base_url to avoid double-/v1 paths.
-    if cfg_api_mode == "anthropic_messages":
-        base_url = re.sub(r"/v1/?$", "", base_url)
-
-    source = "explicit" if (explicit_api_key or explicit_base_url) else "config"
-    return {
-        "provider": "azure-foundry",
-        "api_mode": cfg_api_mode,
-        "base_url": base_url,
-        "api_key": api_key,
-        "source": source,
-        "requested_provider": requested_provider,
-    }
-
-
 def _resolve_explicit_runtime(
    *,
    provider: str,
@@ -786,15 +678,6 @@ def _resolve_explicit_runtime(
            "requested_provider": requested_provider,
        }

-    # Azure Foundry: user-configured endpoint with selectable API mode
-    if provider == "azure-foundry":
-        return _resolve_azure_foundry_runtime(
-            requested_provider=requested_provider,
-            model_cfg=model_cfg,
-            explicit_api_key=explicit_api_key,
-            explicit_base_url=explicit_base_url,
-        )
-
    pconfig = PROVIDER_REGISTRY.get(provider)
    if pconfig and pconfig.auth_type == "api_key":
        env_url = ""
@@ -863,41 +746,6 @@ def resolve_runtime_provider(
    """
    requested_provider = resolve_requested_provider(requested)

-    # Azure Anthropic short-circuit: when explicitly targeting an Azure endpoint
-    # with provider="anthropic", bypass _resolve_named_custom_runtime (which would
-    # return provider="custom" with chat_completions api_mode and no valid key).
-    # Instead, use the Azure key directly with anthropic_messages api_mode.
-    _eff_base = (explicit_base_url or "").strip()
-    if requested_provider == "anthropic" and "azure.com" in _eff_base:
-        _azure_key = (
-            (explicit_api_key or "").strip()
-            or os.getenv("AZURE_ANTHROPIC_KEY", "").strip()
-            or os.getenv("ANTHROPIC_API_KEY", "").strip()
-        )
-        return {
-            "provider": "anthropic",
-            "api_mode": "anthropic_messages",
-            "base_url": _eff_base.rstrip("/"),
-            "api_key": _azure_key,
-            "source": "azure-explicit",
-            "requested_provider": requested_provider,
-        }
-
-    # Azure Foundry: user-configured endpoint with selectable API mode
-    # (OpenAI-style chat_completions or Anthropic-style anthropic_messages).
-    # Resolve before the custom-runtime / pool / generic paths so Azure
-    # config is always picked up from model.base_url + model.api_mode,
-    # regardless of whether the caller passed explicit_* args.
-    if requested_provider == "azure-foundry":
-        azure_runtime = _resolve_azure_foundry_runtime(
-            requested_provider=requested_provider,
-            model_cfg=_get_model_config(),
-            explicit_api_key=explicit_api_key,
-            explicit_base_url=explicit_base_url,
-            target_model=target_model,
-        )
-        return azure_runtime
-
    custom_runtime = _resolve_named_custom_runtime(
        requested_provider=requested_provider,
        explicit_api_key=explicit_api_key,
@@ -1076,6 +924,13 @@ def resolve_runtime_provider(

    # Anthropic (native Messages API)
    if provider == "anthropic":
+        from agent.anthropic_adapter import resolve_anthropic_token
+        token = resolve_anthropic_token()
+        if not token:
+            raise AuthError(
+                "No Anthropic credentials found. Set ANTHROPIC_TOKEN or ANTHROPIC_API_KEY, "
+                "run 'claude setup-token', or authenticate with 'claude /login'."
+            )
        # Allow base URL override from config.yaml model.base_url, but only
        # when the configured provider is anthropic — otherwise a non-Anthropic
        # base_url (e.g. Codex endpoint) would leak into Anthropic requests.
@@ -1084,33 +939,6 @@ def resolve_runtime_provider(
        if cfg_provider == "anthropic":
            cfg_base_url = (model_cfg.get("base_url") or "").strip().rstrip("/")
        base_url = cfg_base_url or "https://api.anthropic.com"
-
-        # For Azure AI Foundry endpoints, use ANTHROPIC_API_KEY directly —
-        # Claude Code OAuth tokens (sk-ant-oat01) are not accepted by Azure.
-        # Azure keys don't start with "sk-ant-" so resolve_anthropic_token()
-        # would find the Claude Code OAuth token first (priority 3) and return
-        # that instead, causing 401s. Detect Azure endpoints and use the env
-        # key directly to bypass the OAuth priority chain.
-        _is_azure_endpoint = "azure.com" in base_url.lower() or (
-            cfg_base_url and "azure.com" in cfg_base_url.lower()
-        )
-        if _is_azure_endpoint:
-            token = (
-                os.getenv("AZURE_ANTHROPIC_KEY", "").strip()
-                or os.getenv("ANTHROPIC_API_KEY", "").strip()
-            )
-            if not token:
-                raise AuthError(
-                    "No Azure Anthropic API key found. Set AZURE_ANTHROPIC_KEY or ANTHROPIC_API_KEY."
-                )
-        else:
-            from agent.anthropic_adapter import resolve_anthropic_token
-            token = resolve_anthropic_token()
-            if not token:
-                raise AuthError(
-                    "No Anthropic credentials found. Set ANTHROPIC_TOKEN or ANTHROPIC_API_KEY, "
-                    "run 'claude setup-token', or authenticate with 'claude /login'."
-                )
        return {
            "provider": "anthropic",
            "api_mode": "anthropic_messages",
@@ -1856,32 +1856,27 @@ def _setup_slack():
    if existing:
        print_info("Slack: already configured")
        if not prompt_yes_no("Reconfigure Slack?", False):
-            # Even without reconfiguring, offer to refresh the manifest so
-            # new commands (e.g. /btw, /stop, ...) get registered in Slack.
-            if prompt_yes_no(
-                "Regenerate the Slack app manifest with the latest command "
-                "list? (recommended after `hermes update`)",
-                True,
-            ):
-                _write_slack_manifest_and_instruct()
            return

    print_info("Steps to create a Slack app:")
-    print_info("   1. Go to https://api.slack.com/apps → Create New App")
-    print_info("      Pick 'From an app manifest' — we'll generate one for you below.")
+    print_info("   1. Go to https://api.slack.com/apps → Create New App (from scratch)")
    print_info("   2. Enable Socket Mode: Settings → Socket Mode → Enable")
    print_info("      • Create an App-Level Token with 'connections:write' scope")
-    print_info("   3. Install to Workspace: Settings → Install App")
-    print_info("   4. After installing, invite the bot to channels: /invite @YourBot")
+    print_info("   3. Add Bot Token Scopes: Features → OAuth & Permissions")
+    print_info("      Required scopes: chat:write, app_mentions:read,")
+    print_info("      channels:history, channels:read, im:history,")
+    print_info("      im:read, im:write, users:read, files:read, files:write")
+    print_info("      Optional for private channels: groups:history")
+    print_info("   4. Subscribe to Events: Features → Event Subscriptions → Enable")
+    print_info("      Required events: message.im, message.channels, app_mention")
+    print_info("      Optional for private channels: message.groups")
+    print_warning("   ⚠ Without message.channels the bot will ONLY work in DMs,")
+    print_warning("     not public channels.")
+    print_info("   5. Install to Workspace: Settings → Install App")
+    print_info("   6. Reinstall the app after any scope or event changes")
+    print_info("   7. After installing, invite the bot to channels: /invite @YourBot")
    print()
    print_info("   Full guide: https://hermes-agent.nousresearch.com/docs/user-guide/messaging/slack/")
-    print()
-
-    # Generate and write manifest up-front so the user can paste it into
-    # the "Create from manifest" flow instead of clicking through scopes /
-    # events / slash commands one at a time.
-    _write_slack_manifest_and_instruct()
-
    print()
    bot_token = prompt("Slack Bot Token (xoxb-...)", password=True)
    if not bot_token:
@@ -1907,49 +1902,6 @@ def _setup_slack():
        print_info("   Set SLACK_ALLOW_ALL_USERS=true or GATEWAY_ALLOW_ALL_USERS=true only if you intentionally want open workspace access.")


-def _write_slack_manifest_and_instruct():
-    """Generate the Slack manifest, write it under HERMES_HOME, and print
-    paste-into-Slack instructions.
-
-    Exposed as its own helper so both the initial setup flow and the
-    "reconfigure? → no" branch can refresh the manifest without the user
-    re-entering tokens. Failures are non-fatal — if the manifest write
-    fails for any reason, we print a warning and skip rather than abort
-    the whole Slack setup.
-    """
-    try:
-        from hermes_cli.slack_cli import _build_full_manifest
-        from hermes_constants import get_hermes_home
-
-        manifest = _build_full_manifest(
-            bot_name="Hermes",
-            bot_description="Your Hermes agent on Slack",
-        )
-        target = Path(get_hermes_home()) / "slack-manifest.json"
-        target.parent.mkdir(parents=True, exist_ok=True)
-        import json as _json
-        target.write_text(
-            _json.dumps(manifest, indent=2, ensure_ascii=False) + "\n",
-            encoding="utf-8",
-        )
-        print_success(f"Slack app manifest written to: {target}")
-        print_info(
-            "   Paste it into https://api.slack.com/apps → your app → Features "
-            "→ App Manifest → Edit, then Save.  Slack will prompt to "
-            "reinstall if scopes or slash commands changed."
-        )
-        print_info(
-            "   Re-run `hermes slack manifest --write` anytime to refresh after "
-            "Hermes adds new commands."
-        )
-    except Exception as exc:  # pragma: no cover - best-effort UX helper
-        print_warning(f"Couldn't write Slack manifest: {exc}")
-        print_info(
-            "   You can generate it manually later with: "
-            "hermes slack manifest --write"
-        )
-
-
 def _setup_matrix():
    """Configure Matrix credentials."""
    print_header("Matrix")
@@ -2133,12 +2085,6 @@ def _setup_feishu():
    _gateway_setup_feishu()


-def _setup_yuanbao():
-    """Configure Yuanbao via gateway setup."""
-    from hermes_cli.gateway import _setup_yuanbao as _gateway_setup_yuanbao
-    _gateway_setup_yuanbao()
-
-
 def _setup_wecom():
    """Configure WeCom (Enterprise WeChat) via gateway setup."""
    from hermes_cli.gateway import _setup_wecom as _gateway_setup_wecom
@@ -2283,7 +2229,6 @@ _GATEWAY_PLATFORMS = [
    ("WhatsApp", "WHATSAPP_ENABLED", _setup_whatsapp),
    ("DingTalk", "DINGTALK_CLIENT_ID", _setup_dingtalk),
    ("Feishu / Lark", "FEISHU_APP_ID", _setup_feishu),
-    ("Yuanbao", "YUANBAO_APP_ID", _setup_yuanbao),
    ("WeCom (Enterprise WeChat)", "WECOM_BOT_ID", _setup_wecom),
    ("WeCom Callback (Self-Built App)", "WECOM_CALLBACK_CORP_ID", _setup_wecom_callback),
    ("Weixin (WeChat)", "WEIXIN_ACCOUNT_ID", _setup_weixin),
@@ -2918,6 +2863,17 @@ SETUP_SECTIONS = [
    ("agent", "Agent Settings", setup_agent_settings),
 ]

+# The returning-user menu intentionally omits standalone TTS because model setup
+# already includes TTS selection and tools setup covers the rest of the provider
+# configuration. Keep this list in the same order as the visible menu entries.
+RETURNING_USER_MENU_SECTION_KEYS = [
+    "model",
+    "terminal",
+    "gateway",
+    "tools",
+    "agent",
+]
+

 def run_setup_wizard(args):
    """Run the interactive setup wizard.
@@ -2942,9 +2898,6 @@ def run_setup_wizard(args):
        save_config(copy.deepcopy(DEFAULT_CONFIG))
        print_success("Configuration reset to defaults.")

-    reconfigure_requested = bool(getattr(args, "reconfigure", False))
-    quick_requested = bool(getattr(args, "quick", False))
-
    config = load_config()
    hermes_home = get_hermes_home()

@@ -3036,36 +2989,50 @@ def run_setup_wizard(args):
    migration_ran = False

    if is_existing:
-        # Existing install — default is the full-wizard reconfigure flow.
-        # Every prompt shows the current value as its default, so pressing
-        # Enter keeps it.  Opt into `--quick` for the narrow "just fill in
-        # missing items" flow (useful after a partial OpenClaw migration
-        # or when a required API key got cleared).
-        if quick_requested:
+        # ── Returning User Menu ──
+        print()
+        print_header("Welcome Back!")
+        print_success("You already have Hermes configured.")
+        print()
+
+        menu_choices = [
+            "Quick Setup - configure missing items only",
+            "Full Setup - reconfigure everything",
+            "Model & Provider",
+            "Terminal Backend",
+            "Messaging Platforms (Gateway)",
+            "Tools",
+            "Agent Settings",
+            "Exit",
+        ]
+        choice = prompt_choice("What would you like to do?", menu_choices, 0)
+
+        if choice == 0:
+            # Quick setup
            _run_quick_setup(config, hermes_home)
            return
-
-        print()
-        print_header("Reconfigure")
-        print_success("You already have Hermes configured.")
-        print_info("Running the full wizard — each prompt shows your current value.")
-        print_info("Press Enter to keep it, or type a new value to change it.")
-        print_info("")
-        print_info("Tip: jump straight to a section with 'hermes setup model|terminal|")
-        print_info("     gateway|tools|agent', or fill only missing items with --quick.")
-        # Fall through to the "Full Setup — run all sections" block below.
-        # --reconfigure is now the default on existing installs; the flag
-        # is preserved for backwards compatibility but is a no-op here.
+        elif choice == 1:
+            # Full setup — fall through to run all sections
+            pass
+        elif choice == 7:
+            print_info("Exiting. Run 'hermes setup' again when ready.")
+            return
+        elif 2 <= choice <= 6:
+            # Individual section — map by key, not by position.
+            # SETUP_SECTIONS includes TTS but the returning-user menu skips it,
+            # so positional indexing (choice - 2) would dispatch the wrong section.
+            section_key = RETURNING_USER_MENU_SECTION_KEYS[choice - 2]
+            section = next((s for s in SETUP_SECTIONS if s[0] == section_key), None)
+            if section:
+                _, label, func = section
+                func(config)
+                save_config(config)
+                _print_setup_summary(config, hermes_home)
+            return
    else:
        # ── First-Time Setup ──
        print()

-        # --reconfigure / --quick on a fresh install are meaningless — fall
-        # through to the normal first-time flow.
-        if reconfigure_requested or quick_requested:
-            print_info("No existing configuration found — running first-time setup.")
-            print()
-
        # Offer OpenClaw migration before configuration begins
        migration_ran = _offer_openclaw_migration(hermes_home)
        if migration_ran:
@@ -11,10 +11,9 @@ handler are thin wrappers that parse args and delegate.
 """

 import json
-import re
 import shutil
 from pathlib import Path
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, Optional

 from rich.console import Console
 from rich.panel import Panel
@@ -142,103 +141,6 @@ def _derive_category_from_install_path(install_path: str) -> str:
    return "" if parent == "." else parent


-# ---------------------------------------------------------------------------
-# Interactive name/category resolution for URL-installed skills
-# ---------------------------------------------------------------------------
-
-_VALID_NAME_RE = re.compile(r"^[a-z][a-z0-9_-]*$")
-_VALID_CATEGORY_RE = re.compile(r"^[a-z][a-z0-9_/-]*$")
-
-
-def _is_valid_installed_skill_name(name: str) -> bool:
-    """Accept identifier-shaped names, reject empty / sentinel-y values."""
-    if not isinstance(name, str):
-        return False
-    candidate = name.strip().lower()
-    if not candidate or candidate in {"skill", "readme", "index", "unnamed-skill"}:
-        return False
-    return bool(_VALID_NAME_RE.match(candidate))
-
-
-def _existing_categories() -> List[str]:
-    """Return sorted subdirectory names under ``~/.hermes/skills/`` that look
-    like category buckets (contain at least one ``SKILL.md`` somewhere below).
-
-    Used to suggest reusable categories when interactively installing from a
-    URL. Hidden dirs (``.hub``, ``.trash``) are skipped.
-    """
-    from tools.skills_hub import SKILLS_DIR
-    out: List[str] = []
-    try:
-        for entry in SKILLS_DIR.iterdir():
-            if not entry.is_dir() or entry.name.startswith("."):
-                continue
-            # Only count as a category if it contains skills, not if it IS a skill.
-            # Heuristic: if ``<entry>/SKILL.md`` exists, it's a skill at the
-            # top level (no category); otherwise treat as a category bucket.
-            if (entry / "SKILL.md").exists():
-                continue
-            # Has at least one nested SKILL.md?
-            try:
-                if any(entry.rglob("SKILL.md")):
-                    out.append(entry.name)
-            except OSError:
-                continue
-    except (FileNotFoundError, OSError):
-        return []
-    return sorted(set(out))
-
-
-def _prompt_for_skill_name(c: Console, url: str, default: str = "") -> Optional[str]:
-    """Prompt interactively for a skill name. Returns None on cancel/EOF."""
-    c.print()
-    c.print(
-        f"[yellow]The SKILL.md at {url} doesn't declare a `name:` in its "
-        f"frontmatter,[/]\n[yellow]and the URL path doesn't produce a valid "
-        f"identifier either.[/]"
-    )
-    default_hint = f" [{default}]" if default else ""
-    c.print(
-        f"[bold]Enter a skill name{default_hint}:[/] "
-        f"[dim](lowercase letters, digits, hyphens, underscores; starts with a letter)[/]"
-    )
-    try:
-        answer = input("Name: ").strip()
-    except (EOFError, KeyboardInterrupt):
-        return None
-    if not answer and default:
-        answer = default
-    if not _is_valid_installed_skill_name(answer):
-        c.print(f"[bold red]Invalid name:[/] {answer!r}. Aborting install.\n")
-        return None
-    return answer
-
-
-def _prompt_for_category(c: Console, existing: List[str]) -> str:
-    """Prompt interactively for a category. Empty/None input means flat install."""
-    c.print()
-    if existing:
-        c.print(
-            "[bold]Pick a category[/] "
-            "[dim](reuse an existing bucket, type a new one, or press Enter to install flat)[/]"
-        )
-        c.print(f"[dim]Existing: {', '.join(existing)}[/]")
-    else:
-        c.print(
-            "[bold]Category[/] [dim](optional — press Enter to install flat at ~/.hermes/skills/<name>/)[/]"
-        )
-    try:
-        answer = input("Category: ").strip()
-    except (EOFError, KeyboardInterrupt):
-        return ""
-    if not answer:
-        return ""
-    if not _VALID_CATEGORY_RE.match(answer):
-        c.print(f"[dim]Invalid category {answer!r} — installing flat.[/]")
-        return ""
-    return answer
-
-
 def do_search(query: str, source: str = "all", limit: int = 10,
              console: Optional[Console] = None) -> None:
    """Search registries and display results as a Rich table."""
@@ -407,17 +309,8 @@ def do_browse(page: int = 1, page_size: int = 20, source: str = "all",

 def do_install(identifier: str, category: str = "", force: bool = False,
               console: Optional[Console] = None, skip_confirm: bool = False,
-               invalidate_cache: bool = True,
-               name_override: str = "") -> None:
-    """Fetch, quarantine, scan, confirm, and install a skill.
-
-    ``name_override`` lets non-interactive callers (slash commands, gateway,
-    scripts) supply a skill name when the upstream SKILL.md lacks a valid
-    ``name:`` frontmatter field. On interactive TTY surfaces, a missing name
-    triggers a prompt instead; ``skip_confirm=True`` means "non-interactive"
-    (so pair it with ``name_override`` when installing from a URL that has
-    no frontmatter).
-    """
+               invalidate_cache: bool = True) -> None:
+    """Fetch, quarantine, scan, confirm, and install a skill."""
    from tools.skills_hub import (
        GitHubAuth, create_source_router, ensure_hub_dirs,
        quarantine_bundle, install_from_quarantine, HubLockFile,
@@ -461,58 +354,6 @@ def do_install(identifier: str, category: str = "", force: bool = False,
            c.print()
        return

-    # URL-sourced skills may arrive with an empty name when SKILL.md has no
-    # ``name:`` in frontmatter AND the URL path doesn't yield a valid
-    # identifier. Resolve by (1) --name override, (2) interactive prompt on
-    # a TTY, (3) refuse with an actionable error on non-interactive surfaces.
-    bundle_meta = getattr(bundle, "metadata", {}) or {}
-    if bundle.source == "url" and (not bundle.name or bundle_meta.get("awaiting_name")):
-        if name_override and _is_valid_installed_skill_name(name_override):
-            bundle.name = name_override.strip()
-            bundle_meta["awaiting_name"] = False
-        elif name_override:
-            c.print(
-                f"[bold red]Invalid --name:[/] {name_override!r}. "
-                "Must be a lowercase identifier (letters, digits, hyphens, "
-                "underscores; starts with a letter).\n"
-            )
-            return
-        elif skip_confirm:
-            # Non-interactive surface (slash command / TUI / gateway). Can't
-            # prompt — emit an actionable error.
-            url = bundle_meta.get("url") or identifier
-            c.print(
-                f"[bold red]Cannot install from URL:[/] {url}\n"
-                "[yellow]The SKILL.md has no `name:` in its frontmatter, "
-                "and the URL path doesn't produce a valid identifier.[/]\n\n"
-                "Retry with an explicit name:\n"
-                f"  [bold]/skills install {url} --name <your-name>[/]\n"
-                f"  [bold]hermes skills install {url} --name <your-name>[/]\n\n"
-                "[dim]Or ask the SKILL.md's author to add a `name:` field to "
-                "its YAML frontmatter.[/]\n"
-            )
-            return
-        else:
-            # Interactive TTY — prompt.
-            url = bundle_meta.get("url") or identifier
-            chosen = _prompt_for_skill_name(c, url)
-            if not chosen:
-                c.print("[dim]Installation cancelled.[/]\n")
-                return
-            bundle.name = chosen
-            bundle_meta["awaiting_name"] = False
-        # Keep SkillMeta in sync so downstream "already installed" checks,
-        # audit logs, and display all see the final name.
-        if meta is not None:
-            meta.name = bundle.name
-            meta.path = bundle.name
-
-    # URL-sourced skills: offer to pick a category interactively when the
-    # caller didn't specify one (TTY only — non-interactive installs fall
-    # through to flat install, matching all other sources).
-    if bundle.source == "url" and not category and not skip_confirm:
-        category = _prompt_for_category(c, _existing_categories())
-
    # Auto-detect category for official skills (e.g. "official/autonomous-ai-agents/blackbox")
    if bundle.source == "official" and not category:
        id_parts = bundle.identifier.split("/")  # ["official", "category", "skill"]
@@ -758,24 +599,11 @@ def inspect_skill(identifier: str) -> Optional[dict]:
    return out


-def do_list(source_filter: str = "all",
-            enabled_only: bool = False,
-            console: Optional[Console] = None) -> None:
-    """List installed skills, distinguishing hub, builtin, and local skills.
-
-    Args:
-        source_filter: ``all`` | ``hub`` | ``builtin`` | ``local``.
-        enabled_only: If True, hide disabled skills from the output.
-
-    Enabled/disabled state is resolved against the currently active profile's
-    config — ``hermes -p <profile> skills list`` reads that profile's
-    ``skills.disabled`` list because ``-p`` swaps ``HERMES_HOME`` at process
-    start.  No explicit profile flag needed here.
-    """
+def do_list(source_filter: str = "all", console: Optional[Console] = None) -> None:
+    """List installed skills, distinguishing hub, builtin, and local skills."""
    from tools.skills_hub import HubLockFile, ensure_hub_dirs
    from tools.skills_sync import _read_manifest
    from tools.skills_tool import _find_all_skills
-    from agent.skill_utils import get_disabled_skill_names

    c = console or _console
    ensure_hub_dirs()
@@ -783,26 +611,17 @@ def do_list(source_filter: str = "all",
    hub_installed = {e["name"]: e for e in lock.list_installed()}
    builtin_names = set(_read_manifest())

-    # Pull ALL skills (including disabled ones) so we can annotate status.
-    all_skills = _find_all_skills(skip_disabled=True)
-    disabled_names = get_disabled_skill_names()
+    all_skills = _find_all_skills()

-    title = "Installed Skills"
-    if enabled_only:
-        title += " (enabled only)"
-
-    table = Table(title=title)
+    table = Table(title="Installed Skills")
    table.add_column("Name", style="bold cyan")
    table.add_column("Category", style="dim")
    table.add_column("Source", style="dim")
    table.add_column("Trust", style="dim")
-    table.add_column("Status", style="dim")

    hub_count = 0
    builtin_count = 0
    local_count = 0
-    enabled_count = 0
-    disabled_count = 0

    for skill in sorted(all_skills, key=lambda s: (s.get("category") or "", s["name"])):
        name = skill["name"]
@@ -813,48 +632,29 @@ def do_list(source_filter: str = "all",
            source_type = "hub"
            source_display = hub_entry.get("source", "hub")
            trust = hub_entry.get("trust_level", "community")
+            hub_count += 1
        elif name in builtin_names:
            source_type = "builtin"
            source_display = "builtin"
            trust = "builtin"
+            builtin_count += 1
        else:
            source_type = "local"
            source_display = "local"
            trust = "local"
+            local_count += 1

        if source_filter != "all" and source_filter != source_type:
            continue

-        is_enabled = name not in disabled_names
-        if enabled_only and not is_enabled:
-            continue
-
-        if source_type == "hub":
-            hub_count += 1
-        elif source_type == "builtin":
-            builtin_count += 1
-        else:
-            local_count += 1
-
-        if is_enabled:
-            enabled_count += 1
-            status_cell = "[bold green]enabled[/]"
-        else:
-            disabled_count += 1
-            status_cell = "[dim red]disabled[/]"
-
        trust_style = {"builtin": "bright_cyan", "trusted": "green", "community": "yellow", "local": "dim"}.get(trust, "dim")
        trust_label = "official" if source_display == "official" else trust
-        table.add_row(name, category, source_display, f"[{trust_style}]{trust_label}[/]", status_cell)
+        table.add_row(name, category, source_display, f"[{trust_style}]{trust_label}[/]")

    c.print(table)
-    summary = f"[dim]{hub_count} hub-installed, {builtin_count} builtin, {local_count} local"
-    if enabled_only:
-        summary += f" — {enabled_count} enabled shown"
-    else:
-        summary += f" — {enabled_count} enabled, {disabled_count} disabled"
-    summary += "[/]\n"
-    c.print(summary)
+    c.print(
+        f"[dim]{hub_count} hub-installed, {builtin_count} builtin, {local_count} local[/]\n"
+    )


 def do_check(name: Optional[str] = None, console: Optional[Console] = None) -> None:
@@ -1323,15 +1123,11 @@ def skills_command(args) -> None:
        do_search(args.query, source=args.source, limit=args.limit)
    elif action == "install":
        do_install(args.identifier, category=args.category, force=args.force,
-                   skip_confirm=getattr(args, "yes", False),
-                   name_override=getattr(args, "name", "") or "")
+                   skip_confirm=getattr(args, "yes", False))
    elif action == "inspect":
        do_inspect(args.identifier)
    elif action == "list":
-        do_list(
-            source_filter=args.source,
-            enabled_only=getattr(args, "enabled_only", False),
-        )
+        do_list(source_filter=args.source)
    elif action == "check":
        do_check(name=getattr(args, "name", None))
    elif action == "update":
@@ -1381,7 +1177,6 @@ def handle_skills_slash(cmd: str, console: Optional[Console] = None) -> None:
        /skills search kubernetes
        /skills install openai/skills/skill-creator
        /skills install openai/skills/skill-creator --force
-        /skills install https://example.com/path/SKILL.md
        /skills inspect openai/skills/skill-creator
        /skills list
        /skills list --source hub
@@ -1458,11 +1253,10 @@ def handle_skills_slash(cmd: str, console: Optional[Console] = None) -> None:

    elif action == "install":
        if not args:
-            c.print("[bold red]Usage:[/] /skills install <identifier-or-url> [--name <name>] [--category <cat>] [--force] [--now]\n")
+            c.print("[bold red]Usage:[/] /skills install <identifier> [--category <cat>] [--force] [--now]\n")
            return
        identifier = args[0]
        category = ""
-        name_override = ""
        # Slash commands run inside prompt_toolkit where input() hangs.
        # Always skip confirmation — the user typing the command is implicit consent.
        skip_confirm = True
@@ -1473,11 +1267,9 @@ def handle_skills_slash(cmd: str, console: Optional[Console] = None) -> None:
        for i, a in enumerate(args):
            if a == "--category" and i + 1 < len(args):
                category = args[i + 1]
-            elif a == "--name" and i + 1 < len(args):
-                name_override = args[i + 1]
        do_install(identifier, category=category, force=force,
                   skip_confirm=skip_confirm, invalidate_cache=invalidate_cache,
-                   name_override=name_override, console=c)
+                   console=c)

    elif action == "inspect":
        if not args:
@@ -1487,12 +1279,11 @@ def handle_skills_slash(cmd: str, console: Optional[Console] = None) -> None:

    elif action == "list":
        source_filter = "all"
-        enabled_only = "--enabled-only" in args or "--enabled" in args
        if "--source" in args:
            idx = args.index("--source")
            if idx + 1 < len(args):
                source_filter = args[idx + 1]
-        do_list(source_filter=source_filter, enabled_only=enabled_only, console=c)
+        do_list(source_filter=source_filter, console=c)

    elif action == "check":
        name = args[0] if args else None
@@ -1580,8 +1371,7 @@ def _print_skills_help(console: Console) -> None:
        "  [cyan]search[/] <query>              Search registries for skills\n"
        "  [cyan]install[/] <identifier>        Install a skill (with security scan)\n"
        "  [cyan]inspect[/] <identifier>        Preview a skill without installing\n"
-        "  [cyan]list[/] [--source hub|builtin|local] [--enabled-only]\n"
-        "       List installed skills; --enabled-only filters to the active profile's live set\n"
+        "  [cyan]list[/] [--source hub|builtin|local] List installed skills\n"
        "  [cyan]check[/] [name]                Check hub skills for upstream updates\n"
        "  [cyan]update[/] [name]               Update hub skills with upstream changes\n"
        "  [cyan]audit[/] [name]                Re-scan hub skills for security\n"
@@ -1,152 +0,0 @@
-"""``hermes slack ...`` CLI subcommands.
-
-Today only ``hermes slack manifest`` is implemented — it generates the
-Slack app manifest JSON for registering every gateway command as a native
-Slack slash (``/btw``, ``/stop``, ``/model``, …) so users get the same
-first-class slash UX Discord and Telegram already have.
-
-Typical workflow::
-
-    $ hermes slack manifest > slack-manifest.json
-    # or:
-    $ hermes slack manifest --write
-
-Then paste the printed JSON into the Slack app config (Features → App
-Manifest → Edit) and click Save. Slack diffs the manifest and prompts
-for reinstall when scopes/commands change.
-"""
-from __future__ import annotations
-
-import json
-import sys
-from pathlib import Path
-
-
-def _build_full_manifest(bot_name: str, bot_description: str) -> dict:
-    """Build a full Slack manifest merging display info + our slash list.
-
-    The slash-command list is always generated from ``COMMAND_REGISTRY`` so
-    it stays in sync with the rest of Hermes. Other manifest sections
-    (display info, OAuth scopes, socket mode) are set to sensible defaults
-    for a Hermes deployment — users can tweak them in the Slack UI after
-    pasting.
-    """
-    from hermes_cli.commands import slack_app_manifest
-
-    partial = slack_app_manifest()
-    slashes = partial["features"]["slash_commands"]
-
-    return {
-        "_metadata": {
-            "major_version": 1,
-            "minor_version": 1,
-        },
-        "display_information": {
-            "name": bot_name[:35],
-            "description": (bot_description or "Your Hermes agent on Slack")[:140],
-            "background_color": "#1a1a2e",
-        },
-        "features": {
-            "bot_user": {
-                "display_name": bot_name[:80],
-                "always_online": True,
-            },
-            "slash_commands": slashes,
-            "assistant_view": {
-                "assistant_description": "Chat with Hermes in threads and DMs.",
-            },
-        },
-        "oauth_config": {
-            "scopes": {
-                "bot": [
-                    "app_mentions:read",
-                    "assistant:write",
-                    "channels:history",
-                    "channels:read",
-                    "chat:write",
-                    "commands",
-                    "files:read",
-                    "files:write",
-                    "groups:history",
-                    "im:history",
-                    "im:read",
-                    "im:write",
-                    "users:read",
-                ],
-            },
-        },
-        "settings": {
-            "event_subscriptions": {
-                "bot_events": [
-                    "app_mention",
-                    "assistant_thread_context_changed",
-                    "assistant_thread_started",
-                    "message.channels",
-                    "message.groups",
-                    "message.im",
-                ],
-            },
-            "interactivity": {
-                "is_enabled": True,
-            },
-            "org_deploy_enabled": False,
-            "socket_mode_enabled": True,
-            "token_rotation_enabled": False,
-        },
-    }
-
-
-def slack_manifest_command(args) -> int:
-    """Print or write a Slack app manifest JSON.
-
-    Flags (all parsed in ``hermes_cli/main.py``):
-      --write [PATH]  Write to file instead of stdout (default path:
-                      ``$HERMES_HOME/slack-manifest.json``)
-      --name NAME     Override the bot display name (default: "Hermes")
-      --description DESC  Override the bot description
-      --slashes-only  Emit only the ``features.slash_commands`` array (for
-                      merging into an existing manifest manually)
-    """
-    name = getattr(args, "name", None) or "Hermes"
-    description = getattr(args, "description", None) or "Your Hermes agent on Slack"
-
-    if getattr(args, "slashes_only", False):
-        from hermes_cli.commands import slack_app_manifest
-
-        manifest = slack_app_manifest()["features"]["slash_commands"]
-    else:
-        manifest = _build_full_manifest(name, description)
-
-    payload = json.dumps(manifest, indent=2, ensure_ascii=False) + "\n"
-
-    write_target = getattr(args, "write", None)
-    if write_target is not None:
-        if isinstance(write_target, bool) and write_target:
-            # --write with no value → default location
-            try:
-                from hermes_constants import get_hermes_home
-
-                target = Path(get_hermes_home()) / "slack-manifest.json"
-            except Exception:
-                target = Path.home() / ".hermes" / "slack-manifest.json"
-        else:
-            target = Path(write_target).expanduser()
-        target.parent.mkdir(parents=True, exist_ok=True)
-        target.write_text(payload, encoding="utf-8")
-        print(f"Slack manifest written to: {target}", file=sys.stderr)
-        print(
-            "\nNext steps:\n"
-            "  1. Open https://api.slack.com/apps and pick your Hermes app\n"
-            "     (or create a new one: Create New App → From an app manifest).\n"
-            f"  2. Features → App Manifest → paste the contents of\n"
-            f"     {target}\n"
-            "  3. Save; Slack will prompt to reinstall the app if scopes or\n"
-            "     slash commands changed.\n"
-            "  4. Make sure Socket Mode is enabled and you have a bot token\n"
-            "     (xoxb-...) and app token (xapp-...) configured via\n"
-            "     `hermes setup`.\n",
-            file=sys.stderr,
-        )
-    else:
-        sys.stdout.write(payload)
-    return 0
@@ -326,8 +326,7 @@ def show_status(args):
        "WeCom Callback": ("WECOM_CALLBACK_CORP_ID", None),
        "Weixin": ("WEIXIN_ACCOUNT_ID", "WEIXIN_HOME_CHANNEL"),
        "BlueBubbles": ("BLUEBUBBLES_SERVER_URL", "BLUEBUBBLES_HOME_CHANNEL"),
-        "QQBot": ("QQ_APP_ID", "QQ_HOME_CHANNEL"),
-        "Yuanbao": ("YUANBAO_APP_ID", "YUANBAO_HOME_CHANNEL"),
+        "QQBot": ("QQ_APP_ID", "QQBOT_HOME_CHANNEL"),
    }
    
    for name, (token_var, home_var) in platforms.items():
@@ -20,10 +20,10 @@ def get_provider_request_timeout(

    try:
        from hermes_cli.config import load_config
-        config = load_config()
-    except Exception:
+    except ImportError:
        return None

+    config = load_config()
    providers = config.get("providers", {}) if isinstance(config, dict) else {}
    provider_config = (
        providers.get(provider_id, {}) if isinstance(providers, dict) else {}
@@ -49,10 +49,10 @@ def get_provider_stale_timeout(

    try:
        from hermes_cli.config import load_config
-        config = load_config()
-    except Exception:
+    except ImportError:
        return None

+    config = load_config()
    providers = config.get("providers", {}) if isinstance(config, dict) else {}
    provider_config = (
        providers.get(provider_id, {}) if isinstance(providers, dict) else {}
@@ -10,7 +10,8 @@ import random

 TIPS = [
    # --- Slash Commands ---
-    "/background <prompt> (alias /bg or /btw) runs a task in a separate session while your current one stays free.",
+    "/btw <question> asks a quick side question without tools or history — great for clarifications.",
+    "/background <prompt> runs a task in a separate session while your current one stays free.",
    "/branch forks the current session so you can explore a different direction without losing progress.",
    "/compress manually compresses conversation context when things get long.",
    "/rollback lists filesystem checkpoints — restore files the agent modified to any prior state.",
@@ -106,7 +107,7 @@ TIPS = [
    "Set display.streaming: true to see tokens appear in real time as the model generates.",
    "Set display.show_reasoning: true to watch the model's chain-of-thought reasoning.",
    "Set display.compact: true to reduce whitespace in output for denser information.",
-    "Set display.busy_input_mode: queue to queue messages instead of interrupting the agent, or steer to inject them mid-run via /steer.",
+    "Set display.busy_input_mode: queue to queue messages instead of interrupting the agent.",
    "Set display.resume_display: minimal to skip the full conversation recap on session resume.",
    "Set compression.threshold: 0.50 to control when auto-compression fires (default: 50% of context).",
    "Set agent.max_turns: 200 to let the agent take more tool-calling steps per turn.",
@@ -11,7 +11,6 @@ the `platform_toolsets` key.

 import json as _json
 import logging
-import os
 import sys
 from pathlib import Path
 from typing import Dict, List, Optional, Set
@@ -26,7 +25,7 @@ from hermes_cli.nous_subscription import (
    get_nous_subscription_features,
 )
 from tools.tool_backend_helpers import fal_key_is_configured, managed_nous_tools_enabled
-from utils import base_url_hostname, is_truthy_value
+from utils import base_url_hostname

 logger = logging.getLogger(__name__)

@@ -69,59 +68,25 @@ CONFIGURABLE_TOOLSETS = [
    ("rl",              "🧪 RL Training",               "Tinker-Atropos training tools"),
    ("homeassistant",    "🏠 Home Assistant",           "smart home device control"),
    ("spotify",          "🎵 Spotify",                  "playback, search, playlists, library"),
-    ("discord",         "💬 Discord (read/participate)", "fetch messages, search members, create thread"),
-    ("discord_admin",   "🛡️  Discord Server Admin",    "list channels/roles, pin, assign roles"),
-    ("yuanbao",          "🤖 Yuanbao",                  "group info, member queries, DM"),
 ]

 # Toolsets that are OFF by default for new installs.
 # They're still in _HERMES_CORE_TOOLS (available at runtime if enabled),
 # but the setup checklist won't pre-select them for first-time users.
-_DEFAULT_OFF_TOOLSETS = {"moa", "homeassistant", "rl", "spotify", "discord", "discord_admin"}
-
-# Platform-scoped toolsets: only appear in the `hermes tools` checklist for
-# these platforms, and only resolve/save for these platforms.  A toolset
-# absent from this map is available on every platform (current behaviour).
-#
-# Use this for tools whose APIs only make sense on one platform (Discord
-# server admin, Slack workspace admin, etc.).  Keeps every other platform's
-# checklist from filling up with irrelevant toggles.
-_TOOLSET_PLATFORM_RESTRICTIONS: Dict[str, Set[str]] = {
-    "discord": {"discord"},
-    "discord_admin": {"discord"},
-}
-
-
-def _toolset_allowed_for_platform(ts_key: str, platform: str) -> bool:
-    """Return True if ``ts_key`` is configurable on ``platform``.
-
-    Toolsets without a restriction entry are allowed everywhere (the default).
-    """
-    allowed = _TOOLSET_PLATFORM_RESTRICTIONS.get(ts_key)
-    return allowed is None or platform in allowed
+_DEFAULT_OFF_TOOLSETS = {"moa", "homeassistant", "rl", "spotify"}


 def _get_effective_configurable_toolsets():
    """Return CONFIGURABLE_TOOLSETS + any plugin-provided toolsets.

    Plugin toolsets are appended at the end so they appear after the
-    built-in toolsets in the TUI checklist. A plugin whose toolset key
-    already appears in ``CONFIGURABLE_TOOLSETS`` is skipped — bundled
-    plugins (e.g. ``plugins/spotify``) share their toolset key with the
-    built-in entry, and we want the built-in label/description to win.
-    Without the dedupe, ``hermes tools`` → "reconfigure existing" would
-    list the same toolset twice.
+    built-in toolsets in the TUI checklist.
    """
    result = list(CONFIGURABLE_TOOLSETS)
-    seen = {ts_key for ts_key, _, _ in result}
    try:
        from hermes_cli.plugins import discover_plugins, get_plugin_toolsets
        discover_plugins()  # idempotent — ensures plugins are loaded
-        for entry in get_plugin_toolsets():
-            if entry[0] in seen:
-                continue
-            seen.add(entry[0])
-            result.append(entry)
+        result.extend(get_plugin_toolsets())
    except Exception:
        pass
    return result
@@ -626,7 +591,7 @@ def _get_platform_tools(
    include_default_mcp_servers: bool = True,
 ) -> Set[str]:
    """Resolve which individual toolset names are enabled for a platform."""
-    from toolsets import resolve_toolset, TOOLSETS
+    from toolsets import resolve_toolset

    platform_toolsets = config.get("platform_toolsets") or {}
    toolset_names = platform_toolsets.get(platform)
@@ -640,8 +605,6 @@ def _get_platform_tools(
    toolset_names = [str(ts) for ts in toolset_names]

    configurable_keys = {ts_key for ts_key, _, _ in CONFIGURABLE_TOOLSETS}
-    plugin_ts_keys = _get_plugin_toolset_keys()
-    platform_default_keys = {p["default_toolset"] for p in PLATFORMS.values()}

    # If the saved list contains any configurable keys directly, the user
    # has explicitly configured this platform — use direct membership.
@@ -651,10 +614,7 @@ def _get_platform_tools(
    has_explicit_config = any(ts in configurable_keys for ts in toolset_names)

    if has_explicit_config:
-        enabled_toolsets = {
-            ts for ts in toolset_names
-            if ts in configurable_keys and _toolset_allowed_for_platform(ts, platform)
-        }
+        enabled_toolsets = {ts for ts in toolset_names if ts in configurable_keys}
    else:
        # No explicit config — fall back to resolving composite toolset names
        # (e.g. "hermes-cli") to individual tool names and reverse-mapping.
@@ -664,61 +624,14 @@ def _get_platform_tools(

        enabled_toolsets = set()
        for ts_key, _, _ in CONFIGURABLE_TOOLSETS:
-            if not _toolset_allowed_for_platform(ts_key, platform):
-                continue
            ts_tools = set(resolve_toolset(ts_key))
            if ts_tools and ts_tools.issubset(all_tool_names):
                enabled_toolsets.add(ts_key)
-
        default_off = set(_DEFAULT_OFF_TOOLSETS)
-        # Legacy safety: if the platform's own name matches a default-off
-        # toolset (e.g. `homeassistant` platform + `homeassistant` toolset),
-        # keep that toolset enabled on first install.  Skip this dodge for
-        # platform-restricted toolsets — those are always opt-in even on
-        # their own platform (e.g. `discord` + `discord` should stay OFF).
-        if platform in default_off and platform not in _TOOLSET_PLATFORM_RESTRICTIONS:
+        if platform in default_off:
            default_off.remove(platform)
-        # Home Assistant is already runtime-gated by its check_fn (requires
-        # HASS_TOKEN to register any tools). When a user has configured
-        # HASS_TOKEN, they've explicitly opted in — don't also strip it via
-        # _DEFAULT_OFF_TOOLSETS, which would silently drop HA from platforms
-        # (e.g. cron) that run through _get_platform_tools without an
-        # explicit saved toolset list. Without this, Norbert's HA cron jobs
-        # regressed after #14798 made cron honor per-platform tool config.
-        if "homeassistant" in default_off and os.getenv("HASS_TOKEN"):
-            default_off.remove("homeassistant")
        enabled_toolsets -= default_off

-    # Recover non-configurable platform toolsets (e.g. discord, feishu_doc,
-    # feishu_drive).  These are part of the platform's default composite but
-    # absent from CONFIGURABLE_TOOLSETS, so they can't appear in the TUI
-    # checklist or in a user-saved config.  Must run in BOTH branches —
-    # otherwise saving via `hermes tools` (which flips has_explicit_config
-    # to True) silently drops them.
-    platform_tool_universe = set(resolve_toolset(PLATFORMS[platform]["default_toolset"]))
-    configurable_tool_universe = set()
-    for ck in configurable_keys:
-        configurable_tool_universe.update(resolve_toolset(ck))
-    claimed = set()
-    for ts_key in enabled_toolsets:
-        claimed.update(resolve_toolset(ts_key))
-    skip = configurable_keys | plugin_ts_keys | platform_default_keys
-    skip |= {k for k in TOOLSETS if k.startswith("hermes-")}
-    skip |= set(_DEFAULT_OFF_TOOLSETS) - {platform}
-    for ts_key, ts_def in TOOLSETS.items():
-        if ts_key in skip:
-            continue
-        if ts_def.get("includes"):
-            continue
-        ts_tools = set(resolve_toolset(ts_key))
-        if not ts_tools or not ts_tools.issubset(platform_tool_universe):
-            continue
-        if ts_tools.issubset(configurable_tool_universe):
-            continue
-        if not ts_tools.issubset(claimed):
-            enabled_toolsets.add(ts_key)
-            claimed.update(ts_tools)
-
    # Plugin toolsets: enabled by default unless explicitly disabled, or
    # unless the toolset is in _DEFAULT_OFF_TOOLSETS (e.g. spotify —
    # shipped as a bundled plugin but user must opt in via `hermes tools`
@@ -726,6 +639,7 @@ def _get_platform_tools(
    # A plugin toolset is "known" for a platform once `hermes tools`
    # has been saved for that platform (tracked via known_plugin_toolsets).
    # Unknown plugins default to enabled; known-but-absent = disabled.
+    plugin_ts_keys = _get_plugin_toolset_keys()
    if plugin_ts_keys:
        known_map = config.get("known_plugin_toolsets", {})
        known_for_platform = set(known_map.get(platform, []))
@@ -743,6 +657,7 @@ def _get_platform_tools(

    # Preserve any explicit non-configurable toolset entries (for example,
    # custom toolsets or MCP server names saved in platform_toolsets).
+    platform_default_keys = {p["default_toolset"] for p in PLATFORMS.values()}
    explicit_passthrough = {
        ts
        for ts in toolset_names
@@ -788,14 +703,6 @@ def _save_platform_tools(config: dict, platform: str, enabled_toolset_keys: Set[
    """
    config.setdefault("platform_toolsets", {})

-    # Drop platform-scoped toolsets that don't apply here.  Prevents the
-    # "Configure all platforms" checklist (or a hand-edited config.yaml)
-    # from turning on, say, the `discord` toolset for Telegram.
-    enabled_toolset_keys = {
-        ts for ts in enabled_toolset_keys
-        if _toolset_allowed_for_platform(ts, platform)
-    }
-
    # Get the set of all configurable toolset keys (built-in + plugin)
    configurable_keys = {ts_key for ts_key, _, _ in CONFIGURABLE_TOOLSETS}
    plugin_keys = _get_plugin_toolset_keys()
@@ -810,7 +717,6 @@ def _save_platform_tools(config: dict, platform: str, enabled_toolset_keys: Set[
    existing_toolsets = config.get("platform_toolsets", {}).get(platform, [])
    if not isinstance(existing_toolsets, list):
        existing_toolsets = []
-    existing_toolsets = [str(ts) for ts in existing_toolsets]

    # Preserve any entries that are NOT configurable toolsets and NOT platform
    # defaults (i.e. only MCP server names should be preserved)
@@ -818,11 +724,6 @@ def _save_platform_tools(config: dict, platform: str, enabled_toolset_keys: Set[
        entry for entry in existing_toolsets
        if entry not in configurable_keys and entry not in platform_default_keys
    }
-    # Opening `hermes tools` is the user's opt-in to reconfigure tools, so treat
-    # saving from the picker as consent to clear the "no_mcp" sentinel. The
-    # picker has no checkbox for no_mcp, so without this users who once set it
-    # by hand could never re-enable MCP servers through the UI.
-    preserved_entries.discard("no_mcp")

    # Merge preserved entries with new enabled toolsets
    config["platform_toolsets"][platform] = sorted(enabled_toolset_keys | preserved_entries)
@@ -930,7 +831,7 @@ def _estimate_tool_tokens() -> Dict[str, int]:
    return _tool_token_cache


-def _prompt_toolset_checklist(platform_label: str, enabled: Set[str], platform: str = "cli") -> Set[str]:
+def _prompt_toolset_checklist(platform_label: str, enabled: Set[str]) -> Set[str]:
    """Multi-select checklist of toolsets. Returns set of selected toolset keys."""
    from hermes_cli.curses_ui import curses_checklist
    from toolsets import resolve_toolset
@@ -938,12 +839,7 @@ def _prompt_toolset_checklist(platform_label: str, enabled: Set[str], platform:
    # Pre-compute per-tool token counts (cached after first call).
    tool_tokens = _estimate_tool_tokens()

-    effective_all = _get_effective_configurable_toolsets()
-    # Drop platform-scoped toolsets that don't apply to this platform.
-    effective = [
-        (k, l, d) for (k, l, d) in effective_all
-        if _toolset_allowed_for_platform(k, platform)
-    ]
+    effective = _get_effective_configurable_toolsets()

    labels = []
    for ts_key, ts_label, ts_desc in effective:
@@ -1188,7 +1084,7 @@ def _is_provider_active(provider: dict, config: dict) -> bool:
                configured_provider = image_cfg.get("provider")
                if configured_provider not in (None, "", "fal"):
                    return False
-                if image_cfg.get("use_gateway") is not None and not is_truthy_value(image_cfg.get("use_gateway"), default=False):
+                if image_cfg.get("use_gateway") is False:
                    return False
            return feature.managed_by_nous
        if provider.get("tts_provider"):
@@ -1220,7 +1116,7 @@ def _is_provider_active(provider: dict, config: dict) -> bool:
        return (
            provider["imagegen_backend"] == "fal"
            and configured_provider in (None, "", "fal")
-            and not is_truthy_value(image_cfg.get("use_gateway"), default=False)
+            and not image_cfg.get("use_gateway")
        )
    return False

@@ -1857,7 +1753,7 @@ def tools_command(args=None, first_install: bool = False, config: dict = None):
            checklist_preselected = current_enabled - _DEFAULT_OFF_TOOLSETS

            # Show checklist
-            new_enabled = _prompt_toolset_checklist(pinfo["label"], checklist_preselected, pkey)
+            new_enabled = _prompt_toolset_checklist(pinfo["label"], checklist_preselected)

            added = new_enabled - current_enabled
            removed = current_enabled - new_enabled
@@ -2213,11 +2109,7 @@ def _apply_mcp_change(config: dict, targets: List[str], action: str) -> Set[str]

 def _print_tools_list(enabled_toolsets: set, mcp_servers: dict, platform: str = "cli"):
    """Print a summary of enabled/disabled toolsets and MCP tool filters."""
-    effective_all = _get_effective_configurable_toolsets()
-    effective = [
-        (k, l, d) for (k, l, d) in effective_all
-        if _toolset_allowed_for_platform(k, platform)
-    ]
+    effective = _get_effective_configurable_toolsets()
    builtin_keys = {ts_key for ts_key, _, _ in CONFIGURABLE_TOOLSETS}

    print(f"Built-in toolsets ({platform}):")
@@ -2283,20 +2175,6 @@ def tools_disable_enable_command(args):
            _print_error(f"Unknown toolset '{name}'")
        toolset_targets = [t for t in toolset_targets if t in valid_toolsets]

-    # Reject platform-scoped toolsets on platforms that don't allow them.
-    restricted_targets = [
-        t for t in toolset_targets
-        if not _toolset_allowed_for_platform(t, platform)
-    ]
-    if restricted_targets:
-        for name in restricted_targets:
-            allowed = sorted(_TOOLSET_PLATFORM_RESTRICTIONS.get(name) or set())
-            _print_error(
-                f"Toolset '{name}' is not available on platform '{platform}' "
-                f"(only: {', '.join(allowed)})"
-            )
-        toolset_targets = [t for t in toolset_targets if t not in restricted_targets]
-
    if toolset_targets:
        _apply_toolset_change(config, platform, toolset_targets, action)

@@ -287,7 +287,7 @@ _SCHEMA_OVERRIDES: Dict[str, Dict[str, Any]] = {
    "display.busy_input_mode": {
        "type": "select",
        "description": "Input behavior while agent is running",
-        "options": ["interrupt", "queue", "steer"],
+        "options": ["interrupt", "queue"],
    },
    "memory.provider": {
        "type": "select",
@@ -2327,14 +2327,16 @@ def _resolve_chat_argv(
    from hermes_cli.main import PROJECT_ROOT, _make_tui_argv

    argv, cwd = _make_tui_argv(PROJECT_ROOT / "ui-tui", tui_dev=False)
-    env = os.environ.copy()
-    env.setdefault("NODE_ENV", "production")
+    env: Optional[dict] = None

-    if resume:
-        env["HERMES_TUI_RESUME"] = resume
+    if resume or sidecar_url:
+        env = os.environ.copy()

-    if sidecar_url:
-        env["HERMES_TUI_SIDECAR_URL"] = sidecar_url
+        if resume:
+            env["HERMES_TUI_RESUME"] = resume
+
+        if sidecar_url:
+            env["HERMES_TUI_SIDECAR_URL"] = sidecar_url

    return list(argv), str(cwd) if cwd else None, env

@@ -195,6 +195,10 @@ def setup_logging(
        The ``logs/`` directory where files are written.
    """
    global _logging_initialized
+    if _logging_initialized and not force:
+        home = hermes_home or get_hermes_home()
+        return home / "logs"
+
    home = hermes_home or get_hermes_home()
    log_dir = home / "logs"
    log_dir.mkdir(parents=True, exist_ok=True)
@@ -244,9 +248,6 @@ def setup_logging(
            log_filter=_ComponentFilter(COMPONENT_PREFIXES["gateway"]),
        )

-    if _logging_initialized and not force:
-        return log_dir
-
    # Ensure root logger level is low enough for the handlers to fire.
    if root.level == logging.NOTSET or root.level > level:
        root.setLevel(level)
@@ -22,8 +22,6 @@ import sqlite3
 import threading
 import time
 from pathlib import Path
-
-from agent.memory_manager import sanitize_context
 from hermes_constants import get_hermes_home
 from typing import Any, Callable, Dict, List, Optional, TypeVar

@@ -33,7 +31,7 @@ T = TypeVar("T")

 DEFAULT_DB_PATH = get_hermes_home() / "state.db"

-SCHEMA_VERSION = 10
+SCHEMA_VERSION = 8

 SCHEMA_SQL = """
 CREATE TABLE IF NOT EXISTS schema_version (
@@ -85,8 +83,7 @@ CREATE TABLE IF NOT EXISTS messages (
    reasoning TEXT,
    reasoning_content TEXT,
    reasoning_details TEXT,
-    codex_reasoning_items TEXT,
-    codex_message_items TEXT
+    codex_reasoning_items TEXT
 );

 CREATE TABLE IF NOT EXISTS state_meta (
@@ -121,32 +118,6 @@ CREATE TRIGGER IF NOT EXISTS messages_fts_update AFTER UPDATE ON messages BEGIN
 END;
 """

-# Trigram FTS5 table for CJK substring search.  The default unicode61
-# tokenizer splits CJK characters into individual tokens, breaking phrase
-# matching.  The trigram tokenizer creates overlapping 3-byte sequences so
-# substring queries work natively for any script (CJK, Thai, etc.).
-FTS_TRIGRAM_SQL = """
-CREATE VIRTUAL TABLE IF NOT EXISTS messages_fts_trigram USING fts5(
-    content,
-    content=messages,
-    content_rowid=id,
-    tokenize='trigram'
-);
-
-CREATE TRIGGER IF NOT EXISTS messages_fts_trigram_insert AFTER INSERT ON messages BEGIN
-    INSERT INTO messages_fts_trigram(rowid, content) VALUES (new.id, new.content);
-END;
-
-CREATE TRIGGER IF NOT EXISTS messages_fts_trigram_delete AFTER DELETE ON messages BEGIN
-    INSERT INTO messages_fts_trigram(messages_fts_trigram, rowid, content) VALUES('delete', old.id, old.content);
-END;
-
-CREATE TRIGGER IF NOT EXISTS messages_fts_trigram_update AFTER UPDATE ON messages BEGIN
-    INSERT INTO messages_fts_trigram(messages_fts_trigram, rowid, content) VALUES('delete', old.id, old.content);
-    INSERT INTO messages_fts_trigram(rowid, content) VALUES (new.id, new.content);
-END;
-"""
-

 class SessionDB:
    """
@@ -385,27 +356,6 @@ class SessionDB:
                except sqlite3.OperationalError:
                    pass  # Column already exists
                cursor.execute("UPDATE schema_version SET version = 8")
-            if current_version < 9:
-                # v9: preserve replayable Codex assistant message ids/phases so
-                # follow-up turns can rebuild Responses API message items instead
-                # of flattening everything to plain assistant text.
-                try:
-                    cursor.execute('ALTER TABLE messages ADD COLUMN "codex_message_items" TEXT')
-                except sqlite3.OperationalError:
-                    pass  # Column already exists
-                cursor.execute("UPDATE schema_version SET version = 9")
-            if current_version < 10:
-                # v10: trigram FTS5 table for CJK/substring search.
-                # Created via FTS_TRIGRAM_SQL below; backfill existing messages.
-                try:
-                    cursor.execute("SELECT * FROM messages_fts_trigram LIMIT 0")
-                except sqlite3.OperationalError:
-                    cursor.executescript(FTS_TRIGRAM_SQL)
-                    cursor.execute(
-                        "INSERT INTO messages_fts_trigram(rowid, content) "
-                        "SELECT id, content FROM messages WHERE content IS NOT NULL"
-                    )
-                cursor.execute("UPDATE schema_version SET version = 10")

        # Unique title index — always ensure it exists (safe to run after migrations
        # since the title column is guaranteed to exist at this point)
@@ -423,12 +373,6 @@ class SessionDB:
        except sqlite3.OperationalError:
            cursor.executescript(FTS_SQL)

-        # Trigram FTS5 for CJK/substring search
-        try:
-            cursor.execute("SELECT * FROM messages_fts_trigram LIMIT 0")
-        except sqlite3.OperationalError:
-            cursor.executescript(FTS_TRIGRAM_SQL)
-
        self._conn.commit()

    # =========================================================================
@@ -878,18 +822,7 @@ class SessionDB:
        params = []

        if not include_children:
-            # Show root sessions and branch sessions (whose parent ended with
-            # end_reason='branched' before the child was created), while still
-            # hiding sub-agent runs and compression continuations (which also
-            # carry a parent_session_id but were spawned while the parent was
-            # still live — i.e., started_at < parent.ended_at).
-            where_clauses.append(
-                "(s.parent_session_id IS NULL"
-                " OR EXISTS (SELECT 1 FROM sessions p"
-                "            WHERE p.id = s.parent_session_id"
-                "            AND p.end_reason = 'branched'"
-                "            AND s.started_at >= p.ended_at))"
-            )
+            where_clauses.append("s.parent_session_id IS NULL")

        if source:
            where_clauses.append("s.source = ?")
@@ -1023,7 +956,6 @@ class SessionDB:
        reasoning_content: str = None,
        reasoning_details: Any = None,
        codex_reasoning_items: Any = None,
-        codex_message_items: Any = None,
    ) -> int:
        """
        Append a message to a session. Returns the message row ID.
@@ -1040,10 +972,6 @@ class SessionDB:
            json.dumps(codex_reasoning_items)
            if codex_reasoning_items else None
        )
-        codex_message_items_json = (
-            json.dumps(codex_message_items)
-            if codex_message_items else None
-        )
        tool_calls_json = json.dumps(tool_calls) if tool_calls else None

        # Pre-compute tool call count
@@ -1055,9 +983,8 @@ class SessionDB:
            cursor = conn.execute(
                """INSERT INTO messages (session_id, role, content, tool_call_id,
                   tool_calls, tool_name, timestamp, token_count, finish_reason,
-                   reasoning, reasoning_content, reasoning_details, codex_reasoning_items,
-                   codex_message_items)
-                   VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
+                   reasoning, reasoning_content, reasoning_details, codex_reasoning_items)
+                   VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
                (
                    session_id,
                    role,
@@ -1072,7 +999,6 @@ class SessionDB:
                    reasoning_content,
                    reasoning_details_json,
                    codex_items_json,
-                    codex_message_items_json,
                ),
            )
            msg_id = cursor.lastrowid
@@ -1178,33 +1104,22 @@ class SessionDB:
                current = child_id
        return session_id

-    def get_messages_as_conversation(
-        self, session_id: str, include_ancestors: bool = False
-    ) -> List[Dict[str, Any]]:
+    def get_messages_as_conversation(self, session_id: str) -> List[Dict[str, Any]]:
        """
        Load messages in the OpenAI conversation format (role + content dicts).
        Used by the gateway to restore conversation history.
        """
-        session_ids = [session_id]
-        if include_ancestors:
-            session_ids = self._session_lineage_root_to_tip(session_id)
-
        with self._lock:
-            placeholders = ",".join("?" for _ in session_ids)
-            rows = self._conn.execute(
+            cursor = self._conn.execute(
                "SELECT role, content, tool_call_id, tool_calls, tool_name, "
-                "reasoning, reasoning_content, reasoning_details, codex_reasoning_items, "
-                "codex_message_items "
-                f"FROM messages WHERE session_id IN ({placeholders}) ORDER BY timestamp, id",
-                tuple(session_ids),
-            ).fetchall()
-
+                "reasoning, reasoning_content, reasoning_details, codex_reasoning_items "
+                "FROM messages WHERE session_id = ? ORDER BY timestamp, id",
+                (session_id,),
+            )
+            rows = cursor.fetchall()
        messages = []
        for row in rows:
-            content = row["content"]
-            if row["role"] in {"user", "assistant"} and isinstance(content, str):
-                content = sanitize_context(content).strip()
-            msg = {"role": row["role"], "content": content}
+            msg = {"role": row["role"], "content": row["content"]}
            if row["tool_call_id"]:
                msg["tool_call_id"] = row["tool_call_id"]
            if row["tool_name"]:
@@ -1235,53 +1150,9 @@ class SessionDB:
                    except (json.JSONDecodeError, TypeError):
                        logger.warning("Failed to deserialize codex_reasoning_items, falling back to None")
                        msg["codex_reasoning_items"] = None
-                if row["codex_message_items"]:
-                    try:
-                        msg["codex_message_items"] = json.loads(row["codex_message_items"])
-                    except (json.JSONDecodeError, TypeError):
-                        logger.warning("Failed to deserialize codex_message_items, falling back to None")
-                        msg["codex_message_items"] = None
-            if include_ancestors and self._is_duplicate_replayed_user_message(messages, msg):
-                continue
            messages.append(msg)
        return messages

-    def _session_lineage_root_to_tip(self, session_id: str) -> List[str]:
-        if not session_id:
-            return [session_id]
-
-        chain = []
-        current = session_id
-        seen = set()
-        with self._lock:
-            for _ in range(100):
-                if not current or current in seen:
-                    break
-                seen.add(current)
-                chain.append(current)
-                row = self._conn.execute(
-                    "SELECT parent_session_id FROM sessions WHERE id = ?",
-                    (current,),
-                ).fetchone()
-                if row is None:
-                    break
-                current = row["parent_session_id"] if hasattr(row, "keys") else row[0]
-        return list(reversed(chain)) or [session_id]
-
-    @staticmethod
-    def _is_duplicate_replayed_user_message(messages: List[Dict[str, Any]], msg: Dict[str, Any]) -> bool:
-        if msg.get("role") != "user":
-            return False
-        content = msg.get("content")
-        if not isinstance(content, str) or not content:
-            return False
-        for prev in reversed(messages):
-            if prev.get("role") == "user" and prev.get("content") == content:
-                return True
-            if prev.get("role") == "assistant" and (prev.get("content") or prev.get("tool_calls")):
-                return False
-        return False
-
    # =========================================================================
    # Search
    # =========================================================================
@@ -1340,16 +1211,6 @@ class SessionDB:
        return sanitized.strip()


-    @staticmethod
-    def _is_cjk_codepoint(cp: int) -> bool:
-        return (0x4E00 <= cp <= 0x9FFF or    # CJK Unified Ideographs
-                0x3400 <= cp <= 0x4DBF or    # CJK Extension A
-                0x20000 <= cp <= 0x2A6DF or  # CJK Extension B
-                0x3000 <= cp <= 0x303F or    # CJK Symbols
-                0x3040 <= cp <= 0x309F or    # Hiragana
-                0x30A0 <= cp <= 0x30FF or    # Katakana
-                0xAC00 <= cp <= 0xD7AF)      # Hangul Syllables
-
    @staticmethod
    def _contains_cjk(text: str) -> bool:
        """Check if text contains CJK (Chinese, Japanese, Korean) characters."""
@@ -1365,11 +1226,6 @@ class SessionDB:
                return True
        return False

-    @classmethod
-    def _count_cjk(cls, text: str) -> int:
-        """Count CJK characters in text."""
-        return sum(1 for ch in text if cls._is_cjk_codepoint(ord(ch)))
-
    def search_messages(
        self,
        query: str,
@@ -1440,113 +1296,52 @@ class SessionDB:
            LIMIT ? OFFSET ?
        """

-        # CJK queries bypass the unicode61 FTS5 table.  The default tokenizer
-        # splits CJK characters into individual tokens, so "大别山项目" becomes
-        # "大 AND 别 AND 山 AND 项 AND 目" — producing false positives and
-        # missing exact phrase matches.
-        #
-        # For queries with 3+ CJK characters, we use the trigram FTS5 table
-        # (indexed substring matching with ranking and snippets).  For shorter
-        # CJK queries (1-2 chars), trigram can't match (it needs ≥9 UTF-8
-        # bytes = 3 CJK chars), so we fall back to LIKE.
-        is_cjk = self._contains_cjk(query)
-        if is_cjk:
-            raw_query = query.strip('"').strip()
-            cjk_count = self._count_cjk(raw_query)
-
-            if cjk_count >= 3:
-                # Trigram FTS5 path — quote each non-operator token to handle
-                # FTS5 special chars (%, *, etc.) while preserving boolean
-                # operators (AND, OR, NOT) for multi-term queries.
-                tokens = raw_query.split()
-                parts = []
-                for tok in tokens:
-                    if tok.upper() in ("AND", "OR", "NOT"):
-                        parts.append(tok)
-                    else:
-                        parts.append('"' + tok.replace('"', '""') + '"')
-                trigram_query = " ".join(parts)
-                tri_where = ["messages_fts_trigram MATCH ?"]
-                tri_params: list = [trigram_query]
-                if source_filter is not None:
-                    tri_where.append(f"s.source IN ({','.join('?' for _ in source_filter)})")
-                    tri_params.extend(source_filter)
-                if exclude_sources is not None:
-                    tri_where.append(f"s.source NOT IN ({','.join('?' for _ in exclude_sources)})")
-                    tri_params.extend(exclude_sources)
-                if role_filter:
-                    tri_where.append(f"m.role IN ({','.join('?' for _ in role_filter)})")
-                    tri_params.extend(role_filter)
-                tri_sql = f"""
-                    SELECT
-                        m.id,
-                        m.session_id,
-                        m.role,
-                        snippet(messages_fts_trigram, 0, '>>>', '<<<', '...', 40) AS snippet,
-                        m.content,
-                        m.timestamp,
-                        m.tool_name,
-                        s.source,
-                        s.model,
-                        s.started_at AS session_started
-                    FROM messages_fts_trigram
-                    JOIN messages m ON m.id = messages_fts_trigram.rowid
-                    JOIN sessions s ON s.id = m.session_id
-                    WHERE {' AND '.join(tri_where)}
-                    ORDER BY rank
-                    LIMIT ? OFFSET ?
-                """
-                tri_params.extend([limit, offset])
-                with self._lock:
-                    try:
-                        tri_cursor = self._conn.execute(tri_sql, tri_params)
-                    except sqlite3.OperationalError:
-                        matches = []
-                    else:
-                        matches = [dict(row) for row in tri_cursor.fetchall()]
-            else:
-                # Short CJK query (1-2 chars) — trigram needs ≥3 CJK chars.
-                # Fall back to LIKE substring search.
-                escaped = raw_query.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
-                like_where = ["m.content LIKE ? ESCAPE '\\'"]
-                like_params: list = [f"%{escaped}%"]
-                if source_filter is not None:
-                    like_where.append(f"s.source IN ({','.join('?' for _ in source_filter)})")
-                    like_params.extend(source_filter)
-                if exclude_sources is not None:
-                    like_where.append(f"s.source NOT IN ({','.join('?' for _ in exclude_sources)})")
-                    like_params.extend(exclude_sources)
-                if role_filter:
-                    like_where.append(f"m.role IN ({','.join('?' for _ in role_filter)})")
-                    like_params.extend(role_filter)
-                like_sql = f"""
-                    SELECT m.id, m.session_id, m.role,
-                           substr(m.content,
-                                  max(1, instr(m.content, ?) - 40),
-                                  120) AS snippet,
-                           m.content, m.timestamp, m.tool_name,
-                           s.source, s.model, s.started_at AS session_started
-                    FROM messages m
-                    JOIN sessions s ON s.id = m.session_id
-                    WHERE {' AND '.join(like_where)}
-                    ORDER BY m.timestamp DESC
-                    LIMIT ? OFFSET ?
-                """
-                like_params.extend([limit, offset])
-                # instr() parameter goes first in the bound list
-                like_params = [raw_query] + like_params
-                with self._lock:
-                    like_cursor = self._conn.execute(like_sql, like_params)
-                    matches = [dict(row) for row in like_cursor.fetchall()]
-        else:
-            with self._lock:
-                try:
-                    cursor = self._conn.execute(sql, params)
-                except sqlite3.OperationalError:
-                    # FTS5 query syntax error despite sanitization — return empty
+        with self._lock:
+            try:
+                cursor = self._conn.execute(sql, params)
+            except sqlite3.OperationalError:
+                # FTS5 query syntax error despite sanitization — return empty
+                # unless query contains CJK (fall back to LIKE below)
+                if not self._contains_cjk(query):
                    return []
-                else:
-                    matches = [dict(row) for row in cursor.fetchall()]
+                matches = []
+            else:
+                matches = [dict(row) for row in cursor.fetchall()]
+
+        # LIKE fallback for CJK queries: FTS5 default tokenizer splits CJK
+        # characters individually, causing multi-character queries to fail.
+        if not matches and self._contains_cjk(query):
+            raw_query = query.strip('"').strip()
+            like_where = ["m.content LIKE ?"]
+            like_params: list = [f"%{raw_query}%"]
+            if source_filter is not None:
+                like_where.append(f"s.source IN ({','.join('?' for _ in source_filter)})")
+                like_params.extend(source_filter)
+            if exclude_sources is not None:
+                like_where.append(f"s.source NOT IN ({','.join('?' for _ in exclude_sources)})")
+                like_params.extend(exclude_sources)
+            if role_filter:
+                like_where.append(f"m.role IN ({','.join('?' for _ in role_filter)})")
+                like_params.extend(role_filter)
+            like_sql = f"""
+                SELECT m.id, m.session_id, m.role,
+                       substr(m.content,
+                              max(1, instr(m.content, ?) - 40),
+                              120) AS snippet,
+                       m.content, m.timestamp, m.tool_name,
+                       s.source, s.model, s.started_at AS session_started
+                FROM messages m
+                JOIN sessions s ON s.id = m.session_id
+                WHERE {' AND '.join(like_where)}
+                ORDER BY m.timestamp DESC
+                LIMIT ? OFFSET ?
+            """
+            like_params.extend([limit, offset])
+            # instr() parameter goes first in the bound list
+            like_params = [raw_query] + like_params
+            with self._lock:
+                like_cursor = self._conn.execute(like_sql, like_params)
+                matches = [dict(row) for row in like_cursor.fetchall()]

        # Add surrounding context (1 message before + after each match).
        # Done outside the lock so we don't hold it across N sequential queries.
@@ -1606,32 +1401,16 @@ class SessionDB:
        limit: int = 20,
        offset: int = 0,
    ) -> List[Dict[str, Any]]:
-        """List sessions, optionally filtered by source.
-
-        Returns rows enriched with a computed ``last_active`` column (latest
-        message timestamp for the session, falling back to ``started_at``),
-        ordered by most-recently-used first.
-        """
-        select_with_last_active = (
-            "SELECT s.*, COALESCE(m.last_active, s.started_at) AS last_active "
-            "FROM sessions s "
-            "LEFT JOIN ("
-            "SELECT session_id, MAX(timestamp) AS last_active "
-            "FROM messages GROUP BY session_id"
-            ") m ON m.session_id = s.id "
-        )
+        """List sessions, newest ``started_at`` first, optionally filtered by source."""
        with self._lock:
            if source:
                cursor = self._conn.execute(
-                    f"{select_with_last_active}"
-                    "WHERE s.source = ? "
-                    "ORDER BY last_active DESC, s.started_at DESC, s.id DESC LIMIT ? OFFSET ?",
+                    "SELECT * FROM sessions WHERE source = ? ORDER BY started_at DESC LIMIT ? OFFSET ?",
                    (source, limit, offset),
                )
            else:
                cursor = self._conn.execute(
-                    f"{select_with_last_active}"
-                    "ORDER BY last_active DESC, s.started_at DESC, s.id DESC LIMIT ? OFFSET ?",
+                    "SELECT * FROM sessions ORDER BY started_at DESC LIMIT ? OFFSET ?",
                    (limit, offset),
                )
            return [dict(row) for row in cursor.fetchall()]
@@ -1698,45 +1477,12 @@ class SessionDB:
            )
        self._execute_write(_do)

-    @staticmethod
-    def _remove_session_files(sessions_dir: Optional[Path], session_id: str) -> None:
-        """Remove on-disk transcript files for a session.
-
-        Cleans up ``{session_id}.json``, ``{session_id}.jsonl``, and any
-        ``request_dump_{session_id}_*.json`` files left by the gateway.
-        Silently skips files that don't exist and swallows OSError so a
-        filesystem hiccup never blocks a DB operation.
-        """
-        if sessions_dir is None:
-            return
-        for suffix in (".json", ".jsonl"):
-            p = sessions_dir / f"{session_id}{suffix}"
-            try:
-                p.unlink(missing_ok=True)
-            except OSError:
-                pass
-        # request_dump files use session_id as a prefix component
-        try:
-            for p in sessions_dir.glob(f"request_dump_{session_id}_*.json"):
-                try:
-                    p.unlink(missing_ok=True)
-                except OSError:
-                    pass
-        except OSError:
-            pass
-
-    def delete_session(
-        self,
-        session_id: str,
-        sessions_dir: Optional[Path] = None,
-    ) -> bool:
+    def delete_session(self, session_id: str) -> bool:
        """Delete a session and all its messages.

        Child sessions are orphaned (parent_session_id set to NULL) rather
        than cascade-deleted, so they remain accessible independently.
-        When *sessions_dir* is provided, also removes on-disk transcript
-        files (``.json`` / ``.jsonl`` / ``request_dump_*``) for the deleted
-        session. Returns True if the session was found and deleted.
+        Returns True if the session was found and deleted.
        """
        def _do(conn):
            cursor = conn.execute(
@@ -1753,29 +1499,16 @@ class SessionDB:
            conn.execute("DELETE FROM messages WHERE session_id = ?", (session_id,))
            conn.execute("DELETE FROM sessions WHERE id = ?", (session_id,))
            return True
+        return self._execute_write(_do)

-        deleted = self._execute_write(_do)
-        if deleted:
-            self._remove_session_files(sessions_dir, session_id)
-        return deleted
-
-    def prune_sessions(
-        self,
-        older_than_days: int = 90,
-        source: str = None,
-        sessions_dir: Optional[Path] = None,
-    ) -> int:
+    def prune_sessions(self, older_than_days: int = 90, source: str = None) -> int:
        """Delete sessions older than N days. Returns count of deleted sessions.

        Only prunes ended sessions (not active ones).  Child sessions outside
        the prune window are orphaned (parent_session_id set to NULL) rather
-        than cascade-deleted.  When *sessions_dir* is provided, also removes
-        on-disk transcript files (``.json`` / ``.jsonl`` /
-        ``request_dump_*``) for every pruned session, outside the DB
-        transaction.
+        than cascade-deleted.
        """
        cutoff = time.time() - (older_than_days * 86400)
-        removed_ids: list[str] = []

        def _do(conn):
            if source:
@@ -1805,14 +1538,9 @@ class SessionDB:
            for sid in session_ids:
                conn.execute("DELETE FROM messages WHERE session_id = ?", (sid,))
                conn.execute("DELETE FROM sessions WHERE id = ?", (sid,))
-                removed_ids.append(sid)
            return len(session_ids)

-        count = self._execute_write(_do)
-        # Clean up on-disk files outside the DB transaction
-        for sid in removed_ids:
-            self._remove_session_files(sessions_dir, sid)
-        return count
+        return self._execute_write(_do)

    # ── Meta key/value (for scheduler bookkeeping) ──

@@ -1866,7 +1594,6 @@ class SessionDB:
        retention_days: int = 90,
        min_interval_hours: int = 24,
        vacuum: bool = True,
-        sessions_dir: Optional[Path] = None,
    ) -> Dict[str, Any]:
        """Idempotent auto-maintenance: prune old sessions + optional VACUUM.

@@ -1874,10 +1601,6 @@ class SessionDB:
        within ``min_interval_hours`` no-op. Designed to be called once at
        startup from long-lived entrypoints (CLI, gateway, cron scheduler).

-        When *sessions_dir* is provided, on-disk transcript files
-        (``.json`` / ``.jsonl`` / ``request_dump_*``) for pruned sessions
-        are removed as part of the same sweep (issue #3015).
-
        Never raises. On any failure, logs a warning and returns a dict
        with ``"error"`` set.

@@ -1901,10 +1624,7 @@ class SessionDB:
                except (TypeError, ValueError):
                    pass  # corrupt meta; treat as no prior run

-            pruned = self.prune_sessions(
-                older_than_days=retention_days,
-                sessions_dir=sessions_dir,
-            )
+            pruned = self.prune_sessions(older_than_days=retention_days)
            result["pruned"] = pruned

            # Only VACUUM if we actually freed rows — VACUUM on a tight DB
@@ -24,7 +24,6 @@ import json
 import asyncio
 import logging
 import threading
-import time
 from typing import Dict, Any, List, Optional, Tuple

 from tools.registry import discover_builtin_tools, registry
@@ -289,34 +288,30 @@ def get_tool_definitions(
                filtered_tools[i] = {"type": "function", "function": dynamic_schema}
                break

-    # Rebuild discord / discord_admin schemas based on the bot's privileged
-    # intents (detected from GET /applications/@me) and the user's action
-    # allowlist in config.  Hides actions the bot's intents don't support so
-    # the model never attempts them, and annotates fetch_messages when the
+    # Rebuild discord_server schema based on the bot's privileged intents
+    # (detected from GET /applications/@me) and the user's action allowlist
+    # in config.  Hides actions the bot's intents don't support so the
+    # model never attempts them, and annotates fetch_messages when the
    # MESSAGE_CONTENT intent is missing.
-    _discord_schema_fns = {
-        "discord": "get_dynamic_schema_core",
-        "discord_admin": "get_dynamic_schema_admin",
-    }
-    for discord_tool_name in _discord_schema_fns:
-        if discord_tool_name in available_tool_names:
-            try:
-                from tools import discord_tool as _dt
-                schema_fn = getattr(_dt, _discord_schema_fns[discord_tool_name])
-                dynamic = schema_fn()
-            except Exception:
-                dynamic = None
-            if dynamic is None:
-                filtered_tools = [
-                    t for t in filtered_tools
-                    if t.get("function", {}).get("name") != discord_tool_name
-                ]
-                available_tool_names.discard(discord_tool_name)
-            else:
-                for i, td in enumerate(filtered_tools):
-                    if td.get("function", {}).get("name") == discord_tool_name:
-                        filtered_tools[i] = {"type": "function", "function": dynamic}
-                        break
+    if "discord_server" in available_tool_names:
+        try:
+            from tools.discord_tool import get_dynamic_schema
+            dynamic = get_dynamic_schema()
+        except Exception:  # pragma: no cover — defensive; handled as "no schema" below
+            dynamic = None
+        if dynamic is None:
+            # No usable schema: the allowlist is empty, intent detection left no
+            # actions, or the schema builder raised.  Drop the tool entirely.
+            filtered_tools = [
+                t for t in filtered_tools
+                if t.get("function", {}).get("name") != "discord_server"
+            ]
+            available_tool_names.discard("discord_server")
+        else:
+            for i, td in enumerate(filtered_tools):
+                if td.get("function", {}).get("name") == "discord_server":
+                    filtered_tools[i] = {"type": "function", "function": dynamic}
+                    break

    # Strip web tool cross-references from browser_navigate description when
    # web_search / web_extract are not available.  The static schema says
@@ -568,14 +563,6 @@ def handle_function_call(
            except Exception:
                pass  # file_tools may not be loaded yet

-        # Measure tool dispatch latency so post_tool_call and
-        # transform_tool_result hooks can observe per-tool duration.
-        # Inspired by Claude Code 2.1.119, which added ``duration_ms`` to
-        # PostToolUse hook inputs so plugin authors can build latency
-        # dashboards, budget alerts, and regression canaries without having
-        # to wrap every tool manually.  We use monotonic() so the value is
-        # unaffected by wall-clock adjustments during the call.
-        _dispatch_start = time.monotonic()
        if function_name == "execute_code":
            # Prefer the caller-provided list so subagents can't overwrite
            # the parent's tool set via the process-global.
@@ -591,7 +578,6 @@ def handle_function_call(
                task_id=task_id,
                user_task=user_task,
            )
-        duration_ms = int((time.monotonic() - _dispatch_start) * 1000)

        try:
            from hermes_cli.plugins import invoke_hook
@@ -603,7 +589,6 @@ def handle_function_call(
                task_id=task_id or "",
                session_id=session_id or "",
                tool_call_id=tool_call_id or "",
-                duration_ms=duration_ms,
            )
        except Exception:
            pass
@@ -624,7 +609,6 @@ def handle_function_call(
                task_id=task_id or "",
                session_id=session_id or "",
                tool_call_id=tool_call_id or "",
-                duration_ms=duration_ms,
            )
            for hook_result in hook_results:
                if isinstance(hook_result, str):
@@ -7,7 +7,9 @@
  perSystem = { pkgs, system, lib, ... }:
    let
      hermes-agent = inputs.self.packages.${system}.default;
-      hermesVenv = hermes-agent.hermesVenv;
+      hermesVenv = pkgs.callPackage ./python.nix {
+        inherit (inputs) uv2nix pyproject-nix pyproject-build-systems;
+      };

      configMergeScript = pkgs.callPackage ./configMergeScript.nix { };

@@ -191,35 +193,6 @@ json.dump(sorted(leaf_paths(DEFAULT_CONFIG)), sys.stdout, indent=2)
          echo "ok" > $out/result
        '';

-        # Verify extraPythonPackages PYTHONPATH injection
-        extra-python-packages = let
-          testPkg = pkgs.python312Packages.pyfiglet;
-          hermesWithExtra = hermes-agent.override {
-            extraPythonPackages = [ testPkg ];
-          };
-        in pkgs.runCommand "hermes-extra-python-packages" { } ''
-          set -e
-          echo "=== Checking extraPythonPackages PYTHONPATH injection ==="
-
-          grep -q "PYTHONPATH" ${hermesWithExtra}/bin/hermes || \
-            (echo "FAIL: PYTHONPATH not in wrapper"; exit 1)
-          echo "PASS: PYTHONPATH present in wrapper"
-
-          grep -q "${testPkg}" ${hermesWithExtra}/bin/hermes || \
-            (echo "FAIL: test package path not in PYTHONPATH"; exit 1)
-          echo "PASS: test package path found in wrapper"
-
-          echo "=== Checking base package has no PYTHONPATH ==="
-          if grep -q "PYTHONPATH" ${hermes-agent}/bin/hermes; then
-            echo "FAIL: base package should not have PYTHONPATH"; exit 1
-          fi
-          echo "PASS: base package clean"
-
-          echo "=== All extraPythonPackages checks passed ==="
-          mkdir -p $out
-          echo "ok" > $out/result
-        '';
-
        # ── Config merge + round-trip test ────────────────────────────────
        # Tests the merge script (Nix activation behavior) across 7
        # scenarios, then verifies Python's load_config() reads correctly.
@@ -1,186 +0,0 @@
-# nix/hermes-agent.nix — Overridable Hermes Agent package
-#
-# callPackage auto-wires nixpkgs args; flake inputs are passed explicitly.
-# Users override via: pkgs.hermes-agent.override { extraPythonPackages = [...]; }
-{
-  lib,
-  stdenv,
-  makeWrapper,
-  callPackage,
-  python312,
-  nodejs_22,
-  ripgrep,
-  git,
-  openssh,
-  ffmpeg,
-  tirith,
-  # Flake inputs — passed explicitly by packages.nix and overlays.nix
-  uv2nix,
-  pyproject-nix,
-  pyproject-build-systems,
-  npm-lockfile-fix,
-  # Overridable parameters
-  extraPythonPackages ? [ ],
-}:
-let
-  hermesVenv = callPackage ./python.nix {
-    inherit uv2nix pyproject-nix pyproject-build-systems;
-  };
-
-  hermesNpmLib = callPackage ./lib.nix {
-    inherit npm-lockfile-fix;
-  };
-
-  hermesTui = callPackage ./tui.nix {
-    inherit hermesNpmLib;
-  };
-
-  hermesWeb = callPackage ./web.nix {
-    inherit hermesNpmLib;
-  };
-
-  bundledSkills = lib.cleanSourceWith {
-    src = ../skills;
-    filter = path: _type: !(lib.hasInfix "/index-cache/" path);
-  };
-
-  runtimeDeps = [
-    nodejs_22
-    ripgrep
-    git
-    openssh
-    ffmpeg
-    tirith
-  ];
-
-  runtimePath = lib.makeBinPath runtimeDeps;
-
-  sitePackagesPath = python312.sitePackages;
-
-  # Walk propagatedBuildInputs to include transitive Python deps in PYTHONPATH.
-  # Without this, a plugin listing e.g. requests as a dep would fail at runtime
-  # if requests isn't already in the sealed uv2nix venv.
-  allExtraPythonPackages = python312.pkgs.requiredPythonModules extraPythonPackages;
-
-  pythonPath = lib.makeSearchPath sitePackagesPath allExtraPythonPackages;
-
-  pyprojectHash = builtins.hashString "sha256" (builtins.readFile ../pyproject.toml);
-  uvLockHash =
-    if builtins.pathExists ../uv.lock then
-      builtins.hashString "sha256" (builtins.readFile ../uv.lock)
-    else
-      "none";
-in
-stdenv.mkDerivation {
-  pname = "hermes-agent";
-  version = (builtins.fromTOML (builtins.readFile ../pyproject.toml)).project.version;
-
-  dontUnpack = true;
-  dontBuild = true;
-  nativeBuildInputs = [ makeWrapper ];
-
-  installPhase = ''
-    runHook preInstall
-
-    mkdir -p $out/share/hermes-agent $out/bin
-    cp -r ${bundledSkills} $out/share/hermes-agent/skills
-    cp -r ${hermesWeb} $out/share/hermes-agent/web_dist
-
-    mkdir -p $out/ui-tui
-    cp -r ${hermesTui}/lib/hermes-tui/* $out/ui-tui/
-
-    ${lib.concatMapStringsSep "\n"
-      (name: ''
-        makeWrapper ${hermesVenv}/bin/${name} $out/bin/${name} \
-          --suffix PATH : "${runtimePath}" \
-          --set HERMES_BUNDLED_SKILLS $out/share/hermes-agent/skills \
-          --set HERMES_WEB_DIST $out/share/hermes-agent/web_dist \
-          --set HERMES_TUI_DIR $out/ui-tui \
-          --set HERMES_PYTHON ${hermesVenv}/bin/python3 \
-          --set HERMES_NODE ${nodejs_22}/bin/node \
-          ${lib.optionalString (extraPythonPackages != [ ]) ''--suffix PYTHONPATH : "${pythonPath}"''}
-      '')
-      [
-        "hermes"
-        "hermes-agent"
-        "hermes-acp"
-      ]
-    }
-
-    ${lib.optionalString (extraPythonPackages != [ ]) ''
-      echo "=== Checking for plugin/core package collisions ==="
-      ${hermesVenv}/bin/python3 -c "
-import pathlib, sys, re
-
-def canonical(name):
-    return re.sub(r'[-_.]+', '-', name).lower()
-
-# Collect core venv package names
-core = set()
-venv_sp = pathlib.Path('${hermesVenv}/${sitePackagesPath}')
-for di in venv_sp.glob('*.dist-info'):
-    meta = di / 'METADATA'
-    if meta.exists():
-        for line in meta.read_text().splitlines():
-            if line.startswith('Name:'):
-                core.add(canonical(line.split(':', 1)[1].strip()))
-                break
-
-# Check each extra package for collisions
-extras_dirs = [${lib.concatMapStringsSep ", " (p: "'${toString p}'") allExtraPythonPackages}]
-for edir in extras_dirs:
-    sp = pathlib.Path(edir) / '${sitePackagesPath}'
-    if not sp.exists():
-        continue
-    for di in sp.glob('*.dist-info'):
-        meta = di / 'METADATA'
-        if not meta.exists():
-            continue
-        for line in meta.read_text().splitlines():
-            if line.startswith('Name:'):
-                pkg = canonical(line.split(':', 1)[1].strip())
-                if pkg in core:
-                    print(f'ERROR: plugin package \"{pkg}\" collides with a package in hermes sealed venv', file=sys.stderr)
-                    print(f'  from: {di}', file=sys.stderr)
-                    print(f'  Remove this dependency from extraPythonPackages.', file=sys.stderr)
-                    sys.exit(1)
-                break
-
-print('No collisions found.')
-      "
-      echo "=== No collisions ==="
-    ''}
-
-    runHook postInstall
-  '';
-
-  passthru = {
-    inherit hermesTui hermesWeb hermesNpmLib hermesVenv;
-
-    devShellHook = ''
-      STAMP=".nix-stamps/hermes-agent"
-      STAMP_VALUE="${pyprojectHash}:${uvLockHash}"
-      if [ ! -f "$STAMP" ] || [ "$(cat "$STAMP")" != "$STAMP_VALUE" ]; then
-        echo "hermes-agent: installing Python dependencies..."
-        uv venv .venv --python ${python312}/bin/python3 2>/dev/null || true
-        source .venv/bin/activate
-        uv pip install -e ".[all]"
-        [ -d mini-swe-agent ] && uv pip install -e ./mini-swe-agent 2>/dev/null || true
-        [ -d tinker-atropos ] && uv pip install -e ./tinker-atropos 2>/dev/null || true
-        mkdir -p .nix-stamps
-        echo "$STAMP_VALUE" > "$STAMP"
-      else
-        source .venv/bin/activate
-        export HERMES_PYTHON=${hermesVenv}/bin/python3
-      fi
-    '';
-  };
-
-  meta = with lib; {
-    description = "AI agent with advanced tool-calling capabilities";
-    homepage = "https://github.com/NousResearch/hermes-agent";
-    mainProgram = "hermes";
-    license = licenses.mit;
-    platforms = platforms.unix;
-  };
-}
@@ -28,8 +28,6 @@

  let
    cfg = config.services.hermes-agent;
-    effectivePackage = if cfg.extraPythonPackages == [ ] then cfg.package
-      else cfg.package.override { inherit (cfg) extraPythonPackages; };
    hermes-agent = inputs.self.packages.${pkgs.stdenv.hostPlatform.system}.default;

    # Deep-merge config type (from 0xrsydn/nix-hermes-agent)
@@ -458,52 +456,6 @@
        description = "Extra packages available on PATH.";
      };

-      extraPlugins = mkOption {
-        type = types.listOf types.package;
-        default = [ ];
-        description = ''
-          Directory-based plugin packages to symlink into the hermes plugins
-          directory. Each package should contain a plugin.yaml and __init__.py
-          at its root. Hermes discovers these automatically on startup.
-        '';
-        example = literalExpression ''
-          [
-            (pkgs.fetchFromGitHub {
-              owner = "stephenschoettler";
-              repo = "hermes-lcm";
-              name = "hermes-lcm";
-              rev = "v0.7.0";
-              hash = "sha256-...";
-            })
-          ]
-        '';
-      };
-
-      extraPythonPackages = mkOption {
-        type = types.listOf types.package;
-        default = [ ];
-        description = ''
-          Python packages to add to PYTHONPATH for entry-point plugin discovery.
-          These are pip-packaged plugins that register via the
-          hermes_agent.plugins entry-point group. Each package must be built
-          with the same Python interpreter as hermes (python312).
-        '';
-        example = literalExpression ''
-          [
-            (pkgs.python312Packages.buildPythonPackage {
-              pname = "rtk-hermes";
-              version = "1.0.0";
-              src = pkgs.fetchFromGitHub {
-                owner = "ogallotti";
-                repo = "rtk-hermes";
-                rev = "main";
-                hash = "sha256-...";
-              };
-            })
-          ]
-        '';
-      };
-
      restart = mkOption {
        type = types.str;
        default = "always";
@@ -618,7 +570,7 @@
      # so interactive shells share state (sessions, skills, cron) with the
      # gateway service instead of creating a separate ~/.hermes/.
      (lib.mkIf cfg.addToSystemPackages {
-        environment.systemPackages = [ effectivePackage ];
+        environment.systemPackages = [ cfg.package ];
        environment.variables.HERMES_HOME = "${cfg.stateDir}/.hermes";
      })

@@ -629,16 +581,6 @@
        });
      })

-      # ── Assertions ─────────────────────────────────────────────────────
-      {
-        assertions = let
-          names = map lib.getName cfg.extraPlugins;
-        in [{
-          assertion = (lib.length names) == (lib.length (lib.unique names));
-          message = "services.hermes-agent.extraPlugins: duplicate plugin names detected: ${toString names}. If using fetchFromGitHub, set name = \"plugin-name\" to disambiguate.";
-        }];
-      }
-
      # ── Warnings ──────────────────────────────────────────────────────
      (lib.mkIf (cfg.container.enable && !cfg.addToSystemPackages && cfg.container.hostUsers != []) {
        warnings = [
@@ -660,7 +602,6 @@
          "d ${cfg.stateDir}/.hermes/sessions 2770 ${cfg.user} ${cfg.group} - -"
          "d ${cfg.stateDir}/.hermes/logs   2770 ${cfg.user} ${cfg.group} - -"
          "d ${cfg.stateDir}/.hermes/memories 2770 ${cfg.user} ${cfg.group} - -"
-          "d ${cfg.stateDir}/.hermes/plugins 2770 ${cfg.user} ${cfg.group} - -"
          "d ${cfg.stateDir}/home           0750 ${cfg.user} ${cfg.group} - -"
          "d ${cfg.workingDirectory}         2770 ${cfg.user} ${cfg.group} - -"
        ];
@@ -682,7 +623,7 @@
          find ${cfg.stateDir}/.hermes -maxdepth 1 \
            \( -name "*.db" -o -name "*.db-wal" -o -name "*.db-shm" -o -name "SOUL.md" \) \
            -exec chmod g+rw {} + 2>/dev/null || true
-          for _subdir in cron sessions logs memories plugins; do
+          for _subdir in cron sessions logs memories; do
            mkdir -p "${cfg.stateDir}/.hermes/$_subdir"
            chown ${cfg.user}:${cfg.group} "${cfg.stateDir}/.hermes/$_subdir"
            chmod 2770 "${cfg.stateDir}/.hermes/$_subdir"
@@ -791,22 +732,6 @@ HERMES_NIX_ENV_EOF
          ${lib.concatStringsSep "\n" (lib.mapAttrsToList (name: _value: ''
            install -o ${cfg.user} -g ${cfg.group} -m 0640 ${documentDerivation}/${name} ${cfg.workingDirectory}/${name}
          '') cfg.documents)}
-
-        # ── Declarative plugins ─────────────────────────────────────────
-        # Remove stale managed symlinks (plugins removed from config)
-        find ${cfg.stateDir}/.hermes/plugins -maxdepth 1 -type l -name 'nix-managed-*' -delete 2>/dev/null || true
-
-        ${lib.concatStringsSep "\n" (map (plugin:
-          let
-            name = lib.getName plugin;
-          in ''
-            if [ ! -f "${plugin}/plugin.yaml" ]; then
-              echo "ERROR: extraPlugins entry '${plugin}' has no plugin.yaml" >&2
-              exit 1
-            fi
-            ln -sfn ${plugin} ${cfg.stateDir}/.hermes/plugins/nix-managed-${name}
-            chown -h ${cfg.user}:${cfg.group} ${cfg.stateDir}/.hermes/plugins/nix-managed-${name}
-          '') cfg.extraPlugins)}
        '';
      }

@@ -837,7 +762,7 @@ HERMES_NIX_ENV_EOF
            # reads them at Python startup — no systemd EnvironmentFile needed.

            ExecStart = lib.concatStringsSep " " ([
-              "${effectivePackage}/bin/hermes"
+              "${cfg.package}/bin/hermes"
              "gateway"
            ] ++ cfg.extraArgs);

@@ -860,7 +785,7 @@ HERMES_NIX_ENV_EOF
          };

          path = [
-            effectivePackage
+            cfg.package
            pkgs.bash
            pkgs.coreutils
            pkgs.git
@@ -885,11 +810,11 @@ HERMES_NIX_ENV_EOF

          preStart = ''
            # Stable symlinks — container references these, not store paths directly
-            ln -sfn ${effectivePackage} ${cfg.stateDir}/current-package
+            ln -sfn ${cfg.package} ${cfg.stateDir}/current-package
            ln -sfn ${containerEntrypoint} ${cfg.stateDir}/current-entrypoint

            # GC roots so nix-collect-garbage doesn't remove store paths in use
-            ${pkgs.nix}/bin/nix-store --add-root ${cfg.stateDir}/.gc-root --indirect -r ${effectivePackage} 2>/dev/null || true
+            ${pkgs.nix}/bin/nix-store --add-root ${cfg.stateDir}/.gc-root --indirect -r ${cfg.package} 2>/dev/null || true
            ${pkgs.nix}/bin/nix-store --add-root ${cfg.stateDir}/.gc-root-entrypoint --indirect -r ${containerEntrypoint} 2>/dev/null || true

            # Check if container needs (re)creation
@@ -1,10 +0,0 @@
-# nix/overlays.nix — Expose pkgs.hermes-agent for external NixOS configs
-{ inputs, ... }:
-{
-  flake.overlays.default = final: _: {
-    hermes-agent = final.callPackage ./hermes-agent.nix {
-      inherit (inputs) uv2nix pyproject-nix pyproject-build-systems;
-      npm-lockfile-fix = inputs.npm-lockfile-fix.packages.${final.stdenv.hostPlatform.system}.default;
-    };
-  };
-}
@@ -4,19 +4,120 @@
  perSystem =
    { pkgs, inputs', ... }:
    let
-      hermesAgent = pkgs.callPackage ./hermes-agent.nix {
+      hermesVenv = pkgs.callPackage ./python.nix {
        inherit (inputs) uv2nix pyproject-nix pyproject-build-systems;
+      };
+
+      hermesNpmLib = pkgs.callPackage ./lib.nix {
        npm-lockfile-fix = inputs'.npm-lockfile-fix.packages.default;
      };
+
+      hermesTui = pkgs.callPackage ./tui.nix {
+        inherit hermesNpmLib;
+      };
+
+      # Import bundled skills, excluding runtime caches
+      bundledSkills = pkgs.lib.cleanSourceWith {
+        src = ../skills;
+        filter = path: _type: !(pkgs.lib.hasInfix "/index-cache/" path);
+      };
+
+      hermesWeb = pkgs.callPackage ./web.nix {
+        inherit hermesNpmLib;
+      };
+
+      runtimeDeps = with pkgs; [
+        nodejs_22
+        ripgrep
+        git
+        openssh
+        ffmpeg
+        tirith
+      ];
+
+      runtimePath = pkgs.lib.makeBinPath runtimeDeps;
+
+      # Lockfile hashes for dev shell stamps
+      pyprojectHash = builtins.hashString "sha256" (builtins.readFile ../pyproject.toml);
+      uvLockHash =
+        if builtins.pathExists ../uv.lock then
+          builtins.hashString "sha256" (builtins.readFile ../uv.lock)
+        else
+          "none";
    in
    {
      packages = {
-        default = hermesAgent;
-        tui = hermesAgent.hermesTui;
-        web = hermesAgent.hermesWeb;
+        default = pkgs.stdenv.mkDerivation {
+          pname = "hermes-agent";
+          version = (fromTOML (builtins.readFile ../pyproject.toml)).project.version;

-        fix-lockfiles = hermesAgent.hermesNpmLib.mkFixLockfiles {
-          packages = [ hermesAgent.hermesTui hermesAgent.hermesWeb ];
+          dontUnpack = true;
+          dontBuild = true;
+          nativeBuildInputs = [ pkgs.makeWrapper ];
+
+          installPhase = ''
+            runHook preInstall
+
+            mkdir -p $out/share/hermes-agent $out/bin
+            cp -r ${bundledSkills} $out/share/hermes-agent/skills
+            cp -r ${hermesWeb} $out/share/hermes-agent/web_dist
+
+            # copy pre-built TUI (same layout as dev: ui-tui/dist/ + node_modules/)
+            mkdir -p $out/ui-tui
+            cp -r ${hermesTui}/lib/hermes-tui/* $out/ui-tui/
+
+            ${pkgs.lib.concatMapStringsSep "\n"
+              (name: ''
+                makeWrapper ${hermesVenv}/bin/${name} $out/bin/${name} \
+                  --suffix PATH : "${runtimePath}" \
+                  --set HERMES_BUNDLED_SKILLS $out/share/hermes-agent/skills \
+                  --set HERMES_WEB_DIST $out/share/hermes-agent/web_dist \
+                  --set HERMES_TUI_DIR $out/ui-tui \
+                  --set HERMES_PYTHON ${hermesVenv}/bin/python3 \
+                  --set HERMES_NODE ${pkgs.nodejs_22}/bin/node
+              '')
+              [
+                "hermes"
+                "hermes-agent"
+                "hermes-acp"
+              ]
+            }
+
+            runHook postInstall
+          '';
+
+          passthru.devShellHook = ''
+            STAMP=".nix-stamps/hermes-agent"
+            STAMP_VALUE="${pyprojectHash}:${uvLockHash}"
+            if [ ! -f "$STAMP" ] || [ "$(cat "$STAMP")" != "$STAMP_VALUE" ]; then
+              echo "hermes-agent: installing Python dependencies..."
+              uv venv .venv --python ${pkgs.python312}/bin/python3 2>/dev/null || true
+              source .venv/bin/activate
+              uv pip install -e ".[all]"
+              [ -d mini-swe-agent ] && uv pip install -e ./mini-swe-agent 2>/dev/null || true
+              [ -d tinker-atropos ] && uv pip install -e ./tinker-atropos 2>/dev/null || true
+              mkdir -p .nix-stamps
+              echo "$STAMP_VALUE" > "$STAMP"
+            else
+              source .venv/bin/activate
+              export HERMES_PYTHON=${hermesVenv}/bin/python3
+            fi
+          '';
+
+          meta = with pkgs.lib; {
+            description = "AI agent with advanced tool-calling capabilities";
+            homepage = "https://github.com/NousResearch/hermes-agent";
+            mainProgram = "hermes";
+            license = licenses.mit;
+            platforms = platforms.unix;
+          };
+        };
+
+        tui = hermesTui;
+        web = hermesWeb;
+
+        fix-lockfiles = hermesNpmLib.mkFixLockfiles {
+          packages = [ hermesTui hermesWeb ];
        };
      };
    };
@@ -7,7 +7,6 @@
  pyproject-nix,
  pyproject-build-systems,
  stdenv,
-  dependency-groups ? [ "all" ],
 }:
 let
  workspace = uv2nix.lib.workspace.loadWorkspace { workspaceRoot = ./..; };
@@ -97,5 +96,5 @@ let
      ]);
 in
 pythonSet.mkVirtualEnv "hermes-agent-env" {
-  hermes-agent = dependency-groups;
+  hermes-agent = [ "all" ];
 }
@@ -4,7 +4,7 @@ let
  src = ../ui-tui;
  npmDeps = pkgs.fetchNpmDeps {
    inherit src;
-    hash = "sha256-Chz+NW9NXqboXHOa6PKwf5bhAkkcFtKNhvKWwg2XSPc=";
+    hash = "sha256-RU4qSHgJPMyfRSEJDzkG4+MReDZDc6QbTD2wisa5QE0=";
  };

  npm = hermesNpmLib.mkNpmPassthru { folder = "ui-tui"; attr = "tui"; pname = "hermes-tui"; };
@@ -17,7 +17,6 @@ pkgs.buildNpmPackage (npm // {
  inherit src npmDeps version;

  doCheck = false;
-  npmFlags = [ "--legacy-peer-deps" ];

  installPhase = ''
    runHook preInstall
@@ -1,7 +1,7 @@
 ---
 name: touchdesigner-mcp
 description: "Control a running TouchDesigner instance via twozero MCP — create operators, set parameters, wire connections, execute Python, build real-time visuals. 36 native tools."
-version: 1.1.0
+version: 1.0.0
 author: kshitijk4poor
 license: MIT
 metadata:
@@ -332,12 +332,6 @@ See `references/network-patterns.md` for complete build scripts + shader code.
 | `references/mcp-tools.md` | Full twozero MCP tool parameter schemas |
 | `references/python-api.md` | TD Python: op(), scripting, extensions |
 | `references/troubleshooting.md` | Connection diagnostics, debugging |
-| `references/glsl.md` | GLSL uniforms, built-in functions, shader templates |
-| `references/postfx.md` | Post-FX: bloom, CRT, chromatic aberration, feedback glow |
-| `references/layout-compositor.md` | HUD layout patterns, panel grids, BSP-style layouts |
-| `references/operator-tips.md` | Wireframe rendering, feedback TOP setup |
-| `references/geometry-comp.md` | Geometry COMP: instancing, POP vs SOP, morphing |
-| `references/audio-reactive.md` | Audio band extraction, beat detection, envelope following |
 | `scripts/setup.sh` | Automated setup script |

 ---
@@ -143,20 +143,20 @@ Creating nodes with the same names you just destroyed in the SAME script causes
 ```python
 # td_execute_python:
 for c in list(root.children):
-    if c.valid and c.name.startswith('my_'):
+    if c.valid and c.name.startswith('promo_'):
        c.destroy()
-# ... then create my_audio, my_shader etc. in same script → CRASHES
+# ... then create promo_audio, promo_shader etc. in same script → CRASHES
 ```

 **CORRECT (two separate calls):**
 ```python
 # Call 1: td_execute_python — clean only
 for c in list(root.children):
-    if c.valid and c.name.startswith('my_'):
+    if c.valid and c.name.startswith('promo_'):
        c.destroy()

 # Call 2: td_execute_python — build (separate MCP call)
-audio = root.create(audiofileinCHOP, 'my_audio')
+audio = root.create(audiofileinCHOP, 'promo_audio')
 # ... rest of build
 ```

@@ -361,13 +361,21 @@ win.par.winopen.pulse()

 `out.sample(x, y)` returns pixels from a single cook snapshot. Compare samples with 2+ second delays, or use screencapture on the display window.

-### 32. Audio-reactive GLSL: TD-side pipeline
+### 32. Audio-reactive GLSL: dual-layer sync pipeline

-For audio-synced visuals: AudioFileIn → AudioSpectrum(timeslice=True, fftsize='256') → Math(gain=5) → choptoTOP(par.chop=math, layout='rowscropped') → GLSL input. The shader samples `sTD2DInputs[1]` at different x positions for bass/mid/hi. Record the TD output with MovieFileOut.
+For audio-synced visuals, use BOTH layers for maximum effect:
+
+**Layer 1 (TD-side, real-time):** AudioFileIn → AudioSpectrum(timeslice=True, fftsize='256') → Math(gain=5) → choptoTOP(par.chop=math, layout='rowscropped') → GLSL input. The shader samples `sTD2DInputs[1]` at different x positions for bass/mid/hi. Record the TD output with MovieFileOut.
+
+**Layer 2 (Python-side, post-hoc):** scipy FFT on the SAME audio file → per-frame features (rms, bass, mid, hi, beat detection) → drive ASCII brightness, chromatic aberration, beat flashes during the render pass.
+
+Because both layers are locked to the same audio file, the visuals stay genuinely in sync with the beat at two independent stages.

 **Key gotcha:** AudioFileIn must be cued (`par.cue=True` → `par.cuepulse.pulse()`) then uncued (`par.cue=False`, `par.play=True`) before recording starts. Otherwise the spectrum is silent for the first few seconds.

-### 33. twozero MCP: prefer native tools
+### 33. twozero MCP: benchmark and prefer native tools
+
+Benchmarked April 2026: twozero MCP exposes 36 native tools, whereas the old curl/REST method (port 9981) exposed none.

 **Always prefer native MCP tools over td_execute_python:**
 - `td_create_operator` over `root.create()` scripts (handles viewport positioning)
@@ -417,16 +425,13 @@ TD can show `fps:0` in `td_get_perf` while ops still cook and `TOP.save()` still

 **a) Project is paused (playbar stopped).** TD's playbar can be toggled with spacebar. The `root` at `/` has no `.playbar` attribute (it's on the perform COMP). The easiest fix is sending a spacebar keypress via `td_input_execute`, though this tool can sometimes error. As a workaround, `TOP.save()` always works regardless of play state — use it to verify rendering is actually happening before spending time debugging FPS.

-**b) Audio device CHOP blocking the main thread (MOST COMMON).** An `audiodeviceoutCHOP` with `active=True` can consume 300-400ms/s (2000%+ of frame budget), stalling the cook loop at FPS=0. **`volume=0` is NOT sufficient** — the audio driver still blocks. Fix: `par.active = False`. This completely stops the CHOP from interacting with the audio driver. If you need audio monitoring, enable it only during short playback checks, then disable before recording.
-
-Verified April 2026: disabling `audiodeviceoutCHOP` (`active=False`) restored FPS from 0 to 60 instantly, recovering from 2348% budget usage to 0.1%.
+**b) Audio device CHOP blocking the main thread.** An `audiodeviceoutCHOP` with an active audio device can consume 300-400ms/s (2000%+ of frame budget), stalling the cook loop at FPS=0. Fix: keep the CHOP active but set `volume=0` to prevent the audio driver from blocking. Disabling it entirely (`active=False`) may also work but can prevent downstream audio processing CHOPs from cooking.

 Diagnostic sequence when FPS=0:
-1. `td_get_perf` — check if any op has extreme CPU/s (audiodeviceoutCHOP is the usual suspect)
-2. If audiodeviceoutCHOP shows >100ms/s: set `par.active = False` immediately
-3. `TOP.save()` on the output — if it produces a valid image, the pipeline works, just not at real-time rate
-4. Check for other blocking CHOPs (audiodevin, etc.)
-5. Toggle play state (spacebar, or check if absTime.seconds is advancing)
+1. `td_get_perf` — check if any op has extreme CPU/s
+2. `TOP.save()` on the output — if it produces a valid image, the pipeline works, just not at real-time rate
+3. Check for blocking CHOPs (audioout, audiodevin, etc.)
+4. Toggle play state (spacebar, or check if absTime.seconds is advancing)

 ### 39. Recording while FPS=0 produces empty or near-empty files

@@ -479,20 +484,9 @@ If `td_write_dat` fails, fall back to `td_execute_python`:
 op("/project1/shader_code").text = shader_string
 ```

-### 42. td_execute_python DOES return print() output — use it for debugging
+### 42. td_execute_python does NOT return stdout or print() output

-`print()` statements in `td_execute_python` scripts appear in the MCP response text. This is the correct way to read values back from scripts. The response format is: printed output first, then `[fps X.X/X] [N err/N warn]` on a separate line.
-
-However, the `result` variable (if you set one) does NOT appear verbatim — use `print()` for anything you need to read back:
-```python
-# CORRECT — appears in response:
-print('value:', some_value)
-
-# WRONG — not reliably in response:
-result = some_value
-```
-
-For structured data, use dedicated inspection tools (`td_get_operator_info`, `td_read_chop`) which return clean JSON.
+Despite what earlier versions of pitfall #33 stated, `print()` and `debug()` output from `td_execute_python` scripts does NOT appear in the MCP response. The response is always just `(ok)` + FPS/error summary. To read values back, use dedicated inspection tools (`td_get_operator_info`, `td_read_dat`, `td_read_chop`) instead of trying to print from within a script.

 ### 43. td_get_operator_info JSON is appended with `[fps X.X/X]` — breaks json.loads()

@@ -502,203 +496,13 @@ clean = response_text.rsplit('[fps', 1)[0]
 data = json.loads(clean)
 ```

-### 44. td_get_screenshot is unreliable — returns `{"status": "pending"}` and may never deliver
+### 44. td_get_screenshot is asynchronous — returns `{"status": "pending"}`

-Screenshots don't complete instantly. The tool returns `{"status": "pending", "requestId": "..."}` and the actual file may appear later — or may NEVER appear at all. In testing (April 2026), screenshots stayed "pending" indefinitely with no file written to disk, even though the shader was cooking at 8-30fps.
+Screenshots don't complete instantly. The tool returns `{"status": "pending", "requestId": "..."}` and the actual file appears later. Wait a few seconds before checking for the file. There is no callback or completion notification — poll the filesystem.

-**Do NOT rely on `td_get_screenshot` for frame capture.** For reliable frame capture, use MovieFileOut recording + ffmpeg frame extraction:
-```bash
-# Record in TD first, then extract frames:
-ffmpeg -y -i /tmp/td_output.mov -t 25 -vf 'fps=24' /tmp/td_frames/frame_%06d.png
-```
-
-If you need a quick visual check, `td_get_screenshot` is worth trying (it sometimes works), but always have the recording fallback. There is no callback or completion notification — if the file doesn't appear after 5-10 seconds, it's not coming.
-
-### 45. Heavy shaders cook below record FPS — many duplicate frames in output
-
-A raymarched GLSL shader may only cook at 8-15fps even though MovieFileOut records at 60fps. The recording still works (TD writes the last-cooked frame each time), but the resulting file has many duplicate frames. When extracting frames for post-processing, use a lower fps filter to avoid redundant frames:
-```bash
-# Extract at 24fps from a 60fps recording of an 8fps shader:
-ffmpeg -y -i /tmp/td_output.mov -t 25 -vf 'fps=24' /tmp/td_frames/frame_%06d.png
-```
-Check actual cook FPS with `td_get_perf` before committing to a long recording. If FPS < 15, the output will be a slideshow regardless of the recording codec.
-
-### 46. Recording duration is manual — no auto-stop at audio end
+### 45. Recording duration is manual — no auto-stop at audio end

 MovieFileOut records until `par.record = False` is set. If audio ends before you stop recording, the file keeps growing with repeated frames. Always stop recording promptly after the audio duration. For precision: set a timer on the agent side matching the audio length, then send `par.record = False`. Trim excess with ffmpeg as a safety net:
 ```bash
 ffmpeg -i raw.mov -t 25 -c copy trimmed.mov
-```
-
-### 47. AudioFileIn par.index stays at 0 in sequential mode — not a reliable progress indicator
-
-When `audiofileinCHOP` is in `playmode=2` (sequential), `par.index.eval()` returns 0.0 even while audio IS actively playing and the spectrum IS receiving data. Do NOT use `par.index` to check playback progress in sequential mode.
-
-**How to verify audio is actually playing:**
- Read the spectrum CHOP values via `td_read_chop` — if values are non-zero and CHANGE between reads 1-2s apart, audio is flowing
- Read the audio CHOP itself: non-zero waveform samples confirm the file is loaded and playing
- `par.play.eval()` returning True is necessary but NOT sufficient — it can be True with no audio flowing if cue is stuck
-
-### 48. GLSL shader whiteout — clamp audio spectrum values in the shader
-
-Raw spectrum values multiplied by Math CHOP gain can produce very large numbers (5-20+) that blow out the shader's lighting, producing flat white/grey. The shader MUST clamp audio inputs:
-
-```glsl
-float bass = texture(sTD2DInputs[1], vec2(0.05, 0.25)).r;
-bass = clamp(bass, 0.0, 3.0);   // prevent whiteout
-mids = clamp(mids, 0.0, 3.0);
-hi = clamp(hi, 0.0, 3.0);
-```
-
-Discovered when gain=10 produced ~0.13 (too dark) during quiet passages but gain=50 produced ~9.4 (total whiteout). Fix: keep gain=10, use `highfreqboost=3.0` on AudioSpectrum, clamp in shader.
-
-### 49. Non-Commercial TD records at 1280x1280 (square) — always crop in post
-
-Even with `resolutionw=1280, resolutionh=720` on the GLSL TOP, Non-Commercial TD may output 1280x1280 to MovieFileOut. Always check dimensions with ffprobe and crop during extraction:
-
-```bash
-# Center-crop from 1280x1280 to 1280x720:
-ffmpeg -y -i /tmp/td_output.mov -t 25 -r 24 -vf "crop=1280:720:0:280" /tmp/frames/frame_%06d.png
-```
-
-Large ProRes files (1-2GB) at 1280x1280 decode at ~3fps, so 25s of footage takes ~3 minutes to extract.
-
-## Advanced Patterns (pitfalls 51+)
-
-### 51. Connection syntax: use `outputConnectors`/`inputConnectors`, NOT `outputs`/`inputs`
-
-```python
-# CORRECT
-src.outputConnectors[0].connect(dst.inputConnectors[0])
-# WRONG — raises IndexError or AttributeError
-src.outputs[0].connect(dst.inputs[0])
-```
-
-For feedback TOP, BOTH are required:
-```python
-fb.par.top = target.path
-target.outputConnectors[0].connect(fb.inputConnectors[0])
-```
-
-### 52. moviefileoutTOP `par.input` doesn't resolve via Python in TD 2025.32460
-
-Setting `moviefileoutTOP.par.input` programmatically does NOT work. All forms fail silently with "Not enough sources specified."
-
-**Workaround — frame capture + ffmpeg:**
-```python
-out = op('/project1/out')
-for i in range(300):
-    delay = i * 5
-    run(f"op('/project1/out').save('/tmp/frames/f_{i:04d}.png')", delayFrames=delay)
-# Then: ffmpeg -y -framerate 30 -i /tmp/frames/f_%04d.png -c:v prores -pix_fmt yuv420p /tmp/output.mov
-```
-
-### 53. Batch frame capture — use `me.fetch`/`me.store` for state across calls
-
-```python
-start = me.fetch('cap_frame', 0)
-for i in range(60):
-    frame = start + i
-    op('/project1/out').save(f'/tmp/frames/frame_{str(frame).zfill(4)}.png')
-me.store('cap_frame', start + 60)
-```
-Call 5 times for 300 frames. Each picks up where the last left off.
-
-### 54. GLSL TOP pixel shader requirements in TD 2025
-
-```glsl
-// REQUIRED — declare output
-layout(location = 0) out vec4 fragColor;
-
-void main() {
-    vec3 col = vec3(1.0, 0.0, 0.0);
-    fragColor = TDOutputSwizzle(vec4(col, 1.0));
-}
-```
-**Built-in uniforms available:** `uTDOutputInfo.res` (vec4), `uTDTimeInfo.seconds`, `sTD2DInputs[N]`.
-**Auto-created DATs:** `name_pixel`, `name_vertex`, `name_compute` textDATs with example code.
-
-### 55. TOP.save() doesn't advance time — identical frames in tight loops
-
-`.save()` captures the current cooked frame without advancing TD's timeline:
-```python
-# WRONG — all frames identical
-for i in range(300):
-    op('/project1/out').save(f'frames/f_{i:04d}.png')
-
-# CORRECT — use run() with delayFrames
-for i in range(300):
-    delay = i * 5
-    run(f"op('/project1/out').save('frames/f_{i:04d}.png')", delayFrames=delay)
-```
-**NEVER use `time.sleep()` in TD** — it blocks the main thread and freezes the UI.
-
-### 56. Feedback loop masks input changes — force switch during capture
-
-With feedback TOP opacity 0.7+, the buffer dominates output. Switching input produces nearly identical frames.
-
-**Fix — force switch index per capture:**
-```python
-for i in range(300):
-    idx = (i // 8) % num_inputs
-    delay = i * 5
-    run(f"op('/project1/vswitch').par.index={idx}; op('/project1/out').save('f_{i:04d}.png')", delayFrames=delay)
-```
-
-### 57. Large td_execute_python scripts fail — split into incremental calls
-
-10+ operator creations in one script cause timing issues. Split into 2-4 calls of 2-4 operators each. Within one call, `create()` handles work immediately. Across calls, `op('name')` may return `None` if the previous call hasn't committed.
-
-### 58. MCP instance reconnection after project.load()
-
-`project.load(path)` changes the PID. After loading, call `td_list_instances()` and use the new `target_instance`. For TOX files: import as child comp instead (doesn't disconnect).
-
-### 59. TOX reverse-engineering workflow
-
-```python
-comp = root.loadTox(r'/path/to/file.tox')
-comp.name = '_study_comp'
-for child in comp.children:
-    print(f'{child.name} ({child.OPType})')
-# Use td_get_operators_info, td_read_dat, check custom params
-```
-
-### 60. sliderCOMP naming — TD appends suffix
-
-TD auto-renames: `slider_brightness` → `slider_brightness1`. Always check names after creation.
-
-### 61. create() requires full operator type suffix
-
-```python
-# CORRECT
-proj.create('audiofileinCHOP', 'audio_in')
-proj.create('glslTOP', 'render')
-
-# WRONG — raises "Unknown operator type"
-proj.create('audiofilein', 'audio_in')
-proj.create('glsl', 'render')
-```
-
-### 62. Reparenting COMPs — use copyOPs, not connect()
-
-Moving COMPs with `inputCOMPConnectors[0].connect()` fails. Use copy + destroy:
-```python
-copied = target.copyOPs([source])  # preserves internal wiring
-source.destroy()
-# Re-wire external connections manually after the move
-```
-
-### 63. Slider wiring — expressionCHOP with op() expressions crashes TD
-
-```python
-# CRASHES TD — don't do this
-echop = root.create(expressionCHOP, 'slider_ctrl')
-echop.par.chan0expr = 'op("/project1/controls/slider_brightness1").par.value0'
-
-# WORKING — parameterCHOP as bridge
-pchop = root.create(parameterCHOP, 'slider_vals')
-pchop.par.ops = '/project1/controls'
-pchop.par.parameters = 'value0'
-pchop.par.custom = True
-pchop.par.builtin = False
 ```
@@ -380,10 +380,6 @@ def backup_existing(path: Path, backup_root: Path) -> Optional[Path]:
 # Replace OpenClaw brand names with Hermes in migrated text so that
 # memory entries, user profiles, SOUL.md, and workspace instructions
 # read as self-referential to the new agent identity.
-#
-# Case-preserving: ``OpenClaw`` → ``Hermes`` (prose), but lowercase matches
-# like ``openclaw`` → ``hermes`` (so filesystem paths like ``~/.openclaw``
-# become ``~/.hermes`` — the real Hermes home — not the broken ``~/.Hermes``).
 _REBRAND_PATTERNS: List[Tuple[re.Pattern, str]] = [
    (re.compile(r'\bOpen[\s-]?Claw\b', re.IGNORECASE), 'Hermes'),
    (re.compile(r'\bClawdBot\b', re.IGNORECASE), 'Hermes'),
@@ -391,31 +387,10 @@ _REBRAND_PATTERNS: List[Tuple[re.Pattern, str]] = [
 ]


-def _case_preserving_replacement(replacement: str):
-    """Return a re.sub replacement fn that lowercases the result when the
-    matched text was all-lowercase.
-
-    Keeps ``OpenClaw`` → ``Hermes`` but maps ``openclaw`` → ``hermes`` so a
-    filesystem path like ``~/.openclaw/config.yaml`` rewrites to
-    ``~/.hermes/config.yaml`` (the real Hermes home) instead of the broken
-    ``~/.Hermes/config.yaml``.
-    """
-    def _sub(match: "re.Match[str]") -> str:
-        matched = match.group(0)
-        if matched and matched.islower():
-            return replacement.lower()
-        return replacement
-    return _sub
-
-
 def rebrand_text(text: str) -> str:
-    """Replace OpenClaw / ClawdBot / MoltBot brand names with Hermes.
-
-    Preserves case so filesystem-path matches (lowercase) don't become
-    capitalized directory names that don't exist.
-    """
+    """Replace OpenClaw / ClawdBot / MoltBot brand names with Hermes."""
    for pattern, replacement in _REBRAND_PATTERNS:
-        text = pattern.sub(_case_preserving_replacement(replacement), text)
+        text = pattern.sub(replacement, text)
    return text


@@ -91,29 +91,4 @@

  // Register this plugin — the dashboard picks it up automatically.
  window.__HERMES_PLUGINS__.register("example", ExamplePage);
-
-  // ─────────────────────────────────────────────────────────────────────
-  // Page-scoped slot demo: inject a small banner at the top of /sessions.
-  //
-  // Built-in pages expose named slots (<page>:top, <page>:bottom) that
-  // plugins can populate without overriding the whole route. The
-  // manifest lists the slots we use in its `slots` array so the shell
-  // knows to render <PluginSlot name="sessions:top" /> there.
-  // ─────────────────────────────────────────────────────────────────────
-  function SessionsTopBanner() {
-    return React.createElement(Card, {
-      className: "border-dashed",
-    },
-      React.createElement(CardContent, { className: "flex items-center gap-3 py-2" },
-        React.createElement(Badge, { variant: "outline" }, "Example"),
-        React.createElement("span", {
-          className: "text-xs text-muted-foreground",
-        }, "This banner was injected into the Sessions page by the example plugin via the ",
-          React.createElement("code", { className: "font-courier" }, "sessions:top"),
-          " slot."),
-      ),
-    );
-  }
-
-  window.__HERMES_PLUGINS__.registerSlot("example", "sessions:top", SessionsTopBanner);
 })();
@@ -8,7 +8,6 @@
    "path": "/example",
    "position": "after:skills"
  },
-  "slots": ["sessions:top"],
  "entry": "dist/index.js",
  "api": "plugin_api.py"
 }
@@ -1,131 +0,0 @@
-# google_meet plugin
-
-Let the hermes agent join a Google Meet call, transcribe it, optionally speak
-in it, and do the followup work afterwards.
-
-## What ships
-
-| Version | What | Status |
-|---|---|---|
-| v1 | Transcribe-only: Playwright joins Meet, scrapes captions to transcript file | ✓ ships by default |
-| v2 | Realtime duplex audio: bot speaks in-call via OpenAI Realtime + BlackHole/PulseAudio null-sink | ✓ opt in with `mode='realtime'` |
-| v3 | Remote node host: run the bot on a different machine than the gateway | ✓ opt in with `node='<name>'` |
-
-## Architecture
-
-```
-┌─ gateway (Linux box, where hermes runs) ────────────────────────────┐
-│                                                                      │
-│   agent → meet_join(url, mode='realtime', node='my-mac')             │
-│         │                                                            │
-│         └─ NodeClient ─── ws ────┐                                   │
-│                                  │                                   │
-└──────────────────────────────────┼───────────────────────────────────┘
-                                   │ wss (token auth)
-                                   ▼
-┌─ node host (user's Mac, signed-in Chrome lives here) ───────────────┐
-│                                                                      │
-│   NodeServer (from `hermes meet node run`)                           │
-│     │                                                                │
-│     ├─ start_bot → process_manager.start() → spawns meet_bot         │
-│     │                                                                │
-│     └─ meet_bot (Playwright)                                         │
-│        ├─ Chromium → meet.google.com                                 │
-│        ├─ caption scraper → transcript.txt                           │
-│        └─ (realtime mode only) RealtimeSpeaker thread                │
-│             ↓                                                        │
-│           OpenAI Realtime WS → speaker.pcm                           │
-│             ↓                                                        │
-│           paplay → null-sink ← Chrome fake mic                       │
-│                                                                      │
-└──────────────────────────────────────────────────────────────────────┘
-```
-
-Without v3: the whole right column runs on the gateway machine.
-Without v2: the "realtime" path is skipped; transcribe runs alone.
-
-## Files
-
-| Path | Purpose |
-|---|---|
-| `plugin.yaml` | manifest |
-| `__init__.py` | `register(ctx)` — registers 5 tools + `on_session_end` hook + `hermes meet` CLI |
-| `meet_bot.py` | Playwright bot subprocess (standalone, `python -m plugins.google_meet.meet_bot`) |
-| `process_manager.py` | local bot lifecycle + `enqueue_say` |
-| `tools.py` | agent-facing tools + node-routing helper |
-| `cli.py` | `hermes meet setup / auth / join / status / transcript / say / stop / node ...` |
-| `audio_bridge.py` | v2: PulseAudio null-sink (Linux) + BlackHole probe (macOS) |
-| `realtime/openai_client.py` | v2: `RealtimeSession` + `RealtimeSpeaker` (file-queue → OpenAI Realtime WS → PCM) |
-| `node/protocol.py` | v3: message envelope + validation |
-| `node/registry.py` | v3: `$HERMES_HOME/workspace/meetings/nodes.json` |
-| `node/server.py` | v3: `NodeServer` (runs on host machine) |
-| `node/client.py` | v3: `NodeClient` (used by tool handlers + CLI on gateway) |
-| `node/cli.py` | v3: `hermes meet node {run,list,approve,remove,status,ping}` |
-| `SKILL.md` | agent usage guide |
-
-## Local quick start
-
-```bash
-hermes plugins enable google_meet
-hermes meet install                                      # pip + Chromium
-hermes meet setup                                        # preflight
-hermes meet auth                                         # optional
-hermes meet join https://meet.google.com/abc-defg-hij    # transcribe
-```
-
-## Realtime mode
-
-Linux (preferred, most automated):
-```bash
-hermes meet install --realtime                     # installs pulseaudio-utils
-echo 'OPENAI_API_KEY=sk-...' >> ~/.hermes/.env
-hermes meet join https://meet.google.com/abc-defg-hij --mode realtime
-# then from the agent or CLI:
-hermes meet say "Good morning everyone, I'm the note-taker bot."
-```
-
-macOS:
-```bash
-hermes meet install --realtime     # runs: brew install blackhole-2ch ffmpeg
-# then — manually! — open System Settings → Sound → Input → BlackHole 2ch
-echo 'OPENAI_API_KEY=sk-...' >> ~/.hermes/.env
-hermes meet join https://meet.google.com/abc-defg-hij --mode realtime
-```
-
-On macOS, hermes will **not** switch your system audio input automatically — the
-user has to do it. This is deliberate: switching default input on a whim would
-be a surprising side effect.
-
-## Remote node host
-
-On the node machine (e.g. user's Mac with a signed-in Chrome):
-```bash
-pip install playwright websockets
-python -m playwright install chromium
-hermes plugins enable google_meet
-hermes meet node run --display-name my-mac --host 0.0.0.0 --port 18789
-# prints the bearer token on first run; copy it
-```
-
-On the gateway:
-```bash
-hermes meet node approve my-mac ws://<mac-ip>:18789 <token>
-hermes meet node ping my-mac
-# now any meet_* tool call accepts node='my-mac' (or 'auto')
-```
-
-## Safety
-
- URL gate: only `https://meet.google.com/abc-defg-hij`, `/new`, `/lookup/<id>`.
- No calendar scanning, no auto-dial, no auto-consent announcement.
- Node server uses bearer-token auth; no key exchange, no TLS termination
-  built in — run it on a LAN or behind a reverse proxy you trust.
- One active meeting per (gateway, node) pair. A second `meet_join` leaves the first.
- `meet_say` refuses unless the active meeting was started with `mode='realtime'`.
-
-## Out of scope
-
- **Calendar scanning** — deliberately not implemented. Join URLs must be explicit.
- **Multi-tenant node sharing** — a node serves one gateway at a time.
- **Windows** — audio bridging isn't tested; `register()` no-ops on Windows.
- **System audio input switching on macOS** — user responsibility, not the bot's.
@@ -1,148 +0,0 @@
---
-name: google_meet
-description: Join a Google Meet call, transcribe live captions, optionally speak in realtime, and do the followup work afterwards. Use when the user asks the agent to sit in on a meeting, take notes, summarize, respond in-call, or action items from it.
-version: 0.2.0
-platforms:
-  - linux
-  - macos
-metadata:
-  hermes:
-    tags: [meetings, google-meet, transcription, realtime-voice]
---
-
-# google_meet
-
-## When to use
-
-The user says any of:
-
- "join my Meet at <url>"
- "take notes on this meeting"
- "summarize the meeting and send followups"
- "sit in on my standup"
- "be a bot in this call and speak up when X"
-
-## Two modes
-
-| Mode | What the bot does |
-|---|---|
-| `transcribe` (default) | Joins, enables captions, scrapes a transcript. Listen-only. |
-| `realtime` | Same as transcribe PLUS speaks into the meeting via OpenAI Realtime. The agent calls `meet_say(text)` and the bot's voice comes out of the call. |
-
-Pick `realtime` only when the user actually wants the agent to speak. It costs real money (OpenAI Realtime is pay-per-audio-minute) and requires a virtual audio device set up on the machine running the bot.
-
-## Two locations
-
-| Location | When |
-|---|---|
-| Local (default) | Gateway machine runs the Playwright bot directly. |
-| Remote node (`node="<name>"`) | Bot runs on a different machine that has a signed-in Chrome and (for realtime) a configured audio bridge. Useful when the gateway runs on a headless Linux box but the user's real signed-in Chrome lives on their Mac. |
-
-## Prerequisites the user must handle once
-
-Easiest path — run the built-in installer:
-
-```bash
-hermes plugins enable google_meet
-hermes meet install                 # pip deps + Chromium (transcribe only)
-hermes meet install --realtime      # + pulseaudio-utils / brew blackhole+ffmpeg
-hermes meet auth                    # optional; skips guest-lobby wait
-hermes meet setup                   # preflight checks
-```
-
-`hermes meet install --realtime` prompts before running `sudo apt-get` (Linux)
-or `brew install` (macOS). Pass `--yes` to skip the prompt. It will NOT touch
-your macOS default-input setting — you have to select BlackHole 2ch in
-System Settings yourself before starting a realtime meeting.
-
-Or do it manually:
-```bash
-pip install playwright websockets && python -m playwright install chromium
-
-# For realtime mode, additionally:
-#   Linux:  sudo apt install pulseaudio-utils
-#   macOS:  brew install blackhole-2ch ffmpeg
-#           → System Settings → Sound → Input → BlackHole 2ch
-#   Then set OPENAI_API_KEY or HERMES_MEET_REALTIME_KEY in ~/.hermes/.env
-```
-
-For a remote node:
-```bash
-# on the user's Mac (where Chrome is signed in):
-pip install playwright websockets && python -m playwright install chromium
-hermes plugins enable google_meet
-hermes meet node run --display-name my-mac    # persistent server
-# copy the printed token
-
-# on the gateway:
-hermes meet node approve my-mac ws://<mac-ip>:18789 <token>
-hermes meet node ping my-mac                   # confirm reachable
-```
-
-Run `hermes meet setup` to preflight local prereqs.
-
-## Flow
-
-1. **Join** — call `meet_join(url=..., mode=..., node=...)`. Returns immediately.
-2. **Announce yourself** — no auto-consent. Say (in whatever channel the user is watching): "A Hermes agent bot is in this call taking notes."
-3. **Poll** — `meet_status()` for liveness, `meet_transcript(last=20)` for recent captions. Don't re-read the whole transcript every turn.
-4. **Speak (realtime only)** — `meet_say(text="...")` queues text for TTS. The speech lags by ~2s. Don't spam it.
-5. **Leave** — `meet_leave()` when done, or set `duration="30m"` on `meet_join` for auto-leave.
-6. **Follow up** — read `meet_transcript()` in full, summarize, and use regular tools to send the recap, file issues, schedule followups.
-
-## Tool reference
-
-| Tool | Parameters | Use |
-|---|---|---|
-| `meet_join` | `url`, `mode?`, `guest_name?`, `duration?`, `headed?`, `node?` | Start bot |
-| `meet_status` | `node?` | Liveness + progress |
-| `meet_transcript` | `last?`, `node?` | Read captions |
-| `meet_leave` | `node?` | Close bot |
-| `meet_say` | `text`, `node?` | Speak in realtime meeting |
-
-`node?` on all tools: pass a registered node name (or `"auto"` for the sole node) to operate a remote bot instead of a local one. Omit for local.
-
-## Important limits
-
- Captions are only as good as Google Meet's live captions. English-biased, lossy on overlapping speakers.
- Guest mode sits in the lobby until a host admits. Warn the user; `hermes meet auth` avoids this.
- **Lobby timeout**: if the host doesn't admit the bot within 5 minutes (configurable via `HERMES_MEET_LOBBY_TIMEOUT` env), the bot leaves and `meet_status` reports `leaveReason: "lobby_timeout"`.
- **One active meeting per install per location.** A second `meet_join` leaves the first.
- **Windows not supported.**
- Realtime mode needs a virtual audio device. If the audio bridge setup fails, the bot falls back to transcribe mode and flags it in `meet_status().error`.
- `meet_say` requires `mode='realtime'` on the originating `meet_join`. Calling it against a transcribe-mode meeting returns a clear error.
- **Barge-in is best-effort.** When a caption arrives attributed to a real participant while the bot is generating audio, the bot sends `response.cancel` to OpenAI Realtime. Captions take ~500ms to show up, so the bot will talk over the first second or so of a human interruption.
-
-## Status dict reference
-
-`meet_status()` returns (subset shown, there are more):
-
-| Key | Meaning |
-|---|---|
-| `inCall` | Past the lobby. False while waiting for admission. |
-| `lobbyWaiting` | Clicked "Ask to join", waiting on host. |
-| `joinAttemptedAt` / `joinedAt` | Timestamps for lobby-click and actual admission. |
-| `captioning` | Caption observer is installed. |
-| `transcriptLines` / `lastCaptionAt` | Transcript progress. |
-| `realtime` / `realtimeReady` | Realtime mode provisioned / WS connected. |
-| `realtimeDevice` | Audio device name the bot is feeding (e.g. `hermes_meet_src`). |
-| `audioBytesOut` / `lastAudioOutAt` | How much PCM the OpenAI session has produced. |
-| `lastBargeInAt` | Timestamp of the most recent `response.cancel` sent. |
-| `leaveReason` | `duration_expired`, `lobby_timeout`, `denied`, `page_closed`, or null. |
-| `error` | Last error (soft — bot may still be running). |
-
-## Transcript location
-
-Local:
-```
-$HERMES_HOME/workspace/meetings/<meeting-id>/transcript.txt
-```
-
-Remote node: transcript lives on the node host's disk. Use `meet_transcript(node=...)` to read it over RPC.
-
-## Safety
-
- URL regex: only `https://meet.google.com/...` URLs pass.
- No calendar scanning. No auto-dial.
- Remote nodes use bearer-token auth; tokens are generated on the node (32 hex chars, persisted in `$HERMES_HOME/workspace/meetings/node_token.json`) and must be copied to the gateway via `hermes meet node approve`.
- `meet_say` text is rate-limited by the OpenAI Realtime session; spam-protection is the bot's problem, not yours, but still — don't queue hundreds of lines.
@@ -1,103 +0,0 @@
-"""google_meet plugin — let the agent join a Meet call, transcribe it, follow up.
-
-v1: transcribe-only. Spawns a headless Chromium via Playwright, joins the Meet
-URL, enables live captions, scrapes them into a transcript file. The agent then
-has the transcript in its workspace and can do whatever followup work it needs
-using its regular tools.
-
-v2 (not in this PR): realtime duplex audio so the agent can speak in the
-meeting, via OpenAI Realtime / Gemini Live + BlackHole / PulseAudio null-sink.
-``meet_say`` exists as a stub today so the tool surface is stable.
-
-Explicit-by-design: only joins ``https://meet.google.com/`` URLs explicitly
-passed in. No calendar scanning, no auto-dial, no consent announcement.
-"""
-
-from __future__ import annotations
-
-import logging
-import platform
-
-from plugins.google_meet import process_manager as pm
-from plugins.google_meet.cli import register_cli as _register_meet_cli
-from plugins.google_meet.cli import meet_command as _meet_command
-from plugins.google_meet.tools import (
-    MEET_JOIN_SCHEMA,
-    MEET_LEAVE_SCHEMA,
-    MEET_SAY_SCHEMA,
-    MEET_STATUS_SCHEMA,
-    MEET_TRANSCRIPT_SCHEMA,
-    check_meet_requirements,
-    handle_meet_join,
-    handle_meet_leave,
-    handle_meet_say,
-    handle_meet_status,
-    handle_meet_transcript,
-)
-
-logger = logging.getLogger(__name__)
-
-
-_TOOLS = (
-    ("meet_join",       MEET_JOIN_SCHEMA,       handle_meet_join,       "📞"),
-    ("meet_status",     MEET_STATUS_SCHEMA,     handle_meet_status,     "🟢"),
-    ("meet_transcript", MEET_TRANSCRIPT_SCHEMA, handle_meet_transcript, "📝"),
-    ("meet_leave",      MEET_LEAVE_SCHEMA,      handle_meet_leave,      "👋"),
-    ("meet_say",        MEET_SAY_SCHEMA,        handle_meet_say,        "🗣️"),
-)
-
-
-def _on_session_end(**kwargs) -> None:
-    """Best-effort cleanup — if a meet bot is still running when the session
-    ends, leave the call so we don't orphan a headless Chromium.
-
-    No-ops when nothing is active. Swallows all exceptions — session end must
-    not fail because the bot cleanup hit an edge case.
-    """
-    try:
-        status = pm.status()
-        if status.get("ok") and status.get("alive"):
-            pm.stop(reason="session ended")
-    except Exception as e:  # pragma: no cover — defensive
-        logger.debug("google_meet on_session_end cleanup failed: %s", e)
-
-
-def register(ctx) -> None:
-    """Register tools, CLI, and lifecycle hooks.
-
-    Called once by the plugin loader when the plugin is enabled via
-    ``plugins.enabled`` in config.yaml.
-    """
-    # Windows is not supported in v1 — audio routing for v2 doesn't have a
-    # tested path there and guest-join Chromium is flakier. Refuse to register
-    # rather than half-working.
-    system = platform.system().lower()
-    if system not in ("linux", "darwin"):
-        logger.info(
-            "google_meet plugin: platform=%s not supported (linux/macos only)",
-            system,
-        )
-        return
-
-    for name, schema, handler, emoji in _TOOLS:
-        ctx.register_tool(
-            name=name,
-            toolset="google_meet",
-            schema=schema,
-            handler=handler,
-            check_fn=check_meet_requirements,
-            emoji=emoji,
-        )
-
-    ctx.register_cli_command(
-        name="meet",
-        help="Google Meet bot (join, transcribe, follow up)",
-        setup_fn=_register_meet_cli,
-        handler_fn=_meet_command,
-        description=(
-            "Let the hermes agent join a Google Meet call and scrape live "
-            "captions into a transcript. See: hermes meet setup"
-        ),
-    )
-
-    ctx.register_hook("on_session_end", _on_session_end)
@@ -1,244 +0,0 @@
-"""Virtual audio bridge for feeding generated speech into Chrome's mic.
-
-v2 module. Provisions a platform-specific virtual audio device so the
-Meet bot's Chromium instance can be pointed at an input source we
-control. The OpenAI Realtime client writes PCM bytes into this device;
-Chrome reads them as if they were coming from a microphone.
-
-Linux (primary): uses pactl (PulseAudio) to create a null-sink plus a
-virtual source whose master is the null-sink's monitor. Callers set
-PULSE_SOURCE=<source_name> in Chrome's env and pass the fake-mic flag.
-
-macOS: requires BlackHole 2ch to be installed. This module only
-verifies its presence and returns the device name; routing OS default
-input is left to the user (or a future switchaudio-osx integration) to
-avoid surprising the user's system audio state.
-
-Windows: not supported in v2.
-"""
-
-from __future__ import annotations
-
-import platform
-import subprocess
-from typing import Optional
-
-
-_BLACKHOLE_DEVICE = "BlackHole 2ch"
-
-
-class AudioBridge:
-    """Manages a virtual audio device for Chrome fake-mic input.
-
-    Call ``setup()`` once before launching the Meet bot and
-    ``teardown()`` when the session ends. ``teardown()`` is idempotent.
-    """
-
-    def __init__(self, name_prefix: str = "hermes_meet") -> None:
-        self._name_prefix = name_prefix
-        self._platform: Optional[str] = None
-        self._device_name: Optional[str] = None
-        self._write_target: Optional[str] = None
-        self._module_ids: list[int] = []
-        self._torn_down = False
-
-    # ── public properties ─────────────────────────────────────────────────
-
-    @property
-    def device_name(self) -> str:
-        if not self._device_name:
-            raise RuntimeError("AudioBridge not set up yet")
-        return self._device_name
-
-    @property
-    def write_target(self) -> str:
-        if not self._write_target:
-            raise RuntimeError("AudioBridge not set up yet")
-        return self._write_target
-
-    # ── lifecycle ─────────────────────────────────────────────────────────
-
-    def setup(self) -> dict:
-        """Provision the virtual audio device.
-
-        Returns a dict describing the device. Raises RuntimeError on
-        unsupported platforms or when required system tools are missing.
-        """
-        system = platform.system()
-        if system == "Linux":
-            return self._setup_linux()
-        if system == "Darwin":
-            return self._setup_darwin()
-        if system == "Windows":
-            raise RuntimeError("windows not supported in v2")
-        raise RuntimeError(f"unsupported platform: {system}")
-
-    def teardown(self) -> None:
-        """Release the virtual audio device. Idempotent."""
-        if self._torn_down:
-            return
-        # Only Linux needs explicit unloading.
-        if self._platform == "linux" and self._module_ids:
-            # Unload in reverse order (virtual-source before null-sink).
-            for mod_id in reversed(self._module_ids):
-                try:
-                    subprocess.run(
-                        ["pactl", "unload-module", str(mod_id)],
-                        check=False,
-                        capture_output=True,
-                    )
-                except Exception:
-                    # Best-effort teardown — never raise from here.
-                    pass
-            self._module_ids = []
-        self._torn_down = True
-
-    # ── platform impls ────────────────────────────────────────────────────
-
-    def _setup_linux(self) -> dict:
-        sink_name = f"{self._name_prefix}_sink"
-        src_name = f"{self._name_prefix}_src"
-
-        try:
-            sink_out = subprocess.run(
-                [
-                    "pactl",
-                    "load-module",
-                    "module-null-sink",
-                    f"sink_name={sink_name}",
-                    f"sink_properties=device.description=HermesMeetSink",
-                ],
-                check=True,
-                capture_output=True,
-                text=True,
-            )
-        except FileNotFoundError as exc:
-            raise RuntimeError(
-                "pactl not found — install PulseAudio/pipewire-pulse"
-            ) from exc
-        except subprocess.CalledProcessError as exc:
-            raise RuntimeError(
-                f"pactl load-module null-sink failed: {exc.stderr or exc}"
-            ) from exc
-
-        sink_mod_id = self._parse_module_id(sink_out.stdout)
-
-        try:
-            src_out = subprocess.run(
-                [
-                    "pactl",
-                    "load-module",
-                    "module-virtual-source",
-                    f"source_name={src_name}",
-                    f"master={sink_name}.monitor",
-                ],
-                check=True,
-                capture_output=True,
-                text=True,
-            )
-        except subprocess.CalledProcessError as exc:
-            # Roll back the null-sink we just created so we don't leak it.
-            subprocess.run(
-                ["pactl", "unload-module", str(sink_mod_id)],
-                check=False,
-                capture_output=True,
-            )
-            raise RuntimeError(
-                f"pactl load-module virtual-source failed: {exc.stderr or exc}"
-            ) from exc
-
-        src_mod_id = self._parse_module_id(src_out.stdout)
-
-        self._platform = "linux"
-        self._device_name = src_name
-        self._write_target = sink_name
-        self._module_ids = [sink_mod_id, src_mod_id]
-        self._torn_down = False
-
-        return {
-            "platform": "linux",
-            "device_name": src_name,
-            "sample_rate": 48000,
-            "channels": 2,
-            "module_ids": list(self._module_ids),
-            "write_target": sink_name,
-        }
-
-    def _setup_darwin(self) -> dict:
-        try:
-            out = subprocess.check_output(
-                ["system_profiler", "SPAudioDataType"],
-                text=True,
-                stderr=subprocess.STDOUT,
-            )
-        except FileNotFoundError as exc:
-            raise RuntimeError(
-                "system_profiler not found (macOS-only command)"
-            ) from exc
-        except subprocess.CalledProcessError as exc:
-            raise RuntimeError(
-                f"system_profiler failed: {exc.output}"
-            ) from exc
-
-        if "BlackHole" not in out:
-            raise RuntimeError(
-                "BlackHole virtual audio device not installed. "
-                "Install via: brew install blackhole-2ch"
-            )
-
-        self._platform = "darwin"
-        self._device_name = _BLACKHOLE_DEVICE
-        self._write_target = _BLACKHOLE_DEVICE
-        self._module_ids = []
-        self._torn_down = False
-
-        return {
-            "platform": "darwin",
-            "device_name": _BLACKHOLE_DEVICE,
-            "sample_rate": 48000,
-            "channels": 2,
-            "module_ids": [],
-            "write_target": _BLACKHOLE_DEVICE,
-        }
-
-    # ── helpers ──────────────────────────────────────────────────────────
-
-    @staticmethod
-    def _parse_module_id(stdout: str) -> int:
-        """pactl load-module prints the new module ID to stdout."""
-        text = (stdout or "").strip()
-        if not text:
-            raise RuntimeError("pactl load-module returned empty stdout")
-        # Take the last whitespace-separated token on the first non-empty line.
-        first = text.splitlines()[0].strip()
-        token = first.split()[-1]
-        try:
-            return int(token)
-        except ValueError as exc:
-            raise RuntimeError(
-                f"could not parse pactl module id from: {stdout!r}"
-            ) from exc
-
-
-def chrome_fake_audio_flags(bridge_info: dict) -> list[str]:
-    """Return Chrome flags for using the fake audio input.
-
-    The PulseAudio source is selected via the ``PULSE_SOURCE`` env var,
-    which callers must set in Chrome's environment before launch:
-
-        env["PULSE_SOURCE"] = bridge_info["device_name"]
-
-    On macOS the caller must ensure the system default audio input is
-    set to the returned BlackHole device (we do not flip that switch).
-    """
-    system = platform.system()
-    if system == "Linux":
-        # Chromium on Linux picks up the PulseAudio source selected via
-        # PULSE_SOURCE env var; the fake-ui flag skips the permission
-        # prompt so the bot can pick "use my mic" without user input.
-        return ["--use-fake-ui-for-media-stream"]
-    if system == "Darwin":
-        return ["--use-fake-ui-for-media-stream"]
-    if system == "Windows":
-        raise RuntimeError("windows not supported in v2")
-    raise RuntimeError(f"unsupported platform: {system}")
--- a/Show More
+++ b/Show More