feat(plugins): add optional-plugins/ discovery + langfuse_tracing as first official optional plugin

Introduces optional-plugins/ — a new category for plugins that ship with the repo but are NOT auto-discovered. They live alongside the code but only land in ~/.hermes/plugins/ (and thus get loaded) when the user explicitly installs them.

Core changes:
- optional-plugins/observability/langfuse-tracing/ — langfuse tracing plugin (pre/post LLM + tool hooks, usage/cost normalization, fail-open when SDK missing). NOT in plugins/ so zero import overhead on devices that don't want it.
- hermes_cli/plugins_cmd.py — official install path: _resolve_official_plugin() recognises 'official/<category>/<name>' identifiers and copies from optional-plugins/ into ~/.hermes/plugins/ (no git clone, no network). _list_official_plugins() enumerates available optional plugins. cmd_list(available=True) shows not-yet-installed official plugins.
- hermes_cli/main.py — hermes plugins list --available flag
- hermes_cli/tools_config.py — Langfuse Observability in TOOL_CATEGORIES; post_setup handler installs the langfuse SDK and runs cmd_install()
- hermes_cli/config.py — Langfuse credentials in OPTIONAL_ENV_VARS; optional tuning keys in _EXTRA_ENV_KEYS

User flows:
    hermes plugins install official/observability/langfuse-tracing
    hermes plugins list --available
    hermes tools (-> Langfuse Observability -> credentials -> auto-installs)

Closes #15764
2026-04-28 11:52:42 +05:30
242 changed files with 2783 additions and 14283 deletions
@@ -5,9 +5,7 @@

 # Dependencies
 node_modules
-**/node_modules
 .venv
-**/.venv

 # CI/CD
 .github
@@ -13,7 +13,7 @@ concurrency:
  cancel-in-progress: true

 jobs:
-  nix-lockfile-check:
+  check:
    runs-on: ubuntu-latest
    timeout-minutes: 20
    steps:
@@ -36,12 +36,6 @@ jobs:
          LINK_SHA: ${{ steps.sha.outputs.full }}
        run: nix run .#fix-lockfiles -- --check

-      - name: Fail if check crashed without reporting
-        if: steps.check.outputs.stale != 'true' && steps.check.outputs.stale != 'false'
-        run: |
-          echo "::error::fix-lockfiles exited without reporting stale status — likely an infrastructure or script failure"
-          exit 1
-
      - name: Post sticky PR comment (stale)
        if: steps.check.outputs.stale == 'true' && github.event_name == 'pull_request'
        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728  # v2.9.1
@@ -1,13 +1,6 @@
 name: Nix Lockfile Fix

 on:
-  push:
-    branches: [main]
-    paths:
-      - 'ui-tui/package-lock.json'
-      - 'ui-tui/package.json'
-      - 'web/package-lock.json'
-      - 'web/package.json'
  workflow_dispatch:
    inputs:
      pr_number:
@@ -26,103 +19,9 @@ concurrency:
  cancel-in-progress: false

 jobs:
-  # ── Auto-fix on main ───────────────────────────────────────────────
-  # Fires when a push to main touches package.json or package-lock.json
-  # in ui-tui/ or web/. Runs fix-lockfiles --apply and pushes the hash
-  # update commit directly to main so Nix builds never stay broken.
-  #
-  # Safety invariants:
-  #   1. The fix commit only touches nix/*.nix files, which are NOT in
-  #      the paths filter above, so this cannot re-trigger itself.
-  #   2. An explicit file-whitelist check before commit aborts if
-  #      fix-lockfiles ever modifies unexpected files.
-  #   3. Job-level concurrency with cancel-in-progress: true ensures
-  #      back-to-back pushes collapse to the newest; ref: main checkout
-  #      always operates on the latest branch state.
-  #   4. Uses a GitHub App token (not GITHUB_TOKEN) so the fix commit
-  #      triggers downstream nix.yml verification.
-  auto-fix-main:
-    if: github.event_name == 'push'
-    runs-on: ubuntu-latest
-    timeout-minutes: 25
-    concurrency:
-      group: auto-fix-main
-      cancel-in-progress: true
-    steps:
-      - name: Generate GitHub App token
-        id: app-token
-        uses: actions/create-github-app-token@7bfa3a4717ef143a604ee0a99d859b8886a96d00  # v1.9.3
-        with:
-          app-id: ${{ secrets.APP_ID }}
-          private-key: ${{ secrets.APP_PRIVATE_KEY }}
-
-      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5  # v4
-        with:
-          ref: main
-          token: ${{ steps.app-token.outputs.token }}
-
-      - uses: ./.github/actions/nix-setup
-
-      - name: Apply lockfile hashes
-        id: apply
-        run: nix run .#fix-lockfiles -- --apply
-
-      - name: Commit & push
-        if: steps.apply.outputs.changed == 'true'
-        shell: bash
-        run: |
-          set -euo pipefail
-
-          # Ensure only nix files were modified — prevents accidental
-          # self-triggering if fix-lockfiles ever touches package files.
-          unexpected="$(git diff --name-only | grep -Ev '^nix/(tui|web)\.nix$' || true)"
-          if [ -n "$unexpected" ]; then
-            echo "::error::Unexpected modified files: $unexpected"
-            exit 1
-          fi
-
-          # Record the base SHA before committing — used to detect package
-          # file changes if we need to rebase after a non-fast-forward push.
-          BASE_SHA="$(git rev-parse HEAD)"
-
-          git config user.name 'github-actions[bot]'
-          git config user.email '41898282+github-actions[bot]@users.noreply.github.com'
-          git add nix/tui.nix nix/web.nix
-          git commit -m "fix(nix): auto-refresh npm lockfile hashes" \
-            -m "Source: $GITHUB_SHA" \
-            -m "Run: $GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID"
-
-          # Retry push with rebase in case main advanced with an unrelated
-          # commit during the nix build. Without this, a non-fast-forward
-          # rejection silently loses the fix. If package files changed during
-          # the rebase, abort — a fresh auto-fix run will handle the new state.
-          for attempt in 1 2 3; do
-            if git push origin HEAD:main; then
-              exit 0
-            fi
-            echo "::warning::Push attempt $attempt failed (non-fast-forward?), rebasing…"
-            git fetch origin main
-
-            # If package files changed between our base and the new main,
-            # our computed hashes are stale. Abort and let the next triggered
-            # run recompute from the correct package-lock state.
-            pkg_changed="$(git diff --name-only "$BASE_SHA"..origin/main -- \
-              'ui-tui/package-lock.json' 'ui-tui/package.json' \
-              'web/package-lock.json' 'web/package.json' || true)"
-            if [ -n "$pkg_changed" ]; then
-              echo "::warning::Package files changed since hash computation — aborting; a fresh run will recompute"
-              exit 0
-            fi
-
-            git rebase origin/main
-          done
-          echo "::error::Failed to push after 3 rebase attempts"
-          exit 1
-
-  # ── PR fix (manual / checkbox) ─────────────────────────────────────
-  # Existing behavior: run on manual dispatch OR when a task-list
-  # checkbox in the sticky lockfile-check comment flips from [ ] to [x].
  fix:
+    # Run on manual dispatch OR when a task-list checkbox in the sticky
+    # lockfile-check comment flips from `[ ]` to `[x]`.
    if: |
      github.event_name == 'workflow_dispatch' ||
      (github.event_name == 'issue_comment'
@@ -38,7 +38,7 @@ hermes-agent/
 │   │                     #   homeassistant, signal, matrix, mattermost, email, sms,
 │   │                     #   dingtalk, wecom, weixin, feishu, qqbot, bluebubbles,
 │   │                     #   webhook, api_server, ...). See ADDING_A_PLATFORM.md.
-│   └── builtin_hooks/    # Extension point for always-registered gateway hooks (none shipped)
+│   └── builtin_hooks/    # Always-registered gateway hooks (boot-md, ...)
 ├── plugins/              # Plugin system (see "Plugins" section below)
 │   ├── memory/           # Memory-provider plugins (honcho, mem0, supermemory, ...)
 │   ├── context_engine/   # Context-engine plugins
@@ -14,7 +14,7 @@ ENV PLAYWRIGHT_BROWSERS_PATH=/opt/hermes/.playwright
 # that would otherwise accumulate when hermes runs as PID 1. See #15012.
 RUN apt-get update && \
    apt-get install -y --no-install-recommends \
-    build-essential nodejs npm python3 ripgrep ffmpeg gcc python3-dev libffi-dev procps git openssh-client docker-cli tini && \
+        build-essential nodejs npm python3 ripgrep ffmpeg gcc python3-dev libffi-dev procps git openssh-client docker-cli tini && \
    rm -rf /var/lib/apt/lists/*

 # Non-root user for runtime; UID can be overridden via HERMES_UID at runtime
@@ -45,13 +45,7 @@ COPY --chown=hermes:hermes . .

 # Build browser dashboard and terminal UI assets.
 RUN cd web && npm run build && \
-    cd ../ui-tui && npm run build && \
-    rm -rf node_modules/@hermes/ink && \
-    rm -rf packages/hermes-ink/node_modules && \
-    cp -R packages/hermes-ink node_modules/@hermes/ink && \
-    npm install --omit=dev --prefer-offline --no-audit --prefix node_modules/@hermes/ink && \
-    rm -rf node_modules/@hermes/ink/node_modules/react && \
-    node --input-type=module -e "await import('@hermes/ink')"
+    cd ../ui-tui && npm run build

 # ---------- Permissions ----------
 # Make install dir world-readable so any HERMES_UID can read it at runtime.
@@ -112,17 +112,6 @@ def main() -> None:
    import acp
    from .server import HermesACPAgent

-    # MCP tool discovery from config.yaml — run before asyncio.run() so
-    # it's safe to use blocking waits.  (ACP also registers per-session
-    # MCP servers dynamically via asyncio.to_thread inside the event
-    # loop; that path is unaffected.)  Moved from model_tools.py module
-    # scope to avoid freezing the gateway's loop on lazy import (#16856).
-    try:
-        from tools.mcp_tool import discover_mcp_tools
-        discover_mcp_tools()
-    except Exception:
-        logger.debug("MCP tool discovery failed at ACP startup", exc_info=True)
-
    agent = HermesACPAgent()
    try:
        asyncio.run(acp.run_agent(agent, use_unstable_protocol=True))
@@ -3,7 +3,6 @@
 from __future__ import annotations

 import asyncio
-import contextvars
 import logging
 import os
 from collections import defaultdict, deque
@@ -575,22 +574,6 @@ class HermesACPAgent(acp.Agent):

        def _run_agent() -> dict:
            nonlocal previous_approval_cb, previous_interactive
-            # Bind HERMES_SESSION_KEY for this session so per-session caches
-            # (e.g. the interactive sudo password cache in tools.terminal_tool)
-            # scope to the ACP session rather than leaking across sessions
-            # that land on the same reused executor thread. This call runs
-            # inside a contextvars.copy_context() below, so the ContextVar
-            # write is isolated from other concurrent ACP sessions.
-            try:
-                from gateway.session_context import (
-                    clear_session_vars,
-                    set_session_vars,
-                )
-                session_tokens = set_session_vars(session_key=session_id)
-            except Exception:
-                session_tokens = None
-                clear_session_vars = None  # type: ignore[assignment]
-                logger.debug("Could not set ACP session context", exc_info=True)
            if approval_cb:
                try:
                    from tools import terminal_tool as _terminal_tool
@@ -624,19 +607,9 @@ class HermesACPAgent(acp.Agent):
                        _terminal_tool.set_approval_callback(previous_approval_cb)
                    except Exception:
                        logger.debug("Could not restore approval callback", exc_info=True)
-                if session_tokens is not None and clear_session_vars is not None:
-                    try:
-                        clear_session_vars(session_tokens)
-                    except Exception:
-                        logger.debug("Could not clear ACP session context", exc_info=True)

        try:
-            # Wrap the executor call in a fresh copy of the current context so
-            # concurrent ACP sessions on the shared ThreadPoolExecutor don't
-            # stomp on each other's ContextVar writes (HERMES_SESSION_KEY in
-            # particular — used by the interactive sudo password cache scope).
-            ctx = contextvars.copy_context()
-            result = await loop.run_in_executor(_executor, ctx.run, _run_agent)
+            result = await loop.run_in_executor(_executor, _run_agent)
        except Exception:
            logger.exception("Executor error for session %s", session_id)
            return PromptResponse(stop_reason="end_turn")
@@ -22,25 +22,10 @@ from hermes_constants import get_hermes_home
 from typing import Any, Dict, List, Optional, Tuple
 from utils import normalize_proxy_env_vars

-# NOTE: `import anthropic` is deliberately NOT at module top — the SDK pulls
-# ~220 ms of imports (anthropic.types, anthropic.lib.tools._beta_runner, etc.)
-# and the 3 usage sites (build_anthropic_client, build_anthropic_bedrock_client,
-# read_claude_code_credentials_from_keychain) are all on cold user-triggered
-# paths. Access via the `_get_anthropic_sdk()` accessor below, which caches
-# the module after the first call and returns None on ImportError.
-_anthropic_sdk: Any = ...  # sentinel — None means "tried and missing"
-
-
-def _get_anthropic_sdk():
-    """Return the ``anthropic`` SDK module, importing lazily. None if not installed."""
-    global _anthropic_sdk
-    if _anthropic_sdk is ...:
-        try:
-            import anthropic as _sdk
-            _anthropic_sdk = _sdk
-        except ImportError:
-            _anthropic_sdk = None
-    return _anthropic_sdk
+try:
+    import anthropic as _anthropic_sdk
+except ImportError:
+    _anthropic_sdk = None  # type: ignore[assignment]

 logger = logging.getLogger(__name__)

@@ -257,11 +242,10 @@ _OAUTH_ONLY_BETAS = [
    "oauth-2025-04-20",
 ]

-# Claude Code version — sent on OAuth token-exchange / refresh requests
-# (platform.claude.com/v1/oauth/token) as the client's user-agent. Anthropic's
-# OAuth flow validates the UA and may reject requests with a version that's
-# too old, so detecting dynamically keeps users on a current Claude Code
-# install from hitting stale-version errors during login/refresh.
+# Claude Code identity — required for OAuth requests to be routed correctly.
+# Without these, Anthropic's infrastructure intermittently 500s OAuth traffic.
+# The version must stay reasonably current — Anthropic rejects OAuth requests
+# when the spoofed user-agent version is too far behind the actual release.
 _CLAUDE_CODE_VERSION_FALLBACK = "2.1.74"
 _claude_code_version_cache: Optional[str] = None

@@ -269,9 +253,9 @@ _claude_code_version_cache: Optional[str] = None
 def _detect_claude_code_version() -> str:
    """Detect the installed Claude Code version, fall back to a static constant.

-    Used only by the OAuth token-exchange / refresh flow
-    (``platform.claude.com/v1/oauth/token``). The Messages API client no
-    longer sends a claude-cli user-agent.
+    Anthropic's OAuth infrastructure validates the user-agent version and may
+    reject requests with a version that's too old.  Detecting dynamically means
+    users who keep Claude Code updated never hit stale-version 400s.
    """
    import subprocess as _sp

@@ -291,13 +275,12 @@ def _detect_claude_code_version() -> str:
    return _CLAUDE_CODE_VERSION_FALLBACK


-def _get_claude_code_version() -> str:
-    """Lazily detect the installed Claude Code version for OAuth flow headers.
+_CLAUDE_CODE_SYSTEM_PREFIX = "You are Claude Code, Anthropic's official CLI for Claude."
+_MCP_TOOL_PREFIX = "mcp_"

-    Used only on the OAuth token-exchange and refresh endpoints
-    (``platform.claude.com/v1/oauth/token``). The Messages API client does
-    not send a claude-cli user-agent.
-    """
+
+def _get_claude_code_version() -> str:
+    """Lazily detect the installed Claude Code version when OAuth headers need it."""
    global _claude_code_version_cache
    if _claude_code_version_cache is None:
        _claude_code_version_cache = _detect_claude_code_version()
@@ -410,7 +393,6 @@ def build_anthropic_client(api_key: str, base_url: str = None, timeout: float =

    Returns an anthropic.Anthropic instance.
    """
-    _anthropic_sdk = _get_anthropic_sdk()
    if _anthropic_sdk is None:
        raise ImportError(
            "The 'anthropic' package is required for the Anthropic provider. "
@@ -467,21 +449,15 @@ def build_anthropic_client(api_key: str, base_url: str = None, timeout: float =
        if common_betas:
            kwargs["default_headers"] = {"anthropic-beta": ",".join(common_betas)}
    elif _is_oauth_token(api_key):
-        # OAuth access token / setup-token → Bearer auth + OAuth-only betas.
-        # The OAuth-specific beta headers are still required by Anthropic's
-        # OAuth-gated Messages API path; the Claude Code user-agent / x-app
-        # spoofing is deliberately NOT sent — Hermes identifies as itself.
-        #
-        # ``context-1m-2025-08-07`` is stripped here: Anthropic rejects
-        # OAuth requests that carry it with
-        #   "This authentication style is incompatible with the long
-        #    context beta header."
-        # Subscription-gated OAuth traffic gets the 200K default window.
-        oauth_safe_common = [b for b in common_betas if b != _CONTEXT_1M_BETA]
-        all_betas = oauth_safe_common + _OAUTH_ONLY_BETAS
+        # OAuth access token / setup-token → Bearer auth + Claude Code identity.
+        # Anthropic routes OAuth requests based on user-agent and headers;
+        # without Claude Code's fingerprint, requests get intermittent 500s.
+        all_betas = common_betas + _OAUTH_ONLY_BETAS
        kwargs["auth_token"] = api_key
        kwargs["default_headers"] = {
            "anthropic-beta": ",".join(all_betas),
+            "user-agent": f"claude-cli/{_get_claude_code_version()} (external, cli)",
+            "x-app": "cli",
        }
    else:
        # Regular API key → x-api-key header + common betas
@@ -508,7 +484,6 @@ def build_anthropic_bedrock_client(region: str):

    Auth uses the boto3 default credential chain (IAM roles, SSO, env vars).
    """
-    _anthropic_sdk = _get_anthropic_sdk()
    if _anthropic_sdk is None:
        raise ImportError(
            "The 'anthropic' package is required for the Bedrock provider. "
@@ -540,6 +515,9 @@ def _read_claude_code_credentials_from_keychain() -> Optional[Dict[str, Any]]:

    Returns dict with {accessToken, refreshToken?, expiresAt?} or None.
    """
+    import platform
+    import subprocess
+
    if platform.system() != "Darwin":
        return None

@@ -825,45 +803,17 @@ def resolve_anthropic_token() -> Optional[str]:
    """Resolve an Anthropic token from all available sources.

    Priority:
-      1. Hermes credential pool (``~/.hermes/auth.json`` →
-         ``credential_pool.anthropic``) — OAuth tokens minted by Hermes'
-         own PKCE login flow. Entries are auto-refreshed when near
-         expiry. Env-sourced pool entries (``source="env:..."``) are
-         skipped here so the env-var priority logic below still runs.
-      2. ANTHROPIC_TOKEN env var (OAuth/setup token saved by Hermes)
-      3. CLAUDE_CODE_OAUTH_TOKEN env var
-      4. Claude Code credentials (~/.claude.json or ~/.claude/.credentials.json)
+      1. ANTHROPIC_TOKEN env var (OAuth/setup token saved by Hermes)
+      2. CLAUDE_CODE_OAUTH_TOKEN env var
+      3. Claude Code credentials (~/.claude.json or ~/.claude/.credentials.json)
         — with automatic refresh if expired and a refresh token is available
-      5. ANTHROPIC_API_KEY env var (regular API key, or legacy fallback)
+      4. ANTHROPIC_API_KEY env var (regular API key, or legacy fallback)

    Returns the token string or None.
    """
-    # 1. Hermes credential pool — the live source of truth for tokens
-    #    minted via ``hermes login anthropic`` / the dashboard PKCE flow.
-    #    ``select()`` picks the best available entry and refreshes it if
-    #    it's near expiry, so callers always get a fresh token.
-    #
-    #    Skip env-sourced pool entries (``env:ANTHROPIC_TOKEN``, etc.) —
-    #    those are passthroughs of the env var, and the env-var branches
-    #    below have richer priority logic (``_prefer_refreshable_claude_code_token``)
-    #    that can upgrade a static env OAuth token to a refreshed
-    #    Claude Code token. Letting the pool win here would short-circuit
-    #    that upgrade.
-    try:
-        from agent.credential_pool import load_pool
-        pool = load_pool("anthropic")
-        entry = pool.select()
-        if entry and entry.access_token and not entry.source.startswith("env:"):
-            return entry.access_token
-    except Exception as exc:
-        # Pool lookup is best-effort — fall through to env/file sources
-        # if anything goes wrong (e.g. auth.json corruption during a
-        # concurrent write).
-        logger.debug("Credential-pool lookup failed for anthropic: %s", exc)
-
    creds = read_claude_code_credentials()

-    # 2. Hermes-managed OAuth/setup token env var
+    # 1. Hermes-managed OAuth/setup token env var
    token = os.getenv("ANTHROPIC_TOKEN", "").strip()
    if token:
        preferred = _prefer_refreshable_claude_code_token(token, creds)
@@ -871,7 +821,7 @@ def resolve_anthropic_token() -> Optional[str]:
            return preferred
        return token

-    # 3. CLAUDE_CODE_OAUTH_TOKEN (used by Claude Code for setup-tokens)
+    # 2. CLAUDE_CODE_OAUTH_TOKEN (used by Claude Code for setup-tokens)
    cc_token = os.getenv("CLAUDE_CODE_OAUTH_TOKEN", "").strip()
    if cc_token:
        preferred = _prefer_refreshable_claude_code_token(cc_token, creds)
@@ -879,12 +829,12 @@ def resolve_anthropic_token() -> Optional[str]:
            return preferred
        return cc_token

-    # 4. Claude Code credential file
+    # 3. Claude Code credential file
    resolved_claude_token = _resolve_claude_code_token_from_credentials(creds)
    if resolved_claude_token:
        return resolved_claude_token

-    # 5. Regular API key, or a legacy OAuth token saved in ANTHROPIC_API_KEY.
+    # 4. Regular API key, or a legacy OAuth token saved in ANTHROPIC_API_KEY.
    # This remains as a compatibility fallback for pre-migration Hermes configs.
    api_key = os.getenv("ANTHROPIC_API_KEY", "").strip()
    if api_key:
@@ -1131,33 +1081,6 @@ def _sanitize_tool_id(tool_id: str) -> str:
    return sanitized or "tool_0"


-def _normalize_tool_input_schema(schema: Any) -> Dict[str, Any]:
-    """Normalize tool schemas before sending them to Anthropic.
-
-    Anthropic's tool schema validator rejects nullable unions such as
-    ``anyOf: [{"type": "string"}, {"type": "null"}]`` that Pydantic/MCP
-    commonly emits for optional fields. Tool optionality is represented by
-    the parent ``required`` array, so we delegate to the shared
-    ``strip_nullable_unions`` helper to collapse nullable unions to the
-    non-null branch while preserving metadata like description/default.
-
-    ``keep_nullable_hint=False`` because the Anthropic validator does not
-    recognize the OpenAPI-style ``nullable: true`` extension and strict
-    schema-to-grammar converters may reject unknown keywords.
-    """
-    if not schema:
-        return {"type": "object", "properties": {}}
-
-    from tools.schema_sanitizer import strip_nullable_unions
-
-    normalized = strip_nullable_unions(schema, keep_nullable_hint=False)
-    if not isinstance(normalized, dict):
-        return {"type": "object", "properties": {}}
-    if normalized.get("type") == "object" and not isinstance(normalized.get("properties"), dict):
-        normalized = {**normalized, "properties": {}}
-    return normalized
-
-
 def convert_tools_to_anthropic(tools: List[Dict]) -> List[Dict]:
    """Convert OpenAI tool definitions to Anthropic format."""
    if not tools:
@@ -1168,9 +1091,7 @@ def convert_tools_to_anthropic(tools: List[Dict]) -> List[Dict]:
        result.append({
            "name": fn.get("name", ""),
            "description": fn.get("description", ""),
-            "input_schema": _normalize_tool_input_schema(
-                fn.get("parameters", {"type": "object", "properties": {}})
-            ),
+            "input_schema": fn.get("parameters", {"type": "object", "properties": {}}),
        })
    return result

@@ -1649,10 +1570,8 @@ def build_anthropic_kwargs(
    "max_tokens too large given prompt" errors and retry with a smaller cap
    (see parse_available_output_tokens_from_error + _ephemeral_max_output_tokens).

-    When *is_oauth* is True, enables the OAuth-only beta headers required by
-    Anthropic's subscription-gated Messages endpoint (fast-mode branch only;
-    the default headers are set by build_anthropic_client). No system-prompt
-    or tool-name rewriting is performed — Hermes identifies as itself.
+    When *is_oauth* is True, applies Claude Code compatibility transforms:
+    system prompt prefix, tool name prefixing, and prompt sanitization.

    When *preserve_dots* is True, model name dots are not converted to hyphens
    (for Alibaba/DashScope anthropic-compatible endpoints: qwen3.5-plus).
@@ -1685,11 +1604,45 @@ def build_anthropic_kwargs(
    if context_length and effective_max_tokens > context_length:
        effective_max_tokens = max(context_length - 1, 1)

-    # OAuth requests go through Anthropic's subscription-gated Messages
-    # endpoint but otherwise send the real Hermes system prompt and real
-    # Hermes tool names — the only OAuth-specific wire differences are
-    # Bearer auth and the _OAUTH_ONLY_BETAS header (applied in
-    # build_anthropic_client and the fast-mode branch below).
+    # ── OAuth: Claude Code identity ──────────────────────────────────
+    if is_oauth:
+        # 1. Prepend Claude Code system prompt identity
+        cc_block = {"type": "text", "text": _CLAUDE_CODE_SYSTEM_PREFIX}
+        if isinstance(system, list):
+            system = [cc_block] + system
+        elif isinstance(system, str) and system:
+            system = [cc_block, {"type": "text", "text": system}]
+        else:
+            system = [cc_block]
+
+        # 2. Sanitize system prompt — replace product name references
+        #    to avoid Anthropic's server-side content filters.
+        for block in system:
+            if isinstance(block, dict) and block.get("type") == "text":
+                text = block.get("text", "")
+                text = text.replace("Hermes Agent", "Claude Code")
+                text = text.replace("Hermes agent", "Claude Code")
+                text = text.replace("hermes-agent", "claude-code")
+                text = text.replace("Nous Research", "Anthropic")
+                block["text"] = text
+
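+        # 3. Prefix every tool name with mcp_ (Claude Code convention). NOTE(review): unlike step 4, names already starting with mcp_ are not skipped here — confirm this is intended.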
+        if anthropic_tools:
+            for tool in anthropic_tools:
+                if "name" in tool:
+                    tool["name"] = _MCP_TOOL_PREFIX + tool["name"]
+
+        # 4. Prefix tool names in message history (tool_use and tool_result blocks)
+        for msg in anthropic_messages:
+            content = msg.get("content")
+            if isinstance(content, list):
+                for block in content:
+                    if isinstance(block, dict):
+                        if block.get("type") == "tool_use" and "name" in block:
+                            if not block["name"].startswith(_MCP_TOOL_PREFIX):
+                                block["name"] = _MCP_TOOL_PREFIX + block["name"]
+                        elif block.get("type") == "tool_result" and "tool_use_id" in block:
+                            pass  # tool_result uses ID, not name

    kwargs: Dict[str, Any] = {
        "model": model,
@@ -1780,9 +1733,6 @@ def build_anthropic_kwargs(
        # extra_headers override the client-level anthropic-beta header).
        betas = list(_common_betas_for_base_url(base_url))
        if is_oauth:
-            # Strip context-1m — incompatible with OAuth auth. See matching
-            # comment in build_anthropic_client().
-            betas = [b for b in betas if b != _CONTEXT_1M_BETA]
            betas.extend(_OAUTH_ONLY_BETAS)
        betas.append(_FAST_MODE_BETA)
        kwargs["extra_headers"] = {"anthropic-beta": ",".join(betas)}
@@ -41,57 +41,10 @@ import threading
 import time
 from pathlib import Path  # noqa: F401 — used by test mocks
 from types import SimpleNamespace
-from typing import Any, Dict, List, Optional, Tuple, TYPE_CHECKING
+from typing import Any, Dict, List, Optional, Tuple
 from urllib.parse import urlparse, parse_qs, urlunparse

-# NOTE: `from openai import OpenAI` is deliberately NOT at module top — the
-# openai SDK pulls a large type tree (~240 ms cold, including responses/*,
-# graders/*). We expose `OpenAI` here as a thin proxy that imports the SDK on
-# first call and forwards, so:
-#   (a) the 15+ in-module `OpenAI(...)` construction sites work unchanged
-#       (Python's function-scope name lookup resolves `OpenAI` to the proxy
-#       object bound in module globals here, without triggering any import);
-#   (b) external code can still do `auxiliary_client.OpenAI` or
-#       `patch("agent.auxiliary_client.OpenAI", ...)` — tests see the proxy,
-#       and patch replaces the module attribute as usual;
-#   (c) `OpenAI` as a type annotation resolves at runtime to the proxy class
-#       (which is harmless — annotations aren't type-checked at runtime).
-# See tests/agent/test_auxiliary_client.py for patch patterns this supports.
-if TYPE_CHECKING:
-    from openai import OpenAI  # noqa: F401 — type hints only
-
-_OPENAI_CLS_CACHE: Optional[type] = None
-
-
-def _load_openai_cls() -> type:
-    """Import and cache ``openai.OpenAI``."""
-    global _OPENAI_CLS_CACHE
-    if _OPENAI_CLS_CACHE is None:
-        from openai import OpenAI as _cls
-        _OPENAI_CLS_CACHE = _cls
-    return _OPENAI_CLS_CACHE
-
-
-class _OpenAIProxy:
-    """Module-level proxy that looks like the ``openai.OpenAI`` class.
-
-    Forwards ``OpenAI(...)`` calls and ``isinstance(x, OpenAI)`` checks to the
-    real SDK class, importing the SDK lazily on first use.
-    """
-
-    __slots__ = ()
-
-    def __call__(self, *args, **kwargs):
-        return _load_openai_cls()(*args, **kwargs)
-
-    def __instancecheck__(self, obj):
-        return isinstance(obj, _load_openai_cls())
-
-    def __repr__(self):
-        return "<lazy openai.OpenAI proxy>"
-
-
-OpenAI = _OpenAIProxy()  # module-level name, resolves lazily on call/isinstance
+from openai import OpenAI

 from agent.credential_pool import load_pool
 from hermes_cli.config import get_hermes_home
@@ -141,10 +94,6 @@ _PROVIDER_ALIASES = {
    "github-models": "copilot",
    "github-copilot-acp": "copilot-acp",
    "copilot-acp-agent": "copilot-acp",
-    "tencent": "tencent-tokenhub",
-    "tokenhub": "tencent-tokenhub",
-    "tencent-cloud": "tencent-tokenhub",
-    "tencentmaas": "tencent-tokenhub",
 }


@@ -217,7 +166,6 @@ _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
    "opencode-go": "glm-5",
    "kilocode": "google/gemini-3-flash-preview",
    "ollama-cloud": "nemotron-3-nano:30b",
-    "tencent-tokenhub": "hy3-preview",
 }

 # Vision-specific model overrides for direct providers.
@@ -457,33 +405,6 @@ class _CodexCompletionsAdapter:
        # Note: the Codex endpoint (chatgpt.com/backend-api/codex) does NOT
        # support max_output_tokens or temperature — omit to avoid 400 errors.

-        # Translate extra_body.reasoning (chat.completions shape) into the
-        # Responses API's top-level reasoning + include fields.  Mirrors
-        # agent/transports/codex.py::build_kwargs() so auxiliary callers
-        # that configure reasoning via auxiliary.<task>.extra_body get the
-        # same behavior as the main agent's Codex transport.
-        extra_body = kwargs.get("extra_body") or {}
-        if isinstance(extra_body, dict):
-            reasoning_cfg = extra_body.get("reasoning")
-            if isinstance(reasoning_cfg, dict):
-                if reasoning_cfg.get("enabled") is False:
-                    # Reasoning explicitly disabled — do not set reasoning
-                    # or include.  The Codex backend still thinks by
-                    # default, but we honor the caller's intent where the
-                    # API allows it.
-                    pass
-                else:
-                    effort = reasoning_cfg.get("effort", "medium")
-                    # Codex backend rejects "minimal"; clamp to "low" to
-                    # match the main-agent Codex transport behavior.
-                    if effort == "minimal":
-                        effort = "low"
-                    resp_kwargs["reasoning"] = {
-                        "effort": effort,
-                        "summary": "auto",
-                    }
-                    resp_kwargs["include"] = ["reasoning.encrypted_content"]
-
        # Tools support for auxiliary callers (e.g. skills_hub) that pass function schemas
        tools = kwargs.get("tools")
        if tools:
@@ -713,7 +634,9 @@ class _AnthropicCompletionsAdapter:

        response = self._client.messages.create(**anthropic_kwargs)
        _transport = get_transport("anthropic_messages")
-        _nr = _transport.normalize_response(response)
+        _nr = _transport.normalize_response(
+            response, strip_tool_prefix=self._is_oauth
+        )

        # ToolCall already duck-types as OpenAI shape (.type, .function.name,
        # .function.arguments) via properties, so no wrapping needed.
@@ -791,116 +714,6 @@ class AsyncAnthropicAuxiliaryClient:
        self.base_url = sync_wrapper.base_url


-def _endpoint_speaks_anthropic_messages(base_url: str) -> bool:
-    """True if the endpoint at ``base_url`` speaks the Anthropic Messages
-    protocol instead of OpenAI chat.completions.
-
-    Mirrors ``hermes_cli.runtime_provider._detect_api_mode_for_url`` so the
-    auxiliary client and the main agent stay in sync on transport selection.
-    Covers:
-
-    - Any URL ending in ``/anthropic`` (MiniMax, Zhipu GLM, LiteLLM proxies,
-      Anthropic-compatible gateways).
-    - ``api.kimi.com/coding`` (Kimi Coding Plan — the /coding route only
-      speaks Claude-Code's native Anthropic shape; ``chat.completions``
-      returns 404 on Anthropic-only model aliases like ``kimi-for-coding``).
-    - ``api.anthropic.com`` (native Anthropic).
-    """
-    normalized = (base_url or "").strip().lower().rstrip("/")
-    if not normalized:
-        return False
-    if normalized.endswith("/anthropic"):
-        return True
-    hostname = base_url_hostname(normalized)
-    if hostname == "api.anthropic.com":
-        return True
-    if hostname == "api.kimi.com" and "/coding" in normalized:
-        return True
-    return False
-
-
-def _maybe_wrap_anthropic(
-    client_obj: Any,
-    model: str,
-    api_key: str,
-    base_url: str,
-    api_mode: Optional[str] = None,
-) -> Any:
-    """Rewrap a plain OpenAI client in ``AnthropicAuxiliaryClient`` when
-    the endpoint actually speaks Anthropic Messages.
-
-    This is the single chokepoint for aux-client transport correction.
-    Runs at the end of every ``resolve_provider_client`` branch so that
-    api_key providers (Kimi Coding Plan), the ``custom`` endpoint, and
-    future /anthropic gateways all land on the right wire format
-    regardless of which branch built the client.
-
-    Returns ``client_obj`` unchanged when:
-
-    - It's already an Anthropic/Codex/Gemini/CopilotACP wrapper.
-    - The endpoint is an OpenAI-wire endpoint.
-    - ``api_mode`` is explicitly set to a non-Anthropic transport.
-    - The ``anthropic`` SDK is not installed (falls back to OpenAI wire).
-    """
-    # Already wrapped — don't double-wrap.
-    if isinstance(client_obj, AnthropicAuxiliaryClient):
-        return client_obj
-    # Other specialized adapters we should never re-dispatch.
-    if isinstance(client_obj, CodexAuxiliaryClient):
-        return client_obj
-    try:
-        from agent.gemini_native_adapter import GeminiNativeClient
-        if isinstance(client_obj, GeminiNativeClient):
-            return client_obj
-    except ImportError:
-        pass
-    try:
-        from agent.copilot_acp_client import CopilotACPClient
-        if isinstance(client_obj, CopilotACPClient):
-            return client_obj
-    except ImportError:
-        pass
-
-    # Explicit non-anthropic api_mode wins over URL heuristics.
-    if api_mode and api_mode != "anthropic_messages":
-        return client_obj
-
-    should_wrap = (
-        api_mode == "anthropic_messages"
-        or _endpoint_speaks_anthropic_messages(base_url)
-    )
-    if not should_wrap:
-        return client_obj
-
-    try:
-        from agent.anthropic_adapter import build_anthropic_client
-    except ImportError:
-        logger.warning(
-            "Endpoint %s speaks Anthropic Messages but the anthropic SDK is "
-            "not installed — falling back to OpenAI-wire (will likely 404).",
-            base_url,
-        )
-        return client_obj
-
-    try:
-        real_client = build_anthropic_client(api_key, base_url)
-    except Exception as exc:
-        logger.warning(
-            "Failed to build Anthropic client for %s (%s) — falling back to "
-            "OpenAI-wire client.", base_url, exc,
-        )
-        return client_obj
-
-    logger.debug(
-        "Auxiliary transport: wrapping client in AnthropicAuxiliaryClient "
-        "(model=%s, base_url=%s, api_mode=%s)",
-        model, base_url[:60] if base_url else "", api_mode or "auto-detected",
-    )
-    return AnthropicAuxiliaryClient(
-        real_client, model, api_key, base_url, is_oauth=False,
-    )
-
-
 def _read_nous_auth() -> Optional[dict]:
    """Read and validate ~/.hermes/auth.json for an active Nous provider.

@@ -1071,9 +884,7 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
                from hermes_cli.models import copilot_default_headers

                extra["default_headers"] = copilot_default_headers()
-            _client = OpenAI(api_key=api_key, base_url=base_url, **extra)
-            _client = _maybe_wrap_anthropic(_client, model, api_key, base_url)
-            return _client, model
+            return OpenAI(api_key=api_key, base_url=base_url, **extra), model

        creds = resolve_api_key_provider_credentials(provider_id)
        api_key = str(creds.get("api_key", "")).strip()
@@ -1099,9 +910,7 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
            from hermes_cli.models import copilot_default_headers

            extra["default_headers"] = copilot_default_headers()
-        _client = OpenAI(api_key=api_key, base_url=base_url, **extra)
-        _client = _maybe_wrap_anthropic(_client, model, api_key, base_url)
-        return _client, model
+        return OpenAI(api_key=api_key, base_url=base_url, **extra), model

    return None, None

@@ -1385,13 +1194,7 @@ def _try_custom_endpoint() -> Tuple[Optional[Any], Optional[str]]:
            AnthropicAuxiliaryClient(real_client, model, custom_key, custom_base, is_oauth=False),
            model,
        )
-    # URL-based anthropic detection for custom endpoints that didn't set
-    # api_mode explicitly (e.g. kimi.com/coding reached via custom config).
-    _fallback_client = OpenAI(api_key=custom_key, base_url=_clean_base, **_extra)
-    _fallback_client = _maybe_wrap_anthropic(
-        _fallback_client, model, custom_key, custom_base, custom_mode,
-    )
-    return _fallback_client, model
+    return OpenAI(api_key=custom_key, base_url=_clean_base, **_extra), model


 def _try_codex() -> Tuple[Optional[Any], Optional[str]]:
@@ -1942,20 +1745,8 @@ def resolve_provider_client(
                return True
        return False

-    def _wrap_if_needed(client_obj, final_model_str: str, base_url_str: str = "",
-                        api_key_str: str = ""):
-        """Wrap a plain OpenAI client in the correct transport adapter.
-
-        Handles two cases:
-        - ``CodexAuxiliaryClient`` when the endpoint needs the Responses API
-          (explicit ``api_mode=codex_responses`` or api.openai.com + codex
-          model name).
-        - ``AnthropicAuxiliaryClient`` when the endpoint speaks Anthropic
-          Messages (explicit ``api_mode=anthropic_messages``, any ``/anthropic``
-          suffix, ``api.kimi.com/coding``, or ``api.anthropic.com``).
-
-        Clients that are already specialized wrappers pass through unchanged.
-        """
+    def _wrap_if_needed(client_obj, final_model_str: str, base_url_str: str = ""):
+        """Wrap a plain OpenAI client in CodexAuxiliaryClient if Responses API is needed."""
        if _needs_codex_wrap(client_obj, base_url_str, final_model_str):
            logger.debug(
                "resolve_provider_client: wrapping client in CodexAuxiliaryClient "
@@ -1963,11 +1754,7 @@ def resolve_provider_client(
                api_mode or "auto-detected", final_model_str,
                base_url_str[:60] if base_url_str else "")
            return CodexAuxiliaryClient(client_obj, final_model_str)
-        # Anthropic-wire endpoints: rewrap plain OpenAI clients so
-        # chat.completions.create() is translated to /v1/messages.
-        return _maybe_wrap_anthropic(
-            client_obj, final_model_str, api_key_str, base_url_str, api_mode,
-        )
+        return client_obj

    # ── Auto: try all providers in priority order ────────────────────
    if provider == "auto":
@@ -2047,7 +1834,7 @@ def resolve_provider_client(
    # ── Custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY) ───────────
    if provider == "custom":
        if explicit_base_url:
-            custom_base = _to_openai_base_url(explicit_base_url).strip()
+            custom_base = explicit_base_url.strip()
            custom_key = (
                (explicit_api_key or "").strip()
                or os.getenv("OPENAI_API_KEY", "").strip()
@@ -2060,7 +1847,7 @@ def resolve_provider_client(
                )
                return None, None
            final_model = _normalize_resolved_model(
-                model or (main_runtime.get("model") if main_runtime else None) or "gpt-4o-mini",
+                model or _read_main_model() or "gpt-4o-mini",
                provider,
            )
            extra = {}
@@ -2075,7 +1862,7 @@ def resolve_provider_client(
                    is_agent_turn=True, is_vision=is_vision
                )
            client = OpenAI(api_key=custom_key, base_url=_clean_base, **extra)
-            client = _wrap_if_needed(client, final_model, custom_base, custom_key)
+            client = _wrap_if_needed(client, final_model, custom_base)
            return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
                    else (client, final_model))
        # Try custom first, then codex, then API-key providers
@@ -2085,8 +1872,7 @@ def resolve_provider_client(
            if client is not None:
                final_model = _normalize_resolved_model(model or default, provider)
                _cbase = str(getattr(client, "base_url", "") or "")
-                _ckey = str(getattr(client, "api_key", "") or "")
-                client = _wrap_if_needed(client, final_model, _cbase, _ckey)
+                client = _wrap_if_needed(client, final_model, _cbase)
                return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
                        else (client, final_model))
        logger.warning("resolve_provider_client: custom/main requested "
@@ -2109,22 +1895,10 @@ def resolve_provider_client(
            entry_api_mode = (api_mode or custom_entry.get("api_mode") or "").strip()
            if custom_base:
                final_model = _normalize_resolved_model(
-                    model
-                    or custom_entry.get("model")
-                    or (main_runtime.get("model") if main_runtime else None)
-                    or _read_main_model()
-                    or "gpt-4o-mini",
+                    model or custom_entry.get("model") or _read_main_model() or "gpt-4o-mini",
                    provider,
                )
-                # anthropic_messages talks to the /anthropic surface directly;
-                # OpenAI-wire paths (chat_completions / codex_responses) need the
-                # /v1 equivalent.  Rewrite only on the OpenAI-wire path so the
-                # Anthropic fallback SDK still sees the original URL.
-                if entry_api_mode == "anthropic_messages":
-                    openai_base = custom_base
-                else:
-                    openai_base = _to_openai_base_url(custom_base)
-                _clean_base2, _dq2 = _extract_url_query_params(openai_base)
+                _clean_base2, _dq2 = _extract_url_query_params(custom_base)
                _extra2 = {"default_query": _dq2} if _dq2 else {}
                logger.debug(
                    "resolve_provider_client: named custom provider %r (%s, api_mode=%s)",
@@ -2143,12 +1917,7 @@ def resolve_provider_client(
                            "installed — falling back to OpenAI-wire.",
                            provider,
                        )
-                        # Fallback went OpenAI-wire after all — redo the query
-                        # extraction against the rewritten /v1 URL.
-                        _fallback_base = _to_openai_base_url(custom_base)
-                        _fb_clean, _fb_dq = _extract_url_query_params(_fallback_base)
-                        _fb_extra = {"default_query": _fb_dq} if _fb_dq else {}
-                        client = OpenAI(api_key=custom_key, base_url=_fb_clean, **_fb_extra)
+                        client = OpenAI(api_key=custom_key, base_url=_clean_base2, **_extra2)
                        return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
                                else (client, final_model))
                    sync_anthropic = AnthropicAuxiliaryClient(
@@ -2167,7 +1936,7 @@ def resolve_provider_client(
                ):
                    client = CodexAuxiliaryClient(client, final_model)
                else:
-                    client = _wrap_if_needed(client, final_model, openai_base, custom_key)
+                    client = _wrap_if_needed(client, final_model, custom_base)
                return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
                        else (client, final_model))
            logger.warning(
@@ -2260,11 +2029,8 @@ def resolve_provider_client(

        # Honor api_mode for any API-key provider (e.g. direct OpenAI with
        # codex-family models).  The copilot-specific wrapping above handles
-        # copilot; this covers the general case (#6800).  Also rewraps
-        # Anthropic-wire endpoints (Kimi Coding Plan api.kimi.com/coding,
-        # /anthropic-suffixed gateways) so named providers like kimi-coding
-        # land on the right transport without needing per-provider branches.
-        client = _wrap_if_needed(client, final_model, base_url, api_key)
+        # copilot; this covers the general case (#6800).
+        client = _wrap_if_needed(client, final_model, base_url)

        logger.debug("resolve_provider_client: %s (%s)", provider, final_model)
        return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
@@ -2272,12 +2038,7 @@ def resolve_provider_client(

    if pconfig.auth_type == "external_process":
        creds = resolve_external_process_provider_credentials(provider)
-        final_model = _normalize_resolved_model(
-            model
-            or (main_runtime.get("model") if main_runtime else None)
-            or _read_main_model(),
-            provider,
-        )
+        final_model = _normalize_resolved_model(model or _read_main_model(), provider)
        if provider == "copilot-acp":
            api_key = str(creds.get("api_key", "")).strip()
            base_url = str(creds.get("base_url", "")).strip()
@@ -291,52 +291,14 @@ def has_aws_credentials(env: Optional[Dict[str, str]] = None) -> bool:
 def resolve_bedrock_region(env: Optional[Dict[str, str]] = None) -> str:
    """Resolve the AWS region for Bedrock API calls.

-    Priority:
-      1. AWS_REGION env var
-      2. AWS_DEFAULT_REGION env var
-      3. boto3/botocore configured region (from ~/.aws/config or SSO profile)
-      4. us-east-1 (hard fallback)
-
-    The boto3 fallback is critical for EU/AP users who configure their region
-    in ~/.aws/config via a named profile rather than env vars — without it,
-    live model discovery would always return us.* profile IDs regardless of
-    the user's actual region.
+    Priority: AWS_REGION → AWS_DEFAULT_REGION → us-east-1 (fallback).
    """
    env = env if env is not None else os.environ
-    explicit = (
+    return (
        env.get("AWS_REGION", "").strip()
        or env.get("AWS_DEFAULT_REGION", "").strip()
+        or "us-east-1"
    )
-    if explicit:
-        return explicit
-    try:
-        import botocore.session
-        region = botocore.session.get_session().get_config_variable("region")
-        if region:
-            return region
-    except Exception:
-        pass
-    return "us-east-1"
-
-
-def bedrock_model_ids_or_none() -> Optional[List[str]]:
-    """Live-discover Bedrock model IDs for the active region.
-
-    Returns a list of model ID strings if discovery succeeds and yields
-    at least one model, or ``None`` on failure / empty result.  Callers
-    should fall back to the static curated list when ``None`` is returned.
-
-    This helper consolidates the discover → extract-ids → fallback
-    pattern that was previously duplicated across ``provider_model_ids``,
-    ``list_authenticated_providers`` section 2, and section 3.
-    """
-    try:
-        discovered = discover_bedrock_models(resolve_bedrock_region())
-        if discovered:
-            return [m["id"] for m in discovered]
-    except Exception:
-        pass
-    return None


 # ---------------------------------------------------------------------------
@@ -7,6 +7,7 @@ import random
 import threading
 import time
 import uuid
+import os
 import re
 from dataclasses import dataclass, fields, replace
 from datetime import datetime
@@ -455,70 +456,6 @@ class CredentialPool:
            logger.debug("Failed to sync from credentials file: %s", exc)
        return entry

-    def _sync_codex_entry_from_auth_store(self, entry: PooledCredential) -> PooledCredential:
-        """Sync a Codex device_code pool entry from auth.json if tokens differ.
-
-        When a Codex OAuth access token expires (or the ChatGPT account hits
-        its 5h/weekly quota), the pool entry gets marked ``STATUS_EXHAUSTED``
-        with a ``last_error_reset_at`` that can be many hours in the future.
-        Meanwhile the user may run ``hermes model`` / ``hermes auth`` which
-        performs a fresh device-code login and writes new tokens to
-        ``auth.json`` under ``_auth_store_lock``.  Without this sync the pool
-        entry stays frozen until ``last_error_reset_at`` elapses — even
-        though fresh credentials are sitting on disk — and every request
-        fails with "no available entries (all exhausted or empty)".
-
-        Mirrors the Nous/Anthropic resync paths above.  Only applies to
-        device_code-sourced entries; env/API-key-sourced entries have no
-        auth.json shadow to sync from.
-        """
-        if self.provider != "openai-codex" or entry.source != "device_code":
-            return entry
-        try:
-            with _auth_store_lock():
-                auth_store = _load_auth_store()
-                state = _load_provider_state(auth_store, "openai-codex")
-            if not isinstance(state, dict):
-                return entry
-            tokens = state.get("tokens")
-            if not isinstance(tokens, dict):
-                return entry
-            store_access = tokens.get("access_token", "")
-            store_refresh = tokens.get("refresh_token", "")
-            # Adopt auth.json tokens when either side differs.  Codex refresh
-            # tokens are single-use too, so a fresh refresh_token from
-            # another process means our entry's pair is consumed/stale.
-            entry_access = entry.access_token or ""
-            entry_refresh = entry.refresh_token or ""
-            if store_access and (
-                store_access != entry_access
-                or (store_refresh and store_refresh != entry_refresh)
-            ):
-                logger.debug(
-                    "Pool entry %s: syncing Codex tokens from auth.json "
-                    "(refreshed by another process)",
-                    entry.id,
-                )
-                field_updates: Dict[str, Any] = {
-                    "access_token": store_access,
-                    "refresh_token": store_refresh or entry.refresh_token,
-                    "last_status": None,
-                    "last_status_at": None,
-                    "last_error_code": None,
-                    "last_error_reason": None,
-                    "last_error_message": None,
-                    "last_error_reset_at": None,
-                }
-                if state.get("last_refresh"):
-                    field_updates["last_refresh"] = state["last_refresh"]
-                updated = replace(entry, **field_updates)
-                self._replace_entry(entry, updated)
-                self._persist()
-                return updated
-        except Exception as exc:
-            logger.debug("Failed to sync Codex entry from auth.json: %s", exc)
-        return entry
-
    def _sync_nous_entry_from_auth_store(self, entry: PooledCredential) -> PooledCredential:
        """Sync a Nous pool entry from auth.json if tokens differ.

@@ -851,18 +788,6 @@ class CredentialPool:
                if synced is not entry:
                    entry = synced
                    cleared_any = True
-            # For openai-codex entries, same pattern: the user may have
-            # re-authed via `hermes model` / `hermes auth` after a 429/401,
-            # leaving fresh tokens on disk while the pool entry is still
-            # frozen behind last_error_reset_at (can be hours in the
-            # future for ChatGPT weekly windows).
-            if (self.provider == "openai-codex"
-                    and entry.source == "device_code"
-                    and entry.last_status == STATUS_EXHAUSTED):
-                synced = self._sync_codex_entry_from_auth_store(entry)
-                if synced is not entry:
-                    entry = synced
-                    cleared_any = True
            if entry.last_status == STATUS_EXHAUSTED:
                exhausted_until = _exhausted_until(entry)
                if exhausted_until is not None and now < exhausted_until:
@@ -47,6 +47,7 @@ from __future__ import annotations

 import os
 from dataclasses import dataclass, field
+from pathlib import Path
 from typing import Callable, List, Optional


@@ -91,7 +91,6 @@ class ClassifiedError:
 _BILLING_PATTERNS = [
    "insufficient credits",
    "insufficient_quota",
-    "insufficient balance",
    "credit balance",
    "credits have been exhausted",
    "top up your credits",
@@ -30,6 +30,7 @@ from __future__ import annotations

 import json
 import logging
+import os
 import time
 import uuid
 from types import SimpleNamespace
@@ -41,6 +42,7 @@ from agent import google_oauth
 from agent.gemini_schema import sanitize_gemini_tool_parameters
 from agent.google_code_assist import (
    CODE_ASSIST_ENDPOINT,
+    FREE_TIER_ID,
    CodeAssistError,
    ProjectContext,
    resolve_project_context,
@@ -2,7 +2,7 @@

 from __future__ import annotations

-from typing import Any, Dict
+from typing import Any, Dict, List

 # Gemini's ``FunctionDeclaration.parameters`` field accepts the ``Schema``
 # object, which is only a subset of OpenAPI 3.0 / JSON Schema.  Strip fields
@@ -29,6 +29,7 @@ from __future__ import annotations

 import json
 import logging
+import os
 import time
 import urllib.error
 import urllib.parse
@@ -49,13 +49,14 @@ import json
 import logging
 import os
 import secrets
+import socket
 import stat
 import threading
 import time
 import urllib.error
 import urllib.parse
 import urllib.request
-from dataclasses import dataclass
+from dataclasses import dataclass, field
 from pathlib import Path
 from typing import Any, Dict, Optional, Tuple

@@ -97,7 +98,6 @@ _DEFAULT_CLIENT_SECRET = f"GOCSPX-{_PUBLIC_CLIENT_SECRET_SUFFIX}"

 # Regex patterns for fallback scraping from an installed gemini-cli.
 import re as _re
-from utils import atomic_replace
 _CLIENT_ID_PATTERN = _re.compile(
    r"OAUTH_CLIENT_ID\s*=\s*['\"]([0-9]+-[a-z0-9]+\.apps\.googleusercontent\.com)['\"]"
 )
@@ -499,7 +499,7 @@ def save_credentials(creds: GoogleCredentials) -> Path:
                fh.flush()
                os.fsync(fh.fileno())
            os.chmod(tmp_path, stat.S_IRUSR | stat.S_IWUSR)
-            atomic_replace(tmp_path, path)
+            os.replace(tmp_path, path)
        finally:
            try:
                if tmp_path.exists():
@@ -1,48 +0,0 @@
-"""LM Studio reasoning-effort resolution shared by the chat-completions
-transport and run_agent's iteration-limit summary path.
-
-LM Studio publishes per-model ``capabilities.reasoning.allowed_options`` (e.g.
-``["off","on"]`` for toggle-style models, ``["off","minimal","low"]`` for
-graduated models). We map the user's ``reasoning_config`` onto LM Studio's
-OpenAI-compatible vocabulary, then clamp against the model's allowed set so
-the server doesn't 400 on an unsupported effort.
-"""
-
-from __future__ import annotations
-
-from typing import List, Optional
-
-# LM Studio accepts these top-level reasoning_effort values via its
-# OpenAI-compatible chat.completions endpoint.
-_LM_VALID_EFFORTS = {"none", "minimal", "low", "medium", "high", "xhigh"}
-
-# Toggle-style models publish allowed_options as ["off","on"] in /api/v1/models.
-# Map them onto the OpenAI-compatible request vocabulary.
-_LM_EFFORT_ALIASES = {"off": "none", "on": "medium"}
-
-
-def resolve_lmstudio_effort(
-    reasoning_config: Optional[dict],
-    allowed_options: Optional[List[str]],
-) -> Optional[str]:
-    """Return the ``reasoning_effort`` string to send to LM Studio, or ``None``.
-
-    ``None`` means "omit the field": the user picked a level the model can't
-    honor, so let LM Studio fall back to the model's declared default rather
-    than silently substituting a different effort. When ``allowed_options`` is
-    falsy (probe failed), skip clamping and send the resolved effort anyway.
-    """
-    effort = "medium"
-    if reasoning_config and isinstance(reasoning_config, dict):
-        if reasoning_config.get("enabled") is False:
-            effort = "none"
-        else:
-            raw = (reasoning_config.get("effort") or "").strip().lower()
-            raw = _LM_EFFORT_ALIASES.get(raw, raw)
-            if raw in _LM_VALID_EFFORTS:
-                effort = raw
-    if allowed_options:
-        allowed = {_LM_EFFORT_ALIASES.get(opt, opt) for opt in allowed_options}
-        if effort not in allowed:
-            return None
-    return effort
@@ -28,6 +28,7 @@ Usage in run_agent.py:

 from __future__ import annotations

+import json
 import logging
 import re
 import inspect
@@ -52,7 +52,6 @@ _PROVIDER_PREFIXES: frozenset[str] = frozenset({
    "xiaomi",
    "arcee",
    "gmi",
-    "tencent-tokenhub",
    "custom", "local",
    # Common aliases
    "google", "google-gemini", "google-ai-studio",
@@ -61,7 +60,6 @@ _PROVIDER_PREFIXES: frozenset[str] = frozenset({
    "ollama",
    "stepfun", "opencode", "zen", "go", "vercel", "kilo", "dashscope", "aliyun", "qwen",
    "mimo", "xiaomi-mimo",
-    "tencent", "tokenhub", "tencent-cloud", "tencentmaas",
    "arcee-ai", "arceeai",
    "gmi-cloud", "gmicloud",
    "xai", "x-ai", "x.ai", "grok",
@@ -210,8 +208,6 @@ DEFAULT_CONTEXT_LENGTHS = {
    "grok": 131072,             # catch-all (grok-beta, unknown grok-*)
    # Kimi
    "kimi": 262144,
-    # Tencent — Hy3 Preview (Hunyuan) with 256K context window
-    "hy3-preview": 256000,
    # Nemotron — NVIDIA's open-weights series (128K context across all sizes)
    "nemotron": 131072,
    # Arcee
@@ -314,7 +310,6 @@ _URL_TO_PROVIDER: Dict[str, str] = {
    "api.xiaomimimo.com": "xiaomi",
    "xiaomimimo.com": "xiaomi",
    "api.gmi-serving.com": "gmi",
-    "tokenhub.tencentmaas.com": "tencent-tokenhub",
    "ollama.com": "ollama-cloud",
 }

@@ -625,6 +620,8 @@ def fetch_endpoint_model_metadata(
                        if isinstance(ctx, int) and ctx > 0:
                            context_length = ctx
                            break
+                    if context_length is None:
+                        context_length = _extract_context_length(model)
                    if context_length is not None:
                        entry["context_length"] = context_length

@@ -1014,7 +1011,10 @@ def _query_local_context_length(model: str, base_url: str, api_key: str = "") ->
                                ctx = cfg.get("context_length")
                                if ctx and isinstance(ctx, (int, float)):
                                    return int(ctx)
-                            break
+                            # Fall back to max_context_length (theoretical model max)
+                            ctx = m.get("max_context_length") or m.get("context_length")
+                            if ctx and isinstance(ctx, (int, float)):
+                                return int(ctx)

            # LM Studio / vLLM / llama.cpp: try /v1/models/{model}
            resp = client.get(f"{server_url}/v1/models/{model}")
@@ -1276,10 +1276,7 @@ def get_model_context_length(
    model = _strip_provider_prefix(model)

    # 1. Check persistent cache (model+provider)
-    # LM Studio is excluded — its loaded context length is transient (the
-    # user can reload the model with a different context_length at any time
-    # via /api/v1/models/load), so a stale cached value would mask reloads.
-    if base_url and provider != "lmstudio":
+    if base_url:
        cached = get_cached_context_length(model, base_url)
        if cached is not None:
            # Invalidate stale Codex OAuth cache entries: pre-PR #14935 builds
@@ -1332,8 +1329,7 @@ def get_model_context_length(
            if is_local_endpoint(base_url):
                local_ctx = _query_local_context_length(model, base_url, api_key=api_key)
                if local_ctx and local_ctx > 0:
-                    if provider != "lmstudio":
-                        save_context_length(model, base_url, local_ctx)
+                    save_context_length(model, base_url, local_ctx)
                    return local_ctx
            logger.info(
                "Could not detect context length for model %r at %s — "
@@ -1423,8 +1419,7 @@ def get_model_context_length(
    if base_url and is_local_endpoint(base_url):
        local_ctx = _query_local_context_length(model, base_url, api_key=api_key)
        if local_ctx and local_ctx > 0:
-            if provider != "lmstudio":
-                save_context_length(model, base_url, local_ctx)
+            save_context_length(model, base_url, local_ctx)
            return local_ctx

    # 10. Default fallback — 128K
@@ -18,7 +18,6 @@ import os
 import tempfile
 import time
 from typing import Any, Mapping, Optional
-from utils import atomic_replace

 logger = logging.getLogger(__name__)

@@ -119,7 +118,7 @@ def record_nous_rate_limit(
        try:
            with os.fdopen(fd, "w") as f:
                json.dump(state, f)
-            atomic_replace(tmp_path, path)
+            os.replace(tmp_path, path)
        except Exception:
            # Clean up temp file on failure
            try:
@@ -310,10 +310,6 @@ PLATFORM_HINTS = {
        "Standard markdown is automatically converted to Telegram format. "
        "Supported: **bold**, *italic*, ~~strikethrough~~, ||spoiler||, "
        "`inline code`, ```code blocks```, [links](url), and ## headers. "
-        "Telegram has NO table syntax — prefer bullet lists or labeled "
-        "key: value pairs over pipe tables (any tables you do emit are "
-        "auto-rewritten into row-group bullets, which you can produce "
-        "directly for cleaner output). "
        "You can send media files natively: to deliver a file to the user, "
        "include MEDIA:/absolute/path/to/file in your response. Images "
        "(.png, .jpg, .webp) appear as photos, audio (.ogg) sends as voice "
@@ -76,7 +76,6 @@ except ImportError:  # pragma: no cover
    fcntl = None  # type: ignore[assignment]

 from hermes_constants import get_hermes_home
-from utils import atomic_replace

 logger = logging.getLogger(__name__)

@@ -569,7 +568,7 @@ def save_allowlist(data: Dict[str, Any]) -> None:
        try:
            with os.fdopen(fd, "w") as fh:
                fh.write(json.dumps(data, indent=2, sort_keys=True))
-            atomic_replace(tmp_path, p)
+            os.replace(tmp_path, p)
        except Exception:
            try:
                os.unlink(tmp_path)
@@ -30,12 +30,10 @@ def generate_title(
    assistant_response: str,
    timeout: float = 30.0,
    failure_callback: Optional[FailureCallback] = None,
-    main_runtime: dict = None,
 ) -> Optional[str]:
    """Generate a session title from the first exchange.

-    Uses the main runtime's model when available, falling back to the
-    auxiliary LLM client (cheapest/fastest available model).
+    Uses the auxiliary LLM client (cheapest/fastest available model).
    Returns the title string or None on failure.

    ``failure_callback`` is invoked with ``(task, exception)`` when the
@@ -59,7 +57,6 @@ def generate_title(
            max_tokens=500,
            temperature=0.3,
            timeout=timeout,
-            main_runtime=main_runtime,
        )
        title = (response.choices[0].message.content or "").strip()
        # Clean up: remove quotes, trailing punctuation, prefixes like "Title: "
@@ -89,7 +86,6 @@ def auto_title_session(
    user_message: str,
    assistant_response: str,
    failure_callback: Optional[FailureCallback] = None,
-    main_runtime: dict = None,
 ) -> None:
    """Generate and set a session title if one doesn't already exist.

@@ -111,7 +107,7 @@ def auto_title_session(
        return

    title = generate_title(
-        user_message, assistant_response, failure_callback=failure_callback, main_runtime=main_runtime
+        user_message, assistant_response, failure_callback=failure_callback
    )
    if not title:
        return
@@ -130,7 +126,6 @@ def maybe_auto_title(
    assistant_response: str,
    conversation_history: list,
    failure_callback: Optional[FailureCallback] = None,
-    main_runtime: dict = None,
 ) -> None:
    """Fire-and-forget title generation after the first exchange.

@@ -152,7 +147,7 @@ def maybe_auto_title(
    thread = threading.Thread(
        target=auto_title_session,
        args=(session_db, session_id, user_message, assistant_response),
-        kwargs={"failure_callback": failure_callback, "main_runtime": main_runtime},
+        kwargs={"failure_callback": failure_callback},
        daemon=True,
        name="auto-title",
    )
@@ -85,6 +85,9 @@ class AnthropicTransport(ProviderTransport):
        from agent.anthropic_adapter import _to_plain_data
        from agent.transports.types import ToolCall

+        strip_tool_prefix = kwargs.get("strip_tool_prefix", False)
+        _MCP_PREFIX = "mcp_"
+
        text_parts = []
        reasoning_parts = []
        reasoning_details = []
@@ -99,10 +102,13 @@ class AnthropicTransport(ProviderTransport):
                if isinstance(block_dict, dict):
                    reasoning_details.append(block_dict)
            elif block.type == "tool_use":
+                name = block.name
+                if strip_tool_prefix and name.startswith(_MCP_PREFIX):
+                    name = name[len(_MCP_PREFIX):]
                tool_calls.append(
                    ToolCall(
                        id=block.id,
-                        name=block.name,
+                        name=name,
                        arguments=json.dumps(block.input),
                    )
                )
@@ -12,65 +12,12 @@ reasoning configuration, temperature handling, and extra_body assembly.
 import copy
 from typing import Any, Dict, List, Optional

-from agent.lmstudio_reasoning import resolve_lmstudio_effort
 from agent.moonshot_schema import is_moonshot_model, sanitize_moonshot_tools
 from agent.prompt_builder import DEVELOPER_ROLE_MODELS
 from agent.transports.base import ProviderTransport
 from agent.transports.types import NormalizedResponse, ToolCall, Usage


-def _build_gemini_thinking_config(model: str, reasoning_config: dict | None) -> dict | None:
-    """Translate Hermes/OpenRouter-style reasoning config to Gemini thinkingConfig.
-
-    Gemini native/cloud-code adapters do not read ``extra_body.reasoning``.
-    They only inspect ``extra_body.thinking_config`` / ``thinkingConfig`` and
-    then request thought parts with ``includeThoughts`` enabled.
-    """
-    if reasoning_config is None or not isinstance(reasoning_config, dict):
-        return None
-
-    if reasoning_config.get("enabled") is False:
-        # Gemini can hide thought parts even when internal thinking still
-        # happens; omit thinkingLevel to avoid model-specific validation quirks.
-        return {"includeThoughts": False}
-
-    effort = str(reasoning_config.get("effort", "medium") or "medium").strip().lower()
-    if effort == "none":
-        return {"includeThoughts": False}
-
-    thinking_config: Dict[str, Any] = {"includeThoughts": True}
-    normalized_model = (model or "").strip().lower()
-    if normalized_model.startswith("google/"):
-        normalized_model = normalized_model.split("/", 1)[1]
-
-    # Gemini 2.5 accepts thinkingBudget; don't guess a budget from Hermes'
-    # coarse effort levels. ``includeThoughts`` alone is enough to surface
-    # thought parts without risking request validation errors.
-    if normalized_model.startswith("gemini-2.5-"):
-        return thinking_config
-
-    if effort not in {"minimal", "low", "medium", "high", "xhigh"}:
-        effort = "medium"
-
-    # Gemini 3 Flash documents low/medium/high thinking levels; Gemini 3 Pro
-    # is stricter (low/high). Clamp Hermes' wider effort set to what each
-    # family accepts so we never forward an undocumented level verbatim.
-    if normalized_model.startswith(("gemini-3", "gemini-3.1")):
-        if "flash" in normalized_model:
-            if effort in {"minimal", "low"}:
-                thinking_config["thinkingLevel"] = "low"
-            elif effort in {"high", "xhigh"}:
-                thinking_config["thinkingLevel"] = "high"
-            else:
-                thinking_config["thinkingLevel"] = "medium"
-        elif "pro" in normalized_model:
-            thinking_config["thinkingLevel"] = (
-                "high" if effort in {"high", "xhigh"} else "low"
-            )
-
-    return thinking_config
-
-
 class ChatCompletionsTransport(ProviderTransport):
    """Transport for api_mode='chat_completions'.

@@ -154,7 +101,6 @@ class ChatCompletionsTransport(ProviderTransport):
            is_github_models: bool
            is_nvidia_nim: bool
            is_kimi: bool
-            is_lmstudio: bool
            is_custom_provider: bool
            ollama_num_ctx: int | None
            # Provider routing
@@ -168,7 +114,6 @@ class ChatCompletionsTransport(ProviderTransport):
            # Reasoning
            supports_reasoning: bool
            github_reasoning_extra: dict | None
-            lmstudio_reasoning_options: list[str] | None  # raw allowed_options from /api/v1/models
            # Claude on OpenRouter/Nous max output
            anthropic_max_output: int | None
            # Extra
@@ -243,7 +188,6 @@ class ChatCompletionsTransport(ProviderTransport):
        anthropic_max_out = params.get("anthropic_max_output")
        is_nvidia_nim = params.get("is_nvidia_nim", False)
        is_kimi = params.get("is_kimi", False)
-        is_tokenhub = params.get("is_tokenhub", False)
        reasoning_config = params.get("reasoning_config")

        if ephemeral is not None and max_tokens_fn:
@@ -275,40 +219,12 @@ class ChatCompletionsTransport(ProviderTransport):
                        _kimi_effort = _e
                api_kwargs["reasoning_effort"] = _kimi_effort

-        # Tencent TokenHub: top-level reasoning_effort (unless thinking disabled)
-        if is_tokenhub:
-            _tokenhub_thinking_off = bool(
-                reasoning_config
-                and isinstance(reasoning_config, dict)
-                and reasoning_config.get("enabled") is False
-            )
-            if not _tokenhub_thinking_off:
-                _tokenhub_effort = "high"
-                if reasoning_config and isinstance(reasoning_config, dict):
-                    _e = (reasoning_config.get("effort") or "").strip().lower()
-                    if _e in ("low", "medium", "high"):
-                        _tokenhub_effort = _e
-                api_kwargs["reasoning_effort"] = _tokenhub_effort
-
-        # LM Studio: top-level reasoning_effort. Only emit when the model
-        # declares reasoning support via /api/v1/models capabilities (gated
-        # upstream by params["supports_reasoning"]). resolve_lmstudio_effort
-        # is shared with run_agent's summary path so both stay in sync.
-        if params.get("is_lmstudio", False) and params.get("supports_reasoning", False):
-            _lm_effort = resolve_lmstudio_effort(
-                reasoning_config,
-                params.get("lmstudio_reasoning_options"),
-            )
-            if _lm_effort is not None:
-                api_kwargs["reasoning_effort"] = _lm_effort
-
        # extra_body assembly
        extra_body: Dict[str, Any] = {}

        is_openrouter = params.get("is_openrouter", False)
        is_nous = params.get("is_nous", False)
        is_github_models = params.get("is_github_models", False)
-        provider_name = str(params.get("provider_name") or "").strip().lower()

        provider_prefs = params.get("provider_preferences")
        if provider_prefs and is_openrouter:
@@ -324,9 +240,8 @@ class ChatCompletionsTransport(ProviderTransport):
                "type": "enabled" if _kimi_thinking_enabled else "disabled",
            }

-        # Reasoning. LM Studio is handled above via top-level reasoning_effort,
-        # so skip emitting extra_body.reasoning for it.
-        if params.get("supports_reasoning", False) and not params.get("is_lmstudio", False):
+        # Reasoning
+        if params.get("supports_reasoning", False):
            if is_github_models:
                gh_reasoning = params.get("github_reasoning_extra")
                if gh_reasoning is not None:
@@ -362,11 +277,6 @@ class ChatCompletionsTransport(ProviderTransport):
        if is_qwen:
            extra_body["vl_high_resolution_images"] = True

-        if provider_name in {"gemini", "google-gemini-cli"}:
-            thinking_config = _build_gemini_thinking_config(model, reasoning_config)
-            if thinking_config:
-                extra_body["thinking_config"] = thinking_config
-
        # Merge any pre-built extra_body additions
        additions = params.get("extra_body_additions")
        if additions:
@@ -8,7 +8,7 @@ streaming, or the _run_codex_stream() call path.
 from typing import Any, Dict, List, Optional

 from agent.transports.base import ProviderTransport
-from agent.transports.types import NormalizedResponse, ToolCall
+from agent.transports.types import NormalizedResponse, ToolCall, Usage


 class ResponsesApiTransport(ProviderTransport):
@@ -151,6 +151,8 @@ class ResponsesApiTransport(ProviderTransport):
        """Normalize Codex Responses API response to NormalizedResponse."""
        from agent.codex_responses_adapter import (
            _normalize_codex_response,
+            _extract_responses_message_text,
+            _extract_responses_reasoning_text,
        )

        # _normalize_codex_response returns (SimpleNamespace, finish_reason_str)
@@ -30,13 +30,14 @@ model:
  #   "ollama-cloud" - Ollama Cloud (requires: OLLAMA_API_KEY — https://ollama.com/settings)
  #   "kilocode"     - KiloCode gateway (requires: KILOCODE_API_KEY)
  #   "ai-gateway"   - Vercel AI Gateway (requires: AI_GATEWAY_API_KEY)
-  #   "lmstudio"     - LM Studio local server (optional: LM_API_KEY, defaults to http://127.0.0.1:1234/v1)
  #
  # Local servers (LM Studio, Ollama, vLLM, llama.cpp):
-  #   "custom"       - Any other OpenAI-compatible endpoint. Set base_url below.
-  #   Aliases: "ollama", "vllm", "llamacpp" all map to "custom".
-  #   LM Studio is first-class and uses provider: "lmstudio".
-  #   It works with both no-auth and auth-enabled server modes.
+  #   "custom"       - Any OpenAI-compatible endpoint. Set base_url below.
+  #   Aliases: "lmstudio", "ollama", "vllm", "llamacpp" all map to "custom".
+  #   Example for LM Studio:
+  #     provider: "lmstudio"
+  #     base_url: "http://localhost:1234/v1"
+  #   No API key needed — local servers typically ignore auth.
  #
  # Can also be overridden with --provider flag or HERMES_INFERENCE_PROVIDER env var.
  provider: "auto"
@@ -69,9 +69,7 @@ from agent.usage_pricing import (
    format_duration_compact,
    format_token_count_compact,
 )
-# NOTE: `from agent.account_usage import ...` is deliberately NOT at module
-# top — it transitively pulls the OpenAI SDK chain (~230 ms cold) and is only
-# needed when the user runs `/limits`. Lazy-imported inside the handler below.
+from agent.account_usage import fetch_account_usage, render_account_usage_lines
 from hermes_cli.banner import _format_context_length, format_banner_version_label

 _COMMAND_SPINNER_FRAMES = ("⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏")
@@ -5459,8 +5457,6 @@ class HermesCLI:
            try:
                providers = list_authenticated_providers(
                    current_provider=self.provider or "",
-                    current_base_url=self.base_url or "",
-                    current_model=self.model or "",
                    user_providers=user_provs,
                    custom_providers=custom_provs,
                    max_models=50,
@@ -6236,8 +6232,6 @@ class HermesCLI:
            self._console_print(f"  Status bar {state}")
        elif canonical == "verbose":
            self._toggle_verbose()
-        elif canonical == "footer":
-            self._handle_footer_command(cmd_original)
        elif canonical == "yolo":
            self._toggle_yolo()
        elif canonical == "reasoning":
@@ -6865,58 +6859,6 @@ class HermesCLI:
        if self._apply_tui_skin_style():
            print("  Prompt + TUI colors updated.")

-    def _handle_footer_command(self, cmd_original: str) -> None:
-        """Toggle or inspect ``display.runtime_footer.enabled`` from the CLI.
-
-        Usage:
-            /footer           → toggle
-            /footer on|off    → explicit
-            /footer status    → show current state
-        """
-        from hermes_cli.config import load_config
-        from hermes_cli.colors import Colors as _Colors
-
-        # Parse arg
-        arg = ""
-        try:
-            parts = (cmd_original or "").strip().split(None, 1)
-            if len(parts) > 1:
-                arg = parts[1].strip().lower()
-        except Exception:
-            arg = ""
-
-        cfg = load_config() or {}
-        footer_cfg = ((cfg.get("display") or {}).get("runtime_footer") or {})
-        current = bool(footer_cfg.get("enabled", False))
-        fields = footer_cfg.get("fields") or ["model", "context_pct", "cwd"]
-
-        if arg in ("status", "?"):
-            state = "ON" if current else "OFF"
-            _cprint(
-                f"  {_Colors.BOLD}Runtime footer:{_Colors.RESET} {state}\n"
-                f"  Fields: {', '.join(fields)}"
-            )
-            return
-
-        if arg in ("on", "enable", "true", "1"):
-            new_state = True
-        elif arg in ("off", "disable", "false", "0"):
-            new_state = False
-        elif arg == "":
-            new_state = not current
-        else:
-            _cprint("  Usage: /footer [on|off|status]")
-            return
-
-        if save_config_value("display.runtime_footer.enabled", new_state):
-            state = (
-                f"{_Colors.GREEN}ON{_Colors.RESET}" if new_state
-                else f"{_Colors.DIM}OFF{_Colors.RESET}"
-            )
-            _cprint(f"  Runtime footer: {state}")
-        else:
-            _cprint("  Failed to save runtime_footer setting to config.yaml")
-
    def _toggle_verbose(self):
        """Cycle tool progress mode: off → new → all → verbose → off."""
        cycle = ["off", "new", "all", "verbose"]
@@ -7157,15 +7099,9 @@ class HermesCLI:
                else:
                    print(f"🗜️  Compressing {original_count} messages (~{approx_tokens:,} tokens)...")

-                # Pass None as system_message so _compress_context rebuilds
-                # the system prompt from scratch via _build_system_prompt(None).
-                # Passing _cached_system_prompt caused duplication because
-                # _build_system_prompt appends system_message to prompt_parts
-                # which already contain the agent identity — resulting in the
-                # identity block appearing twice (issue #15281).
                compressed, _ = self.agent._compress_context(
                    original_history,
-                    None,
+                    self.agent._cached_system_prompt or "",
                    approx_tokens=approx_tokens,
                    focus_topic=focus_topic or None,
                )
@@ -7289,8 +7225,6 @@ class HermesCLI:
        provider = getattr(agent, "provider", None) or getattr(self, "provider", None)
        base_url = getattr(agent, "base_url", None) or getattr(self, "base_url", None)
        api_key = getattr(agent, "api_key", None) or getattr(self, "api_key", None)
-        # Lazy import — pulls the OpenAI SDK chain, only needed here.
-        from agent.account_usage import fetch_account_usage, render_account_usage_lines
        account_snapshot = None
        if provider:
            with concurrent.futures.ThreadPoolExecutor(max_workers=1) as _pool:
@@ -8880,13 +8814,6 @@ class HermesCLI:
                        response,
                        self.conversation_history,
                        failure_callback=_title_failure_cb,
-                        main_runtime={
-                            "model": self.model,
-                            "provider": self.provider,
-                            "base_url": self.base_url,
-                            "api_key": self.api_key,
-                            "api_mode": self.api_mode,
-                        },
                    )
                except Exception:
                    pass
@@ -21,7 +21,6 @@ from typing import Optional, Dict, List, Any, Union
 logger = logging.getLogger(__name__)

 from hermes_time import now as _hermes_now
-from utils import atomic_replace

 try:
    from croniter import croniter
@@ -368,7 +367,7 @@ def save_jobs(jobs: List[Dict[str, Any]]):
            json.dump({"jobs": jobs, "updated_at": _hermes_now().isoformat()}, f, indent=2)
            f.flush()
            os.fsync(f.fileno())
-        atomic_replace(tmp_path, JOBS_FILE)
+        os.replace(tmp_path, JOBS_FILE)
        _secure_file(JOBS_FILE)
    except BaseException:
        try:
@@ -864,7 +863,7 @@ def save_job_output(job_id: str, output: str):
            f.write(output)
            f.flush()
            os.fsync(f.fileno())
-        atomic_replace(tmp_path, output_file)
+        os.replace(tmp_path, output_file)
        _secure_file(output_file)
    except BaseException:
        try:
@@ -198,9 +198,7 @@ def _resolve_single_delivery_target(job: dict, deliver_value: str) -> Optional[d
            if resolved:
                parsed_chat_id, parsed_thread_id, resolved_is_explicit = _parse_target_ref(platform_key, resolved)
                if resolved_is_explicit:
-                    chat_id = parsed_chat_id
-                    if parsed_thread_id is not None:
-                        thread_id = parsed_thread_id
+                    chat_id, thread_id = parsed_chat_id, parsed_thread_id
                else:
                    chat_id = resolved
        except Exception:
@@ -0,0 +1,111 @@
+"""Built-in boot-md hook — run ~/.hermes/BOOT.md on gateway startup.
+
+This hook is always registered. It silently skips if no BOOT.md exists.
+To activate, create ``~/.hermes/BOOT.md`` with instructions for the
+agent to execute on every gateway restart.
+
+Example BOOT.md::
+
+    # Startup Checklist
+
+    1. Check if any cron jobs failed overnight
+    2. Send a status update to Discord #general
+    3. If there are errors in /opt/app/deploy.log, summarize them
+
+The agent runs in a background thread so it doesn't block gateway
+startup. If nothing needs attention, it replies with [SILENT]. This
+hook only logs the agent's final reply; it does not deliver it anywhere.
+"""
+
+import logging
+import threading
+
+logger = logging.getLogger("hooks.boot-md")
+
+from hermes_constants import get_hermes_home
+HERMES_HOME = get_hermes_home()
+BOOT_FILE = HERMES_HOME / "BOOT.md"
+
+
+def _build_boot_prompt(content: str) -> str:
+    """Wrap BOOT.md content in a system-level instruction.
+
+    Args:
+        content: BOOT.md text, embedded verbatim between ``---`` lines.
+
+    Returns:
+        The prompt string passed to the boot agent.
+    """
+    return (
+        "You are running a startup boot checklist. Follow the BOOT.md "
+        "instructions below exactly.\n\n"
+        "---\n"
+        f"{content}\n"
+        "---\n\n"
+        "Execute each instruction. If you need to send a message to a "
+        "platform, use the send_message tool.\n"
+        "If nothing needs attention and there is nothing to report, "
+        "reply with ONLY: [SILENT]"
+    )
+
+
+def _run_boot_agent(content: str) -> None:
+    """Spawn a one-shot agent session to execute the boot instructions.
+
+    Used as the target of the "boot-md" thread started by ``handle``. Any
+    exception is logged with its traceback instead of being raised.
+
+    Args:
+        content: BOOT.md text to wrap into the boot prompt.
+    """
+    try:
+        # Imported inside the try so an import failure is logged as well.
+        from run_agent import AIAgent
+
+        prompt = _build_boot_prompt(content)
+        agent = AIAgent(
+            quiet_mode=True,
+            skip_context_files=True,
+            skip_memory=True,
+            max_iterations=20,
+        )
+        result = agent.run_conversation(prompt)
+        response = result.get("final_response", "")
+        if response and "[SILENT]" not in response:
+            logger.info("boot-md completed: %s", response[:200])
+        else:
+            logger.info("boot-md completed (nothing to report)")
+    except Exception as e:
+        # exc_info=True keeps the traceback in the log record.
+        logger.error("boot-md agent failed: %s", e, exc_info=True)
+
+
+async def handle(event_type: str, context: dict) -> None:
+    """Gateway startup handler — run BOOT.md if it exists.
+
+    Args:
+        event_type: Not used by this handler.
+        context: Not used by this handler.
+    """
+    try:
+        if not BOOT_FILE.exists():
+            return
+        content = BOOT_FILE.read_text(encoding="utf-8").strip()
+    except (OSError, UnicodeDecodeError) as e:
+        # An unreadable or non-UTF-8 BOOT.md is logged and skipped so the
+        # error does not propagate out of the startup handler.
+        logger.error("boot-md could not read %s: %s", BOOT_FILE, e)
+        return
+    if not content:
+        return
+
+    logger.info("Running BOOT.md (%d chars)", len(content))
+
+    # Run in a background thread so we don't block gateway startup.
+    thread = threading.Thread(
+        target=_run_boot_agent,
+        args=(content,),
+        name="boot-md",
+        daemon=True,
+    )
+    thread.start()
@@ -52,13 +52,19 @@ class HookRegistry:
        return list(self._loaded_hooks)

    def _register_builtin_hooks(self) -> None:
-        """Register built-in hooks that are always active.
+        """Register built-in hooks that are always active."""
+        try:  # import sits inside the try so an import failure is reported below
+            from gateway.builtin_hooks.boot_md import handle as boot_md_handle

-        Currently empty — no shipped built-in hooks. Kept as the extension
-        point for future always-on gateway hooks so they drop in without
-        re-plumbing discover_and_load().
-        """
-        return
+            self._handlers.setdefault("gateway:startup", []).append(boot_md_handle)
+            self._loaded_hooks.append({  # metadata entry describing the built-in hook
+                "name": "boot-md",
+                "description": "Run ~/.hermes/BOOT.md on gateway startup",
+                "events": ["gateway:startup"],
+                "path": "(builtin)",
+            })
+        except Exception as e:  # best-effort: print the error and continue without boot-md
+            print(f"[hooks] Could not load built-in boot-md hook: {e}", flush=True)

    def discover_and_load(self) -> None:
        """
@@ -28,7 +28,6 @@ from pathlib import Path
 from typing import Optional

 from hermes_constants import get_hermes_dir
-from utils import atomic_replace


 # Unambiguous alphabet -- excludes 0/O, 1/I to prevent confusion
@@ -60,7 +59,7 @@ def _secure_write(path: Path, data: str) -> None:
            f.write(data)
            f.flush()
            os.fsync(f.fileno())
-        atomic_replace(tmp_path, path)
+        os.replace(tmp_path, str(path))
        try:
            os.chmod(path, 0o600)
        except OSError:
@@ -907,41 +907,6 @@ class MessageEvent:
        return args


-_PLAINTEXT_GATEWAY_RESTART_PATTERNS: tuple[re.Pattern[str], ...] = (
-    re.compile(r"^(?:please\s+)?restart\s+(?:the\s+)?gateway[.!?\s]*$", re.IGNORECASE),
-    re.compile(r"^(?:please\s+)?restart\s+(?:the\s+)?hermes\s+gateway[.!?\s]*$", re.IGNORECASE),
-    re.compile(r"^(?:please\s+)?restart\s+hermes[.!?\s]*$", re.IGNORECASE),
-)
-
-
-def coerce_plaintext_gateway_command(event: "MessageEvent") -> None:
-    """Rewrite a tiny set of DM plaintext admin phrases into slash commands.
-
-    This keeps high-impact operational phrases like ``restart gateway`` out of
-    the LLM/tool path, where they can trigger a self-restart from inside the
-    currently running agent and leave the gateway stuck in ``draining`` while it
-    waits for that same agent to finish.
-
-    Scope is intentionally narrow: DM text messages only, exact restart-style
-    phrases only. Group chats keep natural-language semantics.
-    """
-    try:
-        if event is None or event.message_type != MessageType.TEXT:
-            return
-        text = (event.text or "").strip()
-        if not text or text.startswith("/"):
-            return
-        source = getattr(event, "source", None)
-        if getattr(source, "chat_type", None) != "dm":
-            return
-        for pattern in _PLAINTEXT_GATEWAY_RESTART_PATTERNS:
-            if pattern.match(text):
-                event.text = "/restart"
-                return
-    except Exception:
-        return
-
-
@dataclass 
 class SendResult:
    """Result of sending a message."""
@@ -2228,8 +2193,6 @@ class BasePlatformAdapter(ABC):
        """
        if not self._message_handler:
            return
-
-        coerce_plaintext_gateway_command(event)
        
        session_key = build_session_key(
            event.source,
@@ -2469,11 +2432,15 @@ class BasePlatformAdapter(ABC):
                # Send the text portion
                if text_content:
                    logger.info("[%s] Sending response (%d chars) to %s", self.name, len(text_content), event.source.chat_id)
+                    # Build send metadata: thread_id + mention target for platforms that need it
+                    send_metadata = dict(_thread_metadata) if _thread_metadata else {}
+                    if event.source.user_id:
+                        send_metadata["mention_user_id"] = event.source.user_id
                    result = await self._send_with_retry(
                        chat_id=event.source.chat_id,
                        content=text_content,
                        reply_to=event.message_id,
-                        metadata=_thread_metadata,
+                        metadata=send_metadata,
                    )
                    _record_delivery(result)

@@ -305,7 +305,7 @@ class VoiceReceiver:
        encrypted = bytes(payload_with_nonce[:-4])

        try:
-            import nacl.secret  # noqa: E402 — delayed import, only in voice path
+            import nacl.secret  # noqa: delayed import – only in voice path
            box = nacl.secret.Aead(self._secret_key)
            decrypted = box.decrypt(encrypted, header, bytes(nonce))
        except Exception as e:
@@ -813,14 +813,7 @@ class DiscordAdapter(BasePlatformAdapter):
                logger.info("[%s] Synced %d slash command(s) via bulk tree sync", self.name, len(synced))
                return

-            # Discord's per-app command-management bucket is ~5 writes / 20 s,
-            # so a mass-prune-plus-upsert reconcile (e.g. 77 orphans + 30
-            # desired = 107 writes) takes several minutes of forced waits.
-            # A flat 30 s budget blew up reliably under bucket pressure and
-            # left slash commands broken for ~60 min until the bucket fully
-            # recovered. Use a wide ceiling; the cap still guards against a
-            # true hang. (#16713)
-            summary = await asyncio.wait_for(self._safe_sync_slash_commands(), timeout=600)
+            summary = await asyncio.wait_for(self._safe_sync_slash_commands(), timeout=30)
            logger.info(
                "[%s] Safely reconciled %d slash command(s): unchanged=%d updated=%d recreated=%d created=%d deleted=%d",
                self.name,
@@ -832,11 +825,7 @@ class DiscordAdapter(BasePlatformAdapter):
                summary["deleted"],
            )
        except asyncio.TimeoutError:
-            logger.warning(
-                "[%s] Slash command sync timed out — Discord rate-limit bucket "
-                "may be saturated; will retry on next reconnect",
-                self.name,
-            )
+            logger.warning("[%s] Slash command sync timed out after 30s", self.name)
        except asyncio.CancelledError:
            raise
        except Exception as e:  # pragma: no cover - defensive logging
@@ -974,6 +974,7 @@ def build_whole_comment_prompt(

 def _resolve_model_and_runtime() -> Tuple[str, dict]:
    """Resolve model and provider credentials, same as gateway message handling."""
+    import os
    from gateway.run import _load_gateway_config, _resolve_gateway_model

    user_config = _load_gateway_config()
@@ -11,10 +11,10 @@ import logging
 import re
 import time
 from pathlib import Path
-from typing import TYPE_CHECKING, Dict
+from typing import TYPE_CHECKING, Dict, Optional

 if TYPE_CHECKING:
-    from gateway.platforms.base import MessageEvent
+    from gateway.platforms.base import BasePlatformAdapter, MessageEvent

 logger = logging.getLogger(__name__)

@@ -879,6 +879,8 @@ class MatrixAdapter(BasePlatformAdapter):
        if not content:
            return SendResult(success=True)

+        mention_user_id = (metadata or {}).get("mention_user_id")
+
        formatted = self.format_message(content)
        chunks = self.truncate_message(formatted, MAX_MESSAGE_LENGTH)

@@ -886,6 +888,24 @@ class MatrixAdapter(BasePlatformAdapter):
        for i, chunk in enumerate(chunks):
            msg_content = self._build_text_message_content(chunk)

+            # Append @mention pill to the last chunk for push notifications
+            # in muted rooms (mention-only mode).
+            if mention_user_id and i == len(chunks) - 1:
+                mention_html = (
+                    f'<a href="https://matrix.to/#/{mention_user_id}">'
+                    f"{mention_user_id}</a>"
+                )
+                msg_content["body"] = chunk + f" @{mention_user_id}"
+                base_html = msg_content.get("formatted_body", chunk)
+                msg_content["format"] = "org.matrix.custom.html"
+                msg_content["formatted_body"] = base_html + " " + mention_html
+                # m.mentions for MSC3952 push reliability.
+                existing_mentions = msg_content.get("m.mentions", {}).get("user_ids", [])
+                if mention_user_id not in existing_mentions:
+                    msg_content["m.mentions"] = {
+                        "user_ids": existing_mentions + [mention_user_id]
+                    }
+
            # Reply-to support.
            if reply_to:
                msg_content["m.relates_to"] = {"m.in_reply_to": {"event_id": reply_to}}
@@ -412,6 +412,7 @@ class MattermostAdapter(BasePlatformAdapter):

        import aiohttp

+        last_exc = None
        file_data = None
        ct = "application/octet-stream"
        fname = url.rsplit("/", 1)[-1].split("?")[0] or f"{kind}.png"
@@ -1957,7 +1957,7 @@ class QQAdapter(BasePlatformAdapter):
            self, openid: str, content: str, reply_to: Optional[str] = None
    ) -> SendResult:
        """Send text to a C2C user via REST API."""
-        self._next_msg_seq(reply_to or openid)
+        msg_seq = self._next_msg_seq(reply_to or openid)
        body = self._build_text_body(content, reply_to)
        if reply_to:
            body["msg_id"] = reply_to
@@ -1970,7 +1970,7 @@ class QQAdapter(BasePlatformAdapter):
            self, group_openid: str, content: str, reply_to: Optional[str] = None
    ) -> SendResult:
        """Send text to a group via REST API."""
-        self._next_msg_seq(reply_to or group_openid)
+        msg_seq = self._next_msg_seq(reply_to or group_openid)
        body = self._build_text_body(content, reply_to)
        if reply_to:
            body["msg_id"] = reply_to
@@ -2135,6 +2135,11 @@ class QQAdapter(BasePlatformAdapter):

            # Route
            chat_type = self._guess_chat_type(chat_id)
+            target_path = (
+                f"/v2/users/{chat_id}/files"
+                if chat_type == "c2c"
+                else f"/v2/groups/{chat_id}/files"
+            )

            if chat_type == "guild":
                # Guild channels don't support native media upload in the same way
@@ -84,7 +84,6 @@ from gateway.platforms.telegram_network import (
    discover_fallback_ips,
    parse_fallback_ip_env,
 )
-from utils import atomic_replace


 def check_telegram_requirements() -> bool:
@@ -123,12 +122,12 @@ def _strip_mdv2(text: str) -> str:


 # ---------------------------------------------------------------------------
-# Markdown table → Telegram-friendly row groups
+# Markdown table → code block conversion
 # ---------------------------------------------------------------------------
 # Telegram's MarkdownV2 has no table syntax — '|' is just an escaped literal,
 # so pipe tables render as noisy backslash-pipe text with no alignment.
-# Reformating each row into a bold heading plus bullet list keeps the content
-# readable on mobile clients while preserving the source data.
+# Wrapping the table in a fenced code block makes Telegram render it as
+# monospace preformatted text with columns intact.

 # Matches a GFM table delimiter row: optional outer pipes, cells containing
 # only dashes (with optional leading/trailing colons for alignment) separated
@@ -145,49 +144,13 @@ def _is_table_row(line: str) -> bool:
    return bool(stripped) and '|' in stripped


-def _split_markdown_table_row(line: str) -> list[str]:
-    """Split a simple GFM table row into stripped cell values."""
-    stripped = line.strip()
-    if stripped.startswith("|"):
-        stripped = stripped[1:]
-    if stripped.endswith("|"):
-        stripped = stripped[:-1]
-    return [cell.strip() for cell in stripped.split("|")]
-
-
-def _render_table_block_for_telegram(table_block: list[str]) -> str:
-    """Render a detected GFM table as Telegram-friendly row groups."""
-    if len(table_block) < 3:
-        return "\n".join(table_block)
-
-    headers = _split_markdown_table_row(table_block[0])
-    if len(headers) < 2:
-        return "\n".join(table_block)
-
-    rendered_rows: list[str] = []
-    for index, row in enumerate(table_block[2:], start=1):
-        cells = _split_markdown_table_row(row)
-        if len(cells) < len(headers):
-            cells.extend([""] * (len(headers) - len(cells)))
-        elif len(cells) > len(headers):
-            cells = cells[: len(headers)]
-
-        heading = next((cell for cell in cells if cell), f"Row {index}")
-        rendered_rows.append(f"**{heading}**")
-        rendered_rows.extend(
-            f"• {header}: {value}" for header, value in zip(headers, cells)
-        )
-
-    return "\n\n".join(rendered_rows)
-
-
 def _wrap_markdown_tables(text: str) -> str:
-    """Rewrite GFM-style pipe tables into Telegram-friendly bullet groups.
+    """Wrap GFM-style pipe tables in ``` fences so Telegram renders them.

    Detected by a row containing '|' immediately followed by a delimiter
    row matching :data:`_TABLE_SEPARATOR_RE`.  Subsequent pipe-containing
-    non-blank lines are consumed as the table body and rewritten as
-    per-row bullet groups. Tables inside existing fenced code blocks are left
+    non-blank lines are consumed as the table body and included in the
+    wrapped block.  Tables inside existing fenced code blocks are left
    alone.
    """
    if '|' not in text or '-' not in text:
@@ -224,7 +187,9 @@ def _wrap_markdown_tables(text: str) -> str:
            while j < len(lines) and _is_table_row(lines[j]):
                table_block.append(lines[j])
                j += 1
-            out.append(_render_table_block_for_telegram(table_block))
+            out.append('```')
+            out.extend(table_block)
+            out.append('```')
            i = j
            continue

@@ -369,49 +334,6 @@ class TelegramAdapter(BasePlatformAdapter):
            return {"link_preview_options": LinkPreviewOptions(is_disabled=True)}
        return {"disable_web_page_preview": True}

-    async def _drain_polling_connections(self) -> None:
-        """Reset the httpx connection pool used for getUpdates polling.
-
-        Network errors (especially through proxies like sing-box) can leave
-        httpx connections in a half-closed state that still occupy pool slots.
-        After enough reconnect cycles the pool fills up entirely, causing
-        ``Pool timeout: All connections in the connection pool are occupied.``
-
-        We reset ONLY ``_request[0]`` (the getUpdates request) — the general
-        request (``_request[1]``) is left untouched so concurrent
-        ``send_message`` / ``edit_message`` calls are never interrupted.
-
-        Implementation note: accesses ``Bot._request[0]`` which is the
-        get-updates ``BaseRequest`` in the PTB 22.x internal tuple
-        ``(get_updates_request, general_request)``.  There is no public
-        accessor for the polling request; review if upgrading to PTB 23+.
-        """
-        if not (self._app and self._app.bot):
-            return
-        try:
-            # PTB 22.x: _request is a (get_updates, general) tuple;
-            # no public accessor exists for the polling request.
-            polling_req = self._app.bot._request[0]  # noqa: SLF001
-        except Exception:
-            return
-        try:
-            await polling_req.shutdown()
-        except Exception:
-            logger.debug(
-                "[%s] Polling request shutdown failed (non-fatal)",
-                self.name, exc_info=True,
-            )
-        try:
-            await polling_req.initialize()
-            logger.debug(
-                "[%s] Polling request pool drained before reconnect", self.name
-            )
-        except Exception:
-            logger.debug(
-                "[%s] Polling request re-initialize failed (non-fatal)",
-                self.name, exc_info=True,
-            )
-
    async def _handle_polling_network_error(self, error: Exception) -> None:
        """Reconnect polling after a transient network interruption.

@@ -457,8 +379,6 @@ class TelegramAdapter(BasePlatformAdapter):
        except Exception:
            pass

-        await self._drain_polling_connections()
-
        try:
            await self._app.updater.start_polling(
                allowed_updates=Update.ALL_TYPES,
@@ -506,7 +426,6 @@ class TelegramAdapter(BasePlatformAdapter):
            except Exception:
                pass
            await asyncio.sleep(RETRY_DELAY)
-            await self._drain_polling_connections()
            try:
                await self._app.updater.start_polling(
                    allowed_updates=Update.ALL_TYPES,
@@ -635,7 +554,7 @@ class TelegramAdapter(BasePlatformAdapter):
                        _yaml.dump(config, f, default_flow_style=False, sort_keys=False)
                        f.flush()
                        os.fsync(f.fileno())
-                    atomic_replace(tmp_path, config_path)
+                    os.replace(tmp_path, config_path)
                except BaseException:
                    try:
                        os.unlink(tmp_path)
@@ -2161,8 +2080,10 @@ class TelegramAdapter(BasePlatformAdapter):

        text = content

-        # 0) Rewrite GFM-style pipe tables into Telegram-friendly row groups
-        #    before the normal MarkdownV2 conversions run.
+        # 0) Pre-wrap GFM-style pipe tables in ``` fences.  Telegram can't
+        #    render tables natively, but fenced code blocks render as
+        #    monospace preformatted text with columns intact.  The wrapped
+        #    tables then flow through step (1) below as protected regions.
        text = _wrap_markdown_tables(text)

        # 1) Protect fenced code blocks (``` ... ```)
@@ -89,7 +89,6 @@ MAX_CONSECUTIVE_FAILURES = 3
 RETRY_DELAY_SECONDS = 2
 BACKOFF_DELAY_SECONDS = 30
 SESSION_EXPIRED_ERRCODE = -14
-RATE_LIMIT_ERRCODE = -2  # iLink frequency limit — backoff and retry
 MESSAGE_DEDUP_TTL_SECONDS = 300

 MEDIA_IMAGE = 1
@@ -1114,7 +1113,7 @@ async def qr_login(
 class WeixinAdapter(BasePlatformAdapter):
    """Native Hermes adapter for Weixin personal accounts."""

-    MAX_MESSAGE_LENGTH = 2000
+    MAX_MESSAGE_LENGTH = 4000

    # WeChat does not support editing sent messages — streaming must use the
    # fallback "send-final-only" path so the cursor (▉) is never left visible.
@@ -1139,10 +1138,10 @@ class WeixinAdapter(BasePlatformAdapter):
            extra.get("cdn_base_url") or os.getenv("WEIXIN_CDN_BASE_URL", WEIXIN_CDN_BASE_URL)
        ).strip().rstrip("/")
        self._send_chunk_delay_seconds = float(
-            extra.get("send_chunk_delay_seconds") or os.getenv("WEIXIN_SEND_CHUNK_DELAY_SECONDS", "1.5")
+            extra.get("send_chunk_delay_seconds") or os.getenv("WEIXIN_SEND_CHUNK_DELAY_SECONDS", "0.35")
        )
        self._send_chunk_retries = int(
-            extra.get("send_chunk_retries") or os.getenv("WEIXIN_SEND_CHUNK_RETRIES", "4")
+            extra.get("send_chunk_retries") or os.getenv("WEIXIN_SEND_CHUNK_RETRIES", "2")
        )
        self._send_chunk_retry_delay_seconds = float(
            extra.get("send_chunk_retry_delay_seconds")
@@ -1532,28 +1531,6 @@ class WeixinAdapter(BasePlatformAdapter):
                                self.name, _safe_id(chat_id),
                            )
                            continue
-                        # Rate limit (-2) — backoff and retry
-                        is_rate_limited = (
-                            ret == RATE_LIMIT_ERRCODE
-                            or errcode == RATE_LIMIT_ERRCODE
-                        )
-                        if is_rate_limited:
-                            errmsg = resp.get("errmsg") or resp.get("msg") or "rate limited"
-                            # Record the error so we raise a descriptive
-                            # RuntimeError (instead of AssertionError) if the
-                            # loop exhausts with the server still rate-limiting.
-                            last_error = RuntimeError(
-                                f"iLink sendmessage rate limited: ret={ret} errcode={errcode} errmsg={errmsg}"
-                            )
-                            if attempt >= self._send_chunk_retries:
-                                break
-                            wait = self._send_chunk_retry_delay_seconds * 3  # 3x backoff for rate limit
-                            logger.warning(
-                                "[%s] rate limited for %s; backing off %.1fs before retry",
-                                self.name, _safe_id(chat_id), wait,
-                            )
-                            await asyncio.sleep(wait)
-                            continue
                        errmsg = resp.get("errmsg") or resp.get("msg") or "unknown error"
                        raise RuntimeError(
                            f"iLink sendmessage error: ret={ret} errcode={errcode} errmsg={errmsg}"
@@ -90,7 +90,7 @@ from gateway.platforms.yuanbao_proto import (
    encode_get_group_member_list,
    next_seq_no,
 )
-from gateway.session import build_session_key
+from gateway.session import SessionSource, build_session_key

 logger = logging.getLogger(__name__)

@@ -1897,7 +1897,7 @@ class OwnerCommandMiddleware(InboundMiddleware):
            return None, None, False

        # Sender identity check: bot owner <-> push.from_account == push.bot_owner_id
-        # owner_id = (push or {}).get("bot_owner_id") or ""
+        owner_id = (push or {}).get("bot_owner_id") or ""
        # is_owner = bool(owner_id) and owner_id == from_account
        is_owner = True
        return cmd, cmd_line, is_owner
@@ -21,10 +21,12 @@ import hashlib
 import hmac
 import logging
 import os
+import re
 import secrets
 import struct
 import time
 import urllib.parse
+from datetime import datetime, timezone, timedelta
 from typing import Optional, Any

 import httpx
@@ -19,8 +19,9 @@ yuanbao_proto.py - Yuanbao WebSocket 协议编解码（纯 Python 实现）
 from __future__ import annotations

 import logging
+import struct
 import threading
-from typing import Optional
+from typing import Optional, Union

 logger = logging.getLogger(__name__)

@@ -31,12 +31,6 @@ from pathlib import Path
 from datetime import datetime
 from typing import Dict, Optional, Any, List

-# account_usage imports the OpenAI SDK chain (~230 ms). Only needed by
-# /usage; we still import it at module top in the gateway because test
-# patches (tests/gateway/test_usage_command.py) target
-# `gateway.run.fetch_account_usage` as a module-level attribute. The
-# gateway is a long-running daemon, so its boot cost matters less than
-# preserving the established test-patch surface.
 from agent.account_usage import fetch_account_usage, render_account_usage_lines

 # --- Agent cache tuning ---------------------------------------------------
@@ -46,133 +40,6 @@ from agent.account_usage import fetch_account_usage, render_account_usage_lines
 # from _enforce_agent_cache_cap() and _session_expiry_watcher() below.
 _AGENT_CACHE_MAX_SIZE = 128
 _AGENT_CACHE_IDLE_TTL_SECS = 3600.0  # evict agents idle for >1h
-# Only auto-continue interrupted gateway turns while the interruption is fresh.
-# Stale tool-tail/resume markers can otherwise revive an unrelated old task
-# after a gateway restart when the user's next message starts new work.
-#
-# The freshness signal is the timestamp of the last transcript row, which
-# ``hermes_state.get_messages`` carries on every persisted message.  This
-# handles the two auto-continue cases uniformly:
-#   * resume_pending (gateway restart/shutdown watchdog marked the session)
-#   * tool-tail     (last persisted message is a tool result the agent
-#                    never got to reply to)
-# In both cases "when did we last do anything on this transcript" is the
-# correct freshness question, so one signal replaces two divergent ones.
-#
-# Default window: 1 hour.  This comfortably covers ``agent.gateway_timeout``
-# (30 min default) plus runtime slack — a legitimate long-running turn that
-# gets interrupted near its timeout boundary and is resumed shortly after
-# is still classified fresh.  Override via
-# ``config.yaml`` ``agent.gateway_auto_continue_freshness``.
-_AUTO_CONTINUE_FRESHNESS_SECS_DEFAULT = 60 * 60
-
-
-def _coerce_gateway_timestamp(value: Any) -> Optional[float]:
-    """Best-effort conversion of stored gateway timestamps to epoch seconds.
-
-    Missing/unparseable timestamps return None so legacy transcripts keep the
-    historical auto-continue behaviour instead of being silently dropped.
-    Accepts: datetime, epoch seconds (int/float), epoch milliseconds (when
-    the magnitude exceeds year-2286), ISO-8601 strings (with or without a
-    trailing ``Z``), and numeric strings.
-    """
-    if value is None:
-        return None
-    if isinstance(value, datetime):
-        return value.timestamp()
-    if isinstance(value, bool):  # bool is a subclass of int — skip it
-        return None
-    if isinstance(value, (int, float)):
-        # Some platform events use milliseconds; Hermes state rows use seconds.
-        return float(value) / 1000.0 if float(value) > 10_000_000_000 else float(value)
-    if isinstance(value, str):
-        text = value.strip()
-        if not text:
-            return None
-        try:
-            numeric = float(text)
-            return numeric / 1000.0 if numeric > 10_000_000_000 else numeric
-        except ValueError:
-            pass
-        try:
-            return datetime.fromisoformat(text.replace("Z", "+00:00")).timestamp()
-        except ValueError:
-            return None
-    return None
-
-
-def _auto_continue_freshness_window() -> float:
-    """Return the configured auto-continue freshness window in seconds.
-
-    Reads ``HERMES_AUTO_CONTINUE_FRESHNESS`` (bridged from
-    ``config.yaml`` ``agent.gateway_auto_continue_freshness`` at gateway
-    startup, same pattern as ``HERMES_AGENT_TIMEOUT``).  Falls back to the
-    module default when unset or malformed.  Non-positive values disable
-    the freshness gate (restores the pre-fix "always fresh" behaviour for
-    users who want to opt out).
-    """
-    raw = os.environ.get("HERMES_AUTO_CONTINUE_FRESHNESS")
-    if raw is None or raw == "":
-        return float(_AUTO_CONTINUE_FRESHNESS_SECS_DEFAULT)
-    try:
-        return float(raw)
-    except (TypeError, ValueError):
-        return float(_AUTO_CONTINUE_FRESHNESS_SECS_DEFAULT)
-
-
-def _is_fresh_gateway_interruption(
-    value: Any,
-    *,
-    now: Optional[float] = None,
-    window_secs: Optional[float] = None,
-) -> bool:
-    """Return True when an interruption marker is fresh enough to auto-continue.
-
-    Unknown timestamps are treated as fresh for backward compatibility with
-    legacy transcripts (pre-dating timestamp persistence) and with in-memory
-    test scaffolding that constructs history entries without timestamps.
-
-    A non-positive ``window_secs`` disables the gate (always fresh), which
-    restores the pre-fix behaviour for users who opt out via config.
-    """
-    window = (
-        float(window_secs)
-        if window_secs is not None
-        else float(_AUTO_CONTINUE_FRESHNESS_SECS_DEFAULT)
-    )
-    if window <= 0:
-        return True
-    timestamp = _coerce_gateway_timestamp(value)
-    if timestamp is None:
-        return True
-    current = time.time() if now is None else now
-    return current - timestamp <= window
-
-
-def _last_transcript_timestamp(history: Optional[List[Dict[str, Any]]]) -> Any:
-    """Return the ``timestamp`` of the last usable transcript row, if any.
-
-    Skips metadata-only rows (``session_meta``, system injections) that are
-    dropped before being handed to the agent.  Returns ``None`` when no
-    usable row carries a timestamp — callers should treat that as "fresh"
-    for backward compatibility.
-    """
-    if not history:
-        return None
-    for msg in reversed(history):
-        if not isinstance(msg, dict):
-            continue
-        role = msg.get("role")
-        if not role or role in ("session_meta", "system"):
-            continue
-        ts = msg.get("timestamp")
-        if ts is not None:
-            return ts
-        # First non-meta row without a timestamp — legacy transcript row.
-        # Returning None lets the caller fall through to the legacy-fresh path.
-        return None
-    return None
-

 # ---------------------------------------------------------------------------
 # SSL certificate auto-detection for NixOS and other non-standard systems.
@@ -346,13 +213,6 @@ if _config_path.exists():
                os.environ["HERMES_AGENT_NOTIFY_INTERVAL"] = str(_agent_cfg["gateway_notify_interval"])
            if "restart_drain_timeout" in _agent_cfg and "HERMES_RESTART_DRAIN_TIMEOUT" not in os.environ:
                os.environ["HERMES_RESTART_DRAIN_TIMEOUT"] = str(_agent_cfg["restart_drain_timeout"])
-            if (
-                "gateway_auto_continue_freshness" in _agent_cfg
-                and "HERMES_AUTO_CONTINUE_FRESHNESS" not in os.environ
-            ):
-                os.environ["HERMES_AUTO_CONTINUE_FRESHNESS"] = str(
-                    _agent_cfg["gateway_auto_continue_freshness"]
-                )
        _display_cfg = _cfg.get("display", {})
        if _display_cfg and isinstance(_display_cfg, dict):
            if "busy_input_mode" in _display_cfg and "HERMES_GATEWAY_BUSY_INPUT_MODE" not in os.environ:
@@ -649,31 +509,15 @@ def _platform_config_key(platform: "Platform") -> str:


 def _load_gateway_config() -> dict:
-    """Load and parse ~/.hermes/config.yaml, returning {} on any error.
-
-    Uses the module-level ``_hermes_home`` (so tests that monkeypatch it
-    still see their fixture) and shares the mtime-keyed raw-yaml cache
-    from ``hermes_cli.config.read_raw_config`` when the paths match.
-    """
-    config_path = _hermes_home / 'config.yaml'
-    try:
-        from hermes_cli.config import get_config_path, read_raw_config
-        # Fast path: if _hermes_home agrees with the canonical config
-        # location, reuse the shared cache. Otherwise fall through to a
-        # direct read (keeps test fixtures with a monkeypatched
-        # _hermes_home working).
-        if config_path == get_config_path():
-            return read_raw_config()
-    except Exception:
-        pass
-
+    """Load and parse ~/.hermes/config.yaml, returning {} on any error."""
    try:
+        config_path = _hermes_home / 'config.yaml'
        if config_path.exists():
            import yaml
            with open(config_path, 'r', encoding='utf-8') as f:
                return yaml.safe_load(f) or {}
    except Exception:
-        logger.debug("Could not load gateway config from %s", config_path)
+        logger.debug("Could not load gateway config from %s", _hermes_home / 'config.yaml')
    return {}


@@ -1293,14 +1137,14 @@ class GatewayRunner:

        service_tier = getattr(self, "_service_tier", None)
        if not service_tier:
-            route["request_overrides"] = {}
+            route["request_overrides"] = None
            return route

        try:
            overrides = resolve_fast_mode_overrides(route["model"])
        except Exception:
            overrides = None
-        route["request_overrides"] = overrides or {}
+        route["request_overrides"] = overrides
        return route

    async def _handle_adapter_fatal_error(self, adapter: BasePlatformAdapter) -> None:
@@ -3927,8 +3771,6 @@ class GatewayRunner:
                    return await self._handle_yolo_command(event)
                if _cmd_def_inner.name == "verbose":
                    return await self._handle_verbose_command(event)
-                if _cmd_def_inner.name == "footer":
-                    return await self._handle_footer_command(event)

            # Gateway-handled info/control commands with dedicated
            # running-agent handlers.
@@ -4149,9 +3991,6 @@ class GatewayRunner:
        if canonical == "verbose":
            return await self._handle_verbose_command(event)

-        if canonical == "footer":
-            return await self._handle_footer_command(event)
-
        if canonical == "yolo":
            return await self._handle_yolo_command(event)

@@ -4607,7 +4446,9 @@ class GatewayRunner:
        # Read privacy.redact_pii from config (re-read per message)
        _redact_pii = False
        try:
-            _pcfg = _load_gateway_config()
+            import yaml as _pii_yaml
+            with open(_config_path, encoding="utf-8") as _pf:
+                _pcfg = _pii_yaml.safe_load(_pf) or {}
            _redact_pii = bool((_pcfg.get("privacy") or {}).get("redact_pii", False))
        except Exception:
            pass
@@ -4750,15 +4591,18 @@ class GatewayRunner:
            _hyg_model = "anthropic/claude-sonnet-4.6"
            _hyg_threshold_pct = 0.85
            _hyg_compression_enabled = True
-            _hyg_hard_msg_limit = 400
            _hyg_config_context_length = None
            _hyg_provider = None
            _hyg_base_url = None
            _hyg_api_key = None
            _hyg_data = {}
            try:
-                _hyg_data = _load_gateway_config()
-                if _hyg_data:
+                _hyg_cfg_path = _hermes_home / "config.yaml"
+                if _hyg_cfg_path.exists():
+                    import yaml as _hyg_yaml
+                    with open(_hyg_cfg_path, encoding="utf-8") as _hyg_f:
+                        _hyg_data = _hyg_yaml.safe_load(_hyg_f) or {}
+
                    # Resolve model name (same logic as run_sync)
                    _model_cfg = _hyg_data.get("model", {})
                    if isinstance(_model_cfg, str):
@@ -4785,14 +4629,6 @@ class GatewayRunner:
                        _hyg_compression_enabled = str(
                            _comp_cfg.get("enabled", True)
                        ).lower() in ("true", "1", "yes")
-                        _raw_hard_limit = _comp_cfg.get("hygiene_hard_message_limit")
-                        if _raw_hard_limit is not None:
-                            try:
-                                _parsed = int(_raw_hard_limit)
-                                if _parsed > 0:
-                                    _hyg_hard_msg_limit = _parsed
-                            except (TypeError, ValueError):
-                                pass

                try:
                    _hyg_model, _hyg_runtime = self._resolve_session_agent_runtime(
@@ -4874,10 +4710,8 @@ class GatewayRunner:
                # collection, which prevents compression, which causes more
                # disconnects.  400 messages is well above normal sessions
                # but catches runaway growth before it becomes unrecoverable.
-                # Threshold is configurable via
-                # compression.hygiene_hard_message_limit.
                # (#2153)
-                _HARD_MSG_LIMIT = _hyg_hard_msg_limit
+                _HARD_MSG_LIMIT = 400
                _needs_compress = (
                    _approx_tokens >= _compress_token_threshold
                    or _msg_count >= _HARD_MSG_LIMIT
@@ -5245,27 +5079,6 @@ class GatewayRunner:
                        display_reasoning = last_reasoning.strip()
                    response = f"💭 **Reasoning:**\n```\n{display_reasoning}\n```\n\n{response}"

-            # Runtime-metadata footer — only on the FINAL message of the turn.
-            # Off by default (display.runtime_footer.enabled=false).  When
-            # streaming already delivered the body, we can't mutate the sent
-            # text, so we fire a separate trailing send below.
-            _footer_line = ""
-            try:
-                from gateway.runtime_footer import build_footer_line as _bfl
-                _footer_line = _bfl(
-                    user_config=_load_gateway_config(),
-                    platform_key=_platform_config_key(source.platform),
-                    model=agent_result.get("model"),
-                    context_tokens=agent_result.get("last_prompt_tokens", 0) or 0,
-                    context_length=agent_result.get("context_length") or None,
-                    cwd=os.environ.get("TERMINAL_CWD", ""),
-                )
-            except Exception as _footer_err:
-                logger.debug("runtime_footer build failed: %s", _footer_err)
-                _footer_line = ""
-            if _footer_line and response and not agent_result.get("already_sent"):
-                response = f"{response}\n\n{_footer_line}"
-
            # Emit agent:end hook
            await self.hooks.emit("agent:end", {
                **hook_ctx,
@@ -5436,17 +5249,6 @@ class GatewayRunner:
                        await self._deliver_media_from_response(
                            response, event, _media_adapter,
                        )
-                # Streaming already delivered the body text, but the footer was
-                # intentionally held back (see the `not already_sent` gate above).
-                # Send it now as a small trailing message so Telegram/Discord/etc.
-                # still surface the runtime metadata on the final reply.
-                if _footer_line:
-                    try:
-                        _foot_adapter = self.adapters.get(source.platform)
-                        if _foot_adapter:
-                            await _foot_adapter.send(source.chat_id, _footer_line)
-                    except Exception as _e:
-                        logger.debug("trailing footer send failed: %s", _e)
                return None

            return response
@@ -5529,8 +5331,11 @@ class GatewayRunner:
        custom_provs = None

        try:
-            data = _load_gateway_config()
-            if data:
+            cfg_path = _hermes_home / "config.yaml"
+            if cfg_path.exists():
+                import yaml as _info_yaml
+                with open(cfg_path, encoding="utf-8") as f:
+                    data = _info_yaml.safe_load(f) or {}
                model_cfg = data.get("model", {})
                if isinstance(model_cfg, dict):
                    raw_ctx = model_cfg.get("context_length")
@@ -6129,8 +5934,9 @@ class GatewayRunner:
        custom_provs = None
        config_path = _hermes_home / "config.yaml"
        try:
-            cfg = _load_gateway_config()
-            if cfg:
+            if config_path.exists():
+                with open(config_path, encoding="utf-8") as f:
+                    cfg = yaml.safe_load(f) or {}
                model_cfg = cfg.get("model", {})
                if isinstance(model_cfg, dict):
                    current_model = model_cfg.get("default", "")
@@ -6169,7 +5975,6 @@ class GatewayRunner:
                    providers = list_authenticated_providers(
                        current_provider=current_provider,
                        current_base_url=current_base_url,
-                        current_model=current_model,
                        user_providers=user_provs,
                        custom_providers=custom_provs,
                        max_models=50,
@@ -6291,7 +6096,6 @@ class GatewayRunner:
                providers = list_authenticated_providers(
                    current_provider=current_provider,
                    current_base_url=current_base_url,
-                    current_model=current_model,
                    user_providers=user_provs,
                    custom_providers=custom_provs,
                    max_models=5,
@@ -6437,14 +6241,20 @@ class GatewayRunner:

    async def _handle_personality_command(self, event: MessageEvent) -> str:
        """Handle /personality command - list or set a personality."""
+        import yaml
        from hermes_constants import display_hermes_home

        args = event.get_command_args().strip().lower()
        config_path = _hermes_home / 'config.yaml'

        try:
-            config = _load_gateway_config()
-            personalities = config.get("agent", {}).get("personalities", {}) if config else {}
+            if config_path.exists():
+                with open(config_path, 'r', encoding="utf-8") as f:
+                    config = yaml.safe_load(f) or {}
+                personalities = config.get("agent", {}).get("personalities", {})
+            else:
+                config = {}
+                personalities = {}
        except Exception:
            config = {}
            personalities = {}
@@ -6574,10 +6384,18 @@ class GatewayRunner:
        
        env_key = f"{platform_name.upper()}_HOME_CHANNEL"
        
-        # Save to .env so it persists across restarts
+        # Save to config.yaml
        try:
-            from hermes_cli.config import save_env_value
-            save_env_value(env_key, str(chat_id))
+            import yaml
+            config_path = _hermes_home / 'config.yaml'
+            user_config = {}
+            if config_path.exists():
+                with open(config_path, encoding="utf-8") as f:
+                    user_config = yaml.safe_load(f) or {}
+            user_config[env_key] = chat_id
+            atomic_yaml_write(config_path, user_config)
+            # Also set in the current environment so it takes effect immediately
+            os.environ[env_key] = str(chat_id)
        except Exception as e:
            return f"Failed to save home channel: {e}"
        
@@ -7438,13 +7256,17 @@ class GatewayRunner:
        ``display.platforms.<platform>.tool_progress`` so each channel can
        have its own verbosity level independently.
        """
+        import yaml

        config_path = _hermes_home / "config.yaml"
        platform_key = _platform_config_key(event.source.platform)

        # --- check config gate ------------------------------------------------
        try:
-            user_config = _load_gateway_config()
+            user_config = {}
+            if config_path.exists():
+                with open(config_path, encoding="utf-8") as f:
+                    user_config = yaml.safe_load(f) or {}
            gate_enabled = user_config.get("display", {}).get("tool_progress_command", False)
        except Exception:
            gate_enabled = False
@@ -7492,94 +7314,6 @@ class GatewayRunner:
            logger.warning("Failed to save tool_progress mode: %s", e)
            return f"{descriptions[new_mode]}\n_(could not save to config: {e})_"

-    async def _handle_footer_command(self, event: MessageEvent) -> str:
-        """Handle /footer command — toggle the runtime-metadata footer.
-
-        Usage:
-            /footer           → toggle on/off
-            /footer on        → enable globally
-            /footer off       → disable globally
-            /footer status    → show current state + fields
-
-        The footer is saved to ``display.runtime_footer.enabled`` (global).
-        Per-platform overrides under ``display.platforms.<platform>.runtime_footer``
-        are respected but not modified here — edit config.yaml directly for
-        per-platform control.
-        """
-        from gateway.runtime_footer import resolve_footer_config
-
-        config_path = _hermes_home / "config.yaml"
-        platform_key = _platform_config_key(event.source.platform)
-
-        # --- parse argument -------------------------------------------------
-        arg = ""
-        try:
-            text = (getattr(event, "message", None) or "").strip()
-            if text.startswith("/"):
-                parts = text.split(None, 1)
-                if len(parts) > 1:
-                    arg = parts[1].strip().lower()
-        except Exception:
-            arg = ""
-
-        # --- load config ----------------------------------------------------
-        try:
-            user_config: dict = _load_gateway_config()
-        except Exception as e:
-            return f"⚠️ Could not read config.yaml: {e}"
-
-        effective = resolve_footer_config(user_config, platform_key)
-
-        if arg in ("status", "?"):
-            state = "ON" if effective["enabled"] else "OFF"
-            fields = ", ".join(effective.get("fields") or [])
-            return (
-                f"📎 Runtime footer: **{state}**\n"
-                f"Fields: `{fields}`\n"
-                f"Platform: `{platform_key}`"
-            )
-
-        if arg in ("on", "enable", "true", "1"):
-            new_state = True
-        elif arg in ("off", "disable", "false", "0"):
-            new_state = False
-        elif arg == "":
-            new_state = not effective["enabled"]
-        else:
-            return "Usage: `/footer [on|off|status]`"
-
-        # --- write global flag ---------------------------------------------
-        try:
-            if not isinstance(user_config.get("display"), dict):
-                user_config["display"] = {}
-            display = user_config["display"]
-            if not isinstance(display.get("runtime_footer"), dict):
-                display["runtime_footer"] = {}
-            display["runtime_footer"]["enabled"] = new_state
-            atomic_yaml_write(config_path, user_config)
-        except Exception as e:
-            logger.warning("Failed to save runtime_footer.enabled: %s", e)
-            return f"⚠️ Could not save config: {e}"
-
-        state = "ON" if new_state else "OFF"
-        example = ""
-        if new_state:
-            # Show a preview using current agent state if available.
-            from gateway.runtime_footer import format_runtime_footer
-            preview = format_runtime_footer(
-                model=_resolve_gateway_model(user_config) or None,
-                context_tokens=0,
-                context_length=None,
-                fields=effective.get("fields") or ["model", "context_pct", "cwd"],
-            )
-            if preview:
-                example = f"\nExample: `{preview}`"
-        return (
-            f"📎 Runtime footer: **{state}**"
-            f"{example}\n"
-            f"_(saved globally — takes effect on next message)_"
-        )
-
    async def _handle_compress_command(self, event: MessageEvent) -> str:
        """Handle /compress command -- manually compress conversation context.

@@ -7615,6 +7349,7 @@ class GatewayRunner:
                for m in history
                if m.get("role") in ("user", "assistant") and m.get("content")
            ]
+            original_count = len(msgs)
            approx_tokens = estimate_messages_tokens_rough(msgs)

            tmp_agent = AIAgent(
@@ -9207,47 +8942,12 @@ class GatewayRunner:

    _MAX_INTERRUPT_DEPTH = 3  # Cap recursive interrupt handling (#816)

-    # Config keys whose values MUST invalidate the gateway's cached agent
-    # when they change.  The agent bakes these into its compressor / context
-    # handling at construction time, so a mid-running-gateway config edit
-    # would otherwise be silently ignored until the user triggers a
-    # different cache eviction (model switch, /reset, etc.).
-    #
-    # Each entry is a tuple of (section, key) read from the raw config dict.
-    # Add more here as new baked-at-construction config settings are added.
-    _CACHE_BUSTING_CONFIG_KEYS: tuple = (
-        ("model", "context_length"),
-        ("compression", "enabled"),
-        ("compression", "threshold"),
-        ("compression", "target_ratio"),
-        ("compression", "protect_last_n"),
-    )
-
-    @classmethod
-    def _extract_cache_busting_config(cls, user_config: dict | None) -> dict:
-        """Pull the subset of config values that must bust the agent cache.
-
-        Returns a flat dict keyed by 'section.key'.  Missing keys and
-        non-dict sections yield None values, which still contribute to
-        the signature (so 'absent' vs 'present-and-null' differ).
-        """
-        out: Dict[str, Any] = {}
-        cfg = user_config if isinstance(user_config, dict) else {}
-        for section, key in cls._CACHE_BUSTING_CONFIG_KEYS:
-            section_val = cfg.get(section)
-            if isinstance(section_val, dict):
-                out[f"{section}.{key}"] = section_val.get(key)
-            else:
-                out[f"{section}.{key}"] = None
-        return out
-
    @staticmethod
    def _agent_config_signature(
        model: str,
        runtime: dict,
        enabled_toolsets: list,
        ephemeral_prompt: str,
-        cache_keys: dict | None = None,
    ) -> str:
        """Compute a stable string key from agent config values.

@@ -9255,12 +8955,6 @@ class GatewayRunner:
        discarded and rebuilt.  When it stays the same, the cached agent is
        reused — preserving the frozen system prompt and tool schemas for
        prompt cache hits.
-
-        ``cache_keys`` is an optional flat dict of additional config values
-        that should invalidate the cache when they change.  Callers pass
-        the output of ``_extract_cache_busting_config(user_config)`` so
-        edits to model.context_length / compression.* in config.yaml are
-        picked up on the next gateway message without a manual restart.
        """
        import hashlib, json as _j

@@ -9271,8 +8965,6 @@ class GatewayRunner:
        _api_key = str(runtime.get("api_key", "") or "")
        _api_key_fingerprint = hashlib.sha256(_api_key.encode()).hexdigest() if _api_key else ""

-        _cache_keys_sorted = sorted((cache_keys or {}).items())
-
        blob = _j.dumps(
            [
                model,
@@ -9284,7 +8976,6 @@ class GatewayRunner:
                # reasoning_config excluded — it's set per-message on the
                # cached agent and doesn't affect system prompt or tools.
                ephemeral_prompt or "",
-                _cache_keys_sorted,
            ],
            sort_keys=True,
            default=str,
@@ -10350,7 +10041,7 @@ class GatewayRunner:
        # Bridge sync status_callback → async adapter.send for context pressure
        _status_adapter = self.adapters.get(source.platform)
        _status_chat_id = source.chat_id
-        _status_thread_metadata = {"thread_id": _progress_thread_id} if _progress_thread_id else None
+        _status_thread_metadata = {"thread_id": _progress_thread_id, "mention_user_id": source.user_id} if _progress_thread_id else {"mention_user_id": source.user_id}

        def _status_callback_sync(event_type: str, message: str) -> None:
            if not _status_adapter or not _run_still_current():
@@ -10537,7 +10228,6 @@ class GatewayRunner:
                turn_route["runtime"],
                enabled_toolsets,
                combined_ephemeral,
-                cache_keys=self._extract_cache_busting_config(user_config),
            )
            agent = None
            _cache_lock = getattr(self, "_agent_cache_lock", None)
@@ -10604,7 +10294,7 @@ class GatewayRunner:
            agent.status_callback = _status_callback_sync
            agent.reasoning_config = reasoning_config
            agent.service_tier = self._service_tier
-            agent.request_overrides = turn_route.get("request_overrides") or {}
+            agent.request_overrides = turn_route.get("request_overrides")

            _bg_review_release = threading.Event()
            _bg_review_pending: list[str] = []
@@ -10825,23 +10515,6 @@ class GatewayRunner:
            # anything (tool, assistant with unfinished work, etc.), so we
            # give a stronger, reason-aware instruction that subsumes the
            # tool-tail case.
-            #
-            # Freshness gate (#16802): both branches are gated on the age
-            # of the last persisted transcript row.  That is the correct
-            # "when did we last do anything here" signal for both the
-            # resume_pending path (restart watchdog) and the tool-tail
-            # path (in-flight tool loop killed).  We read ``history[-1]``
-            # here because ``agent_history`` has already stripped the
-            # ``timestamp`` field off tool/tool_call rows for API purity
-            # (see the `k != "timestamp"` filter above).  Rows without a
-            # timestamp (legacy transcripts) are treated as fresh so the
-            # historical auto-continue behaviour is preserved.
-            _freshness_window = _auto_continue_freshness_window()
-            _interruption_is_fresh = _is_fresh_gateway_interruption(
-                _last_transcript_timestamp(history),
-                window_secs=_freshness_window,
-            )
-
            _resume_entry = None
            if session_key:
                try:
@@ -10849,14 +10522,7 @@ class GatewayRunner:
                except Exception:
                    _resume_entry = None
            _is_resume_pending = bool(
-                _resume_entry is not None
-                and getattr(_resume_entry, "resume_pending", False)
-                and _interruption_is_fresh
-            )
-            _has_fresh_tool_tail = bool(
-                agent_history
-                and agent_history[-1].get("role") == "tool"
-                and _interruption_is_fresh
+                _resume_entry is not None and getattr(_resume_entry, "resume_pending", False)
            )

            if _is_resume_pending:
@@ -10876,7 +10542,7 @@ class GatewayRunner:
                    f"message below.]\n\n"
                    + message
                )
-            elif _has_fresh_tool_tail:
+            elif agent_history and agent_history[-1].get("role") == "tool":
                message = (
                    "[System note: Your previous turn was interrupted before you could "
                    "process the last tool result(s). The conversation history contains "
@@ -10939,13 +10605,11 @@ class GatewayRunner:
            _last_prompt_toks = 0
            _input_toks = 0
            _output_toks = 0
-            _context_length = 0
            _agent = agent_holder[0]
            if _agent and hasattr(_agent, "context_compressor"):
                _last_prompt_toks = getattr(_agent.context_compressor, "last_prompt_tokens", 0)
                _input_toks = getattr(_agent, "session_prompt_tokens", 0)
                _output_toks = getattr(_agent, "session_completion_tokens", 0)
-                _context_length = getattr(_agent.context_compressor, "context_length", 0) or 0
            _resolved_model = getattr(_agent, "model", None) if _agent else None

            if not final_response:
@@ -10962,7 +10626,6 @@ class GatewayRunner:
                    "input_tokens": _input_toks,
                    "output_tokens": _output_toks,
                    "model": _resolved_model,
-                    "context_length": _context_length,
                }
            
            # Scan tool results for MEDIA:<path> tags that need to be delivered
@@ -11045,13 +10708,6 @@ class GatewayRunner:
                        final_response,
                        all_msgs,
                        failure_callback=_title_failure_cb,
-                        main_runtime={
-                            "model": getattr(agent, "model", None),
-                            "provider": getattr(agent, "provider", None),
-                            "base_url": getattr(agent, "base_url", None),
-                            "api_key": getattr(agent, "api_key", None),
-                            "api_mode": getattr(agent, "api_mode", None),
-                        } if agent else None,
                    )
                except Exception:
                    pass
@@ -11067,7 +10723,6 @@ class GatewayRunner:
                "input_tokens": _input_toks,
                "output_tokens": _output_toks,
                "model": _resolved_model,
-                "context_length": _context_length,
                "session_id": effective_session_id,
                "response_previewed": result.get("response_previewed", False),
            }
@@ -12016,19 +11671,6 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool =
    atexit.register(remove_pid_file)
    atexit.register(release_gateway_runtime_lock)

-    # MCP tool discovery — run in an executor so the asyncio event loop
-    # stays responsive even when a configured MCP server is slow or
-    # unreachable.  discover_mcp_tools() uses a blocking 120s wait
-    # internally; calling it from the loop thread would freeze platform
-    # heartbeats (Discord shard, Telegram polling) until it returned.
-    # See #16856.
-    try:
-        from tools.mcp_tool import discover_mcp_tools
-        _loop = asyncio.get_running_loop()
-        await _loop.run_in_executor(None, discover_mcp_tools)
-    except Exception as e:
-        logger.debug("MCP tool discovery failed: %s", e)
-
    # Start the gateway
    success = await runner.start()
    if not success:
@@ -1,150 +0,0 @@
-"""Gateway runtime-metadata footer.
-
-Renders a compact footer showing runtime state (model, context %, cwd) and
-appends it to the FINAL message of an agent turn when enabled.  Off by default
-to keep replies minimal.
-
-Config (``~/.hermes/config.yaml``)::
-
-    display:
-      runtime_footer:
-        enabled: true                       # off by default
-        fields: [model, context_pct, cwd]   # order shown; drop any to hide
-
-Per-platform overrides live under ``display.platforms.<platform>.runtime_footer``.
-Users can toggle the global setting with ``/footer on|off`` from both the CLI
-and any gateway platform.
-
-The footer is appended to the final response text in ``gateway/run.py`` right
-before returning the response to the adapter send path — so it only lands on
-the final message a user sees, not on tool-progress updates or streaming
-partials.  When streaming is on and the final text has already been delivered
-piecemeal, the footer is sent as a separate trailing message via
-``send_trailing_footer()``.
-"""
-
-from __future__ import annotations
-
-import os
-from pathlib import Path
-from typing import Any, Iterable, Optional
-
-_DEFAULT_FIELDS: tuple[str, ...] = ("model", "context_pct", "cwd")
-_SEP = " · "
-
-
-def _home_relative_cwd(cwd: str) -> str:
-    """Return *cwd* with ``$HOME`` collapsed to ``~``.  Empty string if unset."""
-    if not cwd:
-        return ""
-    try:
-        home = os.path.expanduser("~")
-        p = os.path.abspath(cwd)
-        if home and (p == home or p.startswith(home + os.sep)):
-            return "~" + p[len(home):]
-        return p
-    except Exception:
-        return cwd
-
-
-def _model_short(model: Optional[str]) -> str:
-    """Drop ``vendor/`` prefix for readability (``openai/gpt-5.4`` → ``gpt-5.4``)."""
-    if not model:
-        return ""
-    return model.rsplit("/", 1)[-1]
-
-
-def resolve_footer_config(
-    user_config: dict[str, Any] | None,
-    platform_key: str | None = None,
-) -> dict[str, Any]:
-    """Resolve effective runtime-footer config for *platform_key*.
-
-    Merge order (later wins):
-        1. Built-in defaults (enabled=False)
-        2. ``display.runtime_footer``
-        3. ``display.platforms.<platform_key>.runtime_footer``
-    """
-    resolved = {"enabled": False, "fields": list(_DEFAULT_FIELDS)}
-    cfg = (user_config or {}).get("display") or {}
-
-    global_cfg = cfg.get("runtime_footer")
-    if isinstance(global_cfg, dict):
-        if "enabled" in global_cfg:
-            resolved["enabled"] = bool(global_cfg.get("enabled"))
-        if isinstance(global_cfg.get("fields"), list) and global_cfg["fields"]:
-            resolved["fields"] = [str(f) for f in global_cfg["fields"]]
-
-    if platform_key:
-        platforms = cfg.get("platforms") or {}
-        plat_cfg = platforms.get(platform_key)
-        if isinstance(plat_cfg, dict):
-            plat_footer = plat_cfg.get("runtime_footer")
-            if isinstance(plat_footer, dict):
-                if "enabled" in plat_footer:
-                    resolved["enabled"] = bool(plat_footer.get("enabled"))
-                if isinstance(plat_footer.get("fields"), list) and plat_footer["fields"]:
-                    resolved["fields"] = [str(f) for f in plat_footer["fields"]]
-
-    return resolved
-
-
-def format_runtime_footer(
-    *,
-    model: Optional[str],
-    context_tokens: int,
-    context_length: Optional[int],
-    cwd: Optional[str] = None,
-    fields: Iterable[str] = _DEFAULT_FIELDS,
-) -> str:
-    """Render the footer line, or return "" if no fields have data.
-
-    Fields are skipped silently when their underlying data is missing — a
-    partially-populated footer is better than a line with ``?%`` or empty slots.
-    """
-    parts: list[str] = []
-    for field in fields:
-        if field == "model":
-            m = _model_short(model)
-            if m:
-                parts.append(m)
-        elif field == "context_pct":
-            if context_length and context_length > 0 and context_tokens >= 0:
-                pct = max(0, min(100, round((context_tokens / context_length) * 100)))
-                parts.append(f"{pct}%")
-        elif field == "cwd":
-            rel = _home_relative_cwd(cwd or os.environ.get("TERMINAL_CWD", ""))
-            if rel:
-                parts.append(rel)
-        # Unknown field names are silently ignored.
-
-    if not parts:
-        return ""
-    return _SEP.join(parts)
-
-
-def build_footer_line(
-    *,
-    user_config: dict[str, Any] | None,
-    platform_key: str | None,
-    model: Optional[str],
-    context_tokens: int,
-    context_length: Optional[int],
-    cwd: Optional[str] = None,
-) -> str:
-    """Top-level entry point used by gateway/run.py.
-
-    Returns the footer text (empty string when disabled or no data).  Callers
-    append this to the final response themselves, preserving a single blank
-    line of separation.
-    """
-    cfg = resolve_footer_config(user_config, platform_key)
-    if not cfg.get("enabled"):
-        return ""
-    return format_runtime_footer(
-        model=model,
-        context_tokens=context_tokens,
-        context_length=context_length,
-        cwd=cwd,
-        fields=cfg.get("fields") or _DEFAULT_FIELDS,
-    )
@@ -62,8 +62,8 @@ from .config import (
 )
 from .whatsapp_identity import (
    canonical_whatsapp_identifier,
+    normalize_whatsapp_identifier,
 )
-from utils import atomic_replace


@dataclass
@@ -705,7 +705,7 @@ class SessionStore:
                json.dump(data, f, indent=2)
                f.flush()
                os.fsync(f.fileno())
-            atomic_replace(tmp_path, sessions_file)
+            os.replace(tmp_path, sessions_file)
        except BaseException:
            try:
                os.unlink(tmp_path)
@@ -1257,11 +1257,25 @@ class SessionStore:
        Used by /retry, /undo, and /compress to persist modified conversation history.
        Rewrites both SQLite and legacy JSONL storage.
        """
-        # SQLite: replace atomically so a mid-rewrite failure doesn't leave
-        # the session half-empty in the DB while JSONL still has history.
+        # SQLite: clear old messages and re-insert
        if self._db:
            try:
-                self._db.replace_messages(session_id, messages)
+                self._db.clear_messages(session_id)
+                for msg in messages:
+                    role = msg.get("role", "unknown")
+                    self._db.append_message(
+                        session_id=session_id,
+                        role=role,
+                        content=msg.get("content"),
+                        tool_name=msg.get("tool_name"),
+                        tool_calls=msg.get("tool_calls"),
+                        tool_call_id=msg.get("tool_call_id"),
+                        reasoning=msg.get("reasoning") if role == "assistant" else None,
+                        reasoning_content=msg.get("reasoning_content") if role == "assistant" else None,
+                        reasoning_details=msg.get("reasoning_details") if role == "assistant" else None,
+                        codex_reasoning_items=msg.get("codex_reasoning_items") if role == "assistant" else None,
+                        codex_message_items=msg.get("codex_message_items") if role == "assistant" else None,
+                    )
            except Exception as e:
                logger.debug("Failed to rewrite transcript in DB: %s", e)
        
@@ -43,7 +43,6 @@ import yaml

 from hermes_cli.config import get_hermes_home, get_config_path, read_raw_config
 from hermes_constants import OPENROUTER_BASE_URL
-from utils import atomic_replace

 logger = logging.getLogger(__name__)

@@ -110,12 +109,6 @@ SERVICE_PROVIDER_NAMES: Dict[str, str] = {
 DEFAULT_GEMINI_CLOUDCODE_BASE_URL = "cloudcode-pa://google"
 GEMINI_OAUTH_ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 60  # refresh 60s before expiry

-# LM Studio's default no-auth mode still requires *some* non-empty bearer for
-# the API-key code paths (auxiliary_client, runtime resolver) to treat the
-# provider as configured. This sentinel is sent only to LM Studio, never to
-# any remote service.
-LMSTUDIO_NOAUTH_PLACEHOLDER = "dummy-lm-api-key"
-

 # =============================================================================
 # Provider Registry
@@ -166,14 +159,6 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
        auth_type="oauth_external",
        inference_base_url=DEFAULT_GEMINI_CLOUDCODE_BASE_URL,
    ),
-    "lmstudio": ProviderConfig(
-        id="lmstudio",
-        name="LM Studio",
-        auth_type="api_key",
-        inference_base_url="http://127.0.0.1:1234/v1",
-        api_key_env_vars=("LM_API_KEY",),
-        base_url_env_var="LM_BASE_URL",
-    ),
    "copilot": ProviderConfig(
        id="copilot",
        name="GitHub Copilot",
@@ -363,14 +348,6 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
        api_key_env_vars=("XIAOMI_API_KEY",),
        base_url_env_var="XIAOMI_BASE_URL",
    ),
-    "tencent-tokenhub": ProviderConfig(
-        id="tencent-tokenhub",
-        name="Tencent TokenHub",
-        auth_type="api_key",
-        inference_base_url="https://tokenhub.tencentmaas.com/v1",
-        api_key_env_vars=("TOKENHUB_API_KEY",),
-        base_url_env_var="TOKENHUB_BASE_URL",
-    ),
    "ollama-cloud": ProviderConfig(
        id="ollama-cloud",
        name="Ollama Cloud",
@@ -843,7 +820,7 @@ def _save_auth_store(auth_store: Dict[str, Any]) -> Path:
            handle.write(payload)
            handle.flush()
            os.fsync(handle.fileno())
-        atomic_replace(tmp_path, auth_file)
+        os.replace(tmp_path, auth_file)
        try:
            dir_fd = os.open(str(auth_file.parent), os.O_RDONLY)
        except OSError:
@@ -1164,13 +1141,11 @@ def resolve_provider(
        "qwen-portal": "qwen-oauth", "qwen-cli": "qwen-oauth", "qwen-oauth": "qwen-oauth", "google-gemini-cli": "google-gemini-cli", "gemini-cli": "google-gemini-cli", "gemini-oauth": "google-gemini-cli",
        "hf": "huggingface", "hugging-face": "huggingface", "huggingface-hub": "huggingface",
        "mimo": "xiaomi", "xiaomi-mimo": "xiaomi",
-        "tencent": "tencent-tokenhub", "tokenhub": "tencent-tokenhub",
-        "tencent-cloud": "tencent-tokenhub", "tencentmaas": "tencent-tokenhub",
        "aws": "bedrock", "aws-bedrock": "bedrock", "amazon-bedrock": "bedrock", "amazon": "bedrock",
        "go": "opencode-go", "opencode-go-sub": "opencode-go",
        "kilo": "kilocode", "kilo-code": "kilocode", "kilo-gateway": "kilocode",
-        "lmstudio": "lmstudio", "lm-studio": "lmstudio", "lm_studio": "lmstudio",
        # Local server aliases — route through the generic custom provider
+        "lmstudio": "custom", "lm-studio": "custom", "lm_studio": "custom",
        "ollama": "custom", "ollama_cloud": "ollama-cloud",
        "vllm": "custom", "llamacpp": "custom",
        "llama.cpp": "custom", "llama-cpp": "custom",
@@ -1217,11 +1192,8 @@ def resolve_provider(
            continue
        # GitHub tokens are commonly present for repo/tool access but should not
        # hijack inference auto-selection unless the user explicitly chooses
-        # Copilot/GitHub Models as the provider. LM Studio is a local server
-        # whose availability isn't implied by LM_API_KEY presence (it may be
-        # offline, and the no-auth setup uses a placeholder value), so it
-        # also requires explicit selection.
-        if pid in ("copilot", "lmstudio"):
+        # Copilot/GitHub Models as the provider.
+        if pid == "copilot":
            continue
        for env_var in pconfig.api_key_env_vars:
            if has_usable_secret(os.getenv(env_var, "")):
@@ -3499,13 +3471,6 @@ def resolve_api_key_provider_credentials(provider_id: str) -> Dict[str, Any]:
    key_source = ""
    api_key, key_source = _resolve_api_key_provider_secret(provider_id, pconfig)

-    # No-auth LM Studio: substitute a placeholder so runtime / auxiliary_client
-    # see the local server as configured. doctor still reports unconfigured
-    # because get_api_key_provider_status uses the raw secret resolver.
-    if not api_key and provider_id == "lmstudio":
-        api_key = LMSTUDIO_NOAUTH_PLACEHOLDER
-        key_source = key_source or "default"
-
    env_url = ""
    if pconfig.base_url_env_var:
        env_url = os.getenv(pconfig.base_url_env_var, "").strip()
@@ -34,7 +34,7 @@ from dataclasses import dataclass, field
 from typing import Optional
 from urllib import request as urllib_request
 from urllib.error import HTTPError, URLError
-from urllib.parse import urlparse
+from urllib.parse import urlparse, urlunparse

 logger = logging.getLogger(__name__)

@@ -696,78 +696,6 @@ def run_quick_backup(args) -> None:
        print("No state files found to snapshot.")


-# ---------------------------------------------------------------------------
-# Shared full-zip backup helper
-# ---------------------------------------------------------------------------
-
-def _write_full_zip_backup(out_path: Path, hermes_root: Path) -> Optional[Path]:
-    """Write a full zip snapshot of ``hermes_root`` to ``out_path``.
-
-    Uses the same exclusion rules and SQLite safe-copy as :func:`run_backup`.
-    Returns the output path on success, None on failure (nothing to back up,
-    or write error — caller should surface the outcome but not raise).
-    """
-    files_to_add: list[tuple[Path, Path]] = []
-    try:
-        for dirpath, dirnames, filenames in os.walk(hermes_root, followlinks=False):
-            dp = Path(dirpath)
-            # Prune excluded directories in-place so os.walk doesn't descend
-            dirnames[:] = [d for d in dirnames if d not in _EXCLUDED_DIRS]
-
-            for fname in filenames:
-                fpath = dp / fname
-                try:
-                    rel = fpath.relative_to(hermes_root)
-                except ValueError:
-                    continue
-
-                if _should_exclude(rel):
-                    continue
-
-                # Skip the output zip itself if it already exists inside root.
-                try:
-                    if fpath.resolve() == out_path.resolve():
-                        continue
-                except (OSError, ValueError):
-                    pass
-
-                files_to_add.append((fpath, rel))
-    except OSError as exc:
-        logger.warning("Full-zip backup: walk failed: %s", exc)
-        return None
-
-    if not files_to_add:
-        return None
-
-    try:
-        with zipfile.ZipFile(out_path, "w", zipfile.ZIP_DEFLATED, compresslevel=6) as zf:
-            for abs_path, rel_path in files_to_add:
-                try:
-                    if abs_path.suffix == ".db":
-                        with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as tmp:
-                            tmp_db = Path(tmp.name)
-                        try:
-                            if _safe_copy_db(abs_path, tmp_db):
-                                zf.write(tmp_db, arcname=str(rel_path))
-                        finally:
-                            tmp_db.unlink(missing_ok=True)
-                    else:
-                        zf.write(abs_path, arcname=str(rel_path))
-                except (PermissionError, OSError, ValueError) as exc:
-                    logger.debug("Skipping %s in zip backup: %s", rel_path, exc)
-                    continue
-    except OSError as exc:
-        logger.warning("Full-zip backup: zip write failed: %s", exc)
-        # Best-effort cleanup of partial file
-        try:
-            out_path.unlink(missing_ok=True)
-        except OSError:
-            pass
-        return None
-
-    return out_path
-
-
 # ---------------------------------------------------------------------------
 # Pre-update auto-backup
 # ---------------------------------------------------------------------------
@@ -840,87 +768,64 @@ def create_pre_update_backup(
    stamp = datetime.now().strftime("%Y-%m-%d-%H%M%S")
    out_path = backup_dir / f"{_PRE_UPDATE_PREFIX}{stamp}.zip"

-    result = _write_full_zip_backup(out_path, hermes_root)
-    if result is None:
+    # Collect files (same logic as run_backup, minus the chatty progress prints)
+    files_to_add: list[tuple[Path, Path]] = []
+    try:
+        for dirpath, dirnames, filenames in os.walk(hermes_root, followlinks=False):
+            dp = Path(dirpath)
+            # Prune excluded directories in-place so os.walk doesn't descend
+            dirnames[:] = [d for d in dirnames if d not in _EXCLUDED_DIRS]
+
+            for fname in filenames:
+                fpath = dp / fname
+                try:
+                    rel = fpath.relative_to(hermes_root)
+                except ValueError:
+                    continue
+
+                if _should_exclude(rel):
+                    continue
+
+                # Skip the output zip itself if it already exists
+                try:
+                    if fpath.resolve() == out_path.resolve():
+                        continue
+                except (OSError, ValueError):
+                    pass
+
+                files_to_add.append((fpath, rel))
+    except OSError as exc:
+        logger.warning("Pre-update backup: walk failed: %s", exc)
+        return None
+
+    if not files_to_add:
+        return None
+
+    try:
+        with zipfile.ZipFile(out_path, "w", zipfile.ZIP_DEFLATED, compresslevel=6) as zf:
+            for abs_path, rel_path in files_to_add:
+                try:
+                    if abs_path.suffix == ".db":
+                        with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as tmp:
+                            tmp_db = Path(tmp.name)
+                        try:
+                            if _safe_copy_db(abs_path, tmp_db):
+                                zf.write(tmp_db, arcname=str(rel_path))
+                        finally:
+                            tmp_db.unlink(missing_ok=True)
+                    else:
+                        zf.write(abs_path, arcname=str(rel_path))
+                except (PermissionError, OSError, ValueError) as exc:
+                    logger.debug("Skipping %s in pre-update backup: %s", rel_path, exc)
+                    continue
+    except OSError as exc:
+        logger.warning("Pre-update backup: zip write failed: %s", exc)
+        # Best-effort cleanup of partial file
+        try:
+            out_path.unlink(missing_ok=True)
+        except OSError:
+            pass
        return None

    _prune_pre_update_backups(backup_dir, keep=keep)
    return out_path
-
-
-# ---------------------------------------------------------------------------
-# Pre-migration auto-backup (used by `hermes claw migrate`)
-# ---------------------------------------------------------------------------
-
-_PRE_MIGRATION_PREFIX = "pre-migration-"
-_PRE_MIGRATION_DEFAULT_KEEP = 5
-
-
-def _prune_pre_migration_backups(backup_dir: Path, keep: int) -> int:
-    """Remove oldest pre-migration backups beyond the keep limit.
-
-    Only touches files matching ``pre-migration-*.zip`` so other backups in
-    the same directory are never touched.
-    """
-    if keep < 0:
-        keep = 0
-    if not backup_dir.exists():
-        return 0
-
-    backups = sorted(
-        (p for p in backup_dir.iterdir()
-         if p.is_file() and p.name.startswith(_PRE_MIGRATION_PREFIX) and p.suffix.lower() == ".zip"),
-        key=lambda p: p.name,
-        reverse=True,
-    )
-
-    deleted = 0
-    for p in backups[keep:]:
-        try:
-            p.unlink()
-            deleted += 1
-        except OSError as exc:
-            logger.warning("Failed to prune pre-migration backup %s: %s", p.name, exc)
-
-    return deleted
-
-
-def create_pre_migration_backup(
-    hermes_home: Optional[Path] = None,
-    keep: int = _PRE_MIGRATION_DEFAULT_KEEP,
-) -> Optional[Path]:
-    """Create a full zip backup of HERMES_HOME under ``backups/`` before a
-    ``hermes claw migrate`` apply.
-
-    Shares implementation with :func:`create_pre_update_backup` via
-    ``_write_full_zip_backup`` — same exclusions, same SQLite safe-copy,
-    restorable with ``hermes import <archive>``.  Writes to
-    ``<HERMES_HOME>/backups/pre-migration-<timestamp>.zip`` and auto-prunes
-    old pre-migration backups.
-
-    Returns the path to the created zip, or ``None`` if nothing was found
-    to back up (fresh install) or the write failed.  Never raises — the
-    caller decides whether to abort or proceed.
-    """
-    hermes_root = hermes_home or get_default_hermes_root()
-    if not hermes_root.is_dir():
-        return None
-
-    # Reuses the shared backups/ directory so `hermes import` and the
-    # update-backup listing pick up pre-migration archives too.
-    backup_dir = _pre_update_backup_dir(hermes_root)
-    try:
-        backup_dir.mkdir(parents=True, exist_ok=True)
-    except OSError as exc:
-        logger.warning("Could not create pre-migration backup dir %s: %s", backup_dir, exc)
-        return None
-
-    stamp = datetime.now().strftime("%Y-%m-%d-%H%M%S")
-    out_path = backup_dir / f"{_PRE_MIGRATION_PREFIX}{stamp}.zip"
-
-    result = _write_full_zip_backup(out_path, hermes_root)
-    if result is None:
-        return None
-
-    _prune_pre_migration_backups(backup_dir, keep=keep)
-    return out_path
@@ -562,6 +562,7 @@ def build_welcome_banner(console: Console, model: str, cwd: str,
    right_content = "\n".join(right_lines)
    layout_table.add_row(left_content, right_content)

+    agent_name = _skin_branding("agent_name", "Hermes Agent")
    title_color = _skin_color("banner_title", "#FFD700")
    border_color = _skin_color("banner_border", "#CD7F32")
    version_label = format_banner_version_label()
@@ -4,8 +4,7 @@ Usage:
    hermes claw migrate              # Preview then migrate (always shows preview first)
    hermes claw migrate --dry-run    # Preview only, no changes
    hermes claw migrate --yes        # Skip confirmation prompt
-    hermes claw migrate --preset full --overwrite --migrate-secrets  # Full run w/ secrets
-    hermes claw migrate --no-backup  # Skip pre-migration snapshot
+    hermes claw migrate --preset full --overwrite  # Full migration, overwrite conflicts
    hermes claw cleanup              # Archive leftover OpenClaw directories
    hermes claw cleanup --dry-run    # Preview what would be archived
 """
@@ -16,7 +15,6 @@ import subprocess
 import sys
 from datetime import datetime
 from pathlib import Path
-from typing import Optional

 from hermes_cli.config import get_hermes_home, get_config_path, load_config, save_config
 from hermes_constants import get_optional_skills_dir
@@ -323,13 +321,10 @@ def _cmd_migrate(args):
    migrate_secrets = getattr(args, "migrate_secrets", False)
    workspace_target = getattr(args, "workspace_target", None)
    skill_conflict = getattr(args, "skill_conflict", "skip")
-    no_backup = getattr(args, "no_backup", False)

-    # Secrets are never included implicitly — they must be explicitly requested
-    # via --migrate-secrets, even under --preset full.  This mirrors OpenClaw's
-    # migrate-hermes posture (two-phase: run once without secrets, rerun with
-    # --include-secrets) and prevents a --preset full invocation from silently
-    # importing API keys that the user may not have intended to copy.
+    # If using the "full" preset, secrets are included by default
+    if preset == "full":
+        migrate_secrets = True

    print()
    print(
@@ -436,24 +431,15 @@ def _cmd_migrate(args):

    preview_summary = preview_report.get("summary", {})
    preview_count = preview_summary.get("migrated", 0)
-    preview_conflicts = preview_summary.get("conflict", 0)

-    # "Nothing to migrate" means nothing migrated AND nothing blocked by
-    # conflicts.  If there are conflicts, we still want to show the plan and
-    # surface the refusal/--overwrite guidance instead of silently bailing.
-    if preview_count == 0 and preview_conflicts == 0:
+    if preview_count == 0:
        print()
        print_info("Nothing to migrate from OpenClaw.")
        _print_migration_report(preview_report, dry_run=True)
        return

    print()
-    if preview_count > 0:
-        print_header(f"Migration Preview — {preview_count} item(s) would be imported")
-    else:
-        print_header(
-            f"Migration Preview — {preview_conflicts} conflict(s), nothing would be imported"
-        )
+    print_header(f"Migration Preview — {preview_count} item(s) would be imported")
    print_info("No changes have been made yet. Review the list below:")
    _print_migration_report(preview_report, dry_run=True)

@@ -461,24 +447,6 @@ def _cmd_migrate(args):
    if dry_run:
        return

-    # ── Phase 1b: Refuse if the plan has conflicts and --overwrite is not set ─
-    # Modelled on OpenClaw's assertConflictFreePlan() — apply is a safe no-op
-    # on conflicts unless the user explicitly opts in to overwriting.  Without
-    # this guard, the user would answer "yes, proceed" and silently end up
-    # with a migration that skipped every conflicting item.
-    if preview_conflicts > 0 and not overwrite:
-        print()
-        print_error(
-            f"Plan has {preview_conflicts} conflict(s). Refusing to apply."
-        )
-        print_info(
-            "Each conflict is an item whose target already exists in ~/.hermes/. "
-            "Re-run with --overwrite to replace conflicting targets (item-level "
-            "backups are written to the migration report directory)."
-        )
-        print_info("Or re-run with --dry-run to review the full plan.")
-        return
-
    # ── Phase 2: Confirm and execute ───────────────────────────
    print()
    if not auto_yes:
@@ -490,32 +458,6 @@ def _cmd_migrate(args):
            print_info("Migration cancelled.")
            return

-    # ── Phase 2b: Pre-apply backup of the Hermes home ─────────
-    # Delegates to hermes_cli.backup.create_pre_migration_backup(), which
-    # shares implementation with the pre-update backup (same exclusion
-    # rules, same SQLite safe-copy, zip format) so the archive is
-    # restorable with `hermes import`.  Mirrors OpenClaw's
-    # createPreMigrationBackup posture — one atomic restore point before
-    # any mutation, auto-pruned to the last 5 pre-migration zips.
-    backup_archive: Optional[Path] = None
-    if not no_backup:
-        try:
-            from hermes_cli.backup import create_pre_migration_backup, _format_size
-            backup_archive = create_pre_migration_backup(hermes_home=hermes_home)
-            if backup_archive:
-                size_str = _format_size(backup_archive.stat().st_size)
-                print()
-                print_success(f"Pre-migration backup: {backup_archive} ({size_str})")
-                print_info(f"Restore with: hermes import {backup_archive.name}")
-        except Exception as e:
-            print()
-            print_error(f"Could not create pre-migration backup: {e}")
-            print_info(
-                "Re-run with --no-backup to skip, or free up disk space under the Hermes home."
-            )
-            logger.debug("Pre-migration backup error", exc_info=True)
-            return
-
    try:
        migrator = mod.Migrator(
            source_root=source_dir.resolve(),
@@ -534,9 +476,6 @@ def _cmd_migrate(args):
        print()
        print_error(f"Migration failed: {e}")
        logger.debug("OpenClaw migration error", exc_info=True)
-        if backup_archive:
-            print_info(f"A pre-migration backup is available at: {backup_archive}")
-            print_info(f"Restore with: hermes import {backup_archive.name}")
        return

    # Print results
@@ -115,9 +115,6 @@ COMMAND_REGISTRY: list[CommandDef] = [
    CommandDef("verbose", "Cycle tool progress display: off -> new -> all -> verbose",
               "Configuration", cli_only=True,
               gateway_config_gate="display.tool_progress_command"),
-    CommandDef("footer", "Toggle gateway runtime-metadata footer on final replies",
-               "Configuration", args_hint="[on|off|status]",
-               subcommands=("on", "off", "status")),
    CommandDef("yolo", "Toggle YOLO mode (skip all dangerous command approvals)",
               "Configuration"),
    CommandDef("reasoning", "Manage reasoning effort and display", "Configuration",
@@ -946,42 +943,6 @@ def slack_subcommand_map() -> dict[str, str]:
 # Autocomplete
 # ---------------------------------------------------------------------------

-
-# Per-process cache for /model<space> LM Studio autocomplete. Probing on
-# every keystroke would block the UI; a short TTL keeps it live without
-# hammering the server.
-_LMSTUDIO_COMPLETION_CACHE: tuple[float, list[str]] | None = None
-
-
-def _lmstudio_completion_models() -> list[str]:
-    """Locally-loaded LM Studio models for /model autocomplete (cached, gated)."""
-    global _LMSTUDIO_COMPLETION_CACHE
-    # Gate: don't probe 127.0.0.1 on every keystroke for users who don't use LM Studio.
-    if not (os.environ.get("LM_API_KEY") or os.environ.get("LM_BASE_URL")):
-        try:
-            from hermes_cli.auth import _load_auth_store
-            store = _load_auth_store() or {}
-            if "lmstudio" not in (store.get("providers") or {}) \
-               and "lmstudio" not in (store.get("credential_pool") or {}):
-                return []
-        except Exception:
-            return []
-    now = time.time()
-    if _LMSTUDIO_COMPLETION_CACHE and (now - _LMSTUDIO_COMPLETION_CACHE[0]) < 30.0:
-        return _LMSTUDIO_COMPLETION_CACHE[1]
-    try:
-        from hermes_cli.models import fetch_lmstudio_models
-        models = fetch_lmstudio_models(
-            api_key=os.environ.get("LM_API_KEY", ""),
-            base_url=os.environ.get("LM_BASE_URL") or "http://127.0.0.1:1234/v1",
-            timeout=0.8,
-        )
-    except Exception:
-        models = []
-    _LMSTUDIO_COMPLETION_CACHE = (now, models)
-    return models
-
-
 class SlashCommandCompleter(Completer):
    """Autocomplete for built-in slash commands, subcommands, and skill commands."""

@@ -1405,19 +1366,6 @@ class SlashCommandCompleter(Completer):
                    )
        except Exception:
            pass
-        # LM Studio: surface locally-loaded models. Gated on the user actually
-        # having LM Studio configured (env var or auth-store entry) so we
-        # don't probe 127.0.0.1 on every keystroke for users who don't use it.
-        for name in _lmstudio_completion_models():
-            if name in seen:
-                continue
-            if name.startswith(sub_lower) and name != sub_lower:
-                yield Completion(
-                    name,
-                    start_position=-len(sub_text),
-                    display=name,
-                    display_meta="LM Studio",
-                )

    def get_completions(self, document, complete_event):
        text = document.text_before_cursor
@@ -30,59 +30,36 @@ logger = logging.getLogger(__name__)
 _IS_WINDOWS = platform.system() == "Windows"
 _ENV_VAR_NAME_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$")
 _LAST_EXPANDED_CONFIG_BY_PATH: Dict[str, Any] = {}
-# (path, mtime_ns, size) -> cached expanded config dict.
-# load_config() returns a deepcopy of the cached value when the file
-# hasn't changed since the last load, skipping yaml.safe_load +
-# _deep_merge + _normalize_* + _expand_env_vars (~13 ms/call).
-# save_config() + migrate_config() write via atomic_yaml_write which
-# produces a fresh inode, so stat() sees a new mtime_ns and the next
-# load repopulates automatically — no explicit invalidation hook.
-_LOAD_CONFIG_CACHE: Dict[str, Tuple[int, int, Dict[str, Any]]] = {}
-# (path, mtime_ns, size) -> cached raw yaml dict. Same pattern as
-# _LOAD_CONFIG_CACHE but for read_raw_config() — used when callers want
-# the user's on-disk values without defaults merged in.
-_RAW_CONFIG_CACHE: Dict[str, Tuple[int, int, Dict[str, Any]]] = {}
 # Env var names written to .env that aren't in OPTIONAL_ENV_VARS
 # (managed by setup/provider flows directly).
 _EXTRA_ENV_KEYS = frozenset({
    "OPENAI_API_KEY", "OPENAI_BASE_URL",
    "ANTHROPIC_API_KEY", "ANTHROPIC_TOKEN",
-    "DISCORD_HOME_CHANNEL", "DISCORD_HOME_CHANNEL_NAME",
-    "TELEGRAM_HOME_CHANNEL", "TELEGRAM_HOME_CHANNEL_NAME",
-    "SLACK_HOME_CHANNEL", "SLACK_HOME_CHANNEL_NAME",
+    "DISCORD_HOME_CHANNEL", "TELEGRAM_HOME_CHANNEL",
    "SIGNAL_ACCOUNT", "SIGNAL_HTTP_URL",
    "SIGNAL_ALLOWED_USERS", "SIGNAL_GROUP_ALLOWED_USERS",
-    "SIGNAL_HOME_CHANNEL", "SIGNAL_HOME_CHANNEL_NAME",
-    "SMS_HOME_CHANNEL", "SMS_HOME_CHANNEL_NAME",
    "DINGTALK_CLIENT_ID", "DINGTALK_CLIENT_SECRET",
-    "DINGTALK_HOME_CHANNEL", "DINGTALK_HOME_CHANNEL_NAME",
    "FEISHU_APP_ID", "FEISHU_APP_SECRET", "FEISHU_ENCRYPT_KEY", "FEISHU_VERIFICATION_TOKEN",
-    "FEISHU_HOME_CHANNEL", "FEISHU_HOME_CHANNEL_NAME",
-    "YUANBAO_HOME_CHANNEL", "YUANBAO_HOME_CHANNEL_NAME",
    "WECOM_BOT_ID", "WECOM_SECRET",
    "WECOM_CALLBACK_CORP_ID", "WECOM_CALLBACK_CORP_SECRET", "WECOM_CALLBACK_AGENT_ID",
    "WECOM_CALLBACK_TOKEN", "WECOM_CALLBACK_ENCODING_AES_KEY",
    "WECOM_CALLBACK_HOST", "WECOM_CALLBACK_PORT",
-    "WECOM_HOME_CHANNEL", "WECOM_HOME_CHANNEL_NAME",
    "WEIXIN_ACCOUNT_ID", "WEIXIN_TOKEN", "WEIXIN_BASE_URL", "WEIXIN_CDN_BASE_URL",
    "WEIXIN_HOME_CHANNEL", "WEIXIN_HOME_CHANNEL_NAME", "WEIXIN_DM_POLICY", "WEIXIN_GROUP_POLICY",
    "WEIXIN_ALLOWED_USERS", "WEIXIN_GROUP_ALLOWED_USERS", "WEIXIN_ALLOW_ALL_USERS",
    "BLUEBUBBLES_SERVER_URL", "BLUEBUBBLES_PASSWORD",
-    "BLUEBUBBLES_HOME_CHANNEL", "BLUEBUBBLES_HOME_CHANNEL_NAME",
    "QQ_APP_ID", "QQ_CLIENT_SECRET", "QQBOT_HOME_CHANNEL", "QQBOT_HOME_CHANNEL_NAME",
    "QQ_HOME_CHANNEL", "QQ_HOME_CHANNEL_NAME",  # legacy aliases (pre-rename, still read for back-compat)
    "QQ_ALLOWED_USERS", "QQ_GROUP_ALLOWED_USERS", "QQ_ALLOW_ALL_USERS", "QQ_MARKDOWN_SUPPORT",
    "QQ_STT_API_KEY", "QQ_STT_BASE_URL", "QQ_STT_MODEL",
    "TERMINAL_ENV", "TERMINAL_SSH_KEY", "TERMINAL_SSH_PORT",
    "WHATSAPP_MODE", "WHATSAPP_ENABLED",
-    "MATTERMOST_HOME_CHANNEL", "MATTERMOST_HOME_CHANNEL_NAME", "MATTERMOST_REPLY_MODE",
+    "MATTERMOST_HOME_CHANNEL", "MATTERMOST_REPLY_MODE",
    "MATRIX_PASSWORD", "MATRIX_ENCRYPTION", "MATRIX_DEVICE_ID", "MATRIX_HOME_ROOM",
    "MATRIX_REQUIRE_MENTION", "MATRIX_FREE_RESPONSE_ROOMS", "MATRIX_AUTO_THREAD", "MATRIX_DM_AUTO_THREAD",
    "MATRIX_RECOVERY_KEY",
-    # Langfuse observability plugin — optional tuning keys + standard SDK vars.
-    # Activation is via plugins.enabled (opt-in through `hermes plugins enable
-    # observability/langfuse` or `hermes tools → Langfuse`); credentials gate
-    # the plugin at runtime.
+    # Langfuse observability plugin — optional tuning keys + standard SDK vars
+    "HERMES_LANGFUSE_ENABLED",   # backward-compat env var (new: plugins.langfuse.enabled in config.yaml)
    "HERMES_LANGFUSE_ENV",
    "HERMES_LANGFUSE_RELEASE",
    "HERMES_LANGFUSE_SAMPLE_RATE",
@@ -239,7 +216,6 @@ def get_container_exec_info() -> Optional[dict]:

 # Re-export from hermes_constants — canonical definition lives there.
 from hermes_constants import get_hermes_home  # noqa: F811,E402
-from utils import atomic_replace

 def get_config_path() -> Path:
    """Get the main config file path."""
@@ -423,20 +399,6 @@ DEFAULT_CONFIG = {
        # (60+ tool iterations with tiny output) before users assume the
        # bot is dead and /restart.
        "gateway_notify_interval": 180,
-        # Freshness window for the gateway auto-continue note (seconds).
-        # After a gateway crash/restart/SIGTERM mid-run, the next user
-        # message gets a "[System note: your previous turn was
-        # interrupted — process the unfinished tool result(s) first]"
-        # prepended so the model picks up where it left off.  That's the
-        # right behaviour while the interruption is fresh, but stale
-        # markers (transcript last touched hours or days ago) can revive
-        # an unrelated old task when the user's next message starts new
-        # work.  This window is the max age of the last persisted
-        # transcript row for which we still inject the continue note.
-        # Default 3600s comfortably covers a long turn (gateway_timeout
-        # default is 1800s) plus runtime slack.  Set to 0 to disable the
-        # gate and restore pre-fix behaviour (always inject).
-        "gateway_auto_continue_freshness": 3600,
        # How user-attached images are presented to the main model on each turn.
        #   "auto"   — attach natively when the active model reports
        #              supports_vision=True AND the user hasn't explicitly
@@ -594,7 +556,7 @@ DEFAULT_CONFIG = {
        "threshold": 0.50,            # compress when context usage exceeds this ratio
        "target_ratio": 0.20,         # fraction of threshold to preserve as recent tail
        "protect_last_n": 20,         # minimum recent messages to keep uncompressed
-        "hygiene_hard_message_limit": 400,  # gateway session-hygiene force-compress threshold by message count
+
    },

    # Anthropic prompt caching (Claude via OpenRouter or native Anthropic API).
@@ -703,11 +665,6 @@ DEFAULT_CONFIG = {
        "personality": "kawaii",
        "resume_display": "full",
        "busy_input_mode": "interrupt",  # interrupt | queue | steer
-        # When true, `hermes --tui` auto-resumes the most recent human-
-        # facing session on launch instead of forging a fresh one.
-        # Mirrors `hermes -c` muscle memory.  Default off so existing
-        # users aren't surprised.  HERMES_TUI_RESUME=<id> always wins.
-        "tui_auto_resume_recent": False,
        "bell_on_complete": False,
        "show_reasoning": False,
        "streaming": False,
@@ -724,14 +681,6 @@ DEFAULT_CONFIG = {
        "tool_progress_overrides": {},  # DEPRECATED — use display.platforms instead
        "tool_preview_length": 0,  # Max chars for tool call previews (0 = no limit, show full paths/commands)
        "platforms": {},  # Per-platform display overrides: {"telegram": {"tool_progress": "all"}, "slack": {"tool_progress": "off"}}
-        # Gateway runtime-metadata footer appended to the FINAL message of a turn
-        # (disabled by default to keep replies minimal). When enabled, renders
-        # e.g. `model · 68% · ~/projects/hermes`. Per-platform overrides go under
-        # display.platforms.<platform>.runtime_footer.
-        "runtime_footer": {
-            "enabled": False,
-            "fields": ["model", "context_pct", "cwd"],  # Order shown; drop any to hide
-        },
    },

    # Web dashboard settings
@@ -949,7 +898,6 @@ DEFAULT_CONFIG = {

    # Telegram platform settings (gateway mode)
    "telegram": {
-        "reactions": False,            # Add 👀/✅/❌ reactions to messages during processing
        "channel_prompts": {},         # Per-chat/topic ephemeral system prompts (topics inherit from parent group)
    },

@@ -1228,22 +1176,6 @@ OPTIONAL_ENV_VARS = {
        "category": "provider",
        "advanced": True,
    },
-    "LM_API_KEY": {
-        "description": "LM Studio bearer token for auth-enabled local servers",
-        "prompt": "LM Studio API key / bearer token",
-        "url": None,
-        "password": True,
-        "category": "provider",
-        "advanced": True,
-    },
-    "LM_BASE_URL": {
-        "description": "LM Studio base URL override",
-        "prompt": "LM Studio base URL (leave empty for default)",
-        "url": None,
-        "password": False,
-        "category": "provider",
-        "advanced": True,
-    },
    "GLM_API_KEY": {
        "description": "Z.AI / GLM API key (also recognized as ZAI_API_KEY / Z_AI_API_KEY)",
        "prompt": "Z.AI / GLM API key",
@@ -2290,21 +2222,14 @@ def _normalize_custom_provider_entry(
        "baseUrl": "base_url",
        "apiMode": "api_mode",
        "keyEnv": "key_env",
-        "apiKeyEnv": "key_env",  # alias — OpenClaw-compatible + docs variant
        "defaultModel": "default_model",
        "contextLength": "context_length",
        "rateLimitDelay": "rate_limit_delay",
    }
-    # api_key_env is a documented snake_case alias for key_env (see
-    # website/docs/guides/azure-foundry.md).  Normalize it up front so the
-    # rest of the normalizer treats it as the canonical field.
-    if "api_key_env" in entry and "key_env" not in entry:
-        entry["key_env"] = entry["api_key_env"]
    _KNOWN_KEYS = {
-        "name", "api", "url", "base_url", "api_key", "key_env", "api_key_env",
+        "name", "api", "url", "base_url", "api_key", "key_env",
        "api_mode", "transport", "model", "default_model", "models",
        "context_length", "rate_limit_delay",
-        "request_timeout_seconds", "stale_timeout_seconds",
    }
    for camel, snake in _CAMEL_ALIASES.items():
        if camel in entry and snake not in entry:
@@ -2556,9 +2481,6 @@ _KNOWN_ROOT_KEYS = {
 _VALID_CUSTOM_PROVIDER_FIELDS = {
    "name", "base_url", "api_key", "api_mode", "model", "models",
    "context_length", "rate_limit_delay",
-    # key_env is read at runtime by runtime_provider.py and auxiliary_client.py
-    # — include it here so the set accurately describes the supported schema.
-    "key_env",
 }

 # Fields that look like they should be inside custom_providers, not at root
@@ -2635,32 +2557,10 @@ def validate_config_structure(config: Optional[Dict[str, Any]] = None) -> List["
                        "Add the API endpoint URL, e.g.: base_url: https://api.example.com/v1",
                    ))

-    # ── fallback_model: single dict OR list of dicts (chain) ─────────────
+    # ── fallback_model must be a top-level dict with provider + model ────
    fb = config.get("fallback_model")
    if fb is not None:
-        if isinstance(fb, list):
-            # Chain fallback — validate each entry
-            for i, entry in enumerate(fb):
-                if not isinstance(entry, dict):
-                    issues.append(ConfigIssue(
-                        "error",
-                        f"fallback_model[{i}] should be a dict, got {type(entry).__name__}",
-                        "Each entry needs provider + model",
-                    ))
-                else:
-                    if not entry.get("provider"):
-                        issues.append(ConfigIssue(
-                            "warning",
-                            f"fallback_model[{i}] is missing 'provider' field",
-                            "Add: provider: openrouter (or another provider)",
-                        ))
-                    if not entry.get("model"):
-                        issues.append(ConfigIssue(
-                            "warning",
-                            f"fallback_model[{i}] is missing 'model' field",
-                            "Add: model: <model-name>",
-                        ))
-        elif not isinstance(fb, dict):
+        if not isinstance(fb, dict):
            issues.append(ConfigIssue(
                "error",
                f"fallback_model should be a dict with 'provider' and 'model', got {type(fb).__name__}",
@@ -3453,62 +3353,25 @@ def read_raw_config() -> Dict[str, Any]:
    be parsed.  Use this for lightweight config reads where you just need a
    single value and don't want the overhead of ``load_config()``'s deep-merge
    + migration pipeline.
-
-    Cached on the config file's (mtime_ns, size) — same strategy as
-    ``load_config()``. Returns a deepcopy on every call since some callers
-    mutate the result before passing to ``save_config()``.
    """
    try:
        config_path = get_config_path()
-        st = config_path.stat()
-        cache_key = (st.st_mtime_ns, st.st_size)
-    except (FileNotFoundError, OSError):
-        return {}
-
-    path_key = str(config_path)
-    cached = _RAW_CONFIG_CACHE.get(path_key)
-    if cached is not None and cached[:2] == cache_key:
-        return copy.deepcopy(cached[2])
-
-    try:
-        with open(config_path, encoding="utf-8") as f:
-            data = yaml.safe_load(f) or {}
+        if config_path.exists():
+            with open(config_path, encoding="utf-8") as f:
+                return yaml.safe_load(f) or {}
    except Exception:
-        return {}
-
-    if not isinstance(data, dict):
-        data = {}
-    _RAW_CONFIG_CACHE[path_key] = (cache_key[0], cache_key[1], copy.deepcopy(data))
-    return data
+        pass
+    return {}


 def load_config() -> Dict[str, Any]:
-    """Load configuration from ~/.hermes/config.yaml.
-
-    Cached on the config file's (mtime_ns, size). Returns a deepcopy of
-    the cached value when unchanged, since most call sites mutate the
-    result (e.g. ``cfg["model"]["default"] = ...`` before ``save_config``).
-    The cache is keyed on ``str(config_path)`` so profile switches
-    (which change ``HERMES_HOME`` and therefore ``get_config_path()``)
-    don't collide.
-    """
+    """Load configuration from ~/.hermes/config.yaml."""
    ensure_hermes_home()
    config_path = get_config_path()
-    path_key = str(config_path)
-
-    try:
-        st = config_path.stat()
-        cache_key: Optional[Tuple[int, int]] = (st.st_mtime_ns, st.st_size)
-    except FileNotFoundError:
-        cache_key = None
-
-    cached = _LOAD_CONFIG_CACHE.get(path_key)
-    if cached is not None and cache_key is not None and cached[:2] == cache_key:
-        return copy.deepcopy(cached[2])
-
+    
    config = copy.deepcopy(DEFAULT_CONFIG)
-
-    if cache_key is not None:
+    
+    if config_path.exists():
        try:
            with open(config_path, encoding="utf-8") as f:
                user_config = yaml.safe_load(f) or {}
@@ -3526,11 +3389,7 @@ def load_config() -> Dict[str, Any]:

    normalized = _normalize_root_model_keys(_normalize_max_turns_config(config))
    expanded = _expand_env_vars(normalized)
-    _LAST_EXPANDED_CONFIG_BY_PATH[path_key] = copy.deepcopy(expanded)
-    if cache_key is not None:
-        _LOAD_CONFIG_CACHE[path_key] = (cache_key[0], cache_key[1], copy.deepcopy(expanded))
-    else:
-        _LOAD_CONFIG_CACHE.pop(path_key, None)
+    _LAST_EXPANDED_CONFIG_BY_PATH[str(config_path)] = copy.deepcopy(expanded)
    return expanded


@@ -3633,12 +3492,7 @@ def save_config(config: Dict[str, Any]):
    if not sec or sec.get("redact_secrets") is None:
        parts.append(_SECURITY_COMMENT)
    fb = normalized.get("fallback_model", {})
-    fb_is_valid = False
-    if isinstance(fb, list):
-        fb_is_valid = any(isinstance(e, dict) and e.get("provider") and e.get("model") for e in fb)
-    elif isinstance(fb, dict):
-        fb_is_valid = bool(fb.get("provider") and fb.get("model"))
-    if not fb_is_valid:
+    if not fb or not isinstance(fb, dict) or not (fb.get("provider") and fb.get("model")):
        parts.append(_FALLBACK_COMMENT)

    atomic_yaml_write(
@@ -3764,7 +3618,7 @@ def sanitize_env_file() -> int:
            f.writelines(sanitized)
            f.flush()
            os.fsync(f.fileno())
-        atomic_replace(tmp_path, env_path)
+        os.replace(tmp_path, env_path)
    except BaseException:
        try:
            os.unlink(tmp_path)
@@ -3827,7 +3681,7 @@ def save_env_value(key: str, value: str):
    value = _check_non_ascii_credential(key, value)
    ensure_hermes_home()
    env_path = get_env_path()
-
+    
    # On Windows, open() defaults to the system locale (cp1252) which can
    # cause OSError errno 22 on UTF-8 .env files.
    read_kw = {"encoding": "utf-8", "errors": "replace"} if _IS_WINDOWS else {}
@@ -3839,7 +3693,7 @@ def save_env_value(key: str, value: str):
            lines = f.readlines()
        # Sanitize on every read: split concatenated keys, drop stale placeholders
        lines = _sanitize_env_lines(lines)
-
+    
    # Find and update or append
    found = False
    for i, line in enumerate(lines):
@@ -3847,7 +3701,7 @@ def save_env_value(key: str, value: str):
            lines[i] = f"{key}={value}\n"
            found = True
            break
-
+    
    if not found:
        # Ensure there's a newline at the end of the file before appending
        if lines and not lines[-1].endswith("\n"):
@@ -3867,7 +3721,7 @@ def save_env_value(key: str, value: str):
            f.writelines(lines)
            f.flush()
            os.fsync(f.fileno())
-        atomic_replace(tmp_path, env_path)
+        os.replace(tmp_path, env_path)
        # Restore original permissions before _secure_file may tighten them.
        if original_mode is not None:
            try:
@@ -3923,7 +3777,7 @@ def remove_env_value(key: str) -> bool:
                f.writelines(new_lines)
                f.flush()
                os.fsync(f.fileno())
-            atomic_replace(tmp_path, env_path)
+            os.replace(tmp_path, env_path)
            if original_mode is not None:
                try:
                    os.chmod(env_path, original_mode)
@@ -7,6 +7,7 @@ Currently supports:

 import io
 import json
+import os
 import sys
 import time
 import urllib.error
@@ -17,7 +18,6 @@ from pathlib import Path
 from typing import Optional

 from hermes_constants import get_hermes_home
-from utils import atomic_replace


 # ---------------------------------------------------------------------------
@@ -79,7 +79,7 @@ def _save_pending(entries: list[dict]) -> None:
        path.parent.mkdir(parents=True, exist_ok=True)
        tmp = path.with_suffix(".json.tmp")
        tmp.write_text(json.dumps(entries, indent=2), encoding="utf-8")
-        atomic_replace(tmp, path)
+        os.replace(tmp, path)
    except OSError:
        # Non-fatal — worst case the user has to run ``hermes debug delete``
        # manually.
@@ -13,6 +13,7 @@ automatically.

 from __future__ import annotations

+import io
 import os
 import sys
 import time
@@ -57,7 +57,6 @@ _PROVIDER_ENV_HINTS = (
    "OPENCODE_ZEN_API_KEY",
    "OPENCODE_GO_API_KEY",
    "XIAOMI_API_KEY",
-    "TOKENHUB_API_KEY",
 )


@@ -344,7 +343,7 @@ def run_doctor(args):
                    )

            # Warn if model is set to a provider-prefixed name on a provider that doesn't use them
-            if default_model and "/" in default_model and canonical_provider and canonical_provider not in ("openrouter", "custom", "auto", "ai-gateway", "kilocode", "opencode-zen", "huggingface", "nous", "lmstudio"):
+            if default_model and "/" in default_model and canonical_provider and canonical_provider not in ("openrouter", "custom", "auto", "ai-gateway", "kilocode", "opencode-zen", "huggingface", "nous"):
                check_warn(
                    f"model.default '{default_model}' uses a vendor/model slug but provider is '{provider_raw}'",
                    "(vendor-prefixed slugs belong to aggregators like openrouter)",
@@ -517,14 +516,7 @@ def run_doctor(args):
    if shutil.which("codex"):
        check_ok("codex CLI")
    else:
-        # Native OAuth uses Hermes' own device-code flow — the Codex CLI is
-        # only needed if you want to import existing tokens from
-        # ~/.codex/auth.json.  Downgrade to info so users running
-        # `hermes auth openai-codex` aren't told they're missing something.
-        check_info(
-            "codex CLI not installed "
-            "(optional — only required to import tokens from an existing Codex CLI login)"
-        )
+        check_warn("codex CLI not found", "(required for openai-codex login)")

    # =========================================================================
    # Check: Directory structure
@@ -7,7 +7,6 @@ import sys
 from pathlib import Path

 from dotenv import load_dotenv
-from utils import atomic_replace


 # Env var name suffixes that indicate credential values.  These are the
@@ -128,7 +127,7 @@ def _sanitize_env_file_if_needed(path: Path) -> None:
                    f.writelines(sanitized)
                    f.flush()
                    os.fsync(f.fileno())
-                atomic_replace(tmp, path)
+                os.replace(tmp, path)
            except BaseException:
                try:
                    os.unlink(tmp)
@@ -2953,7 +2953,7 @@ def _setup_sms():
 def _setup_dingtalk():
    """Configure DingTalk — QR scan (recommended) or manual credential entry."""
    from hermes_cli.setup import (
-        prompt_choice, prompt_yes_no, print_success, print_warning,
+        prompt_choice, prompt_yes_no, print_info, print_success, print_warning,
    )

    dingtalk_platform = next(p for p in _PLATFORMS if p["key"] == "dingtalk")
@@ -3504,6 +3504,7 @@ def _setup_qqbot():
    method_idx = prompt_choice("  How would you like to set up QQ Bot?", method_choices, 0)

    credentials = None
+    used_qr = False

    if method_idx == 0:
        # ── QR scan-to-configure ──
@@ -3514,6 +3515,8 @@ def _setup_qqbot():
            print()
            print_warning("  QQ Bot setup cancelled.")
            return
+        if credentials:
+            used_qr = True
        if not credentials:
            print_info("  QR setup did not complete. Continuing with manual input.")

@@ -19,8 +19,9 @@ format) lives there.
 from __future__ import annotations

 import json
+import os
 from pathlib import Path
-from typing import Any, Dict, List
+from typing import Any, Dict, List, Optional


 def hooks_command(args) -> None:
@@ -1820,8 +1820,6 @@ def select_provider_and_model(args=None):
        "gmi",
        "nvidia",
        "ollama-cloud",
-        "tencent-tokenhub",
-        "lmstudio",
    ):
        _model_flow_api_key_provider(config, selected_provider, current_model)

@@ -2048,11 +2046,7 @@ def _aux_select_for_task(task: str) -> None:

    # Gather authenticated providers (has credentials + curated model list)
    try:
-        providers = list_authenticated_providers(
-            current_provider=current_provider,
-            current_model=current_model,
-            current_base_url=current_base_url,
-        )
+        providers = list_authenticated_providers(current_provider=current_provider)
    except Exception as exc:
        print(f"Could not detect authenticated providers: {exc}")
        providers = []
@@ -4382,7 +4376,6 @@ def _model_flow_bedrock(config, current_model=""):
 def _model_flow_api_key_provider(config, provider_id, current_model=""):
    """Generic flow for API-key providers (z.ai, MiniMax, OpenCode, etc.)."""
    from hermes_cli.auth import (
-        LMSTUDIO_NOAUTH_PLACEHOLDER,
        PROVIDER_REGISTRY,
        _prompt_model_selection,
        _save_model_choice,
@@ -4417,20 +4410,13 @@ def _model_flow_api_key_provider(config, provider_id, current_model=""):
            try:
                import getpass

-                if provider_id == "lmstudio":
-                    prompt = f"{key_env} (Enter for no-auth default {LMSTUDIO_NOAUTH_PLACEHOLDER!r}): "
-                else:
-                    prompt = f"{key_env} (or Enter to cancel): "
-                new_key = getpass.getpass(prompt).strip()
+                new_key = getpass.getpass(f"{key_env} (or Enter to cancel): ").strip()
            except (KeyboardInterrupt, EOFError):
                print()
                return
            if not new_key:
-                if provider_id == "lmstudio":
-                    new_key = LMSTUDIO_NOAUTH_PLACEHOLDER
-                else:
-                    print("Cancelled.")
-                    return
+                print("Cancelled.")
+                return
            save_env_value(key_env, new_key)
            existing_key = new_key
            print("API key saved.")
@@ -4497,21 +4483,10 @@ def _model_flow_api_key_provider(config, provider_id, current_model=""):
                print("  Tier check: could not verify (proceeding anyway).")
            print()

-    # Optional base URL override.
-    # Precedence: env var → config.yaml model.base_url → registry default.
-    # Reading config.yaml prevents silently overwriting a saved remote URL
-    # (e.g. a remote LM Studio endpoint) with localhost when the user just
-    # presses Enter at the prompt below.
+    # Optional base URL override
    current_base = ""
    if base_url_env:
        current_base = get_env_value(base_url_env) or os.getenv(base_url_env, "")
-    if not current_base:
-        try:
-            _m = load_config().get("model") or {}
-            if str(_m.get("provider") or "").strip().lower() == provider_id:
-                current_base = str(_m.get("base_url") or "").strip()
-        except Exception:
-            pass
    effective_base = current_base or pconfig.inference_base_url

    try:
@@ -4533,22 +4508,8 @@ def _model_flow_api_key_provider(config, provider_id, current_model=""):
    #   2. Curated static fallback list (offline insurance)
    #   3. Live /models endpoint probe (small providers without models.dev data)
    #
-    # LM Studio: live /api/v1/models probe (no models.dev catalog).
-    # Ollama Cloud: merged discovery (live API + models.dev + disk cache).
-    if provider_id == "lmstudio":
-        from hermes_cli.auth import AuthError
-        from hermes_cli.models import fetch_lmstudio_models
-
-        api_key_for_probe = existing_key or (get_env_value(key_env) if key_env else "")
-        try:
-            model_list = fetch_lmstudio_models(api_key=api_key_for_probe, base_url=effective_base)
-        except AuthError as exc:
-            print(f"  LM Studio rejected the request: {exc}")
-            print("  Set LM_API_KEY (or update it) to match the server's bearer token.")
-            model_list = []
-        if model_list:
-            print(f"  Found {len(model_list)} model(s) from LM Studio")
-    elif provider_id == "ollama-cloud":
+    # Ollama Cloud: dedicated merged discovery (live API + models.dev + disk cache)
+    if provider_id == "ollama-cloud":
        from hermes_cli.models import fetch_ollama_cloud_models

        api_key_for_probe = existing_key or (get_env_value(key_env) if key_env else "")
@@ -4770,6 +4731,7 @@ def _model_flow_anthropic(config, current_model=""):
            read_claude_code_credentials,
            is_claude_code_token_valid,
            _is_oauth_token,
+            _resolve_claude_code_token_from_credentials,
        )

        cc_creds = read_claude_code_credentials()
@@ -5251,93 +5213,6 @@ def _build_web_ui(web_dir: Path, *, fatal: bool = False) -> bool:
    return True


-def _warn_stale_dashboard_processes() -> None:
-    """Warn about running dashboard processes that still hold pre-update code.
-
-    ``hermes dashboard`` is a long-lived server process commonly started and
-    forgotten.  When ``hermes update`` replaces files on disk, the running
-    process keeps the old Python backend in memory while the JS bundle on
-    disk is updated, causing a silent frontend/backend mismatch (e.g. new
-    auth headers the old backend doesn't recognise → every API call 401s).
-
-    Unlike the gateway, the dashboard has no service manager (systemd /
-    launchd), so we can only warn — we don't auto-kill user-managed
-    background processes.
-    """
-    patterns = [
-        "hermes dashboard",
-        "hermes_cli.main dashboard",
-        "hermes_cli/main.py dashboard",
-    ]
-    self_pid = os.getpid()
-    dashboard_pids: list[int] = []
-
-    try:
-        if sys.platform == "win32":
-            result = subprocess.run(
-                ["wmic", "process", "get", "ProcessId,CommandLine",
-                 "/FORMAT:LIST"],
-                capture_output=True, text=True, timeout=10,
-            )
-            if result.returncode != 0:
-                return
-            current_cmd = ""
-            for line in result.stdout.split("\n"):
-                line = line.strip()
-                if line.startswith("CommandLine="):
-                    current_cmd = line[len("CommandLine="):]
-                elif line.startswith("ProcessId="):
-                    pid_str = line[len("ProcessId="):]
-                    if (any(p in current_cmd for p in patterns)
-                            and int(pid_str) != self_pid):
-                        try:
-                            dashboard_pids.append(int(pid_str))
-                        except ValueError:
-                            pass
-        else:
-            # Linux / macOS: scan the process table via ps and match against
-            # the same explicit patterns list used on Windows.  Using ps
-            # (rather than `pgrep -f "hermes.*dashboard"`) keeps us consistent
-            # with `hermes_cli.gateway._scan_gateway_pids` and avoids the
-            # greedy regex matching unrelated cmdlines that merely contain
-            # both words (e.g. a chat session discussing "dashboard").
-            result = subprocess.run(
-                ["ps", "-A", "-o", "pid=,command="],
-                capture_output=True, text=True, timeout=10,
-            )
-            if result.returncode == 0:
-                for line in result.stdout.split("\n"):
-                    stripped = line.strip()
-                    if not stripped or "grep" in stripped:
-                        continue
-                    parts = stripped.split(None, 1)
-                    if len(parts) != 2:
-                        continue
-                    try:
-                        pid = int(parts[0])
-                    except ValueError:
-                        continue
-                    command = parts[1]
-                    if (any(p in command for p in patterns)
-                            and pid != self_pid):
-                        dashboard_pids.append(pid)
-    except (FileNotFoundError, subprocess.TimeoutExpired, OSError):
-        return
-
-    if not dashboard_pids:
-        return
-
-    print()
-    print(f"⚠ {len(dashboard_pids)} dashboard process(es) still running "
-          f"with the previous version:")
-    for pid in dashboard_pids:
-        print(f"    PID {pid}")
-    print("  The running backend may not match the updated frontend,")
-    print("  causing silent auth failures or empty data.")
-    print("  Restart them to pick up the changes:")
-    print("    kill <pid> && hermes dashboard --port <port> ...")
-
-
 def _update_via_zip(args):
    """Update Hermes Agent by downloading a ZIP archive.

@@ -5472,7 +5347,6 @@ def _update_via_zip(args):

    print()
    print("✓ Update complete!")
-    _warn_stale_dashboard_processes()


 def _stash_local_changes_if_needed(git_cmd: list[str], cwd: Path) -> Optional[str]:
@@ -7174,7 +7048,7 @@ def _cmd_update_impl(args, gateway_mode: bool):
                                    print(
                                        f"  ⚠ {svc_name} died after restart, retrying..."
                                    )
-                                    subprocess.run(
+                                    retry = subprocess.run(
                                        scope_cmd + ["restart", svc_name],
                                        capture_output=True,
                                        text=True,
@@ -7289,10 +7163,6 @@ def _cmd_update_impl(args, gateway_mode: bool):
        except Exception as e:
            logger.debug("Legacy unit check during update failed: %s", e)

-        # Warn about stale dashboard processes — the dashboard has no
-        # service manager, so we can only tell the user to restart them.
-        _warn_stale_dashboard_processes()
-
        print()
        print("Tip: You can now select a provider and model:")
        print("  hermes model              # Select provider and model")
@@ -7941,12 +7811,32 @@ For more help on a command:
    )
    chat_parser.add_argument(
        "--provider",
-        # No `choices=` here: user-defined providers from config.yaml `providers:`
-        # are also valid values, and runtime resolution (resolve_runtime_provider)
-        # handles validation/error reporting consistently with the top-level
-        # `--provider` flag.
+        choices=[
+            "auto",
+            "openrouter",
+            "nous",
+            "openai-codex",
+            "copilot-acp",
+            "copilot",
+            "anthropic",
+            "gemini",
+            "xai",
+            "ollama-cloud",
+            "huggingface",
+            "zai",
+            "kimi-coding",
+            "kimi-coding-cn",
+            "stepfun",
+            "minimax",
+            "minimax-cn",
+            "kilocode",
+            "xiaomi",
+            "arcee",
+            "gmi",
+            "nvidia",
+        ],
        default=None,
-        help="Inference provider (default: auto). Built-in or a user-defined name from `providers:` in config.yaml.",
+        help="Inference provider (default: auto)",
    )
    chat_parser.add_argument(
        "-v", "--verbose", action="store_true", help="Verbose output"
@@ -9192,7 +9082,11 @@ Examples:
    )
    plugins_remove.add_argument("name", help="Plugin directory name to remove")

-    plugins_subparsers.add_parser("list", aliases=["ls"], help="List installed plugins")
+    plugins_list = plugins_subparsers.add_parser("list", aliases=["ls"], help="List installed plugins")
+    plugins_list.add_argument(
+        "--available", action="store_true",
+        help="Also show official optional plugins that are not yet installed",
+    )

    plugins_enable = plugins_subparsers.add_parser(
        "enable", help="Enable a disabled plugin"
@@ -9786,26 +9680,17 @@ Examples:
        "--preset",
        choices=["user-data", "full"],
        default="full",
-        help="Migration preset (default: full). Neither preset imports secrets — "
-        "pass --migrate-secrets to include API keys.",
+        help="Migration preset (default: full). 'user-data' excludes secrets",
    )
    claw_migrate.add_argument(
        "--overwrite",
        action="store_true",
-        help="Overwrite existing files (default: refuse to apply when the plan has conflicts)",
+        help="Overwrite existing files (default: skip conflicts)",
    )
    claw_migrate.add_argument(
        "--migrate-secrets",
        action="store_true",
-        help="Include allowlisted secrets (TELEGRAM_BOT_TOKEN, API keys, etc.). "
-        "Required even under --preset full.",
-    )
-    claw_migrate.add_argument(
-        "--no-backup",
-        action="store_true",
-        help="Skip the pre-migration zip snapshot of ~/.hermes/ (by default a "
-        "single restore-point archive is written to ~/.hermes/backups/ "
-        "before apply; restorable with 'hermes import').",
+        help="Include allowlisted secrets (TELEGRAM_BOT_TOKEN, API keys, etc.)",
    )
    claw_migrate.add_argument(
        "--workspace-target", help="Absolute path to copy workspace instructions into"
@@ -10220,17 +10105,6 @@ Examples:
            logger.debug(
                "plugin discovery failed at CLI startup", exc_info=True,
            )
-        try:
-            # MCP tool discovery — no event loop running in CLI/TUI startup,
-            # so inline is safe.  Moved here from model_tools.py module scope
-            # to avoid freezing the gateway's event loop on its first message
-            # via the same lazy import path (#16856).
-            from tools.mcp_tool import discover_mcp_tools
-            discover_mcp_tools()
-        except Exception:
-            logger.debug(
-                "MCP tool discovery failed at CLI startup", exc_info=True,
-            )
        try:
            from hermes_cli.config import load_config
            from agent.shell_hooks import register_from_config
@@ -46,6 +46,7 @@ from __future__ import annotations

 import json
 import logging
+import os
 import time
 import urllib.error
 import urllib.request
@@ -53,7 +54,6 @@ from pathlib import Path
 from typing import Any

 from hermes_cli import __version__ as _HERMES_VERSION
-from utils import atomic_replace

 logger = logging.getLogger(__name__)

@@ -190,7 +190,7 @@ def _write_disk_cache(data: dict[str, Any]) -> None:
        with open(tmp, "w") as fh:
            json.dump(data, fh, indent=2)
            fh.write("\n")
-        atomic_replace(tmp, path)
+        os.replace(tmp, path)
    except OSError as exc:
        logger.info("model catalog cache write failed: %s", exc)

@@ -213,15 +213,10 @@ def _load_direct_aliases() -> dict[str, DirectAlias]:


 def _ensure_direct_aliases() -> None:
-    """Lazy-load direct aliases on first use.
-
-    Mutates the existing DIRECT_ALIASES dict in place rather than rebinding
-    the module attribute. This keeps `from hermes_cli.model_switch import
-    DIRECT_ALIASES` references valid in callers — rebinding would leave them
-    pointing at a stale empty dict.
-    """
+    """Lazy-load direct aliases on first use, filling DIRECT_ALIASES in place."""
+    # Never rebind: `from ... import DIRECT_ALIASES` holders would keep a stale dict.
    if not DIRECT_ALIASES:
        DIRECT_ALIASES.update(_load_direct_aliases())


 # ---------------------------------------------------------------------------
@@ -984,7 +979,6 @@ def list_authenticated_providers(
    user_providers: dict = None,
    custom_providers: list | None = None,
    max_models: int = 8,
-    current_model: str = "",
 ) -> List[dict]:
    """Detect which providers have credentials and list their curated models.

@@ -1031,34 +1025,6 @@ def list_authenticated_providers(
    if "ollama-cloud" not in curated:
        from hermes_cli.models import fetch_ollama_cloud_models
        curated["ollama-cloud"] = fetch_ollama_cloud_models()
-    # LM Studio has no static catalog — probe its native /api/v1/models
-    # endpoint live so the picker reflects whatever the user has loaded.
-    # Base URL precedence: LM_BASE_URL env var > active config's base_url
-    # (when current provider is lmstudio) > 127.0.0.1 default.
-    # On auth rejection or unreachable server, fall back to the caller-supplied
-    # current model so the picker still shows something when offline / mis-keyed.
-    if "lmstudio" not in curated and (
-        os.environ.get("LM_API_KEY") or os.environ.get("LM_BASE_URL") or current_provider.strip().lower() == "lmstudio"
-    ):
-        from hermes_cli.models import fetch_lmstudio_models
-        from hermes_cli.auth import AuthError
-        is_current_lmstudio = current_provider.strip().lower() == "lmstudio"
-        lm_base = (
-            os.environ.get("LM_BASE_URL")
-            or (current_base_url if is_current_lmstudio and current_base_url else None)
-            or "http://127.0.0.1:1234/v1"
-        )
-        try:
-            live = fetch_lmstudio_models(
-                api_key=os.environ.get("LM_API_KEY", ""),
-                base_url=lm_base,
-                timeout=1.5, # Smaller timeout for picker
-            )
-        except AuthError:
-            live = []
-        if not live and is_current_lmstudio and current_model:
-            live = [current_model]
-        curated["lmstudio"] = live

    # --- 1. Check Hermes-mapped providers ---
    for hermes_id, mdev_id in PROVIDER_TO_MODELS_DEV.items():
@@ -1209,15 +1175,6 @@ def list_authenticated_providers(

        if hermes_slug in {"copilot", "copilot-acp"}:
            model_ids = provider_model_ids(hermes_slug)
-        # For aws_sdk providers (bedrock), use live discovery so the list
-        # reflects the active region (eu.*, ap.*) not the static us.* list.
-        elif overlay.auth_type == "aws_sdk":
-            try:
-                from agent.bedrock_adapter import bedrock_model_ids_or_none
-                _ids = bedrock_model_ids_or_none()
-                model_ids = _ids if _ids is not None else (curated.get(hermes_slug, []) or curated.get(pid, []))
-            except Exception:
-                model_ids = curated.get(hermes_slug, []) or curated.get(pid, [])
        else:
            # Use curated list — look up by Hermes slug, fall back to overlay key
            model_ids = curated.get(hermes_slug, []) or curated.get(pid, [])
@@ -1280,30 +1237,10 @@ def list_authenticated_providers(
            except Exception:
                pass

-        # Special case: aws_sdk auth (bedrock) — no API key env vars,
-        # credentials come from the boto3 credential chain (env vars,
-        # ~/.aws/credentials, instance roles, etc.)
-        if not _cp_has_creds and _cp_config and getattr(_cp_config, "auth_type", "") == "aws_sdk":
-            try:
-                from agent.bedrock_adapter import has_aws_credentials
-                _cp_has_creds = has_aws_credentials()
-            except Exception:
-                pass
-
        if not _cp_has_creds:
            continue

-        # For bedrock, use live discovery so the list reflects the active
-        # region (eu.*, us.*, ap.*) instead of the hardcoded us.* static list.
-        if _cp_config and getattr(_cp_config, "auth_type", "") == "aws_sdk":
-            try:
-                from agent.bedrock_adapter import bedrock_model_ids_or_none
-                _ids = bedrock_model_ids_or_none()
-                _cp_model_ids = _ids if _ids is not None else curated.get(_cp.slug, [])
-            except Exception:
-                _cp_model_ids = curated.get(_cp.slug, [])
-        else:
-            _cp_model_ids = curated.get(_cp.slug, [])
+        _cp_model_ids = curated.get(_cp.slug, [])
        _cp_total = len(_cp_model_ids)
        _cp_top = _cp_model_ids[:max_models]

@@ -1375,23 +1312,8 @@ def list_authenticated_providers(
                    if fb:
                        models_list = list(fb)

-            # Prefer the endpoint's live /models list when credentials are
-            # available. This keeps OpenAI-compatible relays (for example CRS)
-            # in sync when the server catalog changes without requiring the
-            # user to mirror every model into config.yaml.
-            api_key = str(ep_cfg.get("api_key", "") or "").strip()
-            if not api_key:
-                key_env = str(ep_cfg.get("key_env", "") or "").strip()
-                api_key = os.environ.get(key_env, "").strip() if key_env else ""
-            if api_url and api_key:
-                try:
-                    from hermes_cli.models import fetch_api_models
-                    live_models = fetch_api_models(api_key, api_url)
-                    if live_models:
-                        models_list = live_models
-                except Exception:
-                    pass
-
+            # TODO: probe /v1/models when a URL is set (without blocking on it).
+            # Not implemented yet; for now, show only the models known from config.
            results.append({
                "slug": ep_name,
                "name": display_name,
@@ -44,7 +44,6 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [
    ("openai/gpt-5.4-mini",             ""),
    ("xiaomi/mimo-v2.5-pro",             ""),
    ("xiaomi/mimo-v2.5",                 ""),
-    ("tencent/hy3-preview:free",         "free"),
    ("openai/gpt-5.3-codex",            ""),
    ("google/gemini-3-pro-image-preview", ""),
    ("google/gemini-3-flash-preview",   ""),
@@ -107,57 +106,11 @@ def _codex_curated_models() -> list[str]:
    return _add_forward_compat_models(list(DEFAULT_CODEX_MODELS))


-# Static fallback for xAI when the models.dev disk cache is empty (fresh
-# install, offline first run, etc.). Mirrors the xAI-direct model IDs from
-# $HERMES_HOME/models_dev_cache.json as of 2026-04-28. Whenever xAI renames
-# or retires a model, the disk cache picks it up on the next refresh and the
-# fallback here only matters until that refresh lands.
-_XAI_STATIC_FALLBACK: list[str] = [
-    "grok-4.20-0309-reasoning",
-    "grok-4.20-0309-non-reasoning",
-    "grok-4.20-multi-agent-0309",
-    "grok-4-1-fast",
-    "grok-4-1-fast-non-reasoning",
-    "grok-4-fast",
-    "grok-4-fast-non-reasoning",
-    "grok-4",
-    "grok-code-fast-1",
-]
-
-
-def _xai_curated_models() -> list[str]:
-    """Derive the xAI-direct curated list from models.dev disk cache.
-
-    Reads $HERMES_HOME/models_dev_cache.json directly (no network) so this
-    runs at import time without blocking. Falls back to ``_XAI_STATIC_FALLBACK``
-    when the cache is empty or unreadable. Hermes refreshes the cache from
-    https://models.dev/api.json on normal use, so this list self-heals as
-    xAI renames models.
-
-    Mirrors ``_codex_curated_models()``'s role for openai-codex.
-    """
-    try:
-        from agent.models_dev import _load_disk_cache
-        data = _load_disk_cache()
-        xai = data.get("xai") if isinstance(data, dict) else None
-        models = xai.get("models") if isinstance(xai, dict) else None
-        if isinstance(models, dict) and models:
-            ids = [mid for mid in models.keys() if isinstance(mid, str)]
-            if ids:
-                return sorted(ids)
-    except Exception:
-        # Any failure (missing file, malformed JSON, import error)
-        # falls through to the static list.
-        pass
-    return list(_XAI_STATIC_FALLBACK)
-
-
 _PROVIDER_MODELS: dict[str, list[str]] = {
    "nous": [
        "moonshotai/kimi-k2.6",
        "xiaomi/mimo-v2.5-pro",
        "xiaomi/mimo-v2.5",
-        "tencent/hy3-preview",
        "anthropic/claude-opus-4.7",
        "anthropic/claude-opus-4.6",
        "anthropic/claude-sonnet-4.6",
@@ -240,7 +193,10 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "glm-4.5",
        "glm-4.5-flash",
    ],
-    "xai": _xai_curated_models(),
+    "xai": [
+        "grok-4.20-reasoning",
+        "grok-4-1-fast-reasoning",
+    ],
    "nvidia": [
        # NVIDIA flagship reasoning models
        "nvidia/nemotron-3-super-120b-a12b",
@@ -317,9 +273,6 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "mimo-v2-omni",
        "mimo-v2-flash",
    ],
-    "tencent-tokenhub": [
-        "hy3-preview",
-    ],
    "arcee": [
        "trinity-large-thinking",
        "trinity-large-preview",
@@ -397,7 +350,6 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
    # to https://dashscope-intl.aliyuncs.com/compatible-mode/v1 (OpenAI-compat)
    # or https://dashscope-intl.aliyuncs.com/apps/anthropic (Anthropic-compat).
    "alibaba": [
-        "qwen3.6-plus",
        "kimi-k2.5",
        "qwen3.5-plus",
        "qwen3-coder-plus",
@@ -768,12 +720,10 @@ class ProviderEntry(NamedTuple):
 CANONICAL_PROVIDERS: list[ProviderEntry] = [
    ProviderEntry("nous",           "Nous Portal",              "Nous Portal (Nous Research subscription)"),
    ProviderEntry("openrouter",     "OpenRouter",               "OpenRouter (100+ models, pay-per-use)"),
-    ProviderEntry("lmstudio",       "LM Studio",                "LM Studio (local desktop app with built-in model server)"),
    ProviderEntry("ai-gateway",     "Vercel AI Gateway",        "Vercel AI Gateway (200+ models, $5 free credit, no markup)"),
    ProviderEntry("anthropic",      "Anthropic",                "Anthropic (Claude models — API key or Claude Code)"),
    ProviderEntry("openai-codex",   "OpenAI Codex",             "OpenAI Codex"),
    ProviderEntry("xiaomi",         "Xiaomi MiMo",              "Xiaomi MiMo (MiMo-V2.5 and V2 models — pro, omni, flash)"),
-    ProviderEntry("tencent-tokenhub", "Tencent TokenHub",       "Tencent TokenHub (Hy3 Preview — direct API via tokenhub.tencentmaas.com)"),
    ProviderEntry("nvidia",         "NVIDIA NIM",               "NVIDIA NIM (Nemotron models — build.nvidia.com or local NIM)"),
    ProviderEntry("qwen-oauth",     "Qwen OAuth (Portal)",      "Qwen OAuth (reuses local Qwen CLI login)"),
    ProviderEntry("copilot",        "GitHub Copilot",           "GitHub Copilot (uses GITHUB_TOKEN or gh auth token)"),
@@ -856,10 +806,6 @@ _PROVIDER_ALIASES = {
    "huggingface-hub": "huggingface",
    "mimo": "xiaomi",
    "xiaomi-mimo": "xiaomi",
-    "tencent": "tencent-tokenhub",
-    "tokenhub": "tencent-tokenhub",
-    "tencent-cloud": "tencent-tokenhub",
-    "tencentmaas": "tencent-tokenhub",
    "aws": "bedrock",
    "aws-bedrock": "bedrock",
    "amazon-bedrock": "bedrock",
@@ -871,9 +817,6 @@ _PROVIDER_ALIASES = {
    "nvidia-nim": "nvidia",
    "build-nvidia": "nvidia",
    "nemotron": "nvidia",
-    "lmstudio": "lmstudio",
-    "lm-studio": "lmstudio",
-    "lm_studio": "lmstudio",
    "ollama": "custom",  # bare "ollama" = local; use "ollama-cloud" for cloud
    "ollama_cloud": "ollama-cloud",
 }
@@ -1680,41 +1623,31 @@ def provider_label(provider: Optional[str]) -> str:

 # Models that support OpenAI Priority Processing (service_tier="priority").
 # See https://openai.com/api-priority-processing/ for the canonical list.
-#
-# Pattern-based matching — any OpenAI flagship model (gpt-*, o1*, o3*, o4*)
-# is assumed to support Priority Processing. service_tier=priority is silently
-# ignored by non-OpenAI endpoints (OpenRouter/Copilot/opencode-zen proxies
-# strip the field), so false positives are harmless. Codex-series models
-# (gpt-5-codex, gpt-5.3-codex, etc.) are excluded — they don't expose the
-# service_tier parameter through the Codex Responses API.
-_OPENAI_FAST_MODE_PREFIXES: tuple[str, ...] = (
-    "gpt-",
-    "o1",
+# Only the bare model slug is stored (no vendor prefix).
+_PRIORITY_PROCESSING_MODELS: frozenset[str] = frozenset({
+    "gpt-5.4",
+    "gpt-5.4-mini",
+    "gpt-5.2",
+    "gpt-5.1",
+    "gpt-5",
+    "gpt-5-mini",
+    "gpt-4.1",
+    "gpt-4.1-mini",
+    "gpt-4.1-nano",
+    "gpt-4o",
+    "gpt-4o-mini",
    "o3",
-    "o4",
-)
-
-
-def _is_openai_fast_model(model_id: Optional[str]) -> bool:
-    """Return True if the model is an OpenAI flagship eligible for Priority Processing."""
-    raw = _strip_vendor_prefix(str(model_id or ""))
-    base = raw.split(":")[0]
-    if not base:
-        return False
-    # Exclude Codex-series — they route through the Codex Responses API
-    # which doesn't accept service_tier.
-    if "codex" in base:
-        return False
-    return any(base.startswith(prefix) for prefix in _OPENAI_FAST_MODE_PREFIXES)
-
+    "o4-mini",
+})

 # Models that support Anthropic Fast Mode (speed="fast").
 # See https://platform.claude.com/docs/en/build-with-claude/fast-mode
-#
-# Pattern-based matching — any claude-* model is eligible. The anthropic
-# adapter gates speed=fast on native Anthropic endpoints only (see
-# _is_third_party_anthropic_endpoint in agent/anthropic_adapter.py), so
-# third-party proxies that would reject the beta header are protected.
+# Currently only Claude Opus 4.6.  Both hyphen and dot variants are stored
+# to handle native Anthropic (claude-opus-4-6) and OpenRouter (claude-opus-4.6).
+_ANTHROPIC_FAST_MODE_MODELS: frozenset[str] = frozenset({
+    "claude-opus-4-6",
+    "claude-opus-4.6",
+})


 def _strip_vendor_prefix(model_id: str) -> str:
@@ -1727,14 +1660,20 @@ def _strip_vendor_prefix(model_id: str) -> str:

 def model_supports_fast_mode(model_id: Optional[str]) -> bool:
    """Return whether Hermes should expose the /fast toggle for this model."""
-    return _is_anthropic_fast_model(model_id) or _is_openai_fast_model(model_id)
+    raw = _strip_vendor_prefix(str(model_id or ""))
+    if raw in _PRIORITY_PROCESSING_MODELS:
+        return True
+    # Anthropic fast mode — drop OpenRouter variant tags (:fast, :beta) before
+    # matching. NOTE(review): date suffixes are not stripped here — confirm.
+    base = raw.split(":")[0]
+    return base in _ANTHROPIC_FAST_MODE_MODELS


 def _is_anthropic_fast_model(model_id: Optional[str]) -> bool:
-    """Return True if the model is a Claude model eligible for Anthropic Fast Mode."""
+    """Return True if the model supports Anthropic's fast mode (speed='fast')."""
    raw = _strip_vendor_prefix(str(model_id or ""))
    base = raw.split(":")[0]
-    return base.startswith("claude-")
+    return base in _ANTHROPIC_FAST_MODE_MODELS


 def resolve_fast_mode_overrides(model_id: Optional[str]) -> dict[str, Any] | None:
@@ -1756,61 +1695,14 @@ def resolve_fast_mode_overrides(model_id: Optional[str]) -> dict[str, Any] | Non


 def _resolve_copilot_catalog_api_key() -> str:
-    """Best-effort GitHub token for fetching the Copilot model catalog.
-
-    Resolution order:
-      1. ``resolve_api_key_provider_credentials("copilot")`` — env vars
-         (``COPILOT_GITHUB_TOKEN`` / ``GH_TOKEN`` / ``GITHUB_TOKEN``) plus
-         the ``gh auth token`` CLI fallback.
-      2. ``read_credential_pool("copilot")`` — a token (typically a
-         ``gho_*`` from device-code login, or a fine-grained PAT) stored in
-         ``auth.json`` under ``credential_pool.copilot[]``. The pool is
-         populated by ``hermes auth add copilot`` and by ``_seed_from_env``
-         when the env var is set in ``~/.hermes/.env``.
-
-    Without (2), users whose only Copilot credential is in the pool see
-    the ``/model`` picker fall back to a stale hardcoded list because the
-    live catalog fetch silently 401s. To avoid wedging on a malformed pool
-    entry, each candidate is exchanged via ``exchange_copilot_token`` —
-    only entries that actually exchange successfully are returned, so a
-    later valid entry is reachable when an earlier one is unsupported.
-    """
+    """Best-effort GitHub token for fetching the Copilot model catalog."""
    try:
        from hermes_cli.auth import resolve_api_key_provider_credentials

        creds = resolve_api_key_provider_credentials("copilot")
-        api_key = str(creds.get("api_key") or "").strip()
-        if api_key:
-            return api_key
+        return str(creds.get("api_key") or "").strip()
    except Exception:
-        pass
-
-    try:
-        from hermes_cli.auth import read_credential_pool
-        from hermes_cli.copilot_auth import (
-            exchange_copilot_token,
-            validate_copilot_token,
-        )
-
-        for entry in read_credential_pool("copilot"):
-            if not isinstance(entry, dict):
-                continue
-            raw = str(entry.get("access_token") or "").strip()
-            if not raw:
-                continue
-            valid, _ = validate_copilot_token(raw)
-            if not valid:
-                continue
-            try:
-                api_token, _expires_at = exchange_copilot_token(raw)
-            except Exception:
-                continue
-            if api_token:
-                return api_token
-    except Exception:
-        pass
-
-    return ""
+        return ""


 # Providers where models.dev is treated as authoritative: curated static
@@ -1992,18 +1884,6 @@ def provider_model_ids(provider: Optional[str], *, force_refresh: bool = False)
            live = fetch_api_models(api_key, base_url)
            if live:
                return live
-    # Bedrock uses live discovery keyed by the resolved AWS region so that
-    # EU/AP users see eu.*/ap.* model IDs instead of the static us.* list.
-    # Note: early return intentionally skips _MODELS_DEV_PREFERRED merge
-    # below — bedrock is not expected to appear in that table.
-    if normalized == "bedrock":
-        try:
-            from agent.bedrock_adapter import bedrock_model_ids_or_none
-            ids = bedrock_model_ids_or_none()
-            if ids is not None:
-                return ids
-        except Exception:
-            pass
    curated_static = list(_PROVIDER_MODELS.get(normalized, []))
    if normalized in _MODELS_DEV_PREFERRED:
        return _merge_with_models_dev(normalized, curated_static)
@@ -2199,228 +2079,6 @@ def _is_github_models_base_url(base_url: Optional[str]) -> bool:
    )


-def _lmstudio_server_root(base_url: Optional[str]) -> Optional[str]:
-    """Strip ``/v1`` suffix from an LM Studio base URL to get the native API root.
-
-    Returns ``None`` when the base URL is empty/invalid.
-    """
-    root = (base_url or "").strip().rstrip("/")
-    if root.endswith("/v1"):
-        root = root[:-3].rstrip("/")
-    return root or None
-
-
-def _lmstudio_request_headers(api_key: Optional[str] = None) -> dict:
-    """Build HTTP headers for LM Studio native API requests."""
-    headers = {"User-Agent": _HERMES_USER_AGENT}
-    token = str(api_key or "").strip()
-    if token:
-        headers["Authorization"] = f"Bearer {token}"
-    return headers
-
-
-def _lmstudio_fetch_raw_models(
-    api_key: Optional[str] = None,
-    base_url: Optional[str] = None,
-    timeout: float = 5.0,
-) -> Optional[list[dict]]:
-    """Fetch the raw model list from LM Studio's ``/api/v1/models``.
-
-    Returns the ``models`` list of dicts on success, ``None`` on network
-    errors or malformed responses.  Raises ``AuthError`` on HTTP 401/403.
-    """
-    server_root = _lmstudio_server_root(base_url)
-    if not server_root:
-        return None
-
-    headers = _lmstudio_request_headers(api_key)
-    request = urllib.request.Request(server_root + "/api/v1/models", headers=headers)
-    try:
-        with urllib.request.urlopen(request, timeout=timeout) as resp:
-            payload = json.loads(resp.read().decode())
-    except urllib.error.HTTPError as exc:
-        if exc.code in (401, 403):
-            from hermes_cli.auth import AuthError
-            raise AuthError(
-                f"LM Studio rejected the request with HTTP {exc.code}.",
-                provider="lmstudio",
-                code="auth_rejected",
-            ) from exc
-        import logging
-        logging.getLogger(__name__).debug(
-            "LM Studio probe at %s failed with HTTP %s", server_root, exc.code,
-        )
-        return None
-    except Exception as exc:
-        import logging
-        logging.getLogger(__name__).debug(
-            "LM Studio probe at %s failed: %s", server_root, exc,
-        )
-        return None
-
-    raw_models = payload.get("models") if isinstance(payload, dict) else None
-    if not isinstance(raw_models, list):
-        import logging
-        logging.getLogger(__name__).debug(
-            "LM Studio probe at %s returned malformed payload (no `models` list)",
-            server_root,
-        )
-        return None
-    return raw_models
-
-
-def probe_lmstudio_models(
-    api_key: Optional[str] = None,
-    base_url: Optional[str] = None,
-    timeout: float = 5.0,
-) -> Optional[list[str]]:
-    """Probe LM Studio's model listing.
-
-    Returns chat-capable model keys on success, including the valid empty-list
-    case when the server is reachable but has no non-embedding models.
-    Returns ``None`` on network errors, malformed responses, or empty/invalid
-    base URLs.
-
-    Raises ``AuthError`` on HTTP 401/403 so callers can surface token issues
-    separately from reachability problems.
-    """
-    raw_models = _lmstudio_fetch_raw_models(api_key=api_key, base_url=base_url, timeout=timeout)
-    if raw_models is None:
-        return None
-
-    keys: list[str] = []
-    for raw in raw_models:
-        if not isinstance(raw, dict):
-            continue
-        if str(raw.get("type") or "").strip().lower() == "embedding":
-            continue
-        key = str(raw.get("key") or raw.get("id") or "").strip()
-        if key and key not in keys:
-            keys.append(key)
-    return keys
-
-
-def fetch_lmstudio_models(
-    api_key: Optional[str] = None,
-    base_url: Optional[str] = None,
-    timeout: float = 5.0,
-) -> list[str]:
-    """Fetch LM Studio chat-capable model keys from native ``/api/v1/models``.
-
-    Returns a list of model keys (e.g. ``publisher/model-name``) with embedding
-    models filtered out. Returns an empty list on network errors, malformed
-    responses, or empty/invalid base URLs.
-
-    Raises ``AuthError`` on HTTP 401/403 so callers can distinguish a missing
-    or wrong ``LM_API_KEY`` from an unreachable server — the most common
-    LM Studio support case once auth-enabled mode is turned on.
-    """
-    models = probe_lmstudio_models(api_key=api_key, base_url=base_url, timeout=timeout)
-    return models or []
-
-
-def ensure_lmstudio_model_loaded(
-    model: str,
-    base_url: Optional[str],
-    api_key: Optional[str],
-    target_context_length: int,
-    timeout: float = 120.0,
-) -> Optional[int]:
-    """Ensure LM Studio has ``model`` loaded with at least ``target_context_length``.
-
-    No-op when an instance is already loaded with sufficient context. Otherwise
-    POSTs ``/api/v1/models/load`` to (re)load with the target context, capped
-    at the model's ``max_context_length``. Returns the resolved loaded context
-    length, or ``None`` when the probe / load failed.
-    """
-    server_root = _lmstudio_server_root(base_url)
-    if not server_root:
-        return None
-
-    headers = _lmstudio_request_headers(api_key)
-
-    try:
-        raw_models = _lmstudio_fetch_raw_models(api_key=api_key, base_url=base_url, timeout=10)
-    except Exception:
-        raw_models = None
-    if raw_models is None:
-        return None
-
-    target_entry = None
-    for raw in raw_models:
-        if not isinstance(raw, dict):
-            continue
-        if raw.get("key") == model or raw.get("id") == model:
-            target_entry = raw
-            break
-    if target_entry is None:
-        return None
-
-    max_ctx = target_entry.get("max_context_length")
-    if isinstance(max_ctx, int) and max_ctx > 0:
-        target_context_length = min(target_context_length, max_ctx)
-
-    for inst in target_entry.get("loaded_instances") or []:
-        cfg = inst.get("config") if isinstance(inst, dict) else None
-        loaded_ctx = cfg.get("context_length") if isinstance(cfg, dict) else None
-        if isinstance(loaded_ctx, int) and loaded_ctx >= target_context_length:
-            return loaded_ctx
-
-    body = json.dumps({
-        "model": model,
-        "context_length": target_context_length,
-    }).encode()
-    load_headers = dict(headers)
-    load_headers["Content-Type"] = "application/json"
-    try:
-        with urllib.request.urlopen(
-            urllib.request.Request(
-                server_root + "/api/v1/models/load",
-                data=body,
-                headers=load_headers,
-                method="POST",
-            ),
-            timeout=timeout,
-        ) as resp:
-            resp.read()
-    except Exception:
-        return None
-    return target_context_length
-
-
-def lmstudio_model_reasoning_options(
-    model: str,
-    base_url: Optional[str],
-    api_key: Optional[str] = None,
-    timeout: float = 5.0,
-) -> list[str]:
-    """Return the reasoning ``allowed_options`` LM Studio publishes for ``model``.
-
-    Pulls ``capabilities.reasoning.allowed_options`` from ``/api/v1/models``.
-    Returns ``[]`` when the model is unknown, the endpoint is unreachable,
-    or the model does not declare a reasoning capability.
-    """
-    try:
-        raw_models = _lmstudio_fetch_raw_models(api_key=api_key, base_url=base_url, timeout=timeout)
-    except Exception:
-        raw_models = None
-    if not raw_models:
-        return []
-
-    for raw in raw_models:
-        if not isinstance(raw, dict):
-            continue
-        if raw.get("key") != model and raw.get("id") != model:
-            continue
-        caps = raw.get("capabilities")
-        reasoning = caps.get("reasoning") if isinstance(caps, dict) else None
-        opts = reasoning.get("allowed_options") if isinstance(reasoning, dict) else None
-        if isinstance(opts, list):
-            return [str(o).strip().lower() for o in opts if isinstance(o, str)]
-        return []
-    return []
-
-
 def _fetch_github_models(api_key: Optional[str] = None, timeout: float = 5.0) -> Optional[list[str]]:
    catalog = fetch_github_model_catalog(api_key=api_key, timeout=timeout)
    if not catalog:
@@ -3016,40 +2674,6 @@ def validate_requested_model(
            "message": "Model names cannot contain spaces.",
        }

-    if normalized == "lmstudio":
-        from hermes_cli.auth import AuthError
-        # Use probe_lmstudio_models so we can distinguish None (unreachable
-        # / malformed response) from [] (reachable, but no chat-capable models
-        # are loaded). fetch_lmstudio_models collapses both to [].
-        try:
-            models = probe_lmstudio_models(api_key=api_key, base_url=base_url)
-        except AuthError as exc:
-            return {
-                "accepted": False, "persist": False, "recognized": False,
-                "message": (
-                    f"{exc} Set `LM_API_KEY` (or update it) to match the server's bearer token."
-                ),
-            }
-        if models is None:
-            return {
-                "accepted": False, "persist": False, "recognized": False,
-                "message": f"Could not reach LM Studio's `/api/v1/models` to validate `{requested}`.",
-            }
-        if not models:
-            return {
-                "accepted": False, "persist": False, "recognized": False,
-                "message": (
-                    f"LM Studio is reachable but no chat-capable models are loaded. "
-                    f"Load `{requested}` in LM Studio (Developer tab → Load Model) and try again."
-                ),
-            }
-        if requested_for_lookup in set(models):
-            return {"accepted": True, "persist": True, "recognized": True, "message": None}
-        return {
-            "accepted": False, "persist": False, "recognized": False,
-            "message": f"Model `{requested}` was not found in LM Studio's model listing.",
-        }
-
    if normalized == "custom":
        # Try probing with correct auth for the api_mode.
        if api_mode == "anthropic_messages":
@@ -128,44 +128,27 @@ def _run_agent(
    # the user's configured default provider, which may not host the model
    # the caller just asked for.
    effective_provider = (provider or "").strip() or None
-    explicit_base_url_from_alias: Optional[str] = None
    if effective_provider is None and (model or env_model):
        # Only auto-detect when the model was explicitly requested via arg or
        # env var (not when it came from config — that's the "use my defaults"
        # path and the configured provider is already correct).
        explicit_model = (model or "").strip() or env_model
        if explicit_model:
-            # First check DIRECT_ALIASES populated from config.yaml `model_aliases:`.
-            # These map a user-defined alias to (model, provider, base_url) for
-            # endpoints not in any catalog (local servers, custom proxies, etc.).
-            try:
-                from hermes_cli import model_switch as _ms
-                _ms._ensure_direct_aliases()
-                direct = _ms.DIRECT_ALIASES.get(explicit_model.strip().lower())
-            except Exception:
-                direct = None
-            if direct is not None:
-                effective_model = direct.model
-                effective_provider = direct.provider
-                if direct.base_url:
-                    explicit_base_url_from_alias = direct.base_url.rstrip("/")
-            else:
-                cfg_provider = ""
-                if isinstance(model_cfg, dict):
-                    cfg_provider = str(model_cfg.get("provider") or "").strip().lower()
-                current_provider = (
-                    cfg_provider
-                    or os.getenv("HERMES_INFERENCE_PROVIDER", "").strip().lower()
-                    or "auto"
-                )
-                detected = detect_provider_for_model(explicit_model, current_provider)
-                if detected:
-                    effective_provider, effective_model = detected
+            cfg_provider = ""
+            if isinstance(model_cfg, dict):
+                cfg_provider = str(model_cfg.get("provider") or "").strip().lower()
+            current_provider = (
+                cfg_provider
+                or os.getenv("HERMES_INFERENCE_PROVIDER", "").strip().lower()
+                or "auto"
+            )
+            detected = detect_provider_for_model(explicit_model, current_provider)
+            if detected:
+                effective_provider, effective_model = detected

    runtime = resolve_runtime_provider(
        requested=effective_provider,
        target_model=effective_model or None,
-        explicit_base_url=explicit_base_url_from_alias,
    )

    # Pull in whatever toolsets the user has enabled for "cli".
@@ -1,7 +1,13 @@
 """``hermes plugins`` CLI subcommand — install, update, remove, and list plugins.

-Plugins are installed from Git repositories into ``~/.hermes/plugins/``.
-Supports full URLs and ``owner/repo`` shorthand (resolves to GitHub).
+Plugins can be installed from:
+- Official optional plugins shipped with the repo: ``official/<category>/<name>``
+- Git repositories (full URL or ``owner/repo`` GitHub shorthand)
+
+Official plugins live in ``optional-plugins/`` inside the Hermes repo and are
+copied into ``~/.hermes/plugins/`` on install — no git clone needed, no network
+required. They are NOT auto-discovered from ``optional-plugins/``; only installed
+copies in ``~/.hermes/plugins/`` are loaded by Hermes.

 After install, if the plugin ships an ``after-install.md`` file it is
 rendered with Rich Markdown.  Otherwise a default confirmation is shown.
@@ -95,10 +101,80 @@ def _resolve_git_url(identifier: str) -> str:

    raise ValueError(
        f"Invalid plugin identifier: '{identifier}'. "
-        "Use a Git URL or owner/repo shorthand."
+        "Use 'official/<category>/<name>', a Git URL, or owner/repo shorthand."
    )


+def _optional_plugins_dir() -> Path:
+    """Locate ``optional-plugins/``, a sibling of the directory holding this module."""
+    return Path(__file__).resolve().parents[1] / "optional-plugins"
+
+
+def _resolve_official_plugin(identifier: str) -> Optional[Path]:
+    """Return the source directory of an official optional plugin, or ``None``.
+
+    Accepted forms are ``official/<category>/<name>``, ``<category>/<name>`` and
+    a bare ``<name>`` sitting directly under ``optional-plugins/``. The whole
+    identifier must match: longer inputs (for example a Git URL whose last two
+    segments equal ``<category>/<name>``) return ``None`` so they reach the Git
+    installer instead of being silently replaced by the bundled copy. ``None``
+    is also returned when the path escapes ``optional-plugins/`` or contains
+    neither ``plugin.yaml`` nor ``__init__.py``.
+    """
+    parts = identifier.strip("/").split("/")
+
+    # Strip leading 'official' prefix if present
+    if parts and parts[0] == "official":
+        parts = parts[1:]
+
+    # Only <category>/<name> or <name> qualify; empty segments never do.
+    if len(parts) not in (1, 2) or not all(parts):
+        return None
+
+    base = _optional_plugins_dir()
+    try:
+        resolved = base.joinpath(*parts).resolve()
+        resolved.relative_to(base.resolve())  # traversal guard
+    except (ValueError, OSError):
+        return None
+
+    has_entry_point = (
+        (resolved / "plugin.yaml").exists() or (resolved / "__init__.py").exists()
+    )
+    if resolved.is_dir() and has_entry_point:
+        return resolved
+
+    return None
+
+
+def _list_official_plugins() -> list[tuple[str, str]]:
+    """Return ``(identifier, description)`` for each installable official plugin.
+
+    Lists only dirs ``_resolve_official_plugin`` accepts (``plugin.yaml`` or
+    ``__init__.py`` present); descriptions are best-effort and always ``str``.
+    """
+    base = _optional_plugins_dir()
+    if not base.is_dir():
+        return []
+    results: list[tuple[str, str]] = []
+    for category_dir in sorted(base.iterdir()):
+        if not category_dir.is_dir() or category_dir.name.startswith("."):
+            continue
+        for plugin_dir in sorted(category_dir.iterdir()):
+            manifest_file = plugin_dir / "plugin.yaml"
+            installable = manifest_file.exists() or (plugin_dir / "__init__.py").exists()
+            if plugin_dir.name.startswith(".") or not (plugin_dir.is_dir() and installable):
+                continue  # hidden, not a directory, or (like __pycache__) not a plugin
+            try:  # a missing or malformed manifest simply means "no description"
+                import yaml
+                data = yaml.safe_load(manifest_file.read_text(encoding="utf-8"))
+                desc = str(data.get("description") or "")
+            except Exception:
+                desc = ""
+            results.append((f"official/{category_dir.name}/{plugin_dir.name}", desc))
+    return results
+
+
 def _repo_name_from_url(url: str) -> str:
    """Extract the repo name from a Git URL for the plugin directory name."""
    # Strip trailing .git and slashes
@@ -296,7 +372,61 @@ def cmd_install(
    from rich.console import Console

    console = Console()
+    plugins_dir = _plugins_dir()

+    # ── Official optional plugins (no network, copied from optional-plugins/) ──
+    official_src = _resolve_official_plugin(identifier)
+    if official_src is not None:
+        manifest = _read_manifest(official_src)
+        plugin_name = manifest.get("name") or official_src.name
+        target = _sanitize_plugin_name(plugin_name, plugins_dir)
+
+        if target.exists():
+            if not force:
+                console.print(
+                    f"[red]Error:[/red] Plugin '{plugin_name}' already exists at {target}.\n"
+                    f"Use [bold]--force[/bold] to reinstall, or "
+                    f"[bold]hermes plugins update {plugin_name}[/bold] to update."
+                )
+                sys.exit(1)
+            console.print(f"[dim]  Removing existing {plugin_name}...[/dim]")
+            shutil.rmtree(target)
+
+        console.print(f"[dim]Installing {plugin_name} from official optional plugins...[/dim]")
+        shutil.copytree(str(official_src), str(target))
+
+        _copy_example_files(target, console)
+        _prompt_plugin_env_vars(manifest, console)
+        _display_after_install(target, identifier)
+
+        installed_name = manifest.get("name") or target.name
+        should_enable = enable
+        if should_enable is None:
+            if sys.stdin.isatty() and sys.stdout.isatty():
+                try:
+                    answer = input("  Enable now? [y/N] ").strip().lower()
+                    should_enable = answer in ("y", "yes")
+                except (EOFError, KeyboardInterrupt):
+                    should_enable = False
+            else:
+                should_enable = False
+
+        if should_enable:
+            enabled = _get_enabled_set()
+            disabled = _get_disabled_set()
+            enabled.add(installed_name)
+            disabled.discard(installed_name)
+            _save_enabled_set(enabled)
+            _save_disabled_set(disabled)
+            console.print(f"  [green]✓[/green] Plugin [bold]{installed_name}[/bold] enabled.")
+        else:
+            console.print(
+                f"  [dim]Plugin installed but not enabled. "
+                f"Run [bold]hermes plugins enable {installed_name}[/bold] to activate.[/dim]"
+            )
+        return
+
+    # ── Git URL / owner/repo install ──────────────────────────────────────────
    try:
        git_url = _resolve_git_url(identifier)
    except ValueError as e:
@@ -310,8 +440,6 @@ def cmd_install(
            "Consider using https:// or git@ for production installs."
        )

-    plugins_dir = _plugins_dir()
-
    # Clone into a temp directory first so we can read plugin.yaml for the name
    with tempfile.TemporaryDirectory() as tmp:
        tmp_target = Path(tmp) / "plugin"
@@ -696,16 +824,21 @@ def _discover_all_plugins() -> list:
    return list(seen.values())


-def cmd_list() -> None:
-    """List all plugins (bundled + user) with enabled/disabled state."""
+def cmd_list(available: bool = False) -> None:
+    """List all plugins (bundled + user) with enabled/disabled state.
+
+    When *available* is True, also show official optional plugins that are
+    not yet installed.
+    """
    from rich.console import Console
    from rich.table import Table

    console = Console()
    entries = _discover_all_plugins()
-    if not entries:
+    if not entries and not available:
        console.print("[dim]No plugins installed.[/dim]")
-        console.print("[dim]Install with:[/dim] hermes plugins install owner/repo")
+        console.print("[dim]Install with:[/dim] hermes plugins install official/<category>/<name>")
+        console.print("[dim]Browse available:[/dim] hermes plugins list --available")
        return

    enabled = _get_enabled_set()
@@ -734,6 +867,31 @@ def cmd_list() -> None:
    console.print("[dim]Enable/disable:[/dim] hermes plugins enable/disable <name>")
    console.print("[dim]Plugins are opt-in by default — only 'enabled' plugins load.[/dim]")

+    if available:
+        official = _list_official_plugins()
+        if official:
+            installed_names = {name for name, *_ in entries}
+            def _is_installed(ident: str) -> bool:
+                dirname = ident.rsplit("/", 1)[-1]
+                # Check both the directory name (langfuse-tracing) and
+                # common underscore variant (langfuse_tracing) since the
+                # installed plugin uses the manifest name, not the dir name.
+                return (dirname in installed_names
+                        or dirname.replace("-", "_") in installed_names)
+            not_installed = [(ident, desc) for ident, desc in official
+                             if not _is_installed(ident)]
+            if not_installed:
+                console.print()
+                avail_table = Table(title="Official optional plugins (not installed)", show_lines=False)
+                avail_table.add_column("Identifier", style="bold")
+                avail_table.add_column("Description")
+                for ident, desc in not_installed:
+                    avail_table.add_row(ident, desc)
+                console.print(avail_table)
+                console.print("[dim]Install:[/dim] hermes plugins install official/<category>/<name>")
+            else:
+                console.print("[dim]All official optional plugins are already installed.[/dim]")
+

 # ---------------------------------------------------------------------------
 # Provider plugin discovery helpers
@@ -999,6 +1157,7 @@ def _run_composite_ui(curses, plugin_names, plugin_labels, plugin_selected,
            # We need to map logical cursor positions to screen rows
            # accounting for non-navigable separator/headers

+            draw_row = 0  # tracks navigable item index

            # --- General Plugins section ---
            if n_plugins > 0:
@@ -1269,7 +1428,7 @@ def plugins_command(args) -> None:
    elif action == "disable":
        cmd_disable(args.name)
    elif action in ("list", "ls"):
-        cmd_list()
+        cmd_list(available=getattr(args, "available", False))
    elif action is None:
        cmd_toggle()
    else:
@@ -954,59 +954,6 @@ def import_profile(archive_path: str, name: Optional[str] = None) -> Path:
 # Rename
 # ---------------------------------------------------------------------------

-def _migrate_honcho_profile_host(old_name: str, new_name: str, new_dir: Path) -> None:
-    """Rename Honcho host blocks for a renamed profile without changing peers."""
-    old_host = f"hermes.{old_name}"
-    new_host = f"hermes.{new_name}"
-
-    candidates = [
-        new_dir / "honcho.json",
-        _get_default_hermes_home() / "honcho.json",
-        Path.home() / ".honcho" / "config.json",
-    ]
-
-    seen: set[Path] = set()
-    for path in candidates:
-        try:
-            resolved = path.resolve()
-        except OSError:
-            resolved = path
-        if resolved in seen or not path.is_file():
-            continue
-        seen.add(resolved)
-
-        try:
-            raw = json.loads(path.read_text(encoding="utf-8"))
-        except (OSError, json.JSONDecodeError):
-            continue
-
-        hosts = raw.get("hosts")
-        if not isinstance(hosts, dict) or old_host not in hosts:
-            continue
-
-        if new_host in hosts:
-            print(f"⚠ Honcho host block not migrated: {new_host} already exists in {path}")
-            continue
-
-        block = hosts[old_host]
-        if isinstance(block, dict) and "aiPeer" not in block:
-            bare = old_host.split(".", 1)[1] if "." in old_host else old_host
-            block["aiPeer"] = bare
-        hosts[new_host] = hosts.pop(old_host)
-        tmp = path.with_suffix(path.suffix + ".tmp")
-        try:
-            tmp.write_text(json.dumps(raw, indent=2, ensure_ascii=False) + "\n", encoding="utf-8")
-            tmp.replace(path)
-        except OSError:
-            try:
-                tmp.unlink(missing_ok=True)
-            except OSError:
-                pass
-            continue
-
-        print(f"✓ Honcho host updated: {old_host} → {new_host}")
-
-
 def rename_profile(old_name: str, new_name: str) -> Path:
    """Rename a profile: directory, wrapper script, service, active_profile.

@@ -1037,10 +984,7 @@ def rename_profile(old_name: str, new_name: str) -> Path:
    old_dir.rename(new_dir)
    print(f"✓ Renamed {old_dir.name} → {new_dir.name}")

-    # 3. Update profile-scoped Honcho host blocks, preserving aiPeer identity
-    _migrate_honcho_profile_host(old_name, new_name, new_dir)
-
-    # 4. Update wrapper script
+    # 3. Update wrapper script
    remove_wrapper_script(old_name)
    collision = check_alias_collision(new_name)
    if not collision:
@@ -1049,7 +993,7 @@ def rename_profile(old_name: str, new_name: str) -> Path:
    else:
        print(f"⚠ Cannot create alias '{new_name}' — {collision}")

-    # 5. Update active_profile if it pointed to old name
+    # 4. Update active_profile if it pointed to old name
    try:
        if get_active_profile() == old_name:
            set_active_profile(new_name)
@@ -71,13 +71,6 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = {
        auth_type="oauth_external",
        base_url_override="cloudcode-pa://google",
    ),
-    "lmstudio": HermesOverlay(
-        transport="openai_chat",
-        auth_type="api_key",
-        extra_env_vars=("LM_API_KEY",),
-        base_url_override="http://127.0.0.1:1234/v1",
-        base_url_env_var="LM_BASE_URL",
-    ),
    "copilot-acp": HermesOverlay(
        transport="codex_responses",
        auth_type="external_process",
@@ -165,10 +158,6 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = {
        transport="openai_chat",
        base_url_env_var="XIAOMI_BASE_URL",
    ),
-    "tencent-tokenhub": HermesOverlay(
-        transport="openai_chat",
-        base_url_env_var="TOKENHUB_BASE_URL",
-    ),
    "arcee": HermesOverlay(
        transport="openai_chat",
        base_url_override="https://api.arcee.ai/api/v1",
@@ -190,10 +179,6 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = {
        transport="openai_chat",  # default; overridden by api_mode in config
        base_url_env_var="AZURE_FOUNDRY_BASE_URL",
    ),
-    "bedrock": HermesOverlay(
-        transport="bedrock_converse",
-        auth_type="aws_sdk",
-    ),
 }


@@ -308,12 +293,6 @@ ALIASES: Dict[str, str] = {
    "mimo": "xiaomi",
    "xiaomi-mimo": "xiaomi",

-    # tencent
-    "tencent": "tencent-tokenhub",
-    "tokenhub": "tencent-tokenhub",
-    "tencent-cloud": "tencent-tokenhub",
-    "tencentmaas": "tencent-tokenhub",
-
    # bedrock
    "aws": "bedrock",
    "aws-bedrock": "bedrock",
@@ -351,8 +330,6 @@ _LABEL_OVERRIDES: Dict[str, str] = {
    "stepfun": "StepFun Step Plan",
    "xiaomi": "Xiaomi MiMo",
    "gmi": "GMI Cloud",
-    "tencent-tokenhub": "Tencent TokenHub",
-    "lmstudio": "LM Studio",
    "local": "Local endpoint",
    "bedrock": "AWS Bedrock",
    "ollama-cloud": "Ollama Cloud",
@@ -260,16 +260,11 @@ def _resolve_runtime_from_pool_entry(
            if cfg_base_url:
                base_url = cfg_base_url
        configured_mode = _parse_api_mode(model_cfg.get("api_mode"))
-        if provider in ("opencode-zen", "opencode-go"):
-            # Re-derive api_mode from the effective model rather than the
-            # persisted api_mode: the opencode providers serve both
-            # anthropic_messages and chat_completions models, so the previous
-            # session's mode must not leak across /model switches.
-            # Refs #16878.
+        if configured_mode and _provider_supports_explicit_api_mode(provider, configured_provider):
+            api_mode = configured_mode
+        elif provider in ("opencode-zen", "opencode-go"):
            from hermes_cli.models import opencode_model_api_mode
            api_mode = opencode_model_api_mode(provider, effective_model)
-        elif configured_mode and _provider_supports_explicit_api_mode(provider, configured_provider):
-            api_mode = configured_mode
        else:
            # Auto-detect Anthropic-compatible endpoints (/anthropic suffix,
            # Kimi /coding, api.openai.com → codex_responses, api.x.ai →
@@ -469,30 +464,6 @@ def _resolve_named_custom_runtime(
    explicit_api_key: Optional[str] = None,
    explicit_base_url: Optional[str] = None,
 ) -> Optional[Dict[str, Any]]:
-    # Bare `provider="custom"` with an explicit base_url (e.g. propagated
-    # from a `model_aliases:` direct-alias resolution) — build a runtime
-    # directly so the alias's base_url actually takes effect.
-    requested_norm = (requested_provider or "").strip().lower()
-    if requested_norm == "custom" and explicit_base_url:
-        base_url = explicit_base_url.strip().rstrip("/")
-        api_key_candidates = [
-            (explicit_api_key or "").strip(),
-            os.getenv("OPENAI_API_KEY", "").strip(),
-            os.getenv("OPENROUTER_API_KEY", "").strip(),
-        ]
-        api_key = next(
-            (c for c in api_key_candidates if has_usable_secret(c)),
-            "",
-        ) or "no-key-required"
-        return {
-            "provider": "custom",
-            "api_mode": _detect_api_mode_for_url(base_url) or "chat_completions",
-            "base_url": base_url,
-            "api_key": api_key,
-            "source": "direct-alias",
-            "requested_provider": requested_provider,
-        }
-
    custom_provider = _get_named_custom_provider(requested_provider)
    if not custom_provider:
        return None
@@ -1124,34 +1095,13 @@ def resolve_runtime_provider(
            cfg_base_url and "azure.com" in cfg_base_url.lower()
        )
        if _is_azure_endpoint:
-            # Honor user-specified env var hints on the model config before
-            # falling back to the built-in AZURE_ANTHROPIC_KEY / ANTHROPIC_API_KEY
-            # chain.  Accept both `key_env` (Hermes canonical — matches the
-            # custom_providers field name) and `api_key_env` (documented in the
-            # Azure Foundry guide and read by most Hermes-compatible importers).
-            # Matches the config.yaml examples in website/docs/guides/azure-foundry.md.
-            token = ""
-            for hint_key in ("key_env", "api_key_env"):
-                env_var = str(model_cfg.get(hint_key) or "").strip()
-                if env_var:
-                    token = os.getenv(env_var, "").strip()
-                    if token:
-                        break
-            # Next: an inline api_key on the model config (useful in multi-profile
-            # setups that want to avoid env-var juggling).
-            if not token:
-                token = str(model_cfg.get("api_key") or "").strip()
-            # Finally fall back to the historical fixed names.
-            if not token:
-                token = (
-                    os.getenv("AZURE_ANTHROPIC_KEY", "").strip()
-                    or os.getenv("ANTHROPIC_API_KEY", "").strip()
-                )
+            token = (
+                os.getenv("AZURE_ANTHROPIC_KEY", "").strip()
+                or os.getenv("ANTHROPIC_API_KEY", "").strip()
+            )
            if not token:
                raise AuthError(
-                    "No Azure Anthropic API key found. Set AZURE_ANTHROPIC_KEY or "
-                    "ANTHROPIC_API_KEY, or point key_env/api_key_env in your "
-                    "config.yaml model section at a custom env var."
+                    "No Azure Anthropic API key found. Set AZURE_ANTHROPIC_KEY or ANTHROPIC_API_KEY."
                )
        else:
            from agent.anthropic_adapter import resolve_anthropic_token
@@ -1262,20 +1212,15 @@ def resolve_runtime_provider(
            configured_provider = str(model_cfg.get("provider") or "").strip().lower()
            # Only honor persisted api_mode when it belongs to the same provider family.
            configured_mode = _parse_api_mode(model_cfg.get("api_mode"))
-            if provider in ("opencode-zen", "opencode-go"):
-                # opencode-zen/go must always re-derive api_mode from the
-                # target model (not the stale persisted api_mode), because
-                # the same provider serves both anthropic_messages
-                # (e.g. minimax-m2.7) and chat_completions (e.g.
-                # deepseek-v4-flash) and switching models via /model would
-                # otherwise carry the previous mode forward, stripping /v1
-                # from base_url for chat_completions models and 404'ing.
-                # Refs #16878.
+            if configured_mode and _provider_supports_explicit_api_mode(provider, configured_provider):
+                api_mode = configured_mode
+            elif provider in ("opencode-zen", "opencode-go"):
                from hermes_cli.models import opencode_model_api_mode
+                # Prefer the target_model from the caller (explicit mid-session
+                # switch) over the stale model.default; see _resolve_runtime_from_pool_entry
+                # for the same rationale.
                _effective = target_model or model_cfg.get("default", "")
                api_mode = opencode_model_api_mode(provider, _effective)
-            elif configured_mode and _provider_supports_explicit_api_mode(provider, configured_provider):
-                api_mode = configured_mode
            else:
                # Auto-detect Anthropic-compatible endpoints by URL convention
                # (e.g. https://api.minimax.io/anthropic, https://dashscope.../anthropic)
@@ -712,6 +712,8 @@ def setup_model_provider(config: dict, *, quick: bool = False):
    if isinstance(_m, dict):
        selected_provider = _m.get("provider")

+    nous_subscription_selected = selected_provider == "nous"
+
    # ── Same-provider fallback & rotation setup (full setup only) ──
    if not quick and _supports_same_provider_pool_setup(selected_provider):
        try:
@@ -6,7 +6,7 @@ Shows the status of all Hermes Agent components.

 import os
 import sys
-import subprocess  # noqa: F401 — re-exported for tests that monkeypatch status.subprocess to guard against regressions
+import subprocess
 from pathlib import Path

 PROJECT_ROOT = Path(__file__).parent.parent.resolve()
@@ -274,23 +274,6 @@ def show_status(args):
        label = "configured" if configured else "not configured (run: hermes model)"
        print(f"  {pname:<16} {check_mark(configured)} {label}")

-    # LM Studio reachability — only probe when it's the active provider so
-    # users with foreign configs don't see noise. Auth rejection vs. silent
-    # empty list is the most common LM Studio support case.
-    if _effective_provider_label() == "LM Studio":
-        from hermes_cli.models import probe_lmstudio_models
-        model_cfg = config.get("model")
-        base = (model_cfg.get("base_url") if isinstance(model_cfg, dict) else None) or get_env_value("LM_BASE_URL") or "http://127.0.0.1:1234/v1"
-        try:
-            models = probe_lmstudio_models(api_key=get_env_value("LM_API_KEY") or "", base_url=base, timeout=1.5)
-            if models is None:
-                ok, msg = False, f"unreachable at {base}"
-            else:
-                ok, msg = True, f"reachable ({len(models)} model(s)) at {base}"
-        except AuthError:
-            ok, msg = False, "auth rejected — set LM_API_KEY"
-        print(f"  {'LM Studio':<16} {check_mark(ok)} {msg}")
-
    # =========================================================================
    # Terminal Configuration
    # =========================================================================
@@ -263,6 +263,7 @@ TIPS = [
    "hermes status --deep runs deeper diagnostic checks across all components.",

    # --- Hidden Gems & Power-User Tricks ---
+    "BOOT.md at ~/.hermes/BOOT.md runs automatically on every gateway start — use it for startup checks.",
    "Cron jobs can attach a Python script (--script) whose stdout is injected into the prompt as context.",
    "Cron scripts live in ~/.hermes/scripts/ and run before the agent — perfect for data collection pipelines.",
    "prefill_messages_file in config.yaml injects few-shot examples into every API call, never saved to history.",
@@ -467,10 +467,7 @@ def _run_post_setup(post_setup_key: str):
    import shutil
    if post_setup_key in ("agent_browser", "browserbase"):
        node_modules = PROJECT_ROOT / "node_modules" / "agent-browser"
-        npm_bin = shutil.which("npm")
-        npx_bin = shutil.which("npx")
-        # Step 1: install the agent-browser npm package into node_modules/
-        if not node_modules.exists() and npm_bin:
+        if not node_modules.exists() and shutil.which("npm"):
            _print_info("    Installing Node.js dependencies for browser tools...")
            import subprocess
            result = subprocess.run(
@@ -482,94 +479,8 @@ def _run_post_setup(post_setup_key: str):
            else:
                from hermes_constants import display_hermes_home
                _print_warning(f"    npm install failed - run manually: cd {display_hermes_home()}/hermes-agent && npm install")
-                if result.stderr:
-                    _print_info(f"      {result.stderr.strip()[:200]}")
        elif not node_modules.exists():
            _print_warning("    Node.js not found - browser tools require: npm install (in hermes-agent directory)")
-            return
-
-        # Step 2: only the local browser provider actually needs Chromium on
-        # disk. Cloud providers (Browserbase, Browser Use, Firecrawl) host
-        # their own Chromium and don't need the local install.
-        if post_setup_key != "agent_browser":
-            return
-
-        # Step 3: ensure the Chromium / headless-shell build agent-browser
-        # drives is actually installed. Without it the CLI hangs on first
-        # use until the command timeout fires. Skip inside Docker — the
-        # image bakes Chromium in at build time, and runtime users usually
-        # can't write to PLAYWRIGHT_BROWSERS_PATH anyway.
-        try:
-            # Import lazily so the tools_config UI doesn't pull in the full
-            # browser_tool module at import time.
-            from tools.browser_tool import (
-                _chromium_installed,
-                _running_in_docker,
-            )
-        except Exception as exc:  # pragma: no cover — defensive
-            _print_warning(f"    Could not check Chromium status: {exc}")
-            return
-
-        if _chromium_installed():
-            _print_success("    Chromium browser already installed")
-            return
-
-        if _running_in_docker():
-            _print_warning(
-                "    Chromium is missing but you're running in Docker."
-            )
-            _print_info(
-                "    Pull the latest image to get the bundled Chromium:"
-            )
-            _print_info(
-                "      docker pull ghcr.io/nousresearch/hermes-agent:latest"
-            )
-            return
-
-        if not npx_bin:
-            _print_warning(
-                "    npx not found - install Chromium manually: npx agent-browser install --with-deps"
-            )
-            return
-
-        _print_info("    Installing Chromium (~170MB one-time download)...")
-        import subprocess
-        # Prefer the bundled agent-browser install subcommand so the
-        # version of Chromium matches the CLI. Fall back to npx shim on
-        # setups where the local bin stub isn't present.
-        local_ab = PROJECT_ROOT / "node_modules" / ".bin" / "agent-browser"
-        if sys.platform == "win32":
-            local_ab_win = local_ab.with_suffix(".cmd")
-            if local_ab_win.exists():
-                local_ab = local_ab_win
-        install_cmd = (
-            [str(local_ab), "install", "--with-deps"]
-            if local_ab.exists()
-            else [npx_bin, "-y", "agent-browser", "install", "--with-deps"]
-        )
-        try:
-            result = subprocess.run(
-                install_cmd,
-                capture_output=True, text=True, cwd=str(PROJECT_ROOT), timeout=600,
-            )
-            if result.returncode == 0:
-                _print_success("    Chromium installed")
-                # Invalidate the cached "missing" result so subsequent
-                # check_browser_requirements() calls see the new install.
-                import tools.browser_tool as _bt
-                _bt._cached_chromium_installed = None
-            else:
-                _print_warning("    Chromium install failed:")
-                tail = (result.stderr or result.stdout or "").strip().splitlines()[-3:]
-                for line in tail:
-                    _print_info(f"      {line[:200]}")
-                _print_info("    Run manually: npx agent-browser install --with-deps")
-        except subprocess.TimeoutExpired:
-            _print_warning("    Chromium install timed out (>10min)")
-            _print_info("    Run manually: npx agent-browser install --with-deps")
-        except Exception as exc:
-            _print_warning(f"    Chromium install failed: {exc}")
-            _print_info("    Run manually: npx agent-browser install --with-deps")

    elif post_setup_key == "camofox":
        camofox_dir = PROJECT_ROOT / "node_modules" / "@askjo" / "camofox-browser"
@@ -697,21 +608,12 @@ def _run_post_setup(post_setup_key: str):
                _print_success("    langfuse SDK installed")
            else:
                _print_warning("    langfuse SDK install failed — run manually: pip install langfuse")
-        # Opt the bundled observability/langfuse plugin into plugins.enabled.
-        # The plugin ships in the repo but doesn't load until the user enables
-        # it (standalone plugins are opt-in).
+        # Install and enable the official optional plugin into ~/.hermes/plugins/.
        try:
-            from hermes_cli.plugins_cmd import _get_enabled_set, _save_enabled_set
-            enabled = _get_enabled_set()
-            if "observability/langfuse" in enabled or "langfuse" in enabled:
-                _print_success("    Plugin observability/langfuse already enabled")
-            else:
-                enabled.add("observability/langfuse")
-                _save_enabled_set(enabled)
-                _print_success("    Plugin observability/langfuse enabled")
-        except Exception as exc:
-            _print_warning(f"    Could not enable plugin automatically: {exc}")
-            _print_info("    Run manually: hermes plugins enable observability/langfuse")
+            from hermes_cli.plugins_cmd import cmd_install as _plugins_install
+            _plugins_install("official/observability/langfuse-tracing", enable=True)
+        except SystemExit:
+            pass  # cmd_install prints its own errors and calls sys.exit
        _print_info("    Restart Hermes for tracing to take effect.")
        _print_info("    Verify: hermes plugins list")

@@ -925,16 +827,6 @@ def _get_platform_tools(
    else:
        enabled_toolsets.update(explicit_mcp_servers)

-    # Honor agent.disabled_toolsets from config.yaml — allows users to
-    # globally suppress specific toolsets (e.g. "memory") across all
-    # platforms without per-platform toolset configuration.  This runs
-    # last so it overrides everything above.
-    agent_cfg = config.get("agent") or {}
-    disabled_toolsets = agent_cfg.get("disabled_toolsets") or []
-    if disabled_toolsets:
-        disabled_set = {str(ts) for ts in disabled_toolsets}
-        enabled_toolsets -= disabled_set
-
    return enabled_toolsets


@@ -736,7 +736,7 @@ async def get_sessions(limit: int = 20, offset: int = 0):
            return {"sessions": sessions, "total": total, "limit": limit, "offset": offset}
        finally:
            db.close()
-    except Exception:
+    except Exception as e:
        _log.exception("GET /api/sessions failed")
        raise HTTPException(status_code=500, detail="Internal server error")

@@ -968,7 +968,7 @@ async def update_config(body: ConfigUpdate):
    try:
        save_config(_denormalize_config_from_web(body.config))
        return {"ok": True}
-    except Exception:
+    except Exception as e:
        _log.exception("PUT /api/config failed")
        raise HTTPException(status_code=500, detail="Internal server error")

@@ -997,7 +997,7 @@ async def set_env_var(body: EnvVarUpdate):
    try:
        save_env_value(body.key, body.value)
        return {"ok": True, "key": body.key}
-    except Exception:
+    except Exception as e:
        _log.exception("PUT /api/env failed")
        raise HTTPException(status_code=500, detail="Internal server error")

@@ -1011,7 +1011,7 @@ async def remove_env_var(body: EnvVarDelete):
        return {"ok": True, "key": body.key}
    except HTTPException:
        raise
-    except Exception:
+    except Exception as e:
        _log.exception("DELETE /api/env failed")
        raise HTTPException(status_code=500, detail="Internal server error")

@@ -1568,6 +1568,7 @@ async def _start_device_code_flow(provider_id: str) -> Dict[str, Any]:
    then spawns a background poller. Returns the user-facing display fields
    so the UI can render the verification page link + user code.
    """
+    from hermes_cli import auth as hauth
    if provider_id == "nous":
        from hermes_cli.auth import _request_device_code, PROVIDER_REGISTRY
        import httpx
@@ -11,6 +11,7 @@ hot-reloaded by the webhook adapter without a gateway restart.
 """

 import json
+import os
 import re
 import secrets
 import time
@@ -18,7 +19,6 @@ from pathlib import Path
 from typing import Dict

 from hermes_constants import display_hermes_home
-from utils import atomic_replace


 _SUBSCRIPTIONS_FILENAME = "webhook_subscriptions.json"
@@ -52,7 +52,7 @@ def _save_subscriptions(subs: Dict[str, dict]) -> None:
        json.dumps(subs, indent=2, ensure_ascii=False),
        encoding="utf-8",
    )
-    atomic_replace(tmp_path, path)
+    os.replace(str(tmp_path), str(path))


 def _get_webhook_config() -> dict:
@@ -33,7 +33,7 @@ T = TypeVar("T")

 DEFAULT_DB_PATH = get_hermes_home() / "state.db"

-SCHEMA_VERSION = 11
+SCHEMA_VERSION = 10

 SCHEMA_SQL = """
 CREATE TABLE IF NOT EXISTS schema_version (
@@ -102,26 +102,22 @@ CREATE INDEX IF NOT EXISTS idx_messages_session ON messages(session_id, timestam

 FTS_SQL = """
 CREATE VIRTUAL TABLE IF NOT EXISTS messages_fts USING fts5(
-    content
+    content,
+    content=messages,
+    content_rowid=id
 );

 CREATE TRIGGER IF NOT EXISTS messages_fts_insert AFTER INSERT ON messages BEGIN
-    INSERT INTO messages_fts(rowid, content) VALUES (
-        new.id,
-        COALESCE(new.content, '') || ' ' || COALESCE(new.tool_name, '') || ' ' || COALESCE(new.tool_calls, '')
-    );
+    INSERT INTO messages_fts(rowid, content) VALUES (new.id, new.content);
 END;

 CREATE TRIGGER IF NOT EXISTS messages_fts_delete AFTER DELETE ON messages BEGIN
-    DELETE FROM messages_fts WHERE rowid = old.id;
+    INSERT INTO messages_fts(messages_fts, rowid, content) VALUES('delete', old.id, old.content);
 END;

 CREATE TRIGGER IF NOT EXISTS messages_fts_update AFTER UPDATE ON messages BEGIN
-    DELETE FROM messages_fts WHERE rowid = old.id;
-    INSERT INTO messages_fts(rowid, content) VALUES (
-        new.id,
-        COALESCE(new.content, '') || ' ' || COALESCE(new.tool_name, '') || ' ' || COALESCE(new.tool_calls, '')
-    );
+    INSERT INTO messages_fts(messages_fts, rowid, content) VALUES('delete', old.id, old.content);
+    INSERT INTO messages_fts(rowid, content) VALUES (new.id, new.content);
 END;
 """

@@ -132,26 +128,22 @@ END;
 FTS_TRIGRAM_SQL = """
 CREATE VIRTUAL TABLE IF NOT EXISTS messages_fts_trigram USING fts5(
    content,
+    content=messages,
+    content_rowid=id,
    tokenize='trigram'
 );

 CREATE TRIGGER IF NOT EXISTS messages_fts_trigram_insert AFTER INSERT ON messages BEGIN
-    INSERT INTO messages_fts_trigram(rowid, content) VALUES (
-        new.id,
-        COALESCE(new.content, '') || ' ' || COALESCE(new.tool_name, '') || ' ' || COALESCE(new.tool_calls, '')
-    );
+    INSERT INTO messages_fts_trigram(rowid, content) VALUES (new.id, new.content);
 END;

 CREATE TRIGGER IF NOT EXISTS messages_fts_trigram_delete AFTER DELETE ON messages BEGIN
-    DELETE FROM messages_fts_trigram WHERE rowid = old.id;
+    INSERT INTO messages_fts_trigram(messages_fts_trigram, rowid, content) VALUES('delete', old.id, old.content);
 END;

 CREATE TRIGGER IF NOT EXISTS messages_fts_trigram_update AFTER UPDATE ON messages BEGIN
-    DELETE FROM messages_fts_trigram WHERE rowid = old.id;
-    INSERT INTO messages_fts_trigram(rowid, content) VALUES (
-        new.id,
-        COALESCE(new.content, '') || ' ' || COALESCE(new.tool_name, '') || ' ' || COALESCE(new.tool_calls, '')
-    );
+    INSERT INTO messages_fts_trigram(messages_fts_trigram, rowid, content) VALUES('delete', old.id, old.content);
+    INSERT INTO messages_fts_trigram(rowid, content) VALUES (new.id, new.content);
 END;
 """

@@ -436,51 +428,6 @@ class SessionDB:
                        "INSERT INTO messages_fts_trigram(rowid, content) "
                        "SELECT id, content FROM messages WHERE content IS NOT NULL"
                    )
-            if current_version < 11:
-                # v11: re-index FTS5 tables to cover tool_name + tool_calls and
-                # switch from external-content to inline mode. Existing DBs have
-                # old-schema FTS tables and triggers that IF NOT EXISTS won't
-                # overwrite, so we drop them explicitly and let the post-migration
-                # existence checks (below) recreate them from FTS_SQL /
-                # FTS_TRIGRAM_SQL, then backfill every message row. Fixes #16751.
-                for _trig in (
-                    "messages_fts_insert",
-                    "messages_fts_delete",
-                    "messages_fts_update",
-                    "messages_fts_trigram_insert",
-                    "messages_fts_trigram_delete",
-                    "messages_fts_trigram_update",
-                ):
-                    try:
-                        cursor.execute(f"DROP TRIGGER IF EXISTS {_trig}")
-                    except sqlite3.OperationalError:
-                        pass
-                for _tbl in ("messages_fts", "messages_fts_trigram"):
-                    try:
-                        cursor.execute(f"DROP TABLE IF EXISTS {_tbl}")
-                    except sqlite3.OperationalError:
-                        pass
-                # Recreate virtual tables + triggers with the new inline-mode
-                # schema that indexes content || tool_name || tool_calls.
-                cursor.executescript(FTS_SQL)
-                cursor.executescript(FTS_TRIGRAM_SQL)
-                # Backfill both indexes from every existing messages row.
-                cursor.execute(
-                    "INSERT INTO messages_fts(rowid, content) "
-                    "SELECT id, "
-                    "COALESCE(content, '') || ' ' || "
-                    "COALESCE(tool_name, '') || ' ' || "
-                    "COALESCE(tool_calls, '') "
-                    "FROM messages"
-                )
-                cursor.execute(
-                    "INSERT INTO messages_fts_trigram(rowid, content) "
-                    "SELECT id, "
-                    "COALESCE(content, '') || ' ' || "
-                    "COALESCE(tool_name, '') || ' ' || "
-                    "COALESCE(tool_calls, '') "
-                    "FROM messages"
-                )
            if current_version < SCHEMA_VERSION:
                cursor.execute(
                    "UPDATE schema_version SET version = ?",
@@ -1172,85 +1119,6 @@ class SessionDB:

        return self._execute_write(_do)

-    def replace_messages(self, session_id: str, messages: List[Dict[str, Any]]) -> None:
-        """Atomically replace every message for a session.
-
-        Used by transcript-rewrite flows such as /retry, /undo, and /compress.
-        The delete + reinsert sequence must commit as one transaction so a
-        mid-rewrite failure does not leave SQLite with a partial transcript.
-        """
-
-        def _do(conn):
-            conn.execute(
-                "DELETE FROM messages WHERE session_id = ?", (session_id,)
-            )
-            conn.execute(
-                "UPDATE sessions SET message_count = 0, tool_call_count = 0 WHERE id = ?",
-                (session_id,),
-            )
-
-            now_ts = time.time()
-            total_messages = 0
-            total_tool_calls = 0
-            for msg in messages:
-                role = msg.get("role", "unknown")
-                tool_calls = msg.get("tool_calls")
-                reasoning_details = msg.get("reasoning_details") if role == "assistant" else None
-                codex_reasoning_items = (
-                    msg.get("codex_reasoning_items") if role == "assistant" else None
-                )
-                codex_message_items = (
-                    msg.get("codex_message_items") if role == "assistant" else None
-                )
-
-                reasoning_details_json = (
-                    json.dumps(reasoning_details) if reasoning_details else None
-                )
-                codex_items_json = (
-                    json.dumps(codex_reasoning_items) if codex_reasoning_items else None
-                )
-                codex_message_items_json = (
-                    json.dumps(codex_message_items) if codex_message_items else None
-                )
-                tool_calls_json = json.dumps(tool_calls) if tool_calls else None
-
-                conn.execute(
-                    """INSERT INTO messages (session_id, role, content, tool_call_id,
-                       tool_calls, tool_name, timestamp, token_count, finish_reason,
-                       reasoning, reasoning_content, reasoning_details, codex_reasoning_items,
-                       codex_message_items)
-                       VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
-                    (
-                        session_id,
-                        role,
-                        msg.get("content"),
-                        msg.get("tool_call_id"),
-                        tool_calls_json,
-                        msg.get("tool_name"),
-                        now_ts,
-                        msg.get("token_count"),
-                        msg.get("finish_reason"),
-                        msg.get("reasoning") if role == "assistant" else None,
-                        msg.get("reasoning_content") if role == "assistant" else None,
-                        reasoning_details_json,
-                        codex_items_json,
-                        codex_message_items_json,
-                    ),
-                )
-                total_messages += 1
-                if tool_calls is not None:
-                    total_tool_calls += (
-                        len(tool_calls) if isinstance(tool_calls, list) else 1
-                    )
-                now_ts += 1e-6
-
-            conn.execute(
-                "UPDATE sessions SET message_count = ?, tool_call_count = ? WHERE id = ?",
-                (total_messages, total_tool_calls, session_id),
-            )
-
-        self._execute_write(_do)
-
    def get_messages(self, session_id: str) -> List[Dict[str, Any]]:
        """Load all messages for a session, ordered by timestamp."""
        with self._lock:
@@ -1487,9 +1355,9 @@ class SessionDB:
        # quotes.  FTS5's tokenizer splits on dots and hyphens, turning
        # ``chat-send`` into ``chat AND send`` and ``P2.2`` into ``p2 AND 2``.
        # Quoting preserves phrase semantics.  A single pass avoids the
-        # double-quoting bug that would occur if dotted, hyphenated and underscored
+        # double-quoting bug that would occur if dotted and hyphenated
        # patterns were applied sequentially (e.g. ``my-app.config``).
-        sanitized = re.sub(r"\b(\w+(?:[._-]\w+)+)\b", r'"\1"', sanitized)
+        sanitized = re.sub(r"\b(\w+(?:[.-]\w+)+)\b", r'"\1"', sanitized)

        # Step 6: Restore preserved quoted phrases
        for i, quoted in enumerate(_quoted_parts):
@@ -1666,8 +1534,8 @@ class SessionDB:
                # Short CJK query (1-2 chars) — trigram needs ≥3 CJK chars.
                # Fall back to LIKE substring search.
                escaped = raw_query.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
-                like_where = ["(m.content LIKE ? ESCAPE '\\' OR m.tool_name LIKE ? ESCAPE '\\' OR m.tool_calls LIKE ? ESCAPE '\\')"]
-                like_params: list = [f"%{escaped}%", f"%{escaped}%", f"%{escaped}%"]
+                like_where = ["m.content LIKE ? ESCAPE '\\'"]
+                like_params: list = [f"%{escaped}%"]
                if source_filter is not None:
                    like_where.append(f"s.source IN ({','.join('?' for _ in source_filter)})")
                    like_params.extend(source_filter)
@@ -138,18 +138,12 @@ def _run_async(coro):

 discover_builtin_tools()

-# MCP tool discovery (external MCP servers from config) used to run here as
-# a module-level side effect.  It was removed because discover_mcp_tools()
-# internally uses a blocking future.result(timeout=120) wait, and the
-# gateway lazy-imports this module from inside the asyncio event loop on
-# the first user message — freezing Discord/Telegram heartbeats for up to
-# 120s whenever any configured MCP server was slow or unreachable (#16856).
-#
-# Each entry point now runs discovery explicitly at its own startup:
-#   - gateway/run.py            -> start_gateway() uses run_in_executor
-#   - cli.py, hermes_cli/*      -> inline on startup (no event loop)
-#   - tui_gateway/server.py     -> inline on startup (no event loop)
-#   - acp_adapter/server.py     -> asyncio.to_thread on session init
+# MCP tool discovery (external MCP servers from config). NOTE(review): runs at import time and may block the importer while servers respond — see #16856.
+try:
+    from tools.mcp_tool import discover_mcp_tools
+    discover_mcp_tools()
+except Exception as e:
+    logger.debug("MCP tool discovery failed: %s", e)

 # Plugin tool discovery (user/project/pip plugins)
 try:
@@ -415,27 +409,24 @@ def coerce_tool_args(tool_name: str, args: Dict[str, Any]) -> Dict[str, Any]:
        if not prop_schema:
            continue
        expected = prop_schema.get("type")
-        if not expected and not _schema_allows_null(prop_schema):
+        if not expected:
            continue
-        coerced = _coerce_value(value, expected, schema=prop_schema)
+        coerced = _coerce_value(value, expected)
        if coerced is not value:
            args[key] = coerced

    return args


-def _coerce_value(value: str, expected_type, schema: dict | None = None):
+def _coerce_value(value: str, expected_type):
    """Attempt to coerce a string *value* to *expected_type*.

    Returns the original string when coercion is not applicable or fails.
    """
-    if _schema_allows_null(schema) and value.strip().lower() == "null":
-        return None
-
    if isinstance(expected_type, list):
        # Union type — try each in order, return first successful coercion
        for t in expected_type:
-            result = _coerce_value(value, t, schema=schema)
+            result = _coerce_value(value, t)
            if result is not value:
                return result
        return value
@@ -448,35 +439,9 @@ def _coerce_value(value: str, expected_type, schema: dict | None = None):
        return _coerce_json(value, list)
    if expected_type == "object":
        return _coerce_json(value, dict)
-    if expected_type == "null" and value.strip().lower() == "null":
-        return None
    return value


-def _schema_allows_null(schema: dict | None) -> bool:
-    """Return True when a JSON Schema fragment explicitly permits null."""
-    if not isinstance(schema, dict):
-        return False
-
-    schema_type = schema.get("type")
-    if schema_type == "null":
-        return True
-    if isinstance(schema_type, list) and "null" in schema_type:
-        return True
-    if schema.get("nullable") is True:
-        return True
-
-    for union_key in ("anyOf", "oneOf"):
-        variants = schema.get(union_key)
-        if not isinstance(variants, list):
-            continue
-        for variant in variants:
-            if isinstance(variant, dict) and variant.get("type") == "null":
-                return True
-
-    return False
-
-
 def _coerce_json(value: str, expected_python_type: type):
    """Parse *value* as JSON when the schema expects an array or object.

@@ -187,10 +187,7 @@

        if [ "$MODE" = "--apply" ]; then
          sed -i "s|hash = \"sha256-[^\"]*\";|hash = \"$NEW_HASH\";|" "$NIX_FILE"
-          if ! nix build ".#$ATTR.npmDeps" --no-link --print-build-logs; then
-            echo "    verification build failed after hash update" >&2
-            exit 1
-          fi
+          nix build ".#$ATTR.npmDeps" --no-link --print-build-logs
          FIXED=1
          echo "    fixed"
        fi
@@ -455,15 +455,7 @@
      extraPackages = mkOption {
        type = types.listOf types.package;
        default = [ ];
-        description = ''
-          Extra packages available to the agent — terminal commands, skills,
-          cron jobs, and the service process all see them.
-
-          Implemented via the hermes user's per-user profile
-          (`/etc/profiles/per-user/${cfg.user}/bin`), which NixOS includes
-          in PATH for login shells.  The packages are also added to the
-          systemd service PATH for direct process access.
-        '';
+        description = "Extra packages available on PATH.";
      };

      extraPlugins = mkOption {
@@ -648,17 +640,6 @@
      }

      # ── Warnings ──────────────────────────────────────────────────────
-      # ── Per-user profile for extraPackages ───────────────────────────
-      # Wire extraPackages into the hermes user's per-user profile so the
-      # login-shell snapshot (which rebuilds PATH from NixOS profiles) sees
-      # them.  The systemd service PATH also includes them for direct access.
-      (lib.mkIf (cfg.extraPackages != []) {
-        # listOf options are merged by the NixOS module system — this appends to
-        # any packages the operator assigned to this user externally (e.g. when
-        # createUser = false and the user definition lives elsewhere in the config).
-        users.users.${cfg.user}.packages = cfg.extraPackages;
-      })
-
      (lib.mkIf (cfg.container.enable && !cfg.addToSystemPackages && cfg.container.hostUsers != []) {
        warnings = [
          ''
@@ -4,7 +4,7 @@ let
  src = ../web;
  npmDeps = pkgs.fetchNpmDeps {
    inherit src;
-    hash = "sha256-AahWmJ9gDQ9pMPa1FYwUjYdO2mOi6JM9Mst27E0vp68=";
+    hash = "sha256-4Z8KQ69QhO83X6zff+5urWBv6MME686MhTTMdwSl65o=";
  };

  npm = hermesNpmLib.mkNpmPassthru { folder = "web"; attr = "web"; pname = "hermes-web"; };
@@ -1,14 +1,11 @@
 """langfuse — Hermes plugin for Langfuse observability.

 Traces Hermes conversations, LLM calls, and tool usage to Langfuse.
-
-Activation is handled by the Hermes plugin system — standalone plugins only
-load when listed in ``plugins.enabled`` (via ``hermes plugins enable
-observability/langfuse`` or ``hermes tools → Langfuse Observability``). At
-runtime the plugin also requires the ``langfuse`` SDK and credentials; if
-either is missing the hooks are inert.
+Enable via ``hermes tools``, ``plugins.langfuse.enabled: true`` in config.yaml,
+or HERMES_LANGFUSE_ENABLED=true; credentials go in ~/.hermes/.env.

 Required env vars (set via ``hermes tools`` or ~/.hermes/.env):
+  HERMES_LANGFUSE_ENABLED     - "true" activates tracing; only read when config.yaml has no plugins.langfuse.enabled
  HERMES_LANGFUSE_PUBLIC_KEY  - Langfuse project public key (pk-lf-...)
  HERMES_LANGFUSE_SECRET_KEY  - Langfuse project secret key (sk-lf-...)
  HERMES_LANGFUSE_BASE_URL    - Langfuse server URL (default: https://cloud.langfuse.com)
@@ -80,37 +77,42 @@ def _debug(message: str) -> None:
        logger.info("Langfuse tracing: %s", message)


-# Sentinel: "_get_langfuse() has tried and failed". Lets us short-circuit
-# every subsequent hook call without re-checking env vars or re-attempting
-# SDK init. Cleared by reset_cache_for_tests().
-_INIT_FAILED = object()
+def _is_enabled() -> bool:
+    if Langfuse is None:
+        return False
+    # Primary activation path: config.yaml plugins.langfuse.enabled
+    try:
+        from hermes_cli.config import load_config
+        _cfg = load_config()
+        _plugin_cfg = _cfg.get("plugins", {})
+        if isinstance(_plugin_cfg, dict):
+            _lt_cfg = _plugin_cfg.get("langfuse", {})
+            if isinstance(_lt_cfg, dict) and "enabled" in _lt_cfg:
+                if not _lt_cfg["enabled"]:
+                    return False
+                # Explicit enabled=true in config — skip the HERMES_LANGFUSE_ENABLED check; credentials are still required
+                public_key = _env("HERMES_LANGFUSE_PUBLIC_KEY") or _env("LANGFUSE_PUBLIC_KEY")
+                secret_key = _env("HERMES_LANGFUSE_SECRET_KEY") or _env("LANGFUSE_SECRET_KEY")
+                return bool(public_key and secret_key)
+    except Exception:
+        pass
+    # Backward-compat path: HERMES_LANGFUSE_ENABLED env var (legacy .env installs)
+    if not _env_bool("HERMES_LANGFUSE_ENABLED"):
+        return False
+    public_key = _env("HERMES_LANGFUSE_PUBLIC_KEY") or _env("LANGFUSE_PUBLIC_KEY")
+    secret_key = _env("HERMES_LANGFUSE_SECRET_KEY") or _env("LANGFUSE_SECRET_KEY")
+    return bool(public_key and secret_key)


 def _get_langfuse() -> Optional[Langfuse]:
-    """Return a cached Langfuse client, or ``None`` if unavailable.
-
-    Activation of this plugin is controlled by the Hermes plugin system —
-    this function only handles the runtime-availability gate (SDK installed
-    + credentials present). The result is cached: on the first call we try
-    to construct a client, and every subsequent call returns that client
-    (or fast-returns ``None`` if init failed).
-    """
    global _LANGFUSE_CLIENT
-    if _LANGFUSE_CLIENT is _INIT_FAILED:
+    if not _is_enabled():
        return None
    if _LANGFUSE_CLIENT is not None:
        return _LANGFUSE_CLIENT

-    if Langfuse is None:
-        _LANGFUSE_CLIENT = _INIT_FAILED
-        return None
-
    public_key = _env("HERMES_LANGFUSE_PUBLIC_KEY") or _env("LANGFUSE_PUBLIC_KEY")
    secret_key = _env("HERMES_LANGFUSE_SECRET_KEY") or _env("LANGFUSE_SECRET_KEY")
-    if not (public_key and secret_key):
-        _LANGFUSE_CLIENT = _INIT_FAILED
-        return None
-
    base_url = _env("HERMES_LANGFUSE_BASE_URL") or _env("LANGFUSE_BASE_URL") or "https://cloud.langfuse.com"
    environment = _env("HERMES_LANGFUSE_ENV") or _env("LANGFUSE_ENV")
    release = _env("HERMES_LANGFUSE_RELEASE") or _env("LANGFUSE_RELEASE")
@@ -135,7 +137,6 @@ def _get_langfuse() -> Optional[Langfuse]:
        _LANGFUSE_CLIENT = Langfuse(**kwargs)
    except Exception as exc:  # pragma: no cover - fail-open
        logger.warning("Could not initialize Langfuse client: %s", exc)
-        _LANGFUSE_CLIENT = _INIT_FAILED
        return None

    return _LANGFUSE_CLIENT
@@ -0,0 +1,38 @@
+# After installing langfuse
+
+Langfuse tracing is now installed and enabled for your Hermes profile.
+
+## Required credentials
+
+Set these in `~/.hermes/.env` (or via `hermes tools` → Langfuse Observability):
+
+```bash
+HERMES_LANGFUSE_PUBLIC_KEY=pk-lf-...
+HERMES_LANGFUSE_SECRET_KEY=sk-lf-...
+HERMES_LANGFUSE_BASE_URL=https://cloud.langfuse.com   # or your self-hosted URL
+```
+
+## Verify
+
+```bash
+hermes plugins list          # langfuse should appear as enabled
+hermes chat -q "hello"       # then check Langfuse for a "Hermes turn" trace
+```
+
+## Optional settings
+
+```bash
+HERMES_LANGFUSE_ENV=production       # environment tag
+HERMES_LANGFUSE_RELEASE=v1.0.0      # release tag
+HERMES_LANGFUSE_SAMPLE_RATE=0.5     # sample 50% of traces
+HERMES_LANGFUSE_MAX_CHARS=12000     # max chars per field (default: 12000)
+HERMES_LANGFUSE_DEBUG=true          # verbose plugin logging
+```
+
+## Dependencies
+
+The `langfuse` Python SDK is required. Install it into your Hermes venv:
+
+```bash
+pip install langfuse
+```
@@ -1,6 +1,6 @@
 name: langfuse
 version: "1.0.0"
-description: "Optional Langfuse observability for Hermes — traces conversations, LLM calls, and tool usage. Opt-in via `hermes plugins enable observability/langfuse` or `hermes tools → Langfuse Observability`."
+description: "Optional Langfuse observability for Hermes — traces conversations, LLM calls, and tool usage. Install via: hermes plugins install official/observability/langfuse"
 author: NousResearch
 requires_env:
  - HERMES_LANGFUSE_PUBLIC_KEY
@@ -224,24 +224,6 @@ MIGRATION_PRESETS: Dict[str, set[str]] = {
 }


-# ───────────────────────────────────────────────────────────────────────
-# Item shape constants — kept stable for downstream consumers of report.json.
-# Inspired by OpenClaw's src/plugin-sdk/migration.ts so both sides speak the
-# same vocabulary.  Values intentionally match the strings already produced
-# by this script (migrated/archived/skipped/conflict/error) so the addition
-# is backward-compatible.
-# ───────────────────────────────────────────────────────────────────────
-STATUS_MIGRATED = "migrated"
-STATUS_ARCHIVED = "archived"
-STATUS_SKIPPED = "skipped"
-STATUS_CONFLICT = "conflict"
-STATUS_ERROR = "error"
-STATUS_PLANNED = "planned"
-
-REASON_TARGET_EXISTS = "Target exists and overwrite is disabled"
-REASON_BLOCKED_BY_APPLY_CONFLICT = "blocked by earlier apply conflict"
-
-
@dataclass
 class ItemResult:
    kind: str
@@ -250,7 +232,6 @@ class ItemResult:
    status: str
    reason: str = ""
    details: Dict[str, Any] = field(default_factory=dict)
-    sensitive: bool = False


 def parse_selection_values(values: Optional[Sequence[str]]) -> List[str]:
@@ -566,128 +547,32 @@ def relative_label(path: Path, root: Path) -> str:
        return str(path)


-# ───────────────────────────────────────────────────────────────────────
-# Secret redaction for migration reports.
-#
-# The report JSON persists to disk inside the migration output directory and
-# frequently ends up in bug reports or support channels.  Anything that looks
-# like a credential — by key name or by value shape — is replaced with
-# "[redacted]" before the report is written.
-#
-# Modelled on OpenClaw's src/plugin-sdk/migration.ts so both migration tools
-# redact consistently.  Pure function — safe to call on any plain-data dict.
-# ───────────────────────────────────────────────────────────────────────
-REDACTED_MIGRATION_VALUE = "[redacted]"
-
-_SECRET_KEY_MARKERS = (
-    "accesstoken",
-    "apikey",
-    "authorization",
-    "bearertoken",
-    "clientsecret",
-    "cookie",
-    "credential",
-    "password",
-    "privatekey",
-    "refreshtoken",
-    "secret",
-)
-
-_SECRET_VALUE_PATTERNS = (
-    re.compile(r"\bBearer\s+[A-Za-z0-9._~+/=\-]+"),
-    re.compile(r"\bsk-[A-Za-z0-9_\-]{8,}\b"),
-    re.compile(r"\bgh[pousr]_[A-Za-z0-9_]{16,}\b"),
-    re.compile(r"\bxox[abprs]-[A-Za-z0-9\-]{8,}\b"),
-    re.compile(r"\bAIza[0-9A-Za-z_\-]{12,}\b"),
-)
-
-
-def _normalize_secret_key(key: str) -> str:
-    return re.sub(r"[^a-z0-9]", "", key.lower())
-
-
-def _is_secret_key(key: str) -> bool:
-    normalized = _normalize_secret_key(key)
-    if normalized == "token" or normalized.endswith("token"):
-        return True
-    if normalized in ("auth", "authorization"):
-        return True
-    return any(marker in normalized for marker in _SECRET_KEY_MARKERS)
-
-
-def _redact_string(value: str) -> str:
-    for pattern in _SECRET_VALUE_PATTERNS:
-        value = pattern.sub(REDACTED_MIGRATION_VALUE, value)
-    return value
-
-
-def redact_migration_value(value: Any) -> Any:
-    """Return a deep copy of ``value`` with secret-looking content replaced.
-
-    Applied to every report written to disk.  Keys whose normalized form
-    matches a credential marker get their value replaced wholesale.  Strings
-    anywhere in the tree are scanned for common token patterns (sk-..., ghp_...,
-    xox*-, AIza*, Bearer ...) and those substrings are replaced inline.
-    """
-    return _redact_internal(value, set())
-
-
-def _redact_internal(value: Any, seen: set) -> Any:
-    if isinstance(value, str):
-        return _redact_string(value)
-    if isinstance(value, (list, tuple)):
-        return [_redact_internal(entry, seen) for entry in value]
-    if isinstance(value, dict):
-        obj_id = id(value)
-        if obj_id in seen:
-            return REDACTED_MIGRATION_VALUE
-        seen.add(obj_id)
-        out: Dict[str, Any] = {}
-        for key, entry in value.items():
-            if isinstance(key, str) and _is_secret_key(key):
-                out[key] = REDACTED_MIGRATION_VALUE
-            else:
-                out[key] = _redact_internal(entry, seen)
-        return out
-    return value
-
-
 def write_report(output_dir: Path, report: Dict[str, Any]) -> None:
    output_dir.mkdir(parents=True, exist_ok=True)
-    # Always redact before persisting.  Callers who need the raw object
-    # (in-process) still get it back from build_report(); only the on-disk
-    # copy is redacted.
-    redacted = redact_migration_value(report)
    (output_dir / "report.json").write_text(
-        json.dumps(redacted, indent=2, ensure_ascii=False) + "\n",
+        json.dumps(report, indent=2, ensure_ascii=False) + "\n",
        encoding="utf-8",
    )

    grouped: Dict[str, List[Dict[str, Any]]] = {}
-    for item in redacted["items"]:
+    for item in report["items"]:
        grouped.setdefault(item["status"], []).append(item)

    lines = [
        "# OpenClaw -> Hermes Migration Report",
        "",
-        f"- Timestamp: {redacted['timestamp']}",
-        f"- Mode: {redacted['mode']}",
-        f"- Source: `{redacted['source_root']}`",
-        f"- Target: `{redacted['target_root']}`",
+        f"- Timestamp: {report['timestamp']}",
+        f"- Mode: {report['mode']}",
+        f"- Source: `{report['source_root']}`",
+        f"- Target: `{report['target_root']}`",
        "",
        "## Summary",
        "",
    ]

-    for key, value in redacted["summary"].items():
+    for key, value in report["summary"].items():
        lines.append(f"- {key}: {value}")

-    warnings = redacted.get("warnings") or []
-    if warnings:
-        lines.extend(["", "## Warnings", ""])
-        for warning in warnings:
-            lines.append(f"- {warning}")
-
    lines.extend(["", "## What Was Not Fully Brought Over", ""])
    skipped = grouped.get("skipped", []) + grouped.get("conflict", []) + grouped.get("error", [])
    if not skipped:
@@ -699,12 +584,6 @@ def write_report(output_dir: Path, report: Dict[str, Any]) -> None:
            reason = item["reason"] or item["status"]
            lines.append(f"- `{source}` -> `{dest}`: {reason}")

-    next_steps = redacted.get("next_steps") or []
-    if next_steps:
-        lines.extend(["", "## Next Steps", ""])
-        for step in next_steps:
-            lines.append(f"- {step}")
-
    (output_dir / "summary.md").write_text("\n".join(lines) + "\n", encoding="utf-8")


@@ -739,31 +618,6 @@ class Migrator:
        self.backup_dir = self.output_dir / "backups" if self.output_dir else None
        self.overflow_dir = self.output_dir / "overflow" if self.output_dir else None
        self.items: List[ItemResult] = []
-        # Once a config.yaml write hits conflict/error mid-run, later
-        # config.yaml writes are deliberately short-circuited to avoid
-        # leaving config in a partially-written state.  Modelled on
-        # OpenClaw's extensions/migrate-hermes/apply.ts "blocked by earlier
-        # apply conflict" sequencing.
-        self._config_apply_blocked: bool = False
-
-        # Resolve the configured workspace directory from openclaw.json.
-        # Many users (especially those who started before the OpenClaw rebrand)
-        # have a custom workspace path (e.g. ~/clawd/) that differs from the
-        # default ~/.openclaw/workspace/.  Reading agents.defaults.workspace
-        # lets source_candidate() find files in the actual workspace.
-        self._custom_workspace: Optional[Path] = None
-        oc_config = self.load_openclaw_config()
-        ws = (oc_config.get("agents", {}).get("defaults", {}).get("workspace") or "").strip()
-        if ws:
-            ws_path = Path(ws).expanduser().resolve()
-            # Only use it if it exists and is outside the source_root tree
-            # (otherwise the standard relative-path logic already covers it).
-            if ws_path.is_dir():
-                try:
-                    ws_path.relative_to(self.source_root)
-                except ValueError:
-                    # ws_path is outside source_root — use it as custom workspace
-                    self._custom_workspace = ws_path

        config = load_yaml_file(self.target_root / "config.yaml")
        mem_cfg = config.get("memory", {}) if isinstance(config.get("memory"), dict) else {}
@@ -781,32 +635,6 @@ class Migrator:
    def is_selected(self, option_id: str) -> bool:
        return option_id in self.selected_options

-    # Option ids that mutate the Hermes config.yaml file.  Once any one of
-    # them records a conflict/error on config.yaml, subsequent ones are
-    # short-circuited to avoid partial writes.  Keep in sync with methods
-    # that call load_yaml_file(target_root / "config.yaml") + dump_yaml_file.
-    _CONFIG_MUTATING_OPTIONS = frozenset({
-        "model-config",
-        "tts-config",
-        "mcp-servers",
-        "plugins-config",
-        "cron-jobs",
-        "hooks-config",
-        "agent-config",
-        "gateway-config",
-        "session-config",
-        "full-providers",
-        "deep-channels",
-        "browser-config",
-        "tools-config",
-        "approvals-config",
-        "memory-backend",
-        "skills-config",
-        "ui-identity",
-        "logging-config",
-        "command-allowlist",
-    })
-
    def record(
        self,
        kind: str,
@@ -816,7 +644,6 @@ class Migrator:
        reason: str = "",
        **details: Any,
    ) -> None:
-        sensitive = bool(details.pop("sensitive", False))
        self.items.append(
            ItemResult(
                kind=kind,
@@ -825,16 +652,8 @@ class Migrator:
                status=status,
                reason=reason,
                details=details,
-                sensitive=sensitive,
            )
        )
-        # Flip the config-block flag when a conflict/error occurs on a
-        # config.yaml write.  Later config-mutating options will skip rather
-        # than attempting a partial write.
-        if status in (STATUS_CONFLICT, STATUS_ERROR) and destination is not None:
-            dest_str = str(destination)
-            if dest_str.endswith("config.yaml") or dest_str.endswith("config.yml"):
-                self._config_apply_blocked = True

    def source_candidate(self, *relative_paths: str) -> Optional[Path]:
        for rel in relative_paths:
@@ -854,23 +673,6 @@ class Migrator:
                alt = self.source_root / "workspace-main" / suffix
                if alt.exists():
                    return alt
-
-        # Final fallback: check the configured workspace directory from
-        # agents.defaults.workspace in openclaw.json.  Users who started
-        # before the OpenClaw rebrand (when the project was named clawd /
-        # clawdbot) often have a custom workspace path outside ~/.openclaw/.
-        if self._custom_workspace:
-            for rel in relative_paths:
-                # Strip the leading "workspace/" or "workspace.default/"
-                # prefix to get the bare filename/subpath.
-                for prefix in ("workspace/", "workspace.default/"):
-                    if rel.startswith(prefix):
-                        suffix = rel[len(prefix):]
-                        alt = self._custom_workspace / suffix
-                        if alt.exists():
-                            return alt
-                        break
-
        return None

    def resolve_skill_destination(self, destination: Path) -> Path:
@@ -960,30 +762,11 @@ class Migrator:
        return self.build_report()

    def run_if_selected(self, option_id: str, func) -> None:
-        if not self.is_selected(option_id):
-            meta = MIGRATION_OPTION_METADATA[option_id]
-            self.record(option_id, None, None, "skipped", "Not selected for this run", option_label=meta["label"])
+        if self.is_selected(option_id):
+            func()
            return
-        # If a previous config.yaml write hit a conflict/error during apply,
-        # skip remaining config-mutating options rather than risk a partial
-        # write.  Dry-run mode never blocks — the user needs the full preview
-        # to decide how to proceed (re-run with --overwrite, etc.).
-        if (
-            self.execute
-            and self._config_apply_blocked
-            and option_id in self._CONFIG_MUTATING_OPTIONS
-        ):
-            meta = MIGRATION_OPTION_METADATA[option_id]
-            self.record(
-                option_id,
-                None,
-                None,
-                STATUS_SKIPPED,
-                REASON_BLOCKED_BY_APPLY_CONFLICT,
-                option_label=meta["label"],
-            )
-            return
-        func()
+        meta = MIGRATION_OPTION_METADATA[option_id]
+        self.record(option_id, None, None, "skipped", "Not selected for this run", option_label=meta["label"])

    def build_report(self) -> Dict[str, Any]:
        summary: Dict[str, int] = {
@@ -1021,8 +804,6 @@ class Migrator:
            },
            "summary": summary,
            "items": [asdict(item) for item in self.items],
-            "warnings": self._build_warnings(summary),
-            "next_steps": self._build_next_steps(summary),
        }

        if self.output_dir:
@@ -1030,67 +811,6 @@ class Migrator:

        return report

-    def _build_warnings(self, summary: Dict[str, int]) -> List[str]:
-        """Structured warnings surfaced on the report for downstream consumers.
-
-        Modelled on OpenClaw's extensions/migrate-hermes/plan.ts warnings[].
-        Keep the messages actionable — they show up in summary.md and the
-        JSON report.
-        """
-        warnings: List[str] = []
-        if summary.get("conflict", 0) > 0:
-            warnings.append(
-                "Conflicts were found. Re-run with --overwrite to replace conflicting "
-                "targets after item-level backups."
-            )
-        if summary.get("error", 0) > 0:
-            warnings.append(
-                "One or more items failed. Inspect the report and re-run after fixing "
-                "the underlying cause."
-            )
-        if self._config_apply_blocked and self.execute:
-            warnings.append(
-                "A config.yaml write hit a conflict or error mid-apply; later config "
-                "items were skipped to avoid a partial write."
-            )
-        # Detect whether secrets were detected but not migrated.
-        provider_keys_skipped = any(
-            item.kind == "provider-keys" and item.status == STATUS_SKIPPED
-            for item in self.items
-        )
-        if provider_keys_skipped and not self.migrate_secrets:
-            warnings.append(
-                "API keys and other credentials were detected but not imported. "
-                "Re-run with --migrate-secrets to copy supported keys into the "
-                "Hermes env file."
-            )
-        return warnings
-
-    def _build_next_steps(self, summary: Dict[str, int]) -> List[str]:
-        """Human-readable next-step guidance baked into the report."""
-        if not self.execute:
-            return [
-                "Re-run without --dry-run to apply the migration.",
-                "Pass --overwrite to resolve conflicts, or --migrate-secrets to "
-                "include API keys.",
-            ]
-        steps: List[str] = []
-        if summary.get("migrated", 0) > 0:
-            steps.append(
-                "Review the migration report at "
-                f"{self.output_dir}/summary.md"
-                if self.output_dir
-                else "Review the migration report."
-            )
-            steps.append(
-                "Start a new Hermes session (or /reset) to pick up the imported config."
-            )
-        if summary.get("conflict", 0) > 0:
-            steps.append(
-                "Re-run with --overwrite to apply items that were blocked by conflicts."
-            )
-        return steps
-
    def maybe_backup(self, path: Path) -> Optional[Path]:
        if not self.execute or not self.backup_dir or not path.exists():
            return None
@@ -1671,29 +1391,6 @@ class Migrator:

        model_str = model_str.strip()

-        # Resolve a model alias against the OpenClaw model catalog.
-        # OpenClaw stores agents.defaults.model as either a bare string or
-        # {"primary": "<value>"}, and that value can be either:
-        #   - a full provider/model API ID (e.g. "anthropic/claude-opus-4-6"), or
-        #   - a display alias (e.g. "Claude Opus 4.6") that maps to one.
-        # The catalog at agents.defaults.models is keyed by the full
-        # provider/model API ID with an "alias" field on the value, e.g.:
-        #   {"anthropic/claude-opus-4-6": {"alias": "Claude Opus 4.6"}}
-        # If model_str matches an alias in the catalog, rewrite it to the
-        # catalog key (the real API ID).  If it's already an API ID or has
-        # no catalog match, leave it alone and let downstream pass it through.
-        model_catalog = config.get("agents", {}).get("defaults", {}).get("models", {})
-        if isinstance(model_catalog, dict) and model_str not in model_catalog:
-            for api_id, entry in model_catalog.items():
-                if not isinstance(api_id, str):
-                    continue
-                if isinstance(entry, dict) and entry.get("alias") == model_str:
-                    model_str = api_id
-                    break
-                if isinstance(entry, str) and entry == model_str:
-                    model_str = api_id
-                    break
-
        if yaml is None:
            self.record("model-config", source_path, destination, "error", "PyYAML is not available")
            return
@@ -2998,13 +2695,6 @@ def parse_args() -> argparse.Namespace:
             f"Valid ids: {', '.join(sorted(MIGRATION_OPTION_METADATA))}",
    )
    parser.add_argument("--output-dir", help="Where to write report, backups, and archived docs")
-    parser.add_argument(
-        "--json",
-        action="store_true",
-        dest="json_output",
-        help="Print the migration report as JSON on stdout (redacted). "
-             "Combine with no --execute for a safe plan-only machine-readable preview.",
-    )
    return parser.parse_args()


@@ -3029,13 +2719,6 @@ def main() -> int:
    )
    report = migrator.migrate()

-    # ── Machine-readable JSON mode ────────────────────────────
-    # When --json is set, print the redacted report to stdout and skip the
-    # human-readable terminal recap.  Useful for CI and scripted wrappers.
-    if getattr(args, "json_output", False):
-        print(json.dumps(redact_migration_value(report), indent=2, ensure_ascii=False))
-        return 0
-
    # ── Human-readable terminal recap ─────────────────────────
    s = report["summary"]
    items = report["items"]
@@ -1,53 +0,0 @@
-# Langfuse Observability Plugin
-
-This plugin ships bundled with Hermes but is **opt-in** — it only loads when
-you explicitly enable it.
-
-## Enable
-
-Pick one:
-
-```bash
-# Interactive: walks you through credentials + SDK install + enable
-hermes tools  # → Langfuse Observability
-
-# Manual
-pip install langfuse
-hermes plugins enable observability/langfuse
-```
-
-## Required credentials
-
-Set these in `~/.hermes/.env` (or via `hermes tools`):
-
-```bash
-HERMES_LANGFUSE_PUBLIC_KEY=pk-lf-...
-HERMES_LANGFUSE_SECRET_KEY=sk-lf-...
-HERMES_LANGFUSE_BASE_URL=https://cloud.langfuse.com   # or your self-hosted URL
-```
-
-Without the SDK or credentials the hooks no-op silently — the plugin fails
-open.
-
-## Verify
-
-```bash
-hermes plugins list                 # observability/langfuse should show "enabled"
-hermes chat -q "hello"              # then check Langfuse for a "Hermes turn" trace
-```
-
-## Optional tuning
-
-```bash
-HERMES_LANGFUSE_ENV=production       # environment tag
-HERMES_LANGFUSE_RELEASE=v1.0.0       # release tag
-HERMES_LANGFUSE_SAMPLE_RATE=0.5      # sample 50% of traces
-HERMES_LANGFUSE_MAX_CHARS=12000      # max chars per field (default: 12000)
-HERMES_LANGFUSE_DEBUG=true           # verbose plugin logging
-```
-
-## Disable
-
-```bash
-hermes plugins disable observability/langfuse
-```
@@ -27,8 +27,8 @@ from pathlib import Path
 import fire
 import yaml

-from hermes_constants import OPENROUTER_BASE_URL, get_hermes_home
+from hermes_constants import get_hermes_home  # keep above the get_hermes_home() call below

 # Load .env from ~/.hermes/.env first, then project root as dev fallback.
 # User-managed env files should override stale shell exports on restart.
 _hermes_home = get_hermes_home()
@@ -62,6 +62,8 @@ from tools.rl_training_tool import get_missing_keys
 # Config Loading
 # ============================================================================

+from hermes_constants import OPENROUTER_BASE_URL
+
 DEFAULT_MODEL = "anthropic/claude-opus-4.5"
 DEFAULT_BASE_URL = OPENROUTER_BASE_URL

@@ -412,7 +414,7 @@ def main(
                
                # Run the agent
                print("\n" + "=" * 60)
-                agent.run_conversation(user_input)
+                response = agent.run_conversation(user_input)
                print("\n" + "=" * 60)
                
            except KeyboardInterrupt:
@@ -429,7 +431,7 @@ def main(
        print("-" * 40)
        
        try:
-            agent.run_conversation(task)
+            response = agent.run_conversation(task)
            print("\n" + "=" * 60)
            print("✅ Task completed")
        except KeyboardInterrupt:
@@ -41,48 +41,13 @@ import urllib.request
 import uuid
 from typing import List, Dict, Any, Optional
 from urllib.parse import urlparse, parse_qs, urlunparse
-# NOTE: `from openai import OpenAI` is deliberately NOT at module top — the
-# SDK pulls ~240 ms of imports. We expose `OpenAI` as a thin proxy object
-# that imports the SDK on first call/isinstance check. This preserves:
-#   (a) the single in-module `OpenAI(**client_kwargs)` call site at
-#       _create_openai_client, and
-#   (b) `patch("run_agent.OpenAI", ...)` test patterns used by ~28 test files.
+from openai import OpenAI
 import fire
 from datetime import datetime
 from pathlib import Path

 from hermes_constants import get_hermes_home

-
-_OPENAI_CLS_CACHE: Optional[type] = None
-
-
-def _load_openai_cls() -> type:
-    """Import and cache ``openai.OpenAI``."""
-    global _OPENAI_CLS_CACHE
-    if _OPENAI_CLS_CACHE is None:
-        from openai import OpenAI as _cls
-        _OPENAI_CLS_CACHE = _cls
-    return _OPENAI_CLS_CACHE
-
-
-class _OpenAIProxy:
-    """Module-level proxy that looks like ``openai.OpenAI`` but imports lazily."""
-
-    __slots__ = ()
-
-    def __call__(self, *args, **kwargs):
-        return _load_openai_cls()(*args, **kwargs)
-
-    def __instancecheck__(self, obj):
-        return isinstance(obj, _load_openai_cls())
-
-    def __repr__(self):
-        return "<lazy openai.OpenAI proxy>"
-
-
-OpenAI = _OpenAIProxy()
-
 # Load .env from ~/.hermes/.env first, then project root as dev fallback.
 # User-managed env files should override stale shell exports on restart.
 from hermes_cli.env_loader import load_hermes_dotenv
@@ -1884,14 +1849,10 @@ class AIAgent:
                                            file=sys.stderr,
                                        )
                        break

-        # Persist for reuse on switch_model / fallback activation. Must come
-        # AFTER the custom_providers branch so per-model overrides aren't lost.
+        # Persist for switch_model reuse AFTER the custom_providers branch so per-model overrides aren't lost.
        self._config_context_length = _config_context_length

-        self._ensure_lmstudio_runtime_loaded(_config_context_length)
-
-
        # Select context engine: config-driven (like memory providers).
        # 1. Check config.yaml context.engine setting
        # 2. Check plugins/context_engine/<name>/ directory (repo-shipped)
@@ -2133,39 +2094,6 @@ class AIAgent:
        if hasattr(self, "context_compressor") and self.context_compressor:
            self.context_compressor.on_session_reset()
    
-    def _ensure_lmstudio_runtime_loaded(self, config_context_length: Optional[int] = None) -> None:
-        """
-        Preload the LM Studio model with at least Hermes' minimum context.
-        """
-        if (self.provider or "").strip().lower() != "lmstudio":
-            return
-        try:
-            from agent.model_metadata import MINIMUM_CONTEXT_LENGTH
-            from hermes_cli.models import ensure_lmstudio_model_loaded
-            if config_context_length is None:
-                config_context_length = getattr(self, "_config_context_length", None)
-            target_ctx = max(config_context_length or 0, MINIMUM_CONTEXT_LENGTH)
-            loaded_ctx = ensure_lmstudio_model_loaded(
-                self.model, self.base_url, getattr(self, "api_key", ""), target_ctx,
-            )
-            if loaded_ctx:
-                # Push into the live compressor so the status bar reflects the
-                # real loaded ctx the moment the load resolves, instead of
-                # holding the previous model's value (or "ctx --") through the
-                # next render tick.
-                cc = getattr(self, "context_compressor", None)
-                if cc is not None:
-                    cc.update_model(
-                        model=self.model,
-                        context_length=loaded_ctx,
-                        base_url=self.base_url,
-                        api_key=getattr(self, "api_key", ""),
-                        provider=self.provider,
-                        api_mode=self.api_mode,
-                    )
-        except Exception as err:
-            logger.debug("LM Studio preload skipped: %s", err)
-
    def switch_model(self, new_model, new_provider, api_key='', base_url='', api_mode=''):
        """Switch the model/provider in-place for a live agent.

@@ -2261,9 +2189,6 @@ class AIAgent:
            )
        )

-        # ── LM Studio: preload before probing context length ──
-        self._ensure_lmstudio_runtime_loaded()
-
        # ── Update context compressor ──
        if hasattr(self, "context_compressor") and self.context_compressor:
            from agent.model_metadata import get_model_context_length
@@ -2795,6 +2720,7 @@ class AIAgent:
        eff_api_mode = api_mode if api_mode is not None else (self.api_mode or "")
        eff_model = (model if model is not None else self.model) or ""

+        base_lower = eff_base_url.lower()
        model_lower = eff_model.lower()
        provider_lower = eff_provider.lower()
        is_claude = "claude" in model_lower
@@ -4867,145 +4793,6 @@ class AIAgent:
            )
        return messages

-    @staticmethod
-    def _is_thinking_only_assistant(msg: Dict[str, Any]) -> bool:
-        """Return True if ``msg`` is an assistant turn whose only payload is reasoning.
-
-        "Thinking-only" means the model emitted reasoning (``reasoning`` or
-        ``reasoning_content``) but no visible text and no tool_calls. When sent
-        back to providers that convert reasoning into thinking blocks (native
-        Anthropic, OpenRouter Anthropic, third-party Anthropic-compatible
-        gateways), the resulting message has only thinking blocks — which
-        Anthropic rejects with HTTP 400 "The final block in an assistant
-        message cannot be `thinking`."
-
-        Symmetric with Claude Code's ``filterOrphanedThinkingOnlyMessages``
-        (src/utils/messages.ts). We drop the whole turn from the API copy
-        rather than fabricating stub text — the message log (UI transcript)
-        keeps the reasoning block; only the wire copy is cleaned.
-        """
-        if not isinstance(msg, dict) or msg.get("role") != "assistant":
-            return False
-        if msg.get("tool_calls"):
-            return False
-        # Does it have any actual output?
-        content = msg.get("content")
-        if isinstance(content, str):
-            if content.strip():
-                return False
-        elif isinstance(content, list):
-            for block in content:
-                if not isinstance(block, dict):
-                    if block:  # non-empty non-dict string etc.
-                        return False
-                    continue
-                btype = block.get("type")
-                if btype in ("thinking", "redacted_thinking"):
-                    continue
-                if btype == "text":
-                    text = block.get("text", "")
-                    if isinstance(text, str) and text.strip():
-                        return False
-                    continue
-                # tool_use, image, document, etc. — real payload
-                return False
-        elif content is not None and content != "":
-            return False
-        # Content is empty-ish. Is there reasoning to make it thinking-only?
-        reasoning = msg.get("reasoning_content") or msg.get("reasoning")
-        if isinstance(reasoning, str) and reasoning.strip():
-            return True
-        # reasoning_details list form
-        rd = msg.get("reasoning_details")
-        if isinstance(rd, list) and rd:
-            return True
-        return False
-
-    @staticmethod
-    def _drop_thinking_only_and_merge_users(
-        messages: List[Dict[str, Any]],
-    ) -> List[Dict[str, Any]]:
-        """Drop thinking-only assistant turns; merge any adjacent user messages left behind.
-
-        Runs on the per-call ``api_messages`` copy only. The stored
-        conversation history (``self.messages``) is never mutated, so the
-        user still sees the thinking block in the CLI/gateway transcript and
-        session persistence keeps the full trace. Only the wire copy sent to
-        the provider is cleaned.
-
-        Why drop-and-merge rather than inject stub text:
-        - Fabricating ``"."`` / ``"(continued)"`` text lies in the history
-          and makes future turns see model output the model didn't emit.
-        - Dropping the turn preserves honesty; merging adjacent user messages
-          preserves the provider's role-alternation invariant.
-        - This is the pattern used by Claude Code's ``normalizeMessagesForAPI``
-          (filterOrphanedThinkingOnlyMessages + mergeAdjacentUserMessages).
-        """
-        if not messages:
-            return messages
-
-        # Pass 1: drop thinking-only assistant turns.
-        kept = [m for m in messages if not AIAgent._is_thinking_only_assistant(m)]
-        dropped = len(messages) - len(kept)
-        if dropped == 0:
-            return messages
-
-        # Pass 2: merge any newly-adjacent user messages.
-        merged: List[Dict[str, Any]] = []
-        merges = 0
-        for m in kept:
-            prev = merged[-1] if merged else None
-            if (
-                prev is not None
-                and prev.get("role") == "user"
-                and m.get("role") == "user"
-            ):
-                prev_content = prev.get("content", "")
-                cur_content = m.get("content", "")
-                # Work on a copy of ``prev`` so the caller's input dicts are
-                # never mutated. ``_sanitize_api_messages`` upstream already
-                # hands us per-call copies, but staying pure here means we
-                # can be called safely from anywhere (tests, other loops).
-                prev_copy = dict(prev)
-                # Only string-content merge is meaningful for role-alternation
-                # purposes. If either side is a list (multimodal), append as a
-                # separate block rather than collapsing.
-                if isinstance(prev_content, str) and isinstance(cur_content, str):
-                    sep = "\n\n" if prev_content and cur_content else ""
-                    prev_copy["content"] = prev_content + sep + cur_content
-                elif isinstance(prev_content, list) and isinstance(cur_content, list):
-                    prev_copy["content"] = list(prev_content) + list(cur_content)
-                elif isinstance(prev_content, list) and isinstance(cur_content, str):
-                    if cur_content:
-                        prev_copy["content"] = list(prev_content) + [
-                            {"type": "text", "text": cur_content}
-                        ]
-                    else:
-                        prev_copy["content"] = list(prev_content)
-                elif isinstance(prev_content, str) and isinstance(cur_content, list):
-                    new_blocks: List[Dict[str, Any]] = []
-                    if prev_content:
-                        new_blocks.append({"type": "text", "text": prev_content})
-                    new_blocks.extend(cur_content)
-                    prev_copy["content"] = new_blocks
-                else:
-                    # Unknown content shape — fall back to appending separately
-                    # (violates alternation, but safer than raising in a hot path).
-                    merged.append(m)
-                    continue
-                merged[-1] = prev_copy
-                merges += 1
-            else:
-                merged.append(m)
-
-        logger.debug(
-            "Pre-call sanitizer: dropped %d thinking-only assistant turn(s), "
-            "merged %d adjacent user message(s)",
-            dropped,
-            merges,
-        )
-        return merged
-
    @staticmethod
    def _cap_delegate_task_calls(tool_calls: list) -> list:
        """Truncate excess delegate_task calls to max_concurrent_children.
@@ -5318,8 +5105,6 @@ class AIAgent:
            keepalive_http = self._build_keepalive_http_client(client_kwargs.get("base_url", ""))
            if keepalive_http is not None:
                client_kwargs["http_client"] = keepalive_http
-        # Uses the module-level `OpenAI` name, resolved lazily on first
-        # access via __getattr__ below. Tests patch via `run_agent.OpenAI`.
        client = OpenAI(**client_kwargs)
        logger.info(
            "OpenAI client created (%s, shared=%s) %s",
@@ -7367,9 +7152,6 @@ class AIAgent:
                )
            )

-            # LM Studio: preload before probing the fallback's context length.
-            self._ensure_lmstudio_runtime_loaded()
-
            # Update context compressor limits for the fallback model.
            # Without this, compression decisions use the primary model's
            # context window (e.g. 200K) instead of the fallback's (e.g. 32K),
@@ -8089,8 +7871,6 @@ class AIAgent:
            or base_url_host_matches(self.base_url, "moonshot.ai")
            or base_url_host_matches(self.base_url, "moonshot.cn")
        )
-        _is_tokenhub = base_url_host_matches(self._base_url_lower, "tokenhub.tencentmaas.com")
-        _is_lmstudio = (self.provider or "").strip().lower() == "lmstudio"

        # Temperature: _fixed_temperature_for_model may return OMIT_TEMPERATURE
        # sentinel (temperature omitted entirely), a numeric override, or None.
@@ -8162,8 +7942,6 @@ class AIAgent:
            is_github_models=_is_gh,
            is_nvidia_nim=_is_nvidia,
            is_kimi=_is_kimi,
-            is_tokenhub=_is_tokenhub,
-            is_lmstudio=_is_lmstudio,
            is_custom_provider=self.provider == "custom",
            ollama_num_ctx=self._ollama_num_ctx,
            provider_preferences=_prefs or None,
@@ -8174,9 +7952,7 @@ class AIAgent:
            omit_temperature=_omit_temp,
            supports_reasoning=self._supports_reasoning_extra_body(),
            github_reasoning_extra=self._github_models_reasoning_extra_body() if _is_gh else None,
-            lmstudio_reasoning_options=self._lmstudio_reasoning_options_cached() if _is_lmstudio else None,
            anthropic_max_output=_ant_max,
-            provider_name=self.provider,
        )

    def _supports_reasoning_extra_body(self) -> bool:
@@ -8200,10 +7976,6 @@ class AIAgent:
                return bool(github_model_reasoning_efforts(self.model))
            except Exception:
                return False
-        if (self.provider or "").strip().lower() == "lmstudio":
-            opts = self._lmstudio_reasoning_options_cached()
-            # "off-only" (or absent) means no real reasoning capability.
-            return any(opt and opt != "off" for opt in opts)
        if "openrouter" not in self._base_url_lower:
            return False
        if "api.mistral.ai" in self._base_url_lower:
@@ -8217,57 +7989,9 @@ class AIAgent:
            "x-ai/",
            "google/gemini-2",
            "qwen/qwen3",
-            "tencent/hy3-preview",
        )
        return any(model.startswith(prefix) for prefix in reasoning_model_prefixes)

-    def _lmstudio_reasoning_options_cached(self) -> list[str]:
-        """Probe LM Studio's published reasoning ``allowed_options`` once per
-        (model, base_url). The list (e.g. ``["off","on"]`` or
-        ``["off","minimal","low"]``) is needed both for the supports-reasoning
-        gate and for clamping the emitted ``reasoning_effort`` so toggle-style
-        models don't 400 on ``high``. Cache is keyed on (model, base_url) so
-        ``/model`` swaps and base-URL changes don't reuse a stale list.
-        Non-empty results are cached permanently (model capabilities don't
-        change). Empty results (transient probe failure OR genuinely
-        non-reasoning model) are cached with a 60-second TTL to avoid an
-        HTTP round-trip on every turn while still retrying reasonably soon.
-        """
-        import time as _time
-
-        cache = getattr(self, "_lm_reasoning_opts_cache", None)
-        if cache is None:
-            cache = self._lm_reasoning_opts_cache = {}
-        key = (self.model, self.base_url)
-        cached = cache.get(key)
-        if cached is not None:
-            opts, ts = cached
-            # Non-empty → permanent. Empty → 60s TTL.
-            if opts or (_time.monotonic() - ts) < 60:
-                return opts
-        try:
-            from hermes_cli.models import lmstudio_model_reasoning_options
-            opts = lmstudio_model_reasoning_options(
-                self.model, self.base_url, getattr(self, "api_key", ""),
-            )
-        except Exception:
-            opts = []
-        cache[key] = (opts, _time.monotonic())
-        return opts
-
-    def _resolve_lmstudio_summary_reasoning_effort(self) -> Optional[str]:
-        """Resolve a safe top-level ``reasoning_effort`` for LM Studio.
-
-        The iteration-limit summary path calls ``chat.completions.create()``
-        directly, bypassing the transport. Share the helper so the two paths
-        can't drift on effort resolution and clamping.
-        """
-        from agent.lmstudio_reasoning import resolve_lmstudio_effort
-        return resolve_lmstudio_effort(
-            self.reasoning_config,
-            self._lmstudio_reasoning_options_cached(),
-        )
-
    def _github_models_reasoning_extra_body(self) -> dict | None:
        """Format reasoning payload for GitHub Models/OpenAI-compatible routes."""
        try:
@@ -8376,31 +8100,6 @@ class AIAgent:
                # as a defensive compatibility fallback (refs #15250).
                msg["reasoning_content"] = ""

-        # Additive fallback (refs #16844, #16884). Streaming-only providers
-        # (glm, MiniMax, gpt-5.x via aigw, Anthropic via openai-compat shims)
-        # accumulate reasoning through ``delta.reasoning_content`` chunks
-        # but never land it on the message object as a top-level attribute,
-        # so neither branch above fires and the chain-of-thought is stored
-        # only under the internal ``reasoning`` key. When the user later
-        # replays that history through a DeepSeek-v4 / Kimi thinking model,
-        # the missing ``reasoning_content`` causes HTTP 400 ("The
-        # reasoning_content in the thinking mode must be passed back to the
-        # API.").
-        #
-        # Promote the already-sanitized streamed ``reasoning_text`` to
-        # ``reasoning_content`` at write time, but ONLY when no prior branch
-        # already set it AND we actually captured reasoning text. This
-        # preserves every existing behavior:
-        #   - SDK-exposed ``reasoning_content`` (OpenAI/Moonshot/DeepSeek SDK)
-        #     still wins.
-        #   - DeepSeek tool-call ""-pad (#15250) still fires.
-        #   - Non-thinking turns with no reasoning leave the field absent,
-        #     so ``_copy_reasoning_content_for_api``'s cross-provider leak
-        #     guard (#15748) and ``reasoning``→``reasoning_content``
-        #     promotion tiers still apply at replay time.
-        if "reasoning_content" not in msg and reasoning_text:
-            msg["reasoning_content"] = reasoning_text
-
        if hasattr(assistant_message, 'reasoning_details') and assistant_message.reasoning_details:
            # Pass reasoning_details back unmodified so providers (OpenRouter,
            # Anthropic, OpenAI) can maintain reasoning continuity across turns.
@@ -9771,10 +9470,6 @@ class AIAgent:
                for idx, pfm in enumerate(self.prefill_messages):
                    api_messages.insert(sys_offset + idx, pfm.copy())

-            # Same safety net as the main loop: drop thinking-only assistant
-            # turns so Anthropic-family providers don't 400 the summary call.
-            api_messages = self._drop_thinking_only_and_merge_users(api_messages)
-
            summary_extra_body = {}
            try:
                from agent.auxiliary_client import _fixed_temperature_for_model, OMIT_TEMPERATURE as _OMIT_TEMP
@@ -9789,19 +9484,7 @@ class AIAgent:
            _omit_summary_temperature = _raw_summary_temp is _OMIT_TEMP
            _summary_temperature = None if _omit_summary_temperature else _raw_summary_temp
            _is_nous = "nousresearch" in self._base_url_lower
-            # LM Studio uses top-level `reasoning_effort` (not extra_body.reasoning).
-            # Mirror ChatCompletionsTransport.build_kwargs() so the summary path
-            # — which calls chat.completions.create() directly without going
-            # through the transport — sends the same shape the transport does.
-            _is_lmstudio_summary = (
-                (self.provider or "").strip().lower() == "lmstudio"
-                and self._supports_reasoning_extra_body()
-            )
-            _lm_reasoning_effort: str | None = (
-                self._resolve_lmstudio_summary_reasoning_effort()
-                if _is_lmstudio_summary else None
-            )
-            if not _is_lmstudio_summary and self._supports_reasoning_extra_body():
+            if self._supports_reasoning_extra_body():
                if self.reasoning_config is not None:
                    summary_extra_body["reasoning"] = self.reasoning_config
                else:
@@ -9828,8 +9511,6 @@ class AIAgent:
                    summary_kwargs["temperature"] = _summary_temperature
                if self.max_tokens is not None:
                    summary_kwargs.update(self._max_tokens_param(self.max_tokens))
-                if _lm_reasoning_effort is not None:
-                    summary_kwargs["reasoning_effort"] = _lm_reasoning_effort

                # Include provider routing preferences
                provider_preferences = {}
@@ -9854,7 +9535,7 @@ class AIAgent:
                                   is_oauth=self._is_anthropic_oauth,
                                   preserve_dots=self._anthropic_preserve_dots())
                    summary_response = self._anthropic_messages_create(_ant_kw)
-                    _summary_result = _tsum.normalize_response(summary_response)
+                    _summary_result = _tsum.normalize_response(summary_response, strip_tool_prefix=self._is_anthropic_oauth)
                    final_response = (_summary_result.content or "").strip()
                else:
                    summary_response = self._ensure_primary_openai_client(reason="iteration_limit_summary").chat.completions.create(**summary_kwargs)
@@ -9884,7 +9565,7 @@ class AIAgent:
                                    max_tokens=self.max_tokens, reasoning_config=self.reasoning_config,
                                    preserve_dots=self._anthropic_preserve_dots())
                    retry_response = self._anthropic_messages_create(_ant_kw2)
-                    _retry_result = _tretry.normalize_response(retry_response)
+                    _retry_result = _tretry.normalize_response(retry_response, strip_tool_prefix=self._is_anthropic_oauth)
                    final_response = (_retry_result.content or "").strip()
                else:
                    summary_kwargs = {
@@ -9895,8 +9576,6 @@ class AIAgent:
                        summary_kwargs["temperature"] = _summary_temperature
                    if self.max_tokens is not None:
                        summary_kwargs.update(self._max_tokens_param(self.max_tokens))
-                    if _lm_reasoning_effort is not None:
-                        summary_kwargs["reasoning_effort"] = _lm_reasoning_effort
                    if summary_extra_body:
                        summary_kwargs["extra_body"] = summary_extra_body

@@ -10501,16 +10180,6 @@ class AIAgent:
            # manual message manipulation are always caught.
            api_messages = self._sanitize_api_messages(api_messages)

-            # Drop thinking-only assistant turns (reasoning but no visible
-            # output and no tool_calls) and merge any adjacent user messages
-            # left behind. Prevents Anthropic 400s ("The final block in an
-            # assistant message cannot be `thinking`.") and equivalent errors
-            # from third-party Anthropic-compatible gateways that can't replay
-            # a thinking-only turn. Runs on the per-call copy only — the
-            # stored conversation history keeps the reasoning block for the
-            # UI transcript and session persistence.
-            api_messages = self._drop_thinking_only_and_merge_users(api_messages)
-
            # Normalize message whitespace and tool-call JSON for consistent
            # prefix matching.  Ensures bit-perfect prefixes across turns,
            # which enables KV cache reuse on local inference servers
@@ -10706,16 +10375,6 @@ class AIAgent:
                    # session instead of re-failing every retry.
                    if getattr(self, "_disable_streaming", False):
                        _use_streaming = False
-                    # CopilotACPClient communicates via subprocess stdio and
-                    # returns a plain SimpleNamespace — not an iterable
-                    # stream.  Mirror the ACP exclusion used for Responses
-                    # API upgrade (lines ~1083-1085).
-                    elif (
-                        self.provider == "copilot-acp"
-                        or str(self.base_url or "").lower().startswith("acp://copilot")
-                        or str(self.base_url or "").lower().startswith("acp+tcp://")
-                    ):
-                        _use_streaming = False
                    elif not self._has_stream_consumers():
                        # No display/TTS consumer. Still prefer streaming for
                        # health checking, but skip for Mock clients in tests
@@ -11012,7 +10671,12 @@ class AIAgent:
                        # would have been appended in the non-truncated path.
                        _trunc_msg = None
                        _trunc_transport = self._get_transport()
-                        _trunc_result = _trunc_transport.normalize_response(response)
+                        if self.api_mode == "anthropic_messages":
+                            _trunc_result = _trunc_transport.normalize_response(
+                                response, strip_tool_prefix=self._is_anthropic_oauth
+                            )
+                        else:
+                            _trunc_result = _trunc_transport.normalize_response(response)
                        _trunc_msg = _trunc_result

                        _trunc_content = getattr(_trunc_msg, "content", None) if _trunc_msg else None
@@ -12350,7 +12014,10 @@ class AIAgent:

            try:
                _transport = self._get_transport()
-                normalized = _transport.normalize_response(response)
+                _normalize_kwargs = {}
+                if self.api_mode == "anthropic_messages":
+                    _normalize_kwargs["strip_tool_prefix"] = self._is_anthropic_oauth
+                normalized = _transport.normalize_response(response, **_normalize_kwargs)
                assistant_message = normalized
                finish_reason = normalized.finish_reason
                
@@ -729,12 +729,9 @@ install_system_packages() {
                        return 0
                    fi
                fi
-            elif (: </dev/tty) 2>/dev/null; then
+            elif (: </dev/tty) 2>/dev/null; then  # open it, don't just test existence: in Docker builds the node exists but open fails (ENXIO)
                # Non-interactive (e.g. curl | bash) but a terminal is available.
                # Read the prompt from /dev/tty (same approach the setup wizard uses).
-                # Probe by actually opening /dev/tty: a bare existence test passes
-                # in Docker builds where the device node is in the mount namespace
-                # but opening fails with ENXIO. See #16746.
                echo ""
                log_info "sudo is needed ONLY to install optional system packages (${pkgs[*]}) via your package manager."
                log_info "Hermes Agent itself does not require or retain root access."
@@ -1333,12 +1330,7 @@ run_setup_wizard() {
    # The setup wizard reads from /dev/tty, so it works even when the
    # install script itself is piped (curl | bash). Only skip if no
    # terminal is available at all (e.g. Docker build, CI).
-    #
-    # Probe by actually opening /dev/tty: a bare existence test passes
-    # in Docker builds where the device node is in the mount namespace
-    # but opening fails with ENXIO, so the wizard would proceed and
-    # then crash on `< /dev/tty` below.
-    if ! (: </dev/tty) 2>/dev/null; then
+    if ! (: </dev/tty) 2>/dev/null; then  # open it, don't just test existence: in Docker builds the node exists but open fails (ENXIO), and the wizard's `< /dev/tty` would crash
        log_info "Setup wizard skipped (no terminal available). Run 'hermes setup' after install."
        return 0
    fi
@@ -1400,10 +1392,7 @@ maybe_start_gateway() {
        fi
    fi

-    # Probe by actually opening /dev/tty: a bare existence test passes
-    # in Docker builds where the device node is in the mount namespace
-    # but opening fails with ENXIO. See #16746.
-    if ! (: </dev/tty) 2>/dev/null; then
+    if ! (: </dev/tty) 2>/dev/null; then  # probe by opening: -e passes in Docker builds where open fails (ENXIO)
        log_info "Gateway setup skipped (no terminal available). Run 'hermes gateway install' later."
        return 0
    fi
@@ -41,10 +41,8 @@ PYPROJECT_FILE = REPO_ROOT / "pyproject.toml"
 AUTHOR_MAP = {
    # teknium (multiple emails)
    "teknium1@gmail.com": "teknium1",
-    "qiyin.zuo@pcitc.com": "qiyin-code",
    "teknium@nousresearch.com": "teknium1",
    "127238744+teknium1@users.noreply.github.com": "teknium1",
-    "revar@users.noreply.github.com": "revaraver",
    # Matrix parity salvage batch (April 2026)
    "sr@samirusani": "samrusani",
    "angelclaw@AngelMacBook.local": "angel12",
@@ -53,23 +51,14 @@ AUTHOR_MAP = {
    "adamrummer@gmail.com": "cyclingwithelephants",
    "nbot@liizfq.top": "liizfq",
    "274096618+hermes-agent-dhabibi@users.noreply.github.com": "dhabibi",
-    "dejie.guo@gmail.com": "JayGwod",
    "johnnncenaaa77@gmail.com": "johnncenae",
-    "thomasjhon6666@gmail.com": "ThomassJonax",
    "focusflow.app.help@gmail.com": "yes999zc",
-    "yes999zc@163.com": "yes999zc",
    "343873859@qq.com": "DrStrangerUJN",
    "uzmpsk.dilekakbas@gmail.com": "dlkakbs",
-    "beliefanx@gmail.com": "BeliefanX",
    "jefferson@heimdallstrategy.com": "Mind-Dragon",
-    "steve.westerhouse@origami-analytics.com": "westers",
    "130918800+devorun@users.noreply.github.com": "devorun",
-    "surat.s@itm.kmutnb.ac.th": "beesrsj2500",
-    "beesr@bee.localdomain": "beesrsj2500",
-    "mtf201013@gmail.com": "ma-pony",
    "sonoyuncudmr@gmail.com": "Sonoyunchu",
    "maks.mir@yahoo.com": "say8hi",
-    "27719690+Mirac1eSky@users.noreply.github.com": "Mirac1eSky",
    "web3blind@users.noreply.github.com": "web3blind",
    "julia@alexland.us": "alexg0bot",
    "christian@scheid.tech": "scheidti",
@@ -78,7 +67,6 @@ AUTHOR_MAP = {
    "itonov@proton.me": "Ito-69",
    "glesstech@gmail.com": "georgeglessner",
    "maxim.smetanin@gmail.com": "maxims-oss",
-    "nazirulhafiy@gmail.com": "nazirulhafiy",
    "CREWorx@users.noreply.github.com": "BadTechBandit",
    "yoimexex@gmail.com": "Yoimex",
    "6548898+romanornr@users.noreply.github.com": "romanornr",
@@ -137,7 +125,6 @@ AUTHOR_MAP = {
    "70424851+insecurejezza@users.noreply.github.com": "insecurejezza",
    "254021826+dodo-reach@users.noreply.github.com": "dodo-reach",
    "259807879+Bartok9@users.noreply.github.com": "Bartok9",
-    "270082434+crayfish-ai@users.noreply.github.com": "crayfish-ai",
    "241404605+MestreY0d4-Uninter@users.noreply.github.com": "MestreY0d4-Uninter",
    "268667990+Roy-oss1@users.noreply.github.com": "Roy-oss1",
    "27917469+nosleepcassette@users.noreply.github.com": "nosleepcassette",
@@ -560,7 +547,6 @@ AUTHOR_MAP = {
    "topcheer@me.com": "topcheer",
    "walli@tencent.com": "walli",
    "zhuofengwang@tencent.com": "Zhuofeng-Wang",
-    "simonweng@tencent.com": "Contentment003111",
    # April 2026 salvage-PR batch (#14920, #14986, #14966)
    "mrunmayeerane17@gmail.com": "mrunmayee17",
    "69489633+camaragon@users.noreply.github.com": "camaragon",
@@ -576,7 +562,6 @@ AUTHOR_MAP = {
    "screenmachine@gmail.com": "teknium1",
    "chenzeshi@live.com": "chen1749144759",
    "mor.aleksandr@yahoo.com": "MorAlekss",
-    "276649498+ztexydt-cqh@users.noreply.github.com": "ztexydt-cqh",
    "ash@users.noreply.github.com": "ash",
    "andrewho.sf@gmail.com": "andrewhosf",
    # April 2026 Honcho bug-fix consolidation (#15381)
@@ -585,12 +570,6 @@ AUTHOR_MAP = {
    "dontcallmejames@users.noreply.github.com": "dontcallmejames",
    "hekaru.agent@gmail.com": "hekaru-agent",
    "jas9000@gmail.com": "twozle",
-    "r.filgueiras@apheris.com": "rfilgueiras",
-    "leihaibo1992@gmail.com": "Leihb",
-    # ACP streaming fix salvage (PR #9428 + #16273)
-    "nfb0408@163.com": "ningfangbin",
-    "164839249+Joseph19820124@users.noreply.github.com": "Joseph19820124",
-    "rugved@lmstudio.ai": "rugvedS07",
 }


@@ -118,82 +118,6 @@ class TestThreadLocalApprovalCallback:
        assert worker_saw == [None]
        assert _get_sudo_password_callback() is cb_main

-    def test_sudo_password_cache_does_not_leak_across_threads(self):
-        """Interactive sudo cache must not bleed into another executor thread."""
-        from tools.terminal_tool import (
-            _get_cached_sudo_password,
-            _reset_cached_sudo_passwords,
-            _set_cached_sudo_password,
-        )
-
-        _reset_cached_sudo_passwords()
-        _set_cached_sudo_password("main-thread-password")
-
-        worker_saw = []
-
-        def worker():
-            worker_saw.append(_get_cached_sudo_password())
-
-        t = threading.Thread(target=worker)
-        t.start()
-        t.join()
-
-        assert worker_saw == [""]
-        assert _get_cached_sudo_password() == "main-thread-password"
-
-    def test_sudo_password_cache_isolated_across_acp_sessions_on_same_pool_thread(self):
-        """ACP's ThreadPoolExecutor reuses threads. Two ACP sessions that land
-        on the same reused thread must not share the interactive sudo password
-        cache. The fix wraps each session in contextvars.copy_context() and
-        binds HERMES_SESSION_KEY per session, so the cache scope key differs
-        across sessions even when the underlying thread is identical.
-        """
-        import contextvars
-        from concurrent.futures import ThreadPoolExecutor
-
-        from gateway.session_context import (
-            clear_session_vars,
-            set_session_vars,
-        )
-        from tools.terminal_tool import (
-            _get_cached_sudo_password,
-            _reset_cached_sudo_passwords,
-            _set_cached_sudo_password,
-        )
-
-        _reset_cached_sudo_passwords()
-        executor = ThreadPoolExecutor(max_workers=1)  # force thread reuse
-
-        runs: list[tuple[str, str, str]] = []  # (session_id, before, after)
-
-        def _simulate_acp_session(session_id: str, write_password: str) -> None:
-            tokens = set_session_vars(session_key=session_id)
-            try:
-                observed_before = _get_cached_sudo_password()
-                _set_cached_sudo_password(write_password)
-                observed_after = _get_cached_sudo_password()
-                runs.append((session_id, observed_before, observed_after))
-            finally:
-                clear_session_vars(tokens)
-
-        def _run_in_fresh_context(session_id: str, pw: str) -> str:
-            ctx = contextvars.copy_context()
-            ctx.run(_simulate_acp_session, session_id, pw)
-            return session_id
-
-        try:
-            executor.submit(_run_in_fresh_context, "acp-session-A", "alpha-secret").result()
-            # Same thread. Without the fix B would see "alpha-secret".
-            executor.submit(_run_in_fresh_context, "acp-session-B", "bravo-secret").result()
-        finally:
-            executor.shutdown(wait=True)
-            _reset_cached_sudo_passwords()
-
-        assert runs[0] == ("acp-session-A", "", "alpha-secret")
-        # Core regression guard: B on the same reused thread must see an empty
-        # cache, not A's password.
-        assert runs[1] == ("acp-session-B", "", "bravo-secret")
-

 class TestAcpExecAskGate:
    """GHSA-96vc-wcxf-jjff: ACP's _run_agent must set HERMES_INTERACTIVE so
@@ -68,33 +68,6 @@ class TestBuildAnthropicClient:
            assert "fine-grained-tool-streaming-2025-05-14" in betas
            assert "api_key" not in kwargs

-    def test_oauth_does_not_send_claude_code_spoof_headers(self):
-        """OAuth requests identify as Hermes — no claude-cli UA, no x-app: cli.
-
-        Anthropic's OAuth-gated Messages API accepts requests from non-Claude-Code
-        clients as long as auth is correct and the OAuth beta headers are present.
-        See commit that removed fingerprinting for the live-test write-up.
-        """
-        with patch("agent.anthropic_adapter._anthropic_sdk") as mock_sdk:
-            build_anthropic_client("sk-ant-oat01-" + "x" * 60)
-            headers = mock_sdk.Anthropic.call_args[1]["default_headers"]
-            assert "user-agent" not in {k.lower() for k in headers}
-            assert "x-app" not in {k.lower() for k in headers}
-
-    def test_oauth_strips_context_1m_beta(self):
-        """context-1m-2025-08-07 is incompatible with OAuth auth — must be stripped.
-
-        Anthropic returns HTTP 400 "This authentication style is incompatible
-        with the long context beta header." when OAuth traffic carries it.
-        """
-        with patch("agent.anthropic_adapter._anthropic_sdk") as mock_sdk:
-            build_anthropic_client("sk-ant-oat01-" + "x" * 60)
-            betas = mock_sdk.Anthropic.call_args[1]["default_headers"]["anthropic-beta"]
-            assert "context-1m-2025-08-07" not in betas
-            # But other common betas still flow through
-            assert "interleaved-thinking-2025-05-14" in betas
-            assert "oauth-2025-04-20" in betas
-
    def test_api_key_uses_api_key(self):
        with patch("agent.anthropic_adapter._anthropic_sdk") as mock_sdk:
            build_anthropic_client("sk-ant-api03-something")
@@ -544,36 +517,6 @@ class TestConvertTools:
        assert convert_tools_to_anthropic([]) == []
        assert convert_tools_to_anthropic(None) == []

-    def test_strips_nullable_union_from_input_schema(self):
-        tools = [
-            {
-                "type": "function",
-                "function": {
-                    "name": "run",
-                    "description": "Run command",
-                    "parameters": {
-                        "type": "object",
-                        "properties": {
-                            "command": {"type": "string"},
-                            "timeout": {
-                                "anyOf": [{"type": "integer"}, {"type": "null"}],
-                                "default": None,
-                            },
-                        },
-                        "required": ["command"],
-                    },
-                },
-            }
-        ]
-
-        result = convert_tools_to_anthropic(tools)
-
-        assert result[0]["input_schema"]["properties"]["timeout"] == {
-            "type": "integer",
-            "default": None,
-        }
-        assert result[0]["input_schema"]["required"] == ["command"]
-

 # ---------------------------------------------------------------------------
 # Message conversion
@@ -1509,129 +1509,3 @@ class TestAuxiliaryAuthRefreshRetry:
        mock_refresh.assert_called_once_with("anthropic")
        assert stale_client.chat.completions.create.await_count == 1
        assert fresh_client.chat.completions.create.await_count == 1
-
-
-class TestCodexAdapterReasoningTranslation:
-    """Verify _CodexCompletionsAdapter translates extra_body.reasoning
-    into the Responses API's top-level reasoning + include fields, matching
-    agent/transports/codex.py::build_kwargs() behavior.
-
-    Regression for user feedback (Apr 26): auxiliary callers that configure
-    reasoning via auxiliary.<task>.extra_body.reasoning had that config
-    silently dropped because the adapter only forwarded messages/model/tools.
-    """
-
-    @staticmethod
-    def _build_adapter():
-        """Build a _CodexCompletionsAdapter with a mocked responses.stream()."""
-        from agent.auxiliary_client import _CodexCompletionsAdapter
-        from types import SimpleNamespace
-
-        # Mock the stream context manager: yields no events, get_final_response
-        # returns a minimal empty-output response.
-        fake_final = SimpleNamespace(
-            output=[SimpleNamespace(
-                type="message",
-                content=[SimpleNamespace(type="output_text", text="hi")],
-            )],
-            usage=SimpleNamespace(input_tokens=1, output_tokens=1, total_tokens=2),
-        )
-
-        class _FakeStream:
-            def __enter__(self): return self
-            def __exit__(self, *a): return False
-            def __iter__(self): return iter([])
-            def get_final_response(self): return fake_final
-
-        captured_kwargs = {}
-
-        def _stream(**kwargs):
-            captured_kwargs.update(kwargs)
-            return _FakeStream()
-
-        real_client = MagicMock()
-        real_client.responses.stream = _stream
-        adapter = _CodexCompletionsAdapter(real_client, "gpt-5.3-codex")
-        return adapter, captured_kwargs
-
-    def test_reasoning_effort_medium_translated_to_top_level(self):
-        adapter, captured = self._build_adapter()
-        adapter.create(
-            messages=[{"role": "user", "content": "hi"}],
-            extra_body={"reasoning": {"effort": "medium"}},
-        )
-        assert captured.get("reasoning") == {"effort": "medium", "summary": "auto"}
-        assert captured.get("include") == ["reasoning.encrypted_content"]
-
-    def test_reasoning_effort_minimal_clamped_to_low(self):
-        """Codex backend rejects 'minimal'; adapter clamps to 'low' per main transport."""
-        adapter, captured = self._build_adapter()
-        adapter.create(
-            messages=[{"role": "user", "content": "hi"}],
-            extra_body={"reasoning": {"effort": "minimal"}},
-        )
-        assert captured.get("reasoning") == {"effort": "low", "summary": "auto"}
-        assert captured.get("include") == ["reasoning.encrypted_content"]
-
-    def test_reasoning_effort_low_passed_through(self):
-        adapter, captured = self._build_adapter()
-        adapter.create(
-            messages=[{"role": "user", "content": "hi"}],
-            extra_body={"reasoning": {"effort": "low"}},
-        )
-        assert captured.get("reasoning") == {"effort": "low", "summary": "auto"}
-
-    def test_reasoning_effort_high_passed_through(self):
-        adapter, captured = self._build_adapter()
-        adapter.create(
-            messages=[{"role": "user", "content": "hi"}],
-            extra_body={"reasoning": {"effort": "high"}},
-        )
-        assert captured.get("reasoning") == {"effort": "high", "summary": "auto"}
-
-    def test_reasoning_disabled_omits_reasoning_and_include(self):
-        adapter, captured = self._build_adapter()
-        adapter.create(
-            messages=[{"role": "user", "content": "hi"}],
-            extra_body={"reasoning": {"enabled": False}},
-        )
-        assert "reasoning" not in captured
-        assert "include" not in captured
-
-    def test_reasoning_default_effort_when_only_enabled_flag(self):
-        """extra_body={"reasoning": {}} (truthy enabled by omission) → default 'medium'."""
-        adapter, captured = self._build_adapter()
-        adapter.create(
-            messages=[{"role": "user", "content": "hi"}],
-            extra_body={"reasoning": {}},
-        )
-        assert captured.get("reasoning") == {"effort": "medium", "summary": "auto"}
-        assert captured.get("include") == ["reasoning.encrypted_content"]
-
-    def test_no_extra_body_means_no_reasoning_keys(self):
-        """Baseline: without extra_body, no reasoning/include is sent (preserves
-        current behavior for callers that don't opt in)."""
-        adapter, captured = self._build_adapter()
-        adapter.create(messages=[{"role": "user", "content": "hi"}])
-        assert "reasoning" not in captured
-        assert "include" not in captured
-
-    def test_extra_body_without_reasoning_key_is_noop(self):
-        adapter, captured = self._build_adapter()
-        adapter.create(
-            messages=[{"role": "user", "content": "hi"}],
-            extra_body={"metadata": {"source": "test"}},
-        )
-        assert "reasoning" not in captured
-        assert "include" not in captured
-
-    def test_non_dict_reasoning_value_is_ignored_gracefully(self):
-        """Defensive: if a caller accidentally passes a string/None, we
-        silently skip instead of crashing inside the adapter."""
-        adapter, captured = self._build_adapter()
-        adapter.create(
-            messages=[{"role": "user", "content": "hi"}],
-            extra_body={"reasoning": "medium"},  # wrong shape — must not crash
-        )
-        assert "reasoning" not in captured
-
@@ -1,237 +0,0 @@
-"""Tests for transport auto-detection in agent.auxiliary_client.
-
-Auxiliary clients must pick the correct wire protocol (OpenAI
-chat.completions vs native Anthropic Messages) based on the endpoint,
-regardless of which resolve_provider_client branch built them.
-
-Regression target (April 2026): Kimi Coding Plan's ``api.kimi.com/coding``
-endpoint only speaks Anthropic Messages — sending ``kimi-for-coding`` over
-chat.completions returns 404 "resource_not_found_error".  The named
-``kimi-coding`` provider branch in resolve_provider_client used to build a
-plain OpenAI client, so title generation / vision / compression /
-web_extract all failed on Kimi Coding Plan users.
-"""
-
-from __future__ import annotations
-
-from unittest.mock import MagicMock, patch
-
-import pytest
-
-
-@pytest.fixture(autouse=True)
-def _clean_env(monkeypatch):
-    for key in (
-        "OPENAI_API_KEY", "OPENAI_BASE_URL",
-        "ANTHROPIC_API_KEY", "ANTHROPIC_TOKEN",
-        "KIMI_API_KEY", "KIMI_CODING_API_KEY", "KIMI_BASE_URL",
-    ):
-        monkeypatch.delenv(key, raising=False)
-
-
-# ---------------------------------------------------------------------------
-# URL detection helper
-# ---------------------------------------------------------------------------
-
-@pytest.mark.parametrize("url,expected,label", [
-    ("https://api.kimi.com/coding/v1", True, "Kimi Coding Plan /v1"),
-    ("https://api.kimi.com/coding", True, "Kimi Coding Plan no /v1"),
-    ("https://api.moonshot.ai/v1", False, "Moonshot legacy"),
-    ("https://api.minimax.io/anthropic", True, "MiniMax /anthropic"),
-    ("https://litellm.example.com/v1/anthropic", True, "/anthropic suffix"),
-    ("https://api.anthropic.com", True, "native Anthropic"),
-    ("https://api.anthropic.com/v1", True, "native Anthropic /v1"),
-    ("https://openrouter.ai/api/v1", False, "OpenRouter"),
-    ("https://api.openai.com/v1", False, "OpenAI"),
-    ("https://inference-api.nousresearch.com/v1", False, "Nous"),
-    ("", False, "empty"),
-    (None, False, "None"),
-])
-def test_endpoint_speaks_anthropic_messages(url, expected, label):
-    from agent.auxiliary_client import _endpoint_speaks_anthropic_messages
-    assert _endpoint_speaks_anthropic_messages(url) is expected, (
-        f"{label}: {url!r} should be {expected}"
-    )
-
-
-# ---------------------------------------------------------------------------
-# _maybe_wrap_anthropic decision table
-# ---------------------------------------------------------------------------
-
-def test_maybe_wrap_anthropic_rewraps_kimi_coding_url():
-    """Plain OpenAI client pointed at api.kimi.com/coding gets rewrapped."""
-    from agent.auxiliary_client import _maybe_wrap_anthropic, AnthropicAuxiliaryClient
-
-    plain_client = MagicMock(name="plain_openai")
-    fake_anthropic = MagicMock(name="anthropic_sdk_client")
-
-    with patch(
-        "agent.anthropic_adapter.build_anthropic_client",
-        return_value=fake_anthropic,
-    ):
-        result = _maybe_wrap_anthropic(
-            plain_client, "kimi-for-coding", "sk-kimi-test",
-            "https://api.kimi.com/coding", api_mode=None,
-        )
-    assert isinstance(result, AnthropicAuxiliaryClient)
-
-
-def test_maybe_wrap_anthropic_rewraps_slash_anthropic_url():
-    """Plain OpenAI client pointed at any /anthropic URL gets rewrapped."""
-    from agent.auxiliary_client import _maybe_wrap_anthropic, AnthropicAuxiliaryClient
-
-    plain_client = MagicMock(name="plain_openai")
-    fake_anthropic = MagicMock(name="anthropic_sdk_client")
-
-    with patch(
-        "agent.anthropic_adapter.build_anthropic_client",
-        return_value=fake_anthropic,
-    ):
-        result = _maybe_wrap_anthropic(
-            plain_client, "MiniMax-M2.7", "mm-key",
-            "https://api.minimax.io/anthropic", api_mode=None,
-        )
-    assert isinstance(result, AnthropicAuxiliaryClient)
-
-
-def test_maybe_wrap_anthropic_skips_openai_wire_urls():
-    """OpenRouter / OpenAI / Moonshot-legacy stay as plain OpenAI clients."""
-    from agent.auxiliary_client import _maybe_wrap_anthropic, AnthropicAuxiliaryClient
-
-    plain_client = MagicMock(name="plain_openai")
-    # No patch on build_anthropic_client — if the function tried to call it,
-    # we'd get an AttributeError-style failure. The point is it shouldn't.
-    result = _maybe_wrap_anthropic(
-        plain_client, "claude-sonnet-4.6", "sk-or-test",
-        "https://openrouter.ai/api/v1", api_mode=None,
-    )
-    assert result is plain_client
-    assert not isinstance(result, AnthropicAuxiliaryClient)
-
-
-def test_maybe_wrap_anthropic_respects_explicit_chat_completions():
-    """api_mode=chat_completions overrides URL heuristics."""
-    from agent.auxiliary_client import _maybe_wrap_anthropic, AnthropicAuxiliaryClient
-
-    plain_client = MagicMock(name="plain_openai")
-    result = _maybe_wrap_anthropic(
-        plain_client, "kimi-for-coding", "sk-kimi-test",
-        "https://api.kimi.com/coding",
-        api_mode="chat_completions",  # explicit override
-    )
-    assert result is plain_client, "Explicit chat_completions must bypass wrap"
-    assert not isinstance(result, AnthropicAuxiliaryClient)
-
-
-def test_maybe_wrap_anthropic_honors_explicit_anthropic_messages():
-    """api_mode=anthropic_messages wraps even when URL wouldn't trigger."""
-    from agent.auxiliary_client import _maybe_wrap_anthropic, AnthropicAuxiliaryClient
-
-    plain_client = MagicMock(name="plain_openai")
-    fake_anthropic = MagicMock(name="anthropic_sdk_client")
-
-    with patch(
-        "agent.anthropic_adapter.build_anthropic_client",
-        return_value=fake_anthropic,
-    ):
-        result = _maybe_wrap_anthropic(
-            plain_client, "model-name", "some-key",
-            "https://opaque.internal/v1",  # URL alone wouldn't trigger
-            api_mode="anthropic_messages",
-        )
-    assert isinstance(result, AnthropicAuxiliaryClient)
-
-
-def test_maybe_wrap_anthropic_double_wrap_safe():
-    """Already-wrapped AnthropicAuxiliaryClient passes through unchanged."""
-    from agent.auxiliary_client import _maybe_wrap_anthropic, AnthropicAuxiliaryClient
-
-    already_wrapped = MagicMock(spec=AnthropicAuxiliaryClient)
-    result = _maybe_wrap_anthropic(
-        already_wrapped, "model", "key",
-        "https://api.kimi.com/coding", api_mode=None,
-    )
-    assert result is already_wrapped
-
-
-def test_maybe_wrap_anthropic_codex_client_passes_through():
-    """CodexAuxiliaryClient is never re-dispatched."""
-    from agent.auxiliary_client import (
-        _maybe_wrap_anthropic,
-        CodexAuxiliaryClient,
-        AnthropicAuxiliaryClient,
-    )
-
-    codex_client = MagicMock(spec=CodexAuxiliaryClient)
-    result = _maybe_wrap_anthropic(
-        codex_client, "model", "key",
-        "https://api.kimi.com/coding", api_mode=None,
-    )
-    assert result is codex_client
-    assert not isinstance(result, AnthropicAuxiliaryClient)
-
-
-def test_maybe_wrap_anthropic_sdk_missing_falls_back():
-    """ImportError on anthropic SDK returns plain client with warning."""
-    from agent.auxiliary_client import _maybe_wrap_anthropic, AnthropicAuxiliaryClient
-
-    plain_client = MagicMock(name="plain_openai")
-
-    def _raise_import(*args, **kwargs):
-        raise ImportError("no anthropic SDK")
-
-    with patch(
-        "agent.anthropic_adapter.build_anthropic_client",
-        side_effect=_raise_import,
-    ):
-        # The ImportError is caught on the `from ... import` line inside
-        # _maybe_wrap_anthropic, which runs before build_anthropic_client is
-        # called. To exercise the ImportError path we need to patch the
-        # module lookup itself.
-        import sys as _sys
-        saved = _sys.modules.get("agent.anthropic_adapter")
-        _sys.modules["agent.anthropic_adapter"] = None  # force ImportError
-        try:
-            result = _maybe_wrap_anthropic(
-                plain_client, "kimi-for-coding", "sk-kimi-test",
-                "https://api.kimi.com/coding", api_mode=None,
-            )
-        finally:
-            if saved is not None:
-                _sys.modules["agent.anthropic_adapter"] = saved
-            else:
-                _sys.modules.pop("agent.anthropic_adapter", None)
-
-    assert result is plain_client
-    assert not isinstance(result, AnthropicAuxiliaryClient)
-
-
-# ---------------------------------------------------------------------------
-# Integration: resolve_provider_client for named kimi-coding provider
-# ---------------------------------------------------------------------------
-
-def test_resolve_provider_client_kimi_coding_wraps_anthropic(monkeypatch, tmp_path):
-    """End-to-end: resolve_provider_client('kimi-coding', 'kimi-for-coding')
-    must return AnthropicAuxiliaryClient because /coding speaks Anthropic.
-
-    This is the primary regression guard: the bug that caused title
-    generation 404s on every Kimi Coding Plan user after the "main model
-    for every user" aux design shipped.
-    """
-    from agent.auxiliary_client import (
-        resolve_provider_client,
-        AnthropicAuxiliaryClient,
-    )
-
-    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
-    # sk-kimi- prefix triggers /coding endpoint auto-detection
-    monkeypatch.setenv("KIMI_API_KEY", "sk-kimi-faketesttoken123")
-
-    client, model = resolve_provider_client("kimi-coding", "kimi-for-coding")
-    assert client is not None, "Should resolve a client"
-    assert isinstance(client, AnthropicAuxiliaryClient), (
-        "Kimi Coding Plan endpoint (api.kimi.com/coding) speaks Anthropic "
-        "Messages — aux client MUST be AnthropicAuxiliaryClient, got "
-        f"{type(client).__name__}"
-    )
-    assert "kimi.com/coding" in str(client.base_url)
@@ -117,25 +117,7 @@ class TestResolveBedrocRegion:

    def test_defaults_to_us_east_1(self):
        from agent.bedrock_adapter import resolve_bedrock_region
-        from unittest.mock import patch, MagicMock
-        mock_session = MagicMock()
-        mock_session.get_config_variable.return_value = None
-        with patch("botocore.session.get_session", return_value=mock_session):
-            assert resolve_bedrock_region({}) == "us-east-1"
-
-    def test_falls_back_to_botocore_profile_region(self):
-        from agent.bedrock_adapter import resolve_bedrock_region
-        from unittest.mock import patch, MagicMock
-        mock_session = MagicMock()
-        mock_session.get_config_variable.return_value = "eu-central-1"
-        with patch("botocore.session.get_session", return_value=mock_session):
-            assert resolve_bedrock_region({}) == "eu-central-1"
-
-    def test_botocore_failure_falls_back_to_us_east_1(self):
-        from agent.bedrock_adapter import resolve_bedrock_region
-        from unittest.mock import patch
-        with patch("botocore.session.get_session", side_effect=Exception("no botocore")):
-            assert resolve_bedrock_region({}) == "us-east-1"
+        assert resolve_bedrock_region({}) == "us-east-1"


 # ---------------------------------------------------------------------------
@@ -1370,143 +1370,3 @@ def test_nous_exhausted_entry_recovers_via_auth_store_sync(tmp_path, monkeypatch
    assert len(available) == 1
    assert available[0].refresh_token == "refresh-FRESH"
    assert available[0].last_status is None
-
-
-# ── OpenAI Codex OAuth cross-process sync tests ────────────────────────────
-
-def _codex_auth_store(access: str, refresh: str) -> dict:
-    return {
-        "version": 1,
-        "active_provider": "openai-codex",
-        "providers": {
-            "openai-codex": {
-                "auth_mode": "chatgpt",
-                "tokens": {
-                    "access_token": access,
-                    "refresh_token": refresh,
-                    "id_token": "id-" + access,
-                },
-                "last_refresh": "2026-04-28T00:00:00Z",
-            }
-        },
-    }
-
-
-def test_sync_codex_entry_from_auth_store_adopts_newer_tokens(tmp_path, monkeypatch):
-    """When auth.json has newer Codex tokens, the pool entry should adopt them."""
-    monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
-    _write_auth_store(tmp_path, _codex_auth_store("access-OLD", "refresh-OLD"))
-
-    from agent.credential_pool import load_pool
-
-    pool = load_pool("openai-codex")
-    entry = pool.select()
-    assert entry is not None
-    assert entry.access_token == "access-OLD"
-    assert entry.refresh_token == "refresh-OLD"
-
-    # Simulate `hermes auth openai-codex` replacing the token pair on disk.
-    _write_auth_store(tmp_path, _codex_auth_store("access-NEW", "refresh-NEW"))
-
-    synced = pool._sync_codex_entry_from_auth_store(entry)
-    assert synced is not entry
-    assert synced.access_token == "access-NEW"
-    assert synced.refresh_token == "refresh-NEW"
-    assert synced.last_status is None
-    assert synced.last_error_code is None
-    assert synced.last_error_reset_at is None
-
-
-def test_sync_codex_entry_noop_when_tokens_match(tmp_path, monkeypatch):
-    """When auth.json has the same tokens, sync should be a no-op."""
-    monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
-    _write_auth_store(tmp_path, _codex_auth_store("access-same", "refresh-same"))
-
-    from agent.credential_pool import load_pool
-
-    pool = load_pool("openai-codex")
-    entry = pool.select()
-    assert entry is not None
-
-    synced = pool._sync_codex_entry_from_auth_store(entry)
-    assert synced is entry
-
-
-def test_codex_exhausted_entry_recovers_via_auth_store_sync(tmp_path, monkeypatch):
-    """An exhausted Codex entry should recover when auth.json has newer tokens.
-
-    Reproduces the Discord report (p1aceho1der, Apr 2026): after a Codex
-    rate-limit reset the user ran `hermes model` to reauth, but the pool
-    entry stayed marked EXHAUSTED with last_error_reset_at many hours in
-    the future — so `_available_entries` kept returning empty and every
-    request failed with "no available entries (all exhausted or empty)".
-    """
-    monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
-    from agent.credential_pool import load_pool, STATUS_EXHAUSTED
-    from dataclasses import replace as dc_replace
-
-    _write_auth_store(tmp_path, _codex_auth_store("access-OLD", "refresh-OLD"))
-
-    pool = load_pool("openai-codex")
-    entry = pool.select()
-    assert entry is not None
-
-    # Mark entry as exhausted with last_error_reset_at one hour in the
-    # future (Codex 429 weekly-window pattern).
-    now = time.time()
-    exhausted = dc_replace(
-        entry,
-        last_status=STATUS_EXHAUSTED,
-        last_status_at=now,
-        last_error_code=429,
-        last_error_reset_at=now + 3600,
-    )
-    pool._replace_entry(entry, exhausted)
-    pool._persist()
-
-    # Sanity: before the reauth, _available_entries refuses to return
-    # this entry because last_error_reset_at is in the future.
-    # (clear_expired would only clear it AFTER exhausted_until elapsed.)
-    available_before = pool._available_entries(clear_expired=True, refresh=False)
-    assert available_before == []
-
-    # Simulate `hermes model` / `hermes auth` refreshing the tokens.
-    _write_auth_store(tmp_path, _codex_auth_store("access-FRESH", "refresh-FRESH"))
-
-    available = pool._available_entries(clear_expired=True, refresh=False)
-    assert len(available) == 1
-    assert available[0].access_token == "access-FRESH"
-    assert available[0].refresh_token == "refresh-FRESH"
-    assert available[0].last_status is None
-    assert available[0].last_error_reset_at is None
-
-
-def test_codex_exhausted_entry_stays_stuck_without_auth_store_update(tmp_path, monkeypatch):
-    """Regression guard: if auth.json tokens haven't changed, the exhausted
-    entry must stay stuck behind its reset window — sync must not spuriously
-    clear status just because the entry is STATUS_EXHAUSTED."""
-    monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
-    from agent.credential_pool import load_pool, STATUS_EXHAUSTED
-    from dataclasses import replace as dc_replace
-
-    _write_auth_store(tmp_path, _codex_auth_store("access-same", "refresh-same"))
-
-    pool = load_pool("openai-codex")
-    entry = pool.select()
-    assert entry is not None
-
-    now = time.time()
-    exhausted = dc_replace(
-        entry,
-        last_status=STATUS_EXHAUSTED,
-        last_status_at=now,
-        last_error_code=429,
-        last_error_reset_at=now + 3600,
-    )
-    pool._replace_entry(entry, exhausted)
-    pool._persist()
-
-    # auth.json unchanged → sync returns same entry → exhausted_until check
-    # still skips it.
-    available = pool._available_entries(clear_expired=True, refresh=False)
-    assert available == []
@@ -274,15 +274,13 @@ class TestQueryLocalContextLengthLmStudio:
        return client_mock

    def test_lmstudio_exact_key_match(self):
-        """Resolves loaded ctx when key matches exactly."""
+        """Reads max_context_length when key matches exactly."""
        from agent.model_metadata import _query_local_context_length

        native_resp = self._make_resp(200, {
            "models": [
-                {"key": "nvidia/nvidia-nemotron-super-49b-v1",
-                 "id": "nvidia/nvidia-nemotron-super-49b-v1",
-                 "max_context_length": 1_048_576,
-                 "loaded_instances": [{"config": {"context_length": 131072}}]},
+                {"key": "nvidia/nvidia-nemotron-super-49b-v1", "id": "nvidia/nvidia-nemotron-super-49b-v1",
+                 "max_context_length": 131072},
            ]
        })
        client_mock = self._make_client(
@@ -312,8 +310,7 @@ class TestQueryLocalContextLengthLmStudio:
            "models": [
                {"key": "nvidia/nvidia-nemotron-super-49b-v1",
                 "id": "nvidia/nvidia-nemotron-super-49b-v1",
-                 "max_context_length": 1_048_576,
-                 "loaded_instances": [{"config": {"context_length": 131072}}]},
+                 "max_context_length": 131072},
            ]
        })
        client_mock = self._make_client(
@@ -466,10 +463,7 @@ class TestFetchEndpointModelMetadataLmStudio:
                    {
                        "key": "lmstudio-community/Qwen3.5-27B-GGUF/Qwen3.5-27B-Q8_0.gguf",
                        "id": "lmstudio-community/Qwen3.5-27B-GGUF/Qwen3.5-27B-Q8_0.gguf",
-                        "max_context_length": 1_048_576,
-                        "loaded_instances": [
-                            {"config": {"context_length": 131072}}
-                        ],
+                        "max_context_length": 131072,
                    }
                ]
            }
@@ -182,7 +182,7 @@ class TestMaybeAutoTitle:
            import time
            time.sleep(0.3)
            mock_auto.assert_called_once_with(
-                db, "sess-1", "hello", "hi there", failure_callback=None, main_runtime=None
+                db, "sess-1", "hello", "hi there", failure_callback=None
            )

    def test_forwards_failure_callback_to_worker(self):
@@ -202,7 +202,7 @@ class TestMaybeAutoTitle:
            import time
            time.sleep(0.3)
            mock_auto.assert_called_once_with(
-                db, "sess-1", "hello", "hi there", failure_callback=_cb, main_runtime=None
+                db, "sess-1", "hello", "hi there", failure_callback=_cb
            )

    def test_skips_if_no_response(self):
@@ -4,7 +4,7 @@ import pytest
 from types import SimpleNamespace

 from agent.transports import get_transport
-from agent.transports.types import NormalizedResponse
+from agent.transports.types import NormalizedResponse, ToolCall


@pytest.fixture
@@ -122,90 +122,6 @@ class TestChatCompletionsBuildKwargs:
        )
        assert kw["extra_body"]["think"] is False

-    def test_gemini_without_explicit_reasoning_config_keeps_existing_behavior(self, transport):
-        msgs = [{"role": "user", "content": "Hi"}]
-        kw = transport.build_kwargs(
-            model="gemini-3-flash-preview",
-            messages=msgs,
-            provider_name="gemini",
-        )
-        assert "thinking_config" not in kw.get("extra_body", {})
-
-    def test_gemini_flash_reasoning_maps_to_thinking_config(self, transport):
-        msgs = [{"role": "user", "content": "Hi"}]
-        kw = transport.build_kwargs(
-            model="gemini-3-flash-preview",
-            messages=msgs,
-            provider_name="gemini",
-            reasoning_config={"enabled": True, "effort": "high"},
-        )
-        assert kw["extra_body"]["thinking_config"] == {
-            "includeThoughts": True,
-            "thinkingLevel": "high",
-        }
-
-    def test_gemini_25_reasoning_only_enables_visible_thoughts(self, transport):
-        msgs = [{"role": "user", "content": "Hi"}]
-        kw = transport.build_kwargs(
-            model="gemini-2.5-flash",
-            messages=msgs,
-            provider_name="gemini",
-            reasoning_config={"enabled": True, "effort": "high"},
-        )
-        assert kw["extra_body"]["thinking_config"] == {
-            "includeThoughts": True,
-        }
-
-    def test_gemini_pro_reasoning_clamps_to_supported_levels(self, transport):
-        msgs = [{"role": "user", "content": "Hi"}]
-        kw = transport.build_kwargs(
-            model="google/gemini-3.1-pro-preview",
-            messages=msgs,
-            provider_name="gemini",
-            reasoning_config={"enabled": True, "effort": "medium"},
-        )
-        assert kw["extra_body"]["thinking_config"] == {
-            "includeThoughts": True,
-            "thinkingLevel": "low",
-        }
-
-    def test_gemini_disabled_reasoning_hides_thoughts(self, transport):
-        msgs = [{"role": "user", "content": "Hi"}]
-        kw = transport.build_kwargs(
-            model="gemini-3-flash-preview",
-            messages=msgs,
-            provider_name="gemini",
-            reasoning_config={"enabled": False},
-        )
-        assert kw["extra_body"]["thinking_config"] == {
-            "includeThoughts": False,
-        }
-
-    def test_gemini_xhigh_clamps_to_high(self, transport):
-        msgs = [{"role": "user", "content": "Hi"}]
-        kw = transport.build_kwargs(
-            model="gemini-3-flash-preview",
-            messages=msgs,
-            provider_name="gemini",
-            reasoning_config={"enabled": True, "effort": "xhigh"},
-        )
-        assert kw["extra_body"]["thinking_config"]["thinkingLevel"] == "high"
-
-    def test_gemini_flash_minimal_clamps_to_low(self, transport):
-        # Gemini 3 Flash documents low/medium/high; "minimal" isn't accepted,
-        # so clamp it down to "low" rather than forwarding it verbatim.
-        msgs = [{"role": "user", "content": "Hi"}]
-        kw = transport.build_kwargs(
-            model="gemini-3-flash-preview",
-            messages=msgs,
-            provider_name="gemini",
-            reasoning_config={"enabled": True, "effort": "minimal"},
-        )
-        assert kw["extra_body"]["thinking_config"] == {
-            "includeThoughts": True,
-            "thinkingLevel": "low",
-        }
-
    def test_max_tokens_with_fn(self, transport):
        msgs = [{"role": "user", "content": "Hi"}]
        kw = transport.build_kwargs(
@@ -376,80 +292,6 @@ class TestChatCompletionsKimi:
        assert "type" not in kw["tools"][0]["function"]["parameters"]["properties"]["q"]


-class TestChatCompletionsLmStudioReasoning:
-    """LM Studio publishes per-model reasoning ``allowed_options``. When the
-    user requests an effort the model can't honor (e.g. ``high`` on a
-    toggle-style ``["off","on"]`` model), the transport omits
-    ``reasoning_effort`` so LM Studio falls back to the model's default —
-    silently downgrading "high" to "low" would mislead the user.
-    """
-
-    def test_omits_effort_when_high_not_allowed_toggle(self, transport):
-        kw = transport.build_kwargs(
-            model="gpt-oss", messages=[{"role": "user", "content": "Hi"}],
-            is_lmstudio=True,
-            supports_reasoning=True,
-            reasoning_config={"effort": "high"},
-            lmstudio_reasoning_options=["off", "on"],
-        )
-        assert "reasoning_effort" not in kw
-
-    def test_omits_effort_when_high_not_allowed_minimal_low(self, transport):
-        kw = transport.build_kwargs(
-            model="gpt-oss", messages=[{"role": "user", "content": "Hi"}],
-            is_lmstudio=True,
-            supports_reasoning=True,
-            reasoning_config={"effort": "high"},
-            lmstudio_reasoning_options=["off", "minimal", "low"],
-        )
-        assert "reasoning_effort" not in kw
-
-    def test_passes_through_when_effort_allowed(self, transport):
-        kw = transport.build_kwargs(
-            model="gpt-oss", messages=[{"role": "user", "content": "Hi"}],
-            is_lmstudio=True,
-            supports_reasoning=True,
-            reasoning_config={"effort": "high"},
-            lmstudio_reasoning_options=["off", "low", "medium", "high"],
-        )
-        assert kw["reasoning_effort"] == "high"
-
-    def test_passes_through_aliased_on_for_toggle(self, transport):
-        # User has reasoning enabled at the default "medium"; toggle model
-        # publishes ["off","on"] which aliases to {"none","medium"}, so the
-        # default request is honorable and gets sent.
-        kw = transport.build_kwargs(
-            model="gpt-oss", messages=[{"role": "user", "content": "Hi"}],
-            is_lmstudio=True,
-            supports_reasoning=True,
-            reasoning_config={"effort": "medium"},
-            lmstudio_reasoning_options=["off", "on"],
-        )
-        assert kw["reasoning_effort"] == "medium"
-
-    def test_disabled_keeps_none_when_off_allowed(self, transport):
-        kw = transport.build_kwargs(
-            model="gpt-oss", messages=[{"role": "user", "content": "Hi"}],
-            is_lmstudio=True,
-            supports_reasoning=True,
-            reasoning_config={"enabled": False},
-            lmstudio_reasoning_options=["off", "on"],
-        )
-        assert kw["reasoning_effort"] == "none"
-
-    def test_no_options_falls_back_to_legacy_behavior(self, transport):
-        # When the probe failed or returned nothing, allowed_options is unknown;
-        # send whatever the user picked rather than blocking the request.
-        kw = transport.build_kwargs(
-            model="gpt-oss", messages=[{"role": "user", "content": "Hi"}],
-            is_lmstudio=True,
-            supports_reasoning=True,
-            reasoning_config={"effort": "high"},
-            lmstudio_reasoning_options=None,
-        )
-        assert kw["reasoning_effort"] == "high"
-
-
 class TestChatCompletionsValidate:

    def test_none(self, transport):
--- a/Show More
+++ b/Show More