Compare commits
1 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 18e6cd9938 |
@@ -5,9 +5,7 @@
|
||||
|
||||
# Dependencies
|
||||
node_modules
|
||||
**/node_modules
|
||||
.venv
|
||||
**/.venv
|
||||
|
||||
# CI/CD
|
||||
.github
|
||||
|
||||
@@ -13,7 +13,7 @@ concurrency:
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
nix-lockfile-check:
|
||||
check:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 20
|
||||
steps:
|
||||
@@ -36,12 +36,6 @@ jobs:
|
||||
LINK_SHA: ${{ steps.sha.outputs.full }}
|
||||
run: nix run .#fix-lockfiles -- --check
|
||||
|
||||
- name: Fail if check crashed without reporting
|
||||
if: steps.check.outputs.stale != 'true' && steps.check.outputs.stale != 'false'
|
||||
run: |
|
||||
echo "::error::fix-lockfiles exited without reporting stale status — likely an infrastructure or script failure"
|
||||
exit 1
|
||||
|
||||
- name: Post sticky PR comment (stale)
|
||||
if: steps.check.outputs.stale == 'true' && github.event_name == 'pull_request'
|
||||
uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728 # v2.9.1
|
||||
|
||||
@@ -1,13 +1,6 @@
|
||||
name: Nix Lockfile Fix
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
paths:
|
||||
- 'ui-tui/package-lock.json'
|
||||
- 'ui-tui/package.json'
|
||||
- 'web/package-lock.json'
|
||||
- 'web/package.json'
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
pr_number:
|
||||
@@ -26,103 +19,9 @@ concurrency:
|
||||
cancel-in-progress: false
|
||||
|
||||
jobs:
|
||||
# ── Auto-fix on main ───────────────────────────────────────────────
|
||||
# Fires when a push to main touches package.json or package-lock.json
|
||||
# in ui-tui/ or web/. Runs fix-lockfiles --apply and pushes the hash
|
||||
# update commit directly to main so Nix builds never stay broken.
|
||||
#
|
||||
# Safety invariants:
|
||||
# 1. The fix commit only touches nix/*.nix files, which are NOT in
|
||||
# the paths filter above, so this cannot re-trigger itself.
|
||||
# 2. An explicit file-whitelist check before commit aborts if
|
||||
# fix-lockfiles ever modifies unexpected files.
|
||||
# 3. Job-level concurrency with cancel-in-progress: true ensures
|
||||
# back-to-back pushes collapse to the newest; ref: main checkout
|
||||
# always operates on the latest branch state.
|
||||
# 4. Uses a GitHub App token (not GITHUB_TOKEN) so the fix commit
|
||||
# triggers downstream nix.yml verification.
|
||||
auto-fix-main:
|
||||
if: github.event_name == 'push'
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 25
|
||||
concurrency:
|
||||
group: auto-fix-main
|
||||
cancel-in-progress: true
|
||||
steps:
|
||||
- name: Generate GitHub App token
|
||||
id: app-token
|
||||
uses: actions/create-github-app-token@7bfa3a4717ef143a604ee0a99d859b8886a96d00 # v1.9.3
|
||||
with:
|
||||
app-id: ${{ secrets.APP_ID }}
|
||||
private-key: ${{ secrets.APP_PRIVATE_KEY }}
|
||||
|
||||
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
with:
|
||||
ref: main
|
||||
token: ${{ steps.app-token.outputs.token }}
|
||||
|
||||
- uses: ./.github/actions/nix-setup
|
||||
|
||||
- name: Apply lockfile hashes
|
||||
id: apply
|
||||
run: nix run .#fix-lockfiles -- --apply
|
||||
|
||||
- name: Commit & push
|
||||
if: steps.apply.outputs.changed == 'true'
|
||||
shell: bash
|
||||
run: |
|
||||
set -euo pipefail
|
||||
|
||||
# Ensure only nix files were modified — prevents accidental
|
||||
# self-triggering if fix-lockfiles ever touches package files.
|
||||
unexpected="$(git diff --name-only | grep -Ev '^nix/(tui|web)\.nix$' || true)"
|
||||
if [ -n "$unexpected" ]; then
|
||||
echo "::error::Unexpected modified files: $unexpected"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Record the base SHA before committing — used to detect package
|
||||
# file changes if we need to rebase after a non-fast-forward push.
|
||||
BASE_SHA="$(git rev-parse HEAD)"
|
||||
|
||||
git config user.name 'github-actions[bot]'
|
||||
git config user.email '41898282+github-actions[bot]@users.noreply.github.com'
|
||||
git add nix/tui.nix nix/web.nix
|
||||
git commit -m "fix(nix): auto-refresh npm lockfile hashes" \
|
||||
-m "Source: $GITHUB_SHA" \
|
||||
-m "Run: $GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID"
|
||||
|
||||
# Retry push with rebase in case main advanced with an unrelated
|
||||
# commit during the nix build. Without this, a non-fast-forward
|
||||
# rejection silently loses the fix. If package files changed during
|
||||
# the rebase, abort — a fresh auto-fix run will handle the new state.
|
||||
for attempt in 1 2 3; do
|
||||
if git push origin HEAD:main; then
|
||||
exit 0
|
||||
fi
|
||||
echo "::warning::Push attempt $attempt failed (non-fast-forward?), rebasing…"
|
||||
git fetch origin main
|
||||
|
||||
# If package files changed between our base and the new main,
|
||||
# our computed hashes are stale. Abort and let the next triggered
|
||||
# run recompute from the correct package-lock state.
|
||||
pkg_changed="$(git diff --name-only "$BASE_SHA"..origin/main -- \
|
||||
'ui-tui/package-lock.json' 'ui-tui/package.json' \
|
||||
'web/package-lock.json' 'web/package.json' || true)"
|
||||
if [ -n "$pkg_changed" ]; then
|
||||
echo "::warning::Package files changed since hash computation — aborting; a fresh run will recompute"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
git rebase origin/main
|
||||
done
|
||||
echo "::error::Failed to push after 3 rebase attempts"
|
||||
exit 1
|
||||
|
||||
# ── PR fix (manual / checkbox) ─────────────────────────────────────
|
||||
# Existing behavior: run on manual dispatch OR when a task-list
|
||||
# checkbox in the sticky lockfile-check comment flips from [ ] to [x].
|
||||
fix:
|
||||
# Run on manual dispatch OR when a task-list checkbox in the sticky
|
||||
# lockfile-check comment flips from `[ ]` to `[x]`.
|
||||
if: |
|
||||
github.event_name == 'workflow_dispatch' ||
|
||||
(github.event_name == 'issue_comment'
|
||||
|
||||
@@ -69,5 +69,3 @@ mini-swe-agent/
|
||||
.nix-stamps/
|
||||
result
|
||||
website/static/api/skills-index.json
|
||||
models-dev-upstream/
|
||||
.venv
|
||||
|
||||
@@ -38,7 +38,7 @@ hermes-agent/
|
||||
│ │ # homeassistant, signal, matrix, mattermost, email, sms,
|
||||
│ │ # dingtalk, wecom, weixin, feishu, qqbot, bluebubbles,
|
||||
│ │ # webhook, api_server, ...). See ADDING_A_PLATFORM.md.
|
||||
│ └── builtin_hooks/ # Extension point for always-registered gateway hooks (none shipped)
|
||||
│ └── builtin_hooks/ # Always-registered gateway hooks (boot-md, ...)
|
||||
├── plugins/ # Plugin system (see "Plugins" section below)
|
||||
│ ├── memory/ # Memory-provider plugins (honcho, mem0, supermemory, ...)
|
||||
│ ├── context_engine/ # Context-engine plugins
|
||||
|
||||
+1
-1
@@ -494,7 +494,7 @@ branding:
|
||||
agent_name: "My Agent"
|
||||
welcome: "Welcome message"
|
||||
response_label: " ⚔ Agent "
|
||||
prompt_symbol: "⚔"
|
||||
prompt_symbol: "⚔ ❯ "
|
||||
|
||||
tool_prefix: "╎" # Tool output line prefix
|
||||
```
|
||||
|
||||
+2
-8
@@ -14,7 +14,7 @@ ENV PLAYWRIGHT_BROWSERS_PATH=/opt/hermes/.playwright
|
||||
# that would otherwise accumulate when hermes runs as PID 1. See #15012.
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
build-essential nodejs npm python3 ripgrep ffmpeg gcc python3-dev libffi-dev procps git openssh-client docker-cli tini && \
|
||||
build-essential nodejs npm python3 ripgrep ffmpeg gcc python3-dev libffi-dev procps git openssh-client docker-cli tini && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Non-root user for runtime; UID can be overridden via HERMES_UID at runtime
|
||||
@@ -45,13 +45,7 @@ COPY --chown=hermes:hermes . .
|
||||
|
||||
# Build browser dashboard and terminal UI assets.
|
||||
RUN cd web && npm run build && \
|
||||
cd ../ui-tui && npm run build && \
|
||||
rm -rf node_modules/@hermes/ink && \
|
||||
rm -rf packages/hermes-ink/node_modules && \
|
||||
cp -R packages/hermes-ink node_modules/@hermes/ink && \
|
||||
npm install --omit=dev --prefer-offline --no-audit --prefix node_modules/@hermes/ink && \
|
||||
rm -rf node_modules/@hermes/ink/node_modules/react && \
|
||||
node --input-type=module -e "await import('@hermes/ink')"
|
||||
cd ../ui-tui && npm run build
|
||||
|
||||
# ---------- Permissions ----------
|
||||
# Make install dir world-readable so any HERMES_UID can read it at runtime.
|
||||
|
||||
@@ -112,17 +112,6 @@ def main() -> None:
|
||||
import acp
|
||||
from .server import HermesACPAgent
|
||||
|
||||
# MCP tool discovery from config.yaml — run before asyncio.run() so
|
||||
# it's safe to use blocking waits. (ACP also registers per-session
|
||||
# MCP servers dynamically via asyncio.to_thread inside the event
|
||||
# loop; that path is unaffected.) Moved from model_tools.py module
|
||||
# scope to avoid freezing the gateway's loop on lazy import (#16856).
|
||||
try:
|
||||
from tools.mcp_tool import discover_mcp_tools
|
||||
discover_mcp_tools()
|
||||
except Exception:
|
||||
logger.debug("MCP tool discovery failed at ACP startup", exc_info=True)
|
||||
|
||||
agent = HermesACPAgent()
|
||||
try:
|
||||
asyncio.run(acp.run_agent(agent, use_unstable_protocol=True))
|
||||
|
||||
+1
-28
@@ -3,7 +3,6 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import contextvars
|
||||
import logging
|
||||
import os
|
||||
from collections import defaultdict, deque
|
||||
@@ -575,22 +574,6 @@ class HermesACPAgent(acp.Agent):
|
||||
|
||||
def _run_agent() -> dict:
|
||||
nonlocal previous_approval_cb, previous_interactive
|
||||
# Bind HERMES_SESSION_KEY for this session so per-session caches
|
||||
# (e.g. the interactive sudo password cache in tools.terminal_tool)
|
||||
# scope to the ACP session rather than leaking across sessions
|
||||
# that land on the same reused executor thread. This call runs
|
||||
# inside a contextvars.copy_context() below, so the ContextVar
|
||||
# write is isolated from other concurrent ACP sessions.
|
||||
try:
|
||||
from gateway.session_context import (
|
||||
clear_session_vars,
|
||||
set_session_vars,
|
||||
)
|
||||
session_tokens = set_session_vars(session_key=session_id)
|
||||
except Exception:
|
||||
session_tokens = None
|
||||
clear_session_vars = None # type: ignore[assignment]
|
||||
logger.debug("Could not set ACP session context", exc_info=True)
|
||||
if approval_cb:
|
||||
try:
|
||||
from tools import terminal_tool as _terminal_tool
|
||||
@@ -624,19 +607,9 @@ class HermesACPAgent(acp.Agent):
|
||||
_terminal_tool.set_approval_callback(previous_approval_cb)
|
||||
except Exception:
|
||||
logger.debug("Could not restore approval callback", exc_info=True)
|
||||
if session_tokens is not None and clear_session_vars is not None:
|
||||
try:
|
||||
clear_session_vars(session_tokens)
|
||||
except Exception:
|
||||
logger.debug("Could not clear ACP session context", exc_info=True)
|
||||
|
||||
try:
|
||||
# Wrap the executor call in a fresh copy of the current context so
|
||||
# concurrent ACP sessions on the shared ThreadPoolExecutor don't
|
||||
# stomp on each other's ContextVar writes (HERMES_SESSION_KEY in
|
||||
# particular — used by the interactive sudo password cache scope).
|
||||
ctx = contextvars.copy_context()
|
||||
result = await loop.run_in_executor(_executor, ctx.run, _run_agent)
|
||||
result = await loop.run_in_executor(_executor, _run_agent)
|
||||
except Exception:
|
||||
logger.exception("Executor error for session %s", session_id)
|
||||
return PromptResponse(stop_reason="end_turn")
|
||||
|
||||
+78
-155
@@ -22,25 +22,10 @@ from hermes_constants import get_hermes_home
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
from utils import normalize_proxy_env_vars
|
||||
|
||||
# NOTE: `import anthropic` is deliberately NOT at module top — the SDK pulls
|
||||
# ~220 ms of imports (anthropic.types, anthropic.lib.tools._beta_runner, etc.)
|
||||
# and the 3 usage sites (build_anthropic_client, build_anthropic_bedrock_client,
|
||||
# read_claude_code_credentials_from_keychain) are all on cold user-triggered
|
||||
# paths. Access via the `_get_anthropic_sdk()` accessor below, which caches
|
||||
# the module after the first call and returns None on ImportError.
|
||||
_anthropic_sdk: Any = ... # sentinel — None means "tried and missing"
|
||||
|
||||
|
||||
def _get_anthropic_sdk():
|
||||
"""Return the ``anthropic`` SDK module, importing lazily. None if not installed."""
|
||||
global _anthropic_sdk
|
||||
if _anthropic_sdk is ...:
|
||||
try:
|
||||
import anthropic as _sdk
|
||||
_anthropic_sdk = _sdk
|
||||
except ImportError:
|
||||
_anthropic_sdk = None
|
||||
return _anthropic_sdk
|
||||
try:
|
||||
import anthropic as _anthropic_sdk
|
||||
except ImportError:
|
||||
_anthropic_sdk = None # type: ignore[assignment]
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -217,33 +202,19 @@ def _forbids_sampling_params(model: str) -> bool:
|
||||
|
||||
|
||||
# Beta headers for enhanced features (sent with ALL auth types).
|
||||
# As of Opus 4.7 (2026-04-16), the first two are GA on Claude 4.6+ — the
|
||||
# As of Opus 4.7 (2026-04-16), both of these are GA on Claude 4.6+ — the
|
||||
# beta headers are still accepted (harmless no-op) but not required. Kept
|
||||
# here so older Claude (4.5, 4.1) + third-party Anthropic-compat endpoints
|
||||
# that still gate on the headers continue to get the enhanced features.
|
||||
#
|
||||
# ``context-1m-2025-08-07`` unlocks the 1M context window on Claude Opus 4.6/4.7
|
||||
# and Sonnet 4.6 when served via AWS Bedrock or Azure AI Foundry. 1M is GA on
|
||||
# native Anthropic (api.anthropic.com) for Opus 4.6+, but Bedrock/Azure still
|
||||
# gate it behind this beta header as of 2026-04 — without it Bedrock caps Opus
|
||||
# at 200K even though model_metadata.py advertises 1M. The header is a harmless
|
||||
# no-op on endpoints where 1M is GA.
|
||||
#
|
||||
# Migration guide: remove these if you no longer support ≤4.5 models or once
|
||||
# Bedrock/Azure promote 1M to GA.
|
||||
# Migration guide: remove these if you no longer support ≤4.5 models.
|
||||
_COMMON_BETAS = [
|
||||
"interleaved-thinking-2025-05-14",
|
||||
"fine-grained-tool-streaming-2025-05-14",
|
||||
"context-1m-2025-08-07",
|
||||
]
|
||||
# MiniMax's Anthropic-compatible endpoints fail tool-use requests when
|
||||
# the fine-grained tool streaming beta is present. Omit it so tool calls
|
||||
# fall back to the provider's default response path.
|
||||
_TOOL_STREAMING_BETA = "fine-grained-tool-streaming-2025-05-14"
|
||||
# 1M context beta — see comment on _COMMON_BETAS above. Stripped for
|
||||
# Bearer-auth (MiniMax) endpoints since they host their own models and
|
||||
# unknown Anthropic beta headers risk request rejection.
|
||||
_CONTEXT_1M_BETA = "context-1m-2025-08-07"
|
||||
|
||||
# Fast mode beta — enables the ``speed: "fast"`` request parameter for
|
||||
# significantly higher output token throughput on Opus 4.6 (~2.5x).
|
||||
@@ -257,11 +228,10 @@ _OAUTH_ONLY_BETAS = [
|
||||
"oauth-2025-04-20",
|
||||
]
|
||||
|
||||
# Claude Code version — sent on OAuth token-exchange / refresh requests
|
||||
# (platform.claude.com/v1/oauth/token) as the client's user-agent. Anthropic's
|
||||
# OAuth flow validates the UA and may reject requests with a version that's
|
||||
# too old, so detecting dynamically keeps users on a current Claude Code
|
||||
# install from hitting stale-version errors during login/refresh.
|
||||
# Claude Code identity — required for OAuth requests to be routed correctly.
|
||||
# Without these, Anthropic's infrastructure intermittently 500s OAuth traffic.
|
||||
# The version must stay reasonably current — Anthropic rejects OAuth requests
|
||||
# when the spoofed user-agent version is too far behind the actual release.
|
||||
_CLAUDE_CODE_VERSION_FALLBACK = "2.1.74"
|
||||
_claude_code_version_cache: Optional[str] = None
|
||||
|
||||
@@ -269,9 +239,9 @@ _claude_code_version_cache: Optional[str] = None
|
||||
def _detect_claude_code_version() -> str:
|
||||
"""Detect the installed Claude Code version, fall back to a static constant.
|
||||
|
||||
Used only by the OAuth token-exchange / refresh flow
|
||||
(``platform.claude.com/v1/oauth/token``). The Messages API client no
|
||||
longer sends a claude-cli user-agent.
|
||||
Anthropic's OAuth infrastructure validates the user-agent version and may
|
||||
reject requests with a version that's too old. Detecting dynamically means
|
||||
users who keep Claude Code updated never hit stale-version 400s.
|
||||
"""
|
||||
import subprocess as _sp
|
||||
|
||||
@@ -291,13 +261,12 @@ def _detect_claude_code_version() -> str:
|
||||
return _CLAUDE_CODE_VERSION_FALLBACK
|
||||
|
||||
|
||||
def _get_claude_code_version() -> str:
|
||||
"""Lazily detect the installed Claude Code version for OAuth flow headers.
|
||||
_CLAUDE_CODE_SYSTEM_PREFIX = "You are Claude Code, Anthropic's official CLI for Claude."
|
||||
_MCP_TOOL_PREFIX = "mcp_"
|
||||
|
||||
Used only on the OAuth token-exchange and refresh endpoints
|
||||
(``platform.claude.com/v1/oauth/token``). The Messages API client does
|
||||
not send a claude-cli user-agent.
|
||||
"""
|
||||
|
||||
def _get_claude_code_version() -> str:
|
||||
"""Lazily detect the installed Claude Code version when OAuth headers need it."""
|
||||
global _claude_code_version_cache
|
||||
if _claude_code_version_cache is None:
|
||||
_claude_code_version_cache = _detect_claude_code_version()
|
||||
@@ -388,14 +357,9 @@ def _common_betas_for_base_url(base_url: str | None) -> list[str]:
|
||||
that include Anthropic's ``fine-grained-tool-streaming`` beta — every
|
||||
tool-use message triggers a connection error. Strip that beta for
|
||||
Bearer-auth endpoints while keeping all other betas intact.
|
||||
|
||||
The ``context-1m-2025-08-07`` beta is also stripped for Bearer-auth
|
||||
endpoints — MiniMax hosts its own models, not Claude, so the header is
|
||||
irrelevant at best and risks request rejection at worst.
|
||||
"""
|
||||
if _requires_bearer_auth(base_url):
|
||||
_stripped = {_TOOL_STREAMING_BETA, _CONTEXT_1M_BETA}
|
||||
return [b for b in _COMMON_BETAS if b not in _stripped]
|
||||
return [b for b in _COMMON_BETAS if b != _TOOL_STREAMING_BETA]
|
||||
return _COMMON_BETAS
|
||||
|
||||
|
||||
@@ -410,7 +374,6 @@ def build_anthropic_client(api_key: str, base_url: str = None, timeout: float =
|
||||
|
||||
Returns an anthropic.Anthropic instance.
|
||||
"""
|
||||
_anthropic_sdk = _get_anthropic_sdk()
|
||||
if _anthropic_sdk is None:
|
||||
raise ImportError(
|
||||
"The 'anthropic' package is required for the Anthropic provider. "
|
||||
@@ -467,21 +430,15 @@ def build_anthropic_client(api_key: str, base_url: str = None, timeout: float =
|
||||
if common_betas:
|
||||
kwargs["default_headers"] = {"anthropic-beta": ",".join(common_betas)}
|
||||
elif _is_oauth_token(api_key):
|
||||
# OAuth access token / setup-token → Bearer auth + OAuth-only betas.
|
||||
# The OAuth-specific beta headers are still required by Anthropic's
|
||||
# OAuth-gated Messages API path; the Claude Code user-agent / x-app
|
||||
# spoofing is deliberately NOT sent — Hermes identifies as itself.
|
||||
#
|
||||
# ``context-1m-2025-08-07`` is stripped here: Anthropic rejects
|
||||
# OAuth requests that carry it with
|
||||
# "This authentication style is incompatible with the long
|
||||
# context beta header."
|
||||
# Subscription-gated OAuth traffic gets the 200K default window.
|
||||
oauth_safe_common = [b for b in common_betas if b != _CONTEXT_1M_BETA]
|
||||
all_betas = oauth_safe_common + _OAUTH_ONLY_BETAS
|
||||
# OAuth access token / setup-token → Bearer auth + Claude Code identity.
|
||||
# Anthropic routes OAuth requests based on user-agent and headers;
|
||||
# without Claude Code's fingerprint, requests get intermittent 500s.
|
||||
all_betas = common_betas + _OAUTH_ONLY_BETAS
|
||||
kwargs["auth_token"] = api_key
|
||||
kwargs["default_headers"] = {
|
||||
"anthropic-beta": ",".join(all_betas),
|
||||
"user-agent": f"claude-cli/{_get_claude_code_version()} (external, cli)",
|
||||
"x-app": "cli",
|
||||
}
|
||||
else:
|
||||
# Regular API key → x-api-key header + common betas
|
||||
@@ -499,16 +456,8 @@ def build_anthropic_bedrock_client(region: str):
|
||||
Claude feature parity: prompt caching, thinking budgets, adaptive
|
||||
thinking, fast mode — features not available via the Converse API.
|
||||
|
||||
Attaches the common Anthropic beta headers as client-level defaults so
|
||||
that Bedrock-hosted Claude models get the same enhanced features as
|
||||
native Anthropic. The ``context-1m-2025-08-07`` beta in particular
|
||||
unlocks the 1M context window for Opus 4.6/4.7 on Bedrock — without
|
||||
it, Bedrock caps these models at 200K even though the Anthropic API
|
||||
serves them with 1M natively.
|
||||
|
||||
Auth uses the boto3 default credential chain (IAM roles, SSO, env vars).
|
||||
"""
|
||||
_anthropic_sdk = _get_anthropic_sdk()
|
||||
if _anthropic_sdk is None:
|
||||
raise ImportError(
|
||||
"The 'anthropic' package is required for the Bedrock provider. "
|
||||
@@ -524,7 +473,6 @@ def build_anthropic_bedrock_client(region: str):
|
||||
return _anthropic_sdk.AnthropicBedrock(
|
||||
aws_region=region,
|
||||
timeout=Timeout(timeout=900.0, connect=10.0),
|
||||
default_headers={"anthropic-beta": ",".join(_COMMON_BETAS)},
|
||||
)
|
||||
|
||||
|
||||
@@ -540,6 +488,9 @@ def _read_claude_code_credentials_from_keychain() -> Optional[Dict[str, Any]]:
|
||||
|
||||
Returns dict with {accessToken, refreshToken?, expiresAt?} or None.
|
||||
"""
|
||||
import platform
|
||||
import subprocess
|
||||
|
||||
if platform.system() != "Darwin":
|
||||
return None
|
||||
|
||||
@@ -825,45 +776,17 @@ def resolve_anthropic_token() -> Optional[str]:
|
||||
"""Resolve an Anthropic token from all available sources.
|
||||
|
||||
Priority:
|
||||
1. Hermes credential pool (``~/.hermes/auth.json`` →
|
||||
``credential_pool.anthropic``) — OAuth tokens minted by Hermes'
|
||||
own PKCE login flow. Entries are auto-refreshed when near
|
||||
expiry. Env-sourced pool entries (``source="env:..."``) are
|
||||
skipped here so the env-var priority logic below still runs.
|
||||
2. ANTHROPIC_TOKEN env var (OAuth/setup token saved by Hermes)
|
||||
3. CLAUDE_CODE_OAUTH_TOKEN env var
|
||||
4. Claude Code credentials (~/.claude.json or ~/.claude/.credentials.json)
|
||||
1. ANTHROPIC_TOKEN env var (OAuth/setup token saved by Hermes)
|
||||
2. CLAUDE_CODE_OAUTH_TOKEN env var
|
||||
3. Claude Code credentials (~/.claude.json or ~/.claude/.credentials.json)
|
||||
— with automatic refresh if expired and a refresh token is available
|
||||
5. ANTHROPIC_API_KEY env var (regular API key, or legacy fallback)
|
||||
4. ANTHROPIC_API_KEY env var (regular API key, or legacy fallback)
|
||||
|
||||
Returns the token string or None.
|
||||
"""
|
||||
# 1. Hermes credential pool — the live source of truth for tokens
|
||||
# minted via ``hermes login anthropic`` / the dashboard PKCE flow.
|
||||
# ``select()`` picks the best available entry and refreshes it if
|
||||
# it's near expiry, so callers always get a fresh token.
|
||||
#
|
||||
# Skip env-sourced pool entries (``env:ANTHROPIC_TOKEN``, etc.) —
|
||||
# those are passthroughs of the env var, and the env-var branches
|
||||
# below have richer priority logic (``_prefer_refreshable_claude_code_token``)
|
||||
# that can upgrade a static env OAuth token to a refreshed
|
||||
# Claude Code token. Letting the pool win here would short-circuit
|
||||
# that upgrade.
|
||||
try:
|
||||
from agent.credential_pool import load_pool
|
||||
pool = load_pool("anthropic")
|
||||
entry = pool.select()
|
||||
if entry and entry.access_token and not entry.source.startswith("env:"):
|
||||
return entry.access_token
|
||||
except Exception as exc:
|
||||
# Pool lookup is best-effort — fall through to env/file sources
|
||||
# if anything goes wrong (e.g. auth.json corruption during a
|
||||
# concurrent write).
|
||||
logger.debug("Credential-pool lookup failed for anthropic: %s", exc)
|
||||
|
||||
creds = read_claude_code_credentials()
|
||||
|
||||
# 2. Hermes-managed OAuth/setup token env var
|
||||
# 1. Hermes-managed OAuth/setup token env var
|
||||
token = os.getenv("ANTHROPIC_TOKEN", "").strip()
|
||||
if token:
|
||||
preferred = _prefer_refreshable_claude_code_token(token, creds)
|
||||
@@ -871,7 +794,7 @@ def resolve_anthropic_token() -> Optional[str]:
|
||||
return preferred
|
||||
return token
|
||||
|
||||
# 3. CLAUDE_CODE_OAUTH_TOKEN (used by Claude Code for setup-tokens)
|
||||
# 2. CLAUDE_CODE_OAUTH_TOKEN (used by Claude Code for setup-tokens)
|
||||
cc_token = os.getenv("CLAUDE_CODE_OAUTH_TOKEN", "").strip()
|
||||
if cc_token:
|
||||
preferred = _prefer_refreshable_claude_code_token(cc_token, creds)
|
||||
@@ -879,12 +802,12 @@ def resolve_anthropic_token() -> Optional[str]:
|
||||
return preferred
|
||||
return cc_token
|
||||
|
||||
# 4. Claude Code credential file
|
||||
# 3. Claude Code credential file
|
||||
resolved_claude_token = _resolve_claude_code_token_from_credentials(creds)
|
||||
if resolved_claude_token:
|
||||
return resolved_claude_token
|
||||
|
||||
# 5. Regular API key, or a legacy OAuth token saved in ANTHROPIC_API_KEY.
|
||||
# 4. Regular API key, or a legacy OAuth token saved in ANTHROPIC_API_KEY.
|
||||
# This remains as a compatibility fallback for pre-migration Hermes configs.
|
||||
api_key = os.getenv("ANTHROPIC_API_KEY", "").strip()
|
||||
if api_key:
|
||||
@@ -1131,33 +1054,6 @@ def _sanitize_tool_id(tool_id: str) -> str:
|
||||
return sanitized or "tool_0"
|
||||
|
||||
|
||||
def _normalize_tool_input_schema(schema: Any) -> Dict[str, Any]:
|
||||
"""Normalize tool schemas before sending them to Anthropic.
|
||||
|
||||
Anthropic's tool schema validator rejects nullable unions such as
|
||||
``anyOf: [{"type": "string"}, {"type": "null"}]`` that Pydantic/MCP
|
||||
commonly emits for optional fields. Tool optionality is represented by
|
||||
the parent ``required`` array, so we delegate to the shared
|
||||
``strip_nullable_unions`` helper to collapse nullable unions to the
|
||||
non-null branch while preserving metadata like description/default.
|
||||
|
||||
``keep_nullable_hint=False`` because the Anthropic validator does not
|
||||
recognize the OpenAPI-style ``nullable: true`` extension and strict
|
||||
schema-to-grammar converters may reject unknown keywords.
|
||||
"""
|
||||
if not schema:
|
||||
return {"type": "object", "properties": {}}
|
||||
|
||||
from tools.schema_sanitizer import strip_nullable_unions
|
||||
|
||||
normalized = strip_nullable_unions(schema, keep_nullable_hint=False)
|
||||
if not isinstance(normalized, dict):
|
||||
return {"type": "object", "properties": {}}
|
||||
if normalized.get("type") == "object" and not isinstance(normalized.get("properties"), dict):
|
||||
normalized = {**normalized, "properties": {}}
|
||||
return normalized
|
||||
|
||||
|
||||
def convert_tools_to_anthropic(tools: List[Dict]) -> List[Dict]:
|
||||
"""Convert OpenAI tool definitions to Anthropic format."""
|
||||
if not tools:
|
||||
@@ -1168,9 +1064,7 @@ def convert_tools_to_anthropic(tools: List[Dict]) -> List[Dict]:
|
||||
result.append({
|
||||
"name": fn.get("name", ""),
|
||||
"description": fn.get("description", ""),
|
||||
"input_schema": _normalize_tool_input_schema(
|
||||
fn.get("parameters", {"type": "object", "properties": {}})
|
||||
),
|
||||
"input_schema": fn.get("parameters", {"type": "object", "properties": {}}),
|
||||
})
|
||||
return result
|
||||
|
||||
@@ -1649,10 +1543,8 @@ def build_anthropic_kwargs(
|
||||
"max_tokens too large given prompt" errors and retry with a smaller cap
|
||||
(see parse_available_output_tokens_from_error + _ephemeral_max_output_tokens).
|
||||
|
||||
When *is_oauth* is True, enables the OAuth-only beta headers required by
|
||||
Anthropic's subscription-gated Messages endpoint (fast-mode branch only;
|
||||
the default headers are set by build_anthropic_client). No system-prompt
|
||||
or tool-name rewriting is performed — Hermes identifies as itself.
|
||||
When *is_oauth* is True, applies Claude Code compatibility transforms:
|
||||
system prompt prefix, tool name prefixing, and prompt sanitization.
|
||||
|
||||
When *preserve_dots* is True, model name dots are not converted to hyphens
|
||||
(for Alibaba/DashScope anthropic-compatible endpoints: qwen3.5-plus).
|
||||
@@ -1685,11 +1577,45 @@ def build_anthropic_kwargs(
|
||||
if context_length and effective_max_tokens > context_length:
|
||||
effective_max_tokens = max(context_length - 1, 1)
|
||||
|
||||
# OAuth requests go through Anthropic's subscription-gated Messages
|
||||
# endpoint but otherwise send the real Hermes system prompt and real
|
||||
# Hermes tool names — the only OAuth-specific wire differences are
|
||||
# Bearer auth and the _OAUTH_ONLY_BETAS header (applied in
|
||||
# build_anthropic_client and the fast-mode branch below).
|
||||
# ── OAuth: Claude Code identity ──────────────────────────────────
|
||||
if is_oauth:
|
||||
# 1. Prepend Claude Code system prompt identity
|
||||
cc_block = {"type": "text", "text": _CLAUDE_CODE_SYSTEM_PREFIX}
|
||||
if isinstance(system, list):
|
||||
system = [cc_block] + system
|
||||
elif isinstance(system, str) and system:
|
||||
system = [cc_block, {"type": "text", "text": system}]
|
||||
else:
|
||||
system = [cc_block]
|
||||
|
||||
# 2. Sanitize system prompt — replace product name references
|
||||
# to avoid Anthropic's server-side content filters.
|
||||
for block in system:
|
||||
if isinstance(block, dict) and block.get("type") == "text":
|
||||
text = block.get("text", "")
|
||||
text = text.replace("Hermes Agent", "Claude Code")
|
||||
text = text.replace("Hermes agent", "Claude Code")
|
||||
text = text.replace("hermes-agent", "claude-code")
|
||||
text = text.replace("Nous Research", "Anthropic")
|
||||
block["text"] = text
|
||||
|
||||
# 3. Prefix tool names with mcp_ (Claude Code convention)
|
||||
if anthropic_tools:
|
||||
for tool in anthropic_tools:
|
||||
if "name" in tool:
|
||||
tool["name"] = _MCP_TOOL_PREFIX + tool["name"]
|
||||
|
||||
# 4. Prefix tool names in message history (tool_use and tool_result blocks)
|
||||
for msg in anthropic_messages:
|
||||
content = msg.get("content")
|
||||
if isinstance(content, list):
|
||||
for block in content:
|
||||
if isinstance(block, dict):
|
||||
if block.get("type") == "tool_use" and "name" in block:
|
||||
if not block["name"].startswith(_MCP_TOOL_PREFIX):
|
||||
block["name"] = _MCP_TOOL_PREFIX + block["name"]
|
||||
elif block.get("type") == "tool_result" and "tool_use_id" in block:
|
||||
pass # tool_result uses ID, not name
|
||||
|
||||
kwargs: Dict[str, Any] = {
|
||||
"model": model,
|
||||
@@ -1780,9 +1706,6 @@ def build_anthropic_kwargs(
|
||||
# extra_headers override the client-level anthropic-beta header).
|
||||
betas = list(_common_betas_for_base_url(base_url))
|
||||
if is_oauth:
|
||||
# Strip context-1m — incompatible with OAuth auth. See matching
|
||||
# comment in build_anthropic_client().
|
||||
betas = [b for b in betas if b != _CONTEXT_1M_BETA]
|
||||
betas.extend(_OAUTH_ONLY_BETAS)
|
||||
betas.append(_FAST_MODE_BETA)
|
||||
kwargs["extra_headers"] = {"anthropic-beta": ",".join(betas)}
|
||||
|
||||
+24
-273
@@ -41,57 +41,10 @@ import threading
|
||||
import time
|
||||
from pathlib import Path # noqa: F401 — used by test mocks
|
||||
from types import SimpleNamespace
|
||||
from typing import Any, Dict, List, Optional, Tuple, TYPE_CHECKING
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
from urllib.parse import urlparse, parse_qs, urlunparse
|
||||
|
||||
# NOTE: `from openai import OpenAI` is deliberately NOT at module top — the
|
||||
# openai SDK pulls a large type tree (~240 ms cold, including responses/*,
|
||||
# graders/*). We expose `OpenAI` here as a thin proxy that imports the SDK on
|
||||
# first call and forwards, so:
|
||||
# (a) the 15+ in-module `OpenAI(...)` construction sites work unchanged
|
||||
# (Python's function-scope name lookup resolves `OpenAI` to the proxy
|
||||
# object bound in module globals here, without triggering any import);
|
||||
# (b) external code can still do `auxiliary_client.OpenAI` or
|
||||
# `patch("agent.auxiliary_client.OpenAI", ...)` — tests see the proxy,
|
||||
# and patch replaces the module attribute as usual;
|
||||
# (c) `OpenAI` as a type annotation resolves at runtime to the proxy class
|
||||
# (which is harmless — annotations aren't type-checked at runtime).
|
||||
# See tests/agent/test_auxiliary_client.py for patch patterns this supports.
|
||||
if TYPE_CHECKING:
|
||||
from openai import OpenAI # noqa: F401 — type hints only
|
||||
|
||||
_OPENAI_CLS_CACHE: Optional[type] = None
|
||||
|
||||
|
||||
def _load_openai_cls() -> type:
|
||||
"""Import and cache ``openai.OpenAI``."""
|
||||
global _OPENAI_CLS_CACHE
|
||||
if _OPENAI_CLS_CACHE is None:
|
||||
from openai import OpenAI as _cls
|
||||
_OPENAI_CLS_CACHE = _cls
|
||||
return _OPENAI_CLS_CACHE
|
||||
|
||||
|
||||
class _OpenAIProxy:
|
||||
"""Module-level proxy that looks like the ``openai.OpenAI`` class.
|
||||
|
||||
Forwards ``OpenAI(...)`` calls and ``isinstance(x, OpenAI)`` checks to the
|
||||
real SDK class, importing the SDK lazily on first use.
|
||||
"""
|
||||
|
||||
__slots__ = ()
|
||||
|
||||
def __call__(self, *args, **kwargs):
|
||||
return _load_openai_cls()(*args, **kwargs)
|
||||
|
||||
def __instancecheck__(self, obj):
|
||||
return isinstance(obj, _load_openai_cls())
|
||||
|
||||
def __repr__(self):
|
||||
return "<lazy openai.OpenAI proxy>"
|
||||
|
||||
|
||||
OpenAI = _OpenAIProxy() # module-level name, resolves lazily on call/isinstance
|
||||
from openai import OpenAI
|
||||
|
||||
from agent.credential_pool import load_pool
|
||||
from hermes_cli.config import get_hermes_home
|
||||
@@ -129,8 +82,6 @@ _PROVIDER_ALIASES = {
|
||||
"moonshot": "kimi-coding",
|
||||
"kimi-cn": "kimi-coding-cn",
|
||||
"moonshot-cn": "kimi-coding-cn",
|
||||
"gmi-cloud": "gmi",
|
||||
"gmicloud": "gmi",
|
||||
"minimax-china": "minimax-cn",
|
||||
"minimax_cn": "minimax-cn",
|
||||
"claude": "anthropic",
|
||||
@@ -141,10 +92,6 @@ _PROVIDER_ALIASES = {
|
||||
"github-models": "copilot",
|
||||
"github-copilot-acp": "copilot-acp",
|
||||
"copilot-acp-agent": "copilot-acp",
|
||||
"tencent": "tencent-tokenhub",
|
||||
"tokenhub": "tencent-tokenhub",
|
||||
"tencent-cloud": "tencent-tokenhub",
|
||||
"tencentmaas": "tencent-tokenhub",
|
||||
}
|
||||
|
||||
|
||||
@@ -208,7 +155,6 @@ _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
|
||||
"kimi-coding": "kimi-k2-turbo-preview",
|
||||
"stepfun": "step-3.5-flash",
|
||||
"kimi-coding-cn": "kimi-k2-turbo-preview",
|
||||
"gmi": "google/gemini-3.1-flash-lite-preview",
|
||||
"minimax": "MiniMax-M2.7",
|
||||
"minimax-cn": "MiniMax-M2.7",
|
||||
"anthropic": "claude-haiku-4-5-20251001",
|
||||
@@ -217,7 +163,6 @@ _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
|
||||
"opencode-go": "glm-5",
|
||||
"kilocode": "google/gemini-3-flash-preview",
|
||||
"ollama-cloud": "nemotron-3-nano:30b",
|
||||
"tencent-tokenhub": "hy3-preview",
|
||||
}
|
||||
|
||||
# Vision-specific model overrides for direct providers.
|
||||
@@ -457,33 +402,6 @@ class _CodexCompletionsAdapter:
|
||||
# Note: the Codex endpoint (chatgpt.com/backend-api/codex) does NOT
|
||||
# support max_output_tokens or temperature — omit to avoid 400 errors.
|
||||
|
||||
# Translate extra_body.reasoning (chat.completions shape) into the
|
||||
# Responses API's top-level reasoning + include fields. Mirrors
|
||||
# agent/transports/codex.py::build_kwargs() so auxiliary callers
|
||||
# that configure reasoning via auxiliary.<task>.extra_body get the
|
||||
# same behavior as the main agent's Codex transport.
|
||||
extra_body = kwargs.get("extra_body") or {}
|
||||
if isinstance(extra_body, dict):
|
||||
reasoning_cfg = extra_body.get("reasoning")
|
||||
if isinstance(reasoning_cfg, dict):
|
||||
if reasoning_cfg.get("enabled") is False:
|
||||
# Reasoning explicitly disabled — do not set reasoning
|
||||
# or include. The Codex backend still thinks by
|
||||
# default, but we honor the caller's intent where the
|
||||
# API allows it.
|
||||
pass
|
||||
else:
|
||||
effort = reasoning_cfg.get("effort", "medium")
|
||||
# Codex backend rejects "minimal"; clamp to "low" to
|
||||
# match the main-agent Codex transport behavior.
|
||||
if effort == "minimal":
|
||||
effort = "low"
|
||||
resp_kwargs["reasoning"] = {
|
||||
"effort": effort,
|
||||
"summary": "auto",
|
||||
}
|
||||
resp_kwargs["include"] = ["reasoning.encrypted_content"]
|
||||
|
||||
# Tools support for auxiliary callers (e.g. skills_hub) that pass function schemas
|
||||
tools = kwargs.get("tools")
|
||||
if tools:
|
||||
@@ -713,7 +631,9 @@ class _AnthropicCompletionsAdapter:
|
||||
|
||||
response = self._client.messages.create(**anthropic_kwargs)
|
||||
_transport = get_transport("anthropic_messages")
|
||||
_nr = _transport.normalize_response(response)
|
||||
_nr = _transport.normalize_response(
|
||||
response, strip_tool_prefix=self._is_oauth
|
||||
)
|
||||
|
||||
# ToolCall already duck-types as OpenAI shape (.type, .function.name,
|
||||
# .function.arguments) via properties, so no wrapping needed.
|
||||
@@ -791,116 +711,6 @@ class AsyncAnthropicAuxiliaryClient:
|
||||
self.base_url = sync_wrapper.base_url
|
||||
|
||||
|
||||
def _endpoint_speaks_anthropic_messages(base_url: str) -> bool:
|
||||
"""True if the endpoint at ``base_url`` speaks the Anthropic Messages
|
||||
protocol instead of OpenAI chat.completions.
|
||||
|
||||
Mirrors ``hermes_cli.runtime_provider._detect_api_mode_for_url`` so the
|
||||
auxiliary client and the main agent stay in sync on transport selection.
|
||||
Covers:
|
||||
|
||||
- Any URL ending in ``/anthropic`` (MiniMax, Zhipu GLM, LiteLLM proxies,
|
||||
Anthropic-compatible gateways).
|
||||
- ``api.kimi.com/coding`` (Kimi Coding Plan — the /coding route only
|
||||
speaks Claude-Code's native Anthropic shape; ``chat.completions``
|
||||
returns 404 on Anthropic-only model aliases like ``kimi-for-coding``).
|
||||
- ``api.anthropic.com`` (native Anthropic).
|
||||
"""
|
||||
normalized = (base_url or "").strip().lower().rstrip("/")
|
||||
if not normalized:
|
||||
return False
|
||||
if normalized.endswith("/anthropic"):
|
||||
return True
|
||||
hostname = base_url_hostname(normalized)
|
||||
if hostname == "api.anthropic.com":
|
||||
return True
|
||||
if hostname == "api.kimi.com" and "/coding" in normalized:
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def _maybe_wrap_anthropic(
|
||||
client_obj: Any,
|
||||
model: str,
|
||||
api_key: str,
|
||||
base_url: str,
|
||||
api_mode: Optional[str] = None,
|
||||
) -> Any:
|
||||
"""Rewrap a plain OpenAI client in ``AnthropicAuxiliaryClient`` when
|
||||
the endpoint actually speaks Anthropic Messages.
|
||||
|
||||
This is the single chokepoint for aux-client transport correction.
|
||||
Runs at the end of every ``resolve_provider_client`` branch so that
|
||||
api_key providers (Kimi Coding Plan), the ``custom`` endpoint, and
|
||||
future /anthropic gateways all land on the right wire format
|
||||
regardless of which branch built the client.
|
||||
|
||||
Returns ``client_obj`` unchanged when:
|
||||
|
||||
- It's already an Anthropic/Codex/Gemini/CopilotACP wrapper.
|
||||
- The endpoint is an OpenAI-wire endpoint.
|
||||
- ``api_mode`` is explicitly set to a non-Anthropic transport.
|
||||
- The ``anthropic`` SDK is not installed (falls back to OpenAI wire).
|
||||
"""
|
||||
# Already wrapped — don't double-wrap.
|
||||
if isinstance(client_obj, AnthropicAuxiliaryClient):
|
||||
return client_obj
|
||||
# Other specialized adapters we should never re-dispatch.
|
||||
if isinstance(client_obj, CodexAuxiliaryClient):
|
||||
return client_obj
|
||||
try:
|
||||
from agent.gemini_native_adapter import GeminiNativeClient
|
||||
if isinstance(client_obj, GeminiNativeClient):
|
||||
return client_obj
|
||||
except ImportError:
|
||||
pass
|
||||
try:
|
||||
from agent.copilot_acp_client import CopilotACPClient
|
||||
if isinstance(client_obj, CopilotACPClient):
|
||||
return client_obj
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
# Explicit non-anthropic api_mode wins over URL heuristics.
|
||||
if api_mode and api_mode != "anthropic_messages":
|
||||
return client_obj
|
||||
|
||||
should_wrap = (
|
||||
api_mode == "anthropic_messages"
|
||||
or _endpoint_speaks_anthropic_messages(base_url)
|
||||
)
|
||||
if not should_wrap:
|
||||
return client_obj
|
||||
|
||||
try:
|
||||
from agent.anthropic_adapter import build_anthropic_client
|
||||
except ImportError:
|
||||
logger.warning(
|
||||
"Endpoint %s speaks Anthropic Messages but the anthropic SDK is "
|
||||
"not installed — falling back to OpenAI-wire (will likely 404).",
|
||||
base_url,
|
||||
)
|
||||
return client_obj
|
||||
|
||||
try:
|
||||
real_client = build_anthropic_client(api_key, base_url)
|
||||
except Exception as exc:
|
||||
logger.warning(
|
||||
"Failed to build Anthropic client for %s (%s) — falling back to "
|
||||
"OpenAI-wire client.", base_url, exc,
|
||||
)
|
||||
return client_obj
|
||||
|
||||
logger.debug(
|
||||
"Auxiliary transport: wrapping client in AnthropicAuxiliaryClient "
|
||||
"(model=%s, base_url=%s, api_mode=%s)",
|
||||
model, base_url[:60] if base_url else "", api_mode or "auto-detected",
|
||||
)
|
||||
return AnthropicAuxiliaryClient(
|
||||
real_client, model, api_key, base_url, is_oauth=False,
|
||||
)
|
||||
|
||||
|
||||
def _read_nous_auth() -> Optional[dict]:
|
||||
"""Read and validate ~/.hermes/auth.json for an active Nous provider.
|
||||
|
||||
@@ -1071,9 +881,7 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||
from hermes_cli.models import copilot_default_headers
|
||||
|
||||
extra["default_headers"] = copilot_default_headers()
|
||||
_client = OpenAI(api_key=api_key, base_url=base_url, **extra)
|
||||
_client = _maybe_wrap_anthropic(_client, model, api_key, base_url)
|
||||
return _client, model
|
||||
return OpenAI(api_key=api_key, base_url=base_url, **extra), model
|
||||
|
||||
creds = resolve_api_key_provider_credentials(provider_id)
|
||||
api_key = str(creds.get("api_key", "")).strip()
|
||||
@@ -1099,9 +907,7 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||
from hermes_cli.models import copilot_default_headers
|
||||
|
||||
extra["default_headers"] = copilot_default_headers()
|
||||
_client = OpenAI(api_key=api_key, base_url=base_url, **extra)
|
||||
_client = _maybe_wrap_anthropic(_client, model, api_key, base_url)
|
||||
return _client, model
|
||||
return OpenAI(api_key=api_key, base_url=base_url, **extra), model
|
||||
|
||||
return None, None
|
||||
|
||||
@@ -1385,13 +1191,7 @@ def _try_custom_endpoint() -> Tuple[Optional[Any], Optional[str]]:
|
||||
AnthropicAuxiliaryClient(real_client, model, custom_key, custom_base, is_oauth=False),
|
||||
model,
|
||||
)
|
||||
# URL-based anthropic detection for custom endpoints that didn't set
|
||||
# api_mode explicitly (e.g. kimi.com/coding reached via custom config).
|
||||
_fallback_client = OpenAI(api_key=custom_key, base_url=_clean_base, **_extra)
|
||||
_fallback_client = _maybe_wrap_anthropic(
|
||||
_fallback_client, model, custom_key, custom_base, custom_mode,
|
||||
)
|
||||
return _fallback_client, model
|
||||
return OpenAI(api_key=custom_key, base_url=_clean_base, **_extra), model
|
||||
|
||||
|
||||
def _try_codex() -> Tuple[Optional[Any], Optional[str]]:
|
||||
@@ -1942,20 +1742,8 @@ def resolve_provider_client(
|
||||
return True
|
||||
return False
|
||||
|
||||
def _wrap_if_needed(client_obj, final_model_str: str, base_url_str: str = "",
|
||||
api_key_str: str = ""):
|
||||
"""Wrap a plain OpenAI client in the correct transport adapter.
|
||||
|
||||
Handles two cases:
|
||||
- ``CodexAuxiliaryClient`` when the endpoint needs the Responses API
|
||||
(explicit ``api_mode=codex_responses`` or api.openai.com + codex
|
||||
model name).
|
||||
- ``AnthropicAuxiliaryClient`` when the endpoint speaks Anthropic
|
||||
Messages (explicit ``api_mode=anthropic_messages``, any ``/anthropic``
|
||||
suffix, ``api.kimi.com/coding``, or ``api.anthropic.com``).
|
||||
|
||||
Clients that are already specialized wrappers pass through unchanged.
|
||||
"""
|
||||
def _wrap_if_needed(client_obj, final_model_str: str, base_url_str: str = ""):
|
||||
"""Wrap a plain OpenAI client in CodexAuxiliaryClient if Responses API is needed."""
|
||||
if _needs_codex_wrap(client_obj, base_url_str, final_model_str):
|
||||
logger.debug(
|
||||
"resolve_provider_client: wrapping client in CodexAuxiliaryClient "
|
||||
@@ -1963,11 +1751,7 @@ def resolve_provider_client(
|
||||
api_mode or "auto-detected", final_model_str,
|
||||
base_url_str[:60] if base_url_str else "")
|
||||
return CodexAuxiliaryClient(client_obj, final_model_str)
|
||||
# Anthropic-wire endpoints: rewrap plain OpenAI clients so
|
||||
# chat.completions.create() is translated to /v1/messages.
|
||||
return _maybe_wrap_anthropic(
|
||||
client_obj, final_model_str, api_key_str, base_url_str, api_mode,
|
||||
)
|
||||
return client_obj
|
||||
|
||||
# ── Auto: try all providers in priority order ────────────────────
|
||||
if provider == "auto":
|
||||
@@ -2047,7 +1831,7 @@ def resolve_provider_client(
|
||||
# ── Custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY) ───────────
|
||||
if provider == "custom":
|
||||
if explicit_base_url:
|
||||
custom_base = _to_openai_base_url(explicit_base_url).strip()
|
||||
custom_base = explicit_base_url.strip()
|
||||
custom_key = (
|
||||
(explicit_api_key or "").strip()
|
||||
or os.getenv("OPENAI_API_KEY", "").strip()
|
||||
@@ -2060,7 +1844,7 @@ def resolve_provider_client(
|
||||
)
|
||||
return None, None
|
||||
final_model = _normalize_resolved_model(
|
||||
model or (main_runtime.get("model") if main_runtime else None) or "gpt-4o-mini",
|
||||
model or _read_main_model() or "gpt-4o-mini",
|
||||
provider,
|
||||
)
|
||||
extra = {}
|
||||
@@ -2075,7 +1859,7 @@ def resolve_provider_client(
|
||||
is_agent_turn=True, is_vision=is_vision
|
||||
)
|
||||
client = OpenAI(api_key=custom_key, base_url=_clean_base, **extra)
|
||||
client = _wrap_if_needed(client, final_model, custom_base, custom_key)
|
||||
client = _wrap_if_needed(client, final_model, custom_base)
|
||||
return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
|
||||
else (client, final_model))
|
||||
# Try custom first, then codex, then API-key providers
|
||||
@@ -2085,8 +1869,7 @@ def resolve_provider_client(
|
||||
if client is not None:
|
||||
final_model = _normalize_resolved_model(model or default, provider)
|
||||
_cbase = str(getattr(client, "base_url", "") or "")
|
||||
_ckey = str(getattr(client, "api_key", "") or "")
|
||||
client = _wrap_if_needed(client, final_model, _cbase, _ckey)
|
||||
client = _wrap_if_needed(client, final_model, _cbase)
|
||||
return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
|
||||
else (client, final_model))
|
||||
logger.warning("resolve_provider_client: custom/main requested "
|
||||
@@ -2109,22 +1892,10 @@ def resolve_provider_client(
|
||||
entry_api_mode = (api_mode or custom_entry.get("api_mode") or "").strip()
|
||||
if custom_base:
|
||||
final_model = _normalize_resolved_model(
|
||||
model
|
||||
or custom_entry.get("model")
|
||||
or (main_runtime.get("model") if main_runtime else None)
|
||||
or _read_main_model()
|
||||
or "gpt-4o-mini",
|
||||
model or custom_entry.get("model") or _read_main_model() or "gpt-4o-mini",
|
||||
provider,
|
||||
)
|
||||
# anthropic_messages talks to the /anthropic surface directly;
|
||||
# OpenAI-wire paths (chat_completions / codex_responses) need the
|
||||
# /v1 equivalent. Rewrite only on the OpenAI-wire path so the
|
||||
# Anthropic fallback SDK still sees the original URL.
|
||||
if entry_api_mode == "anthropic_messages":
|
||||
openai_base = custom_base
|
||||
else:
|
||||
openai_base = _to_openai_base_url(custom_base)
|
||||
_clean_base2, _dq2 = _extract_url_query_params(openai_base)
|
||||
_clean_base2, _dq2 = _extract_url_query_params(custom_base)
|
||||
_extra2 = {"default_query": _dq2} if _dq2 else {}
|
||||
logger.debug(
|
||||
"resolve_provider_client: named custom provider %r (%s, api_mode=%s)",
|
||||
@@ -2143,12 +1914,7 @@ def resolve_provider_client(
|
||||
"installed — falling back to OpenAI-wire.",
|
||||
provider,
|
||||
)
|
||||
# Fallback went OpenAI-wire after all — redo the query
|
||||
# extraction against the rewritten /v1 URL.
|
||||
_fallback_base = _to_openai_base_url(custom_base)
|
||||
_fb_clean, _fb_dq = _extract_url_query_params(_fallback_base)
|
||||
_fb_extra = {"default_query": _fb_dq} if _fb_dq else {}
|
||||
client = OpenAI(api_key=custom_key, base_url=_fb_clean, **_fb_extra)
|
||||
client = OpenAI(api_key=custom_key, base_url=_clean_base2, **_extra2)
|
||||
return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
|
||||
else (client, final_model))
|
||||
sync_anthropic = AnthropicAuxiliaryClient(
|
||||
@@ -2167,7 +1933,7 @@ def resolve_provider_client(
|
||||
):
|
||||
client = CodexAuxiliaryClient(client, final_model)
|
||||
else:
|
||||
client = _wrap_if_needed(client, final_model, openai_base, custom_key)
|
||||
client = _wrap_if_needed(client, final_model, custom_base)
|
||||
return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
|
||||
else (client, final_model))
|
||||
logger.warning(
|
||||
@@ -2260,11 +2026,8 @@ def resolve_provider_client(
|
||||
|
||||
# Honor api_mode for any API-key provider (e.g. direct OpenAI with
|
||||
# codex-family models). The copilot-specific wrapping above handles
|
||||
# copilot; this covers the general case (#6800). Also rewraps
|
||||
# Anthropic-wire endpoints (Kimi Coding Plan api.kimi.com/coding,
|
||||
# /anthropic-suffixed gateways) so named providers like kimi-coding
|
||||
# land on the right transport without needing per-provider branches.
|
||||
client = _wrap_if_needed(client, final_model, base_url, api_key)
|
||||
# copilot; this covers the general case (#6800).
|
||||
client = _wrap_if_needed(client, final_model, base_url)
|
||||
|
||||
logger.debug("resolve_provider_client: %s (%s)", provider, final_model)
|
||||
return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
|
||||
@@ -2272,12 +2035,7 @@ def resolve_provider_client(
|
||||
|
||||
if pconfig.auth_type == "external_process":
|
||||
creds = resolve_external_process_provider_credentials(provider)
|
||||
final_model = _normalize_resolved_model(
|
||||
model
|
||||
or (main_runtime.get("model") if main_runtime else None)
|
||||
or _read_main_model(),
|
||||
provider,
|
||||
)
|
||||
final_model = _normalize_resolved_model(model or _read_main_model(), provider)
|
||||
if provider == "copilot-acp":
|
||||
api_key = str(creds.get("api_key", "")).strip()
|
||||
base_url = str(creds.get("base_url", "")).strip()
|
||||
@@ -2800,19 +2558,12 @@ def _is_openrouter_client(client: Any) -> bool:
|
||||
return False
|
||||
|
||||
|
||||
def _cached_client_accepts_slash_models(client: Any, cached_default: Optional[str]) -> bool:
|
||||
"""Best-effort check for cached clients that accept ``vendor/model`` IDs."""
|
||||
if _is_openrouter_client(client):
|
||||
return True
|
||||
return bool(cached_default and "/" in cached_default)
|
||||
|
||||
|
||||
def _compat_model(client: Any, model: Optional[str], cached_default: Optional[str]) -> Optional[str]:
|
||||
"""Keep slash-bearing model IDs only for cached clients that support them.
|
||||
"""Drop OpenRouter-format model slugs (with '/') for non-OpenRouter clients.
|
||||
|
||||
Mirrors the guard in resolve_provider_client() which is skipped on cache hits.
|
||||
"""
|
||||
if model and "/" in model and not _cached_client_accepts_slash_models(client, cached_default):
|
||||
if model and "/" in model and not _is_openrouter_client(client):
|
||||
return cached_default
|
||||
return model or cached_default
|
||||
|
||||
|
||||
@@ -291,52 +291,14 @@ def has_aws_credentials(env: Optional[Dict[str, str]] = None) -> bool:
|
||||
def resolve_bedrock_region(env: Optional[Dict[str, str]] = None) -> str:
|
||||
"""Resolve the AWS region for Bedrock API calls.
|
||||
|
||||
Priority:
|
||||
1. AWS_REGION env var
|
||||
2. AWS_DEFAULT_REGION env var
|
||||
3. boto3/botocore configured region (from ~/.aws/config or SSO profile)
|
||||
4. us-east-1 (hard fallback)
|
||||
|
||||
The boto3 fallback is critical for EU/AP users who configure their region
|
||||
in ~/.aws/config via a named profile rather than env vars — without it,
|
||||
live model discovery would always return us.* profile IDs regardless of
|
||||
the user's actual region.
|
||||
Priority: AWS_REGION → AWS_DEFAULT_REGION → us-east-1 (fallback).
|
||||
"""
|
||||
env = env if env is not None else os.environ
|
||||
explicit = (
|
||||
return (
|
||||
env.get("AWS_REGION", "").strip()
|
||||
or env.get("AWS_DEFAULT_REGION", "").strip()
|
||||
or "us-east-1"
|
||||
)
|
||||
if explicit:
|
||||
return explicit
|
||||
try:
|
||||
import botocore.session
|
||||
region = botocore.session.get_session().get_config_variable("region")
|
||||
if region:
|
||||
return region
|
||||
except Exception:
|
||||
pass
|
||||
return "us-east-1"
|
||||
|
||||
|
||||
def bedrock_model_ids_or_none() -> Optional[List[str]]:
|
||||
"""Live-discover Bedrock model IDs for the active region.
|
||||
|
||||
Returns a list of model ID strings if discovery succeeds and yields
|
||||
at least one model, or ``None`` on failure / empty result. Callers
|
||||
should fall back to the static curated list when ``None`` is returned.
|
||||
|
||||
This helper consolidates the discover → extract-ids → fallback
|
||||
pattern that was previously duplicated across ``provider_model_ids``,
|
||||
``list_authenticated_providers`` section 2, and section 3.
|
||||
"""
|
||||
try:
|
||||
discovered = discover_bedrock_models(resolve_bedrock_region())
|
||||
if discovered:
|
||||
return [m["id"] for m in discovered]
|
||||
except Exception:
|
||||
pass
|
||||
return None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@@ -338,10 +338,6 @@ class ContextCompressor(ContextEngine):
|
||||
self._context_probe_persistable = False
|
||||
self._previous_summary = None
|
||||
self._last_summary_error = None
|
||||
self._last_summary_dropped_count = 0
|
||||
self._last_summary_fallback_used = False
|
||||
self._last_aux_model_failure_error = None
|
||||
self._last_aux_model_failure_model = None
|
||||
self._last_compression_savings_pct = 100.0
|
||||
self._ineffective_compression_count = 0
|
||||
|
||||
@@ -445,17 +441,6 @@ class ContextCompressor(ContextEngine):
|
||||
self._ineffective_compression_count: int = 0
|
||||
self._summary_failure_cooldown_until: float = 0.0
|
||||
self._last_summary_error: Optional[str] = None
|
||||
# When summary generation fails and a static fallback is inserted,
|
||||
# record how many turns were unrecoverably dropped so callers
|
||||
# (gateway hygiene, /compress) can surface a visible warning.
|
||||
self._last_summary_dropped_count: int = 0
|
||||
self._last_summary_fallback_used: bool = False
|
||||
# When a user-configured summary model fails and we recover by
|
||||
# retrying on the main model, record the failure so gateway /
|
||||
# CLI callers can still warn the user even though compression
|
||||
# succeeded. Silent recovery would hide the broken config.
|
||||
self._last_aux_model_failure_error: Optional[str] = None
|
||||
self._last_aux_model_failure_model: Optional[str] = None
|
||||
|
||||
def update_from_response(self, usage: Dict[str, Any]):
|
||||
"""Update tracked token usage from API response."""
|
||||
@@ -915,50 +900,10 @@ The user has requested that this compaction PRIORITISE preserving all informatio
|
||||
"Falling back to main model '%s' for compression.",
|
||||
self.summary_model, e, self.model,
|
||||
)
|
||||
# Record the aux-model failure so callers can warn the user
|
||||
# even if the retry-on-main succeeds — a misconfigured aux
|
||||
# model is something the user needs to fix.
|
||||
_err_text = str(e).strip() or e.__class__.__name__
|
||||
if len(_err_text) > 220:
|
||||
_err_text = _err_text[:217].rstrip() + "..."
|
||||
self._last_aux_model_failure_error = _err_text
|
||||
self._last_aux_model_failure_model = self.summary_model
|
||||
self.summary_model = "" # empty = use main model
|
||||
self._summary_failure_cooldown_until = 0.0 # no cooldown
|
||||
return self._generate_summary(turns_to_summarize, focus_topic=focus_topic) # retry immediately
|
||||
|
||||
# Unknown-error best-effort retry on main model. Losing N turns of
|
||||
# context is almost always worse than one extra summary attempt, so
|
||||
# if we haven't already fallen back and the summary model differs
|
||||
# from the main model, try once more on main before entering
|
||||
# cooldown. Errors that DID match _is_model_not_found above are
|
||||
# already handled by the fast-path retry; this branch catches
|
||||
# everything else (400s, provider-specific "no route" strings,
|
||||
# aggregator rejections, etc.) where auto-retry is still safer
|
||||
# than dropping the turns.
|
||||
if (
|
||||
self.summary_model
|
||||
and self.summary_model != self.model
|
||||
and not getattr(self, "_summary_model_fallen_back", False)
|
||||
):
|
||||
self._summary_model_fallen_back = True
|
||||
logging.warning(
|
||||
"Summary model '%s' failed (%s). "
|
||||
"Retrying on main model '%s' before giving up.",
|
||||
self.summary_model, e, self.model,
|
||||
)
|
||||
# Record the aux-model failure (see 404 branch above) — user
|
||||
# should know their configured model is broken even if main
|
||||
# recovers the call.
|
||||
_err_text = str(e).strip() or e.__class__.__name__
|
||||
if len(_err_text) > 220:
|
||||
_err_text = _err_text[:217].rstrip() + "..."
|
||||
self._last_aux_model_failure_error = _err_text
|
||||
self._last_aux_model_failure_model = self.summary_model
|
||||
self.summary_model = "" # empty = use main model
|
||||
self._summary_failure_cooldown_until = 0.0
|
||||
return self._generate_summary(turns_to_summarize, focus_topic=focus_topic)
|
||||
|
||||
# Transient errors (timeout, rate limit, network) — shorter cooldown
|
||||
_transient_cooldown = 60
|
||||
self._summary_failure_cooldown_until = time.monotonic() + _transient_cooldown
|
||||
@@ -1251,13 +1196,6 @@ The user has requested that this compaction PRIORITISE preserving all informatio
|
||||
related to this topic and be more aggressive about compressing
|
||||
everything else. Inspired by Claude Code's ``/compact``.
|
||||
"""
|
||||
# Reset per-call summary failure state — callers inspect these fields
|
||||
# after compress() returns to decide whether to surface a warning.
|
||||
self._last_summary_dropped_count = 0
|
||||
self._last_summary_fallback_used = False
|
||||
self._last_summary_error = None
|
||||
self._last_aux_model_failure_error = None
|
||||
self._last_aux_model_failure_model = None
|
||||
n_messages = len(messages)
|
||||
# Only need head + 3 tail messages minimum (token budget decides the real tail size)
|
||||
_min_for_compress = self.protect_first_n + 3 + 1
|
||||
@@ -1336,13 +1274,11 @@ The user has requested that this compaction PRIORITISE preserving all informatio
|
||||
if not self.quiet_mode:
|
||||
logger.warning("Summary generation failed — inserting static fallback context marker")
|
||||
n_dropped = compress_end - compress_start
|
||||
self._last_summary_dropped_count = n_dropped
|
||||
self._last_summary_fallback_used = True
|
||||
summary = (
|
||||
f"{SUMMARY_PREFIX}\n"
|
||||
f"Summary generation was unavailable. {n_dropped} message(s) were "
|
||||
f"Summary generation was unavailable. {n_dropped} conversation turns were "
|
||||
f"removed to free context space but could not be summarized. The removed "
|
||||
f"messages contained earlier work in this session. Continue based on the "
|
||||
f"turns contained earlier work in this session. Continue based on the "
|
||||
f"recent messages below and the current state of any files or resources."
|
||||
)
|
||||
|
||||
|
||||
@@ -7,6 +7,7 @@ import random
|
||||
import threading
|
||||
import time
|
||||
import uuid
|
||||
import os
|
||||
import re
|
||||
from dataclasses import dataclass, fields, replace
|
||||
from datetime import datetime
|
||||
@@ -455,70 +456,6 @@ class CredentialPool:
|
||||
logger.debug("Failed to sync from credentials file: %s", exc)
|
||||
return entry
|
||||
|
||||
def _sync_codex_entry_from_auth_store(self, entry: PooledCredential) -> PooledCredential:
|
||||
"""Sync a Codex device_code pool entry from auth.json if tokens differ.
|
||||
|
||||
When a Codex OAuth access token expires (or the ChatGPT account hits
|
||||
its 5h/weekly quota), the pool entry gets marked ``STATUS_EXHAUSTED``
|
||||
with a ``last_error_reset_at`` that can be many hours in the future.
|
||||
Meanwhile the user may run ``hermes model`` / ``hermes auth`` which
|
||||
performs a fresh device-code login and writes new tokens to
|
||||
``auth.json`` under ``_auth_store_lock``. Without this sync the pool
|
||||
entry stays frozen until ``last_error_reset_at`` elapses — even
|
||||
though fresh credentials are sitting on disk — and every request
|
||||
fails with "no available entries (all exhausted or empty)".
|
||||
|
||||
Mirrors the Nous/Anthropic resync paths above. Only applies to
|
||||
device_code-sourced entries; env/API-key-sourced entries have no
|
||||
auth.json shadow to sync from.
|
||||
"""
|
||||
if self.provider != "openai-codex" or entry.source != "device_code":
|
||||
return entry
|
||||
try:
|
||||
with _auth_store_lock():
|
||||
auth_store = _load_auth_store()
|
||||
state = _load_provider_state(auth_store, "openai-codex")
|
||||
if not isinstance(state, dict):
|
||||
return entry
|
||||
tokens = state.get("tokens")
|
||||
if not isinstance(tokens, dict):
|
||||
return entry
|
||||
store_access = tokens.get("access_token", "")
|
||||
store_refresh = tokens.get("refresh_token", "")
|
||||
# Adopt auth.json tokens when either side differs. Codex refresh
|
||||
# tokens are single-use too, so a fresh refresh_token from
|
||||
# another process means our entry's pair is consumed/stale.
|
||||
entry_access = entry.access_token or ""
|
||||
entry_refresh = entry.refresh_token or ""
|
||||
if store_access and (
|
||||
store_access != entry_access
|
||||
or (store_refresh and store_refresh != entry_refresh)
|
||||
):
|
||||
logger.debug(
|
||||
"Pool entry %s: syncing Codex tokens from auth.json "
|
||||
"(refreshed by another process)",
|
||||
entry.id,
|
||||
)
|
||||
field_updates: Dict[str, Any] = {
|
||||
"access_token": store_access,
|
||||
"refresh_token": store_refresh or entry.refresh_token,
|
||||
"last_status": None,
|
||||
"last_status_at": None,
|
||||
"last_error_code": None,
|
||||
"last_error_reason": None,
|
||||
"last_error_message": None,
|
||||
"last_error_reset_at": None,
|
||||
}
|
||||
if state.get("last_refresh"):
|
||||
field_updates["last_refresh"] = state["last_refresh"]
|
||||
updated = replace(entry, **field_updates)
|
||||
self._replace_entry(entry, updated)
|
||||
self._persist()
|
||||
return updated
|
||||
except Exception as exc:
|
||||
logger.debug("Failed to sync Codex entry from auth.json: %s", exc)
|
||||
return entry
|
||||
|
||||
def _sync_nous_entry_from_auth_store(self, entry: PooledCredential) -> PooledCredential:
|
||||
"""Sync a Nous pool entry from auth.json if tokens differ.
|
||||
|
||||
@@ -851,18 +788,6 @@ class CredentialPool:
|
||||
if synced is not entry:
|
||||
entry = synced
|
||||
cleared_any = True
|
||||
# For openai-codex entries, same pattern: the user may have
|
||||
# re-authed via `hermes model` / `hermes auth` after a 429/401,
|
||||
# leaving fresh tokens on disk while the pool entry is still
|
||||
# frozen behind last_error_reset_at (can be hours in the
|
||||
# future for ChatGPT weekly windows).
|
||||
if (self.provider == "openai-codex"
|
||||
and entry.source == "device_code"
|
||||
and entry.last_status == STATUS_EXHAUSTED):
|
||||
synced = self._sync_codex_entry_from_auth_store(entry)
|
||||
if synced is not entry:
|
||||
entry = synced
|
||||
cleared_any = True
|
||||
if entry.last_status == STATUS_EXHAUSTED:
|
||||
exhausted_until = _exhausted_until(entry)
|
||||
if exhausted_until is not None and now < exhausted_until:
|
||||
|
||||
@@ -47,6 +47,7 @@ from __future__ import annotations
|
||||
|
||||
import os
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Callable, List, Optional
|
||||
|
||||
|
||||
|
||||
@@ -91,7 +91,6 @@ class ClassifiedError:
|
||||
_BILLING_PATTERNS = [
|
||||
"insufficient credits",
|
||||
"insufficient_quota",
|
||||
"insufficient balance",
|
||||
"credit balance",
|
||||
"credits have been exhausted",
|
||||
"top up your credits",
|
||||
|
||||
@@ -30,6 +30,7 @@ from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
import uuid
|
||||
from types import SimpleNamespace
|
||||
@@ -41,6 +42,7 @@ from agent import google_oauth
|
||||
from agent.gemini_schema import sanitize_gemini_tool_parameters
|
||||
from agent.google_code_assist import (
|
||||
CODE_ASSIST_ENDPOINT,
|
||||
FREE_TIER_ID,
|
||||
CodeAssistError,
|
||||
ProjectContext,
|
||||
resolve_project_context,
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict
|
||||
from typing import Any, Dict, List
|
||||
|
||||
# Gemini's ``FunctionDeclaration.parameters`` field accepts the ``Schema``
|
||||
# object, which is only a subset of OpenAPI 3.0 / JSON Schema. Strip fields
|
||||
|
||||
@@ -29,6 +29,7 @@ from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
import urllib.error
|
||||
import urllib.parse
|
||||
|
||||
@@ -49,13 +49,14 @@ import json
|
||||
import logging
|
||||
import os
|
||||
import secrets
|
||||
import socket
|
||||
import stat
|
||||
import threading
|
||||
import time
|
||||
import urllib.error
|
||||
import urllib.parse
|
||||
import urllib.request
|
||||
from dataclasses import dataclass
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Optional, Tuple
|
||||
|
||||
@@ -97,7 +98,6 @@ _DEFAULT_CLIENT_SECRET = f"GOCSPX-{_PUBLIC_CLIENT_SECRET_SUFFIX}"
|
||||
|
||||
# Regex patterns for fallback scraping from an installed gemini-cli.
|
||||
import re as _re
|
||||
from utils import atomic_replace
|
||||
_CLIENT_ID_PATTERN = _re.compile(
|
||||
r"OAUTH_CLIENT_ID\s*=\s*['\"]([0-9]+-[a-z0-9]+\.apps\.googleusercontent\.com)['\"]"
|
||||
)
|
||||
@@ -499,7 +499,7 @@ def save_credentials(creds: GoogleCredentials) -> Path:
|
||||
fh.flush()
|
||||
os.fsync(fh.fileno())
|
||||
os.chmod(tmp_path, stat.S_IRUSR | stat.S_IWUSR)
|
||||
atomic_replace(tmp_path, path)
|
||||
os.replace(tmp_path, path)
|
||||
finally:
|
||||
try:
|
||||
if tmp_path.exists():
|
||||
|
||||
@@ -1,48 +0,0 @@
|
||||
"""LM Studio reasoning-effort resolution shared by the chat-completions
|
||||
transport and run_agent's iteration-limit summary path.
|
||||
|
||||
LM Studio publishes per-model ``capabilities.reasoning.allowed_options`` (e.g.
|
||||
``["off","on"]`` for toggle-style models, ``["off","minimal","low"]`` for
|
||||
graduated models). We map the user's ``reasoning_config`` onto LM Studio's
|
||||
OpenAI-compatible vocabulary, then clamp against the model's allowed set so
|
||||
the server doesn't 400 on an unsupported effort.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import List, Optional
|
||||
|
||||
# LM Studio accepts these top-level reasoning_effort values via its
|
||||
# OpenAI-compatible chat.completions endpoint.
|
||||
_LM_VALID_EFFORTS = {"none", "minimal", "low", "medium", "high", "xhigh"}
|
||||
|
||||
# Toggle-style models publish allowed_options as ["off","on"] in /api/v1/models.
|
||||
# Map them onto the OpenAI-compatible request vocabulary.
|
||||
_LM_EFFORT_ALIASES = {"off": "none", "on": "medium"}
|
||||
|
||||
|
||||
def resolve_lmstudio_effort(
|
||||
reasoning_config: Optional[dict],
|
||||
allowed_options: Optional[List[str]],
|
||||
) -> Optional[str]:
|
||||
"""Return the ``reasoning_effort`` string to send to LM Studio, or ``None``.
|
||||
|
||||
``None`` means "omit the field": the user picked a level the model can't
|
||||
honor, so let LM Studio fall back to the model's declared default rather
|
||||
than silently substituting a different effort. When ``allowed_options`` is
|
||||
falsy (probe failed), skip clamping and send the resolved effort anyway.
|
||||
"""
|
||||
effort = "medium"
|
||||
if reasoning_config and isinstance(reasoning_config, dict):
|
||||
if reasoning_config.get("enabled") is False:
|
||||
effort = "none"
|
||||
else:
|
||||
raw = (reasoning_config.get("effort") or "").strip().lower()
|
||||
raw = _LM_EFFORT_ALIASES.get(raw, raw)
|
||||
if raw in _LM_VALID_EFFORTS:
|
||||
effort = raw
|
||||
if allowed_options:
|
||||
allowed = {_LM_EFFORT_ALIASES.get(opt, opt) for opt in allowed_options}
|
||||
if effort not in allowed:
|
||||
return None
|
||||
return effort
|
||||
+6
-114
@@ -28,6 +28,7 @@ Usage in run_agent.py:
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
import inspect
|
||||
@@ -62,124 +63,15 @@ def sanitize_context(text: str) -> str:
|
||||
return text
|
||||
|
||||
|
||||
class StreamingContextScrubber:
|
||||
"""Stateful scrubber for streaming text that may contain split memory-context spans.
|
||||
|
||||
The one-shot ``sanitize_context`` regex cannot survive chunk boundaries:
|
||||
a ``<memory-context>`` opened in one delta and closed in a later delta
|
||||
leaks its payload to the UI because the non-greedy block regex needs
|
||||
both tags in one string. This scrubber runs a small state machine
|
||||
across deltas, holding back partial-tag tails and discarding
|
||||
everything inside a span (including the system-note line).
|
||||
|
||||
Usage::
|
||||
|
||||
scrubber = StreamingContextScrubber()
|
||||
for delta in stream:
|
||||
visible = scrubber.feed(delta)
|
||||
if visible:
|
||||
emit(visible)
|
||||
trailing = scrubber.flush() # at end of stream
|
||||
if trailing:
|
||||
emit(trailing)
|
||||
|
||||
The scrubber is re-entrant per agent instance. Callers building new
|
||||
top-level responses (new turn) should create a fresh scrubber or call
|
||||
``reset()``.
|
||||
"""
|
||||
|
||||
_OPEN_TAG = "<memory-context>"
|
||||
_CLOSE_TAG = "</memory-context>"
|
||||
|
||||
def __init__(self) -> None:
|
||||
self._in_span: bool = False
|
||||
self._buf: str = ""
|
||||
|
||||
def reset(self) -> None:
|
||||
self._in_span = False
|
||||
self._buf = ""
|
||||
|
||||
def feed(self, text: str) -> str:
|
||||
"""Return the visible portion of ``text`` after scrubbing.
|
||||
|
||||
Any trailing fragment that could be the start of an open/close tag
|
||||
is held back in the internal buffer and surfaced on the next
|
||||
``feed()`` call or discarded/emitted by ``flush()``.
|
||||
"""
|
||||
if not text:
|
||||
return ""
|
||||
buf = self._buf + text
|
||||
self._buf = ""
|
||||
out: list[str] = []
|
||||
|
||||
while buf:
|
||||
if self._in_span:
|
||||
idx = buf.lower().find(self._CLOSE_TAG)
|
||||
if idx == -1:
|
||||
# Hold back a potential partial close tag; drop the rest
|
||||
held = self._max_partial_suffix(buf, self._CLOSE_TAG)
|
||||
self._buf = buf[-held:] if held else ""
|
||||
return "".join(out)
|
||||
# Found close — skip span content + tag, continue
|
||||
buf = buf[idx + len(self._CLOSE_TAG):]
|
||||
self._in_span = False
|
||||
else:
|
||||
idx = buf.lower().find(self._OPEN_TAG)
|
||||
if idx == -1:
|
||||
# No open tag — hold back a potential partial open tag
|
||||
held = self._max_partial_suffix(buf, self._OPEN_TAG)
|
||||
if held:
|
||||
out.append(buf[:-held])
|
||||
self._buf = buf[-held:]
|
||||
else:
|
||||
out.append(buf)
|
||||
return "".join(out)
|
||||
# Emit text before the tag, enter span
|
||||
if idx > 0:
|
||||
out.append(buf[:idx])
|
||||
buf = buf[idx + len(self._OPEN_TAG):]
|
||||
self._in_span = True
|
||||
|
||||
return "".join(out)
|
||||
|
||||
def flush(self) -> str:
|
||||
"""Emit any held-back buffer at end-of-stream.
|
||||
|
||||
If we're still inside an unterminated span the remaining content is
|
||||
discarded (safer: leaking partial memory context is worse than a
|
||||
truncated answer). Otherwise the held-back partial-tag tail is
|
||||
emitted verbatim (it turned out not to be a real tag).
|
||||
"""
|
||||
if self._in_span:
|
||||
self._buf = ""
|
||||
self._in_span = False
|
||||
return ""
|
||||
tail = self._buf
|
||||
self._buf = ""
|
||||
return tail
|
||||
|
||||
@staticmethod
|
||||
def _max_partial_suffix(buf: str, tag: str) -> int:
|
||||
"""Return the length of the longest buf-suffix that is a tag-prefix.
|
||||
|
||||
Case-insensitive. Returns 0 if no suffix could start the tag.
|
||||
"""
|
||||
tag_lower = tag.lower()
|
||||
buf_lower = buf.lower()
|
||||
max_check = min(len(buf_lower), len(tag_lower) - 1)
|
||||
for i in range(max_check, 0, -1):
|
||||
if tag_lower.startswith(buf_lower[-i:]):
|
||||
return i
|
||||
return 0
|
||||
|
||||
|
||||
def build_memory_context_block(raw_context: str) -> str:
|
||||
"""Wrap prefetched memory in a fenced block with system note."""
|
||||
"""Wrap prefetched memory in a fenced block with system note.
|
||||
|
||||
The fence prevents the model from treating recalled context as user
|
||||
discourse. Injected at API-call time only — never persisted.
|
||||
"""
|
||||
if not raw_context or not raw_context.strip():
|
||||
return ""
|
||||
clean = sanitize_context(raw_context)
|
||||
if clean != raw_context:
|
||||
logger.warning("memory provider returned pre-wrapped context; stripped")
|
||||
return (
|
||||
"<memory-context>\n"
|
||||
"[System note: The following is recalled memory context, "
|
||||
|
||||
+25
-49
@@ -51,8 +51,6 @@ _PROVIDER_PREFIXES: frozenset[str] = frozenset({
|
||||
"qwen-oauth",
|
||||
"xiaomi",
|
||||
"arcee",
|
||||
"gmi",
|
||||
"tencent-tokenhub",
|
||||
"custom", "local",
|
||||
# Common aliases
|
||||
"google", "google-gemini", "google-ai-studio",
|
||||
@@ -61,9 +59,7 @@ _PROVIDER_PREFIXES: frozenset[str] = frozenset({
|
||||
"ollama",
|
||||
"stepfun", "opencode", "zen", "go", "vercel", "kilo", "dashscope", "aliyun", "qwen",
|
||||
"mimo", "xiaomi-mimo",
|
||||
"tencent", "tokenhub", "tencent-cloud", "tencentmaas",
|
||||
"arcee-ai", "arceeai",
|
||||
"gmi-cloud", "gmicloud",
|
||||
"xai", "x-ai", "x.ai", "grok",
|
||||
"nvidia", "nim", "nvidia-nim", "nemotron",
|
||||
"qwen-portal",
|
||||
@@ -210,8 +206,6 @@ DEFAULT_CONTEXT_LENGTHS = {
|
||||
"grok": 131072, # catch-all (grok-beta, unknown grok-*)
|
||||
# Kimi
|
||||
"kimi": 262144,
|
||||
# Tencent — Hy3 Preview (Hunyuan) with 256K context window
|
||||
"hy3-preview": 256000,
|
||||
# Nemotron — NVIDIA's open-weights series (128K context across all sizes)
|
||||
"nemotron": 131072,
|
||||
# Arcee
|
||||
@@ -313,8 +307,6 @@ _URL_TO_PROVIDER: Dict[str, str] = {
|
||||
"integrate.api.nvidia.com": "nvidia",
|
||||
"api.xiaomimimo.com": "xiaomi",
|
||||
"xiaomimimo.com": "xiaomi",
|
||||
"api.gmi-serving.com": "gmi",
|
||||
"tokenhub.tencentmaas.com": "tencent-tokenhub",
|
||||
"ollama.com": "ollama-cloud",
|
||||
}
|
||||
|
||||
@@ -625,6 +617,8 @@ def fetch_endpoint_model_metadata(
|
||||
if isinstance(ctx, int) and ctx > 0:
|
||||
context_length = ctx
|
||||
break
|
||||
if context_length is None:
|
||||
context_length = _extract_context_length(model)
|
||||
if context_length is not None:
|
||||
entry["context_length"] = context_length
|
||||
|
||||
@@ -708,29 +702,6 @@ def fetch_endpoint_model_metadata(
|
||||
return {}
|
||||
|
||||
|
||||
def _resolve_endpoint_context_length(
|
||||
model: str,
|
||||
base_url: str,
|
||||
api_key: str = "",
|
||||
) -> Optional[int]:
|
||||
"""Resolve context length from an endpoint's live ``/models`` metadata."""
|
||||
endpoint_metadata = fetch_endpoint_model_metadata(base_url, api_key=api_key)
|
||||
matched = endpoint_metadata.get(model)
|
||||
if not matched:
|
||||
if len(endpoint_metadata) == 1:
|
||||
matched = next(iter(endpoint_metadata.values()))
|
||||
else:
|
||||
for key, entry in endpoint_metadata.items():
|
||||
if model in key or key in model:
|
||||
matched = entry
|
||||
break
|
||||
if matched:
|
||||
context_length = matched.get("context_length")
|
||||
if isinstance(context_length, int):
|
||||
return context_length
|
||||
return None
|
||||
|
||||
|
||||
def _get_context_cache_path() -> Path:
|
||||
"""Return path to the persistent context length cache file."""
|
||||
from hermes_constants import get_hermes_home
|
||||
@@ -1014,7 +985,10 @@ def _query_local_context_length(model: str, base_url: str, api_key: str = "") ->
|
||||
ctx = cfg.get("context_length")
|
||||
if ctx and isinstance(ctx, (int, float)):
|
||||
return int(ctx)
|
||||
break
|
||||
# Fall back to max_context_length (theoretical model max)
|
||||
ctx = m.get("max_context_length") or m.get("context_length")
|
||||
if ctx and isinstance(ctx, (int, float)):
|
||||
return int(ctx)
|
||||
|
||||
# LM Studio / vLLM / llama.cpp: try /v1/models/{model}
|
||||
resp = client.get(f"{server_url}/v1/models/{model}")
|
||||
@@ -1276,10 +1250,7 @@ def get_model_context_length(
|
||||
model = _strip_provider_prefix(model)
|
||||
|
||||
# 1. Check persistent cache (model+provider)
|
||||
# LM Studio is excluded — its loaded context length is transient (the
|
||||
# user can reload the model with a different context_length at any time
|
||||
# via /api/v1/models/load), so a stale cached value would mask reloads.
|
||||
if base_url and provider != "lmstudio":
|
||||
if base_url:
|
||||
cached = get_cached_context_length(model, base_url)
|
||||
if cached is not None:
|
||||
# Invalidate stale Codex OAuth cache entries: pre-PR #14935 builds
|
||||
@@ -1324,16 +1295,28 @@ def get_model_context_length(
|
||||
# returns 128k) instead of the model's full context (400k). models.dev
|
||||
# has the correct per-provider values and is checked at step 5+.
|
||||
if _is_custom_endpoint(base_url) and not _is_known_provider_base_url(base_url):
|
||||
context_length = _resolve_endpoint_context_length(model, base_url, api_key=api_key)
|
||||
if context_length is not None:
|
||||
return context_length
|
||||
endpoint_metadata = fetch_endpoint_model_metadata(base_url, api_key=api_key)
|
||||
matched = endpoint_metadata.get(model)
|
||||
if not matched:
|
||||
# Single-model servers: if only one model is loaded, use it
|
||||
if len(endpoint_metadata) == 1:
|
||||
matched = next(iter(endpoint_metadata.values()))
|
||||
else:
|
||||
# Fuzzy match: substring in either direction
|
||||
for key, entry in endpoint_metadata.items():
|
||||
if model in key or key in model:
|
||||
matched = entry
|
||||
break
|
||||
if matched:
|
||||
context_length = matched.get("context_length")
|
||||
if isinstance(context_length, int):
|
||||
return context_length
|
||||
if not _is_known_provider_base_url(base_url):
|
||||
# 3. Try querying local server directly
|
||||
if is_local_endpoint(base_url):
|
||||
local_ctx = _query_local_context_length(model, base_url, api_key=api_key)
|
||||
if local_ctx and local_ctx > 0:
|
||||
if provider != "lmstudio":
|
||||
save_context_length(model, base_url, local_ctx)
|
||||
save_context_length(model, base_url, local_ctx)
|
||||
return local_ctx
|
||||
logger.info(
|
||||
"Could not detect context length for model %r at %s — "
|
||||
@@ -1391,12 +1374,6 @@ def get_model_context_length(
|
||||
if base_url:
|
||||
save_context_length(model, base_url, codex_ctx)
|
||||
return codex_ctx
|
||||
if effective_provider == "gmi" and base_url:
|
||||
# GMI exposes authoritative context_length via /models, but it is not
|
||||
# in models.dev yet. Preserve that higher-fidelity endpoint lookup.
|
||||
ctx = _resolve_endpoint_context_length(model, base_url, api_key=api_key)
|
||||
if ctx is not None:
|
||||
return ctx
|
||||
if effective_provider:
|
||||
from agent.models_dev import lookup_models_dev_context
|
||||
ctx = lookup_models_dev_context(effective_provider, model)
|
||||
@@ -1423,8 +1400,7 @@ def get_model_context_length(
|
||||
if base_url and is_local_endpoint(base_url):
|
||||
local_ctx = _query_local_context_length(model, base_url, api_key=api_key)
|
||||
if local_ctx and local_ctx > 0:
|
||||
if provider != "lmstudio":
|
||||
save_context_length(model, base_url, local_ctx)
|
||||
save_context_length(model, base_url, local_ctx)
|
||||
return local_ctx
|
||||
|
||||
# 10. Default fallback — 128K
|
||||
|
||||
@@ -18,7 +18,6 @@ import os
|
||||
import tempfile
|
||||
import time
|
||||
from typing import Any, Mapping, Optional
|
||||
from utils import atomic_replace
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -119,7 +118,7 @@ def record_nous_rate_limit(
|
||||
try:
|
||||
with os.fdopen(fd, "w") as f:
|
||||
json.dump(state, f)
|
||||
atomic_replace(tmp_path, path)
|
||||
os.replace(tmp_path, path)
|
||||
except Exception:
|
||||
# Clean up temp file on failure
|
||||
try:
|
||||
|
||||
@@ -310,10 +310,6 @@ PLATFORM_HINTS = {
|
||||
"Standard markdown is automatically converted to Telegram format. "
|
||||
"Supported: **bold**, *italic*, ~~strikethrough~~, ||spoiler||, "
|
||||
"`inline code`, ```code blocks```, [links](url), and ## headers. "
|
||||
"Telegram has NO table syntax — prefer bullet lists or labeled "
|
||||
"key: value pairs over pipe tables (any tables you do emit are "
|
||||
"auto-rewritten into row-group bullets, which you can produce "
|
||||
"directly for cleaner output). "
|
||||
"You can send media files natively: to deliver a file to the user, "
|
||||
"include MEDIA:/absolute/path/to/file in your response. Images "
|
||||
"(.png, .jpg, .webp) appear as photos, audio (.ogg) sends as voice "
|
||||
|
||||
+6
-58
@@ -56,12 +56,8 @@ _SENSITIVE_BODY_KEYS = frozenset({
|
||||
})
|
||||
|
||||
# Snapshot at import time so runtime env mutations (e.g. LLM-generated
|
||||
# `export HERMES_REDACT_SECRETS=true`) cannot enable/disable redaction
|
||||
# mid-session. OFF by default — user must opt in via
|
||||
# `security.redact_secrets: true` in config.yaml (bridged to this env var
|
||||
# in hermes_cli/main.py and gateway/run.py) or `HERMES_REDACT_SECRETS=true`
|
||||
# in ~/.hermes/.env.
|
||||
_REDACT_ENABLED = os.getenv("HERMES_REDACT_SECRETS", "").lower() in ("1", "true", "yes", "on")
|
||||
# `export HERMES_REDACT_SECRETS=false`) cannot disable redaction mid-session.
|
||||
_REDACT_ENABLED = os.getenv("HERMES_REDACT_SECRETS", "").lower() not in ("0", "false", "no", "off")
|
||||
|
||||
# Known API key prefixes -- match the prefix + contiguous token chars
|
||||
_PREFIX_PATTERNS = [
|
||||
@@ -184,59 +180,11 @@ _PREFIX_RE = re.compile(
|
||||
)
|
||||
|
||||
|
||||
def mask_secret(
|
||||
value: str,
|
||||
*,
|
||||
head: int = 4,
|
||||
tail: int = 4,
|
||||
floor: int = 12,
|
||||
placeholder: str = "***",
|
||||
empty: str = "",
|
||||
) -> str:
|
||||
"""Mask a secret for display, preserving ``head`` and ``tail`` characters.
|
||||
|
||||
Canonical helper for display-time redaction across Hermes — used by
|
||||
``hermes config``, ``hermes status``, ``hermes dump``, and anywhere
|
||||
a secret needs to be shown truncated for debuggability while still
|
||||
keeping the bulk hidden.
|
||||
|
||||
Args:
|
||||
value: The secret to mask. ``None``/empty returns ``empty``.
|
||||
head: Leading characters to preserve. Default 4.
|
||||
tail: Trailing characters to preserve. Default 4.
|
||||
floor: Values shorter than ``head + tail + floor_margin`` are
|
||||
fully masked (returns ``placeholder``). Default 12 —
|
||||
matches the existing config/status/dump convention.
|
||||
placeholder: Value returned for too-short inputs. Default ``"***"``.
|
||||
empty: Value returned when ``value`` is falsy (None, ""). The
|
||||
caller can override this to e.g. ``color("(not set)",
|
||||
Colors.DIM)`` for user-facing display.
|
||||
|
||||
Examples:
|
||||
>>> mask_secret("sk-proj-abcdef1234567890")
|
||||
'sk-p...7890'
|
||||
>>> mask_secret("short") # fully masked
|
||||
'***'
|
||||
>>> mask_secret("") # empty default
|
||||
''
|
||||
>>> mask_secret("", empty="(not set)") # empty override
|
||||
'(not set)'
|
||||
>>> mask_secret("long-token", head=6, tail=4, floor=18)
|
||||
'***'
|
||||
"""
|
||||
if not value:
|
||||
return empty
|
||||
if len(value) < floor:
|
||||
return placeholder
|
||||
return f"{value[:head]}...{value[-tail:]}"
|
||||
|
||||
|
||||
def _mask_token(token: str) -> str:
|
||||
"""Mask a log token — conservative 18-char floor, preserves 6 prefix / 4 suffix."""
|
||||
# Empty input: historically this returned "***" rather than "". Preserve.
|
||||
if not token:
|
||||
"""Mask a token, preserving prefix for long tokens."""
|
||||
if len(token) < 18:
|
||||
return "***"
|
||||
return mask_secret(token, head=6, tail=4, floor=18)
|
||||
return f"{token[:6]}...{token[-4:]}"
|
||||
|
||||
|
||||
def _redact_query_string(query: str) -> str:
|
||||
@@ -309,7 +257,7 @@ def redact_sensitive_text(text: str) -> str:
|
||||
"""Apply all redaction patterns to a block of text.
|
||||
|
||||
Safe to call on any string -- non-matching text passes through unchanged.
|
||||
Disabled by default — enable via security.redact_secrets: true in config.yaml.
|
||||
Disabled when security.redact_secrets is false in config.yaml.
|
||||
"""
|
||||
if text is None:
|
||||
return None
|
||||
|
||||
@@ -76,7 +76,6 @@ except ImportError: # pragma: no cover
|
||||
fcntl = None # type: ignore[assignment]
|
||||
|
||||
from hermes_constants import get_hermes_home
|
||||
from utils import atomic_replace
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -569,7 +568,7 @@ def save_allowlist(data: Dict[str, Any]) -> None:
|
||||
try:
|
||||
with os.fdopen(fd, "w") as fh:
|
||||
fh.write(json.dumps(data, indent=2, sort_keys=True))
|
||||
atomic_replace(tmp_path, p)
|
||||
os.replace(tmp_path, p)
|
||||
except Exception:
|
||||
try:
|
||||
os.unlink(tmp_path)
|
||||
|
||||
@@ -30,12 +30,10 @@ def generate_title(
|
||||
assistant_response: str,
|
||||
timeout: float = 30.0,
|
||||
failure_callback: Optional[FailureCallback] = None,
|
||||
main_runtime: dict = None,
|
||||
) -> Optional[str]:
|
||||
"""Generate a session title from the first exchange.
|
||||
|
||||
Uses the main runtime's model when available, falling back to the
|
||||
auxiliary LLM client (cheapest/fastest available model).
|
||||
Uses the auxiliary LLM client (cheapest/fastest available model).
|
||||
Returns the title string or None on failure.
|
||||
|
||||
``failure_callback`` is invoked with ``(task, exception)`` when the
|
||||
@@ -59,7 +57,6 @@ def generate_title(
|
||||
max_tokens=500,
|
||||
temperature=0.3,
|
||||
timeout=timeout,
|
||||
main_runtime=main_runtime,
|
||||
)
|
||||
title = (response.choices[0].message.content or "").strip()
|
||||
# Clean up: remove quotes, trailing punctuation, prefixes like "Title: "
|
||||
@@ -89,7 +86,6 @@ def auto_title_session(
|
||||
user_message: str,
|
||||
assistant_response: str,
|
||||
failure_callback: Optional[FailureCallback] = None,
|
||||
main_runtime: dict = None,
|
||||
) -> None:
|
||||
"""Generate and set a session title if one doesn't already exist.
|
||||
|
||||
@@ -111,7 +107,7 @@ def auto_title_session(
|
||||
return
|
||||
|
||||
title = generate_title(
|
||||
user_message, assistant_response, failure_callback=failure_callback, main_runtime=main_runtime
|
||||
user_message, assistant_response, failure_callback=failure_callback
|
||||
)
|
||||
if not title:
|
||||
return
|
||||
@@ -130,7 +126,6 @@ def maybe_auto_title(
|
||||
assistant_response: str,
|
||||
conversation_history: list,
|
||||
failure_callback: Optional[FailureCallback] = None,
|
||||
main_runtime: dict = None,
|
||||
) -> None:
|
||||
"""Fire-and-forget title generation after the first exchange.
|
||||
|
||||
@@ -152,7 +147,7 @@ def maybe_auto_title(
|
||||
thread = threading.Thread(
|
||||
target=auto_title_session,
|
||||
args=(session_db, session_id, user_message, assistant_response),
|
||||
kwargs={"failure_callback": failure_callback, "main_runtime": main_runtime},
|
||||
kwargs={"failure_callback": failure_callback},
|
||||
daemon=True,
|
||||
name="auto-title",
|
||||
)
|
||||
|
||||
@@ -85,6 +85,9 @@ class AnthropicTransport(ProviderTransport):
|
||||
from agent.anthropic_adapter import _to_plain_data
|
||||
from agent.transports.types import ToolCall
|
||||
|
||||
strip_tool_prefix = kwargs.get("strip_tool_prefix", False)
|
||||
_MCP_PREFIX = "mcp_"
|
||||
|
||||
text_parts = []
|
||||
reasoning_parts = []
|
||||
reasoning_details = []
|
||||
@@ -99,10 +102,13 @@ class AnthropicTransport(ProviderTransport):
|
||||
if isinstance(block_dict, dict):
|
||||
reasoning_details.append(block_dict)
|
||||
elif block.type == "tool_use":
|
||||
name = block.name
|
||||
if strip_tool_prefix and name.startswith(_MCP_PREFIX):
|
||||
name = name[len(_MCP_PREFIX):]
|
||||
tool_calls.append(
|
||||
ToolCall(
|
||||
id=block.id,
|
||||
name=block.name,
|
||||
name=name,
|
||||
arguments=json.dumps(block.input),
|
||||
)
|
||||
)
|
||||
|
||||
@@ -12,65 +12,12 @@ reasoning configuration, temperature handling, and extra_body assembly.
|
||||
import copy
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from agent.lmstudio_reasoning import resolve_lmstudio_effort
|
||||
from agent.moonshot_schema import is_moonshot_model, sanitize_moonshot_tools
|
||||
from agent.prompt_builder import DEVELOPER_ROLE_MODELS
|
||||
from agent.transports.base import ProviderTransport
|
||||
from agent.transports.types import NormalizedResponse, ToolCall, Usage
|
||||
|
||||
|
||||
def _build_gemini_thinking_config(model: str, reasoning_config: dict | None) -> dict | None:
|
||||
"""Translate Hermes/OpenRouter-style reasoning config to Gemini thinkingConfig.
|
||||
|
||||
Gemini native/cloud-code adapters do not read ``extra_body.reasoning``.
|
||||
They only inspect ``extra_body.thinking_config`` / ``thinkingConfig`` and
|
||||
then request thought parts with ``includeThoughts`` enabled.
|
||||
"""
|
||||
if reasoning_config is None or not isinstance(reasoning_config, dict):
|
||||
return None
|
||||
|
||||
if reasoning_config.get("enabled") is False:
|
||||
# Gemini can hide thought parts even when internal thinking still
|
||||
# happens; omit thinkingLevel to avoid model-specific validation quirks.
|
||||
return {"includeThoughts": False}
|
||||
|
||||
effort = str(reasoning_config.get("effort", "medium") or "medium").strip().lower()
|
||||
if effort == "none":
|
||||
return {"includeThoughts": False}
|
||||
|
||||
thinking_config: Dict[str, Any] = {"includeThoughts": True}
|
||||
normalized_model = (model or "").strip().lower()
|
||||
if normalized_model.startswith("google/"):
|
||||
normalized_model = normalized_model.split("/", 1)[1]
|
||||
|
||||
# Gemini 2.5 accepts thinkingBudget; don't guess a budget from Hermes'
|
||||
# coarse effort levels. ``includeThoughts`` alone is enough to surface
|
||||
# thought parts without risking request validation errors.
|
||||
if normalized_model.startswith("gemini-2.5-"):
|
||||
return thinking_config
|
||||
|
||||
if effort not in {"minimal", "low", "medium", "high", "xhigh"}:
|
||||
effort = "medium"
|
||||
|
||||
# Gemini 3 Flash documents low/medium/high thinking levels; Gemini 3 Pro
|
||||
# is stricter (low/high). Clamp Hermes' wider effort set to what each
|
||||
# family accepts so we never forward an undocumented level verbatim.
|
||||
if normalized_model.startswith(("gemini-3", "gemini-3.1")):
|
||||
if "flash" in normalized_model:
|
||||
if effort in {"minimal", "low"}:
|
||||
thinking_config["thinkingLevel"] = "low"
|
||||
elif effort in {"high", "xhigh"}:
|
||||
thinking_config["thinkingLevel"] = "high"
|
||||
else:
|
||||
thinking_config["thinkingLevel"] = "medium"
|
||||
elif "pro" in normalized_model:
|
||||
thinking_config["thinkingLevel"] = (
|
||||
"high" if effort in {"high", "xhigh"} else "low"
|
||||
)
|
||||
|
||||
return thinking_config
|
||||
|
||||
|
||||
class ChatCompletionsTransport(ProviderTransport):
|
||||
"""Transport for api_mode='chat_completions'.
|
||||
|
||||
@@ -154,7 +101,6 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||
is_github_models: bool
|
||||
is_nvidia_nim: bool
|
||||
is_kimi: bool
|
||||
is_lmstudio: bool
|
||||
is_custom_provider: bool
|
||||
ollama_num_ctx: int | None
|
||||
# Provider routing
|
||||
@@ -168,7 +114,6 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||
# Reasoning
|
||||
supports_reasoning: bool
|
||||
github_reasoning_extra: dict | None
|
||||
lmstudio_reasoning_options: list[str] | None # raw allowed_options from /api/v1/models
|
||||
# Claude on OpenRouter/Nous max output
|
||||
anthropic_max_output: int | None
|
||||
# Extra
|
||||
@@ -243,7 +188,6 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||
anthropic_max_out = params.get("anthropic_max_output")
|
||||
is_nvidia_nim = params.get("is_nvidia_nim", False)
|
||||
is_kimi = params.get("is_kimi", False)
|
||||
is_tokenhub = params.get("is_tokenhub", False)
|
||||
reasoning_config = params.get("reasoning_config")
|
||||
|
||||
if ephemeral is not None and max_tokens_fn:
|
||||
@@ -275,40 +219,12 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||
_kimi_effort = _e
|
||||
api_kwargs["reasoning_effort"] = _kimi_effort
|
||||
|
||||
# Tencent TokenHub: top-level reasoning_effort (unless thinking disabled)
|
||||
if is_tokenhub:
|
||||
_tokenhub_thinking_off = bool(
|
||||
reasoning_config
|
||||
and isinstance(reasoning_config, dict)
|
||||
and reasoning_config.get("enabled") is False
|
||||
)
|
||||
if not _tokenhub_thinking_off:
|
||||
_tokenhub_effort = "high"
|
||||
if reasoning_config and isinstance(reasoning_config, dict):
|
||||
_e = (reasoning_config.get("effort") or "").strip().lower()
|
||||
if _e in ("low", "medium", "high"):
|
||||
_tokenhub_effort = _e
|
||||
api_kwargs["reasoning_effort"] = _tokenhub_effort
|
||||
|
||||
# LM Studio: top-level reasoning_effort. Only emit when the model
|
||||
# declares reasoning support via /api/v1/models capabilities (gated
|
||||
# upstream by params["supports_reasoning"]). resolve_lmstudio_effort
|
||||
# is shared with run_agent's summary path so both stay in sync.
|
||||
if params.get("is_lmstudio", False) and params.get("supports_reasoning", False):
|
||||
_lm_effort = resolve_lmstudio_effort(
|
||||
reasoning_config,
|
||||
params.get("lmstudio_reasoning_options"),
|
||||
)
|
||||
if _lm_effort is not None:
|
||||
api_kwargs["reasoning_effort"] = _lm_effort
|
||||
|
||||
# extra_body assembly
|
||||
extra_body: Dict[str, Any] = {}
|
||||
|
||||
is_openrouter = params.get("is_openrouter", False)
|
||||
is_nous = params.get("is_nous", False)
|
||||
is_github_models = params.get("is_github_models", False)
|
||||
provider_name = str(params.get("provider_name") or "").strip().lower()
|
||||
|
||||
provider_prefs = params.get("provider_preferences")
|
||||
if provider_prefs and is_openrouter:
|
||||
@@ -324,9 +240,8 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||
"type": "enabled" if _kimi_thinking_enabled else "disabled",
|
||||
}
|
||||
|
||||
# Reasoning. LM Studio is handled above via top-level reasoning_effort,
|
||||
# so skip emitting extra_body.reasoning for it.
|
||||
if params.get("supports_reasoning", False) and not params.get("is_lmstudio", False):
|
||||
# Reasoning
|
||||
if params.get("supports_reasoning", False):
|
||||
if is_github_models:
|
||||
gh_reasoning = params.get("github_reasoning_extra")
|
||||
if gh_reasoning is not None:
|
||||
@@ -362,11 +277,6 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||
if is_qwen:
|
||||
extra_body["vl_high_resolution_images"] = True
|
||||
|
||||
if provider_name in {"gemini", "google-gemini-cli"}:
|
||||
thinking_config = _build_gemini_thinking_config(model, reasoning_config)
|
||||
if thinking_config:
|
||||
extra_body["thinking_config"] = thinking_config
|
||||
|
||||
# Merge any pre-built extra_body additions
|
||||
additions = params.get("extra_body_additions")
|
||||
if additions:
|
||||
|
||||
@@ -8,7 +8,7 @@ streaming, or the _run_codex_stream() call path.
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from agent.transports.base import ProviderTransport
|
||||
from agent.transports.types import NormalizedResponse, ToolCall
|
||||
from agent.transports.types import NormalizedResponse, ToolCall, Usage
|
||||
|
||||
|
||||
class ResponsesApiTransport(ProviderTransport):
|
||||
@@ -151,6 +151,8 @@ class ResponsesApiTransport(ProviderTransport):
|
||||
"""Normalize Codex Responses API response to NormalizedResponse."""
|
||||
from agent.codex_responses_adapter import (
|
||||
_normalize_codex_response,
|
||||
_extract_responses_message_text,
|
||||
_extract_responses_reasoning_text,
|
||||
)
|
||||
|
||||
# _normalize_codex_response returns (SimpleNamespace, finish_reason_str)
|
||||
|
||||
@@ -30,13 +30,14 @@ model:
|
||||
# "ollama-cloud" - Ollama Cloud (requires: OLLAMA_API_KEY — https://ollama.com/settings)
|
||||
# "kilocode" - KiloCode gateway (requires: KILOCODE_API_KEY)
|
||||
# "ai-gateway" - Vercel AI Gateway (requires: AI_GATEWAY_API_KEY)
|
||||
# "lmstudio" - LM Studio local server (optional: LM_API_KEY, defaults to http://127.0.0.1:1234/v1)
|
||||
#
|
||||
# Local servers (LM Studio, Ollama, vLLM, llama.cpp):
|
||||
# "custom" - Any other OpenAI-compatible endpoint. Set base_url below.
|
||||
# Aliases: "ollama", "vllm", "llamacpp" all map to "custom".
|
||||
# LM Studio is first-class and uses provider: "lmstudio".
|
||||
# It works with both no-auth and auth-enabled server modes.
|
||||
# "custom" - Any OpenAI-compatible endpoint. Set base_url below.
|
||||
# Aliases: "lmstudio", "ollama", "vllm", "llamacpp" all map to "custom".
|
||||
# Example for LM Studio:
|
||||
# provider: "lmstudio"
|
||||
# base_url: "http://localhost:1234/v1"
|
||||
# No API key needed — local servers typically ignore auth.
|
||||
#
|
||||
# Can also be overridden with --provider flag or HERMES_INFERENCE_PROVIDER env var.
|
||||
provider: "auto"
|
||||
@@ -927,7 +928,7 @@ display:
|
||||
# agent_name: "My Agent" # Banner title and branding
|
||||
# welcome: "Welcome message" # Shown at CLI startup
|
||||
# response_label: " ⚔ Agent " # Response box header label
|
||||
# prompt_symbol: "⚔" # Prompt symbol (bare token; renderers add trailing space)
|
||||
# prompt_symbol: "⚔ ❯ " # Prompt symbol
|
||||
# tool_prefix: "╎" # Tool output line prefix (default: ┊)
|
||||
#
|
||||
skin: default
|
||||
|
||||
@@ -69,9 +69,7 @@ from agent.usage_pricing import (
|
||||
format_duration_compact,
|
||||
format_token_count_compact,
|
||||
)
|
||||
# NOTE: `from agent.account_usage import ...` is deliberately NOT at module
|
||||
# top — it transitively pulls the OpenAI SDK chain (~230 ms cold) and is only
|
||||
# needed when the user runs `/limits`. Lazy-imported inside the handler below.
|
||||
from agent.account_usage import fetch_account_usage, render_account_usage_lines
|
||||
from hermes_cli.banner import _format_context_length, format_banner_version_label
|
||||
|
||||
_COMMAND_SPINNER_FRAMES = ("⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏")
|
||||
@@ -5459,8 +5457,6 @@ class HermesCLI:
|
||||
try:
|
||||
providers = list_authenticated_providers(
|
||||
current_provider=self.provider or "",
|
||||
current_base_url=self.base_url or "",
|
||||
current_model=self.model or "",
|
||||
user_providers=user_provs,
|
||||
custom_providers=custom_provs,
|
||||
max_models=50,
|
||||
@@ -6004,7 +6000,6 @@ class HermesCLI:
|
||||
platform_status = {
|
||||
Platform.TELEGRAM: ("Telegram", "TELEGRAM_BOT_TOKEN"),
|
||||
Platform.DISCORD: ("Discord", "DISCORD_BOT_TOKEN"),
|
||||
Platform.SLACK: ("Slack", "SLACK_BOT_TOKEN"),
|
||||
Platform.WHATSAPP: ("WhatsApp", "WHATSAPP_ENABLED"),
|
||||
}
|
||||
|
||||
@@ -6236,8 +6231,6 @@ class HermesCLI:
|
||||
self._console_print(f" Status bar {state}")
|
||||
elif canonical == "verbose":
|
||||
self._toggle_verbose()
|
||||
elif canonical == "footer":
|
||||
self._handle_footer_command(cmd_original)
|
||||
elif canonical == "yolo":
|
||||
self._toggle_yolo()
|
||||
elif canonical == "reasoning":
|
||||
@@ -6865,58 +6858,6 @@ class HermesCLI:
|
||||
if self._apply_tui_skin_style():
|
||||
print(" Prompt + TUI colors updated.")
|
||||
|
||||
def _handle_footer_command(self, cmd_original: str) -> None:
|
||||
"""Toggle or inspect ``display.runtime_footer.enabled`` from the CLI.
|
||||
|
||||
Usage:
|
||||
/footer → toggle
|
||||
/footer on|off → explicit
|
||||
/footer status → show current state
|
||||
"""
|
||||
from hermes_cli.config import load_config
|
||||
from hermes_cli.colors import Colors as _Colors
|
||||
|
||||
# Parse arg
|
||||
arg = ""
|
||||
try:
|
||||
parts = (cmd_original or "").strip().split(None, 1)
|
||||
if len(parts) > 1:
|
||||
arg = parts[1].strip().lower()
|
||||
except Exception:
|
||||
arg = ""
|
||||
|
||||
cfg = load_config() or {}
|
||||
footer_cfg = ((cfg.get("display") or {}).get("runtime_footer") or {})
|
||||
current = bool(footer_cfg.get("enabled", False))
|
||||
fields = footer_cfg.get("fields") or ["model", "context_pct", "cwd"]
|
||||
|
||||
if arg in ("status", "?"):
|
||||
state = "ON" if current else "OFF"
|
||||
_cprint(
|
||||
f" {_Colors.BOLD}Runtime footer:{_Colors.RESET} {state}\n"
|
||||
f" Fields: {', '.join(fields)}"
|
||||
)
|
||||
return
|
||||
|
||||
if arg in ("on", "enable", "true", "1"):
|
||||
new_state = True
|
||||
elif arg in ("off", "disable", "false", "0"):
|
||||
new_state = False
|
||||
elif arg == "":
|
||||
new_state = not current
|
||||
else:
|
||||
_cprint(" Usage: /footer [on|off|status]")
|
||||
return
|
||||
|
||||
if save_config_value("display.runtime_footer.enabled", new_state):
|
||||
state = (
|
||||
f"{_Colors.GREEN}ON{_Colors.RESET}" if new_state
|
||||
else f"{_Colors.DIM}OFF{_Colors.RESET}"
|
||||
)
|
||||
_cprint(f" Runtime footer: {state}")
|
||||
else:
|
||||
_cprint(" Failed to save runtime_footer setting to config.yaml")
|
||||
|
||||
def _toggle_verbose(self):
|
||||
"""Cycle tool progress mode: off → new → all → verbose → off."""
|
||||
cycle = ["off", "new", "all", "verbose"]
|
||||
@@ -7157,15 +7098,9 @@ class HermesCLI:
|
||||
else:
|
||||
print(f"🗜️ Compressing {original_count} messages (~{approx_tokens:,} tokens)...")
|
||||
|
||||
# Pass None as system_message so _compress_context rebuilds
|
||||
# the system prompt from scratch via _build_system_prompt(None).
|
||||
# Passing _cached_system_prompt caused duplication because
|
||||
# _build_system_prompt appends system_message to prompt_parts
|
||||
# which already contain the agent identity — resulting in the
|
||||
# identity block appearing twice (issue #15281).
|
||||
compressed, _ = self.agent._compress_context(
|
||||
original_history,
|
||||
None,
|
||||
self.agent._cached_system_prompt or "",
|
||||
approx_tokens=approx_tokens,
|
||||
focus_topic=focus_topic or None,
|
||||
)
|
||||
@@ -7289,8 +7224,6 @@ class HermesCLI:
|
||||
provider = getattr(agent, "provider", None) or getattr(self, "provider", None)
|
||||
base_url = getattr(agent, "base_url", None) or getattr(self, "base_url", None)
|
||||
api_key = getattr(agent, "api_key", None) or getattr(self, "api_key", None)
|
||||
# Lazy import — pulls the OpenAI SDK chain, only needed here.
|
||||
from agent.account_usage import fetch_account_usage, render_account_usage_lines
|
||||
account_snapshot = None
|
||||
if provider:
|
||||
with concurrent.futures.ThreadPoolExecutor(max_workers=1) as _pool:
|
||||
@@ -8880,13 +8813,6 @@ class HermesCLI:
|
||||
response,
|
||||
self.conversation_history,
|
||||
failure_callback=_title_failure_cb,
|
||||
main_runtime={
|
||||
"model": self.model,
|
||||
"provider": self.provider,
|
||||
"base_url": self.base_url,
|
||||
"api_key": self.api_key,
|
||||
"api_mode": self.api_mode,
|
||||
},
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
+2
-3
@@ -21,7 +21,6 @@ from typing import Optional, Dict, List, Any, Union
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
from hermes_time import now as _hermes_now
|
||||
from utils import atomic_replace
|
||||
|
||||
try:
|
||||
from croniter import croniter
|
||||
@@ -368,7 +367,7 @@ def save_jobs(jobs: List[Dict[str, Any]]):
|
||||
json.dump({"jobs": jobs, "updated_at": _hermes_now().isoformat()}, f, indent=2)
|
||||
f.flush()
|
||||
os.fsync(f.fileno())
|
||||
atomic_replace(tmp_path, JOBS_FILE)
|
||||
os.replace(tmp_path, JOBS_FILE)
|
||||
_secure_file(JOBS_FILE)
|
||||
except BaseException:
|
||||
try:
|
||||
@@ -864,7 +863,7 @@ def save_job_output(job_id: str, output: str):
|
||||
f.write(output)
|
||||
f.flush()
|
||||
os.fsync(f.fileno())
|
||||
atomic_replace(tmp_path, output_file)
|
||||
os.replace(tmp_path, output_file)
|
||||
_secure_file(output_file)
|
||||
except BaseException:
|
||||
try:
|
||||
|
||||
+1
-3
@@ -198,9 +198,7 @@ def _resolve_single_delivery_target(job: dict, deliver_value: str) -> Optional[d
|
||||
if resolved:
|
||||
parsed_chat_id, parsed_thread_id, resolved_is_explicit = _parse_target_ref(platform_key, resolved)
|
||||
if resolved_is_explicit:
|
||||
chat_id = parsed_chat_id
|
||||
if parsed_thread_id is not None:
|
||||
thread_id = parsed_thread_id
|
||||
chat_id, thread_id = parsed_chat_id, parsed_thread_id
|
||||
else:
|
||||
chat_id = resolved
|
||||
except Exception:
|
||||
|
||||
@@ -36,7 +36,6 @@
|
||||
|
||||
imports = [
|
||||
./nix/packages.nix
|
||||
./nix/overlays.nix
|
||||
./nix/nixosModules.nix
|
||||
./nix/checks.nix
|
||||
./nix/devShell.nix
|
||||
|
||||
@@ -0,0 +1,85 @@
|
||||
"""Built-in boot-md hook — run ~/.hermes/BOOT.md on gateway startup.
|
||||
|
||||
This hook is always registered. It silently skips if no BOOT.md exists.
|
||||
To activate, create ``~/.hermes/BOOT.md`` with instructions for the
|
||||
agent to execute on every gateway restart.
|
||||
|
||||
Example BOOT.md::
|
||||
|
||||
# Startup Checklist
|
||||
|
||||
1. Check if any cron jobs failed overnight
|
||||
2. Send a status update to Discord #general
|
||||
3. If there are errors in /opt/app/deploy.log, summarize them
|
||||
|
||||
The agent runs in a background thread so it doesn't block gateway
|
||||
startup. If nothing needs attention, it replies with [SILENT] to
|
||||
suppress delivery.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import threading
|
||||
|
||||
logger = logging.getLogger("hooks.boot-md")
|
||||
|
||||
from hermes_constants import get_hermes_home
|
||||
HERMES_HOME = get_hermes_home()
|
||||
BOOT_FILE = HERMES_HOME / "BOOT.md"
|
||||
|
||||
|
||||
def _build_boot_prompt(content: str) -> str:
|
||||
"""Wrap BOOT.md content in a system-level instruction."""
|
||||
return (
|
||||
"You are running a startup boot checklist. Follow the BOOT.md "
|
||||
"instructions below exactly.\n\n"
|
||||
"---\n"
|
||||
f"{content}\n"
|
||||
"---\n\n"
|
||||
"Execute each instruction. If you need to send a message to a "
|
||||
"platform, use the send_message tool.\n"
|
||||
"If nothing needs attention and there is nothing to report, "
|
||||
"reply with ONLY: [SILENT]"
|
||||
)
|
||||
|
||||
|
||||
def _run_boot_agent(content: str) -> None:
|
||||
"""Spawn a one-shot agent session to execute the boot instructions."""
|
||||
try:
|
||||
from run_agent import AIAgent
|
||||
|
||||
prompt = _build_boot_prompt(content)
|
||||
agent = AIAgent(
|
||||
quiet_mode=True,
|
||||
skip_context_files=True,
|
||||
skip_memory=True,
|
||||
max_iterations=20,
|
||||
)
|
||||
result = agent.run_conversation(prompt)
|
||||
response = result.get("final_response", "")
|
||||
if response and "[SILENT]" not in response:
|
||||
logger.info("boot-md completed: %s", response[:200])
|
||||
else:
|
||||
logger.info("boot-md completed (nothing to report)")
|
||||
except Exception as e:
|
||||
logger.error("boot-md agent failed: %s", e)
|
||||
|
||||
|
||||
async def handle(event_type: str, context: dict) -> None:
|
||||
"""Gateway startup handler — run BOOT.md if it exists."""
|
||||
if not BOOT_FILE.exists():
|
||||
return
|
||||
|
||||
content = BOOT_FILE.read_text(encoding="utf-8").strip()
|
||||
if not content:
|
||||
return
|
||||
|
||||
logger.info("Running BOOT.md (%d chars)", len(content))
|
||||
|
||||
# Run in a background thread so we don't block gateway startup.
|
||||
thread = threading.Thread(
|
||||
target=_run_boot_agent,
|
||||
args=(content,),
|
||||
name="boot-md",
|
||||
daemon=True,
|
||||
)
|
||||
thread.start()
|
||||
+1
-16
@@ -566,8 +566,6 @@ def load_gateway_config() -> GatewayConfig:
|
||||
existing = {}
|
||||
# Deep-merge extra dicts so gateway.json defaults survive
|
||||
merged_extra = {**existing.get("extra", {}), **plat_block.get("extra", {})}
|
||||
if plat_name == Platform.SLACK.value and "enabled" in plat_block:
|
||||
merged_extra["_enabled_explicit"] = True
|
||||
merged = {**existing, **plat_block}
|
||||
if merged_extra:
|
||||
merged["extra"] = merged_extra
|
||||
@@ -612,21 +610,16 @@ def load_gateway_config() -> GatewayConfig:
|
||||
bridged["channel_prompts"] = {str(k): v for k, v in channel_prompts.items()}
|
||||
else:
|
||||
bridged["channel_prompts"] = channel_prompts
|
||||
enabled_was_explicit = "enabled" in platform_cfg
|
||||
if not bridged and not enabled_was_explicit:
|
||||
if not bridged:
|
||||
continue
|
||||
plat_data = platforms_data.setdefault(plat.value, {})
|
||||
if not isinstance(plat_data, dict):
|
||||
plat_data = {}
|
||||
platforms_data[plat.value] = plat_data
|
||||
if enabled_was_explicit:
|
||||
plat_data["enabled"] = platform_cfg["enabled"]
|
||||
extra = plat_data.setdefault("extra", {})
|
||||
if not isinstance(extra, dict):
|
||||
extra = {}
|
||||
plat_data["extra"] = extra
|
||||
if plat == Platform.SLACK and enabled_was_explicit:
|
||||
extra["_enabled_explicit"] = True
|
||||
extra.update(bridged)
|
||||
|
||||
# Slack settings → env vars (env vars take precedence)
|
||||
@@ -948,14 +941,6 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
|
||||
# No yaml config for Slack — env-only setup, enable it
|
||||
config.platforms[Platform.SLACK] = PlatformConfig()
|
||||
config.platforms[Platform.SLACK].enabled = True
|
||||
else:
|
||||
slack_config = config.platforms[Platform.SLACK]
|
||||
enabled_was_explicit = bool(slack_config.extra.pop("_enabled_explicit", False))
|
||||
if not slack_config.enabled and not enabled_was_explicit:
|
||||
# Top-level Slack settings such as channel prompts should not
|
||||
# turn an env-token setup into a disabled platform. Only an
|
||||
# explicit slack.enabled/platforms.slack.enabled false should.
|
||||
slack_config.enabled = True
|
||||
# If yaml config exists, respect its enabled flag (don't override
|
||||
# explicit enabled: false). Token is still stored so skills that
|
||||
# send Slack messages can use it without activating the gateway adapter.
|
||||
|
||||
+12
-6
@@ -52,13 +52,19 @@ class HookRegistry:
|
||||
return list(self._loaded_hooks)
|
||||
|
||||
def _register_builtin_hooks(self) -> None:
|
||||
"""Register built-in hooks that are always active.
|
||||
"""Register built-in hooks that are always active."""
|
||||
try:
|
||||
from gateway.builtin_hooks.boot_md import handle as boot_md_handle
|
||||
|
||||
Currently empty — no shipped built-in hooks. Kept as the extension
|
||||
point for future always-on gateway hooks so they drop in without
|
||||
re-plumbing discover_and_load().
|
||||
"""
|
||||
return
|
||||
self._handlers.setdefault("gateway:startup", []).append(boot_md_handle)
|
||||
self._loaded_hooks.append({
|
||||
"name": "boot-md",
|
||||
"description": "Run ~/.hermes/BOOT.md on gateway startup",
|
||||
"events": ["gateway:startup"],
|
||||
"path": "(builtin)",
|
||||
})
|
||||
except Exception as e:
|
||||
print(f"[hooks] Could not load built-in boot-md hook: {e}", flush=True)
|
||||
|
||||
def discover_and_load(self) -> None:
|
||||
"""
|
||||
|
||||
+1
-2
@@ -28,7 +28,6 @@ from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from hermes_constants import get_hermes_dir
|
||||
from utils import atomic_replace
|
||||
|
||||
|
||||
# Unambiguous alphabet -- excludes 0/O, 1/I to prevent confusion
|
||||
@@ -60,7 +59,7 @@ def _secure_write(path: Path, data: str) -> None:
|
||||
f.write(data)
|
||||
f.flush()
|
||||
os.fsync(f.fileno())
|
||||
atomic_replace(tmp_path, path)
|
||||
os.replace(tmp_path, str(path))
|
||||
try:
|
||||
os.chmod(path, 0o600)
|
||||
except OSError:
|
||||
|
||||
+11
-81
@@ -307,14 +307,9 @@ def proxy_kwargs_for_aiohttp(proxy_url: str | None) -> tuple[dict, dict]:
|
||||
"""Build kwargs for standalone ``aiohttp.ClientSession`` with proxy.
|
||||
|
||||
Returns ``(session_kwargs, request_kwargs)`` where:
|
||||
- With aiohttp-socks → ``({"connector": ProxyConnector(...)}, {})``
|
||||
for *all* proxy schemes (SOCKS **and** HTTP/HTTPS).
|
||||
- HTTP without aiohttp-socks → ``({}, {"proxy": url})``.
|
||||
- None → ``({}, {})``.
|
||||
|
||||
Prefer the connector path: it works transparently with libraries
|
||||
(like mautrix) that call ``session.request()`` without forwarding
|
||||
per-request ``proxy=`` kwargs.
|
||||
- SOCKS → ``({"connector": ProxyConnector(...)}, {})``
|
||||
- HTTP → ``({}, {"proxy": url})``
|
||||
- None → ``({}, {})``
|
||||
|
||||
Usage::
|
||||
|
||||
@@ -325,20 +320,20 @@ def proxy_kwargs_for_aiohttp(proxy_url: str | None) -> tuple[dict, dict]:
|
||||
"""
|
||||
if not proxy_url:
|
||||
return {}, {}
|
||||
try:
|
||||
from aiohttp_socks import ProxyConnector
|
||||
if proxy_url.lower().startswith("socks"):
|
||||
try:
|
||||
from aiohttp_socks import ProxyConnector
|
||||
|
||||
connector = ProxyConnector.from_url(proxy_url, rdns=True)
|
||||
return {"connector": connector}, {}
|
||||
except ImportError:
|
||||
if proxy_url.lower().startswith("socks"):
|
||||
connector = ProxyConnector.from_url(proxy_url, rdns=True)
|
||||
return {"connector": connector}, {}
|
||||
except ImportError:
|
||||
logger.warning(
|
||||
"aiohttp_socks not installed — SOCKS proxy %s ignored. "
|
||||
"Run: pip install aiohttp-socks",
|
||||
proxy_url,
|
||||
)
|
||||
return {}, {}
|
||||
return {}, {"proxy": proxy_url}
|
||||
return {}, {"proxy": proxy_url}
|
||||
|
||||
|
||||
def is_host_excluded_by_no_proxy(hostname: str, no_proxy_value: str | None = None) -> bool:
|
||||
@@ -907,41 +902,6 @@ class MessageEvent:
|
||||
return args
|
||||
|
||||
|
||||
_PLAINTEXT_GATEWAY_RESTART_PATTERNS: tuple[re.Pattern[str], ...] = (
|
||||
re.compile(r"^(?:please\s+)?restart\s+(?:the\s+)?gateway[.!?\s]*$", re.IGNORECASE),
|
||||
re.compile(r"^(?:please\s+)?restart\s+(?:the\s+)?hermes\s+gateway[.!?\s]*$", re.IGNORECASE),
|
||||
re.compile(r"^(?:please\s+)?restart\s+hermes[.!?\s]*$", re.IGNORECASE),
|
||||
)
|
||||
|
||||
|
||||
def coerce_plaintext_gateway_command(event: "MessageEvent") -> None:
|
||||
"""Rewrite a tiny set of DM plaintext admin phrases into slash commands.
|
||||
|
||||
This keeps high-impact operational phrases like ``restart gateway`` out of
|
||||
the LLM/tool path, where they can trigger a self-restart from inside the
|
||||
currently running agent and leave the gateway stuck in ``draining`` while it
|
||||
waits for that same agent to finish.
|
||||
|
||||
Scope is intentionally narrow: DM text messages only, exact restart-style
|
||||
phrases only. Group chats keep natural-language semantics.
|
||||
"""
|
||||
try:
|
||||
if event is None or event.message_type != MessageType.TEXT:
|
||||
return
|
||||
text = (event.text or "").strip()
|
||||
if not text or text.startswith("/"):
|
||||
return
|
||||
source = getattr(event, "source", None)
|
||||
if getattr(source, "chat_type", None) != "dm":
|
||||
return
|
||||
for pattern in _PLAINTEXT_GATEWAY_RESTART_PATTERNS:
|
||||
if pattern.match(text):
|
||||
event.text = "/restart"
|
||||
return
|
||||
except Exception:
|
||||
return
|
||||
|
||||
|
||||
@dataclass
|
||||
class SendResult:
|
||||
"""Result of sending a message."""
|
||||
@@ -1742,41 +1702,13 @@ class BasePlatformAdapter(ABC):
|
||||
the agent is waiting for dangerous-command approval). This is critical
|
||||
for Slack's Assistant API where ``assistant_threads_setStatus`` disables
|
||||
the compose box — pausing lets the user type ``/approve`` or ``/deny``.
|
||||
|
||||
Each ``send_typing`` call is bounded by a ~1.5s timeout so a slow
|
||||
network round-trip can't stall the refresh cadence. Telegram- and
|
||||
Discord-side typing expire after ~5s; if any individual send_typing
|
||||
takes longer than the refresh interval, the bubble would die and
|
||||
stay dead until that call returns. Abandoning the slow call lets
|
||||
the next tick fire a fresh send_typing on schedule — as long as
|
||||
one of them succeeds within the 5s platform-side window, the bubble
|
||||
stays visible across provider stalls / upstream API timeouts.
|
||||
"""
|
||||
# Bound each send_typing round-trip so the refresh cadence isn't
|
||||
# gated on network health. Must stay below ``interval`` so a slow
|
||||
# call gets abandoned before the next scheduled tick.
|
||||
_send_typing_timeout = max(0.25, min(1.5, interval - 0.25))
|
||||
try:
|
||||
while True:
|
||||
if stop_event is not None and stop_event.is_set():
|
||||
return
|
||||
if chat_id not in self._typing_paused:
|
||||
try:
|
||||
await asyncio.wait_for(
|
||||
self.send_typing(chat_id, metadata=metadata),
|
||||
timeout=_send_typing_timeout,
|
||||
)
|
||||
except asyncio.TimeoutError:
|
||||
# Slow network — abandon this tick, keep the loop
|
||||
# on schedule so the next send_typing fires fresh.
|
||||
pass
|
||||
except asyncio.CancelledError:
|
||||
raise
|
||||
except Exception as typing_err:
|
||||
logger.debug(
|
||||
"[%s] send_typing error (non-fatal): %s",
|
||||
self.name, typing_err,
|
||||
)
|
||||
await self.send_typing(chat_id, metadata=metadata)
|
||||
if stop_event is None:
|
||||
await asyncio.sleep(interval)
|
||||
continue
|
||||
@@ -2228,8 +2160,6 @@ class BasePlatformAdapter(ABC):
|
||||
"""
|
||||
if not self._message_handler:
|
||||
return
|
||||
|
||||
coerce_plaintext_gateway_command(event)
|
||||
|
||||
session_key = build_session_key(
|
||||
event.source,
|
||||
|
||||
@@ -305,7 +305,7 @@ class VoiceReceiver:
|
||||
encrypted = bytes(payload_with_nonce[:-4])
|
||||
|
||||
try:
|
||||
import nacl.secret # noqa: E402 — delayed import, only in voice path
|
||||
import nacl.secret # noqa: delayed import – only in voice path
|
||||
box = nacl.secret.Aead(self._secret_key)
|
||||
decrypted = box.decrypt(encrypted, header, bytes(nonce))
|
||||
except Exception as e:
|
||||
@@ -813,14 +813,7 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
logger.info("[%s] Synced %d slash command(s) via bulk tree sync", self.name, len(synced))
|
||||
return
|
||||
|
||||
# Discord's per-app command-management bucket is ~5 writes / 20 s,
|
||||
# so a mass-prune-plus-upsert reconcile (e.g. 77 orphans + 30
|
||||
# desired = 107 writes) takes several minutes of forced waits.
|
||||
# A flat 30 s budget blew up reliably under bucket pressure and
|
||||
# left slash commands broken for ~60 min until the bucket fully
|
||||
# recovered. Use a wide ceiling; the cap still guards against a
|
||||
# true hang. (#16713)
|
||||
summary = await asyncio.wait_for(self._safe_sync_slash_commands(), timeout=600)
|
||||
summary = await asyncio.wait_for(self._safe_sync_slash_commands(), timeout=30)
|
||||
logger.info(
|
||||
"[%s] Safely reconciled %d slash command(s): unchanged=%d updated=%d recreated=%d created=%d deleted=%d",
|
||||
self.name,
|
||||
@@ -832,11 +825,7 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
summary["deleted"],
|
||||
)
|
||||
except asyncio.TimeoutError:
|
||||
logger.warning(
|
||||
"[%s] Slash command sync timed out — Discord rate-limit bucket "
|
||||
"may be saturated; will retry on next reconnect",
|
||||
self.name,
|
||||
)
|
||||
logger.warning("[%s] Slash command sync timed out after 30s", self.name)
|
||||
except asyncio.CancelledError:
|
||||
raise
|
||||
except Exception as e: # pragma: no cover - defensive logging
|
||||
|
||||
@@ -974,6 +974,7 @@ def build_whole_comment_prompt(
|
||||
|
||||
def _resolve_model_and_runtime() -> Tuple[str, dict]:
|
||||
"""Resolve model and provider credentials, same as gateway message handling."""
|
||||
import os
|
||||
from gateway.run import _load_gateway_config, _resolve_gateway_model
|
||||
|
||||
user_config = _load_gateway_config()
|
||||
|
||||
@@ -11,10 +11,10 @@ import logging
|
||||
import re
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING, Dict
|
||||
from typing import TYPE_CHECKING, Dict, Optional
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from gateway.platforms.base import MessageEvent
|
||||
from gateway.platforms.base import BasePlatformAdapter, MessageEvent
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
+44
-420
@@ -11,7 +11,6 @@ Environment variables:
|
||||
MATRIX_PASSWORD Password (alternative to access token)
|
||||
MATRIX_ENCRYPTION Set "true" to enable E2EE
|
||||
MATRIX_DEVICE_ID Stable device ID for E2EE persistence across restarts
|
||||
MATRIX_PROXY HTTP(S) or SOCKS proxy URL for Matrix traffic
|
||||
MATRIX_ALLOWED_USERS Comma-separated Matrix user IDs (@user:server)
|
||||
MATRIX_HOME_ROOM Room ID for cron/notification delivery
|
||||
MATRIX_REACTIONS Set "false" to disable processing lifecycle reactions
|
||||
@@ -19,7 +18,6 @@ Environment variables:
|
||||
MATRIX_REQUIRE_MENTION Require @mention in rooms (default: true)
|
||||
MATRIX_FREE_RESPONSE_ROOMS Comma-separated room IDs exempt from mention requirement
|
||||
MATRIX_AUTO_THREAD Auto-create threads for room messages (default: true)
|
||||
MATRIX_DM_AUTO_THREAD Auto-create threads for DM messages (default: false)
|
||||
MATRIX_RECOVERY_KEY Recovery key for cross-signing verification after device key rotation
|
||||
MATRIX_DM_MENTION_THREADS Create a thread when bot is @mentioned in a DM (default: false)
|
||||
"""
|
||||
@@ -32,8 +30,6 @@ import mimetypes
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
|
||||
from html import escape as _html_escape
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Optional, Set
|
||||
@@ -99,25 +95,11 @@ from gateway.platforms.base import (
|
||||
MessageType,
|
||||
ProcessingOutcome,
|
||||
SendResult,
|
||||
resolve_proxy_url,
|
||||
proxy_kwargs_for_aiohttp,
|
||||
)
|
||||
from gateway.platforms.helpers import ThreadParticipationTracker
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
|
||||
class _MatrixApprovalPrompt:
|
||||
"""Tracks a pending Matrix reaction-based exec approval prompt."""
|
||||
|
||||
def __init__(self, session_key: str, chat_id: str, message_id: str, resolved: bool = False):
|
||||
self.session_key = session_key
|
||||
self.chat_id = chat_id
|
||||
self.message_id = message_id
|
||||
self.resolved = resolved
|
||||
self.bot_reaction_events: dict[str, str] = {} # emoji -> event_id
|
||||
|
||||
# Matrix message size limit (4000 chars practical, spec has no hard limit
|
||||
# but clients render poorly above this).
|
||||
MAX_MESSAGE_LENGTH = 4000
|
||||
@@ -132,85 +114,11 @@ _CRYPTO_DB_PATH = _STORE_DIR / "crypto.db"
|
||||
# Grace period: ignore messages older than this many seconds before startup.
|
||||
_STARTUP_GRACE_SECONDS = 5
|
||||
|
||||
_OUTBOUND_MENTION_RE = re.compile(
|
||||
r"(?<![\w/])(@[0-9A-Za-z._=/-]+:[0-9A-Za-z.-]+(?::\d+)?)"
|
||||
)
|
||||
|
||||
_E2EE_INSTALL_HINT = (
|
||||
"Install with: pip install 'mautrix[encryption]' (requires libolm C library)"
|
||||
)
|
||||
|
||||
_MATRIX_IMAGE_FILENAME_EXTS = frozenset({
|
||||
".jpg",
|
||||
".jpeg",
|
||||
".png",
|
||||
".gif",
|
||||
".webp",
|
||||
".bmp",
|
||||
".svg",
|
||||
".heic",
|
||||
".heif",
|
||||
".avif",
|
||||
})
|
||||
|
||||
|
||||
def _looks_like_matrix_image_filename(text: str) -> bool:
|
||||
"""Return True when Matrix image body text is probably just a transport filename.
|
||||
|
||||
Matrix ``m.image`` events commonly populate ``content.body`` with the uploaded
|
||||
filename when the user did not add a caption. Treating that raw filename as
|
||||
user-authored text confuses downstream vision enrichment.
|
||||
"""
|
||||
candidate = str(text or "").strip()
|
||||
if not candidate or "\n" in candidate or candidate.endswith("/"):
|
||||
return False
|
||||
|
||||
name = Path(candidate).name
|
||||
if not name or name != candidate:
|
||||
return False
|
||||
|
||||
suffix = Path(name).suffix.lower()
|
||||
if not suffix:
|
||||
return False
|
||||
|
||||
guessed_type, _ = mimetypes.guess_type(name)
|
||||
if guessed_type and guessed_type.startswith("image/"):
|
||||
return True
|
||||
return suffix in _MATRIX_IMAGE_FILENAME_EXTS
|
||||
|
||||
|
||||
def _create_matrix_session(proxy_url: str | None):
|
||||
"""Create an ``aiohttp.ClientSession`` whose proxy applies to *all* requests.
|
||||
|
||||
mautrix's ``HTTPAPI._send()`` calls ``session.request()`` without forwarding
|
||||
per-request ``proxy=`` kwargs. For HTTP(S) proxies we use aiohttp's native
|
||||
``proxy=`` session parameter which sets a default for every request. For SOCKS
|
||||
we use ``aiohttp_socks.ProxyConnector`` (connector-level).
|
||||
When no proxy is configured we enable ``trust_env`` so standard env vars
|
||||
(``HTTP_PROXY`` / ``HTTPS_PROXY``) are honoured automatically.
|
||||
"""
|
||||
import aiohttp
|
||||
|
||||
if not proxy_url:
|
||||
return aiohttp.ClientSession(trust_env=True)
|
||||
|
||||
if proxy_url.split("://")[0].lower().startswith("socks"):
|
||||
try:
|
||||
from aiohttp_socks import ProxyConnector
|
||||
|
||||
return aiohttp.ClientSession(
|
||||
connector=ProxyConnector.from_url(proxy_url, rdns=True),
|
||||
)
|
||||
except ImportError:
|
||||
logger.warning(
|
||||
"aiohttp_socks not installed — SOCKS proxy %s ignored. "
|
||||
"Run: pip install aiohttp-socks",
|
||||
proxy_url,
|
||||
)
|
||||
return aiohttp.ClientSession(trust_env=True)
|
||||
|
||||
return aiohttp.ClientSession(proxy=proxy_url)
|
||||
|
||||
|
||||
def _check_e2ee_deps() -> bool:
|
||||
"""Return True if mautrix E2EE dependencies (python-olm) are available."""
|
||||
@@ -352,9 +260,6 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
"1",
|
||||
"yes",
|
||||
)
|
||||
self._dm_auto_thread: bool = os.getenv(
|
||||
"MATRIX_DM_AUTO_THREAD", "false"
|
||||
).lower() in ("true", "1", "yes")
|
||||
self._dm_mention_threads: bool = os.getenv(
|
||||
"MATRIX_DM_MENTION_THREADS", "false"
|
||||
).lower() in ("true", "1", "yes")
|
||||
@@ -365,11 +270,6 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
).lower() not in ("false", "0", "no")
|
||||
self._pending_reactions: dict[tuple[str, str], str] = {}
|
||||
|
||||
# Proxy support — resolve once at init, reuse for all HTTP traffic.
|
||||
self._proxy_url: str | None = resolve_proxy_url(platform_env_var="MATRIX_PROXY")
|
||||
if self._proxy_url:
|
||||
logger.info("Matrix: proxy configured — %s", self._proxy_url)
|
||||
|
||||
# Text batching: merge rapid successive messages (Telegram-style).
|
||||
# Matrix clients split long messages around 4000 chars.
|
||||
self._text_batch_delay_seconds = float(
|
||||
@@ -381,18 +281,6 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
self._pending_text_batches: Dict[str, MessageEvent] = {}
|
||||
self._pending_text_batch_tasks: Dict[str, asyncio.Task] = {}
|
||||
|
||||
# Matrix reaction-based dangerous command approvals.
|
||||
self._approval_reaction_map = {
|
||||
"✅": "once",
|
||||
"❎": "deny",
|
||||
}
|
||||
self._approval_prompts_by_event: Dict[str, _MatrixApprovalPrompt] = {}
|
||||
self._approval_prompt_by_session: Dict[str, str] = {}
|
||||
allowed_users_raw = os.getenv("MATRIX_ALLOWED_USERS", "")
|
||||
self._allowed_user_ids: Set[str] = {
|
||||
u.strip() for u in allowed_users_raw.split(",") if u.strip()
|
||||
}
|
||||
|
||||
def _is_duplicate_event(self, event_id) -> bool:
|
||||
"""Return True if this event was already processed. Tracks the ID otherwise."""
|
||||
if not event_id:
|
||||
@@ -438,7 +326,7 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
)
|
||||
return False
|
||||
except Exception as exc:
|
||||
logger.error("Matrix: post-upload key verification failed: %s", exc, exc_info=True)
|
||||
logger.error("Matrix: post-upload key verification failed: %s", exc)
|
||||
return False
|
||||
return True
|
||||
|
||||
@@ -454,7 +342,6 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
logger.error(
|
||||
"Matrix: cannot verify device keys on server: %s — refusing E2EE",
|
||||
exc,
|
||||
exc_info=True,
|
||||
)
|
||||
return False
|
||||
|
||||
@@ -469,7 +356,7 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
try:
|
||||
await olm.share_keys()
|
||||
except Exception as exc:
|
||||
logger.error("Matrix: failed to re-upload device keys: %s", exc, exc_info=True)
|
||||
logger.error("Matrix: failed to re-upload device keys: %s", exc)
|
||||
return False
|
||||
return await self._reverify_keys_after_upload(client, local_ed25519)
|
||||
|
||||
@@ -509,7 +396,6 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
"Try generating a new access token to get a fresh device.",
|
||||
client.device_id,
|
||||
exc,
|
||||
exc_info=True,
|
||||
)
|
||||
return False
|
||||
return await self._reverify_keys_after_upload(client, local_ed25519)
|
||||
@@ -534,11 +420,9 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
_STORE_DIR.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Create the HTTP API layer.
|
||||
client_session = _create_matrix_session(self._proxy_url)
|
||||
api = HTTPAPI(
|
||||
base_url=self._homeserver,
|
||||
token=self._access_token or "",
|
||||
client_session=client_session,
|
||||
)
|
||||
|
||||
# Create the client.
|
||||
@@ -581,7 +465,6 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
logger.error(
|
||||
"Matrix: whoami failed — check MATRIX_ACCESS_TOKEN and MATRIX_HOMESERVER: %s",
|
||||
exc,
|
||||
exc_info=True,
|
||||
)
|
||||
await api.session.close()
|
||||
return False
|
||||
@@ -724,44 +607,6 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
logger.warning(
|
||||
"Matrix: recovery key verification failed: %s", exc
|
||||
)
|
||||
else:
|
||||
# No recovery key — bootstrap cross-signing if the bot
|
||||
# has none yet. Without this, Element shows "Encrypted
|
||||
# by a device not verified by its owner" on every
|
||||
# message from this bot, indefinitely. mautrix's
|
||||
# generate_recovery_key does the full flow: generates
|
||||
# MSK/SSK/USK, uploads private keys to SSSS, publishes
|
||||
# public keys to the homeserver, and signs the current
|
||||
# device with the new SSK. Some homeservers require UIA
|
||||
# for /keys/device_signing/upload — those will need an
|
||||
# alternate path; Continuwuity and Synapse-with-shared-
|
||||
# secret accept the unauthenticated upload.
|
||||
try:
|
||||
own_xsign = await olm.get_own_cross_signing_public_keys()
|
||||
except Exception as exc:
|
||||
own_xsign = None
|
||||
logger.warning(
|
||||
"Matrix: cross-signing key lookup failed: %s", exc
|
||||
)
|
||||
if own_xsign is None:
|
||||
try:
|
||||
new_recovery_key = await olm.generate_recovery_key()
|
||||
logger.warning(
|
||||
"Matrix: bootstrapped cross-signing for %s. "
|
||||
"SAVE THIS RECOVERY KEY — set "
|
||||
"MATRIX_RECOVERY_KEY for future restarts so "
|
||||
"the bot can re-sign its device after key "
|
||||
"rotation: %s",
|
||||
client.mxid,
|
||||
new_recovery_key,
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.warning(
|
||||
"Matrix: cross-signing bootstrap failed "
|
||||
"(non-fatal — Element will show 'not "
|
||||
"verified by its owner'): %s",
|
||||
exc,
|
||||
)
|
||||
|
||||
client.crypto = olm
|
||||
logger.info(
|
||||
@@ -819,7 +664,6 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
await asyncio.gather(*tasks)
|
||||
except Exception as exc:
|
||||
logger.warning("Matrix: initial sync event dispatch error: %s", exc)
|
||||
await self._join_pending_invites(sync_data)
|
||||
else:
|
||||
logger.warning(
|
||||
"Matrix: initial sync returned unexpected type %s",
|
||||
@@ -883,8 +727,17 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
chunks = self.truncate_message(formatted, MAX_MESSAGE_LENGTH)
|
||||
|
||||
last_event_id = None
|
||||
for i, chunk in enumerate(chunks):
|
||||
msg_content = self._build_text_message_content(chunk)
|
||||
for chunk in chunks:
|
||||
msg_content: Dict[str, Any] = {
|
||||
"msgtype": "m.text",
|
||||
"body": chunk,
|
||||
}
|
||||
|
||||
# Convert markdown to HTML for rich rendering.
|
||||
html = self._markdown_to_html(chunk)
|
||||
if html and html != chunk:
|
||||
msg_content["format"] = "org.matrix.custom.html"
|
||||
msg_content["formatted_body"] = html
|
||||
|
||||
# Reply-to support.
|
||||
if reply_to:
|
||||
@@ -991,21 +844,25 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
"""Edit an existing message (via m.replace)."""
|
||||
|
||||
formatted = self.format_message(content)
|
||||
new_content = self._build_text_message_content(formatted)
|
||||
msg_content: Dict[str, Any] = {
|
||||
"msgtype": "m.text",
|
||||
"body": f"* {formatted}",
|
||||
"m.new_content": new_content,
|
||||
"m.new_content": {
|
||||
"msgtype": "m.text",
|
||||
"body": formatted,
|
||||
},
|
||||
"m.relates_to": {
|
||||
"rel_type": "m.replace",
|
||||
"event_id": message_id,
|
||||
},
|
||||
}
|
||||
if "m.mentions" in new_content:
|
||||
msg_content["m.mentions"] = new_content["m.mentions"]
|
||||
if "formatted_body" in new_content:
|
||||
|
||||
html = self._markdown_to_html(formatted)
|
||||
if html and html != formatted:
|
||||
msg_content["m.new_content"]["format"] = "org.matrix.custom.html"
|
||||
msg_content["m.new_content"]["formatted_body"] = html
|
||||
msg_content["format"] = "org.matrix.custom.html"
|
||||
msg_content["formatted_body"] = f'* {new_content["formatted_body"]}'
|
||||
msg_content["m.relates_to"] = {
|
||||
"rel_type": "m.replace",
|
||||
"event_id": message_id,
|
||||
}
|
||||
msg_content["formatted_body"] = f"* {html}"
|
||||
|
||||
try:
|
||||
event_id = await self._client.send_message_event(
|
||||
@@ -1038,12 +895,10 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
# Try aiohttp first (always available), fall back to httpx
|
||||
try:
|
||||
import aiohttp as _aiohttp
|
||||
_sess_kw, _req_kw = proxy_kwargs_for_aiohttp(self._proxy_url)
|
||||
async with _aiohttp.ClientSession(**_sess_kw) as http:
|
||||
|
||||
async with _aiohttp.ClientSession(trust_env=True) as http:
|
||||
async with http.get(
|
||||
image_url,
|
||||
timeout=_aiohttp.ClientTimeout(total=30),
|
||||
**_req_kw,
|
||||
image_url, timeout=_aiohttp.ClientTimeout(total=30)
|
||||
) as resp:
|
||||
resp.raise_for_status()
|
||||
data = await resp.read()
|
||||
@@ -1053,10 +908,8 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
)
|
||||
except ImportError:
|
||||
import httpx
|
||||
_httpx_kw: dict = {}
|
||||
if self._proxy_url:
|
||||
_httpx_kw["proxy"] = self._proxy_url
|
||||
async with httpx.AsyncClient(**_httpx_kw) as http:
|
||||
|
||||
async with httpx.AsyncClient() as http:
|
||||
resp = await http.get(image_url, follow_redirects=True, timeout=30)
|
||||
resp.raise_for_status()
|
||||
data = resp.content
|
||||
@@ -1131,56 +984,6 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
chat_id, video_path, "m.video", caption, reply_to, metadata=metadata
|
||||
)
|
||||
|
||||
async def send_exec_approval(
|
||||
self,
|
||||
chat_id: str,
|
||||
command: str,
|
||||
session_key: str,
|
||||
description: str = "dangerous command",
|
||||
metadata: Optional[dict] = None,
|
||||
) -> SendResult:
|
||||
"""Send a reaction-based exec approval prompt for Matrix."""
|
||||
if not self._client:
|
||||
return SendResult(success=False, error="Not connected")
|
||||
|
||||
cmd_preview = command[:2000] + "..." if len(command) > 2000 else command
|
||||
text = (
|
||||
"⚠️ **Dangerous command requires approval**\n"
|
||||
f"```\n{cmd_preview}\n```\n"
|
||||
f"Reason: {description}\n\n"
|
||||
"Reply `/approve` to execute, `/approve session` to approve this pattern for the session, "
|
||||
"`/approve always` to approve permanently, or `/deny` to cancel.\n\n"
|
||||
"You can also click the reaction to approve:\n"
|
||||
"✅ = /approve\n"
|
||||
"❎ = /deny"
|
||||
)
|
||||
|
||||
result = await self.send(chat_id, text, metadata=metadata)
|
||||
if not result.success or not result.message_id:
|
||||
return result
|
||||
|
||||
prompt = _MatrixApprovalPrompt(
|
||||
session_key=session_key,
|
||||
chat_id=chat_id,
|
||||
message_id=result.message_id,
|
||||
)
|
||||
old_event = self._approval_prompt_by_session.get(session_key)
|
||||
if old_event:
|
||||
self._approval_prompts_by_event.pop(old_event, None)
|
||||
self._approval_prompts_by_event[result.message_id] = prompt
|
||||
self._approval_prompt_by_session[session_key] = result.message_id
|
||||
|
||||
for emoji in ("✅", "❎"):
|
||||
try:
|
||||
reaction_result = await self._send_reaction(chat_id, result.message_id, emoji)
|
||||
# Save the bot's reaction event_id for later cleanup
|
||||
if reaction_result:
|
||||
prompt.bot_reaction_events[emoji] = str(reaction_result)
|
||||
except Exception as exc:
|
||||
logger.debug("Matrix: failed to add approval reaction %s: %s", emoji, exc)
|
||||
|
||||
return result
|
||||
|
||||
def format_message(self, content: str) -> str:
|
||||
"""Pass-through — Matrix supports standard Markdown natively."""
|
||||
# Strip image markdown; media is uploaded separately.
|
||||
@@ -1312,15 +1115,9 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
next_batch = await client.sync_store.get_next_batch()
|
||||
while not self._closing:
|
||||
try:
|
||||
# Wrap in asyncio.wait_for to guard against TCP-level hangs
|
||||
# that the Matrix long-poll timeout cannot catch. Long-poll
|
||||
# is 30s, so 45s gives 15s slack for network drain.
|
||||
sync_data = await asyncio.wait_for(
|
||||
client.sync(
|
||||
since=next_batch,
|
||||
timeout=30000,
|
||||
),
|
||||
timeout=45.0,
|
||||
sync_data = await client.sync(
|
||||
since=next_batch,
|
||||
timeout=30000,
|
||||
)
|
||||
|
||||
# nio returns SyncError objects (not exceptions) for auth
|
||||
@@ -1356,7 +1153,6 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
await asyncio.gather(*tasks)
|
||||
except Exception as exc:
|
||||
logger.warning("Matrix: sync event dispatch error: %s", exc)
|
||||
await self._join_pending_invites(sync_data)
|
||||
|
||||
except asyncio.CancelledError:
|
||||
return
|
||||
@@ -1443,15 +1239,6 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
room_id = str(getattr(event, "room_id", ""))
|
||||
sender = str(getattr(event, "sender", ""))
|
||||
|
||||
# Diagnostic: confirm the callback is firing at all when DEBUG is on.
|
||||
# Helps users troubleshoot silent inbound issues like #5819, #7914, #12614.
|
||||
logger.debug(
|
||||
"Matrix: callback fired — event %s from %s in %s",
|
||||
getattr(event, "event_id", "?"),
|
||||
sender,
|
||||
room_id,
|
||||
)
|
||||
|
||||
# Ignore own messages (case-insensitive; also drops when our own
|
||||
# user_id hasn't been resolved yet — see _is_self_sender docstring
|
||||
# and issue #15763).
|
||||
@@ -1563,12 +1350,6 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
in_bot_thread = bool(thread_id and thread_id in self._threads)
|
||||
if self._require_mention and not is_free_room and not in_bot_thread:
|
||||
if not is_mentioned:
|
||||
logger.debug(
|
||||
"Matrix: ignoring message %s in %s — no @mention "
|
||||
"(set MATRIX_REQUIRE_MENTION=false to disable)",
|
||||
event_id,
|
||||
room_id,
|
||||
)
|
||||
return None
|
||||
|
||||
# DM mention-thread.
|
||||
@@ -1581,7 +1362,7 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
body = self._strip_mention(body)
|
||||
|
||||
# Auto-thread.
|
||||
if not thread_id and ((not is_dm and self._auto_thread) or (is_dm and self._dm_auto_thread)):
|
||||
if not is_dm and not thread_id and self._auto_thread:
|
||||
thread_id = event_id
|
||||
self._threads.mark(thread_id)
|
||||
|
||||
@@ -1823,9 +1604,6 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
return
|
||||
body, is_dm, chat_type, thread_id, display_name, source = ctx
|
||||
|
||||
if msgtype == "m.image" and _looks_like_matrix_image_filename(body):
|
||||
body = ""
|
||||
|
||||
allow_http_fallback = bool(http_url) and not is_encrypted_media
|
||||
media_urls = (
|
||||
[cached_path]
|
||||
@@ -1855,35 +1633,13 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
"Matrix: invited to %s — joining",
|
||||
room_id,
|
||||
)
|
||||
await self._join_room_by_id(room_id)
|
||||
|
||||
async def _join_room_by_id(self, room_id: str) -> bool:
|
||||
"""Join a room by ID and refresh local caches on success."""
|
||||
if not room_id:
|
||||
return False
|
||||
if room_id in self._joined_rooms:
|
||||
return True
|
||||
try:
|
||||
await self._client.join_room(RoomID(room_id))
|
||||
self._joined_rooms.add(room_id)
|
||||
logger.info("Matrix: joined %s", room_id)
|
||||
await self._refresh_dm_cache()
|
||||
return True
|
||||
except Exception as exc:
|
||||
logger.warning("Matrix: error joining %s: %s", room_id, exc)
|
||||
return False
|
||||
|
||||
async def _join_pending_invites(self, sync_data: Dict[str, Any]) -> None:
|
||||
"""Join rooms still present in rooms.invite after sync processing."""
|
||||
rooms = sync_data.get("rooms", {}) if isinstance(sync_data, dict) else {}
|
||||
invites = rooms.get("invite", {})
|
||||
if not isinstance(invites, dict):
|
||||
return
|
||||
for room_id in invites:
|
||||
if room_id in self._joined_rooms:
|
||||
continue
|
||||
logger.info("Matrix: reconciling pending invite for %s", room_id)
|
||||
await self._join_room_by_id(str(room_id))
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Reactions (send, receive, processing lifecycle)
|
||||
@@ -1998,51 +1754,6 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
room_id,
|
||||
)
|
||||
|
||||
# Check if this reaction resolves a pending approval prompt.
|
||||
prompt = self._approval_prompts_by_event.get(reacts_to)
|
||||
if prompt and not prompt.resolved:
|
||||
if room_id != prompt.chat_id:
|
||||
return
|
||||
if self._allowed_user_ids and sender not in self._allowed_user_ids:
|
||||
logger.info(
|
||||
"Matrix: ignoring approval reaction from unauthorized user %s on %s",
|
||||
sender, reacts_to,
|
||||
)
|
||||
return
|
||||
choice = self._approval_reaction_map.get(key)
|
||||
if not choice:
|
||||
return
|
||||
try:
|
||||
from tools.approval import resolve_gateway_approval
|
||||
|
||||
count = resolve_gateway_approval(prompt.session_key, choice)
|
||||
if count:
|
||||
prompt.resolved = True
|
||||
self._approval_prompts_by_event.pop(reacts_to, None)
|
||||
self._approval_prompt_by_session.pop(prompt.session_key, None)
|
||||
logger.info(
|
||||
"Matrix reaction resolved %d approval(s) for session %s "
|
||||
"(choice=%s, user=%s)",
|
||||
count, prompt.session_key, choice, sender,
|
||||
)
|
||||
# Redact bot's seed reactions, leaving only the user's
|
||||
await self._redact_bot_approval_reactions(room_id, prompt)
|
||||
except Exception as exc:
|
||||
logger.error("Failed to resolve gateway approval from Matrix reaction: %s", exc)
|
||||
|
||||
async def _redact_bot_approval_reactions(
|
||||
self,
|
||||
room_id: str,
|
||||
prompt: "_MatrixApprovalPrompt",
|
||||
) -> None:
|
||||
"""Redact the bot's seed ✅/❎ reactions, leaving only the user's reaction."""
|
||||
for emoji, evt_id in prompt.bot_reaction_events.items():
|
||||
try:
|
||||
await self.redact_message(room_id, evt_id, "approval resolved")
|
||||
logger.debug("Matrix: redacted bot reaction %s (%s)", emoji, evt_id)
|
||||
except Exception as exc:
|
||||
logger.debug("Matrix: failed to redact bot reaction %s: %s", emoji, exc)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Text message aggregation (handles Matrix client-side splits)
|
||||
# ------------------------------------------------------------------
|
||||
@@ -2268,7 +1979,11 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
if not self._client or not text:
|
||||
return SendResult(success=False, error="No client or empty text")
|
||||
|
||||
msg_content = self._build_text_message_content(text, msgtype=msgtype)
|
||||
msg_content: Dict[str, Any] = {"msgtype": msgtype, "body": text}
|
||||
html = self._markdown_to_html(text)
|
||||
if html and html != text:
|
||||
msg_content["format"] = "org.matrix.custom.html"
|
||||
msg_content["formatted_body"] = html
|
||||
|
||||
try:
|
||||
event_id = await self._client.send_message_event(
|
||||
@@ -2331,77 +2046,6 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
# Mention detection helpers
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _build_text_message_content(self, text: str, msgtype: str = "m.text") -> Dict[str, Any]:
|
||||
"""Build Matrix text content with HTML and outbound mention metadata."""
|
||||
msg_content: Dict[str, Any] = {"msgtype": msgtype, "body": text}
|
||||
mention_user_ids = self._extract_outbound_mentions(text)
|
||||
if mention_user_ids:
|
||||
msg_content["m.mentions"] = {"user_ids": mention_user_ids}
|
||||
|
||||
html_source = self._inject_outbound_mention_links(text)
|
||||
html = self._markdown_to_html(html_source)
|
||||
if html and html != text:
|
||||
msg_content["format"] = "org.matrix.custom.html"
|
||||
msg_content["formatted_body"] = html
|
||||
|
||||
return msg_content
|
||||
|
||||
def _extract_outbound_mentions(self, text: str) -> list[str]:
|
||||
"""Return unique Matrix user IDs mentioned in outbound text."""
|
||||
protected, _ = self._protect_outbound_mention_regions(text)
|
||||
seen: Set[str] = set()
|
||||
mentions: list[str] = []
|
||||
for match in _OUTBOUND_MENTION_RE.finditer(protected):
|
||||
user_id = match.group(1)
|
||||
if user_id not in seen:
|
||||
seen.add(user_id)
|
||||
mentions.append(user_id)
|
||||
return mentions
|
||||
|
||||
def _inject_outbound_mention_links(self, text: str) -> str:
|
||||
"""Wrap outbound Matrix mentions in markdown links outside code spans."""
|
||||
if not text:
|
||||
return text
|
||||
|
||||
protected, placeholders = self._protect_outbound_mention_regions(text)
|
||||
|
||||
linked = _OUTBOUND_MENTION_RE.sub(
|
||||
lambda match: f"[{match.group(1)}](https://matrix.to/#/{match.group(1)})",
|
||||
protected,
|
||||
)
|
||||
|
||||
for idx, original in enumerate(placeholders):
|
||||
linked = linked.replace(f"\x00MENTION_PROTECTED{idx}\x00", original)
|
||||
|
||||
return linked
|
||||
|
||||
def _protect_outbound_mention_regions(self, text: str) -> tuple[str, list[str]]:
|
||||
"""Protect markdown regions where outbound mentions should stay literal."""
|
||||
placeholders: list[str] = []
|
||||
|
||||
def _protect(fragment: str) -> str:
|
||||
idx = len(placeholders)
|
||||
placeholders.append(fragment)
|
||||
return f"\x00MENTION_PROTECTED{idx}\x00"
|
||||
|
||||
protected = re.sub(
|
||||
r"```[\s\S]*?```",
|
||||
lambda match: _protect(match.group(0)),
|
||||
text or "",
|
||||
)
|
||||
protected = re.sub(
|
||||
r"`[^`\n]+`",
|
||||
lambda match: _protect(match.group(0)),
|
||||
protected,
|
||||
)
|
||||
protected = re.sub(
|
||||
r"\[[^\]]+\]\([^)]+\)",
|
||||
lambda match: _protect(match.group(0)),
|
||||
protected,
|
||||
)
|
||||
|
||||
return protected, placeholders
|
||||
|
||||
def _is_bot_mentioned(
|
||||
self,
|
||||
body: str,
|
||||
@@ -2436,33 +2080,13 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
return False
|
||||
|
||||
def _strip_mention(self, body: str) -> str:
|
||||
"""Remove explicit bot mentions from message body.
|
||||
"""Strip the bot's full MXID (``@user:server``) from *body*.
|
||||
|
||||
Important: only strip explicit mention tokens (``@user:server`` or
|
||||
``@localpart``). Do NOT strip bare words matching the bot localpart,
|
||||
otherwise normal phrases like "Hermes Agent" become "Agent".
|
||||
The bare localpart is intentionally *not* stripped — it would
|
||||
mangle file paths like ``/home/hermes/media/file.png``.
|
||||
"""
|
||||
if not body:
|
||||
return ""
|
||||
|
||||
# Strip explicit full MXID mentions.
|
||||
if self._user_id:
|
||||
body = body.replace(self._user_id, "")
|
||||
|
||||
# Strip explicit @localpart mentions only (not bare localpart words).
|
||||
if self._user_id and ":" in self._user_id:
|
||||
localpart = self._user_id.split(":")[0].lstrip("@")
|
||||
if localpart:
|
||||
body = re.sub(
|
||||
r'(?<![\w])@' + re.escape(localpart) + r'\b',
|
||||
'',
|
||||
body,
|
||||
flags=re.IGNORECASE,
|
||||
)
|
||||
|
||||
# Normalize spacing after mention removal.
|
||||
body = re.sub(r'[ \t]{2,}', ' ', body)
|
||||
body = re.sub(r'\s+([,.;:!?])', r'\1', body)
|
||||
return body.strip()
|
||||
|
||||
async def _get_display_name(self, room_id: str, user_id: str) -> str:
|
||||
|
||||
@@ -412,6 +412,7 @@ class MattermostAdapter(BasePlatformAdapter):
|
||||
|
||||
import aiohttp
|
||||
|
||||
last_exc = None
|
||||
file_data = None
|
||||
ct = "application/octet-stream"
|
||||
fname = url.rsplit("/", 1)[-1].split("?")[0] or f"{kind}.png"
|
||||
|
||||
@@ -1957,7 +1957,7 @@ class QQAdapter(BasePlatformAdapter):
|
||||
self, openid: str, content: str, reply_to: Optional[str] = None
|
||||
) -> SendResult:
|
||||
"""Send text to a C2C user via REST API."""
|
||||
self._next_msg_seq(reply_to or openid)
|
||||
msg_seq = self._next_msg_seq(reply_to or openid)
|
||||
body = self._build_text_body(content, reply_to)
|
||||
if reply_to:
|
||||
body["msg_id"] = reply_to
|
||||
@@ -1970,7 +1970,7 @@ class QQAdapter(BasePlatformAdapter):
|
||||
self, group_openid: str, content: str, reply_to: Optional[str] = None
|
||||
) -> SendResult:
|
||||
"""Send text to a group via REST API."""
|
||||
self._next_msg_seq(reply_to or group_openid)
|
||||
msg_seq = self._next_msg_seq(reply_to or group_openid)
|
||||
body = self._build_text_body(content, reply_to)
|
||||
if reply_to:
|
||||
body["msg_id"] = reply_to
|
||||
@@ -2135,6 +2135,11 @@ class QQAdapter(BasePlatformAdapter):
|
||||
|
||||
# Route
|
||||
chat_type = self._guess_chat_type(chat_id)
|
||||
target_path = (
|
||||
f"/v2/users/{chat_id}/files"
|
||||
if chat_type == "c2c"
|
||||
else f"/v2/groups/{chat_id}/files"
|
||||
)
|
||||
|
||||
if chat_type == "guild":
|
||||
# Guild channels don't support native media upload in the same way
|
||||
|
||||
@@ -84,7 +84,6 @@ from gateway.platforms.telegram_network import (
|
||||
discover_fallback_ips,
|
||||
parse_fallback_ip_env,
|
||||
)
|
||||
from utils import atomic_replace
|
||||
|
||||
|
||||
def check_telegram_requirements() -> bool:
|
||||
@@ -123,12 +122,12 @@ def _strip_mdv2(text: str) -> str:
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Markdown table → Telegram-friendly row groups
|
||||
# Markdown table → code block conversion
|
||||
# ---------------------------------------------------------------------------
|
||||
# Telegram's MarkdownV2 has no table syntax — '|' is just an escaped literal,
|
||||
# so pipe tables render as noisy backslash-pipe text with no alignment.
|
||||
# Reformating each row into a bold heading plus bullet list keeps the content
|
||||
# readable on mobile clients while preserving the source data.
|
||||
# Wrapping the table in a fenced code block makes Telegram render it as
|
||||
# monospace preformatted text with columns intact.
|
||||
|
||||
# Matches a GFM table delimiter row: optional outer pipes, cells containing
|
||||
# only dashes (with optional leading/trailing colons for alignment) separated
|
||||
@@ -145,49 +144,13 @@ def _is_table_row(line: str) -> bool:
|
||||
return bool(stripped) and '|' in stripped
|
||||
|
||||
|
||||
def _split_markdown_table_row(line: str) -> list[str]:
|
||||
"""Split a simple GFM table row into stripped cell values."""
|
||||
stripped = line.strip()
|
||||
if stripped.startswith("|"):
|
||||
stripped = stripped[1:]
|
||||
if stripped.endswith("|"):
|
||||
stripped = stripped[:-1]
|
||||
return [cell.strip() for cell in stripped.split("|")]
|
||||
|
||||
|
||||
def _render_table_block_for_telegram(table_block: list[str]) -> str:
|
||||
"""Render a detected GFM table as Telegram-friendly row groups."""
|
||||
if len(table_block) < 3:
|
||||
return "\n".join(table_block)
|
||||
|
||||
headers = _split_markdown_table_row(table_block[0])
|
||||
if len(headers) < 2:
|
||||
return "\n".join(table_block)
|
||||
|
||||
rendered_rows: list[str] = []
|
||||
for index, row in enumerate(table_block[2:], start=1):
|
||||
cells = _split_markdown_table_row(row)
|
||||
if len(cells) < len(headers):
|
||||
cells.extend([""] * (len(headers) - len(cells)))
|
||||
elif len(cells) > len(headers):
|
||||
cells = cells[: len(headers)]
|
||||
|
||||
heading = next((cell for cell in cells if cell), f"Row {index}")
|
||||
rendered_rows.append(f"**{heading}**")
|
||||
rendered_rows.extend(
|
||||
f"• {header}: {value}" for header, value in zip(headers, cells)
|
||||
)
|
||||
|
||||
return "\n\n".join(rendered_rows)
|
||||
|
||||
|
||||
def _wrap_markdown_tables(text: str) -> str:
|
||||
"""Rewrite GFM-style pipe tables into Telegram-friendly bullet groups.
|
||||
"""Wrap GFM-style pipe tables in ``` fences so Telegram renders them.
|
||||
|
||||
Detected by a row containing '|' immediately followed by a delimiter
|
||||
row matching :data:`_TABLE_SEPARATOR_RE`. Subsequent pipe-containing
|
||||
non-blank lines are consumed as the table body and rewritten as
|
||||
per-row bullet groups. Tables inside existing fenced code blocks are left
|
||||
non-blank lines are consumed as the table body and included in the
|
||||
wrapped block. Tables inside existing fenced code blocks are left
|
||||
alone.
|
||||
"""
|
||||
if '|' not in text or '-' not in text:
|
||||
@@ -224,7 +187,9 @@ def _wrap_markdown_tables(text: str) -> str:
|
||||
while j < len(lines) and _is_table_row(lines[j]):
|
||||
table_block.append(lines[j])
|
||||
j += 1
|
||||
out.append(_render_table_block_for_telegram(table_block))
|
||||
out.append('```')
|
||||
out.extend(table_block)
|
||||
out.append('```')
|
||||
i = j
|
||||
continue
|
||||
|
||||
@@ -369,49 +334,6 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
return {"link_preview_options": LinkPreviewOptions(is_disabled=True)}
|
||||
return {"disable_web_page_preview": True}
|
||||
|
||||
async def _drain_polling_connections(self) -> None:
|
||||
"""Reset the httpx connection pool used for getUpdates polling.
|
||||
|
||||
Network errors (especially through proxies like sing-box) can leave
|
||||
httpx connections in a half-closed state that still occupy pool slots.
|
||||
After enough reconnect cycles the pool fills up entirely, causing
|
||||
``Pool timeout: All connections in the connection pool are occupied.``
|
||||
|
||||
We reset ONLY ``_request[0]`` (the getUpdates request) — the general
|
||||
request (``_request[1]``) is left untouched so concurrent
|
||||
``send_message`` / ``edit_message`` calls are never interrupted.
|
||||
|
||||
Implementation note: accesses ``Bot._request[0]`` which is the
|
||||
get-updates ``BaseRequest`` in the PTB 22.x internal tuple
|
||||
``(get_updates_request, general_request)``. There is no public
|
||||
accessor for the polling request; review if upgrading to PTB 23+.
|
||||
"""
|
||||
if not (self._app and self._app.bot):
|
||||
return
|
||||
try:
|
||||
# PTB 22.x: _request is a (get_updates, general) tuple;
|
||||
# no public accessor exists for the polling request.
|
||||
polling_req = self._app.bot._request[0] # noqa: SLF001
|
||||
except Exception:
|
||||
return
|
||||
try:
|
||||
await polling_req.shutdown()
|
||||
except Exception:
|
||||
logger.debug(
|
||||
"[%s] Polling request shutdown failed (non-fatal)",
|
||||
self.name, exc_info=True,
|
||||
)
|
||||
try:
|
||||
await polling_req.initialize()
|
||||
logger.debug(
|
||||
"[%s] Polling request pool drained before reconnect", self.name
|
||||
)
|
||||
except Exception:
|
||||
logger.debug(
|
||||
"[%s] Polling request re-initialize failed (non-fatal)",
|
||||
self.name, exc_info=True,
|
||||
)
|
||||
|
||||
async def _handle_polling_network_error(self, error: Exception) -> None:
|
||||
"""Reconnect polling after a transient network interruption.
|
||||
|
||||
@@ -457,8 +379,6 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
await self._drain_polling_connections()
|
||||
|
||||
try:
|
||||
await self._app.updater.start_polling(
|
||||
allowed_updates=Update.ALL_TYPES,
|
||||
@@ -506,7 +426,6 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
except Exception:
|
||||
pass
|
||||
await asyncio.sleep(RETRY_DELAY)
|
||||
await self._drain_polling_connections()
|
||||
try:
|
||||
await self._app.updater.start_polling(
|
||||
allowed_updates=Update.ALL_TYPES,
|
||||
@@ -635,7 +554,7 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
_yaml.dump(config, f, default_flow_style=False, sort_keys=False)
|
||||
f.flush()
|
||||
os.fsync(f.fileno())
|
||||
atomic_replace(tmp_path, config_path)
|
||||
os.replace(tmp_path, config_path)
|
||||
except BaseException:
|
||||
try:
|
||||
os.unlink(tmp_path)
|
||||
@@ -2161,8 +2080,10 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
|
||||
text = content
|
||||
|
||||
# 0) Rewrite GFM-style pipe tables into Telegram-friendly row groups
|
||||
# before the normal MarkdownV2 conversions run.
|
||||
# 0) Pre-wrap GFM-style pipe tables in ``` fences. Telegram can't
|
||||
# render tables natively, but fenced code blocks render as
|
||||
# monospace preformatted text with columns intact. The wrapped
|
||||
# tables then flow through step (1) below as protected regions.
|
||||
text = _wrap_markdown_tables(text)
|
||||
|
||||
# 1) Protect fenced code blocks (``` ... ```)
|
||||
|
||||
@@ -89,7 +89,6 @@ MAX_CONSECUTIVE_FAILURES = 3
|
||||
RETRY_DELAY_SECONDS = 2
|
||||
BACKOFF_DELAY_SECONDS = 30
|
||||
SESSION_EXPIRED_ERRCODE = -14
|
||||
RATE_LIMIT_ERRCODE = -2 # iLink frequency limit — backoff and retry
|
||||
MESSAGE_DEDUP_TTL_SECONDS = 300
|
||||
|
||||
MEDIA_IMAGE = 1
|
||||
@@ -1114,7 +1113,7 @@ async def qr_login(
|
||||
class WeixinAdapter(BasePlatformAdapter):
|
||||
"""Native Hermes adapter for Weixin personal accounts."""
|
||||
|
||||
MAX_MESSAGE_LENGTH = 2000
|
||||
MAX_MESSAGE_LENGTH = 4000
|
||||
|
||||
# WeChat does not support editing sent messages — streaming must use the
|
||||
# fallback "send-final-only" path so the cursor (▉) is never left visible.
|
||||
@@ -1139,10 +1138,10 @@ class WeixinAdapter(BasePlatformAdapter):
|
||||
extra.get("cdn_base_url") or os.getenv("WEIXIN_CDN_BASE_URL", WEIXIN_CDN_BASE_URL)
|
||||
).strip().rstrip("/")
|
||||
self._send_chunk_delay_seconds = float(
|
||||
extra.get("send_chunk_delay_seconds") or os.getenv("WEIXIN_SEND_CHUNK_DELAY_SECONDS", "1.5")
|
||||
extra.get("send_chunk_delay_seconds") or os.getenv("WEIXIN_SEND_CHUNK_DELAY_SECONDS", "0.35")
|
||||
)
|
||||
self._send_chunk_retries = int(
|
||||
extra.get("send_chunk_retries") or os.getenv("WEIXIN_SEND_CHUNK_RETRIES", "4")
|
||||
extra.get("send_chunk_retries") or os.getenv("WEIXIN_SEND_CHUNK_RETRIES", "2")
|
||||
)
|
||||
self._send_chunk_retry_delay_seconds = float(
|
||||
extra.get("send_chunk_retry_delay_seconds")
|
||||
@@ -1532,28 +1531,6 @@ class WeixinAdapter(BasePlatformAdapter):
|
||||
self.name, _safe_id(chat_id),
|
||||
)
|
||||
continue
|
||||
# Rate limit (-2) — backoff and retry
|
||||
is_rate_limited = (
|
||||
ret == RATE_LIMIT_ERRCODE
|
||||
or errcode == RATE_LIMIT_ERRCODE
|
||||
)
|
||||
if is_rate_limited:
|
||||
errmsg = resp.get("errmsg") or resp.get("msg") or "rate limited"
|
||||
# Record the error so we raise a descriptive
|
||||
# RuntimeError (instead of AssertionError) if the
|
||||
# loop exhausts with the server still rate-limiting.
|
||||
last_error = RuntimeError(
|
||||
f"iLink sendmessage rate limited: ret={ret} errcode={errcode} errmsg={errmsg}"
|
||||
)
|
||||
if attempt >= self._send_chunk_retries:
|
||||
break
|
||||
wait = self._send_chunk_retry_delay_seconds * 3 # 3x backoff for rate limit
|
||||
logger.warning(
|
||||
"[%s] rate limited for %s; backing off %.1fs before retry",
|
||||
self.name, _safe_id(chat_id), wait,
|
||||
)
|
||||
await asyncio.sleep(wait)
|
||||
continue
|
||||
errmsg = resp.get("errmsg") or resp.get("msg") or "unknown error"
|
||||
raise RuntimeError(
|
||||
f"iLink sendmessage error: ret={ret} errcode={errcode} errmsg={errmsg}"
|
||||
|
||||
@@ -90,7 +90,7 @@ from gateway.platforms.yuanbao_proto import (
|
||||
encode_get_group_member_list,
|
||||
next_seq_no,
|
||||
)
|
||||
from gateway.session import build_session_key
|
||||
from gateway.session import SessionSource, build_session_key
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -1897,7 +1897,7 @@ class OwnerCommandMiddleware(InboundMiddleware):
|
||||
return None, None, False
|
||||
|
||||
# Sender identity check: bot owner <-> push.from_account == push.bot_owner_id
|
||||
# owner_id = (push or {}).get("bot_owner_id") or ""
|
||||
owner_id = (push or {}).get("bot_owner_id") or ""
|
||||
# is_owner = bool(owner_id) and owner_id == from_account
|
||||
is_owner = True
|
||||
return cmd, cmd_line, is_owner
|
||||
|
||||
@@ -21,10 +21,12 @@ import hashlib
|
||||
import hmac
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import secrets
|
||||
import struct
|
||||
import time
|
||||
import urllib.parse
|
||||
from datetime import datetime, timezone, timedelta
|
||||
from typing import Optional, Any
|
||||
|
||||
import httpx
|
||||
|
||||
@@ -19,8 +19,9 @@ yuanbao_proto.py - Yuanbao WebSocket 协议编解码(纯 Python 实现)
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import struct
|
||||
import threading
|
||||
from typing import Optional
|
||||
from typing import Optional, Union
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
+51
-488
@@ -31,12 +31,6 @@ from pathlib import Path
|
||||
from datetime import datetime
|
||||
from typing import Dict, Optional, Any, List
|
||||
|
||||
# account_usage imports the OpenAI SDK chain (~230 ms). Only needed by
|
||||
# /usage; we still import it at module top in the gateway because test
|
||||
# patches (tests/gateway/test_usage_command.py) target
|
||||
# `gateway.run.fetch_account_usage` as a module-level attribute. The
|
||||
# gateway is a long-running daemon, so its boot cost matters less than
|
||||
# preserving the established test-patch surface.
|
||||
from agent.account_usage import fetch_account_usage, render_account_usage_lines
|
||||
|
||||
# --- Agent cache tuning ---------------------------------------------------
|
||||
@@ -46,133 +40,6 @@ from agent.account_usage import fetch_account_usage, render_account_usage_lines
|
||||
# from _enforce_agent_cache_cap() and _session_expiry_watcher() below.
|
||||
_AGENT_CACHE_MAX_SIZE = 128
|
||||
_AGENT_CACHE_IDLE_TTL_SECS = 3600.0 # evict agents idle for >1h
|
||||
# Only auto-continue interrupted gateway turns while the interruption is fresh.
|
||||
# Stale tool-tail/resume markers can otherwise revive an unrelated old task
|
||||
# after a gateway restart when the user's next message starts new work.
|
||||
#
|
||||
# The freshness signal is the timestamp of the last transcript row, which
|
||||
# ``hermes_state.get_messages`` carries on every persisted message. This
|
||||
# handles the two auto-continue cases uniformly:
|
||||
# * resume_pending (gateway restart/shutdown watchdog marked the session)
|
||||
# * tool-tail (last persisted message is a tool result the agent
|
||||
# never got to reply to)
|
||||
# In both cases "when did we last do anything on this transcript" is the
|
||||
# correct freshness question, so one signal replaces two divergent ones.
|
||||
#
|
||||
# Default window: 1 hour. This comfortably covers ``agent.gateway_timeout``
|
||||
# (30 min default) plus runtime slack — a legitimate long-running turn that
|
||||
# gets interrupted near its timeout boundary and is resumed shortly after
|
||||
# is still classified fresh. Override via
|
||||
# ``config.yaml`` ``agent.gateway_auto_continue_freshness``.
|
||||
_AUTO_CONTINUE_FRESHNESS_SECS_DEFAULT = 60 * 60
|
||||
|
||||
|
||||
def _coerce_gateway_timestamp(value: Any) -> Optional[float]:
|
||||
"""Best-effort conversion of stored gateway timestamps to epoch seconds.
|
||||
|
||||
Missing/unparseable timestamps return None so legacy transcripts keep the
|
||||
historical auto-continue behaviour instead of being silently dropped.
|
||||
Accepts: datetime, epoch seconds (int/float), epoch milliseconds (when
|
||||
the magnitude exceeds year-2286), ISO-8601 strings (with or without a
|
||||
trailing ``Z``), and numeric strings.
|
||||
"""
|
||||
if value is None:
|
||||
return None
|
||||
if isinstance(value, datetime):
|
||||
return value.timestamp()
|
||||
if isinstance(value, bool): # bool is a subclass of int — skip it
|
||||
return None
|
||||
if isinstance(value, (int, float)):
|
||||
# Some platform events use milliseconds; Hermes state rows use seconds.
|
||||
return float(value) / 1000.0 if float(value) > 10_000_000_000 else float(value)
|
||||
if isinstance(value, str):
|
||||
text = value.strip()
|
||||
if not text:
|
||||
return None
|
||||
try:
|
||||
numeric = float(text)
|
||||
return numeric / 1000.0 if numeric > 10_000_000_000 else numeric
|
||||
except ValueError:
|
||||
pass
|
||||
try:
|
||||
return datetime.fromisoformat(text.replace("Z", "+00:00")).timestamp()
|
||||
except ValueError:
|
||||
return None
|
||||
return None
|
||||
|
||||
|
||||
def _auto_continue_freshness_window() -> float:
|
||||
"""Return the configured auto-continue freshness window in seconds.
|
||||
|
||||
Reads ``HERMES_AUTO_CONTINUE_FRESHNESS`` (bridged from
|
||||
``config.yaml`` ``agent.gateway_auto_continue_freshness`` at gateway
|
||||
startup, same pattern as ``HERMES_AGENT_TIMEOUT``). Falls back to the
|
||||
module default when unset or malformed. Non-positive values disable
|
||||
the freshness gate (restores the pre-fix "always fresh" behaviour for
|
||||
users who want to opt out).
|
||||
"""
|
||||
raw = os.environ.get("HERMES_AUTO_CONTINUE_FRESHNESS")
|
||||
if raw is None or raw == "":
|
||||
return float(_AUTO_CONTINUE_FRESHNESS_SECS_DEFAULT)
|
||||
try:
|
||||
return float(raw)
|
||||
except (TypeError, ValueError):
|
||||
return float(_AUTO_CONTINUE_FRESHNESS_SECS_DEFAULT)
|
||||
|
||||
|
||||
def _is_fresh_gateway_interruption(
|
||||
value: Any,
|
||||
*,
|
||||
now: Optional[float] = None,
|
||||
window_secs: Optional[float] = None,
|
||||
) -> bool:
|
||||
"""Return True when an interruption marker is fresh enough to auto-continue.
|
||||
|
||||
Unknown timestamps are treated as fresh for backward compatibility with
|
||||
legacy transcripts (pre-dating timestamp persistence) and with in-memory
|
||||
test scaffolding that constructs history entries without timestamps.
|
||||
|
||||
A non-positive ``window_secs`` disables the gate (always fresh), which
|
||||
restores the pre-fix behaviour for users who opt out via config.
|
||||
"""
|
||||
window = (
|
||||
float(window_secs)
|
||||
if window_secs is not None
|
||||
else float(_AUTO_CONTINUE_FRESHNESS_SECS_DEFAULT)
|
||||
)
|
||||
if window <= 0:
|
||||
return True
|
||||
timestamp = _coerce_gateway_timestamp(value)
|
||||
if timestamp is None:
|
||||
return True
|
||||
current = time.time() if now is None else now
|
||||
return current - timestamp <= window
|
||||
|
||||
|
||||
def _last_transcript_timestamp(history: Optional[List[Dict[str, Any]]]) -> Any:
|
||||
"""Return the ``timestamp`` of the last usable transcript row, if any.
|
||||
|
||||
Skips metadata-only rows (``session_meta``, system injections) that are
|
||||
dropped before being handed to the agent. Returns ``None`` when no
|
||||
usable row carries a timestamp — callers should treat that as "fresh"
|
||||
for backward compatibility.
|
||||
"""
|
||||
if not history:
|
||||
return None
|
||||
for msg in reversed(history):
|
||||
if not isinstance(msg, dict):
|
||||
continue
|
||||
role = msg.get("role")
|
||||
if not role or role in ("session_meta", "system"):
|
||||
continue
|
||||
ts = msg.get("timestamp")
|
||||
if ts is not None:
|
||||
return ts
|
||||
# First non-meta row without a timestamp — legacy transcript row.
|
||||
# Returning None lets the caller fall through to the legacy-fresh path.
|
||||
return None
|
||||
return None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# SSL certificate auto-detection for NixOS and other non-standard systems.
|
||||
@@ -346,13 +213,6 @@ if _config_path.exists():
|
||||
os.environ["HERMES_AGENT_NOTIFY_INTERVAL"] = str(_agent_cfg["gateway_notify_interval"])
|
||||
if "restart_drain_timeout" in _agent_cfg and "HERMES_RESTART_DRAIN_TIMEOUT" not in os.environ:
|
||||
os.environ["HERMES_RESTART_DRAIN_TIMEOUT"] = str(_agent_cfg["restart_drain_timeout"])
|
||||
if (
|
||||
"gateway_auto_continue_freshness" in _agent_cfg
|
||||
and "HERMES_AUTO_CONTINUE_FRESHNESS" not in os.environ
|
||||
):
|
||||
os.environ["HERMES_AUTO_CONTINUE_FRESHNESS"] = str(
|
||||
_agent_cfg["gateway_auto_continue_freshness"]
|
||||
)
|
||||
_display_cfg = _cfg.get("display", {})
|
||||
if _display_cfg and isinstance(_display_cfg, dict):
|
||||
if "busy_input_mode" in _display_cfg and "HERMES_GATEWAY_BUSY_INPUT_MODE" not in os.environ:
|
||||
@@ -649,31 +509,15 @@ def _platform_config_key(platform: "Platform") -> str:
|
||||
|
||||
|
||||
def _load_gateway_config() -> dict:
|
||||
"""Load and parse ~/.hermes/config.yaml, returning {} on any error.
|
||||
|
||||
Uses the module-level ``_hermes_home`` (so tests that monkeypatch it
|
||||
still see their fixture) and shares the mtime-keyed raw-yaml cache
|
||||
from ``hermes_cli.config.read_raw_config`` when the paths match.
|
||||
"""
|
||||
config_path = _hermes_home / 'config.yaml'
|
||||
try:
|
||||
from hermes_cli.config import get_config_path, read_raw_config
|
||||
# Fast path: if _hermes_home agrees with the canonical config
|
||||
# location, reuse the shared cache. Otherwise fall through to a
|
||||
# direct read (keeps test fixtures with a monkeypatched
|
||||
# _hermes_home working).
|
||||
if config_path == get_config_path():
|
||||
return read_raw_config()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
"""Load and parse ~/.hermes/config.yaml, returning {} on any error."""
|
||||
try:
|
||||
config_path = _hermes_home / 'config.yaml'
|
||||
if config_path.exists():
|
||||
import yaml
|
||||
with open(config_path, 'r', encoding='utf-8') as f:
|
||||
return yaml.safe_load(f) or {}
|
||||
except Exception:
|
||||
logger.debug("Could not load gateway config from %s", config_path)
|
||||
logger.debug("Could not load gateway config from %s", _hermes_home / 'config.yaml')
|
||||
return {}
|
||||
|
||||
|
||||
@@ -1293,14 +1137,14 @@ class GatewayRunner:
|
||||
|
||||
service_tier = getattr(self, "_service_tier", None)
|
||||
if not service_tier:
|
||||
route["request_overrides"] = {}
|
||||
route["request_overrides"] = None
|
||||
return route
|
||||
|
||||
try:
|
||||
overrides = resolve_fast_mode_overrides(route["model"])
|
||||
except Exception:
|
||||
overrides = None
|
||||
route["request_overrides"] = overrides or {}
|
||||
route["request_overrides"] = overrides
|
||||
return route
|
||||
|
||||
async def _handle_adapter_fatal_error(self, adapter: BasePlatformAdapter) -> None:
|
||||
@@ -3927,8 +3771,6 @@ class GatewayRunner:
|
||||
return await self._handle_yolo_command(event)
|
||||
if _cmd_def_inner.name == "verbose":
|
||||
return await self._handle_verbose_command(event)
|
||||
if _cmd_def_inner.name == "footer":
|
||||
return await self._handle_footer_command(event)
|
||||
|
||||
# Gateway-handled info/control commands with dedicated
|
||||
# running-agent handlers.
|
||||
@@ -4149,9 +3991,6 @@ class GatewayRunner:
|
||||
if canonical == "verbose":
|
||||
return await self._handle_verbose_command(event)
|
||||
|
||||
if canonical == "footer":
|
||||
return await self._handle_footer_command(event)
|
||||
|
||||
if canonical == "yolo":
|
||||
return await self._handle_yolo_command(event)
|
||||
|
||||
@@ -4607,7 +4446,9 @@ class GatewayRunner:
|
||||
# Read privacy.redact_pii from config (re-read per message)
|
||||
_redact_pii = False
|
||||
try:
|
||||
_pcfg = _load_gateway_config()
|
||||
import yaml as _pii_yaml
|
||||
with open(_config_path, encoding="utf-8") as _pf:
|
||||
_pcfg = _pii_yaml.safe_load(_pf) or {}
|
||||
_redact_pii = bool((_pcfg.get("privacy") or {}).get("redact_pii", False))
|
||||
except Exception:
|
||||
pass
|
||||
@@ -4750,15 +4591,18 @@ class GatewayRunner:
|
||||
_hyg_model = "anthropic/claude-sonnet-4.6"
|
||||
_hyg_threshold_pct = 0.85
|
||||
_hyg_compression_enabled = True
|
||||
_hyg_hard_msg_limit = 400
|
||||
_hyg_config_context_length = None
|
||||
_hyg_provider = None
|
||||
_hyg_base_url = None
|
||||
_hyg_api_key = None
|
||||
_hyg_data = {}
|
||||
try:
|
||||
_hyg_data = _load_gateway_config()
|
||||
if _hyg_data:
|
||||
_hyg_cfg_path = _hermes_home / "config.yaml"
|
||||
if _hyg_cfg_path.exists():
|
||||
import yaml as _hyg_yaml
|
||||
with open(_hyg_cfg_path, encoding="utf-8") as _hyg_f:
|
||||
_hyg_data = _hyg_yaml.safe_load(_hyg_f) or {}
|
||||
|
||||
# Resolve model name (same logic as run_sync)
|
||||
_model_cfg = _hyg_data.get("model", {})
|
||||
if isinstance(_model_cfg, str):
|
||||
@@ -4785,14 +4629,6 @@ class GatewayRunner:
|
||||
_hyg_compression_enabled = str(
|
||||
_comp_cfg.get("enabled", True)
|
||||
).lower() in ("true", "1", "yes")
|
||||
_raw_hard_limit = _comp_cfg.get("hygiene_hard_message_limit")
|
||||
if _raw_hard_limit is not None:
|
||||
try:
|
||||
_parsed = int(_raw_hard_limit)
|
||||
if _parsed > 0:
|
||||
_hyg_hard_msg_limit = _parsed
|
||||
except (TypeError, ValueError):
|
||||
pass
|
||||
|
||||
try:
|
||||
_hyg_model, _hyg_runtime = self._resolve_session_agent_runtime(
|
||||
@@ -4874,10 +4710,8 @@ class GatewayRunner:
|
||||
# collection, which prevents compression, which causes more
|
||||
# disconnects. 400 messages is well above normal sessions
|
||||
# but catches runaway growth before it becomes unrecoverable.
|
||||
# Threshold is configurable via
|
||||
# compression.hygiene_hard_message_limit.
|
||||
# (#2153)
|
||||
_HARD_MSG_LIMIT = _hyg_hard_msg_limit
|
||||
_HARD_MSG_LIMIT = 400
|
||||
_needs_compress = (
|
||||
_approx_tokens >= _compress_token_threshold
|
||||
or _msg_count >= _HARD_MSG_LIMIT
|
||||
@@ -4966,58 +4800,6 @@ class GatewayRunner:
|
||||
"compression",
|
||||
f"{_new_tokens:,}",
|
||||
)
|
||||
|
||||
# If summary generation failed, the
|
||||
# compressor inserted a static fallback
|
||||
# placeholder and the dropped turns are
|
||||
# gone for good. Surface a visible
|
||||
# warning to the gateway user — agent.log
|
||||
# alone is invisible on TG/Discord/etc.
|
||||
_comp = getattr(_hyg_agent, "context_compressor", None)
|
||||
if _comp is not None and getattr(_comp, "_last_summary_fallback_used", False):
|
||||
_dropped = getattr(_comp, "_last_summary_dropped_count", 0)
|
||||
_err = getattr(_comp, "_last_summary_error", None) or "unknown error"
|
||||
_warn_msg = (
|
||||
"⚠️ Context compression summary failed "
|
||||
f"({_err}). {_dropped} historical message(s) "
|
||||
"were removed and replaced with a placeholder. "
|
||||
"Earlier context is no longer recoverable. "
|
||||
"Consider /reset for a clean session, or check "
|
||||
"your auxiliary.compression model configuration."
|
||||
)
|
||||
try:
|
||||
_adapter = self.adapters.get(source.platform)
|
||||
if _adapter and source.chat_id:
|
||||
await _adapter.send(source.chat_id, _warn_msg, metadata=_hyg_meta)
|
||||
except Exception as _werr:
|
||||
logger.warning(
|
||||
"Failed to deliver compression-failure warning to user: %s",
|
||||
_werr,
|
||||
)
|
||||
# Separately: if the user's CONFIGURED aux
|
||||
# model failed and we recovered by falling
|
||||
# back to the main model, tell them — a
|
||||
# misconfigured auxiliary.compression.model
|
||||
# is something only they can fix, and
|
||||
# silent recovery would hide it.
|
||||
elif _comp is not None and getattr(_comp, "_last_aux_model_failure_model", None):
|
||||
_aux_model = getattr(_comp, "_last_aux_model_failure_model", "")
|
||||
_aux_err = getattr(_comp, "_last_aux_model_failure_error", None) or "unknown error"
|
||||
_aux_msg = (
|
||||
f"ℹ️ Configured compression model `{_aux_model}` "
|
||||
f"failed ({_aux_err}). Recovered using your main "
|
||||
"model — context is intact — but you may want to "
|
||||
"check `auxiliary.compression.model` in config.yaml."
|
||||
)
|
||||
try:
|
||||
_adapter = self.adapters.get(source.platform)
|
||||
if _adapter and source.chat_id:
|
||||
await _adapter.send(source.chat_id, _aux_msg, metadata=_hyg_meta)
|
||||
except Exception as _werr:
|
||||
logger.warning(
|
||||
"Failed to deliver aux-model-fallback notice to user: %s",
|
||||
_werr,
|
||||
)
|
||||
finally:
|
||||
self._cleanup_agent_resources(_hyg_agent)
|
||||
|
||||
@@ -5245,27 +5027,6 @@ class GatewayRunner:
|
||||
display_reasoning = last_reasoning.strip()
|
||||
response = f"💭 **Reasoning:**\n```\n{display_reasoning}\n```\n\n{response}"
|
||||
|
||||
# Runtime-metadata footer — only on the FINAL message of the turn.
|
||||
# Off by default (display.runtime_footer.enabled=false). When
|
||||
# streaming already delivered the body, we can't mutate the sent
|
||||
# text, so we fire a separate trailing send below.
|
||||
_footer_line = ""
|
||||
try:
|
||||
from gateway.runtime_footer import build_footer_line as _bfl
|
||||
_footer_line = _bfl(
|
||||
user_config=_load_gateway_config(),
|
||||
platform_key=_platform_config_key(source.platform),
|
||||
model=agent_result.get("model"),
|
||||
context_tokens=agent_result.get("last_prompt_tokens", 0) or 0,
|
||||
context_length=agent_result.get("context_length") or None,
|
||||
cwd=os.environ.get("TERMINAL_CWD", ""),
|
||||
)
|
||||
except Exception as _footer_err:
|
||||
logger.debug("runtime_footer build failed: %s", _footer_err)
|
||||
_footer_line = ""
|
||||
if _footer_line and response and not agent_result.get("already_sent"):
|
||||
response = f"{response}\n\n{_footer_line}"
|
||||
|
||||
# Emit agent:end hook
|
||||
await self.hooks.emit("agent:end", {
|
||||
**hook_ctx,
|
||||
@@ -5436,17 +5197,6 @@ class GatewayRunner:
|
||||
await self._deliver_media_from_response(
|
||||
response, event, _media_adapter,
|
||||
)
|
||||
# Streaming already delivered the body text, but the footer was
|
||||
# intentionally held back (see the `not already_sent` gate above).
|
||||
# Send it now as a small trailing message so Telegram/Discord/etc.
|
||||
# still surface the runtime metadata on the final reply.
|
||||
if _footer_line:
|
||||
try:
|
||||
_foot_adapter = self.adapters.get(source.platform)
|
||||
if _foot_adapter:
|
||||
await _foot_adapter.send(source.chat_id, _footer_line)
|
||||
except Exception as _e:
|
||||
logger.debug("trailing footer send failed: %s", _e)
|
||||
return None
|
||||
|
||||
return response
|
||||
@@ -5529,8 +5279,11 @@ class GatewayRunner:
|
||||
custom_provs = None
|
||||
|
||||
try:
|
||||
data = _load_gateway_config()
|
||||
if data:
|
||||
cfg_path = _hermes_home / "config.yaml"
|
||||
if cfg_path.exists():
|
||||
import yaml as _info_yaml
|
||||
with open(cfg_path, encoding="utf-8") as f:
|
||||
data = _info_yaml.safe_load(f) or {}
|
||||
model_cfg = data.get("model", {})
|
||||
if isinstance(model_cfg, dict):
|
||||
raw_ctx = model_cfg.get("context_length")
|
||||
@@ -6129,8 +5882,9 @@ class GatewayRunner:
|
||||
custom_provs = None
|
||||
config_path = _hermes_home / "config.yaml"
|
||||
try:
|
||||
cfg = _load_gateway_config()
|
||||
if cfg:
|
||||
if config_path.exists():
|
||||
with open(config_path, encoding="utf-8") as f:
|
||||
cfg = yaml.safe_load(f) or {}
|
||||
model_cfg = cfg.get("model", {})
|
||||
if isinstance(model_cfg, dict):
|
||||
current_model = model_cfg.get("default", "")
|
||||
@@ -6169,7 +5923,6 @@ class GatewayRunner:
|
||||
providers = list_authenticated_providers(
|
||||
current_provider=current_provider,
|
||||
current_base_url=current_base_url,
|
||||
current_model=current_model,
|
||||
user_providers=user_provs,
|
||||
custom_providers=custom_provs,
|
||||
max_models=50,
|
||||
@@ -6291,7 +6044,6 @@ class GatewayRunner:
|
||||
providers = list_authenticated_providers(
|
||||
current_provider=current_provider,
|
||||
current_base_url=current_base_url,
|
||||
current_model=current_model,
|
||||
user_providers=user_provs,
|
||||
custom_providers=custom_provs,
|
||||
max_models=5,
|
||||
@@ -6437,14 +6189,20 @@ class GatewayRunner:
|
||||
|
||||
async def _handle_personality_command(self, event: MessageEvent) -> str:
|
||||
"""Handle /personality command - list or set a personality."""
|
||||
import yaml
|
||||
from hermes_constants import display_hermes_home
|
||||
|
||||
args = event.get_command_args().strip().lower()
|
||||
config_path = _hermes_home / 'config.yaml'
|
||||
|
||||
try:
|
||||
config = _load_gateway_config()
|
||||
personalities = config.get("agent", {}).get("personalities", {}) if config else {}
|
||||
if config_path.exists():
|
||||
with open(config_path, 'r', encoding="utf-8") as f:
|
||||
config = yaml.safe_load(f) or {}
|
||||
personalities = config.get("agent", {}).get("personalities", {})
|
||||
else:
|
||||
config = {}
|
||||
personalities = {}
|
||||
except Exception:
|
||||
config = {}
|
||||
personalities = {}
|
||||
@@ -6574,10 +6332,18 @@ class GatewayRunner:
|
||||
|
||||
env_key = f"{platform_name.upper()}_HOME_CHANNEL"
|
||||
|
||||
# Save to .env so it persists across restarts
|
||||
# Save to config.yaml
|
||||
try:
|
||||
from hermes_cli.config import save_env_value
|
||||
save_env_value(env_key, str(chat_id))
|
||||
import yaml
|
||||
config_path = _hermes_home / 'config.yaml'
|
||||
user_config = {}
|
||||
if config_path.exists():
|
||||
with open(config_path, encoding="utf-8") as f:
|
||||
user_config = yaml.safe_load(f) or {}
|
||||
user_config[env_key] = chat_id
|
||||
atomic_yaml_write(config_path, user_config)
|
||||
# Also set in the current environment so it takes effect immediately
|
||||
os.environ[env_key] = str(chat_id)
|
||||
except Exception as e:
|
||||
return f"Failed to save home channel: {e}"
|
||||
|
||||
@@ -7438,13 +7204,17 @@ class GatewayRunner:
|
||||
``display.platforms.<platform>.tool_progress`` so each channel can
|
||||
have its own verbosity level independently.
|
||||
"""
|
||||
import yaml
|
||||
|
||||
config_path = _hermes_home / "config.yaml"
|
||||
platform_key = _platform_config_key(event.source.platform)
|
||||
|
||||
# --- check config gate ------------------------------------------------
|
||||
try:
|
||||
user_config = _load_gateway_config()
|
||||
user_config = {}
|
||||
if config_path.exists():
|
||||
with open(config_path, encoding="utf-8") as f:
|
||||
user_config = yaml.safe_load(f) or {}
|
||||
gate_enabled = user_config.get("display", {}).get("tool_progress_command", False)
|
||||
except Exception:
|
||||
gate_enabled = False
|
||||
@@ -7492,94 +7262,6 @@ class GatewayRunner:
|
||||
logger.warning("Failed to save tool_progress mode: %s", e)
|
||||
return f"{descriptions[new_mode]}\n_(could not save to config: {e})_"
|
||||
|
||||
async def _handle_footer_command(self, event: MessageEvent) -> str:
|
||||
"""Handle /footer command — toggle the runtime-metadata footer.
|
||||
|
||||
Usage:
|
||||
/footer → toggle on/off
|
||||
/footer on → enable globally
|
||||
/footer off → disable globally
|
||||
/footer status → show current state + fields
|
||||
|
||||
The footer is saved to ``display.runtime_footer.enabled`` (global).
|
||||
Per-platform overrides under ``display.platforms.<platform>.runtime_footer``
|
||||
are respected but not modified here — edit config.yaml directly for
|
||||
per-platform control.
|
||||
"""
|
||||
from gateway.runtime_footer import resolve_footer_config
|
||||
|
||||
config_path = _hermes_home / "config.yaml"
|
||||
platform_key = _platform_config_key(event.source.platform)
|
||||
|
||||
# --- parse argument -------------------------------------------------
|
||||
arg = ""
|
||||
try:
|
||||
text = (getattr(event, "message", None) or "").strip()
|
||||
if text.startswith("/"):
|
||||
parts = text.split(None, 1)
|
||||
if len(parts) > 1:
|
||||
arg = parts[1].strip().lower()
|
||||
except Exception:
|
||||
arg = ""
|
||||
|
||||
# --- load config ----------------------------------------------------
|
||||
try:
|
||||
user_config: dict = _load_gateway_config()
|
||||
except Exception as e:
|
||||
return f"⚠️ Could not read config.yaml: {e}"
|
||||
|
||||
effective = resolve_footer_config(user_config, platform_key)
|
||||
|
||||
if arg in ("status", "?"):
|
||||
state = "ON" if effective["enabled"] else "OFF"
|
||||
fields = ", ".join(effective.get("fields") or [])
|
||||
return (
|
||||
f"📎 Runtime footer: **{state}**\n"
|
||||
f"Fields: `{fields}`\n"
|
||||
f"Platform: `{platform_key}`"
|
||||
)
|
||||
|
||||
if arg in ("on", "enable", "true", "1"):
|
||||
new_state = True
|
||||
elif arg in ("off", "disable", "false", "0"):
|
||||
new_state = False
|
||||
elif arg == "":
|
||||
new_state = not effective["enabled"]
|
||||
else:
|
||||
return "Usage: `/footer [on|off|status]`"
|
||||
|
||||
# --- write global flag ---------------------------------------------
|
||||
try:
|
||||
if not isinstance(user_config.get("display"), dict):
|
||||
user_config["display"] = {}
|
||||
display = user_config["display"]
|
||||
if not isinstance(display.get("runtime_footer"), dict):
|
||||
display["runtime_footer"] = {}
|
||||
display["runtime_footer"]["enabled"] = new_state
|
||||
atomic_yaml_write(config_path, user_config)
|
||||
except Exception as e:
|
||||
logger.warning("Failed to save runtime_footer.enabled: %s", e)
|
||||
return f"⚠️ Could not save config: {e}"
|
||||
|
||||
state = "ON" if new_state else "OFF"
|
||||
example = ""
|
||||
if new_state:
|
||||
# Show a preview using current agent state if available.
|
||||
from gateway.runtime_footer import format_runtime_footer
|
||||
preview = format_runtime_footer(
|
||||
model=_resolve_gateway_model(user_config) or None,
|
||||
context_tokens=0,
|
||||
context_length=None,
|
||||
fields=effective.get("fields") or ["model", "context_pct", "cwd"],
|
||||
)
|
||||
if preview:
|
||||
example = f"\nExample: `{preview}`"
|
||||
return (
|
||||
f"📎 Runtime footer: **{state}**"
|
||||
f"{example}\n"
|
||||
f"_(saved globally — takes effect on next message)_"
|
||||
)
|
||||
|
||||
async def _handle_compress_command(self, event: MessageEvent) -> str:
|
||||
"""Handle /compress command -- manually compress conversation context.
|
||||
|
||||
@@ -7615,6 +7297,7 @@ class GatewayRunner:
|
||||
for m in history
|
||||
if m.get("role") in ("user", "assistant") and m.get("content")
|
||||
]
|
||||
original_count = len(msgs)
|
||||
approx_tokens = estimate_messages_tokens_rough(msgs)
|
||||
|
||||
tmp_agent = AIAgent(
|
||||
@@ -7660,17 +7343,6 @@ class GatewayRunner:
|
||||
approx_tokens,
|
||||
new_tokens,
|
||||
)
|
||||
# Detect summary-generation failure so we can surface a
|
||||
# visible warning to the user even on the manual /compress
|
||||
# path (otherwise the failure is silently logged).
|
||||
_summary_failed = bool(getattr(compressor, "_last_summary_fallback_used", False))
|
||||
_dropped_count = int(getattr(compressor, "_last_summary_dropped_count", 0) or 0)
|
||||
_summary_err = getattr(compressor, "_last_summary_error", None)
|
||||
# Separately: did the user's CONFIGURED aux model fail
|
||||
# and we recovered via main? Surface that as an info
|
||||
# note so they can fix their config.
|
||||
_aux_fail_model = getattr(compressor, "_last_aux_model_failure_model", None)
|
||||
_aux_fail_err = getattr(compressor, "_last_aux_model_failure_error", None)
|
||||
finally:
|
||||
self._cleanup_agent_resources(tmp_agent)
|
||||
lines = [f"🗜️ {summary['headline']}"]
|
||||
@@ -7679,20 +7351,6 @@ class GatewayRunner:
|
||||
lines.append(summary["token_line"])
|
||||
if summary["note"]:
|
||||
lines.append(summary["note"])
|
||||
if _summary_failed:
|
||||
lines.append(
|
||||
f"⚠️ Summary generation failed ({_summary_err or 'unknown error'}). "
|
||||
f"{_dropped_count} historical message(s) were removed and replaced "
|
||||
"with a placeholder; earlier context is no longer recoverable. "
|
||||
"Consider checking your auxiliary.compression model configuration."
|
||||
)
|
||||
elif _aux_fail_model:
|
||||
lines.append(
|
||||
f"ℹ️ Configured compression model `{_aux_fail_model}` failed "
|
||||
f"({_aux_fail_err or 'unknown error'}). Recovered using your main "
|
||||
"model — context is intact — but you may want to check "
|
||||
"`auxiliary.compression.model` in config.yaml."
|
||||
)
|
||||
return "\n".join(lines)
|
||||
except Exception as e:
|
||||
logger.warning("Manual compress failed: %s", e)
|
||||
@@ -8825,7 +8483,6 @@ class GatewayRunner:
|
||||
The enriched message string with vision descriptions prepended.
|
||||
"""
|
||||
from tools.vision_tools import vision_analyze_tool
|
||||
from agent.memory_manager import sanitize_context
|
||||
|
||||
analysis_prompt = (
|
||||
"Describe everything visible in this image in thorough detail. "
|
||||
@@ -8844,7 +8501,6 @@ class GatewayRunner:
|
||||
result = json.loads(result_json)
|
||||
if result.get("success"):
|
||||
description = result.get("analysis", "")
|
||||
description = sanitize_context(description)
|
||||
enriched_parts.append(
|
||||
f"[The user sent an image~ Here's what I can see:\n{description}]\n"
|
||||
f"[If you need a closer look, use vision_analyze with "
|
||||
@@ -9207,47 +8863,12 @@ class GatewayRunner:
|
||||
|
||||
_MAX_INTERRUPT_DEPTH = 3 # Cap recursive interrupt handling (#816)
|
||||
|
||||
# Config keys whose values MUST invalidate the gateway's cached agent
|
||||
# when they change. The agent bakes these into its compressor / context
|
||||
# handling at construction time, so a mid-running-gateway config edit
|
||||
# would otherwise be silently ignored until the user triggers a
|
||||
# different cache eviction (model switch, /reset, etc.).
|
||||
#
|
||||
# Each entry is a tuple of (section, key) read from the raw config dict.
|
||||
# Add more here as new baked-at-construction config settings are added.
|
||||
_CACHE_BUSTING_CONFIG_KEYS: tuple = (
|
||||
("model", "context_length"),
|
||||
("compression", "enabled"),
|
||||
("compression", "threshold"),
|
||||
("compression", "target_ratio"),
|
||||
("compression", "protect_last_n"),
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def _extract_cache_busting_config(cls, user_config: dict | None) -> dict:
|
||||
"""Pull the subset of config values that must bust the agent cache.
|
||||
|
||||
Returns a flat dict keyed by 'section.key'. Missing keys and
|
||||
non-dict sections yield None values, which still contribute to
|
||||
the signature (so 'absent' vs 'present-and-null' differ).
|
||||
"""
|
||||
out: Dict[str, Any] = {}
|
||||
cfg = user_config if isinstance(user_config, dict) else {}
|
||||
for section, key in cls._CACHE_BUSTING_CONFIG_KEYS:
|
||||
section_val = cfg.get(section)
|
||||
if isinstance(section_val, dict):
|
||||
out[f"{section}.{key}"] = section_val.get(key)
|
||||
else:
|
||||
out[f"{section}.{key}"] = None
|
||||
return out
|
||||
|
||||
@staticmethod
|
||||
def _agent_config_signature(
|
||||
model: str,
|
||||
runtime: dict,
|
||||
enabled_toolsets: list,
|
||||
ephemeral_prompt: str,
|
||||
cache_keys: dict | None = None,
|
||||
) -> str:
|
||||
"""Compute a stable string key from agent config values.
|
||||
|
||||
@@ -9255,12 +8876,6 @@ class GatewayRunner:
|
||||
discarded and rebuilt. When it stays the same, the cached agent is
|
||||
reused — preserving the frozen system prompt and tool schemas for
|
||||
prompt cache hits.
|
||||
|
||||
``cache_keys`` is an optional flat dict of additional config values
|
||||
that should invalidate the cache when they change. Callers pass
|
||||
the output of ``_extract_cache_busting_config(user_config)`` so
|
||||
edits to model.context_length / compression.* in config.yaml are
|
||||
picked up on the next gateway message without a manual restart.
|
||||
"""
|
||||
import hashlib, json as _j
|
||||
|
||||
@@ -9271,8 +8886,6 @@ class GatewayRunner:
|
||||
_api_key = str(runtime.get("api_key", "") or "")
|
||||
_api_key_fingerprint = hashlib.sha256(_api_key.encode()).hexdigest() if _api_key else ""
|
||||
|
||||
_cache_keys_sorted = sorted((cache_keys or {}).items())
|
||||
|
||||
blob = _j.dumps(
|
||||
[
|
||||
model,
|
||||
@@ -9284,7 +8897,6 @@ class GatewayRunner:
|
||||
# reasoning_config excluded — it's set per-message on the
|
||||
# cached agent and doesn't affect system prompt or tools.
|
||||
ephemeral_prompt or "",
|
||||
_cache_keys_sorted,
|
||||
],
|
||||
sort_keys=True,
|
||||
default=str,
|
||||
@@ -10537,7 +10149,6 @@ class GatewayRunner:
|
||||
turn_route["runtime"],
|
||||
enabled_toolsets,
|
||||
combined_ephemeral,
|
||||
cache_keys=self._extract_cache_busting_config(user_config),
|
||||
)
|
||||
agent = None
|
||||
_cache_lock = getattr(self, "_agent_cache_lock", None)
|
||||
@@ -10604,7 +10215,7 @@ class GatewayRunner:
|
||||
agent.status_callback = _status_callback_sync
|
||||
agent.reasoning_config = reasoning_config
|
||||
agent.service_tier = self._service_tier
|
||||
agent.request_overrides = turn_route.get("request_overrides") or {}
|
||||
agent.request_overrides = turn_route.get("request_overrides")
|
||||
|
||||
_bg_review_release = threading.Event()
|
||||
_bg_review_pending: list[str] = []
|
||||
@@ -10825,23 +10436,6 @@ class GatewayRunner:
|
||||
# anything (tool, assistant with unfinished work, etc.), so we
|
||||
# give a stronger, reason-aware instruction that subsumes the
|
||||
# tool-tail case.
|
||||
#
|
||||
# Freshness gate (#16802): both branches are gated on the age
|
||||
# of the last persisted transcript row. That is the correct
|
||||
# "when did we last do anything here" signal for both the
|
||||
# resume_pending path (restart watchdog) and the tool-tail
|
||||
# path (in-flight tool loop killed). We read ``history[-1]``
|
||||
# here because ``agent_history`` has already stripped the
|
||||
# ``timestamp`` field off tool/tool_call rows for API purity
|
||||
# (see the `k != "timestamp"` filter above). Rows without a
|
||||
# timestamp (legacy transcripts) are treated as fresh so the
|
||||
# historical auto-continue behaviour is preserved.
|
||||
_freshness_window = _auto_continue_freshness_window()
|
||||
_interruption_is_fresh = _is_fresh_gateway_interruption(
|
||||
_last_transcript_timestamp(history),
|
||||
window_secs=_freshness_window,
|
||||
)
|
||||
|
||||
_resume_entry = None
|
||||
if session_key:
|
||||
try:
|
||||
@@ -10849,14 +10443,7 @@ class GatewayRunner:
|
||||
except Exception:
|
||||
_resume_entry = None
|
||||
_is_resume_pending = bool(
|
||||
_resume_entry is not None
|
||||
and getattr(_resume_entry, "resume_pending", False)
|
||||
and _interruption_is_fresh
|
||||
)
|
||||
_has_fresh_tool_tail = bool(
|
||||
agent_history
|
||||
and agent_history[-1].get("role") == "tool"
|
||||
and _interruption_is_fresh
|
||||
_resume_entry is not None and getattr(_resume_entry, "resume_pending", False)
|
||||
)
|
||||
|
||||
if _is_resume_pending:
|
||||
@@ -10876,7 +10463,7 @@ class GatewayRunner:
|
||||
f"message below.]\n\n"
|
||||
+ message
|
||||
)
|
||||
elif _has_fresh_tool_tail:
|
||||
elif agent_history and agent_history[-1].get("role") == "tool":
|
||||
message = (
|
||||
"[System note: Your previous turn was interrupted before you could "
|
||||
"process the last tool result(s). The conversation history contains "
|
||||
@@ -10939,13 +10526,11 @@ class GatewayRunner:
|
||||
_last_prompt_toks = 0
|
||||
_input_toks = 0
|
||||
_output_toks = 0
|
||||
_context_length = 0
|
||||
_agent = agent_holder[0]
|
||||
if _agent and hasattr(_agent, "context_compressor"):
|
||||
_last_prompt_toks = getattr(_agent.context_compressor, "last_prompt_tokens", 0)
|
||||
_input_toks = getattr(_agent, "session_prompt_tokens", 0)
|
||||
_output_toks = getattr(_agent, "session_completion_tokens", 0)
|
||||
_context_length = getattr(_agent.context_compressor, "context_length", 0) or 0
|
||||
_resolved_model = getattr(_agent, "model", None) if _agent else None
|
||||
|
||||
if not final_response:
|
||||
@@ -10962,7 +10547,6 @@ class GatewayRunner:
|
||||
"input_tokens": _input_toks,
|
||||
"output_tokens": _output_toks,
|
||||
"model": _resolved_model,
|
||||
"context_length": _context_length,
|
||||
}
|
||||
|
||||
# Scan tool results for MEDIA:<path> tags that need to be delivered
|
||||
@@ -11045,13 +10629,6 @@ class GatewayRunner:
|
||||
final_response,
|
||||
all_msgs,
|
||||
failure_callback=_title_failure_cb,
|
||||
main_runtime={
|
||||
"model": getattr(agent, "model", None),
|
||||
"provider": getattr(agent, "provider", None),
|
||||
"base_url": getattr(agent, "base_url", None),
|
||||
"api_key": getattr(agent, "api_key", None),
|
||||
"api_mode": getattr(agent, "api_mode", None),
|
||||
} if agent else None,
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
@@ -11067,7 +10644,6 @@ class GatewayRunner:
|
||||
"input_tokens": _input_toks,
|
||||
"output_tokens": _output_toks,
|
||||
"model": _resolved_model,
|
||||
"context_length": _context_length,
|
||||
"session_id": effective_session_id,
|
||||
"response_previewed": result.get("response_previewed", False),
|
||||
}
|
||||
@@ -12016,19 +11592,6 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool =
|
||||
atexit.register(remove_pid_file)
|
||||
atexit.register(release_gateway_runtime_lock)
|
||||
|
||||
# MCP tool discovery — run in an executor so the asyncio event loop
|
||||
# stays responsive even when a configured MCP server is slow or
|
||||
# unreachable. discover_mcp_tools() uses a blocking 120s wait
|
||||
# internally; calling it from the loop thread would freeze platform
|
||||
# heartbeats (Discord shard, Telegram polling) until it returned.
|
||||
# See #16856.
|
||||
try:
|
||||
from tools.mcp_tool import discover_mcp_tools
|
||||
_loop = asyncio.get_running_loop()
|
||||
await _loop.run_in_executor(None, discover_mcp_tools)
|
||||
except Exception as e:
|
||||
logger.debug("MCP tool discovery failed: %s", e)
|
||||
|
||||
# Start the gateway
|
||||
success = await runner.start()
|
||||
if not success:
|
||||
|
||||
@@ -1,150 +0,0 @@
|
||||
"""Gateway runtime-metadata footer.
|
||||
|
||||
Renders a compact footer showing runtime state (model, context %, cwd) and
|
||||
appends it to the FINAL message of an agent turn when enabled. Off by default
|
||||
to keep replies minimal.
|
||||
|
||||
Config (``~/.hermes/config.yaml``)::
|
||||
|
||||
display:
|
||||
runtime_footer:
|
||||
enabled: true # off by default
|
||||
fields: [model, context_pct, cwd] # order shown; drop any to hide
|
||||
|
||||
Per-platform overrides live under ``display.platforms.<platform>.runtime_footer``.
|
||||
Users can toggle the global setting with ``/footer on|off`` from both the CLI
|
||||
and any gateway platform.
|
||||
|
||||
The footer is appended to the final response text in ``gateway/run.py`` right
|
||||
before returning the response to the adapter send path — so it only lands on
|
||||
the final message a user sees, not on tool-progress updates or streaming
|
||||
partials. When streaming is on and the final text has already been delivered
|
||||
piecemeal, the footer is sent as a separate trailing message via
|
||||
``send_trailing_footer()``.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Any, Iterable, Optional
|
||||
|
||||
_DEFAULT_FIELDS: tuple[str, ...] = ("model", "context_pct", "cwd")
|
||||
_SEP = " · "
|
||||
|
||||
|
||||
def _home_relative_cwd(cwd: str) -> str:
|
||||
"""Return *cwd* with ``$HOME`` collapsed to ``~``. Empty string if unset."""
|
||||
if not cwd:
|
||||
return ""
|
||||
try:
|
||||
home = os.path.expanduser("~")
|
||||
p = os.path.abspath(cwd)
|
||||
if home and (p == home or p.startswith(home + os.sep)):
|
||||
return "~" + p[len(home):]
|
||||
return p
|
||||
except Exception:
|
||||
return cwd
|
||||
|
||||
|
||||
def _model_short(model: Optional[str]) -> str:
|
||||
"""Drop ``vendor/`` prefix for readability (``openai/gpt-5.4`` → ``gpt-5.4``)."""
|
||||
if not model:
|
||||
return ""
|
||||
return model.rsplit("/", 1)[-1]
|
||||
|
||||
|
||||
def resolve_footer_config(
|
||||
user_config: dict[str, Any] | None,
|
||||
platform_key: str | None = None,
|
||||
) -> dict[str, Any]:
|
||||
"""Resolve effective runtime-footer config for *platform_key*.
|
||||
|
||||
Merge order (later wins):
|
||||
1. Built-in defaults (enabled=False)
|
||||
2. ``display.runtime_footer``
|
||||
3. ``display.platforms.<platform_key>.runtime_footer``
|
||||
"""
|
||||
resolved = {"enabled": False, "fields": list(_DEFAULT_FIELDS)}
|
||||
cfg = (user_config or {}).get("display") or {}
|
||||
|
||||
global_cfg = cfg.get("runtime_footer")
|
||||
if isinstance(global_cfg, dict):
|
||||
if "enabled" in global_cfg:
|
||||
resolved["enabled"] = bool(global_cfg.get("enabled"))
|
||||
if isinstance(global_cfg.get("fields"), list) and global_cfg["fields"]:
|
||||
resolved["fields"] = [str(f) for f in global_cfg["fields"]]
|
||||
|
||||
if platform_key:
|
||||
platforms = cfg.get("platforms") or {}
|
||||
plat_cfg = platforms.get(platform_key)
|
||||
if isinstance(plat_cfg, dict):
|
||||
plat_footer = plat_cfg.get("runtime_footer")
|
||||
if isinstance(plat_footer, dict):
|
||||
if "enabled" in plat_footer:
|
||||
resolved["enabled"] = bool(plat_footer.get("enabled"))
|
||||
if isinstance(plat_footer.get("fields"), list) and plat_footer["fields"]:
|
||||
resolved["fields"] = [str(f) for f in plat_footer["fields"]]
|
||||
|
||||
return resolved
|
||||
|
||||
|
||||
def format_runtime_footer(
|
||||
*,
|
||||
model: Optional[str],
|
||||
context_tokens: int,
|
||||
context_length: Optional[int],
|
||||
cwd: Optional[str] = None,
|
||||
fields: Iterable[str] = _DEFAULT_FIELDS,
|
||||
) -> str:
|
||||
"""Render the footer line, or return "" if no fields have data.
|
||||
|
||||
Fields are skipped silently when their underlying data is missing — a
|
||||
partially-populated footer is better than a line with ``?%`` or empty slots.
|
||||
"""
|
||||
parts: list[str] = []
|
||||
for field in fields:
|
||||
if field == "model":
|
||||
m = _model_short(model)
|
||||
if m:
|
||||
parts.append(m)
|
||||
elif field == "context_pct":
|
||||
if context_length and context_length > 0 and context_tokens >= 0:
|
||||
pct = max(0, min(100, round((context_tokens / context_length) * 100)))
|
||||
parts.append(f"{pct}%")
|
||||
elif field == "cwd":
|
||||
rel = _home_relative_cwd(cwd or os.environ.get("TERMINAL_CWD", ""))
|
||||
if rel:
|
||||
parts.append(rel)
|
||||
# Unknown field names are silently ignored.
|
||||
|
||||
if not parts:
|
||||
return ""
|
||||
return _SEP.join(parts)
|
||||
|
||||
|
||||
def build_footer_line(
|
||||
*,
|
||||
user_config: dict[str, Any] | None,
|
||||
platform_key: str | None,
|
||||
model: Optional[str],
|
||||
context_tokens: int,
|
||||
context_length: Optional[int],
|
||||
cwd: Optional[str] = None,
|
||||
) -> str:
|
||||
"""Top-level entry point used by gateway/run.py.
|
||||
|
||||
Returns the footer text (empty string when disabled or no data). Callers
|
||||
append this to the final response themselves, preserving a single blank
|
||||
line of separation.
|
||||
"""
|
||||
cfg = resolve_footer_config(user_config, platform_key)
|
||||
if not cfg.get("enabled"):
|
||||
return ""
|
||||
return format_runtime_footer(
|
||||
model=model,
|
||||
context_tokens=context_tokens,
|
||||
context_length=context_length,
|
||||
cwd=cwd,
|
||||
fields=cfg.get("fields") or _DEFAULT_FIELDS,
|
||||
)
|
||||
+19
-5
@@ -62,8 +62,8 @@ from .config import (
|
||||
)
|
||||
from .whatsapp_identity import (
|
||||
canonical_whatsapp_identifier,
|
||||
normalize_whatsapp_identifier,
|
||||
)
|
||||
from utils import atomic_replace
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -705,7 +705,7 @@ class SessionStore:
|
||||
json.dump(data, f, indent=2)
|
||||
f.flush()
|
||||
os.fsync(f.fileno())
|
||||
atomic_replace(tmp_path, sessions_file)
|
||||
os.replace(tmp_path, sessions_file)
|
||||
except BaseException:
|
||||
try:
|
||||
os.unlink(tmp_path)
|
||||
@@ -1257,11 +1257,25 @@ class SessionStore:
|
||||
Used by /retry, /undo, and /compress to persist modified conversation history.
|
||||
Rewrites both SQLite and legacy JSONL storage.
|
||||
"""
|
||||
# SQLite: replace atomically so a mid-rewrite failure doesn't leave
|
||||
# the session half-empty in the DB while JSONL still has history.
|
||||
# SQLite: clear old messages and re-insert
|
||||
if self._db:
|
||||
try:
|
||||
self._db.replace_messages(session_id, messages)
|
||||
self._db.clear_messages(session_id)
|
||||
for msg in messages:
|
||||
role = msg.get("role", "unknown")
|
||||
self._db.append_message(
|
||||
session_id=session_id,
|
||||
role=role,
|
||||
content=msg.get("content"),
|
||||
tool_name=msg.get("tool_name"),
|
||||
tool_calls=msg.get("tool_calls"),
|
||||
tool_call_id=msg.get("tool_call_id"),
|
||||
reasoning=msg.get("reasoning") if role == "assistant" else None,
|
||||
reasoning_content=msg.get("reasoning_content") if role == "assistant" else None,
|
||||
reasoning_details=msg.get("reasoning_details") if role == "assistant" else None,
|
||||
codex_reasoning_items=msg.get("codex_reasoning_items") if role == "assistant" else None,
|
||||
codex_message_items=msg.get("codex_message_items") if role == "assistant" else None,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.debug("Failed to rewrite transcript in DB: %s", e)
|
||||
|
||||
|
||||
+4
-48
@@ -43,7 +43,6 @@ import yaml
|
||||
|
||||
from hermes_cli.config import get_hermes_home, get_config_path, read_raw_config
|
||||
from hermes_constants import OPENROUTER_BASE_URL
|
||||
from utils import atomic_replace
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -110,12 +109,6 @@ SERVICE_PROVIDER_NAMES: Dict[str, str] = {
|
||||
DEFAULT_GEMINI_CLOUDCODE_BASE_URL = "cloudcode-pa://google"
|
||||
GEMINI_OAUTH_ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 60 # refresh 60s before expiry
|
||||
|
||||
# LM Studio's default no-auth mode still requires *some* non-empty bearer for
|
||||
# the API-key code paths (auxiliary_client, runtime resolver) to treat the
|
||||
# provider as configured. This sentinel is sent only to LM Studio, never to
|
||||
# any remote service.
|
||||
LMSTUDIO_NOAUTH_PLACEHOLDER = "dummy-lm-api-key"
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Provider Registry
|
||||
@@ -166,14 +159,6 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
|
||||
auth_type="oauth_external",
|
||||
inference_base_url=DEFAULT_GEMINI_CLOUDCODE_BASE_URL,
|
||||
),
|
||||
"lmstudio": ProviderConfig(
|
||||
id="lmstudio",
|
||||
name="LM Studio",
|
||||
auth_type="api_key",
|
||||
inference_base_url="http://127.0.0.1:1234/v1",
|
||||
api_key_env_vars=("LM_API_KEY",),
|
||||
base_url_env_var="LM_BASE_URL",
|
||||
),
|
||||
"copilot": ProviderConfig(
|
||||
id="copilot",
|
||||
name="GitHub Copilot",
|
||||
@@ -239,14 +224,6 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
|
||||
api_key_env_vars=("ARCEEAI_API_KEY",),
|
||||
base_url_env_var="ARCEE_BASE_URL",
|
||||
),
|
||||
"gmi": ProviderConfig(
|
||||
id="gmi",
|
||||
name="GMI Cloud",
|
||||
auth_type="api_key",
|
||||
inference_base_url="https://api.gmi-serving.com/v1",
|
||||
api_key_env_vars=("GMI_API_KEY",),
|
||||
base_url_env_var="GMI_BASE_URL",
|
||||
),
|
||||
"minimax": ProviderConfig(
|
||||
id="minimax",
|
||||
name="MiniMax",
|
||||
@@ -363,14 +340,6 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
|
||||
api_key_env_vars=("XIAOMI_API_KEY",),
|
||||
base_url_env_var="XIAOMI_BASE_URL",
|
||||
),
|
||||
"tencent-tokenhub": ProviderConfig(
|
||||
id="tencent-tokenhub",
|
||||
name="Tencent TokenHub",
|
||||
auth_type="api_key",
|
||||
inference_base_url="https://tokenhub.tencentmaas.com/v1",
|
||||
api_key_env_vars=("TOKENHUB_API_KEY",),
|
||||
base_url_env_var="TOKENHUB_BASE_URL",
|
||||
),
|
||||
"ollama-cloud": ProviderConfig(
|
||||
id="ollama-cloud",
|
||||
name="Ollama Cloud",
|
||||
@@ -843,7 +812,7 @@ def _save_auth_store(auth_store: Dict[str, Any]) -> Path:
|
||||
handle.write(payload)
|
||||
handle.flush()
|
||||
os.fsync(handle.fileno())
|
||||
atomic_replace(tmp_path, auth_file)
|
||||
os.replace(tmp_path, auth_file)
|
||||
try:
|
||||
dir_fd = os.open(str(auth_file.parent), os.O_RDONLY)
|
||||
except OSError:
|
||||
@@ -1151,7 +1120,6 @@ def resolve_provider(
|
||||
"kimi-cn": "kimi-coding-cn", "moonshot-cn": "kimi-coding-cn",
|
||||
"step": "stepfun", "stepfun-coding-plan": "stepfun",
|
||||
"arcee-ai": "arcee", "arceeai": "arcee",
|
||||
"gmi-cloud": "gmi", "gmicloud": "gmi",
|
||||
"minimax-china": "minimax-cn", "minimax_cn": "minimax-cn",
|
||||
"alibaba_coding": "alibaba-coding-plan", "alibaba-coding": "alibaba-coding-plan",
|
||||
"alibaba_coding_plan": "alibaba-coding-plan",
|
||||
@@ -1164,13 +1132,11 @@ def resolve_provider(
|
||||
"qwen-portal": "qwen-oauth", "qwen-cli": "qwen-oauth", "qwen-oauth": "qwen-oauth", "google-gemini-cli": "google-gemini-cli", "gemini-cli": "google-gemini-cli", "gemini-oauth": "google-gemini-cli",
|
||||
"hf": "huggingface", "hugging-face": "huggingface", "huggingface-hub": "huggingface",
|
||||
"mimo": "xiaomi", "xiaomi-mimo": "xiaomi",
|
||||
"tencent": "tencent-tokenhub", "tokenhub": "tencent-tokenhub",
|
||||
"tencent-cloud": "tencent-tokenhub", "tencentmaas": "tencent-tokenhub",
|
||||
"aws": "bedrock", "aws-bedrock": "bedrock", "amazon-bedrock": "bedrock", "amazon": "bedrock",
|
||||
"go": "opencode-go", "opencode-go-sub": "opencode-go",
|
||||
"kilo": "kilocode", "kilo-code": "kilocode", "kilo-gateway": "kilocode",
|
||||
"lmstudio": "lmstudio", "lm-studio": "lmstudio", "lm_studio": "lmstudio",
|
||||
# Local server aliases — route through the generic custom provider
|
||||
"lmstudio": "custom", "lm-studio": "custom", "lm_studio": "custom",
|
||||
"ollama": "custom", "ollama_cloud": "ollama-cloud",
|
||||
"vllm": "custom", "llamacpp": "custom",
|
||||
"llama.cpp": "custom", "llama-cpp": "custom",
|
||||
@@ -1217,11 +1183,8 @@ def resolve_provider(
|
||||
continue
|
||||
# GitHub tokens are commonly present for repo/tool access but should not
|
||||
# hijack inference auto-selection unless the user explicitly chooses
|
||||
# Copilot/GitHub Models as the provider. LM Studio is a local server
|
||||
# whose availability isn't implied by LM_API_KEY presence (it may be
|
||||
# offline, and the no-auth setup uses a placeholder value), so it
|
||||
# also requires explicit selection.
|
||||
if pid in ("copilot", "lmstudio"):
|
||||
# Copilot/GitHub Models as the provider.
|
||||
if pid == "copilot":
|
||||
continue
|
||||
for env_var in pconfig.api_key_env_vars:
|
||||
if has_usable_secret(os.getenv(env_var, "")):
|
||||
@@ -3499,13 +3462,6 @@ def resolve_api_key_provider_credentials(provider_id: str) -> Dict[str, Any]:
|
||||
key_source = ""
|
||||
api_key, key_source = _resolve_api_key_provider_secret(provider_id, pconfig)
|
||||
|
||||
# No-auth LM Studio: substitute a placeholder so runtime / auxiliary_client
|
||||
# see the local server as configured. doctor still reports unconfigured
|
||||
# because get_api_key_provider_status uses the raw secret resolver.
|
||||
if not api_key and provider_id == "lmstudio":
|
||||
api_key = LMSTUDIO_NOAUTH_PLACEHOLDER
|
||||
key_source = key_source or "default"
|
||||
|
||||
env_url = ""
|
||||
if pconfig.base_url_env_var:
|
||||
env_url = os.getenv(pconfig.base_url_env_var, "").strip()
|
||||
|
||||
@@ -34,7 +34,7 @@ from dataclasses import dataclass, field
|
||||
from typing import Optional
|
||||
from urllib import request as urllib_request
|
||||
from urllib.error import HTTPError, URLError
|
||||
from urllib.parse import urlparse
|
||||
from urllib.parse import urlparse, urlunparse
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
+57
-152
@@ -696,78 +696,6 @@ def run_quick_backup(args) -> None:
|
||||
print("No state files found to snapshot.")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Shared full-zip backup helper
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _write_full_zip_backup(out_path: Path, hermes_root: Path) -> Optional[Path]:
|
||||
"""Write a full zip snapshot of ``hermes_root`` to ``out_path``.
|
||||
|
||||
Uses the same exclusion rules and SQLite safe-copy as :func:`run_backup`.
|
||||
Returns the output path on success, None on failure (nothing to back up,
|
||||
or write error — caller should surface the outcome but not raise).
|
||||
"""
|
||||
files_to_add: list[tuple[Path, Path]] = []
|
||||
try:
|
||||
for dirpath, dirnames, filenames in os.walk(hermes_root, followlinks=False):
|
||||
dp = Path(dirpath)
|
||||
# Prune excluded directories in-place so os.walk doesn't descend
|
||||
dirnames[:] = [d for d in dirnames if d not in _EXCLUDED_DIRS]
|
||||
|
||||
for fname in filenames:
|
||||
fpath = dp / fname
|
||||
try:
|
||||
rel = fpath.relative_to(hermes_root)
|
||||
except ValueError:
|
||||
continue
|
||||
|
||||
if _should_exclude(rel):
|
||||
continue
|
||||
|
||||
# Skip the output zip itself if it already exists inside root.
|
||||
try:
|
||||
if fpath.resolve() == out_path.resolve():
|
||||
continue
|
||||
except (OSError, ValueError):
|
||||
pass
|
||||
|
||||
files_to_add.append((fpath, rel))
|
||||
except OSError as exc:
|
||||
logger.warning("Full-zip backup: walk failed: %s", exc)
|
||||
return None
|
||||
|
||||
if not files_to_add:
|
||||
return None
|
||||
|
||||
try:
|
||||
with zipfile.ZipFile(out_path, "w", zipfile.ZIP_DEFLATED, compresslevel=6) as zf:
|
||||
for abs_path, rel_path in files_to_add:
|
||||
try:
|
||||
if abs_path.suffix == ".db":
|
||||
with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as tmp:
|
||||
tmp_db = Path(tmp.name)
|
||||
try:
|
||||
if _safe_copy_db(abs_path, tmp_db):
|
||||
zf.write(tmp_db, arcname=str(rel_path))
|
||||
finally:
|
||||
tmp_db.unlink(missing_ok=True)
|
||||
else:
|
||||
zf.write(abs_path, arcname=str(rel_path))
|
||||
except (PermissionError, OSError, ValueError) as exc:
|
||||
logger.debug("Skipping %s in zip backup: %s", rel_path, exc)
|
||||
continue
|
||||
except OSError as exc:
|
||||
logger.warning("Full-zip backup: zip write failed: %s", exc)
|
||||
# Best-effort cleanup of partial file
|
||||
try:
|
||||
out_path.unlink(missing_ok=True)
|
||||
except OSError:
|
||||
pass
|
||||
return None
|
||||
|
||||
return out_path
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Pre-update auto-backup
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -840,87 +768,64 @@ def create_pre_update_backup(
|
||||
stamp = datetime.now().strftime("%Y-%m-%d-%H%M%S")
|
||||
out_path = backup_dir / f"{_PRE_UPDATE_PREFIX}{stamp}.zip"
|
||||
|
||||
result = _write_full_zip_backup(out_path, hermes_root)
|
||||
if result is None:
|
||||
# Collect files (same logic as run_backup, minus the chatty progress prints)
|
||||
files_to_add: list[tuple[Path, Path]] = []
|
||||
try:
|
||||
for dirpath, dirnames, filenames in os.walk(hermes_root, followlinks=False):
|
||||
dp = Path(dirpath)
|
||||
# Prune excluded directories in-place so os.walk doesn't descend
|
||||
dirnames[:] = [d for d in dirnames if d not in _EXCLUDED_DIRS]
|
||||
|
||||
for fname in filenames:
|
||||
fpath = dp / fname
|
||||
try:
|
||||
rel = fpath.relative_to(hermes_root)
|
||||
except ValueError:
|
||||
continue
|
||||
|
||||
if _should_exclude(rel):
|
||||
continue
|
||||
|
||||
# Skip the output zip itself if it already exists
|
||||
try:
|
||||
if fpath.resolve() == out_path.resolve():
|
||||
continue
|
||||
except (OSError, ValueError):
|
||||
pass
|
||||
|
||||
files_to_add.append((fpath, rel))
|
||||
except OSError as exc:
|
||||
logger.warning("Pre-update backup: walk failed: %s", exc)
|
||||
return None
|
||||
|
||||
if not files_to_add:
|
||||
return None
|
||||
|
||||
try:
|
||||
with zipfile.ZipFile(out_path, "w", zipfile.ZIP_DEFLATED, compresslevel=6) as zf:
|
||||
for abs_path, rel_path in files_to_add:
|
||||
try:
|
||||
if abs_path.suffix == ".db":
|
||||
with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as tmp:
|
||||
tmp_db = Path(tmp.name)
|
||||
try:
|
||||
if _safe_copy_db(abs_path, tmp_db):
|
||||
zf.write(tmp_db, arcname=str(rel_path))
|
||||
finally:
|
||||
tmp_db.unlink(missing_ok=True)
|
||||
else:
|
||||
zf.write(abs_path, arcname=str(rel_path))
|
||||
except (PermissionError, OSError, ValueError) as exc:
|
||||
logger.debug("Skipping %s in pre-update backup: %s", rel_path, exc)
|
||||
continue
|
||||
except OSError as exc:
|
||||
logger.warning("Pre-update backup: zip write failed: %s", exc)
|
||||
# Best-effort cleanup of partial file
|
||||
try:
|
||||
out_path.unlink(missing_ok=True)
|
||||
except OSError:
|
||||
pass
|
||||
return None
|
||||
|
||||
_prune_pre_update_backups(backup_dir, keep=keep)
|
||||
return out_path
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Pre-migration auto-backup (used by `hermes claw migrate`)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_PRE_MIGRATION_PREFIX = "pre-migration-"
|
||||
_PRE_MIGRATION_DEFAULT_KEEP = 5
|
||||
|
||||
|
||||
def _prune_pre_migration_backups(backup_dir: Path, keep: int) -> int:
|
||||
"""Remove oldest pre-migration backups beyond the keep limit.
|
||||
|
||||
Only touches files matching ``pre-migration-*.zip`` so other backups in
|
||||
the same directory are never touched.
|
||||
"""
|
||||
if keep < 0:
|
||||
keep = 0
|
||||
if not backup_dir.exists():
|
||||
return 0
|
||||
|
||||
backups = sorted(
|
||||
(p for p in backup_dir.iterdir()
|
||||
if p.is_file() and p.name.startswith(_PRE_MIGRATION_PREFIX) and p.suffix.lower() == ".zip"),
|
||||
key=lambda p: p.name,
|
||||
reverse=True,
|
||||
)
|
||||
|
||||
deleted = 0
|
||||
for p in backups[keep:]:
|
||||
try:
|
||||
p.unlink()
|
||||
deleted += 1
|
||||
except OSError as exc:
|
||||
logger.warning("Failed to prune pre-migration backup %s: %s", p.name, exc)
|
||||
|
||||
return deleted
|
||||
|
||||
|
||||
def create_pre_migration_backup(
|
||||
hermes_home: Optional[Path] = None,
|
||||
keep: int = _PRE_MIGRATION_DEFAULT_KEEP,
|
||||
) -> Optional[Path]:
|
||||
"""Create a full zip backup of HERMES_HOME under ``backups/`` before a
|
||||
``hermes claw migrate`` apply.
|
||||
|
||||
Shares implementation with :func:`create_pre_update_backup` via
|
||||
``_write_full_zip_backup`` — same exclusions, same SQLite safe-copy,
|
||||
restorable with ``hermes import <archive>``. Writes to
|
||||
``<HERMES_HOME>/backups/pre-migration-<timestamp>.zip`` and auto-prunes
|
||||
old pre-migration backups.
|
||||
|
||||
Returns the path to the created zip, or ``None`` if nothing was found
|
||||
to back up (fresh install) or the write failed. Never raises — the
|
||||
caller decides whether to abort or proceed.
|
||||
"""
|
||||
hermes_root = hermes_home or get_default_hermes_root()
|
||||
if not hermes_root.is_dir():
|
||||
return None
|
||||
|
||||
# Reuses the shared backups/ directory so `hermes import` and the
|
||||
# update-backup listing pick up pre-migration archives too.
|
||||
backup_dir = _pre_update_backup_dir(hermes_root)
|
||||
try:
|
||||
backup_dir.mkdir(parents=True, exist_ok=True)
|
||||
except OSError as exc:
|
||||
logger.warning("Could not create pre-migration backup dir %s: %s", backup_dir, exc)
|
||||
return None
|
||||
|
||||
stamp = datetime.now().strftime("%Y-%m-%d-%H%M%S")
|
||||
out_path = backup_dir / f"{_PRE_MIGRATION_PREFIX}{stamp}.zip"
|
||||
|
||||
result = _write_full_zip_backup(out_path, hermes_root)
|
||||
if result is None:
|
||||
return None
|
||||
|
||||
_prune_pre_migration_backups(backup_dir, keep=keep)
|
||||
return out_path
|
||||
|
||||
@@ -562,6 +562,7 @@ def build_welcome_banner(console: Console, model: str, cwd: str,
|
||||
right_content = "\n".join(right_lines)
|
||||
layout_table.add_row(left_content, right_content)
|
||||
|
||||
agent_name = _skin_branding("agent_name", "Hermes Agent")
|
||||
title_color = _skin_color("banner_title", "#FFD700")
|
||||
border_color = _skin_color("banner_border", "#CD7F32")
|
||||
version_label = format_banner_version_label()
|
||||
|
||||
+6
-67
@@ -4,8 +4,7 @@ Usage:
|
||||
hermes claw migrate # Preview then migrate (always shows preview first)
|
||||
hermes claw migrate --dry-run # Preview only, no changes
|
||||
hermes claw migrate --yes # Skip confirmation prompt
|
||||
hermes claw migrate --preset full --overwrite --migrate-secrets # Full run w/ secrets
|
||||
hermes claw migrate --no-backup # Skip pre-migration snapshot
|
||||
hermes claw migrate --preset full --overwrite # Full migration, overwrite conflicts
|
||||
hermes claw cleanup # Archive leftover OpenClaw directories
|
||||
hermes claw cleanup --dry-run # Preview what would be archived
|
||||
"""
|
||||
@@ -16,7 +15,6 @@ import subprocess
|
||||
import sys
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from hermes_cli.config import get_hermes_home, get_config_path, load_config, save_config
|
||||
from hermes_constants import get_optional_skills_dir
|
||||
@@ -323,13 +321,10 @@ def _cmd_migrate(args):
|
||||
migrate_secrets = getattr(args, "migrate_secrets", False)
|
||||
workspace_target = getattr(args, "workspace_target", None)
|
||||
skill_conflict = getattr(args, "skill_conflict", "skip")
|
||||
no_backup = getattr(args, "no_backup", False)
|
||||
|
||||
# Secrets are never included implicitly — they must be explicitly requested
|
||||
# via --migrate-secrets, even under --preset full. This mirrors OpenClaw's
|
||||
# migrate-hermes posture (two-phase: run once without secrets, rerun with
|
||||
# --include-secrets) and prevents a --preset full invocation from silently
|
||||
# importing API keys that the user may not have intended to copy.
|
||||
# If using the "full" preset, secrets are included by default
|
||||
if preset == "full":
|
||||
migrate_secrets = True
|
||||
|
||||
print()
|
||||
print(
|
||||
@@ -436,24 +431,15 @@ def _cmd_migrate(args):
|
||||
|
||||
preview_summary = preview_report.get("summary", {})
|
||||
preview_count = preview_summary.get("migrated", 0)
|
||||
preview_conflicts = preview_summary.get("conflict", 0)
|
||||
|
||||
# "Nothing to migrate" means nothing migrated AND nothing blocked by
|
||||
# conflicts. If there are conflicts, we still want to show the plan and
|
||||
# surface the refusal/--overwrite guidance instead of silently bailing.
|
||||
if preview_count == 0 and preview_conflicts == 0:
|
||||
if preview_count == 0:
|
||||
print()
|
||||
print_info("Nothing to migrate from OpenClaw.")
|
||||
_print_migration_report(preview_report, dry_run=True)
|
||||
return
|
||||
|
||||
print()
|
||||
if preview_count > 0:
|
||||
print_header(f"Migration Preview — {preview_count} item(s) would be imported")
|
||||
else:
|
||||
print_header(
|
||||
f"Migration Preview — {preview_conflicts} conflict(s), nothing would be imported"
|
||||
)
|
||||
print_header(f"Migration Preview — {preview_count} item(s) would be imported")
|
||||
print_info("No changes have been made yet. Review the list below:")
|
||||
_print_migration_report(preview_report, dry_run=True)
|
||||
|
||||
@@ -461,24 +447,6 @@ def _cmd_migrate(args):
|
||||
if dry_run:
|
||||
return
|
||||
|
||||
# ── Phase 1b: Refuse if the plan has conflicts and --overwrite is not set ─
|
||||
# Modelled on OpenClaw's assertConflictFreePlan() — apply is a safe no-op
|
||||
# on conflicts unless the user explicitly opts in to overwriting. Without
|
||||
# this guard, the user would answer "yes, proceed" and silently end up
|
||||
# with a migration that skipped every conflicting item.
|
||||
if preview_conflicts > 0 and not overwrite:
|
||||
print()
|
||||
print_error(
|
||||
f"Plan has {preview_conflicts} conflict(s). Refusing to apply."
|
||||
)
|
||||
print_info(
|
||||
"Each conflict is an item whose target already exists in ~/.hermes/. "
|
||||
"Re-run with --overwrite to replace conflicting targets (item-level "
|
||||
"backups are written to the migration report directory)."
|
||||
)
|
||||
print_info("Or re-run with --dry-run to review the full plan.")
|
||||
return
|
||||
|
||||
# ── Phase 2: Confirm and execute ───────────────────────────
|
||||
print()
|
||||
if not auto_yes:
|
||||
@@ -490,32 +458,6 @@ def _cmd_migrate(args):
|
||||
print_info("Migration cancelled.")
|
||||
return
|
||||
|
||||
# ── Phase 2b: Pre-apply backup of the Hermes home ─────────
|
||||
# Delegates to hermes_cli.backup.create_pre_migration_backup(), which
|
||||
# shares implementation with the pre-update backup (same exclusion
|
||||
# rules, same SQLite safe-copy, zip format) so the archive is
|
||||
# restorable with `hermes import`. Mirrors OpenClaw's
|
||||
# createPreMigrationBackup posture — one atomic restore point before
|
||||
# any mutation, auto-pruned to the last 5 pre-migration zips.
|
||||
backup_archive: Optional[Path] = None
|
||||
if not no_backup:
|
||||
try:
|
||||
from hermes_cli.backup import create_pre_migration_backup, _format_size
|
||||
backup_archive = create_pre_migration_backup(hermes_home=hermes_home)
|
||||
if backup_archive:
|
||||
size_str = _format_size(backup_archive.stat().st_size)
|
||||
print()
|
||||
print_success(f"Pre-migration backup: {backup_archive} ({size_str})")
|
||||
print_info(f"Restore with: hermes import {backup_archive.name}")
|
||||
except Exception as e:
|
||||
print()
|
||||
print_error(f"Could not create pre-migration backup: {e}")
|
||||
print_info(
|
||||
"Re-run with --no-backup to skip, or free up disk space under the Hermes home."
|
||||
)
|
||||
logger.debug("Pre-migration backup error", exc_info=True)
|
||||
return
|
||||
|
||||
try:
|
||||
migrator = mod.Migrator(
|
||||
source_root=source_dir.resolve(),
|
||||
@@ -534,9 +476,6 @@ def _cmd_migrate(args):
|
||||
print()
|
||||
print_error(f"Migration failed: {e}")
|
||||
logger.debug("OpenClaw migration error", exc_info=True)
|
||||
if backup_archive:
|
||||
print_info(f"A pre-migration backup is available at: {backup_archive}")
|
||||
print_info(f"Restore with: hermes import {backup_archive.name}")
|
||||
return
|
||||
|
||||
# Print results
|
||||
|
||||
@@ -115,9 +115,6 @@ COMMAND_REGISTRY: list[CommandDef] = [
|
||||
CommandDef("verbose", "Cycle tool progress display: off -> new -> all -> verbose",
|
||||
"Configuration", cli_only=True,
|
||||
gateway_config_gate="display.tool_progress_command"),
|
||||
CommandDef("footer", "Toggle gateway runtime-metadata footer on final replies",
|
||||
"Configuration", args_hint="[on|off|status]",
|
||||
subcommands=("on", "off", "status")),
|
||||
CommandDef("yolo", "Toggle YOLO mode (skip all dangerous command approvals)",
|
||||
"Configuration"),
|
||||
CommandDef("reasoning", "Manage reasoning effort and display", "Configuration",
|
||||
@@ -128,9 +125,6 @@ COMMAND_REGISTRY: list[CommandDef] = [
|
||||
subcommands=("normal", "fast", "status", "on", "off")),
|
||||
CommandDef("skin", "Show or change the display skin/theme", "Configuration",
|
||||
cli_only=True, args_hint="[name]"),
|
||||
CommandDef("indicator", "Pick the TUI busy-indicator style", "Configuration",
|
||||
cli_only=True, args_hint="[kaomoji|emoji|unicode|ascii]",
|
||||
subcommands=("kaomoji", "emoji", "unicode", "ascii")),
|
||||
CommandDef("voice", "Toggle voice mode", "Configuration",
|
||||
args_hint="[on|off|tts|status]", subcommands=("on", "off", "tts", "status")),
|
||||
CommandDef("busy", "Control what Enter does while Hermes is working", "Configuration",
|
||||
@@ -949,42 +943,6 @@ def slack_subcommand_map() -> dict[str, str]:
|
||||
# Autocomplete
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
# Per-process cache for /model<space> LM Studio autocomplete. Probing on
|
||||
# every keystroke would block the UI; a short TTL keeps it live without
|
||||
# hammering the server.
|
||||
_LMSTUDIO_COMPLETION_CACHE: tuple[float, list[str]] | None = None
|
||||
|
||||
|
||||
def _lmstudio_completion_models() -> list[str]:
|
||||
"""Locally-loaded LM Studio models for /model autocomplete (cached, gated)."""
|
||||
global _LMSTUDIO_COMPLETION_CACHE
|
||||
# Gate: don't probe 127.0.0.1 on every keystroke for users who don't use LM Studio.
|
||||
if not (os.environ.get("LM_API_KEY") or os.environ.get("LM_BASE_URL")):
|
||||
try:
|
||||
from hermes_cli.auth import _load_auth_store
|
||||
store = _load_auth_store() or {}
|
||||
if "lmstudio" not in (store.get("providers") or {}) \
|
||||
and "lmstudio" not in (store.get("credential_pool") or {}):
|
||||
return []
|
||||
except Exception:
|
||||
return []
|
||||
now = time.time()
|
||||
if _LMSTUDIO_COMPLETION_CACHE and (now - _LMSTUDIO_COMPLETION_CACHE[0]) < 30.0:
|
||||
return _LMSTUDIO_COMPLETION_CACHE[1]
|
||||
try:
|
||||
from hermes_cli.models import fetch_lmstudio_models
|
||||
models = fetch_lmstudio_models(
|
||||
api_key=os.environ.get("LM_API_KEY", ""),
|
||||
base_url=os.environ.get("LM_BASE_URL") or "http://127.0.0.1:1234/v1",
|
||||
timeout=0.8,
|
||||
)
|
||||
except Exception:
|
||||
models = []
|
||||
_LMSTUDIO_COMPLETION_CACHE = (now, models)
|
||||
return models
|
||||
|
||||
|
||||
class SlashCommandCompleter(Completer):
|
||||
"""Autocomplete for built-in slash commands, subcommands, and skill commands."""
|
||||
|
||||
@@ -1408,19 +1366,6 @@ class SlashCommandCompleter(Completer):
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
# LM Studio: surface locally-loaded models. Gated on the user actually
|
||||
# having LM Studio configured (env var or auth-store entry) so we
|
||||
# don't probe 127.0.0.1 on every keystroke for users who don't use it.
|
||||
for name in _lmstudio_completion_models():
|
||||
if name in seen:
|
||||
continue
|
||||
if name.startswith(sub_lower) and name != sub_lower:
|
||||
yield Completion(
|
||||
name,
|
||||
start_position=-len(sub_text),
|
||||
display=name,
|
||||
display_meta="LM Studio",
|
||||
)
|
||||
|
||||
def get_completions(self, document, complete_event):
|
||||
text = document.text_before_cursor
|
||||
|
||||
+37
-247
@@ -30,67 +30,34 @@ logger = logging.getLogger(__name__)
|
||||
_IS_WINDOWS = platform.system() == "Windows"
|
||||
_ENV_VAR_NAME_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$")
|
||||
_LAST_EXPANDED_CONFIG_BY_PATH: Dict[str, Any] = {}
|
||||
# (path, mtime_ns, size) -> cached expanded config dict.
|
||||
# load_config() returns a deepcopy of the cached value when the file
|
||||
# hasn't changed since the last load, skipping yaml.safe_load +
|
||||
# _deep_merge + _normalize_* + _expand_env_vars (~13 ms/call).
|
||||
# save_config() + migrate_config() write via atomic_yaml_write which
|
||||
# produces a fresh inode, so stat() sees a new mtime_ns and the next
|
||||
# load repopulates automatically — no explicit invalidation hook.
|
||||
_LOAD_CONFIG_CACHE: Dict[str, Tuple[int, int, Dict[str, Any]]] = {}
|
||||
# (path, mtime_ns, size) -> cached raw yaml dict. Same pattern as
|
||||
# _LOAD_CONFIG_CACHE but for read_raw_config() — used when callers want
|
||||
# the user's on-disk values without defaults merged in.
|
||||
_RAW_CONFIG_CACHE: Dict[str, Tuple[int, int, Dict[str, Any]]] = {}
|
||||
# Env var names written to .env that aren't in OPTIONAL_ENV_VARS
|
||||
# (managed by setup/provider flows directly).
|
||||
_EXTRA_ENV_KEYS = frozenset({
|
||||
"OPENAI_API_KEY", "OPENAI_BASE_URL",
|
||||
"ANTHROPIC_API_KEY", "ANTHROPIC_TOKEN",
|
||||
"DISCORD_HOME_CHANNEL", "DISCORD_HOME_CHANNEL_NAME",
|
||||
"TELEGRAM_HOME_CHANNEL", "TELEGRAM_HOME_CHANNEL_NAME",
|
||||
"SLACK_HOME_CHANNEL", "SLACK_HOME_CHANNEL_NAME",
|
||||
"DISCORD_HOME_CHANNEL", "TELEGRAM_HOME_CHANNEL",
|
||||
"SIGNAL_ACCOUNT", "SIGNAL_HTTP_URL",
|
||||
"SIGNAL_ALLOWED_USERS", "SIGNAL_GROUP_ALLOWED_USERS",
|
||||
"SIGNAL_HOME_CHANNEL", "SIGNAL_HOME_CHANNEL_NAME",
|
||||
"SMS_HOME_CHANNEL", "SMS_HOME_CHANNEL_NAME",
|
||||
"DINGTALK_CLIENT_ID", "DINGTALK_CLIENT_SECRET",
|
||||
"DINGTALK_HOME_CHANNEL", "DINGTALK_HOME_CHANNEL_NAME",
|
||||
"FEISHU_APP_ID", "FEISHU_APP_SECRET", "FEISHU_ENCRYPT_KEY", "FEISHU_VERIFICATION_TOKEN",
|
||||
"FEISHU_HOME_CHANNEL", "FEISHU_HOME_CHANNEL_NAME",
|
||||
"YUANBAO_HOME_CHANNEL", "YUANBAO_HOME_CHANNEL_NAME",
|
||||
"WECOM_BOT_ID", "WECOM_SECRET",
|
||||
"WECOM_CALLBACK_CORP_ID", "WECOM_CALLBACK_CORP_SECRET", "WECOM_CALLBACK_AGENT_ID",
|
||||
"WECOM_CALLBACK_TOKEN", "WECOM_CALLBACK_ENCODING_AES_KEY",
|
||||
"WECOM_CALLBACK_HOST", "WECOM_CALLBACK_PORT",
|
||||
"WECOM_HOME_CHANNEL", "WECOM_HOME_CHANNEL_NAME",
|
||||
"WEIXIN_ACCOUNT_ID", "WEIXIN_TOKEN", "WEIXIN_BASE_URL", "WEIXIN_CDN_BASE_URL",
|
||||
"WEIXIN_HOME_CHANNEL", "WEIXIN_HOME_CHANNEL_NAME", "WEIXIN_DM_POLICY", "WEIXIN_GROUP_POLICY",
|
||||
"WEIXIN_ALLOWED_USERS", "WEIXIN_GROUP_ALLOWED_USERS", "WEIXIN_ALLOW_ALL_USERS",
|
||||
"BLUEBUBBLES_SERVER_URL", "BLUEBUBBLES_PASSWORD",
|
||||
"BLUEBUBBLES_HOME_CHANNEL", "BLUEBUBBLES_HOME_CHANNEL_NAME",
|
||||
"QQ_APP_ID", "QQ_CLIENT_SECRET", "QQBOT_HOME_CHANNEL", "QQBOT_HOME_CHANNEL_NAME",
|
||||
"QQ_HOME_CHANNEL", "QQ_HOME_CHANNEL_NAME", # legacy aliases (pre-rename, still read for back-compat)
|
||||
"QQ_ALLOWED_USERS", "QQ_GROUP_ALLOWED_USERS", "QQ_ALLOW_ALL_USERS", "QQ_MARKDOWN_SUPPORT",
|
||||
"QQ_STT_API_KEY", "QQ_STT_BASE_URL", "QQ_STT_MODEL",
|
||||
"TERMINAL_ENV", "TERMINAL_SSH_KEY", "TERMINAL_SSH_PORT",
|
||||
"WHATSAPP_MODE", "WHATSAPP_ENABLED",
|
||||
"MATTERMOST_HOME_CHANNEL", "MATTERMOST_HOME_CHANNEL_NAME", "MATTERMOST_REPLY_MODE",
|
||||
"MATTERMOST_HOME_CHANNEL", "MATTERMOST_REPLY_MODE",
|
||||
"MATRIX_PASSWORD", "MATRIX_ENCRYPTION", "MATRIX_DEVICE_ID", "MATRIX_HOME_ROOM",
|
||||
"MATRIX_REQUIRE_MENTION", "MATRIX_FREE_RESPONSE_ROOMS", "MATRIX_AUTO_THREAD", "MATRIX_DM_AUTO_THREAD",
|
||||
"MATRIX_REQUIRE_MENTION", "MATRIX_FREE_RESPONSE_ROOMS", "MATRIX_AUTO_THREAD",
|
||||
"MATRIX_RECOVERY_KEY",
|
||||
# Langfuse observability plugin — optional tuning keys + standard SDK vars.
|
||||
# Activation is via plugins.enabled (opt-in through `hermes plugins enable
|
||||
# observability/langfuse` or `hermes tools → Langfuse`); credentials gate
|
||||
# the plugin at runtime.
|
||||
"HERMES_LANGFUSE_ENV",
|
||||
"HERMES_LANGFUSE_RELEASE",
|
||||
"HERMES_LANGFUSE_SAMPLE_RATE",
|
||||
"HERMES_LANGFUSE_MAX_CHARS",
|
||||
"HERMES_LANGFUSE_DEBUG",
|
||||
"LANGFUSE_PUBLIC_KEY",
|
||||
"LANGFUSE_SECRET_KEY",
|
||||
"LANGFUSE_BASE_URL",
|
||||
})
|
||||
import yaml
|
||||
|
||||
@@ -239,7 +206,6 @@ def get_container_exec_info() -> Optional[dict]:
|
||||
|
||||
# Re-export from hermes_constants — canonical definition lives there.
|
||||
from hermes_constants import get_hermes_home # noqa: F811,E402
|
||||
from utils import atomic_replace
|
||||
|
||||
def get_config_path() -> Path:
|
||||
"""Get the main config file path."""
|
||||
@@ -423,20 +389,6 @@ DEFAULT_CONFIG = {
|
||||
# (60+ tool iterations with tiny output) before users assume the
|
||||
# bot is dead and /restart.
|
||||
"gateway_notify_interval": 180,
|
||||
# Freshness window for the gateway auto-continue note (seconds).
|
||||
# After a gateway crash/restart/SIGTERM mid-run, the next user
|
||||
# message gets a "[System note: your previous turn was
|
||||
# interrupted — process the unfinished tool result(s) first]"
|
||||
# prepended so the model picks up where it left off. That's the
|
||||
# right behaviour while the interruption is fresh, but stale
|
||||
# markers (transcript last touched hours or days ago) can revive
|
||||
# an unrelated old task when the user's next message starts new
|
||||
# work. This window is the max age of the last persisted
|
||||
# transcript row for which we still inject the continue note.
|
||||
# Default 3600s comfortably covers a long turn (gateway_timeout
|
||||
# default is 1800s) plus runtime slack. Set to 0 to disable the
|
||||
# gate and restore pre-fix behaviour (always inject).
|
||||
"gateway_auto_continue_freshness": 3600,
|
||||
# How user-attached images are presented to the main model on each turn.
|
||||
# "auto" — attach natively when the active model reports
|
||||
# supports_vision=True AND the user hasn't explicitly
|
||||
@@ -594,7 +546,7 @@ DEFAULT_CONFIG = {
|
||||
"threshold": 0.50, # compress when context usage exceeds this ratio
|
||||
"target_ratio": 0.20, # fraction of threshold to preserve as recent tail
|
||||
"protect_last_n": 20, # minimum recent messages to keep uncompressed
|
||||
"hygiene_hard_message_limit": 400, # gateway session-hygiene force-compress threshold by message count
|
||||
|
||||
},
|
||||
|
||||
# Anthropic prompt caching (Claude via OpenRouter or native Anthropic API).
|
||||
@@ -703,11 +655,6 @@ DEFAULT_CONFIG = {
|
||||
"personality": "kawaii",
|
||||
"resume_display": "full",
|
||||
"busy_input_mode": "interrupt", # interrupt | queue | steer
|
||||
# When true, `hermes --tui` auto-resumes the most recent human-
|
||||
# facing session on launch instead of forging a fresh one.
|
||||
# Mirrors `hermes -c` muscle memory. Default off so existing
|
||||
# users aren't surprised. HERMES_TUI_RESUME=<id> always wins.
|
||||
"tui_auto_resume_recent": False,
|
||||
"bell_on_complete": False,
|
||||
"show_reasoning": False,
|
||||
"streaming": False,
|
||||
@@ -715,9 +662,6 @@ DEFAULT_CONFIG = {
|
||||
"inline_diffs": True, # Show inline diff previews for write actions (write_file, patch, skill_manage)
|
||||
"show_cost": False, # Show $ cost in the status bar (off by default)
|
||||
"skin": "default",
|
||||
# TUI busy indicator style: kaomoji (default), emoji, unicode (braille
|
||||
# spinner), or ascii. Live-swappable via `/indicator <style>`.
|
||||
"tui_status_indicator": "kaomoji",
|
||||
"user_message_preview": { # CLI: how many submitted user-message lines to echo back in scrollback
|
||||
"first_lines": 2,
|
||||
"last_lines": 2,
|
||||
@@ -727,14 +671,6 @@ DEFAULT_CONFIG = {
|
||||
"tool_progress_overrides": {}, # DEPRECATED — use display.platforms instead
|
||||
"tool_preview_length": 0, # Max chars for tool call previews (0 = no limit, show full paths/commands)
|
||||
"platforms": {}, # Per-platform display overrides: {"telegram": {"tool_progress": "all"}, "slack": {"tool_progress": "off"}}
|
||||
# Gateway runtime-metadata footer appended to the FINAL message of a turn
|
||||
# (disabled by default to keep replies minimal). When enabled, renders
|
||||
# e.g. `model · 68% · ~/projects/hermes`. Per-platform overrides go under
|
||||
# display.platforms.<platform>.runtime_footer.
|
||||
"runtime_footer": {
|
||||
"enabled": False,
|
||||
"fields": ["model", "context_pct", "cwd"], # Order shown; drop any to hide
|
||||
},
|
||||
},
|
||||
|
||||
# Web dashboard settings
|
||||
@@ -952,7 +888,6 @@ DEFAULT_CONFIG = {
|
||||
|
||||
# Telegram platform settings (gateway mode)
|
||||
"telegram": {
|
||||
"reactions": False, # Add 👀/✅/❌ reactions to messages during processing
|
||||
"channel_prompts": {}, # Per-chat/topic ephemeral system prompts (topics inherit from parent group)
|
||||
},
|
||||
|
||||
@@ -1007,7 +942,7 @@ DEFAULT_CONFIG = {
|
||||
# Pre-exec security scanning via tirith
|
||||
"security": {
|
||||
"allow_private_urls": False, # Allow requests to private/internal IPs (for OpenWrt, proxies, VPNs)
|
||||
"redact_secrets": False,
|
||||
"redact_secrets": True,
|
||||
"tirith_enabled": True,
|
||||
"tirith_path": "tirith",
|
||||
"tirith_timeout": 5,
|
||||
@@ -1231,22 +1166,6 @@ OPTIONAL_ENV_VARS = {
|
||||
"category": "provider",
|
||||
"advanced": True,
|
||||
},
|
||||
"LM_API_KEY": {
|
||||
"description": "LM Studio bearer token for auth-enabled local servers",
|
||||
"prompt": "LM Studio API key / bearer token",
|
||||
"url": None,
|
||||
"password": True,
|
||||
"category": "provider",
|
||||
"advanced": True,
|
||||
},
|
||||
"LM_BASE_URL": {
|
||||
"description": "LM Studio base URL override",
|
||||
"prompt": "LM Studio base URL (leave empty for default)",
|
||||
"url": None,
|
||||
"password": False,
|
||||
"category": "provider",
|
||||
"advanced": True,
|
||||
},
|
||||
"GLM_API_KEY": {
|
||||
"description": "Z.AI / GLM API key (also recognized as ZAI_API_KEY / Z_AI_API_KEY)",
|
||||
"prompt": "Z.AI / GLM API key",
|
||||
@@ -1335,22 +1254,6 @@ OPTIONAL_ENV_VARS = {
|
||||
"category": "provider",
|
||||
"advanced": True,
|
||||
},
|
||||
"GMI_API_KEY": {
|
||||
"description": "GMI Cloud API key",
|
||||
"prompt": "GMI Cloud API key",
|
||||
"url": "https://www.gmicloud.ai/",
|
||||
"password": True,
|
||||
"category": "provider",
|
||||
"advanced": True,
|
||||
},
|
||||
"GMI_BASE_URL": {
|
||||
"description": "GMI Cloud base URL override",
|
||||
"prompt": "GMI Cloud base URL (leave empty for default)",
|
||||
"url": None,
|
||||
"password": False,
|
||||
"category": "provider",
|
||||
"advanced": True,
|
||||
},
|
||||
"MINIMAX_API_KEY": {
|
||||
"description": "MiniMax API key (international)",
|
||||
"prompt": "MiniMax API key",
|
||||
@@ -1773,30 +1676,6 @@ OPTIONAL_ENV_VARS = {
|
||||
"category": "tool",
|
||||
},
|
||||
|
||||
# ── Langfuse observability ──
|
||||
"HERMES_LANGFUSE_PUBLIC_KEY": {
|
||||
"description": "Langfuse project public key (pk-lf-...)",
|
||||
"prompt": "Langfuse public key",
|
||||
"url": "https://cloud.langfuse.com",
|
||||
"password": False,
|
||||
"category": "tool",
|
||||
},
|
||||
"HERMES_LANGFUSE_SECRET_KEY": {
|
||||
"description": "Langfuse project secret key (sk-lf-...)",
|
||||
"prompt": "Langfuse secret key",
|
||||
"url": "https://cloud.langfuse.com",
|
||||
"password": True,
|
||||
"category": "tool",
|
||||
},
|
||||
"HERMES_LANGFUSE_BASE_URL": {
|
||||
"description": "Langfuse server URL (default: https://cloud.langfuse.com)",
|
||||
"prompt": "Langfuse server URL (leave empty for cloud.langfuse.com)",
|
||||
"url": None,
|
||||
"password": False,
|
||||
"category": "tool",
|
||||
"advanced": True,
|
||||
},
|
||||
|
||||
# ── Messaging platforms ──
|
||||
"TELEGRAM_BOT_TOKEN": {
|
||||
"description": "Telegram bot token from @BotFather",
|
||||
@@ -1944,14 +1823,6 @@ OPTIONAL_ENV_VARS = {
|
||||
"category": "messaging",
|
||||
"advanced": True,
|
||||
},
|
||||
"MATRIX_DM_AUTO_THREAD": {
|
||||
"description": "Auto-create threads for DM messages in Matrix (default: false)",
|
||||
"prompt": "Auto-create threads in DMs (true/false)",
|
||||
"url": None,
|
||||
"password": False,
|
||||
"category": "messaging",
|
||||
"advanced": True,
|
||||
},
|
||||
"MATRIX_DEVICE_ID": {
|
||||
"description": "Stable Matrix device ID for E2EE persistence across restarts (e.g. HERMES_BOT)",
|
||||
"prompt": "Matrix device ID (stable across restarts)",
|
||||
@@ -2293,21 +2164,14 @@ def _normalize_custom_provider_entry(
|
||||
"baseUrl": "base_url",
|
||||
"apiMode": "api_mode",
|
||||
"keyEnv": "key_env",
|
||||
"apiKeyEnv": "key_env", # alias — OpenClaw-compatible + docs variant
|
||||
"defaultModel": "default_model",
|
||||
"contextLength": "context_length",
|
||||
"rateLimitDelay": "rate_limit_delay",
|
||||
}
|
||||
# api_key_env is a documented snake_case alias for key_env (see
|
||||
# website/docs/guides/azure-foundry.md). Normalize it up front so the
|
||||
# rest of the normalizer treats it as the canonical field.
|
||||
if "api_key_env" in entry and "key_env" not in entry:
|
||||
entry["key_env"] = entry["api_key_env"]
|
||||
_KNOWN_KEYS = {
|
||||
"name", "api", "url", "base_url", "api_key", "key_env", "api_key_env",
|
||||
"name", "api", "url", "base_url", "api_key", "key_env",
|
||||
"api_mode", "transport", "model", "default_model", "models",
|
||||
"context_length", "rate_limit_delay",
|
||||
"request_timeout_seconds", "stale_timeout_seconds",
|
||||
}
|
||||
for camel, snake in _CAMEL_ALIASES.items():
|
||||
if camel in entry and snake not in entry:
|
||||
@@ -2559,9 +2423,6 @@ _KNOWN_ROOT_KEYS = {
|
||||
_VALID_CUSTOM_PROVIDER_FIELDS = {
|
||||
"name", "base_url", "api_key", "api_mode", "model", "models",
|
||||
"context_length", "rate_limit_delay",
|
||||
# key_env is read at runtime by runtime_provider.py and auxiliary_client.py
|
||||
# — include it here so the set accurately describes the supported schema.
|
||||
"key_env",
|
||||
}
|
||||
|
||||
# Fields that look like they should be inside custom_providers, not at root
|
||||
@@ -2638,32 +2499,10 @@ def validate_config_structure(config: Optional[Dict[str, Any]] = None) -> List["
|
||||
"Add the API endpoint URL, e.g.: base_url: https://api.example.com/v1",
|
||||
))
|
||||
|
||||
# ── fallback_model: single dict OR list of dicts (chain) ─────────────
|
||||
# ── fallback_model must be a top-level dict with provider + model ────
|
||||
fb = config.get("fallback_model")
|
||||
if fb is not None:
|
||||
if isinstance(fb, list):
|
||||
# Chain fallback — validate each entry
|
||||
for i, entry in enumerate(fb):
|
||||
if not isinstance(entry, dict):
|
||||
issues.append(ConfigIssue(
|
||||
"error",
|
||||
f"fallback_model[{i}] should be a dict, got {type(entry).__name__}",
|
||||
"Each entry needs provider + model",
|
||||
))
|
||||
else:
|
||||
if not entry.get("provider"):
|
||||
issues.append(ConfigIssue(
|
||||
"warning",
|
||||
f"fallback_model[{i}] is missing 'provider' field",
|
||||
"Add: provider: openrouter (or another provider)",
|
||||
))
|
||||
if not entry.get("model"):
|
||||
issues.append(ConfigIssue(
|
||||
"warning",
|
||||
f"fallback_model[{i}] is missing 'model' field",
|
||||
"Add: model: <model-name>",
|
||||
))
|
||||
elif not isinstance(fb, dict):
|
||||
if not isinstance(fb, dict):
|
||||
issues.append(ConfigIssue(
|
||||
"error",
|
||||
f"fallback_model should be a dict with 'provider' and 'model', got {type(fb).__name__}",
|
||||
@@ -3456,62 +3295,25 @@ def read_raw_config() -> Dict[str, Any]:
|
||||
be parsed. Use this for lightweight config reads where you just need a
|
||||
single value and don't want the overhead of ``load_config()``'s deep-merge
|
||||
+ migration pipeline.
|
||||
|
||||
Cached on the config file's (mtime_ns, size) — same strategy as
|
||||
``load_config()``. Returns a deepcopy on every call since some callers
|
||||
mutate the result before passing to ``save_config()``.
|
||||
"""
|
||||
try:
|
||||
config_path = get_config_path()
|
||||
st = config_path.stat()
|
||||
cache_key = (st.st_mtime_ns, st.st_size)
|
||||
except (FileNotFoundError, OSError):
|
||||
return {}
|
||||
|
||||
path_key = str(config_path)
|
||||
cached = _RAW_CONFIG_CACHE.get(path_key)
|
||||
if cached is not None and cached[:2] == cache_key:
|
||||
return copy.deepcopy(cached[2])
|
||||
|
||||
try:
|
||||
with open(config_path, encoding="utf-8") as f:
|
||||
data = yaml.safe_load(f) or {}
|
||||
if config_path.exists():
|
||||
with open(config_path, encoding="utf-8") as f:
|
||||
return yaml.safe_load(f) or {}
|
||||
except Exception:
|
||||
return {}
|
||||
|
||||
if not isinstance(data, dict):
|
||||
data = {}
|
||||
_RAW_CONFIG_CACHE[path_key] = (cache_key[0], cache_key[1], copy.deepcopy(data))
|
||||
return data
|
||||
pass
|
||||
return {}
|
||||
|
||||
|
||||
def load_config() -> Dict[str, Any]:
|
||||
"""Load configuration from ~/.hermes/config.yaml.
|
||||
|
||||
Cached on the config file's (mtime_ns, size). Returns a deepcopy of
|
||||
the cached value when unchanged, since most call sites mutate the
|
||||
result (e.g. ``cfg["model"]["default"] = ...`` before ``save_config``).
|
||||
The cache is keyed on ``str(config_path)`` so profile switches
|
||||
(which change ``HERMES_HOME`` and therefore ``get_config_path()``)
|
||||
don't collide.
|
||||
"""
|
||||
"""Load configuration from ~/.hermes/config.yaml."""
|
||||
ensure_hermes_home()
|
||||
config_path = get_config_path()
|
||||
path_key = str(config_path)
|
||||
|
||||
try:
|
||||
st = config_path.stat()
|
||||
cache_key: Optional[Tuple[int, int]] = (st.st_mtime_ns, st.st_size)
|
||||
except FileNotFoundError:
|
||||
cache_key = None
|
||||
|
||||
cached = _LOAD_CONFIG_CACHE.get(path_key)
|
||||
if cached is not None and cache_key is not None and cached[:2] == cache_key:
|
||||
return copy.deepcopy(cached[2])
|
||||
|
||||
|
||||
config = copy.deepcopy(DEFAULT_CONFIG)
|
||||
|
||||
if cache_key is not None:
|
||||
|
||||
if config_path.exists():
|
||||
try:
|
||||
with open(config_path, encoding="utf-8") as f:
|
||||
user_config = yaml.safe_load(f) or {}
|
||||
@@ -3529,26 +3331,20 @@ def load_config() -> Dict[str, Any]:
|
||||
|
||||
normalized = _normalize_root_model_keys(_normalize_max_turns_config(config))
|
||||
expanded = _expand_env_vars(normalized)
|
||||
_LAST_EXPANDED_CONFIG_BY_PATH[path_key] = copy.deepcopy(expanded)
|
||||
if cache_key is not None:
|
||||
_LOAD_CONFIG_CACHE[path_key] = (cache_key[0], cache_key[1], copy.deepcopy(expanded))
|
||||
else:
|
||||
_LOAD_CONFIG_CACHE.pop(path_key, None)
|
||||
_LAST_EXPANDED_CONFIG_BY_PATH[str(config_path)] = copy.deepcopy(expanded)
|
||||
return expanded
|
||||
|
||||
|
||||
_SECURITY_COMMENT = """
|
||||
# ── Security ──────────────────────────────────────────────────────────
|
||||
# Secret redaction is OFF by default — tool output (terminal stdout,
|
||||
# read_file results, web content) passes through unmodified. Set
|
||||
# redact_secrets to true to mask strings that look like API keys, tokens,
|
||||
# and passwords before they enter the model context and logs.
|
||||
# API keys, tokens, and passwords are redacted from tool output by default.
|
||||
# Set to false to see full values (useful for debugging auth issues).
|
||||
# tirith pre-exec scanning is enabled by default when the tirith binary
|
||||
# is available. Configure via security.tirith_* keys or env vars
|
||||
# (TIRITH_ENABLED, TIRITH_BIN, TIRITH_TIMEOUT, TIRITH_FAIL_OPEN).
|
||||
#
|
||||
# security:
|
||||
# redact_secrets: true
|
||||
# redact_secrets: false
|
||||
# tirith_enabled: true
|
||||
# tirith_path: "tirith"
|
||||
# tirith_timeout: 5
|
||||
@@ -3581,11 +3377,11 @@ _FALLBACK_COMMENT = """
|
||||
|
||||
_COMMENTED_SECTIONS = """
|
||||
# ── Security ──────────────────────────────────────────────────────────
|
||||
# Secret redaction is OFF by default. Set to true to mask strings that
|
||||
# look like API keys, tokens, and passwords in tool output and logs.
|
||||
# API keys, tokens, and passwords are redacted from tool output by default.
|
||||
# Set to false to see full values (useful for debugging auth issues).
|
||||
#
|
||||
# security:
|
||||
# redact_secrets: true
|
||||
# redact_secrets: false
|
||||
|
||||
# ── Fallback Model ────────────────────────────────────────────────────
|
||||
# Automatic provider failover when primary is unavailable.
|
||||
@@ -3636,12 +3432,7 @@ def save_config(config: Dict[str, Any]):
|
||||
if not sec or sec.get("redact_secrets") is None:
|
||||
parts.append(_SECURITY_COMMENT)
|
||||
fb = normalized.get("fallback_model", {})
|
||||
fb_is_valid = False
|
||||
if isinstance(fb, list):
|
||||
fb_is_valid = any(isinstance(e, dict) and e.get("provider") and e.get("model") for e in fb)
|
||||
elif isinstance(fb, dict):
|
||||
fb_is_valid = bool(fb.get("provider") and fb.get("model"))
|
||||
if not fb_is_valid:
|
||||
if not fb or not isinstance(fb, dict) or not (fb.get("provider") and fb.get("model")):
|
||||
parts.append(_FALLBACK_COMMENT)
|
||||
|
||||
atomic_yaml_write(
|
||||
@@ -3767,7 +3558,7 @@ def sanitize_env_file() -> int:
|
||||
f.writelines(sanitized)
|
||||
f.flush()
|
||||
os.fsync(f.fileno())
|
||||
atomic_replace(tmp_path, env_path)
|
||||
os.replace(tmp_path, env_path)
|
||||
except BaseException:
|
||||
try:
|
||||
os.unlink(tmp_path)
|
||||
@@ -3830,7 +3621,7 @@ def save_env_value(key: str, value: str):
|
||||
value = _check_non_ascii_credential(key, value)
|
||||
ensure_hermes_home()
|
||||
env_path = get_env_path()
|
||||
|
||||
|
||||
# On Windows, open() defaults to the system locale (cp1252) which can
|
||||
# cause OSError errno 22 on UTF-8 .env files.
|
||||
read_kw = {"encoding": "utf-8", "errors": "replace"} if _IS_WINDOWS else {}
|
||||
@@ -3842,7 +3633,7 @@ def save_env_value(key: str, value: str):
|
||||
lines = f.readlines()
|
||||
# Sanitize on every read: split concatenated keys, drop stale placeholders
|
||||
lines = _sanitize_env_lines(lines)
|
||||
|
||||
|
||||
# Find and update or append
|
||||
found = False
|
||||
for i, line in enumerate(lines):
|
||||
@@ -3850,7 +3641,7 @@ def save_env_value(key: str, value: str):
|
||||
lines[i] = f"{key}={value}\n"
|
||||
found = True
|
||||
break
|
||||
|
||||
|
||||
if not found:
|
||||
# Ensure there's a newline at the end of the file before appending
|
||||
if lines and not lines[-1].endswith("\n"):
|
||||
@@ -3870,7 +3661,7 @@ def save_env_value(key: str, value: str):
|
||||
f.writelines(lines)
|
||||
f.flush()
|
||||
os.fsync(f.fileno())
|
||||
atomic_replace(tmp_path, env_path)
|
||||
os.replace(tmp_path, env_path)
|
||||
# Restore original permissions before _secure_file may tighten them.
|
||||
if original_mode is not None:
|
||||
try:
|
||||
@@ -3926,7 +3717,7 @@ def remove_env_value(key: str) -> bool:
|
||||
f.writelines(new_lines)
|
||||
f.flush()
|
||||
os.fsync(f.fileno())
|
||||
atomic_replace(tmp_path, env_path)
|
||||
os.replace(tmp_path, env_path)
|
||||
if original_mode is not None:
|
||||
try:
|
||||
os.chmod(env_path, original_mode)
|
||||
@@ -4013,13 +3804,12 @@ def get_env_value(key: str) -> Optional[str]:
|
||||
# =============================================================================
|
||||
|
||||
def redact_key(key: str) -> str:
|
||||
"""Redact an API key for display.
|
||||
|
||||
Thin wrapper over :func:`agent.redact.mask_secret` — preserves the
|
||||
"(not set)" placeholder in dim color for the empty case.
|
||||
"""
|
||||
from agent.redact import mask_secret
|
||||
return mask_secret(key, empty=color("(not set)", Colors.DIM))
|
||||
"""Redact an API key for display."""
|
||||
if not key:
|
||||
return color("(not set)", Colors.DIM)
|
||||
if len(key) < 12:
|
||||
return "***"
|
||||
return key[:4] + "..." + key[-4:]
|
||||
|
||||
|
||||
def show_config():
|
||||
|
||||
+2
-2
@@ -7,6 +7,7 @@ Currently supports:
|
||||
|
||||
import io
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import urllib.error
|
||||
@@ -17,7 +18,6 @@ from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from hermes_constants import get_hermes_home
|
||||
from utils import atomic_replace
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -79,7 +79,7 @@ def _save_pending(entries: list[dict]) -> None:
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
tmp = path.with_suffix(".json.tmp")
|
||||
tmp.write_text(json.dumps(entries, indent=2), encoding="utf-8")
|
||||
atomic_replace(tmp, path)
|
||||
os.replace(tmp, path)
|
||||
except OSError:
|
||||
# Non-fatal — worst case the user has to run ``hermes debug delete``
|
||||
# manually.
|
||||
|
||||
@@ -13,6 +13,7 @@ automatically.
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import io
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
|
||||
+13
-78
@@ -46,7 +46,6 @@ _PROVIDER_ENV_HINTS = (
|
||||
"Z_AI_API_KEY",
|
||||
"KIMI_API_KEY",
|
||||
"KIMI_CN_API_KEY",
|
||||
"GMI_API_KEY",
|
||||
"MINIMAX_API_KEY",
|
||||
"MINIMAX_CN_API_KEY",
|
||||
"KILOCODE_API_KEY",
|
||||
@@ -57,7 +56,6 @@ _PROVIDER_ENV_HINTS = (
|
||||
"OPENCODE_ZEN_API_KEY",
|
||||
"OPENCODE_GO_API_KEY",
|
||||
"XIAOMI_API_KEY",
|
||||
"TOKENHUB_API_KEY",
|
||||
)
|
||||
|
||||
|
||||
@@ -293,23 +291,15 @@ def run_doctor(args):
|
||||
|
||||
known_providers: set = set()
|
||||
try:
|
||||
from hermes_cli.auth import (
|
||||
PROVIDER_REGISTRY,
|
||||
resolve_provider as _resolve_auth_provider,
|
||||
)
|
||||
from hermes_cli.auth import PROVIDER_REGISTRY
|
||||
known_providers = set(PROVIDER_REGISTRY.keys()) | {"openrouter", "custom", "auto"}
|
||||
except Exception:
|
||||
_resolve_auth_provider = None
|
||||
pass
|
||||
try:
|
||||
from hermes_cli.config import get_compatible_custom_providers as _compatible_custom_providers
|
||||
from hermes_cli.providers import (
|
||||
normalize_provider as _normalize_catalog_provider,
|
||||
resolve_provider_full as _resolve_provider_full,
|
||||
)
|
||||
from hermes_cli.providers import resolve_provider_full as _resolve_provider_full
|
||||
except Exception:
|
||||
_compatible_custom_providers = None
|
||||
_normalize_catalog_provider = None
|
||||
_resolve_provider_full = None
|
||||
|
||||
custom_providers = []
|
||||
@@ -329,43 +319,17 @@ def run_doctor(args):
|
||||
if name:
|
||||
known_providers.add("custom:" + name.lower().replace(" ", "-"))
|
||||
|
||||
valid_provider_ids = set(known_providers)
|
||||
provider_ids_to_accept = {provider} if provider else set()
|
||||
if _normalize_catalog_provider is not None:
|
||||
for known_provider in known_providers:
|
||||
try:
|
||||
valid_provider_ids.add(_normalize_catalog_provider(known_provider))
|
||||
except Exception:
|
||||
continue
|
||||
|
||||
runtime_provider = provider
|
||||
if (
|
||||
provider
|
||||
and _resolve_auth_provider is not None
|
||||
and provider not in ("auto", "custom")
|
||||
):
|
||||
try:
|
||||
runtime_provider = _resolve_auth_provider(provider)
|
||||
provider_ids_to_accept.add(runtime_provider)
|
||||
except Exception:
|
||||
runtime_provider = provider
|
||||
|
||||
catalog_provider = provider
|
||||
canonical_provider = provider
|
||||
if (
|
||||
provider
|
||||
and _resolve_provider_full is not None
|
||||
and provider not in ("auto", "custom")
|
||||
):
|
||||
provider_def = _resolve_provider_full(provider, user_providers, custom_providers)
|
||||
catalog_provider = provider_def.id if provider_def is not None else None
|
||||
if catalog_provider is not None:
|
||||
provider_ids_to_accept.add(catalog_provider)
|
||||
canonical_provider = provider_def.id if provider_def is not None else None
|
||||
|
||||
if provider and provider != "auto":
|
||||
if catalog_provider is None or (
|
||||
known_providers
|
||||
and not (provider_ids_to_accept & valid_provider_ids)
|
||||
):
|
||||
if canonical_provider is None or (known_providers and canonical_provider not in known_providers):
|
||||
known_list = ", ".join(sorted(known_providers)) if known_providers else "(unavailable)"
|
||||
check_fail(
|
||||
f"model.provider '{provider_raw}' is not a recognised provider",
|
||||
@@ -378,24 +342,7 @@ def run_doctor(args):
|
||||
)
|
||||
|
||||
# Warn if model is set to a provider-prefixed name on a provider that doesn't use them
|
||||
provider_for_policy = runtime_provider or catalog_provider
|
||||
providers_accepting_vendor_slugs = {
|
||||
"openrouter",
|
||||
"custom",
|
||||
"auto",
|
||||
"ai-gateway",
|
||||
"kilocode",
|
||||
"opencode-zen",
|
||||
"huggingface",
|
||||
"lmstudio",
|
||||
"nous",
|
||||
}
|
||||
if (
|
||||
default_model
|
||||
and "/" in default_model
|
||||
and provider_for_policy
|
||||
and provider_for_policy not in providers_accepting_vendor_slugs
|
||||
):
|
||||
if default_model and "/" in default_model and canonical_provider and canonical_provider not in ("openrouter", "custom", "auto", "ai-gateway", "kilocode", "opencode-zen", "huggingface", "nous"):
|
||||
check_warn(
|
||||
f"model.default '{default_model}' uses a vendor/model slug but provider is '{provider_raw}'",
|
||||
"(vendor-prefixed slugs belong to aggregators like openrouter)",
|
||||
@@ -411,24 +358,20 @@ def run_doctor(args):
|
||||
# own env-var checks elsewhere in doctor, and get_auth_status()
|
||||
# returns a bare {logged_in: False} for anything it doesn't
|
||||
# explicitly dispatch, which would produce false positives.
|
||||
if runtime_provider and runtime_provider not in ("auto", "custom", "openrouter"):
|
||||
if canonical_provider and canonical_provider not in ("auto", "custom", "openrouter"):
|
||||
try:
|
||||
from hermes_cli.auth import PROVIDER_REGISTRY, get_auth_status
|
||||
pconfig = PROVIDER_REGISTRY.get(runtime_provider)
|
||||
pconfig = PROVIDER_REGISTRY.get(canonical_provider)
|
||||
if pconfig and getattr(pconfig, "auth_type", "") == "api_key":
|
||||
status = get_auth_status(runtime_provider) or {}
|
||||
configured = bool(
|
||||
status.get("configured")
|
||||
or status.get("logged_in")
|
||||
or status.get("api_key")
|
||||
)
|
||||
status = get_auth_status(canonical_provider) or {}
|
||||
configured = bool(status.get("configured") or status.get("logged_in") or status.get("api_key"))
|
||||
if not configured:
|
||||
check_fail(
|
||||
f"model.provider '{runtime_provider}' is set but no API key is configured",
|
||||
f"model.provider '{canonical_provider}' is set but no API key is configured",
|
||||
"(check ~/.hermes/.env or run 'hermes setup')",
|
||||
)
|
||||
issues.append(
|
||||
f"No credentials found for provider '{runtime_provider}'. "
|
||||
f"No credentials found for provider '{canonical_provider}'. "
|
||||
f"Run 'hermes setup' or set the provider's API key in {_DHH}/.env, "
|
||||
f"or switch providers with 'hermes config set model.provider <name>'"
|
||||
)
|
||||
@@ -572,14 +515,7 @@ def run_doctor(args):
|
||||
if shutil.which("codex"):
|
||||
check_ok("codex CLI")
|
||||
else:
|
||||
# Native OAuth uses Hermes' own device-code flow — the Codex CLI is
|
||||
# only needed if you want to import existing tokens from
|
||||
# ~/.codex/auth.json. Downgrade to info so users running
|
||||
# `hermes auth openai-codex` aren't told they're missing something.
|
||||
check_info(
|
||||
"codex CLI not installed "
|
||||
"(optional — only required to import tokens from an existing Codex CLI login)"
|
||||
)
|
||||
check_warn("codex CLI not found", "(required for openai-codex login)")
|
||||
|
||||
# =========================================================================
|
||||
# Check: Directory structure
|
||||
@@ -1001,7 +937,6 @@ def run_doctor(args):
|
||||
("StepFun Step Plan", ("STEPFUN_API_KEY",), "https://api.stepfun.ai/step_plan/v1/models", "STEPFUN_BASE_URL", True),
|
||||
("Kimi / Moonshot (China)", ("KIMI_CN_API_KEY",), "https://api.moonshot.cn/v1/models", None, True),
|
||||
("Arcee AI", ("ARCEEAI_API_KEY",), "https://api.arcee.ai/api/v1/models", "ARCEE_BASE_URL", True),
|
||||
("GMI Cloud", ("GMI_API_KEY",), "https://api.gmi-serving.com/v1/models", "GMI_BASE_URL", True),
|
||||
("DeepSeek", ("DEEPSEEK_API_KEY",), "https://api.deepseek.com/v1/models", "DEEPSEEK_BASE_URL", True),
|
||||
("Hugging Face", ("HF_TOKEN",), "https://router.huggingface.co/v1/models", "HF_BASE_URL", True),
|
||||
("NVIDIA NIM", ("NVIDIA_API_KEY",), "https://integrate.api.nvidia.com/v1/models", "NVIDIA_BASE_URL", True),
|
||||
|
||||
+6
-8
@@ -33,14 +33,12 @@ def _get_git_commit(project_root: Path) -> str:
|
||||
|
||||
|
||||
def _redact(value: str) -> str:
|
||||
"""Redact all but first 4 and last 4 chars.
|
||||
|
||||
Thin wrapper over :func:`agent.redact.mask_secret`. Returns ``""`` for
|
||||
an empty value (matches the historical behavior of this helper —
|
||||
``hermes dump`` formats empty values as blank, not as ``"(not set)"``).
|
||||
"""
|
||||
from agent.redact import mask_secret
|
||||
return mask_secret(value)
|
||||
"""Redact all but first 4 and last 4 chars."""
|
||||
if not value:
|
||||
return ""
|
||||
if len(value) < 12:
|
||||
return "***"
|
||||
return value[:4] + "..." + value[-4:]
|
||||
|
||||
|
||||
def _gateway_status() -> str:
|
||||
|
||||
@@ -7,7 +7,6 @@ import sys
|
||||
from pathlib import Path
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from utils import atomic_replace
|
||||
|
||||
|
||||
# Env var name suffixes that indicate credential values. These are the
|
||||
@@ -128,7 +127,7 @@ def _sanitize_env_file_if_needed(path: Path) -> None:
|
||||
f.writelines(sanitized)
|
||||
f.flush()
|
||||
os.fsync(f.fileno())
|
||||
atomic_replace(tmp, path)
|
||||
os.replace(tmp, path)
|
||||
except BaseException:
|
||||
try:
|
||||
os.unlink(tmp)
|
||||
|
||||
@@ -2953,7 +2953,7 @@ def _setup_sms():
|
||||
def _setup_dingtalk():
|
||||
"""Configure DingTalk — QR scan (recommended) or manual credential entry."""
|
||||
from hermes_cli.setup import (
|
||||
prompt_choice, prompt_yes_no, print_success, print_warning,
|
||||
prompt_choice, prompt_yes_no, print_info, print_success, print_warning,
|
||||
)
|
||||
|
||||
dingtalk_platform = next(p for p in _PLATFORMS if p["key"] == "dingtalk")
|
||||
@@ -3504,6 +3504,7 @@ def _setup_qqbot():
|
||||
method_idx = prompt_choice(" How would you like to set up QQ Bot?", method_choices, 0)
|
||||
|
||||
credentials = None
|
||||
used_qr = False
|
||||
|
||||
if method_idx == 0:
|
||||
# ── QR scan-to-configure ──
|
||||
@@ -3514,6 +3515,8 @@ def _setup_qqbot():
|
||||
print()
|
||||
print_warning(" QQ Bot setup cancelled.")
|
||||
return
|
||||
if credentials:
|
||||
used_qr = True
|
||||
if not credentials:
|
||||
print_info(" QR setup did not complete. Continuing with manual input.")
|
||||
|
||||
|
||||
+2
-1
@@ -19,8 +19,9 @@ format) lives there.
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
|
||||
def hooks_command(args) -> None:
|
||||
|
||||
+38
-219
@@ -829,29 +829,8 @@ def _print_tui_exit_summary(session_id: Optional[str], active_session_file: Opti
|
||||
)
|
||||
|
||||
|
||||
_NPM_LOCK_RUNTIME_KEYS = frozenset({"ideallyInert"})
|
||||
|
||||
|
||||
def _tui_need_npm_install(root: Path) -> bool:
|
||||
"""True when @hermes/ink is missing or node_modules is behind package-lock.json.
|
||||
|
||||
Compares ``package-lock.json`` against ``node_modules/.package-lock.json``
|
||||
(npm's hidden lockfile) by **content**, not mtime: git checkouts and npm
|
||||
rewrites can bump the root lockfile's timestamp even when installed deps
|
||||
already match, which used to trigger a spurious "Installing TUI
|
||||
dependencies" on every launch.
|
||||
|
||||
For each entry in the root lock's ``packages`` map:
|
||||
- missing from hidden lock → reinstall (unless the entry is marked
|
||||
``optional`` or ``peer``, which npm may intentionally skip per platform)
|
||||
- present but with differing fields (excluding npm-written runtime
|
||||
annotations like ``ideallyInert``) → reinstall
|
||||
|
||||
Extra entries that exist only in the hidden lock are ignored — stale
|
||||
transitives left over from a removed dependency don't break runtime and
|
||||
we'd rather not force a reinstall for them. Falls back to mtime
|
||||
comparison if either lockfile is unparseable.
|
||||
"""
|
||||
"""True when @hermes/ink is missing or node_modules is behind package-lock.json (post-pull)."""
|
||||
ink = root / "node_modules" / "@hermes" / "ink" / "package.json"
|
||||
if not ink.is_file():
|
||||
return True
|
||||
@@ -861,35 +840,7 @@ def _tui_need_npm_install(root: Path) -> bool:
|
||||
marker = root / "node_modules" / ".package-lock.json"
|
||||
if not marker.is_file():
|
||||
return True
|
||||
|
||||
# Compare lockfile contents, not mtimes: git checkouts and npm rewrites
|
||||
# can bump the root lockfile timestamp even when installed deps already
|
||||
# match. Fall back to mtime when either file is unparseable.
|
||||
try:
|
||||
wanted = json.loads(lock.read_text(encoding="utf-8")).get("packages") or {}
|
||||
installed = json.loads(marker.read_text(encoding="utf-8")).get("packages") or {}
|
||||
except (OSError, UnicodeDecodeError, json.JSONDecodeError):
|
||||
return lock.stat().st_mtime > marker.stat().st_mtime
|
||||
|
||||
def comparable(pkg: dict) -> dict:
|
||||
return {k: v for k, v in pkg.items() if k not in _NPM_LOCK_RUNTIME_KEYS}
|
||||
|
||||
for name, pkg in wanted.items():
|
||||
if not name:
|
||||
continue
|
||||
|
||||
if not isinstance(pkg, dict):
|
||||
continue
|
||||
|
||||
if name not in installed:
|
||||
if pkg.get("optional") or pkg.get("peer"):
|
||||
continue
|
||||
return True
|
||||
|
||||
if isinstance(installed[name], dict) and comparable(pkg) != comparable(installed[name]):
|
||||
return True
|
||||
|
||||
return False
|
||||
return lock.stat().st_mtime > marker.stat().st_mtime
|
||||
|
||||
|
||||
def _find_bundled_tui(tui_dir: Path) -> Optional[Path]:
|
||||
@@ -1817,11 +1768,8 @@ def select_provider_and_model(args=None):
|
||||
"huggingface",
|
||||
"xiaomi",
|
||||
"arcee",
|
||||
"gmi",
|
||||
"nvidia",
|
||||
"ollama-cloud",
|
||||
"tencent-tokenhub",
|
||||
"lmstudio",
|
||||
):
|
||||
_model_flow_api_key_provider(config, selected_provider, current_model)
|
||||
|
||||
@@ -2048,11 +1996,7 @@ def _aux_select_for_task(task: str) -> None:
|
||||
|
||||
# Gather authenticated providers (has credentials + curated model list)
|
||||
try:
|
||||
providers = list_authenticated_providers(
|
||||
current_provider=current_provider,
|
||||
current_model=current_model,
|
||||
current_base_url=current_base_url,
|
||||
)
|
||||
providers = list_authenticated_providers(current_provider=current_provider)
|
||||
except Exception as exc:
|
||||
print(f"Could not detect authenticated providers: {exc}")
|
||||
providers = []
|
||||
@@ -4382,7 +4326,6 @@ def _model_flow_bedrock(config, current_model=""):
|
||||
def _model_flow_api_key_provider(config, provider_id, current_model=""):
|
||||
"""Generic flow for API-key providers (z.ai, MiniMax, OpenCode, etc.)."""
|
||||
from hermes_cli.auth import (
|
||||
LMSTUDIO_NOAUTH_PLACEHOLDER,
|
||||
PROVIDER_REGISTRY,
|
||||
_prompt_model_selection,
|
||||
_save_model_choice,
|
||||
@@ -4417,20 +4360,13 @@ def _model_flow_api_key_provider(config, provider_id, current_model=""):
|
||||
try:
|
||||
import getpass
|
||||
|
||||
if provider_id == "lmstudio":
|
||||
prompt = f"{key_env} (Enter for no-auth default {LMSTUDIO_NOAUTH_PLACEHOLDER!r}): "
|
||||
else:
|
||||
prompt = f"{key_env} (or Enter to cancel): "
|
||||
new_key = getpass.getpass(prompt).strip()
|
||||
new_key = getpass.getpass(f"{key_env} (or Enter to cancel): ").strip()
|
||||
except (KeyboardInterrupt, EOFError):
|
||||
print()
|
||||
return
|
||||
if not new_key:
|
||||
if provider_id == "lmstudio":
|
||||
new_key = LMSTUDIO_NOAUTH_PLACEHOLDER
|
||||
else:
|
||||
print("Cancelled.")
|
||||
return
|
||||
print("Cancelled.")
|
||||
return
|
||||
save_env_value(key_env, new_key)
|
||||
existing_key = new_key
|
||||
print("API key saved.")
|
||||
@@ -4497,21 +4433,10 @@ def _model_flow_api_key_provider(config, provider_id, current_model=""):
|
||||
print(" Tier check: could not verify (proceeding anyway).")
|
||||
print()
|
||||
|
||||
# Optional base URL override.
|
||||
# Precedence: env var → config.yaml model.base_url → registry default.
|
||||
# Reading config.yaml prevents silently overwriting a saved remote URL
|
||||
# (e.g. a remote LM Studio endpoint) with localhost when the user just
|
||||
# presses Enter at the prompt below.
|
||||
# Optional base URL override
|
||||
current_base = ""
|
||||
if base_url_env:
|
||||
current_base = get_env_value(base_url_env) or os.getenv(base_url_env, "")
|
||||
if not current_base:
|
||||
try:
|
||||
_m = load_config().get("model") or {}
|
||||
if str(_m.get("provider") or "").strip().lower() == provider_id:
|
||||
current_base = str(_m.get("base_url") or "").strip()
|
||||
except Exception:
|
||||
pass
|
||||
effective_base = current_base or pconfig.inference_base_url
|
||||
|
||||
try:
|
||||
@@ -4533,22 +4458,8 @@ def _model_flow_api_key_provider(config, provider_id, current_model=""):
|
||||
# 2. Curated static fallback list (offline insurance)
|
||||
# 3. Live /models endpoint probe (small providers without models.dev data)
|
||||
#
|
||||
# LM Studio: live /api/v1/models probe (no models.dev catalog).
|
||||
# Ollama Cloud: merged discovery (live API + models.dev + disk cache).
|
||||
if provider_id == "lmstudio":
|
||||
from hermes_cli.auth import AuthError
|
||||
from hermes_cli.models import fetch_lmstudio_models
|
||||
|
||||
api_key_for_probe = existing_key or (get_env_value(key_env) if key_env else "")
|
||||
try:
|
||||
model_list = fetch_lmstudio_models(api_key=api_key_for_probe, base_url=effective_base)
|
||||
except AuthError as exc:
|
||||
print(f" LM Studio rejected the request: {exc}")
|
||||
print(" Set LM_API_KEY (or update it) to match the server's bearer token.")
|
||||
model_list = []
|
||||
if model_list:
|
||||
print(f" Found {len(model_list)} model(s) from LM Studio")
|
||||
elif provider_id == "ollama-cloud":
|
||||
# Ollama Cloud: dedicated merged discovery (live API + models.dev + disk cache)
|
||||
if provider_id == "ollama-cloud":
|
||||
from hermes_cli.models import fetch_ollama_cloud_models
|
||||
|
||||
api_key_for_probe = existing_key or (get_env_value(key_env) if key_env else "")
|
||||
@@ -4770,6 +4681,7 @@ def _model_flow_anthropic(config, current_model=""):
|
||||
read_claude_code_credentials,
|
||||
is_claude_code_token_valid,
|
||||
_is_oauth_token,
|
||||
_resolve_claude_code_token_from_credentials,
|
||||
)
|
||||
|
||||
cc_creds = read_claude_code_credentials()
|
||||
@@ -5251,93 +5163,6 @@ def _build_web_ui(web_dir: Path, *, fatal: bool = False) -> bool:
|
||||
return True
|
||||
|
||||
|
||||
def _warn_stale_dashboard_processes() -> None:
|
||||
"""Warn about running dashboard processes that still hold pre-update code.
|
||||
|
||||
``hermes dashboard`` is a long-lived server process commonly started and
|
||||
forgotten. When ``hermes update`` replaces files on disk, the running
|
||||
process keeps the old Python backend in memory while the JS bundle on
|
||||
disk is updated, causing a silent frontend/backend mismatch (e.g. new
|
||||
auth headers the old backend doesn't recognise → every API call 401s).
|
||||
|
||||
Unlike the gateway, the dashboard has no service manager (systemd /
|
||||
launchd), so we can only warn — we don't auto-kill user-managed
|
||||
background processes.
|
||||
"""
|
||||
patterns = [
|
||||
"hermes dashboard",
|
||||
"hermes_cli.main dashboard",
|
||||
"hermes_cli/main.py dashboard",
|
||||
]
|
||||
self_pid = os.getpid()
|
||||
dashboard_pids: list[int] = []
|
||||
|
||||
try:
|
||||
if sys.platform == "win32":
|
||||
result = subprocess.run(
|
||||
["wmic", "process", "get", "ProcessId,CommandLine",
|
||||
"/FORMAT:LIST"],
|
||||
capture_output=True, text=True, timeout=10,
|
||||
)
|
||||
if result.returncode != 0:
|
||||
return
|
||||
current_cmd = ""
|
||||
for line in result.stdout.split("\n"):
|
||||
line = line.strip()
|
||||
if line.startswith("CommandLine="):
|
||||
current_cmd = line[len("CommandLine="):]
|
||||
elif line.startswith("ProcessId="):
|
||||
pid_str = line[len("ProcessId="):]
|
||||
if (any(p in current_cmd for p in patterns)
|
||||
and int(pid_str) != self_pid):
|
||||
try:
|
||||
dashboard_pids.append(int(pid_str))
|
||||
except ValueError:
|
||||
pass
|
||||
else:
|
||||
# Linux / macOS: scan the process table via ps and match against
|
||||
# the same explicit patterns list used on Windows. Using ps
|
||||
# (rather than `pgrep -f "hermes.*dashboard"`) keeps us consistent
|
||||
# with `hermes_cli.gateway._scan_gateway_pids` and avoids the
|
||||
# greedy regex matching unrelated cmdlines that merely contain
|
||||
# both words (e.g. a chat session discussing "dashboard").
|
||||
result = subprocess.run(
|
||||
["ps", "-A", "-o", "pid=,command="],
|
||||
capture_output=True, text=True, timeout=10,
|
||||
)
|
||||
if result.returncode == 0:
|
||||
for line in result.stdout.split("\n"):
|
||||
stripped = line.strip()
|
||||
if not stripped or "grep" in stripped:
|
||||
continue
|
||||
parts = stripped.split(None, 1)
|
||||
if len(parts) != 2:
|
||||
continue
|
||||
try:
|
||||
pid = int(parts[0])
|
||||
except ValueError:
|
||||
continue
|
||||
command = parts[1]
|
||||
if (any(p in command for p in patterns)
|
||||
and pid != self_pid):
|
||||
dashboard_pids.append(pid)
|
||||
except (FileNotFoundError, subprocess.TimeoutExpired, OSError):
|
||||
return
|
||||
|
||||
if not dashboard_pids:
|
||||
return
|
||||
|
||||
print()
|
||||
print(f"⚠ {len(dashboard_pids)} dashboard process(es) still running "
|
||||
f"with the previous version:")
|
||||
for pid in dashboard_pids:
|
||||
print(f" PID {pid}")
|
||||
print(" The running backend may not match the updated frontend,")
|
||||
print(" causing silent auth failures or empty data.")
|
||||
print(" Restart them to pick up the changes:")
|
||||
print(" kill <pid> && hermes dashboard --port <port> ...")
|
||||
|
||||
|
||||
def _update_via_zip(args):
|
||||
"""Update Hermes Agent by downloading a ZIP archive.
|
||||
|
||||
@@ -5472,7 +5297,6 @@ def _update_via_zip(args):
|
||||
|
||||
print()
|
||||
print("✓ Update complete!")
|
||||
_warn_stale_dashboard_processes()
|
||||
|
||||
|
||||
def _stash_local_changes_if_needed(git_cmd: list[str], cwd: Path) -> Optional[str]:
|
||||
@@ -7174,7 +6998,7 @@ def _cmd_update_impl(args, gateway_mode: bool):
|
||||
print(
|
||||
f" ⚠ {svc_name} died after restart, retrying..."
|
||||
)
|
||||
subprocess.run(
|
||||
retry = subprocess.run(
|
||||
scope_cmd + ["restart", svc_name],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
@@ -7289,10 +7113,6 @@ def _cmd_update_impl(args, gateway_mode: bool):
|
||||
except Exception as e:
|
||||
logger.debug("Legacy unit check during update failed: %s", e)
|
||||
|
||||
# Warn about stale dashboard processes — the dashboard has no
|
||||
# service manager, so we can only tell the user to restart them.
|
||||
_warn_stale_dashboard_processes()
|
||||
|
||||
print()
|
||||
print("Tip: You can now select a provider and model:")
|
||||
print(" hermes model # Select provider and model")
|
||||
@@ -7941,12 +7761,31 @@ For more help on a command:
|
||||
)
|
||||
chat_parser.add_argument(
|
||||
"--provider",
|
||||
# No `choices=` here: user-defined providers from config.yaml `providers:`
|
||||
# are also valid values, and runtime resolution (resolve_runtime_provider)
|
||||
# handles validation/error reporting consistently with the top-level
|
||||
# `--provider` flag.
|
||||
choices=[
|
||||
"auto",
|
||||
"openrouter",
|
||||
"nous",
|
||||
"openai-codex",
|
||||
"copilot-acp",
|
||||
"copilot",
|
||||
"anthropic",
|
||||
"gemini",
|
||||
"xai",
|
||||
"ollama-cloud",
|
||||
"huggingface",
|
||||
"zai",
|
||||
"kimi-coding",
|
||||
"kimi-coding-cn",
|
||||
"stepfun",
|
||||
"minimax",
|
||||
"minimax-cn",
|
||||
"kilocode",
|
||||
"xiaomi",
|
||||
"arcee",
|
||||
"nvidia",
|
||||
],
|
||||
default=None,
|
||||
help="Inference provider (default: auto). Built-in or a user-defined name from `providers:` in config.yaml.",
|
||||
help="Inference provider (default: auto)",
|
||||
)
|
||||
chat_parser.add_argument(
|
||||
"-v", "--verbose", action="store_true", help="Verbose output"
|
||||
@@ -9786,26 +9625,17 @@ Examples:
|
||||
"--preset",
|
||||
choices=["user-data", "full"],
|
||||
default="full",
|
||||
help="Migration preset (default: full). Neither preset imports secrets — "
|
||||
"pass --migrate-secrets to include API keys.",
|
||||
help="Migration preset (default: full). 'user-data' excludes secrets",
|
||||
)
|
||||
claw_migrate.add_argument(
|
||||
"--overwrite",
|
||||
action="store_true",
|
||||
help="Overwrite existing files (default: refuse to apply when the plan has conflicts)",
|
||||
help="Overwrite existing files (default: skip conflicts)",
|
||||
)
|
||||
claw_migrate.add_argument(
|
||||
"--migrate-secrets",
|
||||
action="store_true",
|
||||
help="Include allowlisted secrets (TELEGRAM_BOT_TOKEN, API keys, etc.). "
|
||||
"Required even under --preset full.",
|
||||
)
|
||||
claw_migrate.add_argument(
|
||||
"--no-backup",
|
||||
action="store_true",
|
||||
help="Skip the pre-migration zip snapshot of ~/.hermes/ (by default a "
|
||||
"single restore-point archive is written to ~/.hermes/backups/ "
|
||||
"before apply; restorable with 'hermes import').",
|
||||
help="Include allowlisted secrets (TELEGRAM_BOT_TOKEN, API keys, etc.)",
|
||||
)
|
||||
claw_migrate.add_argument(
|
||||
"--workspace-target", help="Absolute path to copy workspace instructions into"
|
||||
@@ -10220,17 +10050,6 @@ Examples:
|
||||
logger.debug(
|
||||
"plugin discovery failed at CLI startup", exc_info=True,
|
||||
)
|
||||
try:
|
||||
# MCP tool discovery — no event loop running in CLI/TUI startup,
|
||||
# so inline is safe. Moved here from model_tools.py module scope
|
||||
# to avoid freezing the gateway's event loop on its first message
|
||||
# via the same lazy import path (#16856).
|
||||
from tools.mcp_tool import discover_mcp_tools
|
||||
discover_mcp_tools()
|
||||
except Exception:
|
||||
logger.debug(
|
||||
"MCP tool discovery failed at CLI startup", exc_info=True,
|
||||
)
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
from agent.shell_hooks import register_from_config
|
||||
|
||||
@@ -46,6 +46,7 @@ from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
import urllib.error
|
||||
import urllib.request
|
||||
@@ -53,7 +54,6 @@ from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from hermes_cli import __version__ as _HERMES_VERSION
|
||||
from utils import atomic_replace
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -190,7 +190,7 @@ def _write_disk_cache(data: dict[str, Any]) -> None:
|
||||
with open(tmp, "w") as fh:
|
||||
json.dump(data, fh, indent=2)
|
||||
fh.write("\n")
|
||||
atomic_replace(tmp, path)
|
||||
os.replace(tmp, path)
|
||||
except OSError as exc:
|
||||
logger.info("model catalog cache write failed: %s", exc)
|
||||
|
||||
|
||||
@@ -213,15 +213,10 @@ def _load_direct_aliases() -> dict[str, DirectAlias]:
|
||||
|
||||
|
||||
def _ensure_direct_aliases() -> None:
|
||||
"""Lazy-load direct aliases on first use.
|
||||
|
||||
Mutates the existing DIRECT_ALIASES dict in place rather than rebinding
|
||||
the module attribute. This keeps `from hermes_cli.model_switch import
|
||||
DIRECT_ALIASES` references valid in callers — rebinding would leave them
|
||||
pointing at a stale empty dict.
|
||||
"""
|
||||
"""Lazy-load direct aliases on first use."""
|
||||
global DIRECT_ALIASES
|
||||
if not DIRECT_ALIASES:
|
||||
DIRECT_ALIASES.update(_load_direct_aliases())
|
||||
DIRECT_ALIASES = _load_direct_aliases()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -984,7 +979,6 @@ def list_authenticated_providers(
|
||||
user_providers: dict = None,
|
||||
custom_providers: list | None = None,
|
||||
max_models: int = 8,
|
||||
current_model: str = "",
|
||||
) -> List[dict]:
|
||||
"""Detect which providers have credentials and list their curated models.
|
||||
|
||||
@@ -1031,34 +1025,6 @@ def list_authenticated_providers(
|
||||
if "ollama-cloud" not in curated:
|
||||
from hermes_cli.models import fetch_ollama_cloud_models
|
||||
curated["ollama-cloud"] = fetch_ollama_cloud_models()
|
||||
# LM Studio has no static catalog — probe its native /api/v1/models
|
||||
# endpoint live so the picker reflects whatever the user has loaded.
|
||||
# Base URL precedence: LM_BASE_URL env var > active config's base_url
|
||||
# (when current provider is lmstudio) > 127.0.0.1 default.
|
||||
# On auth rejection or unreachable server, fall back to the caller-supplied
|
||||
# current model so the picker still shows something when offline / mis-keyed.
|
||||
if "lmstudio" not in curated and (
|
||||
os.environ.get("LM_API_KEY") or os.environ.get("LM_BASE_URL") or current_provider.strip().lower() == "lmstudio"
|
||||
):
|
||||
from hermes_cli.models import fetch_lmstudio_models
|
||||
from hermes_cli.auth import AuthError
|
||||
is_current_lmstudio = current_provider.strip().lower() == "lmstudio"
|
||||
lm_base = (
|
||||
os.environ.get("LM_BASE_URL")
|
||||
or (current_base_url if is_current_lmstudio and current_base_url else None)
|
||||
or "http://127.0.0.1:1234/v1"
|
||||
)
|
||||
try:
|
||||
live = fetch_lmstudio_models(
|
||||
api_key=os.environ.get("LM_API_KEY", ""),
|
||||
base_url=lm_base,
|
||||
timeout=1.5, # Smaller timeout for picker
|
||||
)
|
||||
except AuthError:
|
||||
live = []
|
||||
if not live and is_current_lmstudio and current_model:
|
||||
live = [current_model]
|
||||
curated["lmstudio"] = live
|
||||
|
||||
# --- 1. Check Hermes-mapped providers ---
|
||||
for hermes_id, mdev_id in PROVIDER_TO_MODELS_DEV.items():
|
||||
@@ -1209,15 +1175,6 @@ def list_authenticated_providers(
|
||||
|
||||
if hermes_slug in {"copilot", "copilot-acp"}:
|
||||
model_ids = provider_model_ids(hermes_slug)
|
||||
# For aws_sdk providers (bedrock), use live discovery so the list
|
||||
# reflects the active region (eu.*, ap.*) not the static us.* list.
|
||||
elif overlay.auth_type == "aws_sdk":
|
||||
try:
|
||||
from agent.bedrock_adapter import bedrock_model_ids_or_none
|
||||
_ids = bedrock_model_ids_or_none()
|
||||
model_ids = _ids if _ids is not None else (curated.get(hermes_slug, []) or curated.get(pid, []))
|
||||
except Exception:
|
||||
model_ids = curated.get(hermes_slug, []) or curated.get(pid, [])
|
||||
else:
|
||||
# Use curated list — look up by Hermes slug, fall back to overlay key
|
||||
model_ids = curated.get(hermes_slug, []) or curated.get(pid, [])
|
||||
@@ -1280,30 +1237,10 @@ def list_authenticated_providers(
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Special case: aws_sdk auth (bedrock) — no API key env vars,
|
||||
# credentials come from the boto3 credential chain (env vars,
|
||||
# ~/.aws/credentials, instance roles, etc.)
|
||||
if not _cp_has_creds and _cp_config and getattr(_cp_config, "auth_type", "") == "aws_sdk":
|
||||
try:
|
||||
from agent.bedrock_adapter import has_aws_credentials
|
||||
_cp_has_creds = has_aws_credentials()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if not _cp_has_creds:
|
||||
continue
|
||||
|
||||
# For bedrock, use live discovery so the list reflects the active
|
||||
# region (eu.*, us.*, ap.*) instead of the hardcoded us.* static list.
|
||||
if _cp_config and getattr(_cp_config, "auth_type", "") == "aws_sdk":
|
||||
try:
|
||||
from agent.bedrock_adapter import bedrock_model_ids_or_none
|
||||
_ids = bedrock_model_ids_or_none()
|
||||
_cp_model_ids = _ids if _ids is not None else curated.get(_cp.slug, [])
|
||||
except Exception:
|
||||
_cp_model_ids = curated.get(_cp.slug, [])
|
||||
else:
|
||||
_cp_model_ids = curated.get(_cp.slug, [])
|
||||
_cp_model_ids = curated.get(_cp.slug, [])
|
||||
_cp_total = len(_cp_model_ids)
|
||||
_cp_top = _cp_model_ids[:max_models]
|
||||
|
||||
@@ -1375,23 +1312,8 @@ def list_authenticated_providers(
|
||||
if fb:
|
||||
models_list = list(fb)
|
||||
|
||||
# Prefer the endpoint's live /models list when credentials are
|
||||
# available. This keeps OpenAI-compatible relays (for example CRS)
|
||||
# in sync when the server catalog changes without requiring the
|
||||
# user to mirror every model into config.yaml.
|
||||
api_key = str(ep_cfg.get("api_key", "") or "").strip()
|
||||
if not api_key:
|
||||
key_env = str(ep_cfg.get("key_env", "") or "").strip()
|
||||
api_key = os.environ.get(key_env, "").strip() if key_env else ""
|
||||
if api_url and api_key:
|
||||
try:
|
||||
from hermes_cli.models import fetch_api_models
|
||||
live_models = fetch_api_models(api_key, api_url)
|
||||
if live_models:
|
||||
models_list = live_models
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Try to probe /v1/models if URL is set (but don't block on it)
|
||||
# For now just show what we know from config
|
||||
results.append({
|
||||
"slug": ep_name,
|
||||
"name": display_name,
|
||||
|
||||
+38
-437
@@ -44,7 +44,6 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [
|
||||
("openai/gpt-5.4-mini", ""),
|
||||
("xiaomi/mimo-v2.5-pro", ""),
|
||||
("xiaomi/mimo-v2.5", ""),
|
||||
("tencent/hy3-preview:free", "free"),
|
||||
("openai/gpt-5.3-codex", ""),
|
||||
("google/gemini-3-pro-image-preview", ""),
|
||||
("google/gemini-3-flash-preview", ""),
|
||||
@@ -107,57 +106,11 @@ def _codex_curated_models() -> list[str]:
|
||||
return _add_forward_compat_models(list(DEFAULT_CODEX_MODELS))
|
||||
|
||||
|
||||
# Static fallback for xAI when the models.dev disk cache is empty (fresh
|
||||
# install, offline first run, etc.). Mirrors the xAI-direct model IDs from
|
||||
# $HERMES_HOME/models_dev_cache.json as of 2026-04-28. Whenever xAI renames
|
||||
# or retires a model, the disk cache picks it up on the next refresh and the
|
||||
# fallback here only matters until that refresh lands.
|
||||
_XAI_STATIC_FALLBACK: list[str] = [
|
||||
"grok-4.20-0309-reasoning",
|
||||
"grok-4.20-0309-non-reasoning",
|
||||
"grok-4.20-multi-agent-0309",
|
||||
"grok-4-1-fast",
|
||||
"grok-4-1-fast-non-reasoning",
|
||||
"grok-4-fast",
|
||||
"grok-4-fast-non-reasoning",
|
||||
"grok-4",
|
||||
"grok-code-fast-1",
|
||||
]
|
||||
|
||||
|
||||
def _xai_curated_models() -> list[str]:
|
||||
"""Derive the xAI-direct curated list from models.dev disk cache.
|
||||
|
||||
Reads $HERMES_HOME/models_dev_cache.json directly (no network) so this
|
||||
runs at import time without blocking. Falls back to ``_XAI_STATIC_FALLBACK``
|
||||
when the cache is empty or unreadable. Hermes refreshes the cache from
|
||||
https://models.dev/api.json on normal use, so this list self-heals as
|
||||
xAI renames models.
|
||||
|
||||
Mirrors ``_codex_curated_models()``'s role for openai-codex.
|
||||
"""
|
||||
try:
|
||||
from agent.models_dev import _load_disk_cache
|
||||
data = _load_disk_cache()
|
||||
xai = data.get("xai") if isinstance(data, dict) else None
|
||||
models = xai.get("models") if isinstance(xai, dict) else None
|
||||
if isinstance(models, dict) and models:
|
||||
ids = [mid for mid in models.keys() if isinstance(mid, str)]
|
||||
if ids:
|
||||
return sorted(ids)
|
||||
except Exception:
|
||||
# Any failure (missing file, malformed JSON, import error)
|
||||
# falls through to the static list.
|
||||
pass
|
||||
return list(_XAI_STATIC_FALLBACK)
|
||||
|
||||
|
||||
_PROVIDER_MODELS: dict[str, list[str]] = {
|
||||
"nous": [
|
||||
"moonshotai/kimi-k2.6",
|
||||
"xiaomi/mimo-v2.5-pro",
|
||||
"xiaomi/mimo-v2.5",
|
||||
"tencent/hy3-preview",
|
||||
"anthropic/claude-opus-4.7",
|
||||
"anthropic/claude-opus-4.6",
|
||||
"anthropic/claude-sonnet-4.6",
|
||||
@@ -240,7 +193,10 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
|
||||
"glm-4.5",
|
||||
"glm-4.5-flash",
|
||||
],
|
||||
"xai": _xai_curated_models(),
|
||||
"xai": [
|
||||
"grok-4.20-reasoning",
|
||||
"grok-4-1-fast-reasoning",
|
||||
],
|
||||
"nvidia": [
|
||||
# NVIDIA flagship reasoning models
|
||||
"nvidia/nemotron-3-super-120b-a12b",
|
||||
@@ -317,22 +273,11 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
|
||||
"mimo-v2-omni",
|
||||
"mimo-v2-flash",
|
||||
],
|
||||
"tencent-tokenhub": [
|
||||
"hy3-preview",
|
||||
],
|
||||
"arcee": [
|
||||
"trinity-large-thinking",
|
||||
"trinity-large-preview",
|
||||
"trinity-mini",
|
||||
],
|
||||
"gmi": [
|
||||
"zai-org/GLM-5.1-FP8",
|
||||
"deepseek-ai/DeepSeek-V3.2",
|
||||
"moonshotai/Kimi-K2.5",
|
||||
"google/gemini-3.1-flash-lite-preview",
|
||||
"anthropic/claude-sonnet-4.6",
|
||||
"openai/gpt-5.4",
|
||||
],
|
||||
"opencode-zen": [
|
||||
"kimi-k2.5",
|
||||
"gpt-5.4-pro",
|
||||
@@ -397,7 +342,6 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
|
||||
# to https://dashscope-intl.aliyuncs.com/compatible-mode/v1 (OpenAI-compat)
|
||||
# or https://dashscope-intl.aliyuncs.com/apps/anthropic (Anthropic-compat).
|
||||
"alibaba": [
|
||||
"qwen3.6-plus",
|
||||
"kimi-k2.5",
|
||||
"qwen3.5-plus",
|
||||
"qwen3-coder-plus",
|
||||
@@ -765,15 +709,14 @@ class ProviderEntry(NamedTuple):
|
||||
label: str
|
||||
tui_desc: str # detailed description for `hermes model` TUI
|
||||
|
||||
|
||||
CANONICAL_PROVIDERS: list[ProviderEntry] = [
|
||||
ProviderEntry("nous", "Nous Portal", "Nous Portal (Nous Research subscription)"),
|
||||
ProviderEntry("openrouter", "OpenRouter", "OpenRouter (100+ models, pay-per-use)"),
|
||||
ProviderEntry("lmstudio", "LM Studio", "LM Studio (local desktop app with built-in model server)"),
|
||||
ProviderEntry("ai-gateway", "Vercel AI Gateway", "Vercel AI Gateway (200+ models, $5 free credit, no markup)"),
|
||||
ProviderEntry("anthropic", "Anthropic", "Anthropic (Claude models — API key or Claude Code)"),
|
||||
ProviderEntry("openai-codex", "OpenAI Codex", "OpenAI Codex"),
|
||||
ProviderEntry("xiaomi", "Xiaomi MiMo", "Xiaomi MiMo (MiMo-V2.5 and V2 models — pro, omni, flash)"),
|
||||
ProviderEntry("tencent-tokenhub", "Tencent TokenHub", "Tencent TokenHub (Hy3 Preview — direct API via tokenhub.tencentmaas.com)"),
|
||||
ProviderEntry("nvidia", "NVIDIA NIM", "NVIDIA NIM (Nemotron models — build.nvidia.com or local NIM)"),
|
||||
ProviderEntry("qwen-oauth", "Qwen OAuth (Portal)", "Qwen OAuth (reuses local Qwen CLI login)"),
|
||||
ProviderEntry("copilot", "GitHub Copilot", "GitHub Copilot (uses GITHUB_TOKEN or gh auth token)"),
|
||||
@@ -792,7 +735,6 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
|
||||
ProviderEntry("alibaba", "Alibaba Cloud (DashScope)","Alibaba Cloud / DashScope Coding (Qwen + multi-provider)"),
|
||||
ProviderEntry("ollama-cloud", "Ollama Cloud", "Ollama Cloud (cloud-hosted open models — ollama.com)"),
|
||||
ProviderEntry("arcee", "Arcee AI", "Arcee AI (Trinity models — direct API)"),
|
||||
ProviderEntry("gmi", "GMI Cloud", "GMI Cloud (multi-model direct API)"),
|
||||
ProviderEntry("kilocode", "Kilo Code", "Kilo Code (Kilo Gateway API)"),
|
||||
ProviderEntry("opencode-zen", "OpenCode Zen", "OpenCode Zen (35+ curated models, pay-as-you-go)"),
|
||||
ProviderEntry("opencode-go", "OpenCode Go", "OpenCode Go (open models, $10/month subscription)"),
|
||||
@@ -827,8 +769,6 @@ _PROVIDER_ALIASES = {
|
||||
"stepfun-coding-plan": "stepfun",
|
||||
"arcee-ai": "arcee",
|
||||
"arceeai": "arcee",
|
||||
"gmi-cloud": "gmi",
|
||||
"gmicloud": "gmi",
|
||||
"minimax-china": "minimax-cn",
|
||||
"minimax_cn": "minimax-cn",
|
||||
"claude": "anthropic",
|
||||
@@ -856,10 +796,6 @@ _PROVIDER_ALIASES = {
|
||||
"huggingface-hub": "huggingface",
|
||||
"mimo": "xiaomi",
|
||||
"xiaomi-mimo": "xiaomi",
|
||||
"tencent": "tencent-tokenhub",
|
||||
"tokenhub": "tencent-tokenhub",
|
||||
"tencent-cloud": "tencent-tokenhub",
|
||||
"tencentmaas": "tencent-tokenhub",
|
||||
"aws": "bedrock",
|
||||
"aws-bedrock": "bedrock",
|
||||
"amazon-bedrock": "bedrock",
|
||||
@@ -871,9 +807,6 @@ _PROVIDER_ALIASES = {
|
||||
"nvidia-nim": "nvidia",
|
||||
"build-nvidia": "nvidia",
|
||||
"nemotron": "nvidia",
|
||||
"lmstudio": "lmstudio",
|
||||
"lm-studio": "lmstudio",
|
||||
"lm_studio": "lmstudio",
|
||||
"ollama": "custom", # bare "ollama" = local; use "ollama-cloud" for cloud
|
||||
"ollama_cloud": "ollama-cloud",
|
||||
}
|
||||
@@ -1680,41 +1613,31 @@ def provider_label(provider: Optional[str]) -> str:
|
||||
|
||||
# Models that support OpenAI Priority Processing (service_tier="priority").
|
||||
# See https://openai.com/api-priority-processing/ for the canonical list.
|
||||
#
|
||||
# Pattern-based matching — any OpenAI flagship model (gpt-*, o1*, o3*, o4*)
|
||||
# is assumed to support Priority Processing. service_tier=priority is silently
|
||||
# ignored by non-OpenAI endpoints (OpenRouter/Copilot/opencode-zen proxies
|
||||
# strip the field), so false positives are harmless. Codex-series models
|
||||
# (gpt-5-codex, gpt-5.3-codex, etc.) are excluded — they don't expose the
|
||||
# service_tier parameter through the Codex Responses API.
|
||||
_OPENAI_FAST_MODE_PREFIXES: tuple[str, ...] = (
|
||||
"gpt-",
|
||||
"o1",
|
||||
# Only the bare model slug is stored (no vendor prefix).
|
||||
_PRIORITY_PROCESSING_MODELS: frozenset[str] = frozenset({
|
||||
"gpt-5.4",
|
||||
"gpt-5.4-mini",
|
||||
"gpt-5.2",
|
||||
"gpt-5.1",
|
||||
"gpt-5",
|
||||
"gpt-5-mini",
|
||||
"gpt-4.1",
|
||||
"gpt-4.1-mini",
|
||||
"gpt-4.1-nano",
|
||||
"gpt-4o",
|
||||
"gpt-4o-mini",
|
||||
"o3",
|
||||
"o4",
|
||||
)
|
||||
|
||||
|
||||
def _is_openai_fast_model(model_id: Optional[str]) -> bool:
|
||||
"""Return True if the model is an OpenAI flagship eligible for Priority Processing."""
|
||||
raw = _strip_vendor_prefix(str(model_id or ""))
|
||||
base = raw.split(":")[0]
|
||||
if not base:
|
||||
return False
|
||||
# Exclude Codex-series — they route through the Codex Responses API
|
||||
# which doesn't accept service_tier.
|
||||
if "codex" in base:
|
||||
return False
|
||||
return any(base.startswith(prefix) for prefix in _OPENAI_FAST_MODE_PREFIXES)
|
||||
|
||||
"o4-mini",
|
||||
})
|
||||
|
||||
# Models that support Anthropic Fast Mode (speed="fast").
|
||||
# See https://platform.claude.com/docs/en/build-with-claude/fast-mode
|
||||
#
|
||||
# Pattern-based matching — any claude-* model is eligible. The anthropic
|
||||
# adapter gates speed=fast on native Anthropic endpoints only (see
|
||||
# _is_third_party_anthropic_endpoint in agent/anthropic_adapter.py), so
|
||||
# third-party proxies that would reject the beta header are protected.
|
||||
# Currently only Claude Opus 4.6. Both hyphen and dot variants are stored
|
||||
# to handle native Anthropic (claude-opus-4-6) and OpenRouter (claude-opus-4.6).
|
||||
_ANTHROPIC_FAST_MODE_MODELS: frozenset[str] = frozenset({
|
||||
"claude-opus-4-6",
|
||||
"claude-opus-4.6",
|
||||
})
|
||||
|
||||
|
||||
def _strip_vendor_prefix(model_id: str) -> str:
|
||||
@@ -1727,14 +1650,20 @@ def _strip_vendor_prefix(model_id: str) -> str:
|
||||
|
||||
def model_supports_fast_mode(model_id: Optional[str]) -> bool:
|
||||
"""Return whether Hermes should expose the /fast toggle for this model."""
|
||||
return _is_anthropic_fast_model(model_id) or _is_openai_fast_model(model_id)
|
||||
raw = _strip_vendor_prefix(str(model_id or ""))
|
||||
if raw in _PRIORITY_PROCESSING_MODELS:
|
||||
return True
|
||||
# Anthropic fast mode — strip date suffixes (e.g. claude-opus-4-6-20260401)
|
||||
# and OpenRouter variant tags (:fast, :beta) for matching.
|
||||
base = raw.split(":")[0]
|
||||
return base in _ANTHROPIC_FAST_MODE_MODELS
|
||||
|
||||
|
||||
def _is_anthropic_fast_model(model_id: Optional[str]) -> bool:
|
||||
"""Return True if the model is a Claude model eligible for Anthropic Fast Mode."""
|
||||
"""Return True if the model supports Anthropic's fast mode (speed='fast')."""
|
||||
raw = _strip_vendor_prefix(str(model_id or ""))
|
||||
base = raw.split(":")[0]
|
||||
return base.startswith("claude-")
|
||||
return base in _ANTHROPIC_FAST_MODE_MODELS
|
||||
|
||||
|
||||
def resolve_fast_mode_overrides(model_id: Optional[str]) -> dict[str, Any] | None:
|
||||
@@ -1756,61 +1685,14 @@ def resolve_fast_mode_overrides(model_id: Optional[str]) -> dict[str, Any] | Non
|
||||
|
||||
|
||||
def _resolve_copilot_catalog_api_key() -> str:
|
||||
"""Best-effort GitHub token for fetching the Copilot model catalog.
|
||||
|
||||
Resolution order:
|
||||
1. ``resolve_api_key_provider_credentials("copilot")`` — env vars
|
||||
(``COPILOT_GITHUB_TOKEN`` / ``GH_TOKEN`` / ``GITHUB_TOKEN``) plus
|
||||
the ``gh auth token`` CLI fallback.
|
||||
2. ``read_credential_pool("copilot")`` — a token (typically a
|
||||
``gho_*`` from device-code login, or a fine-grained PAT) stored in
|
||||
``auth.json`` under ``credential_pool.copilot[]``. The pool is
|
||||
populated by ``hermes auth add copilot`` and by ``_seed_from_env``
|
||||
when the env var is set in ``~/.hermes/.env``.
|
||||
|
||||
Without (2), users whose only Copilot credential is in the pool see
|
||||
the ``/model`` picker fall back to a stale hardcoded list because the
|
||||
live catalog fetch silently 401s. To avoid wedging on a malformed pool
|
||||
entry, each candidate is exchanged via ``exchange_copilot_token`` —
|
||||
only entries that actually exchange successfully are returned, so a
|
||||
later valid entry is reachable when an earlier one is unsupported.
|
||||
"""
|
||||
"""Best-effort GitHub token for fetching the Copilot model catalog."""
|
||||
try:
|
||||
from hermes_cli.auth import resolve_api_key_provider_credentials
|
||||
|
||||
creds = resolve_api_key_provider_credentials("copilot")
|
||||
api_key = str(creds.get("api_key") or "").strip()
|
||||
if api_key:
|
||||
return api_key
|
||||
return str(creds.get("api_key") or "").strip()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
try:
|
||||
from hermes_cli.auth import read_credential_pool
|
||||
from hermes_cli.copilot_auth import (
|
||||
exchange_copilot_token,
|
||||
validate_copilot_token,
|
||||
)
|
||||
|
||||
for entry in read_credential_pool("copilot"):
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
raw = str(entry.get("access_token") or "").strip()
|
||||
if not raw:
|
||||
continue
|
||||
valid, _ = validate_copilot_token(raw)
|
||||
if not valid:
|
||||
continue
|
||||
try:
|
||||
api_token, _expires_at = exchange_copilot_token(raw)
|
||||
except Exception:
|
||||
continue
|
||||
if api_token:
|
||||
return api_token
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return ""
|
||||
return ""
|
||||
|
||||
|
||||
# Providers where models.dev is treated as authoritative: curated static
|
||||
@@ -1967,19 +1849,6 @@ def provider_model_ids(provider: Optional[str], *, force_refresh: bool = False)
|
||||
return live
|
||||
except Exception:
|
||||
pass
|
||||
if normalized == "gmi":
|
||||
try:
|
||||
from hermes_cli.auth import resolve_api_key_provider_credentials
|
||||
|
||||
creds = resolve_api_key_provider_credentials("gmi")
|
||||
api_key = str(creds.get("api_key") or "").strip()
|
||||
base_url = str(creds.get("base_url") or "").strip()
|
||||
if api_key and base_url:
|
||||
live = fetch_api_models(api_key, base_url)
|
||||
if live:
|
||||
return live
|
||||
except Exception:
|
||||
pass
|
||||
if normalized == "custom":
|
||||
base_url = _get_custom_base_url()
|
||||
if base_url:
|
||||
@@ -1992,18 +1861,6 @@ def provider_model_ids(provider: Optional[str], *, force_refresh: bool = False)
|
||||
live = fetch_api_models(api_key, base_url)
|
||||
if live:
|
||||
return live
|
||||
# Bedrock uses live discovery keyed by the resolved AWS region so that
|
||||
# EU/AP users see eu.*/ap.* model IDs instead of the static us.* list.
|
||||
# Note: early return intentionally skips _MODELS_DEV_PREFERRED merge
|
||||
# below — bedrock is not expected to appear in that table.
|
||||
if normalized == "bedrock":
|
||||
try:
|
||||
from agent.bedrock_adapter import bedrock_model_ids_or_none
|
||||
ids = bedrock_model_ids_or_none()
|
||||
if ids is not None:
|
||||
return ids
|
||||
except Exception:
|
||||
pass
|
||||
curated_static = list(_PROVIDER_MODELS.get(normalized, []))
|
||||
if normalized in _MODELS_DEV_PREFERRED:
|
||||
return _merge_with_models_dev(normalized, curated_static)
|
||||
@@ -2199,228 +2056,6 @@ def _is_github_models_base_url(base_url: Optional[str]) -> bool:
|
||||
)
|
||||
|
||||
|
||||
def _lmstudio_server_root(base_url: Optional[str]) -> Optional[str]:
|
||||
"""Strip ``/v1`` suffix from an LM Studio base URL to get the native API root.
|
||||
|
||||
Returns ``None`` when the base URL is empty/invalid.
|
||||
"""
|
||||
root = (base_url or "").strip().rstrip("/")
|
||||
if root.endswith("/v1"):
|
||||
root = root[:-3].rstrip("/")
|
||||
return root or None
|
||||
|
||||
|
||||
def _lmstudio_request_headers(api_key: Optional[str] = None) -> dict:
|
||||
"""Build HTTP headers for LM Studio native API requests."""
|
||||
headers = {"User-Agent": _HERMES_USER_AGENT}
|
||||
token = str(api_key or "").strip()
|
||||
if token:
|
||||
headers["Authorization"] = f"Bearer {token}"
|
||||
return headers
|
||||
|
||||
|
||||
def _lmstudio_fetch_raw_models(
|
||||
api_key: Optional[str] = None,
|
||||
base_url: Optional[str] = None,
|
||||
timeout: float = 5.0,
|
||||
) -> Optional[list[dict]]:
|
||||
"""Fetch the raw model list from LM Studio's ``/api/v1/models``.
|
||||
|
||||
Returns the ``models`` list of dicts on success, ``None`` on network
|
||||
errors or malformed responses. Raises ``AuthError`` on HTTP 401/403.
|
||||
"""
|
||||
server_root = _lmstudio_server_root(base_url)
|
||||
if not server_root:
|
||||
return None
|
||||
|
||||
headers = _lmstudio_request_headers(api_key)
|
||||
request = urllib.request.Request(server_root + "/api/v1/models", headers=headers)
|
||||
try:
|
||||
with urllib.request.urlopen(request, timeout=timeout) as resp:
|
||||
payload = json.loads(resp.read().decode())
|
||||
except urllib.error.HTTPError as exc:
|
||||
if exc.code in (401, 403):
|
||||
from hermes_cli.auth import AuthError
|
||||
raise AuthError(
|
||||
f"LM Studio rejected the request with HTTP {exc.code}.",
|
||||
provider="lmstudio",
|
||||
code="auth_rejected",
|
||||
) from exc
|
||||
import logging
|
||||
logging.getLogger(__name__).debug(
|
||||
"LM Studio probe at %s failed with HTTP %s", server_root, exc.code,
|
||||
)
|
||||
return None
|
||||
except Exception as exc:
|
||||
import logging
|
||||
logging.getLogger(__name__).debug(
|
||||
"LM Studio probe at %s failed: %s", server_root, exc,
|
||||
)
|
||||
return None
|
||||
|
||||
raw_models = payload.get("models") if isinstance(payload, dict) else None
|
||||
if not isinstance(raw_models, list):
|
||||
import logging
|
||||
logging.getLogger(__name__).debug(
|
||||
"LM Studio probe at %s returned malformed payload (no `models` list)",
|
||||
server_root,
|
||||
)
|
||||
return None
|
||||
return raw_models
|
||||
|
||||
|
||||
def probe_lmstudio_models(
|
||||
api_key: Optional[str] = None,
|
||||
base_url: Optional[str] = None,
|
||||
timeout: float = 5.0,
|
||||
) -> Optional[list[str]]:
|
||||
"""Probe LM Studio's model listing.
|
||||
|
||||
Returns chat-capable model keys on success, including the valid empty-list
|
||||
case when the server is reachable but has no non-embedding models.
|
||||
Returns ``None`` on network errors, malformed responses, or empty/invalid
|
||||
base URLs.
|
||||
|
||||
Raises ``AuthError`` on HTTP 401/403 so callers can surface token issues
|
||||
separately from reachability problems.
|
||||
"""
|
||||
raw_models = _lmstudio_fetch_raw_models(api_key=api_key, base_url=base_url, timeout=timeout)
|
||||
if raw_models is None:
|
||||
return None
|
||||
|
||||
keys: list[str] = []
|
||||
for raw in raw_models:
|
||||
if not isinstance(raw, dict):
|
||||
continue
|
||||
if str(raw.get("type") or "").strip().lower() == "embedding":
|
||||
continue
|
||||
key = str(raw.get("key") or raw.get("id") or "").strip()
|
||||
if key and key not in keys:
|
||||
keys.append(key)
|
||||
return keys
|
||||
|
||||
|
||||
def fetch_lmstudio_models(
|
||||
api_key: Optional[str] = None,
|
||||
base_url: Optional[str] = None,
|
||||
timeout: float = 5.0,
|
||||
) -> list[str]:
|
||||
"""Fetch LM Studio chat-capable model keys from native ``/api/v1/models``.
|
||||
|
||||
Returns a list of model keys (e.g. ``publisher/model-name``) with embedding
|
||||
models filtered out. Returns an empty list on network errors, malformed
|
||||
responses, or empty/invalid base URLs.
|
||||
|
||||
Raises ``AuthError`` on HTTP 401/403 so callers can distinguish a missing
|
||||
or wrong ``LM_API_KEY`` from an unreachable server — the most common
|
||||
LM Studio support case once auth-enabled mode is turned on.
|
||||
"""
|
||||
models = probe_lmstudio_models(api_key=api_key, base_url=base_url, timeout=timeout)
|
||||
return models or []
|
||||
|
||||
|
||||
def ensure_lmstudio_model_loaded(
|
||||
model: str,
|
||||
base_url: Optional[str],
|
||||
api_key: Optional[str],
|
||||
target_context_length: int,
|
||||
timeout: float = 120.0,
|
||||
) -> Optional[int]:
|
||||
"""Ensure LM Studio has ``model`` loaded with at least ``target_context_length``.
|
||||
|
||||
No-op when an instance is already loaded with sufficient context. Otherwise
|
||||
POSTs ``/api/v1/models/load`` to (re)load with the target context, capped
|
||||
at the model's ``max_context_length``. Returns the resolved loaded context
|
||||
length, or ``None`` when the probe / load failed.
|
||||
"""
|
||||
server_root = _lmstudio_server_root(base_url)
|
||||
if not server_root:
|
||||
return None
|
||||
|
||||
headers = _lmstudio_request_headers(api_key)
|
||||
|
||||
try:
|
||||
raw_models = _lmstudio_fetch_raw_models(api_key=api_key, base_url=base_url, timeout=10)
|
||||
except Exception:
|
||||
raw_models = None
|
||||
if raw_models is None:
|
||||
return None
|
||||
|
||||
target_entry = None
|
||||
for raw in raw_models:
|
||||
if not isinstance(raw, dict):
|
||||
continue
|
||||
if raw.get("key") == model or raw.get("id") == model:
|
||||
target_entry = raw
|
||||
break
|
||||
if target_entry is None:
|
||||
return None
|
||||
|
||||
max_ctx = target_entry.get("max_context_length")
|
||||
if isinstance(max_ctx, int) and max_ctx > 0:
|
||||
target_context_length = min(target_context_length, max_ctx)
|
||||
|
||||
for inst in target_entry.get("loaded_instances") or []:
|
||||
cfg = inst.get("config") if isinstance(inst, dict) else None
|
||||
loaded_ctx = cfg.get("context_length") if isinstance(cfg, dict) else None
|
||||
if isinstance(loaded_ctx, int) and loaded_ctx >= target_context_length:
|
||||
return loaded_ctx
|
||||
|
||||
body = json.dumps({
|
||||
"model": model,
|
||||
"context_length": target_context_length,
|
||||
}).encode()
|
||||
load_headers = dict(headers)
|
||||
load_headers["Content-Type"] = "application/json"
|
||||
try:
|
||||
with urllib.request.urlopen(
|
||||
urllib.request.Request(
|
||||
server_root + "/api/v1/models/load",
|
||||
data=body,
|
||||
headers=load_headers,
|
||||
method="POST",
|
||||
),
|
||||
timeout=timeout,
|
||||
) as resp:
|
||||
resp.read()
|
||||
except Exception:
|
||||
return None
|
||||
return target_context_length
|
||||
|
||||
|
||||
def lmstudio_model_reasoning_options(
|
||||
model: str,
|
||||
base_url: Optional[str],
|
||||
api_key: Optional[str] = None,
|
||||
timeout: float = 5.0,
|
||||
) -> list[str]:
|
||||
"""Return the reasoning ``allowed_options`` LM Studio publishes for ``model``.
|
||||
|
||||
Pulls ``capabilities.reasoning.allowed_options`` from ``/api/v1/models``.
|
||||
Returns ``[]`` when the model is unknown, the endpoint is unreachable,
|
||||
or the model does not declare a reasoning capability.
|
||||
"""
|
||||
try:
|
||||
raw_models = _lmstudio_fetch_raw_models(api_key=api_key, base_url=base_url, timeout=timeout)
|
||||
except Exception:
|
||||
raw_models = None
|
||||
if not raw_models:
|
||||
return []
|
||||
|
||||
for raw in raw_models:
|
||||
if not isinstance(raw, dict):
|
||||
continue
|
||||
if raw.get("key") != model and raw.get("id") != model:
|
||||
continue
|
||||
caps = raw.get("capabilities")
|
||||
reasoning = caps.get("reasoning") if isinstance(caps, dict) else None
|
||||
opts = reasoning.get("allowed_options") if isinstance(reasoning, dict) else None
|
||||
if isinstance(opts, list):
|
||||
return [str(o).strip().lower() for o in opts if isinstance(o, str)]
|
||||
return []
|
||||
return []
|
||||
|
||||
|
||||
def _fetch_github_models(api_key: Optional[str] = None, timeout: float = 5.0) -> Optional[list[str]]:
|
||||
catalog = fetch_github_model_catalog(api_key=api_key, timeout=timeout)
|
||||
if not catalog:
|
||||
@@ -3016,40 +2651,6 @@ def validate_requested_model(
|
||||
"message": "Model names cannot contain spaces.",
|
||||
}
|
||||
|
||||
if normalized == "lmstudio":
|
||||
from hermes_cli.auth import AuthError
|
||||
# Use probe_lmstudio_models so we can distinguish None (unreachable
|
||||
# / malformed response) from [] (reachable, but no chat-capable models
|
||||
# are loaded). fetch_lmstudio_models collapses both to [].
|
||||
try:
|
||||
models = probe_lmstudio_models(api_key=api_key, base_url=base_url)
|
||||
except AuthError as exc:
|
||||
return {
|
||||
"accepted": False, "persist": False, "recognized": False,
|
||||
"message": (
|
||||
f"{exc} Set `LM_API_KEY` (or update it) to match the server's bearer token."
|
||||
),
|
||||
}
|
||||
if models is None:
|
||||
return {
|
||||
"accepted": False, "persist": False, "recognized": False,
|
||||
"message": f"Could not reach LM Studio's `/api/v1/models` to validate `{requested}`.",
|
||||
}
|
||||
if not models:
|
||||
return {
|
||||
"accepted": False, "persist": False, "recognized": False,
|
||||
"message": (
|
||||
f"LM Studio is reachable but no chat-capable models are loaded. "
|
||||
f"Load `{requested}` in LM Studio (Developer tab → Load Model) and try again."
|
||||
),
|
||||
}
|
||||
if requested_for_lookup in set(models):
|
||||
return {"accepted": True, "persist": True, "recognized": True, "message": None}
|
||||
return {
|
||||
"accepted": False, "persist": False, "recognized": False,
|
||||
"message": f"Model `{requested}` was not found in LM Studio's model listing.",
|
||||
}
|
||||
|
||||
if normalized == "custom":
|
||||
# Try probing with correct auth for the api_mode.
|
||||
if api_mode == "anthropic_messages":
|
||||
|
||||
+11
-28
@@ -128,44 +128,27 @@ def _run_agent(
|
||||
# the user's configured default provider, which may not host the model
|
||||
# the caller just asked for.
|
||||
effective_provider = (provider or "").strip() or None
|
||||
explicit_base_url_from_alias: Optional[str] = None
|
||||
if effective_provider is None and (model or env_model):
|
||||
# Only auto-detect when the model was explicitly requested via arg or
|
||||
# env var (not when it came from config — that's the "use my defaults"
|
||||
# path and the configured provider is already correct).
|
||||
explicit_model = (model or "").strip() or env_model
|
||||
if explicit_model:
|
||||
# First check DIRECT_ALIASES populated from config.yaml `model_aliases:`.
|
||||
# These map a user-defined alias to (model, provider, base_url) for
|
||||
# endpoints not in any catalog (local servers, custom proxies, etc.).
|
||||
try:
|
||||
from hermes_cli import model_switch as _ms
|
||||
_ms._ensure_direct_aliases()
|
||||
direct = _ms.DIRECT_ALIASES.get(explicit_model.strip().lower())
|
||||
except Exception:
|
||||
direct = None
|
||||
if direct is not None:
|
||||
effective_model = direct.model
|
||||
effective_provider = direct.provider
|
||||
if direct.base_url:
|
||||
explicit_base_url_from_alias = direct.base_url.rstrip("/")
|
||||
else:
|
||||
cfg_provider = ""
|
||||
if isinstance(model_cfg, dict):
|
||||
cfg_provider = str(model_cfg.get("provider") or "").strip().lower()
|
||||
current_provider = (
|
||||
cfg_provider
|
||||
or os.getenv("HERMES_INFERENCE_PROVIDER", "").strip().lower()
|
||||
or "auto"
|
||||
)
|
||||
detected = detect_provider_for_model(explicit_model, current_provider)
|
||||
if detected:
|
||||
effective_provider, effective_model = detected
|
||||
cfg_provider = ""
|
||||
if isinstance(model_cfg, dict):
|
||||
cfg_provider = str(model_cfg.get("provider") or "").strip().lower()
|
||||
current_provider = (
|
||||
cfg_provider
|
||||
or os.getenv("HERMES_INFERENCE_PROVIDER", "").strip().lower()
|
||||
or "auto"
|
||||
)
|
||||
detected = detect_provider_for_model(explicit_model, current_provider)
|
||||
if detected:
|
||||
effective_provider, effective_model = detected
|
||||
|
||||
runtime = resolve_runtime_provider(
|
||||
requested=effective_provider,
|
||||
target_model=effective_model or None,
|
||||
explicit_base_url=explicit_base_url_from_alias,
|
||||
)
|
||||
|
||||
# Pull in whatever toolsets the user has enabled for "cli".
|
||||
|
||||
@@ -79,20 +79,6 @@ VALID_HOOKS: Set[str] = {
|
||||
# {"action": "allow"} / None -> normal dispatch
|
||||
# Kwargs: event: MessageEvent, gateway: GatewayRunner, session_store.
|
||||
"pre_gateway_dispatch",
|
||||
# Approval lifecycle hooks. Fired by tools/approval.py when a dangerous
|
||||
# command needs user approval -- fires BOTH for CLI-interactive prompts
|
||||
# and for gateway/ACP approvals (Telegram, Discord, Slack, TUI, etc.).
|
||||
# Observers only: return values are ignored. Plugins cannot veto or
|
||||
# pre-answer an approval from these hooks (use pre_tool_call to block
|
||||
# a tool before it reaches approval).
|
||||
#
|
||||
# Kwargs for pre_approval_request:
|
||||
# command: str, description: str, pattern_key: str, pattern_keys: list[str],
|
||||
# session_key: str, surface: "cli" | "gateway"
|
||||
# Kwargs for post_approval_response: same as above plus
|
||||
# choice: "once" | "session" | "always" | "deny" | "timeout"
|
||||
"pre_approval_request",
|
||||
"post_approval_response",
|
||||
}
|
||||
|
||||
ENTRY_POINTS_GROUP = "hermes_agent.plugins"
|
||||
|
||||
@@ -999,6 +999,7 @@ def _run_composite_ui(curses, plugin_names, plugin_labels, plugin_selected,
|
||||
# We need to map logical cursor positions to screen rows
|
||||
# accounting for non-navigable separator/headers
|
||||
|
||||
draw_row = 0 # tracks navigable item index
|
||||
|
||||
# --- General Plugins section ---
|
||||
if n_plugins > 0:
|
||||
|
||||
+2
-58
@@ -954,59 +954,6 @@ def import_profile(archive_path: str, name: Optional[str] = None) -> Path:
|
||||
# Rename
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _migrate_honcho_profile_host(old_name: str, new_name: str, new_dir: Path) -> None:
|
||||
"""Rename Honcho host blocks for a renamed profile without changing peers."""
|
||||
old_host = f"hermes.{old_name}"
|
||||
new_host = f"hermes.{new_name}"
|
||||
|
||||
candidates = [
|
||||
new_dir / "honcho.json",
|
||||
_get_default_hermes_home() / "honcho.json",
|
||||
Path.home() / ".honcho" / "config.json",
|
||||
]
|
||||
|
||||
seen: set[Path] = set()
|
||||
for path in candidates:
|
||||
try:
|
||||
resolved = path.resolve()
|
||||
except OSError:
|
||||
resolved = path
|
||||
if resolved in seen or not path.is_file():
|
||||
continue
|
||||
seen.add(resolved)
|
||||
|
||||
try:
|
||||
raw = json.loads(path.read_text(encoding="utf-8"))
|
||||
except (OSError, json.JSONDecodeError):
|
||||
continue
|
||||
|
||||
hosts = raw.get("hosts")
|
||||
if not isinstance(hosts, dict) or old_host not in hosts:
|
||||
continue
|
||||
|
||||
if new_host in hosts:
|
||||
print(f"⚠ Honcho host block not migrated: {new_host} already exists in {path}")
|
||||
continue
|
||||
|
||||
block = hosts[old_host]
|
||||
if isinstance(block, dict) and "aiPeer" not in block:
|
||||
bare = old_host.split(".", 1)[1] if "." in old_host else old_host
|
||||
block["aiPeer"] = bare
|
||||
hosts[new_host] = hosts.pop(old_host)
|
||||
tmp = path.with_suffix(path.suffix + ".tmp")
|
||||
try:
|
||||
tmp.write_text(json.dumps(raw, indent=2, ensure_ascii=False) + "\n", encoding="utf-8")
|
||||
tmp.replace(path)
|
||||
except OSError:
|
||||
try:
|
||||
tmp.unlink(missing_ok=True)
|
||||
except OSError:
|
||||
pass
|
||||
continue
|
||||
|
||||
print(f"✓ Honcho host updated: {old_host} → {new_host}")
|
||||
|
||||
|
||||
def rename_profile(old_name: str, new_name: str) -> Path:
|
||||
"""Rename a profile: directory, wrapper script, service, active_profile.
|
||||
|
||||
@@ -1037,10 +984,7 @@ def rename_profile(old_name: str, new_name: str) -> Path:
|
||||
old_dir.rename(new_dir)
|
||||
print(f"✓ Renamed {old_dir.name} → {new_dir.name}")
|
||||
|
||||
# 3. Update profile-scoped Honcho host blocks, preserving aiPeer identity
|
||||
_migrate_honcho_profile_host(old_name, new_name, new_dir)
|
||||
|
||||
# 4. Update wrapper script
|
||||
# 3. Update wrapper script
|
||||
remove_wrapper_script(old_name)
|
||||
collision = check_alias_collision(new_name)
|
||||
if not collision:
|
||||
@@ -1049,7 +993,7 @@ def rename_profile(old_name: str, new_name: str) -> Path:
|
||||
else:
|
||||
print(f"⚠ Cannot create alias '{new_name}' — {collision}")
|
||||
|
||||
# 5. Update active_profile if it pointed to old name
|
||||
# 4. Update active_profile if it pointed to old name
|
||||
try:
|
||||
if get_active_profile() == old_name:
|
||||
set_active_profile(new_name)
|
||||
|
||||
@@ -71,13 +71,6 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = {
|
||||
auth_type="oauth_external",
|
||||
base_url_override="cloudcode-pa://google",
|
||||
),
|
||||
"lmstudio": HermesOverlay(
|
||||
transport="openai_chat",
|
||||
auth_type="api_key",
|
||||
extra_env_vars=("LM_API_KEY",),
|
||||
base_url_override="http://127.0.0.1:1234/v1",
|
||||
base_url_env_var="LM_BASE_URL",
|
||||
),
|
||||
"copilot-acp": HermesOverlay(
|
||||
transport="codex_responses",
|
||||
auth_type="external_process",
|
||||
@@ -165,21 +158,11 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = {
|
||||
transport="openai_chat",
|
||||
base_url_env_var="XIAOMI_BASE_URL",
|
||||
),
|
||||
"tencent-tokenhub": HermesOverlay(
|
||||
transport="openai_chat",
|
||||
base_url_env_var="TOKENHUB_BASE_URL",
|
||||
),
|
||||
"arcee": HermesOverlay(
|
||||
transport="openai_chat",
|
||||
base_url_override="https://api.arcee.ai/api/v1",
|
||||
base_url_env_var="ARCEE_BASE_URL",
|
||||
),
|
||||
"gmi": HermesOverlay(
|
||||
transport="openai_chat",
|
||||
extra_env_vars=("GMI_API_KEY",),
|
||||
base_url_override="https://api.gmi-serving.com/v1",
|
||||
base_url_env_var="GMI_BASE_URL",
|
||||
),
|
||||
"ollama-cloud": HermesOverlay(
|
||||
transport="openai_chat",
|
||||
base_url_env_var="OLLAMA_BASE_URL",
|
||||
@@ -190,10 +173,6 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = {
|
||||
transport="openai_chat", # default; overridden by api_mode in config
|
||||
base_url_env_var="AZURE_FOUNDRY_BASE_URL",
|
||||
),
|
||||
"bedrock": HermesOverlay(
|
||||
transport="bedrock_converse",
|
||||
auth_type="aws_sdk",
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
@@ -308,12 +287,6 @@ ALIASES: Dict[str, str] = {
|
||||
"mimo": "xiaomi",
|
||||
"xiaomi-mimo": "xiaomi",
|
||||
|
||||
# tencent
|
||||
"tencent": "tencent-tokenhub",
|
||||
"tokenhub": "tencent-tokenhub",
|
||||
"tencent-cloud": "tencent-tokenhub",
|
||||
"tencentmaas": "tencent-tokenhub",
|
||||
|
||||
# bedrock
|
||||
"aws": "bedrock",
|
||||
"aws-bedrock": "bedrock",
|
||||
@@ -324,10 +297,6 @@ ALIASES: Dict[str, str] = {
|
||||
"arcee-ai": "arcee",
|
||||
"arceeai": "arcee",
|
||||
|
||||
# gmi
|
||||
"gmi-cloud": "gmi",
|
||||
"gmicloud": "gmi",
|
||||
|
||||
# Local server aliases → virtual "local" concept (resolved via user config)
|
||||
"lmstudio": "lmstudio",
|
||||
"lm-studio": "lmstudio",
|
||||
@@ -350,9 +319,6 @@ _LABEL_OVERRIDES: Dict[str, str] = {
|
||||
"copilot-acp": "GitHub Copilot ACP",
|
||||
"stepfun": "StepFun Step Plan",
|
||||
"xiaomi": "Xiaomi MiMo",
|
||||
"gmi": "GMI Cloud",
|
||||
"tencent-tokenhub": "Tencent TokenHub",
|
||||
"lmstudio": "LM Studio",
|
||||
"local": "Local endpoint",
|
||||
"bedrock": "AWS Bedrock",
|
||||
"ollama-cloud": "Ollama Cloud",
|
||||
|
||||
@@ -260,16 +260,11 @@ def _resolve_runtime_from_pool_entry(
|
||||
if cfg_base_url:
|
||||
base_url = cfg_base_url
|
||||
configured_mode = _parse_api_mode(model_cfg.get("api_mode"))
|
||||
if provider in ("opencode-zen", "opencode-go"):
|
||||
# Re-derive api_mode from the effective model rather than the
|
||||
# persisted api_mode: the opencode providers serve both
|
||||
# anthropic_messages and chat_completions models, so the previous
|
||||
# session's mode must not leak across /model switches.
|
||||
# Refs #16878.
|
||||
if configured_mode and _provider_supports_explicit_api_mode(provider, configured_provider):
|
||||
api_mode = configured_mode
|
||||
elif provider in ("opencode-zen", "opencode-go"):
|
||||
from hermes_cli.models import opencode_model_api_mode
|
||||
api_mode = opencode_model_api_mode(provider, effective_model)
|
||||
elif configured_mode and _provider_supports_explicit_api_mode(provider, configured_provider):
|
||||
api_mode = configured_mode
|
||||
else:
|
||||
# Auto-detect Anthropic-compatible endpoints (/anthropic suffix,
|
||||
# Kimi /coding, api.openai.com → codex_responses, api.x.ai →
|
||||
@@ -469,30 +464,6 @@ def _resolve_named_custom_runtime(
|
||||
explicit_api_key: Optional[str] = None,
|
||||
explicit_base_url: Optional[str] = None,
|
||||
) -> Optional[Dict[str, Any]]:
|
||||
# Bare `provider="custom"` with an explicit base_url (e.g. propagated
|
||||
# from a `model_aliases:` direct-alias resolution) — build a runtime
|
||||
# directly so the alias's base_url actually takes effect.
|
||||
requested_norm = (requested_provider or "").strip().lower()
|
||||
if requested_norm == "custom" and explicit_base_url:
|
||||
base_url = explicit_base_url.strip().rstrip("/")
|
||||
api_key_candidates = [
|
||||
(explicit_api_key or "").strip(),
|
||||
os.getenv("OPENAI_API_KEY", "").strip(),
|
||||
os.getenv("OPENROUTER_API_KEY", "").strip(),
|
||||
]
|
||||
api_key = next(
|
||||
(c for c in api_key_candidates if has_usable_secret(c)),
|
||||
"",
|
||||
) or "no-key-required"
|
||||
return {
|
||||
"provider": "custom",
|
||||
"api_mode": _detect_api_mode_for_url(base_url) or "chat_completions",
|
||||
"base_url": base_url,
|
||||
"api_key": api_key,
|
||||
"source": "direct-alias",
|
||||
"requested_provider": requested_provider,
|
||||
}
|
||||
|
||||
custom_provider = _get_named_custom_provider(requested_provider)
|
||||
if not custom_provider:
|
||||
return None
|
||||
@@ -1124,34 +1095,13 @@ def resolve_runtime_provider(
|
||||
cfg_base_url and "azure.com" in cfg_base_url.lower()
|
||||
)
|
||||
if _is_azure_endpoint:
|
||||
# Honor user-specified env var hints on the model config before
|
||||
# falling back to the built-in AZURE_ANTHROPIC_KEY / ANTHROPIC_API_KEY
|
||||
# chain. Accept both `key_env` (Hermes canonical — matches the
|
||||
# custom_providers field name) and `api_key_env` (documented in the
|
||||
# Azure Foundry guide and read by most Hermes-compatible importers).
|
||||
# Matches the config.yaml examples in website/docs/guides/azure-foundry.md.
|
||||
token = ""
|
||||
for hint_key in ("key_env", "api_key_env"):
|
||||
env_var = str(model_cfg.get(hint_key) or "").strip()
|
||||
if env_var:
|
||||
token = os.getenv(env_var, "").strip()
|
||||
if token:
|
||||
break
|
||||
# Next: an inline api_key on the model config (useful in multi-profile
|
||||
# setups that want to avoid env-var juggling).
|
||||
if not token:
|
||||
token = str(model_cfg.get("api_key") or "").strip()
|
||||
# Finally fall back to the historical fixed names.
|
||||
if not token:
|
||||
token = (
|
||||
os.getenv("AZURE_ANTHROPIC_KEY", "").strip()
|
||||
or os.getenv("ANTHROPIC_API_KEY", "").strip()
|
||||
)
|
||||
token = (
|
||||
os.getenv("AZURE_ANTHROPIC_KEY", "").strip()
|
||||
or os.getenv("ANTHROPIC_API_KEY", "").strip()
|
||||
)
|
||||
if not token:
|
||||
raise AuthError(
|
||||
"No Azure Anthropic API key found. Set AZURE_ANTHROPIC_KEY or "
|
||||
"ANTHROPIC_API_KEY, or point key_env/api_key_env in your "
|
||||
"config.yaml model section at a custom env var."
|
||||
"No Azure Anthropic API key found. Set AZURE_ANTHROPIC_KEY or ANTHROPIC_API_KEY."
|
||||
)
|
||||
else:
|
||||
from agent.anthropic_adapter import resolve_anthropic_token
|
||||
@@ -1262,20 +1212,15 @@ def resolve_runtime_provider(
|
||||
configured_provider = str(model_cfg.get("provider") or "").strip().lower()
|
||||
# Only honor persisted api_mode when it belongs to the same provider family.
|
||||
configured_mode = _parse_api_mode(model_cfg.get("api_mode"))
|
||||
if provider in ("opencode-zen", "opencode-go"):
|
||||
# opencode-zen/go must always re-derive api_mode from the
|
||||
# target model (not the stale persisted api_mode), because
|
||||
# the same provider serves both anthropic_messages
|
||||
# (e.g. minimax-m2.7) and chat_completions (e.g.
|
||||
# deepseek-v4-flash) and switching models via /model would
|
||||
# otherwise carry the previous mode forward, stripping /v1
|
||||
# from base_url for chat_completions models and 404'ing.
|
||||
# Refs #16878.
|
||||
if configured_mode and _provider_supports_explicit_api_mode(provider, configured_provider):
|
||||
api_mode = configured_mode
|
||||
elif provider in ("opencode-zen", "opencode-go"):
|
||||
from hermes_cli.models import opencode_model_api_mode
|
||||
# Prefer the target_model from the caller (explicit mid-session
|
||||
# switch) over the stale model.default; see _resolve_runtime_from_pool_entry
|
||||
# for the same rationale.
|
||||
_effective = target_model or model_cfg.get("default", "")
|
||||
api_mode = opencode_model_api_mode(provider, _effective)
|
||||
elif configured_mode and _provider_supports_explicit_api_mode(provider, configured_provider):
|
||||
api_mode = configured_mode
|
||||
else:
|
||||
# Auto-detect Anthropic-compatible endpoints by URL convention
|
||||
# (e.g. https://api.minimax.io/anthropic, https://dashscope.../anthropic)
|
||||
|
||||
@@ -712,6 +712,8 @@ def setup_model_provider(config: dict, *, quick: bool = False):
|
||||
if isinstance(_m, dict):
|
||||
selected_provider = _m.get("provider")
|
||||
|
||||
nous_subscription_selected = selected_provider == "nous"
|
||||
|
||||
# ── Same-provider fallback & rotation setup (full setup only) ──
|
||||
if not quick and _supports_same_provider_pool_setup(selected_provider):
|
||||
try:
|
||||
|
||||
+14
-104
@@ -68,7 +68,7 @@ All fields are optional. Missing values inherit from the ``default`` skin.
|
||||
welcome: "Welcome message" # Shown at CLI startup
|
||||
goodbye: "Goodbye! ⚕" # Shown on exit
|
||||
response_label: " ⚕ Hermes " # Response box header label
|
||||
prompt_symbol: "❯" # Input prompt symbol (bare token; renderers add trailing space)
|
||||
prompt_symbol: "❯ " # Input prompt symbol
|
||||
help_header: "(^_^)? Commands" # /help header text
|
||||
|
||||
# Tool prefix: character for tool output lines (default: ┊)
|
||||
@@ -103,10 +103,6 @@ BUILT-IN SKINS
|
||||
- ``slate`` — Cool blue developer-focused theme
|
||||
- ``daylight`` — Light background theme with dark text and blue accents
|
||||
- ``warm-lightmode`` — Warm brown/gold text for light terminal backgrounds
|
||||
- ``poseidon`` — Ocean-god theme (deep blue and seafoam)
|
||||
- ``sisyphus`` — Austere grayscale with boulder motif
|
||||
- ``charizard`` — Volcanic burnt-orange and ember
|
||||
- ``bunnny`` — Barbie-pink coquette theme (sparkles, hearts, bunnies)
|
||||
|
||||
USER SKINS
|
||||
==========
|
||||
@@ -194,7 +190,7 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
|
||||
"welcome": "Welcome to Hermes Agent! Type your message or /help for commands.",
|
||||
"goodbye": "Goodbye! ⚕",
|
||||
"response_label": " ⚕ Hermes ",
|
||||
"prompt_symbol": "❯",
|
||||
"prompt_symbol": "❯ ",
|
||||
"help_header": "(^_^)? Available Commands",
|
||||
},
|
||||
"tool_prefix": "┊",
|
||||
@@ -246,7 +242,7 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
|
||||
"welcome": "Welcome to Ares Agent! Type your message or /help for commands.",
|
||||
"goodbye": "Farewell, warrior! ⚔",
|
||||
"response_label": " ⚔ Ares ",
|
||||
"prompt_symbol": "⚔",
|
||||
"prompt_symbol": "⚔ ❯ ",
|
||||
"help_header": "(⚔) Available Commands",
|
||||
},
|
||||
"tool_prefix": "╎",
|
||||
@@ -305,7 +301,7 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
|
||||
"welcome": "Welcome to Hermes Agent! Type your message or /help for commands.",
|
||||
"goodbye": "Goodbye! ⚕",
|
||||
"response_label": " ⚕ Hermes ",
|
||||
"prompt_symbol": "❯",
|
||||
"prompt_symbol": "❯ ",
|
||||
"help_header": "[?] Available Commands",
|
||||
},
|
||||
"tool_prefix": "┊",
|
||||
@@ -344,7 +340,7 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
|
||||
"welcome": "Welcome to Hermes Agent! Type your message or /help for commands.",
|
||||
"goodbye": "Goodbye! ⚕",
|
||||
"response_label": " ⚕ Hermes ",
|
||||
"prompt_symbol": "❯",
|
||||
"prompt_symbol": "❯ ",
|
||||
"help_header": "(^_^)? Available Commands",
|
||||
},
|
||||
"tool_prefix": "┊",
|
||||
@@ -381,7 +377,7 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
|
||||
"welcome": "Welcome to Hermes Agent! Type your message or /help for commands.",
|
||||
"goodbye": "Goodbye! ⚕",
|
||||
"response_label": " ⚕ Hermes ",
|
||||
"prompt_symbol": "❯",
|
||||
"prompt_symbol": "❯ ",
|
||||
"help_header": "[?] Available Commands",
|
||||
},
|
||||
"tool_prefix": "│",
|
||||
@@ -418,7 +414,7 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
|
||||
"welcome": "Welcome to Hermes Agent! Type your message or /help for commands.",
|
||||
"goodbye": "Goodbye! \u2695",
|
||||
"response_label": " \u2695 Hermes ",
|
||||
"prompt_symbol": "\u276f",
|
||||
"prompt_symbol": "\u276f ",
|
||||
"help_header": "(^_^)? Available Commands",
|
||||
},
|
||||
"tool_prefix": "\u250a",
|
||||
@@ -471,7 +467,7 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
|
||||
"welcome": "Welcome to Poseidon Agent! Type your message or /help for commands.",
|
||||
"goodbye": "Fair winds! Ψ",
|
||||
"response_label": " Ψ Poseidon ",
|
||||
"prompt_symbol": "Ψ",
|
||||
"prompt_symbol": "Ψ ❯ ",
|
||||
"help_header": "(Ψ) Available Commands",
|
||||
},
|
||||
"tool_prefix": "│",
|
||||
@@ -543,7 +539,7 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
|
||||
"welcome": "Welcome to Sisyphus Agent! Type your message or /help for commands.",
|
||||
"goodbye": "The boulder waits. ◉",
|
||||
"response_label": " ◉ Sisyphus ",
|
||||
"prompt_symbol": "◉",
|
||||
"prompt_symbol": "◉ ❯ ",
|
||||
"help_header": "(◉) Available Commands",
|
||||
},
|
||||
"tool_prefix": "│",
|
||||
@@ -616,7 +612,7 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
|
||||
"welcome": "Welcome to Charizard Agent! Type your message or /help for commands.",
|
||||
"goodbye": "Flame out! ✦",
|
||||
"response_label": " ✦ Charizard ",
|
||||
"prompt_symbol": "✦",
|
||||
"prompt_symbol": "✦ ❯ ",
|
||||
"help_header": "(✦) Available Commands",
|
||||
},
|
||||
"tool_prefix": "│",
|
||||
@@ -640,83 +636,6 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
|
||||
[#F29C38]⠀⠀⠀⠀⠀⠀⠀⣼⡟⠀⠀⢻⣧⠀⠀⠀⠀⠀⠀⠀⠀[/]
|
||||
[dim #7A3511]⠀⠀⠀⠀⠀⠀⠀tail flame lit⠀⠀⠀⠀⠀⠀⠀⠀[/]""",
|
||||
},
|
||||
"bunnny": {
|
||||
"name": "bunnny",
|
||||
"description": "Barbie-pink coquette theme — sparkles, bows, and bubblegum",
|
||||
"colors": {
|
||||
"banner_border": "#E91E63",
|
||||
"banner_title": "#FF3366",
|
||||
"banner_accent": "#FF69B4",
|
||||
"banner_dim": "#C2185B",
|
||||
"banner_text": "#FFF0F5",
|
||||
"ui_accent": "#FF3366",
|
||||
"ui_label": "#FF69B4",
|
||||
"ui_ok": "#FFB6C1",
|
||||
"ui_error": "#FF1744",
|
||||
"ui_warn": "#FFAB91",
|
||||
"prompt": "#FFF0F5",
|
||||
"input_rule": "#E91E63",
|
||||
"response_border": "#FF69B4",
|
||||
"status_bar_bg": "#2A0E1E",
|
||||
"status_bar_text": "#FFE4EC",
|
||||
"status_bar_strong": "#FF3366",
|
||||
"status_bar_dim": "#8E4B6B",
|
||||
"status_bar_good": "#FFB6C1",
|
||||
"status_bar_warn": "#FF69B4",
|
||||
"status_bar_bad": "#FF3366",
|
||||
"status_bar_critical": "#FF1744",
|
||||
"session_label": "#FF69B4",
|
||||
"session_border": "#8E4B6B",
|
||||
"voice_status_bg": "#2A0E1E",
|
||||
"completion_menu_bg": "#2A0E1E",
|
||||
"completion_menu_current_bg": "#5A1D3A",
|
||||
"completion_menu_meta_bg": "#2A0E1E",
|
||||
"completion_menu_meta_current_bg": "#5A1D3A",
|
||||
},
|
||||
"spinner": {
|
||||
"waiting_faces": ["(♡)", "(✿)", "(✧)", "(❀)", "(ෆ)", "(˘ᵕ˘)", "(⑅)"],
|
||||
"thinking_faces": ["(♡)", "(✧)", "(❀)", "(✿)", "(ෆ)", "(˘ᵕ˘)"],
|
||||
"thinking_verbs": [
|
||||
"sparkling", "twirling", "glittering", "frosting",
|
||||
"bedazzling", "bowtying", "sprinkling sugar", "picking ribbons",
|
||||
"glossing up", "curating the vibe", "dusting pink",
|
||||
"tying a little bow", "making it cute",
|
||||
],
|
||||
"wings": [
|
||||
["⟪♡", "♡⟫"],
|
||||
["⟪✧", "✧⟫"],
|
||||
["⟪✿", "✿⟫"],
|
||||
["⟪❀", "❀⟫"],
|
||||
["⟪ෆ", "ෆ⟫"],
|
||||
],
|
||||
},
|
||||
"branding": {
|
||||
"agent_name": "Hermes Agent",
|
||||
"welcome": "hi bestie ♡ welcome to Hermes Agent! type your message or /help for commands (ノ◕ヮ◕)ノ*:・゚✧",
|
||||
"goodbye": "bye bestie ♡ ✧",
|
||||
"response_label": " ♡ Hermes ",
|
||||
"prompt_symbol": "♡",
|
||||
"help_header": "(ノ◕ヮ◕)ノ*:・゚✧ Commands",
|
||||
},
|
||||
"tool_prefix": "♡",
|
||||
"banner_logo": """[bold #FFB6C1]██╗ ██╗███████╗██████╗ ███╗ ███╗███████╗███████╗ ██╗ ██╗ [/]
|
||||
[bold #FF69B4]██║ ██║██╔════╝██╔══██╗████╗ ████║██╔════╝██╔════╝ ████████╗[/]
|
||||
[#FF3C7F]███████║█████╗ ██████╔╝██╔████╔██║█████╗ ███████╗ ╚██████╔╝[/]
|
||||
[#FF3366]██╔══██║██╔══╝ ██╔══██╗██║╚██╔╝██║██╔══╝ ╚════██║ ╚████╔╝ [/]
|
||||
[#E91E63]██║ ██║███████╗██║ ██║██║ ╚═╝ ██║███████╗███████║ ╚██╔╝ [/]
|
||||
[#C2185B]╚═╝ ╚═╝╚══════╝╚═╝ ╚═╝╚═╝ ╚═╝╚══════╝╚══════╝ ╚═╝ [/]""",
|
||||
"banner_hero": """[#FF69B4]⠀⠀✧⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀✧⠀⠀[/]
|
||||
[#FFB6C1]⠀⠀⠀⠀⠀⠀♡⠀⠀⠀⠀⠀⠀⠀⠀⠀⢀⣀⠀⠀⠀⠀⠀⢀⣀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀♡⠀⠀⠀⠀[/]
|
||||
[#FF69B4]⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⢠⣯⢬⣷⡀⠀⠀⣴⡯⢌⣧⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀[/]
|
||||
[#FF3366]⠀✿⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠸⣿♡⠹⣷⠀⢸⡝♡⢸⡿⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀✿⠀[/]
|
||||
[#FF3C7F]⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠻⣧⣀⣿⣦⣼⡁⣠⣿⠃⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀[/]
|
||||
[#FF3366]⠀⠀⠀⠀✧⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⢀⡾⠋⠀⠀⠀⠈⣙⣯⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀✧[/]
|
||||
[#FF3366]⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⣾⠀⠀⠀⠀⠀⠀⠀⠸⡆⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀[/]
|
||||
[#E91E63]⠀⠀⠀⠀⠀⠀⠀♡⠀⠀⠀⠀⠀⠀⠀⠀⢰⡧⢄⢰⡆⠀⢰⡆⡠⢄⣧⠀⠀⠀⠀⠀⠀⠀⠀♡⠀⠀⠀⠀⠀[/]
|
||||
[#C2185B]⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠳⣼⣤⣤⣤⣤⣤⣧⠾⠁⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀[/]
|
||||
[#FF69B4]⠀⠀⠀⠀⠀✿⠀⠀⠀⠀⠀⠀❀⠀⠀⠀⠀⠀❀⠀⠀❀⠀⠀⠀⠀⠀❀⠀⠀⠀⠀⠀⠀✿⠀⠀⠀⠀⠀[/]
|
||||
[dim #C2185B]⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀xoxo⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀[/]""",
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
@@ -861,21 +780,12 @@ def init_skin_from_config(config: dict) -> None:
|
||||
# =============================================================================
|
||||
|
||||
|
||||
def get_active_prompt_symbol(fallback: str = "❯") -> str:
|
||||
"""Return the interactive prompt symbol with a single trailing space.
|
||||
|
||||
Skins store ``prompt_symbol`` as a bare token (no spaces). The trailing
|
||||
space is appended here so callers can drop it straight into a rendered
|
||||
prompt without hand-rolling whitespace.
|
||||
"""
|
||||
def get_active_prompt_symbol(fallback: str = "❯ ") -> str:
|
||||
"""Get the interactive prompt symbol from the active skin."""
|
||||
try:
|
||||
raw = get_active_skin().get_branding("prompt_symbol", fallback)
|
||||
return get_active_skin().get_branding("prompt_symbol", fallback)
|
||||
except Exception:
|
||||
raw = fallback
|
||||
|
||||
cleaned = (raw or fallback).strip()
|
||||
|
||||
return f"{cleaned or fallback.strip()} "
|
||||
return fallback
|
||||
|
||||
|
||||
|
||||
|
||||
+7
-27
@@ -6,7 +6,7 @@ Shows the status of all Hermes Agent components.
|
||||
|
||||
import os
|
||||
import sys
|
||||
import subprocess # noqa: F401 — re-exported for tests that monkeypatch status.subprocess to guard against regressions
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
PROJECT_ROOT = Path(__file__).parent.parent.resolve()
|
||||
@@ -26,15 +26,12 @@ def check_mark(ok: bool) -> str:
|
||||
return color("✗", Colors.RED)
|
||||
|
||||
def redact_key(key: str) -> str:
|
||||
"""Redact an API key for display.
|
||||
|
||||
Thin wrapper over :func:`agent.redact.mask_secret`. Preserves the
|
||||
"(not set)" placeholder in dim color to match ``hermes config``'s
|
||||
output (previously this variant was missing the DIM color —
|
||||
consolidated via PR that also introduced ``mask_secret``).
|
||||
"""
|
||||
from agent.redact import mask_secret
|
||||
return mask_secret(key, empty=color("(not set)", Colors.DIM))
|
||||
"""Redact an API key for display."""
|
||||
if not key:
|
||||
return "(not set)"
|
||||
if len(key) < 12:
|
||||
return "***"
|
||||
return key[:4] + "..." + key[-4:]
|
||||
|
||||
|
||||
def _format_iso_timestamp(value) -> str:
|
||||
@@ -277,23 +274,6 @@ def show_status(args):
|
||||
label = "configured" if configured else "not configured (run: hermes model)"
|
||||
print(f" {pname:<16} {check_mark(configured)} {label}")
|
||||
|
||||
# LM Studio reachability — only probe when it's the active provider so
|
||||
# users with foreign configs don't see noise. Auth rejection vs. silent
|
||||
# empty list is the most common LM Studio support case.
|
||||
if _effective_provider_label() == "LM Studio":
|
||||
from hermes_cli.models import probe_lmstudio_models
|
||||
model_cfg = config.get("model")
|
||||
base = (model_cfg.get("base_url") if isinstance(model_cfg, dict) else None) or get_env_value("LM_BASE_URL") or "http://127.0.0.1:1234/v1"
|
||||
try:
|
||||
models = probe_lmstudio_models(api_key=get_env_value("LM_API_KEY") or "", base_url=base, timeout=1.5)
|
||||
if models is None:
|
||||
ok, msg = False, f"unreachable at {base}"
|
||||
else:
|
||||
ok, msg = True, f"reachable ({len(models)} model(s)) at {base}"
|
||||
except AuthError:
|
||||
ok, msg = False, "auth rejected — set LM_API_KEY"
|
||||
print(f" {'LM Studio':<16} {check_mark(ok)} {msg}")
|
||||
|
||||
# =========================================================================
|
||||
# Terminal Configuration
|
||||
# =========================================================================
|
||||
|
||||
@@ -263,6 +263,7 @@ TIPS = [
|
||||
"hermes status --deep runs deeper diagnostic checks across all components.",
|
||||
|
||||
# --- Hidden Gems & Power-User Tricks ---
|
||||
"BOOT.md at ~/.hermes/BOOT.md runs automatically on every gateway start — use it for startup checks.",
|
||||
"Cron jobs can attach a Python script (--script) whose stdout is injected into the prompt as context.",
|
||||
"Cron scripts live in ~/.hermes/scripts/ and run before the agent — perfect for data collection pipelines.",
|
||||
"prefill_messages_file in config.yaml injects few-shot examples into every API call, never saved to history.",
|
||||
|
||||
+1
-159
@@ -425,31 +425,6 @@ TOOL_CATEGORIES = {
|
||||
},
|
||||
],
|
||||
},
|
||||
"langfuse": {
|
||||
"name": "Langfuse Observability",
|
||||
"icon": "📊",
|
||||
"providers": [
|
||||
{
|
||||
"name": "Langfuse Cloud",
|
||||
"tag": "Hosted Langfuse (cloud.langfuse.com)",
|
||||
"env_vars": [
|
||||
{"key": "HERMES_LANGFUSE_PUBLIC_KEY", "prompt": "Langfuse public key (pk-lf-...)", "url": "https://cloud.langfuse.com"},
|
||||
{"key": "HERMES_LANGFUSE_SECRET_KEY", "prompt": "Langfuse secret key (sk-lf-...)", "url": "https://cloud.langfuse.com"},
|
||||
],
|
||||
"post_setup": "langfuse",
|
||||
},
|
||||
{
|
||||
"name": "Langfuse Self-Hosted",
|
||||
"tag": "Self-hosted Langfuse instance",
|
||||
"env_vars": [
|
||||
{"key": "HERMES_LANGFUSE_PUBLIC_KEY", "prompt": "Langfuse public key (pk-lf-...)"},
|
||||
{"key": "HERMES_LANGFUSE_SECRET_KEY", "prompt": "Langfuse secret key (sk-lf-...)"},
|
||||
{"key": "HERMES_LANGFUSE_BASE_URL", "prompt": "Langfuse server URL (e.g. http://localhost:3000)", "default": "http://localhost:3000"},
|
||||
],
|
||||
"post_setup": "langfuse",
|
||||
},
|
||||
],
|
||||
},
|
||||
}
|
||||
|
||||
# Simple env-var requirements for toolsets NOT in TOOL_CATEGORIES.
|
||||
@@ -467,10 +442,7 @@ def _run_post_setup(post_setup_key: str):
|
||||
import shutil
|
||||
if post_setup_key in ("agent_browser", "browserbase"):
|
||||
node_modules = PROJECT_ROOT / "node_modules" / "agent-browser"
|
||||
npm_bin = shutil.which("npm")
|
||||
npx_bin = shutil.which("npx")
|
||||
# Step 1: install the agent-browser npm package into node_modules/
|
||||
if not node_modules.exists() and npm_bin:
|
||||
if not node_modules.exists() and shutil.which("npm"):
|
||||
_print_info(" Installing Node.js dependencies for browser tools...")
|
||||
import subprocess
|
||||
result = subprocess.run(
|
||||
@@ -482,94 +454,8 @@ def _run_post_setup(post_setup_key: str):
|
||||
else:
|
||||
from hermes_constants import display_hermes_home
|
||||
_print_warning(f" npm install failed - run manually: cd {display_hermes_home()}/hermes-agent && npm install")
|
||||
if result.stderr:
|
||||
_print_info(f" {result.stderr.strip()[:200]}")
|
||||
elif not node_modules.exists():
|
||||
_print_warning(" Node.js not found - browser tools require: npm install (in hermes-agent directory)")
|
||||
return
|
||||
|
||||
# Step 2: only the local browser provider actually needs Chromium on
|
||||
# disk. Cloud providers (Browserbase, Browser Use, Firecrawl) host
|
||||
# their own Chromium and don't need the local install.
|
||||
if post_setup_key != "agent_browser":
|
||||
return
|
||||
|
||||
# Step 3: ensure the Chromium / headless-shell build agent-browser
|
||||
# drives is actually installed. Without it the CLI hangs on first
|
||||
# use until the command timeout fires. Skip inside Docker — the
|
||||
# image bakes Chromium in at build time, and runtime users usually
|
||||
# can't write to PLAYWRIGHT_BROWSERS_PATH anyway.
|
||||
try:
|
||||
# Import lazily so the tools_config UI doesn't pull in the full
|
||||
# browser_tool module at import time.
|
||||
from tools.browser_tool import (
|
||||
_chromium_installed,
|
||||
_running_in_docker,
|
||||
)
|
||||
except Exception as exc: # pragma: no cover — defensive
|
||||
_print_warning(f" Could not check Chromium status: {exc}")
|
||||
return
|
||||
|
||||
if _chromium_installed():
|
||||
_print_success(" Chromium browser already installed")
|
||||
return
|
||||
|
||||
if _running_in_docker():
|
||||
_print_warning(
|
||||
" Chromium is missing but you're running in Docker."
|
||||
)
|
||||
_print_info(
|
||||
" Pull the latest image to get the bundled Chromium:"
|
||||
)
|
||||
_print_info(
|
||||
" docker pull ghcr.io/nousresearch/hermes-agent:latest"
|
||||
)
|
||||
return
|
||||
|
||||
if not npx_bin:
|
||||
_print_warning(
|
||||
" npx not found - install Chromium manually: npx agent-browser install --with-deps"
|
||||
)
|
||||
return
|
||||
|
||||
_print_info(" Installing Chromium (~170MB one-time download)...")
|
||||
import subprocess
|
||||
# Prefer the bundled agent-browser install subcommand so the
|
||||
# version of Chromium matches the CLI. Fall back to npx shim on
|
||||
# setups where the local bin stub isn't present.
|
||||
local_ab = PROJECT_ROOT / "node_modules" / ".bin" / "agent-browser"
|
||||
if sys.platform == "win32":
|
||||
local_ab_win = local_ab.with_suffix(".cmd")
|
||||
if local_ab_win.exists():
|
||||
local_ab = local_ab_win
|
||||
install_cmd = (
|
||||
[str(local_ab), "install", "--with-deps"]
|
||||
if local_ab.exists()
|
||||
else [npx_bin, "-y", "agent-browser", "install", "--with-deps"]
|
||||
)
|
||||
try:
|
||||
result = subprocess.run(
|
||||
install_cmd,
|
||||
capture_output=True, text=True, cwd=str(PROJECT_ROOT), timeout=600,
|
||||
)
|
||||
if result.returncode == 0:
|
||||
_print_success(" Chromium installed")
|
||||
# Invalidate the cached "missing" result so subsequent
|
||||
# check_browser_requirements() calls see the new install.
|
||||
import tools.browser_tool as _bt
|
||||
_bt._cached_chromium_installed = None
|
||||
else:
|
||||
_print_warning(" Chromium install failed:")
|
||||
tail = (result.stderr or result.stdout or "").strip().splitlines()[-3:]
|
||||
for line in tail:
|
||||
_print_info(f" {line[:200]}")
|
||||
_print_info(" Run manually: npx agent-browser install --with-deps")
|
||||
except subprocess.TimeoutExpired:
|
||||
_print_warning(" Chromium install timed out (>10min)")
|
||||
_print_info(" Run manually: npx agent-browser install --with-deps")
|
||||
except Exception as exc:
|
||||
_print_warning(f" Chromium install failed: {exc}")
|
||||
_print_info(" Run manually: npx agent-browser install --with-deps")
|
||||
|
||||
elif post_setup_key == "camofox":
|
||||
camofox_dir = PROJECT_ROOT / "node_modules" / "@askjo" / "camofox-browser"
|
||||
@@ -681,40 +567,6 @@ def _run_post_setup(post_setup_key: str):
|
||||
_print_info(" git submodule update --init --recursive")
|
||||
_print_info(' uv pip install -e "./tinker-atropos"')
|
||||
|
||||
elif post_setup_key == "langfuse":
|
||||
# Install the langfuse SDK.
|
||||
try:
|
||||
__import__("langfuse")
|
||||
_print_success(" langfuse SDK already installed")
|
||||
except ImportError:
|
||||
import subprocess
|
||||
_print_info(" Installing langfuse SDK...")
|
||||
result = subprocess.run(
|
||||
[sys.executable, "-m", "pip", "install", "langfuse", "--quiet"],
|
||||
capture_output=True, text=True, timeout=120,
|
||||
)
|
||||
if result.returncode == 0:
|
||||
_print_success(" langfuse SDK installed")
|
||||
else:
|
||||
_print_warning(" langfuse SDK install failed — run manually: pip install langfuse")
|
||||
# Opt the bundled observability/langfuse plugin into plugins.enabled.
|
||||
# The plugin ships in the repo but doesn't load until the user enables
|
||||
# it (standalone plugins are opt-in).
|
||||
try:
|
||||
from hermes_cli.plugins_cmd import _get_enabled_set, _save_enabled_set
|
||||
enabled = _get_enabled_set()
|
||||
if "observability/langfuse" in enabled or "langfuse" in enabled:
|
||||
_print_success(" Plugin observability/langfuse already enabled")
|
||||
else:
|
||||
enabled.add("observability/langfuse")
|
||||
_save_enabled_set(enabled)
|
||||
_print_success(" Plugin observability/langfuse enabled")
|
||||
except Exception as exc:
|
||||
_print_warning(f" Could not enable plugin automatically: {exc}")
|
||||
_print_info(" Run manually: hermes plugins enable observability/langfuse")
|
||||
_print_info(" Restart Hermes for tracing to take effect.")
|
||||
_print_info(" Verify: hermes plugins list")
|
||||
|
||||
|
||||
# ─── Platform / Toolset Helpers ───────────────────────────────────────────────
|
||||
|
||||
@@ -925,16 +777,6 @@ def _get_platform_tools(
|
||||
else:
|
||||
enabled_toolsets.update(explicit_mcp_servers)
|
||||
|
||||
# Honor agent.disabled_toolsets from config.yaml — allows users to
|
||||
# globally suppress specific toolsets (e.g. "memory") across all
|
||||
# platforms without per-platform toolset configuration. This runs
|
||||
# last so it overrides everything above.
|
||||
agent_cfg = config.get("agent") or {}
|
||||
disabled_toolsets = agent_cfg.get("disabled_toolsets") or []
|
||||
if disabled_toolsets:
|
||||
disabled_set = {str(ts) for ts in disabled_toolsets}
|
||||
enabled_toolsets -= disabled_set
|
||||
|
||||
return enabled_toolsets
|
||||
|
||||
|
||||
|
||||
@@ -736,7 +736,7 @@ async def get_sessions(limit: int = 20, offset: int = 0):
|
||||
return {"sessions": sessions, "total": total, "limit": limit, "offset": offset}
|
||||
finally:
|
||||
db.close()
|
||||
except Exception:
|
||||
except Exception as e:
|
||||
_log.exception("GET /api/sessions failed")
|
||||
raise HTTPException(status_code=500, detail="Internal server error")
|
||||
|
||||
@@ -968,7 +968,7 @@ async def update_config(body: ConfigUpdate):
|
||||
try:
|
||||
save_config(_denormalize_config_from_web(body.config))
|
||||
return {"ok": True}
|
||||
except Exception:
|
||||
except Exception as e:
|
||||
_log.exception("PUT /api/config failed")
|
||||
raise HTTPException(status_code=500, detail="Internal server error")
|
||||
|
||||
@@ -997,7 +997,7 @@ async def set_env_var(body: EnvVarUpdate):
|
||||
try:
|
||||
save_env_value(body.key, body.value)
|
||||
return {"ok": True, "key": body.key}
|
||||
except Exception:
|
||||
except Exception as e:
|
||||
_log.exception("PUT /api/env failed")
|
||||
raise HTTPException(status_code=500, detail="Internal server error")
|
||||
|
||||
@@ -1011,7 +1011,7 @@ async def remove_env_var(body: EnvVarDelete):
|
||||
return {"ok": True, "key": body.key}
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception:
|
||||
except Exception as e:
|
||||
_log.exception("DELETE /api/env failed")
|
||||
raise HTTPException(status_code=500, detail="Internal server error")
|
||||
|
||||
@@ -1568,6 +1568,7 @@ async def _start_device_code_flow(provider_id: str) -> Dict[str, Any]:
|
||||
then spawns a background poller. Returns the user-facing display fields
|
||||
so the UI can render the verification page link + user code.
|
||||
"""
|
||||
from hermes_cli import auth as hauth
|
||||
if provider_id == "nous":
|
||||
from hermes_cli.auth import _request_device_code, PROVIDER_REGISTRY
|
||||
import httpx
|
||||
@@ -2211,7 +2212,7 @@ async def get_usage_analytics(days: int = 30):
|
||||
cutoff = time.time() - (days * 86400)
|
||||
cur = db._conn.execute("""
|
||||
SELECT date(started_at, 'unixepoch') as day,
|
||||
SUM(input_tokens) as input_tokens,
|
||||
SUM(input_tokens + COALESCE(cache_read_tokens, 0) + COALESCE(cache_write_tokens, 0)) as input_tokens,
|
||||
SUM(output_tokens) as output_tokens,
|
||||
SUM(cache_read_tokens) as cache_read_tokens,
|
||||
SUM(reasoning_tokens) as reasoning_tokens,
|
||||
@@ -2226,18 +2227,18 @@ async def get_usage_analytics(days: int = 30):
|
||||
|
||||
cur2 = db._conn.execute("""
|
||||
SELECT model,
|
||||
SUM(input_tokens) as input_tokens,
|
||||
SUM(input_tokens + COALESCE(cache_read_tokens, 0) + COALESCE(cache_write_tokens, 0)) as input_tokens,
|
||||
SUM(output_tokens) as output_tokens,
|
||||
COALESCE(SUM(estimated_cost_usd), 0) as estimated_cost,
|
||||
COUNT(*) as sessions,
|
||||
SUM(COALESCE(api_call_count, 0)) as api_calls
|
||||
FROM sessions WHERE started_at > ? AND model IS NOT NULL
|
||||
GROUP BY model ORDER BY SUM(input_tokens) + SUM(output_tokens) DESC
|
||||
GROUP BY model ORDER BY SUM(input_tokens + COALESCE(cache_read_tokens, 0) + COALESCE(cache_write_tokens, 0)) + SUM(output_tokens) DESC
|
||||
""", (cutoff,))
|
||||
by_model = [dict(r) for r in cur2.fetchall()]
|
||||
|
||||
cur3 = db._conn.execute("""
|
||||
SELECT SUM(input_tokens) as total_input,
|
||||
SELECT SUM(input_tokens + COALESCE(cache_read_tokens, 0) + COALESCE(cache_write_tokens, 0)) as total_input,
|
||||
SUM(output_tokens) as total_output,
|
||||
SUM(cache_read_tokens) as total_cache_read,
|
||||
SUM(reasoning_tokens) as total_reasoning,
|
||||
|
||||
@@ -11,6 +11,7 @@ hot-reloaded by the webhook adapter without a gateway restart.
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import secrets
|
||||
import time
|
||||
@@ -18,7 +19,6 @@ from pathlib import Path
|
||||
from typing import Dict
|
||||
|
||||
from hermes_constants import display_hermes_home
|
||||
from utils import atomic_replace
|
||||
|
||||
|
||||
_SUBSCRIPTIONS_FILENAME = "webhook_subscriptions.json"
|
||||
@@ -52,7 +52,7 @@ def _save_subscriptions(subs: Dict[str, dict]) -> None:
|
||||
json.dumps(subs, indent=2, ensure_ascii=False),
|
||||
encoding="utf-8",
|
||||
)
|
||||
atomic_replace(tmp_path, path)
|
||||
os.replace(str(tmp_path), str(path))
|
||||
|
||||
|
||||
def _get_webhook_config() -> dict:
|
||||
|
||||
+148
-431
@@ -22,8 +22,6 @@ import sqlite3
|
||||
import threading
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
from agent.memory_manager import sanitize_context
|
||||
from hermes_constants import get_hermes_home
|
||||
from typing import Any, Callable, Dict, List, Optional, TypeVar
|
||||
|
||||
@@ -33,7 +31,7 @@ T = TypeVar("T")
|
||||
|
||||
DEFAULT_DB_PATH = get_hermes_home() / "state.db"
|
||||
|
||||
SCHEMA_VERSION = 11
|
||||
SCHEMA_VERSION = 9
|
||||
|
||||
SCHEMA_SQL = """
|
||||
CREATE TABLE IF NOT EXISTS schema_version (
|
||||
@@ -102,56 +100,22 @@ CREATE INDEX IF NOT EXISTS idx_messages_session ON messages(session_id, timestam
|
||||
|
||||
FTS_SQL = """
|
||||
CREATE VIRTUAL TABLE IF NOT EXISTS messages_fts USING fts5(
|
||||
content
|
||||
content,
|
||||
content=messages,
|
||||
content_rowid=id
|
||||
);
|
||||
|
||||
CREATE TRIGGER IF NOT EXISTS messages_fts_insert AFTER INSERT ON messages BEGIN
|
||||
INSERT INTO messages_fts(rowid, content) VALUES (
|
||||
new.id,
|
||||
COALESCE(new.content, '') || ' ' || COALESCE(new.tool_name, '') || ' ' || COALESCE(new.tool_calls, '')
|
||||
);
|
||||
INSERT INTO messages_fts(rowid, content) VALUES (new.id, new.content);
|
||||
END;
|
||||
|
||||
CREATE TRIGGER IF NOT EXISTS messages_fts_delete AFTER DELETE ON messages BEGIN
|
||||
DELETE FROM messages_fts WHERE rowid = old.id;
|
||||
INSERT INTO messages_fts(messages_fts, rowid, content) VALUES('delete', old.id, old.content);
|
||||
END;
|
||||
|
||||
CREATE TRIGGER IF NOT EXISTS messages_fts_update AFTER UPDATE ON messages BEGIN
|
||||
DELETE FROM messages_fts WHERE rowid = old.id;
|
||||
INSERT INTO messages_fts(rowid, content) VALUES (
|
||||
new.id,
|
||||
COALESCE(new.content, '') || ' ' || COALESCE(new.tool_name, '') || ' ' || COALESCE(new.tool_calls, '')
|
||||
);
|
||||
END;
|
||||
"""
|
||||
|
||||
# Trigram FTS5 table for CJK substring search. The default unicode61
|
||||
# tokenizer splits CJK characters into individual tokens, breaking phrase
|
||||
# matching. The trigram tokenizer creates overlapping 3-byte sequences so
|
||||
# substring queries work natively for any script (CJK, Thai, etc.).
|
||||
FTS_TRIGRAM_SQL = """
|
||||
CREATE VIRTUAL TABLE IF NOT EXISTS messages_fts_trigram USING fts5(
|
||||
content,
|
||||
tokenize='trigram'
|
||||
);
|
||||
|
||||
CREATE TRIGGER IF NOT EXISTS messages_fts_trigram_insert AFTER INSERT ON messages BEGIN
|
||||
INSERT INTO messages_fts_trigram(rowid, content) VALUES (
|
||||
new.id,
|
||||
COALESCE(new.content, '') || ' ' || COALESCE(new.tool_name, '') || ' ' || COALESCE(new.tool_calls, '')
|
||||
);
|
||||
END;
|
||||
|
||||
CREATE TRIGGER IF NOT EXISTS messages_fts_trigram_delete AFTER DELETE ON messages BEGIN
|
||||
DELETE FROM messages_fts_trigram WHERE rowid = old.id;
|
||||
END;
|
||||
|
||||
CREATE TRIGGER IF NOT EXISTS messages_fts_trigram_update AFTER UPDATE ON messages BEGIN
|
||||
DELETE FROM messages_fts_trigram WHERE rowid = old.id;
|
||||
INSERT INTO messages_fts_trigram(rowid, content) VALUES (
|
||||
new.id,
|
||||
COALESCE(new.content, '') || ' ' || COALESCE(new.tool_name, '') || ' ' || COALESCE(new.tool_calls, '')
|
||||
);
|
||||
INSERT INTO messages_fts(messages_fts, rowid, content) VALUES('delete', old.id, old.content);
|
||||
INSERT INTO messages_fts(rowid, content) VALUES (new.id, new.content);
|
||||
END;
|
||||
"""
|
||||
|
||||
@@ -293,201 +257,118 @@ class SessionDB:
|
||||
self._conn.close()
|
||||
self._conn = None
|
||||
|
||||
@staticmethod
|
||||
def _parse_schema_columns(schema_sql: str) -> Dict[str, Dict[str, str]]:
|
||||
"""Extract expected columns per table from SCHEMA_SQL.
|
||||
|
||||
Uses an in-memory SQLite database to parse the SQL — SQLite itself
|
||||
handles all syntax (DEFAULT expressions with commas, inline
|
||||
REFERENCES, CHECK constraints, etc.) so there are zero regex
|
||||
edge cases. The in-memory DB is opened, the schema DDL is
|
||||
executed, and PRAGMA table_info extracts the column metadata.
|
||||
|
||||
Adding a column to SCHEMA_SQL is all that's needed; the
|
||||
reconciliation loop picks it up automatically.
|
||||
"""
|
||||
ref = sqlite3.connect(":memory:")
|
||||
try:
|
||||
ref.executescript(schema_sql)
|
||||
table_columns: Dict[str, Dict[str, str]] = {}
|
||||
for (tbl,) in ref.execute(
|
||||
"SELECT name FROM sqlite_master "
|
||||
"WHERE type='table' AND name NOT LIKE 'sqlite_%'"
|
||||
).fetchall():
|
||||
cols: Dict[str, str] = {}
|
||||
for row in ref.execute(
|
||||
f'PRAGMA table_info("{tbl}")'
|
||||
).fetchall():
|
||||
# row: (cid, name, type, notnull, dflt_value, pk)
|
||||
col_name = row[1]
|
||||
col_type = row[2] or ""
|
||||
notnull = row[3]
|
||||
default = row[4]
|
||||
pk = row[5]
|
||||
# Reconstruct the type expression for ALTER TABLE ADD COLUMN
|
||||
parts = [col_type] if col_type else []
|
||||
if notnull and not pk:
|
||||
parts.append("NOT NULL")
|
||||
if default is not None:
|
||||
parts.append(f"DEFAULT {default}")
|
||||
cols[col_name] = " ".join(parts)
|
||||
table_columns[tbl] = cols
|
||||
return table_columns
|
||||
finally:
|
||||
ref.close()
|
||||
|
||||
def _reconcile_columns(self, cursor: sqlite3.Cursor) -> None:
|
||||
"""Ensure live tables have every column declared in SCHEMA_SQL.
|
||||
|
||||
Follows the Beets/sqlite-utils pattern: the CREATE TABLE definition
|
||||
in SCHEMA_SQL is the single source of truth for the desired schema.
|
||||
On every startup this method diffs the live columns (via PRAGMA
|
||||
table_info) against the declared columns, and ADDs any that are
|
||||
missing.
|
||||
|
||||
This makes column additions a declarative operation — just add
|
||||
the column to SCHEMA_SQL and it appears on the next startup.
|
||||
Version-gated migration blocks are no longer needed for ADD COLUMN.
|
||||
"""
|
||||
expected = self._parse_schema_columns(SCHEMA_SQL)
|
||||
for table_name, declared_cols in expected.items():
|
||||
# Get current columns from the live table
|
||||
try:
|
||||
rows = cursor.execute(
|
||||
f'PRAGMA table_info("{table_name}")'
|
||||
).fetchall()
|
||||
except sqlite3.OperationalError:
|
||||
continue # Table doesn't exist yet (shouldn't happen after executescript)
|
||||
live_cols = set()
|
||||
for row in rows:
|
||||
# PRAGMA table_info returns (cid, name, type, notnull, dflt_value, pk)
|
||||
name = row[1] if isinstance(row, (tuple, list)) else row["name"]
|
||||
live_cols.add(name)
|
||||
|
||||
for col_name, col_type in declared_cols.items():
|
||||
if col_name not in live_cols:
|
||||
safe_name = col_name.replace('"', '""')
|
||||
try:
|
||||
cursor.execute(
|
||||
f'ALTER TABLE "{table_name}" ADD COLUMN "{safe_name}" {col_type}'
|
||||
)
|
||||
except sqlite3.OperationalError as exc:
|
||||
# Expected: "duplicate column name" from a race or
|
||||
# re-run. Unexpected: "Cannot add a NOT NULL column
|
||||
# with default value NULL" from a schema mistake.
|
||||
# Log at DEBUG so it's visible in agent.log.
|
||||
logger.debug(
|
||||
"reconcile %s.%s: %s", table_name, col_name, exc,
|
||||
)
|
||||
|
||||
def _init_schema(self):
|
||||
"""Create tables and FTS if they don't exist, reconcile columns.
|
||||
|
||||
Schema management follows the declarative reconciliation pattern
|
||||
(Beets, sqlite-utils): SCHEMA_SQL is the single source of truth.
|
||||
On existing databases, _reconcile_columns() diffs live columns
|
||||
against SCHEMA_SQL and ADDs any missing ones. This eliminates
|
||||
the version-gated migration chain for column additions, making
|
||||
it impossible for reordered or inserted migrations to skip columns.
|
||||
|
||||
The schema_version table is retained for future data migrations
|
||||
(transforming existing rows) which cannot be handled declaratively.
|
||||
"""
|
||||
"""Create tables and FTS if they don't exist, run migrations."""
|
||||
cursor = self._conn.cursor()
|
||||
|
||||
cursor.executescript(SCHEMA_SQL)
|
||||
|
||||
# ── Declarative column reconciliation ──────────────────────────
|
||||
# Diff live tables against SCHEMA_SQL and ADD any missing columns.
|
||||
# This is idempotent and self-healing: even if a version-gated
|
||||
# migration was skipped (e.g. due to version renumbering), the
|
||||
# column gets created here.
|
||||
self._reconcile_columns(cursor)
|
||||
|
||||
# ── Schema version bookkeeping ─────────────────────────────────
|
||||
# Bump to current so future data migrations (if any) can gate on
|
||||
# version. No version-gated column additions remain.
|
||||
# Check schema version and run migrations
|
||||
cursor.execute("SELECT version FROM schema_version LIMIT 1")
|
||||
row = cursor.fetchone()
|
||||
if row is None:
|
||||
cursor.execute(
|
||||
"INSERT INTO schema_version (version) VALUES (?)",
|
||||
(SCHEMA_VERSION,),
|
||||
)
|
||||
cursor.execute("INSERT INTO schema_version (version) VALUES (?)", (SCHEMA_VERSION,))
|
||||
else:
|
||||
current_version = row["version"] if isinstance(row, sqlite3.Row) else row[0]
|
||||
# Data migrations that can't be expressed declaratively (row
|
||||
# backfills, index changes tied to a specific version step) stay
|
||||
# in a version-gated chain. Column additions are handled by
|
||||
# _reconcile_columns() above and no longer need entries here.
|
||||
if current_version < 10:
|
||||
# v10: trigram FTS5 table for CJK/substring search. The
|
||||
# virtual table + triggers are created unconditionally via
|
||||
# FTS_TRIGRAM_SQL below, but existing rows need a one-time
|
||||
# backfill into the FTS index.
|
||||
if current_version < 2:
|
||||
# v2: add finish_reason column to messages
|
||||
try:
|
||||
cursor.execute("SELECT * FROM messages_fts_trigram LIMIT 0")
|
||||
_fts_trigram_exists = True
|
||||
cursor.execute("ALTER TABLE messages ADD COLUMN finish_reason TEXT")
|
||||
except sqlite3.OperationalError:
|
||||
_fts_trigram_exists = False
|
||||
if not _fts_trigram_exists:
|
||||
cursor.executescript(FTS_TRIGRAM_SQL)
|
||||
pass # Column already exists
|
||||
cursor.execute("UPDATE schema_version SET version = 2")
|
||||
if current_version < 3:
|
||||
# v3: add title column to sessions
|
||||
try:
|
||||
cursor.execute("ALTER TABLE sessions ADD COLUMN title TEXT")
|
||||
except sqlite3.OperationalError:
|
||||
pass # Column already exists
|
||||
cursor.execute("UPDATE schema_version SET version = 3")
|
||||
if current_version < 4:
|
||||
# v4: add unique index on title (NULLs allowed, only non-NULL must be unique)
|
||||
try:
|
||||
cursor.execute(
|
||||
"INSERT INTO messages_fts_trigram(rowid, content) "
|
||||
"SELECT id, content FROM messages WHERE content IS NOT NULL"
|
||||
"CREATE UNIQUE INDEX IF NOT EXISTS idx_sessions_title_unique "
|
||||
"ON sessions(title) WHERE title IS NOT NULL"
|
||||
)
|
||||
if current_version < 11:
|
||||
# v11: re-index FTS5 tables to cover tool_name + tool_calls and
|
||||
# switch from external-content to inline mode. Existing DBs have
|
||||
# old-schema FTS tables and triggers that IF NOT EXISTS won't
|
||||
# overwrite, so we drop them explicitly and let the post-migration
|
||||
# existence checks (below) recreate them from FTS_SQL /
|
||||
# FTS_TRIGRAM_SQL, then backfill every message row. Fixes #16751.
|
||||
for _trig in (
|
||||
"messages_fts_insert",
|
||||
"messages_fts_delete",
|
||||
"messages_fts_update",
|
||||
"messages_fts_trigram_insert",
|
||||
"messages_fts_trigram_delete",
|
||||
"messages_fts_trigram_update",
|
||||
):
|
||||
except sqlite3.OperationalError:
|
||||
pass # Index already exists
|
||||
cursor.execute("UPDATE schema_version SET version = 4")
|
||||
if current_version < 5:
|
||||
new_columns = [
|
||||
("cache_read_tokens", "INTEGER DEFAULT 0"),
|
||||
("cache_write_tokens", "INTEGER DEFAULT 0"),
|
||||
("reasoning_tokens", "INTEGER DEFAULT 0"),
|
||||
("billing_provider", "TEXT"),
|
||||
("billing_base_url", "TEXT"),
|
||||
("billing_mode", "TEXT"),
|
||||
("estimated_cost_usd", "REAL"),
|
||||
("actual_cost_usd", "REAL"),
|
||||
("cost_status", "TEXT"),
|
||||
("cost_source", "TEXT"),
|
||||
("pricing_version", "TEXT"),
|
||||
]
|
||||
for name, column_type in new_columns:
|
||||
try:
|
||||
cursor.execute(f"DROP TRIGGER IF EXISTS {_trig}")
|
||||
# name and column_type come from the hardcoded tuple above,
|
||||
# not user input. Double-quote identifier escaping is applied
|
||||
# as defense-in-depth; SQLite DDL cannot be parameterized.
|
||||
safe_name = name.replace('"', '""')
|
||||
cursor.execute(f'ALTER TABLE sessions ADD COLUMN "{safe_name}" {column_type}')
|
||||
except sqlite3.OperationalError:
|
||||
pass
|
||||
for _tbl in ("messages_fts", "messages_fts_trigram"):
|
||||
cursor.execute("UPDATE schema_version SET version = 5")
|
||||
if current_version < 6:
|
||||
# v6: add reasoning columns to messages table — preserves assistant
|
||||
# reasoning text and structured reasoning_details across gateway
|
||||
# session turns. Without these, reasoning chains are lost on
|
||||
# session reload, breaking multi-turn reasoning continuity for
|
||||
# providers that replay reasoning (OpenRouter, OpenAI, Nous).
|
||||
for col_name, col_type in [
|
||||
("reasoning", "TEXT"),
|
||||
("reasoning_details", "TEXT"),
|
||||
("codex_reasoning_items", "TEXT"),
|
||||
]:
|
||||
try:
|
||||
cursor.execute(f"DROP TABLE IF EXISTS {_tbl}")
|
||||
safe = col_name.replace('"', '""')
|
||||
cursor.execute(
|
||||
f'ALTER TABLE messages ADD COLUMN "{safe}" {col_type}'
|
||||
)
|
||||
except sqlite3.OperationalError:
|
||||
pass
|
||||
# Recreate virtual tables + triggers with the new inline-mode
|
||||
# schema that indexes content || tool_name || tool_calls.
|
||||
cursor.executescript(FTS_SQL)
|
||||
cursor.executescript(FTS_TRIGRAM_SQL)
|
||||
# Backfill both indexes from every existing messages row.
|
||||
cursor.execute(
|
||||
"INSERT INTO messages_fts(rowid, content) "
|
||||
"SELECT id, "
|
||||
"COALESCE(content, '') || ' ' || "
|
||||
"COALESCE(tool_name, '') || ' ' || "
|
||||
"COALESCE(tool_calls, '') "
|
||||
"FROM messages"
|
||||
)
|
||||
cursor.execute(
|
||||
"INSERT INTO messages_fts_trigram(rowid, content) "
|
||||
"SELECT id, "
|
||||
"COALESCE(content, '') || ' ' || "
|
||||
"COALESCE(tool_name, '') || ' ' || "
|
||||
"COALESCE(tool_calls, '') "
|
||||
"FROM messages"
|
||||
)
|
||||
if current_version < SCHEMA_VERSION:
|
||||
cursor.execute(
|
||||
"UPDATE schema_version SET version = ?",
|
||||
(SCHEMA_VERSION,),
|
||||
)
|
||||
pass # Column already exists
|
||||
cursor.execute("UPDATE schema_version SET version = 6")
|
||||
if current_version < 7:
|
||||
# v7: preserve provider-native reasoning_content separately from
|
||||
# normalized reasoning text. Kimi/Moonshot replay can require
|
||||
# this field on assistant tool-call messages when thinking is on.
|
||||
try:
|
||||
cursor.execute('ALTER TABLE messages ADD COLUMN "reasoning_content" TEXT')
|
||||
except sqlite3.OperationalError:
|
||||
pass # Column already exists
|
||||
cursor.execute("UPDATE schema_version SET version = 7")
|
||||
if current_version < 8:
|
||||
# v8: add api_call_count column to sessions — tracks the number
|
||||
# of individual LLM API calls made within a session (as opposed
|
||||
# to the session count itself).
|
||||
try:
|
||||
cursor.execute(
|
||||
'ALTER TABLE sessions ADD COLUMN "api_call_count" INTEGER DEFAULT 0'
|
||||
)
|
||||
except sqlite3.OperationalError:
|
||||
pass # Column already exists
|
||||
cursor.execute("UPDATE schema_version SET version = 8")
|
||||
if current_version < 9:
|
||||
# v9: preserve replayable Codex assistant message ids/phases so
|
||||
# follow-up turns can rebuild Responses API message items instead
|
||||
# of flattening everything to plain assistant text.
|
||||
try:
|
||||
cursor.execute('ALTER TABLE messages ADD COLUMN "codex_message_items" TEXT')
|
||||
except sqlite3.OperationalError:
|
||||
pass # Column already exists
|
||||
cursor.execute("UPDATE schema_version SET version = 9")
|
||||
|
||||
# Unique title index — always ensure it exists
|
||||
# Unique title index — always ensure it exists (safe to run after migrations
|
||||
# since the title column is guaranteed to exist at this point)
|
||||
try:
|
||||
cursor.execute(
|
||||
"CREATE UNIQUE INDEX IF NOT EXISTS idx_sessions_title_unique "
|
||||
@@ -502,12 +383,6 @@ class SessionDB:
|
||||
except sqlite3.OperationalError:
|
||||
cursor.executescript(FTS_SQL)
|
||||
|
||||
# Trigram FTS5 for CJK/substring search
|
||||
try:
|
||||
cursor.execute("SELECT * FROM messages_fts_trigram LIMIT 0")
|
||||
except sqlite3.OperationalError:
|
||||
cursor.executescript(FTS_TRIGRAM_SQL)
|
||||
|
||||
self._conn.commit()
|
||||
|
||||
# =========================================================================
|
||||
@@ -1172,85 +1047,6 @@ class SessionDB:
|
||||
|
||||
return self._execute_write(_do)
|
||||
|
||||
def replace_messages(self, session_id: str, messages: List[Dict[str, Any]]) -> None:
|
||||
"""Atomically replace every message for a session.
|
||||
|
||||
Used by transcript-rewrite flows such as /retry, /undo, and /compress.
|
||||
The delete + reinsert sequence must commit as one transaction so a
|
||||
mid-rewrite failure does not leave SQLite with a partial transcript.
|
||||
"""
|
||||
|
||||
def _do(conn):
|
||||
conn.execute(
|
||||
"DELETE FROM messages WHERE session_id = ?", (session_id,)
|
||||
)
|
||||
conn.execute(
|
||||
"UPDATE sessions SET message_count = 0, tool_call_count = 0 WHERE id = ?",
|
||||
(session_id,),
|
||||
)
|
||||
|
||||
now_ts = time.time()
|
||||
total_messages = 0
|
||||
total_tool_calls = 0
|
||||
for msg in messages:
|
||||
role = msg.get("role", "unknown")
|
||||
tool_calls = msg.get("tool_calls")
|
||||
reasoning_details = msg.get("reasoning_details") if role == "assistant" else None
|
||||
codex_reasoning_items = (
|
||||
msg.get("codex_reasoning_items") if role == "assistant" else None
|
||||
)
|
||||
codex_message_items = (
|
||||
msg.get("codex_message_items") if role == "assistant" else None
|
||||
)
|
||||
|
||||
reasoning_details_json = (
|
||||
json.dumps(reasoning_details) if reasoning_details else None
|
||||
)
|
||||
codex_items_json = (
|
||||
json.dumps(codex_reasoning_items) if codex_reasoning_items else None
|
||||
)
|
||||
codex_message_items_json = (
|
||||
json.dumps(codex_message_items) if codex_message_items else None
|
||||
)
|
||||
tool_calls_json = json.dumps(tool_calls) if tool_calls else None
|
||||
|
||||
conn.execute(
|
||||
"""INSERT INTO messages (session_id, role, content, tool_call_id,
|
||||
tool_calls, tool_name, timestamp, token_count, finish_reason,
|
||||
reasoning, reasoning_content, reasoning_details, codex_reasoning_items,
|
||||
codex_message_items)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
|
||||
(
|
||||
session_id,
|
||||
role,
|
||||
msg.get("content"),
|
||||
msg.get("tool_call_id"),
|
||||
tool_calls_json,
|
||||
msg.get("tool_name"),
|
||||
now_ts,
|
||||
msg.get("token_count"),
|
||||
msg.get("finish_reason"),
|
||||
msg.get("reasoning") if role == "assistant" else None,
|
||||
msg.get("reasoning_content") if role == "assistant" else None,
|
||||
reasoning_details_json,
|
||||
codex_items_json,
|
||||
codex_message_items_json,
|
||||
),
|
||||
)
|
||||
total_messages += 1
|
||||
if tool_calls is not None:
|
||||
total_tool_calls += (
|
||||
len(tool_calls) if isinstance(tool_calls, list) else 1
|
||||
)
|
||||
now_ts += 1e-6
|
||||
|
||||
conn.execute(
|
||||
"UPDATE sessions SET message_count = ?, tool_call_count = ? WHERE id = ?",
|
||||
(total_messages, total_tool_calls, session_id),
|
||||
)
|
||||
|
||||
self._execute_write(_do)
|
||||
|
||||
def get_messages(self, session_id: str) -> List[Dict[str, Any]]:
|
||||
"""Load all messages for a session, ordered by timestamp."""
|
||||
with self._lock:
|
||||
@@ -1359,10 +1155,7 @@ class SessionDB:
|
||||
|
||||
messages = []
|
||||
for row in rows:
|
||||
content = row["content"]
|
||||
if row["role"] in {"user", "assistant"} and isinstance(content, str):
|
||||
content = sanitize_context(content).strip()
|
||||
msg = {"role": row["role"], "content": content}
|
||||
msg = {"role": row["role"], "content": row["content"]}
|
||||
if row["tool_call_id"]:
|
||||
msg["tool_call_id"] = row["tool_call_id"]
|
||||
if row["tool_name"]:
|
||||
@@ -1487,9 +1280,9 @@ class SessionDB:
|
||||
# quotes. FTS5's tokenizer splits on dots and hyphens, turning
|
||||
# ``chat-send`` into ``chat AND send`` and ``P2.2`` into ``p2 AND 2``.
|
||||
# Quoting preserves phrase semantics. A single pass avoids the
|
||||
# double-quoting bug that would occur if dotted, hyphenated and underscored
|
||||
# double-quoting bug that would occur if dotted and hyphenated
|
||||
# patterns were applied sequentially (e.g. ``my-app.config``).
|
||||
sanitized = re.sub(r"\b(\w+(?:[._-]\w+)+)\b", r'"\1"', sanitized)
|
||||
sanitized = re.sub(r"\b(\w+(?:[.-]\w+)+)\b", r'"\1"', sanitized)
|
||||
|
||||
# Step 6: Restore preserved quoted phrases
|
||||
for i, quoted in enumerate(_quoted_parts):
|
||||
@@ -1498,16 +1291,6 @@ class SessionDB:
|
||||
return sanitized.strip()
|
||||
|
||||
|
||||
@staticmethod
|
||||
def _is_cjk_codepoint(cp: int) -> bool:
|
||||
return (0x4E00 <= cp <= 0x9FFF or # CJK Unified Ideographs
|
||||
0x3400 <= cp <= 0x4DBF or # CJK Extension A
|
||||
0x20000 <= cp <= 0x2A6DF or # CJK Extension B
|
||||
0x3000 <= cp <= 0x303F or # CJK Symbols
|
||||
0x3040 <= cp <= 0x309F or # Hiragana
|
||||
0x30A0 <= cp <= 0x30FF or # Katakana
|
||||
0xAC00 <= cp <= 0xD7AF) # Hangul Syllables
|
||||
|
||||
@staticmethod
|
||||
def _contains_cjk(text: str) -> bool:
|
||||
"""Check if text contains CJK (Chinese, Japanese, Korean) characters."""
|
||||
@@ -1523,11 +1306,6 @@ class SessionDB:
|
||||
return True
|
||||
return False
|
||||
|
||||
@classmethod
|
||||
def _count_cjk(cls, text: str) -> int:
|
||||
"""Count CJK characters in text."""
|
||||
return sum(1 for ch in text if cls._is_cjk_codepoint(ord(ch)))
|
||||
|
||||
def search_messages(
|
||||
self,
|
||||
query: str,
|
||||
@@ -1598,113 +1376,52 @@ class SessionDB:
|
||||
LIMIT ? OFFSET ?
|
||||
"""
|
||||
|
||||
# CJK queries bypass the unicode61 FTS5 table. The default tokenizer
|
||||
# splits CJK characters into individual tokens, so "大别山项目" becomes
|
||||
# "大 AND 别 AND 山 AND 项 AND 目" — producing false positives and
|
||||
# missing exact phrase matches.
|
||||
#
|
||||
# For queries with 3+ CJK characters, we use the trigram FTS5 table
|
||||
# (indexed substring matching with ranking and snippets). For shorter
|
||||
# CJK queries (1-2 chars), trigram can't match (it needs ≥9 UTF-8
|
||||
# bytes = 3 CJK chars), so we fall back to LIKE.
|
||||
is_cjk = self._contains_cjk(query)
|
||||
if is_cjk:
|
||||
raw_query = query.strip('"').strip()
|
||||
cjk_count = self._count_cjk(raw_query)
|
||||
|
||||
if cjk_count >= 3:
|
||||
# Trigram FTS5 path — quote each non-operator token to handle
|
||||
# FTS5 special chars (%, *, etc.) while preserving boolean
|
||||
# operators (AND, OR, NOT) for multi-term queries.
|
||||
tokens = raw_query.split()
|
||||
parts = []
|
||||
for tok in tokens:
|
||||
if tok.upper() in ("AND", "OR", "NOT"):
|
||||
parts.append(tok)
|
||||
else:
|
||||
parts.append('"' + tok.replace('"', '""') + '"')
|
||||
trigram_query = " ".join(parts)
|
||||
tri_where = ["messages_fts_trigram MATCH ?"]
|
||||
tri_params: list = [trigram_query]
|
||||
if source_filter is not None:
|
||||
tri_where.append(f"s.source IN ({','.join('?' for _ in source_filter)})")
|
||||
tri_params.extend(source_filter)
|
||||
if exclude_sources is not None:
|
||||
tri_where.append(f"s.source NOT IN ({','.join('?' for _ in exclude_sources)})")
|
||||
tri_params.extend(exclude_sources)
|
||||
if role_filter:
|
||||
tri_where.append(f"m.role IN ({','.join('?' for _ in role_filter)})")
|
||||
tri_params.extend(role_filter)
|
||||
tri_sql = f"""
|
||||
SELECT
|
||||
m.id,
|
||||
m.session_id,
|
||||
m.role,
|
||||
snippet(messages_fts_trigram, 0, '>>>', '<<<', '...', 40) AS snippet,
|
||||
m.content,
|
||||
m.timestamp,
|
||||
m.tool_name,
|
||||
s.source,
|
||||
s.model,
|
||||
s.started_at AS session_started
|
||||
FROM messages_fts_trigram
|
||||
JOIN messages m ON m.id = messages_fts_trigram.rowid
|
||||
JOIN sessions s ON s.id = m.session_id
|
||||
WHERE {' AND '.join(tri_where)}
|
||||
ORDER BY rank
|
||||
LIMIT ? OFFSET ?
|
||||
"""
|
||||
tri_params.extend([limit, offset])
|
||||
with self._lock:
|
||||
try:
|
||||
tri_cursor = self._conn.execute(tri_sql, tri_params)
|
||||
except sqlite3.OperationalError:
|
||||
matches = []
|
||||
else:
|
||||
matches = [dict(row) for row in tri_cursor.fetchall()]
|
||||
else:
|
||||
# Short CJK query (1-2 chars) — trigram needs ≥3 CJK chars.
|
||||
# Fall back to LIKE substring search.
|
||||
escaped = raw_query.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
|
||||
like_where = ["(m.content LIKE ? ESCAPE '\\' OR m.tool_name LIKE ? ESCAPE '\\' OR m.tool_calls LIKE ? ESCAPE '\\')"]
|
||||
like_params: list = [f"%{escaped}%", f"%{escaped}%", f"%{escaped}%"]
|
||||
if source_filter is not None:
|
||||
like_where.append(f"s.source IN ({','.join('?' for _ in source_filter)})")
|
||||
like_params.extend(source_filter)
|
||||
if exclude_sources is not None:
|
||||
like_where.append(f"s.source NOT IN ({','.join('?' for _ in exclude_sources)})")
|
||||
like_params.extend(exclude_sources)
|
||||
if role_filter:
|
||||
like_where.append(f"m.role IN ({','.join('?' for _ in role_filter)})")
|
||||
like_params.extend(role_filter)
|
||||
like_sql = f"""
|
||||
SELECT m.id, m.session_id, m.role,
|
||||
substr(m.content,
|
||||
max(1, instr(m.content, ?) - 40),
|
||||
120) AS snippet,
|
||||
m.content, m.timestamp, m.tool_name,
|
||||
s.source, s.model, s.started_at AS session_started
|
||||
FROM messages m
|
||||
JOIN sessions s ON s.id = m.session_id
|
||||
WHERE {' AND '.join(like_where)}
|
||||
ORDER BY m.timestamp DESC
|
||||
LIMIT ? OFFSET ?
|
||||
"""
|
||||
like_params.extend([limit, offset])
|
||||
# instr() parameter goes first in the bound list
|
||||
like_params = [raw_query] + like_params
|
||||
with self._lock:
|
||||
like_cursor = self._conn.execute(like_sql, like_params)
|
||||
matches = [dict(row) for row in like_cursor.fetchall()]
|
||||
else:
|
||||
with self._lock:
|
||||
try:
|
||||
cursor = self._conn.execute(sql, params)
|
||||
except sqlite3.OperationalError:
|
||||
# FTS5 query syntax error despite sanitization — return empty
|
||||
with self._lock:
|
||||
try:
|
||||
cursor = self._conn.execute(sql, params)
|
||||
except sqlite3.OperationalError:
|
||||
# FTS5 query syntax error despite sanitization — return empty
|
||||
# unless query contains CJK (fall back to LIKE below)
|
||||
if not self._contains_cjk(query):
|
||||
return []
|
||||
else:
|
||||
matches = [dict(row) for row in cursor.fetchall()]
|
||||
matches = []
|
||||
else:
|
||||
matches = [dict(row) for row in cursor.fetchall()]
|
||||
|
||||
# LIKE fallback for CJK queries: FTS5 default tokenizer splits CJK
|
||||
# characters individually, causing multi-character queries to fail.
|
||||
if not matches and self._contains_cjk(query):
|
||||
raw_query = query.strip('"').strip()
|
||||
like_where = ["m.content LIKE ?"]
|
||||
like_params: list = [f"%{raw_query}%"]
|
||||
if source_filter is not None:
|
||||
like_where.append(f"s.source IN ({','.join('?' for _ in source_filter)})")
|
||||
like_params.extend(source_filter)
|
||||
if exclude_sources is not None:
|
||||
like_where.append(f"s.source NOT IN ({','.join('?' for _ in exclude_sources)})")
|
||||
like_params.extend(exclude_sources)
|
||||
if role_filter:
|
||||
like_where.append(f"m.role IN ({','.join('?' for _ in role_filter)})")
|
||||
like_params.extend(role_filter)
|
||||
like_sql = f"""
|
||||
SELECT m.id, m.session_id, m.role,
|
||||
substr(m.content,
|
||||
max(1, instr(m.content, ?) - 40),
|
||||
120) AS snippet,
|
||||
m.content, m.timestamp, m.tool_name,
|
||||
s.source, s.model, s.started_at AS session_started
|
||||
FROM messages m
|
||||
JOIN sessions s ON s.id = m.session_id
|
||||
WHERE {' AND '.join(like_where)}
|
||||
ORDER BY m.timestamp DESC
|
||||
LIMIT ? OFFSET ?
|
||||
"""
|
||||
like_params.extend([limit, offset])
|
||||
# instr() parameter goes first in the bound list
|
||||
like_params = [raw_query] + like_params
|
||||
with self._lock:
|
||||
like_cursor = self._conn.execute(like_sql, like_params)
|
||||
matches = [dict(row) for row in like_cursor.fetchall()]
|
||||
|
||||
# Add surrounding context (1 message before + after each match).
|
||||
# Done outside the lock so we don't hold it across N sequential queries.
|
||||
|
||||
+10
-110
@@ -138,18 +138,12 @@ def _run_async(coro):
|
||||
|
||||
discover_builtin_tools()
|
||||
|
||||
# MCP tool discovery (external MCP servers from config) used to run here as
|
||||
# a module-level side effect. It was removed because discover_mcp_tools()
|
||||
# internally uses a blocking future.result(timeout=120) wait, and the
|
||||
# gateway lazy-imports this module from inside the asyncio event loop on
|
||||
# the first user message — freezing Discord/Telegram heartbeats for up to
|
||||
# 120s whenever any configured MCP server was slow or unreachable (#16856).
|
||||
#
|
||||
# Each entry point now runs discovery explicitly at its own startup:
|
||||
# - gateway/run.py -> start_gateway() uses run_in_executor
|
||||
# - cli.py, hermes_cli/* -> inline on startup (no event loop)
|
||||
# - tui_gateway/server.py -> inline on startup (no event loop)
|
||||
# - acp_adapter/server.py -> asyncio.to_thread on session init
|
||||
# MCP tool discovery (external MCP servers from config)
|
||||
try:
|
||||
from tools.mcp_tool import discover_mcp_tools
|
||||
discover_mcp_tools()
|
||||
except Exception as e:
|
||||
logger.debug("MCP tool discovery failed: %s", e)
|
||||
|
||||
# Plugin tool discovery (user/project/pip plugins)
|
||||
try:
|
||||
@@ -206,27 +200,6 @@ _LEGACY_TOOLSET_MAP = {
|
||||
# get_tool_definitions (the main schema provider)
|
||||
# =============================================================================
|
||||
|
||||
# Module-level memoization for get_tool_definitions(). Keyed on
|
||||
# (frozenset(enabled_toolsets), frozenset(disabled_toolsets), registry._generation).
|
||||
# Hot callers (gateway runner, AIAgent.__init__) invoke this on every turn
|
||||
# with quiet_mode=True; caching avoids ~7 ms of registry walking + schema
|
||||
# filtering + check_fn probing per call. Only active when quiet_mode=True
|
||||
# because quiet_mode=False has stdout side effects (tool-selection prints).
|
||||
#
|
||||
# Invalidation happens transparently via the registry's _generation counter,
|
||||
# which bumps on register() / deregister() / register_toolset_alias(). The
|
||||
# inner check_fn TTL cache in registry.py handles environment drift (Docker
|
||||
# daemon start/stop, env var changes, etc.) on a 30 s horizon.
|
||||
_tool_defs_cache: Dict[tuple, List[Dict[str, Any]]] = {}
|
||||
|
||||
|
||||
def _clear_tool_defs_cache() -> None:
|
||||
"""Drop memoized get_tool_definitions() results. Called when dynamic
|
||||
schema dependencies change (e.g. discord capability cache reset,
|
||||
execute_code sandbox reconfigured)."""
|
||||
_tool_defs_cache.clear()
|
||||
|
||||
|
||||
def get_tool_definitions(
|
||||
enabled_toolsets: List[str] = None,
|
||||
disabled_toolsets: List[str] = None,
|
||||
@@ -245,50 +218,6 @@ def get_tool_definitions(
|
||||
Returns:
|
||||
Filtered list of OpenAI-format tool definitions.
|
||||
"""
|
||||
# Fast path: memoized result when the caller doesn't need stdout prints.
|
||||
# The cache key captures every argument-level input; the registry
|
||||
# generation captures registry mutations (MCP refresh, plugin load).
|
||||
# check_fn results are TTL-cached one level down, inside
|
||||
# registry.get_definitions. The config-mtime fingerprint below captures
|
||||
# user-visible config edits that affect dynamic schemas (execute_code
|
||||
# mode, discord action allowlist, etc.) without needing an explicit
|
||||
# invalidate hook on every config-writer.
|
||||
if quiet_mode:
|
||||
try:
|
||||
from hermes_cli.config import get_config_path
|
||||
cfg_path = get_config_path()
|
||||
cfg_stat = cfg_path.stat()
|
||||
cfg_fp = (cfg_stat.st_mtime_ns, cfg_stat.st_size)
|
||||
except (FileNotFoundError, OSError, ImportError):
|
||||
cfg_fp = None
|
||||
cache_key = (
|
||||
frozenset(enabled_toolsets) if enabled_toolsets is not None else None,
|
||||
frozenset(disabled_toolsets) if disabled_toolsets else None,
|
||||
registry._generation,
|
||||
cfg_fp,
|
||||
)
|
||||
cached = _tool_defs_cache.get(cache_key)
|
||||
if cached is not None:
|
||||
# Update _last_resolved_tool_names so downstream callers see
|
||||
# consistent state even on a cache hit.
|
||||
global _last_resolved_tool_names
|
||||
_last_resolved_tool_names = [t["function"]["name"] for t in cached]
|
||||
# Return a shallow copy of the list but share the dict references —
|
||||
# schemas are treated as read-only by all known callers.
|
||||
return list(cached)
|
||||
|
||||
result = _compute_tool_definitions(enabled_toolsets, disabled_toolsets, quiet_mode)
|
||||
if quiet_mode:
|
||||
_tool_defs_cache[cache_key] = result
|
||||
return result
|
||||
|
||||
|
||||
def _compute_tool_definitions(
|
||||
enabled_toolsets: List[str] = None,
|
||||
disabled_toolsets: List[str] = None,
|
||||
quiet_mode: bool = False,
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""Uncached implementation of :func:`get_tool_definitions`."""
|
||||
# Determine which tool names the caller wants
|
||||
tools_to_include: set = set()
|
||||
|
||||
@@ -480,27 +409,24 @@ def coerce_tool_args(tool_name: str, args: Dict[str, Any]) -> Dict[str, Any]:
|
||||
if not prop_schema:
|
||||
continue
|
||||
expected = prop_schema.get("type")
|
||||
if not expected and not _schema_allows_null(prop_schema):
|
||||
if not expected:
|
||||
continue
|
||||
coerced = _coerce_value(value, expected, schema=prop_schema)
|
||||
coerced = _coerce_value(value, expected)
|
||||
if coerced is not value:
|
||||
args[key] = coerced
|
||||
|
||||
return args
|
||||
|
||||
|
||||
def _coerce_value(value: str, expected_type, schema: dict | None = None):
|
||||
def _coerce_value(value: str, expected_type):
|
||||
"""Attempt to coerce a string *value* to *expected_type*.
|
||||
|
||||
Returns the original string when coercion is not applicable or fails.
|
||||
"""
|
||||
if _schema_allows_null(schema) and value.strip().lower() == "null":
|
||||
return None
|
||||
|
||||
if isinstance(expected_type, list):
|
||||
# Union type — try each in order, return first successful coercion
|
||||
for t in expected_type:
|
||||
result = _coerce_value(value, t, schema=schema)
|
||||
result = _coerce_value(value, t)
|
||||
if result is not value:
|
||||
return result
|
||||
return value
|
||||
@@ -513,35 +439,9 @@ def _coerce_value(value: str, expected_type, schema: dict | None = None):
|
||||
return _coerce_json(value, list)
|
||||
if expected_type == "object":
|
||||
return _coerce_json(value, dict)
|
||||
if expected_type == "null" and value.strip().lower() == "null":
|
||||
return None
|
||||
return value
|
||||
|
||||
|
||||
def _schema_allows_null(schema: dict | None) -> bool:
|
||||
"""Return True when a JSON Schema fragment explicitly permits null."""
|
||||
if not isinstance(schema, dict):
|
||||
return False
|
||||
|
||||
schema_type = schema.get("type")
|
||||
if schema_type == "null":
|
||||
return True
|
||||
if isinstance(schema_type, list) and "null" in schema_type:
|
||||
return True
|
||||
if schema.get("nullable") is True:
|
||||
return True
|
||||
|
||||
for union_key in ("anyOf", "oneOf"):
|
||||
variants = schema.get(union_key)
|
||||
if not isinstance(variants, list):
|
||||
continue
|
||||
for variant in variants:
|
||||
if isinstance(variant, dict) and variant.get("type") == "null":
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
|
||||
def _coerce_json(value: str, expected_python_type: type):
|
||||
"""Parse *value* as JSON when the schema expects an array or object.
|
||||
|
||||
|
||||
+3
-30
@@ -7,7 +7,9 @@
|
||||
perSystem = { pkgs, system, lib, ... }:
|
||||
let
|
||||
hermes-agent = inputs.self.packages.${system}.default;
|
||||
hermesVenv = hermes-agent.hermesVenv;
|
||||
hermesVenv = pkgs.callPackage ./python.nix {
|
||||
inherit (inputs) uv2nix pyproject-nix pyproject-build-systems;
|
||||
};
|
||||
|
||||
configMergeScript = pkgs.callPackage ./configMergeScript.nix { };
|
||||
|
||||
@@ -191,35 +193,6 @@ json.dump(sorted(leaf_paths(DEFAULT_CONFIG)), sys.stdout, indent=2)
|
||||
echo "ok" > $out/result
|
||||
'';
|
||||
|
||||
# Verify extraPythonPackages PYTHONPATH injection
|
||||
extra-python-packages = let
|
||||
testPkg = pkgs.python312Packages.pyfiglet;
|
||||
hermesWithExtra = hermes-agent.override {
|
||||
extraPythonPackages = [ testPkg ];
|
||||
};
|
||||
in pkgs.runCommand "hermes-extra-python-packages" { } ''
|
||||
set -e
|
||||
echo "=== Checking extraPythonPackages PYTHONPATH injection ==="
|
||||
|
||||
grep -q "PYTHONPATH" ${hermesWithExtra}/bin/hermes || \
|
||||
(echo "FAIL: PYTHONPATH not in wrapper"; exit 1)
|
||||
echo "PASS: PYTHONPATH present in wrapper"
|
||||
|
||||
grep -q "${testPkg}" ${hermesWithExtra}/bin/hermes || \
|
||||
(echo "FAIL: test package path not in PYTHONPATH"; exit 1)
|
||||
echo "PASS: test package path found in wrapper"
|
||||
|
||||
echo "=== Checking base package has no PYTHONPATH ==="
|
||||
if grep -q "PYTHONPATH" ${hermes-agent}/bin/hermes; then
|
||||
echo "FAIL: base package should not have PYTHONPATH"; exit 1
|
||||
fi
|
||||
echo "PASS: base package clean"
|
||||
|
||||
echo "=== All extraPythonPackages checks passed ==="
|
||||
mkdir -p $out
|
||||
echo "ok" > $out/result
|
||||
'';
|
||||
|
||||
# ── Config merge + round-trip test ────────────────────────────────
|
||||
# Tests the merge script (Nix activation behavior) across 7
|
||||
# scenarios, then verifies Python's load_config() reads correctly.
|
||||
|
||||
@@ -1,186 +0,0 @@
|
||||
# nix/hermes-agent.nix — Overridable Hermes Agent package
|
||||
#
|
||||
# callPackage auto-wires nixpkgs args; flake inputs are passed explicitly.
|
||||
# Users override via: pkgs.hermes-agent.override { extraPythonPackages = [...]; }
|
||||
{
|
||||
lib,
|
||||
stdenv,
|
||||
makeWrapper,
|
||||
callPackage,
|
||||
python312,
|
||||
nodejs_22,
|
||||
ripgrep,
|
||||
git,
|
||||
openssh,
|
||||
ffmpeg,
|
||||
tirith,
|
||||
# Flake inputs — passed explicitly by packages.nix and overlays.nix
|
||||
uv2nix,
|
||||
pyproject-nix,
|
||||
pyproject-build-systems,
|
||||
npm-lockfile-fix,
|
||||
# Overridable parameters
|
||||
extraPythonPackages ? [ ],
|
||||
}:
|
||||
let
|
||||
hermesVenv = callPackage ./python.nix {
|
||||
inherit uv2nix pyproject-nix pyproject-build-systems;
|
||||
};
|
||||
|
||||
hermesNpmLib = callPackage ./lib.nix {
|
||||
inherit npm-lockfile-fix;
|
||||
};
|
||||
|
||||
hermesTui = callPackage ./tui.nix {
|
||||
inherit hermesNpmLib;
|
||||
};
|
||||
|
||||
hermesWeb = callPackage ./web.nix {
|
||||
inherit hermesNpmLib;
|
||||
};
|
||||
|
||||
bundledSkills = lib.cleanSourceWith {
|
||||
src = ../skills;
|
||||
filter = path: _type: !(lib.hasInfix "/index-cache/" path);
|
||||
};
|
||||
|
||||
runtimeDeps = [
|
||||
nodejs_22
|
||||
ripgrep
|
||||
git
|
||||
openssh
|
||||
ffmpeg
|
||||
tirith
|
||||
];
|
||||
|
||||
runtimePath = lib.makeBinPath runtimeDeps;
|
||||
|
||||
sitePackagesPath = python312.sitePackages;
|
||||
|
||||
# Walk propagatedBuildInputs to include transitive Python deps in PYTHONPATH.
|
||||
# Without this, a plugin listing e.g. requests as a dep would fail at runtime
|
||||
# if requests isn't already in the sealed uv2nix venv.
|
||||
allExtraPythonPackages = python312.pkgs.requiredPythonModules extraPythonPackages;
|
||||
|
||||
pythonPath = lib.makeSearchPath sitePackagesPath allExtraPythonPackages;
|
||||
|
||||
pyprojectHash = builtins.hashString "sha256" (builtins.readFile ../pyproject.toml);
|
||||
uvLockHash =
|
||||
if builtins.pathExists ../uv.lock then
|
||||
builtins.hashString "sha256" (builtins.readFile ../uv.lock)
|
||||
else
|
||||
"none";
|
||||
in
|
||||
stdenv.mkDerivation {
|
||||
pname = "hermes-agent";
|
||||
version = (builtins.fromTOML (builtins.readFile ../pyproject.toml)).project.version;
|
||||
|
||||
dontUnpack = true;
|
||||
dontBuild = true;
|
||||
nativeBuildInputs = [ makeWrapper ];
|
||||
|
||||
installPhase = ''
|
||||
runHook preInstall
|
||||
|
||||
mkdir -p $out/share/hermes-agent $out/bin
|
||||
cp -r ${bundledSkills} $out/share/hermes-agent/skills
|
||||
cp -r ${hermesWeb} $out/share/hermes-agent/web_dist
|
||||
|
||||
mkdir -p $out/ui-tui
|
||||
cp -r ${hermesTui}/lib/hermes-tui/* $out/ui-tui/
|
||||
|
||||
${lib.concatMapStringsSep "\n"
|
||||
(name: ''
|
||||
makeWrapper ${hermesVenv}/bin/${name} $out/bin/${name} \
|
||||
--suffix PATH : "${runtimePath}" \
|
||||
--set HERMES_BUNDLED_SKILLS $out/share/hermes-agent/skills \
|
||||
--set HERMES_WEB_DIST $out/share/hermes-agent/web_dist \
|
||||
--set HERMES_TUI_DIR $out/ui-tui \
|
||||
--set HERMES_PYTHON ${hermesVenv}/bin/python3 \
|
||||
--set HERMES_NODE ${nodejs_22}/bin/node \
|
||||
${lib.optionalString (extraPythonPackages != [ ]) ''--suffix PYTHONPATH : "${pythonPath}"''}
|
||||
'')
|
||||
[
|
||||
"hermes"
|
||||
"hermes-agent"
|
||||
"hermes-acp"
|
||||
]
|
||||
}
|
||||
|
||||
${lib.optionalString (extraPythonPackages != [ ]) ''
|
||||
echo "=== Checking for plugin/core package collisions ==="
|
||||
${hermesVenv}/bin/python3 -c "
|
||||
import pathlib, sys, re
|
||||
|
||||
def canonical(name):
|
||||
return re.sub(r'[-_.]+', '-', name).lower()
|
||||
|
||||
# Collect core venv package names
|
||||
core = set()
|
||||
venv_sp = pathlib.Path('${hermesVenv}/${sitePackagesPath}')
|
||||
for di in venv_sp.glob('*.dist-info'):
|
||||
meta = di / 'METADATA'
|
||||
if meta.exists():
|
||||
for line in meta.read_text().splitlines():
|
||||
if line.startswith('Name:'):
|
||||
core.add(canonical(line.split(':', 1)[1].strip()))
|
||||
break
|
||||
|
||||
# Check each extra package for collisions
|
||||
extras_dirs = [${lib.concatMapStringsSep ", " (p: "'${toString p}'") allExtraPythonPackages}]
|
||||
for edir in extras_dirs:
|
||||
sp = pathlib.Path(edir) / '${sitePackagesPath}'
|
||||
if not sp.exists():
|
||||
continue
|
||||
for di in sp.glob('*.dist-info'):
|
||||
meta = di / 'METADATA'
|
||||
if not meta.exists():
|
||||
continue
|
||||
for line in meta.read_text().splitlines():
|
||||
if line.startswith('Name:'):
|
||||
pkg = canonical(line.split(':', 1)[1].strip())
|
||||
if pkg in core:
|
||||
print(f'ERROR: plugin package \"{pkg}\" collides with a package in hermes sealed venv', file=sys.stderr)
|
||||
print(f' from: {di}', file=sys.stderr)
|
||||
print(f' Remove this dependency from extraPythonPackages.', file=sys.stderr)
|
||||
sys.exit(1)
|
||||
break
|
||||
|
||||
print('No collisions found.')
|
||||
"
|
||||
echo "=== No collisions ==="
|
||||
''}
|
||||
|
||||
runHook postInstall
|
||||
'';
|
||||
|
||||
passthru = {
|
||||
inherit hermesTui hermesWeb hermesNpmLib hermesVenv;
|
||||
|
||||
devShellHook = ''
|
||||
STAMP=".nix-stamps/hermes-agent"
|
||||
STAMP_VALUE="${pyprojectHash}:${uvLockHash}"
|
||||
if [ ! -f "$STAMP" ] || [ "$(cat "$STAMP")" != "$STAMP_VALUE" ]; then
|
||||
echo "hermes-agent: installing Python dependencies..."
|
||||
uv venv .venv --python ${python312}/bin/python3 2>/dev/null || true
|
||||
source .venv/bin/activate
|
||||
uv pip install -e ".[all]"
|
||||
[ -d mini-swe-agent ] && uv pip install -e ./mini-swe-agent 2>/dev/null || true
|
||||
[ -d tinker-atropos ] && uv pip install -e ./tinker-atropos 2>/dev/null || true
|
||||
mkdir -p .nix-stamps
|
||||
echo "$STAMP_VALUE" > "$STAMP"
|
||||
else
|
||||
source .venv/bin/activate
|
||||
export HERMES_PYTHON=${hermesVenv}/bin/python3
|
||||
fi
|
||||
'';
|
||||
};
|
||||
|
||||
meta = with lib; {
|
||||
description = "AI agent with advanced tool-calling capabilities";
|
||||
homepage = "https://github.com/NousResearch/hermes-agent";
|
||||
mainProgram = "hermes";
|
||||
license = licenses.mit;
|
||||
platforms = platforms.unix;
|
||||
};
|
||||
}
|
||||
+1
-15
@@ -165,17 +165,6 @@
|
||||
|
||||
NEW_HASH=$(echo "$OUTPUT" | awk '/got:/ {print $2; exit}')
|
||||
if [ -z "$NEW_HASH" ]; then
|
||||
# Magic-Nix-Cache occasionally returns HTTP 418 / cache-throttled
|
||||
# mid-run; nix then prints "outputs … not valid, so checking is
|
||||
# not possible" without a `got:` line. That's an infrastructure
|
||||
# blip, not a stale lockfile — warn + skip rather than failing
|
||||
# the lint. A real hash mismatch would still surface in the
|
||||
# primary `.#$ATTR` build, which is a separate CI job.
|
||||
if echo "$OUTPUT" | grep -qE "throttled|HTTP error 418|substituter .* is disabled|some outputs of .* are not valid"; then
|
||||
echo " skipped (transient cache failure — see primary nix build for real status)" >&2
|
||||
echo "$OUTPUT" | tail -8 >&2
|
||||
continue
|
||||
fi
|
||||
echo " build failed with no hash mismatch:" >&2
|
||||
echo "$OUTPUT" | tail -40 >&2
|
||||
exit 1
|
||||
@@ -198,10 +187,7 @@
|
||||
|
||||
if [ "$MODE" = "--apply" ]; then
|
||||
sed -i "s|hash = \"sha256-[^\"]*\";|hash = \"$NEW_HASH\";|" "$NIX_FILE"
|
||||
if ! nix build ".#$ATTR.npmDeps" --no-link --print-build-logs; then
|
||||
echo " verification build failed after hash update" >&2
|
||||
exit 1
|
||||
fi
|
||||
nix build ".#$ATTR.npmDeps" --no-link --print-build-logs
|
||||
FIXED=1
|
||||
echo " fixed"
|
||||
fi
|
||||
|
||||
+7
-101
@@ -28,8 +28,6 @@
|
||||
|
||||
let
|
||||
cfg = config.services.hermes-agent;
|
||||
effectivePackage = if cfg.extraPythonPackages == [ ] then cfg.package
|
||||
else cfg.package.override { inherit (cfg) extraPythonPackages; };
|
||||
hermes-agent = inputs.self.packages.${pkgs.stdenv.hostPlatform.system}.default;
|
||||
|
||||
# Deep-merge config type (from 0xrsydn/nix-hermes-agent)
|
||||
@@ -455,61 +453,7 @@
|
||||
extraPackages = mkOption {
|
||||
type = types.listOf types.package;
|
||||
default = [ ];
|
||||
description = ''
|
||||
Extra packages available to the agent — terminal commands, skills,
|
||||
cron jobs, and the service process all see them.
|
||||
|
||||
Implemented via the hermes user's per-user profile
|
||||
(`/etc/profiles/per-user/${cfg.user}/bin`), which NixOS includes
|
||||
in PATH for login shells. The packages are also added to the
|
||||
systemd service PATH for direct process access.
|
||||
'';
|
||||
};
|
||||
|
||||
extraPlugins = mkOption {
|
||||
type = types.listOf types.package;
|
||||
default = [ ];
|
||||
description = ''
|
||||
Directory-based plugin packages to symlink into the hermes plugins
|
||||
directory. Each package should contain a plugin.yaml and __init__.py
|
||||
at its root. Hermes discovers these automatically on startup.
|
||||
'';
|
||||
example = literalExpression ''
|
||||
[
|
||||
(pkgs.fetchFromGitHub {
|
||||
owner = "stephenschoettler";
|
||||
repo = "hermes-lcm";
|
||||
name = "hermes-lcm";
|
||||
rev = "v0.7.0";
|
||||
hash = "sha256-...";
|
||||
})
|
||||
]
|
||||
'';
|
||||
};
|
||||
|
||||
extraPythonPackages = mkOption {
|
||||
type = types.listOf types.package;
|
||||
default = [ ];
|
||||
description = ''
|
||||
Python packages to add to PYTHONPATH for entry-point plugin discovery.
|
||||
These are pip-packaged plugins that register via the
|
||||
hermes_agent.plugins entry-point group. Each package must be built
|
||||
with the same Python interpreter as hermes (python312).
|
||||
'';
|
||||
example = literalExpression ''
|
||||
[
|
||||
(pkgs.python312Packages.buildPythonPackage {
|
||||
pname = "rtk-hermes";
|
||||
version = "1.0.0";
|
||||
src = pkgs.fetchFromGitHub {
|
||||
owner = "ogallotti";
|
||||
repo = "rtk-hermes";
|
||||
rev = "main";
|
||||
hash = "sha256-...";
|
||||
};
|
||||
})
|
||||
]
|
||||
'';
|
||||
description = "Extra packages available on PATH.";
|
||||
};
|
||||
|
||||
restart = mkOption {
|
||||
@@ -626,7 +570,7 @@
|
||||
# so interactive shells share state (sessions, skills, cron) with the
|
||||
# gateway service instead of creating a separate ~/.hermes/.
|
||||
(lib.mkIf cfg.addToSystemPackages {
|
||||
environment.systemPackages = [ effectivePackage ];
|
||||
environment.systemPackages = [ cfg.package ];
|
||||
environment.variables.HERMES_HOME = "${cfg.stateDir}/.hermes";
|
||||
})
|
||||
|
||||
@@ -637,28 +581,7 @@
|
||||
});
|
||||
})
|
||||
|
||||
# ── Assertions ─────────────────────────────────────────────────────
|
||||
{
|
||||
assertions = let
|
||||
names = map lib.getName cfg.extraPlugins;
|
||||
in [{
|
||||
assertion = (lib.length names) == (lib.length (lib.unique names));
|
||||
message = "services.hermes-agent.extraPlugins: duplicate plugin names detected: ${toString names}. If using fetchFromGitHub, set name = \"plugin-name\" to disambiguate.";
|
||||
}];
|
||||
}
|
||||
|
||||
# ── Warnings ──────────────────────────────────────────────────────
|
||||
# ── Per-user profile for extraPackages ───────────────────────────
|
||||
# Wire extraPackages into the hermes user's per-user profile so the
|
||||
# login-shell snapshot (which rebuilds PATH from NixOS profiles) sees
|
||||
# them. The systemd service PATH also includes them for direct access.
|
||||
(lib.mkIf (cfg.extraPackages != []) {
|
||||
# listOf options are merged by the NixOS module system — this appends to
|
||||
# any packages the operator assigned to this user externally (e.g. when
|
||||
# createUser = false and the user definition lives elsewhere in the config).
|
||||
users.users.${cfg.user}.packages = cfg.extraPackages;
|
||||
})
|
||||
|
||||
(lib.mkIf (cfg.container.enable && !cfg.addToSystemPackages && cfg.container.hostUsers != []) {
|
||||
warnings = [
|
||||
''
|
||||
@@ -679,7 +602,6 @@
|
||||
"d ${cfg.stateDir}/.hermes/sessions 2770 ${cfg.user} ${cfg.group} - -"
|
||||
"d ${cfg.stateDir}/.hermes/logs 2770 ${cfg.user} ${cfg.group} - -"
|
||||
"d ${cfg.stateDir}/.hermes/memories 2770 ${cfg.user} ${cfg.group} - -"
|
||||
"d ${cfg.stateDir}/.hermes/plugins 2770 ${cfg.user} ${cfg.group} - -"
|
||||
"d ${cfg.stateDir}/home 0750 ${cfg.user} ${cfg.group} - -"
|
||||
"d ${cfg.workingDirectory} 2770 ${cfg.user} ${cfg.group} - -"
|
||||
];
|
||||
@@ -701,7 +623,7 @@
|
||||
find ${cfg.stateDir}/.hermes -maxdepth 1 \
|
||||
\( -name "*.db" -o -name "*.db-wal" -o -name "*.db-shm" -o -name "SOUL.md" \) \
|
||||
-exec chmod g+rw {} + 2>/dev/null || true
|
||||
for _subdir in cron sessions logs memories plugins; do
|
||||
for _subdir in cron sessions logs memories; do
|
||||
mkdir -p "${cfg.stateDir}/.hermes/$_subdir"
|
||||
chown ${cfg.user}:${cfg.group} "${cfg.stateDir}/.hermes/$_subdir"
|
||||
chmod 2770 "${cfg.stateDir}/.hermes/$_subdir"
|
||||
@@ -810,22 +732,6 @@ HERMES_NIX_ENV_EOF
|
||||
${lib.concatStringsSep "\n" (lib.mapAttrsToList (name: _value: ''
|
||||
install -o ${cfg.user} -g ${cfg.group} -m 0640 ${documentDerivation}/${name} ${cfg.workingDirectory}/${name}
|
||||
'') cfg.documents)}
|
||||
|
||||
# ── Declarative plugins ─────────────────────────────────────────
|
||||
# Remove stale managed symlinks (plugins removed from config)
|
||||
find ${cfg.stateDir}/.hermes/plugins -maxdepth 1 -type l -name 'nix-managed-*' -delete 2>/dev/null || true
|
||||
|
||||
${lib.concatStringsSep "\n" (map (plugin:
|
||||
let
|
||||
name = lib.getName plugin;
|
||||
in ''
|
||||
if [ ! -f "${plugin}/plugin.yaml" ]; then
|
||||
echo "ERROR: extraPlugins entry '${plugin}' has no plugin.yaml" >&2
|
||||
exit 1
|
||||
fi
|
||||
ln -sfn ${plugin} ${cfg.stateDir}/.hermes/plugins/nix-managed-${name}
|
||||
chown -h ${cfg.user}:${cfg.group} ${cfg.stateDir}/.hermes/plugins/nix-managed-${name}
|
||||
'') cfg.extraPlugins)}
|
||||
'';
|
||||
}
|
||||
|
||||
@@ -856,7 +762,7 @@ HERMES_NIX_ENV_EOF
|
||||
# reads them at Python startup — no systemd EnvironmentFile needed.
|
||||
|
||||
ExecStart = lib.concatStringsSep " " ([
|
||||
"${effectivePackage}/bin/hermes"
|
||||
"${cfg.package}/bin/hermes"
|
||||
"gateway"
|
||||
] ++ cfg.extraArgs);
|
||||
|
||||
@@ -879,7 +785,7 @@ HERMES_NIX_ENV_EOF
|
||||
};
|
||||
|
||||
path = [
|
||||
effectivePackage
|
||||
cfg.package
|
||||
pkgs.bash
|
||||
pkgs.coreutils
|
||||
pkgs.git
|
||||
@@ -904,11 +810,11 @@ HERMES_NIX_ENV_EOF
|
||||
|
||||
preStart = ''
|
||||
# Stable symlinks — container references these, not store paths directly
|
||||
ln -sfn ${effectivePackage} ${cfg.stateDir}/current-package
|
||||
ln -sfn ${cfg.package} ${cfg.stateDir}/current-package
|
||||
ln -sfn ${containerEntrypoint} ${cfg.stateDir}/current-entrypoint
|
||||
|
||||
# GC roots so nix-collect-garbage doesn't remove store paths in use
|
||||
${pkgs.nix}/bin/nix-store --add-root ${cfg.stateDir}/.gc-root --indirect -r ${effectivePackage} 2>/dev/null || true
|
||||
${pkgs.nix}/bin/nix-store --add-root ${cfg.stateDir}/.gc-root --indirect -r ${cfg.package} 2>/dev/null || true
|
||||
${pkgs.nix}/bin/nix-store --add-root ${cfg.stateDir}/.gc-root-entrypoint --indirect -r ${containerEntrypoint} 2>/dev/null || true
|
||||
|
||||
# Check if container needs (re)creation
|
||||
|
||||
@@ -1,10 +0,0 @@
|
||||
# nix/overlays.nix — Expose pkgs.hermes-agent for external NixOS configs
|
||||
{ inputs, ... }:
|
||||
{
|
||||
flake.overlays.default = final: _: {
|
||||
hermes-agent = final.callPackage ./hermes-agent.nix {
|
||||
inherit (inputs) uv2nix pyproject-nix pyproject-build-systems;
|
||||
npm-lockfile-fix = inputs.npm-lockfile-fix.packages.${final.stdenv.hostPlatform.system}.default;
|
||||
};
|
||||
};
|
||||
}
|
||||
+107
-6
@@ -4,19 +4,120 @@
|
||||
perSystem =
|
||||
{ pkgs, inputs', ... }:
|
||||
let
|
||||
hermesAgent = pkgs.callPackage ./hermes-agent.nix {
|
||||
hermesVenv = pkgs.callPackage ./python.nix {
|
||||
inherit (inputs) uv2nix pyproject-nix pyproject-build-systems;
|
||||
};
|
||||
|
||||
hermesNpmLib = pkgs.callPackage ./lib.nix {
|
||||
npm-lockfile-fix = inputs'.npm-lockfile-fix.packages.default;
|
||||
};
|
||||
|
||||
hermesTui = pkgs.callPackage ./tui.nix {
|
||||
inherit hermesNpmLib;
|
||||
};
|
||||
|
||||
# Import bundled skills, excluding runtime caches
|
||||
bundledSkills = pkgs.lib.cleanSourceWith {
|
||||
src = ../skills;
|
||||
filter = path: _type: !(pkgs.lib.hasInfix "/index-cache/" path);
|
||||
};
|
||||
|
||||
hermesWeb = pkgs.callPackage ./web.nix {
|
||||
inherit hermesNpmLib;
|
||||
};
|
||||
|
||||
runtimeDeps = with pkgs; [
|
||||
nodejs_22
|
||||
ripgrep
|
||||
git
|
||||
openssh
|
||||
ffmpeg
|
||||
tirith
|
||||
];
|
||||
|
||||
runtimePath = pkgs.lib.makeBinPath runtimeDeps;
|
||||
|
||||
# Lockfile hashes for dev shell stamps
|
||||
pyprojectHash = builtins.hashString "sha256" (builtins.readFile ../pyproject.toml);
|
||||
uvLockHash =
|
||||
if builtins.pathExists ../uv.lock then
|
||||
builtins.hashString "sha256" (builtins.readFile ../uv.lock)
|
||||
else
|
||||
"none";
|
||||
in
|
||||
{
|
||||
packages = {
|
||||
default = hermesAgent;
|
||||
tui = hermesAgent.hermesTui;
|
||||
web = hermesAgent.hermesWeb;
|
||||
default = pkgs.stdenv.mkDerivation {
|
||||
pname = "hermes-agent";
|
||||
version = (fromTOML (builtins.readFile ../pyproject.toml)).project.version;
|
||||
|
||||
fix-lockfiles = hermesAgent.hermesNpmLib.mkFixLockfiles {
|
||||
packages = [ hermesAgent.hermesTui hermesAgent.hermesWeb ];
|
||||
dontUnpack = true;
|
||||
dontBuild = true;
|
||||
nativeBuildInputs = [ pkgs.makeWrapper ];
|
||||
|
||||
installPhase = ''
|
||||
runHook preInstall
|
||||
|
||||
mkdir -p $out/share/hermes-agent $out/bin
|
||||
cp -r ${bundledSkills} $out/share/hermes-agent/skills
|
||||
cp -r ${hermesWeb} $out/share/hermes-agent/web_dist
|
||||
|
||||
# copy pre-built TUI (same layout as dev: ui-tui/dist/ + node_modules/)
|
||||
mkdir -p $out/ui-tui
|
||||
cp -r ${hermesTui}/lib/hermes-tui/* $out/ui-tui/
|
||||
|
||||
${pkgs.lib.concatMapStringsSep "\n"
|
||||
(name: ''
|
||||
makeWrapper ${hermesVenv}/bin/${name} $out/bin/${name} \
|
||||
--suffix PATH : "${runtimePath}" \
|
||||
--set HERMES_BUNDLED_SKILLS $out/share/hermes-agent/skills \
|
||||
--set HERMES_WEB_DIST $out/share/hermes-agent/web_dist \
|
||||
--set HERMES_TUI_DIR $out/ui-tui \
|
||||
--set HERMES_PYTHON ${hermesVenv}/bin/python3 \
|
||||
--set HERMES_NODE ${pkgs.nodejs_22}/bin/node
|
||||
'')
|
||||
[
|
||||
"hermes"
|
||||
"hermes-agent"
|
||||
"hermes-acp"
|
||||
]
|
||||
}
|
||||
|
||||
runHook postInstall
|
||||
'';
|
||||
|
||||
passthru.devShellHook = ''
|
||||
STAMP=".nix-stamps/hermes-agent"
|
||||
STAMP_VALUE="${pyprojectHash}:${uvLockHash}"
|
||||
if [ ! -f "$STAMP" ] || [ "$(cat "$STAMP")" != "$STAMP_VALUE" ]; then
|
||||
echo "hermes-agent: installing Python dependencies..."
|
||||
uv venv .venv --python ${pkgs.python312}/bin/python3 2>/dev/null || true
|
||||
source .venv/bin/activate
|
||||
uv pip install -e ".[all]"
|
||||
[ -d mini-swe-agent ] && uv pip install -e ./mini-swe-agent 2>/dev/null || true
|
||||
[ -d tinker-atropos ] && uv pip install -e ./tinker-atropos 2>/dev/null || true
|
||||
mkdir -p .nix-stamps
|
||||
echo "$STAMP_VALUE" > "$STAMP"
|
||||
else
|
||||
source .venv/bin/activate
|
||||
export HERMES_PYTHON=${hermesVenv}/bin/python3
|
||||
fi
|
||||
'';
|
||||
|
||||
meta = with pkgs.lib; {
|
||||
description = "AI agent with advanced tool-calling capabilities";
|
||||
homepage = "https://github.com/NousResearch/hermes-agent";
|
||||
mainProgram = "hermes";
|
||||
license = licenses.mit;
|
||||
platforms = platforms.unix;
|
||||
};
|
||||
};
|
||||
|
||||
tui = hermesTui;
|
||||
web = hermesWeb;
|
||||
|
||||
fix-lockfiles = hermesNpmLib.mkFixLockfiles {
|
||||
packages = [ hermesTui hermesWeb ];
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
+1
-2
@@ -7,7 +7,6 @@
|
||||
pyproject-nix,
|
||||
pyproject-build-systems,
|
||||
stdenv,
|
||||
dependency-groups ? [ "all" ],
|
||||
}:
|
||||
let
|
||||
workspace = uv2nix.lib.workspace.loadWorkspace { workspaceRoot = ./..; };
|
||||
@@ -97,5 +96,5 @@ let
|
||||
]);
|
||||
in
|
||||
pythonSet.mkVirtualEnv "hermes-agent-env" {
|
||||
hermes-agent = dependency-groups;
|
||||
hermes-agent = [ "all" ];
|
||||
}
|
||||
|
||||
+1
-1
@@ -4,7 +4,7 @@ let
|
||||
src = ../web;
|
||||
npmDeps = pkgs.fetchNpmDeps {
|
||||
inherit src;
|
||||
hash = "sha256-+B2+Fe4djPzHHcUXRx+m0cuyaopAhW0PcHsMgYfV5VE=";
|
||||
hash = "sha256-4Z8KQ69QhO83X6zff+5urWBv6MME686MhTTMdwSl65o=";
|
||||
};
|
||||
|
||||
npm = hermesNpmLib.mkNpmPassthru { folder = "web"; attr = "web"; pname = "hermes-web"; };
|
||||
|
||||
+1
-11
@@ -204,9 +204,8 @@ win.par.winopen.pulse()
|
||||
| `td_input_clear` | Stop input automation |
|
||||
| `td_op_screen_rect` | Get screen coords of a node |
|
||||
| `td_click_screen_point` | Click a point in a screenshot |
|
||||
| `td_screen_point_to_global` | Convert screenshot pixel to absolute screen coords |
|
||||
|
||||
The table above covers the 32 tools used in typical creative workflows. The remaining 4 tools (`td_project_quit`, `td_test_session`, `td_dev_log`, `td_clear_dev_log`) are admin/dev-mode utilities — see `references/mcp-tools.md` for the full 36-tool reference with complete parameter schemas.
|
||||
See `references/mcp-tools.md` for full parameter schemas.
|
||||
|
||||
## Key Implementation Rules
|
||||
|
||||
@@ -339,15 +338,6 @@ See `references/network-patterns.md` for complete build scripts + shader code.
|
||||
| `references/operator-tips.md` | Wireframe rendering, feedback TOP setup |
|
||||
| `references/geometry-comp.md` | Geometry COMP: instancing, POP vs SOP, morphing |
|
||||
| `references/audio-reactive.md` | Audio band extraction, beat detection, envelope following |
|
||||
| `references/animation.md` | LFOs, timers, keyframes, easing, expression-driven motion |
|
||||
| `references/midi-osc.md` | MIDI/OSC controllers, TouchOSC, multi-machine sync |
|
||||
| `references/particles.md` | POPs and legacy particleSOP — emission, forces, collisions |
|
||||
| `references/projection-mapping.md` | Multi-window output, corner pin, mesh warp, edge blending |
|
||||
| `references/external-data.md` | HTTP, WebSocket, MQTT, Serial, TCP, webserverDAT |
|
||||
| `references/panel-ui.md` | Custom params, panel COMPs, button/slider/field, panelExecuteDAT |
|
||||
| `references/replicator.md` | replicatorCOMP — data-driven cloning, layouts, callbacks |
|
||||
| `references/dat-scripting.md` | Execute DAT family — chop/dat/parameter/panel/op/executeDAT |
|
||||
| `references/3d-scene.md` | Lighting rigs, shadows, IBL/cubemaps, multi-camera, PBR |
|
||||
| `scripts/setup.sh` | Automated setup script |
|
||||
|
||||
---
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user