Compare commits
44 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 53f875d063 | |||
| ed170f4333 | |||
| be57af7188 | |||
| 059980727a | |||
| 21676e80cc | |||
| 58a6171bfb | |||
| bc0d8a941e | |||
| 2d137074a3 | |||
| 5531c0df82 | |||
| 5e68503d2f | |||
| 22cc7492ff | |||
| c2fd0fa684 | |||
| fa9383d27b | |||
| 019d4c1c3f | |||
| a12f7aa8bb | |||
| 0d31864e3b | |||
| c8b7e7268a | |||
| bc79e227e6 | |||
| 88602376d4 | |||
| ded12f0968 | |||
| 80e474f11f | |||
| d341af22c0 | |||
| 88e07c42b4 | |||
| cc5efb6fc1 | |||
| 97a2474b39 | |||
| 6b4ef00a2c | |||
| 4858e26eaa | |||
| dcd7b717f8 | |||
| ac855bba0e | |||
| f95c34f415 | |||
| 679a27498d | |||
| d1ee4915f3 | |||
| 26816d1f77 | |||
| e750829015 | |||
| 7d39a45749 | |||
| 69ff114ee2 | |||
| f10a3df632 | |||
| 88a9efdb1a | |||
| 72a3af63d4 | |||
| a2819e1820 | |||
| 0a6ecea676 | |||
| b66cbb7b4c | |||
| 9e398e1809 | |||
| 0399d4b976 |
@@ -43,6 +43,16 @@ jobs:
|
||||
source .venv/bin/activate
|
||||
uv pip install -e ".[all,dev]"
|
||||
|
||||
- name: Verify tools/_manifest.py is up-to-date
|
||||
# tools/_manifest.py is auto-generated by scripts/build_tool_manifest.py
|
||||
# and must be regenerated when tools/*.py are added/removed/converted.
|
||||
# Rejects PRs that add/remove a self-registering tool module without
|
||||
# updating the manifest. Doesn't run the build (no side effects in CI) —
|
||||
# just verifies the committed manifest matches what would be generated.
|
||||
run: |
|
||||
source .venv/bin/activate
|
||||
python scripts/build_tool_manifest.py --check
|
||||
|
||||
- name: Run tests
|
||||
run: |
|
||||
source .venv/bin/activate
|
||||
|
||||
+67
-74
@@ -257,11 +257,10 @@ _OAUTH_ONLY_BETAS = [
|
||||
"oauth-2025-04-20",
|
||||
]
|
||||
|
||||
# Claude Code version — sent on OAuth token-exchange / refresh requests
|
||||
# (platform.claude.com/v1/oauth/token) as the client's user-agent. Anthropic's
|
||||
# OAuth flow validates the UA and may reject requests with a version that's
|
||||
# too old, so detecting dynamically keeps users on a current Claude Code
|
||||
# install from hitting stale-version errors during login/refresh.
|
||||
# Claude Code identity — required for OAuth requests to be routed correctly.
|
||||
# Without these, Anthropic's infrastructure intermittently 500s OAuth traffic.
|
||||
# The version must stay reasonably current — Anthropic rejects OAuth requests
|
||||
# when the spoofed user-agent version is too far behind the actual release.
|
||||
_CLAUDE_CODE_VERSION_FALLBACK = "2.1.74"
|
||||
_claude_code_version_cache: Optional[str] = None
|
||||
|
||||
@@ -269,9 +268,9 @@ _claude_code_version_cache: Optional[str] = None
|
||||
def _detect_claude_code_version() -> str:
|
||||
"""Detect the installed Claude Code version, fall back to a static constant.
|
||||
|
||||
Used only by the OAuth token-exchange / refresh flow
|
||||
(``platform.claude.com/v1/oauth/token``). The Messages API client no
|
||||
longer sends a claude-cli user-agent.
|
||||
Anthropic's OAuth infrastructure validates the user-agent version and may
|
||||
reject requests with a version that's too old. Detecting dynamically means
|
||||
users who keep Claude Code updated never hit stale-version 400s.
|
||||
"""
|
||||
import subprocess as _sp
|
||||
|
||||
@@ -291,13 +290,12 @@ def _detect_claude_code_version() -> str:
|
||||
return _CLAUDE_CODE_VERSION_FALLBACK
|
||||
|
||||
|
||||
def _get_claude_code_version() -> str:
|
||||
"""Lazily detect the installed Claude Code version for OAuth flow headers.
|
||||
_CLAUDE_CODE_SYSTEM_PREFIX = "You are Claude Code, Anthropic's official CLI for Claude."
|
||||
_MCP_TOOL_PREFIX = "mcp_"
|
||||
|
||||
Used only on the OAuth token-exchange and refresh endpoints
|
||||
(``platform.claude.com/v1/oauth/token``). The Messages API client does
|
||||
not send a claude-cli user-agent.
|
||||
"""
|
||||
|
||||
def _get_claude_code_version() -> str:
|
||||
"""Lazily detect the installed Claude Code version when OAuth headers need it."""
|
||||
global _claude_code_version_cache
|
||||
if _claude_code_version_cache is None:
|
||||
_claude_code_version_cache = _detect_claude_code_version()
|
||||
@@ -467,21 +465,15 @@ def build_anthropic_client(api_key: str, base_url: str = None, timeout: float =
|
||||
if common_betas:
|
||||
kwargs["default_headers"] = {"anthropic-beta": ",".join(common_betas)}
|
||||
elif _is_oauth_token(api_key):
|
||||
# OAuth access token / setup-token → Bearer auth + OAuth-only betas.
|
||||
# The OAuth-specific beta headers are still required by Anthropic's
|
||||
# OAuth-gated Messages API path; the Claude Code user-agent / x-app
|
||||
# spoofing is deliberately NOT sent — Hermes identifies as itself.
|
||||
#
|
||||
# ``context-1m-2025-08-07`` is stripped here: Anthropic rejects
|
||||
# OAuth requests that carry it with
|
||||
# "This authentication style is incompatible with the long
|
||||
# context beta header."
|
||||
# Subscription-gated OAuth traffic gets the 200K default window.
|
||||
oauth_safe_common = [b for b in common_betas if b != _CONTEXT_1M_BETA]
|
||||
all_betas = oauth_safe_common + _OAUTH_ONLY_BETAS
|
||||
# OAuth access token / setup-token → Bearer auth + Claude Code identity.
|
||||
# Anthropic routes OAuth requests based on user-agent and headers;
|
||||
# without Claude Code's fingerprint, requests get intermittent 500s.
|
||||
all_betas = common_betas + _OAUTH_ONLY_BETAS
|
||||
kwargs["auth_token"] = api_key
|
||||
kwargs["default_headers"] = {
|
||||
"anthropic-beta": ",".join(all_betas),
|
||||
"user-agent": f"claude-cli/{_get_claude_code_version()} (external, cli)",
|
||||
"x-app": "cli",
|
||||
}
|
||||
else:
|
||||
# Regular API key → x-api-key header + common betas
|
||||
@@ -825,45 +817,17 @@ def resolve_anthropic_token() -> Optional[str]:
|
||||
"""Resolve an Anthropic token from all available sources.
|
||||
|
||||
Priority:
|
||||
1. Hermes credential pool (``~/.hermes/auth.json`` →
|
||||
``credential_pool.anthropic``) — OAuth tokens minted by Hermes'
|
||||
own PKCE login flow. Entries are auto-refreshed when near
|
||||
expiry. Env-sourced pool entries (``source="env:..."``) are
|
||||
skipped here so the env-var priority logic below still runs.
|
||||
2. ANTHROPIC_TOKEN env var (OAuth/setup token saved by Hermes)
|
||||
3. CLAUDE_CODE_OAUTH_TOKEN env var
|
||||
4. Claude Code credentials (~/.claude.json or ~/.claude/.credentials.json)
|
||||
1. ANTHROPIC_TOKEN env var (OAuth/setup token saved by Hermes)
|
||||
2. CLAUDE_CODE_OAUTH_TOKEN env var
|
||||
3. Claude Code credentials (~/.claude.json or ~/.claude/.credentials.json)
|
||||
— with automatic refresh if expired and a refresh token is available
|
||||
5. ANTHROPIC_API_KEY env var (regular API key, or legacy fallback)
|
||||
4. ANTHROPIC_API_KEY env var (regular API key, or legacy fallback)
|
||||
|
||||
Returns the token string or None.
|
||||
"""
|
||||
# 1. Hermes credential pool — the live source of truth for tokens
|
||||
# minted via ``hermes login anthropic`` / the dashboard PKCE flow.
|
||||
# ``select()`` picks the best available entry and refreshes it if
|
||||
# it's near expiry, so callers always get a fresh token.
|
||||
#
|
||||
# Skip env-sourced pool entries (``env:ANTHROPIC_TOKEN``, etc.) —
|
||||
# those are passthroughs of the env var, and the env-var branches
|
||||
# below have richer priority logic (``_prefer_refreshable_claude_code_token``)
|
||||
# that can upgrade a static env OAuth token to a refreshed
|
||||
# Claude Code token. Letting the pool win here would short-circuit
|
||||
# that upgrade.
|
||||
try:
|
||||
from agent.credential_pool import load_pool
|
||||
pool = load_pool("anthropic")
|
||||
entry = pool.select()
|
||||
if entry and entry.access_token and not entry.source.startswith("env:"):
|
||||
return entry.access_token
|
||||
except Exception as exc:
|
||||
# Pool lookup is best-effort — fall through to env/file sources
|
||||
# if anything goes wrong (e.g. auth.json corruption during a
|
||||
# concurrent write).
|
||||
logger.debug("Credential-pool lookup failed for anthropic: %s", exc)
|
||||
|
||||
creds = read_claude_code_credentials()
|
||||
|
||||
# 2. Hermes-managed OAuth/setup token env var
|
||||
# 1. Hermes-managed OAuth/setup token env var
|
||||
token = os.getenv("ANTHROPIC_TOKEN", "").strip()
|
||||
if token:
|
||||
preferred = _prefer_refreshable_claude_code_token(token, creds)
|
||||
@@ -871,7 +835,7 @@ def resolve_anthropic_token() -> Optional[str]:
|
||||
return preferred
|
||||
return token
|
||||
|
||||
# 3. CLAUDE_CODE_OAUTH_TOKEN (used by Claude Code for setup-tokens)
|
||||
# 2. CLAUDE_CODE_OAUTH_TOKEN (used by Claude Code for setup-tokens)
|
||||
cc_token = os.getenv("CLAUDE_CODE_OAUTH_TOKEN", "").strip()
|
||||
if cc_token:
|
||||
preferred = _prefer_refreshable_claude_code_token(cc_token, creds)
|
||||
@@ -879,12 +843,12 @@ def resolve_anthropic_token() -> Optional[str]:
|
||||
return preferred
|
||||
return cc_token
|
||||
|
||||
# 4. Claude Code credential file
|
||||
# 3. Claude Code credential file
|
||||
resolved_claude_token = _resolve_claude_code_token_from_credentials(creds)
|
||||
if resolved_claude_token:
|
||||
return resolved_claude_token
|
||||
|
||||
# 5. Regular API key, or a legacy OAuth token saved in ANTHROPIC_API_KEY.
|
||||
# 4. Regular API key, or a legacy OAuth token saved in ANTHROPIC_API_KEY.
|
||||
# This remains as a compatibility fallback for pre-migration Hermes configs.
|
||||
api_key = os.getenv("ANTHROPIC_API_KEY", "").strip()
|
||||
if api_key:
|
||||
@@ -1649,10 +1613,8 @@ def build_anthropic_kwargs(
|
||||
"max_tokens too large given prompt" errors and retry with a smaller cap
|
||||
(see parse_available_output_tokens_from_error + _ephemeral_max_output_tokens).
|
||||
|
||||
When *is_oauth* is True, enables the OAuth-only beta headers required by
|
||||
Anthropic's subscription-gated Messages endpoint (fast-mode branch only;
|
||||
the default headers are set by build_anthropic_client). No system-prompt
|
||||
or tool-name rewriting is performed — Hermes identifies as itself.
|
||||
When *is_oauth* is True, applies Claude Code compatibility transforms:
|
||||
system prompt prefix, tool name prefixing, and prompt sanitization.
|
||||
|
||||
When *preserve_dots* is True, model name dots are not converted to hyphens
|
||||
(for Alibaba/DashScope anthropic-compatible endpoints: qwen3.5-plus).
|
||||
@@ -1685,11 +1647,45 @@ def build_anthropic_kwargs(
|
||||
if context_length and effective_max_tokens > context_length:
|
||||
effective_max_tokens = max(context_length - 1, 1)
|
||||
|
||||
# OAuth requests go through Anthropic's subscription-gated Messages
|
||||
# endpoint but otherwise send the real Hermes system prompt and real
|
||||
# Hermes tool names — the only OAuth-specific wire differences are
|
||||
# Bearer auth and the _OAUTH_ONLY_BETAS header (applied in
|
||||
# build_anthropic_client and the fast-mode branch below).
|
||||
# ── OAuth: Claude Code identity ──────────────────────────────────
|
||||
if is_oauth:
|
||||
# 1. Prepend Claude Code system prompt identity
|
||||
cc_block = {"type": "text", "text": _CLAUDE_CODE_SYSTEM_PREFIX}
|
||||
if isinstance(system, list):
|
||||
system = [cc_block] + system
|
||||
elif isinstance(system, str) and system:
|
||||
system = [cc_block, {"type": "text", "text": system}]
|
||||
else:
|
||||
system = [cc_block]
|
||||
|
||||
# 2. Sanitize system prompt — replace product name references
|
||||
# to avoid Anthropic's server-side content filters.
|
||||
for block in system:
|
||||
if isinstance(block, dict) and block.get("type") == "text":
|
||||
text = block.get("text", "")
|
||||
text = text.replace("Hermes Agent", "Claude Code")
|
||||
text = text.replace("Hermes agent", "Claude Code")
|
||||
text = text.replace("hermes-agent", "claude-code")
|
||||
text = text.replace("Nous Research", "Anthropic")
|
||||
block["text"] = text
|
||||
|
||||
# 3. Prefix tool names with mcp_ (Claude Code convention)
|
||||
if anthropic_tools:
|
||||
for tool in anthropic_tools:
|
||||
if "name" in tool:
|
||||
tool["name"] = _MCP_TOOL_PREFIX + tool["name"]
|
||||
|
||||
# 4. Prefix tool names in message history (tool_use and tool_result blocks)
|
||||
for msg in anthropic_messages:
|
||||
content = msg.get("content")
|
||||
if isinstance(content, list):
|
||||
for block in content:
|
||||
if isinstance(block, dict):
|
||||
if block.get("type") == "tool_use" and "name" in block:
|
||||
if not block["name"].startswith(_MCP_TOOL_PREFIX):
|
||||
block["name"] = _MCP_TOOL_PREFIX + block["name"]
|
||||
elif block.get("type") == "tool_result" and "tool_use_id" in block:
|
||||
pass # tool_result uses ID, not name
|
||||
|
||||
kwargs: Dict[str, Any] = {
|
||||
"model": model,
|
||||
@@ -1780,9 +1776,6 @@ def build_anthropic_kwargs(
|
||||
# extra_headers override the client-level anthropic-beta header).
|
||||
betas = list(_common_betas_for_base_url(base_url))
|
||||
if is_oauth:
|
||||
# Strip context-1m — incompatible with OAuth auth. See matching
|
||||
# comment in build_anthropic_client().
|
||||
betas = [b for b in betas if b != _CONTEXT_1M_BETA]
|
||||
betas.extend(_OAUTH_ONLY_BETAS)
|
||||
betas.append(_FAST_MODE_BETA)
|
||||
kwargs["extra_headers"] = {"anthropic-beta": ",".join(betas)}
|
||||
|
||||
@@ -713,7 +713,9 @@ class _AnthropicCompletionsAdapter:
|
||||
|
||||
response = self._client.messages.create(**anthropic_kwargs)
|
||||
_transport = get_transport("anthropic_messages")
|
||||
_nr = _transport.normalize_response(response)
|
||||
_nr = _transport.normalize_response(
|
||||
response, strip_tool_prefix=self._is_oauth
|
||||
)
|
||||
|
||||
# ToolCall already duck-types as OpenAI shape (.type, .function.name,
|
||||
# .function.arguments) via properties, so no wrapping needed.
|
||||
|
||||
@@ -0,0 +1,869 @@
|
||||
"""Curator — background skill maintenance orchestrator.
|
||||
|
||||
The curator is an auxiliary-model task that periodically reviews agent-created
|
||||
skills and maintains the collection. It runs inactivity-triggered (no cron
|
||||
daemon): when the agent is idle and the last curator run was longer than
|
||||
``interval_hours`` ago, ``maybe_run_curator()`` spawns a forked AIAgent to do
|
||||
the review.
|
||||
|
||||
Responsibilities:
|
||||
- Auto-transition lifecycle states based on last_used_at timestamps
|
||||
- Spawn a background review agent that can pin / archive / consolidate /
|
||||
patch agent-created skills via skill_manage
|
||||
- Persist curator state (last_run_at, paused, etc.) in .curator_state
|
||||
|
||||
Strict invariants:
|
||||
- Only touches agent-created skills (see tools/skill_usage.is_agent_created)
|
||||
- Never auto-deletes — only archives. Archive is recoverable.
|
||||
- Pinned skills bypass all auto-transitions
|
||||
- Uses the auxiliary client; never touches the main session's prompt cache
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import tempfile
|
||||
import threading
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any, Callable, Dict, List, Optional, Set
|
||||
|
||||
from hermes_constants import get_hermes_home
|
||||
from tools import skill_usage
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Fallback values for the curator settings. Each constant is what the matching
# get_*() accessor in this module returns when the ``curator`` config section
# has no usable value for that key.
DEFAULT_INTERVAL_HOURS = 24 * 7  # 7 days
DEFAULT_MIN_IDLE_HOURS = 2
DEFAULT_STALE_AFTER_DAYS = 30
DEFAULT_ARCHIVE_AFTER_DAYS = 90
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# .curator_state — persistent scheduler + status
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _state_file() -> Path:
    """Return the path of ``.curator_state`` inside the Hermes ``skills`` directory."""
    skills_dir = get_hermes_home() / "skills"
    return skills_dir / ".curator_state"
|
||||
|
||||
|
||||
def _default_state() -> Dict[str, Any]:
|
||||
return {
|
||||
"last_run_at": None,
|
||||
"last_run_duration_seconds": None,
|
||||
"last_run_summary": None,
|
||||
"paused": False,
|
||||
"run_count": 0,
|
||||
}
|
||||
|
||||
|
||||
def load_state() -> Dict[str, Any]:
    """Load the persisted curator state, falling back to defaults.

    Returns the default state when the state file is missing, unreadable,
    not valid UTF-8 / JSON, or does not contain a JSON object. Otherwise the
    defaults are overlaid with the stored values; only keys that exist in the
    default state, or that start with ``_``, are carried over.
    """
    path = _state_file()
    if not path.exists():
        return _default_state()
    try:
        data = json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError) as e:
        # ValueError covers both json.JSONDecodeError and the
        # UnicodeDecodeError that read_text() raises on a corrupt file.
        logger.debug("Failed to read curator state: %s", e)
        return _default_state()
    if not isinstance(data, dict):
        return _default_state()
    base = _default_state()
    base.update({k: v for k, v in data.items() if k in base or k.startswith("_")})
    return base
|
||||
|
||||
|
||||
def save_state(data: Dict[str, Any]) -> None:
    """Write *data* as JSON to the curator state file.

    The JSON is written to a temporary file created in the same directory as
    the state file and then moved over the target with ``os.replace``.
    Failures are logged at debug level and not propagated for ``Exception``
    subclasses; the function returns ``None`` either way.
    """
    path = _state_file()
    try:
        path.parent.mkdir(parents=True, exist_ok=True)
        fd, tmp = tempfile.mkstemp(dir=str(path.parent), prefix=".curator_state_", suffix=".tmp")
        try:
            with os.fdopen(fd, "w", encoding="utf-8") as f:
                json.dump(data, f, indent=2, sort_keys=True, ensure_ascii=False)
                # Push the bytes to disk before the rename makes them visible.
                f.flush()
                os.fsync(f.fileno())
            os.replace(tmp, path)
        except BaseException:
            # Remove the temp file on any failure, then re-raise.
            try:
                os.unlink(tmp)
            except OSError:
                pass
            raise
    except Exception as e:
        logger.debug("Failed to save curator state: %s", e, exc_info=True)
|
||||
|
||||
|
||||
def set_paused(paused: bool) -> None:
    """Store the paused flag (coerced to ``bool``) in the persisted curator state."""
    current = load_state()
    current.update(paused=bool(paused))
    save_state(current)
|
||||
|
||||
|
||||
def is_paused() -> bool:
    """Report whether the persisted curator state has a truthy ``paused`` value."""
    flag = load_state().get("paused")
    return True if flag else False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Config access
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _load_config() -> Dict[str, Any]:
    """Return the ``curator`` section of the Hermes config as a dict.

    Yields an empty dict when the config cannot be loaded, is not a dict, or
    its ``curator`` entry is missing, falsy, or not a dict.
    """
    try:
        from hermes_cli.config import load_config

        full_config = load_config()
    except Exception as e:
        logger.debug("Failed to load config for curator: %s", e)
        return {}

    section = full_config.get("curator") if isinstance(full_config, dict) else None
    if section and isinstance(section, dict):
        return section
    return {}
|
||||
|
||||
|
||||
def is_enabled() -> bool:
    """Return the truthiness of ``curator.enabled``; True when the key is absent."""
    enabled = _load_config().get("enabled", True)
    return bool(enabled)
|
||||
|
||||
|
||||
def get_interval_hours() -> int:
    """Return ``curator.interval_hours`` as an int.

    Falls back to ``DEFAULT_INTERVAL_HOURS`` when the key is absent or its
    value cannot be converted with ``int()``.
    """
    raw = _load_config().get("interval_hours", DEFAULT_INTERVAL_HOURS)
    try:
        hours = int(raw)
    except (TypeError, ValueError):
        hours = DEFAULT_INTERVAL_HOURS
    return hours
|
||||
|
||||
|
||||
def get_min_idle_hours() -> float:
    """Return ``curator.min_idle_hours`` as a float.

    Falls back to ``DEFAULT_MIN_IDLE_HOURS`` when the key is absent or its
    value cannot be converted with ``float()``.
    """
    raw = _load_config().get("min_idle_hours", DEFAULT_MIN_IDLE_HOURS)
    try:
        hours = float(raw)
    except (TypeError, ValueError):
        # The constant is returned as-is on conversion failure.
        hours = DEFAULT_MIN_IDLE_HOURS
    return hours
|
||||
|
||||
|
||||
def get_stale_after_days() -> int:
    """Return ``curator.stale_after_days`` as an int.

    Falls back to ``DEFAULT_STALE_AFTER_DAYS`` when the key is absent or its
    value cannot be converted with ``int()``.
    """
    raw = _load_config().get("stale_after_days", DEFAULT_STALE_AFTER_DAYS)
    try:
        days = int(raw)
    except (TypeError, ValueError):
        days = DEFAULT_STALE_AFTER_DAYS
    return days
|
||||
|
||||
|
||||
def get_archive_after_days() -> int:
    """Return ``curator.archive_after_days`` as an int.

    Falls back to ``DEFAULT_ARCHIVE_AFTER_DAYS`` when the key is absent or its
    value cannot be converted with ``int()``.
    """
    raw = _load_config().get("archive_after_days", DEFAULT_ARCHIVE_AFTER_DAYS)
    try:
        days = int(raw)
    except (TypeError, ValueError):
        days = DEFAULT_ARCHIVE_AFTER_DAYS
    return days
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Idle / interval check
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _parse_iso(ts: Optional[str]) -> Optional[datetime]:
|
||||
if not ts:
|
||||
return None
|
||||
try:
|
||||
return datetime.fromisoformat(ts)
|
||||
except (TypeError, ValueError):
|
||||
return None
|
||||
|
||||
|
||||
def should_run_now(now: Optional[datetime] = None) -> bool:
    """Return True if the curator should run immediately.

    Gates:
      - curator.enabled == True
      - not paused
      - last_run_at missing (or unparseable), OR older than interval_hours

    The idle check (min_idle_hours) is applied at the call site where we know
    whether an agent is actively running — here we only enforce the static
    gates.

    Args:
        now: Reference time; defaults to the current UTC time. A naive value
            is interpreted as UTC, the same way a naive ``last_run_at`` is.
    """
    if not is_enabled():
        return False
    if is_paused():
        return False

    state = load_state()
    last = _parse_iso(state.get("last_run_at"))
    if last is None:
        return True

    if now is None:
        now = datetime.now(timezone.utc)
    elif now.tzinfo is None:
        # ``last`` is always made aware below; subtracting it from a naive
        # ``now`` would raise TypeError.
        now = now.replace(tzinfo=timezone.utc)
    if last.tzinfo is None:
        last = last.replace(tzinfo=timezone.utc)
    interval = timedelta(hours=get_interval_hours())
    return (now - last) >= interval
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Automatic state transitions (pure function, no LLM)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def apply_automatic_transitions(now: Optional[datetime] = None) -> Dict[str, int]:
    """Move agent-created skills between active / stale / archived by age.

    Every row from ``skill_usage.agent_created_report()`` is counted as
    checked. Pinned rows are then skipped. For the rest, the anchor time is
    ``last_used_at``, else ``created_at``, else *now*; naive anchors are read
    as UTC.

      - anchor at or before the archive cutoff and not yet archived: archive
      - anchor at or before the stale cutoff and currently active: mark stale
      - anchor after the stale cutoff and currently stale: reactivate

    Args:
        now: Reference time; defaults to the current UTC time. A naive value
            is interpreted as UTC.

    Returns:
        Counters with keys ``marked_stale``, ``archived``, ``reactivated``
        and ``checked``.
    """
    from tools import skill_usage as _u

    if now is None:
        now = datetime.now(timezone.utc)
    elif now.tzinfo is None:
        # Anchors are forced to be timezone-aware below, so naive cutoffs
        # would make the comparisons raise TypeError.
        now = now.replace(tzinfo=timezone.utc)
    stale_cutoff = now - timedelta(days=get_stale_after_days())
    archive_cutoff = now - timedelta(days=get_archive_after_days())

    counts = {"marked_stale": 0, "archived": 0, "reactivated": 0, "checked": 0}

    for row in _u.agent_created_report():
        counts["checked"] += 1
        name = row["name"]
        if row.get("pinned"):
            continue

        last_used = _parse_iso(row.get("last_used_at"))
        # If never used, treat created_at as the anchor so new skills
        # don't immediately archive themselves.
        anchor = last_used or _parse_iso(row.get("created_at")) or now
        if anchor.tzinfo is None:
            anchor = anchor.replace(tzinfo=timezone.utc)

        current = row.get("state", _u.STATE_ACTIVE)

        if anchor <= archive_cutoff and current != _u.STATE_ARCHIVED:
            ok, _msg = _u.archive_skill(name)
            if ok:
                counts["archived"] += 1
        elif anchor <= stale_cutoff and current == _u.STATE_ACTIVE:
            _u.set_state(name, _u.STATE_STALE)
            counts["marked_stale"] += 1
        elif anchor > stale_cutoff and current == _u.STATE_STALE:
            # Skill got used again after being marked stale — reactivate.
            _u.set_state(name, _u.STATE_ACTIVE)
            counts["reactivated"] += 1

    return counts
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Review prompt for the forked agent
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Instruction text for the review agent. The adjacent string literals below
# are joined by implicit concatenation into one string; explicit "\n" escapes
# are the only line breaks in the resulting prompt.
CURATOR_REVIEW_PROMPT = (
    "You are running as Hermes' background skill CURATOR. This is an "
    "UMBRELLA-BUILDING consolidation pass, not a passive audit and not a "
    "duplicate-finder.\n\n"
    "The goal of the skill collection is a LIBRARY OF CLASS-LEVEL "
    "INSTRUCTIONS AND EXPERIENTIAL KNOWLEDGE. A collection of hundreds of "
    "narrow skills where each one captures one session's specific bug is "
    "a FAILURE of the library — not a feature. An agent searching skills "
    "matches on descriptions, not on exact names; one broad umbrella "
    "skill with labeled subsections beats five narrow siblings for "
    "discoverability, not the other way around.\n\n"
    "The right target shape is CLASS-LEVEL skills with rich SKILL.md "
    "bodies + `references/`, `templates/`, and `scripts/` subfiles for "
    "session-specific detail — not one-session-one-skill micro-entries.\n\n"
    "Hard rules — do not violate:\n"
    "1. DO NOT touch bundled or hub-installed skills. The candidate list "
    "below is already filtered to agent-created skills only.\n"
    "2. DO NOT delete any skill. Archiving (moving the skill's directory "
    "into ~/.hermes/skills/.archive/) is the maximum destructive action. "
    "Archives are recoverable; deletion is not.\n"
    "3. DO NOT touch skills shown as pinned=yes. Skip them entirely.\n"
    "4. DO NOT use usage counters as a reason to skip consolidation. The "
    "counters are new and often mostly zero. Judge overlap on CONTENT, "
    "not on use_count. 'use=0' is not evidence a skill is valuable; it's "
    "absence of evidence either way.\n"
    "5. DO NOT reject consolidation on the grounds that 'each skill has "
    "a distinct trigger'. Pairwise distinctness is the wrong bar. The "
    "right bar is: 'would a human maintainer write this as N separate "
    "skills, or as one skill with N labeled subsections?' When the "
    "answer is the latter, merge.\n\n"
    "How to work — not optional:\n"
    "1. Scan the full candidate list. Identify PREFIX CLUSTERS (skills "
    "sharing a first word or domain keyword). Examples you are likely "
    "to find: hermes-config-*, hermes-dashboard-*, gateway-*, codex-*, "
    "ollama-*, anthropic-*, gemini-*, mcp-*, salvage-*, pr-*, "
    "competitor-*, python-*, security-*, etc. Expect 10-25 clusters.\n"
    "2. For each cluster with 2+ members, do NOT ask 'are these pairs "
    "overlapping?' — ask 'what is the UMBRELLA CLASS these skills all "
    "serve? Would a maintainer name that class and write one skill for "
    "it?' If yes, pick (or create) the umbrella and absorb the siblings "
    "into it.\n"
    "3. Three ways to consolidate — use the right one per cluster:\n"
    " a. MERGE INTO EXISTING UMBRELLA — one skill in the cluster is "
    "already broad enough to be the umbrella (example: `pr-triage-"
    "salvage` for the PR review cluster). Patch it to add a labeled "
    "section for each sibling's unique insight, then archive the "
    "siblings.\n"
    " b. CREATE A NEW UMBRELLA SKILL.md — no existing member is broad "
    "enough. Use skill_manage action=create to write a new class-level "
    "skill whose SKILL.md covers the shared workflow and has short "
    "labeled subsections. Archive the now-absorbed narrow siblings.\n"
    " c. DEMOTE TO REFERENCES/TEMPLATES/SCRIPTS — a sibling has "
    "narrow-but-valuable session-specific content. Move it into the "
    "umbrella's appropriate support directory:\n"
    " • `references/<topic>.md` for session-specific detail OR "
    "condensed knowledge banks (quoted research, API docs excerpts, "
    "domain notes, provider quirks, reproduction recipes)\n"
    " • `templates/<name>.<ext>` for starter files meant to be "
    "copied and modified\n"
    " • `scripts/<name>.<ext>` for statically re-runnable actions "
    "(verification scripts, fixture generators, probes)\n"
    " Then archive the old sibling. Use `terminal` with `mkdir -p "
    "~/.hermes/skills/<umbrella>/references/ && mv ... <umbrella>/"
    "references/<topic>.md` (or templates/ / scripts/).\n"
    "4. Also flag skills whose NAME is too narrow (contains a PR number, "
    "a feature codename, a specific error string, an 'audit' / "
    "'diagnosis' / 'salvage' session artifact). These almost always "
    "belong as a subsection or support file under a class-level umbrella.\n"
    "5. Iterate. After one consolidation round, scan the remaining set "
    "and look for the NEXT umbrella opportunity. Don't stop after 3 "
    "merges.\n\n"
    "Your toolset:\n"
    " - skills_list, skill_view — read the current landscape\n"
    " - skill_manage action=patch — add sections to the umbrella\n"
    " - skill_manage action=create — create a new umbrella SKILL.md\n"
    " - skill_manage action=write_file — add a references/, templates/, "
    "or scripts/ file under an existing skill (the skill must already "
    "exist)\n"
    " - terminal — mv a sibling into the archive "
    "OR move its content into a support subfile\n\n"
    "'keep' is a legitimate decision ONLY when the skill is already a "
    "class-level umbrella and none of the proposed merges would improve "
    "discoverability. 'This is narrow but distinct from its siblings' "
    "is NOT a reason to keep — it's a reason to move it under an "
    "umbrella as a subsection or support file.\n\n"
    "Expected output: real umbrella-ification. Process every obvious "
    "cluster. If you end the pass with fewer than 10 archives, you "
    "stopped too early — go back and look at the clusters you left "
    "alone.\n\n"
    "When done, write a summary with: clusters processed, skills "
    "patched/absorbed, skills demoted to references/templates/scripts, "
    "skills archived, new umbrellas created, and clusters you "
    "deliberately left alone with one line each."
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Per-run reports — {YYYYMMDD-HHMMSS}/run.json + REPORT.md under logs/curator/
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _reports_root() -> Path:
    """Return the directory that holds curator run reports.

    This is ``logs/curator`` under the Hermes home directory, which keeps
    run reports out of the ``skills`` directory.
    """
    logs_dir = get_hermes_home() / "logs"
    return logs_dir / "curator"
|
||||
|
||||
|
||||
def _write_run_report(
    *,
    started_at: datetime,
    elapsed_seconds: float,
    auto_counts: Dict[str, int],
    auto_summary: str,
    before_report: List[Dict[str, Any]],
    before_names: Set[str],
    after_report: List[Dict[str, Any]],
    llm_meta: Dict[str, Any],
) -> Optional[Path]:
    """Write run.json + REPORT.md under logs/curator/{YYYYMMDD-HHMMSS}/.

    Args:
        started_at: Run start time; names the run directory and is stored
            in the payload in ISO format.
        elapsed_seconds: Run duration, stored rounded to two decimals.
        auto_counts: Counters from the automatic-transition pass, stored
            verbatim as ``auto_transitions``.
        auto_summary: Accepted for the caller's convenience; not written to
            either report file by this function.
        before_report: Skill rows captured before the run.
        before_names: Skill names captured before the run.
        after_report: Skill rows captured after the run.
        llm_meta: Review-pass metadata; the keys read here are ``model``,
            ``provider``, ``final``, ``summary``, ``error`` and
            ``tool_calls``.

    Returns:
        The run directory once it has been created (a failure writing either
        file is only logged), or ``None`` if the directory could not be
        created — reporting is best-effort.
    """
    root = _reports_root()
    try:
        root.mkdir(parents=True, exist_ok=True)
    except Exception as e:
        logger.debug("Curator report dir create failed: %s", e)
        return None

    stamp = started_at.strftime("%Y%m%d-%H%M%S")
    run_dir = root / stamp
    # If we crash-reran within the same second, append a disambiguator
    # (the first collision becomes "<stamp>-2").
    suffix = 1
    while run_dir.exists():
        suffix += 1
        run_dir = root / f"{stamp}-{suffix}"
    try:
        run_dir.mkdir(parents=True, exist_ok=False)
    except Exception as e:
        logger.debug("Curator run dir create failed: %s", e)
        return None

    # Diff before/after
    after_by_name = {r.get("name"): r for r in after_report if isinstance(r, dict)}
    after_names = set(after_by_name.keys())
    removed = sorted(before_names - after_names)  # archived during this run
    added = sorted(after_names - before_names)    # new skills this run
    before_by_name = {r.get("name"): r for r in before_report if isinstance(r, dict)}

    # State transitions between the two snapshots (e.g. active -> stale)
    transitions: List[Dict[str, str]] = []
    for name in sorted(after_names & before_names):
        s_before = (before_by_name.get(name) or {}).get("state")
        s_after = (after_by_name.get(name) or {}).get("state")
        if s_before and s_after and s_before != s_after:
            transitions.append({"name": name, "from": s_before, "to": s_after})

    # Classify LLM tool calls. Entries that are not dicts are skipped so a
    # malformed record cannot abort the report after its directory exists.
    tc_counts: Dict[str, int] = {}
    for tc in llm_meta.get("tool_calls", []) or []:
        if not isinstance(tc, dict):
            continue
        tc_name = tc.get("name", "unknown")
        tc_counts[tc_name] = tc_counts.get(tc_name, 0) + 1

    payload = {
        "started_at": started_at.isoformat(),
        "duration_seconds": round(elapsed_seconds, 2),
        "model": llm_meta.get("model", ""),
        "provider": llm_meta.get("provider", ""),
        "auto_transitions": auto_counts,
        "counts": {
            "before": len(before_names),
            "after": len(after_names),
            "delta": len(after_names) - len(before_names),
            "archived_this_run": len(removed),
            "added_this_run": len(added),
            "state_transitions": len(transitions),
            "tool_calls_total": sum(tc_counts.values()),
        },
        "tool_call_counts": tc_counts,
        "archived": removed,
        "added": added,
        "state_transitions": transitions,
        "llm_final": llm_meta.get("final", ""),
        "llm_summary": llm_meta.get("summary", ""),
        "llm_error": llm_meta.get("error"),
        "tool_calls": llm_meta.get("tool_calls", []),
    }

    # run.json — machine-readable, full fidelity
    try:
        (run_dir / "run.json").write_text(
            json.dumps(payload, indent=2, ensure_ascii=False) + "\n",
            encoding="utf-8",
        )
    except Exception as e:
        logger.debug("Curator run.json write failed: %s", e)

    # REPORT.md — human-readable
    try:
        md = _render_report_markdown(payload)
        (run_dir / "REPORT.md").write_text(md, encoding="utf-8")
    except Exception as e:
        logger.debug("Curator REPORT.md write failed: %s", e)

    return run_dir
|
||||
|
||||
|
||||
def _render_report_markdown(p: Dict[str, Any]) -> str:
    """Build the Markdown text of a curator run report.

    *p* is the run payload dict (the same structure serialised to
    ``run.json``).  Every key is optional; missing or falsy values fall back
    to neutral defaults so that a partial payload still renders.

    The document is laid out as: title, one-line run header, optional LLM
    error banner, automatic-transition counters, LLM pass counters, optional
    lists (archived / added / state transitions), the LLM's final text (or
    its short summary when there is no final text and no error), and a fixed
    recovery footer.
    """
    counts = p.get("counts") or {}
    auto = p.get("auto_transitions") or {}
    tool_tally = p.get("tool_call_counts") or {}
    error = p.get("llm_error")

    # Duration is shown as "Xm Ys", or just "Ys" for sub-minute runs.
    minutes, seconds = divmod(int(p.get("duration_seconds", 0) or 0), 60)
    if minutes:
        dur_label = f"{minutes}m {seconds}s"
    else:
        dur_label = f"{seconds}s"

    model = p.get("model") or "(not resolved)"
    provider = p.get("provider") or "(not resolved)"

    out: List[str] = [
        f"# Curator run — {p.get('started_at', '')}\n",
        (
            f"Model: `{model}` via `{provider}` · Duration: {dur_label} · "
            f"Agent-created skills: {counts.get('before', 0)} → {counts.get('after', 0)} "
            f"({counts.get('delta', 0):+d})\n"
        ),
    ]

    if error:
        out.append(f"> ⚠ LLM pass error: `{error}`\n")

    # Counters from the automatic (non-LLM) transition step.
    out.append("## Auto-transitions (pure, no LLM)\n")
    for label, key in (
        ("checked", "checked"),
        ("marked stale", "marked_stale"),
        ("archived", "archived"),
        ("reactivated", "reactivated"),
    ):
        out.append(f"- {label}: {auto.get(key, 0)}")
    out.append("")

    # Counters from the LLM pass; per-tool tally is sorted by tool name.
    by_name = ", ".join(f"{k}={v}" for k, v in sorted(tool_tally.items())) or "none"
    out.extend(
        [
            "## LLM consolidation pass\n",
            f"- tool calls: **{counts.get('tool_calls_total', 0)}** (by name: {by_name})",
            f"- archived this run: **{counts.get('archived_this_run', 0)}**",
            f"- new skills this run: **{counts.get('added_this_run', 0)}**",
            f"- state transitions (active ↔ stale ↔ archived): "
            f"**{counts.get('state_transitions', 0)}**",
            "",
        ]
    )

    # Archived skills: list at most the first 50, then note how many remain.
    archived = p.get("archived") or []
    if archived:
        inline_cap = 50
        out.append(f"### Skills archived ({len(archived)})\n")
        out.append(
            "_Archived skills are at `~/.hermes/skills/.archive/`. "
            "Restore any via `hermes curator restore <name>`._\n"
        )
        out.extend(f"- `{n}`" for n in archived[:inline_cap])
        if len(archived) > inline_cap:
            out.append(
                f"- … and {len(archived) - inline_cap} more (see `run.json` for the full list)"
            )
        out.append("")

    # Skills that exist after the run but did not before.
    added = p.get("added") or []
    if added:
        out.append(f"### New skills this run ({len(added)})\n")
        out.append(
            "_Usually these are new class-level umbrellas created via `skill_manage action=create`._\n"
        )
        out.extend(f"- `{n}`" for n in added)
        out.append("")

    # Per-skill state changes.
    transitions = p.get("state_transitions") or []
    if transitions:
        out.append(f"### State transitions ({len(transitions)})\n")
        out.extend(
            f"- `{t.get('name')}`: {t.get('from')} → {t.get('to')}" for t in transitions
        )
        out.append("")

    # Prefer the full final response; fall back to the short summary only
    # when there is no final text and no error banner was already printed.
    final = (p.get("llm_final") or "").strip()
    if final:
        out += ["## LLM final summary\n", final, ""]
    elif not error:
        llm_sum = p.get("llm_summary") or ""
        if llm_sum:
            out += ["## LLM summary\n", llm_sum, ""]

    # Fixed footer explaining how to undo an archive.
    out += [
        "## Recovery\n",
        "- Restore an archived skill: `hermes curator restore <name>`",
        "- All archives live under `~/.hermes/skills/.archive/` and are recoverable by `mv`",
        "- See `run.json` in this directory for the full machine-readable record.",
        "",
    ]

    return "\n".join(out)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Orchestrator — spawn a forked AIAgent for the LLM review pass
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _render_candidate_list() -> str:
    """Return a plain-text listing of agent-created skills and their usage.

    The rows come from ``skill_usage.agent_created_report()``.  When it
    returns nothing, a fixed "No agent-created skills to review." sentence is
    returned instead.  Otherwise the result is a header line with the row
    count followed by one ``- <name> state=… pinned=… use=… view=…
    patches=… last_used=…`` line per skill.  ``name`` and ``state`` are
    required keys of each row; the remaining fields fall back to defaults.
    """
    report = skill_usage.agent_created_report()
    if not report:
        return "No agent-created skills to review."

    body = [
        (
            f"- {entry['name']} "
            f"state={entry['state']} "
            f"pinned={'yes' if entry.get('pinned') else 'no'} "
            f"use={entry.get('use_count', 0)} "
            f"view={entry.get('view_count', 0)} "
            f"patches={entry.get('patch_count', 0)} "
            f"last_used={entry.get('last_used_at') or 'never'}"
        )
        for entry in report
    ]
    header = f"Agent-created skills ({len(report)}):\n"
    return "\n".join([header, *body])
|
||||
|
||||
|
||||
def run_curator_review(
    on_summary: Optional[Callable[[str], None]] = None,
    synchronous: bool = False,
) -> Dict[str, Any]:
    """Execute a single curator review pass.

    Steps:
      1. Apply automatic state transitions (pure, no LLM).
      2. If there are agent-created skills, spawn a forked AIAgent that runs
         the LLM review prompt against the current candidate list.
      3. Update .curator_state with last_run_at and a one-line summary.
      4. Invoke *on_summary* with a user-visible description.

    If *synchronous* is True, the LLM review runs in the calling thread; the
    default is to spawn a daemon thread so the caller returns immediately.

    Args:
        on_summary: Optional callback invoked once, after the LLM pass, with
            a ``"curator: ..."`` one-line description.  Exceptions raised by
            the callback are logged at debug level and otherwise ignored.
        synchronous: Run the LLM pass inline instead of on a daemon thread.

    Returns:
        A dict with ``started_at`` (ISO timestamp), ``auto_transitions`` (the
        counts returned by the automatic step) and ``summary_so_far`` (the
        automatic-step summary; the LLM part may not have finished yet).
    """
    start = datetime.now(timezone.utc)
    counts = apply_automatic_transitions(now=start)

    auto_summary_parts = []
    if counts["marked_stale"]:
        auto_summary_parts.append(f"{counts['marked_stale']} marked stale")
    if counts["archived"]:
        auto_summary_parts.append(f"{counts['archived']} archived")
    if counts["reactivated"]:
        auto_summary_parts.append(f"{counts['reactivated']} reactivated")
    auto_summary = ", ".join(auto_summary_parts) if auto_summary_parts else "no changes"

    # Persist state before the LLM pass so a crash mid-review still records
    # the run and doesn't immediately re-trigger.
    state = load_state()
    state["last_run_at"] = start.isoformat()
    state["run_count"] = int(state.get("run_count", 0)) + 1
    state["last_run_summary"] = f"auto: {auto_summary}"
    save_state(state)

    def _placeholder_meta(summary: str, error: Optional[str] = None) -> Dict[str, Any]:
        # Same shape as the dict _run_llm_review returns, for the paths where
        # the LLM was never (successfully) invoked.
        return {
            "final": "",
            "summary": summary,
            "model": "",
            "provider": "",
            "tool_calls": [],
            "error": error,
        }

    def _llm_pass():
        # Snapshot skill state BEFORE the LLM pass so the report can diff.
        try:
            before_report = skill_usage.agent_created_report()
        except Exception as e:
            logger.debug("Curator pre-pass skill snapshot failed: %s", e, exc_info=True)
            before_report = []
        before_names = {r.get("name") for r in before_report if isinstance(r, dict)}

        llm_meta: Dict[str, Any] = {}
        try:
            candidate_list = _render_candidate_list()
            if "No agent-created skills" in candidate_list:
                final_summary = f"auto: {auto_summary}; llm: skipped (no candidates)"
                llm_meta = _placeholder_meta("skipped (no candidates)")
            else:
                prompt = f"{CURATOR_REVIEW_PROMPT}\n\n{candidate_list}"
                llm_meta = _run_llm_review(prompt)
                final_summary = (
                    f"auto: {auto_summary}; llm: {llm_meta.get('summary', 'no change')}"
                )
        except Exception as e:
            logger.debug("Curator LLM pass failed: %s", e, exc_info=True)
            final_summary = f"auto: {auto_summary}; llm: error ({e})"
            llm_meta = _placeholder_meta(f"error ({e})", error=str(e))

        elapsed = (datetime.now(timezone.utc) - start).total_seconds()
        # Reload rather than reuse `state`: the pass may have run for a long
        # time on another thread since the first save.
        state2 = load_state()
        state2["last_run_duration_seconds"] = elapsed
        state2["last_run_summary"] = final_summary

        # Write the per-run report. Runs in a best-effort try so a
        # reporting bug never breaks the curator itself. Report path is
        # recorded in state so `hermes curator status` can point at it.
        try:
            after_report = skill_usage.agent_created_report()
        except Exception as e:
            logger.debug("Curator post-pass skill snapshot failed: %s", e, exc_info=True)
            after_report = []
        try:
            report_path = _write_run_report(
                started_at=start,
                elapsed_seconds=elapsed,
                auto_counts=counts,
                auto_summary=auto_summary,
                before_report=before_report,
                before_names=before_names,
                after_report=after_report,
                llm_meta=llm_meta,
            )
            if report_path is not None:
                state2["last_report_path"] = str(report_path)
        except Exception as e:
            logger.debug("Curator report write failed: %s", e, exc_info=True)

        save_state(state2)

        if on_summary:
            try:
                on_summary(f"curator: {final_summary}")
            except Exception as e:
                # The callback is a display nicety; never let it break the
                # pass, but leave a trace instead of failing silently.
                logger.debug("Curator on_summary callback failed: %s", e, exc_info=True)

    if synchronous:
        _llm_pass()
    else:
        t = threading.Thread(target=_llm_pass, daemon=True, name="curator-review")
        t.start()

    return {
        "started_at": start.isoformat(),
        "auto_transitions": counts,
        "summary_so_far": auto_summary,
    }
|
||||
|
||||
|
||||
def _truncate_for_summary(text: str, limit: int = 240) -> str:
    """Return *text* cut to *limit* characters, with a trailing "…" if cut."""
    return (text[:limit] + "…") if len(text) > limit else text


def _collect_review_tool_calls(agent: Any, arg_limit: int = 400) -> List[Dict[str, Any]]:
    """Extract ``{name, arguments}`` records from *agent*'s session messages.

    Reads ``agent._session_messages`` (if present), skips non-dict entries,
    and truncates string argument payloads longer than *arg_limit* so a giant
    skill_manage create doesn't blow up the report.
    """
    calls: List[Dict[str, Any]] = []
    for msg in getattr(agent, "_session_messages", []) or []:
        if not isinstance(msg, dict):
            continue
        for tc in msg.get("tool_calls") or []:
            if not isinstance(tc, dict):
                continue
            fn = tc.get("function") or {}
            name = fn.get("name") or ""
            args_raw = fn.get("arguments") or ""
            if isinstance(args_raw, str) and len(args_raw) > arg_limit:
                args_raw = args_raw[:arg_limit] + "…"
            calls.append({"name": name, "arguments": args_raw})
    return calls


def _run_llm_review(prompt: str) -> Dict[str, Any]:
    """Spawn an AIAgent fork to run the curator review prompt.

    Returns a dict with:
      - final: full (untruncated) final response from the reviewer
      - summary: short summary suitable for state file (240-char cap); on
        failure this is the (capped) error text
      - model, provider: what the fork actually ran on
      - tool_calls: list of {name, arguments} for every tool call made during
        the pass (arguments may be truncated for readability); on a mid-run
        failure this holds whatever calls were recorded before the failure
      - error: set if the pass failed mid-run; final/summary may still be empty

    Never raises; callers get a structured failure instead.
    """
    import contextlib
    result_meta: Dict[str, Any] = {
        "final": "",
        "summary": "",
        "model": "",
        "provider": "",
        "tool_calls": [],
        "error": None,
    }
    try:
        from run_agent import AIAgent
    except Exception as e:
        result_meta["error"] = f"AIAgent import failed: {e}"
        result_meta["summary"] = _truncate_for_summary(result_meta["error"])
        return result_meta

    # Resolve provider + model the same way the CLI does, so the curator
    # fork inherits the user's active main config rather than falling
    # through to an empty provider/model pair (which sends HTTP 400
    # "No models provided"). AIAgent() without explicit provider/model
    # arguments hits an auto-resolution path that fails for OAuth-only
    # providers and for pool-backed credentials.
    _api_key = None
    _base_url = None
    _api_mode = None
    _resolved_provider = None
    _model_name = ""
    try:
        from hermes_cli.config import load_config
        from hermes_cli.runtime_provider import resolve_runtime_provider
        _cfg = load_config()
        _m = _cfg.get("model", {}) if isinstance(_cfg.get("model"), dict) else {}
        _provider = _m.get("provider") or "auto"
        _model_name = _m.get("default") or _m.get("model") or ""
        _rp = resolve_runtime_provider(
            requested=_provider, target_model=_model_name
        )
        _api_key = _rp.get("api_key")
        _base_url = _rp.get("base_url")
        _api_mode = _rp.get("api_mode")
        _resolved_provider = _rp.get("provider") or _provider
    except Exception as e:
        # Resolution is best-effort: fall through with whatever was resolved
        # so far and let the AIAgent construction below report the failure.
        logger.debug("Curator provider resolution failed: %s", e, exc_info=True)

    result_meta["model"] = _model_name
    result_meta["provider"] = _resolved_provider or ""

    review_agent = None
    try:
        review_agent = AIAgent(
            model=_model_name,
            provider=_resolved_provider,
            api_key=_api_key,
            base_url=_base_url,
            api_mode=_api_mode,
            # Umbrella-building over a large skill collection is worth a
            # high iteration ceiling — the pass typically takes 50-100
            # API calls against hundreds of candidate skills. The
            # single-session review path caps itself at a much smaller
            # number because it's not doing a curation sweep.
            max_iterations=9999,
            quiet_mode=True,
            platform="curator",
            skip_context_files=True,
            skip_memory=True,
        )
        # Disable recursive nudges — the curator must never spawn its own review.
        review_agent._memory_nudge_interval = 0
        review_agent._skill_nudge_interval = 0

        # Redirect the forked agent's stdout/stderr to /dev/null while it
        # runs so its tool-call chatter doesn't pollute the foreground
        # terminal. The background-thread runner also hides it; this
        # belt-and-suspenders path matters when a caller invokes
        # run_curator_review(synchronous=True) from the CLI.
        #
        # NOTE(review): contextlib.redirect_stdout/redirect_stderr replace
        # sys.stdout/sys.stderr for the whole process, not just this thread.
        # When this function runs on a background thread, anything other
        # threads write through sys.stdout/sys.stderr is discarded for the
        # duration of the pass too — confirm that is acceptable.
        with open(os.devnull, "w") as _devnull, \
                contextlib.redirect_stdout(_devnull), \
                contextlib.redirect_stderr(_devnull):
            conv_result = review_agent.run_conversation(user_message=prompt)

        final = ""
        if isinstance(conv_result, dict):
            final = str(conv_result.get("final_response") or "").strip()
        result_meta["final"] = final
        result_meta["summary"] = _truncate_for_summary(final) or "no change"

        # Collect tool calls for the report from the forked agent's
        # session messages.
        result_meta["tool_calls"] = _collect_review_tool_calls(review_agent)
    except Exception as e:
        result_meta["error"] = f"error: {e}"
        # Keep the documented 240-char cap for the state-file summary; the
        # full text stays available under "error".
        result_meta["summary"] = _truncate_for_summary(result_meta["error"])
        # Best-effort: keep whatever tool calls were recorded before the
        # failure so the run report still shows what the pass did.
        if review_agent is not None and not result_meta["tool_calls"]:
            try:
                result_meta["tool_calls"] = _collect_review_tool_calls(review_agent)
            except Exception as collect_err:
                logger.debug(
                    "Curator tool-call collection failed: %s", collect_err, exc_info=True
                )
    finally:
        if review_agent is not None:
            try:
                review_agent.close()
            except Exception as close_err:
                # Cleanup is best-effort; never mask the pass result.
                logger.debug("Curator review agent close failed: %s", close_err)
    return result_meta
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Public entrypoint for the session-start hook
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def maybe_run_curator(
    *,
    idle_for_seconds: Optional[float] = None,
    on_summary: Optional[Callable[[str], None]] = None,
) -> Optional[Dict[str, Any]]:
    """Start a curator pass when every gate allows it; never raise.

    Gates, in order: ``should_run_now()`` must be truthy, and — only when the
    caller supplies *idle_for_seconds* — the idle time must not be below
    ``get_min_idle_hours()`` converted to seconds.  When both pass, the
    result of ``run_curator_review(on_summary=on_summary)`` is returned.
    In every other case, including any exception (logged at debug level),
    the function returns ``None``.
    """
    try:
        if should_run_now():
            idle_enough = True
            # Idle gating applies only when a measurement was provided.
            if idle_for_seconds is not None:
                required_idle_s = get_min_idle_hours() * 3600.0
                idle_enough = not (idle_for_seconds < required_idle_s)
            if idle_enough:
                return run_curator_review(on_summary=on_summary)
    except Exception as exc:
        logger.debug("maybe_run_curator failed: %s", exc, exc_info=True)
    return None
|
||||
@@ -200,6 +200,9 @@ def get_external_skills_dirs() -> List[Path]:
|
||||
if not isinstance(raw_dirs, list):
|
||||
return []
|
||||
|
||||
from hermes_constants import get_hermes_home
|
||||
|
||||
hermes_home = get_hermes_home()
|
||||
local_skills = get_skills_dir().resolve()
|
||||
seen: Set[Path] = set()
|
||||
result: List[Path] = []
|
||||
@@ -210,7 +213,12 @@ def get_external_skills_dirs() -> List[Path]:
|
||||
continue
|
||||
# Expand ~ and environment variables
|
||||
expanded = os.path.expanduser(os.path.expandvars(entry))
|
||||
p = Path(expanded).resolve()
|
||||
p = Path(expanded)
|
||||
# Resolve relative paths against HERMES_HOME, not cwd
|
||||
if not p.is_absolute():
|
||||
p = (hermes_home / p).resolve()
|
||||
else:
|
||||
p = p.resolve()
|
||||
if p == local_skills:
|
||||
continue
|
||||
if p in seen:
|
||||
|
||||
@@ -85,6 +85,9 @@ class AnthropicTransport(ProviderTransport):
|
||||
from agent.anthropic_adapter import _to_plain_data
|
||||
from agent.transports.types import ToolCall
|
||||
|
||||
strip_tool_prefix = kwargs.get("strip_tool_prefix", False)
|
||||
_MCP_PREFIX = "mcp_"
|
||||
|
||||
text_parts = []
|
||||
reasoning_parts = []
|
||||
reasoning_details = []
|
||||
@@ -99,10 +102,13 @@ class AnthropicTransport(ProviderTransport):
|
||||
if isinstance(block_dict, dict):
|
||||
reasoning_details.append(block_dict)
|
||||
elif block.type == "tool_use":
|
||||
name = block.name
|
||||
if strip_tool_prefix and name.startswith(_MCP_PREFIX):
|
||||
name = name[len(_MCP_PREFIX):]
|
||||
tool_calls.append(
|
||||
ToolCall(
|
||||
id=block.id,
|
||||
name=block.name,
|
||||
name=name,
|
||||
arguments=json.dumps(block.input),
|
||||
)
|
||||
)
|
||||
|
||||
@@ -180,6 +180,11 @@ terminal:
|
||||
# lifetime_seconds: 300
|
||||
# docker_image: "nikolaik/python-nodejs:python3.11-nodejs20"
|
||||
# docker_mount_cwd_to_workspace: true # Explicit opt-in: mount your launch cwd into /workspace
|
||||
# # Optional: run the container as your host user's uid:gid so files written
|
||||
# # into bind-mounted dirs are owned by you, not root. Drops SETUID/SETGID
|
||||
# # caps too since no gosu privilege drop is needed. Leave off if your
|
||||
# # chosen docker_image expects to start as root.
|
||||
# docker_run_as_host_user: true
|
||||
# # Optional: explicitly forward selected env vars into Docker.
|
||||
# # These values come from your current shell first, then ~/.hermes/.env.
|
||||
# # Warning: anything forwarded here is visible to commands run in the container.
|
||||
|
||||
@@ -80,6 +80,11 @@ _COMMAND_SPINNER_FRAMES = ("⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧
|
||||
# Load .env from ~/.hermes/.env first, then project root as dev fallback.
|
||||
# User-managed env files should override stale shell exports on restart.
|
||||
from hermes_constants import get_hermes_home, display_hermes_home
|
||||
from hermes_cli.browser_connect import (
|
||||
DEFAULT_BROWSER_CDP_URL,
|
||||
manual_chrome_debug_command,
|
||||
try_launch_chrome_debug,
|
||||
)
|
||||
from hermes_cli.env_loader import load_hermes_dotenv
|
||||
from utils import base_url_host_matches
|
||||
|
||||
@@ -240,65 +245,6 @@ def _parse_service_tier_config(raw: str) -> str | None:
|
||||
logger.warning("Unknown service_tier '%s', ignoring", raw)
|
||||
return None
|
||||
|
||||
|
||||
|
||||
def _get_chrome_debug_candidates(system: str) -> list[str]:
    """List existing Chromium-family browser executables for CDP auto-launch.

    *system* selects the probing strategy: ``"Darwin"`` checks fixed
    ``/Applications`` bundle paths, ``"Windows"`` checks ``PATH`` and then
    well-known install folders under ``ProgramFiles``, ``ProgramFiles(x86)``
    and ``LOCALAPPDATA``, and any other value checks ``PATH`` for the usual
    Linux binary names.  Only paths that are existing files are returned, in
    probe order, de-duplicated by normalised path.
    """
    if system == "Darwin":
        probes = [
            "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
            "/Applications/Chromium.app/Contents/MacOS/Chromium",
            "/Applications/Brave Browser.app/Contents/MacOS/Brave Browser",
            "/Applications/Microsoft Edge.app/Contents/MacOS/Microsoft Edge",
        ]
    elif system == "Windows":
        probes = [
            shutil.which(exe)
            for exe in (
                "chrome.exe", "msedge.exe", "brave.exe", "chromium.exe",
                "chrome", "msedge", "brave", "chromium",
            )
        ]
        install_layouts = (
            ("Google", "Chrome", "Application", "chrome.exe"),
            ("Chromium", "Application", "chrome.exe"),
            ("Chromium", "Application", "chromium.exe"),
            ("BraveSoftware", "Brave-Browser", "Application", "brave.exe"),
            ("Microsoft", "Edge", "Application", "msedge.exe"),
        )
        for root in (
            os.environ.get("ProgramFiles"),
            os.environ.get("ProgramFiles(x86)"),
            os.environ.get("LOCALAPPDATA"),
        ):
            if root:
                probes.extend(os.path.join(root, *tail) for tail in install_layouts)
    else:
        probes = [
            shutil.which(exe)
            for exe in (
                "google-chrome", "google-chrome-stable", "chromium-browser",
                "chromium", "brave-browser", "microsoft-edge",
            )
        ]

    found: list[str] = []
    accepted_keys: set[str] = set()
    for probe in probes:
        if not probe:
            # shutil.which() yields None for names that are not on PATH.
            continue
        key = os.path.normcase(os.path.normpath(probe))
        if key not in accepted_keys and os.path.isfile(probe):
            found.append(probe)
            accepted_keys.add(key)
    return found
|
||||
|
||||
|
||||
def load_cli_config() -> Dict[str, Any]:
|
||||
"""
|
||||
Load CLI configuration from config files.
|
||||
@@ -563,6 +509,7 @@ def load_cli_config() -> Dict[str, Any]:
|
||||
"container_persistent": "TERMINAL_CONTAINER_PERSISTENT",
|
||||
"docker_volumes": "TERMINAL_DOCKER_VOLUMES",
|
||||
"docker_mount_cwd_to_workspace": "TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE",
|
||||
"docker_run_as_host_user": "TERMINAL_DOCKER_RUN_AS_HOST_USER",
|
||||
"sandbox_dir": "TERMINAL_SANDBOX_DIR",
|
||||
# Persistent shell (non-local backends)
|
||||
"persistent_shell": "TERMINAL_PERSISTENT_SHELL",
|
||||
@@ -5979,7 +5926,29 @@ class HermesCLI:
|
||||
|
||||
print(f"(._.) Unknown cron command: {subcommand}")
|
||||
print(" Available: list, add, edit, pause, resume, run, remove")
|
||||
|
||||
|
||||
def _handle_curator_command(self, cmd: str):
    """Handle /curator slash command.

    Delegates to hermes_cli.curator so the CLI and the `hermes curator`
    subcommand share the same handler set.

    *cmd* is the full slash-command line; its first token (the command
    itself) is dropped and the rest is passed to ``cli_main``.  With no
    arguments the command defaults to ``status``.  Nothing is returned and
    nothing is raised: parse errors and handler failures are printed.
    """
    import shlex

    try:
        tokens = shlex.split(cmd)[1:] if cmd else []
    except ValueError as exc:
        # shlex raises on malformed input such as an unterminated quote;
        # report it instead of letting it escape into the interactive loop.
        print(f"(._.) curator: {exc}")
        return
    if not tokens:
        tokens = ["status"]

    try:
        from hermes_cli.curator import cli_main
        cli_main(tokens)
    except SystemExit:
        # argparse calls sys.exit() on --help or errors; swallow so we
        # don't kill the interactive session.
        pass
    except Exception as exc:
        print(f"(._.) curator: {exc}")
|
||||
|
||||
def _handle_skills_command(self, cmd: str):
|
||||
"""Handle /skills slash command — delegates to hermes_cli.skills_hub."""
|
||||
from hermes_cli.skills_hub import handle_skills_slash
|
||||
@@ -6223,6 +6192,8 @@ class HermesCLI:
|
||||
self.save_conversation()
|
||||
elif canonical == "cron":
|
||||
self._handle_cron_command(cmd_original)
|
||||
elif canonical == "curator":
|
||||
self._handle_curator_command(cmd_original)
|
||||
elif canonical == "skills":
|
||||
with self._busy_command(self._slow_command_status(cmd_original)):
|
||||
self._handle_skills_command(cmd_original)
|
||||
@@ -6606,34 +6577,7 @@ class HermesCLI:
|
||||
|
||||
Returns True if a launch command was executed (doesn't guarantee success).
|
||||
"""
|
||||
import subprocess as _sp
|
||||
|
||||
candidates = _get_chrome_debug_candidates(system)
|
||||
|
||||
if not candidates:
|
||||
return False
|
||||
|
||||
# Dedicated profile dir so debug Chrome won't collide with normal Chrome
|
||||
data_dir = str(_hermes_home / "chrome-debug")
|
||||
os.makedirs(data_dir, exist_ok=True)
|
||||
|
||||
chrome = candidates[0]
|
||||
try:
|
||||
_sp.Popen(
|
||||
[
|
||||
chrome,
|
||||
f"--remote-debugging-port={port}",
|
||||
f"--user-data-dir={data_dir}",
|
||||
"--no-first-run",
|
||||
"--no-default-browser-check",
|
||||
],
|
||||
stdout=_sp.DEVNULL,
|
||||
stderr=_sp.DEVNULL,
|
||||
start_new_session=True, # detach from terminal
|
||||
)
|
||||
return True
|
||||
except Exception:
|
||||
return False
|
||||
return try_launch_chrome_debug(port, system)
|
||||
|
||||
def _handle_browser_command(self, cmd: str):
|
||||
"""Handle /browser connect|disconnect|status — manage live Chrome CDP connection."""
|
||||
@@ -6642,13 +6586,44 @@ class HermesCLI:
|
||||
parts = cmd.strip().split(None, 1)
|
||||
sub = parts[1].lower().strip() if len(parts) > 1 else "status"
|
||||
|
||||
_DEFAULT_CDP = "http://127.0.0.1:9222"
|
||||
_DEFAULT_CDP = DEFAULT_BROWSER_CDP_URL
|
||||
current = os.environ.get("BROWSER_CDP_URL", "").strip()
|
||||
|
||||
if sub.startswith("connect"):
|
||||
# Optionally accept a custom CDP URL: /browser connect ws://host:port
|
||||
connect_parts = cmd.strip().split(None, 2) # ["/browser", "connect", "ws://..."]
|
||||
cdp_url = connect_parts[2].strip() if len(connect_parts) > 2 else _DEFAULT_CDP
|
||||
parsed_cdp = urlparse(cdp_url if "://" in cdp_url else f"http://{cdp_url}")
|
||||
if parsed_cdp.scheme not in {"http", "https", "ws", "wss"}:
|
||||
print()
|
||||
print(
|
||||
f" ⚠ Unsupported browser url scheme: {parsed_cdp.scheme or '(missing)'} "
|
||||
"(expected one of: http, https, ws, wss)"
|
||||
)
|
||||
print()
|
||||
return
|
||||
try:
|
||||
_port = parsed_cdp.port or (443 if parsed_cdp.scheme in {"https", "wss"} else 80)
|
||||
except ValueError:
|
||||
print()
|
||||
print(f" ⚠ Invalid port in browser url: {cdp_url}")
|
||||
print()
|
||||
return
|
||||
if not parsed_cdp.hostname:
|
||||
print()
|
||||
print(f" ⚠ Missing host in browser url: {cdp_url}")
|
||||
print()
|
||||
return
|
||||
_host = parsed_cdp.hostname
|
||||
if parsed_cdp.path.startswith("/devtools/browser/"):
|
||||
cdp_url = parsed_cdp.geturl()
|
||||
else:
|
||||
cdp_url = parsed_cdp._replace(
|
||||
path="",
|
||||
params="",
|
||||
query="",
|
||||
fragment="",
|
||||
).geturl()
|
||||
|
||||
# Clear any existing browser sessions so the next tool call uses the new backend
|
||||
try:
|
||||
@@ -6659,20 +6634,13 @@ class HermesCLI:
|
||||
|
||||
print()
|
||||
|
||||
# Extract port for connectivity checks
|
||||
_port = 9222
|
||||
try:
|
||||
_port = int(cdp_url.rsplit(":", 1)[-1].split("/")[0])
|
||||
except (ValueError, IndexError):
|
||||
pass
|
||||
|
||||
# Check if Chrome is already listening on the debug port
|
||||
import socket
|
||||
_already_open = False
|
||||
try:
|
||||
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
|
||||
s.settimeout(1)
|
||||
s.connect(("127.0.0.1", _port))
|
||||
s.connect((_host, _port))
|
||||
s.close()
|
||||
_already_open = True
|
||||
except (OSError, socket.timeout):
|
||||
@@ -6690,7 +6658,7 @@ class HermesCLI:
|
||||
try:
|
||||
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
|
||||
s.settimeout(1)
|
||||
s.connect(("127.0.0.1", _port))
|
||||
s.connect((_host, _port))
|
||||
s.close()
|
||||
_already_open = True
|
||||
break
|
||||
@@ -6703,33 +6671,22 @@ class HermesCLI:
|
||||
print(" Try again in a few seconds — the debug instance may still be starting")
|
||||
else:
|
||||
print(" ⚠ Could not auto-launch Chrome")
|
||||
# Show manual instructions as fallback
|
||||
_data_dir = str(_hermes_home / "chrome-debug")
|
||||
sys_name = _plat.system()
|
||||
if sys_name == "Darwin":
|
||||
chrome_cmd = (
|
||||
'open -a "Google Chrome" --args'
|
||||
f" --remote-debugging-port=9222"
|
||||
f' --user-data-dir="{_data_dir}"'
|
||||
" --no-first-run --no-default-browser-check"
|
||||
)
|
||||
elif sys_name == "Windows":
|
||||
chrome_cmd = (
|
||||
f'chrome.exe --remote-debugging-port=9222'
|
||||
f' --user-data-dir="{_data_dir}"'
|
||||
f" --no-first-run --no-default-browser-check"
|
||||
)
|
||||
chrome_cmd = manual_chrome_debug_command(_port, sys_name)
|
||||
if chrome_cmd:
|
||||
print(f" Launch Chrome manually:")
|
||||
print(f" {chrome_cmd}")
|
||||
else:
|
||||
chrome_cmd = (
|
||||
f"google-chrome --remote-debugging-port=9222"
|
||||
f' --user-data-dir="{_data_dir}"'
|
||||
f" --no-first-run --no-default-browser-check"
|
||||
)
|
||||
print(f" Launch Chrome manually:")
|
||||
print(f" {chrome_cmd}")
|
||||
print(" No Chrome/Chromium executable found in this environment")
|
||||
else:
|
||||
print(f" ⚠ Port {_port} is not reachable at {cdp_url}")
|
||||
|
||||
if not _already_open:
|
||||
print()
|
||||
print("Browser not connected — start Chrome with remote debugging and retry /browser connect")
|
||||
print()
|
||||
return
|
||||
|
||||
os.environ["BROWSER_CDP_URL"] = cdp_url
|
||||
# Eagerly start the CDP supervisor so pending_dialogs + frame_tree
|
||||
# show up in the next browser_snapshot. No-op if already started.
|
||||
@@ -9344,6 +9301,21 @@ class HermesCLI:
|
||||
self._console_print(f"[dim {_tip_color}]✦ Tip: {_tip}[/]")
|
||||
except Exception:
|
||||
pass # Tips are non-critical — never break startup
|
||||
|
||||
# Curator — kick off a background skill-maintenance pass on startup
|
||||
# if the schedule says we're due. Runs in a daemon thread so it
|
||||
# never blocks the interactive loop. Best-effort; any failure is
|
||||
# swallowed to avoid breaking session startup.
|
||||
try:
|
||||
from agent.curator import maybe_run_curator
|
||||
maybe_run_curator(
|
||||
idle_for_seconds=float("inf"), # CLI startup = fully idle
|
||||
on_summary=lambda msg: self._console_print(
|
||||
f"[dim #6b7684]💾 {msg}[/]"
|
||||
),
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
if self.preloaded_skills and not self._startup_skills_line_shown:
|
||||
skills_label = ", ".join(self.preloaded_skills)
|
||||
self._console_print(
|
||||
|
||||
+71
-10
@@ -38,6 +38,7 @@ from typing import Dict, Optional, Any, List
|
||||
# gateway is a long-running daemon, so its boot cost matters less than
|
||||
# preserving the established test-patch surface.
|
||||
from agent.account_usage import fetch_account_usage, render_account_usage_lines
|
||||
from hermes_cli.config import cfg_get
|
||||
|
||||
# --- Agent cache tuning ---------------------------------------------------
|
||||
# Bounds the per-session AIAgent cache to prevent unbounded growth in
|
||||
@@ -274,6 +275,8 @@ if _config_path.exists():
|
||||
"container_disk": "TERMINAL_CONTAINER_DISK",
|
||||
"container_persistent": "TERMINAL_CONTAINER_PERSISTENT",
|
||||
"docker_volumes": "TERMINAL_DOCKER_VOLUMES",
|
||||
"docker_mount_cwd_to_workspace": "TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE",
|
||||
"docker_run_as_host_user": "TERMINAL_DOCKER_RUN_AS_HOST_USER",
|
||||
"sandbox_dir": "TERMINAL_SANDBOX_DIR",
|
||||
"persistent_shell": "TERMINAL_PERSISTENT_SHELL",
|
||||
}
|
||||
@@ -286,6 +289,10 @@ if _config_path.exists():
|
||||
# Only bridge explicit absolute paths from config.yaml.
|
||||
if _cfg_key == "cwd" and str(_val) in (".", "auto", "cwd"):
|
||||
continue
|
||||
# Expand shell tilde in cwd so subprocess.Popen never
|
||||
# receives a literal "~/" which the kernel rejects.
|
||||
if _cfg_key == "cwd" and isinstance(_val, str):
|
||||
_val = os.path.expanduser(_val)
|
||||
if isinstance(_val, list):
|
||||
os.environ[_env_var] = json.dumps(_val)
|
||||
else:
|
||||
@@ -1545,7 +1552,7 @@ class GatewayRunner:
|
||||
if cfg_path.exists():
|
||||
with open(cfg_path, encoding="utf-8") as _f:
|
||||
cfg = _y.safe_load(_f) or {}
|
||||
return (cfg.get("agent", {}).get("system_prompt", "") or "").strip()
|
||||
return (cfg_get(cfg, "agent", "system_prompt", default="") or "").strip()
|
||||
except Exception:
|
||||
pass
|
||||
return ""
|
||||
@@ -1566,7 +1573,7 @@ class GatewayRunner:
|
||||
if cfg_path.exists():
|
||||
with open(cfg_path, encoding="utf-8") as _f:
|
||||
cfg = _y.safe_load(_f) or {}
|
||||
effort = str(cfg.get("agent", {}).get("reasoning_effort", "") or "").strip()
|
||||
effort = str(cfg_get(cfg, "agent", "reasoning_effort", default="") or "").strip()
|
||||
except Exception:
|
||||
pass
|
||||
result = parse_reasoning_effort(effort)
|
||||
@@ -1649,7 +1656,7 @@ class GatewayRunner:
|
||||
if cfg_path.exists():
|
||||
with open(cfg_path, encoding="utf-8") as _f:
|
||||
cfg = _y.safe_load(_f) or {}
|
||||
raw = str(cfg.get("agent", {}).get("service_tier", "") or "").strip()
|
||||
raw = str(cfg_get(cfg, "agent", "service_tier", default="") or "").strip()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
@@ -1670,7 +1677,7 @@ class GatewayRunner:
|
||||
if cfg_path.exists():
|
||||
with open(cfg_path, encoding="utf-8") as _f:
|
||||
cfg = _y.safe_load(_f) or {}
|
||||
return bool(cfg.get("display", {}).get("show_reasoning", False))
|
||||
return bool(cfg_get(cfg, "display", "show_reasoning", default=False))
|
||||
except Exception:
|
||||
pass
|
||||
return False
|
||||
@@ -1686,7 +1693,7 @@ class GatewayRunner:
|
||||
if cfg_path.exists():
|
||||
with open(cfg_path, encoding="utf-8") as _f:
|
||||
cfg = _y.safe_load(_f) or {}
|
||||
mode = str(cfg.get("display", {}).get("busy_input_mode", "") or "").strip().lower()
|
||||
mode = str(cfg_get(cfg, "display", "busy_input_mode", default="") or "").strip().lower()
|
||||
except Exception:
|
||||
pass
|
||||
if mode == "queue":
|
||||
@@ -1706,7 +1713,7 @@ class GatewayRunner:
|
||||
if cfg_path.exists():
|
||||
with open(cfg_path, encoding="utf-8") as _f:
|
||||
cfg = _y.safe_load(_f) or {}
|
||||
raw = str(cfg.get("agent", {}).get("restart_drain_timeout", "") or "").strip()
|
||||
raw = str(cfg_get(cfg, "agent", "restart_drain_timeout", default="") or "").strip()
|
||||
except Exception:
|
||||
pass
|
||||
value = parse_restart_drain_timeout(raw)
|
||||
@@ -1739,7 +1746,7 @@ class GatewayRunner:
|
||||
if cfg_path.exists():
|
||||
with open(cfg_path, encoding="utf-8") as _f:
|
||||
cfg = _y.safe_load(_f) or {}
|
||||
raw = cfg.get("display", {}).get("background_process_notifications")
|
||||
raw = cfg_get(cfg, "display", "background_process_notifications")
|
||||
if raw is False:
|
||||
mode = "off"
|
||||
elif raw not in (None, ""):
|
||||
@@ -2378,6 +2385,7 @@ class GatewayRunner:
|
||||
|
||||
# Discover and load event hooks
|
||||
self.hooks.discover_and_load()
|
||||
|
||||
|
||||
# Recover background processes from checkpoint (crash recovery)
|
||||
try:
|
||||
@@ -6444,7 +6452,7 @@ class GatewayRunner:
|
||||
|
||||
try:
|
||||
config = _load_gateway_config()
|
||||
personalities = config.get("agent", {}).get("personalities", {}) if config else {}
|
||||
personalities = cfg_get(config, "agent", "personalities", default={})
|
||||
except Exception:
|
||||
config = {}
|
||||
personalities = {}
|
||||
@@ -7445,7 +7453,7 @@ class GatewayRunner:
|
||||
# --- check config gate ------------------------------------------------
|
||||
try:
|
||||
user_config = _load_gateway_config()
|
||||
gate_enabled = user_config.get("display", {}).get("tool_progress_command", False)
|
||||
gate_enabled = cfg_get(user_config, "display", "tool_progress_command", default=False)
|
||||
except Exception:
|
||||
gate_enabled = False
|
||||
|
||||
@@ -10064,7 +10072,7 @@ class GatewayRunner:
|
||||
tool_progress_hint_gateway,
|
||||
)
|
||||
_cfg = _load_gateway_config()
|
||||
gate_on = bool(_cfg.get("display", {}).get("tool_progress_command", False))
|
||||
gate_on = bool(cfg_get(_cfg, "display", "tool_progress_command", default=False))
|
||||
if gate_on and not is_seen(_cfg, TOOL_PROGRESS_FLAG):
|
||||
long_tool_hint_fired[0] = True
|
||||
progress_queue.put(tool_progress_hint_gateway())
|
||||
@@ -10221,6 +10229,20 @@ class GatewayRunner:
|
||||
if progress_lines:
|
||||
progress_lines[-1] = f"{base_msg} (×{count + 1})"
|
||||
msg = progress_lines[-1] if progress_lines else base_msg
|
||||
elif isinstance(raw, tuple) and len(raw) >= 1 and raw[0] == "__reset__":
|
||||
# Content bubble just landed on the platform — close off
|
||||
# the current tool-progress bubble so the next tool
|
||||
# starts a fresh bubble below the content. Without this,
|
||||
# tool lines keep editing the ORIGINAL progress message
|
||||
# above the new content, making the chat appear out of
|
||||
# order. Mirrors GatewayStreamConsumer.on_segment_break
|
||||
# on the content side. (Issue: tool + content
|
||||
# linearization regression after PR #7885.)
|
||||
progress_msg_id = None
|
||||
progress_lines = []
|
||||
last_progress_msg[0] = None
|
||||
repeat_count[0] = 0
|
||||
continue
|
||||
else:
|
||||
msg = raw
|
||||
progress_lines.append(msg)
|
||||
@@ -10290,6 +10312,24 @@ class GatewayRunner:
|
||||
_, base_msg, count = raw
|
||||
if progress_lines:
|
||||
progress_lines[-1] = f"{base_msg} (×{count + 1})"
|
||||
elif isinstance(raw, tuple) and len(raw) >= 1 and raw[0] == "__reset__":
|
||||
# Content-bubble marker during drain: close off
|
||||
# the current progress bubble and start a fresh
|
||||
# one for any tool lines that arrived after.
|
||||
if can_edit and progress_lines and progress_msg_id:
|
||||
_pending_text = "\n".join(progress_lines)
|
||||
try:
|
||||
await adapter.edit_message(
|
||||
chat_id=source.chat_id,
|
||||
message_id=progress_msg_id,
|
||||
content=_pending_text,
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
progress_msg_id = None
|
||||
progress_lines = []
|
||||
last_progress_msg[0] = None
|
||||
repeat_count[0] = 0
|
||||
else:
|
||||
progress_lines.append(raw)
|
||||
except Exception:
|
||||
@@ -10495,6 +10535,11 @@ class GatewayRunner:
|
||||
chat_id=source.chat_id,
|
||||
config=_consumer_cfg,
|
||||
metadata={"thread_id": _progress_thread_id} if _progress_thread_id else None,
|
||||
on_new_message=(
|
||||
(lambda: progress_queue.put(("__reset__",)))
|
||||
if progress_queue is not None
|
||||
else None
|
||||
),
|
||||
)
|
||||
if _want_stream_deltas:
|
||||
def _stream_delta_cb(text: str) -> None:
|
||||
@@ -11702,6 +11747,7 @@ def _start_cron_ticker(stop_event: threading.Event, adapters=None, loop=None, in
|
||||
IMAGE_CACHE_EVERY = 60 # ticks — once per hour at default 60s interval
|
||||
CHANNEL_DIR_EVERY = 5 # ticks — every 5 minutes
|
||||
PASTE_SWEEP_EVERY = 60 # ticks — once per hour
|
||||
CURATOR_EVERY = 60 # ticks — poll hourly (inner gate handles the real cadence)
|
||||
|
||||
logger.info("Cron ticker started (interval=%ds)", interval)
|
||||
tick_count = 0
|
||||
@@ -11753,6 +11799,21 @@ def _start_cron_ticker(stop_event: threading.Event, adapters=None, loop=None, in
|
||||
except Exception as e:
|
||||
logger.debug("Paste sweep error: %s", e)
|
||||
|
||||
# Curator — piggy-back on the existing cron ticker so long-running
|
||||
# gateways get weekly skill maintenance without needing restarts.
|
||||
# maybe_run_curator() is internally gated by config.interval_hours
|
||||
# (7 days by default), so CURATOR_EVERY is just the poll rate — the
|
||||
# real work only fires once per config interval.
|
||||
if tick_count % CURATOR_EVERY == 0:
|
||||
try:
|
||||
from agent.curator import maybe_run_curator
|
||||
maybe_run_curator(
|
||||
idle_for_seconds=float("inf"),
|
||||
on_summary=lambda msg: logger.info("curator: %s", msg),
|
||||
)
|
||||
except Exception as e:
|
||||
logger.debug("Curator tick error: %s", e)
|
||||
|
||||
stop_event.wait(timeout=interval)
|
||||
logger.info("Cron ticker stopped")
|
||||
|
||||
|
||||
@@ -91,11 +91,20 @@ class GatewayStreamConsumer:
|
||||
chat_id: str,
|
||||
config: Optional[StreamConsumerConfig] = None,
|
||||
metadata: Optional[dict] = None,
|
||||
on_new_message: Optional[callable] = None,
|
||||
):
|
||||
self.adapter = adapter
|
||||
self.chat_id = chat_id
|
||||
self.cfg = config or StreamConsumerConfig()
|
||||
self.metadata = metadata
|
||||
# Fired whenever a fresh content bubble is created on the platform
|
||||
# (first-send of a new message, commentary, overflow chunk, or
|
||||
# fallback continuation). The gateway uses this to linearize the
|
||||
# tool-progress bubble: when content resumes after a tool batch,
|
||||
# the next tool.started should open a NEW progress bubble below
|
||||
# the content, not edit the old bubble above it.
|
||||
# Called with no arguments. Exceptions are swallowed.
|
||||
self._on_new_message = on_new_message
|
||||
self._queue: queue.Queue = queue.Queue()
|
||||
self._accumulated = ""
|
||||
self._message_id: Optional[str] = None
|
||||
@@ -146,6 +155,16 @@ class GatewayStreamConsumer:
|
||||
if text:
|
||||
self._queue.put((_COMMENTARY, text))
|
||||
|
||||
def _notify_new_message(self) -> None:
    """Invoke the optional ``on_new_message`` hook without letting it raise.

    Does nothing when no hook was supplied. Any exception raised by the hook
    is logged at debug level (with traceback) and otherwise ignored.
    """
    callback = self._on_new_message
    if callback is not None:
        try:
            callback()
        except Exception:
            logger.debug("on_new_message callback error", exc_info=True)
|
||||
|
||||
def _reset_segment_state(self, *, preserve_no_edit: bool = False) -> None:
|
||||
if preserve_no_edit and self._message_id == "__no_edit__":
|
||||
return
|
||||
@@ -529,6 +548,9 @@ class GatewayStreamConsumer:
|
||||
self._message_id = str(result.message_id)
|
||||
self._already_sent = True
|
||||
self._last_sent_text = text
|
||||
# Fresh content bubble — close off any stale tool bubble
|
||||
# above so the next tool starts a new bubble below.
|
||||
self._notify_new_message()
|
||||
return str(result.message_id)
|
||||
else:
|
||||
self._edit_supported = False
|
||||
@@ -661,6 +683,9 @@ class GatewayStreamConsumer:
|
||||
sent_any_chunk = True
|
||||
last_successful_chunk = chunk
|
||||
last_message_id = result.message_id or last_message_id
|
||||
# Each fallback chunk is a fresh platform message — notify
|
||||
# so any stale tool-progress bubble gets closed off.
|
||||
self._notify_new_message()
|
||||
|
||||
self._message_id = last_message_id
|
||||
self._already_sent = True
|
||||
@@ -744,6 +769,11 @@ class GatewayStreamConsumer:
|
||||
# tool..."), not the final response. Setting already_sent would cause
|
||||
# the final response to be incorrectly suppressed when there are
|
||||
# multiple tool calls. See: https://github.com/NousResearch/hermes-agent/issues/10454
|
||||
if result.success:
|
||||
# Commentary counts as fresh content — close off any
|
||||
# stale tool bubble above it so the next tool starts a
|
||||
# new bubble below.
|
||||
self._notify_new_message()
|
||||
return result.success
|
||||
except Exception as e:
|
||||
logger.error("Commentary send error: %s", e)
|
||||
@@ -973,6 +1003,11 @@ class GatewayStreamConsumer:
|
||||
# every delta/tool boundary when platforms accept a
|
||||
# message but do not return an editable message id.
|
||||
self._message_id = "__no_edit__"
|
||||
# Notify the gateway that a fresh content bubble was
|
||||
# created so any accumulated tool-progress bubble above
|
||||
# gets closed off — the next tool fires into a new
|
||||
# bubble below, preserving chronological order.
|
||||
self._notify_new_message()
|
||||
return True
|
||||
else:
|
||||
# Initial send failed — disable streaming for this session
|
||||
|
||||
@@ -0,0 +1,138 @@
|
||||
"""Shared helpers for attaching Hermes to a local Chrome CDP port."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import platform
|
||||
import shlex
|
||||
import shutil
|
||||
import subprocess
|
||||
|
||||
from hermes_constants import get_hermes_home
|
||||
|
||||
|
||||
# Default local CDP endpoint used when attaching to a debug-enabled browser.
DEFAULT_BROWSER_CDP_PORT = 9222
DEFAULT_BROWSER_CDP_URL = "http://127.0.0.1:%d" % DEFAULT_BROWSER_CDP_PORT

# macOS: each app bundle's executable shares the bundle's display name.
_DARWIN_APPS = tuple(
    f"/Applications/{app_name}.app/Contents/MacOS/{app_name}"
    for app_name in (
        "Google Chrome",
        "Chromium",
        "Brave Browser",
        "Microsoft Edge",
    )
)

# Windows: path components joined onto a base install directory.
_WINDOWS_INSTALL_PARTS = (
    ("Google", "Chrome", "Application", "chrome.exe"),
    ("Chromium", "Application", "chrome.exe"),
    ("Chromium", "Application", "chromium.exe"),
    ("BraveSoftware", "Brave-Browser", "Application", "brave.exe"),
    ("Microsoft", "Edge", "Application", "msedge.exe"),
)

# Executable names looked up on PATH when the system is not Darwin/Windows.
_LINUX_BIN_NAMES = (
    "google-chrome",
    "google-chrome-stable",
    "chromium-browser",
    "chromium",
    "brave-browser",
    "microsoft-edge",
)

# Windows PATH lookups: the ".exe" spellings first, then the bare stems.
_WINDOWS_BIN_STEMS = ("chrome", "msedge", "brave", "chromium")
_WINDOWS_BIN_NAMES = (
    tuple(f"{stem}.exe" for stem in _WINDOWS_BIN_STEMS) + _WINDOWS_BIN_STEMS
)
|
||||
|
||||
|
||||
def get_chrome_debug_candidates(system: str) -> list[str]:
    """Return existing Chromium-family browser executables for ``system``.

    ``system`` is compared against ``"Darwin"`` and ``"Windows"``; any other
    value takes the PATH-lookup branch that uses the Linux binary names plus
    the ``/mnt/c`` Program Files directories.

    Only paths for which ``os.path.isfile`` is true are returned, in
    discovery order, de-duplicated on their normalized form.
    """
    found: list[str] = []
    seen_keys: set[str] = set()

    def add(path: str | None) -> None:
        # shutil.which() returns None on a miss; ignore those and "".
        if not path:
            return
        key = os.path.normcase(os.path.normpath(path))
        if key not in seen_keys and os.path.isfile(path):
            found.append(path)
            seen_keys.add(key)

    def add_install_paths(bases: tuple[str | None, ...]) -> None:
        # Unset environment variables arrive here as None and are skipped.
        for base in bases:
            if not base:
                continue
            for parts in _WINDOWS_INSTALL_PARTS:
                add(os.path.join(base, *parts))

    if system == "Darwin":
        for app in _DARWIN_APPS:
            add(app)
    elif system == "Windows":
        for name in _WINDOWS_BIN_NAMES:
            add(shutil.which(name))
        add_install_paths(
            tuple(
                os.environ.get(var)
                for var in ("ProgramFiles", "ProgramFiles(x86)", "LOCALAPPDATA")
            )
        )
    else:
        for name in _LINUX_BIN_NAMES:
            add(shutil.which(name))
        add_install_paths(("/mnt/c/Program Files", "/mnt/c/Program Files (x86)"))

    return found
|
||||
|
||||
|
||||
def chrome_debug_data_dir() -> str:
    """Return the ``chrome-debug`` directory under the Hermes home, as a string."""
    profile_dir = get_hermes_home() / "chrome-debug"
    return str(profile_dir)
|
||||
|
||||
|
||||
def _chrome_debug_args(port: int) -> list[str]:
    """Build the browser flags that enable remote debugging on ``port``.

    The list also sets the user data directory to ``chrome_debug_data_dir()``
    and adds the no-first-run and no-default-browser-check flags.
    """
    flags = [f"--remote-debugging-port={port}"]
    flags.append(f"--user-data-dir={chrome_debug_data_dir()}")
    flags.extend(("--no-first-run", "--no-default-browser-check"))
    return flags
|
||||
|
||||
|
||||
def manual_chrome_debug_command(port: int = DEFAULT_BROWSER_CDP_PORT, system: str | None = None) -> str | None:
    """Return a shell command that starts a browser with CDP enabled.

    ``system`` defaults to ``platform.system()``. When a browser executable
    is found, the command is built from the first candidate and quoted with
    ``subprocess.list2cmdline`` for ``"Windows"`` or ``shlex.join`` otherwise.
    With no candidate, ``"Darwin"`` falls back to an ``open -a`` invocation
    and every other system yields ``None``.
    """
    target = system or platform.system()
    browsers = get_chrome_debug_candidates(target)

    if browsers:
        command = [browsers[0]] + _chrome_debug_args(port)
        if target == "Windows":
            return subprocess.list2cmdline(command)
        return shlex.join(command)

    if target != "Darwin":
        return None

    data_dir = chrome_debug_data_dir()
    return (
        f'open -a "Google Chrome" --args --remote-debugging-port={port} '
        f'--user-data-dir="{data_dir}" --no-first-run --no-default-browser-check'
    )
|
||||
|
||||
|
||||
def _detach_kwargs(system: str) -> dict:
    """Return ``subprocess.Popen`` keyword arguments for a detached child.

    Any system other than ``"Windows"`` gets ``start_new_session=True``.
    For ``"Windows"`` the ``DETACHED_PROCESS`` and ``CREATE_NEW_PROCESS_GROUP``
    flags are OR-ed together. Each is looked up with a default of 0, so the
    result is an empty dict when neither constant exists in ``subprocess``.
    """
    if system == "Windows":
        flags = 0
        for flag_name in ("DETACHED_PROCESS", "CREATE_NEW_PROCESS_GROUP"):
            flags |= getattr(subprocess, flag_name, 0)
        if flags:
            return {"creationflags": flags}
        return {}
    return {"start_new_session": True}
|
||||
|
||||
|
||||
def try_launch_chrome_debug(port: int = DEFAULT_BROWSER_CDP_PORT, system: str | None = None) -> bool:
    """Try to start a detached browser with remote debugging on ``port``.

    Args:
        port: TCP port passed to ``--remote-debugging-port``.
        system: Platform name; defaults to ``platform.system()``.

    Returns:
        ``True`` when the browser process was spawned. ``False`` when no
        browser executable was found, the profile directory could not be
        created, or the spawn itself failed. A ``True`` result only means
        ``Popen`` succeeded, not that the CDP port is accepting connections.
    """
    system = system or platform.system()
    candidates = get_chrome_debug_candidates(system)
    if not candidates:
        return False

    try:
        # Creating the profile directory is part of the best-effort attempt:
        # a read-only or unwritable home must yield False, not an exception.
        os.makedirs(chrome_debug_data_dir(), exist_ok=True)
        subprocess.Popen(
            [candidates[0], *_chrome_debug_args(port)],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            **_detach_kwargs(system),
        )
    except Exception:
        # Deliberately broad: this is a convenience launcher that reports
        # failure through its return value.
        return False
    return True
|
||||
@@ -148,6 +148,9 @@ COMMAND_REGISTRY: list[CommandDef] = [
|
||||
CommandDef("cron", "Manage scheduled tasks", "Tools & Skills",
|
||||
cli_only=True, args_hint="[subcommand]",
|
||||
subcommands=("list", "add", "create", "edit", "pause", "resume", "run", "remove")),
|
||||
CommandDef("curator", "Background skill maintenance (status, run, pin, archive)",
|
||||
"Tools & Skills", args_hint="[subcommand]",
|
||||
subcommands=("status", "run", "pause", "resume", "pin", "unpin", "restore")),
|
||||
CommandDef("reload", "Reload .env variables into the running session", "Tools & Skills",
|
||||
cli_only=True),
|
||||
CommandDef("reload-mcp", "Reload MCP servers from config", "Tools & Skills",
|
||||
|
||||
+100
-5
@@ -515,6 +515,16 @@ DEFAULT_CONFIG = {
|
||||
# Explicit opt-in: mount the host cwd into /workspace for Docker sessions.
|
||||
# Default off because passing host directories into a sandbox weakens isolation.
|
||||
"docker_mount_cwd_to_workspace": False,
|
||||
# Explicit opt-in: run the Docker container as the host user's uid:gid
|
||||
# (via `--user`). When enabled, files written into bind-mounted dirs
|
||||
# (docker_volumes, the persistent workspace, or the auto-mounted cwd)
|
||||
# are owned by your host user instead of root, which avoids needing
|
||||
# `sudo chown` after container runs. Default off to preserve behavior
|
||||
# for images whose entrypoints expect to start as root (e.g. the
|
||||
# bundled Hermes image, which drops to the `hermes` user via gosu).
|
||||
# When on, SETUID/SETGID caps are omitted from the container since
|
||||
# no privilege drop is needed.
|
||||
"docker_run_as_host_user": False,
|
||||
# Persistent shell — keep a long-lived bash shell across execute() calls
|
||||
# so cwd/env vars/shell variables survive between commands.
|
||||
# Enabled by default for non-local backends (SSH); local is always opt-in
|
||||
@@ -915,6 +925,35 @@ DEFAULT_CONFIG = {
|
||||
"guard_agent_created": False,
|
||||
},
|
||||
|
||||
# Curator — background skill maintenance.
|
||||
#
|
||||
# Periodically reviews AGENT-CREATED skills (never bundled or
|
||||
# hub-installed) and keeps the collection tidy: marks long-unused skills
|
||||
# as stale, archives genuinely obsolete ones (archive only, never
|
||||
# deletes), and spawns a forked aux-model agent to consolidate overlaps
|
||||
# and patch drift. Runs inactivity-triggered from session start — no
|
||||
# cron daemon.
|
||||
#
|
||||
# See `hermes curator status` for the last run summary.
|
||||
"curator": {
|
||||
"enabled": True,
|
||||
# How long to wait between curator runs (hours). Default: 7 days.
|
||||
"interval_hours": 24 * 7,
|
||||
# Only run when the agent has been idle at least this long (hours).
|
||||
"min_idle_hours": 2,
|
||||
# Mark a skill as "stale" after this many days without use.
|
||||
"stale_after_days": 30,
|
||||
# Archive a skill (move to skills/.archive/) after this many days
|
||||
# without use. Archived skills are recoverable — no auto-deletion.
|
||||
"archive_after_days": 90,
|
||||
# Optional per-task override for the curator's aux model. Leave null
|
||||
# to use Hermes' main auxiliary client resolution.
|
||||
"auxiliary": {
|
||||
"provider": None,
|
||||
"model": None,
|
||||
},
|
||||
},
|
||||
|
||||
# Honcho AI-native memory -- reads ~/.honcho/config.json as single source of truth.
|
||||
# This section is only needed for hermes-specific overrides; everything else
|
||||
# (apiKey, workspace, peerName, sessions, enabled) comes from the global config.
|
||||
@@ -3448,6 +3487,52 @@ def _normalize_max_turns_config(config: Dict[str, Any]) -> Dict[str, Any]:
|
||||
return config
|
||||
|
||||
|
||||
def cfg_get(cfg: Optional[Dict[str, Any]], *keys: str, default: Any = None) -> Any:
    """Look up a nested config value, falling back to ``default`` on a miss.

    Walks ``cfg`` one key at a time. ``default`` is returned when:

    * ``cfg`` itself is not a dict (including ``None``);
    * any key along the path is absent;
    * an intermediate value is not a dict (e.g. a string where a section
      was expected).

    A key that is present is returned as-is even when its value is ``None``
    or otherwise falsy, matching ``dict.get(key, default)``. With no
    ``keys``, a dict ``cfg`` is returned unchanged.

    Examples:
        >>> cfg_get({"agent": {"reasoning_effort": "high"}}, "agent", "reasoning_effort")
        'high'
        >>> cfg_get({}, "agent", "reasoning_effort", default="medium")
        'medium'
        >>> cfg_get({"agent": "oops_a_string"}, "agent", "reasoning_effort", default="low")
        'low'
        >>> cfg_get(None, "anything", default=42)
        42
        >>> cfg_get({"a": {"b": False}}, "a", "b", default=True)
        False
    """
    current: Any = cfg
    if not isinstance(current, dict):
        return default
    for key in keys:
        if isinstance(current, dict) and key in current:
            current = current[key]
        else:
            return default
    return current
|
||||
|
||||
|
||||
|
||||
def read_raw_config() -> Dict[str, Any]:
|
||||
"""Read ~/.hermes/config.yaml as-is, without merging defaults or migrating.
|
||||
@@ -3710,18 +3795,27 @@ def _sanitize_env_lines(lines: list) -> list:
|
||||
|
||||
# Detect concatenated KEY=VALUE pairs on one line.
|
||||
# Search for known KEY= patterns at any position in the line.
|
||||
split_positions = []
|
||||
# We collect full needle ranges so we can drop matches that are
|
||||
# fully contained within a longer overlapping needle. Without this,
|
||||
# suffix collisions corrupt the file: e.g. LM_API_KEY= inside
|
||||
# GLM_API_KEY= would otherwise split the line into "G\nLM_API_KEY=...".
|
||||
match_ranges: list[tuple[int, int]] = []
|
||||
for key_name in known_keys:
|
||||
needle = key_name + "="
|
||||
idx = stripped.find(needle)
|
||||
while idx >= 0:
|
||||
split_positions.append(idx)
|
||||
match_ranges.append((idx, idx + len(needle)))
|
||||
idx = stripped.find(needle, idx + len(needle))
|
||||
|
||||
split_positions = sorted({
|
||||
s for s, e in match_ranges
|
||||
if not any(
|
||||
s2 <= s and e2 >= e and (s2, e2) != (s, e)
|
||||
for s2, e2 in match_ranges
|
||||
)
|
||||
})
|
||||
|
||||
if len(split_positions) > 1:
|
||||
split_positions.sort()
|
||||
# Deduplicate (shouldn't happen, but be safe)
|
||||
split_positions = sorted(set(split_positions))
|
||||
for i, pos in enumerate(split_positions):
|
||||
end = split_positions[i + 1] if i + 1 < len(split_positions) else len(stripped)
|
||||
part = stripped[pos:end].strip()
|
||||
@@ -4292,6 +4386,7 @@ def set_config_value(key: str, value: str):
|
||||
"terminal.modal_image": "TERMINAL_MODAL_IMAGE",
|
||||
"terminal.daytona_image": "TERMINAL_DAYTONA_IMAGE",
|
||||
"terminal.docker_mount_cwd_to_workspace": "TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE",
|
||||
"terminal.docker_run_as_host_user": "TERMINAL_DOCKER_RUN_AS_HOST_USER",
|
||||
"terminal.cwd": "TERMINAL_CWD",
|
||||
"terminal.timeout": "TERMINAL_TIMEOUT",
|
||||
"terminal.sandbox_dir": "TERMINAL_SANDBOX_DIR",
|
||||
|
||||
@@ -0,0 +1,235 @@
|
||||
"""CLI subcommand: `hermes curator <subcommand>`.
|
||||
|
||||
Thin shell around agent/curator.py and tools/skill_usage.py. Renders a status
|
||||
table, triggers a run, pauses/resumes, and pins/unpins skills.
|
||||
|
||||
This module intentionally has no side effects at import time — main.py wires
|
||||
the argparse subparsers on demand.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
from datetime import datetime, timezone
|
||||
from typing import Optional
|
||||
|
||||
|
||||
def _fmt_ts(ts: Optional[str]) -> str:
    """Render an ISO-8601 timestamp as a coarse relative age.

    Args:
        ts: ISO-8601 timestamp string, or a falsy value for "no timestamp".

    Returns:
        ``"never"`` for a falsy ``ts``; ``str(ts)`` unchanged when it cannot
        be parsed by ``datetime.fromisoformat``; otherwise the age in the
        largest fitting unit: ``"<n>s ago"``, ``"<n>m ago"``, ``"<n>h ago"``
        or ``"<n>d ago"``.
    """
    if not ts:
        return "never"
    try:
        dt = datetime.fromisoformat(ts)
    except (TypeError, ValueError):
        return str(ts)
    if dt.tzinfo is None:
        # Naive timestamps are interpreted as UTC.
        dt = dt.replace(tzinfo=timezone.utc)
    delta = datetime.now(timezone.utc) - dt
    # A timestamp slightly in the future (clock skew) would otherwise be
    # shown as a negative age such as "-5s ago"; clamp it to zero.
    secs = max(0, int(delta.total_seconds()))
    if secs < 60:
        return f"{secs}s ago"
    if secs < 3600:
        return f"{secs // 60}m ago"
    if secs < 86400:
        return f"{secs // 3600}h ago"
    return f"{secs // 86400}d ago"
|
||||
|
||||
|
||||
def _cmd_status(args) -> int:
    """Print curator state plus a summary of agent-created skills.

    Shows the enabled/paused state, run count, last run and summary, the
    configured interval and stale/archive thresholds, then per-state skill
    counts, pinned skills, and up to five active skills with the oldest
    last-used (or, failing that, created) timestamp. Always returns 0.
    """
    from agent import curator
    from tools import skill_usage

    persisted = curator.load_state()
    is_on = curator.is_enabled()
    is_paused = persisted.get("paused", False)
    last_run = persisted.get("last_run_at")
    summary = persisted.get("last_run_summary") or "(none)"
    runs = persisted.get("run_count", 0)

    # A pause takes precedence over the enabled flag in the displayed state.
    if is_paused:
        status_line = "PAUSED"
    elif is_on:
        status_line = "ENABLED"
    else:
        status_line = "DISABLED"

    print(f"curator: {status_line}")
    print(f" runs: {runs}")
    print(f" last run: {_fmt_ts(last_run)}")
    print(f" last summary: {summary}")

    report_path = persisted.get("last_report_path")
    if report_path:
        print(f" last report: {report_path}")

    hours = curator.get_interval_hours()
    # Whole multiples of a day are shown in days, everything else in hours.
    if hours % 24 == 0 and hours >= 24:
        interval_label = f"{hours // 24}d"
    else:
        interval_label = f"{hours}h"
    print(f" interval: every {interval_label}")
    print(f" stale after: {curator.get_stale_after_days()}d unused")
    print(f" archive after: {curator.get_archive_after_days()}d unused")

    rows = skill_usage.agent_created_report()
    if not rows:
        print("\nno agent-created skills")
        return 0

    displayed_states = ("active", "stale", "archived")
    buckets = {name: [] for name in displayed_states}
    pinned = []
    for row in rows:
        # Unknown state names get their own bucket but are not printed below.
        buckets.setdefault(row.get("state", "active"), []).append(row)
        if row.get("pinned"):
            pinned.append(row["name"])

    print(f"\nagent-created skills: {len(rows)} total")
    for state_name in displayed_states:
        count = len(buckets.get(state_name, []))
        print(f" {state_name:10s} {count}")

    if pinned:
        print(f"\npinned ({len(pinned)}): {', '.join(pinned)}")

    def _recency(row):
        # Never-used skills fall back to their creation time, then to "".
        return row.get("last_used_at") or row.get("created_at") or ""

    oldest = sorted(buckets.get("active", []), key=_recency)[:5]
    if oldest:
        print("\nleast recently used (top 5):")
        for row in oldest:
            last = _fmt_ts(row.get("last_used_at"))
            print(f" {row['name']:40s} use={row.get('use_count', 0):3d} last_used={last}")

    return 0
|
||||
|
||||
|
||||
def _cmd_run(args) -> int:
    """Trigger a curator review pass and print what it reports.

    Returns 1 without running when the curator is disabled in config,
    otherwise 0. ``args.synchronous`` is forwarded to
    ``curator.run_curator_review``; when it is falsy a note is printed that
    the LLM pass continues in the background.
    """
    from agent import curator

    if not curator.is_enabled():
        print("curator: disabled via config; enable with `curator.enabled: true`")
        return 1

    print("curator: running review pass...")

    outcome = curator.run_curator_review(
        on_summary=lambda msg: print(msg),
        synchronous=bool(args.synchronous),
    )

    counts = outcome.get("auto_transitions", {})
    if counts:
        fields = (
            ("checked", "checked"),
            ("stale", "marked_stale"),
            ("archived", "archived"),
            ("reactivated", "reactivated"),
        )
        rendered = " ".join(f"{label}={counts.get(key, 0)}" for label, key in fields)
        print(f"auto: {rendered}")

    if not args.synchronous:
        print("llm pass running in background — check `hermes curator status` later")
    return 0
|
||||
|
||||
|
||||
def _cmd_pause(args) -> int:
    """Mark the curator as paused and confirm on stdout; always returns 0."""
    from agent import curator as curator_mod

    curator_mod.set_paused(True)
    print("curator: paused")
    return 0
|
||||
|
||||
|
||||
def _cmd_resume(args) -> int:
    """Clear the curator's paused flag and confirm on stdout; always returns 0."""
    from agent import curator as curator_mod

    curator_mod.set_paused(False)
    print("curator: resumed")
    return 0
|
||||
|
||||
|
||||
def _cmd_pin(args) -> int:
    """Pin ``args.skill`` so it bypasses curator auto-transitions.

    Returns 0 on success, or 1 (after printing an explanation) when
    ``skill_usage.is_agent_created`` rejects the skill name.
    """
    from tools import skill_usage

    skill = args.skill
    if skill_usage.is_agent_created(skill):
        skill_usage.set_pinned(skill, True)
        print(f"curator: pinned '{skill}' (will bypass auto-transitions)")
        return 0

    print(
        f"curator: '{skill}' is bundled or hub-installed — cannot pin "
        "(only agent-created skills participate in curation)"
    )
    return 1
|
||||
|
||||
|
||||
def _cmd_unpin(args) -> int:
    """Remove the pin from ``args.skill``.

    Returns 0 on success, or 1 (after printing an explanation) when
    ``skill_usage.is_agent_created`` rejects the skill name.
    """
    from tools import skill_usage

    skill = args.skill
    if skill_usage.is_agent_created(skill):
        skill_usage.set_pinned(skill, False)
        print(f"curator: unpinned '{skill}'")
        return 0

    print(
        f"curator: '{skill}' is bundled or hub-installed — "
        "there's nothing to unpin (curator only tracks agent-created skills)"
    )
    return 1
|
||||
|
||||
|
||||
def _cmd_restore(args) -> int:
    """Restore ``args.skill`` via ``skill_usage.restore_skill``.

    Prints the message returned by the helper and maps its success flag to
    an exit code: 0 on success, 1 otherwise.
    """
    from tools import skill_usage

    restored, message = skill_usage.restore_skill(args.skill)
    print(f"curator: {message}")
    if restored:
        return 0
    return 1
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# argparse wiring (called from hermes_cli.main)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def register_cli(parent: argparse.ArgumentParser) -> None:
    """Attach the `curator` subcommands to *parent*.

    main.py passes in the ArgumentParser returned by
    ``subparsers.add_parser("curator", ...)``. Each subcommand stores its
    handler under ``func``; with no subcommand, ``func`` prints *parent*'s
    help and returns 0.
    """

    def _show_help(_args) -> int:
        parent.print_help()
        return 0

    parent.set_defaults(func=_show_help)
    subs = parent.add_subparsers(dest="curator_command")

    def _add(name: str, help_text: str, handler, *, takes_skill: bool = False):
        """Register one subcommand, optionally with a positional skill name."""
        sub = subs.add_parser(name, help=help_text)
        if takes_skill:
            sub.add_argument("skill", help="Skill name")
        sub.set_defaults(func=handler)
        return sub

    _add("status", "Show curator status and skill stats", _cmd_status)

    # `run` is the only subcommand that takes an option of its own.
    run_parser = _add("run", "Trigger a curator review now", _cmd_run)
    run_parser.add_argument(
        "--sync", "--synchronous", dest="synchronous", action="store_true",
        help="Wait for the LLM review pass to finish (default: background thread)",
    )

    _add("pause", "Pause the curator until resumed", _cmd_pause)
    _add("resume", "Resume a paused curator", _cmd_resume)
    _add(
        "pin",
        "Pin a skill so the curator never auto-transitions it",
        _cmd_pin,
        takes_skill=True,
    )
    _add("unpin", "Unpin a skill", _cmd_unpin, takes_skill=True)
    _add("restore", "Restore an archived skill", _cmd_restore, takes_skill=True)
|
||||
|
||||
|
||||
def cli_main(argv=None) -> int:
    """Standalone entry point (also usable by hermes_cli.main fallthrough).

    Builds a ``hermes curator`` parser, parses *argv* (``sys.argv[1:]`` when
    ``None``), and returns the selected handler's result coerced to ``int``;
    a falsy result counts as 0.
    """
    parser = argparse.ArgumentParser(prog="hermes curator")
    register_cli(parser)
    namespace = parser.parse_args(argv)

    handler = getattr(namespace, "func", None)
    if handler is not None:
        return int(handler(namespace) or 0)

    parser.print_help()
    return 0
|
||||
|
||||
|
||||
# Script entry: exit with the status code returned by cli_main().
if __name__ == "__main__":  # pragma: no cover
    raise SystemExit(cli_main())
|
||||
@@ -9230,6 +9230,26 @@ Examples:
|
||||
except Exception as _exc:
|
||||
logging.getLogger(__name__).debug("Plugin CLI discovery failed: %s", _exc)
|
||||
|
||||
# =========================================================================
|
||||
# curator command — background skill maintenance
|
||||
# =========================================================================
|
||||
curator_parser = subparsers.add_parser(
|
||||
"curator",
|
||||
help="Background skill maintenance (curator) — status, run, pause, pin",
|
||||
description=(
|
||||
"The curator is an auxiliary-model background task that "
|
||||
"periodically reviews agent-created skills, prunes stale ones, "
|
||||
"consolidates overlaps, and archives obsolete skills. "
|
||||
"Bundled and hub-installed skills are never touched. "
|
||||
"Archives are recoverable; auto-deletion never happens."
|
||||
),
|
||||
)
|
||||
try:
|
||||
from hermes_cli.curator import register_cli as _register_curator_cli
|
||||
_register_curator_cli(curator_parser)
|
||||
except Exception as _exc:
|
||||
logging.getLogger(__name__).debug("curator CLI wiring failed: %s", _exc)
|
||||
|
||||
# =========================================================================
|
||||
# memory command
|
||||
# =========================================================================
|
||||
|
||||
@@ -16,6 +16,7 @@ import time
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
from hermes_cli.config import (
|
||||
cfg_get,
|
||||
load_config,
|
||||
save_config,
|
||||
get_env_value,
|
||||
@@ -716,7 +717,7 @@ def cmd_mcp_configure(args):
|
||||
|
||||
# Update config
|
||||
config = load_config()
|
||||
server_entry = config.get("mcp_servers", {}).get(name, {})
|
||||
server_entry = cfg_get(config, "mcp_servers", name, default={})
|
||||
|
||||
if len(chosen) == total:
|
||||
# All selected → remove include/exclude (register all)
|
||||
|
||||
@@ -45,6 +45,7 @@ from typing import Any, Callable, Dict, List, Optional, Set, Union
|
||||
|
||||
from hermes_constants import get_hermes_home
|
||||
from utils import env_var_enabled
|
||||
from hermes_cli.config import cfg_get
|
||||
|
||||
try:
|
||||
import yaml
|
||||
@@ -115,7 +116,7 @@ def _get_disabled_plugins() -> set:
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
config = load_config()
|
||||
disabled = config.get("plugins", {}).get("disabled", [])
|
||||
disabled = cfg_get(config, "plugins", "disabled", default=[])
|
||||
return set(disabled) if isinstance(disabled, list) else set()
|
||||
except Exception:
|
||||
return set()
|
||||
|
||||
@@ -18,6 +18,7 @@ from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from hermes_constants import get_hermes_home
|
||||
from hermes_cli.config import cfg_get
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -519,7 +520,7 @@ def _get_disabled_set() -> set:
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
config = load_config()
|
||||
disabled = config.get("plugins", {}).get("disabled", [])
|
||||
disabled = cfg_get(config, "plugins", "disabled", default=[])
|
||||
return set(disabled) if isinstance(disabled, list) else set()
|
||||
except Exception:
|
||||
return set()
|
||||
@@ -763,7 +764,7 @@ def _get_current_memory_provider() -> str:
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
config = load_config()
|
||||
return config.get("memory", {}).get("provider", "") or ""
|
||||
return cfg_get(config, "memory", "provider", default="") or ""
|
||||
except Exception:
|
||||
return ""
|
||||
|
||||
@@ -773,7 +774,7 @@ def _get_current_context_engine() -> str:
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
config = load_config()
|
||||
return config.get("context", {}).get("engine", "compressor") or "compressor"
|
||||
return cfg_get(config, "context", "engine", default="compressor") or "compressor"
|
||||
except Exception:
|
||||
return "compressor"
|
||||
|
||||
|
||||
+14
-19
@@ -131,6 +131,7 @@ def _set_reasoning_effort(config: Dict[str, Any], effort: str) -> None:
|
||||
|
||||
# Import config helpers
|
||||
from hermes_cli.config import (
|
||||
cfg_get,
|
||||
DEFAULT_CONFIG,
|
||||
get_hermes_home,
|
||||
get_config_path,
|
||||
@@ -441,7 +442,7 @@ def _print_setup_summary(config: dict, hermes_home):
|
||||
tool_status.append(("Image Generation", False, "FAL_KEY or OPENAI_API_KEY"))
|
||||
|
||||
# TTS — show configured provider
|
||||
tts_provider = config.get("tts", {}).get("provider", "edge")
|
||||
tts_provider = cfg_get(config, "tts", "provider", default="edge")
|
||||
if subscription_features.tts.managed_by_nous:
|
||||
tool_status.append(("Text-to-Speech (OpenAI via Nous subscription)", True, None))
|
||||
elif tts_provider == "elevenlabs" and get_env_value("ELEVENLABS_API_KEY"):
|
||||
@@ -480,7 +481,7 @@ def _print_setup_summary(config: dict, hermes_home):
|
||||
|
||||
if subscription_features.modal.managed_by_nous:
|
||||
tool_status.append(("Modal Execution (Nous subscription)", True, None))
|
||||
elif config.get("terminal", {}).get("backend") == "modal":
|
||||
elif cfg_get(config, "terminal", "backend") == "modal":
|
||||
if subscription_features.modal.direct_override:
|
||||
tool_status.append(("Modal Execution (direct Modal)", True, None))
|
||||
else:
|
||||
@@ -1179,7 +1180,7 @@ def setup_terminal_backend(config: dict):
|
||||
print_info(f" Guide: {_DOCS_BASE}/developer-guide/environments")
|
||||
print()
|
||||
|
||||
current_backend = config.get("terminal", {}).get("backend", "local")
|
||||
current_backend = cfg_get(config, "terminal", "backend", default="local")
|
||||
is_linux = _platform.system() == "Linux"
|
||||
|
||||
# Build backend choices with descriptions
|
||||
@@ -1228,7 +1229,7 @@ def setup_terminal_backend(config: dict):
|
||||
print_info(
|
||||
" the agent starts. CLI mode always starts in the current directory."
|
||||
)
|
||||
current_cwd = config.get("terminal", {}).get("cwd", "")
|
||||
current_cwd = cfg_get(config, "terminal", "cwd", default="")
|
||||
cwd = prompt(" Messaging working directory", current_cwd or str(Path.home()))
|
||||
if cwd:
|
||||
config["terminal"]["cwd"] = cwd
|
||||
@@ -1259,9 +1260,7 @@ def setup_terminal_backend(config: dict):
|
||||
print_info(f"Docker found: {docker_bin}")
|
||||
|
||||
# Docker image
|
||||
current_image = config.get("terminal", {}).get(
|
||||
"docker_image", "nikolaik/python-nodejs:python3.11-nodejs20"
|
||||
)
|
||||
current_image = cfg_get(config, "terminal", "docker_image", default="nikolaik/python-nodejs:python3.11-nodejs20")
|
||||
image = prompt(" Docker image", current_image)
|
||||
config["terminal"]["docker_image"] = image
|
||||
save_env_value("TERMINAL_DOCKER_IMAGE", image)
|
||||
@@ -1281,9 +1280,7 @@ def setup_terminal_backend(config: dict):
|
||||
else:
|
||||
print_info(f"Found: {sing_bin}")
|
||||
|
||||
current_image = config.get("terminal", {}).get(
|
||||
"singularity_image", "docker://nikolaik/python-nodejs:python3.11-nodejs20"
|
||||
)
|
||||
current_image = cfg_get(config, "terminal", "singularity_image", default="docker://nikolaik/python-nodejs:python3.11-nodejs20")
|
||||
image = prompt(" Container image", current_image)
|
||||
config["terminal"]["singularity_image"] = image
|
||||
save_env_value("TERMINAL_SINGULARITY_IMAGE", image)
|
||||
@@ -1302,7 +1299,7 @@ def setup_terminal_backend(config: dict):
|
||||
get_nous_subscription_features(config).nous_auth_present
|
||||
and is_managed_tool_gateway_ready("modal")
|
||||
)
|
||||
modal_mode = normalize_modal_mode(config.get("terminal", {}).get("modal_mode"))
|
||||
modal_mode = normalize_modal_mode(cfg_get(config, "terminal", "modal_mode"))
|
||||
use_managed_modal = False
|
||||
if managed_modal_available:
|
||||
modal_choices = [
|
||||
@@ -1439,9 +1436,7 @@ def setup_terminal_backend(config: dict):
|
||||
print_success(" Configured")
|
||||
|
||||
# Daytona image
|
||||
current_image = config.get("terminal", {}).get(
|
||||
"daytona_image", "nikolaik/python-nodejs:python3.11-nodejs20"
|
||||
)
|
||||
current_image = cfg_get(config, "terminal", "daytona_image", default="nikolaik/python-nodejs:python3.11-nodejs20")
|
||||
image = prompt(" Sandbox image", current_image)
|
||||
config["terminal"]["daytona_image"] = image
|
||||
save_env_value("TERMINAL_DAYTONA_IMAGE", image)
|
||||
@@ -1545,7 +1540,7 @@ def setup_agent_settings(config: dict):
|
||||
|
||||
# ── Max Iterations ──
|
||||
current_max = get_env_value("HERMES_MAX_ITERATIONS") or str(
|
||||
config.get("agent", {}).get("max_turns", 90)
|
||||
cfg_get(config, "agent", "max_turns", default=90)
|
||||
)
|
||||
print_info("Maximum tool-calling iterations per conversation.")
|
||||
print_info("Higher = more complex tasks, but costs more tokens.")
|
||||
@@ -1573,7 +1568,7 @@ def setup_agent_settings(config: dict):
|
||||
print_info(" all — Show every tool call with a short preview")
|
||||
print_info(" verbose — Full args, results, and debug logs")
|
||||
|
||||
current_mode = config.get("display", {}).get("tool_progress", "all")
|
||||
current_mode = cfg_get(config, "display", "tool_progress", default="all")
|
||||
mode = prompt("Tool progress mode", current_mode)
|
||||
if mode.lower() in ("off", "new", "all", "verbose"):
|
||||
if "display" not in config:
|
||||
@@ -1593,7 +1588,7 @@ def setup_agent_settings(config: dict):
|
||||
|
||||
config.setdefault("compression", {})["enabled"] = True
|
||||
|
||||
current_threshold = config.get("compression", {}).get("threshold", 0.50)
|
||||
current_threshold = cfg_get(config, "compression", "threshold", default=0.50)
|
||||
threshold_str = prompt("Compression threshold (0.5-0.95)", str(current_threshold))
|
||||
try:
|
||||
threshold = float(threshold_str)
|
||||
@@ -2601,11 +2596,11 @@ def _get_section_config_summary(config: dict, section_key: str) -> Optional[str]
|
||||
return "configured"
|
||||
|
||||
elif section_key == "terminal":
|
||||
backend = config.get("terminal", {}).get("backend", "local")
|
||||
backend = cfg_get(config, "terminal", "backend", default="local")
|
||||
return f"backend: {backend}"
|
||||
|
||||
elif section_key == "agent":
|
||||
max_turns = config.get("agent", {}).get("max_turns", 90)
|
||||
max_turns = cfg_get(config, "agent", "max_turns", default=90)
|
||||
return f"max turns: {max_turns}"
|
||||
|
||||
elif section_key == "gateway":
|
||||
|
||||
@@ -13,7 +13,7 @@ Config stored in ~/.hermes/config.yaml under:
|
||||
"""
|
||||
from typing import List, Optional, Set
|
||||
|
||||
from hermes_cli.config import load_config, save_config
|
||||
from hermes_cli.config import cfg_get, load_config, save_config
|
||||
from hermes_cli.colors import Colors, color
|
||||
from hermes_cli.platforms import PLATFORMS as _PLATFORMS
|
||||
|
||||
@@ -30,7 +30,7 @@ def get_disabled_skills(config: dict, platform: Optional[str] = None) -> Set[str
|
||||
global_disabled = set(skills_cfg.get("disabled", []))
|
||||
if platform is None:
|
||||
return global_disabled
|
||||
platform_disabled = skills_cfg.get("platform_disabled", {}).get(platform)
|
||||
platform_disabled = cfg_get(skills_cfg, "platform_disabled", platform)
|
||||
if platform_disabled is None:
|
||||
return global_disabled
|
||||
return set(platform_disabled)
|
||||
|
||||
@@ -13,7 +13,7 @@ PROJECT_ROOT = Path(__file__).parent.parent.resolve()
|
||||
|
||||
from hermes_cli.auth import AuthError, resolve_provider
|
||||
from hermes_cli.colors import Colors, color
|
||||
from hermes_cli.config import get_env_path, get_env_value, get_hermes_home, load_config
|
||||
from hermes_cli.config import cfg_get, get_env_path, get_env_value, get_hermes_home, load_config
|
||||
from hermes_cli.models import provider_label
|
||||
from hermes_cli.nous_subscription import get_nous_subscription_features
|
||||
from hermes_cli.runtime_provider import resolve_requested_provider
|
||||
@@ -306,7 +306,7 @@ def show_status(args):
|
||||
# (hermes status doesn't go through cli.py's config loading)
|
||||
try:
|
||||
_cfg = load_config()
|
||||
terminal_env = _cfg.get("terminal", {}).get("backend", "local")
|
||||
terminal_env = cfg_get(_cfg, "terminal", "backend", default="local")
|
||||
except Exception:
|
||||
terminal_env = "local"
|
||||
print(f" Backend: {terminal_env}")
|
||||
|
||||
@@ -18,6 +18,7 @@ from typing import Dict, List, Optional, Set
|
||||
|
||||
|
||||
from hermes_cli.config import (
|
||||
cfg_get,
|
||||
load_config, save_config, get_env_value, save_env_value,
|
||||
)
|
||||
from hermes_cli.colors import Colors, color
|
||||
@@ -965,7 +966,7 @@ def _save_platform_tools(config: dict, platform: str, enabled_toolset_keys: Set[
|
||||
platform_default_keys = {p["default_toolset"] for p in PLATFORMS.values()}
|
||||
|
||||
# Get existing toolsets for this platform
|
||||
existing_toolsets = config.get("platform_toolsets", {}).get(platform, [])
|
||||
existing_toolsets = cfg_get(config, "platform_toolsets", platform, default=[])
|
||||
if not isinstance(existing_toolsets, list):
|
||||
existing_toolsets = []
|
||||
existing_toolsets = [str(ts) for ts in existing_toolsets]
|
||||
@@ -1352,23 +1353,23 @@ def _is_provider_active(provider: dict, config: dict) -> bool:
|
||||
if provider.get("tts_provider"):
|
||||
return (
|
||||
feature.managed_by_nous
|
||||
and config.get("tts", {}).get("provider") == provider["tts_provider"]
|
||||
and cfg_get(config, "tts", "provider") == provider["tts_provider"]
|
||||
)
|
||||
if "browser_provider" in provider:
|
||||
current = config.get("browser", {}).get("cloud_provider")
|
||||
current = cfg_get(config, "browser", "cloud_provider")
|
||||
return feature.managed_by_nous and provider["browser_provider"] == current
|
||||
if provider.get("web_backend"):
|
||||
current = config.get("web", {}).get("backend")
|
||||
current = cfg_get(config, "web", "backend")
|
||||
return feature.managed_by_nous and current == provider["web_backend"]
|
||||
return feature.managed_by_nous
|
||||
|
||||
if provider.get("tts_provider"):
|
||||
return config.get("tts", {}).get("provider") == provider["tts_provider"]
|
||||
return cfg_get(config, "tts", "provider") == provider["tts_provider"]
|
||||
if "browser_provider" in provider:
|
||||
current = config.get("browser", {}).get("cloud_provider")
|
||||
current = cfg_get(config, "browser", "cloud_provider")
|
||||
return provider["browser_provider"] == current
|
||||
if provider.get("web_backend"):
|
||||
current = config.get("web", {}).get("backend")
|
||||
current = cfg_get(config, "web", "backend")
|
||||
return current == provider["web_backend"]
|
||||
if provider.get("imagegen_backend"):
|
||||
image_cfg = config.get("image_gen", {})
|
||||
|
||||
@@ -33,6 +33,7 @@ if str(PROJECT_ROOT) not in sys.path:
|
||||
|
||||
from hermes_cli import __version__, __release_date__
|
||||
from hermes_cli.config import (
|
||||
cfg_get,
|
||||
DEFAULT_CONFIG,
|
||||
OPTIONAL_ENV_VARS,
|
||||
get_config_path,
|
||||
@@ -2902,7 +2903,7 @@ async def get_dashboard_themes():
|
||||
them without a stub.
|
||||
"""
|
||||
config = load_config()
|
||||
active = config.get("dashboard", {}).get("theme", "default")
|
||||
active = cfg_get(config, "dashboard", "theme", default="default")
|
||||
user_themes = _discover_user_themes()
|
||||
seen = set()
|
||||
themes = []
|
||||
|
||||
@@ -19,6 +19,7 @@ from typing import Dict
|
||||
|
||||
from hermes_constants import display_hermes_home
|
||||
from utils import atomic_replace
|
||||
from hermes_cli.config import cfg_get
|
||||
|
||||
|
||||
_SUBSCRIPTIONS_FILENAME = "webhook_subscriptions.json"
|
||||
@@ -60,7 +61,7 @@ def _get_webhook_config() -> dict:
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
cfg = load_config()
|
||||
return cfg.get("platforms", {}).get("webhook", {})
|
||||
return cfg_get(cfg, "platforms", "webhook", default={})
|
||||
except Exception:
|
||||
return {}
|
||||
|
||||
|
||||
@@ -27,6 +27,7 @@ import logging
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import List, Optional, Tuple
|
||||
from hermes_cli.config import cfg_get
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -314,7 +315,7 @@ def _get_active_memory_provider() -> Optional[str]:
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
config = load_config()
|
||||
return config.get("memory", {}).get("provider") or None
|
||||
return cfg_get(config, "memory", "provider") or None
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
@@ -41,6 +41,7 @@ from typing import Any, Dict, List
|
||||
from agent.memory_provider import MemoryProvider
|
||||
from hermes_constants import get_hermes_home
|
||||
from tools.registry import tool_error
|
||||
from hermes_cli.config import cfg_get
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -913,7 +914,7 @@ class HindsightMemoryProvider(MemoryProvider):
|
||||
self._api_url = self._config.get("api_url") or os.environ.get("HINDSIGHT_API_URL", default_url)
|
||||
self._llm_base_url = self._config.get("llm_base_url", "")
|
||||
|
||||
banks = self._config.get("banks", {}).get("hermes", {})
|
||||
banks = cfg_get(self._config, "banks", "hermes", default={})
|
||||
static_bank_id = self._config.get("bank_id") or banks.get("bankId", "hermes")
|
||||
self._bank_id_template = self._config.get("bank_id_template", "") or ""
|
||||
self._bank_id = _resolve_bank_id_template(
|
||||
|
||||
@@ -26,6 +26,7 @@ from agent.memory_provider import MemoryProvider
|
||||
from tools.registry import tool_error
|
||||
from .store import MemoryStore
|
||||
from .retrieval import FactRetriever
|
||||
from hermes_cli.config import cfg_get
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -102,7 +103,7 @@ def _load_plugin_config() -> dict:
|
||||
import yaml
|
||||
with open(config_path) as f:
|
||||
all_config = yaml.safe_load(f) or {}
|
||||
return all_config.get("plugins", {}).get("hermes-memory-store", {}) or {}
|
||||
return cfg_get(all_config, "plugins", "hermes-memory-store", default={}) or {}
|
||||
except Exception:
|
||||
return {}
|
||||
|
||||
|
||||
@@ -12,6 +12,7 @@ from pathlib import Path
|
||||
|
||||
from hermes_constants import get_hermes_home
|
||||
from plugins.memory.honcho.client import resolve_active_host, resolve_config_path, HOST
|
||||
from hermes_cli.config import cfg_get
|
||||
|
||||
|
||||
def clone_honcho_for_profile(profile_name: str) -> bool:
|
||||
@@ -106,7 +107,7 @@ def cmd_enable(args) -> None:
|
||||
|
||||
# If this is a new profile host block with no settings, clone from default
|
||||
if not block.get("aiPeer"):
|
||||
default_block = cfg.get("hosts", {}).get(HOST, {})
|
||||
default_block = cfg_get(cfg, "hosts", HOST, default={})
|
||||
for key in ("recallMode", "writeFrequency", "sessionStrategy",
|
||||
"contextTokens", "dialecticReasoningLevel", "dialecticDynamic",
|
||||
"dialecticMaxChars", "messageMaxChars", "dialecticMaxInputChars",
|
||||
@@ -139,7 +140,7 @@ def cmd_disable(args) -> None:
|
||||
cfg = _read_config()
|
||||
host = _host_key()
|
||||
label = f"[{host}] " if host != "hermes" else ""
|
||||
block = cfg.get("hosts", {}).get(host, {})
|
||||
block = cfg_get(cfg, "hosts", host, default={})
|
||||
|
||||
if not block or block.get("enabled") is False:
|
||||
print(f" {label}Honcho is already disabled.\n")
|
||||
@@ -212,7 +213,7 @@ def sync_honcho_profiles_quiet() -> int:
|
||||
if not cfg:
|
||||
return 0
|
||||
|
||||
default_block = cfg.get("hosts", {}).get(HOST, {})
|
||||
default_block = cfg_get(cfg, "hosts", HOST, default={})
|
||||
has_key = bool(cfg.get("apiKey") or os.environ.get("HONCHO_API_KEY"))
|
||||
if not default_block and not has_key:
|
||||
return 0
|
||||
|
||||
+14
-5
@@ -160,6 +160,7 @@ from agent.trajectory import (
|
||||
save_trajectory as _save_trajectory_to_file,
|
||||
)
|
||||
from utils import atomic_json_write, base_url_host_matches, base_url_hostname, env_var_enabled, normalize_proxy_url
|
||||
from hermes_cli.config import cfg_get
|
||||
|
||||
|
||||
|
||||
@@ -1788,7 +1789,7 @@ class AIAgent:
|
||||
# compression model. Custom endpoints often cannot report this via
|
||||
# /models, so the startup feasibility check needs the config hint.
|
||||
try:
|
||||
_aux_cfg = _agent_cfg.get("auxiliary", {}).get("compression", {})
|
||||
_aux_cfg = cfg_get(_agent_cfg, "auxiliary", "compression", default={})
|
||||
except Exception:
|
||||
_aux_cfg = {}
|
||||
if isinstance(_aux_cfg, dict):
|
||||
@@ -9940,7 +9941,7 @@ class AIAgent:
|
||||
is_oauth=self._is_anthropic_oauth,
|
||||
preserve_dots=self._anthropic_preserve_dots())
|
||||
summary_response = self._anthropic_messages_create(_ant_kw)
|
||||
_summary_result = _tsum.normalize_response(summary_response)
|
||||
_summary_result = _tsum.normalize_response(summary_response, strip_tool_prefix=self._is_anthropic_oauth)
|
||||
final_response = (_summary_result.content or "").strip()
|
||||
else:
|
||||
summary_response = self._ensure_primary_openai_client(reason="iteration_limit_summary").chat.completions.create(**summary_kwargs)
|
||||
@@ -9970,7 +9971,7 @@ class AIAgent:
|
||||
max_tokens=self.max_tokens, reasoning_config=self.reasoning_config,
|
||||
preserve_dots=self._anthropic_preserve_dots())
|
||||
retry_response = self._anthropic_messages_create(_ant_kw2)
|
||||
_retry_result = _tretry.normalize_response(retry_response)
|
||||
_retry_result = _tretry.normalize_response(retry_response, strip_tool_prefix=self._is_anthropic_oauth)
|
||||
final_response = (_retry_result.content or "").strip()
|
||||
else:
|
||||
summary_kwargs = {
|
||||
@@ -11098,7 +11099,12 @@ class AIAgent:
|
||||
# would have been appended in the non-truncated path.
|
||||
_trunc_msg = None
|
||||
_trunc_transport = self._get_transport()
|
||||
_trunc_result = _trunc_transport.normalize_response(response)
|
||||
if self.api_mode == "anthropic_messages":
|
||||
_trunc_result = _trunc_transport.normalize_response(
|
||||
response, strip_tool_prefix=self._is_anthropic_oauth
|
||||
)
|
||||
else:
|
||||
_trunc_result = _trunc_transport.normalize_response(response)
|
||||
_trunc_msg = _trunc_result
|
||||
|
||||
_trunc_content = getattr(_trunc_msg, "content", None) if _trunc_msg else None
|
||||
@@ -12436,7 +12442,10 @@ class AIAgent:
|
||||
|
||||
try:
|
||||
_transport = self._get_transport()
|
||||
normalized = _transport.normalize_response(response)
|
||||
_normalize_kwargs = {}
|
||||
if self.api_mode == "anthropic_messages":
|
||||
_normalize_kwargs["strip_tool_prefix"] = self._is_anthropic_oauth
|
||||
normalized = _transport.normalize_response(response, **_normalize_kwargs)
|
||||
assistant_message = normalized
|
||||
finish_reason = normalized.finish_reason
|
||||
|
||||
|
||||
Executable
+136
@@ -0,0 +1,136 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Generate ``tools/_manifest.py`` — a static list of built-in tool modules.
|
||||
|
||||
At runtime, ``tools.registry.discover_builtin_tools()`` reads this manifest
|
||||
instead of AST-scanning every ``tools/*.py`` file to find ones that call
|
||||
``registry.register()``. Saves ~145 ms at every CLI/gateway startup.
|
||||
|
||||
When to run:
|
||||
- Automatically: the ``discover_builtin_tools`` fallback triggers when any
|
||||
``tools/*.py`` file has an mtime newer than the manifest. This surfaces
|
||||
a warning in dev. Run this script and commit the regenerated manifest
|
||||
to silence the warning.
|
||||
- Manually: ``python scripts/build_tool_manifest.py``
|
||||
- CI: the build-tools GitHub workflow runs this and diff-checks on every PR.
|
||||
|
||||
Usage:
|
||||
python scripts/build_tool_manifest.py # regenerate in place
|
||||
python scripts/build_tool_manifest.py --check # exit 1 if stale (for CI)
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import ast
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
# Paths are derived from this script's own location: the repository root is
# the directory two levels above this file.
REPO_ROOT = Path(__file__).resolve().parent.parent
TOOLS_DIR = REPO_ROOT.joinpath("tools")
MANIFEST_PATH = TOOLS_DIR.joinpath("_manifest.py")

# Exclusions match tools/registry.py:discover_builtin_tools — these files live
# in tools/ but are infrastructure (not self-registering modules).
SKIP_FILES = {
    "registry.py",
    "mcp_tool.py",  # MCP registers dynamically at runtime, not at import
    "_manifest.py",
    "__init__.py",
}
|
||||
|
||||
|
||||
def _is_registry_register_call(node: ast.AST) -> bool:
    """Tell whether *node* is a bare ``registry.register(...)`` statement.

    Only an expression statement whose value is a call to the ``register``
    attribute of the plain name ``registry`` qualifies.  Assignments, other
    receivers and other method names all yield ``False``.
    """
    if not isinstance(node, ast.Expr):
        return False
    call = node.value
    if not isinstance(call, ast.Call):
        return False
    target = call.func
    if not isinstance(target, ast.Attribute) or target.attr != "register":
        return False
    receiver = target.value
    return isinstance(receiver, ast.Name) and receiver.id == "registry"
|
||||
|
||||
|
||||
def _module_registers_tools(path: Path) -> bool:
    """Return True when the module at *path* calls ``registry.register()`` at top level.

    The file is parsed, never imported, so inspecting it has no side effects.
    Only statements directly in the module body are examined.

    Args:
        path: Python source file to inspect.

    Returns:
        ``True`` if any top-level statement is a ``registry.register(...)``
        call; ``False`` otherwise, including when the file cannot be read,
        is not valid UTF-8, or does not parse.
    """
    try:
        source = path.read_text(encoding="utf-8")
        tree = ast.parse(source, filename=str(path))
    except (OSError, SyntaxError, ValueError):
        # ValueError covers UnicodeDecodeError (file is not valid UTF-8) and
        # the ValueError older Python versions raise for source containing
        # null bytes.  Treat both like a syntax error instead of crashing the
        # generator / CI check with a traceback.
        return False
    return any(_is_registry_register_call(stmt) for stmt in tree.body)
|
||||
|
||||
|
||||
def scan_tool_modules() -> list[str]:
    """Collect the dotted names of self-registering modules in ``tools/``.

    Every ``*.py`` file directly inside ``TOOLS_DIR`` is considered except the
    names listed in ``SKIP_FILES``; a file is kept when
    ``_module_registers_tools`` reports that it registers tools.

    Returns:
        ``tools.<stem>`` module names in sorted order.
    """
    found = []
    for candidate in TOOLS_DIR.glob("*.py"):
        if candidate.name in SKIP_FILES:
            continue
        if _module_registers_tools(candidate):
            found.append(f"tools.{candidate.stem}")
    found.sort()
    return found
|
||||
|
||||
|
||||
# Fixed text that render_manifest() places at the start of the generated file:
# the generated module's docstring followed by the opening of the
# TOOL_MODULES tuple.  This is runtime output, so the literal is left untouched.
MANIFEST_HEADER = '''\
|
||||
"""Auto-generated list of built-in tool modules that call ``registry.register()``.
|
||||
|
||||
DO NOT EDIT MANUALLY. Regenerate with:
|
||||
|
||||
python scripts/build_tool_manifest.py
|
||||
|
||||
This file is read at startup by ``tools.registry.discover_builtin_tools()`` to
|
||||
skip the ~145 ms AST scan of every ``tools/*.py`` file. When a ``tools/*.py``
|
||||
file is added, modified, or removed, the dev-mode mtime check in
|
||||
``discover_builtin_tools`` will log a warning and fall back to the AST scan —
|
||||
run this script to regenerate and commit.
|
||||
|
||||
Only covers *built-in* tools (shipped in ``tools/*.py``). Plugin tools and
|
||||
MCP-registered tools use separate discovery paths and are not listed here.
|
||||
"""
|
||||
|
||||
TOOL_MODULES: tuple[str, ...] = (
|
||||
'''
|
||||
|
||||
# Closes the TOOL_MODULES tuple that MANIFEST_HEADER leaves open.
MANIFEST_FOOTER = ")\n"
|
||||
|
||||
|
||||
def render_manifest(modules: list[str]) -> str:
    """Produce the full source text of the manifest for *modules*.

    The result is ``MANIFEST_HEADER``, then one line per module holding the
    ``repr`` of its name followed by a comma, then ``MANIFEST_FOOTER``.
    Modules are emitted in the order given.
    """
    entries = "".join(f" {name!r},\n" for name in modules)
    return MANIFEST_HEADER + entries + MANIFEST_FOOTER
|
||||
|
||||
|
||||
def main() -> int:
    """Regenerate the manifest, or with ``--check`` verify it without writing.

    Returns:
        ``0`` on success (manifest written, or found up-to-date in check
        mode); ``1`` in check mode when the manifest file is missing or its
        content differs from what would be generated.
    """
    cli = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    cli.add_argument(
        "--check",
        action="store_true",
        help="Exit 1 if the on-disk manifest doesn't match what would be generated (for CI).",
    )
    options = cli.parse_args()

    tool_modules = scan_tool_modules()
    expected = render_manifest(tool_modules)
    count = len(tool_modules)

    if not options.check:
        MANIFEST_PATH.write_text(expected, encoding="utf-8")
        print(f"Wrote {MANIFEST_PATH} ({count} tool modules).")
        return 0

    # Check mode: compare only, never touch the file on disk.
    if not MANIFEST_PATH.exists():
        print(f"{MANIFEST_PATH} is missing — run: python scripts/build_tool_manifest.py", file=sys.stderr)
        return 1
    if MANIFEST_PATH.read_text(encoding="utf-8") != expected:
        print(
            f"{MANIFEST_PATH} is stale — run: python scripts/build_tool_manifest.py",
            file=sys.stderr,
        )
        return 1
    print(f"OK: {MANIFEST_PATH} is up-to-date ({count} tool modules).")
    return 0
|
||||
|
||||
|
||||
# Script entry point: exit with the status code returned by main().
if __name__ == "__main__":
    sys.exit(main())
|
||||
@@ -261,6 +261,7 @@ AUTHOR_MAP = {
|
||||
"154585401+LeonSGP43@users.noreply.github.com": "LeonSGP43",
|
||||
"mgparkprint@gmail.com": "vlwkaos",
|
||||
"tranquil_flow@protonmail.com": "Tranquil-Flow",
|
||||
"LyleLengyel@gmail.com": "mcndjxlefnd",
|
||||
"wangshengyang2004@163.com": "Wangshengyang2004",
|
||||
"hasan.ali13381@gmail.com": "H-Ali13381",
|
||||
"xienb@proton.me": "XieNBi",
|
||||
@@ -412,6 +413,7 @@ AUTHOR_MAP = {
|
||||
"tesseracttars@gmail.com": "tesseracttars-creator",
|
||||
"tianliangjay@gmail.com": "xingkongliang",
|
||||
"tranquil_flow@protonmail.com": "Tranquil-Flow",
|
||||
"LyleLengyel@gmail.com": "mcndjxlefnd",
|
||||
"unayung@gmail.com": "Unayung",
|
||||
"vorvul.danylo@gmail.com": "WorldInnovationsDepartment",
|
||||
"win4r@outlook.com": "win4r",
|
||||
|
||||
@@ -68,33 +68,6 @@ class TestBuildAnthropicClient:
|
||||
assert "fine-grained-tool-streaming-2025-05-14" in betas
|
||||
assert "api_key" not in kwargs
|
||||
|
||||
def test_oauth_does_not_send_claude_code_spoof_headers(self):
|
||||
"""OAuth requests identify as Hermes — no claude-cli UA, no x-app: cli.
|
||||
|
||||
Anthropic's OAuth-gated Messages API accepts requests from non-Claude-Code
|
||||
clients as long as auth is correct and the OAuth beta headers are present.
|
||||
See commit that removed fingerprinting for the live-test write-up.
|
||||
"""
|
||||
with patch("agent.anthropic_adapter._anthropic_sdk") as mock_sdk:
|
||||
build_anthropic_client("sk-ant-oat01-" + "x" * 60)
|
||||
headers = mock_sdk.Anthropic.call_args[1]["default_headers"]
|
||||
assert "user-agent" not in {k.lower() for k in headers}
|
||||
assert "x-app" not in {k.lower() for k in headers}
|
||||
|
||||
def test_oauth_strips_context_1m_beta(self):
|
||||
"""context-1m-2025-08-07 is incompatible with OAuth auth — must be stripped.
|
||||
|
||||
Anthropic returns HTTP 400 "This authentication style is incompatible
|
||||
with the long context beta header." when OAuth traffic carries it.
|
||||
"""
|
||||
with patch("agent.anthropic_adapter._anthropic_sdk") as mock_sdk:
|
||||
build_anthropic_client("sk-ant-oat01-" + "x" * 60)
|
||||
betas = mock_sdk.Anthropic.call_args[1]["default_headers"]["anthropic-beta"]
|
||||
assert "context-1m-2025-08-07" not in betas
|
||||
# But other common betas still flow through
|
||||
assert "interleaved-thinking-2025-05-14" in betas
|
||||
assert "oauth-2025-04-20" in betas
|
||||
|
||||
def test_api_key_uses_api_key(self):
|
||||
with patch("agent.anthropic_adapter._anthropic_sdk") as mock_sdk:
|
||||
build_anthropic_client("sk-ant-api03-something")
|
||||
@@ -113,7 +86,7 @@ class TestBuildAnthropicClient:
|
||||
kwargs = mock_sdk.Anthropic.call_args[1]
|
||||
assert kwargs["base_url"] == "https://custom.api.com"
|
||||
assert kwargs["default_headers"] == {
|
||||
"anthropic-beta": "interleaved-thinking-2025-05-14,fine-grained-tool-streaming-2025-05-14"
|
||||
"anthropic-beta": "interleaved-thinking-2025-05-14,fine-grained-tool-streaming-2025-05-14,context-1m-2025-08-07"
|
||||
}
|
||||
|
||||
def test_minimax_anthropic_endpoint_uses_bearer_auth_for_regular_api_keys(self):
|
||||
|
||||
@@ -0,0 +1,487 @@
|
||||
"""Tests for agent/curator.py — orchestrator, idle gating, state transitions.
|
||||
|
||||
LLM spawning is never exercised here — `_run_llm_review` is monkeypatched so
|
||||
tests run fully offline and the curator module doesn't need real credentials.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import importlib
|
||||
import json
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.fixture
def curator_env(tmp_path, monkeypatch):
    """Provide an isolated HERMES_HOME plus freshly reloaded curator modules.

    Creates ``<tmp_path>/.hermes/skills``, points ``Path.home`` and the
    ``HERMES_HOME`` environment variable at the temporary tree, reloads
    ``tools.skill_usage`` and ``agent.curator``, and stubs the curator's
    ``_run_llm_review`` and ``_load_config`` hooks.

    Returns:
        A dict with keys ``home`` (the temporary ``.hermes`` directory),
        ``curator`` and ``usage`` (the reloaded modules).
    """
    hermes_home = tmp_path / ".hermes"
    hermes_home.joinpath("skills").mkdir(parents=True)
    monkeypatch.setattr(Path, "home", lambda: tmp_path)
    monkeypatch.setenv("HERMES_HOME", str(hermes_home))

    # Reload only after the environment is patched — presumably so any
    # module-level state is recomputed against the temporary home.
    import tools.skill_usage as usage
    importlib.reload(usage)
    import agent.curator as curator
    importlib.reload(curator)

    # Neutralize the real LLM pass by default — tests opt in per-case.
    monkeypatch.setattr(curator, "_run_llm_review", lambda prompt: "llm-stub")
    # Default: no config file → curator defaults. Tests can override.
    monkeypatch.setattr(curator, "_load_config", lambda: {})

    return dict(home=hermes_home, curator=curator, usage=usage)
|
||||
|
||||
|
||||
def _write_skill(skills_dir: Path, name: str):
    """Create a minimal skill called *name* under *skills_dir*.

    Ensures the ``<skills_dir>/<name>`` directory exists (creating parents as
    needed) and writes a ``SKILL.md`` containing only a front-matter block
    with the skill name and a placeholder description.

    Returns:
        The path of the skill directory.
    """
    skill_dir = skills_dir / name
    skill_dir.mkdir(parents=True, exist_ok=True)
    front_matter = f"---\nname: {name}\ndescription: x\n---\n"
    skill_dir.joinpath("SKILL.md").write_text(front_matter, encoding="utf-8")
    return skill_dir
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Config gates
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_curator_enabled_default_true(curator_env):
    """With the fixture's empty config, the curator reports itself enabled."""
    curator = curator_env["curator"]
    assert curator.is_enabled() is True
|
||||
|
||||
|
||||
def test_curator_disabled_via_config(curator_env, monkeypatch):
    """``enabled: False`` in config turns off both the flag and scheduling."""
    curator = curator_env["curator"]

    def _disabled_config():
        return {"enabled": False}

    monkeypatch.setattr(curator, "_load_config", _disabled_config)
    assert curator.is_enabled() is False
    assert curator.should_run_now() is False
|
||||
|
||||
|
||||
def test_curator_defaults(curator_env):
    """Default thresholds: weekly interval, 2h idle, 30d stale, 90d archive."""
    curator = curator_env["curator"]
    assert curator.get_interval_hours() == 24 * 7  # 7 days
    assert curator.get_min_idle_hours() == 2
    assert curator.get_stale_after_days() == 30
    assert curator.get_archive_after_days() == 90
|
||||
|
||||
|
||||
def test_curator_config_overrides(curator_env, monkeypatch):
    """Values supplied by ``_load_config`` replace every default threshold."""
    curator = curator_env["curator"]

    def _custom_config():
        return {
            "interval_hours": 12,
            "min_idle_hours": 0.5,
            "stale_after_days": 7,
            "archive_after_days": 60,
        }

    monkeypatch.setattr(curator, "_load_config", _custom_config)
    assert curator.get_interval_hours() == 12
    assert curator.get_min_idle_hours() == 0.5
    assert curator.get_stale_after_days() == 7
    assert curator.get_archive_after_days() == 60
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# should_run_now
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_first_run_always_eligible(curator_env):
    """With no saved state at all, the curator is eligible to run."""
    curator = curator_env["curator"]
    assert curator.should_run_now() is True
|
||||
|
||||
|
||||
def test_recent_run_blocks(curator_env):
    """A run recorded just now must block an immediate re-run."""
    curator = curator_env["curator"]
    just_now = datetime.now(timezone.utc).isoformat()
    curator.save_state({"last_run_at": just_now, "paused": False})
    assert curator.should_run_now() is False
|
||||
|
||||
|
||||
def test_old_run_eligible(curator_env):
    """A run older than the configured interval should re-trigger.

    A 2x-interval cushion keeps the test decoupled from the exact default,
    so bumping DEFAULT_INTERVAL_HOURS shouldn't break it.
    """
    curator = curator_env["curator"]
    cushion = timedelta(hours=curator.get_interval_hours() * 2)
    long_ago = datetime.now(timezone.utc) - cushion
    curator.save_state({"last_run_at": long_ago.isoformat(), "paused": False})
    assert curator.should_run_now() is True
|
||||
|
||||
|
||||
def test_paused_blocks_even_if_stale(curator_env):
    """The paused flag blocks a run even when the last run is 30 days old."""
    curator = curator_env["curator"]
    thirty_days_back = datetime.now(timezone.utc) - timedelta(days=30)
    state = {"last_run_at": thirty_days_back.isoformat(), "paused": True}
    curator.save_state(state)
    assert curator.should_run_now() is False
|
||||
|
||||
|
||||
def test_set_paused_roundtrip(curator_env):
    """``set_paused`` is reflected by ``is_paused`` in both directions."""
    curator = curator_env["curator"]

    curator.set_paused(True)
    assert curator.is_paused() is True

    curator.set_paused(False)
    assert curator.is_paused() is False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Automatic state transitions
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_unused_skill_transitions_to_stale(curator_env):
    """A skill last used 45 days ago is marked stale by the automatic pass."""
    curator = curator_env["curator"]
    usage = curator_env["usage"]
    skills_dir = curator_env["home"] / "skills"
    _write_skill(skills_dir, "old-skill")

    # Record last-use well past stale_after_days (30 default)
    long_ago = (datetime.now(timezone.utc) - timedelta(days=45)).isoformat()
    records = usage.load_usage()
    records["old-skill"] = usage._empty_record()
    records["old-skill"].update(last_used_at=long_ago, created_at=long_ago)
    usage.save_usage(records)

    counts = curator.apply_automatic_transitions()
    assert counts["marked_stale"] == 1
    assert usage.get_record("old-skill")["state"] == "stale"
|
||||
|
||||
|
||||
def test_very_old_skill_gets_archived(curator_env):
    """A skill unused for 120 days is moved under ``.archive`` and flagged."""
    curator = curator_env["curator"]
    usage = curator_env["usage"]
    skills_dir = curator_env["home"] / "skills"
    skill_dir = _write_skill(skills_dir, "ancient")

    super_old = (datetime.now(timezone.utc) - timedelta(days=120)).isoformat()
    records = usage.load_usage()
    records["ancient"] = usage._empty_record()
    records["ancient"].update(last_used_at=super_old, created_at=super_old)
    usage.save_usage(records)

    counts = curator.apply_automatic_transitions()
    assert counts["archived"] == 1
    # The live directory is gone and its contents now sit in the archive.
    assert not skill_dir.exists()
    assert (skills_dir / ".archive" / "ancient" / "SKILL.md").exists()
    assert usage.get_record("ancient")["state"] == "archived"
|
||||
|
||||
|
||||
def test_pinned_skill_is_never_touched(curator_env):
    """A pinned skill stays active no matter how long it has gone unused."""
    curator = curator_env["curator"]
    usage = curator_env["usage"]
    skills_dir = curator_env["home"] / "skills"
    _write_skill(skills_dir, "precious")

    super_old = (datetime.now(timezone.utc) - timedelta(days=365)).isoformat()
    records = usage.load_usage()
    records["precious"] = usage._empty_record()
    records["precious"].update(
        last_used_at=super_old,
        created_at=super_old,
        pinned=True,
    )
    usage.save_usage(records)

    counts = curator.apply_automatic_transitions()
    assert counts["archived"] == 0
    assert counts["marked_stale"] == 0
    record = usage.get_record("precious")
    assert record["state"] == "active"  # untouched
    assert record["pinned"] is True
|
||||
|
||||
|
||||
def test_stale_skill_reactivates_on_recent_use(curator_env):
    """A stale record with a fresh ``last_used_at`` flips back to active."""
    curator = curator_env["curator"]
    usage = curator_env["usage"]
    skills_dir = curator_env["home"] / "skills"
    _write_skill(skills_dir, "revived")

    recent = datetime.now(timezone.utc).isoformat()
    records = usage.load_usage()
    records["revived"] = usage._empty_record()
    records["revived"].update(
        state="stale",
        last_used_at=recent,
        created_at=recent,
    )
    usage.save_usage(records)

    counts = curator.apply_automatic_transitions()
    assert counts["reactivated"] == 1
    assert usage.get_record("revived")["state"] == "active"
|
||||
|
||||
|
||||
def test_new_skill_without_last_used_not_immediately_archived(curator_env):
    """A freshly-created skill with no use history should not get archived
    just because last_used_at is None."""
    c = curator_env["curator"]
    skills_dir = curator_env["home"] / "skills"
    _write_skill(skills_dir, "fresh")

    # Bump nothing — record doesn't exist yet. Curator should create it
    # and fall back to created_at which is ~now.
    counts = c.apply_automatic_transitions()
    assert counts["archived"] == 0
    assert counts["marked_stale"] == 0
    assert (skills_dir / "fresh").exists()
|
||||
|
||||
|
||||
def test_bundled_skill_not_touched_by_transitions(curator_env):
    """A skill listed in ``.bundled_manifest`` is skipped by the automatic pass."""
    curator = curator_env["curator"]
    usage = curator_env["usage"]
    skills_dir = curator_env["home"] / "skills"
    _write_skill(skills_dir, "bundled")
    manifest = skills_dir / ".bundled_manifest"
    manifest.write_text("bundled:abc\n", encoding="utf-8")

    super_old = (datetime.now(timezone.utc) - timedelta(days=500)).isoformat()
    records = usage.load_usage()
    records["bundled"] = usage._empty_record()
    records["bundled"]["last_used_at"] = super_old
    usage.save_usage(records)

    counts = curator.apply_automatic_transitions()
    # bundled skills are excluded from the agent-created list entirely
    assert counts["checked"] == 0
    assert (skills_dir / "bundled").exists()  # never moved
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# run_curator_review orchestration
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_run_review_records_state(curator_env):
    """A synchronous review persists last-run time, run count and summary."""
    curator = curator_env["curator"]
    _write_skill(curator_env["home"] / "skills", "a")

    result = curator.run_curator_review(synchronous=True)
    assert "started_at" in result

    persisted = curator.load_state()
    assert persisted["last_run_at"] is not None
    assert persisted["run_count"] >= 1
    assert persisted["last_run_summary"] is not None
|
||||
|
||||
|
||||
def test_run_review_synchronous_invokes_llm_stub(curator_env, monkeypatch):
    """A synchronous review calls the LLM hook once and reports its summary.

    ``_run_llm_review`` is replaced with a stub that records the prompt it
    receives and returns a canned metadata dict; the test then checks the
    prompt content and that ``on_summary`` saw the stubbed summary text.
    """
    c = curator_env["curator"]
    skills_dir = curator_env["home"] / "skills"
    _write_skill(skills_dir, "a")

    calls = []

    def _stub(prompt):
        calls.append(prompt)
        return {
            "final": "stubbed-summary",
            "summary": "stubbed-summary",
            "model": "stub-model",
            "provider": "stub-provider",
            "tool_calls": [],
            "error": None,
        }

    monkeypatch.setattr(c, "_run_llm_review", _stub)

    captured = []
    c.run_curator_review(on_summary=captured.append, synchronous=True)

    assert len(calls) == 1
    # "skill CURATOR" contains "CURATOR", so one membership check suffices.
    assert "CURATOR" in calls[0]
    assert captured  # on_summary was called
    assert any("stubbed-summary" in s for s in captured)
|
||||
|
||||
|
||||
def test_run_review_skips_llm_when_no_candidates(curator_env, monkeypatch):
    """With an empty skills dir the LLM hook is not called and a skip is reported."""
    curator = curator_env["curator"]
    # No skills in the dir → no candidates
    prompts_seen = []

    def _recording_stub(prompt):
        prompts_seen.append(prompt)
        return "never-called"

    monkeypatch.setattr(curator, "_run_llm_review", _recording_stub)

    summaries = []
    curator.run_curator_review(
        on_summary=lambda s: summaries.append(s),
        synchronous=True,
    )

    assert prompts_seen == []  # LLM not invoked
    assert any("skipped" in s for s in summaries)
|
||||
|
||||
|
||||
def test_maybe_run_curator_respects_disabled(curator_env, monkeypatch):
    """``maybe_run_curator`` returns None when the config disables the curator."""
    curator = curator_env["curator"]

    def _disabled_config():
        return {"enabled": False}

    monkeypatch.setattr(curator, "_load_config", _disabled_config)
    outcome = curator.maybe_run_curator()
    assert outcome is None
|
||||
|
||||
|
||||
def test_maybe_run_curator_enforces_idle_gate(curator_env, monkeypatch):
    """Sixty seconds of idle time is below a 2-hour minimum, so nothing runs."""
    curator = curator_env["curator"]

    def _idle_config():
        return {"min_idle_hours": 2}

    monkeypatch.setattr(curator, "_load_config", _idle_config)
    # idle less than the threshold
    outcome = curator.maybe_run_curator(idle_for_seconds=60.0)
    assert outcome is None
|
||||
|
||||
|
||||
def test_maybe_run_curator_runs_when_eligible(curator_env, monkeypatch):
    """With a skill present and ample idle time, a run result is returned."""
    curator = curator_env["curator"]
    _write_skill(curator_env["home"] / "skills", "a")
    # Force idle over threshold
    outcome = curator.maybe_run_curator(idle_for_seconds=99999.0)
    assert outcome is not None
    assert "started_at" in outcome
|
||||
|
||||
|
||||
def test_maybe_run_curator_swallows_exceptions(curator_env, monkeypatch):
    """An exception raised by ``should_run_now`` yields None, not a raise."""
    curator = curator_env["curator"]

    def _raise_boom():
        raise RuntimeError("boom")

    monkeypatch.setattr(curator, "should_run_now", _raise_boom)
    # Must not raise
    outcome = curator.maybe_run_curator()
    assert outcome is None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Persistence
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_state_file_survives_corrupt_read(curator_env):
    """A state file holding non-JSON text loads as the default state."""
    curator = curator_env["curator"]
    state_path = curator._state_file()
    state_path.write_text("not json", encoding="utf-8")
    # Must fall back to default, not raise
    assert curator.load_state() == curator._default_state()
|
||||
|
||||
|
||||
def test_state_atomic_write_no_tmp_leftovers(curator_env):
    """After ``save_state`` no ``.curator_state_*`` file remains beside the state file."""
    curator = curator_env["curator"]
    curator.save_state({"paused": True})
    state_dir = curator._state_file().parent
    for entry in state_dir.iterdir():
        is_tmp = entry.name.startswith(".curator_state_")
        assert not is_tmp, f"tmp leftover: {entry.name}"
|
||||
|
||||
|
||||
def test_curator_review_prompt_has_invariants():
    """Core invariants must be in the review prompt text."""
    from agent.curator import CURATOR_REVIEW_PROMPT

    lowered = CURATOR_REVIEW_PROMPT.lower()

    assert "MUST NOT" in CURATOR_REVIEW_PROMPT or "DO NOT" in CURATOR_REVIEW_PROMPT
    assert "bundled" in lowered
    assert "delete" in lowered
    assert "pinned" in lowered
    # Must describe the actions the reviewer can take. The exact vocabulary
    # has tightened over time (the umbrella-first prompt drops 'keep' as a
    # first-class decision verb, since passive keep-everything is the
    # failure mode the prompt is trying to avoid), but the core merge /
    # archive / patch trio must remain callable.
    for verb in ("patch", "archive"):
        assert verb in lowered
    # Must mention consolidation (possibly via "merge" or "consolidat")
    assert "consolidat" in lowered or "merge" in lowered
|
||||
|
||||
|
||||
def test_curator_review_prompt_points_at_existing_tools_only():
    """The review prompt must rely on existing tools (skill_manage + terminal)
    and must NOT reference bespoke curator tools that are not registered
    model tools."""
    from agent.curator import CURATOR_REVIEW_PROMPT as prompt

    assert "skill_manage" in prompt
    assert "skills_list" in prompt
    assert "skill_view" in prompt
    assert "terminal" in prompt.lower()
    # These would be nice but aren't actually registered as tools — the
    # curator uses skill_manage + terminal mv instead.
    assert "archive_skill" not in prompt
    assert "pin_skill" not in prompt
|
||||
|
||||
|
||||
def test_curator_does_not_instruct_model_to_pin():
    """Pinning is a user opt-out, not a model decision. The prompt should
    not tell the reviewer to pin skills autonomously."""
    from agent.curator import CURATOR_REVIEW_PROMPT

    # "pinned" appears in the invariant ("skip pinned skills"), but "pin"
    # as a decision verb should not.
    prompt_lines = CURATOR_REVIEW_PROMPT.split("\n")
    action_prefixes = ("keep", "patch", "archive", "consolidate", "pin ")
    # Collected only to give the failure message some context.
    decision_block = "\n".join(
        line for line in prompt_lines
        if line.strip().startswith(action_prefixes)
    )
    # No standalone "pin" action line
    has_pin_action = any(
        line.strip().startswith("pin ") for line in prompt_lines
    )
    assert not has_pin_action, (
        f"Found a pin action line in:\n{decision_block}"
    )
|
||||
|
||||
|
||||
def test_curator_review_prompt_is_umbrella_first():
    """The curator prompt must push umbrella-building / class-level thinking,
    not pair-level 'are these two the same?' analysis."""
    from agent.curator import CURATOR_REVIEW_PROMPT as prompt

    lower = prompt.lower()

    # Must frame the task as active umbrella-building, not a passive audit.
    assert "umbrella" in lower, (
        "must use UMBRELLA framing — the class-first abstraction the curator "
        "is designed to produce"
    )
    # Must tell the reviewer not to stop at pair-level distinctness.
    assert "class" in lower, "must reference class-level thinking"
    # Must cover the three consolidation methods explicitly
    assert "references/" in prompt, (
        "must name references/ as a demotion target for session-specific content"
    )
    # templates/ and scripts/ make the umbrella a real class-level skill
    assert "templates/" in prompt
    assert "scripts/" in prompt
    # Must say the counter argument: usage=0 is not a reason to skip
    assert "use_count" in prompt or "counter" in lower, (
        "must pre-empt the 'usage counters are zero, I can't judge' bailout"
    )
|
||||
|
||||
|
||||
def test_curator_review_prompt_offers_support_file_actions():
    """Support-file demotion (references/templates/scripts) must be one of
    the three consolidation methods, alongside merge-into-existing and
    create-new-umbrella."""
    from agent.curator import CURATOR_REVIEW_PROMPT as prompt

    # skill_manage action=write_file is how references/ are added to an
    # existing skill — this is the create-adjacent action the curator needs
    # to demote narrow siblings without touching their SKILL.md.
    assert "write_file" in prompt
    # Must offer creating a brand-new umbrella when no existing one fits
    assert "action=create" in prompt or "create a new umbrella" in prompt.lower()
|
||||
|
||||
|
||||
|
||||
def test_cli_unpin_refuses_bundled_skill(curator_env, capsys):
    """hermes curator unpin must refuse bundled/hub skills too (matches pin)."""
    from hermes_cli import curator as cli

    skills_dir = curator_env["home"] / "skills"
    _write_skill(skills_dir, "ship-skill")
    manifest = skills_dir / ".bundled_manifest"
    manifest.write_text("ship-skill:abc\n", encoding="utf-8")

    # Minimal stand-in for the parsed CLI arguments.
    class _Args:
        skill = "ship-skill"

    rc = cli._cmd_unpin(_Args())
    stdout = capsys.readouterr().out.lower()
    assert rc == 1
    assert "bundled" in stdout or "hub" in stdout
|
||||
|
||||
|
||||
def test_cli_pin_refuses_bundled_skill(curator_env, capsys):
    """Pinning a skill listed in ``.bundled_manifest`` exits 1 with an explanation."""
    from hermes_cli import curator as cli

    skills_dir = curator_env["home"] / "skills"
    _write_skill(skills_dir, "ship-skill")
    manifest = skills_dir / ".bundled_manifest"
    manifest.write_text("ship-skill:abc\n", encoding="utf-8")

    # Minimal stand-in for the parsed CLI arguments.
    class _Args:
        skill = "ship-skill"

    rc = cli._cmd_pin(_Args())
    stdout = capsys.readouterr().out.lower()
    assert rc == 1
    assert "bundled" in stdout or "hub" in stdout
|
||||
@@ -0,0 +1,258 @@
|
||||
"""Tests for the curator per-run report writer (run.json + REPORT.md).
|
||||
|
||||
Reports live under ``~/.hermes/logs/curator/{YYYYMMDD-HHMMSS}/`` alongside
|
||||
the standard log dir, not inside the user's ``skills/`` data directory.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
from datetime import datetime, timezone, timedelta
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.fixture
def curator_env(tmp_path, monkeypatch):
    """Isolated HERMES_HOME with skills/ and logs/ dirs plus reloaded modules.

    Yields a dict with the temporary home (``home``) and the reloaded
    ``agent.curator`` (``curator``) and ``tools.skill_usage``
    (``skill_usage``) modules.
    """
    hermes_home = tmp_path / ".hermes"
    hermes_home.mkdir()
    for subdir in ("skills", "logs"):
        (hermes_home / subdir).mkdir()
    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
    monkeypatch.setattr(Path, "home", lambda: tmp_path)

    # Reload only after the environment has been patched.
    import importlib
    import hermes_constants
    importlib.reload(hermes_constants)
    from agent import curator
    importlib.reload(curator)
    from tools import skill_usage
    importlib.reload(skill_usage)

    yield {"home": hermes_home, "curator": curator, "skill_usage": skill_usage}
|
||||
|
||||
|
||||
def _make_llm_meta(**overrides):
|
||||
base = {
|
||||
"final": "short summary of the pass",
|
||||
"summary": "short summary",
|
||||
"model": "test-model",
|
||||
"provider": "test-provider",
|
||||
"tool_calls": [],
|
||||
"error": None,
|
||||
}
|
||||
base.update(overrides)
|
||||
return base
|
||||
|
||||
|
||||
def test_reports_root_is_under_logs_not_skills(curator_env):
    """Reports live in logs/curator/, not skills/ — operational telemetry
    belongs with the logs, not with user-authored skill data."""
    curator = curator_env["curator"]
    expected_root = curator_env["home"] / "logs" / "curator"
    root = curator._reports_root()
    # Must be under logs/
    assert root == expected_root
    # Must NOT be under skills/
    assert "skills" not in root.parts
|
||||
|
||||
|
||||
def test_write_run_report_creates_both_files(curator_env):
    """Each run writes both a run.json (machine) and a REPORT.md (human)."""
    curator = curator_env["curator"]
    started = datetime.now(timezone.utc)
    auto_counts = {"checked": 5, "marked_stale": 1, "archived": 0, "reactivated": 0}

    run_dir = curator._write_run_report(
        started_at=started,
        elapsed_seconds=12.345,
        auto_counts=auto_counts,
        auto_summary="1 marked stale",
        before_report=[],
        before_names=set(),
        after_report=[],
        llm_meta=_make_llm_meta(),
    )
    assert run_dir is not None
    assert run_dir.is_dir()
    for filename in ("run.json", "REPORT.md"):
        assert (run_dir / filename).exists()

    # The directory name is a timestamp under logs/curator/
    assert run_dir.parent == curator._reports_root()
|
||||
|
||||
|
||||
def test_run_json_has_expected_shape(curator_env):
    """run.json must carry the machine-readable fields downstream tooling needs.

    Writes a report from before/after snapshots that differ by one removed
    and one added skill, then checks the top-level keys, the diff lists,
    the derived counts and the per-tool call tally.
    """
    curator = curator_env["curator"]
    start = datetime.now(timezone.utc)

    before_report = [
        {"name": "old-thing", "state": "active", "pinned": False},
        {"name": "keeper", "state": "active", "pinned": True},
    ]
    after_report = [
        {"name": "keeper", "state": "active", "pinned": True},
        {"name": "new-umbrella", "state": "active", "pinned": False},
    ]

    run_dir = curator._write_run_report(
        started_at=start,
        elapsed_seconds=42.0,
        auto_counts={"checked": 2, "marked_stale": 0, "archived": 0, "reactivated": 0},
        auto_summary="no changes",
        before_report=before_report,
        before_names={r["name"] for r in before_report},
        after_report=after_report,
        llm_meta=_make_llm_meta(
            final="I consolidated the whole universe.",
            tool_calls=[
                {"name": "skills_list", "arguments": "{}"},
                {"name": "skill_manage", "arguments": '{"action":"create"}'},
                {"name": "terminal", "arguments": "mv ..."},
            ],
        ),
    )
    # Explicit encoding so the read does not depend on the platform locale.
    # NOTE(review): assumes the report writer emits UTF-8 — confirm.
    payload = json.loads((run_dir / "run.json").read_text(encoding="utf-8"))

    # top-level shape
    for k in (
        "started_at", "duration_seconds", "model", "provider",
        "auto_transitions", "counts", "tool_call_counts",
        "archived", "added", "state_transitions",
        "llm_final", "llm_summary", "llm_error", "tool_calls",
    ):
        assert k in payload, f"missing key: {k}"

    # Diff logic
    assert payload["archived"] == ["old-thing"]
    assert payload["added"] == ["new-umbrella"]
    # Counts reflect the diff
    assert payload["counts"]["before"] == 2
    assert payload["counts"]["after"] == 2
    assert payload["counts"]["archived_this_run"] == 1
    assert payload["counts"]["added_this_run"] == 1
    # Tool call counts are aggregated
    assert payload["tool_call_counts"]["skills_list"] == 1
    assert payload["tool_call_counts"]["skill_manage"] == 1
    assert payload["tool_call_counts"]["terminal"] == 1
    assert payload["counts"]["tool_calls_total"] == 3
|
||||
|
||||
|
||||
def test_report_md_is_human_readable(curator_env):
    """REPORT.md should be a valid markdown doc with the key sections visible.

    Checks the section headings, the model/provider passed in, the
    archived/added skill names, and that the LLM's final text is present.
    """
    curator = curator_env["curator"]
    start = datetime.now(timezone.utc)

    run_dir = curator._write_run_report(
        started_at=start,
        elapsed_seconds=75.0,
        auto_counts={"checked": 10, "marked_stale": 2, "archived": 1, "reactivated": 0},
        auto_summary="2 marked stale, 1 archived",
        before_report=[{"name": "foo", "state": "active", "pinned": False}],
        before_names={"foo"},
        after_report=[{"name": "foo-umbrella", "state": "active", "pinned": False}],
        llm_meta=_make_llm_meta(
            final="Consolidated foo-like skills into foo-umbrella.",
            model="claude-opus-4.7",
            provider="openrouter",
        ),
    )
    # Explicit encoding so the read does not depend on the platform locale.
    # NOTE(review): assumes the report writer emits UTF-8 — confirm.
    md = (run_dir / "REPORT.md").read_text(encoding="utf-8")

    # Structural checks
    assert "# Curator run" in md
    assert "Auto-transitions" in md
    assert "LLM consolidation pass" in md
    assert "Recovery" in md

    # The model / provider we passed in show up
    assert "claude-opus-4.7" in md
    assert "openrouter" in md

    # The added/archived lists are present
    assert "Skills archived" in md
    assert "`foo`" in md
    assert "New skills this run" in md
    assert "`foo-umbrella`" in md

    # The full LLM final response is included verbatim (no 240-char truncation)
    assert "Consolidated foo-like skills into foo-umbrella." in md
|
||||
|
||||
|
||||
def test_same_second_reruns_get_unique_dirs(curator_env):
    """If the curator somehow runs twice in the same second, the second
    report still gets its own directory rather than overwriting the first."""
    curator = curator_env["curator"]
    fixed_start = datetime(2026, 4, 29, 5, 33, 34, tzinfo=timezone.utc)

    report_args = {
        "started_at": fixed_start,
        "elapsed_seconds": 1.0,
        "auto_counts": {"checked": 0, "marked_stale": 0, "archived": 0, "reactivated": 0},
        "auto_summary": "no changes",
        "before_report": [],
        "before_names": set(),
        "after_report": [],
        "llm_meta": _make_llm_meta(),
    }
    first = curator._write_run_report(**report_args)
    second = curator._write_run_report(**report_args)
    assert first != second
    assert first is not None and second is not None
    # Second dir has a numeric disambiguator suffix
    assert second.name.startswith(first.name)
|
||||
|
||||
|
||||
def test_report_captures_llm_error_and_continues(curator_env):
    """If the LLM pass recorded an error, the report still writes and
    surfaces the error prominently.

    The error text must appear in REPORT.md and be stored verbatim under
    ``llm_error`` in run.json.
    """
    curator = curator_env["curator"]
    run_dir = curator._write_run_report(
        started_at=datetime.now(timezone.utc),
        elapsed_seconds=2.0,
        auto_counts={"checked": 0, "marked_stale": 0, "archived": 0, "reactivated": 0},
        auto_summary="no changes",
        before_report=[],
        before_names=set(),
        after_report=[],
        llm_meta=_make_llm_meta(
            error="HTTP 400: No models provided",
            final="",
            summary="error",
        ),
    )
    # Explicit encoding so the reads do not depend on the platform locale.
    # NOTE(review): assumes the report writer emits UTF-8 — confirm.
    md = (run_dir / "REPORT.md").read_text(encoding="utf-8")
    assert "HTTP 400" in md
    payload = json.loads((run_dir / "run.json").read_text(encoding="utf-8"))
    assert payload["llm_error"] == "HTTP 400: No models provided"
|
||||
|
||||
|
||||
def test_state_transitions_captured_in_report(curator_env):
    """When a skill moves active → stale or stale → archived between
    before/after snapshots, the report records it.

    Verified both in run.json (``state_transitions`` list) and in the
    human-readable REPORT.md.
    """
    curator = curator_env["curator"]
    start = datetime.now(timezone.utc)

    before = [{"name": "getting-old", "state": "active", "pinned": False}]
    after = [{"name": "getting-old", "state": "stale", "pinned": False}]

    run_dir = curator._write_run_report(
        started_at=start,
        elapsed_seconds=1.0,
        auto_counts={"checked": 1, "marked_stale": 1, "archived": 0, "reactivated": 0},
        auto_summary="1 marked stale",
        before_report=before,
        before_names={r["name"] for r in before},
        after_report=after,
        llm_meta=_make_llm_meta(),
    )
    # Read as UTF-8 explicitly: the report is expected to contain the
    # non-ASCII arrow asserted below, which a locale-default decode (e.g.
    # cp1252) would not round-trip.
    payload = json.loads((run_dir / "run.json").read_text(encoding="utf-8"))
    assert payload["state_transitions"] == [
        {"name": "getting-old", "from": "active", "to": "stale"}
    ]
    md = (run_dir / "REPORT.md").read_text(encoding="utf-8")
    assert "State transitions" in md
    assert "getting-old" in md
    assert "active → stale" in md
|
||||
@@ -1,9 +1,11 @@
|
||||
"""Tests for CLI browser CDP auto-launch helpers."""
|
||||
|
||||
import os
|
||||
import subprocess
|
||||
from unittest.mock import patch
|
||||
|
||||
from cli import HermesCLI
|
||||
from hermes_cli.browser_connect import manual_chrome_debug_command
|
||||
|
||||
|
||||
def _assert_chrome_debug_cmd(cmd, expected_chrome, expected_port):
|
||||
@@ -26,13 +28,19 @@ class TestChromeDebugLaunch:
|
||||
captured["kwargs"] = kwargs
|
||||
return object()
|
||||
|
||||
with patch("cli.shutil.which", side_effect=lambda name: r"C:\Chrome\chrome.exe" if name == "chrome.exe" else None), \
|
||||
patch("cli.os.path.isfile", side_effect=lambda path: path == r"C:\Chrome\chrome.exe"), \
|
||||
with patch("hermes_cli.browser_connect.shutil.which", side_effect=lambda name: r"C:\Chrome\chrome.exe" if name == "chrome.exe" else None), \
|
||||
patch("hermes_cli.browser_connect.os.path.isfile", side_effect=lambda path: path == r"C:\Chrome\chrome.exe"), \
|
||||
patch("subprocess.Popen", side_effect=fake_popen):
|
||||
assert HermesCLI._try_launch_chrome_debug(9333, "Windows") is True
|
||||
|
||||
_assert_chrome_debug_cmd(captured["cmd"], r"C:\Chrome\chrome.exe", 9333)
|
||||
assert captured["kwargs"]["start_new_session"] is True
|
||||
# Windows uses creationflags (POSIX-only start_new_session would raise).
|
||||
assert "start_new_session" not in captured["kwargs"]
|
||||
flags = captured["kwargs"].get("creationflags", 0)
|
||||
expected = getattr(subprocess, "DETACHED_PROCESS", 0) | getattr(
|
||||
subprocess, "CREATE_NEW_PROCESS_GROUP", 0
|
||||
)
|
||||
assert flags == expected
|
||||
|
||||
def test_windows_launch_falls_back_to_common_install_dirs(self, monkeypatch):
|
||||
captured = {}
|
||||
@@ -49,9 +57,45 @@ class TestChromeDebugLaunch:
|
||||
monkeypatch.delenv("ProgramFiles(x86)", raising=False)
|
||||
monkeypatch.delenv("LOCALAPPDATA", raising=False)
|
||||
|
||||
with patch("cli.shutil.which", return_value=None), \
|
||||
patch("cli.os.path.isfile", side_effect=lambda path: path == installed), \
|
||||
with patch("hermes_cli.browser_connect.shutil.which", return_value=None), \
|
||||
patch("hermes_cli.browser_connect.os.path.isfile", side_effect=lambda path: path == installed), \
|
||||
patch("subprocess.Popen", side_effect=fake_popen):
|
||||
assert HermesCLI._try_launch_chrome_debug(9222, "Windows") is True
|
||||
|
||||
_assert_chrome_debug_cmd(captured["cmd"], installed, 9222)
|
||||
|
||||
def test_manual_command_uses_detected_linux_browser(self):
    """On Linux the manual command starts with the browser found via ``which``."""
    chromium = "/usr/bin/chromium"

    def fake_which(name):
        return chromium if name == "chromium" else None

    def fake_isfile(path):
        return path == chromium

    with patch("hermes_cli.browser_connect.shutil.which", side_effect=fake_which):
        with patch("hermes_cli.browser_connect.os.path.isfile", side_effect=fake_isfile):
            command = manual_chrome_debug_command(9222, "Linux")

    assert command is not None
    assert command.startswith("/usr/bin/chromium --remote-debugging-port=9222")
|
||||
|
||||
def test_manual_command_uses_wsl_windows_chrome_when_available(self):
    """With nothing on PATH, a Chrome binary under /mnt/c is used and quoted."""
    chrome = "/mnt/c/Program Files/Google/Chrome/Application/chrome.exe"

    def fake_isfile(path):
        return path == chrome

    with patch("hermes_cli.browser_connect.shutil.which", return_value=None):
        with patch("hermes_cli.browser_connect.os.path.isfile", side_effect=fake_isfile):
            command = manual_chrome_debug_command(9222, "Linux")

    assert command is not None
    # Linux/WSL uses POSIX shell quoting (single quotes around paths with spaces).
    assert command.startswith(f"'{chrome}' --remote-debugging-port=9222")
|
||||
|
||||
def test_manual_command_uses_windows_quoting_on_windows(self):
    """On Windows the path is wrapped in double quotes, never single quotes."""
    chrome = r"C:\Program Files\Google\Chrome\Application\chrome.exe"

    def fake_which(name):
        return chrome if name == "chrome.exe" else None

    def fake_isfile(path):
        return path == chrome

    with patch("hermes_cli.browser_connect.shutil.which", side_effect=fake_which):
        with patch("hermes_cli.browser_connect.os.path.isfile", side_effect=fake_isfile):
            command = manual_chrome_debug_command(9222, "Windows")

    assert command is not None
    # Windows uses cmd.exe-compatible quoting via subprocess.list2cmdline.
    assert command.startswith(f'"{chrome}" --remote-debugging-port=9222')
    assert "'" not in command
|
||||
|
||||
def test_manual_command_returns_none_when_linux_browser_missing(self):
    """No browser on PATH and no candidate file on disk yields None."""
    with patch("hermes_cli.browser_connect.shutil.which", return_value=None):
        with patch("hermes_cli.browser_connect.os.path.isfile", return_value=False):
            command = manual_chrome_debug_command(9222, "Linux")
            assert command is None
|
||||
|
||||
@@ -41,6 +41,10 @@ def _simulate_config_bridge(cfg: dict, initial_env: dict | None = None):
|
||||
# TERMINAL_CWD. Mirrors the fix in gateway/run.py.
|
||||
if cfg_key == "cwd" and str(val) in (".", "auto", "cwd"):
|
||||
continue
|
||||
# Expand shell tilde so subprocess.Popen never receives a literal
|
||||
# "~/" which the kernel rejects.
|
||||
if cfg_key == "cwd" and isinstance(val, str):
|
||||
val = os.path.expanduser(val)
|
||||
if isinstance(val, list):
|
||||
env[env_var] = json.dumps(val)
|
||||
else:
|
||||
@@ -55,6 +59,8 @@ def _simulate_config_bridge(cfg: dict, initial_env: dict | None = None):
|
||||
if alias_env not in env:
|
||||
alias_val = cfg.get(alias_key)
|
||||
if isinstance(alias_val, str) and alias_val.strip():
|
||||
if alias_key == "cwd":
|
||||
alias_val = os.path.expanduser(alias_val)
|
||||
env[alias_env] = alias_val.strip()
|
||||
|
||||
# --- Replicate lines 144-147: MESSAGING_CWD fallback ---
|
||||
@@ -205,3 +211,32 @@ class TestNestedTerminalCwdPlaceholderSkip:
|
||||
assert result["TERMINAL_ENV"] == "docker"
|
||||
assert result["TERMINAL_TIMEOUT"] == "300"
|
||||
assert result["TERMINAL_CWD"] == "/from/env"
|
||||
|
||||
|
||||
class TestTildeExpansion:
    """A shell tilde in configured cwd values must be expanded by the bridge.

    subprocess.Popen does not expand shell syntax, so a literal "~/"
    causes FileNotFoundError.  Regression test for commit 3c42064e.
    """

    def test_terminal_cwd_tilde_expanded(self):
        """Nested ``terminal.cwd`` of '~/projects' is exported in expanded form."""
        raw_cwd = "~/projects"
        env = _simulate_config_bridge({"terminal": {"cwd": raw_cwd}})
        assert env["TERMINAL_CWD"] == os.path.expanduser(raw_cwd)

    def test_top_level_cwd_tilde_expanded(self):
        """Top-level ``cwd`` of '~/' is exported as the expanded home directory."""
        raw_cwd = "~/"
        env = _simulate_config_bridge({"cwd": raw_cwd})
        assert env["TERMINAL_CWD"] == os.path.expanduser(raw_cwd)

    def test_tilde_with_nested_precedence(self):
        """When both are set, the nested value wins and is still expanded."""
        top_level, nested = "~/top", "~/nested"
        env = _simulate_config_bridge(
            {"cwd": top_level, "terminal": {"cwd": nested}}
        )
        assert env["TERMINAL_CWD"] == os.path.expanduser(nested)
|
||||
|
||||
@@ -1337,3 +1337,159 @@ class TestCursorStrippingOnFallback:
|
||||
assert consumer._already_sent is True
|
||||
# _last_sent_text must NOT be updated when the edit failed
|
||||
assert consumer._last_sent_text == "Hello ▉"
|
||||
|
||||
|
||||
# ── on_new_message callback (tool-progress linearization) ─────────────
|
||||
|
||||
|
||||
class TestOnNewMessageCallback:
    """Exercise the ``on_new_message`` hook of ``GatewayStreamConsumer``.

    The hook is expected to fire each time a brand-new content bubble is
    sent to the platform.  The gateway relies on it to close off the
    current tool-progress bubble, so the next tool.started opens a fresh
    bubble underneath the content and the chat stays chronological.

    Background: before the hook existed (post PR #7885), content messages
    got their own bubbles after segment breaks while the tool-progress
    task kept editing the ORIGINAL progress bubble above them.  Tool lines
    piled up in the upper bubble and the timeline looked scrambled.
    """

    @staticmethod
    def _make_adapter(send=None):
        """Return a mock adapter whose send/edit calls report success.

        ``send`` overrides the default ``AsyncMock`` that always answers
        with message id ``"msg_1"``.
        """
        adapter = MagicMock()
        if send is None:
            send = AsyncMock(
                return_value=SimpleNamespace(success=True, message_id="msg_1")
            )
        adapter.send = send
        adapter.edit_message = AsyncMock(return_value=SimpleNamespace(success=True))
        adapter.MAX_MESSAGE_LENGTH = 4096
        return adapter

    @staticmethod
    def _make_consumer(adapter, callback):
        """Build a fast-flushing consumer for chat ``"chat"`` with *callback* attached."""
        config = StreamConsumerConfig(edit_interval=0.01, buffer_threshold=1)
        return GatewayStreamConsumer(
            adapter, "chat", config,
            on_new_message=callback,
        )

    @pytest.mark.asyncio
    async def test_callback_fires_on_first_send(self):
        """The very first send of a content bubble triggers the hook once."""
        fired = []
        consumer = self._make_consumer(
            self._make_adapter(), lambda: fired.append("reset")
        )

        consumer.on_delta("Hello")
        consumer.finish()
        await consumer.run()

        assert fired == ["reset"]

    @pytest.mark.asyncio
    async def test_callback_fires_once_per_segment(self):
        """Each segment break leads to a new first-send and another hook call."""
        message_ids = iter(["msg_1", "msg_2", "msg_3"])
        send = AsyncMock(
            side_effect=lambda **kw: SimpleNamespace(
                success=True, message_id=next(message_ids)
            )
        )
        fired = []
        consumer = self._make_consumer(
            self._make_adapter(send), lambda: fired.append("reset")
        )

        # ``None`` deltas act as segment breaks between the text chunks.
        for delta in ("A", None, "B", None, "C"):
            consumer.on_delta(delta)
        consumer.finish()
        await consumer.run()

        # Three content bubbles ⇒ three reset notifications
        assert fired == ["reset", "reset", "reset"]

    @pytest.mark.asyncio
    async def test_callback_not_fired_on_edit(self):
        """Later edits of an already-sent bubble must not re-trigger the hook."""
        fired = []
        consumer = self._make_consumer(
            self._make_adapter(), lambda: fired.append("reset")
        )

        consumer.on_delta("Hello")
        runner = asyncio.create_task(consumer.run())
        for chunk in (" world", " more"):
            await asyncio.sleep(0.05)
            consumer.on_delta(chunk)
        await asyncio.sleep(0.05)
        consumer.finish()
        await runner

        # Only one first-send happened; edits do not re-fire.
        assert fired == ["reset"]

    @pytest.mark.asyncio
    async def test_callback_fires_on_commentary(self):
        """A commentary message is a fresh bubble as well, so the hook fires."""
        fired = []
        consumer = self._make_consumer(
            self._make_adapter(), lambda: fired.append("reset")
        )

        consumer.on_commentary("I'll search for that first.")
        consumer.finish()
        await consumer.run()

        assert fired == ["reset"]

    @pytest.mark.asyncio
    async def test_callback_error_swallowed(self):
        """A hook that raises must not take the consumer down with it."""

        def raiser():
            raise RuntimeError("boom")

        consumer = self._make_consumer(self._make_adapter(), raiser)

        consumer.on_delta("Hello")
        consumer.finish()
        await consumer.run()  # must not raise

        assert consumer.already_sent is True

    @pytest.mark.asyncio
    async def test_no_callback_when_none(self):
        """Leaving ``on_new_message`` at its default still delivers the message."""
        adapter = self._make_adapter()
        config = StreamConsumerConfig(edit_interval=0.01, buffer_threshold=1)
        consumer = GatewayStreamConsumer(adapter, "chat", config)  # no callback

        consumer.on_delta("Hello")
        consumer.finish()
        await consumer.run()

        assert consumer.already_sent is True
|
||||
|
||||
@@ -319,6 +319,23 @@ class TestSanitizeEnvLines:
|
||||
assert result[0].startswith("OPENROUTER_API_KEY=")
|
||||
assert result[1].startswith("OPENAI_BASE_URL=")
|
||||
|
||||
def test_glm_suffix_collision_not_split(self):
    """GLM_* keys must pass through untouched even though they end in LM_* names (#17138)."""
    lines = ["GLM_API_KEY=glm-secret\n", "GLM_BASE_URL=https://api.z.ai/api/paas/v4\n"]

    result = _sanitize_env_lines(lines)

    unchanged = result == lines
    assert unchanged, f"GLM_* lines were corrupted by suffix collision: {result}"
|
||||
|
||||
def test_suffix_collision_does_not_break_real_concatenation(self):
    """Two assignments glued onto one line are still split, GLM_ prefix notwithstanding."""
    glued = "GLM_API_KEY=glmLM_API_KEY=lm-key\n"

    result = _sanitize_env_lines([glued])

    assert len(result) == 2
    first, second = result[0], result[1]
    assert first.startswith("GLM_API_KEY=")
    assert second.startswith("LM_API_KEY=")
|
||||
|
||||
def test_save_env_value_fixes_corruption_on_write(self, tmp_path):
|
||||
"""save_env_value sanitizes corrupted lines when writing a new key."""
|
||||
env_file = tmp_path / ".env"
|
||||
|
||||
@@ -2754,6 +2754,8 @@ def test_session_most_recent_handles_db_unavailable(monkeypatch):
|
||||
)
|
||||
|
||||
assert resp["result"]["session_id"] is None
|
||||
|
||||
|
||||
# ── browser.manage ───────────────────────────────────────────────────
|
||||
|
||||
|
||||
@@ -2779,6 +2781,30 @@ def _stub_urlopen(monkeypatch, *, ok: bool):
|
||||
monkeypatch.setattr(urllib.request, "urlopen", _opener)
|
||||
|
||||
|
||||
def _stub_urlopen_capture(monkeypatch, *, ok: bool):
|
||||
urls: list[str] = []
|
||||
|
||||
class _Resp:
|
||||
status = 200
|
||||
|
||||
def __enter__(self):
|
||||
return self
|
||||
|
||||
def __exit__(self, *_):
|
||||
return False
|
||||
|
||||
def _opener(url, timeout=2.0): # noqa: ARG001 — match urllib signature
|
||||
urls.append(url)
|
||||
if not ok:
|
||||
raise OSError("probe failed")
|
||||
return _Resp()
|
||||
|
||||
import urllib.request
|
||||
|
||||
monkeypatch.setattr(urllib.request, "urlopen", _opener)
|
||||
return urls
|
||||
|
||||
|
||||
def test_browser_manage_status_reads_env_var(monkeypatch):
|
||||
"""Status returns the env var verbatim (no network I/O)."""
|
||||
monkeypatch.setenv("BROWSER_CDP_URL", "http://127.0.0.1:9222")
|
||||
@@ -2787,7 +2813,8 @@ def test_browser_manage_status_reads_env_var(monkeypatch):
|
||||
{"id": "1", "method": "browser.manage", "params": {"action": "status"}}
|
||||
)
|
||||
|
||||
assert resp["result"] == {"connected": True, "url": "http://127.0.0.1:9222"}
|
||||
assert resp["result"]["connected"] is True
|
||||
assert resp["result"]["url"] == "http://127.0.0.1:9222"
|
||||
|
||||
|
||||
def test_browser_manage_status_falls_back_to_config_cdp_url(monkeypatch):
|
||||
@@ -2850,18 +2877,215 @@ def test_browser_manage_connect_sets_env_and_cleans_twice(monkeypatch):
|
||||
}
|
||||
)
|
||||
|
||||
assert resp["result"] == {"connected": True, "url": "http://127.0.0.1:9222"}
|
||||
assert resp["result"]["connected"] is True
|
||||
assert resp["result"]["url"] == "http://127.0.0.1:9222"
|
||||
assert resp["result"]["messages"] == ["Chrome is already listening on port 9222"]
|
||||
assert os.environ.get("BROWSER_CDP_URL") == "http://127.0.0.1:9222"
|
||||
# First cleanup runs against the OLD env (none here), second against the NEW.
|
||||
assert cleanup_calls == ["", "http://127.0.0.1:9222"]
|
||||
|
||||
|
||||
def test_browser_manage_connect_defaults_to_loopback(monkeypatch):
    """A connect request without a URL probes and reports http://127.0.0.1:9222."""
    monkeypatch.delenv("BROWSER_CDP_URL", raising=False)
    browser_tool_stub = types.SimpleNamespace(
        cleanup_all_browsers=lambda: None,
        _get_cdp_override=lambda: os.environ.get("BROWSER_CDP_URL", ""),
    )
    request = {"id": "1", "method": "browser.manage", "params": {"action": "connect"}}

    with patch.dict(sys.modules, {"tools.browser_tool": browser_tool_stub}):
        probed = _stub_urlopen_capture(monkeypatch, ok=True)
        resp = server.handle_request(request)

    assert resp["result"]["connected"] is True
    assert resp["result"]["url"] == "http://127.0.0.1:9222"
    assert resp["result"]["messages"] == ["Chrome is already listening on port 9222"]
    # The first probe goes to the CDP discovery path on the loopback address.
    assert probed[0] == "http://127.0.0.1:9222/json/version"
|
||||
|
||||
|
||||
def test_browser_manage_connect_default_local_reports_launch_hint(monkeypatch):
    """A failed local connect reports a manual launch hint and mirrors it as progress events.

    The probe fails, the launch attempt returns False and no browser
    candidates exist; the response must carry the hint lines, leave
    ``BROWSER_CDP_URL`` unset, and emit one ``browser.progress`` event per
    message because a ``session_id`` was supplied.
    """
    monkeypatch.delenv("BROWSER_CDP_URL", raising=False)
    emitted: list[tuple[str, dict]] = []

    def _record_emit(evt, sid, payload=None):
        emitted.append((evt, payload or {}))

    monkeypatch.setattr(server, "_emit", _record_emit)
    browser_tool_stub = types.SimpleNamespace(
        cleanup_all_browsers=lambda: None,
        _get_cdp_override=lambda: os.environ.get("BROWSER_CDP_URL", ""),
    )
    request = {
        "id": "1",
        "method": "browser.manage",
        "params": {
            "action": "connect",
            "session_id": "sess-1",
            "url": "http://localhost:9222",
        },
    }

    with patch.dict(sys.modules, {"tools.browser_tool": browser_tool_stub}):
        _stub_urlopen(monkeypatch, ok=False)
        launch_fails = patch(
            "hermes_cli.browser_connect.try_launch_chrome_debug", return_value=False
        )
        no_candidates = patch(
            "hermes_cli.browser_connect.get_chrome_debug_candidates",
            return_value=[],
        )
        with launch_fails, no_candidates:
            resp = server.handle_request(request)

    assert resp["result"]["connected"] is False
    assert resp["result"]["url"] == "http://127.0.0.1:9222"
    first_message = resp["result"]["messages"][0]
    assert (
        first_message
        == "Chrome isn't running with remote debugging — attempting to launch..."
    )
    assert any(
        "No Chrome/Chromium executable was found" in line
        for line in resp["result"]["messages"]
    )
    assert any(
        "--remote-debugging-port=9222" in line for line in resp["result"]["messages"]
    )
    assert "BROWSER_CDP_URL" not in os.environ
    progress = [
        payload["message"] for evt, payload in emitted if evt == "browser.progress"
    ]
    assert progress == resp["result"]["messages"]
|
||||
|
||||
|
||||
def test_browser_manage_connect_no_session_skips_progress_events(monkeypatch):
    """No ``session_id`` means no ``browser.progress`` events are emitted.

    Without a session the TUI prints the messages bundled in the response,
    so emitting progress events too would render them twice.  The bundled
    list must still be populated.
    """
    monkeypatch.delenv("BROWSER_CDP_URL", raising=False)
    emitted: list[tuple[str, dict]] = []

    def _record_emit(evt, sid, payload=None):
        emitted.append((evt, payload or {}))

    monkeypatch.setattr(server, "_emit", _record_emit)
    browser_tool_stub = types.SimpleNamespace(
        cleanup_all_browsers=lambda: None,
        _get_cdp_override=lambda: os.environ.get("BROWSER_CDP_URL", ""),
    )
    request = {
        "id": "1",
        "method": "browser.manage",
        "params": {"action": "connect", "url": "http://localhost:9222"},
    }

    with patch.dict(sys.modules, {"tools.browser_tool": browser_tool_stub}):
        _stub_urlopen(monkeypatch, ok=False)
        launch_fails = patch(
            "hermes_cli.browser_connect.try_launch_chrome_debug", return_value=False
        )
        no_candidates = patch(
            "hermes_cli.browser_connect.get_chrome_debug_candidates",
            return_value=[],
        )
        with launch_fails, no_candidates:
            resp = server.handle_request(request)

    assert resp["result"]["connected"] is False
    assert resp["result"]["messages"]  # bundled list still populated
    progress_events = [evt for evt, _ in emitted if evt == "browser.progress"]
    assert progress_events == []
|
||||
|
||||
|
||||
def test_browser_manage_connect_handles_null_url(monkeypatch):
    """An explicit ``{"url": null}`` falls back to the default loopback URL.

    The request must succeed rather than raise a TypeError that gets
    swallowed by the outer 5031 catch.
    """
    monkeypatch.delenv("BROWSER_CDP_URL", raising=False)
    browser_tool_stub = types.SimpleNamespace(
        cleanup_all_browsers=lambda: None,
        _get_cdp_override=lambda: os.environ.get("BROWSER_CDP_URL", ""),
    )
    request = {
        "id": "1",
        "method": "browser.manage",
        "params": {"action": "connect", "url": None},
    }

    with patch.dict(sys.modules, {"tools.browser_tool": browser_tool_stub}):
        _stub_urlopen(monkeypatch, ok=True)
        resp = server.handle_request(request)

    assert resp["result"]["connected"] is True
    assert resp["result"]["url"] == "http://127.0.0.1:9222"
|
||||
|
||||
|
||||
def test_browser_manage_connect_rejects_non_string_url(monkeypatch):
    """A non-string ``url`` (here the int 9222) yields error 4015 and leaves the env alone."""
    monkeypatch.delenv("BROWSER_CDP_URL", raising=False)
    request = {
        "id": "1",
        "method": "browser.manage",
        "params": {"action": "connect", "url": 9222},
    }

    resp = server.handle_request(request)

    assert resp["error"]["code"] == 4015
    assert "must be a string" in resp["error"]["message"]
    assert "BROWSER_CDP_URL" not in os.environ
|
||||
|
||||
|
||||
def test_browser_manage_connect_default_local_retries_after_launch(monkeypatch):
    """After a reported launch, the connect keeps probing until the endpoint answers.

    The stubbed ``urlopen`` fails on its first two calls and succeeds on the
    third; ``time.sleep`` is neutralised so the retries run instantly.
    """
    import urllib.request

    monkeypatch.delenv("BROWSER_CDP_URL", raising=False)
    monkeypatch.setattr(server.time, "sleep", lambda _seconds: None)
    browser_tool_stub = types.SimpleNamespace(
        cleanup_all_browsers=lambda: None,
        _get_cdp_override=lambda: os.environ.get("BROWSER_CDP_URL", ""),
    )

    class _Resp:
        """Context-manager response reporting HTTP 200."""

        status = 200

        def __enter__(self):
            return self

        def __exit__(self, *_):
            return False

    probe_calls = []

    def _opener(_url, timeout=2.0):  # noqa: ARG001 — match urllib signature
        probe_calls.append(_url)
        if len(probe_calls) < 3:
            raise OSError("not ready")
        return _Resp()

    monkeypatch.setattr(urllib.request, "urlopen", _opener)
    request = {"id": "1", "method": "browser.manage", "params": {"action": "connect"}}

    with patch.dict(sys.modules, {"tools.browser_tool": browser_tool_stub}):
        with patch(
            "hermes_cli.browser_connect.try_launch_chrome_debug", return_value=True
        ):
            resp = server.handle_request(request)

    expected_messages = [
        "Chrome isn't running with remote debugging — attempting to launch...",
        "Chrome launched and listening on port 9222",
    ]
    assert resp["result"]["connected"] is True
    assert resp["result"]["url"] == "http://127.0.0.1:9222"
    assert resp["result"]["messages"] == expected_messages
    assert os.environ["BROWSER_CDP_URL"] == "http://127.0.0.1:9222"
|
||||
|
||||
|
||||
def test_browser_manage_connect_rejects_unreachable_endpoint(monkeypatch):
|
||||
"""An unreachable endpoint must NOT mutate the env or reap sessions."""
|
||||
monkeypatch.setenv("BROWSER_CDP_URL", "http://existing:9222")
|
||||
cleanup_calls: list[str] = []
|
||||
fake = types.SimpleNamespace(
|
||||
cleanup_all_browsers=lambda: cleanup_calls.append(os.environ.get("BROWSER_CDP_URL", "")),
|
||||
cleanup_all_browsers=lambda: cleanup_calls.append(
|
||||
os.environ.get("BROWSER_CDP_URL", "")
|
||||
),
|
||||
_get_cdp_override=lambda: os.environ.get("BROWSER_CDP_URL", ""),
|
||||
)
|
||||
with patch.dict(sys.modules, {"tools.browser_tool": fake}):
|
||||
@@ -2941,14 +3165,19 @@ def test_browser_manage_connect_preserves_devtools_browser_endpoint(monkeypatch)
|
||||
concrete = "ws://browserbase.example/devtools/browser/abc123"
|
||||
|
||||
class _OkSocket:
|
||||
def __enter__(self): return self
|
||||
def __exit__(self, *a): return False
|
||||
def __enter__(self):
|
||||
return self
|
||||
|
||||
def __exit__(self, *a):
|
||||
return False
|
||||
|
||||
with patch.dict(sys.modules, {"tools.browser_tool": fake}):
|
||||
# If urlopen is reached for a concrete ws endpoint, the test
|
||||
# would still pass because _stub_urlopen returned ok=True before;
|
||||
# patch it to assert-fail so we prove the HTTP probe is skipped.
|
||||
with patch("urllib.request.urlopen", side_effect=AssertionError("urlopen called")):
|
||||
with patch(
|
||||
"urllib.request.urlopen", side_effect=AssertionError("urlopen called")
|
||||
):
|
||||
with patch("socket.create_connection", return_value=_OkSocket()):
|
||||
resp = server.handle_request(
|
||||
{
|
||||
@@ -2963,6 +3192,69 @@ def test_browser_manage_connect_preserves_devtools_browser_endpoint(monkeypatch)
|
||||
assert os.environ["BROWSER_CDP_URL"] == concrete
|
||||
|
||||
|
||||
def test_browser_manage_connect_local_devtools_ws_preserves_path(monkeypatch):
    """Regression: a local ``ws://.../devtools/browser/<id>`` URL keeps its path.

    Such a URL is a real connectable endpoint; default-local normalization
    must not strip the ``/devtools/browser/...`` part or valid local CDP
    connects break.
    """
    monkeypatch.delenv("BROWSER_CDP_URL", raising=False)
    browser_tool_stub = types.SimpleNamespace(
        cleanup_all_browsers=lambda: None,
        _get_cdp_override=lambda: os.environ.get("BROWSER_CDP_URL", ""),
    )
    endpoint = "ws://127.0.0.1:9222/devtools/browser/abc123"

    class _OkSocket:
        """Stand-in for a successfully opened socket used as a context manager."""

        def __enter__(self):
            return self

        def __exit__(self, *a):
            return False

    request = {
        "id": "1",
        "method": "browser.manage",
        "params": {"action": "connect", "url": endpoint},
    }

    with patch.dict(sys.modules, {"tools.browser_tool": browser_tool_stub}):
        with patch("socket.create_connection", return_value=_OkSocket()):
            resp = server.handle_request(request)

    assert resp["result"]["connected"] is True
    assert resp["result"]["url"] == endpoint
    assert os.environ["BROWSER_CDP_URL"] == endpoint
|
||||
|
||||
|
||||
def test_browser_manage_connect_rejects_invalid_port(monkeypatch):
    """A URL with a non-numeric port yields error 4015 and leaves the env alone."""
    monkeypatch.delenv("BROWSER_CDP_URL", raising=False)
    request = {
        "id": "1",
        "method": "browser.manage",
        "params": {"action": "connect", "url": "http://localhost:abc"},
    }

    resp = server.handle_request(request)

    assert resp["error"]["code"] == 4015
    assert "invalid port" in resp["error"]["message"]
    assert "BROWSER_CDP_URL" not in os.environ
|
||||
|
||||
|
||||
def test_browser_manage_connect_rejects_missing_host(monkeypatch):
    """A URL with an empty host yields error 4015 and leaves the env alone."""
    monkeypatch.delenv("BROWSER_CDP_URL", raising=False)
    request = {
        "id": "1",
        "method": "browser.manage",
        "params": {"action": "connect", "url": "http://:9222"},
    }

    resp = server.handle_request(request)

    assert resp["error"]["code"] == 4015
    assert "missing host" in resp["error"]["message"]
    assert "BROWSER_CDP_URL" not in os.environ
|
||||
|
||||
|
||||
def test_browser_manage_connect_concrete_ws_skips_http_probe(monkeypatch):
|
||||
"""Regression for round-2 Copilot review: a hosted CDP endpoint
|
||||
(no HTTP discovery) must connect via TCP-only reachability check.
|
||||
@@ -2977,8 +3269,11 @@ def test_browser_manage_connect_concrete_ws_skips_http_probe(monkeypatch):
|
||||
seen_targets: list[tuple[str, int]] = []
|
||||
|
||||
class _OkSocket:
|
||||
def __enter__(self): return self
|
||||
def __exit__(self, *a): return False
|
||||
def __enter__(self):
|
||||
return self
|
||||
|
||||
def __exit__(self, *a):
|
||||
return False
|
||||
|
||||
def _fake_create_connection(addr, timeout=None):
|
||||
seen_targets.append(addr)
|
||||
@@ -2987,7 +3282,9 @@ def test_browser_manage_connect_concrete_ws_skips_http_probe(monkeypatch):
|
||||
with patch.dict(sys.modules, {"tools.browser_tool": fake}):
|
||||
# urlopen would 404/ECONNREFUSED on a real hosted CDP endpoint;
|
||||
# asserting it's never called proves the probe was skipped.
|
||||
with patch("urllib.request.urlopen", side_effect=AssertionError("urlopen called")):
|
||||
with patch(
|
||||
"urllib.request.urlopen", side_effect=AssertionError("urlopen called")
|
||||
):
|
||||
with patch("socket.create_connection", side_effect=_fake_create_connection):
|
||||
resp = server.handle_request(
|
||||
{
|
||||
@@ -3031,7 +3328,9 @@ def test_browser_manage_disconnect_drops_env_and_cleans(monkeypatch):
|
||||
monkeypatch.setenv("BROWSER_CDP_URL", "http://127.0.0.1:9222")
|
||||
cleanup_count = {"n": 0}
|
||||
fake = types.SimpleNamespace(
|
||||
cleanup_all_browsers=lambda: cleanup_count.__setitem__("n", cleanup_count["n"] + 1),
|
||||
cleanup_all_browsers=lambda: cleanup_count.__setitem__(
|
||||
"n", cleanup_count["n"] + 1
|
||||
),
|
||||
_get_cdp_override=lambda: os.environ.get("BROWSER_CDP_URL", ""),
|
||||
)
|
||||
with patch.dict(sys.modules, {"tools.browser_tool": fake}):
|
||||
@@ -3099,11 +3398,16 @@ def test_config_get_indicator_falls_back_when_unset(monkeypatch):
|
||||
def test_config_set_indicator_accepts_known_value(monkeypatch):
|
||||
written: dict = {}
|
||||
monkeypatch.setattr(
|
||||
server, "_write_config_key",
|
||||
server,
|
||||
"_write_config_key",
|
||||
lambda k, v: written.update({k: v}),
|
||||
)
|
||||
resp = server.handle_request(
|
||||
{"id": "1", "method": "config.set", "params": {"key": "indicator", "value": "EMOJI"}}
|
||||
{
|
||||
"id": "1",
|
||||
"method": "config.set",
|
||||
"params": {"key": "indicator", "value": "EMOJI"},
|
||||
}
|
||||
)
|
||||
assert resp["result"] == {"key": "indicator", "value": "emoji"}
|
||||
assert written == {"display.tui_status_indicator": "emoji"}
|
||||
@@ -3117,7 +3421,11 @@ def test_config_set_indicator_falsy_non_string_surfaces_in_error(monkeypatch):
|
||||
|
||||
for bad in (0, False, []):
|
||||
resp = server.handle_request(
|
||||
{"id": "1", "method": "config.set", "params": {"key": "indicator", "value": bad}}
|
||||
{
|
||||
"id": "1",
|
||||
"method": "config.set",
|
||||
"params": {"key": "indicator", "value": bad},
|
||||
}
|
||||
)
|
||||
assert "error" in resp
|
||||
msg = resp["error"]["message"]
|
||||
@@ -3132,7 +3440,47 @@ def test_config_set_indicator_none_keeps_blank_repr(monkeypatch):
|
||||
"""`None` is the genuine 'no value' case — empty raw is acceptable."""
|
||||
monkeypatch.setattr(server, "_write_config_key", lambda *a, **k: None)
|
||||
resp = server.handle_request(
|
||||
{"id": "1", "method": "config.set", "params": {"key": "indicator", "value": None}}
|
||||
{
|
||||
"id": "1",
|
||||
"method": "config.set",
|
||||
"params": {"key": "indicator", "value": None},
|
||||
}
|
||||
)
|
||||
assert "error" in resp
|
||||
assert "unknown indicator: ''" in resp["error"]["message"]
|
||||
|
||||
|
||||
# ── reload.env ───────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def test_reload_env_rpc_calls_hermes_cli_reload_env(monkeypatch):
    """``reload.env`` calls ``hermes_cli.config.reload_env`` once and reports its count.

    Mirrors the classic CLI's `/reload`: re-read ~/.hermes/.env into the
    gateway process and return how many vars were updated.
    """
    invocations = []

    def _fake_reload():
        invocations.append(True)
        return 7

    config_stub = types.SimpleNamespace(reload_env=_fake_reload)
    request = {"id": "1", "method": "reload.env", "params": {}}

    with patch.dict(sys.modules, {"hermes_cli.config": config_stub}):
        resp = server.handle_request(request)

    assert resp["result"] == {"updated": 7}
    assert len(invocations) == 1
|
||||
|
||||
|
||||
def test_reload_env_rpc_surfaces_errors(monkeypatch):
    """An exception raised by ``reload_env`` comes back as an RPC error with its text."""

    def _broken():
        raise RuntimeError("env path locked")

    config_stub = types.SimpleNamespace(reload_env=_broken)
    request = {"id": "1", "method": "reload.env", "params": {}}

    with patch.dict(sys.modules, {"hermes_cli.config": config_stub}):
        resp = server.handle_request(request)

    assert "error" in resp
    assert "env path locked" in resp["error"]["message"]
|
||||
|
||||
@@ -770,11 +770,19 @@ class TestLoadConfig(unittest.TestCase):
|
||||
|
||||
def test_returns_code_execution_section(self):
|
||||
from tools.code_execution_tool import _load_config
|
||||
mock_cli = MagicMock()
|
||||
mock_cli.CLI_CONFIG = {"code_execution": {"timeout": 120, "max_tool_calls": 10}}
|
||||
with patch.dict("sys.modules", {"cli": mock_cli}):
|
||||
with patch("hermes_cli.config.read_raw_config",
|
||||
return_value={"code_execution": {"timeout": 120, "max_tool_calls": 10}}):
|
||||
result = _load_config()
|
||||
self.assertIsInstance(result, dict)
|
||||
self.assertEqual(result, {"timeout": 120, "max_tool_calls": 10})
|
||||
|
||||
def test_does_not_import_interactive_cli(self):
    """``_load_config`` ignores a ``cli`` module's CLI_CONFIG and uses the raw config."""
    from tools.code_execution_tool import _load_config

    # A populated ``cli`` module is planted in sys.modules; its values must
    # not show up because the raw config reader returns an empty mapping.
    decoy_cli = MagicMock()
    decoy_cli.CLI_CONFIG = {"code_execution": {"timeout": 999}}

    with patch.dict("sys.modules", {"cli": decoy_cli}):
        with patch("hermes_cli.config.read_raw_config", return_value={}):
            result = _load_config()

    self.assertEqual(result, {})
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@@ -45,6 +45,7 @@ def _make_dummy_env(**kwargs):
|
||||
host_cwd=kwargs.get("host_cwd"),
|
||||
auto_mount_cwd=kwargs.get("auto_mount_cwd", False),
|
||||
env=kwargs.get("env"),
|
||||
run_as_host_user=kwargs.get("run_as_host_user", False),
|
||||
)
|
||||
|
||||
|
||||
@@ -384,9 +385,10 @@ def test_normalize_env_dict_rejects_complex_values():
|
||||
assert result == {"GOOD": "string"}
|
||||
|
||||
|
||||
def test_security_args_include_setuid_setgid_for_gosu_drop():
|
||||
"""_SECURITY_ARGS must include SETUID and SETGID so the image entrypoint
|
||||
can drop from root to the non-root `hermes` user via gosu.
|
||||
def test_security_args_include_setuid_setgid_for_gosu_drop(monkeypatch):
|
||||
"""The default (run_as_host_user=False) invocation must include SETUID and
|
||||
SETGID caps so the image entrypoint can drop from root to the non-root
|
||||
`hermes` user via gosu.
|
||||
|
||||
Without these caps gosu exits with
|
||||
``error: failed switching to 'hermes': operation not permitted``
|
||||
@@ -396,17 +398,117 @@ def test_security_args_include_setuid_setgid_for_gosu_drop():
|
||||
after the drop — the drop is a one-way transition performed before the
|
||||
`no_new_privs` bit is enforced on the exec boundary.
|
||||
"""
|
||||
args = docker_env._SECURITY_ARGS
|
||||
monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
|
||||
calls = _mock_subprocess_run(monkeypatch)
|
||||
|
||||
_make_dummy_env()
|
||||
|
||||
run_calls = [c for c in calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "run"]
|
||||
assert run_calls, "docker run should have been called"
|
||||
run_args = run_calls[0][0]
|
||||
|
||||
# Flatten to set of added caps for clarity.
|
||||
added = {
|
||||
args[i + 1]
|
||||
for i, flag in enumerate(args[:-1])
|
||||
run_args[i + 1]
|
||||
for i, flag in enumerate(run_args[:-1])
|
||||
if flag == "--cap-add"
|
||||
}
|
||||
assert "SETUID" in added, "SETUID cap missing — gosu drop in entrypoint will fail"
|
||||
assert "SETGID" in added, "SETGID cap missing — gosu drop in entrypoint will fail"
|
||||
|
||||
# Sanity: the hardening posture is still in place.
|
||||
assert "--cap-drop" in args and "ALL" in args
|
||||
assert "--security-opt" in args and "no-new-privileges" in args
|
||||
|
||||
# ── run_as_host_user tests ────────────────────────────────────────
|
||||
|
||||
|
||||
def test_run_as_host_user_passes_uid_gid(monkeypatch):
    """Opting in to run_as_host_user must put ``--user <uid>:<gid>`` on docker run.

    The host ids are faked as 1234 / 5678 so the expected pairing is unambiguous.
    """
    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
    monkeypatch.setattr(docker_env.os, "getuid", lambda: 1234, raising=False)
    monkeypatch.setattr(docker_env.os, "getgid", lambda: 5678, raising=False)
    recorded = _mock_subprocess_run(monkeypatch)

    _make_dummy_env(run_as_host_user=True)

    # Keep only the recorded invocations whose argv looks like `<docker> run ...`.
    docker_runs = []
    for call in recorded:
        argv = call[0]
        if isinstance(argv, list) and len(argv) >= 2 and argv[1] == "run":
            docker_runs.append(call)
    assert docker_runs, "docker run should have been called"
    run_args = docker_runs[0][0]

    # The flag has to exist and be immediately followed by "1234:5678".
    assert "--user" in run_args, f"--user flag missing from docker run args: {run_args}"
    idx = run_args.index("--user")
    assert run_args[idx + 1] == "1234:5678", (
        f"expected --user 1234:5678, got --user {run_args[idx + 1]}"
    )
|
||||
|
||||
|
||||
def test_run_as_host_user_drops_setuid_setgid_caps(monkeypatch):
    """When --user is passed, the container never needs gosu, so SETUID/SETGID
    caps are omitted for a tighter security posture."""
    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
    monkeypatch.setattr(docker_env.os, "getuid", lambda: 1000, raising=False)
    monkeypatch.setattr(docker_env.os, "getgid", lambda: 1000, raising=False)
    calls = _mock_subprocess_run(monkeypatch)

    _make_dummy_env(run_as_host_user=True)

    run_calls = [c for c in calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "run"]
    # Guard before indexing so a missing `docker run` fails with a clear
    # message instead of a bare IndexError.
    assert run_calls, "docker run should have been called"
    run_args = run_calls[0][0]

    # Every value that directly follows a --cap-add flag.
    added = {
        value
        for flag, value in zip(run_args, run_args[1:])
        if flag == "--cap-add"
    }
    assert "SETUID" not in added, (
        "SETUID cap should be dropped when running as host user — no gosu drop is needed"
    )
    assert "SETGID" not in added, (
        "SETGID cap should be dropped when running as host user — no gosu drop is needed"
    )
    # Core non-privilege-drop caps must still be there (pip/npm/apt need them).
    assert "DAC_OVERRIDE" in added
    assert "CHOWN" in added
    assert "FOWNER" in added
|
||||
|
||||
|
||||
def test_run_as_host_user_default_off(monkeypatch):
    """Without the opt-in, no --user flag is emitted — preserving existing behavior."""
    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
    calls = _mock_subprocess_run(monkeypatch)

    _make_dummy_env()  # run_as_host_user defaults to False

    run_calls = [c for c in calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "run"]
    # Guard before indexing so a missing `docker run` fails with a clear
    # message instead of a bare IndexError.
    assert run_calls, "docker run should have been called"
    run_args = run_calls[0][0]
    assert "--user" not in run_args, (
        f"--user should not be in docker run args when opt-in is off: {run_args}"
    )
|
||||
|
||||
|
||||
def test_run_as_host_user_warns_and_skips_when_no_posix_ids(monkeypatch, caplog):
    """On platforms without POSIX getuid/getgid, log a warning and leave the
    container at its image default user (no --user flag, full cap set)."""
    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
    # Simulate a platform where os.getuid is absent (e.g. Windows host).
    monkeypatch.delattr(docker_env.os, "getuid", raising=False)
    monkeypatch.delattr(docker_env.os, "getgid", raising=False)
    calls = _mock_subprocess_run(monkeypatch)

    with caplog.at_level(logging.WARNING):
        _make_dummy_env(run_as_host_user=True)

    run_calls = [c for c in calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "run"]
    # Guard before indexing so a missing `docker run` fails with a clear
    # message instead of a bare IndexError.
    assert run_calls, "docker run should have been called"
    run_args = run_calls[0][0]

    assert "--user" not in run_args
    # Fall back to the full cap set since the container still starts as root.
    added = {
        value
        for flag, value in zip(run_args, run_args[1:])
        if flag == "--cap-add"
    }
    assert "SETUID" in added
    assert "SETGID" in added
    assert any(
        "does not expose POSIX uid/gid" in rec.getMessage()
        for rec in caplog.records
    ), "expected a warning when POSIX ids are unavailable"
|
||||
|
||||
@@ -0,0 +1,148 @@
|
||||
"""Tests that init_session() respects the configured cwd.
|
||||
|
||||
The bug: when terminal.cwd is set in config.yaml, the configured path was
|
||||
displayed in the TUI banner but actual terminal commands ran in os.getcwd()
|
||||
(the directory where ``hermes chat`` was started).
|
||||
|
||||
Root cause: init_session() captures the login shell environment by running
|
||||
``pwd -P`` inside a ``bash -l -c`` bootstrap. Profile scripts (.bashrc,
|
||||
.bash_profile, etc.) can change the working directory before ``pwd -P``
|
||||
runs, so _update_cwd() overwrites self.cwd with the wrong directory.
|
||||
|
||||
Fix: the bootstrap now includes an explicit ``cd`` back to self.cwd before
|
||||
running ``pwd -P``, so the configured cwd is always what gets recorded.
|
||||
"""
|
||||
|
||||
from tempfile import TemporaryFile
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
from tools.environments.base import BaseEnvironment
|
||||
|
||||
|
||||
class _TestableEnv(BaseEnvironment):
    """Smallest concrete BaseEnvironment, used to exercise base-class methods.

    ``_run_bash`` always raises; tests are expected to replace it on the
    instance with their own stub before calling anything that needs it.
    """

    def __init__(self, cwd="/tmp", timeout=10):
        super().__init__(cwd=cwd, timeout=timeout)

    def _run_bash(self, cmd_string, *, login=False, timeout=120, stdin_data=None):
        # Never meant to run for real in these tests.
        raise NotImplementedError("Use mock")

    def cleanup(self):
        # Nothing to release.
        return None
|
||||
|
||||
|
||||
class TestInitSessionCwdRespect:
    """init_session() must preserve the configured cwd."""

    @staticmethod
    def _finished_proc(payload=b""):
        """Return a MagicMock posing as an exited process (rc 0).

        Its ``stdout`` is a binary temp file rewound to the start and
        holding *payload* (nothing is written when *payload* is empty).
        """
        proc = MagicMock()
        proc.poll.return_value = 0
        proc.returncode = 0
        stream = TemporaryFile(mode="w+b")
        if payload:
            stream.write(payload)
        stream.seek(0)
        proc.stdout = stream
        return proc

    def test_bootstrap_contains_cd_to_configured_cwd(self):
        """The bootstrap script must cd to self.cwd before running pwd."""
        env = _TestableEnv(cwd="/my/project")

        # Record whatever script init_session hands to _run_bash.
        seen = {}

        def fake_run_bash(cmd_string, *, login=False, timeout=120, stdin_data=None):
            seen["cmd"] = cmd_string
            return self._finished_proc()

        env._run_bash = fake_run_bash
        env.init_session()

        assert "cmd" in seen, "init_session did not call _run_bash"
        script = seen["cmd"]

        # Ordering matters: the cd has to come first so pwd -P reports
        # the configured directory.
        cd_at = script.find("builtin cd")
        pwd_at = script.find("pwd -P")
        assert cd_at != -1, "bootstrap must contain 'builtin cd'"
        assert pwd_at != -1, "bootstrap must contain 'pwd -P'"
        assert cd_at < pwd_at, (
            "builtin cd must appear before pwd -P in the bootstrap so "
            "the configured cwd is what gets recorded"
        )

        # The cd target must be the configured path (shlex.quote only adds
        # quotes when the path contains shell-special characters)
        assert "/my/project" in script, (
            "bootstrap cd must target the configured cwd (/my/project)"
        )

    def test_configured_cwd_survives_init_session(self):
        """self.cwd must be the configured path after init_session completes."""
        configured_cwd = "/my/project"
        env = _TestableEnv(cwd=configured_cwd)
        marker = env._cwd_marker

        def fake_run_bash(cmd_string, *, login=False, timeout=120, stdin_data=None):
            # Simulate output where pwd reports the configured cwd
            output = f"snapshot output\n{marker}{configured_cwd}{marker}\n"
            return self._finished_proc(output.encode("utf-8"))

        env._run_bash = fake_run_bash
        env.init_session()

        assert env.cwd == configured_cwd, (
            f"Expected cwd={configured_cwd!r} after init_session, got {env.cwd!r}"
        )

    def test_default_cwd_still_works(self):
        """When no custom cwd is configured, default /tmp behavior is preserved."""
        env = _TestableEnv()  # default cwd="/tmp"
        marker = env._cwd_marker

        def fake_run_bash(cmd_string, *, login=False, timeout=120, stdin_data=None):
            output = f"snapshot output\n{marker}/tmp{marker}\n"
            return self._finished_proc(output.encode("utf-8"))

        env._run_bash = fake_run_bash
        env.init_session()

        assert env.cwd == "/tmp"

    def test_bootstrap_cd_uses_shlex_quote(self):
        """Paths with spaces must be properly quoted in the bootstrap cd."""
        env = _TestableEnv(cwd="/my project/with spaces")
        seen = {}

        def fake_run_bash(cmd_string, *, login=False, timeout=120, stdin_data=None):
            seen["cmd"] = cmd_string
            return self._finished_proc()

        env._run_bash = fake_run_bash
        env.init_session()

        script = seen["cmd"]
        # shlex.quote wraps paths with spaces in single quotes
        assert "'/my project/with spaces'" in script, (
            "bootstrap cd must properly quote paths with spaces"
        )
|
||||
@@ -0,0 +1,487 @@
|
||||
"""Tests for tools/skill_usage.py — sidecar telemetry + provenance filtering."""
|
||||
|
||||
import json
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.fixture
def skills_home(tmp_path, monkeypatch):
    """Per-test HERMES_HOME under ``tmp_path`` with an empty ``skills/`` dir.

    ``Path.home`` and the ``HERMES_HOME`` env var are both redirected, then
    ``tools.skill_usage`` is reloaded so it re-resolves its paths. Returns the
    ``.hermes`` directory.
    """
    hermes_home = tmp_path / ".hermes"
    hermes_home.mkdir()
    skills_root = hermes_home / "skills"
    skills_root.mkdir()

    monkeypatch.setattr(Path, "home", lambda: tmp_path)
    monkeypatch.setenv("HERMES_HOME", str(hermes_home))

    # Import only after the environment is patched, then force a reload so
    # the module picks up the redirected locations for this test.
    import importlib
    import tools.skill_usage as usage_module

    importlib.reload(usage_module)
    return hermes_home
|
||||
|
||||
|
||||
def _write_skill(skills_dir: Path, name: str, category: str = ""):
|
||||
"""Create a minimal SKILL.md with a name: frontmatter field."""
|
||||
if category:
|
||||
d = skills_dir / category / name
|
||||
else:
|
||||
d = skills_dir / name
|
||||
d.mkdir(parents=True, exist_ok=True)
|
||||
(d / "SKILL.md").write_text(
|
||||
f"""---
|
||||
name: {name}
|
||||
description: test skill
|
||||
---
|
||||
|
||||
# body
|
||||
""",
|
||||
encoding="utf-8",
|
||||
)
|
||||
return d
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Round-trip
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_empty_usage_returns_empty_dict(skills_home):
    """With nothing saved yet, load_usage() must compare equal to ``{}``."""
    from tools.skill_usage import load_usage

    usage = load_usage()
    assert usage == {}
|
||||
|
||||
|
||||
def test_save_and_load_roundtrip(skills_home):
    """A record handed to save_usage() comes back intact from load_usage()."""
    from tools.skill_usage import load_usage, save_usage

    save_usage({"skill-a": {"use_count": 3, "state": "active"}})

    record = load_usage()["skill-a"]
    assert record["use_count"] == 3
    assert record["state"] == "active"
|
||||
|
||||
|
||||
def test_save_is_atomic_no_partial_tmp_files(skills_home):
    """After save_usage(), no ``.usage_*`` file may sit next to the usage file."""
    from tools.skill_usage import save_usage, _usage_file

    save_usage({"x": {"use_count": 1}})

    sidecar_dir = _usage_file().parent
    # A leftover temp file would carry the ".usage_" prefix.
    for p in sidecar_dir.iterdir():
        is_leftover = p.name.startswith(".usage_")
        assert not is_leftover, f"leftover tmp: {p.name}"
|
||||
|
||||
|
||||
def test_get_record_missing_returns_empty_record(skills_home):
    """get_record() on an unknown name returns a fully-defaulted record."""
    from tools.skill_usage import get_record

    record = get_record("nonexistent")

    # Counters start at zero.
    assert record["use_count"] == 0
    assert record["view_count"] == 0
    # Lifecycle fields start in their neutral state.
    assert record["state"] == "active"
    assert record["pinned"] is False
    assert record["archived_at"] is None
|
||||
|
||||
|
||||
def test_get_record_backfills_missing_keys(skills_home):
    """A stored record lacking newer keys gets them filled in by get_record()."""
    from tools.skill_usage import get_record, save_usage

    # Old-format record: only use_count present.
    save_usage({"legacy": {"use_count": 5}})

    record = get_record("legacy")
    assert record["use_count"] == 5
    # These keys were absent on disk and must have been backfilled.
    assert "view_count" in record
    assert "state" in record
|
||||
|
||||
|
||||
def test_load_usage_handles_corrupt_file(skills_home):
    """Invalid JSON in the usage file makes load_usage() return ``{}``."""
    from tools.skill_usage import load_usage, _usage_file

    usage_path = _usage_file()
    usage_path.write_text("{ not json }", encoding="utf-8")

    assert load_usage() == {}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Counter bumps
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_bump_view_increments_and_timestamps(skills_home):
    """Two bump_view() calls yield view_count == 2 and a last_viewed_at value."""
    from tools.skill_usage import bump_view, get_record

    for _ in range(2):
        bump_view("my-skill")

    record = get_record("my-skill")
    assert record["view_count"] == 2
    assert record["last_viewed_at"] is not None
|
||||
|
||||
|
||||
def test_bump_use_increments_and_timestamps(skills_home):
    """One bump_use() call yields use_count == 1 and a last_used_at value."""
    from tools.skill_usage import bump_use, get_record

    bump_use("my-skill")

    record = get_record("my-skill")
    assert record["use_count"] == 1
    assert record["last_used_at"] is not None
|
||||
|
||||
|
||||
def test_bump_patch_increments_and_timestamps(skills_home):
    """One bump_patch() call yields patch_count == 1 and a last_patched_at value."""
    from tools.skill_usage import bump_patch, get_record

    bump_patch("my-skill")

    record = get_record("my-skill")
    assert record["patch_count"] == 1
    assert record["last_patched_at"] is not None
|
||||
|
||||
|
||||
def test_bump_on_empty_name_is_noop(skills_home):
    """bump_view("") must leave the stored usage data empty."""
    from tools.skill_usage import bump_view, load_usage

    bump_view("")

    usage = load_usage()
    assert usage == {}
|
||||
|
||||
|
||||
def test_bumps_do_not_corrupt_other_skills(skills_home):
    """Interleaved bumps on two skills keep each skill's counters separate."""
    from tools.skill_usage import bump_view, bump_use, get_record

    # view a, use b, view a again
    bump_view("skill-a")
    bump_use("skill-b")
    bump_view("skill-a")

    # skill-a only ever saw views; skill-b only a single use.
    assert get_record("skill-a")["view_count"] == 2
    assert get_record("skill-a")["use_count"] == 0
    assert get_record("skill-b")["use_count"] == 1
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# State transitions
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_set_state_active(skills_home):
    """set_state(..., STATE_ACTIVE) is reflected as state "active"."""
    from tools.skill_usage import set_state, get_record, STATE_ACTIVE

    set_state("x", STATE_ACTIVE)

    record = get_record("x")
    assert record["state"] == "active"
|
||||
|
||||
|
||||
def test_set_state_archived_records_timestamp(skills_home):
    """Archiving through set_state() sets state "archived" and fills archived_at."""
    from tools.skill_usage import set_state, get_record, STATE_ARCHIVED

    set_state("x", STATE_ARCHIVED)

    record = get_record("x")
    assert record["state"] == "archived"
    assert record["archived_at"] is not None
|
||||
|
||||
|
||||
def test_set_state_invalid_is_noop(skills_home):
    """An unrecognised state string leaves the skill at the default state."""
    from tools.skill_usage import set_state, get_record

    set_state("x", "bogus")

    # Looking the name up afterwards must still report the default state.
    record = get_record("x")
    assert record["state"] == "active"
|
||||
|
||||
|
||||
def test_restoring_from_archive_clears_timestamp(skills_home):
    """Going archived -> active resets archived_at back to None."""
    from tools.skill_usage import set_state, get_record, STATE_ARCHIVED, STATE_ACTIVE

    set_state("x", STATE_ARCHIVED)
    while_archived = get_record("x")
    assert while_archived["archived_at"] is not None

    set_state("x", STATE_ACTIVE)
    after_restore = get_record("x")
    assert after_restore["archived_at"] is None
|
||||
|
||||
|
||||
def test_set_pinned(skills_home):
    """set_pinned() toggles the record's ``pinned`` flag both ways."""
    from tools.skill_usage import set_pinned, get_record

    set_pinned("x", True)
    pinned_record = get_record("x")
    assert pinned_record["pinned"] is True

    set_pinned("x", False)
    unpinned_record = get_record("x")
    assert unpinned_record["pinned"] is False
|
||||
|
||||
|
||||
def test_forget_removes_record(skills_home):
    """forget() deletes a record that a previous bump had created."""
    from tools.skill_usage import bump_view, forget, load_usage

    bump_view("x")
    before = load_usage()
    assert "x" in before

    forget("x")
    after = load_usage()
    assert "x" not in after
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Provenance filter — the load-bearing safety check
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_agent_created_excludes_bundled(skills_home):
    """Skills named in ``.bundled_manifest`` are left out of the agent-created list."""
    from tools.skill_usage import list_agent_created_skill_names

    skills_dir = skills_home / "skills"
    _write_skill(skills_dir, "bundled-skill", category="github")
    _write_skill(skills_dir, "my-skill")

    # Manifest entry marks bundled-skill as coming from upstream.
    manifest = skills_dir / ".bundled_manifest"
    manifest.write_text("bundled-skill:abc123\n", encoding="utf-8")

    found = list_agent_created_skill_names()
    assert "my-skill" in found
    assert "bundled-skill" not in found
|
||||
|
||||
|
||||
def test_agent_created_excludes_hub_installed(skills_home):
    """Skills recorded in ``.hub/lock.json`` are left out of the agent-created list."""
    from tools.skill_usage import list_agent_created_skill_names

    skills_dir = skills_home / "skills"
    _write_skill(skills_dir, "hub-skill")
    _write_skill(skills_dir, "my-skill")

    # Lock file listing hub-skill as installed from a tap.
    lock_payload = {"version": 1, "installed": {"hub-skill": {"source": "taps/main"}}}
    hub_dir = skills_dir / ".hub"
    hub_dir.mkdir()
    (hub_dir / "lock.json").write_text(json.dumps(lock_payload), encoding="utf-8")

    found = list_agent_created_skill_names()
    assert "my-skill" in found
    assert "hub-skill" not in found
|
||||
|
||||
|
||||
def test_is_agent_created(skills_home):
    """is_agent_created() is True only for names in neither the manifest nor the hub lock."""
    from tools.skill_usage import is_agent_created

    skills_dir = skills_home / "skills"

    # One bundled name, one hub-installed name.
    (skills_dir / ".bundled_manifest").write_text("bundled:abc\n", encoding="utf-8")
    hub_dir = skills_dir / ".hub"
    hub_dir.mkdir()
    lock_text = json.dumps({"installed": {"hubbed": {}}})
    (hub_dir / "lock.json").write_text(lock_text, encoding="utf-8")

    assert is_agent_created("my-skill") is True
    assert is_agent_created("bundled") is False
    assert is_agent_created("hubbed") is False
|
||||
|
||||
|
||||
def test_agent_created_skips_archive_and_hub_dirs(skills_home):
    """A SKILL.md below a dot-prefixed directory must not be listed."""
    from tools.skill_usage import list_agent_created_skill_names

    skills_dir = skills_home / "skills"
    _write_skill(skills_dir, "real-skill")

    # Plant a skill under .archive/ — it carries a valid SKILL.md but
    # lives in a dot-prefixed directory.
    archived_dir = skills_dir / ".archive" / "old-skill"
    archived_dir.mkdir(parents=True)
    archived_md = archived_dir / "SKILL.md"
    archived_md.write_text("---\nname: old-skill\n---\n", encoding="utf-8")

    found = list_agent_created_skill_names()
    assert "real-skill" in found
    assert "old-skill" not in found
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Archive / restore
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_archive_skill_moves_directory(skills_home):
    """archive_skill() relocates the skill under ``.archive/`` and marks it archived."""
    from tools.skill_usage import archive_skill, get_record, STATE_ARCHIVED

    skills_dir = skills_home / "skills"
    original_dir = _write_skill(skills_dir, "old-skill")
    assert original_dir.exists()

    ok, msg = archive_skill("old-skill")
    assert ok, msg

    # Directory moved ...
    assert not original_dir.exists()
    moved_md = skills_dir / ".archive" / "old-skill" / "SKILL.md"
    assert moved_md.exists()
    # ... and the usage record reflects it.
    assert get_record("old-skill")["state"] == "archived"
    assert get_record("old-skill")["archived_at"] is not None
|
||||
|
||||
|
||||
def test_archive_refuses_bundled_skill(skills_home):
    """archive_skill() declines a bundled skill and says why."""
    from tools.skill_usage import archive_skill

    skills_dir = skills_home / "skills"
    _write_skill(skills_dir, "bundled")
    manifest = skills_dir / ".bundled_manifest"
    manifest.write_text("bundled:abc\n", encoding="utf-8")

    ok, msg = archive_skill("bundled")
    assert not ok
    lowered = msg.lower()
    assert "bundled" in lowered or "hub" in lowered
|
||||
|
||||
|
||||
def test_archive_refuses_hub_skill(skills_home):
    """archive_skill() declines a skill listed in the hub lock file."""
    from tools.skill_usage import archive_skill

    skills_dir = skills_home / "skills"
    _write_skill(skills_dir, "hub-skill")

    hub_dir = skills_dir / ".hub"
    hub_dir.mkdir()
    lock_text = json.dumps({"installed": {"hub-skill": {}}})
    (hub_dir / "lock.json").write_text(lock_text, encoding="utf-8")

    # Only the success flag is checked; the message is not inspected here.
    ok, _msg = archive_skill("hub-skill")
    assert not ok
|
||||
|
||||
|
||||
def test_archive_missing_skill_returns_error(skills_home):
    """Archiving an unknown skill fails with a "not found" message."""
    from tools.skill_usage import archive_skill

    ok, msg = archive_skill("nonexistent")

    assert not ok
    lowered = msg.lower()
    assert "not found" in lowered
|
||||
|
||||
|
||||
def test_restore_skill_moves_back(skills_home):
    """restore_skill() puts an archived skill back and marks it active."""
    from tools.skill_usage import archive_skill, restore_skill, get_record

    skills_dir = skills_home / "skills"
    live_dir = skills_dir / "temp-skill"

    _write_skill(skills_dir, "temp-skill")
    archive_skill("temp-skill")
    assert not live_dir.exists()

    ok, msg = restore_skill("temp-skill")
    assert ok, msg
    assert (live_dir / "SKILL.md").exists()
    assert get_record("temp-skill")["state"] == "active"
|
||||
|
||||
|
||||
def test_archive_collision_gets_suffix(skills_home):
    """Archiving the same name twice keeps both copies, the second under ``dup-…``."""
    from tools.skill_usage import archive_skill

    skills_dir = skills_home / "skills"

    _write_skill(skills_dir, "dup")
    archive_skill("dup")
    _write_skill(skills_dir, "dup")  # recreate under the same name
    ok, msg = archive_skill("dup")
    assert ok

    # Expect both the plain entry and a suffixed sibling under .archive/.
    archive_root = skills_dir / ".archive"
    archived = sorted(p.name for p in archive_root.iterdir() if p.is_dir())
    assert "dup" in archived
    assert any(n.startswith("dup-") and n != "dup" for n in archived)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Reporting
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_agent_created_report_includes_defaults(skills_home):
    """The report lists every agent-created skill, with defaults when unused."""
    from tools.skill_usage import agent_created_report, bump_view

    skills_dir = skills_home / "skills"
    for skill_name in ("a", "b"):
        _write_skill(skills_dir, skill_name)
    bump_view("a")

    by_name = {}
    for row in agent_created_report():
        by_name[row["name"]] = row

    assert "a" in by_name and "b" in by_name
    assert by_name["a"]["view_count"] == 1
    # "b" was never bumped, yet it must be reported with default values.
    assert by_name["b"]["view_count"] == 0
    assert by_name["b"]["state"] == "active"
|
||||
|
||||
|
||||
def test_agent_created_report_excludes_bundled_and_hub(skills_home):
    """The report omits skills named in the bundled manifest or the hub lock."""
    from tools.skill_usage import agent_created_report

    skills_dir = skills_home / "skills"
    for skill_name in ("mine", "bundled", "hubbed"):
        _write_skill(skills_dir, skill_name)

    (skills_dir / ".bundled_manifest").write_text("bundled:abc\n", encoding="utf-8")
    hub = skills_dir / ".hub"
    hub.mkdir()
    lock_text = json.dumps({"installed": {"hubbed": {}}})
    (hub / "lock.json").write_text(lock_text, encoding="utf-8")

    reported = {row["name"] for row in agent_created_report()}
    assert "mine" in reported
    assert "bundled" not in reported
    assert "hubbed" not in reported
|
||||
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Provenance guard — telemetry must not leak records for bundled/hub skills
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_bump_view_no_op_for_bundled_skill(skills_home):
    """bump_view() on a bundled skill must not create a usage record — the
    sidecar stays limited to agent-created skills."""
    from tools.skill_usage import bump_view, load_usage

    skills_dir = skills_home / "skills"
    manifest = skills_dir / ".bundled_manifest"
    manifest.write_text("ship-bundled:abc\n", encoding="utf-8")

    bump_view("ship-bundled")

    usage = load_usage()
    assert "ship-bundled" not in usage, (
        "bundled skill leaked into .usage.json"
    )
|
||||
|
||||
|
||||
def test_bump_patch_no_op_for_hub_skill(skills_home):
    """bump_patch() on a hub-installed skill must not create a usage record."""
    from tools.skill_usage import bump_patch, load_usage

    hub = skills_home / "skills" / ".hub"
    hub.mkdir()
    lock_text = json.dumps({"installed": {"from-hub": {}}})
    (hub / "lock.json").write_text(lock_text, encoding="utf-8")

    bump_patch("from-hub")

    usage = load_usage()
    assert "from-hub" not in usage
|
||||
|
||||
|
||||
def test_bump_use_no_op_for_hub_skill(skills_home):
    """bump_use() on a hub-installed skill must not create a usage record."""
    from tools.skill_usage import bump_use, load_usage

    hub = skills_home / "skills" / ".hub"
    hub.mkdir()
    lock_text = json.dumps({"installed": {"from-hub": {}}})
    (hub / "lock.json").write_text(lock_text, encoding="utf-8")

    bump_use("from-hub")

    usage = load_usage()
    assert "from-hub" not in usage
|
||||
|
||||
|
||||
def test_set_state_no_op_for_bundled_skill(skills_home):
    """set_state() on a bundled skill must not write anything to the sidecar."""
    from tools.skill_usage import set_state, load_usage, STATE_ARCHIVED

    manifest = skills_home / "skills" / ".bundled_manifest"
    manifest.write_text("locked:abc\n", encoding="utf-8")

    set_state("locked", STATE_ARCHIVED)

    usage = load_usage()
    assert "locked" not in usage
|
||||
|
||||
|
||||
def test_restore_refuses_to_shadow_bundled_skill(skills_home):
    """restore_skill() must refuse when a bundled skill has taken the name."""
    from tools.skill_usage import archive_skill, restore_skill

    skills_dir = skills_home / "skills"

    # Step 1: an agent-created skill is archived.
    _write_skill(skills_dir, "shared-name")
    archive_skill("shared-name")

    # Step 2: a bundled skill with the same name shows up afterwards
    # (manifest entry plus its directory on disk).
    manifest = skills_dir / ".bundled_manifest"
    manifest.write_text("shared-name:abc\n", encoding="utf-8")
    _write_skill(skills_dir, "shared-name")

    # Step 3: restoring the archived copy has to be rejected.
    ok, msg = restore_skill("shared-name")
    assert not ok
    lowered = msg.lower()
    assert "bundled" in lowered or "shadow" in lowered
|
||||
|
||||
|
||||
def test_end_to_end_no_code_path_mutates_bundled_skill(skills_home):
    """Combined guarantee: none of the mutators may archive, change state,
    pin, or persist telemetry for a bundled or hub-installed skill."""
    from tools.skill_usage import (
        bump_view, bump_use, bump_patch, set_state, set_pinned,
        archive_skill, load_usage, STATE_STALE, STATE_ARCHIVED,
    )

    skills_dir = skills_home / "skills"
    for skill_name in ("bundled-one", "hub-one", "mine"):
        _write_skill(skills_dir, skill_name)

    # Provenance markers: one bundled name, one hub-installed name.
    (skills_dir / ".bundled_manifest").write_text(
        "bundled-one:abc\n", encoding="utf-8",
    )
    hub = skills_dir / ".hub"
    hub.mkdir()
    lock_text = json.dumps({"installed": {"hub-one": {}}})
    (hub / "lock.json").write_text(lock_text, encoding="utf-8")

    # Throw every mutator at both protected names.
    protected = ("bundled-one", "hub-one")
    for name in protected:
        bump_view(name)
        bump_use(name)
        bump_patch(name)
        set_state(name, STATE_STALE)
        set_state(name, STATE_ARCHIVED)
        set_pinned(name, True)
        ok, _msg = archive_skill(name)
        assert not ok, f"archive_skill(\"{name}\") should refuse"

    # Neither protected name may appear in the sidecar.
    data = load_usage()
    assert "bundled-one" not in data
    assert "hub-one" not in data

    # Their directories must still be where they were.
    assert (skills_dir / "bundled-one" / "SKILL.md").exists()
    assert (skills_dir / "hub-one" / "SKILL.md").exists()

    # The agent-created skill is still tracked as usual.
    bump_view("mine")
    assert load_usage()["mine"]["view_count"] == 1
|
||||
@@ -0,0 +1,210 @@
|
||||
"""Regression tests for terminal config -> env-var bridging.
|
||||
|
||||
terminal_tool._get_env_config() reads ALL terminal settings from os.environ
|
||||
(TERMINAL_*). config.yaml values therefore have to be bridged into env vars
|
||||
at startup, by THREE separate code paths:
|
||||
|
||||
1. cli.py -> ``env_mappings`` dict (CLI / TUI startup)
|
||||
2. gateway/run.py -> ``_terminal_env_map`` dict (gateway / messaging
|
||||
platforms)
|
||||
3. hermes_cli/config.py:set_config_value
|
||||
-> ``_config_to_env_sync`` dict (one-shot when the
|
||||
user runs ``hermes config set …``)
|
||||
|
||||
If any one of these is missing a key, the corresponding config.yaml setting
|
||||
silently does nothing for that entry-point. This bug already shipped once
|
||||
for ``docker_run_as_host_user`` (gateway and CLI maps) and once for
|
||||
``docker_mount_cwd_to_workspace`` (gateway map).
|
||||
|
||||
This test guards against future drift by extracting all three maps via source
|
||||
inspection and asserting they all bridge the same set of writable
|
||||
``terminal.*`` keys. Source inspection (rather than importing the live
|
||||
dicts) keeps the test independent of the user's ~/.hermes/config.yaml and
|
||||
mirrors the pattern used in tests/hermes_cli/test_config_drift.py.
|
||||
"""
|
||||
|
||||
import ast
|
||||
import inspect
|
||||
|
||||
|
||||
def _extract_dict_values(source: str, dict_name: str) -> set[str]:
|
||||
"""Return the set of *value* strings in `dict_name = { "k": "VALUE", ... }`.
|
||||
|
||||
We parse the source with ast (so multi-line dicts and comments are
|
||||
handled) instead of regex. The first matching assignment wins.
|
||||
"""
|
||||
tree = ast.parse(source)
|
||||
for node in ast.walk(tree):
|
||||
if not isinstance(node, ast.Assign):
|
||||
continue
|
||||
targets = [t for t in node.targets if isinstance(t, ast.Name)]
|
||||
if not any(t.id == dict_name for t in targets):
|
||||
continue
|
||||
if not isinstance(node.value, ast.Dict):
|
||||
continue
|
||||
out: set[str] = set()
|
||||
for k, v in zip(node.value.keys, node.value.values):
|
||||
if isinstance(k, ast.Constant) and isinstance(v, ast.Constant):
|
||||
if isinstance(v.value, str):
|
||||
out.add(v.value)
|
||||
return out
|
||||
raise AssertionError(f"Could not find `{dict_name} = {{...}}` literal in source")
|
||||
|
||||
|
||||
def _extract_dict_keys(source: str, dict_name: str) -> set[str]:
|
||||
"""Return the set of *key* strings in `dict_name = { "KEY": "v", ... }`."""
|
||||
tree = ast.parse(source)
|
||||
for node in ast.walk(tree):
|
||||
if not isinstance(node, ast.Assign):
|
||||
continue
|
||||
targets = [t for t in node.targets if isinstance(t, ast.Name)]
|
||||
if not any(t.id == dict_name for t in targets):
|
||||
continue
|
||||
if not isinstance(node.value, ast.Dict):
|
||||
continue
|
||||
out: set[str] = set()
|
||||
for k in node.value.keys:
|
||||
if isinstance(k, ast.Constant) and isinstance(k.value, str):
|
||||
out.add(k.value)
|
||||
return out
|
||||
raise AssertionError(f"Could not find `{dict_name} = {{...}}` literal in source")
|
||||
|
||||
|
||||
def _cli_env_map_keys() -> set[str]:
    """Keys of the ``env_mappings`` dict literal inside cli.load_cli_config()."""
    import cli

    loader_source = inspect.getsource(cli.load_cli_config)
    return _extract_dict_keys(loader_source, "env_mappings")
|
||||
|
||||
|
||||
def _gateway_env_map_keys() -> set[str]:
    """Collect the terminal config keys bridged by gateway/run.py at import.

    The ``_terminal_env_map`` dict is built at module top level rather than
    inside a function, so the whole module's source is scanned.
    """
    import gateway.run as gateway_run

    module_source = inspect.getsource(gateway_run)
    return _extract_dict_keys(module_source, "_terminal_env_map")
|
||||
|
||||
|
||||
def _save_config_env_sync_keys() -> set[str]:
    """Collect the terminal keys that ``hermes config set foo bar`` bridges.

    ``set_config_value`` keys its ``_config_to_env_sync`` map by the
    fully-qualified ``terminal.foo`` form.  Only entries with the
    ``terminal.`` prefix are kept, and the prefix is removed so the result
    can be compared with the other two maps, which use bare leaf keys.
    """
    from hermes_cli import config as hc_config

    prefix = "terminal."
    qualified = _extract_dict_keys(
        inspect.getsource(hc_config.set_config_value),
        "_config_to_env_sync",
    )
    return {key[len(prefix):] for key in qualified if key.startswith(prefix)}
|
||||
|
||||
|
||||
# Keys present in cli.py env_mappings but intentionally absent from
|
||||
# gateway/run.py or set_config_value. Each entry must be justified.
|
||||
_CLI_ONLY_OK = frozenset({
|
||||
# `env_type` is a legacy YAML key alias for `backend` that cli.py
|
||||
# accepts for backwards-compat with older cli-config.yaml. The
|
||||
# gateway path normalizes on the canonical `backend` key, which is
|
||||
# also in the map and handles the same bridging. See cli.py ~line 515.
|
||||
"env_type",
|
||||
# sudo_password is not a terminal-backend option — it's a credential
|
||||
# used across backends, bridged to $SUDO_PASSWORD (not TERMINAL_*).
|
||||
# Treating it as terminal-only would be misleading.
|
||||
"sudo_password",
|
||||
})
|
||||
|
||||
|
||||
def _terminal_tool_env_var_names() -> set[str]:
    """Collect every quoted ``TERMINAL_*`` name in tools.terminal_tool's source.

    This is a naive textual scan: any single- or double-quoted
    ``TERMINAL_X`` string literal counts, which is meant to pick up calls
    such as ``os.getenv("TERMINAL_X", ...)`` and
    ``_parse_env_var("TERMINAL_X", ...)``.
    """
    import re

    import tools.terminal_tool as terminal_tool

    quoted_name = re.compile(r'["\'](TERMINAL_[A-Z0-9_]+)["\']')
    module_source = inspect.getsource(terminal_tool)
    return {match.group(1) for match in quoted_name.finditer(module_source)}
|
||||
|
||||
|
||||
def test_cli_and_gateway_env_maps_agree():
    """cli.py and gateway/run.py must bridge the same terminal config keys.

    Both maps feed the same downstream consumer (terminal_tool).  If they
    drift, a config.yaml setting works in CLI mode but not in gateway mode
    (or the reverse), a bug class that has already shipped twice.
    """
    cli_side = _cli_env_map_keys() - _CLI_ONLY_OK

    # cli.py accepts both `env_type` and `backend` as source keys for
    # TERMINAL_ENV and copies `backend` -> `env_type` before the lookup,
    # whereas the gateway only accepts `backend`.  The two are equivalent,
    # so `backend` is dropped from the gateway side to avoid a spurious
    # "backend missing from cli" failure.
    gateway_side = _gateway_env_map_keys() - {"backend"}

    only_in_cli = cli_side - gateway_side
    only_in_gateway = gateway_side - cli_side

    assert not only_in_cli, (
        "Keys in cli.py env_mappings but missing from gateway/run.py "
        f"_terminal_env_map: {sorted(only_in_cli)}. Add them to "
        "both maps (same bug class as docker_run_as_host_user shipping "
        "wired in cli but not gateway in April 2026)."
    )
    assert not only_in_gateway, (
        "Keys in gateway/run.py _terminal_env_map but missing from cli.py "
        f"env_mappings: {sorted(only_in_gateway)}. Add them to both maps."
    )
|
||||
|
||||
|
||||
def test_save_config_set_supports_critical_bridged_keys():
    """``hermes config set terminal.X true`` must reach .env for critical keys.

    This was once an all-keys invariant, but several pre-existing terminal
    keys (ssh_*, docker_forward_env, docker_volumes) are not in
    _config_to_env_sync; they are handled through the separate api_keys
    TERMINAL_SSH_* fallback path or by users editing the YAML directly.

    Until those gaps are audited and fixed, this test pins the keys that
    are load-bearing for the docker backend's ownership flag, so the bug
    that was just fixed cannot silently regress.
    """
    load_bearing = (
        "docker_run_as_host_user",
        "docker_mount_cwd_to_workspace",
        "backend",
        "docker_image",
        "container_cpu",
        "container_memory",
        "container_disk",
        "container_persistent",
    )
    missing = set(load_bearing).difference(_save_config_env_sync_keys())
    assert not missing, (
        "`hermes config set terminal.X` doesn't sync these load-bearing "
        f"keys to .env: {sorted(missing)}. Add them to _config_to_env_sync "
        "in hermes_cli/config.py:set_config_value."
    )
|
||||
|
||||
|
||||
def test_docker_run_as_host_user_is_bridged_everywhere():
    """Explicit pin for the docker_run_as_host_user wiring bug.

    The option was added to terminal_tool._get_env_config and
    DockerEnvironment but NOT to cli.py's env_mappings or gateway/run.py's
    _terminal_env_map, so ``terminal.docker_run_as_host_user: true`` in
    config.yaml had no effect at runtime.  This guard stops that
    regression from being reintroduced silently.
    """
    config_key = "docker_run_as_host_user"
    # Checked in the same order as the bridging layers: CLI, gateway,
    # `hermes config set`.
    for collect_keys in (
        _cli_env_map_keys,
        _gateway_env_map_keys,
        _save_config_env_sync_keys,
    ):
        assert config_key in collect_keys()
    assert "TERMINAL_DOCKER_RUN_AS_HOST_USER" in _terminal_tool_env_var_names()
|
||||
|
||||
|
||||
def test_docker_mount_cwd_to_workspace_is_bridged_everywhere():
    """Pin docker_mount_cwd_to_workspace across every bridging layer.

    Same regression class as docker_run_as_host_user: this key was missing
    from gateway/run.py's _terminal_env_map until the
    docker_run_as_host_user audit caught it.
    """
    config_key = "docker_mount_cwd_to_workspace"
    # Checked in the same order as the bridging layers: CLI, gateway,
    # `hermes config set`.
    for collect_keys in (
        _cli_env_map_keys,
        _gateway_env_map_keys,
        _save_config_env_sync_keys,
    ):
        assert config_key in collect_keys()
    assert "TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE" in _terminal_tool_env_var_names()
|
||||
@@ -0,0 +1,45 @@
|
||||
"""Auto-generated list of built-in tool modules that call ``registry.register()``.
|
||||
|
||||
DO NOT EDIT MANUALLY. Regenerate with:
|
||||
|
||||
python scripts/build_tool_manifest.py
|
||||
|
||||
This file is read at startup by ``tools.registry.discover_builtin_tools()`` to
|
||||
skip the ~145 ms AST scan of every ``tools/*.py`` file. When a ``tools/*.py``
|
||||
file is added, modified, or removed, the dev-mode mtime check in
|
||||
``discover_builtin_tools`` will log a warning and fall back to the AST scan —
|
||||
run this script to regenerate and commit.
|
||||
|
||||
Only covers *built-in* tools (shipped in ``tools/*.py``). Plugin tools and
|
||||
MCP-registered tools use separate discovery paths and are not listed here.
|
||||
"""
|
||||
|
||||
TOOL_MODULES: tuple[str, ...] = (
|
||||
'tools.browser_cdp_tool',
|
||||
'tools.browser_dialog_tool',
|
||||
'tools.browser_tool',
|
||||
'tools.clarify_tool',
|
||||
'tools.code_execution_tool',
|
||||
'tools.cronjob_tools',
|
||||
'tools.delegate_tool',
|
||||
'tools.discord_tool',
|
||||
'tools.feishu_doc_tool',
|
||||
'tools.feishu_drive_tool',
|
||||
'tools.file_tools',
|
||||
'tools.homeassistant_tool',
|
||||
'tools.image_generation_tool',
|
||||
'tools.memory_tool',
|
||||
'tools.mixture_of_agents_tool',
|
||||
'tools.process_registry',
|
||||
'tools.rl_training_tool',
|
||||
'tools.send_message_tool',
|
||||
'tools.session_search_tool',
|
||||
'tools.skill_manager_tool',
|
||||
'tools.skills_tool',
|
||||
'tools.terminal_tool',
|
||||
'tools.todo_tool',
|
||||
'tools.tts_tool',
|
||||
'tools.vision_tools',
|
||||
'tools.web_tools',
|
||||
'tools.yuanbao_tools',
|
||||
)
|
||||
+2
-1
@@ -17,6 +17,7 @@ import threading
|
||||
import time
|
||||
import unicodedata
|
||||
from typing import Optional
|
||||
from hermes_cli.config import cfg_get
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -711,7 +712,7 @@ def _get_cron_approval_mode() -> str:
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
config = load_config()
|
||||
mode = str(config.get("approvals", {}).get("cron_mode", "deny")).lower().strip()
|
||||
mode = str(cfg_get(config, "approvals", "cron_mode", default="deny")).lower().strip()
|
||||
if mode in ("approve", "off", "allow", "yes"):
|
||||
return "approve"
|
||||
return "deny"
|
||||
|
||||
@@ -32,7 +32,7 @@ from typing import Any, Dict, Optional
|
||||
|
||||
import requests
|
||||
|
||||
from hermes_cli.config import load_config
|
||||
from hermes_cli.config import cfg_get, load_config
|
||||
from tools.browser_camofox_state import get_camofox_identity
|
||||
from tools.registry import tool_error
|
||||
|
||||
@@ -544,7 +544,7 @@ def camofox_vision(question: str, annotate: bool = False,
|
||||
|
||||
try:
|
||||
_cfg = load_config()
|
||||
_vision_cfg = _cfg.get("auxiliary", {}).get("vision", {})
|
||||
_vision_cfg = cfg_get(_cfg, "auxiliary", "vision", default={})
|
||||
_vision_timeout = float(_vision_cfg.get("timeout", 120))
|
||||
_vision_temperature = float(_vision_cfg.get("temperature", 0.1))
|
||||
except Exception:
|
||||
|
||||
@@ -68,6 +68,7 @@ from pathlib import Path
|
||||
from agent.auxiliary_client import call_llm
|
||||
from hermes_constants import get_hermes_home
|
||||
from utils import is_truthy_value
|
||||
from hermes_cli.config import cfg_get
|
||||
|
||||
try:
|
||||
from tools.website_policy import check_website_access
|
||||
@@ -192,7 +193,7 @@ def _get_command_timeout() -> int:
|
||||
try:
|
||||
from hermes_cli.config import read_raw_config
|
||||
cfg = read_raw_config()
|
||||
val = cfg.get("browser", {}).get("command_timeout")
|
||||
val = cfg_get(cfg, "browser", "command_timeout")
|
||||
if val is not None:
|
||||
result = max(int(val), 5) # Floor at 5s to avoid instant kills
|
||||
except Exception as e:
|
||||
@@ -2245,7 +2246,7 @@ def _maybe_start_recording(task_id: str):
|
||||
from hermes_cli.config import read_raw_config
|
||||
hermes_home = get_hermes_home()
|
||||
cfg = read_raw_config()
|
||||
record_enabled = cfg.get("browser", {}).get("record_sessions", False)
|
||||
record_enabled = cfg_get(cfg, "browser", "record_sessions", default=False)
|
||||
|
||||
if not record_enabled:
|
||||
return
|
||||
@@ -2448,7 +2449,7 @@ def browser_vision(question: str, annotate: bool = False, task_id: Optional[str]
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
_cfg = load_config()
|
||||
_vision_cfg = _cfg.get("auxiliary", {}).get("vision", {})
|
||||
_vision_cfg = cfg_get(_cfg, "auxiliary", "vision", default={})
|
||||
_vt = _vision_cfg.get("timeout")
|
||||
if _vt is not None:
|
||||
vision_timeout = float(_vt)
|
||||
|
||||
@@ -488,6 +488,7 @@ def _get_or_create_env(task_id: str):
|
||||
"container_disk": config.get("container_disk", 51200),
|
||||
"container_persistent": config.get("container_persistent", True),
|
||||
"docker_volumes": config.get("docker_volumes", []),
|
||||
"docker_run_as_host_user": config.get("docker_run_as_host_user", False),
|
||||
}
|
||||
|
||||
ssh_config = None
|
||||
@@ -1309,10 +1310,20 @@ def _kill_process_group(proc, escalate: bool = False):
|
||||
|
||||
|
||||
def _load_config() -> dict:
|
||||
"""Load code_execution config from CLI_CONFIG if available."""
|
||||
"""Load code_execution config without importing the interactive CLI.
|
||||
|
||||
This helper is called while building the module-level execute_code schema
|
||||
during tool discovery. Importing ``cli`` here pulls prompt_toolkit/Rich and
|
||||
a large chunk of the classic REPL onto every agent startup path, including
|
||||
``hermes --tui`` where it is never used. Read the lightweight raw config
|
||||
instead; the config layer already caches by (mtime, size), and an absent
|
||||
key cleanly falls back to DEFAULT_EXECUTION_MODE.
|
||||
"""
|
||||
try:
|
||||
from cli import CLI_CONFIG
|
||||
return CLI_CONFIG.get("code_execution", {})
|
||||
from hermes_cli.config import read_raw_config
|
||||
|
||||
cfg = read_raw_config().get("code_execution", {})
|
||||
return cfg if isinstance(cfg, dict) else {}
|
||||
except Exception:
|
||||
return {}
|
||||
|
||||
|
||||
@@ -25,6 +25,7 @@ import os
|
||||
from contextvars import ContextVar
|
||||
from pathlib import Path
|
||||
from typing import Dict, List
|
||||
from hermes_cli.config import cfg_get
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -138,7 +139,7 @@ def _load_config_files() -> List[Dict[str, str]]:
|
||||
from hermes_cli.config import read_raw_config
|
||||
hermes_home = _resolve_hermes_home()
|
||||
cfg = read_raw_config()
|
||||
cred_files = cfg.get("terminal", {}).get("credential_files")
|
||||
cred_files = cfg_get(cfg, "terminal", "credential_files")
|
||||
if isinstance(cred_files, list):
|
||||
from tools.path_security import validate_within_dir
|
||||
|
||||
|
||||
@@ -22,6 +22,7 @@ from __future__ import annotations
|
||||
import logging
|
||||
from contextvars import ContextVar
|
||||
from typing import Iterable
|
||||
from hermes_cli.config import cfg_get
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -109,7 +110,7 @@ def _load_config_passthrough() -> frozenset[str]:
|
||||
try:
|
||||
from hermes_cli.config import read_raw_config
|
||||
cfg = read_raw_config()
|
||||
passthrough = cfg.get("terminal", {}).get("env_passthrough")
|
||||
passthrough = cfg_get(cfg, "terminal", "env_passthrough")
|
||||
if isinstance(passthrough, list):
|
||||
for item in passthrough:
|
||||
if isinstance(item, str) and item.strip():
|
||||
|
||||
@@ -335,6 +335,10 @@ class BaseEnvironment(ABC):
|
||||
instead of running with ``bash -l``.
|
||||
"""
|
||||
# Full capture: env vars, functions (filtered), aliases, shell options.
|
||||
# Restore configured cwd after login shell profile scripts, which may
|
||||
# change the working directory (e.g. bashrc `cd ~`). Without this,
|
||||
# pwd -P captures the profile's directory, not terminal.cwd.
|
||||
_quoted_cwd = shlex.quote(self.cwd)
|
||||
bootstrap = (
|
||||
f"export -p > {self._snapshot_path}\n"
|
||||
f"declare -f | grep -vE '^_[^_]' >> {self._snapshot_path}\n"
|
||||
@@ -342,6 +346,7 @@ class BaseEnvironment(ABC):
|
||||
f"echo 'shopt -s expand_aliases' >> {self._snapshot_path}\n"
|
||||
f"echo 'set +e' >> {self._snapshot_path}\n"
|
||||
f"echo 'set +u' >> {self._snapshot_path}\n"
|
||||
f"builtin cd {_quoted_cwd} 2>/dev/null || true\n"
|
||||
f"pwd -P > {self._cwd_file} 2>/dev/null || true\n"
|
||||
f"printf '\\n{self._cwd_marker}%s{self._cwd_marker}\\n' \"$(pwd -P)\"\n"
|
||||
)
|
||||
|
||||
@@ -151,16 +151,16 @@ def find_docker() -> Optional[str]:
|
||||
# SETUID/SETGID - the image entrypoint drops from root to the 'hermes'
|
||||
# user via `gosu`, which requires these caps. Combined with
|
||||
# `no-new-privileges`, gosu still cannot escalate back to root after
|
||||
# the drop, so the security posture is preserved.
|
||||
# the drop, so the security posture is preserved. Omitted entirely
|
||||
# when the container starts as a non-root user via --user, since
|
||||
# no gosu drop is needed in that mode.
|
||||
# Block privilege escalation and limit PIDs.
|
||||
# /tmp is size-limited and nosuid but allows exec (needed by pip/npm builds).
|
||||
_SECURITY_ARGS = [
|
||||
_BASE_SECURITY_ARGS = [
|
||||
"--cap-drop", "ALL",
|
||||
"--cap-add", "DAC_OVERRIDE",
|
||||
"--cap-add", "CHOWN",
|
||||
"--cap-add", "FOWNER",
|
||||
"--cap-add", "SETUID",
|
||||
"--cap-add", "SETGID",
|
||||
"--security-opt", "no-new-privileges",
|
||||
"--pids-limit", "256",
|
||||
"--tmpfs", "/tmp:rw,nosuid,size=512m",
|
||||
@@ -168,6 +168,39 @@ _SECURITY_ARGS = [
|
||||
"--tmpfs", "/run:rw,noexec,nosuid,size=64m",
|
||||
]
|
||||
|
||||
# Extra caps needed when the container starts as root and an entrypoint
|
||||
# must drop privileges via gosu/su. Skipped when --user is passed because
|
||||
# the container already starts unprivileged and never needs to switch.
|
||||
_GOSU_CAP_ARGS = [
|
||||
"--cap-add", "SETUID",
|
||||
"--cap-add", "SETGID",
|
||||
]
|
||||
|
||||
|
||||
def _build_security_args(run_as_host_user: bool) -> list[str]:
    """Build the security/cap/tmpfs argument list for the privilege mode.

    The result is always a fresh copy of ``_BASE_SECURITY_ARGS``.  The
    entries of ``_GOSU_CAP_ARGS`` are appended unless ``run_as_host_user``
    is true.
    """
    args = list(_BASE_SECURITY_ARGS)
    if not run_as_host_user:
        args.extend(_GOSU_CAP_ARGS)
    return args
|
||||
|
||||
|
||||
def _resolve_host_user_spec() -> Optional[str]:
|
||||
"""Return ``<uid>:<gid>`` for the current host user, or ``None`` on platforms
|
||||
where this is not meaningful (e.g. Windows without posix ids).
|
||||
|
||||
We intentionally read ``os.getuid()``/``os.getgid()`` directly rather than
|
||||
going through ``getpass``/``pwd`` so this stays cheap and never raises on
|
||||
nameless UIDs (nss lookups can fail inside sandboxed launchers).
|
||||
"""
|
||||
get_uid = getattr(os, "getuid", None)
|
||||
get_gid = getattr(os, "getgid", None)
|
||||
if get_uid is None or get_gid is None:
|
||||
return None
|
||||
try:
|
||||
return f"{get_uid()}:{get_gid()}"
|
||||
except Exception: # pragma: no cover - defensive
|
||||
return None
|
||||
|
||||
|
||||
_storage_opt_ok: Optional[bool] = None # cached result across instances
|
||||
|
||||
@@ -266,6 +299,7 @@ class DockerEnvironment(BaseEnvironment):
|
||||
network: bool = True,
|
||||
host_cwd: str = None,
|
||||
auto_mount_cwd: bool = False,
|
||||
run_as_host_user: bool = False,
|
||||
):
|
||||
if cwd == "~":
|
||||
cwd = "/root"
|
||||
@@ -421,8 +455,35 @@ class DockerEnvironment(BaseEnvironment):
|
||||
for key in sorted(self._env):
|
||||
env_args.extend(["-e", f"{key}={self._env[key]}"])
|
||||
|
||||
# Optional: run the container as the host user so files written into
|
||||
# bind-mounted dirs (/workspace, /root, docker_volumes entries) are
|
||||
# owned by that user on the host instead of by root. Skip cleanly on
|
||||
# platforms without POSIX uid/gid (e.g. native Windows Docker).
|
||||
user_args: list[str] = []
|
||||
if run_as_host_user:
|
||||
user_spec = _resolve_host_user_spec()
|
||||
if user_spec is not None:
|
||||
user_args = ["--user", user_spec]
|
||||
logger.info("Docker: running container as host user %s", user_spec)
|
||||
else:
|
||||
logger.warning(
|
||||
"docker_run_as_host_user is enabled but this platform does "
|
||||
"not expose POSIX uid/gid; container will start as its "
|
||||
"image default user."
|
||||
)
|
||||
# Fall back to the full cap set — without --user, an image's
|
||||
# entrypoint may still need gosu/su to drop privileges.
|
||||
security_args = _build_security_args(run_as_host_user and bool(user_args))
|
||||
|
||||
logger.info(f"Docker volume_args: {volume_args}")
|
||||
all_run_args = list(_SECURITY_ARGS) + writable_args + resource_args + volume_args + env_args
|
||||
all_run_args = (
|
||||
security_args
|
||||
+ user_args
|
||||
+ writable_args
|
||||
+ resource_args
|
||||
+ volume_args
|
||||
+ env_args
|
||||
)
|
||||
logger.info(f"Docker run_args: {all_run_args}")
|
||||
|
||||
# Resolve the docker executable once so it works even when
|
||||
|
||||
@@ -305,6 +305,8 @@ class LocalEnvironment(BaseEnvironment):
|
||||
"""
|
||||
|
||||
def __init__(self, cwd: str = "", timeout: int = 60, env: dict = None):
|
||||
if cwd:
|
||||
cwd = os.path.expanduser(cwd)
|
||||
super().__init__(cwd=cwd or os.getcwd(), timeout=timeout, env=env)
|
||||
self.init_session()
|
||||
|
||||
|
||||
@@ -389,6 +389,7 @@ def _get_file_ops(task_id: str = "default") -> ShellFileOperations:
|
||||
"docker_volumes": config.get("docker_volumes", []),
|
||||
"docker_mount_cwd_to_workspace": config.get("docker_mount_cwd_to_workspace", False),
|
||||
"docker_forward_env": config.get("docker_forward_env", []),
|
||||
"docker_run_as_host_user": config.get("docker_run_as_host_user", False),
|
||||
}
|
||||
|
||||
ssh_config = None
|
||||
|
||||
+45
-7
@@ -54,15 +54,53 @@ def _module_registers_tools(module_path: Path) -> bool:
|
||||
return any(_is_registry_register_call(stmt) for stmt in tree.body)
|
||||
|
||||
|
||||
def _load_manifest() -> Optional[List[str]]:
|
||||
"""Return the cached ``TOOL_MODULES`` tuple from ``tools/_manifest.py``.
|
||||
|
||||
Returns ``None`` when the manifest is missing (fresh checkout, or the
|
||||
``scripts/build_tool_manifest.py`` generator hasn't been run yet). The
|
||||
caller falls back to the AST scan in that case.
|
||||
|
||||
NOTE: This intentionally does NOT check for mtime drift between the
|
||||
manifest and ``tools/*.py`` files. Drift protection belongs in CI
|
||||
(``python scripts/build_tool_manifest.py --check``) — adding a
|
||||
per-startup stat walk here would both (a) add overhead to the path
|
||||
we're trying to speed up and (b) give false positives when devs edit
|
||||
helper modules that don't register tools. If the manifest lists a
|
||||
tool that no longer exists, the import fails loudly at startup.
|
||||
"""
|
||||
try:
|
||||
from tools._manifest import TOOL_MODULES
|
||||
return list(TOOL_MODULES)
|
||||
except ImportError:
|
||||
return None
|
||||
|
||||
|
||||
def discover_builtin_tools(tools_dir: Optional[Path] = None) -> List[str]:
|
||||
"""Import built-in self-registering tool modules and return their module names."""
|
||||
"""Import built-in self-registering tool modules and return their module names.
|
||||
|
||||
Fast path: read ``tools/_manifest.py`` and import the listed modules
|
||||
directly. Skips the ~145 ms AST scan of every ``tools/*.py`` file.
|
||||
|
||||
Fallback path: if the manifest is missing or stale (any ``tools/*.py`` is
|
||||
newer than the manifest's mtime), scan AST and log a warning. The
|
||||
fallback is the only behavior when running from a git checkout that
|
||||
hasn't regenerated the manifest after local edits.
|
||||
"""
|
||||
tools_path = Path(tools_dir) if tools_dir is not None else Path(__file__).resolve().parent
|
||||
module_names = [
|
||||
f"tools.{path.stem}"
|
||||
for path in sorted(tools_path.glob("*.py"))
|
||||
if path.name not in {"__init__.py", "registry.py", "mcp_tool.py"}
|
||||
and _module_registers_tools(path)
|
||||
]
|
||||
|
||||
# Only use the committed manifest when scanning the default tools/
|
||||
# directory. Tests and embedders that pass a custom tools_dir always
|
||||
# get the AST-scan path.
|
||||
default_tools_path = Path(__file__).resolve().parent
|
||||
module_names = _load_manifest() if tools_path == default_tools_path else None
|
||||
if module_names is None:
|
||||
module_names = [
|
||||
f"tools.{path.stem}"
|
||||
for path in sorted(tools_path.glob("*.py"))
|
||||
if path.name not in {"__init__.py", "_manifest.py", "registry.py", "mcp_tool.py"}
|
||||
and _module_registers_tools(path)
|
||||
]
|
||||
|
||||
imported: List[str] = []
|
||||
for mod_name in module_names:
|
||||
|
||||
@@ -43,6 +43,7 @@ from hermes_constants import get_hermes_home, display_hermes_home
|
||||
from typing import Dict, Any, Optional, Tuple
|
||||
|
||||
from utils import atomic_replace
|
||||
from hermes_cli.config import cfg_get
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -66,7 +67,7 @@ def _guard_agent_created_enabled() -> bool:
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
cfg = load_config()
|
||||
return bool(cfg.get("skills", {}).get("guard_agent_created", False))
|
||||
return bool(cfg_get(cfg, "skills", "guard_agent_created", default=False))
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
@@ -700,6 +701,17 @@ def skill_manage(
|
||||
clear_skills_system_prompt_cache(clear_snapshot=True)
|
||||
except Exception:
|
||||
pass
|
||||
# Curator telemetry: bump patch_count on edit/patch/write_file (the actions
|
||||
# that mutate an existing skill's guidance), drop the record on delete.
|
||||
# Best-effort; telemetry failures never break the tool.
|
||||
try:
|
||||
from tools.skill_usage import bump_patch, forget
|
||||
if action in ("patch", "edit", "write_file", "remove_file"):
|
||||
bump_patch(name)
|
||||
elif action == "delete":
|
||||
forget(name)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return json.dumps(result, ensure_ascii=False)
|
||||
|
||||
|
||||
@@ -0,0 +1,456 @@
|
||||
"""Skill usage telemetry + provenance tracking for the Curator feature.
|
||||
|
||||
Tracks per-skill usage metadata in a sidecar JSON file (~/.hermes/skills/.usage.json)
|
||||
keyed by skill name. Counters are bumped by the existing skill tools (skill_view,
|
||||
skill_manage); the curator orchestrator reads them to decide lifecycle transitions.
|
||||
|
||||
Design notes:
|
||||
- Sidecar, not frontmatter. Keeps operational telemetry out of user-authored
|
||||
SKILL.md content and avoids conflict pressure for bundled/hub skills.
|
||||
- Atomic writes via tempfile + os.replace (same pattern as .bundled_manifest).
|
||||
- All counter bumps are best-effort: failures log at DEBUG and return silently.
|
||||
A broken sidecar never breaks the underlying tool call.
|
||||
- Provenance filter: "agent-created" == not in .bundled_manifest AND not in
|
||||
.hub/lock.json. The curator only ever mutates agent-created skills.
|
||||
|
||||
Lifecycle states:
|
||||
active -> default
|
||||
stale -> unused > stale_after_days (config)
|
||||
archived -> unused > archive_after_days (config); moved to .archive/
|
||||
pinned -> opt-out from auto transitions (boolean flag, orthogonal to state)
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import tempfile
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Iterable, List, Optional, Set, Tuple
|
||||
|
||||
from hermes_constants import get_hermes_home
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
STATE_ACTIVE = "active"
|
||||
STATE_STALE = "stale"
|
||||
STATE_ARCHIVED = "archived"
|
||||
_VALID_STATES = {STATE_ACTIVE, STATE_STALE, STATE_ARCHIVED}
|
||||
|
||||
|
||||
def _skills_dir() -> Path:
    """Return the ``skills`` directory beneath the Hermes home directory."""
    hermes_home = get_hermes_home()
    return hermes_home / "skills"
|
||||
|
||||
|
||||
def _usage_file() -> Path:
    """Return the path of the ``.usage.json`` sidecar in the skills directory."""
    skills_root = _skills_dir()
    return skills_root / ".usage.json"
|
||||
|
||||
|
||||
def _archive_dir() -> Path:
    """Return the ``.archive`` directory inside the skills directory."""
    skills_root = _skills_dir()
    return skills_root / ".archive"
|
||||
|
||||
|
||||
def _now_iso() -> str:
|
||||
return datetime.now(timezone.utc).isoformat()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Provenance — which skills are agent-created (and thus eligible for curation)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _read_bundled_manifest_names() -> Set[str]:
    """Return the set of skill names that were seeded from the bundled repo.

    Reads ``.bundled_manifest`` in the skills directory, which holds one
    ``name:hash`` entry per line.  Blank lines and entries with an empty
    name are ignored.

    Returns an empty set if the file is missing or unreadable.  Unreadable
    includes content that is not valid UTF-8: ``UnicodeDecodeError`` is not
    an ``OSError``, so it is caught explicitly to keep that promise.
    """
    manifest = _skills_dir() / ".bundled_manifest"
    try:
        text = manifest.read_text(encoding="utf-8")
    except FileNotFoundError:
        # A missing manifest is the normal "nothing bundled" case.
        return set()
    except (OSError, UnicodeDecodeError) as e:
        logger.debug("Failed to read bundled manifest: %s", e)
        return set()
    # Everything before the first colon is the skill name.
    names = {line.split(":", 1)[0].strip() for line in text.splitlines()}
    names.discard("")
    return names
|
||||
|
||||
|
||||
def _read_hub_installed_names() -> Set[str]:
    """Return the set of skill names installed via the Skills Hub.

    Reads ``.hub/lock.json`` in the skills directory (see
    tools/skills_hub.py :: HubLockFile) and returns the keys of its
    ``installed`` mapping as strings.

    Returns an empty set when the file is missing, unreadable, not valid
    UTF-8, not valid JSON, or does not have the expected shape.
    """
    lock_path = _skills_dir() / ".hub" / "lock.json"
    try:
        data = json.loads(lock_path.read_text(encoding="utf-8"))
    except FileNotFoundError:
        # No lock file simply means nothing was installed from the hub.
        return set()
    except (OSError, ValueError) as e:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        logger.debug("Failed to read hub lock file: %s", e)
        return set()
    installed = data.get("installed") if isinstance(data, dict) else None
    if isinstance(installed, dict):
        return {str(name) for name in installed}
    return set()
|
||||
|
||||
|
||||
def list_agent_created_skill_names() -> List[str]:
    """List skills authored by the agent (or user), sorted and de-duplicated.

    Skills that come from the bundled manifest or the hub lock file are
    excluded.  The curator operates exclusively on the returned set;
    bundled / hub skills are maintained by their upstream sources and must
    never be pruned here.

    Returns an empty list when the skills directory does not exist.
    """
    skills_root = _skills_dir()
    if not skills_root.exists():
        return []

    protected = _read_bundled_manifest_names() | _read_hub_installed_names()
    found: Set[str] = set()

    # rglob picks up both flat <skill>/SKILL.md layouts and nested
    # <category>/<skill>/SKILL.md layouts.
    for skill_md in skills_root.rglob("SKILL.md"):
        try:
            rel_parts = skill_md.relative_to(skills_root).parts
        except ValueError:
            continue
        # Ignore anything whose top-level directory is hidden (such as
        # .archive or .hub) or is node_modules.
        if rel_parts and (
            rel_parts[0].startswith(".") or rel_parts[0] == "node_modules"
        ):
            continue
        skill_name = _read_skill_name(skill_md, fallback=skill_md.parent.name)
        if skill_name not in protected:
            found.add(skill_name)

    return sorted(found)
|
||||
|
||||
|
||||
def _read_skill_name(skill_md: Path, fallback: str) -> str:
|
||||
"""Parse the `name:` field from a SKILL.md YAML frontmatter."""
|
||||
try:
|
||||
text = skill_md.read_text(encoding="utf-8", errors="replace")[:4000]
|
||||
except OSError:
|
||||
return fallback
|
||||
in_frontmatter = False
|
||||
for line in text.split("\n"):
|
||||
stripped = line.strip()
|
||||
if stripped == "---":
|
||||
if in_frontmatter:
|
||||
break
|
||||
in_frontmatter = True
|
||||
continue
|
||||
if in_frontmatter and stripped.startswith("name:"):
|
||||
value = stripped.split(":", 1)[1].strip().strip("\"'")
|
||||
if value:
|
||||
return value
|
||||
return fallback
|
||||
|
||||
|
||||
def is_agent_created(skill_name: str) -> bool:
    """Return True when *skill_name* is neither bundled nor hub-installed."""
    bundled = _read_bundled_manifest_names()
    hub_installed = _read_hub_installed_names()
    return not (skill_name in bundled or skill_name in hub_installed)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Sidecar I/O
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _empty_record() -> Dict[str, Any]:
    """Build a fresh usage record: zero counters, active state, not pinned.

    ``created_at`` is stamped with the current time; all other timestamps
    start out as ``None``.
    """
    return dict(
        use_count=0,
        view_count=0,
        last_used_at=None,
        last_viewed_at=None,
        patch_count=0,
        last_patched_at=None,
        created_at=_now_iso(),
        state=STATE_ACTIVE,
        pinned=False,
        archived_at=None,
    )
|
||||
|
||||
|
||||
def load_usage() -> Dict[str, Dict[str, Any]]:
    """Read the entire .usage.json map.

    Returns an empty dict when the file is missing, unreadable, not valid
    UTF-8, not valid JSON, or does not hold a JSON object at the top level.
    Entries whose value is not a dict are dropped from the result, and
    keys are coerced to ``str``.
    """
    path = _usage_file()
    try:
        data = json.loads(path.read_text(encoding="utf-8"))
    except FileNotFoundError:
        # No sidecar yet: nothing has been recorded.
        return {}
    except (OSError, ValueError) as e:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError,
        # so a corrupt sidecar never propagates to the calling tool.
        logger.debug("Failed to read %s: %s", path, e)
        return {}
    if not isinstance(data, dict):
        return {}
    # Defensive: keep only well-formed (dict) records.
    return {str(k): v for k, v in data.items() if isinstance(v, dict)}
|
||||
|
||||
|
||||
def save_usage(data: Dict[str, Dict[str, Any]]) -> None:
    """Write the usage map atomically. Best-effort — errors are logged, not raised.

    The JSON is first written to a temporary file created in the target's own
    directory and then moved over the target with ``os.replace``.

    Args:
        data: The complete usage map to persist; it replaces the previous
            file contents.
    """
    path = _usage_file()
    try:
        path.parent.mkdir(parents=True, exist_ok=True)
        # The temp file is created next to the target so the final
        # os.replace happens within one directory.
        fd, tmp_path = tempfile.mkstemp(
            dir=str(path.parent), prefix=".usage_", suffix=".tmp"
        )
        try:
            with os.fdopen(fd, "w", encoding="utf-8") as f:
                json.dump(data, f, indent=2, sort_keys=True, ensure_ascii=False)
                # Flush and fsync before the rename so the data is on disk
                # by the time the new name becomes visible.
                f.flush()
                os.fsync(f.fileno())
            os.replace(tmp_path, path)
        except BaseException:
            # Remove the temp file on any failure (including interrupts),
            # then let the error propagate to the outer handler.
            try:
                os.unlink(tmp_path)
            except OSError:
                pass
            raise
    except Exception as e:
        logger.debug("Failed to write %s: %s", path, e, exc_info=True)
|
||||
|
||||
|
||||
def get_record(skill_name: str) -> Dict[str, Any]:
    """Fetch the usage record for *skill_name*.

    When no dict record is stored for the skill, a fresh default record is
    returned. A stored record has any missing keys filled in from the
    defaults, so callers can index every field even for older files.
    """
    stored = load_usage().get(skill_name)
    defaults = _empty_record()
    if isinstance(stored, dict):
        for field, default in defaults.items():
            if field not in stored:
                stored[field] = default
        return stored
    return defaults
|
||||
|
||||
|
||||
def _mutate(skill_name: str, mutator) -> None:
    """Run a load → *mutator(record)* → save cycle for one skill. Best-effort.

    Nothing is recorded for an empty name, nor for bundled or hub-installed
    skills: the sidecar only tracks agent-created skills (the only ones the
    curator considers), which also avoids stale counters lingering for
    upstream-managed skills. Any exception is logged at debug level and
    swallowed.
    """
    if not skill_name:
        return
    try:
        if is_agent_created(skill_name):
            usage = load_usage()
            existing = usage.get(skill_name)
            record = existing if isinstance(existing, dict) else _empty_record()
            mutator(record)
            usage[skill_name] = record
            save_usage(usage)
    except Exception as exc:
        logger.debug("skill_usage._mutate(%s) failed: %s", skill_name, exc, exc_info=True)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Public counter-bump helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def bump_view(skill_name: str) -> None:
    """Increment view_count and stamp last_viewed_at. Called from skill_view()."""

    def _increment(record: Dict[str, Any]) -> None:
        # A missing or falsy counter is treated as zero.
        previous = record.get("view_count") or 0
        record["view_count"] = int(previous) + 1
        record["last_viewed_at"] = _now_iso()

    _mutate(skill_name, _increment)
|
||||
|
||||
|
||||
def bump_use(skill_name: str) -> None:
    """Increment use_count and stamp last_used_at.

    Called when a skill is actively used (e.g. loaded into the prompt path or
    referenced from an assistant turn).
    """

    def _increment(record: Dict[str, Any]) -> None:
        # A missing or falsy counter is treated as zero.
        previous = record.get("use_count") or 0
        record["use_count"] = int(previous) + 1
        record["last_used_at"] = _now_iso()

    _mutate(skill_name, _increment)
|
||||
|
||||
|
||||
def bump_patch(skill_name: str) -> None:
    """Increment patch_count and stamp last_patched_at. Called from skill_manage (patch/edit)."""

    def _increment(record: Dict[str, Any]) -> None:
        # A missing or falsy counter is treated as zero.
        previous = record.get("patch_count") or 0
        record["patch_count"] = int(previous) + 1
        record["last_patched_at"] = _now_iso()

    _mutate(skill_name, _increment)
|
||||
|
||||
|
||||
def set_state(skill_name: str, state: str) -> None:
    """Record a new lifecycle state for the skill.

    An unrecognised *state* is logged at debug level and otherwise ignored.
    Archiving stamps ``archived_at``; returning to active clears it; any
    other valid state leaves ``archived_at`` untouched.
    """
    if state not in _VALID_STATES:
        logger.debug("set_state: invalid state %r for %s", state, skill_name)
        return

    def _transition(record: Dict[str, Any]) -> None:
        record["state"] = state
        if state == STATE_ARCHIVED:
            record["archived_at"] = _now_iso()
            return
        if state == STATE_ACTIVE:
            record["archived_at"] = None

    _mutate(skill_name, _transition)
|
||||
|
||||
|
||||
def set_pinned(skill_name: str, pinned: bool) -> None:
    """Store the ``pinned`` flag on the skill's usage record, coerced to bool."""

    def _store_flag(record: Dict[str, Any]) -> None:
        record["pinned"] = True if pinned else False

    _mutate(skill_name, _store_flag)
|
||||
|
||||
|
||||
def forget(skill_name: str) -> None:
    """Remove the skill's usage entry altogether. Called when the skill is deleted.

    The sidecar is only rewritten when an entry was actually present. Any
    failure is logged at debug level and swallowed.
    """
    if not skill_name:
        return
    try:
        usage = load_usage()
        if skill_name not in usage:
            return
        usage.pop(skill_name)
        save_usage(usage)
    except Exception as exc:
        logger.debug("skill_usage.forget(%s) failed: %s", skill_name, exc, exc_info=True)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Archive / restore
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def archive_skill(skill_name: str) -> Tuple[bool, str]:
    """Move an agent-created skill directory into the archive directory.

    Returns:
        ``(ok, message)``. Bundled and hub skills are never archived; callers
        are expected to check provenance, and it is re-checked here as a
        safety net.
    """
    if not is_agent_created(skill_name):
        return False, f"skill '{skill_name}' is bundled or hub-installed; never archive"

    source_dir = _find_skill_dir(skill_name)
    if source_dir is None:
        return False, f"skill '{skill_name}' not found"

    archive_root = _archive_dir()
    try:
        archive_root.mkdir(parents=True, exist_ok=True)
    except OSError as e:
        return False, f"failed to create archive dir: {e}"

    # Category nesting is flattened into a single ".archive/<skill>/" entry so
    # restores stay simple. On a name collision a UTC timestamp is appended.
    # NOTE(review): the entry is keyed by the directory name, while
    # restore_skill looks entries up by skill name — confirm these always match.
    entry_name = source_dir.name
    if (archive_root / entry_name).exists():
        stamp = datetime.now(timezone.utc).strftime('%Y%m%d%H%M%S')
        entry_name = f"{source_dir.name}-{stamp}"
    dest = archive_root / entry_name

    try:
        source_dir.rename(dest)
    except OSError:
        # Cross-device — fall back to shutil.move
        import shutil
        try:
            shutil.move(str(source_dir), str(dest))
        except Exception as move_error:
            return False, f"failed to archive: {move_error}"

    set_state(skill_name, STATE_ARCHIVED)
    return True, f"archived to {dest}"
|
||||
|
||||
|
||||
def restore_skill(skill_name: str) -> Tuple[bool, str]:
    """Move an archived skill back into the skills directory.

    Restores to the flat top-level layout; original category nesting is NOT
    reconstructed.

    Refuses to restore under a name that now collides with a bundled or
    hub-installed skill — that would shadow the upstream version.

    Args:
        skill_name: Name of the skill to restore.

    Returns:
        ``(ok, message)`` describing the outcome.
    """
    import re
    import shutil

    if not skill_name:
        return False, "skill name is required"

    # If a bundled or hub skill has since been installed under the same
    # name, refuse to restore rather than shadow it.
    if not is_agent_created(skill_name):
        return False, (
            f"skill '{skill_name}' is now bundled or hub-installed; "
            "restore would shadow the upstream version"
        )
    archive_root = _archive_dir()
    if not archive_root.exists():
        return False, "no archive directory"

    # An exact name match wins. Otherwise accept only "<name>-<YYYYmmddHHMMSS>",
    # the collision suffix archive_skill appends. A bare prefix match would
    # also pick up unrelated skills such as "<name>-extras".
    stamped_pattern = re.compile(re.escape(skill_name) + r"-\d{14}")
    exact = None
    stamped = []
    for entry in archive_root.iterdir():
        if not entry.is_dir():
            continue
        if entry.name == skill_name:
            exact = entry
        elif stamped_pattern.fullmatch(entry.name):
            stamped.append(entry)

    if exact is not None:
        src = exact
    elif stamped:
        # Fixed-width timestamps sort lexicographically, so max() is the newest.
        src = max(stamped, key=lambda p: p.name)
    else:
        return False, f"skill '{skill_name}' not found in archive"

    dest = _skills_dir() / skill_name
    if dest.exists():
        return False, f"destination already exists: {dest}"

    try:
        src.rename(dest)
    except OSError:
        # rename can fail across devices — fall back to shutil.move
        try:
            shutil.move(str(src), str(dest))
        except Exception as e:
            return False, f"failed to restore: {e}"

    set_state(skill_name, STATE_ACTIVE)
    return True, f"restored to {dest}"
|
||||
|
||||
|
||||
def _find_skill_dir(skill_name: str) -> Optional[Path]:
    """Find the directory of the skill whose frontmatter `name:` equals *skill_name*.

    Every SKILL.md below the skills directory is considered, which covers both
    the flat (<skills>/<skill>/SKILL.md) and the category-nested
    (<skills>/<category>/<skill>/SKILL.md) layouts. Files whose first path
    component under the skills directory starts with "." are ignored. When a
    SKILL.md yields no name, its parent directory name is used instead.

    Returns:
        The first matching skill directory, or ``None`` when there is none.
    """
    root = _skills_dir()
    if not root.exists():
        return None
    for manifest in root.rglob("SKILL.md"):
        try:
            relative = manifest.relative_to(root)
        except ValueError:
            continue
        top_component = relative.parts[0] if relative.parts else ""
        if top_component.startswith("."):
            continue
        candidate_dir = manifest.parent
        if _read_skill_name(manifest, fallback=candidate_dir.name) == skill_name:
            return candidate_dir
    return None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Reporting — for the curator CLI / slash command
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def agent_created_report() -> List[Dict[str, Any]]:
    """Produce one row per agent-created skill for reporting.

    Each row is the skill's usage record with a leading ``name`` key. Skills
    without a stored record, and stored records with missing keys, are filled
    in with defaults so callers can always index every field.
    """
    usage = load_usage()
    report: List[Dict[str, Any]] = []
    for skill in list_agent_created_skill_names():
        stored = usage.get(skill)
        record = stored if isinstance(stored, dict) else _empty_record()
        for field, default in _empty_record().items():
            record.setdefault(field, default)
        row: Dict[str, Any] = {"name": skill}
        row.update(record)
        report.append(row)
    return report
|
||||
+24
-4
@@ -77,6 +77,7 @@ from pathlib import Path
|
||||
from typing import Dict, Any, List, Optional, Set, Tuple
|
||||
|
||||
from tools.registry import registry, tool_error
|
||||
from hermes_cli.config import cfg_get
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -535,7 +536,7 @@ def _is_skill_disabled(name: str, platform: str = None) -> bool:
|
||||
skills_cfg = config.get("skills", {})
|
||||
resolved_platform = platform or os.getenv("HERMES_PLATFORM") or _get_session_platform()
|
||||
if resolved_platform:
|
||||
platform_disabled = skills_cfg.get("platform_disabled", {}).get(resolved_platform)
|
||||
platform_disabled = cfg_get(skills_cfg, "platform_disabled", resolved_platform)
|
||||
if platform_disabled is not None:
|
||||
return name in platform_disabled
|
||||
return name in skills_cfg.get("disabled", [])
|
||||
@@ -1480,13 +1481,32 @@ registry.register(
|
||||
check_fn=check_skills_requirements,
|
||||
emoji="📚",
|
||||
)
|
||||
def _skill_view_with_bump(args, **kw):
    """Invoke skill_view, then bump view_count on success.

    Best-effort: a telemetry failure never breaks the tool call. Failures in
    the bump path are logged at debug level instead of being silently
    discarded, and the skill_view result is always returned unchanged.

    Args:
        args: Tool arguments; ``name`` and ``file_path`` are read from it.
        **kw: Extra handler keyword arguments; ``task_id`` is forwarded.

    Returns:
        Whatever ``skill_view`` returned.
    """
    name = args.get("name", "")
    result = skill_view(
        name, file_path=args.get("file_path"), task_id=kw.get("task_id")
    )
    try:
        parsed = json.loads(result)
        if isinstance(parsed, dict) and parsed.get("success"):
            # Use the resolved skill name from the payload when present —
            # qualified forms ("plugin:skill") return with the canonical name.
            resolved = parsed.get("name") or name
            if resolved:
                from tools.skill_usage import bump_view
                bump_view(str(resolved))
    except Exception as e:
        # Telemetry only: record the failure for debugging and carry on.
        logger.debug("skill_view usage bump failed for %r: %s", name, e, exc_info=True)
    return result
|
||||
|
||||
|
||||
registry.register(
|
||||
name="skill_view",
|
||||
toolset="skills",
|
||||
schema=SKILL_VIEW_SCHEMA,
|
||||
handler=lambda args, **kw: skill_view(
|
||||
args.get("name", ""), file_path=args.get("file_path"), task_id=kw.get("task_id")
|
||||
),
|
||||
handler=_skill_view_with_bump,
|
||||
check_fn=check_skills_requirements,
|
||||
emoji="📚",
|
||||
)
|
||||
|
||||
@@ -925,6 +925,8 @@ def _get_env_config() -> Dict[str, Any]:
|
||||
# /workspace and track the original host path separately. Otherwise keep the
|
||||
# normal sandbox behavior and discard host paths.
|
||||
cwd = os.getenv("TERMINAL_CWD", default_cwd)
|
||||
if cwd:
|
||||
cwd = os.path.expanduser(cwd)
|
||||
host_cwd = None
|
||||
host_prefixes = ("/Users/", "/home/", "C:\\", "C:/")
|
||||
if env_type == "docker" and mount_docker_cwd:
|
||||
@@ -978,6 +980,7 @@ def _get_env_config() -> Dict[str, Any]:
|
||||
"container_disk": _parse_env_var("TERMINAL_CONTAINER_DISK", "51200"), # MB (default 50GB)
|
||||
"container_persistent": os.getenv("TERMINAL_CONTAINER_PERSISTENT", "true").lower() in ("true", "1", "yes"),
|
||||
"docker_volumes": _parse_env_var("TERMINAL_DOCKER_VOLUMES", "[]", json.loads, "valid JSON"),
|
||||
"docker_run_as_host_user": os.getenv("TERMINAL_DOCKER_RUN_AS_HOST_USER", "false").lower() in ("true", "1", "yes"),
|
||||
}
|
||||
|
||||
|
||||
@@ -1033,6 +1036,7 @@ def _create_environment(env_type: str, image: str, cwd: str, timeout: int,
|
||||
auto_mount_cwd=cc.get("docker_mount_cwd_to_workspace", False),
|
||||
forward_env=docker_forward_env,
|
||||
env=docker_env,
|
||||
run_as_host_user=cc.get("docker_run_as_host_user", False),
|
||||
)
|
||||
|
||||
elif env_type == "singularity":
|
||||
@@ -1661,6 +1665,7 @@ def terminal_tool(
|
||||
"docker_mount_cwd_to_workspace": config.get("docker_mount_cwd_to_workspace", False),
|
||||
"docker_forward_env": config.get("docker_forward_env", []),
|
||||
"docker_env": config.get("docker_env", {}),
|
||||
"docker_run_as_host_user": config.get("docker_run_as_host_user", False),
|
||||
}
|
||||
|
||||
local_config = None
|
||||
|
||||
@@ -56,9 +56,9 @@ def _resolve_download_timeout() -> float:
|
||||
except ValueError:
|
||||
pass
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
from hermes_cli.config import cfg_get, load_config
|
||||
cfg = load_config()
|
||||
val = cfg.get("auxiliary", {}).get("vision", {}).get("download_timeout")
|
||||
val = cfg_get(cfg, "auxiliary", "vision", "download_timeout")
|
||||
if val is not None:
|
||||
return float(val)
|
||||
except Exception:
|
||||
@@ -555,9 +555,9 @@ async def vision_analyze_tool(
|
||||
vision_timeout = 120.0
|
||||
vision_temperature = 0.1
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
from hermes_cli.config import cfg_get, load_config
|
||||
_cfg = load_config()
|
||||
_vision_cfg = _cfg.get("auxiliary", {}).get("vision", {})
|
||||
_vision_cfg = cfg_get(_cfg, "auxiliary", "vision", default={})
|
||||
_vt = _vision_cfg.get("timeout")
|
||||
if _vt is not None:
|
||||
vision_timeout = float(_vt)
|
||||
|
||||
+21
-3
@@ -165,11 +165,29 @@ def main():
|
||||
# a model_tools.py module-level side effect; moved to explicit
|
||||
# startup calls to avoid freezing the gateway's loop on lazy import
|
||||
# (#16856).
|
||||
#
|
||||
# Cold-start guard: importing ``tools.mcp_tool`` transitively pulls the
|
||||
# full MCP SDK (mcp, pydantic, httpx, jsonschema, starlette parsers —
|
||||
# ~200ms on macOS), which runs on the TUI's critical path before
|
||||
# ``gateway.ready`` can be emitted. The overwhelming majority of users
|
||||
# have no ``mcp_servers`` configured, in which case every byte of that
|
||||
# import is wasted. Check the config first (cheap — it's already been
|
||||
# loaded once by ``_config_mtime`` elsewhere) and only pay the import
|
||||
# cost when there's actually MCP work to do.
|
||||
try:
|
||||
from tools.mcp_tool import discover_mcp_tools
|
||||
discover_mcp_tools()
|
||||
from hermes_cli.config import read_raw_config
|
||||
_mcp_servers = (read_raw_config() or {}).get("mcp_servers")
|
||||
_has_mcp_servers = isinstance(_mcp_servers, dict) and len(_mcp_servers) > 0
|
||||
except Exception:
|
||||
pass
|
||||
# Be conservative: if we can't decide, fall back to the old
|
||||
# behaviour and let the discovery path handle its own errors.
|
||||
_has_mcp_servers = True
|
||||
if _has_mcp_servers:
|
||||
try:
|
||||
from tools.mcp_tool import discover_mcp_tools
|
||||
discover_mcp_tools()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if not write_json({
|
||||
"jsonrpc": "2.0",
|
||||
|
||||
+367
-231
@@ -140,6 +140,7 @@ _SLASH_WORKER_TIMEOUT_S = max(
|
||||
# response writes are safe.
|
||||
_LONG_HANDLERS = frozenset(
|
||||
{
|
||||
"browser.manage",
|
||||
"cli.exec",
|
||||
"session.branch",
|
||||
"session.resume",
|
||||
@@ -464,6 +465,119 @@ def _wait_agent(session: dict, rid: str, timeout: float = 30.0) -> dict | None:
|
||||
return _err(rid, 5032, err) if err else None
|
||||
|
||||
|
||||
def _start_agent_build(sid: str, session: dict) -> None:
    """Start building the real AIAgent for a TUI session, once.

    Classic `hermes` shows the prompt before constructing AIAgent; the TUI used
    to eagerly build it during session.create, making startup feel blocked on
    tool discovery/model metadata even though the composer was visible. Keep
    the shell responsive by deferring this work until the first prompt (or any
    command that actually needs the agent), while retaining the same ready/error
    event contract for the frontend.

    Args:
        sid: Session id used to look the session up in ``_sessions`` and to
            tag emitted events.
        session: The session dict; its ``agent_ready`` event, build lock and
            ``session_key`` are read here.
    """
    ready = session.get("agent_ready")
    if ready is None:
        # No readiness event on this session — nothing to build against.
        return
    lock = session.setdefault("agent_build_lock", threading.Lock())
    with lock:
        # Only the first caller starts a build; later callers return at once.
        if ready.is_set() or session.get("agent_build_started"):
            return
        session["agent_build_started"] = True
    key = session["session_key"]

    def _build() -> None:
        current = _sessions.get(sid)
        if current is None:
            # The session is already gone from _sessions; release any waiters.
            ready.set()
            return

        # Track what we allocate so the finally block can clean up if the
        # session is removed from _sessions while the build is in flight.
        worker = None
        notify_registered = False
        try:
            tokens = _set_session_context(key)
            try:
                agent = _make_agent(sid, key)
            finally:
                _clear_session_context(tokens)

            db = _get_db()
            if db is not None:
                db.create_session(key, source="tui", model=_resolve_model())
                pending_title = (current.get("pending_title") or "").strip()
                if pending_title:
                    try:
                        title_applied = db.set_session_title(key, pending_title)
                        if title_applied:
                            current["pending_title"] = None
                        else:
                            existing_row = db.get_session(key)
                            existing_title = ((existing_row or {}).get("title") or "").strip()
                            if existing_title == pending_title:
                                # The stored title already matches; clear the queue.
                                current["pending_title"] = None
                            else:
                                logger.info(
                                    "Pending title still queued for session %s (wanted=%r, current=%r)",
                                    sid,
                                    pending_title,
                                    existing_title,
                                )
                    except ValueError as e:
                        # The title was rejected; drop the queued value and
                        # log the reason so it does not stay pending.
                        current["pending_title"] = None
                        logger.info("Dropping pending title for session %s: %s", sid, e)
                    except Exception:
                        logger.warning("Failed to apply pending title for session %s", sid, exc_info=True)
            current["agent"] = agent

            # The slash worker is optional: a failure here is ignored.
            try:
                worker = _SlashWorker(key, getattr(agent, "model", _resolve_model()))
                current["slash_worker"] = worker
            except Exception:
                pass

            # Approval notifications are optional too; failures are ignored.
            try:
                from tools.approval import (
                    register_gateway_notify,
                    load_permanent_allowlist,
                )
                register_gateway_notify(key, lambda data: _emit("approval.request", sid, data))
                notify_registered = True
                load_permanent_allowlist()
            except Exception:
                pass

            _wire_callbacks(sid)
            _notify_session_boundary("on_session_reset", key)

            info = _session_info(agent)
            warn = _probe_credentials(agent)
            if warn:
                info["credential_warning"] = warn
            cfg_warn = _probe_config_health(_load_cfg())
            if cfg_warn:
                info["config_warning"] = cfg_warn
                logger.warning(cfg_warn)
            _emit("session.info", sid, info)
        except Exception as e:
            current["agent_error"] = str(e)
            _emit("error", sid, {"message": f"agent init failed: {e}"})
        finally:
            # Orphan check: if _sessions no longer maps sid to this dict, the
            # worker and notify registration installed above are unreachable,
            # so clean them up here.
            if _sessions.get(sid) is not current:
                if worker is not None:
                    try:
                        worker.close()
                    except Exception:
                        pass
                if notify_registered:
                    try:
                        from tools.approval import unregister_gateway_notify
                        unregister_gateway_notify(key)
                    except Exception:
                        pass
            # Always signal readiness, on success and on failure alike.
            ready.set()

    threading.Thread(target=_build, daemon=True).start()
|
||||
|
||||
|
||||
def _sess_nowait(params, rid):
    """Look up the session named by ``params["session_id"]`` without waiting.

    Returns:
        ``(session, None)`` when a truthy session is registered, otherwise
        ``(None, error)`` carrying a 4001 "session not found" error.
    """
    session_id = params.get("session_id") or ""
    session = _sessions.get(session_id)
    if session:
        return (session, None)
    return (None, _err(rid, 4001, "session not found"))
|
||||
@@ -471,7 +585,10 @@ def _sess_nowait(params, rid):
|
||||
|
||||
def _sess(params, rid):
    """Look up the session, start its agent build if needed, and wait for it.

    Returns:
        ``(None, error)`` when the session lookup fails; otherwise
        ``(session, wait_result)`` where ``wait_result`` is whatever
        ``_wait_agent`` returns for the session.
    """
    s, err = _sess_nowait(params, rid)
    if err:
        return (None, err)
    # Start the deferred build before waiting on it. The pre-change
    # one-line return that preceded this made the call unreachable.
    _start_agent_build(params.get("session_id") or "", s)
    return (s, _wait_agent(s, rid))
|
||||
|
||||
|
||||
def _normalize_completion_path(path_part: str) -> str:
|
||||
@@ -1626,129 +1743,18 @@ def _(rid, params: dict) -> dict:
|
||||
"transport": current_transport() or _stdio_transport,
|
||||
}
|
||||
|
||||
def _build() -> None:
|
||||
# Return the lightweight session immediately so Ink can paint the composer
|
||||
# + skeleton panel, then build the real AIAgent just after this response is
|
||||
# flushed. This keeps startup responsive while still hydrating tools/skills
|
||||
# without requiring the user to submit a first prompt.
|
||||
def _deferred_build() -> None:
|
||||
session = _sessions.get(sid)
|
||||
if session is None:
|
||||
# session.close ran before the build thread got scheduled.
|
||||
ready.set()
|
||||
return
|
||||
if session is not None:
|
||||
_start_agent_build(sid, session)
|
||||
|
||||
# Track what we allocate so we can clean up if session.close
|
||||
# races us to the finish line. session.close pops _sessions[sid]
|
||||
# unconditionally and tries to close the slash_worker it finds;
|
||||
# if _build is still mid-construction when close runs, close
|
||||
# finds slash_worker=None / notify unregistered and returns
|
||||
# cleanly — leaving us, the build thread, to later install the
|
||||
# worker + notify on an orphaned session dict. The finally
|
||||
# block below detects the orphan and cleans up instead of
|
||||
# leaking a subprocess and a global notify registration.
|
||||
worker = None
|
||||
notify_registered = False
|
||||
try:
|
||||
tokens = _set_session_context(key)
|
||||
try:
|
||||
agent = _make_agent(sid, key)
|
||||
finally:
|
||||
_clear_session_context(tokens)
|
||||
|
||||
db = _get_db()
|
||||
if db is not None:
|
||||
db.create_session(key, source="tui", model=_resolve_model())
|
||||
pending_title = (session.get("pending_title") or "").strip()
|
||||
if pending_title:
|
||||
try:
|
||||
title_applied = db.set_session_title(key, pending_title)
|
||||
if title_applied:
|
||||
session["pending_title"] = None
|
||||
else:
|
||||
existing_row = db.get_session(key)
|
||||
existing_title = (
|
||||
(existing_row or {}).get("title") or ""
|
||||
).strip()
|
||||
if existing_title == pending_title:
|
||||
session["pending_title"] = None
|
||||
else:
|
||||
logger.info(
|
||||
"Pending title still queued for session %s (wanted=%r, current=%r)",
|
||||
sid,
|
||||
pending_title,
|
||||
existing_title,
|
||||
)
|
||||
except ValueError as e:
|
||||
# Queued title can become invalid/duplicate between queue time
|
||||
# and DB row creation. Drop the queue and log the reason so
|
||||
# future /title reads don't surface a stuck pending value.
|
||||
session["pending_title"] = None
|
||||
logger.info(
|
||||
"Dropping pending title for session %s: %s",
|
||||
sid,
|
||||
e,
|
||||
)
|
||||
except Exception:
|
||||
logger.warning(
|
||||
"Failed to apply pending title for session %s",
|
||||
sid,
|
||||
exc_info=True,
|
||||
)
|
||||
session["agent"] = agent
|
||||
|
||||
try:
|
||||
worker = _SlashWorker(key, getattr(agent, "model", _resolve_model()))
|
||||
session["slash_worker"] = worker
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
try:
|
||||
from tools.approval import (
|
||||
register_gateway_notify,
|
||||
load_permanent_allowlist,
|
||||
)
|
||||
|
||||
register_gateway_notify(
|
||||
key, lambda data: _emit("approval.request", sid, data)
|
||||
)
|
||||
notify_registered = True
|
||||
load_permanent_allowlist()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
_wire_callbacks(sid)
|
||||
_notify_session_boundary("on_session_reset", key)
|
||||
|
||||
info = _session_info(agent)
|
||||
warn = _probe_credentials(agent)
|
||||
if warn:
|
||||
info["credential_warning"] = warn
|
||||
cfg_warn = _probe_config_health(_load_cfg())
|
||||
if cfg_warn:
|
||||
info["config_warning"] = cfg_warn
|
||||
logger.warning(cfg_warn)
|
||||
_emit("session.info", sid, info)
|
||||
except Exception as e:
|
||||
session["agent_error"] = str(e)
|
||||
_emit("error", sid, {"message": f"agent init failed: {e}"})
|
||||
finally:
|
||||
# Orphan check: if session.close raced us and popped
|
||||
# _sessions[sid] while we were building, the dict we just
|
||||
# populated is unreachable. Clean up the subprocess and
|
||||
# the global notify registration ourselves — session.close
|
||||
# couldn't see them at the time it ran.
|
||||
if _sessions.get(sid) is not session:
|
||||
if worker is not None:
|
||||
try:
|
||||
worker.close()
|
||||
except Exception:
|
||||
pass
|
||||
if notify_registered:
|
||||
try:
|
||||
from tools.approval import unregister_gateway_notify
|
||||
|
||||
unregister_gateway_notify(key)
|
||||
except Exception:
|
||||
pass
|
||||
ready.set()
|
||||
|
||||
threading.Thread(target=_build, daemon=True).start()
|
||||
build_timer = threading.Timer(0.05, _deferred_build)
|
||||
build_timer.daemon = True
|
||||
build_timer.start()
|
||||
|
||||
return _ok(
|
||||
rid,
|
||||
@@ -1759,6 +1765,7 @@ def _(rid, params: dict) -> dict:
|
||||
"tools": {},
|
||||
"skills": {},
|
||||
"cwd": os.getenv("TERMINAL_CWD", os.getcwd()),
|
||||
"lazy": True,
|
||||
},
|
||||
},
|
||||
)
|
||||
@@ -1900,7 +1907,7 @@ def _(rid, params: dict) -> dict:
|
||||
|
||||
@method("session.title")
|
||||
def _(rid, params: dict) -> dict:
|
||||
session, err = _sess(params, rid)
|
||||
session, err = _sess_nowait(params, rid)
|
||||
if err:
|
||||
return err
|
||||
db = _get_db()
|
||||
@@ -1963,13 +1970,16 @@ def _(rid, params: dict) -> dict:
|
||||
|
||||
@method("session.usage")
def _(rid, params: dict) -> dict:
    """Return usage counters for a session without waiting for its agent.

    While the session has no agent yet, an all-zero usage payload is returned.
    The pre-change lookup via ``_sess`` blocked on the agent and returned
    before this fallback could run.
    """
    session, err = _sess_nowait(params, rid)
    if err:
        return err
    agent = session.get("agent")
    return _ok(rid, _get_usage(agent) if agent is not None else {"calls": 0, "input": 0, "output": 0, "total": 0})
|
||||
|
||||
|
||||
@method("session.history")
|
||||
def _(rid, params: dict) -> dict:
|
||||
session, err = _sess(params, rid)
|
||||
session, err = _sess_nowait(params, rid)
|
||||
if err:
|
||||
return err
|
||||
history = list(session.get("history", []))
|
||||
@@ -2436,13 +2446,31 @@ def _(rid, params: dict) -> dict:
|
||||
@method("prompt.submit")
def _(rid, params: dict) -> dict:
    """Accept a prompt and run it on a background thread once the agent is ready.

    The session is looked up without waiting for the agent (the pre-change
    blocking ``_sess`` lookup is dropped), marked as running under its history
    lock, and the agent build is started if needed. The reply is sent
    immediately.

    Returns:
        A lookup error, a 4009 "session busy" error when a prompt is already
        running, or an ok response with ``{"status": "streaming"}``.
    """
    sid, text = params.get("session_id", ""), params.get("text", "")
    session, err = _sess_nowait(params, rid)
    if err:
        return err
    with session["history_lock"]:
        if session.get("running"):
            return _err(rid, 4009, "session busy")
        session["running"] = True

    _start_agent_build(sid, session)

    def run_after_agent_ready() -> None:
        err = _wait_agent(session, rid)
        if err:
            _emit("error", sid, {"message": err.get("error", {}).get("message", "agent initialization failed")})
            # Release the busy flag so a later prompt is not rejected.
            with session["history_lock"]:
                session["running"] = False
            return
        _run_prompt_submit(rid, sid, session, text)

    threading.Thread(target=run_after_agent_ready, daemon=True).start()
    return _ok(rid, {"status": "streaming"})
|
||||
|
||||
|
||||
def _run_prompt_submit(rid, sid: str, session: dict, text: Any) -> None:
|
||||
with session["history_lock"]:
|
||||
history = list(session["history"])
|
||||
history_version = int(session.get("history_version", 0))
|
||||
images = list(session.get("attached_images", []))
|
||||
@@ -2681,7 +2709,6 @@ def _(rid, params: dict) -> dict:
|
||||
session["running"] = False
|
||||
|
||||
threading.Thread(target=run, daemon=True).start()
|
||||
return _ok(rid, {"status": "streaming"})
|
||||
|
||||
|
||||
@method("clipboard.paste")
|
||||
@@ -3210,7 +3237,8 @@ def _(rid, params: dict) -> dict:
|
||||
raw = ("" if value is None else str(value)).strip().lower()
|
||||
if raw not in _INDICATOR_STYLES:
|
||||
return _err(
|
||||
rid, 4002,
|
||||
rid,
|
||||
4002,
|
||||
f"unknown indicator: {raw!r}; pick one of {'|'.join(_INDICATOR_STYLES)}",
|
||||
)
|
||||
_write_config_key("display.tui_status_indicator", raw)
|
||||
@@ -3427,6 +3455,27 @@ def _(rid, params: dict) -> dict:
|
||||
return _err(rid, 5015, str(e))
|
||||
|
||||
|
||||
@method("reload.env")
def _(rid, params: dict) -> dict:
    """Reload ``~/.hermes/.env`` into the gateway process.

    Delegates to ``hermes_cli.config.reload_env``, matching classic CLI's
    ``/reload`` handler, so newly added API keys take effect on the next
    agent call without restarting the TUI.

    The credential pool / provider routing of any *already-constructed* agent
    is not rebuilt — the same behaviour as classic CLI's ``/reload``. Users
    who want a brand-new credential resolution should follow with ``/new``.

    Returns:
        An ok response with ``{"updated": <int>}``, or a 5015 error carrying
        the exception text when the reload fails.
    """
    try:
        from hermes_cli.config import reload_env

        updated = reload_env()
        payload = {"updated": int(updated)}
        return _ok(rid, payload)
    except Exception as exc:
        return _err(rid, 5015, str(exc))
|
||||
|
||||
|
||||
_TUI_HIDDEN: frozenset[str] = frozenset(
|
||||
{
|
||||
"sethome",
|
||||
@@ -4751,121 +4800,208 @@ def _resolve_browser_cdp_url() -> str:
|
||||
return ""
|
||||
|
||||
|
||||
def _is_default_local_cdp(parsed) -> bool:
|
||||
"""Match the discovery-style local default; never the concrete WS form.
|
||||
|
||||
A user-supplied ``ws://127.0.0.1:9222/devtools/browser/<id>`` is a
|
||||
real, connectable endpoint — collapsing it to bare ``http://...:9222``
|
||||
would strip the path and break the connect.
|
||||
"""
|
||||
try:
|
||||
port = parsed.port or 80
|
||||
except ValueError:
|
||||
return False
|
||||
|
||||
discovery_path = parsed.path in {"", "/", "/json", "/json/version"}
|
||||
return (
|
||||
parsed.scheme in {"http", "ws"}
|
||||
and parsed.hostname in {"127.0.0.1", "localhost"}
|
||||
and port == 9222
|
||||
and discovery_path
|
||||
)
|
||||
|
||||
|
||||
def _http_ok(url: str, timeout: float) -> bool:
|
||||
import urllib.request
|
||||
|
||||
try:
|
||||
with urllib.request.urlopen(url, timeout=timeout) as resp:
|
||||
return 200 <= getattr(resp, "status", 200) < 300
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
|
||||
def _probe_urls(parsed) -> list[str]:
|
||||
scheme = {"ws": "http", "wss": "https"}.get(parsed.scheme, parsed.scheme)
|
||||
root = f"{scheme}://{parsed.netloc}".rstrip("/")
|
||||
return [f"{root}/json/version", f"{root}/json"]
|
||||
|
||||
|
||||
def _normalize_cdp_url(parsed) -> str:
|
||||
# Concrete ``/devtools/browser/<id>`` endpoints (Browserbase et al.)
|
||||
# are connectable as-is. Discovery-style inputs collapse to bare
|
||||
# ``scheme://host:port`` so ``_resolve_cdp_override`` can append
|
||||
# ``/json/version`` later without doubling the path.
|
||||
if parsed.path.startswith("/devtools/browser/"):
|
||||
return parsed.geturl()
|
||||
return parsed._replace(path="", params="", query="", fragment="").geturl()
|
||||
|
||||
|
||||
def _failure_messages(url: str, port: int, system: str) -> list[str]:
    """Compose the user-facing lines shown when Chrome cannot be reached at *url*.

    The middle lines depend on whether ``manual_chrome_debug_command`` returns
    a launch command for *port* and *system*: either the command to run, or a
    note that no Chrome/Chromium executable was found.
    """
    from hermes_cli.browser_connect import manual_chrome_debug_command

    launch_command = manual_chrome_debug_command(port, system)

    messages = [f"Chrome is not reachable at {url}."]
    if launch_command:
        messages.append("Start Chrome with remote debugging, then retry /browser connect:")
        messages.append(launch_command)
    else:
        messages.append("No Chrome/Chromium executable was found in this environment.")
        messages.append(
            f"Install one or start Chrome with --remote-debugging-port={port}, then retry /browser connect."
        )
    messages.append(
        "Browser not connected — start Chrome with remote debugging and retry /browser connect"
    )
    return messages
|
||||
|
||||
|
||||
@method("browser.manage")
def _(rid, params: dict) -> dict:
    """Dispatch a ``browser.manage`` request by its ``action``.

    Supported actions (``params["action"]``, defaulting to ``"status"``):

    * ``status`` — report the currently resolved CDP URL, if any.
    * ``connect`` — delegated to ``_browser_connect``.
    * ``disconnect`` — delegated to ``_browser_disconnect``.

    Any other action yields error code 4015.
    """
    action = params.get("action", "status")

    if action == "status":
        url = _resolve_browser_cdp_url()
        return _ok(rid, {"connected": bool(url), "url": url})

    if action == "disconnect":
        return _browser_disconnect(rid)

    if action != "connect":
        return _err(rid, 4015, f"unknown action: {action}")

    return _browser_connect(rid, params)
|
||||
|
||||
|
||||
def _browser_connect(rid, params: dict) -> dict:
    """Validate, probe and publish a browser CDP endpoint.

    Flow:

    1. Validate ``params["url"]`` (falls back to ``DEFAULT_BROWSER_CDP_URL``
       when absent/blank); malformed input yields error code 4015.
    2. Probe reachability: a TCP connect for concrete
       ``ws[s]://.../devtools/browser/<id>`` endpoints, otherwise HTTP
       discovery probes. For the default local endpoint an unreachable
       Chrome triggers a launch attempt followed by up to 20 polls spaced
       0.5 s apart.
    3. On success, reap cached browser sessions, publish the normalized URL
       in ``BROWSER_CDP_URL``, and reap again.

    Args:
        rid: Request id echoed back through ``_ok`` / ``_err``.
        params: Request parameters; reads ``url`` and ``session_id``.

    Returns:
        An ``_ok`` payload with ``connected`` / ``url`` (plus ``messages``
        when any progress lines were produced), or an ``_err`` response
        with code 4015 (bad input) or 5031 (unreachable / unexpected
        failure).
    """
    import platform

    from hermes_cli.browser_connect import DEFAULT_BROWSER_CDP_URL
    from tools.browser_tool import cleanup_all_browsers
    from urllib.parse import urlparse

    raw_url = params.get("url")
    if raw_url is not None and not isinstance(raw_url, str):
        return _err(rid, 4015, f"browser url must be a string, got {type(raw_url).__name__}")
    url = (raw_url or "").strip() or DEFAULT_BROWSER_CDP_URL

    sid = params.get("session_id") or ""
    system = platform.system()
    messages: list[str] = []

    def announce(message: str, *, level: str = "info") -> None:
        messages.append(message)
        # Without a session id the TUI prints `messages` from the
        # response; emitting an event would double-render. Only stream
        # progress when there's a real session to scope it to.
        if sid:
            _emit("browser.progress", sid, {"message": message, "level": level})

    try:
        # urlparse itself raises ValueError on a malformed authority
        # (e.g. an unbalanced IPv6 bracket such as ``http://[::1``).
        parsed = urlparse(url if "://" in url else f"http://{url}")
    except ValueError:
        return _err(rid, 4015, f"invalid browser url: {url}")
    if parsed.scheme not in {"http", "https", "ws", "wss"}:
        return _err(rid, 4015, f"unsupported browser url: {url}")
    if not parsed.hostname:
        return _err(rid, 4015, f"missing host in browser url: {url}")
    try:
        port = parsed.port or (443 if parsed.scheme in {"https", "wss"} else 80)
    except ValueError:
        return _err(rid, 4015, f"invalid port in browser url: {url}")

    # Always normalize default-local to 127.0.0.1:9222 so downstream
    # comparisons + messaging match what we'll actually persist.
    if _is_default_local_cdp(parsed):
        url = DEFAULT_BROWSER_CDP_URL
        parsed = urlparse(url)
        port = parsed.port or 9222

    try:
        # ws[s]://.../devtools/browser/<id> endpoints (hosted CDP
        # providers) don't serve the HTTP discovery path; just check
        # TCP-level reachability and let browser_navigate handshake.
        if parsed.scheme in {"ws", "wss"} and parsed.path.startswith(
            "/devtools/browser/"
        ):
            import socket

            try:
                with socket.create_connection((parsed.hostname, port), timeout=2.0):
                    pass
            except OSError as e:
                return _err(rid, 5031, f"could not reach browser CDP at {url}: {e}")
        else:
            probes = _probe_urls(parsed)
            ok = any(_http_ok(p, timeout=2.0) for p in probes)

            if not ok and _is_default_local_cdp(parsed):
                from hermes_cli.browser_connect import try_launch_chrome_debug

                announce(
                    "Chrome isn't running with remote debugging — attempting to launch..."
                )

                if try_launch_chrome_debug(port, system):
                    # Poll for the debug port to come up after launch.
                    for _ in range(20):
                        time.sleep(0.5)
                        if any(_http_ok(p, timeout=1.0) for p in probes):
                            ok = True
                            break

                if ok:
                    announce(f"Chrome launched and listening on port {port}")
                else:
                    # Skip the "not reachable" headline; only the hint and
                    # closing lines are announced.
                    for line in _failure_messages(url, port, system)[1:]:
                        announce(line, level="error")
                    return _ok(
                        rid, {"connected": False, "url": url, "messages": messages}
                    )
            elif not ok:
                return _err(rid, 5031, f"could not reach browser CDP at {url}")
            elif _is_default_local_cdp(parsed):
                announce(f"Chrome is already listening on port {port}")

        normalized = _normalize_cdp_url(parsed)

        # Order matters: reap sessions BEFORE publishing the new env
        # so an in-flight tool call sees the old supervisor closed,
        # then again AFTER so the default task's cached supervisor
        # is drained against the new URL.
        cleanup_all_browsers()
        os.environ["BROWSER_CDP_URL"] = normalized
        cleanup_all_browsers()
    except Exception as e:
        return _err(rid, 5031, str(e))

    payload: dict[str, object] = {"connected": True, "url": normalized}
    if messages:
        payload["messages"] = messages
    return _ok(rid, payload)
|
||||
|
||||
|
||||
def _browser_disconnect(rid) -> dict:
    """Drop the ``BROWSER_CDP_URL`` override and reap cached browser sessions.

    Reap, drop the env override, reap again — closes the same swap window
    covered by ``_browser_connect``.

    Args:
        rid: Request id echoed back through ``_ok``.

    Returns:
        An ``_ok`` response with ``{"connected": False}``.
    """

    def reap() -> None:
        # Best-effort: a failing import or cleanup must not block the
        # disconnect itself.
        try:
            from tools.browser_tool import cleanup_all_browsers

            cleanup_all_browsers()
        except Exception:
            pass

    reap()
    os.environ.pop("BROWSER_CDP_URL", None)
    reap()
    return _ok(rid, {"connected": False})
|
||||
|
||||
|
||||
@method("plugins.list")
|
||||
|
||||
@@ -293,6 +293,19 @@ describe('createGatewayEventHandler', () => {
|
||||
expect(appended[1]).toMatchObject({ role: 'assistant', text: 'final answer' })
|
||||
})
|
||||
|
||||
it('renders browser.progress events as system transcript lines as they stream in', () => {
|
||||
const appended: Msg[] = []
|
||||
const ctx = buildCtx(appended)
|
||||
const handler = createGatewayEventHandler(ctx)
|
||||
|
||||
handler({
|
||||
payload: { message: 'Chrome launched and listening on port 9222' },
|
||||
type: 'browser.progress'
|
||||
} as any)
|
||||
|
||||
expect(ctx.system.sys).toHaveBeenCalledWith('Chrome launched and listening on port 9222')
|
||||
})
|
||||
|
||||
it('annotates gateway.start_timeout with stderr tail lines so users can diagnose without /logs', () => {
|
||||
const appended: Msg[] = []
|
||||
const onEvent = createGatewayEventHandler(buildCtx(appended))
|
||||
|
||||
@@ -191,8 +191,10 @@ describe('createSlashHandler', () => {
|
||||
})
|
||||
|
||||
it.each([
|
||||
['/browser status', 'browser.manage', { action: 'status' }],
|
||||
['/browser status', 'browser.manage', { action: 'status', session_id: null }],
|
||||
['/browser connect', 'browser.manage', { action: 'connect', session_id: null, url: 'http://127.0.0.1:9222' }],
|
||||
['/reload-mcp', 'reload.mcp', { session_id: null }],
|
||||
['/reload', 'reload.env', {}],
|
||||
['/stop', 'process.stop', {}],
|
||||
['/fast status', 'config.get', { key: 'fast', session_id: null }],
|
||||
['/busy status', 'config.get', { key: 'busy' }],
|
||||
@@ -206,6 +208,34 @@ describe('createSlashHandler', () => {
|
||||
expect(ctx.gateway.gw.request).not.toHaveBeenCalled()
|
||||
})
|
||||
|
||||
it('renders browser connect progress messages from the gateway', async () => {
|
||||
const rpc = vi.fn(() =>
|
||||
Promise.resolve({
|
||||
connected: false,
|
||||
messages: [
|
||||
"Chrome isn't running with remote debugging — attempting to launch...",
|
||||
'Browser not connected — start Chrome with remote debugging and retry /browser connect'
|
||||
],
|
||||
url: 'http://127.0.0.1:9222'
|
||||
})
|
||||
)
|
||||
|
||||
const ctx = buildCtx({ gateway: { ...buildGateway(), rpc } })
|
||||
|
||||
expect(createSlashHandler(ctx)('/browser connect')).toBe(true)
|
||||
expect(ctx.transcript.sys).toHaveBeenCalledWith('checking Chrome remote debugging at http://127.0.0.1:9222...')
|
||||
|
||||
await vi.waitFor(() => {
|
||||
expect(ctx.transcript.sys).toHaveBeenCalledWith(
|
||||
"Chrome isn't running with remote debugging — attempting to launch..."
|
||||
)
|
||||
expect(ctx.transcript.sys).toHaveBeenCalledWith(
|
||||
'Browser not connected — start Chrome with remote debugging and retry /browser connect'
|
||||
)
|
||||
expect(ctx.transcript.sys).not.toHaveBeenCalledWith('browser connect failed')
|
||||
})
|
||||
})
|
||||
|
||||
it('routes /rollback through native RPC when a session is active', () => {
|
||||
patchUiState({ sid: 'sid-abc' })
|
||||
const rpc = vi.fn(() => Promise.resolve({}))
|
||||
|
||||
@@ -307,6 +307,16 @@ export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev:
|
||||
return
|
||||
}
|
||||
|
||||
case 'browser.progress': {
|
||||
const message = String(ev.payload?.message ?? '').trim()
|
||||
|
||||
if (message) {
|
||||
sys(message)
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
case 'voice.status': {
|
||||
// Continuous VAD loop reports its internal state so the status bar
|
||||
// can show listening / transcribing / idle without polling.
|
||||
|
||||
@@ -2,6 +2,7 @@ import type {
|
||||
BrowserManageResponse,
|
||||
DelegationPauseResponse,
|
||||
ProcessStopResponse,
|
||||
ReloadEnvResponse,
|
||||
ReloadMcpResponse,
|
||||
RollbackDiffResponse,
|
||||
RollbackListResponse,
|
||||
@@ -89,13 +90,30 @@ export const opsCommands: SlashCommand[] = [
|
||||
}
|
||||
},
|
||||
|
||||
{
|
||||
help: 're-read ~/.hermes/.env into the running gateway (CLI parity)',
|
||||
name: 'reload',
|
||||
run: (_arg, ctx) => {
|
||||
ctx.gateway
|
||||
.rpc<ReloadEnvResponse>('reload.env', {})
|
||||
.then(
|
||||
ctx.guarded<ReloadEnvResponse>(r => {
|
||||
const n = Number(r.updated ?? 0)
|
||||
const noun = n === 1 ? 'var' : 'vars'
|
||||
|
||||
ctx.transcript.sys(`reloaded .env (${n} ${noun} updated)`)
|
||||
})
|
||||
)
|
||||
.catch(ctx.guardedErr)
|
||||
}
|
||||
},
|
||||
|
||||
{
|
||||
help: 'manage browser CDP connection [connect|disconnect|status]',
|
||||
name: 'browser',
|
||||
run: (arg, ctx) => {
|
||||
const trimmed = arg.trim()
|
||||
const [rawAction, ...rest] = trimmed ? trimmed.split(/\s+/) : ['status']
|
||||
const action = (rawAction || 'status').toLowerCase()
|
||||
const [rawAction = 'status', ...rest] = arg.trim().split(/\s+/).filter(Boolean)
|
||||
const action = rawAction.toLowerCase()
|
||||
|
||||
if (!['connect', 'disconnect', 'status'].includes(action)) {
|
||||
return ctx.transcript.sys(
|
||||
@@ -103,17 +121,23 @@ export const opsCommands: SlashCommand[] = [
|
||||
)
|
||||
}
|
||||
|
||||
const payload: Record<string, unknown> = { action }
|
||||
const requested = rest.join(' ').trim()
|
||||
const sid = ctx.sid ?? null
|
||||
const url = action === 'connect' ? rest.join(' ').trim() || 'http://127.0.0.1:9222' : undefined
|
||||
|
||||
if (action === 'connect') {
|
||||
payload.url = requested || 'http://localhost:9222'
|
||||
if (url) {
|
||||
ctx.transcript.sys(`checking Chrome remote debugging at ${url}...`)
|
||||
}
|
||||
|
||||
ctx.gateway
|
||||
.rpc<BrowserManageResponse>('browser.manage', payload)
|
||||
.rpc<BrowserManageResponse>('browser.manage', { action, session_id: sid, ...(url && { url }) })
|
||||
.then(
|
||||
ctx.guarded<BrowserManageResponse>(r => {
|
||||
// Without a session we can't subscribe to streamed
|
||||
// browser.progress events, so flush the bundled list.
|
||||
if (!sid) {
|
||||
r.messages?.forEach(message => ctx.transcript.sys(message))
|
||||
}
|
||||
|
||||
if (action === 'status') {
|
||||
return ctx.transcript.sys(
|
||||
r.connected
|
||||
@@ -122,18 +146,15 @@ export const opsCommands: SlashCommand[] = [
|
||||
)
|
||||
}
|
||||
|
||||
if (action === 'connect') {
|
||||
if (r.connected) {
|
||||
ctx.transcript.sys(`browser connected: ${r.url || '(url unavailable)'}`)
|
||||
ctx.transcript.sys('next browser tool call will use this CDP endpoint')
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
return ctx.transcript.sys('browser connect failed')
|
||||
if (action === 'disconnect') {
|
||||
return ctx.transcript.sys('browser disconnected')
|
||||
}
|
||||
|
||||
ctx.transcript.sys('browser disconnected')
|
||||
if (r.connected) {
|
||||
ctx.transcript.sys('Browser connected to live Chrome via CDP')
|
||||
ctx.transcript.sys(`Endpoint: ${r.url || '(url unavailable)'}`)
|
||||
ctx.transcript.sys('next browser tool call will use this CDP endpoint')
|
||||
}
|
||||
})
|
||||
)
|
||||
.catch(ctx.guardedErr)
|
||||
|
||||
@@ -5,8 +5,7 @@ import type { GatewayClient } from '../gatewayClient.js'
|
||||
import type {
|
||||
ConfigFullResponse,
|
||||
ConfigMtimeResponse,
|
||||
ReloadMcpResponse,
|
||||
VoiceToggleResponse
|
||||
ReloadMcpResponse
|
||||
} from '../gatewayTypes.js'
|
||||
import { asRpcResult } from '../lib/rpc.js'
|
||||
|
||||
@@ -118,7 +117,11 @@ export function useConfigSync({ gw, setBellOnComplete, setVoiceEnabled, sid }: U
|
||||
return
|
||||
}
|
||||
|
||||
quietRpc<VoiceToggleResponse>(gw, 'voice.toggle', { action: 'status' }).then(r => setVoiceEnabled(!!r?.enabled))
|
||||
// Keep startup cheap: voice.toggle status probes optional audio/STT deps and
|
||||
// can run long enough to delay prompt.submit on the single stdio RPC pipe.
|
||||
// Environment flags are enough to initialize the UI bit; the heavier status
|
||||
// check still runs when the user opens /voice.
|
||||
setVoiceEnabled(process.env.HERMES_VOICE === '1')
|
||||
quietRpc<ConfigMtimeResponse>(gw, 'config.get', { key: 'mtime' }).then(r => {
|
||||
mtimeRef.current = Number(r?.mtime ?? 0)
|
||||
})
|
||||
|
||||
@@ -126,6 +126,13 @@ export function useSubmission(opts: UseSubmissionOptions) {
|
||||
return sys('session not ready yet')
|
||||
}
|
||||
|
||||
// Plain prompts are the common path and should not pay an extra RPC
|
||||
// before prompt.submit. File-drop detection still runs for absolute,
|
||||
// tilde, file://, and explicit relative paths.
|
||||
if (!looksLikeSlashCommand(text) && !/(?:^|\s)(?:file:\/\/|~\/|\.?\.\/|\/)[^\s]+/.test(text)) {
|
||||
return startSubmit(text, expand(text), showUserMessage)
|
||||
}
|
||||
|
||||
gw.request<InputDetectDropResponse>('input.detect_drop', { session_id: sid, text })
|
||||
.then(r => {
|
||||
if (!r?.matched) {
|
||||
|
||||
@@ -68,7 +68,7 @@ const TranscriptPane = memo(function TranscriptPane({
|
||||
<Box flexDirection="column" paddingTop={1}>
|
||||
<Banner t={ui.theme} />
|
||||
|
||||
{row.msg.info?.version && <SessionPanel info={row.msg.info} sid={ui.sid} t={ui.theme} />}
|
||||
{row.msg.info && <SessionPanel info={row.msg.info} sid={ui.sid} t={ui.theme} />}
|
||||
</Box>
|
||||
) : row.msg.kind === 'panel' && row.msg.panelData ? (
|
||||
<Panel sections={row.msg.panelData.sections} t={ui.theme} title={row.msg.panelData.title} />
|
||||
|
||||
@@ -1,10 +1,32 @@
|
||||
import { Box, Text, useStdout } from '@hermes/ink'
|
||||
import { useEffect, useState } from 'react'
|
||||
import unicodeSpinners from 'unicode-animations'
|
||||
|
||||
import { artWidth, caduceus, CADUCEUS_WIDTH, logo, LOGO_WIDTH } from '../banner.js'
|
||||
import { flat } from '../lib/text.js'
|
||||
import type { Theme } from '../theme.js'
|
||||
import type { PanelSection, SessionInfo } from '../types.js'
|
||||
|
||||
const LOADER_TICK_MS = 120
|
||||
|
||||
/**
 * Single-line animated placeholder: a braille spinner frame followed by
 * `label`, rendered in the theme's muted colour and truncated to one line.
 *
 * The frame advances on an interval of at least LOADER_TICK_MS (or the
 * spinner's own interval, whichever is larger); the timer is cleared on
 * unmount.
 */
function InlineLoader({ label, t }: { label: string; t: Theme }) {
  const [tick, setTick] = useState(0)
  const spinner = unicodeSpinners.braille
  const frame = spinner.frames[tick % spinner.frames.length] ?? '⠋'

  useEffect(() => {
    const id = setInterval(() => setTick(n => n + 1), Math.max(LOADER_TICK_MS, spinner.interval))

    return () => clearInterval(id)
  }, [spinner.interval])

  return (
    <Text color={t.color.muted} wrap="truncate">
      <Text color={t.color.accent}>{frame}</Text> {label}
    </Text>
  )
}
|
||||
|
||||
export function ArtLines({ lines }: { lines: [string, string][] }) {
|
||||
return (
|
||||
<>
|
||||
@@ -67,6 +89,7 @@ export function SessionPanel({ info, sid, t }: SessionPanelProps) {
|
||||
const entries = Object.entries(data).sort()
|
||||
const shown = entries.slice(0, max)
|
||||
const overflow = entries.length - max
|
||||
const skeleton = info.lazy && entries.length === 0
|
||||
|
||||
return (
|
||||
<Box flexDirection="column" marginTop={1}>
|
||||
@@ -74,12 +97,16 @@ export function SessionPanel({ info, sid, t }: SessionPanelProps) {
|
||||
Available {title}
|
||||
</Text>
|
||||
|
||||
{shown.map(([k, vs]) => (
|
||||
<Text key={k} wrap="truncate">
|
||||
<Text color={t.color.muted}>{strip(k)}: </Text>
|
||||
<Text color={t.color.text}>{truncLine(strip(k) + ': ', vs)}</Text>
|
||||
</Text>
|
||||
))}
|
||||
{skeleton ? (
|
||||
<InlineLoader label={title === 'Tools' ? 'discovering tools' : 'scanning skills'} t={t} />
|
||||
) : (
|
||||
shown.map(([k, vs]) => (
|
||||
<Text key={k} wrap="truncate">
|
||||
<Text color={t.color.muted}>{strip(k)}: </Text>
|
||||
<Text color={t.color.text}>{truncLine(strip(k) + ': ', vs)}</Text>
|
||||
</Text>
|
||||
))
|
||||
)}
|
||||
|
||||
{overflow > 0 && (
|
||||
<Text color={t.color.muted}>
|
||||
|
||||
@@ -308,12 +308,17 @@ export interface ReloadMcpResponse {
|
||||
status?: string
|
||||
}
|
||||
|
||||
export interface ReloadEnvResponse {
|
||||
updated?: number
|
||||
}
|
||||
|
||||
export interface ProcessStopResponse {
|
||||
killed?: number
|
||||
}
|
||||
|
||||
export interface BrowserManageResponse {
|
||||
connected?: boolean
|
||||
messages?: string[]
|
||||
url?: string
|
||||
}
|
||||
|
||||
@@ -432,6 +437,11 @@ export type GatewayEvent =
|
||||
| { payload?: { state?: 'idle' | 'listening' | 'transcribing' }; session_id?: string; type: 'voice.status' }
|
||||
| { payload?: { no_speech_limit?: boolean; text?: string }; session_id?: string; type: 'voice.transcript' }
|
||||
| { payload: { line: string }; session_id?: string; type: 'gateway.stderr' }
|
||||
| {
|
||||
payload?: { level?: 'info' | 'warn' | 'error'; message?: string }
|
||||
session_id?: string
|
||||
type: 'browser.progress'
|
||||
}
|
||||
| {
|
||||
payload?: { cwd?: string; python?: string; stderr_tail?: string }
|
||||
session_id?: string
|
||||
|
||||
@@ -1,5 +1,3 @@
|
||||
import { evictInkCaches } from '@hermes/ink'
|
||||
|
||||
import { type HeapDumpResult, performHeapDump } from './memory.js'
|
||||
|
||||
export type MemoryLevel = 'critical' | 'high' | 'normal'
|
||||
@@ -20,6 +18,40 @@ export interface MemoryMonitorOptions {
|
||||
|
||||
const GB = 1024 ** 3
|
||||
|
||||
// Deferred @hermes/ink import: loading `@hermes/ink` at module top-level
|
||||
// pulls the full ~414KB Ink bundle (React, renderer, components, hooks) onto
|
||||
// the critical path before the Python gateway can even be spawned. That
|
||||
// serialised roughly 150ms of Node work in front of gw.start() on every
|
||||
// cold `hermes --tui` launch.
|
||||
//
|
||||
// evictInkCaches only runs inside `tick()`, which fires on a 10s timer and
|
||||
// only when heap pressure crosses the high-water mark — by then Ink has
|
||||
// long since been loaded by the app entry. This dynamic import is a no-op
|
||||
// on the hot path (module is already in the ESM cache); when a startup
|
||||
// spike somehow trips the threshold before the app registers its own Ink
|
||||
// import, we pay the load cost exactly once, inside the tick that needs it.
|
||||
let _evictInkCaches: ((level: 'all' | 'half') => unknown) | null = null
|
||||
let _evictInkCachesPromise: Promise<(level: 'all' | 'half') => unknown> | null = null
|
||||
|
||||
/**
 * Lazily resolve `evictInkCaches` from `@hermes/ink`.
 *
 * The resolved function is memoised in `_evictInkCaches`; concurrent
 * callers share the single in-flight import via `_evictInkCachesPromise`.
 * If the import rejects, the shared promise is reset so a later call can
 * retry, and the error is re-thrown to the caller.
 */
async function _ensureEvictInkCaches(): Promise<(level: 'all' | 'half') => unknown> {
  if (_evictInkCaches) {
    return _evictInkCaches
  }

  _evictInkCachesPromise ??= import('@hermes/ink')
    .then(mod => {
      _evictInkCaches = mod.evictInkCaches as (level: 'all' | 'half') => unknown

      return _evictInkCaches
    })
    .catch(err => {
      // Allow the next caller to retry instead of caching the failure.
      _evictInkCachesPromise = null

      throw err
    })

  return _evictInkCachesPromise
}
|
||||
|
||||
export function startMemoryMonitor({
|
||||
criticalBytes = 2.5 * GB,
|
||||
highBytes = 1.5 * GB,
|
||||
@@ -28,29 +60,45 @@ export function startMemoryMonitor({
|
||||
onHigh
|
||||
}: MemoryMonitorOptions = {}): () => void {
|
||||
const dumped = new Set<Exclude<MemoryLevel, 'normal'>>()
|
||||
const inFlight = new Set<Exclude<MemoryLevel, 'normal'>>()
|
||||
|
||||
const tick = async () => {
|
||||
const { heapUsed, rss } = process.memoryUsage()
|
||||
const level: MemoryLevel = heapUsed >= criticalBytes ? 'critical' : heapUsed >= highBytes ? 'high' : 'normal'
|
||||
|
||||
if (level === 'normal') {
|
||||
return void dumped.clear()
|
||||
}
|
||||
|
||||
if (dumped.has(level)) {
|
||||
dumped.clear()
|
||||
return
|
||||
}
|
||||
|
||||
if (dumped.has(level) || inFlight.has(level)) {
|
||||
return
|
||||
}
|
||||
|
||||
inFlight.add(level)
|
||||
|
||||
// Prune Ink content caches before dump/exit — half on 'high' (recoverable),
|
||||
// full on 'critical' (post-dump RSS reduction, keeps user running).
|
||||
evictInkCaches(level === 'critical' ? 'all' : 'half')
|
||||
// Deferred import keeps `@hermes/ink` off the cold-start critical path;
|
||||
// by the time a tick fires 10s after launch the app has already loaded
|
||||
// the same module, so this resolves instantly from the ESM cache.
|
||||
try {
|
||||
try {
|
||||
const evictInkCaches = await _ensureEvictInkCaches()
|
||||
evictInkCaches(level === 'critical' ? 'all' : 'half')
|
||||
} catch {
|
||||
// Best-effort: if the dynamic import fails for any reason we still
|
||||
// continue to the heap dump below so the user gets diagnostics.
|
||||
}
|
||||
|
||||
dumped.add(level)
|
||||
const dump = await performHeapDump(level === 'critical' ? 'auto-critical' : 'auto-high').catch(() => null)
|
||||
dumped.add(level)
|
||||
const dump = await performHeapDump(level === 'critical' ? 'auto-critical' : 'auto-high').catch(() => null)
|
||||
const snap: MemorySnapshot = { heapUsed, level, rss }
|
||||
|
||||
const snap: MemorySnapshot = { heapUsed, level, rss }
|
||||
|
||||
;(level === 'critical' ? onCritical : onHigh)?.(snap, dump)
|
||||
;(level === 'critical' ? onCritical : onHigh)?.(snap, dump)
|
||||
} finally {
|
||||
inFlight.delete(level)
|
||||
}
|
||||
}
|
||||
|
||||
const handle = setInterval(() => void tick(), intervalMs)
|
||||
|
||||
+2
-1
@@ -143,11 +143,12 @@ export interface McpServerStatus {
|
||||
export interface SessionInfo {
|
||||
cwd?: string
|
||||
fast?: boolean
|
||||
lazy?: boolean
|
||||
mcp_servers?: McpServerStatus[]
|
||||
model: string
|
||||
reasoning_effort?: string
|
||||
service_tier?: string
|
||||
release_date?: string
|
||||
service_tier?: string
|
||||
skills: Record<string, string[]>
|
||||
tools: Record<string, string[]>
|
||||
update_behind?: number | null
|
||||
|
||||
@@ -70,7 +70,7 @@ Good defaults:
|
||||
|----------|-----------|---------------|
|
||||
| **Nous Portal** | Subscription-based, zero-config | OAuth login via `hermes model` |
|
||||
| **OpenAI Codex** | ChatGPT OAuth, uses Codex models | Device code auth via `hermes model` |
|
||||
| **Anthropic** | Claude models directly (Pro/Max or API key) | `hermes model` with Claude Code auth, or an Anthropic API key |
|
||||
| **Anthropic** | Claude models directly — Max plan + extra usage credits (OAuth), or API key for pay-per-token | `hermes model` → OAuth login (requires Max + extra credits), or an Anthropic API key |
|
||||
| **OpenRouter** | Multi-provider routing across many models | Enter your API key |
|
||||
| **Z.AI** | GLM / Zhipu-hosted models | Set `GLM_API_KEY` / `ZAI_API_KEY` |
|
||||
| **Kimi / Moonshot** | Moonshot-hosted coding and chat models | Set `KIMI_API_KEY` |
|
||||
|
||||
@@ -18,7 +18,7 @@ You need at least one way to connect to an LLM. Use `hermes model` to switch pro
|
||||
| **OpenAI Codex** | `hermes model` (ChatGPT OAuth, uses Codex models) |
|
||||
| **GitHub Copilot** | `hermes model` (OAuth device code flow, `COPILOT_GITHUB_TOKEN`, `GH_TOKEN`, or `gh auth token`) |
|
||||
| **GitHub Copilot ACP** | `hermes model` (spawns local `copilot --acp --stdio`) |
|
||||
| **Anthropic** | `hermes model` (Claude Pro/Max via Claude Code auth, Anthropic API key, or manual setup-token) |
|
||||
| **Anthropic** | `hermes model` (Claude Max + extra usage credits via OAuth; also supports Anthropic API key or manual setup-token — see note below) |
|
||||
| **OpenRouter** | `OPENROUTER_API_KEY` in `~/.hermes/.env` |
|
||||
| **AI Gateway** | `AI_GATEWAY_API_KEY` in `~/.hermes/.env` (provider: `ai-gateway`) |
|
||||
| **z.ai / GLM** | `GLM_API_KEY` in `~/.hermes/.env` (provider: `zai`) |
|
||||
@@ -158,6 +158,12 @@ If you're trying to switch to a provider you haven't set up yet (e.g. you only h
|
||||
|
||||
Use Claude models directly through the Anthropic API — no OpenRouter proxy needed. Supports three auth methods:
|
||||
|
||||
:::caution Requires Claude Max "extra usage" credits
|
||||
When you authenticate via `hermes model` → Anthropic OAuth (or via `hermes auth add anthropic --type oauth`), Hermes routes as Claude Code against your Anthropic account. **It only works if you're on a Claude Max plan and have purchased extra usage credits.** The base Max plan allowance (the usage included in Claude Code by default) is not consumed by Hermes — only the extra/overage credits you've added on top are. Claude Pro subscribers cannot use this path.
|
||||
|
||||
If you don't have Max + extra credits, use an `ANTHROPIC_API_KEY` instead — requests are billed pay-per-token against that key's organization (standard API pricing, independent of any Claude subscription).
|
||||
:::
|
||||
|
||||
```bash
|
||||
# With an API key (pay-per-token)
|
||||
export ANTHROPIC_API_KEY=***
|
||||
|
||||
@@ -89,7 +89,7 @@ All variables go in `~/.hermes/.env`. You can also set them with `hermes config
|
||||
|
||||
## Provider Auth (OAuth)
|
||||
|
||||
For native Anthropic auth, Hermes prefers Claude Code's own credential files when they exist because those credentials can refresh automatically. Environment variables such as `ANTHROPIC_TOKEN` remain useful as manual overrides, but they are no longer the preferred path for Claude Pro/Max login.
|
||||
For native Anthropic auth, Hermes prefers Claude Code's own credential files when they exist because those credentials can refresh automatically. **OAuth against Anthropic requires a Claude Max plan with purchased extra usage credits** — Hermes routes as Claude Code, which only draws from the Max plan's extra/overage credits, not the base Max allowance, and does not work on Claude Pro. Without Max + extra credits, use an API key instead. Environment variables such as `ANTHROPIC_TOKEN` remain useful as manual overrides, but they are no longer the preferred path for Claude Max login.
|
||||
|
||||
| Variable | Description |
|
||||
|----------|-------------|
|
||||
|
||||
@@ -40,7 +40,7 @@ hermes auth add openrouter --api-key sk-or-v1-your-second-key
|
||||
# Add a second Anthropic key
|
||||
hermes auth add anthropic --type api-key --api-key sk-ant-api03-your-second-key
|
||||
|
||||
# Add an Anthropic OAuth credential (Claude Code subscription)
|
||||
# Add an Anthropic OAuth credential (requires Claude Max plan + extra usage credits)
|
||||
hermes auth add anthropic --type oauth
|
||||
# Opens browser for OAuth login
|
||||
```
|
||||
|
||||
Reference in New Issue
Block a user