Compare commits

..

1 Commits

Author SHA1 Message Date
kshitijk4poor 0d3d2a2631 fix(model): preserve custom endpoint credentials and accept cloud models not in /v1/models
When switching models on a custom endpoint (ollama-launch):
- Same-provider switches no longer re-resolve credentials (fixes base_url
  being lost for 'custom' provider on subsequent switches)
- Named providers (ollama-launch) are resolved via user_providers so
  switch_model can find their base_url from config
- Models not in the /v1/models probe but present in the user's saved
  provider config are accepted with a warning instead of rejected
- CLI /model and TUI /model both pass user_providers/custom_providers
  to switch_model so the config model list is available for validation

Closes #15088
2026-04-25 14:10:42 +05:30
565 changed files with 4802 additions and 59412 deletions
-1
View File
@@ -69,4 +69,3 @@ mini-swe-agent/
.nix-stamps/
result
website/static/api/skills-index.json
models-dev-upstream/
+2 -6
View File
@@ -30,22 +30,18 @@ WORKDIR /opt/hermes
# unless the lockfiles themselves change.
COPY package.json package-lock.json ./
COPY web/package.json web/package-lock.json web/
COPY ui-tui/package.json ui-tui/package-lock.json ui-tui/
COPY ui-tui/packages/hermes-ink/package.json ui-tui/packages/hermes-ink/package-lock.json ui-tui/packages/hermes-ink/
RUN npm install --prefer-offline --no-audit && \
npx playwright install --with-deps chromium --only-shell && \
(cd web && npm install --prefer-offline --no-audit) && \
(cd ui-tui && npm install --prefer-offline --no-audit) && \
npm cache clean --force
# ---------- Source code ----------
# .dockerignore excludes node_modules, so the installs above survive.
COPY --chown=hermes:hermes . .
# Build browser dashboard and terminal UI assets.
RUN cd web && npm run build && \
cd ../ui-tui && npm run build
# Build web dashboard (Vite outputs to hermes_cli/web_dist/)
RUN cd web && npm run build
# ---------- Permissions ----------
# Make install dir world-readable so any HERMES_UID can read it at runtime.
+4 -13
View File
@@ -390,16 +390,7 @@ def build_anthropic_client(api_key: str, base_url: str = None, timeout: float =
"timeout": Timeout(timeout=float(_read_timeout), connect=10.0),
}
if normalized_base_url:
# Azure Anthropic endpoints require an ``api-version`` query parameter.
# Pass it via default_query so the SDK appends it to every request URL
# without corrupting the base_url (appending it directly produces
# malformed paths like /anthropic?api-version=.../v1/messages).
_is_azure_endpoint = "azure.com" in normalized_base_url.lower()
if _is_azure_endpoint and "api-version" not in normalized_base_url:
kwargs["base_url"] = normalized_base_url.rstrip("/")
kwargs["default_query"] = {"api-version": "2025-04-15"}
else:
kwargs["base_url"] = normalized_base_url
kwargs["base_url"] = normalized_base_url
common_betas = _common_betas_for_base_url(normalized_base_url)
if _is_kimi_coding_endpoint(base_url):
@@ -1689,9 +1680,9 @@ def build_anthropic_kwargs(
# ── Strip sampling params on 4.7+ ─────────────────────────────────
# Opus 4.7 rejects any non-default temperature/top_p/top_k with a 400.
# Callers (auxiliary_client, etc.) may set these for older models;
# drop them here as a safety net so upstream 4.6 → 4.7 migrations
# don't require coordinated edits everywhere.
# Callers (auxiliary_client, flush_memories, etc.) may set these for
# older models; drop them here as a safety net so upstream 4.6 → 4.7
# migrations don't require coordinated edits everywhere.
if _forbids_sampling_params(model):
for _sampling_key in ("temperature", "top_p", "top_k"):
kwargs.pop(_sampling_key, None)
+46 -211
View File
@@ -42,7 +42,6 @@ import time
from pathlib import Path # noqa: F401 — used by test mocks
from types import SimpleNamespace
from typing import Any, Dict, List, Optional, Tuple
from urllib.parse import urlparse, parse_qs, urlunparse
from openai import OpenAI
@@ -53,17 +52,6 @@ from utils import base_url_host_matches, base_url_hostname, normalize_proxy_env_
logger = logging.getLogger(__name__)
def _extract_url_query_params(url: str):
"""Extract query params from URL, return (clean_url, default_query dict or None)."""
parsed = urlparse(url)
if parsed.query:
clean = urlunparse(parsed._replace(query=""))
params = {k: v[0] for k, v in parse_qs(parsed.query).items()}
return clean, params
return url, None
# Module-level flag: only warn once per process about stale OPENAI_BASE_URL.
_stale_base_url_warned = False
@@ -82,8 +70,6 @@ _PROVIDER_ALIASES = {
"moonshot": "kimi-coding",
"kimi-cn": "kimi-coding-cn",
"moonshot-cn": "kimi-coding-cn",
"gmi-cloud": "gmi",
"gmicloud": "gmi",
"minimax-china": "minimax-cn",
"minimax_cn": "minimax-cn",
"claude": "anthropic",
@@ -157,7 +143,6 @@ _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
"kimi-coding": "kimi-k2-turbo-preview",
"stepfun": "step-3.5-flash",
"kimi-coding-cn": "kimi-k2-turbo-preview",
"gmi": "google/gemini-3.1-flash-lite-preview",
"minimax": "MiniMax-M2.7",
"minimax-cn": "MiniMax-M2.7",
"anthropic": "claude-haiku-4-5-20251001",
@@ -405,7 +390,7 @@ class _CodexCompletionsAdapter:
# Note: the Codex endpoint (chatgpt.com/backend-api/codex) does NOT
# support max_output_tokens or temperature — omit to avoid 400 errors.
# Tools support for auxiliary callers (e.g. skills_hub) that pass function schemas
# Tools support for flush_memories and similar callers
tools = kwargs.get("tools")
if tools:
converted = []
@@ -1172,10 +1157,8 @@ def _try_custom_endpoint() -> Tuple[Optional[Any], Optional[str]]:
return None, None
model = _read_main_model() or "gpt-4o-mini"
logger.debug("Auxiliary client: custom endpoint (%s, api_mode=%s)", model, custom_mode or "chat_completions")
_clean_base, _dq = _extract_url_query_params(custom_base)
_extra = {"default_query": _dq} if _dq else {}
if custom_mode == "codex_responses":
real_client = OpenAI(api_key=custom_key, base_url=_clean_base, **_extra)
real_client = OpenAI(api_key=custom_key, base_url=custom_base)
return CodexAuxiliaryClient(real_client, model), model
if custom_mode == "anthropic_messages":
# Third-party Anthropic-compatible gateway (MiniMax, Zhipu GLM,
@@ -1189,12 +1172,12 @@ def _try_custom_endpoint() -> Tuple[Optional[Any], Optional[str]]:
"Custom endpoint declares api_mode=anthropic_messages but the "
"anthropic SDK is not installed — falling back to OpenAI-wire."
)
return OpenAI(api_key=custom_key, base_url=_clean_base, **_extra), model
return OpenAI(api_key=custom_key, base_url=custom_base), model
return (
AnthropicAuxiliaryClient(real_client, model, custom_key, custom_base, is_oauth=False),
model,
)
return OpenAI(api_key=custom_key, base_url=_clean_base, **_extra), model
return OpenAI(api_key=custom_key, base_url=custom_base), model
def _try_codex() -> Tuple[Optional[Any], Optional[str]]:
@@ -1366,49 +1349,6 @@ def _is_auth_error(exc: Exception) -> bool:
return "error code: 401" in err_lower or "authenticationerror" in type(exc).__name__.lower()
def _is_unsupported_parameter_error(exc: Exception, param: str) -> bool:
"""Detect provider 400s for an unsupported request parameter.
Different OpenAI-compatible endpoints phrase the same class of error a few
ways: ``Unsupported parameter: X``, ``unsupported_parameter`` with a
``param`` field, ``X is not supported``, ``unknown parameter: X``,
``unrecognized request argument: X``. We match on both the parameter
name and a generic "unsupported/unknown/unrecognized parameter" marker so
call sites can reactively retry without the offending key instead of
surfacing a noisy auxiliary failure.
Generalizes the temperature-specific detector that originally shipped
with PR #15621 so the same retry strategy can cover ``max_tokens``,
``seed``, ``top_p``, and any future quirk. Credit @nicholasrae (PR #15416)
for the generalization pattern.
"""
param_lower = (param or "").lower()
if not param_lower:
return False
err_lower = str(exc).lower()
if param_lower not in err_lower:
return False
return any(marker in err_lower for marker in (
"unsupported parameter",
"unsupported_parameter",
"not supported",
"does not support",
"unknown parameter",
"unrecognized request argument",
"unrecognized parameter",
"invalid parameter",
))
def _is_unsupported_temperature_error(exc: Exception) -> bool:
"""Back-compat wrapper: detect API errors where the model rejects ``temperature``.
Delegates to :func:`_is_unsupported_parameter_error`; kept as a separate
public symbol because existing tests and call sites import it by name.
"""
return _is_unsupported_parameter_error(exc, "temperature")
def _evict_cached_clients(provider: str) -> None:
"""Drop cached auxiliary clients for a provider so fresh creds are used."""
normalized = _normalize_aux_provider(provider)
@@ -1620,14 +1560,8 @@ def _resolve_auto(main_runtime: Optional[Dict[str, Any]] = None) -> Tuple[Option
# below — never look up auth env vars ad-hoc.
def _to_async_client(sync_client, model: str, is_vision: bool = False):
"""Convert a sync client to its async counterpart, preserving Codex routing.
When ``is_vision=True`` and the underlying base URL is Copilot, the
resulting async client carries the ``Copilot-Vision-Request: true``
header so the request is routed to Copilot's vision-capable
infrastructure (otherwise vision payloads silently time out).
"""
def _to_async_client(sync_client, model: str):
"""Convert a sync client to its async counterpart, preserving Codex routing."""
from openai import AsyncOpenAI
if isinstance(sync_client, CodexAuxiliaryClient):
@@ -1656,11 +1590,9 @@ def _to_async_client(sync_client, model: str, is_vision: bool = False):
if base_url_host_matches(sync_base_url, "openrouter.ai"):
async_kwargs["default_headers"] = dict(_OR_HEADERS)
elif base_url_host_matches(sync_base_url, "api.githubcopilot.com"):
from hermes_cli.copilot_auth import copilot_request_headers
from hermes_cli.models import copilot_default_headers
async_kwargs["default_headers"] = copilot_request_headers(
is_agent_turn=True, is_vision=is_vision
)
async_kwargs["default_headers"] = copilot_default_headers()
elif base_url_host_matches(sync_base_url, "api.kimi.com"):
async_kwargs["default_headers"] = {"User-Agent": "claude-code/0.1.0"}
return AsyncOpenAI(**async_kwargs), model
@@ -1687,7 +1619,6 @@ def resolve_provider_client(
explicit_api_key: str = None,
api_mode: str = None,
main_runtime: Optional[Dict[str, Any]] = None,
is_vision: bool = False,
) -> Tuple[Optional[Any], Optional[str]]:
"""Central router: given a provider name and optional model, return a
configured client with the correct auth, base URL, and API format.
@@ -1771,7 +1702,7 @@ def resolve_provider_client(
"auxiliary provider (using %r instead)", model, resolved)
model = None
final_model = model or resolved
return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
return (_to_async_client(client, final_model) if async_mode
else (client, final_model))
# ── OpenRouter ───────────────────────────────────────────────────
@@ -1784,7 +1715,7 @@ def resolve_provider_client(
)
return None, None
final_model = _normalize_resolved_model(model or default, provider)
return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
return (_to_async_client(client, final_model) if async_mode
else (client, final_model))
# ── Nous Portal (OAuth) ──────────────────────────────────────────
@@ -1801,7 +1732,7 @@ def resolve_provider_client(
"but Nous Portal not configured (run: hermes auth)")
return None, None
final_model = _normalize_resolved_model(model or default, provider)
return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
return (_to_async_client(client, final_model) if async_mode
else (client, final_model))
# ── OpenAI Codex (OAuth → Responses API) ─────────────────────────
@@ -1828,7 +1759,7 @@ def resolve_provider_client(
"but no Codex OAuth token found (run: hermes model)")
return None, None
final_model = _normalize_resolved_model(model or default, provider)
return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
return (_to_async_client(client, final_model) if async_mode
else (client, final_model))
# ── Custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY) ───────────
@@ -1851,19 +1782,14 @@ def resolve_provider_client(
provider,
)
extra = {}
_clean_base, _dq = _extract_url_query_params(custom_base)
if _dq:
extra["default_query"] = _dq
if base_url_host_matches(custom_base, "api.kimi.com"):
extra["default_headers"] = {"User-Agent": "claude-code/0.1.0"}
elif base_url_host_matches(custom_base, "api.githubcopilot.com"):
from hermes_cli.copilot_auth import copilot_request_headers
extra["default_headers"] = copilot_request_headers(
is_agent_turn=True, is_vision=is_vision
)
client = OpenAI(api_key=custom_key, base_url=_clean_base, **extra)
from hermes_cli.models import copilot_default_headers
extra["default_headers"] = copilot_default_headers()
client = OpenAI(api_key=custom_key, base_url=custom_base, **extra)
client = _wrap_if_needed(client, final_model, custom_base)
return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
return (_to_async_client(client, final_model) if async_mode
else (client, final_model))
# Try custom first, then codex, then API-key providers
for try_fn in (_try_custom_endpoint, _try_codex,
@@ -1873,7 +1799,7 @@ def resolve_provider_client(
final_model = _normalize_resolved_model(model or default, provider)
_cbase = str(getattr(client, "base_url", "") or "")
client = _wrap_if_needed(client, final_model, _cbase)
return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
return (_to_async_client(client, final_model) if async_mode
else (client, final_model))
logger.warning("resolve_provider_client: custom/main requested "
"but no endpoint credentials found")
@@ -1898,8 +1824,6 @@ def resolve_provider_client(
model or custom_entry.get("model") or _read_main_model() or "gpt-4o-mini",
provider,
)
_clean_base2, _dq2 = _extract_url_query_params(custom_base)
_extra2 = {"default_query": _dq2} if _dq2 else {}
logger.debug(
"resolve_provider_client: named custom provider %r (%s, api_mode=%s)",
provider, final_model, entry_api_mode or "chat_completions")
@@ -1917,8 +1841,8 @@ def resolve_provider_client(
"installed — falling back to OpenAI-wire.",
provider,
)
client = OpenAI(api_key=custom_key, base_url=_clean_base2, **_extra2)
return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
client = OpenAI(api_key=custom_key, base_url=custom_base)
return (_to_async_client(client, final_model) if async_mode
else (client, final_model))
sync_anthropic = AnthropicAuxiliaryClient(
real_client, final_model, custom_key, custom_base, is_oauth=False,
@@ -1926,7 +1850,7 @@ def resolve_provider_client(
if async_mode:
return AsyncAnthropicAuxiliaryClient(sync_anthropic), final_model
return sync_anthropic, final_model
client = OpenAI(api_key=custom_key, base_url=_clean_base2, **_extra2)
client = OpenAI(api_key=custom_key, base_url=custom_base)
# codex_responses or inherited auto-detect (via _wrap_if_needed).
# _wrap_if_needed reads the closed-over `api_mode` (the task-level
# override). Named-provider entry api_mode=codex_responses also
@@ -1937,7 +1861,7 @@ def resolve_provider_client(
client = CodexAuxiliaryClient(client, final_model)
else:
client = _wrap_if_needed(client, final_model, custom_base)
return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
return (_to_async_client(client, final_model) if async_mode
else (client, final_model))
logger.warning(
"resolve_provider_client: named custom provider %r has no base_url",
@@ -1969,7 +1893,7 @@ def resolve_provider_client(
logger.warning("resolve_provider_client: anthropic requested but no Anthropic credentials found")
return None, None
final_model = _normalize_resolved_model(model or default_model, provider)
return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode else (client, final_model))
return (_to_async_client(client, final_model) if async_mode else (client, final_model))
creds = resolve_api_key_provider_credentials(provider)
api_key = str(creds.get("api_key", "")).strip()
@@ -1995,7 +1919,7 @@ def resolve_provider_client(
if is_native_gemini_base_url(base_url):
client = GeminiNativeClient(api_key=api_key, base_url=base_url)
logger.debug("resolve_provider_client: %s (%s)", provider, final_model)
return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
return (_to_async_client(client, final_model) if async_mode
else (client, final_model))
# Provider-specific headers
@@ -2003,11 +1927,9 @@ def resolve_provider_client(
if base_url_host_matches(base_url, "api.kimi.com"):
headers["User-Agent"] = "claude-code/0.1.0"
elif base_url_host_matches(base_url, "api.githubcopilot.com"):
from hermes_cli.copilot_auth import copilot_request_headers
from hermes_cli.models import copilot_default_headers
headers.update(copilot_request_headers(
is_agent_turn=True, is_vision=is_vision
))
headers.update(copilot_default_headers())
client = OpenAI(api_key=api_key, base_url=base_url,
**({"default_headers": headers} if headers else {}))
@@ -2033,7 +1955,7 @@ def resolve_provider_client(
client = _wrap_if_needed(client, final_model, base_url)
logger.debug("resolve_provider_client: %s (%s)", provider, final_model)
return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
return (_to_async_client(client, final_model) if async_mode
else (client, final_model))
if pconfig.auth_type == "external_process":
@@ -2065,7 +1987,7 @@ def resolve_provider_client(
args=args,
)
logger.debug("resolve_provider_client: %s (%s)", provider, final_model)
return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
return (_to_async_client(client, final_model) if async_mode
else (client, final_model))
logger.warning("resolve_provider_client: external-process provider %s not "
"directly supported", provider)
@@ -2101,7 +2023,7 @@ def resolve_provider_client(
base_url=f"https://bedrock-runtime.{region}.amazonaws.com",
)
logger.debug("resolve_provider_client: bedrock (%s, %s)", final_model, region)
return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
return (_to_async_client(client, final_model) if async_mode
else (client, final_model))
elif pconfig.auth_type in ("oauth_device_code", "oauth_external"):
@@ -2176,13 +2098,8 @@ def _normalize_vision_provider(provider: Optional[str]) -> str:
return _normalize_aux_provider(provider)
def _resolve_strict_vision_backend(
provider: str,
model: Optional[str] = None,
) -> Tuple[Optional[Any], Optional[str]]:
def _resolve_strict_vision_backend(provider: str) -> Tuple[Optional[Any], Optional[str]]:
provider = _normalize_vision_provider(provider)
if provider == "copilot":
return resolve_provider_client("copilot", model, is_vision=True)
if provider == "openrouter":
return _try_openrouter()
if provider == "nous":
@@ -2250,7 +2167,7 @@ def resolve_vision_provider_client(
return resolved_provider, None, None
final_model = resolved_model or default_model
if async_mode:
async_client, async_model = _to_async_client(sync_client, final_model, is_vision=True)
async_client, async_model = _to_async_client(sync_client, final_model)
return resolved_provider, async_client, async_model
return resolved_provider, sync_client, final_model
@@ -2282,11 +2199,8 @@ def resolve_vision_provider_client(
main_provider = _read_main_provider()
main_model = _read_main_model()
if main_provider and main_provider not in ("auto", ""):
vision_model = _PROVIDER_VISION_MODELS.get(main_provider, main_model)
if main_provider == "nous":
sync_client, default_model = _resolve_strict_vision_backend(
main_provider, vision_model
)
sync_client, default_model = _resolve_strict_vision_backend(main_provider)
if sync_client is not None:
logger.info(
"Vision auto-detect: using main provider %s (%s)",
@@ -2294,10 +2208,10 @@ def resolve_vision_provider_client(
)
return _finalize(main_provider, sync_client, default_model)
else:
vision_model = _PROVIDER_VISION_MODELS.get(main_provider, main_model)
rpc_client, rpc_model = resolve_provider_client(
main_provider, vision_model,
api_mode=resolved_api_mode,
is_vision=True)
api_mode=resolved_api_mode)
if rpc_client is not None:
logger.info(
"Vision auto-detect: using main provider %s (%s)",
@@ -2319,14 +2233,11 @@ def resolve_vision_provider_client(
return None, None, None
if requested in _VISION_AUTO_PROVIDER_ORDER:
sync_client, default_model = _resolve_strict_vision_backend(
requested, resolved_model
)
sync_client, default_model = _resolve_strict_vision_backend(requested)
return _finalize(requested, sync_client, default_model)
client, final_model = _get_cached_client(requested, resolved_model, async_mode,
api_mode=resolved_api_mode,
is_vision=True)
api_mode=resolved_api_mode)
if client is None:
return requested, None, None
return requested, client, final_model
@@ -2390,11 +2301,10 @@ def _client_cache_key(
api_key: Optional[str] = None,
api_mode: Optional[str] = None,
main_runtime: Optional[Dict[str, Any]] = None,
is_vision: bool = False,
) -> tuple:
runtime = _normalize_main_runtime(main_runtime)
runtime_key = tuple(runtime.get(field, "") for field in _MAIN_RUNTIME_FIELDS) if provider == "auto" else ()
return (provider, async_mode, base_url or "", api_key or "", api_mode or "", runtime_key, is_vision)
return (provider, async_mode, base_url or "", api_key or "", api_mode or "", runtime_key)
def _store_cached_client(cache_key: tuple, client: Any, default_model: Optional[str], *, bound_loop: Any = None) -> None:
@@ -2420,7 +2330,6 @@ def _refresh_nous_auxiliary_client(
api_key: Optional[str] = None,
api_mode: Optional[str] = None,
main_runtime: Optional[Dict[str, Any]] = None,
is_vision: bool = False,
) -> Tuple[Optional[Any], Optional[str]]:
"""Refresh Nous runtime creds, rebuild the client, and replace the cache entry."""
runtime = _resolve_nous_runtime_api(force_refresh=True)
@@ -2438,7 +2347,7 @@ def _refresh_nous_auxiliary_client(
current_loop = _aio.get_event_loop()
except RuntimeError:
pass
client, final_model = _to_async_client(sync_client, final_model or "", is_vision=is_vision)
client, final_model = _to_async_client(sync_client, final_model or "")
else:
client = sync_client
@@ -2449,7 +2358,6 @@ def _refresh_nous_auxiliary_client(
api_key=api_key,
api_mode=api_mode,
main_runtime=main_runtime,
is_vision=is_vision,
)
_store_cached_client(cache_key, client, final_model, bound_loop=current_loop)
return client, final_model
@@ -2561,19 +2469,12 @@ def _is_openrouter_client(client: Any) -> bool:
return False
def _cached_client_accepts_slash_models(client: Any, cached_default: Optional[str]) -> bool:
"""Best-effort check for cached clients that accept ``vendor/model`` IDs."""
if _is_openrouter_client(client):
return True
return bool(cached_default and "/" in cached_default)
def _compat_model(client: Any, model: Optional[str], cached_default: Optional[str]) -> Optional[str]:
"""Keep slash-bearing model IDs only for cached clients that support them.
"""Drop OpenRouter-format model slugs (with '/') for non-OpenRouter clients.
Mirrors the guard in resolve_provider_client() which is skipped on cache hits.
"""
if model and "/" in model and not _cached_client_accepts_slash_models(client, cached_default):
if model and "/" in model and not _is_openrouter_client(client):
return cached_default
return model or cached_default
@@ -2586,7 +2487,6 @@ def _get_cached_client(
api_key: str = None,
api_mode: str = None,
main_runtime: Optional[Dict[str, Any]] = None,
is_vision: bool = False,
) -> Tuple[Optional[Any], Optional[str]]:
"""Get or create a cached client for the given provider.
@@ -2623,7 +2523,6 @@ def _get_cached_client(
api_key=api_key,
api_mode=api_mode,
main_runtime=main_runtime,
is_vision=is_vision,
)
with _client_cache_lock:
if cache_key in _client_cache:
@@ -2655,7 +2554,6 @@ def _get_cached_client(
explicit_api_key=api_key,
api_mode=api_mode,
main_runtime=runtime,
is_vision=is_vision,
)
if client is not None:
# For async clients, remember which loop they were created on so we
@@ -2862,8 +2760,8 @@ def _build_call_kwargs(
temperature = fixed_temperature
# Opus 4.7+ rejects any non-default temperature/top_p/top_k — silently
# drop here so auxiliary callers that hardcode temperature (e.g. 0 on
# structured-JSON extraction) don't 400 the moment
# drop here so auxiliary callers that hardcode temperature (e.g. 0.3 on
# flush_memories, 0 on structured-JSON extraction) don't 400 the moment
# the aux model is flipped to 4.7.
if temperature is not None:
from agent.anthropic_adapter import _forbids_sampling_params
@@ -2951,7 +2849,7 @@ def call_llm(
Args:
task: Auxiliary task name ("compression", "vision", "web_extract",
"session_search", "skills_hub", "mcp", "title_generation").
"session_search", "skills_hub", "mcp", "flush_memories").
Reads provider:model from config/env. Ignored if provider is set.
provider: Explicit provider override.
model: Explicit model override.
@@ -3054,45 +2952,13 @@ def call_llm(
if _is_anthropic_compat_endpoint(resolved_provider, _client_base):
kwargs["messages"] = _convert_openai_images_to_anthropic(kwargs["messages"])
# Handle unsupported temperature, max_tokens vs max_completion_tokens retry,
# then payment fallback.
# Handle max_tokens vs max_completion_tokens retry, then payment fallback.
try:
return _validate_llm_response(
client.chat.completions.create(**kwargs), task)
except Exception as first_err:
if "temperature" in kwargs and _is_unsupported_temperature_error(first_err):
retry_kwargs = dict(kwargs)
retry_kwargs.pop("temperature", None)
logger.info(
"Auxiliary %s: provider rejected temperature; retrying once without it",
task or "call",
)
try:
return _validate_llm_response(
client.chat.completions.create(**retry_kwargs), task)
except Exception as retry_err:
retry_err_str = str(retry_err)
# If retry still fails, fall through to the max_tokens /
# payment / auth chains below using the temperature-stripped
# kwargs. Re-raise only if the retry hit something those
# chains won't handle.
if not (
_is_payment_error(retry_err)
or _is_connection_error(retry_err)
or _is_auth_error(retry_err)
or "max_tokens" in retry_err_str
or "unsupported_parameter" in retry_err_str
):
raise
first_err = retry_err
kwargs = retry_kwargs
err_str = str(first_err)
if max_tokens is not None and (
"max_tokens" in err_str
or "unsupported_parameter" in err_str
or _is_unsupported_parameter_error(first_err, "max_tokens")
):
if "max_tokens" in err_str or "unsupported_parameter" in err_str:
kwargs.pop("max_tokens", None)
kwargs["max_completion_tokens"] = max_tokens
try:
@@ -3119,7 +2985,6 @@ def call_llm(
api_key=resolved_api_key,
api_mode=resolved_api_mode,
main_runtime=main_runtime,
is_vision=(task == "vision"),
)
if refreshed_client is not None:
logger.info("Auxiliary %s: refreshed Nous runtime credentials after 401, retrying",
@@ -3356,35 +3221,8 @@ async def async_call_llm(
return _validate_llm_response(
await client.chat.completions.create(**kwargs), task)
except Exception as first_err:
if "temperature" in kwargs and _is_unsupported_temperature_error(first_err):
retry_kwargs = dict(kwargs)
retry_kwargs.pop("temperature", None)
logger.info(
"Auxiliary %s (async): provider rejected temperature; retrying once without it",
task or "call",
)
try:
return _validate_llm_response(
await client.chat.completions.create(**retry_kwargs), task)
except Exception as retry_err:
retry_err_str = str(retry_err)
if not (
_is_payment_error(retry_err)
or _is_connection_error(retry_err)
or _is_auth_error(retry_err)
or "max_tokens" in retry_err_str
or "unsupported_parameter" in retry_err_str
):
raise
first_err = retry_err
kwargs = retry_kwargs
err_str = str(first_err)
if max_tokens is not None and (
"max_tokens" in err_str
or "unsupported_parameter" in err_str
or _is_unsupported_parameter_error(first_err, "max_tokens")
):
if "max_tokens" in err_str or "unsupported_parameter" in err_str:
kwargs.pop("max_tokens", None)
kwargs["max_completion_tokens"] = max_tokens
try:
@@ -3410,7 +3248,6 @@ async def async_call_llm(
base_url=resolved_base_url,
api_key=resolved_api_key,
api_mode=resolved_api_mode,
is_vision=(task == "vision"),
)
if refreshed_client is not None:
logger.info("Auxiliary %s (async): refreshed Nous runtime credentials after 401, retrying",
@@ -3479,9 +3316,7 @@ async def async_call_llm(
extra_body=effective_extra_body,
base_url=str(getattr(fb_client, "base_url", "") or ""))
# Convert sync fallback client to async
async_fb, async_fb_model = _to_async_client(
fb_client, fb_model or "", is_vision=(task == "vision")
)
async_fb, async_fb_model = _to_async_client(fb_client, fb_model or "")
if async_fb_model and async_fb_model != fb_kwargs.get("model"):
fb_kwargs["model"] = async_fb_model
return _validate_llm_response(
+11 -147
View File
@@ -44,31 +44,22 @@ _TOOL_CALL_LEAK_PATTERN = re.compile(
# Multimodal content helpers
# ---------------------------------------------------------------------------
def _chat_content_to_responses_parts(content: Any, *, role: str = "user") -> List[Dict[str, Any]]:
def _chat_content_to_responses_parts(content: Any) -> List[Dict[str, Any]]:
"""Convert chat-style multimodal content to Responses API input parts.
Input: ``[{"type":"text"|"image_url", ...}]`` (native OpenAI Chat format)
Output: ``[{"type":"input_text"|"output_text"|"input_image", ...}]`` (Responses format)
The ``role`` parameter controls the text content type:
- ``"user"`` (default) → ``"input_text"``
- ``"assistant"`` → ``"output_text"``
The Responses API rejects ``input_text`` inside assistant messages and
``output_text`` inside user messages, so callers MUST pass the correct
role for the message being converted.
Output: ``[{"type":"input_text"|"input_image", ...}]`` (Responses format)
Returns an empty list when ``content`` is not a list or contains no
recognized parts — callers fall back to the string path.
"""
text_type = "output_text" if role == "assistant" else "input_text"
if not isinstance(content, list):
return []
converted: List[Dict[str, Any]] = []
for part in content:
if isinstance(part, str):
if part:
converted.append({"type": text_type, "text": part})
converted.append({"type": "input_text", "text": part})
continue
if not isinstance(part, dict):
continue
@@ -76,7 +67,7 @@ def _chat_content_to_responses_parts(content: Any, *, role: str = "user") -> Lis
if ptype in {"text", "input_text", "output_text"}:
text = part.get("text")
if isinstance(text, str) and text:
converted.append({"type": text_type, "text": text})
converted.append({"type": "input_text", "text": text})
continue
if ptype in {"image_url", "input_image"}:
image_ref = part.get("image_url")
@@ -227,23 +218,6 @@ def _responses_tools(tools: Optional[List[Dict[str, Any]]] = None) -> Optional[L
# Message format conversion
# ---------------------------------------------------------------------------
_RESPONSE_MESSAGE_STATUSES = {"completed", "incomplete", "in_progress"}
def _normalize_responses_message_status(value: Any, *, default: str = "completed") -> str:
"""Normalize a Responses assistant message status for replay.
The API accepts completed/incomplete/in_progress on replayed assistant
output messages. Preserve those exactly (modulo case/hyphen spelling) so
incomplete Codex continuation turns don't get falsely marked completed.
"""
if isinstance(value, str):
status = value.strip().lower().replace("-", "_").replace(" ", "_")
if status in _RESPONSE_MESSAGE_STATUSES:
return status
return default
def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
"""Convert internal chat-style messages to Responses input items."""
items: List[Dict[str, Any]] = []
@@ -259,10 +233,9 @@ def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Di
if role in {"user", "assistant"}:
content = msg.get("content", "")
if isinstance(content, list):
content_parts = _chat_content_to_responses_parts(content, role=role)
text_type = "output_text" if role == "assistant" else "input_text"
content_parts = _chat_content_to_responses_parts(content)
content_text = "".join(
p.get("text", "") for p in content_parts if p.get("type") == text_type
p.get("text", "") for p in content_parts if p.get("type") == "input_text"
)
else:
content_parts = []
@@ -289,57 +262,7 @@ def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Di
seen_item_ids.add(item_id)
has_codex_reasoning = True
# Replay exact assistant message items (with id/phase) from
# previous turns so the API can maintain prefix-cache hits.
# OpenAI docs: "preserve and resend phase on all assistant
# messages — dropping it can degrade performance."
codex_message_items = msg.get("codex_message_items")
replayed_message_items = 0
if isinstance(codex_message_items, list):
for raw_item in codex_message_items:
if not isinstance(raw_item, dict):
continue
if raw_item.get("type") != "message" or raw_item.get("role") != "assistant":
continue
raw_content_parts = raw_item.get("content")
if not isinstance(raw_content_parts, list):
continue
normalized_content_parts = []
for part in raw_content_parts:
if not isinstance(part, dict):
continue
part_type = str(part.get("type") or "").strip()
if part_type not in {"output_text", "text"}:
continue
text = part.get("text", "")
if text is None:
text = ""
if not isinstance(text, str):
text = str(text)
normalized_content_parts.append({"type": "output_text", "text": text})
if not normalized_content_parts:
continue
replay_item = {
"type": "message",
"role": "assistant",
"status": _normalize_responses_message_status(raw_item.get("status")),
"content": normalized_content_parts,
}
item_id = raw_item.get("id")
if isinstance(item_id, str) and item_id.strip():
replay_item["id"] = item_id.strip()
phase = raw_item.get("phase")
if isinstance(phase, str) and phase.strip():
replay_item["phase"] = phase.strip()
items.append(replay_item)
replayed_message_items += 1
if replayed_message_items > 0:
pass
elif content_parts:
if content_parts:
items.append({"role": "assistant", "content": content_parts})
elif content_text.strip():
items.append({"role": "assistant", "content": content_text})
@@ -499,47 +422,6 @@ def _preflight_codex_input_items(raw_items: Any) -> List[Dict[str, Any]]:
normalized.append(reasoning_item)
continue
if item_type == "message":
role = item.get("role")
if role != "assistant":
raise ValueError(f"Codex Responses input[{idx}] message items must have role='assistant'.")
content = item.get("content")
if not isinstance(content, list):
raise ValueError(f"Codex Responses input[{idx}] message item must have content list.")
normalized_content = []
for part_idx, part in enumerate(content):
if not isinstance(part, dict):
raise ValueError(
f"Codex Responses input[{idx}] message content[{part_idx}] must be an object."
)
part_type = part.get("type")
if part_type not in {"output_text", "text"}:
raise ValueError(
f"Codex Responses input[{idx}] message content[{part_idx}] has unsupported type {part_type!r}."
)
text = part.get("text", "")
if text is None:
text = ""
if not isinstance(text, str):
text = str(text)
normalized_content.append({"type": "output_text", "text": text})
if not normalized_content:
raise ValueError(f"Codex Responses input[{idx}] message item must contain at least one text part.")
normalized_item: Dict[str, Any] = {
"type": "message",
"role": "assistant",
"status": _normalize_responses_message_status(item.get("status")),
"content": normalized_content,
}
item_id = item.get("id")
if isinstance(item_id, str) and item_id.strip():
normalized_item["id"] = item_id.strip()
phase = item.get("phase")
if isinstance(phase, str) and phase.strip():
normalized_item["phase"] = phase.strip()
normalized.append(normalized_item)
continue
role = item.get("role")
if role in {"user", "assistant"}:
content = item.get("content", "")
@@ -547,16 +429,13 @@ def _preflight_codex_input_items(raw_items: Any) -> List[Dict[str, Any]]:
content = ""
if isinstance(content, list):
# Multimodal content from ``_chat_messages_to_responses_input``
# is already in Responses format (``input_text`` / ``output_text``
# / ``input_image``). Validate each part and pass through.
# Use the correct text type for the role — ``output_text`` for
# assistant messages, ``input_text`` for user messages.
text_type = "output_text" if role == "assistant" else "input_text"
# is already in Responses format (``input_text`` / ``input_image``).
# Validate each part and pass through.
validated: List[Dict[str, Any]] = []
for part_idx, part in enumerate(content):
if isinstance(part, str):
if part:
validated.append({"type": text_type, "text": part})
validated.append({"type": "input_text", "text": part})
continue
if not isinstance(part, dict):
raise ValueError(
@@ -567,7 +446,7 @@ def _preflight_codex_input_items(raw_items: Any) -> List[Dict[str, Any]]:
text = part.get("text", "")
if not isinstance(text, str):
text = str(text or "")
validated.append({"type": text_type, "text": text})
validated.append({"type": "input_text", "text": text})
elif ptype in {"input_image", "image_url"}:
image_ref = part.get("image_url", "")
detail = part.get("detail")
@@ -824,7 +703,6 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]:
content_parts: List[str] = []
reasoning_parts: List[str] = []
reasoning_items_raw: List[Dict[str, Any]] = []
message_items_raw: List[Dict[str, Any]] = []
tool_calls: List[Any] = []
has_incomplete_items = response_status in {"queued", "in_progress", "incomplete"}
saw_commentary_phase = False
@@ -843,7 +721,6 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]:
if item_type == "message":
item_phase = getattr(item, "phase", None)
normalized_phase = None
if isinstance(item_phase, str):
normalized_phase = item_phase.strip().lower()
if normalized_phase in {"commentary", "analysis"}:
@@ -853,18 +730,6 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]:
message_text = _extract_responses_message_text(item)
if message_text:
content_parts.append(message_text)
raw_message_item: Dict[str, Any] = {
"type": "message",
"role": "assistant",
"status": _normalize_responses_message_status(item_status),
"content": [{"type": "output_text", "text": message_text}],
}
item_id = getattr(item, "id", None)
if isinstance(item_id, str) and item_id:
raw_message_item["id"] = item_id
if normalized_phase:
raw_message_item["phase"] = normalized_phase
message_items_raw.append(raw_message_item)
elif item_type == "reasoning":
reasoning_text = _extract_responses_reasoning_text(item)
if reasoning_text:
@@ -977,7 +842,6 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]:
reasoning_content=None,
reasoning_details=None,
codex_reasoning_items=reasoning_items_raw or None,
codex_message_items=message_items_raw or None,
)
if tool_calls:
+5 -70
View File
@@ -61,52 +61,9 @@ _PRUNED_TOOL_PLACEHOLDER = "[Old tool output cleared to save context space]"
# Chars per token rough estimate
_CHARS_PER_TOKEN = 4
# Flat token cost per attached image part. Real cost varies by provider and
# dimensions (Anthropic ≈ width×height/750, GPT-4o up to ~1700 for
# high-detail 2048×2048, Gemini 258/tile), but 1600 is a realistic ceiling
# that keeps compression budgeting honest for multi-image conversations.
# Matches Claude Code's IMAGE_TOKEN_ESTIMATE constant.
_IMAGE_TOKEN_ESTIMATE = 1600
# Same figure expressed in the char-budget currency the rest of the
# compressor speaks in. Used when accumulating message "content length"
# for tail-cut decisions.
_IMAGE_CHAR_EQUIVALENT = _IMAGE_TOKEN_ESTIMATE * _CHARS_PER_TOKEN
_SUMMARY_FAILURE_COOLDOWN_SECONDS = 600
def _content_length_for_budget(raw_content: Any) -> int:
"""Return the effective char-length of a message's content for token budgeting.
Plain strings: ``len(content)``. Multimodal lists: sum of text-part
``len(text)`` plus a flat ``_IMAGE_CHAR_EQUIVALENT`` per image part
(``image_url`` / ``input_image`` / Anthropic-style ``image``). This
keeps the compressor from treating a turn with 5 attached images as
near-zero tokens just because the text part is empty.
"""
if isinstance(raw_content, str):
return len(raw_content)
if not isinstance(raw_content, list):
return len(str(raw_content or ""))
total = 0
for p in raw_content:
if isinstance(p, str):
total += len(p)
continue
if not isinstance(p, dict):
total += len(str(p))
continue
ptype = p.get("type")
if ptype in {"image_url", "input_image", "image"}:
total += _IMAGE_CHAR_EQUIVALENT
else:
# text / input_text / tool_result-with-text / anything else with
# a text field. Ignore the raw base64 payload inside image_url
# dicts — dimensions don't matter, only whether it's an image.
total += len(p.get("text", "") or "")
return total
def _content_text_for_contains(content: Any) -> str:
"""Return a best-effort text view of message content.
@@ -338,8 +295,6 @@ class ContextCompressor(ContextEngine):
self._context_probe_persistable = False
self._previous_summary = None
self._last_summary_error = None
self._last_summary_dropped_count = 0
self._last_summary_fallback_used = False
self._last_compression_savings_pct = 100.0
self._ineffective_compression_count = 0
@@ -363,13 +318,6 @@ class ContextCompressor(ContextEngine):
int(context_length * self.threshold_percent),
MINIMUM_CONTEXT_LENGTH,
)
# Recalculate token budgets for the new context length so the
# compressor stays calibrated after a model switch (e.g. 200K → 32K).
target_tokens = int(self.threshold_tokens * self.summary_target_ratio)
self.tail_token_budget = target_tokens
self.max_summary_tokens = min(
int(context_length * 0.05), _SUMMARY_TOKENS_CEILING,
)
def __init__(
self,
@@ -443,11 +391,6 @@ class ContextCompressor(ContextEngine):
self._ineffective_compression_count: int = 0
self._summary_failure_cooldown_until: float = 0.0
self._last_summary_error: Optional[str] = None
# When summary generation fails and a static fallback is inserted,
# record how many turns were unrecoverably dropped so callers
# (gateway hygiene, /compress) can surface a visible warning.
self._last_summary_dropped_count: int = 0
self._last_summary_fallback_used: bool = False
def update_from_response(self, usage: Dict[str, Any]):
"""Update tracked token usage from API response."""
@@ -534,7 +477,7 @@ class ContextCompressor(ContextEngine):
for i in range(len(result) - 1, -1, -1):
msg = result[i]
raw_content = msg.get("content") or ""
content_len = _content_length_for_budget(raw_content)
content_len = sum(len(p.get("text", "")) for p in raw_content) if isinstance(raw_content, list) else len(raw_content)
msg_tokens = content_len // _CHARS_PER_TOKEN + 10
for tc in msg.get("tool_calls") or []:
if isinstance(tc, dict):
@@ -1132,9 +1075,8 @@ The user has requested that this compaction PRIORITISE preserving all informatio
for i in range(n - 1, head_end - 1, -1):
msg = messages[i]
raw_content = msg.get("content") or ""
content_len = _content_length_for_budget(raw_content)
msg_tokens = content_len // _CHARS_PER_TOKEN + 10 # +10 for role/metadata
content = msg.get("content") or ""
msg_tokens = len(content) // _CHARS_PER_TOKEN + 10 # +10 for role/metadata
# Include tool call arguments in estimate
for tc in msg.get("tool_calls") or []:
if isinstance(tc, dict):
@@ -1203,11 +1145,6 @@ The user has requested that this compaction PRIORITISE preserving all informatio
related to this topic and be more aggressive about compressing
everything else. Inspired by Claude Code's ``/compact``.
"""
# Reset per-call summary failure state — callers inspect these fields
# after compress() returns to decide whether to surface a warning.
self._last_summary_dropped_count = 0
self._last_summary_fallback_used = False
self._last_summary_error = None
n_messages = len(messages)
# Only need head + 3 tail messages minimum (token budget decides the real tail size)
_min_for_compress = self.protect_first_n + 3 + 1
@@ -1286,13 +1223,11 @@ The user has requested that this compaction PRIORITISE preserving all informatio
if not self.quiet_mode:
logger.warning("Summary generation failed — inserting static fallback context marker")
n_dropped = compress_end - compress_start
self._last_summary_dropped_count = n_dropped
self._last_summary_fallback_used = True
summary = (
f"{SUMMARY_PREFIX}\n"
f"Summary generation was unavailable. {n_dropped} message(s) were "
f"Summary generation was unavailable. {n_dropped} conversation turns were "
f"removed to free context space but could not be summarized. The removed "
f"messages contained earlier work in this session. Continue based on the "
f"turns contained earlier work in this session. Continue based on the "
f"recent messages below and the current state of any files or resources."
)
+3 -6
View File
@@ -14,7 +14,6 @@ from datetime import datetime
from typing import Any, Dict, List, Optional, Set, Tuple
from hermes_constants import OPENROUTER_BASE_URL
from hermes_cli.config import get_env_value
import hermes_cli.auth as auth_mod
from hermes_cli.auth import (
CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS,
@@ -1274,8 +1273,7 @@ def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool
def _is_source_suppressed(_p, _s): # type: ignore[misc]
return False
if provider == "openrouter":
# Check both os.environ and ~/.hermes/.env file
token = (get_env_value("OPENROUTER_API_KEY") or "").strip()
token = os.getenv("OPENROUTER_API_KEY", "").strip()
if token:
source = "env:OPENROUTER_API_KEY"
if _is_source_suppressed(provider, source):
@@ -1301,7 +1299,7 @@ def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool
env_url = ""
if pconfig.base_url_env_var:
env_url = (get_env_value(pconfig.base_url_env_var) or "").strip().rstrip("/")
env_url = os.getenv(pconfig.base_url_env_var, "").strip().rstrip("/")
env_vars = list(pconfig.api_key_env_vars)
if provider == "anthropic":
@@ -1312,8 +1310,7 @@ def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool
]
for env_var in env_vars:
# Check both os.environ and ~/.hermes/.env file
token = (get_env_value(env_var) or "").strip()
token = os.getenv(env_var, "").strip()
if not token:
continue
source = f"env:{env_var}"
-31
View File
@@ -42,7 +42,6 @@ class FailoverReason(enum.Enum):
# Context / payload
context_overflow = "context_overflow" # Context too large — compress, not failover
payload_too_large = "payload_too_large" # 413 — compress payload
image_too_large = "image_too_large" # Native image part exceeds provider's per-image limit — shrink and retry
# Model
model_not_found = "model_not_found" # 404 or invalid model — fallback to different model
@@ -148,20 +147,6 @@ _PAYLOAD_TOO_LARGE_PATTERNS = [
"error code: 413",
]
# Image-size patterns. Matched against 400 bodies (not 413) because most
# providers return a 400 with a specific image-too-big message before the
# whole request hits the 413 size limit. Anthropic's wording is the most
# important here (hard 5 MB per image, returned as
# "messages.N.content.K.image.source.base64: image exceeds 5 MB maximum").
_IMAGE_TOO_LARGE_PATTERNS = [
"image exceeds", # Anthropic: "image exceeds 5 MB maximum"
"image too large", # generic
"image_too_large", # error_code variant
"image size exceeds", # variant
# "request_too_large" on a request known to contain an image → image is
# the likely culprit; we still try the shrink path before giving up.
]
# Context overflow patterns
_CONTEXT_OVERFLOW_PATTERNS = [
"context length",
@@ -686,15 +671,6 @@ def _classify_400(
) -> ClassifiedError:
"""Classify 400 Bad Request — context overflow, format error, or generic."""
# Image-too-large from 400 (Anthropic's 5 MB per-image check fires this way).
# Must be checked BEFORE context_overflow because messages can trip both
# patterns ("exceeds" + "image") and image-shrink is a cheaper recovery.
if any(p in error_msg for p in _IMAGE_TOO_LARGE_PATTERNS):
return result_fn(
FailoverReason.image_too_large,
retryable=True,
)
# Context overflow from 400
if any(p in error_msg for p in _CONTEXT_OVERFLOW_PATTERNS):
return result_fn(
@@ -822,13 +798,6 @@ def _classify_by_message(
should_compress=True,
)
# Image-too-large patterns (from message text when no status_code)
if any(p in error_msg for p in _IMAGE_TOO_LARGE_PATTERNS):
return result_fn(
FailoverReason.image_too_large,
retryable=True,
)
# Usage-limit patterns need the same disambiguation as 402: some providers
# surface "usage limit" errors without an HTTP status code. A transient
# signal ("try again", "resets at", …) means it's a periodic quota, not
-236
View File
@@ -1,236 +0,0 @@
"""Routing helpers for inbound user-attached images.
Two modes:
native attach images as OpenAI-style ``image_url`` content parts on the
user turn. Provider adapters (Anthropic, Gemini, Bedrock, Codex,
OpenAI chat.completions) already translate these into their
vendor-specific multimodal formats.
text run ``vision_analyze`` on each image up-front and prepend the
description to the user's text. The model never sees the pixels;
it only sees a lossy text summary. This is the pre-existing
behaviour and still the right choice for non-vision models.
The decision is made once per message turn by :func:`decide_image_input_mode`.
It reads ``agent.image_input_mode`` from config.yaml (``auto`` | ``native``
| ``text``, default ``auto``) and the active model's capability metadata.
In ``auto`` mode:
- If the user has explicitly configured ``auxiliary.vision.provider``
(i.e. not ``auto`` and not empty), we assume they want the text pipeline
regardless of the main model they've opted in to a specific vision
backend for a reason (cost, quality, local-only, etc.).
- Otherwise, if the active model reports ``supports_vision=True`` in its
models.dev metadata, we attach natively.
- Otherwise (non-vision model, no explicit override), we fall back to text.
This keeps ``vision_analyze`` surfaced as a tool in every session skills
and agent flows that chain it (browser screenshots, deeper inspection of
URL-referenced images, style-gating loops) keep working. The routing only
affects *how user-attached images on the current turn* are presented to the
main model.
"""
from __future__ import annotations
import base64
import logging
import mimetypes
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
logger = logging.getLogger(__name__)
_VALID_MODES = frozenset({"auto", "native", "text"})
def _coerce_mode(raw: Any) -> str:
"""Normalize a config value into one of the valid modes."""
if not isinstance(raw, str):
return "auto"
val = raw.strip().lower()
if val in _VALID_MODES:
return val
return "auto"
def _explicit_aux_vision_override(cfg: Optional[Dict[str, Any]]) -> bool:
"""True when the user configured a specific auxiliary vision backend.
An explicit override means the user *wants* the text pipeline (they're
paying for a dedicated vision model), so we don't silently bypass it.
"""
if not isinstance(cfg, dict):
return False
aux = cfg.get("auxiliary") or {}
if not isinstance(aux, dict):
return False
vision = aux.get("vision") or {}
if not isinstance(vision, dict):
return False
provider = str(vision.get("provider") or "").strip().lower()
model = str(vision.get("model") or "").strip()
base_url = str(vision.get("base_url") or "").strip()
# "auto" / "" / blank = not explicit
if provider in ("", "auto") and not model and not base_url:
return False
return True
def _lookup_supports_vision(provider: str, model: str) -> Optional[bool]:
"""Return True/False if we can resolve caps, None if unknown."""
if not provider or not model:
return None
try:
from agent.models_dev import get_model_capabilities
caps = get_model_capabilities(provider, model)
except Exception as exc: # pragma: no cover - defensive
logger.debug("image_routing: caps lookup failed for %s:%s%s", provider, model, exc)
return None
if caps is None:
return None
return bool(caps.supports_vision)
def decide_image_input_mode(
provider: str,
model: str,
cfg: Optional[Dict[str, Any]],
) -> str:
"""Return ``"native"`` or ``"text"`` for the given turn.
Args:
provider: active inference provider ID (e.g. ``"anthropic"``, ``"openrouter"``).
model: active model slug as it would be sent to the provider.
cfg: loaded config.yaml dict, or None. When None, behaves as auto.
"""
mode_cfg = "auto"
if isinstance(cfg, dict):
agent_cfg = cfg.get("agent") or {}
if isinstance(agent_cfg, dict):
mode_cfg = _coerce_mode(agent_cfg.get("image_input_mode"))
if mode_cfg == "native":
return "native"
if mode_cfg == "text":
return "text"
# auto
if _explicit_aux_vision_override(cfg):
return "text"
supports = _lookup_supports_vision(provider, model)
if supports is True:
return "native"
return "text"
# Image size handling is REACTIVE rather than proactive: we attempt native
# attachment at full size regardless of provider, and rely on
# ``run_agent._try_shrink_image_parts_in_messages`` to shrink + retry if
# the provider rejects the request (e.g. Anthropic's hard 5 MB per-image
# ceiling returned as HTTP 400 "image exceeds 5 MB maximum").
#
# Why reactive: our knowledge of provider ceilings is partial and evolving
# (OpenAI accepts 49 MB+, Anthropic 5 MB, Gemini 100 MB, others unknown).
# A proactive per-provider table would be stale the moment a provider raises
# or lowers its limit, and silently degrading quality for users on providers
# that would have accepted the full image is the worse failure mode.
# The shrink-on-reject path loses 1 API call + maybe 1s of Pillow work when
# it fires, which is cheaper than permanent quality loss.
def _guess_mime(path: Path) -> str:
mime, _ = mimetypes.guess_type(str(path))
if mime and mime.startswith("image/"):
return mime
# mimetypes on some Linux distros mis-maps .jpg; default to jpeg when
# the suffix looks imagey.
suffix = path.suffix.lower()
return {
".jpg": "image/jpeg",
".jpeg": "image/jpeg",
".png": "image/png",
".gif": "image/gif",
".webp": "image/webp",
".bmp": "image/bmp",
}.get(suffix, "image/jpeg")
def _file_to_data_url(path: Path) -> Optional[str]:
"""Encode a local image as a base64 data URL at its native size.
Size limits are NOT enforced here the agent retry loop
(``run_agent._try_shrink_image_parts_in_messages``) shrinks on the
provider's first rejection. Keeping this simple means providers that
accept large images (OpenAI 49 MB+, Gemini 100 MB) don't pay a silent
quality tax just because one other provider is stricter.
Returns None only if the file can't be read (missing, permission
denied, etc.); the caller reports those paths in ``skipped``.
"""
try:
raw = path.read_bytes()
except Exception as exc:
logger.warning("image_routing: failed to read %s%s", path, exc)
return None
mime = _guess_mime(path)
b64 = base64.b64encode(raw).decode("ascii")
return f"data:{mime};base64,{b64}"
def build_native_content_parts(
user_text: str,
image_paths: List[str],
) -> Tuple[List[Dict[str, Any]], List[str]]:
"""Build an OpenAI-style ``content`` list for a user turn.
Shape:
[{"type": "text", "text": "..."},
{"type": "image_url", "image_url": {"url": "data:image/png;base64,..."}},
...]
Images are attached at their native size. If a provider rejects the
request because an image is too large (e.g. Anthropic's 5 MB per-image
ceiling), the agent's retry loop transparently shrinks and retries
once see ``run_agent._try_shrink_image_parts_in_messages``.
Returns (content_parts, skipped_paths). Skipped paths are files that
couldn't be read from disk.
"""
parts: List[Dict[str, Any]] = []
skipped: List[str] = []
text = (user_text or "").strip()
if text:
parts.append({"type": "text", "text": text})
for raw_path in image_paths:
p = Path(raw_path)
if not p.exists() or not p.is_file():
skipped.append(str(raw_path))
continue
data_url = _file_to_data_url(p)
if not data_url:
skipped.append(str(raw_path))
continue
parts.append({
"type": "image_url",
"image_url": {"url": data_url},
})
# If the text was empty, add a neutral prompt so the turn isn't just images.
if not text and any(p.get("type") == "image_url" for p in parts):
parts.insert(0, {"type": "text", "text": "What do you see in this image?"})
return parts, skipped
__all__ = [
"decide_image_input_mode",
"build_native_content_parts",
]
+5 -114
View File
@@ -63,124 +63,15 @@ def sanitize_context(text: str) -> str:
return text
class StreamingContextScrubber:
"""Stateful scrubber for streaming text that may contain split memory-context spans.
The one-shot ``sanitize_context`` regex cannot survive chunk boundaries:
a ``<memory-context>`` opened in one delta and closed in a later delta
leaks its payload to the UI because the non-greedy block regex needs
both tags in one string. This scrubber runs a small state machine
across deltas, holding back partial-tag tails and discarding
everything inside a span (including the system-note line).
Usage::
scrubber = StreamingContextScrubber()
for delta in stream:
visible = scrubber.feed(delta)
if visible:
emit(visible)
trailing = scrubber.flush() # at end of stream
if trailing:
emit(trailing)
The scrubber is re-entrant per agent instance. Callers building new
top-level responses (new turn) should create a fresh scrubber or call
``reset()``.
"""
_OPEN_TAG = "<memory-context>"
_CLOSE_TAG = "</memory-context>"
def __init__(self) -> None:
self._in_span: bool = False
self._buf: str = ""
def reset(self) -> None:
self._in_span = False
self._buf = ""
def feed(self, text: str) -> str:
"""Return the visible portion of ``text`` after scrubbing.
Any trailing fragment that could be the start of an open/close tag
is held back in the internal buffer and surfaced on the next
``feed()`` call or discarded/emitted by ``flush()``.
"""
if not text:
return ""
buf = self._buf + text
self._buf = ""
out: list[str] = []
while buf:
if self._in_span:
idx = buf.lower().find(self._CLOSE_TAG)
if idx == -1:
# Hold back a potential partial close tag; drop the rest
held = self._max_partial_suffix(buf, self._CLOSE_TAG)
self._buf = buf[-held:] if held else ""
return "".join(out)
# Found close — skip span content + tag, continue
buf = buf[idx + len(self._CLOSE_TAG):]
self._in_span = False
else:
idx = buf.lower().find(self._OPEN_TAG)
if idx == -1:
# No open tag — hold back a potential partial open tag
held = self._max_partial_suffix(buf, self._OPEN_TAG)
if held:
out.append(buf[:-held])
self._buf = buf[-held:]
else:
out.append(buf)
return "".join(out)
# Emit text before the tag, enter span
if idx > 0:
out.append(buf[:idx])
buf = buf[idx + len(self._OPEN_TAG):]
self._in_span = True
return "".join(out)
def flush(self) -> str:
"""Emit any held-back buffer at end-of-stream.
If we're still inside an unterminated span the remaining content is
discarded (safer: leaking partial memory context is worse than a
truncated answer). Otherwise the held-back partial-tag tail is
emitted verbatim (it turned out not to be a real tag).
"""
if self._in_span:
self._buf = ""
self._in_span = False
return ""
tail = self._buf
self._buf = ""
return tail
@staticmethod
def _max_partial_suffix(buf: str, tag: str) -> int:
"""Return the length of the longest buf-suffix that is a tag-prefix.
Case-insensitive. Returns 0 if no suffix could start the tag.
"""
tag_lower = tag.lower()
buf_lower = buf.lower()
max_check = min(len(buf_lower), len(tag_lower) - 1)
for i in range(max_check, 0, -1):
if tag_lower.startswith(buf_lower[-i:]):
return i
return 0
def build_memory_context_block(raw_context: str) -> str:
"""Wrap prefetched memory in a fenced block with system note."""
"""Wrap prefetched memory in a fenced block with system note.
The fence prevents the model from treating recalled context as user
discourse. Injected at API-call time only never persisted.
"""
if not raw_context or not raw_context.strip():
return ""
clean = sanitize_context(raw_context)
if clean != raw_context:
logger.warning("memory provider returned pre-wrapped context; stripped")
return (
"<memory-context>\n"
"[System note: The following is recalled memory context, "
+24 -74
View File
@@ -51,7 +51,6 @@ _PROVIDER_PREFIXES: frozenset[str] = frozenset({
"qwen-oauth",
"xiaomi",
"arcee",
"gmi",
"custom", "local",
# Common aliases
"google", "google-gemini", "google-ai-studio",
@@ -61,7 +60,6 @@ _PROVIDER_PREFIXES: frozenset[str] = frozenset({
"stepfun", "opencode", "zen", "go", "vercel", "kilo", "dashscope", "aliyun", "qwen",
"mimo", "xiaomi-mimo",
"arcee-ai", "arceeai",
"gmi-cloud", "gmicloud",
"xai", "x-ai", "x.ai", "grok",
"nvidia", "nim", "nvidia-nim", "nemotron",
"qwen-portal",
@@ -108,11 +106,9 @@ _endpoint_model_metadata_cache_time: Dict[str, float] = {}
_ENDPOINT_MODEL_CACHE_TTL = 300
# Descending tiers for context length probing when the model is unknown.
# We start at 256K (covers GPT-5.x, many current large-context models) and
# step down on context-length errors until one works. Tier[0] is also the
# default fallback when no detection method succeeds.
# We start at 128K (a safe default for most modern models) and step down
# on context-length errors until one works.
CONTEXT_PROBE_TIERS = [
256_000,
128_000,
64_000,
32_000,
@@ -147,11 +143,10 @@ DEFAULT_CONTEXT_LENGTHS = {
"claude": 200000,
# OpenAI — GPT-5 family (most have 400k; specific overrides first)
# Source: https://developers.openai.com/api/docs/models
# GPT-5.5 (launched Apr 23 2026) is 1.05M on the direct OpenAI API and
# ChatGPT Codex OAuth caps it at 272K; both paths resolve via their own
# provider-aware branches (_resolve_codex_oauth_context_length + models.dev).
# This hardcoded value is only reached when every probe misses.
"gpt-5.5": 1050000,
# GPT-5.5 (launched Apr 23 2026). 400k is the fallback for providers we
# can't probe live. ChatGPT Codex OAuth actually caps lower (272k as of
# Apr 2026) and is resolved via _resolve_codex_oauth_context_length().
"gpt-5.5": 400000,
"gpt-5.4-nano": 400000, # 400k (not 1.05M like full 5.4)
"gpt-5.4-mini": 400000, # 400k (not 1.05M like full 5.4)
"gpt-5.4": 1050000, # GPT-5.4, GPT-5.4 Pro (1.05M context)
@@ -167,17 +162,7 @@ DEFAULT_CONTEXT_LENGTHS = {
"gemma-4-31b": 256000,
"gemma-3": 131072,
"gemma": 8192, # fallback for older gemma models
# DeepSeek — V4 family ships with a 1M context window. The legacy
# aliases ``deepseek-chat`` / ``deepseek-reasoner`` are server-side
# mapped to the non-thinking / thinking modes of ``deepseek-v4-flash``
# and inherit the same 1M window. The ``deepseek`` substring entry
# below remains as a 128K fallback for older / unknown DeepSeek model
# ids (e.g. via custom endpoints).
# https://api-docs.deepseek.com/zh-cn/quick_start/pricing
"deepseek-v4-pro": 1_000_000,
"deepseek-v4-flash": 1_000_000,
"deepseek-chat": 1_000_000,
"deepseek-reasoner": 1_000_000,
# DeepSeek
"deepseek": 128000,
# Meta
"llama": 131072,
@@ -309,7 +294,6 @@ _URL_TO_PROVIDER: Dict[str, str] = {
"integrate.api.nvidia.com": "nvidia",
"api.xiaomimimo.com": "xiaomi",
"xiaomimimo.com": "xiaomi",
"api.gmi-serving.com": "gmi",
"ollama.com": "ollama-cloud",
}
@@ -705,29 +689,6 @@ def fetch_endpoint_model_metadata(
return {}
def _resolve_endpoint_context_length(
model: str,
base_url: str,
api_key: str = "",
) -> Optional[int]:
"""Resolve context length from an endpoint's live ``/models`` metadata."""
endpoint_metadata = fetch_endpoint_model_metadata(base_url, api_key=api_key)
matched = endpoint_metadata.get(model)
if not matched:
if len(endpoint_metadata) == 1:
matched = next(iter(endpoint_metadata.values()))
else:
for key, entry in endpoint_metadata.items():
if model in key or key in model:
matched = entry
break
if matched:
context_length = matched.get("context_length")
if isinstance(context_length, int):
return context_length
return None
def _get_context_cache_path() -> Path:
"""Return path to the persistent context length cache file."""
from hermes_constants import get_hermes_home
@@ -1232,7 +1193,6 @@ def get_model_context_length(
api_key: str = "",
config_context_length: int | None = None,
provider: str = "",
custom_providers: list | None = None,
) -> int:
"""Get the context length for a model.
@@ -1253,23 +1213,6 @@ def get_model_context_length(
if config_context_length is not None and isinstance(config_context_length, int) and config_context_length > 0:
return config_context_length
# 0b. custom_providers per-model override — check before any probe.
# This closes the gap where /model switch and display paths used to fall
# back to 128K despite the user having a per-model context_length set.
# See #15779.
if custom_providers and base_url and model:
try:
from hermes_cli.config import get_custom_provider_context_length
cp_ctx = get_custom_provider_context_length(
model=model,
base_url=base_url,
custom_providers=custom_providers,
)
if cp_ctx:
return cp_ctx
except Exception:
pass # fall through to probing
# Normalise provider-prefixed model names (e.g. "local:model-name" →
# "model-name") so cache lookups and server queries use the bare ID that
# local servers actually know about. Ollama "model:tag" colons are preserved.
@@ -1321,9 +1264,22 @@ def get_model_context_length(
# returns 128k) instead of the model's full context (400k). models.dev
# has the correct per-provider values and is checked at step 5+.
if _is_custom_endpoint(base_url) and not _is_known_provider_base_url(base_url):
context_length = _resolve_endpoint_context_length(model, base_url, api_key=api_key)
if context_length is not None:
return context_length
endpoint_metadata = fetch_endpoint_model_metadata(base_url, api_key=api_key)
matched = endpoint_metadata.get(model)
if not matched:
# Single-model servers: if only one model is loaded, use it
if len(endpoint_metadata) == 1:
matched = next(iter(endpoint_metadata.values()))
else:
# Fuzzy match: substring in either direction
for key, entry in endpoint_metadata.items():
if model in key or key in model:
matched = entry
break
if matched:
context_length = matched.get("context_length")
if isinstance(context_length, int):
return context_length
if not _is_known_provider_base_url(base_url):
# 3. Try querying local server directly
if is_local_endpoint(base_url):
@@ -1387,12 +1343,6 @@ def get_model_context_length(
if base_url:
save_context_length(model, base_url, codex_ctx)
return codex_ctx
if effective_provider == "gmi" and base_url:
# GMI exposes authoritative context_length via /models, but it is not
# in models.dev yet. Preserve that higher-fidelity endpoint lookup.
ctx = _resolve_endpoint_context_length(model, base_url, api_key=api_key)
if ctx is not None:
return ctx
if effective_provider:
from agent.models_dev import lookup_models_dev_context
ctx = lookup_models_dev_context(effective_provider, model)
@@ -1402,7 +1352,7 @@ def get_model_context_length(
# 6. OpenRouter live API metadata (provider-unaware fallback)
metadata = fetch_model_metadata()
if model in metadata:
return metadata[model].get("context_length", DEFAULT_FALLBACK_CONTEXT)
return metadata[model].get("context_length", 128000)
# 8. Hardcoded defaults (fuzzy match — longest key first for specificity)
# Only check `default_model in model` (is the key a substring of the input).
-142
View File
@@ -180,145 +180,3 @@ def format_remaining(seconds: float) -> str:
h, remainder = divmod(s, 3600)
m = remainder // 60
return f"{h}h {m}m" if m else f"{h}h"
# Buckets with reset windows shorter than this are treated as transient
# (upstream jitter, secondary throttling) rather than a genuine quota
# exhaustion worth a cross-session breaker trip.
_MIN_RESET_FOR_BREAKER_SECONDS = 60.0
def is_genuine_nous_rate_limit(
*,
headers: Optional[Mapping[str, str]] = None,
last_known_state: Optional[Any] = None,
) -> bool:
"""Decide whether a 429 from Nous Portal is a real account rate limit.
Nous Portal multiplexes multiple upstream providers (DeepSeek, Kimi,
MiMo, Hermes, ...) behind one endpoint. A 429 can mean either:
(a) The caller's own RPM / RPH / TPM / TPH bucket on Nous is
exhausted a genuine rate limit that will last until the
bucket resets.
(b) The upstream provider is out of capacity for a specific model
transient, clears in seconds, and has nothing to do with
the caller's quota on Nous.
Tripping the cross-session breaker on (b) blocks ALL Nous requests
(and all models, since Nous is one provider key) for minutes even
though the caller's account is healthy and a different model would
have worked. That's the bug users hit when DeepSeek V4 Pro 429s
trigger a breaker that then blocks Kimi 2.6 and MiMo V2.5 Pro.
We tell the two apart by looking at:
1. The 429 response's own ``x-ratelimit-*`` headers. Nous emits
the full suite on every response including 429s. An exhausted
bucket (``remaining == 0`` with a reset window >= 60s) is
proof of (a).
2. The last-known-good rate-limit state captured by
``_capture_rate_limits()`` on the previous successful
response. If any bucket there was already near-exhausted with
a substantial reset window, the current 429 is almost
certainly (a) continuing from that condition.
If neither signal fires, we treat the 429 as (b): fail the single
request, let the retry loop or model-switch proceed, and do NOT
write the cross-session breaker file.
Returns True when the evidence points at (a).
"""
# Signal 1: current 429 response headers.
state = _parse_buckets_from_headers(headers)
if _has_exhausted_bucket(state):
return True
# Signal 2: last-known-good state from a recent successful response.
# Accepts either a RateLimitState (dataclass from rate_limit_tracker)
# or a dict of bucket snapshots.
if last_known_state is not None and _has_exhausted_bucket_in_object(last_known_state):
return True
return False
def _parse_buckets_from_headers(
headers: Optional[Mapping[str, str]],
) -> dict[str, tuple[Optional[int], Optional[float]]]:
"""Extract (remaining, reset_seconds) per bucket from x-ratelimit-* headers.
Returns empty dict when no rate-limit headers are present.
"""
if not headers:
return {}
lowered = {k.lower(): v for k, v in headers.items()}
if not any(k.startswith("x-ratelimit-") for k in lowered):
return {}
def _maybe_int(raw: Optional[str]) -> Optional[int]:
if raw is None:
return None
try:
return int(float(raw))
except (TypeError, ValueError):
return None
def _maybe_float(raw: Optional[str]) -> Optional[float]:
if raw is None:
return None
try:
return float(raw)
except (TypeError, ValueError):
return None
result: dict[str, tuple[Optional[int], Optional[float]]] = {}
for tag in ("requests", "requests-1h", "tokens", "tokens-1h"):
remaining = _maybe_int(lowered.get(f"x-ratelimit-remaining-{tag}"))
reset = _maybe_float(lowered.get(f"x-ratelimit-reset-{tag}"))
if remaining is not None or reset is not None:
result[tag] = (remaining, reset)
return result
def _has_exhausted_bucket(
buckets: Mapping[str, tuple[Optional[int], Optional[float]]],
) -> bool:
"""Return True when any bucket has remaining == 0 AND a meaningful reset window."""
for remaining, reset in buckets.values():
if remaining is None or remaining > 0:
continue
if reset is None:
continue
if reset >= _MIN_RESET_FOR_BREAKER_SECONDS:
return True
return False
def _has_exhausted_bucket_in_object(state: Any) -> bool:
"""Check a RateLimitState-like object for an exhausted bucket.
Accepts the dataclass from ``agent.rate_limit_tracker`` (buckets
exposed as attributes ``requests_min``, ``requests_hour``,
``tokens_min``, ``tokens_hour``) and falls back gracefully for any
object missing those attributes.
"""
for attr in ("requests_min", "requests_hour", "tokens_min", "tokens_hour"):
bucket = getattr(state, attr, None)
if bucket is None:
continue
limit = getattr(bucket, "limit", 0) or 0
remaining = getattr(bucket, "remaining", 0) or 0
# Prefer the adjusted "remaining_seconds_now" property when present;
# fall back to raw reset_seconds.
reset = getattr(bucket, "remaining_seconds_now", None)
if reset is None:
reset = getattr(bucket, "reset_seconds", 0.0) or 0.0
if limit <= 0:
continue
if remaining > 0:
continue
if reset >= _MIN_RESET_FOR_BREAKER_SECONDS:
return True
return False
-191
View File
@@ -1,191 +0,0 @@
"""
Contextual first-touch onboarding hints.
Instead of blocking first-run questionnaires, show a one-time hint the *first*
time a user hits a behavior fork message-while-running, first long-running
tool, etc. Each hint is shown once per install (tracked in ``config.yaml`` under
``onboarding.seen.<flag>``) and then never again.
Keep this module tiny and dependency-free so both the CLI and gateway can import
it without pulling in heavy modules.
"""
from __future__ import annotations
import logging
from pathlib import Path
from typing import Any, Mapping, Optional
logger = logging.getLogger(__name__)
# -------------------------------------------------------------------------
# Flag names (stable — used as config.yaml keys under onboarding.seen)
# -------------------------------------------------------------------------
BUSY_INPUT_FLAG = "busy_input_prompt"
TOOL_PROGRESS_FLAG = "tool_progress_prompt"
OPENCLAW_RESIDUE_FLAG = "openclaw_residue_cleanup"
# -------------------------------------------------------------------------
# Hint content
# -------------------------------------------------------------------------
def busy_input_hint_gateway(mode: str) -> str:
"""Hint shown the first time a user messages while the agent is busy.
``mode`` is the effective busy_input_mode that was just applied, so the
message matches reality ("I just interrupted…" vs "I just queued…").
"""
if mode == "queue":
return (
"💡 First-time tip — I queued your message instead of interrupting. "
"Send `/busy interrupt` to make new messages stop the current task "
"immediately, or `/busy status` to check. This notice won't appear again."
)
if mode == "steer":
return (
"💡 First-time tip — I steered your message into the current run; "
"it will arrive after the next tool call instead of interrupting. "
"Send `/busy interrupt` or `/busy queue` to change this, or "
"`/busy status` to check. This notice won't appear again."
)
return (
"💡 First-time tip — I just interrupted my current task to answer you. "
"Send `/busy queue` to queue follow-ups for after the current task instead, "
"`/busy steer` to inject them mid-run without interrupting, or "
"`/busy status` to check. This notice won't appear again."
)
def busy_input_hint_cli(mode: str) -> str:
"""CLI version of the busy-input hint (plain text, no markdown)."""
if mode == "queue":
return (
"(tip) Your message was queued for the next turn. "
"Use /busy interrupt to make Enter stop the current run instead, "
"or /busy steer to inject mid-run. This tip only shows once."
)
if mode == "steer":
return (
"(tip) Your message was steered into the current run; it arrives "
"after the next tool call. Use /busy interrupt or /busy queue to "
"change this. This tip only shows once."
)
return (
"(tip) Your message interrupted the current run. "
"Use /busy queue to queue messages for the next turn instead, "
"or /busy steer to inject mid-run. This tip only shows once."
)
def tool_progress_hint_gateway() -> str:
return (
"💡 First-time tip — that tool took a while and I'm streaming every step. "
"If the progress messages feel noisy, send `/verbose` to cycle modes "
"(all → new → off). This notice won't appear again."
)
def tool_progress_hint_cli() -> str:
return (
"(tip) That tool ran for a while. Use /verbose to cycle tool-progress "
"display modes (all -> new -> off -> verbose). This tip only shows once."
)
def openclaw_residue_hint_cli() -> str:
"""Banner shown the first time Hermes starts and finds ``~/.openclaw/``.
OpenClaw-era config, memory, and skill paths in ``~/.openclaw/`` will
otherwise attract the agent (memory entries like ``~/.openclaw/config.yaml``
get carried forward and the agent dutifully reads them). ``hermes claw
cleanup`` renames the directory so the agent stops finding it.
"""
return (
"Heads up — an OpenClaw workspace was detected at ~/.openclaw/.\n"
"After migrating, the agent can still get confused and read that "
"directory's config/memory instead of Hermes's.\n"
"Run `hermes claw cleanup` to archive it (rename → .openclaw.pre-migration). "
"This tip only shows once; rerun it any time with `hermes claw cleanup`."
)
def detect_openclaw_residue(home: Optional[Path] = None) -> bool:
"""Return True if an OpenClaw workspace directory is present in ``$HOME``.
Pure filesystem check no side effects. ``home`` override exists for tests.
"""
base = home or Path.home()
try:
return (base / ".openclaw").is_dir()
except OSError:
return False
# -------------------------------------------------------------------------
# State read / write
# -------------------------------------------------------------------------
def _get_seen_dict(config: Mapping[str, Any]) -> Mapping[str, Any]:
onboarding = config.get("onboarding") if isinstance(config, Mapping) else None
if not isinstance(onboarding, Mapping):
return {}
seen = onboarding.get("seen")
return seen if isinstance(seen, Mapping) else {}
def is_seen(config: Mapping[str, Any], flag: str) -> bool:
"""Return True if the user has already been shown this first-touch hint."""
return bool(_get_seen_dict(config).get(flag))
def mark_seen(config_path: Path, flag: str) -> bool:
"""Persist ``onboarding.seen.<flag> = True`` to ``config_path``.
Uses the atomic YAML writer so a concurrent process can't observe a
partially-written file. Returns True on success, False on any error
(including the config file being absent onboarding is best-effort).
"""
try:
import yaml
from utils import atomic_yaml_write
except Exception as e: # pragma: no cover — dependency issue
logger.debug("onboarding: failed to import yaml/utils: %s", e)
return False
try:
cfg: dict = {}
if config_path.exists():
with open(config_path, encoding="utf-8") as f:
cfg = yaml.safe_load(f) or {}
if not isinstance(cfg.get("onboarding"), dict):
cfg["onboarding"] = {}
seen = cfg["onboarding"].get("seen")
if not isinstance(seen, dict):
seen = {}
cfg["onboarding"]["seen"] = seen
if seen.get(flag) is True:
return True # already marked — nothing to do
seen[flag] = True
atomic_yaml_write(config_path, cfg)
return True
except Exception as e:
logger.debug("onboarding: failed to mark flag %s: %s", flag, e)
return False
__all__ = [
"BUSY_INPUT_FLAG",
"TOOL_PROGRESS_FLAG",
"OPENCLAW_RESIDUE_FLAG",
"busy_input_hint_gateway",
"busy_input_hint_cli",
"tool_progress_hint_gateway",
"tool_progress_hint_cli",
"openclaw_residue_hint_cli",
"detect_openclaw_residue",
"is_seen",
"mark_seen",
]
-34
View File
@@ -141,12 +141,6 @@ DEFAULT_AGENT_IDENTITY = (
"Be targeted and efficient in your exploration and investigations."
)
HERMES_AGENT_HELP_GUIDANCE = (
"If the user asks about configuring, setting up, or using Hermes Agent "
"itself, load the `hermes-agent` skill with skill_view(name='hermes-agent') "
"before answering. Docs: https://hermes-agent.nousresearch.com/docs"
)
MEMORY_GUIDANCE = (
"You have persistent memory across sessions. Save durable facts using the memory "
"tool: user preferences, environment details, tool quirks, and stable conventions. "
@@ -428,29 +422,6 @@ PLATFORM_HINTS = {
"your response. Images are sent as native photos, and other files arrive as downloadable "
"documents."
),
"yuanbao": (
"You are on Yuanbao (腾讯元宝), a Chinese AI assistant platform. "
"Markdown formatting is supported (code blocks, tables, bold/italic). "
"You CAN send media files natively — to deliver a file to the user, include "
"MEDIA:/absolute/path/to/file in your response. The file will be sent as a native "
"Yuanbao attachment: images (.jpg, .png, .webp, .gif) are sent as photos, "
"and other files (.pdf, .docx, .txt, .zip, etc.) arrive as downloadable documents "
"(max 50 MB). You can also include image URLs in markdown format ![alt](url) and "
"they will be downloaded and sent as native photos. "
"Do NOT tell the user you lack file-sending capability — use MEDIA: syntax "
"whenever a file delivery is appropriate.\n\n"
"Stickers (贴纸 / 表情包 / TIM face): Yuanbao has a built-in sticker catalogue. "
"When the user sends a sticker (you see '[emoji: 名称]' in their message) or asks "
"you to send/reply-with a 贴纸/表情/表情包, you MUST use the sticker tools:\n"
" 1. Call yb_search_sticker with a Chinese keyword (e.g. '666', '比心', '吃瓜', "
" '捂脸', '合十') to discover matching sticker_ids.\n"
" 2. Call yb_send_sticker with the chosen sticker_id or name — this sends a real "
" TIMFaceElem that renders as a native sticker in the chat.\n"
"DO NOT draw sticker-like PNGs with execute_code/Pillow/matplotlib and then send "
"them via MEDIA: or send_image_file. That produces a fake low-quality 'sticker' "
"image and is the WRONG path. Bare Unicode emoji in text is also not a substitute "
"— when a sticker is the right response, use yb_send_sticker."
),
}
# ---------------------------------------------------------------------------
@@ -854,11 +825,6 @@ def build_skills_system_prompt(
"Skills also encode the user's preferred approach, conventions, and quality standards "
"for tasks like code review, planning, and testing — load them even for tasks you "
"already know how to do, because the skill defines how it should be done here.\n"
"Whenever the user asks you to configure, set up, install, enable, disable, modify, "
"or troubleshoot Hermes Agent itself — its CLI, config, models, providers, tools, "
"skills, voice, gateway, plugins, or any feature — load the `hermes-agent` skill "
"first. It has the actual commands (e.g. `hermes config set …`, `hermes tools`, "
"`hermes setup`) so you don't have to guess or invent workarounds.\n"
"If a skill has issues, fix it with skill_manage(action='patch').\n"
"After difficult/iterative tasks, offer to save as a skill. "
"If a skill you loaded was missing steps, had wrong commands, or needed "
+1 -5
View File
@@ -754,11 +754,7 @@ def _resolve_effective_accept(
if env in ("1", "true", "yes", "on"):
return True
cfg_val = cfg.get("hooks_auto_accept", False)
if isinstance(cfg_val, bool):
return cfg_val
if isinstance(cfg_val, str):
return cfg_val.strip().lower() in ("1", "true", "yes", "on")
return False
return bool(cfg_val)
# ---------------------------------------------------------------------------
+2 -2
View File
@@ -329,7 +329,7 @@ def build_skill_invocation_message(
loaded_skill, skill_dir, skill_name = loaded
activation_note = (
f'[IMPORTANT: The user has invoked the "{skill_name}" skill, indicating they want '
f'[SYSTEM: The user has invoked the "{skill_name}" skill, indicating they want '
"you to follow its instructions. The full skill content is loaded below.]"
)
return _build_skill_message(
@@ -368,7 +368,7 @@ def build_preloaded_skills_prompt(
loaded_skill, skill_dir, skill_name = loaded
activation_note = (
f'[IMPORTANT: The user launched this CLI session with the "{skill_name}" skill '
f'[SYSTEM: The user launched this CLI session with the "{skill_name}" skill '
"preloaded. Treat its instructions as active guidance for the duration of this "
"session unless the user overrides them.]"
)
+4 -33
View File
@@ -6,18 +6,12 @@ adds latency to the user-facing reply.
import logging
import threading
from typing import Callable, Optional
from typing import Optional
from agent.auxiliary_client import call_llm
logger = logging.getLogger(__name__)
# Callback signature: (task_name, exception) -> None. Used to surface
# auxiliary failures to the user through AIAgent._emit_auxiliary_failure
# so silent-drops (e.g. OpenRouter 402 exhausting the fallback chain)
# become visible instead of piling up as NULL session titles.
FailureCallback = Callable[[str, BaseException], None]
_TITLE_PROMPT = (
"Generate a short, descriptive title (3-7 words) for a conversation that starts with the "
"following exchange. The title should capture the main topic or intent. "
@@ -25,21 +19,11 @@ _TITLE_PROMPT = (
)
def generate_title(
user_message: str,
assistant_response: str,
timeout: float = 30.0,
failure_callback: Optional[FailureCallback] = None,
) -> Optional[str]:
def generate_title(user_message: str, assistant_response: str, timeout: float = 30.0) -> Optional[str]:
"""Generate a session title from the first exchange.
Uses the auxiliary LLM client (cheapest/fastest available model).
Returns the title string or None on failure.
``failure_callback`` is invoked with ``(task, exception)`` when the
auxiliary call raises the caller typically wires this to
``AIAgent._emit_auxiliary_failure`` so the user sees a warning instead
of silently accumulating untitled sessions.
"""
# Truncate long messages to keep the request small
user_snippet = user_message[:500] if user_message else ""
@@ -68,15 +52,7 @@ def generate_title(
title = title[:77] + "..."
return title if title else None
except Exception as e:
# Log at WARNING so this shows up in agent.log without debug mode.
# Full detail at debug level for operators who need the stack.
logger.warning("Title generation failed: %s", e)
logger.debug("Title generation traceback", exc_info=True)
if failure_callback is not None:
try:
failure_callback("title generation", e)
except Exception:
logger.debug("Title generation failure_callback raised", exc_info=True)
logger.debug("Title generation failed: %s", e)
return None
@@ -85,7 +61,6 @@ def auto_title_session(
session_id: str,
user_message: str,
assistant_response: str,
failure_callback: Optional[FailureCallback] = None,
) -> None:
"""Generate and set a session title if one doesn't already exist.
@@ -106,9 +81,7 @@ def auto_title_session(
except Exception:
return
title = generate_title(
user_message, assistant_response, failure_callback=failure_callback
)
title = generate_title(user_message, assistant_response)
if not title:
return
@@ -125,7 +98,6 @@ def maybe_auto_title(
user_message: str,
assistant_response: str,
conversation_history: list,
failure_callback: Optional[FailureCallback] = None,
) -> None:
"""Fire-and-forget title generation after the first exchange.
@@ -147,7 +119,6 @@ def maybe_auto_title(
thread = threading.Thread(
target=auto_title_session,
args=(session_db, session_id, user_message, assistant_response),
kwargs={"failure_callback": failure_callback},
daemon=True,
name="auto-title",
)
+2 -7
View File
@@ -23,14 +23,9 @@ def get_transport(api_mode: str):
This allows gradual migration call sites can check for None
and fall back to the legacy code path.
"""
cls = _REGISTRY.get(api_mode)
if cls is None:
# The registry can be partially populated when a specific transport
# module was imported directly (for example chat_completions before
# codex). Discover on misses, not only when the registry is empty, so
# test/order-dependent imports do not make valid api_modes unavailable.
if not _REGISTRY:
_discover_transports()
cls = _REGISTRY.get(api_mode)
cls = _REGISTRY.get(api_mode)
if cls is None:
return None
return cls()
+4 -5
View File
@@ -31,15 +31,15 @@ class ChatCompletionsTransport(ProviderTransport):
def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> List[Dict[str, Any]]:
"""Messages are already in OpenAI format — sanitize Codex leaks only.
Strips Codex Responses API fields (``codex_reasoning_items`` /
``codex_message_items`` on the message, ``call_id``/``response_item_id``
on tool_calls) that strict chat-completions providers reject with 400/422.
Strips Codex Responses API fields (``codex_reasoning_items`` on the
message, ``call_id``/``response_item_id`` on tool_calls) that strict
chat-completions providers reject with 400/422.
"""
needs_sanitize = False
for msg in messages:
if not isinstance(msg, dict):
continue
if "codex_reasoning_items" in msg or "codex_message_items" in msg:
if "codex_reasoning_items" in msg:
needs_sanitize = True
break
tool_calls = msg.get("tool_calls")
@@ -59,7 +59,6 @@ class ChatCompletionsTransport(ProviderTransport):
if not isinstance(msg, dict):
continue
msg.pop("codex_reasoning_items", None)
msg.pop("codex_message_items", None)
tool_calls = msg.get("tool_calls")
if isinstance(tool_calls, list):
for tc in tool_calls:
-20
View File
@@ -120,24 +120,6 @@ class ResponsesApiTransport(ProviderTransport):
if request_overrides:
kwargs.update(request_overrides)
if is_codex_backend:
prompt_cache_key = kwargs.get("prompt_cache_key")
cache_scope_id = str(prompt_cache_key or session_id or "").strip()
if cache_scope_id:
existing_extra_headers = kwargs.get("extra_headers")
merged_extra_headers: Dict[str, str] = {}
if isinstance(existing_extra_headers, dict):
merged_extra_headers.update(
{
str(key): str(value)
for key, value in existing_extra_headers.items()
if key and value is not None
}
)
merged_extra_headers["session_id"] = cache_scope_id
merged_extra_headers["x-client-request-id"] = cache_scope_id
kwargs["extra_headers"] = merged_extra_headers
max_tokens = params.get("max_tokens")
if max_tokens is not None and not is_codex_backend:
kwargs["max_output_tokens"] = max_tokens
@@ -178,8 +160,6 @@ class ResponsesApiTransport(ProviderTransport):
provider_data = {}
if msg and hasattr(msg, "codex_reasoning_items") and msg.codex_reasoning_items:
provider_data["codex_reasoning_items"] = msg.codex_reasoning_items
if msg and hasattr(msg, "codex_message_items") and msg.codex_message_items:
provider_data["codex_message_items"] = msg.codex_message_items
if msg and hasattr(msg, "reasoning_details") and msg.reasoning_details:
provider_data["reasoning_details"] = msg.reasoning_details
+1 -6
View File
@@ -97,7 +97,7 @@ class NormalizedResponse:
Response-level ``provider_data`` examples:
* Anthropic: ``{"reasoning_details": [...]}``
* Codex: ``{"codex_reasoning_items": [...], "codex_message_items": [...]}``
* Codex: ``{"codex_reasoning_items": [...]}``
* Others: ``None``
"""
@@ -126,11 +126,6 @@ class NormalizedResponse:
pd = self.provider_data or {}
return pd.get("codex_reasoning_items")
@property
def codex_message_items(self):
pd = self.provider_data or {}
return pd.get("codex_message_items")
# ---------------------------------------------------------------------------
# Factory helpers
+8 -28
View File
@@ -606,7 +606,6 @@ platform_toolsets:
signal: [hermes-signal]
homeassistant: [hermes-homeassistant]
qqbot: [hermes-qqbot]
yuanbao: [hermes-yuanbao]
# =============================================================================
# Gateway Platform Settings
@@ -825,9 +824,7 @@ delegation:
# Display
# =============================================================================
display:
# Use compact banner mode (hides the ASCII-art banner, shows a single line).
# true: Compact single-line banner
# false: Full ASCII banner with tool/skill summary (default)
# Use compact banner mode
compact: false
# Tool progress display level (CLI and gateway)
@@ -841,19 +838,12 @@ display:
# Gateway-only natural mid-turn assistant updates.
# When true, completed assistant status messages are sent as separate chat
# messages. This is independent of tool_progress and gateway streaming.
# true: Send mid-turn assistant updates as separate messages (default)
# false: Only send the final response
interim_assistant_messages: true
# What Enter does when Hermes is already busy (CLI and gateway platforms).
# What Enter does when Hermes is already busy in the CLI.
# interrupt: Interrupt the current run and redirect Hermes (default)
# queue: Queue your message for the next turn
# steer: Inject your message mid-run via /steer, arriving at the agent
# after the next tool call — no interrupt, no role violation.
# Falls back to 'queue' if the agent isn't running yet or if
# images are attached (steer only carries text).
# Ctrl+C (or /stop in gateway) always interrupts regardless of this setting.
# Toggle at runtime with /busy <interrupt|queue|steer>.
# Ctrl+C always interrupts regardless of this setting.
busy_input_mode: interrupt
# Background process notifications (gateway/messaging only).
@@ -869,22 +859,17 @@ display:
# Play terminal bell when agent finishes a response.
# Useful for long-running tasks — your terminal will ding when the agent is done.
# Works over SSH. Most terminals can be configured to flash the taskbar or play a sound.
# true: Ring the terminal bell on each response
# false: Silent (default)
bell_on_complete: false
# Show model reasoning/thinking before each response.
# When enabled, a dim box shows the model's thought process above the response.
# Toggle at runtime with /reasoning show or /reasoning hide.
# true: Show the reasoning box
# false: Hide reasoning (default)
show_reasoning: false
# Stream tokens to the terminal as they arrive instead of waiting for the
# full response. The response box opens on first token and text appears
# line-by-line. Tool calls are still captured silently.
# true: Stream tokens as they arrive (default)
# false: Wait for the full response before rendering
# Stream tokens to the terminal in real-time. Disable to wait for full responses.
streaming: true
# ───────────────────────────────────────────────────────────────────────────
@@ -894,15 +879,10 @@ display:
# response box label, and branding text. Change at runtime with /skin <name>.
#
# Built-in skins:
# default — Classic Hermes gold/kawaii
# ares — Crimson/bronze war-god theme with spinner wings
# mono — Clean grayscale monochrome
# slate — Cool blue developer-focused
# daylight — Bright light-mode theme
# warm-lightmode — Warm paper-tone light-mode theme
# poseidon — Sea-green/teal Olympian theme
# sisyphus — Earthy stone-and-moss theme
# charizard — Fiery orange dragon theme
# default — Classic Hermes gold/kawaii
# ares — Crimson/bronze war-god theme with spinner wings
# mono — Clean grayscale monochrome
# slate — Cool blue developer-focused
#
# Custom skins: drop a YAML file in ~/.hermes/skins/<name>.yaml
# Schema (all fields optional, missing values inherit from default):
+215 -486
View File
File diff suppressed because it is too large Load Diff
+4 -45
View File
@@ -16,7 +16,7 @@ import uuid
from datetime import datetime, timedelta
from pathlib import Path
from hermes_constants import get_hermes_home
from typing import Optional, Dict, List, Any, Union
from typing import Optional, Dict, List, Any
logger = logging.getLogger(__name__)
@@ -311,12 +311,6 @@ def compute_next_run(schedule: Dict[str, Any], last_run_at: Optional[str] = None
elif schedule["kind"] == "cron":
if not HAS_CRONITER:
logger.warning(
"Cannot compute next run for cron schedule %r: 'croniter' "
"is not installed. Install the 'cron' extra (pip install "
"'hermes-agent[cron]') to re-enable recurring cron jobs.",
schedule.get("expr"),
)
return None
cron = croniter(schedule["expr"], now)
next_run = cron.get_next(datetime)
@@ -423,7 +417,6 @@ def create_job(
provider: Optional[str] = None,
base_url: Optional[str] = None,
script: Optional[str] = None,
context_from: Optional[Union[str, List[str]]] = None,
enabled_toolsets: Optional[List[str]] = None,
workdir: Optional[str] = None,
) -> Dict[str, Any]:
@@ -445,9 +438,6 @@ def create_job(
script: Optional path to a Python script whose stdout is injected into the
prompt each run. The script runs before the agent turn, and its output
is prepended as context. Useful for data collection / change detection.
context_from: Optional job ID (or list of job IDs) whose most recent output
is injected into the prompt as context before each run.
Useful for chaining cron jobs: job A finds data, job B processes it.
enabled_toolsets: Optional list of toolset names to restrict the agent to.
When set, only tools from these toolsets are loaded, reducing
token overhead. When omitted, all default tools are loaded.
@@ -491,14 +481,6 @@ def create_job(
normalized_toolsets = normalized_toolsets or None
normalized_workdir = _normalize_workdir(workdir)
# Normalize context_from: accept str or list of str, store as list or None
if isinstance(context_from, str):
context_from = [context_from.strip()] if context_from.strip() else None
elif isinstance(context_from, list):
context_from = [str(j).strip() for j in context_from if str(j).strip()] or None
else:
context_from = None
label_source = (prompt or (normalized_skills[0] if normalized_skills else None)) or "cron job"
job = {
"id": job_id,
@@ -510,7 +492,6 @@ def create_job(
"provider": normalized_provider,
"base_url": normalized_base_url,
"script": normalized_script,
"context_from": context_from,
"schedule": parsed_schedule,
"schedule_display": parsed_schedule.get("display", schedule),
"repeat": {
@@ -704,32 +685,10 @@ def mark_job_run(job_id: str, success: bool, error: Optional[str] = None,
# Compute next run
job["next_run_at"] = compute_next_run(job["schedule"], now)
# If no next run, decide whether this is terminal completion
# (one-shot) or a transient failure (recurring schedule couldn't
# compute — e.g. 'croniter' missing from the runtime env).
# Recurring jobs must NEVER be silently disabled: that turns a
# missing runtime dep into "job completed" and the user's
# schedule quietly goes off. See issue #16265.
# If no next run (one-shot completed), disable
if job["next_run_at"] is None:
kind = job.get("schedule", {}).get("kind")
if kind in ("cron", "interval"):
job["state"] = "error"
if not job.get("last_error"):
job["last_error"] = (
"Failed to compute next run for recurring "
"schedule (is the 'croniter' package "
"installed in the gateway's Python env?)"
)
logger.error(
"Job '%s' (%s) could not compute next_run_at; "
"leaving enabled and marking state=error so the "
"job is not silently disabled.",
job.get("name", job["id"]),
kind,
)
else:
job["enabled"] = False
job["state"] = "completed"
job["enabled"] = False
job["state"] = "completed"
elif job.get("state") != "paused":
job["state"] = "scheduled"
+4 -77
View File
@@ -77,7 +77,7 @@ _KNOWN_DELIVERY_PLATFORMS = frozenset({
"telegram", "discord", "slack", "whatsapp", "signal",
"matrix", "mattermost", "homeassistant", "dingtalk", "feishu",
"wecom", "wecom_callback", "weixin", "sms", "email", "webhook", "bluebubbles",
"qqbot", "yuanbao",
"qqbot",
})
# Platforms that support a configured cron/notification home target, mapped to
@@ -337,7 +337,6 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option
"sms": Platform.SMS,
"bluebubbles": Platform.BLUEBUBBLES,
"qqbot": Platform.QQBOT,
"yuanbao": Platform.YUANBAO,
}
# Optionally wrap the content with a header/footer so the user knows this
@@ -672,51 +671,10 @@ def _build_job_prompt(job: dict, prerun_script: Optional[tuple] = None) -> str:
f"{prompt}"
)
# Inject output from referenced cron jobs as context.
context_from = job.get("context_from")
if context_from:
from cron.jobs import OUTPUT_DIR
if isinstance(context_from, str):
context_from = [context_from]
for source_job_id in context_from:
# Guard against path traversal — valid job IDs are 12-char hex strings
if not source_job_id or not all(c in "0123456789abcdef" for c in source_job_id):
logger.warning("context_from: skipping invalid job_id %r", source_job_id)
continue
try:
job_output_dir = OUTPUT_DIR / source_job_id
if not job_output_dir.exists():
continue # silent skip — no output yet
output_files = sorted(
job_output_dir.glob("*.md"),
key=lambda f: f.stat().st_mtime,
reverse=True,
)
if not output_files:
continue # silent skip — no output yet
latest_output = output_files[0].read_text(encoding="utf-8").strip()
# Truncate to 8K characters to avoid prompt bloat
_MAX_CONTEXT_CHARS = 8000
if len(latest_output) > _MAX_CONTEXT_CHARS:
latest_output = latest_output[:_MAX_CONTEXT_CHARS] + "\n\n[... output truncated ...]"
if latest_output:
prompt = (
f"## Output from job '{source_job_id}'\n"
"The following is the most recent output from a preceding "
"cron job. Use it as context for your analysis.\n\n"
f"```\n{latest_output}\n```\n\n"
f"{prompt}"
)
else:
continue # silent skip — empty output
except (OSError, PermissionError) as e:
logger.warning("context_from: failed to read output for job %r: %s", source_job_id, e)
# silent skip — do not pollute the prompt with error messages
# Always prepend cron execution guidance so the agent knows how
# delivery works and can suppress delivery when appropriate.
cron_hint = (
"[IMPORTANT: You are running as a scheduled cron job. "
"[SYSTEM: You are running as a scheduled cron job. "
"DELIVERY: Your final response will be automatically delivered "
"to the user — do NOT use send_message or try to deliver "
"the output yourself. Just produce your report/output as your "
@@ -752,7 +710,7 @@ def _build_job_prompt(job: dict, prerun_script: Optional[tuple] = None) -> str:
parts.append("")
parts.extend(
[
f'[IMPORTANT: The user has invoked the "{skill_name}" skill, indicating they want you to follow its instructions. The full skill content is loaded below.]',
f'[SYSTEM: The user has invoked the "{skill_name}" skill, indicating they want you to follow its instructions. The full skill content is loaded below.]',
"",
content,
]
@@ -760,7 +718,7 @@ def _build_job_prompt(job: dict, prerun_script: Optional[tuple] = None) -> str:
if skipped:
notice = (
f"[IMPORTANT: The following skill(s) were listed for this job but could not be found "
f"[SYSTEM: The following skill(s) were listed for this job but could not be found "
f"and were skipped: {', '.join(skipped)}. "
f"Start your response with a brief notice so the user is aware, e.g.: "
f"'⚠️ Skill(s) not found and skipped: {', '.join(skipped)}']"
@@ -822,8 +780,6 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
logger.info("Running job '%s' (ID: %s)", job_name, job_id)
logger.info("Prompt: %s", prompt[:100])
agent = None
# Mark this as a cron session so the approval system can apply cron_mode.
# This env var is process-wide and persists for the lifetime of the
# scheduler process — every job this process runs is a cron job.
@@ -1172,24 +1128,6 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
_session_db.close()
except (Exception, KeyboardInterrupt) as e:
logger.debug("Job '%s': failed to close SQLite session store: %s", job_id, e)
# Release subprocesses, terminal sandboxes, browser daemons, and the
# main OpenAI/httpx client held by this ephemeral cron agent. Without
# this, a gateway that ticks cron every N minutes leaks fds per job
# until it hits EMFILE (#10200 / "too many open files").
try:
if agent is not None:
agent.close()
except (Exception, KeyboardInterrupt) as e:
logger.debug("Job '%s': failed to close agent resources: %s", job_id, e)
# Each cron run spins up a short-lived worker thread whose event loop
# dies as soon as the ``ThreadPoolExecutor`` shuts down. Any async
# httpx clients cached under that loop are now unusable — reap them
# so their transports don't accumulate in the process-global cache.
try:
from agent.auxiliary_client import cleanup_stale_async_clients
cleanup_stale_async_clients()
except Exception as e:
logger.debug("Job '%s': failed to reap stale auxiliary clients: %s", job_id, e)
def tick(verbose: bool = True, adapters=None, loop=None) -> int:
@@ -1329,17 +1267,6 @@ def tick(verbose: bool = True, adapters=None, loop=None) -> int:
_futures.append(_tick_pool.submit(_ctx.run, _process_job, job))
_results.extend(f.result() for f in _futures)
# Best-effort sweep of MCP stdio subprocesses that survived their
# session teardown during this tick. Runs AFTER every job has
# finished so active sessions (including live user chats) are
# never touched — only PIDs explicitly detected as orphans in
# tools.mcp_tool._run_stdio's finally block are reaped.
try:
from tools.mcp_tool import _kill_orphaned_mcp_children
_kill_orphaned_mcp_children()
except Exception as _e:
logger.debug("Post-tick MCP orphan cleanup failed: %s", _e)
return sum(_results)
finally:
if fcntl:
+7 -9
View File
@@ -41,15 +41,6 @@ if [ "$(id -u)" = "0" ]; then
echo "Warning: chown failed (rootless container?) — continuing anyway"
fi
# Ensure config.yaml is readable by the hermes runtime user even if it was
# edited on the host after initial ownership setup. Must run here (as root)
# rather than after the gosu drop, otherwise a non-root caller like
# `docker run -u $(id -u):$(id -g)` hits "Operation not permitted" (#15865).
if [ -f "$HERMES_HOME/config.yaml" ]; then
chown hermes:hermes "$HERMES_HOME/config.yaml" 2>/dev/null || true
chmod 640 "$HERMES_HOME/config.yaml" 2>/dev/null || true
fi
echo "Dropping root privileges"
exec gosu hermes "$0" "$@"
fi
@@ -76,6 +67,13 @@ if [ ! -f "$HERMES_HOME/config.yaml" ]; then
cp "$INSTALL_DIR/cli-config.yaml.example" "$HERMES_HOME/config.yaml"
fi
# Ensure the main config file remains accessible to the hermes runtime user
# even if it was edited on the host after initial ownership setup.
if [ -f "$HERMES_HOME/config.yaml" ]; then
chown hermes:hermes "$HERMES_HOME/config.yaml"
chmod 640 "$HERMES_HOME/config.yaml"
fi
# SOUL.md
if [ ! -f "$HERMES_HOME/SOUL.md" ]; then
cp "$INSTALL_DIR/docker/SOUL.md" "$HERMES_HOME/SOUL.md"
-1
View File
@@ -36,7 +36,6 @@
imports = [
./nix/packages.nix
./nix/overlays.nix
./nix/nixosModules.nix
./nix/checks.nix
./nix/devShell.nix
+14 -67
View File
@@ -57,7 +57,7 @@ def _session_entry_name(origin: Dict[str, Any]) -> str:
# Build / refresh
# ---------------------------------------------------------------------------
async def build_channel_directory(adapters: Dict[Any, Any]) -> Dict[str, Any]:
def build_channel_directory(adapters: Dict[Any, Any]) -> Dict[str, Any]:
"""
Build a channel directory from connected platform adapters and session data.
@@ -72,7 +72,7 @@ async def build_channel_directory(adapters: Dict[Any, Any]) -> Dict[str, Any]:
if platform == Platform.DISCORD:
platforms["discord"] = _build_discord(adapter)
elif platform == Platform.SLACK:
platforms["slack"] = await _build_slack(adapter)
platforms["slack"] = _build_slack(adapter)
except Exception as e:
logger.warning("Channel directory: failed to build %s: %s", platform.value, e)
@@ -136,66 +136,21 @@ def _build_discord(adapter) -> List[Dict[str, str]]:
return channels
async def _build_slack(adapter) -> List[Dict[str, Any]]:
"""List Slack channels the bot has joined across all workspaces.
Uses ``users.conversations`` against each workspace's web client. Pulls
public + private channels the bot is a member of, then merges in DMs
discovered from session history (IMs aren't useful to enumerate
proactively).
"""
team_clients = getattr(adapter, "_team_clients", None) or {}
if not team_clients:
def _build_slack(adapter) -> List[Dict[str, str]]:
"""List Slack channels the bot has joined."""
# Slack adapter may expose a web client
client = getattr(adapter, "_app", None) or getattr(adapter, "_client", None)
if not client:
return _build_from_sessions("slack")
channels: List[Dict[str, Any]] = []
seen_ids: set = set()
try:
from tools.send_message_tool import _send_slack # noqa: F401
# Use the Slack Web API directly if available
except Exception:
pass
for team_id, client in team_clients.items():
try:
cursor: Optional[str] = None
for _page in range(20): # safety cap on pagination
response = await client.users_conversations(
types="public_channel,private_channel",
exclude_archived=True,
limit=200,
cursor=cursor,
)
if not response.get("ok"):
logger.warning(
"Channel directory: users.conversations not ok for team %s: %s",
team_id,
response.get("error", "unknown"),
)
break
for ch in response.get("channels", []):
cid = ch.get("id")
name = ch.get("name")
if not cid or not name or cid in seen_ids:
continue
seen_ids.add(cid)
channels.append({
"id": cid,
"name": name,
"type": "private" if ch.get("is_private") else "channel",
})
cursor = (response.get("response_metadata") or {}).get("next_cursor")
if not cursor:
break
except Exception as e:
logger.warning(
"Channel directory: failed to list Slack channels for team %s: %s",
team_id, e,
)
continue
# Merge in DM/group entries discovered from session history.
for entry in _build_from_sessions("slack"):
if entry.get("id") not in seen_ids:
channels.append(entry)
seen_ids.add(entry.get("id"))
return channels
# Fallback to session data
return _build_from_sessions("slack")
def _build_from_sessions(platform_name: str) -> List[Dict[str, str]]:
@@ -268,14 +223,6 @@ def resolve_channel_name(platform_name: str, name: str) -> Optional[str]:
if not channels:
return None
# 0. Exact ID match — case-sensitive, no normalization. Lets callers pass
# raw platform IDs (e.g. Slack "C0B0QV5434G") even when the format guard
# in _parse_target_ref hasn't recognized them as explicit.
raw = name.strip()
for ch in channels:
if ch.get("id") == raw:
return ch["id"]
query = _normalize_channel_query(name)
# 1. Exact name match, including the display labels shown by send_message(action="list")
+3 -84
View File
@@ -67,7 +67,6 @@ class Platform(Enum):
WEIXIN = "weixin"
BLUEBUBBLES = "bluebubbles"
QQBOT = "qqbot"
YUANBAO = "yuanbao"
@dataclass
@@ -196,14 +195,6 @@ class StreamingConfig:
edit_interval: float = 1.0 # Seconds between message edits (Telegram rate-limits at ~1/s)
buffer_threshold: int = 40 # Chars before forcing an edit
cursor: str = "" # Cursor shown during streaming
# Ported from openclaw/openclaw#72038. When >0, the final edit for
# a long-running streamed response is delivered as a fresh message
# if the original preview has been visible for at least this many
# seconds, so the platform's visible timestamp reflects completion
# time instead of the preview creation time. Currently applied to
# Telegram only (other platforms ignore the setting). Default 60s
# matches the OpenClaw rollout. Set to 0 to disable.
fresh_final_after_seconds: float = 60.0
def to_dict(self) -> Dict[str, Any]:
return {
@@ -212,7 +203,6 @@ class StreamingConfig:
"edit_interval": self.edit_interval,
"buffer_threshold": self.buffer_threshold,
"cursor": self.cursor,
"fresh_final_after_seconds": self.fresh_final_after_seconds,
}
@classmethod
@@ -225,9 +215,6 @@ class StreamingConfig:
edit_interval=float(data.get("edit_interval", 1.0)),
buffer_threshold=int(data.get("buffer_threshold", 40)),
cursor=data.get("cursor", ""),
fresh_final_after_seconds=float(
data.get("fresh_final_after_seconds", 60.0)
),
)
@@ -327,9 +314,6 @@ class GatewayConfig:
# QQBot uses extra dict for app credentials
elif platform == Platform.QQBOT and config.extra.get("app_id") and config.extra.get("client_secret"):
connected.append(platform)
# Yuanbao uses extra dict for app credentials
elif platform == Platform.YUANBAO and config.extra.get("app_id") and config.extra.get("app_secret"):
connected.append(platform)
# DingTalk uses client_id/client_secret from config.extra or env vars
elif platform == Platform.DINGTALK and (
config.extra.get("client_id") or os.getenv("DINGTALK_CLIENT_ID")
@@ -566,8 +550,6 @@ def load_gateway_config() -> GatewayConfig:
existing = {}
# Deep-merge extra dicts so gateway.json defaults survive
merged_extra = {**existing.get("extra", {}), **plat_block.get("extra", {})}
if plat_name == Platform.SLACK.value and "enabled" in plat_block:
merged_extra["_enabled_explicit"] = True
merged = {**existing, **plat_block}
if merged_extra:
merged["extra"] = merged_extra
@@ -588,8 +570,6 @@ def load_gateway_config() -> GatewayConfig:
)
if "reply_prefix" in platform_cfg:
bridged["reply_prefix"] = platform_cfg["reply_prefix"]
if "reply_in_thread" in platform_cfg:
bridged["reply_in_thread"] = platform_cfg["reply_in_thread"]
if "require_mention" in platform_cfg:
bridged["require_mention"] = platform_cfg["require_mention"]
if "free_response_channels" in platform_cfg:
@@ -604,7 +584,7 @@ def load_gateway_config() -> GatewayConfig:
bridged["group_policy"] = platform_cfg["group_policy"]
if "group_allow_from" in platform_cfg:
bridged["group_allow_from"] = platform_cfg["group_allow_from"]
if plat in (Platform.DISCORD, Platform.SLACK) and "channel_skill_bindings" in platform_cfg:
if plat == Platform.DISCORD and "channel_skill_bindings" in platform_cfg:
bridged["channel_skill_bindings"] = platform_cfg["channel_skill_bindings"]
if "channel_prompts" in platform_cfg:
channel_prompts = platform_cfg["channel_prompts"]
@@ -612,21 +592,16 @@ def load_gateway_config() -> GatewayConfig:
bridged["channel_prompts"] = {str(k): v for k, v in channel_prompts.items()}
else:
bridged["channel_prompts"] = channel_prompts
enabled_was_explicit = "enabled" in platform_cfg
if not bridged and not enabled_was_explicit:
if not bridged:
continue
plat_data = platforms_data.setdefault(plat.value, {})
if not isinstance(plat_data, dict):
plat_data = {}
platforms_data[plat.value] = plat_data
if enabled_was_explicit:
plat_data["enabled"] = platform_cfg["enabled"]
extra = plat_data.setdefault("extra", {})
if not isinstance(extra, dict):
extra = {}
plat_data["extra"] = extra
if plat == Platform.SLACK and enabled_was_explicit:
extra["_enabled_explicit"] = True
extra.update(bridged)
# Slack settings → env vars (env vars take precedence)
@@ -634,8 +609,6 @@ def load_gateway_config() -> GatewayConfig:
if isinstance(slack_cfg, dict):
if "require_mention" in slack_cfg and not os.getenv("SLACK_REQUIRE_MENTION"):
os.environ["SLACK_REQUIRE_MENTION"] = str(slack_cfg["require_mention"]).lower()
if "strict_mention" in slack_cfg and not os.getenv("SLACK_STRICT_MENTION"):
os.environ["SLACK_STRICT_MENTION"] = str(slack_cfg["strict_mention"]).lower()
if "allow_bots" in slack_cfg and not os.getenv("SLACK_ALLOW_BOTS"):
os.environ["SLACK_ALLOW_BOTS"] = str(slack_cfg["allow_bots"]).lower()
frc = slack_cfg.get("free_response_channels")
@@ -945,20 +918,8 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
slack_token = os.getenv("SLACK_BOT_TOKEN")
if slack_token:
if Platform.SLACK not in config.platforms:
# No yaml config for Slack — env-only setup, enable it
config.platforms[Platform.SLACK] = PlatformConfig()
config.platforms[Platform.SLACK].enabled = True
else:
slack_config = config.platforms[Platform.SLACK]
enabled_was_explicit = bool(slack_config.extra.pop("_enabled_explicit", False))
if not slack_config.enabled and not enabled_was_explicit:
# Top-level Slack settings such as channel prompts should not
# turn an env-token setup into a disabled platform. Only an
# explicit slack.enabled/platforms.slack.enabled false should.
slack_config.enabled = True
# If yaml config exists, respect its enabled flag (don't override
# explicit enabled: false). Token is still stored so skills that
# send Slack messages can use it without activating the gateway adapter.
config.platforms[Platform.SLACK].enabled = True
config.platforms[Platform.SLACK].token = slack_token
slack_home = os.getenv("SLACK_HOME_CHANNEL")
if slack_home and Platform.SLACK in config.platforms:
@@ -1315,48 +1276,6 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
name=os.getenv("QQBOT_HOME_CHANNEL_NAME") or os.getenv(qq_home_name_env, "Home"),
)
# Yuanbao — YUANBAO_APP_ID preferred
yuanbao_app_id = os.getenv("YUANBAO_APP_ID") or os.getenv("YUANBAO_APP_KEY")
yuanbao_app_secret = os.getenv("YUANBAO_APP_SECRET")
if yuanbao_app_id and yuanbao_app_secret:
if Platform.YUANBAO not in config.platforms:
config.platforms[Platform.YUANBAO] = PlatformConfig()
config.platforms[Platform.YUANBAO].enabled = True
extra = config.platforms[Platform.YUANBAO].extra
extra["app_id"] = yuanbao_app_id
extra["app_secret"] = yuanbao_app_secret
yuanbao_bot_id = os.getenv("YUANBAO_BOT_ID")
if yuanbao_bot_id:
extra["bot_id"] = yuanbao_bot_id
yuanbao_ws_url = os.getenv("YUANBAO_WS_URL")
if yuanbao_ws_url:
extra["ws_url"] = yuanbao_ws_url
yuanbao_api_domain = os.getenv("YUANBAO_API_DOMAIN")
if yuanbao_api_domain:
extra["api_domain"] = yuanbao_api_domain
yuanbao_route_env = os.getenv("YUANBAO_ROUTE_ENV")
if yuanbao_route_env:
extra["route_env"] = yuanbao_route_env
yuanbao_home = os.getenv("YUANBAO_HOME_CHANNEL")
if yuanbao_home:
config.platforms[Platform.YUANBAO].home_channel = HomeChannel(
platform=Platform.YUANBAO,
chat_id=yuanbao_home,
name=os.getenv("YUANBAO_HOME_CHANNEL_NAME", "Home"),
)
yuanbao_dm_policy = os.getenv("YUANBAO_DM_POLICY")
if yuanbao_dm_policy:
extra["dm_policy"] = yuanbao_dm_policy.strip().lower()
yuanbao_dm_allow_from = os.getenv("YUANBAO_DM_ALLOW_FROM")
if yuanbao_dm_allow_from:
extra["dm_allow_from"] = yuanbao_dm_allow_from
yuanbao_group_policy = os.getenv("YUANBAO_GROUP_POLICY")
if yuanbao_group_policy:
extra["group_policy"] = yuanbao_group_policy.strip().lower()
yuanbao_group_allow_from = os.getenv("YUANBAO_GROUP_ALLOW_FROM")
if yuanbao_group_allow_from:
extra["group_allow_from"] = yuanbao_group_allow_from
# Session settings
idle_minutes = os.getenv("SESSION_IDLE_MINUTES")
if idle_minutes:
+1 -3
View File
@@ -79,9 +79,7 @@ _PLATFORM_DEFAULTS: dict[str, dict[str, Any]] = {
"discord": _TIER_HIGH,
# Tier 2 — edit support, often customer/workspace channels
# Slack: tool_progress off by default — Bolt posts cannot be edited like CLI;
# "new"/"all" spam permanent lines in channels (hermes-agent#14663).
"slack": {**_TIER_MEDIUM, "tool_progress": "off"},
"slack": _TIER_MEDIUM,
"mattermost": _TIER_MEDIUM,
"matrix": _TIER_MEDIUM,
"feishu": _TIER_MEDIUM,
+11 -57
View File
@@ -28,7 +28,6 @@ def mirror_to_session(
message_text: str,
source_label: str = "cli",
thread_id: Optional[str] = None,
user_id: Optional[str] = None,
) -> bool:
"""
Append a delivery-mirror message to the target session's transcript.
@@ -40,20 +39,9 @@ def mirror_to_session(
All errors are caught -- this is never fatal.
"""
try:
session_id = _find_session_id(
platform,
str(chat_id),
thread_id=thread_id,
user_id=user_id,
)
session_id = _find_session_id(platform, str(chat_id), thread_id=thread_id)
if not session_id:
logger.debug(
"Mirror: no session found for %s:%s:%s:%s",
platform,
chat_id,
thread_id,
user_id,
)
logger.debug("Mirror: no session found for %s:%s:%s", platform, chat_id, thread_id)
return False
mirror_msg = {
@@ -71,33 +59,17 @@ def mirror_to_session(
return True
except Exception as e:
logger.debug(
"Mirror failed for %s:%s:%s:%s: %s",
platform,
chat_id,
thread_id,
user_id,
e,
)
logger.debug("Mirror failed for %s:%s:%s: %s", platform, chat_id, thread_id, e)
return False
def _find_session_id(
platform: str,
chat_id: str,
thread_id: Optional[str] = None,
user_id: Optional[str] = None,
) -> Optional[str]:
def _find_session_id(platform: str, chat_id: str, thread_id: Optional[str] = None) -> Optional[str]:
"""
Find the active session_id for a platform + chat_id pair.
Scans sessions.json entries and matches where origin.chat_id == chat_id
on the right platform. DM session keys don't embed the chat_id
(e.g. "agent:main:telegram:dm"), so we check the origin dict.
When *user_id* is provided, prefer exact sender matches. If multiple
same-chat candidates exist and none matches the user, return None instead
of guessing and contaminating another participant's session.
"""
if not _SESSIONS_INDEX.exists():
return None
@@ -109,7 +81,8 @@ def _find_session_id(
return None
platform_lower = platform.lower()
candidates = []
best_match = None
best_updated = ""
for _key, entry in data.items():
origin = entry.get("origin") or {}
@@ -123,31 +96,12 @@ def _find_session_id(
origin_thread_id = origin.get("thread_id")
if thread_id is not None and str(origin_thread_id or "") != str(thread_id):
continue
candidates.append(entry)
updated = entry.get("updated_at", "")
if updated > best_updated:
best_updated = updated
best_match = entry.get("session_id")
if not candidates:
return None
if user_id:
exact_user_matches = [
entry for entry in candidates
if str((entry.get("origin") or {}).get("user_id") or "") == str(user_id)
]
if exact_user_matches:
candidates = exact_user_matches
elif len(candidates) > 1:
return None
elif len(candidates) > 1:
distinct_user_ids = {
str((entry.get("origin") or {}).get("user_id") or "").strip()
for entry in candidates
if str((entry.get("origin") or {}).get("user_id") or "").strip()
}
if len(distinct_user_ids) > 1:
return None
best_entry = max(candidates, key=lambda entry: entry.get("updated_at", ""))
return best_entry.get("session_id")
return best_match
def _append_to_jsonl(session_id: str, message: dict) -> None:
-2
View File
@@ -10,12 +10,10 @@ Each adapter handles:
from .base import BasePlatformAdapter, MessageEvent, SendResult
from .qqbot import QQAdapter
from .yuanbao import YuanbaoAdapter
__all__ = [
"BasePlatformAdapter",
"MessageEvent",
"SendResult",
"QQAdapter",
"YuanbaoAdapter",
]
-49
View File
@@ -9,7 +9,6 @@ Exposes an HTTP server with endpoints:
- GET /v1/models lists hermes-agent as an available model
- POST /v1/runs start a run, returns run_id immediately (202)
- GET /v1/runs/{run_id}/events SSE stream of structured lifecycle events
- POST /v1/runs/{run_id}/stop interrupt a running agent
- GET /health health check
- GET /health/detailed rich status for cross-container dashboard probing
@@ -587,9 +586,6 @@ class APIServerAdapter(BasePlatformAdapter):
self._run_streams: Dict[str, "asyncio.Queue[Optional[Dict]]"] = {}
# Creation timestamps for orphaned-run TTL sweep
self._run_streams_created: Dict[str, float] = {}
# Active run agent/task references for stop support
self._active_run_agents: Dict[str, Any] = {}
self._active_run_tasks: Dict[str, "asyncio.Task"] = {}
self._session_db: Optional[Any] = None # Lazy-init SessionDB for session continuity
@staticmethod
@@ -2445,7 +2441,6 @@ class APIServerAdapter(BasePlatformAdapter):
stream_delta_callback=_text_cb,
tool_progress_callback=event_cb,
)
self._active_run_agents[run_id] = agent
def _run_sync():
r = agent.run_conversation(
user_message=user_message,
@@ -2485,11 +2480,8 @@ class APIServerAdapter(BasePlatformAdapter):
q.put_nowait(None)
except Exception:
pass
self._active_run_agents.pop(run_id, None)
self._active_run_tasks.pop(run_id, None)
task = asyncio.create_task(_run_and_close())
self._active_run_tasks[run_id] = task
try:
self._background_tasks.add(task)
except TypeError:
@@ -2548,44 +2540,6 @@ class APIServerAdapter(BasePlatformAdapter):
return response
async def _handle_stop_run(self, request: "web.Request") -> "web.Response":
"""POST /v1/runs/{run_id}/stop — interrupt a running agent."""
auth_err = self._check_auth(request)
if auth_err:
return auth_err
run_id = request.match_info["run_id"]
agent = self._active_run_agents.get(run_id)
task = self._active_run_tasks.get(run_id)
if agent is None and task is None:
return web.json_response(_openai_error(f"Run not found: {run_id}", code="run_not_found"), status=404)
if agent is not None:
try:
agent.interrupt("Stop requested via API")
except Exception:
pass
if task is not None and not task.done():
task.cancel()
# Bounded wait: run_conversation() executes in the default
# executor thread which task.cancel() cannot preempt — we rely on
# agent.interrupt() above to break the loop. Cap the wait so a
# slow/unresponsive interrupt can't hang this handler.
try:
await asyncio.wait_for(asyncio.shield(task), timeout=5.0)
except asyncio.TimeoutError:
logger.warning(
"[api_server] stop for run %s timed out after 5s; "
"agent may still be finishing the current step",
run_id,
)
except (asyncio.CancelledError, Exception):
pass
return web.json_response({"run_id": run_id, "status": "stopping"})
async def _sweep_orphaned_runs(self) -> None:
"""Periodically clean up run streams that were never consumed."""
while True:
@@ -2600,8 +2554,6 @@ class APIServerAdapter(BasePlatformAdapter):
logger.debug("[api_server] sweeping orphaned run %s", run_id)
self._run_streams.pop(run_id, None)
self._run_streams_created.pop(run_id, None)
self._active_run_agents.pop(run_id, None)
self._active_run_tasks.pop(run_id, None)
# ------------------------------------------------------------------
# BasePlatformAdapter interface
@@ -2637,7 +2589,6 @@ class APIServerAdapter(BasePlatformAdapter):
# Structured event streaming
self._app.router.add_post("/v1/runs", self._handle_runs)
self._app.router.add_get("/v1/runs/{run_id}/events", self._handle_run_events)
self._app.router.add_post("/v1/runs/{run_id}/stop", self._handle_stop_run)
# Start background sweep to clean up orphaned (unconsumed) run streams
sweep_task = asyncio.create_task(self._sweep_orphaned_runs())
try:
+6 -187
View File
@@ -336,39 +336,6 @@ def proxy_kwargs_for_aiohttp(proxy_url: str | None) -> tuple[dict, dict]:
return {}, {"proxy": proxy_url}
def is_host_excluded_by_no_proxy(hostname: str, no_proxy_value: str | None = None) -> bool:
"""Return True when ``hostname`` matches a ``NO_PROXY`` entry.
Supports comma- or whitespace-separated entries with optional leading dots
and ``*.`` wildcards, which match both the apex domain and subdomains.
"""
raw = no_proxy_value
if raw is None:
raw = os.environ.get("NO_PROXY") or os.environ.get("no_proxy") or ""
raw = raw.strip()
if not raw:
return False
lower_hostname = hostname.lower()
for entry in re.split(r"[\s,]+", raw):
normalized = entry.strip().lower()
if not normalized:
continue
if normalized == "*":
return True
if normalized.startswith("*."):
normalized = normalized[2:]
elif normalized.startswith("."):
normalized = normalized[1:]
if lower_hostname == normalized or lower_hostname.endswith(f".{normalized}"):
return True
return False
from dataclasses import dataclass, field
from datetime import datetime
from pathlib import Path
@@ -726,15 +693,7 @@ SUPPORTED_DOCUMENT_TYPES = {
".pdf": "application/pdf",
".md": "text/markdown",
".txt": "text/plain",
".csv": "text/csv",
".log": "text/plain",
".json": "application/json",
".xml": "application/xml",
".yaml": "application/yaml",
".yml": "application/yaml",
".toml": "application/toml",
".ini": "text/plain",
".cfg": "text/plain",
".zip": "application/zip",
".docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
".xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
@@ -1023,61 +982,6 @@ def resolve_channel_prompt(
return None
def resolve_channel_skills(
config_extra: dict,
channel_id: str,
parent_id: str | None = None,
) -> list[str] | None:
"""Resolve auto-loaded skill(s) for a channel/thread from platform config.
Looks up ``channel_skill_bindings`` in the adapter's ``config.extra`` dict.
Config format::
channel_skill_bindings:
- id: "C0123" # Slack channel ID or Discord channel/forum ID
skills: ["skill-a", "skill-b"]
- id: "D0ABCDE"
skill: "solo-skill" # single string also accepted
Prefers an exact match on *channel_id*; falls back to *parent_id*
(useful for forum threads / Slack threads inheriting the parent channel's
binding).
Returns a deduplicated list of skill names (order preserved), or None if
no match is found.
"""
bindings = config_extra.get("channel_skill_bindings") or []
if not isinstance(bindings, list) or not bindings:
return None
ids_to_check: set[str] = set()
if channel_id:
ids_to_check.add(str(channel_id))
if parent_id:
ids_to_check.add(str(parent_id))
if not ids_to_check:
return None
for entry in bindings:
if not isinstance(entry, dict):
continue
entry_id = str(entry.get("id", ""))
if entry_id in ids_to_check:
skills = entry.get("skills") or entry.get("skill")
if isinstance(skills, str):
s = skills.strip()
return [s] if s else None
if isinstance(skills, list) and skills:
seen: list[str] = []
for name in skills:
if not isinstance(name, str):
continue
nm = name.strip()
if nm and nm not in seen:
seen.append(nm)
return seen or None
return None
class BasePlatformAdapter(ABC):
"""
Base class for platform adapters.
@@ -1121,20 +1025,7 @@ class BasePlatformAdapter(ABC):
self._post_delivery_callbacks: Dict[str, Any] = {}
self._expected_cancelled_tasks: set[asyncio.Task] = set()
self._busy_session_handler: Optional[Callable[[MessageEvent, str], Awaitable[bool]]] = None
# Auto-TTS on voice input: ``_auto_tts_default`` is the global default
# (``voice.auto_tts`` in config.yaml, pushed by GatewayRunner on connect).
# Per-chat overrides live in two sets populated from ``_voice_mode``:
# - ``_auto_tts_enabled_chats``: chat explicitly opted in via ``/voice on``
# or ``/voice tts`` (mode is ``voice_only`` or ``all``). Fires even when
# the global default is False.
# - ``_auto_tts_disabled_chats``: chat explicitly opted out via
# ``/voice off`` (mode is ``off``). Suppresses auto-TTS even when the
# global default is True.
# The gate in _process_message() is:
# fire if chat in _auto_tts_enabled_chats
# OR (_auto_tts_default and chat not in _auto_tts_disabled_chats)
self._auto_tts_default: bool = False
self._auto_tts_enabled_chats: set = set()
# Chats where auto-TTS on voice input is disabled (set by /voice off)
self._auto_tts_disabled_chats: set = set()
# Chats where typing indicator is paused (e.g. during approval waits).
# _keep_typing skips send_typing when the chat_id is in this set.
@@ -1156,21 +1047,6 @@ class BasePlatformAdapter(ABC):
def fatal_error_retryable(self) -> bool:
return self._fatal_error_retryable
def _should_auto_tts_for_chat(self, chat_id: str) -> bool:
"""Whether auto-TTS on voice input should fire for ``chat_id``.
Decision layers (Issue #16007):
1. Explicit ``/voice on`` or ``/voice tts`` always fire (even if
``voice.auto_tts`` is False).
2. Explicit ``/voice off`` never fire.
3. Fall back to the global ``voice.auto_tts`` config default.
"""
if chat_id in self._auto_tts_enabled_chats:
return True
if chat_id in self._auto_tts_disabled_chats:
return False
return bool(self._auto_tts_default)
def set_fatal_error_handler(self, handler: Callable[["BasePlatformAdapter"], Awaitable[None] | None]) -> None:
self._fatal_error_handler = handler
@@ -1354,27 +1230,6 @@ class BasePlatformAdapter(ABC):
"""
return SendResult(success=False, error="Not supported")
async def delete_message(
self,
chat_id: str,
message_id: str,
) -> bool:
"""
Delete a previously sent message. Optional platforms that don't
support deletion return ``False`` and callers fall back to leaving
the message in place.
Used by the stream consumer's fresh-final cleanup path (see
openclaw/openclaw#72038) to remove long-lived preview messages
after sending the completed reply as a fresh message so the
platform's visible timestamp reflects completion time.
Returns ``True`` on successful deletion, ``False`` otherwise.
Subclasses should override for platforms with a deletion API
(e.g. Telegram ``deleteMessage``).
"""
return False
async def send_typing(self, chat_id: str, metadata=None) -> None:
"""
Send a typing indicator.
@@ -1702,41 +1557,13 @@ class BasePlatformAdapter(ABC):
the agent is waiting for dangerous-command approval). This is critical
for Slack's Assistant API where ``assistant_threads_setStatus`` disables
the compose box pausing lets the user type ``/approve`` or ``/deny``.
Each ``send_typing`` call is bounded by a ~1.5s timeout so a slow
network round-trip can't stall the refresh cadence. Telegram- and
Discord-side typing expire after ~5s; if any individual send_typing
takes longer than the refresh interval, the bubble would die and
stay dead until that call returns. Abandoning the slow call lets
the next tick fire a fresh send_typing on schedule as long as
one of them succeeds within the 5s platform-side window, the bubble
stays visible across provider stalls / upstream API timeouts.
"""
# Bound each send_typing round-trip so the refresh cadence isn't
# gated on network health. Must stay below ``interval`` so a slow
# call gets abandoned before the next scheduled tick.
_send_typing_timeout = max(0.25, min(1.5, interval - 0.25))
try:
while True:
if stop_event is not None and stop_event.is_set():
return
if chat_id not in self._typing_paused:
try:
await asyncio.wait_for(
self.send_typing(chat_id, metadata=metadata),
timeout=_send_typing_timeout,
)
except asyncio.TimeoutError:
# Slow network — abandon this tick, keep the loop
# on schedule so the next send_typing fires fresh.
pass
except asyncio.CancelledError:
raise
except Exception as typing_err:
logger.debug(
"[%s] send_typing error (non-fatal): %s",
self.name, typing_err,
)
await self.send_typing(chat_id, metadata=metadata)
if stop_event is None:
await asyncio.sleep(interval)
continue
@@ -2387,14 +2214,12 @@ class BasePlatformAdapter(ABC):
logger.info("[%s] extract_local_files found %d file(s) in response", self.name, len(local_files))
# Auto-TTS: if voice message, generate audio FIRST (before sending text)
# Gated via ``_should_auto_tts_for_chat``: fires when the chat has
# an explicit ``/voice on|tts`` opt-in OR when ``voice.auto_tts`` is
# True globally and no ``/voice off`` has been issued.
# Skipped when the chat has voice mode disabled (/voice off)
_tts_path = None
if (self._should_auto_tts_for_chat(event.source.chat_id)
and event.message_type == MessageType.VOICE
if (event.message_type == MessageType.VOICE
and text_content
and not media_files):
and not media_files
and event.source.chat_id not in self._auto_tts_disabled_chats):
try:
from tools.tts_tool import text_to_speech_tool, check_tts_requirements
if check_tts_requirements():
@@ -2718,9 +2543,6 @@ class BasePlatformAdapter(ABC):
user_id_alt: Optional[str] = None,
chat_id_alt: Optional[str] = None,
is_bot: bool = False,
guild_id: Optional[str] = None,
parent_chat_id: Optional[str] = None,
message_id: Optional[str] = None,
) -> SessionSource:
"""Helper to build a SessionSource for this platform."""
# Normalize empty topic to None
@@ -2738,9 +2560,6 @@ class BasePlatformAdapter(ABC):
user_id_alt=user_id_alt,
chat_id_alt=chat_id_alt,
is_bot=is_bot,
guild_id=str(guild_id) if guild_id else None,
parent_chat_id=str(parent_chat_id) if parent_chat_id else None,
message_id=str(message_id) if message_id else None,
)
@abstractmethod
+20 -7
View File
@@ -2315,6 +2315,11 @@ class DiscordAdapter(BasePlatformAdapter):
async def slash_background(interaction: discord.Interaction, prompt: str):
await self._run_simple_slash(interaction, f"/background {prompt}", "Background task started~")
@tree.command(name="btw", description="Ephemeral side question using session context")
@discord.app_commands.describe(question="Your side question (no tools, not persisted)")
async def slash_btw(interaction: discord.Interaction, question: str):
await self._run_simple_slash(interaction, f"/btw {question}")
# ── Auto-register any gateway-available commands not yet on the tree ──
# This ensures new commands added to COMMAND_REGISTRY in
# hermes_cli/commands.py automatically appear as Discord slash
@@ -2679,8 +2684,21 @@ class DiscordAdapter(BasePlatformAdapter):
skills: ["skill-a", "skill-b"]
Also checks parent_id so forum threads inherit the forum's bindings.
"""
from gateway.platforms.base import resolve_channel_skills
return resolve_channel_skills(self.config.extra, channel_id, parent_id)
bindings = self.config.extra.get("channel_skill_bindings", [])
if not bindings:
return None
ids_to_check = {channel_id}
if parent_id:
ids_to_check.add(parent_id)
for entry in bindings:
entry_id = str(entry.get("id", ""))
if entry_id in ids_to_check:
skills = entry.get("skills") or entry.get("skill")
if isinstance(skills, str):
return [skills]
if isinstance(skills, list) and skills:
return list(dict.fromkeys(skills)) # dedup, preserve order
return None
def _resolve_channel_prompt(self, channel_id: str, parent_id: str | None = None) -> str | None:
"""Resolve a Discord per-channel prompt, preferring the exact channel over its parent."""
@@ -3243,7 +3261,6 @@ class DiscordAdapter(BasePlatformAdapter):
if auto_thread and not skip_thread and not is_voice_linked_channel and not is_reply_message:
thread = await self._auto_create_thread(message)
if thread:
parent_channel_id = str(message.channel.id)
is_thread = True
thread_id = str(thread.id)
auto_threaded_channel = thread
@@ -3294,7 +3311,6 @@ class DiscordAdapter(BasePlatformAdapter):
chat_topic = self._get_effective_topic(message.channel, is_thread=is_thread)
# Build source
guild = getattr(message, "guild", None)
source = self.build_source(
chat_id=str(effective_channel.id),
chat_name=chat_name,
@@ -3304,9 +3320,6 @@ class DiscordAdapter(BasePlatformAdapter):
thread_id=thread_id,
chat_topic=chat_topic,
is_bot=getattr(message.author, "bot", False),
guild_id=str(guild.id) if guild else None,
parent_chat_id=parent_channel_id,
message_id=str(message.id),
)
# Build media URLs -- download image attachments to local cache so the
-3
View File
@@ -28,7 +28,6 @@ from email.header import decode_header
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from email.mime.base import MIMEBase
from email.utils import formatdate
from email import encoders
from pathlib import Path
from typing import Any, Dict, List, Optional
@@ -505,7 +504,6 @@ class EmailAdapter(BasePlatformAdapter):
msg["In-Reply-To"] = original_msg_id
msg["References"] = original_msg_id
msg["Date"] = formatdate(localtime=True)
msg_id = f"<hermes-{uuid.uuid4().hex[:12]}@{self._address.split('@')[1]}>"
msg["Message-ID"] = msg_id
@@ -588,7 +586,6 @@ class EmailAdapter(BasePlatformAdapter):
msg["In-Reply-To"] = original_msg_id
msg["References"] = original_msg_id
msg["Date"] = formatdate(localtime=True)
msg_id = f"<hermes-{uuid.uuid4().hex[:12]}@{self._address.split('@')[1]}>"
msg["Message-ID"] = msg_id
-9
View File
@@ -57,15 +57,6 @@ class MessageDeduplicator:
if len(self._seen) > self._max_size:
cutoff = now - self._ttl
self._seen = {k: v for k, v in self._seen.items() if v > cutoff}
if len(self._seen) > self._max_size:
# TTL pruning alone does not cap the cache when every entry is
# still fresh. Keep the newest entries so the helper's
# max_size bound is enforced under sustained traffic.
newest = sorted(
self._seen.items(),
key=lambda item: item[1],
)[-self._max_size:]
self._seen = dict(newest)
return False
def clear(self):
+3 -73
View File
@@ -1178,83 +1178,13 @@ class MatrixAdapter(BasePlatformAdapter):
# Event callbacks
# ------------------------------------------------------------------
def _is_self_sender(self, sender: str) -> bool:
"""Return True if the sender refers to the bot's own account.
Matrix user IDs are byte-compared after trimming whitespace and
lowercasing some homeservers normalize the localpart case
differently at different API surfaces, and the reply-loop tail
of the "hall of mirrors" bug (#15763) has been observed with the
bot's own account bypassing a case-sensitive equality check.
When ``self._user_id`` is empty (whoami hasn't resolved yet, or
login failed), we cannot prove a sender is NOT us, so we return
True defensively an unidentified bot dropping its own events
is always preferable to falling into an echo loop.
"""
own = (self._user_id or "").strip().lower()
if not own:
return True
return sender.strip().lower() == own
@staticmethod
def _is_system_or_bridge_sender(sender: str) -> bool:
"""Return True if the sender looks like a system / bridge / appservice
identity rather than a real user.
Appservice namespaces on Matrix conventionally prefix bot / puppet
user IDs with an underscore (e.g. ``@_telegram_12345:server``,
``@_discord_999:server``, ``@_slack_...:server``). Server-notices
bots and bridge-controller bots on many homeservers use the same
pattern.
We treat these as system identities for pairing purposes: they
should never be offered a pairing code, because an operator
approving the code would hand the bridge itself permanent
authorization and every outbound message relayed by the bridge
would then loop back into the agent as an "authorized user
message", which is the root of issue #15763.
Matches:
``@_something:server`` appservice namespace convention
``@:server`` malformed / empty localpart
``:server`` malformed, no leading ``@``
"""
s = (sender or "").strip()
if not s:
return True
# Localpart is everything between leading '@' and ':'
if s.startswith("@"):
s = s[1:]
if ":" in s:
localpart, _, _ = s.partition(":")
else:
localpart = s
if not localpart:
return True
return localpart.startswith("_")
async def _on_room_message(self, event: Any) -> None:
"""Handle incoming room message events (text, media)."""
room_id = str(getattr(event, "room_id", ""))
sender = str(getattr(event, "sender", ""))
# Ignore own messages (case-insensitive; also drops when our own
# user_id hasn't been resolved yet — see _is_self_sender docstring
# and issue #15763).
if self._is_self_sender(sender):
return
# Ignore appservice / bridge / system identities so they never
# trigger the pairing flow. Once a bridge user is paired, every
# outbound message it relays would loop back as an authorized
# user message (the "hall of mirrors" in #15763).
if self._is_system_or_bridge_sender(sender):
logger.debug(
"Matrix: ignoring system/bridge sender %s in %s",
sender,
room_id,
)
# Ignore own messages.
if sender == self._user_id:
return
# Deduplicate by event ID.
@@ -1724,7 +1654,7 @@ class MatrixAdapter(BasePlatformAdapter):
async def _on_reaction(self, event: Any) -> None:
"""Handle incoming reaction events."""
sender = str(getattr(event, "sender", ""))
if self._is_self_sender(sender):
if sender == self._user_id:
return
event_id = str(getattr(event, "event_id", ""))
if self._is_duplicate_event(event_id):
File diff suppressed because it is too large Load Diff
-45
View File
@@ -1209,31 +1209,6 @@ class TelegramAdapter(BasePlatformAdapter):
)
return SendResult(success=False, error=str(e))
async def delete_message(self, chat_id: str, message_id: str) -> bool:
"""Delete a previously sent Telegram message.
Used by the stream consumer's fresh-final cleanup path (ported
from openclaw/openclaw#72038) to remove long-lived preview
messages after sending the completed reply as a fresh message.
Telegram's Bot API ``deleteMessage`` works for bot-posted
messages in the last 48 hours. Failures are non-fatal the
caller leaves the preview in place and logs at debug level.
"""
if not self._bot:
return False
try:
await self._bot.delete_message(
chat_id=int(chat_id),
message_id=int(message_id),
)
return True
except Exception as e:
logger.debug(
"[%s] Failed to delete Telegram message %s: %s",
self.name, message_id, e,
)
return False
async def send_update_prompt(
self, chat_id: str, prompt: str, default: str = "",
session_key: str = "",
@@ -2353,26 +2328,6 @@ class TelegramAdapter(BasePlatformAdapter):
user = getattr(entity, "user", None)
if user and getattr(user, "id", None) == bot_id:
return True
elif entity_type == "bot_command" and expected:
# Telegram's official group-disambiguation form for slash
# commands (``/cmd@botname``) is emitted as a single
# ``bot_command`` entity covering the whole span — there
# is no accompanying ``mention`` entity. Treat it as a
# direct address to this bot when the ``@botname`` suffix
# matches. This is the form Telegram's own command menu
# autocomplete produces in groups, so dropping it at the
# mention gate would break /new, /reset, /help, ... for
# every group that has ``require_mention`` enabled (#15415).
offset = int(getattr(entity, "offset", -1))
length = int(getattr(entity, "length", 0))
if offset < 0 or length <= 0:
continue
command_text = source_text[offset:offset + length]
at_index = command_text.find("@")
if at_index < 0:
continue
if command_text[at_index:].strip().lower() == expected:
return True
return False
def _message_matches_mention_patterns(self, message: Message) -> bool:
File diff suppressed because it is too large Load Diff
-647
View File
@@ -1,647 +0,0 @@
"""
yuanbao_media.py 元宝平台媒体处理模块
提供 COS 上传文件下载TIM 媒体消息构建等功能
移植自 TypeScript media.tsyuanbao-openclaw-plugin
使用 httpx 替代 cos-nodejs-sdk-v5避免引入额外 SDK 依赖
COS 上传流程
1. 调用 genUploadInfo 获取临时凭证tmpSecretId/tmpSecretKey/sessionToken
2. 用临时凭证通过 HMAC-SHA1 签名构建 Authorization
3. HTTP PUT 上传到 COS
TIM 消息体构建
- buildImageMsgBody() TIMImageElem
- buildFileMsgBody() TIMFileElem
"""
from __future__ import annotations
import hashlib
import hmac
import logging
import os
import re
import secrets
import struct
import time
import urllib.parse
from datetime import datetime, timezone, timedelta
from typing import Optional, Any
import httpx
logger = logging.getLogger(__name__)
# ============ 常量 ============
UPLOAD_INFO_PATH = "/api/resource/genUploadInfo"
DEFAULT_API_DOMAIN = "yuanbao.tencent.com"
DEFAULT_MAX_SIZE_MB = 50
# COS 加速域名后缀(优先使用全球加速)
COS_USE_ACCELERATE = True
# ============ 类型映射 ============
# MIME → image_format 数字(TIM 协议字段)
_MIME_TO_IMAGE_FORMAT: dict[str, int] = {
"image/jpeg": 1,
"image/jpg": 1,
"image/gif": 2,
"image/png": 3,
"image/bmp": 4,
"image/webp": 255,
"image/heic": 255,
"image/tiff": 255,
}
# 文件扩展名 → MIME
_EXT_TO_MIME: dict[str, str] = {
".jpg": "image/jpeg",
".jpeg": "image/jpeg",
".png": "image/png",
".gif": "image/gif",
".webp": "image/webp",
".bmp": "image/bmp",
".heic": "image/heic",
".tiff": "image/tiff",
".ico": "image/x-icon",
".pdf": "application/pdf",
".doc": "application/msword",
".docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
".xls": "application/vnd.ms-excel",
".xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
".ppt": "application/vnd.ms-powerpoint",
".pptx": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
".txt": "text/plain",
".zip": "application/zip",
".tar": "application/x-tar",
".gz": "application/gzip",
".mp3": "audio/mpeg",
".mp4": "video/mp4",
".wav": "audio/wav",
".ogg": "audio/ogg",
".webm": "video/webm",
}
# ============ 工具函数 ============
def guess_mime_type(filename: str) -> str:
"""根据文件扩展名猜测 MIME 类型。"""
ext = os.path.splitext(filename)[-1].lower()
return _EXT_TO_MIME.get(ext, "application/octet-stream")
def is_image(filename: str, mime_type: str = "") -> bool:
"""判断是否为图片类型。"""
if mime_type.startswith("image/"):
return True
ext = os.path.splitext(filename)[-1].lower()
return ext in {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp", ".heic", ".tiff", ".ico"}
def get_image_format(mime_type: str) -> int:
"""获取 TIM 图片格式编号。"""
return _MIME_TO_IMAGE_FORMAT.get(mime_type.lower(), 255)
def md5_hex(data: bytes) -> str:
"""计算 MD5 十六进制摘要。"""
return hashlib.md5(data).hexdigest()
def generate_file_id() -> str:
"""生成随机文件 ID(32 位 hex)。"""
return secrets.token_hex(16)
# ============ 图片尺寸解析(纯 Python,无需 Pillow ============
def parse_image_size(data: bytes) -> Optional[dict[str, int]]:
"""
解析图片宽高支持 JPEG/PNG/GIF/WebP无需第三方依赖
返回 {"width": w, "height": h} None无法识别
"""
return (
_parse_png_size(data)
or _parse_jpeg_size(data)
or _parse_gif_size(data)
or _parse_webp_size(data)
)
def _parse_png_size(buf: bytes) -> Optional[dict[str, int]]:
if len(buf) < 24:
return None
if buf[:4] != b"\x89PNG":
return None
w = struct.unpack(">I", buf[16:20])[0]
h = struct.unpack(">I", buf[20:24])[0]
return {"width": w, "height": h}
def _parse_jpeg_size(buf: bytes) -> Optional[dict[str, int]]:
if len(buf) < 4 or buf[0] != 0xFF or buf[1] != 0xD8:
return None
i = 2
while i < len(buf) - 9:
if buf[i] != 0xFF:
i += 1
continue
marker = buf[i + 1]
if marker in (0xC0, 0xC2):
h = struct.unpack(">H", buf[i + 5: i + 7])[0]
w = struct.unpack(">H", buf[i + 7: i + 9])[0]
return {"width": w, "height": h}
if i + 3 < len(buf):
i += 2 + struct.unpack(">H", buf[i + 2: i + 4])[0]
else:
break
return None
def _parse_gif_size(buf: bytes) -> Optional[dict[str, int]]:
if len(buf) < 10:
return None
sig = buf[:6].decode("ascii", errors="replace")
if sig not in ("GIF87a", "GIF89a"):
return None
w = struct.unpack("<H", buf[6:8])[0]
h = struct.unpack("<H", buf[8:10])[0]
return {"width": w, "height": h}
def _parse_webp_size(buf: bytes) -> Optional[dict[str, int]]:
if len(buf) < 16:
return None
if buf[:4] != b"RIFF" or buf[8:12] != b"WEBP":
return None
chunk = buf[12:16].decode("ascii", errors="replace")
if chunk == "VP8 ":
if len(buf) >= 30 and buf[23] == 0x9D and buf[24] == 0x01 and buf[25] == 0x2A:
w = struct.unpack("<H", buf[26:28])[0] & 0x3FFF
h = struct.unpack("<H", buf[28:30])[0] & 0x3FFF
return {"width": w, "height": h}
elif chunk == "VP8L":
if len(buf) >= 25 and buf[20] == 0x2F:
bits = struct.unpack("<I", buf[21:25])[0]
w = (bits & 0x3FFF) + 1
h = ((bits >> 14) & 0x3FFF) + 1
return {"width": w, "height": h}
elif chunk == "VP8X":
if len(buf) >= 30:
w = (buf[24] | (buf[25] << 8) | (buf[26] << 16)) + 1
h = (buf[27] | (buf[28] << 8) | (buf[29] << 16)) + 1
return {"width": w, "height": h}
return None
# ============ URL 下载 ============
async def download_url(
url: str,
max_size_mb: int = DEFAULT_MAX_SIZE_MB,
) -> tuple[bytes, str]:
"""
下载 URL 内容返回 (bytes, content_type)
Args:
url: HTTP(S) URL
max_size_mb: 最大允许大小MB超过则抛出异常
Returns:
(data_bytes, content_type_string)
Raises:
ValueError: 内容超过大小限制
httpx.HTTPError: 网络/HTTP 错误
"""
max_bytes = max_size_mb * 1024 * 1024
async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
# 先 HEAD 检查大小
try:
head = await client.head(url)
content_length = int(head.headers.get("content-length", 0) or 0)
if content_length > 0 and content_length > max_bytes:
raise ValueError(
f"文件过大: {content_length / 1024 / 1024:.1f} MB > {max_size_mb} MB"
)
except httpx.HTTPStatusError:
pass # 部分服务器不支持 HEAD,忽略
# GET 下载(流式读取,防止超限)
async with client.stream("GET", url) as resp:
resp.raise_for_status()
content_type = resp.headers.get("content-type", "").split(";")[0].strip()
chunks: list[bytes] = []
downloaded = 0
async for chunk in resp.aiter_bytes(65536):
downloaded += len(chunk)
if downloaded > max_bytes:
raise ValueError(
f"文件过大: 已超过 {max_size_mb} MB 限制"
)
chunks.append(chunk)
data = b"".join(chunks)
return data, content_type
# ============ COS 鉴权(HMAC-SHA1 ============
def _cos_sign(
method: str,
path: str,
params: dict[str, str],
headers: dict[str, str],
secret_id: str,
secret_key: str,
start_time: Optional[int] = None,
expire_seconds: int = 3600,
) -> str:
"""
构建 COS 请求签名q-sign-algorithm=sha1 方案
参考https://cloud.tencent.com/document/product/436/7778
Args:
method: HTTP 方法小写 "put"
path: URL 路径URL encode 后的小写
params: URL 查询参数 dict用于签名
headers: 参与签名的请求头 dictkey 需小写
secret_id: 临时 SecretIdtmpSecretId
secret_key: 临时 SecretKeytmpSecretKey
start_time: 签名起始 Unix 时间戳默认 now
expire_seconds: 签名有效期默认 3600
Returns:
Authorization header 完整字符串
"""
now = int(time.time())
q_sign_time = f"{start_time or now};{(start_time or now) + expire_seconds}"
# Step 1: SignKey = HMAC-SHA1(SecretKey, q-sign-time)
sign_key = hmac.new(
secret_key.encode("utf-8"),
q_sign_time.encode("utf-8"),
hashlib.sha1,
).hexdigest()
# Step 2: HttpString
# 参数和头部需按字典序排列,key 小写
sorted_params = sorted((k.lower(), urllib.parse.quote(str(v), safe="") ) for k, v in params.items())
sorted_headers = sorted((k.lower(), urllib.parse.quote(str(v), safe="") ) for k, v in headers.items())
url_param_list = ";".join(k for k, _ in sorted_params)
url_params = "&".join(f"{k}={v}" for k, v in sorted_params)
header_list = ";".join(k for k, _ in sorted_headers)
header_str = "&".join(f"{k}={v}" for k, v in sorted_headers)
http_string = "\n".join([
method.lower(),
path,
url_params,
header_str,
"",
])
# Step 3: StringToSign = sha1 hash of HttpString
sha1_of_http = hashlib.sha1(http_string.encode("utf-8")).hexdigest()
string_to_sign = "\n".join([
"sha1",
q_sign_time,
sha1_of_http,
"",
])
# Step 4: Signature = HMAC-SHA1(SignKey, StringToSign)
signature = hmac.new(
sign_key.encode("utf-8"),
string_to_sign.encode("utf-8"),
hashlib.sha1,
).hexdigest()
return (
f"q-sign-algorithm=sha1"
f"&q-ak={secret_id}"
f"&q-sign-time={q_sign_time}"
f"&q-key-time={q_sign_time}"
f"&q-header-list={header_list}"
f"&q-url-param-list={url_param_list}"
f"&q-signature={signature}"
)
# ============ 主要公开 API ============
async def get_cos_credentials(
app_key: str,
api_domain: str,
token: str,
filename: str = "file",
file_id: Optional[str] = None,
bot_id: str = "",
route_env: str = "",
) -> dict:
"""
调用 genUploadInfo 接口获取 COS 临时密钥及上传配置
Args:
app_key: 应用 Key用于 X-ID
api_domain: API 域名 https://bot.yuanbao.tencent.com
token: 当前有效的签票 tokenX-Token
filename: 待上传的文件名含扩展名
file_id: 客户端生成的唯一文件 ID不传则自动生成
bot_id: Bot 账号 ID用于 X-ID
Returns:
COS 上传配置 dict包含以下字段
bucketName (str) COS Bucket 名称
region (str) COS 地域
location (str) 上传 Key对象路径
encryptTmpSecretId (str) 临时 SecretId
encryptTmpSecretKey(str) 临时 SecretKey
encryptToken (str) SessionToken
startTime (int) 凭证起始时间戳Unix
expiredTime (int) 凭证过期时间戳Unix
resourceUrl (str) 上传后的公网访问 URL
resourceID (str) 资源 ID可选
Raises:
RuntimeError: 接口返回非 0 code 或字段缺失
"""
if file_id is None:
file_id = generate_file_id()
upload_url = f"{api_domain.rstrip('/')}{UPLOAD_INFO_PATH}"
headers = {
"Content-Type": "application/json",
"X-Token": token,
"X-ID": bot_id or app_key,
"X-Source": "web",
}
if route_env:
headers["X-Route-Env"] = route_env
body = {
"fileName": filename,
"fileId": file_id,
"docFrom": "localDoc",
"docOpenId": "",
}
async with httpx.AsyncClient(timeout=15.0) as client:
resp = await client.post(upload_url, json=body, headers=headers)
resp.raise_for_status()
result: dict[str, Any] = resp.json()
code = result.get("code")
if code != 0 and code is not None:
raise RuntimeError(
f"genUploadInfo 失败: code={code}, msg={result.get('msg', '')}"
)
data = result.get("data") or result
required_fields = ["bucketName", "location"]
missing = [f for f in required_fields if not data.get(f)]
if missing:
raise RuntimeError(
f"genUploadInfo 返回字段不完整: 缺少字段 {missing}"
)
return data
async def upload_to_cos(
file_bytes: bytes,
filename: str,
content_type: str,
credentials: dict,
bucket: str,
region: str,
) -> dict:
"""
通过 httpx PUT 请求将文件上传到 COS
使用临时凭证tmpSecretId/tmpSecretKey/sessionToken构建 HMAC-SHA1 签名
Args:
file_bytes: 文件二进制内容
filename: 文件名用于辅助计算 MIMEUUID
content_type: MIME 类型 "image/jpeg"
credentials: get_cos_credentials() 返回的 dict包含
encryptTmpSecretId tmpSecretId
encryptTmpSecretKey tmpSecretKey
encryptToken sessionToken
location COS key对象路径
resourceUrl 上传后公网 URL
startTime 凭证起始时间Unix
expiredTime 凭证过期时间Unix
bucket: COS Bucket 名称 chatbot-1234567890
region: COS 地域 ap-guangzhou
Returns:
上传结果 dict包含
url (str) COS 公网访问 URL
uuid (str) 文件内容 MD5
size (int) 文件大小字节
width (int, optional) 图片宽度仅图片
height (int, optional) 图片高度仅图片
Raises:
httpx.HTTPStatusError: COS 返回非 2xx 状态
RuntimeError: credentials 字段缺失
"""
secret_id: str = credentials.get("encryptTmpSecretId", "")
secret_key: str = credentials.get("encryptTmpSecretKey", "")
session_token: str = credentials.get("encryptToken", "")
cos_key: str = credentials.get("location", "")
resource_url: str = credentials.get("resourceUrl", "")
start_time: Optional[int] = credentials.get("startTime")
expired_time: Optional[int] = credentials.get("expiredTime")
if not secret_id or not secret_key or not cos_key:
raise RuntimeError(
f"COS credentials 不完整: secretId={bool(secret_id)}, "
f"secretKey={bool(secret_key)}, location={bool(cos_key)}"
)
# 构建 COS 上传 URL(优先使用全球加速域名)
if COS_USE_ACCELERATE:
cos_host = f"{bucket}.cos.accelerate.myqcloud.com"
else:
cos_host = f"{bucket}.cos.{region}.myqcloud.com"
# URL encode cos_key(保留 /
encoded_key = urllib.parse.quote(cos_key, safe="/")
cos_url = f"https://{cos_host}/{encoded_key.lstrip('/')}"
# 确定 Content-Type
if not content_type or content_type == "application/octet-stream":
if is_image(filename):
content_type = guess_mime_type(filename)
else:
content_type = "application/octet-stream"
# 计算文件 MD5 + size
file_uuid = md5_hex(file_bytes)
file_size = len(file_bytes)
# 参与签名的请求头
sign_headers = {
"host": cos_host,
"content-type": content_type,
"x-cos-security-token": session_token,
}
# 计算签名有效期
now = int(time.time())
sign_start = start_time if start_time else now
sign_expire = (expired_time - now) if expired_time and expired_time > now else 3600
authorization = _cos_sign(
method="put",
path=f"/{encoded_key.lstrip('/')}",
params={},
headers=sign_headers,
secret_id=secret_id,
secret_key=secret_key,
start_time=sign_start,
expire_seconds=sign_expire,
)
put_headers = {
"Authorization": authorization,
"Content-Type": content_type,
"x-cos-security-token": session_token,
}
logger.info(
"COS PUT: bucket=%s region=%s key=%s size=%d mime=%s",
bucket, region, cos_key, file_size, content_type,
)
async with httpx.AsyncClient(timeout=120.0) as client:
resp = await client.put(
cos_url,
content=file_bytes,
headers=put_headers,
)
resp.raise_for_status()
# 解析图片尺寸(仅图片类型)
result: dict[str, Any] = {
"url": resource_url or cos_url,
"uuid": file_uuid,
"size": file_size,
}
if content_type.startswith("image/"):
size_info = parse_image_size(file_bytes)
if size_info:
result["width"] = size_info["width"]
result["height"] = size_info["height"]
logger.info(
"COS 上传成功: url=%s size=%d",
result["url"], file_size,
)
return result
# ============ TIM 媒体消息构建 ============
def build_image_msg_body(
url: str,
uuid: Optional[str] = None,
filename: Optional[str] = None,
size: int = 0,
width: int = 0,
height: int = 0,
mime_type: str = "",
) -> list[dict]:
"""
构建腾讯 IM TIMImageElem 消息体
参考https://cloud.tencent.com/document/product/269/2720
Args:
url: 图片公网访问 URLCOS resourceUrl
uuid: 文件 UUIDMD5 或其他唯一标识
filename: 文件名uuid 为空时作为备用
size: 文件大小字节
width: 图片宽度像素
height: 图片高度像素
mime_type: MIME 类型用于确定 image_format
Returns:
TIMImageElem 消息体列表适合直接放入 msg_body
"""
_uuid = uuid or filename or _basename_from_url(url) or "image"
image_format = get_image_format(mime_type) if mime_type else 255
return [
{
"msg_type": "TIMImageElem",
"msg_content": {
"uuid": _uuid,
"image_format": image_format,
"image_info_array": [
{
"type": 1, # 1 = 原图
"size": size,
"width": width,
"height": height,
"url": url,
}
],
},
}
]
def build_file_msg_body(
url: str,
filename: str,
uuid: Optional[str] = None,
size: int = 0,
) -> list[dict]:
"""
构建腾讯 IM TIMFileElem 消息体
参考https://cloud.tencent.com/document/product/269/2720
Args:
url: 文件公网访问 URLCOS resourceUrl
filename: 文件名含扩展名
uuid: 文件 UUIDMD5 或其他唯一标识不传则使用 filename
size: 文件大小字节
Returns:
TIMFileElem 消息体列表适合直接放入 msg_body
"""
_uuid = uuid or filename
return [
{
"msg_type": "TIMFileElem",
"msg_content": {
"uuid": _uuid,
"file_name": filename,
"file_size": size,
"url": url,
},
}
]
# ============ 内部工具 ============
def _basename_from_url(url: str) -> str:
"""从 URL 提取文件名。"""
try:
parsed = urllib.parse.urlparse(url)
return os.path.basename(parsed.path)
except Exception:
return ""
File diff suppressed because it is too large Load Diff
-558
View File
@@ -1,558 +0,0 @@
"""
Yuanbao sticker (TIMFaceElem) support.
Ported from yuanbao-openclaw-plugin/src/sticker/.
TIMFaceElem wire format:
{
"msg_type": "TIMFaceElem",
"msg_content": {
"index": 0, # always 0 per Yuanbao convention
"data": "<json>", # serialised sticker metadata
}
}
The `data` field carries a JSON string with the sticker's metadata so the
receiver can look up the correct asset in the emoji pack.
"""
from __future__ import annotations
import json
import random
import re
import unicodedata
from typing import Optional
# ---------------------------------------------------------------------------
# Sticker catalogue ported from builtin-stickers.json
# Key : canonical name (Chinese)
# Value : {sticker_id, package_id, name, description, width, height, formats}
# ---------------------------------------------------------------------------
STICKER_MAP: dict[str, dict] = {
"六六六": {
"sticker_id": "278", "package_id": "1003", "name": "六六六",
"description": "666 厉害 牛 棒 绝了 好强 awesome",
"width": 128, "height": 128, "formats": "png",
},
"我想开了": {
"sticker_id": "262", "package_id": "1003", "name": "我想开了",
"description": "想开 佛系 释怀 顿悟 看淡了 无所谓",
"width": 128, "height": 128, "formats": "png",
},
"害羞": {
"sticker_id": "130", "package_id": "1003", "name": "害羞",
"description": "腼腆 不好意思 脸红 娇羞 羞涩 捂脸",
"width": 128, "height": 128, "formats": "png",
},
"比心": {
"sticker_id": "252", "package_id": "1003", "name": "比心",
"description": "笔芯 爱你 爱心手势 love heart 喜欢你",
"width": 128, "height": 128, "formats": "png",
},
"委屈": {
"sticker_id": "125", "package_id": "1003", "name": "委屈",
"description": "难过 想哭 可怜巴巴 瘪嘴 受伤 被欺负",
"width": 128, "height": 128, "formats": "png",
},
"亲亲": {
"sticker_id": "146", "package_id": "1003", "name": "亲亲",
"description": "么么 mua 亲一下 kiss 飞吻 啵",
"width": 128, "height": 128, "formats": "png",
},
"": {
"sticker_id": "131", "package_id": "1003", "name": "",
"description": "帅 墨镜 cool 高冷 有型 swagger",
"width": 128, "height": 128, "formats": "png",
},
"": {
"sticker_id": "145", "package_id": "1003", "name": "",
"description": "睡觉 困 zzZ 打盹 躺平 休眠 sleepy",
"width": 128, "height": 128, "formats": "png",
},
"发呆": {
"sticker_id": "152", "package_id": "1003", "name": "发呆",
"description": "懵 愣住 放空 呆滞 出神 脑子空白",
"width": 128, "height": 128, "formats": "png",
},
"可怜": {
"sticker_id": "157", "package_id": "1003", "name": "可怜",
"description": "卖萌 求饶 委屈巴巴 弱小 拜托 眼巴巴",
"width": 128, "height": 128, "formats": "png",
},
"摊手": {
"sticker_id": "200", "package_id": "1003", "name": "摊手",
"description": "无奈 没办法 耸肩 随便 那咋整 whatever",
"width": 128, "height": 128, "formats": "png",
},
"头大": {
"sticker_id": "213", "package_id": "1003", "name": "头大",
"description": "头疼 烦恼 郁闷 难搞 崩溃 一团乱",
"width": 128, "height": 128, "formats": "png",
},
"": {
"sticker_id": "256", "package_id": "1003", "name": "",
"description": "害怕 惊恐 震惊 吓一跳 恐怖 怂",
"width": 128, "height": 128, "formats": "png",
},
"吐血": {
"sticker_id": "203", "package_id": "1003", "name": "吐血",
"description": "无语 崩溃 被雷 内伤 一口老血 屮",
"width": 128, "height": 128, "formats": "png",
},
"": {
"sticker_id": "185", "package_id": "1003", "name": "",
"description": "傲娇 生气 不满 撇嘴 不理 赌气",
"width": 128, "height": 128, "formats": "png",
},
"嘿嘿": {
"sticker_id": "220", "package_id": "1003", "name": "嘿嘿",
"description": "坏笑 猥琐笑 偷笑 憨笑 得意 你懂的",
"width": 128, "height": 128, "formats": "png",
},
"头秃": {
"sticker_id": "218", "package_id": "1003", "name": "头秃",
"description": "程序员 加班 焦虑 没头发 秃了 肝爆",
"width": 128, "height": 128, "formats": "png",
},
"暗中观察": {
"sticker_id": "221", "package_id": "1003", "name": "暗中观察",
"description": "窥屏 潜水 偷偷看 角落 围观 屏住呼吸",
"width": 128, "height": 128, "formats": "png",
},
"我酸了": {
"sticker_id": "224", "package_id": "1003", "name": "我酸了",
"description": "嫉妒 柠檬精 羡慕 吃柠檬 眼红 恰柠檬",
"width": 128, "height": 128, "formats": "png",
},
"打call": {
"sticker_id": "246", "package_id": "1003", "name": "打call",
"description": "应援 加油 支持 喝彩 助威 call",
"width": 128, "height": 128, "formats": "png",
},
"庆祝": {
"sticker_id": "251", "package_id": "1003", "name": "庆祝",
"description": "祝贺 开心 耶 party 胜利 干杯",
"width": 128, "height": 128, "formats": "png",
},
"奋斗": {
"sticker_id": "151", "package_id": "1003", "name": "奋斗",
"description": "努力 加油 拼搏 冲 干劲 卷起来",
"width": 128, "height": 128, "formats": "png",
},
"惊讶": {
"sticker_id": "143", "package_id": "1003", "name": "惊讶",
"description": "震惊 哇 不敢相信 OMG 居然 这么离谱",
"width": 128, "height": 128, "formats": "png",
},
"疑问": {
"sticker_id": "144", "package_id": "1003", "name": "疑问",
"description": "问号 不懂 啥 为什么 啥情况 懵逼问",
"width": 128, "height": 128, "formats": "png",
},
"仔细分析": {
"sticker_id": "248", "package_id": "1003", "name": "仔细分析",
"description": "思考 推敲 认真 研究 琢磨 让我想想",
"width": 128, "height": 128, "formats": "png",
},
"撅嘴": {
"sticker_id": "184", "package_id": "1003", "name": "撅嘴",
"description": "嘟嘴 卖萌 不高兴 撒娇 嘴翘",
"width": 128, "height": 128, "formats": "png",
},
"泪奔": {
"sticker_id": "199", "package_id": "1003", "name": "泪奔",
"description": "大哭 伤心 破防 感动哭 泪流满面 呜呜",
"width": 128, "height": 128, "formats": "png",
},
"尊嘟假嘟": {
"sticker_id": "276", "package_id": "1003", "name": "尊嘟假嘟",
"description": "真的假的 真假 可爱问 你骗我 是不是",
"width": 128, "height": 128, "formats": "png",
},
"略略略": {
"sticker_id": "113", "package_id": "1003", "name": "略略略",
"description": "调皮 吐舌 不服 略 气死你 鬼脸",
"width": 128, "height": 128, "formats": "png",
},
"": {
"sticker_id": "180", "package_id": "1003", "name": "",
"description": "想睡 倦 打哈欠 睁不开眼 好困啊 sleepy",
"width": 128, "height": 128, "formats": "png",
},
"折磨": {
"sticker_id": "181", "package_id": "1003", "name": "折磨",
"description": "难受 痛苦 煎熬 蚌埠住了 受不了 要命",
"width": 128, "height": 128, "formats": "png",
},
"抠鼻": {
"sticker_id": "182", "package_id": "1003", "name": "抠鼻",
"description": "不屑 无聊 淡定 无所谓 鄙视 挖鼻",
"width": 128, "height": 128, "formats": "png",
},
"鼓掌": {
"sticker_id": "183", "package_id": "1003", "name": "鼓掌",
"description": "拍手 叫好 赞同 666 喝彩 掌声",
"width": 128, "height": 128, "formats": "png",
},
"斜眼笑": {
"sticker_id": "204", "package_id": "1003", "name": "斜眼笑",
"description": "滑稽 坏笑 doge 意味深长 阴阳怪气 嘿嘿嘿",
"width": 128, "height": 128, "formats": "png",
},
"辣眼睛": {
"sticker_id": "216", "package_id": "1003", "name": "辣眼睛",
"description": "看不下去 cringe 毁三观 太丑了 瞎了",
"width": 128, "height": 128, "formats": "png",
},
"哦哟": {
"sticker_id": "217", "package_id": "1003", "name": "哦哟",
"description": "惊讶 起哄 哇哦 有戏 不简单 哟",
"width": 128, "height": 128, "formats": "png",
},
"吃瓜": {
"sticker_id": "222", "package_id": "1003", "name": "吃瓜",
"description": "围观 看戏 八卦 路人 看热闹 板凳",
"width": 128, "height": 128, "formats": "png",
},
"狗头": {
"sticker_id": "225", "package_id": "1003", "name": "狗头",
"description": "doge 保命 开玩笑 滑稽 反讽 懂的都懂",
"width": 128, "height": 128, "formats": "png",
},
"敬礼": {
"sticker_id": "227", "package_id": "1003", "name": "敬礼",
"description": "salute 尊重 收到 遵命 致敬 报告",
"width": 128, "height": 128, "formats": "png",
},
"": {
"sticker_id": "231", "package_id": "1003", "name": "",
"description": "知道了 明白 敷衍 嗯 这样啊 收到",
"width": 128, "height": 128, "formats": "png",
},
"拿到红包": {
"sticker_id": "236", "package_id": "1003", "name": "拿到红包",
"description": "红包 谢谢老板 发财 开心 抢到了 欧气",
"width": 128, "height": 128, "formats": "png",
},
"牛吖": {
"sticker_id": "239", "package_id": "1003", "name": "牛吖",
"description": "牛 厉害 强 666 佩服 大佬",
"width": 128, "height": 128, "formats": "png",
},
"贴贴": {
"sticker_id": "272", "package_id": "1003", "name": "贴贴",
"description": "抱抱 亲昵 蹭蹭 亲密 靠靠 撒娇贴",
"width": 128, "height": 128, "formats": "png",
},
"爱心": {
"sticker_id": "138", "package_id": "1003", "name": "爱心",
"description": "心 love 喜欢你 红心 示爱 么么哒",
"width": 128, "height": 128, "formats": "png",
},
"晚安": {
"sticker_id": "170", "package_id": "1003", "name": "晚安",
"description": "好梦 睡了 night 早点休息 安啦 moon",
"width": 128, "height": 128, "formats": "png",
},
"太阳": {
"sticker_id": "176", "package_id": "1003", "name": "太阳",
"description": "晴天 早上好 阳光 morning 好天气 日",
"width": 128, "height": 128, "formats": "png",
},
"柠檬": {
"sticker_id": "266", "package_id": "1003", "name": "柠檬",
"description": "酸 嫉妒 柠檬精 羡慕 我酸 恰柠檬",
"width": 128, "height": 128, "formats": "png",
},
"大冤种": {
"sticker_id": "267", "package_id": "1003", "name": "大冤种",
"description": "倒霉 吃亏 自嘲 好心没好报 背锅 工具人",
"width": 128, "height": 128, "formats": "png",
},
"吐了": {
"sticker_id": "132", "package_id": "1003", "name": "吐了",
"description": "恶心 yue 受不了 嫌弃 想吐 生理不适",
"width": 128, "height": 128, "formats": "png",
},
"": {
"sticker_id": "134", "package_id": "1003", "name": "",
"description": "生气 愤怒 火大 暴躁 气炸 怼",
"width": 128, "height": 128, "formats": "png",
},
"玫瑰": {
"sticker_id": "165", "package_id": "1003", "name": "玫瑰",
"description": "花 示爱 表白 浪漫 送你花 情人节",
"width": 128, "height": 128, "formats": "png",
},
"凋谢": {
"sticker_id": "119", "package_id": "1003", "name": "凋谢",
"description": "花谢 失恋 难过 枯萎 心碎 凉了",
"width": 128, "height": 128, "formats": "png",
},
"点赞": {
"sticker_id": "159", "package_id": "1003", "name": "点赞",
"description": "赞 认同 好棒 good like 大拇指 顶",
"width": 128, "height": 128, "formats": "png",
},
"握手": {
"sticker_id": "164", "package_id": "1003", "name": "握手",
"description": "合作 你好 商务 hello deal 成交 友好",
"width": 128, "height": 128, "formats": "png",
},
"抱拳": {
"sticker_id": "163", "package_id": "1003", "name": "抱拳",
"description": "谢谢 失敬 江湖 承让 拜托 有礼",
"width": 128, "height": 128, "formats": "png",
},
"ok": {
"sticker_id": "169", "package_id": "1003", "name": "ok",
"description": "好的 收到 没问题 okay 行 可以 懂了",
"width": 128, "height": 128, "formats": "png",
},
"拳头": {
"sticker_id": "174", "package_id": "1003", "name": "拳头",
"description": "加油 干 冲 fight 力量 击拳 硬气",
"width": 128, "height": 128, "formats": "png",
},
"鞭炮": {
"sticker_id": "191", "package_id": "1003", "name": "鞭炮",
"description": "过年 喜庆 爆竹 春节 噼里啪啦 红",
"width": 128, "height": 128, "formats": "png",
},
"烟花": {
"sticker_id": "258", "package_id": "1003", "name": "烟花",
"description": "庆典 漂亮 新年 嘭 绽放 节日快乐",
"width": 128, "height": 128, "formats": "png",
},
}
def get_sticker_by_name(name: str) -> Optional[dict]:
"""
按名称查找贴纸支持模糊匹配
匹配优先级
1. 完全相等name
2. name 包含查询词前缀/子串
3. description 包含查询词同义词搜索
4. 通用模糊评分 sticker-search 同算法命中即返回得分最高的一条
返回 sticker dict找不到返回 None
"""
if not name:
return None
query = name.strip()
if query in STICKER_MAP:
return STICKER_MAP[query]
for key, sticker in STICKER_MAP.items():
if query in key or key in query:
return sticker
for sticker in STICKER_MAP.values():
desc = sticker.get("description", "")
if query in desc:
return sticker
matches = search_stickers(query, limit=1)
return matches[0] if matches else None
def get_random_sticker(category: str = None) -> dict:
"""
随机返回一个贴纸
若指定 category则在 description 中含有该关键词的贴纸里随机选取
category None 时从全表随机
"""
if category:
candidates = [
s for s in STICKER_MAP.values()
if category in s.get("description", "") or category in s.get("name", "")
]
if candidates:
return random.choice(candidates)
return random.choice(list(STICKER_MAP.values()))
def get_sticker_by_id(sticker_id: str) -> Optional[dict]:
"""按 sticker_id 精确查找贴纸。"""
if not sticker_id:
return None
sid = str(sticker_id).strip()
for sticker in STICKER_MAP.values():
if sticker.get("sticker_id") == sid:
return sticker
return None
# ---------------------------------------------------------------------------
# 模糊搜索(对齐 chatbot-web yuanbao-openclaw-plugin/sticker-cache.ts.searchStickers
# ---------------------------------------------------------------------------
_PUNCT_RE = re.compile(r"[\s\u3000\-_·.,,。!?\"“”'‘’、/\\]+")
def _normalize_text(raw: str) -> str:
return unicodedata.normalize("NFKC", str(raw or "")).strip().lower()
def _compact_text(raw: str) -> str:
return _PUNCT_RE.sub("", _normalize_text(raw))
def _multiset_char_hit_ratio(needle: str, haystack: str) -> float:
if not needle:
return 0.0
bag: dict[str, int] = {}
for ch in haystack:
bag[ch] = bag.get(ch, 0) + 1
hits = 0
for ch in needle:
n = bag.get(ch, 0)
if n > 0:
hits += 1
bag[ch] = n - 1
return hits / len(needle)
def _bigram_jaccard(a: str, b: str) -> float:
if len(a) < 2 or len(b) < 2:
return 0.0
A = {a[i:i + 2] for i in range(len(a) - 1)}
B = {b[i:i + 2] for i in range(len(b) - 1)}
inter = len(A & B)
union = len(A) + len(B) - inter
return inter / union if union else 0.0
def _longest_subsequence_ratio(needle: str, haystack: str) -> float:
if not needle:
return 0.0
j = 0
for ch in haystack:
if j >= len(needle):
break
if ch == needle[j]:
j += 1
return j / len(needle)
def _score_field(haystack: str, query: str) -> float:
hay = _normalize_text(haystack)
q = _normalize_text(query)
if not hay or not q:
return 0.0
hay_c = _compact_text(haystack)
q_c = _compact_text(query)
best = 0.0
if hay == q:
best = max(best, 100.0)
if q in hay:
best = max(best, 92 + min(6, len(q)))
if len(q) >= 2 and hay.startswith(q):
best = max(best, 88.0)
if q_c and q_c in hay_c:
best = max(best, 86.0)
best = max(best, _multiset_char_hit_ratio(q_c, hay_c) * 62)
best = max(best, _bigram_jaccard(q_c, hay_c) * 58)
best = max(best, _longest_subsequence_ratio(q_c, hay_c) * 52)
if len(q) == 1 and q in hay:
best = max(best, 68.0)
return best
def search_stickers(query: str, limit: int = 10) -> list[dict]:
"""
在内置贴纸表中按模糊匹配排序返回前 N 条结果
评分综合 name/description 字段的子串字符多重集覆盖bigram Jaccard子序列比例
name 权重略高于 description×0.88 query 时按字典顺序返回前 N
"""
safe_limit = max(1, min(500, int(limit) if limit else 10))
if not query or not _normalize_text(query):
return list(STICKER_MAP.values())[:safe_limit]
scored: list[tuple[float, dict]] = []
for sticker in STICKER_MAP.values():
name_s = _score_field(sticker.get("name", ""), query)
desc_s = _score_field(sticker.get("description", ""), query) * 0.88
sid = str(sticker.get("sticker_id", "")).strip()
q_norm = _normalize_text(query)
id_s = 0.0
if sid and q_norm:
sid_norm = _normalize_text(sid)
if sid_norm == q_norm:
id_s = 100.0
elif q_norm in sid_norm:
id_s = 84.0
scored.append((max(name_s, desc_s, id_s), sticker))
scored.sort(key=lambda x: x[0], reverse=True)
top = scored[0][0] if scored else 0
if top <= 0:
return [s for _, s in scored[:safe_limit]]
if top >= 22:
floor = 18.0
elif top >= 12:
floor = max(10.0, top * 0.5)
else:
floor = max(6.0, top * 0.35)
filtered = [pair for pair in scored if pair[0] >= floor]
out = filtered if filtered else scored
return [s for _, s in out[:safe_limit]]
def build_face_msg_body(
face_index: int,
face_type: int = 1,
data: Optional[str] = None,
) -> list:
"""
构造 TIMFaceElem 消息体
Yuanbao 约定
- index 固定传 0服务端通过 data 字段识别具体表情
- data JSON 字符串包含 sticker_id / package_id 等字段
Args:
face_index: 保留字段暂时不影响 wire formatYuanbao 固定 index=0
face_index > 0 时视为旧版 QQ 表情 ID直接放入 index
face_type: 保留字段兼容旧接口当前未使用
data: 已序列化的 JSON 字符串 None 时仅传 index
Returns:
符合 Yuanbao TIM 协议的 msg_body list::
[{"msg_type": "TIMFaceElem", "msg_content": {"index": 0, "data": "..."}}]
"""
msg_content: dict = {"index": face_index}
if data is not None:
msg_content["data"] = data
return [{"msg_type": "TIMFaceElem", "msg_content": msg_content}]
def build_sticker_msg_body(sticker: dict) -> list:
"""
STICKER_MAP 中的 sticker dict 直接构造 TIMFaceElem 消息体
这是 send_sticker() 的内部辅助确保 data 字段与原始 JS 插件一致
"""
data_payload = json.dumps(
{
"sticker_id": sticker["sticker_id"],
"package_id": sticker["package_id"],
"width": sticker.get("width", 128),
"height": sticker.get("height", 128),
"formats": sticker.get("formats", "png"),
"name": sticker["name"],
},
ensure_ascii=False,
separators=(",", ":"),
)
return build_face_msg_body(face_index=0, data=data_payload)
+451 -885
View File
File diff suppressed because it is too large Load Diff
+18 -85
View File
@@ -87,9 +87,6 @@ class SessionSource:
user_id_alt: Optional[str] = None # Platform-specific stable alt ID (Signal UUID, Feishu union_id)
chat_id_alt: Optional[str] = None # Signal group internal ID
is_bot: bool = False # True when the message author is a bot/webhook (Discord)
guild_id: Optional[str] = None # Discord guild / Slack workspace / Matrix server scope
parent_chat_id: Optional[str] = None # Parent channel when chat_id refers to a thread
message_id: Optional[str] = None # ID of the triggering message (for pin/reply/react)
@property
def description(self) -> str:
@@ -127,14 +124,8 @@ class SessionSource:
d["user_id_alt"] = self.user_id_alt
if self.chat_id_alt:
d["chat_id_alt"] = self.chat_id_alt
if self.guild_id:
d["guild_id"] = self.guild_id
if self.parent_chat_id:
d["parent_chat_id"] = self.parent_chat_id
if self.message_id:
d["message_id"] = self.message_id
return d
@classmethod
def from_dict(cls, data: Dict[str, Any]) -> "SessionSource":
return cls(
@@ -148,9 +139,6 @@ class SessionSource:
chat_topic=data.get("chat_topic"),
user_id_alt=data.get("user_id_alt"),
chat_id_alt=data.get("chat_id_alt"),
guild_id=data.get("guild_id"),
parent_chat_id=data.get("parent_chat_id"),
message_id=data.get("message_id"),
)
@@ -202,31 +190,6 @@ that requires raw IDs). Discord is excluded because mentions use ``<@user_id>``
and the LLM needs the real ID to tag users."""
def _discord_tools_loaded() -> bool:
"""True iff the agent will actually have Discord tools this session.
Two conditions must hold:
1. The `discord` or `discord_admin` toolset is enabled for the
Discord platform via `hermes tools` (opt-in, default OFF).
2. `DISCORD_BOT_TOKEN` is set the tool's `check_fn` gates on it
at registry time, so the toolset being enabled in config is not
enough if the token isn't configured.
Returns False (safe default keeps the stale-API disclaimer) on any
error so a bad config can't silently promise tools the agent lacks.
"""
if not (os.environ.get("DISCORD_BOT_TOKEN") or "").strip():
return False
try:
from hermes_cli.config import load_config
from hermes_cli.tools_config import _get_platform_tools
cfg = load_config()
enabled = _get_platform_tools(cfg, "discord", include_default_mcp_servers=False)
return "discord" in enabled or "discord_admin" in enabled
except Exception:
return False
def build_session_context_prompt(
context: SessionContext,
*,
@@ -310,38 +273,18 @@ def build_session_context_prompt(
"**Platform notes:** You are running inside Slack. "
"You do NOT have access to Slack-specific APIs — you cannot search "
"channel history, pin/unpin messages, manage channels, or list users. "
"Do not promise to perform these actions. The gateway may inline the "
"current message's Slack block/attachment payload when available, but "
"you still cannot call Slack APIs yourself."
"Do not promise to perform these actions. If the user asks, explain "
"that you can only read messages sent directly to you and respond."
)
elif context.source.platform == Platform.DISCORD:
# Inject the Discord IDs block only when the agent actually has
# Discord tools loaded this session — i.e. the user opted into
# `discord` / `discord_admin` via `hermes tools` AND the bot
# token is configured. Otherwise keep the stale-API disclaimer
# honest so we never promise tools the agent lacks.
if _discord_tools_loaded():
src = context.source
id_lines = ["", "**Discord IDs (for the `discord` / `discord_admin` tools):**"]
if src.guild_id:
id_lines.append(f" - Guild: `{src.guild_id}`")
if src.thread_id and src.parent_chat_id:
id_lines.append(f" - Parent channel: `{src.parent_chat_id}`")
id_lines.append(f" - Thread: `{src.thread_id}` (use as `channel_id` for fetch_messages etc.)")
else:
id_lines.append(f" - Channel: `{src.chat_id}`")
if src.message_id:
id_lines.append(f" - Triggering message: `{src.message_id}`")
lines.extend(id_lines)
else:
lines.append("")
lines.append(
"**Platform notes:** You are running inside Discord. "
"You do NOT have access to Discord-specific APIs — you cannot search "
"channel history, pin messages, manage roles, or list server members. "
"Do not promise to perform these actions. If the user asks, explain "
"that you can only read messages sent directly to you and respond."
)
lines.append("")
lines.append(
"**Platform notes:** You are running inside Discord. "
"You do NOT have access to Discord-specific APIs — you cannot search "
"channel history, pin messages, manage roles, or list server members. "
"Do not promise to perform these actions. If the user asks, explain "
"that you can only read messages sent directly to you and respond."
)
elif context.source.platform == Platform.BLUEBUBBLES:
lines.append("")
lines.append(
@@ -354,14 +297,6 @@ def build_session_context_prompt(
"If the user needs a detailed answer, give the short version first "
"and offer to elaborate."
)
elif context.source.platform == Platform.YUANBAO:
lines.append("")
lines.append(
"**Platform notes:** You are running inside Yuanbao. "
"You CAN send private (DM) messages via the send_message tool. "
"Use target='yuanbao:direct:<account_id>' for DM "
"and target='yuanbao:group:<group_code>' for group chat."
)
# Connected platforms
platforms_list = ["local (files on this machine)"]
@@ -448,11 +383,11 @@ class SessionEntry:
auto_reset_reason: Optional[str] = None # "idle" or "daily"
reset_had_activity: bool = False # whether the expired session had any messages
# Set by the background expiry watcher after it finalizes an expired
# session (invoking on_session_finalize hooks and evicting the cached
# agent). Persisted to sessions.json so the flag survives gateway
# restarts — prevents redundant finalization runs.
expiry_finalized: bool = False
# Set by the background expiry watcher after it successfully flushes
# memories for this session. Persisted to sessions.json so the flag
# survives gateway restarts (the old in-memory _pre_flushed_sessions
# set was lost on restart, causing redundant re-flushes).
memory_flushed: bool = False
# When True the next call to get_or_create_session() will auto-reset
# this session (create a new session_id) so the user starts fresh.
@@ -488,7 +423,7 @@ class SessionEntry:
"last_prompt_tokens": self.last_prompt_tokens,
"estimated_cost_usd": self.estimated_cost_usd,
"cost_status": self.cost_status,
"expiry_finalized": self.expiry_finalized,
"memory_flushed": self.memory_flushed,
"suspended": self.suspended,
"resume_pending": self.resume_pending,
"resume_reason": self.resume_reason,
@@ -540,7 +475,7 @@ class SessionEntry:
last_prompt_tokens=data.get("last_prompt_tokens", 0),
estimated_cost_usd=data.get("estimated_cost_usd", 0.0),
cost_status=data.get("cost_status", "unknown"),
expiry_finalized=data.get("expiry_finalized", data.get("memory_flushed", False)),
memory_flushed=data.get("memory_flushed", False),
suspended=data.get("suspended", False),
resume_pending=data.get("resume_pending", False),
resume_reason=data.get("resume_reason"),
@@ -1241,7 +1176,6 @@ class SessionStore:
reasoning_content=message.get("reasoning_content") if message.get("role") == "assistant" else None,
reasoning_details=message.get("reasoning_details") if message.get("role") == "assistant" else None,
codex_reasoning_items=message.get("codex_reasoning_items") if message.get("role") == "assistant" else None,
codex_message_items=message.get("codex_message_items") if message.get("role") == "assistant" else None,
)
except Exception as e:
logger.debug("Session DB operation failed: %s", e)
@@ -1274,7 +1208,6 @@ class SessionStore:
reasoning_content=msg.get("reasoning_content") if role == "assistant" else None,
reasoning_details=msg.get("reasoning_details") if role == "assistant" else None,
codex_reasoning_items=msg.get("codex_reasoning_items") if role == "assistant" else None,
codex_message_items=msg.get("codex_message_items") if role == "assistant" else None,
)
except Exception as e:
logger.debug("Failed to rewrite transcript in DB: %s", e)
-110
View File
@@ -44,14 +44,6 @@ class StreamConsumerConfig:
buffer_threshold: int = 40
cursor: str = ""
buffer_only: bool = False
# When >0, the final edit for a streamed response is delivered as a
# fresh message if the original preview has been visible for at least
# this many seconds. This makes the platform's visible timestamp
# reflect completion time instead of first-token time for long-running
# responses (e.g. reasoning models that stream slowly). Ported from
# openclaw/openclaw#72038. Default 0 = always edit in place (legacy
# behavior). The gateway enables this selectively per-platform.
fresh_final_after_seconds: float = 0.0
class GatewayStreamConsumer:
@@ -99,12 +91,6 @@ class GatewayStreamConsumer:
self._queue: queue.Queue = queue.Queue()
self._accumulated = ""
self._message_id: Optional[str] = None
# Wall-clock timestamp (time.monotonic) when ``_message_id`` was
# first assigned from a successful first-send. Used by the
# fresh-final logic to detect long-lived previews whose edit
# timestamps would be stale by completion time. Ported from
# openclaw/openclaw#72038.
self._message_created_ts: Optional[float] = None
self._already_sent = False
self._edit_supported = True # Disabled when progressive edits are no longer usable
self._last_edit_time = 0.0
@@ -150,7 +136,6 @@ class GatewayStreamConsumer:
if preserve_no_edit and self._message_id == "__no_edit__":
return
self._message_id = None
self._message_created_ts = None
self._accumulated = ""
self._last_sent_text = ""
self._fallback_final_send = False
@@ -749,81 +734,6 @@ class GatewayStreamConsumer:
logger.error("Commentary send error: %s", e)
return False
def _should_send_fresh_final(self) -> bool:
"""Return True when a long-lived preview should be replaced with a
fresh final message instead of an edit.
Conditions:
- Fresh-final is enabled (``fresh_final_after_seconds > 0``).
- We have a real preview message id (not the ``__no_edit__`` sentinel
and not ``None``).
- The preview has been visible for at least the configured threshold.
Ported from openclaw/openclaw#72038.
"""
threshold = getattr(self.cfg, "fresh_final_after_seconds", 0.0) or 0.0
if threshold <= 0:
return False
if not self._message_id or self._message_id == "__no_edit__":
return False
if self._message_created_ts is None:
return False
age = time.monotonic() - self._message_created_ts
return age >= threshold
async def _try_fresh_final(self, text: str) -> bool:
"""Send ``text`` as a brand-new message (best-effort delete the old
preview) so the platform's visible timestamp reflects completion
time. Returns True on successful delivery, False on any failure so
the caller falls back to the normal edit path.
Ported from openclaw/openclaw#72038.
"""
old_message_id = self._message_id
try:
result = await self.adapter.send(
chat_id=self.chat_id,
content=text,
metadata=self.metadata,
)
except Exception as e:
logger.debug("Fresh-final send failed, falling back to edit: %s", e)
return False
if not getattr(result, "success", False):
return False
# Successful fresh send — try to delete the stale preview so the
# user doesn't see the old edit-stuck message underneath. Cleanup
# is best-effort; platforms that don't implement ``delete_message``
# just leave the preview behind (still an acceptable outcome —
# the visible final timestamp is the important part).
if old_message_id and old_message_id != "__no_edit__":
delete_fn = getattr(self.adapter, "delete_message", None)
if delete_fn is not None:
try:
await delete_fn(self.chat_id, old_message_id)
except Exception as e:
logger.debug(
"Fresh-final preview cleanup failed (%s): %s",
old_message_id, e,
)
# Adopt the new message id as the current message so subsequent
# callers (e.g. overflow split loops, finalize retries) see a
# consistent state.
new_message_id = getattr(result, "message_id", None)
if new_message_id:
self._message_id = new_message_id
self._message_created_ts = time.monotonic()
else:
# Send succeeded but platform didn't return an id — treat the
# delivery as final-only and fall back to "__no_edit__" so we
# don't try to edit something we can't address.
self._message_id = "__no_edit__"
self._message_created_ts = None
self._already_sent = True
self._last_sent_text = text
self._final_response_sent = True
return True
async def _send_or_edit(self, text: str, *, finalize: bool = False) -> bool:
"""Send or edit the streaming message.
@@ -876,22 +786,6 @@ class GatewayStreamConsumer:
finalize and self._adapter_requires_finalize
):
return True
# Fresh-final for long-lived previews: when finalizing
# the last edit in a streaming sequence, if the
# original preview has been visible for at least
# ``fresh_final_after_seconds``, send the completed
# reply as a fresh message so the platform's visible
# timestamp reflects completion time instead of the
# preview creation time. Best-effort cleanup of the
# old preview follows. Ported from
# openclaw/openclaw#72038. Gated by config so the
# legacy edit-in-place path stays the default.
if (
finalize
and self._should_send_fresh_final()
and await self._try_fresh_final(text)
):
return True
# Edit existing message
result = await self.adapter.edit_message(
chat_id=self.chat_id,
@@ -958,10 +852,6 @@ class GatewayStreamConsumer:
if result.success:
if result.message_id:
self._message_id = result.message_id
# Track when the preview first became visible to
# the user so fresh-final logic can detect stale
# preview timestamps on long-running responses.
self._message_created_ts = time.monotonic()
else:
self._edit_supported = False
self._already_sent = True
+1 -21
View File
@@ -31,17 +31,8 @@ Hermes' own session keys.
from __future__ import annotations
import json
import logging
import re
from typing import Set
logger = logging.getLogger(__name__)
# WhatsApp JIDs are numeric (or plus-prefixed numeric) with optional
# ``@``, ``.`` and ``:`` separators. ``\w`` is pinned to ASCII so
# full-width digits / Unicode word chars can't sneak through.
_SAFE_IDENTIFIER_RE = re.compile(r"^[A-Za-z0-9@.+\-]+$")
from hermes_constants import get_hermes_home
@@ -90,16 +81,6 @@ def expand_whatsapp_aliases(identifier: str) -> Set[str]:
current = queue.pop(0)
if not current or current in resolved:
continue
# Defense-in-depth: reject identifiers that could sneak path
# separators / traversal segments into the ``lid-mapping-{current}``
# filename below. The hardcoded ``lid-mapping-`` prefix already
# prevents escape via pathlib's component split (an attacker can't
# create ``lid-mapping-..`` as a real directory in session_dir), but
# this keeps the identifier space to the characters WhatsApp JIDs
# actually use and avoids depending on that filesystem-layout
# invariant.
if not _SAFE_IDENTIFIER_RE.match(current):
continue
resolved.add(current)
for suffix in ("", "_reverse"):
@@ -110,8 +91,7 @@ def expand_whatsapp_aliases(identifier: str) -> Set[str]:
mapped = normalize_whatsapp_identifier(
json.loads(mapping_path.read_text(encoding="utf-8"))
)
except (OSError, json.JSONDecodeError) as exc:
logger.debug("whatsapp_identity: failed to read %s: %s", mapping_path, exc)
except Exception:
continue
if mapped and mapped not in resolved:
queue.append(mapped)
+3 -36
View File
@@ -224,14 +224,6 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
api_key_env_vars=("ARCEEAI_API_KEY",),
base_url_env_var="ARCEE_BASE_URL",
),
"gmi": ProviderConfig(
id="gmi",
name="GMI Cloud",
auth_type="api_key",
inference_base_url="https://api.gmi-serving.com/v1",
api_key_env_vars=("GMI_API_KEY",),
base_url_env_var="GMI_BASE_URL",
),
"minimax": ProviderConfig(
id="minimax",
name="MiniMax",
@@ -364,14 +356,6 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
api_key_env_vars=(),
base_url_env_var="BEDROCK_BASE_URL",
),
"azure-foundry": ProviderConfig(
id="azure-foundry",
name="Azure Foundry",
auth_type="api_key",
inference_base_url="", # User-provided endpoint
api_key_env_vars=("AZURE_FOUNDRY_API_KEY",),
base_url_env_var="AZURE_FOUNDRY_BASE_URL",
),
}
@@ -475,27 +459,11 @@ def _resolve_api_key_provider_secret(
pass
return "", ""
from hermes_cli.config import get_env_value
for env_var in pconfig.api_key_env_vars:
# Check both os.environ and ~/.hermes/.env file
val = (get_env_value(env_var) or "").strip()
val = os.getenv(env_var, "").strip()
if has_usable_secret(val):
return val, env_var
# Fallback: try credential pool (e.g. zai key stored via auth.json)
try:
from agent.credential_pool import load_pool
pool = load_pool(provider_id)
if pool and pool.has_credentials():
entry = pool.peek()
if entry:
key = getattr(entry, "access_token", "") or getattr(entry, "runtime_api_key", "")
key = str(key).strip()
if has_usable_secret(key):
return key, f"credential_pool:{provider_id}"
except Exception:
pass
return "", ""
@@ -1128,7 +1096,6 @@ def resolve_provider(
"kimi-cn": "kimi-coding-cn", "moonshot-cn": "kimi-coding-cn",
"step": "stepfun", "stepfun-coding-plan": "stepfun",
"arcee-ai": "arcee", "arceeai": "arcee",
"gmi-cloud": "gmi", "gmicloud": "gmi",
"minimax-china": "minimax-cn", "minimax_cn": "minimax-cn",
"alibaba_coding": "alibaba-coding-plan", "alibaba-coding": "alibaba-coding-plan",
"alibaba_coding_plan": "alibaba-coding-plan",
@@ -4269,10 +4236,10 @@ def _login_nous(args, pconfig: ProviderConfig) -> None:
)
from hermes_cli.models import (
get_curated_nous_model_ids, get_pricing_for_provider,
_PROVIDER_MODELS, get_pricing_for_provider,
check_nous_free_tier, partition_nous_models_by_tier,
)
model_ids = get_curated_nous_model_ids()
model_ids = _PROVIDER_MODELS.get("nous", [])
print()
unavailable_models: list = []
-300
View File
@@ -1,300 +0,0 @@
"""Azure Foundry endpoint auto-detection.
Inspect an Azure AI Foundry / Azure OpenAI endpoint to determine:
- API transport (OpenAI-style ``chat_completions`` vs
Anthropic-style ``anthropic_messages``)
- Available models (best effort Azure does not expose a deployment
listing via the inference API key, but Azure OpenAI v1 endpoints
return the resource's model catalog via ``GET /models``)
- Context length for each discovered/entered model, via the existing
:func:`agent.model_metadata.get_model_context_length` resolver.
Rationale:
Azure has no pure-API-key deployment-listing endpoint per Microsoft,
deployment enumeration requires ARM management-plane auth. Azure
OpenAI v1 endpoints ``{resource}.openai.azure.com/openai/v1`` do return
a ``/models`` list, but it reflects the resource's *available* models
rather than the user's *deployed* deployment names. In practice it is
still a useful hint the user picks a familiar model name and we look
up its context length from the catalog.
The detector never crashes on errors (every HTTP call is wrapped in a
broad try/except). Callers get a :class:`DetectionResult` with whatever
information could be gathered, and fall back to manual entry for the
rest.
"""
from __future__ import annotations
import json
import logging
import re
from dataclasses import dataclass, field
from typing import Optional
from urllib import request as urllib_request
from urllib.error import HTTPError, URLError
from urllib.parse import urlparse, urlunparse
logger = logging.getLogger(__name__)
# Default Azure OpenAI ``api-version`` to probe with. The v1 GA endpoint
# accepts requests without ``api-version`` entirely, so this is only used
# as a fallback for pre-v1 resources that still require it.
_AZURE_OPENAI_PROBE_API_VERSIONS = (
"2025-04-01-preview",
"2024-10-21", # oldest GA that supports /models
)
# Default Azure Anthropic ``api-version``. Matches the value used by
# ``agent/anthropic_adapter.py`` when building the Anthropic client.
_AZURE_ANTHROPIC_API_VERSION = "2025-04-15"
@dataclass
class DetectionResult:
"""Everything auto-detection could gather from a base URL + API key."""
#: Detected API transport: ``"chat_completions"``,
#: ``"anthropic_messages"``, or ``None`` when detection failed.
api_mode: Optional[str] = None
#: Deployment / model IDs returned by ``/models`` (best effort).
#: Empty when the endpoint doesn't expose the list with an API key.
models: list[str] = field(default_factory=list)
#: Lowercased host from the base URL (used for display messages).
hostname: str = ""
#: Human-readable reason the detector chose ``api_mode``. Useful
#: for explaining auto-detection to the user in the wizard.
reason: str = ""
#: ``True`` when ``/models`` returned a valid OpenAI-shaped payload.
models_probe_ok: bool = False
#: ``True`` when the URL was determined to be an Anthropic-style
#: endpoint (from path suffix or live probe).
is_anthropic: bool = False
def _http_get_json(url: str, api_key: str, timeout: float = 6.0) -> tuple[int, Optional[dict]]:
"""GET a URL with ``api-key`` + ``Authorization`` headers. Return
``(status_code, parsed_json_or_None)``. Never raises."""
req = urllib_request.Request(url, method="GET")
# Azure OpenAI uses ``api-key``. Some Azure deployments (and
# Anthropic-style routes) use ``Authorization: Bearer``. Send both
# so we probe once per URL rather than twice.
req.add_header("api-key", api_key)
req.add_header("Authorization", f"Bearer {api_key}")
req.add_header("User-Agent", "hermes-agent/azure-detect")
try:
with urllib_request.urlopen(req, timeout=timeout) as resp:
body = resp.read()
try:
return resp.status, json.loads(body.decode("utf-8", errors="replace"))
except Exception:
return resp.status, None
except HTTPError as exc:
return exc.code, None
except (URLError, TimeoutError, OSError) as exc:
logger.debug("azure_detect: GET %s failed: %s", url, exc)
return 0, None
except Exception as exc: # pragma: no cover — defensive
logger.debug("azure_detect: GET %s unexpected error: %s", url, exc)
return 0, None
def _strip_trailing_v1(url: str) -> str:
"""Strip trailing ``/v1`` or ``/v1/`` so we can construct sub-paths."""
return re.sub(r"/v1/?$", "", url.rstrip("/"))
def _looks_like_anthropic_path(url: str) -> bool:
"""Return True when the URL's path ends in ``/anthropic`` or
contains a ``/anthropic/`` segment. Used by Azure Foundry
resources that route Claude traffic through a dedicated path."""
try:
parsed = urlparse(url)
path = (parsed.path or "").lower().rstrip("/")
return path.endswith("/anthropic") or "/anthropic/" in path + "/"
except Exception:
return False
def _extract_model_ids(payload: dict) -> list[str]:
"""Extract a list of model IDs from an OpenAI-shaped ``/models``
response. Returns ``[]`` on any shape mismatch."""
data = payload.get("data") if isinstance(payload, dict) else None
if not isinstance(data, list):
return []
ids: list[str] = []
for item in data:
if not isinstance(item, dict):
continue
# OpenAI shape: {"id": "gpt-5.4", "object": "model", ...}
mid = item.get("id") or item.get("model") or item.get("name")
if isinstance(mid, str) and mid:
ids.append(mid)
return ids
def _probe_openai_models(base_url: str, api_key: str) -> tuple[bool, list[str]]:
"""Probe ``<base>/models`` for an OpenAI-shaped response.
Returns ``(ok, models)``. ``ok`` is True iff the endpoint accepted
us as an OpenAI-style caller (200 OK + OpenAI-shaped JSON body).
"""
base_url = base_url.rstrip("/")
# Azure OpenAI v1: {resource}.openai.azure.com/openai/v1 — no
# api-version required for GA paths, so probe without first.
candidates = [f"{base_url}/models"]
# Fallback: explicit api-version for pre-v1 resources
for v in _AZURE_OPENAI_PROBE_API_VERSIONS:
candidates.append(f"{base_url}/models?api-version={v}")
for url in candidates:
status, body = _http_get_json(url, api_key)
if status == 200 and body is not None:
ids = _extract_model_ids(body)
if ids:
logger.info(
"azure_detect: /models probe OK at %s (%d models)",
url, len(ids),
)
return True, ids
# 200 + empty list still counts as "OpenAI shape, no models
# listed" — let the user proceed with manual entry.
if isinstance(body, dict) and "data" in body:
return True, []
return False, []
def _probe_anthropic_messages(base_url: str, api_key: str) -> bool:
"""Send a zero-token request to ``<base>/v1/messages`` and check
whether the endpoint at least *recognises* the Anthropic Messages
shape (any 4xx that mentions ``messages`` or ``model``, or a 400
``invalid_request`` with an Anthropic error shape). Never completes
a real chat.
"""
base = _strip_trailing_v1(base_url)
url = f"{base}/v1/messages?api-version={_AZURE_ANTHROPIC_API_VERSION}"
payload = json.dumps({
"model": "probe",
"max_tokens": 1,
"messages": [{"role": "user", "content": "ping"}],
}).encode("utf-8")
req = urllib_request.Request(url, method="POST", data=payload)
req.add_header("api-key", api_key)
req.add_header("Authorization", f"Bearer {api_key}")
req.add_header("anthropic-version", "2023-06-01")
req.add_header("content-type", "application/json")
req.add_header("User-Agent", "hermes-agent/azure-detect")
try:
with urllib_request.urlopen(req, timeout=6.0) as resp:
# Should never 200 — "probe" isn't a real deployment. But
# if it does, the endpoint definitely speaks Anthropic.
return resp.status < 500
except HTTPError as exc:
# 4xx with an Anthropic-shaped error body = Anthropic endpoint.
try:
body = exc.read().decode("utf-8", errors="replace")
lowered = body.lower()
if "anthropic" in lowered or '"type"' in lowered and '"error"' in lowered:
return True
# Pre-Azure-v1 Azure Foundry returns a plain 404 for
# Anthropic-style calls on non-Anthropic deployments. A
# 400 "model not found" IS Anthropic though.
if exc.code == 400 and ("messages" in lowered or "model" in lowered):
return True
return False
except Exception:
return False
except (URLError, TimeoutError, OSError):
return False
except Exception: # pragma: no cover
return False
def detect(base_url: str, api_key: str) -> DetectionResult:
"""Inspect an Azure endpoint and describe its transport + models.
Call this from the wizard before asking the user to pick an API
mode manually. The caller should treat the returned
:class:`DetectionResult` as *advisory* if ``api_mode`` is None,
fall back to asking the user.
"""
result = DetectionResult()
try:
parsed = urlparse(base_url)
result.hostname = (parsed.hostname or "").lower()
except Exception:
result.hostname = ""
# 1. Path sniff. Azure Foundry exposes Anthropic-style deployments
# under a dedicated ``/anthropic`` path.
if _looks_like_anthropic_path(base_url):
result.is_anthropic = True
result.api_mode = "anthropic_messages"
result.reason = "URL path ends in /anthropic → Anthropic Messages API"
return result
# 2. Try the OpenAI-style /models probe. If this works, the
# endpoint definitely speaks OpenAI wire.
ok, models = _probe_openai_models(base_url, api_key)
if ok:
result.models_probe_ok = True
result.models = models
result.api_mode = "chat_completions"
result.reason = (
f"GET /models returned {len(models)} model(s) — OpenAI-style endpoint"
if models
else "GET /models returned an OpenAI-shaped empty list — OpenAI-style endpoint"
)
return result
# 3. Fallback: probe the Anthropic Messages shape. Slower and more
# intrusive than /models, so only run it when the OpenAI probe
# failed.
if _probe_anthropic_messages(base_url, api_key):
result.is_anthropic = True
result.api_mode = "anthropic_messages"
result.reason = "Endpoint accepts Anthropic Messages shape"
return result
# Nothing matched. Caller falls back to manual selection.
result.reason = (
"Could not probe endpoint (private network, missing model list, or "
"non-standard path) — falling back to manual API-mode selection"
)
return result
def lookup_context_length(model: str, base_url: str, api_key: str) -> Optional[int]:
"""Thin wrapper around :func:`agent.model_metadata.get_model_context_length`
that returns ``None`` when only the fallback default (128k) would
fire, so the wizard can distinguish "we actually know this" from
"we guessed."""
try:
from agent.model_metadata import (
DEFAULT_FALLBACK_CONTEXT,
get_model_context_length,
)
except Exception:
return None
try:
n = get_model_context_length(model, base_url=base_url, api_key=api_key)
except Exception as exc:
logger.debug("azure_detect: context length lookup failed: %s", exc)
return None
if isinstance(n, int) and n > 0 and n != DEFAULT_FALLBACK_CONTEXT:
return n
return None
__all__ = ["DetectionResult", "detect", "lookup_context_length"]
+1 -177
View File
@@ -36,23 +36,12 @@ _EXCLUDED_DIRS = {
"__pycache__", # bytecode caches — regenerated on import
".git", # nested git dirs (profiles shouldn't have these, but safety)
"node_modules", # js deps if website/ somehow leaks in
"backups", # prior auto-backups — don't nest backups exponentially
"checkpoints", # session-local trajectory caches — regenerated per-session,
# session-hash-keyed so they don't port to another machine anyway
}
# File-name suffixes to skip
_EXCLUDED_SUFFIXES = (
".pyc",
".pyo",
# SQLite sidecar files — the backup takes a consistent snapshot of ``*.db``
# via ``sqlite3.backup()``, so shipping the live WAL / shared-memory /
# rollback-journal alongside would pair a fresh snapshot with stale sidecar
# state and produce a torn restore on the next open. They're transient and
# regenerated on first connection anyway.
".db-wal",
".db-shm",
".db-journal",
)
# File names to skip (runtime state that's meaningless on another machine)
@@ -465,12 +454,6 @@ def run_import(args) -> None:
# Critical state files to include in quick snapshots (relative to HERMES_HOME).
# Everything else is either regeneratable (logs, cache) or managed separately
# (skills, repo, sessions/).
#
# Entries may be individual files OR directories. Directories are captured
# recursively; missing entries are silently skipped. Pairing data lives in
# platform-specific JSON blobs outside state.db, so it's listed here explicitly
# — `hermes update` snapshots this set before pulling so approved-user lists
# are recoverable if anything goes wrong (issue #15733).
_QUICK_STATE_FILES = (
"state.db",
"config.yaml",
@@ -480,10 +463,6 @@ _QUICK_STATE_FILES = (
"gateway_state.json",
"channel_directory.json",
"processes.json",
# Pairing stores (generic + per-platform JSONs outside state.db)
"pairing", # legacy location (gateway/pairing.py)
"platforms/pairing", # new location (gateway/pairing.py)
"feishu_comment_pairing.json", # Feishu comment subscription pairings
)
_QUICK_SNAPSHOTS_DIR = "state-snapshots"
@@ -519,27 +498,7 @@ def create_quick_snapshot(
for rel in _QUICK_STATE_FILES:
src = home / rel
if not src.exists():
continue
if src.is_dir():
# Walk the directory and record each file individually in the
# manifest so restore can treat them uniformly. Empty dirs are
# skipped (nothing to snapshot).
for sub in src.rglob("*"):
if not sub.is_file():
continue
sub_rel = sub.relative_to(home).as_posix()
dst = snap_dir / sub_rel
dst.parent.mkdir(parents=True, exist_ok=True)
try:
shutil.copy2(sub, dst)
manifest[sub_rel] = dst.stat().st_size
except (OSError, PermissionError) as exc:
logger.warning("Could not snapshot %s: %s", sub_rel, exc)
continue
if not src.is_file():
if not src.exists() or not src.is_file():
continue
dst = snap_dir / rel
@@ -694,138 +653,3 @@ def run_quick_backup(args) -> None:
print(f" Restore with: /snapshot restore {snap_id}")
else:
print("No state files found to snapshot.")
# ---------------------------------------------------------------------------
# Pre-update auto-backup
# ---------------------------------------------------------------------------
_PRE_UPDATE_BACKUPS_DIR = "backups"
_PRE_UPDATE_PREFIX = "pre-update-"
_PRE_UPDATE_DEFAULT_KEEP = 5
def _pre_update_backup_dir(hermes_home: Optional[Path] = None) -> Path:
home = hermes_home or get_hermes_home()
return home / _PRE_UPDATE_BACKUPS_DIR
def _prune_pre_update_backups(backup_dir: Path, keep: int) -> int:
"""Remove oldest pre-update backups beyond the keep limit.
Returns the number of files deleted. Only touches files matching
``pre-update-*.zip`` so hand-made zips dropped in the same directory
are never touched.
"""
if keep < 0:
keep = 0
if not backup_dir.exists():
return 0
backups = sorted(
(p for p in backup_dir.iterdir()
if p.is_file() and p.name.startswith(_PRE_UPDATE_PREFIX) and p.suffix.lower() == ".zip"),
key=lambda p: p.name,
reverse=True,
)
deleted = 0
for p in backups[keep:]:
try:
p.unlink()
deleted += 1
except OSError as exc:
logger.warning("Failed to prune backup %s: %s", p.name, exc)
return deleted
def create_pre_update_backup(
hermes_home: Optional[Path] = None,
keep: int = _PRE_UPDATE_DEFAULT_KEEP,
) -> Optional[Path]:
"""Create a full zip backup of HERMES_HOME under ``backups/``.
Mirrors :func:`run_backup` (same exclusion rules, same SQLite safe-copy)
but writes to ``<HERMES_HOME>/backups/pre-update-<timestamp>.zip`` and
auto-prunes old pre-update backups.
Returns the path to the created zip, or ``None`` if no files were
found or the backup could not be created. Never raises the caller
(``hermes update``) should continue even if the backup fails.
"""
hermes_root = hermes_home or get_default_hermes_root()
if not hermes_root.is_dir():
return None
backup_dir = _pre_update_backup_dir(hermes_root)
try:
backup_dir.mkdir(parents=True, exist_ok=True)
except OSError as exc:
logger.warning("Could not create pre-update backup dir %s: %s", backup_dir, exc)
return None
stamp = datetime.now().strftime("%Y-%m-%d-%H%M%S")
out_path = backup_dir / f"{_PRE_UPDATE_PREFIX}{stamp}.zip"
# Collect files (same logic as run_backup, minus the chatty progress prints)
files_to_add: list[tuple[Path, Path]] = []
try:
for dirpath, dirnames, filenames in os.walk(hermes_root, followlinks=False):
dp = Path(dirpath)
# Prune excluded directories in-place so os.walk doesn't descend
dirnames[:] = [d for d in dirnames if d not in _EXCLUDED_DIRS]
for fname in filenames:
fpath = dp / fname
try:
rel = fpath.relative_to(hermes_root)
except ValueError:
continue
if _should_exclude(rel):
continue
# Skip the output zip itself if it already exists
try:
if fpath.resolve() == out_path.resolve():
continue
except (OSError, ValueError):
pass
files_to_add.append((fpath, rel))
except OSError as exc:
logger.warning("Pre-update backup: walk failed: %s", exc)
return None
if not files_to_add:
return None
try:
with zipfile.ZipFile(out_path, "w", zipfile.ZIP_DEFLATED, compresslevel=6) as zf:
for abs_path, rel_path in files_to_add:
try:
if abs_path.suffix == ".db":
with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as tmp:
tmp_db = Path(tmp.name)
try:
if _safe_copy_db(abs_path, tmp_db):
zf.write(tmp_db, arcname=str(rel_path))
finally:
tmp_db.unlink(missing_ok=True)
else:
zf.write(abs_path, arcname=str(rel_path))
except (PermissionError, OSError, ValueError) as exc:
logger.debug("Skipping %s in pre-update backup: %s", rel_path, exc)
continue
except OSError as exc:
logger.warning("Pre-update backup: zip write failed: %s", exc)
# Best-effort cleanup of partial file
try:
out_path.unlink(missing_ok=True)
except OSError:
pass
return None
_prune_pre_update_backups(backup_dir, keep=keep)
return out_path
+6 -115
View File
@@ -62,8 +62,6 @@ COMMAND_REGISTRY: list[CommandDef] = [
aliases=("reset",)),
CommandDef("clear", "Clear screen and start a new session", "Session",
cli_only=True),
CommandDef("redraw", "Force a full UI repaint (recovers from terminal drift)", "Session",
cli_only=True),
CommandDef("history", "Show conversation history", "Session",
cli_only=True),
CommandDef("save", "Save the current conversation", "Session",
@@ -86,7 +84,9 @@ COMMAND_REGISTRY: list[CommandDef] = [
CommandDef("deny", "Deny a pending dangerous command", "Session",
gateway_only=True),
CommandDef("background", "Run a prompt in the background", "Session",
aliases=("bg", "btw"), args_hint="<prompt>"),
aliases=("bg",), args_hint="<prompt>"),
CommandDef("btw", "Ephemeral side question using session context (no tools, not persisted)", "Session",
args_hint="<question>"),
CommandDef("agents", "Show active agents and running tasks", "Session",
aliases=("tasks",)),
CommandDef("queue", "Queue a prompt for the next turn (doesn't interrupt)", "Session",
@@ -103,8 +103,7 @@ COMMAND_REGISTRY: list[CommandDef] = [
# Configuration
CommandDef("config", "Show current configuration", "Configuration",
cli_only=True),
CommandDef("model", "Switch model for this session", "Configuration",
aliases=("provider",), args_hint="[model] [--provider name] [--global]"),
CommandDef("model", "Switch model for this session", "Configuration", args_hint="[model] [--provider name] [--global]"),
CommandDef("gquota", "Show Google Gemini Code Assist quota usage", "Info",
cli_only=True),
@@ -128,8 +127,8 @@ COMMAND_REGISTRY: list[CommandDef] = [
CommandDef("voice", "Toggle voice mode", "Configuration",
args_hint="[on|off|tts|status]", subcommands=("on", "off", "tts", "status")),
CommandDef("busy", "Control what Enter does while Hermes is working", "Configuration",
cli_only=True, args_hint="[queue|steer|interrupt|status]",
subcommands=("queue", "steer", "interrupt", "status")),
cli_only=True, args_hint="[queue|interrupt|status]",
subcommands=("queue", "interrupt", "status")),
# Tools & Skills
CommandDef("tools", "Manage tools: /tools [list|disable|enable] [name...]", "Tools & Skills",
@@ -808,114 +807,6 @@ def discord_skill_commands_by_category(
return trimmed_categories, uncategorized, hidden
# ---------------------------------------------------------------------------
# Slack native slash commands
# ---------------------------------------------------------------------------
# Slack slash command name constraints: lowercase a-z, 0-9, hyphens,
# underscores. Max 32 chars. Slack app manifest accepts up to 50 slash
# commands per app.
_SLACK_MAX_SLASH_COMMANDS = 50
_SLACK_NAME_LIMIT = 32
_SLACK_INVALID_CHARS = re.compile(r"[^a-z0-9_\-]")
def _sanitize_slack_name(raw: str) -> str:
"""Convert a command name to a valid Slack slash command name.
Slack allows lowercase a-z, digits, hyphens, and underscores. Max 32
chars. Uppercase is lowercased; invalid chars are stripped.
"""
name = raw.lower()
name = _SLACK_INVALID_CHARS.sub("", name)
name = name.strip("-_")
return name[:_SLACK_NAME_LIMIT]
def slack_native_slashes() -> list[tuple[str, str, str]]:
"""Return (slash_name, description, usage_hint) triples for Slack.
Every gateway-available command in ``COMMAND_REGISTRY`` is surfaced as
a standalone Slack slash command (e.g. ``/btw``, ``/stop``, ``/model``),
matching Discord's and Telegram's model where every command is a
first-class slash and not a ``/hermes <verb>`` subcommand.
Both canonical names and aliases are included so users can type any
documented form (e.g. ``/background``, ``/bg``, and ``/btw`` all work).
Plugin-registered slash commands are included too.
Results are clamped to Slack's 50-command limit with duplicate-name
avoidance. ``/hermes`` is always reserved as the first entry so the
legacy ``/hermes <subcommand>`` form keeps working for anything that
gets dropped by the clamp or for free-form questions.
"""
overrides = _resolve_config_gates()
entries: list[tuple[str, str, str]] = []
seen: set[str] = set()
# Reserve /hermes as the catch-all top-level command.
entries.append(("hermes", "Talk to Hermes or run a subcommand", "[subcommand] [args]"))
seen.add("hermes")
def _add(name: str, desc: str, hint: str) -> None:
slack_name = _sanitize_slack_name(name)
if not slack_name or slack_name in seen:
return
if len(entries) >= _SLACK_MAX_SLASH_COMMANDS:
return
# Slack description cap is 2000 chars; keep it short.
entries.append((slack_name, desc[:140], hint[:100]))
seen.add(slack_name)
# First pass: canonical names (so they win slots if we hit the cap).
for cmd in COMMAND_REGISTRY:
if not _is_gateway_available(cmd, overrides):
continue
_add(cmd.name, cmd.description, cmd.args_hint or "")
# Second pass: aliases.
for cmd in COMMAND_REGISTRY:
if not _is_gateway_available(cmd, overrides):
continue
for alias in cmd.aliases:
# Skip aliases that only differ from canonical by case/punctuation
# normalization (already covered by _add dedup).
_add(alias, f"Alias for /{cmd.name}{cmd.description}", cmd.args_hint or "")
# Third pass: plugin commands.
for name, description, args_hint in _iter_plugin_command_entries():
_add(name, description, args_hint or "")
return entries
def slack_app_manifest(request_url: str = "https://hermes-agent.local/slack/commands") -> dict[str, Any]:
"""Generate a Slack app manifest with all gateway commands as slashes.
``request_url`` is required by Slack's manifest schema for every slash
command, but in Socket Mode (which we use) Slack ignores it and routes
the command event through the WebSocket. A placeholder URL is fine.
The returned dict is the ``features.slash_commands`` portion only
callers compose it into a full manifest (or merge into an existing
one). Keeping it narrow avoids coupling us to the rest of the manifest
schema (display_information, oauth_config, settings, etc.) which users
set up once in the Slack UI and rarely change.
"""
slashes = []
for name, desc, usage in slack_native_slashes():
entry = {
"command": f"/{name}",
"description": desc or f"Run /{name}",
"should_escape": False,
"url": request_url,
}
if usage:
entry["usage_hint"] = usage
slashes.append(entry)
return {"features": {"slash_commands": slashes}}
def slack_subcommand_map() -> dict[str, str]:
"""Return subcommand -> /command mapping for Slack /hermes handler.
+10 -206
View File
@@ -389,20 +389,6 @@ DEFAULT_CONFIG = {
# (60+ tool iterations with tiny output) before users assume the
# bot is dead and /restart.
"gateway_notify_interval": 180,
# How user-attached images are presented to the main model on each turn.
# "auto" — attach natively when the active model reports
# supports_vision=True AND the user hasn't explicitly
# configured auxiliary.vision.provider. Otherwise fall
# back to text (vision_analyze pre-analysis).
# "native" — always attach natively; non-vision models will either
# error at the provider or get a last-chance text fallback
# (see run_agent._prepare_messages_for_api).
# "text" — always pre-analyze with vision_analyze and prepend the
# description as text; the main model never sees pixels.
# Affects gateway platforms, the TUI, and CLI /attach. vision_analyze
# remains available as a tool regardless of this setting — the routing
# only controls how inbound user images are presented.
"image_input_mode": "auto",
},
"terminal": {
@@ -479,7 +465,6 @@ DEFAULT_CONFIG = {
"command_timeout": 30, # Timeout for browser commands in seconds (screenshot, navigate, etc.)
"record_sessions": False, # Auto-record browser sessions as WebM videos
"allow_private_urls": False, # Allow navigating to private/internal IPs (localhost, 192.168.x.x, etc.)
"auto_local_for_private_urls": True, # When a cloud provider is set, auto-spawn local Chromium for LAN/localhost URLs instead of sending them to the cloud
"cdp_url": "", # Optional persistent CDP endpoint for attaching to an existing Chromium/Chrome
# CDP supervisor — dialog + frame detection via a persistent WebSocket.
# Active only when a CDP-capable backend is attached (Browserbase or
@@ -501,19 +486,6 @@ DEFAULT_CONFIG = {
"checkpoints": {
"enabled": True,
"max_snapshots": 50, # Max checkpoints to keep per directory
# Auto-maintenance: shadow repos accumulate forever under
# ~/.hermes/checkpoints/ (one per cd'd working directory). Field
# reports put the typical offender at 1000+ repos / ~12 GB. When
# auto_prune is on, hermes sweeps at startup (at most once per
# min_interval_hours) and deletes:
# * orphan repos: HERMES_WORKDIR no longer exists on disk
# * stale repos: newest mtime older than retention_days
# Opt-in so users who rely on /rollback against long-ago sessions
# never lose data silently.
"auto_prune": False,
"retention_days": 7,
"delete_orphans": True,
"min_interval_hours": 24,
},
# Maximum characters returned by a single read_file call. Reads that
@@ -640,6 +612,14 @@ DEFAULT_CONFIG = {
"timeout": 30,
"extra_body": {},
},
"flush_memories": {
"provider": "auto",
"model": "",
"base_url": "",
"api_key": "",
"timeout": 30,
"extra_body": {},
},
"title_generation": {
"provider": "auto",
"model": "",
@@ -654,7 +634,7 @@ DEFAULT_CONFIG = {
"compact": False,
"personality": "kawaii",
"resume_display": "full",
"busy_input_mode": "interrupt", # interrupt | queue | steer
"busy_input_mode": "interrupt",
"bell_on_complete": False,
"show_reasoning": False,
"streaming": False,
@@ -868,7 +848,7 @@ DEFAULT_CONFIG = {
"auto_thread": True, # Auto-create threads on @mention in channels (like Slack)
"reactions": True, # Add 👀/✅/❌ reactions to messages during processing
"channel_prompts": {}, # Per-channel ephemeral system prompts (forum parents apply to child threads)
# discord / discord_admin tools: restrict which actions the agent may call.
# discord_server tool: restrict which actions the agent may call.
# Default (empty) = all actions allowed (subject to bot privileged intents).
# Accepts comma-separated string ("list_guilds,list_channels,fetch_messages")
# or YAML list. Unknown names are dropped with a warning at load time.
@@ -987,27 +967,6 @@ DEFAULT_CONFIG = {
"backup_count": 3, # Number of rotated backup files to keep
},
# Remotely-hosted model catalog manifest. When enabled, the CLI fetches
# curated model lists for OpenRouter and Nous Portal from this URL,
# falling back to the in-repo snapshot on network failure. Lets us
# update model picker lists without shipping a hermes-agent release.
# The default URL is served by the docs site GitHub Pages deploy.
"model_catalog": {
"enabled": True,
"url": "https://hermes-agent.nousresearch.com/docs/api/model-catalog.json",
# Disk cache TTL in hours. Beyond this, the CLI refetches on the
# next /model or `hermes model` invocation; network failures
# silently fall back to the stale cache.
"ttl_hours": 24,
# Optional per-provider override URLs for third parties that want
# to self-host their own curation list using the same schema.
# Example:
# providers:
# openrouter:
# url: https://example.com/my-curation.json
"providers": {},
},
# Network settings — workarounds for connectivity issues.
"network": {
# Force IPv4 connections. On servers with broken or unreachable IPv6,
@@ -1044,27 +1003,6 @@ DEFAULT_CONFIG = {
"min_interval_hours": 24,
},
# Contextual first-touch onboarding hints (see agent/onboarding.py).
# Each hint is shown once per install and then latched here so it
# never fires again. Users can wipe the section to re-see all hints.
"onboarding": {
"seen": {},
},
# ``hermes update`` behaviour.
"updates": {
# Run a full ``hermes backup``-style zip of HERMES_HOME before every
# ``hermes update``. Backups land in ``<HERMES_HOME>/backups/`` and
# can be restored with ``hermes import <path>``. Off by default —
# on large HERMES_HOME directories the zip can add minutes to every
# update. Set to true to re-enable, or pass ``--backup`` to opt in
# for a single update run.
"pre_update_backup": False,
# How many pre-update backup zips to retain. Older ones are pruned
# automatically after each successful backup.
"backup_keep": 5,
},
# Config schema version - bump this when adding new required fields
"_config_version": 22,
}
@@ -1254,22 +1192,6 @@ OPTIONAL_ENV_VARS = {
"category": "provider",
"advanced": True,
},
"GMI_API_KEY": {
"description": "GMI Cloud API key",
"prompt": "GMI Cloud API key",
"url": "https://www.gmicloud.ai/",
"password": True,
"category": "provider",
"advanced": True,
},
"GMI_BASE_URL": {
"description": "GMI Cloud base URL override",
"prompt": "GMI Cloud base URL (leave empty for default)",
"url": None,
"password": False,
"category": "provider",
"advanced": True,
},
"MINIMAX_API_KEY": {
"description": "MiniMax API key (international)",
"prompt": "MiniMax API key",
@@ -1457,21 +1379,6 @@ OPTIONAL_ENV_VARS = {
"category": "provider",
"advanced": True,
},
"AZURE_FOUNDRY_API_KEY": {
"description": "Azure Foundry API key for custom Azure endpoints",
"prompt": "Azure Foundry API Key",
"url": "https://ai.azure.com/",
"password": True,
"category": "provider",
},
"AZURE_FOUNDRY_BASE_URL": {
"description": "Azure Foundry base URL (set via 'hermes model' for endpoint-specific config)",
"prompt": "Azure Foundry base URL",
"url": None,
"password": False,
"category": "provider",
"advanced": True,
},
# ── Tool API keys ──
"EXA_API_KEY": {
@@ -1639,44 +1546,6 @@ OPTIONAL_ENV_VARS = {
"category": "tool",
},
# ── Bundled skills (opt-in: only needed if the user uses that skill) ──
# These use category="skill" (distinct from "tool") so the sandbox
# env blocklist in tools/environments/local.py does NOT rewrite them —
# skills legitimately need these passed through to curl via
# tools/env_passthrough.py when the user's skill calls out.
"NOTION_API_KEY": {
"description": "Notion integration token (used by the `notion` skill)",
"prompt": "Notion API key",
"url": "https://www.notion.so/my-integrations",
"password": True,
"category": "skill",
"advanced": True,
},
"LINEAR_API_KEY": {
"description": "Linear personal API key (used by the `linear` skill)",
"prompt": "Linear API key",
"url": "https://linear.app/settings/api",
"password": True,
"category": "skill",
"advanced": True,
},
"AIRTABLE_API_KEY": {
"description": "Airtable personal access token (used by the `airtable` skill)",
"prompt": "Airtable API key",
"url": "https://airtable.com/create/tokens",
"password": True,
"category": "skill",
"advanced": True,
},
"TENOR_API_KEY": {
"description": "Tenor API key for GIF search (used by the `gif-search` skill)",
"prompt": "Tenor API key",
"url": "https://developers.google.com/tenor/guides/quickstart",
"password": True,
"category": "skill",
"advanced": True,
},
# ── Honcho ──
"HONCHO_API_KEY": {
"description": "Honcho API key for AI-native persistent memory",
@@ -2345,71 +2214,6 @@ def get_compatible_custom_providers(
return compatible
def get_custom_provider_context_length(
model: str,
base_url: str,
custom_providers: Optional[List[Dict[str, Any]]] = None,
config: Optional[Dict[str, Any]] = None,
) -> Optional[int]:
"""Look up a per-model ``context_length`` override from ``custom_providers``.
Matches any entry whose ``base_url`` equals ``base_url`` (trailing-slash
insensitive) and returns ``custom_providers[i].models.<model>.context_length``
if present and valid. Returns ``None`` when no override applies.
This is the single source of truth for custom-provider context overrides,
used by:
* ``AIAgent.__init__`` (startup resolution)
* ``AIAgent.switch_model`` (mid-session ``/model`` switch)
* ``hermes_cli.model_switch.resolve_display_context_length`` (``/model`` confirmation display)
* ``gateway.run._format_session_info`` (``/info`` display)
* ``agent.model_metadata.get_model_context_length`` (when custom_providers is threaded through)
Before this helper existed, the lookup was duplicated in ``run_agent.py``'s
startup path only; every other path (notably ``/model`` switch) fell back
to the 128K default. See #15779.
"""
if not model or not base_url:
return None
if custom_providers is None:
try:
custom_providers = get_compatible_custom_providers(config)
except Exception:
if config is None:
return None
raw = config.get("custom_providers")
custom_providers = raw if isinstance(raw, list) else []
if not isinstance(custom_providers, list):
return None
target_url = (base_url or "").rstrip("/")
if not target_url:
return None
for entry in custom_providers:
if not isinstance(entry, dict):
continue
entry_url = (entry.get("base_url") or "").rstrip("/")
if not entry_url or entry_url != target_url:
continue
models = entry.get("models")
if not isinstance(models, dict):
continue
model_cfg = models.get(model)
if not isinstance(model_cfg, dict):
continue
raw_ctx = model_cfg.get("context_length")
if raw_ctx is None:
continue
try:
ctx = int(raw_ctx)
except (TypeError, ValueError):
continue
if ctx > 0:
return ctx
return None
def check_config_version() -> Tuple[int, int]:
"""
Check config version.
+5 -11
View File
@@ -45,13 +45,8 @@ def _pending_file() -> Path:
Each entry: ``{"url": "...", "expire_at": <unix_ts>}``. Scheduled
DELETEs used to be handled by spawning a detached Python process per
paste that slept for 6 hours; those accumulated forever if the user
ran ``hermes debug share`` repeatedly.
Deletion is now driven by the gateway's cron ticker
(``gateway/run.py::_start_cron_ticker``) which calls
``_sweep_expired_pastes`` once per hour. ``hermes debug share`` also
runs an opportunistic sweep on entry as a fallback for CLI-only users
who never start the gateway.
ran ``hermes debug share`` repeatedly. We now persist the schedule
to disk and sweep expired entries on the next debug invocation.
"""
return get_hermes_home() / "pastes" / "pending.json"
@@ -228,10 +223,9 @@ def _schedule_auto_delete(urls: list[str], delay_seconds: int = _AUTO_DELETE_SEC
interpreters that never exited until the sleep completed.
The replacement is stateless: we append to ``~/.hermes/pastes/pending.json``
and the gateway's cron ticker sweeps expired entries once per hour.
``hermes debug share`` also runs an opportunistic sweep as a fallback
for CLI-only users. If neither runs again, paste.rs's own retention
policy handles cleanup.
and rely on opportunistic sweeps (``_sweep_expired_pastes``) called from
every ``hermes debug`` invocation. If the user never runs ``hermes debug``
again, paste.rs's own retention policy handles cleanup.
"""
_record_pending(urls, delay_seconds=delay_seconds)
+1 -7
View File
@@ -46,7 +46,6 @@ _PROVIDER_ENV_HINTS = (
"Z_AI_API_KEY",
"KIMI_API_KEY",
"KIMI_CN_API_KEY",
"GMI_API_KEY",
"MINIMAX_API_KEY",
"MINIMAX_CN_API_KEY",
"KILOCODE_API_KEY",
@@ -321,11 +320,7 @@ def run_doctor(args):
known_providers.add("custom:" + name.lower().replace(" ", "-"))
canonical_provider = provider
if (
provider
and _resolve_provider_full is not None
and provider not in ("auto", "custom")
):
if provider and _resolve_provider_full is not None and provider != "auto":
provider_def = _resolve_provider_full(provider, user_providers, custom_providers)
canonical_provider = provider_def.id if provider_def is not None else None
@@ -938,7 +933,6 @@ def run_doctor(args):
("StepFun Step Plan", ("STEPFUN_API_KEY",), "https://api.stepfun.ai/step_plan/v1/models", "STEPFUN_BASE_URL", True),
("Kimi / Moonshot (China)", ("KIMI_CN_API_KEY",), "https://api.moonshot.cn/v1/models", None, True),
("Arcee AI", ("ARCEEAI_API_KEY",), "https://api.arcee.ai/api/v1/models", "ARCEE_BASE_URL", True),
("GMI Cloud", ("GMI_API_KEY",), "https://api.gmi-serving.com/v1/models", "GMI_BASE_URL", True),
("DeepSeek", ("DEEPSEEK_API_KEY",), "https://api.deepseek.com/v1/models", "DEEPSEEK_BASE_URL", True),
("Hugging Face", ("HF_TOKEN",), "https://router.huggingface.co/v1/models", "HF_BASE_URL", True),
("NVIDIA NIM", ("NVIDIA_API_KEY",), "https://integrate.api.nvidia.com/v1/models", "NVIDIA_BASE_URL", True),
-361
View File
@@ -1,361 +0,0 @@
"""
hermes fallback manage the fallback provider chain.
Fallback providers are tried in order when the primary model fails with
rate-limit, overload, or connection errors. See:
https://hermes-agent.nousresearch.com/docs/user-guide/features/fallback-providers
Subcommands:
hermes fallback [list] Show the current fallback chain (default when no subcommand)
hermes fallback add Pick provider + model via the same picker as `hermes model`,
then append the selection to the chain
hermes fallback remove Pick an entry to delete from the chain
hermes fallback clear Remove all fallback entries
Storage: ``fallback_providers`` in ``~/.hermes/config.yaml`` (top-level, list of
``{provider, model, base_url?, api_mode?}`` dicts). The legacy single-dict
``fallback_model`` format is migrated to the new list format on first add.
"""
from __future__ import annotations
import copy
from typing import Any, Dict, List, Optional
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _read_chain(config: Dict[str, Any]) -> List[Dict[str, Any]]:
"""Return the normalized fallback chain as a list of dicts.
Accepts both the new list format (``fallback_providers``) and the legacy
single-dict format (``fallback_model``). The returned list is always a
fresh copy callers can mutate without touching the config dict.
"""
chain = config.get("fallback_providers") or []
if isinstance(chain, list):
result = [dict(e) for e in chain if isinstance(e, dict) and e.get("provider") and e.get("model")]
if result:
return result
legacy = config.get("fallback_model")
if isinstance(legacy, dict) and legacy.get("provider") and legacy.get("model"):
return [dict(legacy)]
if isinstance(legacy, list):
return [dict(e) for e in legacy if isinstance(e, dict) and e.get("provider") and e.get("model")]
return []
def _write_chain(config: Dict[str, Any], chain: List[Dict[str, Any]]) -> None:
"""Persist the chain to ``fallback_providers`` and clear legacy key."""
config["fallback_providers"] = chain
# Drop the legacy single-dict key on write so there's only one source of truth.
if "fallback_model" in config:
config.pop("fallback_model", None)
def _format_entry(entry: Dict[str, Any]) -> str:
"""One-line human-readable rendering of a fallback entry."""
provider = entry.get("provider", "?")
model = entry.get("model", "?")
base = entry.get("base_url")
suffix = f" [{base}]" if base else ""
return f"{model} (via {provider}){suffix}"
def _extract_fallback_from_model_cfg(model_cfg: Any) -> Optional[Dict[str, Any]]:
"""Pull the ``{provider, model, base_url?, api_mode?}`` dict from a ``config["model"]`` snapshot."""
if not isinstance(model_cfg, dict):
return None
provider = (model_cfg.get("provider") or "").strip()
# The picker writes the selected model to ``model.default``.
model = (model_cfg.get("default") or model_cfg.get("model") or "").strip()
if not provider or not model:
return None
entry: Dict[str, Any] = {"provider": provider, "model": model}
base_url = (model_cfg.get("base_url") or "").strip()
if base_url:
entry["base_url"] = base_url
api_mode = (model_cfg.get("api_mode") or "").strip()
if api_mode:
entry["api_mode"] = api_mode
return entry
def _snapshot_auth_active_provider() -> Any:
"""Return the current ``active_provider`` in auth.json, or a sentinel if unavailable."""
try:
from hermes_cli.auth import _load_auth_store
store = _load_auth_store()
return store.get("active_provider")
except Exception:
return None
def _restore_auth_active_provider(value: Any) -> None:
"""Write back a previously snapshotted ``active_provider`` value."""
try:
from hermes_cli.auth import _auth_store_lock, _load_auth_store, _save_auth_store
with _auth_store_lock():
store = _load_auth_store()
store["active_provider"] = value
_save_auth_store(store)
except Exception:
# Best-effort — if auth.json can't be restored, the user's primary
# provider may have been deactivated by the picker. They can re-run
# `hermes model` to fix it. Don't fail the fallback add.
pass
# ---------------------------------------------------------------------------
# Subcommand handlers
# ---------------------------------------------------------------------------
def cmd_fallback_list(args) -> None: # noqa: ARG001
"""Print the current fallback chain."""
from hermes_cli.config import load_config
config = load_config()
chain = _read_chain(config)
print()
if not chain:
print(" No fallback providers configured.")
print()
print(" Add one with: hermes fallback add")
print()
return
primary = _describe_primary(config)
if primary:
print(f" Primary: {primary}")
print()
print(f" Fallback chain ({len(chain)} {'entry' if len(chain) == 1 else 'entries'}):")
for i, entry in enumerate(chain, 1):
print(f" {i}. {_format_entry(entry)}")
print()
print(" Tried in order when the primary fails (rate-limit, 5xx, connection errors).")
print(" Docs: https://hermes-agent.nousresearch.com/docs/user-guide/features/fallback-providers")
print()
def _describe_primary(config: Dict[str, Any]) -> Optional[str]:
"""One-line description of the primary model for display purposes."""
model_cfg = config.get("model")
if isinstance(model_cfg, dict):
provider = (model_cfg.get("provider") or "?").strip() or "?"
model = (model_cfg.get("default") or model_cfg.get("model") or "?").strip() or "?"
return f"{model} (via {provider})"
if isinstance(model_cfg, str) and model_cfg.strip():
return model_cfg.strip()
return None
def cmd_fallback_add(args) -> None:
"""Launch the same picker as `hermes model`, then append the selection to the chain."""
from hermes_cli.main import _require_tty, select_provider_and_model
from hermes_cli.config import load_config, save_config
_require_tty("fallback add")
# Snapshot BEFORE the picker runs so we can distinguish "user actually
# picked something" from "user cancelled" by comparing before/after.
before_cfg = load_config()
model_before = copy.deepcopy(before_cfg.get("model"))
active_provider_before = _snapshot_auth_active_provider()
print()
print(" Adding a fallback provider. The picker below is the same one used by")
print(" `hermes model` — select the provider + model you want as a fallback.")
print()
try:
select_provider_and_model(args=args)
except SystemExit:
# Some provider flows exit on auth failure — restore state and re-raise.
_restore_model_cfg(model_before)
_restore_auth_active_provider(active_provider_before)
raise
# Read the post-picker state to see what the user selected.
after_cfg = load_config()
model_after = after_cfg.get("model")
new_entry = _extract_fallback_from_model_cfg(model_after)
if not new_entry:
# Picker didn't complete (user cancelled or flow bailed). Nothing to do.
_restore_model_cfg(model_before)
_restore_auth_active_provider(active_provider_before)
print()
print(" No fallback added.")
return
# Picker picked the same thing that's already the primary → nothing changed,
# and there's nothing useful to add as a fallback to itself.
primary_entry = _extract_fallback_from_model_cfg(model_before)
if primary_entry and primary_entry["provider"] == new_entry["provider"] \
and primary_entry["model"] == new_entry["model"]:
_restore_model_cfg(model_before)
_restore_auth_active_provider(active_provider_before)
print()
print(f" Selected model matches the current primary ({_format_entry(new_entry)}).")
print(" A provider cannot be a fallback for itself — no change.")
return
# Reload the config with the primary restored, then append the new entry
# to ``fallback_providers``. We deliberately re-load (rather than mutating
# ``after_cfg``) because the picker may have touched other top-level keys
# (custom_providers, providers credentials) that we want to keep.
_restore_model_cfg(model_before)
_restore_auth_active_provider(active_provider_before)
final_cfg = load_config()
chain = _read_chain(final_cfg)
# Reject exact-duplicate fallback entries.
for existing in chain:
if existing.get("provider") == new_entry["provider"] \
and existing.get("model") == new_entry["model"]:
print()
print(f" {_format_entry(new_entry)} is already in the fallback chain — skipped.")
return
chain.append(new_entry)
_write_chain(final_cfg, chain)
save_config(final_cfg)
print()
print(f" Added fallback: {_format_entry(new_entry)}")
print(f" Chain is now {len(chain)} {'entry' if len(chain) == 1 else 'entries'} long.")
print()
print(" Run `hermes fallback list` to view, or `hermes fallback remove` to delete.")
def _restore_model_cfg(model_before: Any) -> None:
"""Restore ``config["model"]`` to a previously-captured snapshot."""
from hermes_cli.config import load_config, save_config
cfg = load_config()
if model_before is None:
cfg.pop("model", None)
else:
cfg["model"] = copy.deepcopy(model_before)
save_config(cfg)
def cmd_fallback_remove(args) -> None: # noqa: ARG001
"""Pick an entry from the chain and remove it."""
from hermes_cli.config import load_config, save_config
config = load_config()
chain = _read_chain(config)
if not chain:
print()
print(" No fallback providers configured — nothing to remove.")
print()
return
choices = [_format_entry(e) for e in chain]
choices.append("Cancel")
try:
from hermes_cli.setup import _curses_prompt_choice
idx = _curses_prompt_choice("Select a fallback to remove:", choices, 0)
except Exception:
idx = _numbered_pick("Select a fallback to remove:", choices)
if idx is None or idx < 0 or idx >= len(chain):
print()
print(" Cancelled — no change.")
return
removed = chain.pop(idx)
_write_chain(config, chain)
save_config(config)
print()
print(f" Removed fallback: {_format_entry(removed)}")
if chain:
print(f" Chain is now {len(chain)} {'entry' if len(chain) == 1 else 'entries'} long.")
else:
print(" Fallback chain is now empty.")
print()
def cmd_fallback_clear(args) -> None: # noqa: ARG001
"""Remove all fallback entries (with confirmation)."""
from hermes_cli.config import load_config, save_config
config = load_config()
chain = _read_chain(config)
if not chain:
print()
print(" No fallback providers configured — nothing to clear.")
print()
return
print()
print(f" Current fallback chain ({len(chain)} {'entry' if len(chain) == 1 else 'entries'}):")
for i, entry in enumerate(chain, 1):
print(f" {i}. {_format_entry(entry)}")
print()
try:
resp = input(" Clear all entries? [y/N]: ").strip().lower()
except (KeyboardInterrupt, EOFError):
print()
print(" Cancelled.")
return
if resp not in ("y", "yes"):
print(" Cancelled — no change.")
return
_write_chain(config, [])
save_config(config)
print()
print(" Fallback chain cleared.")
print()
def _numbered_pick(question: str, choices: List[str]) -> Optional[int]:
"""Fallback numbered-list picker when curses is unavailable."""
print(question)
for i, c in enumerate(choices, 1):
print(f" {i}. {c}")
print()
while True:
try:
val = input(f"Choice [1-{len(choices)}]: ").strip()
if not val:
return None
idx = int(val) - 1
if 0 <= idx < len(choices):
return idx
print(f"Please enter 1-{len(choices)}")
except ValueError:
print("Please enter a number")
except (KeyboardInterrupt, EOFError):
print()
return None
# ---------------------------------------------------------------------------
# Dispatch
# ---------------------------------------------------------------------------
def cmd_fallback(args) -> None:
"""Top-level dispatcher for ``hermes fallback [subcommand]``."""
sub = getattr(args, "fallback_command", None)
if sub in (None, "", "list", "ls"):
cmd_fallback_list(args)
elif sub == "add":
cmd_fallback_add(args)
elif sub in ("remove", "rm"):
cmd_fallback_remove(args)
elif sub == "clear":
cmd_fallback_clear(args)
else:
print(f"Unknown fallback subcommand: {sub}")
print("Use one of: list, add, remove, clear")
raise SystemExit(2)
-24
View File
@@ -2724,24 +2724,6 @@ _PLATFORMS = [
"help": "OpenID to deliver cron results and notifications to."},
],
},
{
"key": "yuanbao",
"label": "Yuanbao",
"emoji": "💎",
"token_var": "YUANBAO_APP_ID",
"setup_instructions": [
"1. Download the Yuanbao app from https://yuanbao.tencent.com/",
"2. In the app, go to PAI → My Bot and create a new bot",
"3. After the bot is created, copy the App ID and App Secret",
"4. Enter them below and Hermes will connect automatically over WebSocket",
],
"vars": [
{"name": "YUANBAO_APP_ID", "prompt": "App ID", "password": False,
"help": "The App ID from your Yuanbao IM Bot credentials."},
{"name": "YUANBAO_APP_SECRET", "prompt": "App Secret", "password": True,
"help": "The App Secret (used for HMAC signing) from your Yuanbao IM Bot."},
],
},
]
@@ -3126,12 +3108,6 @@ def _setup_wecom():
print_success("💬 WeCom configured!")
def _setup_yuanbao():
"""Configure Yuanbao via the standard platform setup."""
yuanbao_platform = next(p for p in _PLATFORMS if p["key"] == "yuanbao")
_setup_standard_platform(yuanbao_platform)
def _is_service_installed() -> bool:
"""Check if the gateway is installed as a system service."""
if supports_systemd_services():
-1
View File
@@ -125,7 +125,6 @@ _DEFAULT_PAYLOADS = {
"task_id": "test-task",
"tool_call_id": "test-call",
"result": '{"output": "hello"}',
"duration_ms": 42,
},
"pre_llm_call": {
"session_id": "test-session",
+58 -1079
View File
File diff suppressed because it is too large Load Diff
-329
View File
@@ -1,329 +0,0 @@
"""Remote model catalog fetcher.
The Hermes docs site hosts a JSON manifest of curated models for providers
we want to update without shipping a release (currently OpenRouter and
Nous Portal). This module fetches, validates, and caches that manifest,
falling back to the in-repo hardcoded lists when the network is unavailable.
Pipeline
--------
1. ``get_catalog()`` returns a parsed manifest dict.
- Checks in-process cache (invalidated by TTL).
- Reads disk cache at ``~/.hermes/cache/model_catalog.json``.
- Fetches the master URL if disk cache is stale or missing.
- On any fetch failure, keeps using the stale cache (or empty dict).
2. ``get_curated_openrouter_models()`` / ``get_curated_nous_models()``
thin accessors returning the shapes existing callers expect. Each
falls back to the in-repo hardcoded list on any lookup failure.
Schema (version 1)
------------------
::
{
"version": 1,
"updated_at": "2026-04-25T22:00:00Z",
"metadata": {...}, # free-form
"providers": {
"openrouter": {
"metadata": {...}, # free-form
"models": [
{"id": "vendor/model", "description": "recommended",
"metadata": {...}} # free-form, model-level
]
},
"nous": {...}
}
}
Unknown fields are ignored extra metadata can be added at either level
without bumping ``version``. ``version`` bumps are reserved for
breaking changes (renaming ``providers``, changing ``models`` shape).
"""
from __future__ import annotations
import json
import logging
import os
import time
import urllib.error
import urllib.request
from pathlib import Path
from typing import Any
from hermes_cli import __version__ as _HERMES_VERSION
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------
DEFAULT_CATALOG_URL = (
"https://hermes-agent.nousresearch.com/docs/api/model-catalog.json"
)
DEFAULT_TTL_HOURS = 24
DEFAULT_FETCH_TIMEOUT = 8.0
SUPPORTED_SCHEMA_VERSION = 1
_HERMES_USER_AGENT = f"hermes-cli/{_HERMES_VERSION}"
# In-process cache to avoid repeated disk + parse work across multiple
# calls within the same session. Invalidated by TTL against the disk file's
# mtime, so calling code never has to think about this.
_catalog_cache: dict[str, Any] | None = None
_catalog_cache_source_mtime: float = 0.0
# ---------------------------------------------------------------------------
# Config
# ---------------------------------------------------------------------------
def _load_catalog_config() -> dict[str, Any]:
"""Load the ``model_catalog`` config block with defaults filled in."""
try:
from hermes_cli.config import load_config
cfg = load_config() or {}
except Exception:
cfg = {}
raw = cfg.get("model_catalog")
if not isinstance(raw, dict):
raw = {}
return {
"enabled": bool(raw.get("enabled", True)),
"url": str(raw.get("url") or DEFAULT_CATALOG_URL),
"ttl_hours": float(raw.get("ttl_hours") or DEFAULT_TTL_HOURS),
"providers": raw.get("providers") if isinstance(raw.get("providers"), dict) else {},
}
def _cache_path() -> Path:
"""Return the disk cache path. Import lazily so tests can monkeypatch home."""
from hermes_constants import get_hermes_home
return get_hermes_home() / "cache" / "model_catalog.json"
# ---------------------------------------------------------------------------
# Fetch + validate + cache
# ---------------------------------------------------------------------------
def _fetch_manifest(url: str, timeout: float) -> dict[str, Any] | None:
"""HTTP GET the manifest URL and return a parsed dict, or None on failure."""
try:
req = urllib.request.Request(
url,
headers={
"Accept": "application/json",
"User-Agent": _HERMES_USER_AGENT,
},
)
with urllib.request.urlopen(req, timeout=timeout) as resp:
data = json.loads(resp.read().decode())
except (urllib.error.URLError, TimeoutError, json.JSONDecodeError, OSError) as exc:
logger.info("model catalog fetch failed (%s): %s", url, exc)
return None
except Exception as exc: # pragma: no cover — defensive
logger.info("model catalog fetch errored (%s): %s", url, exc)
return None
if not _validate_manifest(data):
logger.info("model catalog at %s failed schema validation", url)
return None
return data
def _validate_manifest(data: Any) -> bool:
"""Return True when ``data`` matches the minimum manifest shape."""
if not isinstance(data, dict):
return False
version = data.get("version")
if not isinstance(version, int) or version > SUPPORTED_SCHEMA_VERSION:
# Future schema version we don't understand — refuse rather than
# guess. Older schemas (version < 1) aren't supported either.
return False
providers = data.get("providers")
if not isinstance(providers, dict):
return False
for pname, pblock in providers.items():
if not isinstance(pname, str) or not isinstance(pblock, dict):
return False
models = pblock.get("models")
if not isinstance(models, list):
return False
for m in models:
if not isinstance(m, dict):
return False
if not isinstance(m.get("id"), str) or not m["id"].strip():
return False
return True
def _read_disk_cache() -> tuple[dict[str, Any] | None, float]:
"""Return ``(data_or_none, mtime)``. mtime is 0 if file is missing."""
path = _cache_path()
try:
mtime = path.stat().st_mtime
except (OSError, FileNotFoundError):
return (None, 0.0)
try:
with open(path) as fh:
data = json.load(fh)
except (OSError, json.JSONDecodeError):
return (None, 0.0)
if not _validate_manifest(data):
return (None, 0.0)
return (data, mtime)
def _write_disk_cache(data: dict[str, Any]) -> None:
path = _cache_path()
try:
path.parent.mkdir(parents=True, exist_ok=True)
tmp = path.with_suffix(path.suffix + ".tmp")
with open(tmp, "w") as fh:
json.dump(data, fh, indent=2)
fh.write("\n")
os.replace(tmp, path)
except OSError as exc:
logger.info("model catalog cache write failed: %s", exc)
# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------
def get_catalog(*, force_refresh: bool = False) -> dict[str, Any]:
"""Return the parsed model catalog manifest, or an empty dict on failure.
Callers should treat a missing provider/model as "use the in-repo fallback"
never raise from this function so the CLI keeps working offline.
"""
global _catalog_cache, _catalog_cache_source_mtime
cfg = _load_catalog_config()
if not cfg["enabled"]:
return {}
ttl_seconds = max(0.0, cfg["ttl_hours"] * 3600.0)
disk_data, disk_mtime = _read_disk_cache()
now = time.time()
disk_fresh = disk_data is not None and (now - disk_mtime) < ttl_seconds
# In-process cache hit: disk hasn't changed since we loaded it and still fresh.
if (
not force_refresh
and _catalog_cache is not None
and disk_data is not None
and disk_mtime == _catalog_cache_source_mtime
and disk_fresh
):
return _catalog_cache
# Disk is fresh enough — use it without a network hit.
if not force_refresh and disk_fresh and disk_data is not None:
_catalog_cache = disk_data
_catalog_cache_source_mtime = disk_mtime
return disk_data
# Need to (re)fetch. If it fails, fall back to any stale disk copy.
fetched = _fetch_manifest(cfg["url"], DEFAULT_FETCH_TIMEOUT)
if fetched is not None:
_write_disk_cache(fetched)
new_disk_data, new_mtime = _read_disk_cache()
if new_disk_data is not None:
_catalog_cache = new_disk_data
_catalog_cache_source_mtime = new_mtime
return new_disk_data
_catalog_cache = fetched
_catalog_cache_source_mtime = now
return fetched
if disk_data is not None:
_catalog_cache = disk_data
_catalog_cache_source_mtime = disk_mtime
return disk_data
return {}
def _fetch_provider_override(provider: str) -> dict[str, Any] | None:
"""If ``model_catalog.providers.<name>.url`` is set, fetch that instead."""
cfg = _load_catalog_config()
if not cfg["enabled"]:
return None
provider_cfg = cfg["providers"].get(provider)
if not isinstance(provider_cfg, dict):
return None
override_url = provider_cfg.get("url")
if not isinstance(override_url, str) or not override_url.strip():
return None
# Override fetches skip the disk cache because they're usually
# third-party self-hosted. Re-request on every call but with a short
# timeout so they don't block the picker.
return _fetch_manifest(override_url.strip(), DEFAULT_FETCH_TIMEOUT)
def _get_provider_block(provider: str) -> dict[str, Any] | None:
"""Return the provider's manifest block, respecting per-provider overrides."""
override = _fetch_provider_override(provider)
if override is not None:
block = override.get("providers", {}).get(provider)
if isinstance(block, dict):
return block
catalog = get_catalog()
if not catalog:
return None
block = catalog.get("providers", {}).get(provider)
return block if isinstance(block, dict) else None
def get_curated_openrouter_models() -> list[tuple[str, str]] | None:
"""Return OpenRouter's curated ``[(id, description), ...]`` from the manifest.
Returns ``None`` when the manifest is unavailable, so callers can fall
back to their hardcoded list.
"""
block = _get_provider_block("openrouter")
if not block:
return None
out: list[tuple[str, str]] = []
for m in block.get("models", []):
mid = str(m.get("id") or "").strip()
if not mid:
continue
desc = str(m.get("description") or "")
out.append((mid, desc))
return out or None
def get_curated_nous_models() -> list[str] | None:
"""Return Nous Portal's curated list of model ids from the manifest.
Returns ``None`` when the manifest is unavailable.
"""
block = _get_provider_block("nous")
if not block:
return None
out: list[str] = []
for m in block.get("models", []):
mid = str(m.get("id") or "").strip()
if mid:
out.append(mid)
return out or None
def reset_cache() -> None:
"""Clear the in-process cache. Used by tests and ``hermes model --refresh``."""
global _catalog_cache, _catalog_cache_source_mtime
_catalog_cache = None
_catalog_cache_source_mtime = 0.0
-7
View File
@@ -533,7 +533,6 @@ def resolve_display_context_length(
base_url: str = "",
api_key: str = "",
model_info: Optional[ModelInfo] = None,
custom_providers: list | None = None,
) -> Optional[int]:
"""Resolve the context length to show in /model output.
@@ -544,11 +543,6 @@ def resolve_display_context_length(
about Codex OAuth, Copilot, Nous, and falls back to models.dev for the
rest.
When ``custom_providers`` is provided, per-model ``context_length``
overrides from ``custom_providers[].models.<id>.context_length`` are
honored this closes #15779 where ``/model`` switch ignored user-set
overrides.
Prefer the provider-aware value; fall back to ``model_info.context_window``
only if the resolver returns nothing.
"""
@@ -559,7 +553,6 @@ def resolve_display_context_length(
base_url=base_url or "",
api_key=api_key or "",
provider=provider or None,
custom_providers=custom_providers,
)
if ctx:
return int(ctx)
+79 -226
View File
@@ -33,6 +33,8 @@ COPILOT_REASONING_EFFORTS_O_SERIES = ["low", "medium", "high"]
# (model_id, display description shown in menus)
OPENROUTER_MODELS: list[tuple[str, str]] = [
("moonshotai/kimi-k2.6", "recommended"),
("deepseek/deepseek-v4-pro", ""),
("deepseek/deepseek-v4-flash", ""),
("anthropic/claude-opus-4.7", ""),
("anthropic/claude-opus-4.6", ""),
("anthropic/claude-sonnet-4.6", ""),
@@ -109,6 +111,8 @@ def _codex_curated_models() -> list[str]:
_PROVIDER_MODELS: dict[str, list[str]] = {
"nous": [
"moonshotai/kimi-k2.6",
"deepseek/deepseek-v4-pro",
"deepseek/deepseek-v4-flash",
"xiaomi/mimo-v2.5-pro",
"xiaomi/mimo-v2.5",
"anthropic/claude-opus-4.7",
@@ -278,14 +282,6 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
"trinity-large-preview",
"trinity-mini",
],
"gmi": [
"zai-org/GLM-5.1-FP8",
"deepseek-ai/DeepSeek-V3.2",
"moonshotai/Kimi-K2.5",
"google/gemini-3.1-flash-lite-preview",
"anthropic/claude-sonnet-4.6",
"openai/gpt-5.4",
],
"opencode-zen": [
"kimi-k2.5",
"gpt-5.4-pro",
@@ -387,9 +383,6 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
"us.meta.llama4-maverick-17b-instruct-v1:0",
"us.meta.llama4-scout-17b-instruct-v1:0",
],
# Azure Foundry: user-provided endpoint and model.
# Empty list because models depend on the endpoint configuration.
"azure-foundry": [],
}
# Vercel AI Gateway: derive the bare-model-id catalog from the curated
@@ -717,6 +710,7 @@ class ProviderEntry(NamedTuple):
label: str
tui_desc: str # detailed description for `hermes model` TUI
CANONICAL_PROVIDERS: list[ProviderEntry] = [
ProviderEntry("nous", "Nous Portal", "Nous Portal (Nous Research subscription)"),
ProviderEntry("openrouter", "OpenRouter", "OpenRouter (100+ models, pay-per-use)"),
@@ -742,12 +736,10 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
ProviderEntry("alibaba", "Alibaba Cloud (DashScope)","Alibaba Cloud / DashScope Coding (Qwen + multi-provider)"),
ProviderEntry("ollama-cloud", "Ollama Cloud", "Ollama Cloud (cloud-hosted open models — ollama.com)"),
ProviderEntry("arcee", "Arcee AI", "Arcee AI (Trinity models — direct API)"),
ProviderEntry("gmi", "GMI Cloud", "GMI Cloud (multi-model direct API)"),
ProviderEntry("kilocode", "Kilo Code", "Kilo Code (Kilo Gateway API)"),
ProviderEntry("opencode-zen", "OpenCode Zen", "OpenCode Zen (35+ curated models, pay-as-you-go)"),
ProviderEntry("opencode-go", "OpenCode Go", "OpenCode Go (open models, $10/month subscription)"),
ProviderEntry("bedrock", "AWS Bedrock", "AWS Bedrock (Claude, Nova, Llama, DeepSeek — IAM or API key)"),
ProviderEntry("azure-foundry", "Azure Foundry", "Azure Foundry (OpenAI-style or Anthropic-style endpoint — your Azure AI deployment)"),
]
# Derived dicts — used throughout the codebase
@@ -777,8 +769,6 @@ _PROVIDER_ALIASES = {
"stepfun-coding-plan": "stepfun",
"arcee-ai": "arcee",
"arceeai": "arcee",
"gmi-cloud": "gmi",
"gmicloud": "gmi",
"minimax-china": "minimax-cn",
"minimax_cn": "minimax-cn",
"claude": "anthropic",
@@ -882,16 +872,7 @@ def fetch_openrouter_models(
if _openrouter_catalog_cache is not None and not force_refresh:
return list(_openrouter_catalog_cache)
# Prefer the remotely-hosted catalog manifest; fall back to the in-repo
# snapshot when the manifest is unreachable. Both are curated lists that
# drive the picker; the OpenRouter live /v1/models filter (tool support,
# free pricing) is applied on top either way.
try:
from hermes_cli.model_catalog import get_curated_openrouter_models
remote = get_curated_openrouter_models()
except Exception:
remote = None
fallback = list(remote) if remote else list(OPENROUTER_MODELS)
fallback = list(OPENROUTER_MODELS)
preferred_ids = [mid for mid, _ in fallback]
try:
@@ -944,24 +925,6 @@ def model_ids(*, force_refresh: bool = False) -> list[str]:
return [mid for mid, _ in fetch_openrouter_models(force_refresh=force_refresh)]
def get_curated_nous_model_ids() -> list[str]:
"""Return the curated Nous Portal model-id list.
Prefers the remotely-hosted catalog manifest (published under
``website/static/api/model-catalog.json``); falls back to the in-repo
snapshot in ``_PROVIDER_MODELS["nous"]`` when the manifest is
unreachable. Always returns a list (never None).
"""
try:
from hermes_cli.model_catalog import get_curated_nous_models
remote = get_curated_nous_models()
except Exception:
remote = None
if remote:
return list(remote)
return list(_PROVIDER_MODELS.get("nous", []))
def _ai_gateway_model_is_free(pricing: Any) -> bool:
"""Return True if an AI Gateway model has $0 input AND output pricing."""
if not isinstance(pricing, dict):
@@ -1416,124 +1379,6 @@ def curated_models_for_provider(
return [(m, "") for m in models]
def _provider_keys(provider: str) -> set[str]:
key = (provider or "").strip().lower()
normalized = normalize_provider(provider)
return {k for k in (key, normalized) if k}
def _model_in_provider_catalog(name_lower: str, providers: set[str]) -> bool:
return any(
name_lower == model.lower()
for provider in providers
for model in _PROVIDER_MODELS.get(provider, [])
)
_AGGREGATOR_PROVIDERS = frozenset(
{"nous", "openrouter", "ai-gateway", "copilot", "kilocode"}
)
def _resolve_static_model_alias(
name_lower: str,
current_keys: set[str],
) -> Optional[tuple[str, str]]:
"""Resolve short aliases (e.g. sonnet/opus) using static catalogs only."""
try:
from hermes_cli.model_switch import MODEL_ALIASES
except Exception:
return None
identity = MODEL_ALIASES.get(name_lower)
if identity is None:
return None
vendor = identity.vendor
family = identity.family
def _match(provider: str) -> Optional[str]:
models = _PROVIDER_MODELS.get(provider, [])
if not models:
return None
prefix = (
f"{vendor}/{family}"
if provider in _AGGREGATOR_PROVIDERS
else family
).lower()
for model in models:
if model.lower().startswith(prefix):
return model
return None
for provider in current_keys:
if matched := _match(provider):
return provider, matched
for provider in _PROVIDER_MODELS:
if provider in current_keys or provider in _AGGREGATOR_PROVIDERS:
continue
if matched := _match(provider):
return provider, matched
for provider in _AGGREGATOR_PROVIDERS:
if provider in current_keys and (matched := _match(provider)):
return provider, matched
return None
def detect_static_provider_for_model(
model_name: str,
current_provider: str,
) -> Optional[tuple[str, str]]:
"""Auto-detect a provider from static catalogs only.
Returns ``(provider_id, model_name)``. The model name may be remapped
when a static alias or bare provider name resolves to a catalog default.
Returns ``None`` when no confident match is found.
"""
name = (model_name or "").strip()
if not name:
return None
name_lower = name.lower()
current_keys = _provider_keys(current_provider)
alias_match = _resolve_static_model_alias(name_lower, current_keys)
if alias_match:
return alias_match
# --- Step 0: bare provider name typed as model ---
# If someone types `/model nous` or `/model anthropic`, treat it as a
# provider switch and pick the first model from that provider's catalog.
# Skip "custom" and "openrouter" — custom has no model catalog, and
# openrouter requires an explicit model name to be useful.
resolved_provider = _PROVIDER_ALIASES.get(name_lower, name_lower)
if resolved_provider not in {"custom", "openrouter"}:
default_models = _PROVIDER_MODELS.get(resolved_provider, [])
if (
resolved_provider in _PROVIDER_LABELS
and default_models
and resolved_provider not in current_keys
):
return (resolved_provider, default_models[0])
# Aggregators list other providers' models — never auto-switch TO them
# If the model belongs to the current provider's catalog, don't suggest switching
if _model_in_provider_catalog(name_lower, current_keys):
return None
# --- Step 1: check static provider catalogs for a direct match ---
for pid, models in _PROVIDER_MODELS.items():
if pid in current_keys or pid in _AGGREGATOR_PROVIDERS:
continue
if any(name_lower == m.lower() for m in models):
return (pid, name)
return None
def detect_provider_for_model(
model_name: str,
current_provider: str,
@@ -1546,19 +1391,86 @@ def detect_provider_for_model(
Priority:
0. Bare provider name switch to that provider's default model
1. Direct provider static catalog match
2. OpenRouter catalog match
1. Direct provider with credentials (highest)
2. Direct provider without credentials remap to OpenRouter slug
3. OpenRouter catalog match
"""
name = (model_name or "").strip()
if not name:
return None
static_match = detect_static_provider_for_model(name, current_provider)
if static_match:
return static_match
if _model_in_provider_catalog(name.lower(), _provider_keys(current_provider)):
name_lower = name.lower()
# --- Step 0: bare provider name typed as model ---
# If someone types `/model nous` or `/model anthropic`, treat it as a
# provider switch and pick the first model from that provider's catalog.
# Skip "custom" and "openrouter" — custom has no model catalog, and
# openrouter requires an explicit model name to be useful.
resolved_provider = _PROVIDER_ALIASES.get(name_lower, name_lower)
if resolved_provider not in {"custom", "openrouter"}:
default_models = _PROVIDER_MODELS.get(resolved_provider, [])
if (
resolved_provider in _PROVIDER_LABELS
and default_models
and resolved_provider != normalize_provider(current_provider)
):
return (resolved_provider, default_models[0])
# Aggregators list other providers' models — never auto-switch TO them
_AGGREGATORS = {"nous", "openrouter", "ai-gateway", "copilot", "kilocode"}
# If the model belongs to the current provider's catalog, don't suggest switching
current_models = _PROVIDER_MODELS.get(current_provider, [])
if any(name_lower == m.lower() for m in current_models):
return None
# --- Step 1: check static provider catalogs for a direct match ---
direct_match: Optional[str] = None
for pid, models in _PROVIDER_MODELS.items():
if pid == current_provider or pid in _AGGREGATORS:
continue
if any(name_lower == m.lower() for m in models):
direct_match = pid
break
if direct_match:
# Check if we have credentials for this provider — env vars,
# credential pool, or auth store entries.
has_creds = False
try:
from hermes_cli.auth import PROVIDER_REGISTRY
pconfig = PROVIDER_REGISTRY.get(direct_match)
if pconfig:
for env_var in pconfig.api_key_env_vars:
if os.getenv(env_var, "").strip():
has_creds = True
break
except Exception:
pass
# Also check credential pool and auth store — covers OAuth,
# Claude Code tokens, and other non-env-var credentials (#10300).
if not has_creds:
try:
from agent.credential_pool import load_pool
pool = load_pool(direct_match)
if pool.has_credentials():
has_creds = True
except Exception:
pass
if not has_creds:
try:
from hermes_cli.auth import _load_auth_store
store = _load_auth_store()
if direct_match in store.get("providers", {}) or direct_match in store.get("credential_pool", {}):
has_creds = True
except Exception:
pass
# Always return the direct provider match. If credentials are
# missing, the client init will give a clear error rather than
# silently routing through the wrong provider (#10300).
return (direct_match, name)
# --- Step 2: check OpenRouter catalog ---
# First try exact match (handles provider/model format)
or_slug = _find_openrouter_slug(name)
@@ -1859,19 +1771,6 @@ def provider_model_ids(provider: Optional[str], *, force_refresh: bool = False)
return live
except Exception:
pass
if normalized == "gmi":
try:
from hermes_cli.auth import resolve_api_key_provider_credentials
creds = resolve_api_key_provider_credentials("gmi")
api_key = str(creds.get("api_key") or "").strip()
base_url = str(creds.get("base_url") or "").strip()
if api_key and base_url:
live = fetch_api_models(api_key, base_url)
if live:
return live
except Exception:
pass
if normalized == "custom":
base_url = _get_custom_base_url()
if base_url:
@@ -2249,52 +2148,6 @@ def copilot_model_api_mode(
return "chat_completions"
# Azure Foundry model families that require the Responses API. Azure
# rejects /chat/completions against these deployments with
# ``400 "The requested operation is unsupported."`` — the same payload Bob
# Dobolina hit in April 2026 on ``gpt-5.3-codex`` while ``gpt-4o-pure`` on
# the same endpoint worked fine. Keep the patterns broad enough to cover
# vendor-renamed deployments (e.g. ``gpt-5.3-codex``, ``gpt-5-codex``,
# ``gpt-5.4``, ``o1-preview``) but tight enough to leave GPT-4 / 3.5 / Llama /
# Mistral / Grok deployments on chat completions.
_AZURE_FOUNDRY_RESPONSES_PREFIXES = (
"codex", # codex-*, codex-mini
"gpt-5", # gpt-5, gpt-5.x, gpt-5-codex, gpt-5.x-codex
"o1", # o1, o1-preview, o1-mini
"o3", # o3, o3-mini
"o4", # o4, o4-mini
)
def azure_foundry_model_api_mode(model_name: Optional[str]) -> Optional[str]:
"""Infer Azure Foundry api_mode from a deployment/model name.
Returns ``"codex_responses"`` when the model name matches a family that
only accepts the Responses API on Azure Foundry (GPT-5.x, codex, o1/o3/o4
reasoning models). Returns ``None`` otherwise the caller should fall
back to the configured/default api_mode (typically ``chat_completions``)
so GPT-4o, GPT-4 Turbo, Llama, Mistral, etc. keep working.
Intentionally does NOT return ``anthropic_messages``; Anthropic-style
Azure endpoints are disambiguated by URL (``/anthropic`` suffix) in
``runtime_provider._detect_api_mode_for_url`` and by the user setting
``model.api_mode: anthropic_messages`` explicitly.
"""
raw = str(model_name or "").strip().lower()
if not raw:
return None
# Strip any vendor/ prefix a user may have copied from OpenRouter / Copilot.
if "/" in raw:
raw = raw.rsplit("/", 1)[-1]
# gpt-5-mini speaks chat completions on Copilot but Azure Foundry deploys
# the full gpt-5 family uniformly on Responses API — don't carve an
# exception here.
for prefix in _AZURE_FOUNDRY_RESPONSES_PREFIXES:
if raw.startswith(prefix):
return "codex_responses"
return None
def normalize_opencode_model_id(provider_id: Optional[str], model_id: Optional[str]) -> str:
"""Normalize OpenCode config IDs to the bare model slug used in API requests."""
provider = normalize_provider(provider_id)
+8 -16
View File
@@ -9,7 +9,6 @@ from typing import Dict, Iterable, Optional, Set
from hermes_cli.auth import get_nous_auth_status
from hermes_cli.config import get_env_value, load_config
from tools.managed_tool_gateway import is_managed_tool_gateway_ready
from utils import is_truthy_value
from tools.tool_backend_helpers import (
fal_key_is_configured,
has_direct_modal_credentials,
@@ -26,13 +25,6 @@ _DEFAULT_PLATFORM_TOOLSETS = {
}
def _uses_gateway(section: object) -> bool:
"""Return True when a config section explicitly opts into the gateway."""
if not isinstance(section, dict):
return False
return is_truthy_value(section.get("use_gateway"), default=False)
@dataclass(frozen=True)
class NousFeatureState:
key: str
@@ -270,11 +262,11 @@ def get_nous_subscription_features(
# use_gateway flags — when True, the user explicitly opted into the
# Tool Gateway via `hermes model`, so direct credentials should NOT
# prevent gateway routing.
web_use_gateway = _uses_gateway(web_cfg)
tts_use_gateway = _uses_gateway(tts_cfg)
browser_use_gateway = _uses_gateway(browser_cfg)
web_use_gateway = bool(web_cfg.get("use_gateway"))
tts_use_gateway = bool(tts_cfg.get("use_gateway"))
browser_use_gateway = bool(browser_cfg.get("use_gateway"))
image_gen_cfg = config.get("image_gen") if isinstance(config.get("image_gen"), dict) else {}
image_use_gateway = _uses_gateway(image_gen_cfg)
image_use_gateway = bool(image_gen_cfg.get("use_gateway"))
direct_exa = bool(get_env_value("EXA_API_KEY"))
direct_firecrawl = bool(get_env_value("FIRECRAWL_API_KEY") or get_env_value("FIRECRAWL_API_URL"))
@@ -609,10 +601,10 @@ def get_gateway_eligible_tools(
# no direct keys exist — we only skip the prompt for tools where
# use_gateway was explicitly set.
opted_in = {
"web": _uses_gateway(config.get("web")),
"image_gen": _uses_gateway(config.get("image_gen")),
"tts": _uses_gateway(config.get("tts")),
"browser": _uses_gateway(config.get("browser")),
"web": bool((config.get("web") if isinstance(config.get("web"), dict) else {}).get("use_gateway")),
"image_gen": bool((config.get("image_gen") if isinstance(config.get("image_gen"), dict) else {}).get("use_gateway")),
"tts": bool((config.get("tts") if isinstance(config.get("tts"), dict) else {}).get("use_gateway")),
"browser": bool((config.get("browser") if isinstance(config.get("browser"), dict) else {}).get("use_gateway")),
}
unconfigured: list[str] = []
-202
View File
@@ -1,202 +0,0 @@
"""Oneshot (-z) mode: send a prompt, get the final content block, exit.
Bypasses cli.py entirely. No banner, no spinner, no session_id line,
no stderr chatter. Just the agent's final text to stdout.
Toolsets = whatever the user has configured for "cli" in `hermes tools`.
Rules / memory / AGENTS.md / preloaded skills = same as a normal chat turn.
Approvals = auto-bypassed (HERMES_YOLO_MODE=1 is set for the call).
Working directory = the user's CWD (AGENTS.md etc. resolve from there as usual).
Model / provider selection mirrors `hermes chat`:
- Both optional. If omitted, use the user's configured default.
- If both given, pair them exactly as given.
- If only --model given, auto-detect the provider that serves it.
- If only --provider given, error out (ambiguous caller must pick a model).
Env var fallbacks (used when the corresponding arg is not passed):
- HERMES_INFERENCE_MODEL
- HERMES_INFERENCE_PROVIDER (already read by resolve_runtime_provider)
"""
from __future__ import annotations
import logging
import os
import sys
from contextlib import redirect_stderr, redirect_stdout
from typing import Optional
def run_oneshot(
prompt: str,
model: Optional[str] = None,
provider: Optional[str] = None,
) -> int:
"""Execute a single prompt and print only the final content block.
Args:
prompt: The user message to send.
model: Optional model override. Falls back to HERMES_INFERENCE_MODEL
env var, then config.yaml's model.default / model.model.
provider: Optional provider override. Falls back to
HERMES_INFERENCE_PROVIDER env var, then config.yaml's model.provider,
then "auto".
Returns the exit code. Caller should sys.exit() with the return.
"""
# Silence every stdlib logger for the duration. AIAgent, tools, and
# provider adapters all log to stderr through the root logger; file
# handlers added by setup_logging() keep working (they're attached to
# the root logger's handler list, not affected by level), but no
# bytes reach the terminal.
logging.disable(logging.CRITICAL)
# --provider without --model is ambiguous: carrying the user's configured
# model across to a different provider is usually wrong (that provider may
# not host it), and silently picking the provider's catalog default hides
# the mismatch. Require the caller to be explicit. Validate BEFORE the
# stderr redirect so the message actually reaches the terminal.
env_model_early = os.getenv("HERMES_INFERENCE_MODEL", "").strip()
if provider and not ((model or "").strip() or env_model_early):
sys.stderr.write(
"hermes -z: --provider requires --model (or HERMES_INFERENCE_MODEL). "
"Pass both explicitly, or neither to use your configured defaults.\n"
)
return 2
# Auto-approve any shell / tool approvals. Non-interactive by
# definition — a prompt would hang forever.
os.environ["HERMES_YOLO_MODE"] = "1"
os.environ["HERMES_ACCEPT_HOOKS"] = "1"
# Redirect stderr AND stdout to devnull for the entire call tree.
# We'll print the final response to the real stdout at the end.
real_stdout = sys.stdout
devnull = open(os.devnull, "w")
try:
with redirect_stdout(devnull), redirect_stderr(devnull):
response = _run_agent(prompt, model=model, provider=provider)
finally:
try:
devnull.close()
except Exception:
pass
if response:
real_stdout.write(response)
if not response.endswith("\n"):
real_stdout.write("\n")
real_stdout.flush()
return 0
def _run_agent(
prompt: str,
model: Optional[str] = None,
provider: Optional[str] = None,
) -> str:
"""Build an AIAgent exactly like a normal CLI chat turn would, then
run a single conversation. Returns the final response string."""
# Imports are local so they don't run when hermes is invoked for
# other commands (keeps top-level CLI startup cheap).
from hermes_cli.config import load_config
from hermes_cli.models import detect_provider_for_model
from hermes_cli.runtime_provider import resolve_runtime_provider
from hermes_cli.tools_config import _get_platform_tools
from run_agent import AIAgent
cfg = load_config()
# Resolve effective model: explicit arg → env var → config.
model_cfg = cfg.get("model") or {}
if isinstance(model_cfg, str):
cfg_model = model_cfg
else:
cfg_model = model_cfg.get("default") or model_cfg.get("model") or ""
env_model = os.getenv("HERMES_INFERENCE_MODEL", "").strip()
effective_model = (model or "").strip() or env_model or cfg_model
# Resolve effective provider: explicit arg → (auto-detect from model if
# model was explicit) → env / config (handled inside resolve_runtime_provider).
#
# When --model is given without --provider, auto-detect the provider that
# serves that model — same semantic as `/model <name>` in an interactive
# session. Without this, resolve_runtime_provider() would fall back to
# the user's configured default provider, which may not host the model
# the caller just asked for.
effective_provider = (provider or "").strip() or None
if effective_provider is None and (model or env_model):
# Only auto-detect when the model was explicitly requested via arg or
# env var (not when it came from config — that's the "use my defaults"
# path and the configured provider is already correct).
explicit_model = (model or "").strip() or env_model
if explicit_model:
cfg_provider = ""
if isinstance(model_cfg, dict):
cfg_provider = str(model_cfg.get("provider") or "").strip().lower()
current_provider = (
cfg_provider
or os.getenv("HERMES_INFERENCE_PROVIDER", "").strip().lower()
or "auto"
)
detected = detect_provider_for_model(explicit_model, current_provider)
if detected:
effective_provider, effective_model = detected
runtime = resolve_runtime_provider(
requested=effective_provider,
target_model=effective_model or None,
)
# Pull in whatever toolsets the user has enabled for "cli".
# sorted() gives stable ordering; set→list for AIAgent's signature.
toolsets_list = sorted(_get_platform_tools(cfg, "cli"))
agent = AIAgent(
api_key=runtime.get("api_key"),
base_url=runtime.get("base_url"),
provider=runtime.get("provider"),
api_mode=runtime.get("api_mode"),
model=effective_model,
enabled_toolsets=toolsets_list,
quiet_mode=True,
platform="cli",
credential_pool=runtime.get("credential_pool"),
# Interactive callbacks are intentionally NOT wired beyond this
# one. In oneshot mode there's no user sitting at a terminal:
# - clarify → returns a synthetic "pick a default" instruction
# so the agent continues instead of stalling on
# the tool's built-in "not available" error
# - sudo password prompt → terminal_tool gates on
# HERMES_INTERACTIVE which we never set
# - shell-hook approval → auto-approved via HERMES_ACCEPT_HOOKS=1
# (set above); also falls back to deny on non-tty
# - dangerous-command approval → bypassed via HERMES_YOLO_MODE=1
# - skill secret capture → returns gracefully when no callback set
clarify_callback=_oneshot_clarify_callback,
)
# Belt-and-braces: make sure AIAgent doesn't invoke any streaming
# display callbacks that would bypass our stdout capture.
agent.suppress_status_output = True
agent.stream_delta_callback = None
agent.tool_gen_callback = None
return agent.chat(prompt) or ""
def _oneshot_clarify_callback(question: str, choices=None) -> str:
"""Clarify is disabled in oneshot mode — tell the agent to pick a
default and proceed instead of stalling or erroring."""
if choices:
return (
f"[oneshot mode: no user available. Pick the best option from "
f"{choices} using your own judgment and continue.]"
)
return (
"[oneshot mode: no user available. Make the most reasonable "
"assumption you can and continue.]"
)
-1
View File
@@ -36,7 +36,6 @@ PLATFORMS: OrderedDict[str, PlatformInfo] = OrderedDict([
("wecom_callback", PlatformInfo(label="💬 WeCom Callback", default_toolset="hermes-wecom-callback")),
("weixin", PlatformInfo(label="💬 Weixin", default_toolset="hermes-weixin")),
("qqbot", PlatformInfo(label="💬 QQBot", default_toolset="hermes-qqbot")),
("yuanbao", PlatformInfo(label="🤖 Yuanbao", default_toolset="hermes-yuanbao")),
("webhook", PlatformInfo(label="🔗 Webhook", default_toolset="hermes-webhook")),
("api_server", PlatformInfo(label="🌐 API Server", default_toolset="hermes-api-server")),
("cron", PlatformInfo(label="⏰ Cron", default_toolset="hermes-cron")),
-17
View File
@@ -163,22 +163,10 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = {
base_url_override="https://api.arcee.ai/api/v1",
base_url_env_var="ARCEE_BASE_URL",
),
"gmi": HermesOverlay(
transport="openai_chat",
extra_env_vars=("GMI_API_KEY",),
base_url_override="https://api.gmi-serving.com/v1",
base_url_env_var="GMI_BASE_URL",
),
"ollama-cloud": HermesOverlay(
transport="openai_chat",
base_url_env_var="OLLAMA_BASE_URL",
),
# Azure Foundry: supports both OpenAI-style and Anthropic-style endpoints.
# The transport is determined at runtime from config.yaml model.api_mode.
"azure-foundry": HermesOverlay(
transport="openai_chat", # default; overridden by api_mode in config
base_url_env_var="AZURE_FOUNDRY_BASE_URL",
),
}
@@ -303,10 +291,6 @@ ALIASES: Dict[str, str] = {
"arcee-ai": "arcee",
"arceeai": "arcee",
# gmi
"gmi-cloud": "gmi",
"gmicloud": "gmi",
# Local server aliases → virtual "local" concept (resolved via user config)
"lmstudio": "lmstudio",
"lm-studio": "lmstudio",
@@ -329,7 +313,6 @@ _LABEL_OVERRIDES: Dict[str, str] = {
"copilot-acp": "GitHub Copilot ACP",
"stepfun": "StepFun Step Plan",
"xiaomi": "Xiaomi MiMo",
"gmi": "GMI Cloud",
"local": "Local endpoint",
"bedrock": "AWS Bedrock",
"ollama-cloud": "Ollama Cloud",
+7 -179
View File
@@ -221,32 +221,6 @@ def _resolve_runtime_from_pool_entry(
elif provider == "copilot":
api_mode = _copilot_runtime_api_mode(model_cfg, getattr(entry, "runtime_api_key", ""))
base_url = base_url or PROVIDER_REGISTRY["copilot"].inference_base_url
elif provider == "azure-foundry":
# Azure Foundry: read api_mode and base_url from config
cfg_provider = str(model_cfg.get("provider") or "").strip().lower()
if cfg_provider == "azure-foundry":
cfg_base_url = str(model_cfg.get("base_url") or "").strip().rstrip("/")
if cfg_base_url:
base_url = cfg_base_url
configured_mode = _parse_api_mode(model_cfg.get("api_mode"))
if configured_mode:
api_mode = configured_mode
# Model-family inference for GPT-5.x / codex / o1-o4: Azure rejects
# /chat/completions on these with 400 "operation unsupported" — see
# azure_foundry_model_api_mode() for rationale. Skip when the user
# explicitly picked anthropic_messages (Anthropic-style endpoint).
if effective_model and api_mode != "anthropic_messages":
try:
from hermes_cli.models import azure_foundry_model_api_mode
inferred = azure_foundry_model_api_mode(effective_model)
except Exception:
inferred = None
if inferred:
api_mode = inferred
# For Anthropic-style endpoints, strip /v1 suffix
if api_mode == "anthropic_messages":
base_url = re.sub(r"/v1/?$", "", base_url)
else:
configured_provider = str(model_cfg.get("provider") or "").strip().lower()
# Honour model.base_url from config.yaml when the configured provider
@@ -615,88 +589,6 @@ def _resolve_openrouter_runtime(
}
def _resolve_azure_foundry_runtime(
*,
requested_provider: str,
model_cfg: Dict[str, Any],
explicit_api_key: Optional[str] = None,
explicit_base_url: Optional[str] = None,
target_model: Optional[str] = None,
) -> Dict[str, Any]:
"""Resolve an Azure Foundry runtime entry.
Reads ``model.base_url`` + ``model.api_mode`` from config.yaml (or
explicit overrides), pulls the API key from ``.env`` / env var, and
strips a trailing ``/v1`` for Anthropic-style endpoints because the
Anthropic SDK appends ``/v1/messages`` internally.
Raises :class:`AuthError` when required values are missing.
"""
explicit_api_key = str(explicit_api_key or "").strip()
explicit_base_url_clean = str(explicit_base_url or "").strip().rstrip("/")
cfg_provider = str(model_cfg.get("provider") or "").strip().lower()
cfg_base_url = ""
cfg_api_mode = "chat_completions"
if cfg_provider == "azure-foundry":
cfg_base_url = str(model_cfg.get("base_url") or "").strip().rstrip("/")
cfg_api_mode = _parse_api_mode(model_cfg.get("api_mode")) or "chat_completions"
# Model-family inference: Azure Foundry deploys GPT-5.x / codex / o1-o4
# reasoning models as Responses-API-only. Calling /chat/completions
# against them returns 400 "The requested operation is unsupported."
# Upgrade api_mode when the model name matches, unless the user has
# explicitly chosen anthropic_messages (Anthropic-style endpoint).
effective_model = str(target_model or model_cfg.get("default") or "").strip()
if effective_model and cfg_api_mode != "anthropic_messages":
try:
from hermes_cli.models import azure_foundry_model_api_mode
inferred = azure_foundry_model_api_mode(effective_model)
except Exception:
inferred = None
if inferred:
cfg_api_mode = inferred
env_base_url = os.getenv("AZURE_FOUNDRY_BASE_URL", "").strip().rstrip("/")
base_url = explicit_base_url_clean or cfg_base_url or env_base_url
if not base_url:
raise AuthError(
"Azure Foundry requires a base URL. Set it via 'hermes model' or "
"the AZURE_FOUNDRY_BASE_URL environment variable."
)
api_key = explicit_api_key
if not api_key:
try:
from hermes_cli.config import get_env_value
api_key = get_env_value("AZURE_FOUNDRY_API_KEY") or ""
except Exception:
api_key = ""
if not api_key:
api_key = os.getenv("AZURE_FOUNDRY_API_KEY", "").strip()
if not api_key:
raise AuthError(
"Azure Foundry requires an API key. Set AZURE_FOUNDRY_API_KEY in "
"~/.hermes/.env or run 'hermes model' to configure."
)
# Anthropic SDK appends /v1/messages itself, so strip any trailing /v1
# we inherited from the configured base_url to avoid double-/v1 paths.
if cfg_api_mode == "anthropic_messages":
base_url = re.sub(r"/v1/?$", "", base_url)
source = "explicit" if (explicit_api_key or explicit_base_url) else "config"
return {
"provider": "azure-foundry",
"api_mode": cfg_api_mode,
"base_url": base_url,
"api_key": api_key,
"source": source,
"requested_provider": requested_provider,
}
def _resolve_explicit_runtime(
*,
provider: str,
@@ -786,15 +678,6 @@ def _resolve_explicit_runtime(
"requested_provider": requested_provider,
}
# Azure Foundry: user-configured endpoint with selectable API mode
if provider == "azure-foundry":
return _resolve_azure_foundry_runtime(
requested_provider=requested_provider,
model_cfg=model_cfg,
explicit_api_key=explicit_api_key,
explicit_base_url=explicit_base_url,
)
pconfig = PROVIDER_REGISTRY.get(provider)
if pconfig and pconfig.auth_type == "api_key":
env_url = ""
@@ -863,41 +746,6 @@ def resolve_runtime_provider(
"""
requested_provider = resolve_requested_provider(requested)
# Azure Anthropic short-circuit: when explicitly targeting an Azure endpoint
# with provider="anthropic", bypass _resolve_named_custom_runtime (which would
# return provider="custom" with chat_completions api_mode and no valid key).
# Instead, use the Azure key directly with anthropic_messages api_mode.
_eff_base = (explicit_base_url or "").strip()
if requested_provider == "anthropic" and "azure.com" in _eff_base:
_azure_key = (
(explicit_api_key or "").strip()
or os.getenv("AZURE_ANTHROPIC_KEY", "").strip()
or os.getenv("ANTHROPIC_API_KEY", "").strip()
)
return {
"provider": "anthropic",
"api_mode": "anthropic_messages",
"base_url": _eff_base.rstrip("/"),
"api_key": _azure_key,
"source": "azure-explicit",
"requested_provider": requested_provider,
}
# Azure Foundry: user-configured endpoint with selectable API mode
# (OpenAI-style chat_completions or Anthropic-style anthropic_messages).
# Resolve before the custom-runtime / pool / generic paths so Azure
# config is always picked up from model.base_url + model.api_mode,
# regardless of whether the caller passed explicit_* args.
if requested_provider == "azure-foundry":
azure_runtime = _resolve_azure_foundry_runtime(
requested_provider=requested_provider,
model_cfg=_get_model_config(),
explicit_api_key=explicit_api_key,
explicit_base_url=explicit_base_url,
target_model=target_model,
)
return azure_runtime
custom_runtime = _resolve_named_custom_runtime(
requested_provider=requested_provider,
explicit_api_key=explicit_api_key,
@@ -1076,6 +924,13 @@ def resolve_runtime_provider(
# Anthropic (native Messages API)
if provider == "anthropic":
from agent.anthropic_adapter import resolve_anthropic_token
token = resolve_anthropic_token()
if not token:
raise AuthError(
"No Anthropic credentials found. Set ANTHROPIC_TOKEN or ANTHROPIC_API_KEY, "
"run 'claude setup-token', or authenticate with 'claude /login'."
)
# Allow base URL override from config.yaml model.base_url, but only
# when the configured provider is anthropic — otherwise a non-Anthropic
# base_url (e.g. Codex endpoint) would leak into Anthropic requests.
@@ -1084,33 +939,6 @@ def resolve_runtime_provider(
if cfg_provider == "anthropic":
cfg_base_url = (model_cfg.get("base_url") or "").strip().rstrip("/")
base_url = cfg_base_url or "https://api.anthropic.com"
# For Azure AI Foundry endpoints, use ANTHROPIC_API_KEY directly —
# Claude Code OAuth tokens (sk-ant-oat01) are not accepted by Azure.
# Azure keys don't start with "sk-ant-" so resolve_anthropic_token()
# would find the Claude Code OAuth token first (priority 3) and return
# that instead, causing 401s. Detect Azure endpoints and use the env
# key directly to bypass the OAuth priority chain.
_is_azure_endpoint = "azure.com" in base_url.lower() or (
cfg_base_url and "azure.com" in cfg_base_url.lower()
)
if _is_azure_endpoint:
token = (
os.getenv("AZURE_ANTHROPIC_KEY", "").strip()
or os.getenv("ANTHROPIC_API_KEY", "").strip()
)
if not token:
raise AuthError(
"No Azure Anthropic API key found. Set AZURE_ANTHROPIC_KEY or ANTHROPIC_API_KEY."
)
else:
from agent.anthropic_adapter import resolve_anthropic_token
token = resolve_anthropic_token()
if not token:
raise AuthError(
"No Anthropic credentials found. Set ANTHROPIC_TOKEN or ANTHROPIC_API_KEY, "
"run 'claude setup-token', or authenticate with 'claude /login'."
)
return {
"provider": "anthropic",
"api_mode": "anthropic_messages",
+63 -96
View File
@@ -1856,32 +1856,27 @@ def _setup_slack():
if existing:
print_info("Slack: already configured")
if not prompt_yes_no("Reconfigure Slack?", False):
# Even without reconfiguring, offer to refresh the manifest so
# new commands (e.g. /btw, /stop, ...) get registered in Slack.
if prompt_yes_no(
"Regenerate the Slack app manifest with the latest command "
"list? (recommended after `hermes update`)",
True,
):
_write_slack_manifest_and_instruct()
return
print_info("Steps to create a Slack app:")
print_info(" 1. Go to https://api.slack.com/apps → Create New App")
print_info(" Pick 'From an app manifest' — we'll generate one for you below.")
print_info(" 1. Go to https://api.slack.com/apps → Create New App (from scratch)")
print_info(" 2. Enable Socket Mode: Settings → Socket Mode → Enable")
print_info(" • Create an App-Level Token with 'connections:write' scope")
print_info(" 3. Install to Workspace: Settings → Install App")
print_info(" 4. After installing, invite the bot to channels: /invite @YourBot")
print_info(" 3. Add Bot Token Scopes: Features → OAuth & Permissions")
print_info(" Required scopes: chat:write, app_mentions:read,")
print_info(" channels:history, channels:read, im:history,")
print_info(" im:read, im:write, users:read, files:read, files:write")
print_info(" Optional for private channels: groups:history")
print_info(" 4. Subscribe to Events: Features → Event Subscriptions → Enable")
print_info(" Required events: message.im, message.channels, app_mention")
print_info(" Optional for private channels: message.groups")
print_warning(" ⚠ Without message.channels the bot will ONLY work in DMs,")
print_warning(" not public channels.")
print_info(" 5. Install to Workspace: Settings → Install App")
print_info(" 6. Reinstall the app after any scope or event changes")
print_info(" 7. After installing, invite the bot to channels: /invite @YourBot")
print()
print_info(" Full guide: https://hermes-agent.nousresearch.com/docs/user-guide/messaging/slack/")
print()
# Generate and write manifest up-front so the user can paste it into
# the "Create from manifest" flow instead of clicking through scopes /
# events / slash commands one at a time.
_write_slack_manifest_and_instruct()
print()
bot_token = prompt("Slack Bot Token (xoxb-...)", password=True)
if not bot_token:
@@ -1907,49 +1902,6 @@ def _setup_slack():
print_info(" Set SLACK_ALLOW_ALL_USERS=true or GATEWAY_ALLOW_ALL_USERS=true only if you intentionally want open workspace access.")
def _write_slack_manifest_and_instruct():
"""Generate the Slack manifest, write it under HERMES_HOME, and print
paste-into-Slack instructions.
Exposed as its own helper so both the initial setup flow and the
"reconfigure? → no" branch can refresh the manifest without the user
re-entering tokens. Failures are non-fatal if the manifest write
fails for any reason, we print a warning and skip rather than abort
the whole Slack setup.
"""
try:
from hermes_cli.slack_cli import _build_full_manifest
from hermes_constants import get_hermes_home
manifest = _build_full_manifest(
bot_name="Hermes",
bot_description="Your Hermes agent on Slack",
)
target = Path(get_hermes_home()) / "slack-manifest.json"
target.parent.mkdir(parents=True, exist_ok=True)
import json as _json
target.write_text(
_json.dumps(manifest, indent=2, ensure_ascii=False) + "\n",
encoding="utf-8",
)
print_success(f"Slack app manifest written to: {target}")
print_info(
" Paste it into https://api.slack.com/apps → your app → Features "
"→ App Manifest → Edit, then Save. Slack will prompt to "
"reinstall if scopes or slash commands changed."
)
print_info(
" Re-run `hermes slack manifest --write` anytime to refresh after "
"Hermes adds new commands."
)
except Exception as exc: # pragma: no cover - best-effort UX helper
print_warning(f"Couldn't write Slack manifest: {exc}")
print_info(
" You can generate it manually later with: "
"hermes slack manifest --write"
)
def _setup_matrix():
"""Configure Matrix credentials."""
print_header("Matrix")
@@ -2133,12 +2085,6 @@ def _setup_feishu():
_gateway_setup_feishu()
def _setup_yuanbao():
"""Configure Yuanbao via gateway setup."""
from hermes_cli.gateway import _setup_yuanbao as _gateway_setup_yuanbao
_gateway_setup_yuanbao()
def _setup_wecom():
"""Configure WeCom (Enterprise WeChat) via gateway setup."""
from hermes_cli.gateway import _setup_wecom as _gateway_setup_wecom
@@ -2283,7 +2229,6 @@ _GATEWAY_PLATFORMS = [
("WhatsApp", "WHATSAPP_ENABLED", _setup_whatsapp),
("DingTalk", "DINGTALK_CLIENT_ID", _setup_dingtalk),
("Feishu / Lark", "FEISHU_APP_ID", _setup_feishu),
("Yuanbao", "YUANBAO_APP_ID", _setup_yuanbao),
("WeCom (Enterprise WeChat)", "WECOM_BOT_ID", _setup_wecom),
("WeCom Callback (Self-Built App)", "WECOM_CALLBACK_CORP_ID", _setup_wecom_callback),
("Weixin (WeChat)", "WEIXIN_ACCOUNT_ID", _setup_weixin),
@@ -2918,6 +2863,17 @@ SETUP_SECTIONS = [
("agent", "Agent Settings", setup_agent_settings),
]
# The returning-user menu intentionally omits standalone TTS because model setup
# already includes TTS selection and tools setup covers the rest of the provider
# configuration. Keep this list in the same order as the visible menu entries.
RETURNING_USER_MENU_SECTION_KEYS = [
"model",
"terminal",
"gateway",
"tools",
"agent",
]
def run_setup_wizard(args):
"""Run the interactive setup wizard.
@@ -2942,9 +2898,6 @@ def run_setup_wizard(args):
save_config(copy.deepcopy(DEFAULT_CONFIG))
print_success("Configuration reset to defaults.")
reconfigure_requested = bool(getattr(args, "reconfigure", False))
quick_requested = bool(getattr(args, "quick", False))
config = load_config()
hermes_home = get_hermes_home()
@@ -3036,36 +2989,50 @@ def run_setup_wizard(args):
migration_ran = False
if is_existing:
# Existing install — default is the full-wizard reconfigure flow.
# Every prompt shows the current value as its default, so pressing
# Enter keeps it. Opt into `--quick` for the narrow "just fill in
# missing items" flow (useful after a partial OpenClaw migration
# or when a required API key got cleared).
if quick_requested:
# ── Returning User Menu ──
print()
print_header("Welcome Back!")
print_success("You already have Hermes configured.")
print()
menu_choices = [
"Quick Setup - configure missing items only",
"Full Setup - reconfigure everything",
"Model & Provider",
"Terminal Backend",
"Messaging Platforms (Gateway)",
"Tools",
"Agent Settings",
"Exit",
]
choice = prompt_choice("What would you like to do?", menu_choices, 0)
if choice == 0:
# Quick setup
_run_quick_setup(config, hermes_home)
return
print()
print_header("Reconfigure")
print_success("You already have Hermes configured.")
print_info("Running the full wizard — each prompt shows your current value.")
print_info("Press Enter to keep it, or type a new value to change it.")
print_info("")
print_info("Tip: jump straight to a section with 'hermes setup model|terminal|")
print_info(" gateway|tools|agent', or fill only missing items with --quick.")
# Fall through to the "Full Setup — run all sections" block below.
# --reconfigure is now the default on existing installs; the flag
# is preserved for backwards compatibility but is a no-op here.
elif choice == 1:
# Full setup — fall through to run all sections
pass
elif choice == 7:
print_info("Exiting. Run 'hermes setup' again when ready.")
return
elif 2 <= choice <= 6:
# Individual section — map by key, not by position.
# SETUP_SECTIONS includes TTS but the returning-user menu skips it,
# so positional indexing (choice - 2) would dispatch the wrong section.
section_key = RETURNING_USER_MENU_SECTION_KEYS[choice - 2]
section = next((s for s in SETUP_SECTIONS if s[0] == section_key), None)
if section:
_, label, func = section
func(config)
save_config(config)
_print_setup_summary(config, hermes_home)
return
else:
# ── First-Time Setup ──
print()
# --reconfigure / --quick on a fresh install are meaningless — fall
# through to the normal first-time flow.
if reconfigure_requested or quick_requested:
print_info("No existing configuration found — running first-time setup.")
print()
# Offer OpenClaw migration before configuration begins
migration_ran = _offer_openclaw_migration(hermes_home)
if migration_ran:
+20 -230
View File
@@ -11,10 +11,9 @@ handler are thin wrappers that parse args and delegate.
"""
import json
import re
import shutil
from pathlib import Path
from typing import Any, Dict, List, Optional
from typing import Any, Dict, Optional
from rich.console import Console
from rich.panel import Panel
@@ -142,103 +141,6 @@ def _derive_category_from_install_path(install_path: str) -> str:
return "" if parent == "." else parent
# ---------------------------------------------------------------------------
# Interactive name/category resolution for URL-installed skills
# ---------------------------------------------------------------------------
_VALID_NAME_RE = re.compile(r"^[a-z][a-z0-9_-]*$")
_VALID_CATEGORY_RE = re.compile(r"^[a-z][a-z0-9_/-]*$")
def _is_valid_installed_skill_name(name: str) -> bool:
"""Accept identifier-shaped names, reject empty / sentinel-y values."""
if not isinstance(name, str):
return False
candidate = name.strip().lower()
if not candidate or candidate in {"skill", "readme", "index", "unnamed-skill"}:
return False
return bool(_VALID_NAME_RE.match(candidate))
def _existing_categories() -> List[str]:
"""Return sorted subdirectory names under ``~/.hermes/skills/`` that look
like category buckets (contain at least one ``SKILL.md`` somewhere below).
Used to suggest reusable categories when interactively installing from a
URL. Hidden dirs (``.hub``, ``.trash``) are skipped.
"""
from tools.skills_hub import SKILLS_DIR
out: List[str] = []
try:
for entry in SKILLS_DIR.iterdir():
if not entry.is_dir() or entry.name.startswith("."):
continue
# Only count as a category if it contains skills, not if it IS a skill.
# Heuristic: if ``<entry>/SKILL.md`` exists, it's a skill at the
# top level (no category); otherwise treat as a category bucket.
if (entry / "SKILL.md").exists():
continue
# Has at least one nested SKILL.md?
try:
if any(entry.rglob("SKILL.md")):
out.append(entry.name)
except OSError:
continue
except (FileNotFoundError, OSError):
return []
return sorted(set(out))
def _prompt_for_skill_name(c: Console, url: str, default: str = "") -> Optional[str]:
"""Prompt interactively for a skill name. Returns None on cancel/EOF."""
c.print()
c.print(
f"[yellow]The SKILL.md at {url} doesn't declare a `name:` in its "
f"frontmatter,[/]\n[yellow]and the URL path doesn't produce a valid "
f"identifier either.[/]"
)
default_hint = f" [{default}]" if default else ""
c.print(
f"[bold]Enter a skill name{default_hint}:[/] "
f"[dim](lowercase letters, digits, hyphens, underscores; starts with a letter)[/]"
)
try:
answer = input("Name: ").strip()
except (EOFError, KeyboardInterrupt):
return None
if not answer and default:
answer = default
if not _is_valid_installed_skill_name(answer):
c.print(f"[bold red]Invalid name:[/] {answer!r}. Aborting install.\n")
return None
return answer
def _prompt_for_category(c: Console, existing: List[str]) -> str:
"""Prompt interactively for a category. Empty/None input means flat install."""
c.print()
if existing:
c.print(
"[bold]Pick a category[/] "
"[dim](reuse an existing bucket, type a new one, or press Enter to install flat)[/]"
)
c.print(f"[dim]Existing: {', '.join(existing)}[/]")
else:
c.print(
"[bold]Category[/] [dim](optional — press Enter to install flat at ~/.hermes/skills/<name>/)[/]"
)
try:
answer = input("Category: ").strip()
except (EOFError, KeyboardInterrupt):
return ""
if not answer:
return ""
if not _VALID_CATEGORY_RE.match(answer):
c.print(f"[dim]Invalid category {answer!r} — installing flat.[/]")
return ""
return answer
def do_search(query: str, source: str = "all", limit: int = 10,
console: Optional[Console] = None) -> None:
"""Search registries and display results as a Rich table."""
@@ -407,17 +309,8 @@ def do_browse(page: int = 1, page_size: int = 20, source: str = "all",
def do_install(identifier: str, category: str = "", force: bool = False,
console: Optional[Console] = None, skip_confirm: bool = False,
invalidate_cache: bool = True,
name_override: str = "") -> None:
"""Fetch, quarantine, scan, confirm, and install a skill.
``name_override`` lets non-interactive callers (slash commands, gateway,
scripts) supply a skill name when the upstream SKILL.md lacks a valid
``name:`` frontmatter field. On interactive TTY surfaces, a missing name
triggers a prompt instead; ``skip_confirm=True`` means "non-interactive"
(so pair it with ``name_override`` when installing from a URL that has
no frontmatter).
"""
invalidate_cache: bool = True) -> None:
"""Fetch, quarantine, scan, confirm, and install a skill."""
from tools.skills_hub import (
GitHubAuth, create_source_router, ensure_hub_dirs,
quarantine_bundle, install_from_quarantine, HubLockFile,
@@ -461,58 +354,6 @@ def do_install(identifier: str, category: str = "", force: bool = False,
c.print()
return
# URL-sourced skills may arrive with an empty name when SKILL.md has no
# ``name:`` in frontmatter AND the URL path doesn't yield a valid
# identifier. Resolve by (1) --name override, (2) interactive prompt on
# a TTY, (3) refuse with an actionable error on non-interactive surfaces.
bundle_meta = getattr(bundle, "metadata", {}) or {}
if bundle.source == "url" and (not bundle.name or bundle_meta.get("awaiting_name")):
if name_override and _is_valid_installed_skill_name(name_override):
bundle.name = name_override.strip()
bundle_meta["awaiting_name"] = False
elif name_override:
c.print(
f"[bold red]Invalid --name:[/] {name_override!r}. "
"Must be a lowercase identifier (letters, digits, hyphens, "
"underscores; starts with a letter).\n"
)
return
elif skip_confirm:
# Non-interactive surface (slash command / TUI / gateway). Can't
# prompt — emit an actionable error.
url = bundle_meta.get("url") or identifier
c.print(
f"[bold red]Cannot install from URL:[/] {url}\n"
"[yellow]The SKILL.md has no `name:` in its frontmatter, "
"and the URL path doesn't produce a valid identifier.[/]\n\n"
"Retry with an explicit name:\n"
f" [bold]/skills install {url} --name <your-name>[/]\n"
f" [bold]hermes skills install {url} --name <your-name>[/]\n\n"
"[dim]Or ask the SKILL.md's author to add a `name:` field to "
"its YAML frontmatter.[/]\n"
)
return
else:
# Interactive TTY — prompt.
url = bundle_meta.get("url") or identifier
chosen = _prompt_for_skill_name(c, url)
if not chosen:
c.print("[dim]Installation cancelled.[/]\n")
return
bundle.name = chosen
bundle_meta["awaiting_name"] = False
# Keep SkillMeta in sync so downstream "already installed" checks,
# audit logs, and display all see the final name.
if meta is not None:
meta.name = bundle.name
meta.path = bundle.name
# URL-sourced skills: offer to pick a category interactively when the
# caller didn't specify one (TTY only — non-interactive installs fall
# through to flat install, matching all other sources).
if bundle.source == "url" and not category and not skip_confirm:
category = _prompt_for_category(c, _existing_categories())
# Auto-detect category for official skills (e.g. "official/autonomous-ai-agents/blackbox")
if bundle.source == "official" and not category:
id_parts = bundle.identifier.split("/") # ["official", "category", "skill"]
@@ -758,24 +599,11 @@ def inspect_skill(identifier: str) -> Optional[dict]:
return out
def do_list(source_filter: str = "all",
enabled_only: bool = False,
console: Optional[Console] = None) -> None:
"""List installed skills, distinguishing hub, builtin, and local skills.
Args:
source_filter: ``all`` | ``hub`` | ``builtin`` | ``local``.
enabled_only: If True, hide disabled skills from the output.
Enabled/disabled state is resolved against the currently active profile's
config ``hermes -p <profile> skills list`` reads that profile's
``skills.disabled`` list because ``-p`` swaps ``HERMES_HOME`` at process
start. No explicit profile flag needed here.
"""
def do_list(source_filter: str = "all", console: Optional[Console] = None) -> None:
"""List installed skills, distinguishing hub, builtin, and local skills."""
from tools.skills_hub import HubLockFile, ensure_hub_dirs
from tools.skills_sync import _read_manifest
from tools.skills_tool import _find_all_skills
from agent.skill_utils import get_disabled_skill_names
c = console or _console
ensure_hub_dirs()
@@ -783,26 +611,17 @@ def do_list(source_filter: str = "all",
hub_installed = {e["name"]: e for e in lock.list_installed()}
builtin_names = set(_read_manifest())
# Pull ALL skills (including disabled ones) so we can annotate status.
all_skills = _find_all_skills(skip_disabled=True)
disabled_names = get_disabled_skill_names()
all_skills = _find_all_skills()
title = "Installed Skills"
if enabled_only:
title += " (enabled only)"
table = Table(title=title)
table = Table(title="Installed Skills")
table.add_column("Name", style="bold cyan")
table.add_column("Category", style="dim")
table.add_column("Source", style="dim")
table.add_column("Trust", style="dim")
table.add_column("Status", style="dim")
hub_count = 0
builtin_count = 0
local_count = 0
enabled_count = 0
disabled_count = 0
for skill in sorted(all_skills, key=lambda s: (s.get("category") or "", s["name"])):
name = skill["name"]
@@ -813,48 +632,29 @@ def do_list(source_filter: str = "all",
source_type = "hub"
source_display = hub_entry.get("source", "hub")
trust = hub_entry.get("trust_level", "community")
hub_count += 1
elif name in builtin_names:
source_type = "builtin"
source_display = "builtin"
trust = "builtin"
builtin_count += 1
else:
source_type = "local"
source_display = "local"
trust = "local"
local_count += 1
if source_filter != "all" and source_filter != source_type:
continue
is_enabled = name not in disabled_names
if enabled_only and not is_enabled:
continue
if source_type == "hub":
hub_count += 1
elif source_type == "builtin":
builtin_count += 1
else:
local_count += 1
if is_enabled:
enabled_count += 1
status_cell = "[bold green]enabled[/]"
else:
disabled_count += 1
status_cell = "[dim red]disabled[/]"
trust_style = {"builtin": "bright_cyan", "trusted": "green", "community": "yellow", "local": "dim"}.get(trust, "dim")
trust_label = "official" if source_display == "official" else trust
table.add_row(name, category, source_display, f"[{trust_style}]{trust_label}[/]", status_cell)
table.add_row(name, category, source_display, f"[{trust_style}]{trust_label}[/]")
c.print(table)
summary = f"[dim]{hub_count} hub-installed, {builtin_count} builtin, {local_count} local"
if enabled_only:
summary += f"{enabled_count} enabled shown"
else:
summary += f"{enabled_count} enabled, {disabled_count} disabled"
summary += "[/]\n"
c.print(summary)
c.print(
f"[dim]{hub_count} hub-installed, {builtin_count} builtin, {local_count} local[/]\n"
)
def do_check(name: Optional[str] = None, console: Optional[Console] = None) -> None:
@@ -1323,15 +1123,11 @@ def skills_command(args) -> None:
do_search(args.query, source=args.source, limit=args.limit)
elif action == "install":
do_install(args.identifier, category=args.category, force=args.force,
skip_confirm=getattr(args, "yes", False),
name_override=getattr(args, "name", "") or "")
skip_confirm=getattr(args, "yes", False))
elif action == "inspect":
do_inspect(args.identifier)
elif action == "list":
do_list(
source_filter=args.source,
enabled_only=getattr(args, "enabled_only", False),
)
do_list(source_filter=args.source)
elif action == "check":
do_check(name=getattr(args, "name", None))
elif action == "update":
@@ -1381,7 +1177,6 @@ def handle_skills_slash(cmd: str, console: Optional[Console] = None) -> None:
/skills search kubernetes
/skills install openai/skills/skill-creator
/skills install openai/skills/skill-creator --force
/skills install https://example.com/path/SKILL.md
/skills inspect openai/skills/skill-creator
/skills list
/skills list --source hub
@@ -1458,11 +1253,10 @@ def handle_skills_slash(cmd: str, console: Optional[Console] = None) -> None:
elif action == "install":
if not args:
c.print("[bold red]Usage:[/] /skills install <identifier-or-url> [--name <name>] [--category <cat>] [--force] [--now]\n")
c.print("[bold red]Usage:[/] /skills install <identifier> [--category <cat>] [--force] [--now]\n")
return
identifier = args[0]
category = ""
name_override = ""
# Slash commands run inside prompt_toolkit where input() hangs.
# Always skip confirmation — the user typing the command is implicit consent.
skip_confirm = True
@@ -1473,11 +1267,9 @@ def handle_skills_slash(cmd: str, console: Optional[Console] = None) -> None:
for i, a in enumerate(args):
if a == "--category" and i + 1 < len(args):
category = args[i + 1]
elif a == "--name" and i + 1 < len(args):
name_override = args[i + 1]
do_install(identifier, category=category, force=force,
skip_confirm=skip_confirm, invalidate_cache=invalidate_cache,
name_override=name_override, console=c)
console=c)
elif action == "inspect":
if not args:
@@ -1487,12 +1279,11 @@ def handle_skills_slash(cmd: str, console: Optional[Console] = None) -> None:
elif action == "list":
source_filter = "all"
enabled_only = "--enabled-only" in args or "--enabled" in args
if "--source" in args:
idx = args.index("--source")
if idx + 1 < len(args):
source_filter = args[idx + 1]
do_list(source_filter=source_filter, enabled_only=enabled_only, console=c)
do_list(source_filter=source_filter, console=c)
elif action == "check":
name = args[0] if args else None
@@ -1580,8 +1371,7 @@ def _print_skills_help(console: Console) -> None:
" [cyan]search[/] <query> Search registries for skills\n"
" [cyan]install[/] <identifier> Install a skill (with security scan)\n"
" [cyan]inspect[/] <identifier> Preview a skill without installing\n"
" [cyan]list[/] [--source hub|builtin|local] [--enabled-only]\n"
" List installed skills; --enabled-only filters to the active profile's live set\n"
" [cyan]list[/] [--source hub|builtin|local] List installed skills\n"
" [cyan]check[/] [name] Check hub skills for upstream updates\n"
" [cyan]update[/] [name] Update hub skills with upstream changes\n"
" [cyan]audit[/] [name] Re-scan hub skills for security\n"
-152
View File
@@ -1,152 +0,0 @@
"""``hermes slack ...`` CLI subcommands.
Today only ``hermes slack manifest`` is implemented it generates the
Slack app manifest JSON for registering every gateway command as a native
Slack slash (``/btw``, ``/stop``, ``/model``, ) so users get the same
first-class slash UX Discord and Telegram already have.
Typical workflow::
$ hermes slack manifest > slack-manifest.json
# or:
$ hermes slack manifest --write
Then paste the printed JSON into the Slack app config (Features App
Manifest Edit) and click Save. Slack diffs the manifest and prompts
for reinstall when scopes/commands change.
"""
from __future__ import annotations
import json
import sys
from pathlib import Path
def _build_full_manifest(bot_name: str, bot_description: str) -> dict:
"""Build a full Slack manifest merging display info + our slash list.
The slash-command list is always generated from ``COMMAND_REGISTRY`` so
it stays in sync with the rest of Hermes. Other manifest sections
(display info, OAuth scopes, socket mode) are set to sensible defaults
for a Hermes deployment users can tweak them in the Slack UI after
pasting.
"""
from hermes_cli.commands import slack_app_manifest
partial = slack_app_manifest()
slashes = partial["features"]["slash_commands"]
return {
"_metadata": {
"major_version": 1,
"minor_version": 1,
},
"display_information": {
"name": bot_name[:35],
"description": (bot_description or "Your Hermes agent on Slack")[:140],
"background_color": "#1a1a2e",
},
"features": {
"bot_user": {
"display_name": bot_name[:80],
"always_online": True,
},
"slash_commands": slashes,
"assistant_view": {
"assistant_description": "Chat with Hermes in threads and DMs.",
},
},
"oauth_config": {
"scopes": {
"bot": [
"app_mentions:read",
"assistant:write",
"channels:history",
"channels:read",
"chat:write",
"commands",
"files:read",
"files:write",
"groups:history",
"im:history",
"im:read",
"im:write",
"users:read",
],
},
},
"settings": {
"event_subscriptions": {
"bot_events": [
"app_mention",
"assistant_thread_context_changed",
"assistant_thread_started",
"message.channels",
"message.groups",
"message.im",
],
},
"interactivity": {
"is_enabled": True,
},
"org_deploy_enabled": False,
"socket_mode_enabled": True,
"token_rotation_enabled": False,
},
}
def slack_manifest_command(args) -> int:
"""Print or write a Slack app manifest JSON.
Flags (all parsed in ``hermes_cli/main.py``):
--write [PATH] Write to file instead of stdout (default path:
``$HERMES_HOME/slack-manifest.json``)
--name NAME Override the bot display name (default: "Hermes")
--description DESC Override the bot description
--slashes-only Emit only the ``features.slash_commands`` array (for
merging into an existing manifest manually)
"""
name = getattr(args, "name", None) or "Hermes"
description = getattr(args, "description", None) or "Your Hermes agent on Slack"
if getattr(args, "slashes_only", False):
from hermes_cli.commands import slack_app_manifest
manifest = slack_app_manifest()["features"]["slash_commands"]
else:
manifest = _build_full_manifest(name, description)
payload = json.dumps(manifest, indent=2, ensure_ascii=False) + "\n"
write_target = getattr(args, "write", None)
if write_target is not None:
if isinstance(write_target, bool) and write_target:
# --write with no value → default location
try:
from hermes_constants import get_hermes_home
target = Path(get_hermes_home()) / "slack-manifest.json"
except Exception:
target = Path.home() / ".hermes" / "slack-manifest.json"
else:
target = Path(write_target).expanduser()
target.parent.mkdir(parents=True, exist_ok=True)
target.write_text(payload, encoding="utf-8")
print(f"Slack manifest written to: {target}", file=sys.stderr)
print(
"\nNext steps:\n"
" 1. Open https://api.slack.com/apps and pick your Hermes app\n"
" (or create a new one: Create New App → From an app manifest).\n"
f" 2. Features → App Manifest → paste the contents of\n"
f" {target}\n"
" 3. Save; Slack will prompt to reinstall the app if scopes or\n"
" slash commands changed.\n"
" 4. Make sure Socket Mode is enabled and you have a bot token\n"
" (xoxb-...) and app token (xapp-...) configured via\n"
" `hermes setup`.\n",
file=sys.stderr,
)
else:
sys.stdout.write(payload)
return 0
+1 -2
View File
@@ -326,8 +326,7 @@ def show_status(args):
"WeCom Callback": ("WECOM_CALLBACK_CORP_ID", None),
"Weixin": ("WEIXIN_ACCOUNT_ID", "WEIXIN_HOME_CHANNEL"),
"BlueBubbles": ("BLUEBUBBLES_SERVER_URL", "BLUEBUBBLES_HOME_CHANNEL"),
"QQBot": ("QQ_APP_ID", "QQ_HOME_CHANNEL"),
"Yuanbao": ("YUANBAO_APP_ID", "YUANBAO_HOME_CHANNEL"),
"QQBot": ("QQ_APP_ID", "QQBOT_HOME_CHANNEL"),
}
for name, (token_var, home_var) in platforms.items():
+4 -4
View File
@@ -20,10 +20,10 @@ def get_provider_request_timeout(
try:
from hermes_cli.config import load_config
config = load_config()
except Exception:
except ImportError:
return None
config = load_config()
providers = config.get("providers", {}) if isinstance(config, dict) else {}
provider_config = (
providers.get(provider_id, {}) if isinstance(providers, dict) else {}
@@ -49,10 +49,10 @@ def get_provider_stale_timeout(
try:
from hermes_cli.config import load_config
config = load_config()
except Exception:
except ImportError:
return None
config = load_config()
providers = config.get("providers", {}) if isinstance(config, dict) else {}
provider_config = (
providers.get(provider_id, {}) if isinstance(providers, dict) else {}
+3 -2
View File
@@ -10,7 +10,8 @@ import random
TIPS = [
# --- Slash Commands ---
"/background <prompt> (alias /bg or /btw) runs a task in a separate session while your current one stays free.",
"/btw <question> asks a quick side question without tools or history — great for clarifications.",
"/background <prompt> runs a task in a separate session while your current one stays free.",
"/branch forks the current session so you can explore a different direction without losing progress.",
"/compress manually compresses conversation context when things get long.",
"/rollback lists filesystem checkpoints — restore files the agent modified to any prior state.",
@@ -106,7 +107,7 @@ TIPS = [
"Set display.streaming: true to see tokens appear in real time as the model generates.",
"Set display.show_reasoning: true to watch the model's chain-of-thought reasoning.",
"Set display.compact: true to reduce whitespace in output for denser information.",
"Set display.busy_input_mode: queue to queue messages instead of interrupting the agent, or steer to inject them mid-run via /steer.",
"Set display.busy_input_mode: queue to queue messages instead of interrupting the agent.",
"Set display.resume_display: minimal to skip the full conversation recap on session resume.",
"Set compression.threshold: 0.50 to control when auto-compression fires (default: 50% of context).",
"Set agent.max_turns: 200 to let the agent take more tool-calling steps per turn.",
+15 -137
View File
@@ -11,7 +11,6 @@ the `platform_toolsets` key.
import json as _json
import logging
import os
import sys
from pathlib import Path
from typing import Dict, List, Optional, Set
@@ -26,7 +25,7 @@ from hermes_cli.nous_subscription import (
get_nous_subscription_features,
)
from tools.tool_backend_helpers import fal_key_is_configured, managed_nous_tools_enabled
from utils import base_url_hostname, is_truthy_value
from utils import base_url_hostname
logger = logging.getLogger(__name__)
@@ -69,59 +68,25 @@ CONFIGURABLE_TOOLSETS = [
("rl", "🧪 RL Training", "Tinker-Atropos training tools"),
("homeassistant", "🏠 Home Assistant", "smart home device control"),
("spotify", "🎵 Spotify", "playback, search, playlists, library"),
("discord", "💬 Discord (read/participate)", "fetch messages, search members, create thread"),
("discord_admin", "🛡️ Discord Server Admin", "list channels/roles, pin, assign roles"),
("yuanbao", "🤖 Yuanbao", "group info, member queries, DM"),
]
# Toolsets that are OFF by default for new installs.
# They're still in _HERMES_CORE_TOOLS (available at runtime if enabled),
# but the setup checklist won't pre-select them for first-time users.
_DEFAULT_OFF_TOOLSETS = {"moa", "homeassistant", "rl", "spotify", "discord", "discord_admin"}
# Platform-scoped toolsets: only appear in the `hermes tools` checklist for
# these platforms, and only resolve/save for these platforms. A toolset
# absent from this map is available on every platform (current behaviour).
#
# Use this for tools whose APIs only make sense on one platform (Discord
# server admin, Slack workspace admin, etc.). Keeps every other platform's
# checklist from filling up with irrelevant toggles.
_TOOLSET_PLATFORM_RESTRICTIONS: Dict[str, Set[str]] = {
"discord": {"discord"},
"discord_admin": {"discord"},
}
def _toolset_allowed_for_platform(ts_key: str, platform: str) -> bool:
"""Return True if ``ts_key`` is configurable on ``platform``.
Toolsets without a restriction entry are allowed everywhere (the default).
"""
allowed = _TOOLSET_PLATFORM_RESTRICTIONS.get(ts_key)
return allowed is None or platform in allowed
_DEFAULT_OFF_TOOLSETS = {"moa", "homeassistant", "rl", "spotify"}
def _get_effective_configurable_toolsets():
"""Return CONFIGURABLE_TOOLSETS + any plugin-provided toolsets.
Plugin toolsets are appended at the end so they appear after the
built-in toolsets in the TUI checklist. A plugin whose toolset key
already appears in ``CONFIGURABLE_TOOLSETS`` is skipped bundled
plugins (e.g. ``plugins/spotify``) share their toolset key with the
built-in entry, and we want the built-in label/description to win.
Without the dedupe, ``hermes tools`` "reconfigure existing" would
list the same toolset twice.
built-in toolsets in the TUI checklist.
"""
result = list(CONFIGURABLE_TOOLSETS)
seen = {ts_key for ts_key, _, _ in result}
try:
from hermes_cli.plugins import discover_plugins, get_plugin_toolsets
discover_plugins() # idempotent — ensures plugins are loaded
for entry in get_plugin_toolsets():
if entry[0] in seen:
continue
seen.add(entry[0])
result.append(entry)
result.extend(get_plugin_toolsets())
except Exception:
pass
return result
@@ -626,7 +591,7 @@ def _get_platform_tools(
include_default_mcp_servers: bool = True,
) -> Set[str]:
"""Resolve which individual toolset names are enabled for a platform."""
from toolsets import resolve_toolset, TOOLSETS
from toolsets import resolve_toolset
platform_toolsets = config.get("platform_toolsets") or {}
toolset_names = platform_toolsets.get(platform)
@@ -640,8 +605,6 @@ def _get_platform_tools(
toolset_names = [str(ts) for ts in toolset_names]
configurable_keys = {ts_key for ts_key, _, _ in CONFIGURABLE_TOOLSETS}
plugin_ts_keys = _get_plugin_toolset_keys()
platform_default_keys = {p["default_toolset"] for p in PLATFORMS.values()}
# If the saved list contains any configurable keys directly, the user
# has explicitly configured this platform — use direct membership.
@@ -651,10 +614,7 @@ def _get_platform_tools(
has_explicit_config = any(ts in configurable_keys for ts in toolset_names)
if has_explicit_config:
enabled_toolsets = {
ts for ts in toolset_names
if ts in configurable_keys and _toolset_allowed_for_platform(ts, platform)
}
enabled_toolsets = {ts for ts in toolset_names if ts in configurable_keys}
else:
# No explicit config — fall back to resolving composite toolset names
# (e.g. "hermes-cli") to individual tool names and reverse-mapping.
@@ -664,61 +624,14 @@ def _get_platform_tools(
enabled_toolsets = set()
for ts_key, _, _ in CONFIGURABLE_TOOLSETS:
if not _toolset_allowed_for_platform(ts_key, platform):
continue
ts_tools = set(resolve_toolset(ts_key))
if ts_tools and ts_tools.issubset(all_tool_names):
enabled_toolsets.add(ts_key)
default_off = set(_DEFAULT_OFF_TOOLSETS)
# Legacy safety: if the platform's own name matches a default-off
# toolset (e.g. `homeassistant` platform + `homeassistant` toolset),
# keep that toolset enabled on first install. Skip this dodge for
# platform-restricted toolsets — those are always opt-in even on
# their own platform (e.g. `discord` + `discord` should stay OFF).
if platform in default_off and platform not in _TOOLSET_PLATFORM_RESTRICTIONS:
if platform in default_off:
default_off.remove(platform)
# Home Assistant is already runtime-gated by its check_fn (requires
# HASS_TOKEN to register any tools). When a user has configured
# HASS_TOKEN, they've explicitly opted in — don't also strip it via
# _DEFAULT_OFF_TOOLSETS, which would silently drop HA from platforms
# (e.g. cron) that run through _get_platform_tools without an
# explicit saved toolset list. Without this, Norbert's HA cron jobs
# regressed after #14798 made cron honor per-platform tool config.
if "homeassistant" in default_off and os.getenv("HASS_TOKEN"):
default_off.remove("homeassistant")
enabled_toolsets -= default_off
# Recover non-configurable platform toolsets (e.g. discord, feishu_doc,
# feishu_drive). These are part of the platform's default composite but
# absent from CONFIGURABLE_TOOLSETS, so they can't appear in the TUI
# checklist or in a user-saved config. Must run in BOTH branches —
# otherwise saving via `hermes tools` (which flips has_explicit_config
# to True) silently drops them.
platform_tool_universe = set(resolve_toolset(PLATFORMS[platform]["default_toolset"]))
configurable_tool_universe = set()
for ck in configurable_keys:
configurable_tool_universe.update(resolve_toolset(ck))
claimed = set()
for ts_key in enabled_toolsets:
claimed.update(resolve_toolset(ts_key))
skip = configurable_keys | plugin_ts_keys | platform_default_keys
skip |= {k for k in TOOLSETS if k.startswith("hermes-")}
skip |= set(_DEFAULT_OFF_TOOLSETS) - {platform}
for ts_key, ts_def in TOOLSETS.items():
if ts_key in skip:
continue
if ts_def.get("includes"):
continue
ts_tools = set(resolve_toolset(ts_key))
if not ts_tools or not ts_tools.issubset(platform_tool_universe):
continue
if ts_tools.issubset(configurable_tool_universe):
continue
if not ts_tools.issubset(claimed):
enabled_toolsets.add(ts_key)
claimed.update(ts_tools)
# Plugin toolsets: enabled by default unless explicitly disabled, or
# unless the toolset is in _DEFAULT_OFF_TOOLSETS (e.g. spotify —
# shipped as a bundled plugin but user must opt in via `hermes tools`
@@ -726,6 +639,7 @@ def _get_platform_tools(
# A plugin toolset is "known" for a platform once `hermes tools`
# has been saved for that platform (tracked via known_plugin_toolsets).
# Unknown plugins default to enabled; known-but-absent = disabled.
plugin_ts_keys = _get_plugin_toolset_keys()
if plugin_ts_keys:
known_map = config.get("known_plugin_toolsets", {})
known_for_platform = set(known_map.get(platform, []))
@@ -743,6 +657,7 @@ def _get_platform_tools(
# Preserve any explicit non-configurable toolset entries (for example,
# custom toolsets or MCP server names saved in platform_toolsets).
platform_default_keys = {p["default_toolset"] for p in PLATFORMS.values()}
explicit_passthrough = {
ts
for ts in toolset_names
@@ -788,14 +703,6 @@ def _save_platform_tools(config: dict, platform: str, enabled_toolset_keys: Set[
"""
config.setdefault("platform_toolsets", {})
# Drop platform-scoped toolsets that don't apply here. Prevents the
# "Configure all platforms" checklist (or a hand-edited config.yaml)
# from turning on, say, the `discord` toolset for Telegram.
enabled_toolset_keys = {
ts for ts in enabled_toolset_keys
if _toolset_allowed_for_platform(ts, platform)
}
# Get the set of all configurable toolset keys (built-in + plugin)
configurable_keys = {ts_key for ts_key, _, _ in CONFIGURABLE_TOOLSETS}
plugin_keys = _get_plugin_toolset_keys()
@@ -810,7 +717,6 @@ def _save_platform_tools(config: dict, platform: str, enabled_toolset_keys: Set[
existing_toolsets = config.get("platform_toolsets", {}).get(platform, [])
if not isinstance(existing_toolsets, list):
existing_toolsets = []
existing_toolsets = [str(ts) for ts in existing_toolsets]
# Preserve any entries that are NOT configurable toolsets and NOT platform
# defaults (i.e. only MCP server names should be preserved)
@@ -818,11 +724,6 @@ def _save_platform_tools(config: dict, platform: str, enabled_toolset_keys: Set[
entry for entry in existing_toolsets
if entry not in configurable_keys and entry not in platform_default_keys
}
# Opening `hermes tools` is the user's opt-in to reconfigure tools, so treat
# saving from the picker as consent to clear the "no_mcp" sentinel. The
# picker has no checkbox for no_mcp, so without this users who once set it
# by hand could never re-enable MCP servers through the UI.
preserved_entries.discard("no_mcp")
# Merge preserved entries with new enabled toolsets
config["platform_toolsets"][platform] = sorted(enabled_toolset_keys | preserved_entries)
@@ -930,7 +831,7 @@ def _estimate_tool_tokens() -> Dict[str, int]:
return _tool_token_cache
def _prompt_toolset_checklist(platform_label: str, enabled: Set[str], platform: str = "cli") -> Set[str]:
def _prompt_toolset_checklist(platform_label: str, enabled: Set[str]) -> Set[str]:
"""Multi-select checklist of toolsets. Returns set of selected toolset keys."""
from hermes_cli.curses_ui import curses_checklist
from toolsets import resolve_toolset
@@ -938,12 +839,7 @@ def _prompt_toolset_checklist(platform_label: str, enabled: Set[str], platform:
# Pre-compute per-tool token counts (cached after first call).
tool_tokens = _estimate_tool_tokens()
effective_all = _get_effective_configurable_toolsets()
# Drop platform-scoped toolsets that don't apply to this platform.
effective = [
(k, l, d) for (k, l, d) in effective_all
if _toolset_allowed_for_platform(k, platform)
]
effective = _get_effective_configurable_toolsets()
labels = []
for ts_key, ts_label, ts_desc in effective:
@@ -1188,7 +1084,7 @@ def _is_provider_active(provider: dict, config: dict) -> bool:
configured_provider = image_cfg.get("provider")
if configured_provider not in (None, "", "fal"):
return False
if image_cfg.get("use_gateway") is not None and not is_truthy_value(image_cfg.get("use_gateway"), default=False):
if image_cfg.get("use_gateway") is False:
return False
return feature.managed_by_nous
if provider.get("tts_provider"):
@@ -1220,7 +1116,7 @@ def _is_provider_active(provider: dict, config: dict) -> bool:
return (
provider["imagegen_backend"] == "fal"
and configured_provider in (None, "", "fal")
and not is_truthy_value(image_cfg.get("use_gateway"), default=False)
and not image_cfg.get("use_gateway")
)
return False
@@ -1857,7 +1753,7 @@ def tools_command(args=None, first_install: bool = False, config: dict = None):
checklist_preselected = current_enabled - _DEFAULT_OFF_TOOLSETS
# Show checklist
new_enabled = _prompt_toolset_checklist(pinfo["label"], checklist_preselected, pkey)
new_enabled = _prompt_toolset_checklist(pinfo["label"], checklist_preselected)
added = new_enabled - current_enabled
removed = current_enabled - new_enabled
@@ -2213,11 +2109,7 @@ def _apply_mcp_change(config: dict, targets: List[str], action: str) -> Set[str]
def _print_tools_list(enabled_toolsets: set, mcp_servers: dict, platform: str = "cli"):
"""Print a summary of enabled/disabled toolsets and MCP tool filters."""
effective_all = _get_effective_configurable_toolsets()
effective = [
(k, l, d) for (k, l, d) in effective_all
if _toolset_allowed_for_platform(k, platform)
]
effective = _get_effective_configurable_toolsets()
builtin_keys = {ts_key for ts_key, _, _ in CONFIGURABLE_TOOLSETS}
print(f"Built-in toolsets ({platform}):")
@@ -2283,20 +2175,6 @@ def tools_disable_enable_command(args):
_print_error(f"Unknown toolset '{name}'")
toolset_targets = [t for t in toolset_targets if t in valid_toolsets]
# Reject platform-scoped toolsets on platforms that don't allow them.
restricted_targets = [
t for t in toolset_targets
if not _toolset_allowed_for_platform(t, platform)
]
if restricted_targets:
for name in restricted_targets:
allowed = sorted(_TOOLSET_PLATFORM_RESTRICTIONS.get(name) or set())
_print_error(
f"Toolset '{name}' is not available on platform '{platform}' "
f"(only: {', '.join(allowed)})"
)
toolset_targets = [t for t in toolset_targets if t not in restricted_targets]
if toolset_targets:
_apply_toolset_change(config, platform, toolset_targets, action)
+9 -7
View File
@@ -287,7 +287,7 @@ _SCHEMA_OVERRIDES: Dict[str, Dict[str, Any]] = {
"display.busy_input_mode": {
"type": "select",
"description": "Input behavior while agent is running",
"options": ["interrupt", "queue", "steer"],
"options": ["interrupt", "queue"],
},
"memory.provider": {
"type": "select",
@@ -2327,14 +2327,16 @@ def _resolve_chat_argv(
from hermes_cli.main import PROJECT_ROOT, _make_tui_argv
argv, cwd = _make_tui_argv(PROJECT_ROOT / "ui-tui", tui_dev=False)
env = os.environ.copy()
env.setdefault("NODE_ENV", "production")
env: Optional[dict] = None
if resume:
env["HERMES_TUI_RESUME"] = resume
if resume or sidecar_url:
env = os.environ.copy()
if sidecar_url:
env["HERMES_TUI_SIDECAR_URL"] = sidecar_url
if resume:
env["HERMES_TUI_RESUME"] = resume
if sidecar_url:
env["HERMES_TUI_SIDECAR_URL"] = sidecar_url
return list(argv), str(cwd) if cwd else None, env
+4 -3
View File
@@ -195,6 +195,10 @@ def setup_logging(
The ``logs/`` directory where files are written.
"""
global _logging_initialized
if _logging_initialized and not force:
home = hermes_home or get_hermes_home()
return home / "logs"
home = hermes_home or get_hermes_home()
log_dir = home / "logs"
log_dir.mkdir(parents=True, exist_ok=True)
@@ -244,9 +248,6 @@ def setup_logging(
log_filter=_ComponentFilter(COMPONENT_PREFIXES["gateway"]),
)
if _logging_initialized and not force:
return log_dir
# Ensure root logger level is low enough for the handlers to fire.
if root.level == logging.NOTSET or root.level > level:
root.setLevel(level)
+68 -348
View File
@@ -22,8 +22,6 @@ import sqlite3
import threading
import time
from pathlib import Path
from agent.memory_manager import sanitize_context
from hermes_constants import get_hermes_home
from typing import Any, Callable, Dict, List, Optional, TypeVar
@@ -33,7 +31,7 @@ T = TypeVar("T")
DEFAULT_DB_PATH = get_hermes_home() / "state.db"
SCHEMA_VERSION = 10
SCHEMA_VERSION = 8
SCHEMA_SQL = """
CREATE TABLE IF NOT EXISTS schema_version (
@@ -85,8 +83,7 @@ CREATE TABLE IF NOT EXISTS messages (
reasoning TEXT,
reasoning_content TEXT,
reasoning_details TEXT,
codex_reasoning_items TEXT,
codex_message_items TEXT
codex_reasoning_items TEXT
);
CREATE TABLE IF NOT EXISTS state_meta (
@@ -121,32 +118,6 @@ CREATE TRIGGER IF NOT EXISTS messages_fts_update AFTER UPDATE ON messages BEGIN
END;
"""
# Trigram FTS5 table for CJK substring search. The default unicode61
# tokenizer splits CJK characters into individual tokens, breaking phrase
# matching. The trigram tokenizer creates overlapping 3-byte sequences so
# substring queries work natively for any script (CJK, Thai, etc.).
FTS_TRIGRAM_SQL = """
CREATE VIRTUAL TABLE IF NOT EXISTS messages_fts_trigram USING fts5(
content,
content=messages,
content_rowid=id,
tokenize='trigram'
);
CREATE TRIGGER IF NOT EXISTS messages_fts_trigram_insert AFTER INSERT ON messages BEGIN
INSERT INTO messages_fts_trigram(rowid, content) VALUES (new.id, new.content);
END;
CREATE TRIGGER IF NOT EXISTS messages_fts_trigram_delete AFTER DELETE ON messages BEGIN
INSERT INTO messages_fts_trigram(messages_fts_trigram, rowid, content) VALUES('delete', old.id, old.content);
END;
CREATE TRIGGER IF NOT EXISTS messages_fts_trigram_update AFTER UPDATE ON messages BEGIN
INSERT INTO messages_fts_trigram(messages_fts_trigram, rowid, content) VALUES('delete', old.id, old.content);
INSERT INTO messages_fts_trigram(rowid, content) VALUES (new.id, new.content);
END;
"""
class SessionDB:
"""
@@ -385,27 +356,6 @@ class SessionDB:
except sqlite3.OperationalError:
pass # Column already exists
cursor.execute("UPDATE schema_version SET version = 8")
if current_version < 9:
# v9: preserve replayable Codex assistant message ids/phases so
# follow-up turns can rebuild Responses API message items instead
# of flattening everything to plain assistant text.
try:
cursor.execute('ALTER TABLE messages ADD COLUMN "codex_message_items" TEXT')
except sqlite3.OperationalError:
pass # Column already exists
cursor.execute("UPDATE schema_version SET version = 9")
if current_version < 10:
# v10: trigram FTS5 table for CJK/substring search.
# Created via FTS_TRIGRAM_SQL below; backfill existing messages.
try:
cursor.execute("SELECT * FROM messages_fts_trigram LIMIT 0")
except sqlite3.OperationalError:
cursor.executescript(FTS_TRIGRAM_SQL)
cursor.execute(
"INSERT INTO messages_fts_trigram(rowid, content) "
"SELECT id, content FROM messages WHERE content IS NOT NULL"
)
cursor.execute("UPDATE schema_version SET version = 10")
# Unique title index — always ensure it exists (safe to run after migrations
# since the title column is guaranteed to exist at this point)
@@ -423,12 +373,6 @@ class SessionDB:
except sqlite3.OperationalError:
cursor.executescript(FTS_SQL)
# Trigram FTS5 for CJK/substring search
try:
cursor.execute("SELECT * FROM messages_fts_trigram LIMIT 0")
except sqlite3.OperationalError:
cursor.executescript(FTS_TRIGRAM_SQL)
self._conn.commit()
# =========================================================================
@@ -878,18 +822,7 @@ class SessionDB:
params = []
if not include_children:
# Show root sessions and branch sessions (whose parent ended with
# end_reason='branched' before the child was created), while still
# hiding sub-agent runs and compression continuations (which also
# carry a parent_session_id but were spawned while the parent was
# still live — i.e., started_at < parent.ended_at).
where_clauses.append(
"(s.parent_session_id IS NULL"
" OR EXISTS (SELECT 1 FROM sessions p"
" WHERE p.id = s.parent_session_id"
" AND p.end_reason = 'branched'"
" AND s.started_at >= p.ended_at))"
)
where_clauses.append("s.parent_session_id IS NULL")
if source:
where_clauses.append("s.source = ?")
@@ -1023,7 +956,6 @@ class SessionDB:
reasoning_content: str = None,
reasoning_details: Any = None,
codex_reasoning_items: Any = None,
codex_message_items: Any = None,
) -> int:
"""
Append a message to a session. Returns the message row ID.
@@ -1040,10 +972,6 @@ class SessionDB:
json.dumps(codex_reasoning_items)
if codex_reasoning_items else None
)
codex_message_items_json = (
json.dumps(codex_message_items)
if codex_message_items else None
)
tool_calls_json = json.dumps(tool_calls) if tool_calls else None
# Pre-compute tool call count
@@ -1055,9 +983,8 @@ class SessionDB:
cursor = conn.execute(
"""INSERT INTO messages (session_id, role, content, tool_call_id,
tool_calls, tool_name, timestamp, token_count, finish_reason,
reasoning, reasoning_content, reasoning_details, codex_reasoning_items,
codex_message_items)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
reasoning, reasoning_content, reasoning_details, codex_reasoning_items)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
(
session_id,
role,
@@ -1072,7 +999,6 @@ class SessionDB:
reasoning_content,
reasoning_details_json,
codex_items_json,
codex_message_items_json,
),
)
msg_id = cursor.lastrowid
@@ -1178,33 +1104,22 @@ class SessionDB:
current = child_id
return session_id
def get_messages_as_conversation(
self, session_id: str, include_ancestors: bool = False
) -> List[Dict[str, Any]]:
def get_messages_as_conversation(self, session_id: str) -> List[Dict[str, Any]]:
"""
Load messages in the OpenAI conversation format (role + content dicts).
Used by the gateway to restore conversation history.
"""
session_ids = [session_id]
if include_ancestors:
session_ids = self._session_lineage_root_to_tip(session_id)
with self._lock:
placeholders = ",".join("?" for _ in session_ids)
rows = self._conn.execute(
cursor = self._conn.execute(
"SELECT role, content, tool_call_id, tool_calls, tool_name, "
"reasoning, reasoning_content, reasoning_details, codex_reasoning_items, "
"codex_message_items "
f"FROM messages WHERE session_id IN ({placeholders}) ORDER BY timestamp, id",
tuple(session_ids),
).fetchall()
"reasoning, reasoning_content, reasoning_details, codex_reasoning_items "
"FROM messages WHERE session_id = ? ORDER BY timestamp, id",
(session_id,),
)
rows = cursor.fetchall()
messages = []
for row in rows:
content = row["content"]
if row["role"] in {"user", "assistant"} and isinstance(content, str):
content = sanitize_context(content).strip()
msg = {"role": row["role"], "content": content}
msg = {"role": row["role"], "content": row["content"]}
if row["tool_call_id"]:
msg["tool_call_id"] = row["tool_call_id"]
if row["tool_name"]:
@@ -1235,53 +1150,9 @@ class SessionDB:
except (json.JSONDecodeError, TypeError):
logger.warning("Failed to deserialize codex_reasoning_items, falling back to None")
msg["codex_reasoning_items"] = None
if row["codex_message_items"]:
try:
msg["codex_message_items"] = json.loads(row["codex_message_items"])
except (json.JSONDecodeError, TypeError):
logger.warning("Failed to deserialize codex_message_items, falling back to None")
msg["codex_message_items"] = None
if include_ancestors and self._is_duplicate_replayed_user_message(messages, msg):
continue
messages.append(msg)
return messages
def _session_lineage_root_to_tip(self, session_id: str) -> List[str]:
if not session_id:
return [session_id]
chain = []
current = session_id
seen = set()
with self._lock:
for _ in range(100):
if not current or current in seen:
break
seen.add(current)
chain.append(current)
row = self._conn.execute(
"SELECT parent_session_id FROM sessions WHERE id = ?",
(current,),
).fetchone()
if row is None:
break
current = row["parent_session_id"] if hasattr(row, "keys") else row[0]
return list(reversed(chain)) or [session_id]
@staticmethod
def _is_duplicate_replayed_user_message(messages: List[Dict[str, Any]], msg: Dict[str, Any]) -> bool:
if msg.get("role") != "user":
return False
content = msg.get("content")
if not isinstance(content, str) or not content:
return False
for prev in reversed(messages):
if prev.get("role") == "user" and prev.get("content") == content:
return True
if prev.get("role") == "assistant" and (prev.get("content") or prev.get("tool_calls")):
return False
return False
# =========================================================================
# Search
# =========================================================================
@@ -1340,16 +1211,6 @@ class SessionDB:
return sanitized.strip()
@staticmethod
def _is_cjk_codepoint(cp: int) -> bool:
return (0x4E00 <= cp <= 0x9FFF or # CJK Unified Ideographs
0x3400 <= cp <= 0x4DBF or # CJK Extension A
0x20000 <= cp <= 0x2A6DF or # CJK Extension B
0x3000 <= cp <= 0x303F or # CJK Symbols
0x3040 <= cp <= 0x309F or # Hiragana
0x30A0 <= cp <= 0x30FF or # Katakana
0xAC00 <= cp <= 0xD7AF) # Hangul Syllables
@staticmethod
def _contains_cjk(text: str) -> bool:
"""Check if text contains CJK (Chinese, Japanese, Korean) characters."""
@@ -1365,11 +1226,6 @@ class SessionDB:
return True
return False
@classmethod
def _count_cjk(cls, text: str) -> int:
"""Count CJK characters in text."""
return sum(1 for ch in text if cls._is_cjk_codepoint(ord(ch)))
def search_messages(
self,
query: str,
@@ -1440,113 +1296,52 @@ class SessionDB:
LIMIT ? OFFSET ?
"""
# CJK queries bypass the unicode61 FTS5 table. The default tokenizer
# splits CJK characters into individual tokens, so "大别山项目" becomes
# "大 AND 别 AND 山 AND 项 AND 目" — producing false positives and
# missing exact phrase matches.
#
# For queries with 3+ CJK characters, we use the trigram FTS5 table
# (indexed substring matching with ranking and snippets). For shorter
# CJK queries (1-2 chars), trigram can't match (it needs ≥9 UTF-8
# bytes = 3 CJK chars), so we fall back to LIKE.
is_cjk = self._contains_cjk(query)
if is_cjk:
raw_query = query.strip('"').strip()
cjk_count = self._count_cjk(raw_query)
if cjk_count >= 3:
# Trigram FTS5 path — quote each non-operator token to handle
# FTS5 special chars (%, *, etc.) while preserving boolean
# operators (AND, OR, NOT) for multi-term queries.
tokens = raw_query.split()
parts = []
for tok in tokens:
if tok.upper() in ("AND", "OR", "NOT"):
parts.append(tok)
else:
parts.append('"' + tok.replace('"', '""') + '"')
trigram_query = " ".join(parts)
tri_where = ["messages_fts_trigram MATCH ?"]
tri_params: list = [trigram_query]
if source_filter is not None:
tri_where.append(f"s.source IN ({','.join('?' for _ in source_filter)})")
tri_params.extend(source_filter)
if exclude_sources is not None:
tri_where.append(f"s.source NOT IN ({','.join('?' for _ in exclude_sources)})")
tri_params.extend(exclude_sources)
if role_filter:
tri_where.append(f"m.role IN ({','.join('?' for _ in role_filter)})")
tri_params.extend(role_filter)
tri_sql = f"""
SELECT
m.id,
m.session_id,
m.role,
snippet(messages_fts_trigram, 0, '>>>', '<<<', '...', 40) AS snippet,
m.content,
m.timestamp,
m.tool_name,
s.source,
s.model,
s.started_at AS session_started
FROM messages_fts_trigram
JOIN messages m ON m.id = messages_fts_trigram.rowid
JOIN sessions s ON s.id = m.session_id
WHERE {' AND '.join(tri_where)}
ORDER BY rank
LIMIT ? OFFSET ?
"""
tri_params.extend([limit, offset])
with self._lock:
try:
tri_cursor = self._conn.execute(tri_sql, tri_params)
except sqlite3.OperationalError:
matches = []
else:
matches = [dict(row) for row in tri_cursor.fetchall()]
else:
# Short CJK query (1-2 chars) — trigram needs ≥3 CJK chars.
# Fall back to LIKE substring search.
escaped = raw_query.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
like_where = ["m.content LIKE ? ESCAPE '\\'"]
like_params: list = [f"%{escaped}%"]
if source_filter is not None:
like_where.append(f"s.source IN ({','.join('?' for _ in source_filter)})")
like_params.extend(source_filter)
if exclude_sources is not None:
like_where.append(f"s.source NOT IN ({','.join('?' for _ in exclude_sources)})")
like_params.extend(exclude_sources)
if role_filter:
like_where.append(f"m.role IN ({','.join('?' for _ in role_filter)})")
like_params.extend(role_filter)
like_sql = f"""
SELECT m.id, m.session_id, m.role,
substr(m.content,
max(1, instr(m.content, ?) - 40),
120) AS snippet,
m.content, m.timestamp, m.tool_name,
s.source, s.model, s.started_at AS session_started
FROM messages m
JOIN sessions s ON s.id = m.session_id
WHERE {' AND '.join(like_where)}
ORDER BY m.timestamp DESC
LIMIT ? OFFSET ?
"""
like_params.extend([limit, offset])
# instr() parameter goes first in the bound list
like_params = [raw_query] + like_params
with self._lock:
like_cursor = self._conn.execute(like_sql, like_params)
matches = [dict(row) for row in like_cursor.fetchall()]
else:
with self._lock:
try:
cursor = self._conn.execute(sql, params)
except sqlite3.OperationalError:
# FTS5 query syntax error despite sanitization — return empty
with self._lock:
try:
cursor = self._conn.execute(sql, params)
except sqlite3.OperationalError:
# FTS5 query syntax error despite sanitization — return empty
# unless query contains CJK (fall back to LIKE below)
if not self._contains_cjk(query):
return []
else:
matches = [dict(row) for row in cursor.fetchall()]
matches = []
else:
matches = [dict(row) for row in cursor.fetchall()]
# LIKE fallback for CJK queries: FTS5 default tokenizer splits CJK
# characters individually, causing multi-character queries to fail.
if not matches and self._contains_cjk(query):
raw_query = query.strip('"').strip()
like_where = ["m.content LIKE ?"]
like_params: list = [f"%{raw_query}%"]
if source_filter is not None:
like_where.append(f"s.source IN ({','.join('?' for _ in source_filter)})")
like_params.extend(source_filter)
if exclude_sources is not None:
like_where.append(f"s.source NOT IN ({','.join('?' for _ in exclude_sources)})")
like_params.extend(exclude_sources)
if role_filter:
like_where.append(f"m.role IN ({','.join('?' for _ in role_filter)})")
like_params.extend(role_filter)
like_sql = f"""
SELECT m.id, m.session_id, m.role,
substr(m.content,
max(1, instr(m.content, ?) - 40),
120) AS snippet,
m.content, m.timestamp, m.tool_name,
s.source, s.model, s.started_at AS session_started
FROM messages m
JOIN sessions s ON s.id = m.session_id
WHERE {' AND '.join(like_where)}
ORDER BY m.timestamp DESC
LIMIT ? OFFSET ?
"""
like_params.extend([limit, offset])
# instr() parameter goes first in the bound list
like_params = [raw_query] + like_params
with self._lock:
like_cursor = self._conn.execute(like_sql, like_params)
matches = [dict(row) for row in like_cursor.fetchall()]
# Add surrounding context (1 message before + after each match).
# Done outside the lock so we don't hold it across N sequential queries.
@@ -1606,32 +1401,16 @@ class SessionDB:
limit: int = 20,
offset: int = 0,
) -> List[Dict[str, Any]]:
"""List sessions, optionally filtered by source.
Returns rows enriched with a computed ``last_active`` column (latest
message timestamp for the session, falling back to ``started_at``),
ordered by most-recently-used first.
"""
select_with_last_active = (
"SELECT s.*, COALESCE(m.last_active, s.started_at) AS last_active "
"FROM sessions s "
"LEFT JOIN ("
"SELECT session_id, MAX(timestamp) AS last_active "
"FROM messages GROUP BY session_id"
") m ON m.session_id = s.id "
)
"""List sessions, optionally filtered by source."""
with self._lock:
if source:
cursor = self._conn.execute(
f"{select_with_last_active}"
"WHERE s.source = ? "
"ORDER BY last_active DESC, s.started_at DESC, s.id DESC LIMIT ? OFFSET ?",
"SELECT * FROM sessions WHERE source = ? ORDER BY started_at DESC LIMIT ? OFFSET ?",
(source, limit, offset),
)
else:
cursor = self._conn.execute(
f"{select_with_last_active}"
"ORDER BY last_active DESC, s.started_at DESC, s.id DESC LIMIT ? OFFSET ?",
"SELECT * FROM sessions ORDER BY started_at DESC LIMIT ? OFFSET ?",
(limit, offset),
)
return [dict(row) for row in cursor.fetchall()]
@@ -1698,45 +1477,12 @@ class SessionDB:
)
self._execute_write(_do)
@staticmethod
def _remove_session_files(sessions_dir: Optional[Path], session_id: str) -> None:
"""Remove on-disk transcript files for a session.
Cleans up ``{session_id}.json``, ``{session_id}.jsonl``, and any
``request_dump_{session_id}_*.json`` files left by the gateway.
Silently skips files that don't exist and swallows OSError so a
filesystem hiccup never blocks a DB operation.
"""
if sessions_dir is None:
return
for suffix in (".json", ".jsonl"):
p = sessions_dir / f"{session_id}{suffix}"
try:
p.unlink(missing_ok=True)
except OSError:
pass
# request_dump files use session_id as a prefix component
try:
for p in sessions_dir.glob(f"request_dump_{session_id}_*.json"):
try:
p.unlink(missing_ok=True)
except OSError:
pass
except OSError:
pass
def delete_session(
self,
session_id: str,
sessions_dir: Optional[Path] = None,
) -> bool:
def delete_session(self, session_id: str) -> bool:
"""Delete a session and all its messages.
Child sessions are orphaned (parent_session_id set to NULL) rather
than cascade-deleted, so they remain accessible independently.
When *sessions_dir* is provided, also removes on-disk transcript
files (``.json`` / ``.jsonl`` / ``request_dump_*``) for the deleted
session. Returns True if the session was found and deleted.
Returns True if the session was found and deleted.
"""
def _do(conn):
cursor = conn.execute(
@@ -1753,29 +1499,16 @@ class SessionDB:
conn.execute("DELETE FROM messages WHERE session_id = ?", (session_id,))
conn.execute("DELETE FROM sessions WHERE id = ?", (session_id,))
return True
return self._execute_write(_do)
deleted = self._execute_write(_do)
if deleted:
self._remove_session_files(sessions_dir, session_id)
return deleted
def prune_sessions(
self,
older_than_days: int = 90,
source: str = None,
sessions_dir: Optional[Path] = None,
) -> int:
def prune_sessions(self, older_than_days: int = 90, source: str = None) -> int:
"""Delete sessions older than N days. Returns count of deleted sessions.
Only prunes ended sessions (not active ones). Child sessions outside
the prune window are orphaned (parent_session_id set to NULL) rather
than cascade-deleted. When *sessions_dir* is provided, also removes
on-disk transcript files (``.json`` / ``.jsonl`` /
``request_dump_*``) for every pruned session, outside the DB
transaction.
than cascade-deleted.
"""
cutoff = time.time() - (older_than_days * 86400)
removed_ids: list[str] = []
def _do(conn):
if source:
@@ -1805,14 +1538,9 @@ class SessionDB:
for sid in session_ids:
conn.execute("DELETE FROM messages WHERE session_id = ?", (sid,))
conn.execute("DELETE FROM sessions WHERE id = ?", (sid,))
removed_ids.append(sid)
return len(session_ids)
count = self._execute_write(_do)
# Clean up on-disk files outside the DB transaction
for sid in removed_ids:
self._remove_session_files(sessions_dir, sid)
return count
return self._execute_write(_do)
# ── Meta key/value (for scheduler bookkeeping) ──
@@ -1866,7 +1594,6 @@ class SessionDB:
retention_days: int = 90,
min_interval_hours: int = 24,
vacuum: bool = True,
sessions_dir: Optional[Path] = None,
) -> Dict[str, Any]:
"""Idempotent auto-maintenance: prune old sessions + optional VACUUM.
@@ -1874,10 +1601,6 @@ class SessionDB:
within ``min_interval_hours`` no-op. Designed to be called once at
startup from long-lived entrypoints (CLI, gateway, cron scheduler).
When *sessions_dir* is provided, on-disk transcript files
(``.json`` / ``.jsonl`` / ``request_dump_*``) for pruned sessions
are removed as part of the same sweep (issue #3015).
Never raises. On any failure, logs a warning and returns a dict
with ``"error"`` set.
@@ -1901,10 +1624,7 @@ class SessionDB:
except (TypeError, ValueError):
pass # corrupt meta; treat as no prior run
pruned = self.prune_sessions(
older_than_days=retention_days,
sessions_dir=sessions_dir,
)
pruned = self.prune_sessions(older_than_days=retention_days)
result["pruned"] = pruned
# Only VACUUM if we actually freed rows — VACUUM on a tight DB
+23 -39
View File
@@ -24,7 +24,6 @@ import json
import asyncio
import logging
import threading
import time
from typing import Dict, Any, List, Optional, Tuple
from tools.registry import discover_builtin_tools, registry
@@ -289,34 +288,30 @@ def get_tool_definitions(
filtered_tools[i] = {"type": "function", "function": dynamic_schema}
break
# Rebuild discord / discord_admin schemas based on the bot's privileged
# intents (detected from GET /applications/@me) and the user's action
# allowlist in config. Hides actions the bot's intents don't support so
# the model never attempts them, and annotates fetch_messages when the
# Rebuild discord_server schema based on the bot's privileged intents
# (detected from GET /applications/@me) and the user's action allowlist
# in config. Hides actions the bot's intents don't support so the
# model never attempts them, and annotates fetch_messages when the
# MESSAGE_CONTENT intent is missing.
_discord_schema_fns = {
"discord": "get_dynamic_schema_core",
"discord_admin": "get_dynamic_schema_admin",
}
for discord_tool_name in _discord_schema_fns:
if discord_tool_name in available_tool_names:
try:
from tools import discord_tool as _dt
schema_fn = getattr(_dt, _discord_schema_fns[discord_tool_name])
dynamic = schema_fn()
except Exception:
dynamic = None
if dynamic is None:
filtered_tools = [
t for t in filtered_tools
if t.get("function", {}).get("name") != discord_tool_name
]
available_tool_names.discard(discord_tool_name)
else:
for i, td in enumerate(filtered_tools):
if td.get("function", {}).get("name") == discord_tool_name:
filtered_tools[i] = {"type": "function", "function": dynamic}
break
if "discord_server" in available_tool_names:
try:
from tools.discord_tool import get_dynamic_schema
dynamic = get_dynamic_schema()
except Exception: # pragma: no cover — defensive, fall back to static
dynamic = None
if dynamic is None:
# Tool filtered out entirely (empty allowlist or detection disabled
# the only remaining actions). Drop it from the schema list.
filtered_tools = [
t for t in filtered_tools
if t.get("function", {}).get("name") != "discord_server"
]
available_tool_names.discard("discord_server")
else:
for i, td in enumerate(filtered_tools):
if td.get("function", {}).get("name") == "discord_server":
filtered_tools[i] = {"type": "function", "function": dynamic}
break
# Strip web tool cross-references from browser_navigate description when
# web_search / web_extract are not available. The static schema says
@@ -568,14 +563,6 @@ def handle_function_call(
except Exception:
pass # file_tools may not be loaded yet
# Measure tool dispatch latency so post_tool_call and
# transform_tool_result hooks can observe per-tool duration.
# Inspired by Claude Code 2.1.119, which added ``duration_ms`` to
# PostToolUse hook inputs so plugin authors can build latency
# dashboards, budget alerts, and regression canaries without having
# to wrap every tool manually. We use monotonic() so the value is
# unaffected by wall-clock adjustments during the call.
_dispatch_start = time.monotonic()
if function_name == "execute_code":
# Prefer the caller-provided list so subagents can't overwrite
# the parent's tool set via the process-global.
@@ -591,7 +578,6 @@ def handle_function_call(
task_id=task_id,
user_task=user_task,
)
duration_ms = int((time.monotonic() - _dispatch_start) * 1000)
try:
from hermes_cli.plugins import invoke_hook
@@ -603,7 +589,6 @@ def handle_function_call(
task_id=task_id or "",
session_id=session_id or "",
tool_call_id=tool_call_id or "",
duration_ms=duration_ms,
)
except Exception:
pass
@@ -624,7 +609,6 @@ def handle_function_call(
task_id=task_id or "",
session_id=session_id or "",
tool_call_id=tool_call_id or "",
duration_ms=duration_ms,
)
for hook_result in hook_results:
if isinstance(hook_result, str):
+3 -30
View File
@@ -7,7 +7,9 @@
perSystem = { pkgs, system, lib, ... }:
let
hermes-agent = inputs.self.packages.${system}.default;
hermesVenv = hermes-agent.hermesVenv;
hermesVenv = pkgs.callPackage ./python.nix {
inherit (inputs) uv2nix pyproject-nix pyproject-build-systems;
};
configMergeScript = pkgs.callPackage ./configMergeScript.nix { };
@@ -191,35 +193,6 @@ json.dump(sorted(leaf_paths(DEFAULT_CONFIG)), sys.stdout, indent=2)
echo "ok" > $out/result
'';
# Verify extraPythonPackages PYTHONPATH injection
extra-python-packages = let
testPkg = pkgs.python312Packages.pyfiglet;
hermesWithExtra = hermes-agent.override {
extraPythonPackages = [ testPkg ];
};
in pkgs.runCommand "hermes-extra-python-packages" { } ''
set -e
echo "=== Checking extraPythonPackages PYTHONPATH injection ==="
grep -q "PYTHONPATH" ${hermesWithExtra}/bin/hermes || \
(echo "FAIL: PYTHONPATH not in wrapper"; exit 1)
echo "PASS: PYTHONPATH present in wrapper"
grep -q "${testPkg}" ${hermesWithExtra}/bin/hermes || \
(echo "FAIL: test package path not in PYTHONPATH"; exit 1)
echo "PASS: test package path found in wrapper"
echo "=== Checking base package has no PYTHONPATH ==="
if grep -q "PYTHONPATH" ${hermes-agent}/bin/hermes; then
echo "FAIL: base package should not have PYTHONPATH"; exit 1
fi
echo "PASS: base package clean"
echo "=== All extraPythonPackages checks passed ==="
mkdir -p $out
echo "ok" > $out/result
'';
# ── Config merge + round-trip test ────────────────────────────────
# Tests the merge script (Nix activation behavior) across 7
# scenarios, then verifies Python's load_config() reads correctly.
-186
View File
@@ -1,186 +0,0 @@
# nix/hermes-agent.nix — Overridable Hermes Agent package
#
# callPackage auto-wires nixpkgs args; flake inputs are passed explicitly.
# Users override via: pkgs.hermes-agent.override { extraPythonPackages = [...]; }
{
lib,
stdenv,
makeWrapper,
callPackage,
python312,
nodejs_22,
ripgrep,
git,
openssh,
ffmpeg,
tirith,
# Flake inputs — passed explicitly by packages.nix and overlays.nix
uv2nix,
pyproject-nix,
pyproject-build-systems,
npm-lockfile-fix,
# Overridable parameters
extraPythonPackages ? [ ],
}:
let
hermesVenv = callPackage ./python.nix {
inherit uv2nix pyproject-nix pyproject-build-systems;
};
hermesNpmLib = callPackage ./lib.nix {
inherit npm-lockfile-fix;
};
hermesTui = callPackage ./tui.nix {
inherit hermesNpmLib;
};
hermesWeb = callPackage ./web.nix {
inherit hermesNpmLib;
};
bundledSkills = lib.cleanSourceWith {
src = ../skills;
filter = path: _type: !(lib.hasInfix "/index-cache/" path);
};
runtimeDeps = [
nodejs_22
ripgrep
git
openssh
ffmpeg
tirith
];
runtimePath = lib.makeBinPath runtimeDeps;
sitePackagesPath = python312.sitePackages;
# Walk propagatedBuildInputs to include transitive Python deps in PYTHONPATH.
# Without this, a plugin listing e.g. requests as a dep would fail at runtime
# if requests isn't already in the sealed uv2nix venv.
allExtraPythonPackages = python312.pkgs.requiredPythonModules extraPythonPackages;
pythonPath = lib.makeSearchPath sitePackagesPath allExtraPythonPackages;
pyprojectHash = builtins.hashString "sha256" (builtins.readFile ../pyproject.toml);
uvLockHash =
if builtins.pathExists ../uv.lock then
builtins.hashString "sha256" (builtins.readFile ../uv.lock)
else
"none";
in
stdenv.mkDerivation {
pname = "hermes-agent";
version = (builtins.fromTOML (builtins.readFile ../pyproject.toml)).project.version;
dontUnpack = true;
dontBuild = true;
nativeBuildInputs = [ makeWrapper ];
installPhase = ''
runHook preInstall
mkdir -p $out/share/hermes-agent $out/bin
cp -r ${bundledSkills} $out/share/hermes-agent/skills
cp -r ${hermesWeb} $out/share/hermes-agent/web_dist
mkdir -p $out/ui-tui
cp -r ${hermesTui}/lib/hermes-tui/* $out/ui-tui/
${lib.concatMapStringsSep "\n"
(name: ''
makeWrapper ${hermesVenv}/bin/${name} $out/bin/${name} \
--suffix PATH : "${runtimePath}" \
--set HERMES_BUNDLED_SKILLS $out/share/hermes-agent/skills \
--set HERMES_WEB_DIST $out/share/hermes-agent/web_dist \
--set HERMES_TUI_DIR $out/ui-tui \
--set HERMES_PYTHON ${hermesVenv}/bin/python3 \
--set HERMES_NODE ${nodejs_22}/bin/node \
${lib.optionalString (extraPythonPackages != [ ]) ''--suffix PYTHONPATH : "${pythonPath}"''}
'')
[
"hermes"
"hermes-agent"
"hermes-acp"
]
}
${lib.optionalString (extraPythonPackages != [ ]) ''
echo "=== Checking for plugin/core package collisions ==="
${hermesVenv}/bin/python3 -c "
import pathlib, sys, re
def canonical(name):
return re.sub(r'[-_.]+', '-', name).lower()
# Collect core venv package names
core = set()
venv_sp = pathlib.Path('${hermesVenv}/${sitePackagesPath}')
for di in venv_sp.glob('*.dist-info'):
meta = di / 'METADATA'
if meta.exists():
for line in meta.read_text().splitlines():
if line.startswith('Name:'):
core.add(canonical(line.split(':', 1)[1].strip()))
break
# Check each extra package for collisions
extras_dirs = [${lib.concatMapStringsSep ", " (p: "'${toString p}'") allExtraPythonPackages}]
for edir in extras_dirs:
sp = pathlib.Path(edir) / '${sitePackagesPath}'
if not sp.exists():
continue
for di in sp.glob('*.dist-info'):
meta = di / 'METADATA'
if not meta.exists():
continue
for line in meta.read_text().splitlines():
if line.startswith('Name:'):
pkg = canonical(line.split(':', 1)[1].strip())
if pkg in core:
print(f'ERROR: plugin package \"{pkg}\" collides with a package in hermes sealed venv', file=sys.stderr)
print(f' from: {di}', file=sys.stderr)
print(f' Remove this dependency from extraPythonPackages.', file=sys.stderr)
sys.exit(1)
break
print('No collisions found.')
"
echo "=== No collisions ==="
''}
runHook postInstall
'';
passthru = {
inherit hermesTui hermesWeb hermesNpmLib hermesVenv;
devShellHook = ''
STAMP=".nix-stamps/hermes-agent"
STAMP_VALUE="${pyprojectHash}:${uvLockHash}"
if [ ! -f "$STAMP" ] || [ "$(cat "$STAMP")" != "$STAMP_VALUE" ]; then
echo "hermes-agent: installing Python dependencies..."
uv venv .venv --python ${python312}/bin/python3 2>/dev/null || true
source .venv/bin/activate
uv pip install -e ".[all]"
[ -d mini-swe-agent ] && uv pip install -e ./mini-swe-agent 2>/dev/null || true
[ -d tinker-atropos ] && uv pip install -e ./tinker-atropos 2>/dev/null || true
mkdir -p .nix-stamps
echo "$STAMP_VALUE" > "$STAMP"
else
source .venv/bin/activate
export HERMES_PYTHON=${hermesVenv}/bin/python3
fi
'';
};
meta = with lib; {
description = "AI agent with advanced tool-calling capabilities";
homepage = "https://github.com/NousResearch/hermes-agent";
mainProgram = "hermes";
license = licenses.mit;
platforms = platforms.unix;
};
}
+6 -81
View File
@@ -28,8 +28,6 @@
let
cfg = config.services.hermes-agent;
effectivePackage = if cfg.extraPythonPackages == [ ] then cfg.package
else cfg.package.override { inherit (cfg) extraPythonPackages; };
hermes-agent = inputs.self.packages.${pkgs.stdenv.hostPlatform.system}.default;
# Deep-merge config type (from 0xrsydn/nix-hermes-agent)
@@ -458,52 +456,6 @@
description = "Extra packages available on PATH.";
};
extraPlugins = mkOption {
type = types.listOf types.package;
default = [ ];
description = ''
Directory-based plugin packages to symlink into the hermes plugins
directory. Each package should contain a plugin.yaml and __init__.py
at its root. Hermes discovers these automatically on startup.
'';
example = literalExpression ''
[
(pkgs.fetchFromGitHub {
owner = "stephenschoettler";
repo = "hermes-lcm";
name = "hermes-lcm";
rev = "v0.7.0";
hash = "sha256-...";
})
]
'';
};
extraPythonPackages = mkOption {
type = types.listOf types.package;
default = [ ];
description = ''
Python packages to add to PYTHONPATH for entry-point plugin discovery.
These are pip-packaged plugins that register via the
hermes_agent.plugins entry-point group. Each package must be built
with the same Python interpreter as hermes (python312).
'';
example = literalExpression ''
[
(pkgs.python312Packages.buildPythonPackage {
pname = "rtk-hermes";
version = "1.0.0";
src = pkgs.fetchFromGitHub {
owner = "ogallotti";
repo = "rtk-hermes";
rev = "main";
hash = "sha256-...";
};
})
]
'';
};
restart = mkOption {
type = types.str;
default = "always";
@@ -618,7 +570,7 @@
# so interactive shells share state (sessions, skills, cron) with the
# gateway service instead of creating a separate ~/.hermes/.
(lib.mkIf cfg.addToSystemPackages {
environment.systemPackages = [ effectivePackage ];
environment.systemPackages = [ cfg.package ];
environment.variables.HERMES_HOME = "${cfg.stateDir}/.hermes";
})
@@ -629,16 +581,6 @@
});
})
# ── Assertions ─────────────────────────────────────────────────────
{
assertions = let
names = map lib.getName cfg.extraPlugins;
in [{
assertion = (lib.length names) == (lib.length (lib.unique names));
message = "services.hermes-agent.extraPlugins: duplicate plugin names detected: ${toString names}. If using fetchFromGitHub, set name = \"plugin-name\" to disambiguate.";
}];
}
# ── Warnings ──────────────────────────────────────────────────────
(lib.mkIf (cfg.container.enable && !cfg.addToSystemPackages && cfg.container.hostUsers != []) {
warnings = [
@@ -660,7 +602,6 @@
"d ${cfg.stateDir}/.hermes/sessions 2770 ${cfg.user} ${cfg.group} - -"
"d ${cfg.stateDir}/.hermes/logs 2770 ${cfg.user} ${cfg.group} - -"
"d ${cfg.stateDir}/.hermes/memories 2770 ${cfg.user} ${cfg.group} - -"
"d ${cfg.stateDir}/.hermes/plugins 2770 ${cfg.user} ${cfg.group} - -"
"d ${cfg.stateDir}/home 0750 ${cfg.user} ${cfg.group} - -"
"d ${cfg.workingDirectory} 2770 ${cfg.user} ${cfg.group} - -"
];
@@ -682,7 +623,7 @@
find ${cfg.stateDir}/.hermes -maxdepth 1 \
\( -name "*.db" -o -name "*.db-wal" -o -name "*.db-shm" -o -name "SOUL.md" \) \
-exec chmod g+rw {} + 2>/dev/null || true
for _subdir in cron sessions logs memories plugins; do
for _subdir in cron sessions logs memories; do
mkdir -p "${cfg.stateDir}/.hermes/$_subdir"
chown ${cfg.user}:${cfg.group} "${cfg.stateDir}/.hermes/$_subdir"
chmod 2770 "${cfg.stateDir}/.hermes/$_subdir"
@@ -791,22 +732,6 @@ HERMES_NIX_ENV_EOF
${lib.concatStringsSep "\n" (lib.mapAttrsToList (name: _value: ''
install -o ${cfg.user} -g ${cfg.group} -m 0640 ${documentDerivation}/${name} ${cfg.workingDirectory}/${name}
'') cfg.documents)}
# ── Declarative plugins ─────────────────────────────────────────
# Remove stale managed symlinks (plugins removed from config)
find ${cfg.stateDir}/.hermes/plugins -maxdepth 1 -type l -name 'nix-managed-*' -delete 2>/dev/null || true
${lib.concatStringsSep "\n" (map (plugin:
let
name = lib.getName plugin;
in ''
if [ ! -f "${plugin}/plugin.yaml" ]; then
echo "ERROR: extraPlugins entry '${plugin}' has no plugin.yaml" >&2
exit 1
fi
ln -sfn ${plugin} ${cfg.stateDir}/.hermes/plugins/nix-managed-${name}
chown -h ${cfg.user}:${cfg.group} ${cfg.stateDir}/.hermes/plugins/nix-managed-${name}
'') cfg.extraPlugins)}
'';
}
@@ -837,7 +762,7 @@ HERMES_NIX_ENV_EOF
# reads them at Python startup — no systemd EnvironmentFile needed.
ExecStart = lib.concatStringsSep " " ([
"${effectivePackage}/bin/hermes"
"${cfg.package}/bin/hermes"
"gateway"
] ++ cfg.extraArgs);
@@ -860,7 +785,7 @@ HERMES_NIX_ENV_EOF
};
path = [
effectivePackage
cfg.package
pkgs.bash
pkgs.coreutils
pkgs.git
@@ -885,11 +810,11 @@ HERMES_NIX_ENV_EOF
preStart = ''
# Stable symlinks — container references these, not store paths directly
ln -sfn ${effectivePackage} ${cfg.stateDir}/current-package
ln -sfn ${cfg.package} ${cfg.stateDir}/current-package
ln -sfn ${containerEntrypoint} ${cfg.stateDir}/current-entrypoint
# GC roots so nix-collect-garbage doesn't remove store paths in use
${pkgs.nix}/bin/nix-store --add-root ${cfg.stateDir}/.gc-root --indirect -r ${effectivePackage} 2>/dev/null || true
${pkgs.nix}/bin/nix-store --add-root ${cfg.stateDir}/.gc-root --indirect -r ${cfg.package} 2>/dev/null || true
${pkgs.nix}/bin/nix-store --add-root ${cfg.stateDir}/.gc-root-entrypoint --indirect -r ${containerEntrypoint} 2>/dev/null || true
# Check if container needs (re)creation
-10
View File
@@ -1,10 +0,0 @@
# nix/overlays.nix — Expose pkgs.hermes-agent for external NixOS configs
{ inputs, ... }:
{
flake.overlays.default = final: _: {
hermes-agent = final.callPackage ./hermes-agent.nix {
inherit (inputs) uv2nix pyproject-nix pyproject-build-systems;
npm-lockfile-fix = inputs.npm-lockfile-fix.packages.${final.stdenv.hostPlatform.system}.default;
};
};
}
+107 -6
View File
@@ -4,19 +4,120 @@
perSystem =
{ pkgs, inputs', ... }:
let
hermesAgent = pkgs.callPackage ./hermes-agent.nix {
hermesVenv = pkgs.callPackage ./python.nix {
inherit (inputs) uv2nix pyproject-nix pyproject-build-systems;
};
hermesNpmLib = pkgs.callPackage ./lib.nix {
npm-lockfile-fix = inputs'.npm-lockfile-fix.packages.default;
};
hermesTui = pkgs.callPackage ./tui.nix {
inherit hermesNpmLib;
};
# Import bundled skills, excluding runtime caches
bundledSkills = pkgs.lib.cleanSourceWith {
src = ../skills;
filter = path: _type: !(pkgs.lib.hasInfix "/index-cache/" path);
};
hermesWeb = pkgs.callPackage ./web.nix {
inherit hermesNpmLib;
};
runtimeDeps = with pkgs; [
nodejs_22
ripgrep
git
openssh
ffmpeg
tirith
];
runtimePath = pkgs.lib.makeBinPath runtimeDeps;
# Lockfile hashes for dev shell stamps
pyprojectHash = builtins.hashString "sha256" (builtins.readFile ../pyproject.toml);
uvLockHash =
if builtins.pathExists ../uv.lock then
builtins.hashString "sha256" (builtins.readFile ../uv.lock)
else
"none";
in
{
packages = {
default = hermesAgent;
tui = hermesAgent.hermesTui;
web = hermesAgent.hermesWeb;
default = pkgs.stdenv.mkDerivation {
pname = "hermes-agent";
version = (fromTOML (builtins.readFile ../pyproject.toml)).project.version;
fix-lockfiles = hermesAgent.hermesNpmLib.mkFixLockfiles {
packages = [ hermesAgent.hermesTui hermesAgent.hermesWeb ];
dontUnpack = true;
dontBuild = true;
nativeBuildInputs = [ pkgs.makeWrapper ];
installPhase = ''
runHook preInstall
mkdir -p $out/share/hermes-agent $out/bin
cp -r ${bundledSkills} $out/share/hermes-agent/skills
cp -r ${hermesWeb} $out/share/hermes-agent/web_dist
# copy pre-built TUI (same layout as dev: ui-tui/dist/ + node_modules/)
mkdir -p $out/ui-tui
cp -r ${hermesTui}/lib/hermes-tui/* $out/ui-tui/
${pkgs.lib.concatMapStringsSep "\n"
(name: ''
makeWrapper ${hermesVenv}/bin/${name} $out/bin/${name} \
--suffix PATH : "${runtimePath}" \
--set HERMES_BUNDLED_SKILLS $out/share/hermes-agent/skills \
--set HERMES_WEB_DIST $out/share/hermes-agent/web_dist \
--set HERMES_TUI_DIR $out/ui-tui \
--set HERMES_PYTHON ${hermesVenv}/bin/python3 \
--set HERMES_NODE ${pkgs.nodejs_22}/bin/node
'')
[
"hermes"
"hermes-agent"
"hermes-acp"
]
}
runHook postInstall
'';
passthru.devShellHook = ''
STAMP=".nix-stamps/hermes-agent"
STAMP_VALUE="${pyprojectHash}:${uvLockHash}"
if [ ! -f "$STAMP" ] || [ "$(cat "$STAMP")" != "$STAMP_VALUE" ]; then
echo "hermes-agent: installing Python dependencies..."
uv venv .venv --python ${pkgs.python312}/bin/python3 2>/dev/null || true
source .venv/bin/activate
uv pip install -e ".[all]"
[ -d mini-swe-agent ] && uv pip install -e ./mini-swe-agent 2>/dev/null || true
[ -d tinker-atropos ] && uv pip install -e ./tinker-atropos 2>/dev/null || true
mkdir -p .nix-stamps
echo "$STAMP_VALUE" > "$STAMP"
else
source .venv/bin/activate
export HERMES_PYTHON=${hermesVenv}/bin/python3
fi
'';
meta = with pkgs.lib; {
description = "AI agent with advanced tool-calling capabilities";
homepage = "https://github.com/NousResearch/hermes-agent";
mainProgram = "hermes";
license = licenses.mit;
platforms = platforms.unix;
};
};
tui = hermesTui;
web = hermesWeb;
fix-lockfiles = hermesNpmLib.mkFixLockfiles {
packages = [ hermesTui hermesWeb ];
};
};
};
+1 -2
View File
@@ -7,7 +7,6 @@
pyproject-nix,
pyproject-build-systems,
stdenv,
dependency-groups ? [ "all" ],
}:
let
workspace = uv2nix.lib.workspace.loadWorkspace { workspaceRoot = ./..; };
@@ -97,5 +96,5 @@ let
]);
in
pythonSet.mkVirtualEnv "hermes-agent-env" {
hermes-agent = dependency-groups;
hermes-agent = [ "all" ];
}
+1 -2
View File
@@ -4,7 +4,7 @@ let
src = ../ui-tui;
npmDeps = pkgs.fetchNpmDeps {
inherit src;
hash = "sha256-Chz+NW9NXqboXHOa6PKwf5bhAkkcFtKNhvKWwg2XSPc=";
hash = "sha256-RU4qSHgJPMyfRSEJDzkG4+MReDZDc6QbTD2wisa5QE0=";
};
npm = hermesNpmLib.mkNpmPassthru { folder = "ui-tui"; attr = "tui"; pname = "hermes-tui"; };
@@ -17,7 +17,6 @@ pkgs.buildNpmPackage (npm // {
inherit src npmDeps version;
doCheck = false;
npmFlags = [ "--legacy-peer-deps" ];
installPhase = ''
runHook preInstall
@@ -1,7 +1,7 @@
---
name: touchdesigner-mcp
description: "Control a running TouchDesigner instance via twozero MCP — create operators, set parameters, wire connections, execute Python, build real-time visuals. 36 native tools."
version: 1.1.0
version: 1.0.0
author: kshitijk4poor
license: MIT
metadata:
@@ -332,12 +332,6 @@ See `references/network-patterns.md` for complete build scripts + shader code.
| `references/mcp-tools.md` | Full twozero MCP tool parameter schemas |
| `references/python-api.md` | TD Python: op(), scripting, extensions |
| `references/troubleshooting.md` | Connection diagnostics, debugging |
| `references/glsl.md` | GLSL uniforms, built-in functions, shader templates |
| `references/postfx.md` | Post-FX: bloom, CRT, chromatic aberration, feedback glow |
| `references/layout-compositor.md` | HUD layout patterns, panel grids, BSP-style layouts |
| `references/operator-tips.md` | Wireframe rendering, feedback TOP setup |
| `references/geometry-comp.md` | Geometry COMP: instancing, POP vs SOP, morphing |
| `references/audio-reactive.md` | Audio band extraction, beat detection, envelope following |
| `scripts/setup.sh` | Automated setup script |
---
@@ -143,20 +143,20 @@ Creating nodes with the same names you just destroyed in the SAME script causes
```python
# td_execute_python:
for c in list(root.children):
if c.valid and c.name.startswith('my_'):
if c.valid and c.name.startswith('promo_'):
c.destroy()
# ... then create my_audio, my_shader etc. in same script → CRASHES
# ... then create promo_audio, promo_shader etc. in same script → CRASHES
```
**CORRECT (two separate calls):**
```python
# Call 1: td_execute_python — clean only
for c in list(root.children):
if c.valid and c.name.startswith('my_'):
if c.valid and c.name.startswith('promo_'):
c.destroy()
# Call 2: td_execute_python — build (separate MCP call)
audio = root.create(audiofileinCHOP, 'my_audio')
audio = root.create(audiofileinCHOP, 'promo_audio')
# ... rest of build
```
@@ -361,13 +361,21 @@ win.par.winopen.pulse()
`out.sample(x, y)` returns pixels from a single cook snapshot. Compare samples with 2+ second delays, or use screencapture on the display window.
### 32. Audio-reactive GLSL: TD-side pipeline
### 32. Audio-reactive GLSL: dual-layer sync pipeline
For audio-synced visuals: AudioFileIn → AudioSpectrum(timeslice=True, fftsize='256') → Math(gain=5) → choptoTOP(par.chop=math, layout='rowscropped') → GLSL input. The shader samples `sTD2DInputs[1]` at different x positions for bass/mid/hi. Record the TD output with MovieFileOut.
For audio-synced visuals, use BOTH layers for maximum effect:
**Layer 1 (TD-side, real-time):** AudioFileIn → AudioSpectrum(timeslice=True, fftsize='256') → Math(gain=5) → choptoTOP(par.chop=math, layout='rowscropped') → GLSL input. The shader samples `sTD2DInputs[1]` at different x positions for bass/mid/hi. Record the TD output with MovieFileOut.
**Layer 2 (Python-side, post-hoc):** scipy FFT on the SAME audio file → per-frame features (rms, bass, mid, hi, beat detection) → drive ASCII brightness, chromatic aberration, beat flashes during the render pass.
Both layers locked to the same audio file = visuals genuinely sync to the beat at two independent stages.
**Key gotcha:** AudioFileIn must be cued (`par.cue=True``par.cuepulse.pulse()`) then uncued (`par.cue=False`, `par.play=True`) before recording starts. Otherwise the spectrum is silent for the first few seconds.
### 33. twozero MCP: prefer native tools
### 33. twozero MCP: benchmark and prefer native tools
Benchmarked April 2026: twozero MCP with 36 native tools. The old curl/REST method (port 9981) had zero native tools.
**Always prefer native MCP tools over td_execute_python:**
- `td_create_operator` over `root.create()` scripts (handles viewport positioning)
@@ -417,16 +425,13 @@ TD can show `fps:0` in `td_get_perf` while ops still cook and `TOP.save()` still
**a) Project is paused (playbar stopped).** TD's playbar can be toggled with spacebar. The `root` at `/` has no `.playbar` attribute (it's on the perform COMP). The easiest fix is sending a spacebar keypress via `td_input_execute`, though this tool can sometimes error. As a workaround, `TOP.save()` always works regardless of play state — use it to verify rendering is actually happening before spending time debugging FPS.
**b) Audio device CHOP blocking the main thread (MOST COMMON).** An `audiodeviceoutCHOP` with `active=True` can consume 300-400ms/s (2000%+ of frame budget), stalling the cook loop at FPS=0. **`volume=0` is NOT sufficient** — the audio driver still blocks. Fix: `par.active = False`. This completely stops the CHOP from interacting with the audio driver. If you need audio monitoring, enable it only during short playback checks, then disable before recording.
Verified April 2026: disabling `audiodeviceoutCHOP` (`active=False`) restored FPS from 0 to 60 instantly, recovering from 2348% budget usage to 0.1%.
**b) Audio device CHOP blocking the main thread.** An `audiooutCHOP` with an active audio device can consume 300-400ms/s (2000%+ of frame budget), stalling the cook loop at FPS=0. Fix: keep the CHOP active but set `volume=0` to prevent the audio driver from blocking. Disabling it entirely (`active=False`) may also work but can prevent downstream audio processing CHOPs from cooking.
Diagnostic sequence when FPS=0:
1. `td_get_perf` — check if any op has extreme CPU/s (audiodeviceoutCHOP is the usual suspect)
2. If audiodeviceoutCHOP shows >100ms/s: set `par.active = False` immediately
3. `TOP.save()` on the output — if it produces a valid image, the pipeline works, just not at real-time rate
4. Check for other blocking CHOPs (audiodevin, etc.)
5. Toggle play state (spacebar, or check if absTime.seconds is advancing)
1. `td_get_perf` — check if any op has extreme CPU/s
2. `TOP.save()` on the output — if it produces a valid image, the pipeline works, just not at real-time rate
3. Check for blocking CHOPs (audioout, audiodevin, etc.)
4. Toggle play state (spacebar, or check if absTime.seconds is advancing)
### 39. Recording while FPS=0 produces empty or near-empty files
@@ -479,20 +484,9 @@ If `td_write_dat` fails, fall back to `td_execute_python`:
op("/project1/shader_code").text = shader_string
```
### 42. td_execute_python DOES return print() output — use it for debugging
### 42. td_execute_python does NOT return stdout or print() output
`print()` statements in `td_execute_python` scripts appear in the MCP response text. This is the correct way to read values back from scripts. The response format is: printed output first, then `[fps X.X/X] [N err/N warn]` on a separate line.
However, the `result` variable (if you set one) does NOT appear verbatim — use `print()` for anything you need to read back:
```python
# CORRECT — appears in response:
print('value:', some_value)
# WRONG — not reliably in response:
result = some_value
```
For structured data, use dedicated inspection tools (`td_get_operator_info`, `td_read_chop`) which return clean JSON.
Despite what earlier versions of pitfall #33 stated, `print()` and `debug()` output from `td_execute_python` scripts does NOT appear in the MCP response. The response is always just `(ok)` + FPS/error summary. To read values back, use dedicated inspection tools (`td_get_operator_info`, `td_read_dat`, `td_read_chop`) instead of trying to print from within a script.
### 43. td_get_operator_info JSON is appended with `[fps X.X/X]` — breaks json.loads()
@@ -502,203 +496,13 @@ clean = response_text.rsplit('[fps', 1)[0]
data = json.loads(clean)
```
### 44. td_get_screenshot is unreliable — returns `{"status": "pending"}` and may never deliver
### 44. td_get_screenshot is asynchronous — returns `{"status": "pending"}`
Screenshots don't complete instantly. The tool returns `{"status": "pending", "requestId": "..."}` and the actual file may appear later — or may NEVER appear at all. In testing (April 2026), screenshots stayed "pending" indefinitely with no file written to disk, even though the shader was cooking at 8-30fps.
Screenshots don't complete instantly. The tool returns `{"status": "pending", "requestId": "..."}` and the actual file appears later. Wait a few seconds before checking for the file. There is no callback or completion notification — poll the filesystem.
**Do NOT rely on `td_get_screenshot` for frame capture.** For reliable frame capture, use MovieFileOut recording + ffmpeg frame extraction:
```bash
# Record in TD first, then extract frames:
ffmpeg -y -i /tmp/td_output.mov -t 25 -vf 'fps=24' /tmp/td_frames/frame_%06d.png
```
If you need a quick visual check, `td_get_screenshot` is worth trying (it sometimes works), but always have the recording fallback. There is no callback or completion notification — if the file doesn't appear after 5-10 seconds, it's not coming.
### 45. Heavy shaders cook below record FPS — many duplicate frames in output
A raymarched GLSL shader may only cook at 8-15fps even though MovieFileOut records at 60fps. The recording still works (TD writes the last-cooked frame each time), but the resulting file has many duplicate frames. When extracting frames for post-processing, use a lower fps filter to avoid redundant frames:
```bash
# Extract at 24fps from a 60fps recording of an 8fps shader:
ffmpeg -y -i /tmp/td_output.mov -t 25 -vf 'fps=24' /tmp/td_frames/frame_%06d.png
```
Check actual cook FPS with `td_get_perf` before committing to a long recording. If FPS < 15, the output will be a slideshow regardless of the recording codec.
### 46. Recording duration is manual — no auto-stop at audio end
### 45. Recording duration is manual — no auto-stop at audio end
MovieFileOut records until `par.record = False` is set. If audio ends before you stop recording, the file keeps growing with repeated frames. Always stop recording promptly after the audio duration. For precision: set a timer on the agent side matching the audio length, then send `par.record = False`. Trim excess with ffmpeg as a safety net:
```bash
ffmpeg -i raw.mov -t 25 -c copy trimmed.mov
```
### 47. AudioFileIn par.index stays at 0 in sequential mode — not a reliable progress indicator
When `audiofileinCHOP` is in `playmode=2` (sequential), `par.index.eval()` returns 0.0 even while audio IS actively playing and the spectrum IS receiving data. Do NOT use `par.index` to check playback progress in sequential mode.
**How to verify audio is actually playing:**
- Read the spectrum CHOP values via `td_read_chop` — if values are non-zero and CHANGE between reads 1-2s apart, audio is flowing
- Read the audio CHOP itself: non-zero waveform samples confirm the file is loaded and playing
- `par.play.eval()` returning True is necessary but NOT sufficient — it can be True with no audio flowing if cue is stuck
### 48. GLSL shader whiteout — clamp audio spectrum values in the shader
Raw spectrum values multiplied by Math CHOP gain can produce very large numbers (5-20+) that blow out the shader's lighting, producing flat white/grey. The shader MUST clamp audio inputs:
```glsl
float bass = texture(sTD2DInputs[1], vec2(0.05, 0.25)).r;
bass = clamp(bass, 0.0, 3.0); // prevent whiteout
mids = clamp(mids, 0.0, 3.0);
hi = clamp(hi, 0.0, 3.0);
```
Discovered when gain=10 produced ~0.13 (too dark) during quiet passages but gain=50 produced ~9.4 (total whiteout). Fix: keep gain=10, use `highfreqboost=3.0` on AudioSpectrum, clamp in shader.
### 49. Non-Commercial TD records at 1280x1280 (square) — always crop in post
Even with `resolutionw=1280, resolutionh=720` on the GLSL TOP, Non-Commercial TD may output 1280x1280 to MovieFileOut. Always check dimensions with ffprobe and crop during extraction:
```bash
# Center-crop from 1280x1280 to 1280x720:
ffmpeg -y -i /tmp/td_output.mov -t 25 -r 24 -vf "crop=1280:720:0:280" /tmp/frames/frame_%06d.png
```
Large ProRes files (1-2GB) at 1280x1280 decode at ~3fps, so 25s of footage takes ~3 minutes to extract.
## Advanced Patterns (pitfalls 51+)
### 51. Connection syntax: use `outputConnectors`/`inputConnectors`, NOT `outputs`/`inputs`
```python
# CORRECT
src.outputConnectors[0].connect(dst.inputConnectors[0])
# WRONG — raises IndexError or AttributeError
src.outputs[0].connect(dst.inputs[0])
```
For feedback TOP, BOTH are required:
```python
fb.par.top = target.path
target.outputConnectors[0].connect(fb.inputConnectors[0])
```
### 52. moviefileoutTOP `par.input` doesn't resolve via Python in TD 2025.32460
Setting `moviefileoutTOP.par.input` programmatically does NOT work. All forms fail silently with "Not enough sources specified."
**Workaround — frame capture + ffmpeg:**
```python
out = op('/project1/out')
for i in range(300):
delay = i * 5
run(f"op('/project1/out').save('/tmp/frames/f_{i:04d}.png')", delayFrames=delay)
# Then: ffmpeg -y -framerate 30 -i /tmp/frames/f_%04d.png -c:v prores -pix_fmt yuv420p /tmp/output.mov
```
### 53. Batch frame capture — use `me.fetch`/`me.store` for state across calls
```python
start = me.fetch('cap_frame', 0)
for i in range(60):
frame = start + i
op('/project1/out').save(f'/tmp/frames/frame_{str(frame).zfill(4)}.png')
me.store('cap_frame', start + 60)
```
Call 5 times for 300 frames. Each picks up where the last left off.
### 54. GLSL TOP pixel shader requirements in TD 2025
```glsl
// REQUIRED — declare output
layout(location = 0) out vec4 fragColor;
void main() {
vec3 col = vec3(1.0, 0.0, 0.0);
fragColor = TDOutputSwizzle(vec4(col, 1.0));
}
```
**Built-in uniforms available:** `uTDOutputInfo.res` (vec4), `uTDTimeInfo.seconds`, `sTD2DInputs[N]`.
**Auto-created DATs:** `name_pixel`, `name_vertex`, `name_compute` textDATs with example code.
### 55. TOP.save() doesn't advance time — identical frames in tight loops
`.save()` captures the current cooked frame without advancing TD's timeline:
```python
# WRONG — all frames identical
for i in range(300):
op('/project1/out').save(f'frames/f_{i:04d}.png')
# CORRECT — use run() with delayFrames
for i in range(300):
delay = i * 5
run(f"op('/project1/out').save('frames/f_{i:04d}.png')", delayFrames=delay)
```
**NEVER use `time.sleep()` in TD** — it blocks the main thread and freezes the UI.
### 56. Feedback loop masks input changes — force switch during capture
With feedback TOP opacity 0.7+, the buffer dominates output. Switching input produces nearly identical frames.
**Fix — force switch index per capture:**
```python
for i in range(300):
idx = (i // 8) % num_inputs
delay = i * 5
run(f"op('/project1/vswitch').par.index={idx}; op('/project1/out').save('f_{i:04d}.png')", delayFrames=delay)
```
### 57. Large td_execute_python scripts fail — split into incremental calls
10+ operator creations in one script cause timing issues. Split into 2-4 calls of 2-4 operators each. Within one call, `create()` handles work immediately. Across calls, `op('name')` may return `None` if the previous call hasn't committed.
### 58. MCP instance reconnection after project.load()
`project.load(path)` changes the PID. After loading, call `td_list_instances()` and use the new `target_instance`. For TOX files: import as child comp instead (doesn't disconnect).
### 59. TOX reverse-engineering workflow
```python
comp = root.loadTox(r'/path/to/file.tox')
comp.name = '_study_comp'
for child in comp.children:
print(f'{child.name} ({child.OPType})')
# Use td_get_operators_info, td_read_dat, check custom params
```
### 60. sliderCOMP naming — TD appends suffix
TD auto-renames: `slider_brightness``slider_brightness1`. Always check names after creation.
### 61. create() requires full operator type suffix
```python
# CORRECT
proj.create('audiofileinCHOP', 'audio_in')
proj.create('glslTOP', 'render')
# WRONG — raises "Unknown operator type"
proj.create('audiofilein', 'audio_in')
proj.create('glsl', 'render')
```
### 62. Reparenting COMPs — use copyOPs, not connect()
Moving COMPs with `inputCOMPConnectors[0].connect()` fails. Use copy + destroy:
```python
copied = target.copyOPs([source]) # preserves internal wiring
source.destroy()
# Re-wire external connections manually after the move
```
### 63. Slider wiring — expressionCHOP with op() expressions crashes TD
```python
# CRASHES TD — don't do this
echop = root.create(expressionCHOP, 'slider_ctrl')
echop.par.chan0expr = 'op("/project1/controls/slider_brightness1").par.value0'
# WORKING — parameterCHOP as bridge
pchop = root.create(parameterCHOP, 'slider_vals')
pchop.par.ops = '/project1/controls'
pchop.par.parameters = 'value0'
pchop.par.custom = True
pchop.par.builtin = False
```
@@ -380,10 +380,6 @@ def backup_existing(path: Path, backup_root: Path) -> Optional[Path]:
# Replace OpenClaw brand names with Hermes in migrated text so that
# memory entries, user profiles, SOUL.md, and workspace instructions
# read as self-referential to the new agent identity.
#
# Case-preserving: ``OpenClaw`` → ``Hermes`` (prose), but lowercase matches
# like ``openclaw`` → ``hermes`` (so filesystem paths like ``~/.openclaw``
# become ``~/.hermes`` — the real Hermes home — not the broken ``~/.Hermes``).
_REBRAND_PATTERNS: List[Tuple[re.Pattern, str]] = [
(re.compile(r'\bOpen[\s-]?Claw\b', re.IGNORECASE), 'Hermes'),
(re.compile(r'\bClawdBot\b', re.IGNORECASE), 'Hermes'),
@@ -391,31 +387,10 @@ _REBRAND_PATTERNS: List[Tuple[re.Pattern, str]] = [
]
def _case_preserving_replacement(replacement: str):
"""Return a re.sub replacement fn that lowercases the result when the
matched text was all-lowercase.
Keeps ``OpenClaw`` ``Hermes`` but maps ``openclaw`` ``hermes`` so a
filesystem path like ``~/.openclaw/config.yaml`` rewrites to
``~/.hermes/config.yaml`` (the real Hermes home) instead of the broken
``~/.Hermes/config.yaml``.
"""
def _sub(match: "re.Match[str]") -> str:
matched = match.group(0)
if matched and matched.islower():
return replacement.lower()
return replacement
return _sub
def rebrand_text(text: str) -> str:
"""Replace OpenClaw / ClawdBot / MoltBot brand names with Hermes.
Preserves case so filesystem-path matches (lowercase) don't become
capitalized directory names that don't exist.
"""
"""Replace OpenClaw / ClawdBot / MoltBot brand names with Hermes."""
for pattern, replacement in _REBRAND_PATTERNS:
text = pattern.sub(_case_preserving_replacement(replacement), text)
text = pattern.sub(replacement, text)
return text
-25
View File
@@ -91,29 +91,4 @@
// Register this plugin — the dashboard picks it up automatically.
window.__HERMES_PLUGINS__.register("example", ExamplePage);
// ─────────────────────────────────────────────────────────────────────
// Page-scoped slot demo: inject a small banner at the top of /sessions.
//
// Built-in pages expose named slots (<page>:top, <page>:bottom) that
// plugins can populate without overriding the whole route. The
// manifest lists the slots we use in its `slots` array so the shell
// knows to render <PluginSlot name="sessions:top" /> there.
// ─────────────────────────────────────────────────────────────────────
function SessionsTopBanner() {
return React.createElement(Card, {
className: "border-dashed",
},
React.createElement(CardContent, { className: "flex items-center gap-3 py-2" },
React.createElement(Badge, { variant: "outline" }, "Example"),
React.createElement("span", {
className: "text-xs text-muted-foreground",
}, "This banner was injected into the Sessions page by the example plugin via the ",
React.createElement("code", { className: "font-courier" }, "sessions:top"),
" slot."),
),
);
}
window.__HERMES_PLUGINS__.registerSlot("example", "sessions:top", SessionsTopBanner);
})();
@@ -8,7 +8,6 @@
"path": "/example",
"position": "after:skills"
},
"slots": ["sessions:top"],
"entry": "dist/index.js",
"api": "plugin_api.py"
}
-131
View File
@@ -1,131 +0,0 @@
# google_meet plugin
Let the hermes agent join a Google Meet call, transcribe it, optionally speak
in it, and do the followup work afterwards.
## What ships
| Version | What | Status |
|---|---|---|
| v1 | Transcribe-only: Playwright joins Meet, scrapes captions to transcript file | ✓ ships by default |
| v2 | Realtime duplex audio: bot speaks in-call via OpenAI Realtime + BlackHole/PulseAudio null-sink | ✓ opt in with `mode='realtime'` |
| v3 | Remote node host: run the bot on a different machine than the gateway | ✓ opt in with `node='<name>'` |
## Architecture
```
┌─ gateway (Linux box, where hermes runs) ────────────────────────────┐
│ │
│ agent → meet_join(url, mode='realtime', node='my-mac') │
│ │ │
│ └─ NodeClient ─── ws ────┐ │
│ │ │
└──────────────────────────────────┼───────────────────────────────────┘
│ wss (token auth)
┌─ node host (user's Mac, signed-in Chrome lives here) ───────────────┐
│ │
│ NodeServer (from `hermes meet node run`) │
│ │ │
│ ├─ start_bot → process_manager.start() → spawns meet_bot │
│ │ │
│ └─ meet_bot (Playwright) │
│ ├─ Chromium → meet.google.com │
│ ├─ caption scraper → transcript.txt │
│ └─ (realtime mode only) RealtimeSpeaker thread │
│ ↓ │
│ OpenAI Realtime WS → speaker.pcm │
│ ↓ │
│ paplay → null-sink ← Chrome fake mic │
│ │
└──────────────────────────────────────────────────────────────────────┘
```
Without v3: the whole right column runs on the gateway machine.
Without v2: the "realtime" path is skipped; transcribe runs alone.
## Files
| Path | Purpose |
|---|---|
| `plugin.yaml` | manifest |
| `__init__.py` | `register(ctx)` — registers 5 tools + `on_session_end` hook + `hermes meet` CLI |
| `meet_bot.py` | Playwright bot subprocess (standalone, `python -m plugins.google_meet.meet_bot`) |
| `process_manager.py` | local bot lifecycle + `enqueue_say` |
| `tools.py` | agent-facing tools + node-routing helper |
| `cli.py` | `hermes meet setup / auth / join / status / transcript / say / stop / node ...` |
| `audio_bridge.py` | v2: PulseAudio null-sink (Linux) + BlackHole probe (macOS) |
| `realtime/openai_client.py` | v2: `RealtimeSession` + `RealtimeSpeaker` (file-queue → OpenAI Realtime WS → PCM) |
| `node/protocol.py` | v3: message envelope + validation |
| `node/registry.py` | v3: `$HERMES_HOME/workspace/meetings/nodes.json` |
| `node/server.py` | v3: `NodeServer` (runs on host machine) |
| `node/client.py` | v3: `NodeClient` (used by tool handlers + CLI on gateway) |
| `node/cli.py` | v3: `hermes meet node {run,list,approve,remove,status,ping}` |
| `SKILL.md` | agent usage guide |
## Local quick start
```bash
hermes plugins enable google_meet
hermes meet install # pip + Chromium
hermes meet setup # preflight
hermes meet auth # optional
hermes meet join https://meet.google.com/abc-defg-hij # transcribe
```
## Realtime mode
Linux (preferred, most automated):
```bash
hermes meet install --realtime # installs pulseaudio-utils
echo 'OPENAI_API_KEY=sk-...' >> ~/.hermes/.env
hermes meet join https://meet.google.com/abc-defg-hij --mode realtime
# then from the agent or CLI:
hermes meet say "Good morning everyone, I'm the note-taker bot."
```
macOS:
```bash
hermes meet install --realtime # runs: brew install blackhole-2ch ffmpeg
# then — manually! — open System Settings → Sound → Input → BlackHole 2ch
echo 'OPENAI_API_KEY=sk-...' >> ~/.hermes/.env
hermes meet join https://meet.google.com/abc-defg-hij --mode realtime
```
On macOS, hermes will **not** switch your system audio input automatically — the
user has to do it. This is deliberate: switching default input on a whim would
be a surprising side effect.
## Remote node host
On the node machine (e.g. user's Mac with a signed-in Chrome):
```bash
pip install playwright websockets
python -m playwright install chromium
hermes plugins enable google_meet
hermes meet node run --display-name my-mac --host 0.0.0.0 --port 18789
# prints the bearer token on first run; copy it
```
On the gateway:
```bash
hermes meet node approve my-mac ws://<mac-ip>:18789 <token>
hermes meet node ping my-mac
# now any meet_* tool call accepts node='my-mac' (or 'auto')
```
## Safety
- URL gate: only `https://meet.google.com/abc-defg-hij`, `/new`, `/lookup/<id>`.
- No calendar scanning, no auto-dial, no auto-consent announcement.
- Node server uses bearer-token auth; no key exchange, no TLS termination
built in — run it on a LAN or behind a reverse proxy you trust.
- One active meeting per (gateway, node) pair. A second `meet_join` leaves the first.
- `meet_say` refuses unless the active meeting was started with `mode='realtime'`.
## Out of scope
- **Calendar scanning** — deliberately not implemented. Join URLs must be explicit.
- **Multi-tenant node sharing** — a node serves one gateway at a time.
- **Windows** — audio bridging isn't tested; `register()` no-ops on Windows.
- **System audio input switching on macOS** — user responsibility, not the bot's.
-148
View File
@@ -1,148 +0,0 @@
---
name: google_meet
description: Join a Google Meet call, transcribe live captions, optionally speak in realtime, and do the followup work afterwards. Use when the user asks the agent to sit in on a meeting, take notes, summarize, respond in-call, or action items from it.
version: 0.2.0
platforms:
- linux
- macos
metadata:
hermes:
tags: [meetings, google-meet, transcription, realtime-voice]
---
# google_meet
## When to use
The user says any of:
- "join my Meet at <url>"
- "take notes on this meeting"
- "summarize the meeting and send followups"
- "sit in on my standup"
- "be a bot in this call and speak up when X"
## Two modes
| Mode | What the bot does |
|---|---|
| `transcribe` (default) | Joins, enables captions, scrapes a transcript. Listen-only. |
| `realtime` | Same as transcribe PLUS speaks into the meeting via OpenAI Realtime. The agent calls `meet_say(text)` and the bot's voice comes out of the call. |
Pick `realtime` only when the user actually wants the agent to speak. It costs real money (OpenAI Realtime is pay-per-audio-minute) and requires a virtual audio device set up on the machine running the bot.
## Two locations
| Location | When |
|---|---|
| Local (default) | Gateway machine runs the Playwright bot directly. |
| Remote node (`node="<name>"`) | Bot runs on a different machine that has a signed-in Chrome and (for realtime) a configured audio bridge. Useful when the gateway runs on a headless Linux box but the user's real signed-in Chrome lives on their Mac. |
## Prerequisites the user must handle once
Easiest path — run the built-in installer:
```bash
hermes plugins enable google_meet
hermes meet install # pip deps + Chromium (transcribe only)
hermes meet install --realtime # + pulseaudio-utils / brew blackhole+ffmpeg
hermes meet auth # optional; skips guest-lobby wait
hermes meet setup # preflight checks
```
`hermes meet install --realtime` prompts before running `sudo apt-get` (Linux)
or `brew install` (macOS). Pass `--yes` to skip the prompt. It will NOT touch
your macOS default-input setting — you have to select BlackHole 2ch in
System Settings yourself before starting a realtime meeting.
Or do it manually:
```bash
pip install playwright websockets && python -m playwright install chromium
# For realtime mode, additionally:
# Linux: sudo apt install pulseaudio-utils
# macOS: brew install blackhole-2ch ffmpeg
# → System Settings → Sound → Input → BlackHole 2ch
# Then set OPENAI_API_KEY or HERMES_MEET_REALTIME_KEY in ~/.hermes/.env
```
For a remote node:
```bash
# on the user's Mac (where Chrome is signed in):
pip install playwright websockets && python -m playwright install chromium
hermes plugins enable google_meet
hermes meet node run --display-name my-mac # persistent server
# copy the printed token
# on the gateway:
hermes meet node approve my-mac ws://<mac-ip>:18789 <token>
hermes meet node ping my-mac # confirm reachable
```
Run `hermes meet setup` to preflight local prereqs.
## Flow
1. **Join** — call `meet_join(url=..., mode=..., node=...)`. Returns immediately.
2. **Announce yourself** — no auto-consent. Say (in whatever channel the user is watching): "A Hermes agent bot is in this call taking notes."
3. **Poll**`meet_status()` for liveness, `meet_transcript(last=20)` for recent captions. Don't re-read the whole transcript every turn.
4. **Speak (realtime only)**`meet_say(text="...")` queues text for TTS. The speech lags by ~2s. Don't spam it.
5. **Leave**`meet_leave()` when done, or set `duration="30m"` on `meet_join` for auto-leave.
6. **Follow up** — read `meet_transcript()` in full, summarize, and use regular tools to send the recap, file issues, schedule followups.
## Tool reference
| Tool | Parameters | Use |
|---|---|---|
| `meet_join` | `url`, `mode?`, `guest_name?`, `duration?`, `headed?`, `node?` | Start bot |
| `meet_status` | `node?` | Liveness + progress |
| `meet_transcript` | `last?`, `node?` | Read captions |
| `meet_leave` | `node?` | Close bot |
| `meet_say` | `text`, `node?` | Speak in realtime meeting |
`node?` on all tools: pass a registered node name (or `"auto"` for the sole node) to operate a remote bot instead of a local one. Omit for local.
## Important limits
- Captions are only as good as Google Meet's live captions. English-biased, lossy on overlapping speakers.
- Guest mode sits in the lobby until a host admits. Warn the user; `hermes meet auth` avoids this.
- **Lobby timeout**: if the host doesn't admit the bot within 5 minutes (configurable via `HERMES_MEET_LOBBY_TIMEOUT` env), the bot leaves and `meet_status` reports `leaveReason: "lobby_timeout"`.
- **One active meeting per install per location.** A second `meet_join` leaves the first.
- **Windows not supported.**
- Realtime mode needs a virtual audio device. If the audio bridge setup fails, the bot falls back to transcribe mode and flags it in `meet_status().error`.
- `meet_say` requires `mode='realtime'` on the originating `meet_join`. Calling it against a transcribe-mode meeting returns a clear error.
- **Barge-in is best-effort.** When a caption arrives attributed to a real participant while the bot is generating audio, the bot sends `response.cancel` to OpenAI Realtime. Captions take ~500ms to show up, so the bot will talk over the first second or so of a human interruption.
## Status dict reference
`meet_status()` returns (subset shown, there are more):
| Key | Meaning |
|---|---|
| `inCall` | Past the lobby. False while waiting for admission. |
| `lobbyWaiting` | Clicked "Ask to join", waiting on host. |
| `joinAttemptedAt` / `joinedAt` | Timestamps for lobby-click and actual admission. |
| `captioning` | Caption observer is installed. |
| `transcriptLines` / `lastCaptionAt` | Transcript progress. |
| `realtime` / `realtimeReady` | Realtime mode provisioned / WS connected. |
| `realtimeDevice` | Audio device name the bot is feeding (e.g. `hermes_meet_src`). |
| `audioBytesOut` / `lastAudioOutAt` | How much PCM the OpenAI session has produced. |
| `lastBargeInAt` | Timestamp of the most recent `response.cancel` sent. |
| `leaveReason` | `duration_expired`, `lobby_timeout`, `denied`, `page_closed`, or null. |
| `error` | Last error (soft — bot may still be running). |
## Transcript location
Local:
```
$HERMES_HOME/workspace/meetings/<meeting-id>/transcript.txt
```
Remote node: transcript lives on the node host's disk. Use `meet_transcript(node=...)` to read it over RPC.
## Safety
- URL regex: only `https://meet.google.com/...` URLs pass.
- No calendar scanning. No auto-dial.
- Remote nodes use bearer-token auth; tokens are generated on the node (32 hex chars, persisted in `$HERMES_HOME/workspace/meetings/node_token.json`) and must be copied to the gateway via `hermes meet node approve`.
- `meet_say` text is rate-limited by the OpenAI Realtime session; spam-protection is the bot's problem, not yours, but still — don't queue hundreds of lines.
-103
View File
@@ -1,103 +0,0 @@
"""google_meet plugin — let the agent join a Meet call, transcribe it, follow up.
v1: transcribe-only. Spawns a headless Chromium via Playwright, joins the Meet
URL, enables live captions, scrapes them into a transcript file. The agent then
has the transcript in its workspace and can do whatever followup work it needs
using its regular tools.
v2 (not in this PR): realtime duplex audio so the agent can speak in the
meeting, via OpenAI Realtime / Gemini Live + BlackHole / PulseAudio null-sink.
``meet_say`` exists as a stub today so the tool surface is stable.
Explicit-by-design: only joins ``https://meet.google.com/`` URLs explicitly
passed in. No calendar scanning, no auto-dial, no consent announcement.
"""
from __future__ import annotations
import logging
import platform
from plugins.google_meet import process_manager as pm
from plugins.google_meet.cli import register_cli as _register_meet_cli
from plugins.google_meet.cli import meet_command as _meet_command
from plugins.google_meet.tools import (
MEET_JOIN_SCHEMA,
MEET_LEAVE_SCHEMA,
MEET_SAY_SCHEMA,
MEET_STATUS_SCHEMA,
MEET_TRANSCRIPT_SCHEMA,
check_meet_requirements,
handle_meet_join,
handle_meet_leave,
handle_meet_say,
handle_meet_status,
handle_meet_transcript,
)
logger = logging.getLogger(__name__)
_TOOLS = (
("meet_join", MEET_JOIN_SCHEMA, handle_meet_join, "📞"),
("meet_status", MEET_STATUS_SCHEMA, handle_meet_status, "🟢"),
("meet_transcript", MEET_TRANSCRIPT_SCHEMA, handle_meet_transcript, "📝"),
("meet_leave", MEET_LEAVE_SCHEMA, handle_meet_leave, "👋"),
("meet_say", MEET_SAY_SCHEMA, handle_meet_say, "🗣️"),
)
def _on_session_end(**kwargs) -> None:
"""Best-effort cleanup — if a meet bot is still running when the session
ends, leave the call so we don't orphan a headless Chromium.
No-ops when nothing is active. Swallows all exceptions session end must
not fail because the bot cleanup hit an edge case.
"""
try:
status = pm.status()
if status.get("ok") and status.get("alive"):
pm.stop(reason="session ended")
except Exception as e: # pragma: no cover — defensive
logger.debug("google_meet on_session_end cleanup failed: %s", e)
def register(ctx) -> None:
"""Register tools, CLI, and lifecycle hooks.
Called once by the plugin loader when the plugin is enabled via
``plugins.enabled`` in config.yaml.
"""
# Windows is not supported in v1 — audio routing for v2 doesn't have a
# tested path there and guest-join Chromium is flakier. Refuse to register
# rather than half-working.
system = platform.system().lower()
if system not in ("linux", "darwin"):
logger.info(
"google_meet plugin: platform=%s not supported (linux/macos only)",
system,
)
return
for name, schema, handler, emoji in _TOOLS:
ctx.register_tool(
name=name,
toolset="google_meet",
schema=schema,
handler=handler,
check_fn=check_meet_requirements,
emoji=emoji,
)
ctx.register_cli_command(
name="meet",
help="Google Meet bot (join, transcribe, follow up)",
setup_fn=_register_meet_cli,
handler_fn=_meet_command,
description=(
"Let the hermes agent join a Google Meet call and scrape live "
"captions into a transcript. See: hermes meet setup"
),
)
ctx.register_hook("on_session_end", _on_session_end)
-244
View File
@@ -1,244 +0,0 @@
"""Virtual audio bridge for feeding generated speech into Chrome's mic.
v2 module. Provisions a platform-specific virtual audio device so the
Meet bot's Chromium instance can be pointed at an input source we
control. The OpenAI Realtime client writes PCM bytes into this device;
Chrome reads them as if they were coming from a microphone.
Linux (primary): uses pactl (PulseAudio) to create a null-sink plus a
virtual source whose master is the null-sink's monitor. Callers set
PULSE_SOURCE=<source_name> in Chrome's env and pass the fake-mic flag.
macOS: requires BlackHole 2ch to be installed. This module only
verifies its presence and returns the device name; routing OS default
input is left to the user (or a future switchaudio-osx integration) to
avoid surprising the user's system audio state.
Windows: not supported in v2.
"""
from __future__ import annotations
import platform
import subprocess
from typing import Optional
_BLACKHOLE_DEVICE = "BlackHole 2ch"
class AudioBridge:
"""Manages a virtual audio device for Chrome fake-mic input.
Call ``setup()`` once before launching the Meet bot and
``teardown()`` when the session ends. ``teardown()`` is idempotent.
"""
def __init__(self, name_prefix: str = "hermes_meet") -> None:
self._name_prefix = name_prefix
self._platform: Optional[str] = None
self._device_name: Optional[str] = None
self._write_target: Optional[str] = None
self._module_ids: list[int] = []
self._torn_down = False
# ── public properties ─────────────────────────────────────────────────
@property
def device_name(self) -> str:
if not self._device_name:
raise RuntimeError("AudioBridge not set up yet")
return self._device_name
@property
def write_target(self) -> str:
if not self._write_target:
raise RuntimeError("AudioBridge not set up yet")
return self._write_target
# ── lifecycle ─────────────────────────────────────────────────────────
def setup(self) -> dict:
"""Provision the virtual audio device.
Returns a dict describing the device. Raises RuntimeError on
unsupported platforms or when required system tools are missing.
"""
system = platform.system()
if system == "Linux":
return self._setup_linux()
if system == "Darwin":
return self._setup_darwin()
if system == "Windows":
raise RuntimeError("windows not supported in v2")
raise RuntimeError(f"unsupported platform: {system}")
def teardown(self) -> None:
"""Release the virtual audio device. Idempotent."""
if self._torn_down:
return
# Only Linux needs explicit unloading.
if self._platform == "linux" and self._module_ids:
# Unload in reverse order (virtual-source before null-sink).
for mod_id in reversed(self._module_ids):
try:
subprocess.run(
["pactl", "unload-module", str(mod_id)],
check=False,
capture_output=True,
)
except Exception:
# Best-effort teardown — never raise from here.
pass
self._module_ids = []
self._torn_down = True
# ── platform impls ────────────────────────────────────────────────────
def _setup_linux(self) -> dict:
sink_name = f"{self._name_prefix}_sink"
src_name = f"{self._name_prefix}_src"
try:
sink_out = subprocess.run(
[
"pactl",
"load-module",
"module-null-sink",
f"sink_name={sink_name}",
f"sink_properties=device.description=HermesMeetSink",
],
check=True,
capture_output=True,
text=True,
)
except FileNotFoundError as exc:
raise RuntimeError(
"pactl not found — install PulseAudio/pipewire-pulse"
) from exc
except subprocess.CalledProcessError as exc:
raise RuntimeError(
f"pactl load-module null-sink failed: {exc.stderr or exc}"
) from exc
sink_mod_id = self._parse_module_id(sink_out.stdout)
try:
src_out = subprocess.run(
[
"pactl",
"load-module",
"module-virtual-source",
f"source_name={src_name}",
f"master={sink_name}.monitor",
],
check=True,
capture_output=True,
text=True,
)
except subprocess.CalledProcessError as exc:
# Roll back the null-sink we just created so we don't leak it.
subprocess.run(
["pactl", "unload-module", str(sink_mod_id)],
check=False,
capture_output=True,
)
raise RuntimeError(
f"pactl load-module virtual-source failed: {exc.stderr or exc}"
) from exc
src_mod_id = self._parse_module_id(src_out.stdout)
self._platform = "linux"
self._device_name = src_name
self._write_target = sink_name
self._module_ids = [sink_mod_id, src_mod_id]
self._torn_down = False
return {
"platform": "linux",
"device_name": src_name,
"sample_rate": 48000,
"channels": 2,
"module_ids": list(self._module_ids),
"write_target": sink_name,
}
def _setup_darwin(self) -> dict:
try:
out = subprocess.check_output(
["system_profiler", "SPAudioDataType"],
text=True,
stderr=subprocess.STDOUT,
)
except FileNotFoundError as exc:
raise RuntimeError(
"system_profiler not found (macOS-only command)"
) from exc
except subprocess.CalledProcessError as exc:
raise RuntimeError(
f"system_profiler failed: {exc.output}"
) from exc
if "BlackHole" not in out:
raise RuntimeError(
"BlackHole virtual audio device not installed. "
"Install via: brew install blackhole-2ch"
)
self._platform = "darwin"
self._device_name = _BLACKHOLE_DEVICE
self._write_target = _BLACKHOLE_DEVICE
self._module_ids = []
self._torn_down = False
return {
"platform": "darwin",
"device_name": _BLACKHOLE_DEVICE,
"sample_rate": 48000,
"channels": 2,
"module_ids": [],
"write_target": _BLACKHOLE_DEVICE,
}
# ── helpers ──────────────────────────────────────────────────────────
@staticmethod
def _parse_module_id(stdout: str) -> int:
"""pactl load-module prints the new module ID to stdout."""
text = (stdout or "").strip()
if not text:
raise RuntimeError("pactl load-module returned empty stdout")
# Take the last whitespace-separated token on the first non-empty line.
first = text.splitlines()[0].strip()
token = first.split()[-1]
try:
return int(token)
except ValueError as exc:
raise RuntimeError(
f"could not parse pactl module id from: {stdout!r}"
) from exc
def chrome_fake_audio_flags(bridge_info: dict) -> list[str]:
"""Return Chrome flags for using the fake audio input.
The PulseAudio source is selected via the ``PULSE_SOURCE`` env var,
which callers must set in Chrome's environment before launch:
env["PULSE_SOURCE"] = bridge_info["device_name"]
On macOS the caller must ensure the system default audio input is
set to the returned BlackHole device (we do not flip that switch).
"""
system = platform.system()
if system == "Linux":
# Chromium on Linux picks up the PulseAudio source selected via
# PULSE_SOURCE env var; the fake-ui flag skips the permission
# prompt so the bot can pick "use my mic" without user input.
return ["--use-fake-ui-for-media-stream"]
if system == "Darwin":
return ["--use-fake-ui-for-media-stream"]
if system == "Windows":
raise RuntimeError("windows not supported in v2")
raise RuntimeError(f"unsupported platform: {system}")

Some files were not shown because too many files have changed in this diff Show More