Compare commits
400 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 18e6cd9938 | |||
| d5a89283b7 | |||
| 633f74504f | |||
| 27936ee02d | |||
| 3aa86717b6 | |||
| 492c4c6573 | |||
| 3824b03237 | |||
| 42b917c92c | |||
| 7ccfb97fee | |||
| 7a6128cc4f | |||
| 4b28140912 | |||
| 653b5ec128 | |||
| 164e33aa46 | |||
| cdfbd89ea5 | |||
| 730347e38f | |||
| 628ca99d9b | |||
| 460a8ce5d9 | |||
| aa53fb661a | |||
| 8402ba150e | |||
| 512c610058 | |||
| b479205396 | |||
| 60f2415a4a | |||
| 082acc75b0 | |||
| 4424a0e0f7 | |||
| 98d75dea5a | |||
| 9b55365f6f | |||
| a0b62e0c5a | |||
| ac0325c257 | |||
| 817633bc5d | |||
| 9692ce2072 | |||
| 008860a23f | |||
| 0046d170dc | |||
| 8ad29a938a | |||
| a59a98b180 | |||
| 500774e30e | |||
| c4ad2c33f4 | |||
| 75b460bc94 | |||
| a9033c9220 | |||
| ea3c5a14c3 | |||
| ec671c4154 | |||
| df3c9593f8 | |||
| 8ed599dc05 | |||
| 920ebd8303 | |||
| bb00b783fb | |||
| 5e92b67807 | |||
| ee1a07f9e9 | |||
| 65f648ee84 | |||
| 64a497bfa9 | |||
| 90a3e73daf | |||
| 2e6699b319 | |||
| 21f503c23c | |||
| a32d07529c | |||
| 3ff3dfb5ac | |||
| 8258f4dcb7 | |||
| 9f1b1977bc | |||
| e3921e7ca4 | |||
| 7d586ddb42 | |||
| a131c134bc | |||
| 55be532369 | |||
| 8c5d3a99d6 | |||
| af3d5150c1 | |||
| 4a2ee6c162 | |||
| bda2dbc29e | |||
| 943465235e | |||
| cfc8befe65 | |||
| 3e68809fe0 | |||
| a0fe73bada | |||
| 7c63c24613 | |||
| c5781d50c7 | |||
| 235bfb192b | |||
| e63929d4f3 | |||
| 859e09b7ce | |||
| 898ccfd667 | |||
| 6c87371815 | |||
| 517f30b043 | |||
| 9c416e20ab | |||
| d308ae27e1 | |||
| b288934dff | |||
| e19854d893 | |||
| 6993e566ba | |||
| 91512b8210 | |||
| 366351b94d | |||
| 16e243e067 | |||
| 3e1664923d | |||
| c23463fce9 | |||
| de790eaceb | |||
| d81b1cd86c | |||
| 7945fcef21 | |||
| ffa33e53f6 | |||
| 635948d0e0 | |||
| c2ca02fcff | |||
| b51c528613 | |||
| 625c31fcea | |||
| dda12775f2 | |||
| 2e4b65b9f5 | |||
| cb51baeceb | |||
| e85b752516 | |||
| 7da2f07641 | |||
| 478444c262 | |||
| ced8f44cd2 | |||
| 977d5f56c9 | |||
| a32b325d06 | |||
| 419535f07f | |||
| e504a599fe | |||
| dbe5015566 | |||
| ebad6d3f1e | |||
| 87610ce380 | |||
| f66ebe64e8 | |||
| 36b13709f5 | |||
| 77d4766602 | |||
| 00c6480a05 | |||
| 88a85d30c1 | |||
| cebf95854b | |||
| 34eb1aaa9a | |||
| ab6879634e | |||
| 5eb6cd82b2 | |||
| 7e3c8a31f0 | |||
| 0bef0b9416 | |||
| 55e9329ee6 | |||
| 0d4247d9bf | |||
| c997183f53 | |||
| f01e4402a9 | |||
| 5b5a53a155 | |||
| 90c84c6dba | |||
| bdaf56a94d | |||
| b1c49d5e73 | |||
| bdc1adf711 | |||
| 55f212a7a2 | |||
| 7eaad06a87 | |||
| a01e767b24 | |||
| fd474d0f00 | |||
| cd2aee36ca | |||
| 3b60abb6bb | |||
| 0ba6471dd1 | |||
| 7317d69f19 | |||
| 2a0fc97c76 | |||
| 8fb861ea6e | |||
| 635253b918 | |||
| 87477756fd | |||
| 930494d687 | |||
| 5db6db891c | |||
| e818ec520a | |||
| 527ac351b4 | |||
| b115ea62da | |||
| 25767513f2 | |||
| c370e2e1e5 | |||
| b16f9d438b | |||
| 85e9a23efb | |||
| 4395c2b007 | |||
| 0cd98499bb | |||
| 4cdb6962ca | |||
| 9a46feb9bd | |||
| 8d2b08342c | |||
| 82f842277e | |||
| f823535db2 | |||
| d3dedf10aa | |||
| 7ca16eea56 | |||
| 4a9070c9ac | |||
| 7242361a69 | |||
| cd7a200e6c | |||
| 71eee26640 | |||
| 69ff201050 | |||
| 2259eac49e | |||
| cb7cfba6de | |||
| debae25f1c | |||
| bde89c169b | |||
| b36007b246 | |||
| c78b528125 | |||
| 319c1c1691 | |||
| 4943ea2a7c | |||
| 4d3e3a738d | |||
| a5319fb7af | |||
| f5552f92e2 | |||
| 1566f1eecc | |||
| a30db69dd5 | |||
| f6846205cc | |||
| 6a3873942f | |||
| 64de685d3f | |||
| cee4036e8b | |||
| cf8439263a | |||
| 3271ffbd80 | |||
| a7831b63db | |||
| d4dde6b5f2 | |||
| 755a280424 | |||
| 6087e04043 | |||
| 4921b26945 | |||
| 822b507a72 | |||
| 18beb69b49 | |||
| bf05b8f4a2 | |||
| 778fd1898e | |||
| 45bfcb9e71 | |||
| aa7b5acfcd | |||
| 36e352afa7 | |||
| 2d86e97a7e | |||
| edadeaf495 | |||
| f9885130b4 | |||
| f414df3a56 | |||
| c0d25df311 | |||
| 10e36188da | |||
| 6a3102f9d4 | |||
| 75d3eaa0e4 | |||
| 802c7acb81 | |||
| 541cd732e8 | |||
| 4d119bb62a | |||
| 878c196738 | |||
| 50dd67c680 | |||
| aea4a90f0e | |||
| 897dc3a2bb | |||
| 350ee1bf23 | |||
| 3d21f97422 | |||
| 4b5a88d714 | |||
| b1be86ef96 | |||
| 7b5b524fc7 | |||
| a30ffbe1d4 | |||
| c9f7b703dd | |||
| a8bfe72d35 | |||
| ae7687cdc5 | |||
| c730f6cc0b | |||
| d993a3f450 | |||
| 1dfcc2ffc3 | |||
| 5b2c59559a | |||
| 2be5e181a9 | |||
| 015f6c825d | |||
| bb59d3bac2 | |||
| 4a21920b5e | |||
| cc16d0ef77 | |||
| 087e74d4d7 | |||
| a8fcd1c742 | |||
| 9be83728a6 | |||
| 9397767513 | |||
| 9662e3218a | |||
| 0824ba6a9d | |||
| 42c076d349 | |||
| 0e2a53eab2 | |||
| 6814646b36 | |||
| eaa7e2db67 | |||
| 4e356098d2 | |||
| de24315978 | |||
| 20cb706e03 | |||
| d7a3468246 | |||
| f2d655529a | |||
| 27f4dba5ce | |||
| 8443998dc3 | |||
| e3901d5b25 | |||
| 06f81752ed | |||
| 9ef1ae138a | |||
| c5196f1fc2 | |||
| 63bf7a29b6 | |||
| 15937a6b46 | |||
| 454d883e69 | |||
| 70f56e7605 | |||
| 7fa70b6c87 | |||
| 9a70260490 | |||
| ffd2621039 | |||
| 1e37ddc929 | |||
| 83c1c201f6 | |||
| 4bda9dcade | |||
| 67dcace412 | |||
| 35c57cc46b | |||
| e8441c4c0f | |||
| 2511207cb0 | |||
| 0f3a6f0fb3 | |||
| a562420383 | |||
| 855366909f | |||
| d09ab8ff13 | |||
| 438db0c7b0 | |||
| 2ccdadcca6 | |||
| 76042f5867 | |||
| 192e7eb21f | |||
| d91e24547c | |||
| 05dc2eec36 | |||
| 2e6c3c7d23 | |||
| a0aebad673 | |||
| 7143d22a83 | |||
| 5ac4088856 | |||
| e16e196c7e | |||
| 7d68ea9501 | |||
| bc17310442 | |||
| 8f0fa0836f | |||
| bbd950efcf | |||
| 381121025e | |||
| 355e0ae960 | |||
| 1c964ed43f | |||
| cd7c5e5606 | |||
| ee7ef33b02 | |||
| 5cd41d2b3b | |||
| 9bb3bc422d | |||
| 19d75d1797 | |||
| 458ce792d2 | |||
| 14fcff60c9 | |||
| db4e4acca0 | |||
| 59b56d445c | |||
| eb28145f36 | |||
| a55de5bcd0 | |||
| cec0af02ad | |||
| 91a7a0acbe | |||
| 7c50ed707c | |||
| 731e1ef8cb | |||
| ac57114284 | |||
| 24b4b24d79 | |||
| c15064fa37 | |||
| 7bfa9442de | |||
| d8e4c7214e | |||
| 6ef3a47ce5 | |||
| 3a7653dd1f | |||
| 125de02056 | |||
| 4c591c2819 | |||
| 01535a4732 | |||
| 0a15dbdc43 | |||
| ce0513dd2e | |||
| dc5e02ea7f | |||
| ff851ba7b9 | |||
| 14dd8e9a72 | |||
| 1d80e92c7e | |||
| edce7522a5 | |||
| 45e1228a8a | |||
| 83129e72de | |||
| 4d170134ef | |||
| 81e01f6ee9 | |||
| 7fd8dc0bfb | |||
| d056b610b7 | |||
| 2536a36f6f | |||
| 1b8ca9254f | |||
| db7c5735f0 | |||
| 8bbeaea6c7 | |||
| 1fdc31b214 | |||
| 5fac6c3440 | |||
| 2c56dce0ed | |||
| 01cf2c65cc | |||
| b2d3308f98 | |||
| 25ba6a4a74 | |||
| 4c797bfae9 | |||
| c58956a9a2 | |||
| 3944b22506 | |||
| 489bed6f96 | |||
| ad0ac89478 | |||
| dc4d92f131 | |||
| 47420a84b9 | |||
| f93d4624bf | |||
| 5ae608152e | |||
| 88b65cc82a | |||
| edc78e258c | |||
| 31d7f1951a | |||
| b1c18e5a41 | |||
| bd66e55a02 | |||
| 1735ced93b | |||
| bba16943f6 | |||
| 132620ba3d | |||
| 876bb60044 | |||
| a68793b6c4 | |||
| bcc5362432 | |||
| 283c8fd6e2 | |||
| 919274b60e | |||
| 6e83d90eb4 | |||
| c6fdf48b79 | |||
| a046483e86 | |||
| fdcbd2257b | |||
| 48bdd2445e | |||
| 5e52011de3 | |||
| e48a497d16 | |||
| 2dfcc8087a | |||
| 4db58d45d4 | |||
| 57b43fdd4b | |||
| e9c47c7042 | |||
| ee0728c6c4 | |||
| 9daa0620a6 | |||
| 648b89911f | |||
| 7c17accb29 | |||
| 5006b2204b | |||
| a9fa73a620 | |||
| 7c8c031f60 | |||
| ea01bdcebe | |||
| d635e2df3f | |||
| cf2fabc40f | |||
| af22421e87 | |||
| 97d54f0e4d | |||
| 6e561ffa6d | |||
| ac05daa189 | |||
| 3c1c65e754 | |||
| f92006ce1c | |||
| b35d692f45 | |||
| facea84559 | |||
| f67a61dc93 | |||
| 6ed37e0f42 | |||
| 591deeb928 | |||
| 5ae07e7b5c | |||
| 47b02e961c | |||
| 0702231dd8 | |||
| db09477b77 | |||
| 81987f0350 | |||
| 9830905dab | |||
| 0d548d1db9 | |||
| eb92222811 | |||
| e4a91ccb76 | |||
| 5ac5365923 | |||
| f433197f23 | |||
| df485628ce | |||
| 9fde22d233 | |||
| 9d7b64b5dd | |||
| 0738b80833 |
+6
-2
@@ -30,18 +30,22 @@ WORKDIR /opt/hermes
|
||||
# unless the lockfiles themselves change.
|
||||
COPY package.json package-lock.json ./
|
||||
COPY web/package.json web/package-lock.json web/
|
||||
COPY ui-tui/package.json ui-tui/package-lock.json ui-tui/
|
||||
COPY ui-tui/packages/hermes-ink/package.json ui-tui/packages/hermes-ink/package-lock.json ui-tui/packages/hermes-ink/
|
||||
|
||||
RUN npm install --prefer-offline --no-audit && \
|
||||
npx playwright install --with-deps chromium --only-shell && \
|
||||
(cd web && npm install --prefer-offline --no-audit) && \
|
||||
(cd ui-tui && npm install --prefer-offline --no-audit) && \
|
||||
npm cache clean --force
|
||||
|
||||
# ---------- Source code ----------
|
||||
# .dockerignore excludes node_modules, so the installs above survive.
|
||||
COPY --chown=hermes:hermes . .
|
||||
|
||||
# Build web dashboard (Vite outputs to hermes_cli/web_dist/)
|
||||
RUN cd web && npm run build
|
||||
# Build browser dashboard and terminal UI assets.
|
||||
RUN cd web && npm run build && \
|
||||
cd ../ui-tui && npm run build
|
||||
|
||||
# ---------- Permissions ----------
|
||||
# Make install dir world-readable so any HERMES_UID can read it at runtime.
|
||||
|
||||
@@ -390,7 +390,16 @@ def build_anthropic_client(api_key: str, base_url: str = None, timeout: float =
|
||||
"timeout": Timeout(timeout=float(_read_timeout), connect=10.0),
|
||||
}
|
||||
if normalized_base_url:
|
||||
kwargs["base_url"] = normalized_base_url
|
||||
# Azure Anthropic endpoints require an ``api-version`` query parameter.
|
||||
# Pass it via default_query so the SDK appends it to every request URL
|
||||
# without corrupting the base_url (appending it directly produces
|
||||
# malformed paths like /anthropic?api-version=.../v1/messages).
|
||||
_is_azure_endpoint = "azure.com" in normalized_base_url.lower()
|
||||
if _is_azure_endpoint and "api-version" not in normalized_base_url:
|
||||
kwargs["base_url"] = normalized_base_url.rstrip("/")
|
||||
kwargs["default_query"] = {"api-version": "2025-04-15"}
|
||||
else:
|
||||
kwargs["base_url"] = normalized_base_url
|
||||
common_betas = _common_betas_for_base_url(normalized_base_url)
|
||||
|
||||
if _is_kimi_coding_endpoint(base_url):
|
||||
@@ -1680,9 +1689,9 @@ def build_anthropic_kwargs(
|
||||
|
||||
# ── Strip sampling params on 4.7+ ─────────────────────────────────
|
||||
# Opus 4.7 rejects any non-default temperature/top_p/top_k with a 400.
|
||||
# Callers (auxiliary_client, flush_memories, etc.) may set these for
|
||||
# older models; drop them here as a safety net so upstream 4.6 → 4.7
|
||||
# migrations don't require coordinated edits everywhere.
|
||||
# Callers (auxiliary_client, etc.) may set these for older models;
|
||||
# drop them here as a safety net so upstream 4.6 → 4.7 migrations
|
||||
# don't require coordinated edits everywhere.
|
||||
if _forbids_sampling_params(model):
|
||||
for _sampling_key in ("temperature", "top_p", "top_k"):
|
||||
kwargs.pop(_sampling_key, None)
|
||||
|
||||
+199
-44
@@ -42,6 +42,7 @@ import time
|
||||
from pathlib import Path # noqa: F401 — used by test mocks
|
||||
from types import SimpleNamespace
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
from urllib.parse import urlparse, parse_qs, urlunparse
|
||||
|
||||
from openai import OpenAI
|
||||
|
||||
@@ -52,6 +53,17 @@ from utils import base_url_host_matches, base_url_hostname, normalize_proxy_env_
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _extract_url_query_params(url: str):
|
||||
"""Extract query params from URL, return (clean_url, default_query dict or None)."""
|
||||
parsed = urlparse(url)
|
||||
if parsed.query:
|
||||
clean = urlunparse(parsed._replace(query=""))
|
||||
params = {k: v[0] for k, v in parse_qs(parsed.query).items()}
|
||||
return clean, params
|
||||
return url, None
|
||||
|
||||
|
||||
# Module-level flag: only warn once per process about stale OPENAI_BASE_URL.
|
||||
_stale_base_url_warned = False
|
||||
|
||||
@@ -390,7 +402,7 @@ class _CodexCompletionsAdapter:
|
||||
# Note: the Codex endpoint (chatgpt.com/backend-api/codex) does NOT
|
||||
# support max_output_tokens or temperature — omit to avoid 400 errors.
|
||||
|
||||
# Tools support for flush_memories and similar callers
|
||||
# Tools support for auxiliary callers (e.g. skills_hub) that pass function schemas
|
||||
tools = kwargs.get("tools")
|
||||
if tools:
|
||||
converted = []
|
||||
@@ -1157,8 +1169,10 @@ def _try_custom_endpoint() -> Tuple[Optional[Any], Optional[str]]:
|
||||
return None, None
|
||||
model = _read_main_model() or "gpt-4o-mini"
|
||||
logger.debug("Auxiliary client: custom endpoint (%s, api_mode=%s)", model, custom_mode or "chat_completions")
|
||||
_clean_base, _dq = _extract_url_query_params(custom_base)
|
||||
_extra = {"default_query": _dq} if _dq else {}
|
||||
if custom_mode == "codex_responses":
|
||||
real_client = OpenAI(api_key=custom_key, base_url=custom_base)
|
||||
real_client = OpenAI(api_key=custom_key, base_url=_clean_base, **_extra)
|
||||
return CodexAuxiliaryClient(real_client, model), model
|
||||
if custom_mode == "anthropic_messages":
|
||||
# Third-party Anthropic-compatible gateway (MiniMax, Zhipu GLM,
|
||||
@@ -1172,12 +1186,12 @@ def _try_custom_endpoint() -> Tuple[Optional[Any], Optional[str]]:
|
||||
"Custom endpoint declares api_mode=anthropic_messages but the "
|
||||
"anthropic SDK is not installed — falling back to OpenAI-wire."
|
||||
)
|
||||
return OpenAI(api_key=custom_key, base_url=custom_base), model
|
||||
return OpenAI(api_key=custom_key, base_url=_clean_base, **_extra), model
|
||||
return (
|
||||
AnthropicAuxiliaryClient(real_client, model, custom_key, custom_base, is_oauth=False),
|
||||
model,
|
||||
)
|
||||
return OpenAI(api_key=custom_key, base_url=custom_base), model
|
||||
return OpenAI(api_key=custom_key, base_url=_clean_base, **_extra), model
|
||||
|
||||
|
||||
def _try_codex() -> Tuple[Optional[Any], Optional[str]]:
|
||||
@@ -1349,6 +1363,49 @@ def _is_auth_error(exc: Exception) -> bool:
|
||||
return "error code: 401" in err_lower or "authenticationerror" in type(exc).__name__.lower()
|
||||
|
||||
|
||||
def _is_unsupported_parameter_error(exc: Exception, param: str) -> bool:
|
||||
"""Detect provider 400s for an unsupported request parameter.
|
||||
|
||||
Different OpenAI-compatible endpoints phrase the same class of error a few
|
||||
ways: ``Unsupported parameter: X``, ``unsupported_parameter`` with a
|
||||
``param`` field, ``X is not supported``, ``unknown parameter: X``,
|
||||
``unrecognized request argument: X``. We match on both the parameter
|
||||
name and a generic "unsupported/unknown/unrecognized parameter" marker so
|
||||
call sites can reactively retry without the offending key instead of
|
||||
surfacing a noisy auxiliary failure.
|
||||
|
||||
Generalizes the temperature-specific detector that originally shipped
|
||||
with PR #15621 so the same retry strategy can cover ``max_tokens``,
|
||||
``seed``, ``top_p``, and any future quirk. Credit @nicholasrae (PR #15416)
|
||||
for the generalization pattern.
|
||||
"""
|
||||
param_lower = (param or "").lower()
|
||||
if not param_lower:
|
||||
return False
|
||||
err_lower = str(exc).lower()
|
||||
if param_lower not in err_lower:
|
||||
return False
|
||||
return any(marker in err_lower for marker in (
|
||||
"unsupported parameter",
|
||||
"unsupported_parameter",
|
||||
"not supported",
|
||||
"does not support",
|
||||
"unknown parameter",
|
||||
"unrecognized request argument",
|
||||
"unrecognized parameter",
|
||||
"invalid parameter",
|
||||
))
|
||||
|
||||
|
||||
def _is_unsupported_temperature_error(exc: Exception) -> bool:
|
||||
"""Back-compat wrapper: detect API errors where the model rejects ``temperature``.
|
||||
|
||||
Delegates to :func:`_is_unsupported_parameter_error`; kept as a separate
|
||||
public symbol because existing tests and call sites import it by name.
|
||||
"""
|
||||
return _is_unsupported_parameter_error(exc, "temperature")
|
||||
|
||||
|
||||
def _evict_cached_clients(provider: str) -> None:
|
||||
"""Drop cached auxiliary clients for a provider so fresh creds are used."""
|
||||
normalized = _normalize_aux_provider(provider)
|
||||
@@ -1560,8 +1617,14 @@ def _resolve_auto(main_runtime: Optional[Dict[str, Any]] = None) -> Tuple[Option
|
||||
# below — never look up auth env vars ad-hoc.
|
||||
|
||||
|
||||
def _to_async_client(sync_client, model: str):
|
||||
"""Convert a sync client to its async counterpart, preserving Codex routing."""
|
||||
def _to_async_client(sync_client, model: str, is_vision: bool = False):
|
||||
"""Convert a sync client to its async counterpart, preserving Codex routing.
|
||||
|
||||
When ``is_vision=True`` and the underlying base URL is Copilot, the
|
||||
resulting async client carries the ``Copilot-Vision-Request: true``
|
||||
header so the request is routed to Copilot's vision-capable
|
||||
infrastructure (otherwise vision payloads silently time out).
|
||||
"""
|
||||
from openai import AsyncOpenAI
|
||||
|
||||
if isinstance(sync_client, CodexAuxiliaryClient):
|
||||
@@ -1590,9 +1653,11 @@ def _to_async_client(sync_client, model: str):
|
||||
if base_url_host_matches(sync_base_url, "openrouter.ai"):
|
||||
async_kwargs["default_headers"] = dict(_OR_HEADERS)
|
||||
elif base_url_host_matches(sync_base_url, "api.githubcopilot.com"):
|
||||
from hermes_cli.models import copilot_default_headers
|
||||
from hermes_cli.copilot_auth import copilot_request_headers
|
||||
|
||||
async_kwargs["default_headers"] = copilot_default_headers()
|
||||
async_kwargs["default_headers"] = copilot_request_headers(
|
||||
is_agent_turn=True, is_vision=is_vision
|
||||
)
|
||||
elif base_url_host_matches(sync_base_url, "api.kimi.com"):
|
||||
async_kwargs["default_headers"] = {"User-Agent": "claude-code/0.1.0"}
|
||||
return AsyncOpenAI(**async_kwargs), model
|
||||
@@ -1619,6 +1684,7 @@ def resolve_provider_client(
|
||||
explicit_api_key: str = None,
|
||||
api_mode: str = None,
|
||||
main_runtime: Optional[Dict[str, Any]] = None,
|
||||
is_vision: bool = False,
|
||||
) -> Tuple[Optional[Any], Optional[str]]:
|
||||
"""Central router: given a provider name and optional model, return a
|
||||
configured client with the correct auth, base URL, and API format.
|
||||
@@ -1702,7 +1768,7 @@ def resolve_provider_client(
|
||||
"auxiliary provider (using %r instead)", model, resolved)
|
||||
model = None
|
||||
final_model = model or resolved
|
||||
return (_to_async_client(client, final_model) if async_mode
|
||||
return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
|
||||
else (client, final_model))
|
||||
|
||||
# ── OpenRouter ───────────────────────────────────────────────────
|
||||
@@ -1715,7 +1781,7 @@ def resolve_provider_client(
|
||||
)
|
||||
return None, None
|
||||
final_model = _normalize_resolved_model(model or default, provider)
|
||||
return (_to_async_client(client, final_model) if async_mode
|
||||
return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
|
||||
else (client, final_model))
|
||||
|
||||
# ── Nous Portal (OAuth) ──────────────────────────────────────────
|
||||
@@ -1732,7 +1798,7 @@ def resolve_provider_client(
|
||||
"but Nous Portal not configured (run: hermes auth)")
|
||||
return None, None
|
||||
final_model = _normalize_resolved_model(model or default, provider)
|
||||
return (_to_async_client(client, final_model) if async_mode
|
||||
return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
|
||||
else (client, final_model))
|
||||
|
||||
# ── OpenAI Codex (OAuth → Responses API) ─────────────────────────
|
||||
@@ -1759,7 +1825,7 @@ def resolve_provider_client(
|
||||
"but no Codex OAuth token found (run: hermes model)")
|
||||
return None, None
|
||||
final_model = _normalize_resolved_model(model or default, provider)
|
||||
return (_to_async_client(client, final_model) if async_mode
|
||||
return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
|
||||
else (client, final_model))
|
||||
|
||||
# ── Custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY) ───────────
|
||||
@@ -1782,14 +1848,19 @@ def resolve_provider_client(
|
||||
provider,
|
||||
)
|
||||
extra = {}
|
||||
_clean_base, _dq = _extract_url_query_params(custom_base)
|
||||
if _dq:
|
||||
extra["default_query"] = _dq
|
||||
if base_url_host_matches(custom_base, "api.kimi.com"):
|
||||
extra["default_headers"] = {"User-Agent": "claude-code/0.1.0"}
|
||||
elif base_url_host_matches(custom_base, "api.githubcopilot.com"):
|
||||
from hermes_cli.models import copilot_default_headers
|
||||
extra["default_headers"] = copilot_default_headers()
|
||||
client = OpenAI(api_key=custom_key, base_url=custom_base, **extra)
|
||||
from hermes_cli.copilot_auth import copilot_request_headers
|
||||
extra["default_headers"] = copilot_request_headers(
|
||||
is_agent_turn=True, is_vision=is_vision
|
||||
)
|
||||
client = OpenAI(api_key=custom_key, base_url=_clean_base, **extra)
|
||||
client = _wrap_if_needed(client, final_model, custom_base)
|
||||
return (_to_async_client(client, final_model) if async_mode
|
||||
return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
|
||||
else (client, final_model))
|
||||
# Try custom first, then codex, then API-key providers
|
||||
for try_fn in (_try_custom_endpoint, _try_codex,
|
||||
@@ -1799,7 +1870,7 @@ def resolve_provider_client(
|
||||
final_model = _normalize_resolved_model(model or default, provider)
|
||||
_cbase = str(getattr(client, "base_url", "") or "")
|
||||
client = _wrap_if_needed(client, final_model, _cbase)
|
||||
return (_to_async_client(client, final_model) if async_mode
|
||||
return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
|
||||
else (client, final_model))
|
||||
logger.warning("resolve_provider_client: custom/main requested "
|
||||
"but no endpoint credentials found")
|
||||
@@ -1824,6 +1895,8 @@ def resolve_provider_client(
|
||||
model or custom_entry.get("model") or _read_main_model() or "gpt-4o-mini",
|
||||
provider,
|
||||
)
|
||||
_clean_base2, _dq2 = _extract_url_query_params(custom_base)
|
||||
_extra2 = {"default_query": _dq2} if _dq2 else {}
|
||||
logger.debug(
|
||||
"resolve_provider_client: named custom provider %r (%s, api_mode=%s)",
|
||||
provider, final_model, entry_api_mode or "chat_completions")
|
||||
@@ -1841,8 +1914,8 @@ def resolve_provider_client(
|
||||
"installed — falling back to OpenAI-wire.",
|
||||
provider,
|
||||
)
|
||||
client = OpenAI(api_key=custom_key, base_url=custom_base)
|
||||
return (_to_async_client(client, final_model) if async_mode
|
||||
client = OpenAI(api_key=custom_key, base_url=_clean_base2, **_extra2)
|
||||
return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
|
||||
else (client, final_model))
|
||||
sync_anthropic = AnthropicAuxiliaryClient(
|
||||
real_client, final_model, custom_key, custom_base, is_oauth=False,
|
||||
@@ -1850,7 +1923,7 @@ def resolve_provider_client(
|
||||
if async_mode:
|
||||
return AsyncAnthropicAuxiliaryClient(sync_anthropic), final_model
|
||||
return sync_anthropic, final_model
|
||||
client = OpenAI(api_key=custom_key, base_url=custom_base)
|
||||
client = OpenAI(api_key=custom_key, base_url=_clean_base2, **_extra2)
|
||||
# codex_responses or inherited auto-detect (via _wrap_if_needed).
|
||||
# _wrap_if_needed reads the closed-over `api_mode` (the task-level
|
||||
# override). Named-provider entry api_mode=codex_responses also
|
||||
@@ -1861,7 +1934,7 @@ def resolve_provider_client(
|
||||
client = CodexAuxiliaryClient(client, final_model)
|
||||
else:
|
||||
client = _wrap_if_needed(client, final_model, custom_base)
|
||||
return (_to_async_client(client, final_model) if async_mode
|
||||
return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
|
||||
else (client, final_model))
|
||||
logger.warning(
|
||||
"resolve_provider_client: named custom provider %r has no base_url",
|
||||
@@ -1893,7 +1966,7 @@ def resolve_provider_client(
|
||||
logger.warning("resolve_provider_client: anthropic requested but no Anthropic credentials found")
|
||||
return None, None
|
||||
final_model = _normalize_resolved_model(model or default_model, provider)
|
||||
return (_to_async_client(client, final_model) if async_mode else (client, final_model))
|
||||
return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode else (client, final_model))
|
||||
|
||||
creds = resolve_api_key_provider_credentials(provider)
|
||||
api_key = str(creds.get("api_key", "")).strip()
|
||||
@@ -1919,7 +1992,7 @@ def resolve_provider_client(
|
||||
if is_native_gemini_base_url(base_url):
|
||||
client = GeminiNativeClient(api_key=api_key, base_url=base_url)
|
||||
logger.debug("resolve_provider_client: %s (%s)", provider, final_model)
|
||||
return (_to_async_client(client, final_model) if async_mode
|
||||
return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
|
||||
else (client, final_model))
|
||||
|
||||
# Provider-specific headers
|
||||
@@ -1927,9 +2000,11 @@ def resolve_provider_client(
|
||||
if base_url_host_matches(base_url, "api.kimi.com"):
|
||||
headers["User-Agent"] = "claude-code/0.1.0"
|
||||
elif base_url_host_matches(base_url, "api.githubcopilot.com"):
|
||||
from hermes_cli.models import copilot_default_headers
|
||||
from hermes_cli.copilot_auth import copilot_request_headers
|
||||
|
||||
headers.update(copilot_default_headers())
|
||||
headers.update(copilot_request_headers(
|
||||
is_agent_turn=True, is_vision=is_vision
|
||||
))
|
||||
client = OpenAI(api_key=api_key, base_url=base_url,
|
||||
**({"default_headers": headers} if headers else {}))
|
||||
|
||||
@@ -1955,7 +2030,7 @@ def resolve_provider_client(
|
||||
client = _wrap_if_needed(client, final_model, base_url)
|
||||
|
||||
logger.debug("resolve_provider_client: %s (%s)", provider, final_model)
|
||||
return (_to_async_client(client, final_model) if async_mode
|
||||
return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
|
||||
else (client, final_model))
|
||||
|
||||
if pconfig.auth_type == "external_process":
|
||||
@@ -1987,7 +2062,7 @@ def resolve_provider_client(
|
||||
args=args,
|
||||
)
|
||||
logger.debug("resolve_provider_client: %s (%s)", provider, final_model)
|
||||
return (_to_async_client(client, final_model) if async_mode
|
||||
return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
|
||||
else (client, final_model))
|
||||
logger.warning("resolve_provider_client: external-process provider %s not "
|
||||
"directly supported", provider)
|
||||
@@ -2023,7 +2098,7 @@ def resolve_provider_client(
|
||||
base_url=f"https://bedrock-runtime.{region}.amazonaws.com",
|
||||
)
|
||||
logger.debug("resolve_provider_client: bedrock (%s, %s)", final_model, region)
|
||||
return (_to_async_client(client, final_model) if async_mode
|
||||
return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
|
||||
else (client, final_model))
|
||||
|
||||
elif pconfig.auth_type in ("oauth_device_code", "oauth_external"):
|
||||
@@ -2098,8 +2173,13 @@ def _normalize_vision_provider(provider: Optional[str]) -> str:
|
||||
return _normalize_aux_provider(provider)
|
||||
|
||||
|
||||
def _resolve_strict_vision_backend(provider: str) -> Tuple[Optional[Any], Optional[str]]:
|
||||
def _resolve_strict_vision_backend(
|
||||
provider: str,
|
||||
model: Optional[str] = None,
|
||||
) -> Tuple[Optional[Any], Optional[str]]:
|
||||
provider = _normalize_vision_provider(provider)
|
||||
if provider == "copilot":
|
||||
return resolve_provider_client("copilot", model, is_vision=True)
|
||||
if provider == "openrouter":
|
||||
return _try_openrouter()
|
||||
if provider == "nous":
|
||||
@@ -2167,7 +2247,7 @@ def resolve_vision_provider_client(
|
||||
return resolved_provider, None, None
|
||||
final_model = resolved_model or default_model
|
||||
if async_mode:
|
||||
async_client, async_model = _to_async_client(sync_client, final_model)
|
||||
async_client, async_model = _to_async_client(sync_client, final_model, is_vision=True)
|
||||
return resolved_provider, async_client, async_model
|
||||
return resolved_provider, sync_client, final_model
|
||||
|
||||
@@ -2199,8 +2279,11 @@ def resolve_vision_provider_client(
|
||||
main_provider = _read_main_provider()
|
||||
main_model = _read_main_model()
|
||||
if main_provider and main_provider not in ("auto", ""):
|
||||
vision_model = _PROVIDER_VISION_MODELS.get(main_provider, main_model)
|
||||
if main_provider == "nous":
|
||||
sync_client, default_model = _resolve_strict_vision_backend(main_provider)
|
||||
sync_client, default_model = _resolve_strict_vision_backend(
|
||||
main_provider, vision_model
|
||||
)
|
||||
if sync_client is not None:
|
||||
logger.info(
|
||||
"Vision auto-detect: using main provider %s (%s)",
|
||||
@@ -2208,10 +2291,10 @@ def resolve_vision_provider_client(
|
||||
)
|
||||
return _finalize(main_provider, sync_client, default_model)
|
||||
else:
|
||||
vision_model = _PROVIDER_VISION_MODELS.get(main_provider, main_model)
|
||||
rpc_client, rpc_model = resolve_provider_client(
|
||||
main_provider, vision_model,
|
||||
api_mode=resolved_api_mode)
|
||||
api_mode=resolved_api_mode,
|
||||
is_vision=True)
|
||||
if rpc_client is not None:
|
||||
logger.info(
|
||||
"Vision auto-detect: using main provider %s (%s)",
|
||||
@@ -2233,11 +2316,14 @@ def resolve_vision_provider_client(
|
||||
return None, None, None
|
||||
|
||||
if requested in _VISION_AUTO_PROVIDER_ORDER:
|
||||
sync_client, default_model = _resolve_strict_vision_backend(requested)
|
||||
sync_client, default_model = _resolve_strict_vision_backend(
|
||||
requested, resolved_model
|
||||
)
|
||||
return _finalize(requested, sync_client, default_model)
|
||||
|
||||
client, final_model = _get_cached_client(requested, resolved_model, async_mode,
|
||||
api_mode=resolved_api_mode)
|
||||
api_mode=resolved_api_mode,
|
||||
is_vision=True)
|
||||
if client is None:
|
||||
return requested, None, None
|
||||
return requested, client, final_model
|
||||
@@ -2301,10 +2387,11 @@ def _client_cache_key(
|
||||
api_key: Optional[str] = None,
|
||||
api_mode: Optional[str] = None,
|
||||
main_runtime: Optional[Dict[str, Any]] = None,
|
||||
is_vision: bool = False,
|
||||
) -> tuple:
|
||||
runtime = _normalize_main_runtime(main_runtime)
|
||||
runtime_key = tuple(runtime.get(field, "") for field in _MAIN_RUNTIME_FIELDS) if provider == "auto" else ()
|
||||
return (provider, async_mode, base_url or "", api_key or "", api_mode or "", runtime_key)
|
||||
return (provider, async_mode, base_url or "", api_key or "", api_mode or "", runtime_key, is_vision)
|
||||
|
||||
|
||||
def _store_cached_client(cache_key: tuple, client: Any, default_model: Optional[str], *, bound_loop: Any = None) -> None:
|
||||
@@ -2330,6 +2417,7 @@ def _refresh_nous_auxiliary_client(
|
||||
api_key: Optional[str] = None,
|
||||
api_mode: Optional[str] = None,
|
||||
main_runtime: Optional[Dict[str, Any]] = None,
|
||||
is_vision: bool = False,
|
||||
) -> Tuple[Optional[Any], Optional[str]]:
|
||||
"""Refresh Nous runtime creds, rebuild the client, and replace the cache entry."""
|
||||
runtime = _resolve_nous_runtime_api(force_refresh=True)
|
||||
@@ -2347,7 +2435,7 @@ def _refresh_nous_auxiliary_client(
|
||||
current_loop = _aio.get_event_loop()
|
||||
except RuntimeError:
|
||||
pass
|
||||
client, final_model = _to_async_client(sync_client, final_model or "")
|
||||
client, final_model = _to_async_client(sync_client, final_model or "", is_vision=is_vision)
|
||||
else:
|
||||
client = sync_client
|
||||
|
||||
@@ -2358,6 +2446,7 @@ def _refresh_nous_auxiliary_client(
|
||||
api_key=api_key,
|
||||
api_mode=api_mode,
|
||||
main_runtime=main_runtime,
|
||||
is_vision=is_vision,
|
||||
)
|
||||
_store_cached_client(cache_key, client, final_model, bound_loop=current_loop)
|
||||
return client, final_model
|
||||
@@ -2487,6 +2576,7 @@ def _get_cached_client(
|
||||
api_key: str = None,
|
||||
api_mode: str = None,
|
||||
main_runtime: Optional[Dict[str, Any]] = None,
|
||||
is_vision: bool = False,
|
||||
) -> Tuple[Optional[Any], Optional[str]]:
|
||||
"""Get or create a cached client for the given provider.
|
||||
|
||||
@@ -2523,6 +2613,7 @@ def _get_cached_client(
|
||||
api_key=api_key,
|
||||
api_mode=api_mode,
|
||||
main_runtime=main_runtime,
|
||||
is_vision=is_vision,
|
||||
)
|
||||
with _client_cache_lock:
|
||||
if cache_key in _client_cache:
|
||||
@@ -2554,6 +2645,7 @@ def _get_cached_client(
|
||||
explicit_api_key=api_key,
|
||||
api_mode=api_mode,
|
||||
main_runtime=runtime,
|
||||
is_vision=is_vision,
|
||||
)
|
||||
if client is not None:
|
||||
# For async clients, remember which loop they were created on so we
|
||||
@@ -2760,8 +2852,8 @@ def _build_call_kwargs(
|
||||
temperature = fixed_temperature
|
||||
|
||||
# Opus 4.7+ rejects any non-default temperature/top_p/top_k — silently
|
||||
# drop here so auxiliary callers that hardcode temperature (e.g. 0.3 on
|
||||
# flush_memories, 0 on structured-JSON extraction) don't 400 the moment
|
||||
# drop here so auxiliary callers that hardcode temperature (e.g. 0 on
|
||||
# structured-JSON extraction) don't 400 the moment
|
||||
# the aux model is flipped to 4.7.
|
||||
if temperature is not None:
|
||||
from agent.anthropic_adapter import _forbids_sampling_params
|
||||
@@ -2849,7 +2941,7 @@ def call_llm(
|
||||
|
||||
Args:
|
||||
task: Auxiliary task name ("compression", "vision", "web_extract",
|
||||
"session_search", "skills_hub", "mcp", "flush_memories").
|
||||
"session_search", "skills_hub", "mcp", "title_generation").
|
||||
Reads provider:model from config/env. Ignored if provider is set.
|
||||
provider: Explicit provider override.
|
||||
model: Explicit model override.
|
||||
@@ -2952,13 +3044,45 @@ def call_llm(
|
||||
if _is_anthropic_compat_endpoint(resolved_provider, _client_base):
|
||||
kwargs["messages"] = _convert_openai_images_to_anthropic(kwargs["messages"])
|
||||
|
||||
# Handle max_tokens vs max_completion_tokens retry, then payment fallback.
|
||||
# Handle unsupported temperature, max_tokens vs max_completion_tokens retry,
|
||||
# then payment fallback.
|
||||
try:
|
||||
return _validate_llm_response(
|
||||
client.chat.completions.create(**kwargs), task)
|
||||
except Exception as first_err:
|
||||
if "temperature" in kwargs and _is_unsupported_temperature_error(first_err):
|
||||
retry_kwargs = dict(kwargs)
|
||||
retry_kwargs.pop("temperature", None)
|
||||
logger.info(
|
||||
"Auxiliary %s: provider rejected temperature; retrying once without it",
|
||||
task or "call",
|
||||
)
|
||||
try:
|
||||
return _validate_llm_response(
|
||||
client.chat.completions.create(**retry_kwargs), task)
|
||||
except Exception as retry_err:
|
||||
retry_err_str = str(retry_err)
|
||||
# If retry still fails, fall through to the max_tokens /
|
||||
# payment / auth chains below using the temperature-stripped
|
||||
# kwargs. Re-raise only if the retry hit something those
|
||||
# chains won't handle.
|
||||
if not (
|
||||
_is_payment_error(retry_err)
|
||||
or _is_connection_error(retry_err)
|
||||
or _is_auth_error(retry_err)
|
||||
or "max_tokens" in retry_err_str
|
||||
or "unsupported_parameter" in retry_err_str
|
||||
):
|
||||
raise
|
||||
first_err = retry_err
|
||||
kwargs = retry_kwargs
|
||||
|
||||
err_str = str(first_err)
|
||||
if "max_tokens" in err_str or "unsupported_parameter" in err_str:
|
||||
if max_tokens is not None and (
|
||||
"max_tokens" in err_str
|
||||
or "unsupported_parameter" in err_str
|
||||
or _is_unsupported_parameter_error(first_err, "max_tokens")
|
||||
):
|
||||
kwargs.pop("max_tokens", None)
|
||||
kwargs["max_completion_tokens"] = max_tokens
|
||||
try:
|
||||
@@ -2985,6 +3109,7 @@ def call_llm(
|
||||
api_key=resolved_api_key,
|
||||
api_mode=resolved_api_mode,
|
||||
main_runtime=main_runtime,
|
||||
is_vision=(task == "vision"),
|
||||
)
|
||||
if refreshed_client is not None:
|
||||
logger.info("Auxiliary %s: refreshed Nous runtime credentials after 401, retrying",
|
||||
@@ -3221,8 +3346,35 @@ async def async_call_llm(
|
||||
return _validate_llm_response(
|
||||
await client.chat.completions.create(**kwargs), task)
|
||||
except Exception as first_err:
|
||||
if "temperature" in kwargs and _is_unsupported_temperature_error(first_err):
|
||||
retry_kwargs = dict(kwargs)
|
||||
retry_kwargs.pop("temperature", None)
|
||||
logger.info(
|
||||
"Auxiliary %s (async): provider rejected temperature; retrying once without it",
|
||||
task or "call",
|
||||
)
|
||||
try:
|
||||
return _validate_llm_response(
|
||||
await client.chat.completions.create(**retry_kwargs), task)
|
||||
except Exception as retry_err:
|
||||
retry_err_str = str(retry_err)
|
||||
if not (
|
||||
_is_payment_error(retry_err)
|
||||
or _is_connection_error(retry_err)
|
||||
or _is_auth_error(retry_err)
|
||||
or "max_tokens" in retry_err_str
|
||||
or "unsupported_parameter" in retry_err_str
|
||||
):
|
||||
raise
|
||||
first_err = retry_err
|
||||
kwargs = retry_kwargs
|
||||
|
||||
err_str = str(first_err)
|
||||
if "max_tokens" in err_str or "unsupported_parameter" in err_str:
|
||||
if max_tokens is not None and (
|
||||
"max_tokens" in err_str
|
||||
or "unsupported_parameter" in err_str
|
||||
or _is_unsupported_parameter_error(first_err, "max_tokens")
|
||||
):
|
||||
kwargs.pop("max_tokens", None)
|
||||
kwargs["max_completion_tokens"] = max_tokens
|
||||
try:
|
||||
@@ -3248,6 +3400,7 @@ async def async_call_llm(
|
||||
base_url=resolved_base_url,
|
||||
api_key=resolved_api_key,
|
||||
api_mode=resolved_api_mode,
|
||||
is_vision=(task == "vision"),
|
||||
)
|
||||
if refreshed_client is not None:
|
||||
logger.info("Auxiliary %s (async): refreshed Nous runtime credentials after 401, retrying",
|
||||
@@ -3316,7 +3469,9 @@ async def async_call_llm(
|
||||
extra_body=effective_extra_body,
|
||||
base_url=str(getattr(fb_client, "base_url", "") or ""))
|
||||
# Convert sync fallback client to async
|
||||
async_fb, async_fb_model = _to_async_client(fb_client, fb_model or "")
|
||||
async_fb, async_fb_model = _to_async_client(
|
||||
fb_client, fb_model or "", is_vision=(task == "vision")
|
||||
)
|
||||
if async_fb_model and async_fb_model != fb_kwargs.get("model"):
|
||||
fb_kwargs["model"] = async_fb_model
|
||||
return _validate_llm_response(
|
||||
|
||||
@@ -44,22 +44,31 @@ _TOOL_CALL_LEAK_PATTERN = re.compile(
|
||||
# Multimodal content helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _chat_content_to_responses_parts(content: Any) -> List[Dict[str, Any]]:
|
||||
def _chat_content_to_responses_parts(content: Any, *, role: str = "user") -> List[Dict[str, Any]]:
|
||||
"""Convert chat-style multimodal content to Responses API input parts.
|
||||
|
||||
Input: ``[{"type":"text"|"image_url", ...}]`` (native OpenAI Chat format)
|
||||
Output: ``[{"type":"input_text"|"input_image", ...}]`` (Responses format)
|
||||
Output: ``[{"type":"input_text"|"output_text"|"input_image", ...}]`` (Responses format)
|
||||
|
||||
The ``role`` parameter controls the text content type:
|
||||
- ``"user"`` (default) → ``"input_text"``
|
||||
- ``"assistant"`` → ``"output_text"``
|
||||
|
||||
The Responses API rejects ``input_text`` inside assistant messages and
|
||||
``output_text`` inside user messages, so callers MUST pass the correct
|
||||
role for the message being converted.
|
||||
|
||||
Returns an empty list when ``content`` is not a list or contains no
|
||||
recognized parts — callers fall back to the string path.
|
||||
"""
|
||||
text_type = "output_text" if role == "assistant" else "input_text"
|
||||
if not isinstance(content, list):
|
||||
return []
|
||||
converted: List[Dict[str, Any]] = []
|
||||
for part in content:
|
||||
if isinstance(part, str):
|
||||
if part:
|
||||
converted.append({"type": "input_text", "text": part})
|
||||
converted.append({"type": text_type, "text": part})
|
||||
continue
|
||||
if not isinstance(part, dict):
|
||||
continue
|
||||
@@ -67,7 +76,7 @@ def _chat_content_to_responses_parts(content: Any) -> List[Dict[str, Any]]:
|
||||
if ptype in {"text", "input_text", "output_text"}:
|
||||
text = part.get("text")
|
||||
if isinstance(text, str) and text:
|
||||
converted.append({"type": "input_text", "text": text})
|
||||
converted.append({"type": text_type, "text": text})
|
||||
continue
|
||||
if ptype in {"image_url", "input_image"}:
|
||||
image_ref = part.get("image_url")
|
||||
@@ -218,6 +227,23 @@ def _responses_tools(tools: Optional[List[Dict[str, Any]]] = None) -> Optional[L
|
||||
# Message format conversion
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_RESPONSE_MESSAGE_STATUSES = {"completed", "incomplete", "in_progress"}
|
||||
|
||||
|
||||
def _normalize_responses_message_status(value: Any, *, default: str = "completed") -> str:
|
||||
"""Normalize a Responses assistant message status for replay.
|
||||
|
||||
The API accepts completed/incomplete/in_progress on replayed assistant
|
||||
output messages. Preserve those exactly (modulo case/hyphen spelling) so
|
||||
incomplete Codex continuation turns don't get falsely marked completed.
|
||||
"""
|
||||
if isinstance(value, str):
|
||||
status = value.strip().lower().replace("-", "_").replace(" ", "_")
|
||||
if status in _RESPONSE_MESSAGE_STATUSES:
|
||||
return status
|
||||
return default
|
||||
|
||||
|
||||
def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
||||
"""Convert internal chat-style messages to Responses input items."""
|
||||
items: List[Dict[str, Any]] = []
|
||||
@@ -233,9 +259,10 @@ def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Di
|
||||
if role in {"user", "assistant"}:
|
||||
content = msg.get("content", "")
|
||||
if isinstance(content, list):
|
||||
content_parts = _chat_content_to_responses_parts(content)
|
||||
content_parts = _chat_content_to_responses_parts(content, role=role)
|
||||
text_type = "output_text" if role == "assistant" else "input_text"
|
||||
content_text = "".join(
|
||||
p.get("text", "") for p in content_parts if p.get("type") == "input_text"
|
||||
p.get("text", "") for p in content_parts if p.get("type") == text_type
|
||||
)
|
||||
else:
|
||||
content_parts = []
|
||||
@@ -262,7 +289,57 @@ def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Di
|
||||
seen_item_ids.add(item_id)
|
||||
has_codex_reasoning = True
|
||||
|
||||
if content_parts:
|
||||
# Replay exact assistant message items (with id/phase) from
|
||||
# previous turns so the API can maintain prefix-cache hits.
|
||||
# OpenAI docs: "preserve and resend phase on all assistant
|
||||
# messages — dropping it can degrade performance."
|
||||
codex_message_items = msg.get("codex_message_items")
|
||||
replayed_message_items = 0
|
||||
if isinstance(codex_message_items, list):
|
||||
for raw_item in codex_message_items:
|
||||
if not isinstance(raw_item, dict):
|
||||
continue
|
||||
if raw_item.get("type") != "message" or raw_item.get("role") != "assistant":
|
||||
continue
|
||||
raw_content_parts = raw_item.get("content")
|
||||
if not isinstance(raw_content_parts, list):
|
||||
continue
|
||||
|
||||
normalized_content_parts = []
|
||||
for part in raw_content_parts:
|
||||
if not isinstance(part, dict):
|
||||
continue
|
||||
part_type = str(part.get("type") or "").strip()
|
||||
if part_type not in {"output_text", "text"}:
|
||||
continue
|
||||
text = part.get("text", "")
|
||||
if text is None:
|
||||
text = ""
|
||||
if not isinstance(text, str):
|
||||
text = str(text)
|
||||
normalized_content_parts.append({"type": "output_text", "text": text})
|
||||
|
||||
if not normalized_content_parts:
|
||||
continue
|
||||
|
||||
replay_item = {
|
||||
"type": "message",
|
||||
"role": "assistant",
|
||||
"status": _normalize_responses_message_status(raw_item.get("status")),
|
||||
"content": normalized_content_parts,
|
||||
}
|
||||
item_id = raw_item.get("id")
|
||||
if isinstance(item_id, str) and item_id.strip():
|
||||
replay_item["id"] = item_id.strip()
|
||||
phase = raw_item.get("phase")
|
||||
if isinstance(phase, str) and phase.strip():
|
||||
replay_item["phase"] = phase.strip()
|
||||
items.append(replay_item)
|
||||
replayed_message_items += 1
|
||||
|
||||
if replayed_message_items > 0:
|
||||
pass
|
||||
elif content_parts:
|
||||
items.append({"role": "assistant", "content": content_parts})
|
||||
elif content_text.strip():
|
||||
items.append({"role": "assistant", "content": content_text})
|
||||
@@ -422,6 +499,47 @@ def _preflight_codex_input_items(raw_items: Any) -> List[Dict[str, Any]]:
|
||||
normalized.append(reasoning_item)
|
||||
continue
|
||||
|
||||
if item_type == "message":
|
||||
role = item.get("role")
|
||||
if role != "assistant":
|
||||
raise ValueError(f"Codex Responses input[{idx}] message items must have role='assistant'.")
|
||||
content = item.get("content")
|
||||
if not isinstance(content, list):
|
||||
raise ValueError(f"Codex Responses input[{idx}] message item must have content list.")
|
||||
normalized_content = []
|
||||
for part_idx, part in enumerate(content):
|
||||
if not isinstance(part, dict):
|
||||
raise ValueError(
|
||||
f"Codex Responses input[{idx}] message content[{part_idx}] must be an object."
|
||||
)
|
||||
part_type = part.get("type")
|
||||
if part_type not in {"output_text", "text"}:
|
||||
raise ValueError(
|
||||
f"Codex Responses input[{idx}] message content[{part_idx}] has unsupported type {part_type!r}."
|
||||
)
|
||||
text = part.get("text", "")
|
||||
if text is None:
|
||||
text = ""
|
||||
if not isinstance(text, str):
|
||||
text = str(text)
|
||||
normalized_content.append({"type": "output_text", "text": text})
|
||||
if not normalized_content:
|
||||
raise ValueError(f"Codex Responses input[{idx}] message item must contain at least one text part.")
|
||||
normalized_item: Dict[str, Any] = {
|
||||
"type": "message",
|
||||
"role": "assistant",
|
||||
"status": _normalize_responses_message_status(item.get("status")),
|
||||
"content": normalized_content,
|
||||
}
|
||||
item_id = item.get("id")
|
||||
if isinstance(item_id, str) and item_id.strip():
|
||||
normalized_item["id"] = item_id.strip()
|
||||
phase = item.get("phase")
|
||||
if isinstance(phase, str) and phase.strip():
|
||||
normalized_item["phase"] = phase.strip()
|
||||
normalized.append(normalized_item)
|
||||
continue
|
||||
|
||||
role = item.get("role")
|
||||
if role in {"user", "assistant"}:
|
||||
content = item.get("content", "")
|
||||
@@ -429,13 +547,16 @@ def _preflight_codex_input_items(raw_items: Any) -> List[Dict[str, Any]]:
|
||||
content = ""
|
||||
if isinstance(content, list):
|
||||
# Multimodal content from ``_chat_messages_to_responses_input``
|
||||
# is already in Responses format (``input_text`` / ``input_image``).
|
||||
# Validate each part and pass through.
|
||||
# is already in Responses format (``input_text`` / ``output_text``
|
||||
# / ``input_image``). Validate each part and pass through.
|
||||
# Use the correct text type for the role — ``output_text`` for
|
||||
# assistant messages, ``input_text`` for user messages.
|
||||
text_type = "output_text" if role == "assistant" else "input_text"
|
||||
validated: List[Dict[str, Any]] = []
|
||||
for part_idx, part in enumerate(content):
|
||||
if isinstance(part, str):
|
||||
if part:
|
||||
validated.append({"type": "input_text", "text": part})
|
||||
validated.append({"type": text_type, "text": part})
|
||||
continue
|
||||
if not isinstance(part, dict):
|
||||
raise ValueError(
|
||||
@@ -446,7 +567,7 @@ def _preflight_codex_input_items(raw_items: Any) -> List[Dict[str, Any]]:
|
||||
text = part.get("text", "")
|
||||
if not isinstance(text, str):
|
||||
text = str(text or "")
|
||||
validated.append({"type": "input_text", "text": text})
|
||||
validated.append({"type": text_type, "text": text})
|
||||
elif ptype in {"input_image", "image_url"}:
|
||||
image_ref = part.get("image_url", "")
|
||||
detail = part.get("detail")
|
||||
@@ -703,6 +824,7 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]:
|
||||
content_parts: List[str] = []
|
||||
reasoning_parts: List[str] = []
|
||||
reasoning_items_raw: List[Dict[str, Any]] = []
|
||||
message_items_raw: List[Dict[str, Any]] = []
|
||||
tool_calls: List[Any] = []
|
||||
has_incomplete_items = response_status in {"queued", "in_progress", "incomplete"}
|
||||
saw_commentary_phase = False
|
||||
@@ -721,6 +843,7 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]:
|
||||
|
||||
if item_type == "message":
|
||||
item_phase = getattr(item, "phase", None)
|
||||
normalized_phase = None
|
||||
if isinstance(item_phase, str):
|
||||
normalized_phase = item_phase.strip().lower()
|
||||
if normalized_phase in {"commentary", "analysis"}:
|
||||
@@ -730,6 +853,18 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]:
|
||||
message_text = _extract_responses_message_text(item)
|
||||
if message_text:
|
||||
content_parts.append(message_text)
|
||||
raw_message_item: Dict[str, Any] = {
|
||||
"type": "message",
|
||||
"role": "assistant",
|
||||
"status": _normalize_responses_message_status(item_status),
|
||||
"content": [{"type": "output_text", "text": message_text}],
|
||||
}
|
||||
item_id = getattr(item, "id", None)
|
||||
if isinstance(item_id, str) and item_id:
|
||||
raw_message_item["id"] = item_id
|
||||
if normalized_phase:
|
||||
raw_message_item["phase"] = normalized_phase
|
||||
message_items_raw.append(raw_message_item)
|
||||
elif item_type == "reasoning":
|
||||
reasoning_text = _extract_responses_reasoning_text(item)
|
||||
if reasoning_text:
|
||||
@@ -842,6 +977,7 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]:
|
||||
reasoning_content=None,
|
||||
reasoning_details=None,
|
||||
codex_reasoning_items=reasoning_items_raw or None,
|
||||
codex_message_items=message_items_raw or None,
|
||||
)
|
||||
|
||||
if tool_calls:
|
||||
|
||||
@@ -61,9 +61,52 @@ _PRUNED_TOOL_PLACEHOLDER = "[Old tool output cleared to save context space]"
|
||||
|
||||
# Chars per token rough estimate
|
||||
_CHARS_PER_TOKEN = 4
|
||||
# Flat token cost per attached image part. Real cost varies by provider and
|
||||
# dimensions (Anthropic ≈ width×height/750, GPT-4o up to ~1700 for
|
||||
# high-detail 2048×2048, Gemini 258/tile), but 1600 is a realistic ceiling
|
||||
# that keeps compression budgeting honest for multi-image conversations.
|
||||
# Matches Claude Code's IMAGE_TOKEN_ESTIMATE constant.
|
||||
_IMAGE_TOKEN_ESTIMATE = 1600
|
||||
# Same figure expressed in the char-budget currency the rest of the
|
||||
# compressor speaks in. Used when accumulating message "content length"
|
||||
# for tail-cut decisions.
|
||||
_IMAGE_CHAR_EQUIVALENT = _IMAGE_TOKEN_ESTIMATE * _CHARS_PER_TOKEN
|
||||
_SUMMARY_FAILURE_COOLDOWN_SECONDS = 600
|
||||
|
||||
|
||||
def _content_length_for_budget(raw_content: Any) -> int:
|
||||
"""Return the effective char-length of a message's content for token budgeting.
|
||||
|
||||
Plain strings: ``len(content)``. Multimodal lists: sum of text-part
|
||||
``len(text)`` plus a flat ``_IMAGE_CHAR_EQUIVALENT`` per image part
|
||||
(``image_url`` / ``input_image`` / Anthropic-style ``image``). This
|
||||
keeps the compressor from treating a turn with 5 attached images as
|
||||
near-zero tokens just because the text part is empty.
|
||||
"""
|
||||
if isinstance(raw_content, str):
|
||||
return len(raw_content)
|
||||
if not isinstance(raw_content, list):
|
||||
return len(str(raw_content or ""))
|
||||
|
||||
total = 0
|
||||
for p in raw_content:
|
||||
if isinstance(p, str):
|
||||
total += len(p)
|
||||
continue
|
||||
if not isinstance(p, dict):
|
||||
total += len(str(p))
|
||||
continue
|
||||
ptype = p.get("type")
|
||||
if ptype in {"image_url", "input_image", "image"}:
|
||||
total += _IMAGE_CHAR_EQUIVALENT
|
||||
else:
|
||||
# text / input_text / tool_result-with-text / anything else with
|
||||
# a text field. Ignore the raw base64 payload inside image_url
|
||||
# dicts — dimensions don't matter, only whether it's an image.
|
||||
total += len(p.get("text", "") or "")
|
||||
return total
|
||||
|
||||
|
||||
def _content_text_for_contains(content: Any) -> str:
|
||||
"""Return a best-effort text view of message content.
|
||||
|
||||
@@ -484,7 +527,7 @@ class ContextCompressor(ContextEngine):
|
||||
for i in range(len(result) - 1, -1, -1):
|
||||
msg = result[i]
|
||||
raw_content = msg.get("content") or ""
|
||||
content_len = sum(len(p.get("text", "")) for p in raw_content) if isinstance(raw_content, list) else len(raw_content)
|
||||
content_len = _content_length_for_budget(raw_content)
|
||||
msg_tokens = content_len // _CHARS_PER_TOKEN + 10
|
||||
for tc in msg.get("tool_calls") or []:
|
||||
if isinstance(tc, dict):
|
||||
@@ -1082,8 +1125,9 @@ The user has requested that this compaction PRIORITISE preserving all informatio
|
||||
|
||||
for i in range(n - 1, head_end - 1, -1):
|
||||
msg = messages[i]
|
||||
content = msg.get("content") or ""
|
||||
msg_tokens = len(content) // _CHARS_PER_TOKEN + 10 # +10 for role/metadata
|
||||
raw_content = msg.get("content") or ""
|
||||
content_len = _content_length_for_budget(raw_content)
|
||||
msg_tokens = content_len // _CHARS_PER_TOKEN + 10 # +10 for role/metadata
|
||||
# Include tool call arguments in estimate
|
||||
for tc in msg.get("tool_calls") or []:
|
||||
if isinstance(tc, dict):
|
||||
|
||||
@@ -14,6 +14,7 @@ from datetime import datetime
|
||||
from typing import Any, Dict, List, Optional, Set, Tuple
|
||||
|
||||
from hermes_constants import OPENROUTER_BASE_URL
|
||||
from hermes_cli.config import get_env_value
|
||||
import hermes_cli.auth as auth_mod
|
||||
from hermes_cli.auth import (
|
||||
CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS,
|
||||
@@ -1273,7 +1274,8 @@ def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool
|
||||
def _is_source_suppressed(_p, _s): # type: ignore[misc]
|
||||
return False
|
||||
if provider == "openrouter":
|
||||
token = os.getenv("OPENROUTER_API_KEY", "").strip()
|
||||
# Check both os.environ and ~/.hermes/.env file
|
||||
token = (get_env_value("OPENROUTER_API_KEY") or "").strip()
|
||||
if token:
|
||||
source = "env:OPENROUTER_API_KEY"
|
||||
if _is_source_suppressed(provider, source):
|
||||
@@ -1299,7 +1301,7 @@ def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool
|
||||
|
||||
env_url = ""
|
||||
if pconfig.base_url_env_var:
|
||||
env_url = os.getenv(pconfig.base_url_env_var, "").strip().rstrip("/")
|
||||
env_url = (get_env_value(pconfig.base_url_env_var) or "").strip().rstrip("/")
|
||||
|
||||
env_vars = list(pconfig.api_key_env_vars)
|
||||
if provider == "anthropic":
|
||||
@@ -1310,7 +1312,8 @@ def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool
|
||||
]
|
||||
|
||||
for env_var in env_vars:
|
||||
token = os.getenv(env_var, "").strip()
|
||||
# Check both os.environ and ~/.hermes/.env file
|
||||
token = (get_env_value(env_var) or "").strip()
|
||||
if not token:
|
||||
continue
|
||||
source = f"env:{env_var}"
|
||||
|
||||
@@ -42,6 +42,7 @@ class FailoverReason(enum.Enum):
|
||||
# Context / payload
|
||||
context_overflow = "context_overflow" # Context too large — compress, not failover
|
||||
payload_too_large = "payload_too_large" # 413 — compress payload
|
||||
image_too_large = "image_too_large" # Native image part exceeds provider's per-image limit — shrink and retry
|
||||
|
||||
# Model
|
||||
model_not_found = "model_not_found" # 404 or invalid model — fallback to different model
|
||||
@@ -147,6 +148,20 @@ _PAYLOAD_TOO_LARGE_PATTERNS = [
|
||||
"error code: 413",
|
||||
]
|
||||
|
||||
# Image-size patterns. Matched against 400 bodies (not 413) because most
|
||||
# providers return a 400 with a specific image-too-big message before the
|
||||
# whole request hits the 413 size limit. Anthropic's wording is the most
|
||||
# important here (hard 5 MB per image, returned as
|
||||
# "messages.N.content.K.image.source.base64: image exceeds 5 MB maximum").
|
||||
_IMAGE_TOO_LARGE_PATTERNS = [
|
||||
"image exceeds", # Anthropic: "image exceeds 5 MB maximum"
|
||||
"image too large", # generic
|
||||
"image_too_large", # error_code variant
|
||||
"image size exceeds", # variant
|
||||
# "request_too_large" on a request known to contain an image → image is
|
||||
# the likely culprit; we still try the shrink path before giving up.
|
||||
]
|
||||
|
||||
# Context overflow patterns
|
||||
_CONTEXT_OVERFLOW_PATTERNS = [
|
||||
"context length",
|
||||
@@ -671,6 +686,15 @@ def _classify_400(
|
||||
) -> ClassifiedError:
|
||||
"""Classify 400 Bad Request — context overflow, format error, or generic."""
|
||||
|
||||
# Image-too-large from 400 (Anthropic's 5 MB per-image check fires this way).
|
||||
# Must be checked BEFORE context_overflow because messages can trip both
|
||||
# patterns ("exceeds" + "image") and image-shrink is a cheaper recovery.
|
||||
if any(p in error_msg for p in _IMAGE_TOO_LARGE_PATTERNS):
|
||||
return result_fn(
|
||||
FailoverReason.image_too_large,
|
||||
retryable=True,
|
||||
)
|
||||
|
||||
# Context overflow from 400
|
||||
if any(p in error_msg for p in _CONTEXT_OVERFLOW_PATTERNS):
|
||||
return result_fn(
|
||||
@@ -798,6 +822,13 @@ def _classify_by_message(
|
||||
should_compress=True,
|
||||
)
|
||||
|
||||
# Image-too-large patterns (from message text when no status_code)
|
||||
if any(p in error_msg for p in _IMAGE_TOO_LARGE_PATTERNS):
|
||||
return result_fn(
|
||||
FailoverReason.image_too_large,
|
||||
retryable=True,
|
||||
)
|
||||
|
||||
# Usage-limit patterns need the same disambiguation as 402: some providers
|
||||
# surface "usage limit" errors without an HTTP status code. A transient
|
||||
# signal ("try again", "resets at", …) means it's a periodic quota, not
|
||||
|
||||
@@ -0,0 +1,236 @@
|
||||
"""Routing helpers for inbound user-attached images.
|
||||
|
||||
Two modes:
|
||||
|
||||
native — attach images as OpenAI-style ``image_url`` content parts on the
|
||||
user turn. Provider adapters (Anthropic, Gemini, Bedrock, Codex,
|
||||
OpenAI chat.completions) already translate these into their
|
||||
vendor-specific multimodal formats.
|
||||
|
||||
text — run ``vision_analyze`` on each image up-front and prepend the
|
||||
description to the user's text. The model never sees the pixels;
|
||||
it only sees a lossy text summary. This is the pre-existing
|
||||
behaviour and still the right choice for non-vision models.
|
||||
|
||||
The decision is made once per message turn by :func:`decide_image_input_mode`.
|
||||
It reads ``agent.image_input_mode`` from config.yaml (``auto`` | ``native``
|
||||
| ``text``, default ``auto``) and the active model's capability metadata.
|
||||
|
||||
In ``auto`` mode:
|
||||
- If the user has explicitly configured ``auxiliary.vision.provider``
|
||||
(i.e. not ``auto`` and not empty), we assume they want the text pipeline
|
||||
regardless of the main model — they've opted in to a specific vision
|
||||
backend for a reason (cost, quality, local-only, etc.).
|
||||
- Otherwise, if the active model reports ``supports_vision=True`` in its
|
||||
models.dev metadata, we attach natively.
|
||||
- Otherwise (non-vision model, no explicit override), we fall back to text.
|
||||
|
||||
This keeps ``vision_analyze`` surfaced as a tool in every session — skills
|
||||
and agent flows that chain it (browser screenshots, deeper inspection of
|
||||
URL-referenced images, style-gating loops) keep working. The routing only
|
||||
affects *how user-attached images on the current turn* are presented to the
|
||||
main model.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
import logging
|
||||
import mimetypes
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
_VALID_MODES = frozenset({"auto", "native", "text"})
|
||||
|
||||
|
||||
def _coerce_mode(raw: Any) -> str:
|
||||
"""Normalize a config value into one of the valid modes."""
|
||||
if not isinstance(raw, str):
|
||||
return "auto"
|
||||
val = raw.strip().lower()
|
||||
if val in _VALID_MODES:
|
||||
return val
|
||||
return "auto"
|
||||
|
||||
|
||||
def _explicit_aux_vision_override(cfg: Optional[Dict[str, Any]]) -> bool:
|
||||
"""True when the user configured a specific auxiliary vision backend.
|
||||
|
||||
An explicit override means the user *wants* the text pipeline (they're
|
||||
paying for a dedicated vision model), so we don't silently bypass it.
|
||||
"""
|
||||
if not isinstance(cfg, dict):
|
||||
return False
|
||||
aux = cfg.get("auxiliary") or {}
|
||||
if not isinstance(aux, dict):
|
||||
return False
|
||||
vision = aux.get("vision") or {}
|
||||
if not isinstance(vision, dict):
|
||||
return False
|
||||
|
||||
provider = str(vision.get("provider") or "").strip().lower()
|
||||
model = str(vision.get("model") or "").strip()
|
||||
base_url = str(vision.get("base_url") or "").strip()
|
||||
|
||||
# "auto" / "" / blank = not explicit
|
||||
if provider in ("", "auto") and not model and not base_url:
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def _lookup_supports_vision(provider: str, model: str) -> Optional[bool]:
|
||||
"""Return True/False if we can resolve caps, None if unknown."""
|
||||
if not provider or not model:
|
||||
return None
|
||||
try:
|
||||
from agent.models_dev import get_model_capabilities
|
||||
caps = get_model_capabilities(provider, model)
|
||||
except Exception as exc: # pragma: no cover - defensive
|
||||
logger.debug("image_routing: caps lookup failed for %s:%s — %s", provider, model, exc)
|
||||
return None
|
||||
if caps is None:
|
||||
return None
|
||||
return bool(caps.supports_vision)
|
||||
|
||||
|
||||
def decide_image_input_mode(
    provider: str,
    model: str,
    cfg: Optional[Dict[str, Any]],
) -> str:
    """Pick ``"native"`` or ``"text"`` image handling for the current turn.

    Args:
        provider: active inference provider ID (e.g. ``"anthropic"``).
        model: active model slug as it would be sent to the provider.
        cfg: loaded config.yaml dict, or None. Without a usable
            ``agent.image_input_mode`` the mode is treated as ``auto``.

    Returns:
        The configured mode when it is ``native`` or ``text``. In ``auto``:
        ``"text"`` if an explicit auxiliary vision backend is configured,
        ``"native"`` if the model is known to support vision, and ``"text"``
        otherwise (including when capabilities are unknown).
    """
    agent_cfg = (cfg.get("agent") or {}) if isinstance(cfg, dict) else None
    if isinstance(agent_cfg, dict):
        configured = _coerce_mode(agent_cfg.get("image_input_mode"))
    else:
        configured = "auto"

    # A forced mode wins outright.
    if configured in ("native", "text"):
        return configured

    # auto: an explicitly chosen vision backend keeps the text pipeline.
    if _explicit_aux_vision_override(cfg):
        return "text"

    # Only a definite True enables native attachment; None/False stay text.
    return "native" if _lookup_supports_vision(provider, model) is True else "text"
|
||||
|
||||
|
||||
# Image size handling is REACTIVE rather than proactive: we attempt native
|
||||
# attachment at full size regardless of provider, and rely on
|
||||
# ``run_agent._try_shrink_image_parts_in_messages`` to shrink + retry if
|
||||
# the provider rejects the request (e.g. Anthropic's hard 5 MB per-image
|
||||
# ceiling returned as HTTP 400 "image exceeds 5 MB maximum").
|
||||
#
|
||||
# Why reactive: our knowledge of provider ceilings is partial and evolving
|
||||
# (OpenAI accepts 49 MB+, Anthropic 5 MB, Gemini 100 MB, others unknown).
|
||||
# A proactive per-provider table would be stale the moment a provider raises
|
||||
# or lowers its limit, and silently degrading quality for users on providers
|
||||
# that would have accepted the full image is the worse failure mode.
|
||||
# The shrink-on-reject path loses 1 API call + maybe 1s of Pillow work when
|
||||
# it fires, which is cheaper than permanent quality loss.
|
||||
|
||||
|
||||
def _guess_mime(path: Path) -> str:
|
||||
mime, _ = mimetypes.guess_type(str(path))
|
||||
if mime and mime.startswith("image/"):
|
||||
return mime
|
||||
# mimetypes on some Linux distros mis-maps .jpg; default to jpeg when
|
||||
# the suffix looks imagey.
|
||||
suffix = path.suffix.lower()
|
||||
return {
|
||||
".jpg": "image/jpeg",
|
||||
".jpeg": "image/jpeg",
|
||||
".png": "image/png",
|
||||
".gif": "image/gif",
|
||||
".webp": "image/webp",
|
||||
".bmp": "image/bmp",
|
||||
}.get(suffix, "image/jpeg")
|
||||
|
||||
|
||||
def _file_to_data_url(path: Path) -> Optional[str]:
    """Read a local image and return it as a ``data:<mime>;base64,...`` URL.

    The bytes are encoded exactly as read; no resizing or size check is done
    here. Per the surrounding module notes, oversize handling is left to the
    agent retry loop (``run_agent._try_shrink_image_parts_in_messages``).

    Returns:
        The data URL, or None when reading the file raises (missing file,
        permission denied, ...). The failure is logged at WARNING level.
    """
    try:
        payload = path.read_bytes()
    except Exception as exc:
        logger.warning("image_routing: failed to read %s — %s", path, exc)
        return None

    mime_type = _guess_mime(path)
    encoded = base64.b64encode(payload).decode("ascii")
    return "data:" + mime_type + ";base64," + encoded
|
||||
|
||||
|
||||
def build_native_content_parts(
    user_text: str,
    image_paths: List[str],
) -> Tuple[List[Dict[str, Any]], List[str]]:
    """Assemble an OpenAI-style ``content`` list for a user turn.

    Shape:
        [{"type": "text", "text": "..."},
         {"type": "image_url", "image_url": {"url": "data:image/png;base64,..."}},
         ...]

    The stripped user text (if any) comes first, followed by one
    ``image_url`` part per readable image, in input order. Images are
    embedded at their native size; nothing here shrinks them.

    When the text is empty but at least one image was attached, a neutral
    question is placed first so the turn is not images only.

    Args:
        user_text: the user's message; None/empty/whitespace counts as empty.
        image_paths: local file paths to attach.

    Returns:
        ``(content_parts, skipped_paths)``. ``skipped_paths`` holds the
        ``str()`` of every path that does not exist, is not a regular file,
        or could not be encoded.
    """
    prompt = (user_text or "").strip()

    attachments: List[Dict[str, Any]] = []
    unreadable: List[str] = []
    for candidate in image_paths:
        location = Path(candidate)
        encoded = None
        if location.exists() and location.is_file():
            encoded = _file_to_data_url(location)
        if encoded:
            attachments.append({"type": "image_url", "image_url": {"url": encoded}})
        else:
            unreadable.append(str(candidate))

    if prompt:
        leading: List[Dict[str, Any]] = [{"type": "text", "text": prompt}]
    elif attachments:
        # Empty text + images: add a neutral prompt so the turn isn't just images.
        leading = [{"type": "text", "text": "What do you see in this image?"}]
    else:
        leading = []

    return leading + attachments, unreadable
|
||||
|
||||
|
||||
# Public API of this module; every other name is an internal helper.
__all__ = ["decide_image_input_mode", "build_native_content_parts"]
|
||||
+39
-8
@@ -106,9 +106,11 @@ _endpoint_model_metadata_cache_time: Dict[str, float] = {}
|
||||
_ENDPOINT_MODEL_CACHE_TTL = 300
|
||||
|
||||
# Descending tiers for context length probing when the model is unknown.
|
||||
# We start at 128K (a safe default for most modern models) and step down
|
||||
# on context-length errors until one works.
|
||||
# We start at 256K (covers GPT-5.x, many current large-context models) and
|
||||
# step down on context-length errors until one works. Tier[0] is also the
|
||||
# default fallback when no detection method succeeds.
|
||||
CONTEXT_PROBE_TIERS = [
|
||||
256_000,
|
||||
128_000,
|
||||
64_000,
|
||||
32_000,
|
||||
@@ -143,10 +145,11 @@ DEFAULT_CONTEXT_LENGTHS = {
|
||||
"claude": 200000,
|
||||
# OpenAI — GPT-5 family (most have 400k; specific overrides first)
|
||||
# Source: https://developers.openai.com/api/docs/models
|
||||
# GPT-5.5 (launched Apr 23 2026). 400k is the fallback for providers we
|
||||
# can't probe live. ChatGPT Codex OAuth actually caps lower (272k as of
|
||||
# Apr 2026) and is resolved via _resolve_codex_oauth_context_length().
|
||||
"gpt-5.5": 400000,
|
||||
# GPT-5.5 (launched Apr 23 2026) is 1.05M on the direct OpenAI API and
|
||||
# ChatGPT Codex OAuth caps it at 272K; both paths resolve via their own
|
||||
# provider-aware branches (_resolve_codex_oauth_context_length + models.dev).
|
||||
# This hardcoded value is only reached when every probe misses.
|
||||
"gpt-5.5": 1050000,
|
||||
"gpt-5.4-nano": 400000, # 400k (not 1.05M like full 5.4)
|
||||
"gpt-5.4-mini": 400000, # 400k (not 1.05M like full 5.4)
|
||||
"gpt-5.4": 1050000, # GPT-5.4, GPT-5.4 Pro (1.05M context)
|
||||
@@ -162,7 +165,17 @@ DEFAULT_CONTEXT_LENGTHS = {
|
||||
"gemma-4-31b": 256000,
|
||||
"gemma-3": 131072,
|
||||
"gemma": 8192, # fallback for older gemma models
|
||||
# DeepSeek
|
||||
# DeepSeek — V4 family ships with a 1M context window. The legacy
|
||||
# aliases ``deepseek-chat`` / ``deepseek-reasoner`` are server-side
|
||||
# mapped to the non-thinking / thinking modes of ``deepseek-v4-flash``
|
||||
# and inherit the same 1M window. The ``deepseek`` substring entry
|
||||
# below remains as a 128K fallback for older / unknown DeepSeek model
|
||||
# ids (e.g. via custom endpoints).
|
||||
# https://api-docs.deepseek.com/zh-cn/quick_start/pricing
|
||||
"deepseek-v4-pro": 1_000_000,
|
||||
"deepseek-v4-flash": 1_000_000,
|
||||
"deepseek-chat": 1_000_000,
|
||||
"deepseek-reasoner": 1_000_000,
|
||||
"deepseek": 128000,
|
||||
# Meta
|
||||
"llama": 131072,
|
||||
@@ -1193,6 +1206,7 @@ def get_model_context_length(
|
||||
api_key: str = "",
|
||||
config_context_length: int | None = None,
|
||||
provider: str = "",
|
||||
custom_providers: list | None = None,
|
||||
) -> int:
|
||||
"""Get the context length for a model.
|
||||
|
||||
@@ -1213,6 +1227,23 @@ def get_model_context_length(
|
||||
if config_context_length is not None and isinstance(config_context_length, int) and config_context_length > 0:
|
||||
return config_context_length
|
||||
|
||||
# 0b. custom_providers per-model override — check before any probe.
|
||||
# This closes the gap where /model switch and display paths used to fall
|
||||
# back to 128K despite the user having a per-model context_length set.
|
||||
# See #15779.
|
||||
if custom_providers and base_url and model:
|
||||
try:
|
||||
from hermes_cli.config import get_custom_provider_context_length
|
||||
cp_ctx = get_custom_provider_context_length(
|
||||
model=model,
|
||||
base_url=base_url,
|
||||
custom_providers=custom_providers,
|
||||
)
|
||||
if cp_ctx:
|
||||
return cp_ctx
|
||||
except Exception:
|
||||
pass # fall through to probing
|
||||
|
||||
# Normalise provider-prefixed model names (e.g. "local:model-name" →
|
||||
# "model-name") so cache lookups and server queries use the bare ID that
|
||||
# local servers actually know about. Ollama "model:tag" colons are preserved.
|
||||
@@ -1352,7 +1383,7 @@ def get_model_context_length(
|
||||
# 6. OpenRouter live API metadata (provider-unaware fallback)
|
||||
metadata = fetch_model_metadata()
|
||||
if model in metadata:
|
||||
return metadata[model].get("context_length", 128000)
|
||||
return metadata[model].get("context_length", DEFAULT_FALLBACK_CONTEXT)
|
||||
|
||||
# 8. Hardcoded defaults (fuzzy match — longest key first for specificity)
|
||||
# Only check `default_model in model` (is the key a substring of the input).
|
||||
|
||||
@@ -180,3 +180,145 @@ def format_remaining(seconds: float) -> str:
|
||||
h, remainder = divmod(s, 3600)
|
||||
m = remainder // 60
|
||||
return f"{h}h {m}m" if m else f"{h}h"
|
||||
|
||||
|
||||
# Buckets with reset windows shorter than this are treated as transient
|
||||
# (upstream jitter, secondary throttling) rather than a genuine quota
|
||||
# exhaustion worth a cross-session breaker trip.
|
||||
_MIN_RESET_FOR_BREAKER_SECONDS = 60.0
|
||||
|
||||
|
||||
def is_genuine_nous_rate_limit(
    *,
    headers: Optional[Mapping[str, str]] = None,
    last_known_state: Optional[Any] = None,
) -> bool:
    """Classify a 429 from Nous Portal as a real account rate limit or not.

    Nous Portal fronts several upstream providers (DeepSeek, Kimi, MiMo,
    Hermes, ...) behind a single endpoint, so a 429 has two possible causes:

      (a) The caller's own RPM / RPH / TPM / TPH bucket on Nous is empty.
          This is a genuine limit and lasts until the bucket resets.
      (b) The upstream provider has no capacity for one specific model.
          This is transient, clears in seconds, and is unrelated to the
          caller's Nous quota.

    Tripping the cross-session breaker for (b) would block every Nous
    request, for every model (Nous is one provider key), for minutes even
    though the account is healthy and another model would have worked —
    e.g. DeepSeek V4 Pro 429s ending up blocking Kimi 2.6 and MiMo V2.5 Pro.

    Two signals are checked, in order:

      1. The ``x-ratelimit-*`` headers on the 429 itself. A bucket with
         ``remaining == 0`` and a reset window of at least
         ``_MIN_RESET_FOR_BREAKER_SECONDS`` is taken as proof of (a).
      2. The last-known-good rate-limit state (captured by
         ``_capture_rate_limits()`` on the previous successful response).
         An already-exhausted bucket with a substantial reset window there
         means the 429 is almost certainly (a) continuing.

    If neither fires the 429 is treated as (b): the caller should fail the
    single request, let the retry loop / model switch proceed, and NOT write
    the cross-session breaker file.

    NOTE(review): the state helper reads buckets through attributes
    (``requests_min`` etc.), so a plain dict passed as ``last_known_state``
    would never match — confirm whether dict snapshots are expected here.

    Args:
        headers: headers of the 429 response, if available.
        last_known_state: RateLimitState-like object from a recent successful
            response, or None.

    Returns:
        True when the evidence points at (a), otherwise False.
    """
    # Signal 1: the 429 response's own headers.
    current_buckets = _parse_buckets_from_headers(headers)
    if _has_exhausted_bucket(current_buckets):
        return True

    # Signal 2: state remembered from the last successful response.
    if last_known_state is None:
        return False
    return bool(_has_exhausted_bucket_in_object(last_known_state))
|
||||
|
||||
|
||||
def _parse_buckets_from_headers(
|
||||
headers: Optional[Mapping[str, str]],
|
||||
) -> dict[str, tuple[Optional[int], Optional[float]]]:
|
||||
"""Extract (remaining, reset_seconds) per bucket from x-ratelimit-* headers.
|
||||
|
||||
Returns empty dict when no rate-limit headers are present.
|
||||
"""
|
||||
if not headers:
|
||||
return {}
|
||||
|
||||
lowered = {k.lower(): v for k, v in headers.items()}
|
||||
if not any(k.startswith("x-ratelimit-") for k in lowered):
|
||||
return {}
|
||||
|
||||
def _maybe_int(raw: Optional[str]) -> Optional[int]:
|
||||
if raw is None:
|
||||
return None
|
||||
try:
|
||||
return int(float(raw))
|
||||
except (TypeError, ValueError):
|
||||
return None
|
||||
|
||||
def _maybe_float(raw: Optional[str]) -> Optional[float]:
|
||||
if raw is None:
|
||||
return None
|
||||
try:
|
||||
return float(raw)
|
||||
except (TypeError, ValueError):
|
||||
return None
|
||||
|
||||
result: dict[str, tuple[Optional[int], Optional[float]]] = {}
|
||||
for tag in ("requests", "requests-1h", "tokens", "tokens-1h"):
|
||||
remaining = _maybe_int(lowered.get(f"x-ratelimit-remaining-{tag}"))
|
||||
reset = _maybe_float(lowered.get(f"x-ratelimit-reset-{tag}"))
|
||||
if remaining is not None or reset is not None:
|
||||
result[tag] = (remaining, reset)
|
||||
return result
|
||||
|
||||
|
||||
def _has_exhausted_bucket(
|
||||
buckets: Mapping[str, tuple[Optional[int], Optional[float]]],
|
||||
) -> bool:
|
||||
"""Return True when any bucket has remaining == 0 AND a meaningful reset window."""
|
||||
for remaining, reset in buckets.values():
|
||||
if remaining is None or remaining > 0:
|
||||
continue
|
||||
if reset is None:
|
||||
continue
|
||||
if reset >= _MIN_RESET_FOR_BREAKER_SECONDS:
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def _has_exhausted_bucket_in_object(state: Any) -> bool:
|
||||
"""Check a RateLimitState-like object for an exhausted bucket.
|
||||
|
||||
Accepts the dataclass from ``agent.rate_limit_tracker`` (buckets
|
||||
exposed as attributes ``requests_min``, ``requests_hour``,
|
||||
``tokens_min``, ``tokens_hour``) and falls back gracefully for any
|
||||
object missing those attributes.
|
||||
"""
|
||||
for attr in ("requests_min", "requests_hour", "tokens_min", "tokens_hour"):
|
||||
bucket = getattr(state, attr, None)
|
||||
if bucket is None:
|
||||
continue
|
||||
limit = getattr(bucket, "limit", 0) or 0
|
||||
remaining = getattr(bucket, "remaining", 0) or 0
|
||||
# Prefer the adjusted "remaining_seconds_now" property when present;
|
||||
# fall back to raw reset_seconds.
|
||||
reset = getattr(bucket, "remaining_seconds_now", None)
|
||||
if reset is None:
|
||||
reset = getattr(bucket, "reset_seconds", 0.0) or 0.0
|
||||
if limit <= 0:
|
||||
continue
|
||||
if remaining > 0:
|
||||
continue
|
||||
if reset >= _MIN_RESET_FOR_BREAKER_SECONDS:
|
||||
return True
|
||||
return False
|
||||
|
||||
@@ -0,0 +1,191 @@
|
||||
"""
|
||||
Contextual first-touch onboarding hints.
|
||||
|
||||
Instead of blocking first-run questionnaires, show a one-time hint the *first*
|
||||
time a user hits a behavior fork — message-while-running, first long-running
|
||||
tool, etc. Each hint is shown once per install (tracked in ``config.yaml`` under
|
||||
``onboarding.seen.<flag>``) and then never again.
|
||||
|
||||
Keep this module tiny and dependency-free so both the CLI and gateway can import
|
||||
it without pulling in heavy modules.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Any, Mapping, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# Flag names (stable — used as config.yaml keys under onboarding.seen)
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
# One flag per first-touch hint. The values are persisted as keys under
# ``onboarding.seen`` in config.yaml, so they must stay stable.
BUSY_INPUT_FLAG = "busy_input_prompt"  # first message sent while the agent is busy
TOOL_PROGRESS_FLAG = "tool_progress_prompt"  # first long-running tool
OPENCLAW_RESIDUE_FLAG = "openclaw_residue_cleanup"  # leftover ~/.openclaw/ detected
|
||||
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# Hint content
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
def busy_input_hint_gateway(mode: str) -> str:
    """Build the gateway hint for the first message sent while the agent is busy.

    The wording follows the busy_input_mode that was just applied so the tip
    matches what actually happened to the message.

    Args:
        mode: effective busy_input_mode. ``"queue"`` and ``"steer"`` have
            dedicated wording; every other value gets the interrupt wording.

    Returns:
        The markdown-formatted hint text.
    """
    # All three variants share the same opening and closing.
    opening = "💡 First-time tip — "
    closing = "`/busy status` to check. This notice won't appear again."

    if mode == "queue":
        middle = (
            "I queued your message instead of interrupting. Send "
            "`/busy interrupt` to make new messages stop the current task "
            "immediately, or "
        )
    elif mode == "steer":
        middle = (
            "I steered your message into the current run; it will arrive "
            "after the next tool call instead of interrupting. Send "
            "`/busy interrupt` or `/busy queue` to change this, or "
        )
    else:
        middle = (
            "I just interrupted my current task to answer you. Send "
            "`/busy queue` to queue follow-ups for after the current task "
            "instead, `/busy steer` to inject them mid-run without "
            "interrupting, or "
        )
    return opening + middle + closing
|
||||
|
||||
|
||||
def busy_input_hint_cli(mode: str) -> str:
    """Build the CLI (plain text, no markdown) variant of the busy-input hint.

    Args:
        mode: effective busy_input_mode. ``"queue"`` and ``"steer"`` have
            dedicated wording; every other value gets the interrupt wording.

    Returns:
        The hint text.
    """
    # Shared opening and closing; only the middle depends on the mode.
    opening = "(tip) Your message "
    closing = " This tip only shows once."

    if mode == "queue":
        middle = (
            "was queued for the next turn. Use /busy interrupt to make "
            "Enter stop the current run instead, or /busy steer to inject "
            "mid-run."
        )
    elif mode == "steer":
        middle = (
            "was steered into the current run; it arrives after the next "
            "tool call. Use /busy interrupt or /busy queue to change this."
        )
    else:
        middle = (
            "interrupted the current run. Use /busy queue to queue "
            "messages for the next turn instead, or /busy steer to inject "
            "mid-run."
        )
    return opening + middle + closing
|
||||
|
||||
|
||||
def tool_progress_hint_gateway() -> str:
    """Return the gateway (markdown) tip shown after a long-running tool."""
    sentences = (
        "💡 First-time tip — that tool took a while and I'm streaming every step.",
        "If the progress messages feel noisy, send `/verbose` to cycle modes (all → new → off).",
        "This notice won't appear again.",
    )
    return " ".join(sentences)
|
||||
|
||||
|
||||
def tool_progress_hint_cli() -> str:
    """Return the CLI (plain text) tip shown after a long-running tool."""
    sentences = (
        "(tip) That tool ran for a while.",
        "Use /verbose to cycle tool-progress display modes (all -> new -> off -> verbose).",
        "This tip only shows once.",
    )
    return " ".join(sentences)
|
||||
|
||||
|
||||
def openclaw_residue_hint_cli() -> str:
    """Return the banner text for a leftover ``~/.openclaw/`` workspace.

    Intended for the first Hermes start that finds ``~/.openclaw/``. The
    motivation is that OpenClaw-era config, memory and skill paths under that
    directory otherwise keep attracting the agent (memory entries such as
    ``~/.openclaw/config.yaml`` get carried forward and the agent reads
    them). The banner points the user at ``hermes claw cleanup``, which it
    describes as archiving the directory by renaming it.
    """
    lines = [
        "Heads up — an OpenClaw workspace was detected at ~/.openclaw/.",
        "After migrating, the agent can still get confused and read that "
        "directory's config/memory instead of Hermes's.",
        "Run `hermes claw cleanup` to archive it (rename → .openclaw.pre-migration). "
        "This tip only shows once; rerun it any time with `hermes claw cleanup`.",
    ]
    return "\n".join(lines)
|
||||
|
||||
|
||||
def detect_openclaw_residue(home: Optional[Path] = None) -> bool:
    """Return True if a ``.openclaw`` directory exists under the home directory.

    Pure filesystem check with no side effects.

    Args:
        home: directory to look in instead of the real home directory;
            exists for tests. Defaults to ``Path.home()``.

    Returns:
        True when ``<home>/.openclaw`` is a directory. False when it is
        absent, is not a directory, the filesystem check raises ``OSError``,
        or the home directory cannot be determined.
    """
    try:
        # Path.home() raises RuntimeError when the home directory can't be
        # resolved, so it must sit inside the guard: a best-effort onboarding
        # check should never abort startup.
        base = home or Path.home()
        return (base / ".openclaw").is_dir()
    except (OSError, RuntimeError):
        return False
|
||||
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# State read / write
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
def _get_seen_dict(config: Mapping[str, Any]) -> Mapping[str, Any]:
|
||||
onboarding = config.get("onboarding") if isinstance(config, Mapping) else None
|
||||
if not isinstance(onboarding, Mapping):
|
||||
return {}
|
||||
seen = onboarding.get("seen")
|
||||
return seen if isinstance(seen, Mapping) else {}
|
||||
|
||||
|
||||
def is_seen(config: Mapping[str, Any], flag: str) -> bool:
    """Report whether ``flag`` has a truthy entry under ``onboarding.seen``.

    Args:
        config: loaded configuration mapping.
        flag: hint flag name (one of the ``*_FLAG`` constants).

    Returns:
        True if the stored value for ``flag`` is truthy, otherwise False —
        including when the config has no usable ``onboarding.seen`` section.
    """
    seen_flags = _get_seen_dict(config)
    return True if seen_flags.get(flag) else False
|
||||
|
||||
|
||||
def mark_seen(config_path: Path, flag: str) -> bool:
    """Persist ``onboarding.seen.<flag> = True`` to ``config_path``.

    The existing YAML is loaded (an absent or empty file is treated as an
    empty mapping), the ``onboarding`` / ``seen`` levels are created or
    replaced when they are not dicts, and the result is written back through
    ``utils.atomic_yaml_write``. Nothing is written when the flag is already
    stored as exactly ``True``.

    Onboarding is best-effort: every failure is logged at DEBUG level and
    reported through the return value rather than raised.

    Args:
        config_path: path of the YAML config file.
        flag: hint flag name to record.

    Returns:
        True if the flag is set after the call (newly written or already
        present); False if importing ``yaml`` / ``utils`` failed or any
        error occurred while reading, updating or writing the config.
    """
    # Imported lazily so that importing this module stays dependency-free.
    try:
        import yaml
        from utils import atomic_yaml_write
    except Exception as e:  # pragma: no cover — dependency issue
        logger.debug("onboarding: failed to import yaml/utils: %s", e)
        return False

    try:
        document: dict = {}
        if config_path.exists():
            with open(config_path, encoding="utf-8") as handle:
                document = yaml.safe_load(handle) or {}

        onboarding_section = document.get("onboarding")
        if not isinstance(onboarding_section, dict):
            onboarding_section = {}
            document["onboarding"] = onboarding_section

        seen_flags = onboarding_section.get("seen")
        if not isinstance(seen_flags, dict):
            seen_flags = {}
            onboarding_section["seen"] = seen_flags

        if seen_flags.get(flag) is True:
            return True  # already marked — nothing to do

        seen_flags[flag] = True
        atomic_yaml_write(config_path, document)
        return True
    except Exception as e:
        logger.debug("onboarding: failed to mark flag %s: %s", flag, e)
        return False
|
||||
|
||||
|
||||
# Public API: flag names first, then hint builders, then detection and
# seen-state helpers.
__all__ = [
    "BUSY_INPUT_FLAG", "TOOL_PROGRESS_FLAG", "OPENCLAW_RESIDUE_FLAG",
    "busy_input_hint_gateway", "busy_input_hint_cli",
    "tool_progress_hint_gateway", "tool_progress_hint_cli",
    "openclaw_residue_hint_cli",
    "detect_openclaw_residue",
    "is_seen", "mark_seen",
]
|
||||
@@ -141,6 +141,12 @@ DEFAULT_AGENT_IDENTITY = (
|
||||
"Be targeted and efficient in your exploration and investigations."
|
||||
)
|
||||
|
||||
HERMES_AGENT_HELP_GUIDANCE = (
|
||||
"If the user asks about configuring, setting up, or using Hermes Agent "
|
||||
"itself, load the `hermes-agent` skill with skill_view(name='hermes-agent') "
|
||||
"before answering. Docs: https://hermes-agent.nousresearch.com/docs"
|
||||
)
|
||||
|
||||
MEMORY_GUIDANCE = (
|
||||
"You have persistent memory across sessions. Save durable facts using the memory "
|
||||
"tool: user preferences, environment details, tool quirks, and stable conventions. "
|
||||
@@ -422,6 +428,29 @@ PLATFORM_HINTS = {
|
||||
"your response. Images are sent as native photos, and other files arrive as downloadable "
|
||||
"documents."
|
||||
),
|
||||
"yuanbao": (
|
||||
"You are on Yuanbao (腾讯元宝), a Chinese AI assistant platform. "
|
||||
"Markdown formatting is supported (code blocks, tables, bold/italic). "
|
||||
"You CAN send media files natively — to deliver a file to the user, include "
|
||||
"MEDIA:/absolute/path/to/file in your response. The file will be sent as a native "
|
||||
"Yuanbao attachment: images (.jpg, .png, .webp, .gif) are sent as photos, "
|
||||
"and other files (.pdf, .docx, .txt, .zip, etc.) arrive as downloadable documents "
|
||||
"(max 50 MB). You can also include image URLs in markdown format  and "
|
||||
"they will be downloaded and sent as native photos. "
|
||||
"Do NOT tell the user you lack file-sending capability — use MEDIA: syntax "
|
||||
"whenever a file delivery is appropriate.\n\n"
|
||||
"Stickers (贴纸 / 表情包 / TIM face): Yuanbao has a built-in sticker catalogue. "
|
||||
"When the user sends a sticker (you see '[emoji: 名称]' in their message) or asks "
|
||||
"you to send/reply-with a 贴纸/表情/表情包, you MUST use the sticker tools:\n"
|
||||
" 1. Call yb_search_sticker with a Chinese keyword (e.g. '666', '比心', '吃瓜', "
|
||||
" '捂脸', '合十') to discover matching sticker_ids.\n"
|
||||
" 2. Call yb_send_sticker with the chosen sticker_id or name — this sends a real "
|
||||
" TIMFaceElem that renders as a native sticker in the chat.\n"
|
||||
"DO NOT draw sticker-like PNGs with execute_code/Pillow/matplotlib and then send "
|
||||
"them via MEDIA: or send_image_file. That produces a fake low-quality 'sticker' "
|
||||
"image and is the WRONG path. Bare Unicode emoji in text is also not a substitute "
|
||||
"— when a sticker is the right response, use yb_send_sticker."
|
||||
),
|
||||
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -825,6 +854,11 @@ def build_skills_system_prompt(
|
||||
"Skills also encode the user's preferred approach, conventions, and quality standards "
|
||||
"for tasks like code review, planning, and testing — load them even for tasks you "
|
||||
"already know how to do, because the skill defines how it should be done here.\n"
|
||||
"Whenever the user asks you to configure, set up, install, enable, disable, modify, "
|
||||
"or troubleshoot Hermes Agent itself — its CLI, config, models, providers, tools, "
|
||||
"skills, voice, gateway, plugins, or any feature — load the `hermes-agent` skill "
|
||||
"first. It has the actual commands (e.g. `hermes config set …`, `hermes tools`, "
|
||||
"`hermes setup`) so you don't have to guess or invent workarounds.\n"
|
||||
"If a skill has issues, fix it with skill_manage(action='patch').\n"
|
||||
"After difficult/iterative tasks, offer to save as a skill. "
|
||||
"If a skill you loaded was missing steps, had wrong commands, or needed "
|
||||
|
||||
@@ -754,7 +754,11 @@ def _resolve_effective_accept(
|
||||
if env in ("1", "true", "yes", "on"):
|
||||
return True
|
||||
cfg_val = cfg.get("hooks_auto_accept", False)
|
||||
return bool(cfg_val)
|
||||
if isinstance(cfg_val, bool):
|
||||
return cfg_val
|
||||
if isinstance(cfg_val, str):
|
||||
return cfg_val.strip().lower() in ("1", "true", "yes", "on")
|
||||
return False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@@ -329,7 +329,7 @@ def build_skill_invocation_message(
|
||||
|
||||
loaded_skill, skill_dir, skill_name = loaded
|
||||
activation_note = (
|
||||
f'[SYSTEM: The user has invoked the "{skill_name}" skill, indicating they want '
|
||||
f'[IMPORTANT: The user has invoked the "{skill_name}" skill, indicating they want '
|
||||
"you to follow its instructions. The full skill content is loaded below.]"
|
||||
)
|
||||
return _build_skill_message(
|
||||
@@ -368,7 +368,7 @@ def build_preloaded_skills_prompt(
|
||||
|
||||
loaded_skill, skill_dir, skill_name = loaded
|
||||
activation_note = (
|
||||
f'[SYSTEM: The user launched this CLI session with the "{skill_name}" skill '
|
||||
f'[IMPORTANT: The user launched this CLI session with the "{skill_name}" skill '
|
||||
"preloaded. Treat its instructions as active guidance for the duration of this "
|
||||
"session unless the user overrides them.]"
|
||||
)
|
||||
|
||||
@@ -6,12 +6,18 @@ adds latency to the user-facing reply.
|
||||
|
||||
import logging
|
||||
import threading
|
||||
from typing import Optional
|
||||
from typing import Callable, Optional
|
||||
|
||||
from agent.auxiliary_client import call_llm
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Callback signature: (task_name, exception) -> None. Used to surface
|
||||
# auxiliary failures to the user through AIAgent._emit_auxiliary_failure
|
||||
# so silent-drops (e.g. OpenRouter 402 exhausting the fallback chain)
|
||||
# become visible instead of piling up as NULL session titles.
|
||||
FailureCallback = Callable[[str, BaseException], None]
|
||||
|
||||
_TITLE_PROMPT = (
|
||||
"Generate a short, descriptive title (3-7 words) for a conversation that starts with the "
|
||||
"following exchange. The title should capture the main topic or intent. "
|
||||
@@ -19,11 +25,21 @@ _TITLE_PROMPT = (
|
||||
)
|
||||
|
||||
|
||||
def generate_title(user_message: str, assistant_response: str, timeout: float = 30.0) -> Optional[str]:
|
||||
def generate_title(
|
||||
user_message: str,
|
||||
assistant_response: str,
|
||||
timeout: float = 30.0,
|
||||
failure_callback: Optional[FailureCallback] = None,
|
||||
) -> Optional[str]:
|
||||
"""Generate a session title from the first exchange.
|
||||
|
||||
Uses the auxiliary LLM client (cheapest/fastest available model).
|
||||
Returns the title string or None on failure.
|
||||
|
||||
``failure_callback`` is invoked with ``(task, exception)`` when the
|
||||
auxiliary call raises — the caller typically wires this to
|
||||
``AIAgent._emit_auxiliary_failure`` so the user sees a warning instead
|
||||
of silently accumulating untitled sessions.
|
||||
"""
|
||||
# Truncate long messages to keep the request small
|
||||
user_snippet = user_message[:500] if user_message else ""
|
||||
@@ -52,7 +68,15 @@ def generate_title(user_message: str, assistant_response: str, timeout: float =
|
||||
title = title[:77] + "..."
|
||||
return title if title else None
|
||||
except Exception as e:
|
||||
logger.debug("Title generation failed: %s", e)
|
||||
# Log at WARNING so this shows up in agent.log without debug mode.
|
||||
# Full detail at debug level for operators who need the stack.
|
||||
logger.warning("Title generation failed: %s", e)
|
||||
logger.debug("Title generation traceback", exc_info=True)
|
||||
if failure_callback is not None:
|
||||
try:
|
||||
failure_callback("title generation", e)
|
||||
except Exception:
|
||||
logger.debug("Title generation failure_callback raised", exc_info=True)
|
||||
return None
|
||||
|
||||
|
||||
@@ -61,6 +85,7 @@ def auto_title_session(
|
||||
session_id: str,
|
||||
user_message: str,
|
||||
assistant_response: str,
|
||||
failure_callback: Optional[FailureCallback] = None,
|
||||
) -> None:
|
||||
"""Generate and set a session title if one doesn't already exist.
|
||||
|
||||
@@ -81,7 +106,9 @@ def auto_title_session(
|
||||
except Exception:
|
||||
return
|
||||
|
||||
title = generate_title(user_message, assistant_response)
|
||||
title = generate_title(
|
||||
user_message, assistant_response, failure_callback=failure_callback
|
||||
)
|
||||
if not title:
|
||||
return
|
||||
|
||||
@@ -98,6 +125,7 @@ def maybe_auto_title(
|
||||
user_message: str,
|
||||
assistant_response: str,
|
||||
conversation_history: list,
|
||||
failure_callback: Optional[FailureCallback] = None,
|
||||
) -> None:
|
||||
"""Fire-and-forget title generation after the first exchange.
|
||||
|
||||
@@ -119,6 +147,7 @@ def maybe_auto_title(
|
||||
thread = threading.Thread(
|
||||
target=auto_title_session,
|
||||
args=(session_db, session_id, user_message, assistant_response),
|
||||
kwargs={"failure_callback": failure_callback},
|
||||
daemon=True,
|
||||
name="auto-title",
|
||||
)
|
||||
|
||||
@@ -23,9 +23,14 @@ def get_transport(api_mode: str):
|
||||
This allows gradual migration — call sites can check for None
|
||||
and fall back to the legacy code path.
|
||||
"""
|
||||
if not _REGISTRY:
|
||||
_discover_transports()
|
||||
cls = _REGISTRY.get(api_mode)
|
||||
if cls is None:
|
||||
# The registry can be partially populated when a specific transport
|
||||
# module was imported directly (for example chat_completions before
|
||||
# codex). Discover on misses, not only when the registry is empty, so
|
||||
# test/order-dependent imports do not make valid api_modes unavailable.
|
||||
_discover_transports()
|
||||
cls = _REGISTRY.get(api_mode)
|
||||
if cls is None:
|
||||
return None
|
||||
return cls()
|
||||
|
||||
@@ -31,15 +31,15 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||
def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> List[Dict[str, Any]]:
|
||||
"""Messages are already in OpenAI format — sanitize Codex leaks only.
|
||||
|
||||
Strips Codex Responses API fields (``codex_reasoning_items`` on the
|
||||
message, ``call_id``/``response_item_id`` on tool_calls) that strict
|
||||
chat-completions providers reject with 400/422.
|
||||
Strips Codex Responses API fields (``codex_reasoning_items`` /
|
||||
``codex_message_items`` on the message, ``call_id``/``response_item_id``
|
||||
on tool_calls) that strict chat-completions providers reject with 400/422.
|
||||
"""
|
||||
needs_sanitize = False
|
||||
for msg in messages:
|
||||
if not isinstance(msg, dict):
|
||||
continue
|
||||
if "codex_reasoning_items" in msg:
|
||||
if "codex_reasoning_items" in msg or "codex_message_items" in msg:
|
||||
needs_sanitize = True
|
||||
break
|
||||
tool_calls = msg.get("tool_calls")
|
||||
@@ -59,6 +59,7 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||
if not isinstance(msg, dict):
|
||||
continue
|
||||
msg.pop("codex_reasoning_items", None)
|
||||
msg.pop("codex_message_items", None)
|
||||
tool_calls = msg.get("tool_calls")
|
||||
if isinstance(tool_calls, list):
|
||||
for tc in tool_calls:
|
||||
|
||||
@@ -120,6 +120,24 @@ class ResponsesApiTransport(ProviderTransport):
|
||||
if request_overrides:
|
||||
kwargs.update(request_overrides)
|
||||
|
||||
if is_codex_backend:
|
||||
prompt_cache_key = kwargs.get("prompt_cache_key")
|
||||
cache_scope_id = str(prompt_cache_key or session_id or "").strip()
|
||||
if cache_scope_id:
|
||||
existing_extra_headers = kwargs.get("extra_headers")
|
||||
merged_extra_headers: Dict[str, str] = {}
|
||||
if isinstance(existing_extra_headers, dict):
|
||||
merged_extra_headers.update(
|
||||
{
|
||||
str(key): str(value)
|
||||
for key, value in existing_extra_headers.items()
|
||||
if key and value is not None
|
||||
}
|
||||
)
|
||||
merged_extra_headers["session_id"] = cache_scope_id
|
||||
merged_extra_headers["x-client-request-id"] = cache_scope_id
|
||||
kwargs["extra_headers"] = merged_extra_headers
|
||||
|
||||
max_tokens = params.get("max_tokens")
|
||||
if max_tokens is not None and not is_codex_backend:
|
||||
kwargs["max_output_tokens"] = max_tokens
|
||||
@@ -160,6 +178,8 @@ class ResponsesApiTransport(ProviderTransport):
|
||||
provider_data = {}
|
||||
if msg and hasattr(msg, "codex_reasoning_items") and msg.codex_reasoning_items:
|
||||
provider_data["codex_reasoning_items"] = msg.codex_reasoning_items
|
||||
if msg and hasattr(msg, "codex_message_items") and msg.codex_message_items:
|
||||
provider_data["codex_message_items"] = msg.codex_message_items
|
||||
if msg and hasattr(msg, "reasoning_details") and msg.reasoning_details:
|
||||
provider_data["reasoning_details"] = msg.reasoning_details
|
||||
|
||||
|
||||
@@ -97,7 +97,7 @@ class NormalizedResponse:
|
||||
Response-level ``provider_data`` examples:
|
||||
|
||||
* Anthropic: ``{"reasoning_details": [...]}``
|
||||
* Codex: ``{"codex_reasoning_items": [...]}``
|
||||
* Codex: ``{"codex_reasoning_items": [...], "codex_message_items": [...]}``
|
||||
* Others: ``None``
|
||||
"""
|
||||
|
||||
@@ -126,6 +126,11 @@ class NormalizedResponse:
|
||||
pd = self.provider_data or {}
|
||||
return pd.get("codex_reasoning_items")
|
||||
|
||||
@property
|
||||
def codex_message_items(self):
|
||||
pd = self.provider_data or {}
|
||||
return pd.get("codex_message_items")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Factory helpers
|
||||
|
||||
+28
-8
@@ -606,6 +606,7 @@ platform_toolsets:
|
||||
signal: [hermes-signal]
|
||||
homeassistant: [hermes-homeassistant]
|
||||
qqbot: [hermes-qqbot]
|
||||
yuanbao: [hermes-yuanbao]
|
||||
|
||||
# =============================================================================
|
||||
# Gateway Platform Settings
|
||||
@@ -824,7 +825,9 @@ delegation:
|
||||
# Display
|
||||
# =============================================================================
|
||||
display:
|
||||
# Use compact banner mode
|
||||
# Use compact banner mode (hides the ASCII-art banner, shows a single line).
|
||||
# true: Compact single-line banner
|
||||
# false: Full ASCII banner with tool/skill summary (default)
|
||||
compact: false
|
||||
|
||||
# Tool progress display level (CLI and gateway)
|
||||
@@ -838,12 +841,19 @@ display:
|
||||
# Gateway-only natural mid-turn assistant updates.
|
||||
# When true, completed assistant status messages are sent as separate chat
|
||||
# messages. This is independent of tool_progress and gateway streaming.
|
||||
# true: Send mid-turn assistant updates as separate messages (default)
|
||||
# false: Only send the final response
|
||||
interim_assistant_messages: true
|
||||
|
||||
# What Enter does when Hermes is already busy in the CLI.
|
||||
# What Enter does when Hermes is already busy (CLI and gateway platforms).
|
||||
# interrupt: Interrupt the current run and redirect Hermes (default)
|
||||
# queue: Queue your message for the next turn
|
||||
# Ctrl+C always interrupts regardless of this setting.
|
||||
# steer: Inject your message mid-run via /steer, arriving at the agent
|
||||
# after the next tool call — no interrupt, no role violation.
|
||||
# Falls back to 'queue' if the agent isn't running yet or if
|
||||
# images are attached (steer only carries text).
|
||||
# Ctrl+C (or /stop in gateway) always interrupts regardless of this setting.
|
||||
# Toggle at runtime with /busy <interrupt|queue|steer>.
|
||||
busy_input_mode: interrupt
|
||||
|
||||
# Background process notifications (gateway/messaging only).
|
||||
@@ -859,17 +869,22 @@ display:
|
||||
# Play terminal bell when agent finishes a response.
|
||||
# Useful for long-running tasks — your terminal will ding when the agent is done.
|
||||
# Works over SSH. Most terminals can be configured to flash the taskbar or play a sound.
|
||||
# true: Ring the terminal bell on each response
|
||||
# false: Silent (default)
|
||||
bell_on_complete: false
|
||||
|
||||
# Show model reasoning/thinking before each response.
|
||||
# When enabled, a dim box shows the model's thought process above the response.
|
||||
# Toggle at runtime with /reasoning show or /reasoning hide.
|
||||
# true: Show the reasoning box
|
||||
# false: Hide reasoning (default)
|
||||
show_reasoning: false
|
||||
|
||||
# Stream tokens to the terminal as they arrive instead of waiting for the
|
||||
# full response. The response box opens on first token and text appears
|
||||
# line-by-line. Tool calls are still captured silently.
|
||||
# Stream tokens to the terminal in real-time. Disable to wait for full responses.
|
||||
# true: Stream tokens as they arrive (default)
|
||||
# false: Wait for the full response before rendering
|
||||
streaming: true
|
||||
|
||||
# ───────────────────────────────────────────────────────────────────────────
|
||||
@@ -879,10 +894,15 @@ display:
|
||||
# response box label, and branding text. Change at runtime with /skin <name>.
|
||||
#
|
||||
# Built-in skins:
|
||||
# default — Classic Hermes gold/kawaii
|
||||
# ares — Crimson/bronze war-god theme with spinner wings
|
||||
# mono — Clean grayscale monochrome
|
||||
# slate — Cool blue developer-focused
|
||||
# default — Classic Hermes gold/kawaii
|
||||
# ares — Crimson/bronze war-god theme with spinner wings
|
||||
# mono — Clean grayscale monochrome
|
||||
# slate — Cool blue developer-focused
|
||||
# daylight — Bright light-mode theme
|
||||
# warm-lightmode — Warm paper-tone light-mode theme
|
||||
# poseidon — Sea-green/teal Olympian theme
|
||||
# sisyphus — Earthy stone-and-moss theme
|
||||
# charizard — Fiery orange dragon theme
|
||||
#
|
||||
# Custom skins: drop a YAML file in ~/.hermes/skins/<name>.yaml
|
||||
# Schema (all fields optional, missing values inherit from default):
|
||||
|
||||
+45
-4
@@ -16,7 +16,7 @@ import uuid
|
||||
from datetime import datetime, timedelta
|
||||
from pathlib import Path
|
||||
from hermes_constants import get_hermes_home
|
||||
from typing import Optional, Dict, List, Any
|
||||
from typing import Optional, Dict, List, Any, Union
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -311,6 +311,12 @@ def compute_next_run(schedule: Dict[str, Any], last_run_at: Optional[str] = None
|
||||
|
||||
elif schedule["kind"] == "cron":
|
||||
if not HAS_CRONITER:
|
||||
logger.warning(
|
||||
"Cannot compute next run for cron schedule %r: 'croniter' "
|
||||
"is not installed. Install the 'cron' extra (pip install "
|
||||
"'hermes-agent[cron]') to re-enable recurring cron jobs.",
|
||||
schedule.get("expr"),
|
||||
)
|
||||
return None
|
||||
cron = croniter(schedule["expr"], now)
|
||||
next_run = cron.get_next(datetime)
|
||||
@@ -417,6 +423,7 @@ def create_job(
|
||||
provider: Optional[str] = None,
|
||||
base_url: Optional[str] = None,
|
||||
script: Optional[str] = None,
|
||||
context_from: Optional[Union[str, List[str]]] = None,
|
||||
enabled_toolsets: Optional[List[str]] = None,
|
||||
workdir: Optional[str] = None,
|
||||
) -> Dict[str, Any]:
|
||||
@@ -438,6 +445,9 @@ def create_job(
|
||||
script: Optional path to a Python script whose stdout is injected into the
|
||||
prompt each run. The script runs before the agent turn, and its output
|
||||
is prepended as context. Useful for data collection / change detection.
|
||||
context_from: Optional job ID (or list of job IDs) whose most recent output
|
||||
is injected into the prompt as context before each run.
|
||||
Useful for chaining cron jobs: job A finds data, job B processes it.
|
||||
enabled_toolsets: Optional list of toolset names to restrict the agent to.
|
||||
When set, only tools from these toolsets are loaded, reducing
|
||||
token overhead. When omitted, all default tools are loaded.
|
||||
@@ -481,6 +491,14 @@ def create_job(
|
||||
normalized_toolsets = normalized_toolsets or None
|
||||
normalized_workdir = _normalize_workdir(workdir)
|
||||
|
||||
# Normalize context_from: accept str or list of str, store as list or None
|
||||
if isinstance(context_from, str):
|
||||
context_from = [context_from.strip()] if context_from.strip() else None
|
||||
elif isinstance(context_from, list):
|
||||
context_from = [str(j).strip() for j in context_from if str(j).strip()] or None
|
||||
else:
|
||||
context_from = None
|
||||
|
||||
label_source = (prompt or (normalized_skills[0] if normalized_skills else None)) or "cron job"
|
||||
job = {
|
||||
"id": job_id,
|
||||
@@ -492,6 +510,7 @@ def create_job(
|
||||
"provider": normalized_provider,
|
||||
"base_url": normalized_base_url,
|
||||
"script": normalized_script,
|
||||
"context_from": context_from,
|
||||
"schedule": parsed_schedule,
|
||||
"schedule_display": parsed_schedule.get("display", schedule),
|
||||
"repeat": {
|
||||
@@ -685,10 +704,32 @@ def mark_job_run(job_id: str, success: bool, error: Optional[str] = None,
|
||||
# Compute next run
|
||||
job["next_run_at"] = compute_next_run(job["schedule"], now)
|
||||
|
||||
# If no next run (one-shot completed), disable
|
||||
# If no next run, decide whether this is terminal completion
|
||||
# (one-shot) or a transient failure (recurring schedule couldn't
|
||||
# compute — e.g. 'croniter' missing from the runtime env).
|
||||
# Recurring jobs must NEVER be silently disabled: that turns a
|
||||
# missing runtime dep into "job completed" and the user's
|
||||
# schedule quietly goes off. See issue #16265.
|
||||
if job["next_run_at"] is None:
|
||||
job["enabled"] = False
|
||||
job["state"] = "completed"
|
||||
kind = job.get("schedule", {}).get("kind")
|
||||
if kind in ("cron", "interval"):
|
||||
job["state"] = "error"
|
||||
if not job.get("last_error"):
|
||||
job["last_error"] = (
|
||||
"Failed to compute next run for recurring "
|
||||
"schedule (is the 'croniter' package "
|
||||
"installed in the gateway's Python env?)"
|
||||
)
|
||||
logger.error(
|
||||
"Job '%s' (%s) could not compute next_run_at; "
|
||||
"leaving enabled and marking state=error so the "
|
||||
"job is not silently disabled.",
|
||||
job.get("name", job["id"]),
|
||||
kind,
|
||||
)
|
||||
else:
|
||||
job["enabled"] = False
|
||||
job["state"] = "completed"
|
||||
elif job.get("state") != "paused":
|
||||
job["state"] = "scheduled"
|
||||
|
||||
|
||||
+77
-4
@@ -77,7 +77,7 @@ _KNOWN_DELIVERY_PLATFORMS = frozenset({
|
||||
"telegram", "discord", "slack", "whatsapp", "signal",
|
||||
"matrix", "mattermost", "homeassistant", "dingtalk", "feishu",
|
||||
"wecom", "wecom_callback", "weixin", "sms", "email", "webhook", "bluebubbles",
|
||||
"qqbot",
|
||||
"qqbot", "yuanbao",
|
||||
})
|
||||
|
||||
# Platforms that support a configured cron/notification home target, mapped to
|
||||
@@ -337,6 +337,7 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option
|
||||
"sms": Platform.SMS,
|
||||
"bluebubbles": Platform.BLUEBUBBLES,
|
||||
"qqbot": Platform.QQBOT,
|
||||
"yuanbao": Platform.YUANBAO,
|
||||
}
|
||||
|
||||
# Optionally wrap the content with a header/footer so the user knows this
|
||||
@@ -671,10 +672,51 @@ def _build_job_prompt(job: dict, prerun_script: Optional[tuple] = None) -> str:
|
||||
f"{prompt}"
|
||||
)
|
||||
|
||||
# Inject output from referenced cron jobs as context.
|
||||
context_from = job.get("context_from")
|
||||
if context_from:
|
||||
from cron.jobs import OUTPUT_DIR
|
||||
if isinstance(context_from, str):
|
||||
context_from = [context_from]
|
||||
for source_job_id in context_from:
|
||||
# Guard against path traversal — valid job IDs are 12-char hex strings
|
||||
if not source_job_id or not all(c in "0123456789abcdef" for c in source_job_id):
|
||||
logger.warning("context_from: skipping invalid job_id %r", source_job_id)
|
||||
continue
|
||||
try:
|
||||
job_output_dir = OUTPUT_DIR / source_job_id
|
||||
if not job_output_dir.exists():
|
||||
continue # silent skip — no output yet
|
||||
output_files = sorted(
|
||||
job_output_dir.glob("*.md"),
|
||||
key=lambda f: f.stat().st_mtime,
|
||||
reverse=True,
|
||||
)
|
||||
if not output_files:
|
||||
continue # silent skip — no output yet
|
||||
latest_output = output_files[0].read_text(encoding="utf-8").strip()
|
||||
# Truncate to 8K characters to avoid prompt bloat
|
||||
_MAX_CONTEXT_CHARS = 8000
|
||||
if len(latest_output) > _MAX_CONTEXT_CHARS:
|
||||
latest_output = latest_output[:_MAX_CONTEXT_CHARS] + "\n\n[... output truncated ...]"
|
||||
if latest_output:
|
||||
prompt = (
|
||||
f"## Output from job '{source_job_id}'\n"
|
||||
"The following is the most recent output from a preceding "
|
||||
"cron job. Use it as context for your analysis.\n\n"
|
||||
f"```\n{latest_output}\n```\n\n"
|
||||
f"{prompt}"
|
||||
)
|
||||
else:
|
||||
continue # silent skip — empty output
|
||||
except (OSError, PermissionError) as e:
|
||||
logger.warning("context_from: failed to read output for job %r: %s", source_job_id, e)
|
||||
# silent skip — do not pollute the prompt with error messages
|
||||
|
||||
# Always prepend cron execution guidance so the agent knows how
|
||||
# delivery works and can suppress delivery when appropriate.
|
||||
cron_hint = (
|
||||
"[SYSTEM: You are running as a scheduled cron job. "
|
||||
"[IMPORTANT: You are running as a scheduled cron job. "
|
||||
"DELIVERY: Your final response will be automatically delivered "
|
||||
"to the user — do NOT use send_message or try to deliver "
|
||||
"the output yourself. Just produce your report/output as your "
|
||||
@@ -710,7 +752,7 @@ def _build_job_prompt(job: dict, prerun_script: Optional[tuple] = None) -> str:
|
||||
parts.append("")
|
||||
parts.extend(
|
||||
[
|
||||
f'[SYSTEM: The user has invoked the "{skill_name}" skill, indicating they want you to follow its instructions. The full skill content is loaded below.]',
|
||||
f'[IMPORTANT: The user has invoked the "{skill_name}" skill, indicating they want you to follow its instructions. The full skill content is loaded below.]',
|
||||
"",
|
||||
content,
|
||||
]
|
||||
@@ -718,7 +760,7 @@ def _build_job_prompt(job: dict, prerun_script: Optional[tuple] = None) -> str:
|
||||
|
||||
if skipped:
|
||||
notice = (
|
||||
f"[SYSTEM: The following skill(s) were listed for this job but could not be found "
|
||||
f"[IMPORTANT: The following skill(s) were listed for this job but could not be found "
|
||||
f"and were skipped: {', '.join(skipped)}. "
|
||||
f"Start your response with a brief notice so the user is aware, e.g.: "
|
||||
f"'⚠️ Skill(s) not found and skipped: {', '.join(skipped)}']"
|
||||
@@ -780,6 +822,8 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
|
||||
logger.info("Running job '%s' (ID: %s)", job_name, job_id)
|
||||
logger.info("Prompt: %s", prompt[:100])
|
||||
|
||||
agent = None
|
||||
|
||||
# Mark this as a cron session so the approval system can apply cron_mode.
|
||||
# This env var is process-wide and persists for the lifetime of the
|
||||
# scheduler process — every job this process runs is a cron job.
|
||||
@@ -1128,6 +1172,24 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
|
||||
_session_db.close()
|
||||
except (Exception, KeyboardInterrupt) as e:
|
||||
logger.debug("Job '%s': failed to close SQLite session store: %s", job_id, e)
|
||||
# Release subprocesses, terminal sandboxes, browser daemons, and the
|
||||
# main OpenAI/httpx client held by this ephemeral cron agent. Without
|
||||
# this, a gateway that ticks cron every N minutes leaks fds per job
|
||||
# until it hits EMFILE (#10200 / "too many open files").
|
||||
try:
|
||||
if agent is not None:
|
||||
agent.close()
|
||||
except (Exception, KeyboardInterrupt) as e:
|
||||
logger.debug("Job '%s': failed to close agent resources: %s", job_id, e)
|
||||
# Each cron run spins up a short-lived worker thread whose event loop
|
||||
# dies as soon as the ``ThreadPoolExecutor`` shuts down. Any async
|
||||
# httpx clients cached under that loop are now unusable — reap them
|
||||
# so their transports don't accumulate in the process-global cache.
|
||||
try:
|
||||
from agent.auxiliary_client import cleanup_stale_async_clients
|
||||
cleanup_stale_async_clients()
|
||||
except Exception as e:
|
||||
logger.debug("Job '%s': failed to reap stale auxiliary clients: %s", job_id, e)
|
||||
|
||||
|
||||
def tick(verbose: bool = True, adapters=None, loop=None) -> int:
|
||||
@@ -1267,6 +1329,17 @@ def tick(verbose: bool = True, adapters=None, loop=None) -> int:
|
||||
_futures.append(_tick_pool.submit(_ctx.run, _process_job, job))
|
||||
_results.extend(f.result() for f in _futures)
|
||||
|
||||
# Best-effort sweep of MCP stdio subprocesses that survived their
|
||||
# session teardown during this tick. Runs AFTER every job has
|
||||
# finished so active sessions (including live user chats) are
|
||||
# never touched — only PIDs explicitly detected as orphans in
|
||||
# tools.mcp_tool._run_stdio's finally block are reaped.
|
||||
try:
|
||||
from tools.mcp_tool import _kill_orphaned_mcp_children
|
||||
_kill_orphaned_mcp_children()
|
||||
except Exception as _e:
|
||||
logger.debug("Post-tick MCP orphan cleanup failed: %s", _e)
|
||||
|
||||
return sum(_results)
|
||||
finally:
|
||||
if fcntl:
|
||||
|
||||
@@ -41,6 +41,15 @@ if [ "$(id -u)" = "0" ]; then
|
||||
echo "Warning: chown failed (rootless container?) — continuing anyway"
|
||||
fi
|
||||
|
||||
# Ensure config.yaml is readable by the hermes runtime user even if it was
|
||||
# edited on the host after initial ownership setup. Must run here (as root)
|
||||
# rather than after the gosu drop, otherwise a non-root caller like
|
||||
# `docker run -u $(id -u):$(id -g)` hits "Operation not permitted" (#15865).
|
||||
if [ -f "$HERMES_HOME/config.yaml" ]; then
|
||||
chown hermes:hermes "$HERMES_HOME/config.yaml" 2>/dev/null || true
|
||||
chmod 640 "$HERMES_HOME/config.yaml" 2>/dev/null || true
|
||||
fi
|
||||
|
||||
echo "Dropping root privileges"
|
||||
exec gosu hermes "$0" "$@"
|
||||
fi
|
||||
@@ -67,13 +76,6 @@ if [ ! -f "$HERMES_HOME/config.yaml" ]; then
|
||||
cp "$INSTALL_DIR/cli-config.yaml.example" "$HERMES_HOME/config.yaml"
|
||||
fi
|
||||
|
||||
# Ensure the main config file remains accessible to the hermes runtime user
|
||||
# even if it was edited on the host after initial ownership setup.
|
||||
if [ -f "$HERMES_HOME/config.yaml" ]; then
|
||||
chown hermes:hermes "$HERMES_HOME/config.yaml"
|
||||
chmod 640 "$HERMES_HOME/config.yaml"
|
||||
fi
|
||||
|
||||
# SOUL.md
|
||||
if [ ! -f "$HERMES_HOME/SOUL.md" ]; then
|
||||
cp "$INSTALL_DIR/docker/SOUL.md" "$HERMES_HOME/SOUL.md"
|
||||
|
||||
@@ -57,7 +57,7 @@ def _session_entry_name(origin: Dict[str, Any]) -> str:
|
||||
# Build / refresh
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def build_channel_directory(adapters: Dict[Any, Any]) -> Dict[str, Any]:
|
||||
async def build_channel_directory(adapters: Dict[Any, Any]) -> Dict[str, Any]:
|
||||
"""
|
||||
Build a channel directory from connected platform adapters and session data.
|
||||
|
||||
@@ -72,7 +72,7 @@ def build_channel_directory(adapters: Dict[Any, Any]) -> Dict[str, Any]:
|
||||
if platform == Platform.DISCORD:
|
||||
platforms["discord"] = _build_discord(adapter)
|
||||
elif platform == Platform.SLACK:
|
||||
platforms["slack"] = _build_slack(adapter)
|
||||
platforms["slack"] = await _build_slack(adapter)
|
||||
except Exception as e:
|
||||
logger.warning("Channel directory: failed to build %s: %s", platform.value, e)
|
||||
|
||||
@@ -136,21 +136,66 @@ def _build_discord(adapter) -> List[Dict[str, str]]:
|
||||
return channels
|
||||
|
||||
|
||||
def _build_slack(adapter) -> List[Dict[str, str]]:
|
||||
"""List Slack channels the bot has joined."""
|
||||
# Slack adapter may expose a web client
|
||||
client = getattr(adapter, "_app", None) or getattr(adapter, "_client", None)
|
||||
if not client:
|
||||
async def _build_slack(adapter) -> List[Dict[str, Any]]:
|
||||
"""List Slack channels the bot has joined across all workspaces.
|
||||
|
||||
Uses ``users.conversations`` against each workspace's web client. Pulls
|
||||
public + private channels the bot is a member of, then merges in DMs
|
||||
discovered from session history (IMs aren't useful to enumerate
|
||||
proactively).
|
||||
"""
|
||||
team_clients = getattr(adapter, "_team_clients", None) or {}
|
||||
if not team_clients:
|
||||
return _build_from_sessions("slack")
|
||||
|
||||
try:
|
||||
from tools.send_message_tool import _send_slack # noqa: F401
|
||||
# Use the Slack Web API directly if available
|
||||
except Exception:
|
||||
pass
|
||||
channels: List[Dict[str, Any]] = []
|
||||
seen_ids: set = set()
|
||||
|
||||
# Fallback to session data
|
||||
return _build_from_sessions("slack")
|
||||
for team_id, client in team_clients.items():
|
||||
try:
|
||||
cursor: Optional[str] = None
|
||||
for _page in range(20): # safety cap on pagination
|
||||
response = await client.users_conversations(
|
||||
types="public_channel,private_channel",
|
||||
exclude_archived=True,
|
||||
limit=200,
|
||||
cursor=cursor,
|
||||
)
|
||||
if not response.get("ok"):
|
||||
logger.warning(
|
||||
"Channel directory: users.conversations not ok for team %s: %s",
|
||||
team_id,
|
||||
response.get("error", "unknown"),
|
||||
)
|
||||
break
|
||||
for ch in response.get("channels", []):
|
||||
cid = ch.get("id")
|
||||
name = ch.get("name")
|
||||
if not cid or not name or cid in seen_ids:
|
||||
continue
|
||||
seen_ids.add(cid)
|
||||
channels.append({
|
||||
"id": cid,
|
||||
"name": name,
|
||||
"type": "private" if ch.get("is_private") else "channel",
|
||||
})
|
||||
cursor = (response.get("response_metadata") or {}).get("next_cursor")
|
||||
if not cursor:
|
||||
break
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
"Channel directory: failed to list Slack channels for team %s: %s",
|
||||
team_id, e,
|
||||
)
|
||||
continue
|
||||
|
||||
# Merge in DM/group entries discovered from session history.
|
||||
for entry in _build_from_sessions("slack"):
|
||||
if entry.get("id") not in seen_ids:
|
||||
channels.append(entry)
|
||||
seen_ids.add(entry.get("id"))
|
||||
|
||||
return channels
|
||||
|
||||
|
||||
def _build_from_sessions(platform_name: str) -> List[Dict[str, str]]:
|
||||
@@ -223,6 +268,14 @@ def resolve_channel_name(platform_name: str, name: str) -> Optional[str]:
|
||||
if not channels:
|
||||
return None
|
||||
|
||||
# 0. Exact ID match — case-sensitive, no normalization. Lets callers pass
|
||||
# raw platform IDs (e.g. Slack "C0B0QV5434G") even when the format guard
|
||||
# in _parse_target_ref hasn't recognized them as explicit.
|
||||
raw = name.strip()
|
||||
for ch in channels:
|
||||
if ch.get("id") == raw:
|
||||
return ch["id"]
|
||||
|
||||
query = _normalize_channel_query(name)
|
||||
|
||||
# 1. Exact name match, including the display labels shown by send_message(action="list")
|
||||
|
||||
+68
-2
@@ -67,6 +67,7 @@ class Platform(Enum):
|
||||
WEIXIN = "weixin"
|
||||
BLUEBUBBLES = "bluebubbles"
|
||||
QQBOT = "qqbot"
|
||||
YUANBAO = "yuanbao"
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -195,6 +196,14 @@ class StreamingConfig:
|
||||
edit_interval: float = 1.0 # Seconds between message edits (Telegram rate-limits at ~1/s)
|
||||
buffer_threshold: int = 40 # Chars before forcing an edit
|
||||
cursor: str = " ▉" # Cursor shown during streaming
|
||||
# Ported from openclaw/openclaw#72038. When >0, the final edit for
|
||||
# a long-running streamed response is delivered as a fresh message
|
||||
# if the original preview has been visible for at least this many
|
||||
# seconds, so the platform's visible timestamp reflects completion
|
||||
# time instead of the preview creation time. Currently applied to
|
||||
# Telegram only (other platforms ignore the setting). Default 60s
|
||||
# matches the OpenClaw rollout. Set to 0 to disable.
|
||||
fresh_final_after_seconds: float = 60.0
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
return {
|
||||
@@ -203,6 +212,7 @@ class StreamingConfig:
|
||||
"edit_interval": self.edit_interval,
|
||||
"buffer_threshold": self.buffer_threshold,
|
||||
"cursor": self.cursor,
|
||||
"fresh_final_after_seconds": self.fresh_final_after_seconds,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
@@ -215,6 +225,9 @@ class StreamingConfig:
|
||||
edit_interval=float(data.get("edit_interval", 1.0)),
|
||||
buffer_threshold=int(data.get("buffer_threshold", 40)),
|
||||
cursor=data.get("cursor", " ▉"),
|
||||
fresh_final_after_seconds=float(
|
||||
data.get("fresh_final_after_seconds", 60.0)
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
@@ -314,6 +327,9 @@ class GatewayConfig:
|
||||
# QQBot uses extra dict for app credentials
|
||||
elif platform == Platform.QQBOT and config.extra.get("app_id") and config.extra.get("client_secret"):
|
||||
connected.append(platform)
|
||||
# Yuanbao uses extra dict for app credentials
|
||||
elif platform == Platform.YUANBAO and config.extra.get("app_id") and config.extra.get("app_secret"):
|
||||
connected.append(platform)
|
||||
# DingTalk uses client_id/client_secret from config.extra or env vars
|
||||
elif platform == Platform.DINGTALK and (
|
||||
config.extra.get("client_id") or os.getenv("DINGTALK_CLIENT_ID")
|
||||
@@ -570,6 +586,8 @@ def load_gateway_config() -> GatewayConfig:
|
||||
)
|
||||
if "reply_prefix" in platform_cfg:
|
||||
bridged["reply_prefix"] = platform_cfg["reply_prefix"]
|
||||
if "reply_in_thread" in platform_cfg:
|
||||
bridged["reply_in_thread"] = platform_cfg["reply_in_thread"]
|
||||
if "require_mention" in platform_cfg:
|
||||
bridged["require_mention"] = platform_cfg["require_mention"]
|
||||
if "free_response_channels" in platform_cfg:
|
||||
@@ -584,7 +602,7 @@ def load_gateway_config() -> GatewayConfig:
|
||||
bridged["group_policy"] = platform_cfg["group_policy"]
|
||||
if "group_allow_from" in platform_cfg:
|
||||
bridged["group_allow_from"] = platform_cfg["group_allow_from"]
|
||||
if plat == Platform.DISCORD and "channel_skill_bindings" in platform_cfg:
|
||||
if plat in (Platform.DISCORD, Platform.SLACK) and "channel_skill_bindings" in platform_cfg:
|
||||
bridged["channel_skill_bindings"] = platform_cfg["channel_skill_bindings"]
|
||||
if "channel_prompts" in platform_cfg:
|
||||
channel_prompts = platform_cfg["channel_prompts"]
|
||||
@@ -609,6 +627,8 @@ def load_gateway_config() -> GatewayConfig:
|
||||
if isinstance(slack_cfg, dict):
|
||||
if "require_mention" in slack_cfg and not os.getenv("SLACK_REQUIRE_MENTION"):
|
||||
os.environ["SLACK_REQUIRE_MENTION"] = str(slack_cfg["require_mention"]).lower()
|
||||
if "strict_mention" in slack_cfg and not os.getenv("SLACK_STRICT_MENTION"):
|
||||
os.environ["SLACK_STRICT_MENTION"] = str(slack_cfg["strict_mention"]).lower()
|
||||
if "allow_bots" in slack_cfg and not os.getenv("SLACK_ALLOW_BOTS"):
|
||||
os.environ["SLACK_ALLOW_BOTS"] = str(slack_cfg["allow_bots"]).lower()
|
||||
frc = slack_cfg.get("free_response_channels")
|
||||
@@ -918,8 +938,12 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
|
||||
slack_token = os.getenv("SLACK_BOT_TOKEN")
|
||||
if slack_token:
|
||||
if Platform.SLACK not in config.platforms:
|
||||
# No yaml config for Slack — env-only setup, enable it
|
||||
config.platforms[Platform.SLACK] = PlatformConfig()
|
||||
config.platforms[Platform.SLACK].enabled = True
|
||||
config.platforms[Platform.SLACK].enabled = True
|
||||
# If yaml config exists, respect its enabled flag (don't override
|
||||
# explicit enabled: false). Token is still stored so skills that
|
||||
# send Slack messages can use it without activating the gateway adapter.
|
||||
config.platforms[Platform.SLACK].token = slack_token
|
||||
slack_home = os.getenv("SLACK_HOME_CHANNEL")
|
||||
if slack_home and Platform.SLACK in config.platforms:
|
||||
@@ -1276,6 +1300,48 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
|
||||
name=os.getenv("QQBOT_HOME_CHANNEL_NAME") or os.getenv(qq_home_name_env, "Home"),
|
||||
)
|
||||
|
||||
# Yuanbao — YUANBAO_APP_ID preferred
|
||||
yuanbao_app_id = os.getenv("YUANBAO_APP_ID") or os.getenv("YUANBAO_APP_KEY")
|
||||
yuanbao_app_secret = os.getenv("YUANBAO_APP_SECRET")
|
||||
if yuanbao_app_id and yuanbao_app_secret:
|
||||
if Platform.YUANBAO not in config.platforms:
|
||||
config.platforms[Platform.YUANBAO] = PlatformConfig()
|
||||
config.platforms[Platform.YUANBAO].enabled = True
|
||||
extra = config.platforms[Platform.YUANBAO].extra
|
||||
extra["app_id"] = yuanbao_app_id
|
||||
extra["app_secret"] = yuanbao_app_secret
|
||||
yuanbao_bot_id = os.getenv("YUANBAO_BOT_ID")
|
||||
if yuanbao_bot_id:
|
||||
extra["bot_id"] = yuanbao_bot_id
|
||||
yuanbao_ws_url = os.getenv("YUANBAO_WS_URL")
|
||||
if yuanbao_ws_url:
|
||||
extra["ws_url"] = yuanbao_ws_url
|
||||
yuanbao_api_domain = os.getenv("YUANBAO_API_DOMAIN")
|
||||
if yuanbao_api_domain:
|
||||
extra["api_domain"] = yuanbao_api_domain
|
||||
yuanbao_route_env = os.getenv("YUANBAO_ROUTE_ENV")
|
||||
if yuanbao_route_env:
|
||||
extra["route_env"] = yuanbao_route_env
|
||||
yuanbao_home = os.getenv("YUANBAO_HOME_CHANNEL")
|
||||
if yuanbao_home:
|
||||
config.platforms[Platform.YUANBAO].home_channel = HomeChannel(
|
||||
platform=Platform.YUANBAO,
|
||||
chat_id=yuanbao_home,
|
||||
name=os.getenv("YUANBAO_HOME_CHANNEL_NAME", "Home"),
|
||||
)
|
||||
yuanbao_dm_policy = os.getenv("YUANBAO_DM_POLICY")
|
||||
if yuanbao_dm_policy:
|
||||
extra["dm_policy"] = yuanbao_dm_policy.strip().lower()
|
||||
yuanbao_dm_allow_from = os.getenv("YUANBAO_DM_ALLOW_FROM")
|
||||
if yuanbao_dm_allow_from:
|
||||
extra["dm_allow_from"] = yuanbao_dm_allow_from
|
||||
yuanbao_group_policy = os.getenv("YUANBAO_GROUP_POLICY")
|
||||
if yuanbao_group_policy:
|
||||
extra["group_policy"] = yuanbao_group_policy.strip().lower()
|
||||
yuanbao_group_allow_from = os.getenv("YUANBAO_GROUP_ALLOW_FROM")
|
||||
if yuanbao_group_allow_from:
|
||||
extra["group_allow_from"] = yuanbao_group_allow_from
|
||||
|
||||
# Session settings
|
||||
idle_minutes = os.getenv("SESSION_IDLE_MINUTES")
|
||||
if idle_minutes:
|
||||
|
||||
@@ -79,7 +79,9 @@ _PLATFORM_DEFAULTS: dict[str, dict[str, Any]] = {
|
||||
"discord": _TIER_HIGH,
|
||||
|
||||
# Tier 2 — edit support, often customer/workspace channels
|
||||
"slack": _TIER_MEDIUM,
|
||||
# Slack: tool_progress off by default — Bolt posts cannot be edited like CLI;
|
||||
# "new"/"all" spam permanent lines in channels (hermes-agent#14663).
|
||||
"slack": {**_TIER_MEDIUM, "tool_progress": "off"},
|
||||
"mattermost": _TIER_MEDIUM,
|
||||
"matrix": _TIER_MEDIUM,
|
||||
"feishu": _TIER_MEDIUM,
|
||||
|
||||
+57
-11
@@ -28,6 +28,7 @@ def mirror_to_session(
|
||||
message_text: str,
|
||||
source_label: str = "cli",
|
||||
thread_id: Optional[str] = None,
|
||||
user_id: Optional[str] = None,
|
||||
) -> bool:
|
||||
"""
|
||||
Append a delivery-mirror message to the target session's transcript.
|
||||
@@ -39,9 +40,20 @@ def mirror_to_session(
|
||||
All errors are caught -- this is never fatal.
|
||||
"""
|
||||
try:
|
||||
session_id = _find_session_id(platform, str(chat_id), thread_id=thread_id)
|
||||
session_id = _find_session_id(
|
||||
platform,
|
||||
str(chat_id),
|
||||
thread_id=thread_id,
|
||||
user_id=user_id,
|
||||
)
|
||||
if not session_id:
|
||||
logger.debug("Mirror: no session found for %s:%s:%s", platform, chat_id, thread_id)
|
||||
logger.debug(
|
||||
"Mirror: no session found for %s:%s:%s:%s",
|
||||
platform,
|
||||
chat_id,
|
||||
thread_id,
|
||||
user_id,
|
||||
)
|
||||
return False
|
||||
|
||||
mirror_msg = {
|
||||
@@ -59,17 +71,33 @@ def mirror_to_session(
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
logger.debug("Mirror failed for %s:%s:%s: %s", platform, chat_id, thread_id, e)
|
||||
logger.debug(
|
||||
"Mirror failed for %s:%s:%s:%s: %s",
|
||||
platform,
|
||||
chat_id,
|
||||
thread_id,
|
||||
user_id,
|
||||
e,
|
||||
)
|
||||
return False
|
||||
|
||||
|
||||
def _find_session_id(platform: str, chat_id: str, thread_id: Optional[str] = None) -> Optional[str]:
|
||||
def _find_session_id(
|
||||
platform: str,
|
||||
chat_id: str,
|
||||
thread_id: Optional[str] = None,
|
||||
user_id: Optional[str] = None,
|
||||
) -> Optional[str]:
|
||||
"""
|
||||
Find the active session_id for a platform + chat_id pair.
|
||||
|
||||
Scans sessions.json entries and matches where origin.chat_id == chat_id
|
||||
on the right platform. DM session keys don't embed the chat_id
|
||||
(e.g. "agent:main:telegram:dm"), so we check the origin dict.
|
||||
|
||||
When *user_id* is provided, prefer exact sender matches. If multiple
|
||||
same-chat candidates exist and none matches the user, return None instead
|
||||
of guessing and contaminating another participant's session.
|
||||
"""
|
||||
if not _SESSIONS_INDEX.exists():
|
||||
return None
|
||||
@@ -81,8 +109,7 @@ def _find_session_id(platform: str, chat_id: str, thread_id: Optional[str] = Non
|
||||
return None
|
||||
|
||||
platform_lower = platform.lower()
|
||||
best_match = None
|
||||
best_updated = ""
|
||||
candidates = []
|
||||
|
||||
for _key, entry in data.items():
|
||||
origin = entry.get("origin") or {}
|
||||
@@ -96,12 +123,31 @@ def _find_session_id(platform: str, chat_id: str, thread_id: Optional[str] = Non
|
||||
origin_thread_id = origin.get("thread_id")
|
||||
if thread_id is not None and str(origin_thread_id or "") != str(thread_id):
|
||||
continue
|
||||
updated = entry.get("updated_at", "")
|
||||
if updated > best_updated:
|
||||
best_updated = updated
|
||||
best_match = entry.get("session_id")
|
||||
candidates.append(entry)
|
||||
|
||||
return best_match
|
||||
if not candidates:
|
||||
return None
|
||||
|
||||
if user_id:
|
||||
exact_user_matches = [
|
||||
entry for entry in candidates
|
||||
if str((entry.get("origin") or {}).get("user_id") or "") == str(user_id)
|
||||
]
|
||||
if exact_user_matches:
|
||||
candidates = exact_user_matches
|
||||
elif len(candidates) > 1:
|
||||
return None
|
||||
elif len(candidates) > 1:
|
||||
distinct_user_ids = {
|
||||
str((entry.get("origin") or {}).get("user_id") or "").strip()
|
||||
for entry in candidates
|
||||
if str((entry.get("origin") or {}).get("user_id") or "").strip()
|
||||
}
|
||||
if len(distinct_user_ids) > 1:
|
||||
return None
|
||||
|
||||
best_entry = max(candidates, key=lambda entry: entry.get("updated_at", ""))
|
||||
return best_entry.get("session_id")
|
||||
|
||||
|
||||
def _append_to_jsonl(session_id: str, message: dict) -> None:
|
||||
|
||||
@@ -10,10 +10,12 @@ Each adapter handles:
|
||||
|
||||
from .base import BasePlatformAdapter, MessageEvent, SendResult
|
||||
from .qqbot import QQAdapter
|
||||
from .yuanbao import YuanbaoAdapter
|
||||
|
||||
__all__ = [
|
||||
"BasePlatformAdapter",
|
||||
"MessageEvent",
|
||||
"SendResult",
|
||||
"QQAdapter",
|
||||
"YuanbaoAdapter",
|
||||
]
|
||||
|
||||
@@ -9,6 +9,7 @@ Exposes an HTTP server with endpoints:
|
||||
- GET /v1/models — lists hermes-agent as an available model
|
||||
- POST /v1/runs — start a run, returns run_id immediately (202)
|
||||
- GET /v1/runs/{run_id}/events — SSE stream of structured lifecycle events
|
||||
- POST /v1/runs/{run_id}/stop — interrupt a running agent
|
||||
- GET /health — health check
|
||||
- GET /health/detailed — rich status for cross-container dashboard probing
|
||||
|
||||
@@ -586,6 +587,9 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
self._run_streams: Dict[str, "asyncio.Queue[Optional[Dict]]"] = {}
|
||||
# Creation timestamps for orphaned-run TTL sweep
|
||||
self._run_streams_created: Dict[str, float] = {}
|
||||
# Active run agent/task references for stop support
|
||||
self._active_run_agents: Dict[str, Any] = {}
|
||||
self._active_run_tasks: Dict[str, "asyncio.Task"] = {}
|
||||
self._session_db: Optional[Any] = None # Lazy-init SessionDB for session continuity
|
||||
|
||||
@staticmethod
|
||||
@@ -2441,6 +2445,7 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
stream_delta_callback=_text_cb,
|
||||
tool_progress_callback=event_cb,
|
||||
)
|
||||
self._active_run_agents[run_id] = agent
|
||||
def _run_sync():
|
||||
r = agent.run_conversation(
|
||||
user_message=user_message,
|
||||
@@ -2480,8 +2485,11 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
q.put_nowait(None)
|
||||
except Exception:
|
||||
pass
|
||||
self._active_run_agents.pop(run_id, None)
|
||||
self._active_run_tasks.pop(run_id, None)
|
||||
|
||||
task = asyncio.create_task(_run_and_close())
|
||||
self._active_run_tasks[run_id] = task
|
||||
try:
|
||||
self._background_tasks.add(task)
|
||||
except TypeError:
|
||||
@@ -2540,6 +2548,44 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
|
||||
return response
|
||||
|
||||
async def _handle_stop_run(self, request: "web.Request") -> "web.Response":
    """POST /v1/runs/{run_id}/stop — interrupt a running agent.

    Looks up the agent and the asyncio task registered for ``run_id``.
    Responds 404 (``run_not_found``) when neither is registered; otherwise
    asks the agent to interrupt, cancels the task, waits a bounded time for
    it to settle, and responds with ``{"run_id": ..., "status": "stopping"}``.
    """
    denied = self._check_auth(request)
    if denied:
        return denied

    run_id = request.match_info["run_id"]
    agent = self._active_run_agents.get(run_id)
    task = self._active_run_tasks.get(run_id)

    if agent is None and task is None:
        not_found = _openai_error(f"Run not found: {run_id}", code="run_not_found")
        return web.json_response(not_found, status=404)

    if agent is not None:
        # Best effort: a failing interrupt must not prevent task cancellation.
        try:
            agent.interrupt("Stop requested via API")
        except Exception:
            pass

    still_running = task is not None and not task.done()
    if still_running:
        task.cancel()
        # Bounded wait: run_conversation() executes in the default
        # executor thread which task.cancel() cannot preempt — we rely on
        # agent.interrupt() above to break the loop. Cap the wait so a
        # slow/unresponsive interrupt can't hang this handler.
        try:
            await asyncio.wait_for(asyncio.shield(task), timeout=5.0)
        except asyncio.TimeoutError:
            logger.warning(
                "[api_server] stop for run %s timed out after 5s; "
                "agent may still be finishing the current step",
                run_id,
            )
        except (asyncio.CancelledError, Exception):
            # The task ending via cancellation or its own error is expected.
            pass

    return web.json_response({"run_id": run_id, "status": "stopping"})
|
||||
|
||||
async def _sweep_orphaned_runs(self) -> None:
|
||||
"""Periodically clean up run streams that were never consumed."""
|
||||
while True:
|
||||
@@ -2554,6 +2600,8 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
logger.debug("[api_server] sweeping orphaned run %s", run_id)
|
||||
self._run_streams.pop(run_id, None)
|
||||
self._run_streams_created.pop(run_id, None)
|
||||
self._active_run_agents.pop(run_id, None)
|
||||
self._active_run_tasks.pop(run_id, None)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# BasePlatformAdapter interface
|
||||
@@ -2589,6 +2637,7 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
# Structured event streaming
|
||||
self._app.router.add_post("/v1/runs", self._handle_runs)
|
||||
self._app.router.add_get("/v1/runs/{run_id}/events", self._handle_run_events)
|
||||
self._app.router.add_post("/v1/runs/{run_id}/stop", self._handle_stop_run)
|
||||
# Start background sweep to clean up orphaned (unconsumed) run streams
|
||||
sweep_task = asyncio.create_task(self._sweep_orphaned_runs())
|
||||
try:
|
||||
|
||||
+158
-5
@@ -336,6 +336,39 @@ def proxy_kwargs_for_aiohttp(proxy_url: str | None) -> tuple[dict, dict]:
|
||||
return {}, {"proxy": proxy_url}
|
||||
|
||||
|
||||
def is_host_excluded_by_no_proxy(hostname: str, no_proxy_value: str | None = None) -> bool:
    """Return True when ``hostname`` matches a ``NO_PROXY`` entry.

    Supports comma- or whitespace-separated entries with optional leading dots
    and ``*.`` wildcards, which match both the apex domain and subdomains.
    A bare ``*`` entry matches every host.

    Args:
        hostname: Host to test. Compared case-insensitively; a trailing
            root dot (``example.com.``) is ignored. ``None`` or empty only
            matches a bare ``*`` entry.
        no_proxy_value: Explicit NO_PROXY string. When ``None`` the
            ``NO_PROXY`` / ``no_proxy`` environment variables are consulted.

    Returns:
        True if the host should bypass the proxy, False otherwise.
    """
    raw = no_proxy_value
    if raw is None:
        raw = os.environ.get("NO_PROXY") or os.environ.get("no_proxy") or ""

    raw = raw.strip()
    if not raw:
        return False

    # Normalize once: tolerate None, surrounding whitespace and the
    # fully-qualified trailing dot so "api.example.com." still matches.
    lower_hostname = (hostname or "").strip().lower().rstrip(".")

    for entry in re.split(r"[\s,]+", raw):
        normalized = entry.strip().lower()
        if not normalized:
            continue
        if normalized == "*":
            return True

        if normalized.startswith("*."):
            normalized = normalized[2:]
        elif normalized.startswith("."):
            normalized = normalized[1:]
        normalized = normalized.rstrip(".")

        # Entries such as "." or "*." carry no domain. Without this guard
        # the suffix test below would degenerate to endswith(".").
        if not normalized:
            continue

        if lower_hostname == normalized or lower_hostname.endswith(f".{normalized}"):
            return True

    return False
|
||||
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
@@ -693,7 +726,15 @@ SUPPORTED_DOCUMENT_TYPES = {
|
||||
".pdf": "application/pdf",
|
||||
".md": "text/markdown",
|
||||
".txt": "text/plain",
|
||||
".csv": "text/csv",
|
||||
".log": "text/plain",
|
||||
".json": "application/json",
|
||||
".xml": "application/xml",
|
||||
".yaml": "application/yaml",
|
||||
".yml": "application/yaml",
|
||||
".toml": "application/toml",
|
||||
".ini": "text/plain",
|
||||
".cfg": "text/plain",
|
||||
".zip": "application/zip",
|
||||
".docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||
".xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
||||
@@ -982,6 +1023,61 @@ def resolve_channel_prompt(
|
||||
return None
|
||||
|
||||
|
||||
def resolve_channel_skills(
    config_extra: dict,
    channel_id: str,
    parent_id: str | None = None,
) -> list[str] | None:
    """Resolve auto-loaded skill(s) for a channel/thread from platform config.

    Looks up ``channel_skill_bindings`` in the adapter's ``config.extra`` dict.

    Config format::

        channel_skill_bindings:
          - id: "C0123"            # Slack channel ID or Discord channel/forum ID
            skills: ["skill-a", "skill-b"]
          - id: "D0ABCDE"
            skill: "solo-skill"    # single string also accepted

    Prefers an exact match on *channel_id*; falls back to *parent_id*
    (useful for forum threads / Slack threads inheriting the parent channel's
    binding). The preference holds regardless of the order in which the
    bindings are listed.

    Args:
        config_extra: The adapter's ``config.extra`` mapping.
        channel_id: ID of the channel or thread the message arrived in.
        parent_id: Optional ID of the parent channel/forum.

    Returns:
        A deduplicated list of skill names (order preserved), or None if
        no match is found.
    """
    bindings = config_extra.get("channel_skill_bindings") or []
    if not isinstance(bindings, list) or not bindings:
        return None

    # Ordered lookup keys: the exact channel first, then the parent, so a
    # parent binding listed earlier in the config cannot shadow the
    # channel's own binding.
    lookup_order: list[str] = []
    for candidate in (channel_id, parent_id):
        if candidate and str(candidate) not in lookup_order:
            lookup_order.append(str(candidate))
    if not lookup_order:
        return None

    for target_id in lookup_order:
        for entry in bindings:
            if not isinstance(entry, dict):
                continue
            if str(entry.get("id", "")) != target_id:
                continue
            skills = entry.get("skills") or entry.get("skill")
            if isinstance(skills, str):
                single = skills.strip()
                return [single] if single else None
            if isinstance(skills, list) and skills:
                unique: list[str] = []
                for name in skills:
                    if not isinstance(name, str):
                        continue
                    cleaned = name.strip()
                    if cleaned and cleaned not in unique:
                        unique.append(cleaned)
                return unique or None
    return None
|
||||
|
||||
|
||||
class BasePlatformAdapter(ABC):
|
||||
"""
|
||||
Base class for platform adapters.
|
||||
@@ -1025,7 +1121,20 @@ class BasePlatformAdapter(ABC):
|
||||
self._post_delivery_callbacks: Dict[str, Any] = {}
|
||||
self._expected_cancelled_tasks: set[asyncio.Task] = set()
|
||||
self._busy_session_handler: Optional[Callable[[MessageEvent, str], Awaitable[bool]]] = None
|
||||
# Chats where auto-TTS on voice input is disabled (set by /voice off)
|
||||
# Auto-TTS on voice input: ``_auto_tts_default`` is the global default
|
||||
# (``voice.auto_tts`` in config.yaml, pushed by GatewayRunner on connect).
|
||||
# Per-chat overrides live in two sets populated from ``_voice_mode``:
|
||||
# - ``_auto_tts_enabled_chats``: chat explicitly opted in via ``/voice on``
|
||||
# or ``/voice tts`` (mode is ``voice_only`` or ``all``). Fires even when
|
||||
# the global default is False.
|
||||
# - ``_auto_tts_disabled_chats``: chat explicitly opted out via
|
||||
# ``/voice off`` (mode is ``off``). Suppresses auto-TTS even when the
|
||||
# global default is True.
|
||||
# The gate in _process_message() is:
|
||||
# fire if chat in _auto_tts_enabled_chats
|
||||
# OR (_auto_tts_default and chat not in _auto_tts_disabled_chats)
|
||||
self._auto_tts_default: bool = False
|
||||
self._auto_tts_enabled_chats: set = set()
|
||||
self._auto_tts_disabled_chats: set = set()
|
||||
# Chats where typing indicator is paused (e.g. during approval waits).
|
||||
# _keep_typing skips send_typing when the chat_id is in this set.
|
||||
@@ -1047,6 +1156,21 @@ class BasePlatformAdapter(ABC):
|
||||
def fatal_error_retryable(self) -> bool:
|
||||
return self._fatal_error_retryable
|
||||
|
||||
def _should_auto_tts_for_chat(self, chat_id: str) -> bool:
|
||||
"""Whether auto-TTS on voice input should fire for ``chat_id``.
|
||||
|
||||
Decision layers (Issue #16007):
|
||||
1. Explicit ``/voice on`` or ``/voice tts`` → always fire (even if
|
||||
``voice.auto_tts`` is False).
|
||||
2. Explicit ``/voice off`` → never fire.
|
||||
3. Fall back to the global ``voice.auto_tts`` config default.
|
||||
"""
|
||||
if chat_id in self._auto_tts_enabled_chats:
|
||||
return True
|
||||
if chat_id in self._auto_tts_disabled_chats:
|
||||
return False
|
||||
return bool(self._auto_tts_default)
|
||||
|
||||
def set_fatal_error_handler(self, handler: Callable[["BasePlatformAdapter"], Awaitable[None] | None]) -> None:
|
||||
self._fatal_error_handler = handler
|
||||
|
||||
@@ -1230,6 +1354,27 @@ class BasePlatformAdapter(ABC):
|
||||
"""
|
||||
return SendResult(success=False, error="Not supported")
|
||||
|
||||
async def delete_message(
    self,
    chat_id: str,
    message_id: str,
) -> bool:
    """Delete a previously sent message (optional capability).

    This base implementation performs no deletion and always reports
    failure, so callers fall back to leaving the message in place.

    Per the original note, the stream consumer's fresh-final cleanup path
    (see openclaw/openclaw#72038) uses this to remove long-lived preview
    messages after the completed reply has been sent as a fresh message.

    Returns:
        ``True`` on successful deletion, ``False`` otherwise. Adapters for
        platforms with a deletion API should override this method.
    """
    deletion_supported = False
    return deletion_supported
|
||||
|
||||
async def send_typing(self, chat_id: str, metadata=None) -> None:
|
||||
"""
|
||||
Send a typing indicator.
|
||||
@@ -2214,12 +2359,14 @@ class BasePlatformAdapter(ABC):
|
||||
logger.info("[%s] extract_local_files found %d file(s) in response", self.name, len(local_files))
|
||||
|
||||
# Auto-TTS: if voice message, generate audio FIRST (before sending text)
|
||||
# Skipped when the chat has voice mode disabled (/voice off)
|
||||
# Gated via ``_should_auto_tts_for_chat``: fires when the chat has
|
||||
# an explicit ``/voice on|tts`` opt-in OR when ``voice.auto_tts`` is
|
||||
# True globally and no ``/voice off`` has been issued.
|
||||
_tts_path = None
|
||||
if (event.message_type == MessageType.VOICE
|
||||
if (self._should_auto_tts_for_chat(event.source.chat_id)
|
||||
and event.message_type == MessageType.VOICE
|
||||
and text_content
|
||||
and not media_files
|
||||
and event.source.chat_id not in self._auto_tts_disabled_chats):
|
||||
and not media_files):
|
||||
try:
|
||||
from tools.tts_tool import text_to_speech_tool, check_tts_requirements
|
||||
if check_tts_requirements():
|
||||
@@ -2543,6 +2690,9 @@ class BasePlatformAdapter(ABC):
|
||||
user_id_alt: Optional[str] = None,
|
||||
chat_id_alt: Optional[str] = None,
|
||||
is_bot: bool = False,
|
||||
guild_id: Optional[str] = None,
|
||||
parent_chat_id: Optional[str] = None,
|
||||
message_id: Optional[str] = None,
|
||||
) -> SessionSource:
|
||||
"""Helper to build a SessionSource for this platform."""
|
||||
# Normalize empty topic to None
|
||||
@@ -2560,6 +2710,9 @@ class BasePlatformAdapter(ABC):
|
||||
user_id_alt=user_id_alt,
|
||||
chat_id_alt=chat_id_alt,
|
||||
is_bot=is_bot,
|
||||
guild_id=str(guild_id) if guild_id else None,
|
||||
parent_chat_id=str(parent_chat_id) if parent_chat_id else None,
|
||||
message_id=str(message_id) if message_id else None,
|
||||
)
|
||||
|
||||
@abstractmethod
|
||||
|
||||
@@ -2315,11 +2315,6 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
async def slash_background(interaction: discord.Interaction, prompt: str):
|
||||
await self._run_simple_slash(interaction, f"/background {prompt}", "Background task started~")
|
||||
|
||||
@tree.command(name="btw", description="Ephemeral side question using session context")
|
||||
@discord.app_commands.describe(question="Your side question (no tools, not persisted)")
|
||||
async def slash_btw(interaction: discord.Interaction, question: str):
|
||||
await self._run_simple_slash(interaction, f"/btw {question}")
|
||||
|
||||
# ── Auto-register any gateway-available commands not yet on the tree ──
|
||||
# This ensures new commands added to COMMAND_REGISTRY in
|
||||
# hermes_cli/commands.py automatically appear as Discord slash
|
||||
@@ -2684,21 +2679,8 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
skills: ["skill-a", "skill-b"]
|
||||
Also checks parent_id so forum threads inherit the forum's bindings.
|
||||
"""
|
||||
bindings = self.config.extra.get("channel_skill_bindings", [])
|
||||
if not bindings:
|
||||
return None
|
||||
ids_to_check = {channel_id}
|
||||
if parent_id:
|
||||
ids_to_check.add(parent_id)
|
||||
for entry in bindings:
|
||||
entry_id = str(entry.get("id", ""))
|
||||
if entry_id in ids_to_check:
|
||||
skills = entry.get("skills") or entry.get("skill")
|
||||
if isinstance(skills, str):
|
||||
return [skills]
|
||||
if isinstance(skills, list) and skills:
|
||||
return list(dict.fromkeys(skills)) # dedup, preserve order
|
||||
return None
|
||||
from gateway.platforms.base import resolve_channel_skills
|
||||
return resolve_channel_skills(self.config.extra, channel_id, parent_id)
|
||||
|
||||
def _resolve_channel_prompt(self, channel_id: str, parent_id: str | None = None) -> str | None:
|
||||
"""Resolve a Discord per-channel prompt, preferring the exact channel over its parent."""
|
||||
@@ -3261,6 +3243,7 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
if auto_thread and not skip_thread and not is_voice_linked_channel and not is_reply_message:
|
||||
thread = await self._auto_create_thread(message)
|
||||
if thread:
|
||||
parent_channel_id = str(message.channel.id)
|
||||
is_thread = True
|
||||
thread_id = str(thread.id)
|
||||
auto_threaded_channel = thread
|
||||
@@ -3311,6 +3294,7 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
chat_topic = self._get_effective_topic(message.channel, is_thread=is_thread)
|
||||
|
||||
# Build source
|
||||
guild = getattr(message, "guild", None)
|
||||
source = self.build_source(
|
||||
chat_id=str(effective_channel.id),
|
||||
chat_name=chat_name,
|
||||
@@ -3320,6 +3304,9 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
thread_id=thread_id,
|
||||
chat_topic=chat_topic,
|
||||
is_bot=getattr(message.author, "bot", False),
|
||||
guild_id=str(guild.id) if guild else None,
|
||||
parent_chat_id=parent_channel_id,
|
||||
message_id=str(message.id),
|
||||
)
|
||||
|
||||
# Build media URLs -- download image attachments to local cache so the
|
||||
|
||||
@@ -28,6 +28,7 @@ from email.header import decode_header
|
||||
from email.mime.multipart import MIMEMultipart
|
||||
from email.mime.text import MIMEText
|
||||
from email.mime.base import MIMEBase
|
||||
from email.utils import formatdate
|
||||
from email import encoders
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
@@ -504,6 +505,7 @@ class EmailAdapter(BasePlatformAdapter):
|
||||
msg["In-Reply-To"] = original_msg_id
|
||||
msg["References"] = original_msg_id
|
||||
|
||||
msg["Date"] = formatdate(localtime=True)
|
||||
msg_id = f"<hermes-{uuid.uuid4().hex[:12]}@{self._address.split('@')[1]}>"
|
||||
msg["Message-ID"] = msg_id
|
||||
|
||||
@@ -586,6 +588,7 @@ class EmailAdapter(BasePlatformAdapter):
|
||||
msg["In-Reply-To"] = original_msg_id
|
||||
msg["References"] = original_msg_id
|
||||
|
||||
msg["Date"] = formatdate(localtime=True)
|
||||
msg_id = f"<hermes-{uuid.uuid4().hex[:12]}@{self._address.split('@')[1]}>"
|
||||
msg["Message-ID"] = msg_id
|
||||
|
||||
|
||||
@@ -57,6 +57,15 @@ class MessageDeduplicator:
|
||||
if len(self._seen) > self._max_size:
|
||||
cutoff = now - self._ttl
|
||||
self._seen = {k: v for k, v in self._seen.items() if v > cutoff}
|
||||
if len(self._seen) > self._max_size:
|
||||
# TTL pruning alone does not cap the cache when every entry is
|
||||
# still fresh. Keep the newest entries so the helper's
|
||||
# max_size bound is enforced under sustained traffic.
|
||||
newest = sorted(
|
||||
self._seen.items(),
|
||||
key=lambda item: item[1],
|
||||
)[-self._max_size:]
|
||||
self._seen = dict(newest)
|
||||
return False
|
||||
|
||||
def clear(self):
|
||||
|
||||
@@ -1178,13 +1178,83 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
# Event callbacks
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _is_self_sender(self, sender: str) -> bool:
|
||||
"""Return True if the sender refers to the bot's own account.
|
||||
|
||||
Matrix user IDs are byte-compared after trimming whitespace and
|
||||
lowercasing — some homeservers normalize the localpart case
|
||||
differently at different API surfaces, and the reply-loop tail
|
||||
of the "hall of mirrors" bug (#15763) has been observed with the
|
||||
bot's own account bypassing a case-sensitive equality check.
|
||||
|
||||
When ``self._user_id`` is empty (whoami hasn't resolved yet, or
|
||||
login failed), we cannot prove a sender is NOT us, so we return
|
||||
True defensively — an unidentified bot dropping its own events
|
||||
is always preferable to falling into an echo loop.
|
||||
"""
|
||||
own = (self._user_id or "").strip().lower()
|
||||
if not own:
|
||||
return True
|
||||
return sender.strip().lower() == own
|
||||
|
||||
@staticmethod
|
||||
def _is_system_or_bridge_sender(sender: str) -> bool:
|
||||
"""Return True if the sender looks like a system / bridge / appservice
|
||||
identity rather than a real user.
|
||||
|
||||
Appservice namespaces on Matrix conventionally prefix bot / puppet
|
||||
user IDs with an underscore (e.g. ``@_telegram_12345:server``,
|
||||
``@_discord_999:server``, ``@_slack_...:server``). Server-notices
|
||||
bots and bridge-controller bots on many homeservers use the same
|
||||
pattern.
|
||||
|
||||
We treat these as system identities for pairing purposes: they
|
||||
should never be offered a pairing code, because an operator
|
||||
approving the code would hand the bridge itself permanent
|
||||
authorization — and every outbound message relayed by the bridge
|
||||
would then loop back into the agent as an "authorized user
|
||||
message", which is the root of issue #15763.
|
||||
|
||||
Matches:
|
||||
``@_something:server`` — appservice namespace convention
|
||||
``@:server`` — malformed / empty localpart
|
||||
``:server`` — malformed, no leading ``@``
|
||||
"""
|
||||
s = (sender or "").strip()
|
||||
if not s:
|
||||
return True
|
||||
# Localpart is everything between leading '@' and ':'
|
||||
if s.startswith("@"):
|
||||
s = s[1:]
|
||||
if ":" in s:
|
||||
localpart, _, _ = s.partition(":")
|
||||
else:
|
||||
localpart = s
|
||||
if not localpart:
|
||||
return True
|
||||
return localpart.startswith("_")
|
||||
|
||||
async def _on_room_message(self, event: Any) -> None:
|
||||
"""Handle incoming room message events (text, media)."""
|
||||
room_id = str(getattr(event, "room_id", ""))
|
||||
sender = str(getattr(event, "sender", ""))
|
||||
|
||||
# Ignore own messages.
|
||||
if sender == self._user_id:
|
||||
# Ignore own messages (case-insensitive; also drops when our own
|
||||
# user_id hasn't been resolved yet — see _is_self_sender docstring
|
||||
# and issue #15763).
|
||||
if self._is_self_sender(sender):
|
||||
return
|
||||
|
||||
# Ignore appservice / bridge / system identities so they never
|
||||
# trigger the pairing flow. Once a bridge user is paired, every
|
||||
# outbound message it relays would loop back as an authorized
|
||||
# user message (the "hall of mirrors" in #15763).
|
||||
if self._is_system_or_bridge_sender(sender):
|
||||
logger.debug(
|
||||
"Matrix: ignoring system/bridge sender %s in %s",
|
||||
sender,
|
||||
room_id,
|
||||
)
|
||||
return
|
||||
|
||||
# Deduplicate by event ID.
|
||||
@@ -1654,7 +1724,7 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
async def _on_reaction(self, event: Any) -> None:
|
||||
"""Handle incoming reaction events."""
|
||||
sender = str(getattr(event, "sender", ""))
|
||||
if sender == self._user_id:
|
||||
if self._is_self_sender(sender):
|
||||
return
|
||||
event_id = str(getattr(event, "event_id", ""))
|
||||
if self._is_duplicate_event(event_id):
|
||||
|
||||
+753
-70
File diff suppressed because it is too large
Load Diff
@@ -1209,6 +1209,31 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
)
|
||||
return SendResult(success=False, error=str(e))
|
||||
|
||||
async def delete_message(self, chat_id: str, message_id: str) -> bool:
    """Delete a previously sent Telegram message.

    Per the original note, the stream consumer's fresh-final cleanup path
    (ported from openclaw/openclaw#72038) uses this to remove long-lived
    preview messages after the completed reply was sent as a fresh
    message; the Bot API is said to limit ``deleteMessage`` to recent
    bot-posted messages (verify against the Telegram docs).

    Both IDs are converted with ``int()`` before the call. Any failure,
    including a non-numeric ID, is logged at debug level and reported as
    ``False`` rather than raised.

    Returns:
        ``True`` when the bot call completed, ``False`` when no bot is
        available or the deletion failed.
    """
    bot = self._bot
    if not bot:
        return False
    try:
        await bot.delete_message(
            chat_id=int(chat_id),
            message_id=int(message_id),
        )
    except Exception as exc:
        logger.debug(
            "[%s] Failed to delete Telegram message %s: %s",
            self.name, message_id, exc,
        )
        return False
    return True
|
||||
|
||||
async def send_update_prompt(
|
||||
self, chat_id: str, prompt: str, default: str = "",
|
||||
session_key: str = "",
|
||||
@@ -2328,6 +2353,26 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
user = getattr(entity, "user", None)
|
||||
if user and getattr(user, "id", None) == bot_id:
|
||||
return True
|
||||
elif entity_type == "bot_command" and expected:
|
||||
# Telegram's official group-disambiguation form for slash
|
||||
# commands (``/cmd@botname``) is emitted as a single
|
||||
# ``bot_command`` entity covering the whole span — there
|
||||
# is no accompanying ``mention`` entity. Treat it as a
|
||||
# direct address to this bot when the ``@botname`` suffix
|
||||
# matches. This is the form Telegram's own command menu
|
||||
# autocomplete produces in groups, so dropping it at the
|
||||
# mention gate would break /new, /reset, /help, ... for
|
||||
# every group that has ``require_mention`` enabled (#15415).
|
||||
offset = int(getattr(entity, "offset", -1))
|
||||
length = int(getattr(entity, "length", 0))
|
||||
if offset < 0 or length <= 0:
|
||||
continue
|
||||
command_text = source_text[offset:offset + length]
|
||||
at_index = command_text.find("@")
|
||||
if at_index < 0:
|
||||
continue
|
||||
if command_text[at_index:].strip().lower() == expected:
|
||||
return True
|
||||
return False
|
||||
|
||||
def _message_matches_mention_patterns(self, message: Message) -> bool:
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,647 @@
|
||||
"""
|
||||
yuanbao_media.py — 元宝平台媒体处理模块
|
||||
|
||||
提供 COS 上传、文件下载、TIM 媒体消息构建等功能。
|
||||
移植自 TypeScript 版 media.ts(yuanbao-openclaw-plugin),
|
||||
使用 httpx 替代 cos-nodejs-sdk-v5,避免引入额外 SDK 依赖。
|
||||
|
||||
COS 上传流程:
|
||||
1. 调用 genUploadInfo 获取临时凭证(tmpSecretId/tmpSecretKey/sessionToken)
|
||||
2. 用临时凭证通过 HMAC-SHA1 签名构建 Authorization 头
|
||||
3. HTTP PUT 上传到 COS
|
||||
|
||||
TIM 消息体构建:
|
||||
- buildImageMsgBody() → TIMImageElem
|
||||
- buildFileMsgBody() → TIMFileElem
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import hmac
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import secrets
|
||||
import struct
|
||||
import time
|
||||
import urllib.parse
|
||||
from datetime import datetime, timezone, timedelta
|
||||
from typing import Optional, Any
|
||||
|
||||
import httpx
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ============ 常量 ============
|
||||
|
||||
# API path used to request upload information (genUploadInfo).
UPLOAD_INFO_PATH = "/api/resource/genUploadInfo"
# Default API host.
DEFAULT_API_DOMAIN = "yuanbao.tencent.com"
# Default size limit, in megabytes.
DEFAULT_MAX_SIZE_MB = 50

# COS accelerate domain suffix (prefer the global-acceleration endpoint)
COS_USE_ACCELERATE = True
|
||||
|
||||
# ============ 类型映射 ============
|
||||
|
||||
# MIME → image_format 数字(TIM 协议字段)
|
||||
_MIME_TO_IMAGE_FORMAT: dict[str, int] = {
|
||||
"image/jpeg": 1,
|
||||
"image/jpg": 1,
|
||||
"image/gif": 2,
|
||||
"image/png": 3,
|
||||
"image/bmp": 4,
|
||||
"image/webp": 255,
|
||||
"image/heic": 255,
|
||||
"image/tiff": 255,
|
||||
}
|
||||
|
||||
# 文件扩展名 → MIME
|
||||
_EXT_TO_MIME: dict[str, str] = {
|
||||
".jpg": "image/jpeg",
|
||||
".jpeg": "image/jpeg",
|
||||
".png": "image/png",
|
||||
".gif": "image/gif",
|
||||
".webp": "image/webp",
|
||||
".bmp": "image/bmp",
|
||||
".heic": "image/heic",
|
||||
".tiff": "image/tiff",
|
||||
".ico": "image/x-icon",
|
||||
".pdf": "application/pdf",
|
||||
".doc": "application/msword",
|
||||
".docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||
".xls": "application/vnd.ms-excel",
|
||||
".xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
||||
".ppt": "application/vnd.ms-powerpoint",
|
||||
".pptx": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
|
||||
".txt": "text/plain",
|
||||
".zip": "application/zip",
|
||||
".tar": "application/x-tar",
|
||||
".gz": "application/gzip",
|
||||
".mp3": "audio/mpeg",
|
||||
".mp4": "video/mp4",
|
||||
".wav": "audio/wav",
|
||||
".ogg": "audio/ogg",
|
||||
".webm": "video/webm",
|
||||
}
|
||||
|
||||
|
||||
# ============ 工具函数 ============
|
||||
|
||||
def guess_mime_type(filename: str) -> str:
    """Guess a MIME type from the extension of *filename*.

    The extension is lower-cased and looked up in ``_EXT_TO_MIME``; an unknown
    or missing extension yields ``"application/octet-stream"``.
    """
    _, extension = os.path.splitext(filename)
    try:
        return _EXT_TO_MIME[extension.lower()]
    except KeyError:
        return "application/octet-stream"
|
||||
|
||||
|
||||
def is_image(filename: str, mime_type: str = "") -> bool:
    """Tell whether a file should be treated as an image.

    A ``mime_type`` starting with ``image/`` decides immediately; otherwise the
    lower-cased extension of *filename* is checked against a fixed list.
    """
    image_suffixes = (
        ".jpg", ".jpeg", ".png", ".gif", ".webp",
        ".bmp", ".heic", ".tiff", ".ico",
    )
    # ``or`` keeps the short-circuit: the file name is not inspected when the
    # MIME type already says "image".
    return (
        mime_type.startswith("image/")
        or os.path.splitext(filename)[1].lower() in image_suffixes
    )
|
||||
|
||||
|
||||
def get_image_format(mime_type: str) -> int:
    """Return the TIM ``image_format`` number for *mime_type*.

    The lookup is case-insensitive; MIME types missing from
    ``_MIME_TO_IMAGE_FORMAT`` map to 255.
    """
    key = mime_type.lower()
    if key in _MIME_TO_IMAGE_FORMAT:
        return _MIME_TO_IMAGE_FORMAT[key]
    return 255
|
||||
|
||||
|
||||
def md5_hex(data: bytes) -> str:
    """Return the MD5 digest of *data* as a hexadecimal string."""
    hasher = hashlib.md5()
    hasher.update(data)
    return hasher.hexdigest()
|
||||
|
||||
|
||||
def generate_file_id() -> str:
    """Generate a random file ID: 16 random bytes rendered as 32 hex characters."""
    random_bytes = secrets.token_bytes(16)
    return random_bytes.hex()
|
||||
|
||||
|
||||
|
||||
# ============ 图片尺寸解析(纯 Python,无需 Pillow) ============
|
||||
|
||||
def parse_image_size(data: bytes) -> Optional[dict[str, int]]:
    """Read an image's pixel dimensions without third-party dependencies.

    The PNG, JPEG, GIF and WebP header parsers are tried in that order and the
    first truthy result wins.

    Returns:
        ``{"width": w, "height": h}``, or ``None`` when no parser recognises
        the data.
    """
    dimensions = None
    for sniff in (_parse_png_size, _parse_jpeg_size, _parse_gif_size, _parse_webp_size):
        dimensions = sniff(data)
        if dimensions:
            break
    return dimensions
|
||||
|
||||
|
||||
def _parse_png_size(buf: bytes) -> Optional[dict[str, int]]:
|
||||
if len(buf) < 24:
|
||||
return None
|
||||
if buf[:4] != b"\x89PNG":
|
||||
return None
|
||||
w = struct.unpack(">I", buf[16:20])[0]
|
||||
h = struct.unpack(">I", buf[20:24])[0]
|
||||
return {"width": w, "height": h}
|
||||
|
||||
|
||||
def _parse_jpeg_size(buf: bytes) -> Optional[dict[str, int]]:
|
||||
if len(buf) < 4 or buf[0] != 0xFF or buf[1] != 0xD8:
|
||||
return None
|
||||
i = 2
|
||||
while i < len(buf) - 9:
|
||||
if buf[i] != 0xFF:
|
||||
i += 1
|
||||
continue
|
||||
marker = buf[i + 1]
|
||||
if marker in (0xC0, 0xC2):
|
||||
h = struct.unpack(">H", buf[i + 5: i + 7])[0]
|
||||
w = struct.unpack(">H", buf[i + 7: i + 9])[0]
|
||||
return {"width": w, "height": h}
|
||||
if i + 3 < len(buf):
|
||||
i += 2 + struct.unpack(">H", buf[i + 2: i + 4])[0]
|
||||
else:
|
||||
break
|
||||
return None
|
||||
|
||||
|
||||
def _parse_gif_size(buf: bytes) -> Optional[dict[str, int]]:
|
||||
if len(buf) < 10:
|
||||
return None
|
||||
sig = buf[:6].decode("ascii", errors="replace")
|
||||
if sig not in ("GIF87a", "GIF89a"):
|
||||
return None
|
||||
w = struct.unpack("<H", buf[6:8])[0]
|
||||
h = struct.unpack("<H", buf[8:10])[0]
|
||||
return {"width": w, "height": h}
|
||||
|
||||
|
||||
def _parse_webp_size(buf: bytes) -> Optional[dict[str, int]]:
|
||||
if len(buf) < 16:
|
||||
return None
|
||||
if buf[:4] != b"RIFF" or buf[8:12] != b"WEBP":
|
||||
return None
|
||||
chunk = buf[12:16].decode("ascii", errors="replace")
|
||||
if chunk == "VP8 ":
|
||||
if len(buf) >= 30 and buf[23] == 0x9D and buf[24] == 0x01 and buf[25] == 0x2A:
|
||||
w = struct.unpack("<H", buf[26:28])[0] & 0x3FFF
|
||||
h = struct.unpack("<H", buf[28:30])[0] & 0x3FFF
|
||||
return {"width": w, "height": h}
|
||||
elif chunk == "VP8L":
|
||||
if len(buf) >= 25 and buf[20] == 0x2F:
|
||||
bits = struct.unpack("<I", buf[21:25])[0]
|
||||
w = (bits & 0x3FFF) + 1
|
||||
h = ((bits >> 14) & 0x3FFF) + 1
|
||||
return {"width": w, "height": h}
|
||||
elif chunk == "VP8X":
|
||||
if len(buf) >= 30:
|
||||
w = (buf[24] | (buf[25] << 8) | (buf[26] << 16)) + 1
|
||||
h = (buf[27] | (buf[28] << 8) | (buf[29] << 16)) + 1
|
||||
return {"width": w, "height": h}
|
||||
return None
|
||||
|
||||
|
||||
# ============ URL 下载 ============
|
||||
|
||||
async def download_url(
    url: str,
    max_size_mb: int = DEFAULT_MAX_SIZE_MB,
) -> tuple[bytes, str]:
    """Download *url* and return ``(data, content_type)``.

    A HEAD request is tried first so an oversized file can be rejected before
    any body is transferred. That pre-check is best-effort: a transport error,
    a non-2xx status or an unparsable ``Content-Length`` on HEAD is ignored and
    the GET proceeds. The GET body is streamed and aborted as soon as the limit
    is exceeded.

    Args:
        url: HTTP(S) URL to fetch.
        max_size_mb: Maximum accepted size in MB.

    Returns:
        The body bytes and the ``Content-Type`` header value with any
        parameters (text after ``;``) removed.

    Raises:
        ValueError: The content exceeds the size limit.
        httpx.HTTPError: Network or HTTP error on the GET request.
    """
    max_bytes = max_size_mb * 1024 * 1024
    async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
        # Pre-check the size with HEAD; some servers reject or mishandle HEAD,
        # so a failure here must not prevent the GET below.
        try:
            head = await client.head(url)
        except httpx.HTTPError as exc:
            logger.debug("HEAD pre-check failed, falling back to GET: %s", exc)
        else:
            if 200 <= head.status_code < 300:
                content_length = _declared_content_length(head.headers)
                if content_length > max_bytes:
                    raise ValueError(
                        f"文件过大: {content_length / 1024 / 1024:.1f} MB > {max_size_mb} MB"
                    )

        # Stream the GET so the limit is enforced while downloading.
        async with client.stream("GET", url) as resp:
            resp.raise_for_status()

            content_type = resp.headers.get("content-type", "").split(";")[0].strip()

            # Reject early when the GET response itself declares too large a body.
            content_length = _declared_content_length(resp.headers)
            if content_length > max_bytes:
                raise ValueError(
                    f"文件过大: {content_length / 1024 / 1024:.1f} MB > {max_size_mb} MB"
                )

            chunks: list[bytes] = []
            downloaded = 0
            async for chunk in resp.aiter_bytes(65536):
                downloaded += len(chunk)
                if downloaded > max_bytes:
                    raise ValueError(
                        f"文件过大: 已超过 {max_size_mb} MB 限制"
                    )
                chunks.append(chunk)

    data = b"".join(chunks)
    return data, content_type


def _declared_content_length(headers: Any) -> int:
    """Return the ``content-length`` header as an int, or 0 if absent or malformed."""
    raw = headers.get("content-length")
    if not raw:
        return 0
    try:
        return int(raw)
    except (TypeError, ValueError):
        return 0
|
||||
|
||||
|
||||
# ============ COS 鉴权(HMAC-SHA1) ============
|
||||
|
||||
def _cos_sign(
|
||||
method: str,
|
||||
path: str,
|
||||
params: dict[str, str],
|
||||
headers: dict[str, str],
|
||||
secret_id: str,
|
||||
secret_key: str,
|
||||
start_time: Optional[int] = None,
|
||||
expire_seconds: int = 3600,
|
||||
) -> str:
|
||||
"""
|
||||
构建 COS 请求签名(q-sign-algorithm=sha1 方案)。
|
||||
参考:https://cloud.tencent.com/document/product/436/7778
|
||||
|
||||
Args:
|
||||
method: HTTP 方法(小写,如 "put")
|
||||
path: URL 路径(URL encode 后的小写)
|
||||
params: URL 查询参数 dict(用于签名)
|
||||
headers: 参与签名的请求头 dict(key 需小写)
|
||||
secret_id: 临时 SecretId(tmpSecretId)
|
||||
secret_key: 临时 SecretKey(tmpSecretKey)
|
||||
start_time: 签名起始 Unix 时间戳(默认 now)
|
||||
expire_seconds: 签名有效期(秒,默认 3600)
|
||||
|
||||
Returns:
|
||||
Authorization header 值(完整字符串)
|
||||
"""
|
||||
now = int(time.time())
|
||||
q_sign_time = f"{start_time or now};{(start_time or now) + expire_seconds}"
|
||||
|
||||
# Step 1: SignKey = HMAC-SHA1(SecretKey, q-sign-time)
|
||||
sign_key = hmac.new(
|
||||
secret_key.encode("utf-8"),
|
||||
q_sign_time.encode("utf-8"),
|
||||
hashlib.sha1,
|
||||
).hexdigest()
|
||||
|
||||
# Step 2: HttpString
|
||||
# 参数和头部需按字典序排列,key 小写
|
||||
sorted_params = sorted((k.lower(), urllib.parse.quote(str(v), safe="") ) for k, v in params.items())
|
||||
sorted_headers = sorted((k.lower(), urllib.parse.quote(str(v), safe="") ) for k, v in headers.items())
|
||||
|
||||
url_param_list = ";".join(k for k, _ in sorted_params)
|
||||
url_params = "&".join(f"{k}={v}" for k, v in sorted_params)
|
||||
header_list = ";".join(k for k, _ in sorted_headers)
|
||||
header_str = "&".join(f"{k}={v}" for k, v in sorted_headers)
|
||||
|
||||
http_string = "\n".join([
|
||||
method.lower(),
|
||||
path,
|
||||
url_params,
|
||||
header_str,
|
||||
"",
|
||||
])
|
||||
|
||||
# Step 3: StringToSign = sha1 hash of HttpString
|
||||
sha1_of_http = hashlib.sha1(http_string.encode("utf-8")).hexdigest()
|
||||
string_to_sign = "\n".join([
|
||||
"sha1",
|
||||
q_sign_time,
|
||||
sha1_of_http,
|
||||
"",
|
||||
])
|
||||
|
||||
# Step 4: Signature = HMAC-SHA1(SignKey, StringToSign)
|
||||
signature = hmac.new(
|
||||
sign_key.encode("utf-8"),
|
||||
string_to_sign.encode("utf-8"),
|
||||
hashlib.sha1,
|
||||
).hexdigest()
|
||||
|
||||
return (
|
||||
f"q-sign-algorithm=sha1"
|
||||
f"&q-ak={secret_id}"
|
||||
f"&q-sign-time={q_sign_time}"
|
||||
f"&q-key-time={q_sign_time}"
|
||||
f"&q-header-list={header_list}"
|
||||
f"&q-url-param-list={url_param_list}"
|
||||
f"&q-signature={signature}"
|
||||
)
|
||||
|
||||
|
||||
# ============ 主要公开 API ============
|
||||
|
||||
async def get_cos_credentials(
    app_key: str,
    api_domain: str,
    token: str,
    filename: str = "file",
    file_id: Optional[str] = None,
    bot_id: str = "",
    route_env: str = "",
) -> dict:
    """Call the genUploadInfo endpoint to obtain temporary COS upload credentials.

    Args:
        app_key: Application key; sent as the ``X-ID`` header when *bot_id*
            is empty.
        api_domain: API base, e.g. ``https://bot.yuanbao.tencent.com``. A bare
            host name without a scheme is accepted and treated as HTTPS.
        token: Currently valid sign token (``X-Token`` header).
        filename: Name of the file to upload, including its extension.
        file_id: Client-generated unique file ID; generated when omitted.
        bot_id: Bot account ID; preferred over *app_key* for ``X-ID``.
        route_env: When non-empty, sent as the ``X-Route-Env`` header.

    Returns:
        The upload configuration dict: the response's ``data`` object when
        present and non-empty, otherwise the response itself. Fields consumed
        elsewhere in this module include ``bucketName``, ``region``,
        ``location``, ``encryptTmpSecretId``, ``encryptTmpSecretKey``,
        ``encryptToken``, ``startTime``, ``expiredTime``, ``resourceUrl`` and
        ``resourceID``.

    Raises:
        RuntimeError: The endpoint returned a non-zero ``code``, a payload
            that is not a JSON object, or a payload missing ``bucketName`` or
            ``location``.
        httpx.HTTPError: Network failure or non-2xx HTTP status.
    """
    if file_id is None:
        file_id = generate_file_id()

    # Tolerate a scheme-less domain; without a scheme httpx cannot build a
    # request from the URL.
    base_url = api_domain.strip().rstrip("/")
    if not base_url.startswith(("http://", "https://")):
        base_url = f"https://{base_url}"
    upload_url = f"{base_url}{UPLOAD_INFO_PATH}"

    headers = {
        "Content-Type": "application/json",
        "X-Token": token,
        "X-ID": bot_id or app_key,
        "X-Source": "web",
    }
    if route_env:
        headers["X-Route-Env"] = route_env
    body = {
        "fileName": filename,
        "fileId": file_id,
        "docFrom": "localDoc",
        "docOpenId": "",
    }

    async with httpx.AsyncClient(timeout=15.0) as client:
        resp = await client.post(upload_url, json=body, headers=headers)
        resp.raise_for_status()
        result: Any = resp.json()

    # Anything but a JSON object would fail later with an AttributeError;
    # surface it as the documented RuntimeError instead.
    if not isinstance(result, dict):
        raise RuntimeError(
            f"genUploadInfo 返回格式异常: {type(result).__name__}"
        )

    code = result.get("code")
    # A missing "code" is treated like success.
    if code != 0 and code is not None:
        raise RuntimeError(
            f"genUploadInfo 失败: code={code}, msg={result.get('msg', '')}"
        )

    data = result.get("data") or result
    if not isinstance(data, dict):
        raise RuntimeError(
            f"genUploadInfo 返回格式异常: {type(data).__name__}"
        )

    required_fields = ["bucketName", "location"]
    missing = [field for field in required_fields if not data.get(field)]
    if missing:
        raise RuntimeError(
            f"genUploadInfo 返回字段不完整: 缺少字段 {missing}"
        )

    return data
|
||||
|
||||
|
||||
async def upload_to_cos(
    file_bytes: bytes,
    filename: str,
    content_type: str,
    credentials: dict,
    bucket: str,
    region: str,
) -> dict:
    """Upload a file to COS with an HTTP PUT signed by temporary credentials.

    Args:
        file_bytes: File content.
        filename: File name; used to guess the MIME type of images when
            *content_type* is empty or ``application/octet-stream``.
        content_type: MIME type, e.g. ``"image/jpeg"``.
        credentials: Dict returned by get_cos_credentials(). Keys read here:
            ``encryptTmpSecretId``, ``encryptTmpSecretKey``, ``encryptToken``
            (session token), ``location`` (object key), ``resourceUrl``,
            ``startTime`` and ``expiredTime`` (Unix timestamps).
        bucket: COS bucket name, e.g. ``chatbot-1234567890``.
        region: COS region, e.g. ``ap-guangzhou``; used for the host name only
            when ``COS_USE_ACCELERATE`` is false.

    Returns:
        Dict with ``url`` (``resourceUrl`` if present, else the PUT URL),
        ``uuid`` (MD5 hex of the content) and ``size`` (bytes); ``width`` and
        ``height`` are added for images whose header could be parsed.

    Raises:
        httpx.HTTPStatusError: COS answered with a non-2xx status.
        RuntimeError: The secret id, secret key or object key is missing.
    """
    secret_id: str = credentials.get("encryptTmpSecretId", "")
    secret_key: str = credentials.get("encryptTmpSecretKey", "")
    session_token: str = credentials.get("encryptToken", "")
    cos_key: str = credentials.get("location", "")
    resource_url: str = credentials.get("resourceUrl", "")
    start_time: Optional[int] = credentials.get("startTime")
    expired_time: Optional[int] = credentials.get("expiredTime")

    if not secret_id or not secret_key or not cos_key:
        raise RuntimeError(
            f"COS credentials 不完整: secretId={bool(secret_id)}, "
            f"secretKey={bool(secret_key)}, location={bool(cos_key)}"
        )

    # Build the upload URL (global acceleration host preferred).
    if COS_USE_ACCELERATE:
        cos_host = f"{bucket}.cos.accelerate.myqcloud.com"
    else:
        cos_host = f"{bucket}.cos.{region}.myqcloud.com"

    # The URL carries the percent-encoded key ("/" kept); the signature is
    # computed over the decoded path, as in the COS documentation examples.
    object_path = f"/{cos_key.lstrip('/')}"
    encoded_key = urllib.parse.quote(cos_key, safe="/")
    cos_url = f"https://{cos_host}/{encoded_key.lstrip('/')}"

    # Settle the Content-Type: only images get a guessed type.
    if not content_type or content_type == "application/octet-stream":
        if is_image(filename):
            content_type = guess_mime_type(filename)
        else:
            content_type = "application/octet-stream"

    file_uuid = md5_hex(file_bytes)
    file_size = len(file_bytes)

    # Headers that take part in the signature.
    sign_headers = {
        "host": cos_host,
        "content-type": content_type,
        "x-cos-security-token": session_token,
    }

    # Signature window: from the credential start (or now) up to the
    # credential expiry. The duration is measured from sign_start so the
    # window ends exactly at expired_time; measuring it from "now" would end
    # the window early - possibly in the past - whenever start_time < now.
    now = int(time.time())
    sign_start = start_time if start_time else now
    if expired_time and expired_time > now and expired_time > sign_start:
        sign_expire = expired_time - sign_start
    else:
        sign_expire = 3600

    authorization = _cos_sign(
        method="put",
        path=object_path,
        params={},
        headers=sign_headers,
        secret_id=secret_id,
        secret_key=secret_key,
        start_time=sign_start,
        expire_seconds=sign_expire,
    )

    put_headers = {
        "Authorization": authorization,
        "Content-Type": content_type,
        "x-cos-security-token": session_token,
    }

    logger.info(
        "COS PUT: bucket=%s region=%s key=%s size=%d mime=%s",
        bucket, region, cos_key, file_size, content_type,
    )

    async with httpx.AsyncClient(timeout=120.0) as client:
        resp = await client.put(
            cos_url,
            content=file_bytes,
            headers=put_headers,
        )
        resp.raise_for_status()

    result: dict[str, Any] = {
        "url": resource_url or cos_url,
        "uuid": file_uuid,
        "size": file_size,
    }

    # Pixel dimensions are reported for images only.
    if content_type.startswith("image/"):
        size_info = parse_image_size(file_bytes)
        if size_info:
            result["width"] = size_info["width"]
            result["height"] = size_info["height"]

    logger.info(
        "COS 上传成功: url=%s size=%d",
        result["url"], file_size,
    )
    return result
|
||||
|
||||
|
||||
# ============ TIM 媒体消息构建 ============
|
||||
|
||||
def build_image_msg_body(
    url: str,
    uuid: Optional[str] = None,
    filename: Optional[str] = None,
    size: int = 0,
    width: int = 0,
    height: int = 0,
    mime_type: str = "",
) -> list[dict]:
    """Build a Tencent IM ``TIMImageElem`` message body.

    Reference: https://cloud.tencent.com/document/product/269/2720

    Args:
        url: Public URL of the image (the COS resourceUrl).
        uuid: File UUID (MD5 or another unique identifier).
        filename: File name; used as the UUID when *uuid* is empty.
        size: File size in bytes.
        width: Image width in pixels.
        height: Image height in pixels.
        mime_type: MIME type used to derive ``image_format``; 255 when empty.

    Returns:
        A one-element list holding the TIMImageElem dict, ready to be used as
        ``msg_body``.
    """
    # UUID fallback chain: uuid -> filename -> basename of the URL -> "image".
    element_uuid = uuid or filename or _basename_from_url(url) or "image"

    if mime_type:
        image_format = get_image_format(mime_type)
    else:
        image_format = 255

    original_image = {
        "type": 1,  # 1 = original image
        "size": size,
        "width": width,
        "height": height,
        "url": url,
    }
    content = {
        "uuid": element_uuid,
        "image_format": image_format,
        "image_info_array": [original_image],
    }
    return [{"msg_type": "TIMImageElem", "msg_content": content}]
|
||||
|
||||
|
||||
def build_file_msg_body(
    url: str,
    filename: str,
    uuid: Optional[str] = None,
    size: int = 0,
) -> list[dict]:
    """Build a Tencent IM ``TIMFileElem`` message body.

    Reference: https://cloud.tencent.com/document/product/269/2720

    Args:
        url: Public URL of the file (the COS resourceUrl).
        filename: File name including its extension.
        uuid: File UUID (MD5 or another unique identifier); *filename* is
            used when it is empty.
        size: File size in bytes.

    Returns:
        A one-element list holding the TIMFileElem dict, ready to be used as
        ``msg_body``.
    """
    content = {
        "uuid": uuid if uuid else filename,
        "file_name": filename,
        "file_size": size,
        "url": url,
    }
    return [{"msg_type": "TIMFileElem", "msg_content": content}]
|
||||
|
||||
|
||||
# ============ 内部工具 ============
|
||||
|
||||
def _basename_from_url(url: str) -> str:
|
||||
"""从 URL 提取文件名。"""
|
||||
try:
|
||||
parsed = urllib.parse.urlparse(url)
|
||||
return os.path.basename(parsed.path)
|
||||
except Exception:
|
||||
return ""
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,558 @@
|
||||
"""
|
||||
Yuanbao sticker (TIMFaceElem) support.
|
||||
|
||||
Ported from yuanbao-openclaw-plugin/src/sticker/.
|
||||
|
||||
TIMFaceElem wire format:
|
||||
{
|
||||
"msg_type": "TIMFaceElem",
|
||||
"msg_content": {
|
||||
"index": 0, # always 0 per Yuanbao convention
|
||||
"data": "<json>", # serialised sticker metadata
|
||||
}
|
||||
}
|
||||
|
||||
The `data` field carries a JSON string with the sticker's metadata so the
|
||||
receiver can look up the correct asset in the emoji pack.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import random
|
||||
import re
|
||||
import unicodedata
|
||||
from typing import Optional
|
||||
|
||||
# ---------------------------------------------------------------------------
# Sticker catalogue – ported from builtin-stickers.json
#   Key   : canonical name (Chinese)
#   Value : {sticker_id, package_id, name, description, width, height, formats}
#
# ``description`` is a space-separated list of synonyms/keywords; the lookup
# helpers in this module search it by substring. ``name`` repeats the key.
# ---------------------------------------------------------------------------
STICKER_MAP: dict[str, dict] = {
    "六六六": {
        "sticker_id": "278", "package_id": "1003", "name": "六六六",
        "description": "666 厉害 牛 棒 绝了 好强 awesome",
        "width": 128, "height": 128, "formats": "png",
    },
    "我想开了": {
        "sticker_id": "262", "package_id": "1003", "name": "我想开了",
        "description": "想开 佛系 释怀 顿悟 看淡了 无所谓",
        "width": 128, "height": 128, "formats": "png",
    },
    "害羞": {
        "sticker_id": "130", "package_id": "1003", "name": "害羞",
        "description": "腼腆 不好意思 脸红 娇羞 羞涩 捂脸",
        "width": 128, "height": 128, "formats": "png",
    },
    "比心": {
        "sticker_id": "252", "package_id": "1003", "name": "比心",
        "description": "笔芯 爱你 爱心手势 love heart 喜欢你",
        "width": 128, "height": 128, "formats": "png",
    },
    "委屈": {
        "sticker_id": "125", "package_id": "1003", "name": "委屈",
        "description": "难过 想哭 可怜巴巴 瘪嘴 受伤 被欺负",
        "width": 128, "height": 128, "formats": "png",
    },
    "亲亲": {
        "sticker_id": "146", "package_id": "1003", "name": "亲亲",
        "description": "么么 mua 亲一下 kiss 飞吻 啵",
        "width": 128, "height": 128, "formats": "png",
    },
    "酷": {
        "sticker_id": "131", "package_id": "1003", "name": "酷",
        "description": "帅 墨镜 cool 高冷 有型 swagger",
        "width": 128, "height": 128, "formats": "png",
    },
    "睡": {
        "sticker_id": "145", "package_id": "1003", "name": "睡",
        "description": "睡觉 困 zzZ 打盹 躺平 休眠 sleepy",
        "width": 128, "height": 128, "formats": "png",
    },
    "发呆": {
        "sticker_id": "152", "package_id": "1003", "name": "发呆",
        "description": "懵 愣住 放空 呆滞 出神 脑子空白",
        "width": 128, "height": 128, "formats": "png",
    },
    "可怜": {
        "sticker_id": "157", "package_id": "1003", "name": "可怜",
        "description": "卖萌 求饶 委屈巴巴 弱小 拜托 眼巴巴",
        "width": 128, "height": 128, "formats": "png",
    },
    "摊手": {
        "sticker_id": "200", "package_id": "1003", "name": "摊手",
        "description": "无奈 没办法 耸肩 随便 那咋整 whatever",
        "width": 128, "height": 128, "formats": "png",
    },
    "头大": {
        "sticker_id": "213", "package_id": "1003", "name": "头大",
        "description": "头疼 烦恼 郁闷 难搞 崩溃 一团乱",
        "width": 128, "height": 128, "formats": "png",
    },
    "吓": {
        "sticker_id": "256", "package_id": "1003", "name": "吓",
        "description": "害怕 惊恐 震惊 吓一跳 恐怖 怂",
        "width": 128, "height": 128, "formats": "png",
    },
    "吐血": {
        "sticker_id": "203", "package_id": "1003", "name": "吐血",
        "description": "无语 崩溃 被雷 内伤 一口老血 屮",
        "width": 128, "height": 128, "formats": "png",
    },
    "哼": {
        "sticker_id": "185", "package_id": "1003", "name": "哼",
        "description": "傲娇 生气 不满 撇嘴 不理 赌气",
        "width": 128, "height": 128, "formats": "png",
    },
    "嘿嘿": {
        "sticker_id": "220", "package_id": "1003", "name": "嘿嘿",
        "description": "坏笑 猥琐笑 偷笑 憨笑 得意 你懂的",
        "width": 128, "height": 128, "formats": "png",
    },
    "头秃": {
        "sticker_id": "218", "package_id": "1003", "name": "头秃",
        "description": "程序员 加班 焦虑 没头发 秃了 肝爆",
        "width": 128, "height": 128, "formats": "png",
    },
    "暗中观察": {
        "sticker_id": "221", "package_id": "1003", "name": "暗中观察",
        "description": "窥屏 潜水 偷偷看 角落 围观 屏住呼吸",
        "width": 128, "height": 128, "formats": "png",
    },
    "我酸了": {
        "sticker_id": "224", "package_id": "1003", "name": "我酸了",
        "description": "嫉妒 柠檬精 羡慕 吃柠檬 眼红 恰柠檬",
        "width": 128, "height": 128, "formats": "png",
    },
    "打call": {
        "sticker_id": "246", "package_id": "1003", "name": "打call",
        "description": "应援 加油 支持 喝彩 助威 call",
        "width": 128, "height": 128, "formats": "png",
    },
    "庆祝": {
        "sticker_id": "251", "package_id": "1003", "name": "庆祝",
        "description": "祝贺 开心 耶 party 胜利 干杯",
        "width": 128, "height": 128, "formats": "png",
    },
    "奋斗": {
        "sticker_id": "151", "package_id": "1003", "name": "奋斗",
        "description": "努力 加油 拼搏 冲 干劲 卷起来",
        "width": 128, "height": 128, "formats": "png",
    },
    "惊讶": {
        "sticker_id": "143", "package_id": "1003", "name": "惊讶",
        "description": "震惊 哇 不敢相信 OMG 居然 这么离谱",
        "width": 128, "height": 128, "formats": "png",
    },
    "疑问": {
        "sticker_id": "144", "package_id": "1003", "name": "疑问",
        "description": "问号 不懂 啥 为什么 啥情况 懵逼问",
        "width": 128, "height": 128, "formats": "png",
    },
    "仔细分析": {
        "sticker_id": "248", "package_id": "1003", "name": "仔细分析",
        "description": "思考 推敲 认真 研究 琢磨 让我想想",
        "width": 128, "height": 128, "formats": "png",
    },
    "撅嘴": {
        "sticker_id": "184", "package_id": "1003", "name": "撅嘴",
        "description": "嘟嘴 卖萌 不高兴 撒娇 嘴翘",
        "width": 128, "height": 128, "formats": "png",
    },
    "泪奔": {
        "sticker_id": "199", "package_id": "1003", "name": "泪奔",
        "description": "大哭 伤心 破防 感动哭 泪流满面 呜呜",
        "width": 128, "height": 128, "formats": "png",
    },
    "尊嘟假嘟": {
        "sticker_id": "276", "package_id": "1003", "name": "尊嘟假嘟",
        "description": "真的假的 真假 可爱问 你骗我 是不是",
        "width": 128, "height": 128, "formats": "png",
    },
    "略略略": {
        "sticker_id": "113", "package_id": "1003", "name": "略略略",
        "description": "调皮 吐舌 不服 略 气死你 鬼脸",
        "width": 128, "height": 128, "formats": "png",
    },
    "困": {
        "sticker_id": "180", "package_id": "1003", "name": "困",
        "description": "想睡 倦 打哈欠 睁不开眼 好困啊 sleepy",
        "width": 128, "height": 128, "formats": "png",
    },
    "折磨": {
        "sticker_id": "181", "package_id": "1003", "name": "折磨",
        "description": "难受 痛苦 煎熬 蚌埠住了 受不了 要命",
        "width": 128, "height": 128, "formats": "png",
    },
    "抠鼻": {
        "sticker_id": "182", "package_id": "1003", "name": "抠鼻",
        "description": "不屑 无聊 淡定 无所谓 鄙视 挖鼻",
        "width": 128, "height": 128, "formats": "png",
    },
    "鼓掌": {
        "sticker_id": "183", "package_id": "1003", "name": "鼓掌",
        "description": "拍手 叫好 赞同 666 喝彩 掌声",
        "width": 128, "height": 128, "formats": "png",
    },
    "斜眼笑": {
        "sticker_id": "204", "package_id": "1003", "name": "斜眼笑",
        "description": "滑稽 坏笑 doge 意味深长 阴阳怪气 嘿嘿嘿",
        "width": 128, "height": 128, "formats": "png",
    },
    "辣眼睛": {
        "sticker_id": "216", "package_id": "1003", "name": "辣眼睛",
        "description": "看不下去 cringe 毁三观 太丑了 瞎了",
        "width": 128, "height": 128, "formats": "png",
    },
    "哦哟": {
        "sticker_id": "217", "package_id": "1003", "name": "哦哟",
        "description": "惊讶 起哄 哇哦 有戏 不简单 哟",
        "width": 128, "height": 128, "formats": "png",
    },
    "吃瓜": {
        "sticker_id": "222", "package_id": "1003", "name": "吃瓜",
        "description": "围观 看戏 八卦 路人 看热闹 板凳",
        "width": 128, "height": 128, "formats": "png",
    },
    "狗头": {
        "sticker_id": "225", "package_id": "1003", "name": "狗头",
        "description": "doge 保命 开玩笑 滑稽 反讽 懂的都懂",
        "width": 128, "height": 128, "formats": "png",
    },
    "敬礼": {
        "sticker_id": "227", "package_id": "1003", "name": "敬礼",
        "description": "salute 尊重 收到 遵命 致敬 报告",
        "width": 128, "height": 128, "formats": "png",
    },
    "哦": {
        "sticker_id": "231", "package_id": "1003", "name": "哦",
        "description": "知道了 明白 敷衍 嗯 这样啊 收到",
        "width": 128, "height": 128, "formats": "png",
    },
    "拿到红包": {
        "sticker_id": "236", "package_id": "1003", "name": "拿到红包",
        "description": "红包 谢谢老板 发财 开心 抢到了 欧气",
        "width": 128, "height": 128, "formats": "png",
    },
    "牛吖": {
        "sticker_id": "239", "package_id": "1003", "name": "牛吖",
        "description": "牛 厉害 强 666 佩服 大佬",
        "width": 128, "height": 128, "formats": "png",
    },
    "贴贴": {
        "sticker_id": "272", "package_id": "1003", "name": "贴贴",
        "description": "抱抱 亲昵 蹭蹭 亲密 靠靠 撒娇贴",
        "width": 128, "height": 128, "formats": "png",
    },
    "爱心": {
        "sticker_id": "138", "package_id": "1003", "name": "爱心",
        "description": "心 love 喜欢你 红心 示爱 么么哒",
        "width": 128, "height": 128, "formats": "png",
    },
    "晚安": {
        "sticker_id": "170", "package_id": "1003", "name": "晚安",
        "description": "好梦 睡了 night 早点休息 安啦 moon",
        "width": 128, "height": 128, "formats": "png",
    },
    "太阳": {
        "sticker_id": "176", "package_id": "1003", "name": "太阳",
        "description": "晴天 早上好 阳光 morning 好天气 日",
        "width": 128, "height": 128, "formats": "png",
    },
    "柠檬": {
        "sticker_id": "266", "package_id": "1003", "name": "柠檬",
        "description": "酸 嫉妒 柠檬精 羡慕 我酸 恰柠檬",
        "width": 128, "height": 128, "formats": "png",
    },
    "大冤种": {
        "sticker_id": "267", "package_id": "1003", "name": "大冤种",
        "description": "倒霉 吃亏 自嘲 好心没好报 背锅 工具人",
        "width": 128, "height": 128, "formats": "png",
    },
    "吐了": {
        "sticker_id": "132", "package_id": "1003", "name": "吐了",
        "description": "恶心 yue 受不了 嫌弃 想吐 生理不适",
        "width": 128, "height": 128, "formats": "png",
    },
    "怒": {
        "sticker_id": "134", "package_id": "1003", "name": "怒",
        "description": "生气 愤怒 火大 暴躁 气炸 怼",
        "width": 128, "height": 128, "formats": "png",
    },
    "玫瑰": {
        "sticker_id": "165", "package_id": "1003", "name": "玫瑰",
        "description": "花 示爱 表白 浪漫 送你花 情人节",
        "width": 128, "height": 128, "formats": "png",
    },
    "凋谢": {
        "sticker_id": "119", "package_id": "1003", "name": "凋谢",
        "description": "花谢 失恋 难过 枯萎 心碎 凉了",
        "width": 128, "height": 128, "formats": "png",
    },
    "点赞": {
        "sticker_id": "159", "package_id": "1003", "name": "点赞",
        "description": "赞 认同 好棒 good like 大拇指 顶",
        "width": 128, "height": 128, "formats": "png",
    },
    "握手": {
        "sticker_id": "164", "package_id": "1003", "name": "握手",
        "description": "合作 你好 商务 hello deal 成交 友好",
        "width": 128, "height": 128, "formats": "png",
    },
    "抱拳": {
        "sticker_id": "163", "package_id": "1003", "name": "抱拳",
        "description": "谢谢 失敬 江湖 承让 拜托 有礼",
        "width": 128, "height": 128, "formats": "png",
    },
    "ok": {
        "sticker_id": "169", "package_id": "1003", "name": "ok",
        "description": "好的 收到 没问题 okay 行 可以 懂了",
        "width": 128, "height": 128, "formats": "png",
    },
    "拳头": {
        "sticker_id": "174", "package_id": "1003", "name": "拳头",
        "description": "加油 干 冲 fight 力量 击拳 硬气",
        "width": 128, "height": 128, "formats": "png",
    },
    "鞭炮": {
        "sticker_id": "191", "package_id": "1003", "name": "鞭炮",
        "description": "过年 喜庆 爆竹 春节 噼里啪啦 红",
        "width": 128, "height": 128, "formats": "png",
    },
    "烟花": {
        "sticker_id": "258", "package_id": "1003", "name": "烟花",
        "description": "庆典 漂亮 新年 嘭 绽放 节日快乐",
        "width": 128, "height": 128, "formats": "png",
    },
}
|
||||
|
||||
|
||||
def get_sticker_by_name(name: str) -> Optional[dict]:
    """Look a sticker up by name, falling back to progressively fuzzier matching.

    Matching priority (the first hit wins):
        1. The stripped query equals a catalogue key.
        2. The query is a substring of a key, or a key is a substring of the
           query (first such key in catalogue order).
        3. The query is a substring of a sticker's ``description``.
        4. The best result of ``search_stickers(query, limit=1)``.

    Args:
        name: Sticker name or keyword; surrounding whitespace is ignored.

    Returns:
        The sticker dict, or ``None`` when *name* is empty, whitespace-only,
        or nothing matches.
    """
    if not name:
        return None

    query = name.strip()
    if not query:
        # "" is a substring of every key, so a whitespace-only name would
        # otherwise "match" the first sticker in the catalogue.
        return None

    exact = STICKER_MAP.get(query)
    if exact is not None:
        return exact

    for key, sticker in STICKER_MAP.items():
        if query in key or key in query:
            return sticker

    for sticker in STICKER_MAP.values():
        if query in sticker.get("description", ""):
            return sticker

    matches = search_stickers(query, limit=1)
    return matches[0] if matches else None
|
||||
|
||||
|
||||
def get_random_sticker(category: str = None) -> dict:
    """Return a randomly chosen sticker.

    When *category* is given, the choice is restricted to stickers whose
    ``description`` or ``name`` contains it; if none does, or *category* is
    empty/None, the whole catalogue is used.
    """
    pool = list(STICKER_MAP.values())
    if category:
        matching = [
            entry for entry in pool
            if category in entry.get("description", "") or category in entry.get("name", "")
        ]
        if matching:
            pool = matching
    return random.choice(pool)
|
||||
|
||||
|
||||
def get_sticker_by_id(sticker_id: str) -> Optional[dict]:
    """Find a sticker by exact ``sticker_id``.

    The id is converted to ``str`` and stripped before comparison. Returns
    ``None`` for a falsy id or when no sticker carries that id.
    """
    if not sticker_id:
        return None
    wanted = str(sticker_id).strip()
    return next(
        (entry for entry in STICKER_MAP.values() if entry.get("sticker_id") == wanted),
        None,
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 模糊搜索(对齐 chatbot-web yuanbao-openclaw-plugin/sticker-cache.ts.searchStickers)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Runs of whitespace and common ASCII/CJK punctuation; _compact_text() removes
# every match so that comparisons ignore spacing and punctuation.
# NOTE(review): as shown here the class lists ",", "!" and "?" twice - the
# second of each pair may have been a full-width variant in the upstream file;
# confirm against the original source.
_PUNCT_RE = re.compile(r"[\s\u3000\-_·.,,。!!??\"“”'‘’、/\\]+")
|
||||
|
||||
|
||||
def _normalize_text(raw: str) -> str:
|
||||
return unicodedata.normalize("NFKC", str(raw or "")).strip().lower()
|
||||
|
||||
|
||||
def _compact_text(raw: str) -> str:
    """Normalise *raw* and drop every whitespace/punctuation run matched by ``_PUNCT_RE``."""
    normalized = _normalize_text(raw)
    # Splitting on the separator pattern and re-joining removes the matches.
    return "".join(_PUNCT_RE.split(normalized))
|
||||
|
||||
|
||||
def _multiset_char_hit_ratio(needle: str, haystack: str) -> float:
|
||||
if not needle:
|
||||
return 0.0
|
||||
bag: dict[str, int] = {}
|
||||
for ch in haystack:
|
||||
bag[ch] = bag.get(ch, 0) + 1
|
||||
hits = 0
|
||||
for ch in needle:
|
||||
n = bag.get(ch, 0)
|
||||
if n > 0:
|
||||
hits += 1
|
||||
bag[ch] = n - 1
|
||||
return hits / len(needle)
|
||||
|
||||
|
||||
def _bigram_jaccard(a: str, b: str) -> float:
|
||||
if len(a) < 2 or len(b) < 2:
|
||||
return 0.0
|
||||
A = {a[i:i + 2] for i in range(len(a) - 1)}
|
||||
B = {b[i:i + 2] for i in range(len(b) - 1)}
|
||||
inter = len(A & B)
|
||||
union = len(A) + len(B) - inter
|
||||
return inter / union if union else 0.0
|
||||
|
||||
|
||||
def _longest_subsequence_ratio(needle: str, haystack: str) -> float:
|
||||
if not needle:
|
||||
return 0.0
|
||||
j = 0
|
||||
for ch in haystack:
|
||||
if j >= len(needle):
|
||||
break
|
||||
if ch == needle[j]:
|
||||
j += 1
|
||||
return j / len(needle)
|
||||
|
||||
|
||||
def _score_field(haystack: str, query: str) -> float:
    """Score how well ``query`` matches a single text field (0-100).

    Both inputs are normalized with ``_normalize_text`` and also compared in
    a "compact" form with ``_PUNCT_RE`` matches removed. The result is the
    maximum of these independent signals:

    - normalized strings are equal: ``100``
    - normalized query is a substring of the field: ``92 + min(6, len(q))``
    - compact query is a substring of the compact field: ``86``
    - multiset character coverage ``* 62``
    - bigram Jaccard similarity ``* 58``
    - in-order prefix coverage ``* 52``

    Args:
        haystack: Field text to search in (e.g. a sticker name).
        query: User query.

    Returns:
        A float score; ``0.0`` when either side is empty after normalization.
    """
    hay = _normalize_text(haystack)
    q = _normalize_text(query)
    if not hay or not q:
        return 0.0

    # Derive the compact forms from the already-normalized strings instead of
    # normalizing the raw inputs a second time.
    hay_c = _PUNCT_RE.sub("", hay)
    q_c = _PUNCT_RE.sub("", q)

    best = 0.0
    if hay == q:
        best = 100.0
    if q in hay:
        # 92.0 (not 92) keeps the return type a float on this branch.
        best = max(best, 92.0 + min(6, len(q)))
    # NOTE(review): a prefix match implies the substring match above (>= 93),
    # so this branch can never raise the score. Presumably kept for parity
    # with the reference implementation this section mirrors.
    if len(q) >= 2 and hay.startswith(q):
        best = max(best, 88.0)
    if q_c and q_c in hay_c:
        best = max(best, 86.0)

    best = max(
        best,
        _multiset_char_hit_ratio(q_c, hay_c) * 62,
        _bigram_jaccard(q_c, hay_c) * 58,
        _longest_subsequence_ratio(q_c, hay_c) * 52,
    )

    # NOTE(review): also shadowed by the substring branch above (>= 93).
    if len(q) == 1 and q in hay:
        best = max(best, 68.0)
    return best
|
||||
|
||||
|
||||
def search_stickers(query: str, limit: int = 10) -> list[dict]:
    """Return up to ``limit`` built-in stickers ranked by fuzzy match.

    Each sticker's score is the maximum of:

    - ``_score_field`` on its ``name``;
    - ``_score_field`` on its ``description``, weighted ``* 0.88``;
    - a ``sticker_id`` match: ``100`` for equality with the normalized
      query, ``84`` when the normalized query is a substring of the id.

    Results below a floor derived from the best score are dropped:
    ``18`` when the best score is at least 22, ``max(10, top * 0.5)`` when it
    is at least 12, otherwise ``max(6, top * 0.35)``. If nothing clears the
    floor the unfiltered ranking is used.

    Args:
        query: Search text. When empty (or empty after normalization) the
            first ``limit`` stickers are returned in ``STICKER_MAP``
            iteration order.
        limit: Maximum number of results; clamped to ``[1, 500]``. A falsy
            value means 10.

    Returns:
        Sticker dicts, best match first. Ties keep ``STICKER_MAP`` order.
    """
    safe_limit = max(1, min(500, int(limit) if limit else 10))

    # Normalize the query once; it does not change per sticker.
    q_norm = _normalize_text(query) if query else ""
    if not q_norm:
        return list(STICKER_MAP.values())[:safe_limit]

    scored: list[tuple[float, dict]] = []
    for sticker in STICKER_MAP.values():
        name_s = _score_field(sticker.get("name", ""), query)
        desc_s = _score_field(sticker.get("description", ""), query) * 0.88

        sid_norm = _normalize_text(str(sticker.get("sticker_id", "")).strip())
        if sid_norm == q_norm:
            id_s = 100.0
        elif sid_norm and q_norm in sid_norm:
            id_s = 84.0
        else:
            id_s = 0.0

        scored.append((max(name_s, desc_s, id_s), sticker))

    # list.sort is stable, so equally scored stickers keep their map order.
    scored.sort(key=lambda pair: pair[0], reverse=True)
    top = scored[0][0] if scored else 0
    if top <= 0:
        return [s for _, s in scored[:safe_limit]]

    if top >= 22:
        floor = 18.0
    elif top >= 12:
        floor = max(10.0, top * 0.5)
    else:
        floor = max(6.0, top * 0.35)

    # With a best score below 6 the floor exceeds it and the filter is empty;
    # fall back to the full ranking in that case.
    filtered = [pair for pair in scored if pair[0] >= floor]
    out = filtered if filtered else scored
    return [s for _, s in out[:safe_limit]]
|
||||
|
||||
|
||||
def build_face_msg_body(
    face_index: int,
    face_type: int = 1,
    data: Optional[str] = None,
) -> list:
    """Build a ``TIMFaceElem`` message body.

    Yuanbao convention (per the original notes): ``index`` is sent as 0 and
    the server identifies the sticker from ``data``, a JSON string carrying
    ``sticker_id`` / ``package_id`` and related fields.

    Args:
        face_index: Value written to ``msg_content["index"]`` as-is.
        face_type: Kept for compatibility with an older interface; not used
            by this function.
        data: Already-serialized JSON string. When ``None`` the ``data`` key
            is omitted and only ``index`` is sent.

    Returns:
        A one-element ``msg_body`` list, e.g.::

            [{"msg_type": "TIMFaceElem", "msg_content": {"index": 0, "data": "..."}}]
    """
    element: dict = {
        "msg_type": "TIMFaceElem",
        "msg_content": {"index": face_index},
    }
    if data is not None:
        element["msg_content"]["data"] = data
    return [element]
|
||||
|
||||
|
||||
def build_sticker_msg_body(sticker: dict) -> list:
    """Build a ``TIMFaceElem`` message body from a ``STICKER_MAP`` entry.

    Args:
        sticker: Sticker dict. ``sticker_id``, ``package_id`` and ``name``
            are required; ``width`` / ``height`` default to 128 and
            ``formats`` to ``"png"``.

    Returns:
        The list produced by ``build_face_msg_body`` with ``face_index=0``
        and ``data`` set to the compact JSON encoding of those fields.

    Raises:
        KeyError: If a required key is missing from ``sticker``.
    """
    fields = {
        "sticker_id": sticker["sticker_id"],
        "package_id": sticker["package_id"],
        "width": sticker.get("width", 128),
        "height": sticker.get("height", 128),
        "formats": sticker.get("formats", "png"),
        "name": sticker["name"],
    }
    # Compact separators and raw (non-escaped) Unicode, as in the original.
    encoded = json.dumps(fields, ensure_ascii=False, separators=(",", ":"))
    return build_face_msg_body(face_index=0, data=encoded)
|
||||
+842
-451
File diff suppressed because it is too large
Load Diff
+85
-18
@@ -87,6 +87,9 @@ class SessionSource:
|
||||
user_id_alt: Optional[str] = None # Platform-specific stable alt ID (Signal UUID, Feishu union_id)
|
||||
chat_id_alt: Optional[str] = None # Signal group internal ID
|
||||
is_bot: bool = False # True when the message author is a bot/webhook (Discord)
|
||||
guild_id: Optional[str] = None # Discord guild / Slack workspace / Matrix server scope
|
||||
parent_chat_id: Optional[str] = None # Parent channel when chat_id refers to a thread
|
||||
message_id: Optional[str] = None # ID of the triggering message (for pin/reply/react)
|
||||
|
||||
@property
|
||||
def description(self) -> str:
|
||||
@@ -124,8 +127,14 @@ class SessionSource:
|
||||
d["user_id_alt"] = self.user_id_alt
|
||||
if self.chat_id_alt:
|
||||
d["chat_id_alt"] = self.chat_id_alt
|
||||
if self.guild_id:
|
||||
d["guild_id"] = self.guild_id
|
||||
if self.parent_chat_id:
|
||||
d["parent_chat_id"] = self.parent_chat_id
|
||||
if self.message_id:
|
||||
d["message_id"] = self.message_id
|
||||
return d
|
||||
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, data: Dict[str, Any]) -> "SessionSource":
|
||||
return cls(
|
||||
@@ -139,6 +148,9 @@ class SessionSource:
|
||||
chat_topic=data.get("chat_topic"),
|
||||
user_id_alt=data.get("user_id_alt"),
|
||||
chat_id_alt=data.get("chat_id_alt"),
|
||||
guild_id=data.get("guild_id"),
|
||||
parent_chat_id=data.get("parent_chat_id"),
|
||||
message_id=data.get("message_id"),
|
||||
)
|
||||
|
||||
|
||||
@@ -190,6 +202,31 @@ that requires raw IDs). Discord is excluded because mentions use ``<@user_id>``
|
||||
and the LLM needs the real ID to tag users."""
|
||||
|
||||
|
||||
def _discord_tools_loaded() -> bool:
|
||||
"""True iff the agent will actually have Discord tools this session.
|
||||
|
||||
Two conditions must hold:
|
||||
1. The `discord` or `discord_admin` toolset is enabled for the
|
||||
Discord platform via `hermes tools` (opt-in, default OFF).
|
||||
2. `DISCORD_BOT_TOKEN` is set — the tool's `check_fn` gates on it
|
||||
at registry time, so the toolset being enabled in config is not
|
||||
enough if the token isn't configured.
|
||||
|
||||
Returns False (safe default — keeps the stale-API disclaimer) on any
|
||||
error so a bad config can't silently promise tools the agent lacks.
|
||||
"""
|
||||
if not (os.environ.get("DISCORD_BOT_TOKEN") or "").strip():
|
||||
return False
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
from hermes_cli.tools_config import _get_platform_tools
|
||||
cfg = load_config()
|
||||
enabled = _get_platform_tools(cfg, "discord", include_default_mcp_servers=False)
|
||||
return "discord" in enabled or "discord_admin" in enabled
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
|
||||
def build_session_context_prompt(
|
||||
context: SessionContext,
|
||||
*,
|
||||
@@ -273,18 +310,38 @@ def build_session_context_prompt(
|
||||
"**Platform notes:** You are running inside Slack. "
|
||||
"You do NOT have access to Slack-specific APIs — you cannot search "
|
||||
"channel history, pin/unpin messages, manage channels, or list users. "
|
||||
"Do not promise to perform these actions. If the user asks, explain "
|
||||
"that you can only read messages sent directly to you and respond."
|
||||
"Do not promise to perform these actions. The gateway may inline the "
|
||||
"current message's Slack block/attachment payload when available, but "
|
||||
"you still cannot call Slack APIs yourself."
|
||||
)
|
||||
elif context.source.platform == Platform.DISCORD:
|
||||
lines.append("")
|
||||
lines.append(
|
||||
"**Platform notes:** You are running inside Discord. "
|
||||
"You do NOT have access to Discord-specific APIs — you cannot search "
|
||||
"channel history, pin messages, manage roles, or list server members. "
|
||||
"Do not promise to perform these actions. If the user asks, explain "
|
||||
"that you can only read messages sent directly to you and respond."
|
||||
)
|
||||
# Inject the Discord IDs block only when the agent actually has
|
||||
# Discord tools loaded this session — i.e. the user opted into
|
||||
# `discord` / `discord_admin` via `hermes tools` AND the bot
|
||||
# token is configured. Otherwise keep the stale-API disclaimer
|
||||
# honest so we never promise tools the agent lacks.
|
||||
if _discord_tools_loaded():
|
||||
src = context.source
|
||||
id_lines = ["", "**Discord IDs (for the `discord` / `discord_admin` tools):**"]
|
||||
if src.guild_id:
|
||||
id_lines.append(f" - Guild: `{src.guild_id}`")
|
||||
if src.thread_id and src.parent_chat_id:
|
||||
id_lines.append(f" - Parent channel: `{src.parent_chat_id}`")
|
||||
id_lines.append(f" - Thread: `{src.thread_id}` (use as `channel_id` for fetch_messages etc.)")
|
||||
else:
|
||||
id_lines.append(f" - Channel: `{src.chat_id}`")
|
||||
if src.message_id:
|
||||
id_lines.append(f" - Triggering message: `{src.message_id}`")
|
||||
lines.extend(id_lines)
|
||||
else:
|
||||
lines.append("")
|
||||
lines.append(
|
||||
"**Platform notes:** You are running inside Discord. "
|
||||
"You do NOT have access to Discord-specific APIs — you cannot search "
|
||||
"channel history, pin messages, manage roles, or list server members. "
|
||||
"Do not promise to perform these actions. If the user asks, explain "
|
||||
"that you can only read messages sent directly to you and respond."
|
||||
)
|
||||
elif context.source.platform == Platform.BLUEBUBBLES:
|
||||
lines.append("")
|
||||
lines.append(
|
||||
@@ -297,6 +354,14 @@ def build_session_context_prompt(
|
||||
"If the user needs a detailed answer, give the short version first "
|
||||
"and offer to elaborate."
|
||||
)
|
||||
elif context.source.platform == Platform.YUANBAO:
|
||||
lines.append("")
|
||||
lines.append(
|
||||
"**Platform notes:** You are running inside Yuanbao. "
|
||||
"You CAN send private (DM) messages via the send_message tool. "
|
||||
"Use target='yuanbao:direct:<account_id>' for DM "
|
||||
"and target='yuanbao:group:<group_code>' for group chat."
|
||||
)
|
||||
|
||||
# Connected platforms
|
||||
platforms_list = ["local (files on this machine)"]
|
||||
@@ -383,11 +448,11 @@ class SessionEntry:
|
||||
auto_reset_reason: Optional[str] = None # "idle" or "daily"
|
||||
reset_had_activity: bool = False # whether the expired session had any messages
|
||||
|
||||
# Set by the background expiry watcher after it successfully flushes
|
||||
# memories for this session. Persisted to sessions.json so the flag
|
||||
# survives gateway restarts (the old in-memory _pre_flushed_sessions
|
||||
# set was lost on restart, causing redundant re-flushes).
|
||||
memory_flushed: bool = False
|
||||
# Set by the background expiry watcher after it finalizes an expired
|
||||
# session (invoking on_session_finalize hooks and evicting the cached
|
||||
# agent). Persisted to sessions.json so the flag survives gateway
|
||||
# restarts — prevents redundant finalization runs.
|
||||
expiry_finalized: bool = False
|
||||
|
||||
# When True the next call to get_or_create_session() will auto-reset
|
||||
# this session (create a new session_id) so the user starts fresh.
|
||||
@@ -423,7 +488,7 @@ class SessionEntry:
|
||||
"last_prompt_tokens": self.last_prompt_tokens,
|
||||
"estimated_cost_usd": self.estimated_cost_usd,
|
||||
"cost_status": self.cost_status,
|
||||
"memory_flushed": self.memory_flushed,
|
||||
"expiry_finalized": self.expiry_finalized,
|
||||
"suspended": self.suspended,
|
||||
"resume_pending": self.resume_pending,
|
||||
"resume_reason": self.resume_reason,
|
||||
@@ -475,7 +540,7 @@ class SessionEntry:
|
||||
last_prompt_tokens=data.get("last_prompt_tokens", 0),
|
||||
estimated_cost_usd=data.get("estimated_cost_usd", 0.0),
|
||||
cost_status=data.get("cost_status", "unknown"),
|
||||
memory_flushed=data.get("memory_flushed", False),
|
||||
expiry_finalized=data.get("expiry_finalized", data.get("memory_flushed", False)),
|
||||
suspended=data.get("suspended", False),
|
||||
resume_pending=data.get("resume_pending", False),
|
||||
resume_reason=data.get("resume_reason"),
|
||||
@@ -1176,6 +1241,7 @@ class SessionStore:
|
||||
reasoning_content=message.get("reasoning_content") if message.get("role") == "assistant" else None,
|
||||
reasoning_details=message.get("reasoning_details") if message.get("role") == "assistant" else None,
|
||||
codex_reasoning_items=message.get("codex_reasoning_items") if message.get("role") == "assistant" else None,
|
||||
codex_message_items=message.get("codex_message_items") if message.get("role") == "assistant" else None,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.debug("Session DB operation failed: %s", e)
|
||||
@@ -1208,6 +1274,7 @@ class SessionStore:
|
||||
reasoning_content=msg.get("reasoning_content") if role == "assistant" else None,
|
||||
reasoning_details=msg.get("reasoning_details") if role == "assistant" else None,
|
||||
codex_reasoning_items=msg.get("codex_reasoning_items") if role == "assistant" else None,
|
||||
codex_message_items=msg.get("codex_message_items") if role == "assistant" else None,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.debug("Failed to rewrite transcript in DB: %s", e)
|
||||
|
||||
@@ -44,6 +44,14 @@ class StreamConsumerConfig:
|
||||
buffer_threshold: int = 40
|
||||
cursor: str = " ▉"
|
||||
buffer_only: bool = False
|
||||
# When >0, the final edit for a streamed response is delivered as a
|
||||
# fresh message if the original preview has been visible for at least
|
||||
# this many seconds. This makes the platform's visible timestamp
|
||||
# reflect completion time instead of first-token time for long-running
|
||||
# responses (e.g. reasoning models that stream slowly). Ported from
|
||||
# openclaw/openclaw#72038. Default 0 = always edit in place (legacy
|
||||
# behavior). The gateway enables this selectively per-platform.
|
||||
fresh_final_after_seconds: float = 0.0
|
||||
|
||||
|
||||
class GatewayStreamConsumer:
|
||||
@@ -91,6 +99,12 @@ class GatewayStreamConsumer:
|
||||
self._queue: queue.Queue = queue.Queue()
|
||||
self._accumulated = ""
|
||||
self._message_id: Optional[str] = None
|
||||
# Wall-clock timestamp (time.monotonic) when ``_message_id`` was
|
||||
# first assigned from a successful first-send. Used by the
|
||||
# fresh-final logic to detect long-lived previews whose edit
|
||||
# timestamps would be stale by completion time. Ported from
|
||||
# openclaw/openclaw#72038.
|
||||
self._message_created_ts: Optional[float] = None
|
||||
self._already_sent = False
|
||||
self._edit_supported = True # Disabled when progressive edits are no longer usable
|
||||
self._last_edit_time = 0.0
|
||||
@@ -136,6 +150,7 @@ class GatewayStreamConsumer:
|
||||
if preserve_no_edit and self._message_id == "__no_edit__":
|
||||
return
|
||||
self._message_id = None
|
||||
self._message_created_ts = None
|
||||
self._accumulated = ""
|
||||
self._last_sent_text = ""
|
||||
self._fallback_final_send = False
|
||||
@@ -734,6 +749,81 @@ class GatewayStreamConsumer:
|
||||
logger.error("Commentary send error: %s", e)
|
||||
return False
|
||||
|
||||
def _should_send_fresh_final(self) -> bool:
|
||||
"""Return True when a long-lived preview should be replaced with a
|
||||
fresh final message instead of an edit.
|
||||
|
||||
Conditions:
|
||||
- Fresh-final is enabled (``fresh_final_after_seconds > 0``).
|
||||
- We have a real preview message id (not the ``__no_edit__`` sentinel
|
||||
and not ``None``).
|
||||
- The preview has been visible for at least the configured threshold.
|
||||
|
||||
Ported from openclaw/openclaw#72038.
|
||||
"""
|
||||
threshold = getattr(self.cfg, "fresh_final_after_seconds", 0.0) or 0.0
|
||||
if threshold <= 0:
|
||||
return False
|
||||
if not self._message_id or self._message_id == "__no_edit__":
|
||||
return False
|
||||
if self._message_created_ts is None:
|
||||
return False
|
||||
age = time.monotonic() - self._message_created_ts
|
||||
return age >= threshold
|
||||
|
||||
async def _try_fresh_final(self, text: str) -> bool:
|
||||
"""Send ``text`` as a brand-new message (best-effort delete the old
|
||||
preview) so the platform's visible timestamp reflects completion
|
||||
time. Returns True on successful delivery, False on any failure so
|
||||
the caller falls back to the normal edit path.
|
||||
|
||||
Ported from openclaw/openclaw#72038.
|
||||
"""
|
||||
old_message_id = self._message_id
|
||||
try:
|
||||
result = await self.adapter.send(
|
||||
chat_id=self.chat_id,
|
||||
content=text,
|
||||
metadata=self.metadata,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.debug("Fresh-final send failed, falling back to edit: %s", e)
|
||||
return False
|
||||
if not getattr(result, "success", False):
|
||||
return False
|
||||
# Successful fresh send — try to delete the stale preview so the
|
||||
# user doesn't see the old edit-stuck message underneath. Cleanup
|
||||
# is best-effort; platforms that don't implement ``delete_message``
|
||||
# just leave the preview behind (still an acceptable outcome —
|
||||
# the visible final timestamp is the important part).
|
||||
if old_message_id and old_message_id != "__no_edit__":
|
||||
delete_fn = getattr(self.adapter, "delete_message", None)
|
||||
if delete_fn is not None:
|
||||
try:
|
||||
await delete_fn(self.chat_id, old_message_id)
|
||||
except Exception as e:
|
||||
logger.debug(
|
||||
"Fresh-final preview cleanup failed (%s): %s",
|
||||
old_message_id, e,
|
||||
)
|
||||
# Adopt the new message id as the current message so subsequent
|
||||
# callers (e.g. overflow split loops, finalize retries) see a
|
||||
# consistent state.
|
||||
new_message_id = getattr(result, "message_id", None)
|
||||
if new_message_id:
|
||||
self._message_id = new_message_id
|
||||
self._message_created_ts = time.monotonic()
|
||||
else:
|
||||
# Send succeeded but platform didn't return an id — treat the
|
||||
# delivery as final-only and fall back to "__no_edit__" so we
|
||||
# don't try to edit something we can't address.
|
||||
self._message_id = "__no_edit__"
|
||||
self._message_created_ts = None
|
||||
self._already_sent = True
|
||||
self._last_sent_text = text
|
||||
self._final_response_sent = True
|
||||
return True
|
||||
|
||||
async def _send_or_edit(self, text: str, *, finalize: bool = False) -> bool:
|
||||
"""Send or edit the streaming message.
|
||||
|
||||
@@ -786,6 +876,22 @@ class GatewayStreamConsumer:
|
||||
finalize and self._adapter_requires_finalize
|
||||
):
|
||||
return True
|
||||
# Fresh-final for long-lived previews: when finalizing
|
||||
# the last edit in a streaming sequence, if the
|
||||
# original preview has been visible for at least
|
||||
# ``fresh_final_after_seconds``, send the completed
|
||||
# reply as a fresh message so the platform's visible
|
||||
# timestamp reflects completion time instead of the
|
||||
# preview creation time. Best-effort cleanup of the
|
||||
# old preview follows. Ported from
|
||||
# openclaw/openclaw#72038. Gated by config so the
|
||||
# legacy edit-in-place path stays the default.
|
||||
if (
|
||||
finalize
|
||||
and self._should_send_fresh_final()
|
||||
and await self._try_fresh_final(text)
|
||||
):
|
||||
return True
|
||||
# Edit existing message
|
||||
result = await self.adapter.edit_message(
|
||||
chat_id=self.chat_id,
|
||||
@@ -852,6 +958,10 @@ class GatewayStreamConsumer:
|
||||
if result.success:
|
||||
if result.message_id:
|
||||
self._message_id = result.message_id
|
||||
# Track when the preview first became visible to
|
||||
# the user so fresh-final logic can detect stale
|
||||
# preview timestamps on long-running responses.
|
||||
self._message_created_ts = time.monotonic()
|
||||
else:
|
||||
self._edit_supported = False
|
||||
self._already_sent = True
|
||||
|
||||
@@ -31,8 +31,17 @@ Hermes' own session keys.
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
from typing import Set
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# WhatsApp JIDs are numeric (or plus-prefixed numeric). The pattern below
|
||||
# accepts only ASCII letters/digits plus ``@``, ``.``, ``+`` and ``-`` (``:`` is
|
||||
# not accepted); an explicit ASCII class is used instead of ``\w`` so
# full-width digits / Unicode word chars can't sneak through.
|
||||
_SAFE_IDENTIFIER_RE = re.compile(r"^[A-Za-z0-9@.+\-]+$")
|
||||
|
||||
from hermes_constants import get_hermes_home
|
||||
|
||||
|
||||
@@ -81,6 +90,16 @@ def expand_whatsapp_aliases(identifier: str) -> Set[str]:
|
||||
current = queue.pop(0)
|
||||
if not current or current in resolved:
|
||||
continue
|
||||
# Defense-in-depth: reject identifiers that could sneak path
|
||||
# separators / traversal segments into the ``lid-mapping-{current}``
|
||||
# filename below. The hardcoded ``lid-mapping-`` prefix already
|
||||
# prevents escape via pathlib's component split (an attacker can't
|
||||
# create ``lid-mapping-..`` as a real directory in session_dir), but
|
||||
# this keeps the identifier space to the characters WhatsApp JIDs
|
||||
# actually use and avoids depending on that filesystem-layout
|
||||
# invariant.
|
||||
if not _SAFE_IDENTIFIER_RE.match(current):
|
||||
continue
|
||||
|
||||
resolved.add(current)
|
||||
for suffix in ("", "_reverse"):
|
||||
@@ -91,7 +110,8 @@ def expand_whatsapp_aliases(identifier: str) -> Set[str]:
|
||||
mapped = normalize_whatsapp_identifier(
|
||||
json.loads(mapping_path.read_text(encoding="utf-8"))
|
||||
)
|
||||
except Exception:
|
||||
except (OSError, json.JSONDecodeError) as exc:
|
||||
logger.debug("whatsapp_identity: failed to read %s: %s", mapping_path, exc)
|
||||
continue
|
||||
if mapped and mapped not in resolved:
|
||||
queue.append(mapped)
|
||||
|
||||
+27
-3
@@ -356,6 +356,14 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
|
||||
api_key_env_vars=(),
|
||||
base_url_env_var="BEDROCK_BASE_URL",
|
||||
),
|
||||
"azure-foundry": ProviderConfig(
|
||||
id="azure-foundry",
|
||||
name="Azure Foundry",
|
||||
auth_type="api_key",
|
||||
inference_base_url="", # User-provided endpoint
|
||||
api_key_env_vars=("AZURE_FOUNDRY_API_KEY",),
|
||||
base_url_env_var="AZURE_FOUNDRY_BASE_URL",
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
@@ -459,11 +467,27 @@ def _resolve_api_key_provider_secret(
|
||||
pass
|
||||
return "", ""
|
||||
|
||||
from hermes_cli.config import get_env_value
|
||||
for env_var in pconfig.api_key_env_vars:
|
||||
val = os.getenv(env_var, "").strip()
|
||||
# Check both os.environ and ~/.hermes/.env file
|
||||
val = (get_env_value(env_var) or "").strip()
|
||||
if has_usable_secret(val):
|
||||
return val, env_var
|
||||
|
||||
# Fallback: try credential pool (e.g. zai key stored via auth.json)
|
||||
try:
|
||||
from agent.credential_pool import load_pool
|
||||
pool = load_pool(provider_id)
|
||||
if pool and pool.has_credentials():
|
||||
entry = pool.peek()
|
||||
if entry:
|
||||
key = getattr(entry, "access_token", "") or getattr(entry, "runtime_api_key", "")
|
||||
key = str(key).strip()
|
||||
if has_usable_secret(key):
|
||||
return key, f"credential_pool:{provider_id}"
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return "", ""
|
||||
|
||||
|
||||
@@ -4236,10 +4260,10 @@ def _login_nous(args, pconfig: ProviderConfig) -> None:
|
||||
)
|
||||
|
||||
from hermes_cli.models import (
|
||||
_PROVIDER_MODELS, get_pricing_for_provider,
|
||||
get_curated_nous_model_ids, get_pricing_for_provider,
|
||||
check_nous_free_tier, partition_nous_models_by_tier,
|
||||
)
|
||||
model_ids = _PROVIDER_MODELS.get("nous", [])
|
||||
model_ids = get_curated_nous_model_ids()
|
||||
|
||||
print()
|
||||
unavailable_models: list = []
|
||||
|
||||
@@ -0,0 +1,300 @@
|
||||
"""Azure Foundry endpoint auto-detection.
|
||||
|
||||
Inspect an Azure AI Foundry / Azure OpenAI endpoint to determine:
|
||||
- API transport (OpenAI-style ``chat_completions`` vs
|
||||
Anthropic-style ``anthropic_messages``)
|
||||
- Available models (best effort — Azure does not expose a deployment
|
||||
listing via the inference API key, but Azure OpenAI v1 endpoints
|
||||
return the resource's model catalog via ``GET /models``)
|
||||
- Context length for each discovered/entered model, via the existing
|
||||
:func:`agent.model_metadata.get_model_context_length` resolver.
|
||||
|
||||
Rationale:
|
||||
|
||||
Azure has no pure-API-key deployment-listing endpoint — per Microsoft,
|
||||
deployment enumeration requires ARM management-plane auth. Azure
|
||||
OpenAI v1 endpoints ``{resource}.openai.azure.com/openai/v1`` do return
|
||||
a ``/models`` list, but it reflects the resource's *available* models
|
||||
rather than the user's *deployed* deployment names. In practice it is
|
||||
still a useful hint — the user picks a familiar model name and we look
|
||||
up its context length from the catalog.
|
||||
|
||||
The detector never crashes on errors (every HTTP call is wrapped in a
|
||||
broad try/except). Callers get a :class:`DetectionResult` with whatever
|
||||
information could be gathered, and fall back to manual entry for the
|
||||
rest.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Optional
|
||||
from urllib import request as urllib_request
|
||||
from urllib.error import HTTPError, URLError
|
||||
from urllib.parse import urlparse, urlunparse
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Default Azure OpenAI ``api-version`` to probe with. The v1 GA endpoint
|
||||
# accepts requests without ``api-version`` entirely, so this is only used
|
||||
# as a fallback for pre-v1 resources that still require it.
|
||||
_AZURE_OPENAI_PROBE_API_VERSIONS = (
|
||||
"2025-04-01-preview",
|
||||
"2024-10-21", # oldest GA that supports /models
|
||||
)
|
||||
|
||||
# Default Azure Anthropic ``api-version``. Matches the value used by
|
||||
# ``agent/anthropic_adapter.py`` when building the Anthropic client.
|
||||
_AZURE_ANTHROPIC_API_VERSION = "2025-04-15"
|
||||
|
||||
|
||||
@dataclass
class DetectionResult:
    """Outcome of probing an endpoint with a base URL and API key.

    Every field has a default, so a bare ``DetectionResult()`` represents
    "nothing detected".
    """

    # Detected API transport: "chat_completions", "anthropic_messages",
    # or None when detection failed.
    api_mode: Optional[str] = None

    # Deployment / model IDs returned by ``/models`` (best effort). Empty
    # when the endpoint does not expose the list with an API key.
    models: list[str] = field(default_factory=list)

    # Lowercased host from the base URL (used for display messages).
    hostname: str = ""

    # Human-readable reason the detector chose ``api_mode``; useful for
    # explaining auto-detection to the user in the wizard.
    reason: str = ""

    # True when ``/models`` returned a valid OpenAI-shaped payload.
    models_probe_ok: bool = False

    # True when the URL was determined to be an Anthropic-style endpoint
    # (from path suffix or live probe).
    is_anthropic: bool = False
|
||||
|
||||
|
||||
def _http_get_json(url: str, api_key: str, timeout: float = 6.0) -> tuple[int, Optional[dict]]:
    """GET ``url`` and return ``(status_code, parsed_json_or_None)``.

    The key is sent both as ``api-key`` and as ``Authorization: Bearer`` so
    a single request covers either auth style.

    Returns:
        - ``(status, parsed)`` on a successful response whose body is JSON;
        - ``(status, None)`` when the body is not valid JSON;
        - ``(exc.code, None)`` for an ``HTTPError``;
        - ``(0, None)`` for connection, timeout or any other error.

    Never raises.
    """
    req = urllib_request.Request(
        url,
        method="GET",
        headers={
            "api-key": api_key,
            "Authorization": f"Bearer {api_key}",
            "User-Agent": "hermes-agent/azure-detect",
        },
    )
    try:
        with urllib_request.urlopen(req, timeout=timeout) as resp:
            raw = resp.read()
            try:
                parsed = json.loads(raw.decode("utf-8", errors="replace"))
            except Exception:
                # Reachable endpoint, unparseable body: keep the status.
                parsed = None
            return resp.status, parsed
    except HTTPError as exc:
        return exc.code, None
    except (URLError, TimeoutError, OSError) as exc:
        logger.debug("azure_detect: GET %s failed: %s", url, exc)
        return 0, None
    except Exception as exc:  # pragma: no cover — defensive
        logger.debug("azure_detect: GET %s unexpected error: %s", url, exc)
        return 0, None
|
||||
|
||||
|
||||
def _strip_trailing_v1(url: str) -> str:
|
||||
"""Strip trailing ``/v1`` or ``/v1/`` so we can construct sub-paths."""
|
||||
return re.sub(r"/v1/?$", "", url.rstrip("/"))
|
||||
|
||||
|
||||
def _looks_like_anthropic_path(url: str) -> bool:
|
||||
"""Return True when the URL's path ends in ``/anthropic`` or
|
||||
contains a ``/anthropic/`` segment. Used by Azure Foundry
|
||||
resources that route Claude traffic through a dedicated path."""
|
||||
try:
|
||||
parsed = urlparse(url)
|
||||
path = (parsed.path or "").lower().rstrip("/")
|
||||
return path.endswith("/anthropic") or "/anthropic/" in path + "/"
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
|
||||
def _extract_model_ids(payload: dict) -> list[str]:
|
||||
"""Extract a list of model IDs from an OpenAI-shaped ``/models``
|
||||
response. Returns ``[]`` on any shape mismatch."""
|
||||
data = payload.get("data") if isinstance(payload, dict) else None
|
||||
if not isinstance(data, list):
|
||||
return []
|
||||
ids: list[str] = []
|
||||
for item in data:
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
# OpenAI shape: {"id": "gpt-5.4", "object": "model", ...}
|
||||
mid = item.get("id") or item.get("model") or item.get("name")
|
||||
if isinstance(mid, str) and mid:
|
||||
ids.append(mid)
|
||||
return ids
|
||||
|
||||
|
||||
def _probe_openai_models(base_url: str, api_key: str) -> tuple[bool, list[str]]:
    """Probe ``<base>/models`` for an OpenAI-shaped response.

    The bare ``/models`` URL is tried first, then once per entry in
    ``_AZURE_OPENAI_PROBE_API_VERSIONS`` with an explicit ``api-version``
    query parameter. Probing stops at the first usable answer.

    Returns:
        ``(ok, models)``. ``ok`` is True when a probe returned HTTP 200 with
        either at least one extractable model ID or a JSON object that has
        a ``data`` key (in which case ``models`` is empty and the user can
        enter a model manually). ``(False, [])`` when no probe qualified.
    """
    root = base_url.rstrip("/")
    # Unversioned URL first, then the explicit api-version fallbacks.
    probe_urls = [f"{root}/models"] + [
        f"{root}/models?api-version={version}"
        for version in _AZURE_OPENAI_PROBE_API_VERSIONS
    ]

    for probe_url in probe_urls:
        status, body = _http_get_json(probe_url, api_key)
        if status != 200 or body is None:
            continue
        found = _extract_model_ids(body)
        if found:
            logger.info(
                "azure_detect: /models probe OK at %s (%d models)",
                probe_url, len(found),
            )
            return True, found
        # 200 with a "data" key but no IDs still counts as OpenAI-shaped.
        if isinstance(body, dict) and "data" in body:
            return True, []
    return False, []
|
||||
|
||||
|
||||
def _probe_anthropic_messages(base_url: str, api_key: str) -> bool:
    """Check whether ``<base>/v1/messages`` recognises the Anthropic shape.

    A minimal POST (``model="probe"``, ``max_tokens=1``) is sent with both
    ``api-key`` and ``Authorization: Bearer`` headers. The endpoint is
    treated as Anthropic-style when:

    * the request succeeds with a status below 500, or
    * the HTTP error body mentions ``anthropic``, or contains both
      ``"type"`` and ``"error"``, or
    * the error is a 400 whose body mentions ``messages`` or ``model``.

    Network failures, timeouts and any unexpected exception yield False.
    """
    endpoint = (
        f"{_strip_trailing_v1(base_url)}/v1/messages"
        f"?api-version={_AZURE_ANTHROPIC_API_VERSION}"
    )
    body_bytes = json.dumps({
        "model": "probe",
        "max_tokens": 1,
        "messages": [{"role": "user", "content": "ping"}],
    }).encode("utf-8")

    request = urllib_request.Request(endpoint, method="POST", data=body_bytes)
    for header_name, header_value in (
        ("api-key", api_key),
        ("Authorization", f"Bearer {api_key}"),
        ("anthropic-version", "2023-06-01"),
        ("content-type", "application/json"),
        ("User-Agent", "hermes-agent/azure-detect"),
    ):
        request.add_header(header_name, header_value)

    try:
        with urllib_request.urlopen(request, timeout=6.0) as response:
            # A success is not expected because "probe" is not a real
            # deployment, but if it happens the endpoint speaks Anthropic.
            return response.status < 500
    except HTTPError as http_err:
        # A 4xx with an Anthropic-shaped error body means Anthropic endpoint.
        try:
            text = http_err.read().decode("utf-8", errors="replace").lower()
            looks_like_error_envelope = ('"type"' in text) and ('"error"' in text)
            if ("anthropic" in text) or looks_like_error_envelope:
                return True
            # Pre-Azure-v1 Azure Foundry returns a plain 404 for
            # Anthropic-style calls on non-Anthropic deployments, whereas a
            # 400 "model not found" does indicate Anthropic.
            mentions_request_fields = ("messages" in text) or ("model" in text)
            return http_err.code == 400 and mentions_request_fields
        except Exception:
            return False
    except (URLError, TimeoutError, OSError):
        return False
    except Exception:  # pragma: no cover
        return False
|
||||
|
||||
|
||||
def detect(base_url: str, api_key: str) -> DetectionResult:
    """Probe an Azure endpoint and report its likely transport and models.

    Intended to be called by the wizard before asking the user to choose
    an API mode. The returned :class:`DetectionResult` is *advisory*: when
    ``api_mode`` is left unset the caller should ask the user instead.

    Checks run in order and stop at the first match:

    1. URL path sniff for an ``/anthropic`` path.
    2. OpenAI-style ``/models`` probe.
    3. Anthropic Messages probe.
    """
    outcome = DetectionResult()

    try:
        outcome.hostname = (urlparse(base_url).hostname or "").lower()
    except Exception:
        outcome.hostname = ""

    def _as_anthropic(why: str) -> DetectionResult:
        outcome.is_anthropic = True
        outcome.api_mode = "anthropic_messages"
        outcome.reason = why
        return outcome

    # 1. Path sniff. Azure Foundry exposes Anthropic-style deployments
    #    under a dedicated ``/anthropic`` path.
    if _looks_like_anthropic_path(base_url):
        return _as_anthropic("URL path ends in /anthropic → Anthropic Messages API")

    # 2. OpenAI-style /models probe. A positive answer means the endpoint
    #    speaks the OpenAI wire format.
    probe_ok, models = _probe_openai_models(base_url, api_key)
    if probe_ok:
        outcome.models_probe_ok = True
        outcome.models = models
        outcome.api_mode = "chat_completions"
        if models:
            outcome.reason = (
                f"GET /models returned {len(models)} model(s) — OpenAI-style endpoint"
            )
        else:
            outcome.reason = (
                "GET /models returned an OpenAI-shaped empty list — OpenAI-style endpoint"
            )
        return outcome

    # 3. Fallback: the Anthropic Messages probe is slower and more
    #    intrusive than /models, so it only runs once the OpenAI probe failed.
    if _probe_anthropic_messages(base_url, api_key):
        return _as_anthropic("Endpoint accepts Anthropic Messages shape")

    # Nothing matched; the caller falls back to manual selection.
    outcome.reason = (
        "Could not probe endpoint (private network, missing model list, or "
        "non-standard path) — falling back to manual API-mode selection"
    )
    return outcome
|
||||
|
||||
|
||||
def lookup_context_length(model: str, base_url: str, api_key: str) -> Optional[int]:
    """Return a *known* context length for ``model``, or ``None``.

    Delegates to :func:`agent.model_metadata.get_model_context_length` and
    suppresses the answer when it equals ``DEFAULT_FALLBACK_CONTEXT``, so
    the wizard can tell "we actually know this" apart from "we guessed".

    ``None`` is also returned when the metadata module cannot be imported,
    when the lookup raises, or when the result is not a positive integer.
    """
    try:
        from agent.model_metadata import (
            DEFAULT_FALLBACK_CONTEXT,
            get_model_context_length,
        )
    except Exception:
        return None

    try:
        length = get_model_context_length(model, base_url=base_url, api_key=api_key)
    except Exception as err:
        logger.debug("azure_detect: context length lookup failed: %s", err)
        return None

    is_known = (
        isinstance(length, int)
        and length > 0
        and length != DEFAULT_FALLBACK_CONTEXT
    )
    return length if is_known else None
|
||||
|
||||
|
||||
__all__ = ["DetectionResult", "detect", "lookup_context_length"]
|
||||
+177
-1
@@ -36,12 +36,23 @@ _EXCLUDED_DIRS = {
|
||||
"__pycache__", # bytecode caches — regenerated on import
|
||||
".git", # nested git dirs (profiles shouldn't have these, but safety)
|
||||
"node_modules", # js deps if website/ somehow leaks in
|
||||
"backups", # prior auto-backups — don't nest backups exponentially
|
||||
"checkpoints", # session-local trajectory caches — regenerated per-session,
|
||||
# session-hash-keyed so they don't port to another machine anyway
|
||||
}
|
||||
|
||||
# File-name suffixes to skip
|
||||
_EXCLUDED_SUFFIXES = (
|
||||
".pyc",
|
||||
".pyo",
|
||||
# SQLite sidecar files — the backup takes a consistent snapshot of ``*.db``
|
||||
# via ``sqlite3.backup()``, so shipping the live WAL / shared-memory /
|
||||
# rollback-journal alongside would pair a fresh snapshot with stale sidecar
|
||||
# state and produce a torn restore on the next open. They're transient and
|
||||
# regenerated on first connection anyway.
|
||||
".db-wal",
|
||||
".db-shm",
|
||||
".db-journal",
|
||||
)
|
||||
|
||||
# File names to skip (runtime state that's meaningless on another machine)
|
||||
@@ -454,6 +465,12 @@ def run_import(args) -> None:
|
||||
# Critical state files to include in quick snapshots (relative to HERMES_HOME).
|
||||
# Everything else is either regeneratable (logs, cache) or managed separately
|
||||
# (skills, repo, sessions/).
|
||||
#
|
||||
# Entries may be individual files OR directories. Directories are captured
|
||||
# recursively; missing entries are silently skipped. Pairing data lives in
|
||||
# platform-specific JSON blobs outside state.db, so it's listed here explicitly
|
||||
# — `hermes update` snapshots this set before pulling so approved-user lists
|
||||
# are recoverable if anything goes wrong (issue #15733).
|
||||
_QUICK_STATE_FILES = (
|
||||
"state.db",
|
||||
"config.yaml",
|
||||
@@ -463,6 +480,10 @@ _QUICK_STATE_FILES = (
|
||||
"gateway_state.json",
|
||||
"channel_directory.json",
|
||||
"processes.json",
|
||||
# Pairing stores (generic + per-platform JSONs outside state.db)
|
||||
"pairing", # legacy location (gateway/pairing.py)
|
||||
"platforms/pairing", # new location (gateway/pairing.py)
|
||||
"feishu_comment_pairing.json", # Feishu comment subscription pairings
|
||||
)
|
||||
|
||||
_QUICK_SNAPSHOTS_DIR = "state-snapshots"
|
||||
@@ -498,7 +519,27 @@ def create_quick_snapshot(
|
||||
|
||||
for rel in _QUICK_STATE_FILES:
|
||||
src = home / rel
|
||||
if not src.exists() or not src.is_file():
|
||||
if not src.exists():
|
||||
continue
|
||||
|
||||
if src.is_dir():
|
||||
# Walk the directory and record each file individually in the
|
||||
# manifest so restore can treat them uniformly. Empty dirs are
|
||||
# skipped (nothing to snapshot).
|
||||
for sub in src.rglob("*"):
|
||||
if not sub.is_file():
|
||||
continue
|
||||
sub_rel = sub.relative_to(home).as_posix()
|
||||
dst = snap_dir / sub_rel
|
||||
dst.parent.mkdir(parents=True, exist_ok=True)
|
||||
try:
|
||||
shutil.copy2(sub, dst)
|
||||
manifest[sub_rel] = dst.stat().st_size
|
||||
except (OSError, PermissionError) as exc:
|
||||
logger.warning("Could not snapshot %s: %s", sub_rel, exc)
|
||||
continue
|
||||
|
||||
if not src.is_file():
|
||||
continue
|
||||
|
||||
dst = snap_dir / rel
|
||||
@@ -653,3 +694,138 @@ def run_quick_backup(args) -> None:
|
||||
print(f" Restore with: /snapshot restore {snap_id}")
|
||||
else:
|
||||
print("No state files found to snapshot.")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Pre-update auto-backup
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_PRE_UPDATE_BACKUPS_DIR = "backups"
|
||||
_PRE_UPDATE_PREFIX = "pre-update-"
|
||||
_PRE_UPDATE_DEFAULT_KEEP = 5
|
||||
|
||||
|
||||
def _pre_update_backup_dir(hermes_home: Optional[Path] = None) -> Path:
    """Return the directory that holds pre-update backup zips.

    Uses ``hermes_home`` when given, otherwise the path returned by
    ``get_hermes_home()``, and appends the backups directory name.
    """
    base = hermes_home if hermes_home else get_hermes_home()
    return base / _PRE_UPDATE_BACKUPS_DIR
|
||||
|
||||
|
||||
def _prune_pre_update_backups(backup_dir: Path, keep: int) -> int:
|
||||
"""Remove oldest pre-update backups beyond the keep limit.
|
||||
|
||||
Returns the number of files deleted. Only touches files matching
|
||||
``pre-update-*.zip`` so hand-made zips dropped in the same directory
|
||||
are never touched.
|
||||
"""
|
||||
if keep < 0:
|
||||
keep = 0
|
||||
if not backup_dir.exists():
|
||||
return 0
|
||||
|
||||
backups = sorted(
|
||||
(p for p in backup_dir.iterdir()
|
||||
if p.is_file() and p.name.startswith(_PRE_UPDATE_PREFIX) and p.suffix.lower() == ".zip"),
|
||||
key=lambda p: p.name,
|
||||
reverse=True,
|
||||
)
|
||||
|
||||
deleted = 0
|
||||
for p in backups[keep:]:
|
||||
try:
|
||||
p.unlink()
|
||||
deleted += 1
|
||||
except OSError as exc:
|
||||
logger.warning("Failed to prune backup %s: %s", p.name, exc)
|
||||
|
||||
return deleted
|
||||
|
||||
|
||||
def create_pre_update_backup(
    hermes_home: Optional[Path] = None,
    keep: int = _PRE_UPDATE_DEFAULT_KEEP,
) -> Optional[Path]:
    """Zip HERMES_HOME into ``<HERMES_HOME>/backups/pre-update-<timestamp>.zip``.

    The tree is walked without following symlinks, skipping directories in
    ``_EXCLUDED_DIRS`` and files rejected by ``_should_exclude``. ``*.db``
    files are copied through ``_safe_copy_db`` into a temporary file and
    that copy is archived; a database whose safe copy fails is left out.
    After a successful write, older pre-update backups beyond ``keep`` are
    pruned.

    Returns the path of the created zip, or ``None`` when the root is not
    a directory, the backup directory cannot be created, the walk fails,
    no files qualify, or writing the zip raises ``OSError``. These handled
    failures are logged rather than raised so that the caller
    (``hermes update``) can continue without a backup.
    """
    root = hermes_home or get_default_hermes_root()
    if not root.is_dir():
        return None

    target_dir = _pre_update_backup_dir(root)
    try:
        target_dir.mkdir(parents=True, exist_ok=True)
    except OSError as err:
        logger.warning("Could not create pre-update backup dir %s: %s", target_dir, err)
        return None

    timestamp = datetime.now().strftime("%Y-%m-%d-%H%M%S")
    archive_path = target_dir / f"{_PRE_UPDATE_PREFIX}{timestamp}.zip"

    def _is_archive_itself(candidate: Path) -> bool:
        # Guards against zipping the output file into itself if it already
        # exists; resolution errors are treated as "not the archive".
        try:
            return candidate.resolve() == archive_path.resolve()
        except (OSError, ValueError):
            return False

    def _store(archive, source: Path, relative: Path) -> None:
        # Plain files go in directly; SQLite databases go in via a
        # temporary safe copy that is always removed afterwards.
        if source.suffix != ".db":
            archive.write(source, arcname=str(relative))
            return
        with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as handle:
            snapshot = Path(handle.name)
        try:
            if _safe_copy_db(source, snapshot):
                archive.write(snapshot, arcname=str(relative))
        finally:
            snapshot.unlink(missing_ok=True)

    # Collect (absolute path, archive-relative path) pairs.
    pending: list[tuple[Path, Path]] = []
    try:
        for current_dir, subdirs, names in os.walk(root, followlinks=False):
            # In-place pruning stops os.walk from descending into them.
            subdirs[:] = [sub for sub in subdirs if sub not in _EXCLUDED_DIRS]
            parent = Path(current_dir)

            for name in names:
                source = parent / name
                try:
                    relative = source.relative_to(root)
                except ValueError:
                    continue

                if _should_exclude(relative) or _is_archive_itself(source):
                    continue

                pending.append((source, relative))
    except OSError as err:
        logger.warning("Pre-update backup: walk failed: %s", err)
        return None

    if not pending:
        return None

    try:
        with zipfile.ZipFile(archive_path, "w", zipfile.ZIP_DEFLATED, compresslevel=6) as archive:
            for source, relative in pending:
                try:
                    _store(archive, source, relative)
                except (PermissionError, OSError, ValueError) as err:
                    # One unreadable file must not abort the whole backup.
                    logger.debug("Skipping %s in pre-update backup: %s", relative, err)
    except OSError as err:
        logger.warning("Pre-update backup: zip write failed: %s", err)
        # Best-effort cleanup of the partial file.
        try:
            archive_path.unlink(missing_ok=True)
        except OSError:
            pass
        return None

    _prune_pre_update_backups(target_dir, keep=keep)
    return archive_path
|
||||
|
||||
+115
-6
@@ -62,6 +62,8 @@ COMMAND_REGISTRY: list[CommandDef] = [
|
||||
aliases=("reset",)),
|
||||
CommandDef("clear", "Clear screen and start a new session", "Session",
|
||||
cli_only=True),
|
||||
CommandDef("redraw", "Force a full UI repaint (recovers from terminal drift)", "Session",
|
||||
cli_only=True),
|
||||
CommandDef("history", "Show conversation history", "Session",
|
||||
cli_only=True),
|
||||
CommandDef("save", "Save the current conversation", "Session",
|
||||
@@ -84,9 +86,7 @@ COMMAND_REGISTRY: list[CommandDef] = [
|
||||
CommandDef("deny", "Deny a pending dangerous command", "Session",
|
||||
gateway_only=True),
|
||||
CommandDef("background", "Run a prompt in the background", "Session",
|
||||
aliases=("bg",), args_hint="<prompt>"),
|
||||
CommandDef("btw", "Ephemeral side question using session context (no tools, not persisted)", "Session",
|
||||
args_hint="<question>"),
|
||||
aliases=("bg", "btw"), args_hint="<prompt>"),
|
||||
CommandDef("agents", "Show active agents and running tasks", "Session",
|
||||
aliases=("tasks",)),
|
||||
CommandDef("queue", "Queue a prompt for the next turn (doesn't interrupt)", "Session",
|
||||
@@ -103,7 +103,8 @@ COMMAND_REGISTRY: list[CommandDef] = [
|
||||
# Configuration
|
||||
CommandDef("config", "Show current configuration", "Configuration",
|
||||
cli_only=True),
|
||||
CommandDef("model", "Switch model for this session", "Configuration", args_hint="[model] [--provider name] [--global]"),
|
||||
CommandDef("model", "Switch model for this session", "Configuration",
|
||||
aliases=("provider",), args_hint="[model] [--provider name] [--global]"),
|
||||
CommandDef("gquota", "Show Google Gemini Code Assist quota usage", "Info",
|
||||
cli_only=True),
|
||||
|
||||
@@ -127,8 +128,8 @@ COMMAND_REGISTRY: list[CommandDef] = [
|
||||
CommandDef("voice", "Toggle voice mode", "Configuration",
|
||||
args_hint="[on|off|tts|status]", subcommands=("on", "off", "tts", "status")),
|
||||
CommandDef("busy", "Control what Enter does while Hermes is working", "Configuration",
|
||||
cli_only=True, args_hint="[queue|interrupt|status]",
|
||||
subcommands=("queue", "interrupt", "status")),
|
||||
cli_only=True, args_hint="[queue|steer|interrupt|status]",
|
||||
subcommands=("queue", "steer", "interrupt", "status")),
|
||||
|
||||
# Tools & Skills
|
||||
CommandDef("tools", "Manage tools: /tools [list|disable|enable] [name...]", "Tools & Skills",
|
||||
@@ -807,6 +808,114 @@ def discord_skill_commands_by_category(
|
||||
return trimmed_categories, uncategorized, hidden
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Slack native slash commands
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Slack slash command name constraints: lowercase a-z, 0-9, hyphens,
|
||||
# underscores. Max 32 chars. Slack app manifest accepts up to 50 slash
|
||||
# commands per app.
|
||||
_SLACK_MAX_SLASH_COMMANDS = 50
|
||||
_SLACK_NAME_LIMIT = 32
|
||||
_SLACK_INVALID_CHARS = re.compile(r"[^a-z0-9_\-]")
|
||||
|
||||
|
||||
def _sanitize_slack_name(raw: str) -> str:
|
||||
"""Convert a command name to a valid Slack slash command name.
|
||||
|
||||
Slack allows lowercase a-z, digits, hyphens, and underscores. Max 32
|
||||
chars. Uppercase is lowercased; invalid chars are stripped.
|
||||
"""
|
||||
name = raw.lower()
|
||||
name = _SLACK_INVALID_CHARS.sub("", name)
|
||||
name = name.strip("-_")
|
||||
return name[:_SLACK_NAME_LIMIT]
|
||||
|
||||
|
||||
def slack_native_slashes() -> list[tuple[str, str, str]]:
    """Build the ``(slash_name, description, usage_hint)`` list for Slack.

    Every gateway-available command in ``COMMAND_REGISTRY`` becomes a
    standalone Slack slash command (e.g. ``/stop``, ``/model``) rather
    than a ``/hermes <verb>`` subcommand. Canonical names, aliases and
    plugin-registered commands are all included, in that priority order.

    The list is capped at ``_SLACK_MAX_SLASH_COMMANDS`` entries and never
    contains the same sanitized name twice. ``/hermes`` always occupies
    the first slot so the ``/hermes <subcommand>`` form stays available
    for anything dropped by the cap and for free-form questions.
    Descriptions are cut to 140 characters and usage hints to 100.
    """
    overrides = _resolve_config_gates()

    # /hermes is reserved up front as the catch-all top-level command.
    slashes: list[tuple[str, str, str]] = [
        ("hermes", "Talk to Hermes or run a subcommand", "[subcommand] [args]"),
    ]
    taken: set[str] = {"hermes"}

    def _register(name: str, desc: str, hint: str) -> None:
        slug = _sanitize_slack_name(name)
        has_room = len(slashes) < _SLACK_MAX_SLASH_COMMANDS
        if slug and slug not in taken and has_room:
            # Slack allows far longer descriptions; these are kept short.
            slashes.append((slug, desc[:140], hint[:100]))
            taken.add(slug)

    def _gateway_commands():
        for command in COMMAND_REGISTRY:
            if _is_gateway_available(command, overrides):
                yield command

    # Pass 1: canonical names, so they win slots if the cap is reached.
    for command in _gateway_commands():
        _register(command.name, command.description, command.args_hint or "")

    # Pass 2: aliases. An alias that sanitizes to an already-registered
    # name is dropped by the dedup in _register.
    for command in _gateway_commands():
        for alias in command.aliases:
            _register(
                alias,
                f"Alias for /{command.name} — {command.description}",
                command.args_hint or "",
            )

    # Pass 3: plugin commands.
    for plugin_name, plugin_desc, plugin_hint in _iter_plugin_command_entries():
        _register(plugin_name, plugin_desc, plugin_hint or "")

    return slashes
|
||||
|
||||
|
||||
def slack_app_manifest(request_url: str = "https://hermes-agent.local/slack/commands") -> dict[str, Any]:
    """Build the slash-command part of a Slack app manifest.

    The result has the shape ``{"features": {"slash_commands": [...]}}``
    with one entry per command from :func:`slack_native_slashes`. Callers
    merge it into a full manifest; the other manifest sections
    (display_information, oauth_config, settings, ...) are deliberately
    not generated here.

    ``request_url`` is written into every entry as ``url``. Per the
    original notes, Slack's schema requires it but Socket Mode routes the
    command over the WebSocket, so a placeholder URL is acceptable.
    """

    def _to_entry(name: str, desc: str, usage: str) -> dict[str, Any]:
        entry: dict[str, Any] = {
            "command": f"/{name}",
            "description": desc or f"Run /{name}",
            "should_escape": False,
            "url": request_url,
        }
        # usage_hint is only emitted when there is something to show.
        if usage:
            entry["usage_hint"] = usage
        return entry

    commands = [_to_entry(*triple) for triple in slack_native_slashes()]
    return {"features": {"slash_commands": commands}}
|
||||
|
||||
|
||||
def slack_subcommand_map() -> dict[str, str]:
|
||||
"""Return subcommand -> /command mapping for Slack /hermes handler.
|
||||
|
||||
|
||||
+190
-10
@@ -389,6 +389,20 @@ DEFAULT_CONFIG = {
|
||||
# (60+ tool iterations with tiny output) before users assume the
|
||||
# bot is dead and /restart.
|
||||
"gateway_notify_interval": 180,
|
||||
# How user-attached images are presented to the main model on each turn.
|
||||
# "auto" — attach natively when the active model reports
|
||||
# supports_vision=True AND the user hasn't explicitly
|
||||
# configured auxiliary.vision.provider. Otherwise fall
|
||||
# back to text (vision_analyze pre-analysis).
|
||||
# "native" — always attach natively; non-vision models will either
|
||||
# error at the provider or get a last-chance text fallback
|
||||
# (see run_agent._prepare_messages_for_api).
|
||||
# "text" — always pre-analyze with vision_analyze and prepend the
|
||||
# description as text; the main model never sees pixels.
|
||||
# Affects gateway platforms, the TUI, and CLI /attach. vision_analyze
|
||||
# remains available as a tool regardless of this setting — the routing
|
||||
# only controls how inbound user images are presented.
|
||||
"image_input_mode": "auto",
|
||||
},
|
||||
|
||||
"terminal": {
|
||||
@@ -465,6 +479,7 @@ DEFAULT_CONFIG = {
|
||||
"command_timeout": 30, # Timeout for browser commands in seconds (screenshot, navigate, etc.)
|
||||
"record_sessions": False, # Auto-record browser sessions as WebM videos
|
||||
"allow_private_urls": False, # Allow navigating to private/internal IPs (localhost, 192.168.x.x, etc.)
|
||||
"auto_local_for_private_urls": True, # When a cloud provider is set, auto-spawn local Chromium for LAN/localhost URLs instead of sending them to the cloud
|
||||
"cdp_url": "", # Optional persistent CDP endpoint for attaching to an existing Chromium/Chrome
|
||||
# CDP supervisor — dialog + frame detection via a persistent WebSocket.
|
||||
# Active only when a CDP-capable backend is attached (Browserbase or
|
||||
@@ -486,6 +501,19 @@ DEFAULT_CONFIG = {
|
||||
"checkpoints": {
|
||||
"enabled": True,
|
||||
"max_snapshots": 50, # Max checkpoints to keep per directory
|
||||
# Auto-maintenance: shadow repos accumulate forever under
|
||||
# ~/.hermes/checkpoints/ (one per cd'd working directory). Field
|
||||
# reports put the typical offender at 1000+ repos / ~12 GB. When
|
||||
# auto_prune is on, hermes sweeps at startup (at most once per
|
||||
# min_interval_hours) and deletes:
|
||||
# * orphan repos: HERMES_WORKDIR no longer exists on disk
|
||||
# * stale repos: newest mtime older than retention_days
|
||||
# Opt-in so users who rely on /rollback against long-ago sessions
|
||||
# never lose data silently.
|
||||
"auto_prune": False,
|
||||
"retention_days": 7,
|
||||
"delete_orphans": True,
|
||||
"min_interval_hours": 24,
|
||||
},
|
||||
|
||||
# Maximum characters returned by a single read_file call. Reads that
|
||||
@@ -612,14 +640,6 @@ DEFAULT_CONFIG = {
|
||||
"timeout": 30,
|
||||
"extra_body": {},
|
||||
},
|
||||
"flush_memories": {
|
||||
"provider": "auto",
|
||||
"model": "",
|
||||
"base_url": "",
|
||||
"api_key": "",
|
||||
"timeout": 30,
|
||||
"extra_body": {},
|
||||
},
|
||||
"title_generation": {
|
||||
"provider": "auto",
|
||||
"model": "",
|
||||
@@ -634,7 +654,7 @@ DEFAULT_CONFIG = {
|
||||
"compact": False,
|
||||
"personality": "kawaii",
|
||||
"resume_display": "full",
|
||||
"busy_input_mode": "interrupt",
|
||||
"busy_input_mode": "interrupt", # interrupt | queue | steer
|
||||
"bell_on_complete": False,
|
||||
"show_reasoning": False,
|
||||
"streaming": False,
|
||||
@@ -848,7 +868,7 @@ DEFAULT_CONFIG = {
|
||||
"auto_thread": True, # Auto-create threads on @mention in channels (like Slack)
|
||||
"reactions": True, # Add 👀/✅/❌ reactions to messages during processing
|
||||
"channel_prompts": {}, # Per-channel ephemeral system prompts (forum parents apply to child threads)
|
||||
# discord_server tool: restrict which actions the agent may call.
|
||||
# discord / discord_admin tools: restrict which actions the agent may call.
|
||||
# Default (empty) = all actions allowed (subject to bot privileged intents).
|
||||
# Accepts comma-separated string ("list_guilds,list_channels,fetch_messages")
|
||||
# or YAML list. Unknown names are dropped with a warning at load time.
|
||||
@@ -967,6 +987,27 @@ DEFAULT_CONFIG = {
|
||||
"backup_count": 3, # Number of rotated backup files to keep
|
||||
},
|
||||
|
||||
# Remotely-hosted model catalog manifest. When enabled, the CLI fetches
|
||||
# curated model lists for OpenRouter and Nous Portal from this URL,
|
||||
# falling back to the in-repo snapshot on network failure. Lets us
|
||||
# update model picker lists without shipping a hermes-agent release.
|
||||
# The default URL is served by the docs site GitHub Pages deploy.
|
||||
"model_catalog": {
|
||||
"enabled": True,
|
||||
"url": "https://hermes-agent.nousresearch.com/docs/api/model-catalog.json",
|
||||
# Disk cache TTL in hours. Beyond this, the CLI refetches on the
|
||||
# next /model or `hermes model` invocation; network failures
|
||||
# silently fall back to the stale cache.
|
||||
"ttl_hours": 24,
|
||||
# Optional per-provider override URLs for third parties that want
|
||||
# to self-host their own curation list using the same schema.
|
||||
# Example:
|
||||
# providers:
|
||||
# openrouter:
|
||||
# url: https://example.com/my-curation.json
|
||||
"providers": {},
|
||||
},
|
||||
|
||||
# Network settings — workarounds for connectivity issues.
|
||||
"network": {
|
||||
# Force IPv4 connections. On servers with broken or unreachable IPv6,
|
||||
@@ -1003,6 +1044,27 @@ DEFAULT_CONFIG = {
|
||||
"min_interval_hours": 24,
|
||||
},
|
||||
|
||||
# Contextual first-touch onboarding hints (see agent/onboarding.py).
|
||||
# Each hint is shown once per install and then latched here so it
|
||||
# never fires again. Users can wipe the section to re-see all hints.
|
||||
"onboarding": {
|
||||
"seen": {},
|
||||
},
|
||||
|
||||
# ``hermes update`` behaviour.
|
||||
"updates": {
|
||||
# Run a full ``hermes backup``-style zip of HERMES_HOME before every
|
||||
# ``hermes update``. Backups land in ``<HERMES_HOME>/backups/`` and
|
||||
# can be restored with ``hermes import <path>``. Off by default —
|
||||
# on large HERMES_HOME directories the zip can add minutes to every
|
||||
# update. Set to true to re-enable, or pass ``--backup`` to opt in
|
||||
# for a single update run.
|
||||
"pre_update_backup": False,
|
||||
# How many pre-update backup zips to retain. Older ones are pruned
|
||||
# automatically after each successful backup.
|
||||
"backup_keep": 5,
|
||||
},
|
||||
|
||||
# Config schema version - bump this when adding new required fields
|
||||
"_config_version": 22,
|
||||
}
|
||||
@@ -1379,6 +1441,21 @@ OPTIONAL_ENV_VARS = {
|
||||
"category": "provider",
|
||||
"advanced": True,
|
||||
},
|
||||
"AZURE_FOUNDRY_API_KEY": {
|
||||
"description": "Azure Foundry API key for custom Azure endpoints",
|
||||
"prompt": "Azure Foundry API Key",
|
||||
"url": "https://ai.azure.com/",
|
||||
"password": True,
|
||||
"category": "provider",
|
||||
},
|
||||
"AZURE_FOUNDRY_BASE_URL": {
|
||||
"description": "Azure Foundry base URL (set via 'hermes model' for endpoint-specific config)",
|
||||
"prompt": "Azure Foundry base URL",
|
||||
"url": None,
|
||||
"password": False,
|
||||
"category": "provider",
|
||||
"advanced": True,
|
||||
},
|
||||
|
||||
# ── Tool API keys ──
|
||||
"EXA_API_KEY": {
|
||||
@@ -1546,6 +1623,44 @@ OPTIONAL_ENV_VARS = {
|
||||
"category": "tool",
|
||||
},
|
||||
|
||||
# ── Bundled skills (opt-in: only needed if the user uses that skill) ──
|
||||
# These use category="skill" (distinct from "tool") so the sandbox
|
||||
# env blocklist in tools/environments/local.py does NOT rewrite them —
|
||||
# skills legitimately need these passed through to curl via
|
||||
# tools/env_passthrough.py when the user's skill calls out.
|
||||
"NOTION_API_KEY": {
|
||||
"description": "Notion integration token (used by the `notion` skill)",
|
||||
"prompt": "Notion API key",
|
||||
"url": "https://www.notion.so/my-integrations",
|
||||
"password": True,
|
||||
"category": "skill",
|
||||
"advanced": True,
|
||||
},
|
||||
"LINEAR_API_KEY": {
|
||||
"description": "Linear personal API key (used by the `linear` skill)",
|
||||
"prompt": "Linear API key",
|
||||
"url": "https://linear.app/settings/api",
|
||||
"password": True,
|
||||
"category": "skill",
|
||||
"advanced": True,
|
||||
},
|
||||
"AIRTABLE_API_KEY": {
|
||||
"description": "Airtable personal access token (used by the `airtable` skill)",
|
||||
"prompt": "Airtable API key",
|
||||
"url": "https://airtable.com/create/tokens",
|
||||
"password": True,
|
||||
"category": "skill",
|
||||
"advanced": True,
|
||||
},
|
||||
"TENOR_API_KEY": {
|
||||
"description": "Tenor API key for GIF search (used by the `gif-search` skill)",
|
||||
"prompt": "Tenor API key",
|
||||
"url": "https://developers.google.com/tenor/guides/quickstart",
|
||||
"password": True,
|
||||
"category": "skill",
|
||||
"advanced": True,
|
||||
},
|
||||
|
||||
# ── Honcho ──
|
||||
"HONCHO_API_KEY": {
|
||||
"description": "Honcho API key for AI-native persistent memory",
|
||||
@@ -2214,6 +2329,71 @@ def get_compatible_custom_providers(
|
||||
return compatible
|
||||
|
||||
|
||||
def get_custom_provider_context_length(
    model: str,
    base_url: str,
    custom_providers: Optional[List[Dict[str, Any]]] = None,
    config: Optional[Dict[str, Any]] = None,
) -> Optional[int]:
    """Look up a per-model ``context_length`` override from ``custom_providers``.

    Matches any entry whose ``base_url`` equals ``base_url`` (trailing-slash
    insensitive) and returns ``custom_providers[i].models.<model>.context_length``
    if present and valid. Returns ``None`` when no override applies.

    Args:
        model: Model name used as the key under the entry's ``models`` map.
        base_url: Endpoint URL identifying the custom provider.
        custom_providers: Provider entries to search. When ``None`` they are
            resolved via ``get_compatible_custom_providers(config)``; if
            that raises, the raw ``config["custom_providers"]`` list is
            used instead.
        config: Configuration dict used only when ``custom_providers`` is
            ``None``.

    Returns:
        The first positive integer override found, otherwise ``None``.

    Malformed data never raises: entries that are not dicts, entries with
    a non-string ``base_url``, and ``context_length`` values that are
    booleans, non-numeric, infinite or not positive are skipped.

    This is the single source of truth for custom-provider context overrides,
    used by:
      * ``AIAgent.__init__`` (startup resolution)
      * ``AIAgent.switch_model`` (mid-session ``/model`` switch)
      * ``hermes_cli.model_switch.resolve_display_context_length`` (``/model`` confirmation display)
      * ``gateway.run._format_session_info`` (``/info`` display)
      * ``agent.model_metadata.get_model_context_length`` (when custom_providers is threaded through)

    Before this helper existed, the lookup was duplicated in ``run_agent.py``'s
    startup path only; every other path (notably ``/model`` switch) fell back
    to the 128K default. See #15779.
    """
    if not model or not base_url or not isinstance(base_url, str):
        return None

    if custom_providers is None:
        try:
            custom_providers = get_compatible_custom_providers(config)
        except Exception:
            # Fall back to the raw config section; a missing or non-dict
            # config simply means there is nothing to look up.
            if not isinstance(config, dict):
                return None
            raw = config.get("custom_providers")
            custom_providers = raw if isinstance(raw, list) else []
    if not isinstance(custom_providers, list):
        return None

    target_url = base_url.rstrip("/")
    if not target_url:
        return None

    for entry in custom_providers:
        if not isinstance(entry, dict):
            continue

        # Hand-edited YAML can put a number (or anything else) here, so
        # only strings are compared.
        entry_url = entry.get("base_url")
        if not isinstance(entry_url, str) or entry_url.rstrip("/") != target_url:
            continue

        models = entry.get("models")
        if not isinstance(models, dict):
            continue
        model_cfg = models.get(model)
        if not isinstance(model_cfg, dict):
            continue

        raw_ctx = model_cfg.get("context_length")
        # bool is an int subclass; ``context_length: true`` must not be
        # read as a 1-token window.
        if raw_ctx is None or isinstance(raw_ctx, bool):
            continue
        try:
            ctx = int(raw_ctx)
        except (TypeError, ValueError, OverflowError):
            # OverflowError covers infinite floats (YAML ``.inf``).
            continue
        if ctx > 0:
            return ctx

    return None
|
||||
|
||||
|
||||
def check_config_version() -> Tuple[int, int]:
|
||||
"""
|
||||
Check config version.
|
||||
|
||||
+11
-5
@@ -45,8 +45,13 @@ def _pending_file() -> Path:
|
||||
Each entry: ``{"url": "...", "expire_at": <unix_ts>}``. Scheduled
|
||||
DELETEs used to be handled by spawning a detached Python process per
|
||||
paste that slept for 6 hours; those accumulated forever if the user
|
||||
ran ``hermes debug share`` repeatedly. We now persist the schedule
|
||||
to disk and sweep expired entries on the next debug invocation.
|
||||
ran ``hermes debug share`` repeatedly.
|
||||
|
||||
Deletion is now driven by the gateway's cron ticker
|
||||
(``gateway/run.py::_start_cron_ticker``) which calls
|
||||
``_sweep_expired_pastes`` once per hour. ``hermes debug share`` also
|
||||
runs an opportunistic sweep on entry as a fallback for CLI-only users
|
||||
who never start the gateway.
|
||||
"""
|
||||
return get_hermes_home() / "pastes" / "pending.json"
|
||||
|
||||
@@ -223,9 +228,10 @@ def _schedule_auto_delete(urls: list[str], delay_seconds: int = _AUTO_DELETE_SEC
|
||||
interpreters that never exited until the sleep completed.
|
||||
|
||||
The replacement is stateless: we append to ``~/.hermes/pastes/pending.json``
|
||||
and rely on opportunistic sweeps (``_sweep_expired_pastes``) called from
|
||||
every ``hermes debug`` invocation. If the user never runs ``hermes debug``
|
||||
again, paste.rs's own retention policy handles cleanup.
|
||||
and the gateway's cron ticker sweeps expired entries once per hour.
|
||||
``hermes debug share`` also runs an opportunistic sweep as a fallback
|
||||
for CLI-only users. If neither runs again, paste.rs's own retention
|
||||
policy handles cleanup.
|
||||
"""
|
||||
_record_pending(urls, delay_seconds=delay_seconds)
|
||||
|
||||
|
||||
@@ -320,7 +320,11 @@ def run_doctor(args):
|
||||
known_providers.add("custom:" + name.lower().replace(" ", "-"))
|
||||
|
||||
canonical_provider = provider
|
||||
if provider and _resolve_provider_full is not None and provider != "auto":
|
||||
if (
|
||||
provider
|
||||
and _resolve_provider_full is not None
|
||||
and provider not in ("auto", "custom")
|
||||
):
|
||||
provider_def = _resolve_provider_full(provider, user_providers, custom_providers)
|
||||
canonical_provider = provider_def.id if provider_def is not None else None
|
||||
|
||||
|
||||
@@ -0,0 +1,361 @@
|
||||
"""
|
||||
hermes fallback — manage the fallback provider chain.
|
||||
|
||||
Fallback providers are tried in order when the primary model fails with
|
||||
rate-limit, overload, or connection errors. See:
|
||||
https://hermes-agent.nousresearch.com/docs/user-guide/features/fallback-providers
|
||||
|
||||
Subcommands:
|
||||
hermes fallback [list] Show the current fallback chain (default when no subcommand)
|
||||
hermes fallback add Pick provider + model via the same picker as `hermes model`,
|
||||
then append the selection to the chain
|
||||
hermes fallback remove Pick an entry to delete from the chain
|
||||
hermes fallback clear Remove all fallback entries
|
||||
|
||||
Storage: ``fallback_providers`` in ``~/.hermes/config.yaml`` (top-level, list of
|
||||
``{provider, model, base_url?, api_mode?}`` dicts). The legacy single-dict
|
||||
``fallback_model`` format is migrated to the new list format on first add.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import copy
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _read_chain(config: Dict[str, Any]) -> List[Dict[str, Any]]:
|
||||
"""Return the normalized fallback chain as a list of dicts.
|
||||
|
||||
Accepts both the new list format (``fallback_providers``) and the legacy
|
||||
single-dict format (``fallback_model``). The returned list is always a
|
||||
fresh copy — callers can mutate without touching the config dict.
|
||||
"""
|
||||
chain = config.get("fallback_providers") or []
|
||||
if isinstance(chain, list):
|
||||
result = [dict(e) for e in chain if isinstance(e, dict) and e.get("provider") and e.get("model")]
|
||||
if result:
|
||||
return result
|
||||
legacy = config.get("fallback_model")
|
||||
if isinstance(legacy, dict) and legacy.get("provider") and legacy.get("model"):
|
||||
return [dict(legacy)]
|
||||
if isinstance(legacy, list):
|
||||
return [dict(e) for e in legacy if isinstance(e, dict) and e.get("provider") and e.get("model")]
|
||||
return []
|
||||
|
||||
|
||||
def _write_chain(config: Dict[str, Any], chain: List[Dict[str, Any]]) -> None:
|
||||
"""Persist the chain to ``fallback_providers`` and clear legacy key."""
|
||||
config["fallback_providers"] = chain
|
||||
# Drop the legacy single-dict key on write so there's only one source of truth.
|
||||
if "fallback_model" in config:
|
||||
config.pop("fallback_model", None)
|
||||
|
||||
|
||||
def _format_entry(entry: Dict[str, Any]) -> str:
|
||||
"""One-line human-readable rendering of a fallback entry."""
|
||||
provider = entry.get("provider", "?")
|
||||
model = entry.get("model", "?")
|
||||
base = entry.get("base_url")
|
||||
suffix = f" [{base}]" if base else ""
|
||||
return f"{model} (via {provider}){suffix}"
|
||||
|
||||
|
||||
def _extract_fallback_from_model_cfg(model_cfg: Any) -> Optional[Dict[str, Any]]:
|
||||
"""Pull the ``{provider, model, base_url?, api_mode?}`` dict from a ``config["model"]`` snapshot."""
|
||||
if not isinstance(model_cfg, dict):
|
||||
return None
|
||||
provider = (model_cfg.get("provider") or "").strip()
|
||||
# The picker writes the selected model to ``model.default``.
|
||||
model = (model_cfg.get("default") or model_cfg.get("model") or "").strip()
|
||||
if not provider or not model:
|
||||
return None
|
||||
entry: Dict[str, Any] = {"provider": provider, "model": model}
|
||||
base_url = (model_cfg.get("base_url") or "").strip()
|
||||
if base_url:
|
||||
entry["base_url"] = base_url
|
||||
api_mode = (model_cfg.get("api_mode") or "").strip()
|
||||
if api_mode:
|
||||
entry["api_mode"] = api_mode
|
||||
return entry
|
||||
|
||||
|
||||
def _snapshot_auth_active_provider() -> Any:
    """Return the ``active_provider`` value from the auth store.

    Returns ``None`` when the key is absent or when the store cannot be
    imported or loaded (any ``Exception`` is swallowed), so callers cannot
    tell "no active provider" apart from "store unavailable".
    """
    try:
        from hermes_cli.auth import _load_auth_store

        return _load_auth_store().get("active_provider")
    except Exception:
        return None
|
||||
|
||||
|
||||
def _restore_auth_active_provider(value: Any) -> None:
    """Write ``value`` back as ``active_provider`` in the auth store.

    The load-modify-save cycle runs while holding ``_auth_store_lock()``.
    This is best-effort: any ``Exception`` is swallowed so a failed restore
    never fails the surrounding ``fallback add``.
    """
    try:
        from hermes_cli.auth import _auth_store_lock, _load_auth_store, _save_auth_store

        with _auth_store_lock():
            auth_store = _load_auth_store()
            auth_store["active_provider"] = value
            _save_auth_store(auth_store)
    except Exception:
        # Best-effort — if auth.json can't be restored, the user's primary
        # provider may have been deactivated by the picker. They can re-run
        # `hermes model` to fix it. Don't fail the fallback add.
        return
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Subcommand handlers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def cmd_fallback_list(args) -> None:  # noqa: ARG001
    """Print the primary model (when describable) and the fallback chain.

    ``args`` is accepted for dispatcher compatibility and is not used.
    """
    from hermes_cli.config import load_config

    config = load_config()
    chain = _read_chain(config)

    print()
    if not chain:
        print(" No fallback providers configured.")
        print()
        print(" Add one with: hermes fallback add")
        print()
        return

    primary = _describe_primary(config)
    if primary:
        print(f" Primary: {primary}")
        print()

    count = len(chain)
    noun = "entry" if count == 1 else "entries"
    print(f" Fallback chain ({count} {noun}):")
    for position, entry in enumerate(chain, 1):
        print(f" {position}. {_format_entry(entry)}")
    print()
    print(" Tried in order when the primary fails (rate-limit, 5xx, connection errors).")
    print(" Docs: https://hermes-agent.nousresearch.com/docs/user-guide/features/fallback-providers")
    print()
|
||||
|
||||
|
||||
def _describe_primary(config: Dict[str, Any]) -> Optional[str]:
|
||||
"""One-line description of the primary model for display purposes."""
|
||||
model_cfg = config.get("model")
|
||||
if isinstance(model_cfg, dict):
|
||||
provider = (model_cfg.get("provider") or "?").strip() or "?"
|
||||
model = (model_cfg.get("default") or model_cfg.get("model") or "?").strip() or "?"
|
||||
return f"{model} (via {provider})"
|
||||
if isinstance(model_cfg, str) and model_cfg.strip():
|
||||
return model_cfg.strip()
|
||||
return None
|
||||
|
||||
|
||||
def cmd_fallback_add(args) -> None:
    """Launch the same picker as `hermes model`, then append the selection to the chain.

    The picker writes its selection into ``config["model"]`` (and may change
    the auth store's ``active_provider``). This command therefore:

    1. snapshots the primary model config and ``active_provider``;
    2. runs the picker;
    3. reads the picker's selection back from the saved config;
    4. restores the snapshot so the primary is left untouched;
    5. appends the selection to ``fallback_providers`` unless it equals the
       primary or is already in the chain (compared on provider + model).

    The snapshot is restored on *any* exception escaping the picker —
    including ``SystemExit`` (some provider flows exit on auth failure) and
    ``KeyboardInterrupt`` (Ctrl-C) — before the exception is re-raised.
    """
    from hermes_cli.main import _require_tty, select_provider_and_model
    from hermes_cli.config import load_config, save_config

    _require_tty("fallback add")

    # Snapshot BEFORE the picker runs so we can distinguish "user actually
    # picked something" from "user cancelled" by comparing before/after.
    before_cfg = load_config()
    model_before = copy.deepcopy(before_cfg.get("model"))
    active_provider_before = _snapshot_auth_active_provider()

    def _restore_primary() -> None:
        # Put both pieces of picker-mutated state back to their snapshots.
        _restore_model_cfg(model_before)
        _restore_auth_active_provider(active_provider_before)

    print()
    print(" Adding a fallback provider. The picker below is the same one used by")
    print(" `hermes model` — select the provider + model you want as a fallback.")
    print()

    try:
        select_provider_and_model(args=args)
    except BaseException:
        # SystemExit and KeyboardInterrupt are not ``Exception`` subclasses,
        # so catch BaseException: whatever aborted the picker, the primary
        # must not be left pointing at a half-selected provider.
        _restore_primary()
        raise

    # Read the post-picker state to see what the user selected.
    after_cfg = load_config()
    model_after = after_cfg.get("model")

    new_entry = _extract_fallback_from_model_cfg(model_after)
    if not new_entry:
        # Picker didn't complete (user cancelled or flow bailed). Nothing to do.
        _restore_primary()
        print()
        print(" No fallback added.")
        return

    # Picker picked the same thing that's already the primary → nothing changed,
    # and there's nothing useful to add as a fallback to itself.
    primary_entry = _extract_fallback_from_model_cfg(model_before)
    if (
        primary_entry
        and primary_entry["provider"] == new_entry["provider"]
        and primary_entry["model"] == new_entry["model"]
    ):
        _restore_primary()
        print()
        print(f" Selected model matches the current primary ({_format_entry(new_entry)}).")
        print(" A provider cannot be a fallback for itself — no change.")
        return

    # Reload the config with the primary restored, then append the new entry
    # to ``fallback_providers``. We deliberately re-load (rather than mutating
    # ``after_cfg``) because the picker may have touched other top-level keys
    # (custom_providers, providers credentials) that we want to keep.
    _restore_primary()

    final_cfg = load_config()
    chain = _read_chain(final_cfg)

    # Reject exact-duplicate fallback entries.
    for existing in chain:
        if (
            existing.get("provider") == new_entry["provider"]
            and existing.get("model") == new_entry["model"]
        ):
            print()
            print(f" {_format_entry(new_entry)} is already in the fallback chain — skipped.")
            return

    chain.append(new_entry)
    _write_chain(final_cfg, chain)
    save_config(final_cfg)

    print()
    print(f" Added fallback: {_format_entry(new_entry)}")
    print(f" Chain is now {len(chain)} {'entry' if len(chain) == 1 else 'entries'} long.")
    print()
    print(" Run `hermes fallback list` to view, or `hermes fallback remove` to delete.")
|
||||
|
||||
|
||||
def _restore_model_cfg(model_before: Any) -> None:
    """Reset ``config["model"]`` to the snapshot ``model_before`` and save.

    The config is re-loaded first, so only the ``model`` key is changed.
    A ``None`` snapshot removes the key; any other value is deep-copied in.
    """
    from hermes_cli.config import load_config, save_config

    current = load_config()
    if model_before is not None:
        current["model"] = copy.deepcopy(model_before)
    else:
        current.pop("model", None)
    save_config(current)
|
||||
|
||||
|
||||
def cmd_fallback_remove(args) -> None:  # noqa: ARG001
    """Let the user pick one fallback entry and delete it from the chain.

    The curses picker is tried first; if it raises anything, the plain
    numbered prompt is used instead. Picking "Cancel" (the extra last
    choice), or returning no / an out-of-range index, leaves the config
    untouched. ``args`` is accepted for dispatcher compatibility only.
    """
    from hermes_cli.config import load_config, save_config

    config = load_config()
    chain = _read_chain(config)

    if not chain:
        print()
        print(" No fallback providers configured — nothing to remove.")
        print()
        return

    prompt = "Select a fallback to remove:"
    # "Cancel" sits at index len(chain), which the range check below rejects.
    choices = [_format_entry(entry) for entry in chain] + ["Cancel"]

    try:
        from hermes_cli.setup import _curses_prompt_choice

        picked = _curses_prompt_choice(prompt, choices, 0)
    except Exception:
        picked = _numbered_pick(prompt, choices)

    if picked is None or not (0 <= picked < len(chain)):
        print()
        print(" Cancelled — no change.")
        return

    removed = chain.pop(picked)
    _write_chain(config, chain)
    save_config(config)

    print()
    print(f" Removed fallback: {_format_entry(removed)}")
    remaining = len(chain)
    if remaining:
        noun = "entry" if remaining == 1 else "entries"
        print(f" Chain is now {remaining} {noun} long.")
    else:
        print(" Fallback chain is now empty.")
    print()
|
||||
|
||||
|
||||
def cmd_fallback_clear(args) -> None:  # noqa: ARG001
    """Empty the fallback chain after an interactive ``y``/``yes`` confirmation.

    Shows the current chain first. Any other answer, Ctrl-C, or EOF at the
    prompt cancels without saving. ``args`` is accepted for dispatcher
    compatibility only.
    """
    from hermes_cli.config import load_config, save_config

    config = load_config()
    chain = _read_chain(config)

    if not chain:
        print()
        print(" No fallback providers configured — nothing to clear.")
        print()
        return

    count = len(chain)
    noun = "entry" if count == 1 else "entries"
    print()
    print(f" Current fallback chain ({count} {noun}):")
    for position, entry in enumerate(chain, 1):
        print(f" {position}. {_format_entry(entry)}")
    print()

    try:
        answer = input(" Clear all entries? [y/N]: ").strip().lower()
    except (KeyboardInterrupt, EOFError):
        print()
        print(" Cancelled.")
        return

    if answer not in ("y", "yes"):
        print(" Cancelled — no change.")
        return

    _write_chain(config, [])
    save_config(config)
    print()
    print(" Fallback chain cleared.")
    print()
|
||||
|
||||
|
||||
def _numbered_pick(question: str, choices: List[str]) -> Optional[int]:
    """Prompt for a 1-based choice on stdin and return its 0-based index.

    Used when the curses picker is unavailable. Re-prompts on non-numeric
    or out-of-range input. Returns ``None`` on an empty answer, Ctrl-C, or
    EOF.
    """
    total = len(choices)

    print(question)
    for number, label in enumerate(choices, 1):
        print(f" {number}. {label}")
    print()

    while True:
        try:
            answer = input(f"Choice [1-{total}]: ").strip()
            if not answer:
                return None
            selected = int(answer) - 1
            if 0 <= selected < total:
                return selected
            print(f"Please enter 1-{total}")
        except ValueError:
            print("Please enter a number")
        except (KeyboardInterrupt, EOFError):
            print()
            return None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Dispatch
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def cmd_fallback(args) -> None:
    """Route ``hermes fallback [subcommand]`` to its handler.

    The subcommand is read from ``args.fallback_command``. A missing or
    empty subcommand, ``list`` and ``ls`` all list the chain; ``rm`` is an
    alias for ``remove``. Anything else prints a usage hint and raises
    ``SystemExit(2)``.
    """
    command = getattr(args, "fallback_command", None)

    if command in ("remove", "rm"):
        cmd_fallback_remove(args)
        return
    if command == "clear":
        cmd_fallback_clear(args)
        return
    if command == "add":
        cmd_fallback_add(args)
        return
    if command in (None, "", "list", "ls"):
        cmd_fallback_list(args)
        return

    print(f"Unknown fallback subcommand: {command}")
    print("Use one of: list, add, remove, clear")
    raise SystemExit(2)
|
||||
@@ -2724,6 +2724,24 @@ _PLATFORMS = [
|
||||
"help": "OpenID to deliver cron results and notifications to."},
|
||||
],
|
||||
},
|
||||
{
|
||||
"key": "yuanbao",
|
||||
"label": "Yuanbao",
|
||||
"emoji": "💎",
|
||||
"token_var": "YUANBAO_APP_ID",
|
||||
"setup_instructions": [
|
||||
"1. Download the Yuanbao app from https://yuanbao.tencent.com/",
|
||||
"2. In the app, go to PAI → My Bot and create a new bot",
|
||||
"3. After the bot is created, copy the App ID and App Secret",
|
||||
"4. Enter them below and Hermes will connect automatically over WebSocket",
|
||||
],
|
||||
"vars": [
|
||||
{"name": "YUANBAO_APP_ID", "prompt": "App ID", "password": False,
|
||||
"help": "The App ID from your Yuanbao IM Bot credentials."},
|
||||
{"name": "YUANBAO_APP_SECRET", "prompt": "App Secret", "password": True,
|
||||
"help": "The App Secret (used for HMAC signing) from your Yuanbao IM Bot."},
|
||||
],
|
||||
},
|
||||
]
|
||||
|
||||
|
||||
@@ -3108,6 +3126,12 @@ def _setup_wecom():
|
||||
print_success("💬 WeCom configured!")
|
||||
|
||||
|
||||
def _setup_yuanbao():
    """Run the standard platform setup for the ``yuanbao`` ``_PLATFORMS`` entry.

    Raises ``StopIteration`` if ``_PLATFORMS`` has no entry keyed ``yuanbao``.
    """
    matches = (entry for entry in _PLATFORMS if entry["key"] == "yuanbao")
    _setup_standard_platform(next(matches))
|
||||
|
||||
|
||||
def _is_service_installed() -> bool:
|
||||
"""Check if the gateway is installed as a system service."""
|
||||
if supports_systemd_services():
|
||||
|
||||
@@ -125,6 +125,7 @@ _DEFAULT_PAYLOADS = {
|
||||
"task_id": "test-task",
|
||||
"tool_call_id": "test-call",
|
||||
"result": '{"output": "hello"}',
|
||||
"duration_ms": 42,
|
||||
},
|
||||
"pre_llm_call": {
|
||||
"session_id": "test-session",
|
||||
|
||||
+1026
-56
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,329 @@
|
||||
"""Remote model catalog fetcher.
|
||||
|
||||
The Hermes docs site hosts a JSON manifest of curated models for providers
|
||||
we want to update without shipping a release (currently OpenRouter and
|
||||
Nous Portal). This module fetches, validates, and caches that manifest,
|
||||
falling back to the in-repo hardcoded lists when the network is unavailable.
|
||||
|
||||
Pipeline
|
||||
--------
|
||||
1. ``get_catalog()`` — returns a parsed manifest dict.
|
||||
- Checks in-process cache (invalidated by TTL).
|
||||
- Reads disk cache at ``~/.hermes/cache/model_catalog.json``.
|
||||
- Fetches the master URL if disk cache is stale or missing.
|
||||
- On any fetch failure, keeps using the stale cache (or empty dict).
|
||||
|
||||
2. ``get_curated_openrouter_models()`` / ``get_curated_nous_models()`` —
|
||||
thin accessors returning the shapes existing callers expect. Each
|
||||
falls back to the in-repo hardcoded list on any lookup failure.
|
||||
|
||||
Schema (version 1)
|
||||
------------------
|
||||
::
|
||||
|
||||
{
|
||||
"version": 1,
|
||||
"updated_at": "2026-04-25T22:00:00Z",
|
||||
"metadata": {...}, # free-form
|
||||
"providers": {
|
||||
"openrouter": {
|
||||
"metadata": {...}, # free-form
|
||||
"models": [
|
||||
{"id": "vendor/model", "description": "recommended",
|
||||
"metadata": {...}} # free-form, model-level
|
||||
]
|
||||
},
|
||||
"nous": {...}
|
||||
}
|
||||
}
|
||||
|
||||
Unknown fields are ignored — extra metadata can be added at either level
|
||||
without bumping ``version``. ``version`` bumps are reserved for
|
||||
breaking changes (renaming ``providers``, changing ``models`` shape).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
import urllib.error
|
||||
import urllib.request
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from hermes_cli import __version__ as _HERMES_VERSION
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Constants
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Manifest location used when ``model_catalog.url`` is not configured.
DEFAULT_CATALOG_URL = "https://hermes-agent.nousresearch.com/docs/api/model-catalog.json"
# How long a disk-cached manifest counts as fresh, in hours.
DEFAULT_TTL_HOURS = 24
# Timeout, in seconds, passed to ``urlopen`` for manifest fetches.
DEFAULT_FETCH_TIMEOUT = 8.0
# Highest manifest ``version`` this module accepts.
SUPPORTED_SCHEMA_VERSION = 1

_HERMES_USER_AGENT = f"hermes-cli/{_HERMES_VERSION}"

# In-process copy of the manifest plus the mtime of the disk file it was
# loaded from. ``get_catalog`` only reuses it while that file is unchanged
# and still within the TTL, so callers never manage this themselves.
_catalog_cache: dict[str, Any] | None = None
_catalog_cache_source_mtime: float = 0.0
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Config
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _load_catalog_config() -> dict[str, Any]:
    """Load the ``model_catalog`` config block with defaults filled in.

    Never raises: a config that cannot be loaded, is not a mapping, or
    holds malformed values degrades to the defaults.

    Returns a dict with:
      * ``enabled``   — bool, default ``True``
      * ``url``       — str, default ``DEFAULT_CATALOG_URL``
      * ``ttl_hours`` — float, default ``DEFAULT_TTL_HOURS``; a missing,
        falsy, or non-numeric value falls back to the default
      * ``providers`` — dict of per-provider settings, default ``{}``
    """
    try:
        from hermes_cli.config import load_config

        cfg = load_config() or {}
    except Exception:
        cfg = {}

    # Guard against a loader that returns something other than a mapping.
    raw = cfg.get("model_catalog") if isinstance(cfg, dict) else None
    if not isinstance(raw, dict):
        raw = {}

    try:
        ttl_hours = float(raw.get("ttl_hours") or DEFAULT_TTL_HOURS)
    except (TypeError, ValueError, OverflowError):
        # e.g. ``ttl_hours: "daily"`` or a list — don't let a typo in the
        # user's config crash every catalog lookup.
        ttl_hours = float(DEFAULT_TTL_HOURS)

    providers = raw.get("providers")
    return {
        "enabled": bool(raw.get("enabled", True)),
        "url": str(raw.get("url") or DEFAULT_CATALOG_URL),
        "ttl_hours": ttl_hours,
        "providers": providers if isinstance(providers, dict) else {},
    }
|
||||
|
||||
|
||||
def _cache_path() -> Path:
    """Return ``<hermes home>/cache/model_catalog.json``.

    ``get_hermes_home`` is imported on each call so tests can monkeypatch
    the home directory.
    """
    from hermes_constants import get_hermes_home

    home = get_hermes_home()
    return home / "cache" / "model_catalog.json"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fetch + validate + cache
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _fetch_manifest(url: str, timeout: float) -> dict[str, Any] | None:
    """GET ``url`` and return the parsed, schema-valid manifest, else ``None``.

    Network, decode, and JSON errors are logged at INFO level and reported
    as ``None``; so is a payload that fails ``_validate_manifest``.
    """
    request_headers = {
        "Accept": "application/json",
        "User-Agent": _HERMES_USER_AGENT,
    }
    try:
        request = urllib.request.Request(url, headers=request_headers)
        with urllib.request.urlopen(request, timeout=timeout) as response:
            payload = response.read()
        manifest = json.loads(payload.decode())
    except (urllib.error.URLError, TimeoutError, json.JSONDecodeError, OSError) as exc:
        logger.info("model catalog fetch failed (%s): %s", url, exc)
        return None
    except Exception as exc:  # pragma: no cover — defensive
        logger.info("model catalog fetch errored (%s): %s", url, exc)
        return None

    if _validate_manifest(manifest):
        return manifest

    logger.info("model catalog at %s failed schema validation", url)
    return None
|
||||
|
||||
|
||||
def _validate_manifest(data: Any) -> bool:
|
||||
"""Return True when ``data`` matches the minimum manifest shape."""
|
||||
if not isinstance(data, dict):
|
||||
return False
|
||||
version = data.get("version")
|
||||
if not isinstance(version, int) or version > SUPPORTED_SCHEMA_VERSION:
|
||||
# Future schema version we don't understand — refuse rather than
|
||||
# guess. Older schemas (version < 1) aren't supported either.
|
||||
return False
|
||||
providers = data.get("providers")
|
||||
if not isinstance(providers, dict):
|
||||
return False
|
||||
for pname, pblock in providers.items():
|
||||
if not isinstance(pname, str) or not isinstance(pblock, dict):
|
||||
return False
|
||||
models = pblock.get("models")
|
||||
if not isinstance(models, list):
|
||||
return False
|
||||
for m in models:
|
||||
if not isinstance(m, dict):
|
||||
return False
|
||||
if not isinstance(m.get("id"), str) or not m["id"].strip():
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def _read_disk_cache() -> tuple[dict[str, Any] | None, float]:
    """Return ``(data_or_none, mtime)`` for the on-disk manifest cache.

    ``(None, 0.0)`` is returned when the file is missing, unreadable, not
    valid UTF-8 / JSON, or fails ``_validate_manifest``. Otherwise the
    parsed manifest and the file's modification time are returned.
    """
    path = _cache_path()
    try:
        mtime = path.stat().st_mtime
    except OSError:  # includes FileNotFoundError
        return (None, 0.0)

    try:
        with open(path, encoding="utf-8") as fh:
            data = json.load(fh)
    except (OSError, ValueError):
        # ValueError covers json.JSONDecodeError and the UnicodeDecodeError
        # raised for a truncated or otherwise corrupted cache file, so a bad
        # cache degrades to "no cache" instead of crashing the caller.
        return (None, 0.0)

    if not _validate_manifest(data):
        return (None, 0.0)
    return (data, mtime)
|
||||
|
||||
|
||||
def _write_disk_cache(data: dict[str, Any]) -> None:
    """Write ``data`` to the disk cache as indented JSON.

    The JSON goes to a sibling ``.tmp`` file first and is then moved over
    the cache path with ``os.replace``. ``OSError`` is logged at INFO level
    and swallowed.
    """
    target = _cache_path()
    try:
        target.parent.mkdir(parents=True, exist_ok=True)
        scratch = target.with_suffix(target.suffix + ".tmp")
        with open(scratch, "w") as out:
            json.dump(data, out, indent=2)
            out.write("\n")
        os.replace(scratch, target)
    except OSError as exc:
        logger.info("model catalog cache write failed: %s", exc)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Public API
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def get_catalog(*, force_refresh: bool = False) -> dict[str, Any]:
    """Return the model catalog manifest, or ``{}`` when none is available.

    Resolution order:
      1. ``{}`` if the catalog is disabled in config.
      2. Unless ``force_refresh``: the in-process copy when the disk cache
         is fresh and its mtime matches the one recorded at load time;
         otherwise the fresh disk cache itself.
      3. A network fetch, written to disk and re-read so the recorded mtime
         matches the file.
      4. The stale disk copy if the fetch failed.
      5. ``{}``.

    An empty result means "use the in-repo fallback" to callers.
    """
    global _catalog_cache, _catalog_cache_source_mtime

    cfg = _load_catalog_config()
    if not cfg["enabled"]:
        return {}

    ttl_seconds = max(0.0, cfg["ttl_hours"] * 3600.0)

    disk_data, disk_mtime = _read_disk_cache()
    now = time.time()
    # ``disk_fresh`` being true implies ``disk_data`` is not None.
    disk_fresh = disk_data is not None and (now - disk_mtime) < ttl_seconds

    if disk_fresh and not force_refresh:
        # In-process hit: the file is the same one we already parsed.
        if _catalog_cache is not None and disk_mtime == _catalog_cache_source_mtime:
            return _catalog_cache
        # Disk is fresh enough — adopt it without a network hit.
        _catalog_cache = disk_data
        _catalog_cache_source_mtime = disk_mtime
        return disk_data

    # Need to (re)fetch. If it fails, fall back to any stale disk copy.
    fetched = _fetch_manifest(cfg["url"], DEFAULT_FETCH_TIMEOUT)
    if fetched is None:
        if disk_data is None:
            return {}
        _catalog_cache = disk_data
        _catalog_cache_source_mtime = disk_mtime
        return disk_data

    _write_disk_cache(fetched)
    reread, reread_mtime = _read_disk_cache()
    if reread is None:
        # The cache write (or re-read) failed; serve the fetched copy and
        # stamp it with the time this call started.
        reread, reread_mtime = fetched, now
    _catalog_cache = reread
    _catalog_cache_source_mtime = reread_mtime
    return reread
|
||||
|
||||
|
||||
def _fetch_provider_override(provider: str) -> dict[str, Any] | None:
    """Fetch the manifest at ``model_catalog.providers.<provider>.url``, if set.

    Returns ``None`` when the catalog is disabled, no non-blank override
    URL is configured for ``provider``, or the fetch fails.
    """
    cfg = _load_catalog_config()
    if not cfg["enabled"]:
        return None

    provider_cfg = cfg["providers"].get(provider)
    url = provider_cfg.get("url") if isinstance(provider_cfg, dict) else None
    if not isinstance(url, str):
        return None
    url = url.strip()
    if not url:
        return None

    # Override fetches skip the disk cache because they're usually
    # third-party self-hosted. Re-request on every call but with a short
    # timeout so they don't block the picker.
    return _fetch_manifest(url, DEFAULT_FETCH_TIMEOUT)
|
||||
|
||||
|
||||
def _get_provider_block(provider: str) -> dict[str, Any] | None:
    """Return ``providers[provider]`` from the override or master manifest.

    A per-provider override manifest wins when it contains a dict block for
    ``provider``; otherwise the master catalog is consulted. ``None`` means
    neither source has a usable block.
    """

    def _block_from(manifest: dict[str, Any]) -> dict[str, Any] | None:
        found = manifest.get("providers", {}).get(provider)
        return found if isinstance(found, dict) else None

    override = _fetch_provider_override(provider)
    if override is not None:
        block = _block_from(override)
        if block is not None:
            return block

    catalog = get_catalog()
    return _block_from(catalog) if catalog else None
|
||||
|
||||
|
||||
def get_curated_openrouter_models() -> list[tuple[str, str]] | None:
    """Return OpenRouter's curated ``[(id, description), ...]`` from the manifest.

    Entries with a blank ``id`` are dropped; a missing ``description``
    becomes ``""``. Returns ``None`` when there is no provider block or no
    usable entry, so callers can fall back to their hardcoded list.
    """
    block = _get_provider_block("openrouter")
    if not block:
        return None

    pairs = [
        (str(m.get("id") or "").strip(), str(m.get("description") or ""))
        for m in block.get("models", [])
    ]
    curated: list[tuple[str, str]] = [(mid, desc) for mid, desc in pairs if mid]
    return curated or None
|
||||
|
||||
|
||||
def get_curated_nous_models() -> list[str] | None:
    """Return Nous Portal's curated model ids from the manifest.

    Blank ids are dropped. Returns ``None`` when there is no provider block
    or no usable id.
    """
    block = _get_provider_block("nous")
    if not block:
        return None

    stripped_ids = (str(m.get("id") or "").strip() for m in block.get("models", []))
    curated: list[str] = [mid for mid in stripped_ids if mid]
    return curated or None
|
||||
|
||||
|
||||
def reset_cache() -> None:
    """Drop the in-process manifest copy and its recorded mtime.

    Used by tests and ``hermes model --refresh``. The disk cache is not
    touched.
    """
    global _catalog_cache, _catalog_cache_source_mtime
    _catalog_cache, _catalog_cache_source_mtime = None, 0.0
|
||||
+39
-12
@@ -533,6 +533,7 @@ def resolve_display_context_length(
|
||||
base_url: str = "",
|
||||
api_key: str = "",
|
||||
model_info: Optional[ModelInfo] = None,
|
||||
custom_providers: list | None = None,
|
||||
) -> Optional[int]:
|
||||
"""Resolve the context length to show in /model output.
|
||||
|
||||
@@ -543,6 +544,11 @@ def resolve_display_context_length(
|
||||
about Codex OAuth, Copilot, Nous, and falls back to models.dev for the
|
||||
rest.
|
||||
|
||||
When ``custom_providers`` is provided, per-model ``context_length``
|
||||
overrides from ``custom_providers[].models.<id>.context_length`` are
|
||||
honored — this closes #15779 where ``/model`` switch ignored user-set
|
||||
overrides.
|
||||
|
||||
Prefer the provider-aware value; fall back to ``model_info.context_window``
|
||||
only if the resolver returns nothing.
|
||||
"""
|
||||
@@ -553,6 +559,7 @@ def resolve_display_context_length(
|
||||
base_url=base_url or "",
|
||||
api_key=api_key or "",
|
||||
provider=provider or None,
|
||||
custom_providers=custom_providers,
|
||||
)
|
||||
if ctx:
|
||||
return int(ctx)
|
||||
@@ -831,9 +838,14 @@ def switch_model(
|
||||
requested=current_provider,
|
||||
target_model=new_model,
|
||||
)
|
||||
api_key = runtime.get("api_key", "")
|
||||
base_url = runtime.get("base_url", "")
|
||||
api_mode = runtime.get("api_mode", "")
|
||||
# If resolution fell through to "custom" (e.g. named custom provider like
|
||||
# "ollama-launch" that resolve_runtime_provider doesn't know), keep existing
|
||||
# credentials. Otherwise use the resolved values (picks up credential rotation,
|
||||
# base_url adjustments for OpenCode, etc.).
|
||||
if runtime.get("provider") != "custom":
|
||||
api_key = runtime.get("api_key", "")
|
||||
base_url = runtime.get("base_url", "")
|
||||
api_mode = runtime.get("api_mode", "")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
@@ -867,16 +879,31 @@ def switch_model(
|
||||
"message": f"Could not validate `{new_model}`: {e}",
|
||||
}
|
||||
|
||||
# Override rejection if model is in the user's saved provider config.
|
||||
# API /v1/models may not list cloud/aliased models even though the server supports them.
|
||||
if not validation.get("accepted"):
|
||||
msg = validation.get("message", "Invalid model")
|
||||
return ModelSwitchResult(
|
||||
success=False,
|
||||
new_model=new_model,
|
||||
target_provider=target_provider,
|
||||
provider_label=provider_label,
|
||||
is_global=is_global,
|
||||
error_message=msg,
|
||||
)
|
||||
override = False
|
||||
if user_providers:
|
||||
for up in user_providers:
|
||||
if isinstance(up, dict) and up.get("provider") == target_provider:
|
||||
cfg_models = up.get("models", [])
|
||||
if new_model in cfg_models or any(
|
||||
m.get("name") == new_model for m in cfg_models if isinstance(m, dict)
|
||||
):
|
||||
override = True
|
||||
break
|
||||
if override:
|
||||
validation = {"accepted": True, "persist": True, "recognized": False, "message": validation.get("message", "")}
|
||||
else:
|
||||
msg = validation.get("message", "Invalid model")
|
||||
return ModelSwitchResult(
|
||||
success=False,
|
||||
new_model=new_model,
|
||||
target_provider=target_provider,
|
||||
provider_label=provider_label,
|
||||
is_global=is_global,
|
||||
error_message=msg,
|
||||
)
|
||||
|
||||
# Apply auto-correction if validation found a closer match
|
||||
if validation.get("corrected_model"):
|
||||
|
||||
+186
-62
@@ -33,8 +33,6 @@ COPILOT_REASONING_EFFORTS_O_SERIES = ["low", "medium", "high"]
|
||||
# (model_id, display description shown in menus)
|
||||
OPENROUTER_MODELS: list[tuple[str, str]] = [
|
||||
("moonshotai/kimi-k2.6", "recommended"),
|
||||
("deepseek/deepseek-v4-pro", ""),
|
||||
("deepseek/deepseek-v4-flash", ""),
|
||||
("anthropic/claude-opus-4.7", ""),
|
||||
("anthropic/claude-opus-4.6", ""),
|
||||
("anthropic/claude-sonnet-4.6", ""),
|
||||
@@ -111,8 +109,6 @@ def _codex_curated_models() -> list[str]:
|
||||
_PROVIDER_MODELS: dict[str, list[str]] = {
|
||||
"nous": [
|
||||
"moonshotai/kimi-k2.6",
|
||||
"deepseek/deepseek-v4-pro",
|
||||
"deepseek/deepseek-v4-flash",
|
||||
"xiaomi/mimo-v2.5-pro",
|
||||
"xiaomi/mimo-v2.5",
|
||||
"anthropic/claude-opus-4.7",
|
||||
@@ -383,6 +379,9 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
|
||||
"us.meta.llama4-maverick-17b-instruct-v1:0",
|
||||
"us.meta.llama4-scout-17b-instruct-v1:0",
|
||||
],
|
||||
# Azure Foundry: user-provided endpoint and model.
|
||||
# Empty list because models depend on the endpoint configuration.
|
||||
"azure-foundry": [],
|
||||
}
|
||||
|
||||
# Vercel AI Gateway: derive the bare-model-id catalog from the curated
|
||||
@@ -740,6 +739,7 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
|
||||
ProviderEntry("opencode-zen", "OpenCode Zen", "OpenCode Zen (35+ curated models, pay-as-you-go)"),
|
||||
ProviderEntry("opencode-go", "OpenCode Go", "OpenCode Go (open models, $10/month subscription)"),
|
||||
ProviderEntry("bedrock", "AWS Bedrock", "AWS Bedrock (Claude, Nova, Llama, DeepSeek — IAM or API key)"),
|
||||
ProviderEntry("azure-foundry", "Azure Foundry", "Azure Foundry (OpenAI-style or Anthropic-style endpoint — your Azure AI deployment)"),
|
||||
]
|
||||
|
||||
# Derived dicts — used throughout the codebase
|
||||
@@ -872,7 +872,16 @@ def fetch_openrouter_models(
|
||||
if _openrouter_catalog_cache is not None and not force_refresh:
|
||||
return list(_openrouter_catalog_cache)
|
||||
|
||||
fallback = list(OPENROUTER_MODELS)
|
||||
# Prefer the remotely-hosted catalog manifest; fall back to the in-repo
|
||||
# snapshot when the manifest is unreachable. Both are curated lists that
|
||||
# drive the picker; the OpenRouter live /v1/models filter (tool support,
|
||||
# free pricing) is applied on top either way.
|
||||
try:
|
||||
from hermes_cli.model_catalog import get_curated_openrouter_models
|
||||
remote = get_curated_openrouter_models()
|
||||
except Exception:
|
||||
remote = None
|
||||
fallback = list(remote) if remote else list(OPENROUTER_MODELS)
|
||||
preferred_ids = [mid for mid, _ in fallback]
|
||||
|
||||
try:
|
||||
@@ -925,6 +934,24 @@ def model_ids(*, force_refresh: bool = False) -> list[str]:
|
||||
return [mid for mid, _ in fetch_openrouter_models(force_refresh=force_refresh)]
|
||||
|
||||
|
||||
def get_curated_nous_model_ids() -> list[str]:
    """Return the curated model ids offered for the Nous Portal.

    The remotely-hosted catalog manifest (published under
    ``website/static/api/model-catalog.json``) is consulted first through
    ``hermes_cli.model_catalog.get_curated_nous_models``. When that import
    or call raises, or yields nothing, the in-repo snapshot stored in
    ``_PROVIDER_MODELS["nous"]`` is used instead.

    Returns:
        A freshly built list of model ids; never ``None``.
    """
    manifest_ids = None
    try:
        from hermes_cli.model_catalog import get_curated_nous_models

        manifest_ids = get_curated_nous_models()
    except Exception:
        # Best effort: any manifest failure falls through to the snapshot.
        manifest_ids = None

    chosen = manifest_ids if manifest_ids else _PROVIDER_MODELS.get("nous", [])
    return list(chosen)
|
||||
|
||||
|
||||
def _ai_gateway_model_is_free(pricing: Any) -> bool:
|
||||
"""Return True if an AI Gateway model has $0 input AND output pricing."""
|
||||
if not isinstance(pricing, dict):
|
||||
@@ -1379,27 +1406,93 @@ def curated_models_for_provider(
|
||||
return [(m, "") for m in models]
|
||||
|
||||
|
||||
def detect_provider_for_model(
|
||||
def _provider_keys(provider: str) -> set[str]:
    """Collect the non-empty lookup keys for ``provider``.

    Two candidates are considered: the raw value stripped and lower-cased,
    and whatever ``normalize_provider`` returns for it. Falsy candidates are
    dropped, so the result may hold zero, one, or two entries.
    """
    candidates = (
        (provider or "").strip().lower(),
        normalize_provider(provider),
    )
    return {candidate for candidate in candidates if candidate}
|
||||
|
||||
|
||||
def _model_in_provider_catalog(name_lower: str, providers: set[str]) -> bool:
    """Tell whether ``name_lower`` is listed for any of ``providers``.

    Each provider's entry in ``_PROVIDER_MODELS`` is scanned and compared
    case-insensitively (catalog ids are lower-cased before comparison; the
    caller is expected to pass an already lower-cased name). Providers with
    no catalog entry contribute nothing.
    """
    for provider_id in providers:
        for catalog_model in _PROVIDER_MODELS.get(provider_id, []):
            if catalog_model.lower() == name_lower:
                return True
    return False
|
||||
|
||||
|
||||
_AGGREGATOR_PROVIDERS = frozenset(
|
||||
{"nous", "openrouter", "ai-gateway", "copilot", "kilocode"}
|
||||
)
|
||||
|
||||
|
||||
def _resolve_static_model_alias(
|
||||
name_lower: str,
|
||||
current_keys: set[str],
|
||||
) -> Optional[tuple[str, str]]:
|
||||
"""Resolve short aliases (e.g. sonnet/opus) using static catalogs only."""
|
||||
try:
|
||||
from hermes_cli.model_switch import MODEL_ALIASES
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
identity = MODEL_ALIASES.get(name_lower)
|
||||
if identity is None:
|
||||
return None
|
||||
|
||||
vendor = identity.vendor
|
||||
family = identity.family
|
||||
|
||||
def _match(provider: str) -> Optional[str]:
|
||||
models = _PROVIDER_MODELS.get(provider, [])
|
||||
if not models:
|
||||
return None
|
||||
prefix = (
|
||||
f"{vendor}/{family}"
|
||||
if provider in _AGGREGATOR_PROVIDERS
|
||||
else family
|
||||
).lower()
|
||||
for model in models:
|
||||
if model.lower().startswith(prefix):
|
||||
return model
|
||||
return None
|
||||
|
||||
for provider in current_keys:
|
||||
if matched := _match(provider):
|
||||
return provider, matched
|
||||
|
||||
for provider in _PROVIDER_MODELS:
|
||||
if provider in current_keys or provider in _AGGREGATOR_PROVIDERS:
|
||||
continue
|
||||
if matched := _match(provider):
|
||||
return provider, matched
|
||||
|
||||
for provider in _AGGREGATOR_PROVIDERS:
|
||||
if provider in current_keys and (matched := _match(provider)):
|
||||
return provider, matched
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def detect_static_provider_for_model(
|
||||
model_name: str,
|
||||
current_provider: str,
|
||||
) -> Optional[tuple[str, str]]:
|
||||
"""Auto-detect the best provider for a model name.
|
||||
"""Auto-detect a provider from static catalogs only.
|
||||
|
||||
Returns ``(provider_id, model_name)`` — the model name may be remapped
|
||||
(e.g. bare ``deepseek-chat`` → ``deepseek/deepseek-chat`` for OpenRouter).
|
||||
Returns ``(provider_id, model_name)``. The model name may be remapped
|
||||
when a static alias or bare provider name resolves to a catalog default.
|
||||
Returns ``None`` when no confident match is found.
|
||||
|
||||
Priority:
|
||||
0. Bare provider name → switch to that provider's default model
|
||||
1. Direct provider with credentials (highest)
|
||||
2. Direct provider without credentials → remap to OpenRouter slug
|
||||
3. OpenRouter catalog match
|
||||
"""
|
||||
name = (model_name or "").strip()
|
||||
if not name:
|
||||
return None
|
||||
|
||||
name_lower = name.lower()
|
||||
current_keys = _provider_keys(current_provider)
|
||||
|
||||
alias_match = _resolve_static_model_alias(name_lower, current_keys)
|
||||
if alias_match:
|
||||
return alias_match
|
||||
|
||||
# --- Step 0: bare provider name typed as model ---
|
||||
# If someone types `/model nous` or `/model anthropic`, treat it as a
|
||||
@@ -1412,64 +1505,49 @@ def detect_provider_for_model(
|
||||
if (
|
||||
resolved_provider in _PROVIDER_LABELS
|
||||
and default_models
|
||||
and resolved_provider != normalize_provider(current_provider)
|
||||
and resolved_provider not in current_keys
|
||||
):
|
||||
return (resolved_provider, default_models[0])
|
||||
|
||||
# Aggregators list other providers' models — never auto-switch TO them
|
||||
_AGGREGATORS = {"nous", "openrouter", "ai-gateway", "copilot", "kilocode"}
|
||||
|
||||
# If the model belongs to the current provider's catalog, don't suggest switching
|
||||
current_models = _PROVIDER_MODELS.get(current_provider, [])
|
||||
if any(name_lower == m.lower() for m in current_models):
|
||||
if _model_in_provider_catalog(name_lower, current_keys):
|
||||
return None
|
||||
|
||||
# --- Step 1: check static provider catalogs for a direct match ---
|
||||
direct_match: Optional[str] = None
|
||||
for pid, models in _PROVIDER_MODELS.items():
|
||||
if pid == current_provider or pid in _AGGREGATORS:
|
||||
if pid in current_keys or pid in _AGGREGATOR_PROVIDERS:
|
||||
continue
|
||||
if any(name_lower == m.lower() for m in models):
|
||||
direct_match = pid
|
||||
break
|
||||
return (pid, name)
|
||||
|
||||
if direct_match:
|
||||
# Check if we have credentials for this provider — env vars,
|
||||
# credential pool, or auth store entries.
|
||||
has_creds = False
|
||||
try:
|
||||
from hermes_cli.auth import PROVIDER_REGISTRY
|
||||
pconfig = PROVIDER_REGISTRY.get(direct_match)
|
||||
if pconfig:
|
||||
for env_var in pconfig.api_key_env_vars:
|
||||
if os.getenv(env_var, "").strip():
|
||||
has_creds = True
|
||||
break
|
||||
except Exception:
|
||||
pass
|
||||
# Also check credential pool and auth store — covers OAuth,
|
||||
# Claude Code tokens, and other non-env-var credentials (#10300).
|
||||
if not has_creds:
|
||||
try:
|
||||
from agent.credential_pool import load_pool
|
||||
pool = load_pool(direct_match)
|
||||
if pool.has_credentials():
|
||||
has_creds = True
|
||||
except Exception:
|
||||
pass
|
||||
if not has_creds:
|
||||
try:
|
||||
from hermes_cli.auth import _load_auth_store
|
||||
store = _load_auth_store()
|
||||
if direct_match in store.get("providers", {}) or direct_match in store.get("credential_pool", {}):
|
||||
has_creds = True
|
||||
except Exception:
|
||||
pass
|
||||
return None
|
||||
|
||||
# Always return the direct provider match. If credentials are
|
||||
# missing, the client init will give a clear error rather than
|
||||
# silently routing through the wrong provider (#10300).
|
||||
return (direct_match, name)
|
||||
|
||||
def detect_provider_for_model(
|
||||
model_name: str,
|
||||
current_provider: str,
|
||||
) -> Optional[tuple[str, str]]:
|
||||
"""Auto-detect the best provider for a model name.
|
||||
|
||||
Returns ``(provider_id, model_name)`` — the model name may be remapped
|
||||
(e.g. bare ``deepseek-chat`` → ``deepseek/deepseek-chat`` for OpenRouter).
|
||||
Returns ``None`` when no confident match is found.
|
||||
|
||||
Priority:
|
||||
0. Bare provider name → switch to that provider's default model
|
||||
1. Direct provider static catalog match
|
||||
2. OpenRouter catalog match
|
||||
"""
|
||||
name = (model_name or "").strip()
|
||||
if not name:
|
||||
return None
|
||||
|
||||
static_match = detect_static_provider_for_model(name, current_provider)
|
||||
if static_match:
|
||||
return static_match
|
||||
if _model_in_provider_catalog(name.lower(), _provider_keys(current_provider)):
|
||||
return None
|
||||
|
||||
# --- Step 2: check OpenRouter catalog ---
|
||||
# First try exact match (handles provider/model format)
|
||||
@@ -2148,6 +2226,52 @@ def copilot_model_api_mode(
|
||||
return "chat_completions"
|
||||
|
||||
|
||||
# Azure Foundry model families that require the Responses API. Azure
|
||||
# rejects /chat/completions against these deployments with
|
||||
# ``400 "The requested operation is unsupported."`` — the same payload Bob
|
||||
# Dobolina hit in April 2026 on ``gpt-5.3-codex`` while ``gpt-4o-pure`` on
|
||||
# the same endpoint worked fine. Keep the patterns broad enough to cover
|
||||
# vendor-renamed deployments (e.g. ``gpt-5.3-codex``, ``gpt-5-codex``,
|
||||
# ``gpt-5.4``, ``o1-preview``) but tight enough to leave GPT-4 / 3.5 / Llama /
|
||||
# Mistral / Grok deployments on chat completions.
|
||||
_AZURE_FOUNDRY_RESPONSES_PREFIXES = (
    "codex",  # codex-*, codex-mini
    "gpt-5",  # gpt-5, gpt-5.x, gpt-5-codex, gpt-5.x-codex
    "o1",  # o1, o1-preview, o1-mini
    "o3",  # o3, o3-mini
    "o4",  # o4, o4-mini
)


def azure_foundry_model_api_mode(model_name: Optional[str]) -> Optional[str]:
    """Infer Azure Foundry api_mode from a deployment/model name.

    The name is stripped, lower-cased, and reduced to the text after its
    last ``/`` (dropping any vendor prefix copied from OpenRouter / Copilot).

    Returns:
        ``"codex_responses"`` when the reduced name starts with one of
        ``_AZURE_FOUNDRY_RESPONSES_PREFIXES`` (GPT-5.x, codex, o1/o3/o4
        reasoning models). ``None`` otherwise, including for empty input, so
        the caller can fall back to the configured/default api_mode
        (typically ``chat_completions``) and GPT-4o, GPT-4 Turbo, Llama,
        Mistral, etc. keep working.

    Never returns ``anthropic_messages``; Anthropic-style Azure endpoints
    are disambiguated by URL (``/anthropic`` suffix) in
    ``runtime_provider._detect_api_mode_for_url`` and by the user setting
    ``model.api_mode: anthropic_messages`` explicitly.
    """
    deployment = str(model_name or "").strip().lower()
    # Keep only the segment after the last "/" (the whole name if none).
    _, _, deployment = deployment.rpartition("/")
    # gpt-5-mini speaks chat completions on Copilot but Azure Foundry deploys
    # the full gpt-5 family uniformly on Responses API, so there is no
    # exception carved out for it here.
    if deployment.startswith(_AZURE_FOUNDRY_RESPONSES_PREFIXES):
        return "codex_responses"
    return None
|
||||
|
||||
|
||||
def normalize_opencode_model_id(provider_id: Optional[str], model_id: Optional[str]) -> str:
|
||||
"""Normalize OpenCode config IDs to the bare model slug used in API requests."""
|
||||
provider = normalize_provider(provider_id)
|
||||
@@ -2571,8 +2695,8 @@ def validate_requested_model(
|
||||
)
|
||||
|
||||
return {
|
||||
"accepted": False,
|
||||
"persist": False,
|
||||
"accepted": True,
|
||||
"persist": True,
|
||||
"recognized": False,
|
||||
"message": message,
|
||||
}
|
||||
|
||||
@@ -9,6 +9,7 @@ from typing import Dict, Iterable, Optional, Set
|
||||
from hermes_cli.auth import get_nous_auth_status
|
||||
from hermes_cli.config import get_env_value, load_config
|
||||
from tools.managed_tool_gateway import is_managed_tool_gateway_ready
|
||||
from utils import is_truthy_value
|
||||
from tools.tool_backend_helpers import (
|
||||
fal_key_is_configured,
|
||||
has_direct_modal_credentials,
|
||||
@@ -25,6 +26,13 @@ _DEFAULT_PLATFORM_TOOLSETS = {
|
||||
}
|
||||
|
||||
|
||||
def _uses_gateway(section: object) -> bool:
    """Return True when a config section explicitly opts into the gateway.

    Anything that is not a dict counts as "not opted in". For dicts, the
    ``use_gateway`` value is interpreted by ``is_truthy_value`` with a
    default of ``False``.
    """
    if isinstance(section, dict):
        return is_truthy_value(section.get("use_gateway"), default=False)
    return False
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class NousFeatureState:
|
||||
key: str
|
||||
@@ -262,11 +270,11 @@ def get_nous_subscription_features(
|
||||
# use_gateway flags — when True, the user explicitly opted into the
|
||||
# Tool Gateway via `hermes model`, so direct credentials should NOT
|
||||
# prevent gateway routing.
|
||||
web_use_gateway = bool(web_cfg.get("use_gateway"))
|
||||
tts_use_gateway = bool(tts_cfg.get("use_gateway"))
|
||||
browser_use_gateway = bool(browser_cfg.get("use_gateway"))
|
||||
web_use_gateway = _uses_gateway(web_cfg)
|
||||
tts_use_gateway = _uses_gateway(tts_cfg)
|
||||
browser_use_gateway = _uses_gateway(browser_cfg)
|
||||
image_gen_cfg = config.get("image_gen") if isinstance(config.get("image_gen"), dict) else {}
|
||||
image_use_gateway = bool(image_gen_cfg.get("use_gateway"))
|
||||
image_use_gateway = _uses_gateway(image_gen_cfg)
|
||||
|
||||
direct_exa = bool(get_env_value("EXA_API_KEY"))
|
||||
direct_firecrawl = bool(get_env_value("FIRECRAWL_API_KEY") or get_env_value("FIRECRAWL_API_URL"))
|
||||
@@ -601,10 +609,10 @@ def get_gateway_eligible_tools(
|
||||
# no direct keys exist — we only skip the prompt for tools where
|
||||
# use_gateway was explicitly set.
|
||||
opted_in = {
|
||||
"web": bool((config.get("web") if isinstance(config.get("web"), dict) else {}).get("use_gateway")),
|
||||
"image_gen": bool((config.get("image_gen") if isinstance(config.get("image_gen"), dict) else {}).get("use_gateway")),
|
||||
"tts": bool((config.get("tts") if isinstance(config.get("tts"), dict) else {}).get("use_gateway")),
|
||||
"browser": bool((config.get("browser") if isinstance(config.get("browser"), dict) else {}).get("use_gateway")),
|
||||
"web": _uses_gateway(config.get("web")),
|
||||
"image_gen": _uses_gateway(config.get("image_gen")),
|
||||
"tts": _uses_gateway(config.get("tts")),
|
||||
"browser": _uses_gateway(config.get("browser")),
|
||||
}
|
||||
|
||||
unconfigured: list[str] = []
|
||||
|
||||
@@ -0,0 +1,202 @@
|
||||
"""Oneshot (-z) mode: send a prompt, get the final content block, exit.
|
||||
|
||||
Bypasses cli.py entirely. No banner, no spinner, no session_id line,
|
||||
no stderr chatter. Just the agent's final text to stdout.
|
||||
|
||||
Toolsets = whatever the user has configured for "cli" in `hermes tools`.
|
||||
Rules / memory / AGENTS.md / preloaded skills = same as a normal chat turn.
|
||||
Approvals = auto-bypassed (HERMES_YOLO_MODE=1 is set for the call).
|
||||
Working directory = the user's CWD (AGENTS.md etc. resolve from there as usual).
|
||||
|
||||
Model / provider selection mirrors `hermes chat`:
|
||||
- Both optional. If omitted, use the user's configured default.
|
||||
- If both given, pair them exactly as given.
|
||||
- If only --model given, auto-detect the provider that serves it.
|
||||
- If only --provider given, error out (ambiguous — caller must pick a model).
|
||||
|
||||
Env var fallbacks (used when the corresponding arg is not passed):
|
||||
- HERMES_INFERENCE_MODEL
|
||||
- HERMES_INFERENCE_PROVIDER (already read by resolve_runtime_provider)
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
from contextlib import redirect_stderr, redirect_stdout
|
||||
from typing import Optional
|
||||
|
||||
|
||||
def run_oneshot(
    prompt: str,
    model: Optional[str] = None,
    provider: Optional[str] = None,
) -> int:
    """Execute a single prompt and print only the final content block.

    Args:
        prompt: The user message to send.
        model: Optional model override. Falls back to HERMES_INFERENCE_MODEL
            env var, then config.yaml's model.default / model.model.
        provider: Optional provider override. Falls back to
            HERMES_INFERENCE_PROVIDER env var, then config.yaml's
            model.provider, then "auto".

    Returns:
        The exit code: ``2`` when ``provider`` is given without any model
        (argument or ``HERMES_INFERENCE_MODEL``), otherwise ``0`` once the
        agent run has finished. Caller should ``sys.exit()`` with it.

    Side effects:
        Disables stdlib logging process-wide, sets ``HERMES_YOLO_MODE`` and
        ``HERMES_ACCEPT_HOOKS`` in ``os.environ``, and writes the final
        response to the stdout that was active on entry.
    """
    # Silence every stdlib logger so AIAgent, tools, and provider adapters
    # put no bytes on the terminal.
    # NOTE(review): logging.disable() is a process-wide override applied
    # before any handler sees a record, so file handlers installed by
    # setup_logging() are silenced as well, not only stderr output.
    logging.disable(logging.CRITICAL)

    # --provider without --model is ambiguous: carrying the user's configured
    # model across to a different provider is usually wrong (that provider may
    # not host it), and silently picking the provider's catalog default hides
    # the mismatch. Require the caller to be explicit. Validate BEFORE the
    # stderr redirect so the message actually reaches the terminal.
    env_model_early = os.getenv("HERMES_INFERENCE_MODEL", "").strip()
    if provider and not ((model or "").strip() or env_model_early):
        sys.stderr.write(
            "hermes -z: --provider requires --model (or HERMES_INFERENCE_MODEL). "
            "Pass both explicitly, or neither to use your configured defaults.\n"
        )
        return 2

    # Auto-approve any shell / tool approvals. Non-interactive by
    # definition: a prompt would hang forever.
    os.environ["HERMES_YOLO_MODE"] = "1"
    os.environ["HERMES_ACCEPT_HOOKS"] = "1"

    # Redirect stderr AND stdout to devnull for the entire call tree; the
    # final response is printed to the real stdout afterwards. The context
    # managers unwind in reverse order, so both streams are restored before
    # devnull is closed, even when the agent raises.
    real_stdout = sys.stdout
    with open(os.devnull, "w") as devnull, redirect_stdout(devnull), redirect_stderr(devnull):
        response = _run_agent(prompt, model=model, provider=provider)

    if response:
        real_stdout.write(response)
        if not response.endswith("\n"):
            real_stdout.write("\n")
        real_stdout.flush()
    return 0
|
||||
|
||||
|
||||
def _run_agent(
    prompt: str,
    model: Optional[str] = None,
    provider: Optional[str] = None,
) -> str:
    """Build an AIAgent the way a normal CLI chat turn would and run one
    conversation.

    Model precedence is explicit argument, then ``HERMES_INFERENCE_MODEL``,
    then config. When no provider is passed but a model was requested via
    argument or env var, the provider is auto-detected from that model.

    Returns the final response string ("" when the agent returns nothing).
    """
    # Imports are local so they don't run when hermes is invoked for
    # other commands (keeps top-level CLI startup cheap).
    from hermes_cli.config import load_config
    from hermes_cli.models import detect_provider_for_model
    from hermes_cli.runtime_provider import resolve_runtime_provider
    from hermes_cli.tools_config import _get_platform_tools
    from run_agent import AIAgent

    cfg = load_config()

    # The config's "model" entry is either a bare string or a mapping.
    model_cfg = cfg.get("model") or {}
    if isinstance(model_cfg, str):
        cfg_model = model_cfg
    else:
        cfg_model = model_cfg.get("default") or model_cfg.get("model") or ""

    env_model = os.getenv("HERMES_INFERENCE_MODEL", "").strip()
    # "Explicit" means requested via arg or env var, as opposed to config.
    explicit_model = (model or "").strip() or env_model
    effective_model = explicit_model or cfg_model

    # When --model is given without --provider, auto-detect the provider that
    # serves that model (same semantic as `/model <name>` in an interactive
    # session). Without this, resolve_runtime_provider() would fall back to
    # the user's configured default provider, which may not host the model
    # the caller just asked for. A model that came from config skips
    # detection: that's the "use my defaults" path.
    effective_provider = (provider or "").strip() or None
    if effective_provider is None and explicit_model:
        cfg_provider = (
            str(model_cfg.get("provider") or "").strip().lower()
            if isinstance(model_cfg, dict)
            else ""
        )
        current_provider = (
            cfg_provider
            or os.getenv("HERMES_INFERENCE_PROVIDER", "").strip().lower()
            or "auto"
        )
        detected = detect_provider_for_model(explicit_model, current_provider)
        if detected:
            effective_provider, effective_model = detected

    runtime = resolve_runtime_provider(
        requested=effective_provider,
        target_model=effective_model or None,
    )

    # Pull in whatever toolsets the user has enabled for "cli".
    # sorted() gives stable ordering and yields the list AIAgent is handed.
    toolsets_list = sorted(_get_platform_tools(cfg, "cli"))

    agent = AIAgent(
        api_key=runtime.get("api_key"),
        base_url=runtime.get("base_url"),
        provider=runtime.get("provider"),
        api_mode=runtime.get("api_mode"),
        model=effective_model,
        enabled_toolsets=toolsets_list,
        quiet_mode=True,
        platform="cli",
        credential_pool=runtime.get("credential_pool"),
        # Interactive callbacks are intentionally NOT wired beyond this
        # one. In oneshot mode there's no user sitting at a terminal:
        #   - clarify → returns a synthetic "pick a default" instruction
        #     so the agent continues instead of stalling on the tool's
        #     built-in "not available" error
        #   - sudo password prompt → terminal_tool gates on
        #     HERMES_INTERACTIVE which we never set
        #   - shell-hook approval → auto-approved via HERMES_ACCEPT_HOOKS=1
        #     (set by run_oneshot); also falls back to deny on non-tty
        #   - dangerous-command approval → bypassed via HERMES_YOLO_MODE=1
        #   - skill secret capture → returns gracefully when no callback set
        clarify_callback=_oneshot_clarify_callback,
    )

    # Belt-and-braces: make sure AIAgent doesn't invoke any streaming
    # display callbacks that would bypass our stdout capture.
    agent.suppress_status_output = True
    agent.stream_delta_callback = None
    agent.tool_gen_callback = None

    return agent.chat(prompt) or ""
|
||||
|
||||
|
||||
def _oneshot_clarify_callback(question: str, choices=None) -> str:
|
||||
"""Clarify is disabled in oneshot mode — tell the agent to pick a
|
||||
default and proceed instead of stalling or erroring."""
|
||||
if choices:
|
||||
return (
|
||||
f"[oneshot mode: no user available. Pick the best option from "
|
||||
f"{choices} using your own judgment and continue.]"
|
||||
)
|
||||
return (
|
||||
"[oneshot mode: no user available. Make the most reasonable "
|
||||
"assumption you can and continue.]"
|
||||
)
|
||||
@@ -36,6 +36,7 @@ PLATFORMS: OrderedDict[str, PlatformInfo] = OrderedDict([
|
||||
("wecom_callback", PlatformInfo(label="💬 WeCom Callback", default_toolset="hermes-wecom-callback")),
|
||||
("weixin", PlatformInfo(label="💬 Weixin", default_toolset="hermes-weixin")),
|
||||
("qqbot", PlatformInfo(label="💬 QQBot", default_toolset="hermes-qqbot")),
|
||||
("yuanbao", PlatformInfo(label="🤖 Yuanbao", default_toolset="hermes-yuanbao")),
|
||||
("webhook", PlatformInfo(label="🔗 Webhook", default_toolset="hermes-webhook")),
|
||||
("api_server", PlatformInfo(label="🌐 API Server", default_toolset="hermes-api-server")),
|
||||
("cron", PlatformInfo(label="⏰ Cron", default_toolset="hermes-cron")),
|
||||
|
||||
@@ -167,6 +167,12 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = {
|
||||
transport="openai_chat",
|
||||
base_url_env_var="OLLAMA_BASE_URL",
|
||||
),
|
||||
# Azure Foundry: supports both OpenAI-style and Anthropic-style endpoints.
|
||||
# The transport is determined at runtime from config.yaml model.api_mode.
|
||||
"azure-foundry": HermesOverlay(
|
||||
transport="openai_chat", # default; overridden by api_mode in config
|
||||
base_url_env_var="AZURE_FOUNDRY_BASE_URL",
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -221,6 +221,32 @@ def _resolve_runtime_from_pool_entry(
|
||||
elif provider == "copilot":
|
||||
api_mode = _copilot_runtime_api_mode(model_cfg, getattr(entry, "runtime_api_key", ""))
|
||||
base_url = base_url or PROVIDER_REGISTRY["copilot"].inference_base_url
|
||||
elif provider == "azure-foundry":
|
||||
# Azure Foundry: read api_mode and base_url from config
|
||||
cfg_provider = str(model_cfg.get("provider") or "").strip().lower()
|
||||
if cfg_provider == "azure-foundry":
|
||||
cfg_base_url = str(model_cfg.get("base_url") or "").strip().rstrip("/")
|
||||
if cfg_base_url:
|
||||
base_url = cfg_base_url
|
||||
configured_mode = _parse_api_mode(model_cfg.get("api_mode"))
|
||||
if configured_mode:
|
||||
api_mode = configured_mode
|
||||
# Model-family inference for GPT-5.x / codex / o1-o4: Azure rejects
|
||||
# /chat/completions on these with 400 "operation unsupported" — see
|
||||
# azure_foundry_model_api_mode() for rationale. Skip when the user
|
||||
# explicitly picked anthropic_messages (Anthropic-style endpoint).
|
||||
if effective_model and api_mode != "anthropic_messages":
|
||||
try:
|
||||
from hermes_cli.models import azure_foundry_model_api_mode
|
||||
|
||||
inferred = azure_foundry_model_api_mode(effective_model)
|
||||
except Exception:
|
||||
inferred = None
|
||||
if inferred:
|
||||
api_mode = inferred
|
||||
# For Anthropic-style endpoints, strip /v1 suffix
|
||||
if api_mode == "anthropic_messages":
|
||||
base_url = re.sub(r"/v1/?$", "", base_url)
|
||||
else:
|
||||
configured_provider = str(model_cfg.get("provider") or "").strip().lower()
|
||||
# Honour model.base_url from config.yaml when the configured provider
|
||||
@@ -589,6 +615,88 @@ def _resolve_openrouter_runtime(
|
||||
}
|
||||
|
||||
|
||||
def _resolve_azure_foundry_runtime(
    *,
    requested_provider: str,
    model_cfg: Dict[str, Any],
    explicit_api_key: Optional[str] = None,
    explicit_base_url: Optional[str] = None,
    target_model: Optional[str] = None,
) -> Dict[str, Any]:
    """Resolve an Azure Foundry runtime entry.

    The base URL comes from the explicit override, then ``model.base_url``
    in config (only when ``model.provider`` is ``azure-foundry``), then the
    ``AZURE_FOUNDRY_BASE_URL`` env var. The API key comes from the explicit
    override, then ``get_env_value("AZURE_FOUNDRY_API_KEY")``, then the
    process environment. The api_mode starts from ``model.api_mode``
    (default ``chat_completions``) and is upgraded by model-family inference
    unless it is ``anthropic_messages``.

    Returns:
        A dict with ``provider``, ``api_mode``, ``base_url``, ``api_key``,
        ``source`` and ``requested_provider``.

    Raises:
        AuthError: when no base URL or no API key can be found.
    """
    key_override = str(explicit_api_key or "").strip()
    url_override = str(explicit_base_url or "").strip().rstrip("/")

    # Config values only count when the configured provider is Azure Foundry.
    configured_url = ""
    api_mode = "chat_completions"
    if str(model_cfg.get("provider") or "").strip().lower() == "azure-foundry":
        configured_url = str(model_cfg.get("base_url") or "").strip().rstrip("/")
        api_mode = _parse_api_mode(model_cfg.get("api_mode")) or "chat_completions"

    # Model-family inference: Azure Foundry deploys GPT-5.x / codex / o1-o4
    # reasoning models as Responses-API-only. Calling /chat/completions
    # against them returns 400 "The requested operation is unsupported."
    # Upgrade api_mode when the model name matches, unless the user has
    # explicitly chosen anthropic_messages (Anthropic-style endpoint).
    model_id = str(target_model or model_cfg.get("default") or "").strip()
    if model_id and api_mode != "anthropic_messages":
        try:
            from hermes_cli.models import azure_foundry_model_api_mode

            inferred_mode = azure_foundry_model_api_mode(model_id)
        except Exception:
            inferred_mode = None
        api_mode = inferred_mode or api_mode

    env_url = os.getenv("AZURE_FOUNDRY_BASE_URL", "").strip().rstrip("/")
    base_url = url_override or configured_url or env_url
    if not base_url:
        raise AuthError(
            "Azure Foundry requires a base URL. Set it via 'hermes model' or "
            "the AZURE_FOUNDRY_BASE_URL environment variable."
        )

    api_key = key_override
    if not api_key:
        try:
            from hermes_cli.config import get_env_value

            api_key = get_env_value("AZURE_FOUNDRY_API_KEY") or ""
        except Exception:
            api_key = ""
    if not api_key:
        api_key = os.getenv("AZURE_FOUNDRY_API_KEY", "").strip()
    if not api_key:
        raise AuthError(
            "Azure Foundry requires an API key. Set AZURE_FOUNDRY_API_KEY in "
            "~/.hermes/.env or run 'hermes model' to configure."
        )

    # Anthropic SDK appends /v1/messages itself, so strip any trailing /v1
    # inherited from the configured base_url to avoid double-/v1 paths.
    if api_mode == "anthropic_messages":
        base_url = re.sub(r"/v1/?$", "", base_url)

    # The key is judged after stripping, the URL by the raw argument.
    came_from_caller = bool(key_override or explicit_base_url)
    return {
        "provider": "azure-foundry",
        "api_mode": api_mode,
        "base_url": base_url,
        "api_key": api_key,
        "source": "explicit" if came_from_caller else "config",
        "requested_provider": requested_provider,
    }
|
||||
|
||||
|
||||
def _resolve_explicit_runtime(
|
||||
*,
|
||||
provider: str,
|
||||
@@ -678,6 +786,15 @@ def _resolve_explicit_runtime(
|
||||
"requested_provider": requested_provider,
|
||||
}
|
||||
|
||||
# Azure Foundry: user-configured endpoint with selectable API mode
|
||||
if provider == "azure-foundry":
|
||||
return _resolve_azure_foundry_runtime(
|
||||
requested_provider=requested_provider,
|
||||
model_cfg=model_cfg,
|
||||
explicit_api_key=explicit_api_key,
|
||||
explicit_base_url=explicit_base_url,
|
||||
)
|
||||
|
||||
pconfig = PROVIDER_REGISTRY.get(provider)
|
||||
if pconfig and pconfig.auth_type == "api_key":
|
||||
env_url = ""
|
||||
@@ -746,6 +863,41 @@ def resolve_runtime_provider(
|
||||
"""
|
||||
requested_provider = resolve_requested_provider(requested)
|
||||
|
||||
# Azure Anthropic short-circuit: when explicitly targeting an Azure endpoint
|
||||
# with provider="anthropic", bypass _resolve_named_custom_runtime (which would
|
||||
# return provider="custom" with chat_completions api_mode and no valid key).
|
||||
# Instead, use the Azure key directly with anthropic_messages api_mode.
|
||||
_eff_base = (explicit_base_url or "").strip()
|
||||
if requested_provider == "anthropic" and "azure.com" in _eff_base:
|
||||
_azure_key = (
|
||||
(explicit_api_key or "").strip()
|
||||
or os.getenv("AZURE_ANTHROPIC_KEY", "").strip()
|
||||
or os.getenv("ANTHROPIC_API_KEY", "").strip()
|
||||
)
|
||||
return {
|
||||
"provider": "anthropic",
|
||||
"api_mode": "anthropic_messages",
|
||||
"base_url": _eff_base.rstrip("/"),
|
||||
"api_key": _azure_key,
|
||||
"source": "azure-explicit",
|
||||
"requested_provider": requested_provider,
|
||||
}
|
||||
|
||||
# Azure Foundry: user-configured endpoint with selectable API mode
|
||||
# (OpenAI-style chat_completions or Anthropic-style anthropic_messages).
|
||||
# Resolve before the custom-runtime / pool / generic paths so Azure
|
||||
# config is always picked up from model.base_url + model.api_mode,
|
||||
# regardless of whether the caller passed explicit_* args.
|
||||
if requested_provider == "azure-foundry":
|
||||
azure_runtime = _resolve_azure_foundry_runtime(
|
||||
requested_provider=requested_provider,
|
||||
model_cfg=_get_model_config(),
|
||||
explicit_api_key=explicit_api_key,
|
||||
explicit_base_url=explicit_base_url,
|
||||
target_model=target_model,
|
||||
)
|
||||
return azure_runtime
|
||||
|
||||
custom_runtime = _resolve_named_custom_runtime(
|
||||
requested_provider=requested_provider,
|
||||
explicit_api_key=explicit_api_key,
|
||||
@@ -924,13 +1076,6 @@ def resolve_runtime_provider(
|
||||
|
||||
# Anthropic (native Messages API)
|
||||
if provider == "anthropic":
|
||||
from agent.anthropic_adapter import resolve_anthropic_token
|
||||
token = resolve_anthropic_token()
|
||||
if not token:
|
||||
raise AuthError(
|
||||
"No Anthropic credentials found. Set ANTHROPIC_TOKEN or ANTHROPIC_API_KEY, "
|
||||
"run 'claude setup-token', or authenticate with 'claude /login'."
|
||||
)
|
||||
# Allow base URL override from config.yaml model.base_url, but only
|
||||
# when the configured provider is anthropic — otherwise a non-Anthropic
|
||||
# base_url (e.g. Codex endpoint) would leak into Anthropic requests.
|
||||
@@ -939,6 +1084,33 @@ def resolve_runtime_provider(
|
||||
if cfg_provider == "anthropic":
|
||||
cfg_base_url = (model_cfg.get("base_url") or "").strip().rstrip("/")
|
||||
base_url = cfg_base_url or "https://api.anthropic.com"
|
||||
|
||||
# For Azure AI Foundry endpoints, use ANTHROPIC_API_KEY directly —
|
||||
# Claude Code OAuth tokens (sk-ant-oat01) are not accepted by Azure.
|
||||
# Azure keys don't start with "sk-ant-" so resolve_anthropic_token()
|
||||
# would find the Claude Code OAuth token first (priority 3) and return
|
||||
# that instead, causing 401s. Detect Azure endpoints and use the env
|
||||
# key directly to bypass the OAuth priority chain.
|
||||
_is_azure_endpoint = "azure.com" in base_url.lower() or (
|
||||
cfg_base_url and "azure.com" in cfg_base_url.lower()
|
||||
)
|
||||
if _is_azure_endpoint:
|
||||
token = (
|
||||
os.getenv("AZURE_ANTHROPIC_KEY", "").strip()
|
||||
or os.getenv("ANTHROPIC_API_KEY", "").strip()
|
||||
)
|
||||
if not token:
|
||||
raise AuthError(
|
||||
"No Azure Anthropic API key found. Set AZURE_ANTHROPIC_KEY or ANTHROPIC_API_KEY."
|
||||
)
|
||||
else:
|
||||
from agent.anthropic_adapter import resolve_anthropic_token
|
||||
token = resolve_anthropic_token()
|
||||
if not token:
|
||||
raise AuthError(
|
||||
"No Anthropic credentials found. Set ANTHROPIC_TOKEN or ANTHROPIC_API_KEY, "
|
||||
"run 'claude setup-token', or authenticate with 'claude /login'."
|
||||
)
|
||||
return {
|
||||
"provider": "anthropic",
|
||||
"api_mode": "anthropic_messages",
|
||||
|
||||
+96
-63
@@ -1856,27 +1856,32 @@ def _setup_slack():
|
||||
if existing:
|
||||
print_info("Slack: already configured")
|
||||
if not prompt_yes_no("Reconfigure Slack?", False):
|
||||
# Even without reconfiguring, offer to refresh the manifest so
|
||||
# new commands (e.g. /btw, /stop, ...) get registered in Slack.
|
||||
if prompt_yes_no(
|
||||
"Regenerate the Slack app manifest with the latest command "
|
||||
"list? (recommended after `hermes update`)",
|
||||
True,
|
||||
):
|
||||
_write_slack_manifest_and_instruct()
|
||||
return
|
||||
|
||||
print_info("Steps to create a Slack app:")
|
||||
print_info(" 1. Go to https://api.slack.com/apps → Create New App (from scratch)")
|
||||
print_info(" 1. Go to https://api.slack.com/apps → Create New App")
|
||||
print_info(" Pick 'From an app manifest' — we'll generate one for you below.")
|
||||
print_info(" 2. Enable Socket Mode: Settings → Socket Mode → Enable")
|
||||
print_info(" • Create an App-Level Token with 'connections:write' scope")
|
||||
print_info(" 3. Add Bot Token Scopes: Features → OAuth & Permissions")
|
||||
print_info(" Required scopes: chat:write, app_mentions:read,")
|
||||
print_info(" channels:history, channels:read, im:history,")
|
||||
print_info(" im:read, im:write, users:read, files:read, files:write")
|
||||
print_info(" Optional for private channels: groups:history")
|
||||
print_info(" 4. Subscribe to Events: Features → Event Subscriptions → Enable")
|
||||
print_info(" Required events: message.im, message.channels, app_mention")
|
||||
print_info(" Optional for private channels: message.groups")
|
||||
print_warning(" ⚠ Without message.channels the bot will ONLY work in DMs,")
|
||||
print_warning(" not public channels.")
|
||||
print_info(" 5. Install to Workspace: Settings → Install App")
|
||||
print_info(" 6. Reinstall the app after any scope or event changes")
|
||||
print_info(" 7. After installing, invite the bot to channels: /invite @YourBot")
|
||||
print_info(" 3. Install to Workspace: Settings → Install App")
|
||||
print_info(" 4. After installing, invite the bot to channels: /invite @YourBot")
|
||||
print()
|
||||
print_info(" Full guide: https://hermes-agent.nousresearch.com/docs/user-guide/messaging/slack/")
|
||||
print()
|
||||
|
||||
# Generate and write manifest up-front so the user can paste it into
|
||||
# the "Create from manifest" flow instead of clicking through scopes /
|
||||
# events / slash commands one at a time.
|
||||
_write_slack_manifest_and_instruct()
|
||||
|
||||
print()
|
||||
bot_token = prompt("Slack Bot Token (xoxb-...)", password=True)
|
||||
if not bot_token:
|
||||
@@ -1902,6 +1907,49 @@ def _setup_slack():
|
||||
print_info(" Set SLACK_ALLOW_ALL_USERS=true or GATEWAY_ALLOW_ALL_USERS=true only if you intentionally want open workspace access.")
|
||||
|
||||
|
||||
def _write_slack_manifest_and_instruct():
    """Write ``slack-manifest.json`` under HERMES_HOME and print usage hints.

    Kept as a standalone helper so the Slack setup flow can call it both
    during initial configuration and when the user declines to reconfigure
    but still wants a refreshed manifest, without re-entering tokens.

    Best effort: any exception raised while building or writing the manifest
    is reported as a warning, followed by the manual command, and is never
    propagated to the caller.
    """
    try:
        from hermes_cli.slack_cli import _build_full_manifest
        from hermes_constants import get_hermes_home
        import json as _json

        manifest_data = _build_full_manifest(
            bot_name="Hermes",
            bot_description="Your Hermes agent on Slack",
        )

        manifest_path = Path(get_hermes_home()) / "slack-manifest.json"
        manifest_path.parent.mkdir(parents=True, exist_ok=True)

        # Pretty-printed, non-ASCII preserved, newline-terminated.
        serialized = _json.dumps(manifest_data, indent=2, ensure_ascii=False) + "\n"
        manifest_path.write_text(serialized, encoding="utf-8")

        paste_hint = (
            " Paste it into https://api.slack.com/apps → your app → Features "
            "→ App Manifest → Edit, then Save. Slack will prompt to "
            "reinstall if scopes or slash commands changed."
        )
        refresh_hint = (
            " Re-run `hermes slack manifest --write` anytime to refresh after "
            "Hermes adds new commands."
        )
        print_success(f"Slack app manifest written to: {manifest_path}")
        print_info(paste_hint)
        print_info(refresh_hint)
    except Exception as exc:  # pragma: no cover - best-effort UX helper
        manual_hint = (
            " You can generate it manually later with: "
            "hermes slack manifest --write"
        )
        print_warning(f"Couldn't write Slack manifest: {exc}")
        print_info(manual_hint)
|
||||
|
||||
|
||||
def _setup_matrix():
|
||||
"""Configure Matrix credentials."""
|
||||
print_header("Matrix")
|
||||
@@ -2085,6 +2133,12 @@ def _setup_feishu():
|
||||
_gateway_setup_feishu()
|
||||
|
||||
|
||||
def _setup_yuanbao():
    """Hand Yuanbao configuration off to the gateway module's setup routine."""
    # The import runs at call time rather than at module import time.
    from hermes_cli.gateway import _setup_yuanbao as run_gateway_yuanbao_setup

    run_gateway_yuanbao_setup()
|
||||
|
||||
|
||||
def _setup_wecom():
|
||||
"""Configure WeCom (Enterprise WeChat) via gateway setup."""
|
||||
from hermes_cli.gateway import _setup_wecom as _gateway_setup_wecom
|
||||
@@ -2229,6 +2283,7 @@ _GATEWAY_PLATFORMS = [
|
||||
("WhatsApp", "WHATSAPP_ENABLED", _setup_whatsapp),
|
||||
("DingTalk", "DINGTALK_CLIENT_ID", _setup_dingtalk),
|
||||
("Feishu / Lark", "FEISHU_APP_ID", _setup_feishu),
|
||||
("Yuanbao", "YUANBAO_APP_ID", _setup_yuanbao),
|
||||
("WeCom (Enterprise WeChat)", "WECOM_BOT_ID", _setup_wecom),
|
||||
("WeCom Callback (Self-Built App)", "WECOM_CALLBACK_CORP_ID", _setup_wecom_callback),
|
||||
("Weixin (WeChat)", "WEIXIN_ACCOUNT_ID", _setup_weixin),
|
||||
@@ -2863,17 +2918,6 @@ SETUP_SECTIONS = [
|
||||
("agent", "Agent Settings", setup_agent_settings),
|
||||
]
|
||||
|
||||
# The returning-user menu intentionally omits standalone TTS because model setup
|
||||
# already includes TTS selection and tools setup covers the rest of the provider
|
||||
# configuration. Keep this list in the same order as the visible menu entries.
|
||||
RETURNING_USER_MENU_SECTION_KEYS = [
|
||||
"model",
|
||||
"terminal",
|
||||
"gateway",
|
||||
"tools",
|
||||
"agent",
|
||||
]
|
||||
|
||||
|
||||
def run_setup_wizard(args):
|
||||
"""Run the interactive setup wizard.
|
||||
@@ -2898,6 +2942,9 @@ def run_setup_wizard(args):
|
||||
save_config(copy.deepcopy(DEFAULT_CONFIG))
|
||||
print_success("Configuration reset to defaults.")
|
||||
|
||||
reconfigure_requested = bool(getattr(args, "reconfigure", False))
|
||||
quick_requested = bool(getattr(args, "quick", False))
|
||||
|
||||
config = load_config()
|
||||
hermes_home = get_hermes_home()
|
||||
|
||||
@@ -2989,50 +3036,36 @@ def run_setup_wizard(args):
|
||||
migration_ran = False
|
||||
|
||||
if is_existing:
|
||||
# ── Returning User Menu ──
|
||||
print()
|
||||
print_header("Welcome Back!")
|
||||
print_success("You already have Hermes configured.")
|
||||
print()
|
||||
|
||||
menu_choices = [
|
||||
"Quick Setup - configure missing items only",
|
||||
"Full Setup - reconfigure everything",
|
||||
"Model & Provider",
|
||||
"Terminal Backend",
|
||||
"Messaging Platforms (Gateway)",
|
||||
"Tools",
|
||||
"Agent Settings",
|
||||
"Exit",
|
||||
]
|
||||
choice = prompt_choice("What would you like to do?", menu_choices, 0)
|
||||
|
||||
if choice == 0:
|
||||
# Quick setup
|
||||
# Existing install — default is the full-wizard reconfigure flow.
|
||||
# Every prompt shows the current value as its default, so pressing
|
||||
# Enter keeps it. Opt into `--quick` for the narrow "just fill in
|
||||
# missing items" flow (useful after a partial OpenClaw migration
|
||||
# or when a required API key got cleared).
|
||||
if quick_requested:
|
||||
_run_quick_setup(config, hermes_home)
|
||||
return
|
||||
elif choice == 1:
|
||||
# Full setup — fall through to run all sections
|
||||
pass
|
||||
elif choice == 7:
|
||||
print_info("Exiting. Run 'hermes setup' again when ready.")
|
||||
return
|
||||
elif 2 <= choice <= 6:
|
||||
# Individual section — map by key, not by position.
|
||||
# SETUP_SECTIONS includes TTS but the returning-user menu skips it,
|
||||
# so positional indexing (choice - 2) would dispatch the wrong section.
|
||||
section_key = RETURNING_USER_MENU_SECTION_KEYS[choice - 2]
|
||||
section = next((s for s in SETUP_SECTIONS if s[0] == section_key), None)
|
||||
if section:
|
||||
_, label, func = section
|
||||
func(config)
|
||||
save_config(config)
|
||||
_print_setup_summary(config, hermes_home)
|
||||
return
|
||||
|
||||
print()
|
||||
print_header("Reconfigure")
|
||||
print_success("You already have Hermes configured.")
|
||||
print_info("Running the full wizard — each prompt shows your current value.")
|
||||
print_info("Press Enter to keep it, or type a new value to change it.")
|
||||
print_info("")
|
||||
print_info("Tip: jump straight to a section with 'hermes setup model|terminal|")
|
||||
print_info(" gateway|tools|agent', or fill only missing items with --quick.")
|
||||
# Fall through to the "Full Setup — run all sections" block below.
|
||||
# --reconfigure is now the default on existing installs; the flag
|
||||
# is preserved for backwards compatibility but is a no-op here.
|
||||
else:
|
||||
# ── First-Time Setup ──
|
||||
print()
|
||||
|
||||
# --reconfigure / --quick on a fresh install are meaningless — fall
|
||||
# through to the normal first-time flow.
|
||||
if reconfigure_requested or quick_requested:
|
||||
print_info("No existing configuration found — running first-time setup.")
|
||||
print()
|
||||
|
||||
# Offer OpenClaw migration before configuration begins
|
||||
migration_ran = _offer_openclaw_migration(hermes_home)
|
||||
if migration_ran:
|
||||
|
||||
+230
-20
@@ -11,9 +11,10 @@ handler are thin wrappers that parse args and delegate.
|
||||
"""
|
||||
|
||||
import json
|
||||
import re
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Optional
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from rich.console import Console
|
||||
from rich.panel import Panel
|
||||
@@ -141,6 +142,103 @@ def _derive_category_from_install_path(install_path: str) -> str:
|
||||
return "" if parent == "." else parent
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Interactive name/category resolution for URL-installed skills
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Skill names: a lowercase letter followed by lowercase letters, digits,
# underscores or hyphens.
_VALID_NAME_RE = re.compile(r"^[a-z][a-z0-9_-]*$")
# Categories follow the same rule but may also contain "/" for nesting.
_VALID_CATEGORY_RE = re.compile(r"^[a-z][a-z0-9_/-]*$")

# Placeholder-like names that are rejected regardless of letter case.
_SENTINEL_SKILL_NAMES = frozenset({"skill", "readme", "index", "unnamed-skill"})


def _is_valid_installed_skill_name(name: str) -> bool:
    """Return True when *name* is usable as an installed skill name.

    Surrounding whitespace is ignored. The name must match
    ``_VALID_NAME_RE`` exactly as typed: mixed-case input such as
    ``"MySkill"`` is rejected instead of being validated against a
    lower-cased copy, so a name that passes here is already in the
    lowercase form the user-facing messages promise. Non-strings, empty
    strings and sentinel names (``skill``, ``readme``, ``index``,
    ``unnamed-skill``, compared case-insensitively) are rejected.
    """
    if not isinstance(name, str):
        return False
    candidate = name.strip()
    if not candidate or candidate.lower() in _SENTINEL_SKILL_NAMES:
        return False
    return _VALID_NAME_RE.fullmatch(candidate) is not None
|
||||
|
||||
|
||||
def _existing_categories() -> List[str]:
    """List the category buckets found directly under ``SKILLS_DIR``.

    A bucket is a non-hidden directory that has no ``SKILL.md`` of its own
    (that would make it a top-level skill) but has at least one ``SKILL.md``
    somewhere beneath it. Names are returned sorted and de-duplicated.

    Returns an empty list when ``SKILLS_DIR`` cannot be listed; a bucket
    whose subtree cannot be walked is skipped.
    """
    from tools.skills_hub import SKILLS_DIR

    buckets = set()
    try:
        for candidate in SKILLS_DIR.iterdir():
            # Keep only visible directories (skips e.g. ``.hub``, ``.trash``).
            if not (candidate.is_dir() and not candidate.name.startswith(".")):
                continue
            # A SKILL.md directly inside means this is a flat skill, not a bucket.
            if (candidate / "SKILL.md").exists():
                continue
            try:
                has_nested_skill = next(candidate.rglob("SKILL.md"), None) is not None
            except OSError:
                # Unreadable subtree: ignore this candidate, keep scanning.
                continue
            if has_nested_skill:
                buckets.add(candidate.name)
    except OSError:
        # Covers a missing skills directory (FileNotFoundError) as well.
        return []
    return sorted(buckets)
|
||||
|
||||
|
||||
def _prompt_for_skill_name(c: Console, url: str, default: str = "") -> Optional[str]:
    """Prompt interactively for a skill name.

    Args:
        c: Console used to print the explanation and the prompt text.
        url: Source URL shown in the explanation.
        default: Name used when the user just presses Enter; shown in
            square brackets after the prompt when non-empty.

    Returns:
        The entered (or default) name when ``_is_valid_installed_skill_name``
        accepts it; ``None`` on EOF, Ctrl-C, or an invalid name.
    """
    # Dynamic text is escaped before being embedded in console markup:
    # an unescaped "[my-default]" is parsed as a style tag and vanishes
    # from the rendered prompt, and would also capture the closing "[/]".
    from rich.markup import escape

    c.print()
    c.print(
        f"[yellow]The SKILL.md at {escape(url)} doesn't declare a `name:` in its "
        f"frontmatter,[/]\n[yellow]and the URL path doesn't produce a valid "
        f"identifier either.[/]"
    )
    default_hint = escape(f" [{default}]") if default else ""
    c.print(
        f"[bold]Enter a skill name{default_hint}:[/] "
        f"[dim](lowercase letters, digits, hyphens, underscores; starts with a letter)[/]"
    )
    try:
        answer = input("Name: ").strip()
    except (EOFError, KeyboardInterrupt):
        return None
    if not answer and default:
        answer = default
    if not _is_valid_installed_skill_name(answer):
        c.print(f"[bold red]Invalid name:[/] {escape(repr(answer))}. Aborting install.\n")
        return None
    return answer
|
||||
|
||||
|
||||
def _prompt_for_category(c: Console, existing: List[str]) -> str:
    """Prompt interactively for an install category.

    Args:
        c: Console used to print the prompt text.
        existing: Category names offered for reuse; listed when non-empty.

    Returns:
        The entered category when it matches ``_VALID_CATEGORY_RE``.
        Returns ``""`` (flat install) on empty input, EOF, Ctrl-C, or an
        invalid category.
    """
    # Directory names and echoed user input are escaped so that bracketed
    # text is displayed literally instead of being parsed as console markup.
    from rich.markup import escape

    c.print()
    if existing:
        c.print(
            "[bold]Pick a category[/] "
            "[dim](reuse an existing bucket, type a new one, or press Enter to install flat)[/]"
        )
        c.print(f"[dim]Existing: {escape(', '.join(existing))}[/]")
    else:
        c.print(
            "[bold]Category[/] [dim](optional — press Enter to install flat at ~/.hermes/skills/<name>/)[/]"
        )
    try:
        answer = input("Category: ").strip()
    except (EOFError, KeyboardInterrupt):
        return ""
    if not answer:
        return ""
    if not _VALID_CATEGORY_RE.match(answer):
        c.print(f"[dim]Invalid category {escape(repr(answer))} — installing flat.[/]")
        return ""
    return answer
|
||||
|
||||
|
||||
def do_search(query: str, source: str = "all", limit: int = 10,
|
||||
console: Optional[Console] = None) -> None:
|
||||
"""Search registries and display results as a Rich table."""
|
||||
@@ -309,8 +407,17 @@ def do_browse(page: int = 1, page_size: int = 20, source: str = "all",
|
||||
|
||||
def do_install(identifier: str, category: str = "", force: bool = False,
|
||||
console: Optional[Console] = None, skip_confirm: bool = False,
|
||||
invalidate_cache: bool = True) -> None:
|
||||
"""Fetch, quarantine, scan, confirm, and install a skill."""
|
||||
invalidate_cache: bool = True,
|
||||
name_override: str = "") -> None:
|
||||
"""Fetch, quarantine, scan, confirm, and install a skill.
|
||||
|
||||
``name_override`` lets non-interactive callers (slash commands, gateway,
|
||||
scripts) supply a skill name when the upstream SKILL.md lacks a valid
|
||||
``name:`` frontmatter field. On interactive TTY surfaces, a missing name
|
||||
triggers a prompt instead; ``skip_confirm=True`` means "non-interactive"
|
||||
(so pair it with ``name_override`` when installing from a URL that has
|
||||
no frontmatter).
|
||||
"""
|
||||
from tools.skills_hub import (
|
||||
GitHubAuth, create_source_router, ensure_hub_dirs,
|
||||
quarantine_bundle, install_from_quarantine, HubLockFile,
|
||||
@@ -354,6 +461,58 @@ def do_install(identifier: str, category: str = "", force: bool = False,
|
||||
c.print()
|
||||
return
|
||||
|
||||
# URL-sourced skills may arrive with an empty name when SKILL.md has no
|
||||
# ``name:`` in frontmatter AND the URL path doesn't yield a valid
|
||||
# identifier. Resolve by (1) --name override, (2) interactive prompt on
|
||||
# a TTY, (3) refuse with an actionable error on non-interactive surfaces.
|
||||
bundle_meta = getattr(bundle, "metadata", {}) or {}
|
||||
if bundle.source == "url" and (not bundle.name or bundle_meta.get("awaiting_name")):
|
||||
if name_override and _is_valid_installed_skill_name(name_override):
|
||||
bundle.name = name_override.strip()
|
||||
bundle_meta["awaiting_name"] = False
|
||||
elif name_override:
|
||||
c.print(
|
||||
f"[bold red]Invalid --name:[/] {name_override!r}. "
|
||||
"Must be a lowercase identifier (letters, digits, hyphens, "
|
||||
"underscores; starts with a letter).\n"
|
||||
)
|
||||
return
|
||||
elif skip_confirm:
|
||||
# Non-interactive surface (slash command / TUI / gateway). Can't
|
||||
# prompt — emit an actionable error.
|
||||
url = bundle_meta.get("url") or identifier
|
||||
c.print(
|
||||
f"[bold red]Cannot install from URL:[/] {url}\n"
|
||||
"[yellow]The SKILL.md has no `name:` in its frontmatter, "
|
||||
"and the URL path doesn't produce a valid identifier.[/]\n\n"
|
||||
"Retry with an explicit name:\n"
|
||||
f" [bold]/skills install {url} --name <your-name>[/]\n"
|
||||
f" [bold]hermes skills install {url} --name <your-name>[/]\n\n"
|
||||
"[dim]Or ask the SKILL.md's author to add a `name:` field to "
|
||||
"its YAML frontmatter.[/]\n"
|
||||
)
|
||||
return
|
||||
else:
|
||||
# Interactive TTY — prompt.
|
||||
url = bundle_meta.get("url") or identifier
|
||||
chosen = _prompt_for_skill_name(c, url)
|
||||
if not chosen:
|
||||
c.print("[dim]Installation cancelled.[/]\n")
|
||||
return
|
||||
bundle.name = chosen
|
||||
bundle_meta["awaiting_name"] = False
|
||||
# Keep SkillMeta in sync so downstream "already installed" checks,
|
||||
# audit logs, and display all see the final name.
|
||||
if meta is not None:
|
||||
meta.name = bundle.name
|
||||
meta.path = bundle.name
|
||||
|
||||
# URL-sourced skills: offer to pick a category interactively when the
|
||||
# caller didn't specify one (TTY only — non-interactive installs fall
|
||||
# through to flat install, matching all other sources).
|
||||
if bundle.source == "url" and not category and not skip_confirm:
|
||||
category = _prompt_for_category(c, _existing_categories())
|
||||
|
||||
# Auto-detect category for official skills (e.g. "official/autonomous-ai-agents/blackbox")
|
||||
if bundle.source == "official" and not category:
|
||||
id_parts = bundle.identifier.split("/") # ["official", "category", "skill"]
|
||||
@@ -599,11 +758,24 @@ def inspect_skill(identifier: str) -> Optional[dict]:
|
||||
return out
|
||||
|
||||
|
||||
def do_list(source_filter: str = "all", console: Optional[Console] = None) -> None:
|
||||
"""List installed skills, distinguishing hub, builtin, and local skills."""
|
||||
def do_list(source_filter: str = "all",
|
||||
enabled_only: bool = False,
|
||||
console: Optional[Console] = None) -> None:
|
||||
"""List installed skills, distinguishing hub, builtin, and local skills.
|
||||
|
||||
Args:
|
||||
source_filter: ``all`` | ``hub`` | ``builtin`` | ``local``.
|
||||
enabled_only: If True, hide disabled skills from the output.
|
||||
|
||||
Enabled/disabled state is resolved against the currently active profile's
|
||||
config — ``hermes -p <profile> skills list`` reads that profile's
|
||||
``skills.disabled`` list because ``-p`` swaps ``HERMES_HOME`` at process
|
||||
start. No explicit profile flag needed here.
|
||||
"""
|
||||
from tools.skills_hub import HubLockFile, ensure_hub_dirs
|
||||
from tools.skills_sync import _read_manifest
|
||||
from tools.skills_tool import _find_all_skills
|
||||
from agent.skill_utils import get_disabled_skill_names
|
||||
|
||||
c = console or _console
|
||||
ensure_hub_dirs()
|
||||
@@ -611,17 +783,26 @@ def do_list(source_filter: str = "all", console: Optional[Console] = None) -> No
|
||||
hub_installed = {e["name"]: e for e in lock.list_installed()}
|
||||
builtin_names = set(_read_manifest())
|
||||
|
||||
all_skills = _find_all_skills()
|
||||
# Pull ALL skills (including disabled ones) so we can annotate status.
|
||||
all_skills = _find_all_skills(skip_disabled=True)
|
||||
disabled_names = get_disabled_skill_names()
|
||||
|
||||
table = Table(title="Installed Skills")
|
||||
title = "Installed Skills"
|
||||
if enabled_only:
|
||||
title += " (enabled only)"
|
||||
|
||||
table = Table(title=title)
|
||||
table.add_column("Name", style="bold cyan")
|
||||
table.add_column("Category", style="dim")
|
||||
table.add_column("Source", style="dim")
|
||||
table.add_column("Trust", style="dim")
|
||||
table.add_column("Status", style="dim")
|
||||
|
||||
hub_count = 0
|
||||
builtin_count = 0
|
||||
local_count = 0
|
||||
enabled_count = 0
|
||||
disabled_count = 0
|
||||
|
||||
for skill in sorted(all_skills, key=lambda s: (s.get("category") or "", s["name"])):
|
||||
name = skill["name"]
|
||||
@@ -632,29 +813,48 @@ def do_list(source_filter: str = "all", console: Optional[Console] = None) -> No
|
||||
source_type = "hub"
|
||||
source_display = hub_entry.get("source", "hub")
|
||||
trust = hub_entry.get("trust_level", "community")
|
||||
hub_count += 1
|
||||
elif name in builtin_names:
|
||||
source_type = "builtin"
|
||||
source_display = "builtin"
|
||||
trust = "builtin"
|
||||
builtin_count += 1
|
||||
else:
|
||||
source_type = "local"
|
||||
source_display = "local"
|
||||
trust = "local"
|
||||
local_count += 1
|
||||
|
||||
if source_filter != "all" and source_filter != source_type:
|
||||
continue
|
||||
|
||||
is_enabled = name not in disabled_names
|
||||
if enabled_only and not is_enabled:
|
||||
continue
|
||||
|
||||
if source_type == "hub":
|
||||
hub_count += 1
|
||||
elif source_type == "builtin":
|
||||
builtin_count += 1
|
||||
else:
|
||||
local_count += 1
|
||||
|
||||
if is_enabled:
|
||||
enabled_count += 1
|
||||
status_cell = "[bold green]enabled[/]"
|
||||
else:
|
||||
disabled_count += 1
|
||||
status_cell = "[dim red]disabled[/]"
|
||||
|
||||
trust_style = {"builtin": "bright_cyan", "trusted": "green", "community": "yellow", "local": "dim"}.get(trust, "dim")
|
||||
trust_label = "official" if source_display == "official" else trust
|
||||
table.add_row(name, category, source_display, f"[{trust_style}]{trust_label}[/]")
|
||||
table.add_row(name, category, source_display, f"[{trust_style}]{trust_label}[/]", status_cell)
|
||||
|
||||
c.print(table)
|
||||
c.print(
|
||||
f"[dim]{hub_count} hub-installed, {builtin_count} builtin, {local_count} local[/]\n"
|
||||
)
|
||||
summary = f"[dim]{hub_count} hub-installed, {builtin_count} builtin, {local_count} local"
|
||||
if enabled_only:
|
||||
summary += f" — {enabled_count} enabled shown"
|
||||
else:
|
||||
summary += f" — {enabled_count} enabled, {disabled_count} disabled"
|
||||
summary += "[/]\n"
|
||||
c.print(summary)
|
||||
|
||||
|
||||
def do_check(name: Optional[str] = None, console: Optional[Console] = None) -> None:
|
||||
@@ -1123,11 +1323,15 @@ def skills_command(args) -> None:
|
||||
do_search(args.query, source=args.source, limit=args.limit)
|
||||
elif action == "install":
|
||||
do_install(args.identifier, category=args.category, force=args.force,
|
||||
skip_confirm=getattr(args, "yes", False))
|
||||
skip_confirm=getattr(args, "yes", False),
|
||||
name_override=getattr(args, "name", "") or "")
|
||||
elif action == "inspect":
|
||||
do_inspect(args.identifier)
|
||||
elif action == "list":
|
||||
do_list(source_filter=args.source)
|
||||
do_list(
|
||||
source_filter=args.source,
|
||||
enabled_only=getattr(args, "enabled_only", False),
|
||||
)
|
||||
elif action == "check":
|
||||
do_check(name=getattr(args, "name", None))
|
||||
elif action == "update":
|
||||
@@ -1177,6 +1381,7 @@ def handle_skills_slash(cmd: str, console: Optional[Console] = None) -> None:
|
||||
/skills search kubernetes
|
||||
/skills install openai/skills/skill-creator
|
||||
/skills install openai/skills/skill-creator --force
|
||||
/skills install https://example.com/path/SKILL.md
|
||||
/skills inspect openai/skills/skill-creator
|
||||
/skills list
|
||||
/skills list --source hub
|
||||
@@ -1253,10 +1458,11 @@ def handle_skills_slash(cmd: str, console: Optional[Console] = None) -> None:
|
||||
|
||||
elif action == "install":
|
||||
if not args:
|
||||
c.print("[bold red]Usage:[/] /skills install <identifier> [--category <cat>] [--force] [--now]\n")
|
||||
c.print("[bold red]Usage:[/] /skills install <identifier-or-url> [--name <name>] [--category <cat>] [--force] [--now]\n")
|
||||
return
|
||||
identifier = args[0]
|
||||
category = ""
|
||||
name_override = ""
|
||||
# Slash commands run inside prompt_toolkit where input() hangs.
|
||||
# Always skip confirmation — the user typing the command is implicit consent.
|
||||
skip_confirm = True
|
||||
@@ -1267,9 +1473,11 @@ def handle_skills_slash(cmd: str, console: Optional[Console] = None) -> None:
|
||||
for i, a in enumerate(args):
|
||||
if a == "--category" and i + 1 < len(args):
|
||||
category = args[i + 1]
|
||||
elif a == "--name" and i + 1 < len(args):
|
||||
name_override = args[i + 1]
|
||||
do_install(identifier, category=category, force=force,
|
||||
skip_confirm=skip_confirm, invalidate_cache=invalidate_cache,
|
||||
console=c)
|
||||
name_override=name_override, console=c)
|
||||
|
||||
elif action == "inspect":
|
||||
if not args:
|
||||
@@ -1279,11 +1487,12 @@ def handle_skills_slash(cmd: str, console: Optional[Console] = None) -> None:
|
||||
|
||||
elif action == "list":
|
||||
source_filter = "all"
|
||||
enabled_only = "--enabled-only" in args or "--enabled" in args
|
||||
if "--source" in args:
|
||||
idx = args.index("--source")
|
||||
if idx + 1 < len(args):
|
||||
source_filter = args[idx + 1]
|
||||
do_list(source_filter=source_filter, console=c)
|
||||
do_list(source_filter=source_filter, enabled_only=enabled_only, console=c)
|
||||
|
||||
elif action == "check":
|
||||
name = args[0] if args else None
|
||||
@@ -1371,7 +1580,8 @@ def _print_skills_help(console: Console) -> None:
|
||||
" [cyan]search[/] <query> Search registries for skills\n"
|
||||
" [cyan]install[/] <identifier> Install a skill (with security scan)\n"
|
||||
" [cyan]inspect[/] <identifier> Preview a skill without installing\n"
|
||||
" [cyan]list[/] [--source hub|builtin|local] List installed skills\n"
|
||||
" [cyan]list[/] [--source hub|builtin|local] [--enabled-only]\n"
|
||||
" List installed skills; --enabled-only filters to the active profile's live set\n"
|
||||
" [cyan]check[/] [name] Check hub skills for upstream updates\n"
|
||||
" [cyan]update[/] [name] Update hub skills with upstream changes\n"
|
||||
" [cyan]audit[/] [name] Re-scan hub skills for security\n"
|
||||
|
||||
@@ -0,0 +1,152 @@
|
||||
"""``hermes slack ...`` CLI subcommands.
|
||||
|
||||
Today only ``hermes slack manifest`` is implemented — it generates the
|
||||
Slack app manifest JSON for registering every gateway command as a native
|
||||
Slack slash (``/btw``, ``/stop``, ``/model``, …) so users get the same
|
||||
first-class slash UX Discord and Telegram already have.
|
||||
|
||||
Typical workflow::
|
||||
|
||||
$ hermes slack manifest > slack-manifest.json
|
||||
# or:
|
||||
$ hermes slack manifest --write
|
||||
|
||||
Then paste the printed JSON into the Slack app config (Features → App
|
||||
Manifest → Edit) and click Save. Slack diffs the manifest and prompts
|
||||
for reinstall when scopes/commands change.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def _build_full_manifest(bot_name: str, bot_description: str) -> dict:
    """Assemble the complete Slack app manifest for a Hermes deployment.

    The slash-command list is taken verbatim from ``slack_app_manifest()``
    (imported lazily from ``hermes_cli.commands``) so it stays in sync with
    whatever that helper generates. Every other section — display info, bot
    user, OAuth scopes, event subscriptions, socket mode — is filled in
    with fixed defaults that users can still tweak in the Slack UI after
    pasting.

    Args:
        bot_name: Bot display name. Truncated to 35 characters for
            ``display_information.name`` and to 80 characters for
            ``features.bot_user.display_name``.
        bot_description: Short app description, truncated to 140
            characters. A falsy value falls back to
            ``"Your Hermes agent on Slack"``.

    Returns:
        A JSON-serialisable dict with the ``_metadata``,
        ``display_information``, ``features``, ``oauth_config`` and
        ``settings`` sections, in that order.
    """
    from hermes_cli.commands import slack_app_manifest

    slash_commands = slack_app_manifest()["features"]["slash_commands"]

    metadata = {
        "major_version": 1,
        "minor_version": 1,
    }

    display_information = {
        "name": bot_name[:35],
        "description": (bot_description or "Your Hermes agent on Slack")[:140],
        "background_color": "#1a1a2e",
    }

    features = {
        "bot_user": {
            "display_name": bot_name[:80],
            "always_online": True,
        },
        "slash_commands": slash_commands,
        "assistant_view": {
            "assistant_description": "Chat with Hermes in threads and DMs.",
        },
    }

    # OAuth scopes requested for the bot token.
    bot_scopes = [
        "app_mentions:read",
        "assistant:write",
        "channels:history",
        "channels:read",
        "chat:write",
        "commands",
        "files:read",
        "files:write",
        "groups:history",
        "im:history",
        "im:read",
        "im:write",
        "users:read",
    ]

    # Events the bot subscribes to.
    bot_events = [
        "app_mention",
        "assistant_thread_context_changed",
        "assistant_thread_started",
        "message.channels",
        "message.groups",
        "message.im",
    ]

    settings = {
        "event_subscriptions": {
            "bot_events": bot_events,
        },
        "interactivity": {
            "is_enabled": True,
        },
        "org_deploy_enabled": False,
        "socket_mode_enabled": True,
        "token_rotation_enabled": False,
    }

    # Section order is kept stable so the emitted JSON does not reshuffle.
    return {
        "_metadata": metadata,
        "display_information": display_information,
        "features": features,
        "oauth_config": {
            "scopes": {
                "bot": bot_scopes,
            },
        },
        "settings": settings,
    }
|
||||
|
||||
|
||||
def slack_manifest_command(args) -> int:
    """Print or write a Slack app manifest JSON.

    Flags (all parsed in ``hermes_cli/main.py``):
        --write [PATH]      Write to file instead of stdout (default path:
                            ``$HERMES_HOME/slack-manifest.json``)
        --name NAME         Override the bot display name (default: "Hermes")
        --description DESC  Override the bot description
        --slashes-only      Emit only the ``features.slash_commands`` array (for
                            merging into an existing manifest manually)

    Args:
        args: Parsed-arguments namespace. Every attribute is read with
            ``getattr`` and a default, so missing attributes are tolerated.
            ``args.write`` is interpreted as: ``None`` or ``False`` → print
            to stdout; ``True`` → write to the default location; anything
            else → treated as a filesystem path (``~`` is expanded).

    Returns:
        ``0`` on success. Filesystem errors from creating the parent
        directory or writing the file propagate to the caller.
    """
    name = getattr(args, "name", None) or "Hermes"
    description = getattr(args, "description", None) or "Your Hermes agent on Slack"

    if getattr(args, "slashes_only", False):
        from hermes_cli.commands import slack_app_manifest

        manifest = slack_app_manifest()["features"]["slash_commands"]
    else:
        manifest = _build_full_manifest(name, description)

    payload = json.dumps(manifest, indent=2, ensure_ascii=False) + "\n"

    write_target = getattr(args, "write", None)

    # ``False`` must mean "flag not given" just like ``None``; otherwise it
    # would fall through to ``Path(False)`` below and raise TypeError.
    if write_target is None or write_target is False:
        sys.stdout.write(payload)
        return 0

    if write_target is True:
        # --write with no value → default location
        try:
            from hermes_constants import get_hermes_home

            target = Path(get_hermes_home()) / "slack-manifest.json"
        except Exception:
            # Deliberate best-effort: if the Hermes home cannot be resolved
            # for any reason, fall back to ~/.hermes.
            target = Path.home() / ".hermes" / "slack-manifest.json"
    else:
        target = Path(write_target).expanduser()

    target.parent.mkdir(parents=True, exist_ok=True)
    target.write_text(payload, encoding="utf-8")

    # Status and instructions go to stderr so stdout carries only manifest JSON.
    print(f"Slack manifest written to: {target}", file=sys.stderr)
    print(
        "\nNext steps:\n"
        " 1. Open https://api.slack.com/apps and pick your Hermes app\n"
        " (or create a new one: Create New App → From an app manifest).\n"
        " 2. Features → App Manifest → paste the contents of\n"
        f" {target}\n"
        " 3. Save; Slack will prompt to reinstall the app if scopes or\n"
        " slash commands changed.\n"
        " 4. Make sure Socket Mode is enabled and you have a bot token\n"
        " (xoxb-...) and app token (xapp-...) configured via\n"
        " `hermes setup`.\n",
        file=sys.stderr,
    )
    return 0
|
||||
@@ -326,7 +326,8 @@ def show_status(args):
|
||||
"WeCom Callback": ("WECOM_CALLBACK_CORP_ID", None),
|
||||
"Weixin": ("WEIXIN_ACCOUNT_ID", "WEIXIN_HOME_CHANNEL"),
|
||||
"BlueBubbles": ("BLUEBUBBLES_SERVER_URL", "BLUEBUBBLES_HOME_CHANNEL"),
|
||||
"QQBot": ("QQ_APP_ID", "QQBOT_HOME_CHANNEL"),
|
||||
"QQBot": ("QQ_APP_ID", "QQ_HOME_CHANNEL"),
|
||||
"Yuanbao": ("YUANBAO_APP_ID", "YUANBAO_HOME_CHANNEL"),
|
||||
}
|
||||
|
||||
for name, (token_var, home_var) in platforms.items():
|
||||
|
||||
@@ -20,10 +20,10 @@ def get_provider_request_timeout(
|
||||
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
except ImportError:
|
||||
config = load_config()
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
config = load_config()
|
||||
providers = config.get("providers", {}) if isinstance(config, dict) else {}
|
||||
provider_config = (
|
||||
providers.get(provider_id, {}) if isinstance(providers, dict) else {}
|
||||
@@ -49,10 +49,10 @@ def get_provider_stale_timeout(
|
||||
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
except ImportError:
|
||||
config = load_config()
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
config = load_config()
|
||||
providers = config.get("providers", {}) if isinstance(config, dict) else {}
|
||||
provider_config = (
|
||||
providers.get(provider_id, {}) if isinstance(providers, dict) else {}
|
||||
|
||||
+2
-3
@@ -10,8 +10,7 @@ import random
|
||||
|
||||
TIPS = [
|
||||
# --- Slash Commands ---
|
||||
"/btw <question> asks a quick side question without tools or history — great for clarifications.",
|
||||
"/background <prompt> runs a task in a separate session while your current one stays free.",
|
||||
"/background <prompt> (alias /bg or /btw) runs a task in a separate session while your current one stays free.",
|
||||
"/branch forks the current session so you can explore a different direction without losing progress.",
|
||||
"/compress manually compresses conversation context when things get long.",
|
||||
"/rollback lists filesystem checkpoints — restore files the agent modified to any prior state.",
|
||||
@@ -107,7 +106,7 @@ TIPS = [
|
||||
"Set display.streaming: true to see tokens appear in real time as the model generates.",
|
||||
"Set display.show_reasoning: true to watch the model's chain-of-thought reasoning.",
|
||||
"Set display.compact: true to reduce whitespace in output for denser information.",
|
||||
"Set display.busy_input_mode: queue to queue messages instead of interrupting the agent.",
|
||||
"Set display.busy_input_mode: queue to queue messages instead of interrupting the agent, or steer to inject them mid-run via /steer.",
|
||||
"Set display.resume_display: minimal to skip the full conversation recap on session resume.",
|
||||
"Set compression.threshold: 0.50 to control when auto-compression fires (default: 50% of context).",
|
||||
"Set agent.max_turns: 200 to let the agent take more tool-calling steps per turn.",
|
||||
|
||||
+137
-15
@@ -11,6 +11,7 @@ the `platform_toolsets` key.
|
||||
|
||||
import json as _json
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional, Set
|
||||
@@ -25,7 +26,7 @@ from hermes_cli.nous_subscription import (
|
||||
get_nous_subscription_features,
|
||||
)
|
||||
from tools.tool_backend_helpers import fal_key_is_configured, managed_nous_tools_enabled
|
||||
from utils import base_url_hostname
|
||||
from utils import base_url_hostname, is_truthy_value
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -68,25 +69,59 @@ CONFIGURABLE_TOOLSETS = [
|
||||
("rl", "🧪 RL Training", "Tinker-Atropos training tools"),
|
||||
("homeassistant", "🏠 Home Assistant", "smart home device control"),
|
||||
("spotify", "🎵 Spotify", "playback, search, playlists, library"),
|
||||
("discord", "💬 Discord (read/participate)", "fetch messages, search members, create thread"),
|
||||
("discord_admin", "🛡️ Discord Server Admin", "list channels/roles, pin, assign roles"),
|
||||
("yuanbao", "🤖 Yuanbao", "group info, member queries, DM"),
|
||||
]
|
||||
|
||||
# Toolsets that are OFF by default for new installs.
|
||||
# They're still in _HERMES_CORE_TOOLS (available at runtime if enabled),
|
||||
# but the setup checklist won't pre-select them for first-time users.
|
||||
_DEFAULT_OFF_TOOLSETS = {"moa", "homeassistant", "rl", "spotify"}
|
||||
_DEFAULT_OFF_TOOLSETS = {"moa", "homeassistant", "rl", "spotify", "discord", "discord_admin"}
|
||||
|
||||
# Platform-scoped toolsets: only appear in the `hermes tools` checklist for
|
||||
# these platforms, and only resolve/save for these platforms. A toolset
|
||||
# absent from this map is available on every platform (current behaviour).
|
||||
#
|
||||
# Use this for tools whose APIs only make sense on one platform (Discord
|
||||
# server admin, Slack workspace admin, etc.). Keeps every other platform's
|
||||
# checklist from filling up with irrelevant toggles.
|
||||
_TOOLSET_PLATFORM_RESTRICTIONS: Dict[str, Set[str]] = {
|
||||
"discord": {"discord"},
|
||||
"discord_admin": {"discord"},
|
||||
}
|
||||
|
||||
|
||||
def _toolset_allowed_for_platform(ts_key: str, platform: str) -> bool:
    """Tell whether toolset ``ts_key`` may be configured on ``platform``.

    Looks ``ts_key`` up in ``_TOOLSET_PLATFORM_RESTRICTIONS``. A toolset
    with no entry there is unrestricted and allowed on every platform;
    otherwise ``platform`` must be one of the listed platforms.
    """
    permitted_platforms = _TOOLSET_PLATFORM_RESTRICTIONS.get(ts_key)
    if permitted_platforms is None:
        # No restriction entry: available everywhere (the default).
        return True
    return platform in permitted_platforms
|
||||
|
||||
|
||||
def _get_effective_configurable_toolsets():
|
||||
"""Return CONFIGURABLE_TOOLSETS + any plugin-provided toolsets.
|
||||
|
||||
Plugin toolsets are appended at the end so they appear after the
|
||||
built-in toolsets in the TUI checklist.
|
||||
built-in toolsets in the TUI checklist. A plugin whose toolset key
|
||||
already appears in ``CONFIGURABLE_TOOLSETS`` is skipped — bundled
|
||||
plugins (e.g. ``plugins/spotify``) share their toolset key with the
|
||||
built-in entry, and we want the built-in label/description to win.
|
||||
Without the dedupe, ``hermes tools`` → "reconfigure existing" would
|
||||
list the same toolset twice.
|
||||
"""
|
||||
result = list(CONFIGURABLE_TOOLSETS)
|
||||
seen = {ts_key for ts_key, _, _ in result}
|
||||
try:
|
||||
from hermes_cli.plugins import discover_plugins, get_plugin_toolsets
|
||||
discover_plugins() # idempotent — ensures plugins are loaded
|
||||
result.extend(get_plugin_toolsets())
|
||||
for entry in get_plugin_toolsets():
|
||||
if entry[0] in seen:
|
||||
continue
|
||||
seen.add(entry[0])
|
||||
result.append(entry)
|
||||
except Exception:
|
||||
pass
|
||||
return result
|
||||
@@ -591,7 +626,7 @@ def _get_platform_tools(
|
||||
include_default_mcp_servers: bool = True,
|
||||
) -> Set[str]:
|
||||
"""Resolve which individual toolset names are enabled for a platform."""
|
||||
from toolsets import resolve_toolset
|
||||
from toolsets import resolve_toolset, TOOLSETS
|
||||
|
||||
platform_toolsets = config.get("platform_toolsets") or {}
|
||||
toolset_names = platform_toolsets.get(platform)
|
||||
@@ -605,6 +640,8 @@ def _get_platform_tools(
|
||||
toolset_names = [str(ts) for ts in toolset_names]
|
||||
|
||||
configurable_keys = {ts_key for ts_key, _, _ in CONFIGURABLE_TOOLSETS}
|
||||
plugin_ts_keys = _get_plugin_toolset_keys()
|
||||
platform_default_keys = {p["default_toolset"] for p in PLATFORMS.values()}
|
||||
|
||||
# If the saved list contains any configurable keys directly, the user
|
||||
# has explicitly configured this platform — use direct membership.
|
||||
@@ -614,7 +651,10 @@ def _get_platform_tools(
|
||||
has_explicit_config = any(ts in configurable_keys for ts in toolset_names)
|
||||
|
||||
if has_explicit_config:
|
||||
enabled_toolsets = {ts for ts in toolset_names if ts in configurable_keys}
|
||||
enabled_toolsets = {
|
||||
ts for ts in toolset_names
|
||||
if ts in configurable_keys and _toolset_allowed_for_platform(ts, platform)
|
||||
}
|
||||
else:
|
||||
# No explicit config — fall back to resolving composite toolset names
|
||||
# (e.g. "hermes-cli") to individual tool names and reverse-mapping.
|
||||
@@ -624,14 +664,61 @@ def _get_platform_tools(
|
||||
|
||||
enabled_toolsets = set()
|
||||
for ts_key, _, _ in CONFIGURABLE_TOOLSETS:
|
||||
if not _toolset_allowed_for_platform(ts_key, platform):
|
||||
continue
|
||||
ts_tools = set(resolve_toolset(ts_key))
|
||||
if ts_tools and ts_tools.issubset(all_tool_names):
|
||||
enabled_toolsets.add(ts_key)
|
||||
|
||||
default_off = set(_DEFAULT_OFF_TOOLSETS)
|
||||
if platform in default_off:
|
||||
# Legacy safety: if the platform's own name matches a default-off
|
||||
# toolset (e.g. `homeassistant` platform + `homeassistant` toolset),
|
||||
# keep that toolset enabled on first install. Skip this dodge for
|
||||
# platform-restricted toolsets — those are always opt-in even on
|
||||
# their own platform (e.g. `discord` + `discord` should stay OFF).
|
||||
if platform in default_off and platform not in _TOOLSET_PLATFORM_RESTRICTIONS:
|
||||
default_off.remove(platform)
|
||||
# Home Assistant is already runtime-gated by its check_fn (requires
|
||||
# HASS_TOKEN to register any tools). When a user has configured
|
||||
# HASS_TOKEN, they've explicitly opted in — don't also strip it via
|
||||
# _DEFAULT_OFF_TOOLSETS, which would silently drop HA from platforms
|
||||
# (e.g. cron) that run through _get_platform_tools without an
|
||||
# explicit saved toolset list. Without this, Norbert's HA cron jobs
|
||||
# regressed after #14798 made cron honor per-platform tool config.
|
||||
if "homeassistant" in default_off and os.getenv("HASS_TOKEN"):
|
||||
default_off.remove("homeassistant")
|
||||
enabled_toolsets -= default_off
|
||||
|
||||
# Recover non-configurable platform toolsets (e.g. discord, feishu_doc,
|
||||
# feishu_drive). These are part of the platform's default composite but
|
||||
# absent from CONFIGURABLE_TOOLSETS, so they can't appear in the TUI
|
||||
# checklist or in a user-saved config. Must run in BOTH branches —
|
||||
# otherwise saving via `hermes tools` (which flips has_explicit_config
|
||||
# to True) silently drops them.
|
||||
platform_tool_universe = set(resolve_toolset(PLATFORMS[platform]["default_toolset"]))
|
||||
configurable_tool_universe = set()
|
||||
for ck in configurable_keys:
|
||||
configurable_tool_universe.update(resolve_toolset(ck))
|
||||
claimed = set()
|
||||
for ts_key in enabled_toolsets:
|
||||
claimed.update(resolve_toolset(ts_key))
|
||||
skip = configurable_keys | plugin_ts_keys | platform_default_keys
|
||||
skip |= {k for k in TOOLSETS if k.startswith("hermes-")}
|
||||
skip |= set(_DEFAULT_OFF_TOOLSETS) - {platform}
|
||||
for ts_key, ts_def in TOOLSETS.items():
|
||||
if ts_key in skip:
|
||||
continue
|
||||
if ts_def.get("includes"):
|
||||
continue
|
||||
ts_tools = set(resolve_toolset(ts_key))
|
||||
if not ts_tools or not ts_tools.issubset(platform_tool_universe):
|
||||
continue
|
||||
if ts_tools.issubset(configurable_tool_universe):
|
||||
continue
|
||||
if not ts_tools.issubset(claimed):
|
||||
enabled_toolsets.add(ts_key)
|
||||
claimed.update(ts_tools)
|
||||
|
||||
# Plugin toolsets: enabled by default unless explicitly disabled, or
|
||||
# unless the toolset is in _DEFAULT_OFF_TOOLSETS (e.g. spotify —
|
||||
# shipped as a bundled plugin but user must opt in via `hermes tools`
|
||||
@@ -639,7 +726,6 @@ def _get_platform_tools(
|
||||
# A plugin toolset is "known" for a platform once `hermes tools`
|
||||
# has been saved for that platform (tracked via known_plugin_toolsets).
|
||||
# Unknown plugins default to enabled; known-but-absent = disabled.
|
||||
plugin_ts_keys = _get_plugin_toolset_keys()
|
||||
if plugin_ts_keys:
|
||||
known_map = config.get("known_plugin_toolsets", {})
|
||||
known_for_platform = set(known_map.get(platform, []))
|
||||
@@ -657,7 +743,6 @@ def _get_platform_tools(
|
||||
|
||||
# Preserve any explicit non-configurable toolset entries (for example,
|
||||
# custom toolsets or MCP server names saved in platform_toolsets).
|
||||
platform_default_keys = {p["default_toolset"] for p in PLATFORMS.values()}
|
||||
explicit_passthrough = {
|
||||
ts
|
||||
for ts in toolset_names
|
||||
@@ -703,6 +788,14 @@ def _save_platform_tools(config: dict, platform: str, enabled_toolset_keys: Set[
|
||||
"""
|
||||
config.setdefault("platform_toolsets", {})
|
||||
|
||||
# Drop platform-scoped toolsets that don't apply here. Prevents the
|
||||
# "Configure all platforms" checklist (or a hand-edited config.yaml)
|
||||
# from turning on, say, the `discord` toolset for Telegram.
|
||||
enabled_toolset_keys = {
|
||||
ts for ts in enabled_toolset_keys
|
||||
if _toolset_allowed_for_platform(ts, platform)
|
||||
}
|
||||
|
||||
# Get the set of all configurable toolset keys (built-in + plugin)
|
||||
configurable_keys = {ts_key for ts_key, _, _ in CONFIGURABLE_TOOLSETS}
|
||||
plugin_keys = _get_plugin_toolset_keys()
|
||||
@@ -717,6 +810,7 @@ def _save_platform_tools(config: dict, platform: str, enabled_toolset_keys: Set[
|
||||
existing_toolsets = config.get("platform_toolsets", {}).get(platform, [])
|
||||
if not isinstance(existing_toolsets, list):
|
||||
existing_toolsets = []
|
||||
existing_toolsets = [str(ts) for ts in existing_toolsets]
|
||||
|
||||
# Preserve any entries that are NOT configurable toolsets and NOT platform
|
||||
# defaults (i.e. only MCP server names should be preserved)
|
||||
@@ -724,6 +818,11 @@ def _save_platform_tools(config: dict, platform: str, enabled_toolset_keys: Set[
|
||||
entry for entry in existing_toolsets
|
||||
if entry not in configurable_keys and entry not in platform_default_keys
|
||||
}
|
||||
# Opening `hermes tools` is the user's opt-in to reconfigure tools, so treat
|
||||
# saving from the picker as consent to clear the "no_mcp" sentinel. The
|
||||
# picker has no checkbox for no_mcp, so without this users who once set it
|
||||
# by hand could never re-enable MCP servers through the UI.
|
||||
preserved_entries.discard("no_mcp")
|
||||
|
||||
# Merge preserved entries with new enabled toolsets
|
||||
config["platform_toolsets"][platform] = sorted(enabled_toolset_keys | preserved_entries)
|
||||
@@ -831,7 +930,7 @@ def _estimate_tool_tokens() -> Dict[str, int]:
|
||||
return _tool_token_cache
|
||||
|
||||
|
||||
def _prompt_toolset_checklist(platform_label: str, enabled: Set[str]) -> Set[str]:
|
||||
def _prompt_toolset_checklist(platform_label: str, enabled: Set[str], platform: str = "cli") -> Set[str]:
|
||||
"""Multi-select checklist of toolsets. Returns set of selected toolset keys."""
|
||||
from hermes_cli.curses_ui import curses_checklist
|
||||
from toolsets import resolve_toolset
|
||||
@@ -839,7 +938,12 @@ def _prompt_toolset_checklist(platform_label: str, enabled: Set[str]) -> Set[str
|
||||
# Pre-compute per-tool token counts (cached after first call).
|
||||
tool_tokens = _estimate_tool_tokens()
|
||||
|
||||
effective = _get_effective_configurable_toolsets()
|
||||
effective_all = _get_effective_configurable_toolsets()
|
||||
# Drop platform-scoped toolsets that don't apply to this platform.
|
||||
effective = [
|
||||
(k, l, d) for (k, l, d) in effective_all
|
||||
if _toolset_allowed_for_platform(k, platform)
|
||||
]
|
||||
|
||||
labels = []
|
||||
for ts_key, ts_label, ts_desc in effective:
|
||||
@@ -1084,7 +1188,7 @@ def _is_provider_active(provider: dict, config: dict) -> bool:
|
||||
configured_provider = image_cfg.get("provider")
|
||||
if configured_provider not in (None, "", "fal"):
|
||||
return False
|
||||
if image_cfg.get("use_gateway") is False:
|
||||
if image_cfg.get("use_gateway") is not None and not is_truthy_value(image_cfg.get("use_gateway"), default=False):
|
||||
return False
|
||||
return feature.managed_by_nous
|
||||
if provider.get("tts_provider"):
|
||||
@@ -1116,7 +1220,7 @@ def _is_provider_active(provider: dict, config: dict) -> bool:
|
||||
return (
|
||||
provider["imagegen_backend"] == "fal"
|
||||
and configured_provider in (None, "", "fal")
|
||||
and not image_cfg.get("use_gateway")
|
||||
and not is_truthy_value(image_cfg.get("use_gateway"), default=False)
|
||||
)
|
||||
return False
|
||||
|
||||
@@ -1753,7 +1857,7 @@ def tools_command(args=None, first_install: bool = False, config: dict = None):
|
||||
checklist_preselected = current_enabled - _DEFAULT_OFF_TOOLSETS
|
||||
|
||||
# Show checklist
|
||||
new_enabled = _prompt_toolset_checklist(pinfo["label"], checklist_preselected)
|
||||
new_enabled = _prompt_toolset_checklist(pinfo["label"], checklist_preselected, pkey)
|
||||
|
||||
added = new_enabled - current_enabled
|
||||
removed = current_enabled - new_enabled
|
||||
@@ -2109,7 +2213,11 @@ def _apply_mcp_change(config: dict, targets: List[str], action: str) -> Set[str]
|
||||
|
||||
def _print_tools_list(enabled_toolsets: set, mcp_servers: dict, platform: str = "cli"):
|
||||
"""Print a summary of enabled/disabled toolsets and MCP tool filters."""
|
||||
effective = _get_effective_configurable_toolsets()
|
||||
effective_all = _get_effective_configurable_toolsets()
|
||||
effective = [
|
||||
(k, l, d) for (k, l, d) in effective_all
|
||||
if _toolset_allowed_for_platform(k, platform)
|
||||
]
|
||||
builtin_keys = {ts_key for ts_key, _, _ in CONFIGURABLE_TOOLSETS}
|
||||
|
||||
print(f"Built-in toolsets ({platform}):")
|
||||
@@ -2175,6 +2283,20 @@ def tools_disable_enable_command(args):
|
||||
_print_error(f"Unknown toolset '{name}'")
|
||||
toolset_targets = [t for t in toolset_targets if t in valid_toolsets]
|
||||
|
||||
# Reject platform-scoped toolsets on platforms that don't allow them.
|
||||
restricted_targets = [
|
||||
t for t in toolset_targets
|
||||
if not _toolset_allowed_for_platform(t, platform)
|
||||
]
|
||||
if restricted_targets:
|
||||
for name in restricted_targets:
|
||||
allowed = sorted(_TOOLSET_PLATFORM_RESTRICTIONS.get(name) or set())
|
||||
_print_error(
|
||||
f"Toolset '{name}' is not available on platform '{platform}' "
|
||||
f"(only: {', '.join(allowed)})"
|
||||
)
|
||||
toolset_targets = [t for t in toolset_targets if t not in restricted_targets]
|
||||
|
||||
if toolset_targets:
|
||||
_apply_toolset_change(config, platform, toolset_targets, action)
|
||||
|
||||
|
||||
+11
-13
@@ -287,7 +287,7 @@ _SCHEMA_OVERRIDES: Dict[str, Dict[str, Any]] = {
|
||||
"display.busy_input_mode": {
|
||||
"type": "select",
|
||||
"description": "Input behavior while agent is running",
|
||||
"options": ["interrupt", "queue"],
|
||||
"options": ["interrupt", "queue", "steer"],
|
||||
},
|
||||
"memory.provider": {
|
||||
"type": "select",
|
||||
@@ -2212,7 +2212,7 @@ async def get_usage_analytics(days: int = 30):
|
||||
cutoff = time.time() - (days * 86400)
|
||||
cur = db._conn.execute("""
|
||||
SELECT date(started_at, 'unixepoch') as day,
|
||||
SUM(input_tokens) as input_tokens,
|
||||
SUM(input_tokens + COALESCE(cache_read_tokens, 0) + COALESCE(cache_write_tokens, 0)) as input_tokens,
|
||||
SUM(output_tokens) as output_tokens,
|
||||
SUM(cache_read_tokens) as cache_read_tokens,
|
||||
SUM(reasoning_tokens) as reasoning_tokens,
|
||||
@@ -2227,18 +2227,18 @@ async def get_usage_analytics(days: int = 30):
|
||||
|
||||
cur2 = db._conn.execute("""
|
||||
SELECT model,
|
||||
SUM(input_tokens) as input_tokens,
|
||||
SUM(input_tokens + COALESCE(cache_read_tokens, 0) + COALESCE(cache_write_tokens, 0)) as input_tokens,
|
||||
SUM(output_tokens) as output_tokens,
|
||||
COALESCE(SUM(estimated_cost_usd), 0) as estimated_cost,
|
||||
COUNT(*) as sessions,
|
||||
SUM(COALESCE(api_call_count, 0)) as api_calls
|
||||
FROM sessions WHERE started_at > ? AND model IS NOT NULL
|
||||
GROUP BY model ORDER BY SUM(input_tokens) + SUM(output_tokens) DESC
|
||||
GROUP BY model ORDER BY SUM(input_tokens + COALESCE(cache_read_tokens, 0) + COALESCE(cache_write_tokens, 0)) + SUM(output_tokens) DESC
|
||||
""", (cutoff,))
|
||||
by_model = [dict(r) for r in cur2.fetchall()]
|
||||
|
||||
cur3 = db._conn.execute("""
|
||||
SELECT SUM(input_tokens) as total_input,
|
||||
SELECT SUM(input_tokens + COALESCE(cache_read_tokens, 0) + COALESCE(cache_write_tokens, 0)) as total_input,
|
||||
SUM(output_tokens) as total_output,
|
||||
SUM(cache_read_tokens) as total_cache_read,
|
||||
SUM(reasoning_tokens) as total_reasoning,
|
||||
@@ -2327,16 +2327,14 @@ def _resolve_chat_argv(
|
||||
from hermes_cli.main import PROJECT_ROOT, _make_tui_argv
|
||||
|
||||
argv, cwd = _make_tui_argv(PROJECT_ROOT / "ui-tui", tui_dev=False)
|
||||
env: Optional[dict] = None
|
||||
env = os.environ.copy()
|
||||
env.setdefault("NODE_ENV", "production")
|
||||
|
||||
if resume or sidecar_url:
|
||||
env = os.environ.copy()
|
||||
if resume:
|
||||
env["HERMES_TUI_RESUME"] = resume
|
||||
|
||||
if resume:
|
||||
env["HERMES_TUI_RESUME"] = resume
|
||||
|
||||
if sidecar_url:
|
||||
env["HERMES_TUI_SIDECAR_URL"] = sidecar_url
|
||||
if sidecar_url:
|
||||
env["HERMES_TUI_SIDECAR_URL"] = sidecar_url
|
||||
|
||||
return list(argv), str(cwd) if cwd else None, env
|
||||
|
||||
|
||||
+3
-4
@@ -195,10 +195,6 @@ def setup_logging(
|
||||
The ``logs/`` directory where files are written.
|
||||
"""
|
||||
global _logging_initialized
|
||||
if _logging_initialized and not force:
|
||||
home = hermes_home or get_hermes_home()
|
||||
return home / "logs"
|
||||
|
||||
home = hermes_home or get_hermes_home()
|
||||
log_dir = home / "logs"
|
||||
log_dir.mkdir(parents=True, exist_ok=True)
|
||||
@@ -248,6 +244,9 @@ def setup_logging(
|
||||
log_filter=_ComponentFilter(COMPONENT_PREFIXES["gateway"]),
|
||||
)
|
||||
|
||||
if _logging_initialized and not force:
|
||||
return log_dir
|
||||
|
||||
# Ensure root logger level is low enough for the handlers to fire.
|
||||
if root.level == logging.NOTSET or root.level > level:
|
||||
root.setLevel(level)
|
||||
|
||||
+177
-22
@@ -31,7 +31,7 @@ T = TypeVar("T")
|
||||
|
||||
DEFAULT_DB_PATH = get_hermes_home() / "state.db"
|
||||
|
||||
SCHEMA_VERSION = 8
|
||||
SCHEMA_VERSION = 9
|
||||
|
||||
SCHEMA_SQL = """
|
||||
CREATE TABLE IF NOT EXISTS schema_version (
|
||||
@@ -83,7 +83,8 @@ CREATE TABLE IF NOT EXISTS messages (
|
||||
reasoning TEXT,
|
||||
reasoning_content TEXT,
|
||||
reasoning_details TEXT,
|
||||
codex_reasoning_items TEXT
|
||||
codex_reasoning_items TEXT,
|
||||
codex_message_items TEXT
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS state_meta (
|
||||
@@ -356,6 +357,15 @@ class SessionDB:
|
||||
except sqlite3.OperationalError:
|
||||
pass # Column already exists
|
||||
cursor.execute("UPDATE schema_version SET version = 8")
|
||||
if current_version < 9:
|
||||
# v9: preserve replayable Codex assistant message ids/phases so
|
||||
# follow-up turns can rebuild Responses API message items instead
|
||||
# of flattening everything to plain assistant text.
|
||||
try:
|
||||
cursor.execute('ALTER TABLE messages ADD COLUMN "codex_message_items" TEXT')
|
||||
except sqlite3.OperationalError:
|
||||
pass # Column already exists
|
||||
cursor.execute("UPDATE schema_version SET version = 9")
|
||||
|
||||
# Unique title index — always ensure it exists (safe to run after migrations
|
||||
# since the title column is guaranteed to exist at this point)
|
||||
@@ -822,7 +832,18 @@ class SessionDB:
|
||||
params = []
|
||||
|
||||
if not include_children:
|
||||
where_clauses.append("s.parent_session_id IS NULL")
|
||||
# Show root sessions and branch sessions (whose parent ended with
|
||||
# end_reason='branched' before the child was created), while still
|
||||
# hiding sub-agent runs and compression continuations (which also
|
||||
# carry a parent_session_id but were spawned while the parent was
|
||||
# still live — i.e., started_at < parent.ended_at).
|
||||
where_clauses.append(
|
||||
"(s.parent_session_id IS NULL"
|
||||
" OR EXISTS (SELECT 1 FROM sessions p"
|
||||
" WHERE p.id = s.parent_session_id"
|
||||
" AND p.end_reason = 'branched'"
|
||||
" AND s.started_at >= p.ended_at))"
|
||||
)
|
||||
|
||||
if source:
|
||||
where_clauses.append("s.source = ?")
|
||||
@@ -956,6 +977,7 @@ class SessionDB:
|
||||
reasoning_content: str = None,
|
||||
reasoning_details: Any = None,
|
||||
codex_reasoning_items: Any = None,
|
||||
codex_message_items: Any = None,
|
||||
) -> int:
|
||||
"""
|
||||
Append a message to a session. Returns the message row ID.
|
||||
@@ -972,6 +994,10 @@ class SessionDB:
|
||||
json.dumps(codex_reasoning_items)
|
||||
if codex_reasoning_items else None
|
||||
)
|
||||
codex_message_items_json = (
|
||||
json.dumps(codex_message_items)
|
||||
if codex_message_items else None
|
||||
)
|
||||
tool_calls_json = json.dumps(tool_calls) if tool_calls else None
|
||||
|
||||
# Pre-compute tool call count
|
||||
@@ -983,8 +1009,9 @@ class SessionDB:
|
||||
cursor = conn.execute(
|
||||
"""INSERT INTO messages (session_id, role, content, tool_call_id,
|
||||
tool_calls, tool_name, timestamp, token_count, finish_reason,
|
||||
reasoning, reasoning_content, reasoning_details, codex_reasoning_items)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
|
||||
reasoning, reasoning_content, reasoning_details, codex_reasoning_items,
|
||||
codex_message_items)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
|
||||
(
|
||||
session_id,
|
||||
role,
|
||||
@@ -999,6 +1026,7 @@ class SessionDB:
|
||||
reasoning_content,
|
||||
reasoning_details_json,
|
||||
codex_items_json,
|
||||
codex_message_items_json,
|
||||
),
|
||||
)
|
||||
msg_id = cursor.lastrowid
|
||||
@@ -1104,19 +1132,27 @@ class SessionDB:
|
||||
current = child_id
|
||||
return session_id
|
||||
|
||||
def get_messages_as_conversation(self, session_id: str) -> List[Dict[str, Any]]:
|
||||
def get_messages_as_conversation(
|
||||
self, session_id: str, include_ancestors: bool = False
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Load messages in the OpenAI conversation format (role + content dicts).
|
||||
Used by the gateway to restore conversation history.
|
||||
"""
|
||||
session_ids = [session_id]
|
||||
if include_ancestors:
|
||||
session_ids = self._session_lineage_root_to_tip(session_id)
|
||||
|
||||
with self._lock:
|
||||
cursor = self._conn.execute(
|
||||
placeholders = ",".join("?" for _ in session_ids)
|
||||
rows = self._conn.execute(
|
||||
"SELECT role, content, tool_call_id, tool_calls, tool_name, "
|
||||
"reasoning, reasoning_content, reasoning_details, codex_reasoning_items "
|
||||
"FROM messages WHERE session_id = ? ORDER BY timestamp, id",
|
||||
(session_id,),
|
||||
)
|
||||
rows = cursor.fetchall()
|
||||
"reasoning, reasoning_content, reasoning_details, codex_reasoning_items, "
|
||||
"codex_message_items "
|
||||
f"FROM messages WHERE session_id IN ({placeholders}) ORDER BY timestamp, id",
|
||||
tuple(session_ids),
|
||||
).fetchall()
|
||||
|
||||
messages = []
|
||||
for row in rows:
|
||||
msg = {"role": row["role"], "content": row["content"]}
|
||||
@@ -1150,9 +1186,53 @@ class SessionDB:
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
logger.warning("Failed to deserialize codex_reasoning_items, falling back to None")
|
||||
msg["codex_reasoning_items"] = None
|
||||
if row["codex_message_items"]:
|
||||
try:
|
||||
msg["codex_message_items"] = json.loads(row["codex_message_items"])
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
logger.warning("Failed to deserialize codex_message_items, falling back to None")
|
||||
msg["codex_message_items"] = None
|
||||
if include_ancestors and self._is_duplicate_replayed_user_message(messages, msg):
|
||||
continue
|
||||
messages.append(msg)
|
||||
return messages
|
||||
|
||||
def _session_lineage_root_to_tip(self, session_id: str) -> List[str]:
|
||||
if not session_id:
|
||||
return [session_id]
|
||||
|
||||
chain = []
|
||||
current = session_id
|
||||
seen = set()
|
||||
with self._lock:
|
||||
for _ in range(100):
|
||||
if not current or current in seen:
|
||||
break
|
||||
seen.add(current)
|
||||
chain.append(current)
|
||||
row = self._conn.execute(
|
||||
"SELECT parent_session_id FROM sessions WHERE id = ?",
|
||||
(current,),
|
||||
).fetchone()
|
||||
if row is None:
|
||||
break
|
||||
current = row["parent_session_id"] if hasattr(row, "keys") else row[0]
|
||||
return list(reversed(chain)) or [session_id]
|
||||
|
||||
@staticmethod
|
||||
def _is_duplicate_replayed_user_message(messages: List[Dict[str, Any]], msg: Dict[str, Any]) -> bool:
|
||||
if msg.get("role") != "user":
|
||||
return False
|
||||
content = msg.get("content")
|
||||
if not isinstance(content, str) or not content:
|
||||
return False
|
||||
for prev in reversed(messages):
|
||||
if prev.get("role") == "user" and prev.get("content") == content:
|
||||
return True
|
||||
if prev.get("role") == "assistant" and (prev.get("content") or prev.get("tool_calls")):
|
||||
return False
|
||||
return False
|
||||
|
||||
# =========================================================================
|
||||
# Search
|
||||
# =========================================================================
|
||||
@@ -1401,16 +1481,32 @@ class SessionDB:
|
||||
limit: int = 20,
|
||||
offset: int = 0,
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""List sessions, optionally filtered by source."""
|
||||
"""List sessions, optionally filtered by source.
|
||||
|
||||
Returns rows enriched with a computed ``last_active`` column (latest
|
||||
message timestamp for the session, falling back to ``started_at``),
|
||||
ordered by most-recently-used first.
|
||||
"""
|
||||
select_with_last_active = (
|
||||
"SELECT s.*, COALESCE(m.last_active, s.started_at) AS last_active "
|
||||
"FROM sessions s "
|
||||
"LEFT JOIN ("
|
||||
"SELECT session_id, MAX(timestamp) AS last_active "
|
||||
"FROM messages GROUP BY session_id"
|
||||
") m ON m.session_id = s.id "
|
||||
)
|
||||
with self._lock:
|
||||
if source:
|
||||
cursor = self._conn.execute(
|
||||
"SELECT * FROM sessions WHERE source = ? ORDER BY started_at DESC LIMIT ? OFFSET ?",
|
||||
f"{select_with_last_active}"
|
||||
"WHERE s.source = ? "
|
||||
"ORDER BY last_active DESC, s.started_at DESC, s.id DESC LIMIT ? OFFSET ?",
|
||||
(source, limit, offset),
|
||||
)
|
||||
else:
|
||||
cursor = self._conn.execute(
|
||||
"SELECT * FROM sessions ORDER BY started_at DESC LIMIT ? OFFSET ?",
|
||||
f"{select_with_last_active}"
|
||||
"ORDER BY last_active DESC, s.started_at DESC, s.id DESC LIMIT ? OFFSET ?",
|
||||
(limit, offset),
|
||||
)
|
||||
return [dict(row) for row in cursor.fetchall()]
|
||||
@@ -1477,12 +1573,45 @@ class SessionDB:
|
||||
)
|
||||
self._execute_write(_do)
|
||||
|
||||
def delete_session(self, session_id: str) -> bool:
|
||||
@staticmethod
|
||||
def _remove_session_files(sessions_dir: Optional[Path], session_id: str) -> None:
|
||||
"""Remove on-disk transcript files for a session.
|
||||
|
||||
Cleans up ``{session_id}.json``, ``{session_id}.jsonl``, and any
|
||||
``request_dump_{session_id}_*.json`` files left by the gateway.
|
||||
Silently skips files that don't exist and swallows OSError so a
|
||||
filesystem hiccup never blocks a DB operation.
|
||||
"""
|
||||
if sessions_dir is None:
|
||||
return
|
||||
for suffix in (".json", ".jsonl"):
|
||||
p = sessions_dir / f"{session_id}{suffix}"
|
||||
try:
|
||||
p.unlink(missing_ok=True)
|
||||
except OSError:
|
||||
pass
|
||||
# request_dump files use session_id as a prefix component
|
||||
try:
|
||||
for p in sessions_dir.glob(f"request_dump_{session_id}_*.json"):
|
||||
try:
|
||||
p.unlink(missing_ok=True)
|
||||
except OSError:
|
||||
pass
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
def delete_session(
|
||||
self,
|
||||
session_id: str,
|
||||
sessions_dir: Optional[Path] = None,
|
||||
) -> bool:
|
||||
"""Delete a session and all its messages.
|
||||
|
||||
Child sessions are orphaned (parent_session_id set to NULL) rather
|
||||
than cascade-deleted, so they remain accessible independently.
|
||||
Returns True if the session was found and deleted.
|
||||
When *sessions_dir* is provided, also removes on-disk transcript
|
||||
files (``.json`` / ``.jsonl`` / ``request_dump_*``) for the deleted
|
||||
session. Returns True if the session was found and deleted.
|
||||
"""
|
||||
def _do(conn):
|
||||
cursor = conn.execute(
|
||||
@@ -1499,16 +1628,29 @@ class SessionDB:
|
||||
conn.execute("DELETE FROM messages WHERE session_id = ?", (session_id,))
|
||||
conn.execute("DELETE FROM sessions WHERE id = ?", (session_id,))
|
||||
return True
|
||||
return self._execute_write(_do)
|
||||
|
||||
def prune_sessions(self, older_than_days: int = 90, source: str = None) -> int:
|
||||
deleted = self._execute_write(_do)
|
||||
if deleted:
|
||||
self._remove_session_files(sessions_dir, session_id)
|
||||
return deleted
|
||||
|
||||
def prune_sessions(
|
||||
self,
|
||||
older_than_days: int = 90,
|
||||
source: str = None,
|
||||
sessions_dir: Optional[Path] = None,
|
||||
) -> int:
|
||||
"""Delete sessions older than N days. Returns count of deleted sessions.
|
||||
|
||||
Only prunes ended sessions (not active ones). Child sessions outside
|
||||
the prune window are orphaned (parent_session_id set to NULL) rather
|
||||
than cascade-deleted.
|
||||
than cascade-deleted. When *sessions_dir* is provided, also removes
|
||||
on-disk transcript files (``.json`` / ``.jsonl`` /
|
||||
``request_dump_*``) for every pruned session, outside the DB
|
||||
transaction.
|
||||
"""
|
||||
cutoff = time.time() - (older_than_days * 86400)
|
||||
removed_ids: list[str] = []
|
||||
|
||||
def _do(conn):
|
||||
if source:
|
||||
@@ -1538,9 +1680,14 @@ class SessionDB:
|
||||
for sid in session_ids:
|
||||
conn.execute("DELETE FROM messages WHERE session_id = ?", (sid,))
|
||||
conn.execute("DELETE FROM sessions WHERE id = ?", (sid,))
|
||||
removed_ids.append(sid)
|
||||
return len(session_ids)
|
||||
|
||||
return self._execute_write(_do)
|
||||
count = self._execute_write(_do)
|
||||
# Clean up on-disk files outside the DB transaction
|
||||
for sid in removed_ids:
|
||||
self._remove_session_files(sessions_dir, sid)
|
||||
return count
|
||||
|
||||
# ── Meta key/value (for scheduler bookkeeping) ──
|
||||
|
||||
@@ -1594,6 +1741,7 @@ class SessionDB:
|
||||
retention_days: int = 90,
|
||||
min_interval_hours: int = 24,
|
||||
vacuum: bool = True,
|
||||
sessions_dir: Optional[Path] = None,
|
||||
) -> Dict[str, Any]:
|
||||
"""Idempotent auto-maintenance: prune old sessions + optional VACUUM.
|
||||
|
||||
@@ -1601,6 +1749,10 @@ class SessionDB:
|
||||
within ``min_interval_hours`` no-op. Designed to be called once at
|
||||
startup from long-lived entrypoints (CLI, gateway, cron scheduler).
|
||||
|
||||
When *sessions_dir* is provided, on-disk transcript files
|
||||
(``.json`` / ``.jsonl`` / ``request_dump_*``) for pruned sessions
|
||||
are removed as part of the same sweep (issue #3015).
|
||||
|
||||
Never raises. On any failure, logs a warning and returns a dict
|
||||
with ``"error"`` set.
|
||||
|
||||
@@ -1624,7 +1776,10 @@ class SessionDB:
|
||||
except (TypeError, ValueError):
|
||||
pass # corrupt meta; treat as no prior run
|
||||
|
||||
pruned = self.prune_sessions(older_than_days=retention_days)
|
||||
pruned = self.prune_sessions(
|
||||
older_than_days=retention_days,
|
||||
sessions_dir=sessions_dir,
|
||||
)
|
||||
result["pruned"] = pruned
|
||||
|
||||
# Only VACUUM if we actually freed rows — VACUUM on a tight DB
|
||||
|
||||
+39
-23
@@ -24,6 +24,7 @@ import json
|
||||
import asyncio
|
||||
import logging
|
||||
import threading
|
||||
import time
|
||||
from typing import Dict, Any, List, Optional, Tuple
|
||||
|
||||
from tools.registry import discover_builtin_tools, registry
|
||||
@@ -288,30 +289,34 @@ def get_tool_definitions(
|
||||
filtered_tools[i] = {"type": "function", "function": dynamic_schema}
|
||||
break
|
||||
|
||||
# Rebuild discord_server schema based on the bot's privileged intents
|
||||
# (detected from GET /applications/@me) and the user's action allowlist
|
||||
# in config. Hides actions the bot's intents don't support so the
|
||||
# model never attempts them, and annotates fetch_messages when the
|
||||
# Rebuild discord / discord_admin schemas based on the bot's privileged
|
||||
# intents (detected from GET /applications/@me) and the user's action
|
||||
# allowlist in config. Hides actions the bot's intents don't support so
|
||||
# the model never attempts them, and annotates fetch_messages when the
|
||||
# MESSAGE_CONTENT intent is missing.
|
||||
if "discord_server" in available_tool_names:
|
||||
try:
|
||||
from tools.discord_tool import get_dynamic_schema
|
||||
dynamic = get_dynamic_schema()
|
||||
except Exception: # pragma: no cover — defensive, fall back to static
|
||||
dynamic = None
|
||||
if dynamic is None:
|
||||
# Tool filtered out entirely (empty allowlist or detection disabled
|
||||
# the only remaining actions). Drop it from the schema list.
|
||||
filtered_tools = [
|
||||
t for t in filtered_tools
|
||||
if t.get("function", {}).get("name") != "discord_server"
|
||||
]
|
||||
available_tool_names.discard("discord_server")
|
||||
else:
|
||||
for i, td in enumerate(filtered_tools):
|
||||
if td.get("function", {}).get("name") == "discord_server":
|
||||
filtered_tools[i] = {"type": "function", "function": dynamic}
|
||||
break
|
||||
_discord_schema_fns = {
|
||||
"discord": "get_dynamic_schema_core",
|
||||
"discord_admin": "get_dynamic_schema_admin",
|
||||
}
|
||||
for discord_tool_name in _discord_schema_fns:
|
||||
if discord_tool_name in available_tool_names:
|
||||
try:
|
||||
from tools import discord_tool as _dt
|
||||
schema_fn = getattr(_dt, _discord_schema_fns[discord_tool_name])
|
||||
dynamic = schema_fn()
|
||||
except Exception:
|
||||
dynamic = None
|
||||
if dynamic is None:
|
||||
filtered_tools = [
|
||||
t for t in filtered_tools
|
||||
if t.get("function", {}).get("name") != discord_tool_name
|
||||
]
|
||||
available_tool_names.discard(discord_tool_name)
|
||||
else:
|
||||
for i, td in enumerate(filtered_tools):
|
||||
if td.get("function", {}).get("name") == discord_tool_name:
|
||||
filtered_tools[i] = {"type": "function", "function": dynamic}
|
||||
break
|
||||
|
||||
# Strip web tool cross-references from browser_navigate description when
|
||||
# web_search / web_extract are not available. The static schema says
|
||||
@@ -563,6 +568,14 @@ def handle_function_call(
|
||||
except Exception:
|
||||
pass # file_tools may not be loaded yet
|
||||
|
||||
# Measure tool dispatch latency so post_tool_call and
|
||||
# transform_tool_result hooks can observe per-tool duration.
|
||||
# Inspired by Claude Code 2.1.119, which added ``duration_ms`` to
|
||||
# PostToolUse hook inputs so plugin authors can build latency
|
||||
# dashboards, budget alerts, and regression canaries without having
|
||||
# to wrap every tool manually. We use monotonic() so the value is
|
||||
# unaffected by wall-clock adjustments during the call.
|
||||
_dispatch_start = time.monotonic()
|
||||
if function_name == "execute_code":
|
||||
# Prefer the caller-provided list so subagents can't overwrite
|
||||
# the parent's tool set via the process-global.
|
||||
@@ -578,6 +591,7 @@ def handle_function_call(
|
||||
task_id=task_id,
|
||||
user_task=user_task,
|
||||
)
|
||||
duration_ms = int((time.monotonic() - _dispatch_start) * 1000)
|
||||
|
||||
try:
|
||||
from hermes_cli.plugins import invoke_hook
|
||||
@@ -589,6 +603,7 @@ def handle_function_call(
|
||||
task_id=task_id or "",
|
||||
session_id=session_id or "",
|
||||
tool_call_id=tool_call_id or "",
|
||||
duration_ms=duration_ms,
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
@@ -609,6 +624,7 @@ def handle_function_call(
|
||||
task_id=task_id or "",
|
||||
session_id=session_id or "",
|
||||
tool_call_id=tool_call_id or "",
|
||||
duration_ms=duration_ms,
|
||||
)
|
||||
for hook_result in hook_results:
|
||||
if isinstance(hook_result, str):
|
||||
|
||||
+2
-1
@@ -4,7 +4,7 @@ let
|
||||
src = ../ui-tui;
|
||||
npmDeps = pkgs.fetchNpmDeps {
|
||||
inherit src;
|
||||
hash = "sha256-RU4qSHgJPMyfRSEJDzkG4+MReDZDc6QbTD2wisa5QE0=";
|
||||
hash = "sha256-Chz+NW9NXqboXHOa6PKwf5bhAkkcFtKNhvKWwg2XSPc=";
|
||||
};
|
||||
|
||||
npm = hermesNpmLib.mkNpmPassthru { folder = "ui-tui"; attr = "tui"; pname = "hermes-tui"; };
|
||||
@@ -17,6 +17,7 @@ pkgs.buildNpmPackage (npm // {
|
||||
inherit src npmDeps version;
|
||||
|
||||
doCheck = false;
|
||||
npmFlags = [ "--legacy-peer-deps" ];
|
||||
|
||||
installPhase = ''
|
||||
runHook preInstall
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
---
|
||||
name: touchdesigner-mcp
|
||||
description: "Control a running TouchDesigner instance via twozero MCP — create operators, set parameters, wire connections, execute Python, build real-time visuals. 36 native tools."
|
||||
version: 1.0.0
|
||||
version: 1.1.0
|
||||
author: kshitijk4poor
|
||||
license: MIT
|
||||
metadata:
|
||||
@@ -332,6 +332,12 @@ See `references/network-patterns.md` for complete build scripts + shader code.
|
||||
| `references/mcp-tools.md` | Full twozero MCP tool parameter schemas |
|
||||
| `references/python-api.md` | TD Python: op(), scripting, extensions |
|
||||
| `references/troubleshooting.md` | Connection diagnostics, debugging |
|
||||
| `references/glsl.md` | GLSL uniforms, built-in functions, shader templates |
|
||||
| `references/postfx.md` | Post-FX: bloom, CRT, chromatic aberration, feedback glow |
|
||||
| `references/layout-compositor.md` | HUD layout patterns, panel grids, BSP-style layouts |
|
||||
| `references/operator-tips.md` | Wireframe rendering, feedback TOP setup |
|
||||
| `references/geometry-comp.md` | Geometry COMP: instancing, POP vs SOP, morphing |
|
||||
| `references/audio-reactive.md` | Audio band extraction, beat detection, envelope following |
|
||||
| `scripts/setup.sh` | Automated setup script |
|
||||
|
||||
---
|
||||
|
||||
@@ -0,0 +1,175 @@
|
||||
# Audio-Reactive Reference
|
||||
|
||||
Patterns for driving visuals from audio — spectrum analysis, beat detection, envelope following.
|
||||
|
||||
## Audio Input
|
||||
|
||||
```python
|
||||
# Live input from audio interface
|
||||
audio_in = root.create(audiodeviceinCHOP, 'audio_in')
|
||||
audio_in.par.rate = 44100
|
||||
|
||||
# OR: from audio file (for testing)
|
||||
audio_file = root.create(audiofileinCHOP, 'audio_in')
|
||||
audio_file.par.file = '/path/to/track.wav'
|
||||
audio_file.par.play = True
|
||||
audio_file.par.repeat = 'on' # NOT par.loop
|
||||
audio_file.par.playmode = 'locked'
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Audio Band Extraction (Verified TD 2025.32460)
|
||||
|
||||
Use `audiofilterCHOP` for band separation (NOT `selectCHOP` by channel index):
|
||||
|
||||
```python
|
||||
# Audio input
|
||||
af = root.create(audiofileinCHOP, 'audio_in')
|
||||
af.par.file = path
|
||||
af.par.play = True
|
||||
af.par.repeat = 'on'
|
||||
af.par.playmode = 'locked'
|
||||
|
||||
# Low band: lowpass @ 250Hz
|
||||
flt_low = root.create(audiofilterCHOP, 'flt_low')
|
||||
flt_low.par.filter = 'lowpass'
|
||||
flt_low.par.cutofffrequency = 250
|
||||
flt_low.par.rolloff = 2
|
||||
flt_low.inputConnectors[0].connect(af)
|
||||
|
||||
# Mid band: highpass@250 → lowpass@4000
|
||||
flt_mid_hp = root.create(audiofilterCHOP, 'flt_mid_hp')
|
||||
flt_mid_hp.par.filter = 'highpass'
|
||||
flt_mid_hp.par.cutofffrequency = 250
|
||||
flt_mid_hp.par.rolloff = 2
|
||||
flt_mid_hp.inputConnectors[0].connect(af)
|
||||
|
||||
flt_mid_lp = root.create(audiofilterCHOP, 'flt_mid_lp')
|
||||
flt_mid_lp.par.filter = 'lowpass'
|
||||
flt_mid_lp.par.cutofffrequency = 4000
|
||||
flt_mid_lp.par.rolloff = 2
|
||||
flt_mid_lp.inputConnectors[0].connect(flt_mid_hp)
|
||||
|
||||
# High band: highpass @ 4000Hz
|
||||
flt_high = root.create(audiofilterCHOP, 'flt_high')
|
||||
flt_high.par.filter = 'highpass'
|
||||
flt_high.par.cutofffrequency = 4000
|
||||
flt_high.par.rolloff = 2
|
||||
flt_high.inputConnectors[0].connect(af)
|
||||
|
||||
# Per-band: RMS → lag → gain → clamp
|
||||
for name, filt in [('low', flt_low), ('mid', flt_mid_lp), ('high', flt_high)]:
|
||||
rms = root.create(analyzeCHOP, f'rms_{name}')
|
||||
rms.par.function = 'rmspower' # NOT 'rms'
|
||||
rms.inputConnectors[0].connect(filt)
|
||||
|
||||
lag = root.create(lagCHOP, f'lag_{name}')
|
||||
lag.par.lag1 = 0.05 # attack (NOT par.lagin)
|
||||
lag.par.lag2 = 0.25 # release (NOT par.lagout)
|
||||
lag.inputConnectors[0].connect(rms)
|
||||
|
||||
math = root.create(mathCHOP, f'scale_{name}')
|
||||
math.par.gain = 8.0
|
||||
math.inputConnectors[0].connect(lag)
|
||||
|
||||
# mathCHOP has NO par.clamp — use limitCHOP
|
||||
lim = root.create(limitCHOP, f'clamp_{name}')
|
||||
lim.par.type = 'clamp'
|
||||
lim.par.min = 0.0
|
||||
lim.par.max = 1.0
|
||||
lim.inputConnectors[0].connect(math)
|
||||
|
||||
null = root.create(nullCHOP, f'out_{name}')
|
||||
null.inputConnectors[0].connect(lim)
|
||||
null.viewer = True
|
||||
```
|
||||
|
||||
**Key TD 2025 corrections:**
|
||||
- `analyzeCHOP.par.function = 'rmspower'` NOT `'rms'`
|
||||
- `lagCHOP.par.lag1` / `par.lag2` NOT `par.lagin` / `par.lagout`
|
||||
- `mathCHOP` has NO `par.clamp` — use separate `limitCHOP`
|
||||
|
||||
---
|
||||
|
||||
## Beat / Onset Detection
|
||||
|
||||
### Kick Detection (slope → trigger)
|
||||
|
||||
```python
|
||||
slope = root.create(slopeCHOP, 'kick_slope')
|
||||
slope.inputConnectors[0].connect(op('out_low'))
|
||||
|
||||
trig = root.create(triggerCHOP, 'kick_trig')
|
||||
trig.par.threshold = 0.12
|
||||
trig.par.attack = 0.005 # NOT par.attacktime
|
||||
trig.par.decay = 0.15 # NOT par.decaytime
|
||||
trig.par.triggeron = 'increase'
|
||||
trig.inputConnectors[0].connect(slope)
|
||||
|
||||
kick_out = root.create(nullCHOP, 'out_kick')
|
||||
kick_out.inputConnectors[0].connect(trig)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Passing Audio to GLSL
|
||||
|
||||
```python
|
||||
glsl.par.vec0name = 'uLow'
|
||||
glsl.par.vec0valuex.expr = "op('out_low')['chan1']"
|
||||
glsl.par.vec0valuex.mode = ParMode.EXPRESSION
|
||||
|
||||
glsl.par.vec1name = 'uKick'
|
||||
glsl.par.vec1valuex.expr = "op('out_kick')['chan1']"
|
||||
glsl.par.vec1valuex.mode = ParMode.EXPRESSION
|
||||
```
|
||||
|
||||
```glsl
|
||||
uniform float uLow;
|
||||
uniform float uKick;
|
||||
float scale = 1.0 + uKick * 0.4 + uLow * 0.2;
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Standard Audio Bus Pattern
|
||||
|
||||
Recommended structure:
|
||||
|
||||
```
|
||||
audiodeviceinCHOP (audio_in)
|
||||
↓
|
||||
[null_audio_in]
|
||||
├──→ audiofilterCHOP (lowpass@250) → analyzeCHOP → lagCHOP → mathCHOP → limitCHOP → null
|
||||
├──→ audiofilterCHOP (highpass@250) → audiofilterCHOP (lowpass@4k) → analyzeCHOP → lagCHOP → mathCHOP → limitCHOP → null
|
||||
├──→ audiofilterCHOP (highpass@4k) → analyzeCHOP → lagCHOP → mathCHOP → limitCHOP → null
|
||||
│
|
||||
└──→ slopeCHOP → triggerCHOP (beat_trigger)
|
||||
```
|
||||
|
||||
Keep this entire bus inside a `baseCOMP` (e.g., `audio_bus`) and reference via paths from visual networks.
|
||||
|
||||
---
|
||||
|
||||
## MIDI Input
|
||||
|
||||
```python
|
||||
midi_in = root.create(midiinCHOP, 'midi_in')
|
||||
midi_in.par.device = 0 # Check midiinDAT for device index
|
||||
# Outputs channels named by MIDI note/CC: 'ch1n60', 'ch1c74', etc.
|
||||
|
||||
# Map CC to a parameter
|
||||
op('bloom1').par.threshold.mode = ParMode.EXPRESSION
|
||||
op('bloom1').par.threshold.expr = "op('midi_in')['ch1c74'][0]"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## CRITICAL: DO NOT use Lag CHOP for spectrum smoothing
|
||||
|
||||
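Lag CHOP in timeslice mode expands a 256-sample spectrum to 1600-2400 samples, averaging all values to near-zero (~1e-06), so the shader receives no usable data. For spectrum data, use `mathCHOP(gain=8)` directly, or smooth in GLSL via temporal lerp with a feedback texture. This warning is about multi-sample spectrum channels: the band-extraction pattern earlier in this document applies `lagCHOP` (`lag1`/`lag2`) to the `analyzeCHOP` RMS output of each band, not to a raw spectrum.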
|
||||
|
||||
Verified:
|
||||
- Without Lag CHOP: bass bins = 5.0-5.4 (strong, usable)
|
||||
- With Lag CHOP: ALL bins = 0.000001 (dead)
|
||||
@@ -0,0 +1,121 @@
|
||||
# Geometry COMP Reference
|
||||
|
||||
## Creating Geometry COMPs
|
||||
|
||||
```python
|
||||
geo = root.create(geometryCOMP, 'geo1')
|
||||
# Remove default torus
|
||||
for c in list(geo.children):
|
||||
if c.valid: c.destroy()
|
||||
# Build your shape inside
|
||||
```
|
||||
|
||||
## Correct Pattern (shapes inside geo)
|
||||
|
||||
```python
|
||||
# Create shape INSIDE the geo COMP (SOP shown here; per "POPs vs SOPs for Rendering" below, use boxPOP when targeting TD 099)
|
||||
box = geo.create(boxSOP, 'cube')
|
||||
box.par.sizex = 1.5; box.par.sizey = 1.5; box.par.sizez = 1.5
|
||||
|
||||
# For POP-based geometry (TD 099), POPs must be inside:
|
||||
sph = geo.create(spherePOP, 'shape')
|
||||
out1 = geo.create(outPOP, 'out1')
|
||||
out1.inputConnectors[0].connect(sph.outputConnectors[0])
|
||||
```
|
||||
|
||||
## DO NOT: Common Mistakes
|
||||
|
||||
```python
|
||||
# BAD: Don't create geometry at parent level and wire into COMP
|
||||
box = root.create(boxPOP, 'box1') # ← outside geo, won't render
|
||||
|
||||
# BAD: Don't reference parent operators from inside COMP
|
||||
choptopop1.par.chop = '../null1' # ← hidden dependency, breaks on move
|
||||
```
|
||||
|
||||
## Instancing
|
||||
|
||||
```python
|
||||
geo.par.instancing = True
|
||||
geo.par.instanceop = 'sopto1' # relative path to CHOP/SOP with instance data
|
||||
geo.par.instancetx = 'tx'
|
||||
geo.par.instancety = 'ty'
|
||||
geo.par.instancetz = 'tz'
|
||||
```
|
||||
|
||||
### Instance Attribute Names by OP Type
|
||||
|
||||
| OP Type | Attribute Names |
|
||||
|---------|-----------------|
|
||||
| CHOP | Channel names: `tx`, `ty`, `tz` |
|
||||
| SOP/POP | `P(0)`, `P(1)`, `P(2)` for position |
|
||||
| DAT | Column header names from first row |
|
||||
| TOP | `r`, `g`, `b`, `a` |
|
||||
|
||||
### Mixed Data Sources
|
||||
|
||||
```python
|
||||
geo.par.instanceop = 'pos_chop' # Position from CHOP
|
||||
geo.par.instancetx = 'tx'
|
||||
geo.par.instancecolorop = 'color_top' # Color from TOP
|
||||
geo.par.instancecolorr = 'r'
|
||||
```
|
||||
|
||||
## Rendering Setup
|
||||
|
||||
```python
|
||||
# Camera
|
||||
cam = root.create(cameraCOMP, 'cam1')
|
||||
cam.par.tx = 0; cam.par.ty = 0; cam.par.tz = 4
|
||||
|
||||
# Render TOP
|
||||
render = root.create(renderTOP, 'render1')
|
||||
render.par.outputresolution = 'custom'
|
||||
render.par.resolutionw = 1280; render.par.resolutionh = 720
|
||||
render.par.camera = cam.path
|
||||
render.par.geometry = geo.path # accepts path string
|
||||
```
|
||||
|
||||
## POPs vs SOPs for Rendering
|
||||
|
||||
In TD 099, `geometryCOMP` renders **POPs** but NOT SOPs. A `boxSOP` inside a geometry COMP is invisible — no errors.
|
||||
|
||||
```python
|
||||
# WRONG — SOPs don't render (invisible, no errors)
|
||||
box = geo.create(boxSOP, 'cube') # ✗ invisible
|
||||
|
||||
# CORRECT — POPs render
|
||||
box = geo.create(boxPOP, 'cube') # ✓ visible
|
||||
```
|
||||
|
||||
| SOP | POP | Notes |
|
||||
|-----|-----|-------|
|
||||
| `boxSOP` | `boxPOP` | `sizex/y/z`, `surftype` |
|
||||
| `sphereSOP` | `spherePOP` | `radx/y/z`, `freq`, `type` (geodesic/grid/sharedpoles/tetrahedron) |
|
||||
| `torusSOP` | `torusPOP` | TD auto-creates in new geo COMPs |
|
||||
| `circleSOP` | `circlePOP` | |
|
||||
| `gridSOP` | `gridPOP` | |
|
||||
| `tubeSOP` | `tubePOP` | |
|
||||
|
||||
New geometry COMPs auto-create: `in1` (inPOP), `out1` (outPOP), `torus1` (torusPOP). Always clean before building.
|
||||
|
||||
## Morphing Between Shapes (switchPOP)
|
||||
|
||||
```python
|
||||
sw = geo.create(switchPOP, 'shape_switch')
|
||||
sw.par.index.expr = 'int(absTime.seconds / 3) % 4'
|
||||
sw.inputConnectors[0].connect(tetra.outputConnectors[0]) # shape 0
|
||||
sw.inputConnectors[1].connect(box.outputConnectors[0]) # shape 1
|
||||
sw.inputConnectors[2].connect(octa.outputConnectors[0]) # shape 2
|
||||
sw.inputConnectors[3].connect(sphere.outputConnectors[0]) # shape 3
|
||||
|
||||
out = geo.create(outPOP, 'out1')
|
||||
out.inputConnectors[0].connect(sw.outputConnectors[0])
|
||||
```
|
||||
|
||||
`spherePOP.par.type` options: `geodesic`, `grid`, `sharedpoles`, `tetrahedron`. Use `tetrahedron` for platonic solid polyhedra.
|
||||
|
||||
## Misc
|
||||
|
||||
- `connect()` replaces existing connections — no need to disconnect first
|
||||
- `project.name` returns the TOE filename, `project.folder` returns the directory
|
||||
@@ -0,0 +1,151 @@
|
||||
# GLSL Reference
|
||||
|
||||
## Uniforms
|
||||
|
||||
```
|
||||
TouchDesigner GLSL
|
||||
─────────────────────────────
|
||||
vec0name = 'uTime' → uniform float uTime;
|
||||
vec0valuex = 1.0 → uTime value
|
||||
```
|
||||
|
||||
### Pass Time
|
||||
|
||||
```python
|
||||
glsl_op.par.vec0name = 'uTime'
|
||||
glsl_op.par.vec0valuex.mode = ParMode.EXPRESSION
|
||||
glsl_op.par.vec0valuex.expr = 'absTime.seconds'
|
||||
```
|
||||
|
||||
```glsl
|
||||
uniform float uTime;
|
||||
void main() { float t = uTime * 0.5; }
|
||||
```
|
||||
|
||||
### Built-in Uniforms (TOP)
|
||||
|
||||
```glsl
|
||||
// Output resolution (always available)
|
||||
vec2 res = uTDOutputInfo.res.zw;
|
||||
|
||||
// Input texture (only when inputs connected)
|
||||
vec2 inputRes = uTD2DInfos[0].res.zw;
|
||||
vec4 color = texture(sTD2DInputs[0], vUV.st);
|
||||
|
||||
// UV coordinates
|
||||
vUV.st // 0-1 texture coords
|
||||
```
|
||||
|
||||
**IMPORTANT:** `uTD2DInfos` requires input textures. For standalone shaders use `uTDOutputInfo`.
|
||||
|
||||
## Built-in Utility Functions
|
||||
|
||||
```glsl
|
||||
// Noise
|
||||
float TDPerlinNoise(vec2/vec3/vec4 v);
|
||||
float TDSimplexNoise(vec2/vec3/vec4 v);
|
||||
|
||||
// Color conversion
|
||||
vec3 TDHSVToRGB(vec3 c);
|
||||
vec3 TDRGBToHSV(vec3 c);
|
||||
|
||||
// Matrix transforms
|
||||
mat4 TDTranslate(float x, float y, float z);
|
||||
mat3 TDRotateX/Y/Z(float radians);
|
||||
mat3 TDRotateOnAxis(float radians, vec3 axis);
|
||||
mat3 TDScale(float x, float y, float z);
|
||||
mat3 TDRotateToVector(vec3 forward, vec3 up);
|
||||
mat3 TDCreateRotMatrix(vec3 from, vec3 to); // vectors must be normalized
|
||||
|
||||
// Resolution struct
|
||||
struct TDTexInfo {
|
||||
vec4 res; // (1/width, 1/height, width, height)
|
||||
vec4 depth;
|
||||
};
|
||||
|
||||
// Output (always use this — handles sRGB correctly)
|
||||
fragColor = TDOutputSwizzle(color);
|
||||
|
||||
// Instancing (MAT only)
|
||||
int TDInstanceID();
|
||||
```
|
||||
|
||||
## glslTOP
|
||||
|
||||
Docked DATs created automatically:
|
||||
- `glsl1_pixel` — Pixel shader
|
||||
- `glsl1_compute` — Compute shader
|
||||
- `glsl1_info` — Compile info
|
||||
|
||||
### Pixel Shader Template
|
||||
|
||||
```glsl
|
||||
out vec4 fragColor;
|
||||
void main() {
|
||||
vec4 color = texture(sTD2DInputs[0], vUV.st);
|
||||
fragColor = TDOutputSwizzle(color);
|
||||
}
|
||||
```
|
||||
|
||||
### Compute Shader Template
|
||||
|
||||
```glsl
|
||||
layout (local_size_x = 8, local_size_y = 8) in;
|
||||
void main() {
|
||||
vec4 color = texelFetch(sTD2DInputs[0], ivec2(gl_GlobalInvocationID.xy), 0);
|
||||
TDImageStoreOutput(0, gl_GlobalInvocationID, color);
|
||||
}
|
||||
```
|
||||
|
||||
### Update Shader
|
||||
|
||||
```python
|
||||
op('/project1/glsl1_pixel').text = shader_code
|
||||
op('/project1/glsl1').cook(force=True)
|
||||
# Check errors:
|
||||
print(op('/project1/glsl1_info').text)
|
||||
```
|
||||
|
||||
## glslMAT
|
||||
|
||||
Docked DATs:
|
||||
- `glslmat1_vertex` — Vertex shader (param: `vdat`)
|
||||
- `glslmat1_pixel` — Pixel shader (param: `pdat`)
|
||||
- `glslmat1_info` — Compile info
|
||||
|
||||
Note: MAT uses `vdat`/`pdat`, TOP uses `vertexdat`/`pixeldat`.
|
||||
|
||||
### Vertex Shader Template
|
||||
|
||||
```glsl
|
||||
uniform float uTime;
|
||||
void main() {
|
||||
vec3 pos = TDPos();
|
||||
pos.z += sin(pos.x * 3.0 + uTime) * 0.2;
|
||||
vec4 worldSpacePos = TDDeform(pos);
|
||||
gl_Position = TDWorldToProj(worldSpacePos);
|
||||
}
|
||||
```
|
||||
|
||||
## Bayer 8x8 Dither Matrix
|
||||
|
||||
Reusable ordered dither function for retro/print aesthetics:
|
||||
|
||||
```glsl
|
||||
float bayer8(vec2 pos) {
|
||||
int x = int(mod(pos.x, 8.0)), y = int(mod(pos.y, 8.0)), idx = x + y * 8;
|
||||
int b[64] = int[64](
|
||||
0,32,8,40,2,34,10,42,48,16,56,24,50,18,58,26,
|
||||
12,44,4,36,14,46,6,38,60,28,52,20,62,30,54,22,
|
||||
3,35,11,43,1,33,9,41,51,19,59,27,49,17,57,25,
|
||||
15,47,7,39,13,45,5,37,63,31,55,23,61,29,53,21
|
||||
);
|
||||
return float(b[idx]) / 64.0;
|
||||
}
|
||||
```
|
||||
|
||||
## glslPOP / glsladvancedPOP / glslcopyPOP
|
||||
|
||||
All use compute shaders. Docked DATs follow naming convention:
|
||||
- `glsl1_compute` / `glsladv1_compute`
|
||||
- `glslcopy1_ptCompute` / `glslcopy1_vertCompute` / `glslcopy1_primCompute`
|
||||
@@ -0,0 +1,131 @@
|
||||
# Layout Compositor Reference
|
||||
|
||||
Patterns for building modular multi-panel grids — useful for HUD interfaces, data dashboards, and multi-source visual composites.
|
||||
|
||||
## Layout Approaches
|
||||
|
||||
| Approach | Best For | Notes |
|
||||
|----------|----------|-------|
|
||||
| `layoutTOP` | Fixed grid, quick setup | GPU, simple tiling |
|
||||
| Container COMP + `overTOP` | Full control, mixed-size panels | More setup, very flexible |
|
||||
| GLSL compositor | Procedural / BSP-style | Most powerful, more complex |
|
||||
|
||||
---
|
||||
|
||||
## layoutTOP
|
||||
|
||||
Built-in grid compositor — fastest path for uniform tile grids.
|
||||
|
||||
```python
|
||||
layout = root.create(layoutTOP, 'layout1')
|
||||
layout.par.resolutionw = 1920
|
||||
layout.par.resolutionh = 1080
|
||||
layout.par.cols = 3
|
||||
layout.par.rows = 2
|
||||
layout.par.gap = 4
|
||||
```
|
||||
|
||||
Connect inputs (up to cols×rows):
|
||||
```python
|
||||
layout.inputConnectors[0].connect(op('panel_radar'))
|
||||
layout.inputConnectors[1].connect(op('panel_wave'))
|
||||
layout.inputConnectors[2].connect(op('panel_data'))
|
||||
```
|
||||
|
||||
**Variable-width columns:** Not directly supported. Use overTOP approach for non-uniform grids.
|
||||
|
||||
---
|
||||
|
||||
## Container COMP Grid
|
||||
|
||||
Build each element as its own `containerCOMP`. Compose with `overTOP`:
|
||||
|
||||
```python
|
||||
def create_panel(root, name, width, height, x=0, y=0):
|
||||
panel = root.create(containerCOMP, name)
|
||||
panel.par.w = width
|
||||
panel.par.h = height
|
||||
panel.viewer = True
|
||||
return panel
|
||||
|
||||
# Composite with overTOP chain
|
||||
over1 = root.create(overTOP, 'over1')
|
||||
over1.inputConnectors[0].connect(panel_radar)
|
||||
over1.inputConnectors[1].connect(panel_wave)
|
||||
over1.par.topx2 = 0
|
||||
over1.par.topy2 = 512
|
||||
```
|
||||
|
||||
**Tip:** Use a `resolutionTOP` before each `overTOP` input if panels are different sizes.
|
||||
|
||||
---
|
||||
|
||||
## Panel Dividers (GLSL)
|
||||
|
||||
```glsl
|
||||
out vec4 fragColor;
|
||||
uniform vec2 uGridDivisions; // e.g. vec2(3, 2) for 3 cols, 2 rows
|
||||
uniform float uLineWidth; // pixels
|
||||
uniform vec4 uLineColor; // e.g. vec4(0.0, 1.0, 0.8, 0.6) for cyan
|
||||
|
||||
void main() {
|
||||
vec2 res = uTDOutputInfo.res.zw;
|
||||
vec2 uv = vUV.st;
|
||||
vec4 bg = texture(sTD2DInputs[0], uv);
|
||||
|
||||
float lineW = uLineWidth / res.x;
|
||||
float lineH = uLineWidth / res.y;
|
||||
|
||||
float vDiv = 0.0;
|
||||
for (float i = 1.0; i < uGridDivisions.x; i++) {
|
||||
float x = i / uGridDivisions.x;
|
||||
vDiv = max(vDiv, step(abs(uv.x - x), lineW));
|
||||
}
|
||||
|
||||
float hDiv = 0.0;
|
||||
for (float i = 1.0; i < uGridDivisions.y; i++) {
|
||||
float y = i / uGridDivisions.y;
|
||||
hDiv = max(hDiv, step(abs(uv.y - y), lineH));
|
||||
}
|
||||
|
||||
float line = max(vDiv, hDiv);
|
||||
vec4 result = mix(bg, uLineColor, line * uLineColor.a);
|
||||
fragColor = TDOutputSwizzle(result);
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Element Library Pattern
|
||||
|
||||
Each visual element lives in its own `baseCOMP` as a reusable `.tox`:
|
||||
|
||||
### Standard Interface
|
||||
```
|
||||
inputs:
|
||||
- in_audio (CHOP) — audio envelope / beat data
|
||||
- in_data (CHOP) — optional data stream
|
||||
- in_control (CHOP) — intensity, color, speed params
|
||||
|
||||
outputs:
|
||||
- out_top (TOP) — rendered element
|
||||
```
|
||||
|
||||
### Network Structure
|
||||
```
|
||||
/project1/
|
||||
audio_bus/ ← all audio analysis (see audio-reactive.md)
|
||||
elements/
|
||||
elem_radar/ ← baseCOMP with out_top
|
||||
elem_wave/
|
||||
elem_data/
|
||||
compositor/
|
||||
layout1 ← layoutTOP or overTOP chain
|
||||
dividers1 ← GLSL divider lines
|
||||
postfx/ ← bloom → chrom → CRT stack (see postfx.md)
|
||||
null_out ← final output
|
||||
output/
|
||||
windowCOMP ← full-screen output
|
||||
```
|
||||
|
||||
**Key principle:** Elements don't know about each other. The compositor assembles them. Audio bus is referenced by all elements but lives separately.
|
||||
@@ -0,0 +1,106 @@
|
||||
# Operator Tips
|
||||
|
||||
## Wireframe Rendering Pattern
|
||||
|
||||
Reusable setup for wireframe geometry on black background:
|
||||
|
||||
```python
|
||||
# 1. Material
|
||||
mat = root.create(wireframeMAT, 'wire_mat')
|
||||
mat.par.colorr = 1.0; mat.par.colorg = 0.0; mat.par.colorb = 0.0
|
||||
mat.par.linewidth = 3
|
||||
|
||||
# 2. Geometry COMP
|
||||
geo = root.create(geometryCOMP, 'my_geo')
|
||||
geo.par.rx.expr = 'absTime.seconds * 30'
|
||||
geo.par.ry.expr = 'absTime.seconds * 45'
|
||||
geo.par.material = mat.path # NOTE: 'material' not 'mat'
|
||||
|
||||
# 3. Shape inside the geo
|
||||
box = geo.create(boxSOP, 'cube')
|
||||
box.par.sizex = 1.5; box.par.sizey = 1.5; box.par.sizez = 1.5
|
||||
|
||||
# 4. Camera
|
||||
cam = root.create(cameraCOMP, 'cam1')
|
||||
cam.par.tx = 0; cam.par.ty = 0; cam.par.tz = 4; cam.par.fov = 45
|
||||
|
||||
# 5. Render TOP
|
||||
render = root.create(renderTOP, 'render1')
|
||||
render.par.outputresolution = 'custom'
|
||||
render.par.resolutionw = 1280; render.par.resolutionh = 720
|
||||
render.par.bgcolorr = 0; render.par.bgcolorg = 0; render.par.bgcolorb = 0
|
||||
render.par.camera = cam.path
|
||||
render.par.geometry = geo.path
|
||||
|
||||
# 6. Output null
|
||||
out = root.create(nullTOP, 'out1')
|
||||
out.inputConnectors[0].connect(render.outputConnectors[0])
|
||||
```
|
||||
|
||||
**Key rules:**
|
||||
- Class names: `wireframeMAT` not `wireframeMat` (all-caps suffix)
|
||||
- Geometry SOPs/POPs go INSIDE the geo comp
|
||||
- Material: `geo.par.material` not `geo.par.mat`
|
||||
- Render geometry: `render.par.geometry = geo.path` (string path)
|
||||
- `wireframeMAT.par.wireframemode = 'topology'` for clean wireframe (vs `'tesselated'` for triangle edges)
|
||||
- Alternative: Use `renderTOP.par.overridemat` instead of per-geo material
|
||||
|
||||
## Feedback TOP
|
||||
|
||||
### Basic Structure
|
||||
|
||||
```
|
||||
input (initial state) ──┐
|
||||
├──→ feedback_top ──→ processing ──→ null_out
|
||||
│ ↑
|
||||
└── par.top = 'null_out' ────────────────┘
|
||||
```
|
||||
|
||||
### Setup Pattern
|
||||
|
||||
```python
|
||||
# 1. Processing chain
|
||||
glsl = root.create(glslTOP, 'sim')
|
||||
null_out = root.create(nullTOP, 'null_out')
|
||||
glsl.outputConnectors[0].connect(null_out.inputConnectors[0])
|
||||
|
||||
# 2. Feedback referencing null_out
|
||||
feedback = root.create(feedbackTOP, 'feedback')
|
||||
feedback.par.top = 'null_out'
|
||||
|
||||
# 3. Black initial state
|
||||
const_init = root.create(constantTOP, 'const_init')
|
||||
const_init.par.colorr = 0; const_init.par.colorg = 0; const_init.par.colorb = 0
|
||||
|
||||
# 4. Wire: initial → feedback, feedback → processing
|
||||
feedback.inputConnectors[0].connect(const_init)
|
||||
glsl.inputConnectors[0].connect(feedback)
|
||||
|
||||
# 5. Reset to apply initial state
|
||||
feedback.par.resetpulse.pulse()
|
||||
```
|
||||
|
||||
### Common Errors
|
||||
|
||||
| Error | Cause | Solution |
|
||||
|-------|-------|----------|
|
||||
| "Not enough sources specified" | No input connected | Connect initial state TOP |
|
||||
| Unexpected initial pattern | Wrong initial state | Use Constant TOP (black) |
|
||||
|
||||
### Tips
|
||||
|
||||
1. Use float format for simulations: `glsl.par.format = 'rgba32float'`
|
||||
2. Reset after setup: `feedback.par.resetpulse.pulse()`
|
||||
3. Match resolutions — feedback, processing, and initial state must match
|
||||
4. Soft boundary prevents edge artifacts:
|
||||
```glsl
|
||||
float edge = 3.0 * texel.x;
|
||||
float bx = smoothstep(0.0, edge, uv.x) * smoothstep(0.0, edge, 1.0 - uv.x);
|
||||
float by = smoothstep(0.0, edge, uv.y) * smoothstep(0.0, edge, 1.0 - uv.y);
|
||||
value *= bx * by;
|
||||
```
|
||||
|
||||
### Use Cases
|
||||
- **Wave Simulation** — R=height, G=velocity, black initial state
|
||||
- **Cellular Automata** — white=alive, black=dead, random noise initial state
|
||||
- **Trail / Motion Blur** — blend current frame with feedback, black initial
|
||||
@@ -143,20 +143,20 @@ Creating nodes with the same names you just destroyed in the SAME script causes
|
||||
```python
|
||||
# td_execute_python:
|
||||
for c in list(root.children):
|
||||
if c.valid and c.name.startswith('promo_'):
|
||||
if c.valid and c.name.startswith('my_'):
|
||||
c.destroy()
|
||||
# ... then create promo_audio, promo_shader etc. in same script → CRASHES
|
||||
# ... then create my_audio, my_shader etc. in same script → CRASHES
|
||||
```
|
||||
|
||||
**CORRECT (two separate calls):**
|
||||
```python
|
||||
# Call 1: td_execute_python — clean only
|
||||
for c in list(root.children):
|
||||
if c.valid and c.name.startswith('promo_'):
|
||||
if c.valid and c.name.startswith('my_'):
|
||||
c.destroy()
|
||||
|
||||
# Call 2: td_execute_python — build (separate MCP call)
|
||||
audio = root.create(audiofileinCHOP, 'promo_audio')
|
||||
audio = root.create(audiofileinCHOP, 'my_audio')
|
||||
# ... rest of build
|
||||
```
|
||||
|
||||
@@ -361,21 +361,13 @@ win.par.winopen.pulse()
|
||||
|
||||
`out.sample(x, y)` returns pixels from a single cook snapshot. Compare samples with 2+ second delays, or use screencapture on the display window.
|
||||
|
||||
### 32. Audio-reactive GLSL: dual-layer sync pipeline
|
||||
### 32. Audio-reactive GLSL: TD-side pipeline
|
||||
|
||||
For audio-synced visuals, use BOTH layers for maximum effect:
|
||||
|
||||
**Layer 1 (TD-side, real-time):** AudioFileIn → AudioSpectrum(timeslice=True, fftsize='256') → Math(gain=5) → choptoTOP(par.chop=math, layout='rowscropped') → GLSL input. The shader samples `sTD2DInputs[1]` at different x positions for bass/mid/hi. Record the TD output with MovieFileOut.
|
||||
|
||||
**Layer 2 (Python-side, post-hoc):** scipy FFT on the SAME audio file → per-frame features (rms, bass, mid, hi, beat detection) → drive ASCII brightness, chromatic aberration, beat flashes during the render pass.
|
||||
|
||||
Both layers locked to the same audio file = visuals genuinely sync to the beat at two independent stages.
|
||||
For audio-synced visuals: AudioFileIn → AudioSpectrum(timeslice=True, fftsize='256') → Math(gain=5) → choptoTOP(par.chop=math, layout='rowscropped') → GLSL input. The shader samples `sTD2DInputs[1]` at different x positions for bass/mid/hi. Record the TD output with MovieFileOut.
|
||||
|
||||
**Key gotcha:** AudioFileIn must be cued (`par.cue=True` → `par.cuepulse.pulse()`) then uncued (`par.cue=False`, `par.play=True`) before recording starts. Otherwise the spectrum is silent for the first few seconds.
|
||||
|
||||
### 33. twozero MCP: benchmark and prefer native tools
|
||||
|
||||
Benchmarked April 2026: twozero MCP with 36 native tools. The old curl/REST method (port 9981) had zero native tools.
|
||||
### 33. twozero MCP: prefer native tools
|
||||
|
||||
**Always prefer native MCP tools over td_execute_python:**
|
||||
- `td_create_operator` over `root.create()` scripts (handles viewport positioning)
|
||||
@@ -425,13 +417,16 @@ TD can show `fps:0` in `td_get_perf` while ops still cook and `TOP.save()` still
|
||||
|
||||
**a) Project is paused (playbar stopped).** TD's playbar can be toggled with spacebar. The `root` at `/` has no `.playbar` attribute (it's on the perform COMP). The easiest fix is sending a spacebar keypress via `td_input_execute`, though this tool can sometimes error. As a workaround, `TOP.save()` always works regardless of play state — use it to verify rendering is actually happening before spending time debugging FPS.
|
||||
|
||||
**b) Audio device CHOP blocking the main thread.** An `audiooutCHOP` with an active audio device can consume 300-400ms/s (2000%+ of frame budget), stalling the cook loop at FPS=0. Fix: keep the CHOP active but set `volume=0` to prevent the audio driver from blocking. Disabling it entirely (`active=False`) may also work but can prevent downstream audio processing CHOPs from cooking.
|
||||
**b) Audio device CHOP blocking the main thread (MOST COMMON).** An `audiodeviceoutCHOP` with `active=True` can consume 300-400ms/s (2000%+ of frame budget), stalling the cook loop at FPS=0. **`volume=0` is NOT sufficient** — the audio driver still blocks. Fix: `par.active = False`. This completely stops the CHOP from interacting with the audio driver. If you need audio monitoring, enable it only during short playback checks, then disable before recording.
|
||||
|
||||
Verified April 2026: disabling `audiodeviceoutCHOP` (`active=False`) restored FPS from 0 to 60 instantly, recovering from 2348% budget usage to 0.1%.
|
||||
|
||||
Diagnostic sequence when FPS=0:
|
||||
1. `td_get_perf` — check if any op has extreme CPU/s
|
||||
2. `TOP.save()` on the output — if it produces a valid image, the pipeline works, just not at real-time rate
|
||||
3. Check for blocking CHOPs (audioout, audiodevin, etc.)
|
||||
4. Toggle play state (spacebar, or check if absTime.seconds is advancing)
|
||||
1. `td_get_perf` — check if any op has extreme CPU/s (audiodeviceoutCHOP is the usual suspect)
|
||||
2. If audiodeviceoutCHOP shows >100ms/s: set `par.active = False` immediately
|
||||
3. `TOP.save()` on the output — if it produces a valid image, the pipeline works, just not at real-time rate
|
||||
4. Check for other blocking CHOPs (audiodevin, etc.)
|
||||
5. Toggle play state (spacebar, or check if absTime.seconds is advancing)
|
||||
|
||||
### 39. Recording while FPS=0 produces empty or near-empty files
|
||||
|
||||
@@ -484,9 +479,20 @@ If `td_write_dat` fails, fall back to `td_execute_python`:
|
||||
op("/project1/shader_code").text = shader_string
|
||||
```
|
||||
|
||||
### 42. td_execute_python does NOT return stdout or print() output
|
||||
### 42. td_execute_python DOES return print() output — use it for debugging
|
||||
|
||||
Despite what earlier versions of pitfall #33 stated, `print()` and `debug()` output from `td_execute_python` scripts does NOT appear in the MCP response. The response is always just `(ok)` + FPS/error summary. To read values back, use dedicated inspection tools (`td_get_operator_info`, `td_read_dat`, `td_read_chop`) instead of trying to print from within a script.
|
||||
`print()` statements in `td_execute_python` scripts appear in the MCP response text. This is the correct way to read values back from scripts. The response format is: printed output first, then `[fps X.X/X] [N err/N warn]` on a separate line.
|
||||
|
||||
However, the `result` variable (if you set one) does NOT appear verbatim — use `print()` for anything you need to read back:
|
||||
```python
|
||||
# CORRECT — appears in response:
|
||||
print('value:', some_value)
|
||||
|
||||
# WRONG — not reliably in response:
|
||||
result = some_value
|
||||
```
|
||||
|
||||
For structured data, use dedicated inspection tools (`td_get_operator_info`, `td_read_chop`) which return clean JSON.
|
||||
|
||||
### 43. td_get_operator_info JSON is appended with `[fps X.X/X]` — breaks json.loads()
|
||||
|
||||
@@ -496,13 +502,203 @@ clean = response_text.rsplit('[fps', 1)[0]
|
||||
data = json.loads(clean)
|
||||
```
|
||||
|
||||
### 44. td_get_screenshot is asynchronous — returns `{"status": "pending"}`
|
||||
### 44. td_get_screenshot is unreliable — returns `{"status": "pending"}` and may never deliver
|
||||
|
||||
Screenshots don't complete instantly. The tool returns `{"status": "pending", "requestId": "..."}` and the actual file appears later. Wait a few seconds before checking for the file. There is no callback or completion notification — poll the filesystem.
|
||||
Screenshots don't complete instantly. The tool returns `{"status": "pending", "requestId": "..."}` and the actual file may appear later — or may NEVER appear at all. In testing (April 2026), screenshots stayed "pending" indefinitely with no file written to disk, even though the shader was cooking at 8-30fps.
|
||||
|
||||
### 45. Recording duration is manual — no auto-stop at audio end
|
||||
**Do NOT rely on `td_get_screenshot` for frame capture.** For reliable frame capture, use MovieFileOut recording + ffmpeg frame extraction:
|
||||
```bash
|
||||
# Record in TD first, then extract frames:
|
||||
ffmpeg -y -i /tmp/td_output.mov -t 25 -vf 'fps=24' /tmp/td_frames/frame_%06d.png
|
||||
```
|
||||
|
||||
If you need a quick visual check, `td_get_screenshot` is worth trying (it sometimes works), but always have the recording fallback. There is no callback or completion notification — if the file doesn't appear after 5-10 seconds, it's not coming.
|
||||
|
||||
### 45. Heavy shaders cook below record FPS — many duplicate frames in output
|
||||
|
||||
A raymarched GLSL shader may only cook at 8-15fps even though MovieFileOut records at 60fps. The recording still works (TD writes the last-cooked frame each time), but the resulting file has many duplicate frames. When extracting frames for post-processing, use a lower fps filter to avoid redundant frames:
|
||||
```bash
|
||||
# Extract at 24fps from a 60fps recording of an 8fps shader:
|
||||
ffmpeg -y -i /tmp/td_output.mov -t 25 -vf 'fps=24' /tmp/td_frames/frame_%06d.png
|
||||
```
|
||||
Check actual cook FPS with `td_get_perf` before committing to a long recording. If FPS < 15, the output will be a slideshow regardless of the recording codec.
|
||||
|
||||
### 46. Recording duration is manual — no auto-stop at audio end
|
||||
|
||||
MovieFileOut records until `par.record = False` is set. If audio ends before you stop recording, the file keeps growing with repeated frames. Always stop recording promptly after the audio duration. For precision: set a timer on the agent side matching the audio length, then send `par.record = False`. Trim excess with ffmpeg as a safety net:
|
||||
```bash
|
||||
ffmpeg -i raw.mov -t 25 -c copy trimmed.mov
|
||||
```
|
||||
|
||||
### 47. AudioFileIn par.index stays at 0 in sequential mode — not a reliable progress indicator
|
||||
|
||||
When `audiofileinCHOP` is in `playmode=2` (sequential), `par.index.eval()` returns 0.0 even while audio IS actively playing and the spectrum IS receiving data. Do NOT use `par.index` to check playback progress in sequential mode.
|
||||
|
||||
**How to verify audio is actually playing:**
|
||||
- Read the spectrum CHOP values via `td_read_chop` — if values are non-zero and CHANGE between reads 1-2s apart, audio is flowing
|
||||
- Read the audio CHOP itself: non-zero waveform samples confirm the file is loaded and playing
|
||||
- `par.play.eval()` returning True is necessary but NOT sufficient — it can be True with no audio flowing if cue is stuck
|
||||
|
||||
### 48. GLSL shader whiteout — clamp audio spectrum values in the shader
|
||||
|
||||
Raw spectrum values multiplied by Math CHOP gain can produce very large numbers (5-20+) that blow out the shader's lighting, producing flat white/grey. The shader MUST clamp audio inputs:
|
||||
|
||||
```glsl
|
||||
float bass = texture(sTD2DInputs[1], vec2(0.05, 0.25)).r;
|
||||
bass = clamp(bass, 0.0, 3.0); // prevent whiteout
|
||||
mids = clamp(mids, 0.0, 3.0);
|
||||
hi = clamp(hi, 0.0, 3.0);
|
||||
```
|
||||
|
||||
Discovered when gain=10 produced ~0.13 (too dark) during quiet passages but gain=50 produced ~9.4 (total whiteout). Fix: keep gain=10, use `highfreqboost=3.0` on AudioSpectrum, clamp in shader.
|
||||
|
||||
### 49. Non-Commercial TD records at 1280x1280 (square) — always crop in post
|
||||
|
||||
Even with `resolutionw=1280, resolutionh=720` on the GLSL TOP, Non-Commercial TD may output 1280x1280 to MovieFileOut. Always check dimensions with ffprobe and crop during extraction:
|
||||
|
||||
```bash
|
||||
# Center-crop from 1280x1280 to 1280x720:
|
||||
ffmpeg -y -i /tmp/td_output.mov -t 25 -r 24 -vf "crop=1280:720:0:280" /tmp/frames/frame_%06d.png
|
||||
```
|
||||
|
||||
Large ProRes files (1-2GB) at 1280x1280 decode at ~3fps, so 25s of footage takes ~3 minutes to extract.
|
||||
|
||||
## Advanced Patterns (pitfalls 51+)
|
||||
|
||||
### 51. Connection syntax: use `outputConnectors`/`inputConnectors`, NOT `outputs`/`inputs`
|
||||
|
||||
```python
|
||||
# CORRECT
|
||||
src.outputConnectors[0].connect(dst.inputConnectors[0])
|
||||
# WRONG — raises IndexError or AttributeError
|
||||
src.outputs[0].connect(dst.inputs[0])
|
||||
```
|
||||
|
||||
For a feedback TOP, BOTH the `par.top` reference and a wired input connection are required:
|
||||
```python
|
||||
fb.par.top = target.path
|
||||
target.outputConnectors[0].connect(fb.inputConnectors[0])
|
||||
```
|
||||
|
||||
### 52. moviefileoutTOP `par.input` doesn't resolve via Python in TD 2025.32460
|
||||
|
||||
Setting `moviefileoutTOP.par.input` programmatically does NOT work. All forms fail silently — the operator just reports "Not enough sources specified."
|
||||
|
||||
**Workaround — frame capture + ffmpeg:**
|
||||
```python
|
||||
out = op('/project1/out')
|
||||
for i in range(300):
|
||||
delay = i * 5
|
||||
run(f"op('/project1/out').save('/tmp/frames/f_{i:04d}.png')", delayFrames=delay)
|
||||
# Then: ffmpeg -y -framerate 30 -i /tmp/frames/f_%04d.png -c:v prores -pix_fmt yuv420p /tmp/output.mov
|
||||
```
|
||||
|
||||
### 53. Batch frame capture — use `me.fetch`/`me.store` for state across calls
|
||||
|
||||
```python
|
||||
start = me.fetch('cap_frame', 0)
|
||||
for i in range(60):
|
||||
frame = start + i
|
||||
op('/project1/out').save(f'/tmp/frames/frame_{str(frame).zfill(4)}.png')
|
||||
me.store('cap_frame', start + 60)
|
||||
```
|
||||
Call 5 times for 300 frames. Each picks up where the last left off.
|
||||
|
||||
### 54. GLSL TOP pixel shader requirements in TD 2025
|
||||
|
||||
```glsl
|
||||
// REQUIRED — declare output
|
||||
layout(location = 0) out vec4 fragColor;
|
||||
|
||||
void main() {
|
||||
vec3 col = vec3(1.0, 0.0, 0.0);
|
||||
fragColor = TDOutputSwizzle(vec4(col, 1.0));
|
||||
}
|
||||
```
|
||||
**Built-in uniforms available:** `uTDOutputInfo.res` (vec4), `uTDTimeInfo.seconds`, `sTD2DInputs[N]`.
|
||||
**Auto-created DATs:** `name_pixel`, `name_vertex`, `name_compute` textDATs with example code.
|
||||
|
||||
### 55. TOP.save() doesn't advance time — identical frames in tight loops
|
||||
|
||||
`.save()` captures the current cooked frame without advancing TD's timeline:
|
||||
```python
|
||||
# WRONG — all frames identical
|
||||
for i in range(300):
|
||||
op('/project1/out').save(f'frames/f_{i:04d}.png')
|
||||
|
||||
# CORRECT — use run() with delayFrames
|
||||
for i in range(300):
|
||||
delay = i * 5
|
||||
run(f"op('/project1/out').save('frames/f_{i:04d}.png')", delayFrames=delay)
|
||||
```
|
||||
**NEVER use `time.sleep()` in TD** — it blocks the main thread and freezes the UI.
|
||||
|
||||
### 56. Feedback loop masks input changes — force switch during capture
|
||||
|
||||
With feedback TOP opacity 0.7+, the buffer dominates output. Switching input produces nearly identical frames.
|
||||
|
||||
**Fix — force switch index per capture:**
|
||||
```python
|
||||
for i in range(300):
|
||||
idx = (i // 8) % num_inputs
|
||||
delay = i * 5
|
||||
run(f"op('/project1/vswitch').par.index={idx}; op('/project1/out').save('f_{i:04d}.png')", delayFrames=delay)
|
||||
```
|
||||
|
||||
### 57. Large td_execute_python scripts fail — split into incremental calls
|
||||
|
||||
10+ operator creations in one script cause timing issues. Split into 2-4 calls of 2-4 operators each. Within one call, the handles returned by `create()` are usable immediately. Across calls, `op('name')` may return `None` if the previous call hasn't committed.
|
||||
|
||||
### 58. MCP instance reconnection after project.load()
|
||||
|
||||
`project.load(path)` changes the PID. After loading, call `td_list_instances()` and use the new `target_instance`. For TOX files: import as child comp instead (doesn't disconnect).
|
||||
|
||||
### 59. TOX reverse-engineering workflow
|
||||
|
||||
```python
|
||||
comp = root.loadTox(r'/path/to/file.tox')
|
||||
comp.name = '_study_comp'
|
||||
for child in comp.children:
|
||||
print(f'{child.name} ({child.OPType})')
|
||||
# Use td_get_operators_info, td_read_dat, check custom params
|
||||
```
|
||||
|
||||
### 60. sliderCOMP naming — TD appends suffix
|
||||
|
||||
TD auto-renames: `slider_brightness` → `slider_brightness1`. Always check names after creation.
|
||||
|
||||
### 61. create() requires full operator type suffix
|
||||
|
||||
```python
|
||||
# CORRECT
|
||||
proj.create('audiofileinCHOP', 'audio_in')
|
||||
proj.create('glslTOP', 'render')
|
||||
|
||||
# WRONG — raises "Unknown operator type"
|
||||
proj.create('audiofilein', 'audio_in')
|
||||
proj.create('glsl', 'render')
|
||||
```
|
||||
|
||||
### 62. Reparenting COMPs — use copyOPs, not connect()
|
||||
|
||||
Moving COMPs with `inputCOMPConnectors[0].connect()` fails. Use copy + destroy:
|
||||
```python
|
||||
copied = target.copyOPs([source]) # preserves internal wiring
|
||||
source.destroy()
|
||||
# Re-wire external connections manually after the move
|
||||
```
|
||||
|
||||
### 63. Slider wiring — expressionCHOP with op() expressions crashes TD
|
||||
|
||||
```python
|
||||
# CRASHES TD — don't do this
|
||||
echop = root.create(expressionCHOP, 'slider_ctrl')
|
||||
echop.par.chan0expr = 'op("/project1/controls/slider_brightness1").par.value0'
|
||||
|
||||
# WORKING — parameterCHOP as bridge
|
||||
pchop = root.create(parameterCHOP, 'slider_vals')
|
||||
pchop.par.ops = '/project1/controls'
|
||||
pchop.par.parameters = 'value0'
|
||||
pchop.par.custom = True
|
||||
pchop.par.builtin = False
|
||||
```
|
||||
@@ -0,0 +1,183 @@
|
||||
# Post-FX Reference
|
||||
|
||||
Bloom, CRT scanlines, chromatic aberration, and feedback glow patterns for live visual work.
|
||||
|
||||
---
|
||||
|
||||
## Bloom
|
||||
|
||||
### Built-in Bloom TOP
|
||||
|
||||
TD's `bloomTOP` is the fastest path — GPU-accelerated, no shader needed.
|
||||
|
||||
```python
|
||||
bloom = root.create(bloomTOP, 'bloom1')
|
||||
bloom.par.threshold = 0.6 # Luminance threshold (0-1)
|
||||
bloom.par.size = 0.03 # Spread radius (0-1)
|
||||
bloom.par.strength = 1.5 # Bloom intensity
|
||||
bloom.par.blendmode = 'add' # 'add' or 'screen'
|
||||
```
|
||||
|
||||
**Audio reactive bloom:**
|
||||
```python
|
||||
bloom.par.strength.mode = ParMode.EXPRESSION
|
||||
bloom.par.strength.expr = "op('audio_env')['envelope'][0] * 3.0 + 0.5"
|
||||
```
|
||||
|
||||
### GLSL Bloom (More Control)
|
||||
|
||||
For multi-pass bloom with color tinting:
|
||||
|
||||
```glsl
|
||||
// bloom_pixel.glsl — pass1: threshold + tint
|
||||
out vec4 fragColor;
|
||||
uniform float uThreshold;
|
||||
uniform vec3 uBloomColor;
|
||||
|
||||
void main() {
|
||||
vec4 col = texture(sTD2DInputs[0], vUV.st);
|
||||
float luma = dot(col.rgb, vec3(0.299, 0.587, 0.114));
|
||||
float bloom = max(0.0, luma - uThreshold);
|
||||
fragColor = TDOutputSwizzle(vec4(col.rgb * bloom * uBloomColor, col.a));
|
||||
}
|
||||
```
|
||||
|
||||
Then blur with `blurTOP` (size ~0.02-0.05), composite back over source with `addTOP` or `compositeTOP` in Add mode.
|
||||
|
||||
---
|
||||
|
||||
## CRT / Scanlines
|
||||
|
||||
Pure GLSL — create a `glslTOP` and paste into its `_pixel` DAT.
|
||||
|
||||
```glsl
|
||||
// crt_pixel.glsl
|
||||
out vec4 fragColor;
|
||||
uniform float uTime;
|
||||
uniform float uScanlineIntensity; // 0.0 - 1.0, default 0.4
|
||||
uniform float uCurvature; // 0.0 - 0.15, default 0.05
|
||||
uniform float uVignette; // 0.0 - 1.0, default 0.8
|
||||
|
||||
vec2 curveUV(vec2 uv, float amount) {
|
||||
uv = uv * 2.0 - 1.0;
|
||||
vec2 offset = abs(uv.yx) / vec2(6.0, 4.0);
|
||||
uv = uv + uv * offset * offset * amount;
|
||||
return uv * 0.5 + 0.5;
|
||||
}
|
||||
|
||||
void main() {
|
||||
vec2 res = uTDOutputInfo.res.zw;
|
||||
vec2 uv = vUV.st;
|
||||
|
||||
// CRT barrel distortion
|
||||
uv = curveUV(uv, uCurvature * 10.0);
|
||||
|
||||
// Kill pixels outside curved screen
|
||||
if (uv.x < 0.0 || uv.x > 1.0 || uv.y < 0.0 || uv.y > 1.0) {
|
||||
fragColor = vec4(0.0, 0.0, 0.0, 1.0);
|
||||
return;
|
||||
}
|
||||
|
||||
vec4 col = texture(sTD2DInputs[0], uv);
|
||||
|
||||
// Scanlines
|
||||
float scanline = sin(uv.y * res.y * 3.14159) * 0.5 + 0.5;
|
||||
col.rgb *= mix(1.0, scanline, uScanlineIntensity);
|
||||
|
||||
// Horizontal noise flicker
|
||||
float flicker = TDSimplexNoise(vec2(uv.y * 100.0, uTime * 8.0)) * 0.03;
|
||||
col.rgb += flicker;
|
||||
|
||||
// Vignette
|
||||
vec2 vig = uv * (1.0 - uv.yx);
|
||||
float v = pow(vig.x * vig.y * 15.0, uVignette);
|
||||
col.rgb *= v;
|
||||
|
||||
fragColor = TDOutputSwizzle(col);
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Chromatic Aberration
|
||||
|
||||
Splits RGB channels and offsets them along screen axes.
|
||||
|
||||
```glsl
|
||||
out vec4 fragColor;
|
||||
uniform float uAmount; // 0.001 - 0.02, default 0.006
|
||||
|
||||
void main() {
|
||||
vec2 uv = vUV.st;
|
||||
vec2 dir = uv - 0.5;
|
||||
|
||||
float r = texture(sTD2DInputs[0], uv + dir * uAmount).r;
|
||||
float g = texture(sTD2DInputs[0], uv).g;
|
||||
float b = texture(sTD2DInputs[0], uv - dir * uAmount).b;
|
||||
float a = texture(sTD2DInputs[0], uv).a;
|
||||
|
||||
fragColor = TDOutputSwizzle(vec4(r, g, b, a));
|
||||
}
|
||||
```
|
||||
|
||||
**Audio-reactive variant** — spike aberration on beats:
|
||||
```glsl
|
||||
uniform float uBeat;
|
||||
void main() {
|
||||
vec2 uv = vUV.st;
|
||||
vec2 dir = uv - 0.5;
|
||||
float amount = uAmount + uBeat * 0.04;
|
||||
float r = texture(sTD2DInputs[0], uv + dir * amount * 1.2).r;
|
||||
float g = texture(sTD2DInputs[0], uv).g;
|
||||
float b = texture(sTD2DInputs[0], uv - dir * amount * 0.8).b;
|
||||
fragColor = TDOutputSwizzle(vec4(r, g, b, 1.0));
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Feedback Glow
|
||||
|
||||
Warm persistent trails for glow effects.
|
||||
|
||||
```glsl
|
||||
out vec4 fragColor;
|
||||
uniform float uDecay; // 0.92 - 0.98 for slow trails
|
||||
uniform vec3 uGlowColor; // tint accumulated feedback
|
||||
|
||||
void main() {
|
||||
vec2 uv = vUV.st;
|
||||
vec4 prev = texture(sTD2DInputs[0], uv); // feedback input
|
||||
vec4 curr = texture(sTD2DInputs[1], uv); // current frame
|
||||
|
||||
vec3 glow = prev.rgb * uDecay * uGlowColor;
|
||||
vec3 result = max(glow, curr.rgb);
|
||||
|
||||
fragColor = TDOutputSwizzle(vec4(result, 1.0));
|
||||
}
|
||||
```
|
||||
|
||||
**Tips:**
|
||||
- `uDecay = 0.95` → medium trail
|
||||
- `uDecay = 0.98` → long comet tail
|
||||
- Set `glslTOP` format to `rgba16float` for smooth gradients
|
||||
|
||||
---
|
||||
|
||||
## Full Post-FX Stack
|
||||
|
||||
Recommended order:
|
||||
|
||||
```
|
||||
[scene / composite]
|
||||
↓
|
||||
bloomTOP ← luminance threshold bloom
|
||||
↓
|
||||
glslTOP (chrom) ← chromatic aberration
|
||||
↓
|
||||
glslTOP (crt) ← scanlines + barrel distortion + vignette
|
||||
↓
|
||||
null_out ← final output
|
||||
```
|
||||
|
||||
**Performance note:** Each glslTOP is a full GPU pass. For 1920×1080 at 60fps this stack is comfortably real-time. For 4K, consider downsampling bloom input with `resolutionTOP` first.
|
||||
@@ -380,6 +380,10 @@ def backup_existing(path: Path, backup_root: Path) -> Optional[Path]:
|
||||
# Replace OpenClaw brand names with Hermes in migrated text so that
|
||||
# memory entries, user profiles, SOUL.md, and workspace instructions
|
||||
# read as self-referential to the new agent identity.
|
||||
#
|
||||
# Case-preserving: ``OpenClaw`` → ``Hermes`` (prose), but lowercase matches
|
||||
# like ``openclaw`` → ``hermes`` (so filesystem paths like ``~/.openclaw``
|
||||
# become ``~/.hermes`` — the real Hermes home — not the broken ``~/.Hermes``).
|
||||
_REBRAND_PATTERNS: List[Tuple[re.Pattern, str]] = [
|
||||
(re.compile(r'\bOpen[\s-]?Claw\b', re.IGNORECASE), 'Hermes'),
|
||||
(re.compile(r'\bClawdBot\b', re.IGNORECASE), 'Hermes'),
|
||||
@@ -387,10 +391,31 @@ _REBRAND_PATTERNS: List[Tuple[re.Pattern, str]] = [
|
||||
]
|
||||
|
||||
|
||||
def _case_preserving_replacement(replacement: str):
|
||||
"""Return a re.sub replacement fn that lowercases the result when the
|
||||
matched text was all-lowercase.
|
||||
|
||||
Keeps ``OpenClaw`` → ``Hermes`` but maps ``openclaw`` → ``hermes`` so a
|
||||
filesystem path like ``~/.openclaw/config.yaml`` rewrites to
|
||||
``~/.hermes/config.yaml`` (the real Hermes home) instead of the broken
|
||||
``~/.Hermes/config.yaml``.
|
||||
"""
|
||||
def _sub(match: "re.Match[str]") -> str:
|
||||
matched = match.group(0)
|
||||
if matched and matched.islower():
|
||||
return replacement.lower()
|
||||
return replacement
|
||||
return _sub
|
||||
|
||||
|
||||
def rebrand_text(text: str) -> str:
|
||||
"""Replace OpenClaw / ClawdBot / MoltBot brand names with Hermes."""
|
||||
"""Replace OpenClaw / ClawdBot / MoltBot brand names with Hermes.
|
||||
|
||||
Preserves case so filesystem-path matches (lowercase) don't become
|
||||
capitalized directory names that don't exist.
|
||||
"""
|
||||
for pattern, replacement in _REBRAND_PATTERNS:
|
||||
text = pattern.sub(replacement, text)
|
||||
text = pattern.sub(_case_preserving_replacement(replacement), text)
|
||||
return text
|
||||
|
||||
|
||||
|
||||
@@ -91,4 +91,29 @@
|
||||
|
||||
// Register this plugin — the dashboard picks it up automatically.
|
||||
window.__HERMES_PLUGINS__.register("example", ExamplePage);
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────
|
||||
// Page-scoped slot demo: inject a small banner at the top of /sessions.
|
||||
//
|
||||
// Built-in pages expose named slots (<page>:top, <page>:bottom) that
|
||||
// plugins can populate without overriding the whole route. The
|
||||
// manifest lists the slots we use in its `slots` array so the shell
|
||||
// knows to render <PluginSlot name="sessions:top" /> there.
|
||||
// ─────────────────────────────────────────────────────────────────────
|
||||
// Banner component rendered into the "sessions:top" slot: a dashed card with
// an "Example" badge and a one-line note naming the slot it was injected into.
function SessionsTopBanner() {
  const slotLabel = React.createElement(
    "code",
    { className: "font-courier" },
    "sessions:top"
  );

  const note = React.createElement(
    "span",
    { className: "text-xs text-muted-foreground" },
    "This banner was injected into the Sessions page by the example plugin via the ",
    slotLabel,
    " slot."
  );

  const badge = React.createElement(Badge, { variant: "outline" }, "Example");

  const row = React.createElement(
    CardContent,
    { className: "flex items-center gap-3 py-2" },
    badge,
    note
  );

  return React.createElement(Card, { className: "border-dashed" }, row);
}
|
||||
|
||||
window.__HERMES_PLUGINS__.registerSlot("example", "sessions:top", SessionsTopBanner);
|
||||
})();
|
||||
|
||||
@@ -8,6 +8,7 @@
|
||||
"path": "/example",
|
||||
"position": "after:skills"
|
||||
},
|
||||
"slots": ["sessions:top"],
|
||||
"entry": "dist/index.js",
|
||||
"api": "plugin_api.py"
|
||||
}
|
||||
|
||||
@@ -0,0 +1,131 @@
|
||||
# google_meet plugin
|
||||
|
||||
Let the hermes agent join a Google Meet call, transcribe it, optionally speak
|
||||
in it, and do the followup work afterwards.
|
||||
|
||||
## What ships
|
||||
|
||||
| Version | What | Status |
|
||||
|---|---|---|
|
||||
| v1 | Transcribe-only: Playwright joins Meet, scrapes captions to transcript file | ✓ ships by default |
|
||||
| v2 | Realtime duplex audio: bot speaks in-call via OpenAI Realtime + BlackHole/PulseAudio null-sink | ✓ opt in with `mode='realtime'` |
|
||||
| v3 | Remote node host: run the bot on a different machine than the gateway | ✓ opt in with `node='<name>'` |
|
||||
|
||||
## Architecture
|
||||
|
||||
```
|
||||
┌─ gateway (Linux box, where hermes runs) ────────────────────────────┐
|
||||
│ │
|
||||
│ agent → meet_join(url, mode='realtime', node='my-mac') │
|
||||
│ │ │
|
||||
│ └─ NodeClient ─── ws ────┐ │
|
||||
│ │ │
|
||||
└──────────────────────────────────┼───────────────────────────────────┘
|
||||
│ ws (token auth; wss only via your own TLS proxy)
|
||||
▼
|
||||
┌─ node host (user's Mac, signed-in Chrome lives here) ───────────────┐
|
||||
│ │
|
||||
│ NodeServer (from `hermes meet node run`) │
|
||||
│ │ │
|
||||
│ ├─ start_bot → process_manager.start() → spawns meet_bot │
|
||||
│ │ │
|
||||
│ └─ meet_bot (Playwright) │
|
||||
│ ├─ Chromium → meet.google.com │
|
||||
│ ├─ caption scraper → transcript.txt │
|
||||
│ └─ (realtime mode only) RealtimeSpeaker thread │
|
||||
│ ↓ │
|
||||
│ OpenAI Realtime WS → speaker.pcm │
|
||||
│ ↓ │
|
||||
│ paplay → null-sink ← Chrome fake mic │
|
||||
│ │
|
||||
└──────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
Without v3: everything in the lower "node host" box runs on the gateway machine itself.
|
||||
Without v2: the "realtime" path is skipped; transcribe runs alone.
|
||||
|
||||
## Files
|
||||
|
||||
| Path | Purpose |
|
||||
|---|---|
|
||||
| `plugin.yaml` | manifest |
|
||||
| `__init__.py` | `register(ctx)` — registers 5 tools + `on_session_end` hook + `hermes meet` CLI |
|
||||
| `meet_bot.py` | Playwright bot subprocess (standalone, `python -m plugins.google_meet.meet_bot`) |
|
||||
| `process_manager.py` | local bot lifecycle + `enqueue_say` |
|
||||
| `tools.py` | agent-facing tools + node-routing helper |
|
||||
| `cli.py` | `hermes meet setup / auth / join / status / transcript / say / stop / node ...` |
|
||||
| `audio_bridge.py` | v2: PulseAudio null-sink (Linux) + BlackHole probe (macOS) |
|
||||
| `realtime/openai_client.py` | v2: `RealtimeSession` + `RealtimeSpeaker` (file-queue → OpenAI Realtime WS → PCM) |
|
||||
| `node/protocol.py` | v3: message envelope + validation |
|
||||
| `node/registry.py` | v3: `$HERMES_HOME/workspace/meetings/nodes.json` |
|
||||
| `node/server.py` | v3: `NodeServer` (runs on host machine) |
|
||||
| `node/client.py` | v3: `NodeClient` (used by tool handlers + CLI on gateway) |
|
||||
| `node/cli.py` | v3: `hermes meet node {run,list,approve,remove,status,ping}` |
|
||||
| `SKILL.md` | agent usage guide |
|
||||
|
||||
## Local quick start
|
||||
|
||||
```bash
|
||||
hermes plugins enable google_meet
|
||||
hermes meet install # pip + Chromium
|
||||
hermes meet setup # preflight
|
||||
hermes meet auth # optional
|
||||
hermes meet join https://meet.google.com/abc-defg-hij # transcribe
|
||||
```
|
||||
|
||||
## Realtime mode
|
||||
|
||||
Linux (preferred, most automated):
|
||||
```bash
|
||||
hermes meet install --realtime # installs pulseaudio-utils
|
||||
echo 'OPENAI_API_KEY=sk-...' >> ~/.hermes/.env
|
||||
hermes meet join https://meet.google.com/abc-defg-hij --mode realtime
|
||||
# then from the agent or CLI:
|
||||
hermes meet say "Good morning everyone, I'm the note-taker bot."
|
||||
```
|
||||
|
||||
macOS:
|
||||
```bash
|
||||
hermes meet install --realtime # runs: brew install blackhole-2ch ffmpeg
|
||||
# then — manually! — open System Settings → Sound → Input → BlackHole 2ch
|
||||
echo 'OPENAI_API_KEY=sk-...' >> ~/.hermes/.env
|
||||
hermes meet join https://meet.google.com/abc-defg-hij --mode realtime
|
||||
```
|
||||
|
||||
On macOS, hermes will **not** switch your system audio input automatically — the
|
||||
user has to do it. This is deliberate: switching default input on a whim would
|
||||
be a surprising side effect.
|
||||
|
||||
## Remote node host
|
||||
|
||||
On the node machine (e.g. user's Mac with a signed-in Chrome):
|
||||
```bash
|
||||
pip install playwright websockets
|
||||
python -m playwright install chromium
|
||||
hermes plugins enable google_meet
|
||||
hermes meet node run --display-name my-mac --host 0.0.0.0 --port 18789
|
||||
# prints the bearer token on first run; copy it
|
||||
```
|
||||
|
||||
On the gateway:
|
||||
```bash
|
||||
hermes meet node approve my-mac ws://<mac-ip>:18789 <token>
|
||||
hermes meet node ping my-mac
|
||||
# now any meet_* tool call accepts node='my-mac' (or 'auto')
|
||||
```
|
||||
|
||||
## Safety
|
||||
|
||||
- URL gate: only `https://meet.google.com/abc-defg-hij`, `/new`, `/lookup/<id>`.
|
||||
- No calendar scanning, no auto-dial, no auto-consent announcement.
|
||||
- Node server uses bearer-token auth; no key exchange, no TLS termination
|
||||
built in — run it on a LAN or behind a reverse proxy you trust.
|
||||
- One active meeting per (gateway, node) pair. A second `meet_join` leaves the first.
|
||||
- `meet_say` refuses unless the active meeting was started with `mode='realtime'`.
|
||||
|
||||
## Out of scope
|
||||
|
||||
- **Calendar scanning** — deliberately not implemented. Join URLs must be explicit.
|
||||
- **Multi-tenant node sharing** — a node serves one gateway at a time.
|
||||
- **Windows** — audio bridging isn't tested; `register()` no-ops on Windows.
|
||||
- **System audio input switching on macOS** — user responsibility, not the bot's.
|
||||
@@ -0,0 +1,148 @@
|
||||
---
|
||||
name: google_meet
|
||||
description: Join a Google Meet call, transcribe live captions, optionally speak in realtime, and do the follow-up work afterwards. Use when the user asks the agent to sit in on a meeting, take notes, summarize it, respond in-call, or extract action items from it.
|
||||
version: 0.2.0
|
||||
platforms:
|
||||
- linux
|
||||
- macos
|
||||
metadata:
|
||||
hermes:
|
||||
tags: [meetings, google-meet, transcription, realtime-voice]
|
||||
---
|
||||
|
||||
# google_meet
|
||||
|
||||
## When to use
|
||||
|
||||
The user says any of:
|
||||
|
||||
- "join my Meet at <url>"
|
||||
- "take notes on this meeting"
|
||||
- "summarize the meeting and send followups"
|
||||
- "sit in on my standup"
|
||||
- "be a bot in this call and speak up when X"
|
||||
|
||||
## Two modes
|
||||
|
||||
| Mode | What the bot does |
|
||||
|---|---|
|
||||
| `transcribe` (default) | Joins, enables captions, scrapes a transcript. Listen-only. |
|
||||
| `realtime` | Same as transcribe PLUS speaks into the meeting via OpenAI Realtime. The agent calls `meet_say(text)` and the bot's voice comes out of the call. |
|
||||
|
||||
Pick `realtime` only when the user actually wants the agent to speak. It costs real money (OpenAI Realtime is pay-per-audio-minute) and requires a virtual audio device set up on the machine running the bot.
|
||||
|
||||
## Two locations
|
||||
|
||||
| Location | When |
|
||||
|---|---|
|
||||
| Local (default) | Gateway machine runs the Playwright bot directly. |
|
||||
| Remote node (`node="<name>"`) | Bot runs on a different machine that has a signed-in Chrome and (for realtime) a configured audio bridge. Useful when the gateway runs on a headless Linux box but the user's real signed-in Chrome lives on their Mac. |
|
||||
|
||||
## Prerequisites the user must handle once
|
||||
|
||||
Easiest path — run the built-in installer:
|
||||
|
||||
```bash
|
||||
hermes plugins enable google_meet
|
||||
hermes meet install # pip deps + Chromium (transcribe only)
|
||||
hermes meet install --realtime # + pulseaudio-utils / brew blackhole+ffmpeg
|
||||
hermes meet auth # optional; skips guest-lobby wait
|
||||
hermes meet setup # preflight checks
|
||||
```
|
||||
|
||||
`hermes meet install --realtime` prompts before running `sudo apt-get` (Linux)
|
||||
or `brew install` (macOS). Pass `--yes` to skip the prompt. It will NOT touch
|
||||
your macOS default-input setting — you have to select BlackHole 2ch in
|
||||
System Settings yourself before starting a realtime meeting.
|
||||
|
||||
Or do it manually:
|
||||
```bash
|
||||
pip install playwright websockets && python -m playwright install chromium
|
||||
|
||||
# For realtime mode, additionally:
|
||||
# Linux: sudo apt install pulseaudio-utils
|
||||
# macOS: brew install blackhole-2ch ffmpeg
|
||||
# → System Settings → Sound → Input → BlackHole 2ch
|
||||
# Then set OPENAI_API_KEY or HERMES_MEET_REALTIME_KEY in ~/.hermes/.env
|
||||
```
|
||||
|
||||
For a remote node:
|
||||
```bash
|
||||
# on the user's Mac (where Chrome is signed in):
|
||||
pip install playwright websockets && python -m playwright install chromium
|
||||
hermes plugins enable google_meet
|
||||
hermes meet node run --display-name my-mac # persistent server
|
||||
# copy the printed token
|
||||
|
||||
# on the gateway:
|
||||
hermes meet node approve my-mac ws://<mac-ip>:18789 <token>
|
||||
hermes meet node ping my-mac # confirm reachable
|
||||
```
|
||||
|
||||
Run `hermes meet setup` to preflight local prereqs.
|
||||
|
||||
## Flow
|
||||
|
||||
1. **Join** — call `meet_join(url=..., mode=..., node=...)`. Returns immediately.
|
||||
2. **Announce yourself** — no auto-consent. Say (in whatever channel the user is watching): "A Hermes agent bot is in this call taking notes."
|
||||
3. **Poll** — `meet_status()` for liveness, `meet_transcript(last=20)` for recent captions. Don't re-read the whole transcript every turn.
|
||||
4. **Speak (realtime only)** — `meet_say(text="...")` queues text for TTS. The speech lags by ~2s. Don't spam it.
|
||||
5. **Leave** — `meet_leave()` when done, or set `duration="30m"` on `meet_join` for auto-leave.
|
||||
6. **Follow up** — read `meet_transcript()` in full, summarize, and use regular tools to send the recap, file issues, schedule followups.
|
||||
|
||||
## Tool reference
|
||||
|
||||
| Tool | Parameters | Use |
|
||||
|---|---|---|
|
||||
| `meet_join` | `url`, `mode?`, `guest_name?`, `duration?`, `headed?`, `node?` | Start bot |
|
||||
| `meet_status` | `node?` | Liveness + progress |
|
||||
| `meet_transcript` | `last?`, `node?` | Read captions |
|
||||
| `meet_leave` | `node?` | Close bot |
|
||||
| `meet_say` | `text`, `node?` | Speak in realtime meeting |
|
||||
|
||||
`node?` on all tools: pass a registered node name (or `"auto"` for the sole node) to operate a remote bot instead of a local one. Omit for local.
|
||||
|
||||
## Important limits
|
||||
|
||||
- Captions are only as good as Google Meet's live captions. English-biased, lossy on overlapping speakers.
|
||||
- Guest mode sits in the lobby until a host admits. Warn the user; `hermes meet auth` avoids this.
|
||||
- **Lobby timeout**: if the host doesn't admit the bot within 5 minutes (configurable via `HERMES_MEET_LOBBY_TIMEOUT` env), the bot leaves and `meet_status` reports `leaveReason: "lobby_timeout"`.
|
||||
- **One active meeting per install per location.** A second `meet_join` leaves the first.
|
||||
- **Windows not supported.**
|
||||
- Realtime mode needs a virtual audio device. If the audio bridge setup fails, the bot falls back to transcribe mode and flags it in `meet_status().error`.
|
||||
- `meet_say` requires `mode='realtime'` on the originating `meet_join`. Calling it against a transcribe-mode meeting returns a clear error.
|
||||
- **Barge-in is best-effort.** When a caption arrives attributed to a real participant while the bot is generating audio, the bot sends `response.cancel` to OpenAI Realtime. Captions take ~500ms to show up, so the bot will talk over the first second or so of a human interruption.
|
||||
|
||||
## Status dict reference
|
||||
|
||||
`meet_status()` returns (subset shown, there are more):
|
||||
|
||||
| Key | Meaning |
|
||||
|---|---|
|
||||
| `inCall` | Past the lobby. False while waiting for admission. |
|
||||
| `lobbyWaiting` | Clicked "Ask to join", waiting on host. |
|
||||
| `joinAttemptedAt` / `joinedAt` | Timestamps for lobby-click and actual admission. |
|
||||
| `captioning` | Caption observer is installed. |
|
||||
| `transcriptLines` / `lastCaptionAt` | Transcript progress. |
|
||||
| `realtime` / `realtimeReady` | Realtime mode provisioned / WS connected. |
|
||||
| `realtimeDevice` | Audio device name the bot is feeding (e.g. `hermes_meet_src`). |
|
||||
| `audioBytesOut` / `lastAudioOutAt` | How much PCM the OpenAI session has produced. |
|
||||
| `lastBargeInAt` | Timestamp of the most recent `response.cancel` sent. |
|
||||
| `leaveReason` | `duration_expired`, `lobby_timeout`, `denied`, `page_closed`, or null. |
|
||||
| `error` | Last error (soft — bot may still be running). |
|
||||
|
||||
## Transcript location
|
||||
|
||||
Local:
|
||||
```
|
||||
$HERMES_HOME/workspace/meetings/<meeting-id>/transcript.txt
|
||||
```
|
||||
|
||||
Remote node: transcript lives on the node host's disk. Use `meet_transcript(node=...)` to read it over RPC.
|
||||
|
||||
## Safety
|
||||
|
||||
- URL regex: only `https://meet.google.com/...` URLs pass.
|
||||
- No calendar scanning. No auto-dial.
|
||||
- Remote nodes use bearer-token auth; tokens are generated on the node (32 hex chars, persisted in `$HERMES_HOME/workspace/meetings/node_token.json`) and must be copied to the gateway via `hermes meet node approve`.
|
||||
- `meet_say` text is rate-limited by the OpenAI Realtime session; spam-protection is the bot's problem, not yours, but still — don't queue hundreds of lines.
|
||||
@@ -0,0 +1,103 @@
|
||||
"""google_meet plugin — let the agent join a Meet call, transcribe it, follow up.
|
||||
|
||||
v1: transcribe-only. Spawns a headless Chromium via Playwright, joins the Meet
|
||||
URL, enables live captions, scrapes them into a transcript file. The agent then
|
||||
has the transcript in its workspace and can do whatever followup work it needs
|
||||
using its regular tools.
|
||||
|
||||
v2 (opt-in via ``mode='realtime'``): realtime duplex audio so the agent can speak in the
|
||||
meeting, via OpenAI Realtime / Gemini Live + BlackHole / PulseAudio null-sink.
|
||||
``meet_say`` only works in meetings that were joined with ``mode='realtime'``.
|
||||
|
||||
Explicit-by-design: only joins ``https://meet.google.com/`` URLs explicitly
|
||||
passed in. No calendar scanning, no auto-dial, no consent announcement.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import platform
|
||||
|
||||
from plugins.google_meet import process_manager as pm
|
||||
from plugins.google_meet.cli import register_cli as _register_meet_cli
|
||||
from plugins.google_meet.cli import meet_command as _meet_command
|
||||
from plugins.google_meet.tools import (
|
||||
MEET_JOIN_SCHEMA,
|
||||
MEET_LEAVE_SCHEMA,
|
||||
MEET_SAY_SCHEMA,
|
||||
MEET_STATUS_SCHEMA,
|
||||
MEET_TRANSCRIPT_SCHEMA,
|
||||
check_meet_requirements,
|
||||
handle_meet_join,
|
||||
handle_meet_leave,
|
||||
handle_meet_say,
|
||||
handle_meet_status,
|
||||
handle_meet_transcript,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Agent-facing tools, one row each: (tool name, schema, handler, emoji).
# register() iterates this table and registers every row under the
# "google_meet" toolset.
_TOOLS = (
    ("meet_join", MEET_JOIN_SCHEMA, handle_meet_join, "📞"),
    ("meet_status", MEET_STATUS_SCHEMA, handle_meet_status, "🟢"),
    ("meet_transcript", MEET_TRANSCRIPT_SCHEMA, handle_meet_transcript, "📝"),
    ("meet_leave", MEET_LEAVE_SCHEMA, handle_meet_leave, "👋"),
    ("meet_say", MEET_SAY_SCHEMA, handle_meet_say, "🗣️"),
)
|
||||
|
||||
|
||||
def _on_session_end(**kwargs) -> None:
    """Session-end hook: leave any meeting whose bot is still running.

    Prevents an orphaned headless Chromium when the agent session ends with
    a call still active.  Does nothing when no bot is alive.  Every exception
    is caught and logged at debug level, because ending a session must never
    fail on account of this cleanup.
    """
    try:
        state = pm.status()
        bot_running = state.get("ok") and state.get("alive")
        if bot_running:
            pm.stop(reason="session ended")
    except Exception as exc:  # pragma: no cover — defensive
        logger.debug("google_meet on_session_end cleanup failed: %s", exc)
|
||||
|
||||
|
||||
def register(ctx) -> None:
    """Plugin entry point: register the Meet tools, the CLI and the hook.

    Called once by the plugin loader when the plugin is enabled via
    ``plugins.enabled`` in config.yaml.  On any platform other than Linux or
    macOS the function logs a notice and registers nothing.
    """
    # Windows has no tested audio-routing path and guest-join Chromium is
    # flakier there, so refuse to register instead of half-working.
    system = platform.system().lower()
    supported = ("linux", "darwin")
    if system not in supported:
        logger.info(
            "google_meet plugin: platform=%s not supported (linux/macos only)",
            system,
        )
        return

    for tool_name, tool_schema, tool_handler, tool_emoji in _TOOLS:
        ctx.register_tool(
            name=tool_name,
            toolset="google_meet",
            schema=tool_schema,
            handler=tool_handler,
            check_fn=check_meet_requirements,
            emoji=tool_emoji,
        )

    cli_description = (
        "Let the hermes agent join a Google Meet call and scrape live "
        "captions into a transcript. See: hermes meet setup"
    )
    ctx.register_cli_command(
        name="meet",
        help="Google Meet bot (join, transcribe, follow up)",
        setup_fn=_register_meet_cli,
        handler_fn=_meet_command,
        description=cli_description,
    )

    ctx.register_hook("on_session_end", _on_session_end)
|
||||
@@ -0,0 +1,244 @@
|
||||
"""Virtual audio bridge for feeding generated speech into Chrome's mic.
|
||||
|
||||
v2 module. Provisions a platform-specific virtual audio device so the
|
||||
Meet bot's Chromium instance can be pointed at an input source we
|
||||
control. The OpenAI Realtime client writes PCM bytes into this device;
|
||||
Chrome reads them as if they were coming from a microphone.
|
||||
|
||||
Linux (primary): uses pactl (PulseAudio) to create a null-sink plus a
|
||||
virtual source whose master is the null-sink's monitor. Callers set
|
||||
PULSE_SOURCE=<source_name> in Chrome's env and pass the fake-mic flag.
|
||||
|
||||
macOS: requires BlackHole 2ch to be installed. This module only
|
||||
verifies its presence and returns the device name; routing OS default
|
||||
input is left to the user (or a future switchaudio-osx integration) to
|
||||
avoid surprising the user's system audio state.
|
||||
|
||||
Windows: not supported in v2.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import platform
|
||||
import subprocess
|
||||
from typing import Optional
|
||||
|
||||
|
||||
# Device name reported for the macOS BlackHole virtual audio driver.
_BLACKHOLE_DEVICE = "BlackHole 2ch"


class AudioBridge:
    """Manages a virtual audio device for Chrome fake-mic input.

    Call ``setup()`` once before launching the Meet bot and ``teardown()``
    when the session ends.  ``teardown()`` is idempotent and never raises.
    Calling ``setup()`` again while PulseAudio modules from an earlier call
    are still loaded releases those modules first, so they are not leaked.
    """

    def __init__(self, name_prefix: str = "hermes_meet") -> None:
        """Create an unprovisioned bridge.

        Args:
            name_prefix: Prefix for the PulseAudio sink/source names created
                on Linux (``<prefix>_sink`` and ``<prefix>_src``).
        """
        self._name_prefix = name_prefix
        self._platform: Optional[str] = None
        self._device_name: Optional[str] = None
        self._write_target: Optional[str] = None
        # pactl module ids loaded by this bridge, in load order (Linux only).
        self._module_ids: list[int] = []
        self._torn_down = False

    # ── public properties ─────────────────────────────────────────────────

    @property
    def device_name(self) -> str:
        """Name of the provisioned input device.

        Raises:
            RuntimeError: if ``setup()`` has not succeeded yet.
        """
        if not self._device_name:
            raise RuntimeError("AudioBridge not set up yet")
        return self._device_name

    @property
    def write_target(self) -> str:
        """Name of the device that generated audio should be written to.

        Raises:
            RuntimeError: if ``setup()`` has not succeeded yet.
        """
        if not self._write_target:
            raise RuntimeError("AudioBridge not set up yet")
        return self._write_target

    # ── lifecycle ─────────────────────────────────────────────────────────

    def setup(self) -> dict:
        """Provision the virtual audio device.

        Returns:
            A dict with the keys ``platform``, ``device_name``,
            ``sample_rate``, ``channels``, ``module_ids`` and
            ``write_target``.

        Raises:
            RuntimeError: on unsupported platforms or when required system
                tools are missing or fail.
        """
        if self._module_ids:
            # A previous setup() is still live.  Unload its modules before
            # loading new ones; otherwise their ids would be overwritten and
            # the modules could never be unloaded by this bridge.
            self.teardown()

        system = platform.system()
        if system == "Linux":
            return self._setup_linux()
        if system == "Darwin":
            return self._setup_darwin()
        if system == "Windows":
            raise RuntimeError("windows not supported in v2")
        raise RuntimeError(f"unsupported platform: {system}")

    def teardown(self) -> None:
        """Release the virtual audio device. Idempotent."""
        if self._torn_down:
            return
        # Only Linux needs explicit unloading.
        if self._platform == "linux" and self._module_ids:
            # Unload in reverse order (virtual-source before null-sink).
            for mod_id in reversed(self._module_ids):
                self._unload_module(mod_id)
            self._module_ids = []
        self._torn_down = True

    # ── platform impls ────────────────────────────────────────────────────

    def _setup_linux(self) -> dict:
        """Load a null-sink plus a virtual source fed by the sink's monitor.

        If the second step fails in any way, the null-sink loaded by the
        first step is unloaded again before the error propagates.
        """
        sink_name = f"{self._name_prefix}_sink"
        src_name = f"{self._name_prefix}_src"

        try:
            sink_out = subprocess.run(
                [
                    "pactl",
                    "load-module",
                    "module-null-sink",
                    f"sink_name={sink_name}",
                    "sink_properties=device.description=HermesMeetSink",
                ],
                check=True,
                capture_output=True,
                text=True,
            )
        except FileNotFoundError as exc:
            raise RuntimeError(
                "pactl not found — install PulseAudio/pipewire-pulse"
            ) from exc
        except subprocess.CalledProcessError as exc:
            raise RuntimeError(
                f"pactl load-module null-sink failed: {exc.stderr or exc}"
            ) from exc

        sink_mod_id = self._parse_module_id(sink_out.stdout)

        try:
            src_out = subprocess.run(
                [
                    "pactl",
                    "load-module",
                    "module-virtual-source",
                    f"source_name={src_name}",
                    f"master={sink_name}.monitor",
                ],
                check=True,
                capture_output=True,
                text=True,
            )
            src_mod_id = self._parse_module_id(src_out.stdout)
        except subprocess.CalledProcessError as exc:
            # Roll back the null-sink we just created so we don't leak it.
            self._unload_module(sink_mod_id)
            raise RuntimeError(
                f"pactl load-module virtual-source failed: {exc.stderr or exc}"
            ) from exc
        except RuntimeError:
            # The source module's id could not be parsed.  We can still
            # identify the sink, so release it rather than leaking it.
            self._unload_module(sink_mod_id)
            raise

        self._platform = "linux"
        self._device_name = src_name
        self._write_target = sink_name
        self._module_ids = [sink_mod_id, src_mod_id]
        self._torn_down = False

        return {
            "platform": "linux",
            "device_name": src_name,
            "sample_rate": 48000,
            "channels": 2,
            "module_ids": list(self._module_ids),
            "write_target": sink_name,
        }

    def _setup_darwin(self) -> dict:
        """Verify BlackHole is listed by ``system_profiler`` and report it.

        Nothing is created or switched on macOS; this only checks presence.
        """
        try:
            out = subprocess.check_output(
                ["system_profiler", "SPAudioDataType"],
                text=True,
                stderr=subprocess.STDOUT,
            )
        except FileNotFoundError as exc:
            raise RuntimeError(
                "system_profiler not found (macOS-only command)"
            ) from exc
        except subprocess.CalledProcessError as exc:
            raise RuntimeError(
                f"system_profiler failed: {exc.output}"
            ) from exc

        if "BlackHole" not in out:
            raise RuntimeError(
                "BlackHole virtual audio device not installed. "
                "Install via: brew install blackhole-2ch"
            )

        self._platform = "darwin"
        self._device_name = _BLACKHOLE_DEVICE
        self._write_target = _BLACKHOLE_DEVICE
        self._module_ids = []
        self._torn_down = False

        return {
            "platform": "darwin",
            "device_name": _BLACKHOLE_DEVICE,
            "sample_rate": 48000,
            "channels": 2,
            "module_ids": [],
            "write_target": _BLACKHOLE_DEVICE,
        }

    # ── helpers ──────────────────────────────────────────────────────────

    @staticmethod
    def _unload_module(mod_id: int) -> None:
        """Best-effort ``pactl unload-module``; never raises."""
        try:
            subprocess.run(
                ["pactl", "unload-module", str(mod_id)],
                check=False,
                capture_output=True,
            )
        except Exception:
            # Best-effort cleanup — a failure here must not mask the error
            # (or the teardown) that triggered it.
            pass

    @staticmethod
    def _parse_module_id(stdout: str) -> int:
        """pactl load-module prints the new module ID to stdout.

        Raises:
            RuntimeError: if ``stdout`` is empty or the id is not an integer.
        """
        text = (stdout or "").strip()
        if not text:
            raise RuntimeError("pactl load-module returned empty stdout")
        # Take the last whitespace-separated token on the first non-empty line.
        first = text.splitlines()[0].strip()
        token = first.split()[-1]
        try:
            return int(token)
        except ValueError as exc:
            raise RuntimeError(
                f"could not parse pactl module id from: {stdout!r}"
            ) from exc
|
||||
|
||||
|
||||
def chrome_fake_audio_flags(bridge_info: dict) -> list[str]:
    """Return the Chrome launch flags needed to use the virtual microphone.

    Linux and macOS get the same single flag, which skips the media
    permission prompt so the bot can use the mic without user input.
    ``bridge_info`` is not read here; device selection happens outside this
    function:

    * Linux — the caller sets ``PULSE_SOURCE`` in Chrome's environment
      before launch::

          env["PULSE_SOURCE"] = bridge_info["device_name"]

    * macOS — the caller must make sure the system default audio input is
      the BlackHole device (this code does not flip that switch).

    Raises:
        RuntimeError: on Windows or any other unsupported platform.
    """
    system = platform.system()
    if system in ("Linux", "Darwin"):
        return ["--use-fake-ui-for-media-stream"]
    if system == "Windows":
        raise RuntimeError("windows not supported in v2")
    raise RuntimeError(f"unsupported platform: {system}")
|
||||
@@ -0,0 +1,478 @@
|
||||
"""CLI commands for the google_meet plugin.
|
||||
|
||||
Wires ``hermes meet <subcommand>`` (``install``, ``say`` and ``node`` are registered too but not itemised here):
|
||||
setup — preflight playwright, chromium, auth file, print fixes
|
||||
auth — open a browser to sign into Google, save storage state
|
||||
join <url> — join a Meet URL synchronously (also callable from the agent)
|
||||
status — print current bot state
|
||||
transcript — print the transcript
|
||||
stop — leave the current meeting
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from hermes_constants import get_hermes_home
|
||||
|
||||
from plugins.google_meet import process_manager as pm
|
||||
from plugins.google_meet.meet_bot import _is_safe_meet_url
|
||||
|
||||
|
||||
def _auth_state_path() -> Path:
    """Return ``<hermes home>/workspace/meetings/auth.json``.

    NOTE(review): presumably the browser storage state saved by
    ``hermes meet auth`` — confirm against the auth handler.
    """
    meetings_dir = Path(get_hermes_home()).joinpath("workspace", "meetings")
    return meetings_dir / "auth.json"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# argparse wiring
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def register_cli(subparser: argparse.ArgumentParser) -> None:
    """Build the ``hermes meet`` argparse tree.

    Called by :func:`_register_cli_commands` at plugin load time.

    Adds the subcommands ``setup``, ``install``, ``auth``, ``join``,
    ``status``, ``transcript``, ``say``, ``stop`` and ``node``; the chosen
    one is stored in ``args.meet_command``.  ``func`` defaults to
    :func:`meet_command` for the whole tree.

    Args:
        subparser: The parser for ``hermes meet`` to attach subcommands to.
    """
    subs = subparser.add_subparsers(dest="meet_command")

    subs.add_parser("setup", help="Preflight: playwright, chromium, auth")

    inst_p = subs.add_parser(
        "install",
        help="Install prerequisites (pip deps, Chromium, platform audio tools)",
    )
    inst_p.add_argument(
        "--realtime", action="store_true",
        help="Also install realtime audio tools (pulseaudio-utils on Linux, BlackHole+ffmpeg on macOS). Uses sudo/brew, prompts before invoking either.",
    )
    inst_p.add_argument(
        "--yes", "-y", action="store_true",
        help="Answer yes to all prompts (use with care; will run sudo apt-get or brew without asking).",
    )

    subs.add_parser("auth", help="Sign in to Google and save session state")

    join_p = subs.add_parser("join", help="Join a Meet URL")
    join_p.add_argument("url", help="https://meet.google.com/...")
    join_p.add_argument("--guest-name", default="Hermes Agent")
    join_p.add_argument("--duration", default=None, help="e.g. 30m, 2h, 90s")
    join_p.add_argument("--headed", action="store_true", help="show browser")
    join_p.add_argument(
        "--mode", choices=("transcribe", "realtime"), default="transcribe",
        help="transcribe (default, listen-only) or realtime (speak via OpenAI Realtime)"
    )
    join_p.add_argument(
        "--node", default=None,
        help="remote node name, or 'auto' to use the sole registered node"
    )

    subs.add_parser("status", help="Print current Meet bot state")

    tr_p = subs.add_parser("transcript", help="Print the scraped transcript")
    tr_p.add_argument("--last", type=int, default=None)

    say_p = subs.add_parser("say", help="Speak text in an active realtime meeting")
    say_p.add_argument("text", help="what to say")
    say_p.add_argument("--node", default=None)

    subs.add_parser("stop", help="Leave the current meeting")

    # v3: remote node host management.
    node_p = subs.add_parser(
        "node",
        help="Manage remote meet node hosts (run/list/approve/remove/status/ping)",
    )
    try:
        from plugins.google_meet.node.cli import register_cli as _register_node_cli
        _register_node_cli(node_p)
    except Exception as e:  # pragma: no cover — defensive
        # If the node module fails to import for any reason (optional dep
        # missing at import time etc.), leave the subparser present but
        # flag it. The argparse dispatch will surface a clear error.
        #
        # Python unbinds ``e`` when this except block exits, and the handler
        # below only runs later, so snapshot the message now.  Closing over
        # ``e`` directly would raise NameError at call time.
        reason = str(e)

        def _node_unavailable(args):
            print(f"hermes meet node: module unavailable ({reason})")
            return 1

        node_p.set_defaults(func=_node_unavailable)

    subparser.set_defaults(func=meet_command)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Dispatch
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def meet_command(args: argparse.Namespace) -> int:
    """Dispatch a parsed ``hermes meet`` invocation to its subcommand handler.

    The chosen subcommand is read from ``args.meet_command`` and the relevant
    parsed options are forwarded to the matching ``_cmd_*`` function.

    Returns the handler's exit code, or ``2`` (after printing a usage line)
    when no subcommand was given or the subcommand is not recognised.
    """
    sub = getattr(args, "meet_command", None)
    if not sub:
        # ``install`` is dispatched below, so it is listed in the usage too.
        print("usage: hermes meet {setup,install,auth,join,status,transcript,say,stop,node}")
        return 2
    if sub == "setup":
        return _cmd_setup()
    if sub == "install":
        return _cmd_install(
            realtime=bool(getattr(args, "realtime", False)),
            assume_yes=bool(getattr(args, "yes", False)),
        )
    if sub == "auth":
        return _cmd_auth()
    if sub == "join":
        return _cmd_join(
            url=args.url,
            guest_name=args.guest_name,
            duration=args.duration,
            headed=args.headed,
            mode=getattr(args, "mode", "transcribe"),
            node=getattr(args, "node", None),
        )
    if sub == "status":
        return _cmd_status()
    if sub == "transcript":
        return _cmd_transcript(last=args.last)
    if sub == "say":
        return _cmd_say(text=args.text, node=getattr(args, "node", None))
    if sub == "stop":
        return _cmd_stop()
    if sub == "node":
        # Dispatch was set by the node cli's register_cli; fall through to
        # whatever its subparsers wired.
        fn = getattr(args, "func", None)
        # ``func`` still pointing at this function means no node subcommand
        # was selected; calling it would recurse forever.
        if fn is None or fn is meet_command:
            print("usage: hermes meet node {run,list,approve,remove,status,ping}")
            return 2
        return fn(args)
    print(f"unknown subcommand: {sub}")
    return 2
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Subcommand handlers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _cmd_setup() -> int:
    """Print a preflight report for the google_meet plugin.

    Checks the host platform, whether ``playwright`` is importable, whether
    Playwright's Chromium binary exists on disk, and whether a saved Google
    auth state file is present. The auth check is informational only.

    Returns ``0`` when platform, playwright and chromium are all fine,
    otherwise ``1``.
    """
    import platform as _platform

    def _probe_chromium():
        """Return ``(found, message)`` describing Playwright's Chromium."""
        found, message = False, "unknown"
        try:
            from playwright.sync_api import sync_playwright

            with sync_playwright() as handle:
                try:
                    binary = handle.chromium.executable_path
                    if binary and Path(binary).exists():
                        found, message = True, f"ok ({binary})"
                    else:
                        message = (
                            "not installed — run: "
                            "python -m playwright install chromium"
                        )
                except Exception as exc:
                    message = f"probe failed: {exc}"
        except Exception as exc:
            message = f"probe failed: {exc}"
        return found, message

    print("google_meet preflight")
    print("---------------------")

    # Platform.
    os_name = _platform.system()
    system_ok = os_name in ("Linux", "Darwin")
    verdict = "ok" if system_ok else "unsupported"
    print(f" platform : {os_name} [{verdict}]")

    # Playwright python package.
    try:
        import playwright  # noqa: F401
    except ImportError:
        pw_ok = False
        pw_msg = "NOT installed — run: pip install playwright"
    else:
        pw_ok = True
        pw_msg = "installed"
    print(f" playwright : {pw_msg}")

    # Chromium binary; only probed when playwright itself is importable.
    if pw_ok:
        chromium_ok, chromium_msg = _probe_chromium()
    else:
        chromium_ok, chromium_msg = False, "unknown"
    print(f" chromium : {chromium_msg}")

    # Saved Google sign-in state (does not affect the exit code).
    auth_path = _auth_state_path()
    if auth_path.is_file():
        auth_msg = f"ok ({auth_path})"
    else:
        auth_msg = "not saved — run: hermes meet auth"
    print(" google auth : " + auth_msg)

    print()
    if system_ok and pw_ok and chromium_ok:
        print(
            "ready. Join a meeting: "
            "hermes meet join https://meet.google.com/abc-defg-hij"
        )
        return 0
    print("not ready yet — fix the items above.")
    return 1
|
||||
|
||||
|
||||
def _cmd_install(*, realtime: bool, assume_yes: bool) -> int:
    """Install the plugin's prerequisites.

    Always: pip install playwright + websockets, then
    ``python -m playwright install chromium``.

    With ``--realtime``: also install the platform audio bridge deps.
      Linux : ``sudo apt-get install -y pulseaudio-utils``
      macOS : ``brew install blackhole-2ch ffmpeg`` (+ remind the user
              to select BlackHole as the default input device manually)

    Prompts before every package-manager invocation unless ``--yes``.
    Refuses to run on Windows.

    Args:
        realtime: also install the realtime audio tooling for this platform.
        assume_yes: skip the interactive confirmation prompts.

    Returns:
        ``0`` on success (including when optional audio steps were skipped
        or failed), ``1`` when the platform is unsupported or a required
        pip/playwright step could not be run.
    """
    import platform as _p
    import shutil as _shutil
    import subprocess as _sp

    system = _p.system()
    if system not in ("Linux", "Darwin"):
        print(f"google_meet install: {system} is not supported (linux/macos only)")
        return 1

    def _confirm(prompt: str) -> bool:
        """Ask a yes/no question; EOF (non-interactive stdin) counts as no."""
        if assume_yes:
            return True
        try:
            ans = input(f"{prompt} [y/N] ").strip().lower()
        except EOFError:
            return False
        return ans in ("y", "yes")

    def _run_pkg_manager(cmd: list, failure_msg: str) -> None:
        """Echo and run *cmd*; report (never raise) when it fails or is missing."""
        print(f" $ {' '.join(cmd)}")
        try:
            res = _sp.run(cmd, check=False)
        except OSError as e:
            # The executable itself is missing or not runnable (e.g. no
            # ``sudo`` on this host). Steps 1 and 2 already guard against
            # this; do the same here instead of dying with a traceback.
            print(f"{failure_msg} ({e})")
            return
        if res.returncode != 0:
            print(failure_msg)

    print("google_meet install")
    print("-------------------")

    # 1) pip deps — always safe, venv-scoped.
    pip_pkgs = ["playwright", "websockets"]
    print(f"\n[1/3] pip install: {' '.join(pip_pkgs)}")
    try:
        res = _sp.run(
            [sys.executable, "-m", "pip", "install", "--upgrade", *pip_pkgs],
            check=False,
        )
        if res.returncode != 0:
            print(" pip install failed")
            return 1
    except Exception as e:
        print(f" pip install failed: {e}")
        return 1

    # 2) Playwright browsers — pulls chromium (~300MB first run).
    print("\n[2/3] python -m playwright install chromium")
    try:
        res = _sp.run(
            [sys.executable, "-m", "playwright", "install", "chromium"],
            check=False,
        )
        if res.returncode != 0:
            # Non-fatal on purpose: a non-zero exit is tolerated here.
            print(" playwright install failed (may already be installed)")
    except Exception as e:
        print(f" playwright install failed: {e}")
        return 1

    # 3) Platform audio deps for realtime mode.
    if realtime:
        print("\n[3/3] realtime audio deps")
        if system == "Linux":
            if _shutil.which("paplay") and _shutil.which("pactl"):
                print(" pulseaudio-utils already installed.")
            elif not _confirm(
                " install pulseaudio-utils? this runs `sudo apt-get install -y pulseaudio-utils`"
            ):
                print(" skipped (you can run it manually later)")
            else:
                _run_pkg_manager(
                    ["sudo", "apt-get", "install", "-y", "pulseaudio-utils"],
                    " apt install failed — install pulseaudio-utils manually",
                )
        elif system == "Darwin":
            have_bh = False
            try:
                out = _sp.check_output(["system_profiler", "SPAudioDataType"], text=True)
                have_bh = "BlackHole" in out
            except Exception:
                # Best-effort probe: if it cannot run, treat BlackHole as
                # missing and offer to install it.
                pass
            have_ffmpeg = bool(_shutil.which("ffmpeg"))
            needs = []
            if not have_bh:
                needs.append("blackhole-2ch")
            if not have_ffmpeg:
                needs.append("ffmpeg")
            if not needs:
                print(" BlackHole and ffmpeg already installed.")
            elif not _shutil.which("brew"):
                print(
                    " missing: " + ", ".join(needs) + "\n"
                    " install Homebrew first (https://brew.sh) or install the packages manually."
                )
            elif not _confirm(f" install via brew: {' '.join(needs)}?"):
                print(" skipped (you can run it manually later)")
            else:
                _run_pkg_manager(
                    ["brew", "install", *needs],
                    " brew install failed — install them manually",
                )
            print(
                "\n NOTE: macOS does not auto-route audio. Open\n"
                " System Settings → Sound → Input\n"
                " and select 'BlackHole 2ch' before starting a realtime meeting.\n"
                " hermes will not switch your default input for you."
            )
    else:
        print("\n[3/3] skipped (pass --realtime to install audio tooling too)")

    print("\ndone. verify with: hermes meet setup")
    return 0
|
||||
|
||||
|
||||
def _cmd_auth() -> int:
    """Run an interactive Google sign-in and persist the browser state.

    Launches a visible Chromium window on the Google accounts page, waits for
    the user to press Enter in the terminal, then writes Playwright's
    ``storage_state`` to the auth-state path.

    Returns ``0`` once the state is saved, ``1`` when playwright is missing
    or anything in the browser session fails.
    """
    try:
        from playwright.sync_api import sync_playwright
    except ImportError:
        print(
            "playwright is not installed. run:\n"
            " pip install playwright && python -m playwright install chromium"
        )
        return 1

    state_file = _auth_state_path()
    state_file.parent.mkdir(parents=True, exist_ok=True)

    print("opening Chromium — sign in to Google, then return here and press Enter.")
    print(f"saving storage state to: {state_file}")
    try:
        with sync_playwright() as driver:
            chromium = driver.chromium.launch(headless=False)
            ctx = chromium.new_context()
            tab = ctx.new_page()
            tab.goto("https://accounts.google.com/", wait_until="domcontentloaded")
            try:
                input("press Enter after you've signed in ... ")
            except EOFError:
                # No interactive stdin: carry on and save whatever state exists.
                pass
            ctx.storage_state(path=str(state_file))
            chromium.close()
    except Exception as exc:
        print(f"auth failed: {exc}")
        return 1

    print("saved. you can now run: hermes meet join <url>")
    return 0
|
||||
|
||||
|
||||
def _cmd_join(
    url: str,
    *,
    guest_name: str,
    duration: Optional[str],
    headed: bool,
    mode: str = "transcribe",
    node: Optional[str] = None,
) -> int:
    """Start a Meet bot for *url*, locally or on a registered remote node.

    Non-Meet URLs are refused with exit code ``2``. With *node* set the
    request goes through ``NodeClient.start_bot`` (``"auto"`` resolves with no
    explicit name); otherwise the local process manager starts the bot, using
    the saved auth state when that file exists.

    The result dict is printed as JSON. Returns ``0`` when it reports
    ``ok``, else ``1``.
    """
    if not _is_safe_meet_url(url):
        print(f"refusing: not a meet.google.com URL: {url}")
        return 2

    if not node:
        # Local launch through the process manager.
        auth_file = _auth_state_path()
        outcome = pm.start(
            url=url,
            headed=headed,
            guest_name=guest_name,
            duration=duration,
            auth_state=str(auth_file) if auth_file.is_file() else None,
            mode=mode,
        )
        print(json.dumps(outcome, indent=2))
        return 0 if outcome.get("ok") else 1

    # Remote: go through NodeClient.
    try:
        from plugins.google_meet.node.registry import NodeRegistry
        from plugins.google_meet.node.client import NodeClient
    except ImportError as exc:
        print(f"node module unavailable: {exc}")
        return 1

    wanted = None if node == "auto" else node
    entry = NodeRegistry().resolve(wanted)
    if entry is None:
        print(f"no registered node matches {node!r}")
        return 1

    remote = NodeClient(url=entry["url"], token=entry["token"])
    try:
        outcome = remote.start_bot(
            url=url,
            guest_name=guest_name,
            duration=duration,
            headed=headed,
            mode=mode,
        )
    except Exception as exc:
        print(f"remote start_bot failed: {exc}")
        return 1

    print(json.dumps({"node": entry.get("name"), **outcome}, indent=2))
    return 0 if outcome.get("ok") else 1
|
||||
|
||||
|
||||
def _cmd_say(text: str, node: Optional[str] = None) -> int:
    """Queue *text* to be spoken in the active realtime meeting.

    Blank text is refused with exit code ``2``. With *node* set the request
    is sent through ``NodeClient.say`` (``"auto"`` resolves with no explicit
    name); otherwise it is enqueued with the local process manager.

    The result dict is printed as JSON. Returns ``0`` when it reports
    ``ok``, else ``1``.
    """
    if (text or "").strip() == "":
        print("refusing: empty text")
        return 2

    if not node:
        outcome = pm.enqueue_say(text)
        print(json.dumps(outcome, indent=2))
        return 0 if outcome.get("ok") else 1

    try:
        from plugins.google_meet.node.registry import NodeRegistry
        from plugins.google_meet.node.client import NodeClient
    except ImportError as exc:
        print(f"node module unavailable: {exc}")
        return 1

    wanted = None if node == "auto" else node
    entry = NodeRegistry().resolve(wanted)
    if entry is None:
        print(f"no registered node matches {node!r}")
        return 1

    remote = NodeClient(url=entry["url"], token=entry["token"])
    try:
        outcome = remote.say(text)
    except Exception as exc:
        print(f"remote say failed: {exc}")
        return 1

    print(json.dumps({"node": entry.get("name"), **outcome}, indent=2))
    return 0 if outcome.get("ok") else 1
|
||||
|
||||
|
||||
def _cmd_status() -> int:
    """Print the process manager's status as JSON; ``0`` if ``ok`` else ``1``."""
    report = pm.status()
    print(json.dumps(report, indent=2))
    if report.get("ok"):
        return 0
    return 1
|
||||
|
||||
|
||||
def _cmd_transcript(last: Optional[int]) -> int:
    """Print transcript lines fetched from the process manager.

    *last* is passed straight through to ``pm.transcript``. On a non-``ok``
    result the raw result is dumped as JSON and ``1`` is returned; otherwise
    each entry of ``lines`` is printed on its own line and ``0`` is returned.
    """
    report = pm.transcript(last=last)
    if report.get("ok"):
        for entry in report.get("lines", []):
            print(entry)
        return 0
    print(json.dumps(report, indent=2))
    return 1
|
||||
|
||||
|
||||
def _cmd_stop() -> int:
    """Ask the process manager to stop the bot; ``0`` if ``ok`` else ``1``."""
    report = pm.stop(reason="hermes meet stop")
    print(json.dumps(report, indent=2))
    if report.get("ok"):
        return 0
    return 1
|
||||
|
||||
|
||||
if __name__ == "__main__":  # pragma: no cover
    # Standalone entry point: build a parser, let register_cli() attach the
    # meet subcommands, and exit with meet_command()'s return code.
    parser = argparse.ArgumentParser(prog="hermes meet")
    register_cli(parser)
    ns = parser.parse_args()
    sys.exit(meet_command(ns))
|
||||
@@ -0,0 +1,852 @@
|
||||
"""Headless Google Meet bot — Playwright + live-caption scraping.
|
||||
|
||||
Runs as a standalone subprocess spawned by ``process_manager.py``. Reads config
|
||||
from env vars, writes status + transcript to files under
|
||||
``$HERMES_HOME/workspace/meetings/<meeting-id>/``. The main hermes process
|
||||
reads those files via the ``meet_*`` tools — no IPC beyond filesystem.
|
||||
|
||||
The scraping strategy mirrors OpenUtter (sumansid/openutter): we don't parse
|
||||
WebRTC audio, we enable Google Meet's built-in live captions and observe the
|
||||
captions container in the DOM via a MutationObserver. This is lossy and
|
||||
English-biased but it is:
|
||||
|
||||
* deterministic (no API keys, no STT billing),
|
||||
* works behind Meet's normal login / admission,
|
||||
* survives Meet UI rewrites fairly well because the caption container has a
|
||||
stable ARIA role.
|
||||
|
||||
Run standalone for debugging::
|
||||
|
||||
HERMES_MEET_URL=https://meet.google.com/abc-defg-hij \\
|
||||
HERMES_MEET_OUT_DIR=/tmp/meet-debug \\
|
||||
HERMES_MEET_HEADED=1 \\
|
||||
python -m plugins.google_meet.meet_bot
|
||||
|
||||
No meet.google.com URL → exits non-zero. Any URL that doesn't start with
|
||||
``https://meet.google.com/`` is rejected (explicit-by-design).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import signal
|
||||
import sys
|
||||
import threading
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
# Accepted Meet URLs, always ``https://meet.google.com/`` followed by one of:
#   * a three-segment code such as ``abc-defg-hij`` (each segment is 3+
#     lowercase letters or digits),
#   * ``lookup/<id>``,
#   * ``new``.
# An optional trailing ``/``, ``?`` or ``#`` part is tolerated. Anything else
# is rejected.
MEET_URL_RE = re.compile(
    r"^https://meet\.google\.com/("
    r"[a-z0-9]{3,}-[a-z0-9]{3,}-[a-z0-9]{3,}"
    r"|lookup/[^/?#]+"
    r"|new"
    r")(?:[/?#].*)?$"
)


# Filenames the bot reads/writes in ``HERMES_MEET_OUT_DIR``.
SAY_QUEUE_FILENAME = "say_queue.jsonl"
SAY_PCM_FILENAME = "speaker.pcm"
|
||||
|
||||
|
||||
def _is_safe_meet_url(url: str) -> bool:
    """Tell whether *url* is a Google Meet URL the bot may navigate to.

    Non-string input is never safe. Strings are stripped of surrounding
    whitespace and must match ``MEET_URL_RE`` from the start.
    """
    if isinstance(url, str):
        return MEET_URL_RE.match(url.strip()) is not None
    return False
|
||||
|
||||
|
||||
def _meeting_id_from_url(url: str) -> str:
    """Derive a meeting id from a Meet URL.

    ``https://meet.google.com/abc-defg-hij`` yields ``abc-defg-hij``. URLs
    without a three-segment code (``.../lookup/<id>``, ``/new``, empty or
    ``None``) yield a ``meet-<unix-seconds>`` placeholder, since the real
    code is only known after the redirect.
    """
    code_pattern = r"meet\.google\.com/([a-z0-9]{3,}-[a-z0-9]{3,}-[a-z0-9]{3,})"
    found = re.search(code_pattern, url or "")
    if found is None:
        return f"meet-{int(time.time())}"
    return found.group(1)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Status + transcript file writers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class _BotState:
    """Single-process mutable state, flushed to ``status.json`` on each change.

    Besides the status snapshot, the instance owns ``transcript.txt`` in the
    same directory and appends deduplicated caption lines to it.

    Mutations are serialised with a re-entrant lock: ``set()`` is called from
    the realtime speaker thread as well as from the main loop, and every
    flush goes through one shared temp file, so unsynchronised writers could
    clobber each other's temp file or race on the rename.
    """

    def __init__(self, out_dir: Path, meeting_id: str, url: str):
        """Create *out_dir* if needed and write the initial ``status.json``."""
        # Re-entrant because record_caption()/set() call _flush() while
        # already holding the lock.
        self._lock = threading.RLock()
        self.out_dir = out_dir
        self.meeting_id = meeting_id
        self.url = url
        self.in_call = False
        self.captioning = False
        self.captions_enabled_attempted = False
        self.lobby_waiting = False
        self.join_attempted_at: Optional[float] = None
        self.joined_at: Optional[float] = None
        self.last_caption_at: Optional[float] = None
        self.transcript_lines = 0
        self.error: Optional[str] = None
        self.exited = False
        # v2 realtime fields.
        self.realtime = False
        self.realtime_ready = False
        self.realtime_device: Optional[str] = None
        self.audio_bytes_out: int = 0
        self.last_audio_out_at: Optional[float] = None
        self.last_barge_in_at: Optional[float] = None
        self.leave_reason: Optional[str] = None
        # Dedup keys ("<speaker>|<text>") of every caption written so far.
        self._seen: set = set()
        out_dir.mkdir(parents=True, exist_ok=True)
        self.transcript_path = out_dir / "transcript.txt"
        self.status_path = out_dir / "status.json"
        self._flush()

    # -------- transcript ------------------------------------------------

    def record_caption(self, speaker: str, text: str) -> None:
        """Append a caption line if we haven't seen this exact (speaker, text).

        A blank speaker becomes ``"Unknown"``; blank text is ignored. Each
        new line is written as ``[HH:MM:SS] speaker: text`` (local time) and
        the status file is refreshed.
        """
        speaker = (speaker or "").strip() or "Unknown"
        text = (text or "").strip()
        if not text:
            return
        key = f"{speaker}|{text}"
        with self._lock:
            if key in self._seen:
                return
            self._seen.add(key)
            self.transcript_lines += 1
            self.last_caption_at = time.time()
            ts = time.strftime("%H:%M:%S", time.localtime(self.last_caption_at))
            line = f"[{ts}] {speaker}: {text}\n"
            # Atomic-ish append — good enough for a single-writer.
            with self.transcript_path.open("a", encoding="utf-8") as f:
                f.write(line)
            self._flush()

    # -------- status file ----------------------------------------------

    def _flush(self) -> None:
        """Write the current state to ``status.json`` via temp file + rename."""
        with self._lock:
            data = {
                "meetingId": self.meeting_id,
                "url": self.url,
                "inCall": self.in_call,
                "captioning": self.captioning,
                "captionsEnabledAttempted": self.captions_enabled_attempted,
                "lobbyWaiting": self.lobby_waiting,
                "joinAttemptedAt": self.join_attempted_at,
                "joinedAt": self.joined_at,
                "lastCaptionAt": self.last_caption_at,
                "transcriptLines": self.transcript_lines,
                "transcriptPath": str(self.transcript_path),
                "error": self.error,
                "exited": self.exited,
                "pid": os.getpid(),
                # v2 realtime telemetry.
                "realtime": self.realtime,
                "realtimeReady": self.realtime_ready,
                "realtimeDevice": self.realtime_device,
                "audioBytesOut": self.audio_bytes_out,
                "lastAudioOutAt": self.last_audio_out_at,
                "lastBargeInAt": self.last_barge_in_at,
                "leaveReason": self.leave_reason,
            }
            # Write-then-rename so readers never see a half-written file.
            tmp = self.status_path.with_suffix(".json.tmp")
            tmp.write_text(json.dumps(data, indent=2), encoding="utf-8")
            tmp.replace(self.status_path)

    def set(self, **kwargs) -> None:
        """Assign each keyword as an attribute, then flush the status file."""
        with self._lock:
            for k, v in kwargs.items():
                setattr(self, k, v)
            self._flush()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Playwright bot entry point
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# JavaScript injected into the Meet tab to observe captions. Captures
# {speaker, text} tuples via a MutationObserver on the caption container,
# and exposes ``window.__hermesMeetDrain()`` to pull new entries. This
# mirrors the OpenUtter caption scraping approach.
#
# Notes for maintainers:
#   * Installation is idempotent (guarded by ``window.__hermesMeetInstalled``).
#   * Every mutation re-scans the whole caption region, so the queue holds
#     repeated entries for the same row.
#   * Until the caption container exists, ``attach()`` is retried every
#     1500 ms.
_CAPTION_OBSERVER_JS = r"""
(() => {
  if (window.__hermesMeetInstalled) return;
  window.__hermesMeetInstalled = true;
  window.__hermesMeetQueue = [];

  const captionSelector = '[role="region"][aria-label*="aption" i], ' +
                          'div[jsname="YSxPC"], ' + // legacy
                          'div[jsname="tgaKEf"]'; // current (Apr 2026)

  function pushEntry(speaker, text) {
    if (!text || !text.trim()) return;
    window.__hermesMeetQueue.push({
      ts: Date.now(),
      speaker: (speaker || '').trim(),
      text: text.trim(),
    });
  }

  function scan(root) {
    // Meet captions render as a list of rows; each row contains a speaker
    // label and a text block. Selectors vary across Meet rewrites; we try
    // a few shapes and fall back to raw text.
    const rows = root.querySelectorAll('div[jsname="dsyhDe"], div.CNusmb, div.TBMuR');
    if (rows.length) {
      rows.forEach((row) => {
        const spkEl = row.querySelector('div.KcIKyf, div.zs7s8d, span[jsname="YSxPC"]');
        const txtEl = row.querySelector('div.bh44bd, span[jsname="tgaKEf"], div.iTTPOb');
        const speaker = spkEl ? spkEl.innerText : '';
        const text = txtEl ? txtEl.innerText : row.innerText;
        pushEntry(speaker, text);
      });
      return;
    }
    // Fallback: treat the whole region's innerText as one anonymous line.
    const text = (root.innerText || '').split('\n').filter(Boolean).pop();
    pushEntry('', text);
  }

  function attach() {
    const el = document.querySelector(captionSelector);
    if (!el) return false;
    const obs = new MutationObserver(() => scan(el));
    obs.observe(el, { childList: true, subtree: true, characterData: true });
    scan(el);
    return true;
  }

  // Try now and retry on interval — the caption region only appears after
  // captions are enabled and someone speaks.
  if (!attach()) {
    const iv = setInterval(() => { if (attach()) clearInterval(iv); }, 1500);
  }

  window.__hermesMeetDrain = () => {
    const out = window.__hermesMeetQueue.slice();
    window.__hermesMeetQueue = [];
    return out;
  };
})();
"""
|
||||
|
||||
|
||||
def _enable_captions_js() -> str:
    """Return a small JS snippet that tries to toggle Meet's live captions.

    Best-effort — Meet's caption toggle is keyboard-accessible via ``c``, so
    the snippet dispatches a synthetic ``keydown`` for that key on
    ``document.body`` and evaluates to ``true``. It does not click the
    "Turn on captions" button; real click targeting is too brittle to rely
    on. Nothing here checks whether captions actually turned on.
    """
    return r"""
(() => {
  const ev = new KeyboardEvent('keydown', {
    key: 'c', code: 'KeyC', keyCode: 67, which: 67, bubbles: true,
  });
  document.body.dispatchEvent(ev);
  return true;
})();
"""
|
||||
|
||||
|
||||
def _start_realtime_speaker(
    *,
    rt: dict,
    out_dir: Path,
    bridge_info: dict,
    api_key: str,
    model: str,
    voice: str,
    instructions: str,
    stop_flag: dict,
    state: "_BotState",
) -> None:
    """Wire up the OpenAI Realtime session + speaker thread + PCM pump.

    The speaker thread reads text lines from ``say_queue.jsonl``, sends each
    to OpenAI Realtime, and writes PCM audio into ``speaker.pcm``. A
    separate *pump* process forwards that PCM into the OS audio sink so
    Chrome's fake mic picks it up. On Linux we pipe to ``paplay`` against
    the null-sink; on macOS the caller is expected to have the BlackHole
    device selected as default input.

    Args:
        rt: mutable registry of realtime resources. On success this function
            stores ``session``, ``speaker_stop`` (a zero-argument callable
            that stops the speaker thread), ``speaker_thread`` and, when a
            pump was started, ``pcm_pump``.
        out_dir: directory holding the queue, processed and PCM files.
        bridge_info: audio bridge description; ``platform`` selects the pump
            and ``write_target`` names the sink/device. May be ``None``.
        api_key, model, voice, instructions: forwarded to ``RealtimeSession``.
        stop_flag: shared ``{"stop": bool}`` flag; when set, the speaker
            loop ends as well.
        state: bot state; failures are reported via ``state.set(error=...)``.

    Failures never raise: the error is recorded on *state* and the function
    returns early (import/connect failure) or continues without a pump.
    """
    try:
        from plugins.google_meet.realtime.openai_client import (
            RealtimeSession,
            RealtimeSpeaker,
        )
    except Exception as e:
        state.set(error=f"realtime import failed: {e}")
        return

    pcm_path = out_dir / SAY_PCM_FILENAME
    queue_path = out_dir / SAY_QUEUE_FILENAME
    processed_path = out_dir / "say_processed.jsonl"
    # Reset the sink file so we start clean each session.
    pcm_path.write_bytes(b"")
    # Make sure the queue exists so the speaker poller doesn't error on
    # first iteration.
    queue_path.touch()

    try:
        session = RealtimeSession(
            api_key=api_key,
            model=model,
            voice=voice,
            instructions=instructions,
            audio_sink_path=pcm_path,
            sample_rate=24000,
        )
        session.connect()
    except Exception as e:
        state.set(error=f"realtime connect failed: {e}")
        return

    rt["session"] = session

    # Dedicated stop signal for the speaker thread. The previous
    # ``speaker_stop`` callable wrote the flag's current value back into
    # ``stop_flag`` and therefore never stopped anything. A separate event
    # lets callers stop the speaker without flipping the shared flag.
    speaker_stop = threading.Event()

    def _stop_fn():
        return bool(stop_flag.get("stop", False)) or speaker_stop.is_set()

    rt["speaker_stop"] = speaker_stop.set

    speaker = RealtimeSpeaker(
        session=session,
        queue_path=queue_path,
        processed_path=processed_path,
    )

    def _speaker_loop():
        try:
            speaker.run_until_stopped(_stop_fn)
        except Exception as e:
            state.set(error=f"realtime speaker crashed: {e}")

    t_speaker = threading.Thread(target=_speaker_loop, name="meet-speaker", daemon=True)
    t_speaker.start()
    rt["speaker_thread"] = t_speaker

    # PCM pump: feeds speaker.pcm (24kHz s16le mono) into the OS audio
    # device that Chrome's fake mic reads from. Different tools per
    # platform, but the contract is the same — stream the PCM file to the
    # device in near-real-time.
    platform_tag = (bridge_info or {}).get("platform")
    if platform_tag == "linux":
        import subprocess as _sp

        sink = (bridge_info or {}).get("write_target") or "hermes_meet_sink"
        try:
            proc = _sp.Popen(
                [
                    "paplay",
                    "--raw",
                    "--rate=24000",
                    "--format=s16le",
                    "--channels=1",
                    f"--device={sink}",
                    str(pcm_path),
                ],
                stdin=_sp.DEVNULL,
                stdout=_sp.DEVNULL,
                stderr=_sp.DEVNULL,
            )
            rt["pcm_pump"] = proc
        except FileNotFoundError:
            state.set(error="paplay not found — install pulseaudio-utils for realtime on Linux")
        except Exception as e:
            # Mirror the macOS branch: any other launch failure is reported,
            # not raised into the bot's main loop.
            state.set(error=f"linux pcm pump failed to start: {e}")
    elif platform_tag == "darwin":
        # macOS: use ffmpeg to read speaker.pcm and play it to the BlackHole
        # device. The user must have BlackHole selected as the default input
        # in System Settings → Sound for Chrome to pick it up. There is no
        # fallback player: without ffmpeg the pump is not started and an
        # error is recorded.
        import shutil as _shutil
        import subprocess as _sp

        device_name = (bridge_info or {}).get("write_target") or "BlackHole 2ch"
        if _shutil.which("ffmpeg"):
            try:
                # -re                       : read input at native rate.
                # -f s16le -ar 24000 -ac 1  : how to interpret the raw file.
                # -f audiotoolbox           : play through AudioToolbox.
                # -audio_device_index N     : pick the device explicitly;
                #                             otherwise the default output
                #                             device would be used.
                # "-"                       : the output "file" argument.
                proc = _sp.Popen(
                    [
                        "ffmpeg",
                        "-nostdin", "-hide_banner", "-loglevel", "error",
                        "-re",
                        "-f", "s16le", "-ar", "24000", "-ac", "1",
                        "-i", str(pcm_path),
                        "-f", "audiotoolbox",
                        "-audio_device_index", _mac_audio_device_index(device_name),
                        "-",
                    ],
                    stdin=_sp.DEVNULL,
                    stdout=_sp.DEVNULL,
                    stderr=_sp.DEVNULL,
                )
                rt["pcm_pump"] = proc
            except FileNotFoundError:
                state.set(error="ffmpeg not found — install via `brew install ffmpeg` for realtime on macOS")
            except Exception as e:
                state.set(error=f"macOS pcm pump failed to start: {e}")
        else:
            state.set(error="ffmpeg not found — install via `brew install ffmpeg` for realtime on macOS")
|
||||
|
||||
|
||||
def _mac_audio_device_index(device_name: str) -> str:
    """Return the ffmpeg ``-audio_device_index`` for *device_name*, as a string.

    Probes ``ffmpeg -f avfoundation -list_devices true -i ''`` (which prints
    the device table on stderr) and matches *device_name* case-insensitively
    against the whole device label.

    The listing has a video section and an audio section, each numbered from
    zero. A row in the audio section wins; a row elsewhere is only used when
    the audio section has no match, so a video device that happens to share
    the name no longer yields the wrong index.

    Defaults to ``"0"`` if the probe fails or the device can't be found —
    caller will get a misrouted stream but not a crash, and the error will
    be obvious.
    """
    import re as _re
    import subprocess as _sp

    try:
        probe = _sp.run(
            ["ffmpeg", "-f", "avfoundation", "-list_devices", "true", "-i", ""],
            capture_output=True,
            text=True,
            timeout=10,
        )
    except Exception:
        # Best-effort probe: missing ffmpeg, timeout, undecodable output...
        return "0"

    # ffmpeg prints the table on stderr. Lines look like:
    #   [AVFoundation indev @ 0x...] AVFoundation audio devices:
    #   [AVFoundation indev @ 0x...] [0] BlackHole 2ch
    row_re = _re.compile(r"\[(\d+)\]\s+(.+)$")
    needle = device_name.strip().lower()
    in_audio_section = False
    fallback = None
    for line in (probe.stderr or "").splitlines():
        lowered = line.lower()
        if "avfoundation audio devices" in lowered:
            in_audio_section = True
            continue
        if "avfoundation video devices" in lowered:
            in_audio_section = False
            continue
        m = row_re.search(line)
        if not m or m.group(2).strip().lower() != needle:
            continue
        if in_audio_section:
            return m.group(1)
        if fallback is None:
            # Outside the audio section: remember it, keep looking.
            fallback = m.group(1)
    return fallback if fallback is not None else "0"
|
||||
|
||||
|
||||
def run_bot() -> int: # noqa: C901 — orchestration, explicit branches
|
||||
url = os.environ.get("HERMES_MEET_URL", "").strip()
|
||||
out_dir_env = os.environ.get("HERMES_MEET_OUT_DIR", "").strip()
|
||||
headed = os.environ.get("HERMES_MEET_HEADED", "").lower() in ("1", "true", "yes")
|
||||
auth_state = os.environ.get("HERMES_MEET_AUTH_STATE", "").strip()
|
||||
guest_name = os.environ.get("HERMES_MEET_GUEST_NAME", "Hermes Agent")
|
||||
duration_s = _parse_duration(os.environ.get("HERMES_MEET_DURATION", ""))
|
||||
# v2: optional realtime mode. Enabled when HERMES_MEET_MODE=realtime.
|
||||
mode = os.environ.get("HERMES_MEET_MODE", "transcribe").strip().lower()
|
||||
realtime_model = os.environ.get("HERMES_MEET_REALTIME_MODEL", "gpt-realtime")
|
||||
realtime_voice = os.environ.get("HERMES_MEET_REALTIME_VOICE", "alloy")
|
||||
realtime_instructions = os.environ.get("HERMES_MEET_REALTIME_INSTRUCTIONS", "")
|
||||
realtime_api_key = os.environ.get("HERMES_MEET_REALTIME_KEY") or os.environ.get("OPENAI_API_KEY", "")
|
||||
|
||||
if not url or not _is_safe_meet_url(url):
|
||||
sys.stderr.write(
|
||||
"google_meet bot: refusing to launch — HERMES_MEET_URL must be a "
|
||||
"meet.google.com URL. got: %r\n" % url
|
||||
)
|
||||
return 2
|
||||
if not out_dir_env:
|
||||
sys.stderr.write("google_meet bot: HERMES_MEET_OUT_DIR is required\n")
|
||||
return 2
|
||||
|
||||
out_dir = Path(out_dir_env)
|
||||
meeting_id = _meeting_id_from_url(url)
|
||||
state = _BotState(out_dir=out_dir, meeting_id=meeting_id, url=url)
|
||||
|
||||
# SIGTERM → exit cleanly so the parent ``meet_leave`` gets a finalized
|
||||
# transcript. We set a flag instead of raising so the Playwright context
|
||||
# teardown runs in the finally block below.
|
||||
stop_flag = {"stop": False}
|
||||
|
||||
def _on_signal(_sig, _frame):
|
||||
stop_flag["stop"] = True
|
||||
|
||||
signal.signal(signal.SIGTERM, _on_signal)
|
||||
signal.signal(signal.SIGINT, _on_signal)
|
||||
|
||||
# v2 realtime: provision virtual audio device + start speaker thread.
|
||||
# We track these in a dict so the finally block can tear them down
|
||||
# regardless of how we exit. If anything in the realtime setup fails we
|
||||
# fall back to transcribe mode with a status flag.
|
||||
rt = {
|
||||
"enabled": mode == "realtime",
|
||||
"bridge": None, # AudioBridge | None
|
||||
"bridge_info": None, # dict | None
|
||||
"session": None, # RealtimeSession | None
|
||||
"speaker_thread": None, # threading.Thread | None
|
||||
"speaker_stop": None, # callable | None
|
||||
}
|
||||
if rt["enabled"]:
|
||||
if not realtime_api_key:
|
||||
state.set(error="realtime mode requested but no API key in HERMES_MEET_REALTIME_KEY/OPENAI_API_KEY — falling back to transcribe")
|
||||
rt["enabled"] = False
|
||||
else:
|
||||
try:
|
||||
from plugins.google_meet.audio_bridge import AudioBridge
|
||||
bridge = AudioBridge()
|
||||
rt["bridge_info"] = bridge.setup()
|
||||
rt["bridge"] = bridge
|
||||
state.set(realtime=True, realtime_device=rt["bridge_info"].get("device_name"))
|
||||
except Exception as e:
|
||||
state.set(error=f"audio bridge setup failed: {e} — falling back to transcribe")
|
||||
rt["enabled"] = False
|
||||
|
||||
try:
|
||||
from playwright.sync_api import sync_playwright
|
||||
except ImportError as e:
|
||||
state.set(error=f"playwright not installed: {e}", exited=True)
|
||||
sys.stderr.write(
|
||||
"google_meet bot: playwright is not installed. Run "
|
||||
"`pip install playwright && python -m playwright install chromium`\n"
|
||||
)
|
||||
if rt["bridge"]:
|
||||
rt["bridge"].teardown()
|
||||
return 3
|
||||
|
||||
# Chrome env: if realtime is live on Linux, point PULSE_SOURCE at the
|
||||
# virtual source so Chrome's fake mic reads the audio we generate.
|
||||
chrome_env = os.environ.copy()
|
||||
chrome_args = [
|
||||
"--use-fake-ui-for-media-stream",
|
||||
"--disable-blink-features=AutomationControlled",
|
||||
]
|
||||
if not rt["enabled"]:
|
||||
# v1-style fake device (silence) — we don't care about mic content
|
||||
# when we're not speaking.
|
||||
chrome_args.insert(1, "--use-fake-device-for-media-stream")
|
||||
elif rt["bridge_info"] and rt["bridge_info"].get("platform") == "linux":
|
||||
chrome_env["PULSE_SOURCE"] = rt["bridge_info"].get("device_name", "")
|
||||
|
||||
try:
|
||||
with sync_playwright() as pw:
|
||||
# Playwright's launch() doesn't take env; we set PULSE_SOURCE
|
||||
# via the process env before launch so the child Chrome inherits it.
|
||||
for k, v in chrome_env.items():
|
||||
os.environ[k] = v
|
||||
browser = pw.chromium.launch(
|
||||
headless=not headed,
|
||||
args=chrome_args,
|
||||
)
|
||||
context_args = {
|
||||
"viewport": {"width": 1280, "height": 800},
|
||||
"user_agent": (
|
||||
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
|
||||
"(KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36"
|
||||
),
|
||||
"permissions": ["microphone", "camera"],
|
||||
}
|
||||
if auth_state and Path(auth_state).is_file():
|
||||
context_args["storage_state"] = auth_state
|
||||
context = browser.new_context(**context_args)
|
||||
page = context.new_page()
|
||||
|
||||
try:
|
||||
page.goto(url, wait_until="domcontentloaded", timeout=30_000)
|
||||
except Exception as e:
|
||||
state.set(error=f"navigate failed: {e}", exited=True)
|
||||
return 4
|
||||
|
||||
# Guest-mode: Meet shows a name field before "Ask to join". When
|
||||
# we're authed, we instead see "Join now".
|
||||
_try_guest_name(page, guest_name)
|
||||
_click_join(page, state)
|
||||
|
||||
# Install caption observer and attempt to enable captions.
|
||||
try:
|
||||
page.evaluate(_enable_captions_js())
|
||||
state.set(captions_enabled_attempted=True)
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
page.evaluate(_CAPTION_OBSERVER_JS)
|
||||
except Exception as e:
|
||||
state.set(error=f"caption observer install failed: {e}")
|
||||
|
||||
# Note: in_call=False until admission is confirmed (we detect
|
||||
# either the Leave button or the caption region, signalling we
|
||||
# made it past the lobby).
|
||||
state.set(captioning=True, join_attempted_at=time.time())
|
||||
|
||||
# v2 realtime: start the speaker thread reading from the
|
||||
# plugin-side say queue. The thread reads JSONL lines written by
|
||||
# meet_say, calls OpenAI Realtime, and streams the audio PCM to
|
||||
# the virtual sink that Chrome's fake-mic is pointed at.
|
||||
if rt["enabled"]:
|
||||
_start_realtime_speaker(
|
||||
rt=rt,
|
||||
out_dir=out_dir,
|
||||
bridge_info=rt["bridge_info"],
|
||||
api_key=realtime_api_key,
|
||||
model=realtime_model,
|
||||
voice=realtime_voice,
|
||||
instructions=realtime_instructions,
|
||||
stop_flag=stop_flag,
|
||||
state=state,
|
||||
)
|
||||
if rt["session"] is not None:
|
||||
state.set(realtime_ready=True)
|
||||
|
||||
# Admission + drain loop. Runs until SIGTERM, duration expiry,
|
||||
# or the page detects "You were removed / you left the
|
||||
# meeting". Responsible for:
|
||||
# * detecting admission (Leave button visible → in_call=True)
|
||||
# * timing out stuck-in-lobby (default 5 minutes)
|
||||
# * draining scraped captions into the transcript
|
||||
# * triggering realtime barge-in when a human speaks while
|
||||
# the bot is generating audio
|
||||
# * periodically flushing realtime counters into status.json
|
||||
deadline = (time.time() + duration_s) if duration_s else None
|
||||
lobby_deadline = time.time() + float(
|
||||
os.environ.get("HERMES_MEET_LOBBY_TIMEOUT", "300")
|
||||
)
|
||||
last_admission_check = 0.0
|
||||
while not stop_flag["stop"]:
|
||||
now = time.time()
|
||||
if deadline and now > deadline:
|
||||
state.set(leave_reason="duration_expired")
|
||||
break
|
||||
|
||||
# Admission detection every ~3s until admitted.
|
||||
if not state.in_call and (now - last_admission_check) > 3.0:
|
||||
last_admission_check = now
|
||||
admitted = _detect_admission(page)
|
||||
if admitted:
|
||||
state.set(
|
||||
in_call=True,
|
||||
lobby_waiting=False,
|
||||
joined_at=now,
|
||||
)
|
||||
elif now > lobby_deadline:
|
||||
state.set(
|
||||
error=(
|
||||
"lobby timeout — host never admitted the bot "
|
||||
f"within {int(lobby_deadline - state.join_attempted_at) if state.join_attempted_at else 0}s"
|
||||
),
|
||||
leave_reason="lobby_timeout",
|
||||
)
|
||||
break
|
||||
elif _detect_denied(page):
|
||||
state.set(
|
||||
error="host denied admission",
|
||||
leave_reason="denied",
|
||||
)
|
||||
break
|
||||
|
||||
try:
|
||||
queued = page.evaluate("window.__hermesMeetDrain && window.__hermesMeetDrain()")
|
||||
if isinstance(queued, list):
|
||||
for entry in queued:
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
speaker = str(entry.get("speaker", ""))
|
||||
text = str(entry.get("text", ""))
|
||||
state.record_caption(speaker=speaker, text=text)
|
||||
# Barge-in: if the bot is currently generating
|
||||
# audio AND a real human just spoke, cancel the
|
||||
# in-flight response so we don't talk over them.
|
||||
if rt["enabled"] and rt["session"] is not None:
|
||||
if _looks_like_human_speaker(speaker, guest_name):
|
||||
try:
|
||||
cancelled = rt["session"].cancel_response()
|
||||
if cancelled:
|
||||
state.set(last_barge_in_at=now)
|
||||
except Exception:
|
||||
pass
|
||||
except Exception:
|
||||
# Meet reloaded or we got booted — try to detect and
|
||||
# exit gracefully rather than spinning.
|
||||
if page.is_closed():
|
||||
state.set(leave_reason="page_closed")
|
||||
break
|
||||
|
||||
# Fold the realtime session's byte/timestamp counters into
|
||||
# the status file so meet_status can surface them.
|
||||
if rt["session"] is not None:
|
||||
state.set(
|
||||
audio_bytes_out=getattr(rt["session"], "audio_bytes_out", 0),
|
||||
last_audio_out_at=getattr(rt["session"], "last_audio_out_at", None),
|
||||
)
|
||||
|
||||
time.sleep(1.0)
|
||||
|
||||
# Try to leave cleanly — click "Leave call" button if present.
|
||||
try:
|
||||
page.evaluate(
|
||||
"() => { const b = document.querySelector('button[aria-label*=\"eave call\"]');"
|
||||
" if (b) b.click(); }"
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
context.close()
|
||||
browser.close()
|
||||
# v2: teardown realtime speaker + audio bridge.
|
||||
if rt["speaker_stop"]:
|
||||
try:
|
||||
rt["speaker_stop"]()
|
||||
except Exception:
|
||||
pass
|
||||
if rt["speaker_thread"] is not None:
|
||||
try:
|
||||
rt["speaker_thread"].join(timeout=5.0)
|
||||
except Exception:
|
||||
pass
|
||||
if rt["session"]:
|
||||
try:
|
||||
rt["session"].close()
|
||||
except Exception:
|
||||
pass
|
||||
if rt["bridge"]:
|
||||
try:
|
||||
rt["bridge"].teardown()
|
||||
except Exception:
|
||||
pass
|
||||
state.set(in_call=False, captioning=False, exited=True)
|
||||
return 0
|
||||
|
||||
except Exception as e:
|
||||
state.set(error=f"unhandled: {e}", exited=True)
|
||||
return 1
|
||||
|
||||
|
||||
def _try_guest_name(page, guest_name: str) -> None:
|
||||
"""If Meet is showing a guest-name input, type *guest_name* into it."""
|
||||
try:
|
||||
# Meet's guest name input has placeholder "Your name".
|
||||
locator = page.locator('input[aria-label*="name" i]').first
|
||||
if locator.count() and locator.is_visible():
|
||||
locator.fill(guest_name, timeout=2_000)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
def _detect_admission(page) -> bool:
|
||||
"""True if we're clearly past the lobby and in the call itself.
|
||||
|
||||
Uses a JS-side probe because Meet's DOM structure varies by client
|
||||
version. We check several high-signal indicators and declare admission
|
||||
on the first hit:
|
||||
|
||||
1. Leave-call button is present (``aria-label`` contains "eave call").
|
||||
2. Caption region has appeared (we installed the observer and it attached).
|
||||
3. The participant list container is visible.
|
||||
|
||||
Conservative by default — returns False on any error.
|
||||
"""
|
||||
probe = r"""
|
||||
(() => {
|
||||
const leave = document.querySelector('button[aria-label*="eave call" i]');
|
||||
if (leave) return true;
|
||||
if (window.__hermesMeetInstalled) {
|
||||
const caps = document.querySelector(
|
||||
'[role="region"][aria-label*="aption" i], ' +
|
||||
'div[jsname="YSxPC"], div[jsname="tgaKEf"]'
|
||||
);
|
||||
if (caps) return true;
|
||||
}
|
||||
const parts = document.querySelector('[aria-label*="articipants" i]');
|
||||
if (parts) return true;
|
||||
return false;
|
||||
})();
|
||||
"""
|
||||
try:
|
||||
return bool(page.evaluate(probe))
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
|
||||
def _detect_denied(page) -> bool:
|
||||
"""True when Meet is showing a 'you were denied' / 'no one admitted' page."""
|
||||
probe = r"""
|
||||
(() => {
|
||||
const text = document.body ? document.body.innerText || '' : '';
|
||||
// English only — matches what shows up when the host denies or
|
||||
// removes a guest.
|
||||
if (/You can't join this video call/i.test(text)) return true;
|
||||
if (/You were removed from the meeting/i.test(text)) return true;
|
||||
if (/No one responded to your request to join/i.test(text)) return true;
|
||||
return false;
|
||||
})();
|
||||
"""
|
||||
try:
|
||||
return bool(page.evaluate(probe))
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
|
||||
def _looks_like_human_speaker(speaker: str, bot_guest_name: str) -> bool:
|
||||
"""Whether a caption line's speaker is probably a human, not our bot echo.
|
||||
|
||||
Meet attributes captions to the speaker's display name. When Chrome is
|
||||
reading our fake mic, Meet still attributes captions to *our* bot name
|
||||
(because the bot is the one "speaking"). We don't want those to trigger
|
||||
barge-in. Anything else — real participant names — does.
|
||||
|
||||
Conservative: unknown / blank speakers (common when caption scraping
|
||||
falls back to raw text) do NOT trigger barge-in, because we can't tell
|
||||
whether it was a human or us.
|
||||
"""
|
||||
if not speaker or not speaker.strip():
|
||||
return False
|
||||
spk = speaker.strip().lower()
|
||||
if spk in ("unknown", "you", bot_guest_name.strip().lower()):
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def _click_join(page, state: _BotState) -> None:
|
||||
"""Click 'Join now' or 'Ask to join' if either button is visible.
|
||||
|
||||
Flags ``lobby_waiting`` when we hit the "waiting for host to admit you"
|
||||
state so the agent can surface that in status.
|
||||
"""
|
||||
for label in ("Join now", "Ask to join"):
|
||||
try:
|
||||
btn = page.get_by_role("button", name=label, exact=False).first
|
||||
if btn.count() and btn.is_visible():
|
||||
btn.click(timeout=3_000)
|
||||
if label == "Ask to join":
|
||||
state.set(lobby_waiting=True)
|
||||
break
|
||||
except Exception:
|
||||
continue
|
||||
|
||||
|
||||
def _parse_duration(raw: str) -> Optional[float]:
|
||||
"""Parse ``30m`` / ``2h`` / ``90`` (seconds) → float seconds, or None."""
|
||||
if not raw:
|
||||
return None
|
||||
raw = raw.strip().lower()
|
||||
try:
|
||||
if raw.endswith("h"):
|
||||
return float(raw[:-1]) * 3600
|
||||
if raw.endswith("m"):
|
||||
return float(raw[:-1]) * 60
|
||||
if raw.endswith("s"):
|
||||
return float(raw[:-1])
|
||||
return float(raw)
|
||||
except ValueError:
|
||||
return None
|
||||
|
||||
|
||||
if __name__ == "__main__":  # pragma: no cover — subprocess entry point
    # Use run_bot()'s return value as the process exit status.
    raise SystemExit(run_bot())
|
||||
@@ -0,0 +1,54 @@
|
||||
"""Remote 'node host' primitive for the google_meet plugin.
|
||||
|
||||
Lets the Meet bot (Playwright + Chrome) run on a different machine than
|
||||
the hermes-agent gateway. The gateway speaks a small JSON-over-WebSocket
|
||||
RPC protocol to the remote node; the node wraps the existing
|
||||
``plugins.google_meet.process_manager`` API.
|
||||
|
||||
Topology
|
||||
--------
|
||||
gateway (Linux) ── ws://mac.local:18789 ──▶ node server (Mac)
|
||||
└─ process_manager
|
||||
└─ meet_bot (Playwright)
|
||||
|
||||
Why: Google sign-in + Chrome profile live on the user's laptop. Running
|
||||
the bot there reuses that profile without shipping credentials to the
|
||||
server.
|
||||
|
||||
Public surface
|
||||
--------------
|
||||
NodeClient — gateway-side RPC client (short-lived sync WS per call)
|
||||
NodeServer — long-running server that hosts the bot
|
||||
NodeRegistry — local JSON registry of approved nodes (name → url+token)
|
||||
protocol — message envelope helpers (make_request, encode, decode, ...)
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from plugins.google_meet.node import protocol
|
||||
from plugins.google_meet.node.client import NodeClient
|
||||
from plugins.google_meet.node.protocol import (
|
||||
VALID_REQUEST_TYPES,
|
||||
decode,
|
||||
encode,
|
||||
make_error,
|
||||
make_request,
|
||||
make_response,
|
||||
validate_request,
|
||||
)
|
||||
from plugins.google_meet.node.registry import NodeRegistry
|
||||
from plugins.google_meet.node.server import NodeServer
|
||||
|
||||
# Public surface of the package, in the same order as before.
__all__ = [
    # RPC endpoints and the node registry
    "NodeClient", "NodeServer", "NodeRegistry",
    # the protocol module itself, plus its envelope helpers
    "protocol",
    "make_request", "make_response", "make_error",
    "encode", "decode",
    "validate_request", "VALID_REQUEST_TYPES",
]
|
||||
@@ -0,0 +1,125 @@
|
||||
"""`hermes meet node ...` subcommand tree.
|
||||
|
||||
Wired into the existing ``hermes meet`` parser by the plugin's top-level
|
||||
CLI. This module only defines the subparsers and their dispatch — it
|
||||
does not mutate the existing cli.py.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import asyncio
|
||||
import json
|
||||
import sys
|
||||
from typing import Any
|
||||
|
||||
from plugins.google_meet.node.client import NodeClient
|
||||
from plugins.google_meet.node.registry import NodeRegistry
|
||||
from plugins.google_meet.node.server import NodeServer
|
||||
|
||||
|
||||
def register_cli(subparser: argparse.ArgumentParser) -> None:
    """Attach the ``run / list / approve / remove / status / ping`` commands.

    *subparser* is the ``hermes meet node`` argparse object. The chosen
    command name is stored in ``node_cmd`` and every command dispatches to
    :func:`node_command` through the ``func`` default.
    """
    commands = subparser.add_subparsers(dest="node_cmd", required=True)

    def _add(name: str, help_text: str) -> argparse.ArgumentParser:
        # Every subcommand shares the same dispatcher.
        parser = commands.add_parser(name, help=help_text)
        parser.set_defaults(func=node_command)
        return parser

    run_parser = _add("run", "Start a node server on this machine.")
    run_parser.add_argument("--host", default="0.0.0.0")
    run_parser.add_argument("--port", type=int, default=18789)
    run_parser.add_argument("--display-name", default="hermes-meet-node")

    _add("list", "List approved remote nodes.")

    approve_parser = _add("approve", "Register a remote node on the gateway.")
    for positional in ("name", "url", "token"):
        approve_parser.add_argument(positional)

    # These three take just the node name.
    name_only = (
        ("remove", "Forget a registered node."),
        ("status", "Ping a registered node."),
        ("ping", "Alias for status."),
    )
    for command, help_text in name_only:
        _add(command, help_text).add_argument("name")
|
||||
|
||||
|
||||
def node_command(args: argparse.Namespace) -> int:
    """Dispatch for ``hermes meet node ...``.

    Reads the subcommand from ``args.node_cmd`` and performs it, printing
    results to stdout and errors to stderr.

    Returns:
        A process exit code: 0 on success, 1 when a named node is missing
        or unreachable, 2 for a server start-up failure, invalid
        ``approve`` arguments, or an unknown subcommand.
    """
    cmd = getattr(args, "node_cmd", None)

    if cmd == "run":
        server = NodeServer(
            host=args.host,
            port=args.port,
            display_name=args.display_name,
        )
        token = server.ensure_token()
        print(f"[meet-node] display_name={server.display_name}")
        print(f"[meet-node] listening on ws://{args.host}:{args.port}")
        print(f"[meet-node] token (copy to gateway): {token}")
        print("[meet-node] approve with:")
        print(f"  hermes meet node approve <name> ws://<host>:{args.port} {token}")
        try:
            asyncio.run(server.serve())
        except KeyboardInterrupt:
            return 0
        except (RuntimeError, OSError) as exc:
            # OSError covers bind failures such as "address already in use".
            print(f"[meet-node] error: {exc}", file=sys.stderr)
            return 2
        return 0

    reg = NodeRegistry()

    if cmd == "list":
        nodes = reg.list_all()
        if not nodes:
            print("no nodes registered")
            return 0
        for n in nodes:
            # Only a short prefix of the token is shown; tolerate entries
            # that are missing fields instead of crashing the listing.
            token_hint = str(n.get("token", ""))[:6]
            print(f"{n['name']}\t{n.get('url', '')}\ttoken={token_hint}…")
        return 0

    if cmd == "approve":
        try:
            reg.add(args.name, args.url, args.token)
        except ValueError as exc:
            # e.g. an empty name/url/token passed on the command line.
            print(f"cannot approve node: {exc}", file=sys.stderr)
            return 2
        print(f"approved node {args.name!r} at {args.url}")
        return 0

    if cmd == "remove":
        ok = reg.remove(args.name)
        print(f"removed {args.name!r}" if ok else f"no such node: {args.name!r}")
        return 0 if ok else 1

    if cmd in ("status", "ping"):
        entry = reg.get(args.name)
        if entry is None:
            print(f"no such node: {args.name!r}", file=sys.stderr)
            return 1
        try:
            # Client construction is inside the try so a malformed registry
            # entry is reported as JSON like any other failure.
            client = NodeClient(entry["url"], entry["token"])
            result = client.ping()
        except Exception as exc:  # noqa: BLE001 — surface any connection error
            print(json.dumps({"ok": False, "error": str(exc)}))
            return 1
        print(json.dumps({"ok": True, "node": args.name, **_coerce_dict(result)}))
        return 0

    print(f"unknown node command: {cmd!r}", file=sys.stderr)
    return 2
|
||||
|
||||
|
||||
def _coerce_dict(value: Any) -> dict:
|
||||
return value if isinstance(value, dict) else {"result": value}
|
||||
@@ -0,0 +1,107 @@
|
||||
"""Gateway-side RPC client for a remote meet node.
|
||||
|
||||
Each call opens a short-lived synchronous WebSocket to the node, sends
|
||||
exactly one request, reads exactly one response, and closes. This keeps
|
||||
the client trivial to use from non-async tool handlers and avoids
|
||||
maintaining persistent connection state across agent turns.
|
||||
|
||||
The ``websockets`` package is an optional dep — we import it lazily so
|
||||
plugin load doesn't require it.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
from plugins.google_meet.node import protocol as _proto
|
||||
|
||||
|
||||
class NodeClient:
    """Synchronous WebSocket RPC client for one remote meet node.

    Each call opens a connection, sends a single request envelope, reads a
    single reply, and closes the connection again.
    """

    def __init__(self, url: str, token: str, timeout: float = 10.0) -> None:
        """Store the node address, shared token and per-step timeout.

        Raises:
            ValueError: if *url* or *token* is not a non-empty string.
        """
        for label, value in (("url", url), ("token", token)):
            if not isinstance(value, str) or not value:
                raise ValueError(f"{label} must be a non-empty string")
        self.url = url
        self.token = token
        self.timeout = float(timeout)

    # ----- core RPC -----------------------------------------------------

    def _rpc(self, type: str, payload: Dict[str, Any]) -> Dict[str, Any]:
        """Perform one request/response round trip and return the payload.

        Raises:
            RuntimeError: if ``websockets`` is not installed, the node
                replies with an ``error`` envelope, the reply id differs
                from the request id, or the reply has no dict payload.
        """
        try:
            # Imported lazily so the plugin loads without the optional dep.
            from websockets.sync.client import connect  # type: ignore
        except ImportError as exc:
            raise RuntimeError(
                "NodeClient requires the 'websockets' package. "
                "Install it with: pip install websockets"
            ) from exc

        request = _proto.make_request(type, self.token, payload)
        wire_out = _proto.encode(request)

        connection = connect(
            self.url,
            open_timeout=self.timeout,
            close_timeout=self.timeout,
        )
        with connection as ws:
            ws.send(wire_out)
            wire_in = ws.recv(timeout=self.timeout)

        # Binary frames are treated as UTF-8 encoded JSON text.
        if isinstance(wire_in, (bytes, bytearray)):
            wire_in = wire_in.decode("utf-8")
        reply = _proto.decode(wire_in)

        if reply.get("type") == "error":
            raise RuntimeError(f"node error: {reply.get('error', '<unknown>')}")

        sent_id = request["id"]
        got_id = reply.get("id")
        if got_id != sent_id:
            raise RuntimeError(
                f"response id mismatch: sent {sent_id}, got {got_id!r}"
            )

        body = reply.get("payload")
        if isinstance(body, dict):
            return body
        # Every successful reply is expected to carry a dict payload.
        raise RuntimeError("response missing payload dict")

    # ----- convenience methods -----------------------------------------

    def start_bot(
        self,
        url: str,
        guest_name: str = "Hermes Agent",
        duration: Optional[str] = None,
        headed: bool = False,
        mode: str = "transcribe",
    ) -> Dict[str, Any]:
        """Send ``start_bot``; ``duration`` is included only when given."""
        request_body: Dict[str, Any] = dict(
            url=url,
            guest_name=guest_name,
            headed=bool(headed),
            mode=mode,
        )
        if duration is not None:
            request_body["duration"] = duration
        return self._rpc("start_bot", request_body)

    def stop(self) -> Dict[str, Any]:
        """Send ``stop`` with an empty payload."""
        return self._rpc("stop", {})

    def status(self) -> Dict[str, Any]:
        """Send ``status`` with an empty payload."""
        return self._rpc("status", {})

    def transcript(self, last: Optional[int] = None) -> Dict[str, Any]:
        """Send ``transcript``; ``last`` (coerced to int) is optional."""
        request_body: Dict[str, Any] = {} if last is None else {"last": int(last)}
        return self._rpc("transcript", request_body)

    def say(self, text: str) -> Dict[str, Any]:
        """Send ``say`` with *text* coerced to ``str``."""
        return self._rpc("say", {"text": str(text)})

    def ping(self) -> Dict[str, Any]:
        """Send ``ping`` with an empty payload."""
        return self._rpc("ping", {})
|
||||
@@ -0,0 +1,124 @@
|
||||
"""Wire protocol for gateway ↔ node RPC.
|
||||
|
||||
Everything is a JSON object with the same envelope shape:
|
||||
|
||||
Request: {"type": <str>, "id": <str>, "token": <str>, "payload": <dict>}
|
||||
Response: {"type": "response", "id": <req-id>, "payload": <dict>}
|
||||
Error: {"type": "error", "id": <req-id>, "error": <str>}
|
||||
|
||||
Requests must carry the shared bearer token (set up via
|
||||
``hermes meet node approve`` on the gateway and read off disk on the
|
||||
server). Mismatched tokens are rejected before dispatch.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import uuid
|
||||
from typing import Any, Dict, Tuple
|
||||
|
||||
|
||||
# The closed set of request types accepted in an envelope's "type" field.
VALID_REQUEST_TYPES = frozenset(
    ("start_bot", "stop", "status", "transcript", "say", "ping")
)
|
||||
|
||||
|
||||
def make_request(
    type: str,
    token: str,
    payload: Dict[str, Any],
    req_id: str | None = None,
) -> Dict[str, Any]:
    """Construct a request envelope.

    Args:
        type: One of ``VALID_REQUEST_TYPES``.
        token: Shared bearer token to place in the envelope.
        payload: Request arguments; must be a dict.
        req_id: Correlation id. When omitted (or empty) a uuid4 hex string
            is generated.

    Returns:
        ``{"type", "id", "token", "payload"}`` ready for :func:`encode`.

    Raises:
        ValueError: if *type* is empty, not a string or not a known
            request type; if *token* is not a string; if *payload* is not
            a dict; or if *req_id* is given but is not a string.
    """
    if not isinstance(type, str) or not type:
        raise ValueError("type must be a non-empty string")
    if type not in VALID_REQUEST_TYPES:
        raise ValueError(f"unknown request type: {type!r}")
    if not isinstance(token, str):
        raise ValueError("token must be a string")
    if not isinstance(payload, dict):
        raise ValueError("payload must be a dict")
    # decode() on the receiving side only accepts string ids, so reject
    # anything else here rather than emitting an envelope the peer refuses.
    if req_id is not None and not isinstance(req_id, str):
        raise ValueError("req_id must be a string or None")
    return {
        "type": type,
        "id": req_id or uuid.uuid4().hex,
        "token": token,
        "payload": payload,
    }
|
||||
|
||||
|
||||
def make_response(req_id: str, payload: Dict[str, Any]) -> Dict[str, Any]:
    """Build a success response envelope.

    The envelope type is always the literal ``"response"``; the request
    type is not echoed back, so clients correlate a reply with its request
    through ``id``.

    Args:
        req_id: Id of the request being answered.
        payload: Result data; must be a dict.

    Returns:
        ``{"type": "response", "id": req_id, "payload": payload}``.

    Raises:
        ValueError: if *payload* is not a dict.
    """
    if not isinstance(payload, dict):
        raise ValueError("payload must be a dict")
    return {"type": "response", "id": req_id, "payload": payload}
|
||||
|
||||
|
||||
def make_error(req_id: str, error: str) -> Dict[str, Any]:
    """Build an error envelope for *req_id*; *error* is coerced to ``str``."""
    envelope: Dict[str, Any] = {"type": "error", "id": req_id}
    envelope["error"] = str(error)
    return envelope
|
||||
|
||||
|
||||
def encode(msg: Dict[str, Any]) -> str:
    """Turn an envelope into compact JSON text, keeping non-ASCII as-is."""
    compact = (",", ":")  # no spaces after separators
    return json.dumps(msg, ensure_ascii=False, separators=compact)
|
||||
|
||||
|
||||
def decode(raw: str) -> Dict[str, Any]:
    """Parse JSON text into an envelope dict.

    Only the envelope's outline is checked here: it must be a JSON object
    whose ``type`` and ``id`` are strings. Token and payload checks are
    left to :func:`validate_request`.

    Raises:
        ValueError: on malformed JSON or a missing/non-string field.
    """
    try:
        envelope = json.loads(raw)
    except (TypeError, json.JSONDecodeError) as exc:
        raise ValueError(f"malformed JSON: {exc}") from exc
    if not isinstance(envelope, dict):
        raise ValueError("envelope must be a JSON object")
    for field in ("type", "id"):
        # A missing key reads as None, which is also "not a string".
        if not isinstance(envelope.get(field), str):
            raise ValueError(f"envelope missing string '{field}'")
    return envelope
|
||||
|
||||
|
||||
def validate_request(msg: Dict[str, Any], expected_token: str) -> Tuple[bool, str]:
    """Check a decoded request against the server's shared token.

    Args:
        msg: Decoded request envelope.
        expected_token: The server's shared secret.

    Returns:
        ``(True, "")`` when the envelope is acceptable, otherwise
        ``(False, <reason>)``. Reason strings never include the token, so
        they can be sent back to the client in an error envelope.
    """
    import hmac

    if not isinstance(msg, dict):
        return False, "envelope must be a dict"
    t = msg.get("type")
    if not isinstance(t, str) or not t:
        return False, "missing or non-string 'type'"
    if t not in VALID_REQUEST_TYPES:
        return False, f"unknown request type: {t!r}"
    if not isinstance(msg.get("id"), str) or not msg.get("id"):
        return False, "missing or non-string 'id'"
    token = msg.get("token")
    if not isinstance(token, str) or not token:
        return False, "missing token"
    if not isinstance(expected_token, str):
        return False, "token mismatch"
    # Compare in constant time so response timing does not reveal how much
    # of the secret a guess got right. Comparing bytes lets non-ASCII
    # tokens work; "surrogatepass" keeps lone surrogates from a hostile
    # JSON string from raising during encoding.
    supplied = token.encode("utf-8", "surrogatepass")
    expected = expected_token.encode("utf-8", "surrogatepass")
    if not hmac.compare_digest(supplied, expected):
        return False, "token mismatch"
    payload = msg.get("payload")
    if not isinstance(payload, dict):
        return False, "payload must be a dict"
    return True, ""
|
||||
@@ -0,0 +1,112 @@
|
||||
"""Local JSON registry of approved remote meet nodes.
|
||||
|
||||
Lives at ``$HERMES_HOME/workspace/meetings/nodes.json``. The gateway
|
||||
consults it to resolve a ``chrome_node`` name to a ``(url, token)`` pair
|
||||
before opening a WebSocket to the remote bot host.
|
||||
|
||||
Schema
|
||||
------
|
||||
{
|
||||
"nodes": {
|
||||
"<name>": {
|
||||
"url": "ws://host:port",
|
||||
"token": "...",
|
||||
"added_at": <epoch_float>
|
||||
}
|
||||
}
|
||||
}
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from hermes_constants import get_hermes_home
|
||||
|
||||
|
||||
def _default_path() -> Path:
    """Return ``<hermes home>/workspace/meetings/nodes.json``."""
    hermes_home = Path(get_hermes_home())
    return hermes_home.joinpath("workspace", "meetings", "nodes.json")
|
||||
|
||||
|
||||
class NodeRegistry:
    """Simple file-backed registry of approved nodes (name → url + token).

    Not concurrent-safe across processes — a single writer (the gateway
    CLI) is assumed. Because the file stores node tokens, it is written
    with owner-only permissions where the platform supports them.
    """

    def __init__(self, path: Optional[Path] = None) -> None:
        """Use *path* as the registry file, or the default location."""
        self.path = Path(path) if path is not None else _default_path()

    # ----- storage ------------------------------------------------------

    def _load(self) -> Dict[str, Any]:
        """Read the registry, returning an empty one if unusable.

        A missing, unreadable, undecodable or wrongly-shaped file all
        yield ``{"nodes": {}}``.
        """
        if not self.path.is_file():
            return {"nodes": {}}
        try:
            data = json.loads(self.path.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            # ValueError covers both JSONDecodeError and the
            # UnicodeDecodeError raised for non-UTF-8 bytes.
            return {"nodes": {}}
        if not isinstance(data, dict) or not isinstance(data.get("nodes"), dict):
            return {"nodes": {}}
        return data

    def _save(self, data: Dict[str, Any]) -> None:
        """Write *data* via a temp file and rename, owner-readable only."""
        import os

        self.path.parent.mkdir(parents=True, exist_ok=True)
        tmp = self.path.with_suffix(".json.tmp")
        # Create the temp file with 0600 so tokens are never on disk with
        # wider permissions, even briefly.
        fd = os.open(tmp, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
        with os.fdopen(fd, "w", encoding="utf-8") as handle:
            try:
                # A stale temp file keeps its old mode; tighten it too.
                os.chmod(tmp, 0o600)
            except OSError:
                pass
            handle.write(json.dumps(data, indent=2))
        tmp.replace(self.path)

    # ----- public API ---------------------------------------------------

    def get(self, name: str) -> Optional[Dict[str, Any]]:
        """Return the entry for *name* with ``name`` added, or None.

        Entries that are not JSON objects are treated as absent.
        """
        entry = self._load()["nodes"].get(name)
        if not isinstance(entry, dict):
            return None
        return {"name": name, **entry}

    def add(self, name: str, url: str, token: str) -> None:
        """Insert or overwrite the entry for *name* and persist it.

        Raises:
            ValueError: if *name*, *url* or *token* is not a non-empty
                string.
        """
        if not isinstance(name, str) or not name:
            raise ValueError("node name must be a non-empty string")
        if not isinstance(url, str) or not url:
            raise ValueError("url must be a non-empty string")
        if not isinstance(token, str) or not token:
            raise ValueError("token must be a non-empty string")
        data = self._load()
        data["nodes"][name] = {
            "url": url,
            "token": token,
            "added_at": time.time(),
        }
        self._save(data)

    def remove(self, name: str) -> bool:
        """Delete *name*; return True if it existed, False otherwise."""
        data = self._load()
        if name in data["nodes"]:
            del data["nodes"][name]
            self._save(data)
            return True
        return False

    def list_all(self) -> List[Dict[str, Any]]:
        """Return all well-formed entries sorted by name, each with ``name``."""
        nodes = self._load()["nodes"]
        return [
            {"name": name, **nodes[name]}
            for name in sorted(nodes)
            if isinstance(nodes[name], dict)
        ]

    def resolve(self, chrome_node: Optional[str]) -> Optional[Dict[str, Any]]:
        """Resolve a node name to its entry.

        If ``chrome_node`` is provided, return that named node (or None).
        If it is None or empty, return the sole registered node when
        exactly one is registered; otherwise return None (ambiguous or
        empty).
        """
        if chrome_node:
            return self.get(chrome_node)
        nodes = self.list_all()
        if len(nodes) == 1:
            return nodes[0]
        return None
|
||||
@@ -0,0 +1,193 @@
|
||||
"""Remote node server.
|
||||
|
||||
Runs on the machine that will host the Meet bot (typically the user's
|
||||
Mac laptop with a signed-in Chrome). Exposes a WebSocket endpoint that
|
||||
accepts token-authenticated RPC requests and dispatches them to the existing
|
||||
``plugins.google_meet.process_manager`` module.
|
||||
|
||||
Launched by ``hermes meet node run``.
|
||||
|
||||
Token handling
|
||||
--------------
|
||||
On first boot we mint 32 hex chars of entropy and persist them at
|
||||
``$HERMES_HOME/workspace/meetings/node_token.json``. Subsequent boots
|
||||
reuse the same token so previously-approved gateways don't need to be
|
||||
re-paired. The operator copies this token out-of-band to the gateway
|
||||
via ``hermes meet node approve <name> <url> <token>``.
|
||||
|
||||
Dependencies
|
||||
------------
|
||||
``websockets`` is an optional dep. We import it lazily inside
|
||||
:meth:`serve` so installing the plugin doesn't require it unless you
|
||||
actually host a node.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import secrets
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
from hermes_constants import get_hermes_home
|
||||
from plugins.google_meet.node import protocol as _proto
|
||||
|
||||
|
||||
def _default_token_path() -> Path:
    """Return the default location of the node token file.

    The file lives at ``workspace/meetings/node_token.json`` under the
    directory reported by ``get_hermes_home()``.
    """
    home = Path(get_hermes_home())
    return home.joinpath("workspace", "meetings", "node_token.json")
|
||||
|
||||
|
||||
class NodeServer:
    """WebSocket server that executes meet bot RPCs locally.

    Each incoming message is decoded, validated against the node's
    shared-secret token and dispatched to
    ``plugins.google_meet.process_manager``. The token is generated on
    first use and persisted at ``token_path`` so it survives restarts.
    """

    def __init__(
        self,
        host: str = "0.0.0.0",
        port: int = 18789,
        token_path: Optional[Path] = None,
        display_name: str = "hermes-meet-node",
    ) -> None:
        """Store the listen address, display name and token file location.

        ``token_path`` defaults to the path returned by
        ``_default_token_path()`` when not given. No I/O happens here;
        the token is only read or created by :meth:`ensure_token`.
        """
        self.host = host
        self.port = port
        self.display_name = display_name
        self.token_path = Path(token_path) if token_path is not None else _default_token_path()
        # In-memory cache of the shared secret; filled by ensure_token().
        self._token: Optional[str] = None

    # ----- token management --------------------------------------------

    def _read_persisted_token(self) -> Optional[str]:
        """Return the token stored at ``token_path``, or None if unusable.

        A missing file, unreadable file, undecodable bytes, invalid
        JSON, a non-object JSON document, or a missing/empty/non-string
        ``"token"`` field all count as "no usable token".
        """
        if not self.token_path.is_file():
            return None
        try:
            data = json.loads(self.token_path.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            # ValueError covers both json.JSONDecodeError and
            # UnicodeDecodeError (each is a ValueError subclass).
            return None
        # Valid JSON is not necessarily an object (e.g. a list or string).
        tok = data.get("token") if isinstance(data, dict) else None
        if isinstance(tok, str) and tok:
            return tok
        return None

    def _persist_token(self, tok: str) -> None:
        """Write ``tok`` to ``token_path`` via a temp file and rename.

        The temp file is created with owner-only permissions before the
        secret is written, so the token is not exposed to other local
        users through default umask permissions.
        """
        self.token_path.parent.mkdir(parents=True, exist_ok=True)
        tmp = self.token_path.with_suffix(".json.tmp")
        tmp.touch(mode=0o600, exist_ok=True)
        try:
            # touch() leaves the mode of a pre-existing temp file alone,
            # and the creation mode is masked by umask; enforce it.
            tmp.chmod(0o600)
        except OSError:
            # Best effort: some filesystems do not support POSIX modes.
            pass
        tmp.write_text(
            json.dumps({"token": tok, "generated_at": time.time()}, indent=2),
            encoding="utf-8",
        )
        tmp.replace(self.token_path)

    def ensure_token(self) -> str:
        """Return the persisted shared secret, generating one on first use.

        Resolution order: the in-memory cache, then the token file,
        then a freshly generated token which is written to the token
        file. An unreadable or malformed token file is treated as
        absent and overwritten with a new token.

        Raises:
            OSError: if a new token has to be written and the token
                directory or file cannot be created.
        """
        if self._token:
            return self._token
        tok = self._read_persisted_token()
        if tok is None:
            tok = secrets.token_hex(16)  # 32 hex chars
            self._persist_token(tok)
        self._token = tok
        return tok

    def get_token(self) -> str:
        """Alias for :meth:`ensure_token`; does not mutate on subsequent calls."""
        return self.ensure_token()

    # ----- dispatch -----------------------------------------------------

    async def _handle_request(self, msg: Dict[str, Any]) -> Dict[str, Any]:
        """Validate + dispatch a single decoded request envelope.

        Returns a response envelope (success or error) for every
        request that reaches dispatch. Errors from inside the
        process_manager are wrapped into the response payload's
        ``ok``/``error`` keys (which pm already does) rather than being
        re-encoded as error envelopes — the envelope-level error
        channel is reserved for auth / protocol failures. Unexpected
        exceptions during dispatch, including a failure to import the
        process manager, are reported as error envelopes.
        """
        expected = self.ensure_token()
        ok, reason = _proto.validate_request(msg, expected)
        if not ok:
            # A rejected message may not even be a dict; don't crash
            # while trying to echo its id back.
            raw_id = msg.get("id") if isinstance(msg, dict) else None
            return _proto.make_error(str(raw_id or ""), reason)

        req_id = msg["id"]
        t = msg["type"]
        payload = msg["payload"]

        try:
            if t == "ping":
                # Liveness check: answered without touching the
                # process manager at all.
                return {
                    "type": "pong",
                    "id": req_id,
                    "payload": {
                        "display_name": self.display_name,
                        "ts": time.time(),
                    },
                }

            # Import lazily so test mocks can monkeypatch freely. Kept
            # inside the try so an import failure becomes an error
            # envelope instead of escaping to the connection handler.
            from plugins.google_meet import process_manager as pm

            if t == "start_bot":
                # Whitelist kwargs we pass through to pm.start.
                allowed = (
                    "url", "guest_name", "duration", "headed",
                    "auth_state", "session_id", "out_dir",
                )
                kwargs = {k: payload[k] for k in allowed if k in payload}
                if "url" not in kwargs:
                    return _proto.make_error(req_id, "missing 'url' in payload")
                return _proto.make_response(req_id, pm.start(**kwargs))

            if t == "stop":
                reason_arg = payload.get("reason", "requested")
                return _proto.make_response(req_id, pm.stop(reason=reason_arg))

            if t == "status":
                return _proto.make_response(req_id, pm.status())

            if t == "transcript":
                last = payload.get("last")
                return _proto.make_response(req_id, pm.transcript(last=last))

            if t == "say":
                # v2 wiring: enqueue into say_queue.jsonl inside the
                # active meeting's out_dir when present. The bot-side
                # consumer is v3+ (for v1 this is a stub returning ok).
                text = payload.get("text", "")
                active = pm._read_active()  # type: ignore[attr-defined]
                enqueued = False
                if active and active.get("out_dir"):
                    queue = Path(active["out_dir"]) / "say_queue.jsonl"
                    try:
                        queue.parent.mkdir(parents=True, exist_ok=True)
                        with queue.open("a", encoding="utf-8") as fh:
                            fh.write(json.dumps({"text": text, "ts": time.time()}) + "\n")
                        enqueued = True
                    except OSError:
                        # Queueing is best-effort; report it via the
                        # ``enqueued`` flag rather than failing the RPC.
                        enqueued = False
                return _proto.make_response(
                    req_id,
                    {"ok": True, "enqueued": enqueued, "text": text},
                )
        except Exception as exc:  # noqa: BLE001 — surface any pm crash to client
            return _proto.make_error(req_id, f"{type(exc).__name__}: {exc}")

        return _proto.make_error(req_id, f"unhandled type: {t!r}")

    # ----- server loop --------------------------------------------------

    async def serve(self) -> None:
        """Run the WebSocket server until cancelled.

        Blocks forever. Callers typically wrap this in ``asyncio.run``.

        Raises:
            RuntimeError: if the optional ``websockets`` package is not
                installed.
        """
        import asyncio

        try:
            import websockets  # type: ignore
        except ImportError as exc:
            raise RuntimeError(
                "NodeServer.serve requires the 'websockets' package. "
                "Install it with: pip install websockets"
            ) from exc

        # Make sure the token exists before the first request arrives.
        self.ensure_token()

        async def _handler(ws):
            # One request/response exchange per received frame.
            async for raw in ws:
                try:
                    msg = _proto.decode(raw if isinstance(raw, str) else raw.decode("utf-8"))
                except ValueError as exc:
                    await ws.send(_proto.encode(_proto.make_error("", f"decode: {exc}")))
                    continue
                reply = await self._handle_request(msg)
                await ws.send(_proto.encode(reply))

        async with websockets.serve(_handler, self.host, self.port):
            # Run until cancelled: this future never completes on its own.
            await asyncio.Future()
|
||||
@@ -0,0 +1,16 @@
|
||||
name: google_meet
|
||||
version: 0.2.0
|
||||
description: "Join a Google Meet call, transcribe live captions, speak in realtime, and follow up afterwards. v1 transcribe-only is the default; v2 realtime duplex audio via OpenAI Realtime + BlackHole/PulseAudio ships with mode='realtime'; v3 remote node host lets the bot run on a different machine than the gateway (gateway on Linux, Chrome+signed-in profile on the user's Mac). Explicit-by-design: only joins meet.google.com URLs passed in \u2014 no calendar scanning, no auto-dial."
|
||||
author: NousResearch
|
||||
kind: standalone
|
||||
platforms:
|
||||
- linux
|
||||
- macos
|
||||
provides_tools:
|
||||
- meet_join
|
||||
- meet_leave
|
||||
- meet_status
|
||||
- meet_transcript
|
||||
- meet_say
|
||||
hooks:
|
||||
- on_session_end
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user