Compare commits
143 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 648da6a8d1 | |||
| edc78e258c | |||
| 31d7f1951a | |||
| b1c18e5a41 | |||
| bd66e55a02 | |||
| 1735ced93b | |||
| bba16943f6 | |||
| 132620ba3d | |||
| 876bb60044 | |||
| a68793b6c4 | |||
| bcc5362432 | |||
| 283c8fd6e2 | |||
| 919274b60e | |||
| 6e83d90eb4 | |||
| c6fdf48b79 | |||
| a046483e86 | |||
| fdcbd2257b | |||
| 48bdd2445e | |||
| 5e52011de3 | |||
| e48a497d16 | |||
| 2dfcc8087a | |||
| 4db58d45d4 | |||
| 57b43fdd4b | |||
| e9c47c7042 | |||
| ee0728c6c4 | |||
| 648b89911f | |||
| 7c17accb29 | |||
| 5006b2204b | |||
| a9fa73a620 | |||
| 7c8c031f60 | |||
| ea01bdcebe | |||
| d635e2df3f | |||
| cf2fabc40f | |||
| af22421e87 | |||
| 97d54f0e4d | |||
| 6e561ffa6d | |||
| ac05daa189 | |||
| 3c1c65e754 | |||
| f92006ce1c | |||
| b35d692f45 | |||
| facea84559 | |||
| f67a61dc93 | |||
| 6ed37e0f42 | |||
| 591deeb928 | |||
| 5ae07e7b5c | |||
| 47b02e961c | |||
| 0702231dd8 | |||
| db09477b77 | |||
| 81987f0350 | |||
| 9830905dab | |||
| 0d548d1db9 | |||
| eb92222811 | |||
| e4a91ccb76 | |||
| 5ac5365923 | |||
| f433197f23 | |||
| df485628ce | |||
| 9fde22d233 | |||
| 9d7b64b5dd | |||
| 5401a0080d | |||
| e5647d7863 | |||
| 023b1bff11 | |||
| 6407b3d5b3 | |||
| 0a59994030 | |||
| 0ed37c0ca4 | |||
| 1c8ce33d51 | |||
| 2182de55bb | |||
| 3cf13747b7 | |||
| 3e61703b08 | |||
| 05d8f11085 | |||
| 13038dc747 | |||
| 629e108ee2 | |||
| c34d3f4807 | |||
| f14264c438 | |||
| 19a3e2ce8e | |||
| d58b305adf | |||
| e93cc934c7 | |||
| 93a2d6b307 | |||
| 4fade39c90 | |||
| f731c2c2bd | |||
| 00c3d848d8 | |||
| fd10463069 | |||
| c599a41b84 | |||
| c7d62b3fe3 | |||
| 36d68bcb82 | |||
| a29bad2a3c | |||
| 7957da7a1d | |||
| fd3864d8bd | |||
| 8ea389a7f8 | |||
| 3e6c108565 | |||
| e3a1a9c24d | |||
| e3697e20a6 | |||
| ed91b79b7e | |||
| 08d5c9c539 | |||
| 1dcf79a864 | |||
| 2de8a7a229 | |||
| ead66f0c92 | |||
| 0bcbc9e316 | |||
| 2d444fc84d | |||
| bb53d79d26 | |||
| 17fc84c256 | |||
| b7c1d77e55 | |||
| 7a192b124e | |||
| 0738b80833 | |||
| 4093ee9c62 | |||
| 6a957a74bc | |||
| 14b27bb68c | |||
| ef9355455b | |||
| dbdefa43c8 | |||
| db9d6375fb | |||
| 8a2506af43 | |||
| e7590f92a2 | |||
| a5129c72ef | |||
| 53fc10fc9a | |||
| 93ddff53e3 | |||
| de596aca1c | |||
| 6f1eed3968 | |||
| e3940f9807 | |||
| bfa60234c8 | |||
| fd9b692d33 | |||
| c61547c067 | |||
| 7f0f67d5f7 | |||
| f5e2a77a80 | |||
| 850fac14e3 | |||
| 5500b51800 | |||
| 63975aa75b | |||
| 62c14d5513 | |||
| 10deb1b87d | |||
| f49afd3122 | |||
| 1143f234e3 | |||
| c4627f4933 | |||
| 7c3e5706d8 | |||
| a9ccb03ccc | |||
| 7dc6eb9fbf | |||
| b290297d66 | |||
| f2fba4f9a1 | |||
| fcc05284fc | |||
| 1840c6a57d | |||
| 591aa159aa | |||
| d3e56b9f39 | |||
| 0fdbfad2b0 | |||
| 4f5669a569 | |||
| 809868e628 | |||
| e5d2815b41 |
@@ -240,6 +240,19 @@ npm run fmt # prettier
|
||||
npm test # vitest
|
||||
```
|
||||
|
||||
### TUI in the Dashboard (`hermes dashboard` → `/chat`)
|
||||
|
||||
The dashboard embeds the real `hermes --tui` — **not** a rewrite. See `hermes_cli/pty_bridge.py` + the `@app.websocket("/api/pty")` endpoint in `hermes_cli/web_server.py`.
|
||||
|
||||
- Browser loads `web/src/pages/ChatPage.tsx`, which mounts xterm.js's `Terminal` with the WebGL renderer, `@xterm/addon-fit` for container-driven resize, and `@xterm/addon-unicode11` for modern wide-character widths.
|
||||
- `/api/pty?token=…` upgrades to a WebSocket; auth uses the same ephemeral `_SESSION_TOKEN` as REST, via query param (browsers can't set `Authorization` on WS upgrade).
|
||||
- The server spawns whatever `hermes --tui` would spawn, through `ptyprocess` (POSIX PTY — WSL works, native Windows does not).
|
||||
- Frames: raw PTY bytes in each direction; resize requests are sent as `\x1b[RESIZE:<cols>;<rows>]`, which the server intercepts and applies with `TIOCSWINSZ`.
|
||||
|
||||
**Do not re-implement the primary chat experience in React.** The main transcript, composer/input flow (including slash-command behavior), and PTY-backed terminal belong to the embedded `hermes --tui` — anything new you add to Ink shows up in the dashboard automatically. If you find yourself rebuilding the transcript or composer for the dashboard, stop and extend Ink instead.
|
||||
|
||||
**Structured React UI around the TUI is allowed when it is not a second chat surface.** Sidebar widgets, inspectors, summaries, status panels, and similar supporting views (e.g. `ChatSidebar`, `ModelPickerDialog`, `ToolCall`) are fine when they complement the embedded TUI rather than replacing the transcript / composer / terminal. Keep their state independent of the PTY child's session and surface their failures non-destructively so the terminal pane keeps working unimpaired.
|
||||
|
||||
---
|
||||
|
||||
## Adding New Tools
|
||||
|
||||
@@ -986,6 +986,26 @@ def read_hermes_oauth_credentials() -> Optional[Dict[str, Any]]:
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _is_bedrock_model_id(model: str) -> bool:
|
||||
"""Detect AWS Bedrock model IDs that use dots as namespace separators.
|
||||
|
||||
Bedrock model IDs come in two forms:
|
||||
- Bare: ``anthropic.claude-opus-4-7``
|
||||
- Regional (inference profiles): ``us.anthropic.claude-sonnet-4-5-v1:0``
|
||||
|
||||
In both cases the dots separate namespace components, not version
|
||||
numbers, and must be preserved verbatim for the Bedrock API.
|
||||
"""
|
||||
lower = model.lower()
|
||||
# Regional inference-profile prefixes
|
||||
if any(lower.startswith(p) for p in ("global.", "us.", "eu.", "ap.", "jp.")):
|
||||
return True
|
||||
# Bare Bedrock model IDs: provider.model-family
|
||||
if lower.startswith("anthropic."):
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def normalize_model_name(model: str, preserve_dots: bool = False) -> str:
|
||||
"""Normalize a model name for the Anthropic API.
|
||||
|
||||
@@ -993,11 +1013,19 @@ def normalize_model_name(model: str, preserve_dots: bool = False) -> str:
|
||||
- Converts dots to hyphens in version numbers (OpenRouter uses dots,
|
||||
Anthropic uses hyphens: claude-opus-4.6 → claude-opus-4-6), unless
|
||||
preserve_dots is True (e.g. for Alibaba/DashScope: qwen3.5-plus).
|
||||
- Preserves Bedrock model IDs (``anthropic.claude-opus-4-7``) and
|
||||
regional inference profiles (``us.anthropic.claude-*``) whose dots
|
||||
are namespace separators, not version separators.
|
||||
"""
|
||||
lower = model.lower()
|
||||
if lower.startswith("anthropic/"):
|
||||
model = model[len("anthropic/"):]
|
||||
if not preserve_dots:
|
||||
# Bedrock model IDs use dots as namespace separators
|
||||
# (e.g. "anthropic.claude-opus-4-7", "us.anthropic.claude-*").
|
||||
# These must not be converted to hyphens. See issue #12295.
|
||||
if _is_bedrock_model_id(model):
|
||||
return model
|
||||
# OpenRouter uses dots for version separators (claude-opus-4.6),
|
||||
# Anthropic uses hyphens (claude-opus-4-6). Convert dots to hyphens.
|
||||
model = model.replace(".", "-")
|
||||
@@ -1652,9 +1680,9 @@ def build_anthropic_kwargs(
|
||||
|
||||
# ── Strip sampling params on 4.7+ ─────────────────────────────────
|
||||
# Opus 4.7 rejects any non-default temperature/top_p/top_k with a 400.
|
||||
# Callers (auxiliary_client, flush_memories, etc.) may set these for
|
||||
# older models; drop them here as a safety net so upstream 4.6 → 4.7
|
||||
# migrations don't require coordinated edits everywhere.
|
||||
# Callers (auxiliary_client, etc.) may set these for older models;
|
||||
# drop them here as a safety net so upstream 4.6 → 4.7 migrations
|
||||
# don't require coordinated edits everywhere.
|
||||
if _forbids_sampling_params(model):
|
||||
for _sampling_key in ("temperature", "top_p", "top_k"):
|
||||
kwargs.pop(_sampling_key, None)
|
||||
|
||||
+142
-7
@@ -390,7 +390,7 @@ class _CodexCompletionsAdapter:
|
||||
# Note: the Codex endpoint (chatgpt.com/backend-api/codex) does NOT
|
||||
# support max_output_tokens or temperature — omit to avoid 400 errors.
|
||||
|
||||
# Tools support for flush_memories and similar callers
|
||||
# Tools support for auxiliary callers (e.g. skills_hub) that pass function schemas
|
||||
tools = kwargs.get("tools")
|
||||
if tools:
|
||||
converted = []
|
||||
@@ -1349,6 +1349,49 @@ def _is_auth_error(exc: Exception) -> bool:
|
||||
return "error code: 401" in err_lower or "authenticationerror" in type(exc).__name__.lower()
|
||||
|
||||
|
||||
def _is_unsupported_parameter_error(exc: Exception, param: str) -> bool:
|
||||
"""Detect provider 400s for an unsupported request parameter.
|
||||
|
||||
Different OpenAI-compatible endpoints phrase the same class of error a few
|
||||
ways: ``Unsupported parameter: X``, ``unsupported_parameter`` with a
|
||||
``param`` field, ``X is not supported``, ``unknown parameter: X``,
|
||||
``unrecognized request argument: X``. We match on both the parameter
|
||||
name and a generic "unsupported/unknown/unrecognized parameter" marker so
|
||||
call sites can reactively retry without the offending key instead of
|
||||
surfacing a noisy auxiliary failure.
|
||||
|
||||
Generalizes the temperature-specific detector that originally shipped
|
||||
with PR #15621 so the same retry strategy can cover ``max_tokens``,
|
||||
``seed``, ``top_p``, and any future quirk. Credit @nicholasrae (PR #15416)
|
||||
for the generalization pattern.
|
||||
"""
|
||||
param_lower = (param or "").lower()
|
||||
if not param_lower:
|
||||
return False
|
||||
err_lower = str(exc).lower()
|
||||
if param_lower not in err_lower:
|
||||
return False
|
||||
return any(marker in err_lower for marker in (
|
||||
"unsupported parameter",
|
||||
"unsupported_parameter",
|
||||
"not supported",
|
||||
"does not support",
|
||||
"unknown parameter",
|
||||
"unrecognized request argument",
|
||||
"unrecognized parameter",
|
||||
"invalid parameter",
|
||||
))
|
||||
|
||||
|
||||
def _is_unsupported_temperature_error(exc: Exception) -> bool:
    """Report whether ``exc`` is a provider rejection of ``temperature``.

    Backward-compatibility shim: the actual matching is done by
    :func:`_is_unsupported_parameter_error` with the parameter name fixed to
    ``"temperature"``. The symbol is retained because existing tests and
    call sites import it by name.
    """
    rejected = _is_unsupported_parameter_error(exc, "temperature")
    return rejected
|
||||
|
||||
|
||||
def _evict_cached_clients(provider: str) -> None:
|
||||
"""Drop cached auxiliary clients for a provider so fresh creds are used."""
|
||||
normalized = _normalize_aux_provider(provider)
|
||||
@@ -1993,6 +2036,39 @@ def resolve_provider_client(
|
||||
"directly supported", provider)
|
||||
return None, None
|
||||
|
||||
elif pconfig.auth_type == "aws_sdk":
|
||||
# AWS SDK providers (Bedrock) — use the Anthropic Bedrock client via
|
||||
# boto3's credential chain (IAM roles, SSO, env vars, instance metadata).
|
||||
try:
|
||||
from agent.bedrock_adapter import has_aws_credentials, resolve_bedrock_region
|
||||
from agent.anthropic_adapter import build_anthropic_bedrock_client
|
||||
except ImportError:
|
||||
logger.warning("resolve_provider_client: bedrock requested but "
|
||||
"boto3 or anthropic SDK not installed")
|
||||
return None, None
|
||||
|
||||
if not has_aws_credentials():
|
||||
logger.debug("resolve_provider_client: bedrock requested but "
|
||||
"no AWS credentials found")
|
||||
return None, None
|
||||
|
||||
region = resolve_bedrock_region()
|
||||
default_model = "anthropic.claude-haiku-4-5-20251001-v1:0"
|
||||
final_model = _normalize_resolved_model(model or default_model, provider)
|
||||
try:
|
||||
real_client = build_anthropic_bedrock_client(region)
|
||||
except ImportError as exc:
|
||||
logger.warning("resolve_provider_client: cannot create Bedrock "
|
||||
"client: %s", exc)
|
||||
return None, None
|
||||
client = AnthropicAuxiliaryClient(
|
||||
real_client, final_model, api_key="aws-sdk",
|
||||
base_url=f"https://bedrock-runtime.{region}.amazonaws.com",
|
||||
)
|
||||
logger.debug("resolve_provider_client: bedrock (%s, %s)", final_model, region)
|
||||
return (_to_async_client(client, final_model) if async_mode
|
||||
else (client, final_model))
|
||||
|
||||
elif pconfig.auth_type in ("oauth_device_code", "oauth_external"):
|
||||
# OAuth providers — route through their specific try functions
|
||||
if provider == "nous":
|
||||
@@ -2727,8 +2803,8 @@ def _build_call_kwargs(
|
||||
temperature = fixed_temperature
|
||||
|
||||
# Opus 4.7+ rejects any non-default temperature/top_p/top_k — silently
|
||||
# drop here so auxiliary callers that hardcode temperature (e.g. 0.3 on
|
||||
# flush_memories, 0 on structured-JSON extraction) don't 400 the moment
|
||||
# drop here so auxiliary callers that hardcode temperature (e.g. 0 on
|
||||
# structured-JSON extraction) don't 400 the moment
|
||||
# the aux model is flipped to 4.7.
|
||||
if temperature is not None:
|
||||
from agent.anthropic_adapter import _forbids_sampling_params
|
||||
@@ -2816,7 +2892,7 @@ def call_llm(
|
||||
|
||||
Args:
|
||||
task: Auxiliary task name ("compression", "vision", "web_extract",
|
||||
"session_search", "skills_hub", "mcp", "flush_memories").
|
||||
"session_search", "skills_hub", "mcp", "title_generation").
|
||||
Reads provider:model from config/env. Ignored if provider is set.
|
||||
provider: Explicit provider override.
|
||||
model: Explicit model override.
|
||||
@@ -2919,13 +2995,45 @@ def call_llm(
|
||||
if _is_anthropic_compat_endpoint(resolved_provider, _client_base):
|
||||
kwargs["messages"] = _convert_openai_images_to_anthropic(kwargs["messages"])
|
||||
|
||||
# Handle max_tokens vs max_completion_tokens retry, then payment fallback.
|
||||
# Handle unsupported temperature, max_tokens vs max_completion_tokens retry,
|
||||
# then payment fallback.
|
||||
try:
|
||||
return _validate_llm_response(
|
||||
client.chat.completions.create(**kwargs), task)
|
||||
except Exception as first_err:
|
||||
if "temperature" in kwargs and _is_unsupported_temperature_error(first_err):
|
||||
retry_kwargs = dict(kwargs)
|
||||
retry_kwargs.pop("temperature", None)
|
||||
logger.info(
|
||||
"Auxiliary %s: provider rejected temperature; retrying once without it",
|
||||
task or "call",
|
||||
)
|
||||
try:
|
||||
return _validate_llm_response(
|
||||
client.chat.completions.create(**retry_kwargs), task)
|
||||
except Exception as retry_err:
|
||||
retry_err_str = str(retry_err)
|
||||
# If retry still fails, fall through to the max_tokens /
|
||||
# payment / auth chains below using the temperature-stripped
|
||||
# kwargs. Re-raise only if the retry hit something those
|
||||
# chains won't handle.
|
||||
if not (
|
||||
_is_payment_error(retry_err)
|
||||
or _is_connection_error(retry_err)
|
||||
or _is_auth_error(retry_err)
|
||||
or "max_tokens" in retry_err_str
|
||||
or "unsupported_parameter" in retry_err_str
|
||||
):
|
||||
raise
|
||||
first_err = retry_err
|
||||
kwargs = retry_kwargs
|
||||
|
||||
err_str = str(first_err)
|
||||
if "max_tokens" in err_str or "unsupported_parameter" in err_str:
|
||||
if max_tokens is not None and (
|
||||
"max_tokens" in err_str
|
||||
or "unsupported_parameter" in err_str
|
||||
or _is_unsupported_parameter_error(first_err, "max_tokens")
|
||||
):
|
||||
kwargs.pop("max_tokens", None)
|
||||
kwargs["max_completion_tokens"] = max_tokens
|
||||
try:
|
||||
@@ -3188,8 +3296,35 @@ async def async_call_llm(
|
||||
return _validate_llm_response(
|
||||
await client.chat.completions.create(**kwargs), task)
|
||||
except Exception as first_err:
|
||||
if "temperature" in kwargs and _is_unsupported_temperature_error(first_err):
|
||||
retry_kwargs = dict(kwargs)
|
||||
retry_kwargs.pop("temperature", None)
|
||||
logger.info(
|
||||
"Auxiliary %s (async): provider rejected temperature; retrying once without it",
|
||||
task or "call",
|
||||
)
|
||||
try:
|
||||
return _validate_llm_response(
|
||||
await client.chat.completions.create(**retry_kwargs), task)
|
||||
except Exception as retry_err:
|
||||
retry_err_str = str(retry_err)
|
||||
if not (
|
||||
_is_payment_error(retry_err)
|
||||
or _is_connection_error(retry_err)
|
||||
or _is_auth_error(retry_err)
|
||||
or "max_tokens" in retry_err_str
|
||||
or "unsupported_parameter" in retry_err_str
|
||||
):
|
||||
raise
|
||||
first_err = retry_err
|
||||
kwargs = retry_kwargs
|
||||
|
||||
err_str = str(first_err)
|
||||
if "max_tokens" in err_str or "unsupported_parameter" in err_str:
|
||||
if max_tokens is not None and (
|
||||
"max_tokens" in err_str
|
||||
or "unsupported_parameter" in err_str
|
||||
or _is_unsupported_parameter_error(first_err, "max_tokens")
|
||||
):
|
||||
kwargs.pop("max_tokens", None)
|
||||
kwargs["max_completion_tokens"] = max_tokens
|
||||
try:
|
||||
|
||||
+130
-2
@@ -87,6 +87,114 @@ def reset_client_cache():
|
||||
_bedrock_control_client_cache.clear()
|
||||
|
||||
|
||||
def invalidate_runtime_client(region: str) -> bool:
    """Drop the cached ``bedrock-runtime`` client for one region.

    This is the single-region counterpart to :func:`reset_client_cache`.
    The converse call wrappers use it to throw away a client whose
    underlying HTTP connection has gone stale, so that the next call builds
    a fresh client (and with it a fresh connection pool) rather than
    reusing a dead socket.

    Args:
        region: Cache key of the client to evict.

    Returns:
        True if an entry for ``region`` was present and has been removed,
        False if nothing was cached for that region.
    """
    try:
        del _bedrock_runtime_client_cache[region]
    except KeyError:
        # Nothing cached for this region — nothing to evict.
        return False
    return True
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Stale-connection detection
|
||||
# ---------------------------------------------------------------------------
|
||||
#
|
||||
# boto3 caches its HTTPS connection pool inside the client object. When a
|
||||
# pooled connection is killed out from under us (NAT timeout, VPN flap,
|
||||
# server-side TCP RST, proxy idle cull, etc.), the next use surfaces as
|
||||
# one of a handful of low-level exceptions — most commonly
|
||||
# ``botocore.exceptions.ConnectionClosedError`` or
|
||||
# ``urllib3.exceptions.ProtocolError``. urllib3 also trips an internal
|
||||
# ``assert`` in a couple of paths (connection pool state checks, chunked
|
||||
# response readers) which bubbles up as a bare ``AssertionError`` with an
|
||||
# empty ``str(exc)``.
|
||||
#
|
||||
# In all of these cases the client is the problem, not the request: retrying
|
||||
# with the same cached client reproduces the failure until the process
|
||||
# restarts. The fix is to evict the region's cached client so the next
|
||||
# attempt builds a new one.
|
||||
|
||||
_STALE_LIB_MODULE_PREFIXES = (
|
||||
"urllib3.",
|
||||
"botocore.",
|
||||
"boto3.",
|
||||
)
|
||||
|
||||
|
||||
def _traceback_frames_modules(exc: BaseException):
|
||||
"""Yield ``__name__``-style module strings for each frame in exc's traceback."""
|
||||
tb = getattr(exc, "__traceback__", None)
|
||||
while tb is not None:
|
||||
frame = tb.tb_frame
|
||||
module = frame.f_globals.get("__name__", "")
|
||||
yield module or ""
|
||||
tb = tb.tb_next
|
||||
|
||||
|
||||
def is_stale_connection_error(exc: BaseException) -> bool:
    """Return True if ``exc`` indicates a dead/stale Bedrock HTTP connection.

    Matches:
      * ``botocore.exceptions.HTTPClientError`` and its subclasses
        (e.g. ``ConnectionClosedError``, ``ReadTimeoutError``).
      * ``botocore.exceptions.ConnectionError`` and its subclasses
        (e.g. ``EndpointConnectionError``, ``ConnectTimeoutError``,
        ``ProxyConnectionError``).
      * ``urllib3.exceptions.ProtocolError`` / ``NewConnectionError`` /
        ``ConnectionError`` (best-effort import — urllib3 is a transitive
        dependency of botocore so it is always available in practice).
      * ``AssertionError`` whose traceback passes through a frame inside
        urllib3, botocore, or boto3. These are internal-invariant failures
        (typically triggered by corrupted connection-pool state after a
        dropped socket) and are recoverable by swapping the client.

    An ``AssertionError`` whose traceback contains no library frame (plain
    application or test asserts) is intentionally not matched — only
    library-internal asserts signal stale connection state.

    Args:
        exc: The exception raised by a Bedrock client call.

    Returns:
        True when the exception belongs to one of the families above,
        otherwise False.
    """
    # botocore: HTTPClientError and ConnectionError are two sibling branches
    # of the hierarchy; between them they cover closed connections, read and
    # connect timeouts, endpoint and proxy connection failures.
    try:
        from botocore.exceptions import (
            ConnectionError as BotoConnectionError,
            HTTPClientError,
        )
    except ImportError:  # pragma: no cover — botocore always present with boto3
        botocore_errors: tuple = ()
    else:
        botocore_errors = (BotoConnectionError, HTTPClientError)
    # isinstance() against an empty tuple is simply False, so no extra
    # truthiness guard is needed when the import failed.
    if isinstance(exc, botocore_errors):
        return True

    # urllib3: low-level transport failures
    try:
        from urllib3.exceptions import (
            ProtocolError,
            NewConnectionError,
            ConnectionError as Urllib3ConnectionError,
        )
    except ImportError:  # pragma: no cover
        urllib3_errors: tuple = ()
    else:
        urllib3_errors = (ProtocolError, NewConnectionError, Urllib3ConnectionError)
    if isinstance(exc, urllib3_errors):
        return True

    # Library-internal AssertionError (urllib3 / botocore / boto3): match
    # when any traceback frame belongs to one of those packages.
    if isinstance(exc, AssertionError):
        return any(
            module.startswith(_STALE_LIB_MODULE_PREFIXES)
            for module in _traceback_frames_modules(exc)
        )

    return False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# AWS credential detection
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -787,7 +895,17 @@ def call_converse(
|
||||
guardrail_config=guardrail_config,
|
||||
)
|
||||
|
||||
response = client.converse(**kwargs)
|
||||
try:
|
||||
response = client.converse(**kwargs)
|
||||
except Exception as exc:
|
||||
if is_stale_connection_error(exc):
|
||||
logger.warning(
|
||||
"bedrock: stale-connection error on converse(region=%s, model=%s): "
|
||||
"%s — evicting cached client so the next call reconnects.",
|
||||
region, model, type(exc).__name__,
|
||||
)
|
||||
invalidate_runtime_client(region)
|
||||
raise
|
||||
return normalize_converse_response(response)
|
||||
|
||||
|
||||
@@ -819,7 +937,17 @@ def call_converse_stream(
|
||||
guardrail_config=guardrail_config,
|
||||
)
|
||||
|
||||
response = client.converse_stream(**kwargs)
|
||||
try:
|
||||
response = client.converse_stream(**kwargs)
|
||||
except Exception as exc:
|
||||
if is_stale_connection_error(exc):
|
||||
logger.warning(
|
||||
"bedrock: stale-connection error on converse_stream(region=%s, "
|
||||
"model=%s): %s — evicting cached client so the next call reconnects.",
|
||||
region, model, type(exc).__name__,
|
||||
)
|
||||
invalidate_runtime_client(region)
|
||||
raise
|
||||
return normalize_converse_stream_events(response)
|
||||
|
||||
|
||||
|
||||
@@ -23,26 +23,52 @@ from agent.prompt_builder import DEFAULT_AGENT_IDENTITY
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Matches Codex/Harmony tool-call serialization that occasionally leaks into
|
||||
# assistant-message content when the model fails to emit a structured
|
||||
# ``function_call`` item. Accepts the common forms:
|
||||
#
|
||||
# to=functions.exec_command
|
||||
# assistant to=functions.exec_command
|
||||
# <|channel|>commentary to=functions.exec_command
|
||||
#
|
||||
# ``to=functions.<name>`` is the stable marker — the optional ``assistant`` or
|
||||
# Harmony channel prefix varies by degeneration mode. Case-insensitive to
|
||||
# cover lowercase/uppercase ``assistant`` variants.
|
||||
_TOOL_CALL_LEAK_PATTERN = re.compile(
|
||||
r"(?:^|[\s>|])to=functions\.[A-Za-z_][\w.]*",
|
||||
re.IGNORECASE,
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Multimodal content helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _chat_content_to_responses_parts(content: Any) -> List[Dict[str, Any]]:
|
||||
def _chat_content_to_responses_parts(content: Any, *, role: str = "user") -> List[Dict[str, Any]]:
|
||||
"""Convert chat-style multimodal content to Responses API input parts.
|
||||
|
||||
Input: ``[{"type":"text"|"image_url", ...}]`` (native OpenAI Chat format)
|
||||
Output: ``[{"type":"input_text"|"input_image", ...}]`` (Responses format)
|
||||
Output: ``[{"type":"input_text"|"output_text"|"input_image", ...}]`` (Responses format)
|
||||
|
||||
The ``role`` parameter controls the text content type:
|
||||
- ``"user"`` (default) → ``"input_text"``
|
||||
- ``"assistant"`` → ``"output_text"``
|
||||
|
||||
The Responses API rejects ``input_text`` inside assistant messages and
|
||||
``output_text`` inside user messages, so callers MUST pass the correct
|
||||
role for the message being converted.
|
||||
|
||||
Returns an empty list when ``content`` is not a list or contains no
|
||||
recognized parts — callers fall back to the string path.
|
||||
"""
|
||||
text_type = "output_text" if role == "assistant" else "input_text"
|
||||
if not isinstance(content, list):
|
||||
return []
|
||||
converted: List[Dict[str, Any]] = []
|
||||
for part in content:
|
||||
if isinstance(part, str):
|
||||
if part:
|
||||
converted.append({"type": "input_text", "text": part})
|
||||
converted.append({"type": text_type, "text": part})
|
||||
continue
|
||||
if not isinstance(part, dict):
|
||||
continue
|
||||
@@ -50,7 +76,7 @@ def _chat_content_to_responses_parts(content: Any) -> List[Dict[str, Any]]:
|
||||
if ptype in {"text", "input_text", "output_text"}:
|
||||
text = part.get("text")
|
||||
if isinstance(text, str) and text:
|
||||
converted.append({"type": "input_text", "text": text})
|
||||
converted.append({"type": text_type, "text": text})
|
||||
continue
|
||||
if ptype in {"image_url", "input_image"}:
|
||||
image_ref = part.get("image_url")
|
||||
@@ -216,9 +242,10 @@ def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Di
|
||||
if role in {"user", "assistant"}:
|
||||
content = msg.get("content", "")
|
||||
if isinstance(content, list):
|
||||
content_parts = _chat_content_to_responses_parts(content)
|
||||
content_parts = _chat_content_to_responses_parts(content, role=role)
|
||||
text_type = "output_text" if role == "assistant" else "input_text"
|
||||
content_text = "".join(
|
||||
p.get("text", "") for p in content_parts if p.get("type") == "input_text"
|
||||
p.get("text", "") for p in content_parts if p.get("type") == text_type
|
||||
)
|
||||
else:
|
||||
content_parts = []
|
||||
@@ -412,13 +439,16 @@ def _preflight_codex_input_items(raw_items: Any) -> List[Dict[str, Any]]:
|
||||
content = ""
|
||||
if isinstance(content, list):
|
||||
# Multimodal content from ``_chat_messages_to_responses_input``
|
||||
# is already in Responses format (``input_text`` / ``input_image``).
|
||||
# Validate each part and pass through.
|
||||
# is already in Responses format (``input_text`` / ``output_text``
|
||||
# / ``input_image``). Validate each part and pass through.
|
||||
# Use the correct text type for the role — ``output_text`` for
|
||||
# assistant messages, ``input_text`` for user messages.
|
||||
text_type = "output_text" if role == "assistant" else "input_text"
|
||||
validated: List[Dict[str, Any]] = []
|
||||
for part_idx, part in enumerate(content):
|
||||
if isinstance(part, str):
|
||||
if part:
|
||||
validated.append({"type": "input_text", "text": part})
|
||||
validated.append({"type": text_type, "text": part})
|
||||
continue
|
||||
if not isinstance(part, dict):
|
||||
raise ValueError(
|
||||
@@ -429,7 +459,7 @@ def _preflight_codex_input_items(raw_items: Any) -> List[Dict[str, Any]]:
|
||||
text = part.get("text", "")
|
||||
if not isinstance(text, str):
|
||||
text = str(text or "")
|
||||
validated.append({"type": "input_text", "text": text})
|
||||
validated.append({"type": text_type, "text": text})
|
||||
elif ptype in {"input_image", "image_url"}:
|
||||
image_ref = part.get("image_url", "")
|
||||
detail = part.get("detail")
|
||||
@@ -787,6 +817,37 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]:
|
||||
if isinstance(out_text, str):
|
||||
final_text = out_text.strip()
|
||||
|
||||
# ── Tool-call leak recovery ──────────────────────────────────
|
||||
# gpt-5.x on the Codex Responses API sometimes degenerates and emits
|
||||
# what should be a structured `function_call` item as plain assistant
|
||||
# text using the Harmony/Codex serialization (``to=functions.foo
|
||||
# {json}`` or ``assistant to=functions.foo {json}``). The model
|
||||
# intended to call a tool, but the intent never made it into
|
||||
# ``response.output`` as a ``function_call`` item, so ``tool_calls``
|
||||
# is empty here. If we pass this through, the parent sees a
|
||||
# confident-looking summary with no audit trail (empty ``tool_trace``)
|
||||
# and no tools actually ran — the Taiwan-embassy-email incident.
|
||||
#
|
||||
# Detection: leaked tokens always contain ``to=functions.<name>`` and
|
||||
# the assistant message has no real tool calls. Treat it as incomplete
|
||||
# so the existing Codex-incomplete continuation path (3 retries,
|
||||
# handled in run_agent.py) gets a chance to re-elicit a proper
|
||||
# ``function_call`` item. The existing loop already handles message
|
||||
# append, dedup, and retry budget.
|
||||
leaked_tool_call_text = False
|
||||
if final_text and not tool_calls and _TOOL_CALL_LEAK_PATTERN.search(final_text):
|
||||
leaked_tool_call_text = True
|
||||
logger.warning(
|
||||
"Codex response contains leaked tool-call text in assistant content "
|
||||
"(no structured function_call items). Treating as incomplete so the "
|
||||
"continuation path can re-elicit a proper tool call. Leaked snippet: %r",
|
||||
final_text[:300],
|
||||
)
|
||||
# Clear the text so downstream code doesn't surface the garbage as
|
||||
# a summary. The encrypted reasoning items (if any) are preserved
|
||||
# so the model keeps its chain-of-thought on the retry.
|
||||
final_text = ""
|
||||
|
||||
assistant_message = SimpleNamespace(
|
||||
content=final_text,
|
||||
tool_calls=tool_calls,
|
||||
@@ -798,6 +859,8 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]:
|
||||
|
||||
if tool_calls:
|
||||
finish_reason = "tool_calls"
|
||||
elif leaked_tool_call_text:
|
||||
finish_reason = "incomplete"
|
||||
elif has_incomplete_items or (saw_commentary_phase and not saw_final_answer_phase):
|
||||
finish_reason = "incomplete"
|
||||
elif reasoning_items_raw and not final_text:
|
||||
|
||||
@@ -294,6 +294,7 @@ class ContextCompressor(ContextEngine):
|
||||
self._context_probed = False
|
||||
self._context_probe_persistable = False
|
||||
self._previous_summary = None
|
||||
self._last_summary_error = None
|
||||
self._last_compression_savings_pct = 100.0
|
||||
self._ineffective_compression_count = 0
|
||||
|
||||
@@ -317,6 +318,13 @@ class ContextCompressor(ContextEngine):
|
||||
int(context_length * self.threshold_percent),
|
||||
MINIMUM_CONTEXT_LENGTH,
|
||||
)
|
||||
# Recalculate token budgets for the new context length so the
|
||||
# compressor stays calibrated after a model switch (e.g. 200K → 32K).
|
||||
target_tokens = int(self.threshold_tokens * self.summary_target_ratio)
|
||||
self.tail_token_budget = target_tokens
|
||||
self.max_summary_tokens = min(
|
||||
int(context_length * 0.05), _SUMMARY_TOKENS_CEILING,
|
||||
)
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
@@ -389,6 +397,7 @@ class ContextCompressor(ContextEngine):
|
||||
self._last_compression_savings_pct: float = 100.0
|
||||
self._ineffective_compression_count: int = 0
|
||||
self._summary_failure_cooldown_until: float = 0.0
|
||||
self._last_summary_error: Optional[str] = None
|
||||
|
||||
def update_from_response(self, usage: Dict[str, Any]):
|
||||
"""Update tracked token usage from API response."""
|
||||
@@ -812,10 +821,12 @@ The user has requested that this compaction PRIORITISE preserving all informatio
|
||||
self._previous_summary = summary
|
||||
self._summary_failure_cooldown_until = 0.0
|
||||
self._summary_model_fallen_back = False
|
||||
self._last_summary_error = None
|
||||
return self._with_summary_prefix(summary)
|
||||
except RuntimeError:
|
||||
# No provider configured — long cooldown, unlikely to self-resolve
|
||||
self._summary_failure_cooldown_until = time.monotonic() + _SUMMARY_FAILURE_COOLDOWN_SECONDS
|
||||
self._last_summary_error = "no auxiliary LLM provider configured"
|
||||
logging.warning("Context compression: no provider available for "
|
||||
"summary. Middle turns will be dropped without summary "
|
||||
"for %d seconds.",
|
||||
@@ -853,6 +864,10 @@ The user has requested that this compaction PRIORITISE preserving all informatio
|
||||
# Transient errors (timeout, rate limit, network) — shorter cooldown
|
||||
_transient_cooldown = 60
|
||||
self._summary_failure_cooldown_until = time.monotonic() + _transient_cooldown
|
||||
err_text = str(e).strip() or e.__class__.__name__
|
||||
if len(err_text) > 220:
|
||||
err_text = err_text[:217].rstrip() + "..."
|
||||
self._last_summary_error = err_text
|
||||
logging.warning(
|
||||
"Failed to generate context summary: %s. "
|
||||
"Further summary attempts paused for %d seconds.",
|
||||
|
||||
+43
-2
@@ -31,6 +31,7 @@ from __future__ import annotations
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
import inspect
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from agent.memory_provider import MemoryProvider
|
||||
@@ -312,7 +313,39 @@ class MemoryManager:
|
||||
)
|
||||
return "\n\n".join(parts)
|
||||
|
||||
def on_memory_write(self, action: str, target: str, content: str) -> None:
|
||||
@staticmethod
def _provider_memory_write_metadata_mode(provider: MemoryProvider) -> str:
    """Return how to pass metadata to a provider's memory-write hook.

    The signature of ``provider.on_memory_write`` is inspected and one of
    three calling conventions is returned:

    * ``"keyword"``    -- call as ``hook(action, target, content, metadata=...)``
    * ``"positional"`` -- pass metadata as a fourth positional argument
    * ``"legacy"``     -- call as ``hook(action, target, content)`` only

    Args:
        provider: object exposing an ``on_memory_write`` callable.

    Returns:
        ``"keyword"``, ``"positional"`` or ``"legacy"``.
    """
    try:
        signature = inspect.signature(provider.on_memory_write)
    except (TypeError, ValueError):
        # inspect.signature() could not introspect the hook; assume the
        # keyword-based convention.
        return "keyword"

    params = list(signature.parameters.values())

    # ``**kwargs`` swallows ``metadata=...`` whatever else is declared.
    if any(p.kind is inspect.Parameter.VAR_KEYWORD for p in params):
        return "keyword"

    # A parameter literally named ``metadata`` can only be passed by keyword
    # when it is not positional-only (and not ``*metadata``).
    metadata_param = signature.parameters.get("metadata")
    if metadata_param is not None and metadata_param.kind in (
        inspect.Parameter.POSITIONAL_OR_KEYWORD,
        inspect.Parameter.KEYWORD_ONLY,
    ):
        return "keyword"

    # Only parameters that can actually receive a positional argument count
    # toward the "fourth slot"; keyword-only parameters cannot, so counting
    # them would make the 4-argument call raise TypeError.
    positional = [
        p for p in params
        if p.kind in (
            inspect.Parameter.POSITIONAL_ONLY,
            inspect.Parameter.POSITIONAL_OR_KEYWORD,
        )
    ]
    if len(positional) >= 4:
        return "positional"
    return "legacy"
|
||||
|
||||
def on_memory_write(
|
||||
self,
|
||||
action: str,
|
||||
target: str,
|
||||
content: str,
|
||||
metadata: Optional[Dict[str, Any]] = None,
|
||||
) -> None:
|
||||
"""Notify external providers when the built-in memory tool writes.
|
||||
|
||||
Skips the builtin provider itself (it's the source of the write).
|
||||
@@ -321,7 +354,15 @@ class MemoryManager:
|
||||
if provider.name == "builtin":
|
||||
continue
|
||||
try:
|
||||
provider.on_memory_write(action, target, content)
|
||||
metadata_mode = self._provider_memory_write_metadata_mode(provider)
|
||||
if metadata_mode == "keyword":
|
||||
provider.on_memory_write(
|
||||
action, target, content, metadata=dict(metadata or {})
|
||||
)
|
||||
elif metadata_mode == "positional":
|
||||
provider.on_memory_write(action, target, content, dict(metadata or {}))
|
||||
else:
|
||||
provider.on_memory_write(action, target, content)
|
||||
except Exception as e:
|
||||
logger.debug(
|
||||
"Memory provider '%s' on_memory_write failed: %s",
|
||||
|
||||
@@ -26,7 +26,7 @@ Optional hooks (override to opt in):
|
||||
on_turn_start(turn, message, **kwargs) — per-turn tick with runtime context
|
||||
on_session_end(messages) — end-of-session extraction
|
||||
on_pre_compress(messages) -> str — extract before context compression
|
||||
on_memory_write(action, target, content) — mirror built-in memory writes
|
||||
on_memory_write(action, target, content, metadata=None) — mirror built-in memory writes
|
||||
on_delegation(task, result, **kwargs) — parent-side observation of subagent work
|
||||
"""
|
||||
|
||||
@@ -34,7 +34,7 @@ from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Any, Dict, List
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -220,12 +220,21 @@ class MemoryProvider(ABC):
|
||||
should all have ``env_var`` set and this method stays no-op).
|
||||
"""
|
||||
|
||||
def on_memory_write(self, action: str, target: str, content: str) -> None:
|
||||
def on_memory_write(
|
||||
self,
|
||||
action: str,
|
||||
target: str,
|
||||
content: str,
|
||||
metadata: Optional[Dict[str, Any]] = None,
|
||||
) -> None:
|
||||
"""Called when the built-in memory tool writes an entry.
|
||||
|
||||
action: 'add', 'replace', or 'remove'
|
||||
target: 'memory' or 'user'
|
||||
content: the entry content
|
||||
metadata: structured provenance for the write, when available. Common
|
||||
keys include ``write_origin``, ``execution_context``, ``session_id``,
|
||||
``parent_session_id``, ``platform``, and ``tool_name``.
|
||||
|
||||
Use to mirror built-in memory writes to your backend.
|
||||
"""
|
||||
|
||||
+22
-13
@@ -1199,6 +1199,7 @@ def get_model_context_length(
|
||||
Resolution order:
|
||||
0. Explicit config override (model.context_length or custom_providers per-model)
|
||||
1. Persistent cache (previously discovered via probing)
|
||||
1b. AWS Bedrock static table (must precede custom-endpoint probe)
|
||||
2. Active endpoint metadata (/models for explicit custom endpoints)
|
||||
3. Local server query (for local endpoints)
|
||||
4. Anthropic /v1/models API (API-key users only, not OAuth)
|
||||
@@ -1237,6 +1238,26 @@ def get_model_context_length(
|
||||
else:
|
||||
return cached
|
||||
|
||||
# 1b. AWS Bedrock — use static context length table.
|
||||
# Bedrock's ListFoundationModels API doesn't expose context window sizes,
|
||||
# so we maintain a curated table in bedrock_adapter.py that reflects
|
||||
# AWS-imposed limits (e.g. 200K for Claude models vs 1M on the native
|
||||
# Anthropic API). This must run BEFORE the custom-endpoint probe at
|
||||
# step 2 — bedrock-runtime.<region>.amazonaws.com is not in
|
||||
# _URL_TO_PROVIDER, so it would otherwise be treated as a custom endpoint,
|
||||
# fail the /models probe (Bedrock doesn't expose that shape), and fall
|
||||
# back to the 128K default before reaching the original step 4b branch.
|
||||
if provider == "bedrock" or (
|
||||
base_url
|
||||
and base_url_hostname(base_url).startswith("bedrock-runtime.")
|
||||
and base_url_host_matches(base_url, "amazonaws.com")
|
||||
):
|
||||
try:
|
||||
from agent.bedrock_adapter import get_bedrock_context_length
|
||||
return get_bedrock_context_length(model)
|
||||
except ImportError:
|
||||
pass # boto3 not installed — fall through to generic resolution
|
||||
|
||||
# 2. Active endpoint metadata for truly custom/unknown endpoints.
|
||||
# Known providers (Copilot, OpenAI, Anthropic, etc.) skip this — their
|
||||
# /models endpoint may report a provider-imposed limit (e.g. Copilot
|
||||
@@ -1282,19 +1303,7 @@ def get_model_context_length(
|
||||
if ctx:
|
||||
return ctx
|
||||
|
||||
# 4b. AWS Bedrock — use static context length table.
|
||||
# Bedrock's ListFoundationModels doesn't expose context window sizes,
|
||||
# so we maintain a curated table in bedrock_adapter.py.
|
||||
if provider == "bedrock" or (
|
||||
base_url
|
||||
and base_url_hostname(base_url).startswith("bedrock-runtime.")
|
||||
and base_url_host_matches(base_url, "amazonaws.com")
|
||||
):
|
||||
try:
|
||||
from agent.bedrock_adapter import get_bedrock_context_length
|
||||
return get_bedrock_context_length(model)
|
||||
except ImportError:
|
||||
pass # boto3 not installed — fall through to generic resolution
|
||||
# 4b. (Bedrock handled earlier at step 1b — before custom-endpoint probe.)
|
||||
|
||||
# 5. Provider-aware lookups (before generic OpenRouter cache)
|
||||
# These are provider-specific and take priority over the generic OR cache,
|
||||
|
||||
+8
-107
@@ -7,11 +7,15 @@ can invoke skills via /skill-name commands.
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
from hermes_constants import display_hermes_home
|
||||
from agent.skill_preprocessing import (
|
||||
expand_inline_shell as _expand_inline_shell,
|
||||
load_skills_config as _load_skills_config,
|
||||
substitute_template_vars as _substitute_template_vars,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -20,111 +24,6 @@ _skill_commands: Dict[str, Dict[str, Any]] = {}
|
||||
_SKILL_INVALID_CHARS = re.compile(r"[^a-z0-9-]")
|
||||
_SKILL_MULTI_HYPHEN = re.compile(r"-{2,}")
|
||||
|
||||
# Matches ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} tokens in SKILL.md.
|
||||
# Tokens that don't resolve (e.g. ${HERMES_SESSION_ID} with no session) are
|
||||
# left as-is so the user can debug them.
|
||||
_SKILL_TEMPLATE_RE = re.compile(r"\$\{(HERMES_SKILL_DIR|HERMES_SESSION_ID)\}")
|
||||
|
||||
# Matches inline shell snippets like: !`date +%Y-%m-%d`
|
||||
# Non-greedy, single-line only — no newlines inside the backticks.
|
||||
_INLINE_SHELL_RE = re.compile(r"!`([^`\n]+)`")
|
||||
|
||||
# Cap inline-shell output so a runaway command can't blow out the context.
|
||||
_INLINE_SHELL_MAX_OUTPUT = 4000
|
||||
|
||||
|
||||
def _load_skills_config() -> dict:
|
||||
"""Load the ``skills`` section of config.yaml (best-effort)."""
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
|
||||
cfg = load_config() or {}
|
||||
skills_cfg = cfg.get("skills")
|
||||
if isinstance(skills_cfg, dict):
|
||||
return skills_cfg
|
||||
except Exception:
|
||||
logger.debug("Could not read skills config", exc_info=True)
|
||||
return {}
|
||||
|
||||
|
||||
def _substitute_template_vars(
|
||||
content: str,
|
||||
skill_dir: Path | None,
|
||||
session_id: str | None,
|
||||
) -> str:
|
||||
"""Replace ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} in skill content.
|
||||
|
||||
Only substitutes tokens for which a concrete value is available —
|
||||
unresolved tokens are left in place so the author can spot them.
|
||||
"""
|
||||
if not content:
|
||||
return content
|
||||
|
||||
skill_dir_str = str(skill_dir) if skill_dir else None
|
||||
|
||||
def _replace(match: re.Match) -> str:
|
||||
token = match.group(1)
|
||||
if token == "HERMES_SKILL_DIR" and skill_dir_str:
|
||||
return skill_dir_str
|
||||
if token == "HERMES_SESSION_ID" and session_id:
|
||||
return str(session_id)
|
||||
return match.group(0)
|
||||
|
||||
return _SKILL_TEMPLATE_RE.sub(_replace, content)
|
||||
|
||||
|
||||
def _run_inline_shell(command: str, cwd: Path | None, timeout: int) -> str:
|
||||
"""Execute a single inline-shell snippet and return its stdout (trimmed).
|
||||
|
||||
Failures return a short ``[inline-shell error: ...]`` marker instead of
|
||||
raising, so one bad snippet can't wreck the whole skill message.
|
||||
"""
|
||||
try:
|
||||
completed = subprocess.run(
|
||||
["bash", "-c", command],
|
||||
cwd=str(cwd) if cwd else None,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=max(1, int(timeout)),
|
||||
check=False,
|
||||
)
|
||||
except subprocess.TimeoutExpired:
|
||||
return f"[inline-shell timeout after {timeout}s: {command}]"
|
||||
except FileNotFoundError:
|
||||
return f"[inline-shell error: bash not found]"
|
||||
except Exception as exc:
|
||||
return f"[inline-shell error: {exc}]"
|
||||
|
||||
output = (completed.stdout or "").rstrip("\n")
|
||||
if not output and completed.stderr:
|
||||
output = completed.stderr.rstrip("\n")
|
||||
if len(output) > _INLINE_SHELL_MAX_OUTPUT:
|
||||
output = output[:_INLINE_SHELL_MAX_OUTPUT] + "…[truncated]"
|
||||
return output
|
||||
|
||||
|
||||
def _expand_inline_shell(
|
||||
content: str,
|
||||
skill_dir: Path | None,
|
||||
timeout: int,
|
||||
) -> str:
|
||||
"""Replace every !`cmd` snippet in ``content`` with its stdout.
|
||||
|
||||
Runs each snippet with the skill directory as CWD so relative paths in
|
||||
the snippet work the way the author expects.
|
||||
"""
|
||||
if "!`" not in content:
|
||||
return content
|
||||
|
||||
def _replace(match: re.Match) -> str:
|
||||
cmd = match.group(1).strip()
|
||||
if not cmd:
|
||||
return ""
|
||||
return _run_inline_shell(cmd, skill_dir, timeout)
|
||||
|
||||
return _INLINE_SHELL_RE.sub(_replace, content)
|
||||
|
||||
|
||||
def _load_skill_payload(skill_identifier: str, task_id: str | None = None) -> tuple[dict[str, Any], Path | None, str] | None:
|
||||
"""Load a skill by name/path and return (loaded_payload, skill_dir, display_name)."""
|
||||
raw_identifier = (skill_identifier or "").strip()
|
||||
@@ -143,7 +42,9 @@ def _load_skill_payload(skill_identifier: str, task_id: str | None = None) -> tu
|
||||
else:
|
||||
normalized = raw_identifier.lstrip("/")
|
||||
|
||||
loaded_skill = json.loads(skill_view(normalized, task_id=task_id))
|
||||
loaded_skill = json.loads(
|
||||
skill_view(normalized, task_id=task_id, preprocess=False)
|
||||
)
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
@@ -0,0 +1,131 @@
|
||||
"""Shared SKILL.md preprocessing helpers."""
|
||||
|
||||
import logging
|
||||
import re
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Matches ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} tokens in SKILL.md.
|
||||
# Tokens that don't resolve (e.g. ${HERMES_SESSION_ID} with no session) are
|
||||
# left as-is so the user can debug them.
|
||||
_SKILL_TEMPLATE_RE = re.compile(r"\$\{(HERMES_SKILL_DIR|HERMES_SESSION_ID)\}")
|
||||
|
||||
# Matches inline shell snippets like: !`date +%Y-%m-%d`
|
||||
# Non-greedy, single-line only -- no newlines inside the backticks.
|
||||
_INLINE_SHELL_RE = re.compile(r"!`([^`\n]+)`")
|
||||
|
||||
# Cap inline-shell output so a runaway command can't blow out the context.
|
||||
_INLINE_SHELL_MAX_OUTPUT = 4000
|
||||
|
||||
|
||||
def load_skills_config() -> dict:
    """Return the ``skills`` mapping from config.yaml, or ``{}``.

    Best-effort: any failure while importing or loading the config is
    logged at debug level and an empty dict is returned. A ``skills`` value
    that is not a dict is also treated as absent.
    """
    skills_section = None
    try:
        # Imported lazily, inside the guarded region, so an import failure
        # is handled like any other load error.
        from hermes_cli.config import load_config

        skills_section = (load_config() or {}).get("skills")
    except Exception:
        logger.debug("Could not read skills config", exc_info=True)
    return skills_section if isinstance(skills_section, dict) else {}
|
||||
|
||||
|
||||
def substitute_template_vars(
|
||||
content: str,
|
||||
skill_dir: Path | None,
|
||||
session_id: str | None,
|
||||
) -> str:
|
||||
"""Replace ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} in skill content.
|
||||
|
||||
Only substitutes tokens for which a concrete value is available --
|
||||
unresolved tokens are left in place so the author can spot them.
|
||||
"""
|
||||
if not content:
|
||||
return content
|
||||
|
||||
skill_dir_str = str(skill_dir) if skill_dir else None
|
||||
|
||||
def _replace(match: re.Match) -> str:
|
||||
token = match.group(1)
|
||||
if token == "HERMES_SKILL_DIR" and skill_dir_str:
|
||||
return skill_dir_str
|
||||
if token == "HERMES_SESSION_ID" and session_id:
|
||||
return str(session_id)
|
||||
return match.group(0)
|
||||
|
||||
return _SKILL_TEMPLATE_RE.sub(_replace, content)
|
||||
|
||||
|
||||
def run_inline_shell(command: str, cwd: Path | None, timeout: int) -> str:
|
||||
"""Execute a single inline-shell snippet and return its stdout (trimmed).
|
||||
|
||||
Failures return a short ``[inline-shell error: ...]`` marker instead of
|
||||
raising, so one bad snippet can't wreck the whole skill message.
|
||||
"""
|
||||
try:
|
||||
completed = subprocess.run(
|
||||
["bash", "-c", command],
|
||||
cwd=str(cwd) if cwd else None,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=max(1, int(timeout)),
|
||||
check=False,
|
||||
)
|
||||
except subprocess.TimeoutExpired:
|
||||
return f"[inline-shell timeout after {timeout}s: {command}]"
|
||||
except FileNotFoundError:
|
||||
return "[inline-shell error: bash not found]"
|
||||
except Exception as exc:
|
||||
return f"[inline-shell error: {exc}]"
|
||||
|
||||
output = (completed.stdout or "").rstrip("\n")
|
||||
if not output and completed.stderr:
|
||||
output = completed.stderr.rstrip("\n")
|
||||
if len(output) > _INLINE_SHELL_MAX_OUTPUT:
|
||||
output = output[:_INLINE_SHELL_MAX_OUTPUT] + "...[truncated]"
|
||||
return output
|
||||
|
||||
|
||||
def expand_inline_shell(
|
||||
content: str,
|
||||
skill_dir: Path | None,
|
||||
timeout: int,
|
||||
) -> str:
|
||||
"""Replace every !`cmd` snippet in ``content`` with its stdout.
|
||||
|
||||
Runs each snippet with the skill directory as CWD so relative paths in
|
||||
the snippet work the way the author expects.
|
||||
"""
|
||||
if "!`" not in content:
|
||||
return content
|
||||
|
||||
def _replace(match: re.Match) -> str:
|
||||
cmd = match.group(1).strip()
|
||||
if not cmd:
|
||||
return ""
|
||||
return run_inline_shell(cmd, skill_dir, timeout)
|
||||
|
||||
return _INLINE_SHELL_RE.sub(_replace, content)
|
||||
|
||||
|
||||
def preprocess_skill_content(
|
||||
content: str,
|
||||
skill_dir: Path | None,
|
||||
session_id: str | None = None,
|
||||
skills_cfg: dict | None = None,
|
||||
) -> str:
|
||||
"""Apply configured SKILL.md template and inline-shell preprocessing."""
|
||||
if not content:
|
||||
return content
|
||||
|
||||
cfg = skills_cfg if isinstance(skills_cfg, dict) else load_skills_config()
|
||||
if cfg.get("template_vars", True):
|
||||
content = substitute_template_vars(content, skill_dir, session_id)
|
||||
if cfg.get("inline_shell", False):
|
||||
timeout = int(cfg.get("inline_shell_timeout", 10) or 10)
|
||||
content = expand_inline_shell(content, skill_dir, timeout)
|
||||
return content
|
||||
@@ -0,0 +1,58 @@
|
||||
# Hermes Apps
|
||||
|
||||
Platform apps live here. The first app is a cross-platform GUI shell around the
|
||||
existing Hermes dashboard; it should not fork chat, config, logs, or session UI.
|
||||
|
||||
## Shape
|
||||
|
||||
```text
|
||||
apps/
|
||||
gui/ # cross-platform app shell: dev Chrome shell now, Tauri native next
|
||||
shared/ # runtime bundle notes/scripts used by Windows + macOS packaging
|
||||
```
|
||||
|
||||
## Desktop Dev
|
||||
|
||||
The backend-only GUI mode is:
|
||||
|
||||
```bash
|
||||
hermes dashboard --gui
|
||||
```
|
||||
|
||||
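The fast GUI shell is (the `\\wsl$\Ubuntu\home\bb\...` paths in this document are examples; substitute your own distro and checkout location):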
|
||||
|
||||
```powershell
|
||||
cd \\wsl$\Ubuntu\home\bb\hermes-agent\apps\gui
|
||||
npm run dev
|
||||
```
|
||||
|
||||
The native Tauri shell is:
|
||||
|
||||
```powershell
|
||||
cd \\wsl$\Ubuntu\home\bb\hermes-agent\apps\gui
|
||||
npm run dev:tauri
|
||||
```
|
||||
|
||||
`--gui` implies the embedded TUI; do not pass `--tui` separately for GUI mode.
|
||||
|
||||
## MVP Boundary
|
||||
|
||||
Included:
|
||||
|
||||
- bundled Python runtime
|
||||
- bundled Node/TUI runtime
|
||||
- CLI install to PATH
|
||||
- profile picker and first-run setup
|
||||
- dashboard health/reconnect state
|
||||
- tray controls
|
||||
- desktop notifications
|
||||
- Windows installer
|
||||
|
||||
Deferred:
|
||||
|
||||
- code signing
|
||||
- native self-updater
|
||||
- store distribution
|
||||
|
||||
For MVP updates, the desktop UI should run the existing `hermes update` flow and
|
||||
surface progress/finish notifications.
|
||||
@@ -0,0 +1,102 @@
|
||||
# Hermes GUI
|
||||
|
||||
Cross-platform GUI shell for the Hermes dashboard.
|
||||
|
||||
## Fast Dev Shell
|
||||
|
||||
This gets a GUI window on Windows/WSL today by launching Chrome in app mode:
|
||||
|
||||
```bash
|
||||
cd apps/gui
|
||||
npm run dev
|
||||
```
|
||||
|
||||
It starts `hermes dashboard --gui --no-open` on the first free port from `9120` (or on `HERMES_GUI_PORT`, if set), waits for
`/api/health`, then opens a standalone app window at that address, e.g. `http://127.0.0.1:9120`.
|
||||
|
||||
## Native Shell
|
||||
|
||||
The native Tauri shell is still scaffolded:
|
||||
|
||||
```bash
|
||||
cd apps/gui
|
||||
npm run dev:tauri
|
||||
```
|
||||
|
||||
From Windows PowerShell on a `\\wsl$` path, use PowerShell `npm`, not
|
||||
`npm.cmd`:
|
||||
|
||||
```powershell
|
||||
Set-ExecutionPolicy -Scope Process -ExecutionPolicy Bypass -Force
|
||||
cd \\wsl$\Ubuntu\home\bb\hermes-agent\apps\gui
|
||||
npm run dev:tauri
|
||||
```
|
||||
|
||||
`npm.cmd` goes through `cmd.exe`, and `cmd.exe` cannot use UNC paths as the
|
||||
current directory.
|
||||
|
||||
If `npm run` still falls through `cmd.exe`, bypass npm entirely:
|
||||
|
||||
```powershell
|
||||
\\wsl$\Ubuntu\home\bb\hermes-agent\apps\gui\dev-tauri.ps1
|
||||
```
|
||||
|
||||
The launcher builds into `%LOCALAPPDATA%\Hermes\cargo-target\gui` instead of
|
||||
`\\wsl$` because Windows Cargo incremental locks do not work reliably on UNC
|
||||
WSL filesystems.
|
||||
|
||||
In dev, either start Hermes yourself:
|
||||
|
||||
```bash
|
||||
hermes dashboard --gui --no-open --port 9120
|
||||
```
|
||||
|
||||
or let the native shell start it. The tray menu owns:
|
||||
|
||||
- Open Hermes
|
||||
- Open in Browser
|
||||
- Restart Hermes Runtime
|
||||
- Quit Hermes
|
||||
|
||||
The native shell reuses a healthy GUI runtime when one is already running.
|
||||
Otherwise it picks the first free port from `9120..9139`, passes that port into
|
||||
the WSL/backend process, and navigates the Tauri window there. Set
|
||||
`HERMES_GUI_PORT` to force a starting port.
|
||||
|
||||
## Fresh Install Emulation
|
||||
|
||||
Use an isolated Hermes home without touching your real `~/.hermes`:
|
||||
|
||||
```powershell
|
||||
powershell.exe -ExecutionPolicy Bypass -File \\wsl$\Ubuntu\home\bb\hermes-agent\apps\gui\dev-tauri.ps1 -Fresh
|
||||
```
|
||||
|
||||
Reset that disposable home and run again:
|
||||
|
||||
```powershell
|
||||
powershell.exe -ExecutionPolicy Bypass -File \\wsl$\Ubuntu\home\bb\hermes-agent\apps\gui\dev-tauri.ps1 -Fresh -ResetFresh
|
||||
```
|
||||
|
||||
Fresh mode stores state in `%LOCALAPPDATA%\Hermes\fresh-install-home` and starts
|
||||
from port `9140` so it does not collide with your normal GUI dev session.
|
||||
|
||||
Set `HERMES_GUI_MIN_SPLASH_MS` only when debugging the startup screen; default
|
||||
startup is instant once the backend is healthy.
|
||||
|
||||
## Boundary
|
||||
|
||||
GUI owns:
|
||||
|
||||
- app shell/window
|
||||
- startup state
|
||||
- sidecar process lifecycle
|
||||
- future tray/notifications/installers
|
||||
|
||||
Hermes owns:
|
||||
|
||||
- dashboard UI
|
||||
- auth/session token
|
||||
- profiles/config/env
|
||||
- TUI/PTT chat bridge
|
||||
- tools/skills/gateway
|
||||
- update flow
|
||||
@@ -0,0 +1,57 @@
|
||||
<#
.SYNOPSIS
Runs the Tauri CLI for the Hermes GUI from Windows PowerShell.

.DESCRIPTION
Checks that Node.js, Rust and a Tauri CLI are on PATH, points Cargo at a
target directory under %LOCALAPPDATA% with incremental builds disabled,
optionally switches to a disposable HERMES_HOME, then runs the Tauri CLI from
this script's directory and exits with the CLI's exit code.

.PARAMETER Command
Tauri subcommand to run. Defaults to "dev".

.PARAMETER Fresh
Use %LOCALAPPDATA%\Hermes\fresh-install-home as HERMES_HOME and port 9140.

.PARAMETER ResetFresh
With -Fresh, delete the disposable home before recreating it.
#>
param(
    [string]$Command = "dev",
    [switch]$Fresh,
    [switch]$ResetFresh
)

$ErrorActionPreference = "Stop"
Set-ExecutionPolicy -Scope Process -ExecutionPolicy Bypass -Force

$AppRoot = Split-Path -Parent $MyInvocation.MyCommand.Path

if (-not (Get-Command node -ErrorAction SilentlyContinue)) {
    throw "Windows Node.js was not found. Install it with: winget install OpenJS.NodeJS.LTS"
}

if (-not (Get-Command rustc -ErrorAction SilentlyContinue)) {
    throw "Windows Rust was not found. Install it with: winget install Rustlang.Rustup"
}

# Either a standalone "tauri" binary or the "cargo tauri" subcommand will do.
$Tauri = Get-Command tauri -ErrorAction SilentlyContinue
$CargoTauri = Get-Command cargo-tauri -ErrorAction SilentlyContinue

if (-not $Tauri -and -not $CargoTauri) {
    throw "Tauri CLI not found. Install it with: npm install -g @tauri-apps/cli (run from a normal Windows path, not \\wsl$)"
}

# Build outside the checkout with incremental compilation off.
$env:CARGO_INCREMENTAL = "0"
$env:CARGO_TARGET_DIR = Join-Path $env:LOCALAPPDATA "Hermes\cargo-target\gui"
New-Item -ItemType Directory -Force -Path $env:CARGO_TARGET_DIR | Out-Null

if ($Fresh) {
    $FreshHome = Join-Path $env:LOCALAPPDATA "Hermes\fresh-install-home"
    if ($ResetFresh -and (Test-Path $FreshHome)) {
        Remove-Item -Recurse -Force $FreshHome
    }
    New-Item -ItemType Directory -Force -Path $FreshHome | Out-Null
    $env:HERMES_HOME = $FreshHome
    $env:HERMES_GUI_PORT = "9140"
    $env:HERMES_GUI_FRESH = "1"
    Write-Host "Fresh GUI mode"
    Write-Host "  HERMES_HOME=$FreshHome"
    Write-Host "  HERMES_GUI_PORT=$env:HERMES_GUI_PORT"
}

$ExitCode = 0
Push-Location $AppRoot
try {
    if ($Tauri) {
        & tauri $Command
    }
    else {
        & cargo tauri $Command
    }
    # A non-zero exit from a native command does not throw, so capture it
    # explicitly; otherwise the script reports success on a failed build.
    $ExitCode = $LASTEXITCODE
}
finally {
    Pop-Location
}

exit $ExitCode
|
||||
@@ -0,0 +1,13 @@
|
||||
{
|
||||
"name": "@hermes/gui",
|
||||
"version": "0.0.0",
|
||||
"private": true,
|
||||
"type": "module",
|
||||
"scripts": {
|
||||
"dev": "node scripts/dev-shell.mjs",
|
||||
"dev:tauri": "node scripts/tauri.mjs dev",
|
||||
"build": "node scripts/tauri.mjs build",
|
||||
"dashboard": "node scripts/start-dashboard.mjs",
|
||||
"tauri": "node scripts/tauri.mjs"
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,156 @@
|
||||
import { spawn, spawnSync } from "node:child_process";
|
||||
import { createServer } from "node:net";
|
||||
import { dirname, resolve } from "node:path";
|
||||
import { setTimeout as delay } from "node:timers/promises";
|
||||
import { fileURLToPath } from "node:url";
|
||||
|
||||
const here = dirname(fileURLToPath(import.meta.url));
|
||||
const repoRoot = resolve(here, "../../..");
|
||||
const python = process.env.HERMES_PYTHON || "python";
|
||||
let port = process.env.HERMES_GUI_PORT || "9120";
|
||||
let url = `http://127.0.0.1:${port}`;
|
||||
|
||||
let dashboard = null;
|
||||
|
||||
// Kill the dashboard child, if one was started and has not been killed yet.
function stop() {
  if (!dashboard || dashboard.killed) return;
  dashboard.kill();
}

// On SIGINT/SIGTERM take the dashboard down, then exit with 128 + signal number.
for (const [signalName, exitCode] of [
  ["SIGINT", 130],
  ["SIGTERM", 143],
]) {
  process.on(signalName, () => {
    stop();
    process.exit(exitCode);
  });
}
// Also covers every other exit path.
process.on("exit", stop);
|
||||
|
||||
// Poll the health endpoint up to 120 times, 500 ms apart.
// Resolves true as soon as one probe succeeds, false once all attempts are used.
async function waitForHealth() {
  let attemptsLeft = 120;
  while (attemptsLeft > 0) {
    if (await isHealthy()) return true;
    await delay(500);
    attemptsLeft -= 1;
  }
  return false;
}
|
||||
|
||||
// Probe `${url}/api/health` with a 1 s timeout.
// Resolves true only for an OK response whose JSON body has status "ok";
// any network, timeout or parse failure counts as "not healthy".
async function isHealthy() {
  try {
    const response = await fetch(`${url}/api/health`, {
      signal: AbortSignal.timeout(1000),
    });
    const body = await response.json();
    const reportsOk = body.status === "ok";
    return response.ok && reportsOk;
  } catch {
    return false;
  }
}
|
||||
|
||||
// Resolve true when a throwaway TCP server can listen on 127.0.0.1:<candidate>,
// false when listening fails. The probe server is closed before resolving true.
function canBind(candidate) {
  const probe = createServer();
  return new Promise((settle) => {
    probe.once("error", () => settle(false));
    probe.listen(Number(candidate), "127.0.0.1", () => {
      probe.close(() => settle(true));
    });
  });
}
|
||||
|
||||
// Pick the first bindable port among 20 consecutive candidates starting at the
// current `port`, updating `port` and `url`. An explicit HERMES_GUI_PORT is
// respected as-is; if nothing binds, `port`/`url` are left unchanged.
async function choosePort() {
  if (process.env.HERMES_GUI_PORT) return;

  const firstCandidate = Number(port);
  for (let offset = 0; offset < 20; offset += 1) {
    const candidate = firstCandidate + offset;
    if (await canBind(candidate)) {
      port = String(candidate);
      url = `http://127.0.0.1:${port}`;
      return;
    }
  }
}
|
||||
|
||||
// Spawn the Hermes dashboard (`<python> -m hermes_cli.main dashboard --gui ...`)
// on the chosen port, with HERMES_GUI=1 and inherited stdio, and tie this
// launcher's lifetime to the child's.
function startDashboard() {
  const dashboardArgs = [
    "-m",
    "hermes_cli.main",
    "dashboard",
    "--gui",
    "--no-open",
    "--host",
    "127.0.0.1",
    "--port",
    port,
  ];

  dashboard = spawn(python, dashboardArgs, {
    cwd: repoRoot,
    env: {
      ...process.env,
      HERMES_GUI: "1",
    },
    stdio: "inherit",
  });

  // Without an 'error' listener a failed spawn (e.g. interpreter not found)
  // surfaces as an unhandled 'error' event and a raw stack trace.
  dashboard.on("error", (err) => {
    console.error(`Failed to start Hermes dashboard with "${python}": ${err.message}`);
    process.exit(1);
  });

  // Mirror the child's exit code; a child terminated by a signal has a null
  // code and must not be reported as success.
  dashboard.on("exit", (code, signal) => {
    process.exit(code ?? (signal ? 1 : 0));
  });
}
|
||||
|
||||
// Run a command synchronously with output discarded (through a shell on
// Windows). Returns true only when the exit status is exactly 0.
function run(command, args) {
  const { status } = spawnSync(command, args, {
    shell: process.platform === "win32",
    stdio: "ignore",
  });
  return status === 0;
}
|
||||
|
||||
// Open the dashboard URL in an app-style browser window, trying candidates
// in order and returning the first successful result (false if all fail).
function openGuiWindow() {
  const appFlag = `--app=${url}`;
  const viaCmdStart = (...target) => run("cmd.exe", ["/C", "start", "", ...target]);

  // Native Windows and WSL both go through cmd.exe's `start`.
  if (process.platform === "win32" || process.env.WSL_DISTRO_NAME) {
    return (
      viaCmdStart("chrome", appFlag) ||
      viaCmdStart("msedge", appFlag) ||
      viaCmdStart(url)
    );
  }

  if (process.platform === "darwin") {
    return (
      run("open", ["-na", "Google Chrome", "--args", appFlag]) ||
      run("open", [url])
    );
  }

  return (
    run("google-chrome", [appFlag]) ||
    run("chromium", [appFlag]) ||
    run("xdg-open", [url])
  );
}
|
||||
|
||||
// Entry flow: reuse a dashboard that is already healthy, otherwise pick a
// port, start one, wait for it to come up and open the GUI window.
if (await isHealthy()) {
  console.log(`Hermes GUI already running -> ${url}`);
  openGuiWindow();
  process.exit(0);
}

await choosePort();
startDashboard();

if (await waitForHealth()) {
  console.log(`Hermes GUI -> ${url}`);
  openGuiWindow();
} else {
  console.error(`Hermes GUI did not become healthy at ${url}`);
  // Do not leave an unhealthy dashboard running behind a launcher that has
  // given up; stop it and report the failure through the exit status.
  stop();
  process.exit(1);
}
|
||||
@@ -0,0 +1,95 @@
|
||||
import { spawn } from "node:child_process";
|
||||
import { dirname, resolve } from "node:path";
|
||||
import { fileURLToPath } from "node:url";
|
||||
|
||||
const here = dirname(fileURLToPath(import.meta.url));
|
||||
const repoRoot = resolve(here, "../../..");
|
||||
const python = process.env.HERMES_PYTHON || "python";
|
||||
const port = process.env.HERMES_GUI_PORT || "9120";
|
||||
const url = `http://127.0.0.1:${port}`;
|
||||
|
||||
// Probe `${url}/api/health` with a 1 s timeout.
// Resolves true only for an OK response whose JSON body has status "ok";
// any network, timeout or parse failure counts as "not healthy".
async function isHealthy() {
  try {
    const response = await fetch(`${url}/api/health`, {
      signal: AbortSignal.timeout(1000),
    });
    const body = await response.json();
    const reportsOk = body.status === "ok";
    return response.ok && reportsOk;
  } catch {
    return false;
  }
}
|
||||
|
||||
// When running on Windows from a \\wsl$\<distro>\... or \\wsl.localhost\<distro>\...
// checkout, return { distro, path } with the Linux-side path of the repo root.
// Returns null on other platforms, for other hosts, or when the distro/path
// part is missing.
function wslRepoRoot() {
  if (process.platform !== "win32") return null;

  // "//host/distro/a/b" splits into ["", "", host, distro, "a", "b"].
  const [, , rawHost, distro, ...tail] = repoRoot.replaceAll("\\", "/").split("/");
  const host = rawHost?.toLowerCase();
  if (host !== "wsl$" && host !== "wsl.localhost") return null;

  const path = `/${tail.join("/")}`;
  return distro && path !== "/" ? { distro, path } : null;
}
|
||||
|
||||
// Start the Hermes dashboard and return the child process.
// From a WSL checkout on Windows the dashboard is launched inside the distro
// through wsl.exe; otherwise it is launched directly from the repo root.
function spawnDashboard() {
  const dashboardArgs = [
    "-m",
    "hermes_cli.main",
    "dashboard",
    "--gui",
    "--no-open",
    "--host",
    "127.0.0.1",
    "--port",
    port,
  ];

  const wsl = wslRepoRoot();
  if (!wsl) {
    return spawn(python, dashboardArgs, {
      cwd: repoRoot,
      env: {
        ...process.env,
        HERMES_GUI: "1",
      },
      stdio: "inherit",
    });
  }

  // HERMES_GUI is set via `env` on the command line for the WSL launch.
  const wslPython = process.env.HERMES_WSL_PYTHON || "python";
  return spawn(
    "wsl.exe",
    ["-d", wsl.distro, "--cd", wsl.path, "env", "HERMES_GUI=1", wslPython, ...dashboardArgs],
    { stdio: "inherit" },
  );
}
|
||||
|
||||
// Nothing to do when a healthy dashboard is already serving this port.
if (await isHealthy()) {
  console.log(`Hermes GUI already running -> ${url}`);
  process.exit(0);
}

// Otherwise start one and end this process the same way the child ends:
// re-raise its terminating signal if there was one, else mirror its exit code.
spawnDashboard().on("exit", (code, signal) => {
  if (signal) process.kill(process.pid, signal);
  process.exit(code ?? 0);
});
|
||||
@@ -0,0 +1,90 @@
|
||||
import { spawnSync } from "node:child_process";
|
||||
import { existsSync } from "node:fs";
|
||||
import { dirname, resolve } from "node:path";
|
||||
import { fileURLToPath } from "node:url";
|
||||
|
||||
const here = dirname(fileURLToPath(import.meta.url));
|
||||
const appRoot = resolve(here, "..");
|
||||
const bin = process.platform === "win32" ? "tauri.cmd" : "tauri";
|
||||
const localTauri = resolve(appRoot, "node_modules", ".bin", bin);
|
||||
const args = process.argv.slice(2);
|
||||
|
||||
/**
 * Report whether this process runs inside WSL: a Linux platform with a
 * non-empty `WSL_DISTRO_NAME` environment variable.
 *
 * @returns {boolean}
 */
function isWsl() {
  if (process.platform !== "linux") return false;
  return Boolean(process.env.WSL_DISTRO_NAME);
}
|
||||
|
||||
/**
 * Quote a string as a PowerShell single-quoted literal.
 *
 * Embedded single quotes are doubled, which is how PowerShell escapes them
 * inside a single-quoted string.
 *
 * @param {string} value raw text.
 * @returns {string} the quoted literal, including the surrounding quotes.
 */
function quotePs(value) {
  const escaped = value.split("'").join("''");
  return "'" + escaped + "'";
}
|
||||
|
||||
/**
 * Hand the Tauri dev run over to native Windows from inside WSL.
 *
 * Translates the app directory to a Windows path with `wslpath -w`, then runs
 * `npm run dev:tauri` there through `powershell.exe`, after checking that
 * Windows npm and rustc are available.
 *
 * On a completed hand-off the process exits with PowerShell's status and this
 * function never returns. It returns `false` only when the hand-off could not
 * be attempted (no `wslpath`, empty translation, or `powershell.exe` could not
 * be spawned), so the caller can print manual instructions.
 *
 * @returns {false}
 */
function dispatchToWindows() {
  const pathResult = spawnSync("wslpath", ["-w", appRoot], {
    encoding: "utf8",
  });
  // When wslpath cannot be spawned, `error` is set and `stdout` is null.
  if (pathResult.error) return false;
  const windowsPath = (pathResult.stdout ?? "").trim();
  if (!windowsPath) return false;

  const command = [
    "$ErrorActionPreference = 'Stop'",
    "Set-ExecutionPolicy -Scope Process -ExecutionPolicy Bypass -Force",
    "if (-not (Get-Command npm -ErrorAction SilentlyContinue)) {",
    '  Write-Error "Windows npm was not found. Install Windows Node.js first: winget install OpenJS.NodeJS.LTS"',
    "}",
    "if (-not (Get-Command rustc -ErrorAction SilentlyContinue)) {",
    '  Write-Error "Windows Rust was not found. Install Rust first: winget install Rustlang.Rustup"',
    "}",
    `Set-Location -LiteralPath ${quotePs(windowsPath)}`,
    "& npm run dev:tauri",
  ].join("; ");

  const result = spawnSync(
    "powershell.exe",
    ["-NoProfile", "-ExecutionPolicy", "Bypass", "-Command", command],
    { stdio: "inherit" },
  );
  // PowerShell itself could not be started: let the caller explain the
  // manual fallback instead of exiting silently.
  if (result.error) return false;
  process.exit(result.status ?? 1);
}
|
||||
|
||||
/**
 * Run a command from the app directory, inheriting stdio.
 *
 * On win32 the command is invoked through `powershell.exe` (process-scoped
 * execution-policy bypass, `Set-Location` to the app root, every token
 * single-quoted); elsewhere it is spawned directly with `cwd` set.
 *
 * @param {string} command executable name or path.
 * @param {string[]} commandArgs arguments passed to it.
 * @param {{ exit?: boolean }} [options] with `exit` true (the default) the
 *   process exits with the command's status instead of returning.
 * @returns {boolean} `false` when the spawned executable was not found
 *   (ENOENT); otherwise, when `exit` is false, whether the status was 0.
 */
function run(command, commandArgs, { exit = true } = {}) {
  let outcome;

  if (process.platform === "win32") {
    const invocation = `& ${quotePs(command)} ${commandArgs.map(quotePs).join(" ")}`;
    const psCommand = [
      "$ErrorActionPreference = 'Stop'",
      "Set-ExecutionPolicy -Scope Process -ExecutionPolicy Bypass -Force",
      `Set-Location -LiteralPath ${quotePs(appRoot)}`,
      invocation,
    ].join("; ");
    outcome = spawnSync(
      "powershell.exe",
      ["-NoProfile", "-ExecutionPolicy", "Bypass", "-Command", psCommand],
      { stdio: "inherit" },
    );
  } else {
    outcome = spawnSync(command, commandArgs, {
      cwd: appRoot,
      env: process.env,
      stdio: "inherit",
    });
  }

  if (outcome.error?.code === "ENOENT") return false;
  if (exit) process.exit(outcome.status ?? 1);
  return outcome.status === 0;
}
|
||||
|
||||
// Inside WSL, prefer handing off to a native Windows Tauri run unless the
// user opts out with HERMES_GUI_TAURI_WSL=1.
if (isWsl() && process.env.HERMES_GUI_TAURI_WSL !== "1") {
  console.log("Launching native Windows Tauri from WSL...");
  // Exits the process on a completed hand-off; returning means it failed.
  dispatchToWindows();

  // Build the hint from this checkout's distro and location rather than a
  // fixed path, so the printed command works on any machine.
  const uncPath =
    "\\\\wsl$\\" + process.env.WSL_DISTRO_NAME + appRoot.replaceAll("/", "\\");
  console.error(
    "Could not hand off to Windows PowerShell. Run this from Windows PowerShell instead:",
  );
  console.error(`  cd ${uncPath}`);
  console.error("  npm run dev:tauri");
  process.exit(1);
}

// Resolution order: the project-local CLI, a global `tauri`, `cargo tauri`,
// and finally an on-demand download via npx. `run` exits the process itself
// unless `exit: false` is passed.
if (existsSync(localTauri)) run(localTauri, args);
if (run("tauri", args, { exit: false })) process.exit(0);
if (run("cargo", ["tauri", ...args], { exit: false })) process.exit(0);
run("npx", ["--yes", "@tauri-apps/cli@latest", ...args]);
|
||||
@@ -0,0 +1 @@
|
||||
/target/
|
||||
Generated
+5579
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,17 @@
|
||||
[package]
|
||||
name = "hermes-gui"
|
||||
version = "0.0.0"
|
||||
description = "Hermes GUI shell"
|
||||
edition = "2021"
|
||||
|
||||
[lib]
|
||||
name = "hermes_gui_lib"
|
||||
crate-type = ["staticlib", "cdylib", "rlib"]
|
||||
|
||||
[build-dependencies]
|
||||
tauri-build = { version = "2", features = [] }
|
||||
|
||||
[dependencies]
|
||||
tauri = { version = "2", features = ["tray-icon"] }
|
||||
tauri-plugin-notification = "2"
|
||||
tauri-plugin-opener = "2"
|
||||
@@ -0,0 +1,3 @@
|
||||
fn main() {
|
||||
tauri_build::build();
|
||||
}
|
||||
@@ -0,0 +1,7 @@
|
||||
{
|
||||
"$schema": "../gen/schemas/desktop-schema.json",
|
||||
"identifier": "default",
|
||||
"description": "Default Hermes GUI permissions",
|
||||
"windows": ["main"],
|
||||
"permissions": ["core:default", "notification:default", "opener:default"]
|
||||
}
|
||||
File diff suppressed because one or more lines are too long
@@ -0,0 +1 @@
|
||||
{"default":{"identifier":"default","description":"Default Hermes GUI permissions","local":true,"windows":["main"],"permissions":["core:default","notification:default","opener:default"]}}
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
Binary file not shown.
|
After Width: | Height: | Size: 135 B |
Binary file not shown.
|
After Width: | Height: | Size: 1.1 KiB |
@@ -0,0 +1,4 @@
|
||||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100">
|
||||
<rect width="100" height="100" rx="18" fill="#071313"/>
|
||||
<text x="50" y="70" text-anchor="middle" font-size="68" fill="#f0e6d2">⚕</text>
|
||||
</svg>
|
||||
|
After Width: | Height: | Size: 212 B |
@@ -0,0 +1 @@
|
||||
|
||||
@@ -0,0 +1,433 @@
|
||||
use std::{
|
||||
io::{Read, Write},
|
||||
net::{TcpListener, TcpStream},
|
||||
process::{Child, Command, Stdio},
|
||||
sync::Mutex,
|
||||
time::{Duration, Instant},
|
||||
};
|
||||
|
||||
use tauri::{
|
||||
image::Image,
|
||||
menu::{Menu, MenuItem, PredefinedMenuItem},
|
||||
tray::{MouseButton, MouseButtonState, TrayIconBuilder, TrayIconEvent},
|
||||
App, AppHandle, Manager, WebviewWindow,
|
||||
};
|
||||
|
||||
const GUI_HOST: &str = "127.0.0.1";
|
||||
const DEFAULT_GUI_PORT: u16 = 9120;
|
||||
const MIN_SPLASH_MS: u64 = 0;
|
||||
const SPLASH_URL: &str = "data:text/html,%3C!doctype%20html%3E%3Cmeta%20charset%3Dutf-8%3E%3Cstyle%3Ebody%7Bmargin%3A0%3Bheight%3A100vh%3Bdisplay%3Agrid%3Bplace-items%3Acenter%3Bbackground%3A%23071313%3Bcolor%3A%23f0e6d2%3Bfont%3A14px%20monospace%3Bletter-spacing%3A.08em%3Btext-transform%3Auppercase%7D%3C%2Fstyle%3E%3Cbody%3EStarting%20Hermes%E2%80%A6%3C%2Fbody%3E";
|
||||
|
||||
/// State shared between the tray handlers, the Tauri commands and setup.
struct GuiState {
    /// Dashboard process spawned by this shell, if any. `None` when nothing
    /// has been spawned or an existing healthy dashboard is being reused.
    child: Mutex<Option<Child>>,
    /// Port the shell currently targets on the loopback host.
    port: Mutex<u16>,
}
|
||||
|
||||
fn gui_url(port: u16) -> String {
|
||||
format!("http://{GUI_HOST}:{port}")
|
||||
}
|
||||
|
||||
fn check_health(port: u16) -> bool {
|
||||
let Ok(mut stream) = TcpStream::connect_timeout(
|
||||
&format!("{GUI_HOST}:{port}").parse().unwrap(),
|
||||
Duration::from_secs(1),
|
||||
) else {
|
||||
return false;
|
||||
};
|
||||
|
||||
let _ = stream.set_read_timeout(Some(Duration::from_secs(1)));
|
||||
let request =
|
||||
format!("GET /api/health HTTP/1.1\r\nHost: {GUI_HOST}:{port}\r\nConnection: close\r\n\r\n");
|
||||
|
||||
if stream.write_all(request.as_bytes()).is_err() {
|
||||
return false;
|
||||
}
|
||||
|
||||
let mut response = String::new();
|
||||
let _ = stream.read_to_string(&mut response);
|
||||
response.contains("200 OK")
|
||||
&& response.contains("\"status\":\"ok\"")
|
||||
&& response.contains("\"mode\":\"gui\"")
|
||||
}
|
||||
|
||||
fn can_bind(port: u16) -> bool {
|
||||
TcpListener::bind((GUI_HOST, port)).is_ok()
|
||||
}
|
||||
|
||||
fn base_port() -> u16 {
|
||||
std::env::var("HERMES_GUI_PORT")
|
||||
.ok()
|
||||
.and_then(|raw| raw.parse().ok())
|
||||
.unwrap_or(DEFAULT_GUI_PORT)
|
||||
}
|
||||
|
||||
fn select_port() -> u16 {
|
||||
let start = base_port();
|
||||
for port in start..start.saturating_add(20) {
|
||||
if check_health(port) || can_bind(port) {
|
||||
return port;
|
||||
}
|
||||
}
|
||||
start
|
||||
}
|
||||
|
||||
fn repo_root() -> std::path::PathBuf {
|
||||
std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"))
|
||||
.join("../../..")
|
||||
.canonicalize()
|
||||
.unwrap_or_else(|_| std::path::PathBuf::from("."))
|
||||
}
|
||||
|
||||
/// Directory of a bundled runtime, taken from `HERMES_GUI_RUNTIME_DIR`.
/// Returns `None` when the variable is not set.
fn runtime_dir() -> Option<std::path::PathBuf> {
    let raw = std::env::var_os("HERMES_GUI_RUNTIME_DIR")?;
    Some(raw.into())
}
|
||||
|
||||
/// Path of the Python interpreter inside the bundled runtime's virtualenv:
/// `venv/Scripts/python.exe` on Windows targets, `venv/bin/python` elsewhere.
fn runtime_python(runtime: &std::path::Path) -> std::path::PathBuf {
    let (bin_dir, exe) = if cfg!(target_os = "windows") {
        ("Scripts", "python.exe")
    } else {
        ("bin", "python")
    };

    let mut interpreter = runtime.join("venv");
    interpreter.push(bin_dir);
    interpreter.push(exe);
    interpreter
}
|
||||
|
||||
/// Split a WSL share path into `(distro, posix_path)`.
///
/// Accepts `\\wsl$\<distro>\...` and `\\wsl.localhost\<distro>\...` with
/// either slash direction, and also the verbatim form
/// `\\?\UNC\wsl.localhost\<distro>\...` that `Path::canonicalize` produces on
/// Windows. Returns `None` for any other path.
fn wsl_path(root: &std::path::Path) -> Option<(String, String)> {
    const VERBATIM_UNC: &str = "//?/UNC/";

    let raw = root.to_string_lossy().replace('\\', "/");

    // canonicalize() on Windows yields verbatim UNC paths; rewrite them to
    // the plain `//host/share/...` shape so the host is at index 2 below.
    let is_verbatim_unc = raw
        .get(..VERBATIM_UNC.len())
        .map_or(false, |prefix| prefix.eq_ignore_ascii_case(VERBATIM_UNC));
    let raw = if is_verbatim_unc {
        format!("//{}", &raw[VERBATIM_UNC.len()..])
    } else {
        raw
    };

    // "//host/distro/a/b" splits into ["", "", host, distro, "a", "b"].
    let parts: Vec<&str> = raw.split('/').collect();
    let host = parts.get(2)?.to_ascii_lowercase();
    if host != "wsl$" && host != "wsl.localhost" {
        return None;
    }
    let distro = parts.get(3)?.to_string();
    let path = format!("/{}", parts.get(4..)?.join("/"));
    Some((distro, path))
}
|
||||
|
||||
fn start_dashboard(port: u16) -> std::io::Result<Child> {
|
||||
if let Some(runtime) = runtime_dir() {
|
||||
let python = runtime_python(&runtime);
|
||||
let web_dist = runtime.join("web_dist");
|
||||
let tui_dir = runtime.join("ui-tui");
|
||||
let port = port.to_string();
|
||||
return Command::new(python)
|
||||
.args([
|
||||
"-m",
|
||||
"hermes_cli.main",
|
||||
"dashboard",
|
||||
"--gui",
|
||||
"--no-open",
|
||||
"--host",
|
||||
GUI_HOST,
|
||||
"--port",
|
||||
&port,
|
||||
])
|
||||
.env("HERMES_GUI", "1")
|
||||
.env("HERMES_GUI_PORT", &port)
|
||||
.env("HERMES_WEB_DIST", web_dist)
|
||||
.env("HERMES_TUI_DIR", tui_dir)
|
||||
.envs(
|
||||
std::env::vars()
|
||||
.filter(|(key, _)| matches!(key.as_str(), "HERMES_HOME" | "HERMES_GUI_FRESH")),
|
||||
)
|
||||
.stdin(Stdio::null())
|
||||
.stdout(Stdio::null())
|
||||
.stderr(Stdio::null())
|
||||
.spawn();
|
||||
}
|
||||
|
||||
let root = repo_root();
|
||||
let port = port.to_string();
|
||||
|
||||
if let Some((distro, path)) = wsl_path(&root) {
|
||||
let port_env = format!("HERMES_GUI_PORT={port}");
|
||||
let mut env_args = vec!["HERMES_GUI=1".to_string(), port_env];
|
||||
if let Ok(home) = std::env::var("HERMES_HOME") {
|
||||
env_args.push(format!("HERMES_HOME={home}"));
|
||||
}
|
||||
if let Ok(fresh) = std::env::var("HERMES_GUI_FRESH") {
|
||||
env_args.push(format!("HERMES_GUI_FRESH={fresh}"));
|
||||
}
|
||||
let mut args = vec![
|
||||
"-d".to_string(),
|
||||
distro,
|
||||
"--cd".to_string(),
|
||||
path,
|
||||
"env".to_string(),
|
||||
];
|
||||
args.extend(env_args);
|
||||
args.extend([
|
||||
"python".to_string(),
|
||||
"-m".to_string(),
|
||||
"hermes_cli.main".to_string(),
|
||||
"dashboard".to_string(),
|
||||
"--gui".to_string(),
|
||||
"--no-open".to_string(),
|
||||
"--host".to_string(),
|
||||
GUI_HOST.to_string(),
|
||||
"--port".to_string(),
|
||||
port.clone(),
|
||||
]);
|
||||
return Command::new("wsl.exe")
|
||||
.args(args)
|
||||
.stdin(Stdio::null())
|
||||
.stdout(Stdio::null())
|
||||
.stderr(Stdio::null())
|
||||
.spawn();
|
||||
}
|
||||
|
||||
Command::new("python")
|
||||
.args([
|
||||
"-m",
|
||||
"hermes_cli.main",
|
||||
"dashboard",
|
||||
"--gui",
|
||||
"--no-open",
|
||||
"--host",
|
||||
GUI_HOST,
|
||||
"--port",
|
||||
&port,
|
||||
])
|
||||
.current_dir(root)
|
||||
.env("HERMES_GUI", "1")
|
||||
.env("HERMES_GUI_PORT", &port)
|
||||
.envs(
|
||||
std::env::vars()
|
||||
.filter(|(key, _)| matches!(key.as_str(), "HERMES_HOME" | "HERMES_GUI_FRESH")),
|
||||
)
|
||||
.stdin(Stdio::null())
|
||||
.stdout(Stdio::null())
|
||||
.stderr(Stdio::null())
|
||||
.spawn()
|
||||
}
|
||||
|
||||
fn stop_owned_dashboard(state: &GuiState) {
|
||||
let Some(mut child) = state.child.lock().expect("gui child lock poisoned").take() else {
|
||||
return;
|
||||
};
|
||||
let _ = child.kill();
|
||||
let _ = child.wait();
|
||||
}
|
||||
|
||||
fn current_port(state: &GuiState) -> u16 {
|
||||
*state.port.lock().expect("gui port lock poisoned")
|
||||
}
|
||||
|
||||
fn ensure_dashboard(state: &GuiState) -> Result<(), String> {
|
||||
let current = current_port(state);
|
||||
if check_health(current) {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let port = select_port();
|
||||
*state.port.lock().expect("gui port lock poisoned") = port;
|
||||
|
||||
if check_health(port) {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let child = start_dashboard(port).map_err(|err| {
|
||||
format!(
|
||||
"Could not auto-start Hermes dashboard ({err}). Start it manually with: hermes dashboard --gui --no-open --port {port}"
|
||||
)
|
||||
})?;
|
||||
*state.child.lock().expect("gui child lock poisoned") = Some(child);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn navigate_when_ready(window: WebviewWindow, port: u16) {
|
||||
std::thread::spawn(move || {
|
||||
let started = Instant::now();
|
||||
while started.elapsed() < Duration::from_secs(60) {
|
||||
if check_health(port) {
|
||||
let min_splash = std::env::var("HERMES_GUI_MIN_SPLASH_MS")
|
||||
.ok()
|
||||
.and_then(|raw| raw.parse::<u64>().ok())
|
||||
.unwrap_or(MIN_SPLASH_MS);
|
||||
let elapsed = started.elapsed();
|
||||
if elapsed < Duration::from_millis(min_splash) {
|
||||
std::thread::sleep(Duration::from_millis(min_splash) - elapsed);
|
||||
}
|
||||
if let Ok(url) = tauri::Url::parse(&gui_url(port)) {
|
||||
let _ = window.navigate(url);
|
||||
let _ = window.show();
|
||||
let _ = window.set_focus();
|
||||
}
|
||||
return;
|
||||
}
|
||||
std::thread::sleep(Duration::from_millis(500));
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
fn show_main_window(app: &AppHandle) {
|
||||
if let Some(window) = app.get_webview_window("main") {
|
||||
let _ = window.show();
|
||||
let _ = window.set_focus();
|
||||
}
|
||||
}
|
||||
|
||||
fn open_browser(port: u16) {
|
||||
let url = gui_url(port);
|
||||
|
||||
#[cfg(target_os = "windows")]
|
||||
let _ = Command::new("cmd")
|
||||
.args(["/C", "start", "", &url])
|
||||
.stdin(Stdio::null())
|
||||
.stdout(Stdio::null())
|
||||
.stderr(Stdio::null())
|
||||
.spawn();
|
||||
|
||||
#[cfg(target_os = "macos")]
|
||||
let _ = Command::new("open").arg(&url).spawn();
|
||||
|
||||
#[cfg(all(unix, not(target_os = "macos")))]
|
||||
let _ = Command::new("xdg-open").arg(&url).spawn();
|
||||
}
|
||||
|
||||
fn tray_icon() -> Image<'static> {
|
||||
let width = 32;
|
||||
let height = 32;
|
||||
let mut rgba = Vec::with_capacity(width * height * 4);
|
||||
|
||||
for y in 0..height {
|
||||
for x in 0..width {
|
||||
let mark = (14..=17).contains(&x) && (5..=26).contains(&y)
|
||||
|| (8..=23).contains(&x) && (13..=16).contains(&y)
|
||||
|| (10..=21).contains(&x) && (y == 5 || y == 26);
|
||||
if mark {
|
||||
rgba.extend_from_slice(&[0xF0, 0xE6, 0xD2, 0xFF]);
|
||||
} else {
|
||||
rgba.extend_from_slice(&[0x07, 0x13, 0x13, 0xFF]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Image::new_owned(rgba, width as u32, height as u32)
|
||||
}
|
||||
|
||||
fn restart_runtime(app: &AppHandle) -> Result<(), String> {
|
||||
let state = app.state::<GuiState>();
|
||||
stop_owned_dashboard(&state);
|
||||
ensure_dashboard(&state)?;
|
||||
|
||||
if let Some(window) = app.get_webview_window("main") {
|
||||
if let Ok(url) = tauri::Url::parse(SPLASH_URL) {
|
||||
let _ = window.navigate(url);
|
||||
}
|
||||
let port = current_port(&state);
|
||||
navigate_when_ready(window, port);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn setup_tray(app: &App) -> tauri::Result<()> {
|
||||
let open_item = MenuItem::with_id(app, "open", "Open Hermes", true, None::<&str>)?;
|
||||
let browser_item = MenuItem::with_id(app, "browser", "Open in Browser", true, None::<&str>)?;
|
||||
let restart_item =
|
||||
MenuItem::with_id(app, "restart", "Restart Hermes Runtime", true, None::<&str>)?;
|
||||
let status_item = MenuItem::with_id(app, "status", "Local runtime", false, None::<&str>)?;
|
||||
let separator = PredefinedMenuItem::separator(app)?;
|
||||
let separator2 = PredefinedMenuItem::separator(app)?;
|
||||
let quit_item = MenuItem::with_id(app, "quit", "Quit Hermes", true, None::<&str>)?;
|
||||
|
||||
let menu = Menu::with_items(
|
||||
app,
|
||||
&[
|
||||
&open_item,
|
||||
&browser_item,
|
||||
&restart_item,
|
||||
&separator,
|
||||
&status_item,
|
||||
&separator2,
|
||||
&quit_item,
|
||||
],
|
||||
)?;
|
||||
|
||||
let icon = tray_icon();
|
||||
let _tray = TrayIconBuilder::new()
|
||||
.icon(icon)
|
||||
.menu(&menu)
|
||||
.tooltip("Hermes")
|
||||
.on_menu_event(|app, event| match event.id.as_ref() {
|
||||
"open" => show_main_window(app),
|
||||
"browser" => {
|
||||
let state = app.state::<GuiState>();
|
||||
open_browser(current_port(&state));
|
||||
}
|
||||
"restart" => {
|
||||
if let Err(err) = restart_runtime(app) {
|
||||
eprintln!("Failed to restart Hermes runtime: {err}");
|
||||
}
|
||||
}
|
||||
"quit" => {
|
||||
let state = app.state::<GuiState>();
|
||||
stop_owned_dashboard(&state);
|
||||
app.exit(0);
|
||||
}
|
||||
_ => {}
|
||||
})
|
||||
.on_tray_icon_event(|tray, event| {
|
||||
if let TrayIconEvent::Click {
|
||||
button: MouseButton::Left,
|
||||
button_state: MouseButtonState::Up,
|
||||
..
|
||||
} = event
|
||||
{
|
||||
show_main_window(&tray.app_handle());
|
||||
}
|
||||
})
|
||||
.build(app)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tauri::command]
|
||||
fn runtime_running(app: AppHandle) -> bool {
|
||||
let state = app.state::<GuiState>();
|
||||
check_health(current_port(&state))
|
||||
}
|
||||
|
||||
#[tauri::command]
|
||||
fn restart_runtime_command(app: AppHandle) -> Result<(), String> {
|
||||
restart_runtime(&app)
|
||||
}
|
||||
|
||||
pub fn run() {
|
||||
tauri::Builder::default()
|
||||
.plugin(tauri_plugin_notification::init())
|
||||
.plugin(tauri_plugin_opener::init())
|
||||
.manage(GuiState {
|
||||
child: Mutex::new(None),
|
||||
port: Mutex::new(base_port()),
|
||||
})
|
||||
.invoke_handler(tauri::generate_handler![
|
||||
runtime_running,
|
||||
restart_runtime_command
|
||||
])
|
||||
.setup(|app| {
|
||||
setup_tray(app)?;
|
||||
|
||||
if let Some(window) = app.get_webview_window("main") {
|
||||
if let Ok(url) = tauri::Url::parse(SPLASH_URL) {
|
||||
let _ = window.navigate(url);
|
||||
}
|
||||
|
||||
let state = app.state::<GuiState>();
|
||||
if let Err(err) = ensure_dashboard(&state) {
|
||||
eprintln!("{err}");
|
||||
}
|
||||
|
||||
let port = current_port(&state);
|
||||
navigate_when_ready(window, port);
|
||||
}
|
||||
Ok(())
|
||||
})
|
||||
.on_window_event(|window, event| {
|
||||
if let tauri::WindowEvent::CloseRequested { api, .. } = event {
|
||||
api.prevent_close();
|
||||
let _ = window.hide();
|
||||
}
|
||||
})
|
||||
.run(tauri::generate_context!())
|
||||
.expect("failed to run Hermes GUI");
|
||||
}
|
||||
@@ -0,0 +1,5 @@
|
||||
#![cfg_attr(not(debug_assertions), windows_subsystem = "windows")]
|
||||
|
||||
fn main() {
|
||||
hermes_gui_lib::run();
|
||||
}
|
||||
@@ -0,0 +1,38 @@
|
||||
{
|
||||
"$schema": "https://schema.tauri.app/config/2",
|
||||
"productName": "Hermes",
|
||||
"version": "0.0.0",
|
||||
"identifier": "ai.nous.hermes.gui",
|
||||
"build": {
|
||||
"beforeDevCommand": "",
|
||||
"beforeBuildCommand": "",
|
||||
"devUrl": "http://127.0.0.1:9120",
|
||||
"frontendDist": "../dist"
|
||||
},
|
||||
"app": {
|
||||
"withGlobalTauri": true,
|
||||
"windows": [
|
||||
{
|
||||
"label": "main",
|
||||
"title": "Hermes",
|
||||
"width": 1400,
|
||||
"height": 900,
|
||||
"minWidth": 900,
|
||||
"minHeight": 600,
|
||||
"resizable": true,
|
||||
"center": true
|
||||
}
|
||||
],
|
||||
"security": {
|
||||
"csp": "default-src 'self' http://127.0.0.1:* http://localhost:*; connect-src 'self' http://127.0.0.1:* http://localhost:* ws://127.0.0.1:* ws://localhost:*; img-src 'self' data: blob: http://127.0.0.1:* http://localhost:*; style-src 'self' 'unsafe-inline' http://127.0.0.1:* http://localhost:*; script-src 'self' 'unsafe-inline' 'unsafe-eval' http://127.0.0.1:* http://localhost:*"
|
||||
}
|
||||
},
|
||||
"bundle": {
|
||||
"active": true,
|
||||
"icon": ["icons/32x32.png", "icons/icon.ico", "icons/icon.svg"],
|
||||
"targets": ["nsis", "dmg", "app"],
|
||||
"resources": {
|
||||
"sidecars": "sidecars/"
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,5 @@
|
||||
// Browser-side GUI bridge entry.
|
||||
//
|
||||
// The dashboard remains in `web/`; this file is reserved for future shell-only
|
||||
// glue if we need pre-navigation scripts or native event wiring.
|
||||
export {};
|
||||
@@ -0,0 +1,44 @@
|
||||
# Bundle a Hermes GUI runtime: built dashboard assets, built TUI (with its
# node_modules) and a Python virtualenv with Hermes installed.
#
# Parameters:
#   -Out     Output directory. It is deleted and recreated.
#   -Python  Interpreter used to create the virtualenv.
param(
    [string]$Out = "$PSScriptRoot\..\gui\src-tauri\sidecars\hermes-runtime",
    [string]$Python = "python"
)

# Stop on cmdlet errors, matching `set -e` in the bash twin of this script.
$ErrorActionPreference = "Stop"

# Native commands (npm, python, pip) only report failure through
# $LASTEXITCODE, so each call is checked explicitly.
function Assert-NativeSuccess {
    param([string]$Step)
    if ($LASTEXITCODE -ne 0) {
        throw "$Step failed with exit code $LASTEXITCODE"
    }
}

$Root = Resolve-Path "$PSScriptRoot\..\.."

Write-Host "Bundling Hermes GUI runtime"
Write-Host "repo: $Root"
Write-Host "out:  $Out"

if (Test-Path $Out) {
    Remove-Item -Recurse -Force $Out
}
New-Item -ItemType Directory -Force -Path $Out | Out-Null

Write-Host "-> Building dashboard"
npm --prefix "$Root\web" ci
Assert-NativeSuccess "npm ci (web)"
npm --prefix "$Root\web" run build
Assert-NativeSuccess "npm run build (web)"
Copy-Item -Recurse "$Root\web\dist" "$Out\web_dist"

Write-Host "-> Building TUI"
npm --prefix "$Root\ui-tui" ci
Assert-NativeSuccess "npm ci (ui-tui)"
npm --prefix "$Root\ui-tui" run build
Assert-NativeSuccess "npm run build (ui-tui)"
New-Item -ItemType Directory -Force -Path "$Out\ui-tui" | Out-Null
Copy-Item -Recurse "$Root\ui-tui\dist" "$Out\ui-tui\dist"
Copy-Item "$Root\ui-tui\package.json" "$Out\ui-tui\package.json"
Copy-Item "$Root\ui-tui\package-lock.json" "$Out\ui-tui\package-lock.json"
Copy-Item -Recurse "$Root\ui-tui\node_modules" "$Out\ui-tui\node_modules"

Write-Host "-> Creating Python runtime"
& $Python -m venv "$Out\venv"
Assert-NativeSuccess "python -m venv"
& "$Out\venv\Scripts\python.exe" -m pip install --upgrade pip
Assert-NativeSuccess "pip install --upgrade pip"
& "$Out\venv\Scripts\python.exe" -m pip install -e "$Root[web,pty]"
Assert-NativeSuccess "pip install hermes"

@"
# Hermes GUI Runtime

Generated by apps/shared/bundle-runtime.ps1.

Set HERMES_GUI_RUNTIME_DIR to this directory before launching the Tauri shell.
"@ | Set-Content "$Out\README.md"

Write-Host "Runtime bundle ready: $Out"
|
||||
@@ -0,0 +1,41 @@
|
||||
#!/usr/bin/env bash
# Bundle a Hermes GUI runtime: built dashboard assets, built TUI (with its
# node_modules) and a Python virtualenv with Hermes installed.
#
# Usage: bundle-runtime.sh [OUT_DIR]
#   OUT_DIR  output directory (deleted and recreated); defaults to the Tauri
#            sidecar directory inside the repo.
#   PYTHON   environment variable naming the interpreter for the venv.
set -euo pipefail

ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
OUT="${1:-"$ROOT/apps/gui/src-tauri/sidecars/hermes-runtime"}"
PYTHON="${PYTHON:-python}"

# Install dependencies from the lockfile and build the npm project at $1.
build_npm_project() {
  npm --prefix "$1" ci
  npm --prefix "$1" run build
}

echo "Bundling Hermes GUI runtime"
echo "repo: $ROOT"
echo "out:  $OUT"

rm -rf "$OUT"
mkdir -p "$OUT"

echo "→ Building dashboard"
build_npm_project "$ROOT/web"
cp -a "$ROOT/web/dist" "$OUT/web_dist"

echo "→ Building TUI"
build_npm_project "$ROOT/ui-tui"
mkdir -p "$OUT/ui-tui"
cp -a "$ROOT/ui-tui/dist" "$OUT/ui-tui/dist"
cp -a "$ROOT/ui-tui/package.json" "$ROOT/ui-tui/package-lock.json" "$OUT/ui-tui/"
cp -a "$ROOT/ui-tui/node_modules" "$OUT/ui-tui/node_modules"

echo "→ Creating Python runtime"
VENV_PYTHON="$OUT/venv/bin/python"
"$PYTHON" -m venv "$OUT/venv"
"$VENV_PYTHON" -m pip install --upgrade pip
"$VENV_PYTHON" -m pip install -e "$ROOT[web,pty]"

# The README body contains no expansions, so the delimiter is quoted.
cat > "$OUT/README.md" <<'EOF'
# Hermes GUI Runtime

Generated by apps/shared/bundle-runtime.sh.

Set HERMES_GUI_RUNTIME_DIR to this directory before launching the Tauri shell.
EOF

echo "✓ Runtime bundle ready: $OUT"
|
||||
@@ -0,0 +1,33 @@
|
||||
# GUI Runtime Contract
|
||||
|
||||
The GUI shell starts Hermes with a small, explicit environment.
|
||||
|
||||
## Environment
|
||||
|
||||
```text
|
||||
HERMES_GUI=1
|
||||
HERMES_WEB_DIST=<bundled web dist>
|
||||
HERMES_TUI_DIR=<bundled ui-tui dir>
|
||||
```
|
||||
|
||||
The native shell uses `127.0.0.1:9120` as its initial GUI port during dev.
|
||||
Bundled builds should keep the port private to the local machine and expose it
|
||||
through `/api/health` and `/api/runtime`.
|
||||
|
||||
The shell should also pass the selected profile through the normal Hermes CLI
|
||||
profile mechanism once the profile picker is wired.
|
||||
|
||||
## Ports
|
||||
|
||||
Use `127.0.0.1` only. Start with the GUI default port, then fall back to a
|
||||
free port if occupied. Show the chosen port in the tray menu.
|
||||
|
||||
## User Data
|
||||
|
||||
The installer owns app files. Hermes owns user state under `HERMES_HOME`.
|
||||
Uninstallers must not delete user state unless the user explicitly asks.
|
||||
|
||||
## Update Model
|
||||
|
||||
MVP does not use Tauri's native updater. GUI runs `hermes update`, tails the
|
||||
action log, notifies completion, then offers to restart the runtime.
|
||||
+2
-6
@@ -951,13 +951,9 @@ class BatchRunner:
|
||||
root_logger.setLevel(original_level)
|
||||
|
||||
# Aggregate all batch statistics and update checkpoint
|
||||
all_completed_prompts = list(completed_prompts_set)
|
||||
total_reasoning_stats = {"total_assistant_turns": 0, "turns_with_reasoning": 0, "turns_without_reasoning": 0}
|
||||
|
||||
|
||||
for batch_result in results:
|
||||
# Add newly completed prompts
|
||||
all_completed_prompts.extend(batch_result.get("completed_prompts", []))
|
||||
|
||||
# Aggregate tool stats
|
||||
for tool_name, stats in batch_result.get("tool_stats", {}).items():
|
||||
if tool_name not in total_tool_stats:
|
||||
@@ -977,7 +973,7 @@ class BatchRunner:
|
||||
|
||||
# Save final checkpoint (best-effort; incremental writes already happened)
|
||||
try:
|
||||
checkpoint_data["completed_prompts"] = all_completed_prompts
|
||||
checkpoint_data["completed_prompts"] = sorted(completed_prompts_set)
|
||||
self._save_checkpoint(checkpoint_data, lock=checkpoint_lock)
|
||||
except Exception as ckpt_err:
|
||||
print(f"âš ï¸ Warning: Failed to save final checkpoint: {ckpt_err}")
|
||||
|
||||
@@ -790,9 +790,16 @@ code_execution:
|
||||
# Supports single tasks and batch mode (default 3 parallel, configurable).
|
||||
delegation:
|
||||
max_iterations: 50 # Max tool-calling turns per child (default: 50)
|
||||
# max_concurrent_children: 3 # Max parallel child agents (default: 3)
|
||||
# max_spawn_depth: 1 # Tree depth cap (1-3, default: 1 = flat). Raise to 2 or 3 to allow orchestrator children to spawn their own workers.
|
||||
# max_concurrent_children: 3 # Max parallel child agents per batch (default: 3, floor: 1, no ceiling).
|
||||
# WARNING: values above 10 multiply API cost linearly.
|
||||
# max_spawn_depth: 1 # Delegation tree depth cap (range: 1-3, default: 1 = flat).
|
||||
# Raise to 2 to allow workers to spawn their own subagents.
|
||||
# Requires role="orchestrator" on intermediate agents.
|
||||
# orchestrator_enabled: true # Kill switch for role="orchestrator" children (default: true).
|
||||
# subagent_auto_approve: false # When a subagent hits a dangerous-command approval prompt, auto-deny (default: false)
|
||||
# or auto-approve "once" (true) instead of blocking on stdin.
|
||||
# The parent TUI owns stdin, so blocking would deadlock; non-interactive resolution is required.
|
||||
# Both choices emit a logger.warning audit line. Flip to true only for cron/batch pipelines.
|
||||
# inherit_mcp_toolsets: true # When explicit child toolsets are narrowed, also keep the parent's MCP toolsets (default: true). Set false for strict intersection.
|
||||
# model: "google/gemini-3-flash-preview" # Override model for subagents (empty = inherit parent)
|
||||
# provider: "openrouter" # Override provider for subagents (empty = inherit parent)
|
||||
|
||||
@@ -3176,7 +3176,14 @@ class HermesCLI:
|
||||
# the configured model (e.g. "qwen3.6-plus"), causing 400 errors.
|
||||
runtime_model = runtime.get("model")
|
||||
if runtime_model and isinstance(runtime_model, str):
|
||||
self.model = runtime_model
|
||||
# Only use runtime model if: model is unset, or model equals provider name
|
||||
should_use_runtime_model = (
|
||||
not self.model or # No model configured yet
|
||||
self.model == self.provider or # Model is the provider slug
|
||||
self.model == runtime.get("name") # Model matches provider display name
|
||||
)
|
||||
if should_use_runtime_model:
|
||||
self.model = runtime_model
|
||||
|
||||
# If model is still empty (e.g. user ran `hermes auth add openai-codex`
|
||||
# without `hermes model`), fall back to the provider's first catalog
|
||||
@@ -4661,10 +4668,6 @@ class HermesCLI:
|
||||
def new_session(self, silent=False):
|
||||
"""Start a fresh session with a new session ID and cleared agent state."""
|
||||
if self.agent and self.conversation_history:
|
||||
try:
|
||||
self.agent.flush_memories(self.conversation_history)
|
||||
except (Exception, KeyboardInterrupt):
|
||||
pass
|
||||
# Trigger memory extraction on the old session before session_id rotates.
|
||||
self.agent.commit_memory_session(self.conversation_history)
|
||||
self._notify_session_boundary("on_session_finalize")
|
||||
@@ -5374,29 +5377,26 @@ class HermesCLI:
|
||||
_cprint(f" ✓ Model switched: {result.new_model}")
|
||||
_cprint(f" Provider: {provider_label}")
|
||||
|
||||
# Rich metadata from models.dev
|
||||
# Context: always resolve via the provider-aware chain so Codex OAuth,
|
||||
# Copilot, and Nous-enforced caps win over the raw models.dev entry
|
||||
# (e.g. gpt-5.5 is 1.05M on openai but 272K on Codex OAuth).
|
||||
mi = result.model_info
|
||||
from hermes_cli.model_switch import resolve_display_context_length
|
||||
ctx = resolve_display_context_length(
|
||||
result.new_model,
|
||||
result.target_provider,
|
||||
base_url=result.base_url or self.base_url or "",
|
||||
api_key=result.api_key or self.api_key or "",
|
||||
model_info=mi,
|
||||
)
|
||||
if ctx:
|
||||
_cprint(f" Context: {ctx:,} tokens")
|
||||
if mi:
|
||||
if mi.context_window:
|
||||
_cprint(f" Context: {mi.context_window:,} tokens")
|
||||
if mi.max_output:
|
||||
_cprint(f" Max output: {mi.max_output:,} tokens")
|
||||
if mi.has_cost_data():
|
||||
_cprint(f" Cost: {mi.format_cost()}")
|
||||
_cprint(f" Capabilities: {mi.format_capabilities()}")
|
||||
else:
|
||||
# Fallback to old context length lookup
|
||||
try:
|
||||
from agent.model_metadata import get_model_context_length
|
||||
ctx = get_model_context_length(
|
||||
result.new_model,
|
||||
base_url=result.base_url or self.base_url,
|
||||
api_key=result.api_key or self.api_key,
|
||||
provider=result.target_provider,
|
||||
)
|
||||
_cprint(f" Context: {ctx:,} tokens")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Cache notice
|
||||
cache_enabled = (
|
||||
@@ -6165,6 +6165,8 @@ class HermesCLI:
|
||||
self._handle_skin_command(cmd_original)
|
||||
elif canonical == "voice":
|
||||
self._handle_voice_command(cmd_original)
|
||||
elif canonical == "busy":
|
||||
self._handle_busy_command(cmd_original)
|
||||
else:
|
||||
# Check for user-defined quick commands (bypass agent loop, no LLM call)
|
||||
base_cmd = cmd_lower.split()[0]
|
||||
@@ -6901,6 +6903,36 @@ class HermesCLI:
|
||||
else:
|
||||
_cprint(f" {_ACCENT}✓ Reasoning effort set to '{arg}' (session only){_RST}")
|
||||
|
||||
def _handle_busy_command(self, cmd: str):
    """Handle /busy — choose what pressing Enter does while Hermes is working.

    With no argument, or with ``status``, the current mode is printed along
    with a usage hint.  ``queue`` and ``interrupt`` switch the mode for this
    session and attempt to persist it under ``display.busy_input_mode``; any
    other argument prints an error plus the usage hint and changes nothing.

    Usage:
        /busy              Show current busy input mode
        /busy status       Show current busy input mode
        /busy queue        Queue input for the next turn instead of interrupting
        /busy interrupt    Interrupt the current run on Enter (default)
    """
    usage_line = f" {_DIM}Usage: /busy [queue|interrupt|status]{_RST}"
    pieces = cmd.strip().split(maxsplit=1)
    # A bare "/busy" is treated exactly like "/busy status".
    requested = pieces[1].strip().lower() if len(pieces) >= 2 else "status"

    if requested == "status":
        _cprint(f" {_ACCENT}Busy input mode: {self.busy_input_mode}{_RST}")
        if self.busy_input_mode == 'queue':
            enter_effect = 'queues for next turn'
        else:
            enter_effect = 'interrupts current run'
        _cprint(f" {_DIM}Enter while busy: {enter_effect}{_RST}")
        _cprint(usage_line)
        return

    if requested not in ("queue", "interrupt"):
        _cprint(f" {_DIM}(._.) Unknown argument: {requested}{_RST}")
        _cprint(usage_line)
        return

    # Apply to the live session first; persistence is best-effort.
    self.busy_input_mode = requested
    if not save_config_value("display.busy_input_mode", requested):
        _cprint(f" {_ACCENT}✓ Busy input mode set to '{requested}' (session only){_RST}")
        return

    if requested == "queue":
        behavior = "Enter will queue follow-up input while Hermes is busy."
    else:
        behavior = "Enter will interrupt the current run while Hermes is busy."
    _cprint(f" {_ACCENT}✓ Busy input mode set to '{requested}' (saved to config){_RST}")
    _cprint(f" {_DIM}{behavior}{_RST}")
|
||||
|
||||
def _handle_fast_command(self, cmd: str):
|
||||
"""Handle /fast — toggle fast mode (OpenAI Priority Processing / Anthropic Fast Mode)."""
|
||||
if not self._fast_command_available():
|
||||
@@ -6979,51 +7011,52 @@ class HermesCLI:
|
||||
focus_topic = parts[1].strip()
|
||||
|
||||
original_count = len(self.conversation_history)
|
||||
try:
|
||||
from agent.model_metadata import estimate_messages_tokens_rough
|
||||
from agent.manual_compression_feedback import summarize_manual_compression
|
||||
original_history = list(self.conversation_history)
|
||||
approx_tokens = estimate_messages_tokens_rough(original_history)
|
||||
if focus_topic:
|
||||
print(f"🗜️ Compressing {original_count} messages (~{approx_tokens:,} tokens), "
|
||||
f"focus: \"{focus_topic}\"...")
|
||||
else:
|
||||
print(f"🗜️ Compressing {original_count} messages (~{approx_tokens:,} tokens)...")
|
||||
with self._busy_command("Compressing context..."):
|
||||
try:
|
||||
from agent.model_metadata import estimate_messages_tokens_rough
|
||||
from agent.manual_compression_feedback import summarize_manual_compression
|
||||
original_history = list(self.conversation_history)
|
||||
approx_tokens = estimate_messages_tokens_rough(original_history)
|
||||
if focus_topic:
|
||||
print(f"🗜️ Compressing {original_count} messages (~{approx_tokens:,} tokens), "
|
||||
f"focus: \"{focus_topic}\"...")
|
||||
else:
|
||||
print(f"🗜️ Compressing {original_count} messages (~{approx_tokens:,} tokens)...")
|
||||
|
||||
compressed, _ = self.agent._compress_context(
|
||||
original_history,
|
||||
self.agent._cached_system_prompt or "",
|
||||
approx_tokens=approx_tokens,
|
||||
focus_topic=focus_topic or None,
|
||||
)
|
||||
self.conversation_history = compressed
|
||||
# _compress_context ends the old session and creates a new child
|
||||
# session on the agent (run_agent.py::_compress_context). Sync the
|
||||
# CLI's session_id so /status, /resume, exit summary, and title
|
||||
# generation all point at the live continuation session, not the
|
||||
# ended parent. Without this, subsequent end_session() calls target
|
||||
# the already-closed parent and the child is orphaned.
|
||||
if (
|
||||
getattr(self.agent, "session_id", None)
|
||||
and self.agent.session_id != self.session_id
|
||||
):
|
||||
self.session_id = self.agent.session_id
|
||||
self._pending_title = None
|
||||
new_tokens = estimate_messages_tokens_rough(self.conversation_history)
|
||||
summary = summarize_manual_compression(
|
||||
original_history,
|
||||
self.conversation_history,
|
||||
approx_tokens,
|
||||
new_tokens,
|
||||
)
|
||||
icon = "🗜️" if summary["noop"] else "✅"
|
||||
print(f" {icon} {summary['headline']}")
|
||||
print(f" {summary['token_line']}")
|
||||
if summary["note"]:
|
||||
print(f" {summary['note']}")
|
||||
compressed, _ = self.agent._compress_context(
|
||||
original_history,
|
||||
self.agent._cached_system_prompt or "",
|
||||
approx_tokens=approx_tokens,
|
||||
focus_topic=focus_topic or None,
|
||||
)
|
||||
self.conversation_history = compressed
|
||||
# _compress_context ends the old session and creates a new child
|
||||
# session on the agent (run_agent.py::_compress_context). Sync the
|
||||
# CLI's session_id so /status, /resume, exit summary, and title
|
||||
# generation all point at the live continuation session, not the
|
||||
# ended parent. Without this, subsequent end_session() calls target
|
||||
# the already-closed parent and the child is orphaned.
|
||||
if (
|
||||
getattr(self.agent, "session_id", None)
|
||||
and self.agent.session_id != self.session_id
|
||||
):
|
||||
self.session_id = self.agent.session_id
|
||||
self._pending_title = None
|
||||
new_tokens = estimate_messages_tokens_rough(self.conversation_history)
|
||||
summary = summarize_manual_compression(
|
||||
original_history,
|
||||
self.conversation_history,
|
||||
approx_tokens,
|
||||
new_tokens,
|
||||
)
|
||||
icon = "🗜️" if summary["noop"] else "✅"
|
||||
print(f" {icon} {summary['headline']}")
|
||||
print(f" {summary['token_line']}")
|
||||
if summary["note"]:
|
||||
print(f" {summary['note']}")
|
||||
|
||||
except Exception as e:
|
||||
print(f" ❌ Compression failed: {e}")
|
||||
except Exception as e:
|
||||
print(f" ❌ Compression failed: {e}")
|
||||
|
||||
def _handle_debug_command(self):
|
||||
"""Handle /debug — upload debug report + logs and print paste URLs."""
|
||||
@@ -9525,9 +9558,20 @@ class HermesCLI:
|
||||
|
||||
@kb.add('c-d')
|
||||
def handle_ctrl_d(event):
|
||||
"""Handle Ctrl+D - exit."""
|
||||
self._should_exit = True
|
||||
event.app.exit()
|
||||
"""Ctrl+D: delete char under cursor (standard readline behaviour).
|
||||
Only exit when the input is empty — same as bash/zsh. Pending
|
||||
attached images count as input and block the EOF-exit so the
|
||||
user doesn't lose them silently.
|
||||
"""
|
||||
buf = event.app.current_buffer
|
||||
if buf.text:
|
||||
buf.delete()
|
||||
elif self._attached_images:
|
||||
# Empty text but pending attachments — no-op, don't exit.
|
||||
return
|
||||
else:
|
||||
self._should_exit = True
|
||||
event.app.exit()
|
||||
|
||||
_modal_prompt_active = Condition(
|
||||
lambda: bool(self._secret_state or self._sudo_state)
|
||||
@@ -10740,12 +10784,6 @@ class HermesCLI:
|
||||
self.agent.interrupt()
|
||||
except Exception:
|
||||
pass
|
||||
# Flush memories before exit (only for substantial conversations)
|
||||
if self.agent and self.conversation_history:
|
||||
try:
|
||||
self.agent.flush_memories(self.conversation_history)
|
||||
except (Exception, KeyboardInterrupt):
|
||||
pass
|
||||
# Shut down voice recorder (release persistent audio stream)
|
||||
if hasattr(self, '_voice_recorder') and self._voice_recorder:
|
||||
try:
|
||||
|
||||
+14
-1
@@ -16,7 +16,7 @@ import uuid
|
||||
from datetime import datetime, timedelta
|
||||
from pathlib import Path
|
||||
from hermes_constants import get_hermes_home
|
||||
from typing import Optional, Dict, List, Any
|
||||
from typing import Optional, Dict, List, Any, Union
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -417,6 +417,7 @@ def create_job(
|
||||
provider: Optional[str] = None,
|
||||
base_url: Optional[str] = None,
|
||||
script: Optional[str] = None,
|
||||
context_from: Optional[Union[str, List[str]]] = None,
|
||||
enabled_toolsets: Optional[List[str]] = None,
|
||||
workdir: Optional[str] = None,
|
||||
) -> Dict[str, Any]:
|
||||
@@ -438,6 +439,9 @@ def create_job(
|
||||
script: Optional path to a Python script whose stdout is injected into the
|
||||
prompt each run. The script runs before the agent turn, and its output
|
||||
is prepended as context. Useful for data collection / change detection.
|
||||
context_from: Optional job ID (or list of job IDs) whose most recent output
|
||||
is injected into the prompt as context before each run.
|
||||
Useful for chaining cron jobs: job A finds data, job B processes it.
|
||||
enabled_toolsets: Optional list of toolset names to restrict the agent to.
|
||||
When set, only tools from these toolsets are loaded, reducing
|
||||
token overhead. When omitted, all default tools are loaded.
|
||||
@@ -481,6 +485,14 @@ def create_job(
|
||||
normalized_toolsets = normalized_toolsets or None
|
||||
normalized_workdir = _normalize_workdir(workdir)
|
||||
|
||||
# Normalize context_from: accept str or list of str, store as list or None
|
||||
if isinstance(context_from, str):
|
||||
context_from = [context_from.strip()] if context_from.strip() else None
|
||||
elif isinstance(context_from, list):
|
||||
context_from = [str(j).strip() for j in context_from if str(j).strip()] or None
|
||||
else:
|
||||
context_from = None
|
||||
|
||||
label_source = (prompt or (normalized_skills[0] if normalized_skills else None)) or "cron job"
|
||||
job = {
|
||||
"id": job_id,
|
||||
@@ -492,6 +504,7 @@ def create_job(
|
||||
"provider": normalized_provider,
|
||||
"base_url": normalized_base_url,
|
||||
"script": normalized_script,
|
||||
"context_from": context_from,
|
||||
"schedule": parsed_schedule,
|
||||
"schedule_display": parsed_schedule.get("display", schedule),
|
||||
"repeat": {
|
||||
|
||||
@@ -671,6 +671,47 @@ def _build_job_prompt(job: dict, prerun_script: Optional[tuple] = None) -> str:
|
||||
f"{prompt}"
|
||||
)
|
||||
|
||||
# Inject output from referenced cron jobs as context.
|
||||
context_from = job.get("context_from")
|
||||
if context_from:
|
||||
from cron.jobs import OUTPUT_DIR
|
||||
if isinstance(context_from, str):
|
||||
context_from = [context_from]
|
||||
for source_job_id in context_from:
|
||||
# Guard against path traversal — valid job IDs are 12-char hex strings
|
||||
if not source_job_id or not all(c in "0123456789abcdef" for c in source_job_id):
|
||||
logger.warning("context_from: skipping invalid job_id %r", source_job_id)
|
||||
continue
|
||||
try:
|
||||
job_output_dir = OUTPUT_DIR / source_job_id
|
||||
if not job_output_dir.exists():
|
||||
continue # silent skip — no output yet
|
||||
output_files = sorted(
|
||||
job_output_dir.glob("*.md"),
|
||||
key=lambda f: f.stat().st_mtime,
|
||||
reverse=True,
|
||||
)
|
||||
if not output_files:
|
||||
continue # silent skip — no output yet
|
||||
latest_output = output_files[0].read_text(encoding="utf-8").strip()
|
||||
# Truncate to 8K characters to avoid prompt bloat
|
||||
_MAX_CONTEXT_CHARS = 8000
|
||||
if len(latest_output) > _MAX_CONTEXT_CHARS:
|
||||
latest_output = latest_output[:_MAX_CONTEXT_CHARS] + "\n\n[... output truncated ...]"
|
||||
if latest_output:
|
||||
prompt = (
|
||||
f"## Output from job '{source_job_id}'\n"
|
||||
"The following is the most recent output from a preceding "
|
||||
"cron job. Use it as context for your analysis.\n\n"
|
||||
f"```\n{latest_output}\n```\n\n"
|
||||
f"{prompt}"
|
||||
)
|
||||
else:
|
||||
continue # silent skip — empty output
|
||||
except (OSError, PermissionError) as e:
|
||||
logger.warning("context_from: failed to read output for job %r: %s", source_job_id, e)
|
||||
# silent skip — do not pollute the prompt with error messages
|
||||
|
||||
# Always prepend cron execution guidance so the agent knows how
|
||||
# delivery works and can suppress delivery when appropriate.
|
||||
cron_hint = (
|
||||
|
||||
+8
-3
@@ -135,7 +135,7 @@ class SessionResetPolicy:
|
||||
mode=mode if mode is not None else "both",
|
||||
at_hour=at_hour if at_hour is not None else 4,
|
||||
idle_minutes=idle_minutes if idle_minutes is not None else 1440,
|
||||
notify=notify if notify is not None else True,
|
||||
notify=_coerce_bool(notify, True),
|
||||
notify_exclude_platforms=tuple(exclude) if exclude is not None else ("api_server", "webhook"),
|
||||
)
|
||||
|
||||
@@ -178,7 +178,7 @@ class PlatformConfig:
|
||||
home_channel = HomeChannel.from_dict(data["home_channel"])
|
||||
|
||||
return cls(
|
||||
enabled=data.get("enabled", False),
|
||||
enabled=_coerce_bool(data.get("enabled"), False),
|
||||
token=data.get("token"),
|
||||
api_key=data.get("api_key"),
|
||||
home_channel=home_channel,
|
||||
@@ -435,7 +435,7 @@ class GatewayConfig:
|
||||
reset_triggers=data.get("reset_triggers", ["/new", "/reset"]),
|
||||
quick_commands=quick_commands,
|
||||
sessions_dir=sessions_dir,
|
||||
always_log_local=data.get("always_log_local", True),
|
||||
always_log_local=_coerce_bool(data.get("always_log_local"), True),
|
||||
stt_enabled=_coerce_bool(stt_enabled, True),
|
||||
group_sessions_per_user=_coerce_bool(group_sessions_per_user, True),
|
||||
thread_sessions_per_user=_coerce_bool(thread_sessions_per_user, False),
|
||||
@@ -687,6 +687,11 @@ def load_gateway_config() -> GatewayConfig:
|
||||
os.environ["TELEGRAM_REACTIONS"] = str(telegram_cfg["reactions"]).lower()
|
||||
if "proxy_url" in telegram_cfg and not os.getenv("TELEGRAM_PROXY"):
|
||||
os.environ["TELEGRAM_PROXY"] = str(telegram_cfg["proxy_url"]).strip()
|
||||
if "group_allowed_chats" in telegram_cfg and not os.getenv("TELEGRAM_GROUP_ALLOWED_USERS"):
|
||||
gac = telegram_cfg["group_allowed_chats"]
|
||||
if isinstance(gac, list):
|
||||
gac = ",".join(str(v) for v in gac)
|
||||
os.environ["TELEGRAM_GROUP_ALLOWED_USERS"] = str(gac)
|
||||
if "disable_link_previews" in telegram_cfg:
|
||||
plat_data = platforms_data.setdefault(Platform.TELEGRAM.value, {})
|
||||
if not isinstance(plat_data, dict):
|
||||
|
||||
+101
-22
@@ -1204,10 +1204,12 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
|
||||
If the client disconnects mid-stream, ``agent.interrupt()`` is
|
||||
called so the agent stops issuing upstream LLM calls, then the
|
||||
asyncio task is cancelled. When ``store=True`` the full response
|
||||
is persisted to the ResponseStore in a ``finally`` block so GET
|
||||
/v1/responses/{id} and ``previous_response_id`` chaining work the
|
||||
same as the batch path.
|
||||
asyncio task is cancelled. When ``store=True`` an initial
|
||||
``in_progress`` snapshot is persisted immediately after
|
||||
``response.created`` and disconnects update it to an
|
||||
``incomplete`` snapshot so GET /v1/responses/{id} and
|
||||
``previous_response_id`` chaining still have something to
|
||||
recover from.
|
||||
"""
|
||||
import queue as _q
|
||||
|
||||
@@ -1269,6 +1271,60 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
final_response_text = ""
|
||||
agent_error: Optional[str] = None
|
||||
usage: Dict[str, int] = {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}
|
||||
terminal_snapshot_persisted = False
|
||||
|
||||
def _persist_response_snapshot(
    response_env: Dict[str, Any],
    *,
    conversation_history_snapshot: Optional[List[Dict[str, Any]]] = None,
) -> None:
    """Write ``response_env`` to the response store under ``response_id``.

    Does nothing unless ``store`` is truthy.  When no history snapshot is
    supplied, the stored history is a copy of ``conversation_history`` with
    the current user message appended.  If ``conversation`` is set, it is
    pointed at this response as well.
    """
    if not store:
        return

    history = conversation_history_snapshot
    if history is None:
        # Default snapshot: prior turns plus the message being answered.
        history = list(conversation_history)
        history.append({"role": "user", "content": user_message})

    record = {
        "response": response_env,
        "conversation_history": history,
        "instructions": instructions,
        "session_id": session_id,
    }
    self._response_store.put(response_id, record)
    if conversation:
        self._response_store.set_conversation(conversation, response_id)
|
||||
|
||||
def _persist_incomplete_if_needed() -> None:
    """Store an ``incomplete`` snapshot unless a terminal one already exists.

    Invoked from the client-disconnect and the ``asyncio.CancelledError``
    handlers so that GET /v1/responses/{id} and ``previous_response_id``
    chaining still have a record after the stream ends abruptly.  Whatever
    text was produced so far is kept as a trailing assistant message.
    """
    if not store or terminal_snapshot_persisted:
        return

    partial_text = "".join(final_text_parts) or final_response_text

    output_items: List[Dict[str, Any]] = list(emitted_items)
    if partial_text:
        output_items.append({
            "type": "message",
            "role": "assistant",
            "content": [{"type": "output_text", "text": partial_text}],
        })

    snapshot_env = _envelope("incomplete")
    snapshot_env["output"] = output_items
    snapshot_env["usage"] = {
        counter: usage.get(counter, 0)
        for counter in ("input_tokens", "output_tokens", "total_tokens")
    }

    history = list(conversation_history)
    history.append({"role": "user", "content": user_message})
    if partial_text:
        history.append({"role": "assistant", "content": partial_text})

    _persist_response_snapshot(
        snapshot_env,
        conversation_history_snapshot=history,
    )
|
||||
|
||||
try:
|
||||
# response.created — initial envelope, status=in_progress
|
||||
@@ -1278,6 +1334,7 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
"type": "response.created",
|
||||
"response": created_env,
|
||||
})
|
||||
_persist_response_snapshot(created_env)
|
||||
last_activity = time.monotonic()
|
||||
|
||||
async def _open_message_item() -> None:
|
||||
@@ -1534,6 +1591,18 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
"output_tokens": usage.get("output_tokens", 0),
|
||||
"total_tokens": usage.get("total_tokens", 0),
|
||||
}
|
||||
_failed_history = list(conversation_history)
|
||||
_failed_history.append({"role": "user", "content": user_message})
|
||||
if final_response_text or agent_error:
|
||||
_failed_history.append({
|
||||
"role": "assistant",
|
||||
"content": final_response_text or agent_error,
|
||||
})
|
||||
_persist_response_snapshot(
|
||||
failed_env,
|
||||
conversation_history_snapshot=_failed_history,
|
||||
)
|
||||
terminal_snapshot_persisted = True
|
||||
await _write_event("response.failed", {
|
||||
"type": "response.failed",
|
||||
"response": failed_env,
|
||||
@@ -1546,30 +1615,24 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
"output_tokens": usage.get("output_tokens", 0),
|
||||
"total_tokens": usage.get("total_tokens", 0),
|
||||
}
|
||||
full_history = list(conversation_history)
|
||||
full_history.append({"role": "user", "content": user_message})
|
||||
if isinstance(result, dict) and result.get("messages"):
|
||||
full_history.extend(result["messages"])
|
||||
else:
|
||||
full_history.append({"role": "assistant", "content": final_response_text})
|
||||
_persist_response_snapshot(
|
||||
completed_env,
|
||||
conversation_history_snapshot=full_history,
|
||||
)
|
||||
terminal_snapshot_persisted = True
|
||||
await _write_event("response.completed", {
|
||||
"type": "response.completed",
|
||||
"response": completed_env,
|
||||
})
|
||||
|
||||
# Persist for future chaining / GET retrieval, mirroring
|
||||
# the batch path behavior.
|
||||
if store:
|
||||
full_history = list(conversation_history)
|
||||
full_history.append({"role": "user", "content": user_message})
|
||||
if isinstance(result, dict) and result.get("messages"):
|
||||
full_history.extend(result["messages"])
|
||||
else:
|
||||
full_history.append({"role": "assistant", "content": final_response_text})
|
||||
self._response_store.put(response_id, {
|
||||
"response": completed_env,
|
||||
"conversation_history": full_history,
|
||||
"instructions": instructions,
|
||||
"session_id": session_id,
|
||||
})
|
||||
if conversation:
|
||||
self._response_store.set_conversation(conversation, response_id)
|
||||
|
||||
except (ConnectionResetError, ConnectionAbortedError, BrokenPipeError, OSError):
|
||||
_persist_incomplete_if_needed()
|
||||
# Client disconnected — interrupt the agent so it stops
|
||||
# making upstream LLM calls, then cancel the task.
|
||||
agent = agent_ref[0] if agent_ref else None
|
||||
@@ -1585,6 +1648,22 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
except (asyncio.CancelledError, Exception):
|
||||
pass
|
||||
logger.info("SSE client disconnected; interrupted agent task %s", response_id)
|
||||
except asyncio.CancelledError:
|
||||
# Server-side cancellation (e.g. shutdown, request timeout) —
|
||||
# persist an incomplete snapshot so GET /v1/responses/{id} and
|
||||
# previous_response_id chaining still work, then re-raise so the
|
||||
# runtime's cancellation semantics are respected.
|
||||
_persist_incomplete_if_needed()
|
||||
agent = agent_ref[0] if agent_ref else None
|
||||
if agent is not None:
|
||||
try:
|
||||
agent.interrupt("SSE task cancelled")
|
||||
except Exception:
|
||||
pass
|
||||
if not agent_task.done():
|
||||
agent_task.cancel()
|
||||
logger.info("SSE task cancelled; persisted incomplete snapshot for %s", response_id)
|
||||
raise
|
||||
|
||||
return response
|
||||
|
||||
|
||||
+112
-3
@@ -148,7 +148,102 @@ def _detect_macos_system_proxy() -> str | None:
|
||||
return None
|
||||
|
||||
|
||||
def resolve_proxy_url(platform_env_var: str | None = None) -> str | None:
|
||||
def _split_host_port(value: str) -> tuple[str, int | None]:
|
||||
raw = str(value or "").strip()
|
||||
if not raw:
|
||||
return "", None
|
||||
if "://" in raw:
|
||||
parsed = urlsplit(raw)
|
||||
return (parsed.hostname or "").lower().rstrip("."), parsed.port
|
||||
if raw.startswith("[") and "]" in raw:
|
||||
host, _, rest = raw[1:].partition("]")
|
||||
port = None
|
||||
if rest.startswith(":") and rest[1:].isdigit():
|
||||
port = int(rest[1:])
|
||||
return host.lower().rstrip("."), port
|
||||
if raw.count(":") == 1:
|
||||
host, _, maybe_port = raw.rpartition(":")
|
||||
if maybe_port.isdigit():
|
||||
return host.lower().rstrip("."), int(maybe_port)
|
||||
return raw.lower().strip("[]").rstrip("."), None
|
||||
|
||||
|
||||
def _no_proxy_entries() -> list[str]:
|
||||
entries: list[str] = []
|
||||
for key in ("NO_PROXY", "no_proxy"):
|
||||
raw = os.environ.get(key, "")
|
||||
entries.extend(part.strip() for part in raw.split(",") if part.strip())
|
||||
return entries
|
||||
|
||||
|
||||
def _no_proxy_entry_matches(entry: str, host: str, port: int | None = None) -> bool:
    """Return True when a single NO_PROXY ``entry`` covers ``host`` (and ``port``).

    Matching is attempted in this order:

    1. ``*`` matches everything.
    2. An entry carrying a port only matches when ``port`` is given and equal.
    3. An entry that parses as an IP network (CIDR or single address) matches
       when ``host`` is an IP address inside it; a non-IP ``host`` never
       matches such an entry.
    4. ``*.suffix`` and ``.suffix`` match by domain suffix (the latter also
       matches the bare domain); a plain name matches itself and its
       subdomains.

    ``host`` is expected to be already normalised (lower-case) by the caller.
    """
    pattern = str(entry or "").strip().lower()
    if pattern == "*":
        return True
    if not pattern:
        return False

    pattern_host, pattern_port = _split_host_port(pattern)
    # A port-qualified entry requires the target to carry the same port.
    if pattern_port is not None and pattern_port != port:
        return False
    if not pattern_host:
        return False

    # CIDR ranges and plain IP literals (treated as /32 or /128 networks).
    try:
        network = ipaddress.ip_network(pattern_host, strict=False)
    except ValueError:
        network = None
    if network is not None:
        try:
            return ipaddress.ip_address(host) in network
        except ValueError:
            return False

    # IP literal that did not parse as a network.
    try:
        literal = ipaddress.ip_address(pattern_host)
    except ValueError:
        literal = None
    if literal is not None:
        try:
            return ipaddress.ip_address(host) == literal
        except ValueError:
            return False

    # Host-name patterns.
    if pattern_host.startswith("*."):
        return host.endswith(pattern_host[1:])
    if pattern_host.startswith("."):
        return host == pattern_host[1:] or host.endswith(pattern_host)
    return host == pattern_host or host.endswith(f".{pattern_host}")
|
||||
|
||||
|
||||
def should_bypass_proxy(target_hosts: str | list[str] | tuple[str, ...] | set[str] | None) -> bool:
    """Return True when NO_PROXY/no_proxy matches at least one target host.

    Supports exact hosts, domain suffixes, wildcard suffixes, IP literals,
    CIDR ranges, optional host:port entries, and ``*``.

    A single string is treated as one target; any other iterable is
    checked element by element.  Targets that yield no host after
    splitting are ignored.
    """
    rules = _no_proxy_entries()
    if not (rules and target_hosts):
        return False

    targets = [target_hosts] if isinstance(target_hosts, str) else list(target_hosts)
    for target in targets:
        host, port = _split_host_port(str(target))
        if host and any(_no_proxy_entry_matches(rule, host, port) for rule in rules):
            return True
    return False
|
||||
|
||||
|
||||
def resolve_proxy_url(
|
||||
platform_env_var: str | None = None,
|
||||
*,
|
||||
target_hosts: str | list[str] | tuple[str, ...] | set[str] | None = None,
|
||||
) -> str | None:
|
||||
"""Return a proxy URL from env vars, or macOS system proxy.
|
||||
|
||||
Check order:
|
||||
@@ -156,18 +251,26 @@ def resolve_proxy_url(platform_env_var: str | None = None) -> str | None:
|
||||
1. HTTPS_PROXY / HTTP_PROXY / ALL_PROXY (and lowercase variants)
|
||||
2. macOS system proxy via ``scutil --proxy`` (auto-detect)
|
||||
|
||||
Returns *None* if no proxy is found.
|
||||
Returns *None* if no proxy is found, or if NO_PROXY/no_proxy matches one
|
||||
of ``target_hosts``.
|
||||
"""
|
||||
if platform_env_var:
|
||||
value = (os.environ.get(platform_env_var) or "").strip()
|
||||
if value:
|
||||
if should_bypass_proxy(target_hosts):
|
||||
return None
|
||||
return normalize_proxy_url(value)
|
||||
for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY",
|
||||
"https_proxy", "http_proxy", "all_proxy"):
|
||||
value = (os.environ.get(key) or "").strip()
|
||||
if value:
|
||||
if should_bypass_proxy(target_hosts):
|
||||
return None
|
||||
return normalize_proxy_url(value)
|
||||
return normalize_proxy_url(_detect_macos_system_proxy())
|
||||
detected = normalize_proxy_url(_detect_macos_system_proxy())
|
||||
if detected and should_bypass_proxy(target_hosts):
|
||||
return None
|
||||
return detected
|
||||
|
||||
|
||||
def proxy_kwargs_for_bot(proxy_url: str | None) -> dict:
|
||||
@@ -2440,6 +2543,9 @@ class BasePlatformAdapter(ABC):
|
||||
user_id_alt: Optional[str] = None,
|
||||
chat_id_alt: Optional[str] = None,
|
||||
is_bot: bool = False,
|
||||
guild_id: Optional[str] = None,
|
||||
parent_chat_id: Optional[str] = None,
|
||||
message_id: Optional[str] = None,
|
||||
) -> SessionSource:
|
||||
"""Helper to build a SessionSource for this platform."""
|
||||
# Normalize empty topic to None
|
||||
@@ -2457,6 +2563,9 @@ class BasePlatformAdapter(ABC):
|
||||
user_id_alt=user_id_alt,
|
||||
chat_id_alt=chat_id_alt,
|
||||
is_bot=is_bot,
|
||||
guild_id=str(guild_id) if guild_id else None,
|
||||
parent_chat_id=str(parent_chat_id) if parent_chat_id else None,
|
||||
message_id=str(message_id) if message_id else None,
|
||||
)
|
||||
|
||||
@abstractmethod
|
||||
|
||||
@@ -99,6 +99,7 @@ def _normalize_server_url(raw: str) -> str:
|
||||
|
||||
class BlueBubblesAdapter(BasePlatformAdapter):
|
||||
platform = Platform.BLUEBUBBLES
|
||||
SUPPORTS_MESSAGE_EDITING = False
|
||||
MAX_MESSAGE_LENGTH = MAX_TEXT_LENGTH
|
||||
|
||||
def __init__(self, config: PlatformConfig):
|
||||
@@ -391,6 +392,13 @@ class BlueBubblesAdapter(BasePlatformAdapter):
|
||||
# Text sending
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
@staticmethod
def truncate_message(content: str, max_length: int = MAX_TEXT_LENGTH) -> List[str]:
    """Split ``content`` with the base splitter, minus pagination markers.

    The chunks come from ``BasePlatformAdapter.truncate_message``; a
    trailing "(n/m)" indicator (and the whitespace before it) is removed
    from each one, since iMessage bubbles flow naturally without them.
    """
    pagination_suffix = re.compile(r"\s*\(\d+/\d+\)$")
    pieces = BasePlatformAdapter.truncate_message(content, max_length)
    return [pagination_suffix.sub("", piece) for piece in pieces]
|
||||
|
||||
async def send(
|
||||
self,
|
||||
chat_id: str,
|
||||
@@ -398,10 +406,19 @@ class BlueBubblesAdapter(BasePlatformAdapter):
|
||||
reply_to: Optional[str] = None,
|
||||
metadata: Optional[Dict[str, Any]] = None,
|
||||
) -> SendResult:
|
||||
text = strip_markdown(content or "")
|
||||
text = self.format_message(content)
|
||||
if not text:
|
||||
return SendResult(success=False, error="BlueBubbles send requires text")
|
||||
chunks = self.truncate_message(text, max_length=self.MAX_MESSAGE_LENGTH)
|
||||
# Split on paragraph breaks first (double newlines) so each thought
|
||||
# becomes its own iMessage bubble, then truncate any that are still
|
||||
# too long.
|
||||
paragraphs = [p.strip() for p in re.split(r'\n\s*\n', text) if p.strip()]
|
||||
chunks: List[str] = []
|
||||
for para in (paragraphs or [text]):
|
||||
if len(para) <= self.MAX_MESSAGE_LENGTH:
|
||||
chunks.append(para)
|
||||
else:
|
||||
chunks.extend(self.truncate_message(para, max_length=self.MAX_MESSAGE_LENGTH))
|
||||
last = SendResult(success=True)
|
||||
for chunk in chunks:
|
||||
guid = await self._resolve_chat_guid(chat_id)
|
||||
|
||||
@@ -3261,6 +3261,7 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
if auto_thread and not skip_thread and not is_voice_linked_channel and not is_reply_message:
|
||||
thread = await self._auto_create_thread(message)
|
||||
if thread:
|
||||
parent_channel_id = str(message.channel.id)
|
||||
is_thread = True
|
||||
thread_id = str(thread.id)
|
||||
auto_threaded_channel = thread
|
||||
@@ -3320,6 +3321,9 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
thread_id=thread_id,
|
||||
chat_topic=chat_topic,
|
||||
is_bot=getattr(message.author, "bot", False),
|
||||
guild_id=str(message.guild.id) if message.guild else None,
|
||||
parent_chat_id=parent_channel_id,
|
||||
message_id=str(message.id),
|
||||
)
|
||||
|
||||
# Build media URLs -- download image attachments to local cache so the
|
||||
|
||||
@@ -532,6 +532,20 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
)
|
||||
await crypto_store.open()
|
||||
|
||||
# Bind the store to the runtime device_id before any
|
||||
# put_account() runs. PgCryptoStore defaults _device_id
|
||||
# to "" and its crypto_account UPSERT never updates the
|
||||
# device_id column on conflict — so once put_account
|
||||
# writes blank, it stays blank forever. That breaks
|
||||
# every downstream device-scoped olm operation: peer
|
||||
# to-device ciphertext can't find our identity key and
|
||||
# no megolm sessions ever land. Setting _device_id here
|
||||
# (in-memory; the on-disk row may not exist yet) makes
|
||||
# the first put_account write the correct value.
|
||||
# DeviceID is a NewType(str) so plain str works at runtime.
|
||||
if client.device_id:
|
||||
await crypto_store.put_device_id(client.device_id)
|
||||
|
||||
crypto_state = _CryptoStateStore(state_store, self._joined_rooms)
|
||||
olm = OlmMachine(client, crypto_store, crypto_state)
|
||||
|
||||
|
||||
@@ -703,7 +703,6 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
"write_timeout": _env_float("HERMES_TELEGRAM_HTTP_WRITE_TIMEOUT", 20.0),
|
||||
}
|
||||
|
||||
proxy_url = resolve_proxy_url("TELEGRAM_PROXY")
|
||||
disable_fallback = (os.getenv("HERMES_TELEGRAM_DISABLE_FALLBACK_IPS", "").strip().lower() in ("1", "true", "yes", "on"))
|
||||
fallback_ips = self._fallback_ips()
|
||||
if not fallback_ips:
|
||||
@@ -714,6 +713,8 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
", ".join(fallback_ips),
|
||||
)
|
||||
|
||||
proxy_targets = ["api.telegram.org", *fallback_ips]
|
||||
proxy_url = resolve_proxy_url("TELEGRAM_PROXY", target_hosts=proxy_targets)
|
||||
if fallback_ips and not proxy_url and not disable_fallback:
|
||||
logger.info(
|
||||
"[%s] Telegram fallback IPs active: %s",
|
||||
|
||||
@@ -43,10 +43,10 @@ _DOH_PROVIDERS: list[dict] = [
|
||||
_SEED_FALLBACK_IPS: list[str] = ["149.154.167.220"]
|
||||
|
||||
|
||||
def _resolve_proxy_url() -> str | None:
|
||||
def _resolve_proxy_url(target_hosts=None) -> str | None:
|
||||
# Delegate to shared implementation (env vars + macOS system proxy detection)
|
||||
from gateway.platforms.base import resolve_proxy_url
|
||||
return resolve_proxy_url("TELEGRAM_PROXY")
|
||||
return resolve_proxy_url("TELEGRAM_PROXY", target_hosts=target_hosts)
|
||||
|
||||
|
||||
class TelegramFallbackTransport(httpx.AsyncBaseTransport):
|
||||
@@ -60,7 +60,7 @@ class TelegramFallbackTransport(httpx.AsyncBaseTransport):
|
||||
|
||||
def __init__(self, fallback_ips: Iterable[str], **transport_kwargs):
|
||||
self._fallback_ips = [ip for ip in dict.fromkeys(_normalize_fallback_ips(fallback_ips))]
|
||||
proxy_url = _resolve_proxy_url()
|
||||
proxy_url = _resolve_proxy_url(target_hosts=[_TELEGRAM_API_HOST, *self._fallback_ips])
|
||||
if proxy_url and "proxy" not in transport_kwargs:
|
||||
transport_kwargs["proxy"] = proxy_url
|
||||
self._primary = httpx.AsyncHTTPTransport(**transport_kwargs)
|
||||
|
||||
+106
-267
@@ -298,50 +298,16 @@ from gateway.restart import (
|
||||
)
|
||||
|
||||
|
||||
def _normalize_whatsapp_identifier(value: str) -> str:
|
||||
"""Strip WhatsApp JID/LID syntax down to its stable numeric identifier."""
|
||||
return (
|
||||
str(value or "")
|
||||
.strip()
|
||||
.replace("+", "", 1)
|
||||
.split(":", 1)[0]
|
||||
.split("@", 1)[0]
|
||||
)
|
||||
from gateway.whatsapp_identity import (
|
||||
canonical_whatsapp_identifier as _canonical_whatsapp_identifier, # noqa: F401
|
||||
expand_whatsapp_aliases as _expand_whatsapp_auth_aliases,
|
||||
normalize_whatsapp_identifier as _normalize_whatsapp_identifier,
|
||||
)
|
||||
|
||||
|
||||
def _expand_whatsapp_auth_aliases(identifier: str) -> set:
|
||||
"""Resolve WhatsApp phone/LID aliases using bridge session mapping files."""
|
||||
normalized = _normalize_whatsapp_identifier(identifier)
|
||||
if not normalized:
|
||||
return set()
|
||||
|
||||
session_dir = _hermes_home / "whatsapp" / "session"
|
||||
resolved = set()
|
||||
queue = [normalized]
|
||||
|
||||
while queue:
|
||||
current = queue.pop(0)
|
||||
if not current or current in resolved:
|
||||
continue
|
||||
|
||||
resolved.add(current)
|
||||
for suffix in ("", "_reverse"):
|
||||
mapping_path = session_dir / f"lid-mapping-{current}{suffix}.json"
|
||||
if not mapping_path.exists():
|
||||
continue
|
||||
try:
|
||||
mapped = _normalize_whatsapp_identifier(
|
||||
json.loads(mapping_path.read_text(encoding="utf-8"))
|
||||
)
|
||||
except Exception:
|
||||
continue
|
||||
if mapped and mapped not in resolved:
|
||||
queue.append(mapped)
|
||||
|
||||
return resolved
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Sentinel placed into _running_agents immediately when a session starts
|
||||
# processing, *before* any await. Prevents a second message for the same
|
||||
# session from bypassing the "already running" guard during the async gap
|
||||
@@ -558,7 +524,7 @@ def _load_gateway_config() -> dict:
|
||||
def _resolve_gateway_model(config: dict | None = None) -> str:
|
||||
"""Read model from config.yaml — single source of truth.
|
||||
|
||||
Without this, temporary AIAgent instances (memory flush, /compress) fall
|
||||
Without this, temporary AIAgent instances (e.g. /compress) fall
|
||||
back to the hardcoded default which fails when the active provider is
|
||||
openai-codex.
|
||||
"""
|
||||
@@ -949,129 +915,6 @@ class GatewayRunner:
|
||||
e,
|
||||
)
|
||||
|
||||
# -----------------------------------------------------------------
|
||||
|
||||
def _flush_memories_for_session(
|
||||
self,
|
||||
old_session_id: str,
|
||||
session_key: Optional[str] = None,
|
||||
):
|
||||
"""Prompt the agent to save memories/skills before context is lost.
|
||||
|
||||
Synchronous worker — meant to be called via run_in_executor from
|
||||
an async context so it doesn't block the event loop.
|
||||
"""
|
||||
# Skip cron sessions — they run headless with no meaningful user
|
||||
# conversation to extract memories from.
|
||||
if old_session_id and old_session_id.startswith("cron_"):
|
||||
logger.debug("Skipping memory flush for cron session: %s", old_session_id)
|
||||
return
|
||||
|
||||
try:
|
||||
history = self.session_store.load_transcript(old_session_id)
|
||||
if not history or len(history) < 4:
|
||||
return
|
||||
|
||||
from run_agent import AIAgent
|
||||
model, runtime_kwargs = self._resolve_session_agent_runtime(
|
||||
session_key=session_key,
|
||||
)
|
||||
if not runtime_kwargs.get("api_key"):
|
||||
return
|
||||
|
||||
tmp_agent = AIAgent(
|
||||
**runtime_kwargs,
|
||||
model=model,
|
||||
max_iterations=8,
|
||||
quiet_mode=True,
|
||||
skip_memory=True, # Flush agent — no memory provider
|
||||
enabled_toolsets=["memory", "skills"],
|
||||
session_id=old_session_id,
|
||||
)
|
||||
try:
|
||||
# Fully silence the flush agent — quiet_mode only suppresses init
|
||||
# messages; tool call output still leaks to the terminal through
|
||||
# _safe_print → _print_fn. Set a no-op to prevent that.
|
||||
tmp_agent._print_fn = lambda *a, **kw: None
|
||||
|
||||
# Build conversation history from transcript
|
||||
msgs = [
|
||||
{"role": m.get("role"), "content": m.get("content")}
|
||||
for m in history
|
||||
if m.get("role") in ("user", "assistant") and m.get("content")
|
||||
]
|
||||
|
||||
# Read live memory state from disk so the flush agent can see
|
||||
# what's already saved and avoid overwriting newer entries.
|
||||
_current_memory = ""
|
||||
try:
|
||||
from tools.memory_tool import get_memory_dir
|
||||
_mem_dir = get_memory_dir()
|
||||
for fname, label in [
|
||||
("MEMORY.md", "MEMORY (your personal notes)"),
|
||||
("USER.md", "USER PROFILE (who the user is)"),
|
||||
]:
|
||||
fpath = _mem_dir / fname
|
||||
if fpath.exists():
|
||||
content = fpath.read_text(encoding="utf-8").strip()
|
||||
if content:
|
||||
_current_memory += f"\n\n## Current {label}:\n{content}"
|
||||
except Exception:
|
||||
pass # Non-fatal — flush still works, just without the guard
|
||||
|
||||
# Give the agent a real turn to think about what to save
|
||||
flush_prompt = (
|
||||
"[System: This session is about to be automatically reset due to "
|
||||
"inactivity or a scheduled daily reset. The conversation context "
|
||||
"will be cleared after this turn.\n\n"
|
||||
"Review the conversation above and:\n"
|
||||
"1. Save any important facts, preferences, or decisions to memory "
|
||||
"(user profile or your notes) that would be useful in future sessions.\n"
|
||||
"2. If you discovered a reusable workflow or solved a non-trivial "
|
||||
"problem, consider saving it as a skill.\n"
|
||||
"3. If nothing is worth saving, that's fine — just skip.\n\n"
|
||||
)
|
||||
|
||||
if _current_memory:
|
||||
flush_prompt += (
|
||||
"IMPORTANT — here is the current live state of memory. Other "
|
||||
"sessions, cron jobs, or the user may have updated it since this "
|
||||
"conversation ended. Do NOT overwrite or remove entries unless "
|
||||
"the conversation above reveals something that genuinely "
|
||||
"supersedes them. Only add new information that is not already "
|
||||
"captured below."
|
||||
f"{_current_memory}\n\n"
|
||||
)
|
||||
|
||||
flush_prompt += (
|
||||
"Do NOT respond to the user. Just use the memory and skill_manage "
|
||||
"tools if needed, then stop.]"
|
||||
)
|
||||
|
||||
tmp_agent.run_conversation(
|
||||
user_message=flush_prompt,
|
||||
conversation_history=msgs,
|
||||
)
|
||||
finally:
|
||||
self._cleanup_agent_resources(tmp_agent)
|
||||
logger.info("Pre-reset memory flush completed for session %s", old_session_id)
|
||||
except Exception as e:
|
||||
logger.debug("Pre-reset memory flush failed for session %s: %s", old_session_id, e)
|
||||
|
||||
async def _async_flush_memories(
|
||||
self,
|
||||
old_session_id: str,
|
||||
session_key: Optional[str] = None,
|
||||
):
|
||||
"""Run the sync memory flush in a thread pool so it won't block the event loop."""
|
||||
loop = asyncio.get_running_loop()
|
||||
await loop.run_in_executor(
|
||||
None,
|
||||
self._flush_memories_for_session,
|
||||
old_session_id,
|
||||
session_key,
|
||||
)
|
||||
|
||||
@property
|
||||
def should_exit_cleanly(self) -> bool:
|
||||
return self._exit_cleanly
|
||||
@@ -1137,7 +980,7 @@ class GatewayRunner:
|
||||
if override_runtime.get("api_key"):
|
||||
logger.debug(
|
||||
"Session model override (fast): session=%s config_model=%s -> override_model=%s provider=%s",
|
||||
(resolved_session_key or "")[:30], model, override_model,
|
||||
resolved_session_key or "", model, override_model,
|
||||
override_runtime.get("provider"),
|
||||
)
|
||||
return override_model, override_runtime
|
||||
@@ -1145,12 +988,12 @@ class GatewayRunner:
|
||||
# resolution and apply model/provider from the override on top.
|
||||
logger.debug(
|
||||
"Session model override (no api_key, fallback): session=%s config_model=%s override_model=%s",
|
||||
(resolved_session_key or "")[:30], model, override_model,
|
||||
resolved_session_key or "", model, override_model,
|
||||
)
|
||||
else:
|
||||
logger.debug(
|
||||
"No session model override: session=%s config_model=%s override_keys=%s",
|
||||
(resolved_session_key or "")[:30], model,
|
||||
resolved_session_key or "", model,
|
||||
list(self._session_model_overrides.keys())[:5] if self._session_model_overrides else "[]",
|
||||
)
|
||||
|
||||
@@ -1721,7 +1564,7 @@ class GatewayRunner:
|
||||
continue
|
||||
try:
|
||||
agent.interrupt(reason)
|
||||
logger.debug("Interrupted running agent for session %s during shutdown", session_key[:20])
|
||||
logger.debug("Interrupted running agent for session %s during shutdown", session_key)
|
||||
except Exception as e:
|
||||
logger.debug("Failed interrupting agent during shutdown: %s", e)
|
||||
|
||||
@@ -1893,7 +1736,7 @@ class GatewayRunner:
|
||||
logger.warning(
|
||||
"Auto-suspended stuck session %s (active across %d "
|
||||
"consecutive restarts — likely a stuck loop)",
|
||||
session_key[:30], counts[session_key],
|
||||
session_key, counts[session_key],
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
@@ -2306,7 +2149,7 @@ class GatewayRunner:
|
||||
except Exception as e:
|
||||
logger.error("Recovered watcher setup error: %s", e)
|
||||
|
||||
# Start background session expiry watcher for proactive memory flushing
|
||||
# Start background session expiry watcher to finalize expired sessions
|
||||
asyncio.create_task(self._session_expiry_watcher())
|
||||
|
||||
# Start background reconnection watcher for platforms that failed at startup
|
||||
@@ -2323,25 +2166,24 @@ class GatewayRunner:
|
||||
return True
|
||||
|
||||
async def _session_expiry_watcher(self, interval: int = 300):
|
||||
"""Background task that proactively flushes memories for expired sessions.
|
||||
|
||||
Runs every `interval` seconds (default 5 min). For each session that
|
||||
has expired according to its reset policy, flushes memories in a thread
|
||||
pool and marks the session so it won't be flushed again.
|
||||
"""Background task that finalizes expired sessions.
|
||||
|
||||
This means memories are already saved by the time the user sends their
|
||||
next message, so there's no blocking delay.
|
||||
Runs every ``interval`` seconds (default 5 min). For each session
|
||||
whose reset policy has expired, invokes ``on_session_finalize``
|
||||
hooks, cleans up the cached AIAgent's tool resources, evicts the
|
||||
cache entry so it can be garbage-collected, and marks the session
|
||||
so it won't be finalized again.
|
||||
"""
|
||||
await asyncio.sleep(60) # initial delay — let the gateway fully start
|
||||
_flush_failures: dict[str, int] = {} # session_id -> consecutive failure count
|
||||
_MAX_FLUSH_RETRIES = 3
|
||||
_finalize_failures: dict[str, int] = {} # session_id -> consecutive failure count
|
||||
_MAX_FINALIZE_RETRIES = 3
|
||||
while self._running:
|
||||
try:
|
||||
self.session_store._ensure_loaded()
|
||||
# Collect expired sessions first, then log a single summary.
|
||||
_expired_entries = []
|
||||
for key, entry in list(self.session_store._entries.items()):
|
||||
if entry.memory_flushed:
|
||||
if entry.expiry_finalized:
|
||||
continue
|
||||
if not self.session_store._is_session_expired(entry):
|
||||
continue
|
||||
@@ -2359,13 +2201,12 @@ class GatewayRunner:
|
||||
f"{p}:{c}" for p, c in sorted(_platforms.items())
|
||||
)
|
||||
logger.info(
|
||||
"Session expiry: %d sessions to flush (%s)",
|
||||
"Session expiry: %d sessions to finalize (%s)",
|
||||
len(_expired_entries), _plat_summary,
|
||||
)
|
||||
|
||||
for key, entry in _expired_entries:
|
||||
try:
|
||||
await self._async_flush_memories(entry.session_id, key)
|
||||
try:
|
||||
from hermes_cli.plugins import invoke_hook as _invoke_hook
|
||||
_parts = key.split(":")
|
||||
@@ -2397,48 +2238,48 @@ class GatewayRunner:
|
||||
# be garbage-collected. Otherwise the cache grows
|
||||
# unbounded across the gateway's lifetime.
|
||||
self._evict_cached_agent(key)
|
||||
# Mark as flushed and persist to disk so the flag
|
||||
# Mark as finalized and persist to disk so the flag
|
||||
# survives gateway restarts.
|
||||
with self.session_store._lock:
|
||||
entry.memory_flushed = True
|
||||
entry.expiry_finalized = True
|
||||
self.session_store._save()
|
||||
logger.debug(
|
||||
"Memory flush completed for session %s",
|
||||
"Session expiry finalized for %s",
|
||||
entry.session_id,
|
||||
)
|
||||
_flush_failures.pop(entry.session_id, None)
|
||||
_finalize_failures.pop(entry.session_id, None)
|
||||
except Exception as e:
|
||||
failures = _flush_failures.get(entry.session_id, 0) + 1
|
||||
_flush_failures[entry.session_id] = failures
|
||||
if failures >= _MAX_FLUSH_RETRIES:
|
||||
failures = _finalize_failures.get(entry.session_id, 0) + 1
|
||||
_finalize_failures[entry.session_id] = failures
|
||||
if failures >= _MAX_FINALIZE_RETRIES:
|
||||
logger.warning(
|
||||
"Memory flush gave up after %d attempts for %s: %s. "
|
||||
"Marking as flushed to prevent infinite retry loop.",
|
||||
"Session finalize gave up after %d attempts for %s: %s. "
|
||||
"Marking as finalized to prevent infinite retry loop.",
|
||||
failures, entry.session_id, e,
|
||||
)
|
||||
with self.session_store._lock:
|
||||
entry.memory_flushed = True
|
||||
entry.expiry_finalized = True
|
||||
self.session_store._save()
|
||||
_flush_failures.pop(entry.session_id, None)
|
||||
_finalize_failures.pop(entry.session_id, None)
|
||||
else:
|
||||
logger.debug(
|
||||
"Memory flush failed (%d/%d) for %s: %s",
|
||||
failures, _MAX_FLUSH_RETRIES, entry.session_id, e,
|
||||
"Session finalize failed (%d/%d) for %s: %s",
|
||||
failures, _MAX_FINALIZE_RETRIES, entry.session_id, e,
|
||||
)
|
||||
|
||||
if _expired_entries:
|
||||
_flushed = sum(
|
||||
1 for _, e in _expired_entries if e.memory_flushed
|
||||
_done = sum(
|
||||
1 for _, e in _expired_entries if e.expiry_finalized
|
||||
)
|
||||
_failed = len(_expired_entries) - _flushed
|
||||
_failed = len(_expired_entries) - _done
|
||||
if _failed:
|
||||
logger.info(
|
||||
"Session expiry done: %d flushed, %d pending retry",
|
||||
_flushed, _failed,
|
||||
"Session expiry done: %d finalized, %d pending retry",
|
||||
_done, _failed,
|
||||
)
|
||||
else:
|
||||
logger.info(
|
||||
"Session expiry done: %d flushed", _flushed,
|
||||
"Session expiry done: %d finalized", _done,
|
||||
)
|
||||
|
||||
# Sweep agents that have been idle beyond the TTL regardless
|
||||
@@ -2715,7 +2556,7 @@ class GatewayRunner:
|
||||
except Exception as _e:
|
||||
logger.debug(
|
||||
"mark_resume_pending failed for %s: %s",
|
||||
_sk[:20], _e,
|
||||
_sk, _e,
|
||||
)
|
||||
self._interrupt_running_agents(
|
||||
_INTERRUPT_REASON_GATEWAY_RESTART if self._restart_requested else _INTERRUPT_REASON_GATEWAY_SHUTDOWN
|
||||
@@ -3037,6 +2878,7 @@ class GatewayRunner:
|
||||
Platform.QQBOT: "QQ_ALLOWED_USERS",
|
||||
}
|
||||
platform_group_env_map = {
|
||||
Platform.TELEGRAM: "TELEGRAM_GROUP_ALLOWED_USERS",
|
||||
Platform.QQBOT: "QQ_GROUP_ALLOWED_USERS",
|
||||
}
|
||||
platform_allow_all_map = {
|
||||
@@ -3093,7 +2935,7 @@ class GatewayRunner:
|
||||
# Check platform-specific and global allowlists
|
||||
platform_allowlist = os.getenv(platform_env_map.get(source.platform, ""), "").strip()
|
||||
group_allowlist = ""
|
||||
if source.chat_type == "group":
|
||||
if source.chat_type in {"group", "forum"}:
|
||||
group_allowlist = os.getenv(platform_group_env_map.get(source.platform, ""), "").strip()
|
||||
global_allowlist = os.getenv("GATEWAY_ALLOWED_USERS", "").strip()
|
||||
|
||||
@@ -3102,7 +2944,7 @@ class GatewayRunner:
|
||||
return os.getenv("GATEWAY_ALLOW_ALL_USERS", "").lower() in ("true", "1", "yes")
|
||||
|
||||
# Some platforms authorize group traffic by chat ID rather than sender ID.
|
||||
if group_allowlist and source.chat_type == "group" and source.chat_id:
|
||||
if group_allowlist and source.chat_type in {"group", "forum"} and source.chat_id:
|
||||
allowed_group_ids = {
|
||||
chat_id.strip() for chat_id in group_allowlist.split(",") if chat_id.strip()
|
||||
}
|
||||
@@ -3380,7 +3222,7 @@ class GatewayRunner:
|
||||
logger.warning(
|
||||
"Evicting stale _running_agents entry for %s "
|
||||
"(age: %.0fs, idle: %.0fs, timeout: %.0fs)%s",
|
||||
_quick_key[:30], _stale_age, _stale_idle,
|
||||
_quick_key, _stale_age, _stale_idle,
|
||||
_raw_stale_timeout, _stale_detail,
|
||||
)
|
||||
self._invalidate_session_run_generation(
|
||||
@@ -3416,7 +3258,7 @@ class GatewayRunner:
|
||||
interrupt_reason=_INTERRUPT_REASON_STOP,
|
||||
invalidation_reason="stop_command",
|
||||
)
|
||||
logger.info("STOP for session %s — agent interrupted, session lock released", _quick_key[:20])
|
||||
logger.info("STOP for session %s — agent interrupted, session lock released", _quick_key)
|
||||
return "⚡ Stopped. You can continue this session."
|
||||
|
||||
# /reset and /new must bypass the running-agent guard so they
|
||||
@@ -3482,7 +3324,7 @@ class GatewayRunner:
|
||||
try:
|
||||
accepted = running_agent.steer(steer_text)
|
||||
except Exception as exc:
|
||||
logger.warning("Steer failed for session %s: %s", _quick_key[:20], exc)
|
||||
logger.warning("Steer failed for session %s: %s", _quick_key, exc)
|
||||
return f"⚠️ Steer failed: {exc}"
|
||||
if accepted:
|
||||
preview = steer_text[:60] + ("..." if len(steer_text) > 60 else "")
|
||||
@@ -3565,7 +3407,7 @@ class GatewayRunner:
|
||||
)
|
||||
|
||||
if event.message_type == MessageType.PHOTO:
|
||||
logger.debug("PRIORITY photo follow-up for session %s — queueing without interrupt", _quick_key[:20])
|
||||
logger.debug("PRIORITY photo follow-up for session %s — queueing without interrupt", _quick_key)
|
||||
adapter = self.adapters.get(source.platform)
|
||||
if adapter:
|
||||
merge_pending_message_event(adapter._pending_messages, _quick_key, event)
|
||||
@@ -3585,7 +3427,7 @@ class GatewayRunner:
|
||||
logger.debug(
|
||||
"Telegram follow-up arrived %.2fs after run start for %s — queueing without interrupt",
|
||||
time.time() - _started_at,
|
||||
_quick_key[:20],
|
||||
_quick_key,
|
||||
)
|
||||
adapter = self.adapters.get(source.platform)
|
||||
if adapter:
|
||||
@@ -3603,7 +3445,7 @@ class GatewayRunner:
|
||||
if event.get_command() == "stop":
|
||||
# Force-clean the sentinel so the session is unlocked.
|
||||
self._release_running_agent_state(_quick_key)
|
||||
logger.info("HARD STOP (pending) for session %s — sentinel cleared", _quick_key[:20])
|
||||
logger.info("HARD STOP (pending) for session %s — sentinel cleared", _quick_key)
|
||||
return "⚡ Force-stopped. The agent was still starting — session unlocked."
|
||||
# Queue the message so it will be picked up after the
|
||||
# agent starts.
|
||||
@@ -3624,7 +3466,11 @@ class GatewayRunner:
|
||||
if self._queue_during_drain_enabled()
|
||||
else f"⏳ Gateway is {self._status_action_gerund()} and is not accepting another turn right now."
|
||||
)
|
||||
logger.debug("PRIORITY interrupt for session %s", _quick_key[:20])
|
||||
if self._busy_input_mode == "queue":
|
||||
logger.debug("PRIORITY queue follow-up for session %s", _quick_key)
|
||||
self._queue_or_replace_pending_event(_quick_key, event)
|
||||
return None
|
||||
logger.debug("PRIORITY interrupt for session %s", _quick_key)
|
||||
running_agent.interrupt(event.text)
|
||||
if _quick_key in self._pending_messages:
|
||||
self._pending_messages[_quick_key] += "\n" + event.text
|
||||
@@ -4622,7 +4468,7 @@ class GatewayRunner:
|
||||
if not self._is_session_run_current(_quick_key, run_generation):
|
||||
logger.info(
|
||||
"Discarding stale agent result for %s — generation %d is no longer current",
|
||||
_quick_key[:20] if _quick_key else "?",
|
||||
_quick_key or "?",
|
||||
run_generation,
|
||||
)
|
||||
_stale_adapter = self.adapters.get(source.platform)
|
||||
@@ -4673,7 +4519,7 @@ class GatewayRunner:
|
||||
except Exception as _e:
|
||||
logger.debug(
|
||||
"clear_resume_pending failed for %s: %s",
|
||||
session_key[:20], _e,
|
||||
session_key, _e,
|
||||
)
|
||||
|
||||
# Surface error details when the agent failed silently (final_response=None)
|
||||
@@ -5050,19 +4896,11 @@ class GatewayRunner:
|
||||
# Get existing session key
|
||||
session_key = self._session_key_for_source(source)
|
||||
self._invalidate_session_run_generation(session_key, reason="session_reset")
|
||||
|
||||
# Flush memories in the background (fire-and-forget) so the user
|
||||
# gets the "Session reset!" response immediately.
|
||||
try:
|
||||
old_entry = self.session_store._entries.get(session_key)
|
||||
if old_entry:
|
||||
_flush_task = asyncio.create_task(
|
||||
self._async_flush_memories(old_entry.session_id, session_key)
|
||||
)
|
||||
self._background_tasks.add(_flush_task)
|
||||
_flush_task.add_done_callback(self._background_tasks.discard)
|
||||
except Exception as e:
|
||||
logger.debug("Gateway memory flush on reset failed: %s", e)
|
||||
|
||||
# Snapshot the old entry so on_session_finalize can report the
|
||||
# expiring session id before reset_session() rotates it.
|
||||
old_entry = self.session_store._entries.get(session_key)
|
||||
|
||||
# Close tool resources on the old agent (terminal sandboxes, browser
|
||||
# daemons, background processes) before evicting from cache.
|
||||
# Guard with getattr because test fixtures may skip __init__.
|
||||
@@ -5320,7 +5158,7 @@ class GatewayRunner:
|
||||
interrupt_reason=_INTERRUPT_REASON_STOP,
|
||||
invalidation_reason="stop_command_pending",
|
||||
)
|
||||
logger.info("STOP (pending) for session %s — sentinel cleared", session_key[:20])
|
||||
logger.info("STOP (pending) for session %s — sentinel cleared", session_key)
|
||||
return "⚡ Stopped. The agent hadn't started yet — you can continue this session."
|
||||
if agent:
|
||||
# Force-clean the session lock so a truly hung agent doesn't
|
||||
@@ -5688,9 +5526,17 @@ class GatewayRunner:
|
||||
lines = [f"Model switched to `{result.new_model}`"]
|
||||
lines.append(f"Provider: {plabel}")
|
||||
mi = result.model_info
|
||||
from hermes_cli.model_switch import resolve_display_context_length
|
||||
ctx = resolve_display_context_length(
|
||||
result.new_model,
|
||||
result.target_provider,
|
||||
base_url=result.base_url or current_base_url or "",
|
||||
api_key=result.api_key or current_api_key or "",
|
||||
model_info=mi,
|
||||
)
|
||||
if ctx:
|
||||
lines.append(f"Context: {ctx:,} tokens")
|
||||
if mi:
|
||||
if mi.context_window:
|
||||
lines.append(f"Context: {mi.context_window:,} tokens")
|
||||
if mi.max_output:
|
||||
lines.append(f"Max output: {mi.max_output:,} tokens")
|
||||
if mi.has_cost_data():
|
||||
@@ -5824,28 +5670,25 @@ class GatewayRunner:
|
||||
lines = [f"Model switched to `{result.new_model}`"]
|
||||
lines.append(f"Provider: {provider_label}")
|
||||
|
||||
# Rich metadata from models.dev
|
||||
# Context: always resolve via the provider-aware chain so Codex OAuth,
|
||||
# Copilot, and Nous-enforced caps win over the raw models.dev entry.
|
||||
mi = result.model_info
|
||||
from hermes_cli.model_switch import resolve_display_context_length
|
||||
ctx = resolve_display_context_length(
|
||||
result.new_model,
|
||||
result.target_provider,
|
||||
base_url=result.base_url or current_base_url or "",
|
||||
api_key=result.api_key or current_api_key or "",
|
||||
model_info=mi,
|
||||
)
|
||||
if ctx:
|
||||
lines.append(f"Context: {ctx:,} tokens")
|
||||
if mi:
|
||||
if mi.context_window:
|
||||
lines.append(f"Context: {mi.context_window:,} tokens")
|
||||
if mi.max_output:
|
||||
lines.append(f"Max output: {mi.max_output:,} tokens")
|
||||
if mi.has_cost_data():
|
||||
lines.append(f"Cost: {mi.format_cost()}")
|
||||
lines.append(f"Capabilities: {mi.format_capabilities()}")
|
||||
else:
|
||||
try:
|
||||
from agent.model_metadata import get_model_context_length
|
||||
ctx = get_model_context_length(
|
||||
result.new_model,
|
||||
base_url=result.base_url or current_base_url,
|
||||
api_key=result.api_key or current_api_key,
|
||||
provider=result.target_provider,
|
||||
)
|
||||
lines.append(f"Context: {ctx:,} tokens")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Cache notice
|
||||
cache_enabled = (
|
||||
@@ -7257,29 +7100,25 @@ class GatewayRunner:
|
||||
logger.debug("Failed to list titled sessions: %s", e)
|
||||
return f"Could not list sessions: {e}"
|
||||
|
||||
# Resolve the name to a session ID
|
||||
# Resolve the name to a session ID.
|
||||
target_id = self._session_db.resolve_session_by_title(name)
|
||||
if not target_id:
|
||||
return (
|
||||
f"No session found matching '**{name}**'.\n"
|
||||
"Use `/resume` with no arguments to see available sessions."
|
||||
)
|
||||
# Compression creates child continuations that hold the live transcript.
|
||||
# Follow that chain so gateway /resume matches CLI behavior (#15000).
|
||||
try:
|
||||
target_id = self._session_db.resolve_resume_session_id(target_id)
|
||||
except Exception as e:
|
||||
logger.debug("Failed to resolve resume continuation for %s: %s", target_id, e)
|
||||
|
||||
# Check if already on that session
|
||||
current_entry = self.session_store.get_or_create_session(source)
|
||||
if current_entry.session_id == target_id:
|
||||
return f"📌 Already on session **{name}**."
|
||||
|
||||
# Flush memories for current session before switching
|
||||
try:
|
||||
_flush_task = asyncio.create_task(
|
||||
self._async_flush_memories(current_entry.session_id, session_key)
|
||||
)
|
||||
self._background_tasks.add(_flush_task)
|
||||
_flush_task.add_done_callback(self._background_tasks.discard)
|
||||
except Exception as e:
|
||||
logger.debug("Memory flush on resume failed: %s", e)
|
||||
|
||||
# Clear any running agent for this session key
|
||||
self._release_running_agent_state(session_key)
|
||||
|
||||
@@ -8816,7 +8655,7 @@ class GatewayRunner:
|
||||
if reason:
|
||||
logger.info(
|
||||
"Invalidated run generation for %s → %d (%s)",
|
||||
session_key[:20],
|
||||
session_key,
|
||||
generation,
|
||||
reason,
|
||||
)
|
||||
@@ -9223,7 +9062,7 @@ class GatewayRunner:
|
||||
if not _run_still_current():
|
||||
logger.info(
|
||||
"Discarding stale proxy stream for %s — generation %d is no longer current",
|
||||
session_key[:20] if session_key else "?",
|
||||
session_key or "?",
|
||||
run_generation or 0,
|
||||
)
|
||||
return {
|
||||
@@ -9287,7 +9126,7 @@ class GatewayRunner:
|
||||
if not _run_still_current():
|
||||
logger.info(
|
||||
"Discarding stale proxy result for %s — generation %d is no longer current",
|
||||
session_key[:20] if session_key else "?",
|
||||
session_key or "?",
|
||||
run_generation or 0,
|
||||
)
|
||||
return {
|
||||
@@ -9729,7 +9568,7 @@ class GatewayRunner:
|
||||
)
|
||||
logger.debug(
|
||||
"run_agent resolved: model=%s provider=%s session=%s",
|
||||
model, runtime_kwargs.get("provider"), (session_key or "")[:30],
|
||||
model, runtime_kwargs.get("provider"), session_key or "",
|
||||
)
|
||||
except Exception as exc:
|
||||
return {
|
||||
@@ -10340,7 +10179,7 @@ class GatewayRunner:
|
||||
):
|
||||
logger.info(
|
||||
"Skipping stale agent promotion for %s — generation %s is no longer current",
|
||||
(session_key or "")[:20],
|
||||
session_key or "",
|
||||
run_generation,
|
||||
)
|
||||
return
|
||||
@@ -10487,7 +10326,7 @@ class GatewayRunner:
|
||||
logger.info(
|
||||
"Backup interrupt detected for session %s "
|
||||
"(monitor task state: %s)",
|
||||
session_key[:20],
|
||||
session_key,
|
||||
"done" if interrupt_monitor.done() else "running",
|
||||
)
|
||||
_backup_agent.interrupt(_bp_text)
|
||||
@@ -10547,7 +10386,7 @@ class GatewayRunner:
|
||||
logger.info(
|
||||
"Backup interrupt detected for session %s "
|
||||
"(monitor task state: %s)",
|
||||
session_key[:20],
|
||||
session_key,
|
||||
"done" if interrupt_monitor.done() else "running",
|
||||
)
|
||||
_backup_agent.interrupt(_bp_text)
|
||||
@@ -10649,7 +10488,7 @@ class GatewayRunner:
|
||||
if _is_control_interrupt_message(interrupt_message):
|
||||
logger.info(
|
||||
"Ignoring control interrupt message for session %s: %s",
|
||||
session_key[:20] if session_key else "?",
|
||||
session_key or "?",
|
||||
interrupt_message,
|
||||
)
|
||||
else:
|
||||
@@ -10693,7 +10532,7 @@ class GatewayRunner:
|
||||
if self._draining and (pending_event or pending):
|
||||
logger.info(
|
||||
"Discarding pending follow-up for session %s during gateway %s",
|
||||
session_key[:20] if session_key else "?",
|
||||
session_key or "?",
|
||||
self._status_action_label(),
|
||||
)
|
||||
pending_event = None
|
||||
@@ -10750,7 +10589,7 @@ class GatewayRunner:
|
||||
try:
|
||||
logger.info(
|
||||
"Queued follow-up for session %s: final stream delivery not confirmed; sending first response before continuing.",
|
||||
session_key[:20] if session_key else "?",
|
||||
session_key or "?",
|
||||
)
|
||||
await adapter.send(
|
||||
source.chat_id,
|
||||
@@ -10762,7 +10601,7 @@ class GatewayRunner:
|
||||
elif first_response:
|
||||
logger.info(
|
||||
"Queued follow-up for session %s: skipping resend because final streamed delivery was confirmed.",
|
||||
session_key[:20] if session_key else "?",
|
||||
session_key or "?",
|
||||
)
|
||||
# Release deferred bg-review notifications now that the
|
||||
# first response has been delivered. Pop from the
|
||||
@@ -10897,7 +10736,7 @@ class GatewayRunner:
|
||||
if not _is_empty_sentinel and (_streamed or _previewed):
|
||||
logger.info(
|
||||
"Suppressing normal final send for session %s: final delivery already confirmed (streamed=%s previewed=%s).",
|
||||
session_key[:20] if session_key else "?",
|
||||
session_key or "?",
|
||||
_streamed,
|
||||
_previewed,
|
||||
)
|
||||
|
||||
+97
-16
@@ -60,6 +60,10 @@ from .config import (
|
||||
SessionResetPolicy, # noqa: F401 — re-exported via gateway/__init__.py
|
||||
HomeChannel,
|
||||
)
|
||||
from .whatsapp_identity import (
|
||||
canonical_whatsapp_identifier,
|
||||
normalize_whatsapp_identifier,
|
||||
)
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -83,6 +87,9 @@ class SessionSource:
|
||||
user_id_alt: Optional[str] = None # Platform-specific stable alt ID (Signal UUID, Feishu union_id)
|
||||
chat_id_alt: Optional[str] = None # Signal group internal ID
|
||||
is_bot: bool = False # True when the message author is a bot/webhook (Discord)
|
||||
guild_id: Optional[str] = None # Discord guild / Slack workspace / Matrix server scope
|
||||
parent_chat_id: Optional[str] = None # Parent channel when chat_id refers to a thread
|
||||
message_id: Optional[str] = None # ID of the triggering message (for pin/reply/react)
|
||||
|
||||
@property
|
||||
def description(self) -> str:
|
||||
@@ -120,8 +127,14 @@ class SessionSource:
|
||||
d["user_id_alt"] = self.user_id_alt
|
||||
if self.chat_id_alt:
|
||||
d["chat_id_alt"] = self.chat_id_alt
|
||||
if self.guild_id:
|
||||
d["guild_id"] = self.guild_id
|
||||
if self.parent_chat_id:
|
||||
d["parent_chat_id"] = self.parent_chat_id
|
||||
if self.message_id:
|
||||
d["message_id"] = self.message_id
|
||||
return d
|
||||
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, data: Dict[str, Any]) -> "SessionSource":
|
||||
return cls(
|
||||
@@ -135,6 +148,9 @@ class SessionSource:
|
||||
chat_topic=data.get("chat_topic"),
|
||||
user_id_alt=data.get("user_id_alt"),
|
||||
chat_id_alt=data.get("chat_id_alt"),
|
||||
guild_id=data.get("guild_id"),
|
||||
parent_chat_id=data.get("parent_chat_id"),
|
||||
message_id=data.get("message_id"),
|
||||
)
|
||||
|
||||
|
||||
@@ -186,6 +202,31 @@ that requires raw IDs). Discord is excluded because mentions use ``<@user_id>``
|
||||
and the LLM needs the real ID to tag users."""
|
||||
|
||||
|
||||
def _discord_tools_loaded() -> bool:
|
||||
"""True iff the agent will actually have Discord tools this session.
|
||||
|
||||
Two conditions must hold:
|
||||
1. The `discord` or `discord_admin` toolset is enabled for the
|
||||
Discord platform via `hermes tools` (opt-in, default OFF).
|
||||
2. `DISCORD_BOT_TOKEN` is set — the tool's `check_fn` gates on it
|
||||
at registry time, so the toolset being enabled in config is not
|
||||
enough if the token isn't configured.
|
||||
|
||||
Returns False (safe default — keeps the stale-API disclaimer) on any
|
||||
error so a bad config can't silently promise tools the agent lacks.
|
||||
"""
|
||||
if not (os.environ.get("DISCORD_BOT_TOKEN") or "").strip():
|
||||
return False
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
from hermes_cli.tools_config import _get_platform_tools
|
||||
cfg = load_config()
|
||||
enabled = _get_platform_tools(cfg, "discord", include_default_mcp_servers=False)
|
||||
return "discord" in enabled or "discord_admin" in enabled
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
|
||||
def build_session_context_prompt(
|
||||
context: SessionContext,
|
||||
*,
|
||||
@@ -273,13 +314,44 @@ def build_session_context_prompt(
|
||||
"that you can only read messages sent directly to you and respond."
|
||||
)
|
||||
elif context.source.platform == Platform.DISCORD:
|
||||
# Inject the Discord IDs block only when the agent actually has
|
||||
# Discord tools loaded this session — i.e. the user opted into
|
||||
# `discord` / `discord_admin` via `hermes tools` AND the bot
|
||||
# token is configured. Otherwise keep the stale-API disclaimer
|
||||
# honest so we never promise tools the agent lacks.
|
||||
if _discord_tools_loaded():
|
||||
src = context.source
|
||||
id_lines = ["", "**Discord IDs (for the `discord` / `discord_admin` tools):**"]
|
||||
if src.guild_id:
|
||||
id_lines.append(f" - Guild: `{src.guild_id}`")
|
||||
if src.thread_id and src.parent_chat_id:
|
||||
id_lines.append(f" - Parent channel: `{src.parent_chat_id}`")
|
||||
id_lines.append(f" - Thread: `{src.thread_id}` (use as `channel_id` for fetch_messages etc.)")
|
||||
else:
|
||||
id_lines.append(f" - Channel: `{src.chat_id}`")
|
||||
if src.message_id:
|
||||
id_lines.append(f" - Triggering message: `{src.message_id}`")
|
||||
lines.extend(id_lines)
|
||||
else:
|
||||
lines.append("")
|
||||
lines.append(
|
||||
"**Platform notes:** You are running inside Discord. "
|
||||
"You do NOT have access to Discord-specific APIs — you cannot search "
|
||||
"channel history, pin messages, manage roles, or list server members. "
|
||||
"Do not promise to perform these actions. If the user asks, explain "
|
||||
"that you can only read messages sent directly to you and respond."
|
||||
)
|
||||
elif context.source.platform == Platform.BLUEBUBBLES:
|
||||
lines.append("")
|
||||
lines.append(
|
||||
"**Platform notes:** You are running inside Discord. "
|
||||
"You do NOT have access to Discord-specific APIs — you cannot search "
|
||||
"channel history, pin messages, manage roles, or list server members. "
|
||||
"Do not promise to perform these actions. If the user asks, explain "
|
||||
"that you can only read messages sent directly to you and respond."
|
||||
"**Platform notes:** You are responding via iMessage. "
|
||||
"Keep responses short and conversational — think texts, not essays. "
|
||||
"Structure longer replies as separate short thoughts, each separated "
|
||||
"by a blank line (double newline). Each block between blank lines "
|
||||
"will be delivered as its own iMessage bubble, so write accordingly: "
|
||||
"one idea per bubble, 1–3 sentences each. "
|
||||
"If the user needs a detailed answer, give the short version first "
|
||||
"and offer to elaborate."
|
||||
)
|
||||
|
||||
# Connected platforms
|
||||
@@ -367,11 +439,11 @@ class SessionEntry:
|
||||
auto_reset_reason: Optional[str] = None # "idle" or "daily"
|
||||
reset_had_activity: bool = False # whether the expired session had any messages
|
||||
|
||||
# Set by the background expiry watcher after it successfully flushes
|
||||
# memories for this session. Persisted to sessions.json so the flag
|
||||
# survives gateway restarts (the old in-memory _pre_flushed_sessions
|
||||
# set was lost on restart, causing redundant re-flushes).
|
||||
memory_flushed: bool = False
|
||||
# Set by the background expiry watcher after it finalizes an expired
|
||||
# session (invoking on_session_finalize hooks and evicting the cached
|
||||
# agent). Persisted to sessions.json so the flag survives gateway
|
||||
# restarts — prevents redundant finalization runs.
|
||||
expiry_finalized: bool = False
|
||||
|
||||
# When True the next call to get_or_create_session() will auto-reset
|
||||
# this session (create a new session_id) so the user starts fresh.
|
||||
@@ -407,7 +479,7 @@ class SessionEntry:
|
||||
"last_prompt_tokens": self.last_prompt_tokens,
|
||||
"estimated_cost_usd": self.estimated_cost_usd,
|
||||
"cost_status": self.cost_status,
|
||||
"memory_flushed": self.memory_flushed,
|
||||
"expiry_finalized": self.expiry_finalized,
|
||||
"suspended": self.suspended,
|
||||
"resume_pending": self.resume_pending,
|
||||
"resume_reason": self.resume_reason,
|
||||
@@ -459,7 +531,7 @@ class SessionEntry:
|
||||
last_prompt_tokens=data.get("last_prompt_tokens", 0),
|
||||
estimated_cost_usd=data.get("estimated_cost_usd", 0.0),
|
||||
cost_status=data.get("cost_status", "unknown"),
|
||||
memory_flushed=data.get("memory_flushed", False),
|
||||
expiry_finalized=data.get("expiry_finalized", data.get("memory_flushed", False)),
|
||||
suspended=data.get("suspended", False),
|
||||
resume_pending=data.get("resume_pending", False),
|
||||
resume_reason=data.get("resume_reason"),
|
||||
@@ -518,15 +590,24 @@ def build_session_key(
|
||||
"""
|
||||
platform = source.platform.value
|
||||
if source.chat_type == "dm":
|
||||
if source.chat_id:
|
||||
dm_chat_id = source.chat_id
|
||||
if source.platform == Platform.WHATSAPP:
|
||||
dm_chat_id = canonical_whatsapp_identifier(source.chat_id)
|
||||
|
||||
if dm_chat_id:
|
||||
if source.thread_id:
|
||||
return f"agent:main:{platform}:dm:{source.chat_id}:{source.thread_id}"
|
||||
return f"agent:main:{platform}:dm:{source.chat_id}"
|
||||
return f"agent:main:{platform}:dm:{dm_chat_id}:{source.thread_id}"
|
||||
return f"agent:main:{platform}:dm:{dm_chat_id}"
|
||||
if source.thread_id:
|
||||
return f"agent:main:{platform}:dm:{source.thread_id}"
|
||||
return f"agent:main:{platform}:dm"
|
||||
|
||||
participant_id = source.user_id_alt or source.user_id
|
||||
if participant_id and source.platform == Platform.WHATSAPP:
|
||||
# Same JID/LID-flip bug as the DM case: without canonicalisation, a
|
||||
# single group member gets two isolated per-user sessions when the
|
||||
# bridge reshuffles alias forms.
|
||||
participant_id = canonical_whatsapp_identifier(str(participant_id)) or participant_id
|
||||
key_parts = ["agent:main", platform, source.chat_type]
|
||||
|
||||
if source.chat_id:
|
||||
|
||||
@@ -0,0 +1,135 @@
|
||||
"""Shared helpers for canonicalising WhatsApp sender identity.
|
||||
|
||||
WhatsApp's bridge can surface the same human under two different JID shapes
|
||||
within a single conversation:
|
||||
|
||||
- LID form: ``999999999999999@lid``
|
||||
- Phone form: ``15551234567@s.whatsapp.net``
|
||||
|
||||
Both the authorisation path (:mod:`gateway.run`) and the session-key path
|
||||
(:mod:`gateway.session`) need to collapse these aliases to a single stable
|
||||
identity. This module is the single source of truth for that resolution so
|
||||
the two paths can never drift apart.
|
||||
|
||||
Public helpers:
|
||||
|
||||
- :func:`normalize_whatsapp_identifier` — strip JID/LID/device/plus syntax
|
||||
down to the bare numeric identifier.
|
||||
- :func:`canonical_whatsapp_identifier` — walk the bridge's
|
||||
``lid-mapping-*.json`` files and return a stable canonical identity
|
||||
across phone/LID variants.
|
||||
- :func:`expand_whatsapp_aliases` — return the full alias set for an
|
||||
identifier. Used by authorisation code that needs to match any known
|
||||
form of a sender against an allow-list.
|
||||
|
||||
Plugins that need per-sender behaviour on WhatsApp (role-based routing,
|
||||
per-contact authorisation, policy gating in a gateway hook) should use
|
||||
``canonical_whatsapp_identifier`` so their bookkeeping lines up with
|
||||
Hermes' own session keys.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from typing import Set
|
||||
|
||||
from hermes_constants import get_hermes_home
|
||||
|
||||
|
||||
def normalize_whatsapp_identifier(value: str) -> str:
    """Reduce a WhatsApp JID/LID/phone string to its bare identifier.

    Handles every identifier shape the WhatsApp bridge may emit, e.g.
    ``"60123456789@s.whatsapp.net"``, ``"60123456789:47@s.whatsapp.net"``,
    ``"60123456789@lid"``, ``"+60123456789"`` or plain ``"60123456789"``;
    all of these come back as ``"60123456789"``, which is suitable for
    equality comparisons.

    Handy for plugins matching sender IDs against user-supplied config
    (phone numbers in ``config.yaml``) regardless of which variant the
    bridge happens to deliver.

    Falsy input (``None``, ``""``) yields ``""``.
    """
    text = str(value or "").strip()
    # Drop the first "+" (international-prefix syntax).
    text = text.replace("+", "", 1)
    # Cut the device suffix (":47") first, then the server part ("@...").
    without_device, _, _ = text.partition(":")
    bare, _, _ = without_device.partition("@")
    return bare
|
||||
|
||||
|
||||
def expand_whatsapp_aliases(identifier: str) -> Set[str]:
    """Resolve WhatsApp phone/LID aliases via bridge session mapping files.

    Starting from ``identifier``, follows the bridge's
    ``$HERMES_HOME/whatsapp/session/lid-mapping-*.json`` files (both the
    forward and the ``_reverse`` variant) transitively and collects every
    identifier reached.

    Args:
        identifier: Any identifier shape accepted by
            :func:`normalize_whatsapp_identifier`.

    Returns:
        The set of all reachable normalized identifiers. It always
        contains the normalized input itself, so callers can ``in``-check
        against it without a separate fallback branch. An empty set is
        returned when ``identifier`` normalizes to empty.
    """
    start = normalize_whatsapp_identifier(identifier)
    if not start:
        return set()

    session_dir = get_hermes_home() / "whatsapp" / "session"
    resolved: Set[str] = set()
    # Visit order does not affect the final closure, so use the list as a
    # stack (O(1) pop) rather than a queue (O(n) pop(0)).
    pending = [start]

    while pending:
        current = pending.pop()
        if not current or current in resolved:
            continue
        resolved.add(current)

        # Identifiers originate from inbound messages. Never let one that
        # contains a path separator steer the lookup outside session_dir.
        if "/" in current or "\\" in current:
            continue

        for suffix in ("", "_reverse"):
            mapping_path = session_dir / f"lid-mapping-{current}{suffix}.json"
            try:
                payload = json.loads(mapping_path.read_text(encoding="utf-8"))
            except (OSError, ValueError, RecursionError):
                # Best effort: a missing, unreadable or malformed mapping
                # file is treated as "no alias recorded".
                continue
            # A mapping is expected to be a single scalar identifier;
            # stringifying a dict/list/bool would fabricate a junk alias.
            if isinstance(payload, bool) or not isinstance(payload, (str, int)):
                continue
            mapped = normalize_whatsapp_identifier(str(payload))
            if mapped and mapped not in resolved:
                pending.append(mapped)

    return resolved
|
||||
|
||||
|
||||
def canonical_whatsapp_identifier(identifier: str) -> str:
    """Return one stable identity for a WhatsApp user across JID/LID forms.

    The bridge may report the same person as a phone-format JID
    (``60123456789@s.whatsapp.net``) or as a LID (``1234567890@lid``), and
    may flip between them. That holds for a DM ``chat_id`` as well as for
    the ``participant_id`` of a group member.

    The identifier is normalized, expanded to its full alias set via
    :func:`expand_whatsapp_aliases`, and the shortest alias is chosen
    (ties broken by plain string ordering). Session keys built by
    :func:`gateway.session.build_session_key` use this same function, so
    plugins doing per-sender routing, authorisation or policy should call
    it too to stay aligned with Hermes' own bookkeeping.

    Returns:
        ``""`` when ``identifier`` normalizes to empty; otherwise the
        chosen alias. With no mapping files present (fresh bridge
        install) that is simply the normalized input.
    """
    bare = normalize_whatsapp_identifier(identifier)
    if not bare:
        return ""

    def _rank(alias):
        # Shorter aliases win; equal lengths fall back to string order.
        return (len(alias), alias)

    # The alias set always contains `bare` itself, so min() has at least
    # one candidate even when no lid-mapping files exist.
    candidates = expand_whatsapp_aliases(bare)
    return min(candidates, key=_rank)
|
||||
+12
-1
@@ -743,7 +743,18 @@ def _load_auth_store(auth_file: Optional[Path] = None) -> Dict[str, Any]:
|
||||
|
||||
try:
|
||||
raw = json.loads(auth_file.read_text())
|
||||
except Exception:
|
||||
except Exception as exc:
|
||||
corrupt_path = auth_file.with_suffix(".json.corrupt")
|
||||
try:
|
||||
import shutil
|
||||
shutil.copy2(auth_file, corrupt_path)
|
||||
except Exception:
|
||||
pass
|
||||
logger.warning(
|
||||
"auth: failed to parse %s (%s) — starting with empty store. "
|
||||
"Corrupt file preserved at %s",
|
||||
auth_file, exc, corrupt_path,
|
||||
)
|
||||
return {"version": AUTH_STORE_VERSION, "providers": {}}
|
||||
|
||||
if isinstance(raw, dict) and (
|
||||
|
||||
@@ -103,7 +103,8 @@ COMMAND_REGISTRY: list[CommandDef] = [
|
||||
# Configuration
|
||||
CommandDef("config", "Show current configuration", "Configuration",
|
||||
cli_only=True),
|
||||
CommandDef("model", "Switch model for this session", "Configuration", args_hint="[model] [--provider name] [--global]"),
|
||||
CommandDef("model", "Switch model for this session", "Configuration",
|
||||
aliases=("provider",), args_hint="[model] [--provider name] [--global]"),
|
||||
CommandDef("gquota", "Show Google Gemini Code Assist quota usage", "Info",
|
||||
cli_only=True),
|
||||
|
||||
@@ -126,6 +127,9 @@ COMMAND_REGISTRY: list[CommandDef] = [
|
||||
cli_only=True, args_hint="[name]"),
|
||||
CommandDef("voice", "Toggle voice mode", "Configuration",
|
||||
args_hint="[on|off|tts|status]", subcommands=("on", "off", "tts", "status")),
|
||||
CommandDef("busy", "Control what Enter does while Hermes is working", "Configuration",
|
||||
cli_only=True, args_hint="[queue|interrupt|status]",
|
||||
subcommands=("queue", "interrupt", "status")),
|
||||
|
||||
# Tools & Skills
|
||||
CommandDef("tools", "Manage tools: /tools [list|disable|enable] [name...]", "Tools & Skills",
|
||||
|
||||
+10
-9
@@ -612,14 +612,6 @@ DEFAULT_CONFIG = {
|
||||
"timeout": 30,
|
||||
"extra_body": {},
|
||||
},
|
||||
"flush_memories": {
|
||||
"provider": "auto",
|
||||
"model": "",
|
||||
"base_url": "",
|
||||
"api_key": "",
|
||||
"timeout": 30,
|
||||
"extra_body": {},
|
||||
},
|
||||
"title_generation": {
|
||||
"provider": "auto",
|
||||
"model": "",
|
||||
@@ -783,6 +775,15 @@ DEFAULT_CONFIG = {
|
||||
# warning log if out of range.
|
||||
"max_spawn_depth": 1, # depth cap (1 = flat [default], 2 = orchestrator→leaf, 3 = three-level)
|
||||
"orchestrator_enabled": True, # kill switch for role="orchestrator"
|
||||
# When a subagent hits a dangerous-command approval prompt, the parent's
|
||||
# prompt_toolkit TUI owns stdin — a thread-local input() call from the
|
||||
# subagent worker would deadlock the parent UI. To avoid the deadlock,
|
||||
# subagent threads ALWAYS resolve approvals non-interactively:
|
||||
# false (default) → auto-deny with a logger.warning audit line (safe)
|
||||
# true → auto-approve "once" with a logger.warning audit line
|
||||
# Flip to true only if you trust delegated work to run dangerous cmds
|
||||
# without human review (cron pipelines, batch automation, etc.).
|
||||
"subagent_auto_approve": False,
|
||||
},
|
||||
|
||||
# Ephemeral prefill messages file — JSON list of {role, content} dicts
|
||||
@@ -839,7 +840,7 @@ DEFAULT_CONFIG = {
|
||||
"auto_thread": True, # Auto-create threads on @mention in channels (like Slack)
|
||||
"reactions": True, # Add 👀/✅/❌ reactions to messages during processing
|
||||
"channel_prompts": {}, # Per-channel ephemeral system prompts (forum parents apply to child threads)
|
||||
# discord_server tool: restrict which actions the agent may call.
|
||||
# discord / discord_admin tools: restrict which actions the agent may call.
|
||||
# Default (empty) = all actions allowed (subject to bot privileged intents).
|
||||
# Accepts comma-separated string ("list_guilds,list_channels,fetch_messages")
|
||||
# or YAML list. Unknown names are dropped with a warning at load time.
|
||||
|
||||
+340
-80
@@ -51,6 +51,7 @@ import sys
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
|
||||
def _add_accept_hooks_flag(parser) -> None:
|
||||
"""Attach the ``--accept-hooks`` flag. Shared across every agent
|
||||
subparser so the flag works regardless of CLI position."""
|
||||
@@ -174,6 +175,7 @@ load_hermes_dotenv(project_env=PROJECT_ROOT / ".env")
|
||||
try:
|
||||
if "HERMES_REDACT_SECRETS" not in os.environ:
|
||||
import yaml as _yaml_early
|
||||
|
||||
_cfg_path = get_hermes_home() / "config.yaml"
|
||||
if _cfg_path.exists():
|
||||
with open(_cfg_path, encoding="utf-8") as _f:
|
||||
@@ -839,6 +841,8 @@ def _find_bundled_tui(tui_dir: Path) -> Optional[Path]:
|
||||
|
||||
|
||||
def _tui_build_needed(tui_dir: Path) -> bool:
|
||||
if _hermes_ink_bundle_stale(tui_dir):
|
||||
return True
|
||||
entry = tui_dir / "dist" / "entry.js"
|
||||
if not entry.exists():
|
||||
return True
|
||||
@@ -1026,7 +1030,12 @@ def _make_tui_argv(tui_dir: Path, tui_dev: bool) -> tuple[list[str], Path]:
|
||||
return [node, str(root / "dist" / "entry.js")], root
|
||||
|
||||
|
||||
def _launch_tui(resume_session_id: Optional[str] = None, tui_dev: bool = False):
|
||||
def _launch_tui(
|
||||
resume_session_id: Optional[str] = None,
|
||||
tui_dev: bool = False,
|
||||
model: Optional[str] = None,
|
||||
provider: Optional[str] = None,
|
||||
):
|
||||
"""Replace current process with the TUI."""
|
||||
tui_dir = PROJECT_ROOT / "ui-tui"
|
||||
|
||||
@@ -1036,6 +1045,12 @@ def _launch_tui(resume_session_id: Optional[str] = None, tui_dev: bool = False):
|
||||
)
|
||||
env.setdefault("HERMES_PYTHON", sys.executable)
|
||||
env.setdefault("HERMES_CWD", os.getcwd())
|
||||
if model:
|
||||
env["HERMES_MODEL"] = model
|
||||
env["HERMES_INFERENCE_MODEL"] = model
|
||||
if provider:
|
||||
env["HERMES_TUI_PROVIDER"] = provider
|
||||
env["HERMES_INFERENCE_PROVIDER"] = provider
|
||||
# Guarantee an 8GB V8 heap + exposed GC for the TUI. Default node cap is
|
||||
# ~1.5–4GB depending on version and can fatal-OOM on long sessions with
|
||||
# large transcripts / reasoning blobs. Token-level merge: respect any
|
||||
@@ -1174,6 +1189,8 @@ def cmd_chat(args):
|
||||
_launch_tui(
|
||||
getattr(args, "resume", None),
|
||||
tui_dev=getattr(args, "tui_dev", False),
|
||||
model=getattr(args, "model", None),
|
||||
provider=getattr(args, "provider", None),
|
||||
)
|
||||
|
||||
# Import and run the CLI
|
||||
@@ -1325,7 +1342,9 @@ def cmd_whatsapp(args):
|
||||
return
|
||||
|
||||
if not (bridge_dir / "node_modules").exists():
|
||||
print("\n→ Installing WhatsApp bridge dependencies (this can take a few minutes)...")
|
||||
print(
|
||||
"\n→ Installing WhatsApp bridge dependencies (this can take a few minutes)..."
|
||||
)
|
||||
npm = shutil.which("npm")
|
||||
if not npm:
|
||||
print(" ✗ npm not found on PATH — install Node.js first")
|
||||
@@ -1701,15 +1720,14 @@ def _clear_stale_openai_base_url():
|
||||
|
||||
# (task_key, display_name, short_description)
|
||||
_AUX_TASKS: list[tuple[str, str, str]] = [
|
||||
("vision", "Vision", "image/screenshot analysis"),
|
||||
("compression", "Compression", "context summarization"),
|
||||
("web_extract", "Web extract", "web page summarization"),
|
||||
("session_search", "Session search", "past-conversation recall"),
|
||||
("approval", "Approval", "smart command approval"),
|
||||
("mcp", "MCP", "MCP tool reasoning"),
|
||||
("flush_memories", "Flush memories", "memory consolidation"),
|
||||
("vision", "Vision", "image/screenshot analysis"),
|
||||
("compression", "Compression", "context summarization"),
|
||||
("web_extract", "Web extract", "web page summarization"),
|
||||
("session_search", "Session search", "past-conversation recall"),
|
||||
("approval", "Approval", "smart command approval"),
|
||||
("mcp", "MCP", "MCP tool reasoning"),
|
||||
("title_generation", "Title generation", "session titles"),
|
||||
("skills_hub", "Skills hub", "skills search/install"),
|
||||
("skills_hub", "Skills hub", "skills search/install"),
|
||||
]
|
||||
|
||||
|
||||
@@ -1808,7 +1826,7 @@ def _aux_config_menu() -> None:
|
||||
print(" Auxiliary models — side-task routing")
|
||||
print()
|
||||
print(" Side tasks (vision, compression, web extraction, etc.) default")
|
||||
print(" to your main chat model. \"auto\" means \"use my main model\" —")
|
||||
print(' to your main chat model. "auto" means "use my main model" —')
|
||||
print(" Hermes only falls back to a lightweight backend (OpenRouter,")
|
||||
print(" Nous Portal) if the main model is unavailable. Override a")
|
||||
print(" task below if you want it pinned to a specific provider/model.")
|
||||
@@ -1819,15 +1837,20 @@ def _aux_config_menu() -> None:
|
||||
desc_col = max(len(desc) for _, _, desc in _AUX_TASKS) + 4
|
||||
entries: list[tuple[str, str]] = []
|
||||
for task_key, name, desc in _AUX_TASKS:
|
||||
task_cfg = aux.get(task_key, {}) if isinstance(aux.get(task_key), dict) else {}
|
||||
task_cfg = (
|
||||
aux.get(task_key, {}) if isinstance(aux.get(task_key), dict) else {}
|
||||
)
|
||||
current = _format_aux_current(task_cfg)
|
||||
label = f"{name.ljust(name_col)}{('(' + desc + ')').ljust(desc_col)}{current}"
|
||||
label = (
|
||||
f"{name.ljust(name_col)}{('(' + desc + ')').ljust(desc_col)}{current}"
|
||||
)
|
||||
entries.append((task_key, label))
|
||||
entries.append(("__reset__", "Reset all to auto"))
|
||||
entries.append(("__back__", "Back"))
|
||||
entries.append(("__back__", "Back"))
|
||||
|
||||
idx = _prompt_provider_choice(
|
||||
[label for _, label in entries], default=0,
|
||||
[label for _, label in entries],
|
||||
default=0,
|
||||
)
|
||||
if idx is None:
|
||||
return
|
||||
@@ -1875,7 +1898,9 @@ def _aux_select_for_task(task: str) -> None:
|
||||
|
||||
entries: list[tuple[str, str, list[str]]] = [] # (slug, label, models)
|
||||
# "auto" always first
|
||||
auto_marker = " ← current" if current_provider == "auto" and not current_base_url else ""
|
||||
auto_marker = (
|
||||
" ← current" if current_provider == "auto" and not current_base_url else ""
|
||||
)
|
||||
entries.append(("__auto__", f"auto (recommended){auto_marker}", []))
|
||||
|
||||
for p in providers:
|
||||
@@ -1884,7 +1909,9 @@ def _aux_select_for_task(task: str) -> None:
|
||||
total = p.get("total_models", 0)
|
||||
models = p.get("models") or []
|
||||
model_hint = f" — {total} models" if total else ""
|
||||
marker = " ← current" if slug == current_provider and not current_base_url else ""
|
||||
marker = (
|
||||
" ← current" if slug == current_provider and not current_base_url else ""
|
||||
)
|
||||
entries.append((slug, f"{name}{model_hint}{marker}", list(models)))
|
||||
|
||||
# Custom endpoint (raw base_url)
|
||||
@@ -1952,14 +1979,17 @@ def _aux_flow_provider_model(
|
||||
selected = val or ""
|
||||
else:
|
||||
selected = _prompt_model_selection(
|
||||
model_list, current_model=current_model, pricing=pricing,
|
||||
model_list,
|
||||
current_model=current_model,
|
||||
pricing=pricing,
|
||||
)
|
||||
if selected is None:
|
||||
print("No change.")
|
||||
return
|
||||
|
||||
_save_aux_choice(task, provider=provider_slug, model=selected or "",
|
||||
base_url="", api_key="")
|
||||
_save_aux_choice(
|
||||
task, provider=provider_slug, model=selected or "", base_url="", api_key=""
|
||||
)
|
||||
if selected:
|
||||
print(f"{display_name}: {provider_slug} · {selected}")
|
||||
else:
|
||||
@@ -1979,7 +2009,9 @@ def _aux_flow_custom_endpoint(task: str, task_cfg: dict) -> None:
|
||||
print(" Provide an OpenAI-compatible base URL (e.g. http://localhost:11434/v1)")
|
||||
print()
|
||||
try:
|
||||
url_prompt = f"Base URL [{current_base_url}]: " if current_base_url else "Base URL: "
|
||||
url_prompt = (
|
||||
f"Base URL [{current_base_url}]: " if current_base_url else "Base URL: "
|
||||
)
|
||||
url = input(url_prompt).strip()
|
||||
except (KeyboardInterrupt, EOFError):
|
||||
print()
|
||||
@@ -1989,20 +2021,30 @@ def _aux_flow_custom_endpoint(task: str, task_cfg: dict) -> None:
|
||||
print("No URL provided. No change.")
|
||||
return
|
||||
try:
|
||||
model_prompt = f"Model slug (optional) [{current_model}]: " if current_model else "Model slug (optional): "
|
||||
model_prompt = (
|
||||
f"Model slug (optional) [{current_model}]: "
|
||||
if current_model
|
||||
else "Model slug (optional): "
|
||||
)
|
||||
model = input(model_prompt).strip()
|
||||
except (KeyboardInterrupt, EOFError):
|
||||
print()
|
||||
return
|
||||
model = model or current_model
|
||||
try:
|
||||
api_key = getpass.getpass("API key (optional, blank = use OPENAI_API_KEY): ").strip()
|
||||
api_key = getpass.getpass(
|
||||
"API key (optional, blank = use OPENAI_API_KEY): "
|
||||
).strip()
|
||||
except (KeyboardInterrupt, EOFError):
|
||||
print()
|
||||
return
|
||||
|
||||
_save_aux_choice(
|
||||
task, provider="custom", model=model, base_url=url, api_key=api_key,
|
||||
task,
|
||||
provider="custom",
|
||||
model=model,
|
||||
base_url=url,
|
||||
api_key=api_key,
|
||||
)
|
||||
short_url = url.replace("https://", "").replace("http://", "").rstrip("/")
|
||||
print(f"{display_name}: custom ({short_url})" + (f" · {model}" if model else ""))
|
||||
@@ -2118,7 +2160,9 @@ def _model_flow_ai_gateway(config, current_model=""):
|
||||
api_key = get_env_value("AI_GATEWAY_API_KEY")
|
||||
if not api_key:
|
||||
print("No Vercel AI Gateway API key configured.")
|
||||
print("Create API key here: https://vercel.com/d?to=%2F%5Bteam%5D%2F%7E%2Fai-gateway&title=AI+Gateway")
|
||||
print(
|
||||
"Create API key here: https://vercel.com/d?to=%2F%5Bteam%5D%2F%7E%2Fai-gateway&title=AI+Gateway"
|
||||
)
|
||||
print("Add a payment method to get $5 in free credits.")
|
||||
print()
|
||||
try:
|
||||
@@ -2918,7 +2962,9 @@ def _model_flow_named_custom(config, provider_info):
|
||||
|
||||
print("Fetching available models...")
|
||||
models = fetch_api_models(
|
||||
api_key, base_url, timeout=8.0,
|
||||
api_key,
|
||||
base_url,
|
||||
timeout=8.0,
|
||||
api_mode=api_mode or None,
|
||||
)
|
||||
|
||||
@@ -3589,7 +3635,12 @@ def _model_flow_stepfun(config, current_model=""):
|
||||
_save_model_choice,
|
||||
deactivate_provider,
|
||||
)
|
||||
from hermes_cli.config import get_env_value, save_env_value, load_config, save_config
|
||||
from hermes_cli.config import (
|
||||
get_env_value,
|
||||
save_env_value,
|
||||
load_config,
|
||||
save_config,
|
||||
)
|
||||
from hermes_cli.models import fetch_api_models
|
||||
|
||||
provider_id = "stepfun"
|
||||
@@ -3608,6 +3659,7 @@ def _model_flow_stepfun(config, current_model=""):
|
||||
if key_env:
|
||||
try:
|
||||
import getpass
|
||||
|
||||
new_key = getpass.getpass(f"{key_env} (or Enter to cancel): ").strip()
|
||||
except (KeyboardInterrupt, EOFError):
|
||||
print()
|
||||
@@ -3633,7 +3685,10 @@ def _model_flow_stepfun(config, current_model=""):
|
||||
current_region = _infer_stepfun_region(current_base or pconfig.inference_base_url)
|
||||
|
||||
region_choices = [
|
||||
("international", f"International ({_stepfun_base_url_for_region('international')})"),
|
||||
(
|
||||
"international",
|
||||
f"International ({_stepfun_base_url_for_region('international')})",
|
||||
),
|
||||
("china", f"China ({_stepfun_base_url_for_region('china')})"),
|
||||
]
|
||||
ordered_regions = []
|
||||
@@ -4476,6 +4531,7 @@ def cmd_webhook(args):
|
||||
def cmd_hooks(args):
    """Entry point for shell-hook inspection and management.

    Hands the parsed ``args`` straight to
    ``hermes_cli.hooks.hooks_command``; nothing is returned.
    """
    # The hooks module is imported only when this subcommand runs.
    from hermes_cli.hooks import hooks_command as _run_hooks

    _run_hooks(args)
|
||||
|
||||
|
||||
@@ -6046,6 +6102,86 @@ def _cmd_update_impl(args, gateway_mode: bool):
|
||||
)
|
||||
import signal as _signal
|
||||
|
||||
def _wait_for_service_active(
|
||||
scope_cmd_: list,
|
||||
svc_name_: str,
|
||||
timeout: float = 10.0,
|
||||
) -> bool:
|
||||
"""Poll ``systemctl is-active`` until the unit reports active.
|
||||
|
||||
systemd's Stopped -> Started transition after a graceful exit
|
||||
(or a hard restart) is not instantaneous; a one-shot check
|
||||
races that window and falsely reports the unit as down.
|
||||
Poll every 0.5s up to ``timeout`` seconds before giving up.
|
||||
"""
|
||||
deadline = _time.monotonic() + max(timeout, 0.5)
|
||||
while True:
|
||||
try:
|
||||
_verify = subprocess.run(
|
||||
scope_cmd_ + ["is-active", svc_name_],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=5,
|
||||
)
|
||||
if _verify.stdout.strip() == "active":
|
||||
return True
|
||||
except (FileNotFoundError, subprocess.TimeoutExpired):
|
||||
pass
|
||||
if _time.monotonic() >= deadline:
|
||||
return False
|
||||
_time.sleep(0.5)
|
||||
|
||||
def _service_restart_sec(
|
||||
scope_cmd_: list,
|
||||
svc_name_: str,
|
||||
default: float = 0.0,
|
||||
) -> float:
|
||||
"""Read the unit's ``RestartUSec`` (RestartSec) in seconds.
|
||||
|
||||
After a graceful exit-75, systemd waits ``RestartSec`` before
|
||||
respawning the unit. Callers that poll for ``is-active``
|
||||
must use a timeout >= ``RestartSec`` + transition slack, or
|
||||
they'll give up *during* the cooldown window and wrongly
|
||||
conclude the unit didn't relaunch.
|
||||
"""
|
||||
try:
|
||||
_show = subprocess.run(
|
||||
scope_cmd_
|
||||
+ [
|
||||
"show",
|
||||
svc_name_,
|
||||
"--property=RestartUSec",
|
||||
"--value",
|
||||
],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=5,
|
||||
)
|
||||
except (FileNotFoundError, subprocess.TimeoutExpired):
|
||||
return default
|
||||
raw = (_show.stdout or "").strip()
|
||||
# systemd emits values like "30s", "100ms", "1min 30s", or
|
||||
# "infinity". Parse conservatively; on any miss return default.
|
||||
if not raw or raw == "infinity":
|
||||
return default
|
||||
total = 0.0
|
||||
matched = False
|
||||
for part in raw.split():
|
||||
for _suf, _mult in (
|
||||
("ms", 0.001),
|
||||
("us", 0.000001),
|
||||
("min", 60.0),
|
||||
("s", 1.0),
|
||||
):
|
||||
if part.endswith(_suf):
|
||||
try:
|
||||
total += float(part[: -len(_suf)]) * _mult
|
||||
matched = True
|
||||
except ValueError:
|
||||
pass
|
||||
break
|
||||
return total if matched else default
|
||||
|
||||
# Drain budget for graceful SIGUSR1 restarts. The gateway drains
|
||||
# for up to ``agent.restart_drain_timeout`` (default 60s) before
|
||||
# exiting with code 75; we wait slightly longer so the drain
|
||||
@@ -6061,12 +6197,17 @@ def _cmd_update_impl(args, gateway_mode: bool):
|
||||
_cfg_drain = None
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
_cfg_agent = (load_config().get("agent") or {})
|
||||
|
||||
_cfg_agent = load_config().get("agent") or {}
|
||||
_cfg_drain = _cfg_agent.get("restart_drain_timeout")
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
_drain_budget = float(_cfg_drain) if _cfg_drain is not None else float(_DEFAULT_DRAIN)
|
||||
_drain_budget = (
|
||||
float(_cfg_drain)
|
||||
if _cfg_drain is not None
|
||||
else float(_DEFAULT_DRAIN)
|
||||
)
|
||||
except (TypeError, ValueError):
|
||||
_drain_budget = float(_DEFAULT_DRAIN)
|
||||
# Add a 15s margin so the drain loop + final exit finish before
|
||||
@@ -6131,14 +6272,23 @@ def _cmd_update_impl(args, gateway_mode: bool):
|
||||
_main_pid = 0
|
||||
try:
|
||||
_show = subprocess.run(
|
||||
scope_cmd + [
|
||||
"show", svc_name,
|
||||
"--property=MainPID", "--value",
|
||||
scope_cmd
|
||||
+ [
|
||||
"show",
|
||||
svc_name,
|
||||
"--property=MainPID",
|
||||
"--value",
|
||||
],
|
||||
capture_output=True, text=True, timeout=5,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=5,
|
||||
)
|
||||
_main_pid = int((_show.stdout or "").strip() or 0)
|
||||
except (ValueError, subprocess.TimeoutExpired, FileNotFoundError):
|
||||
except (
|
||||
ValueError,
|
||||
subprocess.TimeoutExpired,
|
||||
FileNotFoundError,
|
||||
):
|
||||
_main_pid = 0
|
||||
|
||||
_graceful_ok = False
|
||||
@@ -6147,19 +6297,33 @@ def _cmd_update_impl(args, gateway_mode: bool):
|
||||
f" → {svc_name}: draining (up to {int(_drain_budget)}s)..."
|
||||
)
|
||||
_graceful_ok = _graceful_restart_via_sigusr1(
|
||||
_main_pid, drain_timeout=_drain_budget,
|
||||
_main_pid,
|
||||
drain_timeout=_drain_budget,
|
||||
)
|
||||
|
||||
if _graceful_ok:
|
||||
# Gateway exited 75; systemd should relaunch
|
||||
# via Restart=on-failure. Verify the new
|
||||
# process came up.
|
||||
_time.sleep(3)
|
||||
verify = subprocess.run(
|
||||
scope_cmd + ["is-active", svc_name],
|
||||
capture_output=True, text=True, timeout=5,
|
||||
# via Restart=on-failure. The unit's
|
||||
# RestartSec (default 30s on ours) gates the
|
||||
# respawn — poll past that + slack so we
|
||||
# don't give up mid-cooldown and falsely
|
||||
# print "drained but didn't relaunch". For
|
||||
# units without RestartSec set we fall back
|
||||
# to the original 10s budget.
|
||||
_restart_sec = _service_restart_sec(
|
||||
scope_cmd,
|
||||
svc_name,
|
||||
default=0.0,
|
||||
)
|
||||
if verify.stdout.strip() == "active":
|
||||
_post_drain_timeout = max(
|
||||
10.0,
|
||||
_restart_sec + 10.0,
|
||||
)
|
||||
if _wait_for_service_active(
|
||||
scope_cmd,
|
||||
svc_name,
|
||||
timeout=_post_drain_timeout,
|
||||
):
|
||||
restarted_services.append(svc_name)
|
||||
continue
|
||||
# Process exited but wasn't respawned (older
|
||||
@@ -6185,14 +6349,11 @@ def _cmd_update_impl(args, gateway_mode: bool):
|
||||
# Verify the service actually survived the
|
||||
# restart. systemctl restart returns 0 even
|
||||
# if the new process crashes immediately.
|
||||
_time.sleep(3)
|
||||
verify = subprocess.run(
|
||||
scope_cmd + ["is-active", svc_name],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=5,
|
||||
)
|
||||
if verify.stdout.strip() == "active":
|
||||
if _wait_for_service_active(
|
||||
scope_cmd,
|
||||
svc_name,
|
||||
timeout=10.0,
|
||||
):
|
||||
restarted_services.append(svc_name)
|
||||
else:
|
||||
# Retry once — transient startup failures
|
||||
@@ -6207,14 +6368,11 @@ def _cmd_update_impl(args, gateway_mode: bool):
|
||||
text=True,
|
||||
timeout=15,
|
||||
)
|
||||
_time.sleep(3)
|
||||
verify2 = subprocess.run(
|
||||
scope_cmd + ["is-active", svc_name],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=5,
|
||||
)
|
||||
if verify2.stdout.strip() == "active":
|
||||
if _wait_for_service_active(
|
||||
scope_cmd,
|
||||
svc_name,
|
||||
timeout=10.0,
|
||||
):
|
||||
restarted_services.append(svc_name)
|
||||
print(f" ✓ {svc_name} recovered on retry")
|
||||
else:
|
||||
@@ -6715,9 +6873,15 @@ def cmd_dashboard(args):
|
||||
try:
|
||||
import fastapi # noqa: F401
|
||||
import uvicorn # noqa: F401
|
||||
except ImportError:
|
||||
print("Web UI dependencies not installed.")
|
||||
print(f"Install them with: {sys.executable} -m pip install 'fastapi' 'uvicorn[standard]'")
|
||||
except ImportError as e:
|
||||
print("Web UI dependencies not installed (need fastapi + uvicorn).")
|
||||
print(
|
||||
f"Re-install the package into this interpreter so metadata updates apply:\n"
|
||||
f" cd {PROJECT_ROOT}\n"
|
||||
f" {sys.executable} -m pip install -e .\n"
|
||||
"If `pip` is missing in this venv, use: uv pip install -e ."
|
||||
)
|
||||
print(f"Import error: {e}")
|
||||
sys.exit(1)
|
||||
|
||||
if "HERMES_WEB_DIST" not in os.environ:
|
||||
@@ -6726,11 +6890,17 @@ def cmd_dashboard(args):
|
||||
|
||||
from hermes_cli.web_server import start_server
|
||||
|
||||
gui_mode = getattr(args, "gui", False)
|
||||
embedded_chat = (
|
||||
gui_mode or args.tui or os.environ.get("HERMES_DASHBOARD_TUI") == "1"
|
||||
)
|
||||
start_server(
|
||||
host=args.host,
|
||||
port=args.port,
|
||||
open_browser=not args.no_open,
|
||||
allow_public=getattr(args, "insecure", False),
|
||||
embedded_chat=embedded_chat,
|
||||
gui_mode=gui_mode,
|
||||
)
|
||||
|
||||
|
||||
@@ -6813,6 +6983,40 @@ For more help on a command:
|
||||
parser.add_argument(
|
||||
"--version", "-V", action="store_true", help="Show version and exit"
|
||||
)
|
||||
parser.add_argument(
|
||||
"-z",
|
||||
"--oneshot",
|
||||
metavar="PROMPT",
|
||||
default=None,
|
||||
help=(
|
||||
"One-shot mode: send a single prompt and print ONLY the final "
|
||||
"response text to stdout. No banner, no spinner, no tool "
|
||||
"previews, no session_id line. Tools, memory, rules, and "
|
||||
"AGENTS.md in the CWD are loaded as normal; approvals are "
|
||||
"auto-bypassed. Intended for scripts / pipes."
|
||||
),
|
||||
)
|
||||
# --model / --provider are accepted at the top level so they can pair
|
||||
# with -z without needing the `chat` subcommand. If neither -z nor a
|
||||
# subcommand consumes them, they fall through harmlessly as None.
|
||||
# Mirrors `hermes chat --model ... --provider ...` semantics.
|
||||
parser.add_argument(
|
||||
"-m",
|
||||
"--model",
|
||||
default=None,
|
||||
help=(
|
||||
"Model override for this invocation (e.g. anthropic/claude-sonnet-4.6). "
|
||||
"Applies to -z/--oneshot and --tui. Also settable via HERMES_INFERENCE_MODEL env var."
|
||||
),
|
||||
)
|
||||
parser.add_argument(
|
||||
"--provider",
|
||||
default=None,
|
||||
help=(
|
||||
"Provider override for this invocation (e.g. openrouter, anthropic). "
|
||||
"Applies to -z/--oneshot and --tui. Also settable via HERMES_INFERENCE_PROVIDER env var."
|
||||
),
|
||||
)
|
||||
parser.add_argument(
|
||||
"--resume",
|
||||
"-r",
|
||||
@@ -7390,17 +7594,39 @@ For more help on a command:
|
||||
"reset", help="Clear exhaustion status for all credentials for a provider"
|
||||
)
|
||||
auth_reset.add_argument("provider", help="Provider id")
|
||||
auth_status = auth_subparsers.add_parser("status", help="Show auth status for a provider")
|
||||
auth_status = auth_subparsers.add_parser(
|
||||
"status", help="Show auth status for a provider"
|
||||
)
|
||||
auth_status.add_argument("provider", help="Provider id")
|
||||
auth_logout = auth_subparsers.add_parser("logout", help="Log out a provider and clear stored auth state")
|
||||
auth_logout = auth_subparsers.add_parser(
|
||||
"logout", help="Log out a provider and clear stored auth state"
|
||||
)
|
||||
auth_logout.add_argument("provider", help="Provider id")
|
||||
auth_spotify = auth_subparsers.add_parser("spotify", help="Authenticate Hermes with Spotify via PKCE")
|
||||
auth_spotify.add_argument("spotify_action", nargs="?", choices=["login", "status", "logout"], default="login")
|
||||
auth_spotify.add_argument("--client-id", help="Spotify app client_id (or set HERMES_SPOTIFY_CLIENT_ID)")
|
||||
auth_spotify.add_argument("--redirect-uri", help="Allow-listed localhost redirect URI for your Spotify app")
|
||||
auth_spotify = auth_subparsers.add_parser(
|
||||
"spotify", help="Authenticate Hermes with Spotify via PKCE"
|
||||
)
|
||||
auth_spotify.add_argument(
|
||||
"spotify_action",
|
||||
nargs="?",
|
||||
choices=["login", "status", "logout"],
|
||||
default="login",
|
||||
)
|
||||
auth_spotify.add_argument(
|
||||
"--client-id", help="Spotify app client_id (or set HERMES_SPOTIFY_CLIENT_ID)"
|
||||
)
|
||||
auth_spotify.add_argument(
|
||||
"--redirect-uri",
|
||||
help="Allow-listed localhost redirect URI for your Spotify app",
|
||||
)
|
||||
auth_spotify.add_argument("--scope", help="Override requested Spotify scopes")
|
||||
auth_spotify.add_argument("--no-browser", action="store_true", help="Do not attempt to open the browser automatically")
|
||||
auth_spotify.add_argument("--timeout", type=float, help="Callback/token exchange timeout in seconds")
|
||||
auth_spotify.add_argument(
|
||||
"--no-browser",
|
||||
action="store_true",
|
||||
help="Do not attempt to open the browser automatically",
|
||||
)
|
||||
auth_spotify.add_argument(
|
||||
"--timeout", type=float, help="Callback/token exchange timeout in seconds"
|
||||
)
|
||||
auth_parser.set_defaults(func=cmd_auth)
|
||||
|
||||
# =========================================================================
|
||||
@@ -7610,7 +7836,8 @@ For more help on a command:
|
||||
hooks_subparsers = hooks_parser.add_subparsers(dest="hooks_action")
|
||||
|
||||
hooks_subparsers.add_parser(
|
||||
"list", aliases=["ls"],
|
||||
"list",
|
||||
aliases=["ls"],
|
||||
help="List configured hooks with matcher, timeout, and consent status",
|
||||
)
|
||||
|
||||
@@ -7623,14 +7850,18 @@ For more help on a command:
|
||||
help="Hook event name (e.g. pre_tool_call, pre_llm_call, subagent_stop)",
|
||||
)
|
||||
_hk_test.add_argument(
|
||||
"--for-tool", dest="for_tool", default=None,
|
||||
"--for-tool",
|
||||
dest="for_tool",
|
||||
default=None,
|
||||
help=(
|
||||
"Only fire hooks whose matcher matches this tool name "
|
||||
"(used for pre_tool_call / post_tool_call)"
|
||||
),
|
||||
)
|
||||
_hk_test.add_argument(
|
||||
"--payload-file", dest="payload_file", default=None,
|
||||
"--payload-file",
|
||||
dest="payload_file",
|
||||
default=None,
|
||||
help=(
|
||||
"Path to a JSON file whose contents are merged into the "
|
||||
"synthetic payload before execution"
|
||||
@@ -7638,7 +7869,8 @@ For more help on a command:
|
||||
)
|
||||
|
||||
_hk_revoke = hooks_subparsers.add_parser(
|
||||
"revoke", aliases=["remove", "rm"],
|
||||
"revoke",
|
||||
aliases=["remove", "rm"],
|
||||
help="Remove a command's allowlist entries (takes effect on next restart)",
|
||||
)
|
||||
_hk_revoke.add_argument(
|
||||
@@ -8916,6 +9148,19 @@ Examples:
|
||||
action="store_true",
|
||||
help="Allow binding to non-localhost (DANGEROUS: exposes API keys on the network)",
|
||||
)
|
||||
dashboard_parser.add_argument(
|
||||
"--tui",
|
||||
action="store_true",
|
||||
help=(
|
||||
"Expose the in-browser Chat tab (embedded `hermes --tui` via PTY/WebSocket). "
|
||||
"Alternatively set HERMES_DASHBOARD_TUI=1."
|
||||
),
|
||||
)
|
||||
dashboard_parser.add_argument(
|
||||
"--gui",
|
||||
action="store_true",
|
||||
help="Run dashboard in GUI-shell mode; implies --tui",
|
||||
)
|
||||
dashboard_parser.set_defaults(func=cmd_dashboard)
|
||||
|
||||
# =========================================================================
|
||||
@@ -9058,26 +9303,28 @@ Examples:
|
||||
# the nested subcommand (dest varies by parser).
|
||||
_AGENT_COMMANDS = {None, "chat", "acp", "rl"}
|
||||
_AGENT_SUBCOMMANDS = {
|
||||
"cron": ("cron_command", {"run", "tick"}),
|
||||
"cron": ("cron_command", {"run", "tick"}),
|
||||
"gateway": ("gateway_command", {"run"}),
|
||||
"mcp": ("mcp_action", {"serve"}),
|
||||
"mcp": ("mcp_action", {"serve"}),
|
||||
}
|
||||
_sub_attr, _sub_set = _AGENT_SUBCOMMANDS.get(args.command, (None, None))
|
||||
if (
|
||||
args.command in _AGENT_COMMANDS
|
||||
or (_sub_attr and getattr(args, _sub_attr, None) in _sub_set)
|
||||
if args.command in _AGENT_COMMANDS or (
|
||||
_sub_attr and getattr(args, _sub_attr, None) in _sub_set
|
||||
):
|
||||
_accept_hooks = bool(getattr(args, "accept_hooks", False))
|
||||
try:
|
||||
from hermes_cli.plugins import discover_plugins
|
||||
|
||||
discover_plugins()
|
||||
except Exception:
|
||||
logger.debug(
|
||||
"plugin discovery failed at CLI startup", exc_info=True,
|
||||
"plugin discovery failed at CLI startup",
|
||||
exc_info=True,
|
||||
)
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
from agent.shell_hooks import register_from_config
|
||||
|
||||
register_from_config(load_config(), accept_hooks=_accept_hooks)
|
||||
except Exception:
|
||||
logger.debug(
|
||||
@@ -9085,6 +9332,19 @@ Examples:
|
||||
exc_info=True,
|
||||
)
|
||||
|
||||
# Handle top-level --oneshot / -z: single-shot mode, stdout = final
|
||||
# response only, nothing else. Bypasses cli.py entirely.
|
||||
if getattr(args, "oneshot", None):
|
||||
from hermes_cli.oneshot import run_oneshot
|
||||
|
||||
sys.exit(
|
||||
run_oneshot(
|
||||
args.oneshot,
|
||||
model=getattr(args, "model", None),
|
||||
provider=getattr(args, "provider", None),
|
||||
)
|
||||
)
|
||||
|
||||
# Handle top-level --resume / --continue as shortcut to chat
|
||||
if (args.resume or args.continue_last) and args.command is None:
|
||||
args.command = "chat"
|
||||
|
||||
@@ -527,6 +527,42 @@ def _resolve_alias_fallback(
|
||||
return None
|
||||
|
||||
|
||||
def resolve_display_context_length(
    model: str,
    provider: str,
    base_url: str = "",
    api_key: str = "",
    model_info: Optional[ModelInfo] = None,
) -> Optional[int]:
    """Pick the context length that /model output should display.

    The provider-aware resolver
    ``agent.model_metadata.get_model_context_length`` is consulted first,
    because a provider may enforce a lower limit than the per-vendor
    figure reported by models.dev (e.g. Codex OAuth caps gpt-5.5 at 272k
    while models.dev lists 1.05M for openai).

    ``model_info.context_window`` is used only when that resolver cannot
    be imported, raises, or returns a falsy value.

    Returns:
        The context length as an ``int``, or ``None`` when neither source
        provides one.
    """
    provider_aware: Optional[int] = None
    try:
        from agent.model_metadata import get_model_context_length

        reported = get_model_context_length(
            model,
            base_url=base_url or "",
            api_key=api_key or "",
            provider=provider or None,
        )
        if reported:
            provider_aware = int(reported)
    except Exception:
        # Best-effort lookup: any failure falls through to model_info.
        provider_aware = None

    if provider_aware is not None:
        return provider_aware

    if model_info is None:
        return None
    if not model_info.context_window:
        return None
    return int(model_info.context_window)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Core model-switching pipeline
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
+110
-59
@@ -42,7 +42,7 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [
|
||||
("anthropic/claude-sonnet-4.5", ""),
|
||||
("anthropic/claude-haiku-4.5", ""),
|
||||
("openrouter/elephant-alpha", "free"),
|
||||
("openai/gpt-5.4", ""),
|
||||
("openai/gpt-5.5", ""),
|
||||
("openai/gpt-5.4-mini", ""),
|
||||
("xiaomi/mimo-v2.5-pro", ""),
|
||||
("xiaomi/mimo-v2.5", ""),
|
||||
@@ -65,7 +65,7 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [
|
||||
("nvidia/nemotron-3-super-120b-a12b:free", "free"),
|
||||
("arcee-ai/trinity-large-preview:free", "free"),
|
||||
("arcee-ai/trinity-large-thinking", ""),
|
||||
("openai/gpt-5.4-pro", ""),
|
||||
("openai/gpt-5.5-pro", ""),
|
||||
("openai/gpt-5.4-nano", ""),
|
||||
]
|
||||
|
||||
@@ -120,7 +120,7 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
|
||||
"anthropic/claude-sonnet-4.6",
|
||||
"anthropic/claude-sonnet-4.5",
|
||||
"anthropic/claude-haiku-4.5",
|
||||
"openai/gpt-5.4",
|
||||
"openai/gpt-5.5",
|
||||
"openai/gpt-5.4-mini",
|
||||
"openai/gpt-5.3-codex",
|
||||
"google/gemini-3-pro-preview",
|
||||
@@ -139,7 +139,7 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
|
||||
"x-ai/grok-4.20-beta",
|
||||
"nvidia/nemotron-3-super-120b-a12b",
|
||||
"arcee-ai/trinity-large-thinking",
|
||||
"openai/gpt-5.4-pro",
|
||||
"openai/gpt-5.5-pro",
|
||||
"openai/gpt-5.4-nano",
|
||||
],
|
||||
# Native OpenAI Chat Completions (api.openai.com). Used by /model counts and
|
||||
@@ -1379,27 +1379,93 @@ def curated_models_for_provider(
|
||||
return [(m, "") for m in models]
|
||||
|
||||
|
||||
def detect_provider_for_model(
|
||||
def _provider_keys(provider: str) -> set[str]:
    """Return the non-empty identifiers to try for ``provider``.

    The set holds the stripped, lower-cased input and whatever
    ``normalize_provider`` returns for it; falsy candidates are dropped.
    """
    raw_key = (provider or "").strip().lower()
    canonical = normalize_provider(provider)

    keys: set[str] = set()
    for candidate in (raw_key, canonical):
        if candidate:
            keys.add(candidate)
    return keys
|
||||
|
||||
|
||||
def _model_in_provider_catalog(name_lower: str, providers: set[str]) -> bool:
    """Tell whether any of ``providers`` lists ``name_lower`` in its static catalog.

    Catalog entries from ``_PROVIDER_MODELS`` are lower-cased before the
    comparison; ``name_lower`` is compared as given.
    """
    for provider_id in providers:
        for catalog_model in _PROVIDER_MODELS.get(provider_id, []):
            if catalog_model.lower() == name_lower:
                return True
    return False
|
||||
|
||||
|
||||
_AGGREGATOR_PROVIDERS = frozenset(
|
||||
{"nous", "openrouter", "ai-gateway", "copilot", "kilocode"}
|
||||
)
|
||||
|
||||
|
||||
def _resolve_static_model_alias(
|
||||
name_lower: str,
|
||||
current_keys: set[str],
|
||||
) -> Optional[tuple[str, str]]:
|
||||
"""Resolve short aliases (e.g. sonnet/opus) using static catalogs only."""
|
||||
try:
|
||||
from hermes_cli.model_switch import MODEL_ALIASES
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
identity = MODEL_ALIASES.get(name_lower)
|
||||
if identity is None:
|
||||
return None
|
||||
|
||||
vendor = identity.vendor
|
||||
family = identity.family
|
||||
|
||||
def _match(provider: str) -> Optional[str]:
|
||||
models = _PROVIDER_MODELS.get(provider, [])
|
||||
if not models:
|
||||
return None
|
||||
prefix = (
|
||||
f"{vendor}/{family}"
|
||||
if provider in _AGGREGATOR_PROVIDERS
|
||||
else family
|
||||
).lower()
|
||||
for model in models:
|
||||
if model.lower().startswith(prefix):
|
||||
return model
|
||||
return None
|
||||
|
||||
for provider in current_keys:
|
||||
if matched := _match(provider):
|
||||
return provider, matched
|
||||
|
||||
for provider in _PROVIDER_MODELS:
|
||||
if provider in current_keys or provider in _AGGREGATOR_PROVIDERS:
|
||||
continue
|
||||
if matched := _match(provider):
|
||||
return provider, matched
|
||||
|
||||
for provider in _AGGREGATOR_PROVIDERS:
|
||||
if provider in current_keys and (matched := _match(provider)):
|
||||
return provider, matched
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def detect_static_provider_for_model(
|
||||
model_name: str,
|
||||
current_provider: str,
|
||||
) -> Optional[tuple[str, str]]:
|
||||
"""Auto-detect the best provider for a model name.
|
||||
"""Auto-detect a provider from static catalogs only.
|
||||
|
||||
Returns ``(provider_id, model_name)`` — the model name may be remapped
|
||||
(e.g. bare ``deepseek-chat`` → ``deepseek/deepseek-chat`` for OpenRouter).
|
||||
Returns ``(provider_id, model_name)``. The model name may be remapped
|
||||
when a static alias or bare provider name resolves to a catalog default.
|
||||
Returns ``None`` when no confident match is found.
|
||||
|
||||
Priority:
|
||||
0. Bare provider name → switch to that provider's default model
|
||||
1. Direct provider with credentials (highest)
|
||||
2. Direct provider without credentials → remap to OpenRouter slug
|
||||
3. OpenRouter catalog match
|
||||
"""
|
||||
name = (model_name or "").strip()
|
||||
if not name:
|
||||
return None
|
||||
|
||||
name_lower = name.lower()
|
||||
current_keys = _provider_keys(current_provider)
|
||||
|
||||
alias_match = _resolve_static_model_alias(name_lower, current_keys)
|
||||
if alias_match:
|
||||
return alias_match
|
||||
|
||||
# --- Step 0: bare provider name typed as model ---
|
||||
# If someone types `/model nous` or `/model anthropic`, treat it as a
|
||||
@@ -1412,64 +1478,49 @@ def detect_provider_for_model(
|
||||
if (
|
||||
resolved_provider in _PROVIDER_LABELS
|
||||
and default_models
|
||||
and resolved_provider != normalize_provider(current_provider)
|
||||
and resolved_provider not in current_keys
|
||||
):
|
||||
return (resolved_provider, default_models[0])
|
||||
|
||||
# Aggregators list other providers' models — never auto-switch TO them
|
||||
_AGGREGATORS = {"nous", "openrouter", "ai-gateway", "copilot", "kilocode"}
|
||||
|
||||
# If the model belongs to the current provider's catalog, don't suggest switching
|
||||
current_models = _PROVIDER_MODELS.get(current_provider, [])
|
||||
if any(name_lower == m.lower() for m in current_models):
|
||||
if _model_in_provider_catalog(name_lower, current_keys):
|
||||
return None
|
||||
|
||||
# --- Step 1: check static provider catalogs for a direct match ---
|
||||
direct_match: Optional[str] = None
|
||||
for pid, models in _PROVIDER_MODELS.items():
|
||||
if pid == current_provider or pid in _AGGREGATORS:
|
||||
if pid in current_keys or pid in _AGGREGATOR_PROVIDERS:
|
||||
continue
|
||||
if any(name_lower == m.lower() for m in models):
|
||||
direct_match = pid
|
||||
break
|
||||
return (pid, name)
|
||||
|
||||
if direct_match:
|
||||
# Check if we have credentials for this provider — env vars,
|
||||
# credential pool, or auth store entries.
|
||||
has_creds = False
|
||||
try:
|
||||
from hermes_cli.auth import PROVIDER_REGISTRY
|
||||
pconfig = PROVIDER_REGISTRY.get(direct_match)
|
||||
if pconfig:
|
||||
for env_var in pconfig.api_key_env_vars:
|
||||
if os.getenv(env_var, "").strip():
|
||||
has_creds = True
|
||||
break
|
||||
except Exception:
|
||||
pass
|
||||
# Also check credential pool and auth store — covers OAuth,
|
||||
# Claude Code tokens, and other non-env-var credentials (#10300).
|
||||
if not has_creds:
|
||||
try:
|
||||
from agent.credential_pool import load_pool
|
||||
pool = load_pool(direct_match)
|
||||
if pool.has_credentials():
|
||||
has_creds = True
|
||||
except Exception:
|
||||
pass
|
||||
if not has_creds:
|
||||
try:
|
||||
from hermes_cli.auth import _load_auth_store
|
||||
store = _load_auth_store()
|
||||
if direct_match in store.get("providers", {}) or direct_match in store.get("credential_pool", {}):
|
||||
has_creds = True
|
||||
except Exception:
|
||||
pass
|
||||
return None
|
||||
|
||||
# Always return the direct provider match. If credentials are
|
||||
# missing, the client init will give a clear error rather than
|
||||
# silently routing through the wrong provider (#10300).
|
||||
return (direct_match, name)
|
||||
|
||||
def detect_provider_for_model(
|
||||
model_name: str,
|
||||
current_provider: str,
|
||||
) -> Optional[tuple[str, str]]:
|
||||
"""Auto-detect the best provider for a model name.
|
||||
|
||||
Returns ``(provider_id, model_name)`` — the model name may be remapped
|
||||
(e.g. bare ``deepseek-chat`` → ``deepseek/deepseek-chat`` for OpenRouter).
|
||||
Returns ``None`` when no confident match is found.
|
||||
|
||||
Priority:
|
||||
0. Bare provider name → switch to that provider's default model
|
||||
1. Direct provider static catalog match
|
||||
2. OpenRouter catalog match
|
||||
"""
|
||||
name = (model_name or "").strip()
|
||||
if not name:
|
||||
return None
|
||||
|
||||
static_match = detect_static_provider_for_model(name, current_provider)
|
||||
if static_match:
|
||||
return static_match
|
||||
if _model_in_provider_catalog(name.lower(), _provider_keys(current_provider)):
|
||||
return None
|
||||
|
||||
# --- Step 2: check OpenRouter catalog ---
|
||||
# First try exact match (handles provider/model format)
|
||||
|
||||
@@ -0,0 +1,202 @@
|
||||
"""Oneshot (-z) mode: send a prompt, get the final content block, exit.
|
||||
|
||||
Bypasses cli.py entirely. No banner, no spinner, no session_id line,
|
||||
no stderr chatter. Just the agent's final text to stdout.
|
||||
|
||||
Toolsets = whatever the user has configured for "cli" in `hermes tools`.
|
||||
Rules / memory / AGENTS.md / preloaded skills = same as a normal chat turn.
|
||||
Approvals = auto-bypassed (HERMES_YOLO_MODE=1 is set for the call).
|
||||
Working directory = the user's CWD (AGENTS.md etc. resolve from there as usual).
|
||||
|
||||
Model / provider selection mirrors `hermes chat`:
|
||||
- Both optional. If omitted, use the user's configured default.
|
||||
- If both given, pair them exactly as given.
|
||||
- If only --model given, auto-detect the provider that serves it.
|
||||
- If only --provider given, error out (ambiguous — caller must pick a model).
|
||||
|
||||
Env var fallbacks (used when the corresponding arg is not passed):
|
||||
- HERMES_INFERENCE_MODEL
|
||||
- HERMES_INFERENCE_PROVIDER (already read by resolve_runtime_provider)
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
from contextlib import redirect_stderr, redirect_stdout
|
||||
from typing import Optional
|
||||
|
||||
|
||||
def run_oneshot(
    prompt: str,
    model: Optional[str] = None,
    provider: Optional[str] = None,
) -> int:
    """Run one prompt through the agent and emit only its final answer.

    Everything the agent call tree writes to stdout/stderr is discarded;
    the final response text is then written to the real stdout, with a
    trailing newline added if it lacks one.

    Args:
        prompt: The user message to send.
        model: Optional model override.  Falls back to the
            HERMES_INFERENCE_MODEL env var, then to the configured model.
        provider: Optional provider override.  Must be accompanied by a
            model (argument or HERMES_INFERENCE_MODEL).

    Returns:
        The process exit code: 2 when ``provider`` is given without a
        model, otherwise 0.  The caller is expected to ``sys.exit()``
        with it.
    """
    # Suppress every logging record at CRITICAL and below, process-wide,
    # so nothing from the agent, tools, or provider adapters is logged
    # while the one-shot call runs.
    logging.disable(logging.CRITICAL)

    # A provider without a model is ambiguous: the configured model may
    # not exist on that provider, and silently using the provider's
    # default would hide the mismatch.  Check this before stderr is
    # redirected so the message is actually visible.
    has_explicit_model = bool(
        (model or "").strip()
        or os.getenv("HERMES_INFERENCE_MODEL", "").strip()
    )
    if provider and not has_explicit_model:
        sys.stderr.write(
            "hermes -z: --provider requires --model (or HERMES_INFERENCE_MODEL). "
            "Pass both explicitly, or neither to use your configured defaults.\n"
        )
        return 2

    # Nobody is there to answer an approval prompt, so pre-approve
    # dangerous commands and shell hooks via the environment.
    os.environ["HERMES_YOLO_MODE"] = "1"
    os.environ["HERMES_ACCEPT_HOOKS"] = "1"

    # Remember the real stdout, then send both streams to the null device
    # for the duration of the agent call.
    real_stdout = sys.stdout
    sink = open(os.devnull, "w")
    try:
        with redirect_stdout(sink), redirect_stderr(sink):
            response = _run_agent(prompt, model=model, provider=provider)
    finally:
        try:
            sink.close()
        except Exception:
            pass

    if not response:
        return 0

    output = response if response.endswith("\n") else response + "\n"
    real_stdout.write(output)
    real_stdout.flush()
    return 0
|
||||
|
||||
|
||||
def _run_agent(
    prompt: str,
    model: Optional[str] = None,
    provider: Optional[str] = None,
) -> str:
    """Construct an ``AIAgent`` the way a CLI chat turn would and run one prompt.

    Args:
        prompt: The user message to send.
        model: Optional model override; falls back to the
            HERMES_INFERENCE_MODEL env var, then to the configured model.
        provider: Optional provider override.  When omitted and a model
            was explicitly requested, the provider is auto-detected.

    Returns:
        The agent's final response, or ``""`` when it returns nothing.
    """
    # Local imports keep startup cheap for every other hermes command.
    from hermes_cli.config import load_config
    from hermes_cli.models import detect_provider_for_model
    from hermes_cli.runtime_provider import resolve_runtime_provider
    from hermes_cli.tools_config import _get_platform_tools
    from run_agent import AIAgent

    cfg = load_config()

    # The "model" config entry is either a bare model string or a mapping.
    model_cfg = cfg.get("model") or {}
    cfg_model = (
        model_cfg
        if isinstance(model_cfg, str)
        else model_cfg.get("default") or model_cfg.get("model") or ""
    )

    # Model precedence: explicit argument, then env var, then config.
    env_model = os.getenv("HERMES_INFERENCE_MODEL", "").strip()
    explicit_model = (model or "").strip() or env_model
    effective_model = explicit_model or cfg_model

    # Provider precedence: explicit argument, then auto-detection from an
    # explicitly requested model; env/config fallback is left to
    # resolve_runtime_provider().  Auto-detection is skipped when the
    # model came from config, since that is the "use my defaults" path.
    # Without it, an explicit model would be paired with the configured
    # default provider, which may not host that model.
    effective_provider = (provider or "").strip() or None
    if effective_provider is None and explicit_model:
        cfg_provider = (
            str(model_cfg.get("provider") or "").strip().lower()
            if isinstance(model_cfg, dict)
            else ""
        )
        env_provider = os.getenv("HERMES_INFERENCE_PROVIDER", "").strip().lower()
        current_provider = cfg_provider or env_provider or "auto"
        detected = detect_provider_for_model(explicit_model, current_provider)
        if detected:
            effective_provider, effective_model = detected

    runtime = resolve_runtime_provider(
        requested=effective_provider,
        target_model=effective_model or None,
    )

    # Toolsets enabled for the "cli" platform, sorted for a stable order.
    cli_toolsets = sorted(_get_platform_tools(cfg, "cli"))

    # Only the clarify callback is wired: it answers with a synthetic
    # "pick a default" instruction so the agent keeps going.  Approvals
    # rely on HERMES_YOLO_MODE / HERMES_ACCEPT_HOOKS, which run_oneshot
    # sets before calling this function.
    agent = AIAgent(
        api_key=runtime.get("api_key"),
        base_url=runtime.get("base_url"),
        provider=runtime.get("provider"),
        api_mode=runtime.get("api_mode"),
        model=effective_model,
        enabled_toolsets=cli_toolsets,
        quiet_mode=True,
        platform="cli",
        credential_pool=runtime.get("credential_pool"),
        clarify_callback=_oneshot_clarify_callback,
    )

    # Extra safety: turn off status output and streaming display callbacks
    # so nothing slips past the caller's stdout capture.
    agent.suppress_status_output = True
    agent.stream_delta_callback = None
    agent.tool_gen_callback = None

    reply = agent.chat(prompt)
    return reply or ""
|
||||
|
||||
|
||||
def _oneshot_clarify_callback(question: str, choices=None) -> str:
|
||||
"""Clarify is disabled in oneshot mode — tell the agent to pick a
|
||||
default and proceed instead of stalling or erroring."""
|
||||
if choices:
|
||||
return (
|
||||
f"[oneshot mode: no user available. Pick the best option from "
|
||||
f"{choices} using your own judgment and continue.]"
|
||||
)
|
||||
return (
|
||||
"[oneshot mode: no user available. Make the most reasonable "
|
||||
"assumption you can and continue.]"
|
||||
)
|
||||
@@ -0,0 +1,229 @@
|
||||
"""PTY bridge for `hermes dashboard` chat tab.
|
||||
|
||||
Wraps a child process behind a pseudo-terminal so its ANSI output can be
|
||||
streamed to a browser-side terminal emulator (xterm.js) and typed
|
||||
keystrokes can be fed back in. The only caller today is the
|
||||
``/api/pty`` WebSocket endpoint in ``hermes_cli.web_server``.
|
||||
|
||||
Design constraints:
|
||||
|
||||
* **POSIX-only.** Hermes Agent supports Windows exclusively via WSL, which
|
||||
exposes a native POSIX PTY via ``openpty(3)``. Native Windows Python
|
||||
has no PTY; :class:`PtyUnavailableError` is raised with a user-readable
|
||||
install/platform message so the dashboard can render a banner instead of
|
||||
crashing.
|
||||
* **Zero Node dependency on the server side.** We use :mod:`ptyprocess`,
|
||||
which is a pure-Python wrapper around the OS calls. The browser talks
|
||||
to the same ``hermes --tui`` binary it would launch from the CLI, so
|
||||
every TUI feature (slash popover, model picker, tool rows, markdown,
|
||||
skin engine, clarify/sudo/approval prompts) ships automatically.
|
||||
* **Byte-safe I/O.** Reads and writes go through the PTY master fd
|
||||
directly — we avoid :class:`ptyprocess.PtyProcessUnicode` because
|
||||
streaming ANSI is inherently byte-oriented and UTF-8 boundaries may land
|
||||
mid-read.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import errno
|
||||
import fcntl
|
||||
import os
|
||||
import select
|
||||
import signal
|
||||
import struct
|
||||
import sys
|
||||
import termios
|
||||
import time
|
||||
from typing import Optional, Sequence
|
||||
|
||||
try:
|
||||
import ptyprocess # type: ignore
|
||||
_PTY_AVAILABLE = not sys.platform.startswith("win")
|
||||
except ImportError: # pragma: no cover - dev env without ptyprocess
|
||||
ptyprocess = None # type: ignore
|
||||
_PTY_AVAILABLE = False
|
||||
|
||||
|
||||
__all__ = ["PtyBridge", "PtyUnavailableError"]
|
||||
|
||||
|
||||
class PtyUnavailableError(RuntimeError):
    """Signal that no pseudo-terminal can be created here.

    Raised on native Windows (no ConPTY bindings) and in development
    environments where the ``ptyprocess`` dependency is not installed.
    The exception message is meant to be shown to the user; the
    dashboard renders it as a banner in the chat tab.
    """
|
||||
|
||||
|
||||
class PtyBridge:
    """Thin wrapper around ``ptyprocess.PtyProcess`` for byte streaming.

    Not thread-safe.  A single bridge is owned by the WebSocket handler
    that spawned it; the reader runs in an executor thread while writes
    happen on the event-loop thread.  Both sides are OK because the
    kernel PTY is the actual synchronization point — we never call
    :mod:`ptyprocess` methods concurrently, we only call ``os.read`` and
    ``os.write`` on the master fd, which is safe.
    """

    # ``struct winsize`` stores rows/cols as unsigned shorts; anything
    # larger makes ``struct.pack("H", ...)`` raise ``struct.error``.
    _WINSIZE_MAX = 0xFFFF

    def __init__(self, proc: "ptyprocess.PtyProcess"):  # type: ignore[name-defined]
        """Wrap an already-spawned ``PtyProcess`` (see :meth:`spawn`)."""
        self._proc = proc
        self._fd: int = proc.fd
        self._closed = False

    # -- helpers ----------------------------------------------------------

    @classmethod
    def _clamp_dimension(cls, value: int) -> int:
        """Clamp a row/column count into the ``1..65535`` winsize range."""
        return max(1, min(value, cls._WINSIZE_MAX))

    def _child_running(self) -> bool:
        """Return True if the child is alive; any probe error counts as dead."""
        try:
            return bool(self._proc.isalive())
        except Exception:
            return False

    # -- lifecycle --------------------------------------------------------

    @classmethod
    def is_available(cls) -> bool:
        """True if a PTY can be spawned on this platform."""
        return bool(_PTY_AVAILABLE)

    @classmethod
    def spawn(
        cls,
        argv: Sequence[str],
        *,
        cwd: Optional[str] = None,
        env: Optional[dict] = None,
        cols: int = 80,
        rows: int = 24,
    ) -> "PtyBridge":
        """Spawn ``argv`` behind a new PTY and return a bridge.

        Args:
            argv: Command and arguments to execute.
            cwd: Working directory for the child, or ``None`` to inherit.
            env: Environment for the child.  ``None`` inherits a copy of
                the server's environment; a dict replaces it entirely.
            cols: Initial terminal width, clamped to ``1..65535``.
            rows: Initial terminal height, clamped to ``1..65535``.

        Raises :class:`PtyUnavailableError` if the platform can't host a
        PTY.  Raises :class:`FileNotFoundError` or :class:`OSError` for
        ordinary exec failures (missing binary, bad cwd, etc.).
        """
        if not _PTY_AVAILABLE:
            if sys.platform.startswith("win"):
                raise PtyUnavailableError(
                    "Pseudo-terminals are unavailable on this platform. "
                    "Hermes Agent supports Windows only via WSL."
                )
            if ptyprocess is None:
                raise PtyUnavailableError(
                    "The `ptyprocess` package is missing. "
                    "Install with: pip install ptyprocess "
                    "(or pip install -e '.[pty]')."
                )
            raise PtyUnavailableError("Pseudo-terminals are unavailable.")
        # Let caller-supplied env fully override inheritance; if they pass
        # None we inherit the server's env (same semantics as subprocess).
        spawn_env = os.environ.copy() if env is None else env
        proc = ptyprocess.PtyProcess.spawn(  # type: ignore[union-attr]
            list(argv),
            cwd=cwd,
            env=spawn_env,
            # Same bounds as resize(): keep both inside the winsize range.
            dimensions=(cls._clamp_dimension(rows), cls._clamp_dimension(cols)),
        )
        return cls(proc)

    @property
    def pid(self) -> int:
        """Process id of the child."""
        return int(self._proc.pid)

    def is_alive(self) -> bool:
        """Return True while the bridge is open and the child is running."""
        if self._closed:
            return False
        return self._child_running()

    # -- I/O --------------------------------------------------------------

    def read(self, timeout: float = 0.2) -> Optional[bytes]:
        """Read up to 64 KiB of raw bytes from the PTY master.

        Returns:
            * non-empty bytes — child output
            * empty bytes (``b""``) — no data available within ``timeout``
            * None — child has exited and the master fd is at EOF

        Never blocks longer than ``timeout`` seconds.  Safe to call after
        :meth:`close`; returns ``None`` in that case.
        """
        if self._closed:
            return None
        try:
            readable, _, _ = select.select([self._fd], [], [], timeout)
        except (OSError, ValueError):
            # fd no longer usable with select(): report as EOF.
            return None
        if not readable:
            return b""
        try:
            data = os.read(self._fd, 65536)
        except OSError as exc:
            # EIO on Linux = slave side closed.  EBADF = already closed.
            if exc.errno in (errno.EIO, errno.EBADF):
                return None
            raise
        if not data:
            return None
        return data

    def write(self, data: bytes) -> None:
        """Write raw bytes to the PTY master (i.e. the child's stdin).

        No-op when the bridge is closed or ``data`` is empty.  Errors that
        mean the other side is gone (EIO/EBADF/EPIPE) are swallowed; any
        other :class:`OSError` propagates.
        """
        if self._closed or not data:
            return
        # os.write can return a short write under load; loop until drained.
        view = memoryview(data)
        while view:
            try:
                n = os.write(self._fd, view)
            except OSError as exc:
                if exc.errno in (errno.EIO, errno.EBADF, errno.EPIPE):
                    return
                raise
            if n <= 0:
                return
            view = view[n:]

    def resize(self, cols: int, rows: int) -> None:
        """Forward a terminal resize to the child via ``TIOCSWINSZ``.

        Both dimensions are clamped to ``1..65535`` so an out-of-range
        request cannot raise ``struct.error``.  ioctl failures are ignored.
        """
        if self._closed:
            return
        # struct winsize: rows, cols, xpixel, ypixel (all unsigned short)
        winsize = struct.pack(
            "HHHH",
            self._clamp_dimension(rows),
            self._clamp_dimension(cols),
            0,
            0,
        )
        try:
            fcntl.ioctl(self._fd, termios.TIOCSWINSZ, winsize)
        except OSError:
            pass

    # -- teardown ---------------------------------------------------------

    def close(self) -> None:
        """Terminate the child and close fds.

        Escalates SIGHUP → SIGTERM → SIGKILL, waiting up to 0.5 s after
        each signal for the child to exit.

        Idempotent.  Reaping the child is important so we don't leak
        zombies across the lifetime of the dashboard process.
        """
        if self._closed:
            return
        self._closed = True

        # SIGHUP is the conventional "your terminal went away" signal.
        # We escalate if the child ignores it.
        for sig in (signal.SIGHUP, signal.SIGTERM, signal.SIGKILL):
            # Guarded probe: a failing isalive() must not abort teardown
            # before the final close() below releases the fd.
            if not self._child_running():
                break
            try:
                self._proc.kill(sig)
            except Exception:
                pass
            deadline = time.monotonic() + 0.5
            while self._child_running() and time.monotonic() < deadline:
                time.sleep(0.02)

        try:
            self._proc.close(force=True)
        except Exception:
            pass

    # Context-manager sugar — handy in tests and ad-hoc scripts.
    def __enter__(self) -> "PtyBridge":
        return self

    def __exit__(self, *_exc) -> None:
        self.close()
|
||||
+155
-19
@@ -68,25 +68,58 @@ CONFIGURABLE_TOOLSETS = [
|
||||
("rl", "🧪 RL Training", "Tinker-Atropos training tools"),
|
||||
("homeassistant", "🏠 Home Assistant", "smart home device control"),
|
||||
("spotify", "🎵 Spotify", "playback, search, playlists, library"),
|
||||
("discord", "💬 Discord (read/participate)", "fetch messages, search members, create thread"),
|
||||
("discord_admin", "🛡️ Discord Server Admin", "list channels/roles, pin, assign roles"),
|
||||
]
|
||||
|
||||
# Toolsets that are OFF by default for new installs.
|
||||
# They're still in _HERMES_CORE_TOOLS (available at runtime if enabled),
|
||||
# but the setup checklist won't pre-select them for first-time users.
|
||||
_DEFAULT_OFF_TOOLSETS = {"moa", "homeassistant", "rl", "spotify"}
|
||||
_DEFAULT_OFF_TOOLSETS = {"moa", "homeassistant", "rl", "spotify", "discord", "discord_admin"}
|
||||
|
||||
# Platform-scoped toolsets: only appear in the `hermes tools` checklist for
|
||||
# these platforms, and only resolve/save for these platforms. A toolset
|
||||
# absent from this map is available on every platform (current behaviour).
|
||||
#
|
||||
# Use this for tools whose APIs only make sense on one platform (Discord
|
||||
# server admin, Slack workspace admin, etc.). Keeps every other platform's
|
||||
# checklist from filling up with irrelevant toggles.
|
||||
_TOOLSET_PLATFORM_RESTRICTIONS: Dict[str, Set[str]] = {
|
||||
"discord": {"discord"},
|
||||
"discord_admin": {"discord"},
|
||||
}
|
||||
|
||||
|
||||
def _toolset_allowed_for_platform(ts_key: str, platform: str) -> bool:
    """Tell whether toolset ``ts_key`` may be configured on ``platform``.

    A toolset with no entry in ``_TOOLSET_PLATFORM_RESTRICTIONS`` is
    unrestricted and therefore allowed on every platform; otherwise the
    platform must be a member of the toolset's restriction set.
    """
    restricted_to = _TOOLSET_PLATFORM_RESTRICTIONS.get(ts_key)
    if restricted_to is None:
        # No restriction recorded for this toolset.
        return True
    return platform in restricted_to
|
||||
|
||||
|
||||
def _get_effective_configurable_toolsets():
    """Return CONFIGURABLE_TOOLSETS + any plugin-provided toolsets.

    Plugin toolsets are appended at the end so they appear after the
    built-in toolsets in the TUI checklist.  A plugin whose toolset key
    already appears in ``CONFIGURABLE_TOOLSETS`` is skipped — bundled
    plugins (e.g. ``plugins/spotify``) share their toolset key with the
    built-in entry, and we want the built-in label/description to win.
    Without the dedupe, ``hermes tools`` → "reconfigure existing" would
    list the same toolset twice.

    Returns:
        A new list of toolset entries whose first element is the toolset
        key; built-in entries first, then plugin entries with unseen keys.
    """
    result = list(CONFIGURABLE_TOOLSETS)
    # Keys already present; a plugin entry reusing one of them is dropped.
    seen = {ts_key for ts_key, _, _ in result}
    try:
        from hermes_cli.plugins import discover_plugins, get_plugin_toolsets
        discover_plugins()  # idempotent — ensures plugins are loaded
        for entry in get_plugin_toolsets():
            if entry[0] in seen:
                continue
            seen.add(entry[0])
            result.append(entry)
    except Exception:
        # Deliberately best-effort: on any plugin failure, return what has
        # been collected so far (at minimum the built-in entries).
        pass
    return result
|
||||
@@ -368,13 +401,9 @@ TOOL_CATEGORIES = {
|
||||
"providers": [
|
||||
{
|
||||
"name": "Spotify Web API",
|
||||
"tag": "PKCE OAuth — run `hermes auth spotify` after this",
|
||||
"env_vars": [
|
||||
{"key": "HERMES_SPOTIFY_CLIENT_ID", "prompt": "Spotify app client_id",
|
||||
"url": "https://developer.spotify.com/dashboard"},
|
||||
{"key": "HERMES_SPOTIFY_REDIRECT_URI", "prompt": "Redirect URI (must be allow-listed in your Spotify app)",
|
||||
"default": "http://127.0.0.1:43827/spotify/callback"},
|
||||
],
|
||||
"tag": "PKCE OAuth — opens the setup wizard",
|
||||
"env_vars": [],
|
||||
"post_setup": "spotify",
|
||||
},
|
||||
],
|
||||
},
|
||||
@@ -478,6 +507,35 @@ def _run_post_setup(post_setup_key: str):
|
||||
_print_warning(" kittentts install timed out (>5min)")
|
||||
_print_info(f" Run manually: python -m pip install -U '{wheel_url}' soundfile")
|
||||
|
||||
elif post_setup_key == "spotify":
|
||||
# Run the full `hermes auth spotify` flow — if the user has no
|
||||
# client_id yet, this drops them into the interactive wizard
|
||||
# (opens the Spotify dashboard, prompts for client_id, persists
|
||||
# to ~/.hermes/.env), then continues straight into PKCE. If they
|
||||
# already have an app, it skips the wizard and just does OAuth.
|
||||
from types import SimpleNamespace
|
||||
try:
|
||||
from hermes_cli.auth import login_spotify_command
|
||||
except Exception as exc:
|
||||
_print_warning(f" Could not load Spotify auth: {exc}")
|
||||
_print_info(" Run manually: hermes auth spotify")
|
||||
return
|
||||
_print_info(" Starting Spotify login...")
|
||||
try:
|
||||
login_spotify_command(SimpleNamespace(
|
||||
client_id=None, redirect_uri=None, scope=None,
|
||||
no_browser=False, timeout=None,
|
||||
))
|
||||
_print_success(" Spotify authenticated")
|
||||
except SystemExit as exc:
|
||||
# User aborted the wizard, or OAuth failed — don't fail the
|
||||
# toolset enable; they can retry with `hermes auth spotify`.
|
||||
_print_warning(f" Spotify login did not complete: {exc}")
|
||||
_print_info(" Run later: hermes auth spotify")
|
||||
except Exception as exc:
|
||||
_print_warning(f" Spotify login failed: {exc}")
|
||||
_print_info(" Run manually: hermes auth spotify")
|
||||
|
||||
elif post_setup_key == "rl_training":
|
||||
try:
|
||||
__import__("tinker_atropos")
|
||||
@@ -566,7 +624,7 @@ def _get_platform_tools(
|
||||
include_default_mcp_servers: bool = True,
|
||||
) -> Set[str]:
|
||||
"""Resolve which individual toolset names are enabled for a platform."""
|
||||
from toolsets import resolve_toolset
|
||||
from toolsets import resolve_toolset, TOOLSETS
|
||||
|
||||
platform_toolsets = config.get("platform_toolsets") or {}
|
||||
toolset_names = platform_toolsets.get(platform)
|
||||
@@ -580,6 +638,8 @@ def _get_platform_tools(
|
||||
toolset_names = [str(ts) for ts in toolset_names]
|
||||
|
||||
configurable_keys = {ts_key for ts_key, _, _ in CONFIGURABLE_TOOLSETS}
|
||||
plugin_ts_keys = _get_plugin_toolset_keys()
|
||||
platform_default_keys = {p["default_toolset"] for p in PLATFORMS.values()}
|
||||
|
||||
# If the saved list contains any configurable keys directly, the user
|
||||
# has explicitly configured this platform — use direct membership.
|
||||
@@ -589,7 +649,10 @@ def _get_platform_tools(
|
||||
has_explicit_config = any(ts in configurable_keys for ts in toolset_names)
|
||||
|
||||
if has_explicit_config:
|
||||
enabled_toolsets = {ts for ts in toolset_names if ts in configurable_keys}
|
||||
enabled_toolsets = {
|
||||
ts for ts in toolset_names
|
||||
if ts in configurable_keys and _toolset_allowed_for_platform(ts, platform)
|
||||
}
|
||||
else:
|
||||
# No explicit config — fall back to resolving composite toolset names
|
||||
# (e.g. "hermes-cli") to individual tool names and reverse-mapping.
|
||||
@@ -599,14 +662,52 @@ def _get_platform_tools(
|
||||
|
||||
enabled_toolsets = set()
|
||||
for ts_key, _, _ in CONFIGURABLE_TOOLSETS:
|
||||
if not _toolset_allowed_for_platform(ts_key, platform):
|
||||
continue
|
||||
ts_tools = set(resolve_toolset(ts_key))
|
||||
if ts_tools and ts_tools.issubset(all_tool_names):
|
||||
enabled_toolsets.add(ts_key)
|
||||
|
||||
default_off = set(_DEFAULT_OFF_TOOLSETS)
|
||||
if platform in default_off:
|
||||
# Legacy safety: if the platform's own name matches a default-off
|
||||
# toolset (e.g. `homeassistant` platform + `homeassistant` toolset),
|
||||
# keep that toolset enabled on first install. Skip this dodge for
|
||||
# platform-restricted toolsets — those are always opt-in even on
|
||||
# their own platform (e.g. `discord` + `discord` should stay OFF).
|
||||
if platform in default_off and platform not in _TOOLSET_PLATFORM_RESTRICTIONS:
|
||||
default_off.remove(platform)
|
||||
enabled_toolsets -= default_off
|
||||
|
||||
# Recover non-configurable platform toolsets (e.g. discord, feishu_doc,
|
||||
# feishu_drive). These are part of the platform's default composite but
|
||||
# absent from CONFIGURABLE_TOOLSETS, so they can't appear in the TUI
|
||||
# checklist or in a user-saved config. Must run in BOTH branches —
|
||||
# otherwise saving via `hermes tools` (which flips has_explicit_config
|
||||
# to True) silently drops them.
|
||||
platform_tool_universe = set(resolve_toolset(PLATFORMS[platform]["default_toolset"]))
|
||||
configurable_tool_universe = set()
|
||||
for ck in configurable_keys:
|
||||
configurable_tool_universe.update(resolve_toolset(ck))
|
||||
claimed = set()
|
||||
for ts_key in enabled_toolsets:
|
||||
claimed.update(resolve_toolset(ts_key))
|
||||
skip = configurable_keys | plugin_ts_keys | platform_default_keys
|
||||
skip |= {k for k in TOOLSETS if k.startswith("hermes-")}
|
||||
skip |= set(_DEFAULT_OFF_TOOLSETS) - {platform}
|
||||
for ts_key, ts_def in TOOLSETS.items():
|
||||
if ts_key in skip:
|
||||
continue
|
||||
if ts_def.get("includes"):
|
||||
continue
|
||||
ts_tools = set(resolve_toolset(ts_key))
|
||||
if not ts_tools or not ts_tools.issubset(platform_tool_universe):
|
||||
continue
|
||||
if ts_tools.issubset(configurable_tool_universe):
|
||||
continue
|
||||
if not ts_tools.issubset(claimed):
|
||||
enabled_toolsets.add(ts_key)
|
||||
claimed.update(ts_tools)
|
||||
|
||||
# Plugin toolsets: enabled by default unless explicitly disabled, or
|
||||
# unless the toolset is in _DEFAULT_OFF_TOOLSETS (e.g. spotify —
|
||||
# shipped as a bundled plugin but user must opt in via `hermes tools`
|
||||
@@ -614,7 +715,6 @@ def _get_platform_tools(
|
||||
# A plugin toolset is "known" for a platform once `hermes tools`
|
||||
# has been saved for that platform (tracked via known_plugin_toolsets).
|
||||
# Unknown plugins default to enabled; known-but-absent = disabled.
|
||||
plugin_ts_keys = _get_plugin_toolset_keys()
|
||||
if plugin_ts_keys:
|
||||
known_map = config.get("known_plugin_toolsets", {})
|
||||
known_for_platform = set(known_map.get(platform, []))
|
||||
@@ -632,7 +732,6 @@ def _get_platform_tools(
|
||||
|
||||
# Preserve any explicit non-configurable toolset entries (for example,
|
||||
# custom toolsets or MCP server names saved in platform_toolsets).
|
||||
platform_default_keys = {p["default_toolset"] for p in PLATFORMS.values()}
|
||||
explicit_passthrough = {
|
||||
ts
|
||||
for ts in toolset_names
|
||||
@@ -678,6 +777,14 @@ def _save_platform_tools(config: dict, platform: str, enabled_toolset_keys: Set[
|
||||
"""
|
||||
config.setdefault("platform_toolsets", {})
|
||||
|
||||
# Drop platform-scoped toolsets that don't apply here. Prevents the
|
||||
# "Configure all platforms" checklist (or a hand-edited config.yaml)
|
||||
# from turning on, say, the `discord` toolset for Telegram.
|
||||
enabled_toolset_keys = {
|
||||
ts for ts in enabled_toolset_keys
|
||||
if _toolset_allowed_for_platform(ts, platform)
|
||||
}
|
||||
|
||||
# Get the set of all configurable toolset keys (built-in + plugin)
|
||||
configurable_keys = {ts_key for ts_key, _, _ in CONFIGURABLE_TOOLSETS}
|
||||
plugin_keys = _get_plugin_toolset_keys()
|
||||
@@ -692,6 +799,7 @@ def _save_platform_tools(config: dict, platform: str, enabled_toolset_keys: Set[
|
||||
existing_toolsets = config.get("platform_toolsets", {}).get(platform, [])
|
||||
if not isinstance(existing_toolsets, list):
|
||||
existing_toolsets = []
|
||||
existing_toolsets = [str(ts) for ts in existing_toolsets]
|
||||
|
||||
# Preserve any entries that are NOT configurable toolsets and NOT platform
|
||||
# defaults (i.e. only MCP server names should be preserved)
|
||||
@@ -699,6 +807,11 @@ def _save_platform_tools(config: dict, platform: str, enabled_toolset_keys: Set[
|
||||
entry for entry in existing_toolsets
|
||||
if entry not in configurable_keys and entry not in platform_default_keys
|
||||
}
|
||||
# Opening `hermes tools` is the user's opt-in to reconfigure tools, so treat
|
||||
# saving from the picker as consent to clear the "no_mcp" sentinel. The
|
||||
# picker has no checkbox for no_mcp, so without this users who once set it
|
||||
# by hand could never re-enable MCP servers through the UI.
|
||||
preserved_entries.discard("no_mcp")
|
||||
|
||||
# Merge preserved entries with new enabled toolsets
|
||||
config["platform_toolsets"][platform] = sorted(enabled_toolset_keys | preserved_entries)
|
||||
@@ -806,7 +919,7 @@ def _estimate_tool_tokens() -> Dict[str, int]:
|
||||
return _tool_token_cache
|
||||
|
||||
|
||||
def _prompt_toolset_checklist(platform_label: str, enabled: Set[str]) -> Set[str]:
|
||||
def _prompt_toolset_checklist(platform_label: str, enabled: Set[str], platform: str = "cli") -> Set[str]:
|
||||
"""Multi-select checklist of toolsets. Returns set of selected toolset keys."""
|
||||
from hermes_cli.curses_ui import curses_checklist
|
||||
from toolsets import resolve_toolset
|
||||
@@ -814,7 +927,12 @@ def _prompt_toolset_checklist(platform_label: str, enabled: Set[str]) -> Set[str
|
||||
# Pre-compute per-tool token counts (cached after first call).
|
||||
tool_tokens = _estimate_tool_tokens()
|
||||
|
||||
effective = _get_effective_configurable_toolsets()
|
||||
effective_all = _get_effective_configurable_toolsets()
|
||||
# Drop platform-scoped toolsets that don't apply to this platform.
|
||||
effective = [
|
||||
(k, l, d) for (k, l, d) in effective_all
|
||||
if _toolset_allowed_for_platform(k, platform)
|
||||
]
|
||||
|
||||
labels = []
|
||||
for ts_key, ts_label, ts_desc in effective:
|
||||
@@ -1728,7 +1846,7 @@ def tools_command(args=None, first_install: bool = False, config: dict = None):
|
||||
checklist_preselected = current_enabled - _DEFAULT_OFF_TOOLSETS
|
||||
|
||||
# Show checklist
|
||||
new_enabled = _prompt_toolset_checklist(pinfo["label"], checklist_preselected)
|
||||
new_enabled = _prompt_toolset_checklist(pinfo["label"], checklist_preselected, pkey)
|
||||
|
||||
added = new_enabled - current_enabled
|
||||
removed = current_enabled - new_enabled
|
||||
@@ -2084,7 +2202,11 @@ def _apply_mcp_change(config: dict, targets: List[str], action: str) -> Set[str]
|
||||
|
||||
def _print_tools_list(enabled_toolsets: set, mcp_servers: dict, platform: str = "cli"):
|
||||
"""Print a summary of enabled/disabled toolsets and MCP tool filters."""
|
||||
effective = _get_effective_configurable_toolsets()
|
||||
effective_all = _get_effective_configurable_toolsets()
|
||||
effective = [
|
||||
(k, l, d) for (k, l, d) in effective_all
|
||||
if _toolset_allowed_for_platform(k, platform)
|
||||
]
|
||||
builtin_keys = {ts_key for ts_key, _, _ in CONFIGURABLE_TOOLSETS}
|
||||
|
||||
print(f"Built-in toolsets ({platform}):")
|
||||
@@ -2150,6 +2272,20 @@ def tools_disable_enable_command(args):
|
||||
_print_error(f"Unknown toolset '{name}'")
|
||||
toolset_targets = [t for t in toolset_targets if t in valid_toolsets]
|
||||
|
||||
# Reject platform-scoped toolsets on platforms that don't allow them.
|
||||
restricted_targets = [
|
||||
t for t in toolset_targets
|
||||
if not _toolset_allowed_for_platform(t, platform)
|
||||
]
|
||||
if restricted_targets:
|
||||
for name in restricted_targets:
|
||||
allowed = sorted(_TOOLSET_PLATFORM_RESTRICTIONS.get(name) or set())
|
||||
_print_error(
|
||||
f"Toolset '{name}' is not available on platform '{platform}' "
|
||||
f"(only: {', '.join(allowed)})"
|
||||
)
|
||||
toolset_targets = [t for t in toolset_targets if t not in restricted_targets]
|
||||
|
||||
if toolset_targets:
|
||||
_apply_toolset_change(config, platform, toolset_targets, action)
|
||||
|
||||
|
||||
+892
-150
File diff suppressed because it is too large
Load Diff
+29
-25
@@ -288,30 +288,34 @@ def get_tool_definitions(
|
||||
filtered_tools[i] = {"type": "function", "function": dynamic_schema}
|
||||
break
|
||||
|
||||
# Rebuild discord_server schema based on the bot's privileged intents
|
||||
# (detected from GET /applications/@me) and the user's action allowlist
|
||||
# in config. Hides actions the bot's intents don't support so the
|
||||
# model never attempts them, and annotates fetch_messages when the
|
||||
# Rebuild discord / discord_admin schemas based on the bot's privileged
|
||||
# intents (detected from GET /applications/@me) and the user's action
|
||||
# allowlist in config. Hides actions the bot's intents don't support so
|
||||
# the model never attempts them, and annotates fetch_messages when the
|
||||
# MESSAGE_CONTENT intent is missing.
|
||||
if "discord_server" in available_tool_names:
|
||||
try:
|
||||
from tools.discord_tool import get_dynamic_schema
|
||||
dynamic = get_dynamic_schema()
|
||||
except Exception: # pragma: no cover — defensive, fall back to static
|
||||
dynamic = None
|
||||
if dynamic is None:
|
||||
# Tool filtered out entirely (empty allowlist or detection disabled
|
||||
# the only remaining actions). Drop it from the schema list.
|
||||
filtered_tools = [
|
||||
t for t in filtered_tools
|
||||
if t.get("function", {}).get("name") != "discord_server"
|
||||
]
|
||||
available_tool_names.discard("discord_server")
|
||||
else:
|
||||
for i, td in enumerate(filtered_tools):
|
||||
if td.get("function", {}).get("name") == "discord_server":
|
||||
filtered_tools[i] = {"type": "function", "function": dynamic}
|
||||
break
|
||||
_discord_schema_fns = {
|
||||
"discord": "get_dynamic_schema_core",
|
||||
"discord_admin": "get_dynamic_schema_admin",
|
||||
}
|
||||
for discord_tool_name in _discord_schema_fns:
|
||||
if discord_tool_name in available_tool_names:
|
||||
try:
|
||||
from tools import discord_tool as _dt
|
||||
schema_fn = getattr(_dt, _discord_schema_fns[discord_tool_name])
|
||||
dynamic = schema_fn()
|
||||
except Exception:
|
||||
dynamic = None
|
||||
if dynamic is None:
|
||||
filtered_tools = [
|
||||
t for t in filtered_tools
|
||||
if t.get("function", {}).get("name") != discord_tool_name
|
||||
]
|
||||
available_tool_names.discard(discord_tool_name)
|
||||
else:
|
||||
for i, td in enumerate(filtered_tools):
|
||||
if td.get("function", {}).get("name") == discord_tool_name:
|
||||
filtered_tools[i] = {"type": "function", "function": dynamic}
|
||||
break
|
||||
|
||||
# Strip web tool cross-references from browser_navigate description when
|
||||
# web_search / web_extract are not available. The static schema says
|
||||
@@ -464,9 +468,9 @@ def _coerce_number(value: str, integer_only: bool = False):
|
||||
f = float(value)
|
||||
except (ValueError, OverflowError):
|
||||
return value
|
||||
# Guard against inf/nan before int() conversion
|
||||
# Guard against inf/nan — not JSON-serializable, keep original string
|
||||
if f != f or f == float("inf") or f == float("-inf"):
|
||||
return f
|
||||
return value
|
||||
# If it looks like an integer (no fractional part), return int
|
||||
if f == int(f):
|
||||
return int(f)
|
||||
|
||||
+1
-1
@@ -156,7 +156,7 @@
|
||||
for entry in "''${ENTRIES[@]}"; do
|
||||
IFS=":" read -r ATTR FOLDER NIX_FILE <<< "$entry"
|
||||
echo "==> .#$ATTR ($FOLDER -> $NIX_FILE)"
|
||||
OUTPUT=$(nix build ".#$ATTR.npmDeps" --no-link --print-build-logs 2>&1)
|
||||
OUTPUT=$(nix build ".#$ATTR.npmDeps" --no-link --rebuild --print-build-logs 2>&1)
|
||||
STATUS=$?
|
||||
if [ "$STATUS" -eq 0 ]; then
|
||||
echo " ok"
|
||||
|
||||
+1
-1
@@ -4,7 +4,7 @@ let
|
||||
src = ../web;
|
||||
npmDeps = pkgs.fetchNpmDeps {
|
||||
inherit src;
|
||||
hash = "sha256-TS/vrCHbdvXkPcAPxImKzAd2pdDCrKlgYZkXBMQ+TEg=";
|
||||
hash = "sha256-4Z8KQ69QhO83X6zff+5urWBv6MME686MhTTMdwSl65o=";
|
||||
};
|
||||
|
||||
npm = hermesNpmLib.mkNpmPassthru { folder = "web"; attr = "web"; pname = "hermes-web"; };
|
||||
|
||||
@@ -91,4 +91,29 @@
|
||||
|
||||
// Register this plugin — the dashboard picks it up automatically.
|
||||
window.__HERMES_PLUGINS__.register("example", ExamplePage);
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────
|
||||
// Page-scoped slot demo: inject a small banner at the top of /sessions.
|
||||
//
|
||||
// Built-in pages expose named slots (<page>:top, <page>:bottom) that
|
||||
// plugins can populate without overriding the whole route. The
|
||||
// manifest lists the slots we use in its `slots` array so the shell
|
||||
// knows to render <PluginSlot name="sessions:top" /> there.
|
||||
// ─────────────────────────────────────────────────────────────────────
|
||||
// Banner component rendered into the "sessions:top" slot: a dashed Card
// holding an "Example" badge and a one-line explanation of where it came from.
function SessionsTopBanner() {
  // Inline <code> element naming the slot inside the explanatory sentence.
  const slotName = React.createElement(
    "code",
    { className: "font-courier" },
    "sessions:top",
  );
  const badge = React.createElement(Badge, { variant: "outline" }, "Example");
  const message = React.createElement(
    "span",
    { className: "text-xs text-muted-foreground" },
    "This banner was injected into the Sessions page by the example plugin via the ",
    slotName,
    " slot.",
  );
  const content = React.createElement(
    CardContent,
    { className: "flex items-center gap-3 py-2" },
    badge,
    message,
  );
  return React.createElement(Card, { className: "border-dashed" }, content);
}
|
||||
|
||||
window.__HERMES_PLUGINS__.registerSlot("example", "sessions:top", SessionsTopBanner);
|
||||
})();
|
||||
|
||||
@@ -8,6 +8,7 @@
|
||||
"path": "/example",
|
||||
"position": "after:skills"
|
||||
},
|
||||
"slots": ["sessions:top"],
|
||||
"entry": "dist/index.js",
|
||||
"api": "plugin_api.py"
|
||||
}
|
||||
|
||||
@@ -43,7 +43,7 @@ _TIMEOUT = 30.0
|
||||
# ---------------------------------------------------------------------------
|
||||
# Process-level atexit safety net — ensures pending sessions are committed
|
||||
# even if shutdown_memory_provider is never called (e.g. gateway crash,
|
||||
# SIGKILL, or exception in _async_flush_memories preventing shutdown).
|
||||
# SIGKILL, or exception in the session expiry watcher preventing shutdown).
|
||||
# ---------------------------------------------------------------------------
|
||||
_last_active_provider: Optional["OpenVikingMemoryProvider"] = None
|
||||
|
||||
|
||||
@@ -78,6 +78,16 @@ termux = [
|
||||
]
|
||||
dingtalk = ["dingtalk-stream>=0.20,<1", "alibabacloud-dingtalk>=2.0.0", "qrcode>=7.0,<8"]
|
||||
feishu = ["lark-oapi>=1.5.3,<2", "qrcode>=7.0,<8"]
|
||||
google = [
|
||||
# Required by the google-workspace skill (Gmail, Calendar, Drive, Contacts,
|
||||
# Sheets, Docs). Declared here so packagers (Nix, Homebrew) ship them with
|
||||
# the [all] extra and users don't hit runtime `pip install` paths that fail
|
||||
# in environments without pip (e.g. Nix-managed Python).
|
||||
"google-api-python-client>=2.100,<3",
|
||||
"google-auth-oauthlib>=1.0,<2",
|
||||
"google-auth-httplib2>=0.2,<1",
|
||||
]
|
||||
# `hermes dashboard` (localhost SPA + API). Not in core to keep the default install lean.
|
||||
web = ["fastapi>=0.104.0,<1", "uvicorn[standard]>=0.24.0,<1"]
|
||||
rl = [
|
||||
"atroposlib @ git+https://github.com/NousResearch/atropos.git@c20c85256e5a45ad31edf8b7276e9c5ee1995a30",
|
||||
@@ -109,6 +119,7 @@ all = [
|
||||
"hermes-agent[voice]",
|
||||
"hermes-agent[dingtalk]",
|
||||
"hermes-agent[feishu]",
|
||||
"hermes-agent[google]",
|
||||
"hermes-agent[mistral]",
|
||||
"hermes-agent[bedrock]",
|
||||
"hermes-agent[web]",
|
||||
|
||||
+445
-239
@@ -502,6 +502,48 @@ def _sanitize_messages_surrogates(messages: list) -> bool:
|
||||
return found
|
||||
|
||||
|
||||
def _escape_invalid_chars_in_json_strings(raw: str) -> str:
|
||||
"""Escape unescaped control chars inside JSON string values.
|
||||
|
||||
Walks the raw JSON character-by-character, tracking whether we are
|
||||
inside a double-quoted string. Inside strings, replaces literal
|
||||
control characters (0x00-0x1F) that aren't already part of an escape
|
||||
sequence with their ``\\uXXXX`` equivalents. Pass-through for everything
|
||||
else.
|
||||
|
||||
Ported from #12093 — complements the other repair passes in
|
||||
``_repair_tool_call_arguments`` when ``json.loads(strict=False)`` is
|
||||
not enough (e.g. llama.cpp backends that emit literal apostrophes or
|
||||
tabs alongside other malformations).
|
||||
"""
|
||||
out: list[str] = []
|
||||
in_string = False
|
||||
i = 0
|
||||
n = len(raw)
|
||||
while i < n:
|
||||
ch = raw[i]
|
||||
if in_string:
|
||||
if ch == "\\" and i + 1 < n:
|
||||
# Already-escaped char — pass through as-is
|
||||
out.append(ch)
|
||||
out.append(raw[i + 1])
|
||||
i += 2
|
||||
continue
|
||||
if ch == '"':
|
||||
in_string = False
|
||||
out.append(ch)
|
||||
elif ord(ch) < 0x20:
|
||||
out.append(f"\\u{ord(ch):04x}")
|
||||
else:
|
||||
out.append(ch)
|
||||
else:
|
||||
if ch == '"':
|
||||
in_string = True
|
||||
out.append(ch)
|
||||
i += 1
|
||||
return "".join(out)
|
||||
|
||||
|
||||
def _repair_tool_call_arguments(raw_args: str, tool_name: str = "?") -> str:
|
||||
"""Attempt to repair malformed tool_call argument JSON.
|
||||
|
||||
@@ -523,6 +565,23 @@ def _repair_tool_call_arguments(raw_args: str, tool_name: str = "?") -> str:
|
||||
logger.warning("Sanitized Python-None tool_call arguments for %s", tool_name)
|
||||
return "{}"
|
||||
|
||||
# Repair pass 0: llama.cpp backends sometimes emit literal control
|
||||
# characters (tabs, newlines) inside JSON string values. json.loads
|
||||
# with strict=False accepts these and lets us re-serialise the
|
||||
# result into wire-valid JSON without any string surgery. This is
|
||||
# the most common local-model repair case (#12068).
|
||||
try:
|
||||
parsed = json.loads(raw_stripped, strict=False)
|
||||
reserialised = json.dumps(parsed, separators=(",", ":"))
|
||||
if reserialised != raw_stripped:
|
||||
logger.warning(
|
||||
"Repaired unescaped control chars in tool_call arguments for %s",
|
||||
tool_name,
|
||||
)
|
||||
return reserialised
|
||||
except (json.JSONDecodeError, TypeError, ValueError):
|
||||
pass
|
||||
|
||||
# Attempt common JSON repairs
|
||||
fixed = raw_stripped
|
||||
# 1. Strip trailing commas before } or ]
|
||||
@@ -557,6 +616,21 @@ def _repair_tool_call_arguments(raw_args: str, tool_name: str = "?") -> str:
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
|
||||
# Repair pass 4: escape unescaped control chars inside JSON strings,
|
||||
# then retry. Catches cases where strict=False alone fails because
|
||||
# other malformations are present too.
|
||||
try:
|
||||
escaped = _escape_invalid_chars_in_json_strings(fixed)
|
||||
if escaped != fixed:
|
||||
json.loads(escaped)
|
||||
logger.warning(
|
||||
"Repaired control-char-laced tool_call arguments for %s: %s → %s",
|
||||
tool_name, raw_stripped[:80], escaped[:80],
|
||||
)
|
||||
return escaped
|
||||
except (json.JSONDecodeError, TypeError, ValueError):
|
||||
pass
|
||||
|
||||
# Last resort: replace with empty object so the API request doesn't
|
||||
# crash the entire session.
|
||||
logger.warning(
|
||||
@@ -740,6 +814,11 @@ class AIAgent:
|
||||
for AI models that support function calling.
|
||||
"""
|
||||
|
||||
_TOOL_CALL_ARGUMENTS_CORRUPTION_MARKER = (
|
||||
"[hermes-agent: tool call arguments were corrupted in this session and "
|
||||
"have been dropped to keep the conversation alive. See issue #15236.]"
|
||||
)
|
||||
|
||||
@property
def base_url(self) -> str:
    """Return the value currently stored in ``self._base_url``."""
    return self._base_url
|
||||
@@ -1437,6 +1516,8 @@ class AIAgent:
|
||||
|
||||
# Track conversation messages for session logging
|
||||
self._session_messages: List[Dict[str, Any]] = []
|
||||
self._memory_write_origin = "assistant_tool"
|
||||
self._memory_write_context = "foreground"
|
||||
|
||||
# Cached system prompt -- built once per session, only rebuilt on compression
|
||||
self._cached_system_prompt: Optional[str] = None
|
||||
@@ -1497,7 +1578,6 @@ class AIAgent:
|
||||
self._memory_enabled = False
|
||||
self._user_profile_enabled = False
|
||||
self._memory_nudge_interval = 10
|
||||
self._memory_flush_min_turns = 6
|
||||
self._turns_since_memory = 0
|
||||
self._iters_since_skill = 0
|
||||
if not skip_memory:
|
||||
@@ -1506,7 +1586,6 @@ class AIAgent:
|
||||
self._memory_enabled = mem_config.get("memory_enabled", False)
|
||||
self._user_profile_enabled = mem_config.get("user_profile_enabled", False)
|
||||
self._memory_nudge_interval = int(mem_config.get("nudge_interval", 10))
|
||||
self._memory_flush_min_turns = int(mem_config.get("flush_min_turns", 6))
|
||||
if self._memory_enabled or self._user_profile_enabled:
|
||||
from tools.memory_tool import MemoryStore
|
||||
self._memory_store = MemoryStore(
|
||||
@@ -2231,6 +2310,34 @@ class AIAgent:
|
||||
except Exception:
|
||||
logger.debug("status_callback error in _emit_status", exc_info=True)
|
||||
|
||||
def _emit_warning(self, message: str) -> None:
|
||||
"""Emit a user-visible warning through the same status plumbing.
|
||||
|
||||
Unlike debug logs, these warnings are meant for degraded side paths
|
||||
such as auxiliary compression or memory flushes where the main turn can
|
||||
continue but the user needs to know something important failed.
|
||||
"""
|
||||
try:
|
||||
self._vprint(f"{self.log_prefix}{message}", force=True)
|
||||
except Exception:
|
||||
pass
|
||||
if self.status_callback:
|
||||
try:
|
||||
self.status_callback("warn", message)
|
||||
except Exception:
|
||||
logger.debug("status_callback error in _emit_warning", exc_info=True)
|
||||
|
||||
def _emit_auxiliary_failure(self, task: str, exc: BaseException) -> None:
|
||||
"""Surface a compact warning for failed auxiliary work."""
|
||||
try:
|
||||
detail = self._summarize_api_error(exc)
|
||||
except Exception:
|
||||
detail = str(exc)
|
||||
detail = (detail or exc.__class__.__name__).strip()
|
||||
if len(detail) > 220:
|
||||
detail = detail[:217].rstrip() + "..."
|
||||
self._emit_warning(f"⚠ Auxiliary {task} failed: {detail}")
|
||||
|
||||
def _current_main_runtime(self) -> Dict[str, str]:
|
||||
"""Return the live main runtime for session-scoped auxiliary routing."""
|
||||
return {
|
||||
@@ -2290,6 +2397,7 @@ class AIAgent:
|
||||
base_url=aux_base_url,
|
||||
api_key=aux_api_key,
|
||||
config_context_length=getattr(self, "_aux_compression_context_length_config", None),
|
||||
provider=getattr(self, "provider", ""),
|
||||
)
|
||||
|
||||
# Hard floor: the auxiliary compression model must have at least
|
||||
@@ -2316,6 +2424,11 @@ class AIAgent:
|
||||
# compression actually works this session. The hard floor
|
||||
# above guarantees aux_context >= MINIMUM_CONTEXT_LENGTH,
|
||||
# so the new threshold is always >= 64K.
|
||||
#
|
||||
# The compression summariser sends a single user-role
|
||||
# prompt (no system prompt, no tools) to the aux model, so
|
||||
# new_threshold == aux_context is safe: the request is
|
||||
# the raw messages plus a small summarisation instruction.
|
||||
old_threshold = threshold
|
||||
new_threshold = aux_context
|
||||
self.context_compressor.threshold_tokens = new_threshold
|
||||
@@ -3047,7 +3160,10 @@ class AIAgent:
|
||||
quiet_mode=True,
|
||||
platform=self.platform,
|
||||
provider=self.provider,
|
||||
parent_session_id=self.session_id,
|
||||
)
|
||||
review_agent._memory_write_origin = "background_review"
|
||||
review_agent._memory_write_context = "background_review"
|
||||
review_agent._memory_store = self._memory_store
|
||||
review_agent._memory_enabled = self._memory_enabled
|
||||
review_agent._user_profile_enabled = self._user_profile_enabled
|
||||
@@ -3081,7 +3197,8 @@ class AIAgent:
|
||||
pass
|
||||
|
||||
except Exception as e:
|
||||
logger.debug("Background memory/skill review failed: %s", e)
|
||||
logger.warning("Background memory/skill review failed: %s", e)
|
||||
self._emit_auxiliary_failure("background review", e)
|
||||
finally:
|
||||
# Close all resources (httpx client, subprocesses, etc.) so
|
||||
# GC doesn't try to clean them up on a dead asyncio event
|
||||
@@ -3095,6 +3212,32 @@ class AIAgent:
|
||||
t = threading.Thread(target=_run_review, daemon=True, name="bg-review")
|
||||
t.start()
|
||||
|
||||
def _build_memory_write_metadata(
|
||||
self,
|
||||
*,
|
||||
write_origin: Optional[str] = None,
|
||||
execution_context: Optional[str] = None,
|
||||
task_id: Optional[str] = None,
|
||||
tool_call_id: Optional[str] = None,
|
||||
) -> Dict[str, Any]:
|
||||
"""Build provenance metadata for external memory-provider mirrors."""
|
||||
metadata: Dict[str, Any] = {
|
||||
"write_origin": write_origin or getattr(self, "_memory_write_origin", "assistant_tool"),
|
||||
"execution_context": (
|
||||
execution_context
|
||||
or getattr(self, "_memory_write_context", "foreground")
|
||||
),
|
||||
"session_id": self.session_id or "",
|
||||
"parent_session_id": self._parent_session_id or "",
|
||||
"platform": self.platform or os.environ.get("HERMES_SESSION_SOURCE", "cli"),
|
||||
"tool_name": "memory",
|
||||
}
|
||||
if task_id:
|
||||
metadata["task_id"] = task_id
|
||||
if tool_call_id:
|
||||
metadata["tool_call_id"] = tool_call_id
|
||||
return {k: v for k, v in metadata.items() if v not in (None, "")}
|
||||
|
||||
def _apply_persist_user_message_override(self, messages: List[Dict]) -> None:
|
||||
"""Rewrite the current-turn user message before persistence/return.
|
||||
|
||||
@@ -4023,6 +4166,49 @@ class AIAgent:
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def _sync_external_memory_for_turn(
|
||||
self,
|
||||
*,
|
||||
original_user_message: Any,
|
||||
final_response: Any,
|
||||
interrupted: bool,
|
||||
) -> None:
|
||||
"""Mirror a completed turn into external memory providers.
|
||||
|
||||
Called at the end of ``run_conversation`` with the cleaned user
|
||||
message (``original_user_message``) and the finalised assistant
|
||||
response. The external memory backend gets both ``sync_all`` (to
|
||||
persist the exchange) and ``queue_prefetch_all`` (to start
|
||||
warming context for the next turn) in one shot.
|
||||
|
||||
Uses ``original_user_message`` rather than ``user_message``
|
||||
because the latter may carry injected skill content that bloats
|
||||
or breaks provider queries.
|
||||
|
||||
Interrupted turns are skipped entirely (#15218). A partial
|
||||
assistant output, an aborted tool chain, or a mid-stream reset
|
||||
is not durable conversational truth — mirroring it into an
|
||||
external memory backend pollutes future recall with state the
|
||||
user never saw completed. The prefetch is gated on the same
|
||||
flag: the user's next message is almost certainly a retry of
|
||||
the same intent, and a prefetch keyed on the interrupted turn
|
||||
would fire against stale context.
|
||||
|
||||
Normal completed turns still sync as before. The whole body is
|
||||
wrapped in ``try/except Exception`` because external memory
|
||||
providers are strictly best-effort — a misconfigured or offline
|
||||
backend must not block the user from seeing their response.
|
||||
"""
|
||||
if interrupted:
|
||||
return
|
||||
if not (self._memory_manager and final_response and original_user_message):
|
||||
return
|
||||
try:
|
||||
self._memory_manager.sync_all(original_user_message, final_response)
|
||||
self._memory_manager.queue_prefetch_all(original_user_message)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def release_clients(self) -> None:
|
||||
"""Release LLM client resources WITHOUT tearing down session tool state.
|
||||
|
||||
@@ -4955,6 +5141,8 @@ class AIAgent:
|
||||
# response.incomplete instead of response.completed).
|
||||
self._codex_streamed_text_parts: list = []
|
||||
for attempt in range(max_stream_retries + 1):
|
||||
if self._interrupt_requested:
|
||||
raise InterruptedError("Agent interrupted before Codex stream retry")
|
||||
collected_output_items: list = []
|
||||
try:
|
||||
with active_client.responses.stream(**api_kwargs) as stream:
|
||||
@@ -5432,6 +5620,26 @@ class AIAgent:
|
||||
self._try_refresh_anthropic_client_credentials()
|
||||
return self._anthropic_client.messages.create(**api_kwargs)
|
||||
|
||||
def _rebuild_anthropic_client(self) -> None:
    """Replace ``self._anthropic_client`` with a freshly built client.

    Intended for use after an interrupt or a stale call. The builder is
    chosen from ``self.provider``:

    * ``"bedrock"`` — ``build_anthropic_bedrock_client`` with
      ``self._bedrock_region`` (``"us-east-1"`` when unset or empty), so
      Bedrock-hosted Anthropic models do not fall through to the builder
      that requires a direct Anthropic API key.
    * anything else — ``build_anthropic_client`` with the stored API key,
      the optional ``_anthropic_base_url`` and the provider request
      timeout.
    """
    if getattr(self, "provider", None) != "bedrock":
        from agent.anthropic_adapter import build_anthropic_client

        api_key = self._anthropic_api_key
        endpoint = getattr(self, "_anthropic_base_url", None)
        request_timeout = get_provider_request_timeout(self.provider, self.model)
        self._anthropic_client = build_anthropic_client(
            api_key,
            endpoint,
            timeout=request_timeout,
        )
        return

    from agent.anthropic_adapter import build_anthropic_bedrock_client

    bedrock_region = getattr(self, "_bedrock_region", "us-east-1") or "us-east-1"
    self._anthropic_client = build_anthropic_bedrock_client(bedrock_region)
|
||||
|
||||
def _interruptible_api_call(self, api_kwargs: dict):
|
||||
"""
|
||||
Run the API call in a background thread so the main conversation loop
|
||||
@@ -5467,12 +5675,21 @@ class AIAgent:
|
||||
# bedrock responses like chat_completions responses.
|
||||
from agent.bedrock_adapter import (
|
||||
_get_bedrock_runtime_client,
|
||||
invalidate_runtime_client,
|
||||
is_stale_connection_error,
|
||||
normalize_converse_response,
|
||||
)
|
||||
region = api_kwargs.pop("__bedrock_region__", "us-east-1")
|
||||
api_kwargs.pop("__bedrock_converse__", None)
|
||||
client = _get_bedrock_runtime_client(region)
|
||||
raw_response = client.converse(**api_kwargs)
|
||||
try:
|
||||
raw_response = client.converse(**api_kwargs)
|
||||
except Exception as _bedrock_exc:
|
||||
# Evict the cached client on stale-connection failures
|
||||
# so the outer retry loop builds a fresh client/pool.
|
||||
if is_stale_connection_error(_bedrock_exc):
|
||||
invalidate_runtime_client(region)
|
||||
raise
|
||||
result["response"] = normalize_converse_response(raw_response)
|
||||
else:
|
||||
request_client_holder["client"] = self._create_request_openai_client(reason="chat_completion_request")
|
||||
@@ -5530,14 +5747,8 @@ class AIAgent:
|
||||
)
|
||||
try:
|
||||
if self.api_mode == "anthropic_messages":
|
||||
from agent.anthropic_adapter import build_anthropic_client
|
||||
|
||||
self._anthropic_client.close()
|
||||
self._anthropic_client = build_anthropic_client(
|
||||
self._anthropic_api_key,
|
||||
getattr(self, "_anthropic_base_url", None),
|
||||
timeout=get_provider_request_timeout(self.provider, self.model),
|
||||
)
|
||||
self._rebuild_anthropic_client()
|
||||
else:
|
||||
rc = request_client_holder.get("client")
|
||||
if rc is not None:
|
||||
@@ -5562,14 +5773,8 @@ class AIAgent:
|
||||
# seed future retries.
|
||||
try:
|
||||
if self.api_mode == "anthropic_messages":
|
||||
from agent.anthropic_adapter import build_anthropic_client
|
||||
|
||||
self._anthropic_client.close()
|
||||
self._anthropic_client = build_anthropic_client(
|
||||
self._anthropic_api_key,
|
||||
getattr(self, "_anthropic_base_url", None),
|
||||
timeout=get_provider_request_timeout(self.provider, self.model),
|
||||
)
|
||||
self._rebuild_anthropic_client()
|
||||
else:
|
||||
request_client = request_client_holder.get("client")
|
||||
if request_client is not None:
|
||||
@@ -5725,12 +5930,21 @@ class AIAgent:
|
||||
try:
|
||||
from agent.bedrock_adapter import (
|
||||
_get_bedrock_runtime_client,
|
||||
invalidate_runtime_client,
|
||||
is_stale_connection_error,
|
||||
stream_converse_with_callbacks,
|
||||
)
|
||||
region = api_kwargs.pop("__bedrock_region__", "us-east-1")
|
||||
api_kwargs.pop("__bedrock_converse__", None)
|
||||
client = _get_bedrock_runtime_client(region)
|
||||
raw_response = client.converse_stream(**api_kwargs)
|
||||
try:
|
||||
raw_response = client.converse_stream(**api_kwargs)
|
||||
except Exception as _bedrock_exc:
|
||||
# Evict the cached client on stale-connection failures
|
||||
# so the outer retry loop builds a fresh client/pool.
|
||||
if is_stale_connection_error(_bedrock_exc):
|
||||
invalidate_runtime_client(region)
|
||||
raise
|
||||
|
||||
def _on_text(text):
|
||||
_fire_first()
|
||||
@@ -5982,11 +6196,25 @@ class AIAgent:
|
||||
for idx in sorted(tool_calls_acc):
|
||||
tc = tool_calls_acc[idx]
|
||||
arguments = tc["function"]["arguments"]
|
||||
tool_name = tc["function"]["name"] or "?"
|
||||
if arguments and arguments.strip():
|
||||
try:
|
||||
json.loads(arguments)
|
||||
except json.JSONDecodeError:
|
||||
has_truncated_tool_args = True
|
||||
# Attempt repair before flagging as truncated.
|
||||
# Models like GLM-5.1 via Ollama produce trailing
|
||||
# commas, unclosed brackets, Python None, etc.
|
||||
# Without repair, these hit the truncation handler
|
||||
# and kill the session. _repair_tool_call_arguments
|
||||
# returns "{}" for unrepairable args, which is far
|
||||
# better than a crashed session.
|
||||
repaired = _repair_tool_call_arguments(arguments, tool_name)
|
||||
if repaired != "{}":
|
||||
# Successfully repaired — use the fixed args
|
||||
arguments = repaired
|
||||
else:
|
||||
# Unrepairable — flag for truncation handling
|
||||
has_truncated_tool_args = True
|
||||
mock_tool_calls.append(SimpleNamespace(
|
||||
id=tc["id"],
|
||||
type=tc["type"],
|
||||
@@ -6084,6 +6312,14 @@ class AIAgent:
|
||||
|
||||
try:
|
||||
for _stream_attempt in range(_max_stream_retries + 1):
|
||||
# Check for interrupt before each retry attempt. Without
|
||||
# this, /stop closes the HTTP connection (outer poll loop),
|
||||
# but the retry loop opens a FRESH connection — negating the
|
||||
# interrupt entirely. On slow providers (ollama-cloud) each
|
||||
# retry can block for the full stream-read timeout (120s+),
|
||||
# causing multi-minute delays between /stop and response.
|
||||
if self._interrupt_requested:
|
||||
raise InterruptedError("Agent interrupted before stream retry")
|
||||
try:
|
||||
if self.api_mode == "anthropic_messages":
|
||||
self._try_refresh_anthropic_client_credentials()
|
||||
@@ -6410,14 +6646,8 @@ class AIAgent:
|
||||
if self._interrupt_requested:
|
||||
try:
|
||||
if self.api_mode == "anthropic_messages":
|
||||
from agent.anthropic_adapter import build_anthropic_client
|
||||
|
||||
self._anthropic_client.close()
|
||||
self._anthropic_client = build_anthropic_client(
|
||||
self._anthropic_api_key,
|
||||
getattr(self, "_anthropic_base_url", None),
|
||||
timeout=get_provider_request_timeout(self.provider, self.model),
|
||||
)
|
||||
self._rebuild_anthropic_client()
|
||||
else:
|
||||
request_client = request_client_holder.get("client")
|
||||
if request_client is not None:
|
||||
@@ -7409,6 +7639,12 @@ class AIAgent:
|
||||
raw_reasoning_content = getattr(assistant_message, "reasoning_content", None)
|
||||
if raw_reasoning_content is not None:
|
||||
msg["reasoning_content"] = _sanitize_surrogates(raw_reasoning_content)
|
||||
elif msg.get("tool_calls") and self._needs_deepseek_tool_reasoning():
|
||||
# DeepSeek thinking mode requires reasoning_content on every
|
||||
# assistant tool-call message. Without it, replaying the
|
||||
# persisted message causes HTTP 400. Include empty string
|
||||
# as a defensive compatibility fallback (refs #15250).
|
||||
msg["reasoning_content"] = ""
|
||||
|
||||
if hasattr(assistant_message, 'reasoning_details') and assistant_message.reasoning_details:
|
||||
# Pass reasoning_details back unmodified so providers (OpenRouter,
|
||||
@@ -7484,6 +7720,35 @@ class AIAgent:
|
||||
|
||||
return msg
|
||||
|
||||
def _needs_kimi_tool_reasoning(self) -> bool:
|
||||
"""Return True when the current provider is Kimi / Moonshot thinking mode.
|
||||
|
||||
Kimi ``/coding`` and Moonshot thinking mode both require
|
||||
``reasoning_content`` on every assistant tool-call message; omitting
|
||||
it causes the next replay to fail with HTTP 400.
|
||||
"""
|
||||
return (
|
||||
self.provider in {"kimi-coding", "kimi-coding-cn"}
|
||||
or base_url_host_matches(self.base_url, "api.kimi.com")
|
||||
or base_url_host_matches(self.base_url, "moonshot.ai")
|
||||
or base_url_host_matches(self.base_url, "moonshot.cn")
|
||||
)
|
||||
|
||||
def _needs_deepseek_tool_reasoning(self) -> bool:
|
||||
"""Return True when the current provider is DeepSeek thinking mode.
|
||||
|
||||
DeepSeek V4 thinking mode requires ``reasoning_content`` on every
|
||||
assistant tool-call turn; omitting it causes HTTP 400 when the
|
||||
message is replayed in a subsequent API request (#15250).
|
||||
"""
|
||||
provider = (self.provider or "").lower()
|
||||
model = (self.model or "").lower()
|
||||
return (
|
||||
provider == "deepseek"
|
||||
or "deepseek" in model
|
||||
or base_url_host_matches(self.base_url, "api.deepseek.com")
|
||||
)
|
||||
|
||||
def _copy_reasoning_content_for_api(self, source_msg: dict, api_msg: dict) -> None:
|
||||
"""Copy provider-facing reasoning fields onto an API replay message."""
|
||||
if source_msg.get("role") != "assistant":
|
||||
@@ -7499,13 +7764,14 @@ class AIAgent:
|
||||
api_msg["reasoning_content"] = normalized_reasoning
|
||||
return
|
||||
|
||||
kimi_requires_reasoning = (
|
||||
self.provider in {"kimi-coding", "kimi-coding-cn"}
|
||||
or base_url_host_matches(self.base_url, "api.kimi.com")
|
||||
or base_url_host_matches(self.base_url, "moonshot.ai")
|
||||
or base_url_host_matches(self.base_url, "moonshot.cn")
|
||||
)
|
||||
if kimi_requires_reasoning and source_msg.get("tool_calls"):
|
||||
# Providers that require an echoed reasoning_content on every
|
||||
# assistant tool-call turn. Detection logic lives in the per-provider
|
||||
# helpers so both the creation path (_build_assistant_message) and
|
||||
# this replay path stay in sync.
|
||||
if source_msg.get("tool_calls") and (
|
||||
self._needs_kimi_tool_reasoning()
|
||||
or self._needs_deepseek_tool_reasoning()
|
||||
):
|
||||
api_msg["reasoning_content"] = ""
|
||||
|
||||
@staticmethod
|
||||
@@ -7536,6 +7802,115 @@ class AIAgent:
|
||||
]
|
||||
return api_msg
|
||||
|
||||
@staticmethod
def _sanitize_tool_call_arguments(
    messages: list,
    *,
    logger=None,
    session_id: str = None,
) -> int:
    """Repair corrupted assistant tool-call argument JSON in-place.

    Walks ``messages`` and, for every assistant message carrying a
    ``tool_calls`` list, checks each call's ``function["arguments"]``:

    * ``None``, empty, or whitespace-only arguments are normalised to
      ``"{}"`` (not counted as a repair).
    * Non-string arguments are left untouched.
    * String arguments that fail ``json.loads`` are replaced with ``"{}"``,
      a warning is logged, and the matching tool-result message is tagged
      with the corruption marker — or a new marker-only tool message is
      inserted right after the assistant message when no result exists.

    Args:
        messages: Conversation message list; mutated in place. Anything
            that is not a list is ignored.
        logger: Optional logger to use instead of the module logger.
        session_id: Optional session identifier, used only in the log line.

    Returns:
        The number of tool calls whose undecodable arguments were replaced.
    """
    log = logger or logging.getLogger(__name__)
    if not isinstance(messages, list):
        return 0

    repaired = 0
    marker = AIAgent._TOOL_CALL_ARGUMENTS_CORRUPTION_MARKER

    def _prepend_marker(tool_msg: dict) -> None:
        # Put the marker in front of an existing tool result without
        # duplicating it when the content already starts with the marker.
        existing = tool_msg.get("content")
        if isinstance(existing, str):
            if not existing:
                tool_msg["content"] = marker
            elif not existing.startswith(marker):
                tool_msg["content"] = f"{marker}\n{existing}"
            return
        if existing is None:
            tool_msg["content"] = marker
            return
        # Non-string content: serialise it so it can follow the marker as
        # text, falling back to str() when it is not JSON-serialisable.
        try:
            existing_text = json.dumps(existing)
        except TypeError:
            existing_text = str(existing)
        tool_msg["content"] = f"{marker}\n{existing_text}"

    # Index-based loop because the list may grow while it is being walked.
    message_index = 0
    while message_index < len(messages):
        msg = messages[message_index]
        if not isinstance(msg, dict) or msg.get("role") != "assistant":
            message_index += 1
            continue

        tool_calls = msg.get("tool_calls")
        if not isinstance(tool_calls, list) or not tool_calls:
            message_index += 1
            continue

        # Synthetic tool messages go directly after the assistant message;
        # the position advances with each insertion to keep call order.
        insert_at = message_index + 1
        for tool_call in tool_calls:
            if not isinstance(tool_call, dict):
                continue
            function = tool_call.get("function")
            if not isinstance(function, dict):
                continue

            arguments = function.get("arguments")
            if arguments is None or arguments == "":
                function["arguments"] = "{}"
                continue
            if isinstance(arguments, str) and not arguments.strip():
                function["arguments"] = "{}"
                continue
            if not isinstance(arguments, str):
                continue

            try:
                json.loads(arguments)
            except json.JSONDecodeError:
                tool_call_id = tool_call.get("id")
                function_name = function.get("name", "?")
                # Only the first 80 characters are logged.
                preview = arguments[:80]
                log.warning(
                    "Corrupted tool_call arguments repaired before request "
                    "(session=%s, message_index=%s, tool_call_id=%s, function=%s, preview=%r)",
                    session_id or "-",
                    message_index,
                    tool_call_id or "-",
                    function_name,
                    preview,
                )
                function["arguments"] = "{}"

                # Look for this call's result among the run of tool
                # messages that immediately follows the assistant message.
                existing_tool_msg = None
                scan_index = message_index + 1
                while scan_index < len(messages):
                    candidate = messages[scan_index]
                    if not isinstance(candidate, dict) or candidate.get("role") != "tool":
                        break
                    if candidate.get("tool_call_id") == tool_call_id:
                        existing_tool_msg = candidate
                        break
                    scan_index += 1

                if existing_tool_msg is None:
                    # No result recorded for this call: add a marker-only
                    # tool message carrying the same tool_call_id.
                    messages.insert(
                        insert_at,
                        {
                            "role": "tool",
                            "tool_call_id": tool_call_id,
                            "content": marker,
                        },
                    )
                    insert_at += 1
                else:
                    _prepend_marker(existing_tool_msg)

                repaired += 1

        message_index += 1

    return repaired
|
||||
|
||||
def _should_sanitize_tool_calls(self) -> bool:
|
||||
"""Determine if tool_calls need sanitization for strict APIs.
|
||||
|
||||
@@ -7549,201 +7924,6 @@ class AIAgent:
|
||||
"""
|
||||
return self.api_mode != "codex_responses"
|
||||
|
||||
def flush_memories(self, messages: list = None, min_turns: int = None):
|
||||
"""Give the model one turn to persist memories before context is lost.
|
||||
|
||||
Called before compression, session reset, or CLI exit. Injects a flush
|
||||
message, makes one API call, executes any memory tool calls, then
|
||||
strips all flush artifacts from the message list.
|
||||
|
||||
Args:
|
||||
messages: The current conversation messages. If None, uses
|
||||
self._session_messages (last run_conversation state).
|
||||
min_turns: Minimum user turns required to trigger the flush.
|
||||
None = use config value (flush_min_turns).
|
||||
0 = always flush (used for compression).
|
||||
"""
|
||||
if self._memory_flush_min_turns == 0 and min_turns is None:
|
||||
return
|
||||
if "memory" not in self.valid_tool_names or not self._memory_store:
|
||||
return
|
||||
effective_min = min_turns if min_turns is not None else self._memory_flush_min_turns
|
||||
if self._user_turn_count < effective_min:
|
||||
return
|
||||
|
||||
if messages is None:
|
||||
messages = getattr(self, '_session_messages', None)
|
||||
if not messages or len(messages) < 3:
|
||||
return
|
||||
|
||||
flush_content = (
|
||||
"[System: The session is being compressed. "
|
||||
"Save anything worth remembering — prioritize user preferences, "
|
||||
"corrections, and recurring patterns over task-specific details.]"
|
||||
)
|
||||
_sentinel = f"__flush_{id(self)}_{time.monotonic()}"
|
||||
flush_msg = {"role": "user", "content": flush_content, "_flush_sentinel": _sentinel}
|
||||
messages.append(flush_msg)
|
||||
|
||||
try:
|
||||
# Build API messages for the flush call
|
||||
_needs_sanitize = self._should_sanitize_tool_calls()
|
||||
api_messages = []
|
||||
for msg in messages:
|
||||
api_msg = msg.copy()
|
||||
self._copy_reasoning_content_for_api(msg, api_msg)
|
||||
api_msg.pop("reasoning", None)
|
||||
api_msg.pop("finish_reason", None)
|
||||
api_msg.pop("_flush_sentinel", None)
|
||||
api_msg.pop("_thinking_prefill", None)
|
||||
if _needs_sanitize:
|
||||
self._sanitize_tool_calls_for_strict_api(api_msg)
|
||||
api_messages.append(api_msg)
|
||||
|
||||
if self._cached_system_prompt:
|
||||
api_messages = [{"role": "system", "content": self._cached_system_prompt}] + api_messages
|
||||
|
||||
# Make one API call with only the memory tool available
|
||||
memory_tool_def = None
|
||||
for t in (self.tools or []):
|
||||
if t.get("function", {}).get("name") == "memory":
|
||||
memory_tool_def = t
|
||||
break
|
||||
|
||||
if not memory_tool_def:
|
||||
messages.pop() # remove flush msg
|
||||
return
|
||||
|
||||
# Use auxiliary client for the flush call when available --
|
||||
# it's cheaper and avoids Codex Responses API incompatibility.
|
||||
from agent.auxiliary_client import (
|
||||
call_llm as _call_llm,
|
||||
_fixed_temperature_for_model,
|
||||
OMIT_TEMPERATURE,
|
||||
)
|
||||
_aux_available = True
|
||||
# Kimi models manage temperature server-side — omit it entirely.
|
||||
# Other models with a fixed contract get that value; everyone else
|
||||
# gets the historical 0.3 default.
|
||||
_fixed_temp = _fixed_temperature_for_model(self.model, self.base_url)
|
||||
_omit_temperature = _fixed_temp is OMIT_TEMPERATURE
|
||||
if _omit_temperature:
|
||||
_flush_temperature = None
|
||||
elif _fixed_temp is not None:
|
||||
_flush_temperature = _fixed_temp
|
||||
else:
|
||||
_flush_temperature = 0.3
|
||||
try:
|
||||
response = _call_llm(
|
||||
task="flush_memories",
|
||||
messages=api_messages,
|
||||
tools=[memory_tool_def],
|
||||
temperature=_flush_temperature,
|
||||
max_tokens=5120,
|
||||
# timeout resolved from auxiliary.flush_memories.timeout config
|
||||
)
|
||||
except RuntimeError:
|
||||
_aux_available = False
|
||||
response = None
|
||||
|
||||
if not _aux_available and self.api_mode == "codex_responses":
|
||||
# No auxiliary client -- use the Codex Responses path directly
|
||||
codex_kwargs = self._build_api_kwargs(api_messages)
|
||||
codex_kwargs["tools"] = self._get_transport().convert_tools([memory_tool_def])
|
||||
if _flush_temperature is not None:
|
||||
codex_kwargs["temperature"] = _flush_temperature
|
||||
else:
|
||||
codex_kwargs.pop("temperature", None)
|
||||
if "max_output_tokens" in codex_kwargs:
|
||||
codex_kwargs["max_output_tokens"] = 5120
|
||||
response = self._run_codex_stream(codex_kwargs)
|
||||
elif not _aux_available and self.api_mode == "anthropic_messages":
|
||||
# Native Anthropic — use the transport for kwargs
|
||||
_tflush = self._get_transport()
|
||||
ant_kwargs = _tflush.build_kwargs(
|
||||
model=self.model, messages=api_messages,
|
||||
tools=[memory_tool_def], max_tokens=5120,
|
||||
reasoning_config=None,
|
||||
preserve_dots=self._anthropic_preserve_dots(),
|
||||
)
|
||||
response = self._anthropic_messages_create(ant_kwargs)
|
||||
elif not _aux_available:
|
||||
api_kwargs = {
|
||||
"model": self.model,
|
||||
"messages": api_messages,
|
||||
"tools": [memory_tool_def],
|
||||
**self._max_tokens_param(5120),
|
||||
}
|
||||
if _flush_temperature is not None:
|
||||
api_kwargs["temperature"] = _flush_temperature
|
||||
from agent.auxiliary_client import _get_task_timeout
|
||||
response = self._ensure_primary_openai_client(reason="flush_memories").chat.completions.create(
|
||||
**api_kwargs, timeout=_get_task_timeout("flush_memories")
|
||||
)
|
||||
|
||||
# Extract tool calls from the response, handling all API formats
|
||||
tool_calls = []
|
||||
if self.api_mode == "codex_responses" and not _aux_available:
|
||||
_ct_flush = self._get_transport()
|
||||
_cnr_flush = _ct_flush.normalize_response(response)
|
||||
if _cnr_flush and _cnr_flush.tool_calls:
|
||||
tool_calls = [
|
||||
SimpleNamespace(
|
||||
id=tc.id, type="function",
|
||||
function=SimpleNamespace(name=tc.name, arguments=tc.arguments),
|
||||
) for tc in _cnr_flush.tool_calls
|
||||
]
|
||||
elif self.api_mode == "anthropic_messages" and not _aux_available:
|
||||
_tfn = self._get_transport()
|
||||
_flush_result = _tfn.normalize_response(response, strip_tool_prefix=self._is_anthropic_oauth)
|
||||
if _flush_result and _flush_result.tool_calls:
|
||||
tool_calls = [
|
||||
SimpleNamespace(
|
||||
id=tc.id, type="function",
|
||||
function=SimpleNamespace(name=tc.name, arguments=tc.arguments),
|
||||
) for tc in _flush_result.tool_calls
|
||||
]
|
||||
elif self.api_mode in ("chat_completions", "bedrock_converse"):
|
||||
# chat_completions / bedrock — normalize through transport
|
||||
_flush_result = self._get_transport().normalize_response(response)
|
||||
if _flush_result.tool_calls:
|
||||
tool_calls = _flush_result.tool_calls
|
||||
elif _aux_available and hasattr(response, "choices") and response.choices:
|
||||
# Auxiliary client returned OpenAI-shaped response while main
|
||||
# api_mode is codex/anthropic — extract tool_calls from .choices
|
||||
_aux_msg = response.choices[0].message
|
||||
if hasattr(_aux_msg, "tool_calls") and _aux_msg.tool_calls:
|
||||
tool_calls = _aux_msg.tool_calls
|
||||
|
||||
for tc in tool_calls:
|
||||
if tc.function.name == "memory":
|
||||
try:
|
||||
args = json.loads(tc.function.arguments)
|
||||
flush_target = args.get("target", "memory")
|
||||
from tools.memory_tool import memory_tool as _memory_tool
|
||||
_memory_tool(
|
||||
action=args.get("action"),
|
||||
target=flush_target,
|
||||
content=args.get("content"),
|
||||
old_text=args.get("old_text"),
|
||||
store=self._memory_store,
|
||||
)
|
||||
if not self.quiet_mode:
|
||||
print(f" 🧠 Memory flush: saved to {args.get('target', 'memory')}")
|
||||
except Exception as e:
|
||||
logger.debug("Memory flush tool call failed: %s", e)
|
||||
except Exception as e:
|
||||
logger.debug("Memory flush API call failed: %s", e)
|
||||
finally:
|
||||
# Strip flush artifacts: remove everything from the flush message onward.
|
||||
# Use sentinel marker instead of identity check for robustness.
|
||||
while messages and messages[-1].get("_flush_sentinel") != _sentinel:
|
||||
messages.pop()
|
||||
if not messages:
|
||||
break
|
||||
if messages and messages[-1].get("_flush_sentinel") == _sentinel:
|
||||
messages.pop()
|
||||
|
||||
def _compress_context(self, messages: list, system_message: str, *, approx_tokens: int = None, task_id: str = "default", focus_topic: str = None) -> tuple:
|
||||
"""Compress conversation context and split the session in SQLite.
|
||||
|
||||
@@ -7762,8 +7942,6 @@ class AIAgent:
|
||||
f"{approx_tokens:,}" if approx_tokens else "unknown", self.model,
|
||||
focus_topic,
|
||||
)
|
||||
# Pre-compression memory flush: let the model save memories before they're lost
|
||||
self.flush_memories(messages, min_turns=0)
|
||||
|
||||
# Notify external memory provider before compression discards context
|
||||
if self._memory_manager:
|
||||
@@ -7779,6 +7957,15 @@ class AIAgent:
|
||||
# focus_topic — fall back to calling without it.
|
||||
compressed = self.context_compressor.compress(messages, current_tokens=approx_tokens)
|
||||
|
||||
summary_error = getattr(self.context_compressor, "_last_summary_error", None)
|
||||
if summary_error:
|
||||
if getattr(self, "_last_compression_summary_warning", None) != summary_error:
|
||||
self._last_compression_summary_warning = summary_error
|
||||
self._emit_warning(
|
||||
f"⚠ Compression summary failed: {summary_error}. "
|
||||
"Inserted a fallback context marker."
|
||||
)
|
||||
|
||||
todo_snapshot = self._todo_store.format_for_injection()
|
||||
if todo_snapshot:
|
||||
compressed.append({"role": "user", "content": todo_snapshot})
|
||||
@@ -7948,6 +8135,10 @@ class AIAgent:
|
||||
function_args.get("action", ""),
|
||||
target,
|
||||
function_args.get("content", ""),
|
||||
metadata=self._build_memory_write_metadata(
|
||||
task_id=effective_task_id,
|
||||
tool_call_id=tool_call_id,
|
||||
),
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
@@ -8459,6 +8650,10 @@ class AIAgent:
|
||||
function_args.get("action", ""),
|
||||
target,
|
||||
function_args.get("content", ""),
|
||||
metadata=self._build_memory_write_metadata(
|
||||
task_id=effective_task_id,
|
||||
tool_call_id=getattr(tool_call, "id", None),
|
||||
),
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
@@ -8703,6 +8898,7 @@ class AIAgent:
|
||||
api_messages = []
|
||||
for msg in messages:
|
||||
api_msg = msg.copy()
|
||||
self._copy_reasoning_content_for_api(msg, api_msg)
|
||||
for internal_field in ("reasoning", "finish_reason", "_thinking_prefill"):
|
||||
api_msg.pop(internal_field, None)
|
||||
if _needs_sanitize:
|
||||
@@ -9333,6 +9529,19 @@ class AIAgent:
|
||||
# Note: Reasoning is embedded in content via <think> tags for trajectory storage.
|
||||
# However, providers like Moonshot AI require a separate 'reasoning_content' field
|
||||
# on assistant messages with tool_calls. We handle both cases here.
|
||||
request_logger = getattr(self, "logger", None) or logging.getLogger(__name__)
|
||||
repaired_tool_calls = self._sanitize_tool_call_arguments(
|
||||
messages,
|
||||
logger=request_logger,
|
||||
session_id=self.session_id,
|
||||
)
|
||||
if repaired_tool_calls > 0:
|
||||
request_logger.info(
|
||||
"Sanitized %s corrupted tool_call arguments before request (session=%s)",
|
||||
repaired_tool_calls,
|
||||
self.session_id or "-",
|
||||
)
|
||||
|
||||
api_messages = []
|
||||
for idx, msg in enumerate(messages):
|
||||
api_msg = msg.copy()
|
||||
@@ -12162,14 +12371,11 @@ class AIAgent:
|
||||
self._iters_since_skill = 0
|
||||
|
||||
# External memory provider: sync the completed turn + queue next prefetch.
|
||||
# Use original_user_message (clean input) — user_message may contain
|
||||
# injected skill content that bloats / breaks provider queries.
|
||||
if self._memory_manager and final_response and original_user_message:
|
||||
try:
|
||||
self._memory_manager.sync_all(original_user_message, final_response)
|
||||
self._memory_manager.queue_prefetch_all(original_user_message)
|
||||
except Exception:
|
||||
pass
|
||||
self._sync_external_memory_for_turn(
|
||||
original_user_message=original_user_message,
|
||||
final_response=final_response,
|
||||
interrupted=interrupted,
|
||||
)
|
||||
|
||||
# Background memory/skill review — runs AFTER the response is delivered
|
||||
# so it never competes with the user's task for model attention.
|
||||
|
||||
+99
-7
@@ -29,10 +29,25 @@ BOLD='\033[1m'
|
||||
REPO_URL_SSH="git@github.com:NousResearch/hermes-agent.git"
|
||||
REPO_URL_HTTPS="https://github.com/NousResearch/hermes-agent.git"
|
||||
HERMES_HOME="${HERMES_HOME:-$HOME/.hermes}"
|
||||
INSTALL_DIR="${HERMES_INSTALL_DIR:-$HERMES_HOME/hermes-agent}"
|
||||
# INSTALL_DIR is resolved AFTER arg parsing and OS detection so we can pick an
|
||||
# FHS-style layout for root installs. Track whether the user gave us an
|
||||
# explicit directory — if so we never override it.
|
||||
if [ -n "${HERMES_INSTALL_DIR:-}" ]; then
|
||||
INSTALL_DIR="$HERMES_INSTALL_DIR"
|
||||
INSTALL_DIR_EXPLICIT=true
|
||||
else
|
||||
INSTALL_DIR=""
|
||||
INSTALL_DIR_EXPLICIT=false
|
||||
fi
|
||||
PYTHON_VERSION="3.11"
|
||||
NODE_VERSION="22"
|
||||
|
||||
# FHS-style root install layout (set by resolve_install_layout when applicable):
|
||||
# code at /usr/local/lib/hermes-agent, command at /usr/local/bin/hermes,
|
||||
# data still at /root/.hermes (HERMES_HOME). Matches Claude Code / Codex CLI
|
||||
# and keeps Docker bind-mounted /root/ volumes lean.
|
||||
ROOT_FHS_LAYOUT=false
|
||||
|
||||
# Options
|
||||
USE_VENV=true
|
||||
RUN_SETUP=true
|
||||
@@ -64,6 +79,7 @@ while [[ $# -gt 0 ]]; do
|
||||
;;
|
||||
--dir)
|
||||
INSTALL_DIR="$2"
|
||||
INSTALL_DIR_EXPLICIT=true
|
||||
shift 2
|
||||
;;
|
||||
--hermes-home)
|
||||
@@ -79,9 +95,20 @@ while [[ $# -gt 0 ]]; do
|
||||
echo " --no-venv Don't create virtual environment"
|
||||
echo " --skip-setup Skip interactive setup wizard"
|
||||
echo " --branch NAME Git branch to install (default: main)"
|
||||
echo " --dir PATH Installation directory (default: ~/.hermes/hermes-agent)"
|
||||
echo " --dir PATH Installation directory"
|
||||
echo " default (non-root): ~/.hermes/hermes-agent"
|
||||
echo " default (root, Linux): /usr/local/lib/hermes-agent"
|
||||
echo " --hermes-home PATH Data directory (default: ~/.hermes, or \$HERMES_HOME)"
|
||||
echo " -h, --help Show this help"
|
||||
echo ""
|
||||
echo "Notes:"
|
||||
echo " When running as root on Linux, Hermes installs the code under"
|
||||
echo " /usr/local/lib/hermes-agent and links the command into"
|
||||
echo " /usr/local/bin/hermes (FHS layout — matches Claude Code / Codex CLI)."
|
||||
echo " Data, config, sessions, and logs still live in \$HERMES_HOME"
|
||||
echo " (default /root/.hermes). This keeps Docker bind-mounted volumes"
|
||||
echo " small and ensures the command is on PATH for all shells."
|
||||
echo " Existing installs at \$HERMES_HOME/hermes-agent are preserved in-place."
|
||||
exit 0
|
||||
;;
|
||||
*)
|
||||
@@ -163,9 +190,60 @@ is_termux() {
|
||||
[ -n "${TERMUX_VERSION:-}" ] || [[ "${PREFIX:-}" == *"com.termux/files/usr"* ]]
|
||||
}
|
||||
|
||||
# Decide where the repo checkout + venv live, and where the `hermes` command
|
||||
# symlink goes. Called after detect_os so $OS/$DISTRO are known.
|
||||
#
|
||||
# Defaults:
|
||||
# - Non-root, any OS: INSTALL_DIR = $HERMES_HOME/hermes-agent
|
||||
# command link in $HOME/.local/bin
|
||||
# - Termux (any uid): INSTALL_DIR = $HERMES_HOME/hermes-agent
|
||||
# command link in $PREFIX/bin (already on PATH)
|
||||
# - Root on Linux (new): INSTALL_DIR = /usr/local/lib/hermes-agent
|
||||
# command link in /usr/local/bin
|
||||
# (unless a legacy install already exists at
|
||||
# $HERMES_HOME/hermes-agent — then preserve it)
|
||||
#
|
||||
# Always no-op when the user set --dir or $HERMES_INSTALL_DIR.
|
||||
# Pick the checkout location (INSTALL_DIR) and decide whether the FHS-style
# root layout is in effect (ROOT_FHS_LAYOUT).
#
# Reads:  INSTALL_DIR_EXPLICIT, INSTALL_DIR, HERMES_HOME, OS
# Writes: INSTALL_DIR, ROOT_FHS_LAYOUT
#
# Precedence:
#   1. An explicitly supplied directory is reported and left untouched.
#   2. Termux keeps the checkout under $HERMES_HOME/hermes-agent.
#   3. Root on Linux uses /usr/local/lib/hermes-agent, unless a git checkout
#      already exists under $HERMES_HOME/hermes-agent, which is then kept.
#   4. Everything else uses $HERMES_HOME/hermes-agent.
resolve_install_layout() {
    # An explicit choice always wins; nothing is recomputed.
    if [ "$INSTALL_DIR_EXPLICIT" = true ]; then
        log_info "Install directory: $INSTALL_DIR (explicit)"
        return 0
    fi

    # The user-scoped location shared by every non-FHS outcome below.
    local legacy_dir="$HERMES_HOME/hermes-agent"

    if is_termux; then
        # Termux: keep the code inside HERMES_HOME.
        INSTALL_DIR="$legacy_dir"
    elif [ "$OS" = "linux" ] && [ "$(id -u)" -eq 0 ]; then
        # Root on Linux. macOS root installs never reach this branch because
        # of the $OS test, so they fall through to the legacy layout.
        if [ -d "$legacy_dir/.git" ]; then
            # A previous checkout is present: do not relocate it.
            INSTALL_DIR="$legacy_dir"
            log_info "Existing install detected at $INSTALL_DIR — keeping legacy layout"
            log_info " (new root installs use /usr/local/lib/hermes-agent)"
        else
            INSTALL_DIR="/usr/local/lib/hermes-agent"
            ROOT_FHS_LAYOUT=true
            log_info "Root install on Linux — using FHS layout"
            log_info " Code: $INSTALL_DIR"
            log_info " Command: /usr/local/bin/hermes"
            log_info " Data: $HERMES_HOME (unchanged)"
        fi
    else
        # Non-root, non-Termux: user-scoped layout.
        INSTALL_DIR="$legacy_dir"
    fi

    return 0
}
|
||||
|
||||
get_command_link_dir() {
|
||||
if is_termux && [ -n "${PREFIX:-}" ]; then
|
||||
echo "$PREFIX/bin"
|
||||
elif [ "$ROOT_FHS_LAYOUT" = true ]; then
|
||||
echo "/usr/local/bin"
|
||||
else
|
||||
echo "$HOME/.local/bin"
|
||||
fi
|
||||
@@ -174,6 +252,8 @@ get_command_link_dir() {
|
||||
get_command_link_display_dir() {
|
||||
if is_termux && [ -n "${PREFIX:-}" ]; then
|
||||
echo '$PREFIX/bin'
|
||||
elif [ "$ROOT_FHS_LAYOUT" = true ]; then
|
||||
echo '/usr/local/bin'
|
||||
else
|
||||
echo '~/.local/bin'
|
||||
fi
|
||||
@@ -975,6 +1055,14 @@ setup_path() {
|
||||
return 0
|
||||
fi
|
||||
|
||||
# FHS layout: /usr/local/bin is on PATH for every standard shell, nothing to inject.
|
||||
if [ "$ROOT_FHS_LAYOUT" = true ]; then
|
||||
export PATH="$command_link_dir:$PATH"
|
||||
log_info "/usr/local/bin is already on PATH for all shells"
|
||||
log_success "hermes command ready"
|
||||
return 0
|
||||
fi
|
||||
|
||||
# Check if ~/.local/bin is on PATH; if not, add it to shell config.
|
||||
# Detect the user's actual login shell (not the shell running this script,
|
||||
# which is always bash when piped from curl).
|
||||
@@ -1339,12 +1427,12 @@ print_success() {
|
||||
echo ""
|
||||
|
||||
# Show file locations
|
||||
echo -e "${CYAN}${BOLD}📁 Your files (all in ~/.hermes/):${NC}"
|
||||
echo -e "${CYAN}${BOLD}📁 Your files:${NC}"
|
||||
echo ""
|
||||
echo -e " ${YELLOW}Config:${NC} ~/.hermes/config.yaml"
|
||||
echo -e " ${YELLOW}API Keys:${NC} ~/.hermes/.env"
|
||||
echo -e " ${YELLOW}Data:${NC} ~/.hermes/cron/, sessions/, logs/"
|
||||
echo -e " ${YELLOW}Code:${NC} ~/.hermes/hermes-agent/"
|
||||
echo -e " ${YELLOW}Config:${NC} $HERMES_HOME/config.yaml"
|
||||
echo -e " ${YELLOW}API Keys:${NC} $HERMES_HOME/.env"
|
||||
echo -e " ${YELLOW}Data:${NC} $HERMES_HOME/cron/, sessions/, logs/"
|
||||
echo -e " ${YELLOW}Code:${NC} $INSTALL_DIR"
|
||||
echo ""
|
||||
|
||||
echo -e "${CYAN}─────────────────────────────────────────────────────────${NC}"
|
||||
@@ -1364,6 +1452,9 @@ print_success() {
|
||||
if [ "$DISTRO" = "termux" ]; then
|
||||
echo -e "${YELLOW}⚡ 'hermes' was linked into $(get_command_link_display_dir), which is already on PATH in Termux.${NC}"
|
||||
echo ""
|
||||
elif [ "$ROOT_FHS_LAYOUT" = true ]; then
|
||||
echo -e "${YELLOW}⚡ 'hermes' was linked into /usr/local/bin and is ready to use — no shell reload needed.${NC}"
|
||||
echo ""
|
||||
else
|
||||
echo -e "${YELLOW}⚡ Reload your shell to use 'hermes' command:${NC}"
|
||||
echo ""
|
||||
@@ -1415,6 +1506,7 @@ main() {
|
||||
print_banner
|
||||
|
||||
detect_os
|
||||
resolve_install_layout
|
||||
install_uv
|
||||
check_python
|
||||
check_git
|
||||
|
||||
@@ -48,6 +48,9 @@ AUTHOR_MAP = {
|
||||
"jefferson@heimdallstrategy.com": "Mind-Dragon",
|
||||
"130918800+devorun@users.noreply.github.com": "devorun",
|
||||
"maks.mir@yahoo.com": "say8hi",
|
||||
"web3blind@users.noreply.github.com": "web3blind",
|
||||
"julia@alexland.us": "alexg0bot",
|
||||
"1060770+benjaminsehl@users.noreply.github.com": "benjaminsehl",
|
||||
# contributors (from noreply pattern)
|
||||
"david.vv@icloud.com": "davidvv",
|
||||
"wangqiang@wangqiangdeMac-mini.local": "xiaoqiang243",
|
||||
@@ -59,14 +62,19 @@ AUTHOR_MAP = {
|
||||
"keifergu@tencent.com": "keifergu",
|
||||
"kshitijk4poor@users.noreply.github.com": "kshitijk4poor",
|
||||
"abner.the.foreman@agentmail.to": "Abnertheforeman",
|
||||
"thomasgeorgevii09@gmail.com": "tochukwuada",
|
||||
"harryykyle1@gmail.com": "hharry11",
|
||||
"kshitijk4poor@gmail.com": "kshitijk4poor",
|
||||
"keira.voss94@gmail.com": "keiravoss94",
|
||||
"16443023+stablegenius49@users.noreply.github.com": "stablegenius49",
|
||||
"simbamax99@gmail.com": "simbam99",
|
||||
"185121704+stablegenius49@users.noreply.github.com": "stablegenius49",
|
||||
"101283333+batuhankocyigit@users.noreply.github.com": "batuhankocyigit",
|
||||
"255305877+ismell0992-afk@users.noreply.github.com": "ismell0992-afk",
|
||||
"cyprian@ironin.pl": "iRonin",
|
||||
"valdi.jorge@gmail.com": "jvcl",
|
||||
"q19dcp@gmail.com": "aj-nt",
|
||||
"ebukau84@gmail.com": "UgwujaGeorge",
|
||||
"francip@gmail.com": "francip",
|
||||
"omni@comelse.com": "omnissiah-comelse",
|
||||
"oussama.redcode@gmail.com": "mavrickdeveloper",
|
||||
@@ -84,6 +92,7 @@ AUTHOR_MAP = {
|
||||
"104278804+Sertug17@users.noreply.github.com": "Sertug17",
|
||||
"112503481+caentzminger@users.noreply.github.com": "caentzminger",
|
||||
"258577966+voidborne-d@users.noreply.github.com": "voidborne-d",
|
||||
"xydarcher@uestc.edu.cn": "Readon",
|
||||
"sir_even@icloud.com": "sirEven",
|
||||
"36056348+sirEven@users.noreply.github.com": "sirEven",
|
||||
"70424851+insecurejezza@users.noreply.github.com": "insecurejezza",
|
||||
@@ -106,6 +115,7 @@ AUTHOR_MAP = {
|
||||
"30841158+n-WN@users.noreply.github.com": "n-WN",
|
||||
"tsuijinglei@gmail.com": "hiddenpuppy",
|
||||
"jerome@clawwork.ai": "HiddenPuppy",
|
||||
"jerome.benoit@sap.com": "jerome-benoit",
|
||||
"wysie@users.noreply.github.com": "Wysie",
|
||||
"leoyuan0099@gmail.com": "keyuyuan",
|
||||
"bxzt2006@163.com": "Only-Code-A",
|
||||
@@ -200,6 +210,9 @@ AUTHOR_MAP = {
|
||||
"1434494126@qq.com": "5park1e",
|
||||
"158153005+5park1e@users.noreply.github.com": "5park1e",
|
||||
"innocarpe@gmail.com": "innocarpe",
|
||||
"noreply@ked.com": "qike-ms",
|
||||
"andrekurait@gmail.com": "AndreKurait",
|
||||
"bsgdigital@users.noreply.github.com": "bsgdigital",
|
||||
"numman.ali@gmail.com": "nummanali",
|
||||
"rohithsaimidigudla@gmail.com": "whitehatjr1001",
|
||||
"0xNyk@users.noreply.github.com": "0xNyk",
|
||||
@@ -490,6 +503,9 @@ AUTHOR_MAP = {
|
||||
"zhangxicen@example.com": "zhangxicen",
|
||||
"codex@openai.invalid": "teknium1",
|
||||
"screenmachine@gmail.com": "teknium1",
|
||||
"chenzeshi@live.com": "chen1749144759",
|
||||
"mor.aleksandr@yahoo.com": "MorAlekss",
|
||||
"ash@users.noreply.github.com": "ash",
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -134,6 +134,7 @@ masks = processor.image_processor.post_process_masks(
|
||||
|
||||
### Model architecture
|
||||
|
||||
<!-- ascii-guard-ignore -->
|
||||
```
|
||||
SAM Architecture:
|
||||
┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐
|
||||
@@ -144,6 +145,7 @@ SAM Architecture:
|
||||
Image Embeddings Prompt Embeddings Masks + IoU
|
||||
(computed once) (per prompt) predictions
|
||||
```
|
||||
<!-- ascii-guard-ignore-end -->
|
||||
|
||||
### Model variants
|
||||
|
||||
|
||||
@@ -0,0 +1,42 @@
|
||||
"""Resolve HERMES_HOME for standalone skill scripts.
|
||||
|
||||
Skill scripts may run outside the Hermes process (e.g. system Python,
|
||||
nix env, CI) where ``hermes_constants`` is not importable. This module
|
||||
provides the same ``get_hermes_home()`` and ``display_hermes_home()``
|
||||
contracts as ``hermes_constants`` without requiring it on ``sys.path``.
|
||||
|
||||
When ``hermes_constants`` IS available it is used directly so that any
|
||||
future enhancements (profile resolution, Docker detection, etc.) are
|
||||
picked up automatically. The fallback path replicates the core logic
|
||||
from ``hermes_constants.py`` using only the stdlib.
|
||||
|
||||
All scripts under ``google-workspace/scripts/`` should import from here
|
||||
instead of duplicating the ``HERMES_HOME = Path(os.getenv(...))`` pattern.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
try:
    # Prefer the real implementation whenever it is importable. The redundant
    # ``as`` aliases mark both names as deliberate re-exports of this module.
    from hermes_constants import display_hermes_home as display_hermes_home
    from hermes_constants import get_hermes_home as get_hermes_home
except ImportError:  # ModuleNotFoundError is a subclass of ImportError
    # Stdlib-only stand-ins used when hermes_constants cannot be imported.

    def get_hermes_home() -> Path:
        """Locate the Hermes home directory.

        Returns:
            The path held in the ``HERMES_HOME`` environment variable once
            surrounding whitespace is stripped, or ``~/.hermes`` when the
            variable is unset, empty, or whitespace-only.
        """
        configured = os.environ.get("HERMES_HOME", "").strip()
        if configured:
            return Path(configured)
        return Path.home() / ".hermes"

    def display_hermes_home() -> str:
        """Render the Hermes home directory for display.

        Returns:
            ``"~/<relative path>"`` when the directory lies inside the
            current user's home directory, otherwise the path unchanged.
        """
        hermes_home = get_hermes_home()
        try:
            inside_home = hermes_home.relative_to(Path.home())
        except ValueError:
            # Not located under the user's home: show the path as-is.
            return str(hermes_home)
        return "~/" + str(inside_home)
|
||||
@@ -31,7 +31,14 @@ from datetime import datetime, timedelta, timezone
|
||||
from email.mime.text import MIMEText
|
||||
from pathlib import Path
|
||||
|
||||
HERMES_HOME = Path(os.getenv("HERMES_HOME", Path.home() / ".hermes"))
|
||||
# Ensure sibling modules (_hermes_home) are importable when run standalone.
|
||||
_SCRIPTS_DIR = str(Path(__file__).resolve().parent)
|
||||
if _SCRIPTS_DIR not in sys.path:
|
||||
sys.path.insert(0, _SCRIPTS_DIR)
|
||||
|
||||
from _hermes_home import get_hermes_home
|
||||
|
||||
HERMES_HOME = get_hermes_home()
|
||||
TOKEN_PATH = HERMES_HOME / "google_token.json"
|
||||
CLIENT_SECRET_PATH = HERMES_HOME / "google_client_secret.json"
|
||||
|
||||
|
||||
@@ -10,9 +10,12 @@ import sys
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
# Ensure sibling modules (_hermes_home) are importable when run standalone.
|
||||
_SCRIPTS_DIR = str(Path(__file__).resolve().parent)
|
||||
if _SCRIPTS_DIR not in sys.path:
|
||||
sys.path.insert(0, _SCRIPTS_DIR)
|
||||
|
||||
def get_hermes_home() -> Path:
|
||||
return Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes"))
|
||||
from _hermes_home import get_hermes_home
|
||||
|
||||
|
||||
def get_token_path() -> Path:
|
||||
|
||||
@@ -21,6 +21,8 @@ Agent workflow:
|
||||
6. Run --check to verify. Done.
|
||||
"""
|
||||
|
||||
from __future__ import annotations # allow PEP 604 `X | None` on Python 3.9+
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
@@ -28,13 +30,12 @@ import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
try:
|
||||
from hermes_constants import display_hermes_home, get_hermes_home
|
||||
except ModuleNotFoundError:
|
||||
HERMES_AGENT_ROOT = Path(__file__).resolve().parents[4]
|
||||
if HERMES_AGENT_ROOT.exists():
|
||||
sys.path.insert(0, str(HERMES_AGENT_ROOT))
|
||||
from hermes_constants import display_hermes_home, get_hermes_home
|
||||
# Ensure sibling modules (_hermes_home) are importable when run standalone.
|
||||
_SCRIPTS_DIR = str(Path(__file__).resolve().parent)
|
||||
if _SCRIPTS_DIR not in sys.path:
|
||||
sys.path.insert(0, _SCRIPTS_DIR)
|
||||
|
||||
from _hermes_home import display_hermes_home, get_hermes_home
|
||||
|
||||
HERMES_HOME = get_hermes_home()
|
||||
TOKEN_PATH = HERMES_HOME / "google_token.json"
|
||||
@@ -111,7 +112,11 @@ def install_deps():
|
||||
return True
|
||||
except subprocess.CalledProcessError as e:
|
||||
print(f"ERROR: Failed to install dependencies: {e}")
|
||||
print(f"Try manually: {sys.executable} -m pip install {' '.join(REQUIRED_PACKAGES)}")
|
||||
print(
|
||||
"On environments without pip (e.g. Nix), install the optional extra instead:"
|
||||
)
|
||||
print(" pip install 'hermes-agent[google]'")
|
||||
print(f"Or manually: {sys.executable} -m pip install {' '.join(REQUIRED_PACKAGES)}")
|
||||
return False
|
||||
|
||||
|
||||
|
||||
@@ -22,6 +22,7 @@ End-to-end pipeline for producing publication-ready ML/AI research papers target
|
||||
|
||||
This is **not a linear pipeline** — it is an iterative loop. Results trigger new experiments. Reviews trigger new analysis. The agent must handle these feedback loops.
|
||||
|
||||
<!-- ascii-guard-ignore -->
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────────┐
|
||||
│ RESEARCH PAPER PIPELINE │
|
||||
@@ -41,6 +42,7 @@ This is **not a linear pipeline** — it is an iterative loop. Results trigger n
|
||||
│ │
|
||||
└─────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
<!-- ascii-guard-ignore-end -->
|
||||
|
||||
---
|
||||
|
||||
|
||||
@@ -386,7 +386,7 @@ class TestProvidersDictApiModeAnthropicMessages:
|
||||
},
|
||||
},
|
||||
"auxiliary": {
|
||||
"flush_memories": {
|
||||
"compression": {
|
||||
"provider": "myrelay",
|
||||
"model": "claude-sonnet-4.6",
|
||||
},
|
||||
@@ -399,11 +399,11 @@ class TestProvidersDictApiModeAnthropicMessages:
|
||||
AnthropicAuxiliaryClient,
|
||||
AsyncAnthropicAuxiliaryClient,
|
||||
)
|
||||
async_client, async_model = get_async_text_auxiliary_client("flush_memories")
|
||||
async_client, async_model = get_async_text_auxiliary_client("compression")
|
||||
assert isinstance(async_client, AsyncAnthropicAuxiliaryClient)
|
||||
assert async_model == "claude-sonnet-4.6"
|
||||
|
||||
sync_client, sync_model = get_text_auxiliary_client("flush_memories")
|
||||
sync_client, sync_model = get_text_auxiliary_client("compression")
|
||||
assert isinstance(sync_client, AnthropicAuxiliaryClient)
|
||||
assert sync_model == "claude-sonnet-4.6"
|
||||
|
||||
|
||||
@@ -1230,3 +1230,210 @@ class TestEmptyTextBlockFix:
|
||||
from agent.bedrock_adapter import _convert_content_to_converse
|
||||
blocks = _convert_content_to_converse("Hello")
|
||||
assert blocks[0]["text"] == "Hello"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Stale-connection detection and per-region client invalidation
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestInvalidateRuntimeClient:
    """``invalidate_runtime_client`` drops exactly one region's cached client."""

    def test_evicts_only_the_target_region(self):
        """Evicting one region reports True and leaves other regions cached."""
        from agent.bedrock_adapter import (
            _bedrock_runtime_client_cache,
            invalidate_runtime_client,
            reset_client_cache,
        )

        # Start from an empty cache, then seed two regions with placeholders.
        reset_client_cache()
        _bedrock_runtime_client_cache["us-east-1"] = "dead-client"
        _bedrock_runtime_client_cache["us-west-2"] = "live-client"

        was_evicted = invalidate_runtime_client("us-east-1")

        assert was_evicted is True
        assert "us-east-1" not in _bedrock_runtime_client_cache
        assert _bedrock_runtime_client_cache["us-west-2"] == "live-client"

    def test_returns_false_when_region_not_cached(self):
        """Invalidating a region that was never cached reports False."""
        from agent.bedrock_adapter import invalidate_runtime_client, reset_client_cache

        reset_client_cache()

        assert invalidate_runtime_client("eu-west-1") is False
|
||||
|
||||
|
||||
class TestIsStaleConnectionError:
    """``is_stale_connection_error`` decides which exceptions warrant eviction."""

    # Endpoint passed to every botocore exception built in this class.
    _ENDPOINT = "https://bedrock.example"

    # Tiny program exec'd under fake module globals so that the frame raising
    # the AssertionError reports a library module name.
    _BOOM_SOURCE = "def _boom():\n assert False\n_boom()"

    def test_detects_botocore_connection_closed_error(self):
        from agent.bedrock_adapter import is_stale_connection_error
        from botocore.exceptions import ConnectionClosedError

        error = ConnectionClosedError(endpoint_url=self._ENDPOINT)

        assert is_stale_connection_error(error) is True

    def test_detects_botocore_endpoint_connection_error(self):
        from agent.bedrock_adapter import is_stale_connection_error
        from botocore.exceptions import EndpointConnectionError

        error = EndpointConnectionError(endpoint_url=self._ENDPOINT)

        assert is_stale_connection_error(error) is True

    def test_detects_botocore_read_timeout(self):
        from agent.bedrock_adapter import is_stale_connection_error
        from botocore.exceptions import ReadTimeoutError

        error = ReadTimeoutError(endpoint_url=self._ENDPOINT)

        assert is_stale_connection_error(error) is True

    def test_detects_urllib3_protocol_error(self):
        from agent.bedrock_adapter import is_stale_connection_error
        from urllib3.exceptions import ProtocolError

        error = ProtocolError("Connection broken")

        assert is_stale_connection_error(error) is True

    def test_detects_library_internal_assertion_error(self):
        """A bare AssertionError raised from a urllib3 frame counts as stale.

        It signals a corrupted connection-pool invariant inside the library,
        so the cached client should be evicted.
        """
        from agent.bedrock_adapter import is_stale_connection_error

        # Exec'ing the snippet under these globals yields a raising frame
        # whose ``f_globals["__name__"]`` is "urllib3.connectionpool", which
        # is what the classifier inspects.
        library_globals = {"__name__": "urllib3.connectionpool"}
        try:
            exec(self._BOOM_SOURCE, library_globals)
        except AssertionError as error:
            assert is_stale_connection_error(error) is True
        else:
            pytest.fail("AssertionError not raised")

    def test_detects_botocore_internal_assertion_error(self):
        """Same scenario, with the raising frame inside the botocore namespace."""
        from agent.bedrock_adapter import is_stale_connection_error

        library_globals = {"__name__": "botocore.httpsession"}
        try:
            exec(self._BOOM_SOURCE, library_globals)
        except AssertionError as error:
            assert is_stale_connection_error(error) is True
        else:
            pytest.fail("AssertionError not raised")

    def test_ignores_application_assertion_error(self):
        """An AssertionError from application code is not a stale connection.

        Such failures are genuine test/code bugs and must not evict a client.
        """
        from agent.bedrock_adapter import is_stale_connection_error

        try:
            assert False, "test-only"  # noqa: B011
        except AssertionError as error:
            assert is_stale_connection_error(error) is False

    def test_ignores_unrelated_exceptions(self):
        from agent.bedrock_adapter import is_stale_connection_error

        for error in (ValueError("bad input"), KeyError("missing")):
            assert is_stale_connection_error(error) is False
|
||||
|
||||
|
||||
class TestCallConverseInvalidatesOnStaleError:
|
||||
"""call_converse / call_converse_stream evict the cached client when the
|
||||
boto3 call raises a stale-connection error — so the next invocation
|
||||
reconnects instead of reusing the dead socket."""
|
||||
|
||||
def test_converse_evicts_client_on_stale_error(self):
|
||||
from agent.bedrock_adapter import (
|
||||
_bedrock_runtime_client_cache,
|
||||
call_converse,
|
||||
reset_client_cache,
|
||||
)
|
||||
from botocore.exceptions import ConnectionClosedError
|
||||
|
||||
reset_client_cache()
|
||||
dead_client = MagicMock()
|
||||
dead_client.converse.side_effect = ConnectionClosedError(
|
||||
endpoint_url="https://bedrock.example",
|
||||
)
|
||||
_bedrock_runtime_client_cache["us-east-1"] = dead_client
|
||||
|
||||
with pytest.raises(ConnectionClosedError):
|
||||
call_converse(
|
||||
region="us-east-1",
|
||||
model="anthropic.claude-3-sonnet-20240229-v1:0",
|
||||
messages=[{"role": "user", "content": "hi"}],
|
||||
)
|
||||
|
||||
assert "us-east-1" not in _bedrock_runtime_client_cache, (
|
||||
"stale client should have been evicted so the retry reconnects"
|
||||
)
|
||||
|
||||
def test_converse_stream_evicts_client_on_stale_error(self):
|
||||
from agent.bedrock_adapter import (
|
||||
_bedrock_runtime_client_cache,
|
||||
call_converse_stream,
|
||||
reset_client_cache,
|
||||
)
|
||||
from botocore.exceptions import ConnectionClosedError
|
||||
|
||||
reset_client_cache()
|
||||
dead_client = MagicMock()
|
||||
dead_client.converse_stream.side_effect = ConnectionClosedError(
|
||||
endpoint_url="https://bedrock.example",
|
||||
)
|
||||
_bedrock_runtime_client_cache["us-east-1"] = dead_client
|
||||
|
||||
with pytest.raises(ConnectionClosedError):
|
||||
call_converse_stream(
|
||||
region="us-east-1",
|
||||
model="anthropic.claude-3-sonnet-20240229-v1:0",
|
||||
messages=[{"role": "user", "content": "hi"}],
|
||||
)
|
||||
|
||||
assert "us-east-1" not in _bedrock_runtime_client_cache
|
||||
|
||||
def test_converse_does_not_evict_on_non_stale_error(self):
    """Non-stale errors (e.g. ValidationException) leave the client cache alone.

    The test plants a mock client in the module-level cache and expects it
    to still be there afterwards, so the cache is reset again on exit to
    keep that mock from leaking into tests that run later.
    """
    from agent.bedrock_adapter import (
        _bedrock_runtime_client_cache,
        call_converse,
        reset_client_cache,
    )
    from botocore.exceptions import ClientError

    reset_client_cache()
    live_client = MagicMock()
    live_client.converse.side_effect = ClientError(
        error_response={"Error": {"Code": "ValidationException", "Message": "bad"}},
        operation_name="Converse",
    )
    _bedrock_runtime_client_cache["us-east-1"] = live_client

    try:
        with pytest.raises(ClientError):
            call_converse(
                region="us-east-1",
                model="anthropic.claude-3-sonnet-20240229-v1:0",
                messages=[{"role": "user", "content": "hi"}],
            )

        assert _bedrock_runtime_client_cache.get("us-east-1") is live_client, (
            "validation errors do not indicate a dead connection — keep the client"
        )
    finally:
        # On the success path the MagicMock is still cached here; clear it so
        # no other test picks it up as a real Bedrock client.
        reset_client_cache()
|
||||
|
||||
def test_converse_leaves_successful_client_in_cache(self):
    """A successful ``converse`` call keeps the cached client in place.

    Because the mock client is expected to remain cached, the cache is
    reset on exit so the mock does not leak into tests that run later.
    """
    from agent.bedrock_adapter import (
        _bedrock_runtime_client_cache,
        call_converse,
        reset_client_cache,
    )

    reset_client_cache()
    live_client = MagicMock()
    live_client.converse.return_value = {
        "output": {"message": {"role": "assistant", "content": [{"text": "hi"}]}},
        "stopReason": "end_turn",
        "usage": {"inputTokens": 1, "outputTokens": 1, "totalTokens": 2},
    }
    _bedrock_runtime_client_cache["us-east-1"] = live_client

    try:
        call_converse(
            region="us-east-1",
            model="anthropic.claude-3-sonnet-20240229-v1:0",
            messages=[{"role": "user", "content": "hi"}],
        )

        assert _bedrock_runtime_client_cache.get("us-east-1") is live_client
    finally:
        # Remove the MagicMock from the shared module-level cache.
        reset_client_cache()
|
||||
|
||||
@@ -376,17 +376,15 @@ class TestBedrockModelNameNormalization:
|
||||
"apac.anthropic.claude-haiku-4-5", preserve_dots=True
|
||||
) == "apac.anthropic.claude-haiku-4-5"
|
||||
|
||||
def test_preserve_false_mangles_as_documented(self):
|
||||
"""Canary: with ``preserve_dots=False`` the function still
|
||||
produces the broken all-hyphen form — this is the shape that
|
||||
Bedrock rejected and that the fix avoids. Keeping this test
|
||||
locks in the existing behaviour of ``normalize_model_name`` so a
|
||||
future refactor doesn't accidentally decouple the knob from its
|
||||
effect."""
|
||||
def test_bedrock_prefix_preserved_without_preserve_dots(self):
|
||||
"""Bedrock inference profile IDs are auto-detected by prefix and
|
||||
always returned unmangled -- ``preserve_dots`` is irrelevant for
|
||||
these IDs because the dots are namespace separators, not version
|
||||
separators. Regression for #12295."""
|
||||
from agent.anthropic_adapter import normalize_model_name
|
||||
assert normalize_model_name(
|
||||
"global.anthropic.claude-opus-4-7", preserve_dots=False
|
||||
) == "global-anthropic-claude-opus-4-7"
|
||||
) == "global.anthropic.claude-opus-4-7"
|
||||
|
||||
def test_bare_foundation_model_id_preserved(self):
|
||||
"""Non-inference-profile Bedrock IDs
|
||||
@@ -422,12 +420,11 @@ class TestBedrockBuildAnthropicKwargsEndToEnd:
|
||||
f"{kwargs['model']!r}"
|
||||
)
|
||||
|
||||
def test_bedrock_model_mangled_without_preserve_dots(self):
|
||||
"""Inverse canary: without the flag, ``build_anthropic_kwargs``
|
||||
still produces the broken form — so the fix in
|
||||
``_anthropic_preserve_dots`` is the load-bearing piece that
|
||||
wires ``preserve_dots=True`` through to this builder for the
|
||||
Bedrock case."""
|
||||
def test_bedrock_model_preserved_without_preserve_dots(self):
|
||||
"""Bedrock inference profile IDs survive ``build_anthropic_kwargs``
|
||||
even without ``preserve_dots=True`` -- the prefix auto-detection
|
||||
in ``normalize_model_name`` is the load-bearing piece.
|
||||
Regression for #12295."""
|
||||
from agent.anthropic_adapter import build_anthropic_kwargs
|
||||
kwargs = build_anthropic_kwargs(
|
||||
model="global.anthropic.claude-opus-4-7",
|
||||
@@ -437,4 +434,157 @@ class TestBedrockBuildAnthropicKwargsEndToEnd:
|
||||
reasoning_config=None,
|
||||
preserve_dots=False,
|
||||
)
|
||||
assert kwargs["model"] == "global-anthropic-claude-opus-4-7"
|
||||
assert kwargs["model"] == "global.anthropic.claude-opus-4-7"
|
||||
|
||||
|
||||
class TestBedrockModelIdDetection:
    """Covers ``_is_bedrock_model_id`` and the auto-detection that lets
    ``normalize_model_name`` keep the dots of Bedrock IDs whatever
    ``preserve_dots`` is set to. Regression for #12295."""

    def test_bare_bedrock_id_detected(self):
        """A bare ``anthropic.*`` ID is recognised as a Bedrock ID."""
        from agent.anthropic_adapter import _is_bedrock_model_id

        detected = _is_bedrock_model_id("anthropic.claude-opus-4-7")
        assert detected is True

    def test_regional_us_prefix_detected(self):
        """The ``us.`` prefix is recognised."""
        from agent.anthropic_adapter import _is_bedrock_model_id

        detected = _is_bedrock_model_id("us.anthropic.claude-sonnet-4-5-v1:0")
        assert detected is True

    def test_regional_global_prefix_detected(self):
        """The ``global.`` prefix is recognised."""
        from agent.anthropic_adapter import _is_bedrock_model_id

        detected = _is_bedrock_model_id("global.anthropic.claude-opus-4-7")
        assert detected is True

    def test_regional_eu_prefix_detected(self):
        """The ``eu.`` prefix is recognised."""
        from agent.anthropic_adapter import _is_bedrock_model_id

        detected = _is_bedrock_model_id("eu.anthropic.claude-sonnet-4-6")
        assert detected is True

    def test_openrouter_format_not_detected(self):
        """A dotted version number alone does not make a Bedrock ID."""
        from agent.anthropic_adapter import _is_bedrock_model_id

        detected = _is_bedrock_model_id("claude-opus-4.6")
        assert detected is False

    def test_bare_claude_not_detected(self):
        """A plain hyphenated Claude name is not a Bedrock ID."""
        from agent.anthropic_adapter import _is_bedrock_model_id

        detected = _is_bedrock_model_id("claude-opus-4-7")
        assert detected is False

    def test_bare_bedrock_id_preserved_without_flag(self):
        """Primary bug from #12295: ``anthropic.claude-opus-4-7`` reaching
        bedrock-mantle through auxiliary clients that never pass
        ``preserve_dots=True``."""
        from agent.anthropic_adapter import normalize_model_name

        model_id = "anthropic.claude-opus-4-7"
        normalized = normalize_model_name(model_id, preserve_dots=False)
        assert normalized == "anthropic.claude-opus-4-7"

    def test_openrouter_dots_still_converted(self):
        """Non-Bedrock dotted model names must still be converted."""
        from agent.anthropic_adapter import normalize_model_name

        normalized = normalize_model_name("claude-opus-4.6")
        assert normalized == "claude-opus-4-6"

    def test_bare_bedrock_id_survives_build_kwargs(self):
        """End-to-end through ``build_anthropic_kwargs`` with
        ``preserve_dots=False`` -- the auxiliary client path."""
        from agent.anthropic_adapter import build_anthropic_kwargs

        built = build_anthropic_kwargs(
            model="anthropic.claude-opus-4-7",
            messages=[{"role": "user", "content": "hi"}],
            tools=None,
            max_tokens=1024,
            reasoning_config=None,
            preserve_dots=False,
        )
        assert built["model"] == "anthropic.claude-opus-4-7"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# auxiliary_client Bedrock resolution — fix for #13919
|
||||
# ---------------------------------------------------------------------------
|
||||
# Before the fix, resolve_provider_client("bedrock", ...) fell through to the
|
||||
# "unhandled auth_type" warning and returned (None, None), breaking all
|
||||
# auxiliary tasks (compression, memory, summarization) for Bedrock users.
|
||||
|
||||
|
||||
class TestAuxiliaryClientBedrockResolution:
    """Checks that ``resolve_provider_client`` copes with Bedrock's aws_sdk auth type."""

    def test_bedrock_returns_client_with_credentials(self, monkeypatch):
        """With AWS credentials in the environment a usable client comes back."""
        aws_env = {
            "AWS_ACCESS_KEY_ID": "AKIAIOSFODNN7EXAMPLE",
            "AWS_SECRET_ACCESS_KEY": "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY",
            "AWS_REGION": "us-west-2",
        }
        for name, value in aws_env.items():
            monkeypatch.setenv(name, value)

        fake_sdk_client = MagicMock()
        with patch(
            "agent.anthropic_adapter.build_anthropic_bedrock_client",
            return_value=fake_sdk_client,
        ):
            from agent.auxiliary_client import resolve_provider_client, AnthropicAuxiliaryClient
            resolved = resolve_provider_client("bedrock", None)
        client, model = resolved

        assert client is not None, (
            "resolve_provider_client('bedrock') returned None — "
            "aws_sdk auth type is not handled"
        )
        assert isinstance(client, AnthropicAuxiliaryClient)
        assert model is not None
        assert client.api_key == "aws-sdk"
        assert "us-west-2" in client.base_url

    def test_bedrock_returns_none_without_credentials(self, monkeypatch):
        """With no AWS credentials the result is a graceful ``(None, None)``."""
        with patch("agent.bedrock_adapter.has_aws_credentials", return_value=False):
            from agent.auxiliary_client import resolve_provider_client
            resolved = resolve_provider_client("bedrock", None)
        client, model = resolved

        assert client is None
        assert model is None

    def test_bedrock_uses_configured_region(self, monkeypatch):
        """The client's ``base_url`` carries the region from ``AWS_REGION``."""
        aws_env = {
            "AWS_ACCESS_KEY_ID": "AKIAIOSFODNN7EXAMPLE",
            "AWS_SECRET_ACCESS_KEY": "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY",
            "AWS_REGION": "eu-central-1",
        }
        for name, value in aws_env.items():
            monkeypatch.setenv(name, value)

        with patch(
            "agent.anthropic_adapter.build_anthropic_bedrock_client",
            return_value=MagicMock(),
        ):
            from agent.auxiliary_client import resolve_provider_client
            client, _ = resolve_provider_client("bedrock", None)

        assert client is not None
        assert "eu-central-1" in client.base_url

    def test_bedrock_respects_explicit_model(self, monkeypatch):
        """A model passed explicitly by the caller is the one returned."""
        aws_env = {
            "AWS_ACCESS_KEY_ID": "AKIAIOSFODNN7EXAMPLE",
            "AWS_SECRET_ACCESS_KEY": "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY",
        }
        for name, value in aws_env.items():
            monkeypatch.setenv(name, value)

        requested = "us.anthropic.claude-sonnet-4-5-20250929-v1:0"
        with patch(
            "agent.anthropic_adapter.build_anthropic_bedrock_client",
            return_value=MagicMock(),
        ):
            from agent.auxiliary_client import resolve_provider_client
            _, model = resolve_provider_client("bedrock", requested)

        assert "claude-sonnet" in model

    def test_bedrock_async_mode(self, monkeypatch):
        """``async_mode=True`` yields an ``AsyncAnthropicAuxiliaryClient``."""
        aws_env = {
            "AWS_ACCESS_KEY_ID": "AKIAIOSFODNN7EXAMPLE",
            "AWS_SECRET_ACCESS_KEY": "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY",
        }
        for name, value in aws_env.items():
            monkeypatch.setenv(name, value)

        with patch(
            "agent.anthropic_adapter.build_anthropic_bedrock_client",
            return_value=MagicMock(),
        ):
            from agent.auxiliary_client import resolve_provider_client, AsyncAnthropicAuxiliaryClient
            client, model = resolve_provider_client("bedrock", None, async_mode=True)

        assert client is not None
        assert isinstance(client, AsyncAnthropicAuxiliaryClient)

    def test_bedrock_default_model_is_haiku(self, monkeypatch):
        """With no model requested, the default auxiliary model is a Haiku."""
        aws_env = {
            "AWS_ACCESS_KEY_ID": "AKIAIOSFODNN7EXAMPLE",
            "AWS_SECRET_ACCESS_KEY": "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY",
        }
        for name, value in aws_env.items():
            monkeypatch.setenv(name, value)

        with patch(
            "agent.anthropic_adapter.build_anthropic_bedrock_client",
            return_value=MagicMock(),
        ):
            from agent.auxiliary_client import resolve_provider_client
            _, model = resolve_provider_client("bedrock", None)

        assert "haiku" in model.lower()
|
||||
|
||||
@@ -847,6 +847,32 @@ class TestTokenBudgetTailProtection:
|
||||
assert isinstance(pruned, int)
|
||||
|
||||
|
||||
class TestUpdateModelBudgets:
    """Regression: update_model() must recalculate token budgets."""

    def test_tail_budget_recalculated(self):
        """Switching to a smaller context length must shrink ``tail_token_budget``
        and change ``max_summary_tokens``."""
        from unittest.mock import patch

        context_lookup = patch(
            "agent.context_compressor.get_model_context_length",
            return_value=200_000,
        )
        with context_lookup:
            comp = ContextCompressor("model-a", threshold_percent=0.50, quiet_mode=True)
            tail_before = comp.tail_token_budget
            summary_before = comp.max_summary_tokens

            comp.update_model("model-b", context_length=32_000)
            tail_after = comp.tail_token_budget
            assert tail_after != tail_before, "tail_token_budget should change"
            assert tail_after < tail_before, "smaller context → smaller budget"
            assert comp.max_summary_tokens != summary_before, "max_summary_tokens should change"

    def test_budgets_proportional(self):
        """After ``update_model`` the budgets follow from the new context length."""
        from unittest.mock import patch

        context_lookup = patch(
            "agent.context_compressor.get_model_context_length",
            return_value=100_000,
        )
        with context_lookup:
            comp = ContextCompressor("model-a", threshold_percent=0.50, quiet_mode=True)
            comp.update_model("model-b", context_length=10_000)

            expected_tail = int(comp.threshold_tokens * comp.summary_target_ratio)
            expected_summary = min(int(10_000 * 0.05), 4000)
            assert comp.tail_token_budget == expected_tail
            assert comp.max_summary_tokens == expected_summary
|
||||
|
||||
|
||||
class TestTruncateToolCallArgsJson:
|
||||
"""Regression tests for #11762.
|
||||
|
||||
|
||||
@@ -77,6 +77,13 @@ class FakeMemoryProvider(MemoryProvider):
|
||||
self.memory_writes.append((action, target, content))
|
||||
|
||||
|
||||
class MetadataMemoryProvider(FakeMemoryProvider):
    """Fake provider whose write hook accepts the optional ``metadata`` argument."""

    def on_memory_write(self, action, target, content, metadata=None):
        # Record a 4-tuple; a missing or empty metadata value is stored as {}.
        recorded_metadata = metadata if metadata else {}
        record = (action, target, content, recorded_metadata)
        self.memory_writes.append(record)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# MemoryProvider ABC tests
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -862,6 +869,51 @@ class TestOnMemoryWriteBridge:
|
||||
mgr.on_memory_write("add", "memory", "new fact")
|
||||
assert p.memory_writes == [("add", "memory", "new fact")]
|
||||
|
||||
def test_on_memory_write_metadata_passed_to_opt_in_provider(self):
    """A provider whose hook takes ``metadata`` is handed the write provenance."""
    provenance = {
        "write_origin": "assistant_tool",
        "execution_context": "foreground",
        "session_id": "sess-1",
    }
    manager = MemoryManager()
    provider = MetadataMemoryProvider("ext")
    manager.add_provider(provider)

    # Pass a copy so the expected value below is an independent dict.
    manager.on_memory_write("add", "memory", "new fact", metadata=dict(provenance))

    expected_write = ("add", "memory", "new fact", provenance)
    assert provider.memory_writes == [expected_write]
|
||||
|
||||
def test_on_memory_write_metadata_keeps_legacy_provider_compatible(self):
    """A provider with the old 3-argument hook still records the write when
    the manager is given metadata."""
    manager = MemoryManager()
    legacy = FakeMemoryProvider("ext")
    manager.add_provider(legacy)

    write_args = ("add", "user", "legacy provider fact")
    manager.on_memory_write(*write_args, metadata={"write_origin": "assistant_tool"})

    assert legacy.memory_writes == [write_args]
|
||||
|
||||
def test_on_memory_write_replace(self):
|
||||
"""on_memory_write fires for 'replace' actions."""
|
||||
mgr = MemoryManager()
|
||||
|
||||
@@ -588,6 +588,57 @@ class TestGetModelContextLength:
|
||||
assert result == 200000
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Bedrock context resolution — must run BEFORE custom-endpoint probe
|
||||
# =========================================================================
|
||||
|
||||
class TestBedrockContextResolution:
    """Regression tests for the order in which Bedrock context length resolves.

    Bug: ``bedrock-runtime.<region>.amazonaws.com`` is absent from
    ``_URL_TO_PROVIDER``, so ``_is_known_provider_base_url`` returned False
    and the custom-endpoint probe (step 2) ran first. It fetched ``/models``
    from Bedrock, which does not serve it, and the 128K default fallback was
    returned before the Bedrock branch was ever reached.

    Fix: the Bedrock branch now runs ahead of the custom-endpoint probe.
    """

    @patch("agent.model_metadata.fetch_endpoint_model_metadata")
    def test_bedrock_provider_returns_static_table_before_probe(self, mock_fetch):
        """provider='bedrock' resolves via static table, bypasses /models probe."""
        resolved = get_model_context_length(
            "anthropic.claude-opus-4-v1:0",
            provider="bedrock",
            base_url="https://bedrock-runtime.us-east-1.amazonaws.com",
        )
        # Expect the static Bedrock table value (200K for Claude) rather than
        # DEFAULT_FALLBACK_CONTEXT (128K).
        assert resolved == 200000
        assert not mock_fetch.called

    @patch("agent.model_metadata.fetch_endpoint_model_metadata")
    def test_bedrock_url_without_provider_hint(self, mock_fetch):
        """bedrock-runtime host infers Bedrock even when provider is omitted."""
        resolved = get_model_context_length(
            "anthropic.claude-sonnet-4-v1:0",
            base_url="https://bedrock-runtime.us-west-2.amazonaws.com",
        )
        assert resolved == 200000
        assert not mock_fetch.called

    @patch("agent.model_metadata.fetch_endpoint_model_metadata")
    def test_non_bedrock_url_still_probes(self, mock_fetch):
        """Non-Bedrock hosts still reach the custom-endpoint probe."""
        mock_fetch.return_value = {"some-model": {"context_length": 50000}}
        resolved = get_model_context_length(
            "some-model",
            base_url="https://api.example.com/v1",
        )
        assert resolved == 50000
        mock_fetch.assert_called()
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# _strip_provider_prefix — Ollama model:tag vs provider:model
|
||||
# =========================================================================
|
||||
|
||||
@@ -0,0 +1,201 @@
|
||||
"""Regression tests for the generic unsupported-parameter detector in
|
||||
``agent.auxiliary_client``.
|
||||
|
||||
The original temperature-specific detector (PR #15621) was generalized so the
|
||||
same reactive-retry strategy covers any provider that rejects an arbitrary
|
||||
request parameter — ``max_tokens``, ``seed``, ``top_p``, future quirks — not
|
||||
just ``temperature``. Credit @nicholasrae (PR #15416) for the generalization
|
||||
pattern.
|
||||
|
||||
These tests lock in:
|
||||
* ``_is_unsupported_parameter_error(exc, param)`` across common phrasings
|
||||
* the back-compat wrapper ``_is_unsupported_temperature_error`` still works
|
||||
* the max_tokens retry branch no longer pops a key that was never set
|
||||
(``max_tokens is None`` gate)
|
||||
* the max_tokens retry branch matches via the generic helper on top of the
|
||||
legacy ``"max_tokens"`` / ``"unsupported_parameter"`` substring checks
|
||||
"""
|
||||
|
||||
from unittest.mock import patch, MagicMock, AsyncMock
|
||||
|
||||
import pytest
|
||||
|
||||
from agent.auxiliary_client import (
|
||||
call_llm,
|
||||
async_call_llm,
|
||||
_is_unsupported_parameter_error,
|
||||
_is_unsupported_temperature_error,
|
||||
)
|
||||
|
||||
|
||||
class TestIsUnsupportedParameterError:
    """The generic detector has to recognise provider phrasings for any parameter."""

    @pytest.mark.parametrize(("param", "message"), [
        # temperature phrasings (regression coverage via the generic API)
        ("temperature", "HTTP 400: Unsupported parameter: temperature"),
        ("temperature", "Error code: 400 - {'error': {'code': 'unsupported_parameter', 'param': 'temperature'}}"),
        ("temperature", "this model does not support temperature"),
        # max_tokens phrasings
        ("max_tokens", "HTTP 400: Unsupported parameter: max_tokens"),
        ("max_tokens", "Unknown parameter: max_tokens — use max_completion_tokens"),
        ("max_tokens", "Invalid parameter: max_tokens is not supported"),
        # arbitrary future params
        ("seed", "HTTP 400: unrecognized parameter: seed"),
        ("top_p", "Error: top_p is not supported for this model"),
    ])
    def test_matches_real_provider_messages(self, param, message):
        """Each phrasing is flagged as an unsupported-parameter error."""
        error = RuntimeError(message)
        assert _is_unsupported_parameter_error(error, param) is True

    @pytest.mark.parametrize(("param", "message"), [
        # Param not mentioned at all
        ("temperature", "HTTP 400: max_tokens is too large"),
        # Param mentioned but not flagged as unsupported
        ("temperature", "temperature must be between 0 and 2"),
        # Totally unrelated 400
        ("max_tokens", "Rate limit exceeded"),
        # Connection-level errors
        ("temperature", "Connection reset by peer"),
    ])
    def test_does_not_match_unrelated_errors(self, param, message):
        """Errors that do not reject the parameter are not flagged."""
        error = RuntimeError(message)
        assert _is_unsupported_parameter_error(error, param) is False

    def test_empty_param_returns_false(self):
        """An empty parameter name never matches."""
        error = RuntimeError("HTTP 400: Unsupported parameter: temperature")
        verdict = _is_unsupported_parameter_error(error, "")
        assert verdict is False

    def test_temperature_wrapper_delegates_to_generic(self):
        """Back-compat: ``_is_unsupported_temperature_error`` still routes through."""
        rejected = RuntimeError("HTTP 400: Unsupported parameter: temperature")
        assert _is_unsupported_temperature_error(rejected) is True
        # And the unrelated-case still holds
        unrelated = RuntimeError("max_tokens is too large")
        assert _is_unsupported_temperature_error(unrelated) is False
|
||||
|
||||
|
||||
def _dummy_response():
|
||||
"""Sentinel — real code calls ``_validate_llm_response`` which we patch out."""
|
||||
return {"ok": True}
|
||||
|
||||
|
||||
class TestMaxTokensRetryHardening:
    """The ``max_tokens`` retry branch (a) only runs when ``max_tokens is not
    None`` and (b) also recognises the generic phrasings through the helper.
    """

    def test_sync_max_tokens_retry_skipped_when_max_tokens_is_none(self):
        """With no ``max_tokens`` kwarg there must be no pop/retry, even when
        the error text names ``max_tokens``.

        Before the hardening, ``kwargs.pop("max_tokens", None)`` was safe but
        ``kwargs["max_completion_tokens"] = max_tokens`` stored a None value
        and hit the provider a second time. The gate skips the whole branch.
        """
        failure = RuntimeError("HTTP 400: Unsupported parameter: max_tokens")
        client = MagicMock()
        client.base_url = "https://api.openai.com/v1"
        client.chat.completions.create.side_effect = failure

        with patch(
            "agent.auxiliary_client._resolve_task_provider_model",
            return_value=("openai-codex", "gpt-5.5", None, None, None),
        ), patch(
            "agent.auxiliary_client._get_cached_client",
            return_value=(client, "gpt-5.5"),
        ), patch(
            "agent.auxiliary_client._validate_llm_response",
            side_effect=lambda resp, _task: resp,
        ), pytest.raises(RuntimeError):
            call_llm(
                task="session_search",
                messages=[{"role": "user", "content": "hi"}],
                temperature=0.3,
                # max_tokens omitted on purpose
            )

        # Only the initial attempt — no retry because the gate blocked it
        create = client.chat.completions.create
        assert create.call_count == 1

    def test_sync_max_tokens_retry_matches_generic_phrasing(self):
        """A 400 reading "Unknown parameter: max_tokens" (no
        ``unsupported_parameter`` token) triggers the retry via the generic
        helper.
        """
        failure = RuntimeError("Unknown parameter: max_tokens")
        response = _dummy_response()
        client = MagicMock()
        client.base_url = "https://api.openai.com/v1"
        client.chat.completions.create.side_effect = [failure, response]

        with patch(
            "agent.auxiliary_client._resolve_task_provider_model",
            return_value=("openai-codex", "gpt-5.5", None, None, None),
        ), patch(
            "agent.auxiliary_client._get_cached_client",
            return_value=(client, "gpt-5.5"),
        ), patch(
            "agent.auxiliary_client._validate_llm_response",
            side_effect=lambda resp, _task: resp,
        ):
            result = call_llm(
                task="session_search",
                messages=[{"role": "user", "content": "hi"}],
                temperature=0.3,
                max_tokens=512,
            )

        create = client.chat.completions.create
        assert result is response
        assert create.call_count == 2
        retry_kwargs = create.call_args_list[1].kwargs
        assert "max_tokens" not in retry_kwargs
        assert retry_kwargs["max_completion_tokens"] == 512

    @pytest.mark.asyncio
    async def test_async_max_tokens_retry_skipped_when_max_tokens_is_none(self):
        """Async path: with no ``max_tokens`` kwarg the error surfaces after one call."""
        failure = RuntimeError("HTTP 400: Unsupported parameter: max_tokens")
        client = MagicMock()
        client.base_url = "https://api.openai.com/v1"
        client.chat.completions.create = AsyncMock(side_effect=failure)

        with patch(
            "agent.auxiliary_client._resolve_task_provider_model",
            return_value=("openai-codex", "gpt-5.5", None, None, None),
        ), patch(
            "agent.auxiliary_client._get_cached_client",
            return_value=(client, "gpt-5.5"),
        ), patch(
            "agent.auxiliary_client._validate_llm_response",
            side_effect=lambda resp, _task: resp,
        ), pytest.raises(RuntimeError):
            await async_call_llm(
                task="session_search",
                messages=[{"role": "user", "content": "hi"}],
                temperature=0.3,
            )

        create = client.chat.completions.create
        assert create.call_count == 1

    @pytest.mark.asyncio
    async def test_async_max_tokens_retry_matches_generic_phrasing(self):
        """Async path: "Unknown parameter: max_tokens" triggers one retry that
        sends ``max_completion_tokens`` instead of ``max_tokens``."""
        failure = RuntimeError("Unknown parameter: max_tokens")
        response = _dummy_response()
        client = MagicMock()
        client.base_url = "https://api.openai.com/v1"
        client.chat.completions.create = AsyncMock(side_effect=[failure, response])

        with patch(
            "agent.auxiliary_client._resolve_task_provider_model",
            return_value=("openai-codex", "gpt-5.5", None, None, None),
        ), patch(
            "agent.auxiliary_client._get_cached_client",
            return_value=(client, "gpt-5.5"),
        ), patch(
            "agent.auxiliary_client._validate_llm_response",
            side_effect=lambda resp, _task: resp,
        ):
            result = await async_call_llm(
                task="session_search",
                messages=[{"role": "user", "content": "hi"}],
                temperature=0.3,
                max_tokens=512,
            )

        create = client.chat.completions.create
        assert result is response
        assert create.await_count == 2
        retry_kwargs = create.call_args_list[1].kwargs
        assert "max_tokens" not in retry_kwargs
        assert retry_kwargs["max_completion_tokens"] == 512
|
||||
@@ -0,0 +1,237 @@
|
||||
"""Regression tests for the universal "unsupported temperature" retry in
|
||||
``agent.auxiliary_client``.
|
||||
|
||||
Auxiliary callers (context compression, session search,
|
||||
web extract summarisation, etc.) hardcode ``temperature=0.3`` for historical
|
||||
reasons. Several provider/model combinations reject ``temperature`` with a
|
||||
400:
|
||||
|
||||
* OpenAI Responses (gpt-5/o-series reasoning models)
|
||||
* Copilot Responses (reasoning models)
|
||||
* OpenRouter reasoning models (gpt-5.5, some anthropic via OAI-compat)
|
||||
* Anthropic Opus 4.7+ via OpenAI-compat endpoints
|
||||
* Kimi/Moonshot (server-managed)
|
||||
|
||||
``_fixed_temperature_for_model`` catches Kimi up front, and
|
||||
``build_chat_completion_kwargs`` drops temperature for Anthropic Opus 4.7+,
|
||||
but the same backend can accept ``temperature`` for some models and reject
|
||||
it for others (for example gpt-5.4 accepts but gpt-5.5 rejects on the same
|
||||
endpoint). An allow/deny-list is not maintainable across providers.
|
||||
|
||||
The universal fix is reactive: when a call returns an
|
||||
``Unsupported parameter: temperature`` 400, retry once without temperature.
|
||||
These tests lock in that behaviour for both sync and async paths.
|
||||
"""
|
||||
|
||||
from unittest.mock import patch, MagicMock, AsyncMock
|
||||
|
||||
import pytest
|
||||
|
||||
from agent.auxiliary_client import (
|
||||
call_llm,
|
||||
async_call_llm,
|
||||
_is_unsupported_temperature_error,
|
||||
)
|
||||
|
||||
|
||||
class TestIsUnsupportedTemperatureError:
    """The detector has to recognise the wordings that providers really send back."""

    @pytest.mark.parametrize("message", [
        # OpenAI / Codex Responses
        "HTTP 400: Unsupported parameter: temperature",
        "Error code: 400 - {'error': {'message': \"Unsupported parameter: 'temperature'\"}}",
        # Copilot / OpenAI error-code form
        "Error code: 400 - {'error': {'code': 'unsupported_parameter', 'param': 'temperature'}}",
        # OpenRouter-style
        "Provider returned error: temperature is not supported for this model",
        "this model does not support temperature",
        # Anthropic-style via OAI-compat
        "temperature: unknown parameter",
        # Some gateways
        "unrecognized request argument supplied: temperature",
    ])
    def test_matches_real_provider_messages(self, message):
        """Each wording is classified as an unsupported-temperature error."""
        error = RuntimeError(message)
        assert _is_unsupported_temperature_error(error) is True

    @pytest.mark.parametrize("message", [
        # Unrelated 400s must NOT trigger a silent-retry
        "HTTP 400: Invalid value: 'tool'. Supported values are: 'assistant'...",
        "max_tokens is too large for this model",
        "Rate limit exceeded",
        "Connection reset by peer",
        # Temperature value error is a different class of problem
        "temperature must be between 0 and 2",
    ])
    def test_does_not_match_unrelated_errors(self, message):
        """Errors that do not reject the temperature parameter are left alone."""
        error = RuntimeError(message)
        assert _is_unsupported_temperature_error(error) is False
|
||||
|
||||
|
||||
def _dummy_response():
|
||||
# The real code calls _validate_llm_response which inspects
|
||||
# response.choices[0].message. The tests here patch that out, so
|
||||
# any sentinel object is fine.
|
||||
return {"ok": True}
|
||||
|
||||
|
||||
class TestCallLlmUnsupportedTemperatureRetry:
    """``call_llm`` retries once without temperature and returns on success."""

    def _setup(self, first_exc):
        """Return a mock client that raises *first_exc* once, then succeeds.

        The second ``chat.completions.create`` call yields the
        ``_dummy_response()`` sentinel.
        """
        client = MagicMock()
        client.base_url = "https://api.openai.com/v1"
        client.chat.completions.create.side_effect = [first_exc, _dummy_response()]
        return client

    @pytest.mark.parametrize("error_message", [
        "HTTP 400: Unsupported parameter: temperature",
        "Error code: 400 - {'error': {'code': 'unsupported_parameter', 'param': 'temperature'}}",
        "Provider error: this model does not support temperature",
    ])
    def test_retries_once_without_temperature(self, error_message):
        """A temperature-rejection error triggers exactly one retry without it."""
        client = self._setup(RuntimeError(error_message))
        create = client.chat.completions.create

        with (
            patch("agent.auxiliary_client._resolve_task_provider_model",
                  return_value=("openai-codex", "gpt-5.5", None, None, None)),
            patch("agent.auxiliary_client._get_cached_client",
                  return_value=(client, "gpt-5.5")),
            patch("agent.auxiliary_client._validate_llm_response",
                  side_effect=lambda resp, _task: resp),
        ):
            result = call_llm(
                task="compression",
                messages=[{"role": "user", "content": "remember this"}],
                temperature=0.3,
                max_tokens=500,
            )

        assert result == {"ok": True}
        assert create.call_count == 2
        first_kwargs = create.call_args_list[0].kwargs
        retry_kwargs = create.call_args_list[1].kwargs
        assert first_kwargs["temperature"] == 0.3
        assert "temperature" not in retry_kwargs
        # other kwargs preserved
        assert retry_kwargs["max_tokens"] == 500

    def test_non_temperature_400_does_not_retry_as_temperature(self):
        """Unrelated 400s (e.g. bad tool role) must not silently drop temp."""
        client = MagicMock()
        client.base_url = "https://api.openai.com/v1"
        non_temp_err = RuntimeError(
            "HTTP 400: Invalid value: 'tool'. Supported values are: 'assistant'..."
        )
        client.chat.completions.create.side_effect = non_temp_err

        with (
            patch("agent.auxiliary_client._resolve_task_provider_model",
                  return_value=("openai-codex", "gpt-5.5", None, None, None)),
            patch("agent.auxiliary_client._get_cached_client",
                  return_value=(client, "gpt-5.5")),
            patch("agent.auxiliary_client._validate_llm_response",
                  side_effect=lambda resp, _task: resp),
            patch("agent.auxiliary_client._try_payment_fallback",
                  return_value=None),
        ):
            with pytest.raises(RuntimeError, match="Invalid value"):
                call_llm(
                    task="compression",
                    messages=[{"role": "user", "content": "x"}],
                    temperature=0.3,
                    max_tokens=500,
                )

        # Should NOT have retried (non-temperature 400 doesn't match)
        assert client.chat.completions.create.call_count == 1

    def test_no_retry_when_temperature_not_in_kwargs(self):
        """If caller didn't send temperature, don't invent a temperature-retry."""
        client = MagicMock()
        client.base_url = "https://api.openai.com/v1"
        # Provider complains about temperature even though we didn't send it.
        # (Pathological but possible with misleading error text.)  The guard
        # ``"temperature" in kwargs`` must prevent an unnecessary retry.
        err = RuntimeError("HTTP 400: Unsupported parameter: temperature")
        client.chat.completions.create.side_effect = err

        with (
            patch("agent.auxiliary_client._resolve_task_provider_model",
                  return_value=("openai-codex", "gpt-5.5", None, None, None)),
            patch("agent.auxiliary_client._get_cached_client",
                  return_value=(client, "gpt-5.5")),
            patch("agent.auxiliary_client._validate_llm_response",
                  side_effect=lambda resp, _task: resp),
            patch("agent.auxiliary_client._try_payment_fallback",
                  return_value=None),
        ):
            # Pin the message so an unrelated RuntimeError cannot satisfy
            # the expectation (mirrors the ``match=`` in the sibling test).
            with pytest.raises(RuntimeError, match="Unsupported parameter"):
                call_llm(
                    task="compression",
                    messages=[{"role": "user", "content": "x"}],
                    temperature=None,  # explicit: no temperature sent
                    max_tokens=500,
                )

        assert client.chat.completions.create.call_count == 1
|
||||
|
||||
|
||||
class TestAsyncCallLlmUnsupportedTemperatureRetry:
    """``async_call_llm`` mirror of the sync retry semantics."""

    @pytest.mark.asyncio
    async def test_async_retries_once_without_temperature(self):
        """First await fails on temperature; the second await omits it and succeeds."""
        create_mock = AsyncMock(side_effect=[
            RuntimeError("HTTP 400: Unsupported parameter: temperature"),
            _dummy_response(),
        ])
        mock_client = MagicMock()
        mock_client.base_url = "https://api.openai.com/v1"
        mock_client.chat.completions.create = create_mock

        with (
            patch("agent.auxiliary_client._resolve_task_provider_model",
                  return_value=("openai-codex", "gpt-5.5", None, None, None)),
            patch("agent.auxiliary_client._get_cached_client",
                  return_value=(mock_client, "gpt-5.5")),
            patch("agent.auxiliary_client._validate_llm_response",
                  side_effect=lambda resp, _task: resp),
        ):
            outcome = await async_call_llm(
                task="session_search",
                messages=[{"role": "user", "content": "query"}],
                temperature=0.3,
                max_tokens=500,
            )

        assert outcome == {"ok": True}
        assert create_mock.await_count == 2
        initial_kwargs = create_mock.call_args_list[0].kwargs
        second_kwargs = create_mock.call_args_list[1].kwargs
        assert initial_kwargs["temperature"] == 0.3
        assert "temperature" not in second_kwargs
        assert second_kwargs["max_tokens"] == 500

    @pytest.mark.asyncio
    async def test_async_non_temperature_400_does_not_retry(self):
        """An unrelated 400 propagates after a single awaited call."""
        create_mock = AsyncMock(
            side_effect=RuntimeError("HTTP 400: Invalid value: 'tool'"),
        )
        mock_client = MagicMock()
        mock_client.base_url = "https://api.openai.com/v1"
        mock_client.chat.completions.create = create_mock

        with (
            patch("agent.auxiliary_client._resolve_task_provider_model",
                  return_value=("openai-codex", "gpt-5.5", None, None, None)),
            patch("agent.auxiliary_client._get_cached_client",
                  return_value=(mock_client, "gpt-5.5")),
            patch("agent.auxiliary_client._validate_llm_response",
                  side_effect=lambda resp, _task: resp),
            patch("agent.auxiliary_client._try_payment_fallback",
                  return_value=None),
        ):
            with pytest.raises(RuntimeError, match="Invalid value"):
                await async_call_llm(
                    task="session_search",
                    messages=[{"role": "user", "content": "x"}],
                    temperature=0.3,
                    max_tokens=500,
                )

        assert create_mock.await_count == 1
|
||||
@@ -0,0 +1,94 @@
|
||||
"""Tests for the /busy CLI command and busy-input-mode config handling."""
|
||||
|
||||
import unittest
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import patch
|
||||
|
||||
|
||||
def _import_cli():
    """Import and return the ``cli`` module.

    Before importing, install a stub ``save_env_value_secure`` on
    ``hermes_cli.config`` when that module does not already define one.
    NOTE(review): presumably ``cli`` needs this name at import time —
    confirm against ``cli``'s imports.
    """
    import hermes_cli.config as config_mod

    def _stub_save_env_value_secure(key, value):
        # Report success without storing anything.
        return {
            "success": True,
            "stored_as": key,
            "validated": False,
        }

    if not hasattr(config_mod, "save_env_value_secure"):
        config_mod.save_env_value_secure = _stub_save_env_value_secure

    import cli as cli_mod

    return cli_mod
|
||||
|
||||
|
||||
class TestHandleBusyCommand(unittest.TestCase):
    """Exercise ``HermesCLI._handle_busy_command`` against a lightweight stub."""

    def _make_cli(self, busy_input_mode="interrupt"):
        """Return a stand-in CLI object carrying only the attributes set here."""
        fake_cli = SimpleNamespace(agent=None)
        fake_cli.busy_input_mode = busy_input_mode
        return fake_cli

    def test_no_args_shows_status(self):
        """Bare ``/busy`` prints status mentioning both modes and saves nothing."""
        cli_mod = _import_cli()
        fake_cli = self._make_cli("queue")
        with patch.object(cli_mod, "_cprint") as cprint_mock, \
                patch.object(cli_mod, "save_config_value") as save_mock:
            cli_mod.HermesCLI._handle_busy_command(fake_cli, "/busy")

        save_mock.assert_not_called()
        output = " ".join(map(str, cprint_mock.call_args_list))
        self.assertIn("queue", output)
        self.assertIn("interrupt", output)

    def test_queue_argument_sets_queue_mode_and_saves(self):
        """``/busy queue`` switches the mode and persists it."""
        cli_mod = _import_cli()
        fake_cli = self._make_cli("interrupt")
        with patch.object(cli_mod, "_cprint"), \
                patch.object(cli_mod, "save_config_value", return_value=True) as save_mock:
            cli_mod.HermesCLI._handle_busy_command(fake_cli, "/busy queue")

        self.assertEqual(fake_cli.busy_input_mode, "queue")
        save_mock.assert_called_once_with("display.busy_input_mode", "queue")

    def test_interrupt_argument_sets_interrupt_mode_and_saves(self):
        """``/busy interrupt`` switches the mode and persists it."""
        cli_mod = _import_cli()
        fake_cli = self._make_cli("queue")
        with patch.object(cli_mod, "_cprint"), \
                patch.object(cli_mod, "save_config_value", return_value=True) as save_mock:
            cli_mod.HermesCLI._handle_busy_command(fake_cli, "/busy interrupt")

        self.assertEqual(fake_cli.busy_input_mode, "interrupt")
        save_mock.assert_called_once_with("display.busy_input_mode", "interrupt")

    def test_invalid_argument_prints_usage(self):
        """An unknown argument prints the usage line and saves nothing."""
        cli_mod = _import_cli()
        fake_cli = self._make_cli()
        with patch.object(cli_mod, "_cprint") as cprint_mock, \
                patch.object(cli_mod, "save_config_value") as save_mock:
            cli_mod.HermesCLI._handle_busy_command(fake_cli, "/busy nonsense")

        save_mock.assert_not_called()
        output = " ".join(map(str, cprint_mock.call_args_list))
        self.assertIn("Usage: /busy", output)
|
||||
|
||||
|
||||
class TestBusyCommandRegistry(unittest.TestCase):
    """Check that ``busy`` is present in ``COMMAND_REGISTRY`` with the expected metadata."""

    def test_busy_in_registry(self):
        """The registry contains an entry named ``busy``."""
        from hermes_cli.commands import COMMAND_REGISTRY

        names = [c.name for c in COMMAND_REGISTRY]
        self.assertIn("busy", names)

    def test_busy_subcommands_documented(self):
        """The ``busy`` entry advertises its sub-commands and category."""
        from hermes_cli.commands import COMMAND_REGISTRY

        # Use a default so a missing entry is reported as an assertion
        # failure rather than an uncaught StopIteration.
        busy = next((c for c in COMMAND_REGISTRY if c.name == "busy"), None)
        self.assertIsNotNone(busy, "'busy' command missing from COMMAND_REGISTRY")
        self.assertEqual(busy.args_hint, "[queue|interrupt|status]")
        self.assertEqual(busy.category, "Configuration")
|
||||
@@ -33,7 +33,6 @@ class _FakeAgent:
|
||||
self._todo_store.write(
|
||||
[{"id": "t1", "content": "unfinished task", "status": "in_progress"}]
|
||||
)
|
||||
self.flush_memories = MagicMock()
|
||||
self.commit_memory_session = MagicMock()
|
||||
self._invalidate_system_prompt = MagicMock()
|
||||
|
||||
@@ -157,7 +156,6 @@ def test_new_command_creates_real_fresh_session_and_resets_agent_state(tmp_path)
|
||||
assert cli.agent._todo_store.read() == []
|
||||
assert cli.session_start > old_session_start
|
||||
assert cli.agent.session_start == cli.session_start
|
||||
cli.agent.flush_memories.assert_called_once_with([{"role": "user", "content": "hello"}])
|
||||
cli.agent._invalidate_system_prompt.assert_called_once()
|
||||
|
||||
|
||||
|
||||
@@ -0,0 +1,390 @@
|
||||
"""Tests for cron job context_from feature (issue #5439 Option C)."""
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent.parent))
|
||||
|
||||
|
||||
@pytest.fixture
def cron_env(tmp_path, monkeypatch):
    """Provide an isolated cron environment rooted at a temporary HERMES_HOME.

    Creates ``.hermes/cron/output`` under ``tmp_path``, points the
    ``HERMES_HOME`` environment variable at ``.hermes``, and redirects the
    path constants of ``cron.jobs`` to the temporary tree.  Returns the
    ``.hermes`` directory.
    """
    hermes_home = tmp_path / ".hermes"
    cron_dir = hermes_home / "cron"
    output_dir = cron_dir / "output"
    # One call creates .hermes, cron and output in order.
    output_dir.mkdir(parents=True)
    monkeypatch.setenv("HERMES_HOME", str(hermes_home))

    import cron.jobs as jobs_mod

    overrides = (
        ("HERMES_DIR", hermes_home),
        ("CRON_DIR", cron_dir),
        ("JOBS_FILE", cron_dir / "jobs.json"),
        ("OUTPUT_DIR", output_dir),
    )
    for attr_name, path in overrides:
        monkeypatch.setattr(jobs_mod, attr_name, path)

    return hermes_home
|
||||
|
||||
|
||||
class TestJobContextFromField:
    """Test that context_from is stored and retrieved correctly."""

    def test_create_job_with_context_from_string(self, cron_env):
        """A single job id is stored as a one-element list and survives a reload."""
        from cron.jobs import create_job, get_job

        source = create_job(prompt="Find news", schedule="every 1h")
        consumer = create_job(
            prompt="Summarize findings",
            schedule="every 2h",
            context_from=source["id"],
        )
        expected = [source["id"]]

        assert consumer["context_from"] == expected
        reloaded = get_job(consumer["id"])
        assert reloaded["context_from"] == expected

    def test_create_job_with_context_from_list(self, cron_env):
        """A list of job ids is stored in the order given."""
        from cron.jobs import create_job, get_job

        news = create_job(prompt="Find news", schedule="every 1h")
        weather = create_job(prompt="Find weather", schedule="every 1h")
        source_ids = [news["id"], weather["id"]]
        summary = create_job(
            prompt="Summarize everything",
            schedule="every 2h",
            context_from=[news["id"], weather["id"]],
        )

        assert summary["context_from"] == source_ids

    def test_create_job_without_context_from(self, cron_env):
        """Omitting context_from leaves the field unset/None."""
        from cron.jobs import create_job

        created = create_job(prompt="Hello", schedule="every 1h")
        assert created.get("context_from") is None

    def test_context_from_empty_string_normalized_to_none(self, cron_env):
        """An empty string is treated the same as no context_from."""
        from cron.jobs import create_job

        created = create_job(prompt="Hello", schedule="every 1h", context_from="")
        assert created.get("context_from") is None

    def test_context_from_empty_list_normalized_to_none(self, cron_env):
        """An empty list is treated the same as no context_from."""
        from cron.jobs import create_job

        created = create_job(prompt="Hello", schedule="every 1h", context_from=[])
        assert created.get("context_from") is None
|
||||
|
||||
|
||||
class TestBuildJobPromptContextFrom:
    """Test that _build_job_prompt() injects context from referenced jobs."""

    @staticmethod
    def _write_output(job, text, filename="2026-04-22_10-00-00.md"):
        """Write *text* as an output file for *job* and return the file path."""
        # Imported at call time so the OUTPUT_DIR patched in by the
        # ``cron_env`` fixture is the one that gets used.
        from cron.jobs import OUTPUT_DIR

        out_dir = OUTPUT_DIR / job["id"]
        out_dir.mkdir(parents=True, exist_ok=True)
        out_file = out_dir / filename
        out_file.write_text(text, encoding="utf-8")
        return out_file

    @staticmethod
    def _build_prompt_with_md_read_error(job, exc):
        """Return ``_build_job_prompt(job)`` while reading any ``.md`` file raises *exc*."""
        from unittest.mock import patch

        from cron.scheduler import _build_job_prompt

        original_read = Path.read_text

        def failing_read_text(self, *args, **kwargs):
            # Only output files fail; every other read goes through untouched.
            if self.suffix == ".md":
                raise exc
            return original_read(self, *args, **kwargs)

        with patch.object(Path, "read_text", failing_read_text):
            return _build_job_prompt(job)

    def test_injects_latest_output(self, cron_env):
        """The referenced job's output and a labelled header appear in the prompt."""
        from cron.jobs import create_job
        from cron.scheduler import _build_job_prompt

        job_a = create_job(prompt="Find news", schedule="every 1h")

        # Write an output file for job_a.
        self._write_output(job_a, "Today's top story: AI is everywhere.")

        job_b = create_job(
            prompt="Summarize the news",
            schedule="every 2h",
            context_from=job_a["id"],
        )

        prompt = _build_job_prompt(job_b)
        assert "Today's top story: AI is everywhere." in prompt
        assert f"Output from job '{job_a['id']}'" in prompt

    def test_uses_most_recent_output(self, cron_env):
        """Only the newest output file of the referenced job is injected."""
        import os
        import time

        from cron.jobs import create_job
        from cron.scheduler import _build_job_prompt

        job_a = create_job(prompt="Find news", schedule="every 1h")

        old_file = self._write_output(job_a, "Old output", "2026-04-22_08-00-00.md")
        new_file = self._write_output(job_a, "New output", "2026-04-22_10-00-00.md")

        # Pin the modification times explicitly rather than sleeping between
        # writes: a short sleep is not enough on filesystems with coarse
        # timestamp resolution.  File names and mtimes agree on the ordering.
        now = time.time()
        os.utime(old_file, (now - 7200, now - 7200))
        os.utime(new_file, (now, now))

        job_b = create_job(
            prompt="Summarize", schedule="every 2h", context_from=job_a["id"]
        )
        prompt = _build_job_prompt(job_b)
        assert "New output" in prompt
        assert "Old output" not in prompt

    def test_graceful_when_no_output_yet(self, cron_env):
        """A referenced job that never ran is skipped silently."""
        from cron.jobs import create_job
        from cron.scheduler import _build_job_prompt

        job_a = create_job(prompt="Find news", schedule="every 1h")
        job_b = create_job(
            prompt="Summarize", schedule="every 2h", context_from=job_a["id"]
        )

        # job_a never ran — output dir does not exist
        # expect silent skip: no placeholder injected, base prompt intact
        prompt = _build_job_prompt(job_b)
        assert "no output" not in prompt.lower()
        assert "not found" not in prompt.lower()
        assert "Summarize" in prompt

    def test_injects_multiple_context_jobs(self, cron_env):
        """Outputs of every referenced job are injected."""
        from cron.jobs import create_job
        from cron.scheduler import _build_job_prompt

        job_a = create_job(prompt="Find news", schedule="every 1h")
        job_b = create_job(prompt="Find weather", schedule="every 1h")

        for job, content in [(job_a, "News: AI boom"), (job_b, "Weather: Sunny")]:
            self._write_output(job, content)

        job_c = create_job(
            prompt="Daily briefing",
            schedule="every 2h",
            context_from=[job_a["id"], job_b["id"]],
        )
        prompt = _build_job_prompt(job_c)
        assert "News: AI boom" in prompt
        assert "Weather: Sunny" in prompt

    def test_context_injected_before_prompt(self, cron_env):
        """Context should appear before the job's own prompt."""
        from cron.jobs import create_job
        from cron.scheduler import _build_job_prompt

        job_a = create_job(prompt="Find data", schedule="every 1h")
        self._write_output(job_a, "Context data")

        job_b = create_job(
            prompt="Process the data above",
            schedule="every 2h",
            context_from=job_a["id"],
        )
        prompt = _build_job_prompt(job_b)
        context_pos = prompt.find("Context data")
        prompt_pos = prompt.find("Process the data above")
        # str.find returns -1 for a missing substring; without these checks a
        # prompt lacking the context would satisfy ``-1 < prompt_pos``.
        assert context_pos != -1, "context output was not injected"
        assert prompt_pos != -1, "job prompt missing from built prompt"
        assert context_pos < prompt_pos

    def test_output_truncated_at_8k_chars(self, cron_env):
        """Output longer than 8000 chars should be truncated."""
        from cron.jobs import create_job
        from cron.scheduler import _build_job_prompt

        job_a = create_job(prompt="Find data", schedule="every 1h")
        big_output = "x" * 10000
        self._write_output(job_a, big_output)

        job_b = create_job(
            prompt="Process", schedule="every 2h", context_from=job_a["id"]
        )
        prompt = _build_job_prompt(job_b)
        assert "truncated" in prompt
        assert big_output not in prompt

    def test_graceful_when_file_deleted_between_listing_and_reading(self, cron_env):
        """Job should not crash if output file is deleted mid-read."""
        from cron.jobs import create_job

        job_a = create_job(prompt="Find data", schedule="every 1h")
        self._write_output(job_a, "Some output")

        job_b = create_job(
            prompt="Process", schedule="every 2h", context_from=job_a["id"]
        )

        # Simulate file deleted between glob() and read_text()
        prompt = self._build_prompt_with_md_read_error(
            job_b, FileNotFoundError("file deleted mid-read")
        )

        # Job should not crash, prompt should still contain the base prompt
        assert "Process" in prompt

    def test_graceful_when_permission_error(self, cron_env):
        """Job should not crash if reading the output file raises PermissionError."""
        from cron.jobs import create_job

        job_a = create_job(prompt="Find data", schedule="every 1h")
        self._write_output(job_a, "Some output")

        job_b = create_job(
            prompt="Process", schedule="every 2h", context_from=job_a["id"]
        )

        # Simulate permission error on read
        prompt = self._build_prompt_with_md_read_error(
            job_b, PermissionError("permission denied")
        )

        # Job should not crash, prompt should still contain the base prompt
        assert "Process" in prompt

    def test_invalid_job_id_skipped(self, cron_env):
        """context_from with path traversal job_id should be skipped."""
        from cron.jobs import create_job
        from cron.scheduler import _build_job_prompt

        job = create_job(prompt="Process", schedule="every 2h")
        # Manually inject invalid context_from (simulating tampered jobs.json)
        job["context_from"] = ["../../../etc/passwd"]
        prompt = _build_job_prompt(job)
        # Should not crash and should not inject anything malicious
        assert "Process" in prompt
        assert "etc/passwd" not in prompt
|
||||
|
||||
|
||||
|
||||
class TestUpdateContextFrom:
    """Verify the cronjob tool's `update` action wires context_from through.

    Without this, the create-path stores the field but users can never modify
    or clear it via the tool (schema promises "pass an empty array to clear").
    """

    @staticmethod
    def _update(job_id, **fields):
        """Call the cronjob tool's ``update`` action and decode its JSON reply."""
        import json

        from tools.cronjob_tools import cronjob

        return json.loads(cronjob(action="update", job_id=job_id, **fields))

    def test_update_adds_context_from_to_existing_job(self, cron_env):
        """Updating a job that had no context_from stores the new reference."""
        from cron.jobs import create_job, get_job

        source = create_job(prompt="Find news", schedule="every 1h")
        target = create_job(prompt="Summarize", schedule="every 2h")
        assert target.get("context_from") is None

        reply = self._update(target["id"], context_from=source["id"])
        assert reply["success"] is True

        stored = get_job(target["id"])
        assert stored["context_from"] == [source["id"]]

    def test_update_changes_context_from_reference(self, cron_env):
        """An update replaces the existing reference with the new one."""
        from cron.jobs import create_job, get_job

        first_source = create_job(prompt="Find news", schedule="every 1h")
        second_source = create_job(prompt="Find weather", schedule="every 1h")
        target = create_job(
            prompt="Summarize", schedule="every 2h", context_from=first_source["id"],
        )
        assert target["context_from"] == [first_source["id"]]

        reply = self._update(target["id"], context_from=[second_source["id"]])
        assert reply["success"] is True
        assert get_job(target["id"])["context_from"] == [second_source["id"]]

    def test_update_clears_context_from_with_empty_list(self, cron_env):
        """Passing ``[]`` clears a previously stored reference."""
        from cron.jobs import create_job, get_job

        source = create_job(prompt="Find news", schedule="every 1h")
        target = create_job(
            prompt="Summarize", schedule="every 2h", context_from=source["id"],
        )
        assert get_job(target["id"])["context_from"] == [source["id"]]

        reply = self._update(target["id"], context_from=[])
        assert reply["success"] is True
        assert get_job(target["id"])["context_from"] is None

    def test_update_clears_context_from_with_empty_string(self, cron_env):
        """Passing ``""`` clears a previously stored reference."""
        from cron.jobs import create_job, get_job

        source = create_job(prompt="Find news", schedule="every 1h")
        target = create_job(
            prompt="Summarize", schedule="every 2h", context_from=source["id"],
        )

        reply = self._update(target["id"], context_from="")
        assert reply["success"] is True
        assert get_job(target["id"])["context_from"] is None

    def test_update_rejects_unknown_job_reference(self, cron_env):
        """Referencing a job id that does not exist fails with a 'not found' error."""
        from cron.jobs import create_job

        target = create_job(prompt="Summarize", schedule="every 2h")

        reply = self._update(target["id"], context_from=["deadbeef0000"])
        assert reply["success"] is False
        assert "not found" in reply["error"]

    def test_update_preserves_context_from_when_not_passed(self, cron_env):
        """Updating other fields must not clobber context_from."""
        from cron.jobs import create_job, get_job

        source = create_job(prompt="Find news", schedule="every 1h")
        target = create_job(
            prompt="Summarize", schedule="every 2h", context_from=source["id"],
        )

        # Update an unrelated field
        reply = self._update(target["id"], prompt="Summarize v2")
        assert reply["success"] is True

        stored = get_job(target["id"])
        assert stored["prompt"] == "Summarize v2"
        assert stored["context_from"] == [source["id"]]
|
||||
@@ -1374,6 +1374,139 @@ class TestResponsesStreaming:
|
||||
assert data["status"] == "completed"
|
||||
assert data["output"][-1]["content"][0]["text"] == "Stored response"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_stream_cancelled_persists_incomplete_snapshot(self, adapter):
    """Server-side asyncio.CancelledError (shutdown, request timeout) must
    still leave an ``incomplete`` snapshot in ResponseStore so
    GET /v1/responses/{id} and previous_response_id chaining keep
    working.  Regression for PR #15171 follow-up.

    Calls _write_sse_responses directly so the test can await the
    handler to completion (TestClient disconnection races the server
    handler, which makes end-to-end assertion on the final stored
    snapshot flaky).
    """
    import queue as _q

    import gateway.platforms.api_server as api_mod

    # Minimal stand-ins for the request and the stream response the
    # writer operates on.
    request_stub = MagicMock()
    request_stub.headers = {}

    captured_writes: list = []

    class _FakeStreamResponse:
        async def prepare(self, req):
            pass

        async def write(self, payload):
            captured_writes.append(payload)

    deltas: _q.Queue = _q.Queue()

    async def _agent_coro():
        # Queue one partial delta, pause briefly so the drain loop has a
        # chance to pick it up, then simulate a server-side cancel.
        deltas.put("partial output")
        await asyncio.sleep(0.01)
        raise asyncio.CancelledError()

    agent_task = asyncio.ensure_future(_agent_coro())
    response_id = f"resp_{uuid.uuid4().hex[:28]}"

    # web.StreamResponse is replaced only for the duration of the writer call.
    stream_patch = patch.object(
        api_mod.web, "StreamResponse", return_value=_FakeStreamResponse()
    )
    with stream_patch, pytest.raises(asyncio.CancelledError):
        await adapter._write_sse_responses(
            request=request_stub,
            response_id=response_id,
            model="hermes-agent",
            created_at=int(time.time()),
            stream_q=deltas,
            agent_task=agent_task,
            agent_ref=[None],
            conversation_history=[],
            user_message="will be cancelled",
            instructions=None,
            conversation=None,
            store=True,
            session_id=None,
        )

    # The in_progress snapshot was persisted on response.created, and the
    # CancelledError handler must have updated it to ``incomplete`` with
    # the partial text it saw.
    stored = adapter._response_store.get(response_id)
    assert stored is not None, "snapshot must be retrievable after cancellation"
    snapshot = stored["response"]
    assert snapshot["status"] == "incomplete"

    # Partial text captured before cancel should be preserved.
    text_parts = []
    for item in snapshot.get("output", []):
        if item.get("type") != "message":
            continue
        for part in item.get("content", []):
            text_parts.append(part.get("text", ""))
    assert "partial output" in "".join(text_parts)
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_stream_client_disconnect_persists_incomplete_snapshot(self, adapter):
    """Client disconnect (ConnectionResetError) during streaming must
    persist an ``incomplete`` snapshot in ResponseStore.  Regression
    for PR #15171."""
    import queue as _q

    import gateway.platforms.api_server as api_mod

    request_stub = MagicMock()
    request_stub.headers = {}

    writes_seen = 0

    class _DisconnectingStreamResponse:
        async def prepare(self, req):
            pass

        async def write(self, payload):
            # The first two writes succeed; from the third write onward
            # the "client" is gone, simulated with ConnectionResetError.
            nonlocal writes_seen
            writes_seen += 1
            if writes_seen >= 3:
                raise ConnectionResetError("simulated client disconnect")

    deltas: _q.Queue = _q.Queue()
    deltas.put("some streamed text")
    deltas.put(None)  # EOS sentinel

    async def _agent_coro():
        await asyncio.sleep(0.01)
        agent_result = {"final_response": "", "messages": [], "api_calls": 0}
        usage = {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}
        return (agent_result, usage)

    agent_task = asyncio.ensure_future(_agent_coro())
    response_id = f"resp_{uuid.uuid4().hex[:28]}"

    stream_patch = patch.object(
        api_mod.web, "StreamResponse", return_value=_DisconnectingStreamResponse()
    )
    with stream_patch:
        await adapter._write_sse_responses(
            request=request_stub,
            response_id=response_id,
            model="hermes-agent",
            created_at=int(time.time()),
            stream_q=deltas,
            agent_task=agent_task,
            agent_ref=[None],
            conversation_history=[],
            user_message="will disconnect",
            instructions=None,
            conversation=None,
            store=True,
            session_id=None,
        )

    stored = adapter._response_store.get(response_id)
    assert stored is not None, "snapshot must survive client disconnect"
    assert stored["response"]["status"] == "incomplete"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Auth on endpoints
|
||||
|
||||
@@ -1,249 +0,0 @@
|
||||
"""Tests for proactive memory flush on session expiry.
|
||||
|
||||
Verifies that:
|
||||
1. _is_session_expired() works from a SessionEntry alone (no source needed)
|
||||
2. The sync callback is no longer called in get_or_create_session
|
||||
3. memory_flushed flag persists across save/load cycles (prevents restart re-flush)
|
||||
4. The background watcher can detect expired sessions
|
||||
"""
|
||||
|
||||
import pytest
|
||||
from datetime import datetime, timedelta
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch, MagicMock
|
||||
|
||||
from gateway.config import Platform, GatewayConfig, SessionResetPolicy
|
||||
from gateway.session import SessionSource, SessionStore, SessionEntry
|
||||
|
||||
|
||||
@pytest.fixture()
def idle_store(tmp_path):
    """SessionStore with a 60-minute idle reset policy."""
    idle_policy = SessionResetPolicy(mode="idle", idle_minutes=60)
    gateway_config = GatewayConfig(default_reset_policy=idle_policy)
    # _ensure_loaded is patched out while the store is constructed.
    with patch("gateway.session.SessionStore._ensure_loaded"):
        store = SessionStore(sessions_dir=tmp_path, config=gateway_config)
    # Mark the store as loaded and without a DB handle.
    store._db = None
    store._loaded = True
    return store
|
||||
|
||||
|
||||
@pytest.fixture()
def no_reset_store(tmp_path):
    """SessionStore with no reset policy (mode=none)."""
    never_reset = SessionResetPolicy(mode="none")
    gateway_config = GatewayConfig(default_reset_policy=never_reset)
    # _ensure_loaded is patched out while the store is constructed.
    with patch("gateway.session.SessionStore._ensure_loaded"):
        store = SessionStore(sessions_dir=tmp_path, config=gateway_config)
    # Mark the store as loaded and without a DB handle.
    store._db = None
    store._loaded = True
    return store
|
||||
|
||||
|
||||
class TestIsSessionExpired:
|
||||
"""_is_session_expired should detect expiry from entry alone."""
|
||||
|
||||
def test_idle_session_expired(self, idle_store):
|
||||
entry = SessionEntry(
|
||||
session_key="agent:main:telegram:dm",
|
||||
session_id="sid_1",
|
||||
created_at=datetime.now() - timedelta(hours=3),
|
||||
updated_at=datetime.now() - timedelta(minutes=120),
|
||||
platform=Platform.TELEGRAM,
|
||||
chat_type="dm",
|
||||
)
|
||||
assert idle_store._is_session_expired(entry) is True
|
||||
|
||||
def test_active_session_not_expired(self, idle_store):
|
||||
entry = SessionEntry(
|
||||
session_key="agent:main:telegram:dm",
|
||||
session_id="sid_2",
|
||||
created_at=datetime.now() - timedelta(hours=1),
|
||||
updated_at=datetime.now() - timedelta(minutes=10),
|
||||
platform=Platform.TELEGRAM,
|
||||
chat_type="dm",
|
||||
)
|
||||
assert idle_store._is_session_expired(entry) is False
|
||||
|
||||
def test_none_mode_never_expires(self, no_reset_store):
|
||||
entry = SessionEntry(
|
||||
session_key="agent:main:telegram:dm",
|
||||
session_id="sid_3",
|
||||
created_at=datetime.now() - timedelta(days=30),
|
||||
updated_at=datetime.now() - timedelta(days=30),
|
||||
platform=Platform.TELEGRAM,
|
||||
chat_type="dm",
|
||||
)
|
||||
assert no_reset_store._is_session_expired(entry) is False
|
||||
|
||||
def test_active_processes_prevent_expiry(self, idle_store):
|
||||
"""Sessions with active background processes should never expire."""
|
||||
idle_store._has_active_processes_fn = lambda key: True
|
||||
entry = SessionEntry(
|
||||
session_key="agent:main:telegram:dm",
|
||||
session_id="sid_4",
|
||||
created_at=datetime.now() - timedelta(hours=5),
|
||||
updated_at=datetime.now() - timedelta(hours=5),
|
||||
platform=Platform.TELEGRAM,
|
||||
chat_type="dm",
|
||||
)
|
||||
assert idle_store._is_session_expired(entry) is False
|
||||
|
||||
def test_daily_mode_expired(self, tmp_path):
|
||||
"""Daily mode should expire sessions from before today's reset hour."""
|
||||
config = GatewayConfig(
|
||||
default_reset_policy=SessionResetPolicy(mode="daily", at_hour=4),
|
||||
)
|
||||
with patch("gateway.session.SessionStore._ensure_loaded"):
|
||||
store = SessionStore(sessions_dir=tmp_path, config=config)
|
||||
store._db = None
|
||||
store._loaded = True
|
||||
|
||||
entry = SessionEntry(
|
||||
session_key="agent:main:telegram:dm",
|
||||
session_id="sid_5",
|
||||
created_at=datetime.now() - timedelta(days=2),
|
||||
updated_at=datetime.now() - timedelta(days=2),
|
||||
platform=Platform.TELEGRAM,
|
||||
chat_type="dm",
|
||||
)
|
||||
assert store._is_session_expired(entry) is True
|
||||
|
||||
|
||||
class TestGetOrCreateSessionNoCallback:
|
||||
"""get_or_create_session should NOT call a sync flush callback."""
|
||||
|
||||
def test_auto_reset_creates_new_session_after_flush(self, idle_store):
|
||||
"""When a flushed session auto-resets, a new session_id is created."""
|
||||
source = SessionSource(
|
||||
platform=Platform.TELEGRAM,
|
||||
chat_id="123",
|
||||
chat_type="dm",
|
||||
)
|
||||
# Create initial session
|
||||
entry1 = idle_store.get_or_create_session(source)
|
||||
old_sid = entry1.session_id
|
||||
|
||||
# Simulate the watcher having flushed it
|
||||
entry1.memory_flushed = True
|
||||
|
||||
# Simulate the session going idle
|
||||
entry1.updated_at = datetime.now() - timedelta(minutes=120)
|
||||
idle_store._save()
|
||||
|
||||
# Next call should auto-reset
|
||||
entry2 = idle_store.get_or_create_session(source)
|
||||
assert entry2.session_id != old_sid
|
||||
assert entry2.was_auto_reset is True
|
||||
# New session starts with memory_flushed=False
|
||||
assert entry2.memory_flushed is False
|
||||
|
||||
def test_no_sync_callback_invoked(self, idle_store):
|
||||
"""No synchronous callback should block during auto-reset."""
|
||||
source = SessionSource(
|
||||
platform=Platform.TELEGRAM,
|
||||
chat_id="123",
|
||||
chat_type="dm",
|
||||
)
|
||||
entry1 = idle_store.get_or_create_session(source)
|
||||
entry1.updated_at = datetime.now() - timedelta(minutes=120)
|
||||
idle_store._save()
|
||||
|
||||
# Verify no _on_auto_reset attribute
|
||||
assert not hasattr(idle_store, '_on_auto_reset')
|
||||
|
||||
# This should NOT block (no sync LLM call)
|
||||
entry2 = idle_store.get_or_create_session(source)
|
||||
assert entry2.was_auto_reset is True
|
||||
|
||||
|
||||
class TestMemoryFlushedFlag:
|
||||
"""The memory_flushed flag on SessionEntry prevents double-flushing."""
|
||||
|
||||
def test_defaults_to_false(self):
|
||||
entry = SessionEntry(
|
||||
session_key="agent:main:telegram:dm:123",
|
||||
session_id="sid_new",
|
||||
created_at=datetime.now(),
|
||||
updated_at=datetime.now(),
|
||||
platform=Platform.TELEGRAM,
|
||||
chat_type="dm",
|
||||
)
|
||||
assert entry.memory_flushed is False
|
||||
|
||||
def test_persists_through_save_load(self, idle_store):
|
||||
"""memory_flushed=True must survive a save/load cycle (simulates restart)."""
|
||||
key = "agent:main:discord:thread:789"
|
||||
entry = SessionEntry(
|
||||
session_key=key,
|
||||
session_id="sid_flushed",
|
||||
created_at=datetime.now() - timedelta(hours=5),
|
||||
updated_at=datetime.now() - timedelta(hours=5),
|
||||
platform=Platform.DISCORD,
|
||||
chat_type="thread",
|
||||
memory_flushed=True,
|
||||
)
|
||||
idle_store._entries[key] = entry
|
||||
idle_store._save()
|
||||
|
||||
# Simulate restart: clear in-memory state, reload from disk
|
||||
idle_store._entries.clear()
|
||||
idle_store._loaded = False
|
||||
idle_store._ensure_loaded()
|
||||
|
||||
reloaded = idle_store._entries[key]
|
||||
assert reloaded.memory_flushed is True
|
||||
|
||||
def test_unflushed_entry_survives_restart_as_unflushed(self, idle_store):
|
||||
"""An entry without memory_flushed stays False after reload."""
|
||||
key = "agent:main:telegram:dm:456"
|
||||
entry = SessionEntry(
|
||||
session_key=key,
|
||||
session_id="sid_not_flushed",
|
||||
created_at=datetime.now() - timedelta(hours=2),
|
||||
updated_at=datetime.now() - timedelta(hours=2),
|
||||
platform=Platform.TELEGRAM,
|
||||
chat_type="dm",
|
||||
)
|
||||
idle_store._entries[key] = entry
|
||||
idle_store._save()
|
||||
|
||||
idle_store._entries.clear()
|
||||
idle_store._loaded = False
|
||||
idle_store._ensure_loaded()
|
||||
|
||||
reloaded = idle_store._entries[key]
|
||||
assert reloaded.memory_flushed is False
|
||||
|
||||
def test_roundtrip_to_dict_from_dict(self):
|
||||
"""to_dict/from_dict must preserve memory_flushed."""
|
||||
entry = SessionEntry(
|
||||
session_key="agent:main:telegram:dm:999",
|
||||
session_id="sid_rt",
|
||||
created_at=datetime.now(),
|
||||
updated_at=datetime.now(),
|
||||
platform=Platform.TELEGRAM,
|
||||
chat_type="dm",
|
||||
memory_flushed=True,
|
||||
)
|
||||
d = entry.to_dict()
|
||||
assert d["memory_flushed"] is True
|
||||
|
||||
restored = SessionEntry.from_dict(d)
|
||||
assert restored.memory_flushed is True
|
||||
|
||||
def test_legacy_entry_without_field_defaults_false(self):
|
||||
"""Old sessions.json entries missing memory_flushed should default to False."""
|
||||
data = {
|
||||
"session_key": "agent:main:telegram:dm:legacy",
|
||||
"session_id": "sid_legacy",
|
||||
"created_at": datetime.now().isoformat(),
|
||||
"updated_at": datetime.now().isoformat(),
|
||||
"platform": "telegram",
|
||||
"chat_type": "dm",
|
||||
# no memory_flushed key
|
||||
}
|
||||
entry = SessionEntry.from_dict(data)
|
||||
assert entry.memory_flushed is False
|
||||
@@ -66,6 +66,37 @@ class TestBlueBubblesHelpers:
|
||||
|
||||
assert check_bluebubbles_requirements() is True
|
||||
|
||||
def test_supports_message_editing_is_false(self, monkeypatch):
|
||||
adapter = _make_adapter(monkeypatch)
|
||||
assert adapter.SUPPORTS_MESSAGE_EDITING is False
|
||||
|
||||
def test_truncate_message_omits_pagination_suffixes(self, monkeypatch):
|
||||
adapter = _make_adapter(monkeypatch)
|
||||
chunks = adapter.truncate_message("abcdefghij", max_length=6)
|
||||
assert len(chunks) > 1
|
||||
assert "".join(chunks) == "abcdefghij"
|
||||
assert all("(" not in chunk for chunk in chunks)
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_send_splits_paragraphs_into_multiple_bubbles(self, monkeypatch):
|
||||
adapter = _make_adapter(monkeypatch)
|
||||
sent = []
|
||||
|
||||
async def fake_resolve_chat_guid(chat_id):
|
||||
return "iMessage;-;user@example.com"
|
||||
|
||||
async def fake_api_post(path, payload):
|
||||
sent.append(payload["message"])
|
||||
return {"data": {"guid": f"msg-{len(sent)}"}}
|
||||
|
||||
monkeypatch.setattr(adapter, "_resolve_chat_guid", fake_resolve_chat_guid)
|
||||
monkeypatch.setattr(adapter, "_api_post", fake_api_post)
|
||||
|
||||
result = await adapter.send("user@example.com", "first thought\n\nsecond thought")
|
||||
|
||||
assert result.success is True
|
||||
assert sent == ["first thought", "second thought"]
|
||||
|
||||
def test_format_message_strips_markdown(self, monkeypatch):
|
||||
adapter = _make_adapter(monkeypatch)
|
||||
assert adapter.format_message("**Hello** `world`") == "Hello world"
|
||||
|
||||
@@ -70,6 +70,9 @@ def _make_runner():
|
||||
runner.session_store = None
|
||||
runner.hooks = MagicMock()
|
||||
runner.hooks.emit = AsyncMock()
|
||||
runner.pairing_store = MagicMock()
|
||||
runner.pairing_store.is_approved.return_value = True
|
||||
runner._is_user_authorized = lambda _source: True
|
||||
return runner, _AGENT_PENDING_SENTINEL
|
||||
|
||||
|
||||
@@ -91,6 +94,30 @@ def _make_adapter(platform_val="telegram"):
|
||||
class TestBusySessionAck:
|
||||
"""User sends a message while agent is running — should get acknowledgment."""
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_handle_message_queue_mode_queues_without_interrupt(self):
|
||||
"""Runner queue mode must not interrupt an active agent for text follow-ups."""
|
||||
from gateway.run import GatewayRunner
|
||||
|
||||
runner, _sentinel = _make_runner()
|
||||
adapter = _make_adapter()
|
||||
|
||||
event = _make_event(text="follow up in queue mode")
|
||||
sk = build_session_key(event.source)
|
||||
|
||||
running_agent = MagicMock()
|
||||
runner._busy_input_mode = "queue"
|
||||
runner._running_agents[sk] = running_agent
|
||||
runner.adapters[event.source.platform] = adapter
|
||||
|
||||
result = await GatewayRunner._handle_message(runner, event)
|
||||
|
||||
assert result is None
|
||||
assert sk in adapter._pending_messages
|
||||
assert adapter._pending_messages[sk] is event
|
||||
assert sk not in runner._pending_messages
|
||||
running_agent.interrupt.assert_not_called()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_sends_ack_when_agent_running(self):
|
||||
"""First message during busy session should get a status ack."""
|
||||
|
||||
@@ -52,6 +52,10 @@ class TestPlatformConfigRoundtrip:
|
||||
assert restored.enabled is False
|
||||
assert restored.token is None
|
||||
|
||||
def test_from_dict_coerces_quoted_false_enabled(self):
|
||||
restored = PlatformConfig.from_dict({"enabled": "false"})
|
||||
assert restored.enabled is False
|
||||
|
||||
|
||||
class TestGetConnectedPlatforms:
|
||||
def test_returns_enabled_with_token(self):
|
||||
@@ -140,6 +144,10 @@ class TestSessionResetPolicy:
|
||||
assert restored.at_hour == 4
|
||||
assert restored.idle_minutes == 1440
|
||||
|
||||
def test_from_dict_coerces_quoted_false_notify(self):
|
||||
restored = SessionResetPolicy.from_dict({"notify": "false"})
|
||||
assert restored.notify is False
|
||||
|
||||
|
||||
class TestGatewayConfigRoundtrip:
|
||||
def test_full_roundtrip(self):
|
||||
@@ -182,6 +190,10 @@ class TestGatewayConfigRoundtrip:
|
||||
assert restored.unauthorized_dm_behavior == "ignore"
|
||||
assert restored.platforms[Platform.WHATSAPP].extra["unauthorized_dm_behavior"] == "pair"
|
||||
|
||||
def test_from_dict_coerces_quoted_false_always_log_local(self):
|
||||
restored = GatewayConfig.from_dict({"always_log_local": "false"})
|
||||
assert restored.always_log_local is False
|
||||
|
||||
|
||||
class TestLoadGatewayConfig:
|
||||
def test_bridges_quick_commands_from_config_yaml(self, tmp_path, monkeypatch):
|
||||
@@ -238,6 +250,55 @@ class TestLoadGatewayConfig:
|
||||
|
||||
assert config.thread_sessions_per_user is False
|
||||
|
||||
def test_bridges_quoted_false_platform_enabled_from_config_yaml(self, tmp_path, monkeypatch):
|
||||
hermes_home = tmp_path / ".hermes"
|
||||
hermes_home.mkdir()
|
||||
config_path = hermes_home / "config.yaml"
|
||||
config_path.write_text(
|
||||
"platforms:\n"
|
||||
" api_server:\n"
|
||||
" enabled: \"false\"\n",
|
||||
encoding="utf-8",
|
||||
)
|
||||
|
||||
monkeypatch.setenv("HERMES_HOME", str(hermes_home))
|
||||
|
||||
config = load_gateway_config()
|
||||
|
||||
assert config.platforms[Platform.API_SERVER].enabled is False
|
||||
assert Platform.API_SERVER not in config.get_connected_platforms()
|
||||
|
||||
def test_bridges_quoted_false_session_notify_from_config_yaml(self, tmp_path, monkeypatch):
|
||||
hermes_home = tmp_path / ".hermes"
|
||||
hermes_home.mkdir()
|
||||
config_path = hermes_home / "config.yaml"
|
||||
config_path.write_text(
|
||||
"session_reset:\n"
|
||||
" notify: \"false\"\n",
|
||||
encoding="utf-8",
|
||||
)
|
||||
|
||||
monkeypatch.setenv("HERMES_HOME", str(hermes_home))
|
||||
|
||||
config = load_gateway_config()
|
||||
|
||||
assert config.default_reset_policy.notify is False
|
||||
|
||||
def test_bridges_quoted_false_always_log_local_from_config_yaml(self, tmp_path, monkeypatch):
|
||||
hermes_home = tmp_path / ".hermes"
|
||||
hermes_home.mkdir()
|
||||
config_path = hermes_home / "config.yaml"
|
||||
config_path.write_text(
|
||||
"always_log_local: \"false\"\n",
|
||||
encoding="utf-8",
|
||||
)
|
||||
|
||||
monkeypatch.setenv("HERMES_HOME", str(hermes_home))
|
||||
|
||||
config = load_gateway_config()
|
||||
|
||||
assert config.always_log_local is False
|
||||
|
||||
def test_bridges_discord_channel_prompts_from_config_yaml(self, tmp_path, monkeypatch):
|
||||
hermes_home = tmp_path / ".hermes"
|
||||
hermes_home.mkdir()
|
||||
|
||||
@@ -1,240 +0,0 @@
|
||||
"""Tests for memory flush stale-overwrite prevention (#2670).
|
||||
|
||||
Verifies that:
|
||||
1. Cron sessions are skipped (no flush for headless cron runs)
|
||||
2. Current memory state is injected into the flush prompt so the
|
||||
flush agent can see what's already saved and avoid overwrites
|
||||
3. The flush still works normally when memory files don't exist
|
||||
"""
|
||||
|
||||
import sys
|
||||
import types
|
||||
import pytest
|
||||
from pathlib import Path
|
||||
from unittest.mock import MagicMock, patch, call
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def _mock_dotenv(monkeypatch):
|
||||
"""gateway.run imports dotenv at module level; stub it so tests run without the package."""
|
||||
fake = types.ModuleType("dotenv")
|
||||
fake.load_dotenv = lambda *a, **kw: None
|
||||
monkeypatch.setitem(sys.modules, "dotenv", fake)
|
||||
|
||||
|
||||
def _make_runner():
|
||||
from gateway.run import GatewayRunner
|
||||
|
||||
runner = object.__new__(GatewayRunner)
|
||||
runner._honcho_managers = {}
|
||||
runner._honcho_configs = {}
|
||||
runner._running_agents = {}
|
||||
runner._pending_messages = {}
|
||||
runner._pending_approvals = {}
|
||||
runner.adapters = {}
|
||||
runner.hooks = MagicMock()
|
||||
runner.session_store = MagicMock()
|
||||
return runner
|
||||
|
||||
|
||||
_TRANSCRIPT_4_MSGS = [
|
||||
{"role": "user", "content": "hello"},
|
||||
{"role": "assistant", "content": "hi there"},
|
||||
{"role": "user", "content": "remember my name is Alice"},
|
||||
{"role": "assistant", "content": "Got it, Alice!"},
|
||||
]
|
||||
|
||||
|
||||
class TestCronSessionBypass:
|
||||
"""Cron sessions should never trigger a memory flush."""
|
||||
|
||||
def test_cron_session_skipped(self):
|
||||
runner = _make_runner()
|
||||
runner._flush_memories_for_session("cron_job123_20260323_120000")
|
||||
# session_store.load_transcript should never be called
|
||||
runner.session_store.load_transcript.assert_not_called()
|
||||
|
||||
def test_cron_session_with_prefix_skipped(self):
|
||||
"""Cron sessions with different prefixes are still skipped."""
|
||||
runner = _make_runner()
|
||||
runner._flush_memories_for_session("cron_daily_20260323")
|
||||
runner.session_store.load_transcript.assert_not_called()
|
||||
|
||||
def test_non_cron_session_proceeds(self):
|
||||
"""Non-cron sessions should still attempt the flush."""
|
||||
runner = _make_runner()
|
||||
runner.session_store.load_transcript.return_value = []
|
||||
runner._flush_memories_for_session("session_abc123")
|
||||
runner.session_store.load_transcript.assert_called_once_with("session_abc123")
|
||||
|
||||
|
||||
def _make_flush_context(monkeypatch, memory_dir=None):
|
||||
"""Return (runner, tmp_agent, fake_run_agent) with run_agent mocked in sys.modules."""
|
||||
tmp_agent = MagicMock()
|
||||
fake_run_agent = types.ModuleType("run_agent")
|
||||
fake_run_agent.AIAgent = MagicMock(return_value=tmp_agent)
|
||||
monkeypatch.setitem(sys.modules, "run_agent", fake_run_agent)
|
||||
|
||||
runner = _make_runner()
|
||||
runner.session_store.load_transcript.return_value = _TRANSCRIPT_4_MSGS
|
||||
return runner, tmp_agent, memory_dir
|
||||
|
||||
|
||||
class TestMemoryInjection:
|
||||
"""The flush prompt should include current memory state from disk."""
|
||||
|
||||
def test_memory_content_injected_into_flush_prompt(self, tmp_path, monkeypatch):
|
||||
"""When memory files exist, their content appears in the flush prompt."""
|
||||
memory_dir = tmp_path / "memories"
|
||||
memory_dir.mkdir()
|
||||
(memory_dir / "MEMORY.md").write_text("Agent knows Python\n§\nUser prefers dark mode")
|
||||
(memory_dir / "USER.md").write_text("Name: Alice\n§\nTimezone: PST")
|
||||
|
||||
runner, tmp_agent, _ = _make_flush_context(monkeypatch, memory_dir)
|
||||
|
||||
with (
|
||||
patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "k"}),
|
||||
patch("gateway.run._resolve_gateway_model", return_value="test-model"),
|
||||
patch.dict("sys.modules", {"tools.memory_tool": MagicMock(get_memory_dir=lambda: memory_dir)}),
|
||||
):
|
||||
runner._flush_memories_for_session("session_123")
|
||||
|
||||
tmp_agent.run_conversation.assert_called_once()
|
||||
flush_prompt = tmp_agent.run_conversation.call_args.kwargs.get("user_message", "")
|
||||
|
||||
assert "Agent knows Python" in flush_prompt
|
||||
assert "User prefers dark mode" in flush_prompt
|
||||
assert "Name: Alice" in flush_prompt
|
||||
assert "Timezone: PST" in flush_prompt
|
||||
assert "Do NOT overwrite or remove entries" in flush_prompt
|
||||
assert "current live state of memory" in flush_prompt
|
||||
|
||||
def test_flush_works_without_memory_files(self, tmp_path, monkeypatch):
|
||||
"""When no memory files exist, flush still runs without the guard."""
|
||||
empty_dir = tmp_path / "no_memories"
|
||||
empty_dir.mkdir()
|
||||
|
||||
runner, tmp_agent, _ = _make_flush_context(monkeypatch)
|
||||
|
||||
with (
|
||||
patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "k"}),
|
||||
patch("gateway.run._resolve_gateway_model", return_value="test-model"),
|
||||
patch.dict("sys.modules", {"tools.memory_tool": MagicMock(get_memory_dir=lambda: empty_dir)}),
|
||||
):
|
||||
runner._flush_memories_for_session("session_456")
|
||||
|
||||
tmp_agent.run_conversation.assert_called_once()
|
||||
flush_prompt = tmp_agent.run_conversation.call_args.kwargs.get("user_message", "")
|
||||
assert "Do NOT overwrite or remove entries" not in flush_prompt
|
||||
assert "Review the conversation above" in flush_prompt
|
||||
|
||||
def test_empty_memory_files_no_injection(self, tmp_path, monkeypatch):
|
||||
"""Empty memory files should not trigger the guard section."""
|
||||
memory_dir = tmp_path / "memories"
|
||||
memory_dir.mkdir()
|
||||
(memory_dir / "MEMORY.md").write_text("")
|
||||
(memory_dir / "USER.md").write_text(" \n ") # whitespace only
|
||||
|
||||
runner, tmp_agent, _ = _make_flush_context(monkeypatch)
|
||||
|
||||
with (
|
||||
patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "k"}),
|
||||
patch("gateway.run._resolve_gateway_model", return_value="test-model"),
|
||||
patch.dict("sys.modules", {"tools.memory_tool": MagicMock(get_memory_dir=lambda: memory_dir)}),
|
||||
):
|
||||
runner._flush_memories_for_session("session_789")
|
||||
|
||||
tmp_agent.run_conversation.assert_called_once()
|
||||
flush_prompt = tmp_agent.run_conversation.call_args.kwargs.get("user_message", "")
|
||||
assert "current live state of memory" not in flush_prompt
|
||||
|
||||
|
||||
class TestFlushAgentSilenced:
|
||||
"""The flush agent must not produce any terminal output."""
|
||||
|
||||
def test_print_fn_set_to_noop(self, tmp_path, monkeypatch):
|
||||
"""_print_fn on the flush agent must be a no-op so tool output never leaks."""
|
||||
runner = _make_runner()
|
||||
runner.session_store.load_transcript.return_value = _TRANSCRIPT_4_MSGS
|
||||
|
||||
captured_agent = {}
|
||||
|
||||
def _fake_ai_agent(*args, **kwargs):
|
||||
agent = MagicMock()
|
||||
captured_agent["instance"] = agent
|
||||
return agent
|
||||
|
||||
fake_run_agent = types.ModuleType("run_agent")
|
||||
fake_run_agent.AIAgent = _fake_ai_agent
|
||||
monkeypatch.setitem(sys.modules, "run_agent", fake_run_agent)
|
||||
|
||||
with (
|
||||
patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "k"}),
|
||||
patch("gateway.run._resolve_gateway_model", return_value="test-model"),
|
||||
patch.dict("sys.modules", {"tools.memory_tool": MagicMock(get_memory_dir=lambda: tmp_path)}),
|
||||
):
|
||||
runner._flush_memories_for_session("session_silent")
|
||||
|
||||
agent = captured_agent["instance"]
|
||||
assert agent._print_fn is not None, "_print_fn should be overridden to suppress output"
|
||||
# Confirm it is callable and produces no output (no exception)
|
||||
agent._print_fn("should be silenced")
|
||||
|
||||
def test_kawaii_spinner_respects_print_fn(self):
|
||||
"""KawaiiSpinner must route all output through print_fn when supplied."""
|
||||
from agent.display import KawaiiSpinner
|
||||
|
||||
written = []
|
||||
spinner = KawaiiSpinner("test", print_fn=lambda *a, **kw: written.append(a))
|
||||
spinner._write("hello")
|
||||
assert written == [("hello",)], "spinner should route through print_fn"
|
||||
|
||||
# A no-op print_fn must produce no output to stdout
|
||||
import io, sys
|
||||
buf = io.StringIO()
|
||||
old_stdout = sys.stdout
|
||||
sys.stdout = buf
|
||||
try:
|
||||
silent_spinner = KawaiiSpinner("silent", print_fn=lambda *a, **kw: None)
|
||||
silent_spinner._write("should not appear")
|
||||
silent_spinner.stop("done")
|
||||
finally:
|
||||
sys.stdout = old_stdout
|
||||
assert buf.getvalue() == "", "no-op print_fn spinner must not write to stdout"
|
||||
|
||||
def test_flush_agent_closes_resources_after_run(self, monkeypatch):
|
||||
"""Memory flush should close temporary agent resources after the turn."""
|
||||
runner, tmp_agent, _ = _make_flush_context(monkeypatch)
|
||||
tmp_agent.shutdown_memory_provider = MagicMock()
|
||||
tmp_agent.close = MagicMock()
|
||||
|
||||
with (
|
||||
patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "k"}),
|
||||
patch("gateway.run._resolve_gateway_model", return_value="test-model"),
|
||||
patch.dict("sys.modules", {"tools.memory_tool": MagicMock(get_memory_dir=lambda: Path("/nonexistent"))}),
|
||||
):
|
||||
runner._flush_memories_for_session("session_cleanup")
|
||||
|
||||
tmp_agent.shutdown_memory_provider.assert_called_once()
|
||||
tmp_agent.close.assert_called_once()
|
||||
|
||||
|
||||
class TestFlushPromptStructure:
|
||||
"""Verify the flush prompt retains its core instructions."""
|
||||
|
||||
def test_core_instructions_present(self, monkeypatch):
|
||||
"""The flush prompt should still contain the original guidance."""
|
||||
runner, tmp_agent, _ = _make_flush_context(monkeypatch)
|
||||
|
||||
with (
|
||||
patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "k"}),
|
||||
patch("gateway.run._resolve_gateway_model", return_value="test-model"),
|
||||
patch.dict("sys.modules", {"tools.memory_tool": MagicMock(get_memory_dir=lambda: Path("/nonexistent"))}),
|
||||
):
|
||||
runner._flush_memories_for_session("session_struct")
|
||||
|
||||
flush_prompt = tmp_agent.run_conversation.call_args.kwargs.get("user_message", "")
|
||||
assert "automatically reset" in flush_prompt
|
||||
assert "Save any important facts" in flush_prompt
|
||||
assert "consider saving it as a skill" in flush_prompt
|
||||
assert "Do NOT respond to the user" in flush_prompt
|
||||
@@ -197,10 +197,14 @@ def _make_fake_mautrix():
|
||||
self.account_id = account_id
|
||||
self.pickle_key = pickle_key
|
||||
self.db = db
|
||||
self._device_id = ""
|
||||
|
||||
async def open(self):
|
||||
pass
|
||||
|
||||
async def put_device_id(self, device_id):
|
||||
self._device_id = device_id
|
||||
|
||||
mautrix_crypto_store_asyncpg.PgCryptoStore = PgCryptoStore
|
||||
|
||||
# --- mautrix.util ---
|
||||
|
||||
@@ -137,11 +137,38 @@ class TestGetProxyUrl:
|
||||
class TestResolveProxyUrl:
|
||||
def test_normalizes_socks_alias_from_all_proxy(self, monkeypatch):
|
||||
for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY",
|
||||
"https_proxy", "http_proxy", "all_proxy"):
|
||||
"https_proxy", "http_proxy", "all_proxy", "NO_PROXY", "no_proxy"):
|
||||
monkeypatch.delenv(key, raising=False)
|
||||
monkeypatch.setenv("ALL_PROXY", "socks://127.0.0.1:1080/")
|
||||
assert resolve_proxy_url() == "socks5://127.0.0.1:1080/"
|
||||
|
||||
def test_no_proxy_bypasses_matching_host(self, monkeypatch):
|
||||
for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY",
|
||||
"https_proxy", "http_proxy", "all_proxy", "NO_PROXY", "no_proxy"):
|
||||
monkeypatch.delenv(key, raising=False)
|
||||
monkeypatch.setenv("HTTPS_PROXY", "http://proxy.example:8080")
|
||||
monkeypatch.setenv("NO_PROXY", "api.telegram.org")
|
||||
|
||||
assert resolve_proxy_url(target_hosts="api.telegram.org") is None
|
||||
|
||||
def test_no_proxy_bypasses_cidr_target(self, monkeypatch):
|
||||
for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY",
|
||||
"https_proxy", "http_proxy", "all_proxy", "NO_PROXY", "no_proxy"):
|
||||
monkeypatch.delenv(key, raising=False)
|
||||
monkeypatch.setenv("HTTPS_PROXY", "http://proxy.example:8080")
|
||||
monkeypatch.setenv("NO_PROXY", "149.154.160.0/20")
|
||||
|
||||
assert resolve_proxy_url(target_hosts=["149.154.167.220"]) is None
|
||||
|
||||
def test_no_proxy_ignored_without_target(self, monkeypatch):
|
||||
for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY",
|
||||
"https_proxy", "http_proxy", "all_proxy", "NO_PROXY", "no_proxy"):
|
||||
monkeypatch.delenv(key, raising=False)
|
||||
monkeypatch.setenv("HTTPS_PROXY", "http://proxy.example:8080")
|
||||
monkeypatch.setenv("NO_PROXY", "*")
|
||||
|
||||
assert resolve_proxy_url() == "http://proxy.example:8080"
|
||||
|
||||
|
||||
class TestRunAgentProxyDispatch:
|
||||
"""Test that _run_agent() delegates to proxy when configured."""
|
||||
|
||||
@@ -4,7 +4,7 @@ Tests the _handle_resume_command handler (switch to a previously-named session)
|
||||
across gateway messenger platforms.
|
||||
"""
|
||||
|
||||
from unittest.mock import MagicMock, AsyncMock
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
@@ -53,9 +53,6 @@ def _make_runner(session_db=None, current_session_id="current_session_001",
|
||||
mock_store.switch_session.return_value = mock_session_entry
|
||||
runner.session_store = mock_store
|
||||
|
||||
# Stub out memory flushing
|
||||
runner._async_flush_memories = AsyncMock()
|
||||
|
||||
return runner
|
||||
|
||||
|
||||
@@ -179,6 +176,40 @@ class TestHandleResumeCommand:
|
||||
assert call_args[0][1] == "sess_v2"
|
||||
db.close()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_resume_follows_compression_continuation(self, tmp_path):
|
||||
"""Gateway /resume should reopen the live descendant after compression."""
|
||||
from hermes_state import SessionDB
|
||||
|
||||
db = SessionDB(db_path=tmp_path / "state.db")
|
||||
db.create_session("compressed_root", "telegram")
|
||||
db.set_session_title("compressed_root", "Compressed Work")
|
||||
db.end_session("compressed_root", "compression")
|
||||
db.create_session("compressed_child", "telegram", parent_session_id="compressed_root")
|
||||
db.append_message("compressed_child", "user", "hello from continuation")
|
||||
db.create_session("current_session_001", "telegram")
|
||||
|
||||
event = _make_event(text="/resume Compressed Work")
|
||||
runner = _make_runner(
|
||||
session_db=db,
|
||||
current_session_id="current_session_001",
|
||||
event=event,
|
||||
)
|
||||
runner.session_store.load_transcript.side_effect = (
|
||||
lambda session_id: [{"role": "user", "content": "hello from continuation"}]
|
||||
if session_id == "compressed_child"
|
||||
else []
|
||||
)
|
||||
|
||||
result = await runner._handle_resume_command(event)
|
||||
|
||||
assert "Resumed session" in result
|
||||
assert "(1 message)" in result
|
||||
call_args = runner.session_store.switch_session.call_args
|
||||
assert call_args[0][1] == "compressed_child"
|
||||
runner.session_store.load_transcript.assert_called_with("compressed_child")
|
||||
db.close()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_resume_clears_running_agent(self, tmp_path):
|
||||
"""Switching sessions clears any cached running agent."""
|
||||
@@ -199,28 +230,3 @@ class TestHandleResumeCommand:
|
||||
|
||||
assert real_key not in runner._running_agents
|
||||
db.close()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_resume_flushes_memories(self, tmp_path):
|
||||
"""Resume should flush memories from the current session before switching."""
|
||||
from hermes_state import SessionDB
|
||||
|
||||
db = SessionDB(db_path=tmp_path / "state.db")
|
||||
db.create_session("old_session", "telegram")
|
||||
db.set_session_title("old_session", "Old Work")
|
||||
db.create_session("current_session_001", "telegram")
|
||||
|
||||
event = _make_event(text="/resume Old Work")
|
||||
runner = _make_runner(
|
||||
session_db=db,
|
||||
current_session_id="current_session_001",
|
||||
event=event,
|
||||
)
|
||||
|
||||
await runner._handle_resume_command(event)
|
||||
|
||||
runner._async_flush_memories.assert_called_once_with(
|
||||
"current_session_001",
|
||||
"agent:main:telegram:dm:67890",
|
||||
)
|
||||
db.close()
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user