Compare commits
78 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 0d2aa2b6b4 | |||
| 4bbf08a818 | |||
| b8692b1ba1 | |||
| 3e61703b08 | |||
| 05d8f11085 | |||
| 13038dc747 | |||
| 629e108ee2 | |||
| c34d3f4807 | |||
| f14264c438 | |||
| 19a3e2ce8e | |||
| d58b305adf | |||
| e93cc934c7 | |||
| 93a2d6b307 | |||
| 4fade39c90 | |||
| f731c2c2bd | |||
| 00c3d848d8 | |||
| fd10463069 | |||
| c599a41b84 | |||
| c7d62b3fe3 | |||
| 36d68bcb82 | |||
| a29bad2a3c | |||
| 7957da7a1d | |||
| fd3864d8bd | |||
| 8ea389a7f8 | |||
| 3e6c108565 | |||
| e3a1a9c24d | |||
| e3697e20a6 | |||
| ed91b79b7e | |||
| 08d5c9c539 | |||
| 1dcf79a864 | |||
| 2de8a7a229 | |||
| ead66f0c92 | |||
| 0bcbc9e316 | |||
| 2d444fc84d | |||
| bb53d79d26 | |||
| 17fc84c256 | |||
| b7c1d77e55 | |||
| 7a192b124e | |||
| 4093ee9c62 | |||
| 6a957a74bc | |||
| 14b27bb68c | |||
| ef9355455b | |||
| dbdefa43c8 | |||
| db9d6375fb | |||
| 8a2506af43 | |||
| e7590f92a2 | |||
| a5129c72ef | |||
| 53fc10fc9a | |||
| 93ddff53e3 | |||
| de596aca1c | |||
| 6f1eed3968 | |||
| e3940f9807 | |||
| bfa60234c8 | |||
| fd9b692d33 | |||
| c61547c067 | |||
| 7f0f67d5f7 | |||
| f5e2a77a80 | |||
| 850fac14e3 | |||
| 5500b51800 | |||
| 63975aa75b | |||
| 62c14d5513 | |||
| 10deb1b87d | |||
| f49afd3122 | |||
| 1143f234e3 | |||
| c4627f4933 | |||
| 7c3e5706d8 | |||
| a9ccb03ccc | |||
| 7dc6eb9fbf | |||
| b290297d66 | |||
| f2fba4f9a1 | |||
| fcc05284fc | |||
| 1840c6a57d | |||
| 591aa159aa | |||
| d3e56b9f39 | |||
| 0fdbfad2b0 | |||
| 4f5669a569 | |||
| 809868e628 | |||
| e5d2815b41 |
@@ -240,6 +240,19 @@ npm run fmt # prettier
|
||||
npm test # vitest
|
||||
```
|
||||
|
||||
### TUI in the Dashboard (`hermes dashboard` → `/chat`)
|
||||
|
||||
The dashboard embeds the real `hermes --tui` — **not** a rewrite. See `hermes_cli/pty_bridge.py` + the `@app.websocket("/api/pty")` endpoint in `hermes_cli/web_server.py`.
|
||||
|
||||
- Browser loads `web/src/pages/ChatPage.tsx`, which mounts xterm.js's `Terminal` with the WebGL renderer, `@xterm/addon-fit` for container-driven resize, and `@xterm/addon-unicode11` for modern wide-character widths.
|
||||
- `/api/pty?token=…` upgrades to a WebSocket; auth uses the same ephemeral `_SESSION_TOKEN` as REST, via query param (browsers can't set `Authorization` on WS upgrade).
|
||||
- The server spawns whatever `hermes --tui` would spawn, through `ptyprocess` (POSIX PTY — WSL works, native Windows does not).
|
||||
- Frames: raw PTY bytes each direction; resize via `\x1b[RESIZE:<cols>;<rows>]` intercepted on the server and applied with `TIOCSWINSZ`.
|
||||
|
||||
**Do not re-implement the primary chat experience in React.** The main transcript, composer/input flow (including slash-command behavior), and PTY-backed terminal belong to the embedded `hermes --tui` — anything new you add to Ink shows up in the dashboard automatically. If you find yourself rebuilding the transcript or composer for the dashboard, stop and extend Ink instead.
|
||||
|
||||
**Structured React UI around the TUI is allowed when it is not a second chat surface.** Sidebar widgets, inspectors, summaries, status panels, and similar supporting views (e.g. `ChatSidebar`, `ModelPickerDialog`, `ToolCall`) are fine when they complement the embedded TUI rather than replacing the transcript / composer / terminal. Keep their state independent of the PTY child's session and surface their failures non-destructively so the terminal pane keeps working unimpaired.
|
||||
|
||||
---
|
||||
|
||||
## Adding New Tools
|
||||
|
||||
@@ -986,6 +986,26 @@ def read_hermes_oauth_credentials() -> Optional[Dict[str, Any]]:
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _is_bedrock_model_id(model: str) -> bool:
|
||||
"""Detect AWS Bedrock model IDs that use dots as namespace separators.
|
||||
|
||||
Bedrock model IDs come in two forms:
|
||||
- Bare: ``anthropic.claude-opus-4-7``
|
||||
- Regional (inference profiles): ``us.anthropic.claude-sonnet-4-5-v1:0``
|
||||
|
||||
In both cases the dots separate namespace components, not version
|
||||
numbers, and must be preserved verbatim for the Bedrock API.
|
||||
"""
|
||||
lower = model.lower()
|
||||
# Regional inference-profile prefixes
|
||||
if any(lower.startswith(p) for p in ("global.", "us.", "eu.", "ap.", "jp.")):
|
||||
return True
|
||||
# Bare Bedrock model IDs: provider.model-family
|
||||
if lower.startswith("anthropic."):
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def normalize_model_name(model: str, preserve_dots: bool = False) -> str:
|
||||
"""Normalize a model name for the Anthropic API.
|
||||
|
||||
@@ -993,11 +1013,19 @@ def normalize_model_name(model: str, preserve_dots: bool = False) -> str:
|
||||
- Converts dots to hyphens in version numbers (OpenRouter uses dots,
|
||||
Anthropic uses hyphens: claude-opus-4.6 → claude-opus-4-6), unless
|
||||
preserve_dots is True (e.g. for Alibaba/DashScope: qwen3.5-plus).
|
||||
- Preserves Bedrock model IDs (``anthropic.claude-opus-4-7``) and
|
||||
regional inference profiles (``us.anthropic.claude-*``) whose dots
|
||||
are namespace separators, not version separators.
|
||||
"""
|
||||
lower = model.lower()
|
||||
if lower.startswith("anthropic/"):
|
||||
model = model[len("anthropic/"):]
|
||||
if not preserve_dots:
|
||||
# Bedrock model IDs use dots as namespace separators
|
||||
# (e.g. "anthropic.claude-opus-4-7", "us.anthropic.claude-*").
|
||||
# These must not be converted to hyphens. See issue #12295.
|
||||
if _is_bedrock_model_id(model):
|
||||
return model
|
||||
# OpenRouter uses dots for version separators (claude-opus-4.6),
|
||||
# Anthropic uses hyphens (claude-opus-4-6). Convert dots to hyphens.
|
||||
model = model.replace(".", "-")
|
||||
|
||||
@@ -1993,6 +1993,39 @@ def resolve_provider_client(
|
||||
"directly supported", provider)
|
||||
return None, None
|
||||
|
||||
elif pconfig.auth_type == "aws_sdk":
|
||||
# AWS SDK providers (Bedrock) — use the Anthropic Bedrock client via
|
||||
# boto3's credential chain (IAM roles, SSO, env vars, instance metadata).
|
||||
try:
|
||||
from agent.bedrock_adapter import has_aws_credentials, resolve_bedrock_region
|
||||
from agent.anthropic_adapter import build_anthropic_bedrock_client
|
||||
except ImportError:
|
||||
logger.warning("resolve_provider_client: bedrock requested but "
|
||||
"boto3 or anthropic SDK not installed")
|
||||
return None, None
|
||||
|
||||
if not has_aws_credentials():
|
||||
logger.debug("resolve_provider_client: bedrock requested but "
|
||||
"no AWS credentials found")
|
||||
return None, None
|
||||
|
||||
region = resolve_bedrock_region()
|
||||
default_model = "anthropic.claude-haiku-4-5-20251001-v1:0"
|
||||
final_model = _normalize_resolved_model(model or default_model, provider)
|
||||
try:
|
||||
real_client = build_anthropic_bedrock_client(region)
|
||||
except ImportError as exc:
|
||||
logger.warning("resolve_provider_client: cannot create Bedrock "
|
||||
"client: %s", exc)
|
||||
return None, None
|
||||
client = AnthropicAuxiliaryClient(
|
||||
real_client, final_model, api_key="aws-sdk",
|
||||
base_url=f"https://bedrock-runtime.{region}.amazonaws.com",
|
||||
)
|
||||
logger.debug("resolve_provider_client: bedrock (%s, %s)", final_model, region)
|
||||
return (_to_async_client(client, final_model) if async_mode
|
||||
else (client, final_model))
|
||||
|
||||
elif pconfig.auth_type in ("oauth_device_code", "oauth_external"):
|
||||
# OAuth providers — route through their specific try functions
|
||||
if provider == "nous":
|
||||
|
||||
+130
-2
@@ -87,6 +87,114 @@ def reset_client_cache():
|
||||
_bedrock_control_client_cache.clear()
|
||||
|
||||
|
||||
def invalidate_runtime_client(region: str) -> bool:
    """Drop the cached ``bedrock-runtime`` client for one region.

    This is the single-region counterpart to :func:`reset_client_cache`.
    The converse call wrappers use it to throw away a client whose HTTP
    connection has gone stale, so that the next call builds a new client
    rather than reusing the dead one.

    Args:
        region: Cache key of the client to evict.

    Returns:
        True if an entry for ``region`` was present and has been removed,
        False if nothing was cached under that key.
    """
    # A private sentinel distinguishes "key absent" from any stored value.
    absent = object()
    evicted = _bedrock_runtime_client_cache.pop(region, absent)
    return evicted is not absent
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Stale-connection detection
|
||||
# ---------------------------------------------------------------------------
|
||||
#
|
||||
# boto3 caches its HTTPS connection pool inside the client object. When a
|
||||
# pooled connection is killed out from under us (NAT timeout, VPN flap,
|
||||
# server-side TCP RST, proxy idle cull, etc.), the next use surfaces as
|
||||
# one of a handful of low-level exceptions — most commonly
|
||||
# ``botocore.exceptions.ConnectionClosedError`` or
|
||||
# ``urllib3.exceptions.ProtocolError``. urllib3 also trips an internal
|
||||
# ``assert`` in a couple of paths (connection pool state checks, chunked
|
||||
# response readers) which bubbles up as a bare ``AssertionError`` with an
|
||||
# empty ``str(exc)``.
|
||||
#
|
||||
# In all of these cases the client is the problem, not the request: retrying
|
||||
# with the same cached client reproduces the failure until the process
|
||||
# restarts. The fix is to evict the region's cached client so the next
|
||||
# attempt builds a new one.
|
||||
|
||||
_STALE_LIB_MODULE_PREFIXES = (
|
||||
"urllib3.",
|
||||
"botocore.",
|
||||
"boto3.",
|
||||
)
|
||||
|
||||
|
||||
def _traceback_frames_modules(exc: BaseException):
|
||||
"""Yield ``__name__``-style module strings for each frame in exc's traceback."""
|
||||
tb = getattr(exc, "__traceback__", None)
|
||||
while tb is not None:
|
||||
frame = tb.tb_frame
|
||||
module = frame.f_globals.get("__name__", "")
|
||||
yield module or ""
|
||||
tb = tb.tb_next
|
||||
|
||||
|
||||
def is_stale_connection_error(exc: BaseException) -> bool:
    """Tell whether ``exc`` points at a dead/stale Bedrock HTTP connection.

    The checks run in this order, and the first hit wins:

    1. ``exc`` is an instance of ``botocore.exceptions.ConnectionError`` or
       ``botocore.exceptions.HTTPClientError`` (including subclasses).
    2. ``exc`` is an instance of urllib3's ``ProtocolError``,
       ``NewConnectionError`` or ``ConnectionError``.
    3. ``exc`` is an ``AssertionError`` and at least one frame of its
       traceback belongs to a module whose name starts with one of
       ``_STALE_LIB_MODULE_PREFIXES`` (urllib3 / botocore / boto3).

    Both library imports are best-effort: if a library is not importable,
    its check is skipped. An ``AssertionError`` raised purely from
    application or test code is deliberately not matched — only
    library-internal asserts are taken as a sign of broken connection state.

    Args:
        exc: The exception to classify.

    Returns:
        True if any of the three checks matches, otherwise False.
    """
    # botocore transport-level failures.
    try:
        from botocore.exceptions import (
            ConnectionError as BotoConnectionError,
            HTTPClientError,
        )
    except ImportError:  # pragma: no cover — botocore always present with boto3
        pass
    else:
        if isinstance(exc, (BotoConnectionError, HTTPClientError)):
            return True

    # urllib3 low-level transport failures.
    try:
        from urllib3.exceptions import (
            ProtocolError,
            NewConnectionError,
            ConnectionError as Urllib3ConnectionError,
        )
    except ImportError:  # pragma: no cover
        pass
    else:
        if isinstance(exc, (ProtocolError, NewConnectionError, Urllib3ConnectionError)):
            return True

    # Anything that is not an AssertionError has no further way to match.
    if not isinstance(exc, AssertionError):
        return False

    # Library-internal AssertionError: look for a urllib3 / botocore / boto3
    # frame anywhere in the traceback.
    return any(
        frame_module.startswith(_STALE_LIB_MODULE_PREFIXES)
        for frame_module in _traceback_frames_modules(exc)
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# AWS credential detection
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -787,7 +895,17 @@ def call_converse(
|
||||
guardrail_config=guardrail_config,
|
||||
)
|
||||
|
||||
response = client.converse(**kwargs)
|
||||
try:
|
||||
response = client.converse(**kwargs)
|
||||
except Exception as exc:
|
||||
if is_stale_connection_error(exc):
|
||||
logger.warning(
|
||||
"bedrock: stale-connection error on converse(region=%s, model=%s): "
|
||||
"%s — evicting cached client so the next call reconnects.",
|
||||
region, model, type(exc).__name__,
|
||||
)
|
||||
invalidate_runtime_client(region)
|
||||
raise
|
||||
return normalize_converse_response(response)
|
||||
|
||||
|
||||
@@ -819,7 +937,17 @@ def call_converse_stream(
|
||||
guardrail_config=guardrail_config,
|
||||
)
|
||||
|
||||
response = client.converse_stream(**kwargs)
|
||||
try:
|
||||
response = client.converse_stream(**kwargs)
|
||||
except Exception as exc:
|
||||
if is_stale_connection_error(exc):
|
||||
logger.warning(
|
||||
"bedrock: stale-connection error on converse_stream(region=%s, "
|
||||
"model=%s): %s — evicting cached client so the next call reconnects.",
|
||||
region, model, type(exc).__name__,
|
||||
)
|
||||
invalidate_runtime_client(region)
|
||||
raise
|
||||
return normalize_converse_stream_events(response)
|
||||
|
||||
|
||||
|
||||
@@ -23,6 +23,23 @@ from agent.prompt_builder import DEFAULT_AGENT_IDENTITY
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Matches Codex/Harmony tool-call serialization that occasionally leaks into
|
||||
# assistant-message content when the model fails to emit a structured
|
||||
# ``function_call`` item. Accepts the common forms:
|
||||
#
|
||||
# to=functions.exec_command
|
||||
# assistant to=functions.exec_command
|
||||
# <|channel|>commentary to=functions.exec_command
|
||||
#
|
||||
# ``to=functions.<name>`` is the stable marker — the optional ``assistant`` or
|
||||
# Harmony channel prefix varies by degeneration mode. Case-insensitive to
|
||||
# cover lowercase/uppercase ``assistant`` variants.
|
||||
_TOOL_CALL_LEAK_PATTERN = re.compile(
|
||||
r"(?:^|[\s>|])to=functions\.[A-Za-z_][\w.]*",
|
||||
re.IGNORECASE,
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Multimodal content helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -787,6 +804,37 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]:
|
||||
if isinstance(out_text, str):
|
||||
final_text = out_text.strip()
|
||||
|
||||
# ── Tool-call leak recovery ──────────────────────────────────
|
||||
# gpt-5.x on the Codex Responses API sometimes degenerates and emits
|
||||
# what should be a structured `function_call` item as plain assistant
|
||||
# text using the Harmony/Codex serialization (``to=functions.foo
|
||||
# {json}`` or ``assistant to=functions.foo {json}``). The model
|
||||
# intended to call a tool, but the intent never made it into
|
||||
# ``response.output`` as a ``function_call`` item, so ``tool_calls``
|
||||
# is empty here. If we pass this through, the parent sees a
|
||||
# confident-looking summary with no audit trail (empty ``tool_trace``)
|
||||
# and no tools actually ran — the Taiwan-embassy-email incident.
|
||||
#
|
||||
# Detection: leaked tokens always contain ``to=functions.<name>`` and
|
||||
# the assistant message has no real tool calls. Treat it as incomplete
|
||||
# so the existing Codex-incomplete continuation path (3 retries,
|
||||
# handled in run_agent.py) gets a chance to re-elicit a proper
|
||||
# ``function_call`` item. The existing loop already handles message
|
||||
# append, dedup, and retry budget.
|
||||
leaked_tool_call_text = False
|
||||
if final_text and not tool_calls and _TOOL_CALL_LEAK_PATTERN.search(final_text):
|
||||
leaked_tool_call_text = True
|
||||
logger.warning(
|
||||
"Codex response contains leaked tool-call text in assistant content "
|
||||
"(no structured function_call items). Treating as incomplete so the "
|
||||
"continuation path can re-elicit a proper tool call. Leaked snippet: %r",
|
||||
final_text[:300],
|
||||
)
|
||||
# Clear the text so downstream code doesn't surface the garbage as
|
||||
# a summary. The encrypted reasoning items (if any) are preserved
|
||||
# so the model keeps its chain-of-thought on the retry.
|
||||
final_text = ""
|
||||
|
||||
assistant_message = SimpleNamespace(
|
||||
content=final_text,
|
||||
tool_calls=tool_calls,
|
||||
@@ -798,6 +846,8 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]:
|
||||
|
||||
if tool_calls:
|
||||
finish_reason = "tool_calls"
|
||||
elif leaked_tool_call_text:
|
||||
finish_reason = "incomplete"
|
||||
elif has_incomplete_items or (saw_commentary_phase and not saw_final_answer_phase):
|
||||
finish_reason = "incomplete"
|
||||
elif reasoning_items_raw and not final_text:
|
||||
|
||||
@@ -294,6 +294,7 @@ class ContextCompressor(ContextEngine):
|
||||
self._context_probed = False
|
||||
self._context_probe_persistable = False
|
||||
self._previous_summary = None
|
||||
self._last_summary_error = None
|
||||
self._last_compression_savings_pct = 100.0
|
||||
self._ineffective_compression_count = 0
|
||||
|
||||
@@ -389,6 +390,7 @@ class ContextCompressor(ContextEngine):
|
||||
self._last_compression_savings_pct: float = 100.0
|
||||
self._ineffective_compression_count: int = 0
|
||||
self._summary_failure_cooldown_until: float = 0.0
|
||||
self._last_summary_error: Optional[str] = None
|
||||
|
||||
def update_from_response(self, usage: Dict[str, Any]):
|
||||
"""Update tracked token usage from API response."""
|
||||
@@ -812,10 +814,12 @@ The user has requested that this compaction PRIORITISE preserving all informatio
|
||||
self._previous_summary = summary
|
||||
self._summary_failure_cooldown_until = 0.0
|
||||
self._summary_model_fallen_back = False
|
||||
self._last_summary_error = None
|
||||
return self._with_summary_prefix(summary)
|
||||
except RuntimeError:
|
||||
# No provider configured — long cooldown, unlikely to self-resolve
|
||||
self._summary_failure_cooldown_until = time.monotonic() + _SUMMARY_FAILURE_COOLDOWN_SECONDS
|
||||
self._last_summary_error = "no auxiliary LLM provider configured"
|
||||
logging.warning("Context compression: no provider available for "
|
||||
"summary. Middle turns will be dropped without summary "
|
||||
"for %d seconds.",
|
||||
@@ -853,6 +857,10 @@ The user has requested that this compaction PRIORITISE preserving all informatio
|
||||
# Transient errors (timeout, rate limit, network) — shorter cooldown
|
||||
_transient_cooldown = 60
|
||||
self._summary_failure_cooldown_until = time.monotonic() + _transient_cooldown
|
||||
err_text = str(e).strip() or e.__class__.__name__
|
||||
if len(err_text) > 220:
|
||||
err_text = err_text[:217].rstrip() + "..."
|
||||
self._last_summary_error = err_text
|
||||
logging.warning(
|
||||
"Failed to generate context summary: %s. "
|
||||
"Further summary attempts paused for %d seconds.",
|
||||
|
||||
+43
-2
@@ -31,6 +31,7 @@ from __future__ import annotations
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
import inspect
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from agent.memory_provider import MemoryProvider
|
||||
@@ -312,7 +313,39 @@ class MemoryManager:
|
||||
)
|
||||
return "\n\n".join(parts)
|
||||
|
||||
def on_memory_write(self, action: str, target: str, content: str) -> None:
|
||||
@staticmethod
|
||||
def _provider_memory_write_metadata_mode(provider: MemoryProvider) -> str:
|
||||
"""Return how to pass metadata to a provider's memory-write hook."""
|
||||
try:
|
||||
signature = inspect.signature(provider.on_memory_write)
|
||||
except (TypeError, ValueError):
|
||||
return "keyword"
|
||||
|
||||
params = list(signature.parameters.values())
|
||||
if any(p.kind == inspect.Parameter.VAR_KEYWORD for p in params):
|
||||
return "keyword"
|
||||
if "metadata" in signature.parameters:
|
||||
return "keyword"
|
||||
|
||||
accepted = [
|
||||
p for p in params
|
||||
if p.kind in (
|
||||
inspect.Parameter.POSITIONAL_ONLY,
|
||||
inspect.Parameter.POSITIONAL_OR_KEYWORD,
|
||||
inspect.Parameter.KEYWORD_ONLY,
|
||||
)
|
||||
]
|
||||
if len(accepted) >= 4:
|
||||
return "positional"
|
||||
return "legacy"
|
||||
|
||||
def on_memory_write(
|
||||
self,
|
||||
action: str,
|
||||
target: str,
|
||||
content: str,
|
||||
metadata: Optional[Dict[str, Any]] = None,
|
||||
) -> None:
|
||||
"""Notify external providers when the built-in memory tool writes.
|
||||
|
||||
Skips the builtin provider itself (it's the source of the write).
|
||||
@@ -321,7 +354,15 @@ class MemoryManager:
|
||||
if provider.name == "builtin":
|
||||
continue
|
||||
try:
|
||||
provider.on_memory_write(action, target, content)
|
||||
metadata_mode = self._provider_memory_write_metadata_mode(provider)
|
||||
if metadata_mode == "keyword":
|
||||
provider.on_memory_write(
|
||||
action, target, content, metadata=dict(metadata or {})
|
||||
)
|
||||
elif metadata_mode == "positional":
|
||||
provider.on_memory_write(action, target, content, dict(metadata or {}))
|
||||
else:
|
||||
provider.on_memory_write(action, target, content)
|
||||
except Exception as e:
|
||||
logger.debug(
|
||||
"Memory provider '%s' on_memory_write failed: %s",
|
||||
|
||||
@@ -26,7 +26,7 @@ Optional hooks (override to opt in):
|
||||
on_turn_start(turn, message, **kwargs) — per-turn tick with runtime context
|
||||
on_session_end(messages) — end-of-session extraction
|
||||
on_pre_compress(messages) -> str — extract before context compression
|
||||
on_memory_write(action, target, content) — mirror built-in memory writes
|
||||
on_memory_write(action, target, content, metadata=None) — mirror built-in memory writes
|
||||
on_delegation(task, result, **kwargs) — parent-side observation of subagent work
|
||||
"""
|
||||
|
||||
@@ -34,7 +34,7 @@ from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Any, Dict, List
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -220,12 +220,21 @@ class MemoryProvider(ABC):
|
||||
should all have ``env_var`` set and this method stays no-op).
|
||||
"""
|
||||
|
||||
def on_memory_write(self, action: str, target: str, content: str) -> None:
|
||||
def on_memory_write(
|
||||
self,
|
||||
action: str,
|
||||
target: str,
|
||||
content: str,
|
||||
metadata: Optional[Dict[str, Any]] = None,
|
||||
) -> None:
|
||||
"""Called when the built-in memory tool writes an entry.
|
||||
|
||||
action: 'add', 'replace', or 'remove'
|
||||
target: 'memory' or 'user'
|
||||
content: the entry content
|
||||
metadata: structured provenance for the write, when available. Common
|
||||
keys include ``write_origin``, ``execution_context``, ``session_id``,
|
||||
``parent_session_id``, ``platform``, and ``tool_name``.
|
||||
|
||||
Use to mirror built-in memory writes to your backend.
|
||||
"""
|
||||
|
||||
+22
-13
@@ -1199,6 +1199,7 @@ def get_model_context_length(
|
||||
Resolution order:
|
||||
0. Explicit config override (model.context_length or custom_providers per-model)
|
||||
1. Persistent cache (previously discovered via probing)
|
||||
1b. AWS Bedrock static table (must precede custom-endpoint probe)
|
||||
2. Active endpoint metadata (/models for explicit custom endpoints)
|
||||
3. Local server query (for local endpoints)
|
||||
4. Anthropic /v1/models API (API-key users only, not OAuth)
|
||||
@@ -1237,6 +1238,26 @@ def get_model_context_length(
|
||||
else:
|
||||
return cached
|
||||
|
||||
# 1b. AWS Bedrock — use static context length table.
|
||||
# Bedrock's ListFoundationModels API doesn't expose context window sizes,
|
||||
# so we maintain a curated table in bedrock_adapter.py that reflects
|
||||
# AWS-imposed limits (e.g. 200K for Claude models vs 1M on the native
|
||||
# Anthropic API). This must run BEFORE the custom-endpoint probe at
|
||||
# step 2 — bedrock-runtime.<region>.amazonaws.com is not in
|
||||
# _URL_TO_PROVIDER, so it would otherwise be treated as a custom endpoint,
|
||||
# fail the /models probe (Bedrock doesn't expose that shape), and fall
|
||||
# back to the 128K default before reaching the original step 4b branch.
|
||||
if provider == "bedrock" or (
|
||||
base_url
|
||||
and base_url_hostname(base_url).startswith("bedrock-runtime.")
|
||||
and base_url_host_matches(base_url, "amazonaws.com")
|
||||
):
|
||||
try:
|
||||
from agent.bedrock_adapter import get_bedrock_context_length
|
||||
return get_bedrock_context_length(model)
|
||||
except ImportError:
|
||||
pass # boto3 not installed — fall through to generic resolution
|
||||
|
||||
# 2. Active endpoint metadata for truly custom/unknown endpoints.
|
||||
# Known providers (Copilot, OpenAI, Anthropic, etc.) skip this — their
|
||||
# /models endpoint may report a provider-imposed limit (e.g. Copilot
|
||||
@@ -1282,19 +1303,7 @@ def get_model_context_length(
|
||||
if ctx:
|
||||
return ctx
|
||||
|
||||
# 4b. AWS Bedrock — use static context length table.
|
||||
# Bedrock's ListFoundationModels doesn't expose context window sizes,
|
||||
# so we maintain a curated table in bedrock_adapter.py.
|
||||
if provider == "bedrock" or (
|
||||
base_url
|
||||
and base_url_hostname(base_url).startswith("bedrock-runtime.")
|
||||
and base_url_host_matches(base_url, "amazonaws.com")
|
||||
):
|
||||
try:
|
||||
from agent.bedrock_adapter import get_bedrock_context_length
|
||||
return get_bedrock_context_length(model)
|
||||
except ImportError:
|
||||
pass # boto3 not installed — fall through to generic resolution
|
||||
# 4b. (Bedrock handled earlier at step 1b — before custom-endpoint probe.)
|
||||
|
||||
# 5. Provider-aware lookups (before generic OpenRouter cache)
|
||||
# These are provider-specific and take priority over the generic OR cache,
|
||||
|
||||
+8
-107
@@ -7,11 +7,15 @@ can invoke skills via /skill-name commands.
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
from hermes_constants import display_hermes_home
|
||||
from agent.skill_preprocessing import (
|
||||
expand_inline_shell as _expand_inline_shell,
|
||||
load_skills_config as _load_skills_config,
|
||||
substitute_template_vars as _substitute_template_vars,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -20,111 +24,6 @@ _skill_commands: Dict[str, Dict[str, Any]] = {}
|
||||
_SKILL_INVALID_CHARS = re.compile(r"[^a-z0-9-]")
|
||||
_SKILL_MULTI_HYPHEN = re.compile(r"-{2,}")
|
||||
|
||||
# Matches ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} tokens in SKILL.md.
|
||||
# Tokens that don't resolve (e.g. ${HERMES_SESSION_ID} with no session) are
|
||||
# left as-is so the user can debug them.
|
||||
_SKILL_TEMPLATE_RE = re.compile(r"\$\{(HERMES_SKILL_DIR|HERMES_SESSION_ID)\}")
|
||||
|
||||
# Matches inline shell snippets like: !`date +%Y-%m-%d`
|
||||
# Non-greedy, single-line only — no newlines inside the backticks.
|
||||
_INLINE_SHELL_RE = re.compile(r"!`([^`\n]+)`")
|
||||
|
||||
# Cap inline-shell output so a runaway command can't blow out the context.
|
||||
_INLINE_SHELL_MAX_OUTPUT = 4000
|
||||
|
||||
|
||||
def _load_skills_config() -> dict:
    """Return the ``skills`` section of config.yaml, or ``{}`` (best-effort).

    The config loader is imported lazily inside the function. Any exception
    raised while importing or loading is logged at debug level and results
    in an empty dict, as does a ``skills`` entry that is not a dict.
    """
    try:
        from hermes_cli.config import load_config

        section = (load_config() or {}).get("skills")
    except Exception:
        logger.debug("Could not read skills config", exc_info=True)
        return {}
    return section if isinstance(section, dict) else {}
|
||||
|
||||
|
||||
def _substitute_template_vars(
|
||||
content: str,
|
||||
skill_dir: Path | None,
|
||||
session_id: str | None,
|
||||
) -> str:
|
||||
"""Replace ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} in skill content.
|
||||
|
||||
Only substitutes tokens for which a concrete value is available —
|
||||
unresolved tokens are left in place so the author can spot them.
|
||||
"""
|
||||
if not content:
|
||||
return content
|
||||
|
||||
skill_dir_str = str(skill_dir) if skill_dir else None
|
||||
|
||||
def _replace(match: re.Match) -> str:
|
||||
token = match.group(1)
|
||||
if token == "HERMES_SKILL_DIR" and skill_dir_str:
|
||||
return skill_dir_str
|
||||
if token == "HERMES_SESSION_ID" and session_id:
|
||||
return str(session_id)
|
||||
return match.group(0)
|
||||
|
||||
return _SKILL_TEMPLATE_RE.sub(_replace, content)
|
||||
|
||||
|
||||
def _run_inline_shell(command: str, cwd: Path | None, timeout: int) -> str:
|
||||
"""Execute a single inline-shell snippet and return its stdout (trimmed).
|
||||
|
||||
Failures return a short ``[inline-shell error: ...]`` marker instead of
|
||||
raising, so one bad snippet can't wreck the whole skill message.
|
||||
"""
|
||||
try:
|
||||
completed = subprocess.run(
|
||||
["bash", "-c", command],
|
||||
cwd=str(cwd) if cwd else None,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=max(1, int(timeout)),
|
||||
check=False,
|
||||
)
|
||||
except subprocess.TimeoutExpired:
|
||||
return f"[inline-shell timeout after {timeout}s: {command}]"
|
||||
except FileNotFoundError:
|
||||
return f"[inline-shell error: bash not found]"
|
||||
except Exception as exc:
|
||||
return f"[inline-shell error: {exc}]"
|
||||
|
||||
output = (completed.stdout or "").rstrip("\n")
|
||||
if not output and completed.stderr:
|
||||
output = completed.stderr.rstrip("\n")
|
||||
if len(output) > _INLINE_SHELL_MAX_OUTPUT:
|
||||
output = output[:_INLINE_SHELL_MAX_OUTPUT] + "…[truncated]"
|
||||
return output
|
||||
|
||||
|
||||
def _expand_inline_shell(
|
||||
content: str,
|
||||
skill_dir: Path | None,
|
||||
timeout: int,
|
||||
) -> str:
|
||||
"""Replace every !`cmd` snippet in ``content`` with its stdout.
|
||||
|
||||
Runs each snippet with the skill directory as CWD so relative paths in
|
||||
the snippet work the way the author expects.
|
||||
"""
|
||||
if "!`" not in content:
|
||||
return content
|
||||
|
||||
def _replace(match: re.Match) -> str:
|
||||
cmd = match.group(1).strip()
|
||||
if not cmd:
|
||||
return ""
|
||||
return _run_inline_shell(cmd, skill_dir, timeout)
|
||||
|
||||
return _INLINE_SHELL_RE.sub(_replace, content)
|
||||
|
||||
|
||||
def _load_skill_payload(skill_identifier: str, task_id: str | None = None) -> tuple[dict[str, Any], Path | None, str] | None:
|
||||
"""Load a skill by name/path and return (loaded_payload, skill_dir, display_name)."""
|
||||
raw_identifier = (skill_identifier or "").strip()
|
||||
@@ -143,7 +42,9 @@ def _load_skill_payload(skill_identifier: str, task_id: str | None = None) -> tu
|
||||
else:
|
||||
normalized = raw_identifier.lstrip("/")
|
||||
|
||||
loaded_skill = json.loads(skill_view(normalized, task_id=task_id))
|
||||
loaded_skill = json.loads(
|
||||
skill_view(normalized, task_id=task_id, preprocess=False)
|
||||
)
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
@@ -0,0 +1,131 @@
|
||||
"""Shared SKILL.md preprocessing helpers."""
|
||||
|
||||
import logging
|
||||
import re
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Matches ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} tokens in SKILL.md.
|
||||
# Tokens that don't resolve (e.g. ${HERMES_SESSION_ID} with no session) are
|
||||
# left as-is so the user can debug them.
|
||||
_SKILL_TEMPLATE_RE = re.compile(r"\$\{(HERMES_SKILL_DIR|HERMES_SESSION_ID)\}")
|
||||
|
||||
# Matches inline shell snippets like: !`date +%Y-%m-%d`
|
||||
# Single-line only: the body is a negated character class, so it can contain neither backticks nor newlines.
|
||||
_INLINE_SHELL_RE = re.compile(r"!`([^`\n]+)`")
|
||||
|
||||
# Cap inline-shell output so a runaway command can't blow out the context.
|
||||
_INLINE_SHELL_MAX_OUTPUT = 4000
|
||||
|
||||
|
||||
def load_skills_config() -> dict:
    """Return the ``skills`` mapping from the loaded configuration, or ``{}``.

    Best-effort: any failure while importing or reading the config is logged
    at debug level and an empty dict is returned instead of raising. A
    ``skills`` value that is not a dict is also treated as absent.
    """
    section = None
    try:
        # Imported lazily, inside the guarded region, so an import failure is
        # handled the same way as a read failure.
        from hermes_cli.config import load_config

        section = (load_config() or {}).get("skills")
    except Exception:
        logger.debug("Could not read skills config", exc_info=True)
    return section if isinstance(section, dict) else {}
|
||||
|
||||
|
||||
def substitute_template_vars(
    content: str,
    skill_dir: Path | None,
    session_id: str | None,
) -> str:
    """Fill in ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} tokens in ``content``.

    A token is replaced only when a usable value is available for it; any
    other token is emitted unchanged so the skill author can spot it.
    """
    if not content:
        return content

    # Empty string doubles as "no directory available".
    dir_text = str(skill_dir) if skill_dir else ""

    def _fill(found: re.Match) -> str:
        name = found.group(1)
        if name == "HERMES_SESSION_ID":
            if session_id:
                return str(session_id)
        elif name == "HERMES_SKILL_DIR" and dir_text:
            return dir_text
        # No value for this token: leave the original text in place.
        return found.group(0)

    return _SKILL_TEMPLATE_RE.sub(_fill, content)
|
||||
|
||||
|
||||
def run_inline_shell(command: str, cwd: Path | None, timeout: int) -> str:
|
||||
"""Execute a single inline-shell snippet and return its stdout (trimmed).
|
||||
|
||||
Failures return a short ``[inline-shell error: ...]`` marker instead of
|
||||
raising, so one bad snippet can't wreck the whole skill message.
|
||||
"""
|
||||
try:
|
||||
completed = subprocess.run(
|
||||
["bash", "-c", command],
|
||||
cwd=str(cwd) if cwd else None,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=max(1, int(timeout)),
|
||||
check=False,
|
||||
)
|
||||
except subprocess.TimeoutExpired:
|
||||
return f"[inline-shell timeout after {timeout}s: {command}]"
|
||||
except FileNotFoundError:
|
||||
return "[inline-shell error: bash not found]"
|
||||
except Exception as exc:
|
||||
return f"[inline-shell error: {exc}]"
|
||||
|
||||
output = (completed.stdout or "").rstrip("\n")
|
||||
if not output and completed.stderr:
|
||||
output = completed.stderr.rstrip("\n")
|
||||
if len(output) > _INLINE_SHELL_MAX_OUTPUT:
|
||||
output = output[:_INLINE_SHELL_MAX_OUTPUT] + "...[truncated]"
|
||||
return output
|
||||
|
||||
|
||||
def expand_inline_shell(
    content: str,
    skill_dir: Path | None,
    timeout: int,
) -> str:
    """Expand every !`cmd` snippet in ``content`` via ``run_inline_shell``.

    Each snippet runs with the skill directory as its working directory, so
    relative paths written by the skill author resolve as expected. Text
    outside the snippets is copied through untouched.
    """
    if "!`" not in content:
        return content

    pieces: list[str] = []
    cursor = 0
    for hit in _INLINE_SHELL_RE.finditer(content):
        # Literal text between the previous snippet and this one.
        pieces.append(content[cursor:hit.start()])
        body = hit.group(1).strip()
        if body:
            pieces.append(run_inline_shell(body, skill_dir, timeout))
        # A whitespace-only snippet contributes nothing.
        cursor = hit.end()
    pieces.append(content[cursor:])
    return "".join(pieces)
|
||||
|
||||
|
||||
def preprocess_skill_content(
|
||||
content: str,
|
||||
skill_dir: Path | None,
|
||||
session_id: str | None = None,
|
||||
skills_cfg: dict | None = None,
|
||||
) -> str:
|
||||
"""Apply configured SKILL.md template and inline-shell preprocessing."""
|
||||
if not content:
|
||||
return content
|
||||
|
||||
cfg = skills_cfg if isinstance(skills_cfg, dict) else load_skills_config()
|
||||
if cfg.get("template_vars", True):
|
||||
content = substitute_template_vars(content, skill_dir, session_id)
|
||||
if cfg.get("inline_shell", False):
|
||||
timeout = int(cfg.get("inline_shell_timeout", 10) or 10)
|
||||
content = expand_inline_shell(content, skill_dir, timeout)
|
||||
return content
|
||||
+2
-6
@@ -951,13 +951,9 @@ class BatchRunner:
|
||||
root_logger.setLevel(original_level)
|
||||
|
||||
# Aggregate all batch statistics and update checkpoint
|
||||
all_completed_prompts = list(completed_prompts_set)
|
||||
total_reasoning_stats = {"total_assistant_turns": 0, "turns_with_reasoning": 0, "turns_without_reasoning": 0}
|
||||
|
||||
|
||||
for batch_result in results:
|
||||
# Add newly completed prompts
|
||||
all_completed_prompts.extend(batch_result.get("completed_prompts", []))
|
||||
|
||||
# Aggregate tool stats
|
||||
for tool_name, stats in batch_result.get("tool_stats", {}).items():
|
||||
if tool_name not in total_tool_stats:
|
||||
@@ -977,7 +973,7 @@ class BatchRunner:
|
||||
|
||||
# Save final checkpoint (best-effort; incremental writes already happened)
|
||||
try:
|
||||
checkpoint_data["completed_prompts"] = all_completed_prompts
|
||||
checkpoint_data["completed_prompts"] = sorted(completed_prompts_set)
|
||||
self._save_checkpoint(checkpoint_data, lock=checkpoint_lock)
|
||||
except Exception as ckpt_err:
|
||||
print(f"âš ï¸ Warning: Failed to save final checkpoint: {ckpt_err}")
|
||||
|
||||
@@ -5374,29 +5374,26 @@ class HermesCLI:
|
||||
_cprint(f" ✓ Model switched: {result.new_model}")
|
||||
_cprint(f" Provider: {provider_label}")
|
||||
|
||||
# Rich metadata from models.dev
|
||||
# Context: always resolve via the provider-aware chain so Codex OAuth,
|
||||
# Copilot, and Nous-enforced caps win over the raw models.dev entry
|
||||
# (e.g. gpt-5.5 is 1.05M on openai but 272K on Codex OAuth).
|
||||
mi = result.model_info
|
||||
from hermes_cli.model_switch import resolve_display_context_length
|
||||
ctx = resolve_display_context_length(
|
||||
result.new_model,
|
||||
result.target_provider,
|
||||
base_url=result.base_url or self.base_url or "",
|
||||
api_key=result.api_key or self.api_key or "",
|
||||
model_info=mi,
|
||||
)
|
||||
if ctx:
|
||||
_cprint(f" Context: {ctx:,} tokens")
|
||||
if mi:
|
||||
if mi.context_window:
|
||||
_cprint(f" Context: {mi.context_window:,} tokens")
|
||||
if mi.max_output:
|
||||
_cprint(f" Max output: {mi.max_output:,} tokens")
|
||||
if mi.has_cost_data():
|
||||
_cprint(f" Cost: {mi.format_cost()}")
|
||||
_cprint(f" Capabilities: {mi.format_capabilities()}")
|
||||
else:
|
||||
# Fallback to old context length lookup
|
||||
try:
|
||||
from agent.model_metadata import get_model_context_length
|
||||
ctx = get_model_context_length(
|
||||
result.new_model,
|
||||
base_url=result.base_url or self.base_url,
|
||||
api_key=result.api_key or self.api_key,
|
||||
provider=result.target_provider,
|
||||
)
|
||||
_cprint(f" Context: {ctx:,} tokens")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Cache notice
|
||||
cache_enabled = (
|
||||
@@ -6165,6 +6162,8 @@ class HermesCLI:
|
||||
self._handle_skin_command(cmd_original)
|
||||
elif canonical == "voice":
|
||||
self._handle_voice_command(cmd_original)
|
||||
elif canonical == "busy":
|
||||
self._handle_busy_command(cmd_original)
|
||||
else:
|
||||
# Check for user-defined quick commands (bypass agent loop, no LLM call)
|
||||
base_cmd = cmd_lower.split()[0]
|
||||
@@ -6901,6 +6900,36 @@ class HermesCLI:
|
||||
else:
|
||||
_cprint(f" {_ACCENT}✓ Reasoning effort set to '{arg}' (session only){_RST}")
|
||||
|
||||
def _handle_busy_command(self, cmd: str):
    """Handle /busy: show or change what Enter does while Hermes is working.

    Usage:
        /busy            Show current busy input mode
        /busy status     Show current busy input mode
        /busy queue      Queue input for the next turn instead of interrupting
        /busy interrupt  Interrupt the current run on Enter (default)
    """
    usage_line = f" {_DIM}Usage: /busy [queue|interrupt|status]{_RST}"

    tokens = cmd.strip().split(maxsplit=1)
    # A bare "/busy" behaves exactly like "/busy status".
    requested = tokens[1].strip().lower() if len(tokens) > 1 else "status"

    if requested == "status":
        current = self.busy_input_mode
        effect = "queues for next turn" if current == "queue" else "interrupts current run"
        _cprint(f" {_ACCENT}Busy input mode: {current}{_RST}")
        _cprint(f" {_DIM}Enter while busy: {effect}{_RST}")
        _cprint(usage_line)
        return

    if requested not in ("queue", "interrupt"):
        _cprint(f" {_DIM}(._.) Unknown argument: {requested}{_RST}")
        _cprint(usage_line)
        return

    # The mode applies to this session even if it cannot be saved.
    self.busy_input_mode = requested
    if not save_config_value("display.busy_input_mode", requested):
        _cprint(f" {_ACCENT}✓ Busy input mode set to '{requested}' (session only){_RST}")
        return

    if requested == "queue":
        behavior = "Enter will queue follow-up input while Hermes is busy."
    else:
        behavior = "Enter will interrupt the current run while Hermes is busy."
    _cprint(f" {_ACCENT}✓ Busy input mode set to '{requested}' (saved to config){_RST}")
    _cprint(f" {_DIM}{behavior}{_RST}")
|
||||
|
||||
def _handle_fast_command(self, cmd: str):
|
||||
"""Handle /fast — toggle fast mode (OpenAI Priority Processing / Anthropic Fast Mode)."""
|
||||
if not self._fast_command_available():
|
||||
@@ -6979,51 +7008,52 @@ class HermesCLI:
|
||||
focus_topic = parts[1].strip()
|
||||
|
||||
original_count = len(self.conversation_history)
|
||||
try:
|
||||
from agent.model_metadata import estimate_messages_tokens_rough
|
||||
from agent.manual_compression_feedback import summarize_manual_compression
|
||||
original_history = list(self.conversation_history)
|
||||
approx_tokens = estimate_messages_tokens_rough(original_history)
|
||||
if focus_topic:
|
||||
print(f"🗜️ Compressing {original_count} messages (~{approx_tokens:,} tokens), "
|
||||
f"focus: \"{focus_topic}\"...")
|
||||
else:
|
||||
print(f"🗜️ Compressing {original_count} messages (~{approx_tokens:,} tokens)...")
|
||||
with self._busy_command("Compressing context..."):
|
||||
try:
|
||||
from agent.model_metadata import estimate_messages_tokens_rough
|
||||
from agent.manual_compression_feedback import summarize_manual_compression
|
||||
original_history = list(self.conversation_history)
|
||||
approx_tokens = estimate_messages_tokens_rough(original_history)
|
||||
if focus_topic:
|
||||
print(f"🗜️ Compressing {original_count} messages (~{approx_tokens:,} tokens), "
|
||||
f"focus: \"{focus_topic}\"...")
|
||||
else:
|
||||
print(f"🗜️ Compressing {original_count} messages (~{approx_tokens:,} tokens)...")
|
||||
|
||||
compressed, _ = self.agent._compress_context(
|
||||
original_history,
|
||||
self.agent._cached_system_prompt or "",
|
||||
approx_tokens=approx_tokens,
|
||||
focus_topic=focus_topic or None,
|
||||
)
|
||||
self.conversation_history = compressed
|
||||
# _compress_context ends the old session and creates a new child
|
||||
# session on the agent (run_agent.py::_compress_context). Sync the
|
||||
# CLI's session_id so /status, /resume, exit summary, and title
|
||||
# generation all point at the live continuation session, not the
|
||||
# ended parent. Without this, subsequent end_session() calls target
|
||||
# the already-closed parent and the child is orphaned.
|
||||
if (
|
||||
getattr(self.agent, "session_id", None)
|
||||
and self.agent.session_id != self.session_id
|
||||
):
|
||||
self.session_id = self.agent.session_id
|
||||
self._pending_title = None
|
||||
new_tokens = estimate_messages_tokens_rough(self.conversation_history)
|
||||
summary = summarize_manual_compression(
|
||||
original_history,
|
||||
self.conversation_history,
|
||||
approx_tokens,
|
||||
new_tokens,
|
||||
)
|
||||
icon = "🗜️" if summary["noop"] else "✅"
|
||||
print(f" {icon} {summary['headline']}")
|
||||
print(f" {summary['token_line']}")
|
||||
if summary["note"]:
|
||||
print(f" {summary['note']}")
|
||||
compressed, _ = self.agent._compress_context(
|
||||
original_history,
|
||||
self.agent._cached_system_prompt or "",
|
||||
approx_tokens=approx_tokens,
|
||||
focus_topic=focus_topic or None,
|
||||
)
|
||||
self.conversation_history = compressed
|
||||
# _compress_context ends the old session and creates a new child
|
||||
# session on the agent (run_agent.py::_compress_context). Sync the
|
||||
# CLI's session_id so /status, /resume, exit summary, and title
|
||||
# generation all point at the live continuation session, not the
|
||||
# ended parent. Without this, subsequent end_session() calls target
|
||||
# the already-closed parent and the child is orphaned.
|
||||
if (
|
||||
getattr(self.agent, "session_id", None)
|
||||
and self.agent.session_id != self.session_id
|
||||
):
|
||||
self.session_id = self.agent.session_id
|
||||
self._pending_title = None
|
||||
new_tokens = estimate_messages_tokens_rough(self.conversation_history)
|
||||
summary = summarize_manual_compression(
|
||||
original_history,
|
||||
self.conversation_history,
|
||||
approx_tokens,
|
||||
new_tokens,
|
||||
)
|
||||
icon = "🗜️" if summary["noop"] else "✅"
|
||||
print(f" {icon} {summary['headline']}")
|
||||
print(f" {summary['token_line']}")
|
||||
if summary["note"]:
|
||||
print(f" {summary['note']}")
|
||||
|
||||
except Exception as e:
|
||||
print(f" ❌ Compression failed: {e}")
|
||||
except Exception as e:
|
||||
print(f" ❌ Compression failed: {e}")
|
||||
|
||||
def _handle_debug_command(self):
|
||||
"""Handle /debug — upload debug report + logs and print paste URLs."""
|
||||
@@ -9525,9 +9555,20 @@ class HermesCLI:
|
||||
|
||||
@kb.add('c-d')
|
||||
def handle_ctrl_d(event):
|
||||
"""Handle Ctrl+D - exit."""
|
||||
self._should_exit = True
|
||||
event.app.exit()
|
||||
"""Ctrl+D: delete char under cursor (standard readline behaviour).
|
||||
Only exit when the input is empty — same as bash/zsh. Pending
|
||||
attached images count as input and block the EOF-exit so the
|
||||
user doesn't lose them silently.
|
||||
"""
|
||||
buf = event.app.current_buffer
|
||||
if buf.text:
|
||||
buf.delete()
|
||||
elif self._attached_images:
|
||||
# Empty text but pending attachments — no-op, don't exit.
|
||||
return
|
||||
else:
|
||||
self._should_exit = True
|
||||
event.app.exit()
|
||||
|
||||
_modal_prompt_active = Condition(
|
||||
lambda: bool(self._secret_state or self._sudo_state)
|
||||
|
||||
+8
-3
@@ -135,7 +135,7 @@ class SessionResetPolicy:
|
||||
mode=mode if mode is not None else "both",
|
||||
at_hour=at_hour if at_hour is not None else 4,
|
||||
idle_minutes=idle_minutes if idle_minutes is not None else 1440,
|
||||
notify=notify if notify is not None else True,
|
||||
notify=_coerce_bool(notify, True),
|
||||
notify_exclude_platforms=tuple(exclude) if exclude is not None else ("api_server", "webhook"),
|
||||
)
|
||||
|
||||
@@ -178,7 +178,7 @@ class PlatformConfig:
|
||||
home_channel = HomeChannel.from_dict(data["home_channel"])
|
||||
|
||||
return cls(
|
||||
enabled=data.get("enabled", False),
|
||||
enabled=_coerce_bool(data.get("enabled"), False),
|
||||
token=data.get("token"),
|
||||
api_key=data.get("api_key"),
|
||||
home_channel=home_channel,
|
||||
@@ -435,7 +435,7 @@ class GatewayConfig:
|
||||
reset_triggers=data.get("reset_triggers", ["/new", "/reset"]),
|
||||
quick_commands=quick_commands,
|
||||
sessions_dir=sessions_dir,
|
||||
always_log_local=data.get("always_log_local", True),
|
||||
always_log_local=_coerce_bool(data.get("always_log_local"), True),
|
||||
stt_enabled=_coerce_bool(stt_enabled, True),
|
||||
group_sessions_per_user=_coerce_bool(group_sessions_per_user, True),
|
||||
thread_sessions_per_user=_coerce_bool(thread_sessions_per_user, False),
|
||||
@@ -687,6 +687,11 @@ def load_gateway_config() -> GatewayConfig:
|
||||
os.environ["TELEGRAM_REACTIONS"] = str(telegram_cfg["reactions"]).lower()
|
||||
if "proxy_url" in telegram_cfg and not os.getenv("TELEGRAM_PROXY"):
|
||||
os.environ["TELEGRAM_PROXY"] = str(telegram_cfg["proxy_url"]).strip()
|
||||
if "group_allowed_chats" in telegram_cfg and not os.getenv("TELEGRAM_GROUP_ALLOWED_USERS"):
|
||||
gac = telegram_cfg["group_allowed_chats"]
|
||||
if isinstance(gac, list):
|
||||
gac = ",".join(str(v) for v in gac)
|
||||
os.environ["TELEGRAM_GROUP_ALLOWED_USERS"] = str(gac)
|
||||
if "disable_link_previews" in telegram_cfg:
|
||||
plat_data = platforms_data.setdefault(Platform.TELEGRAM.value, {})
|
||||
if not isinstance(plat_data, dict):
|
||||
|
||||
+101
-22
@@ -1204,10 +1204,12 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
|
||||
If the client disconnects mid-stream, ``agent.interrupt()`` is
|
||||
called so the agent stops issuing upstream LLM calls, then the
|
||||
asyncio task is cancelled. When ``store=True`` the full response
|
||||
is persisted to the ResponseStore in a ``finally`` block so GET
|
||||
/v1/responses/{id} and ``previous_response_id`` chaining work the
|
||||
same as the batch path.
|
||||
asyncio task is cancelled. When ``store=True`` an initial
|
||||
``in_progress`` snapshot is persisted immediately after
|
||||
``response.created`` and disconnects update it to an
|
||||
``incomplete`` snapshot so GET /v1/responses/{id} and
|
||||
``previous_response_id`` chaining still have something to
|
||||
recover from.
|
||||
"""
|
||||
import queue as _q
|
||||
|
||||
@@ -1269,6 +1271,60 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
final_response_text = ""
|
||||
agent_error: Optional[str] = None
|
||||
usage: Dict[str, int] = {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}
|
||||
terminal_snapshot_persisted = False
|
||||
|
||||
def _persist_response_snapshot(
|
||||
response_env: Dict[str, Any],
|
||||
*,
|
||||
conversation_history_snapshot: Optional[List[Dict[str, Any]]] = None,
|
||||
) -> None:
|
||||
if not store:
|
||||
return
|
||||
if conversation_history_snapshot is None:
|
||||
conversation_history_snapshot = list(conversation_history)
|
||||
conversation_history_snapshot.append({"role": "user", "content": user_message})
|
||||
self._response_store.put(response_id, {
|
||||
"response": response_env,
|
||||
"conversation_history": conversation_history_snapshot,
|
||||
"instructions": instructions,
|
||||
"session_id": session_id,
|
||||
})
|
||||
if conversation:
|
||||
self._response_store.set_conversation(conversation, response_id)
|
||||
|
||||
def _persist_incomplete_if_needed() -> None:
|
||||
"""Persist an ``incomplete`` snapshot if no terminal one was written.
|
||||
|
||||
Called from both the client-disconnect (``ConnectionResetError``)
|
||||
and server-cancellation (``asyncio.CancelledError``) paths so
|
||||
GET /v1/responses/{id} and ``previous_response_id`` chaining keep
|
||||
working after abrupt stream termination.
|
||||
"""
|
||||
if not store or terminal_snapshot_persisted:
|
||||
return
|
||||
incomplete_text = "".join(final_text_parts) or final_response_text
|
||||
incomplete_items: List[Dict[str, Any]] = list(emitted_items)
|
||||
if incomplete_text:
|
||||
incomplete_items.append({
|
||||
"type": "message",
|
||||
"role": "assistant",
|
||||
"content": [{"type": "output_text", "text": incomplete_text}],
|
||||
})
|
||||
incomplete_env = _envelope("incomplete")
|
||||
incomplete_env["output"] = incomplete_items
|
||||
incomplete_env["usage"] = {
|
||||
"input_tokens": usage.get("input_tokens", 0),
|
||||
"output_tokens": usage.get("output_tokens", 0),
|
||||
"total_tokens": usage.get("total_tokens", 0),
|
||||
}
|
||||
incomplete_history = list(conversation_history)
|
||||
incomplete_history.append({"role": "user", "content": user_message})
|
||||
if incomplete_text:
|
||||
incomplete_history.append({"role": "assistant", "content": incomplete_text})
|
||||
_persist_response_snapshot(
|
||||
incomplete_env,
|
||||
conversation_history_snapshot=incomplete_history,
|
||||
)
|
||||
|
||||
try:
|
||||
# response.created — initial envelope, status=in_progress
|
||||
@@ -1278,6 +1334,7 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
"type": "response.created",
|
||||
"response": created_env,
|
||||
})
|
||||
_persist_response_snapshot(created_env)
|
||||
last_activity = time.monotonic()
|
||||
|
||||
async def _open_message_item() -> None:
|
||||
@@ -1534,6 +1591,18 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
"output_tokens": usage.get("output_tokens", 0),
|
||||
"total_tokens": usage.get("total_tokens", 0),
|
||||
}
|
||||
_failed_history = list(conversation_history)
|
||||
_failed_history.append({"role": "user", "content": user_message})
|
||||
if final_response_text or agent_error:
|
||||
_failed_history.append({
|
||||
"role": "assistant",
|
||||
"content": final_response_text or agent_error,
|
||||
})
|
||||
_persist_response_snapshot(
|
||||
failed_env,
|
||||
conversation_history_snapshot=_failed_history,
|
||||
)
|
||||
terminal_snapshot_persisted = True
|
||||
await _write_event("response.failed", {
|
||||
"type": "response.failed",
|
||||
"response": failed_env,
|
||||
@@ -1546,30 +1615,24 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
"output_tokens": usage.get("output_tokens", 0),
|
||||
"total_tokens": usage.get("total_tokens", 0),
|
||||
}
|
||||
full_history = list(conversation_history)
|
||||
full_history.append({"role": "user", "content": user_message})
|
||||
if isinstance(result, dict) and result.get("messages"):
|
||||
full_history.extend(result["messages"])
|
||||
else:
|
||||
full_history.append({"role": "assistant", "content": final_response_text})
|
||||
_persist_response_snapshot(
|
||||
completed_env,
|
||||
conversation_history_snapshot=full_history,
|
||||
)
|
||||
terminal_snapshot_persisted = True
|
||||
await _write_event("response.completed", {
|
||||
"type": "response.completed",
|
||||
"response": completed_env,
|
||||
})
|
||||
|
||||
# Persist for future chaining / GET retrieval, mirroring
|
||||
# the batch path behavior.
|
||||
if store:
|
||||
full_history = list(conversation_history)
|
||||
full_history.append({"role": "user", "content": user_message})
|
||||
if isinstance(result, dict) and result.get("messages"):
|
||||
full_history.extend(result["messages"])
|
||||
else:
|
||||
full_history.append({"role": "assistant", "content": final_response_text})
|
||||
self._response_store.put(response_id, {
|
||||
"response": completed_env,
|
||||
"conversation_history": full_history,
|
||||
"instructions": instructions,
|
||||
"session_id": session_id,
|
||||
})
|
||||
if conversation:
|
||||
self._response_store.set_conversation(conversation, response_id)
|
||||
|
||||
except (ConnectionResetError, ConnectionAbortedError, BrokenPipeError, OSError):
|
||||
_persist_incomplete_if_needed()
|
||||
# Client disconnected — interrupt the agent so it stops
|
||||
# making upstream LLM calls, then cancel the task.
|
||||
agent = agent_ref[0] if agent_ref else None
|
||||
@@ -1585,6 +1648,22 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
except (asyncio.CancelledError, Exception):
|
||||
pass
|
||||
logger.info("SSE client disconnected; interrupted agent task %s", response_id)
|
||||
except asyncio.CancelledError:
|
||||
# Server-side cancellation (e.g. shutdown, request timeout) —
|
||||
# persist an incomplete snapshot so GET /v1/responses/{id} and
|
||||
# previous_response_id chaining still work, then re-raise so the
|
||||
# runtime's cancellation semantics are respected.
|
||||
_persist_incomplete_if_needed()
|
||||
agent = agent_ref[0] if agent_ref else None
|
||||
if agent is not None:
|
||||
try:
|
||||
agent.interrupt("SSE task cancelled")
|
||||
except Exception:
|
||||
pass
|
||||
if not agent_task.done():
|
||||
agent_task.cancel()
|
||||
logger.info("SSE task cancelled; persisted incomplete snapshot for %s", response_id)
|
||||
raise
|
||||
|
||||
return response
|
||||
|
||||
|
||||
+106
-3
@@ -148,7 +148,102 @@ def _detect_macos_system_proxy() -> str | None:
|
||||
return None
|
||||
|
||||
|
||||
def resolve_proxy_url(platform_env_var: str | None = None) -> str | None:
|
||||
def _split_host_port(value: str) -> tuple[str, int | None]:
|
||||
raw = str(value or "").strip()
|
||||
if not raw:
|
||||
return "", None
|
||||
if "://" in raw:
|
||||
parsed = urlsplit(raw)
|
||||
return (parsed.hostname or "").lower().rstrip("."), parsed.port
|
||||
if raw.startswith("[") and "]" in raw:
|
||||
host, _, rest = raw[1:].partition("]")
|
||||
port = None
|
||||
if rest.startswith(":") and rest[1:].isdigit():
|
||||
port = int(rest[1:])
|
||||
return host.lower().rstrip("."), port
|
||||
if raw.count(":") == 1:
|
||||
host, _, maybe_port = raw.rpartition(":")
|
||||
if maybe_port.isdigit():
|
||||
return host.lower().rstrip("."), int(maybe_port)
|
||||
return raw.lower().strip("[]").rstrip("."), None
|
||||
|
||||
|
||||
def _no_proxy_entries() -> list[str]:
|
||||
entries: list[str] = []
|
||||
for key in ("NO_PROXY", "no_proxy"):
|
||||
raw = os.environ.get(key, "")
|
||||
entries.extend(part.strip() for part in raw.split(",") if part.strip())
|
||||
return entries
|
||||
|
||||
|
||||
def _no_proxy_entry_matches(entry: str, host: str, port: int | None = None) -> bool:
|
||||
token = str(entry or "").strip().lower()
|
||||
if not token:
|
||||
return False
|
||||
if token == "*":
|
||||
return True
|
||||
|
||||
token_host, token_port = _split_host_port(token)
|
||||
if token_port is not None and port is not None and token_port != port:
|
||||
return False
|
||||
if token_port is not None and port is None:
|
||||
return False
|
||||
if not token_host:
|
||||
return False
|
||||
|
||||
try:
|
||||
network = ipaddress.ip_network(token_host, strict=False)
|
||||
try:
|
||||
return ipaddress.ip_address(host) in network
|
||||
except ValueError:
|
||||
return False
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
try:
|
||||
token_ip = ipaddress.ip_address(token_host)
|
||||
try:
|
||||
return ipaddress.ip_address(host) == token_ip
|
||||
except ValueError:
|
||||
return False
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
if token_host.startswith("*."):
|
||||
suffix = token_host[1:]
|
||||
return host.endswith(suffix)
|
||||
if token_host.startswith("."):
|
||||
return host == token_host[1:] or host.endswith(token_host)
|
||||
return host == token_host or host.endswith(f".{token_host}")
|
||||
|
||||
|
||||
def should_bypass_proxy(target_hosts: str | list[str] | tuple[str, ...] | set[str] | None) -> bool:
    """Tell whether NO_PROXY/no_proxy covers at least one of ``target_hosts``.

    ``target_hosts`` may be a single string or a collection of them. Matching
    of each environment entry is delegated to ``_no_proxy_entry_matches``.
    The result is False when there are no entries or no targets.
    """
    rules = _no_proxy_entries()
    if not (rules and target_hosts):
        return False

    targets = [target_hosts] if isinstance(target_hosts, str) else list(target_hosts)
    for target in targets:
        name, port_no = _split_host_port(str(target))
        if not name:
            # Nothing usable to match against; try the next target.
            continue
        for rule in rules:
            if _no_proxy_entry_matches(rule, name, port_no):
                return True
    return False
|
||||
|
||||
|
||||
def resolve_proxy_url(
|
||||
platform_env_var: str | None = None,
|
||||
*,
|
||||
target_hosts: str | list[str] | tuple[str, ...] | set[str] | None = None,
|
||||
) -> str | None:
|
||||
"""Return a proxy URL from env vars, or macOS system proxy.
|
||||
|
||||
Check order:
|
||||
@@ -156,18 +251,26 @@ def resolve_proxy_url(platform_env_var: str | None = None) -> str | None:
|
||||
1. HTTPS_PROXY / HTTP_PROXY / ALL_PROXY (and lowercase variants)
|
||||
2. macOS system proxy via ``scutil --proxy`` (auto-detect)
|
||||
|
||||
Returns *None* if no proxy is found.
|
||||
Returns *None* if no proxy is found, or if NO_PROXY/no_proxy matches one
|
||||
of ``target_hosts``.
|
||||
"""
|
||||
if platform_env_var:
|
||||
value = (os.environ.get(platform_env_var) or "").strip()
|
||||
if value:
|
||||
if should_bypass_proxy(target_hosts):
|
||||
return None
|
||||
return normalize_proxy_url(value)
|
||||
for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY",
|
||||
"https_proxy", "http_proxy", "all_proxy"):
|
||||
value = (os.environ.get(key) or "").strip()
|
||||
if value:
|
||||
if should_bypass_proxy(target_hosts):
|
||||
return None
|
||||
return normalize_proxy_url(value)
|
||||
return normalize_proxy_url(_detect_macos_system_proxy())
|
||||
detected = normalize_proxy_url(_detect_macos_system_proxy())
|
||||
if detected and should_bypass_proxy(target_hosts):
|
||||
return None
|
||||
return detected
|
||||
|
||||
|
||||
def proxy_kwargs_for_bot(proxy_url: str | None) -> dict:
|
||||
|
||||
@@ -99,6 +99,7 @@ def _normalize_server_url(raw: str) -> str:
|
||||
|
||||
class BlueBubblesAdapter(BasePlatformAdapter):
|
||||
platform = Platform.BLUEBUBBLES
|
||||
SUPPORTS_MESSAGE_EDITING = False
|
||||
MAX_MESSAGE_LENGTH = MAX_TEXT_LENGTH
|
||||
|
||||
def __init__(self, config: PlatformConfig):
|
||||
@@ -391,6 +392,13 @@ class BlueBubblesAdapter(BasePlatformAdapter):
|
||||
# Text sending
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
@staticmethod
def truncate_message(content: str, max_length: int = MAX_TEXT_LENGTH) -> List[str]:
    """Split ``content`` with the base adapter's splitter, minus page markers.

    A trailing "(n/m)" indicator, together with any whitespace before it, is
    removed from each chunk: iMessage bubbles flow naturally without them.
    """
    cleaned: List[str] = []
    for piece in BasePlatformAdapter.truncate_message(content, max_length):
        cleaned.append(re.sub(r"\s*\(\d+/\d+\)$", "", piece))
    return cleaned
|
||||
|
||||
async def send(
|
||||
self,
|
||||
chat_id: str,
|
||||
@@ -398,10 +406,19 @@ class BlueBubblesAdapter(BasePlatformAdapter):
|
||||
reply_to: Optional[str] = None,
|
||||
metadata: Optional[Dict[str, Any]] = None,
|
||||
) -> SendResult:
|
||||
text = strip_markdown(content or "")
|
||||
text = self.format_message(content)
|
||||
if not text:
|
||||
return SendResult(success=False, error="BlueBubbles send requires text")
|
||||
chunks = self.truncate_message(text, max_length=self.MAX_MESSAGE_LENGTH)
|
||||
# Split on paragraph breaks first (double newlines) so each thought
|
||||
# becomes its own iMessage bubble, then truncate any that are still
|
||||
# too long.
|
||||
paragraphs = [p.strip() for p in re.split(r'\n\s*\n', text) if p.strip()]
|
||||
chunks: List[str] = []
|
||||
for para in (paragraphs or [text]):
|
||||
if len(para) <= self.MAX_MESSAGE_LENGTH:
|
||||
chunks.append(para)
|
||||
else:
|
||||
chunks.extend(self.truncate_message(para, max_length=self.MAX_MESSAGE_LENGTH))
|
||||
last = SendResult(success=True)
|
||||
for chunk in chunks:
|
||||
guid = await self._resolve_chat_guid(chat_id)
|
||||
|
||||
@@ -703,7 +703,6 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
"write_timeout": _env_float("HERMES_TELEGRAM_HTTP_WRITE_TIMEOUT", 20.0),
|
||||
}
|
||||
|
||||
proxy_url = resolve_proxy_url("TELEGRAM_PROXY")
|
||||
disable_fallback = (os.getenv("HERMES_TELEGRAM_DISABLE_FALLBACK_IPS", "").strip().lower() in ("1", "true", "yes", "on"))
|
||||
fallback_ips = self._fallback_ips()
|
||||
if not fallback_ips:
|
||||
@@ -714,6 +713,8 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
", ".join(fallback_ips),
|
||||
)
|
||||
|
||||
proxy_targets = ["api.telegram.org", *fallback_ips]
|
||||
proxy_url = resolve_proxy_url("TELEGRAM_PROXY", target_hosts=proxy_targets)
|
||||
if fallback_ips and not proxy_url and not disable_fallback:
|
||||
logger.info(
|
||||
"[%s] Telegram fallback IPs active: %s",
|
||||
|
||||
@@ -43,10 +43,10 @@ _DOH_PROVIDERS: list[dict] = [
|
||||
_SEED_FALLBACK_IPS: list[str] = ["149.154.167.220"]
|
||||
|
||||
|
||||
def _resolve_proxy_url() -> str | None:
|
||||
def _resolve_proxy_url(target_hosts=None) -> str | None:
|
||||
# Delegate to shared implementation (env vars + macOS system proxy detection)
|
||||
from gateway.platforms.base import resolve_proxy_url
|
||||
return resolve_proxy_url("TELEGRAM_PROXY")
|
||||
return resolve_proxy_url("TELEGRAM_PROXY", target_hosts=target_hosts)
|
||||
|
||||
|
||||
class TelegramFallbackTransport(httpx.AsyncBaseTransport):
|
||||
@@ -60,7 +60,7 @@ class TelegramFallbackTransport(httpx.AsyncBaseTransport):
|
||||
|
||||
def __init__(self, fallback_ips: Iterable[str], **transport_kwargs):
|
||||
self._fallback_ips = [ip for ip in dict.fromkeys(_normalize_fallback_ips(fallback_ips))]
|
||||
proxy_url = _resolve_proxy_url()
|
||||
proxy_url = _resolve_proxy_url(target_hosts=[_TELEGRAM_API_HOST, *self._fallback_ips])
|
||||
if proxy_url and "proxy" not in transport_kwargs:
|
||||
transport_kwargs["proxy"] = proxy_url
|
||||
self._primary = httpx.AsyncHTTPTransport(**transport_kwargs)
|
||||
|
||||
+42
-60
@@ -298,50 +298,16 @@ from gateway.restart import (
|
||||
)
|
||||
|
||||
|
||||
def _normalize_whatsapp_identifier(value: str) -> str:
|
||||
"""Strip WhatsApp JID/LID syntax down to its stable numeric identifier."""
|
||||
return (
|
||||
str(value or "")
|
||||
.strip()
|
||||
.replace("+", "", 1)
|
||||
.split(":", 1)[0]
|
||||
.split("@", 1)[0]
|
||||
)
|
||||
from gateway.whatsapp_identity import (
|
||||
canonical_whatsapp_identifier as _canonical_whatsapp_identifier, # noqa: F401
|
||||
expand_whatsapp_aliases as _expand_whatsapp_auth_aliases,
|
||||
normalize_whatsapp_identifier as _normalize_whatsapp_identifier,
|
||||
)
|
||||
|
||||
|
||||
def _expand_whatsapp_auth_aliases(identifier: str) -> set:
|
||||
"""Resolve WhatsApp phone/LID aliases using bridge session mapping files."""
|
||||
normalized = _normalize_whatsapp_identifier(identifier)
|
||||
if not normalized:
|
||||
return set()
|
||||
|
||||
session_dir = _hermes_home / "whatsapp" / "session"
|
||||
resolved = set()
|
||||
queue = [normalized]
|
||||
|
||||
while queue:
|
||||
current = queue.pop(0)
|
||||
if not current or current in resolved:
|
||||
continue
|
||||
|
||||
resolved.add(current)
|
||||
for suffix in ("", "_reverse"):
|
||||
mapping_path = session_dir / f"lid-mapping-{current}{suffix}.json"
|
||||
if not mapping_path.exists():
|
||||
continue
|
||||
try:
|
||||
mapped = _normalize_whatsapp_identifier(
|
||||
json.loads(mapping_path.read_text(encoding="utf-8"))
|
||||
)
|
||||
except Exception:
|
||||
continue
|
||||
if mapped and mapped not in resolved:
|
||||
queue.append(mapped)
|
||||
|
||||
return resolved
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Sentinel placed into _running_agents immediately when a session starts
|
||||
# processing, *before* any await. Prevents a second message for the same
|
||||
# session from bypassing the "already running" guard during the async gap
|
||||
@@ -3037,6 +3003,7 @@ class GatewayRunner:
|
||||
Platform.QQBOT: "QQ_ALLOWED_USERS",
|
||||
}
|
||||
platform_group_env_map = {
|
||||
Platform.TELEGRAM: "TELEGRAM_GROUP_ALLOWED_USERS",
|
||||
Platform.QQBOT: "QQ_GROUP_ALLOWED_USERS",
|
||||
}
|
||||
platform_allow_all_map = {
|
||||
@@ -3093,7 +3060,7 @@ class GatewayRunner:
|
||||
# Check platform-specific and global allowlists
|
||||
platform_allowlist = os.getenv(platform_env_map.get(source.platform, ""), "").strip()
|
||||
group_allowlist = ""
|
||||
if source.chat_type == "group":
|
||||
if source.chat_type in {"group", "forum"}:
|
||||
group_allowlist = os.getenv(platform_group_env_map.get(source.platform, ""), "").strip()
|
||||
global_allowlist = os.getenv("GATEWAY_ALLOWED_USERS", "").strip()
|
||||
|
||||
@@ -3102,7 +3069,7 @@ class GatewayRunner:
|
||||
return os.getenv("GATEWAY_ALLOW_ALL_USERS", "").lower() in ("true", "1", "yes")
|
||||
|
||||
# Some platforms authorize group traffic by chat ID rather than sender ID.
|
||||
if group_allowlist and source.chat_type == "group" and source.chat_id:
|
||||
if group_allowlist and source.chat_type in {"group", "forum"} and source.chat_id:
|
||||
allowed_group_ids = {
|
||||
chat_id.strip() for chat_id in group_allowlist.split(",") if chat_id.strip()
|
||||
}
|
||||
@@ -3624,6 +3591,10 @@ class GatewayRunner:
|
||||
if self._queue_during_drain_enabled()
|
||||
else f"⏳ Gateway is {self._status_action_gerund()} and is not accepting another turn right now."
|
||||
)
|
||||
if self._busy_input_mode == "queue":
|
||||
logger.debug("PRIORITY queue follow-up for session %s", _quick_key[:20])
|
||||
self._queue_or_replace_pending_event(_quick_key, event)
|
||||
return None
|
||||
logger.debug("PRIORITY interrupt for session %s", _quick_key[:20])
|
||||
running_agent.interrupt(event.text)
|
||||
if _quick_key in self._pending_messages:
|
||||
@@ -5688,9 +5659,17 @@ class GatewayRunner:
|
||||
lines = [f"Model switched to `{result.new_model}`"]
|
||||
lines.append(f"Provider: {plabel}")
|
||||
mi = result.model_info
|
||||
from hermes_cli.model_switch import resolve_display_context_length
|
||||
ctx = resolve_display_context_length(
|
||||
result.new_model,
|
||||
result.target_provider,
|
||||
base_url=result.base_url or current_base_url or "",
|
||||
api_key=result.api_key or current_api_key or "",
|
||||
model_info=mi,
|
||||
)
|
||||
if ctx:
|
||||
lines.append(f"Context: {ctx:,} tokens")
|
||||
if mi:
|
||||
if mi.context_window:
|
||||
lines.append(f"Context: {mi.context_window:,} tokens")
|
||||
if mi.max_output:
|
||||
lines.append(f"Max output: {mi.max_output:,} tokens")
|
||||
if mi.has_cost_data():
|
||||
@@ -5824,28 +5803,25 @@ class GatewayRunner:
|
||||
lines = [f"Model switched to `{result.new_model}`"]
|
||||
lines.append(f"Provider: {provider_label}")
|
||||
|
||||
# Rich metadata from models.dev
|
||||
# Context: always resolve via the provider-aware chain so Codex OAuth,
|
||||
# Copilot, and Nous-enforced caps win over the raw models.dev entry.
|
||||
mi = result.model_info
|
||||
from hermes_cli.model_switch import resolve_display_context_length
|
||||
ctx = resolve_display_context_length(
|
||||
result.new_model,
|
||||
result.target_provider,
|
||||
base_url=result.base_url or current_base_url or "",
|
||||
api_key=result.api_key or current_api_key or "",
|
||||
model_info=mi,
|
||||
)
|
||||
if ctx:
|
||||
lines.append(f"Context: {ctx:,} tokens")
|
||||
if mi:
|
||||
if mi.context_window:
|
||||
lines.append(f"Context: {mi.context_window:,} tokens")
|
||||
if mi.max_output:
|
||||
lines.append(f"Max output: {mi.max_output:,} tokens")
|
||||
if mi.has_cost_data():
|
||||
lines.append(f"Cost: {mi.format_cost()}")
|
||||
lines.append(f"Capabilities: {mi.format_capabilities()}")
|
||||
else:
|
||||
try:
|
||||
from agent.model_metadata import get_model_context_length
|
||||
ctx = get_model_context_length(
|
||||
result.new_model,
|
||||
base_url=result.base_url or current_base_url,
|
||||
api_key=result.api_key or current_api_key,
|
||||
provider=result.target_provider,
|
||||
)
|
||||
lines.append(f"Context: {ctx:,} tokens")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Cache notice
|
||||
cache_enabled = (
|
||||
@@ -7257,13 +7233,19 @@ class GatewayRunner:
|
||||
logger.debug("Failed to list titled sessions: %s", e)
|
||||
return f"Could not list sessions: {e}"
|
||||
|
||||
# Resolve the name to a session ID
|
||||
# Resolve the name to a session ID.
|
||||
target_id = self._session_db.resolve_session_by_title(name)
|
||||
if not target_id:
|
||||
return (
|
||||
f"No session found matching '**{name}**'.\n"
|
||||
"Use `/resume` with no arguments to see available sessions."
|
||||
)
|
||||
# Compression creates child continuations that hold the live transcript.
|
||||
# Follow that chain so gateway /resume matches CLI behavior (#15000).
|
||||
try:
|
||||
target_id = self._session_db.resolve_resume_session_id(target_id)
|
||||
except Exception as e:
|
||||
logger.debug("Failed to resolve resume continuation for %s: %s", target_id, e)
|
||||
|
||||
# Check if already on that session
|
||||
current_entry = self.session_store.get_or_create_session(source)
|
||||
|
||||
+28
-3
@@ -60,6 +60,10 @@ from .config import (
|
||||
SessionResetPolicy, # noqa: F401 — re-exported via gateway/__init__.py
|
||||
HomeChannel,
|
||||
)
|
||||
from .whatsapp_identity import (
|
||||
canonical_whatsapp_identifier,
|
||||
normalize_whatsapp_identifier,
|
||||
)
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -281,6 +285,18 @@ def build_session_context_prompt(
|
||||
"Do not promise to perform these actions. If the user asks, explain "
|
||||
"that you can only read messages sent directly to you and respond."
|
||||
)
|
||||
elif context.source.platform == Platform.BLUEBUBBLES:
|
||||
lines.append("")
|
||||
lines.append(
|
||||
"**Platform notes:** You are responding via iMessage. "
|
||||
"Keep responses short and conversational — think texts, not essays. "
|
||||
"Structure longer replies as separate short thoughts, each separated "
|
||||
"by a blank line (double newline). Each block between blank lines "
|
||||
"will be delivered as its own iMessage bubble, so write accordingly: "
|
||||
"one idea per bubble, 1–3 sentences each. "
|
||||
"If the user needs a detailed answer, give the short version first "
|
||||
"and offer to elaborate."
|
||||
)
|
||||
|
||||
# Connected platforms
|
||||
platforms_list = ["local (files on this machine)"]
|
||||
@@ -518,15 +534,24 @@ def build_session_key(
|
||||
"""
|
||||
platform = source.platform.value
|
||||
if source.chat_type == "dm":
|
||||
if source.chat_id:
|
||||
dm_chat_id = source.chat_id
|
||||
if source.platform == Platform.WHATSAPP:
|
||||
dm_chat_id = canonical_whatsapp_identifier(source.chat_id)
|
||||
|
||||
if dm_chat_id:
|
||||
if source.thread_id:
|
||||
return f"agent:main:{platform}:dm:{source.chat_id}:{source.thread_id}"
|
||||
return f"agent:main:{platform}:dm:{source.chat_id}"
|
||||
return f"agent:main:{platform}:dm:{dm_chat_id}:{source.thread_id}"
|
||||
return f"agent:main:{platform}:dm:{dm_chat_id}"
|
||||
if source.thread_id:
|
||||
return f"agent:main:{platform}:dm:{source.thread_id}"
|
||||
return f"agent:main:{platform}:dm"
|
||||
|
||||
participant_id = source.user_id_alt or source.user_id
|
||||
if participant_id and source.platform == Platform.WHATSAPP:
|
||||
# Same JID/LID-flip bug as the DM case: without canonicalisation, a
|
||||
# single group member gets two isolated per-user sessions when the
|
||||
# bridge reshuffles alias forms.
|
||||
participant_id = canonical_whatsapp_identifier(str(participant_id)) or participant_id
|
||||
key_parts = ["agent:main", platform, source.chat_type]
|
||||
|
||||
if source.chat_id:
|
||||
|
||||
@@ -0,0 +1,135 @@
|
||||
"""Shared helpers for canonicalising WhatsApp sender identity.
|
||||
|
||||
WhatsApp's bridge can surface the same human under two different JID shapes
|
||||
within a single conversation:
|
||||
|
||||
- LID form: ``999999999999999@lid``
|
||||
- Phone form: ``15551234567@s.whatsapp.net``
|
||||
|
||||
Both the authorisation path (:mod:`gateway.run`) and the session-key path
|
||||
(:mod:`gateway.session`) need to collapse these aliases to a single stable
|
||||
identity. This module is the single source of truth for that resolution so
|
||||
the two paths can never drift apart.
|
||||
|
||||
Public helpers:
|
||||
|
||||
- :func:`normalize_whatsapp_identifier` — strip JID/LID/device/plus syntax
|
||||
down to the bare numeric identifier.
|
||||
- :func:`canonical_whatsapp_identifier` — walk the bridge's
|
||||
``lid-mapping-*.json`` files and return a stable canonical identity
|
||||
across phone/LID variants.
|
||||
- :func:`expand_whatsapp_aliases` — return the full alias set for an
|
||||
identifier. Used by authorisation code that needs to match any known
|
||||
form of a sender against an allow-list.
|
||||
|
||||
Plugins that need per-sender behaviour on WhatsApp (role-based routing,
|
||||
per-contact authorisation, policy gating in a gateway hook) should use
|
||||
``canonical_whatsapp_identifier`` so their bookkeeping lines up with
|
||||
Hermes' own session keys.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from typing import Set
|
||||
|
||||
from hermes_constants import get_hermes_home
|
||||
|
||||
|
||||
def normalize_whatsapp_identifier(value: str) -> str:
    """Reduce a WhatsApp JID/LID/phone string to its bare identifier.

    The input is stringified (falsy values become ``""``), surrounding
    whitespace is trimmed, the first ``+`` is removed, and everything from
    the first ``:`` (device suffix) or ``@`` (server suffix) onwards is
    discarded.  For example ``"60123456789@s.whatsapp.net"``,
    ``"60123456789:47@s.whatsapp.net"``, ``"60123456789@lid"`` and
    ``"+60123456789"`` all yield ``"60123456789"``.

    The result is intended for equality comparisons, e.g. matching a sender
    against phone numbers supplied in user configuration regardless of the
    variant the bridge delivered.
    """
    text = str(value or "").strip()
    # Only the first "+" is dropped, wherever it occurs in the string.
    text = text.replace("+", "", 1)
    without_device, _, _ = text.partition(":")
    bare, _, _ = without_device.partition("@")
    return bare
|
||||
|
||||
|
||||
def expand_whatsapp_aliases(identifier: str) -> Set[str]:
    """Resolve WhatsApp phone/LID aliases via bridge session mapping files.

    Starting from ``identifier``, follows the
    ``<hermes home>/whatsapp/session/lid-mapping-<id>.json`` and
    ``lid-mapping-<id>_reverse.json`` files transitively and collects every
    identifier reached.  The normalized input is always part of the result,
    so callers can ``in``-check against the return value without a separate
    fallback branch.

    Mapping files that are missing, unreadable, not valid JSON, or whose
    payload is not a plain string/integer are ignored.

    Returns an empty set if ``identifier`` normalizes to empty.
    """
    from collections import deque

    normalized = normalize_whatsapp_identifier(identifier)
    if not normalized:
        return set()

    session_dir = get_hermes_home() / "whatsapp" / "session"
    resolved: Set[str] = set()
    pending = deque([normalized])

    while pending:
        current = pending.popleft()
        if not current or current in resolved:
            continue

        resolved.add(current)
        for suffix in ("", "_reverse"):
            mapping_path = session_dir / f"lid-mapping-{current}{suffix}.json"
            try:
                payload = json.loads(mapping_path.read_text(encoding="utf-8"))
            except Exception:
                # Best-effort lookup: a missing, unreadable or corrupt
                # mapping file simply contributes no alias.
                continue
            # Only scalar identifiers are meaningful.  Stringifying a list
            # or dict would inject a garbage alias (e.g. "[]") that could
            # later win the shortest-alias canonicalisation.
            if isinstance(payload, bool) or not isinstance(payload, (str, int)):
                continue
            mapped = normalize_whatsapp_identifier(str(payload))
            if mapped and mapped not in resolved:
                pending.append(mapped)

    return resolved
|
||||
|
||||
|
||||
def canonical_whatsapp_identifier(identifier: str) -> str:
    """Collapse phone-JID/LID variants of a WhatsApp identity to one value.

    The same person may be surfaced as a phone-format JID
    (``60123456789@s.whatsapp.net``) or as a LID (``1234567890@lid``),
    both for a DM ``chat_id`` and for a group member's ``participant_id``.
    This helper normalizes ``identifier``, gathers every alias known for it
    via :func:`expand_whatsapp_aliases`, and returns the shortest alias
    (ties broken by plain string ordering) as the canonical identity.

    Plugins that need per-sender behaviour (role-based routing,
    authorisation, per-contact policy) should use this value so their
    bookkeeping stays aligned with Hermes' session keys.

    Returns an empty string if ``identifier`` normalizes to empty.  When no
    mapping files contribute aliases, the normalized input is returned
    unchanged.
    """

    def _rank(alias: str):
        # Shorter aliases win; equal lengths fall back to string order.
        return (len(alias), alias)

    bare = normalize_whatsapp_identifier(identifier)
    if bare:
        # The alias set always contains `bare` itself, so min() is never
        # handed an empty collection here.
        return min(expand_whatsapp_aliases(bare), key=_rank)
    return ""
|
||||
+12
-1
@@ -743,7 +743,18 @@ def _load_auth_store(auth_file: Optional[Path] = None) -> Dict[str, Any]:
|
||||
|
||||
try:
|
||||
raw = json.loads(auth_file.read_text())
|
||||
except Exception:
|
||||
except Exception as exc:
|
||||
corrupt_path = auth_file.with_suffix(".json.corrupt")
|
||||
try:
|
||||
import shutil
|
||||
shutil.copy2(auth_file, corrupt_path)
|
||||
except Exception:
|
||||
pass
|
||||
logger.warning(
|
||||
"auth: failed to parse %s (%s) — starting with empty store. "
|
||||
"Corrupt file preserved at %s",
|
||||
auth_file, exc, corrupt_path,
|
||||
)
|
||||
return {"version": AUTH_STORE_VERSION, "providers": {}}
|
||||
|
||||
if isinstance(raw, dict) and (
|
||||
|
||||
@@ -126,6 +126,9 @@ COMMAND_REGISTRY: list[CommandDef] = [
|
||||
cli_only=True, args_hint="[name]"),
|
||||
CommandDef("voice", "Toggle voice mode", "Configuration",
|
||||
args_hint="[on|off|tts|status]", subcommands=("on", "off", "tts", "status")),
|
||||
CommandDef("busy", "Control what Enter does while Hermes is working", "Configuration",
|
||||
cli_only=True, args_hint="[queue|interrupt|status]",
|
||||
subcommands=("queue", "interrupt", "status")),
|
||||
|
||||
# Tools & Skills
|
||||
CommandDef("tools", "Manage tools: /tools [list|disable|enable] [name...]", "Tools & Skills",
|
||||
|
||||
@@ -839,7 +839,7 @@ DEFAULT_CONFIG = {
|
||||
"auto_thread": True, # Auto-create threads on @mention in channels (like Slack)
|
||||
"reactions": True, # Add 👀/✅/❌ reactions to messages during processing
|
||||
"channel_prompts": {}, # Per-channel ephemeral system prompts (forum parents apply to child threads)
|
||||
# discord_server tool: restrict which actions the agent may call.
|
||||
# discord / discord_admin tools: restrict which actions the agent may call.
|
||||
# Default (empty) = all actions allowed (subject to bot privileged intents).
|
||||
# Accepts comma-separated string ("list_guilds,list_channels,fetch_messages")
|
||||
# or YAML list. Unknown names are dropped with a warning at load time.
|
||||
|
||||
+19
-3
@@ -6715,9 +6715,15 @@ def cmd_dashboard(args):
|
||||
try:
|
||||
import fastapi # noqa: F401
|
||||
import uvicorn # noqa: F401
|
||||
except ImportError:
|
||||
print("Web UI dependencies not installed.")
|
||||
print(f"Install them with: {sys.executable} -m pip install 'fastapi' 'uvicorn[standard]'")
|
||||
except ImportError as e:
|
||||
print("Web UI dependencies not installed (need fastapi + uvicorn).")
|
||||
print(
|
||||
f"Re-install the package into this interpreter so metadata updates apply:\n"
|
||||
f" cd {PROJECT_ROOT}\n"
|
||||
f" {sys.executable} -m pip install -e .\n"
|
||||
"If `pip` is missing in this venv, use: uv pip install -e ."
|
||||
)
|
||||
print(f"Import error: {e}")
|
||||
sys.exit(1)
|
||||
|
||||
if "HERMES_WEB_DIST" not in os.environ:
|
||||
@@ -6726,11 +6732,13 @@ def cmd_dashboard(args):
|
||||
|
||||
from hermes_cli.web_server import start_server
|
||||
|
||||
embedded_chat = args.tui or os.environ.get("HERMES_DASHBOARD_TUI") == "1"
|
||||
start_server(
|
||||
host=args.host,
|
||||
port=args.port,
|
||||
open_browser=not args.no_open,
|
||||
allow_public=getattr(args, "insecure", False),
|
||||
embedded_chat=embedded_chat,
|
||||
)
|
||||
|
||||
|
||||
@@ -8916,6 +8924,14 @@ Examples:
|
||||
action="store_true",
|
||||
help="Allow binding to non-localhost (DANGEROUS: exposes API keys on the network)",
|
||||
)
|
||||
dashboard_parser.add_argument(
|
||||
"--tui",
|
||||
action="store_true",
|
||||
help=(
|
||||
"Expose the in-browser Chat tab (embedded `hermes --tui` via PTY/WebSocket). "
|
||||
"Alternatively set HERMES_DASHBOARD_TUI=1."
|
||||
),
|
||||
)
|
||||
dashboard_parser.set_defaults(func=cmd_dashboard)
|
||||
|
||||
# =========================================================================
|
||||
|
||||
@@ -527,6 +527,42 @@ def _resolve_alias_fallback(
|
||||
return None
|
||||
|
||||
|
||||
def resolve_display_context_length(
    model: str,
    provider: str,
    base_url: str = "",
    api_key: str = "",
    model_info: Optional["ModelInfo"] = None,
) -> Optional[int]:
    """Resolve the context length to show in /model output.

    models.dev reports per-vendor context (e.g. gpt-5.5 = 1.05M on openai)
    but provider-enforced limits can be lower (e.g. Codex OAuth caps the
    same slug at 272k).  ``agent.model_metadata.get_model_context_length``
    is treated as the authoritative, provider-aware source.

    Args:
        model: Model slug to look up.
        provider: Target provider; an empty value is passed on as ``None``.
        base_url: Endpoint base URL forwarded to the resolver.
        api_key: API key forwarded to the resolver.
        model_info: Optional metadata object whose ``context_window`` is
            used as a fallback.

    Returns:
        The provider-aware context length when the resolver yields a truthy
        value; otherwise ``model_info.context_window`` when present and
        truthy; otherwise ``None``.  Resolver failures never propagate.
    """
    try:
        from agent.model_metadata import get_model_context_length

        ctx = get_model_context_length(
            model,
            base_url=base_url or "",
            api_key=api_key or "",
            provider=provider or None,
        )
        if ctx:
            return int(ctx)
    except Exception as exc:
        # Display-only lookup: stay best-effort, but leave a trace instead
        # of swallowing the failure silently.
        import logging

        logging.getLogger(__name__).debug(
            "Context length resolution failed for %s (%s): %s",
            model, provider, exc,
        )
    if model_info is not None and model_info.context_window:
        return int(model_info.context_window)
    return None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Core model-switching pipeline
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@@ -42,7 +42,7 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [
|
||||
("anthropic/claude-sonnet-4.5", ""),
|
||||
("anthropic/claude-haiku-4.5", ""),
|
||||
("openrouter/elephant-alpha", "free"),
|
||||
("openai/gpt-5.4", ""),
|
||||
("openai/gpt-5.5", ""),
|
||||
("openai/gpt-5.4-mini", ""),
|
||||
("xiaomi/mimo-v2.5-pro", ""),
|
||||
("xiaomi/mimo-v2.5", ""),
|
||||
@@ -65,7 +65,7 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [
|
||||
("nvidia/nemotron-3-super-120b-a12b:free", "free"),
|
||||
("arcee-ai/trinity-large-preview:free", "free"),
|
||||
("arcee-ai/trinity-large-thinking", ""),
|
||||
("openai/gpt-5.4-pro", ""),
|
||||
("openai/gpt-5.5-pro", ""),
|
||||
("openai/gpt-5.4-nano", ""),
|
||||
]
|
||||
|
||||
@@ -120,7 +120,7 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
|
||||
"anthropic/claude-sonnet-4.6",
|
||||
"anthropic/claude-sonnet-4.5",
|
||||
"anthropic/claude-haiku-4.5",
|
||||
"openai/gpt-5.4",
|
||||
"openai/gpt-5.5",
|
||||
"openai/gpt-5.4-mini",
|
||||
"openai/gpt-5.3-codex",
|
||||
"google/gemini-3-pro-preview",
|
||||
@@ -139,7 +139,7 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
|
||||
"x-ai/grok-4.20-beta",
|
||||
"nvidia/nemotron-3-super-120b-a12b",
|
||||
"arcee-ai/trinity-large-thinking",
|
||||
"openai/gpt-5.4-pro",
|
||||
"openai/gpt-5.5-pro",
|
||||
"openai/gpt-5.4-nano",
|
||||
],
|
||||
# Native OpenAI Chat Completions (api.openai.com). Used by /model counts and
|
||||
|
||||
@@ -0,0 +1,229 @@
|
||||
"""PTY bridge for `hermes dashboard` chat tab.
|
||||
|
||||
Wraps a child process behind a pseudo-terminal so its ANSI output can be
|
||||
streamed to a browser-side terminal emulator (xterm.js) and typed
|
||||
keystrokes can be fed back in. The only caller today is the
|
||||
``/api/pty`` WebSocket endpoint in ``hermes_cli.web_server``.
|
||||
|
||||
Design constraints:
|
||||
|
||||
* **POSIX-only.** Hermes Agent supports Windows exclusively via WSL, which
|
||||
exposes a native POSIX PTY via ``openpty(3)``. Native Windows Python
|
||||
has no PTY; :class:`PtyUnavailableError` is raised with a user-readable
|
||||
install/platform message so the dashboard can render a banner instead of
|
||||
crashing.
|
||||
* **Zero Node dependency on the server side.** We use :mod:`ptyprocess`,
|
||||
which is a pure-Python wrapper around the OS calls. The browser talks
|
||||
to the same ``hermes --tui`` binary it would launch from the CLI, so
|
||||
every TUI feature (slash popover, model picker, tool rows, markdown,
|
||||
skin engine, clarify/sudo/approval prompts) ships automatically.
|
||||
* **Byte-safe I/O.** Reads and writes go through the PTY master fd
|
||||
directly — we avoid :class:`ptyprocess.PtyProcessUnicode` because
|
||||
streaming ANSI is inherently byte-oriented and UTF-8 boundaries may land
|
||||
mid-read.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import errno
|
||||
import fcntl
|
||||
import os
|
||||
import select
|
||||
import signal
|
||||
import struct
|
||||
import sys
|
||||
import termios
|
||||
import time
|
||||
from typing import Optional, Sequence
|
||||
|
||||
try:
|
||||
import ptyprocess # type: ignore
|
||||
_PTY_AVAILABLE = not sys.platform.startswith("win")
|
||||
except ImportError: # pragma: no cover - dev env without ptyprocess
|
||||
ptyprocess = None # type: ignore
|
||||
_PTY_AVAILABLE = False
|
||||
|
||||
|
||||
__all__ = ["PtyBridge", "PtyUnavailableError"]
|
||||
|
||||
|
||||
class PtyUnavailableError(RuntimeError):
    """Signal that no pseudo-terminal can be created in this environment.

    Currently raised on native Windows (no ConPTY bindings) and in
    environments where the ``ptyprocess`` dependency is not installed.
    The message is written for end users: the dashboard shows it as a
    banner on the chat tab.
    """
|
||||
|
||||
|
||||
class PtyBridge:
    """Thin wrapper around ``ptyprocess.PtyProcess`` for byte streaming.

    Not thread-safe.  A single bridge is owned by the WebSocket handler
    that spawned it; the reader runs in an executor thread while writes
    happen on the event-loop thread.  Both sides are OK because the
    kernel PTY is the actual synchronization point — we never call
    :mod:`ptyprocess` methods concurrently, we only call ``os.read`` and
    ``os.write`` on the master fd, which is safe.
    """

    # ``struct winsize`` fields are unsigned shorts; larger values cannot
    # be packed and must be clamped before the ioctl.
    _MAX_WINSIZE_DIM = 0xFFFF

    def __init__(self, proc: "ptyprocess.PtyProcess"):  # type: ignore[name-defined]
        """Wrap an already-spawned process object exposing the PTY master ``fd``."""
        self._proc = proc
        self._fd: int = proc.fd
        self._closed = False

    # -- lifecycle --------------------------------------------------------

    @classmethod
    def is_available(cls) -> bool:
        """True if a PTY can be spawned on this platform."""
        return bool(_PTY_AVAILABLE)

    @classmethod
    def spawn(
        cls,
        argv: Sequence[str],
        *,
        cwd: Optional[str] = None,
        env: Optional[dict] = None,
        cols: int = 80,
        rows: int = 24,
    ) -> "PtyBridge":
        """Spawn ``argv`` behind a new PTY and return a bridge.

        Raises :class:`PtyUnavailableError` if the platform can't host a
        PTY.  Raises :class:`FileNotFoundError` or :class:`OSError` for
        ordinary exec failures (missing binary, bad cwd, etc.).
        """
        if not _PTY_AVAILABLE:
            if sys.platform.startswith("win"):
                raise PtyUnavailableError(
                    "Pseudo-terminals are unavailable on this platform. "
                    "Hermes Agent supports Windows only via WSL."
                )
            if ptyprocess is None:
                raise PtyUnavailableError(
                    "The `ptyprocess` package is missing. "
                    "Install with: pip install ptyprocess "
                    "(or pip install -e '.[pty]')."
                )
            raise PtyUnavailableError("Pseudo-terminals are unavailable.")
        # Let caller-supplied env fully override inheritance; if they pass
        # None we inherit the server's env (same semantics as subprocess).
        spawn_env = os.environ.copy() if env is None else env
        proc = ptyprocess.PtyProcess.spawn(  # type: ignore[union-attr]
            list(argv),
            cwd=cwd,
            env=spawn_env,
            dimensions=(rows, cols),
        )
        return cls(proc)

    @property
    def pid(self) -> int:
        """Process id reported by the wrapped process object."""
        return int(self._proc.pid)

    def _proc_alive(self) -> bool:
        """Liveness of the wrapped process; any error counts as "not alive"."""
        try:
            return bool(self._proc.isalive())
        except Exception:
            return False

    def is_alive(self) -> bool:
        """False once the bridge is closed; otherwise the child's liveness."""
        if self._closed:
            return False
        return self._proc_alive()

    # -- I/O --------------------------------------------------------------

    def read(self, timeout: float = 0.2) -> Optional[bytes]:
        """Read up to 64 KiB of raw bytes from the PTY master.

        Returns:
            * non-empty bytes — child output
            * empty bytes (``b""``) — no data available within ``timeout``
            * None — the master fd is at EOF / unusable, or the bridge
              has been closed

        Waits at most ``timeout`` seconds for data.  Safe to call after
        :meth:`close`; returns ``None`` in that case.
        """
        if self._closed:
            return None
        try:
            readable, _, _ = select.select([self._fd], [], [], timeout)
        except (OSError, ValueError):
            return None
        if not readable:
            return b""
        try:
            data = os.read(self._fd, 65536)
        except OSError as exc:
            # EIO on Linux = slave side closed. EBADF = already closed.
            if exc.errno in (errno.EIO, errno.EBADF):
                return None
            raise
        if not data:
            return None
        return data

    def write(self, data: bytes) -> None:
        """Write raw bytes to the PTY master (i.e. the child's stdin).

        No-op when the bridge is closed or ``data`` is empty.  Writes that
        fail with EIO/EBADF/EPIPE are dropped silently; other ``OSError``s
        propagate.
        """
        if self._closed or not data:
            return
        # os.write can return a short write under load; loop until drained.
        view = memoryview(data)
        while view:
            try:
                n = os.write(self._fd, view)
            except OSError as exc:
                if exc.errno in (errno.EIO, errno.EBADF, errno.EPIPE):
                    return
                raise
            if n <= 0:
                return
            view = view[n:]

    def resize(self, cols: int, rows: int) -> None:
        """Forward a terminal resize to the child via ``TIOCSWINSZ``.

        Both dimensions are clamped to ``1..65535`` so that out-of-range
        values cannot make ``struct.pack`` raise.  ``OSError`` from the
        ioctl is ignored; the call is a no-op once the bridge is closed.
        """
        if self._closed:
            return
        limit = self._MAX_WINSIZE_DIM
        safe_rows = min(limit, max(1, rows))
        safe_cols = min(limit, max(1, cols))
        # struct winsize: rows, cols, xpixel, ypixel (all unsigned short)
        winsize = struct.pack("HHHH", safe_rows, safe_cols, 0, 0)
        try:
            fcntl.ioctl(self._fd, termios.TIOCSWINSZ, winsize)
        except OSError:
            pass

    # -- teardown ---------------------------------------------------------

    def close(self) -> None:
        """Terminate the child (SIGHUP → SIGTERM → SIGKILL) and close it.

        Each signal is followed by a grace period of up to 0.5s; the next
        signal is only sent if the child is still alive.  Idempotent, and
        never raises because of liveness-check, signal or close failures.
        Reaping the child is important so we don't leak zombies across
        the lifetime of the dashboard process.
        """
        if self._closed:
            return
        self._closed = True

        # SIGHUP is the conventional "your terminal went away" signal.
        # We escalate if the child ignores it.
        for sig in (signal.SIGHUP, signal.SIGTERM, signal.SIGKILL):
            if not self._proc_alive():
                break
            try:
                self._proc.kill(sig)
            except Exception:
                pass
            deadline = time.monotonic() + 0.5
            while self._proc_alive() and time.monotonic() < deadline:
                time.sleep(0.02)

        try:
            self._proc.close(force=True)
        except Exception:
            pass

    # Context-manager sugar — handy in tests and ad-hoc scripts.
    def __enter__(self) -> "PtyBridge":
        return self

    def __exit__(self, *_exc) -> None:
        self.close()
|
||||
+68
-11
@@ -68,12 +68,13 @@ CONFIGURABLE_TOOLSETS = [
|
||||
("rl", "🧪 RL Training", "Tinker-Atropos training tools"),
|
||||
("homeassistant", "🏠 Home Assistant", "smart home device control"),
|
||||
("spotify", "🎵 Spotify", "playback, search, playlists, library"),
|
||||
("discord_admin", "🛡️ Discord Server Admin", "list channels/roles, pin, assign roles"),
|
||||
]
|
||||
|
||||
# Toolsets that are OFF by default for new installs.
|
||||
# They're still in _HERMES_CORE_TOOLS (available at runtime if enabled),
|
||||
# but the setup checklist won't pre-select them for first-time users.
|
||||
_DEFAULT_OFF_TOOLSETS = {"moa", "homeassistant", "rl", "spotify"}
|
||||
_DEFAULT_OFF_TOOLSETS = {"moa", "homeassistant", "rl", "spotify", "discord_admin"}
|
||||
|
||||
|
||||
def _get_effective_configurable_toolsets():
|
||||
@@ -368,13 +369,9 @@ TOOL_CATEGORIES = {
|
||||
"providers": [
|
||||
{
|
||||
"name": "Spotify Web API",
|
||||
"tag": "PKCE OAuth — run `hermes auth spotify` after this",
|
||||
"env_vars": [
|
||||
{"key": "HERMES_SPOTIFY_CLIENT_ID", "prompt": "Spotify app client_id",
|
||||
"url": "https://developer.spotify.com/dashboard"},
|
||||
{"key": "HERMES_SPOTIFY_REDIRECT_URI", "prompt": "Redirect URI (must be allow-listed in your Spotify app)",
|
||||
"default": "http://127.0.0.1:43827/spotify/callback"},
|
||||
],
|
||||
"tag": "PKCE OAuth — opens the setup wizard",
|
||||
"env_vars": [],
|
||||
"post_setup": "spotify",
|
||||
},
|
||||
],
|
||||
},
|
||||
@@ -478,6 +475,35 @@ def _run_post_setup(post_setup_key: str):
|
||||
_print_warning(" kittentts install timed out (>5min)")
|
||||
_print_info(f" Run manually: python -m pip install -U '{wheel_url}' soundfile")
|
||||
|
||||
elif post_setup_key == "spotify":
|
||||
# Run the full `hermes auth spotify` flow — if the user has no
|
||||
# client_id yet, this drops them into the interactive wizard
|
||||
# (opens the Spotify dashboard, prompts for client_id, persists
|
||||
# to ~/.hermes/.env), then continues straight into PKCE. If they
|
||||
# already have an app, it skips the wizard and just does OAuth.
|
||||
from types import SimpleNamespace
|
||||
try:
|
||||
from hermes_cli.auth import login_spotify_command
|
||||
except Exception as exc:
|
||||
_print_warning(f" Could not load Spotify auth: {exc}")
|
||||
_print_info(" Run manually: hermes auth spotify")
|
||||
return
|
||||
_print_info(" Starting Spotify login...")
|
||||
try:
|
||||
login_spotify_command(SimpleNamespace(
|
||||
client_id=None, redirect_uri=None, scope=None,
|
||||
no_browser=False, timeout=None,
|
||||
))
|
||||
_print_success(" Spotify authenticated")
|
||||
except SystemExit as exc:
|
||||
# User aborted the wizard, or OAuth failed — don't fail the
|
||||
# toolset enable; they can retry with `hermes auth spotify`.
|
||||
_print_warning(f" Spotify login did not complete: {exc}")
|
||||
_print_info(" Run later: hermes auth spotify")
|
||||
except Exception as exc:
|
||||
_print_warning(f" Spotify login failed: {exc}")
|
||||
_print_info(" Run manually: hermes auth spotify")
|
||||
|
||||
elif post_setup_key == "rl_training":
|
||||
try:
|
||||
__import__("tinker_atropos")
|
||||
@@ -566,7 +592,7 @@ def _get_platform_tools(
|
||||
include_default_mcp_servers: bool = True,
|
||||
) -> Set[str]:
|
||||
"""Resolve which individual toolset names are enabled for a platform."""
|
||||
from toolsets import resolve_toolset
|
||||
from toolsets import resolve_toolset, TOOLSETS
|
||||
|
||||
platform_toolsets = config.get("platform_toolsets") or {}
|
||||
toolset_names = platform_toolsets.get(platform)
|
||||
@@ -580,6 +606,8 @@ def _get_platform_tools(
|
||||
toolset_names = [str(ts) for ts in toolset_names]
|
||||
|
||||
configurable_keys = {ts_key for ts_key, _, _ in CONFIGURABLE_TOOLSETS}
|
||||
plugin_ts_keys = _get_plugin_toolset_keys()
|
||||
platform_default_keys = {p["default_toolset"] for p in PLATFORMS.values()}
|
||||
|
||||
# If the saved list contains any configurable keys directly, the user
|
||||
# has explicitly configured this platform — use direct membership.
|
||||
@@ -602,11 +630,42 @@ def _get_platform_tools(
|
||||
ts_tools = set(resolve_toolset(ts_key))
|
||||
if ts_tools and ts_tools.issubset(all_tool_names):
|
||||
enabled_toolsets.add(ts_key)
|
||||
|
||||
default_off = set(_DEFAULT_OFF_TOOLSETS)
|
||||
if platform in default_off:
|
||||
default_off.remove(platform)
|
||||
enabled_toolsets -= default_off
|
||||
|
||||
# Recover non-configurable platform toolsets (e.g. discord, feishu_doc,
|
||||
# feishu_drive). These are part of the platform's default composite but
|
||||
# absent from CONFIGURABLE_TOOLSETS, so they can't appear in the TUI
|
||||
# checklist or in a user-saved config. Must run in BOTH branches —
|
||||
# otherwise saving via `hermes tools` (which flips has_explicit_config
|
||||
# to True) silently drops them.
|
||||
platform_tool_universe = set(resolve_toolset(PLATFORMS[platform]["default_toolset"]))
|
||||
configurable_tool_universe = set()
|
||||
for ck in configurable_keys:
|
||||
configurable_tool_universe.update(resolve_toolset(ck))
|
||||
claimed = set()
|
||||
for ts_key in enabled_toolsets:
|
||||
claimed.update(resolve_toolset(ts_key))
|
||||
skip = configurable_keys | plugin_ts_keys | platform_default_keys
|
||||
skip |= {k for k in TOOLSETS if k.startswith("hermes-")}
|
||||
skip |= set(_DEFAULT_OFF_TOOLSETS) - {platform}
|
||||
for ts_key, ts_def in TOOLSETS.items():
|
||||
if ts_key in skip:
|
||||
continue
|
||||
if ts_def.get("includes"):
|
||||
continue
|
||||
ts_tools = set(resolve_toolset(ts_key))
|
||||
if not ts_tools or not ts_tools.issubset(platform_tool_universe):
|
||||
continue
|
||||
if ts_tools.issubset(configurable_tool_universe):
|
||||
continue
|
||||
if not ts_tools.issubset(claimed):
|
||||
enabled_toolsets.add(ts_key)
|
||||
claimed.update(ts_tools)
|
||||
|
||||
# Plugin toolsets: enabled by default unless explicitly disabled, or
|
||||
# unless the toolset is in _DEFAULT_OFF_TOOLSETS (e.g. spotify —
|
||||
# shipped as a bundled plugin but user must opt in via `hermes tools`
|
||||
@@ -614,7 +673,6 @@ def _get_platform_tools(
|
||||
# A plugin toolset is "known" for a platform once `hermes tools`
|
||||
# has been saved for that platform (tracked via known_plugin_toolsets).
|
||||
# Unknown plugins default to enabled; known-but-absent = disabled.
|
||||
plugin_ts_keys = _get_plugin_toolset_keys()
|
||||
if plugin_ts_keys:
|
||||
known_map = config.get("known_plugin_toolsets", {})
|
||||
known_for_platform = set(known_map.get(platform, []))
|
||||
@@ -632,7 +690,6 @@ def _get_platform_tools(
|
||||
|
||||
# Preserve any explicit non-configurable toolset entries (for example,
|
||||
# custom toolsets or MCP server names saved in platform_toolsets).
|
||||
platform_default_keys = {p["default_toolset"] for p in PLATFORMS.values()}
|
||||
explicit_passthrough = {
|
||||
ts
|
||||
for ts in toolset_names
|
||||
|
||||
+351
-10
@@ -49,7 +49,7 @@ from hermes_cli.config import (
|
||||
from gateway.status import get_running_pid, read_runtime_status
|
||||
|
||||
try:
|
||||
from fastapi import FastAPI, HTTPException, Request
|
||||
from fastapi import FastAPI, HTTPException, Request, WebSocket, WebSocketDisconnect
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.responses import FileResponse, HTMLResponse, JSONResponse
|
||||
from fastapi.staticfiles import StaticFiles
|
||||
@@ -73,6 +73,10 @@ app = FastAPI(title="Hermes Agent", version=__version__)
|
||||
_SESSION_TOKEN = secrets.token_urlsafe(32)
|
||||
_SESSION_HEADER_NAME = "X-Hermes-Session-Token"
|
||||
|
||||
# In-browser Chat tab (/chat, /api/pty, …). Off unless ``hermes dashboard --tui``
|
||||
# or HERMES_DASHBOARD_TUI=1. Set from :func:`start_server`.
|
||||
_DASHBOARD_EMBEDDED_CHAT_ENABLED = False
|
||||
|
||||
# Simple rate limiter for the reveal endpoint
|
||||
_reveal_timestamps: List[float] = []
|
||||
_REVEAL_MAX_PER_WINDOW = 5
|
||||
@@ -283,7 +287,7 @@ _SCHEMA_OVERRIDES: Dict[str, Dict[str, Any]] = {
|
||||
"display.busy_input_mode": {
|
||||
"type": "select",
|
||||
"description": "Input behavior while agent is running",
|
||||
"options": ["queue", "interrupt", "block"],
|
||||
"options": ["interrupt", "queue"],
|
||||
},
|
||||
"memory.provider": {
|
||||
"type": "select",
|
||||
@@ -1529,26 +1533,30 @@ def _submit_anthropic_pkce(session_id: str, code_input: str) -> Dict[str, Any]:
|
||||
with urllib.request.urlopen(req, timeout=20) as resp:
|
||||
result = json.loads(resp.read().decode())
|
||||
except Exception as e:
|
||||
sess["status"] = "error"
|
||||
sess["error_message"] = f"Token exchange failed: {e}"
|
||||
with _oauth_sessions_lock:
|
||||
sess["status"] = "error"
|
||||
sess["error_message"] = f"Token exchange failed: {e}"
|
||||
return {"ok": False, "status": "error", "message": sess["error_message"]}
|
||||
|
||||
access_token = result.get("access_token", "")
|
||||
refresh_token = result.get("refresh_token", "")
|
||||
expires_in = int(result.get("expires_in") or 3600)
|
||||
if not access_token:
|
||||
sess["status"] = "error"
|
||||
sess["error_message"] = "No access token returned"
|
||||
with _oauth_sessions_lock:
|
||||
sess["status"] = "error"
|
||||
sess["error_message"] = "No access token returned"
|
||||
return {"ok": False, "status": "error", "message": sess["error_message"]}
|
||||
|
||||
expires_at_ms = int(time.time() * 1000) + (expires_in * 1000)
|
||||
try:
|
||||
_save_anthropic_oauth_creds(access_token, refresh_token, expires_at_ms)
|
||||
except Exception as e:
|
||||
sess["status"] = "error"
|
||||
sess["error_message"] = f"Save failed: {e}"
|
||||
with _oauth_sessions_lock:
|
||||
sess["status"] = "error"
|
||||
sess["error_message"] = f"Save failed: {e}"
|
||||
return {"ok": False, "status": "error", "message": sess["error_message"]}
|
||||
sess["status"] = "approved"
|
||||
with _oauth_sessions_lock:
|
||||
sess["status"] = "approved"
|
||||
_log.info("oauth/pkce: anthropic login completed (session=%s)", session_id)
|
||||
return {"ok": True, "status": "approved"}
|
||||
|
||||
@@ -2263,6 +2271,329 @@ async def get_usage_analytics(days: int = 30):
|
||||
db.close()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# /api/pty — PTY-over-WebSocket bridge for the dashboard "Chat" tab.
|
||||
#
|
||||
# The endpoint spawns the same ``hermes --tui`` binary the CLI uses, behind
|
||||
# a POSIX pseudo-terminal, and forwards bytes + resize escapes across a
|
||||
# WebSocket. The browser renders the ANSI through xterm.js (see
|
||||
# web/src/pages/ChatPage.tsx).
|
||||
#
|
||||
# Auth: ``?token=<session_token>`` query param (browsers can't set
|
||||
# Authorization on the WS upgrade). Same ephemeral ``_SESSION_TOKEN`` as
|
||||
# REST. Localhost-only — we defensively reject non-loopback clients even
|
||||
# though uvicorn binds to 127.0.0.1.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
import re
|
||||
import asyncio
|
||||
|
||||
from hermes_cli.pty_bridge import PtyBridge, PtyUnavailableError
|
||||
|
||||
_RESIZE_RE = re.compile(rb"\x1b\[RESIZE:(\d+);(\d+)\]")
|
||||
_PTY_READ_CHUNK_TIMEOUT = 0.2
|
||||
_VALID_CHANNEL_RE = re.compile(r"^[A-Za-z0-9._-]{1,128}$")
|
||||
# Starlette's TestClient reports the peer as "testclient"; treat it as
|
||||
# loopback so tests don't need to rewrite request scope.
|
||||
_LOOPBACK_HOSTS = frozenset({"127.0.0.1", "::1", "localhost", "testclient"})
|
||||
|
||||
# Per-channel subscriber registry used by /api/pub (PTY-side gateway → dashboard)
|
||||
# and /api/events (dashboard → browser sidebar). Keyed by an opaque channel id
|
||||
# the chat tab generates on mount; entries auto-evict when the last subscriber
|
||||
# drops (publisher connections are not tracked in this registry).
|
||||
_event_channels: dict[str, set] = {}
|
||||
_event_lock = asyncio.Lock()
|
||||
|
||||
|
||||
def _resolve_chat_argv(
    resume: Optional[str] = None,
    sidecar_url: Optional[str] = None,
) -> tuple[list[str], Optional[str], Optional[dict]]:
    """Work out how to launch the chat PTY: ``(argv, cwd, env)``.

    By default this is whatever ``hermes --tui`` would run. Tests
    monkeypatch this function to substitute a tiny fake command
    (``cat``, ``sh -c 'printf …'``) so neither Node nor the TUI bundle
    has to be built.

    ``resume`` travels through the ``HERMES_TUI_RESUME`` environment
    variable, mirroring ``hermes_cli.main._launch_tui`` on the CLI path;
    adding ``--resume <id>`` to argv would be pointless because ``ui-tui``
    does not parse its argv.

    ``sidecar_url``, when given, is exported as ``HERMES_TUI_SIDECAR_URL``
    so the spawned ``tui_gateway.entry`` can mirror dispatcher emits to
    the dashboard's ``/api/pub`` endpoint (see :func:`pub_ws`).

    ``env`` is ``None`` when neither option is set; otherwise it is a
    copy of the current environment with the extra variables applied.
    """
    from hermes_cli.main import PROJECT_ROOT, _make_tui_argv

    argv, cwd = _make_tui_argv(PROJECT_ROOT / "ui-tui", tui_dev=False)

    # Collect only the variables we need to add on top of the environment.
    overrides: dict = {}
    if resume:
        overrides["HERMES_TUI_RESUME"] = resume
    if sidecar_url:
        overrides["HERMES_TUI_SIDECAR_URL"] = sidecar_url

    env: Optional[dict] = {**os.environ, **overrides} if overrides else None
    working_dir = str(cwd) if cwd else None

    return list(argv), working_dir, env
|
||||
|
||||
|
||||
def _build_sidecar_url(channel: str) -> Optional[str]:
    """Build the ``ws://`` URL the PTY child publishes events to.

    Returns ``None`` when the bound host or port has not been recorded
    on ``app.state``. The URL carries the session token and the channel
    id as query parameters.
    """
    state = app.state
    host = getattr(state, "bound_host", None)
    port = getattr(state, "bound_port", None)

    if not (host and port):
        return None

    # A bare IPv6 literal has to be bracketed inside a URL authority.
    if ":" in host and not host.startswith("["):
        netloc = f"[{host}]:{port}"
    else:
        netloc = f"{host}:{port}"

    query = urllib.parse.urlencode({"token": _SESSION_TOKEN, "channel": channel})

    return f"ws://{netloc}/api/pub?{query}"
|
||||
|
||||
|
||||
async def _broadcast_event(channel: str, payload: str) -> None:
    """Send one publisher frame to every subscriber currently on `channel`.

    The subscriber set is snapshotted under ``_event_lock``; the sends
    themselves happen after the lock has been released.
    """
    async with _event_lock:
        recipients = tuple(_event_channels.get(channel, ()))

    for socket in recipients:
        try:
            await socket.send_text(payload)
        except Exception:
            # The subscriber vanished mid-send. Its /api/events handler
            # removes it from the registry in its own finally clause.
            continue
|
||||
|
||||
|
||||
def _channel_or_close_code(ws: WebSocket) -> Optional[str]:
    """Return the channel id from the query string, or None if invalid.

    A missing ``channel`` parameter is treated as the empty string,
    which the pattern rejects.
    """
    channel = ws.query_params.get("channel", "")

    # fullmatch, not match: the pattern ends in ``$``, which also matches
    # just before a trailing newline, so ``match`` would accept "abc\n".
    return channel if _VALID_CHANNEL_RE.fullmatch(channel) else None
|
||||
|
||||
|
||||
@app.websocket("/api/pty")
async def pty_ws(ws: WebSocket) -> None:
    """Bridge one browser WebSocket to a freshly spawned chat PTY.

    Rejections happen before ``accept()``:

    * 4403 — embedded chat is disabled, or the peer is not a loopback host
    * 4401 — the ``token`` query parameter does not match the session token

    After accepting, the chat command is resolved and spawned behind a
    PTY. If that fails, a red error line is sent and the socket is closed
    with 1011. Otherwise two pumps run until the browser disconnects:

    * a background task forwarding PTY output to the socket as binary frames
    * the main loop forwarding socket input to the PTY, except frames that
      consist solely of a resize escape, which are applied via
      ``bridge.resize`` and never written to the child

    The bridge is always closed on the way out.
    """
    if not _DASHBOARD_EMBEDDED_CHAT_ENABLED:
        await ws.close(code=4403)
        return

    # --- auth + loopback check (before accept so we can close cleanly) ---
    token = ws.query_params.get("token", "")
    expected = _SESSION_TOKEN
    if not hmac.compare_digest(token.encode(), expected.encode()):
        await ws.close(code=4401)
        return

    client_host = ws.client.host if ws.client else ""
    if client_host and client_host not in _LOOPBACK_HOSTS:
        await ws.close(code=4403)
        return

    await ws.accept()

    # --- spawn PTY ------------------------------------------------------
    resume = ws.query_params.get("resume") or None
    # An absent or invalid channel just means "no sidecar"; it does not
    # reject the connection.
    channel = _channel_or_close_code(ws)
    sidecar_url = _build_sidecar_url(channel) if channel else None

    try:
        argv, cwd, env = _resolve_chat_argv(resume=resume, sidecar_url=sidecar_url)
    except SystemExit as exc:
        # _make_tui_argv calls sys.exit(1) when node/npm is missing.
        await ws.send_text(f"\r\n\x1b[31mChat unavailable: {exc}\x1b[0m\r\n")
        await ws.close(code=1011)
        return

    try:
        bridge = PtyBridge.spawn(argv, cwd=cwd, env=env)
    except PtyUnavailableError as exc:
        await ws.send_text(f"\r\n\x1b[31mChat unavailable: {exc}\x1b[0m\r\n")
        await ws.close(code=1011)
        return
    except OSError as exc:
        # FileNotFoundError is a subclass of OSError, so this covers it.
        await ws.send_text(f"\r\n\x1b[31mChat failed to start: {exc}\x1b[0m\r\n")
        await ws.close(code=1011)
        return

    loop = asyncio.get_running_loop()

    # --- reader task: PTY master → WebSocket ----------------------------
    async def pump_pty_to_ws() -> None:
        # NOTE(review): on PTY EOF this task just stops; the socket stays
        # open until the browser disconnects — confirm that is intended.
        while True:
            # bridge.read blocks (up to the timeout), so keep it off the loop.
            chunk = await loop.run_in_executor(
                None, bridge.read, _PTY_READ_CHUNK_TIMEOUT
            )
            if chunk is None:  # EOF
                return
            if not chunk:  # no data this tick; yield control and retry
                await asyncio.sleep(0)
                continue
            try:
                await ws.send_bytes(chunk)
            except Exception:
                return

    reader_task = asyncio.create_task(pump_pty_to_ws())

    # --- writer loop: WebSocket → PTY master ----------------------------
    try:
        while True:
            msg = await ws.receive()
            msg_type = msg.get("type")
            if msg_type == "websocket.disconnect":
                break
            raw = msg.get("bytes")
            if raw is None:
                text = msg.get("text")
                raw = text.encode("utf-8") if isinstance(text, str) else b""
            if not raw:
                continue

            # Resize escape is consumed locally, never written to the PTY.
            match = _RESIZE_RE.match(raw)
            if match and match.end() == len(raw):
                cols = int(match.group(1))
                rows = int(match.group(2))
                bridge.resize(cols=cols, rows=rows)
                continue

            bridge.write(raw)
    except WebSocketDisconnect:
        pass
    finally:
        reader_task.cancel()
        try:
            await reader_task
        except (asyncio.CancelledError, Exception):
            pass
        # bridge.close() polls with time.sleep while it escalates signals,
        # so running it inline would stall every other request on the
        # event loop. Hand it to the default executor instead.
        try:
            await loop.run_in_executor(None, bridge.close)
        except RuntimeError:
            # Executor unavailable (e.g. during shutdown): close inline
            # rather than leak the child. close() is idempotent.
            bridge.close()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# /api/ws — JSON-RPC WebSocket sidecar for the dashboard "Chat" tab.
|
||||
#
|
||||
# Drives the same `tui_gateway.dispatch` surface Ink uses over stdio, so the
|
||||
# dashboard can render structured metadata (model badge, tool-call sidebar,
|
||||
# slash launcher, session info) alongside the xterm.js terminal that PTY
|
||||
# already paints. Both transports bind to the same session id when one is
|
||||
# active, so a tool.start emitted by the agent fans out to both sinks.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@app.websocket("/api/ws")
async def gateway_ws(ws: WebSocket) -> None:
    """Hand an authenticated loopback WebSocket to ``tui_gateway.ws.handle_ws``.

    Closes with 4403 when embedded chat is disabled or the peer is not a
    loopback host, and with 4401 when the ``token`` query parameter does
    not match the session token. The checks run in that order: feature
    flag, token, peer.
    """
    peer = ws.client.host if ws.client else ""

    if not _DASHBOARD_EMBEDDED_CHAT_ENABLED:
        reject_code = 4403
    elif not hmac.compare_digest(
        ws.query_params.get("token", "").encode(), _SESSION_TOKEN.encode()
    ):
        reject_code = 4401
    elif peer and peer not in _LOOPBACK_HOSTS:
        reject_code = 4403
    else:
        reject_code = None

    if reject_code is not None:
        await ws.close(code=reject_code)
        return

    # Imported here, after the connection has passed every check.
    from tui_gateway.ws import handle_ws

    await handle_ws(ws)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# /api/pub + /api/events — chat-tab event broadcast.
|
||||
#
|
||||
# The PTY-side ``tui_gateway.entry`` opens /api/pub at startup (driven by
|
||||
# HERMES_TUI_SIDECAR_URL set in /api/pty's PTY env) and writes every
|
||||
# dispatcher emit through it. The dashboard fans those frames out to any
|
||||
# subscriber that opened /api/events on the same channel id. This is what
|
||||
# gives the React sidebar its tool-call feed without breaking the PTY
|
||||
# child's stdio handshake with Ink.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@app.websocket("/api/pub")
async def pub_ws(ws: WebSocket) -> None:
    """Publisher side of the chat-tab event broadcast.

    Every text frame received is fanned out to the subscribers of the
    channel named in the query string. Close codes used before accepting:

    * 4403 — embedded chat disabled, or non-loopback peer
    * 4401 — bad session token
    * 4400 — missing or malformed channel id
    """
    peer = ws.client.host if ws.client else ""
    channel = _channel_or_close_code(ws)

    if not _DASHBOARD_EMBEDDED_CHAT_ENABLED:
        reject_code = 4403
    elif not hmac.compare_digest(
        ws.query_params.get("token", "").encode(), _SESSION_TOKEN.encode()
    ):
        reject_code = 4401
    elif peer and peer not in _LOOPBACK_HOSTS:
        reject_code = 4403
    elif not channel:
        reject_code = 4400
    else:
        reject_code = None

    if reject_code is not None:
        await ws.close(code=reject_code)
        return

    await ws.accept()

    try:
        while True:
            frame = await ws.receive_text()
            await _broadcast_event(channel, frame)
    except WebSocketDisconnect:
        # Publisher went away; nothing to clean up on this side.
        return
|
||||
|
||||
|
||||
@app.websocket("/api/events")
async def events_ws(ws: WebSocket) -> None:
    """Subscriber side of the chat-tab event broadcast.

    Registers the socket under its channel id so ``_broadcast_event`` can
    reach it, then idles until the browser disconnects. On the way out
    the socket is removed from the registry, and the channel entry is
    dropped once its subscriber set is empty.

    Close codes used before accepting:

    * 4403 — embedded chat disabled, or non-loopback peer
    * 4401 — bad session token
    * 4400 — missing or malformed channel id
    """
    peer = ws.client.host if ws.client else ""
    channel = _channel_or_close_code(ws)

    if not _DASHBOARD_EMBEDDED_CHAT_ENABLED:
        reject_code = 4403
    elif not hmac.compare_digest(
        ws.query_params.get("token", "").encode(), _SESSION_TOKEN.encode()
    ):
        reject_code = 4401
    elif peer and peer not in _LOOPBACK_HOSTS:
        reject_code = 4403
    elif not channel:
        reject_code = 4400
    else:
        reject_code = None

    if reject_code is not None:
        await ws.close(code=reject_code)
        return

    await ws.accept()

    async with _event_lock:
        _event_channels.setdefault(channel, set()).add(ws)

    try:
        # Subscribers never send anything meaningful; receiving simply
        # parks the handler until the browser drops the connection.
        while True:
            await ws.receive_text()
    except WebSocketDisconnect:
        pass
    finally:
        async with _event_lock:
            remaining = _event_channels.get(channel)

            if remaining is not None:
                remaining.discard(ws)

            # Drop the channel entry once nobody is listening any more.
            if not remaining:
                _event_channels.pop(channel, None)
|
||||
|
||||
|
||||
def mount_spa(application: FastAPI):
|
||||
"""Mount the built SPA. Falls back to index.html for client-side routing.
|
||||
|
||||
@@ -2284,8 +2615,10 @@ def mount_spa(application: FastAPI):
|
||||
def _serve_index():
|
||||
"""Return index.html with the session token injected."""
|
||||
html = _index_path.read_text()
|
||||
chat_js = "true" if _DASHBOARD_EMBEDDED_CHAT_ENABLED else "false"
|
||||
token_script = (
|
||||
f'<script>window.__HERMES_SESSION_TOKEN__="{_SESSION_TOKEN}";</script>'
|
||||
f'<script>window.__HERMES_SESSION_TOKEN__="{_SESSION_TOKEN}";'
|
||||
f"window.__HERMES_DASHBOARD_EMBEDDED_CHAT__={chat_js};</script>"
|
||||
)
|
||||
html = html.replace("</head>", f"{token_script}</head>", 1)
|
||||
return HTMLResponse(
|
||||
@@ -2798,10 +3131,15 @@ def start_server(
|
||||
port: int = 9119,
|
||||
open_browser: bool = True,
|
||||
allow_public: bool = False,
|
||||
*,
|
||||
embedded_chat: bool = False,
|
||||
):
|
||||
"""Start the web UI server."""
|
||||
import uvicorn
|
||||
|
||||
global _DASHBOARD_EMBEDDED_CHAT_ENABLED
|
||||
_DASHBOARD_EMBEDDED_CHAT_ENABLED = embedded_chat
|
||||
|
||||
_LOCALHOST = ("127.0.0.1", "localhost", "::1")
|
||||
if host not in _LOCALHOST and not allow_public:
|
||||
raise SystemExit(
|
||||
@@ -2817,7 +3155,10 @@ def start_server(
|
||||
|
||||
# Record the bound host so host_header_middleware can validate incoming
|
||||
# Host headers against it. Defends against DNS rebinding (GHSA-ppp5-vxwm-4cf7).
|
||||
# bound_port is also stashed so /api/pty can build the back-WS URL the
|
||||
# PTY child uses to publish events to the dashboard sidebar.
|
||||
app.state.bound_host = host
|
||||
app.state.bound_port = port
|
||||
|
||||
if open_browser:
|
||||
import webbrowser
|
||||
|
||||
+29
-25
@@ -288,30 +288,34 @@ def get_tool_definitions(
|
||||
filtered_tools[i] = {"type": "function", "function": dynamic_schema}
|
||||
break
|
||||
|
||||
# Rebuild discord_server schema based on the bot's privileged intents
|
||||
# (detected from GET /applications/@me) and the user's action allowlist
|
||||
# in config. Hides actions the bot's intents don't support so the
|
||||
# model never attempts them, and annotates fetch_messages when the
|
||||
# Rebuild discord / discord_admin schemas based on the bot's privileged
|
||||
# intents (detected from GET /applications/@me) and the user's action
|
||||
# allowlist in config. Hides actions the bot's intents don't support so
|
||||
# the model never attempts them, and annotates fetch_messages when the
|
||||
# MESSAGE_CONTENT intent is missing.
|
||||
if "discord_server" in available_tool_names:
|
||||
try:
|
||||
from tools.discord_tool import get_dynamic_schema
|
||||
dynamic = get_dynamic_schema()
|
||||
except Exception: # pragma: no cover — defensive, fall back to static
|
||||
dynamic = None
|
||||
if dynamic is None:
|
||||
# Tool filtered out entirely (empty allowlist or detection disabled
|
||||
# the only remaining actions). Drop it from the schema list.
|
||||
filtered_tools = [
|
||||
t for t in filtered_tools
|
||||
if t.get("function", {}).get("name") != "discord_server"
|
||||
]
|
||||
available_tool_names.discard("discord_server")
|
||||
else:
|
||||
for i, td in enumerate(filtered_tools):
|
||||
if td.get("function", {}).get("name") == "discord_server":
|
||||
filtered_tools[i] = {"type": "function", "function": dynamic}
|
||||
break
|
||||
_discord_schema_fns = {
|
||||
"discord": "get_dynamic_schema_core",
|
||||
"discord_admin": "get_dynamic_schema_admin",
|
||||
}
|
||||
for discord_tool_name in _discord_schema_fns:
|
||||
if discord_tool_name in available_tool_names:
|
||||
try:
|
||||
from tools import discord_tool as _dt
|
||||
schema_fn = getattr(_dt, _discord_schema_fns[discord_tool_name])
|
||||
dynamic = schema_fn()
|
||||
except Exception:
|
||||
dynamic = None
|
||||
if dynamic is None:
|
||||
filtered_tools = [
|
||||
t for t in filtered_tools
|
||||
if t.get("function", {}).get("name") != discord_tool_name
|
||||
]
|
||||
available_tool_names.discard(discord_tool_name)
|
||||
else:
|
||||
for i, td in enumerate(filtered_tools):
|
||||
if td.get("function", {}).get("name") == discord_tool_name:
|
||||
filtered_tools[i] = {"type": "function", "function": dynamic}
|
||||
break
|
||||
|
||||
# Strip web tool cross-references from browser_navigate description when
|
||||
# web_search / web_extract are not available. The static schema says
|
||||
@@ -464,9 +468,9 @@ def _coerce_number(value: str, integer_only: bool = False):
|
||||
f = float(value)
|
||||
except (ValueError, OverflowError):
|
||||
return value
|
||||
# Guard against inf/nan before int() conversion
|
||||
# Guard against inf/nan — not JSON-serializable, keep original string
|
||||
if f != f or f == float("inf") or f == float("-inf"):
|
||||
return f
|
||||
return value
|
||||
# If it looks like an integer (no fractional part), return int
|
||||
if f == int(f):
|
||||
return int(f)
|
||||
|
||||
+1
-1
@@ -156,7 +156,7 @@
|
||||
for entry in "''${ENTRIES[@]}"; do
|
||||
IFS=":" read -r ATTR FOLDER NIX_FILE <<< "$entry"
|
||||
echo "==> .#$ATTR ($FOLDER -> $NIX_FILE)"
|
||||
OUTPUT=$(nix build ".#$ATTR.npmDeps" --no-link --print-build-logs 2>&1)
|
||||
OUTPUT=$(nix build ".#$ATTR.npmDeps" --no-link --rebuild --print-build-logs 2>&1)
|
||||
STATUS=$?
|
||||
if [ "$STATUS" -eq 0 ]; then
|
||||
echo " ok"
|
||||
|
||||
+1
-1
@@ -4,7 +4,7 @@ let
|
||||
src = ../web;
|
||||
npmDeps = pkgs.fetchNpmDeps {
|
||||
inherit src;
|
||||
hash = "sha256-TS/vrCHbdvXkPcAPxImKzAd2pdDCrKlgYZkXBMQ+TEg=";
|
||||
hash = "sha256-4Z8KQ69QhO83X6zff+5urWBv6MME686MhTTMdwSl65o=";
|
||||
};
|
||||
|
||||
npm = hermesNpmLib.mkNpmPassthru { folder = "web"; attr = "web"; pname = "hermes-web"; };
|
||||
|
||||
@@ -78,6 +78,16 @@ termux = [
|
||||
]
|
||||
dingtalk = ["dingtalk-stream>=0.20,<1", "alibabacloud-dingtalk>=2.0.0", "qrcode>=7.0,<8"]
|
||||
feishu = ["lark-oapi>=1.5.3,<2", "qrcode>=7.0,<8"]
|
||||
google = [
|
||||
# Required by the google-workspace skill (Gmail, Calendar, Drive, Contacts,
|
||||
# Sheets, Docs). Declared here so packagers (Nix, Homebrew) ship them with
|
||||
# the [all] extra and users don't hit runtime `pip install` paths that fail
|
||||
# in environments without pip (e.g. Nix-managed Python).
|
||||
"google-api-python-client>=2.100,<3",
|
||||
"google-auth-oauthlib>=1.0,<2",
|
||||
"google-auth-httplib2>=0.2,<1",
|
||||
]
|
||||
# `hermes dashboard` (localhost SPA + API). Not in core to keep the default install lean.
|
||||
web = ["fastapi>=0.104.0,<1", "uvicorn[standard]>=0.24.0,<1"]
|
||||
rl = [
|
||||
"atroposlib @ git+https://github.com/NousResearch/atropos.git@c20c85256e5a45ad31edf8b7276e9c5ee1995a30",
|
||||
@@ -109,6 +119,7 @@ all = [
|
||||
"hermes-agent[voice]",
|
||||
"hermes-agent[dingtalk]",
|
||||
"hermes-agent[feishu]",
|
||||
"hermes-agent[google]",
|
||||
"hermes-agent[mistral]",
|
||||
"hermes-agent[bedrock]",
|
||||
"hermes-agent[web]",
|
||||
|
||||
+489
-50
@@ -502,6 +502,48 @@ def _sanitize_messages_surrogates(messages: list) -> bool:
|
||||
return found
|
||||
|
||||
|
||||
def _escape_invalid_chars_in_json_strings(raw: str) -> str:
|
||||
"""Escape unescaped control chars inside JSON string values.
|
||||
|
||||
Walks the raw JSON character-by-character, tracking whether we are
|
||||
inside a double-quoted string. Inside strings, replaces literal
|
||||
control characters (0x00-0x1F) that aren't already part of an escape
|
||||
sequence with their ``\\uXXXX`` equivalents. Pass-through for everything
|
||||
else.
|
||||
|
||||
Ported from #12093 — complements the other repair passes in
|
||||
``_repair_tool_call_arguments`` when ``json.loads(strict=False)`` is
|
||||
not enough (e.g. llama.cpp backends that emit literal apostrophes or
|
||||
tabs alongside other malformations).
|
||||
"""
|
||||
out: list[str] = []
|
||||
in_string = False
|
||||
i = 0
|
||||
n = len(raw)
|
||||
while i < n:
|
||||
ch = raw[i]
|
||||
if in_string:
|
||||
if ch == "\\" and i + 1 < n:
|
||||
# Already-escaped char — pass through as-is
|
||||
out.append(ch)
|
||||
out.append(raw[i + 1])
|
||||
i += 2
|
||||
continue
|
||||
if ch == '"':
|
||||
in_string = False
|
||||
out.append(ch)
|
||||
elif ord(ch) < 0x20:
|
||||
out.append(f"\\u{ord(ch):04x}")
|
||||
else:
|
||||
out.append(ch)
|
||||
else:
|
||||
if ch == '"':
|
||||
in_string = True
|
||||
out.append(ch)
|
||||
i += 1
|
||||
return "".join(out)
|
||||
|
||||
|
||||
def _repair_tool_call_arguments(raw_args: str, tool_name: str = "?") -> str:
|
||||
"""Attempt to repair malformed tool_call argument JSON.
|
||||
|
||||
@@ -523,6 +565,23 @@ def _repair_tool_call_arguments(raw_args: str, tool_name: str = "?") -> str:
|
||||
logger.warning("Sanitized Python-None tool_call arguments for %s", tool_name)
|
||||
return "{}"
|
||||
|
||||
# Repair pass 0: llama.cpp backends sometimes emit literal control
|
||||
# characters (tabs, newlines) inside JSON string values. json.loads
|
||||
# with strict=False accepts these and lets us re-serialise the
|
||||
# result into wire-valid JSON without any string surgery. This is
|
||||
# the most common local-model repair case (#12068).
|
||||
try:
|
||||
parsed = json.loads(raw_stripped, strict=False)
|
||||
reserialised = json.dumps(parsed, separators=(",", ":"))
|
||||
if reserialised != raw_stripped:
|
||||
logger.warning(
|
||||
"Repaired unescaped control chars in tool_call arguments for %s",
|
||||
tool_name,
|
||||
)
|
||||
return reserialised
|
||||
except (json.JSONDecodeError, TypeError, ValueError):
|
||||
pass
|
||||
|
||||
# Attempt common JSON repairs
|
||||
fixed = raw_stripped
|
||||
# 1. Strip trailing commas before } or ]
|
||||
@@ -557,6 +616,21 @@ def _repair_tool_call_arguments(raw_args: str, tool_name: str = "?") -> str:
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
|
||||
# Repair pass 4: escape unescaped control chars inside JSON strings,
|
||||
# then retry. Catches cases where strict=False alone fails because
|
||||
# other malformations are present too.
|
||||
try:
|
||||
escaped = _escape_invalid_chars_in_json_strings(fixed)
|
||||
if escaped != fixed:
|
||||
json.loads(escaped)
|
||||
logger.warning(
|
||||
"Repaired control-char-laced tool_call arguments for %s: %s → %s",
|
||||
tool_name, raw_stripped[:80], escaped[:80],
|
||||
)
|
||||
return escaped
|
||||
except (json.JSONDecodeError, TypeError, ValueError):
|
||||
pass
|
||||
|
||||
# Last resort: replace with empty object so the API request doesn't
|
||||
# crash the entire session.
|
||||
logger.warning(
|
||||
@@ -740,6 +814,11 @@ class AIAgent:
|
||||
for AI models that support function calling.
|
||||
"""
|
||||
|
||||
_TOOL_CALL_ARGUMENTS_CORRUPTION_MARKER = (
|
||||
"[hermes-agent: tool call arguments were corrupted in this session and "
|
||||
"have been dropped to keep the conversation alive. See issue #15236.]"
|
||||
)
|
||||
|
||||
@property
|
||||
def base_url(self) -> str:
|
||||
return self._base_url
|
||||
@@ -1437,6 +1516,8 @@ class AIAgent:
|
||||
|
||||
# Track conversation messages for session logging
|
||||
self._session_messages: List[Dict[str, Any]] = []
|
||||
self._memory_write_origin = "assistant_tool"
|
||||
self._memory_write_context = "foreground"
|
||||
|
||||
# Cached system prompt -- built once per session, only rebuilt on compression
|
||||
self._cached_system_prompt: Optional[str] = None
|
||||
@@ -2231,6 +2312,34 @@ class AIAgent:
|
||||
except Exception:
|
||||
logger.debug("status_callback error in _emit_status", exc_info=True)
|
||||
|
||||
def _emit_warning(self, message: str) -> None:
|
||||
"""Emit a user-visible warning through the same status plumbing.
|
||||
|
||||
Unlike debug logs, these warnings are meant for degraded side paths
|
||||
such as auxiliary compression or memory flushes where the main turn can
|
||||
continue but the user needs to know something important failed.
|
||||
"""
|
||||
try:
|
||||
self._vprint(f"{self.log_prefix}{message}", force=True)
|
||||
except Exception:
|
||||
pass
|
||||
if self.status_callback:
|
||||
try:
|
||||
self.status_callback("warn", message)
|
||||
except Exception:
|
||||
logger.debug("status_callback error in _emit_warning", exc_info=True)
|
||||
|
||||
def _emit_auxiliary_failure(self, task: str, exc: BaseException) -> None:
|
||||
"""Surface a compact warning for failed auxiliary work."""
|
||||
try:
|
||||
detail = self._summarize_api_error(exc)
|
||||
except Exception:
|
||||
detail = str(exc)
|
||||
detail = (detail or exc.__class__.__name__).strip()
|
||||
if len(detail) > 220:
|
||||
detail = detail[:217].rstrip() + "..."
|
||||
self._emit_warning(f"⚠ Auxiliary {task} failed: {detail}")
|
||||
|
||||
def _current_main_runtime(self) -> Dict[str, str]:
|
||||
"""Return the live main runtime for session-scoped auxiliary routing."""
|
||||
return {
|
||||
@@ -3047,7 +3156,10 @@ class AIAgent:
|
||||
quiet_mode=True,
|
||||
platform=self.platform,
|
||||
provider=self.provider,
|
||||
parent_session_id=self.session_id,
|
||||
)
|
||||
review_agent._memory_write_origin = "background_review"
|
||||
review_agent._memory_write_context = "background_review"
|
||||
review_agent._memory_store = self._memory_store
|
||||
review_agent._memory_enabled = self._memory_enabled
|
||||
review_agent._user_profile_enabled = self._user_profile_enabled
|
||||
@@ -3081,7 +3193,8 @@ class AIAgent:
|
||||
pass
|
||||
|
||||
except Exception as e:
|
||||
logger.debug("Background memory/skill review failed: %s", e)
|
||||
logger.warning("Background memory/skill review failed: %s", e)
|
||||
self._emit_auxiliary_failure("background review", e)
|
||||
finally:
|
||||
# Close all resources (httpx client, subprocesses, etc.) so
|
||||
# GC doesn't try to clean them up on a dead asyncio event
|
||||
@@ -3095,6 +3208,32 @@ class AIAgent:
|
||||
t = threading.Thread(target=_run_review, daemon=True, name="bg-review")
|
||||
t.start()
|
||||
|
||||
def _build_memory_write_metadata(
|
||||
self,
|
||||
*,
|
||||
write_origin: Optional[str] = None,
|
||||
execution_context: Optional[str] = None,
|
||||
task_id: Optional[str] = None,
|
||||
tool_call_id: Optional[str] = None,
|
||||
) -> Dict[str, Any]:
|
||||
"""Build provenance metadata for external memory-provider mirrors."""
|
||||
metadata: Dict[str, Any] = {
|
||||
"write_origin": write_origin or getattr(self, "_memory_write_origin", "assistant_tool"),
|
||||
"execution_context": (
|
||||
execution_context
|
||||
or getattr(self, "_memory_write_context", "foreground")
|
||||
),
|
||||
"session_id": self.session_id or "",
|
||||
"parent_session_id": self._parent_session_id or "",
|
||||
"platform": self.platform or os.environ.get("HERMES_SESSION_SOURCE", "cli"),
|
||||
"tool_name": "memory",
|
||||
}
|
||||
if task_id:
|
||||
metadata["task_id"] = task_id
|
||||
if tool_call_id:
|
||||
metadata["tool_call_id"] = tool_call_id
|
||||
return {k: v for k, v in metadata.items() if v not in (None, "")}
|
||||
|
||||
def _apply_persist_user_message_override(self, messages: List[Dict]) -> None:
|
||||
"""Rewrite the current-turn user message before persistence/return.
|
||||
|
||||
@@ -4023,6 +4162,49 @@ class AIAgent:
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def _sync_external_memory_for_turn(
|
||||
self,
|
||||
*,
|
||||
original_user_message: Any,
|
||||
final_response: Any,
|
||||
interrupted: bool,
|
||||
) -> None:
|
||||
"""Mirror a completed turn into external memory providers.
|
||||
|
||||
Called at the end of ``run_conversation`` with the cleaned user
|
||||
message (``original_user_message``) and the finalised assistant
|
||||
response. The external memory backend gets both ``sync_all`` (to
|
||||
persist the exchange) and ``queue_prefetch_all`` (to start
|
||||
warming context for the next turn) in one shot.
|
||||
|
||||
Uses ``original_user_message`` rather than ``user_message``
|
||||
because the latter may carry injected skill content that bloats
|
||||
or breaks provider queries.
|
||||
|
||||
Interrupted turns are skipped entirely (#15218). A partial
|
||||
assistant output, an aborted tool chain, or a mid-stream reset
|
||||
is not durable conversational truth — mirroring it into an
|
||||
external memory backend pollutes future recall with state the
|
||||
user never saw completed. The prefetch is gated on the same
|
||||
flag: the user's next message is almost certainly a retry of
|
||||
the same intent, and a prefetch keyed on the interrupted turn
|
||||
would fire against stale context.
|
||||
|
||||
Normal completed turns still sync as before. The whole body is
|
||||
wrapped in ``try/except Exception`` because external memory
|
||||
providers are strictly best-effort — a misconfigured or offline
|
||||
backend must not block the user from seeing their response.
|
||||
"""
|
||||
if interrupted:
|
||||
return
|
||||
if not (self._memory_manager and final_response and original_user_message):
|
||||
return
|
||||
try:
|
||||
self._memory_manager.sync_all(original_user_message, final_response)
|
||||
self._memory_manager.queue_prefetch_all(original_user_message)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def release_clients(self) -> None:
|
||||
"""Release LLM client resources WITHOUT tearing down session tool state.
|
||||
|
||||
@@ -5432,6 +5614,26 @@ class AIAgent:
|
||||
self._try_refresh_anthropic_client_credentials()
|
||||
return self._anthropic_client.messages.create(**api_kwargs)
|
||||
|
||||
def _rebuild_anthropic_client(self) -> None:
    """Replace ``self._anthropic_client`` with a freshly built client.

    Used after an interrupt or a stale call. When ``self.provider`` is
    ``"bedrock"`` the client is built with
    ``build_anthropic_bedrock_client`` for ``self._bedrock_region``
    (``"us-east-1"`` when the attribute is missing or empty). Any other
    provider goes through ``build_anthropic_client`` with the stored API
    key, the optional ``_anthropic_base_url`` and the provider/model
    request timeout.

    This method does not close the previous client.
    """
    if getattr(self, "provider", None) != "bedrock":
        from agent.anthropic_adapter import build_anthropic_client

        api_key = self._anthropic_api_key
        base_url = getattr(self, "_anthropic_base_url", None)
        request_timeout = get_provider_request_timeout(self.provider, self.model)
        self._anthropic_client = build_anthropic_client(
            api_key,
            base_url,
            timeout=request_timeout,
        )
        return

    from agent.anthropic_adapter import build_anthropic_bedrock_client

    bedrock_region = getattr(self, "_bedrock_region", "us-east-1") or "us-east-1"
    self._anthropic_client = build_anthropic_bedrock_client(bedrock_region)
|
||||
|
||||
def _interruptible_api_call(self, api_kwargs: dict):
|
||||
"""
|
||||
Run the API call in a background thread so the main conversation loop
|
||||
@@ -5467,12 +5669,21 @@ class AIAgent:
|
||||
# bedrock responses like chat_completions responses.
|
||||
from agent.bedrock_adapter import (
|
||||
_get_bedrock_runtime_client,
|
||||
invalidate_runtime_client,
|
||||
is_stale_connection_error,
|
||||
normalize_converse_response,
|
||||
)
|
||||
region = api_kwargs.pop("__bedrock_region__", "us-east-1")
|
||||
api_kwargs.pop("__bedrock_converse__", None)
|
||||
client = _get_bedrock_runtime_client(region)
|
||||
raw_response = client.converse(**api_kwargs)
|
||||
try:
|
||||
raw_response = client.converse(**api_kwargs)
|
||||
except Exception as _bedrock_exc:
|
||||
# Evict the cached client on stale-connection failures
|
||||
# so the outer retry loop builds a fresh client/pool.
|
||||
if is_stale_connection_error(_bedrock_exc):
|
||||
invalidate_runtime_client(region)
|
||||
raise
|
||||
result["response"] = normalize_converse_response(raw_response)
|
||||
else:
|
||||
request_client_holder["client"] = self._create_request_openai_client(reason="chat_completion_request")
|
||||
@@ -5530,14 +5741,8 @@ class AIAgent:
|
||||
)
|
||||
try:
|
||||
if self.api_mode == "anthropic_messages":
|
||||
from agent.anthropic_adapter import build_anthropic_client
|
||||
|
||||
self._anthropic_client.close()
|
||||
self._anthropic_client = build_anthropic_client(
|
||||
self._anthropic_api_key,
|
||||
getattr(self, "_anthropic_base_url", None),
|
||||
timeout=get_provider_request_timeout(self.provider, self.model),
|
||||
)
|
||||
self._rebuild_anthropic_client()
|
||||
else:
|
||||
rc = request_client_holder.get("client")
|
||||
if rc is not None:
|
||||
@@ -5562,14 +5767,8 @@ class AIAgent:
|
||||
# seed future retries.
|
||||
try:
|
||||
if self.api_mode == "anthropic_messages":
|
||||
from agent.anthropic_adapter import build_anthropic_client
|
||||
|
||||
self._anthropic_client.close()
|
||||
self._anthropic_client = build_anthropic_client(
|
||||
self._anthropic_api_key,
|
||||
getattr(self, "_anthropic_base_url", None),
|
||||
timeout=get_provider_request_timeout(self.provider, self.model),
|
||||
)
|
||||
self._rebuild_anthropic_client()
|
||||
else:
|
||||
request_client = request_client_holder.get("client")
|
||||
if request_client is not None:
|
||||
@@ -5725,12 +5924,21 @@ class AIAgent:
|
||||
try:
|
||||
from agent.bedrock_adapter import (
|
||||
_get_bedrock_runtime_client,
|
||||
invalidate_runtime_client,
|
||||
is_stale_connection_error,
|
||||
stream_converse_with_callbacks,
|
||||
)
|
||||
region = api_kwargs.pop("__bedrock_region__", "us-east-1")
|
||||
api_kwargs.pop("__bedrock_converse__", None)
|
||||
client = _get_bedrock_runtime_client(region)
|
||||
raw_response = client.converse_stream(**api_kwargs)
|
||||
try:
|
||||
raw_response = client.converse_stream(**api_kwargs)
|
||||
except Exception as _bedrock_exc:
|
||||
# Evict the cached client on stale-connection failures
|
||||
# so the outer retry loop builds a fresh client/pool.
|
||||
if is_stale_connection_error(_bedrock_exc):
|
||||
invalidate_runtime_client(region)
|
||||
raise
|
||||
|
||||
def _on_text(text):
|
||||
_fire_first()
|
||||
@@ -5982,11 +6190,25 @@ class AIAgent:
|
||||
for idx in sorted(tool_calls_acc):
|
||||
tc = tool_calls_acc[idx]
|
||||
arguments = tc["function"]["arguments"]
|
||||
tool_name = tc["function"]["name"] or "?"
|
||||
if arguments and arguments.strip():
|
||||
try:
|
||||
json.loads(arguments)
|
||||
except json.JSONDecodeError:
|
||||
has_truncated_tool_args = True
|
||||
# Attempt repair before flagging as truncated.
|
||||
# Models like GLM-5.1 via Ollama produce trailing
|
||||
# commas, unclosed brackets, Python None, etc.
|
||||
# Without repair, these hit the truncation handler
|
||||
# and kill the session. _repair_tool_call_arguments
|
||||
# returns "{}" for unrepairable args, which is far
|
||||
# better than a crashed session.
|
||||
repaired = _repair_tool_call_arguments(arguments, tool_name)
|
||||
if repaired != "{}":
|
||||
# Successfully repaired — use the fixed args
|
||||
arguments = repaired
|
||||
else:
|
||||
# Unrepairable — flag for truncation handling
|
||||
has_truncated_tool_args = True
|
||||
mock_tool_calls.append(SimpleNamespace(
|
||||
id=tc["id"],
|
||||
type=tc["type"],
|
||||
@@ -6410,14 +6632,8 @@ class AIAgent:
|
||||
if self._interrupt_requested:
|
||||
try:
|
||||
if self.api_mode == "anthropic_messages":
|
||||
from agent.anthropic_adapter import build_anthropic_client
|
||||
|
||||
self._anthropic_client.close()
|
||||
self._anthropic_client = build_anthropic_client(
|
||||
self._anthropic_api_key,
|
||||
getattr(self, "_anthropic_base_url", None),
|
||||
timeout=get_provider_request_timeout(self.provider, self.model),
|
||||
)
|
||||
self._rebuild_anthropic_client()
|
||||
else:
|
||||
request_client = request_client_holder.get("client")
|
||||
if request_client is not None:
|
||||
@@ -7409,6 +7625,12 @@ class AIAgent:
|
||||
raw_reasoning_content = getattr(assistant_message, "reasoning_content", None)
|
||||
if raw_reasoning_content is not None:
|
||||
msg["reasoning_content"] = _sanitize_surrogates(raw_reasoning_content)
|
||||
elif msg.get("tool_calls") and self._needs_deepseek_tool_reasoning():
|
||||
# DeepSeek thinking mode requires reasoning_content on every
|
||||
# assistant tool-call message. Without it, replaying the
|
||||
# persisted message causes HTTP 400. Include empty string
|
||||
# as a defensive compatibility fallback (refs #15250).
|
||||
msg["reasoning_content"] = ""
|
||||
|
||||
if hasattr(assistant_message, 'reasoning_details') and assistant_message.reasoning_details:
|
||||
# Pass reasoning_details back unmodified so providers (OpenRouter,
|
||||
@@ -7484,6 +7706,35 @@ class AIAgent:
|
||||
|
||||
return msg
|
||||
|
||||
def _needs_kimi_tool_reasoning(self) -> bool:
|
||||
"""Return True when the current provider is Kimi / Moonshot thinking mode.
|
||||
|
||||
Kimi ``/coding`` and Moonshot thinking mode both require
|
||||
``reasoning_content`` on every assistant tool-call message; omitting
|
||||
it causes the next replay to fail with HTTP 400.
|
||||
"""
|
||||
return (
|
||||
self.provider in {"kimi-coding", "kimi-coding-cn"}
|
||||
or base_url_host_matches(self.base_url, "api.kimi.com")
|
||||
or base_url_host_matches(self.base_url, "moonshot.ai")
|
||||
or base_url_host_matches(self.base_url, "moonshot.cn")
|
||||
)
|
||||
|
||||
def _needs_deepseek_tool_reasoning(self) -> bool:
|
||||
"""Return True when the current provider is DeepSeek thinking mode.
|
||||
|
||||
DeepSeek V4 thinking mode requires ``reasoning_content`` on every
|
||||
assistant tool-call turn; omitting it causes HTTP 400 when the
|
||||
message is replayed in a subsequent API request (#15250).
|
||||
"""
|
||||
provider = (self.provider or "").lower()
|
||||
model = (self.model or "").lower()
|
||||
return (
|
||||
provider == "deepseek"
|
||||
or "deepseek" in model
|
||||
or base_url_host_matches(self.base_url, "api.deepseek.com")
|
||||
)
|
||||
|
||||
def _copy_reasoning_content_for_api(self, source_msg: dict, api_msg: dict) -> None:
|
||||
"""Copy provider-facing reasoning fields onto an API replay message."""
|
||||
if source_msg.get("role") != "assistant":
|
||||
@@ -7499,13 +7750,14 @@ class AIAgent:
|
||||
api_msg["reasoning_content"] = normalized_reasoning
|
||||
return
|
||||
|
||||
kimi_requires_reasoning = (
|
||||
self.provider in {"kimi-coding", "kimi-coding-cn"}
|
||||
or base_url_host_matches(self.base_url, "api.kimi.com")
|
||||
or base_url_host_matches(self.base_url, "moonshot.ai")
|
||||
or base_url_host_matches(self.base_url, "moonshot.cn")
|
||||
)
|
||||
if kimi_requires_reasoning and source_msg.get("tool_calls"):
|
||||
# Providers that require an echoed reasoning_content on every
|
||||
# assistant tool-call turn. Detection logic lives in the per-provider
|
||||
# helpers so both the creation path (_build_assistant_message) and
|
||||
# this replay path stay in sync.
|
||||
if source_msg.get("tool_calls") and (
|
||||
self._needs_kimi_tool_reasoning()
|
||||
or self._needs_deepseek_tool_reasoning()
|
||||
):
|
||||
api_msg["reasoning_content"] = ""
|
||||
|
||||
@staticmethod
|
||||
@@ -7536,6 +7788,115 @@ class AIAgent:
|
||||
]
|
||||
return api_msg
|
||||
|
||||
@staticmethod
|
||||
def _sanitize_tool_call_arguments(
|
||||
messages: list,
|
||||
*,
|
||||
logger=None,
|
||||
session_id: str = None,
|
||||
) -> int:
|
||||
"""Repair corrupted assistant tool-call argument JSON in-place."""
|
||||
log = logger or logging.getLogger(__name__)
|
||||
if not isinstance(messages, list):
|
||||
return 0
|
||||
|
||||
repaired = 0
|
||||
marker = AIAgent._TOOL_CALL_ARGUMENTS_CORRUPTION_MARKER
|
||||
|
||||
def _prepend_marker(tool_msg: dict) -> None:
|
||||
existing = tool_msg.get("content")
|
||||
if isinstance(existing, str):
|
||||
if not existing:
|
||||
tool_msg["content"] = marker
|
||||
elif not existing.startswith(marker):
|
||||
tool_msg["content"] = f"{marker}\n{existing}"
|
||||
return
|
||||
if existing is None:
|
||||
tool_msg["content"] = marker
|
||||
return
|
||||
try:
|
||||
existing_text = json.dumps(existing)
|
||||
except TypeError:
|
||||
existing_text = str(existing)
|
||||
tool_msg["content"] = f"{marker}\n{existing_text}"
|
||||
|
||||
message_index = 0
|
||||
while message_index < len(messages):
|
||||
msg = messages[message_index]
|
||||
if not isinstance(msg, dict) or msg.get("role") != "assistant":
|
||||
message_index += 1
|
||||
continue
|
||||
|
||||
tool_calls = msg.get("tool_calls")
|
||||
if not isinstance(tool_calls, list) or not tool_calls:
|
||||
message_index += 1
|
||||
continue
|
||||
|
||||
insert_at = message_index + 1
|
||||
for tool_call in tool_calls:
|
||||
if not isinstance(tool_call, dict):
|
||||
continue
|
||||
function = tool_call.get("function")
|
||||
if not isinstance(function, dict):
|
||||
continue
|
||||
|
||||
arguments = function.get("arguments")
|
||||
if arguments is None or arguments == "":
|
||||
function["arguments"] = "{}"
|
||||
continue
|
||||
if isinstance(arguments, str) and not arguments.strip():
|
||||
function["arguments"] = "{}"
|
||||
continue
|
||||
if not isinstance(arguments, str):
|
||||
continue
|
||||
|
||||
try:
|
||||
json.loads(arguments)
|
||||
except json.JSONDecodeError:
|
||||
tool_call_id = tool_call.get("id")
|
||||
function_name = function.get("name", "?")
|
||||
preview = arguments[:80]
|
||||
log.warning(
|
||||
"Corrupted tool_call arguments repaired before request "
|
||||
"(session=%s, message_index=%s, tool_call_id=%s, function=%s, preview=%r)",
|
||||
session_id or "-",
|
||||
message_index,
|
||||
tool_call_id or "-",
|
||||
function_name,
|
||||
preview,
|
||||
)
|
||||
function["arguments"] = "{}"
|
||||
|
||||
existing_tool_msg = None
|
||||
scan_index = message_index + 1
|
||||
while scan_index < len(messages):
|
||||
candidate = messages[scan_index]
|
||||
if not isinstance(candidate, dict) or candidate.get("role") != "tool":
|
||||
break
|
||||
if candidate.get("tool_call_id") == tool_call_id:
|
||||
existing_tool_msg = candidate
|
||||
break
|
||||
scan_index += 1
|
||||
|
||||
if existing_tool_msg is None:
|
||||
messages.insert(
|
||||
insert_at,
|
||||
{
|
||||
"role": "tool",
|
||||
"tool_call_id": tool_call_id,
|
||||
"content": marker,
|
||||
},
|
||||
)
|
||||
insert_at += 1
|
||||
else:
|
||||
_prepend_marker(existing_tool_msg)
|
||||
|
||||
repaired += 1
|
||||
|
||||
message_index += 1
|
||||
|
||||
return repaired
|
||||
|
||||
def _should_sanitize_tool_calls(self) -> bool:
|
||||
"""Determine if tool_calls need sanitization for strict APIs.
|
||||
|
||||
@@ -7633,6 +7994,7 @@ class AIAgent:
|
||||
_flush_temperature = _fixed_temp
|
||||
else:
|
||||
_flush_temperature = 0.3
|
||||
aux_error = None
|
||||
try:
|
||||
response = _call_llm(
|
||||
task="flush_memories",
|
||||
@@ -7642,14 +8004,19 @@ class AIAgent:
|
||||
max_tokens=5120,
|
||||
# timeout resolved from auxiliary.flush_memories.timeout config
|
||||
)
|
||||
except RuntimeError:
|
||||
except Exception as e:
|
||||
aux_error = e
|
||||
_aux_available = False
|
||||
response = None
|
||||
|
||||
if not _aux_available and self.api_mode == "codex_responses":
|
||||
# No auxiliary client -- use the Codex Responses path directly
|
||||
codex_kwargs = self._build_api_kwargs(api_messages)
|
||||
codex_kwargs["tools"] = self._get_transport().convert_tools([memory_tool_def])
|
||||
_ct_flush = self._get_transport()
|
||||
if _ct_flush is not None:
|
||||
codex_kwargs["tools"] = _ct_flush.convert_tools([memory_tool_def])
|
||||
elif not codex_kwargs.get("tools"):
|
||||
codex_kwargs["tools"] = [memory_tool_def]
|
||||
if _flush_temperature is not None:
|
||||
codex_kwargs["temperature"] = _flush_temperature
|
||||
else:
|
||||
@@ -7681,11 +8048,37 @@ class AIAgent:
|
||||
**api_kwargs, timeout=_get_task_timeout("flush_memories")
|
||||
)
|
||||
|
||||
if aux_error is not None:
|
||||
logger.warning("Auxiliary memory flush failed; used fallback path: %s", aux_error)
|
||||
self._emit_auxiliary_failure("memory flush", aux_error)
|
||||
|
||||
def _openai_tool_calls(resp):
|
||||
if resp is not None and hasattr(resp, "choices") and resp.choices:
|
||||
msg = getattr(resp.choices[0], "message", None)
|
||||
calls = getattr(msg, "tool_calls", None)
|
||||
if calls:
|
||||
return calls
|
||||
return []
|
||||
|
||||
def _codex_output_tool_calls(resp):
|
||||
calls = []
|
||||
for item in getattr(resp, "output", []) or []:
|
||||
if getattr(item, "type", None) == "function_call":
|
||||
calls.append(SimpleNamespace(
|
||||
id=getattr(item, "call_id", None),
|
||||
type="function",
|
||||
function=SimpleNamespace(
|
||||
name=getattr(item, "name", ""),
|
||||
arguments=getattr(item, "arguments", "{}"),
|
||||
),
|
||||
))
|
||||
return calls
|
||||
|
||||
# Extract tool calls from the response, handling all API formats
|
||||
tool_calls = []
|
||||
if self.api_mode == "codex_responses" and not _aux_available:
|
||||
_ct_flush = self._get_transport()
|
||||
_cnr_flush = _ct_flush.normalize_response(response)
|
||||
_cnr_flush = _ct_flush.normalize_response(response) if _ct_flush is not None else None
|
||||
if _cnr_flush and _cnr_flush.tool_calls:
|
||||
tool_calls = [
|
||||
SimpleNamespace(
|
||||
@@ -7693,6 +8086,8 @@ class AIAgent:
|
||||
function=SimpleNamespace(name=tc.name, arguments=tc.arguments),
|
||||
) for tc in _cnr_flush.tool_calls
|
||||
]
|
||||
else:
|
||||
tool_calls = _codex_output_tool_calls(response)
|
||||
elif self.api_mode == "anthropic_messages" and not _aux_available:
|
||||
_tfn = self._get_transport()
|
||||
_flush_result = _tfn.normalize_response(response, strip_tool_prefix=self._is_anthropic_oauth)
|
||||
@@ -7705,15 +8100,16 @@ class AIAgent:
|
||||
]
|
||||
elif self.api_mode in ("chat_completions", "bedrock_converse"):
|
||||
# chat_completions / bedrock — normalize through transport
|
||||
_flush_result = self._get_transport().normalize_response(response)
|
||||
if _flush_result.tool_calls:
|
||||
_tfn = self._get_transport()
|
||||
_flush_result = _tfn.normalize_response(response) if _tfn is not None else None
|
||||
if _flush_result and _flush_result.tool_calls:
|
||||
tool_calls = _flush_result.tool_calls
|
||||
else:
|
||||
tool_calls = _openai_tool_calls(response)
|
||||
elif _aux_available and hasattr(response, "choices") and response.choices:
|
||||
# Auxiliary client returned OpenAI-shaped response while main
|
||||
# api_mode is codex/anthropic — extract tool_calls from .choices
|
||||
_aux_msg = response.choices[0].message
|
||||
if hasattr(_aux_msg, "tool_calls") and _aux_msg.tool_calls:
|
||||
tool_calls = _aux_msg.tool_calls
|
||||
tool_calls = _openai_tool_calls(response)
|
||||
|
||||
for tc in tool_calls:
|
||||
if tc.function.name == "memory":
|
||||
@@ -7728,12 +8124,27 @@ class AIAgent:
|
||||
old_text=args.get("old_text"),
|
||||
store=self._memory_store,
|
||||
)
|
||||
if self._memory_manager and args.get("action") in ("add", "replace"):
|
||||
try:
|
||||
self._memory_manager.on_memory_write(
|
||||
args.get("action", ""),
|
||||
flush_target,
|
||||
args.get("content", ""),
|
||||
metadata=self._build_memory_write_metadata(
|
||||
write_origin="memory_flush",
|
||||
execution_context="flush_memories",
|
||||
),
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
if not self.quiet_mode:
|
||||
print(f" 🧠 Memory flush: saved to {args.get('target', 'memory')}")
|
||||
except Exception as e:
|
||||
logger.debug("Memory flush tool call failed: %s", e)
|
||||
logger.warning("Memory flush tool call failed: %s", e)
|
||||
self._emit_auxiliary_failure("memory flush tool", e)
|
||||
except Exception as e:
|
||||
logger.debug("Memory flush API call failed: %s", e)
|
||||
logger.warning("Memory flush API call failed: %s", e)
|
||||
self._emit_auxiliary_failure("memory flush", e)
|
||||
finally:
|
||||
# Strip flush artifacts: remove everything from the flush message onward.
|
||||
# Use sentinel marker instead of identity check for robustness.
|
||||
@@ -7779,6 +8190,15 @@ class AIAgent:
|
||||
# focus_topic — fall back to calling without it.
|
||||
compressed = self.context_compressor.compress(messages, current_tokens=approx_tokens)
|
||||
|
||||
summary_error = getattr(self.context_compressor, "_last_summary_error", None)
|
||||
if summary_error:
|
||||
if getattr(self, "_last_compression_summary_warning", None) != summary_error:
|
||||
self._last_compression_summary_warning = summary_error
|
||||
self._emit_warning(
|
||||
f"⚠ Compression summary failed: {summary_error}. "
|
||||
"Inserted a fallback context marker."
|
||||
)
|
||||
|
||||
todo_snapshot = self._todo_store.format_for_injection()
|
||||
if todo_snapshot:
|
||||
compressed.append({"role": "user", "content": todo_snapshot})
|
||||
@@ -7948,6 +8368,10 @@ class AIAgent:
|
||||
function_args.get("action", ""),
|
||||
target,
|
||||
function_args.get("content", ""),
|
||||
metadata=self._build_memory_write_metadata(
|
||||
task_id=effective_task_id,
|
||||
tool_call_id=tool_call_id,
|
||||
),
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
@@ -8459,6 +8883,10 @@ class AIAgent:
|
||||
function_args.get("action", ""),
|
||||
target,
|
||||
function_args.get("content", ""),
|
||||
metadata=self._build_memory_write_metadata(
|
||||
task_id=effective_task_id,
|
||||
tool_call_id=getattr(tool_call, "id", None),
|
||||
),
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
@@ -8703,6 +9131,7 @@ class AIAgent:
|
||||
api_messages = []
|
||||
for msg in messages:
|
||||
api_msg = msg.copy()
|
||||
self._copy_reasoning_content_for_api(msg, api_msg)
|
||||
for internal_field in ("reasoning", "finish_reason", "_thinking_prefill"):
|
||||
api_msg.pop(internal_field, None)
|
||||
if _needs_sanitize:
|
||||
@@ -9333,6 +9762,19 @@ class AIAgent:
|
||||
# Note: Reasoning is embedded in content via <think> tags for trajectory storage.
|
||||
# However, providers like Moonshot AI require a separate 'reasoning_content' field
|
||||
# on assistant messages with tool_calls. We handle both cases here.
|
||||
request_logger = getattr(self, "logger", None) or logging.getLogger(__name__)
|
||||
repaired_tool_calls = self._sanitize_tool_call_arguments(
|
||||
messages,
|
||||
logger=request_logger,
|
||||
session_id=self.session_id,
|
||||
)
|
||||
if repaired_tool_calls > 0:
|
||||
request_logger.info(
|
||||
"Sanitized %s corrupted tool_call arguments before request (session=%s)",
|
||||
repaired_tool_calls,
|
||||
self.session_id or "-",
|
||||
)
|
||||
|
||||
api_messages = []
|
||||
for idx, msg in enumerate(messages):
|
||||
api_msg = msg.copy()
|
||||
@@ -12162,14 +12604,11 @@ class AIAgent:
|
||||
self._iters_since_skill = 0
|
||||
|
||||
# External memory provider: sync the completed turn + queue next prefetch.
|
||||
# Use original_user_message (clean input) — user_message may contain
|
||||
# injected skill content that bloats / breaks provider queries.
|
||||
if self._memory_manager and final_response and original_user_message:
|
||||
try:
|
||||
self._memory_manager.sync_all(original_user_message, final_response)
|
||||
self._memory_manager.queue_prefetch_all(original_user_message)
|
||||
except Exception:
|
||||
pass
|
||||
self._sync_external_memory_for_turn(
|
||||
original_user_message=original_user_message,
|
||||
final_response=final_response,
|
||||
interrupted=interrupted,
|
||||
)
|
||||
|
||||
# Background memory/skill review — runs AFTER the response is delivered
|
||||
# so it never competes with the user's task for model attention.
|
||||
|
||||
@@ -48,6 +48,9 @@ AUTHOR_MAP = {
|
||||
"jefferson@heimdallstrategy.com": "Mind-Dragon",
|
||||
"130918800+devorun@users.noreply.github.com": "devorun",
|
||||
"maks.mir@yahoo.com": "say8hi",
|
||||
"web3blind@users.noreply.github.com": "web3blind",
|
||||
"julia@alexland.us": "alexg0bot",
|
||||
"1060770+benjaminsehl@users.noreply.github.com": "benjaminsehl",
|
||||
# contributors (from noreply pattern)
|
||||
"david.vv@icloud.com": "davidvv",
|
||||
"wangqiang@wangqiangdeMac-mini.local": "xiaoqiang243",
|
||||
@@ -59,14 +62,19 @@ AUTHOR_MAP = {
|
||||
"keifergu@tencent.com": "keifergu",
|
||||
"kshitijk4poor@users.noreply.github.com": "kshitijk4poor",
|
||||
"abner.the.foreman@agentmail.to": "Abnertheforeman",
|
||||
"thomasgeorgevii09@gmail.com": "tochukwuada",
|
||||
"harryykyle1@gmail.com": "hharry11",
|
||||
"kshitijk4poor@gmail.com": "kshitijk4poor",
|
||||
"keira.voss94@gmail.com": "keiravoss94",
|
||||
"16443023+stablegenius49@users.noreply.github.com": "stablegenius49",
|
||||
"simbamax99@gmail.com": "simbam99",
|
||||
"185121704+stablegenius49@users.noreply.github.com": "stablegenius49",
|
||||
"101283333+batuhankocyigit@users.noreply.github.com": "batuhankocyigit",
|
||||
"255305877+ismell0992-afk@users.noreply.github.com": "ismell0992-afk",
|
||||
"cyprian@ironin.pl": "iRonin",
|
||||
"valdi.jorge@gmail.com": "jvcl",
|
||||
"q19dcp@gmail.com": "aj-nt",
|
||||
"ebukau84@gmail.com": "UgwujaGeorge",
|
||||
"francip@gmail.com": "francip",
|
||||
"omni@comelse.com": "omnissiah-comelse",
|
||||
"oussama.redcode@gmail.com": "mavrickdeveloper",
|
||||
@@ -106,6 +114,7 @@ AUTHOR_MAP = {
|
||||
"30841158+n-WN@users.noreply.github.com": "n-WN",
|
||||
"tsuijinglei@gmail.com": "hiddenpuppy",
|
||||
"jerome@clawwork.ai": "HiddenPuppy",
|
||||
"jerome.benoit@sap.com": "jerome-benoit",
|
||||
"wysie@users.noreply.github.com": "Wysie",
|
||||
"leoyuan0099@gmail.com": "keyuyuan",
|
||||
"bxzt2006@163.com": "Only-Code-A",
|
||||
@@ -200,6 +209,9 @@ AUTHOR_MAP = {
|
||||
"1434494126@qq.com": "5park1e",
|
||||
"158153005+5park1e@users.noreply.github.com": "5park1e",
|
||||
"innocarpe@gmail.com": "innocarpe",
|
||||
"noreply@ked.com": "qike-ms",
|
||||
"andrekurait@gmail.com": "AndreKurait",
|
||||
"bsgdigital@users.noreply.github.com": "bsgdigital",
|
||||
"numman.ali@gmail.com": "nummanali",
|
||||
"rohithsaimidigudla@gmail.com": "whitehatjr1001",
|
||||
"0xNyk@users.noreply.github.com": "0xNyk",
|
||||
@@ -490,6 +502,7 @@ AUTHOR_MAP = {
|
||||
"zhangxicen@example.com": "zhangxicen",
|
||||
"codex@openai.invalid": "teknium1",
|
||||
"screenmachine@gmail.com": "teknium1",
|
||||
"chenzeshi@live.com": "chen1749144759",
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -134,6 +134,7 @@ masks = processor.image_processor.post_process_masks(
|
||||
|
||||
### Model architecture
|
||||
|
||||
<!-- ascii-guard-ignore -->
|
||||
```
|
||||
SAM Architecture:
|
||||
┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐
|
||||
@@ -144,6 +145,7 @@ SAM Architecture:
|
||||
Image Embeddings Prompt Embeddings Masks + IoU
|
||||
(computed once) (per prompt) predictions
|
||||
```
|
||||
<!-- ascii-guard-ignore-end -->
|
||||
|
||||
### Model variants
|
||||
|
||||
|
||||
@@ -0,0 +1,42 @@
|
||||
"""Resolve HERMES_HOME for standalone skill scripts.
|
||||
|
||||
Skill scripts may run outside the Hermes process (e.g. system Python,
|
||||
nix env, CI) where ``hermes_constants`` is not importable. This module
|
||||
provides the same ``get_hermes_home()`` and ``display_hermes_home()``
|
||||
contracts as ``hermes_constants`` without requiring it on ``sys.path``.
|
||||
|
||||
When ``hermes_constants`` IS available it is used directly so that any
|
||||
future enhancements (profile resolution, Docker detection, etc.) are
|
||||
picked up automatically. The fallback path replicates the core logic
|
||||
from ``hermes_constants.py`` using only the stdlib.
|
||||
|
||||
All scripts under ``google-workspace/scripts/`` should import from here
|
||||
instead of duplicating the ``HERMES_HOME = Path(os.getenv(...))`` pattern.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
try:
    # Prefer the canonical implementation whenever Hermes itself is importable,
    # so any richer resolution logic added there is picked up automatically.
    from hermes_constants import display_hermes_home as display_hermes_home
    from hermes_constants import get_hermes_home as get_hermes_home
except ImportError:  # ModuleNotFoundError is a subclass, so this covers both

    def get_hermes_home() -> Path:
        """Return the Hermes home directory (default: ``~/.hermes``).

        Stdlib-only fallback for ``hermes_constants.get_hermes_home()``.
        A ``HERMES_HOME`` value that is unset, empty, or whitespace-only
        selects the default.
        """
        configured = os.environ.get("HERMES_HOME", "").strip()
        if configured:
            return Path(configured)
        return Path.home() / ".hermes"

    def display_hermes_home() -> str:
        """Return a user-friendly, ``~``-shortened display string.

        Stdlib-only fallback for ``hermes_constants.display_hermes_home()``.
        Paths inside the user's home directory are shown relative to ``~``;
        anything else is returned unchanged.
        """
        home = get_hermes_home()
        try:
            relative = home.relative_to(Path.home())
        except ValueError:
            # Not located under the user's home directory: show it verbatim.
            return str(home)
        if not relative.parts:
            # HERMES_HOME *is* the home directory; avoid the odd "~/." form.
            return "~"
        return "~/" + str(relative)
|
||||
@@ -31,7 +31,14 @@ from datetime import datetime, timedelta, timezone
|
||||
from email.mime.text import MIMEText
|
||||
from pathlib import Path
|
||||
|
||||
HERMES_HOME = Path(os.getenv("HERMES_HOME", Path.home() / ".hermes"))
|
||||
# Ensure sibling modules (_hermes_home) are importable when run standalone.
|
||||
_SCRIPTS_DIR = str(Path(__file__).resolve().parent)
|
||||
if _SCRIPTS_DIR not in sys.path:
|
||||
sys.path.insert(0, _SCRIPTS_DIR)
|
||||
|
||||
from _hermes_home import get_hermes_home
|
||||
|
||||
HERMES_HOME = get_hermes_home()
|
||||
TOKEN_PATH = HERMES_HOME / "google_token.json"
|
||||
CLIENT_SECRET_PATH = HERMES_HOME / "google_client_secret.json"
|
||||
|
||||
|
||||
@@ -10,9 +10,12 @@ import sys
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
# Ensure sibling modules (_hermes_home) are importable when run standalone.
|
||||
_SCRIPTS_DIR = str(Path(__file__).resolve().parent)
|
||||
if _SCRIPTS_DIR not in sys.path:
|
||||
sys.path.insert(0, _SCRIPTS_DIR)
|
||||
|
||||
def get_hermes_home() -> Path:
|
||||
return Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes"))
|
||||
from _hermes_home import get_hermes_home
|
||||
|
||||
|
||||
def get_token_path() -> Path:
|
||||
|
||||
@@ -21,6 +21,8 @@ Agent workflow:
|
||||
6. Run --check to verify. Done.
|
||||
"""
|
||||
|
||||
from __future__ import annotations # allow PEP 604 `X | None` on Python 3.9+
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
@@ -28,13 +30,12 @@ import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
try:
|
||||
from hermes_constants import display_hermes_home, get_hermes_home
|
||||
except ModuleNotFoundError:
|
||||
HERMES_AGENT_ROOT = Path(__file__).resolve().parents[4]
|
||||
if HERMES_AGENT_ROOT.exists():
|
||||
sys.path.insert(0, str(HERMES_AGENT_ROOT))
|
||||
from hermes_constants import display_hermes_home, get_hermes_home
|
||||
# Ensure sibling modules (_hermes_home) are importable when run standalone.
|
||||
_SCRIPTS_DIR = str(Path(__file__).resolve().parent)
|
||||
if _SCRIPTS_DIR not in sys.path:
|
||||
sys.path.insert(0, _SCRIPTS_DIR)
|
||||
|
||||
from _hermes_home import display_hermes_home, get_hermes_home
|
||||
|
||||
HERMES_HOME = get_hermes_home()
|
||||
TOKEN_PATH = HERMES_HOME / "google_token.json"
|
||||
@@ -111,7 +112,11 @@ def install_deps():
|
||||
return True
|
||||
except subprocess.CalledProcessError as e:
|
||||
print(f"ERROR: Failed to install dependencies: {e}")
|
||||
print(f"Try manually: {sys.executable} -m pip install {' '.join(REQUIRED_PACKAGES)}")
|
||||
print(
|
||||
"On environments without pip (e.g. Nix), install the optional extra instead:"
|
||||
)
|
||||
print(" pip install 'hermes-agent[google]'")
|
||||
print(f"Or manually: {sys.executable} -m pip install {' '.join(REQUIRED_PACKAGES)}")
|
||||
return False
|
||||
|
||||
|
||||
|
||||
@@ -22,6 +22,7 @@ End-to-end pipeline for producing publication-ready ML/AI research papers target
|
||||
|
||||
This is **not a linear pipeline** — it is an iterative loop. Results trigger new experiments. Reviews trigger new analysis. The agent must handle these feedback loops.
|
||||
|
||||
<!-- ascii-guard-ignore -->
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────────┐
|
||||
│ RESEARCH PAPER PIPELINE │
|
||||
@@ -41,6 +42,7 @@ This is **not a linear pipeline** — it is an iterative loop. Results trigger n
|
||||
│ │
|
||||
└─────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
<!-- ascii-guard-ignore-end -->
|
||||
|
||||
---
|
||||
|
||||
|
||||
@@ -1230,3 +1230,210 @@ class TestEmptyTextBlockFix:
|
||||
from agent.bedrock_adapter import _convert_content_to_converse
|
||||
blocks = _convert_content_to_converse("Hello")
|
||||
assert blocks[0]["text"] == "Hello"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Stale-connection detection and per-region client invalidation
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestInvalidateRuntimeClient:
    """Per-region eviction used to discard dead/stale bedrock-runtime clients."""

    def setup_method(self):
        """Start every test from a freshly reset client cache."""
        from agent.bedrock_adapter import reset_client_cache

        reset_client_cache()

    def teardown_method(self):
        """Reset again so placeholder entries cannot leak into later tests."""
        from agent.bedrock_adapter import reset_client_cache

        reset_client_cache()

    def test_evicts_only_the_target_region(self):
        """Invalidating one region must leave other regions' clients cached."""
        from agent.bedrock_adapter import (
            _bedrock_runtime_client_cache,
            invalidate_runtime_client,
        )

        # Plain strings stand in for clients; only cache membership matters.
        _bedrock_runtime_client_cache["us-east-1"] = "dead-client"
        _bedrock_runtime_client_cache["us-west-2"] = "live-client"

        evicted = invalidate_runtime_client("us-east-1")

        assert evicted is True
        assert "us-east-1" not in _bedrock_runtime_client_cache
        assert _bedrock_runtime_client_cache["us-west-2"] == "live-client"

    def test_returns_false_when_region_not_cached(self):
        """Invalidating a region with no cached client reports ``False``."""
        from agent.bedrock_adapter import invalidate_runtime_client

        assert invalidate_runtime_client("eu-west-1") is False
|
||||
|
||||
|
||||
class TestIsStaleConnectionError:
    """Classifier that decides whether an exception warrants client eviction."""

    @staticmethod
    def _classify(error):
        """Run the classifier under test on ``error`` and return its verdict."""
        from agent.bedrock_adapter import is_stale_connection_error

        return is_stale_connection_error(error)

    @classmethod
    def _expect_stale_for_assertion_in(cls, module_name):
        """Raise ``AssertionError`` from a frame that claims to be ``module_name``.

        The tiny ``assert False`` is exec'd under a fake globals dict, so the
        innermost frame's ``f_globals["__name__"]`` is ``module_name``. The
        classifier is invoked while the exception is still being handled.
        """
        namespace = {"__name__": module_name}
        try:
            exec("def _boom():\n assert False\n_boom()", namespace)
        except AssertionError as raised:
            assert cls._classify(raised) is True
        else:
            pytest.fail("AssertionError not raised")

    def test_detects_botocore_connection_closed_error(self):
        from botocore.exceptions import ConnectionClosedError

        error = ConnectionClosedError(endpoint_url="https://bedrock.example")
        assert self._classify(error) is True

    def test_detects_botocore_endpoint_connection_error(self):
        from botocore.exceptions import EndpointConnectionError

        error = EndpointConnectionError(endpoint_url="https://bedrock.example")
        assert self._classify(error) is True

    def test_detects_botocore_read_timeout(self):
        from botocore.exceptions import ReadTimeoutError

        error = ReadTimeoutError(endpoint_url="https://bedrock.example")
        assert self._classify(error) is True

    def test_detects_urllib3_protocol_error(self):
        from urllib3.exceptions import ProtocolError

        error = ProtocolError("Connection broken")
        assert self._classify(error) is True

    def test_detects_library_internal_assertion_error(self):
        """A bare AssertionError raised from inside urllib3/botocore signals
        a corrupted connection-pool invariant and should trigger eviction."""
        self._expect_stale_for_assertion_in("urllib3.connectionpool")

    def test_detects_botocore_internal_assertion_error(self):
        """Same scenario, but the raising frame lives in the botocore namespace."""
        self._expect_stale_for_assertion_in("botocore.httpsession")

    def test_ignores_application_assertion_error(self):
        """AssertionError from application code (not urllib3/botocore) should
        NOT be classified as stale — those are real test/code bugs."""
        try:
            assert False, "test-only"  # noqa: B011
        except AssertionError as raised:
            assert self._classify(raised) is False

    def test_ignores_unrelated_exceptions(self):
        for unrelated in (ValueError("bad input"), KeyError("missing")):
            assert self._classify(unrelated) is False
|
||||
|
||||
|
||||
class TestCallConverseInvalidatesOnStaleError:
    """call_converse / call_converse_stream evict the cached client when the
    boto3 call raises a stale-connection error — so the next invocation
    reconnects instead of reusing the dead socket."""

    _REGION = "us-east-1"
    _MODEL = "anthropic.claude-3-sonnet-20240229-v1:0"

    def setup_method(self):
        """Start every test from a freshly reset client cache."""
        from agent.bedrock_adapter import reset_client_cache

        reset_client_cache()

    def teardown_method(self):
        """Reset again so mock clients left in the cache cannot leak into
        tests that run afterwards."""
        from agent.bedrock_adapter import reset_client_cache

        reset_client_cache()

    def _cache_client(self, client):
        """Seed the runtime-client cache with ``client`` and return the cache."""
        from agent.bedrock_adapter import _bedrock_runtime_client_cache

        _bedrock_runtime_client_cache[self._REGION] = client
        return _bedrock_runtime_client_cache

    def _request(self):
        """Return the minimal keyword arguments shared by every call."""
        return {
            "region": self._REGION,
            "model": self._MODEL,
            "messages": [{"role": "user", "content": "hi"}],
        }

    def test_converse_evicts_client_on_stale_error(self):
        from agent.bedrock_adapter import call_converse
        from botocore.exceptions import ConnectionClosedError

        dead_client = MagicMock()
        dead_client.converse.side_effect = ConnectionClosedError(
            endpoint_url="https://bedrock.example",
        )
        cache = self._cache_client(dead_client)

        with pytest.raises(ConnectionClosedError):
            call_converse(**self._request())

        assert self._REGION not in cache, (
            "stale client should have been evicted so the retry reconnects"
        )

    def test_converse_stream_evicts_client_on_stale_error(self):
        from agent.bedrock_adapter import call_converse_stream
        from botocore.exceptions import ConnectionClosedError

        dead_client = MagicMock()
        dead_client.converse_stream.side_effect = ConnectionClosedError(
            endpoint_url="https://bedrock.example",
        )
        cache = self._cache_client(dead_client)

        with pytest.raises(ConnectionClosedError):
            call_converse_stream(**self._request())

        assert self._REGION not in cache

    def test_converse_does_not_evict_on_non_stale_error(self):
        """Non-stale errors (e.g. ValidationException) leave the client cache alone."""
        from agent.bedrock_adapter import call_converse
        from botocore.exceptions import ClientError

        live_client = MagicMock()
        live_client.converse.side_effect = ClientError(
            error_response={"Error": {"Code": "ValidationException", "Message": "bad"}},
            operation_name="Converse",
        )
        cache = self._cache_client(live_client)

        with pytest.raises(ClientError):
            call_converse(**self._request())

        assert cache.get(self._REGION) is live_client, (
            "validation errors do not indicate a dead connection — keep the client"
        )

    def test_converse_leaves_successful_client_in_cache(self):
        """A successful call must not touch the cached client."""
        from agent.bedrock_adapter import call_converse

        live_client = MagicMock()
        live_client.converse.return_value = {
            "output": {"message": {"role": "assistant", "content": [{"text": "hi"}]}},
            "stopReason": "end_turn",
            "usage": {"inputTokens": 1, "outputTokens": 1, "totalTokens": 2},
        }
        cache = self._cache_client(live_client)

        call_converse(**self._request())

        assert cache.get(self._REGION) is live_client
|
||||
|
||||
@@ -376,17 +376,15 @@ class TestBedrockModelNameNormalization:
|
||||
"apac.anthropic.claude-haiku-4-5", preserve_dots=True
|
||||
) == "apac.anthropic.claude-haiku-4-5"
|
||||
|
||||
def test_preserve_false_mangles_as_documented(self):
|
||||
"""Canary: with ``preserve_dots=False`` the function still
|
||||
produces the broken all-hyphen form — this is the shape that
|
||||
Bedrock rejected and that the fix avoids. Keeping this test
|
||||
locks in the existing behaviour of ``normalize_model_name`` so a
|
||||
future refactor doesn't accidentally decouple the knob from its
|
||||
effect."""
|
||||
def test_bedrock_prefix_preserved_without_preserve_dots(self):
|
||||
"""Bedrock inference profile IDs are auto-detected by prefix and
|
||||
always returned unmangled -- ``preserve_dots`` is irrelevant for
|
||||
these IDs because the dots are namespace separators, not version
|
||||
separators. Regression for #12295."""
|
||||
from agent.anthropic_adapter import normalize_model_name
|
||||
assert normalize_model_name(
|
||||
"global.anthropic.claude-opus-4-7", preserve_dots=False
|
||||
) == "global-anthropic-claude-opus-4-7"
|
||||
) == "global.anthropic.claude-opus-4-7"
|
||||
|
||||
def test_bare_foundation_model_id_preserved(self):
|
||||
"""Non-inference-profile Bedrock IDs
|
||||
@@ -422,12 +420,11 @@ class TestBedrockBuildAnthropicKwargsEndToEnd:
|
||||
f"{kwargs['model']!r}"
|
||||
)
|
||||
|
||||
def test_bedrock_model_mangled_without_preserve_dots(self):
|
||||
"""Inverse canary: without the flag, ``build_anthropic_kwargs``
|
||||
still produces the broken form — so the fix in
|
||||
``_anthropic_preserve_dots`` is the load-bearing piece that
|
||||
wires ``preserve_dots=True`` through to this builder for the
|
||||
Bedrock case."""
|
||||
def test_bedrock_model_preserved_without_preserve_dots(self):
|
||||
"""Bedrock inference profile IDs survive ``build_anthropic_kwargs``
|
||||
even without ``preserve_dots=True`` -- the prefix auto-detection
|
||||
in ``normalize_model_name`` is the load-bearing piece.
|
||||
Regression for #12295."""
|
||||
from agent.anthropic_adapter import build_anthropic_kwargs
|
||||
kwargs = build_anthropic_kwargs(
|
||||
model="global.anthropic.claude-opus-4-7",
|
||||
@@ -437,4 +434,157 @@ class TestBedrockBuildAnthropicKwargsEndToEnd:
|
||||
reasoning_config=None,
|
||||
preserve_dots=False,
|
||||
)
|
||||
assert kwargs["model"] == "global-anthropic-claude-opus-4-7"
|
||||
assert kwargs["model"] == "global.anthropic.claude-opus-4-7"
|
||||
|
||||
|
||||
class TestBedrockModelIdDetection:
    """Covers ``_is_bedrock_model_id`` plus the prefix auto-detection that lets
    ``normalize_model_name`` keep the dots of Bedrock IDs whatever
    ``preserve_dots`` says. Regression for #12295."""

    @staticmethod
    def _detect(model_id):
        """Return the detector's verdict for ``model_id``."""
        from agent.anthropic_adapter import _is_bedrock_model_id

        return _is_bedrock_model_id(model_id)

    def test_bare_bedrock_id_detected(self):
        assert self._detect("anthropic.claude-opus-4-7") is True

    def test_regional_us_prefix_detected(self):
        assert self._detect("us.anthropic.claude-sonnet-4-5-v1:0") is True

    def test_regional_global_prefix_detected(self):
        assert self._detect("global.anthropic.claude-opus-4-7") is True

    def test_regional_eu_prefix_detected(self):
        assert self._detect("eu.anthropic.claude-sonnet-4-6") is True

    def test_openrouter_format_not_detected(self):
        assert self._detect("claude-opus-4.6") is False

    def test_bare_claude_not_detected(self):
        assert self._detect("claude-opus-4-7") is False

    def test_bare_bedrock_id_preserved_without_flag(self):
        """The primary bug from #12295: ``anthropic.claude-opus-4-7``
        sent to bedrock-mantle via auxiliary clients that don't pass
        ``preserve_dots=True``."""
        from agent.anthropic_adapter import normalize_model_name

        normalized = normalize_model_name(
            "anthropic.claude-opus-4-7", preserve_dots=False
        )
        assert normalized == "anthropic.claude-opus-4-7"

    def test_openrouter_dots_still_converted(self):
        """Dotted names that are not Bedrock IDs must still be converted."""
        from agent.anthropic_adapter import normalize_model_name

        normalized = normalize_model_name("claude-opus-4.6")
        assert normalized == "claude-opus-4-6"

    def test_bare_bedrock_id_survives_build_kwargs(self):
        """End-to-end: bare Bedrock ID through ``build_anthropic_kwargs``
        without ``preserve_dots=True`` -- the auxiliary client path."""
        from agent.anthropic_adapter import build_anthropic_kwargs

        built = build_anthropic_kwargs(
            model="anthropic.claude-opus-4-7",
            messages=[{"role": "user", "content": "hi"}],
            tools=None,
            max_tokens=1024,
            reasoning_config=None,
            preserve_dots=False,
        )
        assert built["model"] == "anthropic.claude-opus-4-7"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# auxiliary_client Bedrock resolution — fix for #13919
|
||||
# ---------------------------------------------------------------------------
|
||||
# Before the fix, resolve_provider_client("bedrock", ...) fell through to the
|
||||
# "unhandled auth_type" warning and returned (None, None), breaking all
|
||||
# auxiliary tasks (compression, memory, summarization) for Bedrock users.
|
||||
|
||||
|
||||
class TestAuxiliaryClientBedrockResolution:
    """Verify resolve_provider_client handles Bedrock's aws_sdk auth type."""

    # Patch target whose return value stands in for the real Bedrock client.
    _BUILDER = "agent.anthropic_adapter.build_anthropic_bedrock_client"

    @staticmethod
    def _fake_aws_env(monkeypatch, region=None):
        """Export dummy AWS credentials (and optionally ``AWS_REGION``)."""
        monkeypatch.setenv("AWS_ACCESS_KEY_ID", "AKIAIOSFODNN7EXAMPLE")
        monkeypatch.setenv("AWS_SECRET_ACCESS_KEY", "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY")
        if region is not None:
            monkeypatch.setenv("AWS_REGION", region)

    def test_bedrock_returns_client_with_credentials(self, monkeypatch):
        """With valid AWS credentials, Bedrock should return a usable client."""
        self._fake_aws_env(monkeypatch, region="us-west-2")

        mock_anthropic_bedrock = MagicMock()
        with patch(self._BUILDER, return_value=mock_anthropic_bedrock):
            from agent.auxiliary_client import resolve_provider_client, AnthropicAuxiliaryClient
            client, model = resolve_provider_client("bedrock", None)

        assert client is not None, (
            "resolve_provider_client('bedrock') returned None — "
            "aws_sdk auth type is not handled"
        )
        assert isinstance(client, AnthropicAuxiliaryClient)
        assert model is not None
        assert client.api_key == "aws-sdk"
        assert "us-west-2" in client.base_url

    def test_bedrock_returns_none_without_credentials(self, monkeypatch):
        """Without AWS credentials, Bedrock should return (None, None) gracefully."""
        with patch("agent.bedrock_adapter.has_aws_credentials", return_value=False):
            from agent.auxiliary_client import resolve_provider_client
            resolved = resolve_provider_client("bedrock", None)

        client, model = resolved
        assert client is None
        assert model is None

    def test_bedrock_uses_configured_region(self, monkeypatch):
        """Bedrock client base_url should reflect AWS_REGION."""
        self._fake_aws_env(monkeypatch, region="eu-central-1")

        with patch(self._BUILDER, return_value=MagicMock()):
            from agent.auxiliary_client import resolve_provider_client
            client, _ = resolve_provider_client("bedrock", None)

        assert client is not None
        assert "eu-central-1" in client.base_url

    def test_bedrock_respects_explicit_model(self, monkeypatch):
        """When caller passes an explicit model, it should be used."""
        self._fake_aws_env(monkeypatch)

        with patch(self._BUILDER, return_value=MagicMock()):
            from agent.auxiliary_client import resolve_provider_client
            _, model = resolve_provider_client(
                "bedrock", "us.anthropic.claude-sonnet-4-5-20250929-v1:0"
            )

        assert "claude-sonnet" in model

    def test_bedrock_async_mode(self, monkeypatch):
        """Async mode should return an AsyncAnthropicAuxiliaryClient."""
        self._fake_aws_env(monkeypatch)

        with patch(self._BUILDER, return_value=MagicMock()):
            from agent.auxiliary_client import resolve_provider_client, AsyncAnthropicAuxiliaryClient
            client, model = resolve_provider_client("bedrock", None, async_mode=True)

        assert client is not None
        assert isinstance(client, AsyncAnthropicAuxiliaryClient)

    def test_bedrock_default_model_is_haiku(self, monkeypatch):
        """Default auxiliary model for Bedrock should be Haiku (fast, cheap)."""
        self._fake_aws_env(monkeypatch)

        with patch(self._BUILDER, return_value=MagicMock()):
            from agent.auxiliary_client import resolve_provider_client
            _, model = resolve_provider_client("bedrock", None)

        assert "haiku" in model.lower()
|
||||
|
||||
@@ -77,6 +77,13 @@ class FakeMemoryProvider(MemoryProvider):
|
||||
self.memory_writes.append((action, target, content))
|
||||
|
||||
|
||||
class MetadataMemoryProvider(FakeMemoryProvider):
    """Fake provider whose write hook accepts the optional ``metadata`` argument."""

    def on_memory_write(self, action, target, content, metadata=None):
        """Record the write as a 4-tuple; missing/empty metadata becomes ``{}``."""
        provenance = metadata if metadata else {}
        record = (action, target, content, provenance)
        self.memory_writes.append(record)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# MemoryProvider ABC tests
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -862,6 +869,51 @@ class TestOnMemoryWriteBridge:
|
||||
mgr.on_memory_write("add", "memory", "new fact")
|
||||
assert p.memory_writes == [("add", "memory", "new fact")]
|
||||
|
||||
def test_on_memory_write_metadata_passed_to_opt_in_provider(self):
    """Providers that accept metadata receive structured write provenance."""
    provenance = {
        "write_origin": "assistant_tool",
        "execution_context": "foreground",
        "session_id": "sess-1",
    }
    manager = MemoryManager()
    provider = MetadataMemoryProvider("ext")
    manager.add_provider(provider)

    # Hand over a copy so the expectation below stays an independent value.
    manager.on_memory_write("add", "memory", "new fact", metadata=dict(provenance))

    expected = [("add", "memory", "new fact", provenance)]
    assert provider.memory_writes == expected
|
||||
|
||||
def test_on_memory_write_metadata_keeps_legacy_provider_compatible(self):
    """Old 3-arg providers keep working when the manager receives metadata."""
    manager = MemoryManager()
    legacy = FakeMemoryProvider("ext")
    manager.add_provider(legacy)

    write_metadata = {"write_origin": "assistant_tool"}
    manager.on_memory_write("add", "user", "legacy provider fact", metadata=write_metadata)

    # The legacy provider records only the three positional fields.
    expected = [("add", "user", "legacy provider fact")]
    assert legacy.memory_writes == expected
|
||||
|
||||
def test_on_memory_write_replace(self):
|
||||
"""on_memory_write fires for 'replace' actions."""
|
||||
mgr = MemoryManager()
|
||||
|
||||
@@ -588,6 +588,57 @@ class TestGetModelContextLength:
|
||||
assert result == 200000
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Bedrock context resolution — must run BEFORE custom-endpoint probe
|
||||
# =========================================================================
|
||||
|
||||
class TestBedrockContextResolution:
    """Regression tests for the order of Bedrock context-length resolution.

    Bug: ``bedrock-runtime.<region>.amazonaws.com`` is absent from
    ``_URL_TO_PROVIDER``, so ``_is_known_provider_base_url`` returned False
    and the custom-endpoint probe (step 2) ran first. It fetched ``/models``
    from Bedrock, which does not serve it, and the 128K default fallback was
    returned before the Bedrock branch was ever reached.

    Fix: the Bedrock branch now runs ahead of the custom-endpoint probe.
    """

    @patch("agent.model_metadata.fetch_endpoint_model_metadata")
    def test_bedrock_provider_returns_static_table_before_probe(self, probe):
        """provider='bedrock' resolves via static table, bypasses /models probe."""
        resolved = get_model_context_length(
            "anthropic.claude-opus-4-v1:0",
            provider="bedrock",
            base_url="https://bedrock-runtime.us-east-1.amazonaws.com",
        )
        # Expect the static Bedrock table value (200K for Claude) rather than
        # DEFAULT_FALLBACK_CONTEXT (128K).
        assert resolved == 200000
        probe.assert_not_called()

    @patch("agent.model_metadata.fetch_endpoint_model_metadata")
    def test_bedrock_url_without_provider_hint(self, probe):
        """bedrock-runtime host infers Bedrock even when provider is omitted."""
        resolved = get_model_context_length(
            "anthropic.claude-sonnet-4-v1:0",
            base_url="https://bedrock-runtime.us-west-2.amazonaws.com",
        )
        assert resolved == 200000
        probe.assert_not_called()

    @patch("agent.model_metadata.fetch_endpoint_model_metadata")
    def test_non_bedrock_url_still_probes(self, probe):
        """Non-Bedrock hosts still reach the custom-endpoint probe."""
        probe.return_value = {"some-model": {"context_length": 50000}}
        resolved = get_model_context_length(
            "some-model",
            base_url="https://api.example.com/v1",
        )
        assert resolved == 50000
        assert probe.called
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# _strip_provider_prefix — Ollama model:tag vs provider:model
|
||||
# =========================================================================
|
||||
|
||||
@@ -0,0 +1,94 @@
|
||||
"""Tests for the /busy CLI command and busy-input-mode config handling."""
|
||||
|
||||
import unittest
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import patch
|
||||
|
||||
|
||||
def _import_cli():
    """Import and return the ``cli`` module.

    If ``hermes_cli.config`` has no ``save_env_value_secure`` attribute, a
    stub reporting success is installed on it before ``cli`` is imported.
    """
    import hermes_cli.config as config_mod

    if not hasattr(config_mod, "save_env_value_secure"):

        def _stub_save(key, value):
            return {
                "success": True,
                "stored_as": key,
                "validated": False,
            }

        config_mod.save_env_value_secure = _stub_save

    import cli as cli_mod

    return cli_mod
|
||||
|
||||
|
||||
class TestHandleBusyCommand(unittest.TestCase):
    """Exercise ``HermesCLI._handle_busy_command`` against a lightweight stub."""

    def _make_cli(self, busy_input_mode="interrupt"):
        """Build a stand-in carrying only the attributes the stub needs."""
        return SimpleNamespace(busy_input_mode=busy_input_mode, agent=None)

    @staticmethod
    def _printed(cprint_mock):
        """Join every recorded ``_cprint`` call into one searchable string."""
        return " ".join(str(call) for call in cprint_mock.call_args_list)

    def test_no_args_shows_status(self):
        cli_mod = _import_cli()
        stub = self._make_cli("queue")
        with patch.object(cli_mod, "_cprint") as cprint:
            with patch.object(cli_mod, "save_config_value") as save:
                cli_mod.HermesCLI._handle_busy_command(stub, "/busy")

        save.assert_not_called()
        output = self._printed(cprint)
        self.assertIn("queue", output)
        self.assertIn("interrupt", output)

    def test_queue_argument_sets_queue_mode_and_saves(self):
        cli_mod = _import_cli()
        stub = self._make_cli("interrupt")
        with patch.object(cli_mod, "_cprint"):
            with patch.object(cli_mod, "save_config_value", return_value=True) as save:
                cli_mod.HermesCLI._handle_busy_command(stub, "/busy queue")

        self.assertEqual(stub.busy_input_mode, "queue")
        save.assert_called_once_with("display.busy_input_mode", "queue")

    def test_interrupt_argument_sets_interrupt_mode_and_saves(self):
        cli_mod = _import_cli()
        stub = self._make_cli("queue")
        with patch.object(cli_mod, "_cprint"):
            with patch.object(cli_mod, "save_config_value", return_value=True) as save:
                cli_mod.HermesCLI._handle_busy_command(stub, "/busy interrupt")

        self.assertEqual(stub.busy_input_mode, "interrupt")
        save.assert_called_once_with("display.busy_input_mode", "interrupt")

    def test_invalid_argument_prints_usage(self):
        cli_mod = _import_cli()
        stub = self._make_cli()
        with patch.object(cli_mod, "_cprint") as cprint:
            with patch.object(cli_mod, "save_config_value") as save:
                cli_mod.HermesCLI._handle_busy_command(stub, "/busy nonsense")

        save.assert_not_called()
        self.assertIn("Usage: /busy", self._printed(cprint))
|
||||
|
||||
|
||||
class TestBusyCommandRegistry(unittest.TestCase):
    """Checks the ``busy`` entry in ``hermes_cli.commands.COMMAND_REGISTRY``."""

    def test_busy_in_registry(self):
        """A command named ``busy`` is registered."""
        from hermes_cli.commands import COMMAND_REGISTRY

        registered = [command.name for command in COMMAND_REGISTRY]
        assert "busy" in registered

    def test_busy_subcommands_documented(self):
        """The ``busy`` entry carries the expected args hint and category."""
        from hermes_cli.commands import COMMAND_REGISTRY

        matches = (command for command in COMMAND_REGISTRY if command.name == "busy")
        entry = next(matches)
        assert entry.args_hint == "[queue|interrupt|status]"
        assert entry.category == "Configuration"
|
||||
@@ -1374,6 +1374,139 @@ class TestResponsesStreaming:
|
||||
assert data["status"] == "completed"
|
||||
assert data["output"][-1]["content"][0]["text"] == "Stored response"
|
||||
|
||||
@pytest.mark.asyncio
async def test_stream_cancelled_persists_incomplete_snapshot(self, adapter):
    """A server-side ``asyncio.CancelledError`` must leave an ``incomplete`` snapshot.

    After cancellation (shutdown, request timeout) the response must still
    be retrievable from the ResponseStore so GET /v1/responses/{id} and
    previous_response_id chaining keep working. Regression for
    PR #15171 follow-up.

    ``_write_sse_responses`` is invoked directly so the test can await the
    handler to completion; going through TestClient makes the disconnect
    race the server handler and the final-snapshot assertion flaky.
    """
    import gateway.platforms.api_server as api_mod
    import queue as _q

    # Minimal request double plus a stream response that records writes.
    fake_request = MagicMock()
    fake_request.headers = {}

    written_payloads: list = []

    class _FakeStreamResponse:
        async def prepare(self, req):
            pass

        async def write(self, payload):
            written_payloads.append(payload)

    stream_q: _q.Queue = _q.Queue()

    async def _agent_coro():
        # Publish one partial delta, yield briefly so the drain loop can
        # consume it, then simulate the server-side cancel.
        stream_q.put("partial output")
        await asyncio.sleep(0.01)
        raise asyncio.CancelledError()

    agent_task = asyncio.ensure_future(_agent_coro())
    response_id = f"resp_{uuid.uuid4().hex[:28]}"

    # web.StreamResponse is swapped out only for the duration of the call.
    with patch.object(
        api_mod.web, "StreamResponse", return_value=_FakeStreamResponse()
    ), pytest.raises(asyncio.CancelledError):
        await adapter._write_sse_responses(
            request=fake_request,
            response_id=response_id,
            model="hermes-agent",
            created_at=int(time.time()),
            stream_q=stream_q,
            agent_task=agent_task,
            agent_ref=[None],
            conversation_history=[],
            user_message="will be cancelled",
            instructions=None,
            conversation=None,
            store=True,
            session_id=None,
        )

    # The snapshot stored on response.created must have been updated to
    # ``incomplete`` and must carry the partial text seen before the cancel.
    stored = adapter._response_store.get(response_id)
    assert stored is not None, "snapshot must be retrievable after cancellation"
    assert stored["response"]["status"] == "incomplete"

    text_parts = []
    for item in stored["response"].get("output", []):
        if item.get("type") != "message":
            continue
        for part in item.get("content", []):
            text_parts.append(part.get("text", ""))
    output_text = "".join(text_parts)
    assert "partial output" in output_text
|
||||
|
||||
@pytest.mark.asyncio
async def test_stream_client_disconnect_persists_incomplete_snapshot(self, adapter):
    """A mid-stream ``ConnectionResetError`` must still persist an ``incomplete`` snapshot.

    Regression for PR #15171.
    """
    import gateway.platforms.api_server as api_mod
    import queue as _q

    fake_request = MagicMock()
    fake_request.headers = {}

    write_call_count = {"n": 0}

    class _DisconnectingStreamResponse:
        async def prepare(self, req):
            pass

        async def write(self, payload):
            # The first two write() calls succeed; from the third call on
            # the "client" is gone, simulated with ConnectionResetError.
            write_call_count["n"] += 1
            if write_call_count["n"] >= 3:
                raise ConnectionResetError("simulated client disconnect")

    stream_q: _q.Queue = _q.Queue()
    for queued in ("some streamed text", None):  # None is the EOS sentinel
        stream_q.put(queued)

    async def _agent_coro():
        await asyncio.sleep(0.01)
        agent_result = {"final_response": "", "messages": [], "api_calls": 0}
        usage = {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}
        return (agent_result, usage)

    agent_task = asyncio.ensure_future(_agent_coro())
    response_id = f"resp_{uuid.uuid4().hex[:28]}"

    stream_patch = patch.object(
        api_mod.web, "StreamResponse", return_value=_DisconnectingStreamResponse()
    )
    with stream_patch:
        await adapter._write_sse_responses(
            request=fake_request,
            response_id=response_id,
            model="hermes-agent",
            created_at=int(time.time()),
            stream_q=stream_q,
            agent_task=agent_task,
            agent_ref=[None],
            conversation_history=[],
            user_message="will disconnect",
            instructions=None,
            conversation=None,
            store=True,
            session_id=None,
        )

    stored = adapter._response_store.get(response_id)
    assert stored is not None, "snapshot must survive client disconnect"
    assert stored["response"]["status"] == "incomplete"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Auth on endpoints
|
||||
|
||||
@@ -66,6 +66,37 @@ class TestBlueBubblesHelpers:
|
||||
|
||||
assert check_bluebubbles_requirements() is True
|
||||
|
||||
def test_supports_message_editing_is_false(self, monkeypatch):
    """The adapter advertises that it cannot edit already-sent messages."""
    bluebubbles = _make_adapter(monkeypatch)
    editing_flag = bluebubbles.SUPPORTS_MESSAGE_EDITING
    assert editing_flag is False
|
||||
|
||||
def test_truncate_message_omits_pagination_suffixes(self, monkeypatch):
    """Chunks re-join to the original text and contain no ``(`` marker."""
    bluebubbles = _make_adapter(monkeypatch)
    original_text = "abcdefghij"

    pieces = bluebubbles.truncate_message(original_text, max_length=6)

    assert len(pieces) > 1
    assert "".join(pieces) == "abcdefghij"
    assert not any("(" in piece for piece in pieces)
|
||||
|
||||
@pytest.mark.asyncio
async def test_send_splits_paragraphs_into_multiple_bubbles(self, monkeypatch):
    """Text separated by a blank line is posted as one message per paragraph."""
    bluebubbles = _make_adapter(monkeypatch)
    posted_messages = []

    async def fake_resolve_chat_guid(chat_id):
        return "iMessage;-;user@example.com"

    async def fake_api_post(path, payload):
        # Record the outgoing text and answer with a sequential fake guid.
        posted_messages.append(payload["message"])
        guid = f"msg-{len(posted_messages)}"
        return {"data": {"guid": guid}}

    monkeypatch.setattr(bluebubbles, "_resolve_chat_guid", fake_resolve_chat_guid)
    monkeypatch.setattr(bluebubbles, "_api_post", fake_api_post)

    outcome = await bluebubbles.send("user@example.com", "first thought\n\nsecond thought")

    assert outcome.success is True
    assert posted_messages == ["first thought", "second thought"]
|
||||
|
||||
def test_format_message_strips_markdown(self, monkeypatch):
    """Bold and inline-code markers are removed from formatted output."""
    bluebubbles = _make_adapter(monkeypatch)
    formatted = bluebubbles.format_message("**Hello** `world`")
    assert formatted == "Hello world"
|
||||
|
||||
@@ -70,6 +70,9 @@ def _make_runner():
|
||||
runner.session_store = None
|
||||
runner.hooks = MagicMock()
|
||||
runner.hooks.emit = AsyncMock()
|
||||
runner.pairing_store = MagicMock()
|
||||
runner.pairing_store.is_approved.return_value = True
|
||||
runner._is_user_authorized = lambda _source: True
|
||||
return runner, _AGENT_PENDING_SENTINEL
|
||||
|
||||
|
||||
@@ -91,6 +94,30 @@ def _make_adapter(platform_val="telegram"):
|
||||
class TestBusySessionAck:
|
||||
"""User sends a message while agent is running — should get acknowledgment."""
|
||||
|
||||
@pytest.mark.asyncio
async def test_handle_message_queue_mode_queues_without_interrupt(self):
    """Runner queue mode must not interrupt an active agent for text follow-ups."""
    from gateway.run import GatewayRunner

    runner, _sentinel = _make_runner()
    adapter = _make_adapter()
    event = _make_event(text="follow up in queue mode")
    session_key = build_session_key(event.source)

    # Simulate an agent already running for this session, in queue mode.
    active_agent = MagicMock()
    runner._busy_input_mode = "queue"
    runner._running_agents[session_key] = active_agent
    runner.adapters[event.source.platform] = adapter

    outcome = await GatewayRunner._handle_message(runner, event)

    assert outcome is None
    # The event is parked on the adapter, not on the runner.
    assert session_key in adapter._pending_messages
    assert adapter._pending_messages[session_key] is event
    assert session_key not in runner._pending_messages
    active_agent.interrupt.assert_not_called()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_sends_ack_when_agent_running(self):
|
||||
"""First message during busy session should get a status ack."""
|
||||
|
||||
@@ -52,6 +52,10 @@ class TestPlatformConfigRoundtrip:
|
||||
assert restored.enabled is False
|
||||
assert restored.token is None
|
||||
|
||||
def test_from_dict_coerces_quoted_false_enabled(self):
    """The string ``"false"`` for ``enabled`` is coerced to boolean ``False``."""
    raw = {"enabled": "false"}
    platform_cfg = PlatformConfig.from_dict(raw)
    assert platform_cfg.enabled is False
|
||||
|
||||
|
||||
class TestGetConnectedPlatforms:
|
||||
def test_returns_enabled_with_token(self):
|
||||
@@ -140,6 +144,10 @@ class TestSessionResetPolicy:
|
||||
assert restored.at_hour == 4
|
||||
assert restored.idle_minutes == 1440
|
||||
|
||||
def test_from_dict_coerces_quoted_false_notify(self):
    """The string ``"false"`` for ``notify`` is coerced to boolean ``False``."""
    raw = {"notify": "false"}
    policy = SessionResetPolicy.from_dict(raw)
    assert policy.notify is False
|
||||
|
||||
|
||||
class TestGatewayConfigRoundtrip:
|
||||
def test_full_roundtrip(self):
|
||||
@@ -182,6 +190,10 @@ class TestGatewayConfigRoundtrip:
|
||||
assert restored.unauthorized_dm_behavior == "ignore"
|
||||
assert restored.platforms[Platform.WHATSAPP].extra["unauthorized_dm_behavior"] == "pair"
|
||||
|
||||
def test_from_dict_coerces_quoted_false_always_log_local(self):
    """The string ``"false"`` for ``always_log_local`` is coerced to ``False``."""
    raw = {"always_log_local": "false"}
    gateway_cfg = GatewayConfig.from_dict(raw)
    assert gateway_cfg.always_log_local is False
|
||||
|
||||
|
||||
class TestLoadGatewayConfig:
|
||||
def test_bridges_quick_commands_from_config_yaml(self, tmp_path, monkeypatch):
|
||||
@@ -238,6 +250,55 @@ class TestLoadGatewayConfig:
|
||||
|
||||
assert config.thread_sessions_per_user is False
|
||||
|
||||
def test_bridges_quoted_false_platform_enabled_from_config_yaml(self, tmp_path, monkeypatch):
    """A quoted ``"false"`` under ``platforms.api_server.enabled`` disables the platform.

    Writes a config.yaml into a temporary HERMES_HOME, loads it through
    ``load_gateway_config`` and checks that the API server platform is
    neither enabled nor reported as connected.
    """
    hermes_home = tmp_path / ".hermes"
    hermes_home.mkdir()
    config_path = hermes_home / "config.yaml"
    # Nesting depth is significant in YAML: ``enabled`` must sit strictly
    # deeper than ``api_server`` or it becomes a sibling key of it.
    config_path.write_text(
        "platforms:\n"
        "  api_server:\n"
        "    enabled: \"false\"\n",
        encoding="utf-8",
    )

    monkeypatch.setenv("HERMES_HOME", str(hermes_home))

    config = load_gateway_config()

    assert config.platforms[Platform.API_SERVER].enabled is False
    assert Platform.API_SERVER not in config.get_connected_platforms()
|
||||
|
||||
def test_bridges_quoted_false_session_notify_from_config_yaml(self, tmp_path, monkeypatch):
    """A quoted ``"false"`` for ``session_reset.notify`` loads as boolean ``False``."""
    home_dir = tmp_path / ".hermes"
    home_dir.mkdir()

    yaml_text = (
        "session_reset:\n"
        " notify: \"false\"\n"
    )
    (home_dir / "config.yaml").write_text(yaml_text, encoding="utf-8")

    monkeypatch.setenv("HERMES_HOME", str(home_dir))

    loaded = load_gateway_config()

    assert loaded.default_reset_policy.notify is False
|
||||
|
||||
def test_bridges_quoted_false_always_log_local_from_config_yaml(self, tmp_path, monkeypatch):
    """A quoted ``"false"`` for top-level ``always_log_local`` loads as ``False``."""
    home_dir = tmp_path / ".hermes"
    home_dir.mkdir()

    yaml_text = "always_log_local: \"false\"\n"
    (home_dir / "config.yaml").write_text(yaml_text, encoding="utf-8")

    monkeypatch.setenv("HERMES_HOME", str(home_dir))

    loaded = load_gateway_config()

    assert loaded.always_log_local is False
|
||||
|
||||
def test_bridges_discord_channel_prompts_from_config_yaml(self, tmp_path, monkeypatch):
|
||||
hermes_home = tmp_path / ".hermes"
|
||||
hermes_home.mkdir()
|
||||
|
||||
@@ -137,11 +137,38 @@ class TestGetProxyUrl:
|
||||
class TestResolveProxyUrl:
    """Tests for ``resolve_proxy_url``: scheme normalisation and ``NO_PROXY`` bypass."""

    # Every proxy-related variable is removed before each scenario so the
    # host environment cannot influence the result. Both spellings of
    # NO_PROXY are included alongside the proxy variables.
    _PROXY_ENV_VARS = (
        "HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY",
        "https_proxy", "http_proxy", "all_proxy",
        "NO_PROXY", "no_proxy",
    )

    @classmethod
    def _clear_proxy_env(cls, monkeypatch):
        """Delete all proxy environment variables, ignoring ones that are unset."""
        for key in cls._PROXY_ENV_VARS:
            monkeypatch.delenv(key, raising=False)

    def test_normalizes_socks_alias_from_all_proxy(self, monkeypatch):
        """A ``socks://`` URL in ALL_PROXY is returned with the ``socks5://`` scheme."""
        self._clear_proxy_env(monkeypatch)
        monkeypatch.setenv("ALL_PROXY", "socks://127.0.0.1:1080/")

        assert resolve_proxy_url() == "socks5://127.0.0.1:1080/"

    def test_no_proxy_bypasses_matching_host(self, monkeypatch):
        """A target host listed in NO_PROXY yields no proxy."""
        self._clear_proxy_env(monkeypatch)
        monkeypatch.setenv("HTTPS_PROXY", "http://proxy.example:8080")
        monkeypatch.setenv("NO_PROXY", "api.telegram.org")

        assert resolve_proxy_url(target_hosts="api.telegram.org") is None

    def test_no_proxy_bypasses_cidr_target(self, monkeypatch):
        """A target IP inside a NO_PROXY CIDR range yields no proxy."""
        self._clear_proxy_env(monkeypatch)
        monkeypatch.setenv("HTTPS_PROXY", "http://proxy.example:8080")
        monkeypatch.setenv("NO_PROXY", "149.154.160.0/20")

        assert resolve_proxy_url(target_hosts=["149.154.167.220"]) is None

    def test_no_proxy_ignored_without_target(self, monkeypatch):
        """Without target hosts, even ``NO_PROXY=*`` does not suppress the proxy."""
        self._clear_proxy_env(monkeypatch)
        monkeypatch.setenv("HTTPS_PROXY", "http://proxy.example:8080")
        monkeypatch.setenv("NO_PROXY", "*")

        assert resolve_proxy_url() == "http://proxy.example:8080"
|
||||
|
||||
|
||||
class TestRunAgentProxyDispatch:
|
||||
"""Test that _run_agent() delegates to proxy when configured."""
|
||||
|
||||
@@ -179,6 +179,40 @@ class TestHandleResumeCommand:
|
||||
assert call_args[0][1] == "sess_v2"
|
||||
db.close()
|
||||
|
||||
@pytest.mark.asyncio
async def test_resume_follows_compression_continuation(self, tmp_path):
    """Gateway /resume should reopen the live descendant after compression.

    Sets up a titled root session that was ended with reason
    ``"compression"`` plus a child session continuing it, then resumes by
    the root's title and expects the switch to target the child.
    """
    from hermes_state import SessionDB

    db = SessionDB(db_path=tmp_path / "state.db")
    # Close the database even when an assertion fails, so a failing run
    # does not leave the handle open.
    try:
        db.create_session("compressed_root", "telegram")
        db.set_session_title("compressed_root", "Compressed Work")
        db.end_session("compressed_root", "compression")
        db.create_session("compressed_child", "telegram", parent_session_id="compressed_root")
        db.append_message("compressed_child", "user", "hello from continuation")
        db.create_session("current_session_001", "telegram")

        event = _make_event(text="/resume Compressed Work")
        runner = _make_runner(
            session_db=db,
            current_session_id="current_session_001",
            event=event,
        )
        # Only the continuation session has a transcript to load.
        runner.session_store.load_transcript.side_effect = (
            lambda session_id: [{"role": "user", "content": "hello from continuation"}]
            if session_id == "compressed_child"
            else []
        )

        result = await runner._handle_resume_command(event)

        assert "Resumed session" in result
        assert "(1 message)" in result
        call_args = runner.session_store.switch_session.call_args
        assert call_args[0][1] == "compressed_child"
        runner.session_store.load_transcript.assert_called_with("compressed_child")
    finally:
        db.close()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_resume_clears_running_agent(self, tmp_path):
|
||||
"""Switching sessions clears any cached running agent."""
|
||||
|
||||
@@ -58,6 +58,13 @@ class ProgressCaptureAdapter(BasePlatformAdapter):
|
||||
return {"id": chat_id}
|
||||
|
||||
|
||||
class NonEditingProgressCaptureAdapter(ProgressCaptureAdapter):
    """Capture adapter that declares no edit support and fails on any edit attempt."""

    SUPPORTS_MESSAGE_EDITING = False

    async def edit_message(self, chat_id, message_id, content) -> SendResult:
        # Reaching this method at all is a test failure.
        failure = "non-editable adapters should not receive edit_message calls"
        raise AssertionError(failure)
|
||||
|
||||
|
||||
class FakeAgent:
|
||||
def __init__(self, **kwargs):
|
||||
self.tool_progress_callback = kwargs.get("tool_progress_callback")
|
||||
@@ -502,6 +509,7 @@ async def _run_with_agent(
|
||||
chat_id="-1001",
|
||||
chat_type="group",
|
||||
thread_id="17585",
|
||||
adapter_cls=ProgressCaptureAdapter,
|
||||
):
|
||||
if config_data:
|
||||
import yaml
|
||||
@@ -516,7 +524,7 @@ async def _run_with_agent(
|
||||
fake_run_agent.AIAgent = agent_cls
|
||||
monkeypatch.setitem(sys.modules, "run_agent", fake_run_agent)
|
||||
|
||||
adapter = ProgressCaptureAdapter(platform=platform)
|
||||
adapter = adapter_cls(platform=platform)
|
||||
runner = _make_runner(adapter)
|
||||
gateway_run = importlib.import_module("gateway.run")
|
||||
if config_data and "streaming" in config_data:
|
||||
@@ -666,6 +674,26 @@ async def test_run_agent_interim_commentary_works_with_tool_progress_off(monkeyp
|
||||
assert any(call["content"] == "I'll inspect the repo first." for call in adapter.sent)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_run_agent_bluebubbles_uses_commentary_send_path_for_quick_replies(monkeypatch, tmp_path):
    """On a non-editing BlueBubbles adapter, interim commentary is sent, never edited."""
    run_options = {
        "session_id": "sess-bluebubbles-commentary",
        "config_data": {"display": {"interim_assistant_messages": True}},
        "platform": Platform.BLUEBUBBLES,
        "chat_id": "iMessage;-;user@example.com",
        "chat_type": "dm",
        "thread_id": None,
        "adapter_cls": NonEditingProgressCaptureAdapter,
    }
    adapter, result = await _run_with_agent(
        monkeypatch, tmp_path, CommentaryAgent, **run_options
    )

    assert result.get("already_sent") is not True
    sent_contents = [call["content"] for call in adapter.sent]
    assert sent_contents == ["I'll inspect the repo first."]
    assert adapter.edits == []
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_run_agent_previewed_final_marks_already_sent(monkeypatch, tmp_path):
|
||||
adapter, result = await _run_with_agent(
|
||||
|
||||
@@ -11,6 +11,8 @@ from gateway.session import (
|
||||
build_session_context,
|
||||
build_session_context_prompt,
|
||||
build_session_key,
|
||||
canonical_whatsapp_identifier,
|
||||
normalize_whatsapp_identifier,
|
||||
)
|
||||
|
||||
|
||||
@@ -183,6 +185,25 @@ class TestBuildSessionContextPrompt:
|
||||
assert "Telegram" in prompt
|
||||
assert "Home Chat" in prompt
|
||||
|
||||
def test_bluebubbles_prompt_mentions_short_conversational_i_message_format(self):
    """The BlueBubbles session prompt carries the iMessage style guidance phrases."""
    bluebubbles_cfg = PlatformConfig(
        enabled=True,
        extra={"server_url": "http://localhost:1234", "password": "secret"},
    )
    config = GatewayConfig(platforms={Platform.BLUEBUBBLES: bluebubbles_cfg})
    source = SessionSource(
        platform=Platform.BLUEBUBBLES,
        chat_id="iMessage;-;user@example.com",
        chat_name="Ben",
        chat_type="dm",
    )

    prompt = build_session_context_prompt(build_session_context(source, config))

    for expected_phrase in (
        "responding via iMessage",
        "short and conversational",
        "blank line",
    ):
        assert expected_phrase in prompt
|
||||
|
||||
def test_discord_prompt(self):
|
||||
config = GatewayConfig(
|
||||
platforms={
|
||||
@@ -626,9 +647,9 @@ class TestSessionStoreSwitchSession:
|
||||
db.close()
|
||||
|
||||
|
||||
class TestWhatsAppDMSessionKeyConsistency:
|
||||
"""Regression: all session-key construction must go through build_session_key
|
||||
so DMs are isolated by chat_id across platforms."""
|
||||
class TestWhatsAppSessionKeyConsistency:
|
||||
"""Regression: WhatsApp session keys must collapse JID/LID aliases to a
|
||||
single stable identity for both DM chat_ids and group participant_ids."""
|
||||
|
||||
@pytest.fixture()
|
||||
def store(self, tmp_path):
|
||||
@@ -639,7 +660,7 @@ class TestWhatsAppDMSessionKeyConsistency:
|
||||
s._loaded = True
|
||||
return s
|
||||
|
||||
def test_whatsapp_dm_includes_chat_id(self):
|
||||
def test_whatsapp_dm_uses_canonical_identifier(self):
|
||||
source = SessionSource(
|
||||
platform=Platform.WHATSAPP,
|
||||
chat_id="15551234567@s.whatsapp.net",
|
||||
@@ -647,7 +668,80 @@ class TestWhatsAppDMSessionKeyConsistency:
|
||||
user_name="Phone User",
|
||||
)
|
||||
key = build_session_key(source)
|
||||
assert key == "agent:main:whatsapp:dm:15551234567@s.whatsapp.net"
|
||||
assert key == "agent:main:whatsapp:dm:15551234567"
|
||||
|
||||
def test_whatsapp_dm_aliases_share_one_session_key(self, tmp_path, monkeypatch):
    """A LID chat_id and its mapped phone JID produce the same DM session key."""
    home = tmp_path / "hermes-home"
    session_dir = home / "whatsapp" / "session"
    session_dir.mkdir(parents=True, exist_ok=True)
    # Mapping file pairing the LID with the phone JID.
    mapping_file = session_dir / "lid-mapping-999999999999999.json"
    mapping_file.write_text(json.dumps("15551234567@s.whatsapp.net"), encoding="utf-8")
    monkeypatch.setenv("HERMES_HOME", str(home))

    def dm_source(chat_id):
        return SessionSource(
            platform=Platform.WHATSAPP,
            chat_id=chat_id,
            chat_type="dm",
            user_name="Phone User",
        )

    lid_source = dm_source("999999999999999@lid")
    phone_source = dm_source("15551234567@s.whatsapp.net")

    assert build_session_key(lid_source) == "agent:main:whatsapp:dm:15551234567"
    assert build_session_key(phone_source) == "agent:main:whatsapp:dm:15551234567"
|
||||
|
||||
def test_whatsapp_group_participant_aliases_share_session_key(self, tmp_path, monkeypatch):
    """Per-user group sessions must not split when a member alternates identities.

    With group_sessions_per_user enabled, the same person appearing once
    by phone-JID and once by LID inside one group has to resolve to a
    single session key.
    """
    home = tmp_path / "hermes-home"
    session_dir = home / "whatsapp" / "session"
    session_dir.mkdir(parents=True, exist_ok=True)
    # Mapping file pairing the LID with the phone JID.
    mapping_file = session_dir / "lid-mapping-999999999999999.json"
    mapping_file.write_text(json.dumps("15551234567@s.whatsapp.net"), encoding="utf-8")
    monkeypatch.setenv("HERMES_HOME", str(home))

    def member_source(user_id):
        return SessionSource(
            platform=Platform.WHATSAPP,
            chat_id="120363000000000000@g.us",
            chat_type="group",
            user_id=user_id,
            user_name="Group Member",
        )

    lid_source = member_source("999999999999999@lid")
    phone_source = member_source("15551234567@s.whatsapp.net")

    expected = "agent:main:whatsapp:group:120363000000000000@g.us:15551234567"
    assert build_session_key(lid_source, group_sessions_per_user=True) == expected
    assert build_session_key(phone_source, group_sessions_per_user=True) == expected
|
||||
|
||||
def test_whatsapp_group_shared_sessions_untouched_by_canonicalisation(self):
    """With group_sessions_per_user off, the key carries no participant part.

    The participant id is absent from the key in this mode, so
    canonicalising it cannot change the result.
    """
    group_source = SessionSource(
        platform=Platform.WHATSAPP,
        chat_id="120363000000000000@g.us",
        chat_type="group",
        user_id="999999999999999@lid",
        user_name="Group Member",
    )

    shared_key = build_session_key(group_source, group_sessions_per_user=False)

    assert shared_key == "agent:main:whatsapp:group:120363000000000000@g.us"
|
||||
|
||||
def test_store_delegates_to_build_session_key(self, store):
|
||||
"""SessionStore._generate_session_key must produce the same result."""
|
||||
@@ -866,6 +960,57 @@ class TestWhatsAppDMSessionKeyConsistency:
|
||||
assert key == "agent:main:telegram:dm:99:topic-1"
|
||||
|
||||
|
||||
class TestWhatsAppIdentifierPublicHelpers:
    """Contract tests for the public WhatsApp identifier helpers.

    Plugins that need WhatsApp identity awareness rely on these helpers as
    public API, so changing the behaviour pinned here is a breaking change
    for downstream plugins.
    """

    def test_normalize_strips_jid_suffix(self):
        normalized = normalize_whatsapp_identifier("60123456789@s.whatsapp.net")
        assert normalized == "60123456789"

    def test_normalize_strips_lid_suffix(self):
        normalized = normalize_whatsapp_identifier("999999999999999@lid")
        assert normalized == "999999999999999"

    def test_normalize_strips_device_suffix(self):
        normalized = normalize_whatsapp_identifier("60123456789:47@s.whatsapp.net")
        assert normalized == "60123456789"

    def test_normalize_strips_leading_plus(self):
        normalized = normalize_whatsapp_identifier("+60123456789")
        assert normalized == "60123456789"

    def test_normalize_handles_bare_numeric(self):
        normalized = normalize_whatsapp_identifier("60123456789")
        assert normalized == "60123456789"

    def test_normalize_handles_empty_and_none(self):
        assert normalize_whatsapp_identifier("") == ""
        assert normalize_whatsapp_identifier(None) == ""  # type: ignore[arg-type]

    def test_canonical_without_mapping_returns_normalized(self, tmp_path, monkeypatch):
        """With no bridge mapping files, the normalized input is returned."""
        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
        canonical = canonical_whatsapp_identifier("60123456789@lid")
        assert canonical == "60123456789"

    def test_canonical_walks_lid_mapping(self, tmp_path, monkeypatch):
        """LID is resolved to its paired phone identity via lid-mapping files."""
        session_dir = tmp_path / "whatsapp" / "session"
        session_dir.mkdir(parents=True, exist_ok=True)
        mapping_file = session_dir / "lid-mapping-999999999999999.json"
        mapping_file.write_text(json.dumps("15551234567@s.whatsapp.net"), encoding="utf-8")
        monkeypatch.setenv("HERMES_HOME", str(tmp_path))

        canonical = canonical_whatsapp_identifier("999999999999999@lid")
        assert canonical == "15551234567"
        assert canonical_whatsapp_identifier("15551234567@s.whatsapp.net") == "15551234567"

    def test_canonical_empty_input(self, tmp_path, monkeypatch):
        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
        canonical = canonical_whatsapp_identifier("")
        assert canonical == ""
|
||||
|
||||
|
||||
class TestSessionStoreEntriesAttribute:
|
||||
"""Regression: /reset must access _entries, not _sessions."""
|
||||
|
||||
|
||||
@@ -322,7 +322,7 @@ class TestFallbackTransportInit:
|
||||
seen_kwargs.append(kwargs.copy())
|
||||
return FakeTransport([], {})
|
||||
|
||||
for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY", "https_proxy", "http_proxy", "all_proxy", "TELEGRAM_PROXY"):
|
||||
for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY", "https_proxy", "http_proxy", "all_proxy", "TELEGRAM_PROXY", "NO_PROXY", "no_proxy"):
|
||||
monkeypatch.delenv(key, raising=False)
|
||||
monkeypatch.setenv("HTTPS_PROXY", "http://proxy.example:8080")
|
||||
monkeypatch.setattr(tnet.httpx, "AsyncHTTPTransport", factory)
|
||||
@@ -333,6 +333,25 @@ class TestFallbackTransportInit:
|
||||
assert len(seen_kwargs) == 2
|
||||
assert all(kwargs["proxy"] == "http://proxy.example:8080" for kwargs in seen_kwargs)
|
||||
|
||||
def test_no_proxy_bypasses_fallback_ip_cidr(self, monkeypatch):
    """A fallback IP inside a NO_PROXY CIDR range builds transports without a proxy."""
    captured = []

    def factory(**kwargs):
        # Snapshot the keyword arguments each transport is built with.
        captured.append(dict(kwargs))
        return FakeTransport([], {})

    proxy_env_vars = (
        "HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY",
        "https_proxy", "http_proxy", "all_proxy",
        "TELEGRAM_PROXY", "NO_PROXY", "no_proxy",
    )
    for name in proxy_env_vars:
        monkeypatch.delenv(name, raising=False)
    monkeypatch.setenv("HTTPS_PROXY", "http://proxy.example:8080")
    monkeypatch.setenv("NO_PROXY", "149.154.160.0/20")
    monkeypatch.setattr(tnet.httpx, "AsyncHTTPTransport", factory)

    transport = tnet.TelegramFallbackTransport(["149.154.167.220"])

    assert transport._fallback_ips == ["149.154.167.220"]
    assert len(captured) == 2
    assert not any("proxy" in call_kwargs for call_kwargs in captured)
|
||||
|
||||
|
||||
class TestFallbackTransportClose:
|
||||
@pytest.mark.asyncio
|
||||
|
||||
@@ -3,7 +3,6 @@ from unittest.mock import AsyncMock, MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
import gateway.run as gateway_run
|
||||
from gateway.config import GatewayConfig, Platform, PlatformConfig
|
||||
from gateway.platforms.base import MessageEvent
|
||||
from gateway.session import SessionSource
|
||||
@@ -12,6 +11,7 @@ from gateway.session import SessionSource
|
||||
def _clear_auth_env(monkeypatch) -> None:
|
||||
for key in (
|
||||
"TELEGRAM_ALLOWED_USERS",
|
||||
"TELEGRAM_GROUP_ALLOWED_USERS",
|
||||
"DISCORD_ALLOWED_USERS",
|
||||
"WHATSAPP_ALLOWED_USERS",
|
||||
"SLACK_ALLOWED_USERS",
|
||||
@@ -75,7 +75,7 @@ def _make_runner(platform: Platform, config: GatewayConfig):
|
||||
def test_whatsapp_lid_user_matches_phone_allowlist_via_session_mapping(monkeypatch, tmp_path):
|
||||
_clear_auth_env(monkeypatch)
|
||||
monkeypatch.setenv("WHATSAPP_ALLOWED_USERS", "15550000001")
|
||||
monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
|
||||
monkeypatch.setenv("HERMES_HOME", str(tmp_path))
|
||||
|
||||
session_dir = tmp_path / "whatsapp" / "session"
|
||||
session_dir.mkdir(parents=True)
|
||||
@@ -178,6 +178,26 @@ def test_qq_group_allowlist_does_not_authorize_other_groups(monkeypatch):
|
||||
assert runner._is_user_authorized(source) is False
|
||||
|
||||
|
||||
def test_telegram_group_allowlist_authorizes_forum_chat_without_user_allowlist(monkeypatch):
    """A forum chat id listed in TELEGRAM_GROUP_ALLOWED_USERS is authorized on its own."""
    _clear_auth_env(monkeypatch)
    monkeypatch.setenv("TELEGRAM_GROUP_ALLOWED_USERS", "-1001878443972")

    telegram_cfg = PlatformConfig(enabled=True, token="t")
    gateway_cfg = GatewayConfig(platforms={Platform.TELEGRAM: telegram_cfg})
    runner, _adapter = _make_runner(Platform.TELEGRAM, gateway_cfg)

    forum_source = SessionSource(
        platform=Platform.TELEGRAM,
        user_id="999",
        chat_id="-1001878443972",
        user_name="tester",
        chat_type="forum",
    )

    authorized = runner._is_user_authorized(forum_source)
    assert authorized is True
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_unauthorized_dm_pairs_by_default(monkeypatch):
|
||||
_clear_auth_env(monkeypatch)
|
||||
|
||||
@@ -0,0 +1,90 @@
|
||||
"""Regression test for /model context-length display on provider-capped models.
|
||||
|
||||
Bug (April 2026): `/model gpt-5.5` on openai-codex (ChatGPT OAuth) showed
|
||||
"Context: 1,050,000 tokens" because the display code used the raw models.dev
|
||||
``ModelInfo.context_window`` (which reports the direct-OpenAI API value) instead
|
||||
of the provider-aware resolver. The agent was actually running at 272K — Codex
|
||||
OAuth's enforced cap — so the display was lying to the user.
|
||||
|
||||
Fix: ``resolve_display_context_length()`` prefers
|
||||
``agent.model_metadata.get_model_context_length`` (which knows about Codex OAuth,
|
||||
Copilot, Nous, etc.) and falls back to models.dev only if that returns nothing.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from unittest.mock import patch
|
||||
|
||||
from hermes_cli.model_switch import resolve_display_context_length
|
||||
|
||||
|
||||
class _FakeModelInfo:
|
||||
def __init__(self, ctx):
|
||||
self.context_window = ctx
|
||||
|
||||
|
||||
class TestResolveDisplayContextLength:
    """Precedence rules of ``resolve_display_context_length``.

    Every test stubs the provider-aware resolver and passes (or omits) a
    fake models.dev record, then checks which of the two values is shown.
    """

    # Dotted path of the provider-aware resolver that each test patches.
    _RESOLVER = "agent.model_metadata.get_model_context_length"

    def test_codex_oauth_overrides_models_dev(self):
        """gpt-5.5 on openai-codex must show Codex's 272K cap, not models.dev's 1.05M."""
        models_dev_record = _FakeModelInfo(1_050_000)  # what models.dev reports
        # 272_000 is what Codex OAuth actually enforces.
        with patch(self._RESOLVER, return_value=272_000):
            shown = resolve_display_context_length(
                "gpt-5.5",
                "openai-codex",
                base_url="https://chatgpt.com/backend-api/codex",
                api_key="",
                model_info=models_dev_record,
            )
        assert shown == 272_000, (
            "Codex OAuth's 272K cap must win over models.dev's 1.05M for gpt-5.5"
        )

    def test_falls_back_to_model_info_when_resolver_returns_none(self):
        """With no resolver answer, the models.dev value is displayed."""
        models_dev_record = _FakeModelInfo(1_048_576)
        with patch(self._RESOLVER, return_value=None):
            shown = resolve_display_context_length(
                "some-model",
                "some-provider",
                model_info=models_dev_record,
            )
        assert shown == 1_048_576

    def test_returns_none_when_both_sources_empty(self):
        """No resolver answer and no model info yields ``None``."""
        with patch(self._RESOLVER, return_value=None):
            shown = resolve_display_context_length(
                "unknown-model",
                "unknown-provider",
                model_info=None,
            )
        assert shown is None

    def test_resolver_exception_falls_back_to_model_info(self):
        """A resolver that raises must not break the display path."""
        models_dev_record = _FakeModelInfo(200_000)
        with patch(self._RESOLVER, side_effect=RuntimeError("network down")):
            shown = resolve_display_context_length("x", "y", model_info=models_dev_record)
        assert shown == 200_000

    def test_prefers_resolver_even_when_model_info_has_larger_value(self):
        """Invariant: provider-aware resolver is authoritative, even if models.dev
        reports a bigger window."""
        models_dev_record = _FakeModelInfo(2_000_000)
        with patch(self._RESOLVER, return_value=128_000):
            shown = resolve_display_context_length(
                "capped-model",
                "capped-provider",
                model_info=models_dev_record,
            )
        assert shown == 128_000
|
||||
@@ -0,0 +1,172 @@
|
||||
"""Unit tests for hermes_cli.pty_bridge — PTY spawning + byte forwarding.
|
||||
|
||||
These tests drive the bridge with minimal POSIX processes (true, sh, cat,
sleep, printf, tput) to verify it behaves like a PTY you can read/write/resize/close.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
|
||||
import pytest
|
||||
|
||||
pytest.importorskip("ptyprocess", reason="ptyprocess not installed")
|
||||
|
||||
from hermes_cli.pty_bridge import PtyBridge, PtyUnavailableError
|
||||
|
||||
|
||||
skip_on_windows = pytest.mark.skipif(
|
||||
sys.platform.startswith("win"), reason="PTY bridge is POSIX-only"
|
||||
)
|
||||
|
||||
|
||||
def _read_until(bridge: PtyBridge, needle: bytes, timeout: float = 5.0) -> bytes:
|
||||
"""Accumulate PTY output until we see `needle` or time out."""
|
||||
deadline = time.monotonic() + timeout
|
||||
buf = bytearray()
|
||||
while time.monotonic() < deadline:
|
||||
chunk = bridge.read(timeout=0.2)
|
||||
if chunk is None:
|
||||
break
|
||||
buf.extend(chunk)
|
||||
if needle in buf:
|
||||
return bytes(buf)
|
||||
return bytes(buf)
|
||||
|
||||
|
||||
@skip_on_windows
class TestPtyBridgeSpawn:
    """Spawning: availability probe, pid reporting, missing-binary error."""

    def test_is_available_on_posix(self):
        available = PtyBridge.is_available()
        assert available is True

    def test_spawn_returns_bridge_with_pid(self):
        spawned = PtyBridge.spawn(["true"])
        try:
            assert spawned.pid > 0
        finally:
            spawned.close()

    def test_spawn_raises_on_missing_argv0(self, tmp_path):
        # A path inside the fresh tmp dir is guaranteed not to exist.
        missing_binary = tmp_path / "definitely-not-a-real-binary"
        with pytest.raises((FileNotFoundError, OSError)):
            PtyBridge.spawn([str(missing_binary)])
|
||||
|
||||
|
||||
@skip_on_windows
class TestPtyBridgeIO:
    """Byte forwarding in both directions, plus EOF reporting."""

    def test_reads_child_stdout(self):
        marker = b"hermes-ok"
        child = PtyBridge.spawn(["/bin/sh", "-c", "printf hermes-ok"])
        try:
            assert marker in _read_until(child, marker)
        finally:
            child.close()

    def test_write_sends_to_child_stdin(self):
        # `cat` with no args copies stdin to stdout, so one written line
        # coming back proves the write path; close() then tears cat down.
        marker = b"hello-pty"
        child = PtyBridge.spawn(["/bin/cat"])
        try:
            child.write(b"hello-pty\n")
            assert marker in _read_until(child, marker)
        finally:
            child.close()

    def test_read_returns_none_after_child_exits(self):
        child = PtyBridge.spawn(["/bin/sh", "-c", "printf done"])
        try:
            _read_until(child, b"done")
            # Give the child a beat to exit cleanly, draining as we wait.
            drain_deadline = time.monotonic() + 3.0
            while child.is_alive() and time.monotonic() < drain_deadline:
                child.read(timeout=0.1)
            # After exit, a read must yield None (EOF) rather than raise.
            saw_eof = any(child.read(timeout=0.1) is None for _ in range(10))
            assert saw_eof, "PtyBridge.read did not return None after child EOF"
        finally:
            child.close()
|
||||
|
||||
|
||||
@skip_on_windows
class TestPtyBridgeResize:
    """Window-size changes must reach the child's terminal."""

    def test_resize_updates_child_winsize(self):
        # The child sleeps briefly so the resize lands before tput queries
        # the terminal dimensions, then prints columns and lines.
        script = "sleep 0.1; tput cols; tput lines"
        child = PtyBridge.spawn(["/bin/sh", "-c", script], cols=80, rows=24)
        try:
            child.resize(cols=123, rows=45)
            printed = _read_until(child, b"45", timeout=5.0)
            # tput prints just the numbers, one per line.
            for expected in (b"123", b"45"):
                assert expected in printed
        finally:
            child.close()
|
||||
|
||||
|
||||
@skip_on_windows
class TestPtyBridgeClose:
    """close() must be repeatable and must actually stop the child."""

    def test_close_is_idempotent(self):
        sleeper = PtyBridge.spawn(["/bin/sh", "-c", "sleep 30"])
        for _ in range(2):
            sleeper.close()  # the second call must not raise
        assert not sleeper.is_alive()

    def test_close_terminates_long_running_child(self):
        sleeper = PtyBridge.spawn(["/bin/sh", "-c", "sleep 30"])
        pid = sleeper.pid
        sleeper.close()

        # Poll with signal 0 (existence probe) until the pid disappears or
        # the three-second grace period runs out.
        grace_ends = time.monotonic() + 3.0
        gone = False
        while not gone and time.monotonic() < grace_ends:
            try:
                os.kill(pid, 0)
            except ProcessLookupError:
                gone = True
            else:
                time.sleep(0.05)
        assert gone, f"pid {pid} still running after close()"
|
||||
|
||||
|
||||
@skip_on_windows
class TestPtyBridgeEnv:
    """Working directory and environment are handed to the child."""

    def test_cwd_is_respected(self, tmp_path):
        expected_dir = str(tmp_path).encode()
        child = PtyBridge.spawn(["/bin/sh", "-c", "pwd"], cwd=str(tmp_path))
        try:
            assert expected_dir in _read_until(child, expected_dir)
        finally:
            child.close()

    def test_env_is_forwarded(self):
        marker = b"pty-env-works"
        child_env = {**os.environ, "HERMES_PTY_TEST": "pty-env-works"}
        child = PtyBridge.spawn(
            ["/bin/sh", "-c", "printf %s \"$HERMES_PTY_TEST\""],
            env=child_env,
        )
        try:
            assert marker in _read_until(child, marker)
        finally:
            child.close()
|
||||
|
||||
|
||||
class TestPtyBridgeUnavailable:
    """Platform fallback semantics: ``PtyUnavailableError`` can be imported
    and its string form carries the message it was built with."""

    def test_error_carries_user_message(self):
        rendered = str(PtyUnavailableError("platform not supported"))
        assert "platform" in rendered
|
||||
@@ -601,3 +601,72 @@ class TestImagegenModelPicker:
|
||||
_configure_imagegen_model("fal", config)
|
||||
assert isinstance(config["image_gen"], dict)
|
||||
assert config["image_gen"]["model"] == "fal-ai/flux-2/klein/9b"
|
||||
|
||||
|
||||
def test_get_platform_tools_recovers_non_configurable_toolsets_from_composite():
    """A toolset outside CONFIGURABLE_TOOLSETS is still reported when its
    tools are part of the platform's composite toolset.

    A throwaway platform and two throwaway toolsets are patched in for the
    duration of the lookup.
    """
    from toolsets import TOOLSETS
    from hermes_cli.tools_config import PLATFORMS
    from unittest.mock import patch as mock_patch

    patched_toolsets = {
        **TOOLSETS,
        # Standalone toolset that owns the special tool.
        "_test_platform_tool": {
            "description": "test",
            "tools": ["_test_special_tool"],
            "includes": [],
        },
        # Composite for the fake platform; it lists the special tool too.
        "hermes-_test_platform": {
            "description": "test composite",
            "tools": ["web_search", "web_extract", "terminal", "process", "_test_special_tool"],
            "includes": [],
        },
    }
    patched_platforms = {
        **PLATFORMS,
        "_test_platform": {"label": "Test", "default_toolset": "hermes-_test_platform"},
    }

    with mock_patch("hermes_cli.tools_config.PLATFORMS", patched_platforms), \
            mock_patch("toolsets.TOOLSETS", patched_toolsets):
        enabled = _get_platform_tools({}, "_test_platform")

    for expected_key in ("_test_platform_tool", "web", "terminal"):
        assert expected_key in enabled
|
||||
|
||||
|
||||
def test_get_platform_tools_second_pass_skips_fully_claimed_toolsets():
    """Toolsets fully covered by configurable keys must NOT be re-added by
    the second pass (guards against noise entries such as 'search' or
    'hermes-acp'); only 'search' is asserted here.
    """
    cli_enabled = _get_platform_tools({}, "cli")

    assert "search" not in cli_enabled
|
||||
|
||||
|
||||
def test_get_platform_tools_discord_includes_discord_not_admin():
    """With an empty config, Discord gets 'discord' but not 'discord_admin'."""
    discord_enabled = _get_platform_tools({}, "discord")

    assert "discord" in discord_enabled
    assert "discord_admin" not in discord_enabled
|
||||
|
||||
|
||||
def test_discord_admin_in_configurable_toolsets():
    """'discord_admin' appears as a key in CONFIGURABLE_TOOLSETS."""
    configurable_keys = [ts_key for ts_key, _, _ in CONFIGURABLE_TOOLSETS]
    assert "discord_admin" in configurable_keys
|
||||
|
||||
|
||||
def test_discord_admin_in_default_off():
    """'discord_admin' is listed among the default-off toolsets."""
    default_off = _DEFAULT_OFF_TOOLSETS
    assert "discord_admin" in default_off
|
||||
|
||||
|
||||
def test_get_platform_tools_feishu_includes_doc_and_drive():
    """With an empty config, Feishu exposes both of its document toolsets."""
    feishu_enabled = _get_platform_tools({}, "feishu")

    for toolset_key in ("feishu_doc", "feishu_drive"):
        assert toolset_key in feishu_enabled
|
||||
|
||||
|
||||
def test_get_platform_tools_feishu_tools_not_on_other_platforms():
    """Feishu-only toolsets must not show up for cli, telegram or discord."""
    for platform_key in ("cli", "telegram", "discord"):
        platform_enabled = _get_platform_tools({}, platform_key)
        assert "feishu_doc" not in platform_enabled, f"feishu_doc leaked onto {platform_key}"
        assert "feishu_drive" not in platform_enabled, f"feishu_drive leaked onto {platform_key}"
|
||||
|
||||
@@ -1677,3 +1677,251 @@ class TestDashboardPluginManifestExtensions:
|
||||
plugins = web_server._get_dashboard_plugins(force_rescan=True)
|
||||
entry = next(p for p in plugins if p["name"] == "mixed-slots")
|
||||
assert entry["slots"] == ["sidebar", "header-right"]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# /api/pty WebSocket — terminal bridge for the dashboard "Chat" tab.
|
||||
#
|
||||
# These tests drive the endpoint with a tiny fake command (typically ``cat``
|
||||
# or ``sh -c 'printf …'``) instead of the real ``hermes --tui`` binary. The
|
||||
# endpoint resolves its argv through ``_resolve_chat_argv``, so tests
|
||||
# monkeypatch that hook.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
import sys
|
||||
|
||||
|
||||
skip_on_windows = pytest.mark.skipif(
|
||||
sys.platform.startswith("win"), reason="PTY bridge is POSIX-only"
|
||||
)
|
||||
|
||||
|
||||
@skip_on_windows
class TestPtyWebSocket:
    """Tests for the dashboard WebSocket endpoints: ``/api/pty``,
    ``/api/pub`` and ``/api/events``.

    The PTY tests never launch the real TUI; each installs a small fake
    command by monkeypatching ``_resolve_chat_argv`` on the web-server module.
    """

    @pytest.fixture(autouse=True)
    def _setup(self, monkeypatch, _isolate_hermes_home):
        """Enable embedded chat and build a TestClient for every test."""
        from starlette.testclient import TestClient

        import hermes_cli.web_server as ws

        # Avoid exec'ing the actual TUI in tests: every PTY test below
        # installs its own fake argv via ``ws._resolve_chat_argv``.
        self.ws_module = ws
        monkeypatch.setattr(ws, "_DASHBOARD_EMBEDDED_CHAT_ENABLED", True)
        self.token = ws._SESSION_TOKEN
        self.client = TestClient(ws.app)

    # -- helpers -----------------------------------------------------------

    def _url(self, token: str | None = None, **params: str) -> str:
        """Return the ``/api/pty`` path with ``token`` and ``params`` as query.

        ``token`` defaults to the server's session token; pass an explicit
        value to exercise the rejection path.
        """
        from urllib.parse import urlencode

        tok = token if token is not None else self.token
        # The query string is embedded in the path given to websocket_connect.
        q = {"token": tok, **params}
        return f"/api/pty?{urlencode(q)}"

    def _use_argv(self, monkeypatch, argv):
        """Make ``_resolve_chat_argv`` return ``(argv, None, None)``."""
        monkeypatch.setattr(
            self.ws_module,
            "_resolve_chat_argv",
            lambda resume=None, sidecar_url=None: (list(argv), None, None),
        )

    def _assert_rejected(self, path, expected_code):
        """Connect to ``path`` and require a close with ``expected_code``."""
        from starlette.websockets import WebSocketDisconnect

        with pytest.raises(WebSocketDisconnect) as exc:
            with self.client.websocket_connect(path):
                pass
        assert exc.value.code == expected_code

    @staticmethod
    def _drain_until(conn, *needles, timeout=5.0):
        """Read binary frames until every needle was seen; return the bytes.

        ``receive_bytes`` blocks, so the deadline is only checked between
        frames. Any exception from the socket (for example the server
        closing once the child exits) stops the drain instead of
        propagating, so the caller's assertion on the returned buffer is
        what reports the failure.
        """
        import time

        buf = b""
        deadline = time.monotonic() + timeout
        while time.monotonic() < deadline:
            try:
                frame = conn.receive_bytes()
            except Exception:
                break
            if frame:
                buf += frame
            if all(needle in buf for needle in needles):
                break
        return buf

    # -- access control ----------------------------------------------------

    def test_rejects_when_embedded_chat_disabled(self, monkeypatch):
        monkeypatch.setattr(self.ws_module, "_DASHBOARD_EMBEDDED_CHAT_ENABLED", False)
        self._assert_rejected(self._url(), 4403)

    def test_rejects_missing_token(self, monkeypatch):
        self._use_argv(monkeypatch, ["/bin/cat"])
        self._assert_rejected("/api/pty", 4401)

    def test_rejects_bad_token(self, monkeypatch):
        self._use_argv(monkeypatch, ["/bin/cat"])
        self._assert_rejected(self._url(token="wrong"), 4401)

    # -- byte forwarding ---------------------------------------------------

    def test_streams_child_stdout_to_client(self, monkeypatch):
        self._use_argv(monkeypatch, ["/bin/sh", "-c", "printf hermes-ws-ok"])
        with self.client.websocket_connect(self._url()) as conn:
            buf = self._drain_until(conn, b"hermes-ws-ok")
        assert b"hermes-ws-ok" in buf

    def test_client_input_reaches_child_stdin(self, monkeypatch):
        # ``cat`` echoes stdin back, so a write → read round-trip proves
        # the full duplex path.
        self._use_argv(monkeypatch, ["/bin/cat"])
        with self.client.websocket_connect(self._url()) as conn:
            conn.send_bytes(b"round-trip-payload\n")
            buf = self._drain_until(conn, b"round-trip-payload")
        assert b"round-trip-payload" in buf

    def test_resize_escape_is_forwarded(self, monkeypatch):
        # The resize escape is sent as a text frame; ``tput cols/lines``
        # must then report the new dimensions. The leading sleep gives the
        # test time to push the resize before tput runs.
        self._use_argv(
            monkeypatch,
            ["/bin/sh", "-c", "sleep 0.15; tput cols; tput lines"],
        )
        with self.client.websocket_connect(self._url()) as conn:
            conn.send_text("\x1b[RESIZE:99;41]")
            buf = self._drain_until(conn, b"99", b"41")
        assert b"99" in buf and b"41" in buf

    def test_unavailable_platform_closes_with_message(self, monkeypatch):
        from hermes_cli.pty_bridge import PtyUnavailableError

        def _raise(cls, *args, **kwargs):
            raise PtyUnavailableError("pty missing for tests")

        self._use_argv(monkeypatch, ["/bin/cat"])
        # Patch PtyBridge.spawn through the web_server module's binding.
        monkeypatch.setattr(self.ws_module.PtyBridge, "spawn", classmethod(_raise))

        with self.client.websocket_connect(self._url()) as conn:
            # Expect a final text frame with the error message, then close.
            msg = conn.receive_text()
        lowered = msg.lower()
        assert "unavailable" in lowered or "pty" in lowered

    # -- query parameters --------------------------------------------------

    def test_resume_parameter_is_forwarded_to_argv(self, monkeypatch):
        captured: dict = {}

        def fake_resolve(resume=None, sidecar_url=None):
            captured["resume"] = resume
            return (["/bin/sh", "-c", "printf resume-arg-ok"], None, None)

        monkeypatch.setattr(self.ws_module, "_resolve_chat_argv", fake_resolve)

        with self.client.websocket_connect(self._url(resume="sess-42")) as conn:
            # Drain briefly so the handler actually invokes the resolver;
            # the frame itself (or a close) is irrelevant here.
            try:
                conn.receive_bytes()
            except Exception:
                pass
        assert captured.get("resume") == "sess-42"

    def test_channel_param_propagates_sidecar_url(self, monkeypatch):
        """When /api/pty is opened with ?channel=, the resolver receives a
        sidecar URL pointing back at /api/pub on the same channel — which is
        how tool events reach the dashboard sidebar."""
        captured: dict = {}

        def fake_resolve(resume=None, sidecar_url=None):
            captured["sidecar_url"] = sidecar_url
            return (["/bin/sh", "-c", "printf sidecar-ok"], None, None)

        monkeypatch.setattr(self.ws_module, "_resolve_chat_argv", fake_resolve)
        app_state = self.ws_module.app.state
        monkeypatch.setattr(app_state, "bound_host", "127.0.0.1", raising=False)
        monkeypatch.setattr(app_state, "bound_port", 9119, raising=False)

        with self.client.websocket_connect(self._url(channel="abc-123")) as conn:
            try:
                conn.receive_bytes()
            except Exception:
                pass

        url = captured.get("sidecar_url") or ""
        assert url.startswith("ws://127.0.0.1:9119/api/pub?")
        assert "channel=abc-123" in url
        assert "token=" in url

    # -- pub/sub -----------------------------------------------------------

    def test_pub_broadcasts_to_events_subscribers(self, monkeypatch):
        """Frame written to /api/pub is rebroadcast to an /api/events
        subscriber on the same channel."""
        from urllib.parse import urlencode

        qs = urlencode({"token": self.token, "channel": "broadcast-test"})
        pub_path = f"/api/pub?{qs}"
        sub_path = f"/api/events?{qs}"

        with self.client.websocket_connect(sub_path) as sub:
            with self.client.websocket_connect(pub_path) as pub:
                pub.send_text('{"type":"tool.start","payload":{"tool_id":"t1"}}')
                received = sub.receive_text()

        assert "tool.start" in received
        assert '"tool_id":"t1"' in received

    def test_events_rejects_missing_channel(self):
        self._assert_rejected(f"/api/events?token={self.token}", 4400)
|
||||
|
||||
@@ -0,0 +1,213 @@
|
||||
"""Regression test: DeepSeek V4 thinking mode reasoning_content echo.
|
||||
|
||||
DeepSeek V4-flash / V4-pro thinking mode requires ``reasoning_content`` on
|
||||
every assistant message that carries ``tool_calls``. When a persisted
|
||||
session replays an assistant tool-call turn that was recorded without the
|
||||
field, DeepSeek rejects the next request with HTTP 400::
|
||||
|
||||
The reasoning_content in the thinking mode must be passed back to the API.
|
||||
|
||||
Fix covers three paths:
|
||||
|
||||
1. ``_build_assistant_message`` — new tool-call messages without raw
|
||||
reasoning_content get ``""`` pinned at creation time so nothing gets
|
||||
persisted poisoned.
|
||||
2. ``_copy_reasoning_content_for_api`` — already-poisoned history replays
|
||||
with ``reasoning_content=""`` injected defensively.
|
||||
3. Detection covers three signals: ``provider == "deepseek"``,
|
||||
``"deepseek" in model``, and ``api.deepseek.com`` host match. The third
|
||||
catches custom-provider setups pointing at DeepSeek.
|
||||
|
||||
Refs #15250 / #15353.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from run_agent import AIAgent
|
||||
|
||||
|
||||
def _make_agent(provider: str = "", model: str = "", base_url: str = "") -> AIAgent:
    """Build an ``AIAgent`` without running ``__init__``.

    The instance is allocated with ``object.__new__`` and given only the
    three attributes set here: ``provider``, ``model`` and ``base_url``.
    """
    stub = object.__new__(AIAgent)
    for attr_name, attr_value in (
        ("provider", provider),
        ("model", model),
        ("base_url", base_url),
    ):
        setattr(stub, attr_name, attr_value)
    return stub
|
||||
|
||||
|
||||
class TestNeedsDeepSeekToolReasoning:
    """``_needs_deepseek_tool_reasoning()`` fires on each of the three
    detection signals (provider name, model name, base-URL host) and stays
    off otherwise."""

    def test_provider_deepseek(self) -> None:
        deepseek_agent = _make_agent(provider="deepseek", model="deepseek-v4-flash")
        assert deepseek_agent._needs_deepseek_tool_reasoning() is True

    def test_model_substring(self) -> None:
        # provider='custom', so only the model name identifies DeepSeek.
        custom_agent = _make_agent(provider="custom", model="deepseek-v4-pro")
        assert custom_agent._needs_deepseek_tool_reasoning() is True

    def test_base_url_host(self) -> None:
        # Neither provider nor model mentions DeepSeek; only the host does.
        aliased_agent = _make_agent(
            provider="custom",
            model="some-aliased-name",
            base_url="https://api.deepseek.com/v1",
        )
        assert aliased_agent._needs_deepseek_tool_reasoning() is True

    def test_provider_case_insensitive(self) -> None:
        mixed_case_agent = _make_agent(provider="DeepSeek", model="")
        assert mixed_case_agent._needs_deepseek_tool_reasoning() is True

    def test_non_deepseek_provider(self) -> None:
        openrouter_agent = _make_agent(
            provider="openrouter",
            model="anthropic/claude-sonnet-4.6",
            base_url="https://openrouter.ai/api/v1",
        )
        assert openrouter_agent._needs_deepseek_tool_reasoning() is False

    def test_empty_everything(self) -> None:
        blank_agent = _make_agent()
        assert blank_agent._needs_deepseek_tool_reasoning() is False
|
||||
|
||||
|
||||
class TestCopyReasoningContentForApi:
    """``_copy_reasoning_content_for_api`` pads ``reasoning_content`` on
    assistant tool-call turns for DeepSeek and Kimi, and nowhere else."""

    @staticmethod
    def _tool_call_turn(**fields) -> dict:
        """Return a fresh assistant message carrying one ``terminal`` tool call.

        ``fields`` are inserted between ``role`` and ``tool_calls``.
        """
        return {
            "role": "assistant",
            **fields,
            "tool_calls": [{"id": "c1", "function": {"name": "terminal"}}],
        }

    @staticmethod
    def _copied(agent, source: dict) -> dict:
        """Run the copy into an empty API message and return that message."""
        api_msg: dict = {}
        agent._copy_reasoning_content_for_api(source, api_msg)
        return api_msg

    def test_deepseek_tool_call_poisoned_history_gets_empty_string(self) -> None:
        """Already-poisoned history (no reasoning_content, no reasoning) gets ''."""
        agent = _make_agent(provider="deepseek", model="deepseek-v4-flash")
        result = self._copied(agent, self._tool_call_turn(content=""))
        assert result.get("reasoning_content") == ""

    def test_deepseek_assistant_no_tool_call_left_alone(self) -> None:
        """Plain assistant turns without tool_calls don't get padded."""
        agent = _make_agent(provider="deepseek", model="deepseek-v4-flash")
        result = self._copied(agent, {"role": "assistant", "content": "hello"})
        assert "reasoning_content" not in result

    def test_deepseek_explicit_reasoning_content_preserved(self) -> None:
        """When reasoning_content is already set, it's copied verbatim."""
        agent = _make_agent(provider="deepseek", model="deepseek-v4-flash")
        turn = self._tool_call_turn(
            reasoning_content="<think>real chain of thought</think>"
        )
        result = self._copied(agent, turn)
        assert result["reasoning_content"] == "<think>real chain of thought</think>"

    def test_deepseek_reasoning_field_promoted(self) -> None:
        """When only 'reasoning' is set, it gets promoted to reasoning_content."""
        agent = _make_agent(provider="deepseek", model="deepseek-v4-flash")
        result = self._copied(agent, self._tool_call_turn(reasoning="thought trace"))
        assert result["reasoning_content"] == "thought trace"

    def test_kimi_path_still_works(self) -> None:
        """Existing Kimi detection still pads reasoning_content."""
        agent = _make_agent(provider="kimi-coding", model="kimi-k2.5")
        result = self._copied(agent, self._tool_call_turn(content=""))
        assert result.get("reasoning_content") == ""

    def test_kimi_moonshot_base_url(self) -> None:
        """A custom provider on the Moonshot host is padded as well."""
        agent = _make_agent(
            provider="custom", model="kimi-k2", base_url="https://api.moonshot.ai/v1"
        )
        result = self._copied(agent, self._tool_call_turn(content=""))
        assert result.get("reasoning_content") == ""

    def test_non_thinking_provider_not_padded(self) -> None:
        """Providers that don't require the echo are untouched."""
        agent = _make_agent(
            provider="openrouter",
            model="anthropic/claude-sonnet-4.6",
            base_url="https://openrouter.ai/api/v1",
        )
        result = self._copied(agent, self._tool_call_turn(content=""))
        assert "reasoning_content" not in result

    def test_deepseek_custom_base_url(self) -> None:
        """Custom provider pointing at api.deepseek.com is detected via host."""
        agent = _make_agent(
            provider="custom",
            model="whatever",
            base_url="https://api.deepseek.com/v1",
        )
        result = self._copied(agent, self._tool_call_turn(content=""))
        assert result.get("reasoning_content") == ""

    def test_non_assistant_role_ignored(self) -> None:
        """A user message is left alone."""
        agent = _make_agent(provider="deepseek", model="deepseek-v4-flash")
        result = self._copied(agent, {"role": "user", "content": "hi"})
        assert "reasoning_content" not in result
|
||||
|
||||
|
||||
class TestNeedsKimiToolReasoning:
    """``_needs_kimi_tool_reasoning()`` still recognises the Kimi provider
    names and the Kimi/Moonshot hosts after being extracted into a helper."""

    @pytest.mark.parametrize(
        "provider,base_url",
        [
            ("kimi-coding", ""),
            ("kimi-coding-cn", ""),
            ("custom", "https://api.kimi.com/v1"),
            ("custom", "https://api.moonshot.ai/v1"),
            ("custom", "https://api.moonshot.cn/v1"),
        ],
    )
    def test_kimi_signals(self, provider: str, base_url: str) -> None:
        kimi_agent = _make_agent(provider=provider, model="kimi-k2", base_url=base_url)
        assert kimi_agent._needs_kimi_tool_reasoning() is True

    def test_non_kimi_provider(self) -> None:
        # The model name contains 'moonshot', but the provider and host are
        # OpenRouter's — the helper must answer False.
        routed_agent = _make_agent(
            provider="openrouter",
            model="moonshotai/kimi-k2",
            base_url="https://openrouter.ai/api/v1",
        )
        assert routed_agent._needs_kimi_tool_reasoning() is False
|
||||
@@ -188,6 +188,30 @@ class TestFlushMemoriesUsesAuxiliaryClient:
|
||||
|
||||
agent.client.chat.completions.create.assert_called_once()
|
||||
|
||||
def test_auxiliary_provider_failure_surfaces_warning_and_falls_back(self, monkeypatch):
    """Provider/API failures from auxiliary flush must be visible.

    Exhausted keys and rate limits are not always RuntimeError. They used
    to fall into the broad outer handler and disappear into debug logs.

    The auxiliary ``call_llm`` is made to raise a plain ``Exception``. The
    test then requires that the agent's own client is called exactly once
    (the fallback) and that a ``"warn"`` status event mentioning the
    failed auxiliary flush is emitted.
    """
    agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter")
    agent.client = MagicMock()
    agent.client.chat.completions.create.return_value = _chat_response_with_memory_call()
    events = []
    agent.status_callback = lambda kind, text=None: events.append((kind, text))

    with patch("agent.auxiliary_client.call_llm", side_effect=Exception("opencode-go key exhausted")), \
            patch("tools.memory_tool.memory_tool", return_value="Saved."):
        messages = [
            {"role": "user", "content": "Hello"},
            {"role": "assistant", "content": "Hi there"},
            {"role": "user", "content": "Save this"},
        ]
        agent.flush_memories(messages)

    agent.client.chat.completions.create.assert_called_once()
    # The callback allows text=None, so keep only string payloads before the
    # substring check; otherwise a text-less warn event raises TypeError
    # instead of producing a readable assertion failure.
    warn_texts = [text for kind, text in events if kind == "warn" and isinstance(text, str)]
    assert any("Auxiliary memory flush failed" in text for text in warn_texts), (
        f"expected an auxiliary-flush warning, got events: {events!r}"
    )
|
||||
|
||||
def test_flush_executes_memory_tool_calls(self, monkeypatch):
|
||||
"""Verify that memory tool calls from the flush response actually get executed."""
|
||||
agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter")
|
||||
@@ -209,6 +233,31 @@ class TestFlushMemoriesUsesAuxiliaryClient:
|
||||
assert call_kwargs.kwargs["target"] == "notes"
|
||||
assert "dark mode" in call_kwargs.kwargs["content"]
|
||||
|
||||
def test_flush_bridges_memory_write_metadata(self, monkeypatch):
    """Flush memory writes notify external providers with flush provenance.

    After ``flush_memories`` runs, the memory manager's ``on_memory_write``
    hook must have been called once, with the write's action/target/content
    as positional arguments and flush-specific ``metadata``.
    """
    agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter")
    agent._memory_manager = MagicMock()
    agent.session_id = "sess-flush"
    agent.platform = "cli"

    conversation = [
        {"role": "user", "content": "Hello"},
        {"role": "assistant", "content": "Hi"},
        {"role": "user", "content": "Note this"},
    ]
    with patch("agent.auxiliary_client.call_llm", return_value=_chat_response_with_memory_call()), \
            patch("tools.memory_tool.memory_tool", return_value="Saved."):
        agent.flush_memories(conversation)

    write_hook = agent._memory_manager.on_memory_write
    write_hook.assert_called_once()
    recorded = write_hook.call_args
    assert recorded.args[:3] == ("add", "notes", "User prefers dark mode.")

    metadata = recorded.kwargs["metadata"]
    assert metadata["write_origin"] == "memory_flush"
    assert metadata["execution_context"] == "flush_memories"
    assert metadata["session_id"] == "sess-flush"
|
||||
|
||||
def test_flush_strips_artifacts_from_messages(self, monkeypatch):
|
||||
"""After flush, the flush prompt and any response should be removed from messages."""
|
||||
agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter")
|
||||
|
||||
@@ -0,0 +1,189 @@
|
||||
"""Regression guard for #15218 — external memory sync must skip interrupted turns.
|
||||
|
||||
Before this fix, ``run_conversation`` called
|
||||
``memory_manager.sync_all(original_user_message, final_response)`` at the
|
||||
end of every turn where both args were present. That gate didn't check
|
||||
the ``interrupted`` flag, so an external memory backend received partial
|
||||
assistant output, aborted tool chains, or mid-stream resets as durable
|
||||
conversational truth. Downstream recall then treated that not-yet-real
|
||||
state as if the user had seen it complete.
|
||||
|
||||
The fix is ``AIAgent._sync_external_memory_for_turn`` — a small helper
|
||||
that replaces the inline block and returns early when ``interrupted``
|
||||
is True (regardless of whether ``final_response`` and
|
||||
``original_user_message`` happen to be populated).
|
||||
|
||||
These tests exercise the helper directly on a bare ``AIAgent`` built
|
||||
via ``__new__`` so the full ``run_conversation`` machinery isn't needed
|
||||
— the method is pure logic and three state arguments.
|
||||
"""
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
def _bare_agent():
    """Return an ``AIAgent`` created via ``__new__`` (so ``__init__`` never
    runs) whose only assigned attribute is a ``MagicMock`` stored as
    ``_memory_manager``.

    Follows the bare-agent pattern used across
    ``tests/run_agent/test_interrupt_propagation.py``.
    """
    from run_agent import AIAgent

    stub = AIAgent.__new__(AIAgent)
    stub._memory_manager = MagicMock()
    return stub
|
||||
|
||||
|
||||
class TestSyncExternalMemoryForTurn:
    """Unit tests for ``AIAgent._sync_external_memory_for_turn``.

    Each test calls the helper on a bare agent and then inspects the
    mocked ``_memory_manager`` for ``sync_all`` / ``queue_prefetch_all``
    activity.
    """

    # --- Interrupt guard (the #15218 fix) -------------------------------

    def test_interrupted_turn_does_not_sync(self):
        """Core case for #15218: with a user message and a final response
        both present, ``interrupted=True`` must still keep the turn away
        from the memory backend."""
        agent = _bare_agent()
        turn = dict(
            original_user_message="What time is it?",
            final_response="It is 3pm.",  # looks complete — but partial
            interrupted=True,
        )
        agent._sync_external_memory_for_turn(**turn)
        manager = agent._memory_manager
        manager.sync_all.assert_not_called()
        manager.queue_prefetch_all.assert_not_called()

    def test_interrupted_turn_skips_even_when_response_is_full(self):
        """A long reply that reads as finished is still treated as partial
        when ``interrupted`` is True; the backend cannot tell the
        difference itself, so the gate has to live at the call site."""
        agent = _bare_agent()
        turn = dict(
            original_user_message="Plan a trip to Lisbon",
            final_response="Here's a detailed 7-day itinerary: [...]",
            interrupted=True,
        )
        agent._sync_external_memory_for_turn(**turn)
        agent._memory_manager.sync_all.assert_not_called()

    # --- Normal completed turn still syncs ------------------------------

    def test_completed_turn_syncs_and_queues_prefetch(self):
        """Positive path: a completed turn calls ``sync_all`` with the
        (user message, response) pair and ``queue_prefetch_all`` with the
        user message.
        """
        question = "What's the weather in Paris?"
        answer = "It's sunny and 22°C."
        agent = _bare_agent()
        agent._sync_external_memory_for_turn(
            original_user_message=question,
            final_response=answer,
            interrupted=False,
        )
        manager = agent._memory_manager
        manager.sync_all.assert_called_once_with(question, answer)
        manager.queue_prefetch_all.assert_called_once_with(question)

    # --- Edge cases (pre-existing behaviour preserved) ------------------

    def test_no_final_response_skips(self):
        """With ``final_response=None`` nothing is synced."""
        agent = _bare_agent()
        turn = dict(
            original_user_message="Hello",
            final_response=None,
            interrupted=False,
        )
        agent._sync_external_memory_for_turn(**turn)
        agent._memory_manager.sync_all.assert_not_called()

    def test_no_original_user_message_skips(self):
        """With ``original_user_message=None`` nothing is synced, so an
        assistant-only exchange is never recorded as a user turn."""
        agent = _bare_agent()
        turn = dict(
            original_user_message=None,
            final_response="Proactive notification text",
            interrupted=False,
        )
        agent._sync_external_memory_for_turn(**turn)
        agent._memory_manager.sync_all.assert_not_called()

    def test_no_memory_manager_is_a_no_op(self):
        """An agent whose ``_memory_manager`` is ``None`` must complete the
        call without raising."""
        from run_agent import AIAgent

        agent = AIAgent.__new__(AIAgent)
        agent._memory_manager = None

        # Must not raise.
        turn = dict(
            original_user_message="hi",
            final_response="hey",
            interrupted=False,
        )
        agent._sync_external_memory_for_turn(**turn)

    # --- Exception safety ----------------------------------------------

    def test_sync_exception_is_swallowed(self):
        """A ``RuntimeError`` raised by ``sync_all`` must not propagate to
        the caller; the sync is best-effort."""
        agent = _bare_agent()
        failure = RuntimeError("backend unreachable")
        agent._memory_manager.sync_all.side_effect = failure

        # Must not raise.
        agent._sync_external_memory_for_turn(
            original_user_message="hi",
            final_response="hey",
            interrupted=False,
        )
        # sync_all was attempted.
        agent._memory_manager.sync_all.assert_called_once()

    def test_prefetch_exception_is_swallowed(self):
        """A ``RuntimeError`` raised by ``queue_prefetch_all`` must not
        propagate either."""
        agent = _bare_agent()
        failure = RuntimeError("prefetch worker dead")
        agent._memory_manager.queue_prefetch_all.side_effect = failure

        # Must not raise.
        agent._sync_external_memory_for_turn(
            original_user_message="hi",
            final_response="hey",
            interrupted=False,
        )
        # sync_all still happened before the prefetch blew up.
        agent._memory_manager.sync_all.assert_called_once()

    # --- The specific matrix the reporter asked about ------------------

    @pytest.mark.parametrize("interrupted,final,user,expect_sync", [
        (False, "resp", "user", True),   # normal completed → sync
        (True, "resp", "user", False),   # interrupted → skip (the fix)
        (False, None, "user", False),    # no response → skip
        (False, "resp", None, False),    # no user msg → skip
        (True, None, "user", False),     # interrupted + no response → skip
        (True, "resp", None, False),     # interrupted + no user → skip
        (False, None, None, False),      # nothing → skip
        (True, None, None, False),       # interrupted + nothing → skip
    ])
    def test_sync_matrix(self, interrupted, final, user, expect_sync):
        """Every combination of the three inputs: only the single
        "completed with both texts" row may sync and prefetch."""
        agent = _bare_agent()
        agent._sync_external_memory_for_turn(
            original_user_message=user,
            final_response=final,
            interrupted=interrupted,
        )
        manager = agent._memory_manager
        if not expect_sync:
            manager.sync_all.assert_not_called()
            manager.queue_prefetch_all.assert_not_called()
            return
        manager.sync_all.assert_called_once()
        manager.queue_prefetch_all.assert_called_once()
|
||||
@@ -105,3 +105,39 @@ class TestRepairToolCallArguments:
|
||||
result = _repair_tool_call_arguments(raw, "terminal")
|
||||
# Should at least be valid JSON, even if background is lost
|
||||
json.loads(result)
|
||||
|
||||
# -- Stage 0: strict=False (literal control chars in strings) --
|
||||
# llama.cpp backends sometimes emit literal tabs/newlines inside JSON
|
||||
# string values. strict=False accepts these; we re-serialise to the
|
||||
# canonical wire form (#12068).
|
||||
|
||||
def test_literal_newline_inside_string_value(self):
    """A raw newline character inside a string value comes back as that
    same newline once the repaired output is parsed."""
    malformed = '{"summary": "line one\nline two"}'
    repaired = _repair_tool_call_arguments(malformed, "t")
    assert json.loads(repaired) == {"summary": "line one\nline two"}
|
||||
|
||||
def test_literal_tab_inside_string_value(self):
    """A raw tab character inside a string value comes back as that same
    tab once the repaired output is parsed."""
    malformed = '{"summary": "col1\tcol2"}'
    repaired = _repair_tool_call_arguments(malformed, "t")
    assert json.loads(repaired) == {"summary": "col1\tcol2"}
|
||||
|
||||
def test_literal_control_char_reserialised_to_wire_form(self):
    """The repaired text must be loadable by a default (strict)
    ``json.loads`` call and keep the tab characters in the value."""
    malformed = '{"msg": "has\tliteral\ttabs"}'
    repaired = _repair_tool_call_arguments(malformed, "t")
    # json.loads defaults to strict=True, so this load is the strict check.
    decoded = json.loads(repaired)
    assert decoded["msg"] == "has\tliteral\ttabs"
|
||||
|
||||
# -- Stage 4: control-char escape fallback --
|
||||
|
||||
def test_control_chars_with_trailing_comma(self):
    """Input combining a raw newline in a string with a trailing comma
    must still repair to JSON whose ``msg`` value contains "line"."""
    malformed = '{"msg": "line\none",}'
    repaired = _repair_tool_call_arguments(malformed, "t")
    decoded = json.loads(repaired)
    assert "line" in decoded["msg"]
|
||||
|
||||
|
||||
@@ -943,6 +943,113 @@ def test_normalize_codex_response_marks_commentary_only_message_as_incomplete(mo
|
||||
assert "inspect the repository" in (assistant_message.content or "")
|
||||
|
||||
|
||||
def test_normalize_codex_response_detects_leaked_tool_call_text(monkeypatch):
    """Leaked Harmony-style ``to=functions.foo`` text is flagged as incomplete.

    The response holds a single completed message whose text contains
    tool-call syntax but no structured ``function_call`` item. The
    normalizer is expected to report ``finish_reason == "incomplete"``,
    blank the content and leave ``tool_calls`` empty, so the continuation
    path can re-elicit a proper tool call. This is the
    Taiwan-embassy-email (Discord bug report) failure mode: the child
    agent writes a confident-looking summary although no tool ran.
    """
    agent = _build_agent(monkeypatch)
    from agent.codex_responses_adapter import _normalize_codex_response

    leaked_content = (
        "I'll check the official page directly.\n"
        "to=functions.exec_command {\"cmd\": \"curl https://example.test\"}\n"
        "assistant to=functions.exec_command {\"stdout\": \"mailto:foo@example.test\"}\n"
        "Extracted: foo@example.test"
    )
    text_part = SimpleNamespace(type="output_text", text=leaked_content)
    message_item = SimpleNamespace(
        type="message",
        status="completed",
        content=[text_part],
    )
    token_usage = SimpleNamespace(input_tokens=4, output_tokens=2, total_tokens=6)
    response = SimpleNamespace(
        output=[message_item],
        usage=token_usage,
        status="completed",
        model="gpt-5.4",
    )

    assistant_message, finish_reason = _normalize_codex_response(response)

    assert finish_reason == "incomplete"
    # The leaked text is scrubbed so the parent never surfaces it as a
    # summary; with no structured function_call item, tool_calls is empty.
    assert (assistant_message.content or "") == ""
    assert assistant_message.tool_calls == []
|
||||
|
||||
|
||||
def test_normalize_codex_response_ignores_tool_call_text_when_real_tool_call_present(monkeypatch):
    """A structured ``function_call`` takes precedence over leak-like text.

    When the output has both a real function call and message text that
    happens to mention ``to=functions.*``, the expected result is
    ``finish_reason == "tool_calls"`` with the tool call kept and the
    accompanying text left in place.
    """
    agent = _build_agent(monkeypatch)
    from agent.codex_responses_adapter import _normalize_codex_response

    text_part = SimpleNamespace(
        type="output_text",
        text="Running the command via to=functions.exec_command now.",
    )
    message_item = SimpleNamespace(
        type="message",
        status="completed",
        content=[text_part],
    )
    call_item = SimpleNamespace(
        type="function_call",
        id="fc_1",
        call_id="call_1",
        name="terminal",
        arguments="{}",
    )
    token_usage = SimpleNamespace(input_tokens=4, output_tokens=2, total_tokens=6)
    response = SimpleNamespace(
        output=[message_item, call_item],
        usage=token_usage,
        status="completed",
        model="gpt-5.4",
    )

    assistant_message, finish_reason = _normalize_codex_response(response)

    assert finish_reason == "tool_calls"
    assert assistant_message.tool_calls  # real call preserved
    assert "Running the command" in (assistant_message.content or "")
|
||||
|
||||
|
||||
def test_normalize_codex_response_no_leak_passes_through(monkeypatch):
    """Sanity check: ordinary text without the leak pattern is expected
    back verbatim, with ``finish_reason == "stop"`` and no tool calls."""
    agent = _build_agent(monkeypatch)
    from agent.codex_responses_adapter import _normalize_codex_response

    text_part = SimpleNamespace(
        type="output_text",
        text="Here is the answer with no leak.",
    )
    message_item = SimpleNamespace(
        type="message",
        status="completed",
        content=[text_part],
    )
    token_usage = SimpleNamespace(input_tokens=4, output_tokens=2, total_tokens=6)
    response = SimpleNamespace(
        output=[message_item],
        usage=token_usage,
        status="completed",
        model="gpt-5.4",
    )

    assistant_message, finish_reason = _normalize_codex_response(response)

    assert finish_reason == "stop"
    assert assistant_message.content == "Here is the answer with no leak."
    assert assistant_message.tool_calls == []
|
||||
|
||||
|
||||
def test_interim_commentary_is_not_marked_already_streamed_without_callbacks(monkeypatch):
|
||||
agent = _build_agent(monkeypatch)
|
||||
observed = {}
|
||||
|
||||
@@ -0,0 +1,116 @@
|
||||
"""Tests for tool call argument repair in the streaming assembly path.
|
||||
|
||||
The streaming path (run_agent._call_chat_completions) assembles tool call
|
||||
deltas into full arguments. When a model truncates or malforms the JSON
|
||||
(e.g. GLM-5.1 via Ollama), the assembly path used to pass the broken JSON
|
||||
straight through — setting has_truncated_tool_args but NOT repairing it.
|
||||
That triggered the truncation handler to kill the session with /new required.
|
||||
|
||||
The fix: repair arguments in the streaming assembly path using
|
||||
_repair_tool_call_arguments() so repairable malformations (trailing commas,
|
||||
unclosed brackets, Python None) don't kill the session.
|
||||
"""
|
||||
|
||||
import json
|
||||
import pytest
|
||||
|
||||
from run_agent import _repair_tool_call_arguments
|
||||
|
||||
|
||||
class TestStreamingAssemblyRepair:
    """Checks ``_repair_tool_call_arguments`` on the malformed argument
    strings that show up while streaming tool-call deltas are assembled.

    Only the repair function itself is exercised here. Integration tests
    that drive the full streaming path are in
    test_agent_loop_tool_calling.py.
    """

    # -- Truncation cases (most common streaming failure) --

    def test_truncated_object_no_close_brace(self):
        """An object cut off before its closing brace keeps both keys."""
        truncated = '{"command": "ls -la", "timeout": 30'
        decoded = json.loads(_repair_tool_call_arguments(truncated, "terminal"))
        assert decoded["command"] == "ls -la"
        assert decoded["timeout"] == 30

    def test_truncated_nested_object(self):
        """A two-key object missing its closing brace still yields the
        ``path`` value."""
        truncated = '{"path": "/tmp/foo", "content": "hello"'
        decoded = json.loads(_repair_tool_call_arguments(truncated, "write_file"))
        assert decoded["path"] == "/tmp/foo"

    def test_truncated_mid_value(self):
        """Input that stops in the middle of a string value."""
        truncated = '{"command": "git clone ht'
        repaired = _repair_tool_call_arguments(truncated, "terminal")
        # Only validity is required; the command value may be lost.
        json.loads(repaired)

    # -- Trailing comma cases (Ollama/GLM common) --

    def test_trailing_comma_before_close_brace(self):
        """A comma directly before ``}`` is dropped."""
        repaired = _repair_tool_call_arguments('{"path": "/tmp", "content": "x",}', "write_file")
        assert json.loads(repaired) == {"path": "/tmp", "content": "x"}

    def test_trailing_comma_in_list(self):
        """A comma directly before ``]`` is dropped."""
        repaired = _repair_tool_call_arguments('{"items": [1, 2, 3,]}', "test")
        assert json.loads(repaired) == {"items": [1, 2, 3]}

    # -- Python None from model output --

    def test_python_none_literal(self):
        """The bare text ``None`` becomes an empty JSON object."""
        assert _repair_tool_call_arguments("None", "test") == "{}"

    # -- Empty arguments (some models emit empty string) --

    def test_empty_string(self):
        """An empty argument string becomes an empty JSON object."""
        repaired = _repair_tool_call_arguments("", "test")
        assert repaired == "{}"

    def test_whitespace_only(self):
        """A whitespace-only argument string becomes an empty JSON object."""
        repaired = _repair_tool_call_arguments(" \n ", "test")
        assert repaired == "{}"

    # -- Already-valid JSON passes through unchanged --

    def test_valid_json_passthrough(self):
        """Well-formed input parses to the same object after repair."""
        well_formed = '{"path": "/tmp/foo", "content": "hello"}'
        repaired = _repair_tool_call_arguments(well_formed, "write_file")
        assert json.loads(repaired) == {"path": "/tmp/foo", "content": "hello"}

    # -- Extra closing brackets (rare but happens) --

    def test_extra_closing_brace(self):
        """A surplus ``}`` at the end is discarded."""
        repaired = _repair_tool_call_arguments('{"key": "value"}}', "test")
        assert json.loads(repaired) == {"key": "value"}

    # -- Real-world GLM-5.1 truncation pattern --

    def test_glm_truncation_pattern(self):
        """GLM-5.1 via Ollama commonly truncates right after a key's colon.

        The dangling ``"background":`` is treated as unrepairable, so the
        expected result is the last-resort empty object ``{}``. Repairable
        shapes (trailing comma, unclosed brace without a hanging colon)
        are covered by the other tests in this class.
        """
        truncated = '{"command": "ls -la /tmp", "timeout": 30, "background":'
        repaired = _repair_tool_call_arguments(truncated, "terminal")
        # Hanging colon cannot be fixed, so the fallback object is expected.
        assert json.loads(repaired) == {}

    def test_glm_truncation_repairable(self):
        """The same GLM-5.1 payload without the hanging key is recovered
        with both values intact."""
        truncated = '{"command": "ls -la /tmp", "timeout": 30'
        decoded = json.loads(_repair_tool_call_arguments(truncated, "terminal"))
        assert decoded["command"] == "ls -la /tmp"
        assert decoded["timeout"] == 30
|
||||
@@ -0,0 +1,157 @@
|
||||
"""Tests for AIAgent._sanitize_tool_call_arguments."""
|
||||
|
||||
import copy
|
||||
import logging
|
||||
|
||||
from run_agent import AIAgent
|
||||
|
||||
|
||||
_MISSING = object()
|
||||
|
||||
|
||||
def _tool_call(call_id="call_1", name="read_file", arguments='{"path":"/tmp/foo"}'):
    """Build a function-type tool-call dict.

    Passing the module-level ``_MISSING`` sentinel as ``arguments`` leaves
    the ``"arguments"`` key out of the nested ``"function"`` dict entirely;
    any other value is stored under that key as given.
    """
    if arguments is _MISSING:
        function = {"name": name}
    else:
        function = {"name": name, "arguments": arguments}
    return {"id": call_id, "type": "function", "function": function}
|
||||
|
||||
|
||||
def _assistant_message(*tool_calls):
|
||||
return {
|
||||
"role": "assistant",
|
||||
"content": "tooling",
|
||||
"tool_calls": list(tool_calls),
|
||||
}
|
||||
|
||||
|
||||
def _tool_message(call_id="call_1", content="ok"):
|
||||
return {
|
||||
"role": "tool",
|
||||
"tool_call_id": call_id,
|
||||
"content": content,
|
||||
}
|
||||
|
||||
|
||||
def test_valid_arguments_unchanged():
    """Well-formed tool-call arguments yield a repair count of 0 and the
    message list is left exactly as it was."""
    history = [
        {"role": "user", "content": "hello"},
        _assistant_message(_tool_call(arguments='{"path":"/tmp/foo"}')),
        _tool_message(content="done"),
    ]
    snapshot = copy.deepcopy(history)

    repair_count = AIAgent._sanitize_tool_call_arguments(history)

    assert repair_count == 0
    assert history == snapshot
|
||||
|
||||
|
||||
def test_truncated_arguments_replaced_with_empty_object(caplog):
    """Truncated JSON arguments are replaced by ``"{}"``, counted once, and
    logged with both the session id and the tool-call id."""
    history = [
        _assistant_message(_tool_call(arguments='{"path": "/tmp/foo')),
    ]
    agent_logger = logging.getLogger("run_agent")

    with caplog.at_level(logging.WARNING, logger="run_agent"):
        repair_count = AIAgent._sanitize_tool_call_arguments(
            history,
            logger=agent_logger,
            session_id="session-123",
        )

    assert repair_count == 1
    assert history[0]["tool_calls"][0]["function"]["arguments"] == "{}"

    def mentions_both_ids(record):
        text = record.message
        return "session=session-123" in text and "tool_call_id=call_1" in text

    assert any(mentions_both_ids(record) for record in caplog.records)
|
||||
|
||||
|
||||
def test_marker_appended_to_existing_tool_message():
    """When a tool message already follows the corrupted call, its content
    becomes the corruption marker, a newline, then the original content."""
    marker = AIAgent._TOOL_CALL_ARGUMENTS_CORRUPTION_MARKER
    corrupted_call = _tool_call(arguments='{"path": "/tmp/foo')
    history = [
        _assistant_message(corrupted_call),
        _tool_message(content="existing tool output"),
    ]

    repair_count = AIAgent._sanitize_tool_call_arguments(history)

    assert repair_count == 1
    assert history[1]["content"] == f"{marker}\nexisting tool output"
|
||||
|
||||
|
||||
def test_marker_message_inserted_when_missing():
    """With no tool message after the corrupted call, a marker-only tool
    message is inserted right after the assistant message and the next
    user turn shifts down by one."""
    marker = AIAgent._TOOL_CALL_ARGUMENTS_CORRUPTION_MARKER
    next_turn = {"role": "user", "content": "next turn"}
    history = [
        _assistant_message(_tool_call(arguments='{"path": "/tmp/foo')),
        next_turn,
    ]

    repair_count = AIAgent._sanitize_tool_call_arguments(history)

    expected_insert = {
        "role": "tool",
        "tool_call_id": "call_1",
        "content": marker,
    }
    assert repair_count == 1
    assert history[1] == expected_insert
    assert history[2] == {"role": "user", "content": "next turn"}
|
||||
|
||||
|
||||
def test_multiple_corrupted_tool_calls_in_one_message():
    """Two of three calls in one assistant message are corrupted: both are
    reset to ``"{}"``, the valid one is untouched, and a marker tool
    message is added for each corrupted call in order."""
    marker = AIAgent._TOOL_CALL_ARGUMENTS_CORRUPTION_MARKER
    calls = (
        _tool_call(call_id="call_1", arguments='{"path": "/tmp/foo'),
        _tool_call(call_id="call_2", arguments='{"path":"/tmp/bar"}'),
        _tool_call(call_id="call_3", arguments='{"mode":"tail"'),
    )
    history = [_assistant_message(*calls)]

    repair_count = AIAgent._sanitize_tool_call_arguments(history)

    assert repair_count == 2
    assert history[0]["tool_calls"][0]["function"]["arguments"] == "{}"
    assert history[0]["tool_calls"][1]["function"]["arguments"] == '{"path":"/tmp/bar"}'
    assert history[0]["tool_calls"][2]["function"]["arguments"] == "{}"
    assert history[1]["tool_call_id"] == "call_1"
    assert history[1]["content"] == marker
    assert history[2]["tool_call_id"] == "call_3"
    assert history[2]["content"] == marker
|
||||
|
||||
|
||||
def test_empty_string_arguments_treated_as_empty_object(caplog):
    """An empty-string ``arguments`` value is normalised to ``"{}"``
    without being counted as a repair and without any log record."""
    history = [
        _assistant_message(_tool_call(arguments="")),
    ]
    agent_logger = logging.getLogger("run_agent")

    with caplog.at_level(logging.WARNING, logger="run_agent"):
        repair_count = AIAgent._sanitize_tool_call_arguments(
            history,
            logger=agent_logger,
            session_id="session-123",
        )

    assert repair_count == 0
    assert history[0]["tool_calls"][0]["function"]["arguments"] == "{}"
    assert caplog.records == []
|
||||
|
||||
|
||||
def test_non_assistant_messages_ignored():
    """Entries that are not assistant messages (user, tool, system,
    ``None``, a plain string) are left alone even when they carry
    malformed ``tool_calls``."""
    broken_call_args = '{"bad":'
    history = [
        {"role": "user", "content": "hello", "tool_calls": [_tool_call(arguments=broken_call_args)]},
        {"role": "tool", "tool_call_id": "call_1", "content": "ok"},
        {"role": "system", "content": "sys", "tool_calls": [_tool_call(arguments=broken_call_args)]},
        None,
        "not a dict",
    ]
    snapshot = copy.deepcopy(history)

    repair_count = AIAgent._sanitize_tool_call_arguments(history)

    assert repair_count == 0
    assert history == snapshot
|
||||
@@ -240,3 +240,69 @@ class TestExchangeAuthCode:
|
||||
assert setup_module.TOKEN_PATH.exists()
|
||||
# Pending auth is cleaned up
|
||||
assert not setup_module.PENDING_AUTH_PATH.exists()
|
||||
|
||||
|
||||
class TestHermesConstantsFallback:
    """Covers ``_hermes_home.py`` both with ``hermes_constants`` blocked
    (fallback path) and with it importable (delegation path)."""

    HELPER_PATH = (
        Path(__file__).resolve().parents[2]
        / "skills/productivity/google-workspace/scripts/_hermes_home.py"
    )

    def _exec_helper(self, module_name):
        """Execute the helper file as a fresh module named ``module_name``."""
        spec = importlib.util.spec_from_file_location(module_name, self.HELPER_PATH)
        module = importlib.util.module_from_spec(spec)
        assert spec.loader is not None
        spec.loader.exec_module(module)
        return module

    def _load_helper(self, monkeypatch):
        """Load _hermes_home.py with hermes_constants blocked."""
        # A ``None`` entry in sys.modules makes ``import hermes_constants`` fail.
        monkeypatch.setitem(sys.modules, "hermes_constants", None)
        return self._exec_helper("_hermes_home_test")

    def test_fallback_uses_hermes_home_env_var(self, monkeypatch, tmp_path):
        """Without hermes_constants, the HERMES_HOME env var decides the path."""
        custom_home = tmp_path / "custom-hermes"
        monkeypatch.setenv("HERMES_HOME", str(custom_home))
        helper = self._load_helper(monkeypatch)
        assert helper.get_hermes_home() == tmp_path / "custom-hermes"

    def test_fallback_defaults_to_dot_hermes(self, monkeypatch):
        """Without hermes_constants and with HERMES_HOME unset, the result
        is ``~/.hermes``."""
        monkeypatch.delenv("HERMES_HOME", raising=False)
        helper = self._load_helper(monkeypatch)
        assert helper.get_hermes_home() == Path.home() / ".hermes"

    def test_fallback_ignores_empty_hermes_home(self, monkeypatch):
        """A whitespace-only HERMES_HOME behaves as if it were unset."""
        monkeypatch.setenv("HERMES_HOME", " ")
        helper = self._load_helper(monkeypatch)
        assert helper.get_hermes_home() == Path.home() / ".hermes"

    def test_fallback_display_hermes_home_shortens_path(self, monkeypatch):
        """The fallback ``display_hermes_home()`` renders the default
        location with the ``~/`` shorthand."""
        monkeypatch.delenv("HERMES_HOME", raising=False)
        helper = self._load_helper(monkeypatch)
        assert helper.display_hermes_home() == "~/.hermes"

    def test_fallback_display_hermes_home_profile_path(self, monkeypatch):
        """A profile directory under the home directory is also shown with
        the ``~/`` shorthand."""
        profile_home = Path.home() / ".hermes/profiles/coder"
        monkeypatch.setenv("HERMES_HOME", str(profile_home))
        helper = self._load_helper(monkeypatch)
        assert helper.display_hermes_home() == "~/.hermes/profiles/coder"

    def test_fallback_display_hermes_home_custom_path(self, monkeypatch):
        """A location outside the home directory is displayed in full."""
        monkeypatch.setenv("HERMES_HOME", "/opt/hermes-custom")
        helper = self._load_helper(monkeypatch)
        assert helper.display_hermes_home() == "/opt/hermes-custom"

    def test_delegates_to_hermes_constants_when_available(self):
        """With hermes_constants importable, the helper re-exports its two
        functions as the very same objects."""
        helper = self._exec_helper("_hermes_home_happy")
        import hermes_constants
        assert helper.get_hermes_home is hermes_constants.get_hermes_home
        assert helper.display_hermes_home is hermes_constants.display_hermes_home
|
||||
|
||||
@@ -186,3 +186,67 @@ class TestBatchWorkerResumeBehavior:
|
||||
assert result["discarded_no_reasoning"] == 1
|
||||
assert result["completed_prompts"] == [0]
|
||||
assert not batch_file.exists() or batch_file.read_text() == ""
|
||||
|
||||
|
||||
class TestFinalCheckpointNoDuplicates:
    """Regression: the final checkpoint must list each prompt index once.

    Before PR #15161, `run()` filled `completed_prompts_set` as batches
    finished, then built `all_completed_prompts =
    list(completed_prompts_set)` and extended that list again with every
    batch's `completed_prompts`, so every index was counted twice.
    """

    def _simulate_final_aggregation_fixed(self, batch_results):
        """Mirror the fixed code path in batch_runner.run().

        Returns the sorted, de-duplicated union of every batch's
        ``completed_prompts`` (a batch without that key contributes
        nothing).
        """
        unique_indices = {
            index
            for batch in batch_results
            for index in batch.get("completed_prompts", [])
        }
        # This is what the fixed code now writes to the checkpoint:
        return sorted(unique_indices)

    def test_no_duplicates_in_final_list(self):
        """Three disjoint batches aggregate to one sorted, unique list."""
        batches = [
            {"completed_prompts": [0, 1, 2]},
            {"completed_prompts": [3, 4]},
            {"completed_prompts": [5]},
        ]
        aggregated = self._simulate_final_aggregation_fixed(batches)
        assert aggregated == [0, 1, 2, 3, 4, 5]
        assert len(aggregated) == len(set(aggregated))  # no duplicates

    def test_persisted_checkpoint_has_unique_prompts(self, runner):
        """Save the fixed aggregation through the runner's checkpoint
        writer and confirm the file reads back with four unique, sorted
        indices."""
        batches = [
            {"completed_prompts": [0, 1]},
            {"completed_prompts": [2, 3]},
        ]
        checkpoint = {
            "run_name": runner.run_name,
            "completed_prompts": self._simulate_final_aggregation_fixed(batches),
            "batch_stats": {},
        }
        runner._save_checkpoint(checkpoint)
        on_disk = json.loads(runner.checkpoint_file.read_text())
        persisted = on_disk["completed_prompts"]
        assert persisted == sorted(set(persisted))
        assert len(persisted) == 4

    def test_old_buggy_pattern_would_have_duplicates(self):
        """Reproduce the pre-fix aggregation shape and show that it
        doubles every index. Kept as a sanity anchor so a refactor that
        brings the pattern back is immediately visible."""
        batches = (
            {"completed_prompts": [0, 1, 2]},
            {"completed_prompts": [3, 4]},
        )
        seen = set()
        collected = []
        for batch in batches:
            seen.update(batch["completed_prompts"])
            collected.append(batch)
        # Buggy aggregation (pre-fix): the set contents plus every batch again.
        buggy = list(seen) + [
            index
            for batch in collected
            for index in batch.get("completed_prompts", [])
        ]
        # Every index appears twice
        assert len(buggy) == 2 * len(set(buggy))
|
||||
|
||||
@@ -231,3 +231,46 @@ class TestBackwardCompat:
|
||||
def test_tool_to_toolset_map(self):
    """``TOOL_TO_TOOLSET_MAP`` is a dict with at least one entry."""
    mapping = TOOL_TO_TOOLSET_MAP
    assert isinstance(mapping, dict)
    assert len(mapping) > 0
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# _coerce_number — inf / nan must fall through to the original string
|
||||
# (regression: fix: eliminate duplicate checkpoint entries and JSON-unsafe coercion)
|
||||
# =========================================================================
|
||||
|
||||
class TestCoerceNumberInfNan:
    """``_coerce_number`` must hand back the original string for inf/nan
    spellings, in line with its documented "Returns original string on
    failure" contract, because ``float('inf')`` and ``float('nan')`` are
    rejected by strict JSON serialization."""

    def test_inf_returns_original_string(self):
        """``"inf"`` comes back unchanged."""
        from model_tools import _coerce_number

        text = "inf"
        assert _coerce_number(text) == text

    def test_negative_inf_returns_original_string(self):
        """``"-inf"`` comes back unchanged."""
        from model_tools import _coerce_number

        text = "-inf"
        assert _coerce_number(text) == text

    def test_nan_returns_original_string(self):
        """``"nan"`` comes back unchanged."""
        from model_tools import _coerce_number

        text = "nan"
        assert _coerce_number(text) == text

    def test_infinity_spelling_returns_original_string(self):
        """``"Infinity"`` comes back unchanged."""
        from model_tools import _coerce_number

        # Python's float() parses "Infinity" too — still not JSON-safe.
        text = "Infinity"
        assert _coerce_number(text) == text

    def test_coerced_result_is_strict_json_safe(self):
        """Each inf/nan spelling must produce a value that
        ``json.dumps(..., allow_nan=False)`` serializes without raising."""
        from model_tools import _coerce_number

        for literal in ("inf", "-inf", "nan", "Infinity"):
            coerced = _coerce_number(literal)
            json.dumps({"x": coerced}, allow_nan=False)  # must not raise

    def test_normal_numbers_still_coerce(self):
        """Guard against over-correction: ordinary numeric strings are
        still converted to numbers."""
        from model_tools import _coerce_number

        assert _coerce_number("42") == 42
        assert _coerce_number("3.14") == 3.14
        assert _coerce_number("1e3") == 1000
|
||||
|
||||
@@ -200,8 +200,8 @@ class TestToolsetConsistency:
|
||||
def test_hermes_platforms_share_core_tools(self):
|
||||
"""All hermes-* platform toolsets share the same core tools.
|
||||
|
||||
Platform-specific additions (e.g. ``discord_server`` on
|
||||
hermes-discord, gated on DISCORD_BOT_TOKEN) are allowed on top —
|
||||
Platform-specific additions (e.g. ``discord`` / ``discord_admin``
|
||||
on hermes-discord, gated on DISCORD_BOT_TOKEN) are allowed on top —
|
||||
the invariant is that the core set is identical across platforms.
|
||||
"""
|
||||
platforms = ["hermes-cli", "hermes-telegram", "hermes-discord", "hermes-whatsapp", "hermes-slack", "hermes-signal", "hermes-homeassistant"]
|
||||
|
||||
@@ -60,6 +60,22 @@ class TestWrapCommand:
|
||||
assert "cd ~" in wrapped
|
||||
assert "cd '~'" not in wrapped
|
||||
|
||||
def test_tilde_subpath_with_spaces_uses_home_and_quotes_suffix(self):
|
||||
env = _TestableEnv()
|
||||
env._snapshot_ready = True
|
||||
wrapped = env._wrap_command("ls", "~/my repo")
|
||||
|
||||
assert "cd $HOME/'my repo'" in wrapped
|
||||
assert "cd ~/my repo" not in wrapped
|
||||
|
||||
def test_tilde_slash_maps_to_home(self):
|
||||
env = _TestableEnv()
|
||||
env._snapshot_ready = True
|
||||
wrapped = env._wrap_command("ls", "~/")
|
||||
|
||||
assert "cd $HOME" in wrapped
|
||||
assert "cd ~/" not in wrapped
|
||||
|
||||
def test_cd_failure_exit_126(self):
|
||||
env = _TestableEnv()
|
||||
env._snapshot_ready = True
|
||||
|
||||
@@ -1319,6 +1319,112 @@ class TestDelegateHeartbeat(unittest.TestCase):
|
||||
any("API call #5 completed" in desc for desc in touch_calls),
|
||||
f"Heartbeat should include last_activity_desc: {touch_calls}")
|
||||
|
||||
def test_heartbeat_does_not_trip_idle_stale_while_inside_tool(self):
|
||||
"""A long-running tool (no iteration advance, but current_tool set)
|
||||
must not be flagged stale at the idle threshold.
|
||||
|
||||
Bug #13041: when a child is legitimately busy inside a slow tool
|
||||
(terminal command, browser fetch), api_call_count does not advance.
|
||||
The previous stale check treated this as idle and stopped the
|
||||
heartbeat after 5 cycles (~150s), letting the gateway kill the
|
||||
session. The fix uses a much higher in-tool threshold and only
|
||||
applies the tight idle threshold when current_tool is None.
|
||||
"""
|
||||
from tools.delegate_tool import _run_single_child
|
||||
|
||||
parent = _make_mock_parent()
|
||||
touch_calls = []
|
||||
parent._touch_activity = lambda desc: touch_calls.append(desc)
|
||||
|
||||
child = MagicMock()
|
||||
# Child is stuck inside a single terminal call for the whole run.
|
||||
# api_call_count never advances, current_tool is always set.
|
||||
child.get_activity_summary.return_value = {
|
||||
"current_tool": "terminal",
|
||||
"api_call_count": 1,
|
||||
"max_iterations": 50,
|
||||
"last_activity_desc": "executing tool: terminal",
|
||||
}
|
||||
|
||||
def slow_run(**kwargs):
|
||||
# Long enough to exceed the OLD idle threshold (5 cycles) at
|
||||
# the patched interval, but shorter than the new in-tool
|
||||
# threshold.
|
||||
time.sleep(0.4)
|
||||
return {"final_response": "done", "completed": True, "api_calls": 1}
|
||||
|
||||
child.run_conversation.side_effect = slow_run
|
||||
|
||||
# Patch both the interval AND the idle ceiling so the test proves
|
||||
# the in-tool branch takes effect: with a 0.05s interval and the
|
||||
# default _HEARTBEAT_STALE_CYCLES_IDLE=5, the old behavior would
|
||||
# trip after 0.25s and stop firing. We should see heartbeats
|
||||
# continuing through the full 0.4s run.
|
||||
with patch("tools.delegate_tool._HEARTBEAT_INTERVAL", 0.05):
|
||||
_run_single_child(
|
||||
task_index=0,
|
||||
goal="Test long-running tool",
|
||||
child=child,
|
||||
parent_agent=parent,
|
||||
)
|
||||
|
||||
# With the old idle threshold (5 cycles = 0.25s), touch_calls
|
||||
# would cap at ~5. With the in-tool threshold (20 cycles = 1.0s),
|
||||
# we should see substantially more heartbeats over 0.4s.
|
||||
self.assertGreater(
|
||||
len(touch_calls), 6,
|
||||
f"Heartbeat stopped too early while child was inside a tool; "
|
||||
f"got {len(touch_calls)} touches over 0.4s at 0.05s interval",
|
||||
)
|
||||
|
||||
def test_heartbeat_still_trips_idle_stale_when_no_tool(self):
|
||||
"""A wedged child with no current_tool still trips the idle threshold.
|
||||
|
||||
Regression guard: the fix for #13041 must not disable stale
|
||||
detection entirely. A child that's hung between turns (no tool
|
||||
running, no iteration progress) must still stop touching the
|
||||
parent so the gateway timeout can fire.
|
||||
"""
|
||||
from tools.delegate_tool import _run_single_child
|
||||
|
||||
parent = _make_mock_parent()
|
||||
touch_calls = []
|
||||
parent._touch_activity = lambda desc: touch_calls.append(desc)
|
||||
|
||||
child = MagicMock()
|
||||
# Wedged child: no tool running, iteration frozen.
|
||||
child.get_activity_summary.return_value = {
|
||||
"current_tool": None,
|
||||
"api_call_count": 3,
|
||||
"max_iterations": 50,
|
||||
"last_activity_desc": "waiting for API response",
|
||||
}
|
||||
|
||||
def slow_run(**kwargs):
|
||||
time.sleep(0.6)
|
||||
return {"final_response": "done", "completed": True, "api_calls": 3}
|
||||
|
||||
child.run_conversation.side_effect = slow_run
|
||||
|
||||
# At interval 0.05s, idle threshold (5 cycles) trips at ~0.25s.
|
||||
# We should see the heartbeat stop firing well before 0.6s.
|
||||
with patch("tools.delegate_tool._HEARTBEAT_INTERVAL", 0.05):
|
||||
_run_single_child(
|
||||
task_index=0,
|
||||
goal="Test wedged child",
|
||||
child=child,
|
||||
parent_agent=parent,
|
||||
)
|
||||
|
||||
# With idle threshold=5 + interval=0.05s, touches should cap
|
||||
# around 5. Bound loosely to avoid timing flakes.
|
||||
self.assertLess(
|
||||
len(touch_calls), 9,
|
||||
f"Idle stale detection did not fire: got {len(touch_calls)} "
|
||||
f"touches over 0.6s — expected heartbeat to stop after "
|
||||
f"~5 stale cycles",
|
||||
)
|
||||
|
||||
|
||||
class TestDelegationReasoningEffort(unittest.TestCase):
|
||||
"""Tests for delegation.reasoning_effort config override."""
|
||||
|
||||
@@ -11,6 +11,8 @@ import pytest
|
||||
from tools.discord_tool import (
|
||||
DiscordAPIError,
|
||||
_ACTIONS,
|
||||
_ADMIN_ACTIONS,
|
||||
_CORE_ACTIONS,
|
||||
_available_actions,
|
||||
_build_schema,
|
||||
_channel_type_name,
|
||||
@@ -21,8 +23,11 @@ from tools.discord_tool import (
|
||||
_load_allowed_actions_config,
|
||||
_reset_capability_cache,
|
||||
check_discord_tool_requirements,
|
||||
discord_server,
|
||||
discord_admin_handler,
|
||||
discord_core,
|
||||
get_dynamic_schema,
|
||||
get_dynamic_schema_admin,
|
||||
get_dynamic_schema_core,
|
||||
)
|
||||
|
||||
|
||||
@@ -147,32 +152,32 @@ class TestDiscordRequest:
|
||||
class TestDiscordServerValidation:
|
||||
def test_no_token(self, monkeypatch):
|
||||
monkeypatch.delenv("DISCORD_BOT_TOKEN", raising=False)
|
||||
result = json.loads(discord_server(action="list_guilds"))
|
||||
result = json.loads(discord_admin_handler(action="list_guilds"))
|
||||
assert "error" in result
|
||||
assert "DISCORD_BOT_TOKEN" in result["error"]
|
||||
|
||||
def test_unknown_action(self, monkeypatch):
|
||||
monkeypatch.setenv("DISCORD_BOT_TOKEN", "test-token")
|
||||
result = json.loads(discord_server(action="bad_action"))
|
||||
result = json.loads(discord_core(action="bad_action"))
|
||||
assert "error" in result
|
||||
assert "Unknown action" in result["error"]
|
||||
assert "available_actions" in result
|
||||
|
||||
def test_missing_required_guild_id(self, monkeypatch):
|
||||
monkeypatch.setenv("DISCORD_BOT_TOKEN", "test-token")
|
||||
result = json.loads(discord_server(action="list_channels"))
|
||||
result = json.loads(discord_admin_handler(action="list_channels"))
|
||||
assert "error" in result
|
||||
assert "guild_id" in result["error"]
|
||||
|
||||
def test_missing_required_channel_id(self, monkeypatch):
|
||||
monkeypatch.setenv("DISCORD_BOT_TOKEN", "test-token")
|
||||
result = json.loads(discord_server(action="fetch_messages"))
|
||||
result = json.loads(discord_core(action="fetch_messages"))
|
||||
assert "error" in result
|
||||
assert "channel_id" in result["error"]
|
||||
|
||||
def test_missing_multiple_params(self, monkeypatch):
|
||||
monkeypatch.setenv("DISCORD_BOT_TOKEN", "test-token")
|
||||
result = json.loads(discord_server(action="add_role"))
|
||||
result = json.loads(discord_admin_handler(action="add_role"))
|
||||
assert "error" in result
|
||||
assert "guild_id" in result["error"]
|
||||
assert "user_id" in result["error"]
|
||||
@@ -191,7 +196,7 @@ class TestListGuilds:
|
||||
{"id": "111", "name": "Test Server", "icon": "abc", "owner": True, "permissions": "123"},
|
||||
{"id": "222", "name": "Other Server", "icon": None, "owner": False, "permissions": "456"},
|
||||
]
|
||||
result = json.loads(discord_server(action="list_guilds"))
|
||||
result = json.loads(discord_admin_handler(action="list_guilds"))
|
||||
assert result["count"] == 2
|
||||
assert result["guilds"][0]["name"] == "Test Server"
|
||||
assert result["guilds"][1]["id"] == "222"
|
||||
@@ -219,7 +224,7 @@ class TestServerInfo:
|
||||
"premium_subscription_count": 5,
|
||||
"verification_level": 1,
|
||||
}
|
||||
result = json.loads(discord_server(action="server_info", guild_id="111"))
|
||||
result = json.loads(discord_admin_handler(action="server_info", guild_id="111"))
|
||||
assert result["name"] == "My Server"
|
||||
assert result["member_count"] == 42
|
||||
assert result["online_count"] == 10
|
||||
@@ -242,7 +247,7 @@ class TestListChannels:
|
||||
{"id": "12", "name": "voice", "type": 2, "position": 1, "parent_id": "10", "topic": None, "nsfw": False},
|
||||
{"id": "13", "name": "no-category", "type": 0, "position": 0, "parent_id": None, "topic": None, "nsfw": False},
|
||||
]
|
||||
result = json.loads(discord_server(action="list_channels", guild_id="111"))
|
||||
result = json.loads(discord_admin_handler(action="list_channels", guild_id="111"))
|
||||
assert result["total_channels"] == 3 # excludes the category itself
|
||||
groups = result["channel_groups"]
|
||||
# Uncategorized first
|
||||
@@ -257,7 +262,7 @@ class TestListChannels:
|
||||
def test_empty_guild(self, mock_req, monkeypatch):
|
||||
monkeypatch.setenv("DISCORD_BOT_TOKEN", "test-token")
|
||||
mock_req.return_value = []
|
||||
result = json.loads(discord_server(action="list_channels", guild_id="111"))
|
||||
result = json.loads(discord_admin_handler(action="list_channels", guild_id="111"))
|
||||
assert result["total_channels"] == 0
|
||||
|
||||
|
||||
@@ -274,7 +279,7 @@ class TestChannelInfo:
|
||||
"topic": "Welcome!", "nsfw": False, "position": 0,
|
||||
"parent_id": "10", "rate_limit_per_user": 0, "last_message_id": "999",
|
||||
}
|
||||
result = json.loads(discord_server(action="channel_info", channel_id="11"))
|
||||
result = json.loads(discord_admin_handler(action="channel_info", channel_id="11"))
|
||||
assert result["name"] == "general"
|
||||
assert result["type"] == "text"
|
||||
assert result["guild_id"] == "111"
|
||||
@@ -293,7 +298,7 @@ class TestListRoles:
|
||||
{"id": "2", "name": "Admin", "position": 2, "color": 16711680, "mentionable": True, "managed": False, "hoist": True},
|
||||
{"id": "3", "name": "Mod", "position": 1, "color": 255, "mentionable": True, "managed": False, "hoist": True},
|
||||
]
|
||||
result = json.loads(discord_server(action="list_roles", guild_id="111"))
|
||||
result = json.loads(discord_admin_handler(action="list_roles", guild_id="111"))
|
||||
assert result["count"] == 3
|
||||
# Should be sorted by position descending
|
||||
assert result["roles"][0]["name"] == "Admin"
|
||||
@@ -317,7 +322,7 @@ class TestMemberInfo:
|
||||
"joined_at": "2024-01-01T00:00:00Z",
|
||||
"premium_since": None,
|
||||
}
|
||||
result = json.loads(discord_server(action="member_info", guild_id="111", user_id="42"))
|
||||
result = json.loads(discord_admin_handler(action="member_info", guild_id="111", user_id="42"))
|
||||
assert result["username"] == "testuser"
|
||||
assert result["nickname"] == "Testy"
|
||||
assert result["roles"] == ["2", "3"]
|
||||
@@ -334,7 +339,7 @@ class TestSearchMembers:
|
||||
mock_req.return_value = [
|
||||
{"user": {"id": "42", "username": "testuser", "global_name": "Test", "bot": False}, "nick": None, "roles": []},
|
||||
]
|
||||
result = json.loads(discord_server(action="search_members", guild_id="111", query="test"))
|
||||
result = json.loads(discord_core(action="search_members", guild_id="111", query="test"))
|
||||
assert result["count"] == 1
|
||||
assert result["members"][0]["username"] == "testuser"
|
||||
mock_req.assert_called_once_with(
|
||||
@@ -346,7 +351,7 @@ class TestSearchMembers:
|
||||
def test_search_members_limit_capped(self, mock_req, monkeypatch):
|
||||
monkeypatch.setenv("DISCORD_BOT_TOKEN", "test-token")
|
||||
mock_req.return_value = []
|
||||
discord_server(action="search_members", guild_id="111", query="x", limit=200)
|
||||
discord_core(action="search_members", guild_id="111", query="x", limit=200)
|
||||
call_params = mock_req.call_args[1]["params"]
|
||||
assert call_params["limit"] == "100" # Capped at 100
|
||||
|
||||
@@ -370,7 +375,7 @@ class TestFetchMessages:
|
||||
"pinned": False,
|
||||
},
|
||||
]
|
||||
result = json.loads(discord_server(action="fetch_messages", channel_id="11"))
|
||||
result = json.loads(discord_core(action="fetch_messages", channel_id="11"))
|
||||
assert result["count"] == 1
|
||||
assert result["messages"][0]["content"] == "Hello world"
|
||||
assert result["messages"][0]["author"]["username"] == "user1"
|
||||
@@ -379,7 +384,7 @@ class TestFetchMessages:
|
||||
def test_fetch_messages_with_pagination(self, mock_req, monkeypatch):
|
||||
monkeypatch.setenv("DISCORD_BOT_TOKEN", "test-token")
|
||||
mock_req.return_value = []
|
||||
discord_server(action="fetch_messages", channel_id="11", before="999", limit=10)
|
||||
discord_core(action="fetch_messages", channel_id="11", before="999", limit=10)
|
||||
call_params = mock_req.call_args[1]["params"]
|
||||
assert call_params["before"] == "999"
|
||||
assert call_params["limit"] == "10"
|
||||
@@ -396,7 +401,7 @@ class TestListPins:
|
||||
mock_req.return_value = [
|
||||
{"id": "500", "content": "Important announcement", "author": {"username": "admin"}, "timestamp": "2024-01-01T00:00:00Z"},
|
||||
]
|
||||
result = json.loads(discord_server(action="list_pins", channel_id="11"))
|
||||
result = json.loads(discord_admin_handler(action="list_pins", channel_id="11"))
|
||||
assert result["count"] == 1
|
||||
assert result["pinned_messages"][0]["content"] == "Important announcement"
|
||||
|
||||
@@ -410,7 +415,7 @@ class TestPinUnpin:
|
||||
def test_pin_message(self, mock_req, monkeypatch):
|
||||
monkeypatch.setenv("DISCORD_BOT_TOKEN", "test-token")
|
||||
mock_req.return_value = None # 204
|
||||
result = json.loads(discord_server(action="pin_message", channel_id="11", message_id="500"))
|
||||
result = json.loads(discord_admin_handler(action="pin_message", channel_id="11", message_id="500"))
|
||||
assert result["success"] is True
|
||||
mock_req.assert_called_once_with("PUT", "/channels/11/pins/500", "test-token")
|
||||
|
||||
@@ -418,7 +423,7 @@ class TestPinUnpin:
|
||||
def test_unpin_message(self, mock_req, monkeypatch):
|
||||
monkeypatch.setenv("DISCORD_BOT_TOKEN", "test-token")
|
||||
mock_req.return_value = None
|
||||
result = json.loads(discord_server(action="unpin_message", channel_id="11", message_id="500"))
|
||||
result = json.loads(discord_admin_handler(action="unpin_message", channel_id="11", message_id="500"))
|
||||
assert result["success"] is True
|
||||
|
||||
|
||||
@@ -431,7 +436,7 @@ class TestCreateThread:
|
||||
def test_create_standalone_thread(self, mock_req, monkeypatch):
|
||||
monkeypatch.setenv("DISCORD_BOT_TOKEN", "test-token")
|
||||
mock_req.return_value = {"id": "800", "name": "New Thread"}
|
||||
result = json.loads(discord_server(action="create_thread", channel_id="11", name="New Thread"))
|
||||
result = json.loads(discord_core(action="create_thread", channel_id="11", name="New Thread"))
|
||||
assert result["success"] is True
|
||||
assert result["thread_id"] == "800"
|
||||
# Verify the API call
|
||||
@@ -444,7 +449,7 @@ class TestCreateThread:
|
||||
def test_create_thread_from_message(self, mock_req, monkeypatch):
|
||||
monkeypatch.setenv("DISCORD_BOT_TOKEN", "test-token")
|
||||
mock_req.return_value = {"id": "801", "name": "Discussion"}
|
||||
result = json.loads(discord_server(
|
||||
result = json.loads(discord_core(
|
||||
action="create_thread", channel_id="11", name="Discussion", message_id="1001",
|
||||
))
|
||||
assert result["success"] is True
|
||||
@@ -463,7 +468,7 @@ class TestRoleManagement:
|
||||
def test_add_role(self, mock_req, monkeypatch):
|
||||
monkeypatch.setenv("DISCORD_BOT_TOKEN", "test-token")
|
||||
mock_req.return_value = None
|
||||
result = json.loads(discord_server(
|
||||
result = json.loads(discord_admin_handler(
|
||||
action="add_role", guild_id="111", user_id="42", role_id="2",
|
||||
))
|
||||
assert result["success"] is True
|
||||
@@ -475,7 +480,7 @@ class TestRoleManagement:
|
||||
def test_remove_role(self, mock_req, monkeypatch):
|
||||
monkeypatch.setenv("DISCORD_BOT_TOKEN", "test-token")
|
||||
mock_req.return_value = None
|
||||
result = json.loads(discord_server(
|
||||
result = json.loads(discord_admin_handler(
|
||||
action="remove_role", guild_id="111", user_id="42", role_id="2",
|
||||
))
|
||||
assert result["success"] is True
|
||||
@@ -490,15 +495,23 @@ class TestErrorHandling:
|
||||
def test_api_error_handled(self, mock_req, monkeypatch):
|
||||
monkeypatch.setenv("DISCORD_BOT_TOKEN", "test-token")
|
||||
mock_req.side_effect = DiscordAPIError(403, '{"message": "Missing Access"}')
|
||||
result = json.loads(discord_server(action="list_guilds"))
|
||||
result = json.loads(discord_admin_handler(action="list_guilds"))
|
||||
assert "error" in result
|
||||
assert "403" in result["error"]
|
||||
|
||||
@patch("tools.discord_tool._discord_request")
|
||||
def test_unexpected_error_handled(self, mock_req, monkeypatch):
|
||||
def test_unexpected_error_handled_admin(self, mock_req, monkeypatch):
|
||||
monkeypatch.setenv("DISCORD_BOT_TOKEN", "test-token")
|
||||
mock_req.side_effect = RuntimeError("something broke")
|
||||
result = json.loads(discord_server(action="list_guilds"))
|
||||
result = json.loads(discord_admin_handler(action="list_guilds"))
|
||||
assert "error" in result
|
||||
assert "something broke" in result["error"]
|
||||
|
||||
@patch("tools.discord_tool._discord_request")
|
||||
def test_unexpected_error_handled_core(self, mock_req, monkeypatch):
|
||||
monkeypatch.setenv("DISCORD_BOT_TOKEN", "test-token")
|
||||
mock_req.side_effect = RuntimeError("something broke")
|
||||
result = json.loads(discord_core(action="fetch_messages", channel_id="11"))
|
||||
assert "error" in result
|
||||
assert "something broke" in result["error"]
|
||||
|
||||
@@ -508,79 +521,109 @@ class TestErrorHandling:
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestRegistration:
|
||||
def test_tool_registered(self):
|
||||
def test_core_tool_registered(self):
|
||||
from tools.registry import registry
|
||||
entry = registry._tools.get("discord_server")
|
||||
entry = registry._tools.get("discord")
|
||||
assert entry is not None
|
||||
assert entry.schema["name"] == "discord_server"
|
||||
assert entry.schema["name"] == "discord"
|
||||
assert entry.toolset == "discord"
|
||||
assert entry.check_fn is not None
|
||||
assert entry.requires_env == ["DISCORD_BOT_TOKEN"]
|
||||
|
||||
def test_schema_actions(self):
|
||||
"""Static schema should list all actions (the model_tools post-processing
|
||||
narrows this per-session; static registration is the superset)."""
|
||||
def test_admin_tool_registered(self):
|
||||
from tools.registry import registry
|
||||
entry = registry._tools["discord_server"]
|
||||
actions = entry.schema["parameters"]["properties"]["action"]["enum"]
|
||||
expected = [
|
||||
"list_guilds", "server_info", "list_channels", "channel_info",
|
||||
"list_roles", "member_info", "search_members", "fetch_messages",
|
||||
"list_pins", "pin_message", "unpin_message", "create_thread",
|
||||
"add_role", "remove_role",
|
||||
]
|
||||
assert set(actions) == set(expected)
|
||||
assert set(_ACTIONS.keys()) == set(expected)
|
||||
entry = registry._tools.get("discord_admin")
|
||||
assert entry is not None
|
||||
assert entry.schema["name"] == "discord_admin"
|
||||
assert entry.toolset == "discord_admin"
|
||||
assert entry.check_fn is not None
|
||||
assert entry.requires_env == ["DISCORD_BOT_TOKEN"]
|
||||
|
||||
def test_core_schema_actions(self):
|
||||
"""Core static schema should list only core actions."""
|
||||
from tools.registry import registry
|
||||
entry = registry._tools["discord"]
|
||||
actions = set(entry.schema["parameters"]["properties"]["action"]["enum"])
|
||||
assert actions == {"fetch_messages", "search_members", "create_thread"}
|
||||
|
||||
def test_admin_schema_actions(self):
|
||||
"""Admin static schema should list only admin actions."""
|
||||
from tools.registry import registry
|
||||
entry = registry._tools["discord_admin"]
|
||||
actions = set(entry.schema["parameters"]["properties"]["action"]["enum"])
|
||||
expected_admin = set(_ACTIONS.keys()) - {"fetch_messages", "search_members", "create_thread"}
|
||||
assert actions == expected_admin
|
||||
|
||||
def test_all_actions_covered(self):
|
||||
"""Core + admin actions should cover all known actions."""
|
||||
assert set(_CORE_ACTIONS.keys()) | set(_ADMIN_ACTIONS.keys()) == set(_ACTIONS.keys())
|
||||
assert set(_CORE_ACTIONS.keys()) & set(_ADMIN_ACTIONS.keys()) == set()
|
||||
|
||||
def test_schema_parameter_bounds(self):
|
||||
from tools.registry import registry
|
||||
entry = registry._tools["discord_server"]
|
||||
entry = registry._tools["discord"]
|
||||
props = entry.schema["parameters"]["properties"]
|
||||
assert props["limit"]["minimum"] == 1
|
||||
assert props["limit"]["maximum"] == 100
|
||||
assert props["auto_archive_duration"]["enum"] == [60, 1440, 4320, 10080]
|
||||
|
||||
def test_schema_description_is_action_manifest(self):
|
||||
"""The top-level description should include the action manifest
|
||||
(one-line signatures per action) so the model can find required
|
||||
params without re-reading every parameter description."""
|
||||
def test_core_schema_description(self):
|
||||
"""Core schema description should mention core actions."""
|
||||
from tools.registry import registry
|
||||
entry = registry._tools["discord_server"]
|
||||
entry = registry._tools["discord"]
|
||||
desc = entry.schema["description"]
|
||||
# Spot-check a few entries
|
||||
assert "list_guilds()" in desc
|
||||
assert "fetch_messages(channel_id)" in desc
|
||||
assert "search_members(guild_id, query)" in desc
|
||||
assert "create_thread(channel_id, name)" in desc
|
||||
# Admin actions should NOT be in core description
|
||||
assert "list_guilds()" not in desc
|
||||
assert "add_role(" not in desc
|
||||
|
||||
def test_admin_schema_description(self):
|
||||
"""Admin schema description should mention admin actions."""
|
||||
from tools.registry import registry
|
||||
entry = registry._tools["discord_admin"]
|
||||
desc = entry.schema["description"]
|
||||
assert "list_guilds()" in desc
|
||||
assert "add_role(guild_id, user_id, role_id)" in desc
|
||||
# Core actions should NOT be in admin description
|
||||
assert "fetch_messages(" not in desc
|
||||
assert "create_thread(" not in desc
|
||||
|
||||
def test_handler_callable(self):
|
||||
from tools.registry import registry
|
||||
entry = registry._tools["discord_server"]
|
||||
entry = registry._tools["discord"]
|
||||
assert callable(entry.handler)
|
||||
entry_admin = registry._tools["discord_admin"]
|
||||
assert callable(entry_admin.handler)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Toolset: discord_server only in hermes-discord
|
||||
# Toolset: discord / discord_admin only in hermes-discord
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestToolsetInclusion:
|
||||
def test_discord_server_in_hermes_discord_toolset(self):
|
||||
def test_discord_tools_in_hermes_discord_toolset(self):
|
||||
from toolsets import TOOLSETS
|
||||
assert "discord_server" in TOOLSETS["hermes-discord"]["tools"]
|
||||
assert "discord" in TOOLSETS["hermes-discord"]["tools"]
|
||||
assert "discord_admin" in TOOLSETS["hermes-discord"]["tools"]
|
||||
|
||||
def test_discord_server_not_in_core_tools(self):
|
||||
def test_discord_tools_not_in_core_tools(self):
|
||||
from toolsets import _HERMES_CORE_TOOLS
|
||||
assert "discord_server" not in _HERMES_CORE_TOOLS
|
||||
assert "discord" not in _HERMES_CORE_TOOLS
|
||||
assert "discord_admin" not in _HERMES_CORE_TOOLS
|
||||
|
||||
def test_discord_server_not_in_other_toolsets(self):
|
||||
def test_discord_tools_not_in_other_toolsets(self):
|
||||
from toolsets import TOOLSETS
|
||||
for name, ts in TOOLSETS.items():
|
||||
if name == "hermes-discord":
|
||||
if name in ("hermes-discord", "hermes-gateway", "discord", "discord_admin"):
|
||||
continue
|
||||
# The gateway toolset might include it if it unions all platform tools
|
||||
if name == "hermes-gateway":
|
||||
continue
|
||||
assert "discord_server" not in ts.get("tools", []), (
|
||||
f"discord_server should not be in toolset '{name}'"
|
||||
tools = ts.get("tools", [])
|
||||
assert "discord" not in tools or name == "discord", (
|
||||
f"discord tool should not be in toolset '{name}'"
|
||||
)
|
||||
assert "discord_admin" not in tools or name == "discord_admin", (
|
||||
f"discord_admin tool should not be in toolset '{name}'"
|
||||
)
|
||||
|
||||
|
||||
@@ -798,40 +841,69 @@ class TestDynamicSchema:
|
||||
@patch("tools.discord_tool._discord_request")
|
||||
def test_no_token_returns_none(self, mock_req, monkeypatch):
|
||||
monkeypatch.delenv("DISCORD_BOT_TOKEN", raising=False)
|
||||
assert get_dynamic_schema() is None
|
||||
assert get_dynamic_schema_core() is None
|
||||
assert get_dynamic_schema_admin() is None
|
||||
mock_req.assert_not_called()
|
||||
|
||||
@patch("tools.discord_tool._discord_request")
|
||||
def test_full_intents_full_schema(self, mock_req, monkeypatch):
|
||||
def test_full_intents_core_schema(self, mock_req, monkeypatch):
|
||||
monkeypatch.setenv("DISCORD_BOT_TOKEN", "tok")
|
||||
monkeypatch.setattr(
|
||||
"hermes_cli.config.load_config",
|
||||
lambda: {"discord": {"server_actions": ""}},
|
||||
)
|
||||
mock_req.return_value = {"flags": (1 << 14) | (1 << 18)}
|
||||
schema = get_dynamic_schema()
|
||||
actions = schema["parameters"]["properties"]["action"]["enum"]
|
||||
assert set(actions) == set(_ACTIONS.keys())
|
||||
# No content warning
|
||||
schema = get_dynamic_schema_core()
|
||||
actions = set(schema["parameters"]["properties"]["action"]["enum"])
|
||||
assert actions == set(_CORE_ACTIONS.keys())
|
||||
assert schema["name"] == "discord"
|
||||
|
||||
@patch("tools.discord_tool._discord_request")
|
||||
def test_full_intents_admin_schema(self, mock_req, monkeypatch):
|
||||
monkeypatch.setenv("DISCORD_BOT_TOKEN", "tok")
|
||||
monkeypatch.setattr(
|
||||
"hermes_cli.config.load_config",
|
||||
lambda: {"discord": {"server_actions": ""}},
|
||||
)
|
||||
mock_req.return_value = {"flags": (1 << 14) | (1 << 18)}
|
||||
schema = get_dynamic_schema_admin()
|
||||
actions = set(schema["parameters"]["properties"]["action"]["enum"])
|
||||
assert actions == set(_ADMIN_ACTIONS.keys())
|
||||
assert schema["name"] == "discord_admin"
|
||||
# No content warning when MESSAGE_CONTENT is enabled
|
||||
assert "MESSAGE_CONTENT" not in schema["description"]
|
||||
|
||||
@patch("tools.discord_tool._discord_request")
|
||||
def test_no_members_intent_removes_member_actions_from_schema(
|
||||
def test_no_members_intent_removes_member_actions_from_admin_schema(
|
||||
self, mock_req, monkeypatch,
|
||||
):
|
||||
"""member_info is an admin action; it should be hidden when
|
||||
GUILD_MEMBERS intent is missing."""
|
||||
monkeypatch.setenv("DISCORD_BOT_TOKEN", "tok")
|
||||
monkeypatch.setattr(
|
||||
"hermes_cli.config.load_config",
|
||||
lambda: {"discord": {"server_actions": ""}},
|
||||
)
|
||||
mock_req.return_value = {"flags": 1 << 18} # only MESSAGE_CONTENT
|
||||
schema = get_dynamic_schema()
|
||||
schema = get_dynamic_schema_admin()
|
||||
actions = schema["parameters"]["properties"]["action"]["enum"]
|
||||
assert "member_info" not in actions
|
||||
assert "member_info" not in schema["description"]
|
||||
|
||||
@patch("tools.discord_tool._discord_request")
|
||||
def test_no_members_intent_hides_search_members_from_core(
|
||||
self, mock_req, monkeypatch,
|
||||
):
|
||||
"""search_members is a core action gated by GUILD_MEMBERS intent."""
|
||||
monkeypatch.setenv("DISCORD_BOT_TOKEN", "tok")
|
||||
monkeypatch.setattr(
|
||||
"hermes_cli.config.load_config",
|
||||
lambda: {"discord": {"server_actions": ""}},
|
||||
)
|
||||
mock_req.return_value = {"flags": 1 << 18} # only MESSAGE_CONTENT
|
||||
schema = get_dynamic_schema_core()
|
||||
actions = schema["parameters"]["properties"]["action"]["enum"]
|
||||
assert "search_members" not in actions
|
||||
assert "member_info" not in actions
|
||||
# Manifest description should also not advertise them
|
||||
assert "search_members" not in schema["description"]
|
||||
assert "member_info" not in schema["description"]
|
||||
|
||||
@patch("tools.discord_tool._discord_request")
|
||||
def test_no_message_content_adds_warning_note(self, mock_req, monkeypatch):
|
||||
@@ -841,41 +913,53 @@ class TestDynamicSchema:
|
||||
lambda: {"discord": {"server_actions": ""}},
|
||||
)
|
||||
mock_req.return_value = {"flags": 1 << 14} # only GUILD_MEMBERS
|
||||
schema = get_dynamic_schema()
|
||||
schema = get_dynamic_schema_core()
|
||||
assert "MESSAGE_CONTENT" in schema["description"]
|
||||
# But fetch_messages is still available
|
||||
actions = schema["parameters"]["properties"]["action"]["enum"]
|
||||
assert "fetch_messages" in actions
|
||||
|
||||
@patch("tools.discord_tool._discord_request")
|
||||
def test_config_allowlist_narrows_schema(self, mock_req, monkeypatch):
|
||||
def test_config_allowlist_narrows_admin_schema(self, mock_req, monkeypatch):
|
||||
monkeypatch.setenv("DISCORD_BOT_TOKEN", "tok")
|
||||
monkeypatch.setattr(
|
||||
"hermes_cli.config.load_config",
|
||||
lambda: {"discord": {"server_actions": "list_guilds,list_channels"}},
|
||||
)
|
||||
mock_req.return_value = {"flags": (1 << 14) | (1 << 18)}
|
||||
schema = get_dynamic_schema()
|
||||
schema = get_dynamic_schema_admin()
|
||||
actions = schema["parameters"]["properties"]["action"]["enum"]
|
||||
assert actions == ["list_guilds", "list_channels"]
|
||||
# Manifest description should only show allowed ones (check for
|
||||
# the signature marker, which is specific to manifest lines)
|
||||
assert "list_guilds()" in schema["description"]
|
||||
assert "add_role(" not in schema["description"]
|
||||
assert "create_thread(" not in schema["description"]
|
||||
|
||||
@patch("tools.discord_tool._discord_request")
|
||||
def test_empty_allowlist_with_valid_values_hides_tool(self, mock_req, monkeypatch):
|
||||
def test_empty_allowlist_with_valid_values_hides_tools(self, mock_req, monkeypatch):
|
||||
"""If the allowlist resolves to zero valid actions (e.g. all names
|
||||
were typos), get_dynamic_schema returns None so the tool is dropped
|
||||
entirely rather than showing an empty enum."""
|
||||
were typos), get_dynamic_schema returns None so the tool is dropped."""
|
||||
monkeypatch.setenv("DISCORD_BOT_TOKEN", "tok")
|
||||
monkeypatch.setattr(
|
||||
"hermes_cli.config.load_config",
|
||||
lambda: {"discord": {"server_actions": "typo_one,typo_two"}},
|
||||
)
|
||||
mock_req.return_value = {"flags": (1 << 14) | (1 << 18)}
|
||||
assert get_dynamic_schema() is None
|
||||
assert get_dynamic_schema_core() is None
|
||||
assert get_dynamic_schema_admin() is None
|
||||
|
||||
@patch("tools.discord_tool._discord_request")
|
||||
def test_backward_compat_wrapper(self, mock_req, monkeypatch):
|
||||
"""get_dynamic_schema() should delegate to get_dynamic_schema_core()."""
|
||||
monkeypatch.setenv("DISCORD_BOT_TOKEN", "tok")
|
||||
monkeypatch.setattr(
|
||||
"hermes_cli.config.load_config",
|
||||
lambda: {"discord": {"server_actions": ""}},
|
||||
)
|
||||
mock_req.return_value = {"flags": (1 << 14) | (1 << 18)}
|
||||
schema = get_dynamic_schema()
|
||||
assert schema is not None
|
||||
assert schema["name"] == "discord"
|
||||
actions = set(schema["parameters"]["properties"]["action"]["enum"])
|
||||
assert actions == set(_CORE_ACTIONS.keys())
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -890,7 +974,7 @@ class TestRuntimeAllowlistEnforcement:
|
||||
"hermes_cli.config.load_config",
|
||||
lambda: {"discord": {"server_actions": "list_guilds"}},
|
||||
)
|
||||
result = json.loads(discord_server(action="add_role", guild_id="1", user_id="2", role_id="3"))
|
||||
result = json.loads(discord_admin_handler(action="add_role", guild_id="1", user_id="2", role_id="3"))
|
||||
assert "error" in result
|
||||
assert "disabled by config" in result["error"]
|
||||
mock_req.assert_not_called()
|
||||
@@ -903,7 +987,7 @@ class TestRuntimeAllowlistEnforcement:
|
||||
lambda: {"discord": {"server_actions": "list_guilds"}},
|
||||
)
|
||||
mock_req.return_value = []
|
||||
result = json.loads(discord_server(action="list_guilds"))
|
||||
result = json.loads(discord_admin_handler(action="list_guilds"))
|
||||
assert "guilds" in result
|
||||
|
||||
|
||||
@@ -930,7 +1014,7 @@ class Test403Enrichment:
|
||||
lambda: {"discord": {"server_actions": ""}},
|
||||
)
|
||||
mock_req.side_effect = DiscordAPIError(403, '{"message":"Missing Permissions"}')
|
||||
result = json.loads(discord_server(
|
||||
result = json.loads(discord_admin_handler(
|
||||
action="add_role", guild_id="1", user_id="2", role_id="3",
|
||||
))
|
||||
assert "error" in result
|
||||
@@ -944,7 +1028,7 @@ class Test403Enrichment:
|
||||
lambda: {"discord": {"server_actions": ""}},
|
||||
)
|
||||
mock_req.side_effect = DiscordAPIError(500, "server error")
|
||||
result = json.loads(discord_server(action="list_guilds"))
|
||||
result = json.loads(discord_admin_handler(action="list_guilds"))
|
||||
assert "500" in result["error"]
|
||||
assert "MANAGE_ROLES" not in result["error"]
|
||||
|
||||
@@ -961,10 +1045,10 @@ class TestModelToolsIntegration:
|
||||
_reset_capability_cache()
|
||||
|
||||
@patch("tools.discord_tool._discord_request")
|
||||
def test_discord_server_schema_rebuilt_by_get_tool_definitions(
|
||||
def test_discord_admin_schema_rebuilt_by_get_tool_definitions(
|
||||
self, mock_req, monkeypatch,
|
||||
):
|
||||
"""When model_tools.get_tool_definitions runs with discord_server
|
||||
"""When model_tools.get_tool_definitions runs with discord_admin
|
||||
available, it should replace the static schema with the dynamic one."""
|
||||
monkeypatch.setenv("DISCORD_BOT_TOKEN", "tok")
|
||||
monkeypatch.setattr(
|
||||
@@ -976,16 +1060,16 @@ class TestModelToolsIntegration:
|
||||
|
||||
from model_tools import get_tool_definitions
|
||||
tools = get_tool_definitions(enabled_toolsets=["hermes-discord"], quiet_mode=True)
|
||||
discord_tool = next(
|
||||
(t for t in tools if t.get("function", {}).get("name") == "discord_server"),
|
||||
discord_admin_tool = next(
|
||||
(t for t in tools if t.get("function", {}).get("name") == "discord_admin"),
|
||||
None,
|
||||
)
|
||||
assert discord_tool is not None, "discord_server should be in the schema"
|
||||
actions = discord_tool["function"]["parameters"]["properties"]["action"]["enum"]
|
||||
assert discord_admin_tool is not None, "discord_admin should be in the schema"
|
||||
actions = discord_admin_tool["function"]["parameters"]["properties"]["action"]["enum"]
|
||||
assert actions == ["list_guilds", "server_info"]
|
||||
|
||||
@patch("tools.discord_tool._discord_request")
|
||||
def test_discord_server_dropped_when_allowlist_empties_it(
|
||||
def test_discord_tools_dropped_when_allowlist_empties_them(
|
||||
self, mock_req, monkeypatch,
|
||||
):
|
||||
monkeypatch.setenv("DISCORD_BOT_TOKEN", "tok")
|
||||
@@ -998,4 +1082,6 @@ class TestModelToolsIntegration:
|
||||
from model_tools import get_tool_definitions
|
||||
tools = get_tool_definitions(enabled_toolsets=["hermes-discord"], quiet_mode=True)
|
||||
names = [t.get("function", {}).get("name") for t in tools]
|
||||
assert "discord" not in names
|
||||
assert "discord_admin" not in names
|
||||
assert "discord_server" not in names
|
||||
|
||||
@@ -347,6 +347,70 @@ class TestSkillView:
|
||||
assert result["name"] == "my-skill"
|
||||
assert "Step 1" in result["content"]
|
||||
|
||||
def test_skill_view_applies_template_vars(self, tmp_path):
|
||||
with (
|
||||
patch("tools.skills_tool.SKILLS_DIR", tmp_path),
|
||||
patch(
|
||||
"agent.skill_preprocessing.load_skills_config",
|
||||
return_value={"template_vars": True, "inline_shell": False},
|
||||
),
|
||||
):
|
||||
skill_dir = _make_skill(
|
||||
tmp_path,
|
||||
"templated",
|
||||
body="Run ${HERMES_SKILL_DIR}/scripts/do.sh in ${HERMES_SESSION_ID}",
|
||||
)
|
||||
raw = skill_view("templated", task_id="session-123")
|
||||
|
||||
result = json.loads(raw)
|
||||
assert result["success"] is True
|
||||
assert f"Run {skill_dir}/scripts/do.sh in session-123" in result["content"]
|
||||
assert "${HERMES_SKILL_DIR}" not in result["content"]
|
||||
|
||||
def test_skill_view_applies_inline_shell_when_enabled(self, tmp_path):
|
||||
with (
|
||||
patch("tools.skills_tool.SKILLS_DIR", tmp_path),
|
||||
patch(
|
||||
"agent.skill_preprocessing.load_skills_config",
|
||||
return_value={
|
||||
"template_vars": True,
|
||||
"inline_shell": True,
|
||||
"inline_shell_timeout": 5,
|
||||
},
|
||||
),
|
||||
):
|
||||
_make_skill(
|
||||
tmp_path,
|
||||
"dynamic",
|
||||
body="Current date: !`printf 2026-04-24`",
|
||||
)
|
||||
raw = skill_view("dynamic")
|
||||
|
||||
result = json.loads(raw)
|
||||
assert result["success"] is True
|
||||
assert "Current date: 2026-04-24" in result["content"]
|
||||
assert "!`printf 2026-04-24`" not in result["content"]
|
||||
|
||||
def test_skill_view_leaves_inline_shell_literal_when_disabled(self, tmp_path):
|
||||
with (
|
||||
patch("tools.skills_tool.SKILLS_DIR", tmp_path),
|
||||
patch(
|
||||
"agent.skill_preprocessing.load_skills_config",
|
||||
return_value={"template_vars": True, "inline_shell": False},
|
||||
),
|
||||
):
|
||||
_make_skill(
|
||||
tmp_path,
|
||||
"static",
|
||||
body="Current date: !`printf SHOULD_NOT_RUN`",
|
||||
)
|
||||
raw = skill_view("static")
|
||||
|
||||
result = json.loads(raw)
|
||||
assert result["success"] is True
|
||||
assert "Current date: !`printf SHOULD_NOT_RUN`" in result["content"]
|
||||
assert "Current date: SHOULD_NOT_RUN" not in result["content"]
|
||||
|
||||
def test_view_nonexistent_skill(self, tmp_path):
|
||||
with patch("tools.skills_tool.SKILLS_DIR", tmp_path):
|
||||
_make_skill(tmp_path, "other-skill")
|
||||
|
||||
@@ -27,16 +27,22 @@ def test_make_agent_passes_resolved_provider():
|
||||
"agent": {"system_prompt": "test"},
|
||||
}
|
||||
|
||||
with patch("tui_gateway.server._load_cfg", return_value=fake_cfg), \
|
||||
patch("tui_gateway.server._get_db", return_value=MagicMock()), \
|
||||
patch("tui_gateway.server._load_tool_progress_mode", return_value="compact"), \
|
||||
patch("tui_gateway.server._load_reasoning_config", return_value=None), \
|
||||
patch("tui_gateway.server._load_service_tier", return_value=None), \
|
||||
patch("tui_gateway.server._load_enabled_toolsets", return_value=None), \
|
||||
patch("hermes_cli.runtime_provider.resolve_runtime_provider", return_value=fake_runtime) as mock_resolve, \
|
||||
patch("run_agent.AIAgent") as mock_agent:
|
||||
with (
|
||||
patch("tui_gateway.server._load_cfg", return_value=fake_cfg),
|
||||
patch("tui_gateway.server._get_db", return_value=MagicMock()),
|
||||
patch("tui_gateway.server._load_tool_progress_mode", return_value="compact"),
|
||||
patch("tui_gateway.server._load_reasoning_config", return_value=None),
|
||||
patch("tui_gateway.server._load_service_tier", return_value=None),
|
||||
patch("tui_gateway.server._load_enabled_toolsets", return_value=None),
|
||||
patch(
|
||||
"hermes_cli.runtime_provider.resolve_runtime_provider",
|
||||
return_value=fake_runtime,
|
||||
) as mock_resolve,
|
||||
patch("run_agent.AIAgent") as mock_agent,
|
||||
):
|
||||
|
||||
from tui_gateway.server import _make_agent
|
||||
|
||||
_make_agent("sid-1", "key-1")
|
||||
|
||||
mock_resolve.assert_called_once_with(requested=None)
|
||||
@@ -46,3 +52,136 @@ def test_make_agent_passes_resolved_provider():
|
||||
assert call_kwargs.kwargs["base_url"] == "https://api.anthropic.com"
|
||||
assert call_kwargs.kwargs["api_key"] == "sk-test-key"
|
||||
assert call_kwargs.kwargs["api_mode"] == "anthropic_messages"
|
||||
|
||||
|
||||
def test_make_agent_ignores_display_personality_without_system_prompt():
|
||||
"""The TUI matches the classic CLI: personality only becomes active once
|
||||
it has been saved to agent.system_prompt."""
|
||||
|
||||
fake_runtime = {
|
||||
"provider": "openrouter",
|
||||
"base_url": "https://api.synthetic.new/v1",
|
||||
"api_key": "sk-test",
|
||||
"api_mode": "chat_completions",
|
||||
"command": None,
|
||||
"args": None,
|
||||
"credential_pool": None,
|
||||
}
|
||||
fake_cfg = {
|
||||
"agent": {
|
||||
"system_prompt": "",
|
||||
"personalities": {"kawaii": "sparkle system prompt"},
|
||||
},
|
||||
"display": {"personality": "kawaii"},
|
||||
"model": {"default": "glm-5"},
|
||||
}
|
||||
|
||||
with (
|
||||
patch("tui_gateway.server._load_cfg", return_value=fake_cfg),
|
||||
patch("tui_gateway.server._get_db", return_value=MagicMock()),
|
||||
patch(
|
||||
"hermes_cli.runtime_provider.resolve_runtime_provider",
|
||||
return_value=fake_runtime,
|
||||
),
|
||||
patch("run_agent.AIAgent") as mock_agent,
|
||||
):
|
||||
from tui_gateway.server import _make_agent
|
||||
|
||||
_make_agent("sid-default-personality", "key-default-personality")
|
||||
|
||||
assert mock_agent.call_args.kwargs["ephemeral_system_prompt"] is None
|
||||
|
||||
|
||||
def test_probe_config_health_flags_null_sections():
|
||||
"""Bare YAML keys (`agent:` with no value) parse as None and silently
|
||||
drop nested settings; probe must surface them so users can fix."""
|
||||
from tui_gateway.server import _probe_config_health
|
||||
|
||||
assert _probe_config_health({"agent": {"x": 1}}) == ""
|
||||
assert _probe_config_health({}) == ""
|
||||
|
||||
msg = _probe_config_health({"agent": None, "display": None, "model": {}})
|
||||
assert "agent" in msg and "display" in msg
|
||||
assert "model" not in msg
|
||||
|
||||
|
||||
def test_probe_config_health_flags_null_personalities_with_active_personality():
|
||||
from tui_gateway.server import _probe_config_health
|
||||
|
||||
msg = _probe_config_health(
|
||||
{
|
||||
"agent": {"personalities": None},
|
||||
"display": {"personality": "kawaii"},
|
||||
"model": {},
|
||||
}
|
||||
)
|
||||
assert "display.personality" in msg
|
||||
assert "agent.personalities" in msg
|
||||
|
||||
|
||||
def test_make_agent_tolerates_null_config_sections():
|
||||
"""Bare `agent:` / `display:` keys in ~/.hermes/config.yaml parse as
|
||||
None. cfg.get("agent", {}) returns None (default only fires on missing
|
||||
key), so downstream .get() chains must be guarded. Reported via Twitter
|
||||
against the new TUI."""
|
||||
|
||||
fake_runtime = {
|
||||
"provider": "openrouter",
|
||||
"base_url": "https://api.synthetic.new/v1",
|
||||
"api_key": "sk-test",
|
||||
"api_mode": "chat_completions",
|
||||
"command": None,
|
||||
"args": None,
|
||||
"credential_pool": None,
|
||||
}
|
||||
null_cfg = {"agent": None, "display": None, "model": {"default": "glm-5"}}
|
||||
|
||||
with (
|
||||
patch("tui_gateway.server._load_cfg", return_value=null_cfg),
|
||||
patch("tui_gateway.server._get_db", return_value=MagicMock()),
|
||||
patch(
|
||||
"hermes_cli.runtime_provider.resolve_runtime_provider",
|
||||
return_value=fake_runtime,
|
||||
),
|
||||
patch("run_agent.AIAgent") as mock_agent,
|
||||
):
|
||||
|
||||
from tui_gateway.server import _make_agent
|
||||
|
||||
_make_agent("sid-null", "key-null")
|
||||
|
||||
assert mock_agent.called
|
||||
|
||||
|
||||
def test_make_agent_tolerates_null_personalities_with_active_personality():
|
||||
fake_runtime = {
|
||||
"provider": "openrouter",
|
||||
"base_url": "https://api.synthetic.new/v1",
|
||||
"api_key": "sk-test",
|
||||
"api_mode": "chat_completions",
|
||||
"command": None,
|
||||
"args": None,
|
||||
"credential_pool": None,
|
||||
}
|
||||
cfg = {
|
||||
"agent": {"personalities": None},
|
||||
"display": {"personality": "kawaii"},
|
||||
"model": {"default": "glm-5"},
|
||||
}
|
||||
|
||||
with (
|
||||
patch("tui_gateway.server._load_cfg", return_value=cfg),
|
||||
patch("tui_gateway.server._get_db", return_value=MagicMock()),
|
||||
patch("cli.load_cli_config", return_value={"agent": {"personalities": None}}),
|
||||
patch(
|
||||
"hermes_cli.runtime_provider.resolve_runtime_provider",
|
||||
return_value=fake_runtime,
|
||||
),
|
||||
patch("run_agent.AIAgent") as mock_agent,
|
||||
):
|
||||
from tui_gateway.server import _make_agent
|
||||
|
||||
_make_agent("sid-null-personality", "key-null-personality")
|
||||
|
||||
assert mock_agent.called
|
||||
assert mock_agent.call_args.kwargs["ephemeral_system_prompt"] is None
|
||||
|
||||
+40
-14
@@ -411,9 +411,15 @@ def _preserve_parent_mcp_toolsets(
|
||||
DEFAULT_MAX_ITERATIONS = 50
|
||||
DEFAULT_CHILD_TIMEOUT = 600 # seconds before a child agent is considered stuck
|
||||
_HEARTBEAT_INTERVAL = 30 # seconds between parent activity heartbeats during delegation
|
||||
_HEARTBEAT_STALE_CYCLES = (
|
||||
5 # mark child stale after this many heartbeats with no iteration progress
|
||||
)
|
||||
# Stale-heartbeat thresholds. A child with no API-call progress is either:
|
||||
# - idle between turns (no current_tool) — probably stuck on a slow API call
|
||||
# - inside a tool (current_tool set) — probably running a legitimately long
|
||||
# operation (terminal command, web fetch, large file read)
|
||||
# The idle ceiling stays tight so genuinely stuck children don't mask the gateway
|
||||
# timeout. The in-tool ceiling is much higher so legit long-running tools get
|
||||
# time to finish; child_timeout_seconds (default 600s) is still the hard cap.
|
||||
_HEARTBEAT_STALE_CYCLES_IDLE = 5 # 5 * 30s = 150s idle between turns → stale
|
||||
_HEARTBEAT_STALE_CYCLES_IN_TOOL = 20 # 20 * 30s = 600s stuck on same tool → stale
|
||||
DEFAULT_TOOLSETS = ["terminal", "file", "web"]
|
||||
|
||||
|
||||
@@ -1201,7 +1207,11 @@ def _run_single_child(
|
||||
# Without this, the parent's _last_activity_ts freezes when delegate_task
|
||||
# starts and the gateway eventually kills the agent for "no activity".
|
||||
_heartbeat_stop = threading.Event()
|
||||
_last_seen_iter = [0] # mutable container for heartbeat stale detection
|
||||
# Stale detection: track the child's (tool, iteration) pair across
|
||||
# heartbeat cycles. If neither advances, count the cycle as stale.
|
||||
# Different thresholds for idle vs in-tool (see _HEARTBEAT_STALE_CYCLES_*).
|
||||
_last_seen_iter = [0]
|
||||
_last_seen_tool = [None] # type: list
|
||||
_stale_count = [0]
|
||||
|
||||
def _heartbeat_loop():
|
||||
@@ -1219,22 +1229,38 @@ def _run_single_child(
|
||||
child_iter = child_summary.get("api_call_count", 0)
|
||||
child_max = child_summary.get("max_iterations", 0)
|
||||
|
||||
# Stale detection: if iteration count hasn't advanced,
|
||||
# increment stale counter. After N cycles with no
|
||||
# progress, stop masking the hang so the gateway
|
||||
# inactivity timeout can fire as a last resort.
|
||||
if child_iter <= _last_seen_iter[0]:
|
||||
_stale_count[0] += 1
|
||||
else:
|
||||
# Stale detection: count cycles where neither the iteration
|
||||
# count nor the current_tool advances. A child running a
|
||||
# legitimately long-running tool (terminal command, web
|
||||
# fetch) keeps current_tool set but doesn't advance
|
||||
# api_call_count — we don't want that to look stale at the
|
||||
# idle threshold.
|
||||
iter_advanced = child_iter > _last_seen_iter[0]
|
||||
tool_changed = child_tool != _last_seen_tool[0]
|
||||
if iter_advanced or tool_changed:
|
||||
_last_seen_iter[0] = child_iter
|
||||
_last_seen_tool[0] = child_tool
|
||||
_stale_count[0] = 0
|
||||
else:
|
||||
_stale_count[0] += 1
|
||||
|
||||
if _stale_count[0] >= _HEARTBEAT_STALE_CYCLES:
|
||||
# Pick threshold based on whether the child is currently
|
||||
# inside a tool call. In-tool threshold is high enough to
|
||||
# cover legitimately slow tools; idle threshold stays
|
||||
# tight so the gateway timeout can fire on a truly wedged
|
||||
# child.
|
||||
stale_limit = (
|
||||
_HEARTBEAT_STALE_CYCLES_IN_TOOL
|
||||
if child_tool
|
||||
else _HEARTBEAT_STALE_CYCLES_IDLE
|
||||
)
|
||||
if _stale_count[0] >= stale_limit:
|
||||
logger.warning(
|
||||
"Subagent %d appears stale (no iteration progress "
|
||||
"for %d heartbeat cycles) — stopping heartbeat",
|
||||
"Subagent %d appears stale (no progress for %d "
|
||||
"heartbeat cycles, tool=%s) — stopping heartbeat",
|
||||
task_index,
|
||||
_stale_count[0],
|
||||
child_tool or "<none>",
|
||||
)
|
||||
break # stop touching parent, let gateway timeout fire
|
||||
|
||||
|
||||
+111
-63
@@ -473,6 +473,12 @@ _ACTIONS = {
|
||||
"remove_role": _remove_role,
|
||||
}
|
||||
|
||||
_CORE_ACTION_NAMES = frozenset({"fetch_messages", "search_members", "create_thread"})
|
||||
_ADMIN_ACTION_NAMES = frozenset(_ACTIONS.keys()) - _CORE_ACTION_NAMES
|
||||
|
||||
_CORE_ACTIONS = {k: v for k, v in _ACTIONS.items() if k in _CORE_ACTION_NAMES}
|
||||
_ADMIN_ACTIONS = {k: v for k, v in _ACTIONS.items() if k in _ADMIN_ACTION_NAMES}
|
||||
|
||||
# Single-source-of-truth manifest: action → (signature, one-line description).
|
||||
# Consumed by :func:`_build_schema` so the schema's top-level description
|
||||
# always matches the registered action set.
|
||||
@@ -531,7 +537,7 @@ def _load_allowed_actions_config() -> Optional[List[str]]:
|
||||
from hermes_cli.config import load_config
|
||||
cfg = load_config()
|
||||
except Exception as exc:
|
||||
logger.debug("discord_server: could not load config (%s); allowing all actions.", exc)
|
||||
logger.debug("discord: could not load config (%s); allowing all actions.", exc)
|
||||
return None
|
||||
|
||||
raw = (cfg.get("discord") or {}).get("server_actions")
|
||||
@@ -586,12 +592,16 @@ def _available_actions(
|
||||
def _build_schema(
|
||||
actions: List[str],
|
||||
caps: Optional[Dict[str, Any]] = None,
|
||||
) -> Dict[str, Any]:
|
||||
"""Build the tool schema for the given filtered action list."""
|
||||
tool_name: str = "discord",
|
||||
) -> Optional[Dict[str, Any]]:
|
||||
"""Build the tool schema for the given filtered action list.
|
||||
|
||||
Returns ``None`` when *actions* is empty — callers should drop the
|
||||
tool from registration in that case.
|
||||
"""
|
||||
caps = caps or {}
|
||||
if not actions:
|
||||
# Tool shouldn't be registered when empty, but guard anyway.
|
||||
actions = list(_ACTIONS.keys())
|
||||
return None
|
||||
|
||||
# Action manifest lines (action-first, parameter-scoped).
|
||||
manifest_lines = [
|
||||
@@ -602,24 +612,36 @@ def _build_schema(
|
||||
manifest_block = "\n".join(manifest_lines)
|
||||
|
||||
content_note = ""
|
||||
if caps.get("detected") and caps.get("has_message_content") is False:
|
||||
affected_actions = {"fetch_messages", "list_pins"} & set(actions)
|
||||
if affected_actions and caps.get("detected") and caps.get("has_message_content") is False:
|
||||
names = " and ".join(sorted(affected_actions))
|
||||
content_note = (
|
||||
"\n\nNOTE: Bot does NOT have the MESSAGE_CONTENT privileged intent. "
|
||||
"fetch_messages and list_pins will return message metadata (author, "
|
||||
f"\n\nNOTE: Bot does NOT have the MESSAGE_CONTENT privileged intent. "
|
||||
f"{names} will return message metadata (author, "
|
||||
"timestamps, attachments, reactions, pin state) but `content` will be "
|
||||
"empty for messages not sent as a direct mention to the bot or in DMs. "
|
||||
"Enable the intent in the Discord Developer Portal to see all content."
|
||||
)
|
||||
|
||||
description = (
|
||||
"Query and manage a Discord server via the REST API.\n\n"
|
||||
"Available actions:\n"
|
||||
f"{manifest_block}\n\n"
|
||||
"Call list_guilds first to discover guild_ids, then list_channels for "
|
||||
"channel_ids. Runtime errors will tell you if the bot lacks a specific "
|
||||
"per-guild permission (e.g. MANAGE_ROLES for add_role)."
|
||||
f"{content_note}"
|
||||
)
|
||||
if tool_name == "discord_admin":
|
||||
description = (
|
||||
"Manage a Discord server via the REST API.\n\n"
|
||||
"Available actions:\n"
|
||||
f"{manifest_block}\n\n"
|
||||
"Call list_guilds first to discover guild_ids, then list_channels for "
|
||||
"channel_ids. Runtime errors will tell you if the bot lacks a specific "
|
||||
"per-guild permission (e.g. MANAGE_ROLES for add_role)."
|
||||
f"{content_note}"
|
||||
)
|
||||
else:
|
||||
description = (
|
||||
"Read and participate in a Discord server.\n\n"
|
||||
"Available actions:\n"
|
||||
f"{manifest_block}\n\n"
|
||||
"Use the channel_id from the current conversation context. "
|
||||
"Use search_members to look up user IDs by name prefix."
|
||||
f"{content_note}"
|
||||
)
|
||||
|
||||
properties: Dict[str, Any] = {
|
||||
"action": {
|
||||
@@ -676,7 +698,7 @@ def _build_schema(
|
||||
}
|
||||
|
||||
return {
|
||||
"name": "discord_server",
|
||||
"name": tool_name,
|
||||
"description": description,
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
@@ -686,28 +708,33 @@ def _build_schema(
|
||||
}
|
||||
|
||||
|
||||
def get_dynamic_schema() -> Optional[Dict[str, Any]]:
|
||||
"""Return a schema filtered by current intents + config allowlist.
|
||||
|
||||
Called by ``model_tools.get_tool_definitions`` as a post-processing
|
||||
step so the schema the model sees always reflects reality. Returns
|
||||
``None`` when no actions are available (tool should be removed from
|
||||
the schema list entirely).
|
||||
"""
|
||||
def _get_dynamic_schema(
|
||||
action_subset: Dict[str, Any],
|
||||
tool_name: str,
|
||||
) -> Optional[Dict[str, Any]]:
|
||||
"""Build a dynamic schema for *action_subset* filtered by intents + config."""
|
||||
token = _get_bot_token()
|
||||
if not token:
|
||||
return None
|
||||
|
||||
caps = _detect_capabilities(token)
|
||||
allowlist = _load_allowed_actions_config()
|
||||
actions = _available_actions(caps, allowlist)
|
||||
actions = [a for a in _available_actions(caps, allowlist) if a in action_subset]
|
||||
if not actions:
|
||||
logger.warning(
|
||||
"discord_server: config allowlist/intents left zero available actions; "
|
||||
"hiding tool from this session."
|
||||
)
|
||||
return None
|
||||
return _build_schema(actions, caps)
|
||||
return _build_schema(actions, caps, tool_name=tool_name)
|
||||
|
||||
|
||||
def get_dynamic_schema_core() -> Optional[Dict[str, Any]]:
|
||||
return _get_dynamic_schema(_CORE_ACTIONS, "discord")
|
||||
|
||||
|
||||
def get_dynamic_schema_admin() -> Optional[Dict[str, Any]]:
|
||||
return _get_dynamic_schema(_ADMIN_ACTIONS, "discord_admin")
|
||||
|
||||
|
||||
def get_dynamic_schema() -> Optional[Dict[str, Any]]:
|
||||
"""Backward-compat wrapper — returns core schema."""
|
||||
return get_dynamic_schema_core()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -774,11 +801,13 @@ def check_discord_tool_requirements() -> bool:
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Main handler
|
||||
# Handlers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def discord_server(
|
||||
def _run_discord_action(
|
||||
action: str,
|
||||
valid_actions: Dict[str, Any],
|
||||
tool_label: str,
|
||||
guild_id: str = "",
|
||||
channel_id: str = "",
|
||||
user_id: str = "",
|
||||
@@ -790,18 +819,17 @@ def discord_server(
|
||||
before: str = "",
|
||||
after: str = "",
|
||||
auto_archive_duration: int = 1440,
|
||||
task_id: str = None,
|
||||
) -> str:
|
||||
"""Execute a Discord server action."""
|
||||
"""Shared handler logic for both discord tools."""
|
||||
token = _get_bot_token()
|
||||
if not token:
|
||||
return json.dumps({"error": "DISCORD_BOT_TOKEN not configured."})
|
||||
|
||||
action_fn = _ACTIONS.get(action)
|
||||
action_fn = valid_actions.get(action)
|
||||
if not action_fn:
|
||||
return json.dumps({
|
||||
"error": f"Unknown action: {action}",
|
||||
"available_actions": list(_ACTIONS.keys()),
|
||||
"available_actions": list(valid_actions.keys()),
|
||||
})
|
||||
|
||||
# Config-level allowlist gate (defense in depth — schema already filtered,
|
||||
@@ -848,44 +876,64 @@ def discord_server(
|
||||
auto_archive_duration=auto_archive_duration,
|
||||
)
|
||||
except DiscordAPIError as e:
|
||||
logger.warning("Discord API error in action '%s': %s", action, e)
|
||||
logger.warning("Discord API error in %s action '%s': %s", tool_label, action, e)
|
||||
if e.status == 403:
|
||||
return json.dumps({"error": _enrich_403(action, e.body)})
|
||||
return json.dumps({"error": str(e)})
|
||||
except Exception as e:
|
||||
logger.exception("Unexpected error in discord_server action '%s'", action)
|
||||
logger.exception("Unexpected error in %s action '%s'", tool_label, action)
|
||||
return json.dumps({"error": f"Unexpected error: {e}"})
|
||||
|
||||
|
||||
def discord_core(action: str, **kwargs) -> str:
|
||||
"""Execute a core Discord action (fetch_messages, search_members, create_thread)."""
|
||||
return _run_discord_action(action, _CORE_ACTIONS, "discord", **kwargs)
|
||||
|
||||
|
||||
def discord_admin_handler(action: str, **kwargs) -> str:
|
||||
"""Execute a Discord admin action (server management)."""
|
||||
return _run_discord_action(action, _ADMIN_ACTIONS, "discord_admin", **kwargs)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tool registration
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Register with the full unfiltered schema. ``model_tools.get_tool_definitions``
|
||||
# rebuilds this per-session via ``get_dynamic_schema`` so the model only ever
|
||||
# sees intent-available, config-allowed actions. The static registration is a
|
||||
# safe baseline for tools that inspect the registry directly.
|
||||
_STATIC_SCHEMA = _build_schema(list(_ACTIONS.keys()), caps={"detected": False})
|
||||
_HANDLER_DEFAULTS = {
|
||||
"action": "", "guild_id": "", "channel_id": "", "user_id": "",
|
||||
"role_id": "", "message_id": "", "query": "", "name": "",
|
||||
"limit": 50, "before": "", "after": "", "auto_archive_duration": 1440,
|
||||
}
|
||||
|
||||
|
||||
def _make_handler(handler_fn):
|
||||
"""Create a registry-compatible handler lambda for a discord handler."""
|
||||
return lambda args, **kw: handler_fn(
|
||||
**{k: args.get(k, v) for k, v in _HANDLER_DEFAULTS.items()},
|
||||
)
|
||||
|
||||
|
||||
_STATIC_CORE_SCHEMA = _build_schema(
|
||||
list(_CORE_ACTIONS.keys()), caps={"detected": False}, tool_name="discord",
|
||||
)
|
||||
_STATIC_ADMIN_SCHEMA = _build_schema(
|
||||
list(_ADMIN_ACTIONS.keys()), caps={"detected": False}, tool_name="discord_admin",
|
||||
)
|
||||
|
||||
registry.register(
|
||||
name="discord_server",
|
||||
name="discord",
|
||||
toolset="discord",
|
||||
schema=_STATIC_SCHEMA,
|
||||
handler=lambda args, **kw: discord_server(
|
||||
action=args.get("action", ""),
|
||||
guild_id=args.get("guild_id", ""),
|
||||
channel_id=args.get("channel_id", ""),
|
||||
user_id=args.get("user_id", ""),
|
||||
role_id=args.get("role_id", ""),
|
||||
message_id=args.get("message_id", ""),
|
||||
query=args.get("query", ""),
|
||||
name=args.get("name", ""),
|
||||
limit=args.get("limit", 50),
|
||||
before=args.get("before", ""),
|
||||
after=args.get("after", ""),
|
||||
auto_archive_duration=args.get("auto_archive_duration", 1440),
|
||||
task_id=kw.get("task_id"),
|
||||
),
|
||||
schema=_STATIC_CORE_SCHEMA,
|
||||
handler=_make_handler(discord_core),
|
||||
check_fn=check_discord_tool_requirements,
|
||||
requires_env=["DISCORD_BOT_TOKEN"],
|
||||
)
|
||||
|
||||
registry.register(
|
||||
name="discord_admin",
|
||||
toolset="discord_admin",
|
||||
schema=_STATIC_ADMIN_SCHEMA,
|
||||
handler=_make_handler(discord_admin_handler),
|
||||
check_fn=check_discord_tool_requirements,
|
||||
requires_env=["DISCORD_BOT_TOKEN"],
|
||||
)
|
||||
|
||||
@@ -368,6 +368,17 @@ class BaseEnvironment(ABC):
|
||||
# Command wrapping
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
@staticmethod
|
||||
def _quote_cwd_for_cd(cwd: str) -> str:
|
||||
"""Quote a ``cd`` target while preserving ``~`` expansion."""
|
||||
if cwd == "~":
|
||||
return cwd
|
||||
if cwd == "~/":
|
||||
return "$HOME"
|
||||
if cwd.startswith("~/"):
|
||||
return f"$HOME/{shlex.quote(cwd[2:])}"
|
||||
return shlex.quote(cwd)
|
||||
|
||||
def _wrap_command(self, command: str, cwd: str) -> str:
|
||||
"""Build the full bash script that sources snapshot, cd's, runs command,
|
||||
re-dumps env vars, and emits CWD markers."""
|
||||
@@ -379,10 +390,9 @@ class BaseEnvironment(ABC):
|
||||
if self._snapshot_ready:
|
||||
parts.append(f"source {self._snapshot_path} 2>/dev/null || true")
|
||||
|
||||
# cd to working directory — let bash expand ~ natively
|
||||
quoted_cwd = (
|
||||
shlex.quote(cwd) if cwd != "~" and not cwd.startswith("~/") else cwd
|
||||
)
|
||||
# Preserve bare ``~`` expansion, but rewrite ``~/...`` through
|
||||
# ``$HOME`` so suffixes with spaces remain a single shell word.
|
||||
quoted_cwd = self._quote_cwd_for_cd(cwd)
|
||||
parts.append(f"builtin cd {quoted_cwd} || exit 126")
|
||||
|
||||
# Run the actual command
|
||||
|
||||
+51
-4
@@ -743,6 +743,9 @@ def _serve_plugin_skill(
|
||||
skill_md: Path,
|
||||
namespace: str,
|
||||
bare: str,
|
||||
*,
|
||||
preprocess: bool = True,
|
||||
session_id: str | None = None,
|
||||
) -> str:
|
||||
"""Read a plugin-provided skill, apply guards, return JSON."""
|
||||
from hermes_cli.plugins import _get_disabled_plugins, get_plugin_manager
|
||||
@@ -812,11 +815,26 @@ def _serve_plugin_skill(
|
||||
except Exception:
|
||||
banner = ""
|
||||
|
||||
rendered_content = content
|
||||
if preprocess:
|
||||
try:
|
||||
from agent.skill_preprocessing import preprocess_skill_content
|
||||
|
||||
rendered_content = preprocess_skill_content(
|
||||
content,
|
||||
skill_md.parent,
|
||||
session_id=session_id,
|
||||
)
|
||||
except Exception:
|
||||
logger.debug(
|
||||
"Could not preprocess plugin skill %s:%s", namespace, bare, exc_info=True
|
||||
)
|
||||
|
||||
return json.dumps(
|
||||
{
|
||||
"success": True,
|
||||
"name": f"{namespace}:{bare}",
|
||||
"content": f"{banner}{content}" if banner else content,
|
||||
"content": f"{banner}{rendered_content}" if banner else rendered_content,
|
||||
"description": description,
|
||||
"linked_files": None,
|
||||
"readiness_status": SkillReadinessStatus.AVAILABLE.value,
|
||||
@@ -825,7 +843,12 @@ def _serve_plugin_skill(
|
||||
)
|
||||
|
||||
|
||||
def skill_view(name: str, file_path: str = None, task_id: str = None) -> str:
|
||||
def skill_view(
|
||||
name: str,
|
||||
file_path: str = None,
|
||||
task_id: str = None,
|
||||
preprocess: bool = True,
|
||||
) -> str:
|
||||
"""
|
||||
View the content of a skill or a specific file within a skill directory.
|
||||
|
||||
@@ -834,6 +857,9 @@ def skill_view(name: str, file_path: str = None, task_id: str = None) -> str:
|
||||
Qualified names like "plugin:skill" resolve to plugin-provided skills.
|
||||
file_path: Optional path to a specific file within the skill (e.g., "references/api.md")
|
||||
task_id: Optional task identifier used to probe the active backend
|
||||
preprocess: Apply configured SKILL.md template and inline shell rendering
|
||||
to main skill content. Internal slash/preload callers disable this
|
||||
because they render the skill message themselves.
|
||||
|
||||
Returns:
|
||||
JSON string with skill content or error message
|
||||
@@ -879,7 +905,13 @@ def skill_view(name: str, file_path: str = None, task_id: str = None) -> str:
|
||||
},
|
||||
ensure_ascii=False,
|
||||
)
|
||||
return _serve_plugin_skill(plugin_skill_md, namespace, bare)
|
||||
return _serve_plugin_skill(
|
||||
plugin_skill_md,
|
||||
namespace,
|
||||
bare,
|
||||
preprocess=preprocess,
|
||||
session_id=task_id,
|
||||
)
|
||||
|
||||
# Plugin exists but this specific skill is missing?
|
||||
available = pm.list_plugin_skills(namespace)
|
||||
@@ -1280,13 +1312,28 @@ def skill_view(name: str, file_path: str = None, task_id: str = None) -> str:
|
||||
exc_info=True,
|
||||
)
|
||||
|
||||
rendered_content = content
|
||||
if preprocess:
|
||||
try:
|
||||
from agent.skill_preprocessing import preprocess_skill_content
|
||||
|
||||
rendered_content = preprocess_skill_content(
|
||||
content,
|
||||
skill_dir,
|
||||
session_id=task_id,
|
||||
)
|
||||
except Exception:
|
||||
logger.debug(
|
||||
"Could not preprocess skill content for %s", skill_name, exc_info=True
|
||||
)
|
||||
|
||||
result = {
|
||||
"success": True,
|
||||
"name": skill_name,
|
||||
"description": frontmatter.get("description", ""),
|
||||
"tags": tags,
|
||||
"related_skills": related_skills,
|
||||
"content": content,
|
||||
"content": rendered_content,
|
||||
"path": rel_path,
|
||||
"skill_dir": str(skill_dir) if skill_dir else None,
|
||||
"linked_files": linked_files if linked_files else None,
|
||||
|
||||
+21
-3
@@ -202,6 +202,18 @@ TOOLSETS = {
|
||||
"includes": []
|
||||
},
|
||||
|
||||
"discord": {
|
||||
"description": "Discord read and participate tools (fetch messages, search members, create threads)",
|
||||
"tools": ["discord"],
|
||||
"includes": [],
|
||||
},
|
||||
|
||||
"discord_admin": {
|
||||
"description": "Discord server management (list channels/roles, pin messages, assign roles)",
|
||||
"tools": ["discord_admin"],
|
||||
"includes": [],
|
||||
},
|
||||
|
||||
"feishu_doc": {
|
||||
"description": "Read Feishu/Lark document content",
|
||||
"tools": ["feishu_doc_read"],
|
||||
@@ -326,8 +338,8 @@ TOOLSETS = {
|
||||
"hermes-discord": {
|
||||
"description": "Discord bot toolset - full access (terminal has safety checks via dangerous command approval)",
|
||||
"tools": _HERMES_CORE_TOOLS + [
|
||||
# Discord server introspection & management (gated on DISCORD_BOT_TOKEN via check_fn)
|
||||
"discord_server",
|
||||
"discord",
|
||||
"discord_admin",
|
||||
],
|
||||
"includes": []
|
||||
},
|
||||
@@ -388,7 +400,13 @@ TOOLSETS = {
|
||||
|
||||
"hermes-feishu": {
|
||||
"description": "Feishu/Lark bot toolset - enterprise messaging via Feishu/Lark (full access)",
|
||||
"tools": _HERMES_CORE_TOOLS,
|
||||
"tools": _HERMES_CORE_TOOLS + [
|
||||
"feishu_doc_read",
|
||||
"feishu_drive_list_comments",
|
||||
"feishu_drive_list_comment_replies",
|
||||
"feishu_drive_reply_comment",
|
||||
"feishu_drive_add_comment",
|
||||
],
|
||||
"includes": []
|
||||
},
|
||||
|
||||
|
||||
@@ -5,7 +5,28 @@ import sys
|
||||
import time
|
||||
import traceback
|
||||
|
||||
from tui_gateway import server
|
||||
from tui_gateway.server import _CRASH_LOG, dispatch, resolve_skin, write_json
|
||||
from tui_gateway.transport import TeeTransport
|
||||
|
||||
|
||||
def _install_sidecar_publisher() -> None:
|
||||
"""Mirror every dispatcher emit to the dashboard sidebar via WS.
|
||||
|
||||
Activated by `HERMES_TUI_SIDECAR_URL`, set by the dashboard's
|
||||
``/api/pty`` endpoint when a chat tab passes a ``channel`` query param.
|
||||
Best-effort: connect failure or runtime drop falls back to stdio-only.
|
||||
"""
|
||||
url = os.environ.get("HERMES_TUI_SIDECAR_URL")
|
||||
|
||||
if not url:
|
||||
return
|
||||
|
||||
from tui_gateway.event_publisher import WsPublisherTransport
|
||||
|
||||
server._stdio_transport = TeeTransport(
|
||||
server._stdio_transport, WsPublisherTransport(url)
|
||||
)
|
||||
|
||||
|
||||
def _log_signal(signum: int, frame) -> None:
|
||||
@@ -82,6 +103,8 @@ def _log_exit(reason: str) -> None:
|
||||
|
||||
|
||||
def main():
|
||||
_install_sidecar_publisher()
|
||||
|
||||
if not write_json({
|
||||
"jsonrpc": "2.0",
|
||||
"method": "event",
|
||||
|
||||
@@ -0,0 +1,126 @@
|
||||
"""Best-effort WebSocket publisher transport for the PTY-side gateway.
|
||||
|
||||
The dashboard's `/api/pty` spawns `hermes --tui` as a child process, which
|
||||
spawns its own ``tui_gateway.entry``. Tool/reasoning/status events fire on
|
||||
*that* gateway's transport — three processes removed from the dashboard
|
||||
server itself. To surface them in the dashboard sidebar (`/api/events`),
|
||||
the PTY-side gateway opens a back-WS to the dashboard at startup and
|
||||
mirrors every emit through this transport.
|
||||
|
||||
Wire protocol: newline-framed JSON dicts (the same shape the dispatcher
|
||||
already passes to ``write``). No JSON-RPC envelope here — the dashboard's
|
||||
``/api/pub`` endpoint just rebroadcasts the bytes verbatim to subscribers.
|
||||
|
||||
Failure mode: silent. The agent loop must never block waiting for the
|
||||
sidecar to drain. A dead WS short-circuits all subsequent writes.
|
||||
Actual ``send`` calls run on a daemon thread so the TeeTransport's
|
||||
``write`` returns after enqueueing (best-effort; drop when the queue is full).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import queue
|
||||
import threading
|
||||
from typing import Optional
|
||||
|
||||
try:
|
||||
from websockets.sync.client import connect as ws_connect
|
||||
except ImportError: # pragma: no cover - websockets is a required install path
|
||||
ws_connect = None # type: ignore[assignment]
|
||||
|
||||
_log = logging.getLogger(__name__)
|
||||
|
||||
_DRAIN_STOP = object()
|
||||
|
||||
_QUEUE_MAX = 256
|
||||
|
||||
|
||||
class WsPublisherTransport:
    """Best-effort transport that publishes JSON frames over a WebSocket.

    ``write`` serialises the frame and enqueues it on a bounded queue
    (``_QUEUE_MAX`` entries); a daemon worker thread performs the actual
    ``send`` calls. Frames are dropped when the queue is full or once the
    transport has been marked dead, so callers never block on the socket.
    """

    __slots__ = ("_url", "_lock", "_ws", "_dead", "_q", "_worker")

    def __init__(self, url: str, *, connect_timeout: float = 2.0) -> None:
        """Connect to *url* and start the drain worker.

        Never raises on connection problems: if the ``websockets`` client is
        unavailable or the connect attempt fails, the instance is created in
        the dead state and every ``write`` returns ``False``.

        Args:
            url: WebSocket URL to publish to.
            connect_timeout: Passed to the client as ``open_timeout``.
        """
        self._url = url
        self._lock = threading.Lock()
        self._ws: Optional[object] = None
        self._dead = False
        self._q: queue.Queue[object] = queue.Queue(maxsize=_QUEUE_MAX)
        self._worker: Optional[threading.Thread] = None

        if ws_connect is None:
            self._dead = True
            return

        try:
            self._ws = ws_connect(url, open_timeout=connect_timeout, max_size=None)
        except Exception as exc:
            _log.debug("event publisher connect failed: %s", exc)
            self._dead = True
            self._ws = None
            return

        self._worker = threading.Thread(
            target=self._drain,
            name="hermes-ws-pub",
            daemon=True,
        )
        self._worker.start()

    def _release_socket(self) -> None:
        """Detach the socket under the lock, then close it (errors ignored)."""
        with self._lock:
            ws, self._ws = self._ws, None
        if ws is None:
            return
        try:
            ws.close()  # type: ignore[attr-defined]
        except Exception as exc:
            # Deliberately best-effort: the peer may already be gone.
            _log.debug("event publisher close failed: %s", exc)

    def _drain(self) -> None:
        """Worker loop: send queued frames until stopped or the socket fails.

        Exits when the ``_DRAIN_STOP`` sentinel is dequeued, when the socket
        has already been released, or after the first failed ``send``. On a
        failed send the transport is marked dead and the socket is closed,
        so neither the connection nor a looping thread outlives the failure.
        """
        while True:
            item = self._q.get()
            if item is _DRAIN_STOP:
                return
            if not isinstance(item, str):
                continue
            try:
                with self._lock:
                    ws = self._ws
                    if ws is None:
                        # Socket already released; nothing can be sent anymore.
                        return
                    ws.send(item)  # type: ignore[attr-defined]
            except Exception as exc:
                _log.debug("event publisher write failed: %s", exc)
                self._dead = True
                self._release_socket()
                return

    def write(self, obj: dict) -> bool:
        """Enqueue *obj* as a JSON text frame.

        Returns:
            ``True`` if the frame was queued; ``False`` if the transport is
            dead/unconnected or the queue is full (the frame is dropped).
        """
        if self._dead or self._ws is None or self._worker is None:
            return False

        line = json.dumps(obj, ensure_ascii=False)
        try:
            self._q.put_nowait(line)
        except queue.Full:
            return False
        return True

    def close(self) -> None:
        """Stop the worker (waiting up to 3 seconds) and close the socket."""
        self._dead = True

        worker = self._worker
        if worker is not None and worker.is_alive():
            try:
                self._q.put_nowait(_DRAIN_STOP)
            except queue.Full:
                # Best-effort: if the queue is wedged, the daemon thread
                # will be torn down with the process.
                pass
            worker.join(timeout=3.0)
        self._worker = None

        self._release_socket()
|
||||
+153
-82
@@ -1,5 +1,6 @@
|
||||
import atexit
|
||||
import concurrent.futures
|
||||
import contextvars
|
||||
import copy
|
||||
import json
|
||||
import logging
|
||||
@@ -12,9 +13,17 @@ import time
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from hermes_constants import get_hermes_home
|
||||
from hermes_cli.env_loader import load_hermes_dotenv
|
||||
from tui_gateway.transport import (
|
||||
StdioTransport,
|
||||
Transport,
|
||||
bind_transport,
|
||||
current_transport,
|
||||
reset_transport,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -52,7 +61,11 @@ def _panic_hook(exc_type, exc_value, exc_tb):
|
||||
# Stderr goes through to the TUI as a gateway.stderr Activity line —
|
||||
# the first line here is what the user will see without opening any
|
||||
# log files. Rest of the stack is still in the log for full context.
|
||||
first = str(exc_value).strip().splitlines()[0] if str(exc_value).strip() else exc_type.__name__
|
||||
first = (
|
||||
str(exc_value).strip().splitlines()[0]
|
||||
if str(exc_value).strip()
|
||||
else exc_type.__name__
|
||||
)
|
||||
print(f"[gateway-crash] {exc_type.__name__}: {first}", file=sys.stderr, flush=True)
|
||||
# Chain to the default hook so the process still terminates normally.
|
||||
sys.__excepthook__(exc_type, exc_value, exc_tb)
|
||||
@@ -147,6 +160,11 @@ atexit.register(lambda: _pool.shutdown(wait=False, cancel_futures=True))
|
||||
_real_stdout = sys.stdout
|
||||
sys.stdout = sys.stderr
|
||||
|
||||
# Module-level stdio transport — fallback sink when no transport is bound via
|
||||
# contextvar or session. Stream resolved through a lambda so runtime monkey-
|
||||
# patches of `_real_stdout` (used extensively in tests) still land correctly.
|
||||
_stdio_transport = StdioTransport(lambda: _real_stdout, _stdout_lock)
|
||||
|
||||
|
||||
class _SlashWorker:
|
||||
"""Persistent HermesCLI subprocess for slash commands."""
|
||||
@@ -266,14 +284,24 @@ def _db_unavailable_error(rid, *, code: int):
|
||||
|
||||
|
||||
def write_json(obj: dict) -> bool:
|
||||
line = json.dumps(obj, ensure_ascii=False) + "\n"
|
||||
try:
|
||||
with _stdout_lock:
|
||||
_real_stdout.write(line)
|
||||
_real_stdout.flush()
|
||||
return True
|
||||
except BrokenPipeError:
|
||||
return False
|
||||
"""Emit one JSON frame. Routes via the most-specific transport available.
|
||||
|
||||
Precedence:
|
||||
|
||||
1. Event frames with a session id → the transport stored on that session,
|
||||
so async events land with the client that owns the session even if
|
||||
the emitting thread has no contextvar binding.
|
||||
2. Otherwise the transport bound on the current context (set by
|
||||
:func:`dispatch` for the lifetime of a request).
|
||||
3. Otherwise the module-level stdio transport, matching the historical
|
||||
behaviour and keeping tests that monkey-patch ``_real_stdout`` green.
|
||||
"""
|
||||
if obj.get("method") == "event":
|
||||
sid = ((obj.get("params") or {}).get("session_id")) or ""
|
||||
if sid and (t := (_sessions.get(sid) or {}).get("transport")) is not None:
|
||||
return t.write(obj)
|
||||
|
||||
return (current_transport() or _stdio_transport).write(obj)
|
||||
|
||||
|
||||
def _emit(event: str, sid: str, payload: dict | None = None):
|
||||
@@ -343,27 +371,40 @@ def handle_request(req: dict) -> dict | None:
|
||||
return fn(req.get("id"), req.get("params", {}))
|
||||
|
||||
|
||||
def dispatch(req: dict) -> dict | None:
|
||||
def dispatch(req: dict, transport: Optional[Transport] = None) -> dict | None:
|
||||
"""Route inbound RPCs — long handlers to the pool, everything else inline.
|
||||
|
||||
Returns a response dict when handled inline. Returns None when the
|
||||
handler was scheduled on the pool; the worker writes its own
|
||||
response via write_json when done.
|
||||
handler was scheduled on the pool; the worker writes its own response
|
||||
via the bound transport when done.
|
||||
|
||||
*transport* (optional): pins every write produced by this request —
|
||||
including any events emitted by the handler — to the given transport.
|
||||
Omitting it falls back to the module-level stdio transport, preserving
|
||||
the original behaviour for ``tui_gateway.entry``.
|
||||
"""
|
||||
if req.get("method") not in _LONG_HANDLERS:
|
||||
return handle_request(req)
|
||||
t = transport or _stdio_transport
|
||||
token = bind_transport(t)
|
||||
try:
|
||||
if req.get("method") not in _LONG_HANDLERS:
|
||||
return handle_request(req)
|
||||
|
||||
def run():
|
||||
try:
|
||||
resp = handle_request(req)
|
||||
except Exception as exc:
|
||||
resp = _err(req.get("id"), -32000, f"handler error: {exc}")
|
||||
if resp is not None:
|
||||
write_json(resp)
|
||||
# Snapshot the context so the pool worker sees the bound transport.
|
||||
ctx = contextvars.copy_context()
|
||||
|
||||
_pool.submit(run)
|
||||
def run():
|
||||
try:
|
||||
resp = handle_request(req)
|
||||
except Exception as exc:
|
||||
resp = _err(req.get("id"), -32000, f"handler error: {exc}")
|
||||
if resp is not None:
|
||||
t.write(resp)
|
||||
|
||||
return None
|
||||
_pool.submit(lambda: ctx.run(run))
|
||||
|
||||
return None
|
||||
finally:
|
||||
reset_transport(token)
|
||||
|
||||
|
||||
def _wait_agent(session: dict, rid: str, timeout: float = 30.0) -> dict | None:
|
||||
@@ -556,13 +597,17 @@ def _coerce_statusbar(raw) -> str:
|
||||
def _load_reasoning_config() -> dict | None:
|
||||
from hermes_constants import parse_reasoning_effort
|
||||
|
||||
effort = str(_load_cfg().get("agent", {}).get("reasoning_effort", "") or "").strip()
|
||||
effort = str(
|
||||
(_load_cfg().get("agent") or {}).get("reasoning_effort", "") or ""
|
||||
).strip()
|
||||
return parse_reasoning_effort(effort)
|
||||
|
||||
|
||||
def _load_service_tier() -> str | None:
|
||||
raw = (
|
||||
str(_load_cfg().get("agent", {}).get("service_tier", "") or "").strip().lower()
|
||||
str((_load_cfg().get("agent") or {}).get("service_tier", "") or "")
|
||||
.strip()
|
||||
.lower()
|
||||
)
|
||||
if not raw or raw in {"normal", "default", "standard", "off", "none"}:
|
||||
return None
|
||||
@@ -572,11 +617,11 @@ def _load_service_tier() -> str | None:
|
||||
|
||||
|
||||
def _load_show_reasoning() -> bool:
|
||||
return bool(_load_cfg().get("display", {}).get("show_reasoning", False))
|
||||
return bool((_load_cfg().get("display") or {}).get("show_reasoning", False))
|
||||
|
||||
|
||||
def _load_tool_progress_mode() -> str:
|
||||
raw = _load_cfg().get("display", {}).get("tool_progress", "all")
|
||||
raw = (_load_cfg().get("display") or {}).get("tool_progress", "all")
|
||||
if raw is False:
|
||||
return "off"
|
||||
if raw is True:
|
||||
@@ -779,6 +824,39 @@ def _probe_credentials(agent) -> str:
|
||||
return ""
|
||||
|
||||
|
||||
def _probe_config_health(cfg: dict) -> str:
|
||||
"""Flag bare YAML keys (`agent:` with no value → None) that silently
|
||||
drop nested settings. Returns warning or ''."""
|
||||
if not isinstance(cfg, dict):
|
||||
return ""
|
||||
warnings: list[str] = []
|
||||
null_keys = sorted(k for k, v in cfg.items() if v is None)
|
||||
if not null_keys:
|
||||
pass
|
||||
else:
|
||||
keys = ", ".join(f"`{k}`" for k in null_keys)
|
||||
warnings.append(
|
||||
f"config.yaml has empty section(s): {keys}. "
|
||||
f"Remove the line(s) or set them to `{{}}` — "
|
||||
f"empty sections silently drop nested settings."
|
||||
)
|
||||
display_cfg = cfg.get("display")
|
||||
agent_cfg = cfg.get("agent")
|
||||
if isinstance(display_cfg, dict):
|
||||
personality = str(display_cfg.get("personality", "") or "").strip().lower()
|
||||
if (
|
||||
personality
|
||||
and personality not in {"default", "none", "neutral"}
|
||||
and isinstance(agent_cfg, dict)
|
||||
and agent_cfg.get("personalities") is None
|
||||
):
|
||||
warnings.append(
|
||||
"`display.personality` is set but `agent.personalities` is empty/null; "
|
||||
"personality overlay will be skipped."
|
||||
)
|
||||
return " ".join(warnings).strip()
|
||||
|
||||
|
||||
def _session_info(agent) -> dict:
|
||||
info: dict = {
|
||||
"model": getattr(agent, "model", ""),
|
||||
@@ -1065,28 +1143,6 @@ def _wire_callbacks(sid: str):
|
||||
set_secret_capture_callback(secret_cb)
|
||||
|
||||
|
||||
def _resolve_personality_prompt(cfg: dict) -> str:
|
||||
"""Resolve the active personality into a system prompt string."""
|
||||
name = (cfg.get("display", {}).get("personality", "") or "").strip().lower()
|
||||
if not name or name in ("default", "none", "neutral"):
|
||||
return ""
|
||||
try:
|
||||
from cli import load_cli_config
|
||||
|
||||
personalities = load_cli_config().get("agent", {}).get("personalities", {})
|
||||
except Exception:
|
||||
try:
|
||||
from hermes_cli.config import load_config as _load_full_cfg
|
||||
|
||||
personalities = _load_full_cfg().get("agent", {}).get("personalities", {})
|
||||
except Exception:
|
||||
personalities = cfg.get("agent", {}).get("personalities", {})
|
||||
pval = personalities.get(name)
|
||||
if pval is None:
|
||||
return ""
|
||||
return _render_personality_prompt(pval)
|
||||
|
||||
|
||||
def _render_personality_prompt(value) -> str:
|
||||
if isinstance(value, dict):
|
||||
parts = [value.get("system_prompt", "")]
|
||||
@@ -1102,15 +1158,15 @@ def _available_personalities(cfg: dict | None = None) -> dict:
|
||||
try:
|
||||
from cli import load_cli_config
|
||||
|
||||
return load_cli_config().get("agent", {}).get("personalities", {}) or {}
|
||||
return (load_cli_config().get("agent") or {}).get("personalities", {}) or {}
|
||||
except Exception:
|
||||
try:
|
||||
from hermes_cli.config import load_config as _load_full_cfg
|
||||
|
||||
return _load_full_cfg().get("agent", {}).get("personalities", {}) or {}
|
||||
return (_load_full_cfg().get("agent") or {}).get("personalities", {}) or {}
|
||||
except Exception:
|
||||
cfg = cfg or _load_cfg()
|
||||
return cfg.get("agent", {}).get("personalities", {}) or {}
|
||||
return (cfg.get("agent") or {}).get("personalities", {}) or {}
|
||||
|
||||
|
||||
def _validate_personality(value: str, cfg: dict | None = None) -> tuple[str, str]:
|
||||
@@ -1220,9 +1276,7 @@ def _make_agent(sid: str, key: str, session_id: str | None = None):
|
||||
from hermes_cli.runtime_provider import resolve_runtime_provider
|
||||
|
||||
cfg = _load_cfg()
|
||||
system_prompt = cfg.get("agent", {}).get("system_prompt", "") or ""
|
||||
if not system_prompt:
|
||||
system_prompt = _resolve_personality_prompt(cfg)
|
||||
system_prompt = ((cfg.get("agent") or {}).get("system_prompt", "") or "").strip()
|
||||
runtime = resolve_runtime_provider(requested=None)
|
||||
return AIAgent(
|
||||
model=_resolve_model(),
|
||||
@@ -1262,6 +1316,9 @@ def _init_session(sid: str, key: str, agent, history: list, cols: int = 80):
|
||||
"tool_progress_mode": _load_tool_progress_mode(),
|
||||
"edit_snapshots": {},
|
||||
"tool_started_at": {},
|
||||
# Pin async event emissions to whichever transport created the
|
||||
# session (stdio for Ink, JSON-RPC WS for the dashboard sidebar).
|
||||
"transport": current_transport() or _stdio_transport,
|
||||
}
|
||||
try:
|
||||
_sessions[sid]["slash_worker"] = _SlashWorker(
|
||||
@@ -1404,6 +1461,7 @@ def _(rid, params: dict) -> dict:
|
||||
"slash_worker": None,
|
||||
"tool_progress_mode": _load_tool_progress_mode(),
|
||||
"tool_started_at": {},
|
||||
"transport": current_transport() or _stdio_transport,
|
||||
}
|
||||
|
||||
def _build() -> None:
|
||||
@@ -1462,6 +1520,10 @@ def _(rid, params: dict) -> dict:
|
||||
warn = _probe_credentials(agent)
|
||||
if warn:
|
||||
info["credential_warning"] = warn
|
||||
cfg_warn = _probe_config_health(_load_cfg())
|
||||
if cfg_warn:
|
||||
info["config_warning"] = cfg_warn
|
||||
logger.warning(cfg_warn)
|
||||
_emit("session.info", sid, info)
|
||||
except Exception as e:
|
||||
session["agent_error"] = str(e)
|
||||
@@ -1608,9 +1670,7 @@ def _(rid, params: dict) -> dict:
|
||||
return _db_unavailable_error(rid, code=5007)
|
||||
title, key = params.get("title", ""), session["session_key"]
|
||||
if not title:
|
||||
return _ok(
|
||||
rid, {"title": db.get_session_title(key) or "", "session_key": key}
|
||||
)
|
||||
return _ok(rid, {"title": db.get_session_title(key) or "", "session_key": key})
|
||||
try:
|
||||
db.set_session_title(key, title)
|
||||
return _ok(rid, {"title": title})
|
||||
@@ -2237,7 +2297,9 @@ def _(rid, params: dict) -> dict:
|
||||
f.write(trace)
|
||||
except Exception:
|
||||
pass
|
||||
print(f"[gateway-turn] {type(e).__name__}: {e}", file=sys.stderr, flush=True)
|
||||
print(
|
||||
f"[gateway-turn] {type(e).__name__}: {e}", file=sys.stderr, flush=True
|
||||
)
|
||||
_emit("error", sid, {"message": str(e)})
|
||||
finally:
|
||||
try:
|
||||
@@ -2660,9 +2722,7 @@ def _(rid, params: dict) -> dict:
|
||||
cfg = _load_cfg()
|
||||
display = cfg.get("display") if isinstance(cfg.get("display"), dict) else {}
|
||||
sections_cfg = (
|
||||
display.get("sections")
|
||||
if isinstance(display.get("sections"), dict)
|
||||
else {}
|
||||
display.get("sections") if isinstance(display.get("sections"), dict) else {}
|
||||
)
|
||||
|
||||
nv = str(value or "").strip().lower()
|
||||
@@ -2797,18 +2857,21 @@ def _(rid, params: dict) -> dict:
|
||||
return _ok(rid, {"prompt": _load_cfg().get("custom_prompt", "")})
|
||||
if key == "skin":
|
||||
return _ok(
|
||||
rid, {"value": _load_cfg().get("display", {}).get("skin", "default")}
|
||||
rid, {"value": (_load_cfg().get("display") or {}).get("skin", "default")}
|
||||
)
|
||||
if key == "personality":
|
||||
return _ok(
|
||||
rid, {"value": _load_cfg().get("display", {}).get("personality", "default")}
|
||||
rid,
|
||||
{"value": (_load_cfg().get("display") or {}).get("personality", "default")},
|
||||
)
|
||||
if key == "reasoning":
|
||||
cfg = _load_cfg()
|
||||
effort = str(cfg.get("agent", {}).get("reasoning_effort", "medium") or "medium")
|
||||
effort = str(
|
||||
(cfg.get("agent") or {}).get("reasoning_effort", "medium") or "medium"
|
||||
)
|
||||
display = (
|
||||
"show"
|
||||
if bool(cfg.get("display", {}).get("show_reasoning", False))
|
||||
if bool((cfg.get("display") or {}).get("show_reasoning", False))
|
||||
else "hide"
|
||||
)
|
||||
return _ok(rid, {"value": effort, "display": display})
|
||||
@@ -2816,7 +2879,7 @@ def _(rid, params: dict) -> dict:
|
||||
allowed_dm = frozenset({"hidden", "collapsed", "expanded"})
|
||||
raw = (
|
||||
str(
|
||||
_load_cfg().get("display", {}).get("details_mode", "collapsed")
|
||||
(_load_cfg().get("display") or {}).get("details_mode", "collapsed")
|
||||
or "collapsed"
|
||||
)
|
||||
.strip()
|
||||
@@ -2827,13 +2890,17 @@ def _(rid, params: dict) -> dict:
|
||||
if key == "thinking_mode":
|
||||
allowed_tm = frozenset({"collapsed", "truncated", "full"})
|
||||
cfg = _load_cfg()
|
||||
raw = str(cfg.get("display", {}).get("thinking_mode", "") or "").strip().lower()
|
||||
raw = (
|
||||
str((cfg.get("display") or {}).get("thinking_mode", "") or "")
|
||||
.strip()
|
||||
.lower()
|
||||
)
|
||||
if raw in allowed_tm:
|
||||
nv = raw
|
||||
else:
|
||||
dm = (
|
||||
str(
|
||||
cfg.get("display", {}).get("details_mode", "collapsed")
|
||||
(cfg.get("display") or {}).get("details_mode", "collapsed")
|
||||
or "collapsed"
|
||||
)
|
||||
.strip()
|
||||
@@ -2842,7 +2909,7 @@ def _(rid, params: dict) -> dict:
|
||||
nv = "full" if dm == "expanded" else "collapsed"
|
||||
return _ok(rid, {"value": nv})
|
||||
if key == "compact":
|
||||
on = bool(_load_cfg().get("display", {}).get("tui_compact", False))
|
||||
on = bool((_load_cfg().get("display") or {}).get("tui_compact", False))
|
||||
return _ok(rid, {"value": "on" if on else "off"})
|
||||
if key == "statusbar":
|
||||
display = _load_cfg().get("display")
|
||||
@@ -3328,7 +3395,16 @@ def _list_repo_files(root: str) -> list[str]:
|
||||
if top_result.returncode == 0:
|
||||
top = top_result.stdout.decode("utf-8", "replace").strip()
|
||||
list_result = subprocess.run(
|
||||
["git", "-C", top, "ls-files", "-z", "--cached", "--others", "--exclude-standard"],
|
||||
[
|
||||
"git",
|
||||
"-C",
|
||||
top,
|
||||
"ls-files",
|
||||
"-z",
|
||||
"--cached",
|
||||
"--others",
|
||||
"--exclude-standard",
|
||||
],
|
||||
capture_output=True,
|
||||
timeout=2.0,
|
||||
check=False,
|
||||
@@ -3337,7 +3413,9 @@ def _list_repo_files(root: str) -> list[str]:
|
||||
for p in list_result.stdout.decode("utf-8", "replace").split("\0"):
|
||||
if not p:
|
||||
continue
|
||||
rel = os.path.relpath(os.path.join(top, p), root).replace(os.sep, "/")
|
||||
rel = os.path.relpath(os.path.join(top, p), root).replace(
|
||||
os.sep, "/"
|
||||
)
|
||||
# Skip parents/siblings of cwd — keep the picker scoped
|
||||
# to root-and-below, matching Cmd-P workspace semantics.
|
||||
if rel.startswith("../"):
|
||||
@@ -3471,12 +3549,7 @@ def _(rid, params: dict) -> dict:
|
||||
# editors like Cursor / VS Code do for Cmd-P. Path-ish queries (with
|
||||
# `/`, `./`, `~/`, `/abs`) fall through to the directory-listing
|
||||
# path so explicit navigation intent is preserved.
|
||||
if (
|
||||
is_context
|
||||
and path_part
|
||||
and "/" not in path_part
|
||||
and prefix_tag != "folder"
|
||||
):
|
||||
if is_context and path_part and "/" not in path_part and prefix_tag != "folder":
|
||||
root = os.getcwd()
|
||||
ranked: list[tuple[tuple[int, int], str, str]] = []
|
||||
for rel in _list_repo_files(root):
|
||||
@@ -3680,7 +3753,7 @@ def _mirror_slash_side_effects(sid: str, session: dict, command: str) -> str:
|
||||
_apply_personality_to_session(sid, session, new_prompt)
|
||||
elif name == "prompt" and agent:
|
||||
cfg = _load_cfg()
|
||||
new_prompt = cfg.get("agent", {}).get("system_prompt", "") or ""
|
||||
new_prompt = (cfg.get("agent") or {}).get("system_prompt", "") or ""
|
||||
agent.ephemeral_system_prompt = new_prompt or None
|
||||
agent._cached_system_prompt = None
|
||||
elif name == "compress" and agent:
|
||||
@@ -3902,9 +3975,7 @@ def _(rid, params: dict) -> dict:
|
||||
|
||||
voice_cfg = _load_cfg().get("voice", {})
|
||||
start_continuous(
|
||||
on_transcript=lambda t: _voice_emit(
|
||||
"voice.transcript", {"text": t}
|
||||
),
|
||||
on_transcript=lambda t: _voice_emit("voice.transcript", {"text": t}),
|
||||
on_status=lambda s: _voice_emit("voice.status", {"state": s}),
|
||||
on_silent_limit=lambda: _voice_emit(
|
||||
"voice.transcript", {"no_speech_limit": True}
|
||||
|
||||
@@ -0,0 +1,127 @@
|
||||
"""Transport abstraction for the tui_gateway JSON-RPC server.
|
||||
|
||||
Historically the gateway wrote every JSON frame directly to real stdout. This
|
||||
module decouples the I/O sink from the handler logic so the same dispatcher
|
||||
can be driven over stdio (``tui_gateway.entry``) or WebSocket
|
||||
(``tui_gateway.ws``) without duplicating code.
|
||||
|
||||
A :class:`Transport` is anything that can accept a JSON-serialisable dict and
|
||||
forward it to its peer. The active transport for the current request is
|
||||
tracked in a :class:`contextvars.ContextVar` so handlers — including those
|
||||
dispatched onto the worker pool — route their writes to the right peer.
|
||||
|
||||
Backward compatibility
|
||||
----------------------
|
||||
``tui_gateway.server.write_json`` still works without any transport bound.
|
||||
When nothing is on the contextvar and no session-level transport is found,
|
||||
it falls back to the module-level :class:`StdioTransport`, which wraps the
|
||||
original ``_real_stdout`` + ``_stdout_lock`` pair. Tests that monkey-patch
|
||||
``server._real_stdout`` continue to work because the stdio transport resolves
|
||||
the stream lazily through a callback.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import contextvars
|
||||
import json
|
||||
import threading
|
||||
from typing import Any, Callable, Optional, Protocol, runtime_checkable
|
||||
|
||||
|
||||
@runtime_checkable
class Transport(Protocol):
    """Minimal interface every transport implements.

    Structural (duck-typed): any object providing ``write`` and ``close``
    satisfies it. Because the protocol is ``runtime_checkable``,
    ``isinstance(obj, Transport)`` works, but it only verifies that both
    methods exist, not their signatures.
    """

    def write(self, obj: dict) -> bool:
        """Emit one JSON frame. Return ``False`` when the peer is gone."""
        ...

    def close(self) -> None:
        """Release any resources owned by this transport."""
        ...
|
||||
|
||||
|
||||
# Transport bound to the current execution context; ``None`` means nothing
# has been bound and callers should fall back to their own default.
_current_transport: "contextvars.ContextVar[Optional[Transport]]" = contextvars.ContextVar(
    "hermes_gateway_transport",
    default=None,
)


def current_transport() -> "Optional[Transport]":
    """Return the transport bound for the current request, if any."""
    return _current_transport.get()


def bind_transport(transport: "Optional[Transport]") -> "contextvars.Token":
    """Bind *transport* for the current context.

    Returns:
        The token to hand to :func:`reset_transport` to undo this binding.
    """
    return _current_transport.set(transport)


def reset_transport(token) -> None:
    """Restore the transport binding captured by :func:`bind_transport`."""
    _current_transport.reset(token)
|
||||
|
||||
|
||||
class StdioTransport:
    """Writes JSON frames to a stream (usually ``sys.stdout``).

    The stream is resolved via a callable on every write so runtime
    monkey-patches of the underlying stream continue to work — this
    preserves the behaviour the existing test suite relies on
    (``monkeypatch.setattr(server, "_real_stdout", ...)``).
    """

    __slots__ = ("_stream_getter", "_lock")

    def __init__(self, stream_getter: Callable[[], Any], lock: threading.Lock) -> None:
        """Store the stream resolver and the lock that serialises writes.

        Args:
            stream_getter: Zero-argument callable returning the target stream.
            lock: Held for the whole write-and-flush of each frame.
        """
        self._stream_getter = stream_getter
        self._lock = lock

    def write(self, obj: dict) -> bool:
        """Write *obj* as one newline-terminated JSON line and flush.

        Returns:
            ``True`` on success; ``False`` when the peer is gone, i.e. the
            pipe is broken or the stream has been closed.

        Raises:
            ValueError: re-raised when it did not come from a closed stream.
        """
        line = json.dumps(obj, ensure_ascii=False) + "\n"
        stream = None
        try:
            with self._lock:
                stream = self._stream_getter()
                stream.write(line)
                stream.flush()
        except BrokenPipeError:
            return False
        except ValueError:
            # File objects raise ValueError ("I/O operation on closed file")
            # once closed; treat that like a vanished peer, nothing else.
            if getattr(stream, "closed", False):
                return False
            raise
        return True

    def close(self) -> None:
        """No-op: the stream is not owned by this transport."""
        return None
|
||||
|
||||
|
||||
class TeeTransport:
    """Fan one write out to a primary transport and N best-effort sidecars.

    Only the primary decides the outcome: its return value is returned and
    its exceptions propagate. Sidecar failures are swallowed so a wedged
    secondary never stalls the main IO path. Used by the PTY child so every
    dispatcher emit lands on stdio (Ink) AND on a back-WS feeding the
    dashboard sidebar.
    """

    __slots__ = ("_primary", "_secondaries")

    def __init__(self, primary: "Transport", *secondaries: "Transport") -> None:
        self._primary = primary
        self._secondaries = secondaries

    def _fan_out(self, method: str, *args: Any) -> None:
        """Invoke *method* on every secondary, ignoring individual failures."""
        for sidecar in self._secondaries:
            try:
                getattr(sidecar, method)(*args)
            except Exception:
                continue

    def write(self, obj: dict) -> bool:
        """Write to the primary first, then mirror to the secondaries."""
        # Primary goes first so a slow sidecar (WS publisher) never delays Ink/stdio.
        result = self._primary.write(obj)
        self._fan_out("write", obj)
        return result

    def close(self) -> None:
        """Close the primary, then every secondary even if the primary raised."""
        try:
            self._primary.close()
        finally:
            self._fan_out("close")
|
||||
@@ -0,0 +1,174 @@
|
||||
"""WebSocket transport for the tui_gateway JSON-RPC server.
|
||||
|
||||
Reuses :func:`tui_gateway.server.dispatch` verbatim so every RPC method, every
|
||||
slash command, every approval/clarify/sudo flow, and every agent event flows
|
||||
through the same handlers whether the client is Ink over stdio or an iOS /
|
||||
web client over WebSocket.
|
||||
|
||||
Wire protocol
|
||||
-------------
|
||||
Identical to stdio: newline-delimited JSON-RPC in both directions. The server
|
||||
emits a ``gateway.ready`` event immediately after connection accept, then
|
||||
echoes responses/events for inbound requests. No framing differences.
|
||||
|
||||
Mounting
|
||||
--------
|
||||
from fastapi import WebSocket
|
||||
from tui_gateway.ws import handle_ws
|
||||
|
||||
@app.websocket("/api/ws")
|
||||
async def ws(ws: WebSocket):
|
||||
await handle_ws(ws)
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
from typing import Any
|
||||
|
||||
from tui_gateway import server
|
||||
|
||||
# Module-level logger; send/write failures are reported at DEBUG level.
_log = logging.getLogger(__name__)

# Max seconds a pool-dispatched handler will block waiting for the event loop
# to flush a WS frame before we mark the transport dead. Protects handler
# threads from a wedged socket.
_WS_WRITE_TIMEOUT_S = 10.0
|
||||
|
||||
# Keep starlette optional at import time; handle_ws uses the real class when
|
||||
# it's available and falls back to a generic Exception sentinel otherwise.
|
||||
try:
|
||||
from starlette.websockets import WebSocketDisconnect as _WebSocketDisconnect
|
||||
except ImportError: # pragma: no cover - starlette is a required install path
|
||||
_WebSocketDisconnect = Exception # type: ignore[assignment]
|
||||
|
||||
|
||||
class WSTransport:
    """Per-connection WS transport.

    ``write`` is safe to call from any thread *other than* the event loop
    thread that owns the socket. Pool workers (the only real caller) run in
    their own threads, so marshalling onto the loop via
    :func:`asyncio.run_coroutine_threadsafe` + ``future.result()`` is correct
    and deadlock-free there.

    When called from the loop thread itself (e.g. by ``handle_ws`` for an
    inline response) the same call would deadlock: we'd schedule work onto
    the loop we're currently blocking. We detect that case and fire-and-
    forget instead. Callers that need to know when the bytes are on the wire
    should use :meth:`write_async` from the loop thread.
    """

    def __init__(self, ws: Any, loop: asyncio.AbstractEventLoop) -> None:
        """Wrap *ws*, whose ``send_text`` coroutine must be driven by *loop*."""
        self._ws = ws
        self._loop = loop
        self._closed = False
        # The event loop keeps only weak references to tasks, so fire-and-
        # forget sends are pinned here until they finish.
        self._pending_sends: set = set()

    def write(self, obj: dict) -> bool:
        """Send *obj* as one JSON text frame.

        Returns ``False`` when the transport is already closed, or when an
        off-loop send fails or times out (which also marks the transport
        closed). On the owning loop the send is only scheduled, so ``True``
        there means "queued", not "delivered".
        """
        if self._closed:
            return False

        line = json.dumps(obj, ensure_ascii=False)

        try:
            on_loop = asyncio.get_running_loop() is self._loop
        except RuntimeError:
            # No running loop in this thread: we are on a worker thread.
            on_loop = False

        if on_loop:
            # Fire-and-forget — don't block the loop waiting on itself.
            task = self._loop.create_task(self._safe_send(line))
            self._pending_sends.add(task)
            task.add_done_callback(self._pending_sends.discard)
            return True

        send = self._safe_send(line)
        try:
            fut = asyncio.run_coroutine_threadsafe(send, self._loop)
        except Exception as exc:
            # Scheduling failed (e.g. the loop is closed); dispose of the
            # coroutine so it is not reported as "never awaited".
            send.close()
            self._closed = True
            _log.debug("ws write failed: %s", exc)
            return False

        try:
            fut.result(timeout=_WS_WRITE_TIMEOUT_S)
        except Exception as exc:
            # Timed out or failed: cancel so a stale frame is not sent later
            # on a transport we are about to report as dead.
            fut.cancel()
            self._closed = True
            _log.debug("ws write failed: %s", exc)
            return False
        return not self._closed

    async def write_async(self, obj: dict) -> bool:
        """Send from the owning event loop. Awaits until the frame is on the wire."""
        if self._closed:
            return False
        await self._safe_send(json.dumps(obj, ensure_ascii=False))
        return not self._closed

    async def _safe_send(self, line: str) -> None:
        """Send *line*; on any failure mark the transport closed instead of raising."""
        try:
            await self._ws.send_text(line)
        except Exception as exc:
            self._closed = True
            _log.debug("ws send failed: %s", exc)

    def close(self) -> None:
        """Mark the transport closed so later writes return ``False``."""
        self._closed = True
|
||||
|
||||
|
||||
async def handle_ws(ws: Any) -> None:
    """Run one WebSocket session. Wire-compatible with ``tui_gateway.entry``.

    Accepts the socket, announces ``gateway.ready``, then reads JSON-RPC
    requests until the peer disconnects or a write fails. On exit the
    transport is closed, detached from any session still pointing at it,
    and the socket is closed on a best-effort basis.
    """
    await ws.accept()

    transport = WSTransport(ws, asyncio.get_running_loop())

    ready_event = {
        "jsonrpc": "2.0",
        "method": "event",
        "params": {
            "type": "gateway.ready",
            "payload": {"skin": server.resolve_skin()},
        },
    }
    await transport.write_async(ready_event)

    try:
        while True:
            try:
                raw = await ws.receive_text()
            except _WebSocketDisconnect:
                break

            payload = raw.strip()
            if not payload:
                continue

            try:
                req = json.loads(payload)
            except json.JSONDecodeError:
                parse_error = {
                    "jsonrpc": "2.0",
                    "error": {"code": -32700, "message": "parse error"},
                    "id": None,
                }
                if await transport.write_async(parse_error):
                    continue
                break

            # dispatch() may schedule long handlers on the pool; it returns
            # None in that case and the worker writes the response itself via
            # the transport we pass in (a separate thread, so transport.write
            # is the safe path there). For inline handlers it returns the
            # response dict, which we write here from the loop.
            resp = await asyncio.to_thread(server.dispatch, req, transport)
            if resp is None:
                continue
            delivered = await transport.write_async(resp)
            if not delivered:
                break
    finally:
        transport.close()

        # Detach the transport from any sessions it owned so later emits
        # fall back to stdio instead of crashing into a closed socket.
        for sess in list(server._sessions.values()):
            if sess.get("transport") is transport:
                sess["transport"] = server._stdio_transport

        try:
            await ws.close()
        except Exception:
            # Best-effort: the socket may already be closed by the peer.
            pass
|
||||
@@ -175,14 +175,16 @@ describe('createGatewayEventHandler', () => {
|
||||
|
||||
onEvent({ payload: { text: 'patch applied' }, type: 'message.complete' } as any)
|
||||
|
||||
// Three transcript messages: pre-tool narration → diff (kind='diff',
|
||||
// so MessageLine gives it blank-line breathing room) → post-tool
|
||||
// narration. The final message does NOT contain a diff.
|
||||
expect(appended).toHaveLength(3)
|
||||
// Four transcript messages: pre-tool narration → tool trail → diff
|
||||
// (kind='diff', so MessageLine gives it blank-line breathing room) →
|
||||
// post-tool narration. The final message does NOT contain a diff.
|
||||
expect(appended).toHaveLength(4)
|
||||
expect(appended[0]?.text).toBe('Editing the file')
|
||||
expect(appended[1]).toMatchObject({ kind: 'diff', text: block })
|
||||
expect(appended[2]?.text).toBe('patch applied')
|
||||
expect(appended[2]?.text).not.toContain('```diff')
|
||||
expect(appended[1]).toMatchObject({ kind: 'trail' })
|
||||
expect(appended[1]?.tools?.[0]).toContain('Patch')
|
||||
expect(appended[2]).toMatchObject({ kind: 'diff', text: block })
|
||||
expect(appended[3]?.text).toBe('patch applied')
|
||||
expect(appended[3]?.text).not.toContain('```diff')
|
||||
})
|
||||
|
||||
it('drops the diff segment when the final assistant text narrates the same diff', () => {
|
||||
@@ -209,12 +211,13 @@ describe('createGatewayEventHandler', () => {
|
||||
onEvent({ payload: { inline_diff: raw, summary: 'patched', tool_id: 'tool-1' }, type: 'tool.complete' } as any)
|
||||
onEvent({ payload: { text: 'done' }, type: 'message.complete' } as any)
|
||||
|
||||
// diff segment first (kind='diff'), final narration second
|
||||
expect(appended).toHaveLength(2)
|
||||
expect(appended[0]?.kind).toBe('diff')
|
||||
expect(appended[0]?.text).not.toContain('┊ review diff')
|
||||
expect(appended[0]?.text).toContain('--- a/foo.ts')
|
||||
expect(appended[1]?.text).toBe('done')
|
||||
// Tool trail first, then diff segment (kind='diff'), then final narration.
|
||||
expect(appended).toHaveLength(3)
|
||||
expect(appended[0]?.kind).toBe('trail')
|
||||
expect(appended[1]?.kind).toBe('diff')
|
||||
expect(appended[1]?.text).not.toContain('┊ review diff')
|
||||
expect(appended[1]?.text).toContain('--- a/foo.ts')
|
||||
expect(appended[2]?.text).toBe('done')
|
||||
})
|
||||
|
||||
it('drops the diff segment when assistant writes its own ```diff fence', () => {
|
||||
@@ -242,16 +245,17 @@ describe('createGatewayEventHandler', () => {
|
||||
} as any)
|
||||
onEvent({ payload: { text: 'done' }, type: 'message.complete' } as any)
|
||||
|
||||
// Two segments: the diff block (kind='diff', no tool row) and the final
|
||||
// narration (tool row belongs here since pendingSegmentTools carries
|
||||
// across the flushStreamingSegment call).
|
||||
expect(appended).toHaveLength(2)
|
||||
expect(appended[0]?.kind).toBe('diff')
|
||||
expect(appended[0]?.text).toContain('```diff')
|
||||
expect(appended[0]?.tools ?? []).toEqual([])
|
||||
expect(appended[1]?.text).toBe('done')
|
||||
expect(appended[1]?.tools?.[0]).toContain('Review Diff')
|
||||
expect(appended[1]?.tools?.[0]).not.toContain('--- a/foo.ts')
|
||||
// Tool row is now placed before the diff, so telemetry does not render
|
||||
// below the patch that came from that tool.
|
||||
expect(appended).toHaveLength(3)
|
||||
expect(appended[0]?.kind).toBe('trail')
|
||||
expect(appended[0]?.tools?.[0]).toContain('Review Diff')
|
||||
expect(appended[0]?.tools?.[0]).not.toContain('--- a/foo.ts')
|
||||
expect(appended[1]?.kind).toBe('diff')
|
||||
expect(appended[1]?.text).toContain('```diff')
|
||||
expect(appended[1]?.tools ?? []).toEqual([])
|
||||
expect(appended[2]?.text).toBe('done')
|
||||
expect(appended[2]?.tools ?? []).toEqual([])
|
||||
})
|
||||
|
||||
it('shows setup panel for missing provider startup error', () => {
|
||||
|
||||
@@ -246,7 +246,7 @@ export const coreCommands: SlashCommand[] = [
|
||||
}
|
||||
|
||||
writeOsc52Clipboard(target.text)
|
||||
sys('sent OSC52 copy sequence (terminal support required)')
|
||||
sys(`copied ${target.text.length} chars`)
|
||||
}
|
||||
},
|
||||
|
||||
|
||||
@@ -33,6 +33,12 @@ const diffSegmentBody = (msg: Msg): null | string => {
|
||||
return m ? m[1]! : null
|
||||
}
|
||||
|
||||
const insertBeforeFirstDiff = (segments: Msg[], msg: Msg): Msg[] => {
|
||||
const index = segments.findIndex(segment => segment.kind === 'diff')
|
||||
|
||||
return index < 0 ? [...segments, msg] : [...segments.slice(0, index), msg, ...segments.slice(index)]
|
||||
}
|
||||
|
||||
export interface InterruptDeps {
|
||||
appendMessage: (msg: Msg) => void
|
||||
gw: { request: <T = unknown>(method: string, params?: Record<string, unknown>) => Promise<T> }
|
||||
@@ -292,16 +298,30 @@ class TurnController {
|
||||
return body === null || (!finalHasOwnDiffFence && !finalText.includes(body))
|
||||
})
|
||||
|
||||
const finalMessages = [...segments]
|
||||
const hasDiffSegment = segments.some(msg => msg.kind === 'diff')
|
||||
const detailsBelongBeforeDiff = hasDiffSegment && (tools.length > 0 || Boolean(savedReasoning))
|
||||
const finalMessages = detailsBelongBeforeDiff
|
||||
? insertBeforeFirstDiff(segments, {
|
||||
kind: 'trail',
|
||||
role: 'system',
|
||||
text: '',
|
||||
thinking: savedReasoning || undefined,
|
||||
thinkingTokens: savedReasoning ? savedReasoningTokens : undefined,
|
||||
toolTokens: savedToolTokens || undefined,
|
||||
...(tools.length && { tools })
|
||||
})
|
||||
: [...segments]
|
||||
|
||||
if (finalText) {
|
||||
finalMessages.push({
|
||||
role: 'assistant',
|
||||
text: finalText,
|
||||
thinking: savedReasoning || undefined,
|
||||
thinkingTokens: savedReasoning ? savedReasoningTokens : undefined,
|
||||
toolTokens: savedToolTokens || undefined,
|
||||
...(tools.length && { tools })
|
||||
...(!detailsBelongBeforeDiff && {
|
||||
thinking: savedReasoning || undefined,
|
||||
thinkingTokens: savedReasoning ? savedReasoningTokens : undefined,
|
||||
toolTokens: savedToolTokens || undefined,
|
||||
...(tools.length && { tools })
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@@ -142,6 +142,10 @@ export function useSessionLifecycle(opts: UseSessionLifecycleOptions) {
|
||||
sys(`warning: ${info.credential_warning}`)
|
||||
}
|
||||
|
||||
if (info?.config_warning) {
|
||||
sys(`warning: ${info.config_warning}`)
|
||||
}
|
||||
|
||||
if (msg) {
|
||||
sys(msg)
|
||||
}
|
||||
|
||||
@@ -31,11 +31,20 @@ export const MessageLine = memo(function MessageLine({
|
||||
const thinkingMode = sectionMode('thinking', detailsMode, sections)
|
||||
const toolsMode = sectionMode('tools', detailsMode, sections)
|
||||
const activityMode = sectionMode('activity', detailsMode, sections)
|
||||
const thinking = msg.thinking?.trim() ?? ''
|
||||
|
||||
if (msg.kind === 'trail' && msg.tools?.length) {
|
||||
return toolsMode !== 'hidden' || activityMode !== 'hidden' ? (
|
||||
if (msg.kind === 'trail' && (msg.tools?.length || thinking)) {
|
||||
return thinkingMode !== 'hidden' || toolsMode !== 'hidden' || activityMode !== 'hidden' ? (
|
||||
<Box flexDirection="column" marginTop={1}>
|
||||
<ToolTrail detailsMode={detailsMode} sections={sections} t={t} trail={msg.tools} />
|
||||
<ToolTrail
|
||||
detailsMode={detailsMode}
|
||||
reasoning={thinking}
|
||||
reasoningTokens={msg.thinkingTokens}
|
||||
sections={sections}
|
||||
t={t}
|
||||
toolTokens={msg.toolTokens}
|
||||
trail={msg.tools ?? []}
|
||||
/>
|
||||
</Box>
|
||||
) : null
|
||||
}
|
||||
@@ -61,7 +70,6 @@ export const MessageLine = memo(function MessageLine({
|
||||
}
|
||||
|
||||
const { body, glyph, prefix } = ROLE[msg.role](t)
|
||||
const thinking = msg.thinking?.trim() ?? ''
|
||||
|
||||
const showDetails =
|
||||
(toolsMode !== 'hidden' && Boolean(msg.tools?.length)) ||
|
||||
|
||||
@@ -392,6 +392,9 @@ function SubagentAccordion({
|
||||
const hasTools = item.tools.length > 0
|
||||
const noteRows = [...(summary ? [summary] : []), ...item.notes]
|
||||
const hasNotes = noteRows.length > 0
|
||||
// `showChildren` only seeds the recursive `expanded` prop for nested
|
||||
// subagents — it MUST NOT be OR-ed into the local section toggles, or
|
||||
// expand-all permanently locks the inner chevrons open.
|
||||
const showChildren = expanded || deep
|
||||
const noteColor = statusTone === 'error' ? t.color.error : statusTone === 'warn' ? t.color.warn : t.color.dim
|
||||
|
||||
@@ -414,13 +417,13 @@ function SubagentAccordion({
|
||||
setOpenThinking(v => !v)
|
||||
}
|
||||
}}
|
||||
open={showChildren || openThinking}
|
||||
open={openThinking}
|
||||
t={t}
|
||||
title="Thinking"
|
||||
/>
|
||||
),
|
||||
key: 'thinking',
|
||||
open: showChildren || openThinking,
|
||||
open: openThinking,
|
||||
render: childRails => (
|
||||
<Thinking
|
||||
active={item.status === 'running'}
|
||||
@@ -447,13 +450,13 @@ function SubagentAccordion({
|
||||
setOpenTools(v => !v)
|
||||
}
|
||||
}}
|
||||
open={showChildren || openTools}
|
||||
open={openTools}
|
||||
t={t}
|
||||
title="Tool calls"
|
||||
/>
|
||||
),
|
||||
key: 'tools',
|
||||
open: showChildren || openTools,
|
||||
open: openTools,
|
||||
render: childRails => (
|
||||
<Box flexDirection="column">
|
||||
{item.tools.map((line, index) => (
|
||||
@@ -488,14 +491,14 @@ function SubagentAccordion({
|
||||
setOpenNotes(v => !v)
|
||||
}
|
||||
}}
|
||||
open={showChildren || openNotes}
|
||||
open={openNotes}
|
||||
t={t}
|
||||
title="Progress"
|
||||
tone={statusTone}
|
||||
/>
|
||||
),
|
||||
key: 'notes',
|
||||
open: showChildren || openNotes,
|
||||
open: openNotes,
|
||||
render: childRails => (
|
||||
<Box flexDirection="column">
|
||||
{noteRows.map((line, index) => (
|
||||
@@ -528,14 +531,14 @@ function SubagentAccordion({
|
||||
setOpenKids(v => !v)
|
||||
}
|
||||
}}
|
||||
open={showChildren || openKids}
|
||||
open={openKids}
|
||||
suffix={`d${item.depth + 1} · ${aggregate.descendantCount} total`}
|
||||
t={t}
|
||||
title="Spawned"
|
||||
/>
|
||||
),
|
||||
key: 'subagents',
|
||||
open: showChildren || openKids,
|
||||
open: openKids,
|
||||
render: childRails => (
|
||||
<Box flexDirection="column">
|
||||
{children.map((child, i) => (
|
||||
@@ -718,6 +721,13 @@ export const ToolTrail = memo(function ToolTrail({
|
||||
)
|
||||
|
||||
const [now, setNow] = useState(() => Date.now())
|
||||
// Local toggles own the open state once mounted. Init from the resolved
|
||||
// section visibility so default-expanded sections (thinking/tools) render
|
||||
// open on first paint; the useEffect below re-syncs when the user mutates
|
||||
// visibility at runtime via /details. NEVER OR these against
|
||||
// `visible.X === 'expanded'` at render time — that locks the panel open
|
||||
// and silently breaks manual chevron clicks for default-expanded
|
||||
// sections (regression caught after #14968).
|
||||
const [openThinking, setOpenThinking] = useState(visible.thinking === 'expanded')
|
||||
const [openTools, setOpenTools] = useState(visible.tools === 'expanded')
|
||||
const [openSubagents, setOpenSubagents] = useState(visible.subagents === 'expanded')
|
||||
@@ -960,7 +970,7 @@ export const ToolTrail = memo(function ToolTrail({
|
||||
}}
|
||||
>
|
||||
<Text color={t.color.dim} dim={!thinkingLive}>
|
||||
<Text color={t.color.amber}>{visible.thinking === 'expanded' || openThinking ? '▾ ' : '▸ '}</Text>
|
||||
<Text color={t.color.amber}>{openThinking ? '▾ ' : '▸ '}</Text>
|
||||
{thinkingLive ? (
|
||||
<Text bold color={t.color.cornsilk}>
|
||||
Thinking
|
||||
@@ -980,7 +990,7 @@ export const ToolTrail = memo(function ToolTrail({
|
||||
</Box>
|
||||
),
|
||||
key: 'thinking',
|
||||
open: visible.thinking === 'expanded' || openThinking,
|
||||
open: openThinking,
|
||||
render: rails => (
|
||||
<Thinking
|
||||
active={reasoningActive}
|
||||
@@ -1007,14 +1017,14 @@ export const ToolTrail = memo(function ToolTrail({
|
||||
setOpenTools(v => !v)
|
||||
}
|
||||
}}
|
||||
open={visible.tools === 'expanded' || openTools}
|
||||
open={openTools}
|
||||
suffix={toolTokensLabel}
|
||||
t={t}
|
||||
title="Tool calls"
|
||||
/>
|
||||
),
|
||||
key: 'tools',
|
||||
open: visible.tools === 'expanded' || openTools,
|
||||
open: openTools,
|
||||
render: rails => (
|
||||
<Box flexDirection="column">
|
||||
{groups.map((group, index) => {
|
||||
@@ -1072,14 +1082,14 @@ export const ToolTrail = memo(function ToolTrail({
|
||||
setDeepSubagents(false)
|
||||
}
|
||||
}}
|
||||
open={visible.subagents === 'expanded' || openSubagents}
|
||||
open={openSubagents}
|
||||
suffix={suffix}
|
||||
t={t}
|
||||
title="Spawn tree"
|
||||
/>
|
||||
),
|
||||
key: 'subagents',
|
||||
open: visible.subagents === 'expanded' || openSubagents,
|
||||
open: openSubagents,
|
||||
render: renderSubagentList
|
||||
})
|
||||
}
|
||||
@@ -1096,14 +1106,14 @@ export const ToolTrail = memo(function ToolTrail({
|
||||
setOpenMeta(v => !v)
|
||||
}
|
||||
}}
|
||||
open={visible.activity === 'expanded' || openMeta}
|
||||
open={openMeta}
|
||||
t={t}
|
||||
title="Activity"
|
||||
tone={metaTone}
|
||||
/>
|
||||
),
|
||||
key: 'meta',
|
||||
open: visible.activity === 'expanded' || openMeta,
|
||||
open: openMeta,
|
||||
render: rails => (
|
||||
<Box flexDirection="column">
|
||||
{meta.map((row, index) => (
|
||||
|
||||
@@ -93,7 +93,7 @@ export interface SetupStatusResponse {
|
||||
// ── Session lifecycle ────────────────────────────────────────────────
|
||||
|
||||
export interface SessionCreateResponse {
|
||||
info?: SessionInfo & { credential_warning?: string }
|
||||
info?: SessionInfo & { config_warning?: string; credential_warning?: string }
|
||||
session_id: string
|
||||
}
|
||||
|
||||
|
||||
@@ -9,7 +9,7 @@ resolution-markers = [
|
||||
]
|
||||
|
||||
[options]
|
||||
exclude-newer = "2026-04-16T11:49:00.318115Z"
|
||||
exclude-newer = "2026-04-17T16:49:45.944715922Z"
|
||||
exclude-newer-span = "P7D"
|
||||
|
||||
[[package]]
|
||||
@@ -1870,7 +1870,7 @@ wheels = [
|
||||
|
||||
[[package]]
|
||||
name = "hermes-agent"
|
||||
version = "0.10.0"
|
||||
version = "0.11.0"
|
||||
source = { editable = "." }
|
||||
dependencies = [
|
||||
{ name = "anthropic" },
|
||||
|
||||
Generated
+632
-570
File diff suppressed because it is too large
Load Diff
+6
-1
@@ -13,10 +13,15 @@
|
||||
"preview": "vite preview"
|
||||
},
|
||||
"dependencies": {
|
||||
"@nous-research/ui": "^0.3.0",
|
||||
"@nous-research/ui": "^0.4.0",
|
||||
"@observablehq/plot": "^0.6.17",
|
||||
"@react-three/fiber": "^9.6.0",
|
||||
"@tailwindcss/vite": "^4.2.1",
|
||||
"@xterm/addon-fit": "^0.11.0",
|
||||
"@xterm/addon-unicode11": "^0.9.0",
|
||||
"@xterm/addon-web-links": "^0.12.0",
|
||||
"@xterm/addon-webgl": "^0.19.0",
|
||||
"@xterm/xterm": "^6.0.0",
|
||||
"class-variance-authority": "^0.7.1",
|
||||
"clsx": "^2.1.1",
|
||||
"gsap": "^3.15.0",
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user