Compare commits
56 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 2c4f3ea196 | |||
| cb12ee4b2d | |||
| a57781f8a9 | |||
| 6749e335a3 | |||
| 53814b39c3 | |||
| efd71e8914 | |||
| bc2ba1356e | |||
| e0b3fa6eb3 | |||
| 929245ba69 | |||
| 73a3de5798 | |||
| 3a26076194 | |||
| 04d3a2e2be | |||
| 70882abe9b | |||
| 2771d404a3 | |||
| 7150715e19 | |||
| 4eab358ff7 | |||
| f96db81d3b | |||
| 6f436a463e | |||
| 9d61408837 | |||
| ec2ab5bfaf | |||
| 82c2035823 | |||
| 2e509422ef | |||
| 3ac2125140 | |||
| 7dea33303a | |||
| d246f9a278 | |||
| c1e93aa331 | |||
| 8b49012a0a | |||
| 3fc715ddf5 | |||
| 9c90b3a597 | |||
| 22b0d6dc1a | |||
| 5dc232a6e2 | |||
| c25f9d1d36 | |||
| d617858896 | |||
| 2d587c5662 | |||
| caf0f30eab | |||
| 70d53d8b75 | |||
| fbdca64f73 | |||
| 07b7cf6fe4 | |||
| c52cd48e25 | |||
| d3f62c6913 | |||
| c769be344a | |||
| 372e9a18cd | |||
| b5c6d9ac08 | |||
| f6f25b9449 | |||
| e77f1ed5f7 | |||
| 4c61fb6cf6 | |||
| 1264fab156 | |||
| 4e2c66a098 | |||
| eb51fb6f50 | |||
| 4a2fa77c15 | |||
| 9896e43db5 | |||
| d08c2a016a | |||
| 0e2873a77d | |||
| 280dd4513a | |||
| bb694bad42 | |||
| 9e30ef224d |
+27
-2
@@ -1125,7 +1125,18 @@ def init_agent(
|
||||
# through _ra().get_tool_definitions()). Duplicate function names cause
|
||||
# 400 errors on providers that enforce unique names (e.g. Xiaomi
|
||||
# MiMo via Nous Portal).
|
||||
if agent._memory_manager and agent.tools is not None:
|
||||
#
|
||||
# Respect the platform's enabled_toolsets configuration (#5544):
|
||||
# enabled_toolsets is None → no filter, inject (backward compat)
|
||||
# "memory" in enabled_toolsets → user opted in, inject
|
||||
# otherwise (incl. []) → user excluded memory, skip injection
|
||||
#
|
||||
# Without this gate, `platform_toolsets: telegram: []` still leaks memory
|
||||
# provider tools (fact_store, etc.) into the tool surface — a 10x latency
|
||||
# penalty on local models and a frequent trigger of tool-call loops.
|
||||
if agent._memory_manager and agent.tools is not None and (
|
||||
agent.enabled_toolsets is None or "memory" in agent.enabled_toolsets
|
||||
):
|
||||
_existing_tool_names = {
|
||||
t.get("function", {}).get("name")
|
||||
for t in agent.tools
|
||||
@@ -1435,8 +1446,22 @@ def init_agent(
|
||||
# errors. Even with the cache fix, dedup is the right defense
|
||||
# against plugin paths that may register the same schemas via
|
||||
# ctx.register_tool(). Mirrors the memory tools dedup above.
|
||||
#
|
||||
# Respect the platform's enabled_toolsets configuration (#5544):
|
||||
# context engine tools follow the same gating pattern as memory
|
||||
# provider tools — without the gate, `platform_toolsets: telegram: []`
|
||||
# would still leak lcm_* tools into the tool surface and incur the
|
||||
# same local-model latency penalty.
|
||||
agent._context_engine_tool_names: set = set()
|
||||
if hasattr(agent, "context_compressor") and agent.context_compressor and agent.tools is not None:
|
||||
if (
|
||||
hasattr(agent, "context_compressor")
|
||||
and agent.context_compressor
|
||||
and agent.tools is not None
|
||||
and (
|
||||
agent.enabled_toolsets is None
|
||||
or "context_engine" in agent.enabled_toolsets
|
||||
)
|
||||
):
|
||||
_existing_tool_names = {
|
||||
t.get("function", {}).get("name")
|
||||
for t in agent.tools
|
||||
|
||||
+254
-230
@@ -1606,182 +1606,155 @@ def _content_parts_to_anthropic_blocks(parts: Any) -> List[Dict[str, Any]]:
|
||||
return out
|
||||
|
||||
|
||||
def convert_messages_to_anthropic(
|
||||
messages: List[Dict],
|
||||
base_url: str | None = None,
|
||||
model: str | None = None,
|
||||
) -> Tuple[Optional[Any], List[Dict]]:
|
||||
"""Convert OpenAI-format messages to Anthropic format.
|
||||
def _convert_assistant_message(m: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Convert an assistant message to Anthropic content blocks.
|
||||
|
||||
Returns (system_prompt, anthropic_messages).
|
||||
System messages are extracted since Anthropic takes them as a separate param.
|
||||
system_prompt is a string or list of content blocks (when cache_control present).
|
||||
|
||||
When *base_url* is provided and points to a third-party Anthropic-compatible
|
||||
endpoint, all thinking block signatures are stripped. Signatures are
|
||||
Anthropic-proprietary — third-party endpoints cannot validate them and will
|
||||
reject them with HTTP 400 "Invalid signature in thinking block".
|
||||
|
||||
When *model* is provided and matches the Kimi / Moonshot family (or
|
||||
*base_url* is a Kimi / Moonshot host), unsigned thinking blocks
|
||||
synthesised from ``reasoning_content`` are preserved on replayed
|
||||
assistant tool-call messages — Kimi requires the field to exist, even
|
||||
if empty.
|
||||
Handles thinking blocks, regular content, tool calls, and
|
||||
reasoning_content injection for Kimi/DeepSeek endpoints.
|
||||
"""
|
||||
system = None
|
||||
result = []
|
||||
|
||||
for m in messages:
|
||||
role = m.get("role", "user")
|
||||
content = m.get("content", "")
|
||||
|
||||
if role == "system":
|
||||
if isinstance(content, list):
|
||||
# Preserve cache_control markers on content blocks
|
||||
has_cache = any(
|
||||
p.get("cache_control") for p in content if isinstance(p, dict)
|
||||
)
|
||||
if has_cache:
|
||||
system = [p for p in content if isinstance(p, dict)]
|
||||
else:
|
||||
system = "\n".join(
|
||||
p["text"] for p in content if p.get("type") == "text"
|
||||
)
|
||||
else:
|
||||
system = content
|
||||
continue
|
||||
|
||||
if role == "assistant":
|
||||
blocks = _extract_preserved_thinking_blocks(m)
|
||||
if content:
|
||||
if isinstance(content, list):
|
||||
converted_content = _convert_content_to_anthropic(content)
|
||||
if isinstance(converted_content, list):
|
||||
blocks.extend(converted_content)
|
||||
else:
|
||||
blocks.append({"type": "text", "text": str(content)})
|
||||
for tc in m.get("tool_calls", []):
|
||||
if not tc or not isinstance(tc, dict):
|
||||
continue
|
||||
fn = tc.get("function", {})
|
||||
args = fn.get("arguments", "{}")
|
||||
try:
|
||||
parsed_args = json.loads(args) if isinstance(args, str) else args
|
||||
except (json.JSONDecodeError, ValueError):
|
||||
parsed_args = {}
|
||||
blocks.append({
|
||||
"type": "tool_use",
|
||||
"id": _sanitize_tool_id(tc.get("id", "")),
|
||||
"name": fn.get("name", ""),
|
||||
"input": parsed_args,
|
||||
})
|
||||
# Kimi's /coding endpoint (Anthropic protocol) requires assistant
|
||||
# tool-call messages to carry reasoning_content when thinking is
|
||||
# enabled server-side. Preserve it as a thinking block so Kimi
|
||||
# can validate the message history. See hermes-agent#13848.
|
||||
#
|
||||
# Accept empty string "" — _copy_reasoning_content_for_api()
|
||||
# injects "" as a tier-3 fallback for Kimi tool-call messages
|
||||
# that had no reasoning. Kimi requires the field to exist, even
|
||||
# if empty.
|
||||
#
|
||||
# Prepend (not append): Anthropic protocol requires thinking
|
||||
# blocks before text and tool_use blocks.
|
||||
#
|
||||
# Guard: only add when reasoning_details didn't already contribute
|
||||
# thinking blocks. On native Anthropic, reasoning_details produces
|
||||
# signed thinking blocks — adding another unsigned one from
|
||||
# reasoning_content would create a duplicate (same text) that gets
|
||||
# downgraded to a spurious text block on the last assistant message.
|
||||
reasoning_content = m.get("reasoning_content")
|
||||
_already_has_thinking = any(
|
||||
isinstance(b, dict) and b.get("type") in {"thinking", "redacted_thinking"}
|
||||
for b in blocks
|
||||
)
|
||||
if isinstance(reasoning_content, str) and not _already_has_thinking:
|
||||
blocks.insert(0, {"type": "thinking", "thinking": reasoning_content})
|
||||
# Anthropic rejects empty assistant content
|
||||
effective = blocks or content
|
||||
if not effective or effective == "":
|
||||
effective = [{"type": "text", "text": "(empty)"}]
|
||||
result.append({"role": "assistant", "content": effective})
|
||||
continue
|
||||
|
||||
if role == "tool":
|
||||
# Sanitize tool_use_id and ensure non-empty content.
|
||||
# Computer-use (and other multimodal) tool results arrive as
|
||||
# either a list of OpenAI-style content parts, or a dict
|
||||
# marked `_multimodal` with an embedded `content` list. Convert
|
||||
# both into Anthropic `tool_result` inner blocks (text + image).
|
||||
multimodal_blocks: Optional[List[Dict[str, Any]]] = None
|
||||
if isinstance(content, dict) and content.get("_multimodal"):
|
||||
multimodal_blocks = _content_parts_to_anthropic_blocks(
|
||||
content.get("content") or []
|
||||
)
|
||||
# Fallback text if the conversion produced nothing usable.
|
||||
if not multimodal_blocks and content.get("text_summary"):
|
||||
multimodal_blocks = [
|
||||
{"type": "text", "text": str(content["text_summary"])}
|
||||
]
|
||||
elif isinstance(content, list):
|
||||
converted = _content_parts_to_anthropic_blocks(content)
|
||||
if any(b.get("type") == "image" for b in converted):
|
||||
multimodal_blocks = converted
|
||||
# Back-compat: some callers stash blocks under a private key.
|
||||
if multimodal_blocks is None:
|
||||
stashed = m.get("_anthropic_content_blocks")
|
||||
if isinstance(stashed, list) and stashed:
|
||||
text_content = content if isinstance(content, str) and content.strip() else None
|
||||
multimodal_blocks = (
|
||||
[{"type": "text", "text": text_content}] + stashed
|
||||
if text_content else list(stashed)
|
||||
)
|
||||
|
||||
if multimodal_blocks:
|
||||
result_content: Any = multimodal_blocks
|
||||
elif isinstance(content, str):
|
||||
result_content = content
|
||||
else:
|
||||
result_content = json.dumps(content) if content else "(no output)"
|
||||
if not result_content:
|
||||
result_content = "(no output)"
|
||||
tool_result = {
|
||||
"type": "tool_result",
|
||||
"tool_use_id": _sanitize_tool_id(m.get("tool_call_id", "")),
|
||||
"content": result_content,
|
||||
}
|
||||
if isinstance(m.get("cache_control"), dict):
|
||||
tool_result["cache_control"] = dict(m["cache_control"])
|
||||
# Merge consecutive tool results into one user message
|
||||
if (
|
||||
result
|
||||
and result[-1]["role"] == "user"
|
||||
and isinstance(result[-1]["content"], list)
|
||||
and result[-1]["content"]
|
||||
and result[-1]["content"][0].get("type") == "tool_result"
|
||||
):
|
||||
result[-1]["content"].append(tool_result)
|
||||
else:
|
||||
result.append({"role": "user", "content": [tool_result]})
|
||||
continue
|
||||
|
||||
# Regular user message — validate non-empty content (Anthropic rejects empty)
|
||||
content = m.get("content", "")
|
||||
blocks = _extract_preserved_thinking_blocks(m)
|
||||
if content:
|
||||
if isinstance(content, list):
|
||||
converted_blocks = _convert_content_to_anthropic(content)
|
||||
# Check if all text blocks are empty
|
||||
if not converted_blocks or all(
|
||||
b.get("text", "").strip() == ""
|
||||
for b in converted_blocks
|
||||
if isinstance(b, dict) and b.get("type") == "text"
|
||||
):
|
||||
converted_blocks = [{"type": "text", "text": "(empty message)"}]
|
||||
result.append({"role": "user", "content": converted_blocks})
|
||||
converted_content = _convert_content_to_anthropic(content)
|
||||
if isinstance(converted_content, list):
|
||||
blocks.extend(converted_content)
|
||||
else:
|
||||
# Validate string content is non-empty
|
||||
if not content or (isinstance(content, str) and not content.strip()):
|
||||
content = "(empty message)"
|
||||
result.append({"role": "user", "content": content})
|
||||
blocks.append({"type": "text", "text": str(content)})
|
||||
for tc in m.get("tool_calls", []):
|
||||
if not tc or not isinstance(tc, dict):
|
||||
continue
|
||||
fn = tc.get("function", {})
|
||||
args = fn.get("arguments", "{}")
|
||||
try:
|
||||
parsed_args = json.loads(args) if isinstance(args, str) else args
|
||||
except (json.JSONDecodeError, ValueError):
|
||||
parsed_args = {}
|
||||
blocks.append({
|
||||
"type": "tool_use",
|
||||
"id": _sanitize_tool_id(tc.get("id", "")),
|
||||
"name": fn.get("name", ""),
|
||||
"input": parsed_args,
|
||||
})
|
||||
# Kimi's /coding endpoint (Anthropic protocol) requires assistant
|
||||
# tool-call messages to carry reasoning_content when thinking is
|
||||
# enabled server-side. Preserve it as a thinking block so Kimi
|
||||
# can validate the message history. See hermes-agent#13848.
|
||||
#
|
||||
# Accept empty string "" — _copy_reasoning_content_for_api()
|
||||
# injects "" as a tier-3 fallback for Kimi tool-call messages
|
||||
# that had no reasoning. Kimi requires the field to exist, even
|
||||
# if empty.
|
||||
#
|
||||
# Prepend (not append): Anthropic protocol requires thinking
|
||||
# blocks before text and tool_use blocks.
|
||||
#
|
||||
# Guard: only add when reasoning_details didn't already contribute
|
||||
# thinking blocks. On native Anthropic, reasoning_details produces
|
||||
# signed thinking blocks — adding another unsigned one from
|
||||
# reasoning_content would create a duplicate (same text) that gets
|
||||
# downgraded to a spurious text block on the last assistant message.
|
||||
reasoning_content = m.get("reasoning_content")
|
||||
_already_has_thinking = any(
|
||||
isinstance(b, dict) and b.get("type") in {"thinking", "redacted_thinking"}
|
||||
for b in blocks
|
||||
)
|
||||
if isinstance(reasoning_content, str) and not _already_has_thinking:
|
||||
blocks.insert(0, {"type": "thinking", "thinking": reasoning_content})
|
||||
# Anthropic rejects empty assistant content
|
||||
effective = blocks or content
|
||||
if not effective or effective == "":
|
||||
effective = [{"type": "text", "text": "(empty)"}]
|
||||
return {"role": "assistant", "content": effective}
|
||||
|
||||
|
||||
def _convert_tool_message_to_result(
    result: List[Dict[str, Any]], m: Dict[str, Any]
) -> None:
    """Convert a tool message to an Anthropic ``tool_result`` block.

    The block is appended to the trailing user message when that message's
    content is a list whose first block is a ``tool_result`` (consecutive tool
    results share one user message); otherwise a new user message is appended.

    How ``m["content"]`` is turned into the ``tool_result`` content:

    * dict flagged ``_multimodal``: its ``content`` parts are converted with
      ``_content_parts_to_anthropic_blocks``; if that yields nothing, the
      dict's ``text_summary`` (when set) becomes a single text block.
    * list: converted the same way, but the converted blocks are only used
      when at least one of them is an image block.
    * otherwise blocks stashed under ``m["_anthropic_content_blocks"]`` are
      used, prefixed with the string content when it is non-blank.
    * with no blocks: a string is passed through, any other truthy value is
      JSON-encoded, and empty content becomes ``"(no output)"``.

    Args:
        result: Anthropic-format messages built so far.
        m: The OpenAI-format tool message (``content``, ``tool_call_id``,
            optional ``cache_control`` / ``_anthropic_content_blocks``).

    Mutates ``result`` in place and returns ``None``.
    """
    content = m.get("content", "")
    multimodal_blocks: Optional[List[Dict[str, Any]]] = None
    if isinstance(content, dict) and content.get("_multimodal"):
        multimodal_blocks = _content_parts_to_anthropic_blocks(
            content.get("content") or []
        )
        # Fallback text if the conversion produced nothing usable.
        if not multimodal_blocks and content.get("text_summary"):
            multimodal_blocks = [
                {"type": "text", "text": str(content["text_summary"])}
            ]
    elif isinstance(content, list):
        converted = _content_parts_to_anthropic_blocks(content)
        if any(b.get("type") == "image" for b in converted):
            multimodal_blocks = converted

    # Back-compat: some callers stash blocks under a private key.
    if multimodal_blocks is None:
        stashed = m.get("_anthropic_content_blocks")
        if isinstance(stashed, list) and stashed:
            text_content = content if isinstance(content, str) and content.strip() else None
            multimodal_blocks = (
                [{"type": "text", "text": text_content}] + stashed
                if text_content else list(stashed)
            )

    result_content: Any
    if multimodal_blocks:
        result_content = multimodal_blocks
    elif isinstance(content, str):
        result_content = content
    elif content:
        try:
            result_content = json.dumps(content)
        except (TypeError, ValueError):
            # Non-JSON-serializable tool output (sets, bytes, custom objects,
            # circular structures) must not abort the whole conversion.
            result_content = str(content)
    else:
        result_content = "(no output)"
    if not result_content:
        result_content = "(no output)"

    tool_result = {
        "type": "tool_result",
        "tool_use_id": _sanitize_tool_id(m.get("tool_call_id", "")),
        "content": result_content,
    }
    if isinstance(m.get("cache_control"), dict):
        # Copy so later mutation of the block cannot leak into the input message.
        tool_result["cache_control"] = dict(m["cache_control"])

    # Merge consecutive tool results into one user message. The isinstance
    # guard keeps a trailing user message whose first block is not a dict
    # from raising AttributeError on ``.get``.
    previous = result[-1] if result else None
    if (
        previous is not None
        and previous["role"] == "user"
        and isinstance(previous["content"], list)
        and previous["content"]
        and isinstance(previous["content"][0], dict)
        and previous["content"][0].get("type") == "tool_result"
    ):
        previous["content"].append(tool_result)
    else:
        result.append({"role": "user", "content": [tool_result]})
|
||||
|
||||
|
||||
def _convert_user_message(content: Any) -> Dict[str, Any]:
    """Validate and convert a user message to Anthropic format.

    Anthropic rejects empty user content, so content that carries nothing is
    replaced with an ``"(empty message)"`` placeholder.

    Args:
        content: The OpenAI-format ``content`` value: a list of content parts
            (converted with ``_convert_content_to_anthropic``) or any other
            value, which is passed through unless it is falsy or a blank
            string.

    Returns:
        ``{"role": "user", "content": ...}`` where content is either the
        converted block list or the (possibly placeholder) scalar content.
    """
    if isinstance(content, list):
        converted_blocks = _convert_content_to_anthropic(content)

        def _carries_payload(block: Any) -> bool:
            # Non-dict entries are ignored, as before.
            if not isinstance(block, dict):
                return False
            kind = block.get("type")
            if kind == "text":
                # ``or ""`` tolerates an explicit ``"text": None``.
                return bool(str(block.get("text") or "").strip())
            # Any other typed block (image, document, ...) is real content.
            return bool(kind)

        # The message is empty only when NO block carries payload. Checking
        # ``all(blank for text blocks)`` alone is vacuously true when there
        # are no text blocks, which would discard image-only messages.
        if not any(_carries_payload(b) for b in converted_blocks or ()):
            converted_blocks = [{"type": "text", "text": "(empty message)"}]
        return {"role": "user", "content": converted_blocks}

    if not content or (isinstance(content, str) and not content.strip()):
        content = "(empty message)"
    return {"role": "user", "content": content}
|
||||
|
||||
|
||||
def _strip_orphaned_tool_blocks(result: List[Dict[str, Any]]) -> None:
|
||||
"""Strip tool_use blocks with no matching tool_result, and vice versa.
|
||||
|
||||
Context compression or session truncation can remove either side of a
|
||||
tool-call pair. Anthropic rejects both orphans with HTTP 400.
|
||||
|
||||
Mutates ``result`` in place.
|
||||
"""
|
||||
# Strip orphaned tool_use blocks (no matching tool_result follows)
|
||||
tool_result_ids = set()
|
||||
for m in result:
|
||||
@@ -1799,10 +1772,7 @@ def convert_messages_to_anthropic(
|
||||
if not m["content"]:
|
||||
m["content"] = [{"type": "text", "text": "(tool call removed)"}]
|
||||
|
||||
# Strip orphaned tool_result blocks (no matching tool_use precedes them).
|
||||
# This is the mirror of the above: context compression or session truncation
|
||||
# can remove an assistant message containing a tool_use while leaving the
|
||||
# subsequent tool_result intact. Anthropic rejects these with a 400.
|
||||
# Strip orphaned tool_result blocks (no matching tool_use precedes them)
|
||||
tool_use_ids = set()
|
||||
for m in result:
|
||||
if m["role"] == "assistant" and isinstance(m["content"], list):
|
||||
@@ -1819,12 +1789,16 @@ def convert_messages_to_anthropic(
|
||||
if not m["content"]:
|
||||
m["content"] = [{"type": "text", "text": "(tool result removed)"}]
|
||||
|
||||
# Enforce strict role alternation (Anthropic rejects consecutive same-role messages)
|
||||
|
||||
def _merge_consecutive_roles(result: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
||||
"""Merge consecutive same-role messages to enforce Anthropic alternation.
|
||||
|
||||
Returns a new list (caller must rebind ``result``).
|
||||
"""
|
||||
fixed = []
|
||||
for m in result:
|
||||
if fixed and fixed[-1]["role"] == m["role"]:
|
||||
if m["role"] == "user":
|
||||
# Merge consecutive user messages
|
||||
prev_content = fixed[-1]["content"]
|
||||
curr_content = m["content"]
|
||||
if isinstance(prev_content, str) and isinstance(curr_content, str):
|
||||
@@ -1832,7 +1806,6 @@ def convert_messages_to_anthropic(
|
||||
elif isinstance(prev_content, list) and isinstance(curr_content, list):
|
||||
fixed[-1]["content"] = prev_content + curr_content
|
||||
else:
|
||||
# Mixed types — wrap string in list
|
||||
if isinstance(prev_content, str):
|
||||
prev_content = [{"type": "text", "text": prev_content}]
|
||||
if isinstance(curr_content, str):
|
||||
@@ -1855,7 +1828,6 @@ def convert_messages_to_anthropic(
|
||||
elif isinstance(prev_blocks, str) and isinstance(curr_blocks, str):
|
||||
fixed[-1]["content"] = prev_blocks + "\n" + curr_blocks
|
||||
else:
|
||||
# Mixed types — normalize both to list and merge
|
||||
if isinstance(prev_blocks, str):
|
||||
prev_blocks = [{"type": "text", "text": prev_blocks}]
|
||||
if isinstance(curr_blocks, str):
|
||||
@@ -1863,37 +1835,34 @@ def convert_messages_to_anthropic(
|
||||
fixed[-1]["content"] = prev_blocks + curr_blocks
|
||||
else:
|
||||
fixed.append(m)
|
||||
result = fixed
|
||||
return fixed
|
||||
|
||||
# ── Thinking block signature management ──────────────────────────
|
||||
# Anthropic signs thinking blocks against the full turn content.
|
||||
# Any upstream mutation (context compression, session truncation,
|
||||
# orphan stripping, message merging) invalidates the signature,
|
||||
# causing HTTP 400 "Invalid signature in thinking block".
|
||||
#
|
||||
# Signatures are Anthropic-proprietary. Third-party endpoints
|
||||
# (MiniMax, Microsoft Foundry, self-hosted proxies) cannot validate
|
||||
# them and will reject them outright. When targeting a third-party
|
||||
# endpoint, strip ALL thinking/redacted_thinking blocks from every
|
||||
# assistant message — the third-party will generate its own
|
||||
# thinking blocks if it supports extended thinking.
|
||||
#
|
||||
# For direct Anthropic (strategy following clawdbot/OpenClaw):
|
||||
# 1. Strip thinking/redacted_thinking from all assistant messages
|
||||
# EXCEPT the last one — preserves reasoning continuity on the
|
||||
# current tool-use chain while avoiding stale signature errors.
|
||||
# 2. Downgrade unsigned thinking blocks (no signature) to text —
|
||||
# Anthropic can't validate them and will reject them.
|
||||
# 3. Strip cache_control from thinking/redacted_thinking blocks —
|
||||
# cache markers can interfere with signature validation.
|
||||
|
||||
def _manage_thinking_signatures(
|
||||
result: List[Dict[str, Any]], base_url: str | None, model: str | None
|
||||
) -> None:
|
||||
"""Strip or preserve thinking blocks based on endpoint type.
|
||||
|
||||
Anthropic signs thinking blocks against the full turn content.
|
||||
Any upstream mutation (context compression, session truncation, orphan
|
||||
stripping, message merging) invalidates the signature, causing HTTP 400
|
||||
"Invalid signature in thinking block".
|
||||
|
||||
Signatures are Anthropic-proprietary. Third-party endpoints (MiniMax,
|
||||
Azure AI Foundry, AWS Bedrock, self-hosted proxies) cannot validate them
|
||||
and will reject them outright. Kimi's /coding and DeepSeek's /anthropic
|
||||
endpoints speak the Anthropic protocol upstream but require unsigned
|
||||
thinking blocks (synthesised from ``reasoning_content``) to round-trip on
|
||||
replayed assistant tool-call messages. See hermes-agent#13848 (Kimi) and
|
||||
hermes-agent#16748 (DeepSeek).
|
||||
|
||||
Mutates ``result`` in place.
|
||||
"""
|
||||
_THINKING_TYPES = frozenset(("thinking", "redacted_thinking"))
|
||||
_is_third_party = _is_third_party_anthropic_endpoint(base_url)
|
||||
# Kimi /coding and DeepSeek /anthropic share a contract: both speak the
|
||||
# Anthropic Messages protocol upstream but require that thinking blocks
|
||||
# synthesised from reasoning_content round-trip on subsequent turns when
|
||||
# thinking is enabled. Signed Anthropic blocks still have to be stripped
|
||||
# (neither endpoint can validate Anthropic's signatures); unsigned blocks
|
||||
# are preserved. See hermes-agent#13848 (Kimi) and #16748 (DeepSeek).
|
||||
# Kimi / DeepSeek share a contract: strip signed Anthropic blocks
|
||||
# (neither upstream can validate Anthropic signatures), preserve unsigned
|
||||
# ones synthesised from reasoning_content. See #13848, #16748.
|
||||
_preserve_unsigned_thinking = (
|
||||
_is_kimi_family_endpoint(base_url, model)
|
||||
or _is_deepseek_anthropic_endpoint(base_url)
|
||||
@@ -1910,26 +1879,19 @@ def convert_messages_to_anthropic(
|
||||
continue
|
||||
|
||||
if _preserve_unsigned_thinking:
|
||||
# Kimi's /coding and DeepSeek's /anthropic endpoints both enable
|
||||
# thinking server-side and require unsigned thinking blocks on
|
||||
# replayed assistant tool-call messages. Strip signed Anthropic
|
||||
# blocks (neither upstream can validate Anthropic signatures) but
|
||||
# preserve the unsigned ones we synthesised from reasoning_content.
|
||||
# Kimi / DeepSeek: strip signed, preserve unsigned.
|
||||
new_content = []
|
||||
for b in m["content"]:
|
||||
if not isinstance(b, dict) or b.get("type") not in _THINKING_TYPES:
|
||||
new_content.append(b)
|
||||
continue
|
||||
if b.get("signature") or b.get("data"):
|
||||
# Anthropic-signed block — upstream can't validate, strip
|
||||
# Signed (or redacted-with-data) — upstream can't validate, strip.
|
||||
continue
|
||||
# Unsigned thinking (synthesised from reasoning_content) —
|
||||
# keep it: the upstream needs it for message-history validation.
|
||||
new_content.append(b)
|
||||
m["content"] = new_content or [{"type": "text", "text": "(empty)"}]
|
||||
elif _is_third_party or idx != last_assistant_idx:
|
||||
# Third-party endpoint: strip ALL thinking blocks from every
|
||||
# assistant message — signatures are Anthropic-proprietary.
|
||||
# Third-party: strip ALL thinking blocks (signatures are proprietary).
|
||||
# Direct Anthropic: strip from non-latest assistant messages only.
|
||||
stripped = [
|
||||
b for b in m["content"]
|
||||
@@ -1937,24 +1899,21 @@ def convert_messages_to_anthropic(
|
||||
]
|
||||
m["content"] = stripped or [{"type": "text", "text": "(thinking elided)"}]
|
||||
else:
|
||||
# Latest assistant on direct Anthropic: keep signed thinking
|
||||
# blocks for reasoning continuity; downgrade unsigned ones to
|
||||
# plain text.
|
||||
# Latest assistant on direct Anthropic: keep signed, downgrade unsigned
|
||||
# to text so the reasoning isn't lost.
|
||||
new_content = []
|
||||
for b in m["content"]:
|
||||
if not isinstance(b, dict) or b.get("type") not in _THINKING_TYPES:
|
||||
new_content.append(b)
|
||||
continue
|
||||
if b.get("type") == "redacted_thinking":
|
||||
# Redacted blocks use 'data' for the signature payload
|
||||
# Redacted blocks use 'data' for the signature payload —
|
||||
# drop the block when 'data' is missing (can't be validated).
|
||||
if b.get("data"):
|
||||
new_content.append(b)
|
||||
# else: drop — no data means it can't be validated
|
||||
elif b.get("signature"):
|
||||
# Signed thinking block — keep it
|
||||
new_content.append(b)
|
||||
else:
|
||||
# Unsigned thinking — downgrade to text so it's not lost
|
||||
thinking_text = b.get("thinking", "")
|
||||
if thinking_text:
|
||||
new_content.append({"type": "text", "text": thinking_text})
|
||||
@@ -1966,12 +1925,15 @@ def convert_messages_to_anthropic(
|
||||
if isinstance(b, dict) and b.get("type") in _THINKING_TYPES:
|
||||
b.pop("cache_control", None)
|
||||
|
||||
# ── Image eviction: keep only the most recent N screenshots ─────
|
||||
# computer_use screenshots (base64 images) sit inside tool_result
|
||||
# blocks: they accumulate and are sent with every API call. Each
|
||||
# costs ~1,465 tokens; after 10+ the conversation becomes slow
|
||||
# even for simple text queries. Walk backward, keep the most recent
|
||||
# _MAX_KEEP_IMAGES, replace older ones with a text placeholder.
|
||||
|
||||
def _evict_old_screenshots(result: List[Dict[str, Any]]) -> None:
|
||||
"""Keep only the most recent ``_MAX_KEEP_IMAGES`` computer-use screenshots.
|
||||
|
||||
Base64 images cost ~1,465 tokens each and accumulate across tool calls.
|
||||
Walk backward, keep the most recent N, replace older ones with a placeholder.
|
||||
|
||||
Mutates ``result`` in place.
|
||||
"""
|
||||
_MAX_KEEP_IMAGES = 3
|
||||
_image_count = 0
|
||||
for msg in reversed(result):
|
||||
@@ -1998,6 +1960,68 @@ def convert_messages_to_anthropic(
|
||||
for b in inner
|
||||
]
|
||||
|
||||
|
||||
def convert_messages_to_anthropic(
|
||||
messages: List[Dict],
|
||||
base_url: str | None = None,
|
||||
model: str | None = None,
|
||||
) -> Tuple[Optional[Any], List[Dict]]:
|
||||
"""Convert OpenAI-format messages to Anthropic format.
|
||||
|
||||
Returns (system_prompt, anthropic_messages).
|
||||
System messages are extracted since Anthropic takes them as a separate param.
|
||||
system_prompt is a string or list of content blocks (when cache_control present).
|
||||
|
||||
When *base_url* is provided and points to a third-party Anthropic-compatible
|
||||
endpoint, all thinking block signatures are stripped. Signatures are
|
||||
Anthropic-proprietary — third-party endpoints cannot validate them and will
|
||||
reject them with HTTP 400 "Invalid signature in thinking block".
|
||||
|
||||
When *model* is provided and matches the Kimi / Moonshot family (or
|
||||
*base_url* is a Kimi / Moonshot host), unsigned thinking blocks
|
||||
synthesised from ``reasoning_content`` are preserved on replayed
|
||||
assistant tool-call messages — Kimi requires the field to exist, even
|
||||
if empty.
|
||||
"""
|
||||
system = None
|
||||
result: List[Dict[str, Any]] = []
|
||||
|
||||
for m in messages:
|
||||
role = m.get("role", "user")
|
||||
content = m.get("content", "")
|
||||
|
||||
if role == "system":
|
||||
if isinstance(content, list):
|
||||
# Preserve cache_control markers on content blocks
|
||||
has_cache = any(
|
||||
p.get("cache_control") for p in content if isinstance(p, dict)
|
||||
)
|
||||
if has_cache:
|
||||
system = [p for p in content if isinstance(p, dict)]
|
||||
else:
|
||||
system = "\n".join(
|
||||
p["text"] for p in content if p.get("type") == "text"
|
||||
)
|
||||
else:
|
||||
system = content
|
||||
continue
|
||||
|
||||
if role == "assistant":
|
||||
result.append(_convert_assistant_message(m))
|
||||
continue
|
||||
|
||||
if role == "tool":
|
||||
_convert_tool_message_to_result(result, m)
|
||||
continue
|
||||
|
||||
# Regular user message
|
||||
result.append(_convert_user_message(content))
|
||||
|
||||
_strip_orphaned_tool_blocks(result)
|
||||
result = _merge_consecutive_roles(result)
|
||||
_manage_thinking_signatures(result, base_url, model)
|
||||
_evict_old_screenshots(result)
|
||||
|
||||
return system, result
|
||||
|
||||
|
||||
|
||||
@@ -46,6 +46,7 @@ from agent.message_sanitization import (
|
||||
_strip_non_ascii,
|
||||
)
|
||||
from agent.model_metadata import (
|
||||
MINIMUM_CONTEXT_LENGTH,
|
||||
estimate_messages_tokens_rough,
|
||||
estimate_request_tokens_rough,
|
||||
get_next_probe_tier,
|
||||
@@ -73,6 +74,50 @@ from utils import base_url_host_matches, env_var_enabled
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _ollama_context_limit_error(agent: Any, request_tokens: int) -> Optional[str]:
    """Build the user-facing error for an undersized Ollama runtime context.

    Returns ``None`` unless the agent has tools and ``agent._ollama_num_ctx``
    is a positive int below ``MINIMUM_CONTEXT_LENGTH``. In that case a
    warning with the model, provider, base URL, context sizes, estimated
    request tokens, tool count and session id is logged, and a remediation
    message is returned.

    Args:
        agent: Object whose attributes are read with ``getattr`` (missing
            attributes fall back to placeholders).
        request_tokens: Estimated request size; only used in the log line.
    """
    tools = getattr(agent, "tools", None)
    if not tools:
        return None

    num_ctx = getattr(agent, "_ollama_num_ctx", None)
    context_is_undersized = (
        isinstance(num_ctx, int) and 0 < num_ctx < MINIMUM_CONTEXT_LENGTH
    )
    if not context_is_undersized:
        return None

    model_name = getattr(agent, "model", "") or "the selected model"
    endpoint = getattr(agent, "base_url", "") or "unknown base URL"
    provider_name = getattr(agent, "provider", "") or "unknown"
    session = getattr(agent, "session_id", None) or "none"

    log_format = (
        "Ollama runtime context too small for Hermes tool use: "
        "model=%s provider=%s base_url=%s runtime_context=%d "
        "minimum_context=%d estimated_request_tokens=%d tool_count=%d "
        "session=%s"
    )
    logger.warning(
        log_format,
        model_name,
        provider_name,
        endpoint,
        num_ctx,
        MINIMUM_CONTEXT_LENGTH,
        request_tokens,
        len(tools),
        session,
    )

    headline = (
        f"Ollama loaded `{model_name}` with only {num_ctx:,} tokens of runtime "
        f"context, but Hermes needs at least {MINIMUM_CONTEXT_LENGTH:,} tokens "
        "for reliable tool use.\n\n"
    )
    remedy = (
        "Increase the Ollama context for this model and restart/reload the "
        "model before trying again. A known-good starting point is 65,536 "
        "tokens. In Hermes config, set `model.ollama_num_ctx: 65536` "
        "(and `model.context_length: 65536` if you also override the displayed "
        "model context). If you manage the model through an Ollama Modelfile, "
        "set `PARAMETER num_ctx 65536` there instead."
    )
    return headline + remedy
|
||||
|
||||
|
||||
def _ra():
|
||||
"""Lazy reference to ``run_agent`` so callers can patch
|
||||
``run_agent.handle_function_call`` / ``run_agent._set_interrupt`` /
|
||||
@@ -527,6 +572,7 @@ def run_conversation(
|
||||
api_call_count = 0
|
||||
final_response = None
|
||||
interrupted = False
|
||||
failed = False
|
||||
codex_ack_continuations = 0
|
||||
length_continue_retries = 0
|
||||
truncated_tool_call_retries = 0
|
||||
@@ -883,6 +929,26 @@ def run_conversation(
|
||||
# Calculate approximate request size for logging
|
||||
total_chars = sum(len(str(msg)) for msg in api_messages)
|
||||
approx_tokens = estimate_messages_tokens_rough(api_messages)
|
||||
approx_request_tokens = estimate_request_tokens_rough(
|
||||
api_messages, tools=agent.tools or None
|
||||
)
|
||||
|
||||
_runtime_context_error = _ollama_context_limit_error(
|
||||
agent, approx_request_tokens
|
||||
)
|
||||
if _runtime_context_error:
|
||||
final_response = _runtime_context_error
|
||||
failed = True
|
||||
_turn_exit_reason = "ollama_runtime_context_too_small"
|
||||
messages.append({"role": "assistant", "content": final_response})
|
||||
agent._emit_status("❌ Ollama runtime context is too small for Hermes tool use")
|
||||
api_call_count -= 1
|
||||
agent._api_call_count = api_call_count
|
||||
try:
|
||||
agent.iteration_budget.refund()
|
||||
except Exception:
|
||||
pass
|
||||
break
|
||||
|
||||
# Thinking spinner for quiet mode (animated during API call)
|
||||
thinking_spinner = None
|
||||
@@ -923,6 +989,7 @@ def run_conversation(
|
||||
copilot_auth_retry_attempted=False
|
||||
thinking_sig_retry_attempted = False
|
||||
image_shrink_retry_attempted = False
|
||||
multimodal_tool_content_retry_attempted = False
|
||||
oauth_1m_beta_retry_attempted = False
|
||||
llama_cpp_grammar_retry_attempted = False
|
||||
has_retried_429 = False
|
||||
@@ -1994,6 +2061,31 @@ def run_conversation(
|
||||
"or shrink didn't reduce size; surfacing original error."
|
||||
)
|
||||
|
||||
# Multimodal-tool-content recovery: providers that follow
|
||||
# the OpenAI spec strictly (tool message content must be a
|
||||
# string) reject our list-type content with a 400. Strip
|
||||
# image parts from any list-type tool messages, mark the
|
||||
# (provider, model) as no-list-tool-content for the rest
|
||||
# of this session so future tool results preemptively
|
||||
# downgrade, and retry once. See issue #27344.
|
||||
if (
|
||||
classified.reason == FailoverReason.multimodal_tool_content_unsupported
|
||||
and not multimodal_tool_content_retry_attempted
|
||||
):
|
||||
multimodal_tool_content_retry_attempted = True
|
||||
if agent._try_strip_image_parts_from_tool_messages(api_messages):
|
||||
agent._vprint(
|
||||
f"{agent.log_prefix}📐 Provider rejected list-type tool content — "
|
||||
f"downgraded screenshots to text and retrying...",
|
||||
force=True,
|
||||
)
|
||||
continue
|
||||
else:
|
||||
logger.info(
|
||||
"multimodal-tool-content recovery: no list-type tool "
|
||||
"messages with image parts found; surfacing original error."
|
||||
)
|
||||
|
||||
# Anthropic OAuth subscription rejected the 1M-context beta
|
||||
# header ("long context beta is not yet available for this
|
||||
# subscription"). Disable the beta for the rest of this
|
||||
@@ -3848,7 +3940,11 @@ def run_conversation(
|
||||
)
|
||||
|
||||
# Determine if conversation completed successfully
|
||||
completed = final_response is not None and api_call_count < agent.max_iterations
|
||||
completed = (
|
||||
final_response is not None
|
||||
and api_call_count < agent.max_iterations
|
||||
and not failed
|
||||
)
|
||||
|
||||
# Save trajectory if enabled. ``user_message`` may be a multimodal
|
||||
# list of parts; the trajectory format wants a plain string.
|
||||
@@ -3998,6 +4094,7 @@ def run_conversation(
|
||||
"api_calls": api_call_count,
|
||||
"completed": completed,
|
||||
"turn_exit_reason": _turn_exit_reason,
|
||||
"failed": failed,
|
||||
"partial": False, # True only when stopped due to invalid tool calls
|
||||
"interrupted": interrupted,
|
||||
"response_previewed": getattr(agent, "_response_was_previewed", False),
|
||||
|
||||
@@ -50,6 +50,7 @@ class FailoverReason(enum.Enum):
|
||||
|
||||
# Request format
|
||||
format_error = "format_error" # 400 bad request — abort or strip + retry
|
||||
multimodal_tool_content_unsupported = "multimodal_tool_content_unsupported" # Provider rejected list-type content in tool messages (e.g. Xiaomi MiMo) — downgrade to text and retry
|
||||
|
||||
# Provider-specific
|
||||
thinking_signature = "thinking_signature" # Anthropic thinking block sig invalid
|
||||
@@ -165,6 +166,32 @@ _IMAGE_TOO_LARGE_PATTERNS = [
|
||||
# the likely culprit; we still try the shrink path before giving up.
|
||||
]
|
||||
|
||||
# Providers that follow the OpenAI spec strictly require tool message
|
||||
# ``content`` to be a string. Some (Anthropic native, Codex Responses,
|
||||
# Gemini native, first-party OpenAI) extend this to accept a content-parts
|
||||
# list (text + image_url) so screenshots from computer_use survive. Others
|
||||
# (Xiaomi MiMo, some Alibaba endpoints, a long tail of OpenAI-compatible
|
||||
# providers) reject the list with a 400 — the patterns below are the most
|
||||
# common error shapes we see. Recovery: strip image parts from tool
|
||||
# messages in-place, record the (provider, model) for the rest of the
|
||||
# session so we don't waste another call learning the same lesson, retry.
|
||||
#
|
||||
# See: https://github.com/NousResearch/hermes-agent/issues/27344
|
||||
_MULTIMODAL_TOOL_CONTENT_PATTERNS = [
|
||||
# Xiaomi MiMo: {"error":{"code":"400","message":"Param Incorrect","param":"text is not set"}}
|
||||
"text is not set",
|
||||
# Generic "tool message must be string" shapes
|
||||
"tool message content must be a string",
|
||||
"tool content must be a string",
|
||||
"tool message must be a string",
|
||||
# OpenAI-compat servers that reject list-type tool content with a
|
||||
# schema-validation message
|
||||
"expected string, got list",
|
||||
"expected string, got array",
|
||||
# Alibaba/DashScope variant
|
||||
"tool_call.content must be string",
|
||||
]
|
||||
|
||||
# Context overflow patterns
|
||||
_CONTEXT_OVERFLOW_PATTERNS = [
|
||||
"context length",
|
||||
@@ -781,6 +808,19 @@ def _classify_400(
|
||||
) -> ClassifiedError:
|
||||
"""Classify 400 Bad Request — context overflow, format error, or generic."""
|
||||
|
||||
# Multimodal tool content rejected from 400. Must be checked BEFORE
|
||||
# image_too_large because the recovery is different (strip image parts
|
||||
# from tool messages, mark the model as no-list-tool-content for the
|
||||
# rest of the session) and BEFORE context_overflow because some of the
|
||||
# patterns ("text is not set") are ambiguous in isolation but become
|
||||
# specific when combined with a 400 on a request known to contain
|
||||
# multimodal tool content.
|
||||
if any(p in error_msg for p in _MULTIMODAL_TOOL_CONTENT_PATTERNS):
|
||||
return result_fn(
|
||||
FailoverReason.multimodal_tool_content_unsupported,
|
||||
retryable=True,
|
||||
)
|
||||
|
||||
# Image-too-large from 400 (Anthropic's 5 MB per-image check fires this way).
|
||||
# Must be checked BEFORE context_overflow because messages can trip both
|
||||
# patterns ("exceeds" + "image") and image-shrink is a cheaper recovery.
|
||||
@@ -922,6 +962,13 @@ def _classify_by_message(
|
||||
should_compress=True,
|
||||
)
|
||||
|
||||
# Multimodal tool content patterns (from message text when no status_code)
|
||||
if any(p in error_msg for p in _MULTIMODAL_TOOL_CONTENT_PATTERNS):
|
||||
return result_fn(
|
||||
FailoverReason.multimodal_tool_content_unsupported,
|
||||
retryable=True,
|
||||
)
|
||||
|
||||
# Image-too-large patterns (from message text when no status_code)
|
||||
if any(p in error_msg for p in _IMAGE_TOO_LARGE_PATTERNS):
|
||||
return result_fn(
|
||||
|
||||
+38
-1
@@ -1258,6 +1258,10 @@ def build_nous_subscription_prompt(valid_tool_names: "set[str] | None" = None) -
|
||||
"terminal",
|
||||
"process",
|
||||
"execute_code",
|
||||
"app_search_tools",
|
||||
"app_tool_schemas",
|
||||
"app_execute_tools",
|
||||
"app_manage_connections",
|
||||
}
|
||||
|
||||
if valid_names and not (valid_names & relevant_tool_names):
|
||||
@@ -1279,7 +1283,7 @@ def build_nous_subscription_prompt(valid_tool_names: "set[str] | None" = None) -
|
||||
|
||||
lines = [
|
||||
"# Nous Subscription",
|
||||
"Nous subscription includes managed web tools (Firecrawl), image generation (FAL), OpenAI TTS, and browser automation (Browser Use) by default. Modal execution is optional.",
|
||||
"Nous subscription includes managed web tools (Firecrawl), image generation (FAL), OpenAI TTS, browser automation (Browser Use), and app integrations (500+ apps) by default. Modal execution is optional.",
|
||||
"Current capability status:",
|
||||
]
|
||||
lines.extend(_status_line(feature) for feature in features.items())
|
||||
@@ -1294,6 +1298,39 @@ def build_nous_subscription_prompt(valid_tool_names: "set[str] | None" = None) -
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# App tools (500+ external integrations) behavioural prompt
|
||||
# =========================================================================
|
||||
|
||||
_APP_TOOLS_PROMPT = """\
|
||||
## App Tools (500+ External Integrations)
|
||||
|
||||
You have app_search_tools, app_tool_schemas, app_execute_tools, and app_manage_connections available RIGHT NOW as callable tools. They are already configured and connected to the Nous tool gateway — no SDK installation, no API keys, no plugin setup needed. Just call them.
|
||||
|
||||
**When to use:** When a user asks to interact with ANY external app or service — Gmail, Slack, GitHub, Jira, Notion, Google Sheets, Linear, HubSpot, Figma, Salesforce, or any of 500+ other apps. ALWAYS prefer these tools over loading skills about the same service (e.g. do NOT load the 'linear', 'airtable', 'google-workspace', 'notion', or any similar skill — use app_search_tools instead). Do NOT suggest installing SDKs, CLI tools, MCP servers, or API keys for external services — call app_search_tools directly.
|
||||
|
||||
**Workflow:**
|
||||
1. Call app_search_tools with a clear use_case description to discover available tools
|
||||
2. Check the response for connection status — if no active connection, call app_manage_connections and share the auth link with the user
|
||||
3. Review the execution plan and pitfalls in the search response before executing
|
||||
4. If a tool has schemaRef instead of input_schema, call app_tool_schemas to get the full schema
|
||||
5. Execute tools via app_execute_tools with schema-compliant arguments
|
||||
|
||||
**Session tracking:** Pass session: {generate_id: true} on your first app_search_tools call. Reuse the returned session.id in all subsequent calls. Generate a new session when the user pivots to a different task.
|
||||
|
||||
**Important:** Never fabricate tool slugs or argument field names. Only use slugs and schemas returned by app_search_tools or app_tool_schemas."""
|
||||
|
||||
|
||||
def build_app_tools_prompt(valid_tool_names: "set[str] | None" = None) -> str:
    """Return the app-tools behavioural guidance, or ``""`` when it does not apply.

    The guidance is emitted only when ``valid_tool_names`` is non-empty and
    contains ``"app_search_tools"``. An unknown (``None``) or empty tool set is
    treated conservatively and yields an empty string.
    """
    toolset_active = bool(valid_tool_names) and "app_search_tools" in valid_tool_names
    return _APP_TOOLS_PROMPT if toolset_active else ""
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Context files (SOUL.md, AGENTS.md, .cursorrules)
|
||||
# =========================================================================
|
||||
|
||||
+19
-1
@@ -12,7 +12,7 @@ import sys
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Set, Tuple
|
||||
|
||||
from hermes_constants import get_config_path, get_skills_dir
|
||||
from hermes_constants import get_config_path, get_skills_dir, is_termux
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -136,6 +136,14 @@ def skill_matches_platform(frontmatter: Dict[str, Any]) -> bool:
|
||||
|
||||
If the field is absent or empty the skill is compatible with **all**
|
||||
platforms (backward-compatible default).
|
||||
|
||||
Termux note: on Termux/Android, ``sys.platform`` is ``"linux"`` on
|
||||
older Pythons but became ``"android"`` on Python 3.13+. Termux is a
|
||||
Linux userland riding on the Android kernel, so skills tagged
|
||||
``linux`` are treated as compatible in Termux regardless of which
|
||||
``sys.platform`` value Python reports. Individual Linux commands
|
||||
inside a skill may still misbehave (no systemd, BusyBox utils, no
|
||||
apt/dnf, etc.) but that is on the skill, not on platform gating.
|
||||
"""
|
||||
platforms = frontmatter.get("platforms")
|
||||
if not platforms:
|
||||
@@ -143,11 +151,21 @@ def skill_matches_platform(frontmatter: Dict[str, Any]) -> bool:
|
||||
if not isinstance(platforms, list):
|
||||
platforms = [platforms]
|
||||
current = sys.platform
|
||||
running_in_termux = is_termux()
|
||||
for platform in platforms:
|
||||
normalized = str(platform).lower().strip()
|
||||
mapped = PLATFORM_MAP.get(normalized, normalized)
|
||||
if current.startswith(mapped):
|
||||
return True
|
||||
# Termux runs a Linux userland on Android. Accept linux-tagged
|
||||
# skills regardless of whether sys.platform is "linux" (pre-3.13
|
||||
# Termux) or "android" (Python 3.13+ Termux, and any other
|
||||
# Android runtime).
|
||||
if running_in_termux and mapped == "linux":
|
||||
return True
|
||||
# Explicit termux/android tags match a Termux session too.
|
||||
if running_in_termux and mapped in ("termux", "android"):
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
|
||||
@@ -130,6 +130,12 @@ def build_system_prompt_parts(agent: Any, system_message: Optional[str] = None)
|
||||
nous_subscription_prompt = _r.build_nous_subscription_prompt(agent.valid_tool_names)
|
||||
if nous_subscription_prompt:
|
||||
stable_parts.append(nous_subscription_prompt)
|
||||
|
||||
# App tools (500+ external integrations) behavioural guidance
|
||||
app_tools_prompt = _r.build_app_tools_prompt(agent.valid_tool_names)
|
||||
if app_tools_prompt:
|
||||
stable_parts.append(app_tools_prompt)
|
||||
|
||||
# Tool-use enforcement: tells the model to actually call tools instead
|
||||
# of describing intended actions. Controlled by config.yaml
|
||||
# agent.tool_use_enforcement:
|
||||
|
||||
+95
-20
@@ -18,6 +18,7 @@ Security features (based on OWASP + NIST SP 800-63-4 guidance):
|
||||
Storage: ~/.hermes/pairing/
|
||||
"""
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
import os
|
||||
import secrets
|
||||
@@ -148,6 +149,11 @@ class PairingStore:
|
||||
|
||||
# ----- Pending codes -----
|
||||
|
||||
@staticmethod
|
||||
def _hash_code(code: str, salt: bytes) -> str:
|
||||
"""Hash a pairing code with the given salt using SHA-256."""
|
||||
return hashlib.sha256(salt + code.encode("utf-8")).hexdigest()
|
||||
|
||||
def generate_code(
|
||||
self, platform: str, user_id: str, user_name: str = ""
|
||||
) -> Optional[str]:
|
||||
@@ -158,6 +164,9 @@ class PairingStore:
|
||||
- User is rate-limited (too recent request)
|
||||
- Max pending codes reached for this platform
|
||||
- User/platform is in lockout due to failed attempts
|
||||
|
||||
The code is NOT stored in plaintext. Only a salted SHA-256 hash is
|
||||
persisted so that reading the pending file does not reveal codes.
|
||||
"""
|
||||
with self._lock:
|
||||
self._cleanup_expired(platform)
|
||||
@@ -178,8 +187,17 @@ class PairingStore:
|
||||
# Generate cryptographically random code
|
||||
code = "".join(secrets.choice(ALPHABET) for _ in range(CODE_LENGTH))
|
||||
|
||||
# Store pending request
|
||||
pending[code] = {
|
||||
# Hash the code with a random salt before storing
|
||||
salt = os.urandom(16)
|
||||
code_hash = self._hash_code(code, salt)
|
||||
|
||||
# Use a unique entry id as the key (not the code itself)
|
||||
entry_id = secrets.token_hex(8)
|
||||
|
||||
# Store pending request with hashed code
|
||||
pending[entry_id] = {
|
||||
"hash": code_hash,
|
||||
"salt": salt.hex(),
|
||||
"user_id": user_id,
|
||||
"user_name": user_name,
|
||||
"created_at": time.time(),
|
||||
@@ -195,10 +213,16 @@ class PairingStore:
|
||||
"""
|
||||
Approve a pairing code. Adds the user to the approved list.
|
||||
|
||||
Returns {user_id, user_name} on success, None if code is
|
||||
Returns ``{user_id, user_name}`` on success, ``None`` if the code is
|
||||
invalid/expired OR the platform is currently locked out after
|
||||
``MAX_FAILED_ATTEMPTS`` failed approvals (#10195). Callers can
|
||||
disambiguate with ``_is_locked_out(platform)``.
|
||||
|
||||
Verification: the user-provided code is hashed with each stored
|
||||
entry's salt and compared to the stored hash using constant-time
|
||||
comparison. Pre-hash entries (legacy plaintext-key format from
|
||||
pre-upgrade pending.json files) are silently ignored — they get
|
||||
pruned at TTL by ``_cleanup_expired``.
|
||||
"""
|
||||
with self._lock:
|
||||
self._cleanup_expired(platform)
|
||||
@@ -213,34 +237,73 @@ class PairingStore:
|
||||
return None
|
||||
|
||||
pending = self._load_json(self._pending_path(platform))
|
||||
if code not in pending:
|
||||
|
||||
# Find the entry whose hash matches the provided code.
|
||||
# Tolerate legacy plaintext-key entries (no salt/hash) and
|
||||
# malformed entries — skip them rather than KeyError, so an
|
||||
# in-place upgrade across an existing pending.json doesn't
|
||||
# crash on the first approve call. Legacy entries get pruned
|
||||
# at their TTL by _cleanup_expired.
|
||||
matched_key = None
|
||||
matched_entry = None
|
||||
for entry_id, entry in pending.items():
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
if "salt" not in entry or "hash" not in entry:
|
||||
continue
|
||||
try:
|
||||
salt = bytes.fromhex(entry["salt"])
|
||||
except ValueError:
|
||||
continue
|
||||
candidate_hash = self._hash_code(code, salt)
|
||||
if secrets.compare_digest(candidate_hash, entry["hash"]):
|
||||
matched_key = entry_id
|
||||
matched_entry = entry
|
||||
break
|
||||
|
||||
if matched_key is None:
|
||||
self._record_failed_attempt(platform)
|
||||
return None
|
||||
|
||||
entry = pending.pop(code)
|
||||
del pending[matched_key]
|
||||
self._save_json(self._pending_path(platform), pending)
|
||||
|
||||
# Add to approved list
|
||||
self._approve_user(platform, entry["user_id"], entry.get("user_name", ""))
|
||||
self._approve_user(platform, matched_entry["user_id"],
|
||||
matched_entry.get("user_name", ""))
|
||||
|
||||
return {
|
||||
"user_id": entry["user_id"],
|
||||
"user_name": entry.get("user_name", ""),
|
||||
"user_id": matched_entry["user_id"],
|
||||
"user_name": matched_entry.get("user_name", ""),
|
||||
}
|
||||
|
||||
def list_pending(self, platform: str = None) -> list:
|
||||
"""List pending pairing requests, optionally filtered by platform."""
|
||||
"""List pending pairing requests, optionally filtered by platform.
|
||||
|
||||
Codes are stored hashed — the ``code`` field is replaced with the
|
||||
first 8 hex characters of the hash so admins can distinguish entries
|
||||
without revealing the original code. Legacy plaintext-key entries
|
||||
(pre-hash format) are shown with a "legacy" placeholder so admins
|
||||
can see them age out without crashing on a missing ``hash`` field.
|
||||
"""
|
||||
results = []
|
||||
platforms = [platform] if platform else self._all_platforms("pending")
|
||||
for p in platforms:
|
||||
self._cleanup_expired(p)
|
||||
pending = self._load_json(self._pending_path(p))
|
||||
for code, info in pending.items():
|
||||
age_min = int((time.time() - info["created_at"]) / 60)
|
||||
for entry_id, info in pending.items():
|
||||
if not isinstance(info, dict):
|
||||
continue
|
||||
created_at = info.get("created_at")
|
||||
if not isinstance(created_at, (int, float)):
|
||||
continue
|
||||
age_min = int((time.time() - created_at) / 60)
|
||||
hash_val = info.get("hash")
|
||||
code_display = hash_val[:8] if isinstance(hash_val, str) else "legacy"
|
||||
results.append({
|
||||
"platform": p,
|
||||
"code": code,
|
||||
"user_id": info["user_id"],
|
||||
"code": code_display,
|
||||
"user_id": info.get("user_id", ""),
|
||||
"user_name": info.get("user_name", ""),
|
||||
"age_minutes": age_min,
|
||||
})
|
||||
@@ -297,17 +360,29 @@ class PairingStore:
|
||||
# ----- Cleanup -----
|
||||
|
||||
def _cleanup_expired(self, platform: str) -> None:
|
||||
"""Remove expired pending codes."""
|
||||
"""Remove expired pending codes.
|
||||
|
||||
Tolerant of malformed / legacy entries — anything without a numeric
|
||||
``created_at`` is treated as expired (it's effectively unusable
|
||||
with the new hash-keyed schema anyway).
|
||||
"""
|
||||
path = self._pending_path(platform)
|
||||
pending = self._load_json(path)
|
||||
now = time.time()
|
||||
expired = [
|
||||
code for code, info in pending.items()
|
||||
if (now - info["created_at"]) > CODE_TTL_SECONDS
|
||||
]
|
||||
expired = []
|
||||
for entry_id, info in pending.items():
|
||||
if not isinstance(info, dict):
|
||||
expired.append(entry_id)
|
||||
continue
|
||||
created_at = info.get("created_at")
|
||||
if not isinstance(created_at, (int, float)):
|
||||
expired.append(entry_id)
|
||||
continue
|
||||
if (now - created_at) > CODE_TTL_SECONDS:
|
||||
expired.append(entry_id)
|
||||
if expired:
|
||||
for code in expired:
|
||||
del pending[code]
|
||||
for entry_id in expired:
|
||||
del pending[entry_id]
|
||||
self._save_json(path, pending)
|
||||
|
||||
def _all_platforms(self, suffix: str) -> list:
|
||||
|
||||
@@ -308,11 +308,26 @@ class WebhookAdapter(BasePlatformAdapter):
|
||||
data = json.loads(subs_path.read_text(encoding="utf-8"))
|
||||
if not isinstance(data, dict):
|
||||
return
|
||||
# Merge: static routes take precedence over dynamic ones
|
||||
self._dynamic_routes = {
|
||||
k: v for k, v in data.items()
|
||||
if k not in self._static_routes
|
||||
}
|
||||
# Merge: static routes take precedence over dynamic ones.
|
||||
# Reject any dynamic route whose effective secret is empty —
|
||||
# an empty secret would cause _handle_webhook to skip HMAC
|
||||
# validation entirely, letting unauthenticated callers in.
|
||||
new_dynamic: Dict[str, dict] = {}
|
||||
for k, v in data.items():
|
||||
if k in self._static_routes:
|
||||
continue
|
||||
effective_secret = v.get("secret", self._global_secret)
|
||||
if not effective_secret:
|
||||
logger.warning(
|
||||
"[webhook] Dynamic route '%s' skipped: 'secret' is "
|
||||
"missing or empty. Set a valid HMAC secret, or use "
|
||||
"'%s' to explicitly disable auth (testing only).",
|
||||
k,
|
||||
_INSECURE_NO_AUTH,
|
||||
)
|
||||
continue
|
||||
new_dynamic[k] = v
|
||||
self._dynamic_routes = new_dynamic
|
||||
self._routes = {**self._dynamic_routes, **self._static_routes}
|
||||
self._dynamic_routes_mtime = mtime
|
||||
logger.info(
|
||||
|
||||
+47
-2
@@ -1778,8 +1778,17 @@ DEFAULT_CONFIG = {
|
||||
},
|
||||
},
|
||||
|
||||
# ── Nous Portal feature flags ──────────────────────────────────────
|
||||
"portal": {
|
||||
# App tools: 500+ external app integrations (Gmail, Slack, GitHub,
|
||||
# Notion, etc.) via the Nous tool gateway. Requires an active Nous
|
||||
# subscription. Set to False to hide the app_tools toolset even
|
||||
# when a subscription is present.
|
||||
"app_tools": True,
|
||||
},
|
||||
|
||||
# Config schema version - bump this when adding new required fields
|
||||
"_config_version": 23,
|
||||
"_config_version": 24,
|
||||
}
|
||||
|
||||
# =============================================================================
|
||||
@@ -2267,6 +2276,22 @@ OPTIONAL_ENV_VARS = {
|
||||
"category": "tool",
|
||||
"advanced": True,
|
||||
},
|
||||
"TOOLS_GATEWAY_URL": {
|
||||
"description": "Explicit URL for the tools-gateway (app integrations). Overrides the auto-derived tools-gateway.nousresearch.com",
|
||||
"prompt": "Tools-gateway URL",
|
||||
"url": None,
|
||||
"password": False,
|
||||
"category": "tool",
|
||||
"advanced": True,
|
||||
},
|
||||
"PORTAL_APP_TOOLS": {
|
||||
"description": "Enable app integration tools (500+ apps via Nous tool gateway). Requires Nous subscription.",
|
||||
"prompt": "Enable app tools (500+ apps)",
|
||||
"url": None,
|
||||
"password": False,
|
||||
"category": "tool",
|
||||
"advanced": True,
|
||||
},
|
||||
"TAVILY_API_KEY": {
|
||||
"description": "Tavily API key for AI-native web search, extract, and crawl",
|
||||
"prompt": "Tavily API key",
|
||||
@@ -3301,7 +3326,7 @@ _KNOWN_ROOT_KEYS = {
|
||||
"fallback_providers", "credential_pool_strategies", "toolsets",
|
||||
"agent", "terminal", "display", "compression", "delegation",
|
||||
"auxiliary", "custom_providers", "context", "memory", "gateway",
|
||||
"sessions",
|
||||
"sessions", "portal",
|
||||
}
|
||||
|
||||
# Valid fields inside a custom_providers list entry
|
||||
@@ -3964,6 +3989,26 @@ def migrate_config(interactive: bool = True, quiet: bool = False) -> Dict[str, A
|
||||
f"{', '.join(added_aux)}"
|
||||
)
|
||||
|
||||
# ── Version 23 → 24: inject app_tools into saved platform_toolsets ──
|
||||
# The portal.app_tools config flag is handled by deep-merge (DEFAULT_CONFIG
|
||||
# has it, so load_config() always includes it). But platform_toolsets are
|
||||
# user-owned lists that deep-merge can't append to — existing users who
|
||||
# ran `hermes tools` have a saved list that won't include app_tools.
|
||||
if current_ver < 24:
|
||||
config = read_raw_config()
|
||||
pt = config.get("platform_toolsets")
|
||||
if isinstance(pt, dict):
|
||||
patched = False
|
||||
for plat_key, ts_list in pt.items():
|
||||
if isinstance(ts_list, list) and "app_tools" not in ts_list:
|
||||
ts_list.append("app_tools")
|
||||
patched = True
|
||||
if patched:
|
||||
save_config(config)
|
||||
results["config_added"].append("app_tools added to platform_toolsets")
|
||||
if not quiet:
|
||||
print(" ✓ Added app_tools to saved platform toolset lists")
|
||||
|
||||
if current_ver < latest_ver and not quiet:
|
||||
print(f"Config version: {current_ver} → {latest_ver}")
|
||||
|
||||
|
||||
@@ -71,7 +71,7 @@ def curses_checklist(
|
||||
curses.use_default_colors()
|
||||
curses.init_pair(1, curses.COLOR_GREEN, -1)
|
||||
curses.init_pair(2, curses.COLOR_YELLOW, -1)
|
||||
curses.init_pair(3, 8, -1) # dim gray
|
||||
curses.init_pair(3, 8 if curses.COLORS > 8 else curses.COLOR_WHITE, -1) # dim gray
|
||||
cursor = 0
|
||||
scroll_offset = 0
|
||||
|
||||
|
||||
@@ -21,6 +21,44 @@ _CREDENTIAL_SUFFIXES = ("_API_KEY", "_TOKEN", "_SECRET", "_KEY")
|
||||
# tests) don't spam the same warning multiple times.
|
||||
_WARNED_KEYS: set[str] = set()
|
||||
|
||||
# Map of env-var name → source label ("bitwarden", etc.) for credentials
|
||||
# that were injected by an external secret source during load_hermes_dotenv().
|
||||
# Used by setup / `hermes model` flows to label detected credentials so
|
||||
# users understand WHERE a key came from when their .env doesn't contain it
|
||||
# directly (otherwise the "credentials detected ✓" line looks identical to
|
||||
# the .env case and they don't know Bitwarden is wired up).
|
||||
_SECRET_SOURCES: dict[str, str] = {}
|
||||
|
||||
|
||||
def get_secret_source(env_var: str) -> str | None:
    """Look up which external secret source supplied ``env_var``.

    The answer comes from the module-level ``_SECRET_SOURCES`` mapping.

    Returns:
        The recorded source label (for example ``"bitwarden"``), or ``None``
        when ``env_var`` has no entry in the mapping.
    """
    if env_var in _SECRET_SOURCES:
        return _SECRET_SOURCES[env_var]
    return None
|
||||
|
||||
|
||||
def format_secret_source_suffix(env_var: str) -> str:
    """Build a display suffix naming the secret source of ``env_var``.

    Returns ``" (from Bitwarden)"`` for the ``"bitwarden"`` source,
    ``" (from <label>)"`` for any other recorded source, and ``""`` when
    ``get_secret_source`` reports no source for the variable.
    """
    origin = get_secret_source(env_var)
    if origin == "bitwarden":
        return " (from Bitwarden)"
    # Any other non-empty label is echoed verbatim, so new secret sources
    # need no change here; a missing label produces no suffix at all.
    return f" (from {origin})" if origin else ""
|
||||
|
||||
|
||||
def _format_offending_chars(value: str, limit: int = 3) -> str:
|
||||
"""Return a compact 'U+XXXX ('c'), ...' summary of non-ASCII codepoints."""
|
||||
@@ -213,6 +251,12 @@ def _apply_external_secret_sources(home_path: Path) -> None:
|
||||
# and might have the same copy-paste corruption as a manually
|
||||
# edited .env (see #6843).
|
||||
_sanitize_loaded_credentials()
|
||||
# Remember where these came from so the setup / `hermes model`
|
||||
# flows can label detected credentials with "(from Bitwarden)" —
|
||||
# otherwise users see "credentials ✓" with no hint that the value
|
||||
# came from BSM rather than .env.
|
||||
for name in result.applied:
|
||||
_SECRET_SOURCES[name] = "bitwarden"
|
||||
print(
|
||||
f" Bitwarden Secrets Manager: applied {len(result.applied)} "
|
||||
f"secret{'s' if len(result.applied) != 1 else ''} "
|
||||
|
||||
+30
-5
@@ -591,7 +591,7 @@ def _session_browse_picker(sessions: list) -> Optional[str]:
|
||||
curses.init_pair(1, curses.COLOR_GREEN, -1) # selected
|
||||
curses.init_pair(2, curses.COLOR_YELLOW, -1) # header
|
||||
curses.init_pair(3, curses.COLOR_CYAN, -1) # search
|
||||
curses.init_pair(4, 8, -1) # dim
|
||||
curses.init_pair(4, 8 if curses.COLORS > 8 else curses.COLOR_WHITE, -1) # dim
|
||||
|
||||
cursor = 0
|
||||
scroll_offset = 0
|
||||
@@ -2433,6 +2433,9 @@ _AUX_TASKS: list[tuple[str, str, str]] = [
|
||||
("mcp", "MCP", "MCP tool reasoning"),
|
||||
("title_generation", "Title generation", "session titles"),
|
||||
("skills_hub", "Skills hub", "skills search/install"),
|
||||
("triage_specifier", "Triage specifier", "kanban spec fleshing"),
|
||||
("kanban_decomposer", "Kanban decomposer", "task decomposition"),
|
||||
("profile_describer", "Profile describer", "auto profile descriptions"),
|
||||
("curator", "Curator", "skill-usage review pass"),
|
||||
]
|
||||
|
||||
@@ -4662,7 +4665,9 @@ def _model_flow_copilot(config, current_model=""):
|
||||
source = creds.get("source", "")
|
||||
else:
|
||||
if source in {"GITHUB_TOKEN", "GH_TOKEN"}:
|
||||
print(f" GitHub token: {api_key[:8]}... ✓ ({source})")
|
||||
from hermes_cli.env_loader import format_secret_source_suffix
|
||||
bw_suffix = format_secret_source_suffix(source)
|
||||
print(f" GitHub token: {api_key[:8]}... ✓ ({source}{bw_suffix})")
|
||||
elif source == "gh auth token":
|
||||
print(" GitHub token: ✓ (from `gh auth token`)")
|
||||
else:
|
||||
@@ -4919,7 +4924,10 @@ def _prompt_api_key(pconfig, existing_key: str, provider_id: str = "") -> tuple:
|
||||
return new_key, False
|
||||
|
||||
# Already configured — offer K / R / C ────────────────────────────────
|
||||
print(f" {pconfig.name} API key: {existing_key[:8]}... ✓")
|
||||
from hermes_cli.env_loader import format_secret_source_suffix
|
||||
|
||||
source_suffix = format_secret_source_suffix(key_env) if key_env else ""
|
||||
print(f" {pconfig.name} API key: {existing_key[:8]}... ✓{source_suffix}")
|
||||
if not key_env:
|
||||
# Nothing we can rewrite; just acknowledge and move on.
|
||||
print()
|
||||
@@ -5202,7 +5210,9 @@ def _model_flow_bedrock_api_key(config, region, current_model=""):
|
||||
# Prompt for API key
|
||||
existing_key = get_env_value("AWS_BEARER_TOKEN_BEDROCK") or ""
|
||||
if existing_key:
|
||||
print(f" Bedrock API Key: {existing_key[:12]}... ✓")
|
||||
from hermes_cli.env_loader import format_secret_source_suffix
|
||||
source_suffix = format_secret_source_suffix("AWS_BEARER_TOKEN_BEDROCK")
|
||||
print(f" Bedrock API Key: {existing_key[:12]}... ✓{source_suffix}")
|
||||
else:
|
||||
print(f" Endpoint: {mantle_base_url}")
|
||||
print()
|
||||
@@ -5873,7 +5883,22 @@ def _model_flow_anthropic(config, current_model=""):
|
||||
if has_creds:
|
||||
# Show what we found
|
||||
if existing_key:
|
||||
print(f" Anthropic credentials: {existing_key[:12]}... ✓")
|
||||
from hermes_cli.env_loader import format_secret_source_suffix
|
||||
from hermes_cli.auth import PROVIDER_REGISTRY
|
||||
|
||||
# Surface which env var supplied the key so users with
|
||||
# Bitwarden see "(from Bitwarden)" — without this, a detected
|
||||
# BSM key looks identical to a key in .env and users assume
|
||||
# nothing is wired up.
|
||||
source_suffix = ""
|
||||
for var in PROVIDER_REGISTRY["anthropic"].api_key_env_vars:
|
||||
if os.getenv(var, "").strip() == existing_key:
|
||||
source_suffix = format_secret_source_suffix(var)
|
||||
if source_suffix:
|
||||
break
|
||||
print(
|
||||
f" Anthropic credentials: {existing_key[:12]}... ✓{source_suffix}"
|
||||
)
|
||||
elif cc_available:
|
||||
print(" Claude Code credentials: ✓ (auto-detected)")
|
||||
print()
|
||||
|
||||
@@ -74,8 +74,12 @@ class NousSubscriptionFeatures:
|
||||
def modal(self) -> NousFeatureState:
|
||||
return self.features["modal"]
|
||||
|
||||
@property
def app_tools(self) -> NousFeatureState:
    """Feature state stored under the ``"app_tools"`` key of ``self.features``."""
    state = self.features["app_tools"]
    return state
|
||||
|
||||
def items(self) -> Iterable[NousFeatureState]:
|
||||
ordered = ("web", "image_gen", "tts", "browser", "modal")
|
||||
ordered = ("web", "image_gen", "tts", "browser", "modal", "app_tools")
|
||||
for key in ordered:
|
||||
yield self.features[key]
|
||||
|
||||
@@ -225,6 +229,22 @@ def _resolve_browser_feature_state(
|
||||
return "local", available, active, False
|
||||
|
||||
|
||||
def _read_portal_app_tools_enabled(config: Optional[Dict[str, object]] = None) -> bool:
|
||||
"""Return True when the portal.app_tools config flag is on."""
|
||||
if config is not None:
|
||||
# Fast path: use the pre-loaded config snapshot from the caller
|
||||
import os
|
||||
env_val = os.getenv("PORTAL_APP_TOOLS")
|
||||
if env_val is not None:
|
||||
return is_truthy_value(env_val)
|
||||
portal = config.get("portal")
|
||||
if isinstance(portal, dict):
|
||||
return bool(portal.get("app_tools", True))
|
||||
return True
|
||||
from tools.tool_backend_helpers import portal_app_tools_enabled
|
||||
return portal_app_tools_enabled()
|
||||
|
||||
|
||||
def get_nous_subscription_features(
|
||||
config: Optional[Dict[str, object]] = None,
|
||||
) -> NousSubscriptionFeatures:
|
||||
@@ -313,6 +333,8 @@ def get_nous_subscription_features(
|
||||
managed_tts_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("openai-audio")
|
||||
managed_browser_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("browser-use")
|
||||
managed_modal_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("modal")
|
||||
app_gw_ready = bool(managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("tools"))
|
||||
app_config_on = _read_portal_app_tools_enabled(config)
|
||||
modal_state = resolve_modal_backend_state(
|
||||
modal_mode,
|
||||
has_direct=direct_modal,
|
||||
@@ -476,6 +498,17 @@ def get_nous_subscription_features(
|
||||
current_provider="Modal" if terminal_backend == "modal" else terminal_backend or "local",
|
||||
explicit_configured=terminal_backend == "modal",
|
||||
),
|
||||
"app_tools": NousFeatureState(
|
||||
key="app_tools",
|
||||
label="App tools (500+ apps)",
|
||||
included_by_default=True,
|
||||
available=app_gw_ready,
|
||||
active=app_gw_ready and app_config_on,
|
||||
managed_by_nous=app_gw_ready and app_config_on,
|
||||
direct_override=False,
|
||||
toolset_enabled=app_config_on,
|
||||
current_provider="Nous Tool Gateway",
|
||||
),
|
||||
}
|
||||
|
||||
return NousSubscriptionFeatures(
|
||||
|
||||
@@ -1051,7 +1051,7 @@ def _run_composite_ui(curses, plugin_names, plugin_labels, plugin_selected,
|
||||
curses.init_pair(1, curses.COLOR_GREEN, -1)
|
||||
curses.init_pair(2, curses.COLOR_YELLOW, -1)
|
||||
curses.init_pair(3, curses.COLOR_CYAN, -1)
|
||||
curses.init_pair(4, 8, -1) # dim gray
|
||||
curses.init_pair(4, 8 if curses.COLORS > 8 else curses.COLOR_WHITE, -1) # dim gray
|
||||
cursor = 0
|
||||
scroll_offset = 0
|
||||
|
||||
@@ -1196,7 +1196,7 @@ def _run_composite_ui(curses, plugin_names, plugin_labels, plugin_selected,
|
||||
curses.init_pair(1, curses.COLOR_GREEN, -1)
|
||||
curses.init_pair(2, curses.COLOR_YELLOW, -1)
|
||||
curses.init_pair(3, curses.COLOR_CYAN, -1)
|
||||
curses.init_pair(4, 8, -1)
|
||||
curses.init_pair(4, 8 if curses.COLORS > 8 else curses.COLOR_WHITE, -1)
|
||||
curses.curs_set(0)
|
||||
elif key in {curses.KEY_ENTER, 10, 13}:
|
||||
if cursor < n_plugins:
|
||||
@@ -1228,7 +1228,7 @@ def _run_composite_ui(curses, plugin_names, plugin_labels, plugin_selected,
|
||||
curses.init_pair(1, curses.COLOR_GREEN, -1)
|
||||
curses.init_pair(2, curses.COLOR_YELLOW, -1)
|
||||
curses.init_pair(3, curses.COLOR_CYAN, -1)
|
||||
curses.init_pair(4, 8, -1)
|
||||
curses.init_pair(4, 8 if curses.COLORS > 8 else curses.COLOR_WHITE, -1)
|
||||
curses.curs_set(0)
|
||||
elif key in {27, ord("q")}:
|
||||
# Save plugin changes on exit
|
||||
|
||||
+86
-29
@@ -78,6 +78,7 @@ CONFIGURABLE_TOOLSETS = [
|
||||
("discord_admin", "🛡️ Discord Server Admin", "list channels/roles, pin, assign roles"),
|
||||
("yuanbao", "🤖 Yuanbao", "group info, member queries, DM"),
|
||||
("computer_use", "🖱️ Computer Use (macOS)", "background desktop control via cua-driver"),
|
||||
("app_tools", "🔌 App Integrations (500+)", "Gmail, Slack, GitHub, Jira, Notion, etc. via Nous tool gateway"),
|
||||
]
|
||||
|
||||
# Toolsets that are OFF by default for new installs.
|
||||
@@ -311,6 +312,16 @@ TOOL_CATEGORIES = {
|
||||
"image_gen": {
|
||||
"name": "Image Generation",
|
||||
"icon": "🎨",
|
||||
# Per-provider rows for FAL.ai (`plugins/image_gen/fal`), OpenAI,
|
||||
# OpenAI Codex, and xAI are injected at runtime from each
|
||||
# ``plugins.image_gen.<vendor>`` package via
|
||||
# ``_plugin_image_gen_providers()`` in ``_visible_providers``.
|
||||
# Only non-provider UX setup-flow rows remain here:
|
||||
# - "Nous Subscription" — managed FAL billed via the Nous
|
||||
# subscription (requires_nous_auth + override_env_vars).
|
||||
# Uses the fal plugin as the underlying backend but has a
|
||||
# distinct setup UX.
|
||||
# Mirrors the shape browser/video_gen ship today.
|
||||
"providers": [
|
||||
{
|
||||
"name": "Nous Subscription",
|
||||
@@ -322,15 +333,6 @@ TOOL_CATEGORIES = {
|
||||
"override_env_vars": ["FAL_KEY"],
|
||||
"imagegen_backend": "fal",
|
||||
},
|
||||
{
|
||||
"name": "FAL.ai",
|
||||
"badge": "paid",
|
||||
"tag": "Pick from flux-2-klein, flux-2-pro, gpt-image, nano-banana, etc.",
|
||||
"env_vars": [
|
||||
{"key": "FAL_KEY", "prompt": "FAL API key", "url": "https://fal.ai/dashboard/keys"},
|
||||
],
|
||||
"imagegen_backend": "fal",
|
||||
},
|
||||
],
|
||||
},
|
||||
"video_gen": {
|
||||
@@ -482,6 +484,11 @@ TOOLSET_ENV_REQUIREMENTS = {
|
||||
# ─── Post-Setup Hooks ─────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def _cua_driver_cmd() -> str:
|
||||
"""Return the cua-driver executable name/path, honoring non-empty overrides."""
|
||||
return os.environ.get("HERMES_CUA_DRIVER_CMD", "").strip() or "cua-driver"
|
||||
|
||||
|
||||
def _pip_install(
|
||||
args: List[str],
|
||||
*,
|
||||
@@ -550,6 +557,55 @@ def _pip_install(
|
||||
)
|
||||
|
||||
|
||||
|
||||
def _check_cua_driver_asset_for_arch() -> bool:
|
||||
"""Check whether the latest CUA release ships an asset for this architecture.
|
||||
|
||||
Returns True if the asset likely exists (or if we cannot determine it).
|
||||
Returns False and prints a warning when the asset is confirmed missing,
|
||||
so callers can skip the install attempt and avoid a raw 404.
|
||||
"""
|
||||
import platform as _plat
|
||||
import urllib.request
|
||||
|
||||
machine = _plat.machine() # "x86_64" or "arm64"
|
||||
if machine == "arm64":
|
||||
# arm64 (Apple Silicon) assets are always published.
|
||||
return True
|
||||
|
||||
# x86_64 / Intel — probe the latest release for an architecture-specific
|
||||
# asset before falling through to the upstream installer.
|
||||
api_url = (
|
||||
"https://api.github.com/repos/trycua/cua/releases/latest"
|
||||
)
|
||||
try:
|
||||
req = urllib.request.Request(api_url, headers={"Accept": "application/vnd.github+json"})
|
||||
with urllib.request.urlopen(req, timeout=10) as resp:
|
||||
release = _json.loads(resp.read().decode())
|
||||
tag = release.get("tag_name", "")
|
||||
assets = release.get("assets", [])
|
||||
arch_names = {"x86_64", "amd64"}
|
||||
has_asset = any(
|
||||
any(a in a_info.get("name", "").lower() for a in arch_names)
|
||||
for a_info in assets
|
||||
)
|
||||
if not has_asset:
|
||||
_print_warning(
|
||||
f" Latest CUA release ({tag}) has no Intel (x86_64) asset."
|
||||
)
|
||||
_print_info(
|
||||
" CUA Driver currently only ships Apple Silicon builds."
|
||||
)
|
||||
_print_info(
|
||||
" See: https://github.com/trycua/cua/issues/1493"
|
||||
)
|
||||
return False
|
||||
except Exception:
|
||||
# Network / API failure — proceed and let the installer handle it.
|
||||
pass
|
||||
return True
|
||||
|
||||
|
||||
def install_cua_driver(upgrade: bool = False) -> bool:
|
||||
"""Install or refresh the cua-driver binary used by Computer Use.
|
||||
|
||||
@@ -579,7 +635,8 @@ def install_cua_driver(upgrade: bool = False) -> bool:
|
||||
_print_warning(" Computer Use (cua-driver) is macOS-only; skipping.")
|
||||
return False
|
||||
|
||||
binary = shutil.which("cua-driver")
|
||||
driver_cmd = _cua_driver_cmd()
|
||||
binary = shutil.which(driver_cmd)
|
||||
|
||||
# Not installed → fresh install path (only when caller asked for it).
|
||||
if not binary and not upgrade:
|
||||
@@ -587,18 +644,20 @@ def install_cua_driver(upgrade: bool = False) -> bool:
|
||||
_print_warning(" curl not found — install manually:")
|
||||
_print_info(" https://github.com/trycua/cua/blob/main/libs/cua-driver/README.md")
|
||||
return False
|
||||
if not _check_cua_driver_asset_for_arch():
|
||||
return False
|
||||
return _run_cua_driver_installer(label="Installing")
|
||||
|
||||
# Already installed and caller didn't ask to upgrade → just confirm.
|
||||
if binary and not upgrade:
|
||||
try:
|
||||
version = subprocess.run(
|
||||
["cua-driver", "--version"],
|
||||
[driver_cmd, "--version"],
|
||||
capture_output=True, text=True, timeout=5,
|
||||
).stdout.strip()
|
||||
_print_success(f" cua-driver already installed: {version or 'unknown version'}")
|
||||
_print_success(f" {driver_cmd} already installed: {version or 'unknown version'}")
|
||||
except Exception:
|
||||
_print_success(" cua-driver already installed.")
|
||||
_print_success(f" {driver_cmd} already installed.")
|
||||
_print_info(" Grant macOS permissions if not done yet:")
|
||||
_print_info(" System Settings > Privacy & Security > Accessibility")
|
||||
_print_info(" System Settings > Privacy & Security > Screen Recording")
|
||||
@@ -609,11 +668,14 @@ def install_cua_driver(upgrade: bool = False) -> bool:
|
||||
_print_warning(" curl not found — cannot refresh cua-driver.")
|
||||
return bool(binary)
|
||||
|
||||
if not _check_cua_driver_asset_for_arch():
|
||||
return bool(binary)
|
||||
|
||||
if binary:
|
||||
# Show before/after version when we have a baseline. Best-effort.
|
||||
try:
|
||||
before = subprocess.run(
|
||||
["cua-driver", "--version"],
|
||||
[driver_cmd, "--version"],
|
||||
capture_output=True, text=True, timeout=5,
|
||||
).stdout.strip()
|
||||
except Exception:
|
||||
@@ -625,13 +687,13 @@ def install_cua_driver(upgrade: bool = False) -> bool:
|
||||
if ok and before:
|
||||
try:
|
||||
after = subprocess.run(
|
||||
["cua-driver", "--version"],
|
||||
[driver_cmd, "--version"],
|
||||
capture_output=True, text=True, timeout=5,
|
||||
).stdout.strip()
|
||||
if after and after != before:
|
||||
_print_success(f" cua-driver upgraded: {before} → {after}")
|
||||
_print_success(f" {driver_cmd} upgraded: {before} → {after}")
|
||||
elif after:
|
||||
_print_info(f" cua-driver up to date: {after}")
|
||||
_print_info(f" {driver_cmd} up to date: {after}")
|
||||
except Exception:
|
||||
pass
|
||||
return ok
|
||||
@@ -655,11 +717,12 @@ def _run_cua_driver_installer(label: str = "Installing", verbose: bool = True) -
|
||||
_print_info(f" {label} cua-driver (macOS background computer-use)...")
|
||||
else:
|
||||
_print_info(f" {label} cua-driver...")
|
||||
driver_cmd = _cua_driver_cmd()
|
||||
try:
|
||||
result = subprocess.run(install_cmd, shell=True, timeout=300)
|
||||
if result.returncode == 0 and shutil.which("cua-driver"):
|
||||
if result.returncode == 0 and shutil.which(driver_cmd):
|
||||
if verbose:
|
||||
_print_success(" cua-driver installed.")
|
||||
_print_success(f" {driver_cmd} installed.")
|
||||
_print_info(" IMPORTANT — grant macOS permissions now:")
|
||||
_print_info(" System Settings > Privacy & Security > Accessibility")
|
||||
_print_info(" System Settings > Privacy & Security > Screen Recording")
|
||||
@@ -1506,12 +1569,9 @@ def _plugin_image_gen_providers() -> list[dict]:
|
||||
Each returned dict looks like a regular ``TOOL_CATEGORIES`` provider
|
||||
row but carries an ``image_gen_plugin_name`` marker so downstream
|
||||
code (config writing, model picker) knows to route through the
|
||||
plugin registry instead of the in-tree FAL backend.
|
||||
|
||||
FAL is skipped — it's already exposed by the hardcoded
|
||||
``TOOL_CATEGORIES["image_gen"]`` entries. When FAL gets ported to
|
||||
a plugin in a follow-up PR, the hardcoded entries go away and this
|
||||
function surfaces it alongside OpenAI automatically.
|
||||
plugin registry. Every image-gen backend is a plugin now — there
|
||||
are no hardcoded rows left in ``TOOL_CATEGORIES["image_gen"]`` for
|
||||
this function to dedupe against (see issue #26241).
|
||||
"""
|
||||
try:
|
||||
from agent.image_gen_registry import list_providers
|
||||
@@ -1524,9 +1584,6 @@ def _plugin_image_gen_providers() -> list[dict]:
|
||||
|
||||
rows: list[dict] = []
|
||||
for provider in providers:
|
||||
if getattr(provider, "name", None) == "fal":
|
||||
# FAL has its own hardcoded rows today.
|
||||
continue
|
||||
try:
|
||||
schema = provider.get_setup_schema()
|
||||
except Exception:
|
||||
@@ -1751,7 +1808,7 @@ _POST_SETUP_INSTALLED: dict = {
|
||||
# entry when (a) the post_setup is the ONLY install side-effect for
|
||||
# a no-key provider, and (b) an installed-state check is cheap and
|
||||
# doesn't trigger a heavy import.
|
||||
"cua_driver": lambda: bool(shutil.which("cua-driver")),
|
||||
"cua_driver": lambda: bool(shutil.which(_cua_driver_cmd())),
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -975,11 +975,13 @@ _AUX_TASK_SLOTS: Tuple[str, ...] = (
|
||||
"vision",
|
||||
"web_extract",
|
||||
"compression",
|
||||
"session_search",
|
||||
"skills_hub",
|
||||
"approval",
|
||||
"mcp",
|
||||
"title_generation",
|
||||
"triage_specifier",
|
||||
"kanban_decomposer",
|
||||
"profile_describer",
|
||||
"curator",
|
||||
)
|
||||
|
||||
|
||||
Binary file not shown.
|
After Width: | Height: | Size: 1.2 MiB |
Binary file not shown.
|
After Width: | Height: | Size: 2.1 MiB |
Binary file not shown.
|
After Width: | Height: | Size: 1.6 MiB |
Binary file not shown.
|
After Width: | Height: | Size: 1.4 MiB |
@@ -0,0 +1,85 @@
|
||||
Create a professional infographic following these specifications:
|
||||
|
||||
## Image Specifications
|
||||
|
||||
- **Type**: Infographic
|
||||
- **Layout**: bento-grid
|
||||
- **Style**: technical-schematic (engineering blueprint variant)
|
||||
- **Aspect Ratio**: 1:1 (square)
|
||||
- **Language**: English
|
||||
|
||||
## Core Principles
|
||||
|
||||
- Follow the bento-grid layout precisely with varied cell sizes
|
||||
- Apply technical-schematic aesthetics consistently throughout
|
||||
- Keep information concise, highlight keywords and core concepts
|
||||
- Use ample whitespace for visual clarity
|
||||
- Maintain clear visual hierarchy with a hero cell for the headline metric
|
||||
|
||||
## Style Guidelines (technical-schematic blueprint)
|
||||
|
||||
- Color palette: deep blue background (#1E3A5F), white lines and text, amber accent (#F59E0B) ONLY on the hero metric and critical deltas, cyan callouts for measurement annotations
|
||||
- Grid pattern overlay across the entire canvas — fine white grid lines on the deep blue background
|
||||
- All-caps technical stencil typography for headers; clean sans-serif for body
|
||||
- Dimension lines with arrowheads connecting metrics to their cells
|
||||
- Technical symbols where appropriate (gear icons, flow arrows, modular block diagrams)
|
||||
- Consistent stroke weights — bold for cell borders, thin for grid, medium for connector lines
|
||||
- Engineering spec-sheet aesthetic: feels like a printed architectural blueprint, austere and precise
|
||||
|
||||
## Layout Guidelines (bento-grid)
|
||||
|
||||
- Hero cell (TOP-CENTER or LEFT, occupying ~40% of canvas): "−61 COMPLEXITY · 79 → 18" headline metric in massive amber-on-blue, with subtitle "convert_messages_to_anthropic refactored"
|
||||
- 7 helper cells in a 2x4 or 3x3 grid showing each extracted helper as its own modular block — each cell has the helper name in all-caps, its complexity number, and a one-line role
|
||||
- Metrics strip cell: BEFORE/AFTER table with deltas (185 statements → ~70, complexity 79 → 18, +5 C901 violations — an intentional result of the split)
|
||||
- Test validation cell: "152/152 + 213/213 PASS" with checkmark stencil
|
||||
- Footer strip across bottom: "PR #27784 · agent/anthropic_adapter.py · @kshitijk4poor · NousResearch/hermes-agent"
|
||||
|
||||
## Content to render
|
||||
|
||||
**Main title (top of canvas, all caps):** "ANTHROPIC ADAPTER · 1-INTO-7 EXTRACTION"
|
||||
**Subtitle:** "PR #27784 — convert_messages_to_anthropic refactor"
|
||||
|
||||
**Hero cell (largest, amber accent):**
|
||||
- "−61"
|
||||
- "CYCLOMATIC COMPLEXITY"
|
||||
- "79 → 18 MAX (−77%)"
|
||||
- Subtext: "convert_messages_to_anthropic · pure code motion · zero behavior change"
|
||||
|
||||
**7 helper cells (one per helper, each its own modular block):**
|
||||
|
||||
1. _convert_assistant_message · C<10 · "Assistant msg → content blocks"
|
||||
2. _convert_tool_message_to_result · C=12 · "Tool msg → tool_result + merge"
|
||||
3. _convert_user_message · C<10 · "User msg validation"
|
||||
4. _strip_orphaned_tool_blocks · C=15 · "Orphan tool_use removal"
|
||||
5. _merge_consecutive_roles · C=13 · "Anthropic role-alternation"
|
||||
6. _manage_thinking_signatures · C=18 · "Strip/preserve by endpoint"
|
||||
7. _evict_old_screenshots · C<10 · "Keep most recent 3 images"
|
||||
|
||||
**Metrics cell (table format with arrows):**
|
||||
- MAX FUNCTION COMPLEXITY: 79 → 18 (−77%)
|
||||
- MAX STATEMENTS/FUNCTION: 185 → ~70 (−62%)
|
||||
- LOC FILE-WIDE: −4
|
||||
- MAIN FUNCTION LOC: 395 → 63
|
||||
|
||||
**Test validation cell (checkmark stencil):**
|
||||
- test_anthropic_adapter.py: 152/152 PASS
|
||||
- test_auxiliary_client.py: 172/172 PASS
|
||||
- test_azure_identity_adapter.py: 39/39 PASS
|
||||
- test_bedrock_1m_context.py: 2/2 PASS
|
||||
|
||||
**Behavior preservation cell:**
|
||||
"ZERO LOGIC CHANGES · ANTHROPIC + KIMI + DEEPSEEK + MINIMAX + AZURE FOUNDRY + BEDROCK SEMANTICS PRESERVED"
|
||||
|
||||
**Footer strip:**
|
||||
"PR #27784 · agent/anthropic_adapter.py · cherry-picked from #23968 · @kshitijk4poor · NousResearch/hermes-agent"
|
||||
|
||||
## Text Requirements
|
||||
|
||||
- All text in English, all-caps for headers
|
||||
- Hero metric "−61" in amber (#F59E0B), oversized, with thick blueprint stencil treatment
|
||||
- Helper names in white technical stencil
|
||||
- Complexity numbers (C=12, C=18, etc.) in cyan callouts
|
||||
- "BEFORE" labels in white-on-blue, "AFTER" labels in amber-on-blue
|
||||
- Footer in small white stencil
|
||||
|
||||
Generate the infographic now as a square engineering blueprint.
|
||||
@@ -0,0 +1,66 @@
|
||||
# Infographic: PR #27784 — convert_messages_to_anthropic refactor
|
||||
|
||||
## Hero metric
|
||||
**−61 cyclomatic complexity** in `agent/anthropic_adapter.py` (79 → 18 max).
|
||||
**−4 LOC** net file-wide. **77% drop** in single-function complexity ceiling.
|
||||
|
||||
## Title
|
||||
ANTHROPIC ADAPTER · 1-INTO-7 EXTRACTION
|
||||
PR #27784 · agent/anthropic_adapter.py · @kshitijk4poor
|
||||
|
||||
## Section 1: BEFORE (left side)
|
||||
**convert_messages_to_anthropic**
|
||||
- 185 statements
|
||||
- 90 branches
|
||||
- Cyclomatic: 79
|
||||
- Did 7 jobs in one function
|
||||
|
||||
Inline responsibilities mixed together:
|
||||
1. Walk + dispatch by role
|
||||
2. Tool-result conversion
|
||||
3. Orphan tool-use stripping
|
||||
4. Same-role merging
|
||||
5. Thinking-signature management
|
||||
6. Screenshot eviction
|
||||
7. Final assembly
|
||||
|
||||
## Section 2: AFTER (right side)
|
||||
**convert_messages_to_anthropic** — now 63 lines, C<10
|
||||
Plus 7 single-responsibility helpers:
|
||||
|
||||
| Helper | C | Role |
|
||||
|---|---|---|
|
||||
| _convert_assistant_message | <10 | Assistant msg → content blocks |
|
||||
| _convert_tool_message_to_result | 12 | Tool msg → tool_result + merge |
|
||||
| _convert_user_message | <10 | User msg validation + conversion |
|
||||
| _strip_orphaned_tool_blocks | 15 | Strip orphan tool_use + tool_result |
|
||||
| _merge_consecutive_roles | 13 | Anthropic role-alternation enforce |
|
||||
| _manage_thinking_signatures | 18 | Strip/preserve/downgrade by endpoint |
|
||||
| _evict_old_screenshots | <10 | Keep most recent 3 images |
|
||||
|
||||
## Section 3: METRICS
|
||||
| Metric | Before | After | Δ |
|
||||
|---|---:|---:|---:|
|
||||
| Max function complexity | 79 | 18 | −77% |
|
||||
| Max statements/function | 185 | ~70 | −62% |
|
||||
| LOC (file-wide) | — | — | **−4** |
|
||||
| C901 violations | 3 | 8 | +5 (intentional split) |
|
||||
|
||||
## Section 4: ZERO BEHAVIOR CHANGE
|
||||
- Pure code motion — no logic edits
|
||||
- Mutating helpers update `result` in place (same as inline)
|
||||
- `_merge_consecutive_roles` returns new list — caller rebinds
|
||||
- Anthropic / Kimi / DeepSeek / MiniMax / Azure Foundry / Bedrock semantics preserved
|
||||
- Thinking-signature handling identical to pre-refactor
|
||||
|
||||
## Section 5: TEST VALIDATION
|
||||
- tests/agent/test_anthropic_adapter.py — **152 / 152 pass**
|
||||
- tests/agent/test_auxiliary_client.py — **172 / 172 pass**
|
||||
- tests/agent/test_azure_identity_adapter.py — **39 / 39 pass**
|
||||
- tests/agent/test_bedrock_1m_context.py — **2 / 2 pass**
|
||||
|
||||
## Footer
|
||||
File: agent/anthropic_adapter.py
|
||||
Original PR: #27784 (cherry-pick of #23968)
|
||||
Salvage commit: 9c102b937 (kshitijk4poor authorship preserved)
|
||||
Repo: NousResearch/hermes-agent
|
||||
@@ -148,7 +148,7 @@ class BrowserUseBrowserProvider(BrowserProvider):
|
||||
|
||||
return {
|
||||
"api_key": managed.nous_user_token,
|
||||
"base_url": managed.gateway_origin.rstrip("/"),
|
||||
"base_url": managed.resolved_origin.rstrip("/"),
|
||||
"managed_mode": True,
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,182 @@
|
||||
"""FAL.ai image generation backend.
|
||||
|
||||
Wraps the 18-model FAL catalog (FLUX 2, Z-Image, Nano Banana, GPT
|
||||
Image 1.5, Recraft, Imagen 4, Qwen, Ideogram, …) as an
|
||||
:class:`ImageGenProvider` implementation.
|
||||
|
||||
The heavy lifting — model catalog, payload construction, request
|
||||
submission, managed-Nous-gateway selection, Clarity Upscaler chaining
|
||||
— lives in :mod:`tools.image_generation_tool`. This plugin reaches into
|
||||
that module via call-time indirection (``import tools.image_generation_tool as _it``)
|
||||
so:
|
||||
|
||||
* the existing test suite (``tests/tools/test_image_generation.py``,
|
||||
``tests/tools/test_managed_media_gateways.py``) keeps patching
|
||||
``image_tool._submit_fal_request`` / ``image_tool.fal_client`` /
|
||||
``image_tool._managed_fal_client`` without modification, and
|
||||
* there's exactly one canonical FAL code path on disk — the plugin is a
|
||||
registration adapter, not a parallel implementation.
|
||||
|
||||
See issue #26241 for the migration plan and the
|
||||
``plugin-extraction-test-patch-compatibility.md`` rules this follows.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from agent.image_gen_provider import (
|
||||
DEFAULT_ASPECT_RATIO,
|
||||
ImageGenProvider,
|
||||
resolve_aspect_ratio,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class FalImageGenProvider(ImageGenProvider):
    """FAL.ai image generation backend.

    Delegates to ``tools.image_generation_tool.image_generate_tool`` so
    the in-tree FAL implementation (model catalog, payload builder,
    managed-gateway selection, Clarity Upscaler chaining) is the single
    source of truth.  Everything is resolved at call time via the
    ``_it`` indirection so tests can monkey-patch the legacy module.
    """

    # Optional keyword arguments that ``generate`` forwards verbatim to the
    # legacy pipeline when the caller supplies a non-None value.
    _PASSTHROUGH_KEYS = (
        "num_inference_steps",
        "guidance_scale",
        "num_images",
        "output_format",
        "seed",
    )

    @property
    def name(self) -> str:
        """Identifier of this provider (``"fal"``)."""
        return "fal"

    @property
    def display_name(self) -> str:
        """Human-readable provider name (``"FAL.ai"``)."""
        return "FAL.ai"

    def is_available(self) -> bool:
        """Return whether the legacy module reports usable FAL credentials.

        Any exception raised by the check is treated as "not available".
        """
        # Available when direct FAL_KEY is set OR the managed Nous
        # gateway resolves a fal-queue origin.  Both checks come from the
        # legacy module so this provider tracks whatever logic ships
        # there.
        import tools.image_generation_tool as _it
        try:
            return bool(_it.check_fal_api_key())
        except Exception:  # noqa: BLE001 — defensive; never break the picker
            return False

    def list_models(self) -> List[Dict[str, Any]]:
        """List the legacy ``FAL_MODELS`` catalog as picker rows.

        Each row carries ``id``, ``display`` (falls back to the model id),
        ``speed``, ``strengths`` and ``price`` (each falls back to ``""``).
        """
        import tools.image_generation_tool as _it
        return [
            {
                "id": model_id,
                "display": meta.get("display", model_id),
                "speed": meta.get("speed", ""),
                "strengths": meta.get("strengths", ""),
                "price": meta.get("price", ""),
            }
            for model_id, meta in _it.FAL_MODELS.items()
        ]

    def default_model(self) -> Optional[str]:
        """Return the legacy module's ``DEFAULT_MODEL``."""
        import tools.image_generation_tool as _it
        return _it.DEFAULT_MODEL

    def get_setup_schema(self) -> Dict[str, Any]:
        """Return the setup row (name, badge, tag, env vars) for this provider."""
        return {
            "name": "FAL.ai",
            "badge": "paid",
            "tag": "Pick from flux-2-klein, flux-2-pro, gpt-image, nano-banana, etc.",
            "env_vars": [
                {
                    "key": "FAL_KEY",
                    "prompt": "FAL API key",
                    "url": "https://fal.ai/dashboard/keys",
                },
            ],
        }

    def generate(
        self,
        prompt: str,
        aspect_ratio: str = DEFAULT_ASPECT_RATIO,
        **kwargs: Any,
    ) -> Dict[str, Any]:
        """Generate an image via the legacy FAL pipeline.

        Forwards prompt + aspect_ratio (and any forward-compat extras
        the schema supports) into :func:`tools.image_generation_tool.image_generate_tool`,
        then reshapes its JSON-string response into the provider-ABC
        dict format consumed by ``_dispatch_to_plugin_provider``.

        Args:
            prompt: Text prompt handed to the pipeline unchanged.
            aspect_ratio: Requested ratio; normalised through
                ``resolve_aspect_ratio`` before use.
            **kwargs: Only the keys in ``_PASSTHROUGH_KEYS`` with a non-None
                value are forwarded; everything else is ignored.

        Returns:
            A response dict.  This method does not raise for pipeline
            failures: exceptions, undecodable output and non-dict output are
            all reported as ``{"success": False, ...}`` with an ``error`` and
            an ``error_type``.  ``provider``, ``prompt``, ``aspect_ratio``
            and (best-effort) ``model`` are filled in when missing.
        """
        import tools.image_generation_tool as _it

        aspect = resolve_aspect_ratio(aspect_ratio)
        passthrough = {
            key: kwargs[key]
            for key in self._PASSTHROUGH_KEYS
            if key in kwargs and kwargs[key] is not None
        }

        try:
            raw = _it.image_generate_tool(
                prompt=prompt,
                aspect_ratio=aspect,
                **passthrough,
            )
        except Exception as exc:  # noqa: BLE001 — never raise out of generate
            logger.warning("FAL image_generate_tool raised: %s", exc, exc_info=True)
            return {
                "success": False,
                "image": None,
                "error": f"FAL image generation failed: {exc}",
                "error_type": type(exc).__name__,
                "provider": "fal",
                "prompt": prompt,
                "aspect_ratio": aspect,
            }

        try:
            response = json.loads(raw) if isinstance(raw, str) else raw
        except Exception:  # noqa: BLE001
            # Report undecodable output with the same shape as the other
            # failure responses (including ``error_type``) and leave a trace
            # in the log instead of failing silently.
            logger.warning("FAL pipeline returned invalid JSON")
            response = {
                "success": False,
                "image": None,
                "error": "Invalid JSON from FAL pipeline",
                "error_type": "provider_contract",
            }

        if not isinstance(response, dict):
            response = {
                "success": False,
                "image": None,
                "error": "FAL pipeline returned a non-dict response",
                "error_type": "provider_contract",
            }

        # Stamp provider/prompt/aspect_ratio so downstream consumers see
        # the uniform shape declared in ``agent.image_gen_provider``.
        response.setdefault("provider", "fal")
        response.setdefault("prompt", prompt)
        response.setdefault("aspect_ratio", aspect)
        # Annotate model best-effort — the legacy pipeline resolves it
        # internally, so query it after the fact for the response shape.
        if "model" not in response:
            try:
                model_id, _meta = _it._resolve_fal_model()
                response["model"] = model_id
            except Exception:  # noqa: BLE001
                pass
        return response
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Plugin entry point
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def register(ctx) -> None:
    """Plugin entry point: hand a new ``FalImageGenProvider`` to ``ctx``."""
    provider = FalImageGenProvider()
    ctx.register_image_gen_provider(provider)
|
||||
@@ -0,0 +1,7 @@
|
||||
name: fal
|
||||
version: 1.0.0
|
||||
description: "FAL.ai image generation backend (flux-2-klein, flux-2-pro, nano-banana, gpt-image-1.5, recraft-v3, etc.)."
|
||||
author: NousResearch
|
||||
kind: backend
|
||||
requires_env:
|
||||
- FAL_KEY
|
||||
@@ -47,6 +47,25 @@ _DEFAULT_ENDPOINT = "http://127.0.0.1:1933"
|
||||
_TIMEOUT = 30.0
|
||||
_REMOTE_RESOURCE_PREFIXES = ("http://", "https://", "git@", "ssh://", "git://")
|
||||
|
||||
# Maps the viking_remember `category` enum to a viking:// subdirectory.
|
||||
# Keep in sync with REMEMBER_SCHEMA.parameters.properties.category.enum.
|
||||
_CATEGORY_SUBDIR_MAP = {
|
||||
"preference": "preferences",
|
||||
"entity": "entities",
|
||||
"event": "events",
|
||||
"case": "cases",
|
||||
"pattern": "patterns",
|
||||
}
|
||||
_DEFAULT_MEMORY_SUBDIR = "preferences"
|
||||
|
||||
# Maps the built-in memory tool's `target` ("user" vs "memory") to a subdir
|
||||
# for on_memory_write mirroring. User profile facts → preferences; agent
|
||||
# notes / observations → patterns. Anything unknown falls back to the default.
|
||||
_MEMORY_WRITE_TARGET_SUBDIR_MAP = {
|
||||
"user": "preferences",
|
||||
"memory": "patterns",
|
||||
}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Process-level atexit safety net — ensures pending sessions are committed
|
||||
@@ -607,24 +626,35 @@ class OpenVikingMemoryProvider(MemoryProvider):
|
||||
except Exception as e:
|
||||
logger.warning("OpenViking session commit failed: %s", e)
|
||||
|
||||
def on_memory_write(self, action: str, target: str, content: str) -> None:
|
||||
"""Mirror built-in memory writes to OpenViking as explicit memories."""
|
||||
def _build_memory_uri(self, subdir: str) -> str:
|
||||
"""Build a viking:// memory URI under the configured user/subdir."""
|
||||
slug = uuid.uuid4().hex[:12]
|
||||
return f"viking://user/{self._user}/memories/{subdir}/mem_{slug}.md"
|
||||
|
||||
def on_memory_write(
|
||||
self,
|
||||
action: str,
|
||||
target: str,
|
||||
content: str,
|
||||
metadata: Optional[Dict[str, Any]] = None,
|
||||
) -> None:
|
||||
"""Mirror built-in memory writes to OpenViking via content/write."""
|
||||
if not self._client or action != "add" or not content:
|
||||
return
|
||||
|
||||
subdir = _MEMORY_WRITE_TARGET_SUBDIR_MAP.get(target, _DEFAULT_MEMORY_SUBDIR)
|
||||
uri = self._build_memory_uri(subdir)
|
||||
|
||||
def _write():
|
||||
try:
|
||||
client = _VikingClient(
|
||||
self._endpoint, self._api_key,
|
||||
account=self._account, user=self._user, agent=self._agent,
|
||||
)
|
||||
# Add as a user message with memory context so the commit
|
||||
# picks it up as an explicit memory during extraction
|
||||
client.post(f"/api/v1/sessions/{self._session_id}/messages", {
|
||||
"role": "user",
|
||||
"parts": [
|
||||
{"type": "text", "text": f"[Memory note — {target}] {content}"},
|
||||
],
|
||||
client.post("/api/v1/content/write", {
|
||||
"uri": uri,
|
||||
"content": content,
|
||||
"mode": "create",
|
||||
})
|
||||
except Exception as e:
|
||||
logger.debug("OpenViking memory mirror failed: %s", e)
|
||||
@@ -858,24 +888,27 @@ class OpenVikingMemoryProvider(MemoryProvider):
|
||||
if not content:
|
||||
return tool_error("content is required")
|
||||
|
||||
# Store as a session message that will be extracted during commit.
|
||||
# The category hint helps OpenViking's extraction classify correctly.
|
||||
category = args.get("category", "")
|
||||
text = f"[Remember] {content}"
|
||||
if category:
|
||||
text = f"[Remember — {category}] {content}"
|
||||
subdir = _CATEGORY_SUBDIR_MAP.get(category, _DEFAULT_MEMORY_SUBDIR)
|
||||
uri = self._build_memory_uri(subdir)
|
||||
|
||||
self._client.post(f"/api/v1/sessions/{self._session_id}/messages", {
|
||||
"role": "user",
|
||||
"parts": [
|
||||
{"type": "text", "text": text},
|
||||
],
|
||||
})
|
||||
|
||||
return json.dumps({
|
||||
"status": "stored",
|
||||
"message": "Memory recorded. Will be extracted and indexed on session commit.",
|
||||
})
|
||||
# Write directly via content/write API.
|
||||
# This creates the file, stores the content, and queues vector indexing
|
||||
# in a single call — no dependency on session commit / VLM extraction.
|
||||
try:
|
||||
result = self._client.post("/api/v1/content/write", {
|
||||
"uri": uri,
|
||||
"content": content,
|
||||
"mode": "create",
|
||||
})
|
||||
written = result.get("result", {}).get("written_bytes", 0)
|
||||
return json.dumps({
|
||||
"status": "stored",
|
||||
"message": f"Memory stored ({written}b) and queued for vector indexing.",
|
||||
})
|
||||
except Exception as e:
|
||||
logger.error("OpenViking content/write failed: %s", e)
|
||||
return tool_error(f"Failed to store memory: {e}")
|
||||
|
||||
def _tool_add_resource(self, args: dict) -> str:
|
||||
url = args.get("url", "")
|
||||
|
||||
@@ -282,20 +282,24 @@ def _build_payload(
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# fal_client lazy import (same pattern as image_generation_tool)
|
||||
# fal_client lazy import (shared with image_generation_tool via fal_common)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_fal_client: Any = None
|
||||
|
||||
|
||||
def _load_fal_client() -> Any:
|
||||
"""Lazy-load the ``fal_client`` SDK and cache it on this module.
|
||||
|
||||
Delegates the actual import to :func:`tools.fal_common.import_fal_client`
|
||||
so the ``lazy_deps`` ensure-install handling stays in one place.
|
||||
"""
|
||||
global _fal_client
|
||||
if _fal_client is not None:
|
||||
return _fal_client
|
||||
import fal_client # type: ignore
|
||||
|
||||
_fal_client = fal_client
|
||||
return fal_client
|
||||
from tools.fal_common import import_fal_client
|
||||
_fal_client = import_fal_client()
|
||||
return _fal_client
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@@ -238,7 +238,7 @@ def _get_firecrawl_client() -> Any:
|
||||
|
||||
kwargs = {
|
||||
"api_key": managed_gateway.nous_user_token,
|
||||
"api_url": managed_gateway.gateway_origin,
|
||||
"api_url": managed_gateway.resolved_origin,
|
||||
}
|
||||
client_config = (
|
||||
"tool-gateway",
|
||||
|
||||
@@ -3357,6 +3357,25 @@ class AIAgent:
|
||||
return content
|
||||
|
||||
if self._model_supports_vision():
|
||||
# Vision-capable on paper — but if we've already learned in this
|
||||
# session that the active (provider, model) rejects list-type
|
||||
# tool content (e.g. Xiaomi MiMo's 400 "text is not set"),
|
||||
# short-circuit to a text summary so we don't burn another
|
||||
# round-trip relearning the same lesson. Cache populated by
|
||||
# the 400 recovery path in agent.conversation_loop. Transient
|
||||
# per-session; next session retries.
|
||||
key = (
|
||||
(getattr(self, "provider", "") or "").strip().lower(),
|
||||
(getattr(self, "model", "") or "").strip(),
|
||||
)
|
||||
no_list = getattr(self, "_no_list_tool_content_models", None)
|
||||
if no_list and key in no_list:
|
||||
logger.debug(
|
||||
"Tool %s: model %s/%s known to reject list-type tool "
|
||||
"content this session — sending text summary",
|
||||
tool_name, key[0], key[1],
|
||||
)
|
||||
return _multimodal_text_summary(result)
|
||||
return content
|
||||
|
||||
summary = _multimodal_text_summary(result)
|
||||
@@ -3385,6 +3404,80 @@ class AIAgent:
|
||||
from agent.conversation_compression import try_shrink_image_parts_in_messages
|
||||
return try_shrink_image_parts_in_messages(api_messages)
|
||||
|
||||
def _try_strip_image_parts_from_tool_messages(self, api_messages: list) -> bool:
|
||||
"""Downgrade list-type tool messages to text summaries in-place.
|
||||
|
||||
Recovery path for providers that reject list-type tool message content
|
||||
(e.g. Xiaomi MiMo's 400 "text is not set"; see issue #27344). Walks
|
||||
``api_messages`` for any ``role: "tool"`` message whose ``content`` is
|
||||
a list containing image parts, replaces the content with the existing
|
||||
text part(s) (or a minimal placeholder if none survive), and records
|
||||
the active (provider, model) in ``self._no_list_tool_content_models``
|
||||
so subsequent ``_tool_result_content_for_active_model`` calls in this
|
||||
session preemptively downgrade screenshots without a round-trip.
|
||||
|
||||
Returns True when at least one tool message was downgraded — the
|
||||
caller (the 400 recovery branch in ``agent.conversation_loop``) uses
|
||||
this to decide whether to retry the API call with the modified
|
||||
history or surface the original error.
|
||||
"""
|
||||
if not isinstance(api_messages, list):
|
||||
return False
|
||||
|
||||
# Record (provider, model) so we don't relearn this lesson.
|
||||
key = (
|
||||
(getattr(self, "provider", "") or "").strip().lower(),
|
||||
(getattr(self, "model", "") or "").strip(),
|
||||
)
|
||||
if not hasattr(self, "_no_list_tool_content_models"):
|
||||
self._no_list_tool_content_models = set()
|
||||
if key[1]: # only record when we actually have a model id
|
||||
self._no_list_tool_content_models.add(key)
|
||||
|
||||
changed = False
|
||||
for msg in api_messages:
|
||||
if not isinstance(msg, dict) or msg.get("role") != "tool":
|
||||
continue
|
||||
content = msg.get("content")
|
||||
if not isinstance(content, list):
|
||||
continue
|
||||
|
||||
# Salvage any text parts so the model still sees some signal.
|
||||
text_parts: List[str] = []
|
||||
had_image = False
|
||||
for part in content:
|
||||
if not isinstance(part, dict):
|
||||
if isinstance(part, str) and part.strip():
|
||||
text_parts.append(part.strip())
|
||||
continue
|
||||
ptype = part.get("type")
|
||||
if ptype == "image_url" or ptype == "input_image":
|
||||
had_image = True
|
||||
continue
|
||||
if ptype in {"text", "input_text"}:
|
||||
text = str(part.get("text") or "").strip()
|
||||
if text:
|
||||
text_parts.append(text)
|
||||
|
||||
if not had_image:
|
||||
# List-type content but no image parts — leave alone (some
|
||||
# providers reject ANY list content, but stripping a
|
||||
# text-only list doesn't reduce ambiguity; let the caller
|
||||
# surface the original error if this turns out to be the
|
||||
# case).
|
||||
continue
|
||||
|
||||
if text_parts:
|
||||
msg["content"] = "\n\n".join(text_parts)
|
||||
else:
|
||||
msg["content"] = (
|
||||
"[image content removed — provider does not accept "
|
||||
"list-type tool message content]"
|
||||
)
|
||||
changed = True
|
||||
|
||||
return changed
|
||||
|
||||
def _anthropic_preserve_dots(self) -> bool:
|
||||
"""True when using an anthropic-compatible endpoint that preserves dots in model names.
|
||||
Alibaba/DashScope keeps dots (e.g. qwen3.5-plus).
|
||||
|
||||
@@ -47,6 +47,7 @@ ACP_REGISTRY_MANIFEST = REPO_ROOT / "acp_registry" / "agent.json"
|
||||
AUTHOR_MAP = {
|
||||
# teknium (multiple emails)
|
||||
"teknium1@gmail.com": "teknium1",
|
||||
"cipherframe@users.noreply.github.com": "CipherFrame",
|
||||
"me@promplate.dev": "CNSeniorious000",
|
||||
"yichengqiao21@gmail.com": "YarrowQiao",
|
||||
"erhanyasarx@gmail.com": "erhnysr",
|
||||
@@ -59,14 +60,18 @@ AUTHOR_MAP = {
|
||||
"mgongzai@gmail.com": "vKongv",
|
||||
"0x.badfriend@gmail.com": "discodirector",
|
||||
"altriatree@gmail.com": "TruaShamu",
|
||||
"contact-me@stark-x.cn": "Stark-X",
|
||||
"nat@nthrow.io": "nthrow",
|
||||
"m@mobrienv.dev": "mikeyobrien",
|
||||
"saeed919@pm.me": "falasi",
|
||||
"chrisdlc119@outlook.com": "chdlc",
|
||||
"omar@techdeveloper.site": "nycomar",
|
||||
"qiyin.zuo@pcitc.com": "qiyin-code",
|
||||
"mr.aashiz@gmail.com": "aashizpoudel",
|
||||
"70629228+shaun0927@users.noreply.github.com": "shaun0927",
|
||||
"98262967+Bihruze@users.noreply.github.com": "Bihruze",
|
||||
"189280367+Lempkey@users.noreply.github.com": "Lempkey",
|
||||
"leovillalbajr@gmail.com": "Lempkey",
|
||||
"nidhi2894@gmail.com": "nidhi-singh02",
|
||||
"30312689+aashizpoudel@users.noreply.github.com": "aashizpoudel",
|
||||
"oleksii.lisikh@gmail.com": "olisikh",
|
||||
@@ -930,6 +935,8 @@ AUTHOR_MAP = {
|
||||
"holynn@placeholder.local": "holynn-q",
|
||||
"agent@hermes.local": "jacdevos",
|
||||
"sunsky.lau@gmail.com": "liuhao1024",
|
||||
"fabianoeq@gmail.com": "rodrigoeqnit",
|
||||
"178342791+sgtworkman@users.noreply.github.com": "sgtworkman",
|
||||
"qiuqfang98@qq.com": "keepcalmqqf",
|
||||
"261867348+ai-ag2026@users.noreply.github.com": "ai-ag2026",
|
||||
"yanzh.su@gmail.com": "YanzhongSu",
|
||||
|
||||
@@ -56,6 +56,7 @@ class TestFailoverReason:
|
||||
"overloaded", "server_error", "timeout",
|
||||
"context_overflow", "payload_too_large", "image_too_large",
|
||||
"model_not_found", "format_error",
|
||||
"multimodal_tool_content_unsupported",
|
||||
"provider_policy_blocked",
|
||||
"thinking_signature", "long_context_tier",
|
||||
"oauth_long_context_beta_forbidden",
|
||||
@@ -1256,3 +1257,66 @@ class TestRateLimitErrorWithoutStatusCode:
|
||||
e.status_code = None
|
||||
result = classify_api_error(e, provider="copilot", model="gpt-4o")
|
||||
assert result.reason != FailoverReason.rate_limit
|
||||
|
||||
|
||||
|
||||
# ── Test: multimodal_tool_content_unsupported pattern ───────────────────
|
||||
|
||||
class TestMultimodalToolContentUnsupported:
|
||||
"""Issue #27344 — providers that reject list-type tool message content
|
||||
should be classified as ``multimodal_tool_content_unsupported`` so the
|
||||
retry loop can downgrade screenshots to text and try again.
|
||||
"""
|
||||
|
||||
def test_xiaomi_mimo_text_is_not_set_pattern(self):
|
||||
"""The actual Xiaomi MiMo 400 wording from the bug report."""
|
||||
e = MockAPIError(
|
||||
"Error code: 400 - {'error': {'code': '400', 'message': 'Param Incorrect', 'param': 'text is not set', 'type': ''}}",
|
||||
status_code=400,
|
||||
)
|
||||
result = classify_api_error(e, provider="xiaomi", model="mimo-v2.5")
|
||||
assert result.reason == FailoverReason.multimodal_tool_content_unsupported
|
||||
assert result.retryable is True
|
||||
|
||||
def test_generic_tool_message_must_be_string(self):
|
||||
e = MockAPIError(
|
||||
"tool message content must be a string",
|
||||
status_code=400,
|
||||
)
|
||||
result = classify_api_error(e, provider="custom", model="some-model")
|
||||
assert result.reason == FailoverReason.multimodal_tool_content_unsupported
|
||||
|
||||
def test_expected_string_got_list(self):
|
||||
e = MockAPIError(
|
||||
"Schema validation failed: expected string, got list",
|
||||
status_code=400,
|
||||
)
|
||||
result = classify_api_error(e, provider="custom", model="some-model")
|
||||
assert result.reason == FailoverReason.multimodal_tool_content_unsupported
|
||||
|
||||
def test_multimodal_tool_content_takes_priority_over_context_overflow(self):
|
||||
"""Some providers return a 400 whose message contains BOTH
|
||||
'text is not set' and a length-shaped phrase; the tool-content
|
||||
recovery is cheaper than compression so it must win the priority.
|
||||
"""
|
||||
e = MockAPIError(
|
||||
"text is not set; context length exceeded",
|
||||
status_code=400,
|
||||
)
|
||||
result = classify_api_error(e, provider="xiaomi", model="mimo-v2.5")
|
||||
assert result.reason == FailoverReason.multimodal_tool_content_unsupported
|
||||
|
||||
def test_no_status_code_path_also_classifies(self):
|
||||
"""When the error reaches us without a status code (transport
|
||||
layer ate it) the message-only classifier branch must also
|
||||
recognise the pattern.
|
||||
"""
|
||||
e = MockTransportError("tool_call.content must be string")
|
||||
result = classify_api_error(e, provider="alibaba", model="qwen3.5-plus")
|
||||
assert result.reason == FailoverReason.multimodal_tool_content_unsupported
|
||||
|
||||
def test_unrelated_400_is_not_misclassified(self):
|
||||
"""Make sure the patterns don't false-positive on normal 400s."""
|
||||
e = MockAPIError("bad request: missing field 'model'", status_code=400)
|
||||
result = classify_api_error(e, provider="openrouter", model="anthropic/claude-sonnet-4")
|
||||
assert result.reason != FailoverReason.multimodal_tool_content_unsupported
|
||||
|
||||
@@ -1060,3 +1060,191 @@ class TestHonchoCadenceTracking:
|
||||
p.on_turn_start(2, "second message")
|
||||
should_skip = p._injection_frequency == "first-turn" and p._turn_count > 1
|
||||
assert should_skip, "Second turn (turn 2) SHOULD be skipped"
|
||||
|
||||
|
||||
class TestMemoryToolToolsetGate:
|
||||
"""Issue #5544: memory provider tools must respect platform_toolsets.
|
||||
|
||||
Before the fix, MemoryManager.get_all_tool_schemas() output was appended
|
||||
to AIAgent.tools unconditionally in agent_init.py — bypassing the
|
||||
enabled_toolsets filter. Result: `platform_toolsets: telegram: []`
|
||||
still leaked fact_store and other memory tools into the tool surface,
|
||||
causing 10x latency on local models (Qwen3-30B: 1.7s → 42s) and
|
||||
tool-call loops on small models.
|
||||
|
||||
These tests mirror the gate logic in agent/agent_init.py around the
|
||||
memory provider tool injection block. The gate condition is:
|
||||
|
||||
enabled_toolsets is None → no filter, inject (backward compat)
|
||||
"memory" in enabled_toolsets → user opted in, inject
|
||||
otherwise (incl. []) → skip injection
|
||||
"""
|
||||
|
||||
@staticmethod
|
||||
def _run_memory_injection(enabled_toolsets, memory_manager):
|
||||
"""Simulate the gated memory-tool injection block from agent_init.py."""
|
||||
tools = []
|
||||
valid_tool_names = set()
|
||||
|
||||
if memory_manager and tools is not None and (
|
||||
enabled_toolsets is None or "memory" in enabled_toolsets
|
||||
):
|
||||
_existing = {
|
||||
t.get("function", {}).get("name")
|
||||
for t in tools
|
||||
if isinstance(t, dict)
|
||||
}
|
||||
for _schema in memory_manager.get_all_tool_schemas():
|
||||
_tname = _schema.get("name", "")
|
||||
if _tname and _tname in _existing:
|
||||
continue
|
||||
tools.append({"type": "function", "function": _schema})
|
||||
if _tname:
|
||||
valid_tool_names.add(_tname)
|
||||
_existing.add(_tname)
|
||||
|
||||
return tools, valid_tool_names
|
||||
|
||||
def _mgr_with_tools(self, *tool_names):
|
||||
"""Build a MemoryManager whose providers expose the named tool schemas."""
|
||||
mgr = MemoryManager()
|
||||
p = FakeMemoryProvider(
|
||||
"ext",
|
||||
tools=[{"name": n, "description": n, "parameters": {}} for n in tool_names],
|
||||
)
|
||||
mgr.add_provider(p)
|
||||
return mgr
|
||||
|
||||
def test_none_toolsets_injects(self):
|
||||
"""enabled_toolsets=None (no filter) injects memory tools — backward compat."""
|
||||
mgr = self._mgr_with_tools("fact_store")
|
||||
tools, names = self._run_memory_injection(None, mgr)
|
||||
assert "fact_store" in names
|
||||
assert any(t["function"]["name"] == "fact_store" for t in tools)
|
||||
|
||||
def test_memory_in_toolsets_injects(self):
|
||||
"""enabled_toolsets including 'memory' injects memory tools."""
|
||||
mgr = self._mgr_with_tools("fact_store")
|
||||
tools, names = self._run_memory_injection(["terminal", "memory", "web"], mgr)
|
||||
assert "fact_store" in names
|
||||
|
||||
def test_empty_toolsets_blocks_injection(self):
|
||||
"""`platform_toolsets: telegram: []` must suppress memory tools. (#5544)"""
|
||||
mgr = self._mgr_with_tools("fact_store")
|
||||
tools, names = self._run_memory_injection([], mgr)
|
||||
assert tools == []
|
||||
assert names == set()
|
||||
|
||||
def test_toolsets_without_memory_blocks_injection(self):
|
||||
"""Toolset list that doesn't name 'memory' must suppress injection."""
|
||||
mgr = self._mgr_with_tools("fact_store")
|
||||
tools, names = self._run_memory_injection(["terminal", "web"], mgr)
|
||||
assert tools == []
|
||||
assert names == set()
|
||||
|
||||
def test_no_memory_manager_no_injection(self):
|
||||
"""Gate is moot without a memory manager."""
|
||||
tools, names = self._run_memory_injection(None, None)
|
||||
assert tools == []
|
||||
|
||||
def test_multiple_schemas_all_blocked_together(self):
|
||||
"""When the gate is closed, no memory tools leak — not even partially."""
|
||||
mgr = self._mgr_with_tools("fact_store", "memory_search", "memory_add")
|
||||
tools, names = self._run_memory_injection(["terminal"], mgr)
|
||||
assert tools == []
|
||||
assert names == set()
|
||||
|
||||
def test_multiple_schemas_all_injected_when_enabled(self):
|
||||
"""When the gate is open, every memory tool schema is injected."""
|
||||
mgr = self._mgr_with_tools("fact_store", "memory_search", "memory_add")
|
||||
tools, names = self._run_memory_injection(None, mgr)
|
||||
assert names == {"fact_store", "memory_search", "memory_add"}
|
||||
|
||||
|
||||
class TestContextEngineToolsetGate:
|
||||
"""Issue #5544 (sibling): context engine tools follow the same gate.
|
||||
|
||||
`agent.context_compressor.get_tool_schemas()` (e.g. lcm_grep, lcm_describe,
|
||||
lcm_expand) was appended to AIAgent.tools unconditionally. Same blind
|
||||
injection class as the memory bug; same local-model penalty. Gate name:
|
||||
"context_engine" (matches the existing plugin-system convention).
|
||||
"""
|
||||
|
||||
@staticmethod
|
||||
def _run_context_engine_injection(enabled_toolsets, compressor):
|
||||
"""Simulate the gated context-engine injection block from agent_init.py."""
|
||||
tools = []
|
||||
valid_tool_names = set()
|
||||
engine_tool_names = set()
|
||||
|
||||
if (
|
||||
compressor is not None
|
||||
and tools is not None
|
||||
and (
|
||||
enabled_toolsets is None
|
||||
or "context_engine" in enabled_toolsets
|
||||
)
|
||||
):
|
||||
_existing = {
|
||||
t.get("function", {}).get("name")
|
||||
for t in tools
|
||||
if isinstance(t, dict)
|
||||
}
|
||||
for _schema in compressor.get_tool_schemas():
|
||||
_tname = _schema.get("name", "")
|
||||
if _tname and _tname in _existing:
|
||||
continue
|
||||
tools.append({"type": "function", "function": _schema})
|
||||
if _tname:
|
||||
valid_tool_names.add(_tname)
|
||||
engine_tool_names.add(_tname)
|
||||
_existing.add(_tname)
|
||||
|
||||
return tools, valid_tool_names, engine_tool_names
|
||||
|
||||
class _FakeCompressor:
|
||||
def __init__(self, schemas):
|
||||
self._schemas = schemas
|
||||
|
||||
def get_tool_schemas(self):
|
||||
return list(self._schemas)
|
||||
|
||||
def _compressor_with(self, *tool_names):
|
||||
return self._FakeCompressor(
|
||||
[{"name": n, "description": n, "parameters": {}} for n in tool_names]
|
||||
)
|
||||
|
||||
def test_none_toolsets_injects(self):
|
||||
"""enabled_toolsets=None injects context-engine tools — backward compat."""
|
||||
c = self._compressor_with("lcm_grep", "lcm_describe", "lcm_expand")
|
||||
tools, names, engine_names = self._run_context_engine_injection(None, c)
|
||||
assert engine_names == {"lcm_grep", "lcm_describe", "lcm_expand"}
|
||||
|
||||
def test_context_engine_in_toolsets_injects(self):
|
||||
"""enabled_toolsets including 'context_engine' injects the tools."""
|
||||
c = self._compressor_with("lcm_grep")
|
||||
tools, names, engine_names = self._run_context_engine_injection(
|
||||
["terminal", "context_engine"], c
|
||||
)
|
||||
assert "lcm_grep" in engine_names
|
||||
|
||||
def test_empty_toolsets_blocks_injection(self):
|
||||
"""`platform_toolsets: telegram: []` must suppress context-engine tools."""
|
||||
c = self._compressor_with("lcm_grep")
|
||||
tools, names, engine_names = self._run_context_engine_injection([], c)
|
||||
assert tools == []
|
||||
assert engine_names == set()
|
||||
|
||||
def test_toolsets_without_context_engine_blocks_injection(self):
|
||||
"""A toolset list that doesn't name 'context_engine' suppresses injection."""
|
||||
c = self._compressor_with("lcm_grep", "lcm_describe")
|
||||
tools, names, engine_names = self._run_context_engine_injection(
|
||||
["terminal", "memory"], c
|
||||
)
|
||||
assert tools == []
|
||||
assert engine_names == set()
|
||||
|
||||
def test_no_compressor_no_injection(self):
|
||||
"""Gate is moot without a context_compressor."""
|
||||
tools, names, engine_names = self._run_context_engine_injection(None, None)
|
||||
assert tools == []
|
||||
|
||||
@@ -444,6 +444,7 @@ class TestBuildNousSubscriptionPrompt:
|
||||
"tts": NousFeatureState("tts", "OpenAI TTS", True, True, True, True, False, True, "OpenAI TTS"),
|
||||
"browser": NousFeatureState("browser", "Browser automation", True, True, True, True, False, True, "Browser Use"),
|
||||
"modal": NousFeatureState("modal", "Modal execution", False, True, False, False, False, True, "local"),
|
||||
"app_tools": NousFeatureState("app_tools", "App tools (500+ apps)", True, True, True, True, False, True, "Nous Subscription"),
|
||||
},
|
||||
),
|
||||
)
|
||||
@@ -468,6 +469,7 @@ class TestBuildNousSubscriptionPrompt:
|
||||
"tts": NousFeatureState("tts", "OpenAI TTS", True, False, False, False, False, True, ""),
|
||||
"browser": NousFeatureState("browser", "Browser automation", True, False, False, False, False, True, ""),
|
||||
"modal": NousFeatureState("modal", "Modal execution", False, False, False, False, False, True, ""),
|
||||
"app_tools": NousFeatureState("app_tools", "App tools (500+ apps)", True, False, False, False, False, True, ""),
|
||||
},
|
||||
),
|
||||
)
|
||||
|
||||
@@ -1,6 +1,12 @@
|
||||
"""Tests for agent/skill_utils.py."""
|
||||
|
||||
from agent.skill_utils import extract_skill_conditions, iter_skill_index_files
|
||||
from unittest.mock import patch
|
||||
|
||||
from agent.skill_utils import (
|
||||
extract_skill_conditions,
|
||||
iter_skill_index_files,
|
||||
skill_matches_platform,
|
||||
)
|
||||
|
||||
|
||||
def test_metadata_as_dict_with_hermes():
|
||||
@@ -94,3 +100,100 @@ def test_iter_skill_index_files_prunes_dependency_dirs(tmp_path):
|
||||
found = list(iter_skill_index_files(tmp_path, "SKILL.md"))
|
||||
|
||||
assert found == [real / "SKILL.md"]
|
||||
|
||||
|
||||
# ── skill_matches_platform on Termux ──────────────────────────────────────
|
||||
|
||||
|
||||
class TestSkillMatchesPlatformTermux:
|
||||
"""Termux is Linux userland on Android. Skills tagged platforms:[linux]
|
||||
must load there regardless of whether Python reports sys.platform as
|
||||
"linux" (pre-3.13) or "android" (3.13+). Reported by user @LikiusInik
|
||||
in May 2026 — only 3 built-in skills appeared on Termux because every
|
||||
github/productivity/mlops skill is tagged platforms:[linux,macos,windows]
|
||||
and sys.platform=="android" did not start with "linux".
|
||||
"""
|
||||
|
||||
def test_no_platforms_field_matches_everywhere(self):
|
||||
# Backward-compat default — skills without a platforms tag load
|
||||
# on any OS, Termux included.
|
||||
with patch("agent.skill_utils.sys.platform", "android"), patch(
|
||||
"agent.skill_utils.is_termux", return_value=True
|
||||
):
|
||||
assert skill_matches_platform({}) is True
|
||||
assert skill_matches_platform({"name": "foo"}) is True
|
||||
|
||||
def test_linux_skill_loads_on_termux_android_platform(self):
|
||||
# Python 3.13+ on Termux reports sys.platform == "android".
|
||||
fm = {"platforms": ["linux"]}
|
||||
with patch("agent.skill_utils.sys.platform", "android"), patch(
|
||||
"agent.skill_utils.is_termux", return_value=True
|
||||
):
|
||||
assert skill_matches_platform(fm) is True
|
||||
|
||||
def test_linux_macos_windows_skill_loads_on_termux(self):
|
||||
# The common "[linux, macos, windows]" tag used by github-*,
|
||||
# productivity, mlops, etc.
|
||||
fm = {"platforms": ["linux", "macos", "windows"]}
|
||||
with patch("agent.skill_utils.sys.platform", "android"), patch(
|
||||
"agent.skill_utils.is_termux", return_value=True
|
||||
):
|
||||
assert skill_matches_platform(fm) is True
|
||||
|
||||
def test_linux_skill_loads_on_termux_linux_platform(self):
|
||||
# Pre-3.13 Termux reports sys.platform == "linux" already — this
|
||||
# works without the Termux escape hatch but must still pass.
|
||||
fm = {"platforms": ["linux"]}
|
||||
with patch("agent.skill_utils.sys.platform", "linux"), patch(
|
||||
"agent.skill_utils.is_termux", return_value=True
|
||||
):
|
||||
assert skill_matches_platform(fm) is True
|
||||
|
||||
def test_macos_only_skill_still_excluded_on_termux(self):
|
||||
# macOS-only skills (apple-notes, imessage, ...) should NOT load
|
||||
# on Termux. The Termux fallback only widens platforms:[linux,...].
|
||||
fm = {"platforms": ["macos"]}
|
||||
with patch("agent.skill_utils.sys.platform", "android"), patch(
|
||||
"agent.skill_utils.is_termux", return_value=True
|
||||
):
|
||||
assert skill_matches_platform(fm) is False
|
||||
|
||||
def test_windows_only_skill_still_excluded_on_termux(self):
|
||||
fm = {"platforms": ["windows"]}
|
||||
with patch("agent.skill_utils.sys.platform", "android"), patch(
|
||||
"agent.skill_utils.is_termux", return_value=True
|
||||
):
|
||||
assert skill_matches_platform(fm) is False
|
||||
|
||||
def test_explicit_termux_or_android_tag_matches(self):
|
||||
# Skills can also opt in explicitly via platforms:[termux] or
|
||||
# platforms:[android] — both should match a Termux session.
|
||||
with patch("agent.skill_utils.sys.platform", "android"), patch(
|
||||
"agent.skill_utils.is_termux", return_value=True
|
||||
):
|
||||
assert skill_matches_platform({"platforms": ["termux"]}) is True
|
||||
assert skill_matches_platform({"platforms": ["android"]}) is True
|
||||
|
||||
def test_non_termux_android_does_not_widen(self):
|
||||
# If we're somehow on a plain Android Python (not Termux), don't
|
||||
# silently load Linux skills — Termux is the supported environment.
|
||||
fm = {"platforms": ["linux"]}
|
||||
with patch("agent.skill_utils.sys.platform", "android"), patch(
|
||||
"agent.skill_utils.is_termux", return_value=False
|
||||
):
|
||||
assert skill_matches_platform(fm) is False
|
||||
|
||||
def test_linux_skill_on_real_linux_unaffected(self):
|
||||
# The non-Termux Linux path must not change.
|
||||
fm = {"platforms": ["linux"]}
|
||||
with patch("agent.skill_utils.sys.platform", "linux"), patch(
|
||||
"agent.skill_utils.is_termux", return_value=False
|
||||
):
|
||||
assert skill_matches_platform(fm) is True
|
||||
|
||||
def test_macos_skill_on_real_macos_unaffected(self):
|
||||
fm = {"platforms": ["macos"]}
|
||||
with patch("agent.skill_utils.sys.platform", "darwin"), patch(
|
||||
"agent.skill_utils.is_termux", return_value=False
|
||||
):
|
||||
assert skill_matches_platform(fm) is True
|
||||
|
||||
@@ -75,9 +75,197 @@ class TestCodeGeneration:
|
||||
code = store.generate_code("telegram", "user1", "Alice")
|
||||
pending = store.list_pending("telegram")
|
||||
assert len(pending) == 1
|
||||
assert pending[0]["code"] == code
|
||||
# list_pending no longer returns the original code — it returns a
|
||||
# truncated hash prefix. Verify the metadata is correct instead.
|
||||
assert pending[0]["user_id"] == "user1"
|
||||
assert pending[0]["user_name"] == "Alice"
|
||||
# The code field is now a hash prefix, not the original plaintext code
|
||||
assert pending[0]["code"] != code
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Hashed storage
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestHashedStorage:
|
||||
def test_pending_file_contains_hash_and_salt(self, tmp_path):
|
||||
"""Stored entries must have 'hash' and 'salt', never the plaintext code."""
|
||||
with patch("gateway.pairing.PAIRING_DIR", tmp_path):
|
||||
store = PairingStore()
|
||||
code = store.generate_code("telegram", "user1", "Alice")
|
||||
raw = json.loads(
|
||||
(tmp_path / "telegram-pending.json").read_text(encoding="utf-8")
|
||||
)
|
||||
|
||||
assert len(raw) == 1
|
||||
entry = next(iter(raw.values()))
|
||||
# Must have hash and salt fields
|
||||
assert "hash" in entry
|
||||
assert "salt" in entry
|
||||
# Hash must be a valid hex SHA-256 digest (64 hex chars)
|
||||
assert len(entry["hash"]) == 64
|
||||
assert all(c in "0123456789abcdef" for c in entry["hash"])
|
||||
# Salt must be a valid hex string (32 hex chars for 16 bytes)
|
||||
assert len(entry["salt"]) == 32
|
||||
assert all(c in "0123456789abcdef" for c in entry["salt"])
|
||||
# The plaintext code must NOT appear as a key or value anywhere
|
||||
assert code not in raw # not a key
|
||||
for key, val in raw.items():
|
||||
assert code != key
|
||||
for field_val in val.values():
|
||||
if isinstance(field_val, str):
|
||||
assert field_val != code
|
||||
|
||||
def test_plaintext_code_not_stored(self, tmp_path):
|
||||
"""The raw JSON file must not contain the plaintext code anywhere."""
|
||||
with patch("gateway.pairing.PAIRING_DIR", tmp_path):
|
||||
store = PairingStore()
|
||||
code = store.generate_code("telegram", "user1")
|
||||
raw_text = (tmp_path / "telegram-pending.json").read_text(encoding="utf-8")
|
||||
assert code not in raw_text
|
||||
|
||||
def test_valid_code_verifies_against_hash(self, tmp_path):
|
||||
"""approve_code with the correct code should succeed."""
|
||||
with patch("gateway.pairing.PAIRING_DIR", tmp_path):
|
||||
store = PairingStore()
|
||||
code = store.generate_code("telegram", "user1", "Bob")
|
||||
result = store.approve_code("telegram", code)
|
||||
assert result is not None
|
||||
assert result["user_id"] == "user1"
|
||||
assert result["user_name"] == "Bob"
|
||||
|
||||
def test_invalid_code_rejected(self, tmp_path):
    """A code that does not match the pending entry must not approve anyone."""
    with patch("gateway.pairing.PAIRING_DIR", tmp_path):
        store = PairingStore()
        store.generate_code("telegram", "user1")
        outcome = store.approve_code("telegram", "ZZZZZZZZ")
        assert outcome is None
|
||||
|
||||
def test_different_salts_per_entry(self, tmp_path):
    """Three pending entries must carry three distinct salts."""
    with patch("gateway.pairing.PAIRING_DIR", tmp_path):
        store = PairingStore()
        for user_id in ("user0", "user1", "user2"):
            store.generate_code("telegram", user_id)
        pending_text = (tmp_path / "telegram-pending.json").read_text(encoding="utf-8")
        records = json.loads(pending_text)
        distinct_salts = {record["salt"] for record in records.values()}
        assert len(distinct_salts) == 3  # all unique
|
||||
|
||||
def test_hash_code_static_method(self, tmp_path):
    """_hash_code is repeatable for one (code, salt) pair and changes with the salt."""
    code = "ABCD1234"
    first_salt = os.urandom(16)
    digest = PairingStore._hash_code(code, first_salt)
    repeat = PairingStore._hash_code(code, first_salt)
    assert digest == repeat

    # A fresh random salt must lead to a different digest for the same code.
    second_salt = os.urandom(16)
    resalted = PairingStore._hash_code(code, second_salt)
    assert resalted != digest
|
||||
|
||||
|
||||
class TestLegacyPendingFileCompat:
    """Upgrade-path coverage for pending files written before codes were hashed.

    An upgraded install may still hold a pending.json produced by the old
    code: the plaintext code is the dictionary key and the entry has no
    hash/salt fields. approve_code / list_pending / _cleanup_expired are
    expected to tolerate such entries — ignore them and let them age out
    at TTL — rather than crash.
    """

    @staticmethod
    def _read_pending(tmp_path):
        """Parse and return the telegram pending file stored under ``tmp_path``."""
        return json.loads(
            (tmp_path / "telegram-pending.json").read_text(encoding="utf-8")
        )

    @staticmethod
    def _write_legacy(tmp_path, code="ABCD1234", created_at=None):
        """Write an old-format pending file keyed by the plaintext ``code``.

        ``created_at`` defaults to the current time when omitted.
        """
        import time as _time

        stamp = _time.time() if created_at is None else created_at
        record = {
            "user_id": "legacy-user",
            "user_name": "Legacy",
            "created_at": stamp,
        }
        (tmp_path / "telegram-pending.json").write_text(
            json.dumps({code: record}), encoding="utf-8"
        )

    def test_approve_code_ignores_legacy_entries(self, tmp_path):
        """An old-format code must not be approved under the new schema."""
        with patch("gateway.pairing.PAIRING_DIR", tmp_path):
            self._write_legacy(tmp_path, code="LEGACY01")
            store = PairingStore()
            # The plaintext code is only the legacy key; the entry carries no
            # hash/salt to verify against, so approval is expected to return
            # None and leave the entry for _cleanup_expired to prune at TTL.
            outcome = store.approve_code("telegram", "LEGACY01")
            assert outcome is None
            # Nothing may have been added to the approved list.
            assert store.is_approved("telegram", "legacy-user") is False

    def test_list_pending_handles_legacy_entries(self, tmp_path):
        """list_pending must cope with an entry that has no 'hash' field."""
        with patch("gateway.pairing.PAIRING_DIR", tmp_path):
            self._write_legacy(tmp_path)
            store = PairingStore()
            listed = store.list_pending("telegram")
            assert len(listed) == 1
            only = listed[0]
            assert only["user_id"] == "legacy-user"
            assert only["code"] == "legacy"  # placeholder

    def test_cleanup_expired_removes_legacy_at_ttl(self, tmp_path):
        """A legacy entry older than CODE_TTL_SECONDS is pruned by cleanup."""
        import time as _time

        with patch("gateway.pairing.PAIRING_DIR", tmp_path):
            self._write_legacy(
                tmp_path,
                code="LEGACY99",
                created_at=_time.time() - CODE_TTL_SECONDS - 1,
            )
            store = PairingStore()
            store._cleanup_expired("telegram")
            assert self._read_pending(tmp_path) == {}

    def test_cleanup_expired_handles_malformed_entries(self, tmp_path):
        """Non-dict entries and entries with missing/invalid created_at are evicted."""
        with patch("gateway.pairing.PAIRING_DIR", tmp_path):
            malformed = {
                "broken1": "not a dict",
                "broken2": {"user_id": "x"},  # no created_at
                "broken3": {"created_at": "not a number"},
            }
            (tmp_path / "telegram-pending.json").write_text(
                json.dumps(malformed),
                encoding="utf-8",
            )
            store = PairingStore()
            store._cleanup_expired("telegram")
            assert self._read_pending(tmp_path) == {}

    def test_approve_code_skips_malformed_entries(self, tmp_path):
        """An entry with a non-hex salt must not crash approve_code."""
        import time as _time

        with patch("gateway.pairing.PAIRING_DIR", tmp_path):
            bad_entry = {
                "user_id": "x",
                "created_at": _time.time(),
                "salt": "not-hex",
                "hash": "doesntmatter",
            }
            (tmp_path / "telegram-pending.json").write_text(
                json.dumps({"broken": bad_entry}),
                encoding="utf-8",
            )
            store = PairingStore()
            # Whatever code is supplied, the result is just None — no exception.
            assert store.approve_code("telegram", "ABCD1234") is None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -300,9 +488,10 @@ class TestCodeExpiry:
|
||||
store = PairingStore()
|
||||
code = store.generate_code("telegram", "user1")
|
||||
|
||||
# Manually expire the code
|
||||
# Manually expire all pending entries
|
||||
pending = store._load_json(store._pending_path("telegram"))
|
||||
pending[code]["created_at"] = time.time() - CODE_TTL_SECONDS - 1
|
||||
for entry_id in pending:
|
||||
pending[entry_id]["created_at"] = time.time() - CODE_TTL_SECONDS - 1
|
||||
store._save_json(store._pending_path("telegram"), pending)
|
||||
|
||||
# Cleanup happens on next operation
|
||||
@@ -314,9 +503,10 @@ class TestCodeExpiry:
|
||||
store = PairingStore()
|
||||
code = store.generate_code("telegram", "user1")
|
||||
|
||||
# Expire it
|
||||
# Expire all entries
|
||||
pending = store._load_json(store._pending_path("telegram"))
|
||||
pending[code]["created_at"] = time.time() - CODE_TTL_SECONDS - 1
|
||||
for entry_id in pending:
|
||||
pending[entry_id]["created_at"] = time.time() - CODE_TTL_SECONDS - 1
|
||||
store._save_json(store._pending_path("telegram"), pending)
|
||||
|
||||
result = store.approve_code("telegram", code)
|
||||
|
||||
@@ -6,7 +6,11 @@ import pytest
|
||||
from pathlib import Path
|
||||
|
||||
from gateway.config import PlatformConfig
|
||||
from gateway.platforms.webhook import WebhookAdapter, _DYNAMIC_ROUTES_FILENAME
|
||||
from gateway.platforms.webhook import (
|
||||
WebhookAdapter,
|
||||
_DYNAMIC_ROUTES_FILENAME,
|
||||
_INSECURE_NO_AUTH,
|
||||
)
|
||||
|
||||
|
||||
def _make_adapter(routes=None, extra=None):
|
||||
@@ -85,3 +89,78 @@ class TestDynamicRouteLoading:
|
||||
adapter._reload_dynamic_routes()
|
||||
assert "static" in adapter._routes
|
||||
assert len(adapter._dynamic_routes) == 0
|
||||
|
||||
|
||||
class TestDynamicRouteSecretValidation:
    """Hot-reload must refuse dynamic routes whose secret is empty or missing.

    Regression for an HMAC bypass: before the fix, a dynamic route with
    `"secret": ""` was merged into self._routes by _reload_dynamic_routes().
    _handle_webhook's `if secret and secret != _INSECURE_NO_AUTH` check then
    skipped signature validation because the empty string is falsy, so
    unauthenticated POSTs could execute the webhook prompt.
    """

    @staticmethod
    def _write_routes(tmp_path, routes):
        """Serialize ``routes`` into the dynamic-routes file under ``tmp_path``."""
        (tmp_path / _DYNAMIC_ROUTES_FILENAME).write_text(json.dumps(routes))

    def test_empty_secret_rejected(self, tmp_path):
        # An explicit empty-string secret must NOT fall back to the global
        # secret; the route has to be dropped altogether.
        self._write_routes(tmp_path, {"evil": {"secret": "", "prompt": "rm -rf"}})
        adapter = _make_adapter()  # has global secret
        adapter._reload_dynamic_routes()
        assert "evil" not in adapter._routes
        assert "evil" not in adapter._dynamic_routes

    def test_missing_secret_no_global_rejected(self, tmp_path):
        self._write_routes(tmp_path, {"orphan": {"prompt": "test"}})
        # No global secret configured
        adapter = _make_adapter(extra={"secret": ""})
        adapter._reload_dynamic_routes()
        assert "orphan" not in adapter._routes
        assert "orphan" not in adapter._dynamic_routes

    def test_missing_secret_inherits_global(self, tmp_path):
        # No per-route secret, but a global one exists → the route stays and
        # is covered by the global secret (existing fallback is preserved).
        self._write_routes(tmp_path, {"valid": {"prompt": "ok"}})
        adapter = _make_adapter()  # global secret set
        adapter._reload_dynamic_routes()
        assert "valid" in adapter._routes

    def test_insecure_no_auth_preserved(self, tmp_path):
        # The explicit opt-in escape hatch for local testing must keep loading.
        self._write_routes(
            tmp_path, {"test": {"secret": _INSECURE_NO_AUTH, "prompt": "p"}}
        )
        adapter = _make_adapter()
        adapter._reload_dynamic_routes()
        assert "test" in adapter._routes

    def test_warning_logged_on_skip(self, tmp_path, caplog):
        import logging

        self._write_routes(tmp_path, {"silent": {"secret": "", "prompt": "x"}})
        adapter = _make_adapter()
        with caplog.at_level(logging.WARNING, logger="gateway.platforms.webhook"):
            adapter._reload_dynamic_routes()
        mentioned = [rec for rec in caplog.records if "silent" in rec.message]
        assert any(mentioned)

    def test_partial_skip(self, tmp_path):
        # One bad route and one good route — only the bad one is dropped.
        routes = {
            "bad": {"secret": "", "prompt": "x"},
            "good": {"secret": "valid-secret", "prompt": "y"},
        }
        self._write_routes(tmp_path, routes)
        adapter = _make_adapter()
        adapter._reload_dynamic_routes()
        assert "good" in adapter._routes
        assert "bad" not in adapter._routes
|
||||
|
||||
@@ -0,0 +1,131 @@
|
||||
"""Tests for curses color compatibility on low-color terminals (Docker).
|
||||
|
||||
Regression test for #13688: ``hermes plugins`` crashes with
|
||||
``curses.error: init_pair() : color number is greater than COLORS-1``
|
||||
in Docker containers where curses.COLORS == 8 (only colors 0-7 exist).
|
||||
|
||||
The bug was ``curses.init_pair(4, 8, -1)`` using raw color 8 ("bright
|
||||
black" / dim gray) which does not exist on 8-color terminals. The fix
|
||||
clamps with ``min(8, curses.COLORS - 1)``.
|
||||
"""
|
||||
|
||||
import curses
|
||||
import re
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch, MagicMock, call
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
# Path to the source files under test
|
||||
_SRC_ROOT = Path(__file__).parent.parent.parent / "hermes_cli"
|
||||
|
||||
|
||||
class TestInitPairClampingBehavior:
    """Simulate curses color initialization on low-color terminals.

    ``curses.COLORS`` is patched to a chosen value (8 is the Docker case
    these tests target) and every ``curses.init_pair`` call is recorded, so
    the tests can verify that no foreground color >= COLORS is requested.
    """

    @staticmethod
    def _simulated_color_init(stdscr):
        """Replay the color-pair setup pattern the tests model on plugins_cmd.py.

        Pair 4 uses color 8 only when the terminal reports more than 8
        colors; otherwise it falls back to ``curses.COLOR_WHITE``.
        """
        if curses.has_colors():
            curses.start_color()
            curses.use_default_colors()
            curses.init_pair(1, curses.COLOR_GREEN, -1)
            curses.init_pair(2, curses.COLOR_YELLOW, -1)
            curses.init_pair(3, curses.COLOR_CYAN, -1)
            curses.init_pair(4, 8 if curses.COLORS > 8 else curses.COLOR_WHITE, -1)

    def _collect_init_pair_calls(self, draw_fn, colors_value):
        """Run ``draw_fn`` with a mock stdscr and ``curses.COLORS`` patched.

        Args:
            draw_fn: Callable taking a single stdscr-like argument.
            colors_value: Value installed as ``curses.COLORS`` for the call.

        Returns:
            List of ``(pair_number, fg, bg)`` tuples, one per ``init_pair``
            call made while ``draw_fn`` ran.
        """
        calls = []

        def tracking_init_pair(pair, fg, bg):
            calls.append((pair, fg, bg))

        mock_stdscr = MagicMock()
        mock_stdscr.getmaxyx.return_value = (24, 80)
        mock_stdscr.getch.return_value = 27  # ESC to exit

        # create=True: curses.COLORS does not exist until curses is initialised.
        with patch("curses.COLORS", colors_value, create=True), \
                patch("curses.init_pair", side_effect=tracking_init_pair), \
                patch("curses.has_colors", return_value=True), \
                patch("curses.start_color"), \
                patch("curses.use_default_colors"), \
                patch("curses.curs_set"):
            try:
                draw_fn(mock_stdscr)
            except (SystemExit, Exception):
                # Deliberate best-effort: draw functions loop until keypress
                # and may bail out in arbitrary ways under the mock. Callers
                # must check that calls were actually recorded.
                pass

        return calls

    def test_8_color_terminal_no_color_exceeds_limit(self):
        """On an 8-color terminal (Docker), no init_pair fg color >= 8."""
        calls = self._collect_init_pair_calls(self._simulated_color_init, 8)
        # Guard against a vacuous pass: the loop below proves nothing if the
        # draw function died (silently) before registering any pair.
        assert calls, "no init_pair calls were recorded"
        for pair, fg, bg in calls:
            assert fg < 8, (
                f"init_pair({pair}, {fg}, {bg}) uses color {fg} which "
                f"does not exist on an 8-color terminal (valid: 0-7)"
            )

    def test_256_color_terminal_uses_color_8(self):
        """On a 256-color terminal, color 8 (dim gray) should be used."""
        calls = self._collect_init_pair_calls(self._simulated_color_init, 256)
        assert any(fg == 8 for _, fg, _ in calls), (
            "On 256-color terminals, color 8 (dim gray) should be used"
        )

    def test_16_color_terminal_uses_color_8(self):
        """On a 16-color terminal, color 8 should be available."""
        calls = self._collect_init_pair_calls(self._simulated_color_init, 16)
        assert any(fg == 8 for _, fg, _ in calls)
|
||||
|
||||
|
||||
class TestSourceCodeGuardrails:
|
||||
"""Regression guardrails: raw color 8 must not reappear in source.
|
||||
|
||||
These complement the behavioral tests above — they catch regressions
|
||||
introduced by copy-paste of the old pattern.
|
||||
"""
|
||||
|
||||
_RAW_COLOR_8_PATTERN = re.compile(r'init_pair\(\d+,\s*8\s*,')
|
||||
|
||||
def test_no_raw_color_8_in_plugins_cmd(self):
|
||||
source = (_SRC_ROOT / "plugins_cmd.py").read_text()
|
||||
matches = self._RAW_COLOR_8_PATTERN.findall(source)
|
||||
assert not matches, (
|
||||
f"plugins_cmd.py contains unclamped color 8: {matches}"
|
||||
)
|
||||
|
||||
def test_no_raw_color_8_in_main(self):
|
||||
source = (_SRC_ROOT / "main.py").read_text()
|
||||
matches = self._RAW_COLOR_8_PATTERN.findall(source)
|
||||
assert not matches, (
|
||||
f"main.py contains unclamped color 8: {matches}"
|
||||
)
|
||||
|
||||
def test_no_raw_color_8_in_curses_ui(self):
|
||||
source = (_SRC_ROOT / "curses_ui.py").read_text()
|
||||
matches = self._RAW_COLOR_8_PATTERN.findall(source)
|
||||
assert not matches, (
|
||||
f"curses_ui.py contains unclamped color 8: {matches}"
|
||||
)
|
||||
@@ -69,18 +69,19 @@ class TestPluginPickerInjection:
|
||||
assert "Myimg" in names
|
||||
assert "myimg" in plugin_names
|
||||
|
||||
def test_fal_skipped_to_avoid_duplicate(self, monkeypatch):
|
||||
def test_fal_surfaced_alongside_other_plugins(self, monkeypatch):
|
||||
from hermes_cli import tools_config
|
||||
|
||||
# Simulate a FAL plugin being registered — the picker already has
|
||||
# hardcoded FAL rows in TOOL_CATEGORIES, so plugin-FAL must be
|
||||
# skipped to avoid showing FAL twice.
|
||||
# After #26241, FAL is itself a plugin (`plugins/image_gen/fal/`)
|
||||
# and the hardcoded `TOOL_CATEGORIES["image_gen"]` FAL row is
|
||||
# gone. The plugin-row builder therefore surfaces it like any
|
||||
# other backend — no deduplication step needed.
|
||||
image_gen_registry.register_provider(_FakeProvider("fal"))
|
||||
image_gen_registry.register_provider(_FakeProvider("openai"))
|
||||
|
||||
rows = tools_config._plugin_image_gen_providers()
|
||||
names = [r.get("image_gen_plugin_name") for r in rows]
|
||||
assert "fal" not in names
|
||||
assert "fal" in names
|
||||
assert "openai" in names
|
||||
|
||||
def test_visible_providers_includes_plugins_for_image_gen(self, monkeypatch):
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
"""Tests for ``install_cua_driver`` upgrade semantics.
|
||||
"""Tests for ``install_cua_driver`` upgrade semantics and architecture pre-check.
|
||||
|
||||
The cua-driver upstream installer always pulls the latest release tag, so
|
||||
re-running it is the canonical upgrade path. ``install_cua_driver(upgrade=True)``
|
||||
@@ -10,18 +10,18 @@ must:
|
||||
fix for the "we only pulled cua-driver once on enable" complaint).
|
||||
* Preserve original ``upgrade=False`` behaviour for the toolset-enable flow:
|
||||
skip if installed, install otherwise, warn on non-macOS.
|
||||
* Pre-check architecture compatibility before downloading to avoid raw 404
|
||||
errors on Intel macOS when the upstream release lacks x86_64 assets.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from unittest.mock import patch
|
||||
import json
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
|
||||
class TestInstallCuaDriverUpgrade:
|
||||
def test_upgrade_on_non_macos_is_silent_noop(self):
|
||||
"""``hermes update`` calls install_cua_driver(upgrade=True) for every
|
||||
user. On Linux/Windows it must return False without printing the
|
||||
"macOS-only; skipping" warning that the toolset-enable path emits."""
|
||||
from hermes_cli import tools_config
|
||||
|
||||
with patch.object(tools_config, "_print_warning") as warn, \
|
||||
@@ -30,8 +30,6 @@ class TestInstallCuaDriverUpgrade:
|
||||
warn.assert_not_called()
|
||||
|
||||
def test_non_upgrade_on_non_macos_warns(self):
|
||||
"""The toolset-enable path (upgrade=False) should still warn loudly
|
||||
when the user tries to enable Computer Use on a non-macOS host."""
|
||||
from hermes_cli import tools_config
|
||||
|
||||
with patch.object(tools_config, "_print_warning") as warn, \
|
||||
@@ -40,43 +38,36 @@ class TestInstallCuaDriverUpgrade:
|
||||
warn.assert_called()
|
||||
|
||||
def test_upgrade_on_macos_with_binary_runs_installer(self):
|
||||
"""When cua-driver is already on PATH and upgrade=True, we must
|
||||
re-run the upstream installer (this is the fix for the bug report).
|
||||
"""
|
||||
from hermes_cli import tools_config
|
||||
|
||||
with patch("platform.system", return_value="Darwin"), \
|
||||
patch.object(tools_config.shutil, "which",
|
||||
side_effect=lambda n: "/usr/local/bin/" + n
|
||||
if n in {"cua-driver", "curl"} else None), \
|
||||
patch.object(tools_config, "_check_cua_driver_asset_for_arch",
|
||||
return_value=True), \
|
||||
patch.object(tools_config, "_run_cua_driver_installer",
|
||||
return_value=True) as runner, \
|
||||
patch("subprocess.run"):
|
||||
assert tools_config.install_cua_driver(upgrade=True) is True
|
||||
runner.assert_called_once()
|
||||
# Refresh path uses non-verbose mode so we don't re-print the
|
||||
# "grant macOS permissions" block on every `hermes update`.
|
||||
kwargs = runner.call_args.kwargs
|
||||
assert kwargs.get("verbose") is False
|
||||
|
||||
def test_upgrade_on_macos_without_binary_runs_installer(self):
|
||||
"""upgrade=True with cua-driver missing must still trigger an
|
||||
install — equivalent to a fresh install. (Don't silently no-op.)"""
|
||||
from hermes_cli import tools_config
|
||||
|
||||
with patch("platform.system", return_value="Darwin"), \
|
||||
patch.object(tools_config.shutil, "which",
|
||||
side_effect=lambda n: "/usr/bin/curl" if n == "curl" else None), \
|
||||
patch.object(tools_config, "_check_cua_driver_asset_for_arch",
|
||||
return_value=True), \
|
||||
patch.object(tools_config, "_run_cua_driver_installer",
|
||||
return_value=True) as runner:
|
||||
assert tools_config.install_cua_driver(upgrade=True) is True
|
||||
runner.assert_called_once()
|
||||
|
||||
def test_non_upgrade_on_macos_with_binary_skips_install(self):
|
||||
"""Original toolset-enable behaviour: cua-driver already installed
|
||||
+ upgrade=False → confirm and return without re-running installer.
|
||||
This is the behaviour that ``hermes tools`` (re)enable depends on,
|
||||
so the new helper must not regress it."""
|
||||
from hermes_cli import tools_config
|
||||
|
||||
with patch("platform.system", return_value="Darwin"), \
|
||||
@@ -89,27 +80,133 @@ class TestInstallCuaDriverUpgrade:
|
||||
runner.assert_not_called()
|
||||
|
||||
def test_non_upgrade_on_macos_without_binary_runs_installer(self):
|
||||
"""Original fresh-install path must still work."""
|
||||
from hermes_cli import tools_config
|
||||
|
||||
with patch("platform.system", return_value="Darwin"), \
|
||||
patch.object(tools_config.shutil, "which",
|
||||
side_effect=lambda n: "/usr/bin/curl" if n == "curl" else None), \
|
||||
patch.object(tools_config, "_check_cua_driver_asset_for_arch",
|
||||
return_value=True), \
|
||||
patch.object(tools_config, "_run_cua_driver_installer",
|
||||
return_value=True) as runner:
|
||||
assert tools_config.install_cua_driver(upgrade=False) is True
|
||||
runner.assert_called_once()
|
||||
|
||||
def test_upgrade_without_curl_does_not_crash(self):
|
||||
"""If curl isn't on PATH we can't refresh — must warn and return
|
||||
the current install state, not raise."""
|
||||
|
||||
class TestCheckCuaDriverAssetForArch:
|
||||
def test_arm64_always_returns_true(self):
|
||||
from hermes_cli import tools_config
|
||||
|
||||
# cua-driver present, curl missing.
|
||||
def _which(name):
|
||||
return "/usr/local/bin/cua-driver" if name == "cua-driver" else None
|
||||
with patch("platform.machine", return_value="arm64"):
|
||||
assert tools_config._check_cua_driver_asset_for_arch() is True
|
||||
|
||||
def test_x86_64_with_asset_returns_true(self):
|
||||
from hermes_cli import tools_config
|
||||
|
||||
release = {
|
||||
"tag_name": "cua-driver-v0.1.6",
|
||||
"assets": [
|
||||
{"name": "cua-driver-0.1.6-darwin-arm64.tar.gz"},
|
||||
{"name": "cua-driver-0.1.6-darwin-x86_64.tar.gz"},
|
||||
],
|
||||
}
|
||||
mock_resp = MagicMock()
|
||||
mock_resp.read.return_value = json.dumps(release).encode()
|
||||
mock_resp.__enter__ = lambda s: s
|
||||
mock_resp.__exit__ = MagicMock(return_value=False)
|
||||
|
||||
with patch("platform.machine", return_value="x86_64"), \
|
||||
patch("urllib.request.urlopen", return_value=mock_resp):
|
||||
assert tools_config._check_cua_driver_asset_for_arch() is True
|
||||
|
||||
def test_x86_64_without_asset_returns_false(self):
|
||||
from hermes_cli import tools_config
|
||||
|
||||
release = {
|
||||
"tag_name": "cua-driver-v0.1.6",
|
||||
"assets": [
|
||||
{"name": "cua-driver-0.1.6-darwin-arm64.tar.gz"},
|
||||
{"name": "cua-driver.tar.gz"},
|
||||
],
|
||||
}
|
||||
mock_resp = MagicMock()
|
||||
mock_resp.read.return_value = json.dumps(release).encode()
|
||||
mock_resp.__enter__ = lambda s: s
|
||||
mock_resp.__exit__ = MagicMock(return_value=False)
|
||||
|
||||
with patch("platform.machine", return_value="x86_64"), \
|
||||
patch("urllib.request.urlopen", return_value=mock_resp), \
|
||||
patch.object(tools_config, "_print_warning") as warn, \
|
||||
patch.object(tools_config, "_print_info"):
|
||||
assert tools_config._check_cua_driver_asset_for_arch() is False
|
||||
warn.assert_called_once()
|
||||
assert "no Intel" in warn.call_args[0][0].lower() or "x86_64" in warn.call_args[0][0]
|
||||
|
||||
def test_x86_64_api_failure_returns_true(self):
|
||||
"""Network failure should fail open — let the installer handle it."""
|
||||
from hermes_cli import tools_config
|
||||
|
||||
with patch("platform.machine", return_value="x86_64"), \
|
||||
patch("urllib.request.urlopen", side_effect=Exception("timeout")):
|
||||
assert tools_config._check_cua_driver_asset_for_arch() is True
|
||||
|
||||
def test_fresh_install_x86_64_no_asset_skips_installer(self):
|
||||
"""When the latest release has no Intel asset, skip the installer."""
|
||||
from hermes_cli import tools_config
|
||||
|
||||
release = {
|
||||
"tag_name": "cua-driver-v0.1.6",
|
||||
"assets": [{"name": "cua-driver-0.1.6-darwin-arm64.tar.gz"}],
|
||||
}
|
||||
mock_resp = MagicMock()
|
||||
mock_resp.read.return_value = json.dumps(release).encode()
|
||||
mock_resp.__enter__ = lambda s: s
|
||||
mock_resp.__exit__ = MagicMock(return_value=False)
|
||||
|
||||
with patch("platform.system", return_value="Darwin"), \
|
||||
patch.object(tools_config.shutil, "which", side_effect=_which), \
|
||||
patch.object(tools_config, "_print_warning"):
|
||||
patch.object(tools_config.shutil, "which",
|
||||
side_effect=lambda n: "/usr/bin/curl" if n == "curl" else None), \
|
||||
patch("platform.machine", return_value="x86_64"), \
|
||||
patch("urllib.request.urlopen", return_value=mock_resp), \
|
||||
patch.object(tools_config, "_print_warning"), \
|
||||
patch.object(tools_config, "_print_info"), \
|
||||
patch.object(tools_config, "_run_cua_driver_installer") as runner:
|
||||
assert tools_config.install_cua_driver(upgrade=False) is False
|
||||
runner.assert_not_called()
|
||||
|
||||
def test_upgrade_x86_64_no_asset_returns_existing_status(self):
|
||||
"""On upgrade with no Intel asset, return whether binary existed."""
|
||||
from hermes_cli import tools_config
|
||||
|
||||
release = {
|
||||
"tag_name": "cua-driver-v0.1.6",
|
||||
"assets": [{"name": "cua-driver-0.1.6-darwin-arm64.tar.gz"}],
|
||||
}
|
||||
mock_resp = MagicMock()
|
||||
mock_resp.read.return_value = json.dumps(release).encode()
|
||||
mock_resp.__enter__ = lambda s: s
|
||||
mock_resp.__exit__ = MagicMock(return_value=False)
|
||||
|
||||
# With binary installed — returns True (binary exists)
|
||||
with patch("platform.system", return_value="Darwin"), \
|
||||
patch.object(tools_config.shutil, "which",
|
||||
side_effect=lambda n: "/usr/local/bin/" + n
|
||||
if n in ("cua-driver", "curl") else None), \
|
||||
patch("platform.machine", return_value="x86_64"), \
|
||||
patch("urllib.request.urlopen", return_value=mock_resp), \
|
||||
patch.object(tools_config, "_print_warning"), \
|
||||
patch.object(tools_config, "_print_info"), \
|
||||
patch.object(tools_config, "_run_cua_driver_installer") as runner:
|
||||
assert tools_config.install_cua_driver(upgrade=True) is True
|
||||
runner.assert_not_called()
|
||||
|
||||
# Without binary — returns False
|
||||
with patch("platform.system", return_value="Darwin"), \
|
||||
patch.object(tools_config.shutil, "which",
|
||||
side_effect=lambda n: "/usr/bin/curl" if n == "curl" else None), \
|
||||
patch("platform.machine", return_value="x86_64"), \
|
||||
patch("urllib.request.urlopen", return_value=mock_resp), \
|
||||
patch.object(tools_config, "_print_warning"), \
|
||||
patch.object(tools_config, "_print_info"), \
|
||||
patch.object(tools_config, "_run_cua_driver_installer") as runner:
|
||||
assert tools_config.install_cua_driver(upgrade=True) is False
|
||||
runner.assert_not_called()
|
||||
|
||||
@@ -90,6 +90,7 @@ def test_show_status_reports_managed_nous_features(monkeypatch, capsys, tmp_path
|
||||
"tts": NousFeatureState("tts", "OpenAI TTS", True, True, True, True, False, True, "OpenAI TTS"),
|
||||
"browser": NousFeatureState("browser", "Browser automation", True, True, True, True, False, True, "Browser Use"),
|
||||
"modal": NousFeatureState("modal", "Modal execution", False, True, False, False, False, True, "local"),
|
||||
"app_tools": NousFeatureState("app_tools", "App tools (500+ apps)", True, True, True, True, False, True, "Nous Subscription"),
|
||||
},
|
||||
),
|
||||
raising=False,
|
||||
|
||||
@@ -12,8 +12,10 @@ from hermes_cli.tools_config import (
|
||||
_get_platform_tools,
|
||||
_platform_toolset_summary,
|
||||
_reconfigure_tool,
|
||||
_run_post_setup,
|
||||
_save_platform_tools,
|
||||
_toolset_has_keys,
|
||||
_toolset_needs_configuration_prompt,
|
||||
CONFIGURABLE_TOOLSETS,
|
||||
TOOL_CATEGORIES,
|
||||
_visible_providers,
|
||||
@@ -752,6 +754,91 @@ def test_numeric_mcp_server_name_does_not_crash_sorted():
|
||||
|
||||
# ─── Imagegen Backend Picker Wiring ────────────────────────────────────────
|
||||
|
||||
def test_toolset_has_keys_treats_no_key_providers_as_configured():
    """With an empty config, the computer_use toolset is reported as having its keys."""
    empty_config = {}
    has_keys = _toolset_has_keys("computer_use", empty_config)
    assert has_keys is True
|
||||
|
||||
|
||||
def test_computer_use_needs_configuration_when_cua_driver_post_setup_pending():
    """A provider without API keys can still require setup via its post_setup step.

    When no binary is found on PATH, enabling Computer Use through
    `hermes tools` has to prompt for configuration so the cua-driver
    post-setup installer is reached.
    """
    with patch("shutil.which", return_value=None):
        needs_prompt = _toolset_needs_configuration_prompt("computer_use", {})
        assert needs_prompt is True
|
||||
|
||||
|
||||
def test_computer_use_skips_configuration_when_cua_driver_already_installed():
    """Installed post_setup dependencies should keep returning-user toggles no-op.

    The sibling tests show the setup gate honors ``HERMES_CUA_DRIVER_CMD``.
    The variable is therefore removed for the duration of this test so a value
    inherited from the developer's or CI environment cannot redirect the
    lookup away from the default ``cua-driver`` binary name.
    """
    import os

    def fake_which(name: str):
        return "/usr/local/bin/cua-driver" if name == "cua-driver" else None

    # patch.dict snapshots os.environ and restores it on exit, so the pop
    # below does not leak outside this test.
    with patch.dict("os.environ"), \
            patch("shutil.which", side_effect=fake_which):
        os.environ.pop("HERMES_CUA_DRIVER_CMD", None)
        assert _toolset_needs_configuration_prompt("computer_use", {}) is False
|
||||
|
||||
|
||||
def test_computer_use_respects_custom_cua_driver_command():
    """With HERMES_CUA_DRIVER_CMD set, finding that command is enough to skip the prompt."""
    known_binaries = {"custom-cua": "/opt/bin/custom-cua"}

    def fake_which(name: str):
        return known_binaries.get(name)

    env_override = {"HERMES_CUA_DRIVER_CMD": "custom-cua"}
    with patch.dict("os.environ", env_override), \
            patch("shutil.which", side_effect=fake_which):
        needs_prompt = _toolset_needs_configuration_prompt("computer_use", {})
        assert needs_prompt is False
|
||||
|
||||
|
||||
def test_computer_use_blank_custom_driver_command_falls_back_to_default():
    """A whitespace-only override must not make the gate search for an empty command."""
    known_binaries = {"cua-driver": "/usr/local/bin/cua-driver"}

    def fake_which(name: str):
        return known_binaries.get(name)

    blank_override = {"HERMES_CUA_DRIVER_CMD": "   "}
    with patch.dict("os.environ", blank_override), \
            patch("shutil.which", side_effect=fake_which):
        needs_prompt = _toolset_needs_configuration_prompt("computer_use", {})
        assert needs_prompt is False
|
||||
|
||||
|
||||
def test_computer_use_post_setup_respects_custom_driver_command_when_installed():
    """When the override command is found, post_setup probes its version exactly once."""
    known_binaries = {"custom-cua": "/opt/bin/custom-cua"}

    def fake_which(name: str):
        return known_binaries.get(name)

    with patch.dict("os.environ", {"HERMES_CUA_DRIVER_CMD": "custom-cua"}), \
            patch("platform.system", return_value="Darwin"), \
            patch("shutil.which", side_effect=fake_which), \
            patch("subprocess.run") as run:
        run.return_value.stdout = "custom 1.2.3\n"

        _run_post_setup("cua_driver")

        run.assert_called_once()
        invoked_argv = run.call_args.args[0]
        assert invoked_argv == ["custom-cua", "--version"]
|
||||
|
||||
|
||||
def test_computer_use_post_setup_missing_override_does_not_accept_default_binary():
    """A present default cua-driver must not stand in for a missing override command.

    Only ``cua-driver`` resolves on the fake PATH; the override and ``curl``
    do not. post_setup must look up both of those names and must not run
    any subprocess.
    """
    lookups = []

    def fake_which(name: str):
        lookups.append(name)
        # Everything except the default binary (including curl) is absent.
        return "/usr/local/bin/cua-driver" if name == "cua-driver" else None

    with patch.dict("os.environ", {"HERMES_CUA_DRIVER_CMD": "custom-cua"}), \
            patch("platform.system", return_value="Darwin"), \
            patch("shutil.which", side_effect=fake_which), \
            patch("subprocess.run") as run:
        _run_post_setup("cua_driver")

        run.assert_not_called()
        assert "custom-cua" in lookups
        assert "curl" in lookups
|
||||
|
||||
|
||||
class TestImagegenBackendRegistry:
|
||||
"""IMAGEGEN_BACKENDS tags drive the model picker flow in tools_config."""
|
||||
|
||||
|
||||
@@ -168,7 +168,7 @@ def test_make_tui_argv_skips_build_only_on_termux_when_fresh(
|
||||
|
||||
argv, cwd = main_mod._make_tui_argv(tmp_path, tui_dev=False)
|
||||
|
||||
assert argv == ["/bin/node", str(tmp_path / "dist" / "entry.js")]
|
||||
assert argv == ["/bin/node", "--expose-gc", str(tmp_path / "dist" / "entry.js")]
|
||||
assert cwd == tmp_path
|
||||
|
||||
|
||||
|
||||
@@ -0,0 +1,300 @@
|
||||
"""Behavior-parity check for the image-gen FAL plugin migration (#26241).
|
||||
|
||||
Spawns one subprocess per (version, scenario) cell — pinned to either
|
||||
``origin/main`` (legacy in-tree FAL fall-through + ``configured == "fal"``
|
||||
skip in ``_dispatch_to_plugin_provider``) or this PR's worktree (FAL is
|
||||
itself a plugin and the dispatcher routes every set provider through
|
||||
the registry). Each subprocess clears all FAL-related env vars + writes
|
||||
a ``config.yaml``, then asks the dispatcher how it would route an
|
||||
``image_generate`` call. The emitted shape is a JSON object with the keys
``{dispatch_kind, provider_name, model, error_present}``:
|
||||
|
||||
* ``dispatch_kind`` ∈ ``{"legacy_fal", "plugin", "error",
  "unknown_payload", "exception"}`` — whether the call would go
  straight to the in-tree pipeline (the dispatcher returned ``None``,
  i.e. it fell through silently), through
  ``_dispatch_to_plugin_provider``, report an explicit
  provider-not-registered error, return a payload that is not a JSON
  object, or raise inside the dispatcher.
|
||||
* ``provider_name`` — when ``dispatch_kind == "plugin"``, the
|
||||
resolved provider name. ``None`` otherwise.
|
||||
* ``model`` — the resolved FAL model id when applicable.
|
||||
|
||||
The parent process diffs the shapes per scenario. A diff means the
|
||||
migration introduced an observable behaviour change vs origin/main —
|
||||
likely a real regression for users on the existing config keys.
|
||||
|
||||
Run from the PR worktree:
|
||||
|
||||
python tests/plugins/image_gen/check_parity_vs_main.py
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
REPO_ROOT = Path(__file__).resolve().parents[3]
|
||||
|
||||
|
||||
# Pin one path to current main, one to the PR worktree.
|
||||
# ``REPO_ROOT`` is ``.../.worktrees/<name>``; the main checkout lives
|
||||
# two levels up. When running directly from a regular clone (no
|
||||
# worktree), ``MAIN_DIR`` falls back to a sibling ``hermes-agent-main``
|
||||
# checkout if one exists.
|
||||
def _resolve_main_dir() -> Path:
    """Pick the checkout that stands in for ``origin/main``.

    Candidates are tried in order and the first one that contains
    ``tools/image_generation_tool.py`` wins:

    1. the directory two levels above ``REPO_ROOT`` (the worktree layout),
    2. a sibling directory named ``hermes-agent-main``.

    If neither qualifies, ``REPO_ROOT`` itself is returned.
    """
    marker = Path("tools") / "image_generation_tool.py"

    grandparent = REPO_ROOT.parent.parent
    if grandparent != REPO_ROOT and (grandparent / marker).exists():
        return grandparent

    sibling_checkout = REPO_ROOT.parent / "hermes-agent-main"
    if (sibling_checkout / marker).exists():
        return sibling_checkout

    return REPO_ROOT
|
||||
|
||||
|
||||
MAIN_DIR = _resolve_main_dir()
|
||||
PR_DIR = REPO_ROOT
|
||||
assert (PR_DIR / "tools" / "image_generation_tool.py").exists(), (
|
||||
f"PR_DIR={PR_DIR} doesn't look like a hermes-agent checkout"
|
||||
)
|
||||
|
||||
|
||||
# Source of the probe executed via ``python -c`` once per (checkout, scenario).
#
# argv contract (``sys.argv[0]`` is ``-c``):
#   argv[1]  path of the checkout to import from (prepended to ``sys.path``)
#   argv[2]  JSON object of extra environment variables for the scenario
#   argv[3]  body written to ``$HERMES_HOME/config.yaml``
#
# The probe prints a single JSON object on its last stdout line with the keys
# ``dispatch_kind``, ``provider_name``, ``model`` and ``error_present``.
# The throwaway HERMES_HOME is removed at interpreter exit so repeated runs
# do not accumulate temp directories.
SUBPROCESS_SCRIPT = r"""
import atexit, json, os, shutil, sys, tempfile
sys.path.insert(0, sys.argv[1])

# Isolated HERMES_HOME so the config write is hermetic.
home = tempfile.mkdtemp()
# Remove the throwaway home when the probe exits (best effort).
atexit.register(shutil.rmtree, home, ignore_errors=True)
os.environ["HERMES_HOME"] = home

# Clear FAL-related env so dispatch decisions are config-driven.
for k in (
    "FAL_KEY", "FAL_QUEUE_GATEWAY_URL",
    "TOOL_GATEWAY_DOMAIN", "TOOL_GATEWAY_USER_TOKEN",
    "FAL_IMAGE_MODEL",
):
    os.environ.pop(k, None)

scenario_env = json.loads(sys.argv[2])
os.environ.update(scenario_env)

config_yaml = sys.argv[3]
config_path = os.path.join(home, "config.yaml")
with open(config_path, "w", encoding="utf-8") as f:
    f.write(config_yaml)

# Fresh import — must not have anything cached.
for name in list(sys.modules):
    if (name.startswith("tools.")
            or name.startswith("agent.")
            or name.startswith("plugins.")
            or name.startswith("hermes_cli.")):
        sys.modules.pop(name, None)

import tools.image_generation_tool as image_tool

dispatch_kind = None
provider_name = None
model = None
error_text = None

try:
    raw = image_tool._dispatch_to_plugin_provider("ping", "landscape")
    if raw is None:
        dispatch_kind = "legacy_fal"
    else:
        parsed = json.loads(raw) if isinstance(raw, str) else raw
        if isinstance(parsed, dict):
            if parsed.get("error_type") == "provider_not_registered":
                dispatch_kind = "error"
                error_text = parsed.get("error")
            else:
                dispatch_kind = "plugin"
                provider_name = parsed.get("provider")
                model = parsed.get("model")
        else:
            dispatch_kind = "unknown_payload"

    if model is None and dispatch_kind == "legacy_fal":
        # The legacy path reports no model itself; ask the in-tree
        # resolver which FAL model id it would have used.
        try:
            model_id, _meta = image_tool._resolve_fal_model()
            model = model_id
        except Exception:
            pass
except Exception as exc:
    dispatch_kind = "exception"
    error_text = repr(exc)

shape = {
    "dispatch_kind": dispatch_kind,
    "provider_name": provider_name,
    "model": model,
    "error_present": error_text is not None,
}
print(json.dumps(shape))
"""
|
||||
|
||||
|
||||
# Parity matrix. Every entry is ``(label, config.yaml body, extra env vars)``;
# each one is run once against the main checkout and once against the PR.
SCENARIOS: list[tuple[str, str, dict[str, str]]] = [
    # Nothing configured anywhere.
    ("no-config-no-env", "", {}),
    # FAL selected explicitly, without and with credentials.
    ("explicit-fal-no-creds", "image_gen:\n provider: fal\n", {}),
    ("explicit-fal-with-creds", "image_gen:\n provider: fal\n", {"FAL_KEY": "test-key"}),
    # FAL selected explicitly together with a model override.
    (
        "explicit-fal-with-model",
        "image_gen:\n provider: fal\n model: fal-ai/flux-2-pro\n",
        {"FAL_KEY": "test-key"},
    ),
    # Provider name that matches no registered backend.
    (
        "explicit-typo-provider",
        "image_gen:\n provider: not-a-real-backend\n",
        {"FAL_KEY": "test-key"},
    ),
    # No config file content; only the managed tool-gateway env vars.
    (
        "managed-gateway-only",
        "",
        {
            "TOOL_GATEWAY_DOMAIN": "nousresearch.com",
            "TOOL_GATEWAY_USER_TOKEN": "nous-token",
        },
    ),
]
|
||||
|
||||
|
||||
def _run_scenario(repo_path: Path, label: str, config_yaml: str, env: dict) -> dict:
    """Run the dispatch probe for one scenario against one checkout.

    Args:
        repo_path: Checkout whose modules the probe imports.
        label: Scenario label. Not used for execution; kept so the call
            signature mirrors the ``SCENARIOS`` tuple.
        config_yaml: Body written to the probe's ``config.yaml``.
        env: Extra environment variables, passed to the probe as JSON.

    Returns:
        The shape dict printed by the probe on success. On any failure
        (launch error, timeout, non-zero exit, unparseable output) a dict
        containing an ``"error"`` key, which ``main()`` reports as a
        subprocess error instead of crashing the whole run.
    """
    # Interpreter preference: the checkout's own venv, then the main
    # checkout's venv, then whatever interpreter is running this script.
    venv_python = repo_path / ".venv" / "bin" / "python"
    if not venv_python.exists():
        venv_python = MAIN_DIR / ".venv" / "bin" / "python"
    if not venv_python.exists():
        venv_python = Path(sys.executable or "python3")

    try:
        out = subprocess.run(
            [
                str(venv_python),
                "-c",
                SUBPROCESS_SCRIPT,
                str(repo_path),
                json.dumps(env),
                config_yaml,
            ],
            capture_output=True,
            text=True,
            timeout=60,
        )
    except subprocess.TimeoutExpired as exc:
        # A hung probe is a per-scenario error, not a reason to abort.
        return {"error": f"subprocess timed out after {exc.timeout}s"}
    except OSError as exc:
        # Interpreter missing or not executable.
        return {"error": f"could not launch {venv_python}: {exc}"}

    if out.returncode != 0:
        return {
            "error": "subprocess failed",
            "stdout": out.stdout[-500:],
            "stderr": out.stderr[-500:],
        }
    try:
        # The shape is the last stdout line; earlier lines may be log noise.
        return json.loads(out.stdout.strip().splitlines()[-1])
    except Exception as exc:
        return {"error": f"could not parse output: {exc}", "stdout": out.stdout}
|
||||
|
||||
|
||||
def _reduce(shape: dict) -> dict:
|
||||
"""Reduce to the parts that matter for user-visible parity.
|
||||
|
||||
On origin/main, ``explicit-fal-*`` scenarios short-circuit to
|
||||
``legacy_fal`` because of the ``configured == "fal"`` skip. On the
|
||||
PR, those same scenarios route through the plugin and emit
|
||||
``dispatch_kind == "plugin"`` with ``provider_name == "fal"``.
|
||||
|
||||
Both shapes are functionally equivalent — the plugin's ``generate()``
|
||||
re-enters the same in-tree pipeline via ``_it`` indirection — but
|
||||
we want the diff to be visible so reviewers can sign off on the
|
||||
intentional behaviour delta.
|
||||
"""
|
||||
return {
|
||||
"dispatch_kind": shape.get("dispatch_kind"),
|
||||
"provider_name": shape.get("provider_name"),
|
||||
"model": shape.get("model"),
|
||||
"error_present": shape.get("error_present"),
|
||||
}
|
||||
|
||||
|
||||
def main() -> int:
    """Run every scenario against both checkouts and report the differences.

    Each scenario ends up in one of four buckets: identical shapes (OK),
    the sanctioned ``legacy_fal`` → ``plugin``/``fal`` delta (DIFF), any
    other mismatch (FAIL), or a probe that could not be run or parsed
    (ERR).

    Returns:
        ``1`` if any scenario failed or errored, ``0`` otherwise.
    """
    print(f"main: {MAIN_DIR}")
    print(f"pr: {PR_DIR}")
    print()

    if MAIN_DIR == PR_DIR:
        same_dir_warning = (
            "WARN: MAIN_DIR == PR_DIR — diffs will be trivially identical.\n"
            " Set up a sibling 'hermes-agent-main' checkout pinned to "
            "origin/main to get real parity coverage."
        )
        print(same_dir_warning)
        print()

    regressed: list[str] = []
    broken: list[str] = []
    expected_deltas: list[tuple[str, dict, dict]] = []

    for label, config_yaml, env in SCENARIOS:
        shape_on_main = _run_scenario(MAIN_DIR, label, config_yaml, env)
        shape_on_pr = _run_scenario(PR_DIR, label, config_yaml, env)

        # An "error" key marks a probe that failed to run or parse.
        if any("error" in shape for shape in (shape_on_main, shape_on_pr)):
            print(f" [ERR ] {label}: subprocess failed")
            print(f" main: {shape_on_main}")
            print(f" pr: {shape_on_pr}")
            broken.append(label)
            continue

        before = _reduce(shape_on_main)
        after = _reduce(shape_on_pr)

        if before == after:
            print(f" [OK] {label}: {before}")
            continue

        # The only sanctioned difference: main answers legacy_fal where
        # the PR dispatches to the "fal" plugin. Anything else counts as
        # a regression.
        is_sanctioned_delta = (
            before.get("dispatch_kind") == "legacy_fal"
            and after.get("dispatch_kind") == "plugin"
            and after.get("provider_name") == "fal"
        )
        if is_sanctioned_delta:
            print(f" [DIFF] {label}: legacy_fal → plugin (fal) — expected")
            expected_deltas.append((label, before, after))
            continue

        print(f" [FAIL] {label}")
        print(f" main: {before}")
        print(f" pr: {after}")
        regressed.append(label)

    print()
    if broken:
        print(f"SUBPROCESS ERRORS in {len(broken)} scenario(s):")
        for scenario in broken:
            print(f" - {scenario}")
    if regressed:
        print(f"BEHAVIOUR REGRESSION in {len(regressed)} scenario(s):")
        for scenario in regressed:
            print(f" - {scenario}")
    if expected_deltas:
        print(
            f"INTENTIONAL DIFFS ({len(expected_deltas)}): "
            "legacy_fal → plugin dispatch for explicit FAL paths."
        )

    if regressed or broken:
        return 1
    print(f"PARITY OK across {len(SCENARIOS)} scenarios.")
    return 0
|
||||
|
||||
|
||||
# Script entry point: the process exit status is main()'s return value.
if __name__ == "__main__":
    raise SystemExit(main())
|
||||
@@ -0,0 +1,226 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Tests for the FAL.ai image generation plugin.
|
||||
|
||||
The plugin is a thin registration adapter — actual FAL pipeline logic
|
||||
lives in ``tools.image_generation_tool`` and is exercised by
|
||||
``tests/tools/test_image_generation.py``. These tests focus on:
|
||||
|
||||
* the ``ImageGenProvider`` ABC surface (name, models, schema)
|
||||
* call-time indirection (``_it`` resolution at ``generate()`` time so
|
||||
``monkeypatch.setattr(image_tool, ...)`` keeps working)
|
||||
* response shape stamping (provider/prompt/aspect_ratio/model)
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Provider surface
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestFalImageGenProviderSurface:
    """Static surface of the provider: identity, model catalogue, setup schema."""

    def test_name(self):
        from plugins.image_gen.fal import FalImageGenProvider

        provider = FalImageGenProvider()
        assert provider.name == "fal"

    def test_display_name(self):
        from plugins.image_gen.fal import FalImageGenProvider

        provider = FalImageGenProvider()
        assert provider.display_name == "FAL.ai"

    def test_default_model_matches_legacy(self):
        from plugins.image_gen.fal import FalImageGenProvider
        from tools.image_generation_tool import DEFAULT_MODEL

        reported_default = FalImageGenProvider().default_model()
        assert reported_default == DEFAULT_MODEL

    def test_list_models_uses_legacy_catalog(self):
        from plugins.image_gen.fal import FalImageGenProvider
        from tools.image_generation_tool import FAL_MODELS

        models = FalImageGenProvider().list_models()

        # The provider mirrors whatever FAL_MODELS ships, verbatim.
        listed_ids = {model["id"] for model in models}
        assert listed_ids == set(FAL_MODELS.keys())

        # Spot-check that each entry carries the first-class fields.
        required_fields = ("id", "display", "speed", "strengths", "price")
        for model in models:
            for field in required_fields:
                assert field in model

    def test_setup_schema_advertises_fal_key(self):
        from plugins.image_gen.fal import FalImageGenProvider

        schema = FalImageGenProvider().get_setup_schema()

        assert schema["name"] == "FAL.ai"
        assert schema["badge"] == "paid"
        advertised_keys = {var["key"] for var in schema.get("env_vars", [])}
        assert "FAL_KEY" in advertised_keys
|
||||
|
||||
|
||||
class TestFalImageGenProviderAvailability:
    """``is_available()`` follows the legacy ``check_fal_api_key`` helper."""

    def test_is_available_when_legacy_check_passes(self, monkeypatch):
        import tools.image_generation_tool as image_tool
        from plugins.image_gen.fal import FalImageGenProvider

        monkeypatch.setattr(image_tool, "check_fal_api_key", lambda: True)

        available = FalImageGenProvider().is_available()
        assert available is True

    def test_is_available_false_when_legacy_check_fails(self, monkeypatch):
        import tools.image_generation_tool as image_tool
        from plugins.image_gen.fal import FalImageGenProvider

        monkeypatch.setattr(image_tool, "check_fal_api_key", lambda: False)

        available = FalImageGenProvider().is_available()
        assert available is False

    def test_is_available_handles_legacy_exception(self, monkeypatch):
        import tools.image_generation_tool as image_tool
        from plugins.image_gen.fal import FalImageGenProvider

        def _raising_check():
            raise RuntimeError("config broke")

        monkeypatch.setattr(image_tool, "check_fal_api_key", _raising_check)

        # The picker must see "not available" rather than the exception.
        available = FalImageGenProvider().is_available()
        assert available is False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# generate() — call-time indirection
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestFalImageGenProviderGenerate:
    """``generate()`` delegation, argument handling, and error wrapping."""

    def test_generate_delegates_to_legacy_image_generate_tool(self, monkeypatch):
        """``image_generate_tool`` is looked up at call time, so patching
        it on the ``image_tool`` module takes effect."""
        import tools.image_generation_tool as image_tool
        from plugins.image_gen.fal import FalImageGenProvider

        captured = {}

        def fake_image_generate_tool(prompt, aspect_ratio, **kwargs):
            captured.update(prompt=prompt, aspect_ratio=aspect_ratio, kwargs=kwargs)
            return json.dumps({"success": True, "image": "https://fake/image.png"})

        def fake_resolve_model():
            return ("fal-ai/flux-2/klein/9b", {})

        monkeypatch.setattr(image_tool, "image_generate_tool", fake_image_generate_tool)
        monkeypatch.setattr(image_tool, "_resolve_fal_model", fake_resolve_model)

        result = FalImageGenProvider().generate(
            "a serene mountain landscape",
            aspect_ratio="square",
            seed=42,
        )

        # Arguments reached the legacy tool unchanged.
        assert captured["prompt"] == "a serene mountain landscape"
        assert captured["aspect_ratio"] == "square"
        assert captured["kwargs"] == {"seed": 42}
        # The legacy payload is passed through.
        assert result["success"] is True
        assert result["image"] == "https://fake/image.png"
        # Fields stamped on for the unified response shape.
        assert result["provider"] == "fal"
        assert result["prompt"] == "a serene mountain landscape"
        assert result["aspect_ratio"] == "square"
        assert result["model"] == "fal-ai/flux-2/klein/9b"

    def test_generate_invalid_aspect_ratio_is_coerced(self, monkeypatch):
        import tools.image_generation_tool as image_tool
        from plugins.image_gen.fal import FalImageGenProvider

        seen_aspect = {}

        def fake_tool(prompt, aspect_ratio, **kwargs):
            seen_aspect["v"] = aspect_ratio
            return json.dumps({"success": True, "image": "x"})

        def fake_resolve_model():
            return ("fal-ai/flux-2/klein/9b", {})

        monkeypatch.setattr(image_tool, "image_generate_tool", fake_tool)
        monkeypatch.setattr(image_tool, "_resolve_fal_model", fake_resolve_model)

        FalImageGenProvider().generate("p", aspect_ratio="not-a-real-ratio")

        # ``resolve_aspect_ratio`` clamps to landscape.
        assert seen_aspect["v"] == "landscape"

    def test_generate_passthrough_drops_none_kwargs(self, monkeypatch):
        import tools.image_generation_tool as image_tool
        from plugins.image_gen.fal import FalImageGenProvider

        forwarded = {}

        def fake_tool(prompt, aspect_ratio, **kwargs):
            forwarded.update(kwargs)
            return json.dumps({"success": True, "image": "x"})

        def fake_resolve_model():
            return ("fal-ai/flux-2/klein/9b", {})

        monkeypatch.setattr(image_tool, "image_generate_tool", fake_tool)
        monkeypatch.setattr(image_tool, "_resolve_fal_model", fake_resolve_model)

        FalImageGenProvider().generate(
            "p",
            aspect_ratio="landscape",
            seed=None,
            num_images=2,
            guidance_scale=None,
        )

        # ``None`` values must not be forwarded — they would override the
        # model's defaults inside the legacy payload builder.
        assert "seed" not in forwarded
        assert "guidance_scale" not in forwarded
        assert forwarded.get("num_images") == 2

    def test_generate_catches_exception_from_legacy(self, monkeypatch):
        import tools.image_generation_tool as image_tool
        from plugins.image_gen.fal import FalImageGenProvider

        def exploding_tool(*args, **kwargs):
            raise RuntimeError("FAL endpoint exploded")

        monkeypatch.setattr(image_tool, "image_generate_tool", exploding_tool)

        result = FalImageGenProvider().generate("p")

        assert result["success"] is False
        assert "FAL image generation failed" in result["error"]
        assert result["error_type"] == "RuntimeError"
        assert result["provider"] == "fal"

    def test_generate_invalid_json_response(self, monkeypatch):
        import tools.image_generation_tool as image_tool
        from plugins.image_gen.fal import FalImageGenProvider

        def non_json_tool(**kw):
            # Accepts keyword arguments only, like the lambda it replaces.
            return "not-json"

        def fake_resolve_model():
            return ("fal-ai/flux-2/klein/9b", {})

        monkeypatch.setattr(image_tool, "image_generate_tool", non_json_tool)
        monkeypatch.setattr(image_tool, "_resolve_fal_model", fake_resolve_model)

        result = FalImageGenProvider().generate("p")

        assert result["success"] is False
        assert "Invalid JSON" in result["error"]
        assert result["provider"] == "fal"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Registry wiring
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestFalImageGenPluginRegistration:
    """``register()`` hands exactly one provider instance to the plugin context."""

    def test_register_wires_provider_into_registry(self):
        from plugins.image_gen.fal import FalImageGenProvider, register

        plugin_ctx = MagicMock()

        register(plugin_ctx)

        registration = plugin_ctx.register_image_gen_provider
        registration.assert_called_once()
        # The call must carry exactly one positional argument.
        positional, _keyword = registration.call_args
        (provider,) = positional
        assert isinstance(provider, FalImageGenProvider)
|
||||
@@ -0,0 +1,260 @@
|
||||
"""Tests for reactive multimodal-tool-content recovery.
|
||||
|
||||
Covers the full chain for providers that reject list-type content in
|
||||
``role: "tool"`` messages (Xiaomi MiMo's 400 "text is not set", etc.):
|
||||
|
||||
1. agent/error_classifier.py: 400 with the right wording classifies as
|
||||
``FailoverReason.multimodal_tool_content_unsupported``.
|
||||
2. run_agent._try_strip_image_parts_from_tool_messages downgrades tool
|
||||
messages whose ``content`` is a list-with-image to a string text
|
||||
summary, in-place, and records the active (provider, model) in
|
||||
``self._no_list_tool_content_models`` so future tool results in this
|
||||
session preemptively downgrade.
|
||||
3. run_agent._tool_result_content_for_active_model short-circuits to a
|
||||
text summary when the (provider, model) is in the cache, even though
|
||||
``_model_supports_vision`` returns True — avoiding a wasted round
|
||||
trip on every subsequent screenshot in the session.
|
||||
|
||||
The end-to-end retry loop wiring (`conversation_loop.py`) is exercised by
|
||||
the classifier signal + helper-mutation tests; the integration only adds
|
||||
a trivial flag-and-continue around the existing pattern used for
|
||||
``image_too_large`` recovery.
|
||||
|
||||
See: https://github.com/NousResearch/hermes-agent/issues/27344
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from agent.error_classifier import FailoverReason, classify_api_error
|
||||
|
||||
|
||||
class _FakeApiError(Exception):
|
||||
"""Stand-in for an openai.BadRequestError with status_code + body."""
|
||||
|
||||
def __init__(self, status_code: int, message: str, body: dict | None = None):
|
||||
super().__init__(message)
|
||||
self.status_code = status_code
|
||||
self.body = body or {"error": {"message": message}}
|
||||
self.response = None
|
||||
|
||||
|
||||
def _make_agent(provider: str = "xiaomi", model: str = "mimo-v2.5"):
    """Return an ``AIAgent`` allocated without running ``__init__``.

    Only ``provider`` and ``model`` are set on the instance, which is
    enough for method-level tests and avoids any provider setup.
    """
    from run_agent import AIAgent

    bare_agent = object.__new__(AIAgent)
    for attribute, value in (("provider", provider), ("model", model)):
        setattr(bare_agent, attribute, value)
    return bare_agent
|
||||
|
||||
|
||||
# ─── Strip helper ────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestStripImagePartsHelper:
    """Behaviour of ``_try_strip_image_parts_from_tool_messages``."""

    def test_no_messages_returns_false(self):
        agent = _make_agent()
        for empty_input in ([], None):
            assert agent._try_strip_image_parts_from_tool_messages(empty_input) is False

    def test_no_tool_messages_returns_false(self):
        agent = _make_agent()
        history = [
            {"role": "user", "content": "plain text"},
            {"role": "assistant", "content": "ack"},
        ]
        changed = agent._try_strip_image_parts_from_tool_messages(history)
        assert changed is False

    def test_tool_message_with_string_content_unchanged(self):
        agent = _make_agent()
        tool_msg = {"role": "tool", "tool_call_id": "x", "content": "plain string result"}
        history = [tool_msg]
        changed = agent._try_strip_image_parts_from_tool_messages(history)
        assert changed is False
        assert history[0]["content"] == "plain string result"

    def test_tool_message_list_without_image_unchanged(self):
        """A text-only content list is left alone; if the provider rejects
        that too, the caller surfaces the original error."""
        agent = _make_agent()
        text_only_parts = [{"type": "text", "text": "hello"}]
        history = [{"role": "tool", "tool_call_id": "x", "content": text_only_parts}]
        changed = agent._try_strip_image_parts_from_tool_messages(history)
        assert changed is False

    def test_tool_message_list_with_image_downgrades(self):
        agent = _make_agent()
        parts = [
            {"type": "text", "text": "AX summary: 5 buttons visible"},
            {"type": "image_url", "image_url": {"url": "data:image/png;base64,iVBOR..."}},
        ]
        history = [{"role": "tool", "tool_call_id": "x", "content": parts}]

        changed = agent._try_strip_image_parts_from_tool_messages(history)

        assert changed is True
        # The image is gone and the text survives as a plain string.
        downgraded = history[0]["content"]
        assert isinstance(downgraded, str)
        assert "AX summary" in downgraded
        assert "image_url" not in history[0]["content"]
        assert "iVBOR" not in history[0]["content"]

    def test_tool_message_image_only_gets_placeholder(self):
        """An image-only list is replaced by a placeholder so the
        assistant message still has something to reference."""
        agent = _make_agent()
        parts = [
            {"type": "image_url", "image_url": {"url": "data:image/png;base64,iVBOR..."}},
        ]
        history = [{"role": "tool", "tool_call_id": "x", "content": parts}]

        changed = agent._try_strip_image_parts_from_tool_messages(history)

        assert changed is True
        assert isinstance(history[0]["content"], str)
        assert "image content removed" in history[0]["content"]

    def test_records_provider_model_in_session_cache(self):
        agent = _make_agent(provider="xiaomi", model="mimo-v2.5")
        parts = [
            {"type": "text", "text": "summary"},
            {"type": "image_url", "image_url": {"url": "data:image/png;base64,X"}},
        ]
        history = [{"role": "tool", "tool_call_id": "x", "content": parts}]

        agent._try_strip_image_parts_from_tool_messages(history)

        assert ("xiaomi", "mimo-v2.5") in agent._no_list_tool_content_models

    def test_only_tool_messages_get_downgraded(self):
        """User and assistant messages with list content are out of scope
        here; the existing image-routing path handles them."""
        agent = _make_agent()
        user_parts = [
            {"type": "text", "text": "describe"},
            {"type": "image_url", "image_url": {"url": "data:image/png;base64,X"}},
        ]
        tool_parts = [
            {"type": "text", "text": "summary"},
            {"type": "image_url", "image_url": {"url": "data:image/png;base64,Y"}},
        ]
        history = [
            {"role": "user", "content": user_parts},
            {"role": "tool", "tool_call_id": "x", "content": tool_parts},
        ]

        agent._try_strip_image_parts_from_tool_messages(history)

        # The user message keeps its list, image included.
        assert isinstance(history[0]["content"], list)
        assert any(part.get("type") == "image_url" for part in history[0]["content"])
        # The tool message is now a string carrying the text part.
        assert isinstance(history[1]["content"], str)
        assert "summary" in history[1]["content"]

    def test_skips_recording_when_no_model_id(self):
        """An unset provider/model (e.g. lazily initialised mid-handshake)
        must not add an empty key to the cache."""
        agent = _make_agent(provider="", model="")
        parts = [
            {"type": "text", "text": "summary"},
            {"type": "image_url", "image_url": {"url": "data:image/png;base64,X"}},
        ]
        history = [{"role": "tool", "tool_call_id": "x", "content": parts}]

        agent._try_strip_image_parts_from_tool_messages(history)

        assert agent._no_list_tool_content_models == set()
|
||||
|
||||
|
||||
# ─── Short-circuit on cached models ──────────────────────────────────────────
|
||||
|
||||
|
||||
class TestToolResultContentShortCircuit:
    """``_tool_result_content_for_active_model`` and the session cache.

    When the active (provider, model) pair is in
    ``_no_list_tool_content_models``, the method returns a text summary
    even though ``_model_supports_vision`` reports True.
    """

    def _multimodal_result(self, png_b64: str = "iVBORw0KGgoAAAA"):
        """Build a multimodal tool result with one text and one image part."""
        summary = "capture mode=som 800x600 app=Safari"
        image_part = {
            "type": "image_url",
            "image_url": {"url": f"data:image/png;base64,{png_b64}"},
        }
        return {
            "_multimodal": True,
            "content": [{"type": "text", "text": summary}, image_part],
            "text_summary": summary,
            "meta": {
                "mode": "som",
                "width": 800,
                "height": 600,
                "elements": 5,
                "png_bytes": 1024,
            },
        }

    def test_returns_list_when_cache_empty_and_vision_supported(self, monkeypatch):
        agent = _make_agent(provider="xiaomi", model="mimo-v2.5")
        agent._no_list_tool_content_models = set()  # explicitly empty
        monkeypatch.setattr(agent, "_model_supports_vision", lambda: True)

        content = agent._tool_result_content_for_active_model(
            "computer_use", self._multimodal_result()
        )

        # Native multimodal path: the content-parts list comes back.
        assert isinstance(content, list)
        assert any(part.get("type") == "image_url" for part in content)

    def test_returns_text_summary_when_model_in_cache(self, monkeypatch):
        agent = _make_agent(provider="xiaomi", model="mimo-v2.5")
        agent._no_list_tool_content_models = {("xiaomi", "mimo-v2.5")}
        monkeypatch.setattr(agent, "_model_supports_vision", lambda: True)

        content = agent._tool_result_content_for_active_model(
            "computer_use", self._multimodal_result()
        )

        # Short-circuit: a plain string with no trace of the image.
        assert isinstance(content, str)
        assert "data:image" not in content
        assert "image_url" not in content

    def test_cache_miss_on_different_model(self, monkeypatch):
        """The cache is keyed per (provider, model): an entry for
        mimo-v2.5 must not affect a session running a different model."""
        agent = _make_agent(provider="xiaomi", model="mimo-v2.5-pro")
        agent._no_list_tool_content_models = {("xiaomi", "mimo-v2.5")}
        monkeypatch.setattr(agent, "_model_supports_vision", lambda: True)

        content = agent._tool_result_content_for_active_model(
            "computer_use", self._multimodal_result()
        )

        assert isinstance(content, list)

    def test_missing_cache_attribute_falls_through(self, monkeypatch):
        """Agents built via ``object.__new__`` never ran ``__init__``, so
        the cache attribute may be absent; that must not crash."""
        agent = _make_agent()
        # _no_list_tool_content_models is deliberately left unassigned.
        monkeypatch.setattr(agent, "_model_supports_vision", lambda: True)

        content = agent._tool_result_content_for_active_model(
            "computer_use", self._multimodal_result()
        )

        assert isinstance(content, list)
|
||||
|
||||
|
||||
# ─── Classifier ──────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestRecoveryEndToEndClassification:
    """Pin the error wordings that must classify as
    ``FailoverReason.multimodal_tool_content_unsupported``.

    (The recovery hook in ``agent.conversation_loop`` consumes this
    reason directly.)
    """

    def test_xiaomi_mimo_classifies(self):
        xiaomi_message = (
            "Error code: 400 - {'error': {'code': '400', 'message': "
            "'Param Incorrect', 'param': 'text is not set', 'type': ''}}"
        )
        api_error = _FakeApiError(status_code=400, message=xiaomi_message)

        classified = classify_api_error(api_error, provider="xiaomi", model="mimo-v2.5")

        assert classified.reason == FailoverReason.multimodal_tool_content_unsupported
        assert classified.retryable is True

    def test_alibaba_variant_classifies(self):
        api_error = _FakeApiError(
            status_code=400,
            message="tool_call.content must be string",
        )

        classified = classify_api_error(api_error, provider="alibaba", model="qwen3.5-plus")

        assert classified.reason == FailoverReason.multimodal_tool_content_unsupported
|
||||
@@ -2636,6 +2636,31 @@ class TestRunConversation:
|
||||
assert result["final_response"] == "Final answer"
|
||||
assert result["completed"] is True
|
||||
|
||||
def test_ollama_small_runtime_context_fails_before_api_call(self, agent, caplog):
    """A 4096-token Ollama runtime context fails the turn with zero API calls.

    The agent is pointed at an Ollama-style endpoint with
    ``_ollama_num_ctx = 4096``. The turn must end as failed with exit reason
    ``ollama_runtime_context_too_small``, the reply must explain the problem
    and suggest ``model.ollama_num_ctx: 65536``, and a warning must be logged.
    """
    self._setup_agent(agent)
    agent_settings = (
        ("model", "qwen3.5:9b"),
        ("provider", "custom"),
        ("base_url", "http://host.docker.internal:11434/v1"),
        ("_ollama_num_ctx", 4096),
    )
    for attr_name, attr_value in agent_settings:
        setattr(agent, attr_name, attr_value)

    # Stub out persistence/cleanup so only the conversation loop runs.
    persist_stub = patch.object(agent, "_persist_session")
    trajectory_stub = patch.object(agent, "_save_trajectory")
    cleanup_stub = patch.object(agent, "_cleanup_task_resources")
    warning_capture = caplog.at_level(logging.WARNING, logger="agent.conversation_loop")
    with persist_stub, trajectory_stub, cleanup_stub, warning_capture:
        outcome = agent.run_conversation("Call ps -aux")

    assert outcome["failed"] is True
    assert outcome["completed"] is False
    assert outcome["api_calls"] == 0
    assert outcome["turn_exit_reason"] == "ollama_runtime_context_too_small"

    reply = outcome["final_response"]
    assert "Ollama loaded `qwen3.5:9b` with only 4,096 tokens" in reply
    assert "model.ollama_num_ctx: 65536" in reply

    # The chat-completions client must never have been invoked.
    assert not agent.client.chat.completions.create.called

    assert "Ollama runtime context too small for Hermes tool use" in caplog.text
    assert "runtime_context=4096" in caplog.text
|
||||
|
||||
def test_tool_calls_then_stop(self, agent):
|
||||
self._setup_agent(agent)
|
||||
tc = _mock_tool_call(name="web_search", arguments="{}", call_id="c1")
|
||||
|
||||
@@ -0,0 +1,119 @@
|
||||
"""Tests for the secret-source tracking in ``hermes_cli.env_loader``.
|
||||
|
||||
These cover the small public surface that lets `hermes model` / `hermes setup`
|
||||
label detected credentials with their origin ("from Bitwarden") so users
|
||||
don't see an unexplained "credentials ✓" line when their .env is empty.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
ROOT = Path(__file__).resolve().parents[1]
|
||||
if str(ROOT) not in sys.path:
|
||||
sys.path.insert(0, str(ROOT))
|
||||
|
||||
from hermes_cli import env_loader # noqa: E402
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def _reset_sources():
    """Empty the secret-source map before and after every test."""

    def _wipe():
        env_loader._SECRET_SOURCES.clear()

    _wipe()
    yield
    _wipe()
|
||||
|
||||
|
||||
def test_get_secret_source_returns_none_for_untracked_var():
    """A variable that was never recorded has no source label."""
    source = env_loader.get_secret_source("ANTHROPIC_API_KEY")
    assert source is None
|
||||
|
||||
|
||||
def test_get_secret_source_returns_label_for_tracked_var():
    """A recorded variable reports the label stored for it."""
    var_name = "ANTHROPIC_API_KEY"
    env_loader._SECRET_SOURCES[var_name] = "bitwarden"

    source = env_loader.get_secret_source(var_name)

    assert source == "bitwarden"
|
||||
|
||||
|
||||
def test_format_secret_source_suffix_empty_for_untracked():
    """Untracked credentials get no suffix at all."""
    # Values coming from .env or the shell are the implicit case and stay
    # unlabeled so the output is not cluttered.
    suffix = env_loader.format_secret_source_suffix("ANTHROPIC_API_KEY")
    assert suffix == ""
|
||||
|
||||
|
||||
def test_format_secret_source_suffix_bitwarden_uses_proper_name():
    """The ``bitwarden`` label is rendered with its capitalized product name."""
    var_name = "ANTHROPIC_API_KEY"
    env_loader._SECRET_SOURCES[var_name] = "bitwarden"

    suffix = env_loader.format_secret_source_suffix(var_name)

    assert suffix == " (from Bitwarden)"
|
||||
|
||||
|
||||
def test_format_secret_source_suffix_generic_label_for_future_sources():
    """An unrecognized source label is echoed verbatim in the suffix."""
    # Future-proofing: adding a new source such as "vault" should yield a
    # reasonable label without touching every call site.
    var_name = "OPENAI_API_KEY"
    env_loader._SECRET_SOURCES[var_name] = "vault"

    suffix = env_loader.format_secret_source_suffix(var_name)

    assert suffix == " (from vault)"
|
||||
|
||||
|
||||
def test_apply_external_secret_sources_records_bitwarden_origin(tmp_path, monkeypatch):
    """End-to-end: keys applied by ``apply_bitwarden_secrets`` get a source label.

    A config enabling Bitwarden is written under ``tmp_path`` and
    ``apply_bitwarden_secrets`` is stubbed to report one applied key. After
    ``_apply_external_secret_sources`` runs, that key must be recorded in
    ``_SECRET_SOURCES`` so the UI can label it.
    """
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    config_path = tmp_path / "config.yaml"
    # The nesting matters: ``bitwarden`` is a child of ``secrets`` and the
    # three settings are children of ``bitwarden``. With flat indentation they
    # would parse as siblings and Bitwarden would not be seen as enabled.
    config_path.write_text(
        "secrets:\n"
        "  bitwarden:\n"
        "    enabled: true\n"
        "    project_id: test-project\n"
        "    access_token_env: BWS_ACCESS_TOKEN\n",
        encoding="utf-8",
    )

    # Stub apply_bitwarden_secrets to return a synthetic FetchResult.
    from agent.secret_sources.bitwarden import FetchResult

    fake_result = FetchResult(
        secrets={"ANTHROPIC_API_KEY": "sk-ant-test"},
        applied=["ANTHROPIC_API_KEY"],
    )

    def _fake_apply(**_kwargs):
        return fake_result

    # The import inside _apply_external_secret_sources is lazy, so patch the
    # *module attribute* it will pull in.
    import agent.secret_sources.bitwarden as bw_module

    monkeypatch.setattr(bw_module, "apply_bitwarden_secrets", _fake_apply)

    env_loader._apply_external_secret_sources(tmp_path)

    assert env_loader.get_secret_source("ANTHROPIC_API_KEY") == "bitwarden"
    assert (
        env_loader.format_secret_source_suffix("ANTHROPIC_API_KEY")
        == " (from Bitwarden)"
    )
|
||||
|
||||
|
||||
def test_apply_external_secret_sources_noop_when_disabled(tmp_path, monkeypatch):
    """A disabled Bitwarden config must leave the source map untouched."""
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    config_path = tmp_path / "config.yaml"
    # ``enabled: false`` has to sit *under* ``bitwarden`` for this to exercise
    # the disabled path; with flat indentation it would be a sibling key and
    # the ``bitwarden`` section would simply be empty.
    config_path.write_text(
        "secrets:\n"
        "  bitwarden:\n"
        "    enabled: false\n",
        encoding="utf-8",
    )

    env_loader._apply_external_secret_sources(tmp_path)

    assert env_loader.get_secret_source("ANTHROPIC_API_KEY") is None
|
||||
@@ -59,6 +59,59 @@ def test_write_json_returns_false_on_broken_pipe(monkeypatch):
|
||||
assert server.write_json({"ok": True}) is False
|
||||
|
||||
|
||||
def test_tui_verbose_tool_details_fail_closed_when_redaction_fails(monkeypatch):
    """If redaction raises, the verbose-text helpers return an empty string.

    ``agent.redact`` is replaced in ``sys.modules`` with a stub whose
    ``redact_sensitive_text`` always raises; none of the helpers may fall
    back to emitting the unredacted input.
    """

    def _always_raise(*_args, **_kwargs):
        raise RuntimeError("redaction unavailable")

    broken_redact = types.ModuleType("agent.redact")
    broken_redact.redact_sensitive_text = _always_raise
    monkeypatch.setitem(sys.modules, "agent.redact", broken_redact)

    assert server._redact_tui_verbose_text("api_key=secret") == ""
    assert server._tool_args_text({"api_key": "secret"}) == ""
    assert server._tool_result_text("token=secret") == ""
|
||||
|
||||
|
||||
def test_tui_verbose_tool_details_are_capped_before_emit(monkeypatch):
    """Capping keeps the tail of the text behind an omission banner.

    With the limits patched to 12 characters / 2 lines, a four-line input
    must come back starting with the "showing verbose tail" banner, ending
    with the last two lines, and without the first line.
    """
    for limit_name, limit_value in (
        ("_TUI_VERBOSE_TEXT_MAX_CHARS", 12),
        ("_TUI_VERBOSE_TEXT_MAX_LINES", 2),
    ):
        monkeypatch.setattr(server, limit_name, limit_value)

    tail_view = server._cap_tui_verbose_text("one\ntwo\nthree\nfour")

    assert tail_view.startswith("[showing verbose tail; omitted ")
    assert tail_view.endswith("three\nfour")
    assert "one" not in tail_view
|
||||
|
||||
|
||||
def test_tui_verbose_tool_events_omit_details_when_redaction_fails(monkeypatch):
    """Tool events are still emitted, minus detail fields, when redaction fails.

    With a raising ``agent.redact`` stub installed and a verbose session
    registered, ``tool.start`` must carry no ``args_text`` and
    ``tool.complete`` must carry no ``result_text``.
    """

    def _always_raise(*_args, **_kwargs):
        raise RuntimeError("redaction unavailable")

    broken_redact = types.ModuleType("agent.redact")
    broken_redact.redact_sensitive_text = _always_raise
    monkeypatch.setitem(sys.modules, "agent.redact", broken_redact)

    emitted: list[tuple[str, str, dict]] = []

    def _record(event_type, sid, payload):
        emitted.append((event_type, sid, payload))

    monkeypatch.setattr(server, "_emit", _record)

    session_id = "redaction-test"
    session_state = {"tool_progress_mode": "verbose", "tool_started_at": {}}
    monkeypatch.setitem(server._sessions, session_id, session_state)

    tool_args = {"command": "pwd"}
    server._on_tool_start(session_id, "tool-1", "terminal", tool_args)
    server._on_tool_complete(session_id, "tool-1", "terminal", tool_args, "done")

    start_kind, _start_sid, start_payload = emitted[0]
    assert start_kind == "tool.start"
    done_kind, _done_sid, done_payload = emitted[1]
    assert done_kind == "tool.complete"
    assert "args_text" not in start_payload
    assert "result_text" not in done_payload
|
||||
|
||||
|
||||
def test_dispatch_rejects_non_object_request():
|
||||
resp = server.dispatch([])
|
||||
|
||||
|
||||
@@ -8,6 +8,8 @@ depend on the registry being populated should use it explicitly or via
|
||||
``@pytest.mark.usefixtures("web_registry_populated")``.
|
||||
"""
|
||||
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@@ -48,3 +50,20 @@ def web_registry_populated():
|
||||
yield
|
||||
from agent.web_search_registry import _reset_for_tests
|
||||
_reset_for_tests()
|
||||
|
||||
|
||||
@pytest.fixture
def disable_lazy_stt_install():
    """Neutralize the runtime lazy-install probe for speech-to-text.

    ``tools.transcription_tools._try_lazy_install_stt`` is patched to return
    ``False`` for the duration of the test. Otherwise the probe calls
    ``importlib.util.find_spec("faster_whisper")``, which is truthy whenever
    the package is present in the dev / CI environment, and that undoes a
    test's static ``_HAS_FASTER_WHISPER=False`` patch meant to simulate
    'faster-whisper not installed'.

    Opt in at module scope with
    ``pytestmark = pytest.mark.usefixtures("disable_lazy_stt_install")``.
    """
    probe_target = "tools.transcription_tools._try_lazy_install_stt"
    disarmed_probe = patch(probe_target, return_value=False)
    with disarmed_probe:
        yield
|
||||
|
||||
@@ -0,0 +1,246 @@
|
||||
"""Unit tests for tools/app_tools.py — the Nous tool gateway integration."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import httpx
|
||||
import pytest
|
||||
|
||||
from tools.managed_tool_gateway import ManagedToolGatewayConfig
|
||||
|
||||
|
||||
# Canned gateway configuration shared by the tests in this module; the origin
# and token below are the values the URL / Authorization assertions expect.
_FAKE_GATEWAY = ManagedToolGatewayConfig(
    managed_mode=True,
    vendor="tools",
    nous_user_token="test-token-abc123",
    gateway_origin="https://tools-gateway.example.com",
)
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def _reset_http_client_cache():
    """Drop the module-level cached httpx client around every test."""
    import tools.app_tools as mod

    def _forget_client():
        mod._http_client = None
        mod._http_client_origin = None

    _forget_client()
    yield
    _forget_client()
|
||||
|
||||
|
||||
@pytest.fixture()
def gateway_post(monkeypatch):
    """Stub the gateway lookup and ``httpx.Client.post``; expose the request.

    Returns a dict that, after a handler has posted, holds the ``url``,
    ``headers`` and ``json`` body of that request. The model-name resolver is
    patched to return ``None`` and the stubbed response is a 200 with an empty
    ``data`` payload.
    """
    monkeypatch.setattr(
        "tools.app_tools.resolve_managed_tool_gateway", lambda v: _FAKE_GATEWAY
    )
    monkeypatch.setattr("tools.app_tools._get_current_model_name", lambda: None)

    captured = {}

    ok_response = MagicMock(spec=httpx.Response)
    ok_response.status_code = 200
    ok_response.json.return_value = {"data": {}, "error": None}
    ok_response.text = json.dumps({"data": {}, "error": None})

    def fake_post(self, url, *, json=None, headers=None, **kw):
        # ``json`` here is the request body keyword, not the json module.
        captured.update(url=url, headers=headers, json=json)
        return ok_response

    monkeypatch.setattr(httpx.Client, "post", fake_post)
    return captured
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# check_fn gating
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestAppToolsAvailability:
    """``_app_tools_available`` needs both a ready gateway and the config flag."""

    @staticmethod
    def _availability(monkeypatch, *, gateway_ready, config_enabled):
        """Patch both inputs of the check and return its result."""
        monkeypatch.setattr(
            "tools.app_tools.is_managed_tool_gateway_ready",
            lambda vendor: gateway_ready,
        )
        monkeypatch.setattr(
            "tools.app_tools._read_portal_app_tools_enabled",
            lambda: config_enabled,
        )
        from tools.app_tools import _app_tools_available

        return _app_tools_available()

    def test_returns_false_when_gateway_not_ready(self, monkeypatch):
        available = self._availability(
            monkeypatch, gateway_ready=False, config_enabled=True
        )
        assert available is False

    def test_returns_true_when_gateway_ready_and_config_on(self, monkeypatch):
        available = self._availability(
            monkeypatch, gateway_ready=True, config_enabled=True
        )
        assert available is True

    def test_returns_false_when_config_off(self, monkeypatch):
        available = self._availability(
            monkeypatch, gateway_ready=True, config_enabled=False
        )
        assert available is False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# URL + auth header
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestSearchPostsCorrectUrlAndAuth:
    """The search handler posts to ``/v1/search`` with bearer auth."""

    def test_posts_to_v1_search_with_bearer_token(self, monkeypatch, gateway_post):
        monkeypatch.setattr(
            "tools.app_tools._get_current_model_name", lambda: "test-model"
        )
        from tools.app_tools import handle_app_search_tools

        queries = [{"use_case": "send email"}]
        handle_app_search_tools({"queries": queries})

        assert gateway_post["url"] == "https://tools-gateway.example.com/v1/search"

        sent_headers = gateway_post["headers"]
        assert sent_headers["Authorization"] == "Bearer test-token-abc123"
        assert sent_headers["Content-Type"] == "application/json"

        sent_body = gateway_post["json"]
        assert sent_body["queries"] == [{"use_case": "send email"}]
        assert sent_body["model"] == "test-model"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Model auto-injection
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestModelAutoInjection:
    """The resolved model name is added to the body, or left out if unknown."""

    def test_injects_model_from_config(self, monkeypatch, gateway_post):
        monkeypatch.setattr(
            "tools.app_tools._get_current_model_name", lambda: "claude-sonnet-4"
        )
        from tools.app_tools import handle_app_search_tools

        handle_app_search_tools({"queries": [{"use_case": "test"}]})

        sent_body = gateway_post["json"]
        assert sent_body["model"] == "claude-sonnet-4"

    def test_omits_model_when_unresolvable(self, gateway_post):
        # The gateway_post fixture already patches the resolver to return None.
        from tools.app_tools import handle_app_search_tools

        handle_app_search_tools({"queries": [{"use_case": "test"}]})

        sent_body = gateway_post["json"]
        assert "model" not in sent_body
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Gateway-internal param stripping (allowlist approach)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestExecuteStripsInternalParams:
    """Gateway-internal arguments never reach the outgoing execute body."""

    def test_strips_sync_response_thought_step_metric(self, gateway_post):
        from tools.app_tools import handle_app_execute_tools

        internal_only = {
            "sync_response_to_workbench": True,
            "thought": "testing",
            "current_step": "TESTING",
            "current_step_metric": "1/1 tests",
        }
        handle_app_execute_tools(
            {"tools": [{"tool_slug": "TEST", "arguments": {}}], **internal_only}
        )

        sent_body = gateway_post["json"]
        for stripped_key in (
            "sync_response_to_workbench",
            "thought",
            "current_step",
            "current_step_metric",
        ):
            assert stripped_key not in sent_body
        assert sent_body["tools"] == [{"tool_slug": "TEST", "arguments": {}}]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# HTTP error → tool result (not exception)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestHttpErrorReturnedAsToolResult:
    """Non-2xx gateway responses come back as a JSON tool result, not a raise."""

    @pytest.mark.parametrize("status_code", [402, 403, 422, 500])
    def test_returns_error_json_not_exception(self, monkeypatch, status_code):
        monkeypatch.setattr(
            "tools.app_tools.resolve_managed_tool_gateway", lambda v: _FAKE_GATEWAY
        )

        error_body = {"error": {"code": "TEST_ERROR", "message": "fail"}}
        failing_response = MagicMock(spec=httpx.Response)
        failing_response.status_code = status_code
        failing_response.json.return_value = error_body
        failing_response.text = json.dumps(error_body)

        def _post_returning_error(self, url, **kw):
            return failing_response

        monkeypatch.setattr(httpx.Client, "post", _post_returning_error)

        from tools.app_tools import handle_app_search_tools

        raw_result = handle_app_search_tools({"queries": [{"use_case": "test"}]})
        decoded = json.loads(raw_result)
        assert decoded["error"]["code"] == "TEST_ERROR"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Network failure → tool result
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestNetworkFailureReturnedAsToolResult:
    """Transport-level httpx errors are converted into coded tool results."""

    @staticmethod
    def _search_with_failing_post(monkeypatch, exc_type, exc_message):
        """Run a search while ``httpx.Client.post`` raises; return decoded JSON."""
        monkeypatch.setattr(
            "tools.app_tools.resolve_managed_tool_gateway", lambda v: _FAKE_GATEWAY
        )

        def _raising_post(self, url, **kw):
            raise exc_type(exc_message)

        monkeypatch.setattr(httpx.Client, "post", _raising_post)

        from tools.app_tools import handle_app_search_tools

        return json.loads(handle_app_search_tools({"queries": [{"use_case": "test"}]}))

    def test_connect_error_returns_gateway_unreachable(self, monkeypatch):
        decoded = self._search_with_failing_post(
            monkeypatch, httpx.ConnectError, "Connection refused"
        )
        assert decoded["error"]["code"] == "GATEWAY_UNREACHABLE"

    def test_timeout_returns_gateway_timeout(self, monkeypatch):
        decoded = self._search_with_failing_post(
            monkeypatch, httpx.ReadTimeout, "timed out"
        )
        assert decoded["error"]["code"] == "GATEWAY_TIMEOUT"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Endpoint routing + payload forwarding
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestEndpointRouting:
    """Each handler hits its own endpoint and forwards its payload fields."""

    def test_manage_connections_forwards_toolkits(self, gateway_post):
        from tools.app_tools import handle_app_manage_connections

        handle_app_manage_connections(
            {"toolkits": ["gmail", "slack"], "reinitiate_all": True}
        )

        assert gateway_post["url"].endswith("/v1/connections")
        sent_body = gateway_post["json"]
        assert sent_body["toolkits"] == ["gmail", "slack"]
        assert sent_body["reinitiate_all"] is True

    def test_tool_schemas_forwards_slugs(self, gateway_post):
        from tools.app_tools import handle_app_tool_schemas

        handle_app_tool_schemas(
            {
                "tool_slugs": ["GMAIL_SEND_EMAIL"],
                "include": ["input_schema", "output_schema"],
            }
        )

        assert gateway_post["url"].endswith("/v1/schemas")
        sent_body = gateway_post["json"]
        assert sent_body["tool_slugs"] == ["GMAIL_SEND_EMAIL"]
        assert sent_body["include"] == ["input_schema", "output_schema"]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Registry entries
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestRegistryEntries:
    """Importing ``tools.app_tools`` registers its four tools in one toolset."""

    def test_all_four_tools_registered_under_app_tools(self):
        from tools.registry import registry
        import tools.app_tools  # noqa: F401

        expected_names = {
            "app_search_tools",
            "app_tool_schemas",
            "app_execute_tools",
            "app_manage_connections",
        }
        for tool_name in expected_names:
            registered = registry._tools.get(tool_name)
            assert registered is not None, f"{tool_name} not registered"
            assert registered.toolset == "app_tools"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# session (object) vs session_id (string) asymmetry
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestSessionHandling:
    """Search forwards a ``session`` object; the rest forward ``session_id``."""

    def test_search_uses_session_object(self, gateway_post):
        from tools.app_tools import handle_app_search_tools

        handle_app_search_tools(
            {"queries": [{"use_case": "test"}], "session": {"generate_id": True}}
        )

        sent_body = gateway_post["json"]
        assert isinstance(sent_body["session"], dict)
        assert "session_id" not in sent_body

    def test_schemas_uses_session_id_string(self, gateway_post):
        from tools.app_tools import handle_app_tool_schemas

        handle_app_tool_schemas({"tool_slugs": ["TEST"], "session_id": "sess-123"})

        sent_body = gateway_post["json"]
        assert sent_body["session_id"] == "sess-123"
        assert "session" not in sent_body

    def test_execute_uses_session_id_string(self, gateway_post):
        from tools.app_tools import handle_app_execute_tools

        handle_app_execute_tools(
            {
                "tools": [{"tool_slug": "TEST", "arguments": {}}],
                "session_id": "sess-456",
            }
        )

        sent_body = gateway_post["json"]
        assert sent_body["session_id"] == "sess-456"
        assert "session" not in sent_body

    def test_connections_uses_session_id_string(self, gateway_post):
        from tools.app_tools import handle_app_manage_connections

        handle_app_manage_connections({"toolkits": ["gmail"], "session_id": "sess-789"})

        sent_body = gateway_post["json"]
        assert sent_body["session_id"] == "sess-789"
        assert "session" not in sent_body
|
||||
@@ -76,6 +76,27 @@ class TestSchema:
|
||||
modes = set(COMPUTER_USE_SCHEMA["parameters"]["properties"]["mode"]["enum"])
|
||||
assert modes == {"som", "vision", "ax"}
|
||||
|
||||
def test_schema_exposes_max_elements_cap_for_capture(self):
    """The schema declares ``max_elements`` as an integer with minimum >= 1."""
    from tools.computer_use.schema import COMPUTER_USE_SCHEMA

    properties = COMPUTER_USE_SCHEMA["parameters"]["properties"]
    assert "max_elements" in properties

    max_elements_spec = properties["max_elements"]
    assert max_elements_spec["type"] == "integer"
    # A missing ``minimum`` is tolerated (treated as 1).
    assert max_elements_spec.get("minimum", 1) >= 1
|
||||
|
||||
def test_schema_max_elements_documents_default_and_upper_bound(self):
    """The schema's ``default`` / ``maximum`` must equal the runtime constants.

    The original PR text said "Default 100" without a matching `default`
    field and had no upper bound — both Copilot findings — so the schema is
    pinned against the tool module's constants here.
    """
    from tools.computer_use.schema import COMPUTER_USE_SCHEMA
    from tools.computer_use.tool import (
        _DEFAULT_MAX_ELEMENTS,
        _MAX_ALLOWED_MAX_ELEMENTS,
    )

    max_elements_spec = COMPUTER_USE_SCHEMA["parameters"]["properties"]["max_elements"]

    declared_default = max_elements_spec.get("default")
    assert declared_default == _DEFAULT_MAX_ELEMENTS

    declared_maximum = max_elements_spec.get("maximum")
    assert declared_maximum == _MAX_ALLOWED_MAX_ELEMENTS
|
||||
|
||||
|
||||
class TestRegistration:
|
||||
def test_tool_registers_with_registry(self):
|
||||
@@ -205,6 +226,54 @@ class TestDispatch:
|
||||
parsed = json.loads(out)
|
||||
assert "error" in parsed
|
||||
|
||||
def test_set_value_routes_to_backend(self, noop_backend):
    """``set_value`` is dispatched to the backend.

    Regression for a missing ``_NoopBackend`` stub.
    """
    from tools.computer_use.tool import handle_computer_use

    request = {"action": "set_value", "value": "Option A", "element": 5}
    response = json.loads(handle_computer_use(request))

    assert response.get("ok") is True
    assert response.get("action") == "set_value"

    recorded_actions = [call[0] for call in noop_backend.calls]
    assert "set_value" in recorded_actions
|
||||
|
||||
def test_set_value_missing_value_returns_error(self, noop_backend):
    """``set_value`` without a ``value`` argument yields an error payload."""
    from tools.computer_use.tool import handle_computer_use

    raw = handle_computer_use({"action": "set_value"})
    response = json.loads(raw)

    assert "error" in response
|
||||
def test_capture_after_skipped_when_action_failed(self, noop_backend):
    """A failed action suppresses the ``capture_after`` follow-up (regression guard).

    A screenshot taken after a failed action shows the screen in a normal
    state, which misleads the model into thinking the action succeeded.
    """
    from unittest.mock import patch
    from tools.computer_use.backend import ActionResult
    from tools.computer_use.tool import handle_computer_use

    # Force click() to report failure.
    failed_click = ActionResult(ok=False, action="click", message="element not found")
    request = {"action": "click", "element": 99, "capture_after": True}
    with patch.object(noop_backend, "click", return_value=failed_click):
        raw = handle_computer_use(request)

    response = json.loads(raw)
    # The error itself is returned rather than a multimodal capture.
    assert response.get("ok") is False
    assert response.get("action") == "click"

    # The backend must not have been asked for a follow-up capture.
    capture_count = sum(1 for call in noop_backend.calls if call[0] == "capture")
    assert capture_count == 0, "capture must not be called after a failed action"
|
||||
|
||||
def test_capture_after_fires_when_action_succeeds(self, noop_backend):
    """A successful action with ``capture_after`` triggers exactly one capture."""
    from tools.computer_use.tool import handle_computer_use

    request = {"action": "click", "element": 1, "capture_after": True}
    handle_computer_use(request)

    # The noop backend reports ok=True, so the follow-up capture should run.
    capture_count = sum(1 for call in noop_backend.calls if call[0] == "capture")
    assert capture_count == 1
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Safety guards (type / key block lists)
|
||||
@@ -337,6 +406,193 @@ class TestCaptureResponse:
|
||||
assert "AXButton" in text_part["text"]
|
||||
assert "AXTextField" in text_part["text"]
|
||||
|
||||
def _ax_backend_with(self, count: int):
    """Return a stub backend whose ``capture`` reports ``count`` AX buttons.

    Elements are indexed from 1 and labelled ``el-0`` .. ``el-{count-1}``.
    The capture carries an empty PNG payload and the app name "Obsidian";
    every other backend method is an inert placeholder.
    """
    from tools.computer_use.backend import CaptureResult, UIElement

    elements = []
    for position in range(count):
        elements.append(
            UIElement(
                index=position + 1,
                role="AXButton",
                label=f"el-{position}",
                bounds=(0, 0, 1, 1),
            )
        )

    class FakeBackend:
        def start(self):
            pass

        def stop(self):
            pass

        def is_available(self):
            return True

        def capture(self, mode="som", app=None):
            # Hand out a fresh list on every call so callers cannot mutate
            # the shared element list.
            return CaptureResult(
                mode=mode,
                width=800,
                height=600,
                png_b64="",
                elements=list(elements),
                app="Obsidian",
            )

        def click(self, **kw):
            pass

        def drag(self, **kw):
            pass

        def scroll(self, **kw):
            pass

        def type_text(self, text):
            pass

        def key(self, keys):
            pass

        def list_apps(self):
            return []

        def focus_app(self, app, raise_window=False):
            pass

    return FakeBackend()
|
||||
|
||||
def test_capture_ax_caps_elements_at_default_for_dense_trees(self):
    """Regression for #22865: a 600-element AX tree is capped at the default.

    An Electron-style dense tree must not be emitted verbatim into the tool
    result; the response reports the true total and how many were dropped.
    """
    from tools.computer_use import tool as cu_tool

    total = 600
    dense_backend = self._ax_backend_with(total)
    cu_tool.reset_backend_for_tests()
    with patch.object(cu_tool, "_get_backend", return_value=dense_backend):
        raw = cu_tool.handle_computer_use({"action": "capture", "mode": "ax"})

    response = json.loads(raw)
    default_cap = cu_tool._DEFAULT_MAX_ELEMENTS
    assert response["mode"] == "ax"
    assert response["total_elements"] == total
    assert len(response["elements"]) == default_cap
    assert response["truncated_elements"] == total - default_cap
    # The human summary must mention the truncation so the model knows the
    # JSON view is partial and can re-issue with a tighter scope.
    assert "truncated to" in response["summary"]
|
||||
|
||||
def test_capture_ax_honors_explicit_max_elements_override(self):
    """An explicit ``max_elements`` of 250 replaces the default cap."""
    from tools.computer_use import tool as cu_tool

    dense_backend = self._ax_backend_with(600)
    cu_tool.reset_backend_for_tests()
    request = {"action": "capture", "mode": "ax", "max_elements": 250}
    with patch.object(cu_tool, "_get_backend", return_value=dense_backend):
        raw = cu_tool.handle_computer_use(request)

    response = json.loads(raw)
    assert len(response["elements"]) == 250
    assert response["truncated_elements"] == 350
|
||||
|
||||
def test_capture_ax_below_cap_is_unchanged(self):
    """Backwards-compat: captures under the cap are returned in full.

    No `truncated_elements` field appears and the summary carries no
    truncation note.
    """
    from tools.computer_use import tool as cu_tool

    small_backend = self._ax_backend_with(5)
    cu_tool.reset_backend_for_tests()
    with patch.object(cu_tool, "_get_backend", return_value=small_backend):
        raw = cu_tool.handle_computer_use({"action": "capture", "mode": "ax"})

    response = json.loads(raw)
    assert len(response["elements"]) == 5
    assert response["total_elements"] == 5
    assert "truncated_elements" not in response
    assert "truncated to" not in response["summary"]
|
||||
|
||||
def test_capture_ax_invalid_max_elements_falls_back_to_default(self):
    """Malformed `max_elements` values fall back to the default cap.

    A string, zero or a negative number must not silently disable the cap
    and bring back the original unbounded behavior.
    """
    from tools.computer_use import tool as cu_tool

    dense_backend = self._ax_backend_with(600)
    cu_tool.reset_backend_for_tests()
    malformed_values = ("not-a-number", 0, -10)
    for bad in malformed_values:
        request = {"action": "capture", "mode": "ax", "max_elements": bad}
        with patch.object(cu_tool, "_get_backend", return_value=dense_backend):
            raw = cu_tool.handle_computer_use(request)
        response = json.loads(raw)
        returned_count = len(response["elements"])
        assert returned_count == cu_tool._DEFAULT_MAX_ELEMENTS, (
            f"bad max_elements={bad!r} disabled the cap"
        )
|
||||
|
||||
def test_capture_ax_clamps_oversized_max_elements_to_hard_cap(self):
    """A huge `max_elements` is clamped to the hard upper bound.

    Callers must not be able to switch off the context-blow-up protection
    simply by passing a very large argument.
    """
    from tools.computer_use import tool as cu_tool

    total = 5000
    huge_backend = self._ax_backend_with(total)
    cu_tool.reset_backend_for_tests()
    request = {"action": "capture", "mode": "ax", "max_elements": 10_000}
    with patch.object(cu_tool, "_get_backend", return_value=huge_backend):
        raw = cu_tool.handle_computer_use(request)

    response = json.loads(raw)
    hard_cap = cu_tool._MAX_ALLOWED_MAX_ELEMENTS
    assert len(response["elements"]) == hard_cap
    assert response["total_elements"] == total
    assert response["truncated_elements"] == total - hard_cap
|
||||
|
||||
def test_capture_ax_summary_indices_match_returned_elements(self):
    """Every ``#N`` line in the summary refers to an element actually returned.

    When `max_elements` is smaller than the summary's own line cap, the
    summary must not list indices missing from `elements`; otherwise the
    model sees e.g. `#15` in the summary with no matching entry in the payload.
    """
    from tools.computer_use import tool as cu_tool

    dense_backend = self._ax_backend_with(600)
    cu_tool.reset_backend_for_tests()
    request = {"action": "capture", "mode": "ax", "max_elements": 5}
    with patch.object(cu_tool, "_get_backend", return_value=dense_backend):
        raw = cu_tool.handle_computer_use(request)

    response = json.loads(raw)
    returned_indices = {element["index"] for element in response["elements"]}

    for summary_line in response["summary"].splitlines():
        stripped = summary_line.lstrip()
        if not stripped.startswith("#"):
            continue
        # The first whitespace-separated token looks like "#<index>".
        referenced = int(stripped.split()[0].lstrip("#"))
        assert referenced in returned_indices, (
            f"summary references #{referenced} but it is absent from elements payload "
            f"(returned: {sorted(returned_indices)})"
        )
|
||||
|
||||
def test_capture_multimodal_summary_omits_truncation_note(self):
    """``som`` captures must not claim the response was truncated.

    The som/vision multimodal envelope returns a screenshot rather than an
    `elements` array, so a "response truncated to N of M elements" note in
    the summary would be inaccurate.
    """
    from tools.computer_use.backend import CaptureResult, UIElement
    from tools.computer_use import tool as cu_tool

    fake_png = "iVBORw0KGgo="
    elements = []
    for position in range(600):
        elements.append(
            UIElement(
                index=position + 1,
                role="AXButton",
                label=f"el-{position}",
                bounds=(0, 0, 1, 1),
            )
        )

    class FakeBackend:
        def start(self):
            pass

        def stop(self):
            pass

        def is_available(self):
            return True

        def capture(self, mode="som", app=None):
            return CaptureResult(
                mode=mode,
                width=800,
                height=600,
                png_b64=fake_png,
                elements=list(elements),
                app="Obsidian",
            )

        def click(self, **kw):
            pass

        def drag(self, **kw):
            pass

        def scroll(self, **kw):
            pass

        def type_text(self, text):
            pass

        def key(self, keys):
            pass

        def list_apps(self):
            return []

        def focus_app(self, app, raise_window=False):
            pass

    cu_tool.reset_backend_for_tests()
    with patch.object(cu_tool, "_get_backend", return_value=FakeBackend()):
        envelope = cu_tool.handle_computer_use({"action": "capture", "mode": "som"})

    assert isinstance(envelope, dict)
    assert envelope["_multimodal"] is True

    text_part = next(
        part for part in envelope["content"] if part.get("type") == "text"
    )
    assert "truncated to" not in text_part["text"], (
        "multimodal response carries an image, not an elements array; "
        "the truncation note describes a payload field that isn't present"
    )
    assert "truncated to" not in envelope["text_summary"]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Anthropic adapter: multimodal tool-result conversion
|
||||
|
||||
@@ -78,6 +78,63 @@ def test_resolve_managed_tool_gateway_is_disabled_without_subscription():
|
||||
assert result is None
|
||||
|
||||
|
||||
def test_rewrite_localhost_origin_rewrites_subdomain():
    """A `*.localhost` origin is rewritten to 127.0.0.1 and the original host is returned."""
    result = managed_tool_gateway._rewrite_localhost_origin(
        "http://tools-gateway.localhost:3009"
    )
    resolved, host = result
    assert resolved == "http://127.0.0.1:3009"
    assert host == "tools-gateway.localhost:3009"
|
||||
|
||||
|
||||
def test_rewrite_localhost_origin_preserves_path():
    """The URL path survives the rewrite; only the host part is replaced."""
    result = managed_tool_gateway._rewrite_localhost_origin(
        "http://tools-gateway.localhost:3009/v1/foo"
    )
    resolved, host = result
    assert resolved == "http://127.0.0.1:3009/v1/foo"
    assert host == "tools-gateway.localhost:3009"
|
||||
|
||||
|
||||
def test_rewrite_localhost_origin_no_port():
    """An origin without an explicit port is rewritten without inventing one."""
    result = managed_tool_gateway._rewrite_localhost_origin(
        "http://tools-gateway.localhost"
    )
    resolved, host = result
    assert resolved == "http://127.0.0.1"
    assert host == "tools-gateway.localhost"
|
||||
|
||||
|
||||
def test_rewrite_localhost_origin_ignores_bare_localhost():
    """Plain `localhost` (no subdomain) passes through untouched with no host override."""
    result = managed_tool_gateway._rewrite_localhost_origin("http://localhost:3009")
    resolved, host = result
    assert resolved == "http://localhost:3009"
    assert host is None
|
||||
|
||||
|
||||
def test_rewrite_localhost_origin_ignores_real_domains():
    """A real public domain passes through untouched with no host override."""
    result = managed_tool_gateway._rewrite_localhost_origin(
        "https://tools-gateway.nousresearch.com"
    )
    resolved, host = result
    assert resolved == "https://tools-gateway.nousresearch.com"
    assert host is None
|
||||
|
||||
|
||||
def test_gateway_config_resolved_origin_and_host_header():
    """A `*.localhost` gateway config exposes the loopback origin plus the original host."""
    config_kwargs = dict(
        vendor="tools",
        gateway_origin="http://tools-gateway.localhost:3009",
        nous_user_token="tok",
        managed_mode=True,
    )
    cfg = managed_tool_gateway.ManagedToolGatewayConfig(**config_kwargs)
    assert cfg.resolved_origin == "http://127.0.0.1:3009"
    assert cfg.gateway_host_header == "tools-gateway.localhost:3009"
|
||||
|
||||
|
||||
def test_gateway_config_resolved_origin_passthrough_for_real_domain():
    """A real-domain gateway config keeps its origin and sets no host header."""
    config_kwargs = dict(
        vendor="firecrawl",
        gateway_origin="https://firecrawl-gateway.nousresearch.com",
        nous_user_token="tok",
        managed_mode=True,
    )
    cfg = managed_tool_gateway.ManagedToolGatewayConfig(**config_kwargs)
    assert cfg.resolved_origin == "https://firecrawl-gateway.nousresearch.com"
    assert cfg.gateway_host_header is None
|
||||
|
||||
|
||||
def test_read_nous_access_token_refreshes_expiring_cached_token(tmp_path, monkeypatch):
|
||||
monkeypatch.delenv("TOOL_GATEWAY_USER_TOKEN", raising=False)
|
||||
monkeypatch.setenv("HERMES_HOME", str(tmp_path))
|
||||
|
||||
@@ -91,7 +91,7 @@ class TestSSHBulkUpload:
|
||||
assert "/home/testuser/.hermes/credentials" in mkdir_str
|
||||
|
||||
def test_staging_symlinks_mirror_remote_layout(self, mock_env, tmp_path):
|
||||
"""Symlinks in staging dir should mirror the remote path structure."""
|
||||
"""Symlinks in staging dir should mirror the .hermes-relative layout."""
|
||||
f1 = tmp_path / "local_a.txt"
|
||||
f1.write_text("content a")
|
||||
|
||||
@@ -107,9 +107,7 @@ class TestSSHBulkUpload:
|
||||
c_idx = cmd.index("-C")
|
||||
staging_dir = cmd[c_idx + 1]
|
||||
# Check the symlink exists
|
||||
expected = os.path.join(
|
||||
staging_dir, "home/testuser/.hermes/skills/my_skill.md"
|
||||
)
|
||||
expected = os.path.join(staging_dir, "skills/my_skill.md")
|
||||
staging_paths.append(expected)
|
||||
assert os.path.islink(expected), f"Expected symlink at {expected}"
|
||||
assert os.readlink(expected) == os.path.abspath(str(f1))
|
||||
@@ -166,14 +164,42 @@ class TestSSHBulkUpload:
|
||||
assert "-" in tar_cmd # stdout
|
||||
assert "-C" in tar_cmd
|
||||
|
||||
# ssh: extract from stdin at /, preserving existing dir modes (#17767)
|
||||
# ssh: extract from stdin at ~/.hermes, preserving existing dir modes (#17767)
|
||||
ssh_str = " ".join(ssh_cmd)
|
||||
assert "ssh" in ssh_str
|
||||
assert "tar xf -" in ssh_str
|
||||
assert "--no-overwrite-dir" in ssh_str
|
||||
assert "-C /" in ssh_str
|
||||
assert "-C /home/testuser/.hermes" in ssh_str
|
||||
assert "testuser@example.com" in ssh_str
|
||||
|
||||
def test_bulk_upload_never_stages_remote_home_prefix(self, mock_env, tmp_path):
    """Regression: do not archive /home/<user> path components."""
    local_file = tmp_path / "nested.txt"
    local_file.write_text("nested")
    files = [(str(local_file), "/home/testuser/.hermes/cache/nested.txt")]

    def capture_tar_cmd(cmd, **kwargs):
        # Inspect the staging directory while the tar process is being spawned.
        if cmd[0] == "tar":
            staging_dir = cmd[cmd.index("-C") + 1]
            assert not os.path.exists(os.path.join(staging_dir, "home"))
            expected = os.path.join(staging_dir, "cache/nested.txt")
            assert os.path.islink(expected)

        proc = MagicMock()
        proc.stdout = MagicMock()
        proc.returncode = 0
        proc.poll.return_value = 0
        proc.communicate.return_value = (b"", b"")
        proc.stderr = MagicMock()
        proc.stderr.read.return_value = b""
        return proc

    run_patch = patch.object(
        subprocess, "run", return_value=subprocess.CompletedProcess([], 0)
    )
    popen_patch = patch.object(subprocess, "Popen", side_effect=capture_tar_cmd)
    with run_patch, popen_patch:
        mock_env._ssh_bulk_upload(files)
|
||||
|
||||
def test_mkdir_failure_raises(self, mock_env, tmp_path):
|
||||
"""mkdir failure should raise RuntimeError before tar pipe."""
|
||||
f1 = tmp_path / "y.txt"
|
||||
|
||||
@@ -23,6 +23,9 @@ def _fake_faster_whisper_module(mock_model):
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
pytestmark = pytest.mark.usefixtures("disable_lazy_stt_install")
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def _clear_openai_env(monkeypatch):
|
||||
monkeypatch.delenv("OPENAI_API_KEY", raising=False)
|
||||
|
||||
@@ -12,6 +12,9 @@ from unittest.mock import MagicMock, patch
|
||||
import pytest
|
||||
|
||||
|
||||
pytestmark = pytest.mark.usefixtures("disable_lazy_stt_install")
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def isolate_env(monkeypatch):
|
||||
"""Strip every STT-related env var so the test really exercises the
|
||||
|
||||
@@ -42,6 +42,9 @@ def sample_ogg(tmp_path):
|
||||
return str(ogg_path)
|
||||
|
||||
|
||||
pytestmark = pytest.mark.usefixtures("disable_lazy_stt_install")
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def clean_env(monkeypatch):
|
||||
"""Ensure no real API keys leak into tests."""
|
||||
|
||||
@@ -0,0 +1,438 @@
|
||||
"""App integration tools — 500+ external apps via the Nous tool gateway.
|
||||
|
||||
Four meta tools that let the LLM discover, authenticate, and execute
|
||||
real app tools at runtime through the Nous managed tool gateway.
|
||||
|
||||
Architecture:
|
||||
Hermes → POST JSON → tools-gateway.nousresearch.com/v1/* → External APIs
|
||||
Auth: Bearer <nous_user_token> (subscription-gated)
|
||||
Vendor: "tools" in the managed gateway infra (build_vendor_gateway_url)
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
import httpx
|
||||
|
||||
from tools.registry import registry
|
||||
from tools.managed_tool_gateway import (
|
||||
is_managed_tool_gateway_ready,
|
||||
resolve_managed_tool_gateway,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ---------------------------------------------------------------------------
# Timeouts per endpoint. The positional value is the default applied to every
# phase of the request; `connect=` overrides just the connection phase.
# ---------------------------------------------------------------------------
_TIMEOUT_SEARCH = httpx.Timeout(30.0, connect=5.0)
_TIMEOUT_SCHEMAS = httpx.Timeout(15.0, connect=5.0)
_TIMEOUT_EXECUTE = httpx.Timeout(120.0, connect=5.0)
_TIMEOUT_CONNECTIONS = httpx.Timeout(30.0, connect=5.0)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Module-level cached httpx client — avoids TCP+TLS setup per tool call.
|
||||
# Follows the same thread-safe staleness pattern as image_generation_tool.py.
|
||||
# ---------------------------------------------------------------------------
|
||||
import threading
|
||||
|
||||
_http_client: Optional[httpx.Client] = None
|
||||
_http_client_origin: Optional[str] = None
|
||||
_http_client_lock = threading.Lock()
|
||||
|
||||
|
||||
def _get_http_client(origin: str, verify: bool = True) -> httpx.Client:
|
||||
"""Return a reusable httpx.Client, recreated when the origin changes."""
|
||||
global _http_client, _http_client_origin
|
||||
with _http_client_lock:
|
||||
if _http_client is not None and _http_client_origin == origin:
|
||||
return _http_client
|
||||
if _http_client is not None:
|
||||
try:
|
||||
_http_client.close()
|
||||
except Exception:
|
||||
pass
|
||||
_http_client = httpx.Client(verify=verify)
|
||||
_http_client_origin = origin
|
||||
return _http_client
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Config / availability helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _read_portal_app_tools_enabled() -> bool:
|
||||
"""Return True when the portal.app_tools config flag is on."""
|
||||
from tools.tool_backend_helpers import portal_app_tools_enabled
|
||||
return portal_app_tools_enabled()
|
||||
|
||||
|
||||
def _app_tools_available() -> bool:
|
||||
"""check_fn: True when subscription is active, gateway reachable, config on."""
|
||||
if not _read_portal_app_tools_enabled():
|
||||
return False
|
||||
return is_managed_tool_gateway_ready("tools")
|
||||
|
||||
|
||||
def _get_current_model_name() -> Optional[str]:
|
||||
"""Best-effort read of the current model name from config.
|
||||
|
||||
Handles both ``"model": "name"`` and ``"model": {"default": "name"}``
|
||||
config shapes. Returns None if unresolvable (caller should omit the
|
||||
field rather than sending garbage).
|
||||
"""
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
config = load_config()
|
||||
model_cfg = config.get("model")
|
||||
if isinstance(model_cfg, str) and model_cfg.strip():
|
||||
return model_cfg.strip()
|
||||
if isinstance(model_cfg, dict):
|
||||
default = model_cfg.get("default")
|
||||
if isinstance(default, str) and default.strip():
|
||||
return default.strip()
|
||||
except Exception:
|
||||
pass
|
||||
return None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Gateway HTTP client
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _gateway_post(
|
||||
path: str,
|
||||
payload: Dict[str, Any],
|
||||
timeout: httpx.Timeout,
|
||||
) -> Dict[str, Any]:
|
||||
"""POST JSON to the tool gateway and return the parsed response.
|
||||
|
||||
Never raises — HTTP errors and network failures are returned as dicts
|
||||
so the LLM can see them and communicate with the user.
|
||||
"""
|
||||
gateway = resolve_managed_tool_gateway("tools")
|
||||
if gateway is None:
|
||||
return {
|
||||
"error": {
|
||||
"code": "GATEWAY_UNAVAILABLE",
|
||||
"message": "Nous tool gateway is not available. Check your subscription status.",
|
||||
}
|
||||
}
|
||||
|
||||
url = f"{gateway.gateway_origin.rstrip('/')}{path}"
|
||||
headers = {
|
||||
"Authorization": f"Bearer {gateway.nous_user_token}",
|
||||
"Content-Type": "application/json",
|
||||
}
|
||||
|
||||
try:
|
||||
client = _get_http_client(url.split("/v1/")[0])
|
||||
response = client.post(url, json=payload, headers=headers, timeout=timeout)
|
||||
|
||||
# Return parsed body regardless of status code — the LLM handles errors
|
||||
try:
|
||||
return response.json()
|
||||
except Exception:
|
||||
return {
|
||||
"error": {
|
||||
"code": f"HTTP_{response.status_code}",
|
||||
"message": response.text[:2000],
|
||||
}
|
||||
}
|
||||
|
||||
except httpx.TimeoutException as exc:
|
||||
return {
|
||||
"error": {
|
||||
"code": "GATEWAY_TIMEOUT",
|
||||
"message": f"Request to {path} timed out: {exc}",
|
||||
}
|
||||
}
|
||||
except Exception as exc:
|
||||
return {
|
||||
"error": {
|
||||
"code": "GATEWAY_UNREACHABLE",
|
||||
"message": f"Failed to reach tool gateway: {exc}",
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tool handlers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def handle_app_search_tools(args: dict, **kw) -> str:
    """Search the app integrations for tools matching a use case.

    Forwards ``queries`` (when non-empty), ``session`` (when not None) and a
    model name to ``/v1/search`` and returns the gateway reply as a JSON string.
    """
    body: Dict[str, Any] = {}

    if args.get("queries"):
        body["queries"] = args.get("queries")

    # `session` is an object ({id, generate_id}), not a plain string.
    session = args.get("session")
    if session is not None:
        body["session"] = session

    # Caller-supplied model wins; otherwise fall back to the configured one.
    # The field is left out entirely when neither is available.
    model = args.get("model")
    if not model:
        model = _get_current_model_name()
    if model:
        body["model"] = model

    reply = _gateway_post("/v1/search", body, _TIMEOUT_SEARCH)
    return json.dumps(reply, ensure_ascii=False, default=str)
|
||||
|
||||
|
||||
def handle_app_tool_schemas(args: dict, **kw) -> str:
    """Fetch full input schemas for tools found via app_search_tools.

    Forwards ``tool_slugs`` and ``include`` (when non-empty) and
    ``session_id`` (when not None) to ``/v1/schemas``; returns the gateway
    reply as a JSON string.
    """
    body: Dict[str, Any] = {}

    # These two are only sent when they carry something.
    for key in ("tool_slugs", "include"):
        value = args.get(key)
        if value:
            body[key] = value

    # `session_id` is a plain string here, unlike the search tool's `session` object.
    session_id = args.get("session_id")
    if session_id is not None:
        body["session_id"] = session_id

    reply = _gateway_post("/v1/schemas", body, _TIMEOUT_SCHEMAS)
    return json.dumps(reply, ensure_ascii=False, default=str)
|
||||
|
||||
|
||||
def handle_app_execute_tools(args: dict, **kw) -> str:
    """Execute one or more app tools through the gateway.

    Forwards ``tools`` (when non-empty) and ``session_id`` (when not None) to
    ``/v1/execute``; returns the gateway reply as a JSON string.
    """
    # Only the fields below are copied into the request, so gateway-internal
    # params that are meaningless in Hermes (sync_response_to_workbench,
    # thought, current_step, current_step_metric) never reach the payload.
    body: Dict[str, Any] = {}

    requested_tools = args.get("tools")
    if requested_tools:
        body["tools"] = requested_tools

    # `session_id` is a plain string.
    session_id = args.get("session_id")
    if session_id is not None:
        body["session_id"] = session_id

    reply = _gateway_post("/v1/execute", body, _TIMEOUT_EXECUTE)
    return json.dumps(reply, ensure_ascii=False, default=str)
|
||||
|
||||
|
||||
def handle_app_manage_connections(args: dict, **kw) -> str:
    """Check or initiate OAuth/API key connections for app toolkits.

    Forwards ``toolkits`` (when non-empty), ``reinitiate_all`` and
    ``session_id`` (each when not None) to ``/v1/connections``; returns the
    gateway reply as a JSON string.
    """
    body: Dict[str, Any] = {}

    toolkits = args.get("toolkits")
    if toolkits:
        body["toolkits"] = toolkits

    # An explicit False is meaningful for `reinitiate_all`, and `session_id`
    # is a plain string, so both are gated on None rather than truthiness.
    for key in ("reinitiate_all", "session_id"):
        value = args.get(key)
        if value is not None:
            body[key] = value

    reply = _gateway_post("/v1/connections", body, _TIMEOUT_CONNECTIONS)
    return json.dumps(reply, ensure_ascii=False, default=str)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tool registration
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Register the discovery tool: search the gateway's app catalogue for tools
# matching a task. `queries` is the only required argument.
registry.register(
    name="app_search_tools",
    toolset="app_tools",
    schema={
        "name": "app_search_tools",
        "description": (
            "Search 500+ app integrations (Gmail, Slack, GitHub, Notion, Google Sheets, "
            "Jira, Linear, Figma, and more) to find tools for a task. Returns tool slugs, "
            "execution plans, pitfalls, and connection status."
        ),
        "parameters": {
            "type": "object",
            "required": ["queries"],
            "properties": {
                "queries": {
                    "type": "array",
                    "minItems": 1,
                    "description": (
                        "Structured search queries. Split independent app actions "
                        "into separate queries. Each returns 4-6 tools."
                    ),
                    "items": {
                        "type": "object",
                        "required": ["use_case"],
                        "properties": {
                            "use_case": {
                                "type": "string",
                                "maxLength": 1024,
                                "description": (
                                    "Normalized description of the task. Include app "
                                    "names if mentioned. Do NOT include personal "
                                    "identifiers — put those in known_fields."
                                ),
                            },
                            "known_fields": {
                                "type": "string",
                                "description": (
                                    "Known inputs as comma-separated key:value pairs "
                                    "(e.g. 'channel_name:general'). Omit if not relevant."
                                ),
                            },
                        },
                    },
                },
                # Unlike the other app tools, this one takes a session OBJECT,
                # not a `session_id` string.
                "session": {
                    "type": "object",
                    "description": "Session context. Pass {generate_id: true} for new workflows, {id: \"EXISTING\"} to continue.",
                    "properties": {
                        "id": {"type": "string", "description": "Existing session ID to reuse."},
                        "generate_id": {"type": "boolean", "description": "Set true for first call of a new workflow."},
                    },
                },
            },
        },
    },
    # The lambda looks the handler up by name on each call.
    handler=lambda args, **kw: handle_app_search_tools(args, **kw),
    check_fn=_app_tools_available,
    description="Search 500+ app integrations",
    emoji="🔍",
)
|
||||
|
||||
# Register the schema-lookup tool: fetch parameter schemas for tool slugs
# returned by a prior search. `tool_slugs` is the only required argument.
registry.register(
    name="app_tool_schemas",
    toolset="app_tools",
    schema={
        "name": "app_tool_schemas",
        "description": (
            "Get full input parameter schemas for tools discovered via "
            "app_search_tools. Only use slugs from search results — never invent."
        ),
        "parameters": {
            "type": "object",
            "required": ["tool_slugs"],
            "properties": {
                "tool_slugs": {
                    "type": "array",
                    "description": "Tool slugs to retrieve schemas for.",
                    "items": {"type": "string", "minLength": 1},
                },
                "include": {
                    "type": "array",
                    "default": ["input_schema"],
                    "description": "Schema fields to include. Add 'output_schema' for response validation.",
                    "items": {"type": "string", "enum": ["input_schema", "output_schema"]},
                },
                "session_id": {
                    "type": "string",
                    "description": "Session ID from a prior app_search_tools call.",
                },
            },
        },
    },
    # The lambda looks the handler up by name on each call.
    handler=lambda args, **kw: handle_app_tool_schemas(args, **kw),
    check_fn=_app_tools_available,
    description="Get tool input schemas",
    emoji="📋",
)
|
||||
|
||||
# Register the execution tool: run between 1 and 50 app tools in one call.
# This is the only app tool that sets `max_result_size_chars`.
registry.register(
    name="app_execute_tools",
    toolset="app_tools",
    schema={
        "name": "app_execute_tools",
        "description": (
            "Execute one or more app tools in parallel (up to 50). "
            "Requires active connection per toolkit. Use schema-compliant arguments only."
        ),
        "parameters": {
            "type": "object",
            "required": ["tools"],
            "properties": {
                "tools": {
                    "type": "array",
                    "minItems": 1,
                    "maxItems": 50,
                    "description": "Logically independent tools to execute in parallel.",
                    "items": {
                        "type": "object",
                        "required": ["tool_slug", "arguments"],
                        # Each entry is strict; the free-form part is the
                        # nested `arguments` object.
                        "additionalProperties": False,
                        "properties": {
                            "tool_slug": {
                                "type": "string",
                                "minLength": 1,
                                "description": "Tool slug from search results — never invent.",
                            },
                            "arguments": {
                                "type": "object",
                                "additionalProperties": True,
                                "description": "Arguments matching the tool's input schema exactly.",
                            },
                        },
                    },
                },
                "session_id": {
                    "type": "string",
                    "description": "Session ID from a prior app_search_tools call.",
                },
            },
        },
    },
    # The lambda looks the handler up by name on each call.
    handler=lambda args, **kw: handle_app_execute_tools(args, **kw),
    check_fn=_app_tools_available,
    max_result_size_chars=50_000,
    description="Execute app tools",
    emoji="⚡",
)
|
||||
|
||||
# Register the connection-management tool: check or start OAuth/API-key
# connections for toolkits. `toolkits` is the only required argument.
registry.register(
    name="app_manage_connections",
    toolset="app_tools",
    schema={
        "name": "app_manage_connections",
        "description": (
            "Check or initiate OAuth/API key connections for app toolkits. "
            "Returns auth links for inactive connections."
        ),
        "parameters": {
            "type": "object",
            "required": ["toolkits"],
            "properties": {
                "toolkits": {
                    "type": "array",
                    "description": "Toolkit slugs to check or connect (e.g. ['gmail', 'slack']).",
                    "items": {"type": "string"},
                },
                "reinitiate_all": {
                    "type": "boolean",
                    "default": False,
                    "description": "Force reconnection even for active connections.",
                },
                "session_id": {
                    "type": "string",
                    "description": "Session ID from a prior app_search_tools call.",
                },
            },
        },
    },
    # The lambda looks the handler up by name on each call.
    handler=lambda args, **kw: handle_app_manage_connections(args, **kw),
    check_fn=_app_tools_available,
    description="Manage app connections",
    emoji="🔗",
)
|
||||
@@ -142,6 +142,14 @@ class ComputerUseBackend(ABC):
|
||||
def focus_app(self, app: str, raise_window: bool = False) -> ActionResult:
|
||||
"""Route input to `app` (by name or bundle ID). Default: focus without raise."""
|
||||
|
||||
# ── Native-value mutation ────────────────────────────────────────
|
||||
@abstractmethod
def set_value(self, value: str, element: Optional[int] = None) -> ActionResult:
    """Set a native value on an element (e.g. AXPopUpButton selection).

    `element` is the 1-based SOM index returned by a prior capture call.

    Args:
        value: The value to set on the target element.
        element: Index of the target element. Behavior when omitted is
            backend-defined — NOTE(review): confirm against implementations.

    Returns:
        An ActionResult describing the outcome.
    """
|
||||
|
||||
# ── Timing ──────────────────────────────────────────────────────
|
||||
def wait(self, seconds: float) -> ActionResult:
|
||||
"""Default implementation: time.sleep."""
|
||||
|
||||
@@ -75,6 +75,28 @@ COMPUTER_USE_SCHEMA: Dict[str, Any] = {
|
||||
"frontmost app's window or the whole screen."
|
||||
),
|
||||
},
|
||||
"max_elements": {
|
||||
"type": "integer",
|
||||
"description": (
|
||||
"Optional cap on the AX `elements` array returned by "
|
||||
"`action='capture'`. Default 100, hard maximum 1000. "
|
||||
"Dense UIs (Electron apps such as Obsidian or VS Code, "
|
||||
"JetBrains IDEs) can publish 500+ AX nodes — capping "
|
||||
"prevents a single capture from blowing session "
|
||||
"context. When the cap trims the response, "
|
||||
"`total_elements` and `truncated_elements` are "
|
||||
"surfaced in the result so you can re-call with "
|
||||
"`app=` to narrow scope or raise `max_elements` when "
|
||||
"the full tree is required. Has no effect on "
|
||||
"`mode='som'` / `mode='vision'` when a screenshot is "
|
||||
"included in the response; only the rare image-"
|
||||
"missing fallback returns an `elements` array and is "
|
||||
"subject to the cap."
|
||||
),
|
||||
"default": 100,
|
||||
"minimum": 1,
|
||||
"maximum": 1000,
|
||||
},
|
||||
# ── click / drag / scroll targeting ────────────────────
|
||||
"element": {
|
||||
"type": "integer",
|
||||
|
||||
@@ -200,6 +200,10 @@ class _NoopBackend(ComputerUseBackend): # pragma: no cover
|
||||
self.calls.append(("focus_app", {"app": app, "raise": raise_window}))
|
||||
return ActionResult(ok=True, action="focus_app")
|
||||
|
||||
def set_value(self, value: str, element: Optional[int] = None) -> ActionResult:
    """Record the call in `self.calls` and report success without doing anything."""
    recorded_args = {"value": value, "element": element}
    self.calls.append(("set_value", recorded_args))
    return ActionResult(ok=True, action="set_value")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Dispatch
|
||||
@@ -317,7 +321,7 @@ def _dispatch(backend: ComputerUseBackend, action: str, args: Dict[str, Any]) ->
|
||||
if mode not in {"som", "vision", "ax"}:
|
||||
return json.dumps({"error": f"bad mode {mode!r}; use som|vision|ax"})
|
||||
cap = backend.capture(mode=mode, app=args.get("app"))
|
||||
return _capture_response(cap)
|
||||
return _capture_response(cap, max_elements=_coerce_max_elements(args.get("max_elements")))
|
||||
|
||||
if action == "wait":
|
||||
seconds = float(args.get("seconds", 1.0))
|
||||
@@ -416,16 +420,62 @@ def _text_response(res: ActionResult) -> str:
|
||||
return json.dumps(payload)
|
||||
|
||||
|
||||
def _capture_response(cap: CaptureResult) -> Any:
|
||||
element_index = _format_elements(cap.elements)
|
||||
# Default cap for the AX `elements` array returned by capture. Dense UIs
|
||||
# (Electron apps, Obsidian, JetBrains IDEs) can publish 500+ AX nodes, which
|
||||
# can exhaust session context after a single capture. The model-facing
|
||||
# `max_elements` argument lets callers raise this when they need the full tree.
|
||||
_DEFAULT_MAX_ELEMENTS = 100
|
||||
# Hard upper bound on caller-supplied `max_elements`. Without this, a tool
|
||||
# call passing a very large integer would silently disable the safeguard and
|
||||
# reintroduce the original unbounded behavior.
|
||||
_MAX_ALLOWED_MAX_ELEMENTS = 1000
|
||||
|
||||
|
||||
def _coerce_max_elements(value: Any) -> int:
|
||||
"""Validate the caller-supplied ``max_elements``.
|
||||
|
||||
Falls back to :data:`_DEFAULT_MAX_ELEMENTS` for missing / non-integer /
|
||||
sub-1 inputs so the cap can never be silently disabled by a malformed
|
||||
tool-call argument. Clamps oversized values to
|
||||
:data:`_MAX_ALLOWED_MAX_ELEMENTS` so a caller cannot bypass the
|
||||
safeguard by passing a very large integer.
|
||||
"""
|
||||
if value is None:
|
||||
return _DEFAULT_MAX_ELEMENTS
|
||||
try:
|
||||
n = int(value)
|
||||
except (TypeError, ValueError):
|
||||
return _DEFAULT_MAX_ELEMENTS
|
||||
if n < 1:
|
||||
return _DEFAULT_MAX_ELEMENTS
|
||||
if n > _MAX_ALLOWED_MAX_ELEMENTS:
|
||||
return _MAX_ALLOWED_MAX_ELEMENTS
|
||||
return n
|
||||
|
||||
|
||||
def _capture_response(cap: CaptureResult, max_elements: int = _DEFAULT_MAX_ELEMENTS) -> Any:
|
||||
total_elements = len(cap.elements)
|
||||
visible_elements = cap.elements[:max_elements]
|
||||
truncated_elements = max(0, total_elements - len(visible_elements))
|
||||
|
||||
# Index only what's actually surfaced in the response — otherwise the
|
||||
# human-readable summary references element indices the model cannot
|
||||
# find in the JSON `elements` array (e.g. max_elements=10 vs the default
|
||||
# 40-line index window).
|
||||
element_index = _format_elements(visible_elements)
|
||||
summary_lines = [
|
||||
f"capture mode={cap.mode} {cap.width}x{cap.height}"
|
||||
+ (f" app={cap.app}" if cap.app else "")
|
||||
+ (f" window={cap.window_title!r}" if cap.window_title else ""),
|
||||
f"{len(cap.elements)} interactable element(s):",
|
||||
f"{total_elements} interactable element(s):",
|
||||
]
|
||||
if element_index:
|
||||
summary_lines.extend(element_index)
|
||||
# Multimodal and AX paths both reference `summary`; build it once up-front
|
||||
# so the aux-vision routing branch (which fires before either path is
|
||||
# selected) has a valid value to hand to _route_capture_through_aux_vision.
|
||||
# The AX path appends the "truncated to N of M" note to summary_lines
|
||||
# below and rebuilds; the multimodal path keeps this version untouched.
|
||||
summary = "\n".join(summary_lines)
|
||||
|
||||
if cap.png_b64 and cap.mode != "ax":
|
||||
@@ -449,6 +499,9 @@ def _capture_response(cap: CaptureResult) -> Any:
|
||||
# JPEG: base64 starts with /9j/ PNG: starts with iVBOR
|
||||
_b64_prefix = cap.png_b64[:8]
|
||||
_mime = "image/jpeg" if _b64_prefix.startswith("/9j/") else "image/png"
|
||||
# The multimodal response carries the screenshot, not the AX
|
||||
# elements array, so a "response truncated to N of M elements"
|
||||
# note would be inaccurate — skip it on this branch.
|
||||
return {
|
||||
"_multimodal": True,
|
||||
"content": [
|
||||
@@ -458,18 +511,29 @@ def _capture_response(cap: CaptureResult) -> Any:
|
||||
],
|
||||
"text_summary": summary,
|
||||
"meta": {"mode": cap.mode, "width": cap.width, "height": cap.height,
|
||||
"elements": len(cap.elements), "png_bytes": cap.png_bytes_len},
|
||||
"elements": total_elements, "png_bytes": cap.png_bytes_len},
|
||||
}
|
||||
# AX-only (or image missing): text path.
|
||||
return json.dumps({
|
||||
# AX-only (or image-missing fallback): text path actually carries the
|
||||
# `elements` array, so the truncation note applies here.
|
||||
if truncated_elements:
|
||||
summary_lines.append(
|
||||
f" (response truncated to {len(visible_elements)} of {total_elements} elements; "
|
||||
f"raise max_elements or pass app= to narrow)"
|
||||
)
|
||||
summary = "\n".join(summary_lines)
|
||||
payload: Dict[str, Any] = {
|
||||
"mode": cap.mode,
|
||||
"width": cap.width,
|
||||
"height": cap.height,
|
||||
"app": cap.app,
|
||||
"window_title": cap.window_title,
|
||||
"elements": [_element_to_dict(e) for e in cap.elements],
|
||||
"elements": [_element_to_dict(e) for e in visible_elements],
|
||||
"total_elements": total_elements,
|
||||
"summary": summary,
|
||||
})
|
||||
}
|
||||
if truncated_elements:
|
||||
payload["truncated_elements"] = truncated_elements
|
||||
return json.dumps(payload)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -611,6 +675,11 @@ def _maybe_follow_capture(
|
||||
) -> Any:
|
||||
if not do_capture:
|
||||
return _text_response(res)
|
||||
# Skip the follow-up capture when the action itself failed: showing a
|
||||
# normal-looking screenshot after a failure misleads the model into thinking
|
||||
# the action succeeded. Return the error text instead.
|
||||
if not res.ok:
|
||||
return _text_response(res)
|
||||
try:
|
||||
# Preserve the app context established by the preceding capture/focus_app so
|
||||
# that capture_after=True re-captures the same app rather than the frontmost
|
||||
|
||||
@@ -60,7 +60,8 @@ class ManagedModalEnvironment(BaseModalExecutionEnvironment):
|
||||
if gateway is None:
|
||||
raise ValueError("Managed Modal requires a configured tool gateway and Nous user token")
|
||||
|
||||
self._gateway_origin = gateway.gateway_origin.rstrip("/")
|
||||
self._gateway_origin = gateway.resolved_origin.rstrip("/")
|
||||
self._gateway_host_header = gateway.gateway_host_header
|
||||
self._nous_user_token = gateway.nous_user_token
|
||||
self._task_id = task_id
|
||||
self._persistent = persistent_filesystem
|
||||
@@ -234,6 +235,8 @@ class ManagedModalEnvironment(BaseModalExecutionEnvironment):
|
||||
"Authorization": f"Bearer {self._nous_user_token}",
|
||||
"Content-Type": "application/json",
|
||||
}
|
||||
if self._gateway_host_header:
|
||||
headers["Host"] = self._gateway_host_header
|
||||
if extra_headers:
|
||||
headers.update(extra_headers)
|
||||
|
||||
|
||||
@@ -169,6 +169,7 @@ class SSHEnvironment(BaseEnvironment):
|
||||
if not files:
|
||||
return
|
||||
|
||||
base = f"{self._remote_home}/.hermes"
|
||||
parents = unique_parent_dirs(files)
|
||||
if parents:
|
||||
cmd = self._build_ssh_command()
|
||||
@@ -180,7 +181,19 @@ class SSHEnvironment(BaseEnvironment):
|
||||
# Symlink staging avoids fragile GNU tar --transform rules.
|
||||
with tempfile.TemporaryDirectory(prefix="hermes-ssh-bulk-") as staging:
|
||||
for host_path, remote_path in files:
|
||||
staged = os.path.join(staging, remote_path.lstrip("/"))
|
||||
try:
|
||||
rel_remote = os.path.relpath(remote_path, base)
|
||||
except ValueError as exc:
|
||||
raise RuntimeError(
|
||||
f"remote path {remote_path!r} is not under sync base {base!r}"
|
||||
) from exc
|
||||
|
||||
if rel_remote == "." or rel_remote.startswith("../"):
|
||||
raise RuntimeError(
|
||||
f"remote path {remote_path!r} escapes sync base {base!r}"
|
||||
)
|
||||
|
||||
staged = os.path.join(staging, rel_remote)
|
||||
os.makedirs(os.path.dirname(staged), exist_ok=True)
|
||||
os.symlink(os.path.abspath(host_path), staged)
|
||||
|
||||
@@ -190,7 +203,7 @@ class SSHEnvironment(BaseEnvironment):
|
||||
# existing directories (e.g. /home/<user>) with the staging
|
||||
# directory's mode. Without this, a umask 002 produces 0775
|
||||
# dirs which breaks sshd StrictModes (refuses authorized_keys).
|
||||
ssh_cmd.append("tar xf - --no-overwrite-dir -C /")
|
||||
ssh_cmd.append(f"tar xf - --no-overwrite-dir -C {shlex.quote(base)}")
|
||||
|
||||
tar_proc = subprocess.Popen(
|
||||
tar_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE
|
||||
|
||||
@@ -0,0 +1,163 @@
|
||||
"""Shared FAL.ai SDK plumbing.
|
||||
|
||||
Holds the stateless atoms that every FAL-backed tool needs:
|
||||
|
||||
* :func:`import_fal_client` — lazy import + ``lazy_deps`` integration so
|
||||
``fal_client`` isn't pulled at cold start (it added ~64 ms per CLI
|
||||
invocation when imported eagerly).
|
||||
* :class:`_ManagedFalSyncClient` — wrapper that drives a Nous-managed
|
||||
fal-queue gateway through the standard ``fal_client.SyncClient``
|
||||
primitives.
|
||||
* :func:`_normalize_fal_queue_url_format`, :func:`_extract_http_status`
|
||||
— small helpers used by both the managed client wrapper and
|
||||
``_submit_fal_request``.
|
||||
|
||||
Stateful pieces (cache globals, ``_managed_fal_client*`` selectors,
|
||||
``_submit_fal_request``) intentionally stay on
|
||||
:mod:`tools.image_generation_tool`. That module is the patch target for
|
||||
existing test suites (``tests/tools/test_image_generation.py``,
|
||||
``tests/tools/test_managed_media_gateways.py``) and for the
|
||||
``plugins/image_gen/fal/`` plugin's ``_it`` indirection — moving the
|
||||
caches here would silently defeat ``monkeypatch.setattr(image_tool,
|
||||
"_managed_fal_client", None)`` because the lookups would go against
|
||||
``fal_common``'s namespace instead. See the per-rule walkthrough at
|
||||
issue #26241 for details.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict, Optional, Union
|
||||
from urllib.parse import urlencode
|
||||
|
||||
|
||||
def import_fal_client() -> Any:
    """Import ``fal_client`` (via ``lazy_deps`` when available) and return
    the module reference.

    Callers are responsible for caching the result on their own module
    global — keeping per-module globals lets tests monkey-patch the
    target module's ``fal_client`` attribute and have the patched value
    stick for that module's call sites.

    Raises :class:`ImportError` if the package is genuinely unavailable,
    or if ``lazy_deps.ensure`` fails with any other exception (the
    original exception is attached as ``__cause__``).
    """
    try:
        from tools.lazy_deps import ensure as _lazy_ensure

        _lazy_ensure("image.fal", prompt=False)
    except ImportError:
        # Best effort: an ImportError at this stage is ignored and the plain
        # import below decides whether ``fal_client`` is really available.
        pass
    except Exception as exc:  # noqa: BLE001 — lazy_deps surfaces install hints
        # Normalise to ImportError for callers, but keep the explicit chain so
        # the underlying failure stays visible in tracebacks.
        raise ImportError(str(exc)) from exc

    import fal_client  # type: ignore  # noqa: WPS433 — intentionally lazy

    return fal_client
|
||||
|
||||
|
||||
def _normalize_fal_queue_url_format(queue_run_origin: str) -> str:
|
||||
normalized_origin = str(queue_run_origin or "").strip().rstrip("/")
|
||||
if not normalized_origin:
|
||||
raise ValueError("Managed FAL queue origin is required")
|
||||
return f"{normalized_origin}/"
|
||||
|
||||
|
||||
def _extract_http_status(exc: BaseException) -> Optional[int]:
|
||||
"""Return an HTTP status code from httpx/fal exceptions, else None.
|
||||
|
||||
Defensive across exception shapes — httpx.HTTPStatusError exposes
|
||||
``.response.status_code`` while fal_client wrappers may expose
|
||||
``.status_code`` directly.
|
||||
"""
|
||||
response = getattr(exc, "response", None)
|
||||
if response is not None:
|
||||
status = getattr(response, "status_code", None)
|
||||
if isinstance(status, int):
|
||||
return status
|
||||
status = getattr(exc, "status_code", None)
|
||||
if isinstance(status, int):
|
||||
return status
|
||||
return None
|
||||
|
||||
|
||||
class _ManagedFalSyncClient:
|
||||
"""Small per-instance wrapper around ``fal_client.SyncClient`` for
|
||||
managed queue hosts.
|
||||
|
||||
The wrapper carries its own ``fal_client`` module reference instead
|
||||
of reaching into a module global, so callers stay in control of
|
||||
which module's ``fal_client`` is in scope (matters for the test
|
||||
patches that swap the legacy module's ``fal_client`` attribute).
|
||||
"""
|
||||
|
||||
def __init__(self, fal_client: Any, *, key: str, queue_run_origin: str):
|
||||
sync_client_class = getattr(fal_client, "SyncClient", None)
|
||||
if sync_client_class is None:
|
||||
raise RuntimeError("fal_client.SyncClient is required for managed FAL gateway mode")
|
||||
|
||||
client_module = getattr(fal_client, "client", None)
|
||||
if client_module is None:
|
||||
raise RuntimeError("fal_client.client is required for managed FAL gateway mode")
|
||||
|
||||
self._queue_url_format = _normalize_fal_queue_url_format(queue_run_origin)
|
||||
self._sync_client = sync_client_class(key=key)
|
||||
self._http_client = getattr(self._sync_client, "_client", None)
|
||||
self._maybe_retry_request = getattr(client_module, "_maybe_retry_request", None)
|
||||
self._raise_for_status = getattr(client_module, "_raise_for_status", None)
|
||||
self._request_handle_class = getattr(client_module, "SyncRequestHandle", None)
|
||||
self._add_hint_header = getattr(client_module, "add_hint_header", None)
|
||||
self._add_priority_header = getattr(client_module, "add_priority_header", None)
|
||||
self._add_timeout_header = getattr(client_module, "add_timeout_header", None)
|
||||
|
||||
if self._http_client is None:
|
||||
raise RuntimeError("fal_client.SyncClient._client is required for managed FAL gateway mode")
|
||||
if self._maybe_retry_request is None or self._raise_for_status is None:
|
||||
raise RuntimeError("fal_client.client request helpers are required for managed FAL gateway mode")
|
||||
if self._request_handle_class is None:
|
||||
raise RuntimeError("fal_client.client.SyncRequestHandle is required for managed FAL gateway mode")
|
||||
|
||||
def submit(
|
||||
self,
|
||||
application: str,
|
||||
arguments: Dict[str, Any],
|
||||
*,
|
||||
path: str = "",
|
||||
hint: Optional[str] = None,
|
||||
webhook_url: Optional[str] = None,
|
||||
priority: Any = None,
|
||||
headers: Optional[Dict[str, str]] = None,
|
||||
start_timeout: Optional[Union[int, float]] = None,
|
||||
):
|
||||
url = self._queue_url_format + application
|
||||
if path:
|
||||
url += "/" + path.lstrip("/")
|
||||
if webhook_url is not None:
|
||||
url += "?" + urlencode({"fal_webhook": webhook_url})
|
||||
|
||||
request_headers = dict(headers or {})
|
||||
if hint is not None and self._add_hint_header is not None:
|
||||
self._add_hint_header(hint, request_headers)
|
||||
if priority is not None:
|
||||
if self._add_priority_header is None:
|
||||
raise RuntimeError("fal_client.client.add_priority_header is required for priority requests")
|
||||
self._add_priority_header(priority, request_headers)
|
||||
if start_timeout is not None:
|
||||
if self._add_timeout_header is None:
|
||||
raise RuntimeError("fal_client.client.add_timeout_header is required for timeout requests")
|
||||
self._add_timeout_header(start_timeout, request_headers)
|
||||
|
||||
response = self._maybe_retry_request(
|
||||
self._http_client,
|
||||
"POST",
|
||||
url,
|
||||
json=arguments,
|
||||
timeout=getattr(self._sync_client, "default_timeout", 120.0),
|
||||
headers=request_headers,
|
||||
)
|
||||
self._raise_for_status(response)
|
||||
|
||||
data = response.json()
|
||||
return self._request_handle_class(
|
||||
request_id=data["request_id"],
|
||||
response_url=data["response_url"],
|
||||
status_url=data["status_url"],
|
||||
cancel_url=data["cancel_url"],
|
||||
client=self._http_client,
|
||||
)
|
||||
+26
-128
@@ -26,8 +26,7 @@ import os
|
||||
import datetime
|
||||
import threading
|
||||
import uuid
|
||||
from typing import Any, Dict, Optional, Union
|
||||
from urllib.parse import urlencode
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
# fal_client is imported lazily — see _load_fal_client(). Pulling it
|
||||
# eagerly added ~64 ms to every CLI cold start because
|
||||
@@ -52,19 +51,17 @@ def _load_fal_client() -> Any:
|
||||
global fal_client
|
||||
if fal_client is not None:
|
||||
return fal_client
|
||||
try:
|
||||
from tools.lazy_deps import ensure as _lazy_ensure
|
||||
_lazy_ensure("image.fal", prompt=False)
|
||||
except ImportError:
|
||||
pass
|
||||
except Exception as e:
|
||||
raise ImportError(str(e))
|
||||
import fal_client as _fal_client # noqa: F811 — module-global rebind
|
||||
fal_client = _fal_client
|
||||
from tools.fal_common import import_fal_client
|
||||
fal_client = import_fal_client()
|
||||
return fal_client
|
||||
|
||||
|
||||
from tools.debug_helpers import DebugSession
|
||||
from tools.fal_common import (
|
||||
_ManagedFalSyncClient,
|
||||
_extract_http_status,
|
||||
_normalize_fal_queue_url_format, # noqa: F401 — re-exported for tests
|
||||
)
|
||||
from tools.managed_tool_gateway import resolve_managed_tool_gateway
|
||||
from tools.tool_backend_helpers import (
|
||||
fal_key_is_configured,
|
||||
@@ -360,110 +357,25 @@ def _resolve_managed_fal_gateway():
|
||||
return resolve_managed_tool_gateway("fal-queue")
|
||||
|
||||
|
||||
def _normalize_fal_queue_url_format(queue_run_origin: str) -> str:
|
||||
normalized_origin = str(queue_run_origin or "").strip().rstrip("/")
|
||||
if not normalized_origin:
|
||||
raise ValueError("Managed FAL queue origin is required")
|
||||
return f"{normalized_origin}/"
|
||||
|
||||
|
||||
class _ManagedFalSyncClient:
|
||||
"""Small per-instance wrapper around fal_client.SyncClient for managed queue hosts."""
|
||||
|
||||
def __init__(self, *, key: str, queue_run_origin: str):
|
||||
# Trigger the lazy import on first construction. Idempotent — the
|
||||
# placeholder is overwritten with the real module on first call.
|
||||
_load_fal_client()
|
||||
sync_client_class = getattr(fal_client, "SyncClient", None)
|
||||
if sync_client_class is None:
|
||||
raise RuntimeError("fal_client.SyncClient is required for managed FAL gateway mode")
|
||||
|
||||
client_module = getattr(fal_client, "client", None)
|
||||
if client_module is None:
|
||||
raise RuntimeError("fal_client.client is required for managed FAL gateway mode")
|
||||
|
||||
self._queue_url_format = _normalize_fal_queue_url_format(queue_run_origin)
|
||||
self._sync_client = sync_client_class(key=key)
|
||||
self._http_client = getattr(self._sync_client, "_client", None)
|
||||
self._maybe_retry_request = getattr(client_module, "_maybe_retry_request", None)
|
||||
self._raise_for_status = getattr(client_module, "_raise_for_status", None)
|
||||
self._request_handle_class = getattr(client_module, "SyncRequestHandle", None)
|
||||
self._add_hint_header = getattr(client_module, "add_hint_header", None)
|
||||
self._add_priority_header = getattr(client_module, "add_priority_header", None)
|
||||
self._add_timeout_header = getattr(client_module, "add_timeout_header", None)
|
||||
|
||||
if self._http_client is None:
|
||||
raise RuntimeError("fal_client.SyncClient._client is required for managed FAL gateway mode")
|
||||
if self._maybe_retry_request is None or self._raise_for_status is None:
|
||||
raise RuntimeError("fal_client.client request helpers are required for managed FAL gateway mode")
|
||||
if self._request_handle_class is None:
|
||||
raise RuntimeError("fal_client.client.SyncRequestHandle is required for managed FAL gateway mode")
|
||||
|
||||
def submit(
|
||||
self,
|
||||
application: str,
|
||||
arguments: Dict[str, Any],
|
||||
*,
|
||||
path: str = "",
|
||||
hint: Optional[str] = None,
|
||||
webhook_url: Optional[str] = None,
|
||||
priority: Any = None,
|
||||
headers: Optional[Dict[str, str]] = None,
|
||||
start_timeout: Optional[Union[int, float]] = None,
|
||||
):
|
||||
url = self._queue_url_format + application
|
||||
if path:
|
||||
url += "/" + path.lstrip("/")
|
||||
if webhook_url is not None:
|
||||
url += "?" + urlencode({"fal_webhook": webhook_url})
|
||||
|
||||
request_headers = dict(headers or {})
|
||||
if hint is not None and self._add_hint_header is not None:
|
||||
self._add_hint_header(hint, request_headers)
|
||||
if priority is not None:
|
||||
if self._add_priority_header is None:
|
||||
raise RuntimeError("fal_client.client.add_priority_header is required for priority requests")
|
||||
self._add_priority_header(priority, request_headers)
|
||||
if start_timeout is not None:
|
||||
if self._add_timeout_header is None:
|
||||
raise RuntimeError("fal_client.client.add_timeout_header is required for timeout requests")
|
||||
self._add_timeout_header(start_timeout, request_headers)
|
||||
|
||||
response = self._maybe_retry_request(
|
||||
self._http_client,
|
||||
"POST",
|
||||
url,
|
||||
json=arguments,
|
||||
timeout=getattr(self._sync_client, "default_timeout", 120.0),
|
||||
headers=request_headers,
|
||||
)
|
||||
self._raise_for_status(response)
|
||||
|
||||
data = response.json()
|
||||
return self._request_handle_class(
|
||||
request_id=data["request_id"],
|
||||
response_url=data["response_url"],
|
||||
status_url=data["status_url"],
|
||||
cancel_url=data["cancel_url"],
|
||||
client=self._http_client,
|
||||
)
|
||||
|
||||
|
||||
def _get_managed_fal_client(managed_gateway):
|
||||
"""Reuse the managed FAL client so its internal httpx.Client is not leaked per call."""
|
||||
global _managed_fal_client, _managed_fal_client_config
|
||||
|
||||
client_config = (
|
||||
managed_gateway.gateway_origin.rstrip("/"),
|
||||
managed_gateway.resolved_origin.rstrip("/"),
|
||||
managed_gateway.nous_user_token,
|
||||
)
|
||||
with _managed_fal_client_lock:
|
||||
if _managed_fal_client is not None and _managed_fal_client_config == client_config:
|
||||
return _managed_fal_client
|
||||
|
||||
# Resolve fal_client on the legacy module — preserves the test
|
||||
# pattern of monkey-patching ``image_generation_tool.fal_client``.
|
||||
_load_fal_client()
|
||||
_managed_fal_client = _ManagedFalSyncClient(
|
||||
fal_client,
|
||||
key=managed_gateway.nous_user_token,
|
||||
queue_run_origin=managed_gateway.gateway_origin,
|
||||
queue_run_origin=managed_gateway.resolved_origin,
|
||||
)
|
||||
_managed_fal_client_config = client_config
|
||||
return _managed_fal_client
|
||||
@@ -502,24 +414,6 @@ def _submit_fal_request(model: str, arguments: Dict[str, Any]):
|
||||
raise
|
||||
|
||||
|
||||
def _extract_http_status(exc: BaseException) -> Optional[int]:
|
||||
"""Return an HTTP status code from httpx/fal exceptions, else None.
|
||||
|
||||
Defensive across exception shapes — httpx.HTTPStatusError exposes
|
||||
``.response.status_code`` while fal_client wrappers may expose
|
||||
``.status_code`` directly.
|
||||
"""
|
||||
response = getattr(exc, "response", None)
|
||||
if response is not None:
|
||||
status = getattr(response, "status_code", None)
|
||||
if isinstance(status, int):
|
||||
return status
|
||||
status = getattr(exc, "status_code", None)
|
||||
if isinstance(status, int):
|
||||
return status
|
||||
return None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Model resolution + payload construction
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -973,9 +867,12 @@ def _read_configured_image_provider():
|
||||
"""Return the value of ``image_gen.provider`` from config.yaml, or None.
|
||||
|
||||
We only consult the plugin registry when this is explicitly set — an
|
||||
unset value keeps users on the legacy in-tree FAL path even when other
|
||||
unset value keeps users on the in-tree FAL fallback even when other
|
||||
providers happen to be registered (e.g. a user has OPENAI_API_KEY set
|
||||
for other features but never asked for OpenAI image gen).
|
||||
for other features but never asked for OpenAI image gen). ``"fal"``
|
||||
explicitly routes through ``plugins/image_gen/fal/`` (which delegates
|
||||
back into this module's pipeline via call-time indirection — see
|
||||
issue #26241).
|
||||
"""
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
@@ -994,15 +891,16 @@ def _dispatch_to_plugin_provider(prompt: str, aspect_ratio: str):
|
||||
"""Route the call to a plugin-registered provider when one is selected.
|
||||
|
||||
Returns a JSON string on dispatch, or ``None`` to fall through to the
|
||||
built-in FAL path.
|
||||
in-tree FAL fallback in ``image_generate_tool``.
|
||||
|
||||
Dispatch only fires when ``image_gen.provider`` is explicitly set AND
|
||||
it does not point to ``fal`` (FAL still lives in-tree in this PR;
|
||||
a later PR ports it into ``plugins/image_gen/fal/``). Any other value
|
||||
that matches a registered plugin provider wins.
|
||||
Dispatch fires when ``image_gen.provider`` is explicitly set — including
|
||||
``"fal"`` itself, which now resolves to the
|
||||
``plugins/image_gen/fal/`` plugin (the plugin re-enters this module's
|
||||
pipeline via ``_it`` indirection so behavior is identical to the
|
||||
direct call, just routed through the registry).
|
||||
"""
|
||||
configured = _read_configured_image_provider()
|
||||
if not configured or configured == "fal":
|
||||
if not configured:
|
||||
return None
|
||||
|
||||
# Also read configured model so we can pass it to the plugin
|
||||
|
||||
@@ -7,7 +7,8 @@ import logging
|
||||
import os
|
||||
from datetime import datetime, timezone
|
||||
from dataclasses import dataclass
|
||||
from typing import Callable, Optional
|
||||
from typing import Callable, Optional, Tuple
|
||||
from urllib.parse import urlparse, urlunparse
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -15,6 +16,27 @@ from hermes_constants import get_hermes_home
|
||||
from tools.tool_backend_helpers import managed_nous_tools_enabled
|
||||
|
||||
_DEFAULT_TOOL_GATEWAY_DOMAIN = "nousresearch.com"
|
||||
|
||||
|
||||
def _rewrite_localhost_origin(origin: str) -> Tuple[str, Optional[str]]:
|
||||
"""Rewrite ``*.localhost`` hostnames to ``127.0.0.1`` for DNS compatibility.
|
||||
|
||||
Python's :func:`socket.getaddrinfo` doesn't special-case ``*.localhost``
|
||||
subdomains (RFC 6761), so ``tools-gateway.localhost`` fails DNS resolution
|
||||
on most platforms. Bare ``localhost`` resolves fine and is left untouched.
|
||||
|
||||
Returns ``(resolved_origin, host_header_or_none)``.
|
||||
"""
|
||||
parsed = urlparse(origin)
|
||||
hostname = parsed.hostname
|
||||
if not hostname or not hostname.endswith(".localhost"):
|
||||
return origin, None
|
||||
|
||||
port = parsed.port
|
||||
netloc = f"127.0.0.1:{port}" if port else "127.0.0.1"
|
||||
host_header = f"{hostname}:{port}" if port else hostname
|
||||
resolved = urlunparse(parsed._replace(netloc=netloc))
|
||||
return resolved, host_header
|
||||
_DEFAULT_TOOL_GATEWAY_SCHEME = "https"
|
||||
_NOUS_ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 120
|
||||
|
||||
@@ -26,6 +48,16 @@ class ManagedToolGatewayConfig:
|
||||
nous_user_token: str
|
||||
managed_mode: bool
|
||||
|
||||
@property
|
||||
def resolved_origin(self) -> str:
|
||||
"""Origin with ``*.localhost`` hostnames rewritten to ``127.0.0.1``."""
|
||||
return _rewrite_localhost_origin(self.gateway_origin)[0]
|
||||
|
||||
@property
|
||||
def gateway_host_header(self) -> Optional[str]:
|
||||
"""Original ``host[:port]`` when the origin was rewritten, else ``None``."""
|
||||
return _rewrite_localhost_origin(self.gateway_origin)[1]
|
||||
|
||||
|
||||
def auth_json_path():
|
||||
"""Return the Hermes auth store path, respecting HERMES_HOME overrides."""
|
||||
|
||||
@@ -21,6 +21,11 @@ def managed_nous_tools_enabled() -> bool:
|
||||
the free tier. We intentionally catch all exceptions and return
|
||||
False — never block the agent startup path.
|
||||
"""
|
||||
import os
|
||||
|
||||
if os.getenv("TOOL_GATEWAY_USER_TOKEN", "").strip():
|
||||
return True
|
||||
|
||||
try:
|
||||
from hermes_cli.auth import get_nous_auth_status
|
||||
|
||||
@@ -123,6 +128,25 @@ def prefers_gateway(config_section: str) -> bool:
|
||||
return False
|
||||
|
||||
|
||||
def portal_app_tools_enabled() -> bool:
    """Return True when the portal.app_tools config flag is on.

    Resolution: PORTAL_APP_TOOLS env var → config.yaml → default True.
    Never raises — safe for check_fn and registration-time use.

    The env var wins whenever it is set (even to an empty string) and is
    interpreted by ``is_truthy_value``. The config value is only honoured
    when ``portal`` is a mapping; a missing ``app_tools`` key counts as on.
    """
    import os  # local import keeps this helper self-contained

    env_val = os.getenv("PORTAL_APP_TOOLS")
    if env_val is not None:
        return is_truthy_value(env_val)
    try:
        from hermes_cli.config import load_config

        portal = (load_config() or {}).get("portal")
        if isinstance(portal, dict):
            return bool(portal.get("app_tools", True))
    except Exception:
        # Deliberate best effort: an unreadable config must not block
        # startup, so fall through to the default.
        pass
    return True
|
||||
|
||||
|
||||
def fal_key_is_configured() -> bool:
|
||||
"""Return True when FAL_KEY is set to a non-whitespace value.
|
||||
|
||||
|
||||
@@ -197,6 +197,26 @@ def _normalize_local_command_model(model_name: Optional[str]) -> str:
|
||||
return _normalize_local_model(model_name)
|
||||
|
||||
|
||||
def _try_lazy_install_stt() -> bool:
|
||||
"""Attempt to lazy-install faster-whisper and return True on success.
|
||||
|
||||
The module-level ``_HAS_FASTER_WHISPER`` flag is set at import time and
|
||||
cached. If the package wasn't installed at startup, calling ``ensure()``
|
||||
installs it. This function re-checks dynamically after installation so
|
||||
the provider can use it immediately without a process restart.
|
||||
"""
|
||||
try:
|
||||
from tools.lazy_deps import ensure
|
||||
ensure("stt.faster_whisper")
|
||||
# Re-check dynamically after install
|
||||
import importlib.util as _iu
|
||||
if _iu.find_spec("faster_whisper"):
|
||||
return True
|
||||
except Exception as exc:
|
||||
logger.debug("Lazy install of faster-whisper failed: %s", exc)
|
||||
return False
|
||||
|
||||
|
||||
def _get_provider(stt_config: dict) -> str:
|
||||
"""Determine which STT provider to use.
|
||||
|
||||
@@ -218,6 +238,9 @@ def _get_provider(stt_config: dict) -> str:
|
||||
return "local"
|
||||
if _has_local_command():
|
||||
return "local_command"
|
||||
# Try lazy-install before giving up
|
||||
if _try_lazy_install_stt():
|
||||
return "local"
|
||||
logger.warning(
|
||||
"STT provider 'local' configured but unavailable "
|
||||
"(install faster-whisper or set HERMES_LOCAL_STT_COMMAND)"
|
||||
@@ -285,6 +308,9 @@ def _get_provider(stt_config: dict) -> str:
|
||||
return "local"
|
||||
if _has_local_command():
|
||||
return "local_command"
|
||||
# Try lazy-install before falling through to cloud providers
|
||||
if _try_lazy_install_stt():
|
||||
return "local"
|
||||
if _HAS_OPENAI and get_env_value("GROQ_API_KEY"):
|
||||
logger.info("No local STT available, using Groq Whisper API")
|
||||
return "groq"
|
||||
@@ -403,7 +429,8 @@ def _transcribe_local(file_path: str, model_name: str) -> Dict[str, Any]:
|
||||
global _local_model, _local_model_name
|
||||
|
||||
if not _HAS_FASTER_WHISPER:
|
||||
return {"success": False, "transcript": "", "error": "faster-whisper not installed"}
|
||||
if not _try_lazy_install_stt():
|
||||
return {"success": False, "transcript": "", "error": "faster-whisper not installed"}
|
||||
|
||||
try:
|
||||
# Lazy-load the model (downloads on first use, ~150 MB for 'base')
|
||||
@@ -914,7 +941,7 @@ def _resolve_openai_audio_client_config() -> tuple[str, str]:
|
||||
raise ValueError(message)
|
||||
|
||||
return managed_gateway.nous_user_token, urljoin(
|
||||
f"{managed_gateway.gateway_origin.rstrip('/')}/", "v1"
|
||||
f"{managed_gateway.resolved_origin.rstrip('/')}/", "v1"
|
||||
)
|
||||
|
||||
|
||||
|
||||
+1
-1
@@ -2048,7 +2048,7 @@ def _resolve_openai_audio_client_config() -> tuple[str, str]:
|
||||
raise ValueError(message)
|
||||
|
||||
return managed_gateway.nous_user_token, urljoin(
|
||||
f"{managed_gateway.gateway_origin.rstrip('/')}/", "v1"
|
||||
f"{managed_gateway.resolved_origin.rstrip('/')}/", "v1"
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -58,6 +58,8 @@ _HERMES_CORE_TOOLS = [
|
||||
"cronjob",
|
||||
# Cross-platform messaging (gated on gateway running via check_fn)
|
||||
"send_message",
|
||||
# App integrations (500+ apps via Nous tool gateway, gated via check_fn)
|
||||
"app_search_tools", "app_tool_schemas", "app_execute_tools", "app_manage_connections",
|
||||
# Home Assistant smart home control (gated on HASS_TOKEN via check_fn)
|
||||
"ha_list_entities", "ha_get_state", "ha_list_services", "ha_call_service",
|
||||
# Kanban multi-agent coordination — only in schema when the agent is
|
||||
@@ -239,6 +241,12 @@ TOOLSETS = {
|
||||
"includes": []
|
||||
},
|
||||
|
||||
"app_tools": {
|
||||
"description": "External app integrations (Gmail, Slack, GitHub, Notion, 500+ apps) via Nous tool gateway",
|
||||
"tools": ["app_search_tools", "app_tool_schemas", "app_execute_tools", "app_manage_connections"],
|
||||
"includes": []
|
||||
},
|
||||
|
||||
"kanban": {
|
||||
"description": (
|
||||
"Kanban multi-agent coordination — only active when the agent "
|
||||
|
||||
+95
-7
@@ -1061,6 +1061,10 @@ def _session_tool_progress_mode(sid: str) -> str:
|
||||
return str(_sessions.get(sid, {}).get("tool_progress_mode", "all") or "all")
|
||||
|
||||
|
||||
def _session_verbose(sid: str) -> bool:
    """Return True when the session's tool-progress mode is ``"verbose"``."""
    return _session_tool_progress_mode(sid) == "verbose"
|
||||
|
||||
|
||||
def _tool_progress_enabled(sid: str) -> bool:
    """Return True unless the session's tool-progress mode is ``"off"``."""
    return _session_tool_progress_mode(sid) != "off"
|
||||
|
||||
@@ -1492,6 +1496,74 @@ def _tool_ctx(name: str, args: dict) -> str:
|
||||
return ""
|
||||
|
||||
|
||||
_TUI_VERBOSE_TEXT_MAX_CHARS = 16_000
|
||||
_TUI_VERBOSE_TEXT_MAX_LINES = 240
|
||||
|
||||
|
||||
def _cap_tui_verbose_text(text: str) -> str:
|
||||
if (
|
||||
len(text) <= _TUI_VERBOSE_TEXT_MAX_CHARS
|
||||
and text.count("\n") < _TUI_VERBOSE_TEXT_MAX_LINES
|
||||
):
|
||||
return text
|
||||
|
||||
idx = len(text)
|
||||
start = 0
|
||||
for _ in range(_TUI_VERBOSE_TEXT_MAX_LINES):
|
||||
idx = text.rfind("\n", 0, idx)
|
||||
if idx < 0:
|
||||
start = 0
|
||||
break
|
||||
start = idx + 1
|
||||
|
||||
line_start = start
|
||||
start = max(line_start, len(text) - _TUI_VERBOSE_TEXT_MAX_CHARS)
|
||||
if start > line_start:
|
||||
next_break = text.find("\n", start)
|
||||
if 0 <= next_break < len(text) - 1:
|
||||
start = next_break + 1
|
||||
|
||||
tail = text[start:].lstrip()
|
||||
omitted_chars = max(0, len(text) - len(tail))
|
||||
omitted_lines = text[:start].count("\n")
|
||||
if omitted_lines:
|
||||
label = (
|
||||
"[showing verbose tail; omitted "
|
||||
f"{omitted_lines} lines / {omitted_chars} chars]\n"
|
||||
)
|
||||
else:
|
||||
label = f"[showing verbose tail; omitted {omitted_chars} chars]\n"
|
||||
return f"{label}{tail}"
|
||||
|
||||
|
||||
def _redact_tui_verbose_text(text: str) -> str:
|
||||
try:
|
||||
from agent.redact import redact_sensitive_text
|
||||
|
||||
redacted = redact_sensitive_text(str(text), force=True)
|
||||
except Exception:
|
||||
return ""
|
||||
return _cap_tui_verbose_text(redacted)
|
||||
|
||||
|
||||
def _tool_args_text(args: dict) -> str:
    """Render tool arguments as redacted, size-capped text for verbose mode.

    Arguments are pretty-printed as JSON (2-space indent, non-ASCII kept,
    non-serialisable values converted with ``str``); if serialisation still
    fails, ``str(args)`` is used instead. A falsy ``args`` renders as ``{}``.
    """
    try:
        raw = json.dumps(args or {}, indent=2, ensure_ascii=False, default=str)
    except Exception:
        raw = str(args or {})
    return _redact_tui_verbose_text(raw)
|
||||
|
||||
|
||||
def _tool_result_text(result: object) -> str:
    """Render a tool result as redacted, size-capped text for verbose mode.

    The result is summarised with ``_multimodal_text_summary`` when that
    helper can be imported and succeeds; otherwise ``str(result)`` is used.
    """
    try:
        from agent.tool_dispatch_helpers import _multimodal_text_summary

        raw = _multimodal_text_summary(result)
    except Exception:
        raw = str(result)
    return _redact_tui_verbose_text(raw)
|
||||
|
||||
|
||||
def _fmt_tool_duration(seconds: float | None) -> str:
|
||||
if seconds is None:
|
||||
return ""
|
||||
@@ -1553,13 +1625,18 @@ def _on_tool_start(sid: str, tool_call_id: str, name: str, args: dict):
|
||||
pass
|
||||
session.setdefault("tool_started_at", {})[tool_call_id] = time.time()
|
||||
if _tool_progress_enabled(sid):
|
||||
payload = {
|
||||
"tool_id": tool_call_id,
|
||||
"name": name,
|
||||
"context": _tool_ctx(name, args),
|
||||
}
|
||||
if _session_verbose(sid):
|
||||
args_text = _tool_args_text(args)
|
||||
if args_text:
|
||||
payload["args_text"] = args_text
|
||||
# tool.complete is the source of truth for todos (full list from the
|
||||
# tool result). args.todos here may be a partial merge update.
|
||||
_emit(
|
||||
"tool.start",
|
||||
sid,
|
||||
{"tool_id": tool_call_id, "name": name, "context": _tool_ctx(name, args)},
|
||||
)
|
||||
_emit("tool.start", sid, payload)
|
||||
|
||||
|
||||
def _on_tool_complete(sid: str, tool_call_id: str, name: str, args: dict, result: str):
|
||||
@@ -1576,6 +1653,10 @@ def _on_tool_complete(sid: str, tool_call_id: str, name: str, args: dict, result
|
||||
summary = _tool_summary(name, result, duration_s)
|
||||
if summary:
|
||||
payload["summary"] = summary
|
||||
if _session_verbose(sid):
|
||||
result_text = _tool_result_text(result)
|
||||
if result_text:
|
||||
payload["result_text"] = result_text
|
||||
if name == "todo":
|
||||
try:
|
||||
data = json.loads(result)
|
||||
@@ -1615,7 +1696,10 @@ def _on_tool_progress(
|
||||
_emit("tool.progress", sid, {"name": name, "preview": preview or ""})
|
||||
return
|
||||
if event_type == "reasoning.available" and preview:
|
||||
_emit("reasoning.available", sid, {"text": str(preview)})
|
||||
payload: dict[str, object] = {"text": str(preview)}
|
||||
if _session_verbose(sid):
|
||||
payload["verbose"] = True
|
||||
_emit("reasoning.available", sid, payload)
|
||||
return
|
||||
if event_type.startswith("subagent."):
|
||||
payload = {
|
||||
@@ -1691,7 +1775,11 @@ def _agent_cbs(sid: str) -> dict:
|
||||
"tool_gen_callback": lambda name: _tool_progress_enabled(sid)
|
||||
and _emit("tool.generating", sid, {"name": name}),
|
||||
"thinking_callback": lambda text: _emit("thinking.delta", sid, {"text": text}),
|
||||
"reasoning_callback": lambda text: _emit("reasoning.delta", sid, {"text": text}),
|
||||
"reasoning_callback": lambda text: _emit(
|
||||
"reasoning.delta",
|
||||
sid,
|
||||
{"text": text, **({"verbose": True} if _session_verbose(sid) else {})},
|
||||
),
|
||||
"status_callback": lambda kind, text=None: _status_update(
|
||||
sid, str(kind), None if text is None else str(text)
|
||||
),
|
||||
|
||||
@@ -342,6 +342,25 @@ describe('createGatewayEventHandler', () => {
|
||||
expect(appended[appended.length - 1]).toMatchObject({ role: 'assistant', text: 'final answer' })
|
||||
})
|
||||
|
||||
it('shows verbose reasoning even when normal reasoning display is off', () => {
|
||||
vi.useFakeTimers()
|
||||
patchUiState({ showReasoning: false })
|
||||
const appended: Msg[] = []
|
||||
const streamed = 'verbose-only reasoning'
|
||||
|
||||
try {
|
||||
const onEvent = createGatewayEventHandler(buildCtx(appended))
|
||||
|
||||
onEvent({ payload: { text: streamed, verbose: true }, type: 'reasoning.delta' } as any)
|
||||
vi.runOnlyPendingTimers()
|
||||
|
||||
expect(turnController.reasoningText).toBe(streamed)
|
||||
expect(getTurnState().reasoning).toBe(streamed)
|
||||
} finally {
|
||||
vi.useRealTimers()
|
||||
}
|
||||
})
|
||||
|
||||
it('ignores fallback reasoning.available when streamed reasoning already exists', () => {
|
||||
const appended: Msg[] = []
|
||||
const streamed = 'short streamed reasoning'
|
||||
@@ -485,6 +504,25 @@ describe('createGatewayEventHandler', () => {
|
||||
expect(appended[3]?.text).not.toContain('```diff')
|
||||
})
|
||||
|
||||
it('keeps verbose result text on inline_diff tool completions', () => {
|
||||
const appended: Msg[] = []
|
||||
const onEvent = createGatewayEventHandler(buildCtx(appended))
|
||||
const diff = '--- a/foo.ts\n+++ b/foo.ts\n@@\n-old\n+new'
|
||||
|
||||
onEvent({
|
||||
payload: { args_text: '{ "path": "foo.ts" }', context: 'foo.ts', name: 'patch', tool_id: 'tool-1' },
|
||||
type: 'tool.start'
|
||||
} as any)
|
||||
onEvent({
|
||||
payload: { inline_diff: diff, result_text: 'patched result', tool_id: 'tool-1' },
|
||||
type: 'tool.complete'
|
||||
} as any)
|
||||
|
||||
expect(turnController.segmentMessages[0]).toMatchObject({ kind: 'diff' })
|
||||
expect(turnController.segmentMessages[0]?.tools?.[0]).toContain('Args:\n{ "path": "foo.ts" }')
|
||||
expect(turnController.segmentMessages[0]?.tools?.[0]).toContain('Result:\npatched result')
|
||||
})
|
||||
|
||||
it('keeps full final responses from duplicating flushed pre-diff narration', () => {
|
||||
const appended: Msg[] = []
|
||||
const onEvent = createGatewayEventHandler(buildCtx(appended))
|
||||
|
||||
@@ -222,6 +222,21 @@ describe('createSlashHandler', () => {
|
||||
expect(ctx.gateway.rpc).not.toHaveBeenCalled()
|
||||
})
|
||||
|
||||
it('keeps visible scrollback when branching a TUI session', async () => {
|
||||
patchUiState({ sid: 'sid-parent' })
|
||||
const rpc = vi.fn(() => Promise.resolve({ session_id: 'sid-branch', title: 'branch title' }))
|
||||
const ctx = buildCtx({ gateway: { ...buildGateway(), rpc } })
|
||||
|
||||
expect(createSlashHandler(ctx)('/branch branch title')).toBe(true)
|
||||
|
||||
expect(rpc).toHaveBeenCalledWith('session.branch', { name: 'branch title', session_id: 'sid-parent' })
|
||||
await vi.waitFor(() => {
|
||||
expect(getUiState().sid).toBe('sid-branch')
|
||||
expect(ctx.transcript.sys).toHaveBeenCalledWith('branched → branch title')
|
||||
})
|
||||
expect(ctx.transcript.setHistoryItems).not.toHaveBeenCalled()
|
||||
})
|
||||
|
||||
it('reloads skills in the live gateway and refreshes the catalog', async () => {
|
||||
const rpc = vi.fn((method: string) => {
|
||||
if (method === 'skills.reload') {
|
||||
|
||||
@@ -16,4 +16,16 @@ describe('composerPromptText', () => {
|
||||
expect(composerPromptText('❯', 'custom')).toBe('❯')
|
||||
expect(composerPromptText('❯')).toBe('❯')
|
||||
})
|
||||
|
||||
it('uses a Termux-safe ASCII prompt marker in normal mode', () => {
|
||||
expect(composerPromptText('❯', 'coder', false, true, 50)).toBe('>')
|
||||
})
|
||||
|
||||
it('keeps profile prefix suppressed on narrow Termux widths', () => {
|
||||
expect(composerPromptText('❯', 'upstr', false, true, 72)).toBe('>')
|
||||
})
|
||||
|
||||
it('allows profile prefix on very wide Termux panes', () => {
|
||||
expect(composerPromptText('❯', 'upstr', false, true, 120)).toBe('upstr >')
|
||||
})
|
||||
})
|
||||
|
||||
@@ -0,0 +1,40 @@
|
||||
import { describe, expect, it } from 'vitest'
|
||||
|
||||
import { stableComposerColumns, transcriptBodyWidth } from '../lib/inputMetrics.js'
|
||||
import { composerPromptText } from '../lib/prompt.js'
|
||||
|
||||
describe('Termux composer prompt + width guards', () => {
|
||||
it('uses a single-cell ASCII prompt marker in Termux mode', () => {
|
||||
expect(composerPromptText('❯', 'coder', false, true, 50)).toBe('>')
|
||||
})
|
||||
|
||||
it('suppresses profile prefixes on narrow Termux panes', () => {
|
||||
expect(composerPromptText('❯', 'upstr', false, true, 72)).toBe('>')
|
||||
})
|
||||
|
||||
it('keeps profile context on very wide Termux panes', () => {
|
||||
expect(composerPromptText('❯', 'upstr', false, true, 120)).toBe('upstr >')
|
||||
})
|
||||
|
||||
it('reserves fewer columns for gutter on narrow Termux widths', () => {
|
||||
// 32 columns after prompt: desktop reserves 2 for transcript scrollbar,
|
||||
// Termux keeps those 2 columns for the active composer.
|
||||
expect(stableComposerColumns(40, 8, false)).toBe(28)
|
||||
expect(stableComposerColumns(40, 8, true)).toBe(30)
|
||||
|
||||
// With ample room, Termux still reserves the gutter for alignment.
|
||||
expect(stableComposerColumns(60, 8, true)).toBe(48)
|
||||
})
|
||||
|
||||
it('never over-allocates transcript body width on narrow panes', () => {
|
||||
// Old behavior hard-minned to 20 columns and overflowed narrow layouts.
|
||||
expect(transcriptBodyWidth(24, 'assistant', '>', true)).toBe(19)
|
||||
expect(transcriptBodyWidth(24, 'user', 'upstr >', true)).toBe(14)
|
||||
expect(transcriptBodyWidth(10, 'user', '>', true)).toBeGreaterThanOrEqual(1)
|
||||
})
|
||||
|
||||
it('keeps legacy desktop floor outside Termux mode', () => {
|
||||
expect(transcriptBodyWidth(24, 'assistant', '>')).toBe(20)
|
||||
expect(transcriptBodyWidth(24, 'user', 'upstr >')).toBe(20)
|
||||
})
|
||||
})
|
||||
@@ -3,6 +3,7 @@ import { describe, expect, it } from 'vitest'
|
||||
import {
|
||||
boundedLiveRenderText,
|
||||
buildToolTrailLine,
|
||||
buildVerboseToolTrailLine,
|
||||
edgePreview,
|
||||
estimateRows,
|
||||
estimateTokensRough,
|
||||
@@ -12,8 +13,8 @@ import {
|
||||
lastCotTrailIndex,
|
||||
parseToolTrailResultLine,
|
||||
pasteTokenLabel,
|
||||
sanitizeAnsiForRender,
|
||||
sameToolTrailGroup,
|
||||
sanitizeAnsiForRender,
|
||||
splitToolDuration,
|
||||
stripAnsi,
|
||||
thinkingPreview
|
||||
@@ -37,6 +38,39 @@ describe('buildToolTrailLine', () => {
|
||||
})
|
||||
})
|
||||
|
||||
describe('buildVerboseToolTrailLine', () => {
|
||||
it('preserves multiline args and result details', () => {
|
||||
const line = buildVerboseToolTrailLine(
|
||||
'terminal',
|
||||
'npm test',
|
||||
false,
|
||||
1.25,
|
||||
'{\n "cmd": "npm test"\n}',
|
||||
'first line\nsecond :: line'
|
||||
)
|
||||
|
||||
expect(line).toContain('Args:\n{')
|
||||
expect(line).toContain('Result:\nfirst line\nsecond :: line')
|
||||
expect(parseToolTrailResultLine(line)).toEqual({
|
||||
call: 'Terminal("npm test") (1.3s)',
|
||||
detail: 'Args:\n{\n "cmd": "npm test"\n}\nResult:\nfirst line\nsecond :: line',
|
||||
mark: '✓'
|
||||
})
|
||||
})
|
||||
|
||||
it('labels verbose failures as errors', () => {
|
||||
const line = buildVerboseToolTrailLine('terminal', 'npm test', true, 0.5, undefined, 'command failed')
|
||||
|
||||
expect(line).toContain('Error:\ncommand failed')
|
||||
expect(line).not.toContain('Result:\ncommand failed')
|
||||
expect(parseToolTrailResultLine(line)).toEqual({
|
||||
call: 'Terminal("npm test") (0.5s)',
|
||||
detail: 'Error:\ncommand failed',
|
||||
mark: '✗'
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
describe('lastCotTrailIndex', () => {
|
||||
it('finds last non-result line', () => {
|
||||
expect(lastCotTrailIndex(['a ✓', 'thinking…'])).toBe(1)
|
||||
|
||||
@@ -178,7 +178,22 @@ describe('supportsFastEchoTerminal', () => {
|
||||
expect(supportsFastEchoTerminal({ TERM_PROGRAM: 'Apple_Terminal' } as NodeJS.ProcessEnv)).toBe(false)
|
||||
})
|
||||
|
||||
it('keeps fast-echo enabled in VS Code and unknown terminals', () => {
|
||||
it('disables fast-echo by default in Termux mode', () => {
|
||||
expect(
|
||||
supportsFastEchoTerminal({ TERMUX_VERSION: '0.118.0', PREFIX: '/data/data/com.termux/files/usr' } as NodeJS.ProcessEnv)
|
||||
).toBe(false)
|
||||
})
|
||||
|
||||
it('allows explicit Termux fast-echo opt-in via env override', () => {
|
||||
expect(
|
||||
supportsFastEchoTerminal({
|
||||
HERMES_TUI_TERMUX_FAST_ECHO: '1',
|
||||
TERMUX_VERSION: '0.118.0'
|
||||
} as NodeJS.ProcessEnv)
|
||||
).toBe(true)
|
||||
})
|
||||
|
||||
it('keeps fast-echo enabled in VS Code and unknown non-Termux terminals', () => {
|
||||
expect(supportsFastEchoTerminal({ TERM_PROGRAM: 'vscode' } as NodeJS.ProcessEnv)).toBe(true)
|
||||
expect(supportsFastEchoTerminal({ TERM: 'xterm-256color' } as NodeJS.ProcessEnv)).toBe(true)
|
||||
})
|
||||
|
||||
@@ -491,13 +491,13 @@ export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev:
|
||||
|
||||
case 'reasoning.delta':
|
||||
if (ev.payload?.text) {
|
||||
turnController.recordReasoningDelta(ev.payload.text)
|
||||
turnController.recordReasoningDelta(ev.payload.text, Boolean(ev.payload.verbose))
|
||||
}
|
||||
|
||||
return
|
||||
|
||||
case 'reasoning.available':
|
||||
turnController.recordReasoningAvailable(String(ev.payload?.text ?? ''))
|
||||
turnController.recordReasoningAvailable(String(ev.payload?.text ?? ''), Boolean(ev.payload?.verbose))
|
||||
|
||||
return
|
||||
|
||||
@@ -517,12 +517,18 @@ export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev:
|
||||
|
||||
case 'tool.start':
|
||||
turnController.recordTodos(ev.payload.todos)
|
||||
turnController.recordToolStart(ev.payload.tool_id, ev.payload.name ?? 'tool', ev.payload.context ?? '')
|
||||
turnController.recordToolStart(
|
||||
ev.payload.tool_id,
|
||||
ev.payload.name ?? 'tool',
|
||||
ev.payload.context ?? '',
|
||||
ev.payload.args_text ? stripAnsi(String(ev.payload.args_text)) : undefined
|
||||
)
|
||||
|
||||
return
|
||||
case 'tool.complete': {
|
||||
const inlineDiffText =
|
||||
ev.payload.inline_diff && getUiState().inlineDiffs ? stripAnsi(String(ev.payload.inline_diff)).trim() : ''
|
||||
const resultText = ev.payload.result_text ? stripAnsi(String(ev.payload.result_text)) : undefined
|
||||
|
||||
if (inlineDiffText) {
|
||||
turnController.recordInlineDiffToolComplete(
|
||||
@@ -530,7 +536,8 @@ export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev:
|
||||
ev.payload.tool_id,
|
||||
ev.payload.name,
|
||||
ev.payload.error,
|
||||
ev.payload.duration_s
|
||||
ev.payload.duration_s,
|
||||
resultText
|
||||
)
|
||||
} else {
|
||||
turnController.recordToolComplete(
|
||||
@@ -539,7 +546,8 @@ export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev:
|
||||
ev.payload.error,
|
||||
ev.payload.summary,
|
||||
ev.payload.duration_s,
|
||||
ev.payload.todos
|
||||
ev.payload.todos,
|
||||
resultText
|
||||
)
|
||||
}
|
||||
|
||||
|
||||
@@ -212,7 +212,6 @@ export const sessionCommands: SlashCommand[] = [
|
||||
void ctx.session.closeSession(prevSid)
|
||||
patchUiState({ sid: r.session_id })
|
||||
ctx.session.setSessionStartedAt(Date.now())
|
||||
ctx.transcript.setHistoryItems([])
|
||||
ctx.transcript.sys(`branched → ${r.title ?? ''}`)
|
||||
})
|
||||
)
|
||||
|
||||
@@ -11,6 +11,7 @@ import { hasReasoningTag, splitReasoning } from '../lib/reasoning.js'
|
||||
import {
|
||||
boundedLiveRenderText,
|
||||
buildToolTrailLine,
|
||||
buildVerboseToolTrailLine,
|
||||
estimateTokensRough,
|
||||
isTransientTrailLine,
|
||||
sameToolTrailGroup,
|
||||
@@ -542,8 +543,8 @@ class TurnController {
|
||||
}
|
||||
}
|
||||
|
||||
recordReasoningAvailable(text: string) {
|
||||
if (this.interrupted || !getUiState().showReasoning) {
|
||||
recordReasoningAvailable(text: string, force = false) {
|
||||
if (this.interrupted || (!force && !getUiState().showReasoning)) {
|
||||
return
|
||||
}
|
||||
|
||||
@@ -560,8 +561,8 @@ class TurnController {
|
||||
this.pulseReasoningStreaming()
|
||||
}
|
||||
|
||||
recordReasoningDelta(text: string) {
|
||||
if (this.interrupted || !getUiState().showReasoning) {
|
||||
recordReasoningDelta(text: string, force = false) {
|
||||
if (this.interrupted || (!force && !getUiState().showReasoning)) {
|
||||
return
|
||||
}
|
||||
|
||||
@@ -587,14 +588,15 @@ class TurnController {
|
||||
error?: string,
|
||||
summary?: string,
|
||||
duration?: number,
|
||||
todos?: unknown
|
||||
todos?: unknown,
|
||||
resultText?: string
|
||||
) {
|
||||
if (this.interrupted) {
|
||||
return
|
||||
}
|
||||
|
||||
this.recordTodos(todos)
|
||||
const line = this.completeTool(toolId, fallbackName, error, summary, duration)
|
||||
const line = this.completeTool(toolId, fallbackName, error, summary, duration, resultText)
|
||||
|
||||
this.pendingSegmentTools = [...this.pendingSegmentTools, line]
|
||||
this.flushPendingToolsIntoLastSegment()
|
||||
@@ -606,30 +608,42 @@ class TurnController {
|
||||
toolId: string,
|
||||
fallbackName?: string,
|
||||
error?: string,
|
||||
duration?: number
|
||||
duration?: number,
|
||||
resultText?: string
|
||||
) {
|
||||
if (this.interrupted) {
|
||||
return
|
||||
}
|
||||
|
||||
this.flushStreamingSegment()
|
||||
this.pushInlineDiffSegment(diffText, [this.completeTool(toolId, fallbackName, error, '', duration)])
|
||||
this.pushInlineDiffSegment(diffText, [this.completeTool(toolId, fallbackName, error, '', duration, resultText)])
|
||||
this.publishToolState()
|
||||
}
|
||||
|
||||
private completeTool(toolId: string, fallbackName?: string, error?: string, summary?: string, duration?: number) {
|
||||
private completeTool(
|
||||
toolId: string,
|
||||
fallbackName?: string,
|
||||
error?: string,
|
||||
summary?: string,
|
||||
duration?: number,
|
||||
resultText?: string
|
||||
) {
|
||||
const done = this.activeTools.find(tool => tool.id === toolId)
|
||||
const name = done?.name ?? fallbackName ?? 'tool'
|
||||
const label = toolTrailLabel(name)
|
||||
const fallbackDuration = done?.startedAt ? (Date.now() - done.startedAt) / 1000 : undefined
|
||||
|
||||
const line = buildToolTrailLine(
|
||||
name,
|
||||
done?.context || '',
|
||||
Boolean(error),
|
||||
error || summary || '',
|
||||
duration ?? fallbackDuration
|
||||
)
|
||||
const line =
|
||||
done?.verboseArgs || resultText
|
||||
? buildVerboseToolTrailLine(
|
||||
name,
|
||||
done?.context || '',
|
||||
Boolean(error),
|
||||
duration ?? fallbackDuration,
|
||||
done?.verboseArgs,
|
||||
error || resultText || summary || ''
|
||||
)
|
||||
: buildToolTrailLine(name, done?.context || '', Boolean(error), error || summary || '', duration ?? fallbackDuration)
|
||||
|
||||
this.activeTools = this.activeTools.filter(tool => tool.id !== toolId)
|
||||
|
||||
@@ -675,7 +689,7 @@ class TurnController {
|
||||
}, STREAM_BATCH_MS)
|
||||
}
|
||||
|
||||
recordToolStart(toolId: string, name: string, context: string) {
|
||||
recordToolStart(toolId: string, name: string, context: string, verboseArgs?: string) {
|
||||
if (this.interrupted) {
|
||||
return
|
||||
}
|
||||
@@ -688,7 +702,7 @@ class TurnController {
|
||||
const sample = `${name} ${context}`.trim()
|
||||
|
||||
this.toolTokenAcc += sample ? estimateTokensRough(sample) : 0
|
||||
this.activeTools = [...this.activeTools, { context, id: toolId, name, startedAt: Date.now() }]
|
||||
this.activeTools = [...this.activeTools, { context, id: toolId, name, startedAt: Date.now(), verboseArgs }]
|
||||
|
||||
patchTurnState({ toolTokens: this.toolTokenAcc, tools: this.activeTools })
|
||||
}
|
||||
|
||||
@@ -6,7 +6,7 @@ import { useGateway } from '../app/gatewayContext.js'
|
||||
import type { AppLayoutProps } from '../app/interfaces.js'
|
||||
import { $isBlocked, $overlayState, patchOverlayState } from '../app/overlayStore.js'
|
||||
import { $uiState } from '../app/uiStore.js'
|
||||
import { INLINE_MODE, SHOW_FPS } from '../config/env.js'
|
||||
import { INLINE_MODE, SHOW_FPS, TERMUX_TUI_MODE } from '../config/env.js'
|
||||
import { PLACEHOLDER } from '../content/placeholders.js'
|
||||
import {
|
||||
COMPOSER_PROMPT_GAP_WIDTH,
|
||||
@@ -169,10 +169,10 @@ const ComposerPane = memo(function ComposerPane({
|
||||
const ui = useStore($uiState)
|
||||
const isBlocked = useStore($isBlocked)
|
||||
const sh = (composer.inputBuf[0] ?? composer.input).startsWith('!')
|
||||
const promptText = composerPromptText(ui.theme.brand.prompt, ui.info?.profile_name, sh)
|
||||
const promptText = composerPromptText(ui.theme.brand.prompt, ui.info?.profile_name, sh, TERMUX_TUI_MODE, composer.cols)
|
||||
const promptWidth = composerPromptWidth(promptText)
|
||||
const promptBlank = ' '.repeat(promptWidth)
|
||||
const inputColumns = stableComposerColumns(composer.cols, promptWidth)
|
||||
const inputColumns = stableComposerColumns(composer.cols, promptWidth, TERMUX_TUI_MODE)
|
||||
const inputHeight = inputVisualHeight(composer.input, inputColumns)
|
||||
const inputMouseRef = useRef<null | TextInputMouseApi>(null)
|
||||
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import { Ansi, Box, NoSelect, Text } from '@hermes/ink'
|
||||
import { memo, useState } from 'react'
|
||||
|
||||
import { TERMUX_TUI_MODE } from '../config/env.js'
|
||||
import { LONG_MSG } from '../config/limits.js'
|
||||
import { sectionMode } from '../domain/details.js'
|
||||
import { userDisplay } from '../domain/messages.js'
|
||||
@@ -139,7 +140,7 @@ export const MessageLine = memo(function MessageLine({
|
||||
}
|
||||
|
||||
if (msg.role === 'assistant') {
|
||||
const bodyWidth = transcriptBodyWidth(cols, msg.role, t.brand.prompt)
|
||||
const bodyWidth = transcriptBodyWidth(cols, msg.role, t.brand.prompt, TERMUX_TUI_MODE)
|
||||
|
||||
return isStreaming ? (
|
||||
// Incremental markdown: split at the last stable block boundary so
|
||||
@@ -201,7 +202,7 @@ export const MessageLine = memo(function MessageLine({
|
||||
</Text>
|
||||
</NoSelect>
|
||||
|
||||
<Box width={transcriptBodyWidth(cols, msg.role, t.brand.prompt)}>{content}</Box>
|
||||
<Box width={transcriptBodyWidth(cols, msg.role, t.brand.prompt, TERMUX_TUI_MODE)}>{content}</Box>
|
||||
</Box>
|
||||
</Box>
|
||||
)
|
||||
|
||||
@@ -13,6 +13,7 @@ import {
|
||||
isVoiceToggleKey,
|
||||
type ParsedVoiceRecordKey
|
||||
} from '../lib/platform.js'
|
||||
import { isTermuxTuiMode } from '../lib/termux.js'
|
||||
|
||||
type InkExt = typeof Ink & {
|
||||
stringWidth: (s: string) => number
|
||||
@@ -298,7 +299,23 @@ export function canFastBackspaceShape(current: string, cursor: number, columns?:
|
||||
export function supportsFastEchoTerminal(env: NodeJS.ProcessEnv = process.env): boolean {
|
||||
// Terminal.app still shows paint/cursor artifacts under the fast-echo
|
||||
// bypass path. Fall back to the normal Ink render path there.
|
||||
return (env.TERM_PROGRAM ?? '').trim() !== 'Apple_Terminal'
|
||||
if ((env.TERM_PROGRAM ?? '').trim() === 'Apple_Terminal') {
|
||||
return false
|
||||
}
|
||||
|
||||
// Termux terminals are especially sensitive to bypass-path cursor drift and
|
||||
// stale paints at soft-wrap boundaries on tall/narrow viewports. Keep this
|
||||
// off by default in Termux mode; allow explicit opt-in for local debugging.
|
||||
if (isTermuxTuiMode(env)) {
|
||||
const override = String(env.HERMES_TUI_TERMUX_FAST_ECHO ?? '').trim().toLowerCase()
|
||||
if (override) {
|
||||
return /^(?:1|true|yes|on)$/i.test(override)
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
function renderWithCursor(value: string, cursor: number) {
|
||||
|
||||
@@ -856,7 +856,16 @@ export const ToolTrail = memo(function ToolTrail({
|
||||
color: t.color.text,
|
||||
key: tool.id,
|
||||
label,
|
||||
details: [],
|
||||
details: tool.verboseArgs
|
||||
? [
|
||||
{
|
||||
color: t.color.muted,
|
||||
content: `Args:\n${boundedLiveRenderText(tool.verboseArgs)}`,
|
||||
dimColor: true,
|
||||
key: `${tool.id}-args`
|
||||
}
|
||||
]
|
||||
: [],
|
||||
content: (
|
||||
<>
|
||||
<Spinner color={t.color.accent} variant="tool" /> {label}
|
||||
|
||||
@@ -477,11 +477,11 @@ export type GatewayEvent =
|
||||
type: 'gateway.start_timeout'
|
||||
}
|
||||
| { payload?: { preview?: string }; session_id?: string; type: 'gateway.protocol_error' }
|
||||
| { payload?: { text?: string }; session_id?: string; type: 'reasoning.delta' | 'reasoning.available' }
|
||||
| { payload?: { text?: string; verbose?: boolean }; session_id?: string; type: 'reasoning.delta' | 'reasoning.available' }
|
||||
| { payload: { name?: string; preview?: string }; session_id?: string; type: 'tool.progress' }
|
||||
| { payload: { name?: string }; session_id?: string; type: 'tool.generating' }
|
||||
| {
|
||||
payload: { context?: string; name?: string; tool_id: string; todos?: unknown[] }
|
||||
payload: { args_text?: string; context?: string; name?: string; tool_id: string; todos?: unknown[] }
|
||||
session_id?: string
|
||||
type: 'tool.start'
|
||||
}
|
||||
@@ -491,6 +491,7 @@ export type GatewayEvent =
|
||||
error?: string
|
||||
inline_diff?: string
|
||||
name?: string
|
||||
result_text?: string
|
||||
summary?: string
|
||||
tool_id: string
|
||||
todos?: unknown[]
|
||||
|
||||
@@ -177,14 +177,25 @@ export function transcriptGutterWidth(role: Role, userPrompt: string) {
|
||||
return role === 'user' ? composerPromptWidth(userPrompt) : 3
|
||||
}
|
||||
|
||||
export function transcriptBodyWidth(totalCols: number, role: Role, userPrompt: string) {
|
||||
return Math.max(20, totalCols - transcriptGutterWidth(role, userPrompt) - 2)
|
||||
export function transcriptBodyWidth(totalCols: number, role: Role, userPrompt: string, termuxMode = false) {
|
||||
const available = Math.max(1, totalCols - transcriptGutterWidth(role, userPrompt) - 2)
|
||||
|
||||
if (termuxMode) {
|
||||
// On narrow / unusual aspect-ratio mobile panes, forcing a wide minimum
|
||||
// width causes right-edge clipping and chopped words.
|
||||
return available
|
||||
}
|
||||
|
||||
return Math.max(20, available)
|
||||
}
|
||||
|
||||
export function stableComposerColumns(totalCols: number, promptWidth: number) {
|
||||
export function stableComposerColumns(totalCols: number, promptWidth: number, termuxMode = false) {
|
||||
// Physical render/wrap width. Always reserve outer composer padding and
|
||||
// prompt prefix. Only reserve the transcript scrollbar gutter when the
|
||||
// terminal is wide enough; on narrow panes, preserving input columns beats
|
||||
// keeping gutters visually aligned.
|
||||
return Math.max(1, totalCols - promptWidth - 2 - (totalCols - promptWidth >= 24 ? 2 : 0))
|
||||
const afterPrompt = totalCols - promptWidth
|
||||
const reserveScrollbar = afterPrompt >= (termuxMode ? 36 : 24) ? 2 : 0
|
||||
|
||||
return Math.max(1, totalCols - promptWidth - 2 - reserveScrollbar)
|
||||
}
|
||||
|
||||
@@ -1,8 +1,32 @@
|
||||
export function composerPromptText(prompt: string, profileName?: null | string, shellMode = false): string {
|
||||
const TERMUX_SAFE_PROMPT = '>'
|
||||
|
||||
export function composerPromptText(
|
||||
prompt: string,
|
||||
profileName?: null | string,
|
||||
shellMode = false,
|
||||
termuxMode = false,
|
||||
totalCols?: number
|
||||
): string {
|
||||
if (shellMode) {
|
||||
return '$'
|
||||
}
|
||||
|
||||
if (termuxMode) {
|
||||
// Termux fonts/terminal backends can render decorative prompt glyphs with
|
||||
// ambiguous width; keep the live composer marker strictly single-cell ASCII
|
||||
// so we never leave stale arrow artifacts while typing.
|
||||
const basePrompt = TERMUX_SAFE_PROMPT
|
||||
|
||||
// On very wide panes we can still include profile context. On narrow/mobile
|
||||
// panes this burns precious columns and increases wrap/clipping risk.
|
||||
const wideEnoughForProfile = typeof totalCols === 'number' ? totalCols >= 90 : false
|
||||
if (wideEnoughForProfile && profileName && !['default', 'custom'].includes(profileName)) {
|
||||
return `${profileName} ${basePrompt}`
|
||||
}
|
||||
|
||||
return basePrompt
|
||||
}
|
||||
|
||||
if (profileName && !['default', 'custom'].includes(profileName)) {
|
||||
return `${profileName} ${prompt}`
|
||||
}
|
||||
|
||||
+25
-3
@@ -212,6 +212,28 @@ export const buildToolTrailLine = (
|
||||
return `${formatToolCall(name, context)}${took}${detail ? ` :: ${detail}` : ''} ${error ? '✗' : '✓'}`
|
||||
}
|
||||
|
||||
const verboseToolBlock = (label: string, text?: string) => {
|
||||
const body = (text ?? '').trim()
|
||||
|
||||
return body ? `${label}:\n${boundedLiveRenderText(body)}` : ''
|
||||
}
|
||||
|
||||
export const buildVerboseToolTrailLine = (
|
||||
name: string,
|
||||
context: string,
|
||||
error?: boolean,
|
||||
duration?: number,
|
||||
argsText?: string,
|
||||
resultText?: string
|
||||
) => {
|
||||
const detail = [verboseToolBlock('Args', argsText), verboseToolBlock(error ? 'Error' : 'Result', resultText)]
|
||||
.filter(Boolean)
|
||||
.join('\n')
|
||||
const took = duration !== undefined ? ` (${duration.toFixed(1)}s)` : ''
|
||||
|
||||
return `${formatToolCall(name, context)}${took}${detail ? ` :: ${detail}` : ''} ${error ? '✗' : '✓'}`
|
||||
}
|
||||
|
||||
export const isToolTrailResultLine = (line: string) => line.endsWith(' ✓') || line.endsWith(' ✗')
|
||||
|
||||
export const parseToolTrailResultLine = (line: string) => {
|
||||
@@ -221,10 +243,10 @@ export const parseToolTrailResultLine = (line: string) => {
|
||||
|
||||
const mark = line.endsWith(' ✗') ? '✗' : '✓'
|
||||
const body = line.slice(0, -2)
|
||||
const [call, detail] = body.split(' :: ', 2)
|
||||
const sep = body.indexOf(' :: ')
|
||||
|
||||
if (detail != null) {
|
||||
return { call, detail, mark }
|
||||
if (sep >= 0) {
|
||||
return { call: body.slice(0, sep), detail: body.slice(sep + 4), mark }
|
||||
}
|
||||
|
||||
const legacy = body.indexOf(': ')
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import type { Msg } from '../types.js'
|
||||
|
||||
import { TERMUX_TUI_MODE } from '../config/env.js'
|
||||
import { transcriptBodyWidth } from './inputMetrics.js'
|
||||
|
||||
const hashText = (text: string) => {
|
||||
@@ -96,7 +97,7 @@ export const estimatedMsgHeight = (
|
||||
return Math.max(2, msg.todos.length + 2)
|
||||
}
|
||||
|
||||
const bodyWidth = transcriptBodyWidth(cols, msg.role, userPrompt)
|
||||
const bodyWidth = transcriptBodyWidth(cols, msg.role, userPrompt, TERMUX_TUI_MODE)
|
||||
const text = msg.text
|
||||
let h = wrappedLines(text || ' ', bodyWidth)
|
||||
|
||||
|
||||
@@ -2,6 +2,7 @@ export interface ActiveTool {
|
||||
context?: string
|
||||
id: string
|
||||
name: string
|
||||
verboseArgs?: string
|
||||
startedAt?: number
|
||||
}
|
||||
|
||||
|
||||
@@ -44,11 +44,13 @@ const AUX_TASKS: readonly { key: string; label: string; hint: string }[] = [
|
||||
{ key: "vision", label: "Vision", hint: "Image analysis" },
|
||||
{ key: "web_extract", label: "Web Extract", hint: "Page summarization" },
|
||||
{ key: "compression", label: "Compression", hint: "Context compaction" },
|
||||
{ key: "session_search", label: "Session Search", hint: "Recall queries" },
|
||||
{ key: "skills_hub", label: "Skills Hub", hint: "Skill search" },
|
||||
{ key: "approval", label: "Approval", hint: "Smart auto-approve" },
|
||||
{ key: "mcp", label: "MCP", hint: "MCP tool routing" },
|
||||
{ key: "title_generation", label: "Title Gen", hint: "Session titles" },
|
||||
{ key: "triage_specifier", label: "Triage Specifier", hint: "Kanban spec fleshing" },
|
||||
{ key: "kanban_decomposer", label: "Kanban Decomposer", hint: "Task decomposition" },
|
||||
{ key: "profile_describer", label: "Profile Describer", hint: "Auto profile descriptions" },
|
||||
{ key: "curator", label: "Curator", hint: "Skill-usage review" },
|
||||
] as const;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user