Compare commits
1 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 984e6cb5b8 |
+2
-27
@@ -1125,18 +1125,7 @@ def init_agent(
|
||||
# through _ra().get_tool_definitions()). Duplicate function names cause
|
||||
# 400 errors on providers that enforce unique names (e.g. Xiaomi
|
||||
# MiMo via Nous Portal).
|
||||
#
|
||||
# Respect the platform's enabled_toolsets configuration (#5544):
|
||||
# enabled_toolsets is None → no filter, inject (backward compat)
|
||||
# "memory" in enabled_toolsets → user opted in, inject
|
||||
# otherwise (incl. []) → user excluded memory, skip injection
|
||||
#
|
||||
# Without this gate, `platform_toolsets: telegram: []` still leaks memory
|
||||
# provider tools (fact_store, etc.) into the tool surface — a 10x latency
|
||||
# penalty on local models and a frequent trigger of tool-call loops.
|
||||
if agent._memory_manager and agent.tools is not None and (
|
||||
agent.enabled_toolsets is None or "memory" in agent.enabled_toolsets
|
||||
):
|
||||
if agent._memory_manager and agent.tools is not None:
|
||||
_existing_tool_names = {
|
||||
t.get("function", {}).get("name")
|
||||
for t in agent.tools
|
||||
@@ -1446,22 +1435,8 @@ def init_agent(
|
||||
# errors. Even with the cache fix, dedup is the right defense
|
||||
# against plugin paths that may register the same schemas via
|
||||
# ctx.register_tool(). Mirrors the memory tools dedup above.
|
||||
#
|
||||
# Respect the platform's enabled_toolsets configuration (#5544):
|
||||
# context engine tools follow the same gating pattern as memory
|
||||
# provider tools — without the gate, `platform_toolsets: telegram: []`
|
||||
# would still leak lcm_* tools into the tool surface and incur the
|
||||
# same local-model latency penalty.
|
||||
agent._context_engine_tool_names: set = set()
|
||||
if (
|
||||
hasattr(agent, "context_compressor")
|
||||
and agent.context_compressor
|
||||
and agent.tools is not None
|
||||
and (
|
||||
agent.enabled_toolsets is None
|
||||
or "context_engine" in agent.enabled_toolsets
|
||||
)
|
||||
):
|
||||
if hasattr(agent, "context_compressor") and agent.context_compressor and agent.tools is not None:
|
||||
_existing_tool_names = {
|
||||
t.get("function", {}).get("name")
|
||||
for t in agent.tools
|
||||
|
||||
+226
-250
@@ -1606,155 +1606,182 @@ def _content_parts_to_anthropic_blocks(parts: Any) -> List[Dict[str, Any]]:
|
||||
return out
|
||||
|
||||
|
||||
def _convert_assistant_message(m: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Convert an assistant message to Anthropic content blocks.
|
||||
def convert_messages_to_anthropic(
|
||||
messages: List[Dict],
|
||||
base_url: str | None = None,
|
||||
model: str | None = None,
|
||||
) -> Tuple[Optional[Any], List[Dict]]:
|
||||
"""Convert OpenAI-format messages to Anthropic format.
|
||||
|
||||
Handles thinking blocks, regular content, tool calls, and
|
||||
reasoning_content injection for Kimi/DeepSeek endpoints.
|
||||
Returns (system_prompt, anthropic_messages).
|
||||
System messages are extracted since Anthropic takes them as a separate param.
|
||||
system_prompt is a string or list of content blocks (when cache_control present).
|
||||
|
||||
When *base_url* is provided and points to a third-party Anthropic-compatible
|
||||
endpoint, all thinking block signatures are stripped. Signatures are
|
||||
Anthropic-proprietary — third-party endpoints cannot validate them and will
|
||||
reject them with HTTP 400 "Invalid signature in thinking block".
|
||||
|
||||
When *model* is provided and matches the Kimi / Moonshot family (or
|
||||
*base_url* is a Kimi / Moonshot host), unsigned thinking blocks
|
||||
synthesised from ``reasoning_content`` are preserved on replayed
|
||||
assistant tool-call messages — Kimi requires the field to exist, even
|
||||
if empty.
|
||||
"""
|
||||
content = m.get("content", "")
|
||||
blocks = _extract_preserved_thinking_blocks(m)
|
||||
if content:
|
||||
if isinstance(content, list):
|
||||
converted_content = _convert_content_to_anthropic(content)
|
||||
if isinstance(converted_content, list):
|
||||
blocks.extend(converted_content)
|
||||
else:
|
||||
blocks.append({"type": "text", "text": str(content)})
|
||||
for tc in m.get("tool_calls", []):
|
||||
if not tc or not isinstance(tc, dict):
|
||||
system = None
|
||||
result = []
|
||||
|
||||
for m in messages:
|
||||
role = m.get("role", "user")
|
||||
content = m.get("content", "")
|
||||
|
||||
if role == "system":
|
||||
if isinstance(content, list):
|
||||
# Preserve cache_control markers on content blocks
|
||||
has_cache = any(
|
||||
p.get("cache_control") for p in content if isinstance(p, dict)
|
||||
)
|
||||
if has_cache:
|
||||
system = [p for p in content if isinstance(p, dict)]
|
||||
else:
|
||||
system = "\n".join(
|
||||
p["text"] for p in content if p.get("type") == "text"
|
||||
)
|
||||
else:
|
||||
system = content
|
||||
continue
|
||||
fn = tc.get("function", {})
|
||||
args = fn.get("arguments", "{}")
|
||||
try:
|
||||
parsed_args = json.loads(args) if isinstance(args, str) else args
|
||||
except (json.JSONDecodeError, ValueError):
|
||||
parsed_args = {}
|
||||
blocks.append({
|
||||
"type": "tool_use",
|
||||
"id": _sanitize_tool_id(tc.get("id", "")),
|
||||
"name": fn.get("name", ""),
|
||||
"input": parsed_args,
|
||||
})
|
||||
# Kimi's /coding endpoint (Anthropic protocol) requires assistant
|
||||
# tool-call messages to carry reasoning_content when thinking is
|
||||
# enabled server-side. Preserve it as a thinking block so Kimi
|
||||
# can validate the message history. See hermes-agent#13848.
|
||||
#
|
||||
# Accept empty string "" — _copy_reasoning_content_for_api()
|
||||
# injects "" as a tier-3 fallback for Kimi tool-call messages
|
||||
# that had no reasoning. Kimi requires the field to exist, even
|
||||
# if empty.
|
||||
#
|
||||
# Prepend (not append): Anthropic protocol requires thinking
|
||||
# blocks before text and tool_use blocks.
|
||||
#
|
||||
# Guard: only add when reasoning_details didn't already contribute
|
||||
# thinking blocks. On native Anthropic, reasoning_details produces
|
||||
# signed thinking blocks — adding another unsigned one from
|
||||
# reasoning_content would create a duplicate (same text) that gets
|
||||
# downgraded to a spurious text block on the last assistant message.
|
||||
reasoning_content = m.get("reasoning_content")
|
||||
_already_has_thinking = any(
|
||||
isinstance(b, dict) and b.get("type") in {"thinking", "redacted_thinking"}
|
||||
for b in blocks
|
||||
)
|
||||
if isinstance(reasoning_content, str) and not _already_has_thinking:
|
||||
blocks.insert(0, {"type": "thinking", "thinking": reasoning_content})
|
||||
# Anthropic rejects empty assistant content
|
||||
effective = blocks or content
|
||||
if not effective or effective == "":
|
||||
effective = [{"type": "text", "text": "(empty)"}]
|
||||
return {"role": "assistant", "content": effective}
|
||||
|
||||
|
||||
def _convert_tool_message_to_result(
|
||||
result: List[Dict[str, Any]], m: Dict[str, Any]
|
||||
) -> None:
|
||||
"""Convert a tool message to an Anthropic tool_result, merging consecutive
|
||||
results into one user message.
|
||||
|
||||
Mutates ``result`` in place — either appends a new user message or extends
|
||||
the trailing user message's tool_result list.
|
||||
"""
|
||||
content = m.get("content", "")
|
||||
multimodal_blocks: Optional[List[Dict[str, Any]]] = None
|
||||
if isinstance(content, dict) and content.get("_multimodal"):
|
||||
multimodal_blocks = _content_parts_to_anthropic_blocks(
|
||||
content.get("content") or []
|
||||
)
|
||||
# Fallback text if the conversion produced nothing usable.
|
||||
if not multimodal_blocks and content.get("text_summary"):
|
||||
multimodal_blocks = [
|
||||
{"type": "text", "text": str(content["text_summary"])}
|
||||
]
|
||||
elif isinstance(content, list):
|
||||
converted = _content_parts_to_anthropic_blocks(content)
|
||||
if any(b.get("type") == "image" for b in converted):
|
||||
multimodal_blocks = converted
|
||||
# Back-compat: some callers stash blocks under a private key.
|
||||
if multimodal_blocks is None:
|
||||
stashed = m.get("_anthropic_content_blocks")
|
||||
if isinstance(stashed, list) and stashed:
|
||||
text_content = content if isinstance(content, str) and content.strip() else None
|
||||
multimodal_blocks = (
|
||||
[{"type": "text", "text": text_content}] + stashed
|
||||
if text_content else list(stashed)
|
||||
if role == "assistant":
|
||||
blocks = _extract_preserved_thinking_blocks(m)
|
||||
if content:
|
||||
if isinstance(content, list):
|
||||
converted_content = _convert_content_to_anthropic(content)
|
||||
if isinstance(converted_content, list):
|
||||
blocks.extend(converted_content)
|
||||
else:
|
||||
blocks.append({"type": "text", "text": str(content)})
|
||||
for tc in m.get("tool_calls", []):
|
||||
if not tc or not isinstance(tc, dict):
|
||||
continue
|
||||
fn = tc.get("function", {})
|
||||
args = fn.get("arguments", "{}")
|
||||
try:
|
||||
parsed_args = json.loads(args) if isinstance(args, str) else args
|
||||
except (json.JSONDecodeError, ValueError):
|
||||
parsed_args = {}
|
||||
blocks.append({
|
||||
"type": "tool_use",
|
||||
"id": _sanitize_tool_id(tc.get("id", "")),
|
||||
"name": fn.get("name", ""),
|
||||
"input": parsed_args,
|
||||
})
|
||||
# Kimi's /coding endpoint (Anthropic protocol) requires assistant
|
||||
# tool-call messages to carry reasoning_content when thinking is
|
||||
# enabled server-side. Preserve it as a thinking block so Kimi
|
||||
# can validate the message history. See hermes-agent#13848.
|
||||
#
|
||||
# Accept empty string "" — _copy_reasoning_content_for_api()
|
||||
# injects "" as a tier-3 fallback for Kimi tool-call messages
|
||||
# that had no reasoning. Kimi requires the field to exist, even
|
||||
# if empty.
|
||||
#
|
||||
# Prepend (not append): Anthropic protocol requires thinking
|
||||
# blocks before text and tool_use blocks.
|
||||
#
|
||||
# Guard: only add when reasoning_details didn't already contribute
|
||||
# thinking blocks. On native Anthropic, reasoning_details produces
|
||||
# signed thinking blocks — adding another unsigned one from
|
||||
# reasoning_content would create a duplicate (same text) that gets
|
||||
# downgraded to a spurious text block on the last assistant message.
|
||||
reasoning_content = m.get("reasoning_content")
|
||||
_already_has_thinking = any(
|
||||
isinstance(b, dict) and b.get("type") in {"thinking", "redacted_thinking"}
|
||||
for b in blocks
|
||||
)
|
||||
if isinstance(reasoning_content, str) and not _already_has_thinking:
|
||||
blocks.insert(0, {"type": "thinking", "thinking": reasoning_content})
|
||||
# Anthropic rejects empty assistant content
|
||||
effective = blocks or content
|
||||
if not effective or effective == "":
|
||||
effective = [{"type": "text", "text": "(empty)"}]
|
||||
result.append({"role": "assistant", "content": effective})
|
||||
continue
|
||||
|
||||
if multimodal_blocks:
|
||||
result_content: Any = multimodal_blocks
|
||||
elif isinstance(content, str):
|
||||
result_content = content
|
||||
else:
|
||||
result_content = json.dumps(content) if content else "(no output)"
|
||||
if not result_content:
|
||||
result_content = "(no output)"
|
||||
tool_result = {
|
||||
"type": "tool_result",
|
||||
"tool_use_id": _sanitize_tool_id(m.get("tool_call_id", "")),
|
||||
"content": result_content,
|
||||
}
|
||||
if isinstance(m.get("cache_control"), dict):
|
||||
tool_result["cache_control"] = dict(m["cache_control"])
|
||||
# Merge consecutive tool results into one user message
|
||||
if (
|
||||
result
|
||||
and result[-1]["role"] == "user"
|
||||
and isinstance(result[-1]["content"], list)
|
||||
and result[-1]["content"]
|
||||
and result[-1]["content"][0].get("type") == "tool_result"
|
||||
):
|
||||
result[-1]["content"].append(tool_result)
|
||||
else:
|
||||
result.append({"role": "user", "content": [tool_result]})
|
||||
if role == "tool":
|
||||
# Sanitize tool_use_id and ensure non-empty content.
|
||||
# Computer-use (and other multimodal) tool results arrive as
|
||||
# either a list of OpenAI-style content parts, or a dict
|
||||
# marked `_multimodal` with an embedded `content` list. Convert
|
||||
# both into Anthropic `tool_result` inner blocks (text + image).
|
||||
multimodal_blocks: Optional[List[Dict[str, Any]]] = None
|
||||
if isinstance(content, dict) and content.get("_multimodal"):
|
||||
multimodal_blocks = _content_parts_to_anthropic_blocks(
|
||||
content.get("content") or []
|
||||
)
|
||||
# Fallback text if the conversion produced nothing usable.
|
||||
if not multimodal_blocks and content.get("text_summary"):
|
||||
multimodal_blocks = [
|
||||
{"type": "text", "text": str(content["text_summary"])}
|
||||
]
|
||||
elif isinstance(content, list):
|
||||
converted = _content_parts_to_anthropic_blocks(content)
|
||||
if any(b.get("type") == "image" for b in converted):
|
||||
multimodal_blocks = converted
|
||||
# Back-compat: some callers stash blocks under a private key.
|
||||
if multimodal_blocks is None:
|
||||
stashed = m.get("_anthropic_content_blocks")
|
||||
if isinstance(stashed, list) and stashed:
|
||||
text_content = content if isinstance(content, str) and content.strip() else None
|
||||
multimodal_blocks = (
|
||||
[{"type": "text", "text": text_content}] + stashed
|
||||
if text_content else list(stashed)
|
||||
)
|
||||
|
||||
if multimodal_blocks:
|
||||
result_content: Any = multimodal_blocks
|
||||
elif isinstance(content, str):
|
||||
result_content = content
|
||||
else:
|
||||
result_content = json.dumps(content) if content else "(no output)"
|
||||
if not result_content:
|
||||
result_content = "(no output)"
|
||||
tool_result = {
|
||||
"type": "tool_result",
|
||||
"tool_use_id": _sanitize_tool_id(m.get("tool_call_id", "")),
|
||||
"content": result_content,
|
||||
}
|
||||
if isinstance(m.get("cache_control"), dict):
|
||||
tool_result["cache_control"] = dict(m["cache_control"])
|
||||
# Merge consecutive tool results into one user message
|
||||
if (
|
||||
result
|
||||
and result[-1]["role"] == "user"
|
||||
and isinstance(result[-1]["content"], list)
|
||||
and result[-1]["content"]
|
||||
and result[-1]["content"][0].get("type") == "tool_result"
|
||||
):
|
||||
result[-1]["content"].append(tool_result)
|
||||
else:
|
||||
result.append({"role": "user", "content": [tool_result]})
|
||||
continue
|
||||
|
||||
def _convert_user_message(content: Any) -> Dict[str, Any]:
|
||||
"""Validate and convert a user message to anthropic format."""
|
||||
if isinstance(content, list):
|
||||
converted_blocks = _convert_content_to_anthropic(content)
|
||||
if not converted_blocks or all(
|
||||
b.get("text", "").strip() == ""
|
||||
for b in converted_blocks
|
||||
if isinstance(b, dict) and b.get("type") == "text"
|
||||
):
|
||||
converted_blocks = [{"type": "text", "text": "(empty message)"}]
|
||||
return {"role": "user", "content": converted_blocks}
|
||||
else:
|
||||
if not content or (isinstance(content, str) and not content.strip()):
|
||||
content = "(empty message)"
|
||||
return {"role": "user", "content": content}
|
||||
# Regular user message — validate non-empty content (Anthropic rejects empty)
|
||||
if isinstance(content, list):
|
||||
converted_blocks = _convert_content_to_anthropic(content)
|
||||
# Check if all text blocks are empty
|
||||
if not converted_blocks or all(
|
||||
b.get("text", "").strip() == ""
|
||||
for b in converted_blocks
|
||||
if isinstance(b, dict) and b.get("type") == "text"
|
||||
):
|
||||
converted_blocks = [{"type": "text", "text": "(empty message)"}]
|
||||
result.append({"role": "user", "content": converted_blocks})
|
||||
else:
|
||||
# Validate string content is non-empty
|
||||
if not content or (isinstance(content, str) and not content.strip()):
|
||||
content = "(empty message)"
|
||||
result.append({"role": "user", "content": content})
|
||||
|
||||
|
||||
def _strip_orphaned_tool_blocks(result: List[Dict[str, Any]]) -> None:
|
||||
"""Strip tool_use blocks with no matching tool_result, and vice versa.
|
||||
|
||||
Context compression or session truncation can remove either side of a
|
||||
tool-call pair. Anthropic rejects both orphans with HTTP 400.
|
||||
|
||||
Mutates ``result`` in place.
|
||||
"""
|
||||
# Strip orphaned tool_use blocks (no matching tool_result follows)
|
||||
tool_result_ids = set()
|
||||
for m in result:
|
||||
@@ -1772,7 +1799,10 @@ def _strip_orphaned_tool_blocks(result: List[Dict[str, Any]]) -> None:
|
||||
if not m["content"]:
|
||||
m["content"] = [{"type": "text", "text": "(tool call removed)"}]
|
||||
|
||||
# Strip orphaned tool_result blocks (no matching tool_use precedes them)
|
||||
# Strip orphaned tool_result blocks (no matching tool_use precedes them).
|
||||
# This is the mirror of the above: context compression or session truncation
|
||||
# can remove an assistant message containing a tool_use while leaving the
|
||||
# subsequent tool_result intact. Anthropic rejects these with a 400.
|
||||
tool_use_ids = set()
|
||||
for m in result:
|
||||
if m["role"] == "assistant" and isinstance(m["content"], list):
|
||||
@@ -1789,16 +1819,12 @@ def _strip_orphaned_tool_blocks(result: List[Dict[str, Any]]) -> None:
|
||||
if not m["content"]:
|
||||
m["content"] = [{"type": "text", "text": "(tool result removed)"}]
|
||||
|
||||
|
||||
def _merge_consecutive_roles(result: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
||||
"""Merge consecutive same-role messages to enforce Anthropic alternation.
|
||||
|
||||
Returns a new list (caller must rebind ``result``).
|
||||
"""
|
||||
# Enforce strict role alternation (Anthropic rejects consecutive same-role messages)
|
||||
fixed = []
|
||||
for m in result:
|
||||
if fixed and fixed[-1]["role"] == m["role"]:
|
||||
if m["role"] == "user":
|
||||
# Merge consecutive user messages
|
||||
prev_content = fixed[-1]["content"]
|
||||
curr_content = m["content"]
|
||||
if isinstance(prev_content, str) and isinstance(curr_content, str):
|
||||
@@ -1806,6 +1832,7 @@ def _merge_consecutive_roles(result: List[Dict[str, Any]]) -> List[Dict[str, Any
|
||||
elif isinstance(prev_content, list) and isinstance(curr_content, list):
|
||||
fixed[-1]["content"] = prev_content + curr_content
|
||||
else:
|
||||
# Mixed types — wrap string in list
|
||||
if isinstance(prev_content, str):
|
||||
prev_content = [{"type": "text", "text": prev_content}]
|
||||
if isinstance(curr_content, str):
|
||||
@@ -1828,6 +1855,7 @@ def _merge_consecutive_roles(result: List[Dict[str, Any]]) -> List[Dict[str, Any
|
||||
elif isinstance(prev_blocks, str) and isinstance(curr_blocks, str):
|
||||
fixed[-1]["content"] = prev_blocks + "\n" + curr_blocks
|
||||
else:
|
||||
# Mixed types — normalize both to list and merge
|
||||
if isinstance(prev_blocks, str):
|
||||
prev_blocks = [{"type": "text", "text": prev_blocks}]
|
||||
if isinstance(curr_blocks, str):
|
||||
@@ -1835,34 +1863,37 @@ def _merge_consecutive_roles(result: List[Dict[str, Any]]) -> List[Dict[str, Any
|
||||
fixed[-1]["content"] = prev_blocks + curr_blocks
|
||||
else:
|
||||
fixed.append(m)
|
||||
return fixed
|
||||
result = fixed
|
||||
|
||||
|
||||
def _manage_thinking_signatures(
|
||||
result: List[Dict[str, Any]], base_url: str | None, model: str | None
|
||||
) -> None:
|
||||
"""Strip or preserve thinking blocks based on endpoint type.
|
||||
|
||||
Anthropic signs thinking blocks against the full turn content.
|
||||
Any upstream mutation (context compression, session truncation, orphan
|
||||
stripping, message merging) invalidates the signature, causing HTTP 400
|
||||
"Invalid signature in thinking block".
|
||||
|
||||
Signatures are Anthropic-proprietary. Third-party endpoints (MiniMax,
|
||||
Azure AI Foundry, AWS Bedrock, self-hosted proxies) cannot validate them
|
||||
and will reject them outright. Kimi's /coding and DeepSeek's /anthropic
|
||||
endpoints speak the Anthropic protocol upstream but require unsigned
|
||||
thinking blocks (synthesised from ``reasoning_content``) to round-trip on
|
||||
replayed assistant tool-call messages. See hermes-agent#13848 (Kimi) and
|
||||
hermes-agent#16748 (DeepSeek).
|
||||
|
||||
Mutates ``result`` in place.
|
||||
"""
|
||||
# ── Thinking block signature management ──────────────────────────
|
||||
# Anthropic signs thinking blocks against the full turn content.
|
||||
# Any upstream mutation (context compression, session truncation,
|
||||
# orphan stripping, message merging) invalidates the signature,
|
||||
# causing HTTP 400 "Invalid signature in thinking block".
|
||||
#
|
||||
# Signatures are Anthropic-proprietary. Third-party endpoints
|
||||
# (MiniMax, Microsoft Foundry, self-hosted proxies) cannot validate
|
||||
# them and will reject them outright. When targeting a third-party
|
||||
# endpoint, strip ALL thinking/redacted_thinking blocks from every
|
||||
# assistant message — the third-party will generate its own
|
||||
# thinking blocks if it supports extended thinking.
|
||||
#
|
||||
# For direct Anthropic (strategy following clawdbot/OpenClaw):
|
||||
# 1. Strip thinking/redacted_thinking from all assistant messages
|
||||
# EXCEPT the last one — preserves reasoning continuity on the
|
||||
# current tool-use chain while avoiding stale signature errors.
|
||||
# 2. Downgrade unsigned thinking blocks (no signature) to text —
|
||||
# Anthropic can't validate them and will reject them.
|
||||
# 3. Strip cache_control from thinking/redacted_thinking blocks —
|
||||
# cache markers can interfere with signature validation.
|
||||
_THINKING_TYPES = frozenset(("thinking", "redacted_thinking"))
|
||||
_is_third_party = _is_third_party_anthropic_endpoint(base_url)
|
||||
# Kimi / DeepSeek share a contract: strip signed Anthropic blocks
|
||||
# (neither upstream can validate Anthropic signatures), preserve unsigned
|
||||
# ones synthesised from reasoning_content. See #13848, #16748.
|
||||
# Kimi /coding and DeepSeek /anthropic share a contract: both speak the
|
||||
# Anthropic Messages protocol upstream but require that thinking blocks
|
||||
# synthesised from reasoning_content round-trip on subsequent turns when
|
||||
# thinking is enabled. Signed Anthropic blocks still have to be stripped
|
||||
# (neither endpoint can validate Anthropic's signatures); unsigned blocks
|
||||
# are preserved. See hermes-agent#13848 (Kimi) and #16748 (DeepSeek).
|
||||
_preserve_unsigned_thinking = (
|
||||
_is_kimi_family_endpoint(base_url, model)
|
||||
or _is_deepseek_anthropic_endpoint(base_url)
|
||||
@@ -1879,19 +1910,26 @@ def _manage_thinking_signatures(
|
||||
continue
|
||||
|
||||
if _preserve_unsigned_thinking:
|
||||
# Kimi / DeepSeek: strip signed, preserve unsigned.
|
||||
# Kimi's /coding and DeepSeek's /anthropic endpoints both enable
|
||||
# thinking server-side and require unsigned thinking blocks on
|
||||
# replayed assistant tool-call messages. Strip signed Anthropic
|
||||
# blocks (neither upstream can validate Anthropic signatures) but
|
||||
# preserve the unsigned ones we synthesised from reasoning_content.
|
||||
new_content = []
|
||||
for b in m["content"]:
|
||||
if not isinstance(b, dict) or b.get("type") not in _THINKING_TYPES:
|
||||
new_content.append(b)
|
||||
continue
|
||||
if b.get("signature") or b.get("data"):
|
||||
# Signed (or redacted-with-data) — upstream can't validate, strip.
|
||||
# Anthropic-signed block — upstream can't validate, strip
|
||||
continue
|
||||
# Unsigned thinking (synthesised from reasoning_content) —
|
||||
# keep it: the upstream needs it for message-history validation.
|
||||
new_content.append(b)
|
||||
m["content"] = new_content or [{"type": "text", "text": "(empty)"}]
|
||||
elif _is_third_party or idx != last_assistant_idx:
|
||||
# Third-party: strip ALL thinking blocks (signatures are proprietary).
|
||||
# Third-party endpoint: strip ALL thinking blocks from every
|
||||
# assistant message — signatures are Anthropic-proprietary.
|
||||
# Direct Anthropic: strip from non-latest assistant messages only.
|
||||
stripped = [
|
||||
b for b in m["content"]
|
||||
@@ -1899,21 +1937,24 @@ def _manage_thinking_signatures(
|
||||
]
|
||||
m["content"] = stripped or [{"type": "text", "text": "(thinking elided)"}]
|
||||
else:
|
||||
# Latest assistant on direct Anthropic: keep signed, downgrade unsigned
|
||||
# to text so the reasoning isn't lost.
|
||||
# Latest assistant on direct Anthropic: keep signed thinking
|
||||
# blocks for reasoning continuity; downgrade unsigned ones to
|
||||
# plain text.
|
||||
new_content = []
|
||||
for b in m["content"]:
|
||||
if not isinstance(b, dict) or b.get("type") not in _THINKING_TYPES:
|
||||
new_content.append(b)
|
||||
continue
|
||||
if b.get("type") == "redacted_thinking":
|
||||
# Redacted blocks use 'data' for the signature payload —
|
||||
# drop the block when 'data' is missing (can't be validated).
|
||||
# Redacted blocks use 'data' for the signature payload
|
||||
if b.get("data"):
|
||||
new_content.append(b)
|
||||
# else: drop — no data means it can't be validated
|
||||
elif b.get("signature"):
|
||||
# Signed thinking block — keep it
|
||||
new_content.append(b)
|
||||
else:
|
||||
# Unsigned thinking — downgrade to text so it's not lost
|
||||
thinking_text = b.get("thinking", "")
|
||||
if thinking_text:
|
||||
new_content.append({"type": "text", "text": thinking_text})
|
||||
@@ -1925,15 +1966,12 @@ def _manage_thinking_signatures(
|
||||
if isinstance(b, dict) and b.get("type") in _THINKING_TYPES:
|
||||
b.pop("cache_control", None)
|
||||
|
||||
|
||||
def _evict_old_screenshots(result: List[Dict[str, Any]]) -> None:
|
||||
"""Keep only the most recent ``_MAX_KEEP_IMAGES`` computer-use screenshots.
|
||||
|
||||
Base64 images cost ~1,465 tokens each and accumulate across tool calls.
|
||||
Walk backward, keep the most recent N, replace older ones with a placeholder.
|
||||
|
||||
Mutates ``result`` in place.
|
||||
"""
|
||||
# ── Image eviction: keep only the most recent N screenshots ─────
|
||||
# computer_use screenshots (base64 images) sit inside tool_result
|
||||
# blocks: they accumulate and are sent with every API call. Each
|
||||
# costs ~1,465 tokens; after 10+ the conversation becomes slow
|
||||
# even for simple text queries. Walk backward, keep the most recent
|
||||
# _MAX_KEEP_IMAGES, replace older ones with a text placeholder.
|
||||
_MAX_KEEP_IMAGES = 3
|
||||
_image_count = 0
|
||||
for msg in reversed(result):
|
||||
@@ -1960,68 +1998,6 @@ def _evict_old_screenshots(result: List[Dict[str, Any]]) -> None:
|
||||
for b in inner
|
||||
]
|
||||
|
||||
|
||||
def convert_messages_to_anthropic(
    messages: List[Dict],
    base_url: str | None = None,
    model: str | None = None,
) -> Tuple[Optional[Any], List[Dict]]:
    """Convert OpenAI-format messages to Anthropic format.

    System messages are pulled out of the list because Anthropic takes the
    system prompt as a separate parameter. If several system messages are
    present, the last one wins. Every other message is converted by role and
    the resulting list is then post-processed in this order: orphaned tool
    blocks are stripped, consecutive same-role messages are merged, thinking
    block signatures are managed for the target endpoint, and old screenshots
    are evicted.

    Args:
        messages: OpenAI-style message dicts. A missing ``role`` is treated
            as ``"user"`` and missing ``content`` as ``""``.
        base_url: Endpoint URL, forwarded to ``_manage_thinking_signatures``.
            When it points to a third-party Anthropic-compatible endpoint,
            thinking block signatures are stripped, since such endpoints
            cannot validate them and reject them with HTTP 400
            "Invalid signature in thinking block".
        model: Model name, forwarded to ``_manage_thinking_signatures``.
            For the Kimi / Moonshot family (or a Kimi / Moonshot *base_url*),
            unsigned thinking blocks synthesised from ``reasoning_content``
            are preserved on replayed assistant tool-call messages.

    Returns:
        ``(system_prompt, anthropic_messages)``. ``system_prompt`` is ``None``
        when no system message was seen, a list of content blocks when the
        system content carried ``cache_control`` markers, and otherwise the
        system content as given (text parts joined with newlines for list
        content).
    """
    system = None
    result: List[Dict[str, Any]] = []

    for m in messages:
        role = m.get("role", "user")
        content = m.get("content", "")

        if role == "system":
            if isinstance(content, list):
                # Filter to dict parts once so both branches agree on what a
                # usable part is; the join below used to call .get() on every
                # part and raised AttributeError on a non-dict entry.
                dict_parts = [p for p in content if isinstance(p, dict)]
                if any(p.get("cache_control") for p in dict_parts):
                    # Preserve cache_control markers on content blocks
                    system = dict_parts
                else:
                    system = "\n".join(
                        p["text"] for p in dict_parts if p.get("type") == "text"
                    )
            else:
                system = content
            continue

        if role == "assistant":
            result.append(_convert_assistant_message(m))
            continue

        if role == "tool":
            # Appends to or extends the trailing user message in place.
            _convert_tool_message_to_result(result, m)
            continue

        # Anything else is treated as a regular user message.
        result.append(_convert_user_message(content))

    _strip_orphaned_tool_blocks(result)
    # Merging returns a new list, so the name must be rebound.
    result = _merge_consecutive_roles(result)
    _manage_thinking_signatures(result, base_url, model)
    _evict_old_screenshots(result)

    return system, result
|
||||
|
||||
|
||||
|
||||
@@ -46,7 +46,6 @@ from agent.message_sanitization import (
|
||||
_strip_non_ascii,
|
||||
)
|
||||
from agent.model_metadata import (
|
||||
MINIMUM_CONTEXT_LENGTH,
|
||||
estimate_messages_tokens_rough,
|
||||
estimate_request_tokens_rough,
|
||||
get_next_probe_tier,
|
||||
@@ -74,50 +73,6 @@ from utils import base_url_host_matches, env_var_enabled
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _ollama_context_limit_error(agent: Any, request_tokens: int) -> Optional[str]:
|
||||
"""Return a user-facing error when Ollama is loaded with too little context."""
|
||||
if not getattr(agent, "tools", None):
|
||||
return None
|
||||
|
||||
runtime_ctx = getattr(agent, "_ollama_num_ctx", None)
|
||||
if not isinstance(runtime_ctx, int) or runtime_ctx <= 0:
|
||||
return None
|
||||
if runtime_ctx >= MINIMUM_CONTEXT_LENGTH:
|
||||
return None
|
||||
|
||||
model = getattr(agent, "model", "") or "the selected model"
|
||||
base_url = getattr(agent, "base_url", "") or "unknown base URL"
|
||||
provider = getattr(agent, "provider", "") or "unknown"
|
||||
tool_count = len(getattr(agent, "tools", None) or [])
|
||||
|
||||
logger.warning(
|
||||
"Ollama runtime context too small for Hermes tool use: "
|
||||
"model=%s provider=%s base_url=%s runtime_context=%d "
|
||||
"minimum_context=%d estimated_request_tokens=%d tool_count=%d "
|
||||
"session=%s",
|
||||
model,
|
||||
provider,
|
||||
base_url,
|
||||
runtime_ctx,
|
||||
MINIMUM_CONTEXT_LENGTH,
|
||||
request_tokens,
|
||||
tool_count,
|
||||
getattr(agent, "session_id", None) or "none",
|
||||
)
|
||||
|
||||
return (
|
||||
f"Ollama loaded `{model}` with only {runtime_ctx:,} tokens of runtime "
|
||||
f"context, but Hermes needs at least {MINIMUM_CONTEXT_LENGTH:,} tokens "
|
||||
"for reliable tool use.\n\n"
|
||||
"Increase the Ollama context for this model and restart/reload the "
|
||||
"model before trying again. A known-good starting point is 65,536 "
|
||||
"tokens. In Hermes config, set `model.ollama_num_ctx: 65536` "
|
||||
"(and `model.context_length: 65536` if you also override the displayed "
|
||||
"model context). If you manage the model through an Ollama Modelfile, "
|
||||
"set `PARAMETER num_ctx 65536` there instead."
|
||||
)
|
||||
|
||||
|
||||
def _ra():
|
||||
"""Lazy reference to ``run_agent`` so callers can patch
|
||||
``run_agent.handle_function_call`` / ``run_agent._set_interrupt`` /
|
||||
@@ -572,7 +527,6 @@ def run_conversation(
|
||||
api_call_count = 0
|
||||
final_response = None
|
||||
interrupted = False
|
||||
failed = False
|
||||
codex_ack_continuations = 0
|
||||
length_continue_retries = 0
|
||||
truncated_tool_call_retries = 0
|
||||
@@ -929,26 +883,6 @@ def run_conversation(
|
||||
# Calculate approximate request size for logging
|
||||
total_chars = sum(len(str(msg)) for msg in api_messages)
|
||||
approx_tokens = estimate_messages_tokens_rough(api_messages)
|
||||
approx_request_tokens = estimate_request_tokens_rough(
|
||||
api_messages, tools=agent.tools or None
|
||||
)
|
||||
|
||||
_runtime_context_error = _ollama_context_limit_error(
|
||||
agent, approx_request_tokens
|
||||
)
|
||||
if _runtime_context_error:
|
||||
final_response = _runtime_context_error
|
||||
failed = True
|
||||
_turn_exit_reason = "ollama_runtime_context_too_small"
|
||||
messages.append({"role": "assistant", "content": final_response})
|
||||
agent._emit_status("❌ Ollama runtime context is too small for Hermes tool use")
|
||||
api_call_count -= 1
|
||||
agent._api_call_count = api_call_count
|
||||
try:
|
||||
agent.iteration_budget.refund()
|
||||
except Exception:
|
||||
pass
|
||||
break
|
||||
|
||||
# Thinking spinner for quiet mode (animated during API call)
|
||||
thinking_spinner = None
|
||||
@@ -989,7 +923,6 @@ def run_conversation(
|
||||
copilot_auth_retry_attempted=False
|
||||
thinking_sig_retry_attempted = False
|
||||
image_shrink_retry_attempted = False
|
||||
multimodal_tool_content_retry_attempted = False
|
||||
oauth_1m_beta_retry_attempted = False
|
||||
llama_cpp_grammar_retry_attempted = False
|
||||
has_retried_429 = False
|
||||
@@ -2061,31 +1994,6 @@ def run_conversation(
|
||||
"or shrink didn't reduce size; surfacing original error."
|
||||
)
|
||||
|
||||
# Multimodal-tool-content recovery: providers that follow
|
||||
# the OpenAI spec strictly (tool message content must be a
|
||||
# string) reject our list-type content with a 400. Strip
|
||||
# image parts from any list-type tool messages, mark the
|
||||
# (provider, model) as no-list-tool-content for the rest
|
||||
# of this session so future tool results preemptively
|
||||
# downgrade, and retry once. See issue #27344.
|
||||
if (
|
||||
classified.reason == FailoverReason.multimodal_tool_content_unsupported
|
||||
and not multimodal_tool_content_retry_attempted
|
||||
):
|
||||
multimodal_tool_content_retry_attempted = True
|
||||
if agent._try_strip_image_parts_from_tool_messages(api_messages):
|
||||
agent._vprint(
|
||||
f"{agent.log_prefix}📐 Provider rejected list-type tool content — "
|
||||
f"downgraded screenshots to text and retrying...",
|
||||
force=True,
|
||||
)
|
||||
continue
|
||||
else:
|
||||
logger.info(
|
||||
"multimodal-tool-content recovery: no list-type tool "
|
||||
"messages with image parts found; surfacing original error."
|
||||
)
|
||||
|
||||
# Anthropic OAuth subscription rejected the 1M-context beta
|
||||
# header ("long context beta is not yet available for this
|
||||
# subscription"). Disable the beta for the rest of this
|
||||
@@ -3940,11 +3848,7 @@ def run_conversation(
|
||||
)
|
||||
|
||||
# Determine if conversation completed successfully
|
||||
completed = (
|
||||
final_response is not None
|
||||
and api_call_count < agent.max_iterations
|
||||
and not failed
|
||||
)
|
||||
completed = final_response is not None and api_call_count < agent.max_iterations
|
||||
|
||||
# Save trajectory if enabled. ``user_message`` may be a multimodal
|
||||
# list of parts; the trajectory format wants a plain string.
|
||||
@@ -4094,7 +3998,6 @@ def run_conversation(
|
||||
"api_calls": api_call_count,
|
||||
"completed": completed,
|
||||
"turn_exit_reason": _turn_exit_reason,
|
||||
"failed": failed,
|
||||
"partial": False, # True only when stopped due to invalid tool calls
|
||||
"interrupted": interrupted,
|
||||
"response_previewed": getattr(agent, "_response_was_previewed", False),
|
||||
|
||||
@@ -50,7 +50,6 @@ class FailoverReason(enum.Enum):
|
||||
|
||||
# Request format
|
||||
format_error = "format_error" # 400 bad request — abort or strip + retry
|
||||
multimodal_tool_content_unsupported = "multimodal_tool_content_unsupported" # Provider rejected list-type content in tool messages (e.g. Xiaomi MiMo) — downgrade to text and retry
|
||||
|
||||
# Provider-specific
|
||||
thinking_signature = "thinking_signature" # Anthropic thinking block sig invalid
|
||||
@@ -166,32 +165,6 @@ _IMAGE_TOO_LARGE_PATTERNS = [
|
||||
# the likely culprit; we still try the shrink path before giving up.
|
||||
]
|
||||
|
||||
# Providers that follow the OpenAI spec strictly require tool message
|
||||
# ``content`` to be a string. Some (Anthropic native, Codex Responses,
|
||||
# Gemini native, first-party OpenAI) extend this to accept a content-parts
|
||||
# list (text + image_url) so screenshots from computer_use survive. Others
|
||||
# (Xiaomi MiMo, some Alibaba endpoints, a long tail of OpenAI-compatible
|
||||
# providers) reject the list with a 400 — the patterns below are the most
|
||||
# common error shapes we see. Recovery: strip image parts from tool
|
||||
# messages in-place, record the (provider, model) for the rest of the
|
||||
# session so we don't waste another call learning the same lesson, retry.
|
||||
#
|
||||
# See: https://github.com/NousResearch/hermes-agent/issues/27344
|
||||
_MULTIMODAL_TOOL_CONTENT_PATTERNS = [
|
||||
# Xiaomi MiMo: {"error":{"code":"400","message":"Param Incorrect","param":"text is not set"}}
|
||||
"text is not set",
|
||||
# Generic "tool message must be string" shapes
|
||||
"tool message content must be a string",
|
||||
"tool content must be a string",
|
||||
"tool message must be a string",
|
||||
# OpenAI-compat servers that reject list-type tool content with a
|
||||
# schema-validation message
|
||||
"expected string, got list",
|
||||
"expected string, got array",
|
||||
# Alibaba/DashScope variant
|
||||
"tool_call.content must be string",
|
||||
]
|
||||
|
||||
# Context overflow patterns
|
||||
_CONTEXT_OVERFLOW_PATTERNS = [
|
||||
"context length",
|
||||
@@ -808,19 +781,6 @@ def _classify_400(
|
||||
) -> ClassifiedError:
|
||||
"""Classify 400 Bad Request — context overflow, format error, or generic."""
|
||||
|
||||
# Multimodal tool content rejected from 400. Must be checked BEFORE
|
||||
# image_too_large because the recovery is different (strip image parts
|
||||
# from tool messages, mark the model as no-list-tool-content for the
|
||||
# rest of the session) and BEFORE context_overflow because some of the
|
||||
# patterns ("text is not set") are ambiguous in isolation but become
|
||||
# specific when combined with a 400 on a request known to contain
|
||||
# multimodal tool content.
|
||||
if any(p in error_msg for p in _MULTIMODAL_TOOL_CONTENT_PATTERNS):
|
||||
return result_fn(
|
||||
FailoverReason.multimodal_tool_content_unsupported,
|
||||
retryable=True,
|
||||
)
|
||||
|
||||
# Image-too-large from 400 (Anthropic's 5 MB per-image check fires this way).
|
||||
# Must be checked BEFORE context_overflow because messages can trip both
|
||||
# patterns ("exceeds" + "image") and image-shrink is a cheaper recovery.
|
||||
@@ -962,13 +922,6 @@ def _classify_by_message(
|
||||
should_compress=True,
|
||||
)
|
||||
|
||||
# Multimodal tool content patterns (from message text when no status_code)
|
||||
if any(p in error_msg for p in _MULTIMODAL_TOOL_CONTENT_PATTERNS):
|
||||
return result_fn(
|
||||
FailoverReason.multimodal_tool_content_unsupported,
|
||||
retryable=True,
|
||||
)
|
||||
|
||||
# Image-too-large patterns (from message text when no status_code)
|
||||
if any(p in error_msg for p in _IMAGE_TOO_LARGE_PATTERNS):
|
||||
return result_fn(
|
||||
|
||||
+19
-39
@@ -428,6 +428,23 @@ PLATFORM_HINTS = {
|
||||
"files arrive as downloadable documents. You can also include image "
|
||||
"URLs in markdown format ![alt](url) and they will be sent as photos."
|
||||
),
|
||||
"whatsapp_cloud": (
|
||||
"You are on a text messaging communication platform, WhatsApp "
|
||||
"(via Meta's official Business Cloud API). Standard markdown "
|
||||
"(**bold**, ~~strike~~, # headers, [links](url)) is auto-converted "
|
||||
"to WhatsApp's native syntax (*bold*, ~strike~, etc.) — feel free "
|
||||
"to write in markdown. Tables are NOT supported — prefer bullet "
|
||||
"lists or labeled key:value pairs. "
|
||||
"You can send media files natively: include MEDIA:/absolute/path/to/file "
|
||||
"in your response. Images (.jpg, .png) become photo attachments, "
|
||||
"videos (.mp4) play inline, audio (.mp3, .ogg) sends as voice/audio "
|
||||
"messages, other files arrive as documents. Image URLs in markdown "
|
||||
"format ![alt](url) also work. "
|
||||
"IMPORTANT: this platform has a 24-hour conversation window — if the "
|
||||
"user hasn't messaged in 24h, free-form replies are refused by Meta "
|
||||
"(error 131047). This rarely matters for live chat, but is worth "
|
||||
"knowing if you're scheduling a delayed message."
|
||||
),
|
||||
"telegram": (
|
||||
"You are on a text messaging communication platform, Telegram. "
|
||||
"Standard markdown is automatically converted to Telegram format. "
|
||||
@@ -1258,10 +1275,6 @@ def build_nous_subscription_prompt(valid_tool_names: "set[str] | None" = None) -
|
||||
"terminal",
|
||||
"process",
|
||||
"execute_code",
|
||||
"app_search_tools",
|
||||
"app_tool_schemas",
|
||||
"app_execute_tools",
|
||||
"app_manage_connections",
|
||||
}
|
||||
|
||||
if valid_names and not (valid_names & relevant_tool_names):
|
||||
@@ -1283,13 +1296,13 @@ def build_nous_subscription_prompt(valid_tool_names: "set[str] | None" = None) -
|
||||
|
||||
lines = [
|
||||
"# Nous Subscription",
|
||||
"Nous subscription includes managed web tools (Firecrawl), image generation (FAL), OpenAI TTS, browser automation (Browser Use), and app integrations (500+ apps) by default. Modal execution is optional.",
|
||||
"Nous subscription includes managed web tools (Firecrawl), image generation (FAL), OpenAI TTS, OpenAI Whisper STT, and browser automation (Browser Use) by default. Modal execution is optional.",
|
||||
"Current capability status:",
|
||||
]
|
||||
lines.extend(_status_line(feature) for feature in features.items())
|
||||
lines.extend(
|
||||
[
|
||||
"When a Nous-managed feature is active, do not ask the user for Firecrawl, FAL, OpenAI TTS, or Browser-Use API keys.",
|
||||
"When a Nous-managed feature is active, do not ask the user for Firecrawl, FAL, OpenAI TTS, OpenAI Whisper, or Browser-Use API keys.",
|
||||
"If the user is not subscribed and asks for a capability that Nous subscription would unlock or simplify, suggest Nous subscription as one option alongside direct setup or local alternatives.",
|
||||
"Do not mention subscription unless the user asks about it or it directly solves the current missing capability.",
|
||||
"Useful commands: hermes setup, hermes setup tools, hermes setup terminal, hermes status.",
|
||||
@@ -1298,39 +1311,6 @@ def build_nous_subscription_prompt(valid_tool_names: "set[str] | None" = None) -
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# App tools (500+ external integrations) behavioural prompt
|
||||
# =========================================================================
|
||||
|
||||
_APP_TOOLS_PROMPT = """\
|
||||
## App Tools (500+ External Integrations)
|
||||
|
||||
You have app_search_tools, app_tool_schemas, app_execute_tools, and app_manage_connections available RIGHT NOW as callable tools. They are already configured and connected to the Nous tool gateway — no SDK installation, no API keys, no plugin setup needed. Just call them.
|
||||
|
||||
**When to use:** When a user asks to interact with ANY external app or service — Gmail, Slack, GitHub, Jira, Notion, Google Sheets, Linear, HubSpot, Figma, Salesforce, or any of 500+ other apps. ALWAYS prefer these tools over loading skills about the same service (e.g. do NOT load the 'linear', 'airtable', 'google-workspace', 'notion', or any similar skill — use app_search_tools instead). Do NOT suggest installing SDKs, CLI tools, MCP servers, or API keys for external services — call app_search_tools directly.
|
||||
|
||||
**Workflow:**
|
||||
1. Call app_search_tools with a clear use_case description to discover available tools
|
||||
2. Check the response for connection status — if no active connection, call app_manage_connections and share the auth link with the user
|
||||
3. Review the execution plan and pitfalls in the search response before executing
|
||||
4. If a tool has schemaRef instead of input_schema, call app_tool_schemas to get the full schema
|
||||
5. Execute tools via app_execute_tools with schema-compliant arguments
|
||||
|
||||
**Session tracking:** Pass session: {generate_id: true} on your first app_search_tools call. Reuse the returned session.id in all subsequent calls. Generate a new session when the user pivots to a different task.
|
||||
|
||||
**Important:** Never fabricate tool slugs or argument field names. Only use slugs and schemas returned by app_search_tools or app_tool_schemas."""
|
||||
|
||||
|
||||
def build_app_tools_prompt(valid_tool_names: "set[str] | None" = None) -> str:
|
||||
"""Return the app tools behavioural guidance when the toolset is active."""
|
||||
if valid_tool_names and "app_search_tools" not in valid_tool_names:
|
||||
return ""
|
||||
if not valid_tool_names:
|
||||
# No tool names known — skip (conservative)
|
||||
return ""
|
||||
return _APP_TOOLS_PROMPT
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Context files (SOUL.md, AGENTS.md, .cursorrules)
|
||||
# =========================================================================
|
||||
|
||||
+1
-19
@@ -12,7 +12,7 @@ import sys
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Set, Tuple
|
||||
|
||||
from hermes_constants import get_config_path, get_skills_dir, is_termux
|
||||
from hermes_constants import get_config_path, get_skills_dir
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -136,14 +136,6 @@ def skill_matches_platform(frontmatter: Dict[str, Any]) -> bool:
|
||||
|
||||
If the field is absent or empty the skill is compatible with **all**
|
||||
platforms (backward-compatible default).
|
||||
|
||||
Termux note: on Termux/Android, ``sys.platform`` is ``"linux"`` on
|
||||
older Pythons but became ``"android"`` on Python 3.13+. Termux is a
|
||||
Linux userland riding on the Android kernel, so skills tagged
|
||||
``linux`` are treated as compatible in Termux regardless of which
|
||||
``sys.platform`` value Python reports. Individual Linux commands
|
||||
inside a skill may still misbehave (no systemd, BusyBox utils, no
|
||||
apt/dnf, etc.) but that is on the skill, not on platform gating.
|
||||
"""
|
||||
platforms = frontmatter.get("platforms")
|
||||
if not platforms:
|
||||
@@ -151,21 +143,11 @@ def skill_matches_platform(frontmatter: Dict[str, Any]) -> bool:
|
||||
if not isinstance(platforms, list):
|
||||
platforms = [platforms]
|
||||
current = sys.platform
|
||||
running_in_termux = is_termux()
|
||||
for platform in platforms:
|
||||
normalized = str(platform).lower().strip()
|
||||
mapped = PLATFORM_MAP.get(normalized, normalized)
|
||||
if current.startswith(mapped):
|
||||
return True
|
||||
# Termux runs a Linux userland on Android. Accept linux-tagged
|
||||
# skills regardless of whether sys.platform is "linux" (pre-3.13
|
||||
# Termux) or "android" (Python 3.13+ Termux, and any other
|
||||
# Android runtime).
|
||||
if running_in_termux and mapped == "linux":
|
||||
return True
|
||||
# Explicit termux/android tags match a Termux session too.
|
||||
if running_in_termux and mapped in ("termux", "android"):
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
|
||||
@@ -130,12 +130,6 @@ def build_system_prompt_parts(agent: Any, system_message: Optional[str] = None)
|
||||
nous_subscription_prompt = _r.build_nous_subscription_prompt(agent.valid_tool_names)
|
||||
if nous_subscription_prompt:
|
||||
stable_parts.append(nous_subscription_prompt)
|
||||
|
||||
# App tools (500+ external integrations) behavioural guidance
|
||||
app_tools_prompt = _r.build_app_tools_prompt(agent.valid_tool_names)
|
||||
if app_tools_prompt:
|
||||
stable_parts.append(app_tools_prompt)
|
||||
|
||||
# Tool-use enforcement: tells the model to actually call tools instead
|
||||
# of describing intended actions. Controlled by config.yaml
|
||||
# agent.tool_use_enforcement:
|
||||
|
||||
@@ -114,6 +114,7 @@ _HOME_TARGET_ENV_VARS = {
|
||||
"bluebubbles": "BLUEBUBBLES_HOME_CHANNEL",
|
||||
"qqbot": "QQBOT_HOME_CHANNEL",
|
||||
"whatsapp": "WHATSAPP_HOME_CHANNEL",
|
||||
"whatsapp_cloud": "WHATSAPP_CLOUD_HOME_CHANNEL",
|
||||
}
|
||||
|
||||
# Legacy env var names kept for back-compat. Each entry is the current
|
||||
|
||||
@@ -109,6 +109,7 @@ class Platform(Enum):
|
||||
TELEGRAM = "telegram"
|
||||
DISCORD = "discord"
|
||||
WHATSAPP = "whatsapp"
|
||||
WHATSAPP_CLOUD = "whatsapp_cloud"
|
||||
SLACK = "slack"
|
||||
SIGNAL = "signal"
|
||||
MATTERMOST = "mattermost"
|
||||
@@ -419,6 +420,9 @@ _PLATFORM_CONNECTED_CHECKERS: dict[Platform, Callable[[PlatformConfig], bool]] =
|
||||
cfg.extra.get("account_id") and (cfg.token or cfg.extra.get("token"))
|
||||
),
|
||||
Platform.WHATSAPP: lambda cfg: True, # bridge handles auth
|
||||
Platform.WHATSAPP_CLOUD: lambda cfg: bool(
|
||||
cfg.extra.get("phone_number_id") and cfg.extra.get("access_token")
|
||||
),
|
||||
Platform.SIGNAL: lambda cfg: bool(cfg.extra.get("http_url")),
|
||||
Platform.EMAIL: lambda cfg: bool(cfg.extra.get("address")),
|
||||
Platform.SMS: lambda cfg: bool(os.getenv("TWILIO_ACCOUNT_SID")),
|
||||
@@ -1367,6 +1371,61 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
|
||||
thread_id=os.getenv("WHATSAPP_HOME_CHANNEL_THREAD_ID") or None,
|
||||
)
|
||||
|
||||
# WhatsApp Cloud API (official Business Platform via Meta).
|
||||
# Distinct from the Baileys bridge: pure HTTP graph.facebook.com calls
|
||||
# outbound, public webhook inbound. Both adapters can run in parallel
|
||||
# against different phone numbers.
|
||||
whatsapp_cloud_phone_id = os.getenv("WHATSAPP_CLOUD_PHONE_NUMBER_ID")
|
||||
whatsapp_cloud_token = os.getenv("WHATSAPP_CLOUD_ACCESS_TOKEN")
|
||||
if whatsapp_cloud_phone_id and whatsapp_cloud_token:
|
||||
if Platform.WHATSAPP_CLOUD not in config.platforms:
|
||||
config.platforms[Platform.WHATSAPP_CLOUD] = PlatformConfig()
|
||||
config.platforms[Platform.WHATSAPP_CLOUD].enabled = True
|
||||
config.platforms[Platform.WHATSAPP_CLOUD].extra.update({
|
||||
"phone_number_id": whatsapp_cloud_phone_id,
|
||||
"access_token": whatsapp_cloud_token,
|
||||
})
|
||||
# Optional: app_id / app_secret (signature verification)
|
||||
wa_cloud_app_id = os.getenv("WHATSAPP_CLOUD_APP_ID")
|
||||
if wa_cloud_app_id:
|
||||
config.platforms[Platform.WHATSAPP_CLOUD].extra["app_id"] = wa_cloud_app_id
|
||||
wa_cloud_app_secret = os.getenv("WHATSAPP_CLOUD_APP_SECRET")
|
||||
if wa_cloud_app_secret:
|
||||
config.platforms[Platform.WHATSAPP_CLOUD].extra["app_secret"] = wa_cloud_app_secret
|
||||
# Optional: WABA id (analytics, future use)
|
||||
wa_cloud_waba_id = os.getenv("WHATSAPP_CLOUD_WABA_ID")
|
||||
if wa_cloud_waba_id:
|
||||
config.platforms[Platform.WHATSAPP_CLOUD].extra["waba_id"] = wa_cloud_waba_id
|
||||
# Webhook verify token — Meta hub.verify_token shared secret
|
||||
wa_cloud_verify_token = os.getenv("WHATSAPP_CLOUD_VERIFY_TOKEN")
|
||||
if wa_cloud_verify_token:
|
||||
config.platforms[Platform.WHATSAPP_CLOUD].extra["verify_token"] = wa_cloud_verify_token
|
||||
# Webhook server bind config (defaults baked into the adapter)
|
||||
wa_cloud_host = os.getenv("WHATSAPP_CLOUD_WEBHOOK_HOST")
|
||||
if wa_cloud_host:
|
||||
config.platforms[Platform.WHATSAPP_CLOUD].extra["webhook_host"] = wa_cloud_host
|
||||
wa_cloud_port = os.getenv("WHATSAPP_CLOUD_WEBHOOK_PORT")
|
||||
if wa_cloud_port:
|
||||
try:
|
||||
config.platforms[Platform.WHATSAPP_CLOUD].extra["webhook_port"] = int(wa_cloud_port)
|
||||
except ValueError:
|
||||
pass
|
||||
wa_cloud_path = os.getenv("WHATSAPP_CLOUD_WEBHOOK_PATH")
|
||||
if wa_cloud_path:
|
||||
config.platforms[Platform.WHATSAPP_CLOUD].extra["webhook_path"] = wa_cloud_path
|
||||
# Graph API version override (rarely needed)
|
||||
wa_cloud_api_version = os.getenv("WHATSAPP_CLOUD_API_VERSION")
|
||||
if wa_cloud_api_version:
|
||||
config.platforms[Platform.WHATSAPP_CLOUD].extra["api_version"] = wa_cloud_api_version
|
||||
whatsapp_cloud_home = os.getenv("WHATSAPP_CLOUD_HOME_CHANNEL")
|
||||
if whatsapp_cloud_home and Platform.WHATSAPP_CLOUD in config.platforms:
|
||||
config.platforms[Platform.WHATSAPP_CLOUD].home_channel = HomeChannel(
|
||||
platform=Platform.WHATSAPP_CLOUD,
|
||||
chat_id=whatsapp_cloud_home,
|
||||
name=os.getenv("WHATSAPP_CLOUD_HOME_CHANNEL_NAME", "Home"),
|
||||
thread_id=os.getenv("WHATSAPP_CLOUD_HOME_CHANNEL_THREAD_ID") or None,
|
||||
)
|
||||
|
||||
# Slack
|
||||
slack_token = os.getenv("SLACK_BOT_TOKEN")
|
||||
if slack_token:
|
||||
|
||||
@@ -95,6 +95,12 @@ _PLATFORM_DEFAULTS: dict[str, dict[str, Any]] = {
|
||||
# Tier 3 — no edit support, progress messages are permanent
|
||||
"signal": _TIER_LOW,
|
||||
"whatsapp": _TIER_MEDIUM, # Baileys bridge supports /edit
|
||||
# WhatsApp Cloud API: Meta added message editing in 2023 but the
|
||||
# Hermes Cloud adapter doesn't implement edit_message yet, so we
|
||||
# stay on TIER_LOW (tool_progress off) to avoid spamming each
|
||||
# status update as a separate message. Promote to TIER_MEDIUM once
|
||||
# Cloud's edit_message lands.
|
||||
"whatsapp_cloud": _TIER_LOW,
|
||||
"bluebubbles": _TIER_LOW,
|
||||
"weixin": _TIER_LOW,
|
||||
"wecom": _TIER_LOW,
|
||||
|
||||
+20
-95
@@ -18,7 +18,6 @@ Security features (based on OWASP + NIST SP 800-63-4 guidance):
|
||||
Storage: ~/.hermes/pairing/
|
||||
"""
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
import os
|
||||
import secrets
|
||||
@@ -149,11 +148,6 @@ class PairingStore:
|
||||
|
||||
# ----- Pending codes -----
|
||||
|
||||
@staticmethod
|
||||
def _hash_code(code: str, salt: bytes) -> str:
|
||||
"""Hash a pairing code with the given salt using SHA-256."""
|
||||
return hashlib.sha256(salt + code.encode("utf-8")).hexdigest()
|
||||
|
||||
def generate_code(
|
||||
self, platform: str, user_id: str, user_name: str = ""
|
||||
) -> Optional[str]:
|
||||
@@ -164,9 +158,6 @@ class PairingStore:
|
||||
- User is rate-limited (too recent request)
|
||||
- Max pending codes reached for this platform
|
||||
- User/platform is in lockout due to failed attempts
|
||||
|
||||
The code is NOT stored in plaintext. Only a salted SHA-256 hash is
|
||||
persisted so that reading the pending file does not reveal codes.
|
||||
"""
|
||||
with self._lock:
|
||||
self._cleanup_expired(platform)
|
||||
@@ -187,17 +178,8 @@ class PairingStore:
|
||||
# Generate cryptographically random code
|
||||
code = "".join(secrets.choice(ALPHABET) for _ in range(CODE_LENGTH))
|
||||
|
||||
# Hash the code with a random salt before storing
|
||||
salt = os.urandom(16)
|
||||
code_hash = self._hash_code(code, salt)
|
||||
|
||||
# Use a unique entry id as the key (not the code itself)
|
||||
entry_id = secrets.token_hex(8)
|
||||
|
||||
# Store pending request with hashed code
|
||||
pending[entry_id] = {
|
||||
"hash": code_hash,
|
||||
"salt": salt.hex(),
|
||||
# Store pending request
|
||||
pending[code] = {
|
||||
"user_id": user_id,
|
||||
"user_name": user_name,
|
||||
"created_at": time.time(),
|
||||
@@ -213,16 +195,10 @@ class PairingStore:
|
||||
"""
|
||||
Approve a pairing code. Adds the user to the approved list.
|
||||
|
||||
Returns ``{user_id, user_name}`` on success, ``None`` if the code is
|
||||
Returns {user_id, user_name} on success, None if code is
|
||||
invalid/expired OR the platform is currently locked out after
|
||||
``MAX_FAILED_ATTEMPTS`` failed approvals (#10195). Callers can
|
||||
disambiguate with ``_is_locked_out(platform)``.
|
||||
|
||||
Verification: the user-provided code is hashed with each stored
|
||||
entry's salt and compared to the stored hash using constant-time
|
||||
comparison. Pre-hash entries (legacy plaintext-key format from
|
||||
pre-upgrade pending.json files) are silently ignored — they get
|
||||
pruned at TTL by ``_cleanup_expired``.
|
||||
"""
|
||||
with self._lock:
|
||||
self._cleanup_expired(platform)
|
||||
@@ -237,73 +213,34 @@ class PairingStore:
|
||||
return None
|
||||
|
||||
pending = self._load_json(self._pending_path(platform))
|
||||
|
||||
# Find the entry whose hash matches the provided code.
|
||||
# Tolerate legacy plaintext-key entries (no salt/hash) and
|
||||
# malformed entries — skip them rather than KeyError, so an
|
||||
# in-place upgrade across an existing pending.json doesn't
|
||||
# crash on the first approve call. Legacy entries get pruned
|
||||
# at their TTL by _cleanup_expired.
|
||||
matched_key = None
|
||||
matched_entry = None
|
||||
for entry_id, entry in pending.items():
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
if "salt" not in entry or "hash" not in entry:
|
||||
continue
|
||||
try:
|
||||
salt = bytes.fromhex(entry["salt"])
|
||||
except ValueError:
|
||||
continue
|
||||
candidate_hash = self._hash_code(code, salt)
|
||||
if secrets.compare_digest(candidate_hash, entry["hash"]):
|
||||
matched_key = entry_id
|
||||
matched_entry = entry
|
||||
break
|
||||
|
||||
if matched_key is None:
|
||||
if code not in pending:
|
||||
self._record_failed_attempt(platform)
|
||||
return None
|
||||
|
||||
del pending[matched_key]
|
||||
entry = pending.pop(code)
|
||||
self._save_json(self._pending_path(platform), pending)
|
||||
|
||||
# Add to approved list
|
||||
self._approve_user(platform, matched_entry["user_id"],
|
||||
matched_entry.get("user_name", ""))
|
||||
self._approve_user(platform, entry["user_id"], entry.get("user_name", ""))
|
||||
|
||||
return {
|
||||
"user_id": matched_entry["user_id"],
|
||||
"user_name": matched_entry.get("user_name", ""),
|
||||
"user_id": entry["user_id"],
|
||||
"user_name": entry.get("user_name", ""),
|
||||
}
|
||||
|
||||
def list_pending(self, platform: str = None) -> list:
|
||||
"""List pending pairing requests, optionally filtered by platform.
|
||||
|
||||
Codes are stored hashed — the ``code`` field is replaced with the
|
||||
first 8 hex characters of the hash so admins can distinguish entries
|
||||
without revealing the original code. Legacy plaintext-key entries
|
||||
(pre-hash format) are shown with a "legacy" placeholder so admins
|
||||
can see them age out without crashing on a missing ``hash`` field.
|
||||
"""
|
||||
"""List pending pairing requests, optionally filtered by platform."""
|
||||
results = []
|
||||
platforms = [platform] if platform else self._all_platforms("pending")
|
||||
for p in platforms:
|
||||
self._cleanup_expired(p)
|
||||
pending = self._load_json(self._pending_path(p))
|
||||
for entry_id, info in pending.items():
|
||||
if not isinstance(info, dict):
|
||||
continue
|
||||
created_at = info.get("created_at")
|
||||
if not isinstance(created_at, (int, float)):
|
||||
continue
|
||||
age_min = int((time.time() - created_at) / 60)
|
||||
hash_val = info.get("hash")
|
||||
code_display = hash_val[:8] if isinstance(hash_val, str) else "legacy"
|
||||
for code, info in pending.items():
|
||||
age_min = int((time.time() - info["created_at"]) / 60)
|
||||
results.append({
|
||||
"platform": p,
|
||||
"code": code_display,
|
||||
"user_id": info.get("user_id", ""),
|
||||
"code": code,
|
||||
"user_id": info["user_id"],
|
||||
"user_name": info.get("user_name", ""),
|
||||
"age_minutes": age_min,
|
||||
})
|
||||
@@ -360,29 +297,17 @@ class PairingStore:
|
||||
# ----- Cleanup -----
|
||||
|
||||
def _cleanup_expired(self, platform: str) -> None:
|
||||
"""Remove expired pending codes.
|
||||
|
||||
Tolerant of malformed / legacy entries — anything without a numeric
|
||||
``created_at`` is treated as expired (it's effectively unusable
|
||||
with the new hash-keyed schema anyway).
|
||||
"""
|
||||
"""Remove expired pending codes."""
|
||||
path = self._pending_path(platform)
|
||||
pending = self._load_json(path)
|
||||
now = time.time()
|
||||
expired = []
|
||||
for entry_id, info in pending.items():
|
||||
if not isinstance(info, dict):
|
||||
expired.append(entry_id)
|
||||
continue
|
||||
created_at = info.get("created_at")
|
||||
if not isinstance(created_at, (int, float)):
|
||||
expired.append(entry_id)
|
||||
continue
|
||||
if (now - created_at) > CODE_TTL_SECONDS:
|
||||
expired.append(entry_id)
|
||||
expired = [
|
||||
code for code, info in pending.items()
|
||||
if (now - info["created_at"]) > CODE_TTL_SECONDS
|
||||
]
|
||||
if expired:
|
||||
for entry_id in expired:
|
||||
del pending[entry_id]
|
||||
for code in expired:
|
||||
del pending[code]
|
||||
self._save_json(path, pending)
|
||||
|
||||
def _all_platforms(self, suffix: str) -> list:
|
||||
|
||||
@@ -52,6 +52,22 @@ for the full pattern (Template Buttons postback at 45s, `RequestCache`
|
||||
state machine, `interrupt_session_activity` override for `/stop`
|
||||
orphans) and the developer-guide page for the prose walkthrough.
|
||||
|
||||
**Sibling adapters that share behavior.** When a single platform has
|
||||
two transport modes the user picks between — unofficial vs official
|
||||
APIs, polling vs websocket, library A vs library B — the right
|
||||
structure is two adapters that share a behavior mixin. WhatsApp does
|
||||
this: `gateway/platforms/whatsapp.py` (Baileys bridge) and
|
||||
`gateway/platforms/whatsapp_cloud.py` (Meta Cloud API) both inherit
|
||||
from `WhatsAppBehaviorMixin` in `gateway/platforms/whatsapp_common.py`.
|
||||
The mixin owns gating, allow-lists, mention parsing, broadcast
|
||||
filters, and the WhatsApp-flavored markdown conversion — everything
|
||||
that's platform-protocol-agnostic. Each adapter owns its transport.
|
||||
Both register distinct `Platform.*` enum values so the gateway can run
|
||||
both simultaneously against different phone numbers. The mixin must
|
||||
come **first** in the bases list — `class WhatsAppAdapter(Mixin,
|
||||
BasePlatformAdapter)` — so the mixin's `format_message` overrides
|
||||
`BasePlatformAdapter`'s generic default.
|
||||
|
||||
See `plugins/platforms/irc/`, `plugins/platforms/teams/`, and
|
||||
`plugins/platforms/google_chat/` for complete working examples, and
|
||||
`website/docs/developer-guide/adding-platform-adapters.md` for the full
|
||||
@@ -94,6 +110,19 @@ The adapter is a subclass of `BasePlatformAdapter` from `gateway/platforms/base.
|
||||
| `send_animation(chat_id, path, caption)` | Send a GIF/animation |
|
||||
| `send_image_file(chat_id, path, caption)` | Send image from local file |
|
||||
|
||||
### Interactive UX (recommended if your platform supports tappable buttons)
|
||||
|
||||
If your platform supports interactive button/menu messages, implement these for a more polished agent experience. They all degrade gracefully to plain text when not overridden:
|
||||
|
||||
| Method | Purpose |
|
||||
|--------|---------|
|
||||
| `send_clarify(chat_id, question, choices, clarify_id, session_key, ...)` | Render the `clarify` tool's multi-choice question as tappable buttons. Pair with inbound dispatch that routes button taps to `tools.clarify_gateway.resolve_gateway_clarify`. |
|
||||
| `send_exec_approval(chat_id, command, session_key, description, ...)` | Render dangerous-command approval as Approve/Deny buttons. Inbound dispatch routes to `tools.approval.resolve_gateway_approval`. |
|
||||
| `send_slash_confirm(chat_id, title, message, session_key, confirm_id, ...)` | Render slash-command confirmations (e.g. `/reload-mcp`) as Once/Always/Cancel buttons. Inbound dispatch routes to `tools.slash_confirm.resolve`. |
|
||||
| `send_model_picker(...)` | Interactive `/model` picker. Used by Telegram and Discord. |
|
||||
|
||||
See `gateway/platforms/telegram.py`, `discord.py`, and `whatsapp_cloud.py` for reference implementations. The button-callback id convention (`cl:<id>:<idx>`, `appr:<id>:<choice>`, `sc:<choice>:<id>`) is shared across adapters — match it so the gateway-side resolvers work without modification.
|
||||
|
||||
### Required function
|
||||
|
||||
```python
|
||||
|
||||
@@ -308,26 +308,11 @@ class WebhookAdapter(BasePlatformAdapter):
|
||||
data = json.loads(subs_path.read_text(encoding="utf-8"))
|
||||
if not isinstance(data, dict):
|
||||
return
|
||||
# Merge: static routes take precedence over dynamic ones.
|
||||
# Reject any dynamic route whose effective secret is empty —
|
||||
# an empty secret would cause _handle_webhook to skip HMAC
|
||||
# validation entirely, letting unauthenticated callers in.
|
||||
new_dynamic: Dict[str, dict] = {}
|
||||
for k, v in data.items():
|
||||
if k in self._static_routes:
|
||||
continue
|
||||
effective_secret = v.get("secret", self._global_secret)
|
||||
if not effective_secret:
|
||||
logger.warning(
|
||||
"[webhook] Dynamic route '%s' skipped: 'secret' is "
|
||||
"missing or empty. Set a valid HMAC secret, or use "
|
||||
"'%s' to explicitly disable auth (testing only).",
|
||||
k,
|
||||
_INSECURE_NO_AUTH,
|
||||
)
|
||||
continue
|
||||
new_dynamic[k] = v
|
||||
self._dynamic_routes = new_dynamic
|
||||
# Merge: static routes take precedence over dynamic ones
|
||||
self._dynamic_routes = {
|
||||
k: v for k, v in data.items()
|
||||
if k not in self._static_routes
|
||||
}
|
||||
self._routes = {**self._dynamic_routes, **self._static_routes}
|
||||
self._dynamic_routes_mtime = mtime
|
||||
logger.info(
|
||||
|
||||
@@ -16,11 +16,9 @@ with different backends via a bridge pattern.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import platform
|
||||
import re
|
||||
import shutil
|
||||
import signal
|
||||
import subprocess
|
||||
@@ -180,6 +178,7 @@ import sys
|
||||
sys.path.insert(0, str(Path(__file__).resolve().parents[2]))
|
||||
|
||||
from gateway.config import Platform, PlatformConfig
|
||||
from gateway.platforms.whatsapp_common import WhatsAppBehaviorMixin
|
||||
from gateway.platforms.base import (
|
||||
BasePlatformAdapter,
|
||||
MessageEvent,
|
||||
@@ -215,7 +214,7 @@ def check_whatsapp_requirements() -> bool:
|
||||
return False
|
||||
|
||||
|
||||
class WhatsAppAdapter(BasePlatformAdapter):
|
||||
class WhatsAppAdapter(WhatsAppBehaviorMixin, BasePlatformAdapter):
|
||||
"""
|
||||
WhatsApp adapter.
|
||||
|
||||
@@ -237,13 +236,12 @@ class WhatsAppAdapter(BasePlatformAdapter):
|
||||
- allow_from: List of sender IDs allowed in DMs (when dm_policy="allowlist")
|
||||
- group_policy: "open" | "allowlist" | "disabled" — which groups are processed (default: "open")
|
||||
- group_allow_from: List of group JIDs allowed (when group_policy="allowlist")
|
||||
|
||||
Behavior (gating, mention parsing, markdown conversion, chunking) is
|
||||
provided by ``WhatsAppBehaviorMixin`` so the Cloud API adapter can
|
||||
share it. Only transport-specific code lives here.
|
||||
"""
|
||||
|
||||
# WhatsApp message limits — practical UX limit, not protocol max.
|
||||
# WhatsApp allows ~65K but long messages are unreadable on mobile.
|
||||
MAX_MESSAGE_LENGTH = 4096
|
||||
DEFAULT_REPLY_PREFIX = "⚕ *Hermes Agent*\n────────────\n"
|
||||
|
||||
|
||||
# Default bridge location relative to the hermes-agent install
|
||||
_DEFAULT_BRIDGE_DIR = Path(__file__).resolve().parents[2] / "scripts" / "whatsapp-bridge"
|
||||
|
||||
@@ -278,213 +276,6 @@ class WhatsAppAdapter(BasePlatformAdapter):
|
||||
# notification before the normal "✓ whatsapp disconnected" fires.
|
||||
self._shutting_down: bool = False
|
||||
|
||||
def _effective_reply_prefix(self) -> str:
|
||||
"""Return the prefix the Node bridge will add in self-chat mode."""
|
||||
whatsapp_mode = os.getenv("WHATSAPP_MODE", "self-chat")
|
||||
if whatsapp_mode != "self-chat":
|
||||
return ""
|
||||
if self._reply_prefix is not None:
|
||||
return self._reply_prefix.replace("\\n", "\n")
|
||||
env_prefix = os.getenv("WHATSAPP_REPLY_PREFIX")
|
||||
if env_prefix is not None:
|
||||
return env_prefix.replace("\\n", "\n")
|
||||
return self.DEFAULT_REPLY_PREFIX
|
||||
|
||||
def _outgoing_chunk_limit(self) -> int:
|
||||
"""Reserve room for the bridge-side prefix so final WhatsApp text fits."""
|
||||
prefix_len = len(self._effective_reply_prefix())
|
||||
# Keep enough space for truncate_message's pagination indicator and
|
||||
# code-fence repair even if a user configures a very long prefix.
|
||||
return max(1024, self.MAX_MESSAGE_LENGTH - prefix_len)
|
||||
|
||||
def _whatsapp_require_mention(self) -> bool:
|
||||
configured = self.config.extra.get("require_mention")
|
||||
if configured is not None:
|
||||
if isinstance(configured, str):
|
||||
return configured.lower() in {"true", "1", "yes", "on"}
|
||||
return bool(configured)
|
||||
return os.getenv("WHATSAPP_REQUIRE_MENTION", "false").lower() in {"true", "1", "yes", "on"}
|
||||
|
||||
def _whatsapp_free_response_chats(self) -> set[str]:
|
||||
raw = self.config.extra.get("free_response_chats")
|
||||
if raw is None:
|
||||
raw = os.getenv("WHATSAPP_FREE_RESPONSE_CHATS", "")
|
||||
if isinstance(raw, list):
|
||||
return {str(part).strip() for part in raw if str(part).strip()}
|
||||
return {part.strip() for part in str(raw).split(",") if part.strip()}
|
||||
|
||||
@staticmethod
|
||||
def _coerce_allow_list(raw) -> set[str]:
|
||||
"""Parse allow_from / group_allow_from from config or env var."""
|
||||
if raw is None:
|
||||
return set()
|
||||
if isinstance(raw, list):
|
||||
return {str(part).strip() for part in raw if str(part).strip()}
|
||||
return {part.strip() for part in str(raw).split(",") if part.strip()}
|
||||
|
||||
@staticmethod
|
||||
def _is_broadcast_chat(chat_id: str) -> bool:
|
||||
"""True for WhatsApp pseudo-chats that aren't real conversations.
|
||||
|
||||
Covers Status updates (Stories) and Channel/Newsletter broadcasts.
|
||||
These show up as inbound messages on Baileys but the agent should
|
||||
never reply — answering a Story update spams the contact's status
|
||||
feed, and Channel posts aren't addressable in the first place.
|
||||
"""
|
||||
if not chat_id:
|
||||
return False
|
||||
cid = chat_id.strip().lower()
|
||||
if cid == "status@broadcast":
|
||||
return True
|
||||
# @broadcast suffix covers status@broadcast plus any future
|
||||
# broadcast-list variants. @newsletter is the Channel JID suffix.
|
||||
if cid.endswith("@broadcast") or cid.endswith("@newsletter"):
|
||||
return True
|
||||
return False
|
||||
|
||||
def _is_dm_allowed(self, sender_id: str) -> bool:
|
||||
"""Check whether a DM from the given sender should be processed."""
|
||||
if self._dm_policy == "disabled":
|
||||
return False
|
||||
if self._dm_policy == "allowlist":
|
||||
return sender_id in self._allow_from
|
||||
# "open" — all DMs allowed
|
||||
return True
|
||||
|
||||
def _is_group_allowed(self, chat_id: str) -> bool:
|
||||
"""Check whether a group chat should be processed."""
|
||||
if self._group_policy == "disabled":
|
||||
return False
|
||||
if self._group_policy == "allowlist":
|
||||
return chat_id in self._group_allow_from
|
||||
# "open" — all groups allowed
|
||||
return True
|
||||
|
||||
def _compile_mention_patterns(self):
|
||||
patterns = self.config.extra.get("mention_patterns")
|
||||
if patterns is None:
|
||||
raw = os.getenv("WHATSAPP_MENTION_PATTERNS", "").strip()
|
||||
if raw:
|
||||
try:
|
||||
patterns = json.loads(raw)
|
||||
except Exception:
|
||||
patterns = [part.strip() for part in raw.splitlines() if part.strip()]
|
||||
if not patterns:
|
||||
patterns = [part.strip() for part in raw.split(",") if part.strip()]
|
||||
if patterns is None:
|
||||
return []
|
||||
if isinstance(patterns, str):
|
||||
patterns = [patterns]
|
||||
if not isinstance(patterns, list):
|
||||
logger.warning("[%s] whatsapp mention_patterns must be a list or string; got %s", self.name, type(patterns).__name__)
|
||||
return []
|
||||
|
||||
compiled = []
|
||||
for pattern in patterns:
|
||||
if not isinstance(pattern, str) or not pattern.strip():
|
||||
continue
|
||||
try:
|
||||
compiled.append(re.compile(pattern, re.IGNORECASE))
|
||||
except re.error as exc:
|
||||
logger.warning("[%s] Invalid WhatsApp mention pattern %r: %s", self.name, pattern, exc)
|
||||
if compiled:
|
||||
logger.info("[%s] Loaded %d WhatsApp mention pattern(s)", self.name, len(compiled))
|
||||
return compiled
|
||||
|
||||
@staticmethod
|
||||
def _normalize_whatsapp_id(value: Optional[str]) -> str:
|
||||
if not value:
|
||||
return ""
|
||||
normalized = str(value).strip()
|
||||
if ":" in normalized and "@" in normalized:
|
||||
normalized = normalized.replace(":", "@", 1)
|
||||
return normalized
|
||||
|
||||
def _bot_ids_from_message(self, data: Dict[str, Any]) -> set[str]:
|
||||
bot_ids = set()
|
||||
for candidate in data.get("botIds") or []:
|
||||
normalized = self._normalize_whatsapp_id(candidate)
|
||||
if normalized:
|
||||
bot_ids.add(normalized)
|
||||
return bot_ids
|
||||
|
||||
def _message_is_reply_to_bot(self, data: Dict[str, Any]) -> bool:
|
||||
quoted_participant = self._normalize_whatsapp_id(data.get("quotedParticipant"))
|
||||
if not quoted_participant:
|
||||
return False
|
||||
return quoted_participant in self._bot_ids_from_message(data)
|
||||
|
||||
def _message_mentions_bot(self, data: Dict[str, Any]) -> bool:
|
||||
bot_ids = self._bot_ids_from_message(data)
|
||||
if not bot_ids:
|
||||
return False
|
||||
mentioned_ids = {
|
||||
nid
|
||||
for candidate in (data.get("mentionedIds") or [])
|
||||
if (nid := self._normalize_whatsapp_id(candidate))
|
||||
}
|
||||
if mentioned_ids & bot_ids:
|
||||
return True
|
||||
|
||||
body = str(data.get("body") or "")
|
||||
lower_body = body.lower()
|
||||
for bot_id in bot_ids:
|
||||
bare_id = bot_id.split("@", 1)[0].lower()
|
||||
if bare_id and (f"@{bare_id}" in lower_body or bare_id in lower_body):
|
||||
return True
|
||||
return False
|
||||
|
||||
def _message_matches_mention_patterns(self, data: Dict[str, Any]) -> bool:
|
||||
if not self._mention_patterns:
|
||||
return False
|
||||
body = str(data.get("body") or "")
|
||||
return any(pattern.search(body) for pattern in self._mention_patterns)
|
||||
|
||||
def _clean_bot_mention_text(self, text: str, data: Dict[str, Any]) -> str:
|
||||
if not text:
|
||||
return text
|
||||
bot_ids = self._bot_ids_from_message(data)
|
||||
cleaned = text
|
||||
for bot_id in bot_ids:
|
||||
bare_id = bot_id.split("@", 1)[0]
|
||||
if bare_id:
|
||||
cleaned = re.sub(rf"@{re.escape(bare_id)}\b[,:\-]*\s*", "", cleaned)
|
||||
return cleaned.strip() or text
|
||||
|
||||
def _should_process_message(self, data: Dict[str, Any]) -> bool:
|
||||
chat_id_raw = str(data.get("chatId") or "")
|
||||
# WhatsApp uses pseudo-chats for Status updates (Stories) and
|
||||
# Channel/Newsletter broadcasts. These are not real conversations
|
||||
# and the agent should never reply to them — even in self-chat mode
|
||||
# where the bridge may surface them as "fromMe" events.
|
||||
if self._is_broadcast_chat(chat_id_raw):
|
||||
return False
|
||||
is_group = data.get("isGroup", False)
|
||||
if is_group:
|
||||
chat_id = chat_id_raw
|
||||
if not self._is_group_allowed(chat_id):
|
||||
return False
|
||||
else:
|
||||
sender_id = str(data.get("senderId") or data.get("from") or "")
|
||||
if not self._is_dm_allowed(sender_id):
|
||||
return False
|
||||
# DMs that pass the policy gate are always processed
|
||||
return True
|
||||
# Group messages: check mention / free-response settings
|
||||
chat_id = str(data.get("chatId") or "")
|
||||
if chat_id in self._whatsapp_free_response_chats():
|
||||
return True
|
||||
if not self._whatsapp_require_mention():
|
||||
return True
|
||||
body = str(data.get("body") or "").strip()
|
||||
if body.startswith("/"):
|
||||
return True
|
||||
if self._message_is_reply_to_bot(data):
|
||||
return True
|
||||
if self._message_mentions_bot(data):
|
||||
return True
|
||||
return self._message_matches_mention_patterns(data)
|
||||
|
||||
async def connect(self) -> bool:
|
||||
"""
|
||||
Start the WhatsApp bridge.
|
||||
@@ -808,63 +599,6 @@ class WhatsAppAdapter(BasePlatformAdapter):
|
||||
self._close_bridge_log()
|
||||
print(f"[{self.name}] Disconnected")
|
||||
|
||||
def format_message(self, content: str) -> str:
|
||||
"""Convert standard markdown to WhatsApp-compatible formatting.
|
||||
|
||||
WhatsApp supports: *bold*, _italic_, ~strikethrough~, ```code```,
|
||||
and monospaced `inline`. Standard markdown uses different syntax
|
||||
for bold/italic/strikethrough, so we convert here.
|
||||
|
||||
Code blocks (``` fenced) and inline code (`) are protected from
|
||||
conversion via placeholder substitution.
|
||||
"""
|
||||
if not content:
|
||||
return content
|
||||
|
||||
# --- 1. Protect fenced code blocks from formatting changes ---
|
||||
_FENCE_PH = "\x00FENCE"
|
||||
fences: list[str] = []
|
||||
|
||||
def _save_fence(m: re.Match) -> str:
|
||||
fences.append(m.group(0))
|
||||
return f"{_FENCE_PH}{len(fences) - 1}\x00"
|
||||
|
||||
result = re.sub(r"```[\s\S]*?```", _save_fence, content)
|
||||
|
||||
# --- 2. Protect inline code ---
|
||||
_CODE_PH = "\x00CODE"
|
||||
codes: list[str] = []
|
||||
|
||||
def _save_code(m: re.Match) -> str:
|
||||
codes.append(m.group(0))
|
||||
return f"{_CODE_PH}{len(codes) - 1}\x00"
|
||||
|
||||
result = re.sub(r"`[^`\n]+`", _save_code, result)
|
||||
|
||||
# --- 3. Convert markdown formatting to WhatsApp syntax ---
|
||||
# Bold: **text** or __text__ → *text*
|
||||
result = re.sub(r"\*\*(.+?)\*\*", r"*\1*", result)
|
||||
result = re.sub(r"__(.+?)__", r"*\1*", result)
|
||||
# Strikethrough: ~~text~~ → ~text~
|
||||
result = re.sub(r"~~(.+?)~~", r"~\1~", result)
|
||||
# Italic: *text* is already WhatsApp italic — leave as-is
|
||||
# _text_ is already WhatsApp italic — leave as-is
|
||||
|
||||
# --- 4. Convert markdown headers to bold text ---
|
||||
# # Header → *Header*
|
||||
result = re.sub(r"^#{1,6}\s+(.+)$", r"*\1*", result, flags=re.MULTILINE)
|
||||
|
||||
# --- 5. Convert markdown links: [text](url) → text (url) ---
|
||||
result = re.sub(r"\[([^\]]+)\]\(([^)]+)\)", r"\1 (\2)", result)
|
||||
|
||||
# --- 6. Restore protected sections ---
|
||||
for i, fence in enumerate(fences):
|
||||
result = result.replace(f"{_FENCE_PH}{i}\x00", fence)
|
||||
for i, code in enumerate(codes):
|
||||
result = result.replace(f"{_CODE_PH}{i}\x00", code)
|
||||
|
||||
return result
|
||||
|
||||
async def send(
|
||||
self,
|
||||
chat_id: str,
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,351 @@
|
||||
"""
|
||||
Transport-agnostic WhatsApp behavior shared by the Baileys bridge adapter
|
||||
and the official WhatsApp Cloud API adapter.
|
||||
|
||||
The mixin provides:
|
||||
- Allow-list / DM / group gating
|
||||
- Mention detection (explicit @-mentions + configurable regex patterns)
|
||||
- Quoted-reply-to-bot detection
|
||||
- Broadcast / Channel / Newsletter filtering
|
||||
- WhatsApp-flavored markdown conversion
|
||||
- Outgoing chunk length budgeting
|
||||
|
||||
It is the *behavior layer*. Transport-specific concerns (subprocess management,
|
||||
HTTP webhooks, Graph API calls, media upload protocols) live in each adapter.
|
||||
|
||||
Mixin contract — the adapter must set these on ``self`` before any of the
|
||||
mixin's methods are called (typically in ``__init__``):
|
||||
|
||||
self.config # gateway.config.PlatformConfig
|
||||
self.name # str — adapter name (used in log lines)
|
||||
self._dm_policy # str: "open" | "allowlist" | "disabled"
|
||||
self._allow_from # set[str]
|
||||
self._group_policy # str: "open" | "allowlist" | "disabled"
|
||||
self._group_allow_from # set[str]
|
||||
self._mention_patterns # list[re.Pattern]
|
||||
self._reply_prefix # Optional[str]
|
||||
|
||||
Class attributes ``MAX_MESSAGE_LENGTH`` and ``DEFAULT_REPLY_PREFIX`` are
|
||||
defined on the mixin and may be overridden per-adapter if needed.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class WhatsAppBehaviorMixin:
    """Shared behavior for all WhatsApp adapters (Baileys + Cloud API).

    The mixin owns no per-instance state. Every value it reads is either a
    class attribute defined here or an attribute the host adapter sets in
    its ``__init__``:

        self.config             object exposing an ``extra`` dict
        self.name               adapter name used in log lines
        self._dm_policy         "open" | "allowlist" | "disabled"
        self._allow_from        set of sender IDs
        self._group_policy      "open" | "allowlist" | "disabled"
        self._group_allow_from  set of group chat IDs
        self._mention_patterns  list of compiled regex patterns
        self._reply_prefix      optional reply prefix override
    """

    # WhatsApp message limits — practical UX limit, not protocol max.
    # WhatsApp allows ~65K but long messages are unreadable on mobile.
    MAX_MESSAGE_LENGTH: int = 4096

    DEFAULT_REPLY_PREFIX: str = "⚕ *Hermes Agent*\n────────────\n"

    # String spellings accepted as "true" for boolean settings.
    _TRUTHY_STRINGS = frozenset({"true", "1", "yes", "on"})

    # ------------------------------------------------------------------ config
    def _effective_reply_prefix(self) -> str:
        """Return the prefix to add to outgoing replies in self-chat mode.

        Resolution order: empty string when ``WHATSAPP_MODE`` is not
        ``"self-chat"`` (the default mode), then ``self._reply_prefix``,
        then the ``WHATSAPP_REPLY_PREFIX`` environment variable, then
        ``DEFAULT_REPLY_PREFIX``. Literal ``\\n`` sequences in a configured
        prefix are expanded to real newlines.

        Subclasses that don't have a self-chat concept (the Cloud API
        adapter) can override this to always return ``""`` or apply a
        different policy.
        """
        if os.getenv("WHATSAPP_MODE", "self-chat") != "self-chat":
            return ""
        if self._reply_prefix is not None:
            return self._reply_prefix.replace("\\n", "\n")
        env_prefix = os.getenv("WHATSAPP_REPLY_PREFIX")
        if env_prefix is not None:
            return env_prefix.replace("\\n", "\n")
        return self.DEFAULT_REPLY_PREFIX

    def _outgoing_chunk_limit(self) -> int:
        """Reserve room for the reply prefix so the final message fits.

        Returns ``MAX_MESSAGE_LENGTH`` minus the effective prefix length,
        but never less than 1024.
        """
        prefix_len = len(self._effective_reply_prefix())
        # Keep enough space for truncate_message's pagination indicator and
        # code-fence repair even if a user configures a very long prefix.
        return max(1024, self.MAX_MESSAGE_LENGTH - prefix_len)

    def _whatsapp_require_mention(self) -> bool:
        """Return whether group messages must address the bot.

        ``config.extra["require_mention"]`` wins when present; otherwise the
        ``WHATSAPP_REQUIRE_MENTION`` environment variable (default
        ``"false"``) is used. Strings are compared case-insensitively
        against the accepted truthy spellings; other values go through
        ``bool()``.
        """
        configured = self.config.extra.get("require_mention")
        if configured is None:
            configured = os.getenv("WHATSAPP_REQUIRE_MENTION", "false")
        if isinstance(configured, str):
            return configured.lower() in self._TRUTHY_STRINGS
        return bool(configured)

    def _whatsapp_free_response_chats(self) -> set[str]:
        """Return chat IDs where the mention requirement is waived.

        Reads ``config.extra["free_response_chats"]`` (list or
        comma-separated string), falling back to the
        ``WHATSAPP_FREE_RESPONSE_CHATS`` environment variable.
        """
        raw = self.config.extra.get("free_response_chats")
        if raw is None:
            raw = os.getenv("WHATSAPP_FREE_RESPONSE_CHATS", "")
        # Same list / comma-separated parsing as the allow-lists.
        return self._coerce_allow_list(raw)

    @staticmethod
    def _coerce_allow_list(raw) -> set[str]:
        """Parse allow_from / group_allow_from from config or env var.

        ``None`` yields an empty set, a list yields its stripped non-empty
        stringified items, and anything else is stringified and split on
        commas.
        """
        if raw is None:
            return set()
        if isinstance(raw, list):
            return {str(part).strip() for part in raw if str(part).strip()}
        return {part.strip() for part in str(raw).split(",") if part.strip()}

    # ------------------------------------------------------------------ JID helpers
    @staticmethod
    def _normalize_whatsapp_id(value: Optional[str]) -> str:
        """Return a stripped ID; empty string for falsy input.

        When the ID contains both ``:`` and ``@``, the first ``:`` is
        replaced with ``@`` so that splitting on the first ``@`` yields the
        part before the colon.
        """
        if not value:
            return ""
        normalized = str(value).strip()
        if ":" in normalized and "@" in normalized:
            normalized = normalized.replace(":", "@", 1)
        return normalized

    @staticmethod
    def _is_broadcast_chat(chat_id: str) -> bool:
        """True for WhatsApp pseudo-chats that aren't real conversations.

        Covers Status updates (Stories) and Channel/Newsletter broadcasts.
        These show up as inbound messages on Baileys but the agent should
        never reply — answering a Story update spams the contact's status
        feed, and Channel posts aren't addressable in the first place.
        """
        if not chat_id:
            return False
        # The @broadcast suffix covers status@broadcast plus any future
        # broadcast-list variants. @newsletter is the Channel JID suffix.
        return chat_id.strip().lower().endswith(("@broadcast", "@newsletter"))

    # ------------------------------------------------------------------ gating
    def _is_dm_allowed(self, sender_id: str) -> bool:
        """Check whether a DM from the given sender should be processed."""
        if self._dm_policy == "disabled":
            return False
        if self._dm_policy == "allowlist":
            return sender_id in self._allow_from
        # "open" — all DMs allowed
        return True

    def _is_group_allowed(self, chat_id: str) -> bool:
        """Check whether a group chat should be processed."""
        if self._group_policy == "disabled":
            return False
        if self._group_policy == "allowlist":
            return chat_id in self._group_allow_from
        # "open" — all groups allowed
        return True

    def _compile_mention_patterns(self) -> list[re.Pattern]:
        """Compile the configured mention regexes (case-insensitive).

        Patterns come from ``config.extra["mention_patterns"]`` or, when
        that key is absent, from the ``WHATSAPP_MENTION_PATTERNS``
        environment variable (JSON first, newline-separated text as the
        fallback). Invalid or non-string entries are skipped with a warning
        rather than raising.
        """
        patterns = self.config.extra.get("mention_patterns")
        if patterns is None:
            raw = os.getenv("WHATSAPP_MENTION_PATTERNS", "").strip()
            if raw:
                try:
                    patterns = json.loads(raw)
                except (ValueError, RecursionError):
                    # Not JSON: treat the value as one pattern per line.
                    patterns = [
                        part.strip() for part in raw.splitlines() if part.strip()
                    ]
                    # NOTE(review): raw is stripped and non-empty here, so the
                    # comma fallback looks unreachable — kept for parity.
                    if not patterns:
                        patterns = [
                            part.strip() for part in raw.split(",") if part.strip()
                        ]
        if patterns is None:
            return []
        if isinstance(patterns, str):
            patterns = [patterns]
        if not isinstance(patterns, list):
            logger.warning(
                "[%s] whatsapp mention_patterns must be a list or string; got %s",
                self.name,
                type(patterns).__name__,
            )
            return []

        compiled = []
        for pattern in patterns:
            if not isinstance(pattern, str) or not pattern.strip():
                continue
            try:
                compiled.append(re.compile(pattern, re.IGNORECASE))
            except re.error as exc:
                logger.warning(
                    "[%s] Invalid WhatsApp mention pattern %r: %s",
                    self.name,
                    pattern,
                    exc,
                )
        if compiled:
            logger.info(
                "[%s] Loaded %d WhatsApp mention pattern(s)", self.name, len(compiled)
            )
        return compiled

    def _bot_ids_from_message(self, data: Dict[str, Any]) -> set[str]:
        """Return the normalized, non-empty IDs listed under ``botIds``."""
        return {
            normalized
            for candidate in (data.get("botIds") or [])
            if (normalized := self._normalize_whatsapp_id(candidate))
        }

    def _message_is_reply_to_bot(self, data: Dict[str, Any]) -> bool:
        """True when ``quotedParticipant`` is one of the message's bot IDs."""
        quoted_participant = self._normalize_whatsapp_id(data.get("quotedParticipant"))
        if not quoted_participant:
            return False
        return quoted_participant in self._bot_ids_from_message(data)

    def _message_mentions_bot(self, data: Dict[str, Any]) -> bool:
        """True when the message mentions the bot.

        Checks the structured ``mentionedIds`` list first, then falls back
        to a case-insensitive search of the body for the bare bot ID (the
        part before the first ``@``).
        """
        bot_ids = self._bot_ids_from_message(data)
        if not bot_ids:
            return False
        mentioned_ids = {
            nid
            for candidate in (data.get("mentionedIds") or [])
            if (nid := self._normalize_whatsapp_id(candidate))
        }
        if mentioned_ids & bot_ids:
            return True

        lower_body = str(data.get("body") or "").lower()
        for bot_id in bot_ids:
            bare_id = bot_id.split("@", 1)[0].lower()
            # A body containing "@<id>" necessarily contains "<id>", so the
            # plain substring test covers both spellings.
            if bare_id and bare_id in lower_body:
                return True
        return False

    def _message_matches_mention_patterns(self, data: Dict[str, Any]) -> bool:
        """True when any compiled mention pattern matches the body."""
        if not self._mention_patterns:
            return False
        body = str(data.get("body") or "")
        return any(pattern.search(body) for pattern in self._mention_patterns)

    def _clean_bot_mention_text(self, text: str, data: Dict[str, Any]) -> str:
        """Strip ``@<bot id>`` mentions (plus trailing punctuation) from text.

        Returns the original text unchanged when stripping would leave
        nothing behind.
        """
        if not text:
            return text
        cleaned = text
        for bot_id in self._bot_ids_from_message(data):
            bare_id = bot_id.split("@", 1)[0]
            if bare_id:
                cleaned = re.sub(
                    rf"@{re.escape(bare_id)}\b[,:\-]*\s*", "", cleaned
                )
        return cleaned.strip() or text

    def _should_process_message(self, data: Dict[str, Any]) -> bool:
        """Decide whether an inbound message should reach the agent.

        Broadcast pseudo-chats are always dropped. DMs only need to pass
        the DM policy. Group messages must pass the group policy and then
        either come from a free-response chat, arrive while mentions are
        not required, start with ``/``, reply to the bot, mention the bot,
        or match a configured mention pattern.
        """
        chat_id = str(data.get("chatId") or "")
        # WhatsApp uses pseudo-chats for Status updates (Stories) and
        # Channel/Newsletter broadcasts. These are not real conversations
        # and the agent should never reply to them — even in self-chat mode
        # where the bridge may surface them as "fromMe" events.
        if self._is_broadcast_chat(chat_id):
            return False

        if not data.get("isGroup", False):
            sender_id = str(data.get("senderId") or data.get("from") or "")
            # DMs that pass the policy gate are always processed
            return self._is_dm_allowed(sender_id)

        if not self._is_group_allowed(chat_id):
            return False
        # Group messages: check mention / free-response settings
        if chat_id in self._whatsapp_free_response_chats():
            return True
        if not self._whatsapp_require_mention():
            return True
        body = str(data.get("body") or "").strip()
        if body.startswith("/"):
            return True
        if self._message_is_reply_to_bot(data):
            return True
        if self._message_mentions_bot(data):
            return True
        return self._message_matches_mention_patterns(data)

    # ------------------------------------------------------------------ formatting
    def format_message(self, content: str) -> str:
        """Convert standard markdown to WhatsApp-compatible formatting.

        WhatsApp supports: *bold*, _italic_, ~strikethrough~, ```code```,
        and monospaced `inline`. Standard markdown uses different syntax
        for bold/italic/strikethrough, so we convert here.

        Code blocks (``` fenced) and inline code (`) are protected from
        conversion via placeholder substitution.
        """
        if not content:
            return content

        # --- 1. Protect fenced code blocks from formatting changes ---
        _FENCE_PH = "\x00FENCE"
        fences: list[str] = []

        def _save_fence(m: re.Match) -> str:
            fences.append(m.group(0))
            return f"{_FENCE_PH}{len(fences) - 1}\x00"

        result = re.sub(r"```[\s\S]*?```", _save_fence, content)

        # --- 2. Protect inline code ---
        _CODE_PH = "\x00CODE"
        codes: list[str] = []

        def _save_code(m: re.Match) -> str:
            codes.append(m.group(0))
            return f"{_CODE_PH}{len(codes) - 1}\x00"

        result = re.sub(r"`[^`\n]+`", _save_code, result)

        # --- 3. Convert markdown formatting to WhatsApp syntax ---
        # Bold: **text** or __text__ → *text*
        result = re.sub(r"\*\*(.+?)\*\*", r"*\1*", result)
        result = re.sub(r"__(.+?)__", r"*\1*", result)
        # Strikethrough: ~~text~~ → ~text~
        result = re.sub(r"~~(.+?)~~", r"~\1~", result)
        # Italic: *text* is already WhatsApp italic — leave as-is
        # _text_ is already WhatsApp italic — leave as-is

        # --- 4. Convert markdown headers to bold text ---
        # # Header → *Header*
        def _header_to_bold(m: re.Match) -> str:
            title = m.group(1)
            # "## **Title**" was already rewritten to "## *Title*" by the
            # bold pass; wrapping it again would emit "**Title**".
            if re.fullmatch(r"\*[^*]+\*", title.strip()):
                return title.strip()
            return f"*{title}*"

        result = re.sub(
            r"^#{1,6}\s+(.+)$", _header_to_bold, result, flags=re.MULTILINE
        )

        # --- 5. Convert markdown links: [text](url) → text (url) ---
        result = re.sub(r"\[([^\]]+)\]\(([^)]+)\)", r"\1 (\2)", result)

        # --- 6. Restore protected sections ---
        # Restore in reverse order of protection: an inline-code span saved
        # in step 2 may itself contain a fence placeholder from step 1, so
        # inline code must be put back before fences are expanded.
        for i, code in enumerate(codes):
            result = result.replace(f"{_CODE_PH}{i}\x00", code)
        for i, fence in enumerate(fences):
            result = result.replace(f"{_FENCE_PH}{i}\x00", fence)

        return result
|
||||
+18
-2
@@ -3678,7 +3678,8 @@ class GatewayRunner:
|
||||
# Warn if no user allowlists are configured and open access is not opted in
|
||||
_builtin_allowed_vars = (
|
||||
"TELEGRAM_ALLOWED_USERS", "DISCORD_ALLOWED_USERS",
|
||||
"WHATSAPP_ALLOWED_USERS", "SLACK_ALLOWED_USERS",
|
||||
"WHATSAPP_ALLOWED_USERS", "WHATSAPP_CLOUD_ALLOWED_USERS",
|
||||
"SLACK_ALLOWED_USERS",
|
||||
"SIGNAL_ALLOWED_USERS", "SIGNAL_GROUP_ALLOWED_USERS",
|
||||
"TELEGRAM_GROUP_ALLOWED_USERS",
|
||||
"TELEGRAM_GROUP_ALLOWED_CHATS",
|
||||
@@ -3696,7 +3697,8 @@ class GatewayRunner:
|
||||
)
|
||||
_builtin_allow_all_vars = (
|
||||
"TELEGRAM_ALLOW_ALL_USERS", "DISCORD_ALLOW_ALL_USERS",
|
||||
"WHATSAPP_ALLOW_ALL_USERS", "SLACK_ALLOW_ALL_USERS",
|
||||
"WHATSAPP_ALLOW_ALL_USERS", "WHATSAPP_CLOUD_ALLOW_ALL_USERS",
|
||||
"SLACK_ALLOW_ALL_USERS",
|
||||
"SIGNAL_ALLOW_ALL_USERS", "EMAIL_ALLOW_ALL_USERS",
|
||||
"SMS_ALLOW_ALL_USERS", "MATTERMOST_ALLOW_ALL_USERS",
|
||||
"MATRIX_ALLOW_ALL_USERS", "DINGTALK_ALLOW_ALL_USERS",
|
||||
@@ -5954,6 +5956,18 @@ class GatewayRunner:
|
||||
logger.warning("WhatsApp: Node.js not installed or bridge not configured")
|
||||
return None
|
||||
return WhatsAppAdapter(config)
|
||||
|
||||
elif platform == Platform.WHATSAPP_CLOUD:
|
||||
from gateway.platforms.whatsapp_cloud import (
|
||||
WhatsAppCloudAdapter,
|
||||
check_whatsapp_cloud_requirements,
|
||||
)
|
||||
if not check_whatsapp_cloud_requirements():
|
||||
logger.warning(
|
||||
"WhatsApp Cloud: aiohttp/httpx missing — reinstall hermes-agent"
|
||||
)
|
||||
return None
|
||||
return WhatsAppCloudAdapter(config)
|
||||
|
||||
elif platform == Platform.SLACK:
|
||||
from gateway.platforms.slack import SlackAdapter, check_slack_requirements
|
||||
@@ -6144,6 +6158,7 @@ class GatewayRunner:
|
||||
Platform.TELEGRAM: "TELEGRAM_ALLOWED_USERS",
|
||||
Platform.DISCORD: "DISCORD_ALLOWED_USERS",
|
||||
Platform.WHATSAPP: "WHATSAPP_ALLOWED_USERS",
|
||||
Platform.WHATSAPP_CLOUD: "WHATSAPP_CLOUD_ALLOWED_USERS",
|
||||
Platform.SLACK: "SLACK_ALLOWED_USERS",
|
||||
Platform.SIGNAL: "SIGNAL_ALLOWED_USERS",
|
||||
Platform.EMAIL: "EMAIL_ALLOWED_USERS",
|
||||
@@ -6170,6 +6185,7 @@ class GatewayRunner:
|
||||
Platform.TELEGRAM: "TELEGRAM_ALLOW_ALL_USERS",
|
||||
Platform.DISCORD: "DISCORD_ALLOW_ALL_USERS",
|
||||
Platform.WHATSAPP: "WHATSAPP_ALLOW_ALL_USERS",
|
||||
Platform.WHATSAPP_CLOUD: "WHATSAPP_CLOUD_ALLOW_ALL_USERS",
|
||||
Platform.SLACK: "SLACK_ALLOW_ALL_USERS",
|
||||
Platform.SIGNAL: "SIGNAL_ALLOW_ALL_USERS",
|
||||
Platform.EMAIL: "EMAIL_ALLOW_ALL_USERS",
|
||||
|
||||
+2
-47
@@ -1778,17 +1778,8 @@ DEFAULT_CONFIG = {
|
||||
},
|
||||
},
|
||||
|
||||
# ── Nous Portal feature flags ──────────────────────────────────────
|
||||
"portal": {
|
||||
# App tools: 500+ external app integrations (Gmail, Slack, GitHub,
|
||||
# Notion, etc.) via the Nous tool gateway. Requires an active Nous
|
||||
# subscription. Set to False to hide the app_tools toolset even
|
||||
# when a subscription is present.
|
||||
"app_tools": True,
|
||||
},
|
||||
|
||||
# Config schema version - bump this when adding new required fields
|
||||
"_config_version": 24,
|
||||
"_config_version": 23,
|
||||
}
|
||||
|
||||
# =============================================================================
|
||||
@@ -2276,22 +2267,6 @@ OPTIONAL_ENV_VARS = {
|
||||
"category": "tool",
|
||||
"advanced": True,
|
||||
},
|
||||
"TOOLS_GATEWAY_URL": {
|
||||
"description": "Explicit URL for the tools-gateway (app integrations). Overrides the auto-derived tools-gateway.nousresearch.com",
|
||||
"prompt": "Tools-gateway URL",
|
||||
"url": None,
|
||||
"password": False,
|
||||
"category": "tool",
|
||||
"advanced": True,
|
||||
},
|
||||
"PORTAL_APP_TOOLS": {
|
||||
"description": "Enable app integration tools (500+ apps via Nous tool gateway). Requires Nous subscription.",
|
||||
"prompt": "Enable app tools (500+ apps)",
|
||||
"url": None,
|
||||
"password": False,
|
||||
"category": "tool",
|
||||
"advanced": True,
|
||||
},
|
||||
"TAVILY_API_KEY": {
|
||||
"description": "Tavily API key for AI-native web search, extract, and crawl",
|
||||
"prompt": "Tavily API key",
|
||||
@@ -3326,7 +3301,7 @@ _KNOWN_ROOT_KEYS = {
|
||||
"fallback_providers", "credential_pool_strategies", "toolsets",
|
||||
"agent", "terminal", "display", "compression", "delegation",
|
||||
"auxiliary", "custom_providers", "context", "memory", "gateway",
|
||||
"sessions", "portal",
|
||||
"sessions",
|
||||
}
|
||||
|
||||
# Valid fields inside a custom_providers list entry
|
||||
@@ -3989,26 +3964,6 @@ def migrate_config(interactive: bool = True, quiet: bool = False) -> Dict[str, A
|
||||
f"{', '.join(added_aux)}"
|
||||
)
|
||||
|
||||
# ── Version 23 → 24: inject app_tools into saved platform_toolsets ──
|
||||
# The portal.app_tools config flag is handled by deep-merge (DEFAULT_CONFIG
|
||||
# has it, so load_config() always includes it). But platform_toolsets are
|
||||
# user-owned lists that deep-merge can't append to — existing users who
|
||||
# ran `hermes tools` have a saved list that won't include app_tools.
|
||||
if current_ver < 24:
|
||||
config = read_raw_config()
|
||||
pt = config.get("platform_toolsets")
|
||||
if isinstance(pt, dict):
|
||||
patched = False
|
||||
for plat_key, ts_list in pt.items():
|
||||
if isinstance(ts_list, list) and "app_tools" not in ts_list:
|
||||
ts_list.append("app_tools")
|
||||
patched = True
|
||||
if patched:
|
||||
save_config(config)
|
||||
results["config_added"].append("app_tools added to platform_toolsets")
|
||||
if not quiet:
|
||||
print(" ✓ Added app_tools to saved platform toolset lists")
|
||||
|
||||
if current_ver < latest_ver and not quiet:
|
||||
print(f"Config version: {current_ver} → {latest_ver}")
|
||||
|
||||
|
||||
@@ -71,7 +71,7 @@ def curses_checklist(
|
||||
curses.use_default_colors()
|
||||
curses.init_pair(1, curses.COLOR_GREEN, -1)
|
||||
curses.init_pair(2, curses.COLOR_YELLOW, -1)
|
||||
curses.init_pair(3, 8 if curses.COLORS > 8 else curses.COLOR_WHITE, -1) # dim gray
|
||||
curses.init_pair(3, 8, -1) # dim gray
|
||||
cursor = 0
|
||||
scroll_offset = 0
|
||||
|
||||
|
||||
@@ -21,44 +21,6 @@ _CREDENTIAL_SUFFIXES = ("_API_KEY", "_TOKEN", "_SECRET", "_KEY")
|
||||
# tests) don't spam the same warning multiple times.
|
||||
_WARNED_KEYS: set[str] = set()
|
||||
|
||||
# Map of env-var name → source label ("bitwarden", etc.) for credentials
|
||||
# that were injected by an external secret source during load_hermes_dotenv().
|
||||
# Used by setup / `hermes model` flows to label detected credentials so
|
||||
# users understand WHERE a key came from when their .env doesn't contain it
|
||||
# directly (otherwise the "credentials detected ✓" line looks identical to
|
||||
# the .env case and they don't know Bitwarden is wired up).
|
||||
_SECRET_SOURCES: dict[str, str] = {}
|
||||
|
||||
|
||||
def get_secret_source(env_var: str) -> str | None:
    """Look up which external secret source supplied ``env_var``.

    The answer comes from the module-level ``_SECRET_SOURCES`` registry,
    which is filled in while ``load_hermes_dotenv()`` applies external
    secret sources. A key pulled from Bitwarden Secrets Manager yields
    ``"bitwarden"``. Anything not recorded there (values from ``.env``,
    the shell environment, or simply untracked names) yields ``None``.
    """
    try:
        return _SECRET_SOURCES[env_var]
    except KeyError:
        # Not recorded: the value did not come from a tracked secret source.
        return None
|
||||
|
||||
|
||||
def format_secret_source_suffix(env_var: str) -> str:
    """Build the ``" (from <source>)"`` suffix shown next to a detected credential.

    Returns an empty string when no secret source is recorded for
    ``env_var`` (the ``.env`` / shell cases, which users already understand
    as the default). Otherwise returns a human-readable suffix such as
    ``" (from Bitwarden)"``.
    """
    origin = get_secret_source(env_var)
    if origin:
        # Bitwarden gets its properly capitalised display name. Any other
        # label is echoed verbatim, so additional secret sources (e.g.
        # 1Password, HashiCorp Vault) work without touching call sites.
        display_name = "Bitwarden" if origin == "bitwarden" else origin
        return f" (from {display_name})"
    return ""
|
||||
|
||||
|
||||
def _format_offending_chars(value: str, limit: int = 3) -> str:
|
||||
"""Return a compact 'U+XXXX ('c'), ...' summary of non-ASCII codepoints."""
|
||||
@@ -251,12 +213,6 @@ def _apply_external_secret_sources(home_path: Path) -> None:
|
||||
# and might have the same copy-paste corruption as a manually
|
||||
# edited .env (see #6843).
|
||||
_sanitize_loaded_credentials()
|
||||
# Remember where these came from so the setup / `hermes model`
|
||||
# flows can label detected credentials with "(from Bitwarden)" —
|
||||
# otherwise users see "credentials ✓" with no hint that the value
|
||||
# came from BSM rather than .env.
|
||||
for name in result.applied:
|
||||
_SECRET_SOURCES[name] = "bitwarden"
|
||||
print(
|
||||
f" Bitwarden Secrets Manager: applied {len(result.applied)} "
|
||||
f"secret{'s' if len(result.applied) != 1 else ''} "
|
||||
|
||||
+41
-31
@@ -591,7 +591,7 @@ def _session_browse_picker(sessions: list) -> Optional[str]:
|
||||
curses.init_pair(1, curses.COLOR_GREEN, -1) # selected
|
||||
curses.init_pair(2, curses.COLOR_YELLOW, -1) # header
|
||||
curses.init_pair(3, curses.COLOR_CYAN, -1) # search
|
||||
curses.init_pair(4, 8 if curses.COLORS > 8 else curses.COLOR_WHITE, -1) # dim
|
||||
curses.init_pair(4, 8, -1) # dim
|
||||
|
||||
cursor = 0
|
||||
scroll_offset = 0
|
||||
@@ -1981,6 +1981,25 @@ def cmd_whatsapp(args):
|
||||
print("⚠ Pairing may not have completed. Run 'hermes whatsapp' to try again.")
|
||||
|
||||
|
||||
def cmd_whatsapp_cloud(args):
    """Run the WhatsApp Business Cloud API setup wizard (official Meta integration).

    The wizard collects the Meta-side credentials (Phone Number ID, Access
    Token, App Secret, optional App/WABA IDs) and the webhook configuration,
    validating field shapes to catch common mistakes such as pasting a phone
    number into the Phone Number ID field.

    This is separate from ``hermes whatsapp`` (the Baileys bridge wizard);
    the two adapters complement each other. The implementation lives in
    ``hermes_cli/setup_whatsapp_cloud.py``.

    ``args`` is the parsed CLI namespace; it is not read here.
    """
    # The wizard is interactive, so check for a TTY before doing anything else.
    _require_tty("whatsapp-cloud")

    # Imported lazily so the wizard module is only loaded for this command.
    import hermes_cli.setup_whatsapp_cloud as _cloud_wizard

    outcome = _cloud_wizard.run_whatsapp_cloud_setup()
    return outcome
|
||||
|
||||
|
||||
def cmd_setup(args):
|
||||
"""Interactive setup wizard."""
|
||||
from hermes_cli.setup import run_setup_wizard
|
||||
@@ -2433,9 +2452,6 @@ _AUX_TASKS: list[tuple[str, str, str]] = [
|
||||
("mcp", "MCP", "MCP tool reasoning"),
|
||||
("title_generation", "Title generation", "session titles"),
|
||||
("skills_hub", "Skills hub", "skills search/install"),
|
||||
("triage_specifier", "Triage specifier", "kanban spec fleshing"),
|
||||
("kanban_decomposer", "Kanban decomposer", "task decomposition"),
|
||||
("profile_describer", "Profile describer", "auto profile descriptions"),
|
||||
("curator", "Curator", "skill-usage review pass"),
|
||||
]
|
||||
|
||||
@@ -4665,9 +4681,7 @@ def _model_flow_copilot(config, current_model=""):
|
||||
source = creds.get("source", "")
|
||||
else:
|
||||
if source in {"GITHUB_TOKEN", "GH_TOKEN"}:
|
||||
from hermes_cli.env_loader import format_secret_source_suffix
|
||||
bw_suffix = format_secret_source_suffix(source)
|
||||
print(f" GitHub token: {api_key[:8]}... ✓ ({source}{bw_suffix})")
|
||||
print(f" GitHub token: {api_key[:8]}... ✓ ({source})")
|
||||
elif source == "gh auth token":
|
||||
print(" GitHub token: ✓ (from `gh auth token`)")
|
||||
else:
|
||||
@@ -4924,10 +4938,7 @@ def _prompt_api_key(pconfig, existing_key: str, provider_id: str = "") -> tuple:
|
||||
return new_key, False
|
||||
|
||||
# Already configured — offer K / R / C ────────────────────────────────
|
||||
from hermes_cli.env_loader import format_secret_source_suffix
|
||||
|
||||
source_suffix = format_secret_source_suffix(key_env) if key_env else ""
|
||||
print(f" {pconfig.name} API key: {existing_key[:8]}... ✓{source_suffix}")
|
||||
print(f" {pconfig.name} API key: {existing_key[:8]}... ✓")
|
||||
if not key_env:
|
||||
# Nothing we can rewrite; just acknowledge and move on.
|
||||
print()
|
||||
@@ -5210,9 +5221,7 @@ def _model_flow_bedrock_api_key(config, region, current_model=""):
|
||||
# Prompt for API key
|
||||
existing_key = get_env_value("AWS_BEARER_TOKEN_BEDROCK") or ""
|
||||
if existing_key:
|
||||
from hermes_cli.env_loader import format_secret_source_suffix
|
||||
source_suffix = format_secret_source_suffix("AWS_BEARER_TOKEN_BEDROCK")
|
||||
print(f" Bedrock API Key: {existing_key[:12]}... ✓{source_suffix}")
|
||||
print(f" Bedrock API Key: {existing_key[:12]}... ✓")
|
||||
else:
|
||||
print(f" Endpoint: {mantle_base_url}")
|
||||
print()
|
||||
@@ -5883,22 +5892,7 @@ def _model_flow_anthropic(config, current_model=""):
|
||||
if has_creds:
|
||||
# Show what we found
|
||||
if existing_key:
|
||||
from hermes_cli.env_loader import format_secret_source_suffix
|
||||
from hermes_cli.auth import PROVIDER_REGISTRY
|
||||
|
||||
# Surface which env var supplied the key so users with
|
||||
# Bitwarden see "(from Bitwarden)" — without this, a detected
|
||||
# BSM key looks identical to a key in .env and users assume
|
||||
# nothing is wired up.
|
||||
source_suffix = ""
|
||||
for var in PROVIDER_REGISTRY["anthropic"].api_key_env_vars:
|
||||
if os.getenv(var, "").strip() == existing_key:
|
||||
source_suffix = format_secret_source_suffix(var)
|
||||
if source_suffix:
|
||||
break
|
||||
print(
|
||||
f" Anthropic credentials: {existing_key[:12]}... ✓{source_suffix}"
|
||||
)
|
||||
print(f" Anthropic credentials: {existing_key[:12]}... ✓")
|
||||
elif cc_available:
|
||||
print(" Claude Code credentials: ✓ (auto-detected)")
|
||||
print()
|
||||
@@ -9724,6 +9718,7 @@ def _coalesce_session_name_args(argv: list) -> list:
|
||||
"gateway",
|
||||
"setup",
|
||||
"whatsapp",
|
||||
"whatsapp-cloud",
|
||||
"login",
|
||||
"logout",
|
||||
"auth",
|
||||
@@ -10585,7 +10580,7 @@ _BUILTIN_SUBCOMMANDS = frozenset(
|
||||
"model", "pairing", "plugins", "postinstall", "profile", "proxy",
|
||||
"send", "sessions", "setup",
|
||||
"skills", "slack", "status", "tools", "uninstall", "update",
|
||||
"version", "webhook", "whatsapp", "chat", "secrets",
|
||||
"version", "webhook", "whatsapp", "whatsapp-cloud", "chat", "secrets",
|
||||
# Help-ish invocations — plugin commands not being listed in
|
||||
# top-level --help is an acceptable trade-off for skipping an
|
||||
# expensive eager import of every bundled plugin module.
|
||||
@@ -11336,6 +11331,21 @@ def main():
|
||||
)
|
||||
whatsapp_parser.set_defaults(func=cmd_whatsapp)
|
||||
|
||||
# =========================================================================
|
||||
# whatsapp-cloud command (official Meta Cloud API; complement to Baileys)
|
||||
# =========================================================================
|
||||
whatsapp_cloud_parser = subparsers.add_parser(
|
||||
"whatsapp-cloud",
|
||||
help="Set up WhatsApp Business Cloud API integration",
|
||||
description=(
|
||||
"Configure the official Meta WhatsApp Business Cloud API "
|
||||
"adapter (Business account required, public webhook URL "
|
||||
"required). Distinct from `hermes whatsapp` which sets up "
|
||||
"the Baileys bridge for personal accounts."
|
||||
),
|
||||
)
|
||||
whatsapp_cloud_parser.set_defaults(func=cmd_whatsapp_cloud)
|
||||
|
||||
# =========================================================================
|
||||
# slack command
|
||||
# =========================================================================
|
||||
|
||||
+130
-37
@@ -66,6 +66,10 @@ class NousSubscriptionFeatures:
|
||||
def tts(self) -> NousFeatureState:
|
||||
return self.features["tts"]
|
||||
|
||||
@property
|
||||
def stt(self) -> NousFeatureState:
|
||||
return self.features["stt"]
|
||||
|
||||
@property
|
||||
def browser(self) -> NousFeatureState:
|
||||
return self.features["browser"]
|
||||
@@ -74,12 +78,8 @@ class NousSubscriptionFeatures:
|
||||
def modal(self) -> NousFeatureState:
|
||||
return self.features["modal"]
|
||||
|
||||
@property
|
||||
def app_tools(self) -> NousFeatureState:
|
||||
return self.features["app_tools"]
|
||||
|
||||
def items(self) -> Iterable[NousFeatureState]:
|
||||
ordered = ("web", "image_gen", "tts", "browser", "modal", "app_tools")
|
||||
ordered = ("web", "image_gen", "tts", "stt", "browser", "modal")
|
||||
for key in ordered:
|
||||
yield self.features[key]
|
||||
|
||||
@@ -163,6 +163,16 @@ def _tts_label(current_provider: str) -> str:
|
||||
return mapping.get(current_provider or "edge", current_provider or "Edge TTS")
|
||||
|
||||
|
||||
def _stt_label(current_provider: str) -> str:
|
||||
mapping = {
|
||||
"openai": "OpenAI Whisper",
|
||||
"groq": "Groq Whisper",
|
||||
"mistral": "Mistral Voxtral Transcribe",
|
||||
"local": "Local faster-whisper",
|
||||
}
|
||||
return mapping.get(current_provider or "local", current_provider or "Local faster-whisper")
|
||||
|
||||
|
||||
def _resolve_browser_feature_state(
|
||||
*,
|
||||
browser_tool_enabled: bool,
|
||||
@@ -229,22 +239,6 @@ def _resolve_browser_feature_state(
|
||||
return "local", available, active, False
|
||||
|
||||
|
||||
def _read_portal_app_tools_enabled(config: Optional[Dict[str, object]] = None) -> bool:
|
||||
"""Return True when the portal.app_tools config flag is on."""
|
||||
if config is not None:
|
||||
# Fast path: use the pre-loaded config snapshot from the caller
|
||||
import os
|
||||
env_val = os.getenv("PORTAL_APP_TOOLS")
|
||||
if env_val is not None:
|
||||
return is_truthy_value(env_val)
|
||||
portal = config.get("portal")
|
||||
if isinstance(portal, dict):
|
||||
return bool(portal.get("app_tools", True))
|
||||
return True
|
||||
from tools.tool_backend_helpers import portal_app_tools_enabled
|
||||
return portal_app_tools_enabled()
|
||||
|
||||
|
||||
def get_nous_subscription_features(
|
||||
config: Optional[Dict[str, object]] = None,
|
||||
) -> NousSubscriptionFeatures:
|
||||
@@ -271,6 +265,7 @@ def get_nous_subscription_features(
|
||||
|
||||
web_cfg = config.get("web") if isinstance(config.get("web"), dict) else {}
|
||||
tts_cfg = config.get("tts") if isinstance(config.get("tts"), dict) else {}
|
||||
stt_cfg = config.get("stt") if isinstance(config.get("stt"), dict) else {}
|
||||
browser_cfg = config.get("browser") if isinstance(config.get("browser"), dict) else {}
|
||||
terminal_cfg = config.get("terminal") if isinstance(config.get("terminal"), dict) else {}
|
||||
|
||||
@@ -280,6 +275,11 @@ def get_nous_subscription_features(
|
||||
web_search_backend = str(web_cfg.get("search_backend") or "").strip().lower()
|
||||
web_extract_backend = str(web_cfg.get("extract_backend") or "").strip().lower()
|
||||
tts_provider = str(tts_cfg.get("provider") or "edge").strip().lower()
|
||||
# STT default is "local" (faster-whisper) per DEFAULT_CONFIG, which
|
||||
# requires `pip install faster-whisper`. For Nous subscribers we'd
|
||||
# rather route through the managed OpenAI audio gateway — see
|
||||
# apply_nous_managed_defaults below.
|
||||
stt_provider = str(stt_cfg.get("provider") or "local").strip().lower()
|
||||
browser_provider_explicit = "cloud_provider" in browser_cfg
|
||||
browser_provider = normalize_browser_cloud_provider(
|
||||
browser_cfg.get("cloud_provider") if browser_provider_explicit else None
|
||||
@@ -296,6 +296,7 @@ def get_nous_subscription_features(
|
||||
# prevent gateway routing.
|
||||
web_use_gateway = _uses_gateway(web_cfg)
|
||||
tts_use_gateway = _uses_gateway(tts_cfg)
|
||||
stt_use_gateway = _uses_gateway(stt_cfg)
|
||||
browser_use_gateway = _uses_gateway(browser_cfg)
|
||||
image_gen_cfg = config.get("image_gen") if isinstance(config.get("image_gen"), dict) else {}
|
||||
image_use_gateway = _uses_gateway(image_gen_cfg)
|
||||
@@ -313,6 +314,22 @@ def get_nous_subscription_features(
|
||||
direct_browser_use = bool(get_env_value("BROWSER_USE_API_KEY"))
|
||||
direct_modal = has_direct_modal_credentials()
|
||||
|
||||
# STT direct providers. OpenAI Whisper reuses the same audio key as
|
||||
# OpenAI TTS — resolve_openai_audio_api_key() reads VOICE_TOOLS_OPENAI_KEY
|
||||
# and falls back to OPENAI_API_KEY. The local provider's "direct"
|
||||
# signal is whether faster-whisper is importable; we lazy-import so
|
||||
# this module stays cheap on the happy path.
|
||||
direct_openai_stt = bool(resolve_openai_audio_api_key())
|
||||
direct_groq_stt = bool(get_env_value("GROQ_API_KEY"))
|
||||
direct_mistral_stt = bool(get_env_value("MISTRAL_API_KEY"))
|
||||
try:
|
||||
from tools.transcription_tools import _HAS_FASTER_WHISPER
|
||||
local_stt_available = bool(_HAS_FASTER_WHISPER) or bool(
|
||||
get_env_value("HERMES_LOCAL_STT_COMMAND")
|
||||
)
|
||||
except Exception:
|
||||
local_stt_available = bool(get_env_value("HERMES_LOCAL_STT_COMMAND"))
|
||||
|
||||
# When use_gateway is set, suppress direct credentials for managed detection
|
||||
if web_use_gateway:
|
||||
direct_firecrawl = False
|
||||
@@ -324,6 +341,11 @@ def get_nous_subscription_features(
|
||||
if tts_use_gateway:
|
||||
direct_openai_tts = False
|
||||
direct_elevenlabs = False
|
||||
if stt_use_gateway:
|
||||
direct_openai_stt = False
|
||||
direct_groq_stt = False
|
||||
direct_mistral_stt = False
|
||||
local_stt_available = False
|
||||
if browser_use_gateway:
|
||||
direct_browser_use = False
|
||||
direct_browserbase = False
|
||||
@@ -331,10 +353,12 @@ def get_nous_subscription_features(
|
||||
managed_web_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("firecrawl")
|
||||
managed_image_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("fal-queue")
|
||||
managed_tts_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("openai-audio")
|
||||
# STT and TTS share the same managed gateway endpoint ("openai-audio")
|
||||
# because the OpenAI audio API covers both /audio/speech (TTS) and
|
||||
# /audio/transcriptions (STT). One probe, used by both.
|
||||
managed_stt_available = managed_tts_available
|
||||
managed_browser_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("browser-use")
|
||||
managed_modal_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("modal")
|
||||
app_gw_ready = bool(managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("tools"))
|
||||
app_config_on = _read_portal_app_tools_enabled(config)
|
||||
modal_state = resolve_modal_backend_state(
|
||||
modal_mode,
|
||||
has_direct=direct_modal,
|
||||
@@ -383,6 +407,24 @@ def get_nous_subscription_features(
|
||||
)
|
||||
tts_active = bool(tts_tool_enabled and tts_available)
|
||||
|
||||
# STT availability per provider. Unlike TTS, STT isn't a model-callable
|
||||
# tool — the gateway voice middleware calls it on every inbound voice
|
||||
# message — so toolset_enabled is N/A and we treat stt as always
|
||||
# "enabled" if a usable provider is configured.
|
||||
stt_current_provider = stt_provider or "local"
|
||||
stt_managed = (
|
||||
stt_current_provider == "openai"
|
||||
and managed_stt_available
|
||||
and not direct_openai_stt
|
||||
)
|
||||
stt_available = bool(
|
||||
(stt_current_provider == "local" and local_stt_available)
|
||||
or (stt_current_provider == "openai" and (managed_stt_available or direct_openai_stt))
|
||||
or (stt_current_provider == "groq" and direct_groq_stt)
|
||||
or (stt_current_provider == "mistral" and direct_mistral_stt)
|
||||
)
|
||||
stt_active = stt_available
|
||||
|
||||
browser_local_available = _has_agent_browser()
|
||||
(
|
||||
browser_current_provider,
|
||||
@@ -437,6 +479,13 @@ def get_nous_subscription_features(
|
||||
if isinstance(raw_tts_cfg, dict) and "provider" in raw_tts_cfg:
|
||||
tts_explicit_configured = tts_provider not in {"", "edge"}
|
||||
|
||||
# STT considers any non-default provider explicit. "local" is the
|
||||
# DEFAULT_CONFIG seed, so seeing it doesn't mean the user picked it.
|
||||
stt_explicit_configured = False
|
||||
raw_stt_cfg = config.get("stt")
|
||||
if isinstance(raw_stt_cfg, dict) and "provider" in raw_stt_cfg:
|
||||
stt_explicit_configured = stt_provider not in {"", "local"}
|
||||
|
||||
features = {
|
||||
"web": NousFeatureState(
|
||||
key="web",
|
||||
@@ -474,6 +523,21 @@ def get_nous_subscription_features(
|
||||
current_provider=_tts_label(tts_current_provider),
|
||||
explicit_configured=tts_explicit_configured,
|
||||
),
|
||||
"stt": NousFeatureState(
|
||||
key="stt",
|
||||
label="Speech-to-text",
|
||||
included_by_default=True,
|
||||
available=stt_available,
|
||||
active=stt_active,
|
||||
managed_by_nous=stt_managed,
|
||||
direct_override=stt_active and not stt_managed,
|
||||
# STT isn't toolset-gated (gateway middleware calls it
|
||||
# unconditionally on inbound voice), so report True so the
|
||||
# status display doesn't flag it as "tool disabled".
|
||||
toolset_enabled=True,
|
||||
current_provider=_stt_label(stt_current_provider),
|
||||
explicit_configured=stt_explicit_configured,
|
||||
),
|
||||
"browser": NousFeatureState(
|
||||
key="browser",
|
||||
label="Browser automation",
|
||||
@@ -498,17 +562,6 @@ def get_nous_subscription_features(
|
||||
current_provider="Modal" if terminal_backend == "modal" else terminal_backend or "local",
|
||||
explicit_configured=terminal_backend == "modal",
|
||||
),
|
||||
"app_tools": NousFeatureState(
|
||||
key="app_tools",
|
||||
label="App tools (500+ apps)",
|
||||
included_by_default=True,
|
||||
available=app_gw_ready,
|
||||
active=app_gw_ready and app_config_on,
|
||||
managed_by_nous=app_gw_ready and app_config_on,
|
||||
direct_override=False,
|
||||
toolset_enabled=app_config_on,
|
||||
current_provider="Nous Tool Gateway",
|
||||
),
|
||||
}
|
||||
|
||||
return NousSubscriptionFeatures(
|
||||
@@ -547,6 +600,11 @@ def apply_nous_managed_defaults(
|
||||
tts_cfg = {}
|
||||
config["tts"] = tts_cfg
|
||||
|
||||
stt_cfg = config.get("stt")
|
||||
if not isinstance(stt_cfg, dict):
|
||||
stt_cfg = {}
|
||||
config["stt"] = stt_cfg
|
||||
|
||||
browser_cfg = config.get("browser")
|
||||
if not isinstance(browser_cfg, dict):
|
||||
browser_cfg = {}
|
||||
@@ -568,6 +626,18 @@ def apply_nous_managed_defaults(
|
||||
tts_cfg["provider"] = "openai"
|
||||
changed.add("tts")
|
||||
|
||||
# STT: same pattern as TTS. The DEFAULT_CONFIG seed is "local"
|
||||
# (requires `pip install faster-whisper`); for Nous subscribers we
|
||||
# flip it to "openai" so the managed audio gateway handles transcription
|
||||
# via the same auth as TTS. Skipped when the user has explicitly
|
||||
# configured STT or has direct credentials for a non-managed provider.
|
||||
if not features.stt.explicit_configured and not (
|
||||
get_env_value("GROQ_API_KEY")
|
||||
or get_env_value("MISTRAL_API_KEY")
|
||||
):
|
||||
stt_cfg["provider"] = "openai"
|
||||
changed.add("stt")
|
||||
|
||||
if "browser" in selected_toolsets and not features.browser.explicit_configured and not (
|
||||
get_env_value("BROWSER_USE_API_KEY")
|
||||
or get_env_value("BROWSERBASE_API_KEY")
|
||||
@@ -589,6 +659,7 @@ _GATEWAY_TOOL_LABELS = {
|
||||
"web": "Web search & extract (Firecrawl)",
|
||||
"image_gen": "Image generation (FAL)",
|
||||
"tts": "Text-to-speech (OpenAI TTS)",
|
||||
"stt": "Speech-to-text (OpenAI Whisper)",
|
||||
"browser": "Browser automation (Browser Use)",
|
||||
}
|
||||
|
||||
@@ -608,6 +679,15 @@ def _get_gateway_direct_credentials() -> Dict[str, bool]:
|
||||
resolve_openai_audio_api_key()
|
||||
or get_env_value("ELEVENLABS_API_KEY")
|
||||
),
|
||||
# STT direct credentials. OpenAI Whisper shares the audio key
|
||||
# with TTS via resolve_openai_audio_api_key() — counting it here
|
||||
# too is intentional: if the user has an OpenAI audio key they
|
||||
# don't need the gateway for either.
|
||||
"stt": bool(
|
||||
resolve_openai_audio_api_key()
|
||||
or get_env_value("GROQ_API_KEY")
|
||||
or get_env_value("MISTRAL_API_KEY")
|
||||
),
|
||||
"browser": bool(
|
||||
get_env_value("BROWSER_USE_API_KEY")
|
||||
or (get_env_value("BROWSERBASE_API_KEY") and get_env_value("BROWSERBASE_PROJECT_ID"))
|
||||
@@ -619,10 +699,11 @@ _GATEWAY_DIRECT_LABELS = {
|
||||
"web": "Firecrawl/Exa/Parallel/Tavily key",
|
||||
"image_gen": "FAL key",
|
||||
"tts": "OpenAI/ElevenLabs key",
|
||||
"stt": "OpenAI/Groq/Mistral key",
|
||||
"browser": "Browser Use/Browserbase key",
|
||||
}
|
||||
|
||||
_ALL_GATEWAY_KEYS = ("web", "image_gen", "tts", "browser")
|
||||
_ALL_GATEWAY_KEYS = ("web", "image_gen", "tts", "stt", "browser")
|
||||
|
||||
|
||||
def get_gateway_eligible_tools(
|
||||
@@ -658,6 +739,7 @@ def get_gateway_eligible_tools(
|
||||
"web": _uses_gateway(config.get("web")),
|
||||
"image_gen": _uses_gateway(config.get("image_gen")),
|
||||
"tts": _uses_gateway(config.get("tts")),
|
||||
"stt": _uses_gateway(config.get("stt")),
|
||||
"browser": _uses_gateway(config.get("browser")),
|
||||
}
|
||||
|
||||
@@ -697,6 +779,11 @@ def apply_gateway_defaults(
|
||||
tts_cfg = {}
|
||||
config["tts"] = tts_cfg
|
||||
|
||||
stt_cfg = config.get("stt")
|
||||
if not isinstance(stt_cfg, dict):
|
||||
stt_cfg = {}
|
||||
config["stt"] = stt_cfg
|
||||
|
||||
browser_cfg = config.get("browser")
|
||||
if not isinstance(browser_cfg, dict):
|
||||
browser_cfg = {}
|
||||
@@ -712,6 +799,11 @@ def apply_gateway_defaults(
|
||||
tts_cfg["use_gateway"] = True
|
||||
changed.add("tts")
|
||||
|
||||
if "stt" in tool_keys:
|
||||
stt_cfg["provider"] = "openai"
|
||||
stt_cfg["use_gateway"] = True
|
||||
changed.add("stt")
|
||||
|
||||
if "browser" in tool_keys:
|
||||
browser_cfg["cloud_provider"] = "browser-use"
|
||||
browser_cfg["use_gateway"] = True
|
||||
@@ -750,8 +842,9 @@ def prompt_enable_tool_gateway(config: Dict[str, object]) -> set[str]:
|
||||
desc_parts: list[str] = [
|
||||
"",
|
||||
" The Tool Gateway gives you access to web search, image generation,",
|
||||
" text-to-speech, and browser automation through your Nous subscription.",
|
||||
" No need to sign up for separate API keys — just pick the tools you want.",
|
||||
" text-to-speech, speech-to-text, and browser automation through your",
|
||||
" Nous subscription. No need to sign up for separate API keys — just",
|
||||
" pick the tools you want.",
|
||||
"",
|
||||
]
|
||||
if already_managed:
|
||||
|
||||
@@ -24,6 +24,7 @@ PLATFORMS: OrderedDict[str, PlatformInfo] = OrderedDict([
|
||||
("discord", PlatformInfo(label="💬 Discord", default_toolset="hermes-discord")),
|
||||
("slack", PlatformInfo(label="💼 Slack", default_toolset="hermes-slack")),
|
||||
("whatsapp", PlatformInfo(label="📱 WhatsApp", default_toolset="hermes-whatsapp")),
|
||||
("whatsapp_cloud", PlatformInfo(label="📱 WhatsApp Business (Cloud)", default_toolset="hermes-whatsapp")),
|
||||
("signal", PlatformInfo(label="📡 Signal", default_toolset="hermes-signal")),
|
||||
("bluebubbles", PlatformInfo(label="💙 BlueBubbles", default_toolset="hermes-bluebubbles")),
|
||||
("email", PlatformInfo(label="📧 Email", default_toolset="hermes-email")),
|
||||
|
||||
@@ -1051,7 +1051,7 @@ def _run_composite_ui(curses, plugin_names, plugin_labels, plugin_selected,
|
||||
curses.init_pair(1, curses.COLOR_GREEN, -1)
|
||||
curses.init_pair(2, curses.COLOR_YELLOW, -1)
|
||||
curses.init_pair(3, curses.COLOR_CYAN, -1)
|
||||
curses.init_pair(4, 8 if curses.COLORS > 8 else curses.COLOR_WHITE, -1) # dim gray
|
||||
curses.init_pair(4, 8, -1) # dim gray
|
||||
cursor = 0
|
||||
scroll_offset = 0
|
||||
|
||||
@@ -1196,7 +1196,7 @@ def _run_composite_ui(curses, plugin_names, plugin_labels, plugin_selected,
|
||||
curses.init_pair(1, curses.COLOR_GREEN, -1)
|
||||
curses.init_pair(2, curses.COLOR_YELLOW, -1)
|
||||
curses.init_pair(3, curses.COLOR_CYAN, -1)
|
||||
curses.init_pair(4, 8 if curses.COLORS > 8 else curses.COLOR_WHITE, -1)
|
||||
curses.init_pair(4, 8, -1)
|
||||
curses.curs_set(0)
|
||||
elif key in {curses.KEY_ENTER, 10, 13}:
|
||||
if cursor < n_plugins:
|
||||
@@ -1228,7 +1228,7 @@ def _run_composite_ui(curses, plugin_names, plugin_labels, plugin_selected,
|
||||
curses.init_pair(1, curses.COLOR_GREEN, -1)
|
||||
curses.init_pair(2, curses.COLOR_YELLOW, -1)
|
||||
curses.init_pair(3, curses.COLOR_CYAN, -1)
|
||||
curses.init_pair(4, 8 if curses.COLORS > 8 else curses.COLOR_WHITE, -1)
|
||||
curses.init_pair(4, 8, -1)
|
||||
curses.curs_set(0)
|
||||
elif key in {27, ord("q")}:
|
||||
# Save plugin changes on exit
|
||||
|
||||
@@ -0,0 +1,530 @@
|
||||
"""
|
||||
Interactive setup wizard for the WhatsApp Cloud API adapter.
|
||||
|
||||
Entry point: ``hermes whatsapp-cloud`` (dispatched from
|
||||
``cmd_whatsapp_cloud`` in ``hermes_cli/main.py``).
|
||||
|
||||
Walks the user through the 6 credentials Meta requires + recipient
|
||||
allowlist, auto-generates the verify token, and prints exact follow-up
|
||||
instructions for the parts that can't happen inside the wizard process
|
||||
(starting cloudflared, starting the gateway, configuring Meta's
|
||||
webhook dashboard, adding their phone to the recipient list).
|
||||
|
||||
Heavy emphasis on field-shape validation to catch the most common
|
||||
configuration mistakes:
|
||||
|
||||
- Putting the actual phone number in ``WHATSAPP_CLOUD_PHONE_NUMBER_ID``
|
||||
(the field expects Meta's 15-17 digit internal ID, not a phone number).
|
||||
This is the #1 trap — caught us during Phase 3 live testing.
|
||||
- Pasting tokens with trailing whitespace.
|
||||
- Pasting an OpenAI / Slack / GitHub key by mistake.
|
||||
- Confusing App ID with WABA ID with Phone Number ID.
|
||||
|
||||
Each prompt has contextual help showing exactly where to find the value
|
||||
in Meta's App Dashboard, with a one-line description and the field's
|
||||
expected shape ("starts with EAA", "15-17 digits", "32 hex chars", etc.).
|
||||
|
||||
The wizard intentionally does NOT smoke-test the webhook itself — the
|
||||
Hermes gateway and the cloudflared tunnel both run in separate
|
||||
processes the user starts AFTER this wizard exits, so any in-wizard
|
||||
probe would fail by design. Instead the final SETUP COMPLETE block
|
||||
prints the exact curl command the user can run from a third terminal
|
||||
to verify the loop end-to-end once everything's running.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
import secrets
|
||||
import sys
|
||||
from typing import Optional
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Field-shape validators
|
||||
# ---------------------------------------------------------------------------
|
||||
#
|
||||
# Each validator returns (ok, reason_if_not_ok). The wizard uses them to
|
||||
# reject obviously-malformed input before saving — saves users a round
|
||||
# trip with Meta's 401 / 400 errors.
|
||||
|
||||
|
||||
def _validate_phone_number_id(value: str) -> tuple[bool, Optional[str]]:
|
||||
"""Phone Number ID is a 15-17 digit numeric ID assigned by Meta.
|
||||
|
||||
It's NOT a phone number. The #1 setup mistake is pasting the actual
|
||||
phone number (e.g. ``15556422442``) into this field — that's only
|
||||
10-11 digits and gets rejected by Graph as "Object with ID does
|
||||
not exist."
|
||||
"""
|
||||
if not value:
|
||||
return False, "Phone Number ID is required"
|
||||
s = value.strip()
|
||||
if not s.isdigit():
|
||||
return False, "Phone Number ID must be numeric (no '+', spaces, or dashes)"
|
||||
# Real phone numbers are 10-11 digits (US/CA country code + area code
|
||||
# + 7 digits). Meta's internal IDs are 15-17 digits. If we see a
|
||||
# phone-number-sized value, the user almost certainly pasted the
|
||||
# phone number by mistake.
|
||||
if 10 <= len(s) <= 12:
|
||||
return False, (
|
||||
"That looks like a phone number — but this field needs the "
|
||||
"Phone Number ID (Meta's internal ID, 15-17 digits, e.g. "
|
||||
"'7794189252778687'). Look just BELOW the 'From' dropdown in "
|
||||
"API Setup → it's labelled 'Phone number ID'."
|
||||
)
|
||||
if len(s) < 13:
|
||||
return False, "Phone Number ID looks too short (expected 13-18 digits)"
|
||||
if len(s) > 20:
|
||||
return False, "Phone Number ID looks too long (expected 13-18 digits)"
|
||||
return True, None
|
||||
|
||||
|
||||
def _validate_waba_id(value: str) -> tuple[bool, Optional[str]]:
|
||||
"""WABA ID is numeric, similar length range as Phone Number ID."""
|
||||
if not value:
|
||||
return False, "WABA ID is required"
|
||||
s = value.strip()
|
||||
if not s.isdigit():
|
||||
return False, "WABA ID must be numeric"
|
||||
if len(s) < 10 or len(s) > 25:
|
||||
return False, "WABA ID looks wrong (expected 10-25 digits)"
|
||||
return True, None
|
||||
|
||||
|
||||
def _validate_app_id(value: str) -> tuple[bool, Optional[str]]:
|
||||
"""Meta App ID is numeric, typically 15-16 digits."""
|
||||
if not value:
|
||||
return False, "App ID is required"
|
||||
s = value.strip()
|
||||
if not s.isdigit():
|
||||
return False, "App ID must be numeric"
|
||||
if len(s) < 13 or len(s) > 20:
|
||||
return False, "App ID looks wrong (expected 15-16 digits)"
|
||||
return True, None
|
||||
|
||||
|
||||
def _validate_app_secret(value: str) -> tuple[bool, Optional[str]]:
|
||||
"""App Secret is a 32-character lowercase hex string."""
|
||||
if not value:
|
||||
return False, "App Secret is required"
|
||||
s = value.strip()
|
||||
if not re.fullmatch(r"[0-9a-f]+", s.lower()):
|
||||
return False, (
|
||||
"App Secret should be a hex string (only digits 0-9 and "
|
||||
"letters a-f). Make sure you copied the 'App secret' from "
|
||||
"Settings → Basic, not some other token."
|
||||
)
|
||||
if len(s) != 32:
|
||||
return False, f"App Secret should be exactly 32 hex characters (got {len(s)})"
|
||||
return True, None
|
||||
|
||||
|
||||
def _validate_access_token(value: str) -> tuple[bool, Optional[str]]:
|
||||
"""Meta access tokens start with ``EAA`` and are 100-300+ characters.
|
||||
|
||||
Both temp tokens (24h) and System User permanent tokens share this
|
||||
prefix. We don't try to distinguish them.
|
||||
"""
|
||||
if not value:
|
||||
return False, "Access token is required"
|
||||
s = value.strip()
|
||||
if not s.startswith("EAA"):
|
||||
# Diagnose common paste mistakes
|
||||
if s.startswith("sk-"):
|
||||
return False, (
|
||||
"That's an OpenAI key (starts with 'sk-'), not a Meta "
|
||||
"WhatsApp access token. Meta tokens start with 'EAA'."
|
||||
)
|
||||
if s.startswith("xoxb-") or s.startswith("xoxp-"):
|
||||
return False, (
|
||||
"That's a Slack token, not a Meta WhatsApp access token. "
|
||||
"Meta tokens start with 'EAA'."
|
||||
)
|
||||
if s.startswith("ghp_") or s.startswith("gho_"):
|
||||
return False, (
|
||||
"That's a GitHub token, not a Meta WhatsApp access "
|
||||
"token. Meta tokens start with 'EAA'."
|
||||
)
|
||||
return False, (
|
||||
"Meta WhatsApp access tokens start with 'EAA'. Check that "
|
||||
"you're copying from the right place (API Setup → 'Generate "
|
||||
"access token', or Business Settings → System Users → "
|
||||
"'Generate token' for a permanent one)."
|
||||
)
|
||||
if len(s) < 100:
|
||||
return False, f"Access token looks too short ({len(s)} chars, expected 100+)"
|
||||
return True, None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Prompt helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _prompt(message: str, default: Optional[str] = None) -> str:
|
||||
"""Read one line of input. Returns "" on EOF / Ctrl+C / empty input.
|
||||
|
||||
The ``default`` parameter is shown to the user but NOT auto-applied
|
||||
on empty input — callers handle the "user kept existing" case
|
||||
explicitly so they can distinguish between a real value and a
|
||||
display preview (e.g. ``"abc12345..."`` for masked secrets).
|
||||
"""
|
||||
try:
|
||||
suffix = f" [{default}]" if default else ""
|
||||
raw = input(f"{message}{suffix}: ").strip()
|
||||
except (EOFError, KeyboardInterrupt):
|
||||
print()
|
||||
return ""
|
||||
return raw
|
||||
|
||||
|
||||
def _prompt_validated(
    message: str,
    validator,
    *,
    current: Optional[str] = None,
    help_text: Optional[str] = None,
) -> Optional[str]:
    """Prompt repeatedly until the input passes ``validator`` or the user quits.

    ``validator`` is called with the raw answer and must return
    ``(ok, reason)``. ``current`` is displayed as the bracketed default
    (useful when re-running the wizard over an existing config) but is not
    applied automatically. ``help_text``, when given, is printed once
    before the first prompt.

    Returns the stripped, validated value, or ``None`` when the user
    submits an empty answer, interrupts, or declines to continue after
    three consecutive rejections.
    """
    for hint_line in (help_text.strip().splitlines() if help_text else ()):
        print(f" {hint_line}")

    rejected = 0
    while True:
        answer = _prompt(f" → {message}", default=current)
        if not answer:
            return None

        ok, reason = validator(answer)
        if ok:
            return answer.strip()

        print(f" ✗ {reason}")
        rejected += 1
        if rejected < 3:
            continue

        # Three misses in a row: offer a way out before asking again.
        try:
            keep_going = input(" Try again, or press Enter to skip: ").strip()
        except (EOFError, KeyboardInterrupt):
            return None
        if not keep_going:
            return None
        rejected = 0
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Wizard
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def run_whatsapp_cloud_setup() -> int:
    """Interactive wizard for the WhatsApp Cloud API adapter.

    Walks through six steps (Phone Number ID, Access Token, App Secret,
    optional App ID / WABA ID, verify token, allowlist), persisting each
    accepted value with ``save_env_value`` as soon as it is entered, then
    prints the follow-up instructions for exposing the webhook.

    Returns 0 on full success, 1 on user abort, 2 on partial completion
    (some fields written but the user bailed before finishing).
    """
    from hermes_cli.config import get_env_value, save_env_value

    # Tracks whether this run persisted anything, so an abort at a later
    # required step can be reported as "partial" (2) rather than "abort" (1).
    wrote_any = False

    print()
    print("⚕ WhatsApp Business Cloud API Setup")
    print("=" * 50)
    print()
    print("This wizard configures Hermes to talk to WhatsApp via Meta's")
    print("official Cloud API. It's the production-grade path:")
    print()
    print(" • No QR codes, no Node.js bridge subprocess")
    print(" • Stable connection — no account-ban risk")
    print(" • Business account required (not personal WhatsApp)")
    print(" • Public webhook URL required (Cloudflare Tunnel, ngrok,")
    print(" or your own reverse proxy with TLS)")
    print()
    print("If you don't have a Meta app set up yet, follow these steps")
    print("FIRST, then come back and re-run this wizard:")
    print()
    print(" 1. https://developers.facebook.com/apps → Create App")
    print(" → 'Connect with customers through WhatsApp'")
    print(" 2. App Dashboard → WhatsApp → API Setup")
    print(" 3. Click 'Generate access token' (temp 24h token is fine to")
    print(" start; switch to a System User permanent token later)")
    print()
    try:
        # The typed text is irrelevant; this is only a "press Enter" gate.
        input("Press Enter to continue, or Ctrl+C to abort... ")
    except (EOFError, KeyboardInterrupt):
        print("\nSetup cancelled.")
        return 1

    print()
    print("─" * 50)
    print("STEP 1 — Phone Number ID")
    print("─" * 50)
    current_phone_id = get_env_value("WHATSAPP_CLOUD_PHONE_NUMBER_ID") or None
    phone_id = _prompt_validated(
        "Phone Number ID",
        _validate_phone_number_id,
        current=current_phone_id,
        help_text=(
            "Found in: App Dashboard → WhatsApp → API Setup, in the\n"
            "'Send and receive messages' section.\n"
            "Look BELOW the 'From' dropdown — there's a 'Phone number ID'\n"
            "line with the value (15-17 digits, e.g. '7794189252778687').\n"
            "It is NOT the phone number itself (+1 555-...). That's the\n"
            "single most common setup mistake."
        ),
    )
    if not phone_id:
        if current_phone_id:
            phone_id = current_phone_id
            print(f" ✓ Keeping existing: {phone_id}")
        else:
            print("\n✗ Phone Number ID is required. Aborting.")
            return 1
    else:
        save_env_value("WHATSAPP_CLOUD_PHONE_NUMBER_ID", phone_id)
        wrote_any = True
        print(f" ✓ Saved: {phone_id}")
    print()

    print("─" * 50)
    print("STEP 2 — Access Token")
    print("─" * 50)
    current_token = get_env_value("WHATSAPP_CLOUD_ACCESS_TOKEN") or None
    # Only a short prefix is shown so the stored secret is not echoed back.
    current_display = (current_token[:15] + "...") if current_token else None
    token = _prompt_validated(
        "Access Token",
        _validate_access_token,
        current=current_display,
        help_text=(
            "Two options for getting one:\n\n"
            " (a) TEMP — App Dashboard → WhatsApp → API Setup →\n"
            " 'Generate access token' button. Lasts 24 hours.\n"
            " Fine for testing today; you'll have to regenerate\n"
            " tomorrow.\n\n"
            " (b) PERMANENT (production) — System User token. One-time\n"
            " setup, never expires:\n"
            " • business.facebook.com → Settings → System users →\n"
            " Add → Admin role\n"
            " • Assign Assets → your app (Manage app), your\n"
            " WhatsApp account (Manage WABAs)\n"
            " • Generate token → expiration: Never → permissions:\n"
            " business_management, whatsapp_business_messaging,\n"
            " whatsapp_business_management\n\n"
            "Tokens start with 'EAA'."
        ),
    )
    # If they had a current token and just hit Enter, keep it.
    if not token:
        if current_token:
            token = current_token
            print(" ✓ Keeping existing token")
        else:
            print("\n✗ Access Token is required. Aborting.")
            # Honour the documented contract: 2 when this run already
            # wrote the Phone Number ID, 1 when nothing was written.
            return 2 if wrote_any else 1
    else:
        save_env_value("WHATSAPP_CLOUD_ACCESS_TOKEN", token)
        wrote_any = True
        print(" ✓ Saved (token hidden)")
    print()

    print("─" * 50)
    print("STEP 3 — App Secret (required for webhook signature verification)")
    print("─" * 50)
    current_secret = get_env_value("WHATSAPP_CLOUD_APP_SECRET") or None
    current_secret_display = (current_secret[:8] + "...") if current_secret else None
    app_secret = _prompt_validated(
        "App Secret",
        _validate_app_secret,
        current=current_secret_display,
        help_text=(
            "Found in: App Dashboard → Settings → Basic →\n"
            "'App secret' field (click 'Show', enter your Facebook password).\n\n"
            "If 'Show' doesn't appear, you may need Admin role on the app.\n"
            "It's a 32-character lowercase hex string.\n\n"
            "Without the App Secret, inbound webhook POSTs are refused\n"
            "with HTTP 503 (we can't verify they actually came from Meta)."
        ),
    )
    if not app_secret:
        if current_secret:
            app_secret = current_secret
            print(" ✓ Keeping existing App Secret")
        else:
            # Skipping is allowed: the wizard continues, with a warning.
            print("\n⚠ Skipping App Secret — inbound webhooks will be refused")
            print(" until you set WHATSAPP_CLOUD_APP_SECRET manually.")
    else:
        save_env_value("WHATSAPP_CLOUD_APP_SECRET", app_secret)
        print(" ✓ Saved (secret hidden)")
    print()

    print("─" * 50)
    print("STEP 4 — App ID & WABA ID (optional, for analytics)")
    print("─" * 50)
    current_app_id = get_env_value("WHATSAPP_CLOUD_APP_ID") or None
    # _prompt_validated returns None on empty input before it ever calls the
    # validator, so the plain validator is enough for an optional field.
    app_id = _prompt_validated(
        "App ID (optional, press Enter to skip)",
        _validate_app_id,
        current=current_app_id,
        help_text=(
            "Found in: App Dashboard → Settings → Basic → 'App ID' at the\n"
            "top of the page. Numeric, ~15-16 digits.\n"
            "Not required for messaging — useful only for analytics later."
        ),
    )
    if app_id:
        save_env_value("WHATSAPP_CLOUD_APP_ID", app_id)
        print(f" ✓ Saved: {app_id}")
    elif current_app_id:
        print(f" ✓ Keeping existing: {current_app_id}")

    current_waba_id = get_env_value("WHATSAPP_CLOUD_WABA_ID") or None
    waba_id = _prompt_validated(
        "WABA ID (optional, press Enter to skip)",
        _validate_waba_id,
        current=current_waba_id,
        help_text=(
            "WhatsApp Business Account ID. Found in: App Dashboard →\n"
            "WhatsApp → API Setup, near the top — 'WhatsApp Business\n"
            "Account ID'. Numeric, ~15+ digits.\n"
            "Not required for messaging — useful for analytics."
        ),
    )
    if waba_id:
        save_env_value("WHATSAPP_CLOUD_WABA_ID", waba_id)
        print(f" ✓ Saved: {waba_id}")
    elif current_waba_id:
        print(f" ✓ Keeping existing: {current_waba_id}")
    print()

    print("─" * 50)
    print("STEP 5 — Verify Token (auto-generated)")
    print("─" * 50)
    current_verify = get_env_value("WHATSAPP_CLOUD_VERIFY_TOKEN") or None
    if current_verify:
        print(f" An existing verify token is already set ({current_verify[:8]}...).")
        try:
            regen = input(" Generate a new one? [y/N]: ").strip().lower()
        except (EOFError, KeyboardInterrupt):
            # Treat an interrupted prompt as the default answer ("no").
            regen = "n"
        if regen in {"y", "yes"}:
            verify_token = secrets.token_urlsafe(32)
            save_env_value("WHATSAPP_CLOUD_VERIFY_TOKEN", verify_token)
            print(f" ✓ New verify token: {verify_token}")
        else:
            verify_token = current_verify
            print(" ✓ Keeping existing verify token")
    else:
        verify_token = secrets.token_urlsafe(32)
        save_env_value("WHATSAPP_CLOUD_VERIFY_TOKEN", verify_token)
        print(f" ✓ Generated: {verify_token}")
    print()
    print(" → COPY THIS TOKEN NOW. You'll paste it into Meta's webhook")
    print(" configuration dialog (next step).")
    print()

    print("─" * 50)
    print("STEP 6 — Recipient Allowlist")
    print("─" * 50)
    print()
    print(" Who is allowed to message the bot? (Comma-separated phone")
    print(" numbers with country code, no '+' / spaces / dashes. Use '*'")
    print(" to allow anyone — only safe if you've also configured Meta's")
    print(" recipient whitelist for app-development mode.)")
    print()
    current_allow = get_env_value("WHATSAPP_CLOUD_ALLOWED_USERS") or None
    allow_hint = f" [{current_allow}]" if current_allow else ""
    try:
        allowed = input(f" → Allowed users{allow_hint}: ").strip()
    except (EOFError, KeyboardInterrupt):
        print()
        allowed = ""
    if allowed:
        # Light normalization — strip whitespace, dashes and '+' from each
        # entry, then drop entries that end up empty so the stored list
        # never contains blank items such as "123,,456".
        cleaned = (re.sub(r"[\s\-+]", "", part) for part in allowed.split(","))
        allowed = ",".join(entry for entry in cleaned if entry)
    if allowed:
        save_env_value("WHATSAPP_CLOUD_ALLOWED_USERS", allowed)
        print(f" ✓ Saved: {allowed}")
    elif current_allow:
        # Nothing usable was entered: the stored allowlist stays in effect,
        # so do not claim that every message will be denied.
        print(f" ✓ Keeping existing: {current_allow}")
    else:
        print(" ⚠ No allowlist — every inbound message will be denied.")
        print(" Re-run this wizard or set WHATSAPP_CLOUD_ALLOWED_USERS manually.")
    print()

    print("─" * 50)
    print("SETUP COMPLETE — Next steps")
    print("─" * 50)
    print()
    print(" Hermes needs a public HTTPS URL to receive WhatsApp messages.")
    print(" The recommended path is Cloudflare Tunnel (free, no port")
    print(" forwarding, no DNS setup).")
    print()
    print(" 1. Install cloudflared (one-time, if you don't have it):")
    print(" Windows: winget install Cloudflare.cloudflared")
    print(" macOS: brew install cloudflared")
    print(" Linux: https://github.com/cloudflare/cloudflared/releases")
    print()
    print(" Alternatives: ngrok, or your own domain + reverse proxy")
    print(" with TLS.")
    print()
    print(" 2. Start the tunnel in a separate terminal:")
    print(" cloudflared tunnel --url http://localhost:8090")
    print(" Note the printed https://<random>.trycloudflare.com URL.")
    print()
    print(" 3. Start the Hermes gateway in another terminal:")
    print(" hermes gateway")
    print()
    print(" 4. Verify your local config is reachable. From a third")
    print(" terminal, with the tunnel URL substituted:")
    print()
    print(" curl 'https://YOUR-TUNNEL.trycloudflare.com/whatsapp/webhook?\\")
    print(f" hub.mode=subscribe&hub.verify_token={verify_token}&\\")
    print(" hub.challenge=hello'")
    print()
    print(" Expected: HTTP 200 with body 'hello'.")
    print(" Also try: curl https://YOUR-TUNNEL.trycloudflare.com/health")
    print(" (should return JSON with verify_token_configured: true).")
    print()
    print(" 5. Configure Meta to point at your tunnel:")
    print(" App Dashboard → WhatsApp → Configuration → Edit webhook")
    print(" Callback URL: <tunnel-url>/whatsapp/webhook")
    print(f" Verify Token: {verify_token}")
    print(" → Click 'Verify and save'")
    print(" → Then 'Manage' webhook fields → subscribe to 'messages'")
    print()
    print(" 6. Add your phone to Meta's recipient list:")
    print(" App Dashboard → WhatsApp → API Setup → 'To' →")
    print(" 'Manage phone number list'")
    print()
    print(" 7. DM the bot's test number from your phone.")
    print()
    print("─" * 50)
    print("Optional: polish your bot's WhatsApp profile")
    print("─" * 50)
    print()
    print(" WhatsApp shows a display name and profile picture for your bot")
    print(" in every chat header and contact list. These are set in Meta's")
    print(" Business Manager, not via this wizard — but here's where to do")
    print(" it once you're up and running:")
    print()
    # Deep-link straight to the account when a WABA ID is known.
    effective_waba = waba_id or current_waba_id
    if effective_waba:
        print(" • Display name + profile picture:")
        print(" https://business.facebook.com/wa/manage/phone-numbers/"
              f"?waba_id={effective_waba}")
    else:
        print(" • Display name + profile picture:")
        print(" https://business.facebook.com/wa/manage/phone-numbers/")
        print(" (select your WhatsApp Business Account on that page)")
    print(" Display-name changes go through a ~24-48h Meta review.")
    print()
    print(" • About, description, website, hours, business category:")
    print(" Same page → click your phone number → 'Edit profile'.")
    print()
    print(" • Verified badge (the green check):")
    print(" Requires Meta's business verification process —")
    print(" Business Manager → Security Center → Start Verification.")
    print()
    print(" Docs: https://hermes-agent.nousresearch.com/docs/user-guide/")
    print(" messaging/whatsapp-cloud")
    print()
    return 0
|
||||
@@ -309,7 +309,7 @@ def show_status(args):
|
||||
print()
|
||||
print(color("◆ Nous Tool Gateway", Colors.CYAN, Colors.BOLD))
|
||||
print(" Your free-tier Nous account does not include Tool Gateway access.")
|
||||
print(" Upgrade your subscription to unlock managed web, image, TTS, and browser tools.")
|
||||
print(" Upgrade your subscription to unlock managed web, image, TTS, STT, and browser tools.")
|
||||
try:
|
||||
portal_url = nous_status.get("portal_base_url", "").rstrip("/")
|
||||
if portal_url:
|
||||
|
||||
+29
-86
@@ -78,7 +78,6 @@ CONFIGURABLE_TOOLSETS = [
|
||||
("discord_admin", "🛡️ Discord Server Admin", "list channels/roles, pin, assign roles"),
|
||||
("yuanbao", "🤖 Yuanbao", "group info, member queries, DM"),
|
||||
("computer_use", "🖱️ Computer Use (macOS)", "background desktop control via cua-driver"),
|
||||
("app_tools", "🔌 App Integrations (500+)", "Gmail, Slack, GitHub, Jira, Notion, etc. via Nous tool gateway"),
|
||||
]
|
||||
|
||||
# Toolsets that are OFF by default for new installs.
|
||||
@@ -312,16 +311,6 @@ TOOL_CATEGORIES = {
|
||||
"image_gen": {
|
||||
"name": "Image Generation",
|
||||
"icon": "🎨",
|
||||
# Per-provider rows for FAL.ai (`plugins/image_gen/fal`), OpenAI,
|
||||
# OpenAI Codex, and xAI are injected at runtime from each
|
||||
# ``plugins.image_gen.<vendor>`` package via
|
||||
# ``_plugin_image_gen_providers()`` in ``_visible_providers``.
|
||||
# Only non-provider UX setup-flow rows remain here:
|
||||
# - "Nous Subscription" — managed FAL billed via the Nous
|
||||
# subscription (requires_nous_auth + override_env_vars).
|
||||
# Uses the fal plugin as the underlying backend but has a
|
||||
# distinct setup UX.
|
||||
# Mirrors the shape browser/video_gen ship today.
|
||||
"providers": [
|
||||
{
|
||||
"name": "Nous Subscription",
|
||||
@@ -333,6 +322,15 @@ TOOL_CATEGORIES = {
|
||||
"override_env_vars": ["FAL_KEY"],
|
||||
"imagegen_backend": "fal",
|
||||
},
|
||||
{
|
||||
"name": "FAL.ai",
|
||||
"badge": "paid",
|
||||
"tag": "Pick from flux-2-klein, flux-2-pro, gpt-image, nano-banana, etc.",
|
||||
"env_vars": [
|
||||
{"key": "FAL_KEY", "prompt": "FAL API key", "url": "https://fal.ai/dashboard/keys"},
|
||||
],
|
||||
"imagegen_backend": "fal",
|
||||
},
|
||||
],
|
||||
},
|
||||
"video_gen": {
|
||||
@@ -484,11 +482,6 @@ TOOLSET_ENV_REQUIREMENTS = {
|
||||
# ─── Post-Setup Hooks ─────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def _cua_driver_cmd() -> str:
|
||||
"""Return the cua-driver executable name/path, honoring non-empty overrides."""
|
||||
return os.environ.get("HERMES_CUA_DRIVER_CMD", "").strip() or "cua-driver"
|
||||
|
||||
|
||||
def _pip_install(
|
||||
args: List[str],
|
||||
*,
|
||||
@@ -557,55 +550,6 @@ def _pip_install(
|
||||
)
|
||||
|
||||
|
||||
|
||||
def _check_cua_driver_asset_for_arch() -> bool:
    """Report whether the latest CUA release has a build for this machine.

    Returns True when an asset likely exists, or when that cannot be
    determined (any error during the lookup). Returns False, after printing
    a warning, only when the release listing was fetched and contains no
    asset whose name mentions ``x86_64`` or ``amd64`` — so callers can skip
    the install attempt and avoid a raw 404.
    """
    import platform as _plat
    import urllib.request

    # arm64 (Apple Silicon) assets are always published — no probe needed.
    if _plat.machine() == "arm64":
        return True

    # x86_64 / Intel — look through the latest release's assets before
    # falling through to the upstream installer.
    try:
        request = urllib.request.Request(
            "https://api.github.com/repos/trycua/cua/releases/latest",
            headers={"Accept": "application/vnd.github+json"},
        )
        with urllib.request.urlopen(request, timeout=10) as resp:
            release = _json.loads(resp.read().decode())
        tag = release.get("tag_name", "")
        for asset in release.get("assets", []):
            asset_name = asset.get("name", "").lower()
            if "x86_64" in asset_name or "amd64" in asset_name:
                return True
        _print_warning(
            f" Latest CUA release ({tag}) has no Intel (x86_64) asset."
        )
        _print_info(
            " CUA Driver currently only ships Apple Silicon builds."
        )
        _print_info(
            " See: https://github.com/trycua/cua/issues/1493"
        )
        return False
    except Exception:
        # Network / API failure — proceed and let the installer handle it.
        return True
|
||||
|
||||
|
||||
def install_cua_driver(upgrade: bool = False) -> bool:
|
||||
"""Install or refresh the cua-driver binary used by Computer Use.
|
||||
|
||||
@@ -635,8 +579,7 @@ def install_cua_driver(upgrade: bool = False) -> bool:
|
||||
_print_warning(" Computer Use (cua-driver) is macOS-only; skipping.")
|
||||
return False
|
||||
|
||||
driver_cmd = _cua_driver_cmd()
|
||||
binary = shutil.which(driver_cmd)
|
||||
binary = shutil.which("cua-driver")
|
||||
|
||||
# Not installed → fresh install path (only when caller asked for it).
|
||||
if not binary and not upgrade:
|
||||
@@ -644,20 +587,18 @@ def install_cua_driver(upgrade: bool = False) -> bool:
|
||||
_print_warning(" curl not found — install manually:")
|
||||
_print_info(" https://github.com/trycua/cua/blob/main/libs/cua-driver/README.md")
|
||||
return False
|
||||
if not _check_cua_driver_asset_for_arch():
|
||||
return False
|
||||
return _run_cua_driver_installer(label="Installing")
|
||||
|
||||
# Already installed and caller didn't ask to upgrade → just confirm.
|
||||
if binary and not upgrade:
|
||||
try:
|
||||
version = subprocess.run(
|
||||
[driver_cmd, "--version"],
|
||||
["cua-driver", "--version"],
|
||||
capture_output=True, text=True, timeout=5,
|
||||
).stdout.strip()
|
||||
_print_success(f" {driver_cmd} already installed: {version or 'unknown version'}")
|
||||
_print_success(f" cua-driver already installed: {version or 'unknown version'}")
|
||||
except Exception:
|
||||
_print_success(f" {driver_cmd} already installed.")
|
||||
_print_success(" cua-driver already installed.")
|
||||
_print_info(" Grant macOS permissions if not done yet:")
|
||||
_print_info(" System Settings > Privacy & Security > Accessibility")
|
||||
_print_info(" System Settings > Privacy & Security > Screen Recording")
|
||||
@@ -668,14 +609,11 @@ def install_cua_driver(upgrade: bool = False) -> bool:
|
||||
_print_warning(" curl not found — cannot refresh cua-driver.")
|
||||
return bool(binary)
|
||||
|
||||
if not _check_cua_driver_asset_for_arch():
|
||||
return bool(binary)
|
||||
|
||||
if binary:
|
||||
# Show before/after version when we have a baseline. Best-effort.
|
||||
try:
|
||||
before = subprocess.run(
|
||||
[driver_cmd, "--version"],
|
||||
["cua-driver", "--version"],
|
||||
capture_output=True, text=True, timeout=5,
|
||||
).stdout.strip()
|
||||
except Exception:
|
||||
@@ -687,13 +625,13 @@ def install_cua_driver(upgrade: bool = False) -> bool:
|
||||
if ok and before:
|
||||
try:
|
||||
after = subprocess.run(
|
||||
[driver_cmd, "--version"],
|
||||
["cua-driver", "--version"],
|
||||
capture_output=True, text=True, timeout=5,
|
||||
).stdout.strip()
|
||||
if after and after != before:
|
||||
_print_success(f" {driver_cmd} upgraded: {before} → {after}")
|
||||
_print_success(f" cua-driver upgraded: {before} → {after}")
|
||||
elif after:
|
||||
_print_info(f" {driver_cmd} up to date: {after}")
|
||||
_print_info(f" cua-driver up to date: {after}")
|
||||
except Exception:
|
||||
pass
|
||||
return ok
|
||||
@@ -717,12 +655,11 @@ def _run_cua_driver_installer(label: str = "Installing", verbose: bool = True) -
|
||||
_print_info(f" {label} cua-driver (macOS background computer-use)...")
|
||||
else:
|
||||
_print_info(f" {label} cua-driver...")
|
||||
driver_cmd = _cua_driver_cmd()
|
||||
try:
|
||||
result = subprocess.run(install_cmd, shell=True, timeout=300)
|
||||
if result.returncode == 0 and shutil.which(driver_cmd):
|
||||
if result.returncode == 0 and shutil.which("cua-driver"):
|
||||
if verbose:
|
||||
_print_success(f" {driver_cmd} installed.")
|
||||
_print_success(" cua-driver installed.")
|
||||
_print_info(" IMPORTANT — grant macOS permissions now:")
|
||||
_print_info(" System Settings > Privacy & Security > Accessibility")
|
||||
_print_info(" System Settings > Privacy & Security > Screen Recording")
|
||||
@@ -1569,9 +1506,12 @@ def _plugin_image_gen_providers() -> list[dict]:
|
||||
Each returned dict looks like a regular ``TOOL_CATEGORIES`` provider
|
||||
row but carries an ``image_gen_plugin_name`` marker so downstream
|
||||
code (config writing, model picker) knows to route through the
|
||||
plugin registry. Every image-gen backend is a plugin now — there
|
||||
are no hardcoded rows left in ``TOOL_CATEGORIES["image_gen"]`` for
|
||||
this function to dedupe against (see issue #26241).
|
||||
plugin registry instead of the in-tree FAL backend.
|
||||
|
||||
FAL is skipped — it's already exposed by the hardcoded
|
||||
``TOOL_CATEGORIES["image_gen"]`` entries. When FAL gets ported to
|
||||
a plugin in a follow-up PR, the hardcoded entries go away and this
|
||||
function surfaces it alongside OpenAI automatically.
|
||||
"""
|
||||
try:
|
||||
from agent.image_gen_registry import list_providers
|
||||
@@ -1584,6 +1524,9 @@ def _plugin_image_gen_providers() -> list[dict]:
|
||||
|
||||
rows: list[dict] = []
|
||||
for provider in providers:
|
||||
if getattr(provider, "name", None) == "fal":
|
||||
# FAL has its own hardcoded rows today.
|
||||
continue
|
||||
try:
|
||||
schema = provider.get_setup_schema()
|
||||
except Exception:
|
||||
@@ -1808,7 +1751,7 @@ _POST_SETUP_INSTALLED: dict = {
|
||||
# entry when (a) the post_setup is the ONLY install side-effect for
|
||||
# a no-key provider, and (b) an installed-state check is cheap and
|
||||
# doesn't trigger a heavy import.
|
||||
"cua_driver": lambda: bool(shutil.which(_cua_driver_cmd())),
|
||||
"cua_driver": lambda: bool(shutil.which("cua-driver")),
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -975,13 +975,11 @@ _AUX_TASK_SLOTS: Tuple[str, ...] = (
|
||||
"vision",
|
||||
"web_extract",
|
||||
"compression",
|
||||
"session_search",
|
||||
"skills_hub",
|
||||
"approval",
|
||||
"mcp",
|
||||
"title_generation",
|
||||
"triage_specifier",
|
||||
"kanban_decomposer",
|
||||
"profile_describer",
|
||||
"curator",
|
||||
)
|
||||
|
||||
|
||||
Binary file not shown.
|
Before Width: | Height: | Size: 1.2 MiB |
Binary file not shown.
|
Before Width: | Height: | Size: 2.1 MiB |
Binary file not shown.
|
Before Width: | Height: | Size: 1.6 MiB |
Binary file not shown.
|
Before Width: | Height: | Size: 1.4 MiB |
@@ -1,85 +0,0 @@
|
||||
Create a professional infographic following these specifications:
|
||||
|
||||
## Image Specifications
|
||||
|
||||
- **Type**: Infographic
|
||||
- **Layout**: bento-grid
|
||||
- **Style**: technical-schematic (engineering blueprint variant)
|
||||
- **Aspect Ratio**: 1:1 (square)
|
||||
- **Language**: English
|
||||
|
||||
## Core Principles
|
||||
|
||||
- Follow the bento-grid layout precisely with varied cell sizes
|
||||
- Apply technical-schematic aesthetics consistently throughout
|
||||
- Keep information concise, highlight keywords and core concepts
|
||||
- Use ample whitespace for visual clarity
|
||||
- Maintain clear visual hierarchy with a hero cell for the headline metric
|
||||
|
||||
## Style Guidelines (technical-schematic blueprint)
|
||||
|
||||
- Color palette: deep blue background (#1E3A5F), white lines and text, amber accent (#F59E0B) ONLY on the hero metric and critical deltas, cyan callouts for measurement annotations
|
||||
- Grid pattern overlay across the entire canvas — fine white grid lines on the deep blue background
|
||||
- All-caps technical stencil typography for headers; clean sans-serif for body
|
||||
- Dimension lines with arrowheads connecting metrics to their cells
|
||||
- Technical symbols where appropriate (gear icons, flow arrows, modular block diagrams)
|
||||
- Consistent stroke weights — bold for cell borders, thin for grid, medium for connector lines
|
||||
- Engineering spec-sheet aesthetic: feels like a printed architectural blueprint, austere and precise
|
||||
|
||||
## Layout Guidelines (bento-grid)
|
||||
|
||||
- Hero cell (TOP-CENTER or LEFT, occupying ~40% of canvas): "−61 COMPLEXITY · 79 → 18" headline metric in massive amber-on-blue, with subtitle "convert_messages_to_anthropic refactored"
|
||||
- 7 helper cells in a 2x4 or 3x3 grid showing each extracted helper as its own modular block — each cell has the helper name in all-caps, its complexity number, and one-line role
|
||||
- Metrics strip cell: BEFORE/AFTER table with deltas (185 statements → ~70, 79 C → 18 C, +5 violations intentional)
|
||||
- Test validation cell: "152/152 + 213/213 PASS" with checkmark stencil
|
||||
- Footer strip across bottom: "PR #27784 · agent/anthropic_adapter.py · cherry-picked from #23968 · @kshitijk4poor · NousResearch/hermes-agent"
|
||||
|
||||
## Content to render
|
||||
|
||||
**Main title (top of canvas, all caps):** "ANTHROPIC ADAPTER · 1-INTO-7 EXTRACTION"
|
||||
**Subtitle:** "PR #27784 — convert_messages_to_anthropic refactor"
|
||||
|
||||
**Hero cell (largest, amber accent):**
|
||||
- "−61"
|
||||
- "CYCLOMATIC COMPLEXITY"
|
||||
- "79 → 18 MAX (−77%)"
|
||||
- Subtext: "convert_messages_to_anthropic · pure code motion · zero behavior change"
|
||||
|
||||
**7 helper cells (one per helper, each its own modular block):**
|
||||
|
||||
1. _convert_assistant_message · C<10 · "Assistant msg → content blocks"
|
||||
2. _convert_tool_message_to_result · C=12 · "Tool msg → tool_result + merge"
|
||||
3. _convert_user_message · C<10 · "User msg validation"
|
||||
4. _strip_orphaned_tool_blocks · C=15 · "Orphan tool_use removal"
|
||||
5. _merge_consecutive_roles · C=13 · "Anthropic role-alternation"
|
||||
6. _manage_thinking_signatures · C=18 · "Strip/preserve by endpoint"
|
||||
7. _evict_old_screenshots · C<10 · "Keep most recent 3 images"
|
||||
|
||||
**Metrics cell (table format with arrows):**
|
||||
- MAX FUNCTION COMPLEXITY: 79 → 18 (−77%)
|
||||
- MAX STATEMENTS/FUNCTION: 185 → ~70 (−62%)
|
||||
- LOC FILE-WIDE: −4
|
||||
- MAIN FUNCTION LOC: 395 → 63
|
||||
|
||||
**Test validation cell (checkmark stencil):**
|
||||
- test_anthropic_adapter.py: 152/152 PASS
|
||||
- test_auxiliary_client.py: 172/172 PASS
|
||||
- test_azure_identity_adapter.py: 39/39 PASS
|
||||
- test_bedrock_1m_context.py: 2/2 PASS
|
||||
|
||||
**Behavior preservation cell:**
|
||||
"ZERO LOGIC CHANGES · ANTHROPIC + KIMI + DEEPSEEK + MINIMAX + AZURE FOUNDRY + BEDROCK SEMANTICS PRESERVED"
|
||||
|
||||
**Footer strip:**
|
||||
"PR #27784 · agent/anthropic_adapter.py · cherry-picked from #23968 · @kshitijk4poor · NousResearch/hermes-agent"
|
||||
|
||||
## Text Requirements
|
||||
|
||||
- All text in English, all-caps for headers
|
||||
- Hero metric "−61" in amber (#F59E0B), oversized, with thick blueprint stencil treatment
|
||||
- Helper names in white technical stencil
|
||||
- Complexity numbers (C=12, C=18, etc.) in cyan callouts
|
||||
- "BEFORE" labels in white-on-blue, "AFTER" labels in amber-on-blue
|
||||
- Footer in small white stencil
|
||||
|
||||
Generate the infographic now as a square engineering blueprint.
|
||||
@@ -1,66 +0,0 @@
|
||||
# Infographic: PR #27784 — convert_messages_to_anthropic refactor
|
||||
|
||||
## Hero metric
|
||||
**−61 cyclomatic complexity** in `agent/anthropic_adapter.py` (79 → 18 max).
|
||||
**−4 LOC** net file-wide. **77% drop** in single-function complexity ceiling.
|
||||
|
||||
## Title
|
||||
ANTHROPIC ADAPTER · 1-INTO-7 EXTRACTION
|
||||
PR #27784 · agent/anthropic_adapter.py · @kshitijk4poor
|
||||
|
||||
## Section 1: BEFORE (left side)
|
||||
**convert_messages_to_anthropic**
|
||||
- 185 statements
|
||||
- 90 branches
|
||||
- Cyclomatic: 79
|
||||
- Did 7 jobs in one function
|
||||
|
||||
Inline responsibilities mixed together:
|
||||
1. Walk + dispatch by role
|
||||
2. Tool-result conversion
|
||||
3. Orphan tool-use stripping
|
||||
4. Same-role merging
|
||||
5. Thinking-signature management
|
||||
6. Screenshot eviction
|
||||
7. Final assembly
|
||||
|
||||
## Section 2: AFTER (right side)
|
||||
**convert_messages_to_anthropic** — now 63 lines, C<10
|
||||
Plus 7 single-responsibility helpers:
|
||||
|
||||
| Helper | C | Role |
|
||||
|---|---|---|
|
||||
| _convert_assistant_message | <10 | Assistant msg → content blocks |
|
||||
| _convert_tool_message_to_result | 12 | Tool msg → tool_result + merge |
|
||||
| _convert_user_message | <10 | User msg validation + conversion |
|
||||
| _strip_orphaned_tool_blocks | 15 | Strip orphan tool_use + tool_result |
|
||||
| _merge_consecutive_roles | 13 | Anthropic role-alternation enforce |
|
||||
| _manage_thinking_signatures | 18 | Strip/preserve/downgrade by endpoint |
|
||||
| _evict_old_screenshots | <10 | Keep most recent 3 images |
|
||||
|
||||
## Section 3: METRICS
|
||||
| Metric | Before | After | Δ |
|
||||
|---|---:|---:|---:|
|
||||
| Max function complexity | 79 | 18 | −77% |
|
||||
| Max statements/function | 185 | ~70 | −62% |
|
||||
| LOC (file-wide) | — | — | **−4** |
|
||||
| C901 violations | 3 | 8 | +5 (intentional split) |
|
||||
|
||||
## Section 4: ZERO BEHAVIOR CHANGE
|
||||
- Pure code motion — no logic edits
|
||||
- Mutating helpers update `result` in place (same as inline)
|
||||
- `_merge_consecutive_roles` returns new list — caller rebinds
|
||||
- Anthropic / Kimi / DeepSeek / MiniMax / Azure Foundry / Bedrock semantics preserved
|
||||
- Thinking-signature handling identical to pre-refactor
|
||||
|
||||
## Section 5: TEST VALIDATION
|
||||
- tests/agent/test_anthropic_adapter.py — **152 / 152 pass**
|
||||
- tests/agent/test_auxiliary_client.py — **172 / 172 pass**
|
||||
- tests/agent/test_azure_identity_adapter.py — **39 / 39 pass**
|
||||
- tests/agent/test_bedrock_1m_context.py — **2 / 2 pass**
|
||||
|
||||
## Footer
|
||||
File: agent/anthropic_adapter.py
|
||||
Original PR: #27784 (cherry-pick of #23968)
|
||||
Salvage commit: 9c102b937 (kshitijk4poor authorship preserved)
|
||||
Repo: NousResearch/hermes-agent
|
||||
@@ -148,7 +148,7 @@ class BrowserUseBrowserProvider(BrowserProvider):
|
||||
|
||||
return {
|
||||
"api_key": managed.nous_user_token,
|
||||
"base_url": managed.resolved_origin.rstrip("/"),
|
||||
"base_url": managed.gateway_origin.rstrip("/"),
|
||||
"managed_mode": True,
|
||||
}
|
||||
|
||||
|
||||
@@ -1,182 +0,0 @@
|
||||
"""FAL.ai image generation backend.
|
||||
|
||||
Wraps the 18-model FAL catalog (FLUX 2, Z-Image, Nano Banana, GPT
|
||||
Image 1.5, Recraft, Imagen 4, Qwen, Ideogram, …) as an
|
||||
:class:`ImageGenProvider` implementation.
|
||||
|
||||
The heavy lifting — model catalog, payload construction, request
|
||||
submission, managed-Nous-gateway selection, Clarity Upscaler chaining
|
||||
— lives in :mod:`tools.image_generation_tool`. This plugin reaches into
|
||||
that module via call-time indirection (``import tools.image_generation_tool as _it``)
|
||||
so:
|
||||
|
||||
* the existing test suite (``tests/tools/test_image_generation.py``,
|
||||
``tests/tools/test_managed_media_gateways.py``) keeps patching
|
||||
``image_tool._submit_fal_request`` / ``image_tool.fal_client`` /
|
||||
``image_tool._managed_fal_client`` without modification, and
|
||||
* there's exactly one canonical FAL code path on disk — the plugin is a
|
||||
registration adapter, not a parallel implementation.
|
||||
|
||||
See issue #26241 for the migration plan and the
|
||||
``plugin-extraction-test-patch-compatibility.md`` rules this follows.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from agent.image_gen_provider import (
|
||||
DEFAULT_ASPECT_RATIO,
|
||||
ImageGenProvider,
|
||||
resolve_aspect_ratio,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class FalImageGenProvider(ImageGenProvider):
    """FAL.ai image generation backend.

    Delegates to ``tools.image_generation_tool.image_generate_tool`` so
    the in-tree FAL implementation (model catalog, payload builder,
    managed-gateway selection, Clarity Upscaler chaining) is the single
    source of truth. Everything is resolved at call time via the
    ``_it`` indirection so tests can monkey-patch the legacy module.
    """

    @property
    def name(self) -> str:
        """Return the provider identifier, ``"fal"``."""
        return "fal"

    @property
    def display_name(self) -> str:
        """Return the human-readable provider label, ``"FAL.ai"``."""
        return "FAL.ai"

    def is_available(self) -> bool:
        """Report whether the legacy module considers FAL usable.

        Returns the truthiness of ``check_fal_api_key()`` and ``False`` if
        that check raises.
        """
        # Available when direct FAL_KEY is set OR the managed Nous
        # gateway resolves a fal-queue origin. Both checks come from the
        # legacy module so this provider tracks whatever logic ships
        # there.
        import tools.image_generation_tool as _it

        try:
            return bool(_it.check_fal_api_key())
        except Exception:  # noqa: BLE001 — defensive; never break the picker
            return False

    def list_models(self) -> List[Dict[str, Any]]:
        """Return one summary dict per entry of the legacy ``FAL_MODELS`` map.

        Each dict carries ``id``, ``display``, ``speed``, ``strengths`` and
        ``price``; missing metadata falls back to the model id (``display``)
        or an empty string.
        """
        import tools.image_generation_tool as _it

        return [
            {
                "id": model_id,
                "display": meta.get("display", model_id),
                "speed": meta.get("speed", ""),
                "strengths": meta.get("strengths", ""),
                "price": meta.get("price", ""),
            }
            for model_id, meta in _it.FAL_MODELS.items()
        ]

    def default_model(self) -> Optional[str]:
        """Return the legacy module's ``DEFAULT_MODEL``."""
        import tools.image_generation_tool as _it

        return _it.DEFAULT_MODEL

    def get_setup_schema(self) -> Dict[str, Any]:
        """Return the static setup descriptor (label, badge, tag, env vars)."""
        return {
            "name": "FAL.ai",
            "badge": "paid",
            "tag": "Pick from flux-2-klein, flux-2-pro, gpt-image, nano-banana, etc.",
            "env_vars": [
                {
                    "key": "FAL_KEY",
                    "prompt": "FAL API key",
                    "url": "https://fal.ai/dashboard/keys",
                },
            ],
        }

    def generate(
        self,
        prompt: str,
        aspect_ratio: str = DEFAULT_ASPECT_RATIO,
        **kwargs: Any,
    ) -> Dict[str, Any]:
        """Generate an image via the legacy FAL pipeline.

        Forwards prompt + aspect_ratio (and any forward-compat extras
        the schema supports) into :func:`tools.image_generation_tool.image_generate_tool`,
        then reshapes its JSON-string response into the provider-ABC
        dict format consumed by ``_dispatch_to_plugin_provider``.

        This method does not raise: every failure path returns a dict with
        ``success=False``, an ``error`` message and an ``error_type``.
        """
        import tools.image_generation_tool as _it

        aspect = resolve_aspect_ratio(aspect_ratio)
        # Only forward the known optional knobs, and only when the caller
        # actually supplied a non-None value.
        passthrough = {
            key: kwargs[key]
            for key in (
                "num_inference_steps",
                "guidance_scale",
                "num_images",
                "output_format",
                "seed",
            )
            if key in kwargs and kwargs[key] is not None
        }

        try:
            raw = _it.image_generate_tool(
                prompt=prompt,
                aspect_ratio=aspect,
                **passthrough,
            )
        except Exception as exc:  # noqa: BLE001 — never raise out of generate
            logger.warning("FAL image_generate_tool raised: %s", exc, exc_info=True)
            return {
                "success": False,
                "image": None,
                "error": f"FAL image generation failed: {exc}",
                "error_type": type(exc).__name__,
                "provider": "fal",
                "prompt": prompt,
                "aspect_ratio": aspect,
            }

        try:
            response = json.loads(raw) if isinstance(raw, str) else raw
        except Exception as exc:  # noqa: BLE001
            # Keep the error shape in line with the other failure branches:
            # log the cause and always carry an ``error_type``.
            logger.warning("FAL pipeline returned invalid JSON: %s", exc)
            response = {
                "success": False,
                "image": None,
                "error": "Invalid JSON from FAL pipeline",
                "error_type": type(exc).__name__,
            }

        if not isinstance(response, dict):
            response = {
                "success": False,
                "image": None,
                "error": "FAL pipeline returned a non-dict response",
                "error_type": "provider_contract",
            }

        # Stamp provider/prompt/aspect_ratio so downstream consumers see
        # the uniform shape declared in ``agent.image_gen_provider``.
        response.setdefault("provider", "fal")
        response.setdefault("prompt", prompt)
        response.setdefault("aspect_ratio", aspect)
        # Annotate model best-effort — the legacy pipeline resolves it
        # internally, so query it after the fact for the response shape.
        if "model" not in response:
            try:
                model_id, _meta = _it._resolve_fal_model()
                response["model"] = model_id
            except Exception:  # noqa: BLE001 — annotation only; omit on failure
                pass
        return response
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Plugin entry point
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def register(ctx) -> None:
    """Plugin entry point: hand a new ``FalImageGenProvider`` to ``ctx``.

    ``ctx`` is expected to expose ``register_image_gen_provider``; the
    provider instance is created here on every call.
    """
    provider = FalImageGenProvider()
    ctx.register_image_gen_provider(provider)
|
||||
@@ -1,7 +0,0 @@
|
||||
name: fal
|
||||
version: 1.0.0
|
||||
description: "FAL.ai image generation backend (flux-2-klein, flux-2-pro, nano-banana, gpt-image-1.5, recraft-v3, etc.)."
|
||||
author: NousResearch
|
||||
kind: backend
|
||||
requires_env:
|
||||
- FAL_KEY
|
||||
@@ -47,25 +47,6 @@ _DEFAULT_ENDPOINT = "http://127.0.0.1:1933"
|
||||
_TIMEOUT = 30.0
|
||||
_REMOTE_RESOURCE_PREFIXES = ("http://", "https://", "git@", "ssh://", "git://")
|
||||
|
||||
# Maps the viking_remember `category` enum to a viking:// subdirectory.
|
||||
# Keep in sync with REMEMBER_SCHEMA.parameters.properties.category.enum.
|
||||
_CATEGORY_SUBDIR_MAP = {
|
||||
"preference": "preferences",
|
||||
"entity": "entities",
|
||||
"event": "events",
|
||||
"case": "cases",
|
||||
"pattern": "patterns",
|
||||
}
|
||||
_DEFAULT_MEMORY_SUBDIR = "preferences"
|
||||
|
||||
# Maps the built-in memory tool's `target` ("user" vs "memory") to a subdir
|
||||
# for on_memory_write mirroring. User profile facts → preferences; agent
|
||||
# notes / observations → patterns. Anything unknown falls back to the default.
|
||||
_MEMORY_WRITE_TARGET_SUBDIR_MAP = {
|
||||
"user": "preferences",
|
||||
"memory": "patterns",
|
||||
}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Process-level atexit safety net — ensures pending sessions are committed
|
||||
@@ -626,35 +607,24 @@ class OpenVikingMemoryProvider(MemoryProvider):
|
||||
except Exception as e:
|
||||
logger.warning("OpenViking session commit failed: %s", e)
|
||||
|
||||
def _build_memory_uri(self, subdir: str) -> str:
|
||||
"""Build a viking:// memory URI under the configured user/subdir."""
|
||||
slug = uuid.uuid4().hex[:12]
|
||||
return f"viking://user/{self._user}/memories/{subdir}/mem_{slug}.md"
|
||||
|
||||
def on_memory_write(
|
||||
self,
|
||||
action: str,
|
||||
target: str,
|
||||
content: str,
|
||||
metadata: Optional[Dict[str, Any]] = None,
|
||||
) -> None:
|
||||
"""Mirror built-in memory writes to OpenViking via content/write."""
|
||||
def on_memory_write(self, action: str, target: str, content: str) -> None:
|
||||
"""Mirror built-in memory writes to OpenViking as explicit memories."""
|
||||
if not self._client or action != "add" or not content:
|
||||
return
|
||||
|
||||
subdir = _MEMORY_WRITE_TARGET_SUBDIR_MAP.get(target, _DEFAULT_MEMORY_SUBDIR)
|
||||
uri = self._build_memory_uri(subdir)
|
||||
|
||||
def _write():
|
||||
try:
|
||||
client = _VikingClient(
|
||||
self._endpoint, self._api_key,
|
||||
account=self._account, user=self._user, agent=self._agent,
|
||||
)
|
||||
client.post("/api/v1/content/write", {
|
||||
"uri": uri,
|
||||
"content": content,
|
||||
"mode": "create",
|
||||
# Add as a user message with memory context so the commit
|
||||
# picks it up as an explicit memory during extraction
|
||||
client.post(f"/api/v1/sessions/{self._session_id}/messages", {
|
||||
"role": "user",
|
||||
"parts": [
|
||||
{"type": "text", "text": f"[Memory note — {target}] {content}"},
|
||||
],
|
||||
})
|
||||
except Exception as e:
|
||||
logger.debug("OpenViking memory mirror failed: %s", e)
|
||||
@@ -888,27 +858,24 @@ class OpenVikingMemoryProvider(MemoryProvider):
|
||||
if not content:
|
||||
return tool_error("content is required")
|
||||
|
||||
# Store as a session message that will be extracted during commit.
|
||||
# The category hint helps OpenViking's extraction classify correctly.
|
||||
category = args.get("category", "")
|
||||
subdir = _CATEGORY_SUBDIR_MAP.get(category, _DEFAULT_MEMORY_SUBDIR)
|
||||
uri = self._build_memory_uri(subdir)
|
||||
text = f"[Remember] {content}"
|
||||
if category:
|
||||
text = f"[Remember — {category}] {content}"
|
||||
|
||||
# Write directly via content/write API.
|
||||
# This creates the file, stores the content, and queues vector indexing
|
||||
# in a single call — no dependency on session commit / VLM extraction.
|
||||
try:
|
||||
result = self._client.post("/api/v1/content/write", {
|
||||
"uri": uri,
|
||||
"content": content,
|
||||
"mode": "create",
|
||||
})
|
||||
written = result.get("result", {}).get("written_bytes", 0)
|
||||
return json.dumps({
|
||||
"status": "stored",
|
||||
"message": f"Memory stored ({written}b) and queued for vector indexing.",
|
||||
})
|
||||
except Exception as e:
|
||||
logger.error("OpenViking content/write failed: %s", e)
|
||||
return tool_error(f"Failed to store memory: {e}")
|
||||
self._client.post(f"/api/v1/sessions/{self._session_id}/messages", {
|
||||
"role": "user",
|
||||
"parts": [
|
||||
{"type": "text", "text": text},
|
||||
],
|
||||
})
|
||||
|
||||
return json.dumps({
|
||||
"status": "stored",
|
||||
"message": "Memory recorded. Will be extracted and indexed on session commit.",
|
||||
})
|
||||
|
||||
def _tool_add_resource(self, args: dict) -> str:
|
||||
url = args.get("url", "")
|
||||
|
||||
@@ -282,24 +282,20 @@ def _build_payload(
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# fal_client lazy import (shared with image_generation_tool via fal_common)
|
||||
# fal_client lazy import (same pattern as image_generation_tool)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_fal_client: Any = None
|
||||
|
||||
|
||||
def _load_fal_client() -> Any:
|
||||
"""Lazy-load the ``fal_client`` SDK and cache it on this module.
|
||||
|
||||
Delegates the actual import to :func:`tools.fal_common.import_fal_client`
|
||||
so the ``lazy_deps`` ensure-install handling stays in one place.
|
||||
"""
|
||||
global _fal_client
|
||||
if _fal_client is not None:
|
||||
return _fal_client
|
||||
from tools.fal_common import import_fal_client
|
||||
_fal_client = import_fal_client()
|
||||
return _fal_client
|
||||
import fal_client # type: ignore
|
||||
|
||||
_fal_client = fal_client
|
||||
return fal_client
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@@ -238,7 +238,7 @@ def _get_firecrawl_client() -> Any:
|
||||
|
||||
kwargs = {
|
||||
"api_key": managed_gateway.nous_user_token,
|
||||
"api_url": managed_gateway.resolved_origin,
|
||||
"api_url": managed_gateway.gateway_origin,
|
||||
}
|
||||
client_config = (
|
||||
"tool-gateway",
|
||||
|
||||
@@ -3357,25 +3357,6 @@ class AIAgent:
|
||||
return content
|
||||
|
||||
if self._model_supports_vision():
|
||||
# Vision-capable on paper — but if we've already learned in this
|
||||
# session that the active (provider, model) rejects list-type
|
||||
# tool content (e.g. Xiaomi MiMo's 400 "text is not set"),
|
||||
# short-circuit to a text summary so we don't burn another
|
||||
# round-trip relearning the same lesson. Cache populated by
|
||||
# the 400 recovery path in agent.conversation_loop. Transient
|
||||
# per-session; next session retries.
|
||||
key = (
|
||||
(getattr(self, "provider", "") or "").strip().lower(),
|
||||
(getattr(self, "model", "") or "").strip(),
|
||||
)
|
||||
no_list = getattr(self, "_no_list_tool_content_models", None)
|
||||
if no_list and key in no_list:
|
||||
logger.debug(
|
||||
"Tool %s: model %s/%s known to reject list-type tool "
|
||||
"content this session — sending text summary",
|
||||
tool_name, key[0], key[1],
|
||||
)
|
||||
return _multimodal_text_summary(result)
|
||||
return content
|
||||
|
||||
summary = _multimodal_text_summary(result)
|
||||
@@ -3404,80 +3385,6 @@ class AIAgent:
|
||||
from agent.conversation_compression import try_shrink_image_parts_in_messages
|
||||
return try_shrink_image_parts_in_messages(api_messages)
|
||||
|
||||
def _try_strip_image_parts_from_tool_messages(self, api_messages: list) -> bool:
|
||||
"""Downgrade list-type tool messages to text summaries in-place.
|
||||
|
||||
Recovery path for providers that reject list-type tool message content
|
||||
(e.g. Xiaomi MiMo's 400 "text is not set"; see issue #27344). Walks
|
||||
``api_messages`` for any ``role: "tool"`` message whose ``content`` is
|
||||
a list containing image parts, replaces the content with the existing
|
||||
text part(s) (or a minimal placeholder if none survive), and records
|
||||
the active (provider, model) in ``self._no_list_tool_content_models``
|
||||
so subsequent ``_tool_result_content_for_active_model`` calls in this
|
||||
session preemptively downgrade screenshots without a round-trip.
|
||||
|
||||
Returns True when at least one tool message was downgraded — the
|
||||
caller (the 400 recovery branch in ``agent.conversation_loop``) uses
|
||||
this to decide whether to retry the API call with the modified
|
||||
history or surface the original error.
|
||||
"""
|
||||
if not isinstance(api_messages, list):
|
||||
return False
|
||||
|
||||
# Record (provider, model) so we don't relearn this lesson.
|
||||
key = (
|
||||
(getattr(self, "provider", "") or "").strip().lower(),
|
||||
(getattr(self, "model", "") or "").strip(),
|
||||
)
|
||||
if not hasattr(self, "_no_list_tool_content_models"):
|
||||
self._no_list_tool_content_models = set()
|
||||
if key[1]: # only record when we actually have a model id
|
||||
self._no_list_tool_content_models.add(key)
|
||||
|
||||
changed = False
|
||||
for msg in api_messages:
|
||||
if not isinstance(msg, dict) or msg.get("role") != "tool":
|
||||
continue
|
||||
content = msg.get("content")
|
||||
if not isinstance(content, list):
|
||||
continue
|
||||
|
||||
# Salvage any text parts so the model still sees some signal.
|
||||
text_parts: List[str] = []
|
||||
had_image = False
|
||||
for part in content:
|
||||
if not isinstance(part, dict):
|
||||
if isinstance(part, str) and part.strip():
|
||||
text_parts.append(part.strip())
|
||||
continue
|
||||
ptype = part.get("type")
|
||||
if ptype == "image_url" or ptype == "input_image":
|
||||
had_image = True
|
||||
continue
|
||||
if ptype in {"text", "input_text"}:
|
||||
text = str(part.get("text") or "").strip()
|
||||
if text:
|
||||
text_parts.append(text)
|
||||
|
||||
if not had_image:
|
||||
# List-type content but no image parts — leave alone (some
|
||||
# providers reject ANY list content, but stripping a
|
||||
# text-only list doesn't reduce ambiguity; let the caller
|
||||
# surface the original error if this turns out to be the
|
||||
# case).
|
||||
continue
|
||||
|
||||
if text_parts:
|
||||
msg["content"] = "\n\n".join(text_parts)
|
||||
else:
|
||||
msg["content"] = (
|
||||
"[image content removed — provider does not accept "
|
||||
"list-type tool message content]"
|
||||
)
|
||||
changed = True
|
||||
|
||||
return changed
|
||||
|
||||
def _anthropic_preserve_dots(self) -> bool:
|
||||
"""True when using an anthropic-compatible endpoint that preserves dots in model names.
|
||||
Alibaba/DashScope keeps dots (e.g. qwen3.5-plus).
|
||||
|
||||
@@ -47,7 +47,6 @@ ACP_REGISTRY_MANIFEST = REPO_ROOT / "acp_registry" / "agent.json"
|
||||
AUTHOR_MAP = {
|
||||
# teknium (multiple emails)
|
||||
"teknium1@gmail.com": "teknium1",
|
||||
"cipherframe@users.noreply.github.com": "CipherFrame",
|
||||
"me@promplate.dev": "CNSeniorious000",
|
||||
"yichengqiao21@gmail.com": "YarrowQiao",
|
||||
"erhanyasarx@gmail.com": "erhnysr",
|
||||
@@ -60,18 +59,14 @@ AUTHOR_MAP = {
|
||||
"mgongzai@gmail.com": "vKongv",
|
||||
"0x.badfriend@gmail.com": "discodirector",
|
||||
"altriatree@gmail.com": "TruaShamu",
|
||||
"contact-me@stark-x.cn": "Stark-X",
|
||||
"nat@nthrow.io": "nthrow",
|
||||
"m@mobrienv.dev": "mikeyobrien",
|
||||
"saeed919@pm.me": "falasi",
|
||||
"chrisdlc119@outlook.com": "chdlc",
|
||||
"omar@techdeveloper.site": "nycomar",
|
||||
"qiyin.zuo@pcitc.com": "qiyin-code",
|
||||
"mr.aashiz@gmail.com": "aashizpoudel",
|
||||
"70629228+shaun0927@users.noreply.github.com": "shaun0927",
|
||||
"98262967+Bihruze@users.noreply.github.com": "Bihruze",
|
||||
"189280367+Lempkey@users.noreply.github.com": "Lempkey",
|
||||
"leovillalbajr@gmail.com": "Lempkey",
|
||||
"nidhi2894@gmail.com": "nidhi-singh02",
|
||||
"30312689+aashizpoudel@users.noreply.github.com": "aashizpoudel",
|
||||
"oleksii.lisikh@gmail.com": "olisikh",
|
||||
@@ -935,8 +930,6 @@ AUTHOR_MAP = {
|
||||
"holynn@placeholder.local": "holynn-q",
|
||||
"agent@hermes.local": "jacdevos",
|
||||
"sunsky.lau@gmail.com": "liuhao1024",
|
||||
"fabianoeq@gmail.com": "rodrigoeqnit",
|
||||
"178342791+sgtworkman@users.noreply.github.com": "sgtworkman",
|
||||
"qiuqfang98@qq.com": "keepcalmqqf",
|
||||
"261867348+ai-ag2026@users.noreply.github.com": "ai-ag2026",
|
||||
"yanzh.su@gmail.com": "YanzhongSu",
|
||||
|
||||
@@ -56,7 +56,6 @@ class TestFailoverReason:
|
||||
"overloaded", "server_error", "timeout",
|
||||
"context_overflow", "payload_too_large", "image_too_large",
|
||||
"model_not_found", "format_error",
|
||||
"multimodal_tool_content_unsupported",
|
||||
"provider_policy_blocked",
|
||||
"thinking_signature", "long_context_tier",
|
||||
"oauth_long_context_beta_forbidden",
|
||||
@@ -1257,66 +1256,3 @@ class TestRateLimitErrorWithoutStatusCode:
|
||||
e.status_code = None
|
||||
result = classify_api_error(e, provider="copilot", model="gpt-4o")
|
||||
assert result.reason != FailoverReason.rate_limit
|
||||
|
||||
|
||||
|
||||
# ── Test: multimodal_tool_content_unsupported pattern ───────────────────
|
||||
|
||||
class TestMultimodalToolContentUnsupported:
    """Classifier coverage for issue #27344.

    Providers that reject list-type tool message content must be classified
    as ``multimodal_tool_content_unsupported`` so the retry loop can
    downgrade screenshots to text and try again.
    """

    def test_xiaomi_mimo_text_is_not_set_pattern(self):
        """The actual Xiaomi MiMo 400 wording from the bug report."""
        err = MockAPIError(
            "Error code: 400 - {'error': {'code': '400', 'message': 'Param Incorrect', 'param': 'text is not set', 'type': ''}}",
            status_code=400,
        )
        classified = classify_api_error(err, provider="xiaomi", model="mimo-v2.5")
        assert classified.reason == FailoverReason.multimodal_tool_content_unsupported
        assert classified.retryable is True

    def test_generic_tool_message_must_be_string(self):
        """A generic 'must be a string' 400 maps to the same reason."""
        err = MockAPIError(
            "tool message content must be a string",
            status_code=400,
        )
        classified = classify_api_error(err, provider="custom", model="some-model")
        assert classified.reason == FailoverReason.multimodal_tool_content_unsupported

    def test_expected_string_got_list(self):
        """Schema-validation wording ('expected string, got list') is recognised."""
        err = MockAPIError(
            "Schema validation failed: expected string, got list",
            status_code=400,
        )
        classified = classify_api_error(err, provider="custom", model="some-model")
        assert classified.reason == FailoverReason.multimodal_tool_content_unsupported

    def test_multimodal_tool_content_takes_priority_over_context_overflow(self):
        """Some providers return a 400 whose message contains BOTH
        'text is not set' and a length-shaped phrase; the tool-content
        recovery is cheaper than compression so it must win the priority.
        """
        err = MockAPIError(
            "text is not set; context length exceeded",
            status_code=400,
        )
        classified = classify_api_error(err, provider="xiaomi", model="mimo-v2.5")
        assert classified.reason == FailoverReason.multimodal_tool_content_unsupported

    def test_no_status_code_path_also_classifies(self):
        """When the error reaches us without a status code (transport
        layer ate it) the message-only classifier branch must also
        recognise the pattern.
        """
        err = MockTransportError("tool_call.content must be string")
        classified = classify_api_error(err, provider="alibaba", model="qwen3.5-plus")
        assert classified.reason == FailoverReason.multimodal_tool_content_unsupported

    def test_unrelated_400_is_not_misclassified(self):
        """Make sure the patterns don't false-positive on normal 400s."""
        err = MockAPIError("bad request: missing field 'model'", status_code=400)
        classified = classify_api_error(
            err, provider="openrouter", model="anthropic/claude-sonnet-4"
        )
        assert classified.reason != FailoverReason.multimodal_tool_content_unsupported
|
||||
|
||||
@@ -1060,191 +1060,3 @@ class TestHonchoCadenceTracking:
|
||||
p.on_turn_start(2, "second message")
|
||||
should_skip = p._injection_frequency == "first-turn" and p._turn_count > 1
|
||||
assert should_skip, "Second turn (turn 2) SHOULD be skipped"
|
||||
|
||||
|
||||
class TestMemoryToolToolsetGate:
    """Issue #5544: memory provider tools must respect platform_toolsets.

    Before the fix, MemoryManager.get_all_tool_schemas() output was appended
    to AIAgent.tools unconditionally in agent_init.py — bypassing the
    enabled_toolsets filter. Result: `platform_toolsets: telegram: []`
    still leaked fact_store and other memory tools into the tool surface,
    causing 10x latency on local models (Qwen3-30B: 1.7s → 42s) and
    tool-call loops on small models.

    These tests mirror the gate logic in agent/agent_init.py around the
    memory provider tool injection block. The gate condition is:

        enabled_toolsets is None         → no filter, inject (backward compat)
        "memory" in enabled_toolsets     → user opted in, inject
        otherwise (incl. [])             → skip injection
    """

    @staticmethod
    def _run_memory_injection(enabled_toolsets, memory_manager):
        """Simulate the gated memory-tool injection block from agent_init.py.

        Returns ``(tools, valid_tool_names)`` as they would look after the
        block ran against an initially empty tool list.
        """
        tools = []
        valid_tool_names = set()

        # Same short-circuit order as the production condition: manager
        # first, then the tool list, then the toolset gate.
        if not memory_manager or tools is None:
            return tools, valid_tool_names
        if enabled_toolsets is not None and "memory" not in enabled_toolsets:
            return tools, valid_tool_names

        seen = {
            entry.get("function", {}).get("name")
            for entry in tools
            if isinstance(entry, dict)
        }
        for schema in memory_manager.get_all_tool_schemas():
            tool_name = schema.get("name", "")
            if tool_name and tool_name in seen:
                continue  # de-duplicate by function name
            tools.append({"type": "function", "function": schema})
            if tool_name:
                valid_tool_names.add(tool_name)
                seen.add(tool_name)

        return tools, valid_tool_names

    def _mgr_with_tools(self, *tool_names):
        """Build a MemoryManager whose providers expose the named tool schemas."""
        schemas = [
            {"name": n, "description": n, "parameters": {}} for n in tool_names
        ]
        manager = MemoryManager()
        manager.add_provider(FakeMemoryProvider("ext", tools=schemas))
        return manager

    def test_none_toolsets_injects(self):
        """enabled_toolsets=None (no filter) injects memory tools — backward compat."""
        manager = self._mgr_with_tools("fact_store")
        injected, valid = self._run_memory_injection(None, manager)
        assert "fact_store" in valid
        assert any(t["function"]["name"] == "fact_store" for t in injected)

    def test_memory_in_toolsets_injects(self):
        """enabled_toolsets including 'memory' injects memory tools."""
        manager = self._mgr_with_tools("fact_store")
        injected, valid = self._run_memory_injection(
            ["terminal", "memory", "web"], manager
        )
        assert "fact_store" in valid

    def test_empty_toolsets_blocks_injection(self):
        """`platform_toolsets: telegram: []` must suppress memory tools. (#5544)"""
        manager = self._mgr_with_tools("fact_store")
        injected, valid = self._run_memory_injection([], manager)
        assert injected == []
        assert valid == set()

    def test_toolsets_without_memory_blocks_injection(self):
        """Toolset list that doesn't name 'memory' must suppress injection."""
        manager = self._mgr_with_tools("fact_store")
        injected, valid = self._run_memory_injection(["terminal", "web"], manager)
        assert injected == []
        assert valid == set()

    def test_no_memory_manager_no_injection(self):
        """Gate is moot without a memory manager."""
        injected, valid = self._run_memory_injection(None, None)
        assert injected == []

    def test_multiple_schemas_all_blocked_together(self):
        """When the gate is closed, no memory tools leak — not even partially."""
        manager = self._mgr_with_tools("fact_store", "memory_search", "memory_add")
        injected, valid = self._run_memory_injection(["terminal"], manager)
        assert injected == []
        assert valid == set()

    def test_multiple_schemas_all_injected_when_enabled(self):
        """When the gate is open, every memory tool schema is injected."""
        manager = self._mgr_with_tools("fact_store", "memory_search", "memory_add")
        injected, valid = self._run_memory_injection(None, manager)
        assert valid == {"fact_store", "memory_search", "memory_add"}
|
||||
|
||||
|
||||
class TestContextEngineToolsetGate:
    """Issue #5544 (sibling): context engine tools follow the same gate.

    `agent.context_compressor.get_tool_schemas()` (e.g. lcm_grep, lcm_describe,
    lcm_expand) was appended to AIAgent.tools unconditionally. Same blind
    injection class as the memory bug; same local-model penalty. Gate name:
    "context_engine" (matches the existing plugin-system convention).
    """

    @staticmethod
    def _run_context_engine_injection(enabled_toolsets, compressor):
        """Simulate the gated context-engine injection block from agent_init.py.

        Returns ``(tools, valid_tool_names, engine_tool_names)`` as they
        would look after the block ran against an initially empty tool list.
        """
        tools = []
        valid_tool_names = set()
        engine_tool_names = set()

        # Same short-circuit order as the production condition: compressor
        # first, then the tool list, then the toolset gate.
        if compressor is None or tools is None:
            return tools, valid_tool_names, engine_tool_names
        if enabled_toolsets is not None and "context_engine" not in enabled_toolsets:
            return tools, valid_tool_names, engine_tool_names

        seen = {
            entry.get("function", {}).get("name")
            for entry in tools
            if isinstance(entry, dict)
        }
        for schema in compressor.get_tool_schemas():
            tool_name = schema.get("name", "")
            if tool_name and tool_name in seen:
                continue  # de-duplicate by function name
            tools.append({"type": "function", "function": schema})
            if tool_name:
                valid_tool_names.add(tool_name)
                engine_tool_names.add(tool_name)
                seen.add(tool_name)

        return tools, valid_tool_names, engine_tool_names

    class _FakeCompressor:
        """Minimal stand-in exposing only ``get_tool_schemas``."""

        def __init__(self, schemas):
            self._schemas = schemas

        def get_tool_schemas(self):
            # Hand out a copy so callers cannot mutate the stored list.
            return list(self._schemas)

    def _compressor_with(self, *tool_names):
        """Build a fake compressor exposing one schema per given tool name."""
        schemas = [
            {"name": n, "description": n, "parameters": {}} for n in tool_names
        ]
        return self._FakeCompressor(schemas)

    def test_none_toolsets_injects(self):
        """enabled_toolsets=None injects context-engine tools — backward compat."""
        fake = self._compressor_with("lcm_grep", "lcm_describe", "lcm_expand")
        injected, valid, engine = self._run_context_engine_injection(None, fake)
        assert engine == {"lcm_grep", "lcm_describe", "lcm_expand"}

    def test_context_engine_in_toolsets_injects(self):
        """enabled_toolsets including 'context_engine' injects the tools."""
        fake = self._compressor_with("lcm_grep")
        injected, valid, engine = self._run_context_engine_injection(
            ["terminal", "context_engine"], fake
        )
        assert "lcm_grep" in engine

    def test_empty_toolsets_blocks_injection(self):
        """`platform_toolsets: telegram: []` must suppress context-engine tools."""
        fake = self._compressor_with("lcm_grep")
        injected, valid, engine = self._run_context_engine_injection([], fake)
        assert injected == []
        assert engine == set()

    def test_toolsets_without_context_engine_blocks_injection(self):
        """A toolset list that doesn't name 'context_engine' suppresses injection."""
        fake = self._compressor_with("lcm_grep", "lcm_describe")
        injected, valid, engine = self._run_context_engine_injection(
            ["terminal", "memory"], fake
        )
        assert injected == []
        assert engine == set()

    def test_no_compressor_no_injection(self):
        """Gate is moot without a context_compressor."""
        injected, valid, engine = self._run_context_engine_injection(None, None)
        assert injected == []
|
||||
|
||||
@@ -442,9 +442,9 @@ class TestBuildNousSubscriptionPrompt:
|
||||
"web": NousFeatureState("web", "Web tools", True, True, True, True, False, True, "firecrawl"),
|
||||
"image_gen": NousFeatureState("image_gen", "Image generation", True, True, True, True, False, True, "Nous Subscription"),
|
||||
"tts": NousFeatureState("tts", "OpenAI TTS", True, True, True, True, False, True, "OpenAI TTS"),
|
||||
"stt": NousFeatureState("stt", "Speech-to-text", True, True, True, True, False, True, "OpenAI Whisper"),
|
||||
"browser": NousFeatureState("browser", "Browser automation", True, True, True, True, False, True, "Browser Use"),
|
||||
"modal": NousFeatureState("modal", "Modal execution", False, True, False, False, False, True, "local"),
|
||||
"app_tools": NousFeatureState("app_tools", "App tools (500+ apps)", True, True, True, True, False, True, "Nous Subscription"),
|
||||
},
|
||||
),
|
||||
)
|
||||
@@ -453,7 +453,7 @@ class TestBuildNousSubscriptionPrompt:
|
||||
|
||||
assert "Browser Use" in prompt
|
||||
assert "Modal execution is optional" in prompt
|
||||
assert "do not ask the user for Firecrawl, FAL, OpenAI TTS, or Browser-Use API keys" in prompt
|
||||
assert "do not ask the user for Firecrawl, FAL, OpenAI TTS, OpenAI Whisper, or Browser-Use API keys" in prompt
|
||||
|
||||
def test_non_subscriber_prompt_includes_relevant_upgrade_guidance(self, monkeypatch):
|
||||
monkeypatch.setattr("tools.tool_backend_helpers.managed_nous_tools_enabled", lambda: True)
|
||||
@@ -467,9 +467,9 @@ class TestBuildNousSubscriptionPrompt:
|
||||
"web": NousFeatureState("web", "Web tools", True, False, False, False, False, True, ""),
|
||||
"image_gen": NousFeatureState("image_gen", "Image generation", True, False, False, False, False, True, ""),
|
||||
"tts": NousFeatureState("tts", "OpenAI TTS", True, False, False, False, False, True, ""),
|
||||
"stt": NousFeatureState("stt", "Speech-to-text", True, False, False, False, False, True, ""),
|
||||
"browser": NousFeatureState("browser", "Browser automation", True, False, False, False, False, True, ""),
|
||||
"modal": NousFeatureState("modal", "Modal execution", False, False, False, False, False, True, ""),
|
||||
"app_tools": NousFeatureState("app_tools", "App tools (500+ apps)", True, False, False, False, False, True, ""),
|
||||
},
|
||||
),
|
||||
)
|
||||
@@ -786,6 +786,7 @@ class TestPromptBuilderConstants:
|
||||
|
||||
def test_platform_hints_known_platforms(self):
|
||||
assert "whatsapp" in PLATFORM_HINTS
|
||||
assert "whatsapp_cloud" in PLATFORM_HINTS
|
||||
assert "telegram" in PLATFORM_HINTS
|
||||
assert "discord" in PLATFORM_HINTS
|
||||
assert "cron" in PLATFORM_HINTS
|
||||
@@ -793,6 +794,22 @@ class TestPromptBuilderConstants:
|
||||
assert "api_server" in PLATFORM_HINTS
|
||||
assert "webui" in PLATFORM_HINTS
|
||||
|
||||
def test_whatsapp_cloud_hint_mentions_24h_window(self):
    """The whatsapp_cloud hint must spell out the 24-hour conversation window.

    The window is a hard Cloud API rule. Phase 5 (template fallback) was
    deferred, so the model has to be told that free-form replies outside the
    window fail with Graph error 131047; otherwise it will happily schedule
    delayed messages that silently break.
    """
    cloud_hint = PLATFORM_HINTS["whatsapp_cloud"]
    # Any one of the accepted spellings of the window is enough.
    accepted_spellings = ("24-hour", "24h", "24 hour")
    assert any(spelling in cloud_hint for spelling in accepted_spellings)
    assert "131047" in cloud_hint
|
||||
|
||||
def test_whatsapp_cloud_hint_advertises_media(self):
    """The Cloud hint advertises the MEDIA:/path/ convention for outbound
    attachments, the same one the Baileys adapter supports."""
    media_marker = "MEDIA:"
    cloud_hint = PLATFORM_HINTS["whatsapp_cloud"]
    assert media_marker in cloud_hint
|
||||
|
||||
def test_cli_hint_does_not_suggest_media_tags(self):
|
||||
# Regression: MEDIA:/path tags are intercepted only by messaging
|
||||
# gateway platforms. On the CLI they render as literal text and
|
||||
|
||||
@@ -1,12 +1,6 @@
|
||||
"""Tests for agent/skill_utils.py."""
|
||||
|
||||
from unittest.mock import patch
|
||||
|
||||
from agent.skill_utils import (
|
||||
extract_skill_conditions,
|
||||
iter_skill_index_files,
|
||||
skill_matches_platform,
|
||||
)
|
||||
from agent.skill_utils import extract_skill_conditions, iter_skill_index_files
|
||||
|
||||
|
||||
def test_metadata_as_dict_with_hermes():
|
||||
@@ -100,100 +94,3 @@ def test_iter_skill_index_files_prunes_dependency_dirs(tmp_path):
|
||||
found = list(iter_skill_index_files(tmp_path, "SKILL.md"))
|
||||
|
||||
assert found == [real / "SKILL.md"]
|
||||
|
||||
|
||||
# ── skill_matches_platform on Termux ──────────────────────────────────────
|
||||
|
||||
|
||||
class TestSkillMatchesPlatformTermux:
    """Platform matching for skills when running under Termux.

    Termux is a Linux userland on Android, so skills tagged
    ``platforms: [linux]`` must load there whether Python reports
    ``sys.platform`` as "linux" (pre-3.13) or "android" (3.13+). Reported by
    user @LikiusInik in May 2026: only 3 built-in skills appeared on Termux
    because every github/productivity/mlops skill is tagged
    ``platforms: [linux, macos, windows]`` and ``sys.platform == "android"``
    did not start with "linux".
    """

    @staticmethod
    def _matches(frontmatter, platform, termux):
        """Evaluate ``skill_matches_platform`` under a faked environment.

        For the duration of the call ``agent.skill_utils.sys.platform`` is
        replaced with *platform* and ``agent.skill_utils.is_termux`` is
        stubbed to return *termux*.
        """
        with patch("agent.skill_utils.sys.platform", platform), patch(
            "agent.skill_utils.is_termux", return_value=termux
        ):
            return skill_matches_platform(frontmatter)

    def test_no_platforms_field_matches_everywhere(self):
        # Backward-compat default: a skill with no platforms tag loads on
        # any OS, Termux included.
        for frontmatter in ({}, {"name": "foo"}):
            assert self._matches(frontmatter, "android", True) is True

    def test_linux_skill_loads_on_termux_android_platform(self):
        # Python 3.13+ on Termux reports sys.platform == "android".
        frontmatter = {"platforms": ["linux"]}
        assert self._matches(frontmatter, "android", True) is True

    def test_linux_macos_windows_skill_loads_on_termux(self):
        # The common "[linux, macos, windows]" tag used by github-*,
        # productivity, mlops, etc.
        frontmatter = {"platforms": ["linux", "macos", "windows"]}
        assert self._matches(frontmatter, "android", True) is True

    def test_linux_skill_loads_on_termux_linux_platform(self):
        # Pre-3.13 Termux already reports sys.platform == "linux"; this
        # passes without the Termux escape hatch but must keep passing.
        frontmatter = {"platforms": ["linux"]}
        assert self._matches(frontmatter, "linux", True) is True

    def test_macos_only_skill_still_excluded_on_termux(self):
        # macOS-only skills (apple-notes, imessage, ...) must NOT load on
        # Termux. The Termux fallback only widens platforms:[linux,...].
        frontmatter = {"platforms": ["macos"]}
        assert self._matches(frontmatter, "android", True) is False

    def test_windows_only_skill_still_excluded_on_termux(self):
        frontmatter = {"platforms": ["windows"]}
        assert self._matches(frontmatter, "android", True) is False

    def test_explicit_termux_or_android_tag_matches(self):
        # Skills may also opt in explicitly via platforms:[termux] or
        # platforms:[android]; both should match a Termux session.
        for tag in ("termux", "android"):
            assert self._matches({"platforms": [tag]}, "android", True) is True

    def test_non_termux_android_does_not_widen(self):
        # On a plain Android Python (not Termux) Linux skills must not be
        # loaded silently; Termux is the supported environment.
        frontmatter = {"platforms": ["linux"]}
        assert self._matches(frontmatter, "android", False) is False

    def test_linux_skill_on_real_linux_unaffected(self):
        # The non-Termux Linux path must not change.
        frontmatter = {"platforms": ["linux"]}
        assert self._matches(frontmatter, "linux", False) is True

    def test_macos_skill_on_real_macos_unaffected(self):
        frontmatter = {"platforms": ["macos"]}
        assert self._matches(frontmatter, "darwin", False) is True
|
||||
|
||||
@@ -2510,3 +2510,26 @@ class TestSendMediaTimeoutCancelsFuture:
|
||||
# 2. Second file still got dispatched — one timeout doesn't abort the batch
|
||||
adapter.send_video.assert_called_once()
|
||||
assert adapter.send_video.call_args[1]["video_path"] == "/tmp/fast.mp4"
|
||||
|
||||
|
||||
class TestHomeTargetEnvVarRegistry:
    """Regression: ``_HOME_TARGET_ENV_VARS`` has to list every gateway
    platform that supports cron-driven outbound delivery. A missing entry
    means ``hermes cron create --deliver=<platform>`` silently fails to
    route through that platform's home channel."""

    def test_whatsapp_cloud_registered(self):
        """``deliver=whatsapp_cloud`` routes through
        WHATSAPP_CLOUD_HOME_CHANNEL, added next to the existing Baileys
        ``whatsapp`` entry."""
        from cron.scheduler import _HOME_TARGET_ENV_VARS as registry

        platform = "whatsapp_cloud"
        assert platform in registry
        assert registry[platform] == "WHATSAPP_CLOUD_HOME_CHANNEL"

    def test_baileys_whatsapp_still_registered(self):
        """Sanity guard: adding the Cloud entry left Baileys whatsapp
        routing untouched."""
        from cron.scheduler import _HOME_TARGET_ENV_VARS as registry

        baileys_var = registry.get("whatsapp")
        assert baileys_var == "WHATSAPP_HOME_CHANNEL"
|
||||
|
||||
@@ -206,9 +206,23 @@ class TestPlatformDefaults:
|
||||
"""Signal, BlueBubbles, etc. default to 'off' tool progress."""
|
||||
from gateway.display_config import resolve_display_setting
|
||||
|
||||
for plat in ("signal", "bluebubbles", "weixin", "wecom", "dingtalk"):
|
||||
for plat in ("signal", "bluebubbles", "weixin", "wecom", "dingtalk", "whatsapp_cloud"):
|
||||
assert resolve_display_setting({}, plat, "tool_progress") == "off", plat
|
||||
|
||||
def test_whatsapp_cloud_locked_to_low_tier_until_edit_message_lands(self):
    """Regression guard: ``whatsapp_cloud`` stays TIER_LOW until the adapter
    implements edit_message.

    With no edit endpoint, moving the tier up to MEDIUM would send a separate
    WhatsApp message for every tool-progress update, which is exactly the
    failure mode this entry exists to prevent.

    When/if Cloud's edit_message lands, update _PLATFORM_DEFAULTS to
    TIER_MEDIUM and change this test to assert ``"new"`` accordingly.
    """
    from gateway.display_config import resolve_display_setting

    platform = "whatsapp_cloud"
    tool_progress = resolve_display_setting({}, platform, "tool_progress")
    assert tool_progress == "off"
    streaming = resolve_display_setting({}, platform, "streaming")
    assert streaming is False
|
||||
|
||||
def test_minimal_tier_platforms(self):
|
||||
"""Email, SMS, webhook default to 'off' tool progress."""
|
||||
from gateway.display_config import resolve_display_setting
|
||||
|
||||
@@ -75,197 +75,9 @@ class TestCodeGeneration:
|
||||
code = store.generate_code("telegram", "user1", "Alice")
|
||||
pending = store.list_pending("telegram")
|
||||
assert len(pending) == 1
|
||||
# list_pending no longer returns the original code — it returns a
|
||||
# truncated hash prefix. Verify the metadata is correct instead.
|
||||
assert pending[0]["code"] == code
|
||||
assert pending[0]["user_id"] == "user1"
|
||||
assert pending[0]["user_name"] == "Alice"
|
||||
# The code field is now a hash prefix, not the original plaintext code
|
||||
assert pending[0]["code"] != code
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Hashed storage
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestHashedStorage:
    """Pending pairing codes are stored as salted hashes, never as plaintext."""

    _HEX_DIGITS = "0123456789abcdef"

    @staticmethod
    def _pending_text(tmp_path):
        """Return the raw text of the telegram pending file under *tmp_path*."""
        return (tmp_path / "telegram-pending.json").read_text(encoding="utf-8")

    @classmethod
    def _is_lower_hex(cls, value):
        """True when every character of *value* is a lowercase hex digit."""
        return all(ch in cls._HEX_DIGITS for ch in value)

    def test_pending_file_contains_hash_and_salt(self, tmp_path):
        """Stored entries must have 'hash' and 'salt', never the plaintext code."""
        with patch("gateway.pairing.PAIRING_DIR", tmp_path):
            code = PairingStore().generate_code("telegram", "user1", "Alice")
            stored = json.loads(self._pending_text(tmp_path))

        assert len(stored) == 1
        record = next(iter(stored.values()))
        # Both fields must be present.
        assert "hash" in record
        assert "salt" in record
        # The hash is a hex SHA-256 digest: 64 hex characters.
        assert len(record["hash"]) == 64
        assert self._is_lower_hex(record["hash"])
        # The salt is 16 bytes rendered as 32 hex characters.
        assert len(record["salt"]) == 32
        assert self._is_lower_hex(record["salt"])
        # The plaintext code must not show up as a key or as any string value.
        assert code not in stored  # not a key
        for key, fields in stored.items():
            assert code != key
            leaked = [
                value
                for value in fields.values()
                if isinstance(value, str) and value == code
            ]
            assert not leaked

    def test_plaintext_code_not_stored(self, tmp_path):
        """The raw JSON file must not contain the plaintext code anywhere."""
        with patch("gateway.pairing.PAIRING_DIR", tmp_path):
            code = PairingStore().generate_code("telegram", "user1")
            on_disk = self._pending_text(tmp_path)
        assert code not in on_disk

    def test_valid_code_verifies_against_hash(self, tmp_path):
        """approve_code with the correct code should succeed."""
        with patch("gateway.pairing.PAIRING_DIR", tmp_path):
            store = PairingStore()
            issued = store.generate_code("telegram", "user1", "Bob")
            approved = store.approve_code("telegram", issued)
        assert approved is not None
        assert approved["user_id"] == "user1"
        assert approved["user_name"] == "Bob"

    def test_invalid_code_rejected(self, tmp_path):
        """approve_code with a wrong code should fail."""
        with patch("gateway.pairing.PAIRING_DIR", tmp_path):
            store = PairingStore()
            store.generate_code("telegram", "user1")
            outcome = store.approve_code("telegram", "ZZZZZZZZ")
        assert outcome is None

    def test_different_salts_per_entry(self, tmp_path):
        """Each pending entry should have a unique salt."""
        with patch("gateway.pairing.PAIRING_DIR", tmp_path):
            store = PairingStore()
            for user in ("user0", "user1", "user2"):
                store.generate_code("telegram", user)
            stored = json.loads(self._pending_text(tmp_path))
        distinct_salts = {record["salt"] for record in stored.values()}
        assert len(distinct_salts) == 3  # all unique

    def test_hash_code_static_method(self, tmp_path):
        """_hash_code should be deterministic for the same code+salt."""
        first_salt = os.urandom(16)
        digest_a = PairingStore._hash_code("ABCD1234", first_salt)
        digest_b = PairingStore._hash_code("ABCD1234", first_salt)
        assert digest_a == digest_b
        # A different salt should give a different hash for the same code.
        second_salt = os.urandom(16)
        digest_c = PairingStore._hash_code("ABCD1234", second_salt)
        assert digest_c != digest_a
|
||||
|
||||
|
||||
class TestLegacyPendingFileCompat:
    """Defensive coverage for pre-hash pending.json files on upgraded installs.

    An existing install may still hold a pending.json written by the old
    code: plaintext code as the key, no hash/salt fields. The new
    approve_code / list_pending / _cleanup_expired must not crash on such
    entries; they should be ignored and aged out at TTL.
    """

    @staticmethod
    def _pending_file(tmp_path):
        """Path of the telegram pending file under *tmp_path*."""
        return tmp_path / "telegram-pending.json"

    @staticmethod
    def _write_legacy(tmp_path, code="ABCD1234", created_at=None):
        """Write a pre-hash pending.json with plaintext code as the key."""
        import time as _time

        stamp = _time.time() if created_at is None else created_at
        payload = {
            code: {
                "user_id": "legacy-user",
                "user_name": "Legacy",
                "created_at": stamp,
            }
        }
        (tmp_path / "telegram-pending.json").write_text(
            json.dumps(payload), encoding="utf-8"
        )

    def test_approve_code_ignores_legacy_entries(self, tmp_path):
        """A valid old-format code must NOT silently approve under the new schema."""
        with patch("gateway.pairing.PAIRING_DIR", tmp_path):
            self._write_legacy(tmp_path, code="LEGACY01")
            store = PairingStore()
            # The plaintext code used to be the key. Under the new schema it
            # is never consulted and there is no hash/salt to verify, so
            # approve_code returns None and the legacy entry is left for
            # _cleanup_expired to prune at TTL.
            outcome = store.approve_code("telegram", "LEGACY01")
            assert outcome is None
            # Nothing may have been added to the approved list.
            assert store.is_approved("telegram", "legacy-user") is False

    def test_list_pending_handles_legacy_entries(self, tmp_path):
        """list_pending must not KeyError on a missing 'hash' field."""
        with patch("gateway.pairing.PAIRING_DIR", tmp_path):
            self._write_legacy(tmp_path)
            listing = PairingStore().list_pending("telegram")
            assert len(listing) == 1
            only = listing[0]
            assert only["user_id"] == "legacy-user"
            assert only["code"] == "legacy"  # placeholder

    def test_cleanup_expired_removes_legacy_at_ttl(self, tmp_path):
        """Legacy entries past CODE_TTL must still get pruned."""
        import time as _time

        with patch("gateway.pairing.PAIRING_DIR", tmp_path):
            expired_at = _time.time() - CODE_TTL_SECONDS - 1
            self._write_legacy(tmp_path, code="LEGACY99", created_at=expired_at)
            PairingStore()._cleanup_expired("telegram")
            remaining = json.loads(
                self._pending_file(tmp_path).read_text(encoding="utf-8")
            )
            assert remaining == {}

    def test_cleanup_expired_handles_malformed_entries(self, tmp_path):
        """Non-dict / missing-created_at entries get evicted, not crashed on."""
        malformed = {
            "broken1": "not a dict",
            "broken2": {"user_id": "x"},  # no created_at
            "broken3": {"created_at": "not a number"},
        }
        with patch("gateway.pairing.PAIRING_DIR", tmp_path):
            self._pending_file(tmp_path).write_text(
                json.dumps(malformed),
                encoding="utf-8",
            )
            PairingStore()._cleanup_expired("telegram")
            remaining = json.loads(
                self._pending_file(tmp_path).read_text(encoding="utf-8")
            )
            assert remaining == {}

    def test_approve_code_skips_malformed_entries(self, tmp_path):
        """Malformed entries must not crash approve_code's hash loop."""
        import time as _time

        with patch("gateway.pairing.PAIRING_DIR", tmp_path):
            bad_entry = {
                "user_id": "x",
                "created_at": _time.time(),
                "salt": "not-hex",
                "hash": "doesntmatter",
            }
            self._pending_file(tmp_path).write_text(
                json.dumps({"broken": bad_entry}),
                encoding="utf-8",
            )
            store = PairingStore()
            # Whatever code is offered, the call must return None, not raise.
            assert store.approve_code("telegram", "ABCD1234") is None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -488,10 +300,9 @@ class TestCodeExpiry:
|
||||
store = PairingStore()
|
||||
code = store.generate_code("telegram", "user1")
|
||||
|
||||
# Manually expire all pending entries
|
||||
# Manually expire the code
|
||||
pending = store._load_json(store._pending_path("telegram"))
|
||||
for entry_id in pending:
|
||||
pending[entry_id]["created_at"] = time.time() - CODE_TTL_SECONDS - 1
|
||||
pending[code]["created_at"] = time.time() - CODE_TTL_SECONDS - 1
|
||||
store._save_json(store._pending_path("telegram"), pending)
|
||||
|
||||
# Cleanup happens on next operation
|
||||
@@ -503,10 +314,9 @@ class TestCodeExpiry:
|
||||
store = PairingStore()
|
||||
code = store.generate_code("telegram", "user1")
|
||||
|
||||
# Expire all entries
|
||||
# Expire it
|
||||
pending = store._load_json(store._pending_path("telegram"))
|
||||
for entry_id in pending:
|
||||
pending[entry_id]["created_at"] = time.time() - CODE_TTL_SECONDS - 1
|
||||
pending[code]["created_at"] = time.time() - CODE_TTL_SECONDS - 1
|
||||
store._save_json(store._pending_path("telegram"), pending)
|
||||
|
||||
result = store.approve_code("telegram", code)
|
||||
|
||||
@@ -6,11 +6,7 @@ import pytest
|
||||
from pathlib import Path
|
||||
|
||||
from gateway.config import PlatformConfig
|
||||
from gateway.platforms.webhook import (
|
||||
WebhookAdapter,
|
||||
_DYNAMIC_ROUTES_FILENAME,
|
||||
_INSECURE_NO_AUTH,
|
||||
)
|
||||
from gateway.platforms.webhook import WebhookAdapter, _DYNAMIC_ROUTES_FILENAME
|
||||
|
||||
|
||||
def _make_adapter(routes=None, extra=None):
|
||||
@@ -89,78 +85,3 @@ class TestDynamicRouteLoading:
|
||||
adapter._reload_dynamic_routes()
|
||||
assert "static" in adapter._routes
|
||||
assert len(adapter._dynamic_routes) == 0
|
||||
|
||||
|
||||
class TestDynamicRouteSecretValidation:
    """Empty/missing secrets must be rejected during hot-reload.

    Regression for an HMAC bypass. Before the fix, an agent-induced dynamic
    route carrying `"secret": ""` was merged into self._routes by
    _reload_dynamic_routes(); _handle_webhook's
    `if secret and secret != _INSECURE_NO_AUTH` then skipped signature
    validation because the empty string is falsy, so unauthenticated POSTs
    would execute the webhook prompt.
    """

    @staticmethod
    def _reloaded_adapter(tmp_path, routes, **adapter_kwargs):
        """Write *routes* to the dynamic-routes file, then build and reload an adapter.

        The file is written before the adapter is created, and the adapter is
        returned after ``_reload_dynamic_routes()`` has run once.
        """
        (tmp_path / _DYNAMIC_ROUTES_FILENAME).write_text(json.dumps(routes))
        adapter = _make_adapter(**adapter_kwargs)
        adapter._reload_dynamic_routes()
        return adapter

    def test_empty_secret_rejected(self, tmp_path):
        # An explicit empty-string secret must NOT fall back to the global
        # secret; the route has to be skipped entirely. The default adapter
        # has a global secret.
        adapter = self._reloaded_adapter(
            tmp_path, {"evil": {"secret": "", "prompt": "rm -rf"}}
        )
        assert "evil" not in adapter._routes
        assert "evil" not in adapter._dynamic_routes

    def test_missing_secret_no_global_rejected(self, tmp_path):
        # No per-route secret and no global secret configured either.
        adapter = self._reloaded_adapter(
            tmp_path, {"orphan": {"prompt": "test"}}, extra={"secret": ""}
        )
        assert "orphan" not in adapter._routes
        assert "orphan" not in adapter._dynamic_routes

    def test_missing_secret_inherits_global(self, tmp_path):
        # No per-route secret but a global one is set: the route is kept and
        # the global secret protects it. Preserves the existing fallback.
        adapter = self._reloaded_adapter(tmp_path, {"valid": {"prompt": "ok"}})
        assert "valid" in adapter._routes

    def test_insecure_no_auth_preserved(self, tmp_path):
        # Explicit opt-in escape hatch for local testing; must still load.
        adapter = self._reloaded_adapter(
            tmp_path, {"test": {"secret": _INSECURE_NO_AUTH, "prompt": "p"}}
        )
        assert "test" in adapter._routes

    def test_warning_logged_on_skip(self, tmp_path, caplog):
        import logging

        routes = {"silent": {"secret": "", "prompt": "x"}}
        (tmp_path / _DYNAMIC_ROUTES_FILENAME).write_text(json.dumps(routes))
        adapter = _make_adapter()
        # Capture only while the reload runs, on the webhook module's logger.
        with caplog.at_level(logging.WARNING, logger="gateway.platforms.webhook"):
            adapter._reload_dynamic_routes()
        mentions = [rec for rec in caplog.records if "silent" in rec.message]
        assert any(mentions)

    def test_partial_skip(self, tmp_path):
        # One bad route and one good route: only the bad one is dropped.
        adapter = self._reloaded_adapter(
            tmp_path,
            {
                "bad": {"secret": "", "prompt": "x"},
                "good": {"secret": "valid-secret", "prompt": "y"},
            },
        )
        assert "good" in adapter._routes
        assert "bad" not in adapter._routes
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,131 +0,0 @@
|
||||
"""Tests for curses color compatibility on low-color terminals (Docker).
|
||||
|
||||
Regression test for #13688: ``hermes plugins`` crashes with
|
||||
``curses.error: init_pair() : color number is greater than COLORS-1``
|
||||
in Docker containers where curses.COLORS == 8 (only colors 0-7 exist).
|
||||
|
||||
The bug was ``curses.init_pair(4, 8, -1)`` using raw color 8 ("bright
|
||||
black" / dim gray) which does not exist on 8-color terminals. The fix
|
||||
clamps with ``min(8, curses.COLORS - 1)``.
|
||||
"""
|
||||
|
||||
import curses
|
||||
import re
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch, MagicMock, call
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
# Path to the source files under test
|
||||
_SRC_ROOT = Path(__file__).parent.parent.parent / "hermes_cli"
|
||||
|
||||
|
||||
class TestInitPairClampingBehavior:
    """Simulate curses color initialization on low-color terminals.

    Patches curses.COLORS to 8 (Docker default) and verifies that
    init_pair is never called with a color >= COLORS.

    The color setup is simulated by the helpers below rather than imported
    from the CLI modules. The dim-gray pair uses color 8 only when
    ``curses.COLORS > 8`` and falls back to ``curses.COLOR_WHITE`` otherwise.
    """

    @staticmethod
    def _init_dim_pair(stdscr):
        """Initialise only pair 4 (dim gray) with the low-color fallback."""
        if curses.has_colors():
            curses.start_color()
            curses.use_default_colors()
            curses.init_pair(4, 8 if curses.COLORS > 8 else curses.COLOR_WHITE, -1)

    @staticmethod
    def _init_full_palette(stdscr):
        """Initialise pairs 1-4, simulating the pattern from plugins_cmd.py."""
        if curses.has_colors():
            curses.start_color()
            curses.use_default_colors()
            curses.init_pair(1, curses.COLOR_GREEN, -1)
            curses.init_pair(2, curses.COLOR_YELLOW, -1)
            curses.init_pair(3, curses.COLOR_CYAN, -1)
            curses.init_pair(4, 8 if curses.COLORS > 8 else curses.COLOR_WHITE, -1)

    def _collect_init_pair_calls(self, draw_fn, colors_value):
        """Run a curses draw function with a mock stdscr and patched COLORS.

        Args:
            draw_fn: callable taking the (mock) screen object.
            colors_value: value that ``curses.COLORS`` reports during the call.

        Returns list of (pair_number, fg, bg) tuples from init_pair calls.
        """
        calls = []

        def tracking_init_pair(pair, fg, bg):
            calls.append((pair, fg, bg))

        mock_stdscr = MagicMock()
        mock_stdscr.getmaxyx.return_value = (24, 80)
        mock_stdscr.getch.return_value = 27  # ESC to exit

        # curses.COLORS does not exist before initscr(), hence create=True.
        with patch("curses.COLORS", colors_value, create=True), \
                patch("curses.init_pair", side_effect=tracking_init_pair), \
                patch("curses.has_colors", return_value=True), \
                patch("curses.start_color"), \
                patch("curses.use_default_colors"), \
                patch("curses.curs_set"):
            try:
                draw_fn(mock_stdscr)
            except (SystemExit, Exception):
                # Deliberately best-effort: draw functions loop until a
                # keypress and may bail out in any way; only the init_pair
                # calls recorded before that point matter here.
                pass

        return calls

    def test_8_color_terminal_no_color_exceeds_limit(self):
        """On an 8-color terminal (Docker), no init_pair fg color >= 8."""
        calls = self._collect_init_pair_calls(self._init_full_palette, 8)
        for pair, fg, bg in calls:
            assert fg < 8, (
                f"init_pair({pair}, {fg}, {bg}) uses color {fg} which "
                f"does not exist on an 8-color terminal (valid: 0-7)"
            )

    def test_256_color_terminal_uses_color_8(self):
        """On a 256-color terminal, color 8 (dim gray) should be used."""
        calls = self._collect_init_pair_calls(self._init_dim_pair, 256)
        assert any(fg == 8 for _, fg, _ in calls), (
            "On 256-color terminals, color 8 (dim gray) should be used"
        )

    def test_16_color_terminal_uses_color_8(self):
        """On a 16-color terminal, color 8 should be available."""
        calls = self._collect_init_pair_calls(self._init_dim_pair, 16)
        assert any(fg == 8 for _, fg, _ in calls)
|
||||
|
||||
|
||||
class TestSourceCodeGuardrails:
|
||||
"""Regression guardrails: raw color 8 must not reappear in source.
|
||||
|
||||
These complement the behavioral tests above — they catch regressions
|
||||
introduced by copy-paste of the old pattern.
|
||||
"""
|
||||
|
||||
_RAW_COLOR_8_PATTERN = re.compile(r'init_pair\(\d+,\s*8\s*,')
|
||||
|
||||
def test_no_raw_color_8_in_plugins_cmd(self):
|
||||
source = (_SRC_ROOT / "plugins_cmd.py").read_text()
|
||||
matches = self._RAW_COLOR_8_PATTERN.findall(source)
|
||||
assert not matches, (
|
||||
f"plugins_cmd.py contains unclamped color 8: {matches}"
|
||||
)
|
||||
|
||||
def test_no_raw_color_8_in_main(self):
|
||||
source = (_SRC_ROOT / "main.py").read_text()
|
||||
matches = self._RAW_COLOR_8_PATTERN.findall(source)
|
||||
assert not matches, (
|
||||
f"main.py contains unclamped color 8: {matches}"
|
||||
)
|
||||
|
||||
def test_no_raw_color_8_in_curses_ui(self):
|
||||
source = (_SRC_ROOT / "curses_ui.py").read_text()
|
||||
matches = self._RAW_COLOR_8_PATTERN.findall(source)
|
||||
assert not matches, (
|
||||
f"curses_ui.py contains unclamped color 8: {matches}"
|
||||
)
|
||||
@@ -69,19 +69,18 @@ class TestPluginPickerInjection:
|
||||
assert "Myimg" in names
|
||||
assert "myimg" in plugin_names
|
||||
|
||||
def test_fal_surfaced_alongside_other_plugins(self, monkeypatch):
|
||||
def test_fal_skipped_to_avoid_duplicate(self, monkeypatch):
|
||||
from hermes_cli import tools_config
|
||||
|
||||
# After #26241, FAL is itself a plugin (`plugins/image_gen/fal/`)
|
||||
# and the hardcoded `TOOL_CATEGORIES["image_gen"]` FAL row is
|
||||
# gone. The plugin-row builder therefore surfaces it like any
|
||||
# other backend — no deduplication step needed.
|
||||
# Simulate a FAL plugin being registered — the picker already has
|
||||
# hardcoded FAL rows in TOOL_CATEGORIES, so plugin-FAL must be
|
||||
# skipped to avoid showing FAL twice.
|
||||
image_gen_registry.register_provider(_FakeProvider("fal"))
|
||||
image_gen_registry.register_provider(_FakeProvider("openai"))
|
||||
|
||||
rows = tools_config._plugin_image_gen_providers()
|
||||
names = [r.get("image_gen_plugin_name") for r in rows]
|
||||
assert "fal" in names
|
||||
assert "fal" not in names
|
||||
assert "openai" in names
|
||||
|
||||
def test_visible_providers_includes_plugins_for_image_gen(self, monkeypatch):
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
"""Tests for ``install_cua_driver`` upgrade semantics and architecture pre-check.
|
||||
"""Tests for ``install_cua_driver`` upgrade semantics.
|
||||
|
||||
The cua-driver upstream installer always pulls the latest release tag, so
|
||||
re-running it is the canonical upgrade path. ``install_cua_driver(upgrade=True)``
|
||||
@@ -10,18 +10,18 @@ must:
|
||||
fix for the "we only pulled cua-driver once on enable" complaint).
|
||||
* Preserve original ``upgrade=False`` behaviour for the toolset-enable flow:
|
||||
skip if installed, install otherwise, warn on non-macOS.
|
||||
* Pre-check architecture compatibility before downloading to avoid raw 404
|
||||
errors on Intel macOS when the upstream release lacks x86_64 assets.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from unittest.mock import MagicMock, patch
|
||||
from unittest.mock import patch
|
||||
|
||||
|
||||
class TestInstallCuaDriverUpgrade:
|
||||
def test_upgrade_on_non_macos_is_silent_noop(self):
|
||||
"""``hermes update`` calls install_cua_driver(upgrade=True) for every
|
||||
user. On Linux/Windows it must return False without printing the
|
||||
"macOS-only; skipping" warning that the toolset-enable path emits."""
|
||||
from hermes_cli import tools_config
|
||||
|
||||
with patch.object(tools_config, "_print_warning") as warn, \
|
||||
@@ -30,6 +30,8 @@ class TestInstallCuaDriverUpgrade:
|
||||
warn.assert_not_called()
|
||||
|
||||
def test_non_upgrade_on_non_macos_warns(self):
|
||||
"""The toolset-enable path (upgrade=False) should still warn loudly
|
||||
when the user tries to enable Computer Use on a non-macOS host."""
|
||||
from hermes_cli import tools_config
|
||||
|
||||
with patch.object(tools_config, "_print_warning") as warn, \
|
||||
@@ -38,36 +40,43 @@ class TestInstallCuaDriverUpgrade:
|
||||
warn.assert_called()
|
||||
|
||||
def test_upgrade_on_macos_with_binary_runs_installer(self):
|
||||
"""When cua-driver is already on PATH and upgrade=True, we must
|
||||
re-run the upstream installer (this is the fix for the bug report).
|
||||
"""
|
||||
from hermes_cli import tools_config
|
||||
|
||||
with patch("platform.system", return_value="Darwin"), \
|
||||
patch.object(tools_config.shutil, "which",
|
||||
side_effect=lambda n: "/usr/local/bin/" + n
|
||||
if n in {"cua-driver", "curl"} else None), \
|
||||
patch.object(tools_config, "_check_cua_driver_asset_for_arch",
|
||||
return_value=True), \
|
||||
patch.object(tools_config, "_run_cua_driver_installer",
|
||||
return_value=True) as runner, \
|
||||
patch("subprocess.run"):
|
||||
assert tools_config.install_cua_driver(upgrade=True) is True
|
||||
runner.assert_called_once()
|
||||
# Refresh path uses non-verbose mode so we don't re-print the
|
||||
# "grant macOS permissions" block on every `hermes update`.
|
||||
kwargs = runner.call_args.kwargs
|
||||
assert kwargs.get("verbose") is False
|
||||
|
||||
def test_upgrade_on_macos_without_binary_runs_installer(self):
|
||||
"""upgrade=True with cua-driver missing must still trigger an
|
||||
install — equivalent to a fresh install. (Don't silently no-op.)"""
|
||||
from hermes_cli import tools_config
|
||||
|
||||
with patch("platform.system", return_value="Darwin"), \
|
||||
patch.object(tools_config.shutil, "which",
|
||||
side_effect=lambda n: "/usr/bin/curl" if n == "curl" else None), \
|
||||
patch.object(tools_config, "_check_cua_driver_asset_for_arch",
|
||||
return_value=True), \
|
||||
patch.object(tools_config, "_run_cua_driver_installer",
|
||||
return_value=True) as runner:
|
||||
assert tools_config.install_cua_driver(upgrade=True) is True
|
||||
runner.assert_called_once()
|
||||
|
||||
def test_non_upgrade_on_macos_with_binary_skips_install(self):
|
||||
"""Original toolset-enable behaviour: cua-driver already installed
|
||||
+ upgrade=False → confirm and return without re-running installer.
|
||||
This is the behaviour that ``hermes tools`` (re)enable depends on,
|
||||
so the new helper must not regress it."""
|
||||
from hermes_cli import tools_config
|
||||
|
||||
with patch("platform.system", return_value="Darwin"), \
|
||||
@@ -80,133 +89,27 @@ class TestInstallCuaDriverUpgrade:
|
||||
runner.assert_not_called()
|
||||
|
||||
def test_non_upgrade_on_macos_without_binary_runs_installer(self):
|
||||
"""Original fresh-install path must still work."""
|
||||
from hermes_cli import tools_config
|
||||
|
||||
with patch("platform.system", return_value="Darwin"), \
|
||||
patch.object(tools_config.shutil, "which",
|
||||
side_effect=lambda n: "/usr/bin/curl" if n == "curl" else None), \
|
||||
patch.object(tools_config, "_check_cua_driver_asset_for_arch",
|
||||
return_value=True), \
|
||||
patch.object(tools_config, "_run_cua_driver_installer",
|
||||
return_value=True) as runner:
|
||||
assert tools_config.install_cua_driver(upgrade=False) is True
|
||||
runner.assert_called_once()
|
||||
|
||||
|
||||
class TestCheckCuaDriverAssetForArch:
|
||||
def test_arm64_always_returns_true(self):
|
||||
def test_upgrade_without_curl_does_not_crash(self):
|
||||
"""If curl isn't on PATH we can't refresh — must warn and return
|
||||
the current install state, not raise."""
|
||||
from hermes_cli import tools_config
|
||||
|
||||
with patch("platform.machine", return_value="arm64"):
|
||||
assert tools_config._check_cua_driver_asset_for_arch() is True
|
||||
|
||||
def test_x86_64_with_asset_returns_true(self):
|
||||
from hermes_cli import tools_config
|
||||
|
||||
release = {
|
||||
"tag_name": "cua-driver-v0.1.6",
|
||||
"assets": [
|
||||
{"name": "cua-driver-0.1.6-darwin-arm64.tar.gz"},
|
||||
{"name": "cua-driver-0.1.6-darwin-x86_64.tar.gz"},
|
||||
],
|
||||
}
|
||||
mock_resp = MagicMock()
|
||||
mock_resp.read.return_value = json.dumps(release).encode()
|
||||
mock_resp.__enter__ = lambda s: s
|
||||
mock_resp.__exit__ = MagicMock(return_value=False)
|
||||
|
||||
with patch("platform.machine", return_value="x86_64"), \
|
||||
patch("urllib.request.urlopen", return_value=mock_resp):
|
||||
assert tools_config._check_cua_driver_asset_for_arch() is True
|
||||
|
||||
def test_x86_64_without_asset_returns_false(self):
|
||||
from hermes_cli import tools_config
|
||||
|
||||
release = {
|
||||
"tag_name": "cua-driver-v0.1.6",
|
||||
"assets": [
|
||||
{"name": "cua-driver-0.1.6-darwin-arm64.tar.gz"},
|
||||
{"name": "cua-driver.tar.gz"},
|
||||
],
|
||||
}
|
||||
mock_resp = MagicMock()
|
||||
mock_resp.read.return_value = json.dumps(release).encode()
|
||||
mock_resp.__enter__ = lambda s: s
|
||||
mock_resp.__exit__ = MagicMock(return_value=False)
|
||||
|
||||
with patch("platform.machine", return_value="x86_64"), \
|
||||
patch("urllib.request.urlopen", return_value=mock_resp), \
|
||||
patch.object(tools_config, "_print_warning") as warn, \
|
||||
patch.object(tools_config, "_print_info"):
|
||||
assert tools_config._check_cua_driver_asset_for_arch() is False
|
||||
warn.assert_called_once()
|
||||
assert "no Intel" in warn.call_args[0][0].lower() or "x86_64" in warn.call_args[0][0]
|
||||
|
||||
def test_x86_64_api_failure_returns_true(self):
|
||||
"""Network failure should fail open — let the installer handle it."""
|
||||
from hermes_cli import tools_config
|
||||
|
||||
with patch("platform.machine", return_value="x86_64"), \
|
||||
patch("urllib.request.urlopen", side_effect=Exception("timeout")):
|
||||
assert tools_config._check_cua_driver_asset_for_arch() is True
|
||||
|
||||
def test_fresh_install_x86_64_no_asset_skips_installer(self):
|
||||
"""When the latest release has no Intel asset, skip the installer."""
|
||||
from hermes_cli import tools_config
|
||||
|
||||
release = {
|
||||
"tag_name": "cua-driver-v0.1.6",
|
||||
"assets": [{"name": "cua-driver-0.1.6-darwin-arm64.tar.gz"}],
|
||||
}
|
||||
mock_resp = MagicMock()
|
||||
mock_resp.read.return_value = json.dumps(release).encode()
|
||||
mock_resp.__enter__ = lambda s: s
|
||||
mock_resp.__exit__ = MagicMock(return_value=False)
|
||||
# cua-driver present, curl missing.
|
||||
def _which(name):
|
||||
return "/usr/local/bin/cua-driver" if name == "cua-driver" else None
|
||||
|
||||
with patch("platform.system", return_value="Darwin"), \
|
||||
patch.object(tools_config.shutil, "which",
|
||||
side_effect=lambda n: "/usr/bin/curl" if n == "curl" else None), \
|
||||
patch("platform.machine", return_value="x86_64"), \
|
||||
patch("urllib.request.urlopen", return_value=mock_resp), \
|
||||
patch.object(tools_config, "_print_warning"), \
|
||||
patch.object(tools_config, "_print_info"), \
|
||||
patch.object(tools_config, "_run_cua_driver_installer") as runner:
|
||||
assert tools_config.install_cua_driver(upgrade=False) is False
|
||||
runner.assert_not_called()
|
||||
|
||||
def test_upgrade_x86_64_no_asset_returns_existing_status(self):
|
||||
"""On upgrade with no Intel asset, return whether binary existed."""
|
||||
from hermes_cli import tools_config
|
||||
|
||||
release = {
|
||||
"tag_name": "cua-driver-v0.1.6",
|
||||
"assets": [{"name": "cua-driver-0.1.6-darwin-arm64.tar.gz"}],
|
||||
}
|
||||
mock_resp = MagicMock()
|
||||
mock_resp.read.return_value = json.dumps(release).encode()
|
||||
mock_resp.__enter__ = lambda s: s
|
||||
mock_resp.__exit__ = MagicMock(return_value=False)
|
||||
|
||||
# With binary installed — returns True (binary exists)
|
||||
with patch("platform.system", return_value="Darwin"), \
|
||||
patch.object(tools_config.shutil, "which",
|
||||
side_effect=lambda n: "/usr/local/bin/" + n
|
||||
if n in ("cua-driver", "curl") else None), \
|
||||
patch("platform.machine", return_value="x86_64"), \
|
||||
patch("urllib.request.urlopen", return_value=mock_resp), \
|
||||
patch.object(tools_config, "_print_warning"), \
|
||||
patch.object(tools_config, "_print_info"), \
|
||||
patch.object(tools_config, "_run_cua_driver_installer") as runner:
|
||||
patch.object(tools_config.shutil, "which", side_effect=_which), \
|
||||
patch.object(tools_config, "_print_warning"):
|
||||
assert tools_config.install_cua_driver(upgrade=True) is True
|
||||
runner.assert_not_called()
|
||||
|
||||
# Without binary — returns False
|
||||
with patch("platform.system", return_value="Darwin"), \
|
||||
patch.object(tools_config.shutil, "which",
|
||||
side_effect=lambda n: "/usr/bin/curl" if n == "curl" else None), \
|
||||
patch("platform.machine", return_value="x86_64"), \
|
||||
patch("urllib.request.urlopen", return_value=mock_resp), \
|
||||
patch.object(tools_config, "_print_warning"), \
|
||||
patch.object(tools_config, "_print_info"), \
|
||||
patch.object(tools_config, "_run_cua_driver_installer") as runner:
|
||||
assert tools_config.install_cua_driver(upgrade=True) is False
|
||||
runner.assert_not_called()
|
||||
|
||||
@@ -179,7 +179,13 @@ def test_get_gateway_eligible_tools_ignores_quoted_false_opt_in(monkeypatch):
|
||||
monkeypatch.setattr(
|
||||
ns,
|
||||
"_get_gateway_direct_credentials",
|
||||
lambda: {"web": True, "image_gen": False, "tts": False, "browser": False},
|
||||
lambda: {
|
||||
"web": True,
|
||||
"image_gen": False,
|
||||
"tts": False,
|
||||
"stt": False,
|
||||
"browser": False,
|
||||
},
|
||||
)
|
||||
|
||||
unconfigured, has_direct, already_managed = ns.get_gateway_eligible_tools(
|
||||
@@ -191,4 +197,150 @@ def test_get_gateway_eligible_tools_ignores_quoted_false_opt_in(monkeypatch):
|
||||
|
||||
assert "web" in has_direct
|
||||
assert "web" not in already_managed
|
||||
assert set(unconfigured) == {"image_gen", "tts", "browser"}
|
||||
assert set(unconfigured) == {"image_gen", "tts", "stt", "browser"}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# STT — managed-by-Nous detection (Phase 4 follow-up)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_stt_managed_by_nous_when_provider_openai_and_no_direct_key(monkeypatch):
|
||||
"""Default `stt.provider: openai` with a Nous sub + no direct OpenAI key
|
||||
should route through the managed audio gateway."""
|
||||
monkeypatch.setattr(ns, "get_env_value", lambda name: "")
|
||||
monkeypatch.setattr(ns, "get_nous_auth_status", lambda: {"logged_in": True})
|
||||
monkeypatch.setattr(ns, "managed_nous_tools_enabled", lambda: True)
|
||||
monkeypatch.setattr(ns, "_toolset_enabled", lambda config, key: False)
|
||||
monkeypatch.setattr(ns, "_has_agent_browser", lambda: False)
|
||||
monkeypatch.setattr(ns, "resolve_openai_audio_api_key", lambda: "")
|
||||
monkeypatch.setattr(ns, "has_direct_modal_credentials", lambda: False)
|
||||
monkeypatch.setattr(
|
||||
ns,
|
||||
"is_managed_tool_gateway_ready",
|
||||
lambda vendor: vendor == "openai-audio",
|
||||
)
|
||||
|
||||
features = ns.get_nous_subscription_features({"stt": {"provider": "openai"}})
|
||||
|
||||
assert features.stt.available is True
|
||||
assert features.stt.active is True
|
||||
assert features.stt.managed_by_nous is True
|
||||
assert features.stt.direct_override is False
|
||||
assert features.stt.current_provider == "OpenAI Whisper"
|
||||
|
||||
|
||||
def test_stt_direct_key_overrides_managed(monkeypatch):
|
||||
"""When the user has VOICE_TOOLS_OPENAI_KEY set, STT should use the
|
||||
direct key, not the managed gateway — same precedence as TTS."""
|
||||
monkeypatch.setattr(ns, "get_env_value", lambda name: "")
|
||||
monkeypatch.setattr(ns, "get_nous_auth_status", lambda: {"logged_in": True})
|
||||
monkeypatch.setattr(ns, "managed_nous_tools_enabled", lambda: True)
|
||||
monkeypatch.setattr(ns, "_toolset_enabled", lambda config, key: False)
|
||||
monkeypatch.setattr(ns, "_has_agent_browser", lambda: False)
|
||||
monkeypatch.setattr(ns, "resolve_openai_audio_api_key", lambda: "sk-direct-key")
|
||||
monkeypatch.setattr(ns, "has_direct_modal_credentials", lambda: False)
|
||||
monkeypatch.setattr(
|
||||
ns,
|
||||
"is_managed_tool_gateway_ready",
|
||||
lambda vendor: vendor == "openai-audio",
|
||||
)
|
||||
|
||||
features = ns.get_nous_subscription_features({"stt": {"provider": "openai"}})
|
||||
|
||||
assert features.stt.available is True
|
||||
assert features.stt.managed_by_nous is False
|
||||
assert features.stt.direct_override is True
|
||||
|
||||
|
||||
def test_stt_groq_provider_requires_groq_key(monkeypatch):
|
||||
env = {"GROQ_API_KEY": "groq-key"}
|
||||
monkeypatch.setattr(ns, "get_env_value", lambda name: env.get(name, ""))
|
||||
monkeypatch.setattr(ns, "get_nous_auth_status", lambda: {})
|
||||
monkeypatch.setattr(ns, "managed_nous_tools_enabled", lambda: False)
|
||||
monkeypatch.setattr(ns, "_toolset_enabled", lambda config, key: False)
|
||||
monkeypatch.setattr(ns, "_has_agent_browser", lambda: False)
|
||||
monkeypatch.setattr(ns, "resolve_openai_audio_api_key", lambda: "")
|
||||
monkeypatch.setattr(ns, "has_direct_modal_credentials", lambda: False)
|
||||
monkeypatch.setattr(ns, "is_managed_tool_gateway_ready", lambda vendor: False)
|
||||
|
||||
features = ns.get_nous_subscription_features({"stt": {"provider": "groq"}})
|
||||
|
||||
assert features.stt.available is True
|
||||
assert features.stt.managed_by_nous is False
|
||||
assert features.stt.current_provider == "Groq Whisper"
|
||||
assert features.stt.explicit_configured is True
|
||||
|
||||
|
||||
def test_apply_nous_managed_defaults_flips_stt_provider_to_openai_for_nous_users(monkeypatch):
|
||||
"""Fresh Nous-subscribed user with the DEFAULT_CONFIG `stt.provider: local`
|
||||
seed should have it auto-flipped to "openai" so the managed audio
|
||||
gateway transcribes their voice notes without needing faster-whisper
|
||||
installed."""
|
||||
monkeypatch.setattr(ns, "get_env_value", lambda name: "")
|
||||
monkeypatch.setattr(ns, "managed_nous_tools_enabled", lambda: True)
|
||||
# Avoid the heavy real probing in get_nous_subscription_features.
|
||||
monkeypatch.setattr(
|
||||
ns,
|
||||
"get_nous_subscription_features",
|
||||
lambda config: ns.NousSubscriptionFeatures(
|
||||
subscribed=True,
|
||||
nous_auth_present=True,
|
||||
provider_is_nous=True,
|
||||
features={
|
||||
key: ns.NousFeatureState(
|
||||
key=key, label=key, included_by_default=True,
|
||||
available=False, active=False, managed_by_nous=False,
|
||||
direct_override=False, toolset_enabled=False,
|
||||
explicit_configured=False,
|
||||
)
|
||||
for key in ("web", "image_gen", "tts", "stt", "browser", "modal")
|
||||
},
|
||||
),
|
||||
)
|
||||
|
||||
config = {"stt": {"provider": "local"}}
|
||||
changed = ns.apply_nous_managed_defaults(config, enabled_toolsets=[])
|
||||
|
||||
assert "stt" in changed
|
||||
assert config["stt"]["provider"] == "openai"
|
||||
|
||||
|
||||
def test_apply_nous_managed_defaults_skips_stt_when_groq_key_present(monkeypatch):
|
||||
"""Don't override a user who explicitly set up Groq for STT."""
|
||||
env = {"GROQ_API_KEY": "groq-key"}
|
||||
monkeypatch.setattr(ns, "get_env_value", lambda name: env.get(name, ""))
|
||||
monkeypatch.setattr(ns, "managed_nous_tools_enabled", lambda: True)
|
||||
monkeypatch.setattr(
|
||||
ns,
|
||||
"get_nous_subscription_features",
|
||||
lambda config: ns.NousSubscriptionFeatures(
|
||||
subscribed=True,
|
||||
nous_auth_present=True,
|
||||
provider_is_nous=True,
|
||||
features={
|
||||
key: ns.NousFeatureState(
|
||||
key=key, label=key, included_by_default=True,
|
||||
available=False, active=False, managed_by_nous=False,
|
||||
direct_override=False, toolset_enabled=False,
|
||||
explicit_configured=False,
|
||||
)
|
||||
for key in ("web", "image_gen", "tts", "stt", "browser", "modal")
|
||||
},
|
||||
),
|
||||
)
|
||||
|
||||
config = {"stt": {"provider": "local"}}
|
||||
changed = ns.apply_nous_managed_defaults(config, enabled_toolsets=[])
|
||||
|
||||
# STT was not flipped because the user has a Groq key configured.
|
||||
assert "stt" not in changed
|
||||
assert config["stt"]["provider"] == "local"
|
||||
|
||||
|
||||
def test_apply_gateway_defaults_sets_stt_use_gateway(monkeypatch):
|
||||
config = {}
|
||||
changed = ns.apply_gateway_defaults(config, ["stt"])
|
||||
|
||||
assert "stt" in changed
|
||||
assert config["stt"]["provider"] == "openai"
|
||||
assert config["stt"]["use_gateway"] is True
|
||||
|
||||
@@ -88,9 +88,9 @@ def test_show_status_reports_managed_nous_features(monkeypatch, capsys, tmp_path
|
||||
"web": NousFeatureState("web", "Web tools", True, True, True, True, False, True, "firecrawl"),
|
||||
"image_gen": NousFeatureState("image_gen", "Image generation", True, True, True, True, False, True, "Nous Subscription"),
|
||||
"tts": NousFeatureState("tts", "OpenAI TTS", True, True, True, True, False, True, "OpenAI TTS"),
|
||||
"stt": NousFeatureState("stt", "Speech-to-text", True, True, True, True, False, True, "OpenAI Whisper"),
|
||||
"browser": NousFeatureState("browser", "Browser automation", True, True, True, True, False, True, "Browser Use"),
|
||||
"modal": NousFeatureState("modal", "Modal execution", False, True, False, False, False, True, "local"),
|
||||
"app_tools": NousFeatureState("app_tools", "App tools (500+ apps)", True, True, True, True, False, True, "Nous Subscription"),
|
||||
},
|
||||
),
|
||||
raising=False,
|
||||
|
||||
@@ -12,10 +12,8 @@ from hermes_cli.tools_config import (
|
||||
_get_platform_tools,
|
||||
_platform_toolset_summary,
|
||||
_reconfigure_tool,
|
||||
_run_post_setup,
|
||||
_save_platform_tools,
|
||||
_toolset_has_keys,
|
||||
_toolset_needs_configuration_prompt,
|
||||
CONFIGURABLE_TOOLSETS,
|
||||
TOOL_CATEGORIES,
|
||||
_visible_providers,
|
||||
@@ -754,91 +752,6 @@ def test_numeric_mcp_server_name_does_not_crash_sorted():
|
||||
|
||||
# ─── Imagegen Backend Picker Wiring ────────────────────────────────────────
|
||||
|
||||
def test_toolset_has_keys_treats_no_key_providers_as_configured():
|
||||
config = {}
|
||||
|
||||
assert _toolset_has_keys("computer_use", config) is True
|
||||
|
||||
|
||||
def test_computer_use_needs_configuration_when_cua_driver_post_setup_pending():
|
||||
"""No-key providers can still need setup when their post_setup is unsatisfied.
|
||||
|
||||
Returning users enabling Computer Use through `hermes tools` must reach the
|
||||
cua-driver post-setup installer even though the provider has no API keys.
|
||||
"""
|
||||
with patch("shutil.which", return_value=None):
|
||||
assert _toolset_needs_configuration_prompt("computer_use", {}) is True
|
||||
|
||||
|
||||
def test_computer_use_skips_configuration_when_cua_driver_already_installed():
|
||||
"""Installed post_setup dependencies should keep returning-user toggles no-op."""
|
||||
def fake_which(name: str):
|
||||
return "/usr/local/bin/cua-driver" if name == "cua-driver" else None
|
||||
|
||||
with patch("shutil.which", side_effect=fake_which):
|
||||
assert _toolset_needs_configuration_prompt("computer_use", {}) is False
|
||||
|
||||
|
||||
def test_computer_use_respects_custom_cua_driver_command():
|
||||
"""The setup gate should match runtime's HERMES_CUA_DRIVER_CMD override."""
|
||||
def fake_which(name: str):
|
||||
return "/opt/bin/custom-cua" if name == "custom-cua" else None
|
||||
|
||||
with patch.dict("os.environ", {"HERMES_CUA_DRIVER_CMD": "custom-cua"}), \
|
||||
patch("shutil.which", side_effect=fake_which):
|
||||
assert _toolset_needs_configuration_prompt("computer_use", {}) is False
|
||||
|
||||
|
||||
def test_computer_use_blank_custom_driver_command_falls_back_to_default():
|
||||
"""Blank overrides should not make the setup gate look for an empty command."""
|
||||
def fake_which(name: str):
|
||||
return "/usr/local/bin/cua-driver" if name == "cua-driver" else None
|
||||
|
||||
with patch.dict("os.environ", {"HERMES_CUA_DRIVER_CMD": " "}), \
|
||||
patch("shutil.which", side_effect=fake_which):
|
||||
assert _toolset_needs_configuration_prompt("computer_use", {}) is False
|
||||
|
||||
|
||||
def test_computer_use_post_setup_respects_custom_driver_command_when_installed():
|
||||
"""post_setup already-installed checks should version-probe the override."""
|
||||
def fake_which(name: str):
|
||||
return "/opt/bin/custom-cua" if name == "custom-cua" else None
|
||||
|
||||
with patch.dict("os.environ", {"HERMES_CUA_DRIVER_CMD": "custom-cua"}), \
|
||||
patch("platform.system", return_value="Darwin"), \
|
||||
patch("shutil.which", side_effect=fake_which), \
|
||||
patch("subprocess.run") as run:
|
||||
run.return_value.stdout = "custom 1.2.3\n"
|
||||
|
||||
_run_post_setup("cua_driver")
|
||||
|
||||
run.assert_called_once()
|
||||
assert run.call_args.args[0] == ["custom-cua", "--version"]
|
||||
|
||||
|
||||
def test_computer_use_post_setup_missing_override_does_not_accept_default_binary():
|
||||
"""A default cua-driver binary must not satisfy a missing runtime override."""
|
||||
seen = []
|
||||
|
||||
def fake_which(name: str):
|
||||
seen.append(name)
|
||||
if name == "cua-driver":
|
||||
return "/usr/local/bin/cua-driver"
|
||||
if name == "curl":
|
||||
return None
|
||||
return None
|
||||
|
||||
with patch.dict("os.environ", {"HERMES_CUA_DRIVER_CMD": "custom-cua"}), \
|
||||
patch("platform.system", return_value="Darwin"), \
|
||||
patch("shutil.which", side_effect=fake_which), \
|
||||
patch("subprocess.run") as run:
|
||||
_run_post_setup("cua_driver")
|
||||
|
||||
run.assert_not_called()
|
||||
assert "custom-cua" in seen
|
||||
assert "curl" in seen
|
||||
|
||||
|
||||
class TestImagegenBackendRegistry:
|
||||
"""IMAGEGEN_BACKENDS tags drive the model picker flow in tools_config."""
|
||||
|
||||
|
||||
@@ -168,7 +168,7 @@ def test_make_tui_argv_skips_build_only_on_termux_when_fresh(
|
||||
|
||||
argv, cwd = main_mod._make_tui_argv(tmp_path, tui_dev=False)
|
||||
|
||||
assert argv == ["/bin/node", "--expose-gc", str(tmp_path / "dist" / "entry.js")]
|
||||
assert argv == ["/bin/node", str(tmp_path / "dist" / "entry.js")]
|
||||
assert cwd == tmp_path
|
||||
|
||||
|
||||
|
||||
@@ -0,0 +1,406 @@
|
||||
"""Tests for the WhatsApp Cloud API setup wizard.
|
||||
|
||||
Covers:
|
||||
- Field-shape validators (catch the #1 setup mistake — phone number in
|
||||
the Phone Number ID field — plus the OpenAI / Slack / GitHub token
|
||||
paste-by-mistake cases)
|
||||
- Wizard end-to-end flow with mocked stdin/stdout — verifies each step
|
||||
writes the expected env var, validation errors block invalid input,
|
||||
optional fields can be skipped, and the SETUP COMPLETE block prints
|
||||
the post-setup tunnel + Meta-dashboard instructions the user needs
|
||||
(the wizard can't smoke-test reachability itself because the gateway
|
||||
isn't running yet during setup).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import io
|
||||
import os
|
||||
from contextlib import redirect_stdout
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from hermes_cli.setup_whatsapp_cloud import (
|
||||
_validate_phone_number_id,
|
||||
_validate_waba_id,
|
||||
_validate_app_id,
|
||||
_validate_app_secret,
|
||||
_validate_access_token,
|
||||
run_whatsapp_cloud_setup,
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Validator tests — the cheap, exhaustive coverage layer
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestPhoneNumberIdValidator:
|
||||
def test_accepts_real_meta_phone_number_id(self):
|
||||
ok, _ = _validate_phone_number_id("7794189252778687")
|
||||
assert ok
|
||||
|
||||
def test_rejects_actual_phone_number_with_helpful_message(self):
|
||||
"""The #1 setup trap — pasting the phone number instead of the ID."""
|
||||
ok, reason = _validate_phone_number_id("15556422442")
|
||||
assert not ok
|
||||
assert "phone number" in reason.lower()
|
||||
assert "Phone number ID" in reason # tells them where to look
|
||||
|
||||
def test_rejects_phone_number_with_plus(self):
|
||||
ok, reason = _validate_phone_number_id("+15556422442")
|
||||
assert not ok
|
||||
assert "numeric" in reason.lower() or "phone number" in reason.lower()
|
||||
|
||||
def test_rejects_empty(self):
|
||||
ok, reason = _validate_phone_number_id("")
|
||||
assert not ok
|
||||
assert "required" in reason.lower()
|
||||
|
||||
def test_rejects_too_short(self):
|
||||
ok, _ = _validate_phone_number_id("12345")
|
||||
assert not ok
|
||||
|
||||
def test_rejects_too_long(self):
|
||||
ok, _ = _validate_phone_number_id("1" * 25)
|
||||
assert not ok
|
||||
|
||||
def test_strips_surrounding_whitespace(self):
|
||||
ok, _ = _validate_phone_number_id(" 7794189252778687 ")
|
||||
assert ok
|
||||
|
||||
|
||||
class TestAccessTokenValidator:
|
||||
def test_accepts_eaa_token(self):
|
||||
ok, _ = _validate_access_token("EAA" + "a" * 100)
|
||||
assert ok
|
||||
|
||||
def test_rejects_empty(self):
|
||||
ok, reason = _validate_access_token("")
|
||||
assert not ok
|
||||
assert "required" in reason.lower()
|
||||
|
||||
def test_rejects_openai_key_with_helpful_message(self):
|
||||
ok, reason = _validate_access_token("sk-proj-" + "a" * 100)
|
||||
assert not ok
|
||||
assert "OpenAI" in reason
|
||||
|
||||
def test_rejects_slack_token_with_helpful_message(self):
|
||||
ok, reason = _validate_access_token("xoxb-1234-5678-abcdef")
|
||||
assert not ok
|
||||
assert "Slack" in reason
|
||||
|
||||
def test_rejects_github_token_with_helpful_message(self):
|
||||
ok, reason = _validate_access_token("ghp_abcdefghijklmnop")
|
||||
assert not ok
|
||||
assert "GitHub" in reason
|
||||
|
||||
def test_rejects_garbage_with_helpful_message(self):
|
||||
ok, reason = _validate_access_token("random-string-here")
|
||||
assert not ok
|
||||
assert "EAA" in reason # tells them what to look for
|
||||
|
||||
def test_rejects_short_token(self):
|
||||
ok, reason = _validate_access_token("EAAabc")
|
||||
assert not ok
|
||||
assert "short" in reason.lower()
|
||||
|
||||
|
||||
class TestAppSecretValidator:
|
||||
def test_accepts_32_hex_chars(self):
|
||||
ok, _ = _validate_app_secret("0123456789abcdef0123456789abcdef")
|
||||
assert ok
|
||||
|
||||
def test_accepts_uppercase_hex(self):
|
||||
ok, _ = _validate_app_secret("0123456789ABCDEF0123456789ABCDEF")
|
||||
assert ok
|
||||
|
||||
def test_rejects_wrong_length(self):
|
||||
ok, reason = _validate_app_secret("0123456789abcdef") # 16 chars
|
||||
assert not ok
|
||||
assert "32" in reason
|
||||
|
||||
def test_rejects_non_hex(self):
|
||||
ok, reason = _validate_app_secret("zzzz56789abcdef0123456789abcdezz")
|
||||
assert not ok
|
||||
assert "hex" in reason.lower()
|
||||
|
||||
def test_rejects_empty(self):
|
||||
ok, _ = _validate_app_secret("")
|
||||
assert not ok
|
||||
|
||||
|
||||
class TestAppIdValidator:
|
||||
def test_accepts_valid(self):
|
||||
ok, _ = _validate_app_id("1234567890123456")
|
||||
assert ok
|
||||
|
||||
def test_rejects_non_numeric(self):
|
||||
ok, _ = _validate_app_id("abcdef")
|
||||
assert not ok
|
||||
|
||||
def test_rejects_too_short(self):
|
||||
ok, _ = _validate_app_id("123")
|
||||
assert not ok
|
||||
|
||||
|
||||
class TestWabaIdValidator:
|
||||
def test_accepts_valid(self):
|
||||
ok, _ = _validate_waba_id("215589313241560883")
|
||||
assert ok
|
||||
|
||||
def test_rejects_non_numeric(self):
|
||||
ok, _ = _validate_waba_id("abc-def")
|
||||
assert not ok
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# End-to-end wizard flow
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def isolated_home(tmp_path, monkeypatch):
|
||||
"""Redirect HERMES_HOME so save_env_value writes into a temp .env."""
|
||||
home = tmp_path / "home"
|
||||
hermes = home / ".hermes"
|
||||
hermes.mkdir(parents=True)
|
||||
monkeypatch.setattr(Path, "home", lambda: home)
|
||||
monkeypatch.setenv("HERMES_HOME", str(hermes))
|
||||
for key in list(os.environ):
|
||||
if key.startswith("WHATSAPP_CLOUD_"):
|
||||
monkeypatch.delenv(key, raising=False)
|
||||
return hermes
|
||||
|
||||
|
||||
def _env_value(hermes_home: Path, key: str) -> str | None:
    """Return the value stored for *key* in ``<hermes_home>/.env``.

    Args:
        hermes_home: Directory expected to contain the ``.env`` file.
        key: Variable name to look up; compared after stripping whitespace
            around the name on each line.

    Returns:
        The value of the first matching ``NAME=value`` line, with surrounding
        whitespace and then surrounding double and single quotes removed, or
        ``None`` when the file is missing or no line matches.
    """
    env_file = hermes_home / ".env"
    if not env_file.exists():
        return None

    # Decode explicitly: the default would follow the platform locale, which
    # makes the helper behave differently from one machine to the next.
    # Undecodable bytes are replaced rather than raised, since only the
    # matching line matters to the caller.
    content = env_file.read_text(encoding="utf-8", errors="replace")
    for line in content.splitlines():
        name, separator, value = line.partition("=")
        if not separator:
            # No "=" on this line (blank line, bare comment, ...).
            continue
        if name.strip() == key:
            return value.strip().strip('"').strip("'")
    return None
|
||||
|
||||
|
||||
class TestWizardFlow:
    """Drive ``run_whatsapp_cloud_setup`` end to end with scripted ``input()`` answers."""

    # Answers for a run that supplies only the required fields.
    _MINIMAL_ANSWERS = (
        "",                                  # press Enter to continue
        "7794189252778687",                  # Phone Number ID
        "EAA" + "x" * 200,                   # Access Token
        "0123456789abcdef0123456789abcdef",  # App Secret
        "",                                  # App ID — skip
        "",                                  # WABA ID — skip
        "15551234567",                       # Allowed users
    )

    @staticmethod
    def _run_wizard(monkeypatch, answers):
        """Answer each ``input()`` prompt from *answers*; return ``(rc, stdout)``."""
        pending = iter(answers)
        monkeypatch.setattr("builtins.input", lambda *a, **kw: next(pending))
        captured = io.StringIO()
        with redirect_stdout(captured):
            rc = run_whatsapp_cloud_setup()
        return rc, captured.getvalue()

    def test_happy_path_minimal(self, isolated_home, monkeypatch):
        """Provide only the required fields; skip optional steps."""
        rc, out = self._run_wizard(monkeypatch, self._MINIMAL_ANSWERS)
        assert rc == 0
        assert "SETUP COMPLETE" in out

        def saved(name):
            return _env_value(isolated_home, name)

        # Required fields written
        assert saved("WHATSAPP_CLOUD_PHONE_NUMBER_ID") == "7794189252778687"
        assert saved("WHATSAPP_CLOUD_ACCESS_TOKEN").startswith("EAA")
        assert len(saved("WHATSAPP_CLOUD_APP_SECRET")) == 32
        assert saved("WHATSAPP_CLOUD_ALLOWED_USERS") == "15551234567"
        # Verify token auto-generated
        assert saved("WHATSAPP_CLOUD_VERIFY_TOKEN")
        # Optional fields stayed unset
        assert saved("WHATSAPP_CLOUD_APP_ID") is None
        assert saved("WHATSAPP_CLOUD_WABA_ID") is None

    def test_phone_number_id_validator_catches_phone_number(self, isolated_home, monkeypatch):
        """The trap test: a phone number pasted into the Phone Number ID
        field must be rejected with a helpful explanation, never accepted."""
        answers = [
            "",             # press Enter to continue
            "15556422442",  # phone number — rejected
            "",             # empty — gives up
        ]
        rc, out = self._run_wizard(monkeypatch, answers)
        assert rc == 1
        # Must surface the specific guidance about Phone Number ID
        assert "Phone number ID" in out
        assert "15-17 digits" in out
        # Should NOT have saved the bad value
        assert _env_value(isolated_home, "WHATSAPP_CLOUD_PHONE_NUMBER_ID") is None

    def test_access_token_validator_catches_openai_key(self, isolated_home, monkeypatch):
        """An 'sk-proj-...' key pasted by mistake is rejected by the wizard."""
        answers = [
            "",                      # continue
            "7794189252778687",      # good Phone ID
            "sk-proj-" + "x" * 100,  # OpenAI key — rejected
            "",                      # give up
        ]
        rc, out = self._run_wizard(monkeypatch, answers)
        assert rc == 1
        assert "OpenAI" in out  # diagnostic in error message
        # Phone Number ID was saved (it was valid), but access token was not
        assert _env_value(isolated_home, "WHATSAPP_CLOUD_PHONE_NUMBER_ID") == "7794189252778687"
        assert _env_value(isolated_home, "WHATSAPP_CLOUD_ACCESS_TOKEN") is None

    def test_verify_token_is_auto_generated(self, isolated_home, monkeypatch):
        """The user should not have to invent the verify token: the wizard
        generates a strong random one."""
        _rc, out = self._run_wizard(monkeypatch, self._MINIMAL_ANSWERS)
        generated = _env_value(isolated_home, "WHATSAPP_CLOUD_VERIFY_TOKEN")
        assert generated is not None
        # secrets.token_urlsafe(32) produces ~43 chars (base64-of-32-bytes)
        assert len(generated) >= 32
        # Should also be echoed to user output so they can paste into Meta
        assert generated in out

    def test_setup_complete_block_includes_post_setup_instructions(self, isolated_home, monkeypatch):
        """The gateway is not running yet, so the wizard cannot smoke-test
        the webhook; it MUST print the exact curl/cloudflared steps the user
        needs after it exits."""
        _rc, out = self._run_wizard(monkeypatch, self._MINIMAL_ANSWERS)
        # Required post-setup guidance
        for expected in (
            "cloudflared tunnel --url http://localhost:8090",
            "hermes gateway",
            "Verify and save",
            "messages",
        ):
            assert expected in out
        # The verify token should be quotable on the curl line
        assert _env_value(isolated_home, "WHATSAPP_CLOUD_VERIFY_TOKEN") in out

    def test_existing_token_preserved_on_rerun(self, isolated_home, monkeypatch):
        """Re-running with existing config lets the user keep every current
        value by pressing Enter."""
        # Pre-populate .env as if a previous run succeeded
        previous_token = "EAAprevious_token_here_" + "x" * 100
        (isolated_home / ".env").write_text(
            "WHATSAPP_CLOUD_PHONE_NUMBER_ID=7794189252778687\n"
            f"WHATSAPP_CLOUD_ACCESS_TOKEN={previous_token}\n"
            "WHATSAPP_CLOUD_APP_SECRET=0123456789abcdef0123456789abcdef\n"
            "WHATSAPP_CLOUD_VERIFY_TOKEN=existing_verify_token_already_set\n"
        )
        answers = [
            "",  # continue
            "",  # Phone ID — keep existing
            "",  # Token — keep existing
            "",  # App Secret — keep existing
            "",  # App ID — skip
            "",  # WABA ID — skip
            "",  # verify token: regenerate? [y/N] — no
            "",  # Allowed users — keep
        ]
        rc, _out = self._run_wizard(monkeypatch, answers)
        assert rc == 0
        # Values preserved
        kept_token = _env_value(isolated_home, "WHATSAPP_CLOUD_ACCESS_TOKEN")
        assert kept_token is not None
        assert kept_token.startswith("EAAprevious_token_here_")
        # Verify token preserved (user said no to regenerate)
        assert _env_value(isolated_home, "WHATSAPP_CLOUD_VERIFY_TOKEN") == "existing_verify_token_already_set"
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Profile polish block (SETUP COMPLETE → optional WhatsApp profile setup)
|
||||
# =========================================================================
|
||||
|
||||
|
||||
class TestProfilePolishGuidance:
    """Display name and profile picture cannot be set through the API; they
    are managed in Meta's Business Manager UI.  These tests check that the
    SETUP COMPLETE output sends the user there instead of leaving them to
    work it out alone."""

    @staticmethod
    def _wizard_output(monkeypatch, waba_answer):
        """Run the wizard with the standard answers and return its stdout.

        *waba_answer* is what gets typed at the WABA ID prompt ("" skips it).
        """
        pending = iter([
            "",
            "7794189252778687",
            "EAA" + "x" * 200,
            "0123456789abcdef0123456789abcdef",
            "",           # App ID — skip
            waba_answer,  # WABA ID
            "15551234567",
        ])
        monkeypatch.setattr("builtins.input", lambda *a, **kw: next(pending))
        captured = io.StringIO()
        with redirect_stdout(captured):
            run_whatsapp_cloud_setup()
        return captured.getvalue()

    def test_polish_block_present_and_points_at_business_manager(
        self, isolated_home, monkeypatch
    ):
        out = self._wizard_output(monkeypatch, "")  # WABA ID — skip
        # Polish block header
        assert "polish your bot's WhatsApp profile" in out
        # Direct user at Meta's Business Manager (not the developer dash)
        assert "business.facebook.com/wa/manage/phone-numbers" in out
        # Mention each of the three things the user can do there
        for topic in ("Display name", "profile picture", "Edit profile"):
            assert topic in out
        # Set expectations about display-name reviews
        assert "24-48h" in out or "24–48h" in out

    def test_polish_block_deeplinks_when_waba_id_known(
        self, isolated_home, monkeypatch
    ):
        """When the WABA ID was supplied earlier in the wizard, the Business
        Manager URL should pre-select that account."""
        waba = "987654321098765"
        out = self._wizard_output(monkeypatch, waba)  # WABA ID — provided
        # Deep-linked URL with the user's WABA pre-selected
        assert f"waba_id={waba}" in out
        # Without WABA, we tell the user they'll need to pick their account
        assert "select your WhatsApp Business Account" not in out
|
||||
@@ -1,300 +0,0 @@
|
||||
"""Behavior-parity check for the image-gen FAL plugin migration (#26241).
|
||||
|
||||
Spawns one subprocess per (version, scenario) cell — pinned to either
|
||||
``origin/main`` (legacy in-tree FAL fall-through + ``configured == "fal"``
|
||||
skip in ``_dispatch_to_plugin_provider``) or this PR's worktree (FAL is
|
||||
itself a plugin and the dispatcher routes every set provider through
|
||||
the registry). Each subprocess clears all FAL-related env vars + writes
|
||||
a ``config.yaml``, then asks the dispatcher how it would route an
|
||||
``image_generate`` call. The emitted shape tuple is
|
||||
``{dispatch_kind, provider_name, model}``:
|
||||
|
||||
* ``dispatch_kind`` ∈ ``{"legacy_fal", "plugin", "error", None}`` —
|
||||
whether the call would go straight to the in-tree pipeline,
|
||||
through ``_dispatch_to_plugin_provider``, raise an explicit
|
||||
provider-not-registered error, or fall through silently.
|
||||
* ``provider_name`` — when ``dispatch_kind == "plugin"``, the
|
||||
resolved provider name. ``None`` otherwise.
|
||||
* ``model`` — the resolved FAL model id when applicable.
|
||||
|
||||
The parent process diffs the shapes per scenario. A diff means the
|
||||
migration introduced an observable behaviour change vs origin/main —
|
||||
likely a real regression for users on the existing config keys.
|
||||
|
||||
Run from the PR worktree:
|
||||
|
||||
python tests/plugins/image_gen/check_parity_vs_main.py
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
REPO_ROOT = Path(__file__).resolve().parents[3]
|
||||
|
||||
|
||||
# Pin one path to current main, one to the PR worktree.
|
||||
# ``REPO_ROOT`` is ``.../.worktrees/<name>``; the main checkout lives
|
||||
# two levels up. When running directly from a regular clone (no
|
||||
# worktree), ``MAIN_DIR`` falls back to a sibling ``hermes-agent-main``
|
||||
# checkout if one exists.
|
||||
def _resolve_main_dir() -> Path:
    """Pick the checkout to use as the ``origin/main`` baseline.

    Tried in order: the directory two levels above ``REPO_ROOT`` (provided it
    is not ``REPO_ROOT`` itself), then a ``hermes-agent-main`` directory next
    to ``REPO_ROOT``.  A candidate qualifies when it contains
    ``tools/image_generation_tool.py``.  Falls back to ``REPO_ROOT``.
    """
    marker = Path("tools") / "image_generation_tool.py"

    two_up = REPO_ROOT.parent.parent
    if two_up != REPO_ROOT and (two_up / marker).exists():
        return two_up

    neighbour = REPO_ROOT.parent / "hermes-agent-main"
    return neighbour if (neighbour / marker).exists() else REPO_ROOT
|
||||
|
||||
|
||||
# Baseline checkout.  _resolve_main_dir() can fall back to REPO_ROOT, in
# which case MAIN_DIR and PR_DIR are the same directory.
MAIN_DIR = _resolve_main_dir()
# Checkout under test: the repository root derived from this file's location.
PR_DIR = REPO_ROOT
# Import-time sanity check that PR_DIR contains the image-generation tool.
assert (PR_DIR / "tools" / "image_generation_tool.py").exists(), (
    f"PR_DIR={PR_DIR} doesn't look like a hermes-agent checkout"
)
|
||||
|
||||
|
||||
# Source of the probe executed via ``python -c`` for every (checkout, scenario)
# pair.  argv[1] is the checkout to import from, argv[2] a JSON object of
# extra environment variables, argv[3] the config.yaml body.  The probe prints
# one JSON object describing how the dispatcher routed the call.
SUBPROCESS_SCRIPT = r"""
import json, os, sys, tempfile
sys.path.insert(0, sys.argv[1])

# Isolated HERMES_HOME so the config write is hermetic.
home = tempfile.mkdtemp()
os.environ["HERMES_HOME"] = home

# Clear FAL-related env so dispatch decisions are config-driven.
for k in (
    "FAL_KEY", "FAL_QUEUE_GATEWAY_URL",
    "TOOL_GATEWAY_DOMAIN", "TOOL_GATEWAY_USER_TOKEN",
    "FAL_IMAGE_MODEL",
):
    os.environ.pop(k, None)

scenario_env = json.loads(sys.argv[2])
os.environ.update(scenario_env)

config_yaml = sys.argv[3]
config_path = os.path.join(home, "config.yaml")
with open(config_path, "w") as f:
    f.write(config_yaml)

# Fresh import — must not have anything cached.
for name in list(sys.modules):
    if (name.startswith("tools.")
            or name.startswith("agent.")
            or name.startswith("plugins.")
            or name.startswith("hermes_cli.")):
        sys.modules.pop(name, None)

import tools.image_generation_tool as image_tool

dispatch_kind = None
provider_name = None
model = None
error_text = None

try:
    raw = image_tool._dispatch_to_plugin_provider("ping", "landscape")
    if raw is None:
        dispatch_kind = "legacy_fal"
    else:
        parsed = json.loads(raw) if isinstance(raw, str) else raw
        if isinstance(parsed, dict):
            if parsed.get("error_type") == "provider_not_registered":
                dispatch_kind = "error"
                error_text = parsed.get("error")
            else:
                dispatch_kind = "plugin"
                provider_name = parsed.get("provider")
                model = parsed.get("model")
        else:
            dispatch_kind = "unknown_payload"

    if model is None:
        # _resolve_fal_model still returns the active FAL model id even
        # when dispatch goes to a non-FAL plugin — used for the diff
        # only when applicable.
        try:
            model_id, _meta = image_tool._resolve_fal_model()
            if dispatch_kind == "legacy_fal":
                model = model_id
        except Exception:
            pass
except Exception as exc:
    dispatch_kind = "exception"
    error_text = repr(exc)

shape = {
    "dispatch_kind": dispatch_kind,
    "provider_name": provider_name,
    "model": model,
    "error_present": error_text is not None,
}
print(json.dumps(shape))
"""
|
||||
|
||||
|
||||
# Scenario matrix run once per checkout.  Each entry is unpacked by main() as
# (label, config_yaml, env) and handed to _run_scenario().
SCENARIOS: list[tuple[str, str, dict[str, str]]] = [
    # (label, config.yaml body, extra env vars)
    ("no-config-no-env", "", {}),
    (
        "explicit-fal-no-creds",
        "image_gen:\n provider: fal\n",
        {},
    ),
    (
        "explicit-fal-with-creds",
        "image_gen:\n provider: fal\n",
        {"FAL_KEY": "test-key"},
    ),
    (
        "explicit-fal-with-model",
        "image_gen:\n provider: fal\n model: fal-ai/flux-2-pro\n",
        {"FAL_KEY": "test-key"},
    ),
    (
        "explicit-typo-provider",
        "image_gen:\n provider: not-a-real-backend\n",
        {"FAL_KEY": "test-key"},
    ),
    (
        "managed-gateway-only",
        "",
        {
            "TOOL_GATEWAY_DOMAIN": "nousresearch.com",
            "TOOL_GATEWAY_USER_TOKEN": "nous-token",
        },
    ),
]
|
||||
|
||||
|
||||
def _run_scenario(repo_path: Path, label: str, config_yaml: str, env: dict) -> dict:
    """Run ``SUBPROCESS_SCRIPT`` against one checkout and return its shape.

    Args:
        repo_path: Checkout the probe should import from.
        label: Scenario label; not used by this function.
        config_yaml: Body passed to the probe as its config argument.
        env: Extra environment variables, passed to the probe as JSON.

    Returns:
        The JSON object printed on the probe's last stdout line.  Every
        failure (interpreter cannot be launched, timeout, non-zero exit,
        unparsable or non-object output) is reported as a dict with an
        ``"error"`` key instead of raising, so the caller can record the
        scenario as failed and carry on with the rest.
    """
    # Prefer the checkout's own virtualenv, then the baseline's, then PATH.
    venv_candidates = (
        repo_path / ".venv" / "bin" / "python",
        MAIN_DIR / ".venv" / "bin" / "python",
    )
    interpreter = next(
        (str(path) for path in venv_candidates if path.exists()), "python3"
    )

    try:
        proc = subprocess.run(
            [
                interpreter,
                "-c",
                SUBPROCESS_SCRIPT,
                str(repo_path),
                json.dumps(env),
                config_yaml,
            ],
            capture_output=True,
            text=True,
            timeout=60,
        )
    except subprocess.TimeoutExpired as exc:
        # A hung probe must not abort the remaining scenarios.
        return {"error": f"subprocess timed out after {exc.timeout}s"}
    except OSError as exc:
        # Interpreter missing or not executable.
        return {"error": f"could not launch {interpreter}: {exc}"}

    if proc.returncode != 0:
        return {
            "error": "subprocess failed",
            "stdout": proc.stdout[-500:],
            "stderr": proc.stderr[-500:],
        }

    stdout_lines = proc.stdout.strip().splitlines()
    if not stdout_lines:
        return {"error": "could not parse output: no output", "stdout": proc.stdout}
    try:
        # Only the last line counts; anything printed before it is ignored.
        shape = json.loads(stdout_lines[-1])
    except ValueError as exc:
        return {"error": f"could not parse output: {exc}", "stdout": proc.stdout}
    if not isinstance(shape, dict):
        return {
            "error": f"could not parse output: expected a JSON object, got {type(shape).__name__}",
            "stdout": proc.stdout,
        }
    return shape
|
||||
|
||||
|
||||
def _reduce(shape: dict) -> dict:
    """Project *shape* onto the fields compared for user-visible parity.

    The ``explicit-fal-*`` scenarios are expected to differ between the two
    checkouts: origin/main short-circuits to ``legacy_fal`` through its
    ``configured == "fal"`` skip, while the PR routes through the plugin and
    reports ``dispatch_kind == "plugin"`` with ``provider_name == "fal"``.

    The two are functionally equivalent (the plugin's ``generate()`` re-enters
    the same in-tree pipeline via ``_it`` indirection), but the difference is
    deliberately kept visible so reviewers can sign off on it.

    Missing fields come back as ``None``.
    """
    compared_fields = ("dispatch_kind", "provider_name", "model", "error_present")
    return {field: shape.get(field) for field in compared_fields}
|
||||
|
||||
|
||||
def main() -> int:
    """Run every scenario against both checkouts and report the differences.

    Each scenario ends up in one of four buckets: identical shapes (OK), the
    expected ``legacy_fal`` to ``plugin``/``fal`` change (DIFF), any other
    difference (FAIL), or a probe that returned an ``"error"`` entry (ERR).

    Returns:
        ``1`` when at least one scenario failed or errored, otherwise ``0``.
    """
    print(f"main: {MAIN_DIR}")
    print(f"pr: {PR_DIR}")
    print()

    if MAIN_DIR == PR_DIR:
        print(
            "WARN: MAIN_DIR == PR_DIR — diffs will be trivially identical.\n"
            " Set up a sibling 'hermes-agent-main' checkout pinned to "
            "origin/main to get real parity coverage."
        )
        print()

    regressed: list[str] = []
    broken: list[str] = []
    expected_diffs: list[tuple[str, dict, dict]] = []

    for label, config_yaml, env in SCENARIOS:
        baseline = _run_scenario(MAIN_DIR, label, config_yaml, env)
        candidate = _run_scenario(PR_DIR, label, config_yaml, env)

        if "error" in baseline or "error" in candidate:
            print(f" [ERR ] {label}: subprocess failed")
            print(f" main: {baseline}")
            print(f" pr: {candidate}")
            broken.append(label)
            continue

        before = _reduce(baseline)
        after = _reduce(candidate)
        if before == after:
            print(f" [OK] {label}: {before}")
            continue

        # The only acceptable difference: main reports legacy_fal where the
        # PR reports plugin dispatch to "fal".  Everything else is a
        # regression.
        is_expected_change = (
            before.get("dispatch_kind") == "legacy_fal"
            and after.get("dispatch_kind") == "plugin"
            and after.get("provider_name") == "fal"
        )
        if not is_expected_change:
            print(f" [FAIL] {label}")
            print(f" main: {before}")
            print(f" pr: {after}")
            regressed.append(label)
            continue

        print(f" [DIFF] {label}: legacy_fal → plugin (fal) — expected")
        expected_diffs.append((label, before, after))

    print()
    if broken:
        print(f"SUBPROCESS ERRORS in {len(broken)} scenario(s):")
        for name in broken:
            print(f" - {name}")
    if regressed:
        print(f"BEHAVIOUR REGRESSION in {len(regressed)} scenario(s):")
        for name in regressed:
            print(f" - {name}")
    if expected_diffs:
        print(
            f"INTENTIONAL DIFFS ({len(expected_diffs)}): "
            f"legacy_fal → plugin dispatch for explicit FAL paths."
        )

    if regressed or broken:
        return 1
    print(f"PARITY OK across {len(SCENARIOS)} scenarios.")
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
@@ -1,226 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Tests for the FAL.ai image generation plugin.
|
||||
|
||||
The plugin is a thin registration adapter — actual FAL pipeline logic
|
||||
lives in ``tools.image_generation_tool`` and is exercised by
|
||||
``tests/tools/test_image_generation.py``. These tests focus on:
|
||||
|
||||
* the ``ImageGenProvider`` ABC surface (name, models, schema)
|
||||
* call-time indirection (``_it`` resolution at ``generate()`` time so
|
||||
``monkeypatch.setattr(image_tool, ...)`` keeps working)
|
||||
* response shape stamping (provider/prompt/aspect_ratio/model)
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Provider surface
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestFalImageGenProviderSurface:
    """Static surface of ``FalImageGenProvider``: names, models, setup schema."""

    def test_name(self):
        from plugins.image_gen.fal import FalImageGenProvider

        provider = FalImageGenProvider()
        assert provider.name == "fal"

    def test_display_name(self):
        from plugins.image_gen.fal import FalImageGenProvider

        provider = FalImageGenProvider()
        assert provider.display_name == "FAL.ai"

    def test_default_model_matches_legacy(self):
        from plugins.image_gen.fal import FalImageGenProvider
        from tools.image_generation_tool import DEFAULT_MODEL

        provider = FalImageGenProvider()
        assert provider.default_model() == DEFAULT_MODEL

    def test_list_models_uses_legacy_catalog(self):
        from plugins.image_gen.fal import FalImageGenProvider
        from tools.image_generation_tool import FAL_MODELS

        catalog = FalImageGenProvider().list_models()
        # Whatever FAL_MODELS ships, the provider mirrors verbatim.
        listed_ids = {model["id"] for model in catalog}
        assert listed_ids == set(FAL_MODELS.keys())
        # Spot-check the expected first-class fields are present.
        required_fields = ("id", "display", "speed", "strengths", "price")
        for model in catalog:
            for field in required_fields:
                assert field in model

    def test_setup_schema_advertises_fal_key(self):
        from plugins.image_gen.fal import FalImageGenProvider

        schema = FalImageGenProvider().get_setup_schema()
        assert schema["name"] == "FAL.ai"
        assert schema["badge"] == "paid"
        advertised = {var["key"] for var in schema.get("env_vars", [])}
        assert "FAL_KEY" in advertised
|
||||
|
||||
|
||||
class TestFalImageGenProviderAvailability:
    """``is_available()`` with ``image_tool.check_fal_api_key`` patched."""

    def test_is_available_when_legacy_check_passes(self, monkeypatch):
        import tools.image_generation_tool as image_tool
        from plugins.image_gen.fal import FalImageGenProvider

        monkeypatch.setattr(image_tool, "check_fal_api_key", lambda: True)
        provider = FalImageGenProvider()
        assert provider.is_available() is True

    def test_is_available_false_when_legacy_check_fails(self, monkeypatch):
        import tools.image_generation_tool as image_tool
        from plugins.image_gen.fal import FalImageGenProvider

        monkeypatch.setattr(image_tool, "check_fal_api_key", lambda: False)
        provider = FalImageGenProvider()
        assert provider.is_available() is False

    def test_is_available_handles_legacy_exception(self, monkeypatch):
        import tools.image_generation_tool as image_tool
        from plugins.image_gen.fal import FalImageGenProvider

        def _boom():
            raise RuntimeError("config broke")

        monkeypatch.setattr(image_tool, "check_fal_api_key", _boom)
        provider = FalImageGenProvider()
        # Picker must not propagate exceptions — show as "not available".
        assert provider.is_available() is False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# generate() — call-time indirection
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestFalImageGenProviderGenerate:
    """``generate()`` exercised with ``tools.image_generation_tool`` attributes patched."""

    def test_generate_delegates_to_legacy_image_generate_tool(self, monkeypatch):
        """``image_generate_tool`` must be looked up at call time so that
        ``monkeypatch.setattr(image_tool, "image_generate_tool", ...)``
        takes effect."""
        import tools.image_generation_tool as image_tool
        from plugins.image_gen.fal import FalImageGenProvider

        call = {}

        def fake_image_generate_tool(prompt, aspect_ratio, **kwargs):
            call.update(prompt=prompt, aspect_ratio=aspect_ratio, kwargs=kwargs)
            return json.dumps({"success": True, "image": "https://fake/image.png"})

        monkeypatch.setattr(image_tool, "image_generate_tool", fake_image_generate_tool)
        monkeypatch.setattr(
            image_tool, "_resolve_fal_model", lambda: ("fal-ai/flux-2/klein/9b", {})
        )

        provider = FalImageGenProvider()
        result = provider.generate(
            "a serene mountain landscape",
            aspect_ratio="square",
            seed=42,
        )

        assert call["prompt"] == "a serene mountain landscape"
        assert call["aspect_ratio"] == "square"
        assert call["kwargs"] == {"seed": 42}
        assert result["success"] is True
        assert result["image"] == "https://fake/image.png"
        # Stamped fields for the unified response shape
        stamped = {
            "provider": "fal",
            "prompt": "a serene mountain landscape",
            "aspect_ratio": "square",
            "model": "fal-ai/flux-2/klein/9b",
        }
        for field, value in stamped.items():
            assert result[field] == value

    def test_generate_invalid_aspect_ratio_is_coerced(self, monkeypatch):
        import tools.image_generation_tool as image_tool
        from plugins.image_gen.fal import FalImageGenProvider

        received = {}

        def fake(prompt, aspect_ratio, **kwargs):
            received["v"] = aspect_ratio
            return json.dumps({"success": True, "image": "x"})

        monkeypatch.setattr(image_tool, "image_generate_tool", fake)
        monkeypatch.setattr(
            image_tool, "_resolve_fal_model", lambda: ("fal-ai/flux-2/klein/9b", {})
        )

        FalImageGenProvider().generate("p", aspect_ratio="not-a-real-ratio")
        # ``resolve_aspect_ratio`` clamps to landscape.
        assert received["v"] == "landscape"

    def test_generate_passthrough_drops_none_kwargs(self, monkeypatch):
        import tools.image_generation_tool as image_tool
        from plugins.image_gen.fal import FalImageGenProvider

        forwarded = {}

        def fake(prompt, aspect_ratio, **kwargs):
            forwarded.update(kwargs)
            return json.dumps({"success": True, "image": "x"})

        monkeypatch.setattr(image_tool, "image_generate_tool", fake)
        monkeypatch.setattr(
            image_tool, "_resolve_fal_model", lambda: ("fal-ai/flux-2/klein/9b", {})
        )

        FalImageGenProvider().generate(
            "p",
            aspect_ratio="landscape",
            seed=None,
            num_images=2,
            guidance_scale=None,
        )

        # ``None`` values must not be forwarded — they'd override the
        # model's defaults inside the legacy payload builder.
        for dropped in ("seed", "guidance_scale"):
            assert dropped not in forwarded
        assert forwarded.get("num_images") == 2

    def test_generate_catches_exception_from_legacy(self, monkeypatch):
        import tools.image_generation_tool as image_tool
        from plugins.image_gen.fal import FalImageGenProvider

        def boom(*args, **kwargs):
            raise RuntimeError("FAL endpoint exploded")

        monkeypatch.setattr(image_tool, "image_generate_tool", boom)

        outcome = FalImageGenProvider().generate("p")
        assert outcome["success"] is False
        assert "FAL image generation failed" in outcome["error"]
        assert outcome["error_type"] == "RuntimeError"
        assert outcome["provider"] == "fal"

    def test_generate_invalid_json_response(self, monkeypatch):
        import tools.image_generation_tool as image_tool
        from plugins.image_gen.fal import FalImageGenProvider

        monkeypatch.setattr(image_tool, "image_generate_tool", lambda **kw: "not-json")
        monkeypatch.setattr(
            image_tool, "_resolve_fal_model", lambda: ("fal-ai/flux-2/klein/9b", {})
        )

        outcome = FalImageGenProvider().generate("p")
        assert outcome["success"] is False
        assert "Invalid JSON" in outcome["error"]
        assert outcome["provider"] == "fal"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Registry wiring
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestFalImageGenPluginRegistration:
    """``register(ctx)`` hands exactly one provider instance to the context."""

    def test_register_wires_provider_into_registry(self):
        from plugins.image_gen.fal import FalImageGenProvider, register

        plugin_ctx = MagicMock()
        register(plugin_ctx)

        hook = plugin_ctx.register_image_gen_provider
        hook.assert_called_once()
        positional, _keyword = hook.call_args
        # Unpacking fails unless the call had exactly one positional argument.
        (provider,) = positional
        assert isinstance(provider, FalImageGenProvider)
|
||||
@@ -1,260 +0,0 @@
|
||||
"""Tests for reactive multimodal-tool-content recovery.
|
||||
|
||||
Covers the full chain for providers that reject list-type content in
|
||||
``role: "tool"`` messages (Xiaomi MiMo's 400 "text is not set", etc.):
|
||||
|
||||
1. agent/error_classifier.py: 400 with the right wording classifies as
|
||||
``FailoverReason.multimodal_tool_content_unsupported``.
|
||||
2. run_agent._try_strip_image_parts_from_tool_messages downgrades tool
|
||||
messages whose ``content`` is a list-with-image to a string text
|
||||
summary, in-place, and records the active (provider, model) in
|
||||
``self._no_list_tool_content_models`` so future tool results in this
|
||||
session preemptively downgrade.
|
||||
3. run_agent._tool_result_content_for_active_model short-circuits to a
|
||||
text summary when the (provider, model) is in the cache, even though
|
||||
``_model_supports_vision`` returns True — avoiding a wasted round
|
||||
trip on every subsequent screenshot in the session.
|
||||
|
||||
The end-to-end retry loop wiring (`conversation_loop.py`) is exercised by
|
||||
the classifier signal + helper-mutation tests; the integration only adds
|
||||
a trivial flag-and-continue around the existing pattern used for
|
||||
``image_too_large`` recovery.
|
||||
|
||||
See: https://github.com/NousResearch/hermes-agent/issues/27344
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from agent.error_classifier import FailoverReason, classify_api_error
|
||||
|
||||
|
||||
class _FakeApiError(Exception):
    """Stand-in for an openai.BadRequestError with status_code + body.

    ``body`` falls back to ``{"error": {"message": message}}`` when the
    argument is omitted or falsy.  ``response`` is always ``None``.
    """

    def __init__(self, status_code: int, message: str, body: dict | None = None):
        super().__init__(message)
        default_body = {"error": {"message": message}}
        self.status_code = status_code
        self.body = body if body else default_body
        self.response = None
|
||||
|
||||
|
||||
def _make_agent(provider: str = "xiaomi", model: str = "mimo-v2.5"):
    """Return an ``AIAgent`` allocated without running ``__init__``.

    Only ``provider`` and ``model`` are set on the instance, which is enough
    for method-level tests that need no provider setup.
    """
    from run_agent import AIAgent

    bare_agent = object.__new__(AIAgent)
    for attribute, value in (("provider", provider), ("model", model)):
        setattr(bare_agent, attribute, value)
    return bare_agent
|
||||
|
||||
|
||||
# ─── Strip helper ────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestStripImagePartsHelper:
    """Tests for ``_try_strip_image_parts_from_tool_messages``."""

    def test_no_messages_returns_false(self):
        """An empty list and ``None`` both report that nothing was changed."""
        strip = _make_agent()._try_strip_image_parts_from_tool_messages
        assert strip([]) is False
        assert strip(None) is False

    def test_no_tool_messages_returns_false(self):
        """A history without any tool-role message is reported as unchanged."""
        history = [
            {"role": "user", "content": "plain text"},
            {"role": "assistant", "content": "ack"},
        ]
        changed = _make_agent()._try_strip_image_parts_from_tool_messages(history)
        assert changed is False

    def test_tool_message_with_string_content_unchanged(self):
        """String tool content is already in the downgraded form."""
        history = [
            {"role": "tool", "tool_call_id": "x", "content": "plain string result"},
        ]
        changed = _make_agent()._try_strip_image_parts_from_tool_messages(history)
        assert changed is False
        assert history[0]["content"] == "plain string result"

    def test_tool_message_list_without_image_unchanged(self):
        """List content with only text parts is left alone — caller surfaces
        the original error if this turns out to also be rejected."""
        text_only = [{"type": "text", "text": "hello"}]
        history = [{"role": "tool", "tool_call_id": "x", "content": text_only}]
        changed = _make_agent()._try_strip_image_parts_from_tool_messages(history)
        assert changed is False

    def test_tool_message_list_with_image_downgrades(self):
        """Text + image list content becomes a plain string without the image."""
        parts = [
            {"type": "text", "text": "AX summary: 5 buttons visible"},
            {"type": "image_url", "image_url": {"url": "data:image/png;base64,iVBOR..."}},
        ]
        history = [{"role": "tool", "tool_call_id": "x", "content": parts}]
        changed = _make_agent()._try_strip_image_parts_from_tool_messages(history)
        assert changed is True
        # Image stripped; text preserved as a string.
        downgraded = history[0]["content"]
        assert isinstance(downgraded, str)
        assert "AX summary" in downgraded
        assert "image_url" not in downgraded
        assert "iVBOR" not in downgraded

    def test_tool_message_image_only_gets_placeholder(self):
        """If the list had nothing but image parts, leave a placeholder so
        the assistant message has something to reference."""
        parts = [
            {"type": "image_url", "image_url": {"url": "data:image/png;base64,iVBOR..."}},
        ]
        history = [{"role": "tool", "tool_call_id": "x", "content": parts}]
        changed = _make_agent()._try_strip_image_parts_from_tool_messages(history)
        assert changed is True
        placeholder = history[0]["content"]
        assert isinstance(placeholder, str)
        assert "image content removed" in placeholder

    def test_records_provider_model_in_session_cache(self):
        """A successful strip records the active (provider, model) pair."""
        bot = _make_agent(provider="xiaomi", model="mimo-v2.5")
        parts = [
            {"type": "text", "text": "summary"},
            {"type": "image_url", "image_url": {"url": "data:image/png;base64,X"}},
        ]
        history = [{"role": "tool", "tool_call_id": "x", "content": parts}]
        bot._try_strip_image_parts_from_tool_messages(history)
        assert ("xiaomi", "mimo-v2.5") in bot._no_list_tool_content_models

    def test_only_tool_messages_get_downgraded(self):
        """User / assistant messages with list-type content are out of
        scope — they're handled by the existing image-routing path."""
        user_msg = {"role": "user", "content": [
            {"type": "text", "text": "describe"},
            {"type": "image_url", "image_url": {"url": "data:image/png;base64,X"}},
        ]}
        tool_msg = {"role": "tool", "tool_call_id": "x", "content": [
            {"type": "text", "text": "summary"},
            {"type": "image_url", "image_url": {"url": "data:image/png;base64,Y"}},
        ]}
        history = [user_msg, tool_msg]
        _make_agent()._try_strip_image_parts_from_tool_messages(history)
        # User message untouched.
        assert isinstance(history[0]["content"], list)
        assert any(part.get("type") == "image_url" for part in history[0]["content"])
        # Tool message downgraded.
        assert isinstance(history[1]["content"], str)
        assert "summary" in history[1]["content"]

    def test_skips_recording_when_no_model_id(self):
        """Don't poison the cache with empty keys when provider/model is
        unset (e.g. lazy-initialised mid-handshake)."""
        bot = _make_agent(provider="", model="")
        parts = [
            {"type": "text", "text": "summary"},
            {"type": "image_url", "image_url": {"url": "data:image/png;base64,X"}},
        ]
        history = [{"role": "tool", "tool_call_id": "x", "content": parts}]
        bot._try_strip_image_parts_from_tool_messages(history)
        assert bot._no_list_tool_content_models == set()
|
||||
|
||||
|
||||
# ─── Short-circuit on cached models ──────────────────────────────────────────
|
||||
|
||||
|
||||
class TestToolResultContentShortCircuit:
    """Once the session has learned that (provider, model) rejects list
    content, ``_tool_result_content_for_active_model`` returns a text
    summary even though ``_model_supports_vision`` reports True.
    """

    def _multimodal_result(self, png_b64: str = "iVBORw0KGgoAAAA"):
        """Build a multimodal ``computer_use`` result dict with one text part
        and one base64 PNG ``image_url`` part."""
        summary = "capture mode=som 800x600 app=Safari"
        image_part = {
            "type": "image_url",
            "image_url": {"url": f"data:image/png;base64,{png_b64}"},
        }
        return {
            "_multimodal": True,
            "content": [{"type": "text", "text": summary}, image_part],
            "text_summary": summary,
            "meta": {"mode": "som", "width": 800, "height": 600, "elements": 5,
                     "png_bytes": 1024},
        }

    def _content_with_vision(self, agent, monkeypatch):
        """Force ``_model_supports_vision`` to True on *agent* and return what
        it produces for a multimodal ``computer_use`` result."""
        monkeypatch.setattr(agent, "_model_supports_vision", lambda: True)
        return agent._tool_result_content_for_active_model(
            "computer_use", self._multimodal_result()
        )

    def test_returns_list_when_cache_empty_and_vision_supported(self, monkeypatch):
        """With an empty cache the native multimodal list is returned."""
        bot = _make_agent(provider="xiaomi", model="mimo-v2.5")
        bot._no_list_tool_content_models = set()  # explicit empty
        content = self._content_with_vision(bot, monkeypatch)
        # Native multimodal path: returns the content parts list.
        assert isinstance(content, list)
        assert any(part.get("type") == "image_url" for part in content)

    def test_returns_text_summary_when_model_in_cache(self, monkeypatch):
        """A cached (provider, model) pair yields a plain string."""
        bot = _make_agent(provider="xiaomi", model="mimo-v2.5")
        bot._no_list_tool_content_models = {("xiaomi", "mimo-v2.5")}
        content = self._content_with_vision(bot, monkeypatch)
        # Short-circuit: a plain string summary, no image_url present.
        assert isinstance(content, str)
        assert "data:image" not in content
        assert "image_url" not in content

    def test_cache_miss_on_different_model(self, monkeypatch):
        """Cache is per (provider, model). A cached entry for mimo-v2.5
        must NOT affect a session running on a different model.
        """
        bot = _make_agent(provider="xiaomi", model="mimo-v2.5-pro")
        bot._no_list_tool_content_models = {("xiaomi", "mimo-v2.5")}
        content = self._content_with_vision(bot, monkeypatch)
        assert isinstance(content, list)

    def test_missing_cache_attribute_falls_through(self, monkeypatch):
        """Tests that build agents via ``object.__new__`` without calling
        ``__init__`` must not crash — the cache attribute may be absent.
        """
        bot = _make_agent()
        # Deliberately do not assign _no_list_tool_content_models.
        content = self._content_with_vision(bot, monkeypatch)
        assert isinstance(content, list)
|
||||
|
||||
|
||||
# ─── Classifier ──────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestRecoveryEndToEndClassification:
    """Lock in that the patterns used by the recovery path classify to
    the right ``FailoverReason``. (The recovery hook in
    ``agent.conversation_loop`` consumes this reason directly.)
    """

    def test_xiaomi_mimo_classifies(self):
        """The Xiaomi 'text is not set' 400 is classified as retryable
        multimodal-tool-content-unsupported."""
        provider_message = (
            "Error code: 400 - {'error': {'code': '400', 'message': "
            "'Param Incorrect', 'param': 'text is not set', 'type': ''}}"
        )
        failure = _FakeApiError(status_code=400, message=provider_message)
        verdict = classify_api_error(failure, provider="xiaomi", model="mimo-v2.5")
        assert verdict.reason == FailoverReason.multimodal_tool_content_unsupported
        assert verdict.retryable is True

    def test_alibaba_variant_classifies(self):
        """The Alibaba 'must be string' wording maps to the same reason."""
        failure = _FakeApiError(
            status_code=400,
            message="tool_call.content must be string",
        )
        verdict = classify_api_error(failure, provider="alibaba", model="qwen3.5-plus")
        assert verdict.reason == FailoverReason.multimodal_tool_content_unsupported
|
||||
@@ -2636,31 +2636,6 @@ class TestRunConversation:
|
||||
assert result["final_response"] == "Final answer"
|
||||
assert result["completed"] is True
|
||||
|
||||
def test_ollama_small_runtime_context_fails_before_api_call(self, agent, caplog):
    """A 4096-token Ollama runtime context fails the turn before any API call."""
    self._setup_agent(agent)
    for attr, value in (
        ("model", "qwen3.5:9b"),
        ("provider", "custom"),
        ("base_url", "http://host.docker.internal:11434/v1"),
        ("_ollama_num_ctx", 4096),
    ):
        setattr(agent, attr, value)

    with (
        patch.object(agent, "_persist_session"),
        patch.object(agent, "_save_trajectory"),
        patch.object(agent, "_cleanup_task_resources"),
        caplog.at_level(logging.WARNING, logger="agent.conversation_loop"),
    ):
        outcome = agent.run_conversation("Call ps -aux")

    # The turn is reported as failed without having spent an API call.
    assert outcome["failed"] is True
    assert outcome["completed"] is False
    assert outcome["api_calls"] == 0
    assert outcome["turn_exit_reason"] == "ollama_runtime_context_too_small"
    # The user-facing reply names the loaded size and the suggested setting.
    reply = outcome["final_response"]
    assert "Ollama loaded `qwen3.5:9b` with only 4,096 tokens" in reply
    assert "model.ollama_num_ctx: 65536" in reply
    assert not agent.client.chat.completions.create.called
    # A warning carrying the runtime context size is logged.
    logged = caplog.text
    assert "Ollama runtime context too small for Hermes tool use" in logged
    assert "runtime_context=4096" in logged
|
||||
|
||||
def test_tool_calls_then_stop(self, agent):
|
||||
self._setup_agent(agent)
|
||||
tc = _mock_tool_call(name="web_search", arguments="{}", call_id="c1")
|
||||
|
||||
@@ -1,119 +0,0 @@
|
||||
"""Tests for the secret-source tracking in ``hermes_cli.env_loader``.
|
||||
|
||||
These cover the small public surface that lets `hermes model` / `hermes setup`
|
||||
label detected credentials with their origin ("from Bitwarden") so users
|
||||
don't see an unexplained "credentials ✓" line when their .env is empty.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
ROOT = Path(__file__).resolve().parents[1]
|
||||
if str(ROOT) not in sys.path:
|
||||
sys.path.insert(0, str(ROOT))
|
||||
|
||||
from hermes_cli import env_loader # noqa: E402
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def _reset_sources():
    """Empty ``env_loader._SECRET_SOURCES`` before and after every test."""
    tracked = env_loader._SECRET_SOURCES
    tracked.clear()
    yield
    # Re-read the attribute on teardown, exactly as on setup, in case a test
    # rebound the module-level map.
    env_loader._SECRET_SOURCES.clear()
|
||||
|
||||
|
||||
def test_get_secret_source_returns_none_for_untracked_var():
    """A variable with no recorded source yields ``None``."""
    source = env_loader.get_secret_source("ANTHROPIC_API_KEY")
    assert source is None
|
||||
|
||||
|
||||
def test_get_secret_source_returns_label_for_tracked_var():
    """A recorded source label is returned verbatim."""
    env_loader._SECRET_SOURCES["ANTHROPIC_API_KEY"] = "bitwarden"
    source = env_loader.get_secret_source("ANTHROPIC_API_KEY")
    assert source == "bitwarden"
|
||||
|
||||
|
||||
def test_format_secret_source_suffix_empty_for_untracked():
    """Untracked variables get an empty suffix."""
    # Credentials from .env or the shell shouldn't add noise — the
    # implicit case stays unlabeled.
    suffix = env_loader.format_secret_source_suffix("ANTHROPIC_API_KEY")
    assert suffix == ""
|
||||
|
||||
|
||||
def test_format_secret_source_suffix_bitwarden_uses_proper_name():
    """The ``bitwarden`` source is rendered with its capitalized name."""
    env_loader._SECRET_SOURCES["ANTHROPIC_API_KEY"] = "bitwarden"
    suffix = env_loader.format_secret_source_suffix("ANTHROPIC_API_KEY")
    assert suffix == " (from Bitwarden)"
|
||||
|
||||
|
||||
def test_format_secret_source_suffix_generic_label_for_future_sources():
    """An unrecognized source label is echoed as-is in the suffix."""
    # Future-proofing: a new secret source (e.g. "vault") should still
    # produce a sensible label without needing to edit every call site.
    env_loader._SECRET_SOURCES["OPENAI_API_KEY"] = "vault"
    suffix = env_loader.format_secret_source_suffix("OPENAI_API_KEY")
    assert suffix == " (from vault)"
|
||||
|
||||
|
||||
def test_apply_external_secret_sources_records_bitwarden_origin(tmp_path, monkeypatch):
    """End-to-end: when ``apply_bitwarden_secrets`` returns applied keys,
    they end up in ``_SECRET_SOURCES`` so the UI can label them."""

    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    config_path = tmp_path / "config.yaml"
    # YAML nesting is significant: ``bitwarden`` must sit under ``secrets`` and
    # its settings under ``bitwarden``; otherwise they parse as sibling keys.
    config_path.write_text(
        "secrets:\n"
        "  bitwarden:\n"
        "    enabled: true\n"
        "    project_id: test-project\n"
        "    access_token_env: BWS_ACCESS_TOKEN\n",
        encoding="utf-8",
    )

    # Stub apply_bitwarden_secrets to return a synthetic FetchResult.
    from agent.secret_sources.bitwarden import FetchResult

    fake_result = FetchResult(
        secrets={"ANTHROPIC_API_KEY": "sk-ant-test"},
        applied=["ANTHROPIC_API_KEY"],
    )

    def _fake_apply(**_kwargs):
        return fake_result

    # The import inside _apply_external_secret_sources is lazy, so we
    # patch the *module attribute* it will pull in.
    import agent.secret_sources.bitwarden as bw_module

    monkeypatch.setattr(bw_module, "apply_bitwarden_secrets", _fake_apply)

    env_loader._apply_external_secret_sources(tmp_path)

    assert env_loader.get_secret_source("ANTHROPIC_API_KEY") == "bitwarden"
    assert (
        env_loader.format_secret_source_suffix("ANTHROPIC_API_KEY")
        == " (from Bitwarden)"
    )
|
||||
|
||||
|
||||
def test_apply_external_secret_sources_noop_when_disabled(tmp_path, monkeypatch):
    """Disabled Bitwarden config must not touch the source map."""

    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    config_path = tmp_path / "config.yaml"
    # Keep ``enabled: false`` nested under ``secrets.bitwarden`` so the test
    # exercises the disabled branch rather than a config with no bitwarden
    # settings at all.
    config_path.write_text(
        "secrets:\n"
        "  bitwarden:\n"
        "    enabled: false\n",
        encoding="utf-8",
    )

    env_loader._apply_external_secret_sources(tmp_path)

    assert env_loader.get_secret_source("ANTHROPIC_API_KEY") is None
|
||||
@@ -59,59 +59,6 @@ def test_write_json_returns_false_on_broken_pipe(monkeypatch):
|
||||
assert server.write_json({"ok": True}) is False
|
||||
|
||||
|
||||
def test_tui_verbose_tool_details_fail_closed_when_redaction_fails(monkeypatch):
    """When ``redact_sensitive_text`` raises, the verbose text helpers return
    an empty string."""

    def fail_redaction(*_args, **_kwargs):
        raise RuntimeError("redaction unavailable")

    # Swap in a stand-in ``agent.redact`` module whose redactor always fails.
    broken_redact = types.ModuleType("agent.redact")
    broken_redact.redact_sensitive_text = fail_redaction
    monkeypatch.setitem(sys.modules, "agent.redact", broken_redact)

    assert server._redact_tui_verbose_text("api_key=secret") == ""
    assert server._tool_args_text({"api_key": "secret"}) == ""
    assert server._tool_result_text("token=secret") == ""
|
||||
|
||||
|
||||
def test_tui_verbose_tool_details_are_capped_before_emit(monkeypatch):
    """With tiny char/line limits, capping keeps the tail behind an omission
    notice."""
    for limit_name, limit in (
        ("_TUI_VERBOSE_TEXT_MAX_CHARS", 12),
        ("_TUI_VERBOSE_TEXT_MAX_LINES", 2),
    ):
        monkeypatch.setattr(server, limit_name, limit)

    tail = server._cap_tui_verbose_text("one\ntwo\nthree\nfour")

    assert tail.startswith("[showing verbose tail; omitted ")
    assert tail.endswith("three\nfour")
    assert "one" not in tail
|
||||
|
||||
|
||||
def test_tui_verbose_tool_events_omit_details_when_redaction_fails(monkeypatch):
    """Tool start/complete events are still emitted, but without
    ``args_text`` / ``result_text``, when redaction raises."""

    def fail_redaction(*_args, **_kwargs):
        raise RuntimeError("redaction unavailable")

    broken_redact = types.ModuleType("agent.redact")
    broken_redact.redact_sensitive_text = fail_redaction
    monkeypatch.setitem(sys.modules, "agent.redact", broken_redact)

    events: list[tuple[str, str, dict]] = []

    def record(event_type, sid, payload):
        events.append((event_type, sid, payload))

    monkeypatch.setattr(server, "_emit", record)
    # Register a verbose-mode session for the callbacks to act on.
    monkeypatch.setitem(
        server._sessions,
        "redaction-test",
        {"tool_progress_mode": "verbose", "tool_started_at": {}},
    )

    server._on_tool_start("redaction-test", "tool-1", "terminal", {"command": "pwd"})
    server._on_tool_complete("redaction-test", "tool-1", "terminal", {"command": "pwd"}, "done")

    assert events[0][0] == "tool.start"
    assert events[1][0] == "tool.complete"
    assert "args_text" not in events[0][2]
    assert "result_text" not in events[1][2]
|
||||
|
||||
|
||||
def test_dispatch_rejects_non_object_request():
|
||||
resp = server.dispatch([])
|
||||
|
||||
|
||||
@@ -8,8 +8,6 @@ depend on the registry being populated should use it explicitly or via
|
||||
``@pytest.mark.usefixtures("web_registry_populated")``.
|
||||
"""
|
||||
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@@ -50,20 +48,3 @@ def web_registry_populated():
|
||||
yield
|
||||
from agent.web_search_registry import _reset_for_tests
|
||||
_reset_for_tests()
|
||||
|
||||
|
||||
@pytest.fixture
def disable_lazy_stt_install():
    """Make ``_try_lazy_install_stt`` return False for the duration of a test,
    so static ``_HAS_FASTER_WHISPER`` patches accurately simulate
    'faster-whisper not installed'.

    Without this, ``_try_lazy_install_stt()`` calls
    ``importlib.util.find_spec("faster_whisper")``, which returns truthy
    whenever the package is installed in the dev / CI environment —
    defeating the test's ``_HAS_FASTER_WHISPER=False`` patch.

    Opt in at module scope with
    ``pytestmark = pytest.mark.usefixtures("disable_lazy_stt_install")``.
    """
    probe_patch = patch(
        "tools.transcription_tools._try_lazy_install_stt", return_value=False
    )
    with probe_patch:
        yield
|
||||
|
||||
@@ -1,246 +0,0 @@
|
||||
"""Unit tests for tools/app_tools.py — the Nous tool gateway integration."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import httpx
|
||||
import pytest
|
||||
|
||||
from tools.managed_tool_gateway import ManagedToolGatewayConfig
|
||||
|
||||
|
||||
# Canned gateway configuration returned by the patched
# ``resolve_managed_tool_gateway`` in the tests below; the URL and bearer-token
# assertions compare against these values.
_FAKE_GATEWAY = ManagedToolGatewayConfig(
    managed_mode=True,
    vendor="tools",
    gateway_origin="https://tools-gateway.example.com",
    nous_user_token="test-token-abc123",
)
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def _reset_http_client_cache():
    """Reset ``tools.app_tools``' cached httpx client and its origin to
    ``None`` before and after each test."""
    import tools.app_tools as app_tools_module

    def _clear():
        app_tools_module._http_client = None
        app_tools_module._http_client_origin = None

    _clear()
    yield
    _clear()
|
||||
|
||||
|
||||
@pytest.fixture()
def gateway_post(monkeypatch):
    """Patch the gateway and httpx.Client.post; return a dict capturing the request.

    The gateway resolver returns ``_FAKE_GATEWAY``, the model-name lookup
    returns ``None``, and every POST answers 200 with an empty payload. The
    returned dict is filled with ``url``, ``headers`` and ``json`` on each POST.
    """
    monkeypatch.setattr(
        "tools.app_tools.resolve_managed_tool_gateway", lambda v: _FAKE_GATEWAY
    )
    monkeypatch.setattr(
        "tools.app_tools._get_current_model_name", lambda: None
    )

    canned = MagicMock(spec=httpx.Response)
    canned.status_code = 200
    canned.json.return_value = {"data": {}, "error": None}
    canned.text = json.dumps({"data": {}, "error": None})

    recorded = {}

    # NOTE: the ``json`` keyword shadows the json module inside this function
    # only; it must keep that name to match how the handler calls ``post``.
    def fake_post(self, url, *, json=None, headers=None, **kw):
        recorded["url"] = url
        recorded["headers"] = headers
        recorded["json"] = json
        return canned

    monkeypatch.setattr(httpx.Client, "post", fake_post)
    return recorded
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# check_fn gating
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestAppToolsAvailability:
    """``_app_tools_available`` is True only with the gateway ready and the
    portal config switch on."""

    @staticmethod
    def _probe(monkeypatch, *, gateway_ready, config_enabled):
        """Patch both inputs of the check, then return its result."""
        monkeypatch.setattr(
            "tools.app_tools.is_managed_tool_gateway_ready",
            lambda vendor: gateway_ready,
        )
        monkeypatch.setattr(
            "tools.app_tools._read_portal_app_tools_enabled",
            lambda: config_enabled,
        )
        from tools.app_tools import _app_tools_available
        return _app_tools_available()

    def test_returns_false_when_gateway_not_ready(self, monkeypatch):
        available = self._probe(monkeypatch, gateway_ready=False, config_enabled=True)
        assert available is False

    def test_returns_true_when_gateway_ready_and_config_on(self, monkeypatch):
        available = self._probe(monkeypatch, gateway_ready=True, config_enabled=True)
        assert available is True

    def test_returns_false_when_config_off(self, monkeypatch):
        available = self._probe(monkeypatch, gateway_ready=True, config_enabled=False)
        assert available is False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# URL + auth header
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestSearchPostsCorrectUrlAndAuth:
    """The search handler targets ``/v1/search`` with bearer auth."""

    def test_posts_to_v1_search_with_bearer_token(self, monkeypatch, gateway_post):
        """URL, auth/content-type headers and JSON body match expectations."""
        monkeypatch.setattr("tools.app_tools._get_current_model_name", lambda: "test-model")
        from tools.app_tools import handle_app_search_tools
        handle_app_search_tools({"queries": [{"use_case": "send email"}]})

        assert gateway_post["url"] == "https://tools-gateway.example.com/v1/search"

        sent_headers = gateway_post["headers"]
        assert sent_headers["Authorization"] == "Bearer test-token-abc123"
        assert sent_headers["Content-Type"] == "application/json"

        sent_body = gateway_post["json"]
        assert sent_body["queries"] == [{"use_case": "send email"}]
        assert sent_body["model"] == "test-model"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Model auto-injection
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestModelAutoInjection:
    """The request body carries ``model`` only when a model name resolves."""

    def test_injects_model_from_config(self, monkeypatch, gateway_post):
        """A resolvable model name is added to the body."""
        monkeypatch.setattr("tools.app_tools._get_current_model_name", lambda: "claude-sonnet-4")
        from tools.app_tools import handle_app_search_tools
        handle_app_search_tools({"queries": [{"use_case": "test"}]})
        sent_body = gateway_post["json"]
        assert sent_body["model"] == "claude-sonnet-4"

    def test_omits_model_when_unresolvable(self, gateway_post):
        """With the fixture's ``None`` model name the key is absent."""
        from tools.app_tools import handle_app_search_tools
        handle_app_search_tools({"queries": [{"use_case": "test"}]})
        sent_body = gateway_post["json"]
        assert "model" not in sent_body
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Gateway-internal param stripping (allowlist approach)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestExecuteStripsInternalParams:
    """Gateway-internal parameters are not forwarded by the execute handler."""

    def test_strips_sync_response_thought_step_metric(self, gateway_post):
        """Internal keys are dropped while ``tools`` is forwarded unchanged."""
        from tools.app_tools import handle_app_execute_tools
        handle_app_execute_tools({
            "tools": [{"tool_slug": "TEST", "arguments": {}}],
            "sync_response_to_workbench": True,
            "thought": "testing",
            "current_step": "TESTING",
            "current_step_metric": "1/1 tests",
        })
        sent_body = gateway_post["json"]
        internal_keys = (
            "sync_response_to_workbench",
            "thought",
            "current_step",
            "current_step_metric",
        )
        for internal in internal_keys:
            assert internal not in sent_body
        assert sent_body["tools"] == [{"tool_slug": "TEST", "arguments": {}}]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# HTTP error → tool result (not exception)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestHttpErrorReturnedAsToolResult:
    """Non-2xx gateway responses come back as a JSON tool result."""

    @pytest.mark.parametrize("status_code", [402, 403, 422, 500])
    def test_returns_error_json_not_exception(self, monkeypatch, status_code):
        """The handler returns the gateway's error payload as JSON text."""
        monkeypatch.setattr("tools.app_tools.resolve_managed_tool_gateway", lambda v: _FAKE_GATEWAY)

        gateway_error = {"error": {"code": "TEST_ERROR", "message": "fail"}}
        failing = MagicMock(spec=httpx.Response)
        failing.status_code = status_code
        failing.json.return_value = gateway_error
        failing.text = json.dumps(gateway_error)
        monkeypatch.setattr(httpx.Client, "post", lambda self, url, **kw: failing)

        from tools.app_tools import handle_app_search_tools
        raw = handle_app_search_tools({"queries": [{"use_case": "test"}]})
        parsed = json.loads(raw)
        assert parsed["error"]["code"] == "TEST_ERROR"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Network failure → tool result
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestNetworkFailureReturnedAsToolResult:
    """Transport-level httpx failures are reported as coded tool results."""

    def test_connect_error_returns_gateway_unreachable(self, monkeypatch):
        """``httpx.ConnectError`` maps to ``GATEWAY_UNREACHABLE``."""
        monkeypatch.setattr("tools.app_tools.resolve_managed_tool_gateway", lambda v: _FAKE_GATEWAY)

        def raise_connect(self, url, **kw):
            raise httpx.ConnectError("Connection refused")
        monkeypatch.setattr(httpx.Client, "post", raise_connect)

        from tools.app_tools import handle_app_search_tools
        raw = handle_app_search_tools({"queries": [{"use_case": "test"}]})
        parsed = json.loads(raw)
        assert parsed["error"]["code"] == "GATEWAY_UNREACHABLE"

    def test_timeout_returns_gateway_timeout(self, monkeypatch):
        """``httpx.ReadTimeout`` maps to ``GATEWAY_TIMEOUT``."""
        monkeypatch.setattr("tools.app_tools.resolve_managed_tool_gateway", lambda v: _FAKE_GATEWAY)

        def raise_timeout(self, url, **kw):
            raise httpx.ReadTimeout("timed out")
        monkeypatch.setattr(httpx.Client, "post", raise_timeout)

        from tools.app_tools import handle_app_search_tools
        raw = handle_app_search_tools({"queries": [{"use_case": "test"}]})
        parsed = json.loads(raw)
        assert parsed["error"]["code"] == "GATEWAY_TIMEOUT"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Endpoint routing + payload forwarding
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestEndpointRouting:
    """Each handler posts to its own endpoint and forwards its payload."""

    def test_manage_connections_forwards_toolkits(self, gateway_post):
        """Connections go to ``/v1/connections`` with toolkits and flag intact."""
        from tools.app_tools import handle_app_manage_connections
        handle_app_manage_connections({"toolkits": ["gmail", "slack"], "reinitiate_all": True})
        assert gateway_post["url"].endswith("/v1/connections")
        sent_body = gateway_post["json"]
        assert sent_body["toolkits"] == ["gmail", "slack"]
        assert sent_body["reinitiate_all"] is True

    def test_tool_schemas_forwards_slugs(self, gateway_post):
        """Schemas go to ``/v1/schemas`` with slugs and include list intact."""
        from tools.app_tools import handle_app_tool_schemas
        handle_app_tool_schemas({"tool_slugs": ["GMAIL_SEND_EMAIL"], "include": ["input_schema", "output_schema"]})
        assert gateway_post["url"].endswith("/v1/schemas")
        sent_body = gateway_post["json"]
        assert sent_body["tool_slugs"] == ["GMAIL_SEND_EMAIL"]
        assert sent_body["include"] == ["input_schema", "output_schema"]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Registry entries
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestRegistryEntries:
    """Importing ``tools.app_tools`` registers its tools in the registry."""

    def test_all_four_tools_registered_under_app_tools(self):
        """All four tool names exist and belong to the ``app_tools`` toolset."""
        from tools.registry import registry
        import tools.app_tools  # noqa: F401
        expected = {"app_search_tools", "app_tool_schemas", "app_execute_tools", "app_manage_connections"}
        for tool_name in expected:
            registered = registry._tools.get(tool_name)
            assert registered is not None, f"{tool_name} not registered"
            assert registered.toolset == "app_tools"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# session (object) vs session_id (string) asymmetry
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestSessionHandling:
    """Search forwards a ``session`` object; the other handlers forward a
    ``session_id`` string."""

    def test_search_uses_session_object(self, gateway_post):
        """Search keeps ``session`` as a dict and sends no ``session_id``."""
        from tools.app_tools import handle_app_search_tools
        handle_app_search_tools({"queries": [{"use_case": "test"}], "session": {"generate_id": True}})
        sent_body = gateway_post["json"]
        assert isinstance(sent_body["session"], dict)
        assert "session_id" not in sent_body

    def test_schemas_uses_session_id_string(self, gateway_post):
        """Schemas forwards ``session_id`` and no ``session`` key."""
        from tools.app_tools import handle_app_tool_schemas
        handle_app_tool_schemas({"tool_slugs": ["TEST"], "session_id": "sess-123"})
        sent_body = gateway_post["json"]
        assert sent_body["session_id"] == "sess-123"
        assert "session" not in sent_body

    def test_execute_uses_session_id_string(self, gateway_post):
        """Execute forwards ``session_id`` and no ``session`` key."""
        from tools.app_tools import handle_app_execute_tools
        handle_app_execute_tools({"tools": [{"tool_slug": "TEST", "arguments": {}}], "session_id": "sess-456"})
        sent_body = gateway_post["json"]
        assert sent_body["session_id"] == "sess-456"
        assert "session" not in sent_body

    def test_connections_uses_session_id_string(self, gateway_post):
        """Connections forwards ``session_id`` and no ``session`` key."""
        from tools.app_tools import handle_app_manage_connections
        handle_app_manage_connections({"toolkits": ["gmail"], "session_id": "sess-789"})
        sent_body = gateway_post["json"]
        assert sent_body["session_id"] == "sess-789"
        assert "session" not in sent_body
|
||||
@@ -76,27 +76,6 @@ class TestSchema:
|
||||
modes = set(COMPUTER_USE_SCHEMA["parameters"]["properties"]["mode"]["enum"])
|
||||
assert modes == {"som", "vision", "ax"}
|
||||
|
||||
def test_schema_exposes_max_elements_cap_for_capture(self):
    """The schema declares an integer ``max_elements`` whose minimum, if
    given, is at least 1."""
    from tools.computer_use.schema import COMPUTER_USE_SCHEMA
    properties = COMPUTER_USE_SCHEMA["parameters"]["properties"]
    assert "max_elements" in properties
    max_elements = properties["max_elements"]
    assert max_elements["type"] == "integer"
    assert max_elements.get("minimum", 1) >= 1
|
||||
|
||||
def test_schema_max_elements_documents_default_and_upper_bound(self):
    """Schema description must agree with the runtime. The original PR
    text said "Default 100" without a corresponding `default` field, and
    had no upper bound — both Copilot findings.
    """
    from tools.computer_use.schema import COMPUTER_USE_SCHEMA
    from tools.computer_use.tool import (
        _DEFAULT_MAX_ELEMENTS,
        _MAX_ALLOWED_MAX_ELEMENTS,
    )
    max_elements = COMPUTER_USE_SCHEMA["parameters"]["properties"]["max_elements"]
    declared_default = max_elements.get("default")
    declared_maximum = max_elements.get("maximum")
    assert declared_default == _DEFAULT_MAX_ELEMENTS
    assert declared_maximum == _MAX_ALLOWED_MAX_ELEMENTS
|
||||
|
||||
|
||||
class TestRegistration:
|
||||
def test_tool_registers_with_registry(self):
|
||||
@@ -226,54 +205,6 @@ class TestDispatch:
|
||||
parsed = json.loads(out)
|
||||
assert "error" in parsed
|
||||
|
||||
def test_set_value_routes_to_backend(self, noop_backend):
    """set_value must reach the backend — regression for missing _NoopBackend stub."""
    from tools.computer_use.tool import handle_computer_use
    request = {"action": "set_value", "value": "Option A", "element": 5}
    reply = json.loads(handle_computer_use(request))
    assert reply.get("ok") is True
    assert reply.get("action") == "set_value"
    # The backend must have recorded a set_value call.
    assert any(call[0] == "set_value" for call in noop_backend.calls)
|
||||
|
||||
def test_set_value_missing_value_returns_error(self, noop_backend):
    """``set_value`` without a ``value`` argument yields an error payload."""
    from tools.computer_use.tool import handle_computer_use

    raw = handle_computer_use({"action": "set_value"})
    payload = json.loads(raw)
    assert "error" in payload
|
||||
def test_capture_after_skipped_when_action_failed(self, noop_backend):
    """``capture_after`` must not fire when the action result has ``ok=False``.

    A follow-up screenshot after a failed action shows the screen in a
    normal state, which misleads the model into thinking the action
    succeeded.
    """
    from unittest.mock import patch
    from tools.computer_use.backend import ActionResult
    from tools.computer_use.tool import handle_computer_use

    # Force click() to report a failure.
    failed_click = ActionResult(ok=False, action="click",
                                message="element not found")
    request = {"action": "click", "element": 99, "capture_after": True}
    with patch.object(noop_backend, "click", return_value=failed_click):
        out = handle_computer_use(request)

    parsed = json.loads(out)
    # The plain error result is returned, not a multimodal capture.
    assert parsed.get("ok") is False
    assert parsed.get("action") == "click"

    # No follow-up capture may have been issued.
    capture_calls = [call for call in noop_backend.calls if call[0] == "capture"]
    assert len(capture_calls) == 0, "capture must not be called after a failed action"
|
||||
|
||||
def test_capture_after_fires_when_action_succeeds(self, noop_backend):
    """A successful action with ``capture_after`` triggers exactly one capture."""
    from tools.computer_use.tool import handle_computer_use

    request = {"action": "click", "element": 1, "capture_after": True}
    out = handle_computer_use(request)

    # The noop backend reports ok=True, so one follow-up capture is expected.
    recorded_captures = [call for call in noop_backend.calls if call[0] == "capture"]
    assert len(recorded_captures) == 1
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Safety guards (type / key block lists)
|
||||
@@ -406,193 +337,6 @@ class TestCaptureResponse:
|
||||
assert "AXButton" in text_part["text"]
|
||||
assert "AXTextField" in text_part["text"]
|
||||
|
||||
def _ax_backend_with(self, count: int):
    """Build a fake backend whose ``capture`` returns ``count`` AX elements.

    Elements are ``AXButton`` entries indexed from 1 and labelled
    ``el-0`` .. ``el-<count-1>``. Every capture returns a fresh list copy.
    """
    from tools.computer_use.backend import CaptureResult, UIElement

    elements = []
    for i in range(count):
        elements.append(
            UIElement(index=i + 1, role="AXButton", label=f"el-{i}", bounds=(0, 0, 1, 1))
        )

    class FakeBackend:
        # Lifecycle hooks: nothing to set up or tear down.
        def start(self):
            pass

        def stop(self):
            pass

        def is_available(self):
            return True

        def capture(self, mode="som", app=None):
            # No screenshot bytes; only the element list matters here.
            return CaptureResult(
                mode=mode,
                width=800,
                height=600,
                png_b64="",
                elements=list(elements),
                app="Obsidian",
            )

        # Input actions are inert placeholders.
        def click(self, **kw):
            ...

        def drag(self, **kw):
            ...

        def scroll(self, **kw):
            ...

        def type_text(self, text):
            ...

        def key(self, keys):
            ...

        def list_apps(self):
            return []

        def focus_app(self, app, raise_window=False):
            ...

    return FakeBackend()
|
||||
|
||||
def test_capture_ax_caps_elements_at_default_for_dense_trees(self):
    """Regression for #22865: a 600-element AX tree is capped at the default.

    An Electron-style tree must not be emitted verbatim into the tool result.
    """
    from tools.computer_use import tool as cu_tool

    tree_size = 600
    fake_backend = self._ax_backend_with(tree_size)
    cu_tool.reset_backend_for_tests()
    request = {"action": "capture", "mode": "ax"}
    with patch.object(cu_tool, "_get_backend", return_value=fake_backend):
        out = cu_tool.handle_computer_use(request)

    parsed = json.loads(out)
    default_cap = cu_tool._DEFAULT_MAX_ELEMENTS
    assert parsed["mode"] == "ax"
    assert parsed["total_elements"] == tree_size
    assert len(parsed["elements"]) == default_cap
    assert parsed["truncated_elements"] == tree_size - default_cap
    # The human summary must show the truncation so the model knows the
    # JSON view is partial and can re-issue with a tighter scope.
    assert "truncated to" in parsed["summary"]
|
||||
|
||||
def test_capture_ax_honors_explicit_max_elements_override(self):
    """An explicit ``max_elements`` of 250 replaces the default cap."""
    from tools.computer_use import tool as cu_tool

    fake_backend = self._ax_backend_with(600)
    cu_tool.reset_backend_for_tests()
    request = {"action": "capture", "mode": "ax", "max_elements": 250}
    with patch.object(cu_tool, "_get_backend", return_value=fake_backend):
        out = cu_tool.handle_computer_use(request)

    parsed = json.loads(out)
    kept = len(parsed["elements"])
    assert kept == 250
    assert parsed["truncated_elements"] == 350
|
||||
|
||||
def test_capture_ax_below_cap_is_unchanged(self):
    """Backwards-compat: captures under the cap are returned in full.

    No ``truncated_elements`` field and no truncation note may appear.
    """
    from tools.computer_use import tool as cu_tool

    fake_backend = self._ax_backend_with(5)
    cu_tool.reset_backend_for_tests()
    request = {"action": "capture", "mode": "ax"}
    with patch.object(cu_tool, "_get_backend", return_value=fake_backend):
        out = cu_tool.handle_computer_use(request)

    parsed = json.loads(out)
    assert len(parsed["elements"]) == 5
    assert parsed["total_elements"] == 5
    assert "truncated_elements" not in parsed
    assert "truncated to" not in parsed["summary"]
|
||||
|
||||
def test_capture_ax_invalid_max_elements_falls_back_to_default(self):
    """A malformed ``max_elements`` falls back to the default cap.

    A string, zero or a negative value must not silently disable the cap
    and bring back the original unbounded behavior.
    """
    from tools.computer_use import tool as cu_tool

    fake_backend = self._ax_backend_with(600)
    cu_tool.reset_backend_for_tests()
    malformed_values = ("not-a-number", 0, -10)
    for bad in malformed_values:
        request = {"action": "capture", "mode": "ax", "max_elements": bad}
        with patch.object(cu_tool, "_get_backend", return_value=fake_backend):
            out = cu_tool.handle_computer_use(request)
        parsed = json.loads(out)
        assert len(parsed["elements"]) == cu_tool._DEFAULT_MAX_ELEMENTS, (
            f"bad max_elements={bad!r} disabled the cap"
        )
|
||||
|
||||
def test_capture_ax_clamps_oversized_max_elements_to_hard_cap(self):
    """An oversized ``max_elements`` is clamped to the hard upper bound.

    Passing a huge value must not bypass the context-blow-up protection.
    """
    from tools.computer_use import tool as cu_tool

    tree_size = 5000
    fake_backend = self._ax_backend_with(tree_size)
    cu_tool.reset_backend_for_tests()
    request = {"action": "capture", "mode": "ax", "max_elements": 10_000}
    with patch.object(cu_tool, "_get_backend", return_value=fake_backend):
        out = cu_tool.handle_computer_use(request)

    parsed = json.loads(out)
    hard_cap = cu_tool._MAX_ALLOWED_MAX_ELEMENTS
    assert len(parsed["elements"]) == hard_cap
    assert parsed["total_elements"] == tree_size
    assert parsed["truncated_elements"] == tree_size - hard_cap
|
||||
|
||||
def test_capture_ax_summary_indices_match_returned_elements(self):
    """Every ``#N`` line in the summary refers to a returned element.

    With ``max_elements`` below the summary's own line cap, the summary
    must not list indices missing from ``elements``; otherwise the model
    sees e.g. ``#15`` and finds no matching entry.
    """
    from tools.computer_use import tool as cu_tool

    fake_backend = self._ax_backend_with(600)
    cu_tool.reset_backend_for_tests()
    request = {"action": "capture", "mode": "ax", "max_elements": 5}
    with patch.object(cu_tool, "_get_backend", return_value=fake_backend):
        out = cu_tool.handle_computer_use(request)

    parsed = json.loads(out)
    returned_indices = {element["index"] for element in parsed["elements"]}
    for line in parsed["summary"].splitlines():
        stripped = line.lstrip()
        if not stripped.startswith("#"):
            continue
        # First whitespace-delimited token is "#<index>".
        idx = int(stripped.split()[0].lstrip("#"))
        assert idx in returned_indices, (
            f"summary references #{idx} but it is absent from elements payload "
            f"(returned: {sorted(returned_indices)})"
        )
|
||||
|
||||
def test_capture_multimodal_summary_omits_truncation_note(self):
    """A som capture with a screenshot carries no truncation note.

    The multimodal envelope returns an image rather than an ``elements``
    array, so a "response truncated to N of M elements" claim in the
    summary would be inaccurate.
    """
    from tools.computer_use.backend import CaptureResult, UIElement
    from tools.computer_use import tool as cu_tool

    fake_png = "iVBORw0KGgo="
    elements = []
    for i in range(600):
        elements.append(
            UIElement(index=i + 1, role="AXButton", label=f"el-{i}", bounds=(0, 0, 1, 1))
        )

    class FakeBackend:
        def start(self):
            pass

        def stop(self):
            pass

        def is_available(self):
            return True

        def capture(self, mode="som", app=None):
            # Unlike the AX helper backend, this one supplies image data.
            return CaptureResult(
                mode=mode,
                width=800,
                height=600,
                png_b64=fake_png,
                elements=list(elements),
                app="Obsidian",
            )

        def click(self, **kw):
            ...

        def drag(self, **kw):
            ...

        def scroll(self, **kw):
            ...

        def type_text(self, text):
            ...

        def key(self, keys):
            ...

        def list_apps(self):
            return []

        def focus_app(self, app, raise_window=False):
            ...

    cu_tool.reset_backend_for_tests()
    with patch.object(cu_tool, "_get_backend", return_value=FakeBackend()):
        out = cu_tool.handle_computer_use({"action": "capture", "mode": "som"})

    assert isinstance(out, dict) and out["_multimodal"] is True
    text_part = next(part for part in out["content"] if part.get("type") == "text")
    assert "truncated to" not in text_part["text"], (
        "multimodal response carries an image, not an elements array; "
        "the truncation note describes a payload field that isn't present"
    )
    assert "truncated to" not in out["text_summary"]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Anthropic adapter: multimodal tool-result conversion
|
||||
|
||||
@@ -78,63 +78,6 @@ def test_resolve_managed_tool_gateway_is_disabled_without_subscription():
|
||||
assert result is None
|
||||
|
||||
|
||||
def test_rewrite_localhost_origin_rewrites_subdomain():
    """A ``*.localhost`` origin resolves to 127.0.0.1 and keeps the host for the header."""
    origin = "http://tools-gateway.localhost:3009"
    resolved, host = managed_tool_gateway._rewrite_localhost_origin(origin)
    assert resolved == "http://127.0.0.1:3009"
    assert host == "tools-gateway.localhost:3009"
|
||||
|
||||
|
||||
def test_rewrite_localhost_origin_preserves_path():
    """The URL path survives the rewrite; the host value excludes the path."""
    origin = "http://tools-gateway.localhost:3009/v1/foo"
    resolved, host = managed_tool_gateway._rewrite_localhost_origin(origin)
    assert resolved == "http://127.0.0.1:3009/v1/foo"
    assert host == "tools-gateway.localhost:3009"
|
||||
|
||||
|
||||
def test_rewrite_localhost_origin_no_port():
    """An origin without a port is rewritten without inventing one."""
    origin = "http://tools-gateway.localhost"
    resolved, host = managed_tool_gateway._rewrite_localhost_origin(origin)
    assert resolved == "http://127.0.0.1"
    assert host == "tools-gateway.localhost"
|
||||
|
||||
|
||||
def test_rewrite_localhost_origin_ignores_bare_localhost():
    """Plain ``localhost`` (no subdomain) passes through with no host override."""
    origin = "http://localhost:3009"
    resolved, host = managed_tool_gateway._rewrite_localhost_origin(origin)
    assert resolved == "http://localhost:3009"
    assert host is None
|
||||
|
||||
|
||||
def test_rewrite_localhost_origin_ignores_real_domains():
    """A real domain is returned untouched with no host override."""
    origin = "https://tools-gateway.nousresearch.com"
    resolved, host = managed_tool_gateway._rewrite_localhost_origin(origin)
    assert resolved == "https://tools-gateway.nousresearch.com"
    assert host is None
|
||||
|
||||
|
||||
def test_gateway_config_resolved_origin_and_host_header():
    """A ``*.localhost`` config exposes the rewritten origin and the original host."""
    config_kwargs = dict(
        vendor="tools",
        gateway_origin="http://tools-gateway.localhost:3009",
        nous_user_token="tok",
        managed_mode=True,
    )
    cfg = managed_tool_gateway.ManagedToolGatewayConfig(**config_kwargs)
    assert cfg.resolved_origin == "http://127.0.0.1:3009"
    assert cfg.gateway_host_header == "tools-gateway.localhost:3009"
|
||||
|
||||
|
||||
def test_gateway_config_resolved_origin_passthrough_for_real_domain():
    """A real-domain config keeps its origin and sets no host header."""
    config_kwargs = dict(
        vendor="firecrawl",
        gateway_origin="https://firecrawl-gateway.nousresearch.com",
        nous_user_token="tok",
        managed_mode=True,
    )
    cfg = managed_tool_gateway.ManagedToolGatewayConfig(**config_kwargs)
    assert cfg.resolved_origin == "https://firecrawl-gateway.nousresearch.com"
    assert cfg.gateway_host_header is None
|
||||
|
||||
|
||||
def test_read_nous_access_token_refreshes_expiring_cached_token(tmp_path, monkeypatch):
|
||||
monkeypatch.delenv("TOOL_GATEWAY_USER_TOKEN", raising=False)
|
||||
monkeypatch.setenv("HERMES_HOME", str(tmp_path))
|
||||
|
||||
@@ -91,7 +91,7 @@ class TestSSHBulkUpload:
|
||||
assert "/home/testuser/.hermes/credentials" in mkdir_str
|
||||
|
||||
def test_staging_symlinks_mirror_remote_layout(self, mock_env, tmp_path):
|
||||
"""Symlinks in staging dir should mirror the .hermes-relative layout."""
|
||||
"""Symlinks in staging dir should mirror the remote path structure."""
|
||||
f1 = tmp_path / "local_a.txt"
|
||||
f1.write_text("content a")
|
||||
|
||||
@@ -107,7 +107,9 @@ class TestSSHBulkUpload:
|
||||
c_idx = cmd.index("-C")
|
||||
staging_dir = cmd[c_idx + 1]
|
||||
# Check the symlink exists
|
||||
expected = os.path.join(staging_dir, "skills/my_skill.md")
|
||||
expected = os.path.join(
|
||||
staging_dir, "home/testuser/.hermes/skills/my_skill.md"
|
||||
)
|
||||
staging_paths.append(expected)
|
||||
assert os.path.islink(expected), f"Expected symlink at {expected}"
|
||||
assert os.readlink(expected) == os.path.abspath(str(f1))
|
||||
@@ -164,42 +166,14 @@ class TestSSHBulkUpload:
|
||||
assert "-" in tar_cmd # stdout
|
||||
assert "-C" in tar_cmd
|
||||
|
||||
# ssh: extract from stdin at ~/.hermes, preserving existing dir modes (#17767)
|
||||
# ssh: extract from stdin at /, preserving existing dir modes (#17767)
|
||||
ssh_str = " ".join(ssh_cmd)
|
||||
assert "ssh" in ssh_str
|
||||
assert "tar xf -" in ssh_str
|
||||
assert "--no-overwrite-dir" in ssh_str
|
||||
assert "-C /home/testuser/.hermes" in ssh_str
|
||||
assert "-C /" in ssh_str
|
||||
assert "testuser@example.com" in ssh_str
|
||||
|
||||
def test_bulk_upload_never_stages_remote_home_prefix(self, mock_env, tmp_path):
    """Regression: do not archive /home/<user> path components."""
    local_file = tmp_path / "nested.txt"
    local_file.write_text("nested")
    files = [(str(local_file), "/home/testuser/.hermes/cache/nested.txt")]

    def capture_tar_cmd(cmd, **kwargs):
        # Only the tar invocation carries the staging directory (after -C).
        if cmd[0] == "tar":
            staging_dir = cmd[cmd.index("-C") + 1]
            assert not os.path.exists(os.path.join(staging_dir, "home"))
            expected = os.path.join(staging_dir, "cache/nested.txt")
            assert os.path.islink(expected)

        # Stand-in process object that reports success with empty output.
        proc = MagicMock()
        proc.stdout = MagicMock()
        proc.returncode = 0
        proc.poll.return_value = 0
        proc.communicate.return_value = (b"", b"")
        proc.stderr = MagicMock()
        proc.stderr.read.return_value = b""
        return proc

    run_ok = subprocess.CompletedProcess([], 0)
    with patch.object(subprocess, "run", return_value=run_ok), \
            patch.object(subprocess, "Popen", side_effect=capture_tar_cmd):
        mock_env._ssh_bulk_upload(files)
|
||||
|
||||
def test_mkdir_failure_raises(self, mock_env, tmp_path):
|
||||
"""mkdir failure should raise RuntimeError before tar pipe."""
|
||||
f1 = tmp_path / "y.txt"
|
||||
|
||||
@@ -23,9 +23,6 @@ def _fake_faster_whisper_module(mock_model):
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
pytestmark = pytest.mark.usefixtures("disable_lazy_stt_install")
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def _clear_openai_env(monkeypatch):
|
||||
monkeypatch.delenv("OPENAI_API_KEY", raising=False)
|
||||
|
||||
@@ -12,9 +12,6 @@ from unittest.mock import MagicMock, patch
|
||||
import pytest
|
||||
|
||||
|
||||
pytestmark = pytest.mark.usefixtures("disable_lazy_stt_install")
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def isolate_env(monkeypatch):
|
||||
"""Strip every STT-related env var so the test really exercises the
|
||||
|
||||
@@ -42,9 +42,6 @@ def sample_ogg(tmp_path):
|
||||
return str(ogg_path)
|
||||
|
||||
|
||||
pytestmark = pytest.mark.usefixtures("disable_lazy_stt_install")
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def clean_env(monkeypatch):
|
||||
"""Ensure no real API keys leak into tests."""
|
||||
|
||||
@@ -1,438 +0,0 @@
|
||||
"""App integration tools — 500+ external apps via the Nous tool gateway.
|
||||
|
||||
Four meta tools that let the LLM discover, authenticate, and execute
|
||||
real app tools at runtime through the Nous managed tool gateway.
|
||||
|
||||
Architecture:
|
||||
Hermes → POST JSON → tools-gateway.nousresearch.com/v1/* → External APIs
|
||||
Auth: Bearer <nous_user_token> (subscription-gated)
|
||||
Vendor: "tools" in the managed gateway infra (build_vendor_gateway_url)
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
import httpx
|
||||
|
||||
from tools.registry import registry
|
||||
from tools.managed_tool_gateway import (
|
||||
is_managed_tool_gateway_ready,
|
||||
resolve_managed_tool_gateway,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Timeouts per endpoint (connect, read)
|
||||
# ---------------------------------------------------------------------------
|
||||
_TIMEOUT_SEARCH = httpx.Timeout(30.0, connect=5.0)
|
||||
_TIMEOUT_SCHEMAS = httpx.Timeout(15.0, connect=5.0)
|
||||
_TIMEOUT_EXECUTE = httpx.Timeout(120.0, connect=5.0)
|
||||
_TIMEOUT_CONNECTIONS = httpx.Timeout(30.0, connect=5.0)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Module-level cached httpx client — avoids TCP+TLS setup per tool call.
|
||||
# Follows the same thread-safe staleness pattern as image_generation_tool.py.
|
||||
# ---------------------------------------------------------------------------
|
||||
import threading
|
||||
|
||||
_http_client: Optional[httpx.Client] = None
|
||||
_http_client_origin: Optional[str] = None
|
||||
_http_client_lock = threading.Lock()
|
||||
|
||||
|
||||
def _get_http_client(origin: str, verify: bool = True) -> httpx.Client:
    """Return a reusable httpx.Client, recreated when the origin changes.

    Args:
        origin: Gateway origin used as the cache key.
        verify: Passed to ``httpx.Client(verify=...)`` when a new client is
            built. It is not part of the cache key, so it is ignored when a
            client for the same origin is already cached.

    Returns:
        The cached client for ``origin``, or a newly created one.
    """
    global _http_client, _http_client_origin
    with _http_client_lock:
        if _http_client is not None and _http_client_origin == origin:
            return _http_client

        if _http_client is not None:
            # Best effort: a stale client that fails to close must not
            # prevent building its replacement, but leave a trace.
            try:
                _http_client.close()
            except Exception as exc:
                logger.debug(
                    "Failed to close stale gateway HTTP client for %s: %s",
                    _http_client_origin,
                    exc,
                )

        _http_client = httpx.Client(verify=verify)
        _http_client_origin = origin
        return _http_client
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Config / availability helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _read_portal_app_tools_enabled() -> bool:
    """Report whether the portal.app_tools config flag is switched on."""
    # Imported at call time, delegating to the shared backend helper.
    from tools.tool_backend_helpers import portal_app_tools_enabled as _flag_reader

    return _flag_reader()
|
||||
|
||||
|
||||
def _app_tools_available() -> bool:
    """check_fn for the app tools: config flag on AND managed gateway ready.

    The gateway readiness check is skipped when the config flag is off.
    """
    flag_on = _read_portal_app_tools_enabled()
    return is_managed_tool_gateway_ready("tools") if flag_on else False
|
||||
|
||||
|
||||
def _get_current_model_name() -> Optional[str]:
|
||||
"""Best-effort read of the current model name from config.
|
||||
|
||||
Handles both ``"model": "name"`` and ``"model": {"default": "name"}``
|
||||
config shapes. Returns None if unresolvable (caller should omit the
|
||||
field rather than sending garbage).
|
||||
"""
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
config = load_config()
|
||||
model_cfg = config.get("model")
|
||||
if isinstance(model_cfg, str) and model_cfg.strip():
|
||||
return model_cfg.strip()
|
||||
if isinstance(model_cfg, dict):
|
||||
default = model_cfg.get("default")
|
||||
if isinstance(default, str) and default.strip():
|
||||
return default.strip()
|
||||
except Exception:
|
||||
pass
|
||||
return None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Gateway HTTP client
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _gateway_post(
    path: str,
    payload: Dict[str, Any],
    timeout: httpx.Timeout,
) -> Dict[str, Any]:
    """POST JSON to the tool gateway and return the parsed response.

    Never raises — HTTP errors and network failures are returned as dicts
    so the LLM can see them and communicate with the user.

    Args:
        path: Endpoint path appended to the gateway origin (e.g. "/v1/search").
        payload: JSON-serializable request body.
        timeout: Per-request httpx timeout.

    Returns:
        The decoded JSON body, whatever the HTTP status. On failure, a dict
        of the form ``{"error": {"code": ..., "message": ...}}`` whose code
        is GATEWAY_UNAVAILABLE, HTTP_<status>, GATEWAY_TIMEOUT or
        GATEWAY_UNREACHABLE.
    """
    gateway = resolve_managed_tool_gateway("tools")
    if gateway is None:
        return {
            "error": {
                "code": "GATEWAY_UNAVAILABLE",
                "message": "Nous tool gateway is not available. Check your subscription status.",
            }
        }

    origin = gateway.gateway_origin.rstrip("/")
    url = f"{origin}{path}"
    headers = {
        "Authorization": f"Bearer {gateway.nous_user_token}",
        "Content-Type": "application/json",
    }

    try:
        # Key the cached client on the normalized origin itself. Splitting
        # the URL on "/v1/" truncates an origin containing that segment and
        # yields the whole URL for any path without it.
        client = _get_http_client(origin)
        response = client.post(url, json=payload, headers=headers, timeout=timeout)

        # Return parsed body regardless of status code — the LLM handles errors
        try:
            return response.json()
        except Exception:
            # Body is not JSON: surface the status and a bounded text excerpt.
            return {
                "error": {
                    "code": f"HTTP_{response.status_code}",
                    "message": response.text[:2000],
                }
            }

    except httpx.TimeoutException as exc:
        logger.warning("Tool gateway request to %s timed out: %s", path, exc)
        return {
            "error": {
                "code": "GATEWAY_TIMEOUT",
                "message": f"Request to {path} timed out: {exc}",
            }
        }
    except Exception as exc:
        logger.warning("Tool gateway request to %s failed: %s", path, exc)
        return {
            "error": {
                "code": "GATEWAY_UNREACHABLE",
                "message": f"Failed to reach tool gateway: {exc}",
            }
        }
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tool handlers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def handle_app_search_tools(args: dict, **kw) -> str:
    """Search 500+ app integrations for tools matching a use case.

    Forwards ``queries`` (when truthy), ``session`` (when not None) and a
    model name to ``/v1/search``; returns the gateway response as JSON text.
    """
    body: Dict[str, Any] = {}

    requested_queries = args.get("queries")
    if requested_queries:
        body["queries"] = requested_queries

    # session is an OBJECT {id, generate_id} — NOT a string
    session_obj = args.get("session")
    if session_obj is not None:
        body["session"] = session_obj

    # Explicit model wins; otherwise fall back to config. Omit if unresolvable.
    model_name = args.get("model") or _get_current_model_name()
    if model_name:
        body["model"] = model_name

    result = _gateway_post("/v1/search", body, _TIMEOUT_SEARCH)
    return json.dumps(result, ensure_ascii=False, default=str)
|
||||
|
||||
|
||||
def handle_app_tool_schemas(args: dict, **kw) -> str:
    """Get full input schemas for tools discovered via app_search_tools.

    Forwards ``tool_slugs`` and ``include`` (when truthy) and ``session_id``
    (when not None) to ``/v1/schemas``; returns the response as JSON text.
    """
    body: Dict[str, Any] = {}

    slugs = args.get("tool_slugs")
    if slugs:
        body["tool_slugs"] = slugs

    wanted_fields = args.get("include")
    if wanted_fields:
        body["include"] = wanted_fields

    # session_id is a STRING — not an object
    sid = args.get("session_id")
    if sid is not None:
        body["session_id"] = sid

    result = _gateway_post("/v1/schemas", body, _TIMEOUT_SCHEMAS)
    return json.dumps(result, ensure_ascii=False, default=str)
|
||||
|
||||
|
||||
def handle_app_execute_tools(args: dict, **kw) -> str:
    """Execute one or more app tools in parallel.

    Forwards ``tools`` (when truthy) and ``session_id`` (when not None) to
    ``/v1/execute``; returns the gateway response as JSON text.
    """
    body: Dict[str, Any] = {}

    tool_calls = args.get("tools")
    if tool_calls:
        body["tools"] = tool_calls

    # session_id is a STRING
    sid = args.get("session_id")
    if sid is not None:
        body["session_id"] = sid

    # Gateway-internal params that are meaningless in Hermes
    # (sync_response_to_workbench, thought, current_step, current_step_metric)
    # never reach the payload: only the fields picked above are sent.

    result = _gateway_post("/v1/execute", body, _TIMEOUT_EXECUTE)
    return json.dumps(result, ensure_ascii=False, default=str)
|
||||
|
||||
|
||||
def handle_app_manage_connections(args: dict, **kw) -> str:
    """Check or initiate OAuth/API key connections for app toolkits.

    Forwards ``toolkits`` (when truthy), ``reinitiate_all`` and
    ``session_id`` (each when not None) to ``/v1/connections``; returns the
    gateway response as JSON text.
    """
    body: Dict[str, Any] = {}

    toolkit_slugs = args.get("toolkits")
    if toolkit_slugs:
        body["toolkits"] = toolkit_slugs

    # An explicit False is forwarded too; only a missing/None value is dropped.
    force_reconnect = args.get("reinitiate_all")
    if force_reconnect is not None:
        body["reinitiate_all"] = force_reconnect

    # session_id is a STRING
    sid = args.get("session_id")
    if sid is not None:
        body["session_id"] = sid

    result = _gateway_post("/v1/connections", body, _TIMEOUT_CONNECTIONS)
    return json.dumps(result, ensure_ascii=False, default=str)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tool registration
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# app_search_tools: discovery entry point. Registered in the "app_tools"
# toolset and gated by _app_tools_available.
registry.register(
    name="app_search_tools",
    toolset="app_tools",
    schema={
        "name": "app_search_tools",
        "description": (
            "Search 500+ app integrations (Gmail, Slack, GitHub, Notion, Google Sheets, "
            "Jira, Linear, Figma, and more) to find tools for a task. Returns tool slugs, "
            "execution plans, pitfalls, and connection status."
        ),
        "parameters": {
            "type": "object",
            "required": ["queries"],
            "properties": {
                "queries": {
                    "type": "array",
                    "minItems": 1,
                    "description": (
                        "Structured search queries. Split independent app actions "
                        "into separate queries. Each returns 4-6 tools."
                    ),
                    "items": {
                        "type": "object",
                        "required": ["use_case"],
                        "properties": {
                            "use_case": {
                                "type": "string",
                                "maxLength": 1024,
                                "description": (
                                    "Normalized description of the task. Include app "
                                    "names if mentioned. Do NOT include personal "
                                    "identifiers — put those in known_fields."
                                ),
                            },
                            "known_fields": {
                                "type": "string",
                                "description": (
                                    "Known inputs as comma-separated key:value pairs "
                                    "(e.g. 'channel_name:general'). Omit if not relevant."
                                ),
                            },
                        },
                    },
                },
                # Unlike the other app tools, search takes a session OBJECT,
                # not a session_id string.
                "session": {
                    "type": "object",
                    "description": "Session context. Pass {generate_id: true} for new workflows, {id: \"EXISTING\"} to continue.",
                    "properties": {
                        "id": {"type": "string", "description": "Existing session ID to reuse."},
                        "generate_id": {"type": "boolean", "description": "Set true for first call of a new workflow."},
                    },
                },
            },
        },
    },
    # The lambda resolves the handler name at call time, not registration time.
    handler=lambda args, **kw: handle_app_search_tools(args, **kw),
    check_fn=_app_tools_available,
    description="Search 500+ app integrations",
    emoji="🔍",
)
|
||||
|
||||
# app_tool_schemas: fetches parameter schemas for slugs returned by a
# search. Registered in the "app_tools" toolset, gated by _app_tools_available.
registry.register(
    name="app_tool_schemas",
    toolset="app_tools",
    schema={
        "name": "app_tool_schemas",
        "description": (
            "Get full input parameter schemas for tools discovered via "
            "app_search_tools. Only use slugs from search results — never invent."
        ),
        "parameters": {
            "type": "object",
            "required": ["tool_slugs"],
            "properties": {
                "tool_slugs": {
                    "type": "array",
                    "description": "Tool slugs to retrieve schemas for.",
                    "items": {"type": "string", "minLength": 1},
                },
                "include": {
                    "type": "array",
                    "default": ["input_schema"],
                    "description": "Schema fields to include. Add 'output_schema' for response validation.",
                    "items": {"type": "string", "enum": ["input_schema", "output_schema"]},
                },
                # Plain string here, unlike the search tool's session object.
                "session_id": {
                    "type": "string",
                    "description": "Session ID from a prior app_search_tools call.",
                },
            },
        },
    },
    # The lambda resolves the handler name at call time, not registration time.
    handler=lambda args, **kw: handle_app_tool_schemas(args, **kw),
    check_fn=_app_tools_available,
    description="Get tool input schemas",
    emoji="📋",
)
|
||||
|
||||
# app_execute_tools: runs a batch of discovered tools. Registered in the
# "app_tools" toolset, gated by _app_tools_available.
registry.register(
    name="app_execute_tools",
    toolset="app_tools",
    schema={
        "name": "app_execute_tools",
        "description": (
            "Execute one or more app tools in parallel (up to 50). "
            "Requires active connection per toolkit. Use schema-compliant arguments only."
        ),
        "parameters": {
            "type": "object",
            "required": ["tools"],
            "properties": {
                "tools": {
                    "type": "array",
                    "minItems": 1,
                    "maxItems": 50,
                    "description": "Logically independent tools to execute in parallel.",
                    "items": {
                        "type": "object",
                        "required": ["tool_slug", "arguments"],
                        # Each call entry is closed: only tool_slug and arguments.
                        "additionalProperties": False,
                        "properties": {
                            "tool_slug": {
                                "type": "string",
                                "minLength": 1,
                                "description": "Tool slug from search results — never invent.",
                            },
                            # Arguments are open-ended; their shape is defined
                            # by the target tool's own input schema.
                            "arguments": {
                                "type": "object",
                                "additionalProperties": True,
                                "description": "Arguments matching the tool's input schema exactly.",
                            },
                        },
                    },
                },
                "session_id": {
                    "type": "string",
                    "description": "Session ID from a prior app_search_tools call.",
                },
            },
        },
    },
    # The lambda resolves the handler name at call time, not registration time.
    handler=lambda args, **kw: handle_app_execute_tools(args, **kw),
    check_fn=_app_tools_available,
    # Only this registration sets an explicit result-size limit.
    max_result_size_chars=50_000,
    description="Execute app tools",
    emoji="⚡",
)
|
||||
|
||||
# app_manage_connections: checks or starts toolkit authentication.
# Registered in the "app_tools" toolset, gated by _app_tools_available.
registry.register(
    name="app_manage_connections",
    toolset="app_tools",
    schema={
        "name": "app_manage_connections",
        "description": (
            "Check or initiate OAuth/API key connections for app toolkits. "
            "Returns auth links for inactive connections."
        ),
        "parameters": {
            "type": "object",
            "required": ["toolkits"],
            "properties": {
                "toolkits": {
                    "type": "array",
                    "description": "Toolkit slugs to check or connect (e.g. ['gmail', 'slack']).",
                    "items": {"type": "string"},
                },
                "reinitiate_all": {
                    "type": "boolean",
                    "default": False,
                    "description": "Force reconnection even for active connections.",
                },
                "session_id": {
                    "type": "string",
                    "description": "Session ID from a prior app_search_tools call.",
                },
            },
        },
    },
    # The lambda resolves the handler name at call time, not registration time.
    handler=lambda args, **kw: handle_app_manage_connections(args, **kw),
    check_fn=_app_tools_available,
    description="Manage app connections",
    emoji="🔗",
)
|
||||
@@ -142,14 +142,6 @@ class ComputerUseBackend(ABC):
|
||||
def focus_app(self, app: str, raise_window: bool = False) -> ActionResult:
|
||||
"""Route input to `app` (by name or bundle ID). Default: focus without raise."""
|
||||
|
||||
# ── Native-value mutation ────────────────────────────────────────
|
||||
@abstractmethod
|
||||
def set_value(self, value: str, element: Optional[int] = None) -> ActionResult:
|
||||
"""Set a native value on an element (e.g. AXPopUpButton selection).
|
||||
|
||||
`element` is the 1-based SOM index returned by a prior capture call.
|
||||
"""
|
||||
|
||||
# ── Timing ──────────────────────────────────────────────────────
|
||||
def wait(self, seconds: float) -> ActionResult:
|
||||
"""Default implementation: time.sleep."""
|
||||
|
||||
@@ -75,28 +75,6 @@ COMPUTER_USE_SCHEMA: Dict[str, Any] = {
|
||||
"frontmost app's window or the whole screen."
|
||||
),
|
||||
},
|
||||
"max_elements": {
|
||||
"type": "integer",
|
||||
"description": (
|
||||
"Optional cap on the AX `elements` array returned by "
|
||||
"`action='capture'`. Default 100, hard maximum 1000. "
|
||||
"Dense UIs (Electron apps such as Obsidian or VS Code, "
|
||||
"JetBrains IDEs) can publish 500+ AX nodes — capping "
|
||||
"prevents a single capture from blowing session "
|
||||
"context. When the cap trims the response, "
|
||||
"`total_elements` and `truncated_elements` are "
|
||||
"surfaced in the result so you can re-call with "
|
||||
"`app=` to narrow scope or raise `max_elements` when "
|
||||
"the full tree is required. Has no effect on "
|
||||
"`mode='som'` / `mode='vision'` when a screenshot is "
|
||||
"included in the response; only the rare image-"
|
||||
"missing fallback returns an `elements` array and is "
|
||||
"subject to the cap."
|
||||
),
|
||||
"default": 100,
|
||||
"minimum": 1,
|
||||
"maximum": 1000,
|
||||
},
|
||||
# ── click / drag / scroll targeting ────────────────────
|
||||
"element": {
|
||||
"type": "integer",
|
||||
|
||||
@@ -200,10 +200,6 @@ class _NoopBackend(ComputerUseBackend): # pragma: no cover
|
||||
self.calls.append(("focus_app", {"app": app, "raise": raise_window}))
|
||||
return ActionResult(ok=True, action="focus_app")
|
||||
|
||||
def set_value(self, value: str, element: Optional[int] = None) -> ActionResult:
|
||||
self.calls.append(("set_value", {"value": value, "element": element}))
|
||||
return ActionResult(ok=True, action="set_value")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Dispatch
|
||||
@@ -321,7 +317,7 @@ def _dispatch(backend: ComputerUseBackend, action: str, args: Dict[str, Any]) ->
|
||||
if mode not in {"som", "vision", "ax"}:
|
||||
return json.dumps({"error": f"bad mode {mode!r}; use som|vision|ax"})
|
||||
cap = backend.capture(mode=mode, app=args.get("app"))
|
||||
return _capture_response(cap, max_elements=_coerce_max_elements(args.get("max_elements")))
|
||||
return _capture_response(cap)
|
||||
|
||||
if action == "wait":
|
||||
seconds = float(args.get("seconds", 1.0))
|
||||
@@ -420,62 +416,16 @@ def _text_response(res: ActionResult) -> str:
|
||||
return json.dumps(payload)
|
||||
|
||||
|
||||
# Default cap for the AX `elements` array returned by capture. Dense UIs
|
||||
# (Electron apps, Obsidian, JetBrains IDEs) can publish 500+ AX nodes, which
|
||||
# can exhaust session context after a single capture. The model-facing
|
||||
# `max_elements` argument lets callers raise this when they need the full tree.
|
||||
_DEFAULT_MAX_ELEMENTS = 100
|
||||
# Hard upper bound on caller-supplied `max_elements`. Without this, a tool
|
||||
# call passing a very large integer would silently disable the safeguard and
|
||||
# reintroduce the original unbounded behavior.
|
||||
_MAX_ALLOWED_MAX_ELEMENTS = 1000
|
||||
|
||||
|
||||
def _coerce_max_elements(value: Any) -> int:
|
||||
"""Validate the caller-supplied ``max_elements``.
|
||||
|
||||
Falls back to :data:`_DEFAULT_MAX_ELEMENTS` for missing / non-integer /
|
||||
sub-1 inputs so the cap can never be silently disabled by a malformed
|
||||
tool-call argument. Clamps oversized values to
|
||||
:data:`_MAX_ALLOWED_MAX_ELEMENTS` so a caller cannot bypass the
|
||||
safeguard by passing a very large integer.
|
||||
"""
|
||||
if value is None:
|
||||
return _DEFAULT_MAX_ELEMENTS
|
||||
try:
|
||||
n = int(value)
|
||||
except (TypeError, ValueError):
|
||||
return _DEFAULT_MAX_ELEMENTS
|
||||
if n < 1:
|
||||
return _DEFAULT_MAX_ELEMENTS
|
||||
if n > _MAX_ALLOWED_MAX_ELEMENTS:
|
||||
return _MAX_ALLOWED_MAX_ELEMENTS
|
||||
return n
|
||||
|
||||
|
||||
def _capture_response(cap: CaptureResult, max_elements: int = _DEFAULT_MAX_ELEMENTS) -> Any:
|
||||
total_elements = len(cap.elements)
|
||||
visible_elements = cap.elements[:max_elements]
|
||||
truncated_elements = max(0, total_elements - len(visible_elements))
|
||||
|
||||
# Index only what's actually surfaced in the response — otherwise the
|
||||
# human-readable summary references element indices the model cannot
|
||||
# find in the JSON `elements` array (e.g. max_elements=10 vs the default
|
||||
# 40-line index window).
|
||||
element_index = _format_elements(visible_elements)
|
||||
def _capture_response(cap: CaptureResult) -> Any:
|
||||
element_index = _format_elements(cap.elements)
|
||||
summary_lines = [
|
||||
f"capture mode={cap.mode} {cap.width}x{cap.height}"
|
||||
+ (f" app={cap.app}" if cap.app else "")
|
||||
+ (f" window={cap.window_title!r}" if cap.window_title else ""),
|
||||
f"{total_elements} interactable element(s):",
|
||||
f"{len(cap.elements)} interactable element(s):",
|
||||
]
|
||||
if element_index:
|
||||
summary_lines.extend(element_index)
|
||||
# Multimodal and AX paths both reference `summary`; build it once up-front
|
||||
# so the aux-vision routing branch (which fires before either path is
|
||||
# selected) has a valid value to hand to _route_capture_through_aux_vision.
|
||||
# The AX path appends the "truncated to N of M" note to summary_lines
|
||||
# below and rebuilds; the multimodal path keeps this version untouched.
|
||||
summary = "\n".join(summary_lines)
|
||||
|
||||
if cap.png_b64 and cap.mode != "ax":
|
||||
@@ -499,9 +449,6 @@ def _capture_response(cap: CaptureResult, max_elements: int = _DEFAULT_MAX_ELEME
|
||||
# JPEG: base64 starts with /9j/ PNG: starts with iVBOR
|
||||
_b64_prefix = cap.png_b64[:8]
|
||||
_mime = "image/jpeg" if _b64_prefix.startswith("/9j/") else "image/png"
|
||||
# The multimodal response carries the screenshot, not the AX
|
||||
# elements array, so a "response truncated to N of M elements"
|
||||
# note would be inaccurate — skip it on this branch.
|
||||
return {
|
||||
"_multimodal": True,
|
||||
"content": [
|
||||
@@ -511,29 +458,18 @@ def _capture_response(cap: CaptureResult, max_elements: int = _DEFAULT_MAX_ELEME
|
||||
],
|
||||
"text_summary": summary,
|
||||
"meta": {"mode": cap.mode, "width": cap.width, "height": cap.height,
|
||||
"elements": total_elements, "png_bytes": cap.png_bytes_len},
|
||||
"elements": len(cap.elements), "png_bytes": cap.png_bytes_len},
|
||||
}
|
||||
# AX-only (or image-missing fallback): text path actually carries the
|
||||
# `elements` array, so the truncation note applies here.
|
||||
if truncated_elements:
|
||||
summary_lines.append(
|
||||
f" (response truncated to {len(visible_elements)} of {total_elements} elements; "
|
||||
f"raise max_elements or pass app= to narrow)"
|
||||
)
|
||||
summary = "\n".join(summary_lines)
|
||||
payload: Dict[str, Any] = {
|
||||
# AX-only (or image missing): text path.
|
||||
return json.dumps({
|
||||
"mode": cap.mode,
|
||||
"width": cap.width,
|
||||
"height": cap.height,
|
||||
"app": cap.app,
|
||||
"window_title": cap.window_title,
|
||||
"elements": [_element_to_dict(e) for e in visible_elements],
|
||||
"total_elements": total_elements,
|
||||
"elements": [_element_to_dict(e) for e in cap.elements],
|
||||
"summary": summary,
|
||||
}
|
||||
if truncated_elements:
|
||||
payload["truncated_elements"] = truncated_elements
|
||||
return json.dumps(payload)
|
||||
})
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -675,11 +611,6 @@ def _maybe_follow_capture(
|
||||
) -> Any:
|
||||
if not do_capture:
|
||||
return _text_response(res)
|
||||
# Skip the follow-up capture when the action itself failed: showing a
|
||||
# normal-looking screenshot after a failure misleads the model into thinking
|
||||
# the action succeeded. Return the error text instead.
|
||||
if not res.ok:
|
||||
return _text_response(res)
|
||||
try:
|
||||
# Preserve the app context established by the preceding capture/focus_app so
|
||||
# that capture_after=True re-captures the same app rather than the frontmost
|
||||
|
||||
@@ -60,8 +60,7 @@ class ManagedModalEnvironment(BaseModalExecutionEnvironment):
|
||||
if gateway is None:
|
||||
raise ValueError("Managed Modal requires a configured tool gateway and Nous user token")
|
||||
|
||||
self._gateway_origin = gateway.resolved_origin.rstrip("/")
|
||||
self._gateway_host_header = gateway.gateway_host_header
|
||||
self._gateway_origin = gateway.gateway_origin.rstrip("/")
|
||||
self._nous_user_token = gateway.nous_user_token
|
||||
self._task_id = task_id
|
||||
self._persistent = persistent_filesystem
|
||||
@@ -235,8 +234,6 @@ class ManagedModalEnvironment(BaseModalExecutionEnvironment):
|
||||
"Authorization": f"Bearer {self._nous_user_token}",
|
||||
"Content-Type": "application/json",
|
||||
}
|
||||
if self._gateway_host_header:
|
||||
headers["Host"] = self._gateway_host_header
|
||||
if extra_headers:
|
||||
headers.update(extra_headers)
|
||||
|
||||
|
||||
@@ -169,7 +169,6 @@ class SSHEnvironment(BaseEnvironment):
|
||||
if not files:
|
||||
return
|
||||
|
||||
base = f"{self._remote_home}/.hermes"
|
||||
parents = unique_parent_dirs(files)
|
||||
if parents:
|
||||
cmd = self._build_ssh_command()
|
||||
@@ -181,19 +180,7 @@ class SSHEnvironment(BaseEnvironment):
|
||||
# Symlink staging avoids fragile GNU tar --transform rules.
|
||||
with tempfile.TemporaryDirectory(prefix="hermes-ssh-bulk-") as staging:
|
||||
for host_path, remote_path in files:
|
||||
try:
|
||||
rel_remote = os.path.relpath(remote_path, base)
|
||||
except ValueError as exc:
|
||||
raise RuntimeError(
|
||||
f"remote path {remote_path!r} is not under sync base {base!r}"
|
||||
) from exc
|
||||
|
||||
if rel_remote == "." or rel_remote.startswith("../"):
|
||||
raise RuntimeError(
|
||||
f"remote path {remote_path!r} escapes sync base {base!r}"
|
||||
)
|
||||
|
||||
staged = os.path.join(staging, rel_remote)
|
||||
staged = os.path.join(staging, remote_path.lstrip("/"))
|
||||
os.makedirs(os.path.dirname(staged), exist_ok=True)
|
||||
os.symlink(os.path.abspath(host_path), staged)
|
||||
|
||||
@@ -203,7 +190,7 @@ class SSHEnvironment(BaseEnvironment):
|
||||
# existing directories (e.g. /home/<user>) with the staging
|
||||
# directory's mode. Without this, a umask 002 produces 0775
|
||||
# dirs which breaks sshd StrictModes (refuses authorized_keys).
|
||||
ssh_cmd.append(f"tar xf - --no-overwrite-dir -C {shlex.quote(base)}")
|
||||
ssh_cmd.append("tar xf - --no-overwrite-dir -C /")
|
||||
|
||||
tar_proc = subprocess.Popen(
|
||||
tar_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE
|
||||
|
||||
@@ -1,163 +0,0 @@
|
||||
"""Shared FAL.ai SDK plumbing.
|
||||
|
||||
Holds the stateless atoms that every FAL-backed tool needs:
|
||||
|
||||
* :func:`import_fal_client` — lazy import + ``lazy_deps`` integration so
|
||||
``fal_client`` isn't pulled at cold start (it added ~64 ms per CLI
|
||||
invocation when imported eagerly).
|
||||
* :class:`_ManagedFalSyncClient` — wrapper that drives a Nous-managed
|
||||
fal-queue gateway through the standard ``fal_client.SyncClient``
|
||||
primitives.
|
||||
* :func:`_normalize_fal_queue_url_format`, :func:`_extract_http_status`
|
||||
— small helpers used by both the managed client wrapper and
|
||||
``_submit_fal_request``.
|
||||
|
||||
Stateful pieces (cache globals, ``_managed_fal_client*`` selectors,
|
||||
``_submit_fal_request``) intentionally stay on
|
||||
:mod:`tools.image_generation_tool`. That module is the patch target for
|
||||
existing test suites (``tests/tools/test_image_generation.py``,
|
||||
``tests/tools/test_managed_media_gateways.py``) and for the
|
||||
``plugins/image_gen/fal/`` plugin's ``_it`` indirection — moving the
|
||||
caches here would silently defeat ``monkeypatch.setattr(image_tool,
|
||||
"_managed_fal_client", None)`` because the lookups would go against
|
||||
``fal_common``'s namespace instead. See the per-rule walkthrough at
|
||||
issue #26241 for details.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict, Optional, Union
|
||||
from urllib.parse import urlencode
|
||||
|
||||
|
||||
def import_fal_client() -> Any:
    """Import ``fal_client`` (via ``lazy_deps`` when available) and return
    the module reference.

    Callers are responsible for caching the result on their own module
    global — keeping per-module globals lets tests monkey-patch the
    target module's ``fal_client`` attribute and have the patched value
    stick for that module's call sites.

    Returns:
        The imported ``fal_client`` module object.

    Raises:
        ImportError: the package is genuinely unavailable, or the lazy
            dependency step failed (the original error is attached as
            ``__cause__``).
    """
    try:
        from tools.lazy_deps import ensure as _lazy_ensure
        _lazy_ensure("image.fal", prompt=False)
    except ImportError:
        # An ImportError from this step is ignored; the plain import below
        # then decides whether fal_client is usable.
        pass
    except Exception as exc:  # noqa: BLE001 — lazy_deps surfaces install hints
        # Normalise to ImportError for callers, but keep the original
        # exception chained so the real failure stays visible in tracebacks.
        raise ImportError(str(exc)) from exc
    import fal_client  # type: ignore  # noqa: WPS433 — intentionally lazy
    return fal_client
|
||||
|
||||
|
||||
def _normalize_fal_queue_url_format(queue_run_origin: str) -> str:
|
||||
normalized_origin = str(queue_run_origin or "").strip().rstrip("/")
|
||||
if not normalized_origin:
|
||||
raise ValueError("Managed FAL queue origin is required")
|
||||
return f"{normalized_origin}/"
|
||||
|
||||
|
||||
def _extract_http_status(exc: BaseException) -> Optional[int]:
|
||||
"""Return an HTTP status code from httpx/fal exceptions, else None.
|
||||
|
||||
Defensive across exception shapes — httpx.HTTPStatusError exposes
|
||||
``.response.status_code`` while fal_client wrappers may expose
|
||||
``.status_code`` directly.
|
||||
"""
|
||||
response = getattr(exc, "response", None)
|
||||
if response is not None:
|
||||
status = getattr(response, "status_code", None)
|
||||
if isinstance(status, int):
|
||||
return status
|
||||
status = getattr(exc, "status_code", None)
|
||||
if isinstance(status, int):
|
||||
return status
|
||||
return None
|
||||
|
||||
|
||||
class _ManagedFalSyncClient:
|
||||
"""Small per-instance wrapper around ``fal_client.SyncClient`` for
|
||||
managed queue hosts.
|
||||
|
||||
The wrapper carries its own ``fal_client`` module reference instead
|
||||
of reaching into a module global, so callers stay in control of
|
||||
which module's ``fal_client`` is in scope (matters for the test
|
||||
patches that swap the legacy module's ``fal_client`` attribute).
|
||||
"""
|
||||
|
||||
def __init__(self, fal_client: Any, *, key: str, queue_run_origin: str):
|
||||
sync_client_class = getattr(fal_client, "SyncClient", None)
|
||||
if sync_client_class is None:
|
||||
raise RuntimeError("fal_client.SyncClient is required for managed FAL gateway mode")
|
||||
|
||||
client_module = getattr(fal_client, "client", None)
|
||||
if client_module is None:
|
||||
raise RuntimeError("fal_client.client is required for managed FAL gateway mode")
|
||||
|
||||
self._queue_url_format = _normalize_fal_queue_url_format(queue_run_origin)
|
||||
self._sync_client = sync_client_class(key=key)
|
||||
self._http_client = getattr(self._sync_client, "_client", None)
|
||||
self._maybe_retry_request = getattr(client_module, "_maybe_retry_request", None)
|
||||
self._raise_for_status = getattr(client_module, "_raise_for_status", None)
|
||||
self._request_handle_class = getattr(client_module, "SyncRequestHandle", None)
|
||||
self._add_hint_header = getattr(client_module, "add_hint_header", None)
|
||||
self._add_priority_header = getattr(client_module, "add_priority_header", None)
|
||||
self._add_timeout_header = getattr(client_module, "add_timeout_header", None)
|
||||
|
||||
if self._http_client is None:
|
||||
raise RuntimeError("fal_client.SyncClient._client is required for managed FAL gateway mode")
|
||||
if self._maybe_retry_request is None or self._raise_for_status is None:
|
||||
raise RuntimeError("fal_client.client request helpers are required for managed FAL gateway mode")
|
||||
if self._request_handle_class is None:
|
||||
raise RuntimeError("fal_client.client.SyncRequestHandle is required for managed FAL gateway mode")
|
||||
|
||||
def submit(
|
||||
self,
|
||||
application: str,
|
||||
arguments: Dict[str, Any],
|
||||
*,
|
||||
path: str = "",
|
||||
hint: Optional[str] = None,
|
||||
webhook_url: Optional[str] = None,
|
||||
priority: Any = None,
|
||||
headers: Optional[Dict[str, str]] = None,
|
||||
start_timeout: Optional[Union[int, float]] = None,
|
||||
):
|
||||
url = self._queue_url_format + application
|
||||
if path:
|
||||
url += "/" + path.lstrip("/")
|
||||
if webhook_url is not None:
|
||||
url += "?" + urlencode({"fal_webhook": webhook_url})
|
||||
|
||||
request_headers = dict(headers or {})
|
||||
if hint is not None and self._add_hint_header is not None:
|
||||
self._add_hint_header(hint, request_headers)
|
||||
if priority is not None:
|
||||
if self._add_priority_header is None:
|
||||
raise RuntimeError("fal_client.client.add_priority_header is required for priority requests")
|
||||
self._add_priority_header(priority, request_headers)
|
||||
if start_timeout is not None:
|
||||
if self._add_timeout_header is None:
|
||||
raise RuntimeError("fal_client.client.add_timeout_header is required for timeout requests")
|
||||
self._add_timeout_header(start_timeout, request_headers)
|
||||
|
||||
response = self._maybe_retry_request(
|
||||
self._http_client,
|
||||
"POST",
|
||||
url,
|
||||
json=arguments,
|
||||
timeout=getattr(self._sync_client, "default_timeout", 120.0),
|
||||
headers=request_headers,
|
||||
)
|
||||
self._raise_for_status(response)
|
||||
|
||||
data = response.json()
|
||||
return self._request_handle_class(
|
||||
request_id=data["request_id"],
|
||||
response_url=data["response_url"],
|
||||
status_url=data["status_url"],
|
||||
cancel_url=data["cancel_url"],
|
||||
client=self._http_client,
|
||||
)
|
||||
+128
-26
@@ -26,7 +26,8 @@ import os
|
||||
import datetime
|
||||
import threading
|
||||
import uuid
|
||||
from typing import Any, Dict, Optional
|
||||
from typing import Any, Dict, Optional, Union
|
||||
from urllib.parse import urlencode
|
||||
|
||||
# fal_client is imported lazily — see _load_fal_client(). Pulling it
|
||||
# eagerly added ~64 ms to every CLI cold start because
|
||||
@@ -51,17 +52,19 @@ def _load_fal_client() -> Any:
|
||||
global fal_client
|
||||
if fal_client is not None:
|
||||
return fal_client
|
||||
from tools.fal_common import import_fal_client
|
||||
fal_client = import_fal_client()
|
||||
try:
|
||||
from tools.lazy_deps import ensure as _lazy_ensure
|
||||
_lazy_ensure("image.fal", prompt=False)
|
||||
except ImportError:
|
||||
pass
|
||||
except Exception as e:
|
||||
raise ImportError(str(e))
|
||||
import fal_client as _fal_client # noqa: F811 — module-global rebind
|
||||
fal_client = _fal_client
|
||||
return fal_client
|
||||
|
||||
|
||||
from tools.debug_helpers import DebugSession
|
||||
from tools.fal_common import (
|
||||
_ManagedFalSyncClient,
|
||||
_extract_http_status,
|
||||
_normalize_fal_queue_url_format, # noqa: F401 — re-exported for tests
|
||||
)
|
||||
from tools.managed_tool_gateway import resolve_managed_tool_gateway
|
||||
from tools.tool_backend_helpers import (
|
||||
fal_key_is_configured,
|
||||
@@ -357,25 +360,110 @@ def _resolve_managed_fal_gateway():
|
||||
return resolve_managed_tool_gateway("fal-queue")
|
||||
|
||||
|
||||
def _normalize_fal_queue_url_format(queue_run_origin: str) -> str:
|
||||
normalized_origin = str(queue_run_origin or "").strip().rstrip("/")
|
||||
if not normalized_origin:
|
||||
raise ValueError("Managed FAL queue origin is required")
|
||||
return f"{normalized_origin}/"
|
||||
|
||||
|
||||
class _ManagedFalSyncClient:
|
||||
"""Small per-instance wrapper around fal_client.SyncClient for managed queue hosts."""
|
||||
|
||||
def __init__(self, *, key: str, queue_run_origin: str):
|
||||
# Trigger the lazy import on first construction. Idempotent — the
|
||||
# placeholder is overwritten with the real module on first call.
|
||||
_load_fal_client()
|
||||
sync_client_class = getattr(fal_client, "SyncClient", None)
|
||||
if sync_client_class is None:
|
||||
raise RuntimeError("fal_client.SyncClient is required for managed FAL gateway mode")
|
||||
|
||||
client_module = getattr(fal_client, "client", None)
|
||||
if client_module is None:
|
||||
raise RuntimeError("fal_client.client is required for managed FAL gateway mode")
|
||||
|
||||
self._queue_url_format = _normalize_fal_queue_url_format(queue_run_origin)
|
||||
self._sync_client = sync_client_class(key=key)
|
||||
self._http_client = getattr(self._sync_client, "_client", None)
|
||||
self._maybe_retry_request = getattr(client_module, "_maybe_retry_request", None)
|
||||
self._raise_for_status = getattr(client_module, "_raise_for_status", None)
|
||||
self._request_handle_class = getattr(client_module, "SyncRequestHandle", None)
|
||||
self._add_hint_header = getattr(client_module, "add_hint_header", None)
|
||||
self._add_priority_header = getattr(client_module, "add_priority_header", None)
|
||||
self._add_timeout_header = getattr(client_module, "add_timeout_header", None)
|
||||
|
||||
if self._http_client is None:
|
||||
raise RuntimeError("fal_client.SyncClient._client is required for managed FAL gateway mode")
|
||||
if self._maybe_retry_request is None or self._raise_for_status is None:
|
||||
raise RuntimeError("fal_client.client request helpers are required for managed FAL gateway mode")
|
||||
if self._request_handle_class is None:
|
||||
raise RuntimeError("fal_client.client.SyncRequestHandle is required for managed FAL gateway mode")
|
||||
|
||||
def submit(
|
||||
self,
|
||||
application: str,
|
||||
arguments: Dict[str, Any],
|
||||
*,
|
||||
path: str = "",
|
||||
hint: Optional[str] = None,
|
||||
webhook_url: Optional[str] = None,
|
||||
priority: Any = None,
|
||||
headers: Optional[Dict[str, str]] = None,
|
||||
start_timeout: Optional[Union[int, float]] = None,
|
||||
):
|
||||
url = self._queue_url_format + application
|
||||
if path:
|
||||
url += "/" + path.lstrip("/")
|
||||
if webhook_url is not None:
|
||||
url += "?" + urlencode({"fal_webhook": webhook_url})
|
||||
|
||||
request_headers = dict(headers or {})
|
||||
if hint is not None and self._add_hint_header is not None:
|
||||
self._add_hint_header(hint, request_headers)
|
||||
if priority is not None:
|
||||
if self._add_priority_header is None:
|
||||
raise RuntimeError("fal_client.client.add_priority_header is required for priority requests")
|
||||
self._add_priority_header(priority, request_headers)
|
||||
if start_timeout is not None:
|
||||
if self._add_timeout_header is None:
|
||||
raise RuntimeError("fal_client.client.add_timeout_header is required for timeout requests")
|
||||
self._add_timeout_header(start_timeout, request_headers)
|
||||
|
||||
response = self._maybe_retry_request(
|
||||
self._http_client,
|
||||
"POST",
|
||||
url,
|
||||
json=arguments,
|
||||
timeout=getattr(self._sync_client, "default_timeout", 120.0),
|
||||
headers=request_headers,
|
||||
)
|
||||
self._raise_for_status(response)
|
||||
|
||||
data = response.json()
|
||||
return self._request_handle_class(
|
||||
request_id=data["request_id"],
|
||||
response_url=data["response_url"],
|
||||
status_url=data["status_url"],
|
||||
cancel_url=data["cancel_url"],
|
||||
client=self._http_client,
|
||||
)
|
||||
|
||||
|
||||
def _get_managed_fal_client(managed_gateway):
|
||||
"""Reuse the managed FAL client so its internal httpx.Client is not leaked per call."""
|
||||
global _managed_fal_client, _managed_fal_client_config
|
||||
|
||||
client_config = (
|
||||
managed_gateway.resolved_origin.rstrip("/"),
|
||||
managed_gateway.gateway_origin.rstrip("/"),
|
||||
managed_gateway.nous_user_token,
|
||||
)
|
||||
with _managed_fal_client_lock:
|
||||
if _managed_fal_client is not None and _managed_fal_client_config == client_config:
|
||||
return _managed_fal_client
|
||||
|
||||
# Resolve fal_client on the legacy module — preserves the test
|
||||
# pattern of monkey-patching ``image_generation_tool.fal_client``.
|
||||
_load_fal_client()
|
||||
_managed_fal_client = _ManagedFalSyncClient(
|
||||
fal_client,
|
||||
key=managed_gateway.nous_user_token,
|
||||
queue_run_origin=managed_gateway.resolved_origin,
|
||||
queue_run_origin=managed_gateway.gateway_origin,
|
||||
)
|
||||
_managed_fal_client_config = client_config
|
||||
return _managed_fal_client
|
||||
@@ -414,6 +502,24 @@ def _submit_fal_request(model: str, arguments: Dict[str, Any]):
|
||||
raise
|
||||
|
||||
|
||||
def _extract_http_status(exc: BaseException) -> Optional[int]:
|
||||
"""Return an HTTP status code from httpx/fal exceptions, else None.
|
||||
|
||||
Defensive across exception shapes — httpx.HTTPStatusError exposes
|
||||
``.response.status_code`` while fal_client wrappers may expose
|
||||
``.status_code`` directly.
|
||||
"""
|
||||
response = getattr(exc, "response", None)
|
||||
if response is not None:
|
||||
status = getattr(response, "status_code", None)
|
||||
if isinstance(status, int):
|
||||
return status
|
||||
status = getattr(exc, "status_code", None)
|
||||
if isinstance(status, int):
|
||||
return status
|
||||
return None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Model resolution + payload construction
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -867,12 +973,9 @@ def _read_configured_image_provider():
|
||||
"""Return the value of ``image_gen.provider`` from config.yaml, or None.
|
||||
|
||||
We only consult the plugin registry when this is explicitly set — an
|
||||
unset value keeps users on the in-tree FAL fallback even when other
|
||||
unset value keeps users on the legacy in-tree FAL path even when other
|
||||
providers happen to be registered (e.g. a user has OPENAI_API_KEY set
|
||||
for other features but never asked for OpenAI image gen). ``"fal"``
|
||||
explicitly routes through ``plugins/image_gen/fal/`` (which delegates
|
||||
back into this module's pipeline via call-time indirection — see
|
||||
issue #26241).
|
||||
for other features but never asked for OpenAI image gen).
|
||||
"""
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
@@ -891,16 +994,15 @@ def _dispatch_to_plugin_provider(prompt: str, aspect_ratio: str):
|
||||
"""Route the call to a plugin-registered provider when one is selected.
|
||||
|
||||
Returns a JSON string on dispatch, or ``None`` to fall through to the
|
||||
in-tree FAL fallback in ``image_generate_tool``.
|
||||
built-in FAL path.
|
||||
|
||||
Dispatch fires when ``image_gen.provider`` is explicitly set — including
|
||||
``"fal"`` itself, which now resolves to the
|
||||
``plugins/image_gen/fal/`` plugin (the plugin re-enters this module's
|
||||
pipeline via ``_it`` indirection so behavior is identical to the
|
||||
direct call, just routed through the registry).
|
||||
Dispatch only fires when ``image_gen.provider`` is explicitly set AND
|
||||
it does not point to ``fal`` (FAL still lives in-tree in this PR;
|
||||
a later PR ports it into ``plugins/image_gen/fal/``). Any other value
|
||||
that matches a registered plugin provider wins.
|
||||
"""
|
||||
configured = _read_configured_image_provider()
|
||||
if not configured:
|
||||
if not configured or configured == "fal":
|
||||
return None
|
||||
|
||||
# Also read configured model so we can pass it to the plugin
|
||||
|
||||
@@ -7,8 +7,7 @@ import logging
|
||||
import os
|
||||
from datetime import datetime, timezone
|
||||
from dataclasses import dataclass
|
||||
from typing import Callable, Optional, Tuple
|
||||
from urllib.parse import urlparse, urlunparse
|
||||
from typing import Callable, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -16,27 +15,6 @@ from hermes_constants import get_hermes_home
|
||||
from tools.tool_backend_helpers import managed_nous_tools_enabled
|
||||
|
||||
_DEFAULT_TOOL_GATEWAY_DOMAIN = "nousresearch.com"
|
||||
|
||||
|
||||
def _rewrite_localhost_origin(origin: str) -> Tuple[str, Optional[str]]:
|
||||
"""Rewrite ``*.localhost`` hostnames to ``127.0.0.1`` for DNS compatibility.
|
||||
|
||||
Python's :func:`socket.getaddrinfo` doesn't special-case ``*.localhost``
|
||||
subdomains (RFC 6761), so ``tools-gateway.localhost`` fails DNS resolution
|
||||
on most platforms. Bare ``localhost`` resolves fine and is left untouched.
|
||||
|
||||
Returns ``(resolved_origin, host_header_or_none)``.
|
||||
"""
|
||||
parsed = urlparse(origin)
|
||||
hostname = parsed.hostname
|
||||
if not hostname or not hostname.endswith(".localhost"):
|
||||
return origin, None
|
||||
|
||||
port = parsed.port
|
||||
netloc = f"127.0.0.1:{port}" if port else "127.0.0.1"
|
||||
host_header = f"{hostname}:{port}" if port else hostname
|
||||
resolved = urlunparse(parsed._replace(netloc=netloc))
|
||||
return resolved, host_header
|
||||
_DEFAULT_TOOL_GATEWAY_SCHEME = "https"
|
||||
_NOUS_ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 120
|
||||
|
||||
@@ -48,16 +26,6 @@ class ManagedToolGatewayConfig:
|
||||
nous_user_token: str
|
||||
managed_mode: bool
|
||||
|
||||
@property
def resolved_origin(self) -> str:
    """Origin with ``*.localhost`` hostnames rewritten to ``127.0.0.1``."""
    rewritten, _host_header = _rewrite_localhost_origin(self.gateway_origin)
    return rewritten
|
||||
|
||||
@property
def gateway_host_header(self) -> Optional[str]:
    """Original ``host[:port]`` when the origin was rewritten, else ``None``."""
    _rewritten, host_header = _rewrite_localhost_origin(self.gateway_origin)
    return host_header
|
||||
|
||||
|
||||
def auth_json_path():
|
||||
"""Return the Hermes auth store path, respecting HERMES_HOME overrides."""
|
||||
|
||||
@@ -21,11 +21,6 @@ def managed_nous_tools_enabled() -> bool:
|
||||
the free tier. We intentionally catch all exceptions and return
|
||||
False — never block the agent startup path.
|
||||
"""
|
||||
import os
|
||||
|
||||
if os.getenv("TOOL_GATEWAY_USER_TOKEN", "").strip():
|
||||
return True
|
||||
|
||||
try:
|
||||
from hermes_cli.auth import get_nous_auth_status
|
||||
|
||||
@@ -128,25 +123,6 @@ def prefers_gateway(config_section: str) -> bool:
|
||||
return False
|
||||
|
||||
|
||||
def portal_app_tools_enabled() -> bool:
    """Report whether the ``portal.app_tools`` feature flag is switched on.

    Lookup order: the ``PORTAL_APP_TOOLS`` environment variable, then the
    ``portal.app_tools`` key of the loaded config, then a default of True.
    Failures while loading the config are swallowed and treated as
    "not configured".
    """
    override = os.environ.get("PORTAL_APP_TOOLS")
    if override is not None:
        return is_truthy_value(override)
    try:
        from hermes_cli.config import load_config
        config = load_config() or {}
        section = config.get("portal")
        if isinstance(section, dict):
            return bool(section.get("app_tools", True))
    except Exception:
        # Deliberate best-effort: any config failure falls back to the default.
        pass
    return True
|
||||
|
||||
|
||||
def fal_key_is_configured() -> bool:
|
||||
"""Return True when FAL_KEY is set to a non-whitespace value.
|
||||
|
||||
|
||||
@@ -197,26 +197,6 @@ def _normalize_local_command_model(model_name: Optional[str]) -> str:
|
||||
return _normalize_local_model(model_name)
|
||||
|
||||
|
||||
def _try_lazy_install_stt() -> bool:
|
||||
"""Attempt to lazy-install faster-whisper and return True on success.
|
||||
|
||||
The module-level ``_HAS_FASTER_WHISPER`` flag is set at import time and
|
||||
cached. If the package wasn't installed at startup, calling ``ensure()``
|
||||
installs it. This function re-checks dynamically after installation so
|
||||
the provider can use it immediately without a process restart.
|
||||
"""
|
||||
try:
|
||||
from tools.lazy_deps import ensure
|
||||
ensure("stt.faster_whisper")
|
||||
# Re-check dynamically after install
|
||||
import importlib.util as _iu
|
||||
if _iu.find_spec("faster_whisper"):
|
||||
return True
|
||||
except Exception as exc:
|
||||
logger.debug("Lazy install of faster-whisper failed: %s", exc)
|
||||
return False
|
||||
|
||||
|
||||
def _get_provider(stt_config: dict) -> str:
|
||||
"""Determine which STT provider to use.
|
||||
|
||||
@@ -238,9 +218,6 @@ def _get_provider(stt_config: dict) -> str:
|
||||
return "local"
|
||||
if _has_local_command():
|
||||
return "local_command"
|
||||
# Try lazy-install before giving up
|
||||
if _try_lazy_install_stt():
|
||||
return "local"
|
||||
logger.warning(
|
||||
"STT provider 'local' configured but unavailable "
|
||||
"(install faster-whisper or set HERMES_LOCAL_STT_COMMAND)"
|
||||
@@ -308,9 +285,6 @@ def _get_provider(stt_config: dict) -> str:
|
||||
return "local"
|
||||
if _has_local_command():
|
||||
return "local_command"
|
||||
# Try lazy-install before falling through to cloud providers
|
||||
if _try_lazy_install_stt():
|
||||
return "local"
|
||||
if _HAS_OPENAI and get_env_value("GROQ_API_KEY"):
|
||||
logger.info("No local STT available, using Groq Whisper API")
|
||||
return "groq"
|
||||
@@ -429,8 +403,7 @@ def _transcribe_local(file_path: str, model_name: str) -> Dict[str, Any]:
|
||||
global _local_model, _local_model_name
|
||||
|
||||
if not _HAS_FASTER_WHISPER:
|
||||
if not _try_lazy_install_stt():
|
||||
return {"success": False, "transcript": "", "error": "faster-whisper not installed"}
|
||||
return {"success": False, "transcript": "", "error": "faster-whisper not installed"}
|
||||
|
||||
try:
|
||||
# Lazy-load the model (downloads on first use, ~150 MB for 'base')
|
||||
@@ -941,7 +914,7 @@ def _resolve_openai_audio_client_config() -> tuple[str, str]:
|
||||
raise ValueError(message)
|
||||
|
||||
return managed_gateway.nous_user_token, urljoin(
|
||||
f"{managed_gateway.resolved_origin.rstrip('/')}/", "v1"
|
||||
f"{managed_gateway.gateway_origin.rstrip('/')}/", "v1"
|
||||
)
|
||||
|
||||
|
||||
|
||||
+1
-1
@@ -2048,7 +2048,7 @@ def _resolve_openai_audio_client_config() -> tuple[str, str]:
|
||||
raise ValueError(message)
|
||||
|
||||
return managed_gateway.nous_user_token, urljoin(
|
||||
f"{managed_gateway.resolved_origin.rstrip('/')}/", "v1"
|
||||
f"{managed_gateway.gateway_origin.rstrip('/')}/", "v1"
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -58,8 +58,6 @@ _HERMES_CORE_TOOLS = [
|
||||
"cronjob",
|
||||
# Cross-platform messaging (gated on gateway running via check_fn)
|
||||
"send_message",
|
||||
# App integrations (500+ apps via Nous tool gateway, gated via check_fn)
|
||||
"app_search_tools", "app_tool_schemas", "app_execute_tools", "app_manage_connections",
|
||||
# Home Assistant smart home control (gated on HASS_TOKEN via check_fn)
|
||||
"ha_list_entities", "ha_get_state", "ha_list_services", "ha_call_service",
|
||||
# Kanban multi-agent coordination — only in schema when the agent is
|
||||
@@ -241,12 +239,6 @@ TOOLSETS = {
|
||||
"includes": []
|
||||
},
|
||||
|
||||
"app_tools": {
|
||||
"description": "External app integrations (Gmail, Slack, GitHub, Notion, 500+ apps) via Nous tool gateway",
|
||||
"tools": ["app_search_tools", "app_tool_schemas", "app_execute_tools", "app_manage_connections"],
|
||||
"includes": []
|
||||
},
|
||||
|
||||
"kanban": {
|
||||
"description": (
|
||||
"Kanban multi-agent coordination — only active when the agent "
|
||||
|
||||
+7
-95
@@ -1061,10 +1061,6 @@ def _session_tool_progress_mode(sid: str) -> str:
|
||||
return str(_sessions.get(sid, {}).get("tool_progress_mode", "all") or "all")
|
||||
|
||||
|
||||
def _session_verbose(sid: str) -> bool:
|
||||
return _session_tool_progress_mode(sid) == "verbose"
|
||||
|
||||
|
||||
def _tool_progress_enabled(sid: str) -> bool:
|
||||
return _session_tool_progress_mode(sid) != "off"
|
||||
|
||||
@@ -1496,74 +1492,6 @@ def _tool_ctx(name: str, args: dict) -> str:
|
||||
return ""
|
||||
|
||||
|
||||
_TUI_VERBOSE_TEXT_MAX_CHARS = 16_000
|
||||
_TUI_VERBOSE_TEXT_MAX_LINES = 240
|
||||
|
||||
|
||||
def _cap_tui_verbose_text(text: str) -> str:
|
||||
if (
|
||||
len(text) <= _TUI_VERBOSE_TEXT_MAX_CHARS
|
||||
and text.count("\n") < _TUI_VERBOSE_TEXT_MAX_LINES
|
||||
):
|
||||
return text
|
||||
|
||||
idx = len(text)
|
||||
start = 0
|
||||
for _ in range(_TUI_VERBOSE_TEXT_MAX_LINES):
|
||||
idx = text.rfind("\n", 0, idx)
|
||||
if idx < 0:
|
||||
start = 0
|
||||
break
|
||||
start = idx + 1
|
||||
|
||||
line_start = start
|
||||
start = max(line_start, len(text) - _TUI_VERBOSE_TEXT_MAX_CHARS)
|
||||
if start > line_start:
|
||||
next_break = text.find("\n", start)
|
||||
if 0 <= next_break < len(text) - 1:
|
||||
start = next_break + 1
|
||||
|
||||
tail = text[start:].lstrip()
|
||||
omitted_chars = max(0, len(text) - len(tail))
|
||||
omitted_lines = text[:start].count("\n")
|
||||
if omitted_lines:
|
||||
label = (
|
||||
"[showing verbose tail; omitted "
|
||||
f"{omitted_lines} lines / {omitted_chars} chars]\n"
|
||||
)
|
||||
else:
|
||||
label = f"[showing verbose tail; omitted {omitted_chars} chars]\n"
|
||||
return f"{label}{tail}"
|
||||
|
||||
|
||||
def _redact_tui_verbose_text(text: str) -> str:
|
||||
try:
|
||||
from agent.redact import redact_sensitive_text
|
||||
|
||||
redacted = redact_sensitive_text(str(text), force=True)
|
||||
except Exception:
|
||||
return ""
|
||||
return _cap_tui_verbose_text(redacted)
|
||||
|
||||
|
||||
def _tool_args_text(args: dict) -> str:
|
||||
try:
|
||||
raw = json.dumps(args or {}, indent=2, ensure_ascii=False, default=str)
|
||||
except Exception:
|
||||
raw = str(args or {})
|
||||
return _redact_tui_verbose_text(raw)
|
||||
|
||||
|
||||
def _tool_result_text(result: object) -> str:
|
||||
try:
|
||||
from agent.tool_dispatch_helpers import _multimodal_text_summary
|
||||
|
||||
raw = _multimodal_text_summary(result)
|
||||
except Exception:
|
||||
raw = str(result)
|
||||
return _redact_tui_verbose_text(raw)
|
||||
|
||||
|
||||
def _fmt_tool_duration(seconds: float | None) -> str:
|
||||
if seconds is None:
|
||||
return ""
|
||||
@@ -1625,18 +1553,13 @@ def _on_tool_start(sid: str, tool_call_id: str, name: str, args: dict):
|
||||
pass
|
||||
session.setdefault("tool_started_at", {})[tool_call_id] = time.time()
|
||||
if _tool_progress_enabled(sid):
|
||||
payload = {
|
||||
"tool_id": tool_call_id,
|
||||
"name": name,
|
||||
"context": _tool_ctx(name, args),
|
||||
}
|
||||
if _session_verbose(sid):
|
||||
args_text = _tool_args_text(args)
|
||||
if args_text:
|
||||
payload["args_text"] = args_text
|
||||
# tool.complete is the source of truth for todos (full list from the
|
||||
# tool result). args.todos here may be a partial merge update.
|
||||
_emit("tool.start", sid, payload)
|
||||
_emit(
|
||||
"tool.start",
|
||||
sid,
|
||||
{"tool_id": tool_call_id, "name": name, "context": _tool_ctx(name, args)},
|
||||
)
|
||||
|
||||
|
||||
def _on_tool_complete(sid: str, tool_call_id: str, name: str, args: dict, result: str):
|
||||
@@ -1653,10 +1576,6 @@ def _on_tool_complete(sid: str, tool_call_id: str, name: str, args: dict, result
|
||||
summary = _tool_summary(name, result, duration_s)
|
||||
if summary:
|
||||
payload["summary"] = summary
|
||||
if _session_verbose(sid):
|
||||
result_text = _tool_result_text(result)
|
||||
if result_text:
|
||||
payload["result_text"] = result_text
|
||||
if name == "todo":
|
||||
try:
|
||||
data = json.loads(result)
|
||||
@@ -1696,10 +1615,7 @@ def _on_tool_progress(
|
||||
_emit("tool.progress", sid, {"name": name, "preview": preview or ""})
|
||||
return
|
||||
if event_type == "reasoning.available" and preview:
|
||||
payload: dict[str, object] = {"text": str(preview)}
|
||||
if _session_verbose(sid):
|
||||
payload["verbose"] = True
|
||||
_emit("reasoning.available", sid, payload)
|
||||
_emit("reasoning.available", sid, {"text": str(preview)})
|
||||
return
|
||||
if event_type.startswith("subagent."):
|
||||
payload = {
|
||||
@@ -1775,11 +1691,7 @@ def _agent_cbs(sid: str) -> dict:
|
||||
"tool_gen_callback": lambda name: _tool_progress_enabled(sid)
|
||||
and _emit("tool.generating", sid, {"name": name}),
|
||||
"thinking_callback": lambda text: _emit("thinking.delta", sid, {"text": text}),
|
||||
"reasoning_callback": lambda text: _emit(
|
||||
"reasoning.delta",
|
||||
sid,
|
||||
{"text": text, **({"verbose": True} if _session_verbose(sid) else {})},
|
||||
),
|
||||
"reasoning_callback": lambda text: _emit("reasoning.delta", sid, {"text": text}),
|
||||
"status_callback": lambda kind, text=None: _status_update(
|
||||
sid, str(kind), None if text is None else str(text)
|
||||
),
|
||||
|
||||
@@ -342,25 +342,6 @@ describe('createGatewayEventHandler', () => {
|
||||
expect(appended[appended.length - 1]).toMatchObject({ role: 'assistant', text: 'final answer' })
|
||||
})
|
||||
|
||||
it('shows verbose reasoning even when normal reasoning display is off', () => {
|
||||
vi.useFakeTimers()
|
||||
patchUiState({ showReasoning: false })
|
||||
const appended: Msg[] = []
|
||||
const streamed = 'verbose-only reasoning'
|
||||
|
||||
try {
|
||||
const onEvent = createGatewayEventHandler(buildCtx(appended))
|
||||
|
||||
onEvent({ payload: { text: streamed, verbose: true }, type: 'reasoning.delta' } as any)
|
||||
vi.runOnlyPendingTimers()
|
||||
|
||||
expect(turnController.reasoningText).toBe(streamed)
|
||||
expect(getTurnState().reasoning).toBe(streamed)
|
||||
} finally {
|
||||
vi.useRealTimers()
|
||||
}
|
||||
})
|
||||
|
||||
it('ignores fallback reasoning.available when streamed reasoning already exists', () => {
|
||||
const appended: Msg[] = []
|
||||
const streamed = 'short streamed reasoning'
|
||||
@@ -504,25 +485,6 @@ describe('createGatewayEventHandler', () => {
|
||||
expect(appended[3]?.text).not.toContain('```diff')
|
||||
})
|
||||
|
||||
it('keeps verbose result text on inline_diff tool completions', () => {
|
||||
const appended: Msg[] = []
|
||||
const onEvent = createGatewayEventHandler(buildCtx(appended))
|
||||
const diff = '--- a/foo.ts\n+++ b/foo.ts\n@@\n-old\n+new'
|
||||
|
||||
onEvent({
|
||||
payload: { args_text: '{ "path": "foo.ts" }', context: 'foo.ts', name: 'patch', tool_id: 'tool-1' },
|
||||
type: 'tool.start'
|
||||
} as any)
|
||||
onEvent({
|
||||
payload: { inline_diff: diff, result_text: 'patched result', tool_id: 'tool-1' },
|
||||
type: 'tool.complete'
|
||||
} as any)
|
||||
|
||||
expect(turnController.segmentMessages[0]).toMatchObject({ kind: 'diff' })
|
||||
expect(turnController.segmentMessages[0]?.tools?.[0]).toContain('Args:\n{ "path": "foo.ts" }')
|
||||
expect(turnController.segmentMessages[0]?.tools?.[0]).toContain('Result:\npatched result')
|
||||
})
|
||||
|
||||
it('keeps full final responses from duplicating flushed pre-diff narration', () => {
|
||||
const appended: Msg[] = []
|
||||
const onEvent = createGatewayEventHandler(buildCtx(appended))
|
||||
|
||||
@@ -222,21 +222,6 @@ describe('createSlashHandler', () => {
|
||||
expect(ctx.gateway.rpc).not.toHaveBeenCalled()
|
||||
})
|
||||
|
||||
it('keeps visible scrollback when branching a TUI session', async () => {
|
||||
patchUiState({ sid: 'sid-parent' })
|
||||
const rpc = vi.fn(() => Promise.resolve({ session_id: 'sid-branch', title: 'branch title' }))
|
||||
const ctx = buildCtx({ gateway: { ...buildGateway(), rpc } })
|
||||
|
||||
expect(createSlashHandler(ctx)('/branch branch title')).toBe(true)
|
||||
|
||||
expect(rpc).toHaveBeenCalledWith('session.branch', { name: 'branch title', session_id: 'sid-parent' })
|
||||
await vi.waitFor(() => {
|
||||
expect(getUiState().sid).toBe('sid-branch')
|
||||
expect(ctx.transcript.sys).toHaveBeenCalledWith('branched → branch title')
|
||||
})
|
||||
expect(ctx.transcript.setHistoryItems).not.toHaveBeenCalled()
|
||||
})
|
||||
|
||||
it('reloads skills in the live gateway and refreshes the catalog', async () => {
|
||||
const rpc = vi.fn((method: string) => {
|
||||
if (method === 'skills.reload') {
|
||||
|
||||
@@ -16,16 +16,4 @@ describe('composerPromptText', () => {
|
||||
expect(composerPromptText('❯', 'custom')).toBe('❯')
|
||||
expect(composerPromptText('❯')).toBe('❯')
|
||||
})
|
||||
|
||||
it('uses a Termux-safe ASCII prompt marker in normal mode', () => {
|
||||
expect(composerPromptText('❯', 'coder', false, true, 50)).toBe('>')
|
||||
})
|
||||
|
||||
it('keeps profile prefix suppressed on narrow Termux widths', () => {
|
||||
expect(composerPromptText('❯', 'upstr', false, true, 72)).toBe('>')
|
||||
})
|
||||
|
||||
it('allows profile prefix on very wide Termux panes', () => {
|
||||
expect(composerPromptText('❯', 'upstr', false, true, 120)).toBe('upstr >')
|
||||
})
|
||||
})
|
||||
|
||||
@@ -1,40 +0,0 @@
|
||||
import { describe, expect, it } from 'vitest'
|
||||
|
||||
import { stableComposerColumns, transcriptBodyWidth } from '../lib/inputMetrics.js'
|
||||
import { composerPromptText } from '../lib/prompt.js'
|
||||
|
||||
describe('Termux composer prompt + width guards', () => {
|
||||
it('uses a single-cell ASCII prompt marker in Termux mode', () => {
|
||||
expect(composerPromptText('❯', 'coder', false, true, 50)).toBe('>')
|
||||
})
|
||||
|
||||
it('suppresses profile prefixes on narrow Termux panes', () => {
|
||||
expect(composerPromptText('❯', 'upstr', false, true, 72)).toBe('>')
|
||||
})
|
||||
|
||||
it('keeps profile context on very wide Termux panes', () => {
|
||||
expect(composerPromptText('❯', 'upstr', false, true, 120)).toBe('upstr >')
|
||||
})
|
||||
|
||||
it('reserves fewer columns for gutter on narrow Termux widths', () => {
|
||||
// 32 columns after prompt: desktop reserves 2 for transcript scrollbar,
|
||||
// Termux keeps those 2 columns for the active composer.
|
||||
expect(stableComposerColumns(40, 8, false)).toBe(28)
|
||||
expect(stableComposerColumns(40, 8, true)).toBe(30)
|
||||
|
||||
// With ample room, Termux still reserves the gutter for alignment.
|
||||
expect(stableComposerColumns(60, 8, true)).toBe(48)
|
||||
})
|
||||
|
||||
it('never over-allocates transcript body width on narrow panes', () => {
|
||||
// Old behavior hard-minned to 20 columns and overflowed narrow layouts.
|
||||
expect(transcriptBodyWidth(24, 'assistant', '>', true)).toBe(19)
|
||||
expect(transcriptBodyWidth(24, 'user', 'upstr >', true)).toBe(14)
|
||||
expect(transcriptBodyWidth(10, 'user', '>', true)).toBeGreaterThanOrEqual(1)
|
||||
})
|
||||
|
||||
it('keeps legacy desktop floor outside Termux mode', () => {
|
||||
expect(transcriptBodyWidth(24, 'assistant', '>')).toBe(20)
|
||||
expect(transcriptBodyWidth(24, 'user', 'upstr >')).toBe(20)
|
||||
})
|
||||
})
|
||||
@@ -3,7 +3,6 @@ import { describe, expect, it } from 'vitest'
|
||||
import {
|
||||
boundedLiveRenderText,
|
||||
buildToolTrailLine,
|
||||
buildVerboseToolTrailLine,
|
||||
edgePreview,
|
||||
estimateRows,
|
||||
estimateTokensRough,
|
||||
@@ -13,8 +12,8 @@ import {
|
||||
lastCotTrailIndex,
|
||||
parseToolTrailResultLine,
|
||||
pasteTokenLabel,
|
||||
sameToolTrailGroup,
|
||||
sanitizeAnsiForRender,
|
||||
sameToolTrailGroup,
|
||||
splitToolDuration,
|
||||
stripAnsi,
|
||||
thinkingPreview
|
||||
@@ -38,39 +37,6 @@ describe('buildToolTrailLine', () => {
|
||||
})
|
||||
})
|
||||
|
||||
describe('buildVerboseToolTrailLine', () => {
|
||||
it('preserves multiline args and result details', () => {
|
||||
const line = buildVerboseToolTrailLine(
|
||||
'terminal',
|
||||
'npm test',
|
||||
false,
|
||||
1.25,
|
||||
'{\n "cmd": "npm test"\n}',
|
||||
'first line\nsecond :: line'
|
||||
)
|
||||
|
||||
expect(line).toContain('Args:\n{')
|
||||
expect(line).toContain('Result:\nfirst line\nsecond :: line')
|
||||
expect(parseToolTrailResultLine(line)).toEqual({
|
||||
call: 'Terminal("npm test") (1.3s)',
|
||||
detail: 'Args:\n{\n "cmd": "npm test"\n}\nResult:\nfirst line\nsecond :: line',
|
||||
mark: '✓'
|
||||
})
|
||||
})
|
||||
|
||||
it('labels verbose failures as errors', () => {
|
||||
const line = buildVerboseToolTrailLine('terminal', 'npm test', true, 0.5, undefined, 'command failed')
|
||||
|
||||
expect(line).toContain('Error:\ncommand failed')
|
||||
expect(line).not.toContain('Result:\ncommand failed')
|
||||
expect(parseToolTrailResultLine(line)).toEqual({
|
||||
call: 'Terminal("npm test") (0.5s)',
|
||||
detail: 'Error:\ncommand failed',
|
||||
mark: '✗'
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
describe('lastCotTrailIndex', () => {
|
||||
it('finds last non-result line', () => {
|
||||
expect(lastCotTrailIndex(['a ✓', 'thinking…'])).toBe(1)
|
||||
|
||||
@@ -178,22 +178,7 @@ describe('supportsFastEchoTerminal', () => {
|
||||
expect(supportsFastEchoTerminal({ TERM_PROGRAM: 'Apple_Terminal' } as NodeJS.ProcessEnv)).toBe(false)
|
||||
})
|
||||
|
||||
it('disables fast-echo by default in Termux mode', () => {
|
||||
expect(
|
||||
supportsFastEchoTerminal({ TERMUX_VERSION: '0.118.0', PREFIX: '/data/data/com.termux/files/usr' } as NodeJS.ProcessEnv)
|
||||
).toBe(false)
|
||||
})
|
||||
|
||||
it('allows explicit Termux fast-echo opt-in via env override', () => {
|
||||
expect(
|
||||
supportsFastEchoTerminal({
|
||||
HERMES_TUI_TERMUX_FAST_ECHO: '1',
|
||||
TERMUX_VERSION: '0.118.0'
|
||||
} as NodeJS.ProcessEnv)
|
||||
).toBe(true)
|
||||
})
|
||||
|
||||
it('keeps fast-echo enabled in VS Code and unknown non-Termux terminals', () => {
|
||||
it('keeps fast-echo enabled in VS Code and unknown terminals', () => {
|
||||
expect(supportsFastEchoTerminal({ TERM_PROGRAM: 'vscode' } as NodeJS.ProcessEnv)).toBe(true)
|
||||
expect(supportsFastEchoTerminal({ TERM: 'xterm-256color' } as NodeJS.ProcessEnv)).toBe(true)
|
||||
})
|
||||
|
||||
@@ -491,13 +491,13 @@ export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev:
|
||||
|
||||
case 'reasoning.delta':
|
||||
if (ev.payload?.text) {
|
||||
turnController.recordReasoningDelta(ev.payload.text, Boolean(ev.payload.verbose))
|
||||
turnController.recordReasoningDelta(ev.payload.text)
|
||||
}
|
||||
|
||||
return
|
||||
|
||||
case 'reasoning.available':
|
||||
turnController.recordReasoningAvailable(String(ev.payload?.text ?? ''), Boolean(ev.payload?.verbose))
|
||||
turnController.recordReasoningAvailable(String(ev.payload?.text ?? ''))
|
||||
|
||||
return
|
||||
|
||||
@@ -517,18 +517,12 @@ export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev:
|
||||
|
||||
case 'tool.start':
|
||||
turnController.recordTodos(ev.payload.todos)
|
||||
turnController.recordToolStart(
|
||||
ev.payload.tool_id,
|
||||
ev.payload.name ?? 'tool',
|
||||
ev.payload.context ?? '',
|
||||
ev.payload.args_text ? stripAnsi(String(ev.payload.args_text)) : undefined
|
||||
)
|
||||
turnController.recordToolStart(ev.payload.tool_id, ev.payload.name ?? 'tool', ev.payload.context ?? '')
|
||||
|
||||
return
|
||||
case 'tool.complete': {
|
||||
const inlineDiffText =
|
||||
ev.payload.inline_diff && getUiState().inlineDiffs ? stripAnsi(String(ev.payload.inline_diff)).trim() : ''
|
||||
const resultText = ev.payload.result_text ? stripAnsi(String(ev.payload.result_text)) : undefined
|
||||
|
||||
if (inlineDiffText) {
|
||||
turnController.recordInlineDiffToolComplete(
|
||||
@@ -536,8 +530,7 @@ export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev:
|
||||
ev.payload.tool_id,
|
||||
ev.payload.name,
|
||||
ev.payload.error,
|
||||
ev.payload.duration_s,
|
||||
resultText
|
||||
ev.payload.duration_s
|
||||
)
|
||||
} else {
|
||||
turnController.recordToolComplete(
|
||||
@@ -546,8 +539,7 @@ export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev:
|
||||
ev.payload.error,
|
||||
ev.payload.summary,
|
||||
ev.payload.duration_s,
|
||||
ev.payload.todos,
|
||||
resultText
|
||||
ev.payload.todos
|
||||
)
|
||||
}
|
||||
|
||||
|
||||
@@ -212,6 +212,7 @@ export const sessionCommands: SlashCommand[] = [
|
||||
void ctx.session.closeSession(prevSid)
|
||||
patchUiState({ sid: r.session_id })
|
||||
ctx.session.setSessionStartedAt(Date.now())
|
||||
ctx.transcript.setHistoryItems([])
|
||||
ctx.transcript.sys(`branched → ${r.title ?? ''}`)
|
||||
})
|
||||
)
|
||||
|
||||
@@ -11,7 +11,6 @@ import { hasReasoningTag, splitReasoning } from '../lib/reasoning.js'
|
||||
import {
|
||||
boundedLiveRenderText,
|
||||
buildToolTrailLine,
|
||||
buildVerboseToolTrailLine,
|
||||
estimateTokensRough,
|
||||
isTransientTrailLine,
|
||||
sameToolTrailGroup,
|
||||
@@ -543,8 +542,8 @@ class TurnController {
|
||||
}
|
||||
}
|
||||
|
||||
recordReasoningAvailable(text: string, force = false) {
|
||||
if (this.interrupted || (!force && !getUiState().showReasoning)) {
|
||||
recordReasoningAvailable(text: string) {
|
||||
if (this.interrupted || !getUiState().showReasoning) {
|
||||
return
|
||||
}
|
||||
|
||||
@@ -561,8 +560,8 @@ class TurnController {
|
||||
this.pulseReasoningStreaming()
|
||||
}
|
||||
|
||||
recordReasoningDelta(text: string, force = false) {
|
||||
if (this.interrupted || (!force && !getUiState().showReasoning)) {
|
||||
recordReasoningDelta(text: string) {
|
||||
if (this.interrupted || !getUiState().showReasoning) {
|
||||
return
|
||||
}
|
||||
|
||||
@@ -588,15 +587,14 @@ class TurnController {
|
||||
error?: string,
|
||||
summary?: string,
|
||||
duration?: number,
|
||||
todos?: unknown,
|
||||
resultText?: string
|
||||
todos?: unknown
|
||||
) {
|
||||
if (this.interrupted) {
|
||||
return
|
||||
}
|
||||
|
||||
this.recordTodos(todos)
|
||||
const line = this.completeTool(toolId, fallbackName, error, summary, duration, resultText)
|
||||
const line = this.completeTool(toolId, fallbackName, error, summary, duration)
|
||||
|
||||
this.pendingSegmentTools = [...this.pendingSegmentTools, line]
|
||||
this.flushPendingToolsIntoLastSegment()
|
||||
@@ -608,42 +606,30 @@ class TurnController {
|
||||
toolId: string,
|
||||
fallbackName?: string,
|
||||
error?: string,
|
||||
duration?: number,
|
||||
resultText?: string
|
||||
duration?: number
|
||||
) {
|
||||
if (this.interrupted) {
|
||||
return
|
||||
}
|
||||
|
||||
this.flushStreamingSegment()
|
||||
this.pushInlineDiffSegment(diffText, [this.completeTool(toolId, fallbackName, error, '', duration, resultText)])
|
||||
this.pushInlineDiffSegment(diffText, [this.completeTool(toolId, fallbackName, error, '', duration)])
|
||||
this.publishToolState()
|
||||
}
|
||||
|
||||
private completeTool(
|
||||
toolId: string,
|
||||
fallbackName?: string,
|
||||
error?: string,
|
||||
summary?: string,
|
||||
duration?: number,
|
||||
resultText?: string
|
||||
) {
|
||||
private completeTool(toolId: string, fallbackName?: string, error?: string, summary?: string, duration?: number) {
|
||||
const done = this.activeTools.find(tool => tool.id === toolId)
|
||||
const name = done?.name ?? fallbackName ?? 'tool'
|
||||
const label = toolTrailLabel(name)
|
||||
const fallbackDuration = done?.startedAt ? (Date.now() - done.startedAt) / 1000 : undefined
|
||||
|
||||
const line =
|
||||
done?.verboseArgs || resultText
|
||||
? buildVerboseToolTrailLine(
|
||||
name,
|
||||
done?.context || '',
|
||||
Boolean(error),
|
||||
duration ?? fallbackDuration,
|
||||
done?.verboseArgs,
|
||||
error || resultText || summary || ''
|
||||
)
|
||||
: buildToolTrailLine(name, done?.context || '', Boolean(error), error || summary || '', duration ?? fallbackDuration)
|
||||
const line = buildToolTrailLine(
|
||||
name,
|
||||
done?.context || '',
|
||||
Boolean(error),
|
||||
error || summary || '',
|
||||
duration ?? fallbackDuration
|
||||
)
|
||||
|
||||
this.activeTools = this.activeTools.filter(tool => tool.id !== toolId)
|
||||
|
||||
@@ -689,7 +675,7 @@ class TurnController {
|
||||
}, STREAM_BATCH_MS)
|
||||
}
|
||||
|
||||
recordToolStart(toolId: string, name: string, context: string, verboseArgs?: string) {
|
||||
recordToolStart(toolId: string, name: string, context: string) {
|
||||
if (this.interrupted) {
|
||||
return
|
||||
}
|
||||
@@ -702,7 +688,7 @@ class TurnController {
|
||||
const sample = `${name} ${context}`.trim()
|
||||
|
||||
this.toolTokenAcc += sample ? estimateTokensRough(sample) : 0
|
||||
this.activeTools = [...this.activeTools, { context, id: toolId, name, startedAt: Date.now(), verboseArgs }]
|
||||
this.activeTools = [...this.activeTools, { context, id: toolId, name, startedAt: Date.now() }]
|
||||
|
||||
patchTurnState({ toolTokens: this.toolTokenAcc, tools: this.activeTools })
|
||||
}
|
||||
|
||||
@@ -6,7 +6,7 @@ import { useGateway } from '../app/gatewayContext.js'
|
||||
import type { AppLayoutProps } from '../app/interfaces.js'
|
||||
import { $isBlocked, $overlayState, patchOverlayState } from '../app/overlayStore.js'
|
||||
import { $uiState } from '../app/uiStore.js'
|
||||
import { INLINE_MODE, SHOW_FPS, TERMUX_TUI_MODE } from '../config/env.js'
|
||||
import { INLINE_MODE, SHOW_FPS } from '../config/env.js'
|
||||
import { PLACEHOLDER } from '../content/placeholders.js'
|
||||
import {
|
||||
COMPOSER_PROMPT_GAP_WIDTH,
|
||||
@@ -169,10 +169,10 @@ const ComposerPane = memo(function ComposerPane({
|
||||
const ui = useStore($uiState)
|
||||
const isBlocked = useStore($isBlocked)
|
||||
const sh = (composer.inputBuf[0] ?? composer.input).startsWith('!')
|
||||
const promptText = composerPromptText(ui.theme.brand.prompt, ui.info?.profile_name, sh, TERMUX_TUI_MODE, composer.cols)
|
||||
const promptText = composerPromptText(ui.theme.brand.prompt, ui.info?.profile_name, sh)
|
||||
const promptWidth = composerPromptWidth(promptText)
|
||||
const promptBlank = ' '.repeat(promptWidth)
|
||||
const inputColumns = stableComposerColumns(composer.cols, promptWidth, TERMUX_TUI_MODE)
|
||||
const inputColumns = stableComposerColumns(composer.cols, promptWidth)
|
||||
const inputHeight = inputVisualHeight(composer.input, inputColumns)
|
||||
const inputMouseRef = useRef<null | TextInputMouseApi>(null)
|
||||
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
import { Ansi, Box, NoSelect, Text } from '@hermes/ink'
|
||||
import { memo, useState } from 'react'
|
||||
|
||||
import { TERMUX_TUI_MODE } from '../config/env.js'
|
||||
import { LONG_MSG } from '../config/limits.js'
|
||||
import { sectionMode } from '../domain/details.js'
|
||||
import { userDisplay } from '../domain/messages.js'
|
||||
@@ -140,7 +139,7 @@ export const MessageLine = memo(function MessageLine({
|
||||
}
|
||||
|
||||
if (msg.role === 'assistant') {
|
||||
const bodyWidth = transcriptBodyWidth(cols, msg.role, t.brand.prompt, TERMUX_TUI_MODE)
|
||||
const bodyWidth = transcriptBodyWidth(cols, msg.role, t.brand.prompt)
|
||||
|
||||
return isStreaming ? (
|
||||
// Incremental markdown: split at the last stable block boundary so
|
||||
@@ -202,7 +201,7 @@ export const MessageLine = memo(function MessageLine({
|
||||
</Text>
|
||||
</NoSelect>
|
||||
|
||||
<Box width={transcriptBodyWidth(cols, msg.role, t.brand.prompt, TERMUX_TUI_MODE)}>{content}</Box>
|
||||
<Box width={transcriptBodyWidth(cols, msg.role, t.brand.prompt)}>{content}</Box>
|
||||
</Box>
|
||||
</Box>
|
||||
)
|
||||
|
||||
@@ -13,7 +13,6 @@ import {
|
||||
isVoiceToggleKey,
|
||||
type ParsedVoiceRecordKey
|
||||
} from '../lib/platform.js'
|
||||
import { isTermuxTuiMode } from '../lib/termux.js'
|
||||
|
||||
type InkExt = typeof Ink & {
|
||||
stringWidth: (s: string) => number
|
||||
@@ -299,23 +298,7 @@ export function canFastBackspaceShape(current: string, cursor: number, columns?:
|
||||
export function supportsFastEchoTerminal(env: NodeJS.ProcessEnv = process.env): boolean {
|
||||
// Terminal.app still shows paint/cursor artifacts under the fast-echo
|
||||
// bypass path. Fall back to the normal Ink render path there.
|
||||
if ((env.TERM_PROGRAM ?? '').trim() === 'Apple_Terminal') {
|
||||
return false
|
||||
}
|
||||
|
||||
// Termux terminals are especially sensitive to bypass-path cursor drift and
|
||||
// stale paints at soft-wrap boundaries on tall/narrow viewports. Keep this
|
||||
// off by default in Termux mode; allow explicit opt-in for local debugging.
|
||||
if (isTermuxTuiMode(env)) {
|
||||
const override = String(env.HERMES_TUI_TERMUX_FAST_ECHO ?? '').trim().toLowerCase()
|
||||
if (override) {
|
||||
return /^(?:1|true|yes|on)$/i.test(override)
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
return true
|
||||
return (env.TERM_PROGRAM ?? '').trim() !== 'Apple_Terminal'
|
||||
}
|
||||
|
||||
function renderWithCursor(value: string, cursor: number) {
|
||||
|
||||
@@ -856,16 +856,7 @@ export const ToolTrail = memo(function ToolTrail({
|
||||
color: t.color.text,
|
||||
key: tool.id,
|
||||
label,
|
||||
details: tool.verboseArgs
|
||||
? [
|
||||
{
|
||||
color: t.color.muted,
|
||||
content: `Args:\n${boundedLiveRenderText(tool.verboseArgs)}`,
|
||||
dimColor: true,
|
||||
key: `${tool.id}-args`
|
||||
}
|
||||
]
|
||||
: [],
|
||||
details: [],
|
||||
content: (
|
||||
<>
|
||||
<Spinner color={t.color.accent} variant="tool" /> {label}
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user