Compare commits
28 Commits
v2026.4.23
...
onboarding
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
2f230b5ad9 | ||
|
|
bdc9b07c9d | ||
|
|
6fdbf2f2d7 | ||
|
|
0a679cb7ad | ||
|
|
41b4d69167 | ||
|
|
3f343cf7cf | ||
|
|
4ae5b58cb1 | ||
|
|
2258a181f0 | ||
|
|
11b2942f16 | ||
|
|
b08cbc7a79 | ||
|
|
c95c6bdb7c | ||
|
|
bd929ea514 | ||
|
|
6a20e187dd | ||
|
|
9ff21437a0 | ||
|
|
44a0cbe525 | ||
|
|
2af0848f3c | ||
|
|
7baf370d3d | ||
|
|
eeda18a9b7 | ||
|
|
3a9598337f | ||
|
|
98418afd5d | ||
|
|
42ff785771 | ||
|
|
04c489b587 | ||
|
|
0bb460b070 | ||
|
|
3504bd401b | ||
|
|
50d97edbe1 | ||
|
|
e26c4f0e34 | ||
|
|
24f139e16a | ||
|
|
ef5eaf8d87 |
190
agent/moonshot_schema.py
Normal file
190
agent/moonshot_schema.py
Normal file
@@ -0,0 +1,190 @@
|
||||
"""Helpers for translating OpenAI-style tool schemas to Moonshot's schema subset.
|
||||
|
||||
Moonshot (Kimi) accepts a stricter subset of JSON Schema than standard OpenAI
|
||||
tool calling. Requests that violate it fail with HTTP 400:
|
||||
|
||||
tools.function.parameters is not a valid moonshot flavored json schema,
|
||||
details: <...>
|
||||
|
||||
Known rejection modes documented at
|
||||
https://forum.moonshot.ai/t/tool-calling-specification-violation-on-moonshot-api/102
|
||||
and MoonshotAI/kimi-cli#1595:
|
||||
|
||||
1. Every property schema must carry a ``type``. Standard JSON Schema allows
|
||||
type to be omitted (the value is then unconstrained); Moonshot refuses.
|
||||
2. When ``anyOf`` is used, ``type`` must be on the ``anyOf`` children, not
|
||||
the parent. Presence of both causes "type should be defined in anyOf
|
||||
items instead of the parent schema".
|
||||
|
||||
The ``#/definitions/...`` → ``#/$defs/...`` rewrite for draft-07 refs is
|
||||
handled separately in ``tools/mcp_tool._normalize_mcp_input_schema`` so it
|
||||
applies at MCP registration time for all providers.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import copy
|
||||
from typing import Any, Dict, List
|
||||
|
||||
# Keys whose values are maps of name → schema (not schemas themselves).
|
||||
# When we recurse, we walk the values of these maps as schemas, but we do
|
||||
# NOT apply the missing-type repair to the map itself.
|
||||
_SCHEMA_MAP_KEYS = frozenset({"properties", "patternProperties", "$defs", "definitions"})
|
||||
|
||||
# Keys whose values are lists of schemas.
|
||||
_SCHEMA_LIST_KEYS = frozenset({"anyOf", "oneOf", "allOf", "prefixItems"})
|
||||
|
||||
# Keys whose values are a single nested schema.
|
||||
_SCHEMA_NODE_KEYS = frozenset({"items", "contains", "not", "additionalProperties", "propertyNames"})
|
||||
|
||||
|
||||
def _repair_schema(node: Any, is_schema: bool = True) -> Any:
|
||||
"""Recursively apply Moonshot repairs to a schema node.
|
||||
|
||||
``is_schema=True`` means this dict is a JSON Schema node and gets the
|
||||
missing-type + anyOf-parent repairs applied. ``is_schema=False`` means
|
||||
it's a container map (e.g. the value of ``properties``) and we only
|
||||
recurse into its values.
|
||||
"""
|
||||
if isinstance(node, list):
|
||||
# Lists only show up under schema-list keys (anyOf/oneOf/allOf), so
|
||||
# every element is itself a schema.
|
||||
return [_repair_schema(item, is_schema=True) for item in node]
|
||||
if not isinstance(node, dict):
|
||||
return node
|
||||
|
||||
# Walk the dict, deciding per-key whether recursion is into a schema
|
||||
# node, a container map, or a scalar.
|
||||
repaired: Dict[str, Any] = {}
|
||||
for key, value in node.items():
|
||||
if key in _SCHEMA_MAP_KEYS and isinstance(value, dict):
|
||||
# Map of name → schema. Don't treat the map itself as a schema
|
||||
# (it has no type / properties of its own), but each value is.
|
||||
repaired[key] = {
|
||||
sub_key: _repair_schema(sub_val, is_schema=True)
|
||||
for sub_key, sub_val in value.items()
|
||||
}
|
||||
elif key in _SCHEMA_LIST_KEYS and isinstance(value, list):
|
||||
repaired[key] = [_repair_schema(v, is_schema=True) for v in value]
|
||||
elif key in _SCHEMA_NODE_KEYS:
|
||||
# items / not / additionalProperties: single nested schema.
|
||||
# additionalProperties can also be a bool — leave those alone.
|
||||
if isinstance(value, dict):
|
||||
repaired[key] = _repair_schema(value, is_schema=True)
|
||||
else:
|
||||
repaired[key] = value
|
||||
else:
|
||||
# Scalars (description, title, format, enum values, etc.) pass through.
|
||||
repaired[key] = value
|
||||
|
||||
if not is_schema:
|
||||
return repaired
|
||||
|
||||
# Rule 2: when anyOf is present, type belongs only on the children.
|
||||
if "anyOf" in repaired and isinstance(repaired["anyOf"], list):
|
||||
repaired.pop("type", None)
|
||||
return repaired
|
||||
|
||||
# Rule 1: property schemas without type need one. $ref nodes are exempt
|
||||
# — their type comes from the referenced definition.
|
||||
if "$ref" in repaired:
|
||||
return repaired
|
||||
return _fill_missing_type(repaired)
|
||||
|
||||
|
||||
def _fill_missing_type(node: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Infer a reasonable ``type`` if this schema node has none."""
|
||||
if "type" in node and node["type"] not in (None, ""):
|
||||
return node
|
||||
|
||||
# Heuristic: presence of ``properties`` → object, ``items`` → array, ``enum``
|
||||
# → type of first enum value, else fall back to ``string`` (safest scalar).
|
||||
if "properties" in node or "required" in node or "additionalProperties" in node:
|
||||
inferred = "object"
|
||||
elif "items" in node or "prefixItems" in node:
|
||||
inferred = "array"
|
||||
elif "enum" in node and isinstance(node["enum"], list) and node["enum"]:
|
||||
sample = node["enum"][0]
|
||||
if isinstance(sample, bool):
|
||||
inferred = "boolean"
|
||||
elif isinstance(sample, int):
|
||||
inferred = "integer"
|
||||
elif isinstance(sample, float):
|
||||
inferred = "number"
|
||||
else:
|
||||
inferred = "string"
|
||||
else:
|
||||
inferred = "string"
|
||||
|
||||
return {**node, "type": inferred}
|
||||
|
||||
|
||||
def sanitize_moonshot_tool_parameters(parameters: Any) -> Dict[str, Any]:
    """Produce a Moonshot-compatible object schema from tool ``parameters``.

    Non-dict input yields a fresh empty object schema. Dict input is
    deep-copied and run through ``_repair_schema``, so the caller's schema is
    never mutated. The result is then forced to be an object schema: its
    ``type`` becomes ``"object"`` and an empty ``properties`` map is added when
    none is present.
    """
    if isinstance(parameters, dict):
        schema = _repair_schema(copy.deepcopy(parameters), is_schema=True)
        if isinstance(schema, dict):
            # The top level of tool parameters must be an object schema.
            if schema.get("type") != "object":
                schema["type"] = "object"
            schema.setdefault("properties", {})
            return schema

    # Not a usable schema: fall back to an empty object schema.
    return {"type": "object", "properties": {}}
|
||||
|
||||
|
||||
def sanitize_moonshot_tools(tools: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Apply ``sanitize_moonshot_tool_parameters`` to every tool's parameters.

    Entries that are not dicts, or whose ``function`` value is not a dict, are
    passed through untouched. For every other entry the function's
    ``parameters`` are sanitized; when the sanitized schema differs from the
    original, the tool is rebuilt as a shallow copy carrying the new schema.

    Returns:
        ``tools`` itself when the input is empty/falsy or no tool needed a
        repair; otherwise a new list in which only the repaired tools have
        been replaced. The input list and its dicts are never mutated.
    """
    if not tools:
        return tools

    sanitized: List[Dict[str, Any]] = []
    any_change = False
    for tool in tools:
        fn = tool.get("function") if isinstance(tool, dict) else None
        if not isinstance(fn, dict):
            sanitized.append(tool)
            continue

        params = fn.get("parameters")
        repaired = sanitize_moonshot_tool_parameters(params)
        # The sanitizer always returns a fresh object, so an identity check
        # would flag every tool as changed. Compare by value so untouched
        # tools (and an untouched list) are handed back as-is.
        if repaired != params:
            any_change = True
            sanitized.append({**tool, "function": {**fn, "parameters": repaired}})
        else:
            sanitized.append(tool)

    return sanitized if any_change else tools
|
||||
|
||||
|
||||
def is_moonshot_model(model: str | None) -> bool:
    """Report whether ``model`` names a Kimi / Moonshot model.

    Matching is case-insensitive and ignores surrounding whitespace. It
    accepts bare names (``kimi-k2.6``, ``moonshotai/Kimi-K2.6``) as well as
    aggregator-prefixed slugs (``nous/moonshotai/kimi-k2.6``,
    ``openrouter/moonshotai/...``). Detection works on the model name alone, so
    it also covers aggregators whose base URL is not ``api.moonshot.ai``.

    Returns ``False`` for ``None`` or an empty string.
    """
    if not model:
        return False

    slug = model.strip().lower()

    # The final path segment covers aggregator-prefixed slugs.
    last_segment = slug.rpartition("/")[2]
    tail_is_kimi = last_segment == "kimi" or last_segment.startswith("kimi-")

    # Vendor-prefixed forms commonly used on aggregators.
    vendor_hit = "moonshot" in slug or "/kimi" in slug or slug.startswith("kimi")

    return tail_is_kimi or vendor_hit
|
||||
@@ -12,6 +12,7 @@ reasoning configuration, temperature handling, and extra_body assembly.
|
||||
import copy
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from agent.moonshot_schema import is_moonshot_model, sanitize_moonshot_tools
|
||||
from agent.prompt_builder import DEVELOPER_ROLE_MODELS
|
||||
from agent.transports.base import ProviderTransport
|
||||
from agent.transports.types import NormalizedResponse, ToolCall, Usage
|
||||
@@ -172,6 +173,11 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||
|
||||
# Tools
|
||||
if tools:
|
||||
# Moonshot/Kimi uses a stricter flavored JSON Schema. Rewriting
|
||||
# tool parameters here keeps aggregator routes (Nous, OpenRouter,
|
||||
# etc.) compatible, in addition to direct moonshot.ai endpoints.
|
||||
if is_moonshot_model(model):
|
||||
tools = sanitize_moonshot_tools(tools)
|
||||
api_kwargs["tools"] = tools
|
||||
|
||||
# max_tokens resolution — priority: ephemeral > user > provider default
|
||||
|
||||
@@ -40,6 +40,37 @@ from hermes_time import now as _hermes_now
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _resolve_cron_enabled_toolsets(job: dict, cfg: dict) -> list[str] | None:
|
||||
"""Resolve the toolset list for a cron job.
|
||||
|
||||
Precedence:
|
||||
1. Per-job ``enabled_toolsets`` (set via ``cronjob`` tool on create/update).
|
||||
Keeps the agent's job-scoped toolset override intact — #6130.
|
||||
2. Per-platform ``hermes tools`` config for the ``cron`` platform.
|
||||
Mirrors gateway behavior (``_get_platform_tools(cfg, platform_key)``)
|
||||
so users can gate cron toolsets globally without recreating every job.
|
||||
3. ``None`` on any lookup failure — AIAgent loads the full default set
|
||||
(legacy behavior before this change, preserved as the safety net).
|
||||
|
||||
_DEFAULT_OFF_TOOLSETS ({moa, homeassistant, rl}) are removed by
|
||||
``_get_platform_tools`` for unconfigured platforms, so fresh installs
|
||||
get cron WITHOUT ``moa`` by default (issue reported by Norbert —
|
||||
surprise $4.63 run).
|
||||
"""
|
||||
per_job = job.get("enabled_toolsets")
|
||||
if per_job:
|
||||
return per_job
|
||||
try:
|
||||
from hermes_cli.tools_config import _get_platform_tools # lazy: avoid heavy import at cron module load
|
||||
return sorted(_get_platform_tools(cfg or {}, "cron"))
|
||||
except Exception as exc:
|
||||
logger.warning(
|
||||
"Cron toolset resolution failed, falling back to full default toolset: %s",
|
||||
exc,
|
||||
)
|
||||
return None
|
||||
|
||||
# Valid delivery platforms — used to validate user-supplied platform names
|
||||
# in cron delivery targets, preventing env var enumeration via crafted names.
|
||||
_KNOWN_DELIVERY_PLATFORMS = frozenset({
|
||||
@@ -886,7 +917,7 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
|
||||
providers_ignored=pr.get("ignore"),
|
||||
providers_order=pr.get("order"),
|
||||
provider_sort=pr.get("sort"),
|
||||
enabled_toolsets=job.get("enabled_toolsets") or None,
|
||||
enabled_toolsets=_resolve_cron_enabled_toolsets(job, _cfg),
|
||||
disabled_toolsets=["cronjob", "messaging", "clarify"],
|
||||
quiet_mode=True,
|
||||
skip_context_files=True, # Don't inject SOUL.md/AGENTS.md from scheduler cwd
|
||||
|
||||
@@ -2821,6 +2821,7 @@ def _prompt_model_selection(
|
||||
pricing: Optional[Dict[str, Dict[str, str]]] = None,
|
||||
unavailable_models: Optional[List[str]] = None,
|
||||
portal_url: str = "",
|
||||
allow_custom = True
|
||||
) -> Optional[str]:
|
||||
"""Interactive model selection. Puts current_model first with a marker. Returns chosen model ID or None.
|
||||
|
||||
@@ -2909,8 +2910,16 @@ def _prompt_model_selection(
|
||||
from simple_term_menu import TerminalMenu
|
||||
|
||||
choices = [f" {_label(mid)}" for mid in ordered]
|
||||
choices.append(" Enter custom model name")
|
||||
choices.append(" Skip (keep current)")
|
||||
|
||||
custom_idx = None
|
||||
if allow_custom:
|
||||
custom_idx = len(choices)
|
||||
choices.append(" Enter custom model name")
|
||||
|
||||
skip_idx = None
|
||||
if current_model:
|
||||
skip_idx = len(choices)
|
||||
choices.append(" Skip (keep current)")
|
||||
|
||||
# Print the unavailable block BEFORE the menu via regular print().
|
||||
# simple_term_menu pads title lines to terminal width (causes wrapping),
|
||||
@@ -2947,21 +2956,29 @@ def _prompt_model_selection(
|
||||
print()
|
||||
if idx < len(ordered):
|
||||
return ordered[idx]
|
||||
elif idx == len(ordered):
|
||||
if idx == custom_idx:
|
||||
custom = input("Enter model name: ").strip()
|
||||
return custom if custom else None
|
||||
if idx == skip_idx:
|
||||
return None
|
||||
return None
|
||||
except (ImportError, NotImplementedError, OSError, subprocess.SubprocessError):
|
||||
pass
|
||||
|
||||
# Fallback: numbered list
|
||||
print(menu_title)
|
||||
num_width = len(str(len(ordered) + 2))
|
||||
n = len(ordered)
|
||||
extra = []
|
||||
if allow_custom:
|
||||
extra.append("Enter custom model name")
|
||||
if current_model:
|
||||
extra.append("Skip (keep current)")
|
||||
total = n + len(extra)
|
||||
num_width = len(str(total))
|
||||
for i, mid in enumerate(ordered, 1):
|
||||
print(f" {i:>{num_width}}. {_label(mid)}")
|
||||
n = len(ordered)
|
||||
print(f" {n + 1:>{num_width}}. Enter custom model name")
|
||||
print(f" {n + 2:>{num_width}}. Skip (keep current)")
|
||||
for j, label in enumerate(extra, n + 1):
|
||||
print(f" {j:>{num_width}}. {label}")
|
||||
|
||||
if _unavailable:
|
||||
_upgrade_url = (portal_url or DEFAULT_NOUS_PORTAL_URL).rstrip("/")
|
||||
@@ -2973,18 +2990,19 @@ def _prompt_model_selection(
|
||||
|
||||
while True:
|
||||
try:
|
||||
choice = input(f"Choice [1-{n + 2}] (default: skip): ").strip()
|
||||
choice = input(f"Choice [1-{total}]: ").strip()
|
||||
if not choice:
|
||||
return None
|
||||
idx = int(choice)
|
||||
if 1 <= idx <= n:
|
||||
return ordered[idx - 1]
|
||||
elif idx == n + 1:
|
||||
custom = input("Enter model name: ").strip()
|
||||
return custom if custom else None
|
||||
elif idx == n + 2:
|
||||
return None
|
||||
print(f"Please enter 1-{n + 2}")
|
||||
val = int(choice)
|
||||
if 1 <= val <= n:
|
||||
return ordered[val - 1]
|
||||
extra_idx = val - n - 1
|
||||
if 0 <= extra_idx < len(extra):
|
||||
if extra[extra_idx] == "Enter custom model name":
|
||||
custom = input("Enter model name: ").strip()
|
||||
return custom if custom else None
|
||||
return None # skip
|
||||
print(f"Please enter 1-{total}")
|
||||
except ValueError:
|
||||
print("Please enter a number")
|
||||
except (KeyboardInterrupt, EOFError):
|
||||
@@ -3260,7 +3278,6 @@ def _nous_device_code_login(
|
||||
open_browser = False
|
||||
|
||||
print(f"Starting Hermes login via {pconfig.name}...")
|
||||
print(f"Portal: {portal_base_url}")
|
||||
if insecure:
|
||||
print("TLS verification: disabled (--insecure)")
|
||||
elif ca_bundle:
|
||||
@@ -3280,19 +3297,18 @@ def _nous_device_code_login(
|
||||
interval = int(device_data["interval"])
|
||||
|
||||
print()
|
||||
print("To continue:")
|
||||
print(f" 1. Open: {verification_url}")
|
||||
print(f" 2. If prompted, enter code: {user_code}")
|
||||
|
||||
if open_browser:
|
||||
opened = webbrowser.open(verification_url)
|
||||
if opened:
|
||||
print(" (Opened browser for verification)")
|
||||
print("If you don't see a browser window open, navigate to this URL:")
|
||||
else:
|
||||
print(" Could not open browser automatically — use the URL above.")
|
||||
print("Navigate to this URL to continue:")
|
||||
print(verification_url)
|
||||
print(f"If you're prompted for a code, use {user_code}")
|
||||
print()
|
||||
|
||||
effective_interval = max(1, min(interval, DEVICE_AUTH_POLL_INTERVAL_CAP_SECONDS))
|
||||
print(f"Waiting for approval (polling every {effective_interval}s)...")
|
||||
print(f"Waiting for approval (checking every {effective_interval}s)...")
|
||||
|
||||
token_data = _poll_for_token(
|
||||
client=client,
|
||||
@@ -3357,7 +3373,7 @@ def _nous_device_code_login(
|
||||
raise
|
||||
|
||||
|
||||
def _login_nous(args, pconfig: ProviderConfig) -> None:
|
||||
def login_nous(args, pconfig: ProviderConfig) -> None:
|
||||
"""Nous Portal device authorization flow."""
|
||||
timeout_seconds = getattr(args, "timeout", None) or 15.0
|
||||
insecure = bool(getattr(args, "insecure", False))
|
||||
@@ -3419,7 +3435,10 @@ def _login_nous(args, pconfig: ProviderConfig) -> None:
|
||||
)
|
||||
model_ids = _PROVIDER_MODELS.get("nous", [])
|
||||
|
||||
_portal = auth_state.get("portal_base_url", "")
|
||||
|
||||
print()
|
||||
|
||||
unavailable_models: list = []
|
||||
if model_ids:
|
||||
pricing = get_pricing_for_provider("nous")
|
||||
@@ -3428,14 +3447,17 @@ def _login_nous(args, pconfig: ProviderConfig) -> None:
|
||||
model_ids, unavailable_models = partition_nous_models_by_tier(
|
||||
model_ids, pricing, free_tier=True,
|
||||
)
|
||||
_portal = auth_state.get("portal_base_url", "")
|
||||
if model_ids:
|
||||
print(f"Showing {len(model_ids)} curated models — use \"Enter custom model name\" for others.")
|
||||
selected_model = _prompt_model_selection(
|
||||
model_ids, pricing=pricing,
|
||||
unavailable_models=unavailable_models,
|
||||
portal_url=_portal,
|
||||
)
|
||||
if not free_tier:
|
||||
print(f"Showing {len(model_ids)} curated models — use \"Enter custom model name\" for others.")
|
||||
if len(model_ids) > 1:
|
||||
selected_model = _prompt_model_selection(
|
||||
model_ids, pricing=pricing,
|
||||
unavailable_models=unavailable_models,
|
||||
portal_url=_portal,
|
||||
allow_custom=not free_tier
|
||||
)
|
||||
else:
|
||||
selected_model = model_ids[0]
|
||||
elif unavailable_models:
|
||||
_url = (_portal or DEFAULT_NOUS_PORTAL_URL).rstrip("/")
|
||||
print("No free models currently available.")
|
||||
|
||||
@@ -739,6 +739,10 @@ DEFAULT_CONFIG = {
|
||||
"inherit_mcp_toolsets": True,
|
||||
"max_iterations": 50, # per-subagent iteration cap (each subagent gets its own budget,
|
||||
# independent of the parent's max_iterations)
|
||||
"child_timeout_seconds": 600, # wall-clock timeout for each child agent (floor 30s,
|
||||
# no ceiling). High-reasoning models on large tasks
|
||||
# (e.g. gpt-5.5 xhigh, opus-4.6) need generous budgets;
|
||||
# raise if children time out before producing output.
|
||||
"reasoning_effort": "", # reasoning effort for subagents: "xhigh", "high", "medium",
|
||||
# "low", "minimal", "none" (empty = inherit parent's level)
|
||||
"max_concurrent_children": 3, # max parallel children per batch; floor of 1 enforced, no ceiling
|
||||
|
||||
@@ -1085,9 +1085,6 @@ def cmd_chat(args):
|
||||
print(
|
||||
"It looks like Hermes isn't configured yet -- no API keys or providers found."
|
||||
)
|
||||
print()
|
||||
print(" Run: hermes setup")
|
||||
print()
|
||||
|
||||
from hermes_cli.setup import (
|
||||
is_interactive_stdin,
|
||||
@@ -1100,16 +1097,8 @@ def cmd_chat(args):
|
||||
)
|
||||
sys.exit(1)
|
||||
|
||||
try:
|
||||
reply = input("Run setup now? [Y/n] ").strip().lower()
|
||||
except (EOFError, KeyboardInterrupt):
|
||||
reply = "n"
|
||||
if reply in ("", "y", "yes"):
|
||||
cmd_setup(args)
|
||||
return
|
||||
print()
|
||||
print("You can run 'hermes setup' at any time to configure.")
|
||||
sys.exit(1)
|
||||
cmd_setup(args)
|
||||
return
|
||||
|
||||
# Start update check in background (runs while other init happens)
|
||||
try:
|
||||
@@ -2135,7 +2124,7 @@ def _model_flow_nous(config, current_model="", args=None):
|
||||
resolve_nous_runtime_credentials,
|
||||
AuthError,
|
||||
format_auth_error,
|
||||
_login_nous,
|
||||
login_nous,
|
||||
PROVIDER_REGISTRY,
|
||||
)
|
||||
from hermes_cli.config import (
|
||||
@@ -2148,8 +2137,6 @@ def _model_flow_nous(config, current_model="", args=None):
|
||||
|
||||
state = get_provider_auth_state("nous")
|
||||
if not state or not state.get("access_token"):
|
||||
print("Not logged into Nous Portal. Starting login...")
|
||||
print()
|
||||
try:
|
||||
mock_args = argparse.Namespace(
|
||||
portal_url=getattr(args, "portal_url", None),
|
||||
@@ -2161,7 +2148,7 @@ def _model_flow_nous(config, current_model="", args=None):
|
||||
ca_bundle=getattr(args, "ca_bundle", None),
|
||||
insecure=bool(getattr(args, "insecure", False)),
|
||||
)
|
||||
_login_nous(mock_args, PROVIDER_REGISTRY["nous"])
|
||||
login_nous(mock_args, PROVIDER_REGISTRY["nous"])
|
||||
# Offer Tool Gateway enablement for paid subscribers
|
||||
try:
|
||||
_refreshed = load_config() or {}
|
||||
@@ -2212,7 +2199,7 @@ def _model_flow_nous(config, current_model="", args=None):
|
||||
ca_bundle=None,
|
||||
insecure=False,
|
||||
)
|
||||
_login_nous(mock_args, PROVIDER_REGISTRY["nous"])
|
||||
login_nous(mock_args, PROVIDER_REGISTRY["nous"])
|
||||
except Exception as login_exc:
|
||||
print(f"Re-login failed: {login_exc}")
|
||||
return
|
||||
|
||||
@@ -38,6 +38,7 @@ PLATFORMS: OrderedDict[str, PlatformInfo] = OrderedDict([
|
||||
("qqbot", PlatformInfo(label="💬 QQBot", default_toolset="hermes-qqbot")),
|
||||
("webhook", PlatformInfo(label="🔗 Webhook", default_toolset="hermes-webhook")),
|
||||
("api_server", PlatformInfo(label="🌐 API Server", default_toolset="hermes-api-server")),
|
||||
("cron", PlatformInfo(label="⏰ Cron", default_toolset="hermes-cron")),
|
||||
])
|
||||
|
||||
|
||||
|
||||
@@ -18,9 +18,10 @@ import shutil
|
||||
import sys
|
||||
import copy
|
||||
from pathlib import Path
|
||||
from typing import Optional, Dict, Any
|
||||
from typing import Literal, Optional, Dict, Any
|
||||
|
||||
from hermes_cli.nous_subscription import get_nous_subscription_features
|
||||
from hermes_cli.main import _model_flow_nous
|
||||
from tools.tool_backend_helpers import managed_nous_tools_enabled
|
||||
from utils import base_url_hostname
|
||||
from hermes_constants import get_optional_skills_dir
|
||||
@@ -655,7 +656,7 @@ def _prompt_container_resources(config: dict):
|
||||
|
||||
|
||||
|
||||
def setup_model_provider(config: dict, *, quick: bool = False):
|
||||
def setup_model_provider(config: dict, *, quick: bool | Literal["nous_portal"] = False):
|
||||
"""Configure the inference provider and default model.
|
||||
|
||||
Delegates to ``cmd_model()`` (the same flow used by ``hermes model``)
|
||||
@@ -677,7 +678,11 @@ def setup_model_provider(config: dict, *, quick: bool = False):
|
||||
# credential prompting, model selection, and config persistence.
|
||||
from hermes_cli.main import select_provider_and_model
|
||||
try:
|
||||
select_provider_and_model()
|
||||
if quick == "nous_portal":
|
||||
config = load_config()
|
||||
_model_flow_nous(config)
|
||||
else:
|
||||
select_provider_and_model()
|
||||
except (SystemExit, KeyboardInterrupt):
|
||||
print()
|
||||
print_info("Provider setup skipped.")
|
||||
@@ -3030,11 +3035,15 @@ def run_setup_wizard(args):
|
||||
config = load_config()
|
||||
|
||||
setup_mode = prompt_choice("How would you like to set up Hermes?", [
|
||||
"Quick setup — provider, model & messaging (recommended)",
|
||||
"Nous Account setup — model & messaging (recommended)",
|
||||
"Quick setup — provider, model & messaging",
|
||||
"Full setup — configure everything",
|
||||
], 0)
|
||||
|
||||
if setup_mode == 0:
|
||||
_run_first_time_quick_setup(config, hermes_home, is_existing, nous_quick=True)
|
||||
return
|
||||
if setup_mode == 1:
|
||||
_run_first_time_quick_setup(config, hermes_home, is_existing)
|
||||
return
|
||||
|
||||
@@ -3095,7 +3104,7 @@ def _resolve_hermes_chat_argv() -> Optional[list[str]]:
|
||||
return None
|
||||
|
||||
|
||||
def _offer_launch_chat():
|
||||
def _offer_launch_chat(auto_launch = False):
|
||||
"""Prompt the user to jump straight into chat after setup."""
|
||||
print()
|
||||
if not prompt_yes_no("Launch hermes chat now?", True):
|
||||
@@ -3109,7 +3118,7 @@ def _offer_launch_chat():
|
||||
os.execvp(chat_argv[0], chat_argv)
|
||||
|
||||
|
||||
def _run_first_time_quick_setup(config: dict, hermes_home, is_existing: bool):
|
||||
def _run_first_time_quick_setup(config: dict, hermes_home, is_existing: bool, nous_quick=False):
|
||||
"""Streamlined first-time setup: provider + model only.
|
||||
|
||||
Applies sensible defaults for TTS (Edge), terminal (local), agent
|
||||
@@ -3117,7 +3126,7 @@ def _run_first_time_quick_setup(config: dict, hermes_home, is_existing: bool):
|
||||
``hermes setup <section>``.
|
||||
"""
|
||||
# Step 1: Model & Provider (essential — skips rotation/vision/TTS)
|
||||
setup_model_provider(config, quick=True)
|
||||
setup_model_provider(config, quick="nous_portal" if nous_quick else True )
|
||||
|
||||
# Step 2: Apply defaults for everything else
|
||||
_apply_default_agent_settings(config)
|
||||
@@ -3150,7 +3159,9 @@ def _run_first_time_quick_setup(config: dict, hermes_home, is_existing: bool):
|
||||
|
||||
_print_setup_summary(config, hermes_home)
|
||||
|
||||
_offer_launch_chat()
|
||||
# if the user hasn't set up the gateway, assume they want to launch chat.
|
||||
force_launch_chat = gateway_choice == 0
|
||||
_offer_launch_chat(force_launch_chat)
|
||||
|
||||
|
||||
def _run_quick_setup(config: dict, hermes_home):
|
||||
|
||||
548
hermes_cli/voice.py
Normal file
548
hermes_cli/voice.py
Normal file
@@ -0,0 +1,548 @@
|
||||
"""Process-wide voice recording + TTS API for the TUI gateway.
|
||||
|
||||
Wraps ``tools.voice_mode`` (recording/transcription) and ``tools.tts_tool``
|
||||
(text-to-speech) behind idempotent, stateful entry points that the gateway's
|
||||
``voice.record``, ``voice.toggle``, and ``voice.tts`` JSON-RPC handlers can
|
||||
call from a dedicated thread. The gateway imports this module lazily so that
|
||||
missing optional audio deps (sounddevice, faster-whisper, numpy) surface as
|
||||
an ``ImportError`` at call time, not at startup.
|
||||
|
||||
Two usage modes are exposed:
|
||||
|
||||
* **Push-to-talk** (``start_recording`` / ``stop_and_transcribe``) — single
|
||||
manually-bounded capture used when the caller drives the start/stop pair
|
||||
explicitly.
|
||||
* **Continuous (VAD)** (``start_continuous`` / ``stop_continuous``) — mirrors
|
||||
the classic CLI voice mode: recording auto-stops on silence, transcribes,
|
||||
hands the result to a callback, and then auto-restarts for the next turn.
|
||||
Three consecutive no-speech cycles stop the loop and fire
|
||||
``on_silent_limit`` so the UI can turn the mode off.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
import threading
|
||||
from typing import Any, Callable, Optional
|
||||
|
||||
from tools.voice_mode import (
|
||||
create_audio_recorder,
|
||||
is_whisper_hallucination,
|
||||
play_audio_file,
|
||||
transcribe_recording,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _debug(msg: str) -> None:
|
||||
"""Emit a debug breadcrumb when HERMES_VOICE_DEBUG=1.
|
||||
|
||||
Goes to stderr so the TUI gateway wraps it as a gateway.stderr event,
|
||||
which createGatewayEventHandler shows as an Activity line — exactly
|
||||
what we need to diagnose "why didn't the loop auto-restart?" in the
|
||||
user's real terminal without shipping a separate debug RPC.
|
||||
|
||||
Any OSError / BrokenPipeError is swallowed because this fires from
|
||||
background threads (silence callback, TTS daemon, beep) where a
|
||||
broken stderr pipe must not kill the whole gateway — the main
|
||||
command pipe (stdin+stdout) is what actually matters.
|
||||
"""
|
||||
if os.environ.get("HERMES_VOICE_DEBUG", "").strip() != "1":
|
||||
return
|
||||
try:
|
||||
print(f"[voice] {msg}", file=sys.stderr, flush=True)
|
||||
except (BrokenPipeError, OSError):
|
||||
pass
|
||||
|
||||
|
||||
def _beeps_enabled() -> bool:
|
||||
"""CLI parity: voice.beep_enabled in config.yaml (default True)."""
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
|
||||
voice_cfg = load_config().get("voice", {})
|
||||
if isinstance(voice_cfg, dict):
|
||||
return bool(voice_cfg.get("beep_enabled", True))
|
||||
except Exception:
|
||||
pass
|
||||
return True
|
||||
|
||||
|
||||
def _play_beep(frequency: int, count: int = 1) -> None:
    """Play an audible record/stop cue, mirroring cli.py's beeps.

    Per the original note, cli.py uses a single 880 Hz beep when recording
    starts and a double 660 Hz beep when it stops.

    Nothing is played when ``_beeps_enabled()`` is false. Playback is
    best-effort: any exception from importing or calling ``play_beep`` is
    reported through ``_debug`` and otherwise ignored, so an unavailable
    speaker never breaks the voice loop.
    """
    if _beeps_enabled():
        try:
            from tools.voice_mode import play_beep

            play_beep(frequency=frequency, count=count)
        except Exception as err:
            _debug(f"beep {frequency}Hz failed: {err}")
|
||||
|
||||
# ── Push-to-talk state ───────────────────────────────────────────────
|
||||
_recorder = None
|
||||
_recorder_lock = threading.Lock()
|
||||
|
||||
# ── Continuous (VAD) state ───────────────────────────────────────────
|
||||
_continuous_lock = threading.Lock()
|
||||
_continuous_active = False
|
||||
_continuous_recorder: Any = None
|
||||
|
||||
# ── TTS-vs-STT feedback guard ────────────────────────────────────────
|
||||
# When TTS plays the agent reply over the speakers, the live microphone
|
||||
# picks it up and transcribes the agent's own voice as user input — an
|
||||
# infinite loop the agent happily joins ("Ha, looks like we're in a loop").
|
||||
# This Event mirrors cli.py:_voice_tts_done: cleared while speak_text is
|
||||
# playing, set while silent. _continuous_on_silence waits on it before
|
||||
# re-arming the recorder, and speak_text itself cancels any live capture
|
||||
# before starting playback so the tail of the previous utterance doesn't
|
||||
# leak into the mic.
|
||||
_tts_playing = threading.Event()
|
||||
_tts_playing.set() # initially "not playing"
|
||||
_continuous_on_transcript: Optional[Callable[[str], None]] = None
|
||||
_continuous_on_status: Optional[Callable[[str], None]] = None
|
||||
_continuous_on_silent_limit: Optional[Callable[[], None]] = None
|
||||
_continuous_no_speech_count = 0
|
||||
_CONTINUOUS_NO_SPEECH_LIMIT = 3
|
||||
|
||||
|
||||
# ── Push-to-talk API ─────────────────────────────────────────────────
|
||||
|
||||
|
||||
def start_recording() -> None:
    """Start a push-to-talk capture via ``create_audio_recorder()``.

    Idempotent: if the stored recorder reports ``is_recording``, the call is a
    no-op. The new recorder is stored only after ``start()`` has returned, so
    a recorder whose ``start()`` raised is never kept.
    """
    global _recorder

    with _recorder_lock:
        already_capturing = _recorder is not None and getattr(
            _recorder, "is_recording", False
        )
        if not already_capturing:
            fresh = create_audio_recorder()
            fresh.start()
            _recorder = fresh
|
||||
|
||||
|
||||
def stop_and_transcribe() -> Optional[str]:
    """End the current push-to-talk capture and return its transcript.

    Returns:
        The stripped transcript, or ``None`` if nothing was being recorded,
        the recorder produced no file, transcription failed or reported
        ``success`` false, the text was empty, or the text was classified
        as a Whisper hallucination.
    """
    global _recorder

    # Detach the recorder under the lock; the slow work below runs unlocked.
    with _recorder_lock:
        active, _recorder = _recorder, None

    wav_path = active.stop() if active is not None else None
    if not wav_path:
        return None

    try:
        outcome = transcribe_recording(wav_path)
    except Exception as exc:
        logger.warning("voice transcription failed: %s", exc)
        return None
    finally:
        # The WAV is a throwaway artifact; removal is best-effort.
        try:
            if os.path.isfile(wav_path):
                os.unlink(wav_path)
        except Exception:
            pass

    # transcribe_recording returns {"success": bool, "transcript": str, ...}
    # — same keys cli.py:_voice_stop_and_transcribe reads.
    if not outcome.get("success"):
        return None

    spoken = (outcome.get("transcript") or "").strip()
    if spoken and not is_whisper_hallucination(spoken):
        return spoken
    return None
|
||||
|
||||
|
||||
# ── Continuous (VAD) API ─────────────────────────────────────────────
|
||||
|
||||
|
||||
def start_continuous(
    on_transcript: Callable[[str], None],
    on_status: Optional[Callable[[str], None]] = None,
    on_silent_limit: Optional[Callable[[], None]] = None,
    silence_threshold: int = 200,
    silence_duration: float = 3.0,
) -> None:
    """Start a VAD-driven continuous recording loop.

    The loop calls ``on_transcript(text)`` each time speech is detected and
    transcribed successfully, then auto-restarts. After
    ``_CONTINUOUS_NO_SPEECH_LIMIT`` consecutive silent cycles the loop stops
    itself and calls ``on_silent_limit`` so the UI can reflect "voice off".
    Idempotent — calling while already active is a no-op.

    Args:
        on_transcript: Receives each accepted transcript.
        on_status: Optional; called with ``"listening"`` /
            ``"transcribing"`` / ``"idle"`` so the UI can show a live
            indicator. Exceptions it raises are ignored.
        on_silent_limit: Optional; called when the loop halts itself.
        silence_threshold: Stored on the recorder as ``_silence_threshold``.
        silence_duration: Stored on the recorder as ``_silence_duration``.

    Raises:
        Exception: Whatever recorder creation or ``rec.start()`` raises; in
            both cases the loop is left inactive so a later call can retry.
    """
    global _continuous_active, _continuous_recorder
    global _continuous_on_transcript, _continuous_on_status, _continuous_on_silent_limit
    global _continuous_no_speech_count

    with _continuous_lock:
        if _continuous_active:
            _debug("start_continuous: already active — no-op")
            return

        # Create and configure the recorder BEFORE flipping any loop state:
        # if create_audio_recorder() raises, _continuous_active must stay
        # False, otherwise every later start_continuous() would be a silent
        # no-op while nothing is actually recording.
        if _continuous_recorder is None:
            _continuous_recorder = create_audio_recorder()
        rec = _continuous_recorder
        rec._silence_threshold = silence_threshold
        rec._silence_duration = silence_duration

        _continuous_active = True
        _continuous_on_transcript = on_transcript
        _continuous_on_status = on_status
        _continuous_on_silent_limit = on_silent_limit
        _continuous_no_speech_count = 0

    _debug(
        f"start_continuous: begin (threshold={silence_threshold}, duration={silence_duration}s)"
    )

    # CLI parity: single 880 Hz beep *before* opening the stream — placing
    # the beep after stream.start() on macOS triggers a CoreAudio conflict
    # (cli.py:7528 comment).
    _play_beep(frequency=880, count=1)

    try:
        rec.start(on_silence_stop=_continuous_on_silence)
    except Exception as e:
        logger.error("failed to start continuous recording: %s", e)
        _debug(f"start_continuous: rec.start raised {type(e).__name__}: {e}")
        with _continuous_lock:
            _continuous_active = False
        raise

    if on_status:
        try:
            on_status("listening")
        except Exception:
            # Best-effort UI notification; never fail the start because of it.
            pass
|
||||
|
||||
|
||||
def stop_continuous() -> None:
    """Shut down the continuous loop and cancel its recorder.

    Does nothing when no loop is active. A transcription that is already
    running is allowed to finish, but ``_continuous_on_silence`` re-checks
    ``_continuous_active`` afterwards and drops the result.
    """
    global _continuous_active, _continuous_on_transcript
    global _continuous_on_status, _continuous_on_silent_limit
    global _continuous_recorder, _continuous_no_speech_count

    with _continuous_lock:
        if not _continuous_active:
            return
        # Snapshot what is needed after the lock, then reset loop state.
        recorder = _continuous_recorder
        notify = _continuous_on_status
        _continuous_active = False
        _continuous_no_speech_count = 0
        _continuous_on_transcript = None
        _continuous_on_status = None
        _continuous_on_silent_limit = None

    if recorder is not None:
        try:
            # cancel() rather than stop(): buffered frames are discarded —
            # the loop is over, a half-captured turn must not be transcribed.
            recorder.cancel()
        except Exception as exc:
            logger.warning("failed to cancel recorder: %s", exc)

    # Audible "recording stopped" cue (CLI parity: same 660 Hz × 2 the
    # silence-auto-stop path plays).
    _play_beep(frequency=660, count=2)

    if not notify:
        return
    try:
        notify("idle")
    except Exception:
        pass
|
||||
|
||||
|
||||
def is_continuous_active() -> bool:
    """Return the current value of the continuous-loop "active" flag."""
    with _continuous_lock:
        running = _continuous_active
    return running
|
||||
|
||||
|
||||
def _emit_voice_status(callback: Optional[Callable[[str], None]], state: str) -> None:
    """Report ``state`` to the UI status callback, ignoring callback errors."""
    if not callback:
        return
    try:
        callback(state)
    except Exception:
        # Best-effort: a broken status indicator must not break the loop.
        pass


def _continuous_on_silence() -> None:
    """AudioRecorder silence callback — runs in a daemon thread.

    Stops the current capture, transcribes it, delivers the text via
    ``on_transcript`` and — if the loop is still active — starts the next
    capture. ``_CONTINUOUS_NO_SPEECH_LIMIT`` consecutive cycles without a
    usable transcript end the loop (``on_silent_limit`` then ``"idle"``).

    If re-arming the recorder fails, the loop is deactivated and ``"idle"``
    is reported, so the UI is not left showing "transcribing".
    """
    global _continuous_active, _continuous_no_speech_count

    _debug("_continuous_on_silence: fired")

    # Snapshot recorder + callbacks under the lock; stop_continuous() may
    # clear the module-level references at any time.
    with _continuous_lock:
        if not _continuous_active:
            _debug("_continuous_on_silence: loop inactive — abort")
            return
        rec = _continuous_recorder
        on_transcript = _continuous_on_transcript
        on_status = _continuous_on_status
        on_silent_limit = _continuous_on_silent_limit

    if rec is None:
        _debug("_continuous_on_silence: no recorder — abort")
        return

    _emit_voice_status(on_status, "transcribing")

    wav_path = rec.stop()
    # Peak RMS is the critical diagnostic when stop() returns None despite
    # the VAD firing — tells us at a glance whether the mic was too quiet
    # for SILENCE_RMS_THRESHOLD (200) or the VAD + peak checks disagree.
    peak_rms = getattr(rec, "_peak_rms", -1)
    _debug(
        f"_continuous_on_silence: rec.stop -> {wav_path!r} (peak_rms={peak_rms})"
    )

    # CLI parity: double 660 Hz beep after the stream stops (safe from the
    # CoreAudio conflict that blocks pre-start beeps).
    _play_beep(frequency=660, count=2)

    transcript: Optional[str] = None

    if wav_path:
        try:
            result = transcribe_recording(wav_path)
            # transcribe_recording returns {"success": bool, "transcript": str,
            # "error": str?} — NOT {"text": str}. Using the wrong key silently
            # produced empty transcripts even when Groq/local STT returned fine,
            # which masqueraded as "not hearing the user" to the caller.
            success = bool(result.get("success"))
            text = (result.get("transcript") or "").strip()
            err = result.get("error")
            _debug(
                f"_continuous_on_silence: transcribe -> success={success} "
                f"text={text!r} err={err!r}"
            )
            if success and text and not is_whisper_hallucination(text):
                transcript = text
        except Exception as e:
            logger.warning("continuous transcription failed: %s", e)
            _debug(f"_continuous_on_silence: transcribe raised {type(e).__name__}: {e}")
        finally:
            # The WAV is a throwaway artifact; removal is best-effort.
            try:
                if os.path.isfile(wav_path):
                    os.unlink(wav_path)
            except Exception:
                pass

    with _continuous_lock:
        if not _continuous_active:
            # User stopped us while we were transcribing — discard.
            _debug("_continuous_on_silence: stopped during transcribe — no restart")
            return
        if transcript:
            _continuous_no_speech_count = 0
        else:
            _continuous_no_speech_count += 1
        should_halt = _continuous_no_speech_count >= _CONTINUOUS_NO_SPEECH_LIMIT
        no_speech = _continuous_no_speech_count

    if transcript and on_transcript:
        try:
            on_transcript(transcript)
        except Exception as e:
            logger.warning("on_transcript callback raised: %s", e)

    if should_halt:
        _debug(f"_continuous_on_silence: {no_speech} silent cycles — halting")
        with _continuous_lock:
            _continuous_active = False
            _continuous_no_speech_count = 0
        if on_silent_limit:
            try:
                on_silent_limit()
            except Exception:
                pass
        try:
            rec.cancel()
        except Exception:
            pass
        _emit_voice_status(on_status, "idle")
        return

    # CLI parity (cli.py:10619-10621): wait for any in-flight TTS to
    # finish before re-arming the mic, then leave a small gap to avoid
    # catching the tail of the speaker output. Without this the voice
    # loop becomes a feedback loop — the agent's spoken reply lands
    # back in the mic and gets re-submitted.
    if not _tts_playing.is_set():
        _debug("_continuous_on_silence: waiting for TTS to finish")
        _tts_playing.wait(timeout=60)
        import time as _time
        _time.sleep(0.3)

    # User may have stopped the loop during the wait.
    with _continuous_lock:
        if not _continuous_active:
            _debug("_continuous_on_silence: stopped while waiting for TTS")
            return

    # Restart for the next turn.
    _debug(f"_continuous_on_silence: restarting loop (no_speech={no_speech})")
    _play_beep(frequency=880, count=1)
    try:
        rec.start(on_silence_stop=_continuous_on_silence)
    except Exception as e:
        logger.error("failed to restart continuous recording: %s", e)
        _debug(f"_continuous_on_silence: restart raised {type(e).__name__}: {e}")
        with _continuous_lock:
            _continuous_active = False
        # The loop is dead: tell the UI, matching the halt and stop paths.
        _emit_voice_status(on_status, "idle")
        return

    _emit_voice_status(on_status, "listening")
|
||||
|
||||
|
||||
# ── TTS API ──────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def _tts_plain_text(text: str) -> str:
    """Cap ``text`` at 4000 chars and strip markdown that reads badly aloud."""
    import re

    cleaned = text[:4000]
    cleaned = re.sub(r'```[\s\S]*?```', ' ', cleaned)  # fenced code blocks
    cleaned = re.sub(r'\[([^\]]+)\]\([^)]+\)', r'\1', cleaned)  # [text](url) → text
    cleaned = re.sub(r'https?://\S+', '', cleaned)  # bare URLs
    cleaned = re.sub(r'\*\*(.+?)\*\*', r'\1', cleaned)  # bold
    cleaned = re.sub(r'\*(.+?)\*', r'\1', cleaned)  # italic
    cleaned = re.sub(r'`(.+?)`', r'\1', cleaned)  # inline code
    cleaned = re.sub(r'^#+\s*', '', cleaned, flags=re.MULTILINE)  # headers
    cleaned = re.sub(r'^\s*[-*]\s+', '', cleaned, flags=re.MULTILINE)  # list bullets
    cleaned = re.sub(r'---+', '', cleaned)  # horizontal rules
    cleaned = re.sub(r'\n{3,}', '\n\n', cleaned)  # excess newlines
    return cleaned.strip()


def _discard_tts_files(mp3_path: str) -> None:
    """Delete the MP3 at ``mp3_path`` and its ``.ogg`` sibling, best-effort."""
    ogg_path = mp3_path.rsplit(".", 1)[0] + ".ogg"
    for leftover in (mp3_path, ogg_path):
        # One try per file: failing to remove the MP3 must not skip the OGG.
        try:
            if os.path.isfile(leftover):
                os.unlink(leftover)
        except OSError:
            pass


def speak_text(text: str) -> None:
    """Synthesize ``text`` with the configured TTS provider and play it.

    Mirrors cli.py:_voice_speak_response — same markdown strip pipeline,
    same 4000-char cap, same explicit mp3 output path, same MP3-over-OGG
    playback choice (afplay misbehaves on OGG), same cleanup of both
    extensions. Unlike a cleanup that only runs after successful playback,
    the temp files here are removed on every exit path after synthesis.

    While playback is in flight the module-level _tts_playing Event is
    cleared so the continuous-recording loop knows to wait before
    re-arming the mic (otherwise the agent's spoken reply feedback-loops
    through the microphone and the agent ends up replying to itself).

    Args:
        text: Reply text; empty or whitespace-only input is a no-op.

    Synthesis and playback errors are logged, never raised.
    """
    if not text or not text.strip():
        return

    import tempfile
    import time

    # Cancel any live capture before we open the speakers — otherwise the
    # last ~200ms of the user's turn tail + the first syllables of our TTS
    # both end up in the next recording window.
    paused_recording = False
    with _continuous_lock:
        if (
            _continuous_active
            and _continuous_recorder is not None
            and getattr(_continuous_recorder, "is_recording", False)
        ):
            try:
                _continuous_recorder.cancel()
                paused_recording = True
            except Exception as e:
                logger.warning("failed to pause recorder for TTS: %s", e)

    _tts_playing.clear()
    _debug(f"speak_text: TTS begin (paused_recording={paused_recording})")

    try:
        from tools.tts_tool import text_to_speech_tool

        tts_text = _tts_plain_text(text)
        if not tts_text:
            return

        # MP3 output path, pre-chosen so we can play the MP3 directly even
        # when text_to_speech_tool auto-converts to OGG for messaging
        # platforms. afplay's OGG support is flaky, MP3 always works.
        voice_dir = os.path.join(tempfile.gettempdir(), "hermes_voice")
        os.makedirs(voice_dir, exist_ok=True)
        mp3_path = os.path.join(
            voice_dir,
            f"tts_{time.strftime('%Y%m%d_%H%M%S')}.mp3",
        )

        _debug(f"speak_text: synthesizing {len(tts_text)} chars -> {mp3_path}")
        try:
            text_to_speech_tool(text=tts_text, output_path=mp3_path)

            if os.path.isfile(mp3_path) and os.path.getsize(mp3_path) > 0:
                _debug(f"speak_text: playing {mp3_path} ({os.path.getsize(mp3_path)} bytes)")
                play_audio_file(mp3_path)
            else:
                _debug(f"speak_text: TTS tool produced no audio at {mp3_path}")
        finally:
            # Runs on success, on an empty output file and when synthesis
            # or playback raises, so no audio file is left in the temp dir.
            _discard_tts_files(mp3_path)
    except Exception as e:
        logger.warning("Voice TTS playback failed: %s", e)
        _debug(f"speak_text raised {type(e).__name__}: {e}")
    finally:
        _tts_playing.set()
        _debug("speak_text: TTS done")

        # Re-arm the mic so the user can answer without pressing Ctrl+B.
        # Small delay lets the OS flush speaker output and afplay fully
        # release the audio device before sounddevice re-opens the input.
        # Kept inside ``finally`` so the early return above (nothing left
        # to say) cannot leave a cancelled capture un-resumed.
        if paused_recording:
            time.sleep(0.3)
            with _continuous_lock:
                if _continuous_active and _continuous_recorder is not None:
                    try:
                        _continuous_recorder.start(
                            on_silence_stop=_continuous_on_silence
                        )
                        _debug("speak_text: recording resumed after TTS")
                    except Exception as e:
                        logger.warning(
                            "failed to resume recorder after TTS: %s", e
                        )
|
||||
@@ -418,6 +418,31 @@ def _coerce_value(value: str, expected_type):
|
||||
return _coerce_number(value, integer_only=(expected_type == "integer"))
|
||||
if expected_type == "boolean":
|
||||
return _coerce_boolean(value)
|
||||
if expected_type == "array":
|
||||
return _coerce_json(value, list)
|
||||
if expected_type == "object":
|
||||
return _coerce_json(value, dict)
|
||||
return value
|
||||
|
||||
|
||||
def _coerce_json(value: str, expected_python_type: type):
    """Decode *value* as JSON when the schema wants an array or object.

    Covers model output drift where the LLM emits an array/object as a JSON
    string instead of a native structure. *value* is returned unchanged
    when it does not parse or when the parsed result is not an instance of
    *expected_python_type*.
    """
    try:
        decoded = json.loads(value)
    except (ValueError, TypeError):
        return value

    if not isinstance(decoded, expected_python_type):
        return value

    logger.debug(
        "coerce_tool_args: coerced string to %s via json.loads",
        expected_python_type.__name__,
    )
    return decoded
|
||||
|
||||
|
||||
|
||||
@@ -167,6 +167,7 @@ AUTHOR_MAP = {
|
||||
"socrates1024@gmail.com": "socrates1024",
|
||||
"seanalt555@gmail.com": "Salt-555",
|
||||
"satelerd@gmail.com": "satelerd",
|
||||
"dan@danlynn.com": "danklynn",
|
||||
"numman.ali@gmail.com": "nummanali",
|
||||
"rohithsaimidigudla@gmail.com": "whitehatjr1001",
|
||||
"0xNyk@users.noreply.github.com": "0xNyk",
|
||||
|
||||
254
tests/agent/test_moonshot_schema.py
Normal file
254
tests/agent/test_moonshot_schema.py
Normal file
@@ -0,0 +1,254 @@
|
||||
"""Tests for Moonshot/Kimi flavored-JSON-Schema sanitizer.
|
||||
|
||||
Moonshot's tool-parameter validator rejects several shapes that the rest of
|
||||
the JSON Schema ecosystem accepts:
|
||||
|
||||
1. Properties without ``type`` — Moonshot requires ``type`` on every node.
|
||||
2. ``type`` at the parent of ``anyOf`` — Moonshot requires it only inside
|
||||
``anyOf`` children.
|
||||
|
||||
These tests cover the repairs applied by ``agent/moonshot_schema.py``.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from agent.moonshot_schema import (
|
||||
is_moonshot_model,
|
||||
sanitize_moonshot_tool_parameters,
|
||||
sanitize_moonshot_tools,
|
||||
)
|
||||
|
||||
|
||||
class TestMoonshotModelDetection:
    """is_moonshot_model() has to recognise Kimi names behind aggregator prefixes."""

    # Bare, vendor-prefixed, aggregator-prefixed and upper-cased spellings.
    _MOONSHOT_NAMES = [
        "kimi-k2.6",
        "kimi-k2-thinking",
        "moonshotai/Kimi-K2.6",
        "moonshotai/kimi-k2.6",
        "nous/moonshotai/kimi-k2.6",
        "openrouter/moonshotai/kimi-k2-thinking",
        "MOONSHOTAI/KIMI-K2.6",
    ]

    # Empty/None inputs plus models from other vendors.
    _OTHER_NAMES = [
        "",
        None,
        "anthropic/claude-sonnet-4.6",
        "openai/gpt-5.4",
        "google/gemini-3-flash-preview",
        "deepseek-chat",
    ]

    @pytest.mark.parametrize("model", _MOONSHOT_NAMES)
    def test_positive_matches(self, model):
        assert is_moonshot_model(model) is True

    @pytest.mark.parametrize("model", _OTHER_NAMES)
    def test_negative_matches(self, model):
        assert is_moonshot_model(model) is False
|
||||
|
||||
|
||||
class TestMissingTypeFilled:
    """Rule 1: a property schema without ``type`` gets one filled in."""

    def test_property_without_type_gets_string(self):
        schema = {
            "type": "object",
            "properties": {"query": {"description": "a bare property"}},
        }
        query = sanitize_moonshot_tool_parameters(schema)["properties"]["query"]
        assert query["type"] == "string"

    def test_property_with_enum_infers_type_from_first_value(self):
        schema = {
            "type": "object",
            "properties": {"flag": {"enum": [True, False]}},
        }
        flag = sanitize_moonshot_tool_parameters(schema)["properties"]["flag"]
        assert flag["type"] == "boolean"

    def test_nested_properties_are_repaired(self):
        inner = {
            "type": "object",
            "properties": {
                "field": {"description": "no type"},
            },
        }
        schema = {"type": "object", "properties": {"filter": inner}}
        cleaned = sanitize_moonshot_tool_parameters(schema)
        assert cleaned["properties"]["filter"]["properties"]["field"]["type"] == "string"

    def test_array_items_without_type_get_repaired(self):
        tags = {
            "type": "array",
            "items": {"description": "tag entry"},
        }
        schema = {"type": "object", "properties": {"tags": tags}}
        cleaned = sanitize_moonshot_tool_parameters(schema)
        assert cleaned["properties"]["tags"]["items"]["type"] == "string"

    def test_ref_node_is_not_given_synthetic_type(self):
        """A ``$ref`` node must keep its ref and gain no ``type`` — the
        referenced definition supplies it, and Moonshot would reject the
        conflict."""
        schema = {
            "type": "object",
            "properties": {"payload": {"$ref": "#/$defs/Payload"}},
            "$defs": {"Payload": {"type": "object", "properties": {}}},
        }
        payload = sanitize_moonshot_tool_parameters(schema)["properties"]["payload"]
        assert "type" not in payload
        assert payload["$ref"] == "#/$defs/Payload"
|
||||
|
||||
|
||||
class TestAnyOfParentType:
    """Rule 2: a node carrying ``anyOf`` must not also carry ``type``."""

    def test_parent_type_stripped_when_anyof_present(self):
        nullable_string = {
            "type": "string",
            "anyOf": [
                {"type": "string"},
                {"type": "null"},
            ],
        }
        schema = {"type": "object", "properties": {"from_format": nullable_string}}
        cleaned = sanitize_moonshot_tool_parameters(schema)
        from_format = cleaned["properties"]["from_format"]
        assert "type" not in from_format
        assert "anyOf" in from_format

    def test_anyof_children_missing_type_get_filled(self):
        union = {
            "anyOf": [
                {"type": "string"},
                {"description": "A typeless option"},
            ],
        }
        schema = {"type": "object", "properties": {"value": union}}
        cleaned = sanitize_moonshot_tool_parameters(schema)
        typed, formerly_typeless = cleaned["properties"]["value"]["anyOf"][:2]
        assert typed["type"] == "string"
        assert "type" in formerly_typeless
|
||||
|
||||
|
||||
class TestTopLevelGuarantees:
    """Whatever goes in, the top-level result is a well-formed object schema."""

    def test_non_dict_input_returns_empty_object(self):
        for junk in (None, "garbage", []):
            assert sanitize_moonshot_tool_parameters(junk) == {"type": "object", "properties": {}}

    def test_non_object_top_level_coerced(self):
        cleaned = sanitize_moonshot_tool_parameters({"type": "string"})
        assert cleaned["type"] == "object"
        assert "properties" in cleaned

    def test_does_not_mutate_input(self):
        schema = {
            "type": "object",
            "properties": {"q": {"description": "no type"}},
        }
        # Copy the parts the sanitizer could touch before handing it over.
        before = {
            "type": schema["type"],
            "properties": {"q": dict(schema["properties"]["q"])},
        }
        sanitize_moonshot_tool_parameters(schema)
        assert schema["type"] == before["type"]
        assert "type" not in schema["properties"]["q"]
|
||||
|
||||
|
||||
class TestToolListSanitizer:
    """sanitize_moonshot_tools() applied to an OpenAI-format tool list."""

    def test_applies_per_tool(self):
        search_tool = {
            "type": "function",
            "function": {
                "name": "search",
                "description": "Search",
                "parameters": {
                    "type": "object",
                    "properties": {"q": {"description": "query"}},
                },
            },
        }
        noop_tool = {
            "type": "function",
            "function": {
                "name": "noop",
                "description": "Does nothing",
                "parameters": {"type": "object", "properties": {}},
            },
        }
        cleaned = sanitize_moonshot_tools([search_tool, noop_tool])
        assert cleaned[0]["function"]["parameters"]["properties"]["q"]["type"] == "string"
        # Second tool already clean — should be structurally equivalent
        assert cleaned[1]["function"]["parameters"] == {"type": "object", "properties": {}}

    def test_empty_list_is_passthrough(self):
        assert sanitize_moonshot_tools([]) == []
        assert sanitize_moonshot_tools(None) is None

    def test_skips_malformed_entries(self):
        """Entries lacking a ``function`` dict come back equal to the input."""
        malformed = [{"type": "function"}, {"not": "a tool"}]
        assert sanitize_moonshot_tools(malformed) == malformed
|
||||
|
||||
|
||||
class TestRealWorldMCPShape:
    """End-to-end check on a realistic MCP-style schema that used to 400 on Moonshot."""

    def test_combined_rewrites(self):
        # One tool exhibiting all three problems at once: a property with no
        # type, an anyOf whose parent also has a type, and untyped array items.
        properties = {
            "query": {"description": "search text"},
            "filter": {
                "type": "string",
                "anyOf": [
                    {"type": "string"},
                    {"type": "null"},
                ],
            },
            "tags": {
                "type": "array",
                "items": {"description": "tag"},
            },
        }
        schema = {
            "type": "object",
            "properties": properties,
            "required": ["query"],
        }
        cleaned = sanitize_moonshot_tool_parameters(schema)
        fixed = cleaned["properties"]
        assert fixed["query"]["type"] == "string"
        assert "type" not in fixed["filter"]
        assert fixed["filter"]["anyOf"][0]["type"] == "string"
        assert fixed["tags"]["items"]["type"] == "string"
        assert cleaned["required"] == ["query"]
|
||||
@@ -238,6 +238,56 @@ class TestChatCompletionsKimi:
|
||||
)
|
||||
assert kw["extra_body"]["thinking"] == {"type": "disabled"}
|
||||
|
||||
def test_moonshot_tool_schemas_are_sanitized_by_model_name(self, transport):
    """A Moonshot model name alone (aggregator route, no Moonshot base URL) triggers sanitizing."""
    untyped_params = {
        "type": "object",
        "properties": {
            "q": {"description": "query"},  # missing type
        },
    }
    search_tool = {
        "type": "function",
        "function": {
            "name": "search",
            "description": "Search",
            "parameters": untyped_params,
        },
    }
    kw = transport.build_kwargs(
        model="moonshotai/kimi-k2.6",
        messages=[{"role": "user", "content": "Hi"}],
        tools=[search_tool],
        max_tokens_param_fn=lambda n: {"max_tokens": n},
    )
    sent_q = kw["tools"][0]["function"]["parameters"]["properties"]["q"]
    assert sent_q["type"] == "string"
|
||||
|
||||
def test_non_moonshot_tools_are_not_mutated(self, transport):
    """For a non-Moonshot model the tool schema gains no synthetic ``type``."""
    original_params = {
        "type": "object",
        "properties": {"q": {"description": "query"}},  # missing type
    }
    search_tool = {
        "type": "function",
        "function": {
            "name": "search",
            "description": "Search",
            "parameters": original_params,
        },
    }
    kw = transport.build_kwargs(
        model="anthropic/claude-sonnet-4.6",
        messages=[{"role": "user", "content": "Hi"}],
        tools=[search_tool],
        max_tokens_param_fn=lambda n: {"max_tokens": n},
    )
    # The parameters dict is passed through untouched (no synthetic type)
    sent_q = kw["tools"][0]["function"]["parameters"]["properties"]["q"]
    assert "type" not in sent_q
|
||||
|
||||
|
||||
class TestChatCompletionsValidate:
|
||||
|
||||
|
||||
@@ -571,7 +571,7 @@ def test_cmd_model_forwards_nous_login_tls_options(monkeypatch):
|
||||
captured["ca_bundle"] = login_args.ca_bundle
|
||||
captured["insecure"] = login_args.insecure
|
||||
|
||||
monkeypatch.setattr("hermes_cli.auth._login_nous", _fake_login)
|
||||
monkeypatch.setattr("hermes_cli.auth.login_nous", _fake_login)
|
||||
|
||||
hermes_main.cmd_model(
|
||||
SimpleNamespace(
|
||||
|
||||
@@ -710,7 +710,15 @@ class TestRunJobSessionPersistence:
|
||||
kwargs = mock_agent_cls.call_args.kwargs
|
||||
assert kwargs["enabled_toolsets"] == ["web", "terminal", "file"]
|
||||
|
||||
def test_run_job_enabled_toolsets_none_when_not_set(self, tmp_path):
|
||||
def test_run_job_enabled_toolsets_resolves_from_platform_config_when_not_set(self, tmp_path):
|
||||
"""When a job has no explicit enabled_toolsets, the scheduler now
|
||||
resolves them from ``hermes tools`` platform config for ``cron``
|
||||
(PR #14xxx — blanket fix for Norbert's surprise ``moa`` run).
|
||||
|
||||
The legacy "pass None → AIAgent loads full default" path is still
|
||||
reachable, but only when ``_get_platform_tools`` raises (safety net
|
||||
for any unexpected config shape).
|
||||
"""
|
||||
job = {
|
||||
"id": "no-toolset-job",
|
||||
"name": "test",
|
||||
@@ -725,7 +733,39 @@ class TestRunJobSessionPersistence:
|
||||
run_job(job)
|
||||
|
||||
kwargs = mock_agent_cls.call_args.kwargs
|
||||
assert kwargs["enabled_toolsets"] is None
|
||||
# Resolution happened — not None, is a list.
|
||||
assert isinstance(kwargs["enabled_toolsets"], list)
|
||||
# The cron default is _HERMES_CORE_TOOLS with _DEFAULT_OFF_TOOLSETS
|
||||
# (``moa``, ``homeassistant``, ``rl``) removed. The most important
|
||||
# invariant: ``moa`` is NOT in the default cron toolset, so a cron
|
||||
# run cannot accidentally spin up frontier models.
|
||||
assert "moa" not in kwargs["enabled_toolsets"]
|
||||
|
||||
def test_run_job_per_job_toolsets_win_over_platform_config(self, tmp_path):
    """A job's own ``enabled_toolsets`` beats the platform-level
    ``hermes tools`` config."""
    job = {
        "id": "override-job",
        "name": "test",
        "prompt": "hello",
        "enabled_toolsets": ["terminal"],
    }
    _unused_db, patches = self._make_run_job_patches(tmp_path)
    # The platform config is patched to offer web+file for cron; the
    # per-job list must still be what reaches AIAgent.
    with patches[0], patches[1], patches[2], patches[3], patches[4], \
         patch("run_agent.AIAgent") as mock_agent_cls, \
         patch(
             "hermes_cli.tools_config._get_platform_tools",
             return_value={"web", "file"},
         ):
        agent = MagicMock()
        agent.run_conversation.return_value = {"final_response": "ok"}
        mock_agent_cls.return_value = agent
        run_job(job)

    passed = mock_agent_cls.call_args.kwargs
    assert passed["enabled_toolsets"] == ["terminal"]
|
||||
|
||||
def test_run_job_empty_response_returns_empty_not_placeholder(self, tmp_path):
|
||||
"""Empty final_response should stay empty for delivery logic (issue #2234).
|
||||
|
||||
@@ -1,22 +1,28 @@
|
||||
"""Regression tests for the TUI gateway's `complete.path` handler.
|
||||
|
||||
Reported during the TUI v2 blitz retest: typing `@folder:` (and `@folder`
|
||||
with no colon yet) still surfaced files alongside directories in the
|
||||
TUI composer, because the gateway-side completion lives in
|
||||
`tui_gateway/server.py` and was never touched by the earlier fix to
|
||||
`hermes_cli/commands.py`.
|
||||
Reported during the TUI v2 blitz retest:
|
||||
- typing `@folder:` (and `@folder` with no colon yet) surfaced files
|
||||
alongside directories — the gateway-side completion lives in
|
||||
`tui_gateway/server.py` and was never touched by the earlier fix to
|
||||
`hermes_cli/commands.py`.
|
||||
- typing `@appChrome` required the full `@ui-tui/src/components/app…`
|
||||
path to find the file — users expect Cmd-P-style fuzzy basename
|
||||
matching across the repo, not a strict directory prefix filter.
|
||||
|
||||
Covers:
|
||||
- `@folder:` only yields directories
|
||||
- `@file:` only yields regular files
|
||||
- Bare `@folder` / `@file` (no colon) lists cwd directly
|
||||
- Explicit prefix is preserved in the completion text
|
||||
- `@<name>` with no slash fuzzy-matches basenames anywhere in the tree
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from tui_gateway import server
|
||||
|
||||
|
||||
@@ -33,6 +39,15 @@ def _items(word: str):
|
||||
return [(it["text"], it["display"], it.get("meta", "")) for it in resp["result"]["items"]]
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def _reset_fuzzy_cache(monkeypatch):
|
||||
# Each test walks a fresh tmp dir; clear the cached listing so prior
|
||||
# roots can't leak through the TTL window.
|
||||
server._fuzzy_cache.clear()
|
||||
yield
|
||||
server._fuzzy_cache.clear()
|
||||
|
||||
|
||||
def test_at_folder_colon_only_dirs(tmp_path, monkeypatch):
|
||||
monkeypatch.chdir(tmp_path)
|
||||
_fixture(tmp_path)
|
||||
@@ -89,3 +104,176 @@ def test_bare_at_still_shows_static_refs(tmp_path, monkeypatch):
|
||||
|
||||
for expected in ("@diff", "@staged", "@file:", "@folder:", "@url:", "@git:"):
|
||||
assert expected in texts, f"missing static ref {expected!r} in {texts!r}"
|
||||
|
||||
|
||||
# ── Fuzzy basename matching ──────────────────────────────────────────────
|
||||
# Users shouldn't have to know the full path — typing `@appChrome` should
|
||||
# find `ui-tui/src/components/appChrome.tsx`.
|
||||
|
||||
|
||||
def _nested_fixture(tmp_path: Path):
|
||||
(tmp_path / "readme.md").write_text("x")
|
||||
(tmp_path / ".env").write_text("x")
|
||||
(tmp_path / "ui-tui/src/components").mkdir(parents=True)
|
||||
(tmp_path / "ui-tui/src/components/appChrome.tsx").write_text("x")
|
||||
(tmp_path / "ui-tui/src/components/appLayout.tsx").write_text("x")
|
||||
(tmp_path / "ui-tui/src/components/thinking.tsx").write_text("x")
|
||||
(tmp_path / "ui-tui/src/hooks").mkdir(parents=True)
|
||||
(tmp_path / "ui-tui/src/hooks/useCompletion.ts").write_text("x")
|
||||
(tmp_path / "tui_gateway").mkdir()
|
||||
(tmp_path / "tui_gateway/server.py").write_text("x")
|
||||
|
||||
|
||||
def test_fuzzy_at_finds_file_without_directory_prefix(tmp_path, monkeypatch):
|
||||
"""`@appChrome` — with no slash — should surface the nested file."""
|
||||
monkeypatch.chdir(tmp_path)
|
||||
_nested_fixture(tmp_path)
|
||||
|
||||
entries = _items("@appChrome")
|
||||
texts = [t for t, _, _ in entries]
|
||||
|
||||
assert "@file:ui-tui/src/components/appChrome.tsx" in texts, texts
|
||||
|
||||
# Display is the basename, meta is the containing directory, so the
|
||||
# picker can show `appChrome.tsx ui-tui/src/components` on one row.
|
||||
row = next(r for r in entries if r[0] == "@file:ui-tui/src/components/appChrome.tsx")
|
||||
assert row[1] == "appChrome.tsx"
|
||||
assert row[2] == "ui-tui/src/components"
|
||||
|
||||
|
||||
def test_fuzzy_ranks_exact_before_prefix_before_subseq(tmp_path, monkeypatch):
|
||||
"""Better matches sort before weaker matches regardless of path depth."""
|
||||
monkeypatch.chdir(tmp_path)
|
||||
_nested_fixture(tmp_path)
|
||||
(tmp_path / "server.py").write_text("x") # exact basename match at root
|
||||
|
||||
texts = [t for t, _, _ in _items("@server")]
|
||||
|
||||
# Root-level `server.py` sorts ahead of `tui_gateway/server.py`: the two
|
||||
# basenames are identical, so they tie on match rank and the shorter
|
||||
# rel path breaks the tie.
|
||||
assert texts[0] == "@file:server.py", texts
|
||||
assert "@file:tui_gateway/server.py" in texts
|
||||
|
||||
|
||||
def test_fuzzy_camelcase_word_boundary(tmp_path, monkeypatch):
|
||||
"""Mid-basename camelCase pieces match without substring scanning."""
|
||||
monkeypatch.chdir(tmp_path)
|
||||
_nested_fixture(tmp_path)
|
||||
|
||||
texts = [t for t, _, _ in _items("@Chrome")]
|
||||
|
||||
# `Chrome` starts a camelCase word inside `appChrome.tsx`.
|
||||
assert "@file:ui-tui/src/components/appChrome.tsx" in texts, texts
|
||||
|
||||
|
||||
def test_fuzzy_subsequence_catches_sparse_queries(tmp_path, monkeypatch):
|
||||
"""`@uCo` → `useCompletion.ts` via subsequence, last-resort tier."""
|
||||
monkeypatch.chdir(tmp_path)
|
||||
_nested_fixture(tmp_path)
|
||||
|
||||
texts = [t for t, _, _ in _items("@uCo")]
|
||||
|
||||
assert "@file:ui-tui/src/hooks/useCompletion.ts" in texts, texts
|
||||
|
||||
|
||||
def test_fuzzy_at_file_prefix_preserved(tmp_path, monkeypatch):
|
||||
"""Explicit `@file:` prefix still wins the completion tag."""
|
||||
monkeypatch.chdir(tmp_path)
|
||||
_nested_fixture(tmp_path)
|
||||
|
||||
texts = [t for t, _, _ in _items("@file:appChrome")]
|
||||
|
||||
assert "@file:ui-tui/src/components/appChrome.tsx" in texts, texts
|
||||
|
||||
|
||||
def test_fuzzy_skipped_when_path_has_slash(tmp_path, monkeypatch):
|
||||
"""Any `/` in the query = user is navigating; keep directory listing."""
|
||||
monkeypatch.chdir(tmp_path)
|
||||
_nested_fixture(tmp_path)
|
||||
|
||||
texts = [t for t, _, _ in _items("@ui-tui/src/components/app")]
|
||||
|
||||
# Directory-listing mode prefixes with `@file:` / `@folder:` per entry.
|
||||
# It should only surface direct children of the named dir — not the
|
||||
# nested `useCompletion.ts`.
|
||||
assert any("appChrome.tsx" in t for t in texts), texts
|
||||
assert not any("useCompletion.ts" in t for t in texts), texts
|
||||
|
||||
|
||||
def test_fuzzy_skipped_when_folder_tag(tmp_path, monkeypatch):
|
||||
"""`@folder:<name>` still lists directories — fuzzy scanner only walks
|
||||
files (git-tracked + untracked), so defer to the dir-listing path."""
|
||||
monkeypatch.chdir(tmp_path)
|
||||
_nested_fixture(tmp_path)
|
||||
|
||||
texts = [t for t, _, _ in _items("@folder:ui")]
|
||||
|
||||
# Root has `ui-tui/` as a directory; the listing branch should surface it.
|
||||
assert any(t.startswith("@folder:ui-tui") for t in texts), texts
|
||||
|
||||
|
||||
def test_fuzzy_hides_dotfiles_unless_asked(tmp_path, monkeypatch):
|
||||
"""`.env` doesn't leak into `@env` but does show for `@.env`."""
|
||||
monkeypatch.chdir(tmp_path)
|
||||
_nested_fixture(tmp_path)
|
||||
|
||||
assert not any(".env" in t for t, _, _ in _items("@env"))
|
||||
assert any(t.endswith(".env") for t, _, _ in _items("@.env"))
|
||||
|
||||
|
||||
def test_fuzzy_caps_results(tmp_path, monkeypatch):
|
||||
"""The 30-item cap survives a big tree."""
|
||||
monkeypatch.chdir(tmp_path)
|
||||
for i in range(60):
|
||||
(tmp_path / f"mod_{i:03d}.py").write_text("x")
|
||||
|
||||
items = _items("@mod")
|
||||
|
||||
assert len(items) == 30
|
||||
|
||||
|
||||
def test_fuzzy_paths_relative_to_cwd_inside_subdir(tmp_path, monkeypatch):
|
||||
"""When the gateway runs from a subdirectory of a git repo, fuzzy
|
||||
completion paths must resolve under that cwd — not under the repo root.
|
||||
|
||||
Without this, `@appChrome` from inside `apps/web/` would suggest
|
||||
`@file:apps/web/src/foo.tsx` but the agent (resolving from cwd) would
|
||||
look for `apps/web/apps/web/src/foo.tsx` and fail. We translate every
|
||||
`git ls-files` result back to a `relpath(root)` and drop anything
|
||||
outside `root` so the completion contract stays "paths are cwd-relative".
|
||||
"""
|
||||
import subprocess
|
||||
|
||||
subprocess.run(["git", "init", "-q"], cwd=tmp_path, check=True)
|
||||
subprocess.run(["git", "config", "user.email", "test@example.com"], cwd=tmp_path, check=True)
|
||||
subprocess.run(["git", "config", "user.name", "test"], cwd=tmp_path, check=True)
|
||||
|
||||
(tmp_path / "apps" / "web" / "src").mkdir(parents=True)
|
||||
(tmp_path / "apps" / "web" / "src" / "appChrome.tsx").write_text("x")
|
||||
(tmp_path / "apps" / "api" / "src").mkdir(parents=True)
|
||||
(tmp_path / "apps" / "api" / "src" / "server.ts").write_text("x")
|
||||
(tmp_path / "README.md").write_text("x")
|
||||
|
||||
subprocess.run(["git", "add", "."], cwd=tmp_path, check=True)
|
||||
subprocess.run(["git", "commit", "-q", "-m", "init"], cwd=tmp_path, check=True)
|
||||
|
||||
# Run from `apps/web/` — completions should be relative to here, and
|
||||
# files outside this subtree (apps/api, README.md at root) shouldn't
|
||||
# appear at all.
|
||||
monkeypatch.chdir(tmp_path / "apps" / "web")
|
||||
|
||||
texts = [t for t, _, _ in _items("@appChrome")]
|
||||
|
||||
assert "@file:src/appChrome.tsx" in texts, texts
|
||||
assert not any("apps/web/" in t for t in texts), texts
|
||||
|
||||
server._fuzzy_cache.clear()
|
||||
other_texts = [t for t, _, _ in _items("@server")]
|
||||
|
||||
assert not any("server.ts" in t for t in other_texts), other_texts
|
||||
|
||||
server._fuzzy_cache.clear()
|
||||
readme_texts = [t for t, _, _ in _items("@README")]
|
||||
|
||||
assert not any("README.md" in t for t in readme_texts), readme_texts
|
||||
|
||||
@@ -463,7 +463,7 @@ class TestPlatformToolsetConsistency:
|
||||
|
||||
gateway_includes = set(TOOLSETS["hermes-gateway"]["includes"])
|
||||
# Exclude non-messaging platforms from the check
|
||||
non_messaging = {"cli", "api_server"}
|
||||
non_messaging = {"cli", "api_server", "cron"}
|
||||
for platform, meta in PLATFORMS.items():
|
||||
if platform in non_messaging:
|
||||
continue
|
||||
|
||||
255
tests/hermes_cli/test_voice_wrapper.py
Normal file
255
tests/hermes_cli/test_voice_wrapper.py
Normal file
@@ -0,0 +1,255 @@
|
||||
"""Tests for ``hermes_cli.voice`` — the TUI gateway's voice wrapper.
|
||||
|
||||
The module is imported *lazily* by ``tui_gateway/server.py`` so that a
|
||||
box with missing audio deps fails at call time (returning a clean RPC
|
||||
error) rather than at gateway startup. These tests therefore only
|
||||
assert the public contract the gateway depends on: the three symbols
|
||||
exist, ``stop_and_transcribe`` is a no-op when nothing is recording,
|
||||
and ``speak_text`` tolerates empty input without touching the provider
|
||||
stack.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
import pytest
|
||||
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
|
||||
|
||||
|
||||
class TestPublicAPI:
|
||||
def test_gateway_symbols_importable(self):
|
||||
"""Match the exact import shape tui_gateway/server.py uses."""
|
||||
from hermes_cli.voice import (
|
||||
speak_text,
|
||||
start_recording,
|
||||
stop_and_transcribe,
|
||||
)
|
||||
|
||||
assert callable(start_recording)
|
||||
assert callable(stop_and_transcribe)
|
||||
assert callable(speak_text)
|
||||
|
||||
|
||||
class TestStopWithoutStart:
|
||||
def test_returns_none_when_no_recording_active(self, monkeypatch):
|
||||
"""Idempotent no-op: stop before start must not raise or touch state."""
|
||||
import hermes_cli.voice as voice
|
||||
|
||||
monkeypatch.setattr(voice, "_recorder", None)
|
||||
|
||||
assert voice.stop_and_transcribe() is None
|
||||
|
||||
|
||||
class TestSpeakTextGuards:
|
||||
@pytest.mark.parametrize("text", ["", " ", "\n\t "])
|
||||
def test_empty_text_is_noop(self, text):
|
||||
"""Empty / whitespace-only text must return without importing tts_tool
|
||||
(the gateway spawns a thread per call, so a no-op on empty input
|
||||
keeps the thread pool from churning on trivial inputs)."""
|
||||
from hermes_cli.voice import speak_text
|
||||
|
||||
# Should simply return None without raising.
|
||||
assert speak_text(text) is None
|
||||
|
||||
|
||||
class TestContinuousAPI:
|
||||
"""Continuous (VAD) mode API — CLI-parity loop entry points."""
|
||||
|
||||
def test_continuous_exports(self):
|
||||
from hermes_cli.voice import (
|
||||
is_continuous_active,
|
||||
start_continuous,
|
||||
stop_continuous,
|
||||
)
|
||||
|
||||
assert callable(start_continuous)
|
||||
assert callable(stop_continuous)
|
||||
assert callable(is_continuous_active)
|
||||
|
||||
def test_not_active_by_default(self, monkeypatch):
|
||||
import hermes_cli.voice as voice
|
||||
|
||||
# Isolate from any state left behind by other tests in the session.
|
||||
monkeypatch.setattr(voice, "_continuous_active", False)
|
||||
monkeypatch.setattr(voice, "_continuous_recorder", None)
|
||||
|
||||
assert voice.is_continuous_active() is False
|
||||
|
||||
def test_stop_continuous_idempotent_when_inactive(self, monkeypatch):
|
||||
"""stop_continuous must not raise when no loop is active — the
|
||||
gateway's voice.toggle off path calls it unconditionally."""
|
||||
import hermes_cli.voice as voice
|
||||
|
||||
monkeypatch.setattr(voice, "_continuous_active", False)
|
||||
monkeypatch.setattr(voice, "_continuous_recorder", None)
|
||||
|
||||
# Should return cleanly without exceptions
|
||||
assert voice.stop_continuous() is None
|
||||
assert voice.is_continuous_active() is False
|
||||
|
||||
def test_double_start_is_idempotent(self, monkeypatch):
|
||||
"""A second start_continuous while already active is a no-op — prevents
|
||||
two overlapping capture threads fighting over the microphone when the
|
||||
UI double-fires (e.g. both /voice on and Ctrl+B within the same tick)."""
|
||||
import hermes_cli.voice as voice
|
||||
|
||||
monkeypatch.setattr(voice, "_continuous_active", True)
|
||||
called = {"n": 0}
|
||||
|
||||
class FakeRecorder:
|
||||
def start(self, on_silence_stop=None):
|
||||
called["n"] += 1
|
||||
|
||||
def cancel(self):
|
||||
pass
|
||||
|
||||
monkeypatch.setattr(voice, "_continuous_recorder", FakeRecorder())
|
||||
|
||||
voice.start_continuous(on_transcript=lambda _t: None)
|
||||
|
||||
# The guard inside start_continuous short-circuits before rec.start()
|
||||
assert called["n"] == 0
|
||||
|
||||
|
||||
class TestContinuousLoopSimulation:
|
||||
"""End-to-end simulation of the VAD loop with a fake recorder.
|
||||
|
||||
Proves auto-restart works: the silence callback must trigger transcribe →
|
||||
on_transcript → re-call rec.start(on_silence_stop=same_cb). Also covers
|
||||
the 3-strikes no-speech halt.
|
||||
"""
|
||||
|
||||
@pytest.fixture
|
||||
def fake_recorder(self, monkeypatch):
|
||||
import hermes_cli.voice as voice
|
||||
|
||||
# Reset module state between tests.
|
||||
monkeypatch.setattr(voice, "_continuous_active", False)
|
||||
monkeypatch.setattr(voice, "_continuous_recorder", None)
|
||||
monkeypatch.setattr(voice, "_continuous_no_speech_count", 0)
|
||||
monkeypatch.setattr(voice, "_continuous_on_transcript", None)
|
||||
monkeypatch.setattr(voice, "_continuous_on_status", None)
|
||||
monkeypatch.setattr(voice, "_continuous_on_silent_limit", None)
|
||||
|
||||
class FakeRecorder:
|
||||
_silence_threshold = 200
|
||||
_silence_duration = 3.0
|
||||
is_recording = False
|
||||
|
||||
def __init__(self):
|
||||
self.start_calls = 0
|
||||
self.last_callback = None
|
||||
self.stopped = 0
|
||||
self.cancelled = 0
|
||||
# Preset WAV path returned by stop()
|
||||
self.next_stop_wav = "/tmp/fake.wav"
|
||||
|
||||
def start(self, on_silence_stop=None):
|
||||
self.start_calls += 1
|
||||
self.last_callback = on_silence_stop
|
||||
self.is_recording = True
|
||||
|
||||
def stop(self):
|
||||
self.stopped += 1
|
||||
self.is_recording = False
|
||||
return self.next_stop_wav
|
||||
|
||||
def cancel(self):
|
||||
self.cancelled += 1
|
||||
self.is_recording = False
|
||||
|
||||
rec = FakeRecorder()
|
||||
monkeypatch.setattr(voice, "create_audio_recorder", lambda: rec)
|
||||
# Skip real file ops in the silence callback.
|
||||
monkeypatch.setattr(voice.os.path, "isfile", lambda _p: False)
|
||||
return rec
|
||||
|
||||
def test_loop_auto_restarts_after_transcript(self, fake_recorder, monkeypatch):
|
||||
import hermes_cli.voice as voice
|
||||
|
||||
monkeypatch.setattr(
|
||||
voice,
|
||||
"transcribe_recording",
|
||||
lambda _p: {"success": True, "transcript": "hello world"},
|
||||
)
|
||||
monkeypatch.setattr(voice, "is_whisper_hallucination", lambda _t: False)
|
||||
|
||||
transcripts = []
|
||||
statuses = []
|
||||
|
||||
voice.start_continuous(
|
||||
on_transcript=lambda t: transcripts.append(t),
|
||||
on_status=lambda s: statuses.append(s),
|
||||
)
|
||||
|
||||
assert fake_recorder.start_calls == 1
|
||||
assert statuses == ["listening"]
|
||||
|
||||
# Simulate AudioRecorder's silence detector firing.
|
||||
fake_recorder.last_callback()
|
||||
|
||||
assert transcripts == ["hello world"]
|
||||
assert fake_recorder.start_calls == 2 # auto-restarted
|
||||
assert statuses == ["listening", "transcribing", "listening"]
|
||||
assert voice.is_continuous_active() is True
|
||||
|
||||
voice.stop_continuous()
|
||||
|
||||
def test_silent_limit_halts_loop_after_three_strikes(self, fake_recorder, monkeypatch):
|
||||
import hermes_cli.voice as voice
|
||||
|
||||
# Transcription returns no speech — fake_recorder.stop() returns the
|
||||
# path, but transcribe returns empty text, counting as silence.
|
||||
monkeypatch.setattr(
|
||||
voice,
|
||||
"transcribe_recording",
|
||||
lambda _p: {"success": True, "transcript": ""},
|
||||
)
|
||||
monkeypatch.setattr(voice, "is_whisper_hallucination", lambda _t: False)
|
||||
|
||||
transcripts = []
|
||||
silent_limit_fired = []
|
||||
|
||||
voice.start_continuous(
|
||||
on_transcript=lambda t: transcripts.append(t),
|
||||
on_silent_limit=lambda: silent_limit_fired.append(True),
|
||||
)
|
||||
|
||||
# Fire silence callback 3 times
|
||||
for _ in range(3):
|
||||
fake_recorder.last_callback()
|
||||
|
||||
assert transcripts == []
|
||||
assert silent_limit_fired == [True]
|
||||
assert voice.is_continuous_active() is False
|
||||
assert fake_recorder.cancelled >= 1
|
||||
|
||||
def test_stop_during_transcription_discards_restart(self, fake_recorder, monkeypatch):
|
||||
"""User hits Ctrl+B mid-transcription: the in-flight transcript must
|
||||
be dropped (the user asked to stop), and the loop must NOT restart."""
|
||||
import hermes_cli.voice as voice
|
||||
|
||||
stop_triggered = {"flag": False}
|
||||
|
||||
def late_transcribe(_p):
|
||||
# Simulate stop_continuous arriving while we're inside transcribe
|
||||
voice.stop_continuous()
|
||||
stop_triggered["flag"] = True
|
||||
return {"success": True, "transcript": "final word"}
|
||||
|
||||
monkeypatch.setattr(voice, "transcribe_recording", late_transcribe)
|
||||
monkeypatch.setattr(voice, "is_whisper_hallucination", lambda _t: False)
|
||||
|
||||
transcripts = []
|
||||
voice.start_continuous(on_transcript=lambda t: transcripts.append(t))
|
||||
|
||||
initial_starts = fake_recorder.start_calls # 1
|
||||
fake_recorder.last_callback()
|
||||
|
||||
assert stop_triggered["flag"] is True
|
||||
# Loop is stopped — no auto-restart
|
||||
assert fake_recorder.start_calls == initial_starts
|
||||
# The in-flight transcript was suppressed because we stopped mid-flight
|
||||
assert transcripts == []
|
||||
assert voice.is_continuous_active() is False
|
||||
@@ -134,6 +134,31 @@ class TestCoerceValue:
|
||||
"""A non-numeric string in [number, string] should stay a string."""
|
||||
assert _coerce_value("hello", ["number", "string"]) == "hello"
|
||||
|
||||
def test_array_type_parsed_from_json_string(self):
|
||||
"""Stringified JSON arrays are parsed into native lists."""
|
||||
assert _coerce_value('["a", "b"]', "array") == ["a", "b"]
|
||||
assert _coerce_value("[1, 2, 3]", "array") == [1, 2, 3]
|
||||
|
||||
def test_object_type_parsed_from_json_string(self):
|
||||
"""Stringified JSON objects are parsed into native dicts."""
|
||||
assert _coerce_value('{"k": "v"}', "object") == {"k": "v"}
|
||||
assert _coerce_value('{"n": 1}', "object") == {"n": 1}
|
||||
|
||||
def test_array_invalid_json_preserved(self):
|
||||
"""Unparseable strings are returned unchanged."""
|
||||
assert _coerce_value("not-json", "array") == "not-json"
|
||||
|
||||
def test_object_invalid_json_preserved(self):
|
||||
assert _coerce_value("not-json", "object") == "not-json"
|
||||
|
||||
def test_array_type_wrong_shape_preserved(self):
|
||||
"""A JSON object passed for an 'array' slot is preserved as a string."""
|
||||
assert _coerce_value('{"k": "v"}', "array") == '{"k": "v"}'
|
||||
|
||||
def test_object_type_wrong_shape_preserved(self):
|
||||
"""A JSON array passed for an 'object' slot is preserved as a string."""
|
||||
assert _coerce_value('["a"]', "object") == '["a"]'
|
||||
|
||||
|
||||
# ── Full coerce_tool_args with registry ───────────────────────────────────
|
||||
|
||||
@@ -212,6 +237,32 @@ class TestCoerceToolArgs:
|
||||
assert result["items"] == [1, 2, 3]
|
||||
assert result["config"] == {"key": "val"}
|
||||
|
||||
def test_coerces_stringified_array_arg(self):
|
||||
"""Regression for #3947 — MCP servers using z.array() expect lists, not strings."""
|
||||
schema = self._mock_schema({
|
||||
"messageIds": {"type": "array", "items": {"type": "string"}},
|
||||
})
|
||||
with patch("model_tools.registry.get_schema", return_value=schema):
|
||||
args = {"messageIds": '["abc", "def"]'}
|
||||
result = coerce_tool_args("test_tool", args)
|
||||
assert result["messageIds"] == ["abc", "def"]
|
||||
|
||||
def test_coerces_stringified_object_arg(self):
|
||||
"""Stringified JSON objects get parsed into dicts."""
|
||||
schema = self._mock_schema({"config": {"type": "object"}})
|
||||
with patch("model_tools.registry.get_schema", return_value=schema):
|
||||
args = {"config": '{"max": 50}'}
|
||||
result = coerce_tool_args("test_tool", args)
|
||||
assert result["config"] == {"max": 50}
|
||||
|
||||
def test_invalid_json_array_preserved_as_string(self):
|
||||
"""If the string isn't valid JSON, pass it through — let the tool decide."""
|
||||
schema = self._mock_schema({"items": {"type": "array"}})
|
||||
with patch("model_tools.registry.get_schema", return_value=schema):
|
||||
args = {"items": "not-json"}
|
||||
result = coerce_tool_args("test_tool", args)
|
||||
assert result["items"] == "not-json"
|
||||
|
||||
def test_extra_args_without_schema_left_alone(self):
|
||||
"""Args not in the schema properties are not touched."""
|
||||
schema = self._mock_schema({"limit": {"type": "integer"}})
|
||||
|
||||
@@ -120,6 +120,177 @@ class TestSchemaConversion:
|
||||
|
||||
assert schema["parameters"] == {"type": "object", "properties": {}}
|
||||
|
||||
def test_definitions_refs_are_rewritten_to_defs(self):
|
||||
from tools.mcp_tool import _convert_mcp_schema
|
||||
|
||||
mcp_tool = _make_mcp_tool(
|
||||
name="submit",
|
||||
description="Submit a payload",
|
||||
input_schema={
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"input": {"$ref": "#/definitions/Payload"},
|
||||
},
|
||||
"required": ["input"],
|
||||
"definitions": {
|
||||
"Payload": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"query": {"type": "string"},
|
||||
},
|
||||
"required": ["query"],
|
||||
}
|
||||
},
|
||||
},
|
||||
)
|
||||
|
||||
schema = _convert_mcp_schema("forms", mcp_tool)
|
||||
|
||||
assert schema["parameters"]["properties"]["input"]["$ref"] == "#/$defs/Payload"
|
||||
assert "$defs" in schema["parameters"]
|
||||
assert "definitions" not in schema["parameters"]
|
||||
|
||||
def test_nested_definition_refs_are_rewritten_recursively(self):
|
||||
from tools.mcp_tool import _convert_mcp_schema
|
||||
|
||||
mcp_tool = _make_mcp_tool(
|
||||
name="nested",
|
||||
description="Nested schema",
|
||||
input_schema={
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"items": {
|
||||
"type": "array",
|
||||
"items": {"$ref": "#/definitions/Entry"},
|
||||
},
|
||||
},
|
||||
"definitions": {
|
||||
"Entry": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"child": {"$ref": "#/definitions/Child"},
|
||||
},
|
||||
},
|
||||
"Child": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"value": {"type": "string"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
)
|
||||
|
||||
schema = _convert_mcp_schema("forms", mcp_tool)
|
||||
|
||||
assert schema["parameters"]["properties"]["items"]["items"]["$ref"] == "#/$defs/Entry"
|
||||
assert schema["parameters"]["$defs"]["Entry"]["properties"]["child"]["$ref"] == "#/$defs/Child"
|
||||
|
||||
def test_missing_type_on_object_is_coerced(self):
|
||||
"""Schemas that describe an object but omit ``type`` get type='object'."""
|
||||
from tools.mcp_tool import _normalize_mcp_input_schema
|
||||
|
||||
schema = _normalize_mcp_input_schema({
|
||||
"properties": {"q": {"type": "string"}},
|
||||
"required": ["q"],
|
||||
})
|
||||
|
||||
assert schema["type"] == "object"
|
||||
assert schema["properties"]["q"]["type"] == "string"
|
||||
assert schema["required"] == ["q"]
|
||||
|
||||
def test_null_type_on_object_is_coerced(self):
|
||||
"""type: None should be treated like missing type (common MCP server bug)."""
|
||||
from tools.mcp_tool import _normalize_mcp_input_schema
|
||||
|
||||
schema = _normalize_mcp_input_schema({
|
||||
"type": None,
|
||||
"properties": {"x": {"type": "integer"}},
|
||||
})
|
||||
|
||||
assert schema["type"] == "object"
|
||||
|
||||
def test_required_pruned_when_property_missing(self):
|
||||
"""Gemini 400s on required names that don't exist in properties."""
|
||||
from tools.mcp_tool import _normalize_mcp_input_schema
|
||||
|
||||
schema = _normalize_mcp_input_schema({
|
||||
"type": "object",
|
||||
"properties": {"a": {"type": "string"}},
|
||||
"required": ["a", "ghost", "phantom"],
|
||||
})
|
||||
|
||||
assert schema["required"] == ["a"]
|
||||
|
||||
def test_required_removed_when_all_names_dangle(self):
|
||||
from tools.mcp_tool import _normalize_mcp_input_schema
|
||||
|
||||
schema = _normalize_mcp_input_schema({
|
||||
"type": "object",
|
||||
"properties": {},
|
||||
"required": ["ghost"],
|
||||
})
|
||||
|
||||
assert "required" not in schema
|
||||
|
||||
def test_required_pruning_applies_recursively_inside_nested_objects(self):
|
||||
"""Nested object schemas also get required pruning."""
|
||||
from tools.mcp_tool import _normalize_mcp_input_schema
|
||||
|
||||
schema = _normalize_mcp_input_schema({
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"filter": {
|
||||
"type": "object",
|
||||
"properties": {"field": {"type": "string"}},
|
||||
"required": ["field", "missing"],
|
||||
},
|
||||
},
|
||||
})
|
||||
|
||||
assert schema["properties"]["filter"]["required"] == ["field"]
|
||||
|
||||
def test_object_in_array_items_gets_properties_filled(self):
|
||||
"""Array-item object schemas without properties get an empty dict."""
|
||||
from tools.mcp_tool import _normalize_mcp_input_schema
|
||||
|
||||
schema = _normalize_mcp_input_schema({
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"items": {
|
||||
"type": "array",
|
||||
"items": {"type": "object"},
|
||||
},
|
||||
},
|
||||
})
|
||||
|
||||
assert schema["properties"]["items"]["items"]["properties"] == {}
|
||||
|
||||
def test_convert_mcp_schema_survives_missing_inputschema_attribute(self):
|
||||
"""A Tool object without .inputSchema must not crash registration."""
|
||||
import types
|
||||
|
||||
from tools.mcp_tool import _convert_mcp_schema
|
||||
|
||||
bare_tool = types.SimpleNamespace(name="probe", description="Probe")
|
||||
schema = _convert_mcp_schema("srv", bare_tool)
|
||||
|
||||
assert schema["name"] == "mcp_srv_probe"
|
||||
assert schema["parameters"] == {"type": "object", "properties": {}}
|
||||
|
||||
def test_convert_mcp_schema_with_none_inputschema(self):
|
||||
"""Tool with inputSchema=None produces a valid empty object schema."""
|
||||
import types
|
||||
|
||||
from tools.mcp_tool import _convert_mcp_schema
|
||||
|
||||
# Note: _make_mcp_tool(input_schema=None) falls back to a default —
|
||||
# build the namespace directly so .inputSchema really is None.
|
||||
mcp_tool = types.SimpleNamespace(name="probe", description="Probe", inputSchema=None)
|
||||
schema = _convert_mcp_schema("srv", mcp_tool)
|
||||
|
||||
assert schema["parameters"] == {"type": "object", "properties": {}}
|
||||
|
||||
def test_tool_name_prefix_format(self):
|
||||
from tools.mcp_tool import _convert_mcp_schema
|
||||
|
||||
|
||||
@@ -298,7 +298,7 @@ def _get_child_timeout() -> float:
|
||||
"""Read delegation.child_timeout_seconds from config.
|
||||
|
||||
Returns the number of seconds a single child agent is allowed to run
|
||||
before being considered stuck. Default: 300 s (5 minutes).
|
||||
before being considered stuck. Default: 600 s (10 minutes).
|
||||
"""
|
||||
cfg = _load_config()
|
||||
val = cfg.get("child_timeout_seconds")
|
||||
@@ -409,7 +409,7 @@ def _preserve_parent_mcp_toolsets(
|
||||
|
||||
|
||||
DEFAULT_MAX_ITERATIONS = 50
|
||||
DEFAULT_CHILD_TIMEOUT = 300 # seconds before a child agent is considered stuck
|
||||
DEFAULT_CHILD_TIMEOUT = 600 # seconds before a child agent is considered stuck
|
||||
_HEARTBEAT_INTERVAL = 30 # seconds between parent activity heartbeats during delegation
|
||||
_HEARTBEAT_STALE_CYCLES = (
|
||||
5 # mark child stale after this many heartbeats with no iteration progress
|
||||
|
||||
@@ -2019,14 +2019,92 @@ def _make_check_fn(server_name: str):
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _normalize_mcp_input_schema(schema: dict | None) -> dict:
|
||||
"""Normalize MCP input schemas for LLM tool-calling compatibility."""
|
||||
"""Normalize MCP input schemas for LLM tool-calling compatibility.
|
||||
|
||||
MCP servers can emit plain JSON Schema with ``definitions`` /
|
||||
``#/definitions/...`` references. Kimi / Moonshot rejects that form and
|
||||
requires local refs to point into ``#/$defs/...`` instead. Normalize the
|
||||
common draft-07 shape here so MCP tool schemas remain portable across
|
||||
OpenAI-compatible providers.
|
||||
|
||||
Additional MCP-server robustness repairs applied recursively:
|
||||
|
||||
* Missing or ``null`` ``type`` on an object-shaped node is coerced to
|
||||
``"object"`` (some servers omit it). See PR #4897.
|
||||
* When an ``object`` node lacks ``properties``, an empty ``properties``
|
||||
dict is added so ``required`` entries don't dangle.
|
||||
* ``required`` arrays are pruned to only names that exist in
|
||||
``properties``; otherwise Google AI Studio / Gemini 400s with
|
||||
``property is not defined``. See PR #4651.
|
||||
|
||||
All repairs are provider-agnostic and ideally produce a schema valid on
|
||||
OpenAI, Anthropic, Gemini, and Moonshot in one pass.
|
||||
"""
|
||||
if not schema:
|
||||
return {"type": "object", "properties": {}}
|
||||
|
||||
if schema.get("type") == "object" and "properties" not in schema:
|
||||
return {**schema, "properties": {}}
|
||||
def _rewrite_local_refs(node):
|
||||
if isinstance(node, dict):
|
||||
normalized = {}
|
||||
for key, value in node.items():
|
||||
out_key = "$defs" if key == "definitions" else key
|
||||
normalized[out_key] = _rewrite_local_refs(value)
|
||||
ref = normalized.get("$ref")
|
||||
if isinstance(ref, str) and ref.startswith("#/definitions/"):
|
||||
normalized["$ref"] = "#/$defs/" + ref[len("#/definitions/"):]
|
||||
return normalized
|
||||
if isinstance(node, list):
|
||||
return [_rewrite_local_refs(item) for item in node]
|
||||
return node
|
||||
|
||||
return schema
|
||||
def _repair_object_shape(node):
|
||||
"""Recursively repair object-shaped nodes: fill type, prune required."""
|
||||
if isinstance(node, list):
|
||||
return [_repair_object_shape(item) for item in node]
|
||||
if not isinstance(node, dict):
|
||||
return node
|
||||
|
||||
repaired = {k: _repair_object_shape(v) for k, v in node.items()}
|
||||
|
||||
# Coerce missing / null type when the shape is clearly an object
|
||||
# (has properties or required but no type).
|
||||
if not repaired.get("type") and (
|
||||
"properties" in repaired or "required" in repaired
|
||||
):
|
||||
repaired["type"] = "object"
|
||||
|
||||
if repaired.get("type") == "object":
|
||||
# Ensure properties exists so required can reference it safely
|
||||
if "properties" not in repaired or not isinstance(
|
||||
repaired.get("properties"), dict
|
||||
):
|
||||
repaired["properties"] = {} if "properties" not in repaired else repaired["properties"]
|
||||
if not isinstance(repaired.get("properties"), dict):
|
||||
repaired["properties"] = {}
|
||||
|
||||
# Prune required to only include names that exist in properties
|
||||
required = repaired.get("required")
|
||||
if isinstance(required, list):
|
||||
props = repaired.get("properties") or {}
|
||||
valid = [r for r in required if isinstance(r, str) and r in props]
|
||||
if len(valid) != len(required):
|
||||
if valid:
|
||||
repaired["required"] = valid
|
||||
else:
|
||||
repaired.pop("required", None)
|
||||
|
||||
return repaired
|
||||
|
||||
normalized = _rewrite_local_refs(schema)
|
||||
normalized = _repair_object_shape(normalized)
|
||||
|
||||
# Ensure top-level is a well-formed object schema
|
||||
if not isinstance(normalized, dict):
|
||||
return {"type": "object", "properties": {}}
|
||||
if normalized.get("type") == "object" and "properties" not in normalized:
|
||||
normalized = {**normalized, "properties": {}}
|
||||
|
||||
return normalized
|
||||
|
||||
|
||||
def sanitize_mcp_name_component(value: str) -> str:
|
||||
@@ -2057,7 +2135,7 @@ def _convert_mcp_schema(server_name: str, mcp_tool) -> dict:
|
||||
return {
|
||||
"name": prefixed_name,
|
||||
"description": mcp_tool.description or f"MCP tool {mcp_tool.name} from {server_name}",
|
||||
"parameters": _normalize_mcp_input_schema(mcp_tool.inputSchema),
|
||||
"parameters": _normalize_mcp_input_schema(getattr(mcp_tool, "inputSchema", None)),
|
||||
}
|
||||
|
||||
|
||||
|
||||
13
toolsets.py
13
toolsets.py
@@ -295,7 +295,18 @@ TOOLSETS = {
|
||||
"tools": _HERMES_CORE_TOOLS,
|
||||
"includes": []
|
||||
},
|
||||
|
||||
|
||||
"hermes-cron": {
|
||||
# Mirrors hermes-cli so cron's "default" toolset is the same set of
|
||||
# core tools users see interactively — then `hermes tools` filters
|
||||
# them down per the platform config. _DEFAULT_OFF_TOOLSETS (moa,
|
||||
# homeassistant, rl) are excluded by _get_platform_tools() unless
|
||||
# the user explicitly enables them.
|
||||
"description": "Default cron toolset - same core tools as hermes-cli; gated by `hermes tools`",
|
||||
"tools": _HERMES_CORE_TOOLS,
|
||||
"includes": []
|
||||
},
|
||||
|
||||
"hermes-telegram": {
|
||||
"description": "Telegram bot toolset - full access for personal use (terminal has safety checks)",
|
||||
"tools": _HERMES_CORE_TOOLS,
|
||||
|
||||
@@ -1,19 +1,93 @@
|
||||
import json
|
||||
import os
|
||||
import signal
|
||||
import sys
|
||||
import time
|
||||
import traceback
|
||||
|
||||
from tui_gateway.server import dispatch, resolve_skin, write_json
|
||||
from tui_gateway.server import _CRASH_LOG, dispatch, resolve_skin, write_json
|
||||
|
||||
signal.signal(signal.SIGPIPE, signal.SIG_DFL)
|
||||
|
||||
def _log_signal(signum: int, frame) -> None:
    """Capture WHICH thread and WHERE a termination signal hit us, then exit.

    Appends a timestamped section to ``_CRASH_LOG`` containing the stack of
    the interrupted frame (when one is supplied) followed by the stack of
    every thread known to ``threading``. Logging is best-effort: any failure
    while writing is swallowed so the handler always reaches the stderr
    notice and the ``sys.exit(0)`` at the end.

    Without a handler like this, a signal with default disposition ends the
    process before the interpreter can run anything, so the crash log never
    shows why the gateway went away.

    Args:
        signum: Signal number passed by the ``signal`` machinery.
        frame: Frame that was executing when the signal arrived, or ``None``.
    """
    name = {
        signal.SIGPIPE: "SIGPIPE",
        signal.SIGTERM: "SIGTERM",
        signal.SIGHUP: "SIGHUP",
    }.get(signum, f"signal {signum}")
    try:
        os.makedirs(os.path.dirname(_CRASH_LOG), exist_ok=True)
        with open(_CRASH_LOG, "a", encoding="utf-8") as f:
            f.write(
                f"\n=== {name} received · {time.strftime('%Y-%m-%d %H:%M:%S')} ===\n"
            )
            if frame is not None:
                f.write("main-thread stack at signal delivery:\n")
                traceback.print_stack(frame, file=f)
            # All live threads — signal may have been triggered by a
            # background thread (write to broken stdout from TTS, etc.).
            import threading as _threading

            # Take both snapshots up front. File writes below can let other
            # threads run; iterating the live registry while threads start
            # or finish raises "dictionary changed size during iteration",
            # which the blanket except would turn into a truncated dump.
            # NOTE(review): the private registry is kept instead of
            # threading.enumerate() so the handler takes no threading lock
            # the interrupted main thread might hold — confirm before
            # switching to the public API.
            frames = sys._current_frames()
            threads = list(_threading._active.items())
            for tid, th in threads:
                f.write(f"\n--- thread {th.name} (id={tid}) ---\n")
                stack = frames.get(tid)
                if stack is None:
                    # format_stack(None) would print THIS handler's stack
                    # under the other thread's heading — say so instead.
                    f.write("(no frame available)\n")
                    continue
                f.write("".join(traceback.format_stack(stack)))
    except Exception:
        # Best-effort forensics only; never let logging block the exit.
        pass
    print(f"[gateway-signal] {name}", file=sys.stderr, flush=True)
    sys.exit(0)
|
||||
|
||||
|
||||
# SIGPIPE: ignore, don't exit. The old SIG_DFL killed the process
|
||||
# silently whenever a *background* thread (TTS playback chain, voice
|
||||
# debug stderr emitter, beep thread) wrote to a pipe the TUI had gone
|
||||
# quiet on — even though the main thread was perfectly fine waiting on
|
||||
# stdin. Ignoring the signal lets Python raise BrokenPipeError on the
|
||||
# offending write (write_json already handles that with a clean
|
||||
# sys.exit(0) + _log_exit), which keeps the gateway alive as long as
|
||||
# the main command pipe is still readable. Terminal signals still
|
||||
# route through _log_signal so kills and hangups are diagnosable.
|
||||
signal.signal(signal.SIGPIPE, signal.SIG_IGN)
|
||||
signal.signal(signal.SIGTERM, _log_signal)
|
||||
signal.signal(signal.SIGHUP, _log_signal)
|
||||
signal.signal(signal.SIGINT, signal.SIG_IGN)
|
||||
|
||||
|
||||
def _log_exit(reason: str) -> None:
    """Record why the gateway subprocess is shutting down.

    ``main`` calls this on four exit paths: startup write failure,
    parse-error-response write failure, dispatch-response write failure,
    and stdin EOF. Each would otherwise end silently, leaving the TUI's
    "gateway exited" banner with no clue about which pipe or message was
    involved.

    The reason is appended to ``_CRASH_LOG`` (best-effort; write errors are
    ignored) and echoed to stderr as a single ``[gateway-exit]`` line. This
    function does not terminate the process itself.

    Args:
        reason: Human-readable description of the exit path.
    """
    try:
        log_dir = os.path.dirname(_CRASH_LOG)
        os.makedirs(log_dir, exist_ok=True)
        with open(_CRASH_LOG, "a", encoding="utf-8") as log:
            stamp = time.strftime('%Y-%m-%d %H:%M:%S')
            log.write(f"\n=== gateway exit · {stamp} · reason={reason} ===\n")
    except Exception:
        # The log is diagnostic only; the stderr line below still goes out.
        pass
    print(f"[gateway-exit] {reason}", file=sys.stderr, flush=True)
|
||||
|
||||
|
||||
def main():
|
||||
if not write_json({
|
||||
"jsonrpc": "2.0",
|
||||
"method": "event",
|
||||
"params": {"type": "gateway.ready", "payload": {"skin": resolve_skin()}},
|
||||
}):
|
||||
_log_exit("startup write failed (broken stdout pipe before first event)")
|
||||
sys.exit(0)
|
||||
|
||||
for raw in sys.stdin:
|
||||
@@ -25,14 +99,19 @@ def main():
|
||||
req = json.loads(line)
|
||||
except json.JSONDecodeError:
|
||||
if not write_json({"jsonrpc": "2.0", "error": {"code": -32700, "message": "parse error"}, "id": None}):
|
||||
_log_exit("parse-error-response write failed (broken stdout pipe)")
|
||||
sys.exit(0)
|
||||
continue
|
||||
|
||||
method = req.get("method") if isinstance(req, dict) else None
|
||||
resp = dispatch(req)
|
||||
if resp is not None:
|
||||
if not write_json(resp):
|
||||
_log_exit(f"response write failed for method={method!r} (broken stdout pipe)")
|
||||
sys.exit(0)
|
||||
|
||||
_log_exit("stdin EOF (TUI closed the command pipe)")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
||||
@@ -23,6 +23,75 @@ load_hermes_dotenv(
|
||||
hermes_home=_hermes_home, project_env=Path(__file__).parent.parent / ".env"
|
||||
)
|
||||
|
||||
|
||||
# ── Panic logger ─────────────────────────────────────────────────────
|
||||
# Gateway crashes in a TUI session leave no forensics: stdout is the
|
||||
# JSON-RPC pipe (TUI side parses it, doesn't log raw), the root logger
|
||||
# only catches handled warnings, and the subprocess exits before stderr
|
||||
# flushes through the stderr->gateway.stderr event pump. This hook
|
||||
# appends every unhandled exception to ~/.hermes/logs/tui_gateway_crash.log
|
||||
# AND re-emits a one-line summary to stderr so the TUI can surface it in
|
||||
# Activity — exactly what was missing when the voice-mode turns started
|
||||
# exiting the gateway mid-TTS.
|
||||
_CRASH_LOG = os.path.join(_hermes_home, "logs", "tui_gateway_crash.log")
|
||||
|
||||
|
||||
def _panic_hook(exc_type, exc_value, exc_tb):
    """``sys.excepthook`` replacement that leaves a trail for gateway crashes.

    Appends the full formatted traceback to ``_CRASH_LOG`` (best-effort),
    prints a one-line ``[gateway-crash]`` summary to stderr, then delegates
    to ``sys.__excepthook__`` so the default reporting still happens.

    Args:
        exc_type: Exception class.
        exc_value: Exception instance.
        exc_tb: Traceback object.
    """
    import traceback

    trace = "".join(traceback.format_exception(exc_type, exc_value, exc_tb))
    try:
        os.makedirs(os.path.dirname(_CRASH_LOG), exist_ok=True)
        with open(_CRASH_LOG, "a", encoding="utf-8") as log:
            stamp = time.strftime('%Y-%m-%d %H:%M:%S')
            log.write(f"\n=== unhandled exception · {stamp} ===\n")
            log.write(trace)
    except Exception:
        # Never let a logging failure mask the original exception.
        pass

    # The summary is the first line of the exception message, or the
    # exception class name when the message is empty. The full stack is
    # in the log file.
    message = str(exc_value).strip()
    if message:
        first = message.splitlines()[0]
    else:
        first = exc_type.__name__
    print(f"[gateway-crash] {exc_type.__name__}: {first}", file=sys.stderr, flush=True)

    # Chain to the default hook so the process still terminates normally.
    sys.__excepthook__(exc_type, exc_value, exc_tb)
|
||||
|
||||
|
||||
sys.excepthook = _panic_hook
|
||||
|
||||
|
||||
def _thread_panic_hook(args):
    """``threading.excepthook`` replacement mirroring the main-thread hook.

    Appends the formatted traceback, tagged with the thread name, to
    ``_CRASH_LOG`` (best-effort) and prints a one-line ``[gateway-crash]``
    summary to stderr.

    Args:
        args: Object exposing ``exc_type``, ``exc_value``, ``exc_traceback``
            and ``thread``, as passed to ``threading.excepthook``.
    """
    import traceback

    exc_type = args.exc_type
    exc_value = args.exc_value
    trace = "".join(
        traceback.format_exception(exc_type, exc_value, args.exc_traceback)
    )
    try:
        os.makedirs(os.path.dirname(_CRASH_LOG), exist_ok=True)
        with open(_CRASH_LOG, "a", encoding="utf-8") as log:
            stamp = time.strftime('%Y-%m-%d %H:%M:%S')
            log.write(
                f"\n=== thread exception · {stamp} · thread={args.thread.name} ===\n"
            )
            log.write(trace)
    except Exception:
        # Logging is diagnostic only; fall through to the stderr summary.
        pass

    # First line of the message, or the class name for an empty message.
    message = str(exc_value).strip()
    first_line = message.splitlines()[0] if message else exc_type.__name__
    print(
        f"[gateway-crash] thread {args.thread.name} raised {exc_type.__name__}: {first_line}",
        file=sys.stderr,
        flush=True,
    )
|
||||
|
||||
|
||||
threading.excepthook = _thread_panic_hook
|
||||
|
||||
try:
|
||||
from hermes_cli.banner import prefetch_update_check
|
||||
|
||||
@@ -2126,7 +2195,43 @@ def _(rid, params: dict) -> dict:
|
||||
if rendered:
|
||||
payload["rendered"] = rendered
|
||||
_emit("message.complete", sid, payload)
|
||||
|
||||
# CLI parity: when voice-mode TTS is on, speak the agent reply
|
||||
# (cli.py:_voice_speak_response). Only the final text — tool
|
||||
# calls / reasoning already stream separately and would be
|
||||
# noisy to read aloud.
|
||||
if (
|
||||
status == "complete"
|
||||
and isinstance(raw, str)
|
||||
and raw.strip()
|
||||
and _voice_tts_enabled()
|
||||
):
|
||||
try:
|
||||
from hermes_cli.voice import speak_text
|
||||
|
||||
spoken = raw
|
||||
threading.Thread(
|
||||
target=speak_text, args=(spoken,), daemon=True
|
||||
).start()
|
||||
except ImportError:
|
||||
logger.warning("voice TTS skipped: hermes_cli.voice unavailable")
|
||||
except Exception as e:
|
||||
logger.warning("voice TTS dispatch failed: %s", e)
|
||||
except Exception as e:
|
||||
import traceback
|
||||
|
||||
trace = traceback.format_exc()
|
||||
try:
|
||||
os.makedirs(os.path.dirname(_CRASH_LOG), exist_ok=True)
|
||||
with open(_CRASH_LOG, "a", encoding="utf-8") as f:
|
||||
f.write(
|
||||
f"\n=== turn-dispatcher exception · "
|
||||
f"{time.strftime('%Y-%m-%d %H:%M:%S')} · sid={sid} ===\n"
|
||||
)
|
||||
f.write(trace)
|
||||
except Exception:
|
||||
pass
|
||||
print(f"[gateway-turn] {type(e).__name__}: {e}", file=sys.stderr, flush=True)
|
||||
_emit("error", sid, {"message": str(e)})
|
||||
finally:
|
||||
try:
|
||||
@@ -3151,6 +3256,162 @@ def _(rid, params: dict) -> dict:
|
||||
|
||||
# ── Methods: complete ─────────────────────────────────────────────────
|
||||
|
||||
_FUZZY_CACHE_TTL_S = 5.0
|
||||
_FUZZY_CACHE_MAX_FILES = 20000
|
||||
_FUZZY_FALLBACK_EXCLUDES = frozenset(
|
||||
{
|
||||
".git",
|
||||
".hg",
|
||||
".svn",
|
||||
".next",
|
||||
".cache",
|
||||
".venv",
|
||||
"venv",
|
||||
"node_modules",
|
||||
"__pycache__",
|
||||
"dist",
|
||||
"build",
|
||||
"target",
|
||||
".mypy_cache",
|
||||
".pytest_cache",
|
||||
".ruff_cache",
|
||||
}
|
||||
)
|
||||
_fuzzy_cache_lock = threading.Lock()
|
||||
_fuzzy_cache: dict[str, tuple[float, list[str]]] = {}
|
||||
|
||||
|
||||
def _list_repo_files(root: str) -> list[str]:
    """Return file paths relative to ``root``, using ``/`` as separator.

    Strategy:

    1. Serve from the per-root cache when the entry is younger than
       ``_FUZZY_CACHE_TTL_S``.
    2. Ask git for the repo top (``rev-parse --show-toplevel``) and list
       cached + untracked-but-not-ignored files from there. Each path is
       re-expressed relative to ``root``; anything that resolves to
       ``../…`` (outside ``root``) is dropped.
    3. If that produced nothing (not a repo, git missing, timeout, or no
       files under ``root``), walk ``root`` with ``os.walk``, pruning
       dot-directories and the names in ``_FUZZY_FALLBACK_EXCLUDES``.

    Both strategies stop once ``_FUZZY_CACHE_MAX_FILES`` paths have been
    collected. The result is stored in the cache before returning.
    """
    now = time.monotonic()
    with _fuzzy_cache_lock:
        entry = _fuzzy_cache.get(root)
        if entry and now - entry[0] < _FUZZY_CACHE_TTL_S:
            return entry[1]

    def _git(cwd: str, *args: str):
        # Short timeout: this runs on the completion path.
        return subprocess.run(
            ["git", "-C", cwd, *args],
            capture_output=True,
            timeout=2.0,
            check=False,
        )

    files: list[str] = []
    try:
        toplevel = _git(root, "rev-parse", "--show-toplevel")
        if toplevel.returncode == 0:
            top = toplevel.stdout.decode("utf-8", "replace").strip()
            listing = _git(
                top, "ls-files", "-z", "--cached", "--others", "--exclude-standard"
            )
            if listing.returncode == 0:
                for repo_path in listing.stdout.decode("utf-8", "replace").split("\0"):
                    if not repo_path:
                        continue
                    rel = os.path.relpath(os.path.join(top, repo_path), root)
                    rel = rel.replace(os.sep, "/")
                    # Skip parents/siblings of cwd — keep the picker scoped
                    # to root-and-below.
                    if rel.startswith("../"):
                        continue
                    files.append(rel)
                    if len(files) >= _FUZZY_CACHE_MAX_FILES:
                        break
    except (OSError, subprocess.TimeoutExpired):
        # git unavailable or too slow: fall through to the walk below.
        pass

    if not files:
        # Fallback walk: skip vendor/build dirs + dot-dirs so the walk stays
        # tractable. Dotfiles themselves are kept.
        try:
            for dirpath, dirnames, filenames in os.walk(root, followlinks=False):
                # In-place edit so os.walk does not descend into pruned dirs.
                dirnames[:] = [
                    sub
                    for sub in dirnames
                    if not (sub in _FUZZY_FALLBACK_EXCLUDES or sub.startswith("."))
                ]
                rel_dir = os.path.relpath(dirpath, root)
                for filename in filenames:
                    if rel_dir == ".":
                        rel = filename
                    else:
                        rel = f"{rel_dir}/{filename}"
                    files.append(rel.replace(os.sep, "/"))
                    if len(files) >= _FUZZY_CACHE_MAX_FILES:
                        break
                if len(files) >= _FUZZY_CACHE_MAX_FILES:
                    break
        except OSError:
            pass

    with _fuzzy_cache_lock:
        _fuzzy_cache[root] = (now, files)

    return files
|
||||
|
||||
|
||||
def _fuzzy_basename_rank(name: str, query: str) -> tuple[int, int] | None:
|
||||
"""Rank ``name`` against ``query``; lower is better. Returns None to reject.
|
||||
|
||||
Tiers (kind):
|
||||
0 — exact basename
|
||||
1 — basename prefix (e.g. `app` → `appChrome.tsx`)
|
||||
2 — word-boundary / camelCase hit (e.g. `chrome` → `appChrome.tsx`)
|
||||
3 — substring anywhere in basename
|
||||
4 — subsequence match (every query char appears in order)
|
||||
|
||||
Secondary key is `len(name)` so shorter names win ties.
|
||||
"""
|
||||
if not query:
|
||||
return (3, len(name))
|
||||
|
||||
nl = name.lower()
|
||||
ql = query.lower()
|
||||
|
||||
if nl == ql:
|
||||
return (0, len(name))
|
||||
|
||||
if nl.startswith(ql):
|
||||
return (1, len(name))
|
||||
|
||||
# Word-boundary split: `foo-bar_baz.qux` → ["foo","bar","baz","qux"].
|
||||
# camelCase split: `appChrome` → ["app","Chrome"]. Cheap approximation;
|
||||
# falls through to substring/subsequence if it misses.
|
||||
parts: list[str] = []
|
||||
buf = ""
|
||||
for ch in name:
|
||||
if ch in "-_." or (ch.isupper() and buf and not buf[-1].isupper()):
|
||||
if buf:
|
||||
parts.append(buf)
|
||||
buf = ch if ch not in "-_." else ""
|
||||
else:
|
||||
buf += ch
|
||||
if buf:
|
||||
parts.append(buf)
|
||||
for p in parts:
|
||||
if p.lower().startswith(ql):
|
||||
return (2, len(name))
|
||||
|
||||
if ql in nl:
|
||||
return (3, len(name))
|
||||
|
||||
i = 0
|
||||
for ch in nl:
|
||||
if ch == ql[i]:
|
||||
i += 1
|
||||
if i == len(ql):
|
||||
return (4, len(name))
|
||||
|
||||
return None
|
||||
|
||||
|
||||
@method("complete.path")
|
||||
def _(rid, params: dict) -> dict:
|
||||
@@ -3186,6 +3447,42 @@ def _(rid, params: dict) -> dict:
|
||||
prefix_tag = ""
|
||||
path_part = query if is_context else query
|
||||
|
||||
# Fuzzy basename search across the repo when the user types a bare
|
||||
# name with no path separator — `@appChrome` surfaces every file
|
||||
# whose basename matches, regardless of directory depth. Matches what
|
||||
# editors like Cursor / VS Code do for Cmd-P. Path-ish queries (with
|
||||
# `/`, `./`, `~/`, `/abs`) fall through to the directory-listing
|
||||
# path so explicit navigation intent is preserved.
|
||||
if (
|
||||
is_context
|
||||
and path_part
|
||||
and "/" not in path_part
|
||||
and prefix_tag != "folder"
|
||||
):
|
||||
root = os.getcwd()
|
||||
ranked: list[tuple[tuple[int, int], str, str]] = []
|
||||
for rel in _list_repo_files(root):
|
||||
basename = os.path.basename(rel)
|
||||
if basename.startswith(".") and not path_part.startswith("."):
|
||||
continue
|
||||
rank = _fuzzy_basename_rank(basename, path_part)
|
||||
if rank is None:
|
||||
continue
|
||||
ranked.append((rank, rel, basename))
|
||||
|
||||
ranked.sort(key=lambda r: (r[0], len(r[1]), r[1]))
|
||||
tag = prefix_tag or "file"
|
||||
for _, rel, basename in ranked[:30]:
|
||||
items.append(
|
||||
{
|
||||
"text": f"@{tag}:{rel}",
|
||||
"display": basename,
|
||||
"meta": os.path.dirname(rel),
|
||||
}
|
||||
)
|
||||
|
||||
return _ok(rid, {"items": items})
|
||||
|
||||
expanded = _normalize_completion_path(path_part) if path_part else "."
|
||||
if expanded == "." or not expanded:
|
||||
search_dir, match = ".", ""
|
||||
@@ -3455,43 +3752,155 @@ def _(rid, params: dict) -> dict:
|
||||
# ── Methods: voice ───────────────────────────────────────────────────
|
||||
|
||||
|
||||
_voice_sid_lock = threading.Lock()
|
||||
_voice_event_sid: str = ""
|
||||
|
||||
|
||||
def _voice_emit(event: str, payload: dict | None = None) -> None:
    """Emit a voice event addressed to the stored voice session id.

    Reads ``_voice_event_sid`` under ``_voice_sid_lock`` and forwards the
    event through ``_emit`` after the lock is released. Callers never pass
    a sid: this helper exists to make that explicit. The stored sid may be
    the empty string.
    NOTE(review): the original note says the TUI treats an empty sid as
    "active session" — confirm against the TUI handler.
    """
    with _voice_sid_lock:
        target_sid = _voice_event_sid
    # Emit outside the lock so a slow emit cannot block sid updates.
    _emit(event, target_sid, payload)
|
||||
|
||||
|
||||
def _voice_mode_enabled() -> bool:
|
||||
"""Current voice-mode flag (runtime-only, CLI parity).
|
||||
|
||||
cli.py initialises ``_voice_mode = False`` at startup and only flips
|
||||
it via ``/voice on``; it never reads a persisted enable bit from
|
||||
config.yaml. We match that: no config lookup, env var only. This
|
||||
avoids the TUI auto-starting in REC the next time the user opens it
|
||||
just because they happened to enable voice in a prior session.
|
||||
"""
|
||||
return os.environ.get("HERMES_VOICE", "").strip() == "1"
|
||||
|
||||
|
||||
def _voice_tts_enabled() -> bool:
|
||||
"""Whether agent replies should be spoken back via TTS (runtime only)."""
|
||||
return os.environ.get("HERMES_VOICE_TTS", "").strip() == "1"
|
||||
|
||||
|
||||
@method("voice.toggle")
|
||||
def _(rid, params: dict) -> dict:
|
||||
"""CLI parity for the ``/voice`` slash command.
|
||||
|
||||
Subcommands:
|
||||
|
||||
* ``status`` — report mode + TTS flags (default when action is unknown).
|
||||
* ``on`` / ``off`` — flip voice *mode* (the umbrella bit). Turning it
|
||||
off also tears down any active continuous recording loop. Does NOT
|
||||
start recording on its own; recording is driven by ``voice.record``
|
||||
(Ctrl+B) after mode is on, matching cli.py's enable/Ctrl+B split.
|
||||
* ``tts`` — toggle speech-output of agent replies. Requires mode on
|
||||
(mirrors CLI's _toggle_voice_tts guard).
|
||||
"""
|
||||
action = params.get("action", "status")
|
||||
|
||||
if action == "status":
|
||||
env = os.environ.get("HERMES_VOICE", "").strip()
|
||||
if env in {"0", "1"}:
|
||||
return _ok(rid, {"enabled": env == "1"})
|
||||
return _ok(
|
||||
rid,
|
||||
{
|
||||
"enabled": bool(
|
||||
_load_cfg().get("display", {}).get("voice_enabled", False)
|
||||
)
|
||||
},
|
||||
)
|
||||
# Mirror CLI's _show_voice_status: include STT/TTS provider
|
||||
# availability so the user can tell at a glance *why* voice mode
|
||||
# isn't working ("STT provider: MISSING ..." is the common case).
|
||||
payload: dict = {
|
||||
"enabled": _voice_mode_enabled(),
|
||||
"tts": _voice_tts_enabled(),
|
||||
}
|
||||
try:
|
||||
from tools.voice_mode import check_voice_requirements
|
||||
|
||||
reqs = check_voice_requirements()
|
||||
payload["available"] = bool(reqs.get("available"))
|
||||
payload["audio_available"] = bool(reqs.get("audio_available"))
|
||||
payload["stt_available"] = bool(reqs.get("stt_available"))
|
||||
payload["details"] = reqs.get("details") or ""
|
||||
except Exception as e:
|
||||
# check_voice_requirements pulls optional transcription deps —
|
||||
# swallow so /voice status always returns something useful.
|
||||
logger.warning("voice.toggle status: requirements probe failed: %s", e)
|
||||
|
||||
return _ok(rid, payload)
|
||||
|
||||
if action in ("on", "off"):
|
||||
enabled = action == "on"
|
||||
# Runtime-only flag (CLI parity) — no _write_config_key, so the
|
||||
# next TUI launch starts with voice OFF instead of auto-REC from a
|
||||
# persisted stale toggle.
|
||||
os.environ["HERMES_VOICE"] = "1" if enabled else "0"
|
||||
_write_config_key("display.voice_enabled", enabled)
|
||||
return _ok(rid, {"enabled": action == "on"})
|
||||
|
||||
if not enabled:
|
||||
# Disabling the mode must tear the continuous loop down; the
|
||||
# loop holds the microphone and would otherwise keep running.
|
||||
try:
|
||||
from hermes_cli.voice import stop_continuous
|
||||
|
||||
stop_continuous()
|
||||
except ImportError:
|
||||
pass
|
||||
except Exception as e:
|
||||
logger.warning("voice: stop_continuous failed during toggle off: %s", e)
|
||||
|
||||
return _ok(rid, {"enabled": enabled, "tts": _voice_tts_enabled()})
|
||||
|
||||
if action == "tts":
|
||||
if not _voice_mode_enabled():
|
||||
return _err(rid, 4014, "enable voice mode first: /voice on")
|
||||
new_value = not _voice_tts_enabled()
|
||||
# Runtime-only flag (CLI parity) — see voice.toggle on/off above.
|
||||
os.environ["HERMES_VOICE_TTS"] = "1" if new_value else "0"
|
||||
return _ok(rid, {"enabled": True, "tts": new_value})
|
||||
|
||||
return _err(rid, 4013, f"unknown voice action: {action}")
|
||||
|
||||
|
||||
@method("voice.record")
|
||||
def _(rid, params: dict) -> dict:
|
||||
"""VAD-driven continuous record loop, CLI-parity.
|
||||
|
||||
``start`` turns on a VAD loop that emits ``voice.transcript`` events
|
||||
for each detected utterance and auto-restarts for the next turn.
|
||||
``stop`` halts the loop (manual stop; matches cli.py's Ctrl+B-while-
|
||||
recording branch clearing ``_voice_continuous``). Three consecutive
|
||||
silent cycles stop the loop automatically and emit a
|
||||
``voice.transcript`` with ``no_speech_limit=True``.
|
||||
"""
|
||||
action = params.get("action", "start")
|
||||
|
||||
if action not in {"start", "stop"}:
|
||||
return _err(rid, 4019, f"unknown voice action: {action}")
|
||||
|
||||
try:
|
||||
if action == "start":
|
||||
from hermes_cli.voice import start_recording
|
||||
if not _voice_mode_enabled():
|
||||
return _err(rid, 4015, "voice mode is off — enable with /voice on")
|
||||
|
||||
start_recording()
|
||||
with _voice_sid_lock:
|
||||
global _voice_event_sid
|
||||
_voice_event_sid = params.get("session_id") or _voice_event_sid
|
||||
|
||||
from hermes_cli.voice import start_continuous
|
||||
|
||||
voice_cfg = _load_cfg().get("voice", {})
|
||||
start_continuous(
|
||||
on_transcript=lambda t: _voice_emit(
|
||||
"voice.transcript", {"text": t}
|
||||
),
|
||||
on_status=lambda s: _voice_emit("voice.status", {"state": s}),
|
||||
on_silent_limit=lambda: _voice_emit(
|
||||
"voice.transcript", {"no_speech_limit": True}
|
||||
),
|
||||
silence_threshold=voice_cfg.get("silence_threshold", 200),
|
||||
silence_duration=voice_cfg.get("silence_duration", 3.0),
|
||||
)
|
||||
return _ok(rid, {"status": "recording"})
|
||||
if action == "stop":
|
||||
from hermes_cli.voice import stop_and_transcribe
|
||||
|
||||
return _ok(rid, {"text": stop_and_transcribe() or ""})
|
||||
return _err(rid, 4019, f"unknown voice action: {action}")
|
||||
# action == "stop"
|
||||
from hermes_cli.voice import stop_continuous
|
||||
|
||||
stop_continuous()
|
||||
return _ok(rid, {"status": "stopped"})
|
||||
except ImportError:
|
||||
return _err(
|
||||
rid, 5025, "voice module not available — install audio dependencies"
|
||||
|
||||
@@ -83,6 +83,10 @@ export type DOMElement = {
|
||||
// Only set on ink-root. The document owns focus — any node can
|
||||
// reach it by walking parentNode, like browser getRootNode().
|
||||
focusManager?: FocusManager
|
||||
// Measurement cache for ink-text nodes: avoids re-squashing and re-wrapping
|
||||
// text when yoga calls measureFunc multiple times per frame with different
|
||||
// widths during flex re-pass. Keyed by `${width}|${widthMode}`.
|
||||
_textMeasureCache?: { gen: number; entries: Map<string, { _gen: number; result: { width: number; height: number } }> }
|
||||
} & InkNode
|
||||
|
||||
export type TextNode = {
|
||||
@@ -311,10 +315,42 @@ export const createTextNode = (text: string): TextNode => {
|
||||
return node
|
||||
}
|
||||
|
||||
const MEASURE_CACHE_CAP = 16
|
||||
|
||||
// Measure a text-bearing node, memoising results per owning ink-text element.
// Results are keyed by `${width}|${widthMode}` and tagged with the cache
// generation current at store time; an entry is only reused while its tag
// still equals the cache's `gen`. Nodes that are not (or not inside) an
// ink-text element are measured directly without caching.
const measureTextNode = function (
  node: DOMNode,
  width: number,
  widthMode: LayoutMeasureMode
): { width: number; height: number } {
  // A '#text' leaf uses its parent's cache; an element uses its own.
  const owner = node.nodeName === '#text' ? node.parentNode : (node as DOMElement)

  if (!owner || owner.nodeName !== 'ink-text') {
    return computeTextMeasure(node, width, widthMode)
  }

  let store = owner._textMeasureCache

  if (!store) {
    store = { gen: 0, entries: new Map() }
    owner._textMeasureCache = store
  }

  const cacheKey = `${width}|${widthMode}`
  const cached = store.entries.get(cacheKey)

  // Entries from an older generation are stale and fall through to recompute.
  if (cached && cached._gen === store.gen) {
    return cached.result
  }

  const measured = computeTextMeasure(node, width, widthMode)

  // Enforce cap with FIFO eviction (Map iterates in insertion order) to
  // avoid unbounded growth when many distinct widths are probed.
  if (store.entries.size >= MEASURE_CACHE_CAP) {
    const oldestKey = store.entries.keys().next().value
    store.entries.delete(oldestKey)
  }

  store.entries.set(cacheKey, { _gen: store.gen, result: measured })

  return measured
}
|
||||
|
||||
const computeTextMeasure = function (
|
||||
node: DOMNode,
|
||||
width: number,
|
||||
widthMode: LayoutMeasureMode
|
||||
): { width: number; height: number } {
|
||||
const rawText = node.nodeName === '#text' ? node.nodeValue : squashTextNodes(node)
|
||||
|
||||
@@ -378,13 +414,19 @@ export const markDirty = (node?: DOMNode): void => {
|
||||
|
||||
while (current) {
|
||||
if (current.nodeName !== '#text') {
|
||||
;(current as DOMElement).dirty = true
|
||||
const elem = current as DOMElement
|
||||
elem.dirty = true
|
||||
|
||||
// Only mark yoga dirty on leaf nodes that have measure functions
|
||||
if (!markedYoga && (current.nodeName === 'ink-text' || current.nodeName === 'ink-raw-ansi') && current.yogaNode) {
|
||||
current.yogaNode.markDirty()
|
||||
if (!markedYoga && (elem.nodeName === 'ink-text' || elem.nodeName === 'ink-raw-ansi') && elem.yogaNode) {
|
||||
elem.yogaNode.markDirty()
|
||||
markedYoga = true
|
||||
}
|
||||
|
||||
// Invalidate text measurement cache — child text or style changed.
|
||||
if (elem._textMeasureCache) {
|
||||
elem._textMeasureCache.gen++
|
||||
}
|
||||
}
|
||||
|
||||
current = current.parentNode
|
||||
@@ -433,6 +475,7 @@ export const clearYogaNodeReferences = (node: DOMElement | TextNode): void => {
|
||||
for (const child of node.childNodes) {
|
||||
clearYogaNodeReferences(child)
|
||||
}
|
||||
node._textMeasureCache = undefined
|
||||
}
|
||||
|
||||
node.yogaNode = undefined
|
||||
|
||||
@@ -15,7 +15,8 @@ const buildCtx = (appended: Msg[]) =>
|
||||
composer: {
|
||||
dequeue: () => undefined,
|
||||
queueEditRef: ref<null | number>(null),
|
||||
sendQueued: vi.fn()
|
||||
sendQueued: vi.fn(),
|
||||
setInput: vi.fn()
|
||||
},
|
||||
gateway: {
|
||||
gw: { request: vi.fn() },
|
||||
@@ -29,6 +30,9 @@ const buildCtx = (appended: Msg[]) =>
|
||||
resumeById: vi.fn(),
|
||||
setCatalog: vi.fn()
|
||||
},
|
||||
submission: {
|
||||
submitRef: { current: vi.fn() }
|
||||
},
|
||||
system: {
|
||||
bellOnComplete: false,
|
||||
sys: vi.fn()
|
||||
@@ -38,6 +42,11 @@ const buildCtx = (appended: Msg[]) =>
|
||||
panel: (title: string, sections: any[]) =>
|
||||
appended.push({ kind: 'panel', panelData: { sections, title }, role: 'system', text: '' }),
|
||||
setHistoryItems: vi.fn()
|
||||
},
|
||||
voice: {
|
||||
setProcessing: vi.fn(),
|
||||
setRecording: vi.fn(),
|
||||
setVoiceEnabled: vi.fn()
|
||||
}
|
||||
}) as any
|
||||
|
||||
@@ -143,91 +152,79 @@ describe('createGatewayEventHandler', () => {
|
||||
expect(appended[0]?.thinkingTokens).toBe(estimateTokensRough(fromServer))
|
||||
})
|
||||
|
||||
it('attaches inline_diff to the assistant completion body', () => {
|
||||
it('anchors inline_diff as its own segment where the edit happened', () => {
|
||||
const appended: Msg[] = []
|
||||
const onEvent = createGatewayEventHandler(buildCtx(appended))
|
||||
const diff = '\u001b[31m--- a/foo.ts\u001b[0m\n\u001b[32m+++ b/foo.ts\u001b[0m\n@@\n-old\n+new'
|
||||
const cleaned = '--- a/foo.ts\n+++ b/foo.ts\n@@\n-old\n+new'
|
||||
const block = `\`\`\`diff\n${cleaned}\n\`\`\``
|
||||
|
||||
onEvent({
|
||||
payload: { context: 'foo.ts', name: 'patch', tool_id: 'tool-1' },
|
||||
type: 'tool.start'
|
||||
} as any)
|
||||
onEvent({
|
||||
payload: { inline_diff: diff, summary: 'patched', tool_id: 'tool-1' },
|
||||
type: 'tool.complete'
|
||||
} as any)
|
||||
// Narration → tool → tool-complete → more narration → message-complete.
|
||||
// The diff MUST land between the two narration segments, not tacked
|
||||
// onto the final one.
|
||||
onEvent({ payload: { text: 'Editing the file' }, type: 'message.delta' } as any)
|
||||
onEvent({ payload: { context: 'foo.ts', name: 'patch', tool_id: 'tool-1' }, type: 'tool.start' } as any)
|
||||
onEvent({ payload: { inline_diff: diff, summary: 'patched', tool_id: 'tool-1' }, type: 'tool.complete' } as any)
|
||||
|
||||
// Diff is buffered for message.complete and sanitized (ANSI stripped).
|
||||
// Diff is already committed to segmentMessages as its own segment.
|
||||
expect(appended).toHaveLength(0)
|
||||
expect(turnController.pendingInlineDiffs).toEqual([cleaned])
|
||||
expect(turnController.segmentMessages).toEqual([
|
||||
{ role: 'assistant', text: 'Editing the file' },
|
||||
{ kind: 'diff', role: 'assistant', text: block }
|
||||
])
|
||||
|
||||
onEvent({
|
||||
payload: { text: 'patch applied' },
|
||||
type: 'message.complete'
|
||||
} as any)
|
||||
onEvent({ payload: { text: 'patch applied' }, type: 'message.complete' } as any)
|
||||
|
||||
// Diff is rendered in the same assistant message body as the completion.
|
||||
expect(appended).toHaveLength(1)
|
||||
expect(appended[0]).toMatchObject({ role: 'assistant' })
|
||||
expect(appended[0]?.text).toContain('patch applied')
|
||||
expect(appended[0]?.text).toContain('```diff')
|
||||
expect(appended[0]?.text).toContain(cleaned)
|
||||
// Three transcript messages: pre-tool narration → diff (kind='diff',
|
||||
// so MessageLine gives it blank-line breathing room) → post-tool
|
||||
// narration. The final message does NOT contain a diff.
|
||||
expect(appended).toHaveLength(3)
|
||||
expect(appended[0]?.text).toBe('Editing the file')
|
||||
expect(appended[1]).toMatchObject({ kind: 'diff', text: block })
|
||||
expect(appended[2]?.text).toBe('patch applied')
|
||||
expect(appended[2]?.text).not.toContain('```diff')
|
||||
})
|
||||
|
||||
it('does not append inline_diff twice when assistant text already contains it', () => {
|
||||
it('drops the diff segment when the final assistant text narrates the same diff', () => {
|
||||
const appended: Msg[] = []
|
||||
const onEvent = createGatewayEventHandler(buildCtx(appended))
|
||||
const cleaned = '--- a/foo.ts\n+++ b/foo.ts\n@@\n-old\n+new'
|
||||
const assistantText = `Done. Here's the inline diff:\n\n\`\`\`diff\n${cleaned}\n\`\`\``
|
||||
|
||||
onEvent({
|
||||
payload: { inline_diff: cleaned, summary: 'patched', tool_id: 'tool-1' },
|
||||
type: 'tool.complete'
|
||||
} as any)
|
||||
onEvent({
|
||||
payload: { text: assistantText },
|
||||
type: 'message.complete'
|
||||
} as any)
|
||||
onEvent({ payload: { inline_diff: cleaned, summary: 'patched', tool_id: 'tool-1' }, type: 'tool.complete' } as any)
|
||||
onEvent({ payload: { text: assistantText }, type: 'message.complete' } as any)
|
||||
|
||||
// Only the final message — diff-only segment dropped so we don't
|
||||
// render two stacked copies of the same patch.
|
||||
expect(appended).toHaveLength(1)
|
||||
expect(appended[0]?.text).toBe(assistantText)
|
||||
expect((appended[0]?.text.match(/```diff/g) ?? []).length).toBe(1)
|
||||
})
|
||||
|
||||
it('strips the CLI "┊ review diff" header from queued inline diffs', () => {
|
||||
it('strips the CLI "┊ review diff" header from inline diff segments', () => {
|
||||
const appended: Msg[] = []
|
||||
const onEvent = createGatewayEventHandler(buildCtx(appended))
|
||||
const raw = ' \u001b[33m┊ review diff\u001b[0m\n--- a/foo.ts\n+++ b/foo.ts\n@@\n-old\n+new'
|
||||
|
||||
onEvent({
|
||||
payload: { inline_diff: raw, summary: 'patched', tool_id: 'tool-1' },
|
||||
type: 'tool.complete'
|
||||
} as any)
|
||||
onEvent({
|
||||
payload: { text: 'done' },
|
||||
type: 'message.complete'
|
||||
} as any)
|
||||
onEvent({ payload: { inline_diff: raw, summary: 'patched', tool_id: 'tool-1' }, type: 'tool.complete' } as any)
|
||||
onEvent({ payload: { text: 'done' }, type: 'message.complete' } as any)
|
||||
|
||||
expect(appended).toHaveLength(1)
|
||||
// diff segment first (kind='diff'), final narration second
|
||||
expect(appended).toHaveLength(2)
|
||||
expect(appended[0]?.kind).toBe('diff')
|
||||
expect(appended[0]?.text).not.toContain('┊ review diff')
|
||||
expect(appended[0]?.text).toContain('--- a/foo.ts')
|
||||
expect(appended[1]?.text).toBe('done')
|
||||
})
|
||||
|
||||
it('suppresses inline_diff when assistant already wrote a diff fence', () => {
|
||||
it('drops the diff segment when assistant writes its own ```diff fence', () => {
|
||||
const appended: Msg[] = []
|
||||
const onEvent = createGatewayEventHandler(buildCtx(appended))
|
||||
const inlineDiff = '--- a/foo.ts\n+++ b/foo.ts\n@@\n-old\n+new'
|
||||
const assistantText = 'Done. Clean swap:\n\n```diff\n-old\n+new\n```'
|
||||
|
||||
onEvent({
|
||||
payload: { inline_diff: inlineDiff, summary: 'patched', tool_id: 'tool-1' },
|
||||
type: 'tool.complete'
|
||||
} as any)
|
||||
onEvent({
|
||||
payload: { text: assistantText },
|
||||
type: 'message.complete'
|
||||
} as any)
|
||||
onEvent({ payload: { inline_diff: inlineDiff, summary: 'patched', tool_id: 'tool-1' }, type: 'tool.complete' } as any)
|
||||
onEvent({ payload: { text: assistantText }, type: 'message.complete' } as any)
|
||||
|
||||
expect(appended).toHaveLength(1)
|
||||
expect(appended[0]?.text).toBe(assistantText)
|
||||
@@ -243,15 +240,18 @@ describe('createGatewayEventHandler', () => {
|
||||
payload: { inline_diff: diff, name: 'review_diff', summary: diff, tool_id: 'tool-1' },
|
||||
type: 'tool.complete'
|
||||
} as any)
|
||||
onEvent({
|
||||
payload: { text: 'done' },
|
||||
type: 'message.complete'
|
||||
} as any)
|
||||
onEvent({ payload: { text: 'done' }, type: 'message.complete' } as any)
|
||||
|
||||
expect(appended).toHaveLength(1)
|
||||
expect(appended[0]?.tools?.[0]).toContain('Review Diff')
|
||||
expect(appended[0]?.tools?.[0]).not.toContain('--- a/foo.ts')
|
||||
// Two segments: the diff block (kind='diff', no tool row) and the final
|
||||
// narration (tool row belongs here since pendingSegmentTools carries
|
||||
// across the flushStreamingSegment call).
|
||||
expect(appended).toHaveLength(2)
|
||||
expect(appended[0]?.kind).toBe('diff')
|
||||
expect(appended[0]?.text).toContain('```diff')
|
||||
expect(appended[0]?.tools ?? []).toEqual([])
|
||||
expect(appended[1]?.text).toBe('done')
|
||||
expect(appended[1]?.tools?.[0]).toContain('Review Diff')
|
||||
expect(appended[1]?.tools?.[0]).not.toContain('--- a/foo.ts')
|
||||
})
|
||||
|
||||
it('shows setup panel for missing provider startup error', () => {
|
||||
|
||||
@@ -31,6 +31,36 @@ describe('platform action modifier', () => {
|
||||
})
|
||||
})
|
||||
|
||||
describe('isVoiceToggleKey', () => {
  // Key-event stub: every modifier is released unless the caller overrides it,
  // so each assertion only spells out the modifier it is actually exercising.
  const press = (held: { ctrl?: boolean; meta?: boolean; super?: boolean } = {}) => ({
    ctrl: false,
    meta: false,
    super: false,
    ...held
  })

  it('matches raw Ctrl+B on macOS (doc-default across platforms)', async () => {
    const platform = await importPlatform('darwin')

    // Both letter cases must match: the comparison is case-insensitive.
    expect(platform.isVoiceToggleKey(press({ ctrl: true }), 'b')).toBe(true)
    expect(platform.isVoiceToggleKey(press({ ctrl: true }), 'B')).toBe(true)
  })

  it('matches Cmd+B on macOS (preserve platform muscle memory)', async () => {
    const platform = await importPlatform('darwin')

    // Cmd may be reported through either the `meta` or the `super` flag.
    expect(platform.isVoiceToggleKey(press({ meta: true }), 'b')).toBe(true)
    expect(platform.isVoiceToggleKey(press({ super: true }), 'b')).toBe(true)
  })

  it('matches Ctrl+B on non-macOS platforms', async () => {
    const platform = await importPlatform('linux')

    expect(platform.isVoiceToggleKey(press({ ctrl: true }), 'b')).toBe(true)
  })

  it('does not match unmodified b or other Ctrl combos', async () => {
    const platform = await importPlatform('darwin')

    // A bare 'b' is ordinary typing; Ctrl+A / Ctrl+C belong to other bindings.
    expect(platform.isVoiceToggleKey(press(), 'b')).toBe(false)
    expect(platform.isVoiceToggleKey(press({ ctrl: true }), 'a')).toBe(false)
    expect(platform.isVoiceToggleKey(press({ ctrl: true }), 'c')).toBe(false)
  })
})
|
||||
|
||||
describe('isMacActionFallback', () => {
|
||||
it('routes raw Ctrl+K and Ctrl+W to readline kill-to-end / delete-word on macOS', async () => {
|
||||
const { isMacActionFallback } = await importPlatform('darwin')
|
||||
|
||||
@@ -51,6 +51,9 @@ export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev:
|
||||
const { STARTUP_RESUME_ID, newSession, resumeById, setCatalog } = ctx.session
|
||||
const { bellOnComplete, stdout, sys } = ctx.system
|
||||
const { appendMessage, panel, setHistoryItems } = ctx.transcript
|
||||
const { setInput } = ctx.composer
|
||||
const { submitRef } = ctx.submission
|
||||
const { setProcessing: setVoiceProcessing, setRecording: setVoiceRecording, setVoiceEnabled } = ctx.voice
|
||||
|
||||
let pendingThinkingStatus = ''
|
||||
let thinkingStatusTimer: null | ReturnType<typeof setTimeout> = null
|
||||
@@ -261,6 +264,57 @@ export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev:
|
||||
return
|
||||
}
|
||||
|
||||
case 'voice.status': {
|
||||
// Continuous VAD loop reports its internal state so the status bar
|
||||
// can show listening / transcribing / idle without polling.
|
||||
const state = String(ev.payload?.state ?? '')
|
||||
|
||||
if (state === 'listening') {
|
||||
setVoiceRecording(true)
|
||||
setVoiceProcessing(false)
|
||||
} else if (state === 'transcribing') {
|
||||
setVoiceRecording(false)
|
||||
setVoiceProcessing(true)
|
||||
} else {
|
||||
setVoiceRecording(false)
|
||||
setVoiceProcessing(false)
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
case 'voice.transcript': {
|
||||
// CLI parity: the 3-strikes silence detector flipped off automatically.
|
||||
// Mirror that on the UI side and tell the user why the mode is off.
|
||||
if (ev.payload?.no_speech_limit) {
|
||||
setVoiceEnabled(false)
|
||||
setVoiceRecording(false)
|
||||
setVoiceProcessing(false)
|
||||
sys('voice: no speech detected 3 times, continuous mode stopped')
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
const text = String(ev.payload?.text ?? '').trim()
|
||||
|
||||
if (!text) {
|
||||
return
|
||||
}
|
||||
|
||||
// CLI parity: _pending_input.put(transcript) unconditionally feeds
|
||||
// the transcript to the agent as its next turn — draft handling
|
||||
// doesn't apply because voice-mode users are speaking, not typing.
|
||||
//
|
||||
// We can't branch on composer input from inside a setInput updater
|
||||
// (React strict mode double-invokes it, duplicating the submit).
|
||||
// Just clear + defer submit so the cleared input is committed before
|
||||
// submit reads it.
|
||||
setInput('')
|
||||
setTimeout(() => submitRef.current(text), 0)
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
case 'gateway.start_timeout': {
|
||||
const { cwd, python } = ev.payload ?? {}
|
||||
const trace = python || cwd ? ` · ${String(python || '')} ${String(cwd || '')}`.trim() : ''
|
||||
@@ -331,10 +385,12 @@ export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev:
|
||||
return
|
||||
}
|
||||
|
||||
// Keep inline diffs attached to the assistant completion body so
|
||||
// they render in the same message flow, not as a standalone system
|
||||
// artifact that can look out-of-place around tool rows.
|
||||
turnController.queueInlineDiff(inlineDiffText)
|
||||
// Anchor the diff to where the edit happened in the turn — between
|
||||
// the narration that preceded the tool call and whatever the agent
|
||||
// streams afterwards. The previous end-merge put the diff at the
|
||||
// bottom of the final message even when the edit fired mid-turn,
|
||||
// which read as "the agent wrote this after saying that".
|
||||
turnController.pushInlineDiffSegment(inlineDiffText)
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
@@ -189,9 +189,11 @@ export interface InputHandlerContext {
|
||||
stdout?: NodeJS.WriteStream
|
||||
}
|
||||
voice: {
|
||||
enabled: boolean
|
||||
recording: boolean
|
||||
setProcessing: StateSetter<boolean>
|
||||
setRecording: StateSetter<boolean>
|
||||
setVoiceEnabled: StateSetter<boolean>
|
||||
}
|
||||
wheelStep: number
|
||||
}
|
||||
@@ -201,6 +203,9 @@ export interface InputHandlerResult {
|
||||
}
|
||||
|
||||
export interface GatewayEventHandlerContext {
|
||||
composer: {
|
||||
setInput: StateSetter<string>
|
||||
}
|
||||
gateway: GatewayServices
|
||||
session: {
|
||||
STARTUP_RESUME_ID: string
|
||||
@@ -210,6 +215,9 @@ export interface GatewayEventHandlerContext {
|
||||
resumeById: (id: string) => void
|
||||
setCatalog: StateSetter<null | SlashCatalog>
|
||||
}
|
||||
submission: {
|
||||
submitRef: MutableRefObject<(value: string) => void>
|
||||
}
|
||||
system: {
|
||||
bellOnComplete: boolean
|
||||
stdout?: NodeJS.WriteStream
|
||||
@@ -220,6 +228,11 @@ export interface GatewayEventHandlerContext {
|
||||
panel: (title: string, sections: PanelSection[]) => void
|
||||
setHistoryItems: StateSetter<Msg[]>
|
||||
}
|
||||
voice: {
|
||||
setProcessing: StateSetter<boolean>
|
||||
setRecording: StateSetter<boolean>
|
||||
setVoiceEnabled: StateSetter<boolean>
|
||||
}
|
||||
}
|
||||
|
||||
export interface SlashHandlerContext {
|
||||
|
||||
@@ -184,15 +184,64 @@ export const sessionCommands: SlashCommand[] = [
|
||||
},
|
||||
|
||||
{
|
||||
help: 'toggle voice input',
|
||||
help: 'voice mode: [on|off|tts|status]',
|
||||
name: 'voice',
|
||||
run: (arg, ctx) => {
|
||||
const action = arg === 'on' || arg === 'off' ? arg : 'status'
|
||||
const normalized = (arg ?? '').trim().toLowerCase()
|
||||
|
||||
const action =
|
||||
normalized === 'on' || normalized === 'off' || normalized === 'tts' || normalized === 'status'
|
||||
? normalized
|
||||
: 'status'
|
||||
|
||||
ctx.gateway.rpc<VoiceToggleResponse>('voice.toggle', { action }).then(
|
||||
ctx.guarded<VoiceToggleResponse>(r => {
|
||||
ctx.voice.setVoiceEnabled(!!r.enabled)
|
||||
ctx.transcript.sys(`voice: ${r.enabled ? 'on' : 'off'}`)
|
||||
|
||||
// Match CLI's _show_voice_status / _enable_voice_mode /
|
||||
// _toggle_voice_tts output shape so users don't have to learn
|
||||
// two vocabularies.
|
||||
if (action === 'status') {
|
||||
const mode = r.enabled ? 'ON' : 'OFF'
|
||||
const tts = r.tts ? 'ON' : 'OFF'
|
||||
ctx.transcript.sys('Voice Mode Status')
|
||||
ctx.transcript.sys(` Mode: ${mode}`)
|
||||
ctx.transcript.sys(` TTS: ${tts}`)
|
||||
ctx.transcript.sys(' Record key: Ctrl+B')
|
||||
|
||||
// CLI's "Requirements:" block — surfaces STT/audio setup issues
|
||||
// so the user sees "STT provider: MISSING ..." instead of
|
||||
// silently failing on every Ctrl+B press.
|
||||
if (r.details) {
|
||||
ctx.transcript.sys('')
|
||||
ctx.transcript.sys(' Requirements:')
|
||||
|
||||
for (const line of r.details.split('\n')) {
|
||||
if (line.trim()) {
|
||||
ctx.transcript.sys(` ${line}`)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
if (action === 'tts') {
|
||||
ctx.transcript.sys(`Voice TTS ${r.tts ? 'enabled' : 'disabled'}.`)
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// on/off — mirror cli.py:_enable_voice_mode's 3-line output
|
||||
if (r.enabled) {
|
||||
const tts = r.tts ? ' (TTS enabled)' : ''
|
||||
ctx.transcript.sys(`Voice mode enabled${tts}`)
|
||||
ctx.transcript.sys(' Ctrl+B to start/stop recording')
|
||||
ctx.transcript.sys(' /voice tts to toggle speech output')
|
||||
ctx.transcript.sys(' /voice off to disable voice mode')
|
||||
} else {
|
||||
ctx.transcript.sys('Voice mode disabled.')
|
||||
}
|
||||
})
|
||||
)
|
||||
}
|
||||
|
||||
@@ -19,6 +19,20 @@ const INTERRUPT_COOLDOWN_MS = 1500
|
||||
const ACTIVITY_LIMIT = 8
|
||||
const TRAIL_LIMIT = 8
|
||||
|
||||
/**
 * Returns the raw patch wrapped inside a diff-only segment, or `null` when
 * `msg` is not such a segment.
 *
 * A diff-only segment is what pushInlineDiffSegment emits: `kind: 'diff'` and
 * a text that is exactly one "```diff" fence around the patch. The extracted
 * patch is compared against the final assistant text at message.complete so a
 * reply that already narrates the same patch does not render it twice.
 * Anything that is not tagged `kind: 'diff'`, or whose text is not a single
 * well-formed fence, yields `null` so real assistant narration is never
 * mistaken for a droppable diff segment.
 */
const diffSegmentBody = (msg: Msg): null | string => {
  const fenceOpen = '```diff\n'
  const fenceClose = '\n```'
  const { kind, text } = msg

  // The length guard keeps the opening and closing fences from sharing the
  // same newline (e.g. "```diff\n```" is NOT a valid, empty diff segment).
  const isFencedDiff =
    kind === 'diff' &&
    text.length >= fenceOpen.length + fenceClose.length &&
    text.startsWith(fenceOpen) &&
    text.endsWith(fenceClose)

  return isFencedDiff ? text.slice(fenceOpen.length, text.length - fenceClose.length) : null
}
|
||||
|
||||
export interface InterruptDeps {
|
||||
appendMessage: (msg: Msg) => void
|
||||
gw: { request: <T = unknown>(method: string, params?: Record<string, unknown>) => Promise<T> }
|
||||
@@ -40,7 +54,6 @@ class TurnController {
|
||||
bufRef = ''
|
||||
interrupted = false
|
||||
lastStatusNote = ''
|
||||
pendingInlineDiffs: string[] = []
|
||||
persistedToolLabels = new Set<string>()
|
||||
persistSpawnTree?: (subagents: SubagentProgress[], sessionId: null | string) => Promise<void>
|
||||
protocolWarned = false
|
||||
@@ -79,7 +92,6 @@ class TurnController {
|
||||
this.activeTools = []
|
||||
this.streamTimer = clear(this.streamTimer)
|
||||
this.bufRef = ''
|
||||
this.pendingInlineDiffs = []
|
||||
this.pendingSegmentTools = []
|
||||
this.segmentMessages = []
|
||||
|
||||
@@ -186,18 +198,35 @@ class TurnController {
|
||||
}, REASONING_PULSE_MS)
|
||||
}
|
||||
|
||||
queueInlineDiff(diffText: string) {
|
||||
pushInlineDiffSegment(diffText: string) {
|
||||
// Strip CLI chrome the gateway emits before the unified diff (e.g. a
|
||||
// leading "┊ review diff" header written by `_emit_inline_diff` for the
|
||||
// terminal printer). That header only makes sense as stdout dressing,
|
||||
// not inside a markdown ```diff block.
|
||||
const text = diffText.replace(/^\s*┊[^\n]*\n?/, '').trim()
|
||||
const stripped = diffText.replace(/^\s*┊[^\n]*\n?/, '').trim()
|
||||
|
||||
if (!text || this.pendingInlineDiffs.includes(text)) {
|
||||
if (!stripped) {
|
||||
return
|
||||
}
|
||||
|
||||
this.pendingInlineDiffs = [...this.pendingInlineDiffs, text]
|
||||
// Flush any in-progress streaming text as its own segment first, so the
|
||||
// diff lands BETWEEN the assistant narration that preceded the edit and
|
||||
// whatever the agent streams afterwards — not glued onto the final
|
||||
// message. This is the whole point of segment-anchored diffs: the diff
|
||||
// renders where the edit actually happened.
|
||||
this.flushStreamingSegment()
|
||||
|
||||
const block = `\`\`\`diff\n${stripped}\n\`\`\``
|
||||
|
||||
// Skip consecutive duplicates (same tool firing tool.complete twice, or
|
||||
// two edits producing the same patch). Keeping this cheap — deeper
|
||||
// dedupe against the final assistant text happens at message.complete.
|
||||
if (this.segmentMessages.at(-1)?.text === block) {
|
||||
return
|
||||
}
|
||||
|
||||
this.segmentMessages = [...this.segmentMessages, { kind: 'diff', role: 'assistant', text: block }]
|
||||
patchTurnState({ streamSegments: this.segmentMessages })
|
||||
}
|
||||
|
||||
pushActivity(text: string, tone: ActivityItem['tone'] = 'info', replaceLabel?: string) {
|
||||
@@ -234,7 +263,6 @@ class TurnController {
|
||||
this.idle()
|
||||
this.clearReasoning()
|
||||
this.clearStatusTimer()
|
||||
this.pendingInlineDiffs = []
|
||||
this.pendingSegmentTools = []
|
||||
this.segmentMessages = []
|
||||
this.turnTools = []
|
||||
@@ -245,31 +273,31 @@ class TurnController {
|
||||
const rawText = (payload.rendered ?? payload.text ?? this.bufRef).trimStart()
|
||||
const split = splitReasoning(rawText)
|
||||
const finalText = split.text
|
||||
// Skip appending if the assistant already narrated the diff inside a
|
||||
// markdown fence of its own — otherwise we render two stacked diff
|
||||
// blocks for the same edit.
|
||||
const assistantAlreadyHasDiff = /```(?:diff|patch)\b/i.test(finalText)
|
||||
|
||||
const remainingInlineDiffs = assistantAlreadyHasDiff
|
||||
? []
|
||||
: this.pendingInlineDiffs.filter(diff => !finalText.includes(diff))
|
||||
|
||||
const inlineDiffBlock = remainingInlineDiffs.length
|
||||
? `\`\`\`diff\n${remainingInlineDiffs.join('\n\n')}\n\`\`\``
|
||||
: ''
|
||||
|
||||
const mergedText = [finalText, inlineDiffBlock].filter(Boolean).join('\n\n')
|
||||
const existingReasoning = this.reasoningText.trim() || String(payload.reasoning ?? '').trim()
|
||||
const savedReasoning = [existingReasoning, existingReasoning ? '' : split.reasoning].filter(Boolean).join('\n\n')
|
||||
const savedReasoningTokens = savedReasoning ? estimateTokensRough(savedReasoning) : 0
|
||||
const savedToolTokens = this.toolTokenAcc
|
||||
const tools = this.pendingSegmentTools
|
||||
const finalMessages = [...this.segmentMessages]
|
||||
|
||||
if (mergedText) {
|
||||
// Drop diff-only segments the agent is about to narrate in the final
|
||||
// reply. Without this, a closing "here's the diff …" message would
|
||||
// render two stacked copies of the same patch. Only touches segments
|
||||
// with `kind: 'diff'` emitted by pushInlineDiffSegment — real
|
||||
// assistant narration stays put.
|
||||
const finalHasOwnDiffFence = /```(?:diff|patch)\b/i.test(finalText)
|
||||
|
||||
const segments = this.segmentMessages.filter(msg => {
|
||||
const body = diffSegmentBody(msg)
|
||||
|
||||
return body === null || (!finalHasOwnDiffFence && !finalText.includes(body))
|
||||
})
|
||||
|
||||
const finalMessages = [...segments]
|
||||
|
||||
if (finalText) {
|
||||
finalMessages.push({
|
||||
role: 'assistant',
|
||||
text: mergedText,
|
||||
text: finalText,
|
||||
thinking: savedReasoning || undefined,
|
||||
thinkingTokens: savedReasoning ? savedReasoningTokens : undefined,
|
||||
toolTokens: savedToolTokens || undefined,
|
||||
@@ -300,7 +328,7 @@ class TurnController {
|
||||
this.bufRef = ''
|
||||
patchTurnState({ activity: [], outcome: '' })
|
||||
|
||||
return { finalMessages, finalText: mergedText, wasInterrupted }
|
||||
return { finalMessages, finalText, wasInterrupted }
|
||||
}
|
||||
|
||||
recordMessageDelta({ rendered, text }: { rendered?: string; text?: string }) {
|
||||
@@ -406,7 +434,6 @@ class TurnController {
|
||||
this.bufRef = ''
|
||||
this.interrupted = false
|
||||
this.lastStatusNote = ''
|
||||
this.pendingInlineDiffs = []
|
||||
this.pendingSegmentTools = []
|
||||
this.protocolWarned = false
|
||||
this.segmentMessages = []
|
||||
@@ -452,7 +479,6 @@ class TurnController {
|
||||
this.endReasoningPhase()
|
||||
this.clearReasoning()
|
||||
this.activeTools = []
|
||||
this.pendingInlineDiffs = []
|
||||
this.turnTools = []
|
||||
this.toolTokenAcc = 0
|
||||
this.persistedToolLabels.clear()
|
||||
|
||||
@@ -8,7 +8,7 @@ import type {
|
||||
SudoRespondResponse,
|
||||
VoiceRecordResponse
|
||||
} from '../gatewayTypes.js'
|
||||
import { isAction, isMac } from '../lib/platform.js'
|
||||
import { isAction, isMac, isVoiceToggleKey } from '../lib/platform.js'
|
||||
|
||||
import { getInputSelection } from './inputSelectionStore.js'
|
||||
import type { InputHandlerContext, InputHandlerResult } from './interfaces.js'
|
||||
@@ -134,45 +134,43 @@ export function useInputHandlers(ctx: InputHandlerContext): InputHandlerResult {
|
||||
}
|
||||
}
|
||||
|
||||
const voiceStop = () => {
|
||||
voice.setRecording(false)
|
||||
voice.setProcessing(true)
|
||||
// CLI parity: Ctrl+B toggles the VAD-driven continuous recording loop
|
||||
// (NOT the voice-mode umbrella bit). The mode is enabled via /voice on;
|
||||
// Ctrl+B while the mode is off sys-nudges the user. While the mode is
|
||||
// on, the first press starts a continuous loop (gateway → start_continuous,
|
||||
// VAD auto-stop → transcribe → auto-restart), a subsequent press stops it.
|
||||
// The gateway publishes voice.status + voice.transcript events that
|
||||
// createGatewayEventHandler turns into UI badges and composer injection.
|
||||
const voiceRecordToggle = () => {
|
||||
if (!voice.enabled) {
|
||||
return actions.sys('voice: mode is off — enable with /voice on')
|
||||
}
|
||||
|
||||
const starting = !voice.recording
|
||||
const action = starting ? 'start' : 'stop'
|
||||
|
||||
// Optimistic UI — flip the REC badge immediately so the user gets
|
||||
// feedback while the RPC round-trips; the voice.status event is the
|
||||
// authoritative source and may correct us.
|
||||
if (starting) {
|
||||
voice.setRecording(true)
|
||||
} else {
|
||||
voice.setRecording(false)
|
||||
voice.setProcessing(false)
|
||||
}
|
||||
|
||||
gateway
|
||||
.rpc<VoiceRecordResponse>('voice.record', { action: 'stop' })
|
||||
.then(r => {
|
||||
if (!r) {
|
||||
return
|
||||
.rpc<VoiceRecordResponse>('voice.record', { action })
|
||||
.catch((e: Error) => {
|
||||
// Revert optimistic UI on failure.
|
||||
if (starting) {
|
||||
voice.setRecording(false)
|
||||
}
|
||||
|
||||
const transcript = String(r.text || '').trim()
|
||||
|
||||
if (!transcript) {
|
||||
return actions.sys('voice: no speech detected')
|
||||
}
|
||||
|
||||
cActions.setInput(prev => (prev ? `${prev}${/\s$/.test(prev) ? '' : ' '}${transcript}` : transcript))
|
||||
})
|
||||
.catch((e: Error) => actions.sys(`voice error: ${e.message}`))
|
||||
.finally(() => {
|
||||
voice.setProcessing(false)
|
||||
patchUiState({ status: 'ready' })
|
||||
actions.sys(`voice error: ${e.message}`)
|
||||
})
|
||||
}
|
||||
|
||||
const voiceStart = () =>
|
||||
gateway
|
||||
.rpc<VoiceRecordResponse>('voice.record', { action: 'start' })
|
||||
.then(r => {
|
||||
if (!r) {
|
||||
return
|
||||
}
|
||||
|
||||
voice.setRecording(true)
|
||||
patchUiState({ status: 'recording…' })
|
||||
})
|
||||
.catch((e: Error) => actions.sys(`voice error: ${e.message}`))
|
||||
|
||||
useInput((ch, key) => {
|
||||
const live = getUiState()
|
||||
|
||||
@@ -370,8 +368,8 @@ export function useInputHandlers(ctx: InputHandlerContext): InputHandlerResult {
|
||||
return actions.newSession()
|
||||
}
|
||||
|
||||
if (isAction(key, ch, 'b')) {
|
||||
return voice.recording ? voiceStop() : voiceStart()
|
||||
if (isVoiceToggleKey(key, ch)) {
|
||||
return voiceRecordToggle()
|
||||
}
|
||||
|
||||
if (isAction(key, ch, 'g')) {
|
||||
|
||||
@@ -454,13 +454,20 @@ export function useMainApp(gw: GatewayClient) {
|
||||
composer: { actions: composerActions, refs: composerRefs, state: composerState },
|
||||
gateway,
|
||||
terminal: { hasSelection, scrollRef, scrollWithSelection, selection, stdout },
|
||||
voice: { recording: voiceRecording, setProcessing: setVoiceProcessing, setRecording: setVoiceRecording },
|
||||
voice: {
|
||||
enabled: voiceEnabled,
|
||||
recording: voiceRecording,
|
||||
setProcessing: setVoiceProcessing,
|
||||
setRecording: setVoiceRecording,
|
||||
setVoiceEnabled
|
||||
},
|
||||
wheelStep: WHEEL_SCROLL_STEP
|
||||
})
|
||||
|
||||
const onEvent = useMemo(
|
||||
() =>
|
||||
createGatewayEventHandler({
|
||||
composer: { setInput: composerActions.setInput },
|
||||
gateway,
|
||||
session: {
|
||||
STARTUP_RESUME_ID,
|
||||
@@ -470,18 +477,29 @@ export function useMainApp(gw: GatewayClient) {
|
||||
resumeById: session.resumeById,
|
||||
setCatalog
|
||||
},
|
||||
submission: { submitRef },
|
||||
system: { bellOnComplete, stdout, sys },
|
||||
transcript: { appendMessage, panel, setHistoryItems }
|
||||
transcript: { appendMessage, panel, setHistoryItems },
|
||||
voice: {
|
||||
setProcessing: setVoiceProcessing,
|
||||
setRecording: setVoiceRecording,
|
||||
setVoiceEnabled
|
||||
}
|
||||
}),
|
||||
[
|
||||
appendMessage,
|
||||
bellOnComplete,
|
||||
composerActions.setInput,
|
||||
gateway,
|
||||
panel,
|
||||
session.newSession,
|
||||
session.resetSession,
|
||||
session.resumeById,
|
||||
setVoiceEnabled,
|
||||
setVoiceProcessing,
|
||||
setVoiceRecording,
|
||||
stdout,
|
||||
submitRef,
|
||||
sys
|
||||
]
|
||||
)
|
||||
@@ -698,7 +716,9 @@ export function useMainApp(gw: GatewayClient) {
|
||||
statusColor: statusColorOf(ui.status, ui.theme.color),
|
||||
stickyPrompt,
|
||||
turnStartedAt: ui.sid ? turnStartedAt : null,
|
||||
voiceLabel: voiceRecording ? 'REC' : voiceProcessing ? 'STT' : `voice ${voiceEnabled ? 'on' : 'off'}`
|
||||
// CLI parity: the classic prompt_toolkit status bar shows a red dot
|
||||
// on REC (cli.py:_get_voice_status_fragments line 2344).
|
||||
voiceLabel: voiceRecording ? '● REC' : voiceProcessing ? '◉ STT' : `voice ${voiceEnabled ? 'on' : 'off'}`
|
||||
}),
|
||||
[
|
||||
cwd,
|
||||
|
||||
@@ -215,7 +215,20 @@ export function StatusRule({
|
||||
</Text>
|
||||
) : null}
|
||||
<SpawnHud t={t} />
|
||||
{voiceLabel ? <Text color={t.color.dim}> │ {voiceLabel}</Text> : null}
|
||||
{voiceLabel ? (
|
||||
<Text
|
||||
color={
|
||||
voiceLabel.startsWith('●')
|
||||
? t.color.error
|
||||
: voiceLabel.startsWith('◉')
|
||||
? t.color.warn
|
||||
: t.color.dim
|
||||
}
|
||||
>
|
||||
{' │ '}
|
||||
{voiceLabel}
|
||||
</Text>
|
||||
) : null}
|
||||
{bgCount > 0 ? <Text color={t.color.dim}> │ {bgCount} bg</Text> : null}
|
||||
{showCost && typeof usage.cost_usd === 'number' ? (
|
||||
<Text color={t.color.dim}> │ ${usage.cost_usd.toFixed(4)}</Text>
|
||||
|
||||
@@ -81,11 +81,16 @@ export const MessageLine = memo(function MessageLine({
|
||||
return <Text {...(body ? { color: body } : {})}>{msg.text}</Text>
|
||||
})()
|
||||
|
||||
// Diff segments (emitted by pushInlineDiffSegment between narration
|
||||
// segments) need a blank line on both sides so the patch doesn't butt up
|
||||
// against the prose around it.
|
||||
const isDiffSegment = msg.kind === 'diff'
|
||||
|
||||
return (
|
||||
<Box
|
||||
flexDirection="column"
|
||||
marginBottom={msg.role === 'user' ? 1 : 0}
|
||||
marginTop={msg.role === 'user' || msg.kind === 'slash' ? 1 : 0}
|
||||
marginBottom={msg.role === 'user' || isDiffSegment ? 1 : 0}
|
||||
marginTop={msg.role === 'user' || msg.kind === 'slash' || isDiffSegment ? 1 : 0}
|
||||
>
|
||||
{showDetails && (
|
||||
<Box flexDirection="column" marginBottom={1}>
|
||||
|
||||
@@ -623,7 +623,19 @@ export function TextInput({
|
||||
return
|
||||
}
|
||||
|
||||
if ((k.ctrl && inp === 'c') || k.tab || (k.shift && k.tab) || k.pageUp || k.pageDown || k.escape) {
|
||||
// Ctrl+B is the documented voice-recording toggle (see platform.ts →
|
||||
// isVoiceToggleKey). Pass it through so the app-level handler in
|
||||
// useInputHandlers receives it instead of being swallowed here as
|
||||
// either backward-word nav (line below) or a literal 'b' insertion.
|
||||
if (
|
||||
(k.ctrl && inp === 'c') ||
|
||||
(k.ctrl && inp === 'b') ||
|
||||
k.tab ||
|
||||
(k.shift && k.tab) ||
|
||||
k.pageUp ||
|
||||
k.pageDown ||
|
||||
k.escape
|
||||
) {
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
@@ -236,10 +236,16 @@ export interface ImageAttachResponse {
|
||||
// ── Voice ────────────────────────────────────────────────────────────
|
||||
|
||||
export interface VoiceToggleResponse {
|
||||
audio_available?: boolean
|
||||
available?: boolean
|
||||
details?: string
|
||||
enabled?: boolean
|
||||
stt_available?: boolean
|
||||
tts?: boolean
|
||||
}
|
||||
|
||||
export interface VoiceRecordResponse {
|
||||
status?: string
|
||||
text?: string
|
||||
}
|
||||
|
||||
@@ -368,6 +374,8 @@ export type GatewayEvent =
|
||||
| { payload?: { text?: string }; session_id?: string; type: 'thinking.delta' }
|
||||
| { payload?: undefined; session_id?: string; type: 'message.start' }
|
||||
| { payload?: { kind?: string; text?: string }; session_id?: string; type: 'status.update' }
|
||||
| { payload?: { state?: 'idle' | 'listening' | 'transcribing' }; session_id?: string; type: 'voice.status' }
|
||||
| { payload?: { no_speech_limit?: boolean; text?: string }; session_id?: string; type: 'voice.transcript' }
|
||||
| { payload: { line: string }; session_id?: string; type: 'gateway.stderr' }
|
||||
| { payload?: { cwd?: string; python?: string }; session_id?: string; type: 'gateway.start_timeout' }
|
||||
| { payload?: { preview?: string }; session_id?: string; type: 'gateway.protocol_error' }
|
||||
|
||||
@@ -33,3 +33,17 @@ export const isMacActionFallback = (
|
||||
/** Match action-modifier + a single character (case-insensitive). */
|
||||
export const isAction = (key: { ctrl: boolean; meta: boolean; super?: boolean }, ch: string, target: string): boolean =>
|
||||
isActionMod(key) && ch.toLowerCase() === target
|
||||
|
||||
/**
 * Reports whether a keypress is the voice-recording toggle (Ctrl+B).
 *
 * "Ctrl+B" is the binding documented everywhere (tips.py, the default of
 * config.yaml's voice.record_key, and the Python CLI prompt_toolkit handler),
 * so raw Ctrl+B is accepted on every platform to keep the TUI in line with
 * those docs. The platform action modifier is accepted as well, which on
 * macOS means Cmd+B keeps working for existing muscle memory.
 *
 * @param key Modifier flags of the key event.
 * @param ch  Character delivered with the key event; compared case-insensitively.
 * @returns `true` when a qualifying modifier is held and the character is "b".
 */
export const isVoiceToggleKey = (
  key: { ctrl: boolean; meta: boolean; super?: boolean },
  ch: string
): boolean => {
  // Raw Ctrl is checked first; the action-modifier helper is only consulted
  // when Ctrl is not held.
  const modifierHeld = key.ctrl || isActionMod(key)

  if (!modifierHeld) {
    return false
  }

  return ch.toLowerCase() === 'b'
}
|
||||
|
||||
@@ -102,7 +102,7 @@ export interface ClarifyReq {
|
||||
|
||||
export interface Msg {
|
||||
info?: SessionInfo
|
||||
kind?: 'intro' | 'panel' | 'slash' | 'trail'
|
||||
kind?: 'diff' | 'intro' | 'panel' | 'slash' | 'trail'
|
||||
panelData?: PanelData
|
||||
role: Role
|
||||
text: string
|
||||
|
||||
Reference in New Issue
Block a user