Compare commits

..

1 Commits

Author SHA1 Message Date
kshitijk4poor be5a2ee5d3 feat(skills): expand touchdesigner-mcp with GLSL, post-FX, audio, geometry references
Add 6 new reference files with generic reusable patterns:
- glsl.md: uniforms, built-in functions, shader templates, Bayer dither
- postfx.md: bloom, CRT scanlines, chromatic aberration, feedback glow
- layout-compositor.md: layoutTOP, overTOP grids, panel dividers
- operator-tips.md: wireframe rendering, feedback TOP setup
- geometry-comp.md: instancing, POP vs SOP rendering, shape morphing
- audio-reactive.md: band extraction (audiofilterCHOP), beat detection, MIDI

Expand SKILL.md with:
- TD 2025 API quirks (connection syntax, GLSL TOP rules, expression gotchas)
- Trimmed param name table (8 known LLM traps, defers to td_get_par_info)
- Slider-to-shader wiring (td_execute_python + ParMode.EXPRESSION)
- Frame capture with run()/delayFrames (TOP.save() timing fix)
- TD 099 POP vs SOP rendering rules
- Incremental build strategy for large scripts
- Remote TD setup (PC over Ethernet)
- Audio synthesis via CHOPs (LFO-driven envelope pattern)

Expand pitfalls.md (#46-63):
- Connection syntax, moviefileoutTOP bug, batch frame capture
- TOP.save() time advancement, feedback masking, incremental builds
- MCP reconnection after project.load(), TOX reverse-engineering
- sliderCOMP naming, create() suffix requirement
- COMP reparenting (copyOPs), expressionCHOP crash

All content is generic — no session-specific paths, hardware, aesthetics,
or param-name-only entries (those belong in td_get_par_info).
Bumps version 1.0.0 → 2.0.0.
2026-04-22 01:49:49 +05:30
88 changed files with 1743 additions and 3336 deletions
+18 -200
View File
@@ -728,33 +728,6 @@ def _nous_base_url() -> str:
return os.getenv("NOUS_INFERENCE_BASE_URL", _NOUS_DEFAULT_BASE_URL)
def _resolve_nous_runtime_api(*, force_refresh: bool = False) -> Optional[tuple[str, str]]:
    """Resolve a usable ``(api_key, base_url)`` pair for the Nous runtime.

    Delegates to ``hermes_cli.auth.resolve_nous_runtime_credentials`` so that
    auxiliary clients follow the same auth-store + mint flow as the main
    agent's 401 recovery path, rather than depending only on raw tokens found
    in auth.json or the credential pool.

    Args:
        force_refresh: Forwarded to the resolver as ``force_mint``.

    Returns:
        ``(api_key, base_url)`` with surrounding whitespace stripped and any
        trailing ``/`` removed from the URL, or ``None`` when resolution
        raises or either value comes back empty.
    """
    try:
        from hermes_cli.auth import resolve_nous_runtime_credentials

        # The env override is clamped to a floor of 60 seconds.
        min_ttl = max(60, int(os.getenv("HERMES_NOUS_MIN_KEY_TTL_SECONDS", "1800")))
        timeout = float(os.getenv("HERMES_NOUS_TIMEOUT_SECONDS", "15"))
        creds = resolve_nous_runtime_credentials(
            min_key_ttl_seconds=min_ttl,
            timeout_seconds=timeout,
            force_mint=force_refresh,
        )
    except Exception as exc:
        # Best-effort: any failure (import, bad env value, resolver error)
        # means "no runtime credentials available".
        logger.debug("Auxiliary Nous runtime credential resolution failed: %s", exc)
        return None

    key = str(creds.get("api_key") or "").strip()
    url = str(creds.get("base_url") or "").strip().rstrip("/")
    return (key, url) if key and url else None
def _read_codex_access_token() -> Optional[str]:
"""Read a valid, non-expired Codex OAuth access token from Hermes auth store.
@@ -921,8 +894,7 @@ def _try_nous(vision: bool = False) -> Tuple[Optional[OpenAI], Optional[str]]:
pass
nous = _read_nous_auth()
runtime = _resolve_nous_runtime_api(force_refresh=False)
if runtime is None and not nous:
if not nous:
return None, None
global auxiliary_is_nous
auxiliary_is_nous = True
@@ -933,8 +905,6 @@ def _try_nous(vision: bool = False) -> Tuple[Optional[OpenAI], Optional[str]]:
model = _NOUS_MODEL
# Free-tier users can't use paid auxiliary models — use the free
# models instead: mimo-v2-omni for vision, mimo-v2-pro for text tasks.
# Paid accounts keep their tier-appropriate models: gemini-3-flash-preview
# for both text and vision tasks.
try:
from hermes_cli.models import check_nous_free_tier
if check_nous_free_tier():
@@ -943,15 +913,10 @@ def _try_nous(vision: bool = False) -> Tuple[Optional[OpenAI], Optional[str]]:
model, "vision" if vision else "text")
except Exception:
pass
if runtime is not None:
api_key, base_url = runtime
else:
api_key = _nous_api_key(nous or {})
base_url = str((nous or {}).get("inference_base_url") or _nous_base_url()).rstrip("/")
return (
OpenAI(
api_key=api_key,
base_url=base_url,
api_key=_nous_api_key(nous),
base_url=str(nous.get("inference_base_url") or _nous_base_url()).rstrip("/"),
),
model,
)
@@ -1295,15 +1260,6 @@ def _is_connection_error(exc: Exception) -> bool:
return False
def _is_auth_error(exc: Exception) -> bool:
"""Detect auth failures that should trigger provider-specific refresh."""
status = getattr(exc, "status_code", None)
if status == 401:
return True
err_lower = str(exc).lower()
return "error code: 401" in err_lower or "authenticationerror" in type(exc).__name__.lower()
def _try_payment_fallback(
failed_provider: str,
task: str = None,
@@ -1611,13 +1567,7 @@ def resolve_provider_client(
# ── Nous Portal (OAuth) ──────────────────────────────────────────
if provider == "nous":
# Detect vision tasks: either explicit model override from
# _PROVIDER_VISION_MODELS, or caller passed a known vision model.
_is_vision = (
model in _PROVIDER_VISION_MODELS.values()
or (model or "").strip().lower() == "mimo-v2-omni"
)
client, default = _try_nous(vision=_is_vision)
client, default = _try_nous()
if client is None:
logger.warning("resolve_provider_client: nous requested "
"but Nous Portal not configured (run: hermes auth)")
@@ -2013,35 +1963,24 @@ def resolve_vision_provider_client(
# _PROVIDER_VISION_MODELS provides per-provider vision model
# overrides when the provider has a dedicated multimodal model
# that differs from the chat model (e.g. xiaomi → mimo-v2-omni,
# zai → glm-5v-turbo). Nous is the exception: it has a dedicated
# strict vision backend with tier-aware defaults, so it must not
# fall through to the user's text chat model here.
# zai → glm-5v-turbo).
# 2. OpenRouter (vision-capable aggregator fallback)
# 3. Nous Portal (vision-capable aggregator fallback)
# 4. Stop
main_provider = _read_main_provider()
main_model = _read_main_model()
if main_provider and main_provider not in ("auto", ""):
if main_provider == "nous":
sync_client, default_model = _resolve_strict_vision_backend(main_provider)
if sync_client is not None:
logger.info(
"Vision auto-detect: using main provider %s (%s)",
main_provider, default_model or resolved_model or main_model,
)
return _finalize(main_provider, sync_client, default_model)
else:
vision_model = _PROVIDER_VISION_MODELS.get(main_provider, main_model)
rpc_client, rpc_model = resolve_provider_client(
main_provider, vision_model,
api_mode=resolved_api_mode)
if rpc_client is not None:
logger.info(
"Vision auto-detect: using main provider %s (%s)",
main_provider, rpc_model or vision_model,
)
return _finalize(
main_provider, rpc_client, rpc_model or vision_model)
vision_model = _PROVIDER_VISION_MODELS.get(main_provider, main_model)
rpc_client, rpc_model = resolve_provider_client(
main_provider, vision_model,
api_mode=resolved_api_mode)
if rpc_client is not None:
logger.info(
"Vision auto-detect: using main provider %s (%s)",
main_provider, rpc_model or vision_model,
)
return _finalize(
main_provider, rpc_client, rpc_model or vision_model)
# Fall back through aggregators (uses their dedicated vision model,
# not the user's main model) when main provider has no client.
@@ -2116,76 +2055,6 @@ _client_cache_lock = threading.Lock()
_CLIENT_CACHE_MAX_SIZE = 64 # safety belt — evict oldest when exceeded
def _client_cache_key(
    provider: str,
    *,
    async_mode: bool,
    base_url: Optional[str] = None,
    api_key: Optional[str] = None,
    api_mode: Optional[str] = None,
    main_runtime: Optional[Dict[str, Any]] = None,
) -> tuple:
    """Build the tuple used to index the client cache.

    The key is ``(provider, async_mode, base_url, api_key, api_mode,
    runtime_part)`` where missing string values are normalised to ``""``.
    ``runtime_part`` carries the ``_MAIN_RUNTIME_FIELDS`` values of the
    normalised main runtime only when ``provider == "auto"``; for every
    other provider it is the empty tuple.
    """
    normalized = _normalize_main_runtime(main_runtime)
    if provider == "auto":
        runtime_part = tuple(normalized.get(name, "") for name in _MAIN_RUNTIME_FIELDS)
    else:
        runtime_part = ()
    return (
        provider,
        async_mode,
        base_url or "",
        api_key or "",
        api_mode or "",
        runtime_part,
    )
def _store_cached_client(cache_key: tuple, client: Any, default_model: Optional[str], *, bound_loop: Any = None) -> None:
    """Insert ``(client, default_model, bound_loop)`` into the client cache.

    Runs under ``_client_cache_lock``. If *cache_key* already maps to a
    different client object, that previous client is passed to
    ``_force_close_async_httpx`` and then closed via its ``close()`` method
    (when it has a callable one) before being replaced. Errors raised by
    ``close()`` are ignored.
    """
    with _client_cache_lock:
        previous = _client_cache.get(cache_key)
        replacing_other_client = previous is not None and previous[0] is not client
        if replacing_other_client:
            stale_client = previous[0]
            _force_close_async_httpx(stale_client)
            try:
                closer = getattr(stale_client, "close", None)
                if callable(closer):
                    closer()
            except Exception:
                # Closing the evicted client is best-effort only.
                pass
        _client_cache[cache_key] = (client, default_model, bound_loop)
def _refresh_nous_auxiliary_client(
    *,
    cache_provider: str,
    model: Optional[str],
    async_mode: bool,
    base_url: Optional[str] = None,
    api_key: Optional[str] = None,
    api_mode: Optional[str] = None,
    main_runtime: Optional[Dict[str, Any]] = None,
) -> Tuple[Optional[Any], Optional[str]]:
    """Force-refresh Nous credentials, rebuild the client and re-cache it.

    Args:
        cache_provider: Provider component of the cache key to overwrite.
        model: Model name to pair with the rebuilt client.
        async_mode: When true, the rebuilt client is converted with
            ``_to_async_client`` and the current event loop is recorded.
        base_url, api_key, api_mode, main_runtime: Remaining cache-key
            components, passed unchanged to ``_client_cache_key``.

    Returns:
        ``(client, model)`` for the rebuilt client, or ``(None, model)`` when
        fresh runtime credentials could not be resolved.
    """
    fresh = _resolve_nous_runtime_api(force_refresh=True)
    if fresh is None:
        return None, model

    fresh_key, fresh_base_url = fresh
    client = OpenAI(api_key=fresh_key, base_url=fresh_base_url)
    resolved_model = model
    loop = None

    if async_mode:
        try:
            import asyncio as _aio
            loop = _aio.get_event_loop()
        except RuntimeError:
            # No usable event loop; cache the client without a bound loop.
            pass
        client, resolved_model = _to_async_client(client, resolved_model or "")

    key = _client_cache_key(
        cache_provider,
        async_mode=async_mode,
        base_url=base_url,
        api_key=api_key,
        api_mode=api_mode,
        main_runtime=main_runtime,
    )
    _store_cached_client(key, client, resolved_model, bound_loop=loop)
    return client, resolved_model
def neuter_async_httpx_del() -> None:
"""Monkey-patch ``AsyncHttpxClientWrapper.__del__`` to be a no-op.
@@ -2339,14 +2208,8 @@ def _get_cached_client(
except RuntimeError:
pass
runtime = _normalize_main_runtime(main_runtime)
cache_key = _client_cache_key(
provider,
async_mode=async_mode,
base_url=base_url,
api_key=api_key,
api_mode=api_mode,
main_runtime=main_runtime,
)
runtime_key = tuple(runtime.get(field, "") for field in _MAIN_RUNTIME_FIELDS) if provider == "auto" else ()
cache_key = (provider, async_mode, base_url or "", api_key or "", api_mode or "", runtime_key)
with _client_cache_lock:
if cache_key in _client_cache:
cached_client, cached_default, cached_loop = _client_cache[cache_key]
@@ -2794,29 +2657,6 @@ def call_llm(
raise
first_err = retry_err
# ── Nous auth refresh parity with main agent ──────────────────
client_is_nous = (
resolved_provider == "nous"
or base_url_host_matches(_base_info, "inference-api.nousresearch.com")
)
if _is_auth_error(first_err) and client_is_nous:
refreshed_client, refreshed_model = _refresh_nous_auxiliary_client(
cache_provider=resolved_provider or "nous",
model=final_model,
async_mode=False,
base_url=resolved_base_url,
api_key=resolved_api_key,
api_mode=resolved_api_mode,
main_runtime=main_runtime,
)
if refreshed_client is not None:
logger.info("Auxiliary %s: refreshed Nous runtime credentials after 401, retrying",
task or "call")
if refreshed_model and refreshed_model != kwargs.get("model"):
kwargs["model"] = refreshed_model
return _validate_llm_response(
refreshed_client.chat.completions.create(**kwargs), task)
# ── Payment / credit exhaustion fallback ──────────────────────
# When the resolved provider returns 402 or a credit-related error,
# try alternative providers instead of giving up. This handles the
@@ -3015,28 +2855,6 @@ async def async_call_llm(
raise
first_err = retry_err
# ── Nous auth refresh parity with main agent ──────────────────
client_is_nous = (
resolved_provider == "nous"
or base_url_host_matches(_client_base, "inference-api.nousresearch.com")
)
if _is_auth_error(first_err) and client_is_nous:
refreshed_client, refreshed_model = _refresh_nous_auxiliary_client(
cache_provider=resolved_provider or "nous",
model=final_model,
async_mode=True,
base_url=resolved_base_url,
api_key=resolved_api_key,
api_mode=resolved_api_mode,
)
if refreshed_client is not None:
logger.info("Auxiliary %s (async): refreshed Nous runtime credentials after 401, retrying",
task or "call")
if refreshed_model and refreshed_model != kwargs.get("model"):
kwargs["model"] = refreshed_model
return _validate_llm_response(
await refreshed_client.chat.completions.create(**kwargs), task)
# ── Payment / connection fallback (mirrors sync call_llm) ─────
should_fallback = _is_payment_error(first_err) or _is_connection_error(first_err)
is_auto = resolved_provider in ("auto", "", None)
+2 -4
View File
@@ -770,12 +770,10 @@ code_execution:
# Subagent Delegation
# =============================================================================
# The delegate_task tool spawns child agents with isolated context.
# Supports single tasks and batch mode (default 3 parallel, configurable).
# Supports single tasks and batch mode (up to 3 parallel).
delegation:
max_iterations: 50 # Max tool-calling turns per child (default: 50)
# max_concurrent_children: 3 # Max parallel child agents (default: 3)
# max_spawn_depth: 1 # Tree depth cap (1-3, default: 1 = flat). Raise to 2 or 3 to allow orchestrator children to spawn their own workers.
# orchestrator_enabled: true # Kill switch for role="orchestrator" children (default: true).
default_toolsets: ["terminal", "file", "web"] # Default toolsets for subagents
# model: "google/gemini-3-flash-preview" # Override model for subagents (empty = inherit parent)
# provider: "openrouter" # Override provider for subagents (empty = inherit parent)
# # Resolves full credentials (base_url, api_key) automatically.
+1 -20
View File
@@ -371,6 +371,7 @@ def load_cli_config() -> Dict[str, Any]:
},
"delegation": {
"max_iterations": 45, # Max tool-calling turns per child agent
"default_toolsets": ["terminal", "file", "web"], # Default toolsets for subagents
"model": "", # Subagent model override (empty = inherit parent model)
"provider": "", # Subagent provider override (empty = inherit parent provider)
"base_url": "", # Direct OpenAI-compatible endpoint for subagents
@@ -8370,17 +8371,6 @@ class HermesCLI:
def run_agent():
nonlocal result
# Set callbacks inside the agent thread so thread-local storage
# in terminal_tool is populated for this thread. The main thread
# registration (run() line ~9046) is invisible here because
# _callback_tls is threading.local(). Matches the pattern used
# by acp_adapter/server.py for ACP sessions.
set_sudo_password_callback(self._sudo_password_callback)
set_approval_callback(self._approval_callback)
try:
set_secret_capture_callback(self._secret_capture_callback)
except Exception:
pass
agent_message = _voice_prefix + message if _voice_prefix else message
# Prepend pending model switch note so the model knows about the switch
_msn = getattr(self, '_pending_model_switch_note', None)
@@ -8406,15 +8396,6 @@ class HermesCLI:
"failed": True,
"error": _summary,
}
finally:
# Clear thread-local callbacks so a reused thread doesn't
# hold stale references to a disposed CLI instance.
try:
set_sudo_password_callback(None)
set_approval_callback(None)
set_secret_capture_callback(None)
except Exception:
pass
# Start agent in background thread (daemon so it cannot keep the
# process alive when the user closes the terminal tab — SIGHUP
+1 -1
View File
@@ -1343,7 +1343,7 @@ class BasePlatformAdapter(ABC):
# Extract MEDIA:<path> tags, allowing optional whitespace after the colon
# and quoted/backticked paths for LLM-formatted outputs.
media_pattern = re.compile(
r'''[`"']?MEDIA:\s*(?P<path>`[^`\n]+`|"[^"\n]+"|'[^'\n]+'|(?:~/|/)\S+(?:[^\S\n]+\S+)*?\.(?:png|jpe?g|gif|webp|mp4|mov|avi|mkv|webm|ogg|opus|mp3|wav|m4a|pdf)(?=[\s`"',;:)\]}]|$)|\S+)[`"']?'''
r'''[`"']?MEDIA:\s*(?P<path>`[^`\n]+`|"[^"\n]+"|'[^'\n]+'|(?:~/|/)\S+(?:[^\S\n]+\S+)*?\.(?:png|jpe?g|gif|webp|mp4|mov|avi|mkv|webm|ogg|opus|mp3|wav|m4a)(?=[\s`"',;:)\]}]|$)|\S+)[`"']?'''
)
for match in media_pattern.finditer(content):
path = match.group("path").strip()
+7 -7
View File
@@ -3887,14 +3887,14 @@ class GatewayRunner:
message_text = f"{context_note}\n\n{message_text}"
if getattr(event, "reply_to_text", None) and event.reply_to_message_id:
# Always inject the reply-to pointer — even when the quoted text
# already appears in history. The prefix isn't deduplication, it's
# disambiguation: it tells the agent *which* prior message the user
# is referencing. History can contain the same or similar text
# multiple times, and without an explicit pointer the agent has to
# guess (or answer for both subjects). Token overhead is minimal.
reply_snippet = event.reply_to_text[:500]
message_text = f'[Replying to: "{reply_snippet}"]\n\n{message_text}'
found_in_history = any(
reply_snippet[:200] in (msg.get("content") or "")
for msg in history
if msg.get("role") in ("assistant", "user", "tool")
)
if not found_in_history:
message_text = f'[Replying to: "{reply_snippet}"]\n\n{message_text}'
if "@" in message_text:
try:
+8 -24
View File
@@ -924,22 +924,12 @@ class SlashCommandCompleter(Completer):
display_meta=meta,
)
# If the user typed @file: / @folder: (or just @file / @folder with
# no colon yet), delegate to path completions. Accepting the bare
# form lets the picker surface directories as soon as the user has
# typed `@folder`, without requiring them to first accept the static
# `@folder:` hint and re-trigger completion.
# If the user typed @file: or @folder:, delegate to path completions
for prefix in ("@file:", "@folder:"):
bare = prefix[:-1]
if word == bare or word.startswith(prefix):
want_dir = prefix == "@folder:"
path_part = '' if word == bare else word[len(prefix):]
if word.startswith(prefix):
path_part = word[len(prefix):] or "."
expanded = os.path.expanduser(path_part)
if not expanded or expanded == ".":
search_dir, match_prefix = ".", ""
elif expanded.endswith("/"):
if expanded.endswith("/"):
search_dir, match_prefix = expanded, ""
else:
search_dir = os.path.dirname(expanded) or "."
@@ -955,21 +945,15 @@ class SlashCommandCompleter(Completer):
for entry in sorted(entries):
if match_prefix and not entry.lower().startswith(prefix_lower):
continue
full_path = os.path.join(search_dir, entry)
is_dir = os.path.isdir(full_path)
# `@folder:` must only surface directories; `@file:` only
# regular files. Without this filter `@folder:` listed
# every .env / .gitignore in the cwd, defeating the
# explicit prefix and confusing users expecting a
# directory picker.
if want_dir != is_dir:
continue
if count >= limit:
break
full_path = os.path.join(search_dir, entry)
is_dir = os.path.isdir(full_path)
display_path = os.path.relpath(full_path)
suffix = "/" if is_dir else ""
kind = "folder" if is_dir else "file"
meta = "dir" if is_dir else _file_size_label(full_path)
completion = f"{prefix}{display_path}{suffix}"
completion = f"@{kind}:{display_path}{suffix}"
yield Completion(
completion,
start_position=-len(word),
-6
View File
@@ -712,12 +712,6 @@ DEFAULT_CONFIG = {
# independent of the parent's max_iterations)
"reasoning_effort": "", # reasoning effort for subagents: "xhigh", "high", "medium",
# "low", "minimal", "none" (empty = inherit parent's level)
"max_concurrent_children": 3, # max parallel children per batch; floor of 1 enforced, no ceiling
# Orchestrator role controls (see tools/delegate_tool.py:_get_max_spawn_depth
# and _get_orchestrator_enabled). Values are clamped to [1, 3] with a
# warning log if out of range.
"max_spawn_depth": 1, # depth cap (1 = flat [default], 2 = orchestrator→leaf, 3 = three-level)
"orchestrator_enabled": True, # kill switch for role="orchestrator"
},
# Ephemeral prefill messages file — JSON list of {role, content} dicts
+1 -1
View File
@@ -127,7 +127,7 @@ TIPS = [
# --- Tools & Capabilities ---
"execute_code runs Python scripts that call Hermes tools programmatically — results stay out of context.",
"delegate_task spawns up to 3 concurrent sub-agents by default (configurable via delegation.max_concurrent_children) with isolated contexts for parallel work.",
"delegate_task spawns up to 3 concurrent sub-agents with isolated contexts for parallel work.",
"web_extract works on PDF URLs — pass any PDF link and it converts to markdown.",
"search_files is ripgrep-backed and faster than grep — use it instead of terminal grep.",
"patch uses 9 fuzzy matching strategies so minor whitespace differences won't break edits.",
@@ -1,8 +1,8 @@
---
name: touchdesigner-mcp
description: "Control a running TouchDesigner instance via twozero MCP — create operators, set parameters, wire connections, execute Python, build real-time visuals. 36 native tools."
version: 1.0.0
author: kshitijk4poor
version: 2.0.0
author: Hermes Agent
license: MIT
metadata:
hermes:
@@ -36,7 +36,7 @@ Hub health check: `GET http://localhost:40404/mcp` returns JSON with instance PI
Run the setup script to handle everything:
```bash
bash "${HERMES_HOME:-$HOME/.hermes}/skills/creative/touchdesigner-mcp/scripts/setup.sh"
bash ~/.hermes/skills/creative/touchdesigner-mcp/scripts/setup.sh
```
The script will:
@@ -332,6 +332,12 @@ See `references/network-patterns.md` for complete build scripts + shader code.
| `references/mcp-tools.md` | Full twozero MCP tool parameter schemas |
| `references/python-api.md` | TD Python: op(), scripting, extensions |
| `references/troubleshooting.md` | Connection diagnostics, debugging |
| `references/glsl.md` | GLSL uniforms, built-in functions, shader templates |
| `references/postfx.md` | Post-FX: bloom, CRT, chromatic aberration, feedback glow |
| `references/layout-compositor.md` | HUD layout patterns, panel grids, BSP-style layouts |
| `references/operator-tips.md` | Wireframe rendering, feedback TOP setup |
| `references/geometry-comp.md` | Geometry COMP: instancing, POP vs SOP, morphing |
| `references/audio-reactive.md` | Audio band extraction, beat detection, envelope following |
| `scripts/setup.sh` | Automated setup script |
---
@@ -0,0 +1,175 @@
# Audio-Reactive Reference
Patterns for driving visuals from audio — spectrum analysis, beat detection, envelope following.
## Audio Input
```python
# Live input from audio interface
audio_in = root.create(audiodeviceinCHOP, 'audio_in')
audio_in.par.rate = 44100
# OR: from audio file (for testing)
audio_file = root.create(audiofileinCHOP, 'audio_in')
audio_file.par.file = '/path/to/track.wav'
audio_file.par.play = True
audio_file.par.repeat = 'on' # NOT par.loop
audio_file.par.playmode = 'locked'
```
---
## Audio Band Extraction (Verified TD 2025.32460)
Use `audiofilterCHOP` for band separation (NOT `selectCHOP` by channel index):
```python
# Audio input
af = root.create(audiofileinCHOP, 'audio_in')
af.par.file = path
af.par.play = True
af.par.repeat = 'on'
af.par.playmode = 'locked'
# Low band: lowpass @ 250Hz
flt_low = root.create(audiofilterCHOP, 'flt_low')
flt_low.par.filter = 'lowpass'
flt_low.par.cutofffrequency = 250
flt_low.par.rolloff = 2
flt_low.inputConnectors[0].connect(af)
# Mid band: highpass@250 → lowpass@4000
flt_mid_hp = root.create(audiofilterCHOP, 'flt_mid_hp')
flt_mid_hp.par.filter = 'highpass'
flt_mid_hp.par.cutofffrequency = 250
flt_mid_hp.par.rolloff = 2
flt_mid_hp.inputConnectors[0].connect(af)
flt_mid_lp = root.create(audiofilterCHOP, 'flt_mid_lp')
flt_mid_lp.par.filter = 'lowpass'
flt_mid_lp.par.cutofffrequency = 4000
flt_mid_lp.par.rolloff = 2
flt_mid_lp.inputConnectors[0].connect(flt_mid_hp)
# High band: highpass @ 4000Hz
flt_high = root.create(audiofilterCHOP, 'flt_high')
flt_high.par.filter = 'highpass'
flt_high.par.cutofffrequency = 4000
flt_high.par.rolloff = 2
flt_high.inputConnectors[0].connect(af)
# Per-band: RMS → lag → gain → clamp
for name, filt in [('low', flt_low), ('mid', flt_mid_lp), ('high', flt_high)]:
rms = root.create(analyzeCHOP, f'rms_{name}')
rms.par.function = 'rmspower' # NOT 'rms'
rms.inputConnectors[0].connect(filt)
lag = root.create(lagCHOP, f'lag_{name}')
lag.par.lag1 = 0.05 # attack (NOT par.lagin)
lag.par.lag2 = 0.25 # release (NOT par.lagout)
lag.inputConnectors[0].connect(rms)
math = root.create(mathCHOP, f'scale_{name}')
math.par.gain = 8.0
math.inputConnectors[0].connect(lag)
# mathCHOP has NO par.clamp — use limitCHOP
lim = root.create(limitCHOP, f'clamp_{name}')
lim.par.type = 'clamp'
lim.par.min = 0.0
lim.par.max = 1.0
lim.inputConnectors[0].connect(math)
null = root.create(nullCHOP, f'out_{name}')
null.inputConnectors[0].connect(lim)
null.viewer = True
```
**Key TD 2025 corrections:**
- `analyzeCHOP.par.function = 'rmspower'` NOT `'rms'`
- `lagCHOP.par.lag1` / `par.lag2` NOT `par.lagin` / `par.lagout`
- `mathCHOP` has NO `par.clamp` — use separate `limitCHOP`
---
## Beat / Onset Detection
### Kick Detection (slope → trigger)
```python
slope = root.create(slopeCHOP, 'kick_slope')
slope.inputConnectors[0].connect(op('out_low'))
trig = root.create(triggerCHOP, 'kick_trig')
trig.par.threshold = 0.12
trig.par.attack = 0.005 # NOT par.attacktime
trig.par.decay = 0.15 # NOT par.decaytime
trig.par.triggeron = 'increase'
trig.inputConnectors[0].connect(slope)
kick_out = root.create(nullCHOP, 'out_kick')
kick_out.inputConnectors[0].connect(trig)
```
---
## Passing Audio to GLSL
```python
glsl.par.vec0name = 'uLow'
glsl.par.vec0valuex.expr = "op('out_low')['chan1']"
glsl.par.vec0valuex.mode = ParMode.EXPRESSION
glsl.par.vec1name = 'uKick'
glsl.par.vec1valuex.expr = "op('out_kick')['chan1']"
glsl.par.vec1valuex.mode = ParMode.EXPRESSION
```
```glsl
uniform float uLow;
uniform float uKick;
float scale = 1.0 + uKick * 0.4 + uLow * 0.2;
```
---
## Standard Audio Bus Pattern
Recommended structure:
```
audiodeviceinCHOP (audio_in)
[null_audio_in]
├──→ audiofilterCHOP (lowpass@250) → analyzeCHOP → lagCHOP → mathCHOP → limitCHOP → null
├──→ audiofilterCHOP (bandpass@250-4k) → analyzeCHOP → lagCHOP → mathCHOP → limitCHOP → null
├──→ audiofilterCHOP (highpass@4k) → analyzeCHOP → lagCHOP → mathCHOP → limitCHOP → null
└──→ slopeCHOP → triggerCHOP (beat_trigger)
```
Keep this entire bus inside a `baseCOMP` (e.g., `audio_bus`) and reference via paths from visual networks.
---
## MIDI Input
```python
midi_in = root.create(midiinCHOP, 'midi_in')
midi_in.par.device = 0 # Check midiinDAT for device index
# Outputs channels named by MIDI note/CC: 'ch1n60', 'ch1c74', etc.
# Map CC to a parameter
op('bloom1').par.threshold.mode = ParMode.EXPRESSION
op('bloom1').par.threshold.expr = "op('midi_in')['ch1c74'][0]"
```
---
## CRITICAL: DO NOT use Lag CHOP for spectrum smoothing
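Lag CHOP in timeslice mode expands 256-sample spectrum to 1600-2400 samples, averaging all values to near-zero (~1e-06). The shader receives no usable data. Use `mathCHOP(gain=8)` directly, or smooth in GLSL via temporal lerp with a feedback texture. This warning is about raw multi-sample spectrum data only; the per-band chain in "Audio Band Extraction" applies `lagCHOP` to the `analyzeCHOP` RMS output, not to a spectrum, and is not covered by it.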
Verified:
- Without Lag CHOP: bass bins = 5.0-5.4 (strong, usable)
- With Lag CHOP: ALL bins = 0.000001 (dead)
@@ -0,0 +1,121 @@
# Geometry COMP Reference
## Creating Geometry COMPs
```python
geo = root.create(geometryCOMP, 'geo1')
# Remove all auto-created children (including the default torus)
for c in list(geo.children):
if c.valid: c.destroy()
# Build your shape inside
```
## Correct Pattern (shapes inside geo)
```python
# Create shape INSIDE the geo COMP
box = geo.create(boxSOP, 'cube')
box.par.sizex = 1.5; box.par.sizey = 1.5; box.par.sizez = 1.5
# For POP-based geometry (TD 099), POPs must be inside:
sph = geo.create(spherePOP, 'shape')
out1 = geo.create(outPOP, 'out1')
out1.inputConnectors[0].connect(sph.outputConnectors[0])
```
## DO NOT: Common Mistakes
```python
# BAD: Don't create geometry at parent level and wire into COMP
box = root.create(boxPOP, 'box1') # ← outside geo, won't render
# BAD: Don't reference parent operators from inside COMP
choptopop1.par.chop = '../null1' # ← hidden dependency, breaks on move
```
## Instancing
```python
geo.par.instancing = True
geo.par.instanceop = 'sopto1' # relative path to CHOP/SOP with instance data
geo.par.instancetx = 'tx'
geo.par.instancety = 'ty'
geo.par.instancetz = 'tz'
```
### Instance Attribute Names by OP Type
| OP Type | Attribute Names |
|---------|-----------------|
| CHOP | Channel names: `tx`, `ty`, `tz` |
| SOP/POP | `P(0)`, `P(1)`, `P(2)` for position |
| DAT | Column header names from first row |
| TOP | `r`, `g`, `b`, `a` |
### Mixed Data Sources
```python
geo.par.instanceop = 'pos_chop' # Position from CHOP
geo.par.instancetx = 'tx'
geo.par.instancecolorop = 'color_top' # Color from TOP
geo.par.instancecolorr = 'r'
```
## Rendering Setup
```python
# Camera
cam = root.create(cameraCOMP, 'cam1')
cam.par.tx = 0; cam.par.ty = 0; cam.par.tz = 4
# Render TOP
render = root.create(renderTOP, 'render1')
render.par.outputresolution = 'custom'
render.par.resolutionw = 1280; render.par.resolutionh = 720
render.par.camera = cam.path
render.par.geometry = geo.path # accepts path string
```
## POPs vs SOPs for Rendering
In TD 099, `geometryCOMP` renders **POPs** but NOT SOPs. A `boxSOP` inside a geometry COMP is invisible — no errors.
```python
# WRONG — SOPs don't render (invisible, no errors)
box = geo.create(boxSOP, 'cube') # ✗ invisible
# CORRECT — POPs render
box = geo.create(boxPOP, 'cube') # ✓ visible
```
| SOP | POP | Notes |
|-----|-----|-------|
| `boxSOP` | `boxPOP` | `sizex/y/z`, `surftype` |
| `sphereSOP` | `spherePOP` | `radx/y/z`, `freq`, `type` (geodesic/grid/sharedpoles/tetrahedron) |
| `torusSOP` | `torusPOP` | TD auto-creates in new geo COMPs |
| `circleSOP` | `circlePOP` | |
| `gridSOP` | `gridPOP` | |
| `tubeSOP` | `tubePOP` | |
New geometry COMPs auto-create: `in1` (inPOP), `out1` (outPOP), `torus1` (torusPOP). Always clean before building.
## Morphing Between Shapes (switchPOP)
```python
sw = geo.create(switchPOP, 'shape_switch')
sw.par.index.expr = 'int(absTime.seconds / 3) % 4'
sw.inputConnectors[0].connect(tetra.outputConnectors[0]) # shape 0
sw.inputConnectors[1].connect(box.outputConnectors[0]) # shape 1
sw.inputConnectors[2].connect(octa.outputConnectors[0]) # shape 2
sw.inputConnectors[3].connect(sphere.outputConnectors[0]) # shape 3
out = geo.create(outPOP, 'out1')
out.inputConnectors[0].connect(sw.outputConnectors[0])
```
`spherePOP.par.type` options: `geodesic`, `grid`, `sharedpoles`, `tetrahedron`. Use `tetrahedron` for platonic solid polyhedra.
## Misc
- `connect()` replaces existing connections — no need to disconnect first
- `project.name` returns the TOE filename, `project.folder` returns the directory
@@ -0,0 +1,151 @@
# GLSL Reference
## Uniforms
```
TouchDesigner GLSL
─────────────────────────────
vec0name = 'uTime' → uniform float uTime;
vec0valuex = 1.0 → uTime value
```
### Pass Time
```python
glsl_op.par.vec0name = 'uTime'
glsl_op.par.vec0valuex.mode = ParMode.EXPRESSION
glsl_op.par.vec0valuex.expr = 'absTime.seconds'
```
```glsl
uniform float uTime;
void main() { float t = uTime * 0.5; }
```
### Built-in Uniforms (TOP)
```glsl
// Output resolution (always available)
vec2 res = uTDOutputInfo.res.zw;
// Input texture (only when inputs connected)
vec2 inputRes = uTD2DInfos[0].res.zw;
vec4 color = texture(sTD2DInputs[0], vUV.st);
// UV coordinates
vUV.st // 0-1 texture coords
```
**IMPORTANT:** `uTD2DInfos` requires input textures. For standalone shaders use `uTDOutputInfo`.
## Built-in Utility Functions
```glsl
// Noise
float TDPerlinNoise(vec2/vec3/vec4 v);
float TDSimplexNoise(vec2/vec3/vec4 v);
// Color conversion
vec3 TDHSVToRGB(vec3 c);
vec3 TDRGBToHSV(vec3 c);
// Matrix transforms
mat4 TDTranslate(float x, float y, float z);
mat3 TDRotateX/Y/Z(float radians);
mat3 TDRotateOnAxis(float radians, vec3 axis);
mat3 TDScale(float x, float y, float z);
mat3 TDRotateToVector(vec3 forward, vec3 up);
mat3 TDCreateRotMatrix(vec3 from, vec3 to); // vectors must be normalized
// Resolution struct
struct TDTexInfo {
vec4 res; // (1/width, 1/height, width, height)
vec4 depth;
};
// Output (always use this — handles sRGB correctly)
fragColor = TDOutputSwizzle(color);
// Instancing (MAT only)
int TDInstanceID();
```
## glslTOP
Docked DATs created automatically:
- `glsl1_pixel` — Pixel shader
- `glsl1_compute` — Compute shader
- `glsl1_info` — Compile info
### Pixel Shader Template
```glsl
out vec4 fragColor;
void main() {
vec4 color = texture(sTD2DInputs[0], vUV.st);
fragColor = TDOutputSwizzle(color);
}
```
### Compute Shader Template
```glsl
layout (local_size_x = 8, local_size_y = 8) in;
void main() {
vec4 color = texelFetch(sTD2DInputs[0], ivec2(gl_GlobalInvocationID.xy), 0);
TDImageStoreOutput(0, gl_GlobalInvocationID, color);
}
```
### Update Shader
```python
op('/project1/glsl1_pixel').text = shader_code
op('/project1/glsl1').cook(force=True)
# Check errors:
print(op('/project1/glsl1_info').text)
```
## glslMAT
Docked DATs:
- `glslmat1_vertex` — Vertex shader (param: `vdat`)
- `glslmat1_pixel` — Pixel shader (param: `pdat`)
- `glslmat1_info` — Compile info
Note: MAT uses `vdat`/`pdat`, TOP uses `vertexdat`/`pixeldat`.
### Vertex Shader Template
```glsl
uniform float uTime;
void main() {
vec3 pos = TDPos();
pos.z += sin(pos.x * 3.0 + uTime) * 0.2;
vec4 worldSpacePos = TDDeform(pos);
gl_Position = TDWorldToProj(worldSpacePos);
}
```
## Bayer 8x8 Dither Matrix
Reusable ordered dither function for retro/print aesthetics:
```glsl
float bayer8(vec2 pos) {
int x = int(mod(pos.x, 8.0)), y = int(mod(pos.y, 8.0)), idx = x + y * 8;
int b[64] = int[64](
0,32,8,40,2,34,10,42,48,16,56,24,50,18,58,26,
12,44,4,36,14,46,6,38,60,28,52,20,62,30,54,22,
3,35,11,43,1,33,9,41,51,19,59,27,49,17,57,25,
15,47,7,39,13,45,5,37,63,31,55,23,61,29,53,21
);
return float(b[idx]) / 64.0;
}
```
## glslPOP / glsladvancedPOP / glslcopyPOP
All three use compute shaders. Their docked DATs follow this naming convention:
- `glsl1_compute` / `glsladv1_compute`
- `glslcopy1_ptCompute` / `glslcopy1_vertCompute` / `glslcopy1_primCompute`
@@ -0,0 +1,131 @@
# Layout Compositor Reference
Patterns for building modular multi-panel grids — useful for HUD interfaces, data dashboards, and multi-source visual composites.
## Layout Approaches
| Approach | Best For | Notes |
|----------|----------|-------|
| `layoutTOP` | Fixed grid, quick setup | GPU, simple tiling |
| Container COMP + `overTOP` | Full control, mixed-size panels | More setup, very flexible |
| GLSL compositor | Procedural / BSP-style | Most powerful, more complex |
---
## layoutTOP
Built-in grid compositor — fastest path for uniform tile grids.
```python
layout = root.create(layoutTOP, 'layout1')
layout.par.resolutionw = 1920
layout.par.resolutionh = 1080
layout.par.cols = 3
layout.par.rows = 2
layout.par.gap = 4
```
Connect inputs (up to cols×rows):
```python
layout.inputConnectors[0].connect(op('panel_radar'))
layout.inputConnectors[1].connect(op('panel_wave'))
layout.inputConnectors[2].connect(op('panel_data'))
```
**Variable-width columns:** Not directly supported. Use overTOP approach for non-uniform grids.
---
## Container COMP Grid
Build each element as its own `containerCOMP`. Compose with `overTOP`:
```python
def create_panel(root, name, width, height, x=0, y=0):
panel = root.create(containerCOMP, name)
panel.par.w = width
panel.par.h = height
panel.viewer = True
return panel
# Composite with overTOP chain
over1 = root.create(overTOP, 'over1')
over1.inputConnectors[0].connect(panel_radar)
over1.inputConnectors[1].connect(panel_wave)
over1.par.topx2 = 0
over1.par.topy2 = 512
```
**Tip:** Use a `resolutionTOP` before each `overTOP` input if panels are different sizes.
---
## Panel Dividers (GLSL)
```glsl
out vec4 fragColor;
uniform vec2 uGridDivisions; // e.g. vec2(3, 2) for 3 cols, 2 rows
uniform float uLineWidth; // pixels
uniform vec4 uLineColor; // e.g. vec4(0.0, 1.0, 0.8, 0.6) for cyan
void main() {
vec2 res = uTDOutputInfo.res.zw;
vec2 uv = vUV.st;
vec4 bg = texture(sTD2DInputs[0], uv);
float lineW = uLineWidth / res.x;
float lineH = uLineWidth / res.y;
float vDiv = 0.0;
for (float i = 1.0; i < uGridDivisions.x; i++) {
float x = i / uGridDivisions.x;
vDiv = max(vDiv, step(abs(uv.x - x), lineW));
}
float hDiv = 0.0;
for (float i = 1.0; i < uGridDivisions.y; i++) {
float y = i / uGridDivisions.y;
hDiv = max(hDiv, step(abs(uv.y - y), lineH));
}
float line = max(vDiv, hDiv);
vec4 result = mix(bg, uLineColor, line * uLineColor.a);
fragColor = TDOutputSwizzle(result);
}
```
---
## Element Library Pattern
Each visual element lives in its own `baseCOMP` as a reusable `.tox`:
### Standard Interface
```
inputs:
- in_audio (CHOP) — audio envelope / beat data
- in_data (CHOP) — optional data stream
- in_control (CHOP) — intensity, color, speed params
outputs:
- out_top (TOP) — rendered element
```
### Network Structure
```
/project1/
audio_bus/ ← all audio analysis (see audio-reactive.md)
elements/
elem_radar/ ← baseCOMP with out_top
elem_wave/
elem_data/
compositor/
layout1 ← layoutTOP or overTOP chain
dividers1 ← GLSL divider lines
postfx/ ← bloom → chrom → CRT stack (see postfx.md)
null_out ← final output
output/
windowCOMP ← full-screen output
```
**Key principle:** Elements don't know about each other. The compositor assembles them. Audio bus is referenced by all elements but lives separately.
@@ -0,0 +1,106 @@
# Operator Tips
## Wireframe Rendering Pattern
Reusable setup for wireframe geometry on black background:
```python
# 1. Material
mat = root.create(wireframeMAT, 'wire_mat')
mat.par.colorr = 1.0; mat.par.colorg = 0.0; mat.par.colorb = 0.0
mat.par.linewidth = 3
# 2. Geometry COMP
geo = root.create(geometryCOMP, 'my_geo')
geo.par.rx.expr = 'absTime.seconds * 30'
geo.par.ry.expr = 'absTime.seconds * 45'
geo.par.material = mat.path # NOTE: 'material' not 'mat'
# 3. Shape inside the geo
box = geo.create(boxSOP, 'cube')
box.par.sizex = 1.5; box.par.sizey = 1.5; box.par.sizez = 1.5
# 4. Camera
cam = root.create(cameraCOMP, 'cam1')
cam.par.tx = 0; cam.par.ty = 0; cam.par.tz = 4; cam.par.fov = 45
# 5. Render TOP
render = root.create(renderTOP, 'render1')
render.par.outputresolution = 'custom'
render.par.resolutionw = 1280; render.par.resolutionh = 720
render.par.bgcolorr = 0; render.par.bgcolorg = 0; render.par.bgcolorb = 0
render.par.camera = cam.path
render.par.geometry = geo.path
# 6. Output null
out = root.create(nullTOP, 'out1')
out.inputConnectors[0].connect(render.outputConnectors[0])
```
**Key rules:**
- Class names: `wireframeMAT` not `wireframeMat` (all-caps suffix)
- Geometry SOPs/POPs go INSIDE the geo comp
- Material: `geo.par.material` not `geo.par.mat`
- Render geometry: `render.par.geometry = geo.path` (string path)
- `wireframeMAT.par.wireframemode = 'topology'` for clean wireframe (vs `'tesselated'` for triangle edges)
- Alternative: Use `renderTOP.par.overridemat` instead of per-geo material
## Feedback TOP
### Basic Structure
```
input (initial state) ──┐
├──→ feedback_top ──→ processing ──→ null_out
│ ↑
└── par.top = 'null_out' ────────────────┘
```
### Setup Pattern
```python
# 1. Processing chain
glsl = root.create(glslTOP, 'sim')
null_out = root.create(nullTOP, 'null_out')
glsl.outputConnectors[0].connect(null_out.inputConnectors[0])
# 2. Feedback referencing null_out
feedback = root.create(feedbackTOP, 'feedback')
feedback.par.top = 'null_out'
# 3. Black initial state
const_init = root.create(constantTOP, 'const_init')
const_init.par.colorr = 0; const_init.par.colorg = 0; const_init.par.colorb = 0
# 4. Wire: initial → feedback, feedback → processing
feedback.inputConnectors[0].connect(const_init)
glsl.inputConnectors[0].connect(feedback)
# 5. Reset to apply initial state
feedback.par.resetpulse.pulse()
```
### Common Errors
| Error | Cause | Solution |
|-------|-------|----------|
| "Not enough sources specified" | No input connected | Connect initial state TOP |
| Unexpected initial pattern | Wrong initial state | Use Constant TOP (black) |
### Tips
1. Use float format for simulations: `glsl.par.format = 'rgba32float'`
2. Reset after setup: `feedback.par.resetpulse.pulse()`
3. Match resolutions — feedback, processing, and initial state must match
4. Soft boundary prevents edge artifacts:
```glsl
float edge = 3.0 * texel.x;
float bx = smoothstep(0.0, edge, uv.x) * smoothstep(0.0, edge, 1.0 - uv.x);
float by = smoothstep(0.0, edge, uv.y) * smoothstep(0.0, edge, 1.0 - uv.y);
value *= bx * by;
```
### Use Cases
- **Wave Simulation** — R=height, G=velocity, black initial state
- **Cellular Automata** — white=alive, black=dead, random noise initial state
- **Trail / Motion Blur** — blend current frame with feedback, black initial
@@ -143,20 +143,20 @@ Creating nodes with the same names you just destroyed in the SAME script causes
```python
# td_execute_python:
for c in list(root.children):
if c.valid and c.name.startswith('promo_'):
if c.valid and c.name.startswith('my_'):
c.destroy()
# ... then create promo_audio, promo_shader etc. in same script → CRASHES
# ... then create my_audio, my_shader etc. in same script → CRASHES
```
**CORRECT (two separate calls):**
```python
# Call 1: td_execute_python — clean only
for c in list(root.children):
if c.valid and c.name.startswith('promo_'):
if c.valid and c.name.startswith('my_'):
c.destroy()
# Call 2: td_execute_python — build (separate MCP call)
audio = root.create(audiofileinCHOP, 'promo_audio')
audio = root.create(audiofileinCHOP, 'my_audio')
# ... rest of build
```
@@ -361,21 +361,13 @@ win.par.winopen.pulse()
`out.sample(x, y)` returns pixels from a single cook snapshot. Compare samples with 2+ second delays, or use screencapture on the display window.
### 32. Audio-reactive GLSL: dual-layer sync pipeline
### 32. Audio-reactive GLSL: TD-side pipeline
For audio-synced visuals, use BOTH layers for maximum effect:
**Layer 1 (TD-side, real-time):** AudioFileIn → AudioSpectrum(timeslice=True, fftsize='256') → Math(gain=5) → choptoTOP(par.chop=math, layout='rowscropped') → GLSL input. The shader samples `sTD2DInputs[1]` at different x positions for bass/mid/hi. Record the TD output with MovieFileOut.
**Layer 2 (Python-side, post-hoc):** scipy FFT on the SAME audio file → per-frame features (rms, bass, mid, hi, beat detection) → drive ASCII brightness, chromatic aberration, beat flashes during the render pass.
Both layers locked to the same audio file = visuals genuinely sync to the beat at two independent stages.
For audio-synced visuals: AudioFileIn → AudioSpectrum(timeslice=True, fftsize='256') → Math(gain=5) → choptoTOP(par.chop=math, layout='rowscropped') → GLSL input. The shader samples `sTD2DInputs[1]` at different x positions for bass/mid/hi. Record the TD output with MovieFileOut.
**Key gotcha:** AudioFileIn must be cued (`par.cue=True` → `par.cuepulse.pulse()`), then uncued (`par.cue=False`, `par.play=True`) before recording starts. Otherwise the spectrum is silent for the first few seconds.
### 33. twozero MCP: benchmark and prefer native tools
Benchmarked April 2026: twozero MCP with 36 native tools. The old curl/REST method (port 9981) had zero native tools.
### 33. twozero MCP: prefer native tools
**Always prefer native MCP tools over td_execute_python:**
- `td_create_operator` over `root.create()` scripts (handles viewport positioning)
@@ -425,13 +417,16 @@ TD can show `fps:0` in `td_get_perf` while ops still cook and `TOP.save()` still
**a) Project is paused (playbar stopped).** TD's playbar can be toggled with spacebar. The `root` at `/` has no `.playbar` attribute (it's on the perform COMP). The easiest fix is sending a spacebar keypress via `td_input_execute`, though this tool can sometimes error. As a workaround, `TOP.save()` always works regardless of play state — use it to verify rendering is actually happening before spending time debugging FPS.
**b) Audio device CHOP blocking the main thread.** An `audiooutCHOP` with an active audio device can consume 300-400ms/s (2000%+ of frame budget), stalling the cook loop at FPS=0. Fix: keep the CHOP active but set `volume=0` to prevent the audio driver from blocking. Disabling it entirely (`active=False`) may also work but can prevent downstream audio processing CHOPs from cooking.
**b) Audio device CHOP blocking the main thread (MOST COMMON).** An `audiodeviceoutCHOP` with `active=True` can consume 300-400ms/s (2000%+ of frame budget), stalling the cook loop at FPS=0. **`volume=0` is NOT sufficient** — the audio driver still blocks. Fix: `par.active = False`. This completely stops the CHOP from interacting with the audio driver. If you need audio monitoring, enable it only during short playback checks, then disable before recording.
Verified April 2026: disabling `audiodeviceoutCHOP` (`active=False`) restored FPS from 0 to 60 instantly, recovering from 2348% budget usage to 0.1%.
Diagnostic sequence when FPS=0:
1. `td_get_perf` — check if any op has extreme CPU/s
2. `TOP.save()` on the output — if it produces a valid image, the pipeline works, just not at real-time rate
3. Check for blocking CHOPs (audioout, audiodevin, etc.)
4. Toggle play state (spacebar, or check if absTime.seconds is advancing)
1. `td_get_perf` — check if any op has extreme CPU/s (audiodeviceoutCHOP is the usual suspect)
2. If audiodeviceoutCHOP shows >100ms/s: set `par.active = False` immediately
3. `TOP.save()` on the output — if it produces a valid image, the pipeline works, just not at real-time rate
4. Check for other blocking CHOPs (audiodevin, etc.)
5. Toggle play state (spacebar, or check if absTime.seconds is advancing)
### 39. Recording while FPS=0 produces empty or near-empty files
@@ -484,9 +479,20 @@ If `td_write_dat` fails, fall back to `td_execute_python`:
op("/project1/shader_code").text = shader_string
```
### 42. td_execute_python does NOT return stdout or print() output
### 42. td_execute_python DOES return print() output — use it for debugging
Despite what earlier versions of pitfall #33 stated, `print()` and `debug()` output from `td_execute_python` scripts does NOT appear in the MCP response. The response is always just `(ok)` + FPS/error summary. To read values back, use dedicated inspection tools (`td_get_operator_info`, `td_read_dat`, `td_read_chop`) instead of trying to print from within a script.
`print()` statements in `td_execute_python` scripts appear in the MCP response text. This is the correct way to read values back from scripts. The response format is: printed output first, then `[fps X.X/X] [N err/N warn]` on a separate line.
However, the `result` variable (if you set one) does NOT appear verbatim — use `print()` for anything you need to read back:
```python
# CORRECT — appears in response:
print('value:', some_value)
# WRONG — not reliably in response:
result = some_value
```
For structured data, use dedicated inspection tools (`td_get_operator_info`, `td_read_chop`) which return clean JSON.
### 43. td_get_operator_info JSON is appended with `[fps X.X/X]` — breaks json.loads()
@@ -496,13 +502,203 @@ clean = response_text.rsplit('[fps', 1)[0]
data = json.loads(clean)
```
### 44. td_get_screenshot is asynchronous — returns `{"status": "pending"}`
### 44. td_get_screenshot is unreliable — returns `{"status": "pending"}` and may never deliver
Screenshots don't complete instantly. The tool returns `{"status": "pending", "requestId": "..."}` and the actual file appears later. Wait a few seconds before checking for the file. There is no callback or completion notification — poll the filesystem.
Screenshots don't complete instantly. The tool returns `{"status": "pending", "requestId": "..."}` and the actual file may appear later — or may NEVER appear at all. In testing (April 2026), screenshots stayed "pending" indefinitely with no file written to disk, even though the shader was cooking at 8-30fps.
### 45. Recording duration is manual — no auto-stop at audio end
**Do NOT rely on `td_get_screenshot` for frame capture.** For reliable frame capture, use MovieFileOut recording + ffmpeg frame extraction:
```bash
# Record in TD first, then extract frames:
ffmpeg -y -i /tmp/td_output.mov -t 25 -vf 'fps=24' /tmp/td_frames/frame_%06d.png
```
If you need a quick visual check, `td_get_screenshot` is worth trying (it sometimes works), but always have the recording fallback. There is no callback or completion notification — if the file doesn't appear after 5-10 seconds, it's not coming.
### 45. Heavy shaders cook below record FPS — many duplicate frames in output
A raymarched GLSL shader may only cook at 8-15fps even though MovieFileOut records at 60fps. The recording still works (TD writes the last-cooked frame each time), but the resulting file has many duplicate frames. When extracting frames for post-processing, use a lower fps filter to avoid redundant frames:
```bash
# Extract at 24fps from a 60fps recording of an 8fps shader:
ffmpeg -y -i /tmp/td_output.mov -t 25 -vf 'fps=24' /tmp/td_frames/frame_%06d.png
```
Check actual cook FPS with `td_get_perf` before committing to a long recording. If FPS < 15, the output will be a slideshow regardless of the recording codec.
### 46. Recording duration is manual — no auto-stop at audio end
MovieFileOut records until `par.record = False` is set. If audio ends before you stop recording, the file keeps growing with repeated frames. Always stop recording promptly after the audio duration. For precision: set a timer on the agent side matching the audio length, then send `par.record = False`. Trim excess with ffmpeg as a safety net:
```bash
ffmpeg -i raw.mov -t 25 -c copy trimmed.mov
```
### 47. AudioFileIn par.index stays at 0 in sequential mode — not a reliable progress indicator
When `audiofileinCHOP` is in `playmode=2` (sequential), `par.index.eval()` returns 0.0 even while audio IS actively playing and the spectrum IS receiving data. Do NOT use `par.index` to check playback progress in sequential mode.
**How to verify audio is actually playing:**
- Read the spectrum CHOP values via `td_read_chop` — if values are non-zero and CHANGE between reads 1-2s apart, audio is flowing
- Read the audio CHOP itself: non-zero waveform samples confirm the file is loaded and playing
- `par.play.eval()` returning True is necessary but NOT sufficient — it can be True with no audio flowing if cue is stuck
### 48. GLSL shader whiteout — clamp audio spectrum values in the shader
Raw spectrum values multiplied by Math CHOP gain can produce very large numbers (5-20+) that blow out the shader's lighting, producing flat white/grey. The shader MUST clamp audio inputs:
```glsl
float bass = texture(sTD2DInputs[1], vec2(0.05, 0.25)).r;
bass = clamp(bass, 0.0, 3.0); // prevent whiteout
mids = clamp(mids, 0.0, 3.0);
hi = clamp(hi, 0.0, 3.0);
```
Discovered when gain=10 produced ~0.13 (too dark) during quiet passages but gain=50 produced ~9.4 (total whiteout). Fix: keep gain=10, use `highfreqboost=3.0` on AudioSpectrum, clamp in shader.
### 49. Non-Commercial TD records at 1280x1280 (square) — always crop in post
Even with `resolutionw=1280, resolutionh=720` on the GLSL TOP, Non-Commercial TD may output 1280x1280 to MovieFileOut. Always check dimensions with ffprobe and crop during extraction:
```bash
# Center-crop from 1280x1280 to 1280x720:
ffmpeg -y -i /tmp/td_output.mov -t 25 -r 24 -vf "crop=1280:720:0:280" /tmp/frames/frame_%06d.png
```
Large ProRes files (1-2GB) at 1280x1280 decode at ~3fps, so 25s of footage takes ~3 minutes to extract.
## Advanced Patterns (pitfalls 51+)
### 51. Connection syntax: use `outputConnectors`/`inputConnectors`, NOT `outputs`/`inputs`
```python
# CORRECT
src.outputConnectors[0].connect(dst.inputConnectors[0])
# WRONG — raises IndexError or AttributeError
src.outputs[0].connect(dst.inputs[0])
```
For a feedback TOP, BOTH the `par.top` reference and a wired input connection are required:
```python
fb.par.top = target.path
target.outputConnectors[0].connect(fb.inputConnectors[0])
```
### 52. moviefileoutTOP `par.input` doesn't resolve via Python in TD 2025.32460
Setting `moviefileoutTOP.par.input` programmatically does NOT work. All forms fail silently with "Not enough sources specified."
**Workaround — frame capture + ffmpeg:**
```python
out = op('/project1/out')
for i in range(300):
delay = i * 5
run(f"op('/project1/out').save('/tmp/frames/f_{i:04d}.png')", delayFrames=delay)
# Then: ffmpeg -y -framerate 30 -i /tmp/frames/f_%04d.png -c:v prores -pix_fmt yuv420p /tmp/output.mov
```
### 53. Batch frame capture — use `me.fetch`/`me.store` for state across calls
```python
start = me.fetch('cap_frame', 0)
for i in range(60):
frame = start + i
op('/project1/out').save(f'/tmp/frames/frame_{str(frame).zfill(4)}.png')
me.store('cap_frame', start + 60)
```
Call 5 times for 300 frames. Each picks up where the last left off.
### 54. GLSL TOP pixel shader requirements in TD 2025
```glsl
// REQUIRED — declare output
layout(location = 0) out vec4 fragColor;
void main() {
vec3 col = vec3(1.0, 0.0, 0.0);
fragColor = TDOutputSwizzle(vec4(col, 1.0));
}
```
**Built-in uniforms available:** `uTDOutputInfo.res` (vec4), `uTDTimeInfo.seconds`, `sTD2DInputs[N]`.
**Auto-created DATs:** `name_pixel`, `name_vertex`, `name_compute` textDATs with example code.
### 55. TOP.save() doesn't advance time — identical frames in tight loops
`.save()` captures the current cooked frame without advancing TD's timeline:
```python
# WRONG — all frames identical
for i in range(300):
op('/project1/out').save(f'frames/f_{i:04d}.png')
# CORRECT — use run() with delayFrames
for i in range(300):
delay = i * 5
run(f"op('/project1/out').save('frames/f_{i:04d}.png')", delayFrames=delay)
```
**NEVER use `time.sleep()` in TD** — it blocks the main thread and freezes the UI.
### 56. Feedback loop masks input changes — force switch during capture
With feedback TOP opacity 0.7+, the buffer dominates output. Switching input produces nearly identical frames.
**Fix — force switch index per capture:**
```python
for i in range(300):
idx = (i // 8) % num_inputs
delay = i * 5
run(f"op('/project1/vswitch').par.index={idx}; op('/project1/out').save('f_{i:04d}.png')", delayFrames=delay)
```
### 57. Large td_execute_python scripts fail — split into incremental calls
10+ operator creations in one script cause timing issues. Split the build into 2-4 calls of 2-4 operators each. Within a single call, the handles returned by `create()` are usable immediately. Across calls, `op('name')` may return `None` if the previous call hasn't committed.
### 58. MCP instance reconnection after project.load()
`project.load(path)` changes the PID. After loading, call `td_list_instances()` and use the new `target_instance`. For TOX files: import as child comp instead (doesn't disconnect).
### 59. TOX reverse-engineering workflow
```python
comp = root.loadTox(r'/path/to/file.tox')
comp.name = '_study_comp'
for child in comp.children:
print(f'{child.name} ({child.OPType})')
# Use td_get_operators_info, td_read_dat, check custom params
```
### 60. sliderCOMP naming — TD appends suffix
TD auto-renames: `slider_brightness` → `slider_brightness1`. Always check names after creation.
### 61. create() requires full operator type suffix
```python
# CORRECT
proj.create('audiofileinCHOP', 'audio_in')
proj.create('glslTOP', 'render')
# WRONG — raises "Unknown operator type"
proj.create('audiofilein', 'audio_in')
proj.create('glsl', 'render')
```
### 62. Reparenting COMPs — use copyOPs, not connect()
Moving COMPs with `inputCOMPConnectors[0].connect()` fails. Use copy + destroy:
```python
copied = target.copyOPs([source]) # preserves internal wiring
source.destroy()
# Re-wire external connections manually after the move
```
### 63. Slider wiring — expressionCHOP with op() expressions crashes TD
```python
# CRASHES TD — don't do this
echop = root.create(expressionCHOP, 'slider_ctrl')
echop.par.chan0expr = 'op("/project1/controls/slider_brightness1").par.value0'
# WORKING — parameterCHOP as bridge
pchop = root.create(parameterCHOP, 'slider_vals')
pchop.par.ops = '/project1/controls'
pchop.par.parameters = 'value0'
pchop.par.custom = True
pchop.par.builtin = False
```
@@ -0,0 +1,183 @@
# Post-FX Reference
Bloom, CRT scanlines, chromatic aberration, and feedback glow patterns for live visual work.
---
## Bloom
### Built-in Bloom TOP
TD's `bloomTOP` is the fastest path — GPU-accelerated, no shader needed.
```python
bloom = root.create(bloomTOP, 'bloom1')
bloom.par.threshold = 0.6 # Luminance threshold (0-1)
bloom.par.size = 0.03 # Spread radius (0-1)
bloom.par.strength = 1.5 # Bloom intensity
bloom.par.blendmode = 'add' # 'add' or 'screen'
```
**Audio reactive bloom:**
```python
bloom.par.strength.mode = ParMode.EXPRESSION
bloom.par.strength.expr = "op('audio_env')['envelope'][0] * 3.0 + 0.5"
```
### GLSL Bloom (More Control)
For multi-pass bloom with color tinting:
```glsl
// bloom_pixel.glsl — pass1: threshold + tint
out vec4 fragColor;
uniform float uThreshold;
uniform vec3 uBloomColor;
void main() {
vec4 col = texture(sTD2DInputs[0], vUV.st);
float luma = dot(col.rgb, vec3(0.299, 0.587, 0.114));
float bloom = max(0.0, luma - uThreshold);
fragColor = TDOutputSwizzle(vec4(col.rgb * bloom * uBloomColor, col.a));
}
```
Then blur with `blurTOP` (size ~0.02-0.05), composite back over source with `addTOP` or `compositeTOP` in Add mode.
---
## CRT / Scanlines
Pure GLSL — create a `glslTOP` and paste into its `_pixel` DAT.
```glsl
// crt_pixel.glsl
out vec4 fragColor;
uniform float uTime;
uniform float uScanlineIntensity; // 0.0 - 1.0, default 0.4
uniform float uCurvature; // 0.0 - 0.15, default 0.05
uniform float uVignette; // 0.0 - 1.0, default 0.8
vec2 curveUV(vec2 uv, float amount) {
uv = uv * 2.0 - 1.0;
vec2 offset = abs(uv.yx) / vec2(6.0, 4.0);
uv = uv + uv * offset * offset * amount;
return uv * 0.5 + 0.5;
}
void main() {
vec2 res = uTDOutputInfo.res.zw;
vec2 uv = vUV.st;
// CRT barrel distortion
uv = curveUV(uv, uCurvature * 10.0);
// Kill pixels outside curved screen
if (uv.x < 0.0 || uv.x > 1.0 || uv.y < 0.0 || uv.y > 1.0) {
fragColor = vec4(0.0, 0.0, 0.0, 1.0);
return;
}
vec4 col = texture(sTD2DInputs[0], uv);
// Scanlines
float scanline = sin(uv.y * res.y * 3.14159) * 0.5 + 0.5;
col.rgb *= mix(1.0, scanline, uScanlineIntensity);
// Horizontal noise flicker
float flicker = TDSimplexNoise(vec2(uv.y * 100.0, uTime * 8.0)) * 0.03;
col.rgb += flicker;
// Vignette
vec2 vig = uv * (1.0 - uv.yx);
float v = pow(vig.x * vig.y * 15.0, uVignette);
col.rgb *= v;
fragColor = TDOutputSwizzle(col);
}
```
---
## Chromatic Aberration
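Splits the RGB channels and samples red and blue at opposite offsets along the direction from the screen center (green stays in place), so the color fringing grows toward the edges.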
```glsl
out vec4 fragColor;
uniform float uAmount; // 0.001 - 0.02, default 0.006
void main() {
vec2 uv = vUV.st;
vec2 dir = uv - 0.5;
float r = texture(sTD2DInputs[0], uv + dir * uAmount).r;
float g = texture(sTD2DInputs[0], uv).g;
float b = texture(sTD2DInputs[0], uv - dir * uAmount).b;
float a = texture(sTD2DInputs[0], uv).a;
fragColor = TDOutputSwizzle(vec4(r, g, b, a));
}
```
**Audio-reactive variant** — spike aberration on beats:
```glsl
uniform float uBeat;
void main() {
vec2 uv = vUV.st;
vec2 dir = uv - 0.5;
float amount = uAmount + uBeat * 0.04;
float r = texture(sTD2DInputs[0], uv + dir * amount * 1.2).r;
float g = texture(sTD2DInputs[0], uv).g;
float b = texture(sTD2DInputs[0], uv - dir * amount * 0.8).b;
fragColor = TDOutputSwizzle(vec4(r, g, b, 1.0));
}
```
---
## Feedback Glow
Warm persistent trails for glow effects.
```glsl
out vec4 fragColor;
uniform float uDecay; // 0.92 - 0.98 for slow trails
uniform vec3 uGlowColor; // tint accumulated feedback
void main() {
vec2 uv = vUV.st;
vec4 prev = texture(sTD2DInputs[0], uv); // feedback input
vec4 curr = texture(sTD2DInputs[1], uv); // current frame
vec3 glow = prev.rgb * uDecay * uGlowColor;
vec3 result = max(glow, curr.rgb);
fragColor = TDOutputSwizzle(vec4(result, 1.0));
}
```
**Tips:**
- `uDecay = 0.95` → medium trail
- `uDecay = 0.98` → long comet tail
- Set `glslTOP` format to `rgba16float` for smooth gradients
---
## Full Post-FX Stack
Recommended order:
```
[scene / composite]
bloomTOP ← luminance threshold bloom
glslTOP (chrom) ← chromatic aberration
glslTOP (crt) ← scanlines + barrel distortion + vignette
null_out ← final output
```
**Performance note:** Each glslTOP is a full GPU pass. For 1920×1080 at 60fps this stack is comfortably real-time. For 4K, consider downsampling bloom input with `resolutionTOP` first.
@@ -137,7 +137,7 @@ actual = str(n.width) + 'x' + str(n.height)
### Config location
`$HERMES_HOME/config.yaml` (defaults to `~/.hermes/config.yaml` when `HERMES_HOME` is unset)
~/.hermes/config.yaml
### MCP entry format
+5 -6
View File
@@ -8,8 +8,7 @@ OK="${GREEN}✔${NC}"; FAIL="${RED}✘${NC}"; WARN="${YELLOW}⚠${NC}"
TWOZERO_URL="https://www.404zero.com/pisang/twozero.tox"
TOX_PATH="$HOME/Downloads/twozero.tox"
HERMES_HOME_DIR="${HERMES_HOME:-$HOME/.hermes}"
HERMES_CFG="${HERMES_HOME_DIR}/config.yaml"
HERMES_CFG="$HOME/.hermes/config.yaml"
MCP_PORT=40404
MCP_ENDPOINT="http://localhost:${MCP_PORT}/mcp"
@@ -18,10 +17,7 @@ manual_steps=()
echo -e "\n${CYAN}═══ twozero MCP for TouchDesigner — Setup ═══${NC}\n"
# ── 1. Check if TouchDesigner is running ──
# Match on process *name* (not full cmdline) to avoid self-matching shells
# that happen to have "TouchDesigner" in their args. macOS and Linux pgrep
# both support -x for exact name match.
if pgrep -x TouchDesigner >/dev/null 2>&1 || pgrep -x TouchDesignerFTE >/dev/null 2>&1; then
if pgrep -if "TouchDesigner" >/dev/null 2>&1; then
echo -e " ${OK} TouchDesigner is running"
td_running=true
else
@@ -69,6 +65,9 @@ if 'twozero_td' not in cfg['mcp_servers']:
}
with open(cfg_path, 'w') as f:
yaml.dump(cfg, f, default_flow_style=False, sort_keys=False)
print('added')
else:
print('exists')
" 2>/dev/null && echo -e " ${OK} twozero_td MCP entry added to config" \
|| { echo -e " ${FAIL} Could not update config (is PyYAML installed?)"; \
manual_steps+=("Add twozero_td MCP entry to ${HERMES_CFG} manually"); }
+20 -39
View File
@@ -2005,22 +2005,6 @@ class AIAgent:
self._fallback_activated = False
self._fallback_index = 0
# When the user deliberately swaps primary providers (e.g. openrouter
# → anthropic), drop any fallback entries that target the OLD primary
# or the NEW one. The chain was seeded from config at agent init for
# the original provider — without pruning, a failed turn on the new
# primary silently re-activates the provider the user just rejected,
# which is exactly what was reported during TUI v2 blitz testing
# ("switched to anthropic, tui keeps trying openrouter").
old_norm = (old_provider or "").strip().lower()
new_norm = (new_provider or "").strip().lower()
if old_norm and new_norm and old_norm != new_norm:
self._fallback_chain = [
entry for entry in self._fallback_chain
if (entry.get("provider") or "").strip().lower() not in {old_norm, new_norm}
]
self._fallback_model = self._fallback_chain[0] if self._fallback_chain else None
logging.info(
"Model switched in-place: %s (%s) -> %s (%s)",
old_model, old_provider, new_model, new_provider,
@@ -7645,27 +7629,8 @@ class AIAgent:
finally:
self._executing_tools = False
def _dispatch_delegate_task(self, function_args: dict) -> str:
"""Single call site for delegate_task dispatch.
New DELEGATE_TASK_SCHEMA fields only need to be added here to reach all
invocation paths (concurrent, sequential, inline).
"""
from tools.delegate_tool import delegate_task as _delegate_task
return _delegate_task(
goal=function_args.get("goal"),
context=function_args.get("context"),
toolsets=function_args.get("toolsets"),
tasks=function_args.get("tasks"),
max_iterations=function_args.get("max_iterations"),
acp_command=function_args.get("acp_command"),
acp_args=function_args.get("acp_args"),
role=function_args.get("role"),
parent_agent=self,
)
def _invoke_tool(self, function_name: str, function_args: dict, effective_task_id: str,
tool_call_id: Optional[str] = None, messages: list = None) -> str:
tool_call_id: Optional[str] = None) -> str:
"""Invoke a single tool and return the result string. No display logic.
Handles both agent-level tools (todo, memory, etc.) and registry-dispatched
@@ -7733,7 +7698,15 @@ class AIAgent:
callback=self.clarify_callback,
)
elif function_name == "delegate_task":
return self._dispatch_delegate_task(function_args)
from tools.delegate_tool import delegate_task as _delegate_task
return _delegate_task(
goal=function_args.get("goal"),
context=function_args.get("context"),
toolsets=function_args.get("toolsets"),
tasks=function_args.get("tasks"),
max_iterations=function_args.get("max_iterations"),
parent_agent=self,
)
else:
return handle_function_call(
function_name, function_args, effective_task_id,
@@ -7895,7 +7868,7 @@ class AIAgent:
pass
start = time.time()
try:
result = self._invoke_tool(function_name, function_args, effective_task_id, tool_call.id, messages=messages)
result = self._invoke_tool(function_name, function_args, effective_task_id, tool_call.id)
except Exception as tool_error:
result = f"Error executing tool '{function_name}': {tool_error}"
logger.error("_invoke_tool raised for %s: %s", function_name, tool_error, exc_info=True)
@@ -8247,6 +8220,7 @@ class AIAgent:
if self._should_emit_quiet_tool_messages():
self._vprint(f" {_get_cute_tool_message_impl('clarify', function_args, tool_duration, result=function_result)}")
elif function_name == "delegate_task":
from tools.delegate_tool import delegate_task as _delegate_task
tasks_arg = function_args.get("tasks")
if tasks_arg and isinstance(tasks_arg, list):
spinner_label = f"🔀 delegating {len(tasks_arg)} tasks"
@@ -8261,7 +8235,14 @@ class AIAgent:
self._delegate_spinner = spinner
_delegate_result = None
try:
function_result = self._dispatch_delegate_task(function_args)
function_result = _delegate_task(
goal=function_args.get("goal"),
context=function_args.get("context"),
toolsets=function_args.get("toolsets"),
tasks=tasks_arg,
max_iterations=function_args.get("max_iterations"),
parent_agent=self,
)
_delegate_result = function_result
finally:
self._delegate_spinner = None
-2
View File
@@ -97,7 +97,6 @@ AUTHOR_MAP = {
"mygamez@163.com": "zhongyueming1121",
"hansnow@users.noreply.github.com": "hansnow",
"134848055+UNLINEARITY@users.noreply.github.com": "UNLINEARITY",
"ben.burtenshaw@gmail.com": "burtenshaw",
# contributors (manual mapping from git names)
"ahmedsherif95@gmail.com": "asheriif",
"liujinkun@bytedance.com": "liujinkun2025",
@@ -129,7 +128,6 @@ AUTHOR_MAP = {
"brooklyn.bb.nicholson@gmail.com": "brooklynnicholson",
"withapurpose37@gmail.com": "StefanIsMe",
"4317663+helix4u@users.noreply.github.com": "helix4u",
"ifkellx@users.noreply.github.com": "Ifkellx",
"331214+counterposition@users.noreply.github.com": "counterposition",
"blspear@gmail.com": "BrennerSpear",
"akhater@gmail.com": "akhater",
+324 -134
View File
@@ -1,112 +1,217 @@
---
name: llama-cpp
description: llama.cpp local GGUF inference + HF Hub model discovery.
version: 2.1.2
description: Run LLM inference with llama.cpp on CPU, Apple Silicon, AMD/Intel GPUs, or NVIDIA — plus GGUF model conversion and quantization (2–8 bit with K-quants and imatrix). Covers CLI, Python bindings, OpenAI-compatible server, and Ollama/LM Studio integration. Use for edge deployment, M1/M2/M3/M4 Macs, CUDA-less environments, or flexible local quantization.
version: 2.0.0
author: Orchestra Research
license: MIT
dependencies: [llama-cpp-python>=0.2.0]
metadata:
hermes:
tags: [llama.cpp, GGUF, Quantization, Hugging Face Hub, CPU Inference, Apple Silicon, Edge Deployment, AMD GPUs, Intel GPUs, NVIDIA, URL-first]
tags: [llama.cpp, GGUF, Quantization, CPU Inference, Apple Silicon, Edge Deployment, Non-NVIDIA, AMD GPUs, Intel GPUs, Embedded, Model Compression]
---
# llama.cpp + GGUF
Use this skill for local GGUF inference, quant selection, or Hugging Face repo discovery for llama.cpp.
Pure C/C++ LLM inference with minimal dependencies, plus the GGUF (GPT-Generated Unified Format) standard used for quantized weights. One toolchain covers conversion, quantization, and serving.
## When to use
- Run local models on CPU, Apple Silicon, CUDA, ROCm, or Intel GPUs
- Find the right GGUF for a specific Hugging Face repo
- Build a `llama-server` or `llama-cli` command from the Hub
- Search the Hub for models that already support llama.cpp
- Enumerate available `.gguf` files and sizes for a repo
- Decide between Q4/Q5/Q6/IQ variants for the user's RAM or VRAM
**Use llama.cpp + GGUF when:**
- Running on CPU-only machines or Apple Silicon (M1/M2/M3/M4) with Metal acceleration
- Using AMD (ROCm) or Intel GPUs where CUDA isn't available
- Edge deployment (Raspberry Pi, embedded systems, consumer laptops)
- Need flexible quantization (2–8 bit with K-quants)
- Want local AI tools (LM Studio, Ollama, text-generation-webui, koboldcpp)
- Want a single binary deploy without Docker/Python
## Model Discovery workflow
**Key advantages:**
- Universal hardware: CPU, Apple Silicon, NVIDIA, AMD, Intel
- No Python runtime required (pure C/C++)
- K-quants + imatrix for better low-bit quality
- OpenAI-compatible server built in
- Rich ecosystem (Ollama, LM Studio, llama-cpp-python)
Prefer URL workflows before asking for `hf`, Python, or custom scripts.
1. Search for candidate repos on the Hub:
- Base: `https://huggingface.co/models?apps=llama.cpp&sort=trending`
- Add `search=<term>` for a model family
- Add `num_parameters=min:0,max:24B` or similar when the user has size constraints
2. Open the repo with the llama.cpp local-app view:
- `https://huggingface.co/<repo>?local-app=llama.cpp`
3. Treat the local-app snippet as the source of truth when it is visible:
- copy the exact `llama-server` or `llama-cli` command
- report the recommended quant exactly as HF shows it
4. Read the same `?local-app=llama.cpp` URL as page text or HTML and extract the section under `Hardware compatibility`:
- prefer its exact quant labels and sizes over generic tables
- keep repo-specific labels such as `UD-Q4_K_M` or `IQ4_NL_XL`
- if that section is not visible in the fetched page source, say so and fall back to the tree API plus generic quant guidance
5. Query the tree API to confirm what actually exists:
- `https://huggingface.co/api/models/<repo>/tree/main?recursive=true`
- keep entries where `type` is `file` and `path` ends with `.gguf`
- use `path` and `size` as the source of truth for filenames and byte sizes
- separate quantized checkpoints from `mmproj-*.gguf` projector files and `BF16/` shard files
- use `https://huggingface.co/<repo>/tree/main` only as a human fallback
6. If the local-app snippet is not text-visible, reconstruct the command from the repo plus the chosen quant:
- shorthand quant selection: `llama-server -hf <repo>:<QUANT>`
- exact-file fallback: `llama-server --hf-repo <repo> --hf-file <filename.gguf>`
7. Only suggest conversion from Transformers weights if the repo does not already expose GGUF files.
**Use alternatives instead:**
- **vLLM** — NVIDIA GPUs, PagedAttention, Python-first, max throughput
- **TensorRT-LLM** — Production NVIDIA (A100/H100), maximum speed
- **AWQ/GPTQ** — Calibrated quantization for NVIDIA-only deployments
- **bitsandbytes** — Simple HuggingFace transformers integration
- **HQQ** — Fast calibration-free quantization
## Quick start
### Install llama.cpp
### Install
```bash
# macOS / Linux (simplest)
brew install llama.cpp
```
```bash
winget install llama.cpp
```
```bash
# Or build from source
git clone https://github.com/ggml-org/llama.cpp
cd llama.cpp
cmake -B build
cmake --build build --config Release
make # CPU
make GGML_METAL=1 # Apple Silicon
make GGML_CUDA=1 # NVIDIA CUDA
make LLAMA_HIP=1 # AMD ROCm
# Python bindings (optional)
pip install llama-cpp-python
# With CUDA: CMAKE_ARGS="-DGGML_CUDA=on" pip install llama-cpp-python --force-reinstall --no-cache-dir
# With Metal: CMAKE_ARGS="-DGGML_METAL=on" pip install llama-cpp-python --force-reinstall --no-cache-dir
```
### Run directly from the Hugging Face Hub
### Download a pre-quantized GGUF
```bash
llama-cli -hf bartowski/Llama-3.2-3B-Instruct-GGUF:Q8_0
# TheBloke hosts most popular models pre-quantized
huggingface-cli download \
TheBloke/Llama-2-7B-Chat-GGUF \
llama-2-7b-chat.Q4_K_M.gguf \
--local-dir models/
```
### Or convert a HuggingFace model to GGUF
```bash
llama-server -hf bartowski/Llama-3.2-3B-Instruct-GGUF:Q8_0
# 1. Download HF model
huggingface-cli download meta-llama/Llama-3.1-8B --local-dir ./llama-3.1-8b
# 2. Convert to FP16 GGUF
python convert_hf_to_gguf.py ./llama-3.1-8b \
--outfile llama-3.1-8b-f16.gguf \
--outtype f16
# 3. Quantize to Q4_K_M
./llama-quantize llama-3.1-8b-f16.gguf llama-3.1-8b-q4_k_m.gguf Q4_K_M
```
### Run an exact GGUF file from the Hub
Use this when the tree API shows custom file naming or the exact HF snippet is missing.
### Run inference
```bash
llama-server \
--hf-repo microsoft/Phi-3-mini-4k-instruct-gguf \
--hf-file Phi-3-mini-4k-instruct-q4.gguf \
-c 4096
# One-shot prompt
./llama-cli -m model.Q4_K_M.gguf -p "Explain quantum computing" -n 256
# Interactive chat
./llama-cli -m model.Q4_K_M.gguf --interactive
# With GPU offload
./llama-cli -m model.Q4_K_M.gguf -ngl 35 -p "Hello!"
```
### OpenAI-compatible server check
### Serve an OpenAI-compatible API
```bash
./llama-server \
-m model.Q4_K_M.gguf \
--host 0.0.0.0 \
--port 8080 \
-ngl 35 \
-c 4096 \
--parallel 4 \
--cont-batching
```
```bash
curl http://localhost:8080/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{
"messages": [
{"role": "user", "content": "Write a limerick about Python exceptions"}
]
"model": "local",
"messages": [{"role": "user", "content": "Hello!"}],
"temperature": 0.7,
"max_tokens": 100
}'
```
## Python bindings (llama-cpp-python)
## Quantization formats (GGUF)
`pip install llama-cpp-python` (CUDA: `CMAKE_ARGS="-DGGML_CUDA=on" pip install llama-cpp-python --force-reinstall --no-cache-dir`; Metal: `CMAKE_ARGS="-DGGML_METAL=on" ...`).
### K-quant methods (recommended)
| Type | Bits | Size (7B) | Quality | Use Case |
|------|------|-----------|---------|----------|
| Q2_K | 2.5 | ~2.8 GB | Low | Extreme compression (testing only) |
| Q3_K_S | 3.0 | ~3.0 GB | Low-Med | Memory constrained |
| Q3_K_M | 3.3 | ~3.3 GB | Medium | Fits small devices |
| Q4_K_S | 4.0 | ~3.8 GB | Med-High | Speed critical |
| **Q4_K_M** | 4.5 | ~4.1 GB | High | **Recommended default** |
| Q5_K_S | 5.0 | ~4.6 GB | High | Quality focused |
| Q5_K_M | 5.5 | ~4.8 GB | Very High | High quality |
| Q6_K | 6.0 | ~5.5 GB | Excellent | Near-original |
| Q8_0 | 8.0 | ~7.2 GB | Best | Maximum quality, minimal degradation |
**Variant suffixes** — `_S` (Small, faster, lower quality), `_M` (Medium, balanced), `_L` (Large, better quality).
**Legacy quants** (Q4_0/Q4_1/Q5_0/Q5_1) still exist, but always prefer K-quants for a better quality/size ratio.
**IQ quantization** — ultra-low-bit with importance-aware methods: IQ2_XXS, IQ2_XS, IQ2_S, IQ3_XXS, IQ3_XS, IQ3_S, IQ4_XS. Require `--imatrix`.
**Task-specific defaults:**
- General chat / assistants: Q4_K_M, or Q5_K_M if RAM allows
- Code generation: Q5_K_M or Q6_K (higher precision helps)
- Technical / medical: Q6_K or Q8_0
- Very large (70B, 405B) on consumer hardware: Q3_K_M or Q4_K_S
- Raspberry Pi / edge: Q2_K or Q3_K_S
## Conversion workflows
### Basic: HF → GGUF → quantized
```bash
python convert_hf_to_gguf.py ./model --outfile model-f16.gguf --outtype f16
./llama-quantize model-f16.gguf model-q4_k_m.gguf Q4_K_M
./llama-cli -m model-q4_k_m.gguf -p "Hello!" -n 50
```
### With importance matrix (imatrix) — better low-bit quality
`imatrix` gives a 10–20% perplexity improvement at Q4 and is essential at Q3 and below.
```bash
# 1. Convert to FP16 GGUF
python convert_hf_to_gguf.py ./model --outfile model-f16.gguf
# 2. Prepare calibration data (diverse text, ~100MB is ideal)
cat > calibration.txt << 'EOF'
The quick brown fox jumps over the lazy dog.
Machine learning is a subset of artificial intelligence.
# Add more diverse text samples...
EOF
# 3. Generate importance matrix
./llama-imatrix -m model-f16.gguf \
-f calibration.txt \
--chunk 512 \
-o model.imatrix \
-ngl 35
# 4. Quantize with imatrix
./llama-quantize --imatrix model.imatrix \
model-f16.gguf model-q4_k_m.gguf Q4_K_M
```
### Multi-quant batch
```bash
#!/bin/bash
MODEL="llama-3.1-8b-f16.gguf"
IMATRIX="llama-3.1-8b.imatrix"
./llama-imatrix -m $MODEL -f wiki.txt -o $IMATRIX -ngl 35
for QUANT in Q4_K_M Q5_K_M Q6_K Q8_0; do
OUTPUT="llama-3.1-8b-${QUANT,,}.gguf"
./llama-quantize --imatrix $IMATRIX $MODEL $OUTPUT $QUANT
echo "Created: $OUTPUT ($(du -h $OUTPUT | cut -f1))"
done
```
### Quality testing (perplexity)
```bash
./llama-perplexity -m model.gguf -f wikitext-2-raw/wiki.test.raw -c 512
# Baseline FP16: ~5.96 | Q4_K_M: ~6.06 (+1.7%) | Q2_K: ~6.87 (+15.3%)
```
## Python bindings (llama-cpp-python)
### Basic generation
@@ -116,32 +221,39 @@ from llama_cpp import Llama
llm = Llama(
model_path="./model-q4_k_m.gguf",
n_ctx=4096,
n_gpu_layers=35, # 0 for CPU, 99 to offload everything
n_gpu_layers=35, # 0 for CPU only, 99 to offload everything
n_threads=8,
)
out = llm("What is machine learning?", max_tokens=256, temperature=0.7)
print(out["choices"][0]["text"])
output = llm(
"What is machine learning?",
max_tokens=256,
temperature=0.7,
stop=["</s>", "\n\n"],
)
print(output["choices"][0]["text"])
```
### Chat + streaming
### Chat completion + streaming
```python
llm = Llama(
model_path="./model-q4_k_m.gguf",
n_ctx=4096,
n_gpu_layers=35,
chat_format="llama-3", # or "chatml", "mistral", etc.
chat_format="llama-3", # Or "chatml", "mistral", etc.
)
resp = llm.create_chat_completion(
# Non-streaming
response = llm.create_chat_completion(
messages=[
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": "What is Python?"},
],
max_tokens=256,
temperature=0.7,
)
print(resp["choices"][0]["message"]["content"])
print(response["choices"][0]["message"]["content"])
# Streaming
for chunk in llm("Explain quantum computing:", max_tokens=256, stream=True):
@@ -156,93 +268,171 @@ vec = llm.embed("This is a test sentence.")
print(f"Embedding dimension: {len(vec)}")
```
You can also load a GGUF straight from the Hub:
## Hardware acceleration
### Apple Silicon (Metal)
```bash
make clean && make GGML_METAL=1
./llama-cli -m model.gguf -ngl 99 -p "Hello" # offload all layers
```
```python
llm = Llama.from_pretrained(
repo_id="bartowski/Llama-3.2-3B-Instruct-GGUF",
filename="*Q4_K_M.gguf",
n_gpu_layers=35,
llm = Llama(
model_path="model.gguf",
n_gpu_layers=99, # Offload everything
n_threads=1, # Metal handles parallelism
)
```
## Choosing a quant
Performance: M3 Max ~40–60 tok/s on Llama 2-7B Q4_K_M.
Use the Hub page first, generic heuristics second.
### NVIDIA (CUDA)
- Prefer the exact quant that HF marks as compatible for the user's hardware profile.
- For general chat, start with `Q4_K_M`.
- For code or technical work, prefer `Q5_K_M` or `Q6_K` if memory allows.
- For very tight RAM budgets, consider `Q3_K_M`, `IQ` variants, or `Q2` variants only if the user explicitly prioritizes fit over quality.
- For multimodal repos, mention `mmproj-*.gguf` separately. The projector is not the main model file.
- Do not normalize repo-native labels. If the page says `UD-Q4_K_M`, report `UD-Q4_K_M`.
```bash
make clean && make GGML_CUDA=1
./llama-cli -m model.gguf -ngl 35 -p "Hello"
## Extracting available GGUFs from a repo
# Hybrid for large models
./llama-cli -m llama-70b.Q4_K_M.gguf -ngl 20 # GPU: 20 layers, CPU: rest
When the user asks what GGUFs exist, return:
- filename
- file size
- quant label
- whether it is a main model or an auxiliary projector
Ignore unless requested:
- README
- BF16 shard files
- imatrix blobs or calibration artifacts
Use the tree API for this step:
- `https://huggingface.co/api/models/<repo>/tree/main?recursive=true`
For a repo like `unsloth/Qwen3.6-35B-A3B-GGUF`, the local-app page can show quant chips such as `UD-Q4_K_M`, `UD-Q5_K_M`, `UD-Q6_K`, and `Q8_0`, while the tree API exposes exact file paths such as `Qwen3.6-35B-A3B-UD-Q4_K_M.gguf` and `Qwen3.6-35B-A3B-Q8_0.gguf` with byte sizes. Use the tree API to turn a quant label into an exact filename.
## Search patterns
Use these URL shapes directly:
```text
https://huggingface.co/models?apps=llama.cpp&sort=trending
https://huggingface.co/models?search=<term>&apps=llama.cpp&sort=trending
https://huggingface.co/models?search=<term>&apps=llama.cpp&num_parameters=min:0,max:24B&sort=trending
https://huggingface.co/<repo>?local-app=llama.cpp
https://huggingface.co/api/models/<repo>/tree/main?recursive=true
https://huggingface.co/<repo>/tree/main
# Multi-GPU split
./llama-cli -m large-model.gguf --tensor-split 0.5,0.5 -ngl 60
```
## Output format
### AMD (ROCm)
When answering discovery requests, prefer a compact structured result like:
```text
Repo: <repo>
Recommended quant from HF: <label> (<size>)
llama-server: <command>
Other GGUFs:
- <filename> - <size>
- <filename> - <size>
Source URLs:
- <local-app URL>
- <tree API URL>
```bash
make LLAMA_HIP=1
./llama-cli -m model.gguf -ngl 999
```
### CPU
```bash
# Match PHYSICAL cores, not logical
./llama-cli -m model.gguf -t 8 -p "Hello"
# BLAS acceleration (2–3× speedup)
make LLAMA_OPENBLAS=1
```
```python
llm = Llama(
model_path="model.gguf",
n_gpu_layers=0,
n_threads=8,
n_batch=512, # Larger batch = faster prompt processing
)
```
## Performance benchmarks
### CPU (Llama 2-7B Q4_K_M)
| CPU | Threads | Speed |
|-----|---------|-------|
| Apple M3 Max (Metal) | 16 | 50 tok/s |
| AMD Ryzen 9 7950X | 32 | 35 tok/s |
| Intel i9-13900K | 32 | 30 tok/s |
### GPU offloading on RTX 4090
| Layers GPU | Speed | VRAM |
|------------|-------|------|
| 0 (CPU only) | 30 tok/s | 0 GB |
| 20 (hybrid) | 80 tok/s | 8 GB |
| 35 (all) | 120 tok/s | 12 GB |
## Supported models
- **LLaMA family**: Llama 2 (7B/13B/70B), Llama 3 (8B/70B/405B), Code Llama
- **Mistral family**: Mistral 7B, Mixtral 8x7B/8x22B
- **Other**: Falcon, BLOOM, GPT-J, Phi-3, Gemma, Qwen, LLaVA (vision), Whisper (audio)
Find GGUF models: https://huggingface.co/models?library=gguf
## Ecosystem integrations
### Ollama
```bash
cat > Modelfile << 'EOF'
FROM ./model-q4_k_m.gguf
TEMPLATE """{{ .System }}
{{ .Prompt }}"""
PARAMETER temperature 0.7
PARAMETER num_ctx 4096
EOF
ollama create mymodel -f Modelfile
ollama run mymodel "Hello!"
```
### LM Studio
1. Place GGUF file in `~/.cache/lm-studio/models/`
2. Open LM Studio and select the model
3. Configure context length and GPU offload, start inference
### text-generation-webui
```bash
cp model-q4_k_m.gguf text-generation-webui/models/
python server.py --model model-q4_k_m.gguf --loader llama.cpp --n-gpu-layers 35
```
### OpenAI client → llama-server
```python
from openai import OpenAI
client = OpenAI(base_url="http://localhost:8080/v1", api_key="not-needed")
response = client.chat.completions.create(
model="local-model",
messages=[{"role": "user", "content": "Hello!"}],
max_tokens=256,
)
print(response.choices[0].message.content)
```
## Best practices
1. **Use K-quants** — Q4_K_M is the recommended default
2. **Use imatrix** for Q4 and below (calibration improves quality substantially)
3. **Offload as many layers as VRAM allows** — start high, reduce by 5 on OOM
4. **Thread count** — match physical cores, not logical
5. **Batch size** — increase `n_batch` (e.g. 512) for faster prompt processing
6. **Context** — start at 4096, grow only as needed (memory scales with ctx)
7. **Flash Attention** — add `--flash-attn` if your build supports it
## Common issues (quick fixes)
**Model loads slowly** — use `--mmap` for memory-mapped loading.
**Out of memory (GPU)** — reduce `-ngl`, use a smaller quant (Q4_K_S / Q3_K_M), or quantize the KV cache:
```python
Llama(model_path="...", type_k=2, type_v=2, n_gpu_layers=35) # Q4_0 KV cache
```
**Garbage output** — wrong `chat_format`, temperature too high, or model file corrupted. Test with `temperature=0.1` and verify FP16 baseline works.
**Connection refused (server)** — bind to `--host 0.0.0.0`, check `lsof -i :8080`.
See `references/troubleshooting.md` for the full playbook.
## References
- **[hub-discovery.md](references/hub-discovery.md)** - URL-only Hugging Face workflows, search patterns, GGUF extraction, and command reconstruction
- **[advanced-usage.md](references/advanced-usage.md)** — speculative decoding, batched inference, grammar-constrained generation, LoRA, multi-GPU, custom builds, benchmark scripts
- **[quantization.md](references/quantization.md)** — quant quality tradeoffs, when to use Q4/Q5/Q6/IQ, model size scaling, imatrix
- **[server.md](references/server.md)** — direct-from-Hub server launch, OpenAI API endpoints, Docker deployment, NGINX load balancing, monitoring
- **[quantization.md](references/quantization.md)** — perplexity tables, use-case guide, model size scaling (7B/13B/70B RAM needs), imatrix deep dive
- **[server.md](references/server.md)** — OpenAI API endpoints, Docker deployment, NGINX load balancing, monitoring
- **[optimization.md](references/optimization.md)** — CPU threading, BLAS, GPU offload heuristics, batch tuning, benchmarks
- **[troubleshooting.md](references/troubleshooting.md)** — install/convert/quantize/inference/server issues, Apple Silicon, debugging
## Resources
- **GitHub**: https://github.com/ggml-org/llama.cpp
- **Hugging Face GGUF + llama.cpp docs**: https://huggingface.co/docs/hub/gguf-llamacpp
- **Hugging Face Local Apps docs**: https://huggingface.co/docs/hub/main/local-apps
- **Hugging Face Local Agents docs**: https://huggingface.co/docs/hub/agents-local
- **Example local-app page**: https://huggingface.co/unsloth/Qwen3.6-35B-A3B-GGUF?local-app=llama.cpp
- **Example tree API**: https://huggingface.co/api/models/unsloth/Qwen3.6-35B-A3B-GGUF/tree/main?recursive=true
- **Example llama.cpp search**: https://huggingface.co/models?num_parameters=min:0,max:24B&apps=llama.cpp&sort=trending
- **Python bindings**: https://github.com/abetlen/llama-cpp-python
- **Pre-quantized models**: https://huggingface.co/TheBloke
- **GGUF converter Space**: https://huggingface.co/spaces/ggml-org/gguf-my-repo
- **License**: MIT
@@ -1,168 +0,0 @@
# Hugging Face URL Workflows for llama.cpp
Use URL-only workflows first. Do not require `hf` or API clients just to find GGUF files, choose a quant, or build a `llama-server` command.
## Core URLs
```text
Search:
https://huggingface.co/models?apps=llama.cpp&sort=trending
Search with text:
https://huggingface.co/models?search=<term>&apps=llama.cpp&sort=trending
Search with size bounds:
https://huggingface.co/models?search=<term>&apps=llama.cpp&num_parameters=min:0,max:24B&sort=trending
Repo local-app view:
https://huggingface.co/<repo>?local-app=llama.cpp
Repo tree API:
https://huggingface.co/api/models/<repo>/tree/main?recursive=true
Repo file tree:
https://huggingface.co/<repo>/tree/main
```
## 1. Search for llama.cpp-compatible models
Start from the models page with `apps=llama.cpp`.
Use:
- `search=<term>` for model family names such as `Qwen`, `Gemma`, `Phi`, or `Mistral`
- `num_parameters=min:0,max:24B` or similar if the user has hardware limits
- `sort=trending` when the user wants popular repos right now
Do not start with random GGUF repos if the user has not chosen a model family yet. Search first, shortlist second.
Example: https://huggingface.co/models?search=Qwen&apps=llama.cpp&num_parameters=min:0,max:24B&sort=trending
## 2. Use the local-app page for the recommended quant
Open:
```text
https://huggingface.co/<repo>?local-app=llama.cpp
```
Extract, in order:
1. The exact `Use this model` snippet, if it is visible as text
2. The `Hardware compatibility` section from the fetched page text or HTML:
- quant label
- file size
- bit-depth grouping
3. Any extra launch flags shown in the snippet, such as `--jinja`
Treat the HF local-app snippet as the source of truth when it is visible.
Do this by reading the URL itself, not by assuming the UI rendered in a browser. If the fetched page source does not expose `Hardware compatibility`, say that the section was not text-visible and fall back to the tree API plus generic guidance from `quantization.md`.
## 3. Confirm exact files from the tree API
Open:
```text
https://huggingface.co/api/models/<repo>/tree/main?recursive=true
```
Treat the JSON response as the source of truth for repo inventory.
Keep entries where:
- `type` is `file`
- `path` ends with `.gguf`
Use these fields:
- `path` for the filename and subdirectory
- `size` for the byte size
- optionally `lfs.size` to confirm the LFS payload size
Separate files into:
- quantized single-file checkpoints, for example `Qwen3.6-35B-A3B-UD-Q4_K_M.gguf`
- projector weights, usually `mmproj-*.gguf`
- BF16 shard files, usually under `BF16/`
- everything else
Ignore unless the user asks:
- `README.md`
- imatrix or calibration blobs
Use `https://huggingface.co/<repo>/tree/main` only as a human fallback if the API endpoint fails or the user wants the web view.
## 4. Build the command
Preferred order:
1. Copy the exact HF snippet from the local-app page
2. If the page gives a clean quant label, use shorthand selection:
```bash
llama-server -hf <repo>:<QUANT>
```
3. If you need an exact file from the tree API, use the file-specific form:
```bash
llama-server --hf-repo <repo> --hf-file <filename.gguf>
```
4. For CLI usage instead of a server, use:
```bash
llama-cli -hf <repo>:<QUANT>
```
Use the exact-file form when the repo uses custom labels or nonstandard naming that could make `:<QUANT>` ambiguous.
## 5. Example: `unsloth/Qwen3.6-35B-A3B-GGUF`
Use these URLs:
```text
https://huggingface.co/unsloth/Qwen3.6-35B-A3B-GGUF?local-app=llama.cpp
https://huggingface.co/api/models/unsloth/Qwen3.6-35B-A3B-GGUF/tree/main?recursive=true
https://huggingface.co/unsloth/Qwen3.6-35B-A3B-GGUF/tree/main
```
On the local-app page, the hardware compatibility section can expose entries such as:
- `UD-IQ4_XS` - 17.7 GB
- `UD-Q4_K_S` - 20.9 GB
- `UD-Q4_K_M` - 22.1 GB
- `UD-Q5_K_M` - 26.5 GB
- `UD-Q6_K` - 29.3 GB
- `Q8_0` - 36.9 GB
On the tree API, you can confirm exact filenames such as:
- `Qwen3.6-35B-A3B-UD-Q4_K_M.gguf`
- `Qwen3.6-35B-A3B-UD-Q5_K_M.gguf`
- `Qwen3.6-35B-A3B-UD-Q6_K.gguf`
- `Qwen3.6-35B-A3B-Q8_0.gguf`
- `mmproj-F16.gguf`
Good final output for this repo:
```text
Repo: unsloth/Qwen3.6-35B-A3B-GGUF
Recommended quant from HF: UD-Q4_K_M (22.1 GB)
llama-server: llama-server --hf-repo unsloth/Qwen3.6-35B-A3B-GGUF --hf-file Qwen3.6-35B-A3B-UD-Q4_K_M.gguf
Other GGUFs:
- Qwen3.6-35B-A3B-UD-Q5_K_M.gguf - 26.5 GB
- Qwen3.6-35B-A3B-UD-Q6_K.gguf - 29.3 GB
- Qwen3.6-35B-A3B-Q8_0.gguf - 36.9 GB
Projector:
- mmproj-F16.gguf - 899 MB
```
## Notes
- Repo-specific quant labels matter. Do not rewrite `UD-Q4_K_M` to `Q4_K_M` unless the page itself does.
- `mmproj` files are projector weights for multimodal models, not the main language model checkpoint.
- If the HF hardware compatibility panel is missing because the user has no hardware profile configured, or because the fetched page source did not expose it, still use the tree API plus generic quant guidance from `quantization.md`.
- If the repo already has GGUFs, do not jump straight to conversion workflows.
@@ -2,22 +2,6 @@
Complete guide to GGUF quantization formats and model conversion.
## Hub-first quant selection
Before using generic tables, open the model repo with:
```text
https://huggingface.co/<repo>?local-app=llama.cpp
```
Prefer the exact quant labels and sizes shown in the `Hardware compatibility` section of the fetched `?local-app=llama.cpp` page text or HTML. Then confirm the matching filenames in:
```text
https://huggingface.co/api/models/<repo>/tree/main?recursive=true
```
Use the Hub page first, and only fall back to the generic heuristics below when the repo page does not expose a clear recommendation.
## Quantization Overview
**GGUF** (GPT-Generated Unified Format) - Standard format for llama.cpp models.
@@ -39,11 +23,11 @@ Use the Hub page first, and only fall back to the generic heuristics below when
## Converting Models
### Hugging Face to GGUF
### HuggingFace to GGUF
```bash
# 1. Download Hugging Face model
hf download meta-llama/Llama-2-7b-chat-hf \
# 1. Download HuggingFace model
huggingface-cli download meta-llama/Llama-2-7b-chat-hf \
--local-dir models/llama-2-7b-chat/
# 2. Convert to FP16 GGUF
@@ -168,32 +152,18 @@ Q2_K or Q3_K_S - Fit in limited RAM
## Finding Pre-Quantized Models
Use the Hub search with the llama.cpp app filter:
```text
https://huggingface.co/models?apps=llama.cpp&sort=trending
https://huggingface.co/models?search=<term>&apps=llama.cpp&sort=trending
https://huggingface.co/models?search=<term>&apps=llama.cpp&num_parameters=min:0,max:24B&sort=trending
```
For a specific repo, open:
```text
https://huggingface.co/<repo>?local-app=llama.cpp
https://huggingface.co/api/models/<repo>/tree/main?recursive=true
```
Then launch directly from the Hub without extra Hub tooling:
**TheBloke** on HuggingFace:
- https://huggingface.co/TheBloke
- Most models available in all GGUF formats
- No conversion needed
**Example**:
```bash
llama-cli -hf <repo>:Q4_K_M
llama-server -hf <repo>:Q4_K_M
```
If you need the exact file name from the tree API:
```bash
llama-server --hf-repo <repo> --hf-file <filename.gguf>
# Download pre-quantized Llama 2-7B
huggingface-cli download \
TheBloke/Llama-2-7B-Chat-GGUF \
llama-2-7b-chat.Q4_K_M.gguf \
--local-dir models/
```
## Importance Matrices (imatrix)
@@ -2,31 +2,6 @@
Production deployment of llama.cpp server with OpenAI-compatible API.
## Direct from Hugging Face Hub
Prefer the model repo's local-app page first:
```text
https://huggingface.co/<repo>?local-app=llama.cpp
```
If the page shows an exact snippet, copy it. If not, use one of these forms:
```bash
# Choose a quant label directly from the Hub repo
llama-server -hf bartowski/Llama-3.2-3B-Instruct-GGUF:Q8_0
```
```bash
# Pin an exact GGUF file from the repo tree
llama-server \
--hf-repo microsoft/Phi-3-mini-4k-instruct-gguf \
--hf-file Phi-3-mini-4k-instruct-q4.gguf \
-c 4096
```
Use the file-specific form when the repo has custom naming or when you already extracted the exact filename from the tree API.
## Server Modes
### llama-server
+8 -64
View File
@@ -1,7 +1,7 @@
---
name: llm-wiki
description: "Karpathy's LLM Wiki — build and maintain a persistent, interlinked markdown knowledge base. Ingest sources, query compiled knowledge, and lint for consistency."
version: 2.1.0
version: 2.0.0
author: Hermes Agent
license: MIT
metadata:
@@ -122,10 +122,6 @@ Adapt to the user's domain. The schema constrains agent behavior and ensures con
- When updating a page, always bump the `updated` date
- Every new page must be added to `index.md` under the correct section
- Every action must be appended to `log.md`
- **Provenance markers:** On pages that synthesize 3+ sources, append `^[raw/articles/source-file.md]`
at the end of paragraphs whose claims come from a specific source. This lets a reader trace each
claim back without re-reading the whole raw file. Optional on single-source pages where the
`sources:` frontmatter is enough.
## Frontmatter
```yaml
@@ -136,33 +132,9 @@ Adapt to the user's domain. The schema constrains agent behavior and ensures con
type: entity | concept | comparison | query | summary
tags: [from taxonomy below]
sources: [raw/articles/source-name.md]
# Optional quality signals:
confidence: high | medium | low # how well-supported the claims are
contested: true # set when the page has unresolved contradictions
contradictions: [other-page-slug] # pages this one conflicts with
---
```
`confidence` and `contested` are optional but recommended for opinion-heavy or fast-moving
topics. Lint surfaces `contested: true` and `confidence: low` pages for review so weak claims
don't silently harden into accepted wiki fact.
### raw/ Frontmatter
Raw sources ALSO get a small frontmatter block so re-ingests can detect drift:
```yaml
---
source_url: https://example.com/article # original URL, if applicable
ingested: YYYY-MM-DD
sha256: <hex digest of the raw content below the frontmatter>
---
```
The `sha256:` lets a future re-ingest of the same URL skip processing when content is unchanged,
and flag drift when it has changed. Compute over the body only (everything after the closing
`---`), not the frontmatter itself.
## Tag Taxonomy
[Define 10-20 top-level tags for the domain. Add new tags here BEFORE using them.]
@@ -262,10 +234,6 @@ When the user provides a source (URL, file, paste), integrate it into the wiki:
- PDF → use `web_extract` (handles PDFs), save to `raw/papers/`
- Pasted text → save to appropriate `raw/` subdirectory
- Name the file descriptively: `raw/articles/karpathy-llm-wiki-2026.md`
- **Add raw frontmatter** (`source_url`, `ingested`, `sha256` of the body).
On re-ingest of the same URL: recompute the sha256, compare to the stored value —
skip if identical, flag drift and update if different. This is cheap enough to
do on every re-ingest and catches silent source changes.
**Discuss takeaways** with the user — what's interesting, what matters for
the domain. (Skip this in automated/cron contexts — proceed directly.)
@@ -282,11 +250,6 @@ When the user provides a source (URL, file, paste), integrate it into the wiki:
- **Cross-reference:** Every new or updated page must link to at least 2 other
pages via `[[wikilinks]]`. Check that existing pages link back.
- **Tags:** Only use tags from the taxonomy in SCHEMA.md
- **Provenance:** On pages synthesizing 3+ sources, append `^[raw/articles/source.md]`
markers to paragraphs whose claims trace to a specific source.
- **Confidence:** For opinion-heavy, fast-moving, or single-source claims, set
`confidence: medium` or `low` in frontmatter. Don't mark `high` unless the
claim is well-supported across multiple sources.
⑤ **Update navigation:**
- Add new pages to `index.md` under the correct section, alphabetically
@@ -341,28 +304,18 @@ wiki = "<WIKI_PATH>"
recent source that mentions the same entities.
**Contradictions:** Pages on the same topic with conflicting claims. Look for
pages that share tags/entities but state different facts. Surface all pages
with `contested: true` or `contradictions:` frontmatter for user review.
pages that share tags/entities but state different facts.
**Quality signals:** List pages with `confidence: low` and any page that cites
only a single source but has no confidence field set — these are candidates
for either finding corroboration or demoting to `confidence: medium`.
**Page size:** Flag pages over 200 lines — candidates for splitting.
**Source drift:** For each file in `raw/` with a `sha256:` frontmatter, recompute
the hash and flag mismatches. Mismatches indicate the raw file was edited
(shouldn't happen — raw/ is immutable) or ingested from a URL that has since
changed. Not a hard error, but worth reporting.
**Tag audit:** List all tags in use, flag any not in the SCHEMA.md taxonomy.
**Page size:** Flag pages over 200 lines — candidates for splitting.
**Log rotation:** If log.md exceeds 500 entries, rotate it.
**Tag audit:** List all tags in use, flag any not in the SCHEMA.md taxonomy.
**Report findings** with specific file paths and suggested actions, grouped by
severity (broken links > orphans > stale content > style issues).
**Log rotation:** If log.md exceeds 500 entries, rotate it.
**Report findings** with specific file paths and suggested actions, grouped by
severity (broken links > orphans > source drift > contested pages > stale content > style issues).
**Append to log.md:** `## [YYYY-MM-DD] lint | N issues found`
**Append to log.md:** `## [YYYY-MM-DD] lint | N issues found`
## Working with the Wiki
@@ -495,12 +448,3 @@ vault in Obsidian on your laptop/phone — changes appear within seconds.
The agent should check log size during lint.
- **Handle contradictions explicitly** — don't silently overwrite. Note both claims with dates,
mark in frontmatter, flag for user review.
## Related Tools
[llm-wiki-compiler](https://github.com/atomicmemory/llm-wiki-compiler) is a Node.js CLI that
compiles sources into a concept wiki with the same Karpathy inspiration. It's Obsidian-compatible,
so users who want a scheduled/CLI-driven compile pipeline can point it at the same vault this
skill maintains. Trade-offs: it owns page generation (replaces the agent's judgment on page
creation) and is tuned for small corpora. Use this skill when you want agent-in-the-loop curation;
use llm-wiki-compiler when you want a batch compile of a source directory.
-76
View File
@@ -476,82 +476,6 @@ class TestGetTextAuxiliaryClient:
assert isinstance(client, CodexAuxiliaryClient)
assert model == "gpt-5.2-codex"
class TestNousAuxiliaryRefresh:
    """Auxiliary Nous clients should use, and recover with, runtime credentials."""

    def test_try_nous_prefers_runtime_credentials(self):
        """`_try_nous` must build its client from the runtime key and base URL."""
        runtime_url = "https://inference-api.nousresearch.com/v1"
        stored_auth = {"access_token": "stale-token"}
        with (
            patch("agent.auxiliary_client._read_nous_auth", return_value=stored_auth),
            patch(
                "agent.auxiliary_client._resolve_nous_runtime_api",
                return_value=("fresh-agent-key", runtime_url),
            ),
            patch("agent.auxiliary_client.OpenAI") as openai_cls,
        ):
            from agent.auxiliary_client import _try_nous

            openai_cls.return_value = MagicMock()
            client, model = _try_nous()

        assert client is not None
        assert model == "google/gemini-3-flash-preview"
        ctor_kwargs = openai_cls.call_args.kwargs
        assert ctor_kwargs["api_key"] == "fresh-agent-key"
        assert ctor_kwargs["base_url"] == runtime_url

    def test_call_llm_retries_nous_after_401(self):
        """A 401 from the cached client is followed by one call on a rebuilt client."""

        class _Auth401(Exception):
            status_code = 401

        nous_url = "https://inference-api.nousresearch.com/v1"

        expired = MagicMock()
        expired.base_url = nous_url
        expired.chat.completions.create.side_effect = _Auth401("stale nous key")

        renewed = MagicMock()
        renewed.base_url = nous_url
        renewed.chat.completions.create.return_value = {"ok": True}

        resolved = ("nous", "nous-model", None, None, None)
        with (
            patch("agent.auxiliary_client._resolve_task_provider_model", return_value=resolved),
            patch("agent.auxiliary_client._get_cached_client", return_value=(expired, "nous-model")),
            patch("agent.auxiliary_client.OpenAI", return_value=renewed),
            patch("agent.auxiliary_client._validate_llm_response", side_effect=lambda resp, _task: resp),
            patch("agent.auxiliary_client._resolve_nous_runtime_api", return_value=("fresh-agent-key", nous_url)),
        ):
            result = call_llm(
                task="compression",
                messages=[{"role": "user", "content": "hi"}],
            )

        assert result == {"ok": True}
        assert expired.chat.completions.create.call_count == 1
        assert renewed.chat.completions.create.call_count == 1

    @pytest.mark.asyncio
    async def test_async_call_llm_retries_nous_after_401(self):
        """Async variant: one awaited call on the stale client, one on the fresh one."""

        class _Auth401(Exception):
            status_code = 401

        nous_url = "https://inference-api.nousresearch.com/v1"

        expired = MagicMock()
        expired.base_url = nous_url
        expired.chat.completions.create = AsyncMock(side_effect=_Auth401("stale nous key"))

        renewed_async = MagicMock()
        renewed_async.base_url = nous_url
        renewed_async.chat.completions.create = AsyncMock(return_value={"ok": True})

        resolved = ("nous", "nous-model", None, None, None)
        with (
            patch("agent.auxiliary_client._resolve_task_provider_model", return_value=resolved),
            patch("agent.auxiliary_client._get_cached_client", return_value=(expired, "nous-model")),
            patch("agent.auxiliary_client._to_async_client", return_value=(renewed_async, "nous-model")),
            patch("agent.auxiliary_client._validate_llm_response", side_effect=lambda resp, _task: resp),
            patch("agent.auxiliary_client._resolve_nous_runtime_api", return_value=("fresh-agent-key", nous_url)),
        ):
            result = await async_call_llm(
                task="session_search",
                messages=[{"role": "user", "content": "hi"}],
            )

        assert result == {"ok": True}
        assert expired.chat.completions.create.await_count == 1
        assert renewed_async.chat.completions.create.await_count == 1
# ── Payment / credit exhaustion fallback ─────────────────────────────────
+9 -30
View File
@@ -167,7 +167,7 @@ class TestResolveAutoMainFirst:
class TestResolveVisionMainFirst:
"""Vision auto-detection prefers the main provider first."""
"""Vision auto-detection prefers main provider + main model first."""
def test_openrouter_main_vision_uses_main_model(self, monkeypatch):
"""OpenRouter main with vision-capable model → aux vision uses main model."""
@@ -200,49 +200,28 @@ class TestResolveVisionMainFirst:
assert mock_resolve.call_args.args[0] == "openrouter"
assert mock_resolve.call_args.args[1] == "anthropic/claude-sonnet-4.6"
def test_nous_main_vision_uses_paid_nous_vision_backend(self):
"""Paid Nous main → aux vision uses the dedicated Nous vision backend."""
def test_nous_main_vision_uses_main_model(self):
"""Nous Portal main → aux vision uses main model, not free-tier MiMo-V2-Omni."""
with patch(
"agent.auxiliary_client._read_main_provider", return_value="nous",
), patch(
"agent.auxiliary_client._read_main_model",
return_value="openai/gpt-5",
), patch(
"agent.auxiliary_client.resolve_provider_client"
) as mock_resolve, patch(
"agent.auxiliary_client._resolve_task_provider_model",
return_value=("auto", None, None, None, None),
), patch(
"agent.auxiliary_client._resolve_strict_vision_backend",
return_value=(MagicMock(), "google/gemini-3-flash-preview"),
):
mock_client = MagicMock()
mock_resolve.return_value = (mock_client, "openai/gpt-5")
from agent.auxiliary_client import resolve_vision_provider_client
provider, client, model = resolve_vision_provider_client()
assert provider == "nous"
assert client is not None
assert model == "google/gemini-3-flash-preview"
def test_nous_main_vision_uses_free_tier_nous_vision_backend(self):
"""Free-tier Nous main → aux vision uses MiMo omni, not the text main model."""
with patch(
"agent.auxiliary_client._read_main_provider", return_value="nous",
), patch(
"agent.auxiliary_client._read_main_model",
return_value="xiaomi/mimo-v2-pro",
), patch(
"agent.auxiliary_client._resolve_task_provider_model",
return_value=("auto", None, None, None, None),
), patch(
"agent.auxiliary_client._resolve_strict_vision_backend",
return_value=(MagicMock(), "xiaomi/mimo-v2-omni"),
):
from agent.auxiliary_client import resolve_vision_provider_client
provider, client, model = resolve_vision_provider_client()
assert provider == "nous"
assert client is not None
assert model == "xiaomi/mimo-v2-omni"
assert model == "openai/gpt-5"
def test_exotic_provider_with_vision_override_preserved(self):
"""xiaomi → mimo-v2-omni override still wins over main_model."""
+2 -2
View File
@@ -193,7 +193,7 @@ class TestBuildChildProgressCallback:
# task_index=0 in a batch of 3 → prefix "[1]"
cb0 = _build_child_progress_callback(0, "test goal", parent, task_count=3)
cb0("tool.started", "web_search", "test", {})
cb0("web_search", "test")
output = buf.getvalue()
assert "[1]" in output
@@ -201,7 +201,7 @@ class TestBuildChildProgressCallback:
buf.truncate(0)
buf.seek(0)
cb2 = _build_child_progress_callback(2, "test goal", parent, task_count=3)
cb2("tool.started", "web_search", "test", {})
cb2("web_search", "test")
output = buf.getvalue()
assert "[3]" in output
-85
View File
@@ -254,88 +254,3 @@ class TestCliApprovalUi:
# Command got truncated with a marker.
assert "(command truncated" in rendered
class TestApprovalCallbackThreadLocalWiring:
    """Regression guard for the thread-local callback freeze (#13617 / #13618).

    After 62348cff made _approval_callback / _sudo_password_callback thread-local
    (ACP GHSA-qg5c-hvr5-hjgr), the CLI agent thread could no longer see callbacks
    registered in the main thread — the dangerous-command prompt silently fell
    back to stdin input() and deadlocked against prompt_toolkit. The fix is to
    register the callbacks INSIDE the agent worker thread (matching the ACP
    pattern). These tests lock in that invariant.
    """

    def test_main_thread_registration_is_invisible_to_child_thread(self):
        """Confirms the underlying threading.local semantics that drove the bug.

        If this ever starts passing as "visible", the thread-local isolation
        is gone and the ACP race GHSA-qg5c-hvr5-hjgr may be back.
        """
        from tools.terminal_tool import (
            set_approval_callback,
            _get_approval_callback,
        )

        def main_cb(_cmd, _desc):
            return "once"

        set_approval_callback(main_cb)
        try:
            seen = {}

            def _child():
                seen["value"] = _get_approval_callback()

            t = threading.Thread(target=_child, daemon=True)
            t.start()
            t.join(timeout=2)
            # join() returns None even on timeout; check liveness explicitly so
            # a hung child fails here rather than as a KeyError on `seen`.
            assert not t.is_alive(), "child thread did not finish within 2s"
            assert seen["value"] is None
        finally:
            set_approval_callback(None)

    def test_child_thread_registration_is_visible_and_cleared_in_finally(self):
        """The fix pattern: register INSIDE the worker thread, clear in finally.

        This is exactly what cli.py's run_agent() closure does. If this test
        fails, the CLI approval prompt freeze (#13617) has regressed.
        """
        from tools.terminal_tool import (
            set_approval_callback,
            set_sudo_password_callback,
            _get_approval_callback,
            _get_sudo_password_callback,
        )

        def approval_cb(_cmd, _desc):
            return "once"

        def sudo_cb():
            return "hunter2"

        seen = {}

        def _worker():
            # Mimic cli.py's run_agent() thread target.
            set_approval_callback(approval_cb)
            set_sudo_password_callback(sudo_cb)
            try:
                seen["approval"] = _get_approval_callback()
                seen["sudo"] = _get_sudo_password_callback()
            finally:
                set_approval_callback(None)
                set_sudo_password_callback(None)
            seen["approval_after"] = _get_approval_callback()
            seen["sudo_after"] = _get_sudo_password_callback()

        t = threading.Thread(target=_worker, daemon=True)
        t.start()
        t.join(timeout=2)
        # join() returns None even on timeout; check liveness explicitly so
        # a hung worker fails here rather than as a KeyError on `seen`.
        assert not t.is_alive(), "worker thread did not finish within 2s"

        assert seen["approval"] is approval_cb
        assert seen["sudo"] is sudo_cb
        # Finally block must clear both slots — otherwise a reused thread
        # would hold a stale reference to a disposed CLI instance.
        assert seen["approval_after"] is None
        assert seen["sudo_after"] is None
@@ -1,91 +0,0 @@
"""Regression tests for the TUI gateway's `complete.path` handler.
Reported during the TUI v2 blitz retest: typing `@folder:` (and `@folder`
with no colon yet) still surfaced files alongside directories in the
TUI composer, because the gateway-side completion lives in
`tui_gateway/server.py` and was never touched by the earlier fix to
`hermes_cli/commands.py`.
Covers:
- `@folder:` only yields directories
- `@file:` only yields regular files
- Bare `@folder` / `@file` (no colon) lists cwd directly
- Explicit prefix is preserved in the completion text
"""
from __future__ import annotations
from pathlib import Path
from tui_gateway import server
def _fixture(tmp_path: Path):
    """Populate *tmp_path* with two files (one dotfile) and two directories."""
    for file_name in ("readme.md", ".env"):
        (tmp_path / file_name).write_text("x")
    for dir_name in ("src", "docs"):
        (tmp_path / dir_name).mkdir()
def _items(word: str):
    """Send a `complete.path` request for *word*; return (text, display, meta) tuples."""
    request = {"id": "1", "method": "complete.path", "params": {"word": word}}
    response = server.handle_request(request)
    rows = []
    for entry in response["result"]["items"]:
        rows.append((entry["text"], entry["display"], entry.get("meta", "")))
    return rows
def test_at_folder_colon_only_dirs(tmp_path, monkeypatch):
    """`@folder:` yields only directories, each keeping the `@folder:` prefix."""
    monkeypatch.chdir(tmp_path)
    _fixture(tmp_path)

    texts = [text for text, _display, _meta in _items("@folder:")]

    assert all(t.startswith("@folder:") for t in texts), texts
    assert "@folder:src/" in texts
    assert "@folder:docs/" in texts
    assert "@folder:readme.md" not in texts
    assert "@folder:.env" not in texts
def test_at_file_colon_only_files(tmp_path, monkeypatch):
    """`@file:` yields regular files and no directories."""
    monkeypatch.chdir(tmp_path)
    _fixture(tmp_path)

    texts = [text for text, _display, _meta in _items("@file:")]

    assert all(t.startswith("@file:") for t in texts), texts
    assert "@file:readme.md" in texts
    assert "@file:src/" not in texts
    assert "@file:docs/" not in texts
def test_at_folder_bare_without_colon_lists_dirs(tmp_path, monkeypatch):
    """Bare `@folder` (no colon) already lists directories from the cwd."""
    monkeypatch.chdir(tmp_path)
    _fixture(tmp_path)

    texts = [text for text, _display, _meta in _items("@folder")]

    assert "@folder:src/" in texts, texts
    assert "@folder:docs/" in texts, texts
    assert "@folder:readme.md" not in texts
def test_at_file_bare_without_colon_lists_files(tmp_path, monkeypatch):
    """Bare `@file` (no colon) already lists regular files from the cwd."""
    monkeypatch.chdir(tmp_path)
    _fixture(tmp_path)

    texts = [text for text, _display, _meta in _items("@file")]

    assert "@file:readme.md" in texts, texts
    assert "@file:src/" not in texts
def test_bare_at_still_shows_static_refs(tmp_path, monkeypatch):
    """A lone `@` must list every static reference so the prefixes stay
    discoverable. (Unchanged behaviour; regression guard.)
    """
    monkeypatch.chdir(tmp_path)
    static_refs = ("@diff", "@staged", "@file:", "@folder:", "@url:", "@git:")

    texts = [text for text, _display, _meta in _items("@")]

    for expected in static_refs:
        assert expected in texts, f"missing static ref {expected!r} in {texts!r}"
-159
View File
@@ -1,159 +0,0 @@
"""Tests for reply-to pointer injection in _prepare_inbound_message_text.
The `[Replying to: "..."]` prefix is a *disambiguation pointer*, not
deduplication. It must always be injected when the user explicitly replies
to a prior message — even when the quoted text already exists somewhere
in the conversation history. History can contain the same or similar text
multiple times, and without an explicit pointer the agent has to guess
which prior message the user is referencing.
"""
import pytest
from gateway.config import GatewayConfig, Platform, PlatformConfig
from gateway.platforms.base import MessageEvent
from gateway.run import GatewayRunner
from gateway.session import SessionSource
def _make_runner() -> GatewayRunner:
    """Build a GatewayRunner without calling __init__, configured for Telegram only."""
    telegram = PlatformConfig(enabled=True, token="fake")

    runner = object.__new__(GatewayRunner)
    runner.config = GatewayConfig(platforms={Platform.TELEGRAM: telegram})
    runner.adapters = {}
    runner._model = "openai/gpt-4.1-mini"
    runner._base_url = None
    return runner
def _source() -> SessionSource:
    """Return a SessionSource for a private Telegram chat with user Alice."""
    fields = dict(
        platform=Platform.TELEGRAM,
        chat_id="123",
        chat_name="DM",
        chat_type="private",
        user_name="Alice",
    )
    return SessionSource(**fields)
@pytest.mark.asyncio
async def test_reply_prefix_injected_when_text_absent_from_history():
    """A reply whose quoted text is not in history gets the `[Replying to: ...]` prefix."""
    runner, source = _make_runner(), _source()
    question = "What's the best time to go?"
    event = MessageEvent(
        text=question,
        source=source,
        reply_to_message_id="42",
        reply_to_text="Japan is great for culture, food, and efficiency.",
    )

    result = await runner._prepare_inbound_message_text(
        event=event,
        source=source,
        history=[{"role": "user", "content": "unrelated"}],
    )

    expected_prefix = '[Replying to: "Japan is great for culture, food, and efficiency."]'
    assert result is not None
    assert result.startswith(expected_prefix)
    assert result.endswith(question)
@pytest.mark.asyncio
async def test_reply_prefix_still_injected_when_text_in_history():
    """Regression test: the pointer is kept even when the quoted text already
    appears in history. A former `found_in_history` guard silently dropped
    the prefix, so the agent had to guess which prior message was meant."""
    runner, source = _make_runner(), _source()
    quoted = "Japan is great for culture, food, and efficiency."
    question = "What's the best time to go?"
    event = MessageEvent(
        text=question,
        source=source,
        reply_to_message_id="42",
        reply_to_text=quoted,
    )
    assistant_turn = f"{quoted} Italy is better if you prefer a relaxed pace."
    history = [
        {"role": "user", "content": "I'm thinking of going to Japan or Italy."},
        {"role": "assistant", "content": assistant_turn},
        {"role": "user", "content": "How long should I stay?"},
        {"role": "assistant", "content": "For Japan, 10-14 days is ideal."},
    ]

    result = await runner._prepare_inbound_message_text(
        event=event,
        source=source,
        history=history,
    )

    assert result is not None
    assert result.startswith(f'[Replying to: "{quoted}"]')
    assert result.endswith(question)
@pytest.mark.asyncio
async def test_no_prefix_without_reply_context():
    """A message with no reply metadata is returned unchanged."""
    runner, source = _make_runner(), _source()
    plain_event = MessageEvent(text="hello", source=source)

    result = await runner._prepare_inbound_message_text(
        event=plain_event,
        source=source,
        history=[],
    )

    assert result == "hello"
@pytest.mark.asyncio
async def test_no_prefix_when_reply_to_text_is_empty():
    """A reply_to_message_id with no text (e.g. a reply to a media-only
    message) must not yield an empty `[Replying to: ""]` prefix."""
    runner, source = _make_runner(), _source()
    textless_reply = MessageEvent(
        text="hi",
        source=source,
        reply_to_message_id="42",
        reply_to_text=None,
    )

    result = await runner._prepare_inbound_message_text(
        event=textless_reply,
        source=source,
        history=[],
    )

    assert result == "hi"
@pytest.mark.asyncio
async def test_reply_snippet_truncated_to_500_chars():
    """An 800-character quoted reply is cut to 500 characters inside the prefix."""
    runner, source = _make_runner(), _source()
    event = MessageEvent(
        text="follow-up",
        source=source,
        reply_to_message_id="42",
        reply_to_text="x" * 800,
    )

    result = await runner._prepare_inbound_message_text(
        event=event,
        source=source,
        history=[],
    )

    truncated_prefix = '[Replying to: "' + "x" * 500 + '"]'
    assert result is not None
    assert result.startswith(truncated_prefix)
    assert "x" * 501 not in result
@@ -1,90 +0,0 @@
"""Regression test: `@folder:` completion must only surface directories and
`@file:` must only surface regular files.
Reported during TUI v2 blitz testing: typing `@folder:` showed .dockerignore,
.env, .gitignore, etc. alongside the actual directories because the path-
completion branch yielded every entry regardless of the explicit prefix, and
auto-switched the completion kind based on `is_dir`. That defeated the user's
explicit choice and rendered the `@folder:` / `@file:` prefixes useless for
filtering.
"""
from __future__ import annotations
from pathlib import Path
from typing import Iterable
from hermes_cli.commands import SlashCommandCompleter
def _run(tmp_path: Path, word: str) -> list[tuple[str, str]]:
    """Create the fixture tree in *tmp_path* and complete *word*.

    Returns (text, display_meta) pairs, keeping only completions whose text
    starts with `@file:` or `@folder:`.
    """
    for file_name in ("readme.md", ".env"):
        (tmp_path / file_name).write_text("x")
    for dir_name in ("src", "docs"):
        (tmp_path / dir_name).mkdir()

    # Bypass __init__: only the completion method is exercised here.
    completer = SlashCommandCompleter.__new__(SlashCommandCompleter)
    candidates: Iterable = completer._context_completions(word)

    pairs = []
    for candidate in candidates:
        if candidate.text.startswith(("@file:", "@folder:")):
            pairs.append((candidate.text, candidate.display_meta))
    return pairs
def test_at_folder_only_yields_directories(tmp_path, monkeypatch):
    """`@folder:` completes to directories only, never to files or dotfiles."""
    monkeypatch.chdir(tmp_path)

    texts = [text for text, _meta in _run(tmp_path, "@folder:")]

    assert all(t.startswith("@folder:") for t in texts), texts
    assert "@folder:src/" in texts
    assert "@folder:docs/" in texts
    assert "@folder:readme.md" not in texts
    assert "@folder:.env" not in texts
def test_at_file_only_yields_files(tmp_path, monkeypatch):
    """`@file:` completes to regular files (dotfiles included), never to directories."""
    monkeypatch.chdir(tmp_path)

    texts = [text for text, _meta in _run(tmp_path, "@file:")]

    assert all(t.startswith("@file:") for t in texts), texts
    assert "@file:readme.md" in texts
    assert "@file:.env" in texts
    assert "@file:src/" not in texts
    assert "@file:docs/" not in texts
def test_at_folder_preserves_prefix_on_empty_match(tmp_path, monkeypatch):
    """With `@folder:` and no partial path typed, every completion keeps the
    `@folder:` prefix; the previous implementation rewrote it to `@file:`
    for non-directory entries.
    """
    monkeypatch.chdir(tmp_path)

    texts = [text for text, _meta in _run(tmp_path, "@folder:")]

    assert texts, "expected at least one directory completion"
    for completion in texts:
        assert completion.startswith("@folder:"), f"prefix leaked: {completion}"
def test_at_folder_bare_without_colon_lists_directories(tmp_path, monkeypatch):
    """Bare `@folder` (no colon yet) already surfaces directories, so users
    need not accept the static `@folder:` hint before seeing candidates.
    """
    monkeypatch.chdir(tmp_path)

    texts = [text for text, _meta in _run(tmp_path, "@folder")]

    assert "@folder:src/" in texts, texts
    assert "@folder:docs/" in texts, texts
    assert "@folder:readme.md" not in texts
def test_at_file_bare_without_colon_lists_files(tmp_path, monkeypatch):
    """Bare `@file` (no colon yet) already surfaces regular files, not directories."""
    monkeypatch.chdir(tmp_path)

    texts = [text for text, _meta in _run(tmp_path, "@file")]

    assert "@file:readme.md" in texts, texts
    assert "@file:src/" not in texts
-36
View File
@@ -1,36 +0,0 @@
"""Regression tests for removed dead config keys.
This file guards against accidental re-introduction of config keys that were
documented or declared at some point but never actually wired up to read code.
Future dead-config regressions can accumulate here.
"""
import inspect
def test_delegation_default_toolsets_removed_from_cli_config():
    """delegation.default_toolsets was dead config — never read by
    _load_config() or anywhere else — and has been removed.

    This guards against the key reappearing in cli.py's CLI_CONFIG default
    dict. A failure means someone re-added it without wiring it up to
    _load_config() in tools/delegate_tool.py.

    The check reads the source of load_cli_config() rather than the runtime
    CLI_CONFIG dict, because CLI_CONFIG is the result of deep-merging the
    user's ~/.hermes/config.yaml over the defaults (cli.py:359-366). A
    contributor whose own config still carries the legacy key would get a
    false failure, and patching HERMES_HOME via conftest does not help:
    cli._hermes_home is frozen at module import time (cli.py:76), before any
    autouse fixture can fire. Inspecting the source tests the defaults
    literal directly and avoids all of that.
    """
    from cli import load_cli_config

    failure_hint = (
        "delegation.default_toolsets was removed because it was never read. "
        "Do not re-add it to cli.py's CLI_CONFIG default dict; "
        "use tools/delegate_tool.py's DEFAULT_TOOLSETS module constant or "
        "wire a new config key through _load_config()."
    )
    defaults_source = inspect.getsource(load_cli_config)
    assert '"default_toolsets"' not in defaults_source, failure_hint
@@ -1,93 +0,0 @@
"""Regression test for TUI v2 blitz bug: explicit /model --provider switch
silently fell back to the old primary provider on the next turn because the
fallback chain seeded from config at agent __init__ kept entries for the
provider the user just moved away from.
Reported: "switched from openrouter provider to anthropic api key via hermes
model and the tui keeps trying openrouter".
"""
from unittest.mock import MagicMock, patch
from run_agent import AIAgent
def _make_agent(chain):
    """Build an AIAgent (bypassing __init__) on openrouter with *chain* as its fallbacks."""
    openrouter_url = "https://openrouter.ai/api/v1"
    agent = AIAgent.__new__(AIAgent)
    state = {
        "provider": "openrouter",
        "model": "x-ai/grok-4",
        "base_url": openrouter_url,
        "api_key": "or-key",
        "api_mode": "chat_completions",
        "client": MagicMock(),
        "_client_kwargs": {"api_key": "or-key", "base_url": openrouter_url},
        "context_compressor": None,
        "_anthropic_api_key": "",
        "_anthropic_base_url": None,
        "_anthropic_client": None,
        "_is_anthropic_oauth": False,
        "_cached_system_prompt": "cached",
        "_primary_runtime": {},
        "_fallback_activated": False,
        "_fallback_index": 0,
        # Copy so the caller's list is never aliased by the agent.
        "_fallback_chain": list(chain),
        "_fallback_model": chain[0] if chain else None,
    }
    for attr_name, attr_value in state.items():
        setattr(agent, attr_name, attr_value)
    return agent
def _switch_to_anthropic(agent):
    """Call agent.switch_model() for anthropic with the adapter and timeout lookups patched."""
    switch_args = {
        "new_model": "claude-sonnet-4-5",
        "new_provider": "anthropic",
        "api_key": "sk-ant-xyz",
        "base_url": "https://api.anthropic.com",
        "api_mode": "anthropic_messages",
    }
    with (
        patch("agent.anthropic_adapter.build_anthropic_client", return_value=MagicMock()),
        patch("agent.anthropic_adapter.resolve_anthropic_token", return_value="sk-ant-xyz"),
        patch("agent.anthropic_adapter._is_oauth_token", return_value=False),
        patch("hermes_cli.timeouts.get_provider_request_timeout", return_value=None),
    ):
        agent.switch_model(**switch_args)
def test_switch_drops_old_primary_from_fallback_chain():
    """Switching to anthropic prunes the old primary (openrouter) from the chain."""
    nous_entry = {"provider": "nous", "model": "hermes-4"}
    agent = _make_agent([
        {"provider": "openrouter", "model": "x-ai/grok-4"},
        dict(nous_entry),
    ])

    _switch_to_anthropic(agent)

    providers = []
    for entry in agent._fallback_chain:
        providers.append(entry["provider"])
    assert "openrouter" not in providers, "old primary must be pruned"
    assert "anthropic" not in providers, "new primary is redundant in the chain"
    assert providers == ["nous"]
    assert agent._fallback_model == nous_entry
def test_switch_with_empty_chain_stays_empty():
    """With no fallbacks configured, a provider switch leaves the chain empty."""
    agent = _make_agent([])

    _switch_to_anthropic(agent)

    assert agent._fallback_chain == []
    assert agent._fallback_model is None
def test_switch_within_same_provider_preserves_chain():
    """Changing only the model on the same provider leaves the fallback chain intact."""
    chain = [{"provider": "openrouter", "model": "x-ai/grok-4"}]
    agent = _make_agent(chain)
    same_provider_switch = {
        "new_model": "openai/gpt-5",
        "new_provider": "openrouter",
        "api_key": "or-key",
        "base_url": "https://openrouter.ai/api/v1",
    }

    with patch("hermes_cli.timeouts.get_provider_request_timeout", return_value=None):
        agent.switch_model(**same_provider_switch)

    assert agent._fallback_chain == chain
-90
View File
@@ -1,5 +1,4 @@
import json
import os
import sys
import threading
import time
@@ -118,53 +117,6 @@ def test_config_set_yolo_toggles_session_scope():
server._sessions.clear()
def test_config_set_mouse_writes_tui_mouse(monkeypatch):
    """`config.set mouse off|on` echoes the value and writes a bool to display.tui_mouse."""
    recorded: list[tuple[str, object]] = []
    settings = {"display": {}}
    monkeypatch.setattr(server, "_load_cfg", lambda: settings)
    monkeypatch.setattr(server, "_write_config_key", lambda path, value: recorded.append((path, value)))

    # Same two requests, in the same order: "off" first, then "on".
    for request_id, word, persisted in (("1", "off", False), ("2", "on", True)):
        resp = server.handle_request(
            {"id": request_id, "method": "config.set", "params": {"key": "mouse", "value": word}}
        )
        assert resp["result"] == {"key": "mouse", "value": word}
        assert recorded[-1] == ("display.tui_mouse", persisted)
def test_config_set_mouse_toggle_inverts_persisted_value(monkeypatch):
    """`toggle` flips the persisted setting: stored off becomes on."""
    recorded: list[tuple[str, object]] = []
    persisted_off = {"display": {"tui_mouse": False}}
    monkeypatch.setattr(server, "_load_cfg", lambda: persisted_off)
    monkeypatch.setattr(server, "_write_config_key", lambda path, value: recorded.append((path, value)))

    request = {"id": "1", "method": "config.set", "params": {"key": "mouse", "value": "toggle"}}
    resp = server.handle_request(request)

    assert resp["result"] == {"key": "mouse", "value": "on"}
    assert recorded[-1] == ("display.tui_mouse", True)
def test_config_set_mouse_rejects_unknown_value(monkeypatch):
    """A value other than on/off/toggle produces an "unknown mouse value" error."""
    monkeypatch.setattr(server, "_load_cfg", lambda: {"display": {}})
    monkeypatch.setattr(server, "_write_config_key", lambda path, value: None)

    request = {"id": "1", "method": "config.set", "params": {"key": "mouse", "value": "sure"}}
    resp = server.handle_request(request)

    assert "error" in resp
    assert "unknown mouse value" in resp["error"]["message"]
def test_config_get_mouse_defaults_on(monkeypatch):
    """`config.get mouse` reports "on" with no config and "off" when tui_mouse is False."""
    cases = (
        ("1", {}, "on"),
        ("2", {"display": {"tui_mouse": False}}, "off"),
    )
    for request_id, loaded_cfg, expected in cases:
        # The lambda is called during this iteration, so closing over the
        # loop variable is safe here.
        monkeypatch.setattr(server, "_load_cfg", lambda: loaded_cfg)
        resp = server.handle_request(
            {"id": request_id, "method": "config.get", "params": {"key": "mouse"}}
        )
        assert resp["result"] == {"value": expected}
def test_enable_gateway_prompts_sets_gateway_env(monkeypatch):
monkeypatch.delenv("HERMES_EXEC_ASK", raising=False)
monkeypatch.delenv("HERMES_GATEWAY_SESSION", raising=False)
@@ -278,48 +230,6 @@ def test_config_set_model_global_persists(monkeypatch):
assert saved["model"]["base_url"] == "https://api.anthropic.com"
def test_config_set_model_syncs_inference_provider_env(monkeypatch):
    """After an explicit provider switch, HERMES_INFERENCE_PROVIDER must
    reflect the user's choice so ambient re-resolution (credential pool
    refresh, aux clients) picks up the new provider instead of the original
    one persisted in config or shell env.

    Regression: a TUI user switched openrouter → anthropic and the TUI kept
    trying openrouter because the env-var-backed resolvers still saw the old
    provider.
    """

    class _Agent:
        provider = "openrouter"
        model = "old/model"
        base_url = ""
        api_key = "sk-or"

        def switch_model(self, **_kwargs):
            return None

    result = types.SimpleNamespace(
        success=True,
        new_model="claude-sonnet-4.6",
        target_provider="anthropic",
        api_key="sk-ant",
        base_url="https://api.anthropic.com",
        api_mode="anthropic_messages",
        warning_message="",
    )

    # setenv goes through monkeypatch so the variable is restored at teardown
    # even though the handler under test overwrites it.
    monkeypatch.setenv("HERMES_INFERENCE_PROVIDER", "openrouter")
    monkeypatch.setattr("hermes_cli.model_switch.switch_model", lambda **_kwargs: result)
    monkeypatch.setattr(server, "_restart_slash_worker", lambda session: None)
    monkeypatch.setattr(server, "_emit", lambda *args, **kwargs: None)

    server._sessions["sid"] = _session(agent=_Agent())
    try:
        server.handle_request(
            {"id": "1", "method": "config.set", "params": {"session_id": "sid", "key": "model", "value": "claude-sonnet-4.6 --provider anthropic"}}
        )

        assert os.environ["HERMES_INFERENCE_PROVIDER"] == "anthropic"
    finally:
        # Remove the fake session so it cannot leak into later tests that
        # share the module-level server._sessions registry.
        server._sessions.pop("sid", None)
def test_config_set_personality_rejects_unknown_name(monkeypatch):
monkeypatch.setattr(server, "_available_personalities", lambda cfg=None: {"helpful": "You are helpful."})
resp = server.handle_request(
+1 -642
View File
@@ -20,14 +20,11 @@ from unittest.mock import MagicMock, patch
from tools.delegate_tool import (
DELEGATE_BLOCKED_TOOLS,
DELEGATE_TASK_SCHEMA,
DelegateEvent,
_get_max_concurrent_children,
_LEGACY_EVENT_MAP,
MAX_DEPTH,
check_delegate_requirements,
delegate_task,
_build_child_agent,
_build_child_progress_callback,
_build_child_system_prompt,
_strip_blocked_tools,
_resolve_child_credential_pool,
@@ -571,16 +568,8 @@ class TestBlockedTools(unittest.TestCase):
self.assertIn(tool, DELEGATE_BLOCKED_TOOLS)
def test_constants(self):
from tools.delegate_tool import (
_get_max_spawn_depth, _get_orchestrator_enabled,
_MIN_SPAWN_DEPTH, _MAX_SPAWN_DEPTH_CAP,
)
self.assertEqual(_get_max_concurrent_children(), 3)
self.assertEqual(MAX_DEPTH, 1)
self.assertEqual(_get_max_spawn_depth(), 1) # default: flat
self.assertTrue(_get_orchestrator_enabled()) # default
self.assertEqual(_MIN_SPAWN_DEPTH, 1)
self.assertEqual(_MAX_SPAWN_DEPTH_CAP, 3)
self.assertEqual(MAX_DEPTH, 2)
class TestDelegationCredentialResolution(unittest.TestCase):
@@ -1336,635 +1325,5 @@ class TestDelegationReasoningEffort(unittest.TestCase):
self.assertEqual(call_kwargs["reasoning_config"], {"enabled": True, "effort": "medium"})
# =========================================================================
# Dispatch helper, progress events, concurrency
# =========================================================================
class TestDispatchDelegateTask(unittest.TestCase):
"""Tests for the _dispatch_delegate_task helper and full param forwarding."""
@patch("tools.delegate_tool._load_config", return_value={})
@patch("tools.delegate_tool._resolve_delegation_credentials")
def test_acp_args_forwarded(self, mock_creds, mock_cfg):
"""Both acp_command and acp_args reach delegate_task via the helper."""
mock_creds.return_value = {
"provider": None, "base_url": None,
"api_key": None, "api_mode": None, "model": None,
}
parent = _make_mock_parent(depth=0)
with patch("tools.delegate_tool._build_child_agent") as mock_build:
mock_child = MagicMock()
mock_child.run_conversation.return_value = {
"final_response": "done", "completed": True,
"api_calls": 1, "messages": [],
}
mock_child._delegate_saved_tool_names = []
mock_child._credential_pool = None
mock_child.session_prompt_tokens = 0
mock_child.session_completion_tokens = 0
mock_child.model = "test"
mock_build.return_value = mock_child
delegate_task(
goal="test",
acp_command="claude",
acp_args=["--acp", "--stdio"],
parent_agent=parent,
)
_, kwargs = mock_build.call_args
self.assertEqual(kwargs["override_acp_command"], "claude")
self.assertEqual(kwargs["override_acp_args"], ["--acp", "--stdio"])
class TestDelegateEventEnum(unittest.TestCase):
    """Tests for DelegateEvent enum and back-compat aliases."""

    @staticmethod
    def _spinner_printed(parent, needle):
        """Return True if any recorded spinner ``print_above`` call mentions *needle*.

        The check is done on ``str(call)`` so it matches text passed either
        positionally or by keyword.
        """
        return any(
            needle in str(call)
            for call in parent._delegate_spinner.print_above.call_args_list
        )

    def test_enum_values_are_strings(self):
        for event in DelegateEvent:
            self.assertIsInstance(event.value, str)
            self.assertTrue(event.value.startswith("delegate."))

    def test_legacy_map_covers_all_old_names(self):
        expected_legacy = {"_thinking", "reasoning.available",
                           "tool.started", "tool.completed", "subagent_progress"}
        self.assertEqual(set(_LEGACY_EVENT_MAP.keys()), expected_legacy)

    def test_legacy_map_values_are_delegate_events(self):
        # Only the mapped values matter here; the legacy names are covered
        # by test_legacy_map_covers_all_old_names.
        for event in _LEGACY_EVENT_MAP.values():
            self.assertIsInstance(event, DelegateEvent)

    def test_progress_callback_normalises_tool_started(self):
        """_build_child_progress_callback handles tool.started via enum."""
        parent = _make_mock_parent()
        parent._delegate_spinner = MagicMock()
        parent.tool_progress_callback = MagicMock()

        cb = _build_child_progress_callback(0, "test goal", parent, task_count=1)
        self.assertIsNotNone(cb)

        cb("tool.started", tool_name="terminal", preview="ls")
        parent._delegate_spinner.print_above.assert_called()

    def test_progress_callback_normalises_thinking(self):
        """Both _thinking and reasoning.available route to TASK_THINKING."""
        parent = _make_mock_parent()
        parent._delegate_spinner = MagicMock()
        parent.tool_progress_callback = None

        cb = _build_child_progress_callback(0, "test goal", parent, task_count=1)

        cb("_thinking", tool_name=None, preview="pondering...")
        self.assertTrue(self._spinner_printed(parent, "💭"))

        # Reset so the second event is checked in isolation.
        parent._delegate_spinner.print_above.reset_mock()
        cb("reasoning.available", tool_name=None, preview="hmm")
        self.assertTrue(self._spinner_printed(parent, "💭"))

    def test_progress_callback_tool_completed_is_noop(self):
        """tool.completed is normalised but produces no display output."""
        parent = _make_mock_parent()
        parent._delegate_spinner = MagicMock()
        parent.tool_progress_callback = None

        cb = _build_child_progress_callback(0, "test goal", parent, task_count=1)
        cb("tool.completed", tool_name="terminal")
        parent._delegate_spinner.print_above.assert_not_called()

    def test_progress_callback_ignores_unknown_events(self):
        """Unknown event types are silently ignored."""
        parent = _make_mock_parent()
        parent._delegate_spinner = MagicMock()

        cb = _build_child_progress_callback(0, "test goal", parent, task_count=1)
        # Should not raise
        cb("some.unknown.event", tool_name="x")
        parent._delegate_spinner.print_above.assert_not_called()

    def test_progress_callback_accepts_enum_value_directly(self):
        """cb(DelegateEvent.TASK_THINKING, ...) must route to the thinking
        branch. Pre-fix the callback only handled legacy strings via
        _LEGACY_EVENT_MAP.get and silently dropped enum-typed callers."""
        parent = _make_mock_parent()
        parent._delegate_spinner = MagicMock()
        parent.tool_progress_callback = None

        cb = _build_child_progress_callback(0, "test goal", parent, task_count=1)
        cb(DelegateEvent.TASK_THINKING, preview="pondering")
        # If the enum was accepted, the thinking emoji got printed.
        self.assertTrue(self._spinner_printed(parent, "💭"))

    def test_progress_callback_accepts_new_style_string(self):
        """cb('delegate.task_thinking', ...) — the string form of the
        enum value must route to the thinking branch too, so new-style
        emitters don't have to import DelegateEvent."""
        parent = _make_mock_parent()
        parent._delegate_spinner = MagicMock()

        cb = _build_child_progress_callback(0, "test goal", parent, task_count=1)
        cb("delegate.task_thinking", preview="hmm")
        self.assertTrue(self._spinner_printed(parent, "💭"))

    def test_progress_callback_task_progress_not_misrendered(self):
        """'subagent_progress' (legacy name for TASK_PROGRESS) carries a
        pre-batched summary in the tool_name slot. Before the fix, this
        fell through to the TASK_TOOL_STARTED rendering path, treating
        the summary string as a tool name. After the fix: distinct
        render (no tool-start emoji lookup) and pass-through relay
        upward (no re-batching).

        Regression path only reachable once nested orchestration is
        enabled: nested orchestrators relay subagent_progress from
        grandchildren upward through this callback.
        """
        parent = _make_mock_parent()
        parent._delegate_spinner = MagicMock()
        parent.tool_progress_callback = MagicMock()

        cb = _build_child_progress_callback(0, "test goal", parent, task_count=1)
        cb("subagent_progress", tool_name="🔀 [1] terminal, file")

        # Spinner gets a distinct 🔀-prefixed line, NOT a tool emoji
        # followed by the summary string as if it were a tool name.
        self.assertTrue(self._spinner_printed(parent, "🔀 🔀 [1] terminal, file"))
        # Parent callback receives the relay (pass-through, no re-batching).
        parent.tool_progress_callback.assert_called_once()
        # No '⚡' tool-start emoji should appear — that's the pre-fix bug.
        # The needle must be the emoji itself: an empty string is contained
        # in every string, which would make this assertion always fail.
        self.assertFalse(self._spinner_printed(parent, "⚡"))
class TestConcurrencyDefaults(unittest.TestCase):
"""Tests for the concurrency default and no hard ceiling."""
@patch("tools.delegate_tool._load_config", return_value={})
def test_default_is_three(self, mock_cfg):
# Clear env var if set
with patch.dict(os.environ, {}, clear=True):
self.assertEqual(_get_max_concurrent_children(), 3)
@patch("tools.delegate_tool._load_config",
return_value={"max_concurrent_children": 10})
def test_no_upper_ceiling(self, mock_cfg):
"""Users can raise concurrency as high as they want — no hard cap."""
self.assertEqual(_get_max_concurrent_children(), 10)
@patch("tools.delegate_tool._load_config",
return_value={"max_concurrent_children": 100})
def test_very_high_values_honored(self, mock_cfg):
self.assertEqual(_get_max_concurrent_children(), 100)
@patch("tools.delegate_tool._load_config",
return_value={"max_concurrent_children": 0})
def test_zero_clamped_to_one(self, mock_cfg):
"""Floor of 1 is enforced; zero or negative values raise to 1."""
self.assertEqual(_get_max_concurrent_children(), 1)
@patch("tools.delegate_tool._load_config", return_value={})
def test_env_var_honored_uncapped(self, mock_cfg):
with patch.dict(os.environ, {"DELEGATION_MAX_CONCURRENT_CHILDREN": "12"}):
self.assertEqual(_get_max_concurrent_children(), 12)
@patch("tools.delegate_tool._load_config",
return_value={"max_concurrent_children": 6})
def test_configured_value_returned(self, mock_cfg):
self.assertEqual(_get_max_concurrent_children(), 6)
# =========================================================================
# max_spawn_depth clamping
# =========================================================================
class TestMaxSpawnDepth(unittest.TestCase):
"""Tests for _get_max_spawn_depth clamping and fallback behavior."""
@patch("tools.delegate_tool._load_config", return_value={})
def test_max_spawn_depth_defaults_to_1(self, mock_cfg):
from tools.delegate_tool import _get_max_spawn_depth
self.assertEqual(_get_max_spawn_depth(), 1)
@patch("tools.delegate_tool._load_config",
return_value={"max_spawn_depth": 0})
def test_max_spawn_depth_clamped_below_one(self, mock_cfg):
import logging
from tools.delegate_tool import _get_max_spawn_depth
with self.assertLogs("tools.delegate_tool", level=logging.WARNING) as cm:
result = _get_max_spawn_depth()
self.assertEqual(result, 1)
self.assertTrue(any("clamping to 1" in m for m in cm.output))
@patch("tools.delegate_tool._load_config",
return_value={"max_spawn_depth": 99})
def test_max_spawn_depth_clamped_above_three(self, mock_cfg):
import logging
from tools.delegate_tool import _get_max_spawn_depth
with self.assertLogs("tools.delegate_tool", level=logging.WARNING) as cm:
result = _get_max_spawn_depth()
self.assertEqual(result, 3)
self.assertTrue(any("clamping to 3" in m for m in cm.output))
@patch("tools.delegate_tool._load_config",
return_value={"max_spawn_depth": "not-a-number"})
def test_max_spawn_depth_invalid_falls_back_to_default(self, mock_cfg):
from tools.delegate_tool import _get_max_spawn_depth
self.assertEqual(_get_max_spawn_depth(), 1)
# =========================================================================
# role param plumbing
# =========================================================================
#
# These tests cover the schema + signature + stash plumbing of the role
# param. The full role-honoring behavior (toolset re-add, role-aware
# prompt) lives in TestOrchestratorRoleBehavior below; these tests only
# assert on _delegate_role stashing and on the schema shape.
class TestOrchestratorRoleSchema(unittest.TestCase):
"""Tests that the role param reaches the child via dispatch."""
@patch("tools.delegate_tool._resolve_delegation_credentials")
@patch("tools.delegate_tool._load_config",
return_value={"max_spawn_depth": 2})
def _run_with_mock_child(self, role_arg, mock_cfg, mock_creds):
mock_creds.return_value = {
"provider": None, "base_url": None,
"api_key": None, "api_mode": None, "model": None,
}
parent = _make_mock_parent(depth=0)
with patch("run_agent.AIAgent") as MockAgent:
mock_child = MagicMock()
mock_child.run_conversation.return_value = {
"final_response": "done", "completed": True,
"api_calls": 1, "messages": [],
}
mock_child._delegate_saved_tool_names = []
mock_child._credential_pool = None
mock_child.session_prompt_tokens = 0
mock_child.session_completion_tokens = 0
mock_child.model = "test"
MockAgent.return_value = mock_child
kwargs = {"goal": "test", "parent_agent": parent}
if role_arg is not _SENTINEL:
kwargs["role"] = role_arg
delegate_task(**kwargs)
return mock_child
def test_default_role_is_leaf(self):
child = self._run_with_mock_child(_SENTINEL)
self.assertEqual(child._delegate_role, "leaf")
def test_explicit_orchestrator_role_stashed(self):
"""role='orchestrator' reaches _build_child_agent and is stashed.
Full behavior (toolset re-add) lands in commit 3; commit 2 only
verifies the plumbing."""
child = self._run_with_mock_child("orchestrator")
self.assertEqual(child._delegate_role, "orchestrator")
def test_unknown_role_coerces_to_leaf(self):
"""role='nonsense' → _normalize_role warns and returns 'leaf'."""
import logging
with self.assertLogs("tools.delegate_tool", level=logging.WARNING) as cm:
child = self._run_with_mock_child("nonsense")
self.assertEqual(child._delegate_role, "leaf")
self.assertTrue(any("coercing" in m.lower() for m in cm.output))
def test_schema_has_role_top_level_and_per_task(self):
from tools.delegate_tool import DELEGATE_TASK_SCHEMA
props = DELEGATE_TASK_SCHEMA["parameters"]["properties"]
self.assertIn("role", props)
self.assertEqual(props["role"]["enum"], ["leaf", "orchestrator"])
task_props = props["tasks"]["items"]["properties"]
self.assertIn("role", task_props)
self.assertEqual(task_props["role"]["enum"], ["leaf", "orchestrator"])
# Sentinel used to distinguish "role kwarg omitted" from "role=None".
_SENTINEL = object()
# =========================================================================
# role-honoring behavior
# =========================================================================
def _make_role_mock_child():
"""Helper: mock child with minimal fields for delegate_task to process."""
mock_child = MagicMock()
mock_child.run_conversation.return_value = {
"final_response": "done", "completed": True,
"api_calls": 1, "messages": [],
}
mock_child._delegate_saved_tool_names = []
mock_child._credential_pool = None
mock_child.session_prompt_tokens = 0
mock_child.session_completion_tokens = 0
mock_child.model = "test"
return mock_child
class TestOrchestratorRoleBehavior(unittest.TestCase):
"""Tests that role='orchestrator' actually changes toolset + prompt."""
@patch("tools.delegate_tool._resolve_delegation_credentials")
@patch("tools.delegate_tool._load_config",
return_value={"max_spawn_depth": 2})
def test_orchestrator_role_keeps_delegation_at_depth_1(
self, mock_cfg, mock_creds
):
"""role='orchestrator' + depth-0 parent with max_spawn_depth=2 →
child at depth 1 gets 'delegation' in enabled_toolsets (can
further delegate). Requires max_spawn_depth>=2 since the new
default is 1 (flat)."""
mock_creds.return_value = {
"provider": None, "base_url": None,
"api_key": None, "api_mode": None, "model": None,
}
parent = _make_mock_parent(depth=0)
parent.enabled_toolsets = ["terminal", "file"]
with patch("run_agent.AIAgent") as MockAgent:
mock_child = _make_role_mock_child()
MockAgent.return_value = mock_child
delegate_task(goal="test", role="orchestrator", parent_agent=parent)
kwargs = MockAgent.call_args[1]
self.assertIn("delegation", kwargs["enabled_toolsets"])
self.assertEqual(mock_child._delegate_role, "orchestrator")
@patch("tools.delegate_tool._resolve_delegation_credentials")
@patch("tools.delegate_tool._load_config",
return_value={"max_spawn_depth": 2})
def test_orchestrator_blocked_at_max_spawn_depth(
self, mock_cfg, mock_creds
):
"""Parent at depth 1 with max_spawn_depth=2 spawns child
at depth 2 (the floor); role='orchestrator' degrades to leaf."""
mock_creds.return_value = {
"provider": None, "base_url": None,
"api_key": None, "api_mode": None, "model": None,
}
parent = _make_mock_parent(depth=1)
parent.enabled_toolsets = ["terminal", "delegation"]
with patch("run_agent.AIAgent") as MockAgent:
mock_child = _make_role_mock_child()
MockAgent.return_value = mock_child
delegate_task(goal="test", role="orchestrator", parent_agent=parent)
kwargs = MockAgent.call_args[1]
self.assertNotIn("delegation", kwargs["enabled_toolsets"])
self.assertEqual(mock_child._delegate_role, "leaf")
@patch("tools.delegate_tool._resolve_delegation_credentials")
@patch("tools.delegate_tool._load_config", return_value={})
def test_orchestrator_blocked_at_default_flat_depth(
self, mock_cfg, mock_creds
):
"""With default max_spawn_depth=1 (flat), role='orchestrator'
on a depth-0 parent produces a depth-1 child that is already at
the floor — the role degrades to 'leaf' and the delegation
toolset is stripped. This is the new default posture."""
mock_creds.return_value = {
"provider": None, "base_url": None,
"api_key": None, "api_mode": None, "model": None,
}
parent = _make_mock_parent(depth=0)
parent.enabled_toolsets = ["terminal", "file", "delegation"]
with patch("run_agent.AIAgent") as MockAgent:
mock_child = _make_role_mock_child()
MockAgent.return_value = mock_child
delegate_task(goal="test", role="orchestrator", parent_agent=parent)
kwargs = MockAgent.call_args[1]
self.assertNotIn("delegation", kwargs["enabled_toolsets"])
self.assertEqual(mock_child._delegate_role, "leaf")
@patch("tools.delegate_tool._resolve_delegation_credentials")
def test_orchestrator_enabled_false_forces_leaf(self, mock_creds):
"""Kill switch delegation.orchestrator_enabled=false overrides
role='orchestrator'."""
mock_creds.return_value = {
"provider": None, "base_url": None,
"api_key": None, "api_mode": None, "model": None,
}
parent = _make_mock_parent(depth=0)
parent.enabled_toolsets = ["terminal", "delegation"]
with patch("tools.delegate_tool._load_config",
return_value={"orchestrator_enabled": False}):
with patch("run_agent.AIAgent") as MockAgent:
mock_child = _make_role_mock_child()
MockAgent.return_value = mock_child
delegate_task(goal="test", role="orchestrator",
parent_agent=parent)
kwargs = MockAgent.call_args[1]
self.assertNotIn("delegation", kwargs["enabled_toolsets"])
self.assertEqual(mock_child._delegate_role, "leaf")
# ── Role-aware system prompt ────────────────────────────────────────
def test_leaf_prompt_does_not_mention_delegation(self):
prompt = _build_child_system_prompt(
"Fix tests", role="leaf",
max_spawn_depth=2, child_depth=1,
)
self.assertNotIn("delegate_task", prompt)
self.assertNotIn("Orchestrator Role", prompt)
def test_orchestrator_prompt_mentions_delegation_capability(self):
prompt = _build_child_system_prompt(
"Survey approaches", role="orchestrator",
max_spawn_depth=2, child_depth=1,
)
self.assertIn("delegate_task", prompt)
self.assertIn("Orchestrator Role", prompt)
# Depth/max-depth note present and literal:
self.assertIn("depth 1", prompt)
self.assertIn("max_spawn_depth=2", prompt)
def test_orchestrator_prompt_at_depth_floor_says_children_are_leaves(self):
"""With max_spawn_depth=2 and child_depth=1, the orchestrator's
own children would be at depth 2 (the floor), so they must be leaves."""
prompt = _build_child_system_prompt(
"Survey", role="orchestrator",
max_spawn_depth=2, child_depth=1,
)
self.assertIn("MUST be leaves", prompt)
def test_orchestrator_prompt_below_floor_allows_more_nesting(self):
"""With max_spawn_depth=3 and child_depth=1, the orchestrator's
own children can themselves be orchestrators (depth 2 < 3)."""
prompt = _build_child_system_prompt(
"Deep work", role="orchestrator",
max_spawn_depth=3, child_depth=1,
)
self.assertIn("can themselves be orchestrators", prompt)
# ── Batch mode and intersection ─────────────────────────────────────
@patch("tools.delegate_tool._resolve_delegation_credentials")
@patch("tools.delegate_tool._load_config",
return_value={"max_spawn_depth": 2})
def test_batch_mode_per_task_role_override(self, mock_cfg, mock_creds):
"""Per-task role beats top-level; no top-level role → "leaf".
tasks=[{role:'orchestrator'},{role:'leaf'},{}]: the first gets
delegation, second and third don't. Requires max_spawn_depth>=2
(raised explicitly here) since the new default is 1 (flat).
"""
mock_creds.return_value = {
"provider": None, "base_url": None,
"api_key": None, "api_mode": None, "model": None,
}
parent = _make_mock_parent(depth=0)
parent.enabled_toolsets = ["terminal", "file", "delegation"]
built_toolsets = []
def _factory(*a, **kw):
m = _make_role_mock_child()
built_toolsets.append(kw.get("enabled_toolsets"))
return m
with patch("run_agent.AIAgent", side_effect=_factory):
delegate_task(
tasks=[
{"goal": "A", "role": "orchestrator"},
{"goal": "B", "role": "leaf"},
{"goal": "C"}, # no role → falls back to top_role (leaf)
],
parent_agent=parent,
)
self.assertIn("delegation", built_toolsets[0])
self.assertNotIn("delegation", built_toolsets[1])
self.assertNotIn("delegation", built_toolsets[2])
@patch("tools.delegate_tool._resolve_delegation_credentials")
@patch("tools.delegate_tool._load_config",
return_value={"max_spawn_depth": 2})
def test_intersection_preserves_delegation_bound(
self, mock_cfg, mock_creds
):
"""Design decision: orchestrator capability is granted by role,
NOT inherited from the parent's toolset. A parent without
'delegation' in its enabled_toolsets can still spawn an
orchestrator child — the re-add in _build_child_agent runs
unconditionally for orchestrators (when max_spawn_depth allows).
If you want to change to "parent must have delegation too",
update _build_child_agent to check parent_toolsets before the
re-add and update this test to match.
"""
mock_creds.return_value = {
"provider": None, "base_url": None,
"api_key": None, "api_mode": None, "model": None,
}
parent = _make_mock_parent(depth=0)
parent.enabled_toolsets = ["terminal", "file"] # no delegation
with patch("run_agent.AIAgent") as MockAgent:
mock_child = _make_role_mock_child()
MockAgent.return_value = mock_child
delegate_task(goal="test", role="orchestrator",
parent_agent=parent)
self.assertIn("delegation", MockAgent.call_args[1]["enabled_toolsets"])
class TestOrchestratorEndToEnd(unittest.TestCase):
"""End-to-end: parent -> orchestrator -> two-leaf nested orchestration.
Covers the acceptance gate: parent delegates to an orchestrator
child; the orchestrator delegates to two leaf grandchildren; the
role/toolset/depth chain all resolve correctly.
Mock strategy: a single AIAgent patch with a side_effect factory
that keys on the child's ephemeral_system_prompt — orchestrator
prompts contain the string "Orchestrator Role" (see
_build_child_system_prompt), leaves don't. The orchestrator
mock's run_conversation recursively calls delegate_task with
tasks=[{goal:...},{goal:...}] to spawn two leaves. This keeps
the test in one patch context and avoids depth-indexed nesting.
"""
@patch("tools.delegate_tool._resolve_delegation_credentials")
@patch("tools.delegate_tool._load_config",
return_value={"max_spawn_depth": 2})
def test_end_to_end_nested_orchestration(self, mock_cfg, mock_creds):
mock_creds.return_value = {
"provider": None, "base_url": None,
"api_key": None, "api_mode": None, "model": None,
}
parent = _make_mock_parent(depth=0)
parent.enabled_toolsets = ["terminal", "file", "delegation"]
# (enabled_toolsets, _delegate_role) for each agent built
built_agents: list = []
# Keep the orchestrator mock around so the re-entrant delegate_task
# can reach it via closure.
orch_mock = {}
def _factory(*a, **kw):
prompt = kw.get("ephemeral_system_prompt", "") or ""
is_orchestrator = "Orchestrator Role" in prompt
m = _make_role_mock_child()
built_agents.append({
"enabled_toolsets": list(kw.get("enabled_toolsets") or []),
"is_orchestrator_prompt": is_orchestrator,
})
if is_orchestrator:
# Prepare the orchestrator mock as a parent-capable object
# so the nested delegate_task call succeeds.
m._delegate_depth = 1
m._delegate_role = "orchestrator"
m._active_children = []
m._active_children_lock = threading.Lock()
m._session_db = None
m.platform = "cli"
m.enabled_toolsets = ["terminal", "file", "delegation"]
m.api_key = "***"
m.base_url = ""
m.provider = None
m.api_mode = None
m.providers_allowed = None
m.providers_ignored = None
m.providers_order = None
m.provider_sort = None
m._print_fn = None
m.tool_progress_callback = None
m.thinking_callback = None
orch_mock["agent"] = m
def _orchestrator_run(user_message=None):
# Re-entrant: orchestrator spawns two leaves
delegate_task(
tasks=[{"goal": "leaf-A"}, {"goal": "leaf-B"}],
parent_agent=m,
)
return {
"final_response": "orchestrated 2 workers",
"completed": True, "api_calls": 1,
"messages": [],
}
m.run_conversation.side_effect = _orchestrator_run
return m
with patch("run_agent.AIAgent", side_effect=_factory) as MockAgent:
delegate_task(
goal="top-level orchestration",
role="orchestrator",
parent_agent=parent,
)
# 1 orchestrator + 2 leaf grandchildren = 3 agents
self.assertEqual(MockAgent.call_count, 3)
# First built = the orchestrator (parent's direct child)
self.assertIn("delegation", built_agents[0]["enabled_toolsets"])
self.assertTrue(built_agents[0]["is_orchestrator_prompt"])
# Next two = leaves (grandchildren)
self.assertNotIn("delegation", built_agents[1]["enabled_toolsets"])
self.assertFalse(built_agents[1]["is_orchestrator_prompt"])
self.assertNotIn("delegation", built_agents[2]["enabled_toolsets"])
self.assertFalse(built_agents[2]["is_orchestrator_prompt"])
if __name__ == "__main__":
unittest.main()
+3 -47
View File
@@ -136,49 +136,6 @@ class TestGptLiteralFamily:
assert p["image_size"] == "1024x1536"
class TestGptImage2Presets:
"""GPT Image 2 uses preset enum sizes (not literal strings like 1.5).
Mapped to 4:3 variants so we stay above the 655,360 min-pixel floor
(16:9 presets at 1024x576 = 589,824 would be rejected)."""
def test_gpt2_landscape_uses_4_3_preset(self, image_tool):
p = image_tool._build_fal_payload("fal-ai/gpt-image-2", "hello", "landscape")
assert p["image_size"] == "landscape_4_3"
def test_gpt2_square_uses_square_hd(self, image_tool):
p = image_tool._build_fal_payload("fal-ai/gpt-image-2", "hello", "square")
assert p["image_size"] == "square_hd"
def test_gpt2_portrait_uses_4_3_preset(self, image_tool):
p = image_tool._build_fal_payload("fal-ai/gpt-image-2", "hello", "portrait")
assert p["image_size"] == "portrait_4_3"
def test_gpt2_quality_pinned_to_medium(self, image_tool):
p = image_tool._build_fal_payload("fal-ai/gpt-image-2", "hi", "square")
assert p["quality"] == "medium"
def test_gpt2_strips_byok_and_unsupported_overrides(self, image_tool):
"""openai_api_key (BYOK) is deliberately not in supports — all users
route through shared FAL billing. guidance_scale/num_inference_steps
aren't in the model's API surface either."""
p = image_tool._build_fal_payload(
"fal-ai/gpt-image-2", "hi", "square",
overrides={
"openai_api_key": "sk-...",
"guidance_scale": 7.5,
"num_inference_steps": 50,
},
)
assert "openai_api_key" not in p
assert "guidance_scale" not in p
assert "num_inference_steps" not in p
def test_gpt2_strips_seed_even_if_passed(self, image_tool):
# seed isn't in the GPT Image 2 API surface either.
p = image_tool._build_fal_payload("fal-ai/gpt-image-2", "hi", "square", seed=42)
assert "seed" not in p
# ---------------------------------------------------------------------------
# Supports whitelist — the main safety property
# ---------------------------------------------------------------------------
@@ -274,11 +231,10 @@ class TestGptQualityPinnedToMedium:
assert p["quality"] == "medium"
def test_non_gpt_model_never_gets_quality(self, image_tool):
"""quality is only meaningful for GPT-Image models (1.5, 2) — other
models should never have it in their payload."""
gpt_models = {"fal-ai/gpt-image-1.5", "fal-ai/gpt-image-2"}
"""quality is only meaningful for gpt-image-1.5 — other models should
never have it in their payload."""
for mid in image_tool.FAL_MODELS:
if mid in gpt_models:
if mid == "fal-ai/gpt-image-1.5":
continue
p = image_tool._build_fal_payload(mid, "hi", "square")
assert "quality" not in p, f"{mid} unexpectedly has 'quality' in payload"
+24 -291
View File
@@ -16,7 +16,6 @@ The parent's context only sees the delegation call and the summary result,
never the child's intermediate tool calls or reasoning.
"""
import enum
import json
import logging
logger = logging.getLogger(__name__)
@@ -42,12 +41,6 @@ DELEGATE_BLOCKED_TOOLS = frozenset([
# Build a description fragment listing toolsets available for subagents.
# Excludes toolsets where ALL tools are blocked, composite/platform toolsets
# (hermes-* prefixed), and scenario toolsets.
#
# NOTE: "delegation" is in this exclusion set so the subagent-facing
# capability hint string (_TOOLSET_LIST_STR) doesn't advertise it as a
# toolset to request explicitly — the correct mechanism for nested
# delegation is role='orchestrator', which re-adds "delegation" in
# _build_child_agent regardless of this exclusion.
_EXCLUDED_TOOLSET_NAMES = frozenset({"debugging", "safe", "delegation", "moa", "rl"})
_SUBAGENT_TOOLSETS = sorted(
name for name, defn in TOOLSETS.items()
@@ -58,36 +51,13 @@ _SUBAGENT_TOOLSETS = sorted(
_TOOLSET_LIST_STR = ", ".join(f"'{n}'" for n in _SUBAGENT_TOOLSETS)
_DEFAULT_MAX_CONCURRENT_CHILDREN = 3
MAX_DEPTH = 1 # flat by default: parent (0) -> child (1); grandchild rejected unless max_spawn_depth raised.
# Configurable depth cap consulted by _get_max_spawn_depth; MAX_DEPTH
# stays as the default fallback and is still the symbol tests import.
_MIN_SPAWN_DEPTH = 1
_MAX_SPAWN_DEPTH_CAP = 3
def _normalize_role(r: Optional[str]) -> str:
"""Normalise a caller-provided role to 'leaf' or 'orchestrator'.
None/empty -> 'leaf'. Unknown strings coerce to 'leaf' with a
warning log (matches the silent-degrade pattern of
_get_orchestrator_enabled). _build_child_agent adds a second
degrade layer for depth/kill-switch bounds.
"""
if r is None or not r:
return "leaf"
r_norm = str(r).strip().lower()
if r_norm in ("leaf", "orchestrator"):
return r_norm
logger.warning("Unknown delegate_task role=%r, coercing to 'leaf'", r)
return "leaf"
MAX_DEPTH = 2 # parent (0) -> child (1) -> grandchild rejected (2)
def _get_max_concurrent_children() -> int:
"""Read delegation.max_concurrent_children from config, falling back to
DELEGATION_MAX_CONCURRENT_CHILDREN env var, then the default (3).
Users can raise this as high as they want; only the floor (1) is enforced.
Uses the same ``_load_config()`` path that the rest of ``delegate_task``
uses, keeping config priority consistent (config.yaml > env > default).
"""
@@ -101,108 +71,18 @@ def _get_max_concurrent_children() -> int:
"delegation.max_concurrent_children=%r is not a valid integer; "
"using default %d", val, _DEFAULT_MAX_CONCURRENT_CHILDREN,
)
return _DEFAULT_MAX_CONCURRENT_CHILDREN
env_val = os.getenv("DELEGATION_MAX_CONCURRENT_CHILDREN")
if env_val:
try:
return max(1, int(env_val))
except (TypeError, ValueError):
return _DEFAULT_MAX_CONCURRENT_CHILDREN
pass
return _DEFAULT_MAX_CONCURRENT_CHILDREN
def _get_max_spawn_depth() -> int:
    """Read delegation.max_spawn_depth from config, clamped to the allowed range.

    depth 0 = parent agent. max_spawn_depth = N means agents at depths
    0..N-1 can spawn; depth N is the leaf floor.

    Returns:
        ``MAX_DEPTH`` when the key is absent or cannot be converted to an
        integer (a warning is logged in the latter case); otherwise the
        configured value clamped to
        ``[_MIN_SPAWN_DEPTH, _MAX_SPAWN_DEPTH_CAP]``, with a warning when
        clamping changed it.
    """
    cfg = _load_config()
    val = cfg.get("max_spawn_depth")
    if val is None:
        return MAX_DEPTH
    try:
        ival = int(val)
    except (TypeError, ValueError, OverflowError):
        # OverflowError covers int(float("inf")), which a YAML ``.inf``
        # value would produce; treat it like any other invalid setting
        # instead of letting it propagate to the caller.
        logger.warning(
            "delegation.max_spawn_depth=%r is not a valid integer; "
            "using default %d", val, MAX_DEPTH,
        )
        return MAX_DEPTH
    clamped = max(_MIN_SPAWN_DEPTH, min(_MAX_SPAWN_DEPTH_CAP, ival))
    if clamped != ival:
        logger.warning(
            "delegation.max_spawn_depth=%d out of range [%d, %d]; "
            "clamping to %d", ival, _MIN_SPAWN_DEPTH,
            _MAX_SPAWN_DEPTH_CAP, clamped,
        )
    return clamped
def _get_orchestrator_enabled() -> bool:
    """Global kill switch for the orchestrator role.

    Reads ``orchestrator_enabled`` from the delegation config.

    Returns:
        The configured boolean; for strings, True only for
        "true"/"1"/"yes"/"on" (case-insensitive, whitespace-trimmed);
        for integers, their truthiness; True when the key is absent or
        holds any other type.
    """
    cfg = _load_config()
    val = cfg.get("orchestrator_enabled", True)
    if isinstance(val, bool):
        return val
    # Accept "true"/"false" strings from YAML that doesn't auto-coerce.
    if isinstance(val, str):
        return val.strip().lower() in ("true", "1", "yes", "on")
    # A numeric 0/1 must mean the same as the "0"/"1" strings above;
    # otherwise ``orchestrator_enabled: 0`` would leave the feature on.
    if isinstance(val, int):
        return bool(val)
    return True
DEFAULT_MAX_ITERATIONS = 50
_HEARTBEAT_INTERVAL = 30 # seconds between parent activity heartbeats during delegation
DEFAULT_TOOLSETS = ["terminal", "file", "web"]
# ---------------------------------------------------------------------------
# Delegation progress event types
# ---------------------------------------------------------------------------
class DelegateEvent(str, enum.Enum):
    """Formal event types emitted during delegation progress.

    ``_build_child_progress_callback`` normalises incoming legacy strings
    (``tool.started``, ``_thinking``, ...) to these enum values via
    ``_LEGACY_EVENT_MAP``.  External consumers (gateway SSE, ACP adapter,
    CLI) still receive the legacy strings during the deprecation window.

    TASK_SPAWNED / TASK_COMPLETED / TASK_FAILED are reserved for
    future orchestrator lifecycle events and are not currently emitted.

    Because the class mixes in ``str``, each member compares equal to its
    ``delegate.*`` string value.
    """

    # Reserved lifecycle events (see docstring) plus the generic progress event.
    TASK_SPAWNED = "delegate.task_spawned"
    TASK_PROGRESS = "delegate.task_progress"
    TASK_COMPLETED = "delegate.task_completed"
    TASK_FAILED = "delegate.task_failed"
    # Events that have a legacy-string equivalent in _LEGACY_EVENT_MAP.
    TASK_THINKING = "delegate.task_thinking"
    TASK_TOOL_STARTED = "delegate.tool_started"
    TASK_TOOL_COMPLETED = "delegate.tool_completed"
# Legacy event strings -> DelegateEvent mapping.
# Incoming child-agent events use the old names; the progress callback looks
# them up here to normalise them.  Two legacy names ("_thinking" and
# "reasoning.available") collapse onto the same TASK_THINKING member.
_LEGACY_EVENT_MAP: Dict[str, DelegateEvent] = {
    "_thinking": DelegateEvent.TASK_THINKING,
    "reasoning.available": DelegateEvent.TASK_THINKING,
    "tool.started": DelegateEvent.TASK_TOOL_STARTED,
    "tool.completed": DelegateEvent.TASK_TOOL_COMPLETED,
    "subagent_progress": DelegateEvent.TASK_PROGRESS,
}
def check_delegate_requirements() -> bool:
    """Report whether the delegation tool can be offered.

    Delegation depends on nothing outside the process, so the answer is
    unconditionally True.
    """
    return True
@@ -213,18 +93,8 @@ def _build_child_system_prompt(
context: Optional[str] = None,
*,
workspace_path: Optional[str] = None,
role: str = "leaf",
max_spawn_depth: int = 2,
child_depth: int = 1,
) -> str:
"""Build a focused system prompt for a child agent.
When role='orchestrator', appends a delegation-capability block
modeled on OpenClaw's buildSubagentSystemPrompt (canSpawn branch at
inspiration/openclaw/src/agents/subagent-system-prompt.ts:63-95).
The depth note is literal truth (grounded in the passed config) so
the LLM doesn't confabulate nesting capabilities that don't exist.
"""
"""Build a focused system prompt for a child agent."""
parts = [
"You are a focused subagent working on a specific delegated task.",
"",
@@ -250,37 +120,6 @@ def _build_child_system_prompt(
"Be thorough but concise -- your response is returned to the "
"parent agent as a summary."
)
if role == "orchestrator":
child_note = (
"Your own children MUST be leaves (cannot delegate further) "
"because they would be at the depth floor — you cannot pass "
"role='orchestrator' to your own delegate_task calls."
if child_depth + 1 >= max_spawn_depth else
"Your own children can themselves be orchestrators or leaves, "
"depending on the `role` you pass to delegate_task. Default is "
"'leaf'; pass role='orchestrator' explicitly when a child "
"needs to further decompose its work."
)
parts.append(
"\n## Subagent Spawning (Orchestrator Role)\n"
"You have access to the `delegate_task` tool and CAN spawn "
"your own subagents to parallelize independent work.\n\n"
"WHEN to delegate:\n"
"- The goal decomposes into 2+ independent subtasks that can "
"run in parallel (e.g. research A and B simultaneously).\n"
"- A subtask is reasoning-heavy and would flood your context "
"with intermediate data.\n\n"
"WHEN NOT to delegate:\n"
"- Single-step mechanical work — do it directly.\n"
"- Trivial tasks you can execute in one or two tool calls.\n"
"- Re-delegating your entire assigned goal to one worker "
"(that's just pass-through with no value added).\n\n"
"Coordinate your workers' results and synthesize them before "
"reporting back to your parent. You are responsible for the "
"final summary, not your workers.\n\n"
f"NOTE: You are at depth {child_depth}. The delegation tree "
f"is capped at max_spawn_depth={max_spawn_depth}. {child_note}"
)
return "\n".join(parts)
@@ -358,9 +197,10 @@ def _build_child_progress_callback(task_index: int, goal: str, parent_agent, tas
except Exception as e:
logger.debug("Parent callback failed: %s", e)
def _callback(event_type, tool_name: str = None, preview: str = None, args=None, **kwargs):
# Lifecycle events emitted by the orchestrator itself — handled
# before enum normalisation since they are not part of DelegateEvent.
def _callback(event_type: str, tool_name: str = None, preview: str = None, args=None, **kwargs):
# event_type is one of: "tool.started", "tool.completed",
# "reasoning.available", "_thinking", "subagent.*"
if event_type == "subagent.start":
if spinner and goal_label:
short = (goal_label[:55] + "...") if len(goal_label) > 55 else goal_label
@@ -375,21 +215,8 @@ def _build_child_progress_callback(task_index: int, goal: str, parent_agent, tas
_relay("subagent.complete", preview=preview, **kwargs)
return
# Normalise legacy strings, new-style "delegate.*" strings, and
# DelegateEvent enum values all to a single DelegateEvent. The
# original implementation only accepted the five legacy strings;
# enum-typed callers were silently dropped.
if isinstance(event_type, DelegateEvent):
event = event_type
else:
event = _LEGACY_EVENT_MAP.get(event_type)
if event is None:
try:
event = DelegateEvent(event_type)
except (ValueError, TypeError):
return # Unknown event — ignore
if event == DelegateEvent.TASK_THINKING:
# "_thinking" / reasoning events
if event_type in ("_thinking", "reasoning.available"):
text = preview or tool_name or ""
if spinner:
short = (text[:55] + "...") if len(text) > 55 else text
@@ -400,31 +227,11 @@ def _build_child_progress_callback(task_index: int, goal: str, parent_agent, tas
_relay("subagent.thinking", preview=text)
return
if event == DelegateEvent.TASK_TOOL_COMPLETED:
# tool.completed — no display needed here (spinner shows on started)
if event_type == "tool.completed":
return
if event == DelegateEvent.TASK_PROGRESS:
# Pre-batched progress summary relayed from a nested
# orchestrator's grandchild (upstream emits as
# parent_cb("subagent_progress", summary_string) where the
# summary lands in the tool_name positional slot). Treat as
# a pass-through: render distinctly (not via the tool-start
# emoji lookup, which would mistake the summary string for a
# tool name) and relay upward without re-batching.
summary_text = tool_name or preview or ""
if spinner and summary_text:
try:
spinner.print_above(f" {prefix}├─ 🔀 {summary_text}")
except Exception as e:
logger.debug("Spinner print_above failed: %s", e)
if parent_cb:
try:
parent_cb("subagent_progress", f"{prefix}{summary_text}")
except Exception as e:
logger.debug("Parent callback relay failed: %s", e)
return
# TASK_TOOL_STARTED — display and batch for parent relay
# tool.started — display and batch for parent relay
if spinner:
short = (preview[:35] + "...") if preview and len(preview) > 35 else (preview or "")
from agent.display import get_tool_emoji
@@ -473,10 +280,6 @@ def _build_child_agent(
# ACP transport overrides — lets a non-ACP parent spawn ACP child agents
override_acp_command: Optional[str] = None,
override_acp_args: Optional[List[str]] = None,
# Per-call role controlling whether the child can further delegate.
# 'leaf' (default) cannot; 'orchestrator' retains the delegation
# toolset subject to depth/kill-switch bounds applied below.
role: str = "leaf",
):
"""
Build a child AIAgent on the main thread (thread-safe construction).
@@ -489,17 +292,6 @@ def _build_child_agent(
"""
from run_agent import AIAgent
# ── Role resolution ─────────────────────────────────────────────────
# Honor the caller's role only when BOTH the kill switch and the
# child's depth allow it. This is the single point where role
# degrades to 'leaf' — keeps the rule predictable. Callers pass
# the normalised role (_normalize_role ran in delegate_task) so
# we only deal with 'leaf' or 'orchestrator' here.
child_depth = getattr(parent_agent, '_delegate_depth', 0) + 1
max_spawn = _get_max_spawn_depth()
orchestrator_ok = _get_orchestrator_enabled() and child_depth < max_spawn
effective_role = role if (role == "orchestrator" and orchestrator_ok) else "leaf"
# When no explicit toolsets given, inherit from parent's enabled toolsets
# so disabled tools (e.g. web) don't leak to subagents.
# Note: enabled_toolsets=None means "all tools enabled" (the default),
@@ -527,21 +319,8 @@ def _build_child_agent(
else:
child_toolsets = _strip_blocked_tools(DEFAULT_TOOLSETS)
# Orchestrators retain the 'delegation' toolset that _strip_blocked_tools
# removed. The re-add is unconditional on parent-toolset membership because
# orchestrator capability is granted by role, not inherited — see the
# test_intersection_preserves_delegation_bound test for the design rationale.
if effective_role == "orchestrator" and "delegation" not in child_toolsets:
child_toolsets.append("delegation")
workspace_hint = _resolve_workspace_hint(parent_agent)
child_prompt = _build_child_system_prompt(
goal, context,
workspace_path=workspace_hint,
role=effective_role,
max_spawn_depth=max_spawn,
child_depth=child_depth,
)
child_prompt = _build_child_system_prompt(goal, context, workspace_path=workspace_hint)
# Extract parent's API key so subagents inherit auth (e.g. Nous Portal).
parent_api_key = getattr(parent_agent, "api_key", None)
if (not parent_api_key) and hasattr(parent_agent, "_client_kwargs"):
@@ -627,10 +406,7 @@ def _build_child_agent(
)
child._print_fn = getattr(parent_agent, '_print_fn', None)
# Set delegation depth so children can't spawn grandchildren
child._delegate_depth = child_depth
# Stash the post-degrade role for introspection (leaf if the
# kill switch or depth bounded the caller's requested role).
child._delegate_role = effective_role
child._delegate_depth = getattr(parent_agent, '_delegate_depth', 0) + 1
# Share a credential pool with the child when possible so subagents can
# rotate credentials on rate limits instead of getting pinned to one key.
@@ -915,40 +691,27 @@ def delegate_task(
max_iterations: Optional[int] = None,
acp_command: Optional[str] = None,
acp_args: Optional[List[str]] = None,
role: Optional[str] = None,
parent_agent=None,
) -> str:
"""
Spawn one or more child agents to handle delegated tasks.
Supports two modes:
- Single: provide goal (+ optional context, toolsets, role)
- Batch: provide tasks array [{goal, context, toolsets, role}, ...]
The 'role' parameter controls whether a child can further delegate:
'leaf' (default) cannot; 'orchestrator' retains the delegation
toolset and can spawn its own workers, bounded by
delegation.max_spawn_depth. Per-task role beats the top-level one.
- Single: provide goal (+ optional context, toolsets)
- Batch: provide tasks array [{goal, context, toolsets}, ...]
Returns JSON with results array, one entry per task.
"""
if parent_agent is None:
return tool_error("delegate_task requires a parent agent context.")
# Normalise the top-level role once; per-task overrides re-normalise.
top_role = _normalize_role(role)
# Depth limit — configurable via delegation.max_spawn_depth,
# default 2 for parity with the original MAX_DEPTH constant.
# Depth limit
depth = getattr(parent_agent, '_delegate_depth', 0)
max_spawn = _get_max_spawn_depth()
if depth >= max_spawn:
if depth >= MAX_DEPTH:
return json.dumps({
"error": (
f"Delegation depth limit reached (depth={depth}, "
f"max_spawn_depth={max_spawn}). Raise "
f"delegation.max_spawn_depth in config.yaml if deeper "
f"nesting is required (cap: {_MAX_SPAWN_DEPTH_CAP})."
f"Delegation depth limit reached ({MAX_DEPTH}). "
"Subagents cannot spawn further subagents."
)
})
@@ -980,8 +743,7 @@ def delegate_task(
)
task_list = tasks
elif goal and isinstance(goal, str) and goal.strip():
task_list = [{"goal": goal, "context": context,
"toolsets": toolsets, "role": top_role}]
task_list = [{"goal": goal, "context": context, "toolsets": toolsets}]
else:
return tool_error("Provide either 'goal' (single task) or 'tasks' (batch).")
@@ -1013,9 +775,6 @@ def delegate_task(
try:
for i, t in enumerate(task_list):
task_acp_args = t.get("acp_args") if "acp_args" in t else None
# Per-task role beats top-level; normalise again so unknown
# per-task values warn and degrade to leaf uniformly.
effective_role = _normalize_role(t.get("role") or top_role)
child = _build_child_agent(
task_index=i, goal=t["goal"], context=t.get("context"),
toolsets=t.get("toolsets") or toolsets, model=creds["model"],
@@ -1027,7 +786,6 @@ def delegate_task(
override_acp_args=task_acp_args if task_acp_args is not None else (
acp_args if acp_args is not None else creds.get("args")
),
role=effective_role,
)
# Override with correct parent tool names (before child construction mutated global)
child._delegate_saved_tool_names = _parent_tool_names
@@ -1361,7 +1119,7 @@ DELEGATE_TASK_SCHEMA = {
"never enter your context window.\n\n"
"TWO MODES (one of 'goal' or 'tasks' is required):\n"
"1. Single task: provide 'goal' (+ optional context, toolsets)\n"
"2. Batch (parallel): provide 'tasks' array with up to delegation.max_concurrent_children items (default 3). "
"2. Batch (parallel): provide 'tasks' array with up to 3 items. "
"All run concurrently and results are returned together.\n\n"
"WHEN TO USE delegate_task:\n"
"- Reasoning-heavy subtasks (debugging, code review, research synthesis)\n"
@@ -1374,14 +1132,8 @@ DELEGATE_TASK_SCHEMA = {
"IMPORTANT:\n"
"- Subagents have NO memory of your conversation. Pass all relevant "
"info (file paths, error messages, constraints) via the 'context' field.\n"
"- Leaf subagents (role='leaf', the default) CANNOT call: "
"delegate_task, clarify, memory, send_message, execute_code.\n"
"- Orchestrator subagents (role='orchestrator') retain "
"delegate_task so they can spawn their own workers, but still "
"cannot use clarify, memory, send_message, or execute_code. "
"Orchestrators are bounded by delegation.max_spawn_depth "
"(default 2) and can be disabled globally via "
"delegation.orchestrator_enabled=false.\n"
"- Subagents CANNOT call: delegate_task, clarify, memory, send_message, "
"execute_code.\n"
"- Each subagent gets its own terminal session (separate working directory and state).\n"
"- Results are always returned as an array, one entry per task."
),
@@ -1437,11 +1189,6 @@ DELEGATE_TASK_SCHEMA = {
"items": {"type": "string"},
"description": "Per-task ACP args override.",
},
"role": {
"type": "string",
"enum": ["leaf", "orchestrator"],
"description": "Per-task role override. See top-level 'role' for semantics.",
},
},
"required": ["goal"],
},
@@ -1461,19 +1208,6 @@ DELEGATE_TASK_SCHEMA = {
"Only set lower for simple tasks."
),
},
"role": {
"type": "string",
"enum": ["leaf", "orchestrator"],
"description": (
"Role of the child agent. 'leaf' (default) = focused "
"worker, cannot delegate further. 'orchestrator' = can "
"use delegate_task to spawn its own workers. Requires "
"delegation.max_spawn_depth >= 2 in config; ignored "
"(treated as 'leaf') when the child would exceed "
"max_spawn_depth or when "
"delegation.orchestrator_enabled=false."
),
},
"acp_command": {
"type": "string",
"description": (
@@ -1512,7 +1246,6 @@ registry.register(
max_iterations=args.get("max_iterations"),
acp_command=args.get("acp_command"),
acp_args=args.get("acp_args"),
role=args.get("role"),
parent_agent=kw.get("parent_agent")),
check_fn=check_delegate_requirements,
emoji="🔀",
-32
View File
@@ -188,38 +188,6 @@ FAL_MODELS: Dict[str, Dict[str, Any]] = {
},
"upscale": False,
},
"fal-ai/gpt-image-2": {
"display": "GPT Image 2",
"speed": "~20s",
"strengths": "SOTA text rendering + CJK, world-aware photorealism",
"price": "$0.040.06/image",
# GPT Image 2 uses FAL's standard preset enum (unlike 1.5's literal
# dimensions). We map to the 4:3 variants — the 16:9 presets
# (1024x576) fall below GPT-Image-2's 655,360 min-pixel requirement
# and would be rejected. 4:3 keeps us above the minimum on all
# three aspect ratios.
"size_style": "image_size_preset",
"sizes": {
"landscape": "landscape_4_3", # 1024x768
"square": "square_hd", # 1024x1024
"portrait": "portrait_4_3", # 768x1024
},
"defaults": {
# Same quality pinning as gpt-image-1.5: medium keeps Nous
# Portal billing predictable. "high" is 3-4x the per-image
# cost at the same size; "low" is too rough for production use.
"quality": "medium",
"num_images": 1,
"output_format": "png",
},
"supports": {
"prompt", "image_size", "quality", "num_images", "output_format",
"sync_mode",
# openai_api_key (BYOK) intentionally omitted — all users go
# through the shared FAL billing path.
},
"upscale": False,
},
"fal-ai/ideogram/v3": {
"display": "Ideogram V3",
"speed": "~5s",
+16 -51
View File
@@ -40,22 +40,13 @@ _SLASH_WORKER_TIMEOUT_S = max(5.0, float(os.environ.get("HERMES_TUI_SLASH_TIMEOU
# ── Async RPC dispatch (#12546) ──────────────────────────────────────
# A handful of handlers block the dispatcher loop in entry.py for seconds
# to minutes (slash.exec, cli.exec, shell.exec, session.resume,
# session.branch, skills.manage). While they're running, inbound RPCs —
# notably approval.respond and session.interrupt — sit unread in the
# stdin pipe. We route only those slow handlers onto a small thread pool;
# everything else stays on the main thread so ordering stays sane for the
# fast path. write_json is already _stdout_lock-guarded, so concurrent
# response writes are safe.
_LONG_HANDLERS = frozenset(
{
"cli.exec",
"session.branch",
"session.resume",
"shell.exec",
"skills.manage",
"slash.exec",
}
)
# session.branch). While they're running, inbound RPCs — notably
# approval.respond and session.interrupt — sit unread in the stdin pipe.
# We route only those slow handlers onto a small thread pool; everything
# else stays on the main thread so ordering stays sane for the fast path.
# write_json is already _stdout_lock-guarded, so concurrent response
# writes are safe.
_LONG_HANDLERS = frozenset({"cli.exec", "session.branch", "session.resume", "shell.exec", "slash.exec"})
_pool = concurrent.futures.ThreadPoolExecutor(
max_workers=max(2, int(os.environ.get("HERMES_TUI_RPC_POOL_WORKERS", "4") or 4)),
@@ -538,12 +529,6 @@ def _apply_model_switch(sid: str, session: dict, raw_input: str) -> dict:
_emit("session.info", sid, _session_info(agent))
os.environ["HERMES_MODEL"] = result.new_model
# Keep the process-level provider env var in sync with the user's explicit
# choice so any ambient re-resolution (credential pool refresh, compressor
# rebuild, aux clients) resolves to the new provider instead of the
# original one persisted in config or env.
if result.target_provider:
os.environ["HERMES_INFERENCE_PROVIDER"] = result.target_provider
if persist_global:
_persist_model_switch(result)
return {"value": result.new_model, "warning": result.warning_message or ""}
@@ -1951,15 +1936,12 @@ def _(rid, params: dict) -> dict:
_write_config_key("display.details_mode", "expanded" if nv == "full" else "collapsed")
return _ok(rid, {"key": key, "value": nv})
if key in ("compact", "statusbar", "mouse"):
# compact defaults off, statusbar + mouse default on.
defaults = {"tui_compact": False, "tui_statusbar": True, "tui_mouse": True}
def_keys = {"compact": "tui_compact", "statusbar": "tui_statusbar", "mouse": "tui_mouse"}
def_key = def_keys[key]
if key in ("compact", "statusbar"):
raw = str(value or "").strip().lower()
cfg0 = _load_cfg()
d0 = cfg0.get("display") if isinstance(cfg0.get("display"), dict) else {}
cur_b = bool(d0.get(def_key, defaults[def_key]))
def_key = "tui_compact" if key == "compact" else "tui_statusbar"
cur_b = bool(d0.get(def_key, False if key == "compact" else True))
if raw in ("", "toggle"):
nv_b = not cur_b
elif raw == "on":
@@ -2056,9 +2038,6 @@ def _(rid, params: dict) -> dict:
if key == "statusbar":
on = bool(_load_cfg().get("display", {}).get("tui_statusbar", True))
return _ok(rid, {"value": "on" if on else "off"})
if key == "mouse":
on = bool(_load_cfg().get("display", {}).get("tui_mouse", True))
return _ok(rid, {"value": "on" if on else "off"})
if key == "mtime":
cfg_path = _hermes_home / "config.yaml"
try:
@@ -2112,7 +2091,6 @@ _TUI_HIDDEN: frozenset[str] = frozenset({
_TUI_EXTRA: list[tuple[str, str, str]] = [
("/compact", "Toggle compact display mode", "TUI"),
("/logs", "Show recent gateway log lines", "TUI"),
("/mouse", "Toggle SGR mouse tracking (turn off if your terminal prints escape codes on mouse move)", "TUI"),
]
# Commands that queue messages onto _pending_input in the CLI.
@@ -2439,22 +2417,15 @@ def _(rid, params: dict) -> dict:
]
return _ok(rid, {"items": items})
# Accept both `@folder:path` and the bare `@folder` form so the user
# sees directory listings as soon as they finish typing the keyword,
# without first accepting the static `@folder:` hint.
if is_context and query in ("file", "folder"):
prefix_tag, path_part = query, ""
elif is_context and query.startswith(("file:", "folder:")):
prefix_tag, _, tail = query.partition(":")
path_part = tail
if is_context and query.startswith(("file:", "folder:")):
prefix_tag = query.split(":", 1)[0]
path_part = query.split(":", 1)[1] or "."
else:
prefix_tag = ""
path_part = query if is_context else query
path_part = query if not is_context else query
expanded = _normalize_completion_path(path_part) if path_part else "."
if expanded == "." or not expanded:
search_dir, match = ".", ""
elif expanded.endswith("/"):
expanded = _normalize_completion_path(path_part)
if expanded.endswith("/"):
search_dir, match = expanded, ""
else:
search_dir = os.path.dirname(expanded) or "."
@@ -2463,7 +2434,6 @@ def _(rid, params: dict) -> dict:
if not os.path.isdir(search_dir):
return _ok(rid, {"items": []})
want_dir = prefix_tag == "folder"
match_lower = match.lower()
for entry in sorted(os.listdir(search_dir)):
if match and not entry.lower().startswith(match_lower):
@@ -2472,11 +2442,6 @@ def _(rid, params: dict) -> dict:
continue
full = os.path.join(search_dir, entry)
is_dir = os.path.isdir(full)
# Explicit `@folder:` / `@file:` — honour the user's filter. Skip
# the opposite kind instead of auto-rewriting the completion tag,
# which used to defeat the prefix and let `@folder:` list files.
if prefix_tag and want_dir != is_dir:
continue
rel = os.path.relpath(full)
suffix = "/" if is_dir else ""
-1
View File
@@ -30,7 +30,6 @@ export { useTerminalViewport } from './src/ink/hooks/use-terminal-viewport.ts'
export { default as measureElement } from './src/ink/measure-element.ts'
export { createRoot, default as render, renderSync } from './src/ink/root.ts'
export type { Instance, RenderOptions, Root } from './src/ink/root.ts'
export { setAltScreenMouseTracking } from './src/ink/set-alt-screen-mouse-tracking.ts'
export { stringWidth } from './src/ink/stringWidth.ts'
export { default as TextInput, UncontrolledTextInput } from 'ink-text-input'
export type { Props as TextInputProps } from 'ink-text-input'
@@ -22,6 +22,5 @@ export { useTerminalTitle } from './ink/hooks/use-terminal-title.js'
export { useTerminalViewport } from './ink/hooks/use-terminal-viewport.js'
export { default as measureElement } from './ink/measure-element.js'
export { createRoot, default as render, renderSync } from './ink/root.js'
export { setAltScreenMouseTracking } from './ink/set-alt-screen-mouse-tracking.js'
export { stringWidth } from './ink/stringWidth.js'
export { default as TextInput, UncontrolledTextInput } from 'ink-text-input'
@@ -1,13 +1,11 @@
import { describe, expect, it } from 'vitest'
import { parseMultipleKeypresses } from '../parse-keypress.js'
import { InputEvent } from './input-event.js'
import { parseMultipleKeypresses } from '../parse-keypress.js'
function parseOne(sequence: string) {
const [keys] = parseMultipleKeypresses({ incomplete: '', mode: 'NORMAL' }, sequence)
expect(keys).toHaveLength(1)
return keys[0]!
}
@@ -1094,30 +1094,6 @@ export default class Ink {
return this.altScreenActive
}
/**
 * Switch SGR mouse tracking (DEC 1000/1002/1003/1006) on or off at runtime
 * without re-entering the alt screen.
 *
 * The internal `altScreenMouseTracking` flag is always updated so that the
 * resize / resume / reenterAltScreen paths respect the new state. The
 * ENABLE/DISABLE bytes are written only while the alt screen is active,
 * the instance is not paused, and stdout is a TTY.
 *
 * Idempotent: calling with the current state does nothing. Intended for
 * live `/mouse on|off` toggling - the <AlternateScreen> prop controls
 * setup/teardown at mount/unmount, while this handles in-session switches
 * without a screen flicker.
 */
setAltScreenMouseTracking(enabled: boolean): void {
  if (enabled === this.altScreenMouseTracking) {
    return
  }

  this.altScreenMouseTracking = enabled

  // Only touch the terminal when the escape sequence can take effect now.
  const canWrite = this.altScreenActive && !this.isPaused && this.options.stdout.isTTY

  if (canWrite) {
    this.options.stdout.write(enabled ? ENABLE_MOUSE_TRACKING : DISABLE_MOUSE_TRACKING)
  }
}
/**
* Re-assert terminal modes after a gap (>5s stdin silence or event-loop
* stall). Catches tmux detach→attach, ssh reconnect, and laptop
@@ -1,22 +0,0 @@
import instances from './instances.js'
/**
 * Turn SGR mouse tracking (DEC 1000/1002/1003/1006) on or off at runtime for
 * the Ink instance registered against the given stdout. Does nothing when no
 * Ink instance is attached to that stream.
 *
 * Meant for in-session `/mouse on|off` toggles. The <AlternateScreen> prop
 * owns setup/teardown at mount/unmount; this function sidesteps the full
 * alt-screen re-entry so the toggle is flicker-free.
 *
 * The call is forwarded to the instance method, which updates the
 * instance's internal `altScreenMouseTracking` flag so the resize /
 * SIGCONT-resume / re-enter-alt paths respect the new state.
 *
 * `stdout` defaults to `process.stdout` - pass a specific stream for tests
 * or multi-output setups.
 */
export function setAltScreenMouseTracking(
  enabled: boolean,
  stdout: NodeJS.WriteStream = process.stdout
): void {
  const ink = instances.get(stdout)

  if (ink != null) {
    ink.setAltScreenMouseTracking(enabled)
  }
}
+5 -20
View File
@@ -28,9 +28,7 @@ describe('readClipboardText', () => {
it('tries powershell.exe first on WSL', async () => {
const run = vi.fn().mockResolvedValue({ stdout: 'from wsl\n' })
await expect(readClipboardText('linux', run, { WSL_INTEROP: '/tmp/socket' } as NodeJS.ProcessEnv)).resolves.toBe(
'from wsl\n'
)
await expect(readClipboardText('linux', run, { WSL_INTEROP: '/tmp/socket' } as NodeJS.ProcessEnv)).resolves.toBe('from wsl\n')
expect(run).toHaveBeenCalledWith(
'powershell.exe',
['-NoProfile', '-NonInteractive', '-Command', 'Get-Clipboard -Raw'],
@@ -41,9 +39,7 @@ describe('readClipboardText', () => {
it('uses wl-paste on Wayland Linux', async () => {
const run = vi.fn().mockResolvedValue({ stdout: 'from wayland\n' })
await expect(readClipboardText('linux', run, { WAYLAND_DISPLAY: 'wayland-1' } as NodeJS.ProcessEnv)).resolves.toBe(
'from wayland\n'
)
await expect(readClipboardText('linux', run, { WAYLAND_DISPLAY: 'wayland-1' } as NodeJS.ProcessEnv)).resolves.toBe('from wayland\n')
expect(run).toHaveBeenCalledWith(
'wl-paste',
['--type', 'text'],
@@ -57,9 +53,7 @@ describe('readClipboardText', () => {
.mockRejectedValueOnce(new Error('wl-paste missing'))
.mockResolvedValueOnce({ stdout: 'from xclip\n' })
await expect(readClipboardText('linux', run, { WAYLAND_DISPLAY: 'wayland-1' } as NodeJS.ProcessEnv)).resolves.toBe(
'from xclip\n'
)
await expect(readClipboardText('linux', run, { WAYLAND_DISPLAY: 'wayland-1' } as NodeJS.ProcessEnv)).resolves.toBe('from xclip\n')
expect(run).toHaveBeenNthCalledWith(
1,
'wl-paste',
@@ -77,9 +71,7 @@ describe('readClipboardText', () => {
it('returns null when every clipboard backend fails', async () => {
const run = vi.fn().mockRejectedValue(new Error('clipboard failed'))
await expect(
readClipboardText('linux', run, { WAYLAND_DISPLAY: 'wayland-1' } as NodeJS.ProcessEnv)
).resolves.toBeNull()
await expect(readClipboardText('linux', run, { WAYLAND_DISPLAY: 'wayland-1' } as NodeJS.ProcessEnv)).resolves.toBeNull()
})
})
@@ -109,7 +101,6 @@ describe('writeClipboardText', () => {
it('writes text to pbcopy on macOS', async () => {
const stdin = { end: vi.fn() }
const child = {
once: vi.fn((event: string, cb: (code?: number) => void) => {
if (event === 'close') {
@@ -120,15 +111,10 @@ describe('writeClipboardText', () => {
}),
stdin
}
const start = vi.fn().mockReturnValue(child)
await expect(writeClipboardText('hello world', 'darwin', start as any)).resolves.toBe(true)
expect(start).toHaveBeenCalledWith(
'pbcopy',
[],
expect.objectContaining({ stdio: ['pipe', 'ignore', 'ignore'], windowsHide: true })
)
expect(start).toHaveBeenCalledWith('pbcopy', [], expect.objectContaining({ stdio: ['pipe', 'ignore', 'ignore'], windowsHide: true }))
expect(stdin.end).toHaveBeenCalledWith('hello world')
})
@@ -143,7 +129,6 @@ describe('writeClipboardText', () => {
}),
stdin: { end: vi.fn() }
}
const start = vi.fn().mockReturnValue(child)
await expect(writeClipboardText('hello world', 'darwin', start as any)).resolves.toBe(false)
@@ -88,31 +88,6 @@ describe('createSlashHandler', () => {
expect(ctx.transcript.sys).toHaveBeenCalledWith('details: expanded')
})
it('toggles mouse tracking and persists it', async () => {
const ctx = buildCtx()
expect(getUiState().mouseTracking).toBe(true)
expect(createSlashHandler(ctx)('/mouse off')).toBe(true)
expect(getUiState().mouseTracking).toBe(false)
expect(ctx.gateway.rpc).toHaveBeenCalledWith('config.set', { key: 'mouse', value: 'off' })
await Promise.resolve()
expect(ctx.transcript.sys).toHaveBeenCalledWith('mouse off')
expect(createSlashHandler(ctx)('/mouse')).toBe(true)
expect(getUiState().mouseTracking).toBe(true)
expect(ctx.gateway.rpc).toHaveBeenCalledWith('config.set', { key: 'mouse', value: 'on' })
})
it('rejects unknown /mouse args', () => {
const ctx = buildCtx()
createSlashHandler(ctx)('/mouse wat')
expect(getUiState().mouseTracking).toBe(true)
expect(ctx.transcript.sys).toHaveBeenCalledWith('usage: /mouse [on|off|toggle]')
expect(ctx.gateway.rpc).not.toHaveBeenCalled()
})
it('shows tool enable usage when names are missing', () => {
const ctx = buildCtx()
-64
View File
@@ -1,64 +0,0 @@
import { describe, expect, it } from 'vitest'
import { ensureEmojiPresentation } from '../lib/emoji.js'
const VS16 = '\uFE0F'
describe('ensureEmojiPresentation', () => {
it('passes through ASCII unchanged', () => {
expect(ensureEmojiPresentation('hello world')).toBe('hello world')
expect(ensureEmojiPresentation('')).toBe('')
})
it('passes through emoji that already defaults to emoji presentation', () => {
expect(ensureEmojiPresentation('🚀 rocket')).toBe('🚀 rocket')
expect(ensureEmojiPresentation('😀')).toBe('😀')
})
it('injects VS16 after text-default emoji codepoints', () => {
expect(ensureEmojiPresentation('⚠ careful')).toBe(`${VS16} careful`)
expect(ensureEmojiPresentation(' info')).toBe(`${VS16} info`)
expect(ensureEmojiPresentation('love ❤ you')).toBe(`love ❤${VS16} you`)
expect(ensureEmojiPresentation('✔ done')).toBe(`${VS16} done`)
})
it('is idempotent when VS16 is already present', () => {
const already = `${VS16} ${VS16}${VS16}`
expect(ensureEmojiPresentation(already)).toBe(already)
expect(ensureEmojiPresentation(ensureEmojiPresentation('⚠'))).toBe(`${VS16}`)
})
it('leaves keycap sequences alone when the base is not a text-default emoji', () => {
expect(ensureEmojiPresentation('1\u20e3')).toBe('1\u20e3')
})
it('injects VS16 before ZWJ so text-default bases participate in emoji sequences', () => {
// ❤ + ZWJ + 🔥 → ❤️‍🔥 (heart on fire). Without VS16 between the heart
// and the ZWJ, terminals render the heart in text/monochrome form and
// the ZWJ ligature can fail to form.
const heartFire = '\u2764\u200d\ud83d\udd25'
expect(ensureEmojiPresentation(heartFire)).toBe(`\u2764\uFE0F\u200d\ud83d\udd25`)
})
it('leaves explicit text-presentation selector (VS15) alone', () => {
// `❤︎` (U+2764 + U+FE0E) asks for text presentation — injecting VS16
// would create an invalid double-variation sequence.
const explicitText = '\u2764\ufe0e'
expect(ensureEmojiPresentation(explicitText)).toBe(explicitText)
})
it('returns the original reference when no change is needed', () => {
const already = `${VS16} ${VS16}${VS16}`
// Reference equality — the lazy allocator should short-circuit to the
// input when nothing needed injection.
expect(ensureEmojiPresentation(already)).toBe(already)
})
it('handles mixed content', () => {
expect(ensureEmojiPresentation('⚠ path: /tmp/x ❤ done')).toBe(`${VS16} path: /tmp/x ❤${VS16} done`)
})
})
-1
View File
@@ -49,7 +49,6 @@ describe('readOsc52Clipboard', () => {
data: `c;${Buffer.from('queried text', 'utf8').toString('base64')}`,
type: 'osc'
})
const flush = vi.fn().mockResolvedValue(undefined)
await expect(readOsc52Clipboard({ flush, send })).resolves.toBe('queried text')
-1
View File
@@ -5,7 +5,6 @@ const originalPlatform = process.platform
async function importPlatform(platform: NodeJS.Platform) {
vi.resetModules()
Object.defineProperty(process, 'platform', { value: platform })
return import('../lib/platform.js')
}
+13 -40
View File
@@ -17,55 +17,28 @@ describe('terminalParityHints', () => {
it('suggests IDE setup only for VS Code-family terminals that still need bindings', async () => {
const readFile = vi.fn().mockRejectedValue(Object.assign(new Error('missing'), { code: 'ENOENT' }))
const hints = await terminalParityHints({ TERM_PROGRAM: 'vscode' } as NodeJS.ProcessEnv, {
fileOps: { readFile },
homeDir: '/tmp/fake-home'
})
const hints = await terminalParityHints(
{ TERM_PROGRAM: 'vscode' } as NodeJS.ProcessEnv,
{ fileOps: { readFile }, homeDir: '/tmp/fake-home' }
)
expect(hints.some(h => h.key === 'ide-setup')).toBe(true)
})
it('suppresses IDE setup hint when keybindings are already configured', async () => {
const readFile = vi.fn().mockResolvedValue(
JSON.stringify([
{
key: 'shift+enter',
command: 'workbench.action.terminal.sendSequence',
when: 'terminalFocus',
args: { text: '\\\r\n' }
},
{
key: 'ctrl+enter',
command: 'workbench.action.terminal.sendSequence',
when: 'terminalFocus',
args: { text: '\\\r\n' }
},
{
key: 'cmd+enter',
command: 'workbench.action.terminal.sendSequence',
when: 'terminalFocus',
args: { text: '\\\r\n' }
},
{
key: 'cmd+z',
command: 'workbench.action.terminal.sendSequence',
when: 'terminalFocus',
args: { text: '\u001b[122;9u' }
},
{
key: 'shift+cmd+z',
command: 'workbench.action.terminal.sendSequence',
when: 'terminalFocus',
args: { text: '\u001b[122;10u' }
}
{ key: 'shift+enter', command: 'workbench.action.terminal.sendSequence', when: 'terminalFocus', args: { text: '\\\r\n' } },
{ key: 'ctrl+enter', command: 'workbench.action.terminal.sendSequence', when: 'terminalFocus', args: { text: '\\\r\n' } },
{ key: 'cmd+enter', command: 'workbench.action.terminal.sendSequence', when: 'terminalFocus', args: { text: '\\\r\n' } },
{ key: 'cmd+z', command: 'workbench.action.terminal.sendSequence', when: 'terminalFocus', args: { text: '\u001b[122;9u' } },
{ key: 'shift+cmd+z', command: 'workbench.action.terminal.sendSequence', when: 'terminalFocus', args: { text: '\u001b[122;10u' } }
])
)
const hints = await terminalParityHints({ TERM_PROGRAM: 'vscode' } as NodeJS.ProcessEnv, {
fileOps: { readFile },
homeDir: '/tmp/fake-home'
})
const hints = await terminalParityHints(
{ TERM_PROGRAM: 'vscode' } as NodeJS.ProcessEnv,
{ fileOps: { readFile }, homeDir: '/tmp/fake-home' }
)
expect(hints.some(h => h.key === 'ide-setup')).toBe(false)
})
})
+3 -13
View File
@@ -21,17 +21,10 @@ describe('terminalSetup helpers', () => {
expect(getVSCodeStyleConfigDir('Code', 'darwin', {} as NodeJS.ProcessEnv, '/home/me')).toBe(
'/home/me/Library/Application Support/Code/User'
)
expect(getVSCodeStyleConfigDir('Code', 'linux', {} as NodeJS.ProcessEnv, '/home/me')).toBe(
'/home/me/.config/Code/User'
expect(getVSCodeStyleConfigDir('Code', 'linux', {} as NodeJS.ProcessEnv, '/home/me')).toBe('/home/me/.config/Code/User')
expect(getVSCodeStyleConfigDir('Code', 'win32', { APPDATA: 'C:/Users/me/AppData/Roaming' } as NodeJS.ProcessEnv, '/home/me')).toBe(
'C:/Users/me/AppData/Roaming/Code/User'
)
expect(
getVSCodeStyleConfigDir(
'Code',
'win32',
{ APPDATA: 'C:/Users/me/AppData/Roaming' } as NodeJS.ProcessEnv,
'/home/me'
)
).toBe('C:/Users/me/AppData/Roaming/Code/User')
})
it('strips line comments from keybindings JSON', () => {
@@ -86,7 +79,6 @@ describe('configureTerminalKeybindings', () => {
it('reports conflicts without overwriting existing bindings', async () => {
const mkdir = vi.fn().mockResolvedValue(undefined)
const readFile = vi.fn().mockResolvedValue(
JSON.stringify([
{
@@ -97,7 +89,6 @@ describe('configureTerminalKeybindings', () => {
}
])
)
const writeFile = vi.fn().mockResolvedValue(undefined)
const copyFile = vi.fn().mockResolvedValue(undefined)
@@ -218,7 +209,6 @@ describe('configureTerminalKeybindings', () => {
}
])
)
await expect(
shouldPromptForTerminalSetup({
env: { TERM_PROGRAM: 'vscode' } as NodeJS.ProcessEnv,
@@ -1,15 +1,11 @@
import { describe, expect, it } from 'vitest'
import { describe, expect, it, vi } from 'vitest'
import { looksLikeDroppedPath } from '../app/useComposerState.js'
describe('looksLikeDroppedPath', () => {
it('recognizes macOS screenshot temp paths and file URIs', () => {
expect(looksLikeDroppedPath('/var/folders/x/T/TemporaryItems/Screenshot\\ 2026-04-21\\ at\\ 1.04.43 PM.png')).toBe(
true
)
expect(
looksLikeDroppedPath('file:///var/folders/x/T/TemporaryItems/Screenshot%202026-04-21%20at%201.04.43%20PM.png')
).toBe(true)
expect(looksLikeDroppedPath('/var/folders/x/T/TemporaryItems/Screenshot\\ 2026-04-21\\ at\\ 1.04.43 PM.png')).toBe(true)
expect(looksLikeDroppedPath('file:///var/folders/x/T/TemporaryItems/Screenshot%202026-04-21%20at%201.04.43%20PM.png')).toBe(true)
})
it('rejects normal multiline or plain text paste', () => {
@@ -22,7 +22,6 @@ describe('applyDisplay', () => {
show_reasoning: true,
streaming: false,
tui_compact: true,
tui_mouse: false,
tui_statusbar: false
}
}
@@ -35,7 +34,6 @@ describe('applyDisplay', () => {
expect(s.compact).toBe(true)
expect(s.detailsMode).toBe('expanded')
expect(s.inlineDiffs).toBe(false)
expect(s.mouseTracking).toBe(false)
expect(s.showCost).toBe(true)
expect(s.showReasoning).toBe(true)
expect(s.statusBar).toBe(false)
@@ -50,7 +48,6 @@ describe('applyDisplay', () => {
const s = $uiState.get()
expect(setBell).toHaveBeenCalledWith(false)
expect(s.inlineDiffs).toBe(true)
expect(s.mouseTracking).toBe(true)
expect(s.showCost).toBe(false)
expect(s.showReasoning).toBe(false)
expect(s.statusBar).toBe(true)
+2
View File
@@ -1,6 +1,7 @@
import { GatewayProvider } from './app/gatewayContext.js'
import { useMainApp } from './app/useMainApp.js'
import { AppLayout } from './components/appLayout.js'
import { MOUSE_TRACKING } from './config/env.js'
import type { GatewayClient } from './gatewayClient.js'
export function App({ gw }: { gw: GatewayClient }) {
@@ -11,6 +12,7 @@ export function App({ gw }: { gw: GatewayClient }) {
<AppLayout
actions={appActions}
composer={appComposer}
mouseTracking={MOUSE_TRACKING}
progress={appProgress}
status={appStatus}
transcript={appTranscript}
+1 -1
View File
@@ -83,7 +83,6 @@ export interface UiState {
detailsMode: DetailsMode
info: null | SessionInfo
inlineDiffs: boolean
mouseTracking: boolean
showCost: boolean
showReasoning: boolean
sid: null | string
@@ -322,6 +321,7 @@ export interface AppLayoutTranscriptProps {
export interface AppLayoutProps {
actions: AppLayoutActions
composer: AppLayoutComposerProps
mouseTracking: boolean
progress: AppLayoutProgressProps
status: AppLayoutStatusProps
transcript: AppLayoutTranscriptProps
+14 -37
View File
@@ -240,28 +240,22 @@ export const coreCommands: SlashCommand[] = [
return ctx.transcript.sys('usage: /terminal-setup [auto|vscode|cursor|windsurf]')
}
const runner =
!target || target === 'auto'
? configureDetectedTerminalKeybindings()
: configureTerminalKeybindings(target as 'cursor' | 'vscode' | 'windsurf')
const runner = !target || target === 'auto' ? configureDetectedTerminalKeybindings() : configureTerminalKeybindings(target as 'cursor' | 'vscode' | 'windsurf')
void runner
.then(result => {
if (ctx.stale()) {
return
}
void runner.then(result => {
if (ctx.stale()) {
return
}
ctx.transcript.sys(result.message)
if (result.success && result.requiresRestart) {
ctx.transcript.sys('restart the IDE terminal for the new keybindings to take effect')
}
})
.catch(error => {
if (!ctx.stale()) {
ctx.transcript.sys(`terminal setup failed: ${String(error)}`)
}
})
ctx.transcript.sys(result.message)
if (result.success && result.requiresRestart) {
ctx.transcript.sys('restart the IDE terminal for the new keybindings to take effect')
}
}).catch(error => {
if (!ctx.stale()) {
ctx.transcript.sys(`terminal setup failed: ${String(error)}`)
}
})
}
},
@@ -293,23 +287,6 @@ export const coreCommands: SlashCommand[] = [
}
},
{
help: 'toggle SGR mouse tracking (wheel + click/drag). Turn off if your terminal prints escape codes on mouse move.',
name: 'mouse',
run: (arg, ctx) => {
const next = flagFromArg(arg, ctx.ui.mouseTracking)
if (next === null) {
return ctx.transcript.sys('usage: /mouse [on|off|toggle]')
}
patchUiState({ mouseTracking: next })
ctx.gateway.rpc<ConfigSetResponse>('config.set', { key: 'mouse', value: next ? 'on' : 'off' }).catch(() => {})
queueMicrotask(() => ctx.transcript.sys(`mouse ${next ? 'on' : 'off'}`))
}
},
{
help: 'inspect or enqueue a message',
name: 'queue',
+2 -17
View File
@@ -1,4 +1,4 @@
import type { SlashExecResponse, ToolsConfigureResponse } from '../../../gatewayTypes.js'
import type { ToolsConfigureResponse } from '../../../gatewayTypes.js'
import type { PanelSection } from '../../../types.js'
import { patchOverlayState } from '../../overlayStore.js'
import type { SlashCommand } from '../types.js'
@@ -207,25 +207,10 @@ export const opsCommands: SlashCommand[] = [
{
help: 'enable or disable tools (client-side history reset on change)',
name: 'tools',
run: (arg, ctx, cmd) => {
run: (arg, ctx) => {
const [subcommand, ...names] = arg.trim().split(/\s+/).filter(Boolean)
if (subcommand !== 'disable' && subcommand !== 'enable') {
ctx.gateway.gw
.request<SlashExecResponse>('slash.exec', { command: cmd.slice(1), session_id: ctx.sid })
.then(r => {
if (ctx.stale()) {
return
}
const body = r?.output || '/tools: no output'
const text = r?.warning ? `warning: ${r.warning}\n${body}` : body
const long = text.length > 180 || text.split('\n').filter(Boolean).length > 2
long ? ctx.transcript.page(text, 'Tools') : ctx.transcript.sys(text)
})
.catch(ctx.guardedErr)
return
}
+1 -1
View File
@@ -1,4 +1,4 @@
import { attachedImageNotice, introMsg, toTranscriptMessages } from '../../../domain/messages.js'
import { introMsg, toTranscriptMessages, attachedImageNotice } from '../../../domain/messages.js'
import type {
BackgroundStartResponse,
BtwStartResponse,
+2 -24
View File
@@ -95,36 +95,14 @@ class TurnController {
this.interrupted = true
gw.request<SessionInterruptResponse>('session.interrupt', { session_id: sid }).catch(() => {})
const segments = this.segmentMessages
const partial = this.bufRef.trimStart()
const tools = this.pendingSegmentTools
// Drain streaming/segment state off the nanostore before writing the
// preserved snapshot to the transcript — otherwise each flushed segment
// appears in both `turn.streamSegments` and the transcript for one frame.
partial ? appendMessage({ role: 'assistant', text: `${partial}\n\n*[interrupted]*` }) : sys('interrupted')
this.idle()
this.clearReasoning()
this.turnTools = []
patchTurnState({ activity: [], outcome: '' })
for (const msg of segments) {
appendMessage(msg)
}
// Always surface an interruption indicator — if there's an in-flight
// `partial` or pending tools, fold them into a single assistant message;
// otherwise emit a sys note so the transcript always records that the
// turn was cancelled, even when only prior `segments` were preserved.
if (partial || tools.length) {
appendMessage({
role: 'assistant',
text: partial ? `${partial}\n\n*[interrupted]*` : '*[interrupted]*',
...(tools.length && { tools })
})
} else {
sys('interrupted')
}
patchUiState({ status: 'interrupted' })
this.clearStatusTimer()
-2
View File
@@ -1,6 +1,5 @@
import { atom } from 'nanostores'
import { MOUSE_TRACKING } from '../config/env.js'
import { ZERO } from '../domain/usage.js'
import { DEFAULT_THEME } from '../theme.js'
@@ -13,7 +12,6 @@ const buildUiState = (): UiState => ({
detailsMode: 'collapsed',
info: null,
inlineDiffs: true,
mouseTracking: MOUSE_TRACKING,
showCost: false,
showReasoning: false,
sid: null,
+7 -30
View File
@@ -3,13 +3,12 @@ import { mkdtempSync, readFileSync, rmSync, writeFileSync } from 'node:fs'
import { tmpdir } from 'node:os'
import { join } from 'node:path'
import { useStdin } from '@hermes/ink'
import { useStore } from '@nanostores/react'
import { useCallback, useMemo, useState } from 'react'
import { useStdin } from '@hermes/ink'
import type { PasteEvent } from '../components/textInput.js'
import { LARGE_PASTE } from '../config/limits.js'
import type { ImageAttachResponse, InputDetectDropResponse } from '../gatewayTypes.js'
import { useCompletion } from '../hooks/useCompletion.js'
import { useInputHistory } from '../hooks/useInputHistory.js'
import { useQueue } from '../hooks/useQueue.js'
@@ -17,6 +16,7 @@ import { isUsableClipboardText, readClipboardText } from '../lib/clipboard.js'
import { readOsc52Clipboard } from '../lib/osc52.js'
import { isRemoteShellSession } from '../lib/terminalSetup.js'
import { pasteTokenLabel, stripTrailingPasteNewlines } from '../lib/text.js'
import type { ImageAttachResponse, InputDetectDropResponse } from '../gatewayTypes.js'
import type { MaybePromise, PasteSnippet, UseComposerStateOptions, UseComposerStateResult } from './interfaces.js'
import { $isBlocked } from './overlayStore.js'
@@ -79,8 +79,8 @@ export function looksLikeDroppedPath(text: string): boolean {
trimmed.startsWith("'/") ||
trimmed.startsWith('"~') ||
trimmed.startsWith("'~") ||
/^[A-Za-z]:[/\\]/.test(trimmed) ||
/^["'][A-Za-z]:[/\\]/.test(trimmed)
(/^[A-Za-z]:[/\\]/.test(trimmed)) ||
(/^["'][A-Za-z]:[/\\]/.test(trimmed))
) {
return true
}
@@ -90,19 +90,13 @@ export function looksLikeDroppedPath(text: string): boolean {
// unnecessary RPC round-trips.
if (trimmed.startsWith('/')) {
const rest = trimmed.slice(1)
return rest.includes('/') || rest.includes('.')
}
return false
}
export function useComposerState({
gw,
onClipboardPaste,
onImageAttached,
submitRef
}: UseComposerStateOptions): UseComposerStateResult {
export function useComposerState({ gw, onClipboardPaste, onImageAttached, submitRef }: UseComposerStateOptions): UseComposerStateResult {
const [input, setInput] = useState('')
const [inputBuf, setInputBuf] = useState<string[]>([])
const [pasteSnips, setPasteSnips] = useState<PasteSnippet[]>([])
@@ -125,12 +119,7 @@ export function useComposerState({
}, [historyDraftRef, setQueueEdit, setHistoryIdx])
const handleResolvedPaste = useCallback(
async ({
bracketed,
cursor,
text,
value
}: Omit<PasteEvent, 'hotkey'>): Promise<null | { cursor: number; value: string }> => {
async ({ bracketed, cursor, text, value }: Omit<PasteEvent, 'hotkey'>): Promise<null | { cursor: number; value: string }> => {
const cleanedText = stripTrailingPasteNewlines(text)
if (!cleanedText || !/[^\n]/.test(cleanedText)) {
@@ -142,7 +131,6 @@ export function useComposerState({
}
const sid = getUiState().sid
if (sid && looksLikeDroppedPath(cleanedText)) {
try {
const attached = await gw.request<ImageAttachResponse>('image.attach', {
@@ -153,7 +141,6 @@ export function useComposerState({
if (attached?.name) {
onImageAttached?.(attached)
const remainder = attached.remainder?.trim() ?? ''
if (!remainder) {
return { cursor, value }
}
@@ -211,29 +198,20 @@ export function useComposerState({
)
const handleTextPaste = useCallback(
({
bracketed,
cursor,
hotkey,
text,
value
}: PasteEvent): MaybePromise<null | { cursor: number; value: string }> => {
({ bracketed, cursor, hotkey, text, value }: PasteEvent): MaybePromise<null | { cursor: number; value: string }> => {
if (hotkey) {
const preferOsc52 = isRemoteShellSession(process.env)
const readPreferredText = preferOsc52
? readOsc52Clipboard(querier).then(async osc52Text => {
if (isUsableClipboardText(osc52Text)) {
return osc52Text
}
return readClipboardText()
})
: readClipboardText().then(async clipText => {
if (isUsableClipboardText(clipText)) {
return clipText
}
return readOsc52Clipboard(querier)
})
@@ -243,7 +221,6 @@ export function useComposerState({
}
void onClipboardPaste(false)
return null
})
}
-4
View File
@@ -1,6 +1,5 @@
import { useEffect, useRef } from 'react'
import { MOUSE_TRACKING } from '../config/env.js'
import { resolveDetailsMode } from '../domain/details.js'
import type { GatewayClient } from '../gatewayClient.js'
import type {
@@ -36,9 +35,6 @@ export const applyDisplay = (cfg: ConfigFullResponse | null, setBell: (v: boolea
compact: !!d.tui_compact,
detailsMode: resolveDetailsMode(d),
inlineDiffs: d.inline_diffs !== false,
// HERMES_TUI_DISABLE_MOUSE=1 wins — env-var opt-out must outrank config
// since the user set it specifically because their terminal is broken.
mouseTracking: MOUSE_TRACKING && d.tui_mouse !== false,
showCost: !!d.show_cost,
showReasoning: !!d.show_reasoning,
statusBar: d.tui_statusbar !== false,
+7 -64
View File
@@ -7,6 +7,7 @@ import type {
SudoRespondResponse,
VoiceRecordResponse
} from '../gatewayTypes.js'
import { isAction, isMac } from '../lib/platform.js'
import { getInputSelection } from './inputSelectionStore.js'
@@ -172,73 +173,15 @@ export function useInputHandlers(ctx: InputHandlerContext): InputHandlerResult {
const live = getUiState()
if (isBlocked) {
// When approval/clarify/confirm overlays are active, their own useInput
// handlers must receive keystrokes (arrow keys, numbers, Enter). Only
// intercept Ctrl+C here so the user can deny/dismiss — all other keys
// fall through to the component-level handlers.
if (overlay.approval || overlay.clarify || overlay.confirm) {
if (isCtrl(key, ch, 'c')) {
cancelOverlayFromCtrlC()
}
return
}
if (overlay.pager) {
if (key.escape || isCtrl(key, ch, 'c') || ch === 'q') {
return patchOverlayState({ pager: null })
}
if (key.return || ch === ' ') {
const nextOffset = overlay.pager.offset + pagerPageSize
const move = (delta: number | 'top' | 'bottom') =>
patchOverlayState(prev => {
if (!prev.pager) {
return prev
}
const { lines, offset } = prev.pager
const max = Math.max(0, lines.length - pagerPageSize)
const step = delta === 'top' ? -lines.length : delta === 'bottom' ? lines.length : delta
const next = Math.max(0, Math.min(offset + step, max))
return next === offset ? prev : { ...prev, pager: { ...prev.pager, offset: next } }
})
if (key.upArrow || ch === 'k') {
return move(-1)
}
if (key.downArrow || ch === 'j') {
return move(1)
}
if (key.pageUp || ch === 'b') {
return move(-pagerPageSize)
}
if (ch === 'g') {
return move('top')
}
if (ch === 'G') {
return move('bottom')
}
if (key.return || ch === ' ' || key.pageDown) {
patchOverlayState(prev => {
if (!prev.pager) {
return prev
}
const { lines, offset } = prev.pager
const max = Math.max(0, lines.length - pagerPageSize)
// Auto-close only when already at the last page — otherwise clamp
// to `max` so the offset matches what the line/page-back handlers
// can reach (prevents a snap-back jump on the next ↑/↓/PgUp).
return offset >= max
? { ...prev, pager: null }
: { ...prev, pager: { ...prev.pager, offset: Math.min(offset + pagerPageSize, max) } }
patchOverlayState({
pager: nextOffset >= overlay.pager.lines.length ? null : { ...overlay.pager, offset: nextOffset }
})
} else if (key.escape || isCtrl(key, ch, 'c') || ch === 'q') {
patchOverlayState({ pager: null })
}
return
+1 -1
View File
@@ -5,6 +5,7 @@ import { useCallback, useEffect, useMemo, useRef, useState } from 'react'
import { STARTUP_RESUME_ID } from '../config/env.js'
import { MAX_HISTORY, WHEEL_SCROLL_STEP } from '../config/limits.js'
import { attachedImageNotice, imageTokenMeta } from '../domain/messages.js'
import { terminalParityHints } from '../lib/terminalParity.js'
import { fmtCwdBranch } from '../domain/paths.js'
import { type GatewayClient } from '../gatewayClient.js'
import type {
@@ -16,7 +17,6 @@ import type {
import { useGitBranch } from '../hooks/useGitBranch.js'
import { useVirtualHistory } from '../hooks/useVirtualHistory.js'
import { asRpcResult, rpcErrorMessage } from '../lib/rpc.js'
import { terminalParityHints } from '../lib/terminalParity.js'
import { buildToolTrailLine, sameToolTrailGroup, toolTrailLabel } from '../lib/text.js'
import type { Msg, PanelSection, SlashCatalog } from '../types.js'
+2 -2
View File
@@ -234,11 +234,11 @@ export function useSubmission(opts: UseSubmissionOptions) {
const submit = useCallback(
(value: string) => {
if (composerState.completions.length) {
if (value.startsWith('/') && composerState.completions.length) {
const row = composerState.completions[composerState.compIdx]
if (row?.text) {
const text = value.startsWith('/') && row.text.startsWith('/') ? row.text.slice(1) : row.text
const text = row.text.startsWith('/') && composerState.compReplace > 0 ? row.text.slice(1) : row.text
const next = value.slice(0, composerState.compReplace) + text
if (next !== value) {
+3 -6
View File
@@ -28,7 +28,8 @@ function FaceTicker({ color, startedAt }: { color: string; startedAt?: null | nu
return (
<Text color={color}>
{FACES[tick % FACES.length]} {VERBS[tick % VERBS.length]}{startedAt ? ` · ${fmtDuration(now - startedAt)}` : ''}
{FACES[tick % FACES.length]} {VERBS[tick % VERBS.length]}
{startedAt ? ` · ${fmtDuration(now - startedAt)}` : ''}
</Text>
)
}
@@ -126,11 +127,7 @@ export function StatusRule({
<Box flexShrink={1} width={leftWidth}>
<Text color={t.color.bronze} wrap="truncate-end">
{'─ '}
{busy ? (
<FaceTicker color={statusColor} startedAt={turnStartedAt} />
) : (
<Text color={statusColor}>{status}</Text>
)}
{busy ? <FaceTicker color={statusColor} startedAt={turnStartedAt} /> : <Text color={statusColor}>{status}</Text>}
<Text color={t.color.dim}> {model}</Text>
{ctxLabel ? <Text color={t.color.dim}> {ctxLabel}</Text> : null}
{bar ? (
+11 -20
View File
@@ -1,11 +1,10 @@
import { AlternateScreen, Box, NoSelect, ScrollBox, setAltScreenMouseTracking, Text } from '@hermes/ink'
import { AlternateScreen, Box, NoSelect, ScrollBox, Text } from '@hermes/ink'
import { useStore } from '@nanostores/react'
import { memo, useEffect, useRef } from 'react'
import { memo } from 'react'
import type { AppLayoutProgressProps, AppLayoutProps } from '../app/interfaces.js'
import { $isBlocked } from '../app/overlayStore.js'
import { $uiState } from '../app/uiStore.js'
import { MOUSE_TRACKING } from '../config/env.js'
import { PLACEHOLDER } from '../content/placeholders.js'
import type { Theme } from '../theme.js'
import type { DetailsMode } from '../types.js'
@@ -257,24 +256,16 @@ const ComposerPane = memo(function ComposerPane({
)
})
export const AppLayout = memo(function AppLayout({ actions, composer, progress, status, transcript }: AppLayoutProps) {
const { mouseTracking } = useStore($uiState)
// Freeze <AlternateScreen>'s mouseTracking prop at initial value — runtime
// toggles go through setAltScreenMouseTracking below. Re-running the
// AlternateScreen insertion effect on prop change would re-enter the
// alt-screen (EXIT + ENTER + erase) and flash the frame. Teardown at
// unmount/exit still runs correctly because signal-exit + the final
// useEffect cleanup both emit DISABLE_MOUSE_TRACKING regardless.
const initialMouseTracking = useRef(MOUSE_TRACKING).current
useEffect(() => {
setAltScreenMouseTracking(mouseTracking)
return () => setAltScreenMouseTracking(false)
}, [mouseTracking])
export const AppLayout = memo(function AppLayout({
actions,
composer,
mouseTracking,
progress,
status,
transcript
}: AppLayoutProps) {
return (
<AlternateScreen mouseTracking={initialMouseTracking}>
<AlternateScreen mouseTracking={mouseTracking}>
<Box flexDirection="column" flexGrow={1}>
<Box flexDirection="row" flexGrow={1}>
<TranscriptPane actions={actions} composer={composer} progress={progress} transcript={transcript} />
+4 -11
View File
@@ -13,8 +13,6 @@ import { ApprovalPrompt, ClarifyPrompt, ConfirmPrompt } from './prompts.js'
import { SessionPicker } from './sessionPicker.js'
import { SkillsHub } from './skillsHub.js'
const COMPLETION_WINDOW = 16
export function PromptZone({
cols,
onApprovalChoice,
@@ -108,12 +106,7 @@ export function FloatingOverlays({
return null
}
// Fixed viewport centered on compIdx — previously the slice end was
// compIdx + 8 so the dropdown grew from 8 rows to 16 as the user scrolled
// down, bouncing the height on every keystroke.
const viewportSize = Math.min(COMPLETION_WINDOW, completions.length)
const start = Math.max(0, Math.min(compIdx - Math.floor(COMPLETION_WINDOW / 2), completions.length - viewportSize))
const start = Math.max(0, compIdx - 8)
return (
<Box alignItems="flex-start" bottom="100%" flexDirection="column" left={0} position="absolute" right={0}>
@@ -164,8 +157,8 @@ export function FloatingOverlays({
<Box marginTop={1}>
<Text color={ui.theme.color.dim}>
{overlay.pager.offset + pagerPageSize < overlay.pager.lines.length
? `↑↓/jk line · Enter/Space/PgDn page · b/PgUp back · g/G top/bottom · q close (${Math.min(overlay.pager.offset + pagerPageSize, overlay.pager.lines.length)}/${overlay.pager.lines.length})`
: `end · ↑↓/jk · b/PgUp back · g top · q close (${overlay.pager.lines.length} lines)`}
? `Enter/Space for more · q to close (${Math.min(overlay.pager.offset + pagerPageSize, overlay.pager.lines.length)}/${overlay.pager.lines.length})`
: `end · q to close (${overlay.pager.lines.length} lines)`}
</Text>
</Box>
</Box>
@@ -175,7 +168,7 @@ export function FloatingOverlays({
{!!completions.length && (
<FloatBox color={ui.theme.color.gold}>
<Box flexDirection="column" width={Math.max(28, cols - 6)}>
{completions.slice(start, start + viewportSize).map((item, i) => {
{completions.slice(start, compIdx + 8).map((item, i) => {
const active = start + i === compIdx
return (
+1 -2
View File
@@ -1,7 +1,6 @@
import { Box, Link, Text } from '@hermes/ink'
import { memo, type ReactNode, useMemo } from 'react'
import { ensureEmojiPresentation } from '../lib/emoji.js'
import { highlightLine, isHighlightable } from '../lib/syntax.js'
import type { Theme } from '../theme.js'
@@ -233,7 +232,7 @@ interface MdProps {
function MdImpl({ compact, t, text }: MdProps) {
const nodes = useMemo(() => {
const lines = ensureEmojiPresentation(text).split('\n')
const lines = text.split('\n')
const nodes: ReactNode[] = []
let i = 0
+23 -79
View File
@@ -1,4 +1,4 @@
import { Box, Text, useInput, useStdout } from '@hermes/ink'
import { Box, Text, useInput } from '@hermes/ink'
import { useEffect, useMemo, useState } from 'react'
import { providerDisplayNames } from '../domain/providers.js'
@@ -8,8 +8,6 @@ import { asRpcResult, rpcErrorMessage } from '../lib/rpc.js'
import type { Theme } from '../theme.js'
const VISIBLE = 12
const MIN_WIDTH = 40
const MAX_WIDTH = 90
const pageOffset = (count: number, sel: number) => Math.max(0, Math.min(sel - Math.floor(VISIBLE / 2), count - VISIBLE))
@@ -29,13 +27,6 @@ export function ModelPicker({ gw, onCancel, onSelect, sessionId, t }: ModelPicke
const [modelIdx, setModelIdx] = useState(0)
const [stage, setStage] = useState<'model' | 'provider'>('provider')
const { stdout } = useStdout()
// Pin the picker to a stable width so the FloatBox parent (which shrinks-
// to-fit with alignSelf="flex-start") doesn't resize as long provider /
// model names scroll into view, and so `wrap="truncate-end"` on each row
// has an actual constraint to truncate against.
const width = Math.max(MIN_WIDTH, Math.min(MAX_WIDTH, (stdout?.columns ?? 80) - 6))
useEffect(() => {
gw.request<ModelOptionsResponse>('model.options', sessionId ? { session_id: sessionId } : {})
.then(raw => {
@@ -177,53 +168,32 @@ export function ModelPicker({ gw, onCancel, onSelect, sessionId, t }: ModelPicke
const { items, off } = visibleItems(rows, providerIdx)
return (
<Box flexDirection="column" width={width}>
<Text bold color={t.color.amber} wrap="truncate-end">
<Box flexDirection="column">
<Text bold color={t.color.amber}>
Select Provider
</Text>
<Text color={t.color.dim} wrap="truncate-end">
Current model: {currentModel || '(unknown)'}
</Text>
<Text color={t.color.label} wrap="truncate-end">
{provider?.warning ? `warning: ${provider.warning}` : ' '}
</Text>
<Text color={t.color.dim} wrap="truncate-end">
{off > 0 ? `${off} more` : ' '}
</Text>
<Text color={t.color.dim}>Current model: {currentModel || '(unknown)'}</Text>
{provider?.warning ? <Text color={t.color.label}>warning: {provider.warning}</Text> : null}
{off > 0 && <Text color={t.color.dim}> {off} more</Text>}
{Array.from({ length: VISIBLE }, (_, i) => {
const row = items[i]
{items.map((row, i) => {
const idx = off + i
return row ? (
return (
<Text
bold={providerIdx === idx}
color={providerIdx === idx ? t.color.amber : t.color.dim}
inverse={providerIdx === idx}
color={providerIdx === idx ? t.color.cornsilk : t.color.dim}
key={providers[idx]?.slug ?? `row-${idx}`}
wrap="truncate-end"
>
{providerIdx === idx ? '▸ ' : ' '}
{i + 1}. {row}
</Text>
) : (
<Text color={t.color.dim} key={`pad-${i}`} wrap="truncate-end">
{' '}
</Text>
)
})}
<Text color={t.color.dim} wrap="truncate-end">
{off + VISIBLE < rows.length ? `${rows.length - off - VISIBLE} more` : ' '}
</Text>
<Text color={t.color.dim} wrap="truncate-end">
persist: {persistGlobal ? 'global' : 'session'} · g toggle
</Text>
<Text color={t.color.dim} wrap="truncate-end">
/ select · Enter choose · 1-9,0 quick · Esc cancel
</Text>
{off + VISIBLE < rows.length && <Text color={t.color.dim}> {rows.length - off - VISIBLE} more</Text>}
<Text color={t.color.dim}>persist: {persistGlobal ? 'global' : 'session'} · g toggle</Text>
<Text color={t.color.dim}>/ select · Enter choose · 1-9,0 quick · Esc cancel</Text>
</Box>
)
}
@@ -231,44 +201,23 @@ export function ModelPicker({ gw, onCancel, onSelect, sessionId, t }: ModelPicke
const { items, off } = visibleItems(models, modelIdx)
return (
<Box flexDirection="column" width={width}>
<Text bold color={t.color.amber} wrap="truncate-end">
<Box flexDirection="column">
<Text bold color={t.color.amber}>
Select Model
</Text>
<Text color={t.color.dim} wrap="truncate-end">
{names[providerIdx] || '(unknown provider)'}
</Text>
<Text color={t.color.label} wrap="truncate-end">
{provider?.warning ? `warning: ${provider.warning}` : ' '}
</Text>
<Text color={t.color.dim} wrap="truncate-end">
{off > 0 ? `${off} more` : ' '}
</Text>
<Text color={t.color.dim}>{names[providerIdx] || '(unknown provider)'}</Text>
{!models.length ? <Text color={t.color.dim}>no models listed for this provider</Text> : null}
{provider?.warning ? <Text color={t.color.label}>warning: {provider.warning}</Text> : null}
{off > 0 && <Text color={t.color.dim}> {off} more</Text>}
{Array.from({ length: VISIBLE }, (_, i) => {
const row = items[i]
{items.map((row, i) => {
const idx = off + i
if (!row) {
return !models.length && i === 0 ? (
<Text color={t.color.dim} key="empty" wrap="truncate-end">
no models listed for this provider
</Text>
) : (
<Text color={t.color.dim} key={`pad-${i}`} wrap="truncate-end">
{' '}
</Text>
)
}
return (
<Text
bold={modelIdx === idx}
color={modelIdx === idx ? t.color.amber : t.color.dim}
inverse={modelIdx === idx}
color={modelIdx === idx ? t.color.cornsilk : t.color.dim}
key={`${provider?.slug ?? 'prov'}:${idx}:${row}`}
wrap="truncate-end"
>
{modelIdx === idx ? '▸ ' : ' '}
{i + 1}. {row}
@@ -276,14 +225,9 @@ export function ModelPicker({ gw, onCancel, onSelect, sessionId, t }: ModelPicke
)
})}
<Text color={t.color.dim} wrap="truncate-end">
{off + VISIBLE < models.length ? `${models.length - off - VISIBLE} more` : ' '}
</Text>
<Text color={t.color.dim} wrap="truncate-end">
persist: {persistGlobal ? 'global' : 'session'} · g toggle
</Text>
<Text color={t.color.dim} wrap="truncate-end">
{off + VISIBLE < models.length && <Text color={t.color.dim}> {models.length - off - VISIBLE} more</Text>}
<Text color={t.color.dim}>persist: {persistGlobal ? 'global' : 'session'} · g toggle</Text>
<Text color={t.color.dim}>
{models.length ? '↑/↓ select · Enter switch · 1-9,0 quick · Esc back' : 'Enter/Esc back'}
</Text>
</Box>
+6 -7
View File
@@ -1,11 +1,11 @@
import { Box, Text, useInput } from '@hermes/ink'
import { useState } from 'react'
import { isMac } from '../lib/platform.js'
import type { Theme } from '../theme.js'
import type { ApprovalReq, ClarifyReq, ConfirmReq } from '../types.js'
import { TextInput } from './textInput.js'
import { isMac } from '../lib/platform.js'
const OPTS = ['once', 'session', 'always', 'deny'] as const
const LABELS = { always: 'Always allow', deny: 'Deny', once: 'Allow once', session: 'Allow this session' } as const
@@ -64,8 +64,8 @@ export function ApprovalPrompt({ onChoice, req, t }: ApprovalPromptProps) {
{OPTS.map((o, i) => (
<Text key={o}>
<Text bold={sel === i} color={sel === i ? t.color.warn : t.color.dim} inverse={sel === i}>
{sel === i ? '▸ ' : ' '}
<Text color={sel === i ? t.color.warn : t.color.dim}>{sel === i ? '▸ ' : ' '}</Text>
<Text color={sel === i ? t.color.cornsilk : t.color.dim}>
{i + 1}. {LABELS[o]}
</Text>
</Text>
@@ -130,8 +130,7 @@ export function ClarifyPrompt({ cols = 80, onAnswer, onCancel, req, t }: Clarify
</Box>
<Text color={t.color.dim}>
Enter send · Esc {choices.length ? 'back' : 'cancel'} ·{' '}
{isMac ? 'Cmd+C copy · Cmd+V paste · Ctrl+C cancel' : 'Ctrl+C cancel'}
Enter send · Esc {choices.length ? 'back' : 'cancel'} · {isMac ? 'Cmd+C copy · Cmd+V paste · Ctrl+C cancel' : 'Ctrl+C cancel'}
</Text>
</Box>
)
@@ -143,8 +142,8 @@ export function ClarifyPrompt({ cols = 80, onAnswer, onCancel, req, t }: Clarify
{[...choices, 'Other (type your answer)'].map((c, i) => (
<Text key={i}>
<Text bold={sel === i} color={sel === i ? t.color.label : t.color.dim} inverse={sel === i}>
{sel === i ? '▸ ' : ' '}
<Text color={sel === i ? t.color.label : t.color.dim}>{sel === i ? '▸ ' : ' '}</Text>
<Text color={sel === i ? t.color.cornsilk : t.color.dim}>
{i + 1}. {c}
</Text>
</Text>
+6 -16
View File
@@ -1,4 +1,4 @@
import { Box, Text, useInput, useStdout } from '@hermes/ink'
import { Box, Text, useInput } from '@hermes/ink'
import { useEffect, useState } from 'react'
import type { GatewayClient } from '../gatewayClient.js'
@@ -7,8 +7,6 @@ import { asRpcResult, rpcErrorMessage } from '../lib/rpc.js'
import type { Theme } from '../theme.js'
const VISIBLE = 15
const MIN_WIDTH = 60
const MAX_WIDTH = 120
const age = (ts: number) => {
const d = (Date.now() / 1000 - ts) / 86400
@@ -30,9 +28,6 @@ export function SessionPicker({ gw, onCancel, onSelect, t }: SessionPickerProps)
const [sel, setSel] = useState(0)
const [loading, setLoading] = useState(true)
const { stdout } = useStdout()
const width = Math.max(MIN_WIDTH, Math.min(MAX_WIDTH, (stdout?.columns ?? 80) - 6))
useEffect(() => {
gw.request<SessionListResponse>('session.list', { limit: 20 })
.then(raw => {
@@ -104,7 +99,7 @@ export function SessionPicker({ gw, onCancel, onSelect, t }: SessionPickerProps)
const off = Math.max(0, Math.min(sel - Math.floor(VISIBLE / 2), items.length - VISIBLE))
return (
<Box flexDirection="column" width={width}>
<Box flexDirection="column">
<Text bold color={t.color.amber}>
Resume Session
</Text>
@@ -113,29 +108,24 @@ export function SessionPicker({ gw, onCancel, onSelect, t }: SessionPickerProps)
{items.slice(off, off + VISIBLE).map((s, vi) => {
const i = off + vi
const selected = sel === i
return (
<Box key={s.id}>
<Text bold={selected} color={selected ? t.color.amber : t.color.dim} inverse={selected}>
{selected ? '▸ ' : ' '}
</Text>
<Text color={sel === i ? t.color.label : t.color.dim}>{sel === i ? '▸ ' : ' '}</Text>
<Box width={30}>
<Text bold={selected} color={selected ? t.color.amber : t.color.dim} inverse={selected}>
<Text color={sel === i ? t.color.cornsilk : t.color.dim}>
{String(i + 1).padStart(2)}. [{s.id}]
</Text>
</Box>
<Box width={30}>
<Text bold={selected} color={selected ? t.color.amber : t.color.dim} inverse={selected}>
<Text color={t.color.dim}>
({s.message_count} msgs, {age(s.started_at)}, {s.source || 'tui'})
</Text>
</Box>
<Text bold={selected} color={selected ? t.color.amber : t.color.dim} inverse={selected} wrap="truncate-end">
{s.title || s.preview || '(untitled)'}
</Text>
<Text color={sel === i ? t.color.cornsilk : t.color.dim}>{s.title || s.preview || '(untitled)'}</Text>
</Box>
)
})}
+8 -25
View File
@@ -1,4 +1,4 @@
import { Box, Text, useInput, useStdout } from '@hermes/ink'
import { Box, Text, useInput } from '@hermes/ink'
import { useEffect, useState } from 'react'
import type { GatewayClient } from '../gatewayClient.js'
@@ -6,8 +6,6 @@ import { rpcErrorMessage } from '../lib/rpc.js'
import type { Theme } from '../theme.js'
const VISIBLE = 12
const MIN_WIDTH = 40
const MAX_WIDTH = 90
const pageOffset = (count: number, sel: number) => Math.max(0, Math.min(sel - Math.floor(VISIBLE / 2), count - VISIBLE))
@@ -28,9 +26,6 @@ export function SkillsHub({ gw, onClose, t }: SkillsHubProps) {
const [err, setErr] = useState('')
const [loading, setLoading] = useState(true)
const { stdout } = useStdout()
const width = Math.max(MIN_WIDTH, Math.min(MAX_WIDTH, (stdout?.columns ?? 80) - 6))
useEffect(() => {
gw.request<{ skills?: Record<string, string[]> }>('skills.manage', { action: 'list' })
.then(r => {
@@ -191,7 +186,7 @@ export function SkillsHub({ gw, onClose, t }: SkillsHubProps) {
if (err && stage === 'category') {
return (
<Box flexDirection="column" width={width}>
<Box flexDirection="column">
<Text color={t.color.label}>error: {err}</Text>
<Text color={t.color.dim}>Esc to cancel</Text>
</Box>
@@ -200,7 +195,7 @@ export function SkillsHub({ gw, onClose, t }: SkillsHubProps) {
if (!cats.length) {
return (
<Box flexDirection="column" width={width}>
<Box flexDirection="column">
<Text color={t.color.dim}>no skills available</Text>
<Text color={t.color.dim}>Esc to cancel</Text>
</Box>
@@ -212,7 +207,7 @@ export function SkillsHub({ gw, onClose, t }: SkillsHubProps) {
const { items, off } = visibleItems(rows, catIdx)
return (
<Box flexDirection="column" width={width}>
<Box flexDirection="column">
<Text bold color={t.color.amber}>
Skills Hub
</Text>
@@ -224,13 +219,7 @@ export function SkillsHub({ gw, onClose, t }: SkillsHubProps) {
const idx = off + i
return (
<Text
bold={catIdx === idx}
color={catIdx === idx ? t.color.amber : t.color.dim}
inverse={catIdx === idx}
key={row}
wrap="truncate-end"
>
<Text color={catIdx === idx ? t.color.cornsilk : t.color.dim} key={row}>
{catIdx === idx ? '▸ ' : ' '}
{i + 1}. {row}
</Text>
@@ -247,7 +236,7 @@ export function SkillsHub({ gw, onClose, t }: SkillsHubProps) {
const { items, off } = visibleItems(skills, skillIdx)
return (
<Box flexDirection="column" width={width}>
<Box flexDirection="column">
<Text bold color={t.color.amber}>
{selectedCat}
</Text>
@@ -260,13 +249,7 @@ export function SkillsHub({ gw, onClose, t }: SkillsHubProps) {
const idx = off + i
return (
<Text
bold={skillIdx === idx}
color={skillIdx === idx ? t.color.amber : t.color.dim}
inverse={skillIdx === idx}
key={row}
wrap="truncate-end"
>
<Text color={skillIdx === idx ? t.color.cornsilk : t.color.dim} key={row}>
{skillIdx === idx ? '▸ ' : ' '}
{i + 1}. {row}
</Text>
@@ -282,7 +265,7 @@ export function SkillsHub({ gw, onClose, t }: SkillsHubProps) {
}
return (
<Box flexDirection="column" width={width}>
<Box flexDirection="column">
<Text bold color={t.color.amber}>
{info?.name ?? skillName}
</Text>
+13 -19
View File
@@ -277,9 +277,8 @@ function useFwdDelete(active: boolean) {
type PasteResult = { cursor: number; value: string } | null
const isPasteResultPromise = (
value: PasteResult | Promise<PasteResult> | null | undefined
): value is Promise<PasteResult> => !!value && typeof (value as PromiseLike<PasteResult>).then === 'function'
const isPasteResultPromise = (value: PasteResult | Promise<PasteResult> | null | undefined): value is Promise<PasteResult> =>
!!value && typeof (value as PromiseLike<PasteResult>).then === 'function'
export function TextInput({
columns = 80,
@@ -523,11 +522,9 @@ export function TextInput({
}
const range = selRange()
const nextValue = range
? vRef.current.slice(0, range.start) + cleaned + vRef.current.slice(range.end)
: vRef.current.slice(0, curRef.current) + cleaned + vRef.current.slice(curRef.current)
const nextCursor = range ? range.start + cleaned.length : curRef.current + cleaned.length
commit(nextValue, nextCursor)
@@ -594,8 +591,7 @@ export function TextInput({
let c = curRef.current
let v = vRef.current
const mod = isActionMod(k)
const wordMod = mod || k.meta
const actionHome = k.home || (!isMac && mod && inp === 'a') || isMacActionFallback(k, inp, 'a')
const actionHome = k.home || isMacActionFallback(k, inp, 'a')
const actionEnd = k.end || (mod && inp === 'e') || isMacActionFallback(k, inp, 'e')
const actionDeleteToStart = (mod && inp === 'u') || isMacActionFallback(k, inp, 'u')
const range = selRange()
@@ -609,7 +605,7 @@ export function TextInput({
return swap(redo, undo)
}
if (isMac && mod && inp === 'a') {
if (mod && inp === 'a') {
return selectAll()
}
@@ -620,32 +616,32 @@ export function TextInput({
clearSel()
c = v.length
} else if (k.leftArrow) {
if (range && !wordMod) {
if (range && !mod) {
clearSel()
c = range.start
} else {
clearSel()
c = wordMod ? wordLeft(v, c) : prevPos(v, c)
c = mod ? wordLeft(v, c) : prevPos(v, c)
}
} else if (k.rightArrow) {
if (range && !wordMod) {
if (range && !mod) {
clearSel()
c = range.end
} else {
clearSel()
c = wordMod ? wordRight(v, c) : nextPos(v, c)
c = mod ? wordRight(v, c) : nextPos(v, c)
}
} else if (wordMod && inp === 'b') {
} else if (mod && inp === 'b') {
clearSel()
c = wordLeft(v, c)
} else if (wordMod && inp === 'f') {
} else if (mod && inp === 'f') {
clearSel()
c = wordRight(v, c)
} else if (range && (k.backspace || delFwd)) {
v = v.slice(0, range.start) + v.slice(range.end)
c = range.start
} else if (k.backspace && c > 0) {
if (wordMod) {
if (mod) {
const t = wordLeft(v, c)
v = v.slice(0, t) + v.slice(c)
c = t
@@ -655,7 +651,7 @@ export function TextInput({
c = t
}
} else if (delFwd && c < v.length) {
if (wordMod) {
if (mod) {
const t = wordRight(v, c)
v = v.slice(0, c) + v.slice(t)
} else {
@@ -782,9 +778,7 @@ interface TextInputProps {
focus?: boolean
mask?: string
onChange: (v: string) => void
onPaste?: (
e: PasteEvent
) => { cursor: number; value: string } | Promise<{ cursor: number; value: string } | null> | null
onPaste?: (e: PasteEvent) => { cursor: number; value: string } | Promise<{ cursor: number; value: string } | null> | null
onSubmit?: (v: string) => void
placeholder?: string
value: string
+8 -6
View File
@@ -4,12 +4,14 @@ const action = isMac ? 'Cmd' : 'Ctrl'
const paste = isMac ? 'Cmd' : 'Alt'
export const HOTKEYS: [string, string][] = [
...(isMac
? ([
['Cmd+C', 'copy selection'],
['Ctrl+C', 'interrupt / clear draft / exit']
] as [string, string][])
: ([['Ctrl+C', 'copy selection / interrupt / clear draft / exit']] as [string, string][])),
...(
isMac
? ([
['Cmd+C', 'copy selection'],
['Ctrl+C', 'interrupt / clear draft / exit']
] as [string, string][])
: ([['Ctrl+C', 'copy selection / interrupt / clear draft / exit']] as [string, string][])
),
[action + '+D', 'exit'],
[action + '+G', 'open $EDITOR for prompt'],
[action + '+L', 'new session (clear)'],
-1
View File
@@ -60,7 +60,6 @@ export interface ConfigDisplayConfig {
streaming?: boolean
thinking_mode?: string
tui_compact?: boolean
tui_mouse?: boolean
tui_statusbar?: boolean
}
+1 -6
View File
@@ -17,11 +17,9 @@ export function isUsableClipboardText(text: null | string): text is string {
}
let suspicious = 0
for (const ch of text) {
const code = ch.charCodeAt(0)
const isControl = code < 0x20 && ch !== '\n' && ch !== '\r' && ch !== '\t'
if (isControl || ch === '\ufffd') {
suspicious += 1
}
@@ -30,10 +28,7 @@ export function isUsableClipboardText(text: null | string): text is string {
return suspicious <= Math.max(2, Math.floor(text.length * 0.02))
}
function readClipboardCommands(
platform: NodeJS.Platform,
env: NodeJS.ProcessEnv
): Array<{ args: readonly string[]; cmd: string }> {
function readClipboardCommands(platform: NodeJS.Platform, env: NodeJS.ProcessEnv): Array<{ args: readonly string[]; cmd: string }> {
if (platform === 'darwin') {
return [{ cmd: 'pbpaste', args: [] }]
}
-55
View File
@@ -1,55 +0,0 @@
// U+FE0E VARIATION SELECTOR-15: explicit text-presentation request.
const VS15 = 0xfe0e
// U+FE0F VARIATION SELECTOR-16: explicit emoji-presentation request.
const VS16 = 0xfe0f
// U+20E3 COMBINING ENCLOSING KEYCAP.
// NOTE(review): not referenced by any code visible in this module — confirm it is still needed.
const KEYCAP = 0x20e3
// Code points after which ensureEmojiPresentation appends U+FE0F unless a
// variation selector already follows. Presumably the characters whose default
// rendering is text rather than emoji — verify against the Unicode emoji data
// before editing.
const TEXT_DEFAULT_EMOJI = new Set<number>([
0x00a9, 0x00ae, 0x203c, 0x2049, 0x2122, 0x2139, 0x2194, 0x2195, 0x2196, 0x2197, 0x2198, 0x2199, 0x21a9, 0x21aa,
0x2328, 0x23cf, 0x23ed, 0x23ee, 0x23ef, 0x23f1, 0x23f2, 0x23f8, 0x23f9, 0x23fa, 0x24c2, 0x25aa, 0x25ab, 0x25b6,
0x25c0, 0x25fb, 0x25fc, 0x2600, 0x2601, 0x2602, 0x2603, 0x2604, 0x260e, 0x2611, 0x2618, 0x261d, 0x2620, 0x2622,
0x2623, 0x2626, 0x262a, 0x262e, 0x262f, 0x2638, 0x2639, 0x263a, 0x2640, 0x2642, 0x265f, 0x2660, 0x2663, 0x2665,
0x2666, 0x2668, 0x267b, 0x267e, 0x2692, 0x2694, 0x2695, 0x2696, 0x2697, 0x2699, 0x269b, 0x269c, 0x26a0, 0x26a7,
0x26b0, 0x26b1, 0x26c8, 0x26cf, 0x26d1, 0x26d3, 0x26d4, 0x26e9, 0x26f0, 0x26f1, 0x26f4, 0x26f7, 0x26f8, 0x26f9,
0x2702, 0x2708, 0x2709, 0x270c, 0x270d, 0x270f, 0x2712, 0x2714, 0x2716, 0x271d, 0x2721, 0x2733, 0x2734, 0x2744,
0x2747, 0x2763, 0x2764, 0x27a1, 0x2934, 0x2935, 0x2b05, 0x2b06, 0x2b07, 0x3030, 0x303d, 0x3297, 0x3299
])
// Cheap pre-check used before walking the string code point by code point.
// The character class appears to list the same code points as
// TEXT_DEFAULT_EMOJI (as ranges where contiguous); keep the two in sync.
// No `g` flag, so `.test()` carries no `lastIndex` state between calls.
const MAYBE_TEXT_EMOJI_RE =
/[\u00a9\u00ae\u203c\u2049\u2122\u2139\u2194-\u2199\u21a9\u21aa\u2328\u23cf\u23ed-\u23ef\u23f1\u23f2\u23f8-\u23fa\u24c2\u25aa\u25ab\u25b6\u25c0\u25fb\u25fc\u2600-\u2604\u260e\u2611\u2618\u261d\u2620\u2622\u2623\u2626\u262a\u262e\u262f\u2638-\u263a\u2640\u2642\u265f\u2660\u2663\u2665\u2666\u2668\u267b\u267e\u2692\u2694-\u2697\u2699\u269b\u269c\u26a0\u26a7\u26b0\u26b1\u26c8\u26cf\u26d1\u26d3\u26d4\u26e9\u26f0\u26f1\u26f4\u26f7-\u26f9\u2702\u2708\u2709\u270c\u270d\u270f\u2712\u2714\u2716\u271d\u2721\u2733\u2734\u2744\u2747\u2763\u2764\u27a1\u2934\u2935\u2b05-\u2b07\u3030\u303d\u3297\u3299]/
/**
 * Append U+FE0F (VS16) after every code point listed in TEXT_DEFAULT_EMOJI
 * that is not already followed by a variation selector.
 *
 * Returns the input unchanged when it is empty, when the quick regex
 * pre-check finds no candidate, or when every candidate already carries
 * VS16 or VS15.
 */
export function ensureEmojiPresentation(text: string): string {
  if (!text || !MAYBE_TEXT_EMOJI_RE.test(text)) {
    return text
  }

  // Collected output segments; stays empty until the first selector is
  // actually inserted, so the common "nothing to do" case allocates nothing
  // beyond this array.
  const pieces: string[] = []
  let copiedUpTo = 0

  for (let pos = 0; pos < text.length; ) {
    const codePoint = text.codePointAt(pos)!
    // Astral code points occupy two UTF-16 code units.
    const end = pos + (codePoint > 0xffff ? 2 : 1)

    pos = end

    if (!TEXT_DEFAULT_EMOJI.has(codePoint)) {
      continue
    }

    // An existing VS16 already asks for emoji presentation and VS15 is an
    // explicit text request; in both cases leave the sequence alone.
    // Anything else (including ZWJ or a keycap mark) gets VS16 inserted
    // directly after the base character.
    const following = text.codePointAt(end)

    if (following === VS16 || following === VS15) {
      continue
    }

    pieces.push(text.slice(copiedUpTo, end), '\uFE0F')
    copiedUpTo = end
  }

  if (pieces.length === 0) {
    return text
  }

  pieces.push(text.slice(copiedUpTo))

  return pieces.join('')
}
-1
View File
@@ -54,7 +54,6 @@ export async function readOsc52Clipboard(querier: null | OscQuerier, timeoutMs =
}
const timeout = new Promise<undefined>(resolve => setTimeout(resolve, timeoutMs))
const query = querier.send<OscResponse>({
request: buildOsc52ClipboardQuery(),
match: (r: unknown): r is OscResponse => {
+1 -1
View File
@@ -13,7 +13,7 @@ export const isMac = process.platform === 'darwin'
/** True when the platform action-modifier is pressed (Cmd on macOS, Ctrl elsewhere). */
export const isActionMod = (key: { ctrl: boolean; meta: boolean; super?: boolean }): boolean =>
isMac ? key.meta || key.super === true : key.ctrl
(isMac ? key.meta || key.super === true : key.ctrl)
/**
* Some macOS terminals rewrite Cmd navigation/deletion into readline control keys.
+5 -16
View File
@@ -1,9 +1,4 @@
import {
detectVSCodeLikeTerminal,
type FileOps,
isRemoteShellSession,
shouldPromptForTerminalSetup
} from './terminalSetup.js'
import { detectVSCodeLikeTerminal, isRemoteShellSession, shouldPromptForTerminalSetup, type FileOps } from './terminalSetup.js'
export type MacTerminalHint = {
key: string
@@ -36,10 +31,7 @@ export async function terminalParityHints(
const ctx = detectMacTerminalContext(env)
const hints: MacTerminalHint[] = []
if (
ctx.vscodeLike &&
(await shouldPromptForTerminalSetup({ env, fileOps: options?.fileOps, homeDir: options?.homeDir }))
) {
if (ctx.vscodeLike && (await shouldPromptForTerminalSetup({ env, fileOps: options?.fileOps, homeDir: options?.homeDir }))) {
hints.push({
key: 'ide-setup',
tone: 'info',
@@ -51,8 +43,7 @@ export async function terminalParityHints(
hints.push({
key: 'apple-terminal',
tone: 'warn',
message:
'Apple Terminal detected · use /paste for image-only clipboard fallback, and try Ctrl+A / Ctrl+E / Ctrl+U if Cmd+←/→/⌫ gets rewritten'
message: 'Apple Terminal detected · use /paste for image-only clipboard fallback, and try Ctrl+A / Ctrl+E / Ctrl+U if Cmd+←/→/⌫ gets rewritten'
})
}
@@ -60,8 +51,7 @@ export async function terminalParityHints(
hints.push({
key: 'tmux',
tone: 'warn',
message:
'tmux detected · clipboard copy/paste uses passthrough when available; allow-passthrough improves OSC52 reliability'
message: 'tmux detected · clipboard copy/paste uses passthrough when available; allow-passthrough improves OSC52 reliability'
})
}
@@ -69,8 +59,7 @@ export async function terminalParityHints(
hints.push({
key: 'remote',
tone: 'warn',
message:
'SSH session detected · text clipboard can bridge via OSC52, but image clipboard and local screenshot paths still depend on the machine running Hermes'
message: 'SSH session detected · text clipboard can bridge via OSC52, but image clipboard and local screenshot paths still depend on the machine running Hermes'
})
}
+2 -20
View File
@@ -26,7 +26,6 @@ export type TerminalSetupResult = {
const DEFAULT_FILE_OPS: FileOps = { copyFile, mkdir, readFile, writeFile }
const MULTILINE_SEQUENCE = '\\\r\n'
const TERMINAL_META: Record<SupportedTerminal, { appName: string; label: string }> = {
vscode: { appName: 'Code', label: 'VS Code' },
cursor: { appName: 'Cursor', label: 'Cursor' },
@@ -100,22 +99,18 @@ export function stripJsonComments(content: string): string {
// String literal — copy as-is, including any comment-like chars inside
if (ch === '"') {
let j = i + 1
while (j < len) {
if (content[j] === '\\') {
j += 2 // skip escaped char
} else if (content[j] === '"') {
j++
break
} else {
j++
}
}
result += content.slice(i, j)
i = j
continue
}
@@ -123,7 +118,6 @@ export function stripJsonComments(content: string): string {
if (ch === '/' && content[i + 1] === '/') {
const eol = content.indexOf('\n', i)
i = eol === -1 ? len : eol
continue
}
@@ -131,7 +125,6 @@ export function stripJsonComments(content: string): string {
if (ch === '/' && content[i + 1] === '*') {
const end = content.indexOf('*/', i + 2)
i = end === -1 ? len : end + 2
continue
}
@@ -215,23 +208,19 @@ export async function configureTerminalKeybindings(
let keybindings: unknown[] = []
let hasExistingFile = false
try {
const content = await ops.readFile(keybindingsFile, 'utf8')
hasExistingFile = true
const parsed: unknown = JSON.parse(stripJsonComments(content))
if (!Array.isArray(parsed)) {
return {
success: false,
message: `${meta.label} keybindings.json is not a JSON array: ${keybindingsFile}`
}
}
keybindings = parsed
} catch (error) {
const code = (error as NodeJS.ErrnoException | undefined)?.code
if (code !== 'ENOENT') {
return {
success: false,
@@ -241,9 +230,7 @@ export async function configureTerminalKeybindings(
}
const conflicts = TARGET_BINDINGS.filter(target =>
keybindings.some(
existing => isKeybinding(existing) && existing.key === target.key && !sameBinding(existing, target)
)
keybindings.some(existing => isKeybinding(existing) && existing.key === target.key && !sameBinding(existing, target))
)
if (conflicts.length) {
@@ -255,10 +242,8 @@ export async function configureTerminalKeybindings(
}
let added = 0
for (const target of TARGET_BINDINGS.slice().reverse()) {
const exists = keybindings.some(existing => isKeybinding(existing) && sameBinding(existing, target))
if (!exists) {
keybindings.unshift(target)
added += 1
@@ -335,14 +320,11 @@ export async function shouldPromptForTerminalSetup(options?: {
try {
const content = await ops.readFile(join(configDir, 'keybindings.json'), 'utf8')
const parsed: unknown = JSON.parse(stripJsonComments(content))
if (!Array.isArray(parsed)) {
return true
}
return TARGET_BINDINGS.some(
target => !parsed.some(existing => isKeybinding(existing) && sameBinding(existing, target))
)
return TARGET_BINDINGS.some(target => !parsed.some(existing => isKeybinding(existing) && sameBinding(existing, target)))
} catch {
return true
}
-2
View File
@@ -77,8 +77,6 @@ declare module '@hermes/ink' {
export function render(node: React.ReactNode, options?: NodeJS.WriteStream | RenderOptions): Instance
export function setAltScreenMouseTracking(enabled: boolean, stdout?: NodeJS.WriteStream): void
export function useApp(): { readonly exit: (error?: Error) => void }
export type RunExternalProcess = () => Promise<void>
export function useExternalProcess(): (run: RunExternalProcess) => Promise<void>
+3 -3
View File
@@ -216,10 +216,10 @@ Restricting toolsets keeps the subagent focused and prevents accidental side eff
## Constraints
- **Default 3 parallel tasks** — batches default to 3 concurrent subagents (configurable via `delegation.max_concurrent_children` in config.yaml — no hard ceiling, only a floor of 1)
- **Nested delegation is opt-in** — leaf subagents (default) cannot call `delegate_task`, `clarify`, `memory`, `send_message`, or `execute_code`. Orchestrator subagents (`role="orchestrator"`) retain `delegate_task` for further delegation, but only when `delegation.max_spawn_depth` is raised above the default of 1 (1-3 supported); the other four remain blocked. Disable globally via `delegation.orchestrator_enabled: false`.
- **Default 3 parallel tasks** — batches default to 3 concurrent subagents (configurable via `delegation.max_concurrent_children` in config.yaml)
- **No nesting** — subagents cannot call `delegate_task`, `clarify`, `memory`, `send_message`, or `execute_code`
- **Separate terminals** — each subagent gets its own terminal session with separate working directory and state
- **No conversation history** — subagents see only the `goal` and `context` the parent agent passes when calling `delegate_task`
- **No conversation history** — subagents see only what you put in `goal` and `context`
- **Default 50 iterations** — set `max_iterations` lower for simple tasks to save cost
---
+14 -32
View File
@@ -20,7 +20,7 @@ delegate_task(
## Parallel Batch
Up to 3 concurrent subagents by default (configurable, no hard ceiling):
Up to 3 concurrent subagents:
```python
delegate_task(tasks=[
@@ -33,10 +33,10 @@ delegate_task(tasks=[
## How Subagent Context Works
:::warning Critical: Subagents Know Nothing
Subagents start with a **completely fresh conversation**. They have zero knowledge of the parent's conversation history, prior tool calls, or anything discussed before delegation. The subagent's only context comes from the `goal` and `context` fields the parent agent populates when it calls `delegate_task`.
Subagents start with a **completely fresh conversation**. They have zero knowledge of the parent's conversation history, prior tool calls, or anything discussed before delegation. The subagent's only context comes from the `goal` and `context` fields you provide.
:::
This means the parent agent must pass **everything** the subagent needs in the call:
This means you must pass **everything** the subagent needs:
```python
# BAD - subagent has no idea what "the error" is
@@ -121,8 +121,8 @@ delegate_task(
When you provide a `tasks` array, subagents run in **parallel** using a thread pool:
- **Maximum concurrency:** 5 tasks by default (configurable via `delegation.max_concurrent_children`, absolute cap of 8)
- **Thread pool:** Uses `ThreadPoolExecutor` with the configured concurrency limit as max workers
- **Maximum concurrency:** 3 tasks (the `tasks` array is truncated to 3 if longer)
- **Thread pool:** Uses `ThreadPoolExecutor` with `MAX_CONCURRENT_CHILDREN = 3` workers
- **Progress display:** In CLI mode, a tree-view shows tool calls from each subagent in real-time with per-task completion lines. In gateway mode, progress is batched and relayed to the parent's progress callback
- **Result ordering:** Results are sorted by task index to match input order regardless of completion order
- **Interrupt propagation:** Interrupting the parent (e.g., sending a new message) interrupts all active children
@@ -154,8 +154,8 @@ The `toolsets` parameter controls what tools the subagent has access to. Choose
| `["file"]` | Read-only analysis, code review without execution |
| `["terminal"]` | System administration, process management |
Certain toolsets are blocked for subagents regardless of what you specify:
- `delegation` — blocked for leaf subagents (the default). Retained for `role="orchestrator"` children, bounded by `max_spawn_depth` — see [Depth Limit and Nested Orchestration](#depth-limit-and-nested-orchestration) below.
Certain toolsets are **always blocked** for subagents regardless of what you specify:
- `delegation` — no recursive delegation (prevents infinite spawning)
- `clarify` — subagents cannot interact with the user
- `memory` — no writes to shared persistent memory
- `code_execution` — children should reason step-by-step
@@ -173,32 +173,16 @@ delegate_task(
)
```
## Depth Limit and Nested Orchestration
## Depth Limit
By default, delegation is **flat**: a parent (depth 0) spawns children (depth 1), and those children cannot delegate further. This prevents runaway recursive delegation.
For multi-stage workflows (research → synthesis, or parallel orchestration over sub-problems), a parent can spawn **orchestrator** children that *can* delegate their own workers:
```python
delegate_task(
goal="Survey three code review approaches and recommend one",
role="orchestrator", # Allows this child to spawn its own workers
context="...",
)
```
- `role="leaf"` (default): child cannot delegate further — identical to the flat-delegation behavior.
- `role="orchestrator"`: child retains the `delegation` toolset. Gated by `delegation.max_spawn_depth` (default **1** = flat, so `role="orchestrator"` is a no-op at defaults). Raise `max_spawn_depth` to 2 to allow orchestrator children to spawn leaf grandchildren; 3 for three levels (cap).
- `delegation.orchestrator_enabled: false`: global kill switch that forces every child to `leaf` regardless of the `role` parameter.
**Cost warning:** With `max_spawn_depth: 3` and `max_concurrent_children: 3`, the tree can reach 3×3×3 = 27 concurrent leaf agents. Each extra level multiplies spend — raise `max_spawn_depth` intentionally.
Delegation has a **depth limit of 2** — a parent (depth 0) can spawn children (depth 1), but children cannot delegate further. This prevents runaway recursive delegation chains.
## Key Properties
- Each subagent gets its **own terminal session** (separate from the parent)
- **Nested delegation is opt-in** — only `role="orchestrator"` children can delegate further, and only when `max_spawn_depth` is raised from its default of 1 (flat). Disable globally with `orchestrator_enabled: false`.
- Leaf subagents **cannot** call: `delegate_task`, `clarify`, `memory`, `send_message`, `execute_code`. Orchestrator subagents retain `delegate_task` but still cannot use the other four.
- **Interrupt propagation** — interrupting the parent interrupts all active children (including grandchildren under orchestrators)
- **No nested delegation** — children cannot delegate further (no grandchildren)
- Subagents **cannot** call: `delegate_task`, `clarify`, `memory`, `send_message`, `execute_code`
- **Interrupt propagation** — interrupting the parent interrupts all active children
- Only the final summary enters the parent's context, keeping token usage efficient
- Subagents inherit the parent's **API key, provider configuration, and credential pool** (enabling key rotation on rate limits)
@@ -209,7 +193,7 @@ delegate_task(
| **Reasoning** | Full LLM reasoning loop | Just Python code execution |
| **Context** | Fresh isolated conversation | No conversation, just script |
| **Tool access** | All non-blocked tools with reasoning | 7 tools via RPC, no reasoning |
| **Parallelism** | 3 concurrent subagents by default (configurable) | Single script |
| **Parallelism** | Up to 3 concurrent subagents | Single script |
| **Best for** | Complex tasks needing judgment | Mechanical multi-step pipelines |
| **Token cost** | Higher (full LLM loop) | Lower (only stdout returned) |
| **User interaction** | None (subagents can't clarify) | None |
@@ -222,9 +206,7 @@ delegate_task(
# In ~/.hermes/config.yaml
delegation:
max_iterations: 50 # Max turns per child (default: 50)
# max_concurrent_children: 3 # Parallel children per batch (default: 3)
# max_spawn_depth: 1 # Tree depth (1-3, default 1 = flat). Raise to 2 to allow orchestrator children to spawn leaves; 3 for three levels.
# orchestrator_enabled: true # Disable to force all children to leaf role.
default_toolsets: ["terminal", "file", "web"] # Default toolsets
model: "google/gemini-3-flash-preview" # Optional provider/model override
provider: "openrouter" # Optional built-in provider
@@ -1,13 +1,13 @@
---
title: Image Generation
description: Generate images via FAL.ai — 9 models including FLUX 2, GPT Image (1.5 & 2), Nano Banana Pro, Ideogram, Recraft V4 Pro, and more, selectable via `hermes tools`.
description: Generate images via FAL.ai — 8 models including FLUX 2, GPT-Image, Nano Banana Pro, Ideogram, Recraft V4 Pro, and more, selectable via `hermes tools`.
sidebar_label: Image Generation
sidebar_position: 6
---
# Image Generation
Hermes Agent generates images from text prompts via FAL.ai. Nine models are supported out of the box, each with different speed, quality, and cost tradeoffs. The active model is user-configurable via `hermes tools` and persists in `config.yaml`.
Hermes Agent generates images from text prompts via FAL.ai. Eight models are supported out of the box, each with different speed, quality, and cost tradeoffs. The active model is user-configurable via `hermes tools` and persists in `config.yaml`.
## Supported Models
@@ -18,7 +18,6 @@ Hermes Agent generates images from text prompts via FAL.ai. Nine models are supp
| `fal-ai/z-image/turbo` | ~2s | Bilingual EN/CN, 6B params | $0.005/MP |
| `fal-ai/nano-banana-pro` | ~8s | Gemini 3 Pro, reasoning depth, text rendering | $0.15/image (1K) |
| `fal-ai/gpt-image-1.5` | ~15s | Prompt adherence | $0.034/image |
| `fal-ai/gpt-image-2` | ~20s | SOTA text rendering + CJK, world-aware photorealism | $0.04–0.06/image |
| `fal-ai/ideogram/v3` | ~5s | Best typography | $0.03–0.09/image |
| `fal-ai/recraft/v4/pro/text-to-image` | ~8s | Design, brand systems, production-ready | $0.25/image |
| `fal-ai/qwen-image` | ~12s | LLM-based, complex text | $0.02/MP |
@@ -66,7 +65,7 @@ image_gen:
### GPT-Image Quality
The `fal-ai/gpt-image-1.5` and `fal-ai/gpt-image-2` request quality is pinned to `medium` (~$0.034–$0.06/image at 1024×1024). We don't expose the `low` / `high` tiers as a user-facing option so that Nous Portal billing stays predictable across all users — the cost spread between tiers is 3–22×. If you want a cheaper option, pick Klein 9B or Z-Image Turbo; if you want higher quality, use Nano Banana Pro or Recraft V4 Pro.
The `fal-ai/gpt-image-1.5` request quality is pinned to `medium` (~$0.034/image at 1024×1024). We don't expose the `low` / `high` tiers as a user-facing option so that Nous Portal billing stays predictable across all users — the cost spread between tiers is ~22×. If you want a cheaper GPT-Image option, pick a different model; if you want higher quality, use Klein 9B or Imagen-class models.
## Usage
@@ -88,13 +87,11 @@ Make me a futuristic cityscape, landscape orientation
Every model accepts the same three aspect ratios from the agent's perspective. Internally, each model's native size spec is filled in automatically:
| Agent input | image_size (flux/z-image/qwen/recraft/ideogram) | aspect_ratio (nano-banana-pro) | image_size (gpt-image-1.5) | image_size (gpt-image-2) |
|---|---|---|---|---|
| `landscape` | `landscape_16_9` | `16:9` | `1536x1024` | `landscape_4_3` (1024×768) |
| `square` | `square_hd` | `1:1` | `1024x1024` | `square_hd` (1024×1024) |
| `portrait` | `portrait_16_9` | `9:16` | `1024x1536` | `portrait_4_3` (768×1024) |
GPT Image 2 maps to 4:3 presets rather than 16:9 because its minimum pixel count is 655,360 — the `landscape_16_9` preset (1024×576 = 589,824) would be rejected.
| Agent input | image_size (flux/z-image/qwen/recraft/ideogram) | aspect_ratio (nano-banana-pro) | image_size (gpt-image) |
|---|---|---|---|
| `landscape` | `landscape_16_9` | `16:9` | `1536x1024` |
| `square` | `square_hd` | `1:1` | `1024x1024` |
| `portrait` | `portrait_16_9` | `9:16` | `1024x1536` |
This translation happens in `_build_fal_payload()` — agent code never has to know about per-model schema differences.
+1 -1
View File
@@ -20,7 +20,7 @@ Hermes Agent includes a rich set of capabilities that extend far beyond basic ch
## Automation
- **[Scheduled Tasks (Cron)](cron.md)** — Schedule tasks to run automatically with natural language or cron expressions. Jobs can attach skills, deliver results to any platform, and support pause/resume/edit operations.
- **[Subagent Delegation](delegation.md)** — The `delegate_task` tool spawns child agent instances with isolated context, restricted toolsets, and their own terminal sessions. Run 3 concurrent subagents by default (configurable) for parallel workstreams.
- **[Subagent Delegation](delegation.md)** — The `delegate_task` tool spawns child agent instances with isolated context, restricted toolsets, and their own terminal sessions. Run up to 3 concurrent subagents for parallel workstreams.
- **[Code Execution](code-execution.md)** — The `execute_code` tool lets the agent write Python scripts that call Hermes tools programmatically, collapsing multi-step workflows into a single LLM turn via sandboxed RPC execution.
- **[Event Hooks](hooks.md)** — Run custom code at key lifecycle points. Gateway hooks handle logging, alerts, and webhooks; plugin hooks handle tool interception, metrics, and guardrails.
- **[Batch Processing](batch-processing.md)** — Run the Hermes agent across hundreds or thousands of prompts in parallel, generating structured ShareGPT-format trajectory data for training data generation or evaluation.