Compare commits
4 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 75b7bad6be | |||
| 6022d95732 | |||
| 2614d46f06 | |||
| 2c5fb45d08 |
+5
-16
@@ -145,11 +145,10 @@ DEFAULT_CONTEXT_LENGTHS = {
|
||||
"claude": 200000,
|
||||
# OpenAI — GPT-5 family (most have 400k; specific overrides first)
|
||||
# Source: https://developers.openai.com/api/docs/models
|
||||
# GPT-5.5 (launched Apr 23 2026) is 1.05M on the direct OpenAI API and
|
||||
# ChatGPT Codex OAuth caps it at 272K; both paths resolve via their own
|
||||
# provider-aware branches (_resolve_codex_oauth_context_length + models.dev).
|
||||
# This hardcoded value is only reached when every probe misses.
|
||||
"gpt-5.5": 1050000,
|
||||
# GPT-5.5 (launched Apr 23 2026). 400k is the fallback for providers we
|
||||
# can't probe live. ChatGPT Codex OAuth actually caps lower (272k as of
|
||||
# Apr 2026) and is resolved via _resolve_codex_oauth_context_length().
|
||||
"gpt-5.5": 400000,
|
||||
"gpt-5.4-nano": 400000, # 400k (not 1.05M like full 5.4)
|
||||
"gpt-5.4-mini": 400000, # 400k (not 1.05M like full 5.4)
|
||||
"gpt-5.4": 1050000, # GPT-5.4, GPT-5.4 Pro (1.05M context)
|
||||
@@ -165,17 +164,7 @@ DEFAULT_CONTEXT_LENGTHS = {
|
||||
"gemma-4-31b": 256000,
|
||||
"gemma-3": 131072,
|
||||
"gemma": 8192, # fallback for older gemma models
|
||||
# DeepSeek — V4 family ships with a 1M context window. The legacy
|
||||
# aliases ``deepseek-chat`` / ``deepseek-reasoner`` are server-side
|
||||
# mapped to the non-thinking / thinking modes of ``deepseek-v4-flash``
|
||||
# and inherit the same 1M window. The ``deepseek`` substring entry
|
||||
# below remains as a 128K fallback for older / unknown DeepSeek model
|
||||
# ids (e.g. via custom endpoints).
|
||||
# https://api-docs.deepseek.com/zh-cn/quick_start/pricing
|
||||
"deepseek-v4-pro": 1_000_000,
|
||||
"deepseek-v4-flash": 1_000_000,
|
||||
"deepseek-chat": 1_000_000,
|
||||
"deepseek-reasoner": 1_000_000,
|
||||
# DeepSeek
|
||||
"deepseek": 128000,
|
||||
# Meta
|
||||
"llama": 131072,
|
||||
|
||||
@@ -180,145 +180,3 @@ def format_remaining(seconds: float) -> str:
|
||||
h, remainder = divmod(s, 3600)
|
||||
m = remainder // 60
|
||||
return f"{h}h {m}m" if m else f"{h}h"
|
||||
|
||||
|
||||
# Minimum reset window, in seconds, for an exhausted bucket to count as a
# genuine quota exhaustion. Anything shorter is assumed to be transient
# (upstream jitter, secondary throttling) and is not worth tripping the
# cross-session breaker for.
_MIN_RESET_FOR_BREAKER_SECONDS = 60.0
|
||||
|
||||
|
||||
def is_genuine_nous_rate_limit(
    *,
    headers: Optional[Mapping[str, str]] = None,
    last_known_state: Optional[Any] = None,
) -> bool:
    """Classify a Nous Portal 429 as a real account limit or upstream noise.

    Nous Portal fronts several upstream providers (DeepSeek, Kimi, MiMo,
    Hermes, ...) behind a single endpoint, so a 429 has two possible causes:

      (a) The caller's own RPM / RPH / TPM / TPH bucket on Nous is used up.
          This is a genuine rate limit that persists until the bucket
          resets.
      (b) The upstream provider for one specific model is out of capacity.
          This is transient, clears within seconds, and is unrelated to the
          caller's Nous quota.

    Tripping the cross-session breaker for (b) would block every Nous
    request (all models share one provider key) for minutes even though the
    account is healthy and another model would have worked -- e.g. DeepSeek
    V4 Pro 429s ending up blocking Kimi 2.6 and MiMo V2.5 Pro.

    Two signals are consulted, in this order:

      1. The ``x-ratelimit-*`` headers on the 429 response itself. A bucket
         with ``remaining == 0`` and a reset window of at least
         ``_MIN_RESET_FOR_BREAKER_SECONDS`` is taken as proof of (a).
      2. The last rate-limit state captured from a previous successful
         response. A bucket there that was already exhausted with a
         substantial reset window means the 429 is most likely (a)
         continuing.

    When neither signal fires the 429 is treated as (b): the caller should
    fail only the single request, let retry / model-switch proceed, and NOT
    write the cross-session breaker file.

    Args:
        headers: Response headers of the 429, if available.
        last_known_state: RateLimitState-like object from a recent
            successful response, or None.
            NOTE(review): the helper this is passed to reads buckets via
            attribute access, so a plain dict of snapshots does not appear
            to be matched -- confirm against callers.

    Returns:
        True when the evidence points at (a), False otherwise.
    """
    # Signal 1: the 429's own headers.
    header_buckets = _parse_buckets_from_headers(headers)
    if _has_exhausted_bucket(header_buckets):
        return True

    # Signal 2: state remembered from the last successful response.
    if last_known_state is None:
        return False
    return bool(_has_exhausted_bucket_in_object(last_known_state))
|
||||
|
||||
|
||||
def _parse_buckets_from_headers(
    headers: Optional[Mapping[str, str]],
) -> dict[str, tuple[Optional[int], Optional[float]]]:
    """Extract ``(remaining, reset_seconds)`` per bucket from x-ratelimit-* headers.

    Header names are matched case-insensitively. The buckets looked up are
    ``requests``, ``requests-1h``, ``tokens`` and ``tokens-1h``, read from
    ``x-ratelimit-remaining-<tag>`` and ``x-ratelimit-reset-<tag>``.

    Args:
        headers: Response headers, or None.

    Returns:
        A dict keyed by bucket tag. A bucket is included when at least one
        of its two values parsed; the value that is missing or unparseable
        is None. Returns an empty dict when ``headers`` is empty/None or
        carries no ``x-ratelimit-`` header at all.
    """
    if not headers:
        return {}

    lowered = {k.lower(): v for k, v in headers.items()}
    if not any(k.startswith("x-ratelimit-") for k in lowered):
        return {}

    def _maybe_int(raw: Optional[str]) -> Optional[int]:
        if raw is None:
            return None
        try:
            # Go through float() so values such as "12.0" are accepted.
            return int(float(raw))
        except (TypeError, ValueError, OverflowError):
            # OverflowError: int(float("inf")) -- an infinite "remaining"
            # is unparseable, not a reason to crash 429 classification.
            return None

    def _maybe_float(raw: Optional[str]) -> Optional[float]:
        if raw is None:
            return None
        try:
            return float(raw)
        except (TypeError, ValueError):
            return None

    result: dict[str, tuple[Optional[int], Optional[float]]] = {}
    for tag in ("requests", "requests-1h", "tokens", "tokens-1h"):
        remaining = _maybe_int(lowered.get(f"x-ratelimit-remaining-{tag}"))
        reset = _maybe_float(lowered.get(f"x-ratelimit-reset-{tag}"))
        if remaining is not None or reset is not None:
            result[tag] = (remaining, reset)
    return result
|
||||
|
||||
|
||||
def _has_exhausted_bucket(
    buckets: Mapping[str, tuple[Optional[int], Optional[float]]],
) -> bool:
    """Report whether any bucket is empty AND slow to reset.

    A bucket qualifies when its ``remaining`` value is known and not
    positive, and its ``reset`` value is known and at least
    ``_MIN_RESET_FOR_BREAKER_SECONDS``. Buckets with an unknown (None)
    ``remaining`` or ``reset`` never qualify.
    """
    return any(
        remaining is not None
        and not (remaining > 0)
        and reset is not None
        and reset >= _MIN_RESET_FOR_BREAKER_SECONDS
        for remaining, reset in buckets.values()
    )
|
||||
|
||||
|
||||
def _has_exhausted_bucket_in_object(state: Any) -> bool:
    """Look for an exhausted bucket on a RateLimitState-like object.

    The buckets are read from the attributes ``requests_min``,
    ``requests_hour``, ``tokens_min`` and ``tokens_hour`` (the layout of the
    dataclass in ``agent.rate_limit_tracker``). Attributes that are missing
    or None are skipped, so an object without any of them yields False.

    A bucket counts as exhausted when its ``limit`` is positive, its
    ``remaining`` is not positive (a missing ``remaining`` is read as 0),
    and its reset window is at least ``_MIN_RESET_FOR_BREAKER_SECONDS``.
    """

    def _is_exhausted(bucket: Any) -> bool:
        cap = getattr(bucket, "limit", 0) or 0
        left = getattr(bucket, "remaining", 0) or 0
        # Prefer the adjusted "remaining_seconds_now" value when the bucket
        # exposes one; otherwise use the raw reset_seconds.
        window = getattr(bucket, "remaining_seconds_now", None)
        if window is None:
            window = getattr(bucket, "reset_seconds", 0.0) or 0.0
        if cap <= 0 or left > 0:
            return False
        return bool(window >= _MIN_RESET_FOR_BREAKER_SECONDS)

    for name in ("requests_min", "requests_hour", "tokens_min", "tokens_hour"):
        candidate = getattr(state, name, None)
        if candidate is not None and _is_exhausted(candidate):
            return True
    return False
|
||||
|
||||
@@ -1,144 +0,0 @@
|
||||
"""
|
||||
Contextual first-touch onboarding hints.
|
||||
|
||||
Instead of blocking first-run questionnaires, show a one-time hint the *first*
|
||||
time a user hits a behavior fork — message-while-running, first long-running
|
||||
tool, etc. Each hint is shown once per install (tracked in ``config.yaml`` under
|
||||
``onboarding.seen.<flag>``) and then never again.
|
||||
|
||||
Keep this module tiny and dependency-free so both the CLI and gateway can import
|
||||
it without pulling in heavy modules.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Any, Mapping, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# Flag names (stable — used as config.yaml keys under onboarding.seen)
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
BUSY_INPUT_FLAG = "busy_input_prompt"
|
||||
TOOL_PROGRESS_FLAG = "tool_progress_prompt"
|
||||
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# Hint content
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
def busy_input_hint_gateway(mode: str) -> str:
    """Return the one-time gateway hint for messaging while the agent is busy.

    ``mode`` is the effective busy_input_mode that was just applied, so the
    wording matches what actually happened: ``"queue"`` yields the "queued"
    text, any other value yields the "interrupted" text.
    """
    if mode != "queue":
        return (
            "💡 First-time tip — I just interrupted my current task to answer you. "
            "Send `/busy queue` to queue follow-ups for after the current task instead, "
            "or `/busy status` to check. This notice won't appear again."
        )
    return (
        "💡 First-time tip — I queued your message instead of interrupting. "
        "Send `/busy interrupt` to make new messages stop the current task "
        "immediately, or `/busy status` to check. This notice won't appear again."
    )
|
||||
|
||||
|
||||
def busy_input_hint_cli(mode: str) -> str:
    """Return the CLI flavour of the busy-input hint (plain text, no markdown).

    ``"queue"`` selects the "queued" wording; every other mode selects the
    "interrupted" wording.
    """
    if mode != "queue":
        return (
            "(tip) Your message interrupted the current run. "
            "Use /busy queue to queue messages for the next turn instead. "
            "This tip only shows once."
        )
    return (
        "(tip) Your message was queued for the next turn. "
        "Use /busy interrupt to make Enter stop the current run instead. "
        "This tip only shows once."
    )
|
||||
|
||||
|
||||
def tool_progress_hint_gateway() -> str:
    """Return the one-time gateway hint shown after a long-running tool."""
    hint = (
        "💡 First-time tip — that tool took a while and I'm streaming every step. "
        "If the progress messages feel noisy, send `/verbose` to cycle modes "
        "(all → new → off). This notice won't appear again."
    )
    return hint
|
||||
|
||||
|
||||
def tool_progress_hint_cli() -> str:
    """Return the CLI (plain-text) hint shown after a long-running tool."""
    hint = (
        "(tip) That tool ran for a while. Use /verbose to cycle tool-progress "
        "display modes (all -> new -> off -> verbose). This tip only shows once."
    )
    return hint
|
||||
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# State read / write
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
def _get_seen_dict(config: Mapping[str, Any]) -> Mapping[str, Any]:
    """Return the ``onboarding.seen`` mapping from ``config``.

    Any level that is missing or is not a mapping (including ``config``
    itself) results in an empty dict, so callers can always call ``.get``.
    """
    if not isinstance(config, Mapping):
        return {}
    section = config.get("onboarding")
    if not isinstance(section, Mapping):
        return {}
    flags = section.get("seen")
    if isinstance(flags, Mapping):
        return flags
    return {}
|
||||
|
||||
|
||||
def is_seen(config: Mapping[str, Any], flag: str) -> bool:
    """Tell whether the first-touch hint named ``flag`` was already shown.

    The flag is looked up under ``onboarding.seen`` in ``config``; any
    truthy stored value counts as seen, and a missing flag counts as unseen.
    """
    seen_flags = _get_seen_dict(config)
    return bool(seen_flags.get(flag))
|
||||
|
||||
|
||||
def mark_seen(config_path: Path, flag: str) -> bool:
    """Persist ``onboarding.seen.<flag> = True`` to ``config_path``.

    The existing YAML (if the file exists) is loaded, the flag is set, and
    the result is written back through ``utils.atomic_yaml_write`` so that a
    concurrent reader cannot observe a partially-written file. Onboarding is
    best-effort: every failure is logged at debug level and reported via the
    return value rather than raised.

    When ``config_path`` does not exist, the write is attempted with a
    config containing only the onboarding section.
    NOTE(review): whether that creates the file depends on
    ``atomic_yaml_write`` -- confirm.

    Returns:
        True if the flag was already True or was written successfully;
        False if ``yaml`` / ``utils`` cannot be imported or anything fails
        while reading, updating or writing the file.
    """
    try:
        import yaml
        from utils import atomic_yaml_write
    except Exception as e:  # pragma: no cover — dependency issue
        logger.debug("onboarding: failed to import yaml/utils: %s", e)
        return False

    try:
        cfg: dict = {}
        if config_path.exists():
            with open(config_path, encoding="utf-8") as handle:
                cfg = yaml.safe_load(handle) or {}

        # Make sure both nesting levels are real dicts before writing to them.
        section = cfg.get("onboarding")
        if not isinstance(section, dict):
            section = {}
            cfg["onboarding"] = section
        flags = section.get("seen")
        if not isinstance(flags, dict):
            flags = {}
            section["seen"] = flags

        if flags.get(flag) is True:
            return True  # already marked — nothing to do

        flags[flag] = True
        atomic_yaml_write(config_path, cfg)
        return True
    except Exception as e:
        logger.debug("onboarding: failed to mark flag %s: %s", flag, e)
        return False
|
||||
|
||||
|
||||
__all__ = [
|
||||
"BUSY_INPUT_FLAG",
|
||||
"TOOL_PROGRESS_FLAG",
|
||||
"busy_input_hint_gateway",
|
||||
"busy_input_hint_cli",
|
||||
"tool_progress_hint_gateway",
|
||||
"tool_progress_hint_cli",
|
||||
"is_seen",
|
||||
"mark_seen",
|
||||
]
|
||||
@@ -176,64 +176,6 @@ SKILLS_GUIDANCE = (
|
||||
"Skills that aren't maintained become liabilities."
|
||||
)
|
||||
|
||||
KANBAN_GUIDANCE = (
|
||||
"# You are a Kanban worker\n"
|
||||
"You were spawned by the Hermes Kanban dispatcher to execute ONE task from "
|
||||
"the shared board at `~/.hermes/kanban.db`. Your task id is in "
|
||||
"`$HERMES_KANBAN_TASK`; your workspace is `$HERMES_KANBAN_WORKSPACE`. "
|
||||
"The `kanban_*` tools in your schema are your primary coordination surface — "
|
||||
"they write directly to the shared SQLite DB and work regardless of terminal "
|
||||
"backend (local/docker/modal/ssh).\n"
|
||||
"\n"
|
||||
"## Lifecycle\n"
|
||||
"\n"
|
||||
"1. **Orient.** Call `kanban_show()` first (no args — it defaults to your "
|
||||
"task). The response includes title, body, parent-task handoffs (summary + "
|
||||
"metadata), any prior attempts on this task if you're a retry, the full "
|
||||
"comment thread, and a pre-formatted `worker_context` you can treat as "
|
||||
"ground truth.\n"
|
||||
"2. **Work inside the workspace.** `cd $HERMES_KANBAN_WORKSPACE` before "
|
||||
"any file operations. The workspace is yours for this run. Don't modify "
|
||||
"files outside it unless the task explicitly asks.\n"
|
||||
"3. **Heartbeat on long operations.** Call `kanban_heartbeat(note=...)` "
|
||||
"every few minutes during long subprocesses (training, encoding, crawling). "
|
||||
"Skip heartbeats for short tasks.\n"
|
||||
"4. **Block on genuine ambiguity.** If you need a human decision you cannot "
|
||||
"infer (missing credentials, UX choice, paywalled source, peer output you "
|
||||
"need first), call `kanban_block(reason=\"...\")` and stop. Don't guess. "
|
||||
"The user will unblock with context and the dispatcher will respawn you.\n"
|
||||
"5. **Complete with structured handoff.** Call `kanban_complete(summary=..., "
|
||||
"metadata=...)`. `summary` is 1–3 human-readable sentences naming concrete "
|
||||
"artifacts. `metadata` is machine-readable facts "
|
||||
"(`{changed_files: [...], tests_run: N, decisions: [...]}`). Downstream "
|
||||
"workers read both via their own `kanban_show`. Never put secrets / "
|
||||
"tokens / raw PII in either field — run rows are durable forever.\n"
|
||||
"6. **If follow-up work appears, create it; don't do it.** Use "
|
||||
"`kanban_create(title=..., assignee=<right-profile>, parents=[your-task-id])` "
|
||||
"to spawn a child task for the appropriate specialist profile instead of "
|
||||
"scope-creeping into the next thing.\n"
|
||||
"\n"
|
||||
"## Orchestrator mode\n"
|
||||
"\n"
|
||||
"If your task is itself a decomposition task (e.g. a planner profile given "
|
||||
"a high-level goal), use `kanban_create` to fan out into child tasks — one "
|
||||
"per specialist, each with an explicit `assignee` and `parents=[...]` to "
|
||||
"express dependencies. Then `kanban_complete` your own task with a summary "
|
||||
"of the decomposition. Do NOT execute the work yourself; your job is "
|
||||
"routing, not implementation.\n"
|
||||
"\n"
|
||||
"## Do NOT\n"
|
||||
"\n"
|
||||
"- Do not shell out to `hermes kanban <verb>` for board operations. Use "
|
||||
"the `kanban_*` tools — they work across all terminal backends.\n"
|
||||
"- Do not complete a task you didn't actually finish. Block it.\n"
|
||||
"- Do not assign follow-up work to yourself. Assign it to the right "
|
||||
"specialist profile.\n"
|
||||
"- Do not call `delegate_task` as a board substitute. `delegate_task` is "
|
||||
"for short reasoning subtasks inside your own run; board tasks are for "
|
||||
"cross-agent handoffs that outlive one API loop."
|
||||
)
|
||||
|
||||
TOOL_USE_ENFORCEMENT_GUIDANCE = (
|
||||
"# Tool-use enforcement\n"
|
||||
"You MUST use your tools to take action — do not describe what you would do "
|
||||
|
||||
+8
-23
@@ -824,9 +824,7 @@ delegation:
|
||||
# Display
|
||||
# =============================================================================
|
||||
display:
|
||||
# Use compact banner mode (hides the ASCII-art banner, shows a single line).
|
||||
# true: Compact single-line banner
|
||||
# false: Full ASCII banner with tool/skill summary (default)
|
||||
# Use compact banner mode
|
||||
compact: false
|
||||
|
||||
# Tool progress display level (CLI and gateway)
|
||||
@@ -840,15 +838,12 @@ display:
|
||||
# Gateway-only natural mid-turn assistant updates.
|
||||
# When true, completed assistant status messages are sent as separate chat
|
||||
# messages. This is independent of tool_progress and gateway streaming.
|
||||
# true: Send mid-turn assistant updates as separate messages (default)
|
||||
# false: Only send the final response
|
||||
interim_assistant_messages: true
|
||||
|
||||
# What Enter does when Hermes is already busy (CLI and gateway platforms).
|
||||
# What Enter does when Hermes is already busy in the CLI.
|
||||
# interrupt: Interrupt the current run and redirect Hermes (default)
|
||||
# queue: Queue your message for the next turn
|
||||
# Ctrl+C (or /stop in gateway) always interrupts regardless of this setting.
|
||||
# Toggle at runtime with /busy_input_mode <interrupt|queue>.
|
||||
# Ctrl+C always interrupts regardless of this setting.
|
||||
busy_input_mode: interrupt
|
||||
|
||||
# Background process notifications (gateway/messaging only).
|
||||
@@ -864,22 +859,17 @@ display:
|
||||
# Play terminal bell when agent finishes a response.
|
||||
# Useful for long-running tasks — your terminal will ding when the agent is done.
|
||||
# Works over SSH. Most terminals can be configured to flash the taskbar or play a sound.
|
||||
# true: Ring the terminal bell on each response
|
||||
# false: Silent (default)
|
||||
bell_on_complete: false
|
||||
|
||||
# Show model reasoning/thinking before each response.
|
||||
# When enabled, a dim box shows the model's thought process above the response.
|
||||
# Toggle at runtime with /reasoning show or /reasoning hide.
|
||||
# true: Show the reasoning box
|
||||
# false: Hide reasoning (default)
|
||||
show_reasoning: false
|
||||
|
||||
# Stream tokens to the terminal as they arrive instead of waiting for the
|
||||
# full response. The response box opens on first token and text appears
|
||||
# line-by-line. Tool calls are still captured silently.
|
||||
# true: Stream tokens as they arrive (default)
|
||||
# false: Wait for the full response before rendering
|
||||
# Stream tokens to the terminal in real-time. Disable to wait for full responses.
|
||||
streaming: true
|
||||
|
||||
# ───────────────────────────────────────────────────────────────────────────
|
||||
@@ -889,15 +879,10 @@ display:
|
||||
# response box label, and branding text. Change at runtime with /skin <name>.
|
||||
#
|
||||
# Built-in skins:
|
||||
# default — Classic Hermes gold/kawaii
|
||||
# ares — Crimson/bronze war-god theme with spinner wings
|
||||
# mono — Clean grayscale monochrome
|
||||
# slate — Cool blue developer-focused
|
||||
# daylight — Bright light-mode theme
|
||||
# warm-lightmode — Warm paper-tone light-mode theme
|
||||
# poseidon — Sea-green/teal Olympian theme
|
||||
# sisyphus — Earthy stone-and-moss theme
|
||||
# charizard — Fiery orange dragon theme
|
||||
# default — Classic Hermes gold/kawaii
|
||||
# ares — Crimson/bronze war-god theme with spinner wings
|
||||
# mono — Clean grayscale monochrome
|
||||
# slate — Cool blue developer-focused
|
||||
#
|
||||
# Custom skins: drop a YAML file in ~/.hermes/skins/<name>.yaml
|
||||
# Schema (all fields optional, missing values inherit from default):
|
||||
|
||||
@@ -417,11 +417,6 @@ def load_cli_config() -> Dict[str, Any]:
|
||||
"base_url": "", # Direct OpenAI-compatible endpoint for subagents
|
||||
"api_key": "", # API key for delegation.base_url (falls back to OPENAI_API_KEY)
|
||||
},
|
||||
"onboarding": {
|
||||
# First-touch hint flags (see agent/onboarding.py). Each hint is
|
||||
# shown once per install then latched here.
|
||||
"seen": {},
|
||||
},
|
||||
}
|
||||
|
||||
# Track whether the config file explicitly set terminal config.
|
||||
@@ -5158,29 +5153,27 @@ class HermesCLI:
|
||||
_cprint(f" ✓ Model switched: {result.new_model}")
|
||||
_cprint(f" Provider: {provider_label}")
|
||||
|
||||
# Context: always resolve via the provider-aware chain so Codex OAuth,
|
||||
# Copilot, and Nous-enforced caps win over the raw models.dev entry
|
||||
# (e.g. gpt-5.5 is 1.05M on openai but 272K on Codex OAuth).
|
||||
mi = result.model_info
|
||||
try:
|
||||
from hermes_cli.model_switch import resolve_display_context_length
|
||||
ctx = resolve_display_context_length(
|
||||
result.new_model,
|
||||
result.target_provider,
|
||||
base_url=result.base_url or self.base_url or "",
|
||||
api_key=result.api_key or self.api_key or "",
|
||||
model_info=mi,
|
||||
)
|
||||
if ctx:
|
||||
_cprint(f" Context: {ctx:,} tokens")
|
||||
except Exception:
|
||||
pass
|
||||
if mi:
|
||||
if mi.context_window:
|
||||
_cprint(f" Context: {mi.context_window:,} tokens")
|
||||
if mi.max_output:
|
||||
_cprint(f" Max output: {mi.max_output:,} tokens")
|
||||
if mi.has_cost_data():
|
||||
_cprint(f" Cost: {mi.format_cost()}")
|
||||
_cprint(f" Capabilities: {mi.format_capabilities()}")
|
||||
else:
|
||||
try:
|
||||
from agent.model_metadata import get_model_context_length
|
||||
ctx = get_model_context_length(
|
||||
result.new_model,
|
||||
base_url=result.base_url or self.base_url,
|
||||
api_key=result.api_key or self.api_key,
|
||||
provider=result.target_provider,
|
||||
)
|
||||
_cprint(f" Context: {ctx:,} tokens")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
cache_enabled = (
|
||||
(base_url_host_matches(result.base_url or "", "openrouter.ai") and "claude" in result.new_model.lower())
|
||||
@@ -5818,28 +5811,7 @@ class HermesCLI:
|
||||
|
||||
print(f"(._.) Unknown cron command: {subcommand}")
|
||||
print(" Available: list, add, edit, pause, resume, run, remove")
|
||||
|
||||
def _handle_kanban_command(self, cmd: str):
    """Handle ``/kanban`` by delegating to the shared kanban CLI.

    ``cmd`` is the user's full ``/kanban ...`` input, leading slash
    included. The slash and the ``kanban`` word are removed and the rest is
    passed to ``hermes_cli.kanban.run_slash``, whose single formatted string
    result is printed when non-empty. Any exception raised by ``run_slash``
    is turned into a printed error line instead of propagating.
    """
    from hermes_cli.kanban import run_slash

    prefix = "kanban"
    # str.lstrip("/") is a no-op when there is no leading slash.
    args = cmd.strip().lstrip("/")
    if args.startswith(prefix):
        args = args[len(prefix):].lstrip()

    try:
        text = run_slash(args)
    except Exception as exc:  # pragma: no cover - defensive
        text = f"(._.) kanban error: {exc}"

    if text:
        print(text)
|
||||
|
||||
|
||||
def _handle_skills_command(self, cmd: str):
|
||||
"""Handle /skills slash command — delegates to hermes_cli.skills_hub."""
|
||||
from hermes_cli.skills_hub import handle_skills_slash
|
||||
@@ -6076,8 +6048,6 @@ class HermesCLI:
|
||||
self.save_conversation()
|
||||
elif canonical == "cron":
|
||||
self._handle_cron_command(cmd_original)
|
||||
elif canonical == "kanban":
|
||||
self._handle_kanban_command(cmd_original)
|
||||
elif canonical == "skills":
|
||||
with self._busy_command(self._slow_command_status(cmd_original)):
|
||||
self._handle_skills_command(cmd_original)
|
||||
@@ -6152,6 +6122,8 @@ class HermesCLI:
|
||||
self._handle_agents_command()
|
||||
elif canonical == "background":
|
||||
self._handle_background_command(cmd_original)
|
||||
elif canonical == "btw":
|
||||
self._handle_btw_command(cmd_original)
|
||||
elif canonical == "queue":
|
||||
# Extract prompt after "/queue " or "/q "
|
||||
parts = cmd_original.split(None, 1)
|
||||
@@ -6438,6 +6410,122 @@ class HermesCLI:
|
||||
self._background_tasks[task_id] = thread
|
||||
thread.start()
|
||||
|
||||
def _handle_btw_command(self, cmd: str):
    """Handle ``/btw <question>`` — an ephemeral side question.

    Takes a snapshot of the current conversation history, starts an agent
    with no toolsets enabled in a daemon background thread, and prints the
    answer in a panel. The agent is created with ``session_db=None``,
    ``skip_memory=True``, ``skip_context_files=True`` and
    ``persist_session=False``.

    Prints usage help and returns when no question text follows the
    command, and prints an error and returns when runtime credentials
    cannot be ensured.
    """
    tokens = cmd.strip().split(maxsplit=1)
    question = tokens[1].strip() if len(tokens) > 1 else ""
    if not question:
        _cprint(" Usage: /btw <question>")
        _cprint(" Example: /btw what module owns session title sanitization?")
        _cprint(" Answers using session context. No tools, not persisted.")
        return

    task_id = f"btw_{datetime.now().strftime('%H%M%S')}_{uuid.uuid4().hex[:6]}"

    if not self._ensure_runtime_credentials():
        _cprint(" (>_<) Cannot start /btw: no valid credentials.")
        return

    turn_route = self._resolve_turn_agent_config(question)
    # Copy so later turns in the main session cannot alter what /btw sees.
    history_snapshot = list(self.conversation_history)

    preview = question if len(question) <= 60 else question[:60] + "..."
    _cprint(f' 💬 /btw: "{preview}"')

    def _refresh_before_print():
        # Give the TUI a moment to redraw before output is written.
        if self._app:
            self._app.invalidate()
            time.sleep(0.05)
            print()

    def run_btw():
        try:
            model_name = turn_route["model"]
            runtime = turn_route["runtime"]
            btw_agent = AIAgent(
                model=model_name,
                api_key=runtime.get("api_key"),
                base_url=runtime.get("base_url"),
                provider=runtime.get("provider"),
                api_mode=runtime.get("api_mode"),
                acp_command=runtime.get("command"),
                acp_args=runtime.get("args"),
                max_iterations=8,
                enabled_toolsets=[],
                quiet_mode=True,
                verbose_logging=False,
                session_id=task_id,
                platform="cli",
                reasoning_config=self.reasoning_config,
                service_tier=self.service_tier,
                request_overrides=turn_route.get("request_overrides"),
                providers_allowed=self._providers_only,
                providers_ignored=self._providers_ignore,
                providers_order=self._providers_order,
                provider_sort=self._provider_sort,
                provider_require_parameters=self._provider_require_params,
                provider_data_collection=self._provider_data_collection,
                fallback_model=self._fallback_model,
                session_db=None,
                skip_memory=True,
                skip_context_files=True,
                persist_session=False,
            )

            btw_prompt = (
                "[Ephemeral /btw side question. Answer using the conversation "
                "context. No tools available. Be direct and concise.]\n\n"
                + question
            )
            result = btw_agent.run_conversation(
                user_message=btw_prompt,
                conversation_history=history_snapshot,
                task_id=task_id,
            )

            # Prefer the final response; fall back to the reported error.
            response = ""
            if result:
                response = result.get("final_response") or ""
                if not response and result.get("error"):
                    response = f"Error: {result['error']}"

            _refresh_before_print()

            if response:
                try:
                    from hermes_cli.skin_engine import get_active_skin
                    border_color = get_active_skin().get_color(
                        "response_border", "#4F6D4A"
                    )
                except Exception:
                    # Skin lookup is cosmetic; use the default border colour.
                    border_color = "#4F6D4A"

                ChatConsole().print(Panel(
                    _render_final_assistant_content(
                        response, mode=self.final_response_markdown
                    ),
                    title=f"[{border_color} bold]⚕ /btw[/]",
                    title_align="left",
                    border_style=border_color,
                    box=rich_box.HORIZONTALS,
                    padding=(1, 4),
                ))
            else:
                _cprint(" 💬 /btw: (no response)")

            if self.bell_on_complete:
                sys.stdout.write("\a")
                sys.stdout.flush()

        except Exception as e:
            _refresh_before_print()
            _cprint(f" ❌ /btw failed: {e}")
        finally:
            if self._app:
                self._invalidate(min_interval=0)

    worker = threading.Thread(target=run_btw, daemon=True, name=f"btw-{task_id}")
    worker.start()
|
||||
|
||||
@staticmethod
|
||||
def _try_launch_chrome_debug(port: int, system: str) -> bool:
|
||||
"""Try to launch Chrome/Chromium with remote debugging enabled.
|
||||
@@ -7322,31 +7410,6 @@ class HermesCLI:
|
||||
_cprint(f" {line}")
|
||||
except Exception:
|
||||
pass
|
||||
# First-touch onboarding: on the first tool in this process
|
||||
# that takes longer than the threshold while we're in the
|
||||
# noisiest progress mode, print a one-time hint about
|
||||
# /verbose. Latched on self so it fires at most once per
|
||||
# process; persisted to config.yaml so it never fires again
|
||||
# across processes either.
|
||||
try:
|
||||
if (
|
||||
not getattr(self, "_long_tool_hint_fired", False)
|
||||
and self.tool_progress_mode == "all"
|
||||
and duration >= 30.0
|
||||
):
|
||||
from agent.onboarding import (
|
||||
TOOL_PROGRESS_FLAG,
|
||||
is_seen,
|
||||
mark_seen,
|
||||
tool_progress_hint_cli,
|
||||
)
|
||||
if not is_seen(CLI_CONFIG, TOOL_PROGRESS_FLAG):
|
||||
self._long_tool_hint_fired = True
|
||||
_cprint(f" {_DIM}{tool_progress_hint_cli()}{_RST}")
|
||||
mark_seen(_hermes_home / "config.yaml", TOOL_PROGRESS_FLAG)
|
||||
CLI_CONFIG.setdefault("onboarding", {}).setdefault("seen", {})[TOOL_PROGRESS_FLAG] = True
|
||||
except Exception:
|
||||
pass
|
||||
self._invalidate()
|
||||
return
|
||||
if event_type != "tool.started":
|
||||
@@ -9230,24 +9293,6 @@ class HermesCLI:
|
||||
f"agent_running={self._agent_running}\n")
|
||||
except Exception:
|
||||
pass
|
||||
# First-touch onboarding: on the very first busy-while-running
|
||||
# event for this install, print a one-line tip explaining the
|
||||
# /busy knob. Flag persists to config.yaml and never fires
|
||||
# again. Guarded for exceptions so onboarding can't break
|
||||
# the input loop.
|
||||
try:
|
||||
from agent.onboarding import (
|
||||
BUSY_INPUT_FLAG,
|
||||
busy_input_hint_cli,
|
||||
is_seen,
|
||||
mark_seen,
|
||||
)
|
||||
if not is_seen(CLI_CONFIG, BUSY_INPUT_FLAG):
|
||||
_cprint(f" {_DIM}{busy_input_hint_cli(self.busy_input_mode)}{_RST}")
|
||||
mark_seen(_hermes_home / "config.yaml", BUSY_INPUT_FLAG)
|
||||
CLI_CONFIG.setdefault("onboarding", {}).setdefault("seen", {})[BUSY_INPUT_FLAG] = True
|
||||
except Exception:
|
||||
pass
|
||||
else:
|
||||
self._pending_input.put(payload)
|
||||
event.app.current_buffer.reset(append_to_history=True)
|
||||
|
||||
@@ -41,15 +41,6 @@ if [ "$(id -u)" = "0" ]; then
|
||||
echo "Warning: chown failed (rootless container?) — continuing anyway"
|
||||
fi
|
||||
|
||||
# Ensure config.yaml is readable by the hermes runtime user even if it was
|
||||
# edited on the host after initial ownership setup. Must run here (as root)
|
||||
# rather than after the gosu drop, otherwise a non-root caller like
|
||||
# `docker run -u $(id -u):$(id -g)` hits "Operation not permitted" (#15865).
|
||||
if [ -f "$HERMES_HOME/config.yaml" ]; then
|
||||
chown hermes:hermes "$HERMES_HOME/config.yaml" 2>/dev/null || true
|
||||
chmod 640 "$HERMES_HOME/config.yaml" 2>/dev/null || true
|
||||
fi
|
||||
|
||||
echo "Dropping root privileges"
|
||||
exec gosu hermes "$0" "$@"
|
||||
fi
|
||||
@@ -76,6 +67,13 @@ if [ ! -f "$HERMES_HOME/config.yaml" ]; then
|
||||
cp "$INSTALL_DIR/cli-config.yaml.example" "$HERMES_HOME/config.yaml"
|
||||
fi
|
||||
|
||||
# Ensure the main config file remains accessible to the hermes runtime user
|
||||
# even if it was edited on the host after initial ownership setup.
|
||||
if [ -f "$HERMES_HOME/config.yaml" ]; then
|
||||
chown hermes:hermes "$HERMES_HOME/config.yaml"
|
||||
chmod 640 "$HERMES_HOME/config.yaml"
|
||||
fi
|
||||
|
||||
# SOUL.md
|
||||
if [ ! -f "$HERMES_HOME/SOUL.md" ]; then
|
||||
cp "$INSTALL_DIR/docker/SOUL.md" "$HERMES_HOME/SOUL.md"
|
||||
|
||||
Binary file not shown.
+3
-16
@@ -21,7 +21,6 @@ Errors in hooks are caught and logged but never block the main pipeline.
|
||||
|
||||
import asyncio
|
||||
import importlib.util
|
||||
import sys
|
||||
from typing import Any, Callable, Dict, List, Optional
|
||||
|
||||
import yaml
|
||||
@@ -104,28 +103,16 @@ class HookRegistry:
|
||||
print(f"[hooks] Skipping {hook_name}: no events declared", flush=True)
|
||||
continue
|
||||
|
||||
# Dynamically load the handler module.
|
||||
# Register in sys.modules BEFORE exec_module so Pydantic /
|
||||
# dataclasses / typing introspection can resolve forward
|
||||
# references (triggered by `from __future__ import annotations`
|
||||
# in the handler). Without this, a handler that declares a
|
||||
# Pydantic BaseModel for webhook/event payloads fails at first
|
||||
# dispatch with "TypeAdapter ... is not fully defined".
|
||||
module_name = f"hermes_hook_{hook_name}"
|
||||
# Dynamically load the handler module
|
||||
spec = importlib.util.spec_from_file_location(
|
||||
module_name, handler_path
|
||||
f"hermes_hook_{hook_name}", handler_path
|
||||
)
|
||||
if spec is None or spec.loader is None:
|
||||
print(f"[hooks] Skipping {hook_name}: could not load handler.py", flush=True)
|
||||
continue
|
||||
|
||||
module = importlib.util.module_from_spec(spec)
|
||||
sys.modules[module_name] = module
|
||||
try:
|
||||
spec.loader.exec_module(module)
|
||||
except Exception:
|
||||
sys.modules.pop(module_name, None)
|
||||
raise
|
||||
spec.loader.exec_module(module)
|
||||
|
||||
handle_fn = getattr(module, "handle", None)
|
||||
if handle_fn is None:
|
||||
|
||||
@@ -1025,20 +1025,7 @@ class BasePlatformAdapter(ABC):
|
||||
self._post_delivery_callbacks: Dict[str, Any] = {}
|
||||
self._expected_cancelled_tasks: set[asyncio.Task] = set()
|
||||
self._busy_session_handler: Optional[Callable[[MessageEvent, str], Awaitable[bool]]] = None
|
||||
# Auto-TTS on voice input: ``_auto_tts_default`` is the global default
|
||||
# (``voice.auto_tts`` in config.yaml, pushed by GatewayRunner on connect).
|
||||
# Per-chat overrides live in two sets populated from ``_voice_mode``:
|
||||
# - ``_auto_tts_enabled_chats``: chat explicitly opted in via ``/voice on``
|
||||
# or ``/voice tts`` (mode is ``voice_only`` or ``all``). Fires even when
|
||||
# the global default is False.
|
||||
# - ``_auto_tts_disabled_chats``: chat explicitly opted out via
|
||||
# ``/voice off`` (mode is ``off``). Suppresses auto-TTS even when the
|
||||
# global default is True.
|
||||
# The gate in _process_message() is:
|
||||
# fire if chat in _auto_tts_enabled_chats
|
||||
# OR (_auto_tts_default and chat not in _auto_tts_disabled_chats)
|
||||
self._auto_tts_default: bool = False
|
||||
self._auto_tts_enabled_chats: set = set()
|
||||
# Chats where auto-TTS on voice input is disabled (set by /voice off)
|
||||
self._auto_tts_disabled_chats: set = set()
|
||||
# Chats where typing indicator is paused (e.g. during approval waits).
|
||||
# _keep_typing skips send_typing when the chat_id is in this set.
|
||||
@@ -1060,21 +1047,6 @@ class BasePlatformAdapter(ABC):
|
||||
def fatal_error_retryable(self) -> bool:
|
||||
return self._fatal_error_retryable
|
||||
|
||||
def _should_auto_tts_for_chat(self, chat_id: str) -> bool:
|
||||
"""Whether auto-TTS on voice input should fire for ``chat_id``.
|
||||
|
||||
Decision layers (Issue #16007):
|
||||
1. Explicit ``/voice on`` or ``/voice tts`` → always fire (even if
|
||||
``voice.auto_tts`` is False).
|
||||
2. Explicit ``/voice off`` → never fire.
|
||||
3. Fall back to the global ``voice.auto_tts`` config default.
|
||||
"""
|
||||
if chat_id in self._auto_tts_enabled_chats:
|
||||
return True
|
||||
if chat_id in self._auto_tts_disabled_chats:
|
||||
return False
|
||||
return bool(self._auto_tts_default)
|
||||
|
||||
def set_fatal_error_handler(self, handler: Callable[["BasePlatformAdapter"], Awaitable[None] | None]) -> None:
|
||||
self._fatal_error_handler = handler
|
||||
|
||||
@@ -2242,14 +2214,12 @@ class BasePlatformAdapter(ABC):
|
||||
logger.info("[%s] extract_local_files found %d file(s) in response", self.name, len(local_files))
|
||||
|
||||
# Auto-TTS: if voice message, generate audio FIRST (before sending text)
|
||||
# Gated via ``_should_auto_tts_for_chat``: fires when the chat has
|
||||
# an explicit ``/voice on|tts`` opt-in OR when ``voice.auto_tts`` is
|
||||
# True globally and no ``/voice off`` has been issued.
|
||||
# Skipped when the chat has voice mode disabled (/voice off)
|
||||
_tts_path = None
|
||||
if (self._should_auto_tts_for_chat(event.source.chat_id)
|
||||
and event.message_type == MessageType.VOICE
|
||||
if (event.message_type == MessageType.VOICE
|
||||
and text_content
|
||||
and not media_files):
|
||||
and not media_files
|
||||
and event.source.chat_id not in self._auto_tts_disabled_chats):
|
||||
try:
|
||||
from tools.tts_tool import text_to_speech_tool, check_tts_requirements
|
||||
if check_tts_requirements():
|
||||
|
||||
@@ -2315,6 +2315,11 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
async def slash_background(interaction: discord.Interaction, prompt: str):
|
||||
await self._run_simple_slash(interaction, f"/background {prompt}", "Background task started~")
|
||||
|
||||
@tree.command(name="btw", description="Ephemeral side question using session context")
|
||||
@discord.app_commands.describe(question="Your side question (no tools, not persisted)")
|
||||
async def slash_btw(interaction: discord.Interaction, question: str):
|
||||
await self._run_simple_slash(interaction, f"/btw {question}")
|
||||
|
||||
# ── Auto-register any gateway-available commands not yet on the tree ──
|
||||
# This ensures new commands added to COMMAND_REGISTRY in
|
||||
# hermes_cli/commands.py automatically appear as Discord slash
|
||||
|
||||
+188
-479
@@ -881,74 +881,23 @@ class GatewayRunner:
|
||||
return
|
||||
if disabled:
|
||||
disabled_chats.add(chat_id)
|
||||
# ``/voice off`` also clears any explicit enable — it's a hard override.
|
||||
enabled_chats = getattr(adapter, "_auto_tts_enabled_chats", None)
|
||||
if isinstance(enabled_chats, set):
|
||||
enabled_chats.discard(chat_id)
|
||||
else:
|
||||
disabled_chats.discard(chat_id)
|
||||
|
||||
def _set_adapter_auto_tts_enabled(self, adapter, chat_id: str, enabled: bool) -> None:
|
||||
"""Update an adapter's per-chat auto-TTS opt-in set if present.
|
||||
|
||||
Used for ``/voice on``/``/voice tts`` where the user explicitly wants
|
||||
auto-TTS even when ``voice.auto_tts`` is False globally.
|
||||
"""
|
||||
enabled_chats = getattr(adapter, "_auto_tts_enabled_chats", None)
|
||||
if not isinstance(enabled_chats, set):
|
||||
return
|
||||
if enabled:
|
||||
enabled_chats.add(chat_id)
|
||||
# An explicit opt-in clears any stale /voice off for this chat.
|
||||
disabled_chats = getattr(adapter, "_auto_tts_disabled_chats", None)
|
||||
if isinstance(disabled_chats, set):
|
||||
disabled_chats.discard(chat_id)
|
||||
else:
|
||||
enabled_chats.discard(chat_id)
|
||||
|
||||
def _sync_voice_mode_state_to_adapter(self, adapter) -> None:
|
||||
"""Restore persisted /voice state into a live platform adapter.
|
||||
|
||||
Populates three fields from config + ``self._voice_mode``:
|
||||
- ``_auto_tts_default``: global default from ``voice.auto_tts``
|
||||
- ``_auto_tts_enabled_chats``: chats with mode ``voice_only``/``all``
|
||||
- ``_auto_tts_disabled_chats``: chats with mode ``off``
|
||||
"""
|
||||
"""Restore persisted /voice off state into a live platform adapter."""
|
||||
disabled_chats = getattr(adapter, "_auto_tts_disabled_chats", None)
|
||||
if not isinstance(disabled_chats, set):
|
||||
return
|
||||
platform = getattr(adapter, "platform", None)
|
||||
if not isinstance(platform, Platform):
|
||||
return
|
||||
|
||||
disabled_chats = getattr(adapter, "_auto_tts_disabled_chats", None)
|
||||
enabled_chats = getattr(adapter, "_auto_tts_enabled_chats", None)
|
||||
if not isinstance(disabled_chats, set) and not isinstance(enabled_chats, set):
|
||||
return
|
||||
|
||||
# Push the global voice.auto_tts default (config.yaml) onto the adapter.
|
||||
# Lazy import to avoid adding a module-level dep from gateway → hermes_cli.
|
||||
try:
|
||||
from hermes_cli.config import load_config as _load_full_config
|
||||
_full_cfg = _load_full_config()
|
||||
_auto_tts_default = bool(
|
||||
(_full_cfg.get("voice") or {}).get("auto_tts", False)
|
||||
)
|
||||
except Exception:
|
||||
_auto_tts_default = False
|
||||
if hasattr(adapter, "_auto_tts_default"):
|
||||
adapter._auto_tts_default = _auto_tts_default
|
||||
|
||||
disabled_chats.clear()
|
||||
prefix = f"{platform.value}:"
|
||||
if isinstance(disabled_chats, set):
|
||||
disabled_chats.clear()
|
||||
disabled_chats.update(
|
||||
key[len(prefix):] for key, mode in self._voice_mode.items()
|
||||
if mode == "off" and key.startswith(prefix)
|
||||
)
|
||||
if isinstance(enabled_chats, set):
|
||||
enabled_chats.clear()
|
||||
enabled_chats.update(
|
||||
key[len(prefix):] for key, mode in self._voice_mode.items()
|
||||
if mode in ("voice_only", "all") and key.startswith(prefix)
|
||||
)
|
||||
disabled_chats.update(
|
||||
key[len(prefix):] for key, mode in self._voice_mode.items()
|
||||
if mode == "off" and key.startswith(prefix)
|
||||
)
|
||||
|
||||
async def _safe_adapter_disconnect(self, adapter, platform) -> None:
|
||||
"""Call adapter.disconnect() defensively, swallowing any error.
|
||||
@@ -1630,27 +1579,6 @@ class GatewayRunner:
|
||||
f"I'll respond to your message shortly."
|
||||
)
|
||||
|
||||
# First-touch onboarding: the very first time a user sends a message
|
||||
# while the agent is busy, append a one-time hint explaining the
|
||||
# queue/interrupt knob. Flag is persisted to config.yaml so it never
|
||||
# fires again on this install.
|
||||
try:
|
||||
from agent.onboarding import (
|
||||
BUSY_INPUT_FLAG,
|
||||
busy_input_hint_gateway,
|
||||
is_seen,
|
||||
mark_seen,
|
||||
)
|
||||
_user_cfg = _load_gateway_config()
|
||||
if not is_seen(_user_cfg, BUSY_INPUT_FLAG):
|
||||
message = (
|
||||
f"{message}\n\n"
|
||||
f"{busy_input_hint_gateway('queue' if is_queue_mode else 'interrupt')}"
|
||||
)
|
||||
mark_seen(_hermes_home / "config.yaml", BUSY_INPUT_FLAG)
|
||||
except Exception as _onb_err:
|
||||
logger.debug("Failed to apply busy-input onboarding hint: %s", _onb_err)
|
||||
|
||||
thread_meta = {"thread_id": event.source.thread_id} if event.source.thread_id else None
|
||||
try:
|
||||
await adapter._send_with_retry(
|
||||
@@ -2288,11 +2216,6 @@ class GatewayRunner:
|
||||
# Start background session expiry watcher to finalize expired sessions
|
||||
asyncio.create_task(self._session_expiry_watcher())
|
||||
|
||||
# Start background kanban notifier — delivers `completed`, `blocked`,
|
||||
# `spawn_auto_blocked`, and `crashed` events to gateway subscribers
|
||||
# so human-in-the-loop workflows hear back without polling.
|
||||
asyncio.create_task(self._kanban_notifier_watcher())
|
||||
|
||||
# Start background reconnection watcher for platforms that failed at startup
|
||||
if self._failed_platforms:
|
||||
logger.info(
|
||||
@@ -2468,241 +2391,6 @@ class GatewayRunner:
|
||||
break
|
||||
await asyncio.sleep(1)
|
||||
|
||||
async def _kanban_notifier_watcher(self, interval: float = 5.0) -> None:
    """Poll ``kanban_notify_subs`` and deliver kanban events to subscribers.

    Each tick collects, per subscription row, the ``task_events`` newer than
    the stored cursor whose kind is ``completed``, ``blocked``, ``gave_up``,
    ``crashed`` or ``timed_out``.  One message per event is sent to the
    subscription's chat (with ``thread_id`` metadata when set) and the
    cursor is then advanced.  A subscription is removed when all of its
    pending events were sent and either the task status is ``done`` /
    ``archived`` or the last delivered event is one of the kinds above.  It
    is also removed after ``MAX_SEND_FAILURES`` consecutive send failures.

    Database work goes through ``asyncio.to_thread`` so the event loop is
    not blocked.  A failure while collecting, or while handling a single
    subscription, is logged and does not stop the remaining subscriptions
    or later ticks.

    Args:
        interval: Approximate seconds between ticks; slept in one-second
            steps so a cleared ``self._running`` is noticed promptly.
    """
    from gateway.config import Platform as _Platform
    try:
        from hermes_cli import kanban_db as _kb
    except Exception:
        logger.warning("kanban notifier: kanban_db not importable; notifier disabled")
        return

    # Event kinds reported to subscribers.  Delivering any of them also ends
    # the subscription: the task is done, blocked, or in a retry-needed
    # state that should not keep pinging a stale chat.
    TERMINAL_KINDS = ("completed", "blocked", "gave_up", "crashed", "timed_out")
    # After this many consecutive send failures the chat is treated as dead
    # (deleted, bot kicked, ...) and the subscription is dropped.
    MAX_SEND_FAILURES = 3
    sub_fail_counts: dict[tuple, int] = getattr(
        self, "_kanban_sub_fail_counts", {}
    )
    self._kanban_sub_fail_counts = sub_fail_counts

    def _collect() -> list:
        """Read every subscription with unseen events (runs in a thread)."""
        conn = _kb.connect()
        try:
            _kb.init_db()  # idempotent; handles first-run
        except Exception as exc:
            # Best effort: a failed init must not stop the read below.
            logger.debug("kanban notifier: init_db failed: %s", exc)
        try:
            deliveries: list[dict] = []
            for sub in _kb.list_notify_subs(conn):
                cursor, events = _kb.unseen_events_for_sub(
                    conn,
                    task_id=sub["task_id"],
                    platform=sub["platform"],
                    chat_id=sub["chat_id"],
                    thread_id=sub.get("thread_id") or "",
                    kinds=TERMINAL_KINDS,
                )
                if not events:
                    continue
                deliveries.append({
                    "sub": sub,
                    "cursor": cursor,
                    "events": events,
                    "task": _kb.get_task(conn, sub["task_id"]),
                })
            return deliveries
        finally:
            conn.close()

    async def _deliver(d: dict) -> None:
        """Send one subscription's pending events, then advance / unsubscribe."""
        sub = d["sub"]
        task = d["task"]
        platform_str = (sub["platform"] or "").lower()
        try:
            plat = _Platform(platform_str)
        except ValueError:
            # Unknown platform string; advance the cursor so the same
            # events are not replayed forever.
            await asyncio.to_thread(self._kanban_advance, sub, d["cursor"])
            return
        adapter = self.adapters.get(plat)
        if adapter is None:
            return  # platform not currently connected; retry next tick

        sub_key = (
            sub["task_id"], sub["platform"],
            sub["chat_id"], sub.get("thread_id") or "",
        )
        for ev in d["events"]:
            msg = self._kanban_event_message(sub, task, ev)
            if msg is None:
                continue
            metadata: dict = {}
            if sub.get("thread_id"):
                metadata["thread_id"] = sub["thread_id"]
            try:
                await adapter.send(sub["chat_id"], msg, metadata=metadata)
            except Exception as exc:
                fails = sub_fail_counts.get(sub_key, 0) + 1
                sub_fail_counts[sub_key] = fails
                logger.warning(
                    "kanban notifier: send failed for %s on %s "
                    "(attempt %d/%d): %s",
                    sub["task_id"], platform_str, fails,
                    MAX_SEND_FAILURES, exc,
                )
                if fails >= MAX_SEND_FAILURES:
                    logger.warning(
                        "kanban notifier: dropping subscription "
                        "%s on %s after %d consecutive send failures",
                        sub["task_id"], platform_str, fails,
                    )
                    await asyncio.to_thread(self._kanban_unsub, sub)
                    sub_fail_counts.pop(sub_key, None)
                # Don't advance the cursor on send failure; retry next tick.
                return
            else:
                # Reset the failure counter on success.
                sub_fail_counts.pop(sub_key, None)

        # All events delivered: advance the cursor, then maybe unsubscribe.
        await asyncio.to_thread(self._kanban_advance, sub, d["cursor"])
        last_kind = d["events"][-1].kind if d["events"] else None
        task_terminal = task and task.status in ("done", "archived")
        if task_terminal or last_kind in TERMINAL_KINDS:
            await asyncio.to_thread(self._kanban_unsub, sub)

    # Initial delay so the gateway can finish wiring adapters.
    await asyncio.sleep(5)

    while self._running:
        try:
            deliveries = await asyncio.to_thread(_collect)
        except Exception as exc:
            logger.warning("kanban notifier tick failed: %s", exc)
            deliveries = []
        for d in deliveries:
            # Isolate each subscription: one bad row must not starve the
            # subscriptions that come after it on every tick.
            try:
                await _deliver(d)
            except Exception as exc:
                logger.warning(
                    "kanban notifier: delivery for %s failed: %s",
                    d["sub"].get("task_id"), exc,
                )
        # Sleep with cancellation checks.
        for _ in range(int(max(1, interval))):
            if not self._running:
                return
            await asyncio.sleep(1)

def _kanban_event_message(self, sub: dict, task, ev):
    """Render the chat notification for one kanban event.

    Args:
        sub: Subscription row; only ``sub["task_id"]`` is read.
        task: Task record for the subscription, or ``None`` if not found.
        ev: Event exposing ``kind`` and an optional mapping ``payload``.

    Returns:
        The message text, or ``None`` when ``ev.kind`` is not a reported kind.
    """
    def _first_line(text, limit: int) -> str:
        # Whitespace-only text has no lines; return "" instead of raising.
        lines = str(text).strip().splitlines()
        return lines[0][:limit] if lines else ""

    task_id = sub["task_id"]
    payload = ev.payload or {}
    # Identity prefix: attribute the ping to the worker that did the work.
    who = task.assignee if task and task.assignee else None
    tag = f"@{who} " if who else ""
    kind = ev.kind

    if kind == "completed":
        title = task.title if task else task_id
        title = (task_id if title is None else str(title))[:120]
        # Prefer the run summary carried in the event payload, then fall
        # back to task.result for rows that have no summary.
        first = _first_line(payload.get("summary") or "", 200)
        if not first and task and task.result:
            first = _first_line(task.result, 160)
        handoff = f"\n{first}" if first else ""
        return (
            f"✔ {tag}Kanban {task_id} done"
            f" — {title}{handoff}"
        )
    if kind == "blocked":
        reason = ""
        if payload.get("reason"):
            reason = f": {str(payload['reason'])[:160]}"
        return f"⏸ {tag}Kanban {task_id} blocked{reason}"
    if kind == "gave_up":
        err = ""
        if payload.get("error"):
            err = f"\n{str(payload['error'])[:200]}"
        return (
            f"✖ {tag}Kanban {task_id} gave up "
            f"after repeated spawn failures{err}"
        )
    if kind == "crashed":
        return (
            f"✖ {tag}Kanban {task_id} worker crashed "
            f"(pid gone); dispatcher will retry"
        )
    if kind == "timed_out":
        try:
            limit = int(payload.get("limit_seconds") or 0)
        except (TypeError, ValueError):
            limit = 0  # malformed payload; still report the timeout
        return (
            f"⏱ {tag}Kanban {task_id} timed out "
            f"(max_runtime={limit}s); will retry"
        )
    return None
|
||||
|
||||
def _kanban_advance(self, sub: dict, cursor: int) -> None:
    """Sync helper: store ``cursor`` as the subscription's new notify cursor.

    Opens its own kanban DB connection and always closes it; intended to be
    run via ``asyncio.to_thread``.
    """
    from hermes_cli import kanban_db as _kb

    db = _kb.connect()
    try:
        sub_identity = {
            "task_id": sub["task_id"],
            "platform": sub["platform"],
            "chat_id": sub["chat_id"],
            # A missing / empty thread id is stored as the empty string.
            "thread_id": sub.get("thread_id") or "",
        }
        _kb.advance_notify_cursor(db, new_cursor=cursor, **sub_identity)
    finally:
        db.close()
|
||||
|
||||
def _kanban_unsub(self, sub: dict) -> None:
    """Sync helper: remove the notify subscription described by ``sub``.

    Opens its own kanban DB connection and always closes it; the notifier
    calls this through ``asyncio.to_thread``.
    """
    from hermes_cli import kanban_db as _kb

    db = _kb.connect()
    try:
        sub_identity = {
            "task_id": sub["task_id"],
            "platform": sub["platform"],
            "chat_id": sub["chat_id"],
            # A missing / empty thread id is matched as the empty string.
            "thread_id": sub.get("thread_id") or "",
        }
        _kb.remove_notify_sub(db, **sub_identity)
    finally:
        db.close()
|
||||
|
||||
async def _platform_reconnect_watcher(self) -> None:
|
||||
"""Background task that periodically retries connecting failed platforms.
|
||||
|
||||
@@ -3738,19 +3426,9 @@ class GatewayRunner:
|
||||
|
||||
# /background must bypass the running-agent guard — it starts a
|
||||
# parallel task and must never interrupt the active conversation.
|
||||
# /btw is an alias of /background and resolves to the same canonical
|
||||
# name, so this branch handles both commands.
|
||||
if _cmd_def_inner and _cmd_def_inner.name == "background":
|
||||
return await self._handle_background_command(event)
|
||||
|
||||
# /kanban must bypass the guard. It writes to a profile-agnostic
|
||||
# DB (kanban.db), not to the running agent's state. In fact
|
||||
# /kanban unblock is often the only way to free a worker that
|
||||
# has blocked waiting for a peer — letting that be dispatched
|
||||
# mid-run is the whole point of the board.
|
||||
if _cmd_def_inner and _cmd_def_inner.name == "kanban":
|
||||
return await self._handle_kanban_command(event)
|
||||
|
||||
# Session-level toggles that are safe to run mid-agent —
|
||||
# /yolo can unblock a pending approval prompt, /verbose cycles
|
||||
# the tool-progress display mode for the ongoing stream.
|
||||
@@ -3975,9 +3653,6 @@ class GatewayRunner:
|
||||
if canonical == "personality":
|
||||
return await self._handle_personality_command(event)
|
||||
|
||||
if canonical == "kanban":
|
||||
return await self._handle_kanban_command(event)
|
||||
|
||||
if canonical == "retry":
|
||||
return await self._handle_retry_command(event)
|
||||
|
||||
@@ -4026,6 +3701,9 @@ class GatewayRunner:
|
||||
if canonical == "background":
|
||||
return await self._handle_background_command(event)
|
||||
|
||||
if canonical == "btw":
|
||||
return await self._handle_btw_command(event)
|
||||
|
||||
if canonical == "steer":
|
||||
# No active agent — /steer has no tool call to inject into.
|
||||
# Strip the prefix so downstream treats it as a normal user
|
||||
@@ -5405,84 +5083,6 @@ class GatewayRunner:
|
||||
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
async def _handle_kanban_command(self, event: MessageEvent) -> str:
    """Handle /kanban by delegating to the shared kanban CLI.

    The command text (minus the leading ``/kanban``) is passed to
    ``run_slash`` in a worker thread so the gateway event loop stays
    responsive.

    For ``/kanban create`` the originating source (platform + chat +
    thread) is also subscribed to the new task's notifications, provided
    the CLI output contains the standard ``Created t_<hex>`` line.  Output
    in another shape (e.g. ``--json``) does not match, so no subscription
    is made.  A failed subscription is logged and does not fail the command.

    Returns:
        The CLI output (truncated past 3800 characters), ``"(no output)"``
        when the CLI produced nothing, or a ``⚠ kanban error`` line when
        ``run_slash`` raised.
    """
    import asyncio
    import re
    from hermes_cli.kanban import run_slash

    text = (event.text or "").strip()
    # Strip the leading "/kanban" (with or without slash), leaving args.
    if text.startswith("/"):
        text = text.lstrip("/")
    if text.startswith("kanban"):
        text = text[len("kanban"):].lstrip()

    is_create = text.split(None, 1)[:1] == ["create"]

    try:
        output = await asyncio.to_thread(run_slash, text)
    except Exception as exc:  # pragma: no cover - defensive
        return f"⚠ kanban error: {exc}"
    # Normalise once: the length check below must not see None.
    output = output or ""

    if is_create and output:
        m = re.search(r"Created\s+(t_[0-9a-f]+)\b", output)
        if m:
            task_id = m.group(1)
            try:
                source = event.source
                platform = getattr(source, "platform", None)
                platform_str = (
                    platform.value if hasattr(platform, "value") else str(platform or "")
                ).lower()
                chat_id = str(getattr(source, "chat_id", "") or "")
                thread_id = str(getattr(source, "thread_id", "") or "")
                user_id = str(getattr(source, "user_id", "") or "") or None
                if platform_str and chat_id:
                    def _sub():
                        from hermes_cli import kanban_db as _kb
                        conn = _kb.connect()
                        try:
                            _kb.add_notify_sub(
                                conn, task_id=task_id,
                                platform=platform_str, chat_id=chat_id,
                                thread_id=thread_id or None,
                                user_id=user_id,
                            )
                        finally:
                            conn.close()
                    await asyncio.to_thread(_sub)
                    output = (
                        output.rstrip()
                        + f"\n(subscribed — you'll be notified when {task_id} "
                        f"completes or blocks)"
                    )
            except Exception as exc:
                logger.warning("kanban create auto-subscribe failed: %s", exc)

    # Gateway messages have practical length caps; truncate long listings
    # to keep the UX reasonable.
    if len(output) > 3800:
        output = output[:3800] + "\n… (truncated; use `hermes kanban …` in your terminal for full output)"
    return output or "(no output)"
|
||||
|
||||
async def _handle_status_command(self, event: MessageEvent) -> str:
|
||||
"""Handle /status command."""
|
||||
source = event.source
|
||||
@@ -6377,7 +5977,7 @@ class GatewayRunner:
|
||||
self._voice_mode[voice_key] = "voice_only"
|
||||
self._save_voice_modes()
|
||||
if adapter:
|
||||
self._set_adapter_auto_tts_enabled(adapter, chat_id, enabled=True)
|
||||
self._set_adapter_auto_tts_disabled(adapter, chat_id, disabled=False)
|
||||
return (
|
||||
"Voice mode enabled.\n"
|
||||
"I'll reply with voice when you send voice messages.\n"
|
||||
@@ -6393,7 +5993,7 @@ class GatewayRunner:
|
||||
self._voice_mode[voice_key] = "all"
|
||||
self._save_voice_modes()
|
||||
if adapter:
|
||||
self._set_adapter_auto_tts_enabled(adapter, chat_id, enabled=True)
|
||||
self._set_adapter_auto_tts_disabled(adapter, chat_id, disabled=False)
|
||||
return (
|
||||
"Auto-TTS enabled.\n"
|
||||
"All replies will include a voice message."
|
||||
@@ -6432,7 +6032,7 @@ class GatewayRunner:
|
||||
self._voice_mode[voice_key] = "voice_only"
|
||||
self._save_voice_modes()
|
||||
if adapter:
|
||||
self._set_adapter_auto_tts_enabled(adapter, chat_id, enabled=True)
|
||||
self._set_adapter_auto_tts_disabled(adapter, chat_id, disabled=False)
|
||||
return "Voice mode enabled."
|
||||
else:
|
||||
self._voice_mode[voice_key] = "off"
|
||||
@@ -6483,7 +6083,7 @@ class GatewayRunner:
|
||||
adapter._voice_sources[guild_id] = event.source.to_dict()
|
||||
self._voice_mode[self._voice_key(event.source.platform, event.source.chat_id)] = "all"
|
||||
self._save_voice_modes()
|
||||
self._set_adapter_auto_tts_enabled(adapter, event.source.chat_id, enabled=True)
|
||||
self._set_adapter_auto_tts_disabled(adapter, event.source.chat_id, disabled=False)
|
||||
return (
|
||||
f"Joined voice channel **{voice_channel.name}**.\n"
|
||||
f"I'll speak my replies and listen to you. Use /voice leave to disconnect."
|
||||
@@ -7001,6 +6601,177 @@ class GatewayRunner:
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
async def _handle_btw_command(self, event: MessageEvent) -> str:
|
||||
"""Handle /btw <question> — ephemeral side question in the same chat."""
|
||||
question = event.get_command_args().strip()
|
||||
if not question:
|
||||
return (
|
||||
"Usage: /btw <question>\n"
|
||||
"Example: /btw what module owns session title sanitization?\n\n"
|
||||
"Answers using session context. No tools, not persisted."
|
||||
)
|
||||
|
||||
source = event.source
|
||||
session_key = self._session_key_for_source(source)
|
||||
|
||||
# Guard: one /btw at a time per session
|
||||
existing = getattr(self, "_active_btw_tasks", {}).get(session_key)
|
||||
if existing and not existing.done():
|
||||
return "A /btw is already running for this chat. Wait for it to finish."
|
||||
|
||||
if not hasattr(self, "_active_btw_tasks"):
|
||||
self._active_btw_tasks: dict = {}
|
||||
|
||||
import uuid as _uuid
|
||||
task_id = f"btw_{datetime.now().strftime('%H%M%S')}_{_uuid.uuid4().hex[:6]}"
|
||||
_task = asyncio.create_task(self._run_btw_task(question, source, session_key, task_id))
|
||||
self._background_tasks.add(_task)
|
||||
self._active_btw_tasks[session_key] = _task
|
||||
|
||||
def _cleanup(task):
|
||||
self._background_tasks.discard(task)
|
||||
if self._active_btw_tasks.get(session_key) is task:
|
||||
self._active_btw_tasks.pop(session_key, None)
|
||||
|
||||
_task.add_done_callback(_cleanup)
|
||||
|
||||
preview = question[:60] + ("..." if len(question) > 60 else "")
|
||||
return f'💬 /btw: "{preview}"\nReply will appear here shortly.'
|
||||
|
||||
async def _run_btw_task(
    self, question: str, source, session_key: str, task_id: str,
) -> None:
    """Execute an ephemeral /btw side question and deliver the answer.

    Resolves the session's model/runtime and reasoning configuration,
    snapshots the conversation history (from the running agent when one is
    registered for ``session_key``, otherwise from the stored transcript),
    runs a throwaway ``AIAgent`` with no toolsets and ``persist_session=False``
    through ``self._run_in_executor_with_context``, and sends the reply via
    the platform adapter: text first, then extracted images and media files.

    Args:
        question: The side question text; it is appended to a fixed
            instruction prefix and also used (truncated to 60 characters)
            in the reply header.
        source: Message source; ``platform``, ``chat_id`` and ``thread_id``
            are read from it.
        session_key: Key used to look up the running agent and to resolve
            per-session runtime and reasoning settings.
        task_id: Identifier of this side task; passed to the agent as both
            ``session_id`` and ``task_id`` and included in log messages.

    Returns:
        None. If no adapter exists for the platform the task only logs a
        warning. Exceptions raised inside the main ``try`` block are logged
        and reported to the chat on a best-effort basis.
    """
    from run_agent import AIAgent

    adapter = self.adapters.get(source.platform)
    if not adapter:
        logger.warning("No adapter for platform %s in /btw task %s", source.platform, task_id)
        return

    # Only attach thread metadata when the message came from a thread.
    _thread_meta = {"thread_id": source.thread_id} if source.thread_id else None

    try:
        user_config = _load_gateway_config()
        model, runtime_kwargs = self._resolve_session_agent_runtime(
            source=source,
            session_key=session_key,
            user_config=user_config,
        )
        if not runtime_kwargs.get("api_key"):
            await adapter.send(
                chat_id=source.chat_id,
                content="❌ /btw failed: no provider credentials configured.",
                metadata=_thread_meta,
            )
            return

        platform_key = _platform_config_key(source.platform)
        reasoning_config = self._resolve_session_reasoning_config(
            source=source,
            session_key=session_key,
        )
        self._service_tier = self._load_service_tier()
        turn_route = self._resolve_turn_agent_config(question, model, runtime_kwargs)
        pr = self._provider_routing

        # Snapshot history from running agent or stored transcript
        running_agent = self._running_agents.get(session_key)
        if running_agent and running_agent is not _AGENT_PENDING_SENTINEL:
            # Copy the list so the side agent never shares the live list object.
            history_snapshot = list(getattr(running_agent, "_session_messages", []) or [])
        else:
            session_entry = self.session_store.get_or_create_session(source)
            history_snapshot = self.session_store.load_transcript(session_entry.session_id)

        btw_prompt = (
            "[Ephemeral /btw side question. Answer using the conversation "
            "context. No tools available. Be direct and concise.]\n\n"
            + question
        )

        def run_sync():
            """Build the throwaway agent, run one conversation, always clean up."""
            agent = AIAgent(
                model=turn_route["model"],
                **turn_route["runtime"],
                max_iterations=8,
                quiet_mode=True,
                verbose_logging=False,
                enabled_toolsets=[],
                reasoning_config=reasoning_config,
                service_tier=self._service_tier,
                request_overrides=turn_route.get("request_overrides"),
                providers_allowed=pr.get("only"),
                providers_ignored=pr.get("ignore"),
                providers_order=pr.get("order"),
                provider_sort=pr.get("sort"),
                provider_require_parameters=pr.get("require_parameters", False),
                provider_data_collection=pr.get("data_collection"),
                session_id=task_id,
                platform=platform_key,
                session_db=None,
                fallback_model=self._fallback_model,
                skip_memory=True,
                skip_context_files=True,
                persist_session=False,
            )
            try:
                return agent.run_conversation(
                    user_message=btw_prompt,
                    conversation_history=history_snapshot,
                    task_id=task_id,
                )
            finally:
                self._cleanup_agent_resources(agent)

        result = await self._run_in_executor_with_context(run_sync)

        response = (result.get("final_response") or "") if result else ""
        if not response and result and result.get("error"):
            response = f"Error: {result['error']}"
        if not response:
            response = "(No response generated)"

        media_files, response = adapter.extract_media(response)
        images, text_content = adapter.extract_images(response)
        preview = question[:60] + ("..." if len(question) > 60 else "")
        header = f'💬 /btw: "{preview}"\n\n'

        if text_content:
            await adapter.send(
                chat_id=source.chat_id,
                content=header + text_content,
                metadata=_thread_meta,
            )
        elif not images and not media_files:
            await adapter.send(
                chat_id=source.chat_id,
                content=header + "(No response generated)",
                metadata=_thread_meta,
            )

        # Attachment delivery stays best-effort (one failure must not block
        # the rest), but failures are logged instead of vanishing silently.
        for image_url, alt_text in (images or []):
            try:
                await adapter.send_image(chat_id=source.chat_id, image_url=image_url, caption=alt_text)
            except Exception:
                logger.debug(
                    "/btw task %s: failed to send image %s", task_id, image_url, exc_info=True
                )

        for media_path, _is_voice in (media_files or []):
            try:
                await adapter.send_file(chat_id=source.chat_id, file_path=media_path)
            except Exception:
                logger.debug(
                    "/btw task %s: failed to send file %s", task_id, media_path, exc_info=True
                )

    except Exception as e:
        logger.exception("/btw task %s failed", task_id)
        try:
            await adapter.send(
                chat_id=source.chat_id,
                content=f"❌ /btw failed: {e}",
                metadata=_thread_meta,
            )
        except Exception:
            # Nothing more can be done if even the error notice cannot be sent.
            logger.debug("/btw task %s: failed to deliver error notice", task_id, exc_info=True)
|
||||
|
||||
async def _handle_reasoning_command(self, event: MessageEvent) -> str:
|
||||
"""Handle /reasoning command — manage reasoning effort and display toggle.
|
||||
|
||||
@@ -9589,62 +9360,16 @@ class GatewayRunner:
|
||||
last_tool = [None] # Mutable container for tracking in closure
|
||||
last_progress_msg = [None] # Track last message for dedup
|
||||
repeat_count = [0] # How many times the same message repeated
|
||||
# First-touch onboarding latch: fires at most once per run, even if
|
||||
# several tools exceed the threshold.
|
||||
long_tool_hint_fired = [False]
|
||||
_LONG_TOOL_THRESHOLD_S = 30.0
|
||||
|
||||
|
||||
def progress_callback(event_type: str, tool_name: str = None, preview: str = None, args: dict = None, **kwargs):
|
||||
"""Callback invoked by agent on tool lifecycle events."""
|
||||
if not progress_queue or not _run_still_current():
|
||||
return
|
||||
|
||||
# First-touch onboarding: the first time a tool takes longer than
|
||||
# _LONG_TOOL_THRESHOLD_S during a run that's streaming every tool
|
||||
# (progress_mode == "all"), append a one-time hint suggesting
|
||||
# /verbose. We only fire when (a) the user hasn't seen the hint
|
||||
# before and (b) /verbose is actually usable on this platform
|
||||
# (gateway gate must be open). The CLI has its own trigger.
|
||||
if event_type == "tool.completed" and not long_tool_hint_fired[0]:
|
||||
try:
|
||||
duration = kwargs.get("duration") or 0
|
||||
if duration >= _LONG_TOOL_THRESHOLD_S and progress_mode == "all":
|
||||
from agent.onboarding import (
|
||||
TOOL_PROGRESS_FLAG,
|
||||
is_seen,
|
||||
mark_seen,
|
||||
tool_progress_hint_gateway,
|
||||
)
|
||||
_cfg = _load_gateway_config()
|
||||
gate_on = bool(_cfg.get("display", {}).get("tool_progress_command", False))
|
||||
if gate_on and not is_seen(_cfg, TOOL_PROGRESS_FLAG):
|
||||
long_tool_hint_fired[0] = True
|
||||
progress_queue.put(tool_progress_hint_gateway())
|
||||
mark_seen(_hermes_home / "config.yaml", TOOL_PROGRESS_FLAG)
|
||||
except Exception as _hint_err:
|
||||
logger.debug("tool-progress onboarding hint failed: %s", _hint_err)
|
||||
return
|
||||
|
||||
# Only act on tool.started events (ignore tool.completed, reasoning.available, etc.)
|
||||
if event_type not in ("tool.started",):
|
||||
return
|
||||
|
||||
# Suppress tool-progress bubbles once the user has sent `stop`.
|
||||
# When the LLM response carries N parallel tool calls, the agent
|
||||
# fires N "tool.started" events back-to-back before checking for
|
||||
# interrupts — without this guard, a late `stop` still renders
|
||||
# all N as 🔍 bubbles, making the interrupt feel ignored.
|
||||
# (agent lives in run_sync's scope; agent_holder[0] is the shared
|
||||
# handle across nested scopes — see line ~9607.)
|
||||
try:
|
||||
_agent_for_interrupt = agent_holder[0] if agent_holder else None
|
||||
if _agent_for_interrupt is not None and getattr(
|
||||
_agent_for_interrupt, "is_interrupted", False
|
||||
):
|
||||
return
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# "new" mode: only report when tool changes
|
||||
if progress_mode == "new" and tool_name == last_tool[0]:
|
||||
return
|
||||
@@ -9751,22 +9476,6 @@ class GatewayRunner:
|
||||
|
||||
raw = progress_queue.get_nowait()
|
||||
|
||||
# Drain silently when interrupted: events queued in the
|
||||
# window between tool parse and interrupt processing
|
||||
# should not render as bubbles. The "⚡ Interrupting
|
||||
# current task" message is sent separately and is the
|
||||
# last progress-flavored bubble the user should see.
|
||||
try:
|
||||
_agent_for_interrupt = agent_holder[0] if agent_holder else None
|
||||
if _agent_for_interrupt is not None and getattr(
|
||||
_agent_for_interrupt, "is_interrupted", False
|
||||
):
|
||||
# Drop this event and continue draining.
|
||||
await asyncio.sleep(0)
|
||||
continue
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Handle dedup messages: update last line with repeat counter
|
||||
if isinstance(raw, tuple) and len(raw) == 3 and raw[0] == "__dedup__":
|
||||
_, base_msg, count = raw
|
||||
|
||||
+2
-2
@@ -4244,10 +4244,10 @@ def _login_nous(args, pconfig: ProviderConfig) -> None:
|
||||
)
|
||||
|
||||
from hermes_cli.models import (
|
||||
get_curated_nous_model_ids, get_pricing_for_provider,
|
||||
_PROVIDER_MODELS, get_pricing_for_provider,
|
||||
check_nous_free_tier, partition_nous_models_by_tier,
|
||||
)
|
||||
model_ids = get_curated_nous_model_ids()
|
||||
model_ids = _PROVIDER_MODELS.get("nous", [])
|
||||
|
||||
print()
|
||||
unavailable_models: list = []
|
||||
|
||||
@@ -84,7 +84,9 @@ COMMAND_REGISTRY: list[CommandDef] = [
|
||||
CommandDef("deny", "Deny a pending dangerous command", "Session",
|
||||
gateway_only=True),
|
||||
CommandDef("background", "Run a prompt in the background", "Session",
|
||||
aliases=("bg", "btw"), args_hint="<prompt>"),
|
||||
aliases=("bg",), args_hint="<prompt>"),
|
||||
CommandDef("btw", "Ephemeral side question using session context (no tools, not persisted)", "Session",
|
||||
args_hint="<question>"),
|
||||
CommandDef("agents", "Show active agents and running tasks", "Session",
|
||||
aliases=("tasks",)),
|
||||
CommandDef("queue", "Queue a prompt for the next turn (doesn't interrupt)", "Session",
|
||||
@@ -140,11 +142,6 @@ COMMAND_REGISTRY: list[CommandDef] = [
|
||||
CommandDef("cron", "Manage scheduled tasks", "Tools & Skills",
|
||||
cli_only=True, args_hint="[subcommand]",
|
||||
subcommands=("list", "add", "create", "edit", "pause", "resume", "run", "remove")),
|
||||
CommandDef("kanban", "Multi-profile collaboration board (tasks, links, comments)",
|
||||
"Tools & Skills", args_hint="[subcommand]",
|
||||
subcommands=("list", "ls", "show", "create", "assign", "link", "unlink",
|
||||
"claim", "comment", "complete", "block", "unblock", "archive",
|
||||
"tail", "dispatch", "context", "init", "gc")),
|
||||
CommandDef("reload", "Reload .env variables into the running session", "Tools & Skills",
|
||||
cli_only=True),
|
||||
CommandDef("reload-mcp", "Reload MCP servers from config", "Tools & Skills",
|
||||
|
||||
@@ -959,27 +959,6 @@ DEFAULT_CONFIG = {
|
||||
"backup_count": 3, # Number of rotated backup files to keep
|
||||
},
|
||||
|
||||
# Remotely-hosted model catalog manifest. When enabled, the CLI fetches
|
||||
# curated model lists for OpenRouter and Nous Portal from this URL,
|
||||
# falling back to the in-repo snapshot on network failure. Lets us
|
||||
# update model picker lists without shipping a hermes-agent release.
|
||||
# The default URL is served by the docs site GitHub Pages deploy.
|
||||
"model_catalog": {
|
||||
"enabled": True,
|
||||
"url": "https://hermes-agent.nousresearch.com/docs/api/model-catalog.json",
|
||||
# Disk cache TTL in hours. Beyond this, the CLI refetches on the
|
||||
# next /model or `hermes model` invocation; network failures
|
||||
# silently fall back to the stale cache.
|
||||
"ttl_hours": 24,
|
||||
# Optional per-provider override URLs for third parties that want
|
||||
# to self-host their own curation list using the same schema.
|
||||
# Example:
|
||||
# providers:
|
||||
# openrouter:
|
||||
# url: https://example.com/my-curation.json
|
||||
"providers": {},
|
||||
},
|
||||
|
||||
# Network settings — workarounds for connectivity issues.
|
||||
"network": {
|
||||
# Force IPv4 connections. On servers with broken or unreachable IPv6,
|
||||
@@ -1016,13 +995,6 @@ DEFAULT_CONFIG = {
|
||||
"min_interval_hours": 24,
|
||||
},
|
||||
|
||||
# Contextual first-touch onboarding hints (see agent/onboarding.py).
|
||||
# Each hint is shown once per install and then latched here so it
|
||||
# never fires again. Users can wipe the section to re-see all hints.
|
||||
"onboarding": {
|
||||
"seen": {},
|
||||
},
|
||||
|
||||
# Config schema version - bump this when adding new required fields
|
||||
"_config_version": 22,
|
||||
}
|
||||
|
||||
@@ -1,361 +0,0 @@
|
||||
"""
|
||||
hermes fallback — manage the fallback provider chain.
|
||||
|
||||
Fallback providers are tried in order when the primary model fails with
|
||||
rate-limit, overload, or connection errors. See:
|
||||
https://hermes-agent.nousresearch.com/docs/user-guide/features/fallback-providers
|
||||
|
||||
Subcommands:
|
||||
hermes fallback [list] Show the current fallback chain (default when no subcommand)
|
||||
hermes fallback add Pick provider + model via the same picker as `hermes model`,
|
||||
then append the selection to the chain
|
||||
hermes fallback remove Pick an entry to delete from the chain
|
||||
hermes fallback clear Remove all fallback entries
|
||||
|
||||
Storage: ``fallback_providers`` in ``~/.hermes/config.yaml`` (top-level, list of
|
||||
``{provider, model, base_url?, api_mode?}`` dicts). The legacy single-dict
|
||||
``fallback_model`` format is migrated to the new list format the first time the chain is written (add, remove, or clear).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import copy
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _read_chain(config: Dict[str, Any]) -> List[Dict[str, Any]]:
|
||||
"""Return the normalized fallback chain as a list of dicts.
|
||||
|
||||
Accepts both the new list format (``fallback_providers``) and the legacy
|
||||
single-dict format (``fallback_model``). The returned list is always a
|
||||
fresh copy — callers can mutate without touching the config dict.
|
||||
"""
|
||||
chain = config.get("fallback_providers") or []
|
||||
if isinstance(chain, list):
|
||||
result = [dict(e) for e in chain if isinstance(e, dict) and e.get("provider") and e.get("model")]
|
||||
if result:
|
||||
return result
|
||||
legacy = config.get("fallback_model")
|
||||
if isinstance(legacy, dict) and legacy.get("provider") and legacy.get("model"):
|
||||
return [dict(legacy)]
|
||||
if isinstance(legacy, list):
|
||||
return [dict(e) for e in legacy if isinstance(e, dict) and e.get("provider") and e.get("model")]
|
||||
return []
|
||||
|
||||
|
||||
def _write_chain(config: Dict[str, Any], chain: List[Dict[str, Any]]) -> None:
|
||||
"""Persist the chain to ``fallback_providers`` and clear legacy key."""
|
||||
config["fallback_providers"] = chain
|
||||
# Drop the legacy single-dict key on write so there's only one source of truth.
|
||||
if "fallback_model" in config:
|
||||
config.pop("fallback_model", None)
|
||||
|
||||
|
||||
def _format_entry(entry: Dict[str, Any]) -> str:
|
||||
"""One-line human-readable rendering of a fallback entry."""
|
||||
provider = entry.get("provider", "?")
|
||||
model = entry.get("model", "?")
|
||||
base = entry.get("base_url")
|
||||
suffix = f" [{base}]" if base else ""
|
||||
return f"{model} (via {provider}){suffix}"
|
||||
|
||||
|
||||
def _extract_fallback_from_model_cfg(model_cfg: Any) -> Optional[Dict[str, Any]]:
|
||||
"""Pull the ``{provider, model, base_url?, api_mode?}`` dict from a ``config["model"]`` snapshot."""
|
||||
if not isinstance(model_cfg, dict):
|
||||
return None
|
||||
provider = (model_cfg.get("provider") or "").strip()
|
||||
# The picker writes the selected model to ``model.default``.
|
||||
model = (model_cfg.get("default") or model_cfg.get("model") or "").strip()
|
||||
if not provider or not model:
|
||||
return None
|
||||
entry: Dict[str, Any] = {"provider": provider, "model": model}
|
||||
base_url = (model_cfg.get("base_url") or "").strip()
|
||||
if base_url:
|
||||
entry["base_url"] = base_url
|
||||
api_mode = (model_cfg.get("api_mode") or "").strip()
|
||||
if api_mode:
|
||||
entry["api_mode"] = api_mode
|
||||
return entry
|
||||
|
||||
|
||||
def _snapshot_auth_active_provider() -> Any:
|
||||
"""Return the current ``active_provider`` in auth.json, or a sentinel if unavailable."""
|
||||
try:
|
||||
from hermes_cli.auth import _load_auth_store
|
||||
store = _load_auth_store()
|
||||
return store.get("active_provider")
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
def _restore_auth_active_provider(value: Any) -> None:
|
||||
"""Write back a previously snapshotted ``active_provider`` value."""
|
||||
try:
|
||||
from hermes_cli.auth import _auth_store_lock, _load_auth_store, _save_auth_store
|
||||
with _auth_store_lock():
|
||||
store = _load_auth_store()
|
||||
store["active_provider"] = value
|
||||
_save_auth_store(store)
|
||||
except Exception:
|
||||
# Best-effort — if auth.json can't be restored, the user's primary
|
||||
# provider may have been deactivated by the picker. They can re-run
|
||||
# `hermes model` to fix it. Don't fail the fallback add.
|
||||
pass
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Subcommand handlers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def cmd_fallback_list(args) -> None:  # noqa: ARG001
    """Print the configured fallback chain, or a hint when there is none.

    *args* is accepted for dispatcher compatibility and is not read.
    """
    from hermes_cli.config import load_config

    cfg = load_config()
    entries = _read_chain(cfg)

    print()
    if not entries:
        print(" No fallback providers configured.")
        print()
        print(" Add one with: hermes fallback add")
        print()
        return

    primary_label = _describe_primary(cfg)
    if primary_label:
        print(f" Primary: {primary_label}")
        print()

    noun = "entry" if len(entries) == 1 else "entries"
    print(f" Fallback chain ({len(entries)} {noun}):")
    for position, item in enumerate(entries, start=1):
        print(f" {position}. {_format_entry(item)}")
    print()
    print(" Tried in order when the primary fails (rate-limit, 5xx, connection errors).")
    print(" Docs: https://hermes-agent.nousresearch.com/docs/user-guide/features/fallback-providers")
    print()
|
||||
|
||||
|
||||
def _describe_primary(config: Dict[str, Any]) -> Optional[str]:
|
||||
"""One-line description of the primary model for display purposes."""
|
||||
model_cfg = config.get("model")
|
||||
if isinstance(model_cfg, dict):
|
||||
provider = (model_cfg.get("provider") or "?").strip() or "?"
|
||||
model = (model_cfg.get("default") or model_cfg.get("model") or "?").strip() or "?"
|
||||
return f"{model} (via {provider})"
|
||||
if isinstance(model_cfg, str) and model_cfg.strip():
|
||||
return model_cfg.strip()
|
||||
return None
|
||||
|
||||
|
||||
def cmd_fallback_add(args) -> None:
    """Launch the same picker as `hermes model`, then append the selection to the chain.

    The picker stores its selection in ``config["model"]`` (and may change
    the active auth provider), so both are snapshotted first and always
    restored afterwards. The selection is appended to ``fallback_providers``
    unless the picker did not complete, the selection equals the primary, or
    it is already in the chain.

    Args:
        args: Parsed CLI arguments, forwarded to the picker.

    Raises:
        SystemExit: Propagated unchanged from provider flows that exit, after
            the primary model and auth provider have been restored.
    """
    from hermes_cli.main import _require_tty, select_provider_and_model
    from hermes_cli.config import load_config, save_config

    _require_tty("fallback add")

    # Snapshot BEFORE the picker runs so the primary can be put back and the
    # picker's selection can be compared against it.
    before_cfg = load_config()
    model_before = copy.deepcopy(before_cfg.get("model"))
    active_provider_before = _snapshot_auth_active_provider()

    print()
    print(" Adding a fallback provider. The picker below is the same one used by")
    print(" `hermes model` — select the provider + model you want as a fallback.")
    print()

    try:
        select_provider_and_model(args=args)
        # Read the post-picker state to see what the user selected.
        new_entry = _extract_fallback_from_model_cfg(load_config().get("model"))
    finally:
        # Always undo the picker's changes to the primary, whether it
        # finished, exited via SystemExit, was interrupted with Ctrl-C, or
        # raised something else.
        _restore_model_cfg(model_before)
        _restore_auth_active_provider(active_provider_before)

    if not new_entry:
        # Picker didn't complete (user cancelled or flow bailed). Nothing to do.
        print()
        print(" No fallback added.")
        return

    # Picker picked the same thing that's already the primary → nothing
    # useful to add as a fallback to itself.
    primary_entry = _extract_fallback_from_model_cfg(model_before)
    if (
        primary_entry
        and primary_entry["provider"] == new_entry["provider"]
        and primary_entry["model"] == new_entry["model"]
    ):
        print()
        print(f" Selected model matches the current primary ({_format_entry(new_entry)}).")
        print(" A provider cannot be a fallback for itself — no change.")
        return

    # Reload the config now that the primary is restored, rather than reusing
    # a pre-picker dict: the picker may have touched other top-level keys
    # (custom_providers, providers credentials) that we want to keep.
    final_cfg = load_config()
    chain = _read_chain(final_cfg)

    # Reject exact-duplicate fallback entries.
    already_present = any(
        existing.get("provider") == new_entry["provider"]
        and existing.get("model") == new_entry["model"]
        for existing in chain
    )
    if already_present:
        print()
        print(f" {_format_entry(new_entry)} is already in the fallback chain — skipped.")
        return

    chain.append(new_entry)
    _write_chain(final_cfg, chain)
    save_config(final_cfg)

    print()
    print(f" Added fallback: {_format_entry(new_entry)}")
    print(f" Chain is now {len(chain)} {'entry' if len(chain) == 1 else 'entries'} long.")
    print()
    print(" Run `hermes fallback list` to view, or `hermes fallback remove` to delete.")
|
||||
|
||||
|
||||
def _restore_model_cfg(model_before: Any) -> None:
    """Put ``config["model"]`` back to the snapshot *model_before* and save.

    A ``None`` snapshot means the key was absent, so it is removed; any
    other value is deep-copied into a freshly loaded config before saving.
    """
    from hermes_cli.config import load_config, save_config

    current = load_config()
    if model_before is not None:
        current["model"] = copy.deepcopy(model_before)
    else:
        current.pop("model", None)
    save_config(current)
|
||||
|
||||
|
||||
def cmd_fallback_remove(args) -> None:  # noqa: ARG001
    """Let the user choose one fallback entry and delete it from the chain.

    The curses chooser is tried first; if importing or running it raises,
    the plain numbered prompt is used. Picking "Cancel" (the extra last
    choice) or giving no valid index leaves the configuration untouched.
    *args* is accepted for dispatcher compatibility and is not read.
    """
    from hermes_cli.config import load_config, save_config

    cfg = load_config()
    entries = _read_chain(cfg)

    if not entries:
        print()
        print(" No fallback providers configured — nothing to remove.")
        print()
        return

    prompt = "Select a fallback to remove:"
    labels = [_format_entry(item) for item in entries]
    labels.append("Cancel")

    try:
        from hermes_cli.setup import _curses_prompt_choice
        picked = _curses_prompt_choice(prompt, labels, 0)
    except Exception:
        picked = _numbered_pick(prompt, labels)

    # "Cancel" sits at index len(entries), so it falls outside the valid range.
    if picked is None or not (0 <= picked < len(entries)):
        print()
        print(" Cancelled — no change.")
        return

    dropped = entries.pop(picked)
    _write_chain(cfg, entries)
    save_config(cfg)

    print()
    print(f" Removed fallback: {_format_entry(dropped)}")
    if not entries:
        print(" Fallback chain is now empty.")
    else:
        print(f" Chain is now {len(entries)} {'entry' if len(entries) == 1 else 'entries'} long.")
    print()
|
||||
|
||||
|
||||
def cmd_fallback_clear(args) -> None:  # noqa: ARG001
    """Empty the fallback chain after an interactive y/N confirmation.

    Shows the current chain, asks for confirmation, and only writes the
    empty chain when the answer is ``y`` or ``yes`` (case-insensitive).
    *args* is accepted for dispatcher compatibility and is not read.
    """
    from hermes_cli.config import load_config, save_config

    cfg = load_config()
    entries = _read_chain(cfg)

    print()
    if not entries:
        print(" No fallback providers configured — nothing to clear.")
        print()
        return

    noun = "entry" if len(entries) == 1 else "entries"
    print(f" Current fallback chain ({len(entries)} {noun}):")
    for position, item in enumerate(entries, start=1):
        print(f" {position}. {_format_entry(item)}")
    print()

    try:
        answer = input(" Clear all entries? [y/N]: ").strip().lower()
    except (KeyboardInterrupt, EOFError):
        # Ctrl-C / Ctrl-D at the prompt counts as "no".
        print()
        print(" Cancelled.")
        return

    if answer not in ("y", "yes"):
        print(" Cancelled — no change.")
        return

    _write_chain(cfg, [])
    save_config(cfg)
    print()
    print(" Fallback chain cleared.")
    print()
|
||||
|
||||
|
||||
def _numbered_pick(question: str, choices: List[str]) -> Optional[int]:
    """Prompt with a plain numbered list and return the chosen 0-based index.

    Used when the curses chooser is unavailable. Re-prompts on non-numeric
    or out-of-range input; returns ``None`` on an empty answer, Ctrl-C, or
    end of input.
    """
    total = len(choices)
    print(question)
    for number, label in enumerate(choices, start=1):
        print(f" {number}. {label}")
    print()

    while True:
        try:
            answer = input(f"Choice [1-{total}]: ").strip()
            if not answer:
                return None
            position = int(answer) - 1
            if position in range(total):
                return position
            print(f"Please enter 1-{total}")
        except ValueError:
            print("Please enter a number")
        except (KeyboardInterrupt, EOFError):
            print()
            return None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Dispatch
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def cmd_fallback(args) -> None:
    """Route ``hermes fallback [subcommand]`` to the matching handler.

    The subcommand is read from ``args.fallback_command``; a missing or
    empty value behaves like ``list``. An unrecognised subcommand prints a
    usage hint and raises ``SystemExit(2)``.
    """
    requested = getattr(args, "fallback_command", None)

    if requested in (None, "", "list", "ls"):
        cmd_fallback_list(args)
        return
    if requested == "add":
        cmd_fallback_add(args)
        return
    if requested in ("remove", "rm"):
        cmd_fallback_remove(args)
        return
    if requested == "clear":
        cmd_fallback_clear(args)
        return

    print(f"Unknown fallback subcommand: {requested}")
    print("Use one of: list, add, remove, clear")
    raise SystemExit(2)
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
+2
-55
@@ -2315,13 +2315,13 @@ def _model_flow_nous(config, current_model="", args=None):
|
||||
# The live /models endpoint returns hundreds of models; the curated list
|
||||
# shows only agentic models users recognize from OpenRouter.
|
||||
from hermes_cli.models import (
|
||||
get_curated_nous_model_ids,
|
||||
_PROVIDER_MODELS,
|
||||
get_pricing_for_provider,
|
||||
check_nous_free_tier,
|
||||
partition_nous_models_by_tier,
|
||||
)
|
||||
|
||||
model_ids = get_curated_nous_model_ids()
|
||||
model_ids = _PROVIDER_MODELS.get("nous", [])
|
||||
if not model_ids:
|
||||
print("No curated models available for Nous Portal.")
|
||||
return
|
||||
@@ -4780,13 +4780,6 @@ def cmd_webhook(args):
|
||||
webhook_command(args)
|
||||
|
||||
|
||||
def cmd_kanban(args):
    """Run the multi-profile collaboration board command.

    Imports ``kanban_command`` lazily and returns whatever it returns.
    """
    from hermes_cli.kanban import kanban_command

    outcome = kanban_command(args)
    return outcome
|
||||
|
||||
|
||||
def cmd_hooks(args):
|
||||
"""Shell-hook inspection and management."""
|
||||
from hermes_cli.hooks import hooks_command
|
||||
@@ -7230,9 +7223,6 @@ Examples:
|
||||
hermes auth remove <p> <t> Remove pooled credential by index, id, or label
|
||||
hermes auth reset <provider> Clear exhaustion status for a provider
|
||||
hermes model Select default model
|
||||
hermes fallback [list] Show fallback provider chain
|
||||
hermes fallback add Add a fallback provider (same picker as `hermes model`)
|
||||
hermes fallback remove Remove a fallback provider from the chain
|
||||
hermes config View configuration
|
||||
hermes config edit Edit config in $EDITOR
|
||||
hermes config set model gpt-4 Set a config value
|
||||
@@ -7574,42 +7564,6 @@ For more help on a command:
|
||||
)
|
||||
model_parser.set_defaults(func=cmd_model)
|
||||
|
||||
# =========================================================================
|
||||
# fallback command — manage the fallback provider chain
|
||||
# =========================================================================
|
||||
from hermes_cli.fallback_cmd import cmd_fallback
|
||||
|
||||
fallback_parser = subparsers.add_parser(
|
||||
"fallback",
|
||||
help="Manage fallback providers (tried when the primary model fails)",
|
||||
description=(
|
||||
"Manage the fallback provider chain. Fallback providers are tried "
|
||||
"in order when the primary model fails with rate-limit, overload, or "
|
||||
"connection errors. See: "
|
||||
"https://hermes-agent.nousresearch.com/docs/user-guide/features/fallback-providers"
|
||||
),
|
||||
)
|
||||
fallback_subparsers = fallback_parser.add_subparsers(dest="fallback_command")
|
||||
fallback_subparsers.add_parser(
|
||||
"list",
|
||||
aliases=["ls"],
|
||||
help="Show the current fallback chain (default when no subcommand)",
|
||||
)
|
||||
fallback_subparsers.add_parser(
|
||||
"add",
|
||||
help="Pick a provider + model (same picker as `hermes model`) and append to the chain",
|
||||
)
|
||||
fallback_subparsers.add_parser(
|
||||
"remove",
|
||||
aliases=["rm"],
|
||||
help="Pick an entry to delete from the chain",
|
||||
)
|
||||
fallback_subparsers.add_parser(
|
||||
"clear",
|
||||
help="Remove all fallback entries",
|
||||
)
|
||||
fallback_parser.set_defaults(func=cmd_fallback)
|
||||
|
||||
# =========================================================================
|
||||
# gateway command
|
||||
# =========================================================================
|
||||
@@ -8123,13 +8077,6 @@ For more help on a command:
|
||||
|
||||
webhook_parser.set_defaults(func=cmd_webhook)
|
||||
|
||||
# =========================================================================
|
||||
# kanban command — multi-profile collaboration board
|
||||
# =========================================================================
|
||||
from hermes_cli.kanban import build_parser as _build_kanban_parser
|
||||
kanban_parser = _build_kanban_parser(subparsers)
|
||||
kanban_parser.set_defaults(func=cmd_kanban)
|
||||
|
||||
# =========================================================================
|
||||
# hooks command — shell-hook inspection and management
|
||||
# =========================================================================
|
||||
|
||||
@@ -1,329 +0,0 @@
|
||||
"""Remote model catalog fetcher.
|
||||
|
||||
The Hermes docs site hosts a JSON manifest of curated models for providers
|
||||
we want to update without shipping a release (currently OpenRouter and
|
||||
Nous Portal). This module fetches, validates, and caches that manifest,
|
||||
falling back to the in-repo hardcoded lists when the network is unavailable.
|
||||
|
||||
Pipeline
|
||||
--------
|
||||
1. ``get_catalog()`` — returns a parsed manifest dict.
|
||||
- Checks in-process cache (invalidated by TTL).
|
||||
- Reads disk cache at ``~/.hermes/cache/model_catalog.json``.
|
||||
- Fetches the master URL if disk cache is stale or missing.
|
||||
- On any fetch failure, keeps using the stale cache (or empty dict).
|
||||
|
||||
2. ``get_curated_openrouter_models()`` / ``get_curated_nous_models()`` —
|
||||
thin accessors returning the shapes existing callers expect. Each
|
||||
falls back to the in-repo hardcoded list on any lookup failure.
|
||||
|
||||
Schema (version 1)
|
||||
------------------
|
||||
::
|
||||
|
||||
{
|
||||
"version": 1,
|
||||
"updated_at": "2026-04-25T22:00:00Z",
|
||||
"metadata": {...}, # free-form
|
||||
"providers": {
|
||||
"openrouter": {
|
||||
"metadata": {...}, # free-form
|
||||
"models": [
|
||||
{"id": "vendor/model", "description": "recommended",
|
||||
"metadata": {...}} # free-form, model-level
|
||||
]
|
||||
},
|
||||
"nous": {...}
|
||||
}
|
||||
}
|
||||
|
||||
Unknown fields are ignored — extra metadata can be added at either level
|
||||
without bumping ``version``. ``version`` bumps are reserved for
|
||||
breaking changes (renaming ``providers``, changing ``models`` shape).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
import urllib.error
|
||||
import urllib.request
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from hermes_cli import __version__ as _HERMES_VERSION
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Constants
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
DEFAULT_CATALOG_URL = (
|
||||
"https://hermes-agent.nousresearch.com/docs/api/model-catalog.json"
|
||||
)
|
||||
DEFAULT_TTL_HOURS = 24
|
||||
DEFAULT_FETCH_TIMEOUT = 8.0
|
||||
SUPPORTED_SCHEMA_VERSION = 1
|
||||
|
||||
_HERMES_USER_AGENT = f"hermes-cli/{_HERMES_VERSION}"
|
||||
|
||||
# In-process cache to avoid repeated disk + parse work across multiple
|
||||
# calls within the same session. Invalidated by TTL against the disk file's
|
||||
# mtime, so calling code never has to think about this.
|
||||
_catalog_cache: dict[str, Any] | None = None
|
||||
_catalog_cache_source_mtime: float = 0.0
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Config
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _load_catalog_config() -> dict[str, Any]:
    """Load the ``model_catalog`` config block with defaults filled in.

    Never raises: a missing/broken config, a non-dict config object, or an
    unparseable ``ttl_hours`` value all fall back to the defaults so that
    ``get_catalog`` can keep its "never raise" contract.

    Returns:
        A dict with the keys ``enabled`` (bool), ``url`` (str),
        ``ttl_hours`` (float) and ``providers`` (dict).
    """
    try:
        from hermes_cli.config import load_config

        cfg = load_config() or {}
    except Exception:
        # Config problems must not break catalog lookups; use defaults.
        cfg = {}
    if not isinstance(cfg, dict):
        cfg = {}

    raw = cfg.get("model_catalog")
    if not isinstance(raw, dict):
        raw = {}

    # A falsy value (missing, ``None``, ``0``, ``""``) selects the default,
    # as before; a value ``float()`` cannot parse now does too instead of
    # raising ``ValueError``/``TypeError`` out of every catalog lookup.
    try:
        ttl_hours = float(raw.get("ttl_hours") or DEFAULT_TTL_HOURS)
    except (TypeError, ValueError):
        ttl_hours = float(DEFAULT_TTL_HOURS)

    providers = raw.get("providers")
    return {
        "enabled": bool(raw.get("enabled", True)),
        "url": str(raw.get("url") or DEFAULT_CATALOG_URL),
        "ttl_hours": ttl_hours,
        "providers": providers if isinstance(providers, dict) else {},
    }
|
||||
|
||||
|
||||
def _cache_path() -> Path:
    """Return the path of the on-disk manifest cache file.

    ``hermes_constants`` is imported at call time (not module import time)
    so tests can monkeypatch the home directory.
    """
    from hermes_constants import get_hermes_home

    cache_dir = get_hermes_home() / "cache"
    return cache_dir / "model_catalog.json"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fetch + validate + cache
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _fetch_manifest(url: str, timeout: float) -> dict[str, Any] | None:
    """Download and validate the manifest at ``url``.

    Returns the parsed manifest dict, or ``None`` when the request fails,
    the body is not valid JSON, or the payload fails ``_validate_manifest``.
    Failures are logged at INFO level and never raised.
    """
    try:
        request = urllib.request.Request(
            url,
            headers={
                "Accept": "application/json",
                "User-Agent": _HERMES_USER_AGENT,
            },
        )
        with urllib.request.urlopen(request, timeout=timeout) as response:
            body = response.read()
        manifest = json.loads(body.decode())
    except (urllib.error.URLError, TimeoutError, json.JSONDecodeError, OSError) as err:
        # Expected failure modes: network down, timeout, malformed body.
        logger.info("model catalog fetch failed (%s): %s", url, err)
        return None
    except Exception as err:  # pragma: no cover — defensive
        logger.info("model catalog fetch errored (%s): %s", url, err)
        return None

    if _validate_manifest(manifest):
        return manifest

    logger.info("model catalog at %s failed schema validation", url)
    return None
|
||||
|
||||
|
||||
def _validate_manifest(data: Any) -> bool:
|
||||
"""Return True when ``data`` matches the minimum manifest shape."""
|
||||
if not isinstance(data, dict):
|
||||
return False
|
||||
version = data.get("version")
|
||||
if not isinstance(version, int) or version > SUPPORTED_SCHEMA_VERSION:
|
||||
# Future schema version we don't understand — refuse rather than
|
||||
# guess. Older schemas (version < 1) aren't supported either.
|
||||
return False
|
||||
providers = data.get("providers")
|
||||
if not isinstance(providers, dict):
|
||||
return False
|
||||
for pname, pblock in providers.items():
|
||||
if not isinstance(pname, str) or not isinstance(pblock, dict):
|
||||
return False
|
||||
models = pblock.get("models")
|
||||
if not isinstance(models, list):
|
||||
return False
|
||||
for m in models:
|
||||
if not isinstance(m, dict):
|
||||
return False
|
||||
if not isinstance(m.get("id"), str) or not m["id"].strip():
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def _read_disk_cache() -> tuple[dict[str, Any] | None, float]:
    """Return ``(data_or_none, mtime)`` for the on-disk manifest cache.

    ``(None, 0.0)`` is returned when the file is missing, unreadable, not
    valid UTF-8/JSON, or fails ``_validate_manifest``. Never raises for a
    corrupt cache file.
    """
    path = _cache_path()
    try:
        mtime = path.stat().st_mtime
    except OSError:  # includes FileNotFoundError
        return (None, 0.0)

    try:
        # JSON is UTF-8 by specification; don't depend on the locale encoding.
        with open(path, encoding="utf-8") as fh:
            data = json.load(fh)
    except (OSError, ValueError):
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError,
        # so a garbled cache file is treated as "no cache" instead of raising.
        return (None, 0.0)

    if not _validate_manifest(data):
        return (None, 0.0)
    return (data, mtime)
|
||||
|
||||
|
||||
def _write_disk_cache(data: dict[str, Any]) -> None:
    """Persist ``data`` to the disk cache file as indented JSON.

    The payload is written to a sibling ``.tmp`` file first and then moved
    over the real path. ``OSError`` failures are logged at INFO level and
    swallowed.
    """
    target = _cache_path()
    try:
        target.parent.mkdir(parents=True, exist_ok=True)
        scratch = target.with_name(target.name + ".tmp")
        with scratch.open("w") as handle:
            json.dump(data, handle, indent=2)
            handle.write("\n")
        scratch.replace(target)
    except OSError as err:
        logger.info("model catalog cache write failed: %s", err)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Public API
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def get_catalog(*, force_refresh: bool = False) -> dict[str, Any]:
    """Return the parsed model catalog manifest, or an empty dict on failure.

    Resolution order: a disk cache still within the TTL (unless
    ``force_refresh``), then a fresh network fetch, then any stale disk
    copy, then ``{}``.

    Callers should treat a missing provider/model as "use the in-repo fallback"
    — never raise from this function so the CLI keeps working offline.
    """
    global _catalog_cache, _catalog_cache_source_mtime

    settings = _load_catalog_config()
    if not settings["enabled"]:
        return {}

    max_age_seconds = max(0.0, settings["ttl_hours"] * 3600.0)

    cached_manifest, cached_mtime = _read_disk_cache()
    now = time.time()
    cache_is_fresh = (
        cached_manifest is not None and (now - cached_mtime) < max_age_seconds
    )

    if cache_is_fresh and not force_refresh:
        # Disk file unchanged since we last loaded it: reuse the in-process copy.
        if _catalog_cache is not None and cached_mtime == _catalog_cache_source_mtime:
            return _catalog_cache
        # Otherwise adopt the fresh disk copy without a network hit.
        _catalog_cache = cached_manifest
        _catalog_cache_source_mtime = cached_mtime
        return cached_manifest

    # Need to (re)fetch. If it fails, fall back to any stale disk copy.
    remote_manifest = _fetch_manifest(settings["url"], DEFAULT_FETCH_TIMEOUT)
    if remote_manifest is None:
        if cached_manifest is None:
            return {}
        _catalog_cache = cached_manifest
        _catalog_cache_source_mtime = cached_mtime
        return cached_manifest

    _write_disk_cache(remote_manifest)
    reread_manifest, reread_mtime = _read_disk_cache()
    if reread_manifest is not None:
        _catalog_cache = reread_manifest
        _catalog_cache_source_mtime = reread_mtime
        return reread_manifest

    # The re-read returned nothing: keep the fetched manifest in memory only.
    _catalog_cache = remote_manifest
    _catalog_cache_source_mtime = now
    return remote_manifest
|
||||
|
||||
|
||||
def _fetch_provider_override(provider: str) -> dict[str, Any] | None:
    """Fetch ``model_catalog.providers.<name>.url`` when one is configured.

    Returns ``None`` when the catalog is disabled, no non-blank override URL
    is set for ``provider``, or the fetch/validation fails.
    """
    settings = _load_catalog_config()
    if not settings["enabled"]:
        return None

    entry = settings["providers"].get(provider)
    url = entry.get("url") if isinstance(entry, dict) else None
    if not isinstance(url, str):
        return None

    url = url.strip()
    if not url:
        return None

    # Override fetches skip the disk cache because they're usually
    # third-party self-hosted. Re-request on every call but with a short
    # timeout so they don't block the picker.
    return _fetch_manifest(url, DEFAULT_FETCH_TIMEOUT)
|
||||
|
||||
|
||||
def _get_provider_block(provider: str) -> dict[str, Any] | None:
    """Return ``provider``'s manifest block, preferring a per-provider override.

    Falls back to the main catalog when no override manifest is available or
    it lacks a dict block for ``provider``; returns ``None`` if neither has one.
    """
    override_manifest = _fetch_provider_override(provider)
    if override_manifest is not None:
        candidate = override_manifest.get("providers", {}).get(provider)
        if isinstance(candidate, dict):
            return candidate

    manifest = get_catalog()
    if manifest:
        candidate = manifest.get("providers", {}).get(provider)
        if isinstance(candidate, dict):
            return candidate
    return None
|
||||
|
||||
|
||||
def get_curated_openrouter_models() -> list[tuple[str, str]] | None:
    """Return OpenRouter's curated ``[(id, description), ...]`` from the manifest.

    Entries whose ``id`` is blank are dropped. Returns ``None`` when the
    manifest is unavailable or yields no usable entries, so callers can fall
    back to their hardcoded list.
    """
    block = _get_provider_block("openrouter")
    if not block:
        return None

    pairs = [
        (str(entry.get("id") or "").strip(), str(entry.get("description") or ""))
        for entry in block.get("models", [])
    ]
    curated: list[tuple[str, str]] = [(mid, desc) for mid, desc in pairs if mid]
    return curated if curated else None
|
||||
|
||||
|
||||
def get_curated_nous_models() -> list[str] | None:
    """Return Nous Portal's curated list of model ids from the manifest.

    Blank ids are dropped. Returns ``None`` when the manifest is unavailable
    or contains no usable ids.
    """
    block = _get_provider_block("nous")
    if not block:
        return None

    stripped_ids = (
        str(entry.get("id") or "").strip() for entry in block.get("models", [])
    )
    curated: list[str] = [mid for mid in stripped_ids if mid]
    return curated if curated else None
|
||||
|
||||
|
||||
def reset_cache() -> None:
    """Drop the in-process manifest copy and its recorded source mtime.

    Used by tests and ``hermes model --refresh``.
    """
    global _catalog_cache, _catalog_cache_source_mtime
    _catalog_cache, _catalog_cache_source_mtime = None, 0.0
|
||||
+1
-28
@@ -876,16 +876,7 @@ def fetch_openrouter_models(
|
||||
if _openrouter_catalog_cache is not None and not force_refresh:
|
||||
return list(_openrouter_catalog_cache)
|
||||
|
||||
# Prefer the remotely-hosted catalog manifest; fall back to the in-repo
|
||||
# snapshot when the manifest is unreachable. Both are curated lists that
|
||||
# drive the picker; the OpenRouter live /v1/models filter (tool support,
|
||||
# free pricing) is applied on top either way.
|
||||
try:
|
||||
from hermes_cli.model_catalog import get_curated_openrouter_models
|
||||
remote = get_curated_openrouter_models()
|
||||
except Exception:
|
||||
remote = None
|
||||
fallback = list(remote) if remote else list(OPENROUTER_MODELS)
|
||||
fallback = list(OPENROUTER_MODELS)
|
||||
preferred_ids = [mid for mid, _ in fallback]
|
||||
|
||||
try:
|
||||
@@ -938,24 +929,6 @@ def model_ids(*, force_refresh: bool = False) -> list[str]:
|
||||
return [mid for mid, _ in fetch_openrouter_models(force_refresh=force_refresh)]
|
||||
|
||||
|
||||
def get_curated_nous_model_ids() -> list[str]:
    """Return the curated Nous Portal model-id list.

    The remotely-hosted catalog manifest (published under
    ``website/static/api/model-catalog.json``) is preferred; when it is
    unreachable or empty, the in-repo snapshot in ``_PROVIDER_MODELS["nous"]``
    is used instead. The result is always a new list (never None).
    """
    try:
        from hermes_cli.model_catalog import get_curated_nous_models

        manifest_ids = get_curated_nous_models()
    except Exception:
        manifest_ids = None

    source = manifest_ids if manifest_ids else _PROVIDER_MODELS.get("nous", [])
    return list(source)
|
||||
|
||||
|
||||
def _ai_gateway_model_is_free(pricing: Any) -> bool:
|
||||
"""Return True if an AI Gateway model has $0 input AND output pricing."""
|
||||
if not isinstance(pricing, dict):
|
||||
|
||||
+2
-1
@@ -10,7 +10,8 @@ import random
|
||||
|
||||
TIPS = [
|
||||
# --- Slash Commands ---
|
||||
"/background <prompt> (alias /bg or /btw) runs a task in a separate session while your current one stays free.",
|
||||
"/btw <question> asks a quick side question without tools or history — great for clarifications.",
|
||||
"/background <prompt> runs a task in a separate session while your current one stays free.",
|
||||
"/branch forks the current session so you can explore a different direction without losing progress.",
|
||||
"/compress manually compresses conversation context when things get long.",
|
||||
"/rollback lists filesystem checkpoints — restore files the agent modified to any prior state.",
|
||||
|
||||
@@ -3103,23 +3103,13 @@ def _mount_plugin_api_routes():
|
||||
_log.warning("Plugin %s declares api=%s but file not found", plugin["name"], api_file_name)
|
||||
continue
|
||||
try:
|
||||
module_name = f"hermes_dashboard_plugin_{plugin['name']}"
|
||||
spec = importlib.util.spec_from_file_location(module_name, api_path)
|
||||
spec = importlib.util.spec_from_file_location(
|
||||
f"hermes_dashboard_plugin_{plugin['name']}", api_path,
|
||||
)
|
||||
if spec is None or spec.loader is None:
|
||||
continue
|
||||
mod = importlib.util.module_from_spec(spec)
|
||||
# Register in sys.modules BEFORE exec_module so pydantic/FastAPI
|
||||
# can resolve forward references (e.g. models defined in a file
|
||||
# that uses `from __future__ import annotations`). Without this,
|
||||
# TypeAdapter lazy-build fails at first request with
|
||||
# "is not fully defined" because the module namespace isn't
|
||||
# reachable by name for string-annotation resolution.
|
||||
sys.modules[module_name] = mod
|
||||
try:
|
||||
spec.loader.exec_module(mod)
|
||||
except Exception:
|
||||
sys.modules.pop(module_name, None)
|
||||
raise
|
||||
spec.loader.exec_module(mod)
|
||||
router = getattr(mod, "router", None)
|
||||
if router is None:
|
||||
_log.warning("Plugin %s api file has no 'router' attribute", plugin["name"])
|
||||
|
||||
@@ -0,0 +1,70 @@
|
||||
import React from 'react';
import { Box, Text, useApp, useStdout } from 'ink';
import { VirtualizedMessageContainer } from './VirtualizedMessageContainer';
import { usePerformanceMonitor } from './performanceHooks';
|
||||
|
||||
/**
 * Proof-of-concept component demonstrating the performance fixes: a
 * virtualized message viewport sized to the terminal, with a panel showing
 * the render metrics reported by usePerformanceMonitor.
 */
export const AppLayoutOptimized: React.FC = () => {
  // Ink's useApp() only exposes `exit`; the output stream (and therefore the
  // terminal size) comes from useStdout().
  const { stdout } = useStdout();
  const { metrics } = usePerformanceMonitor('AppLayout', {
    logToConsole: true
  });

  // rows/columns are undefined when stdout is not a TTY; fall back to a
  // conventional 80x24 terminal instead of feeding NaN into the layout.
  const terminalRows = stdout.rows || 24;
  const terminalColumns = stdout.columns || 80;

  // Calculate viewport dimensions based on terminal size
  const viewportHeight = Math.max(0, terminalRows - 4); // Reserve space for input, etc.
  const viewportWidth = terminalColumns;

  // In a real implementation, messages would come from app state
  const messages = React.useMemo(() => {
    return Array.from({ length: 1000 }, (_, index) => ({
      id: `msg-${index}`,
      role: index % 2 === 0 ? 'user' : 'assistant',
      content: `This is message ${index}. It contains some content that might wrap to multiple lines depending on the terminal width. This demonstrates how virtualization can significantly improve performance.`,
    }));
  }, []);

  return (
    <Box flexDirection="column" height={terminalRows} width={terminalColumns}>
      {/* Ink's <Box> has no `style` prop, so the CSS scrollbar-gutter hint
          from the original sketch is dropped. */}
      <Box
        flexDirection="column"
        height={viewportHeight}
        width={viewportWidth}
        overflow="hidden"
      >
        <VirtualizedMessageContainer
          messages={messages}
          height={viewportHeight}
          width={viewportWidth}
          expandCode={true}
        />
      </Box>

      {/* Performance metrics display. Ink requires every string to be
          rendered inside <Text>, never directly inside <Box>. */}
      <Box marginTop={1}>
        <Box
          borderStyle="round"
          borderColor="yellow"
          paddingX={1}
          width={viewportWidth}
        >
          <Box flexDirection="column">
            <Box>
              <Box width={25}><Text>Avg render time:</Text></Box>
              <Text>{metrics.averageRenderTime.toFixed(2)}ms</Text>
            </Box>
            <Box>
              <Box width={25}><Text>Total renders:</Text></Box>
              <Text>{metrics.totalRenders}</Text>
            </Box>
            <Box>
              <Box width={25}><Text>Slow renders:</Text></Box>
              <Text>{metrics.slowRenders}</Text>
            </Box>
          </Box>
        </Box>
      </Box>
    </Box>
  );
};
|
||||
@@ -0,0 +1,147 @@
|
||||
import React, { useEffect, useRef, useState } from 'react';
|
||||
import { FixedSizeList as List } from 'react-window';
|
||||
import { Box, Text } from 'ink';
|
||||
import { useTheme } from '../hooks/useTheme';
|
||||
import { MessageData } from '../gatewayTypes';
|
||||
import { Markdown } from './markdown';
|
||||
import { themed } from './themed';
|
||||
|
||||
// Estimated average height for message rows (will be refined later)
|
||||
const ESTIMATED_ROW_HEIGHT = 50;
|
||||
|
||||
// Overscan count - render this many items above/below the visible area
|
||||
const OVERSCAN_COUNT = 10;
|
||||
|
||||
/** Props accepted by MessageLine. */
interface MessageLineProps {
  message: MessageData;
  onRender?: () => void;
  isHighlighted?: boolean;
  expandCode?: boolean;
}

/**
 * Renders one transcript message: a themed role label followed by the
 * Markdown-rendered content. Memoized on message id/content/role,
 * `isHighlighted` and `expandCode`; note that `onRender` is not part of the
 * comparison.
 */
export const MessageLine: React.FC<MessageLineProps> = React.memo((props) => {
  const { message, onRender, isHighlighted = false, expandCode = false } = props;
  const theme = useTheme();
  const role = message.role;
  const content = message.content;

  // Notify the parent after mount and whenever the callback identity changes.
  useEffect(() => {
    if (onRender) {
      onRender();
    }
  }, [onRender]);

  // Skip rendering for empty messages
  if (!content) {
    return null;
  }

  const palette = theme.message;

  const RoleLabel = themed(Text, {
    user: palette.user.label,
    assistant: palette.assistant.label,
    system: palette.system.label,
    tool: palette.tool.label,
    function: palette.function.label,
  });

  // Only the key set is used, to type the Markdown `variant` prop below.
  const contentStyleByRole = {
    user: palette.user.content,
    assistant: palette.assistant.content,
    system: palette.system.content,
    tool: palette.tool.content,
    function: palette.function.content,
  };

  const highlightBorder = isHighlighted ? 'bold' : undefined;
  const highlightColor = isHighlighted ? theme.focused : undefined;

  return (
    <Box
      flexDirection="column"
      paddingX={0}
      paddingY={0}
      borderStyle={highlightBorder}
      borderColor={highlightColor}
    >
      <Box>
        <RoleLabel variant={role as any}>{role}:</RoleLabel>
      </Box>
      <Box marginLeft={1}>
        <Markdown
          variant={role as keyof typeof contentStyleByRole}
          content={content || ''}
          expandCode={expandCode}
        />
      </Box>
    </Box>
  );
}, (before, after) => {
  // Custom comparison logic for memoization: true means "skip the re-render".
  const sameMessage =
    before.message.id === after.message.id &&
    before.message.content === after.message.content &&
    before.message.role === after.message.role;

  return (
    sameMessage &&
    before.isHighlighted === after.isHighlighted &&
    before.expandCode === after.expandCode
  );
});
|
||||
|
||||
/** Props accepted by VirtualizedMessageContainer. */
interface MessageContainerProps {
  messages: MessageData[];
  height: number;
  width: number;
  expandCode?: boolean;
  highlightedMessageId?: string;
}

/**
 * Windowed transcript view built on react-window's FixedSizeList: only the
 * rows near the visible area (plus OVERSCAN_COUNT on each side) are mounted.
 *
 * NOTE(review): react-window and the <div> row wrapper target the browser
 * DOM, while MessageLine renders Ink components — confirm this combination
 * actually works in the terminal renderer.
 */
export const VirtualizedMessageContainer: React.FC<MessageContainerProps> = (props) => {
  const { messages, height, width, expandCode = false, highlightedMessageId } = props;

  const listRef = useRef<List>(null);
  const [measuredHeights, setMeasuredHeights] = useState<Record<string, number>>({});
  const messageCount = messages.length;

  // Scroll to bottom on new messages
  useEffect(() => {
    const list = listRef.current;
    if (!list || messageCount <= 0) {
      return;
    }
    list.scrollToItem(messageCount - 1);
  }, [messageCount]);

  // Record the actual rendered heights for more accurate virtualization
  const handleMessageRender = (id: string, index: number) => {
    // In a real implementation, we would measure DOM nodes here
    // This is a placeholder for the concept
    if (measuredHeights[id]) {
      return;
    }
    setMeasuredHeights((known) => ({
      ...known,
      [id]: ESTIMATED_ROW_HEIGHT // In reality, we'd measure the actual height
    }));
  };

  return (
    <List
      ref={listRef}
      height={height}
      width={width}
      itemCount={messageCount}
      itemSize={ESTIMATED_ROW_HEIGHT}
      overscanCount={OVERSCAN_COUNT}
      style={{ scrollbarGutter: 'stable' }}
    >
      {(row) => {
        const rowMessage = messages[row.index];
        const highlighted = rowMessage.id === highlightedMessageId;

        return (
          <div style={row.style}>
            <MessageLine
              message={rowMessage}
              expandCode={expandCode}
              isHighlighted={highlighted}
              onRender={() => handleMessageRender(rowMessage.id, row.index)}
            />
          </div>
        );
      }}
    </List>
  );
};
|
||||
@@ -0,0 +1,188 @@
|
||||
import React, { useState, useRef, useEffect, useCallback } from 'react';
|
||||
import { Box, Text } from 'ink';
|
||||
import { useTheme } from '../hooks/useTheme';
|
||||
import { MessageData } from '../gatewayTypes';
|
||||
import { Markdown } from './markdown';
|
||||
import { themed } from './themed';
|
||||
import { usePerformanceMonitor, useScrollPerformance } from '../hooks/performanceHooks';
|
||||
|
||||
/**
 * Memoized transcript row: a themed role label followed by the
 * Markdown-rendered message content. Re-renders only when the message's
 * id/content/role, `isHighlighted` or `expandCode` change.
 */
export const MessageLine: React.FC<{
  message: MessageData;
  isHighlighted?: boolean;
  expandCode?: boolean;
}> = React.memo(({ message, isHighlighted = false, expandCode = false }) => {
  const theme = useTheme();
  const { role, content } = message;
  const { logEvent } = usePerformanceMonitor(`MessageLine-${role.substring(0,1)}${message.id?.substring(0,4)}`);

  // Log initial render for performance monitoring.
  // This hook must be called BEFORE the early return below: React requires
  // the same hooks in the same order on every render, and a message whose
  // content goes from empty to non-empty would otherwise change the hook
  // count between renders. The empty dependency list is intentional — the
  // event is logged once, on mount, and only for messages that render.
  useEffect(() => {
    if (content) {
      logEvent('initial-render');
    }
  }, []);

  // Skip rendering for empty messages
  if (!content) return null;

  const RoleLabel = themed(Text, {
    user: theme.message.user.label,
    assistant: theme.message.assistant.label,
    system: theme.message.system.label,
    tool: theme.message.tool.label,
    function: theme.message.function.label,
  });

  // Only the key set is used, to type the Markdown `variant` prop below.
  const roleStyles = {
    user: theme.message.user.content,
    assistant: theme.message.assistant.content,
    system: theme.message.system.content,
    tool: theme.message.tool.content,
    function: theme.message.function.content,
  };

  return (
    <Box
      flexDirection="column"
      paddingX={0}
      paddingY={0}
      borderStyle={isHighlighted ? 'bold' : undefined}
      borderColor={isHighlighted ? theme.focused : undefined}
    >
      <Box>
        <RoleLabel variant={role as any}>{role}:</RoleLabel>
      </Box>
      <Box marginLeft={1}>
        <Markdown
          variant={role as keyof typeof roleStyles}
          content={content || ''}
          expandCode={expandCode}
        />
      </Box>
    </Box>
  );
}, (prevProps, nextProps) => {
  // Custom comparison to prevent unnecessary re-renders:
  // returning true tells React.memo to skip the render.
  return (
    prevProps.message.id === nextProps.message.id &&
    prevProps.message.content === nextProps.message.content &&
    prevProps.message.role === nextProps.message.role &&
    prevProps.isHighlighted === nextProps.isHighlighted &&
    prevProps.expandCode === nextProps.expandCode
  );
});
|
||||
|
||||
// Approximate number of messages visible in the viewport at once.
const VISIBLE_MESSAGE_COUNT = 30;

// Rows reserved by the spacer boxes for each message outside the window.
const SPACER_ROWS_PER_MESSAGE = 3;

/**
 * Fixed window approach for rendering only visible + buffer messages.
 *
 * Only `messages.slice(start, end)` is mounted; spacer boxes above and below
 * stand in for the rest. The window follows the scroll position and snaps to
 * the tail when new messages arrive while the view is near the bottom.
 *
 * NOTE(review): this reads DOM scroll metrics (scrollTop / scrollHeight /
 * clientHeight) and an `onScroll` handler from an Ink <Box> — confirm the
 * target renderer actually provides them.
 */
export const MessageContainer: React.FC<{
  messages: MessageData[];
  scrollBuffer?: number;
  expandCode?: boolean;
  highlightedMessageId?: string;
}> = ({ messages, scrollBuffer = 50, expandCode = false, highlightedMessageId }) => {
  const containerRef = useRef<HTMLDivElement>(null);
  const { onScroll } = useScrollPerformance('MessageContainer');
  const { logEvent } = usePerformanceMonitor('MessageContainer');

  // Track visible range
  const [visibleRange, setVisibleRange] = useState({
    start: Math.max(0, messages.length - VISIBLE_MESSAGE_COUNT),
    end: messages.length
  });

  // Handle scroll events to update visible range
  const handleScroll = useCallback(() => {
    if (!containerRef.current) return;

    const { scrollTop, scrollHeight, clientHeight } = containerRef.current;

    // When the content fits (nothing to scroll) the denominator is 0 and the
    // ratio would be NaN/Infinity, which turns the range into NaN and the
    // slice into an empty list. Treat that case as "at the bottom" and clamp
    // overscroll to [0, 1].
    const scrollableDistance = scrollHeight - clientHeight;
    const scrollRatio = scrollableDistance > 0
      ? Math.min(1, Math.max(0, scrollTop / scrollableDistance))
      : 1;

    // Calculate visible range based on scroll position
    const totalMessages = messages.length;
    const bufferSize = scrollBuffer;

    // Calculate start/end indices
    const middleIndex = Math.floor(scrollRatio * totalMessages);
    const halfVisible = Math.floor(VISIBLE_MESSAGE_COUNT / 2);

    let start = Math.max(0, middleIndex - halfVisible - bufferSize);
    let end = Math.min(totalMessages, middleIndex + halfVisible + bufferSize);

    // Special case for start/end of list
    if (scrollRatio < 0.1) {
      start = 0;
      end = Math.min(totalMessages, VISIBLE_MESSAGE_COUNT + bufferSize);
    } else if (scrollRatio > 0.9) {
      end = totalMessages;
      start = Math.max(0, totalMessages - VISIBLE_MESSAGE_COUNT - bufferSize);
    }

    setVisibleRange({ start, end });

    // Performance monitoring
    onScroll();
  }, [messages.length, scrollBuffer, onScroll]);

  // Auto-scroll to bottom on new messages
  useEffect(() => {
    if (containerRef.current) {
      const { scrollTop, scrollHeight, clientHeight } = containerRef.current;
      const isNearBottom = scrollTop + clientHeight >= scrollHeight - 50;

      if (isNearBottom) {
        // Only auto-scroll if we're already near the bottom
        logEvent('auto-scroll');
        containerRef.current.scrollTop = scrollHeight;

        // Update visible range to show bottom messages
        setVisibleRange({
          start: Math.max(0, messages.length - VISIBLE_MESSAGE_COUNT - scrollBuffer),
          end: messages.length
        });
      }
    }
    // logEvent is deliberately not a dependency: it is a new function on
    // every render and would re-run this effect each time.
  }, [messages.length, scrollBuffer]);

  // Log rendering details
  useEffect(() => {
    logEvent(`render-range-${visibleRange.start}-${visibleRange.end}`);
  }, [visibleRange]);

  // Get visible messages subset
  const visibleMessages = messages.slice(visibleRange.start, visibleRange.end);

  return (
    <Box
      flexDirection="column"
      overflow="auto"
      ref={containerRef}
      onScroll={handleScroll}
      style={{ scrollbarGutter: 'stable both-edges' }}
    >
      {/* Spacer for scroll position */}
      {visibleRange.start > 0 && (
        <Box
          height={visibleRange.start * SPACER_ROWS_PER_MESSAGE}
          width="100%"
        />
      )}

      {/* Visible messages */}
      {visibleMessages.map((message) => (
        <MessageLine
          key={message.id}
          message={message}
          expandCode={expandCode}
          isHighlighted={message.id === highlightedMessageId}
        />
      ))}

      {/* Spacer for remaining messages */}
      {visibleRange.end < messages.length && (
        <Box
          height={(messages.length - visibleRange.end) * SPACER_ROWS_PER_MESSAGE}
          width="100%"
        />
      )}
    </Box>
  );
};
|
||||
@@ -0,0 +1,207 @@
|
||||
import { useRef, useCallback, useState, useEffect } from 'react';
|
||||
|
||||
/**
 * Custom hook for performance monitoring.
 * Tracks how long each render of the calling component takes (from the start
 * of the hook call to the effect that runs after commit) and exposes rolling
 * metrics over the last 100 renders.
 *
 * @param componentName Label used in log lines.
 * @param options.logToConsole Log slow renders/operations and events
 *   (default false).
 * @param options.thresholdMs Duration above which a render or operation
 *   counts as slow (default 16, the 60fps frame budget).
 * @returns `metrics` (a snapshot of the renders completed before the current
 *   one), `measureOperation` and `logEvent`.
 *
 * The metrics live in refs rather than state: updating state from an effect
 * that runs on every render would schedule another render each time and
 * never settle. The snapshot therefore refreshes whenever the host component
 * re-renders for its own reasons.
 */
export function usePerformanceMonitor(
  componentName: string,
  options: { logToConsole?: boolean; thresholdMs?: number } = {}
) {
  // Resolve each default separately so a partial options object such as
  // `{ logToConsole: true }` still gets the 16ms threshold.
  const logToConsole = options.logToConsole ?? false;
  const thresholdMs = options.thresholdMs ?? 16; // 60fps threshold

  const renderCountRef = useRef(0);
  const renderTimesRef = useRef<number[]>([]);

  // Measure start of render cycle
  const renderStart = performance.now();

  // No dependency list: runs after every commit to close the measurement.
  useEffect(() => {
    const renderTime = performance.now() - renderStart;

    renderCountRef.current += 1;
    renderTimesRef.current.push(renderTime);

    // Keep only the last 100 measurements
    if (renderTimesRef.current.length > 100) {
      renderTimesRef.current.shift();
    }

    if (logToConsole && renderTime > thresholdMs) {
      const samples = renderTimesRef.current;
      const average = samples.reduce((sum, time) => sum + time, 0) / samples.length;
      const slowRenders = samples.filter(time => time > thresholdMs).length;

      console.log(
        `[PERF] ${componentName} render: ${renderTime.toFixed(2)}ms ` +
        `(avg: ${average.toFixed(2)}ms, slow: ${slowRenders}/${renderCountRef.current})`
      );
    }
  });

  // Snapshot of everything recorded so far (empty on the very first render).
  const recorded = renderTimesRef.current;
  const metrics = {
    averageRenderTime: recorded.length > 0
      ? recorded.reduce((sum, time) => sum + time, 0) / recorded.length
      : 0,
    totalRenders: renderCountRef.current,
    slowRenders: recorded.filter(time => time > thresholdMs).length
  };

  // Function to measure specific operations; returns the duration in ms.
  const measureOperation = useCallback((operationName: string, fn: () => void) => {
    const start = performance.now();
    fn();
    const duration = performance.now() - start;

    if (logToConsole && duration > thresholdMs) {
      console.log(`[PERF] ${componentName}.${operationName}: ${duration.toFixed(2)}ms`);
    }

    return duration;
  }, [componentName, logToConsole, thresholdMs]);

  return {
    metrics,
    measureOperation,
    logEvent: (event: string, durationMs?: number) => {
      if (logToConsole) {
        // Compare against undefined so a measured duration of 0ms is printed.
        const message = durationMs !== undefined
          ? `[PERF] ${componentName}.${event}: ${durationMs.toFixed(2)}ms`
          : `[PERF] ${componentName}.${event}`;
        console.log(message);
      }
    }
  };
}
|
||||
|
||||
/**
 * Hook to debounce frequent updates.
 * Returns `value` only after it has stayed unchanged for `delay` ms; every
 * change (of `value` or `delay`) cancels the pending update and restarts the
 * timer.
 */
export function useDebounce<T>(value: T, delay: number): T {
  const [settledValue, setSettledValue] = useState<T>(value);

  useEffect(() => {
    const timer = setTimeout(() => {
      setSettledValue(value);
    }, delay);

    // Cancel the pending update when the inputs change or on unmount.
    return () => {
      clearTimeout(timer);
    };
  }, [value, delay]);

  return settledValue;
}
|
||||
|
||||
/**
 * Hook to throttle frequent updates.
 * The returned value follows `value`, but is refreshed no sooner than
 * `limit` ms after the previous refresh.
 */
export function useThrottle<T>(value: T, limit: number): T {
  const [emittedValue, setEmittedValue] = useState<T>(value);
  const lastRan = useRef(Date.now());

  useEffect(() => {
    // Wait out whatever is left of the current throttle window.
    const remaining = limit - (Date.now() - lastRan.current);

    const timer = setTimeout(() => {
      if (Date.now() - lastRan.current < limit) {
        return;
      }
      setEmittedValue(value);
      lastRan.current = Date.now();
    }, remaining);

    return () => {
      clearTimeout(timer);
    };
  }, [value, limit]);

  return emittedValue;
}
|
||||
|
||||
/**
 * Hook to measure and track scroll performance.
 *
 * Call the returned `onScroll` from a scroll handler. A scroll "session"
 * starts at the first event and ends after 150ms without further events;
 * the durations of the last 50 sessions are kept for averaging.
 *
 * @param componentName Label used in log lines.
 * @param options.logToConsole Enable logging (default false).
 * @param options.sampleRate Fraction of finished sessions that are logged
 *   (default 0.1, to reduce noise).
 * @param options.thresholdMs Accepted for interface compatibility; not
 *   currently used by this hook.
 */
export function useScrollPerformance(
  componentName: string,
  options: { logToConsole?: boolean; sampleRate?: number; thresholdMs?: number } = {}
) {
  // Resolve each default separately so a partial options object (e.g. only
  // `logToConsole`) does not leave `sampleRate` undefined and silence logs.
  const logToConsole = options.logToConsole ?? false;
  const sampleRate = options.sampleRate ?? 0.1;

  const scrollTimesRef = useRef<number[]>([]);
  const isScrollingRef = useRef(false);
  const scrollStartTimeRef = useRef(0);
  const scrollThrottleTimerRef = useRef<ReturnType<typeof setTimeout> | null>(null);

  const onScrollStart = useCallback(() => {
    if (!isScrollingRef.current) {
      isScrollingRef.current = true;
      scrollStartTimeRef.current = performance.now();

      if (logToConsole) {
        console.log(`[SCROLL] ${componentName} scroll started`);
      }
    }
  }, [componentName, logToConsole]);

  const onScrollEnd = useCallback(() => {
    if (isScrollingRef.current) {
      const duration = performance.now() - scrollStartTimeRef.current;
      scrollTimesRef.current.push(duration);

      // Keep array at reasonable size
      if (scrollTimesRef.current.length > 50) {
        scrollTimesRef.current.shift();
      }

      isScrollingRef.current = false;

      if (logToConsole && Math.random() < sampleRate) {
        const avg = scrollTimesRef.current.reduce((sum, time) => sum + time, 0) /
          scrollTimesRef.current.length;

        console.log(
          `[SCROLL] ${componentName} scroll ended: ${duration.toFixed(2)}ms ` +
          `(avg: ${avg.toFixed(2)}ms)`
        );
      }
    }
  }, [componentName, logToConsole, sampleRate]);

  const onScroll = useCallback(() => {
    // Start scrolling tracking if not already
    onScrollStart();

    // Reset the scroll end timer
    if (scrollThrottleTimerRef.current) {
      clearTimeout(scrollThrottleTimerRef.current);
    }

    // Set timer to detect when scrolling stops
    scrollThrottleTimerRef.current = setTimeout(() => {
      onScrollEnd();
    }, 150); // Consider scrolling stopped after 150ms of inactivity
  }, [onScrollStart, onScrollEnd]);

  // Clean up: cancel a pending "scroll ended" timer on unmount.
  useEffect(() => {
    return () => {
      if (scrollThrottleTimerRef.current) {
        clearTimeout(scrollThrottleTimerRef.current);
      }
    };
  }, []);

  return { onScroll };
}
|
||||
@@ -0,0 +1,118 @@
|
||||
# TUI Performance Analysis
|
||||
|
||||
## Issues Identified
|
||||
|
||||
1. **Scrolling lag with large message history**
|
||||
- No virtualization or windowing in message rendering
|
||||
- Each message re-renders on scroll
|
||||
- Complete DOM reconstruction on each render
|
||||
|
||||
2. **Input jitter with scrollbar**
|
||||
- Composer width changes when scrollbar appears/disappears
|
||||
- Layout shifts when scrolling near bottom
|
||||
|
||||
3. **Layout thrashing**
|
||||
- Multiple successive layout recalculations
|
||||
- Excessive style computations in the render loop
|
||||
|
||||
## Investigation Areas
|
||||
|
||||
### 1. Message Rendering Performance
|
||||
|
||||
Current implementation in `messageLine.tsx` renders all messages in the transcript without virtualization. For long sessions, this means:
|
||||
|
||||
- Every message is always in the DOM
|
||||
- Complete re-rendering happens on each state change
|
||||
- No windowing or culling of off-screen content
|
||||
- Layout recalculations for entire transcript on each scroll
|
||||
|
||||
### 2. Re-rendering Optimization
|
||||
|
||||
- No memoization of message components
|
||||
- No element recycling
|
||||
- Each message potentially triggers layout shifts
|
||||
|
||||
### 3. Scrollbar Behavior
|
||||
|
||||
- Composer width calculation doesn't account for scrollbar presence
|
||||
- No stable layout constraints
|
||||
|
||||
## Proposed Solutions
|
||||
|
||||
### 1. Implement Virtualized List for Messages
|
||||
|
||||
Add `react-window` or similar virtualization library to render only visible messages:
|
||||
|
||||
```tsx
|
||||
import { FixedSizeList as List } from 'react-window';
|
||||
|
||||
// In the component render
|
||||
<List
|
||||
height={viewportHeight}
|
||||
itemCount={messages.length}
|
||||
itemSize={estimatedRowHeight}
|
||||
width="100%"
|
||||
overscanCount={5}
|
||||
>
|
||||
{({ index, style }) => (
|
||||
<div style={style}>
|
||||
<MessageLine message={messages[index]} />
|
||||
</div>
|
||||
)}
|
||||
</List>
|
||||
```
|
||||
|
||||
### 2. Memoize Message Components
|
||||
|
||||
Use `React.memo` to prevent unnecessary re-renders:
|
||||
|
||||
```tsx
|
||||
const MessageLine = React.memo(({ message, ...props }) => {
|
||||
// Component logic
|
||||
}, (prevProps, nextProps) => {
|
||||
// Custom comparison logic
|
||||
return prevProps.message.id === nextProps.message.id &&
|
||||
prevProps.message.content === nextProps.message.content;
|
||||
});
|
||||
```
|
||||
|
||||
### 3. Fix Scrollbar Layout Issues
|
||||
|
||||
- Add scrollbar-gutter CSS to reserve space for scrollbar
|
||||
- Stabilize layout with fixed container dimensions
|
||||
|
||||
```css
|
||||
.message-container {
|
||||
scrollbar-gutter: stable;
|
||||
overflow-y: auto;
|
||||
}
|
||||
```
|
||||
|
||||
### 4. Add Performance Measurements
|
||||
|
||||
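Add performance monitoring to identify bottlenecks. Note that the cleanup function below runs when the dependencies change or the component unmounts, so the logged figure is the time between effect setup and cleanup, not the cost of a single operation: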
|
||||
|
||||
```tsx
|
||||
useEffect(() => {
|
||||
const start = performance.now();
|
||||
// Measure key operations
|
||||
return () => {
|
||||
console.log(`Operation took ${performance.now() - start}ms`);
|
||||
};
|
||||
}, [dependencyArray]);
|
||||
```
|
||||
|
||||
## Implementation Plan
|
||||
|
||||
1. Add virtualization for message rendering
|
||||
2. Implement memo optimization for components
|
||||
3. Fix scrollbar layout issues
|
||||
4. Add performance monitoring
|
||||
5. Optimize re-render triggers
|
||||
6. Improve scroll restoration
|
||||
|
||||
## Resources
|
||||
|
||||
- [React Window](https://github.com/bvaughn/react-window)
|
||||
- [React Virtualized](https://github.com/bvaughn/react-virtualized)
|
||||
- [CSS Scrollbar Gutter](https://developer.mozilla.org/en-US/docs/Web/CSS/scrollbar-gutter)
|
||||
-1591
File diff suppressed because it is too large
Load Diff
-752
@@ -1,752 +0,0 @@
|
||||
/*
|
||||
* Hermes Kanban — dashboard plugin styles.
|
||||
*
|
||||
* All colors reference theme CSS vars so the board reskins with the
|
||||
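* active dashboard theme. Exceptions: status accents (dots, progress pill, staleness tiers, run outcomes) and drawer shadows use fixed hex/rgba values.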
|
||||
*/
|
||||
|
||||
.hermes-kanban {
|
||||
width: 100%;
|
||||
}
|
||||
|
||||
/* ---- Columns layout -------------------------------------------------- */
|
||||
|
||||
.hermes-kanban-columns {
|
||||
display: grid;
|
||||
grid-template-columns: repeat(auto-fit, minmax(260px, 1fr));
|
||||
gap: 0.75rem;
|
||||
align-items: start;
|
||||
}
|
||||
|
||||
.hermes-kanban-column {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
background: color-mix(in srgb, var(--color-card) 85%, transparent);
|
||||
border: 1px solid var(--color-border);
|
||||
border-radius: var(--radius);
|
||||
padding: 0.5rem;
|
||||
min-height: 200px;
|
||||
max-height: calc(100vh - 220px);
|
||||
transition: border-color 120ms ease, background-color 120ms ease;
|
||||
}
|
||||
|
||||
.hermes-kanban-column--drop {
|
||||
border-color: var(--color-ring);
|
||||
background: color-mix(in srgb, var(--color-ring) 8%, var(--color-card));
|
||||
}
|
||||
|
||||
.hermes-kanban-column-header {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.5rem;
|
||||
padding: 0.25rem 0.25rem 0.35rem;
|
||||
font-weight: 600;
|
||||
font-size: 0.85rem;
|
||||
color: var(--color-foreground);
|
||||
}
|
||||
|
||||
.hermes-kanban-column-label {
|
||||
flex: 1;
|
||||
letter-spacing: 0.01em;
|
||||
}
|
||||
|
||||
.hermes-kanban-column-count {
|
||||
font-variant-numeric: tabular-nums;
|
||||
color: var(--color-muted-foreground);
|
||||
font-size: 0.75rem;
|
||||
font-weight: 500;
|
||||
}
|
||||
|
||||
.hermes-kanban-column-add {
|
||||
appearance: none;
|
||||
background: transparent;
|
||||
border: 1px solid var(--color-border);
|
||||
color: var(--color-foreground);
|
||||
border-radius: var(--radius-sm, 0.25rem);
|
||||
width: 22px;
|
||||
height: 22px;
|
||||
line-height: 1;
|
||||
font-size: 1rem;
|
||||
cursor: pointer;
|
||||
}
|
||||
.hermes-kanban-column-add:hover {
|
||||
background: color-mix(in srgb, var(--color-foreground) 8%, transparent);
|
||||
}
|
||||
|
||||
.hermes-kanban-column-sub {
|
||||
padding: 0 0.25rem 0.5rem;
|
||||
font-size: 0.7rem;
|
||||
color: var(--color-muted-foreground);
|
||||
border-bottom: 1px solid color-mix(in srgb, var(--color-border) 60%, transparent);
|
||||
margin-bottom: 0.5rem;
|
||||
}
|
||||
|
||||
.hermes-kanban-column-body {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 0.45rem;
|
||||
overflow-y: auto;
|
||||
padding-right: 0.1rem;
|
||||
}
|
||||
|
||||
.hermes-kanban-empty {
|
||||
padding: 1.5rem 0.5rem;
|
||||
text-align: center;
|
||||
font-size: 0.75rem;
|
||||
color: var(--color-muted-foreground);
|
||||
border: 1px dashed color-mix(in srgb, var(--color-border) 70%, transparent);
|
||||
border-radius: var(--radius-sm, 0.25rem);
|
||||
}
|
||||
|
||||
/* ---- Status dots ----------------------------------------------------- */
|
||||
|
||||
.hermes-kanban-dot {
|
||||
display: inline-block;
|
||||
width: 0.5rem;
|
||||
height: 0.5rem;
|
||||
border-radius: 999px;
|
||||
background: var(--color-muted-foreground);
|
||||
}
|
||||
.hermes-kanban-dot-triage { background: #b47dd6; } /* lilac — fresh/unspecified */
|
||||
.hermes-kanban-dot-todo { background: var(--color-muted-foreground); }
|
||||
.hermes-kanban-dot-ready { background: #d4b348; } /* amber */
|
||||
.hermes-kanban-dot-running { background: #3fb97d; } /* green */
|
||||
.hermes-kanban-dot-blocked { background: var(--color-destructive, #d14a4a); }
|
||||
.hermes-kanban-dot-done { background: #4a8cd1; } /* blue */
|
||||
.hermes-kanban-dot-archived { background: var(--color-border); }
|
||||
|
||||
/* ---- Progress pill (N/M child tasks done) --------------------------- */
|
||||
|
||||
.hermes-kanban-progress {
|
||||
font-family: var(--font-mono, ui-monospace, monospace);
|
||||
font-size: 0.62rem;
|
||||
padding: 0.05rem 0.35rem;
|
||||
border-radius: 999px;
|
||||
background: color-mix(in srgb, var(--color-foreground) 8%, transparent);
|
||||
border: 1px solid color-mix(in srgb, var(--color-border) 80%, transparent);
|
||||
color: var(--color-muted-foreground);
|
||||
letter-spacing: 0.02em;
|
||||
}
|
||||
.hermes-kanban-progress--full {
|
||||
background: color-mix(in srgb, #3fb97d 22%, transparent);
|
||||
border-color: color-mix(in srgb, #3fb97d 45%, transparent);
|
||||
color: var(--color-foreground);
|
||||
}
|
||||
|
||||
/* ---- Lanes (per-profile sub-grouping inside Running) ---------------- */
|
||||
|
||||
.hermes-kanban-lane {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 0.35rem;
|
||||
padding: 0.25rem 0 0.35rem;
|
||||
border-top: 1px dashed color-mix(in srgb, var(--color-border) 70%, transparent);
|
||||
}
|
||||
.hermes-kanban-lane:first-child {
|
||||
border-top: 0;
|
||||
padding-top: 0;
|
||||
}
|
||||
.hermes-kanban-lane-head {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.4rem;
|
||||
font-size: 0.65rem;
|
||||
text-transform: uppercase;
|
||||
letter-spacing: 0.08em;
|
||||
color: var(--color-muted-foreground);
|
||||
padding: 0 0.1rem;
|
||||
}
|
||||
.hermes-kanban-lane-name {
|
||||
font-weight: 600;
|
||||
font-family: var(--font-mono, ui-monospace, monospace);
|
||||
}
|
||||
.hermes-kanban-lane-count {
|
||||
margin-left: auto;
|
||||
font-variant-numeric: tabular-nums;
|
||||
}
|
||||
|
||||
/* ---- Card ------------------------------------------------------------ */
|
||||
|
||||
.hermes-kanban-card {
|
||||
cursor: grab;
|
||||
transition: transform 100ms ease, box-shadow 100ms ease;
|
||||
}
|
||||
.hermes-kanban-card:hover {
|
||||
box-shadow: 0 1px 0 0 var(--color-ring) inset, 0 0 0 1px var(--color-ring) inset;
|
||||
}
|
||||
.hermes-kanban-card:active {
|
||||
cursor: grabbing;
|
||||
transform: scale(0.995);
|
||||
}
|
||||
|
||||
.hermes-kanban-card-content {
|
||||
padding: 0.5rem 0.6rem !important;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 0.3rem;
|
||||
}
|
||||
|
||||
.hermes-kanban-card-row {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.35rem;
|
||||
flex-wrap: wrap;
|
||||
}
|
||||
|
||||
.hermes-kanban-card-id {
|
||||
font-family: var(--font-mono, ui-monospace, monospace);
|
||||
font-size: 0.65rem;
|
||||
color: var(--color-muted-foreground);
|
||||
letter-spacing: 0.03em;
|
||||
}
|
||||
|
||||
.hermes-kanban-card-title {
|
||||
font-size: 0.85rem;
|
||||
font-weight: 500;
|
||||
line-height: 1.3;
|
||||
color: var(--color-foreground);
|
||||
word-break: break-word;
|
||||
}
|
||||
|
||||
.hermes-kanban-card-meta {
|
||||
font-size: 0.7rem;
|
||||
color: var(--color-muted-foreground);
|
||||
gap: 0.55rem;
|
||||
}
|
||||
|
||||
.hermes-kanban-priority {
|
||||
font-size: 0.6rem !important;
|
||||
padding: 0.05rem 0.3rem !important;
|
||||
background: color-mix(in srgb, var(--color-ring) 18%, transparent);
|
||||
color: var(--color-foreground);
|
||||
border: 1px solid color-mix(in srgb, var(--color-ring) 40%, transparent);
|
||||
}
|
||||
|
||||
.hermes-kanban-tag {
|
||||
font-size: 0.6rem !important;
|
||||
padding: 0.05rem 0.3rem !important;
|
||||
}
|
||||
|
||||
.hermes-kanban-assignee {
|
||||
font-weight: 500;
|
||||
color: color-mix(in srgb, var(--color-foreground) 80%, var(--color-muted-foreground));
|
||||
}
|
||||
.hermes-kanban-unassigned {
|
||||
font-style: italic;
|
||||
}
|
||||
.hermes-kanban-ago {
|
||||
margin-left: auto;
|
||||
}
|
||||
|
||||
/* ---- Inline create --------------------------------------------------- */
|
||||
|
||||
.hermes-kanban-inline-create {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 0.35rem;
|
||||
padding: 0.5rem;
|
||||
margin-bottom: 0.5rem;
|
||||
background: color-mix(in srgb, var(--color-card) 70%, transparent);
|
||||
border: 1px dashed var(--color-border);
|
||||
border-radius: var(--radius-sm, 0.25rem);
|
||||
}
|
||||
|
||||
/* ---- Drawer (task detail side panel) --------------------------------- */
|
||||
|
||||
.hermes-kanban-drawer-shade {
|
||||
position: fixed;
|
||||
inset: 0;
|
||||
background: rgba(0, 0, 0, 0.45);
|
||||
z-index: 60;
|
||||
display: flex;
|
||||
justify-content: flex-end;
|
||||
}
|
||||
|
||||
.hermes-kanban-drawer {
|
||||
width: min(480px, 92vw);
|
||||
height: 100vh;
|
||||
background: var(--color-card);
|
||||
border-left: 1px solid var(--color-border);
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
box-shadow: -4px 0 18px rgba(0, 0, 0, 0.35);
|
||||
animation: hermes-kanban-drawer-in 180ms ease-out;
|
||||
}
|
||||
|
||||
@keyframes hermes-kanban-drawer-in {
|
||||
from { transform: translateX(100%); opacity: 0.3; }
|
||||
to { transform: translateX(0); opacity: 1; }
|
||||
}
|
||||
|
||||
.hermes-kanban-drawer-head {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: space-between;
|
||||
padding: 0.6rem 0.8rem;
|
||||
border-bottom: 1px solid var(--color-border);
|
||||
font-family: var(--font-mono, ui-monospace, monospace);
|
||||
}
|
||||
|
||||
.hermes-kanban-drawer-close {
|
||||
appearance: none;
|
||||
background: transparent;
|
||||
border: 0;
|
||||
color: var(--color-muted-foreground);
|
||||
font-size: 1.25rem;
|
||||
line-height: 1;
|
||||
cursor: pointer;
|
||||
padding: 0 0.25rem;
|
||||
}
|
||||
.hermes-kanban-drawer-close:hover { color: var(--color-foreground); }
|
||||
|
||||
.hermes-kanban-drawer-body {
|
||||
flex: 1;
|
||||
overflow-y: auto;
|
||||
padding: 0.9rem;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 0.85rem;
|
||||
}
|
||||
|
||||
.hermes-kanban-drawer-title {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.5rem;
|
||||
font-size: 1rem;
|
||||
font-weight: 600;
|
||||
}
|
||||
|
||||
.hermes-kanban-drawer-meta {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 0.15rem;
|
||||
padding: 0.5rem 0.6rem;
|
||||
background: color-mix(in srgb, var(--color-foreground) 4%, transparent);
|
||||
border: 1px solid var(--color-border);
|
||||
border-radius: var(--radius-sm, 0.25rem);
|
||||
}
|
||||
|
||||
.hermes-kanban-meta-row {
|
||||
display: flex;
|
||||
gap: 0.5rem;
|
||||
font-size: 0.72rem;
|
||||
}
|
||||
.hermes-kanban-meta-label {
|
||||
width: 92px;
|
||||
color: var(--color-muted-foreground);
|
||||
}
|
||||
.hermes-kanban-meta-value {
|
||||
color: var(--color-foreground);
|
||||
word-break: break-word;
|
||||
}
|
||||
|
||||
.hermes-kanban-actions {
|
||||
display: flex;
|
||||
flex-wrap: wrap;
|
||||
gap: 0.3rem;
|
||||
}
|
||||
|
||||
.hermes-kanban-section {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 0.35rem;
|
||||
}
|
||||
|
||||
.hermes-kanban-section-head {
|
||||
font-size: 0.72rem;
|
||||
font-weight: 600;
|
||||
text-transform: uppercase;
|
||||
letter-spacing: 0.07em;
|
||||
color: var(--color-muted-foreground);
|
||||
}
|
||||
|
||||
.hermes-kanban-pre {
|
||||
margin: 0;
|
||||
padding: 0.45rem 0.55rem;
|
||||
white-space: pre-wrap;
|
||||
word-break: break-word;
|
||||
background: color-mix(in srgb, var(--color-foreground) 4%, transparent);
|
||||
border: 1px solid var(--color-border);
|
||||
border-radius: var(--radius-sm, 0.25rem);
|
||||
font-family: var(--font-mono, ui-monospace, monospace);
|
||||
font-size: 0.72rem;
|
||||
color: var(--color-foreground);
|
||||
}
|
||||
|
||||
.hermes-kanban-comment {
|
||||
border-left: 2px solid color-mix(in srgb, var(--color-ring) 35%, transparent);
|
||||
padding-left: 0.5rem;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 0.2rem;
|
||||
}
|
||||
|
||||
.hermes-kanban-comment-head {
|
||||
display: flex;
|
||||
gap: 0.5rem;
|
||||
font-size: 0.7rem;
|
||||
}
|
||||
.hermes-kanban-comment-author {
|
||||
font-weight: 600;
|
||||
color: var(--color-foreground);
|
||||
}
|
||||
.hermes-kanban-comment-ago {
|
||||
color: var(--color-muted-foreground);
|
||||
}
|
||||
|
||||
.hermes-kanban-event {
|
||||
display: flex;
|
||||
gap: 0.5rem;
|
||||
font-size: 0.7rem;
|
||||
color: var(--color-muted-foreground);
|
||||
font-family: var(--font-mono, ui-monospace, monospace);
|
||||
}
|
||||
.hermes-kanban-event-kind {
|
||||
color: var(--color-foreground);
|
||||
min-width: 6rem;
|
||||
}
|
||||
.hermes-kanban-event-payload {
|
||||
color: var(--color-muted-foreground);
|
||||
overflow: hidden;
|
||||
text-overflow: ellipsis;
|
||||
white-space: nowrap;
|
||||
max-width: 280px;
|
||||
}
|
||||
|
||||
.hermes-kanban-drawer-comment-row {
|
||||
display: flex;
|
||||
gap: 0.4rem;
|
||||
padding: 0.55rem 0.75rem;
|
||||
border-top: 1px solid var(--color-border);
|
||||
background: color-mix(in srgb, var(--color-card) 90%, transparent);
|
||||
}
|
||||
|
||||
.hermes-kanban-count {
|
||||
display: inline-flex;
|
||||
gap: 0.2rem;
|
||||
align-items: center;
|
||||
}
|
||||
|
||||
/* ---- Selection chrome ----------------------------------------------- */
|
||||
|
||||
.hermes-kanban-card--selected :where(.hermes-kanban-card-content) {
|
||||
box-shadow: 0 0 0 2px var(--color-ring) inset,
|
||||
0 0 0 1px var(--color-ring) inset;
|
||||
background: color-mix(in srgb, var(--color-ring) 6%, var(--color-card));
|
||||
}
|
||||
|
||||
.hermes-kanban-card-check {
|
||||
width: 0.85rem;
|
||||
height: 0.85rem;
|
||||
margin: 0;
|
||||
cursor: pointer;
|
||||
accent-color: var(--color-ring);
|
||||
}
|
||||
|
||||
/* ---- Bulk action bar ------------------------------------------------ */
|
||||
|
||||
.hermes-kanban-bulk {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.5rem;
|
||||
padding: 0.4rem 0.75rem;
|
||||
background: color-mix(in srgb, var(--color-ring) 10%, var(--color-card));
|
||||
border: 1px solid color-mix(in srgb, var(--color-ring) 40%, var(--color-border));
|
||||
border-radius: var(--radius-sm, 0.25rem);
|
||||
flex-wrap: wrap;
|
||||
}
|
||||
.hermes-kanban-bulk-count {
|
||||
font-weight: 600;
|
||||
font-size: 0.75rem;
|
||||
padding-right: 0.25rem;
|
||||
}
|
||||
.hermes-kanban-bulk-btn {
|
||||
height: 1.7rem !important;
|
||||
padding: 0 0.5rem !important;
|
||||
font-size: 0.7rem !important;
|
||||
border: 1px solid var(--color-border);
|
||||
cursor: pointer;
|
||||
}
|
||||
.hermes-kanban-bulk-btn:hover {
|
||||
background: color-mix(in srgb, var(--color-foreground) 8%, transparent);
|
||||
}
|
||||
.hermes-kanban-bulk-reassign {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.25rem;
|
||||
padding-left: 0.5rem;
|
||||
border-left: 1px solid color-mix(in srgb, var(--color-border) 70%, transparent);
|
||||
}
|
||||
|
||||
/* ---- Dependency editor chips --------------------------------------- */
|
||||
|
||||
.hermes-kanban-deps-row {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.5rem;
|
||||
margin-bottom: 0.4rem;
|
||||
}
|
||||
.hermes-kanban-deps-label {
|
||||
font-size: 0.68rem;
|
||||
text-transform: uppercase;
|
||||
letter-spacing: 0.08em;
|
||||
color: var(--color-muted-foreground);
|
||||
min-width: 4rem;
|
||||
}
|
||||
.hermes-kanban-deps-chips {
|
||||
display: flex;
|
||||
gap: 0.3rem;
|
||||
flex-wrap: wrap;
|
||||
flex: 1;
|
||||
}
|
||||
.hermes-kanban-deps-empty {
|
||||
font-size: 0.7rem;
|
||||
color: var(--color-muted-foreground);
|
||||
font-style: italic;
|
||||
}
|
||||
.hermes-kanban-dep-chip {
|
||||
display: inline-flex;
|
||||
align-items: center;
|
||||
gap: 0.15rem;
|
||||
padding: 0.1rem 0.35rem;
|
||||
background: color-mix(in srgb, var(--color-foreground) 6%, transparent);
|
||||
border: 1px solid var(--color-border);
|
||||
border-radius: var(--radius-sm, 0.25rem);
|
||||
font-family: var(--font-mono, ui-monospace, monospace);
|
||||
font-size: 0.68rem;
|
||||
color: var(--color-foreground);
|
||||
}
|
||||
.hermes-kanban-dep-chip-x {
|
||||
appearance: none;
|
||||
background: transparent;
|
||||
border: 0;
|
||||
color: var(--color-muted-foreground);
|
||||
cursor: pointer;
|
||||
font-size: 0.85rem;
|
||||
line-height: 1;
|
||||
padding: 0 0.15rem;
|
||||
}
|
||||
.hermes-kanban-dep-chip-x:hover { color: var(--color-destructive, #d14a4a); }
|
||||
|
||||
/* ---- Inline edit affordances --------------------------------------- */
|
||||
|
||||
.hermes-kanban-editable {
|
||||
cursor: pointer;
|
||||
border-bottom: 1px dotted color-mix(in srgb, var(--color-border) 80%, transparent);
|
||||
}
|
||||
.hermes-kanban-editable:hover {
|
||||
color: var(--color-foreground);
|
||||
border-bottom-color: var(--color-ring);
|
||||
}
|
||||
|
||||
.hermes-kanban-drawer-title-text {
|
||||
cursor: pointer;
|
||||
}
|
||||
.hermes-kanban-drawer-title-text:hover {
|
||||
text-decoration: underline;
|
||||
text-decoration-color: var(--color-ring);
|
||||
text-decoration-style: dotted;
|
||||
text-underline-offset: 3px;
|
||||
}
|
||||
|
||||
.hermes-kanban-edit-row {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.35rem;
|
||||
width: 100%;
|
||||
}
|
||||
|
||||
.hermes-kanban-section-head-row {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: space-between;
|
||||
gap: 0.5rem;
|
||||
}
|
||||
.hermes-kanban-edit-link {
|
||||
appearance: none;
|
||||
background: transparent;
|
||||
border: 0;
|
||||
color: var(--color-muted-foreground);
|
||||
font-size: 0.7rem;
|
||||
text-transform: uppercase;
|
||||
letter-spacing: 0.05em;
|
||||
cursor: pointer;
|
||||
padding: 0;
|
||||
}
|
||||
.hermes-kanban-edit-link:hover { color: var(--color-ring); }
|
||||
|
||||
.hermes-kanban-textarea {
|
||||
width: 100%;
|
||||
min-height: 8rem;
|
||||
background: var(--color-card);
|
||||
color: var(--color-foreground);
|
||||
border: 1px solid var(--color-border);
|
||||
border-radius: var(--radius-sm, 0.25rem);
|
||||
padding: 0.5rem 0.6rem;
|
||||
font-family: var(--font-mono, ui-monospace, monospace);
|
||||
font-size: 0.8rem;
|
||||
line-height: 1.5;
|
||||
resize: vertical;
|
||||
}
|
||||
.hermes-kanban-textarea:focus {
|
||||
outline: none;
|
||||
border-color: var(--color-ring);
|
||||
box-shadow: 0 0 0 2px color-mix(in srgb, var(--color-ring) 30%, transparent);
|
||||
}
|
||||
|
||||
/* ---- Markdown rendering -------------------------------------------- */
|
||||
|
||||
.hermes-kanban-md {
|
||||
font-size: 0.8rem;
|
||||
line-height: 1.55;
|
||||
color: var(--color-foreground);
|
||||
}
|
||||
.hermes-kanban-md p { margin: 0.25rem 0; }
|
||||
.hermes-kanban-md h1,
|
||||
.hermes-kanban-md h2,
|
||||
.hermes-kanban-md h3,
|
||||
.hermes-kanban-md h4 {
|
||||
margin: 0.6rem 0 0.2rem;
|
||||
line-height: 1.25;
|
||||
}
|
||||
.hermes-kanban-md h1 { font-size: 1.05rem; }
|
||||
.hermes-kanban-md h2 { font-size: 0.95rem; }
|
||||
.hermes-kanban-md h3 { font-size: 0.88rem; }
|
||||
.hermes-kanban-md h4 { font-size: 0.82rem; }
|
||||
.hermes-kanban-md ul {
|
||||
margin: 0.25rem 0 0.25rem 1.1rem;
|
||||
padding: 0;
|
||||
}
|
||||
.hermes-kanban-md li { margin: 0.1rem 0; }
|
||||
.hermes-kanban-md a {
|
||||
color: var(--color-ring);
|
||||
text-decoration: underline;
|
||||
}
|
||||
.hermes-kanban-md code {
|
||||
font-family: var(--font-mono, ui-monospace, monospace);
|
||||
font-size: 0.75rem;
|
||||
padding: 0.05rem 0.3rem;
|
||||
background: color-mix(in srgb, var(--color-foreground) 8%, transparent);
|
||||
border-radius: 3px;
|
||||
}
|
||||
.hermes-kanban-md-code {
|
||||
margin: 0.35rem 0;
|
||||
padding: 0.5rem 0.6rem;
|
||||
background: color-mix(in srgb, var(--color-foreground) 5%, transparent);
|
||||
border: 1px solid var(--color-border);
|
||||
border-radius: var(--radius-sm, 0.25rem);
|
||||
overflow-x: auto;
|
||||
}
|
||||
.hermes-kanban-md-code code {
|
||||
background: transparent;
|
||||
padding: 0;
|
||||
font-size: 0.75rem;
|
||||
white-space: pre;
|
||||
}
|
||||
.hermes-kanban-md strong { font-weight: 600; }
|
||||
|
||||
/* ---- Touch-drag proxy ---------------------------------------------- */
|
||||
|
||||
.hermes-kanban-touch-proxy {
|
||||
pointer-events: none;
|
||||
opacity: 0.85;
|
||||
box-shadow: 0 8px 20px rgba(0, 0, 0, 0.35);
|
||||
transform: scale(1.02);
|
||||
transition: none;
|
||||
}
|
||||
|
||||
|
||||
/* ---- Staleness tiers ------------------------------------------------ */
|
||||
|
||||
.hermes-kanban-card--stale-amber :where(.hermes-kanban-card-content) {
|
||||
box-shadow: 0 0 0 1px #d4b34888 inset;
|
||||
}
|
||||
.hermes-kanban-card--stale-amber:hover :where(.hermes-kanban-card-content) {
|
||||
box-shadow: 0 0 0 2px #d4b348 inset;
|
||||
}
|
||||
.hermes-kanban-card--stale-red :where(.hermes-kanban-card-content) {
|
||||
box-shadow: 0 0 0 1px var(--color-destructive, #d14a4a) inset,
|
||||
0 0 8px color-mix(in srgb, var(--color-destructive, #d14a4a) 30%, transparent);
|
||||
}
|
||||
.hermes-kanban-card--stale-red:hover :where(.hermes-kanban-card-content) {
|
||||
box-shadow: 0 0 0 2px var(--color-destructive, #d14a4a) inset,
|
||||
0 0 10px color-mix(in srgb, var(--color-destructive, #d14a4a) 45%, transparent);
|
||||
}
|
||||
|
||||
/* ---- Worker log pane ------------------------------------------------ */
|
||||
|
||||
.hermes-kanban-log {
|
||||
max-height: 340px;
|
||||
overflow: auto;
|
||||
white-space: pre;
|
||||
font-size: 0.7rem;
|
||||
line-height: 1.45;
|
||||
}
|
||||
|
||||
|
||||
/* ---- Run history (per-attempt log in the drawer) ------------------- */
|
||||
|
||||
.hermes-kanban-run {
|
||||
border-left: 2px solid var(--color-border);
|
||||
padding: 0.35rem 0.5rem;
|
||||
margin-bottom: 0.4rem;
|
||||
background: color-mix(in srgb, var(--color-foreground) 3%, transparent);
|
||||
border-radius: var(--radius-sm, 0.25rem);
|
||||
}
|
||||
.hermes-kanban-run--active { border-left-color: #3fb97d; }
|
||||
.hermes-kanban-run--completed { border-left-color: #4a8cd1; }
|
||||
.hermes-kanban-run--ended { border-left-color: #6b7280; } /* generic fallback when outcome is unset */
|
||||
.hermes-kanban-run--blocked { border-left-color: var(--color-destructive, #d14a4a); }
|
||||
.hermes-kanban-run--crashed,
|
||||
.hermes-kanban-run--timed_out,
|
||||
.hermes-kanban-run--gave_up,
|
||||
.hermes-kanban-run--spawn_failed {
|
||||
border-left-color: var(--color-destructive, #d14a4a);
|
||||
background: color-mix(in srgb, var(--color-destructive, #d14a4a) 6%, transparent);
|
||||
}
|
||||
.hermes-kanban-run--reclaimed { border-left-color: #d4b348; }
|
||||
|
||||
.hermes-kanban-run-head {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.6rem;
|
||||
font-size: 0.7rem;
|
||||
}
|
||||
.hermes-kanban-run-outcome {
|
||||
font-family: var(--font-mono, ui-monospace, monospace);
|
||||
font-weight: 600;
|
||||
text-transform: uppercase;
|
||||
letter-spacing: 0.05em;
|
||||
color: var(--color-foreground);
|
||||
}
|
||||
.hermes-kanban-run-profile {
|
||||
color: var(--color-muted-foreground);
|
||||
}
|
||||
.hermes-kanban-run-elapsed {
|
||||
font-variant-numeric: tabular-nums;
|
||||
color: var(--color-muted-foreground);
|
||||
}
|
||||
.hermes-kanban-run-ago {
|
||||
margin-left: auto;
|
||||
color: var(--color-muted-foreground);
|
||||
}
|
||||
.hermes-kanban-run-summary {
|
||||
font-size: 0.75rem;
|
||||
padding: 0.2rem 0 0;
|
||||
color: var(--color-foreground);
|
||||
}
|
||||
.hermes-kanban-run-error {
|
||||
font-size: 0.7rem;
|
||||
color: var(--color-destructive, #d14a4a);
|
||||
padding: 0.15rem 0 0;
|
||||
font-family: var(--font-mono, ui-monospace, monospace);
|
||||
}
|
||||
.hermes-kanban-run-meta {
|
||||
display: block;
|
||||
font-size: 0.65rem;
|
||||
padding: 0.15rem 0 0;
|
||||
color: var(--color-muted-foreground);
|
||||
white-space: pre-wrap;
|
||||
word-break: break-word;
|
||||
font-family: var(--font-mono, ui-monospace, monospace);
|
||||
}
|
||||
@@ -1,14 +0,0 @@
|
||||
{
|
||||
"name": "kanban",
|
||||
"label": "Kanban",
|
||||
"description": "Multi-agent collaboration board — drag-drop cards across columns, read comment threads, see which profile is running what",
|
||||
"icon": "Package",
|
||||
"version": "1.0.0",
|
||||
"tab": {
|
||||
"path": "/kanban",
|
||||
"position": "after:skills"
|
||||
},
|
||||
"entry": "dist/index.js",
|
||||
"css": "dist/style.css",
|
||||
"api": "plugin_api.py"
|
||||
}
|
||||
@@ -1,830 +0,0 @@
|
||||
"""Kanban dashboard plugin — backend API routes.
|
||||
|
||||
Mounted at /api/plugins/kanban/ by the dashboard plugin system.
|
||||
|
||||
This layer is intentionally thin: every handler is a small wrapper around
|
||||
``hermes_cli.kanban_db`` or a direct SQL query. Writes use the same code
|
||||
paths the CLI and gateway ``/kanban`` command use, so the three surfaces
|
||||
cannot drift.
|
||||
|
||||
Live updates arrive via the ``/events`` WebSocket, which tails the
|
||||
append-only ``task_events`` table on a short poll interval (WAL mode lets
|
||||
reads run alongside the dispatcher's IMMEDIATE write transactions).
|
||||
|
||||
Security note
|
||||
-------------
|
||||
The dashboard's HTTP auth middleware (``web_server.auth_middleware``)
|
||||
explicitly skips ``/api/plugins/`` — plugin routes are unauthenticated by
|
||||
design because the dashboard binds to localhost by default. For the
|
||||
WebSocket we still require the session token as a ``?token=`` query
|
||||
parameter (browsers cannot set the ``Authorization`` header on an upgrade
|
||||
request), matching the established pattern used by the in-browser PTY
|
||||
bridge in ``hermes_cli/web_server.py``. If you run the dashboard with
|
||||
``--host 0.0.0.0``, every plugin route — kanban included — becomes
|
||||
reachable from the network. Don't do that on a shared host.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import hmac
|
||||
import json
|
||||
import logging
|
||||
import sqlite3
|
||||
import time
|
||||
from dataclasses import asdict
|
||||
from typing import Any, Optional
|
||||
|
||||
from fastapi import APIRouter, HTTPException, Query, WebSocket, WebSocketDisconnect, status as http_status
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from hermes_cli import kanban_db
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Auth helper — WebSocket only (HTTP routes live behind the dashboard's
|
||||
# existing plugin-bypass; this is documented above).
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _check_ws_token(provided: Optional[str]) -> bool:
    """Constant-time compare against the dashboard session token.

    The dashboard module is imported lazily so the plugin still loads in
    test contexts where ``hermes_cli.web_server`` isn't importable (e.g.
    the bare-FastAPI test harness).

    Args:
        provided: Token supplied by the client, or ``None`` when absent.

    Returns:
        ``False`` when no token was supplied. ``True`` when the dashboard
        module cannot be imported or exposes no ``_SESSION_TOKEN`` (both
        cases deliberately fail open). Otherwise, whether ``provided``
        equals the session token.
    """
    if not provided:
        return False
    try:
        from hermes_cli import web_server as _ws
    except Exception:
        # No dashboard context (tests). Accept so the tail loop is still
        # testable; in production the dashboard module always imports
        # cleanly because it's the caller.
        return True
    expected = getattr(_ws, "_SESSION_TOKEN", None)
    if not expected:
        return True
    # Compare as UTF-8 bytes: hmac.compare_digest() raises TypeError when
    # given ``str`` arguments containing non-ASCII characters, which would
    # turn a malformed client token into an unhandled exception instead of
    # a plain rejection.
    return hmac.compare_digest(
        str(provided).encode("utf-8"),
        str(expected).encode("utf-8"),
    )
|
||||
|
||||
|
||||
def _conn():
    """Return a fresh ``kanban_db`` connection after ensuring the schema.

    ``kanban_db.init_db()`` is attempted on every call so a handler hit on
    a fresh install (e.g. POST /tasks before GET /board) does not surface a
    "no such table" error. A failing ``init_db`` is logged at warning level
    and otherwise ignored; the connection is opened regardless.
    """
    try:
        kanban_db.init_db()
    except Exception as init_error:
        log.warning("kanban init_db failed: %s", init_error)
    return kanban_db.connect()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Serialization helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Status columns rendered by the dashboard, listed left to right.
# "archived" is intentionally absent: get_board() only adds it when the
# caller asks for archived tasks.
BOARD_COLUMNS: list[str] = ["triage", "todo", "ready", "running", "blocked", "done"]
|
||||
|
||||
|
||||
def _task_dict(task: kanban_db.Task) -> dict[str, Any]:
    """Serialise a task dataclass to a plain dict plus a derived ``age``.

    ``age`` comes from ``kanban_db.task_age`` so the UI can colour stale
    cards without computing deltas client-side. All dataclass fields are
    passed through as produced by ``asdict``.
    """
    serialised = asdict(task)
    serialised["age"] = kanban_db.task_age(task)
    return serialised
|
||||
|
||||
|
||||
def _event_dict(event: kanban_db.Event) -> dict[str, Any]:
    """Serialise an event row object to the dict shape sent to the UI."""
    exported = ("id", "task_id", "kind", "payload", "created_at", "run_id")
    return {attr: getattr(event, attr) for attr in exported}
|
||||
|
||||
|
||||
def _comment_dict(c: kanban_db.Comment) -> dict[str, Any]:
    """Serialise a comment object to the dict shape sent to the UI."""
    exported = ("id", "task_id", "author", "body", "created_at")
    return {attr: getattr(c, attr) for attr in exported}
|
||||
|
||||
|
||||
def _run_dict(r: kanban_db.Run) -> dict[str, Any]:
    """Serialise a Run for the drawer's Run history section."""
    # Key order below is the order the keys appear in the response.
    exported = (
        "id",
        "task_id",
        "profile",
        "step_key",
        "status",
        "claim_lock",
        "claim_expires",
        "worker_pid",
        "max_runtime_seconds",
        "last_heartbeat_at",
        "started_at",
        "ended_at",
        "outcome",
        "summary",
        "metadata",
        "error",
    )
    return {attr: getattr(r, attr) for attr in exported}
|
||||
|
||||
|
||||
def _links_for(conn: sqlite3.Connection, task_id: str) -> dict[str, list[str]]:
    """Return ``{'parents': [...], 'children': [...]}`` for a task.

    Both lists are read from ``task_links`` and sorted by id. Rows are
    indexed by column name, so ``conn`` must use a name-addressable row
    factory.
    """

    def _ids(sql: str, column: str) -> list[str]:
        # Run one single-column lookup keyed on task_id.
        return [row[column] for row in conn.execute(sql, (task_id,))]

    parent_ids = _ids(
        "SELECT parent_id FROM task_links WHERE child_id = ? ORDER BY parent_id",
        "parent_id",
    )
    child_ids = _ids(
        "SELECT child_id FROM task_links WHERE parent_id = ? ORDER BY child_id",
        "child_id",
    )
    return {"parents": parent_ids, "children": child_ids}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# GET /board
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@router.get("/board")
def get_board(
    tenant: Optional[str] = Query(None, description="Filter to a single tenant"),
    include_archived: bool = Query(False),
):
    """Return every task grouped into status columns, plus UI metadata.

    The connection comes from ``_conn()``, which initialises ``kanban.db``
    on first use so a fresh install doesn't surface a "failed to load"
    error on the plugin tab.

    The response carries ``columns`` (one entry per ``BOARD_COLUMNS`` name,
    plus ``archived`` when requested), the distinct ``tenants`` and
    ``assignees``, the highest event id seen (``latest_event_id``) and the
    server time in epoch seconds (``now``).
    """
    conn = _conn()
    try:
        tasks = kanban_db.list_tasks(
            conn, tenant=tenant, include_archived=include_archived
        )

        # Parent/child link counts for every task, from a single query.
        link_counts: dict[str, dict[str, int]] = {}
        link_rows = conn.execute(
            "SELECT parent_id, child_id FROM task_links"
        ).fetchall()
        for link in link_rows:
            # A link makes the parent gain a child and the child gain a parent.
            for owner, bucket in (
                (link["parent_id"], "children"),
                (link["child_id"], "parents"),
            ):
                tally = link_counts.setdefault(owner, {"parents": 0, "children": 0})
                tally[bucket] += 1

        # Number of comments per task.
        comment_counts: dict[str, int] = {}
        for counted in conn.execute(
            "SELECT task_id, COUNT(*) AS n FROM task_comments GROUP BY task_id"
        ):
            comment_counts[counted["task_id"]] = counted["n"]

        # Progress rollup per parent: children done / children total, built
        # in one pass so the UI can render "N/M" without per-task queries.
        progress: dict[str, dict[str, int]] = {}
        child_rows = conn.execute(
            "SELECT l.parent_id AS pid, t.status AS cstatus "
            "FROM task_links l JOIN tasks t ON t.id = l.child_id"
        ).fetchall()
        for child in child_rows:
            rollup = progress.setdefault(child["pid"], {"done": 0, "total": 0})
            rollup["total"] += 1
            if child["cstatus"] == "done":
                rollup["done"] += 1

        latest_event_id = conn.execute(
            "SELECT COALESCE(MAX(id), 0) AS m FROM task_events"
        ).fetchone()["m"]

        column_names = list(BOARD_COLUMNS)
        if include_archived:
            column_names.append("archived")
        columns: dict[str, list[dict]] = {name: [] for name in column_names}

        # Tasks keep the order list_tasks returned them in within a column.
        for t in tasks:
            card = _task_dict(t)
            card["link_counts"] = link_counts.get(t.id, {"parents": 0, "children": 0})
            card["comment_count"] = comment_counts.get(t.id, 0)
            card["progress"] = progress.get(t.id)  # None when the task has no children
            # Any status without a visible column is shown under "todo".
            target = t.status if t.status in columns else "todo"
            columns[target].append(card)

        # Known tenants for the UI filter dropdown.
        tenant_rows = conn.execute(
            "SELECT DISTINCT tenant FROM tasks WHERE tenant IS NOT NULL ORDER BY tenant"
        )
        tenants = [row["tenant"] for row in tenant_rows]

        # Distinct assignees for the lane-by-profile sub-grouping.
        assignee_rows = conn.execute(
            "SELECT DISTINCT assignee FROM tasks WHERE assignee IS NOT NULL "
            "AND status != 'archived' ORDER BY assignee"
        )
        assignees = [row["assignee"] for row in assignee_rows]

        return {
            "columns": [
                {"name": name, "tasks": cards} for name, cards in columns.items()
            ],
            "tenants": tenants,
            "assignees": assignees,
            "latest_event_id": int(latest_event_id),
            "now": int(time.time()),
        }
    finally:
        conn.close()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# GET /tasks/:id
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@router.get("/tasks/{task_id}")
def get_task(task_id: str):
    """Return one task with its comments, events, links and runs.

    Raises:
        HTTPException: 404 when ``task_id`` does not exist.
    """
    conn = _conn()
    try:
        task = kanban_db.get_task(conn, task_id)
        if task is None:
            raise HTTPException(status_code=404, detail=f"task {task_id} not found")
        detail: dict[str, Any] = {"task": _task_dict(task)}
        detail["comments"] = [
            _comment_dict(comment)
            for comment in kanban_db.list_comments(conn, task_id)
        ]
        detail["events"] = [
            _event_dict(event) for event in kanban_db.list_events(conn, task_id)
        ]
        detail["links"] = _links_for(conn, task_id)
        detail["runs"] = [_run_dict(run) for run in kanban_db.list_runs(conn, task_id)]
        return detail
    finally:
        conn.close()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# POST /tasks
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class CreateTaskBody(BaseModel):
    # Request body for POST /tasks. Only ``title`` is required; every other
    # field is forwarded as-is to ``kanban_db.create_task`` by create_task().
    title: str
    body: Optional[str] = None
    assignee: Optional[str] = None
    tenant: Optional[str] = None
    priority: int = 0
    workspace_kind: str = "scratch"
    workspace_path: Optional[str] = None
    # Ids of tasks the new task is linked under.
    parents: list[str] = Field(default_factory=list)
    triage: bool = False
    idempotency_key: Optional[str] = None
    max_runtime_seconds: Optional[int] = None
    skills: Optional[list[str]] = None
|
||||
|
||||
|
||||
@router.post("/tasks")
def create_task(payload: CreateTaskBody):
    """Create a task on behalf of the dashboard and return it.

    Raises:
        HTTPException: 400 when ``kanban_db`` rejects the input with a
            ``ValueError``.
    """
    # Everything the client supplied, plus the fixed "dashboard" author.
    create_kwargs = {
        "title": payload.title,
        "body": payload.body,
        "assignee": payload.assignee,
        "created_by": "dashboard",
        "workspace_kind": payload.workspace_kind,
        "workspace_path": payload.workspace_path,
        "tenant": payload.tenant,
        "priority": payload.priority,
        "parents": payload.parents,
        "triage": payload.triage,
        "idempotency_key": payload.idempotency_key,
        "max_runtime_seconds": payload.max_runtime_seconds,
        "skills": payload.skills,
    }
    conn = _conn()
    try:
        new_id = kanban_db.create_task(conn, **create_kwargs)
        created = kanban_db.get_task(conn, new_id)
        if not created:
            return {"task": None}
        return {"task": _task_dict(created)}
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))
    finally:
        conn.close()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# PATCH /tasks/:id (status / assignee / priority / title / body)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class UpdateTaskBody(BaseModel):
    # Request body for PATCH /tasks/{task_id}. Every field is optional; a
    # field left as None is not touched by update_task().
    status: Optional[str] = None
    assignee: Optional[str] = None
    priority: Optional[int] = None
    title: Optional[str] = None
    body: Optional[str] = None
    # Passed to complete_task when status is "done".
    result: Optional[str] = None
    # Passed to block_task when status is "blocked".
    block_reason: Optional[str] = None
    # Structured handoff fields, also passed to complete_task when status
    # is "done". Dashboard parity with ``hermes kanban complete --summary
    # ... --metadata ...``.
    summary: Optional[str] = None
    metadata: Optional[dict] = None
|
||||
|
||||
|
||||
@router.patch("/tasks/{task_id}")
def update_task(task_id: str, payload: UpdateTaskBody):
    """Apply a partial update (assignee / status / priority / title / body).

    Fields left as ``None`` are not touched. Updates are applied in the
    order assignee, status, priority, title/body, each in its own
    ``kanban_db`` call or write transaction.

    Input that can be rejected without touching the database (unknown
    status, blank title) is checked before the first write, so a 400 never
    leaves an earlier part of the same request applied.

    Args:
        task_id: Id of the task to patch.
        payload: Fields to change.

    Returns:
        ``{"task": <serialised task>}`` reflecting the state after the
        update (``None`` if the task can no longer be read).

    Raises:
        HTTPException: 404 if the task does not exist; 400 for an unknown
            status or an empty title; 409 if the assignment raises
            ``RuntimeError`` or the status transition is refused.
    """
    conn = _conn()
    try:
        if kanban_db.get_task(conn, task_id) is None:
            raise HTTPException(status_code=404, detail=f"task {task_id} not found")

        # --- validate before mutating -------------------------------------
        new_status = payload.status
        if new_status is not None and new_status not in (
            "done", "blocked", "ready", "archived", "todo", "running", "triage",
        ):
            raise HTTPException(
                status_code=400, detail=f"unknown status: {new_status}"
            )
        new_title: Optional[str] = None
        if payload.title is not None:
            new_title = payload.title.strip()
            if not new_title:
                raise HTTPException(status_code=400, detail="title cannot be empty")

        # --- assignee ----------------------------------------------------
        if payload.assignee is not None:
            try:
                # An empty string means "unassign".
                ok = kanban_db.assign_task(
                    conn, task_id, payload.assignee or None,
                )
            except RuntimeError as e:
                raise HTTPException(status_code=409, detail=str(e)) from e
            if not ok:
                raise HTTPException(status_code=404, detail="task not found")

        # --- status -------------------------------------------------------
        if new_status is not None:
            if new_status == "done":
                ok = kanban_db.complete_task(
                    conn, task_id,
                    result=payload.result,
                    summary=payload.summary,
                    metadata=payload.metadata,
                )
            elif new_status == "blocked":
                ok = kanban_db.block_task(conn, task_id, reason=payload.block_reason)
            elif new_status == "ready":
                # Re-open a blocked task, or just an explicit status set.
                current = kanban_db.get_task(conn, task_id)
                if current and current.status == "blocked":
                    ok = kanban_db.unblock_task(conn, task_id)
                else:
                    # Direct status write for drag-drop (todo -> ready etc).
                    ok = _set_status_direct(conn, task_id, "ready")
            elif new_status == "archived":
                ok = kanban_db.archive_task(conn, task_id)
            else:
                # todo / running / triage have no dedicated verb.
                ok = _set_status_direct(conn, task_id, new_status)
            if not ok:
                raise HTTPException(
                    status_code=409,
                    detail=f"status transition to {new_status!r} not valid from current state",
                )

        # --- priority -----------------------------------------------------
        if payload.priority is not None:
            priority = int(payload.priority)
            with kanban_db.write_txn(conn):
                conn.execute(
                    "UPDATE tasks SET priority = ? WHERE id = ?",
                    (priority, task_id),
                )
                conn.execute(
                    "INSERT INTO task_events (task_id, kind, payload, created_at) "
                    "VALUES (?, 'reprioritized', ?, ?)",
                    (task_id, json.dumps({"priority": priority}), int(time.time())),
                )

        # --- title / body -------------------------------------------------
        if new_title is not None or payload.body is not None:
            sets, vals = [], []
            if new_title is not None:
                sets.append("title = ?")
                vals.append(new_title)
            if payload.body is not None:
                sets.append("body = ?")
                vals.append(payload.body)
            vals.append(task_id)
            with kanban_db.write_txn(conn):
                # Only fixed column fragments are interpolated; values are bound.
                conn.execute(
                    f"UPDATE tasks SET {', '.join(sets)} WHERE id = ?", vals,
                )
                conn.execute(
                    "INSERT INTO task_events (task_id, kind, payload, created_at) "
                    "VALUES (?, 'edited', NULL, ?)",
                    (task_id, int(time.time())),
                )

        updated = kanban_db.get_task(conn, task_id)
        return {"task": _task_dict(updated) if updated else None}
    finally:
        conn.close()
|
||||
|
||||
|
||||
def _set_status_direct(
    conn: sqlite3.Connection, task_id: str, new_status: str,
) -> bool:
    """Write ``new_status`` straight onto a task inside one write transaction.

    Used for drag-drop moves that the structured complete / block /
    unblock / archive verbs don't cover (e.g. todo<->ready,
    running<->ready). A ``status`` event row is appended for the live feed.

    Unless the new status is ``running``, the task's ``claim_lock``,
    ``claim_expires`` and ``worker_pid`` are cleared. When the task was
    ``running``, is moving to something else, and has a current run, that
    run is ended with outcome ``reclaimed`` so attempt history isn't
    orphaned (``running -> ready`` is the common case: a user yanking a
    stuck worker back to the queue).

    Returns:
        ``True`` when the row was updated, ``False`` when the task does not
        exist or the update did not touch exactly one row.
    """
    with kanban_db.write_txn(conn):
        # Capture the pre-update state; it decides whether a run gets closed.
        before = conn.execute(
            "SELECT status, current_run_id FROM tasks WHERE id = ?",
            (task_id,),
        ).fetchone()
        if before is None:
            return False
        leaving_running = before["status"] == "running" and new_status != "running"

        update = conn.execute(
            "UPDATE tasks SET status = ?, "
            " claim_lock = CASE WHEN ? = 'running' THEN claim_lock ELSE NULL END, "
            " claim_expires = CASE WHEN ? = 'running' THEN claim_expires ELSE NULL END, "
            " worker_pid = CASE WHEN ? = 'running' THEN worker_pid ELSE NULL END "
            "WHERE id = ?",
            (new_status,) * 4 + (task_id,),
        )
        if update.rowcount != 1:
            return False

        ended_run_id = None
        if leaving_running and before["current_run_id"]:
            ended_run_id = kanban_db._end_run(
                conn, task_id,
                outcome="reclaimed", status="reclaimed",
                summary=f"status changed to {new_status} (dashboard/direct)",
            )

        event_payload = json.dumps({"status": new_status})
        conn.execute(
            "INSERT INTO task_events (task_id, run_id, kind, payload, created_at) "
            "VALUES (?, ?, 'status', ?, ?)",
            (task_id, ended_run_id, event_payload, int(time.time())),
        )

        # If we re-opened something, children may have gone stale.
        if new_status in ("done", "ready"):
            kanban_db.recompute_ready(conn)
        return True
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Comments
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class CommentBody(BaseModel):
    # Request body for POST /tasks/{task_id}/comments.
    body: str
    # add_comment() falls back to "dashboard" when this is empty or null.
    author: Optional[str] = "dashboard"
|
||||
|
||||
|
||||
@router.post("/tasks/{task_id}/comments")
def add_comment(task_id: str, payload: CommentBody):
    """Attach a comment to an existing task.

    Raises:
        HTTPException: 400 when the body is empty or whitespace only; 404
            when the task does not exist.
    """
    text = payload.body
    if not text.strip():
        raise HTTPException(status_code=400, detail="body is required")
    conn = _conn()
    try:
        target = kanban_db.get_task(conn, task_id)
        if target is None:
            raise HTTPException(status_code=404, detail=f"task {task_id} not found")
        author = payload.author or "dashboard"
        kanban_db.add_comment(conn, task_id, author=author, body=text)
        return {"ok": True}
    finally:
        conn.close()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Links
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class LinkBody(BaseModel):
    # Request body for POST /links: the two task ids to connect.
    parent_id: str
    child_id: str
|
||||
|
||||
|
||||
@router.post("/links")
def add_link(payload: LinkBody):
    """Create a parent -> child link between two tasks.

    Raises:
        HTTPException: 400 when ``kanban_db.link_tasks`` rejects the pair
            with a ``ValueError``.
    """
    conn = _conn()
    try:
        parent, child = payload.parent_id, payload.child_id
        kanban_db.link_tasks(conn, parent, child)
        return {"ok": True}
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))
    finally:
        conn.close()
|
||||
|
||||
|
||||
@router.delete("/links")
def delete_link(parent_id: str = Query(...), child_id: str = Query(...)):
    """Remove a parent -> child link.

    Returns ``{"ok": <bool>}`` mirroring what ``kanban_db.unlink_tasks``
    reported.
    """
    conn = _conn()
    try:
        removed = kanban_db.unlink_tasks(conn, parent_id, child_id)
        return {"ok": bool(removed)}
    finally:
        conn.close()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Bulk actions (multi-select on the board)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class BulkTaskBody(BaseModel):
    # Request body for POST /tasks/bulk: one patch applied to many tasks.
    ids: list[str]
    # Ignored by bulk_update() when ``archive`` is true.
    status: Optional[str] = None
    # None = leave the assignee alone; "" = unassign.
    assignee: Optional[str] = None
    priority: Optional[int] = None
    archive: bool = False
|
||||
|
||||
|
||||
@router.post("/tasks/bulk")
def bulk_update(payload: BulkTaskBody):
    """Apply one patch to every id in ``payload.ids``, independently.

    A failure on one task never aborts its siblings. The response lists a
    per-id outcome (``ok`` plus an ``error`` string on failure) so the UI
    can surface partial success.

    Raises:
        HTTPException: 400 when no non-empty ids were supplied.
    """
    ids = [task_id for task_id in (payload.ids or []) if task_id]
    if not ids:
        raise HTTPException(status_code=400, detail="ids is required")

    conn = _conn()

    def _patch_one(tid: str) -> dict[str, Any]:
        # Apply the patch to a single task and report what happened.
        entry: dict[str, Any] = {"id": tid, "ok": True}
        try:
            if kanban_db.get_task(conn, tid) is None:
                entry.update(ok=False, error="not found")
                return entry

            if payload.archive:
                # Archiving wins over any requested status.
                if not kanban_db.archive_task(conn, tid):
                    entry.update(ok=False, error="archive refused")
            elif payload.status is not None:
                s = payload.status
                if s == "done":
                    ok = kanban_db.complete_task(conn, tid)
                elif s == "blocked":
                    ok = kanban_db.block_task(conn, tid)
                elif s == "ready":
                    current = kanban_db.get_task(conn, tid)
                    if current and current.status == "blocked":
                        ok = kanban_db.unblock_task(conn, tid)
                    else:
                        ok = _set_status_direct(conn, tid, "ready")
                elif s in ("todo", "running", "triage"):
                    ok = _set_status_direct(conn, tid, s)
                else:
                    # An unknown status skips the rest of this task's patch.
                    entry.update(ok=False, error=f"unknown status {s!r}")
                    return entry
                if not ok:
                    entry.update(ok=False, error=f"transition to {s!r} refused")

            if payload.assignee is not None:
                try:
                    assigned = kanban_db.assign_task(
                        conn, tid, payload.assignee or None,
                    )
                    if not assigned:
                        entry.update(ok=False, error="assign refused")
                except RuntimeError as e:
                    entry.update(ok=False, error=str(e))

            if payload.priority is not None:
                with kanban_db.write_txn(conn):
                    conn.execute(
                        "UPDATE tasks SET priority = ? WHERE id = ?",
                        (int(payload.priority), tid),
                    )
                    conn.execute(
                        "INSERT INTO task_events (task_id, kind, payload, created_at) "
                        "VALUES (?, 'reprioritized', ?, ?)",
                        (tid, json.dumps({"priority": int(payload.priority)}),
                         int(time.time())),
                    )
        except Exception as e:  # defensive — one bad id shouldn't kill the batch
            entry.update(ok=False, error=str(e))
        return entry

    try:
        results: list[dict] = [_patch_one(tid) for tid in ids]
        return {"results": results}
    finally:
        conn.close()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Plugin config (read dashboard.kanban.* defaults from config.yaml)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@router.get("/config")
def get_config():
    """Return kanban dashboard preferences from ~/.hermes/config.yaml.

    Reads the ``dashboard.kanban`` section if present; defaults otherwise.
    Used by the UI to pre-select tenant filters, toggle markdown rendering,
    or set column-width preferences without a round-trip per page load.

    The endpoint never fails on configuration problems: if the config
    cannot be loaded, or ``dashboard`` / ``dashboard.kanban`` is not a
    mapping (e.g. ``kanban: true``), the defaults are returned.

    Returns:
        A dict with ``default_tenant``, ``lane_by_profile``,
        ``include_archived_by_default`` and ``render_markdown``.
    """
    from collections.abc import Mapping

    try:
        from hermes_cli.config import load_config
        cfg = load_config() or {}
    except Exception:
        # Best effort: an unreadable config falls back to defaults.
        cfg = {}

    def _section(parent: Any, key: str) -> Any:
        # Return parent[key] when both are mappings, else an empty dict, so
        # a scalar where a section is expected cannot raise AttributeError.
        child = parent.get(key) if isinstance(parent, Mapping) else None
        return child if isinstance(child, Mapping) else {}

    k_cfg = _section(_section({"root": cfg}, "root"), "dashboard")
    k_cfg = _section({"dashboard": k_cfg}, "dashboard").get("kanban")
    if not isinstance(k_cfg, Mapping):
        k_cfg = {}
    return {
        "default_tenant": k_cfg.get("default_tenant") or "",
        "lane_by_profile": bool(k_cfg.get("lane_by_profile", True)),
        "include_archived_by_default": bool(k_cfg.get("include_archived_by_default", False)),
        "render_markdown": bool(k_cfg.get("render_markdown", True)),
    }
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Stats (per-profile / per-status counts + oldest-ready age)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@router.get("/stats")
def get_stats():
    """Return whatever ``kanban_db.board_stats`` reports for the board.

    Designed for the dashboard HUD and for router profiles that need to
    answer "is this specialist overloaded?" without scanning the whole
    board themselves.
    """
    db = _conn()
    try:
        stats = kanban_db.board_stats(db)
        return stats
    finally:
        db.close()
|
||||
|
||||
|
||||
@router.get("/assignees")
def get_assignees():
    """Return ``{"assignees": ...}`` from ``kanban_db.known_assignees``.

    The dashboard uses this to populate its assignee dropdown so a
    freshly-created profile appears in the picker before it's been given
    any task.
    """
    db = _conn()
    try:
        known = kanban_db.known_assignees(db)
        return {"assignees": known}
    finally:
        db.close()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Worker log (read-only; file written by _default_spawn)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@router.get("/tasks/{task_id}/log")
def get_task_log(task_id: str, tail: Optional[int] = Query(None, ge=1, le=2_000_000)):
    """Return the worker's stdout/stderr log for a task.

    Args:
        task_id: Id of the task whose log is requested.
        tail: Optional cap, in bytes, on how much of the end of the log is
            returned, so the dashboard drawer doesn't pull megabytes into
            the browser.

    Returns:
        A dict with ``task_id``, ``path``, ``exists``, ``size_bytes``,
        ``content`` and ``truncated``. When no log content could be read,
        ``exists`` is ``False`` and ``content`` is empty.

    Raises:
        HTTPException: 404 when the task itself does not exist.
    """
    conn = _conn()
    try:
        task = kanban_db.get_task(conn, task_id)
    finally:
        conn.close()
    if task is None:
        raise HTTPException(status_code=404, detail=f"task {task_id} not found")
    content = kanban_db.read_worker_log(task_id, tail_bytes=tail)
    log_path = kanban_db.worker_log_path(task_id)
    # stat() directly instead of exists()-then-stat(): the file can vanish
    # between the two calls (e.g. while the log is being rotated), which
    # would otherwise surface as an unhandled error.
    try:
        size = log_path.stat().st_size
    except OSError:
        size = 0
    return {
        "task_id": task_id,
        "path": str(log_path),
        "exists": content is not None,
        "size_bytes": size,
        "content": content or "",
        # Truncated when the on-disk file was larger than the tail cap.
        "truncated": bool(tail and size > tail),
    }
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Dispatch nudge (optional quick-path so the UI doesn't wait 60 s)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@router.post("/dispatch")
def dispatch(dry_run: bool = Query(False), max_n: int = Query(8, alias="max")):
    """Run one ``kanban_db.dispatch_once`` pass and return its result.

    ``max_n`` is read from the ``max`` query parameter and passed on as
    ``max_spawn``. The result is returned as a dict via ``asdict``; if that
    raises ``TypeError`` its string form is returned under ``result``.
    """
    db = _conn()
    try:
        outcome = kanban_db.dispatch_once(
            db, dry_run=dry_run, max_spawn=max_n,
        )
        try:
            serialised = asdict(outcome)
        except TypeError:
            serialised = {"result": str(outcome)}
        return serialised
    finally:
        db.close()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# WebSocket: /events?since=<event_id>
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Seconds the event tail loop sleeps between polls. Polling SQLite (WAL)
# every 300 ms is the simplest and most robust approach: it adds a
# fraction of a percent of CPU and needs no shared state synchronized
# across workers.
_EVENT_POLL_SECONDS = 0.3
|
||||
|
||||
|
||||
@router.websocket("/events")
async def stream_events(ws: WebSocket):
    """Tail ``task_events`` over a WebSocket, starting after ``?since=``.

    The dashboard session token must be supplied as ``?token=`` because
    browsers can't set Authorization on a WS upgrade; this matches how the
    PTY bridge authenticates in hermes_cli/web_server.py. A bad token
    closes the socket with a policy-violation code before it is accepted.

    Once accepted, the handler polls for events with an id above the
    cursor (at most 200 per poll) and sends each non-empty batch as
    ``{"events": [...], "cursor": <last id>}``. A non-integer ``since`` is
    treated as 0.
    """
    if not _check_ws_token(ws.query_params.get("token")):
        await ws.close(code=http_status.WS_1008_POLICY_VIOLATION)
        return
    await ws.accept()
    try:
        try:
            cursor = int(ws.query_params.get("since", "0"))
        except ValueError:
            cursor = 0

        def _decode_payload(raw: Any) -> Any:
            # Stored payloads are JSON text; anything empty or unparseable
            # is sent to the client as null.
            try:
                return json.loads(raw) if raw else None
            except Exception:
                return None

        def _fetch_new(cursor_val: int) -> tuple[int, list[dict]]:
            # Runs in a worker thread with its own short-lived connection.
            db = kanban_db.connect()
            try:
                rows = db.execute(
                    "SELECT id, task_id, run_id, kind, payload, created_at "
                    "FROM task_events WHERE id > ? ORDER BY id ASC LIMIT 200",
                    (cursor_val,),
                ).fetchall()
                batch: list[dict] = [
                    {
                        "id": row["id"],
                        "task_id": row["task_id"],
                        "run_id": row["run_id"],
                        "kind": row["kind"],
                        "payload": _decode_payload(row["payload"]),
                        "created_at": row["created_at"],
                    }
                    for row in rows
                ]
                # Rows are ordered by id, so the last one is the new cursor.
                next_cursor = rows[-1]["id"] if rows else cursor_val
                return next_cursor, batch
            finally:
                db.close()

        while True:
            cursor, events = await asyncio.to_thread(_fetch_new, cursor)
            if events:
                await ws.send_json({"events": events, "cursor": cursor})
            await asyncio.sleep(_EVENT_POLL_SECONDS)
    except WebSocketDisconnect:
        return
    except Exception as exc:  # defensive: never crash the dashboard worker
        log.warning("Kanban event stream error: %s", exc)
        try:
            await ws.close()
        except Exception:
            pass
|
||||
@@ -1,17 +0,0 @@
|
||||
[Unit]
|
||||
Description=Hermes Kanban dispatcher (hermes kanban daemon)
|
||||
Documentation=https://hermes-agent.nousresearch.com/docs/user-guide/features/kanban
|
||||
After=network.target
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
ExecStart=/usr/bin/env hermes kanban daemon --interval 60 --pidfile %t/hermes-kanban-dispatcher.pid
|
||||
Restart=on-failure
|
||||
RestartSec=5
|
||||
# Log to the journal via stdout/stderr; the dispatcher also writes per-task
|
||||
# worker output to $HERMES_HOME/kanban/logs/<task>.log.
|
||||
StandardOutput=journal
|
||||
StandardError=journal
|
||||
|
||||
[Install]
|
||||
WantedBy=default.target
|
||||
+37
-110
@@ -86,7 +86,6 @@ from agent.error_classifier import classify_api_error, FailoverReason
|
||||
from agent.prompt_builder import (
|
||||
DEFAULT_AGENT_IDENTITY, PLATFORM_HINTS,
|
||||
MEMORY_GUIDANCE, SESSION_SEARCH_GUIDANCE, SKILLS_GUIDANCE,
|
||||
KANBAN_GUIDANCE,
|
||||
build_nous_subscription_prompt,
|
||||
)
|
||||
from agent.model_metadata import (
|
||||
@@ -893,6 +892,7 @@ class AIAgent:
|
||||
checkpoints_enabled: bool = False,
|
||||
checkpoint_max_snapshots: int = 50,
|
||||
pass_session_id: bool = False,
|
||||
persist_session: bool = True,
|
||||
):
|
||||
"""
|
||||
Initialize the AI Agent.
|
||||
@@ -964,6 +964,7 @@ class AIAgent:
|
||||
self.background_review_callback = None # Optional sync callback for gateway delivery
|
||||
self.skip_context_files = skip_context_files
|
||||
self.pass_session_id = pass_session_id
|
||||
self.persist_session = persist_session
|
||||
self._credential_pool = credential_pool
|
||||
self.log_prefix_chars = log_prefix_chars
|
||||
self.log_prefix = f"{log_prefix} " if log_prefix else ""
|
||||
@@ -3108,28 +3109,13 @@ class AIAgent:
|
||||
)
|
||||
|
||||
_SKILL_REVIEW_PROMPT = (
|
||||
"Review the conversation above and consider whether a skill should be saved or updated.\n\n"
|
||||
"Work in this order — do not skip steps:\n\n"
|
||||
"1. SURVEY the existing skill landscape first. Call skills_list to see what you "
|
||||
"have. If anything looks potentially relevant, skill_view it before deciding. "
|
||||
"You are looking for the CLASS of task that just happened, not the exact task. "
|
||||
"Example: a successful Tauri build is in the class \"desktop app build "
|
||||
"troubleshooting\", not \"fix my specific Tauri error today\".\n\n"
|
||||
"2. THINK CLASS-FIRST. What general pattern of task did the user just complete? "
|
||||
"What conditions will trigger this pattern again? Describe the class in one "
|
||||
"sentence before looking at what to save.\n\n"
|
||||
"3. PREFER GENERALIZING AN EXISTING SKILL over creating a new one. If a skill "
|
||||
"already covers the class — even partially — update it (skill_manage patch) "
|
||||
"with the new insight. Broaden its \"when to use\" trigger if needed.\n\n"
|
||||
"4. ONLY CREATE A NEW SKILL when no existing skill reasonably covers the class. "
|
||||
"When you create one, name and scope it at the class level "
|
||||
"(\"react-i18n-setup\", not \"add-i18n-to-my-dashboard-app\"). The trigger "
|
||||
"section must describe the class of situations, not this one session.\n\n"
|
||||
"5. If you notice two existing skills that overlap, note it in your response "
|
||||
"so a future review can consolidate them. Do not consolidate now unless the "
|
||||
"overlap is obvious and low-risk.\n\n"
|
||||
"Only act when something is genuinely worth saving. "
|
||||
"If nothing stands out, just say 'Nothing to save.' and stop."
|
||||
"Review the conversation above and consider saving or updating a skill if appropriate.\n\n"
|
||||
"Focus on: was a non-trivial approach used to complete a task that required trial "
|
||||
"and error, or changing course due to experiential findings along the way, or did "
|
||||
"the user expect or desire a different method or outcome?\n\n"
|
||||
"If a relevant skill already exists, update it with what you learned. "
|
||||
"Otherwise, create a new skill if the approach is reusable.\n"
|
||||
"If nothing is worth saving, just say 'Nothing to save.' and stop."
|
||||
)
|
||||
|
||||
_COMBINED_REVIEW_PROMPT = (
|
||||
@@ -3139,16 +3125,9 @@ class AIAgent:
|
||||
"about how you should behave, their work style, or ways they want you to operate? "
|
||||
"If so, save using the memory tool.\n\n"
|
||||
"**Skills**: Was a non-trivial approach used to complete a task that required trial "
|
||||
"and error, changing course due to experiential findings, or a different method "
|
||||
"or outcome than the user expected? If so, work in this order:\n"
|
||||
" a. SURVEY existing skills first (skills_list, then skill_view on candidates).\n"
|
||||
" b. Identify the CLASS of task, not the specific task "
|
||||
"(\"desktop app build troubleshooting\", not \"fix my Tauri error\").\n"
|
||||
" c. PREFER UPDATING/GENERALIZING an existing skill that covers the class.\n"
|
||||
" d. ONLY CREATE A NEW SKILL if no existing one covers the class. Scope at "
|
||||
"the class level, not this one session.\n"
|
||||
" e. If you notice overlapping skills during the survey, note it so a future "
|
||||
"review can consolidate them.\n\n"
|
||||
"and error, or changing course due to experiential findings along the way, or did "
|
||||
"the user expect or desire a different method or outcome? If a relevant skill "
|
||||
"already exists, update it. Otherwise, create a new one if the approach is reusable.\n\n"
|
||||
"Only act if there's something genuinely worth saving. "
|
||||
"If nothing stands out, just say 'Nothing to save.' and stop."
|
||||
)
|
||||
@@ -3246,25 +3225,12 @@ class AIAgent:
|
||||
with open(os.devnull, "w") as _devnull, \
|
||||
contextlib.redirect_stdout(_devnull), \
|
||||
contextlib.redirect_stderr(_devnull):
|
||||
# Inherit the parent agent's live runtime (provider, model,
|
||||
# base_url, api_key, api_mode) so the fork uses the exact
|
||||
# same credentials the main turn is using. Without this,
|
||||
# AIAgent.__init__ re-runs auto-resolution from env vars,
|
||||
# which fails for OAuth-only providers, session-scoped
|
||||
# creds, or credential-pool setups where the resolver can't
|
||||
# reconstruct auth from scratch -- producing the spurious
|
||||
# "No LLM provider configured" warning at end of turn.
|
||||
_parent_runtime = self._current_main_runtime()
|
||||
review_agent = AIAgent(
|
||||
model=self.model,
|
||||
max_iterations=8,
|
||||
quiet_mode=True,
|
||||
platform=self.platform,
|
||||
provider=self.provider,
|
||||
api_mode=_parent_runtime.get("api_mode") or None,
|
||||
base_url=_parent_runtime.get("base_url") or None,
|
||||
api_key=_parent_runtime.get("api_key") or None,
|
||||
credential_pool=getattr(self, "_credential_pool", None),
|
||||
parent_session_id=self.session_id,
|
||||
)
|
||||
review_agent._memory_write_origin = "background_review"
|
||||
@@ -3365,7 +3331,10 @@ class AIAgent:
|
||||
"""Save session state to both JSON log and SQLite on any exit path.
|
||||
|
||||
Ensures conversations are never lost, even on errors or early returns.
|
||||
Skipped when ``persist_session=False`` (ephemeral helper flows).
|
||||
"""
|
||||
if not self.persist_session:
|
||||
return
|
||||
self._apply_persist_user_message_override(messages)
|
||||
self._session_messages = messages
|
||||
self._save_session_log(messages)
|
||||
@@ -4498,12 +4467,6 @@ class AIAgent:
|
||||
tool_guidance.append(SESSION_SEARCH_GUIDANCE)
|
||||
if "skill_manage" in self.valid_tool_names:
|
||||
tool_guidance.append(SKILLS_GUIDANCE)
|
||||
# Kanban worker/orchestrator lifecycle — only present when the
|
||||
# dispatcher spawned this process (kanban_show check_fn gates on
|
||||
# HERMES_KANBAN_TASK env var). Normal chat sessions never see
|
||||
# this block.
|
||||
if "kanban_show" in self.valid_tool_names:
|
||||
tool_guidance.append(KANBAN_GUIDANCE)
|
||||
if tool_guidance:
|
||||
prompt_parts.append(" ".join(tool_guidance))
|
||||
|
||||
@@ -7888,17 +7851,7 @@ class AIAgent:
|
||||
api_msg["reasoning_content"] = existing
|
||||
return
|
||||
|
||||
# 2. Healthy session: promote 'reasoning' field to 'reasoning_content'
|
||||
# for providers that use the internal 'reasoning' key.
|
||||
# This must happen BEFORE the DeepSeek/Kimi tool-call check so that
|
||||
# genuine reasoning content is not overwritten by the empty-string
|
||||
# fallback (#15812 regression in PR #15478).
|
||||
normalized_reasoning = source_msg.get("reasoning")
|
||||
if isinstance(normalized_reasoning, str) and normalized_reasoning:
|
||||
api_msg["reasoning_content"] = normalized_reasoning
|
||||
return
|
||||
|
||||
# 3. DeepSeek / Kimi thinking mode: tool-call turns that lack
|
||||
# 2. DeepSeek / Kimi thinking mode: tool-call turns that lack
|
||||
# reasoning_content are "poisoned history" — a prior provider (MiniMax,
|
||||
# etc.) left them empty. DeepSeek returns HTTP 400 if reasoning_content
|
||||
# is absent on replay; inject "" to satisfy the provider's requirement
|
||||
@@ -7914,6 +7867,13 @@ class AIAgent:
|
||||
api_msg["reasoning_content"] = ""
|
||||
return
|
||||
|
||||
# 3. Healthy session: promote 'reasoning' field to 'reasoning_content'
|
||||
# for providers that use the internal 'reasoning' key.
|
||||
normalized_reasoning = source_msg.get("reasoning")
|
||||
if isinstance(normalized_reasoning, str) and normalized_reasoning:
|
||||
api_msg["reasoning_content"] = normalized_reasoning
|
||||
return
|
||||
|
||||
# 4. DeepSeek / Kimi thinking mode: all assistant messages need
|
||||
# reasoning_content. Inject "" to satisfy the provider's requirement
|
||||
# when no explicit reasoning content is present.
|
||||
@@ -11047,69 +11007,36 @@ class AIAgent:
|
||||
continue
|
||||
|
||||
# ── Nous Portal: record rate limit & skip retries ─────
|
||||
# When Nous returns a 429 that is a genuine account-
|
||||
# level rate limit, record the reset time to a shared
|
||||
# file so ALL sessions (cron, gateway, auxiliary) know
|
||||
# not to pile on, then skip further retries -- each
|
||||
# one burns another RPH request and deepens the hole.
|
||||
# The retry loop's top-of-iteration guard will catch
|
||||
# this on the next pass and try fallback or bail.
|
||||
#
|
||||
# IMPORTANT: Nous Portal multiplexes multiple upstream
|
||||
# providers (DeepSeek, Kimi, MiMo, Hermes). A 429 can
|
||||
# also mean an UPSTREAM provider is out of capacity
|
||||
# for one specific model -- transient, clears in
|
||||
# seconds, nothing to do with the caller's quota.
|
||||
# Tripping the cross-session breaker on that would
|
||||
# block every Nous model for minutes. We use
|
||||
# ``is_genuine_nous_rate_limit`` to tell the two
|
||||
# apart via the 429's own x-ratelimit-* headers and
|
||||
# the last-known-good state captured on the previous
|
||||
# successful response.
|
||||
# When Nous returns a 429, record the reset time to a
|
||||
# shared file so ALL sessions (cron, gateway, auxiliary)
|
||||
# know not to pile on. Then skip further retries —
|
||||
# each one burns another RPH request and deepens the
|
||||
# rate limit hole. The retry loop's top-of-iteration
|
||||
# guard will catch this on the next pass and try
|
||||
# fallback or bail with a clear message.
|
||||
if (
|
||||
is_rate_limited
|
||||
and self.provider == "nous"
|
||||
and classified.reason == FailoverReason.rate_limit
|
||||
and not recovered_with_pool
|
||||
):
|
||||
_genuine_nous_rate_limit = False
|
||||
try:
|
||||
from agent.nous_rate_guard import (
|
||||
is_genuine_nous_rate_limit,
|
||||
record_nous_rate_limit,
|
||||
)
|
||||
from agent.nous_rate_guard import record_nous_rate_limit
|
||||
_err_resp = getattr(api_error, "response", None)
|
||||
_err_hdrs = (
|
||||
getattr(_err_resp, "headers", None)
|
||||
if _err_resp else None
|
||||
)
|
||||
_genuine_nous_rate_limit = is_genuine_nous_rate_limit(
|
||||
record_nous_rate_limit(
|
||||
headers=_err_hdrs,
|
||||
last_known_state=self._rate_limit_state,
|
||||
error_context=error_context,
|
||||
)
|
||||
if _genuine_nous_rate_limit:
|
||||
record_nous_rate_limit(
|
||||
headers=_err_hdrs,
|
||||
error_context=error_context,
|
||||
)
|
||||
else:
|
||||
logging.info(
|
||||
"Nous 429 looks like upstream capacity "
|
||||
"(no exhausted bucket in headers or "
|
||||
"last-known state) -- not tripping "
|
||||
"cross-session breaker."
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
if _genuine_nous_rate_limit:
|
||||
# Skip straight to max_retries -- the
|
||||
# top-of-loop guard will handle fallback or
|
||||
# bail cleanly.
|
||||
retry_count = max_retries
|
||||
continue
|
||||
# Upstream capacity 429: fall through to normal
|
||||
# retry logic. A different model (or the same
|
||||
# model a moment later) will typically succeed.
|
||||
# Skip straight to max_retries — the top-of-loop
|
||||
# guard will handle fallback or bail cleanly.
|
||||
retry_count = max_retries
|
||||
continue
|
||||
|
||||
is_payload_too_large = (
|
||||
classified.reason == FailoverReason.payload_too_large
|
||||
|
||||
@@ -1,95 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Build the Hermes Model Catalog — a centralized JSON manifest of curated models.
|
||||
|
||||
This script reads the in-repo hardcoded curated lists (``OPENROUTER_MODELS``,
|
||||
``_PROVIDER_MODELS["nous"]``) and writes them to a JSON manifest that the
|
||||
Hermes CLI fetches at runtime. Publishing the catalog through the docs site
|
||||
lets maintainers update model lists without shipping a Hermes release.
|
||||
|
||||
The runtime fetcher falls back to the same in-repo hardcoded lists if the
|
||||
manifest is unreachable, so this script is a convenience for keeping the
|
||||
manifest in sync — not a source of truth.
|
||||
|
||||
Usage::
|
||||
|
||||
python scripts/build_model_catalog.py
|
||||
|
||||
Output: ``website/static/api/model-catalog.json``
|
||||
|
||||
Live URL (after ``deploy-site.yml`` runs on merge to main):
|
||||
``https://hermes-agent.nousresearch.com/docs/api/model-catalog.json``
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
from datetime import datetime, timezone
|
||||
|
||||
REPO_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||
sys.path.insert(0, REPO_ROOT)
|
||||
|
||||
# Ensure HERMES_HOME is set for imports that touch it at module level.
|
||||
os.environ.setdefault("HERMES_HOME", os.path.join(os.path.expanduser("~"), ".hermes"))
|
||||
|
||||
from hermes_cli.models import OPENROUTER_MODELS, _PROVIDER_MODELS # noqa: E402
|
||||
|
||||
OUTPUT_PATH = os.path.join(REPO_ROOT, "website", "static", "api", "model-catalog.json")
|
||||
CATALOG_VERSION = 1
|
||||
|
||||
|
||||
def build_catalog() -> dict:
    """Assemble the model-catalog manifest as a plain dict.

    The manifest holds the catalog version, a UTC ``updated_at`` timestamp
    taken at call time, top-level metadata, and one entry per provider
    (``openrouter`` and ``nous``) built from the in-repo curated lists.
    """
    generated_at = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    # OpenRouter's curated list is a sequence of (id, description) pairs.
    openrouter_models = []
    for model_id, blurb in OPENROUTER_MODELS:
        openrouter_models.append({"id": model_id, "description": blurb})

    # Nous entries are bare ids; a missing "nous" key yields an empty list.
    nous_models = [
        {"id": model_id} for model_id in _PROVIDER_MODELS.get("nous", [])
    ]

    openrouter_note = (
        "Descriptions drive picker badges. Live /api/v1/models "
        "filters curated ids by tool-calling support and free pricing."
    )
    nous_note = (
        "Free-tier gating is determined live via Portal pricing "
        "(partition_nous_models_by_tier), not this manifest."
    )

    providers = {
        "openrouter": {
            "metadata": {
                "display_name": "OpenRouter",
                "note": openrouter_note,
            },
            "models": openrouter_models,
        },
        "nous": {
            "metadata": {
                "display_name": "Nous Portal",
                "note": nous_note,
            },
            "models": nous_models,
        },
    }

    catalog = {
        "version": CATALOG_VERSION,
        "updated_at": generated_at,
        "metadata": {
            "source": "hermes-agent repo",
            "docs": "https://hermes-agent.nousresearch.com/docs/reference/model-catalog",
        },
        "providers": providers,
    }
    return catalog
|
||||
|
||||
|
||||
def main() -> int:
    """Write the catalog manifest to ``OUTPUT_PATH`` and print a summary.

    Builds the catalog, creates the output directory if needed, writes the
    manifest as indented JSON followed by a trailing newline, then prints
    the output path and the number of models per provider.

    Returns:
        ``0`` once the manifest has been written.
    """
    catalog = build_catalog()
    os.makedirs(os.path.dirname(OUTPUT_PATH), exist_ok=True)
    # Pin the encoding so the published manifest does not depend on the
    # locale of whichever machine ran the script.
    with open(OUTPUT_PATH, "w", encoding="utf-8") as fh:
        json.dump(catalog, fh, indent=2)
        fh.write("\n")

    print(f"Wrote {OUTPUT_PATH}")
    for provider, block in catalog["providers"].items():
        print(f" {provider}: {len(block['models'])} models")
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
@@ -43,7 +43,6 @@ AUTHOR_MAP = {
|
||||
"teknium1@gmail.com": "teknium1",
|
||||
"teknium@nousresearch.com": "teknium1",
|
||||
"127238744+teknium1@users.noreply.github.com": "teknium1",
|
||||
"focusflow.app.help@gmail.com": "yes999zc",
|
||||
"343873859@qq.com": "DrStrangerUJN",
|
||||
"uzmpsk.dilekakbas@gmail.com": "dlkakbs",
|
||||
"jefferson@heimdallstrategy.com": "Mind-Dragon",
|
||||
|
||||
@@ -281,6 +281,7 @@ Type these during an interactive chat session.
|
||||
### Utility
|
||||
```
|
||||
/branch (/fork) Branch the current session
|
||||
/btw Ephemeral side question (doesn't interrupt main task)
|
||||
/fast Toggle priority/fast processing
|
||||
/browser Open CDP browser connection
|
||||
/history Show conversation history (CLI)
|
||||
|
||||
@@ -1,152 +0,0 @@
|
||||
---
|
||||
name: kanban-orchestrator
|
||||
description: Decomposition playbook + specialist-roster conventions + anti-temptation rules for an orchestrator profile routing work through Kanban. The "don't do the work yourself" rule and the basic lifecycle are auto-injected into every kanban worker's system prompt; this skill is the deeper playbook when you're specifically playing the orchestrator role.
|
||||
version: 2.0.0
|
||||
metadata:
|
||||
hermes:
|
||||
tags: [kanban, multi-agent, orchestration, routing]
|
||||
related_skills: [kanban-worker]
|
||||
---
|
||||
|
||||
# Kanban Orchestrator — Decomposition Playbook
|
||||
|
||||
> The **core worker lifecycle** (including the `kanban_create` fan-out pattern and the "decompose, don't execute" rule) is auto-injected into every kanban process via the `KANBAN_GUIDANCE` system-prompt block. This skill is the deeper playbook when you're an orchestrator profile whose whole job is routing.
|
||||
|
||||
## When to use the board (vs. just doing the work)
|
||||
|
||||
Create Kanban tasks when any of these are true:
|
||||
|
||||
1. **Multiple specialists are needed.** Research + analysis + writing is three profiles.
|
||||
2. **The work should survive a crash or restart.** Long-running, recurring, or important.
|
||||
3. **The user might want to interject.** Human-in-the-loop at any step.
|
||||
4. **Multiple subtasks can run in parallel.** Fan-out for speed.
|
||||
5. **Review / iteration is expected.** A reviewer profile loops on drafter output.
|
||||
6. **The audit trail matters.** Board rows persist in SQLite forever.
|
||||
|
||||
If *none* of those apply — it's a small one-shot reasoning task — use `delegate_task` instead or answer the user directly.
|
||||
|
||||
## The anti-temptation rules
|
||||
|
||||
Your job description says "route, don't execute." The rules that enforce that:
|
||||
|
||||
- **Do not execute the work yourself.** Your restricted toolset usually doesn't even include terminal/file/code/web for implementation. If you find yourself "just fixing this quickly" — stop and create a task for the right specialist.
|
||||
- **For any concrete task, create a Kanban task and assign it.** Every single time.
|
||||
- **If no specialist fits, ask the user which profile to create.** Do not default to doing it yourself under "close enough."
|
||||
- **Decompose, route, and summarize — that's the whole job.**
|
||||
|
||||
## The standard specialist roster (convention)
|
||||
|
||||
Unless the user's setup has customized profiles, assume these exist. Adjust to whatever the user actually has — ask if you're unsure.
|
||||
|
||||
| Profile | Does | Typical workspace |
|
||||
|---|---|---|
|
||||
| `researcher` | Reads sources, gathers facts, writes findings | `scratch` |
|
||||
| `analyst` | Synthesizes, ranks, de-dupes. Consumes multiple `researcher` outputs | `scratch` |
|
||||
| `writer` | Drafts prose in the user's voice | `scratch` or `dir:` into their Obsidian vault |
|
||||
| `reviewer` | Reads output, leaves findings, gates approval | `scratch` |
|
||||
| `backend-eng` | Writes server-side code | `worktree` |
|
||||
| `frontend-eng` | Writes client-side code | `worktree` |
|
||||
| `ops` | Runs scripts, manages services, handles deployments | `dir:` into ops scripts repo |
|
||||
| `pm` | Writes specs, acceptance criteria | `scratch` |
|
||||
|
||||
## Decomposition playbook
|
||||
|
||||
### Step 1 — Understand the goal
|
||||
|
||||
Ask clarifying questions if the goal is ambiguous. Cheap to ask; expensive to spawn the wrong fleet.
|
||||
|
||||
### Step 2 — Sketch the task graph
|
||||
|
||||
Before creating anything, draft the graph out loud (in your response to the user). Example for "Analyze whether we should migrate to Postgres":
|
||||
|
||||
```
|
||||
T1 researcher research: Postgres cost vs current
|
||||
T2 researcher research: Postgres performance vs current
|
||||
T3 analyst synthesize migration recommendation parents: T1, T2
|
||||
T4 writer draft decision memo parents: T3
|
||||
```
|
||||
|
||||
Show this to the user. Let them correct it before you create anything.
|
||||
|
||||
### Step 3 — Create tasks and link
|
||||
|
||||
```python
|
||||
t1 = kanban_create(
|
||||
title="research: Postgres cost vs current",
|
||||
assignee="researcher",
|
||||
body="Compare estimated infrastructure costs, migration costs, and ongoing ops costs over a 3-year window. Sources: AWS/GCP pricing, team time estimates, current Postgres bills from peers.",
|
||||
tenant=os.environ.get("HERMES_TENANT"),
|
||||
)["task_id"]
|
||||
|
||||
t2 = kanban_create(
|
||||
title="research: Postgres performance vs current",
|
||||
assignee="researcher",
|
||||
body="Compare query latency, throughput, and scaling characteristics at our expected data volume (~500GB, 10k QPS peak). Sources: benchmark papers, public case studies, pgbench results if easy.",
|
||||
)["task_id"]
|
||||
|
||||
t3 = kanban_create(
|
||||
title="synthesize migration recommendation",
|
||||
assignee="analyst",
|
||||
body="Read the findings from T1 (cost) and T2 (performance). Produce a 1-page recommendation with explicit trade-offs and a go/no-go call.",
|
||||
parents=[t1, t2],
|
||||
)["task_id"]
|
||||
|
||||
t4 = kanban_create(
|
||||
title="draft decision memo",
|
||||
assignee="writer",
|
||||
body="Turn the analyst's recommendation into a 2-page memo for the CTO. Match the tone of previous decision memos in the team's knowledge base.",
|
||||
parents=[t3],
|
||||
)["task_id"]
|
||||
```
|
||||
|
||||
`parents=[...]` gates promotion — children stay in `todo` until every parent reaches `done`, then auto-promote to `ready`. No manual coordination needed; the dispatcher and dependency engine handle it.
|
||||
|
||||
### Step 4 — Complete your own task
|
||||
|
||||
If you were spawned as a task yourself (e.g. `planner` profile was assigned `T0: "investigate Postgres migration"`), mark it done with a summary of what you created:
|
||||
|
||||
```python
|
||||
kanban_complete(
|
||||
summary="decomposed into T1-T4: 2 researchers parallel, 1 analyst on their outputs, 1 writer on the recommendation",
|
||||
metadata={
|
||||
"task_graph": {
|
||||
"T1": {"assignee": "researcher", "parents": []},
|
||||
"T2": {"assignee": "researcher", "parents": []},
|
||||
"T3": {"assignee": "analyst", "parents": ["T1", "T2"]},
|
||||
"T4": {"assignee": "writer", "parents": ["T3"]},
|
||||
},
|
||||
},
|
||||
)
|
||||
```
|
||||
|
||||
### Step 5 — Report back to the user
|
||||
|
||||
Tell them what you created in plain prose:
|
||||
|
||||
> I've queued 4 tasks:
|
||||
> - **T1** (researcher): cost comparison
|
||||
> - **T2** (researcher): performance comparison, in parallel with T1
|
||||
> - **T3** (analyst): synthesizes T1 + T2 into a recommendation
|
||||
> - **T4** (writer): turns T3 into a CTO memo
|
||||
>
|
||||
> The dispatcher will pick up T1 and T2 now. T3 starts when both finish. You'll get a gateway ping when T4 completes. Use the dashboard or `hermes kanban tail <id>` to follow along.
|
||||
|
||||
## Common patterns
|
||||
|
||||
**Fan-out + fan-in (research → synthesize):** N `researcher` tasks with no parents, one `analyst` task with all of them as parents.
|
||||
|
||||
**Pipeline with gates:** `pm → backend-eng → reviewer`. Each stage's `parents=[previous_task]`. Reviewer blocks or completes; if reviewer blocks, the operator unblocks with feedback and respawns.
|
||||
|
||||
**Same-profile queue:** 50 tasks, all assigned to `translator`, no dependencies between them. Dispatcher serializes — translator processes them in priority order, accumulating experience in their own memory.
|
||||
|
||||
**Human-in-the-loop:** Any task can `kanban_block()` to wait for input. Dispatcher respawns after `/unblock`. The comment thread carries the full context.
|
||||
|
||||
## Pitfalls
|
||||
|
||||
**Reassignment vs. new task.** If a reviewer blocks with "needs changes," create a NEW task linked from the reviewer's task — don't re-run the same task with a stern look. The new task is assigned to the original implementer profile.
|
||||
|
||||
**Argument order for links.** `kanban_link(parent_id=..., child_id=...)` — parent first. Mixing them up demotes the wrong task to `todo`.
|
||||
|
||||
**Don't pre-create the whole graph if the shape depends on intermediate findings.** If T3's structure depends on what T1 and T2 find, let T3 exist as a "synthesize findings" task whose own first step is to read parent handoffs and plan the rest. Orchestrators can spawn orchestrators.
|
||||
|
||||
**Tenant inheritance.** If `HERMES_TENANT` is set in your env, pass `tenant=os.environ.get("HERMES_TENANT")` on every `kanban_create` call so child tasks stay in the same namespace.
|
||||
@@ -1,134 +0,0 @@
|
||||
---
|
||||
name: kanban-worker
|
||||
description: Pitfalls, examples, and edge cases for Hermes Kanban workers. The lifecycle itself is auto-injected into every worker's system prompt as KANBAN_GUIDANCE (from agent/prompt_builder.py); this skill is what you load when you want deeper detail on specific scenarios.
|
||||
version: 2.0.0
|
||||
metadata:
|
||||
hermes:
|
||||
tags: [kanban, multi-agent, collaboration, workflow, pitfalls]
|
||||
related_skills: [kanban-orchestrator]
|
||||
---
|
||||
|
||||
# Kanban Worker — Pitfalls and Examples
|
||||
|
||||
> You're seeing this skill because the Hermes Kanban dispatcher spawned you as a worker with `--skills kanban-worker` — it's loaded automatically for every dispatched worker. The **lifecycle** (6 steps: orient → work → heartbeat → block/complete) also lives in the `KANBAN_GUIDANCE` block that's auto-injected into your system prompt. This skill is the deeper detail: good handoff shapes, retry diagnostics, edge cases.
|
||||
|
||||
## Workspace handling
|
||||
|
||||
Your workspace kind determines how you should behave inside `$HERMES_KANBAN_WORKSPACE`:
|
||||
|
||||
| Kind | What it is | How to work |
|
||||
|---|---|---|
|
||||
| `scratch` | Fresh tmp dir, yours alone | Read/write freely; it gets GC'd when the task is archived. |
|
||||
| `dir:<path>` | Shared persistent directory | Other runs will read what you write. Treat it like long-lived state. Path is guaranteed absolute (the kernel rejects relative paths). |
|
||||
| `worktree` | Git worktree at the resolved path | If `.git` doesn't exist, run `git worktree add <path> <branch>` from the main repo first, then cd and work normally. Commit work here. |
|
||||
|
||||
## Tenant isolation
|
||||
|
||||
If `$HERMES_TENANT` is set, the task belongs to a tenant namespace. When reading or writing persistent memory, prefix memory entries with the tenant so context doesn't leak across tenants:
|
||||
|
||||
- Good: `business-a: Acme is our biggest customer`
|
||||
- Bad (leaks): `Acme is our biggest customer`
|
||||
|
||||
## Good summary + metadata shapes
|
||||
|
||||
The `kanban_complete(summary=..., metadata=...)` handoff is how downstream workers read what you did. Patterns that work:
|
||||
|
||||
**Coding task:**
|
||||
```python
|
||||
kanban_complete(
|
||||
summary="shipped rate limiter — token bucket, keys on user_id with IP fallback, 14 tests pass",
|
||||
metadata={
|
||||
"changed_files": ["rate_limiter.py", "tests/test_rate_limiter.py"],
|
||||
"tests_run": 14,
|
||||
"tests_passed": 14,
|
||||
"decisions": ["user_id primary, IP fallback for unauthenticated requests"],
|
||||
},
|
||||
)
|
||||
```
|
||||
|
||||
**Research task:**
|
||||
```python
|
||||
kanban_complete(
|
||||
summary="3 competing libraries reviewed; vLLM wins on throughput, SGLang on latency, Tensorrt-LLM on memory efficiency",
|
||||
metadata={
|
||||
"sources_read": 12,
|
||||
"recommendation": "vLLM",
|
||||
"benchmarks": {"vllm": 1.0, "sglang": 0.87, "trtllm": 0.72},
|
||||
},
|
||||
)
|
||||
```
|
||||
|
||||
**Review task:**
|
||||
```python
|
||||
kanban_complete(
|
||||
summary="reviewed PR #123; 2 blocking issues found (SQL injection in /search, missing CSRF on /settings)",
|
||||
metadata={
|
||||
"pr_number": 123,
|
||||
"findings": [
|
||||
{"severity": "critical", "file": "api/search.py", "line": 42, "issue": "raw SQL concat"},
|
||||
{"severity": "high", "file": "api/settings.py", "issue": "missing CSRF middleware"},
|
||||
],
|
||||
"approved": False,
|
||||
},
|
||||
)
|
||||
```
|
||||
|
||||
Shape `metadata` so downstream parsers (reviewers, aggregators, schedulers) can use it without re-reading your prose.
|
||||
|
||||
## Block reasons that get answered fast
|
||||
|
||||
Bad: `"stuck"` — the human has no context.
|
||||
|
||||
Good: one sentence naming the specific decision you need. Leave longer context as a comment instead.
|
||||
|
||||
```python
|
||||
kanban_comment(
|
||||
task_id=os.environ["HERMES_KANBAN_TASK"],
|
||||
body="Full context: I have user IPs from Cloudflare headers but some users are behind NATs with thousands of peers. Keying on IP alone causes false positives.",
|
||||
)
|
||||
kanban_block(reason="Rate limit key choice: IP (simple, NAT-unsafe) or user_id (requires auth, skips anonymous endpoints)?")
|
||||
```
|
||||
|
||||
The block message is what appears in the dashboard / gateway notifier. The comment is the deeper context a human reads when they open the task.
|
||||
|
||||
## Heartbeats worth sending
|
||||
|
||||
Good heartbeats name progress: `"epoch 12/50, loss 0.31"`, `"scanned 1.2M/2.4M rows"`, `"uploaded 47/120 videos"`.
|
||||
|
||||
Bad heartbeats: `"still working"`, empty notes, sub-second intervals. Send one every few minutes at most; skip them entirely for tasks under ~2 minutes.
|
||||
|
||||
## Retry scenarios
|
||||
|
||||
If you open the task and `kanban_show` returns `runs: [...]` with one or more closed runs, you're a retry. The prior runs' `outcome` / `summary` / `error` tell you what didn't work. Don't repeat that path. Typical retry diagnostics:
|
||||
|
||||
- `outcome: "timed_out"` — the previous attempt hit `max_runtime_seconds`. You may need to chunk the work or shorten it.
|
||||
- `outcome: "crashed"` — OOM or segfault. Reduce memory footprint.
|
||||
- `outcome: "spawn_failed"` + `error: "..."` — usually a profile config issue (missing credential, bad PATH). Ask the human via `kanban_block` instead of retrying blindly.
|
||||
- `outcome: "reclaimed"` + `summary: "task archived..."` — operator archived the task out from under the previous run; you probably shouldn't be running at all, check status carefully.
|
||||
- `outcome: "blocked"` — a previous attempt blocked; the unblock comment should be in the thread by now.
|
||||
|
||||
## Do NOT
|
||||
|
||||
- Call `delegate_task` as a substitute for `kanban_create`. `delegate_task` is for short reasoning subtasks inside YOUR run; `kanban_create` is for cross-agent handoffs that outlive one API loop.
|
||||
- Modify files outside `$HERMES_KANBAN_WORKSPACE` unless the task body says to.
|
||||
- Create follow-up tasks assigned to yourself — assign to the right specialist.
|
||||
- Complete a task you didn't actually finish. Block it instead.
|
||||
|
||||
## Pitfalls
|
||||
|
||||
**Task state can change between dispatch and your startup.** Between when the dispatcher claimed and when your process actually booted, the task may have been blocked, reassigned, or archived. Always `kanban_show` first. If it reports `blocked` or `archived`, stop — you shouldn't be running.
|
||||
|
||||
**Workspace may have stale artifacts.** Especially `dir:` and `worktree` workspaces can have files from previous runs. Read the comment thread — it usually explains why you're running again and what state the workspace is in.
|
||||
|
||||
**Don't rely on the CLI when the tools are available.** The `kanban_*` tools work across all terminal backends (Docker, Modal, SSH). `hermes kanban <verb>` from your terminal tool will fail in containerized backends because the CLI isn't installed there. When in doubt, use the tool.
|
||||
|
||||
## CLI fallback (for scripting)
|
||||
|
||||
Every tool has a CLI equivalent for human operators and scripts:
|
||||
- `kanban_show` ↔ `hermes kanban show <id> --json`
|
||||
- `kanban_complete` ↔ `hermes kanban complete <id> --summary "..." --metadata '{...}'`
|
||||
- `kanban_block` ↔ `hermes kanban block <id> "reason"`
|
||||
- `kanban_create` ↔ `hermes kanban create "title" --assignee <profile> [--parent <id>]`
|
||||
- etc.
|
||||
|
||||
Use the tools from inside an agent; the CLI exists for the human at the terminal.
|
||||
@@ -192,43 +192,6 @@ class TestDefaultContextLengths:
|
||||
f"{model_id}: expected {expected_ctx}, got {actual}"
|
||||
)
|
||||
|
||||
def test_deepseek_v4_models_1m_context(self):
    """DeepSeek V4 ids and the legacy aliases map to a 1M-token window.

    Checks the hardcoded table first, then the resolver with the metadata
    fetchers and the context-length cache patched to return nothing.
    """
    from agent.model_metadata import get_model_context_length
    from unittest.mock import patch as mock_patch

    one_million = 1_000_000

    table_entries = {
        "deepseek-v4-pro": one_million,
        "deepseek-v4-flash": one_million,
        "deepseek-chat": one_million,
        "deepseek-reasoner": one_million,
    }
    for name, want in table_entries.items():
        assert name in DEFAULT_CONTEXT_LENGTHS, f"{name} missing"
        assert DEFAULT_CONTEXT_LENGTHS[name] == want, (
            f"{name} should be {want}, got {DEFAULT_CONTEXT_LENGTHS[name]}"
        )

    # Longest-first substring matching must resolve both the bare V4 ids
    # (native DeepSeek) and the vendor-prefixed forms (OpenRouter / Nous
    # Portal) to 1M instead of dropping to the legacy 128K ``deepseek``
    # substring fallback.
    resolver_cases = [
        ("deepseek-v4-pro", one_million),
        ("deepseek-v4-flash", one_million),
        ("deepseek/deepseek-v4-pro", one_million),
        ("deepseek/deepseek-v4-flash", one_million),
        ("deepseek-chat", one_million),
        ("deepseek-reasoner", one_million),
    ]

    no_catalog = mock_patch(
        "agent.model_metadata.fetch_model_metadata", return_value={}
    )
    no_endpoint = mock_patch(
        "agent.model_metadata.fetch_endpoint_model_metadata", return_value={}
    )
    no_cache = mock_patch(
        "agent.model_metadata.get_cached_context_length", return_value=None
    )
    with no_catalog, no_endpoint, no_cache:
        for model_id, want in resolver_cases:
            got = get_model_context_length(model_id)
            assert got == want, f"{model_id}: expected {want}, got {got}"
|
||||
|
||||
def test_all_values_positive(self):
    """Every entry in ``DEFAULT_CONTEXT_LENGTHS`` has a length above zero."""
    for model_key in DEFAULT_CONTEXT_LENGTHS:
        ctx_len = DEFAULT_CONTEXT_LENGTHS[model_key]
        assert ctx_len > 0, f"{model_key} has non-positive context length"
|
||||
@@ -340,9 +303,7 @@ class TestCodexOAuthContextLength:
|
||||
from agent.model_metadata import get_model_context_length
|
||||
|
||||
# OpenRouter — should hit its own catalog path first; when mocked
|
||||
# empty, falls through to hardcoded DEFAULT_CONTEXT_LENGTHS (1.05M,
|
||||
# matching the real direct-API value — Codex OAuth's 272k cap is
|
||||
# provider-specific and must not leak here).
|
||||
# empty, falls through to hardcoded DEFAULT_CONTEXT_LENGTHS (400k).
|
||||
with patch("agent.model_metadata.fetch_model_metadata", return_value={}), \
|
||||
patch("agent.model_metadata.fetch_endpoint_model_metadata", return_value={}), \
|
||||
patch("agent.model_metadata.get_cached_context_length", return_value=None), \
|
||||
@@ -353,7 +314,7 @@ class TestCodexOAuthContextLength:
|
||||
api_key="",
|
||||
provider="openrouter",
|
||||
)
|
||||
assert ctx == 1_050_000, (
|
||||
assert ctx == 400_000, (
|
||||
f"Non-Codex gpt-5.5 resolved to {ctx}; Codex 272k override "
|
||||
"leaked outside openai-codex provider"
|
||||
)
|
||||
|
||||
@@ -251,141 +251,3 @@ class TestAuxiliaryClientIntegration:
|
||||
monkeypatch.setattr(aux, "_read_nous_auth", lambda: None)
|
||||
result = aux._try_nous()
|
||||
assert result == (None, None)
|
||||
|
||||
|
||||
class TestIsGenuineNousRateLimit:
|
||||
"""Tell a real account-level 429 apart from an upstream-capacity 429.
|
||||
|
||||
Nous Portal multiplexes upstreams (DeepSeek, Kimi, MiMo, Hermes).
|
||||
A 429 from an upstream out of capacity should NOT trip the
|
||||
cross-session breaker; a real user-quota 429 should.
|
||||
"""
|
||||
|
||||
def test_exhausted_hourly_bucket_in_429_headers_is_genuine(self):
|
||||
from agent.nous_rate_guard import is_genuine_nous_rate_limit
|
||||
|
||||
headers = {
|
||||
"x-ratelimit-limit-requests-1h": "800",
|
||||
"x-ratelimit-remaining-requests-1h": "0",
|
||||
"x-ratelimit-reset-requests-1h": "3100",
|
||||
"x-ratelimit-limit-requests": "200",
|
||||
"x-ratelimit-remaining-requests": "198",
|
||||
"x-ratelimit-reset-requests": "40",
|
||||
}
|
||||
assert is_genuine_nous_rate_limit(headers=headers) is True
|
||||
|
||||
def test_exhausted_tokens_bucket_is_genuine(self):
|
||||
from agent.nous_rate_guard import is_genuine_nous_rate_limit
|
||||
|
||||
headers = {
|
||||
"x-ratelimit-limit-tokens": "800000",
|
||||
"x-ratelimit-remaining-tokens": "0",
|
||||
"x-ratelimit-reset-tokens": "45", # < 60s threshold -> not genuine
|
||||
"x-ratelimit-limit-tokens-1h": "8000000",
|
||||
"x-ratelimit-remaining-tokens-1h": "0",
|
||||
"x-ratelimit-reset-tokens-1h": "1800", # >= 60s threshold -> genuine
|
||||
}
|
||||
assert is_genuine_nous_rate_limit(headers=headers) is True
|
||||
|
||||
def test_healthy_headers_on_429_are_upstream_capacity(self):
|
||||
# Classic upstream-capacity symptom: Nous edge reports plenty of
|
||||
# headroom on every bucket, but returns 429 anyway because
|
||||
# upstream (DeepSeek / Kimi / ...) is out of capacity.
|
||||
from agent.nous_rate_guard import is_genuine_nous_rate_limit
|
||||
|
||||
headers = {
|
||||
"x-ratelimit-limit-requests": "200",
|
||||
"x-ratelimit-remaining-requests": "198",
|
||||
"x-ratelimit-reset-requests": "40",
|
||||
"x-ratelimit-limit-requests-1h": "800",
|
||||
"x-ratelimit-remaining-requests-1h": "750",
|
||||
"x-ratelimit-reset-requests-1h": "3100",
|
||||
"x-ratelimit-limit-tokens": "800000",
|
||||
"x-ratelimit-remaining-tokens": "790000",
|
||||
"x-ratelimit-reset-tokens": "40",
|
||||
"x-ratelimit-limit-tokens-1h": "8000000",
|
||||
"x-ratelimit-remaining-tokens-1h": "7800000",
|
||||
"x-ratelimit-reset-tokens-1h": "3100",
|
||||
}
|
||||
assert is_genuine_nous_rate_limit(headers=headers) is False
|
||||
|
||||
def test_bare_429_with_no_headers_is_upstream(self):
|
||||
from agent.nous_rate_guard import is_genuine_nous_rate_limit
|
||||
|
||||
assert is_genuine_nous_rate_limit(headers=None) is False
|
||||
assert is_genuine_nous_rate_limit(headers={}) is False
|
||||
assert is_genuine_nous_rate_limit(
|
||||
headers={"content-type": "application/json"}
|
||||
) is False
|
||||
|
||||
def test_exhausted_bucket_with_short_reset_is_not_genuine(self):
|
||||
# remaining == 0 but reset in < 60s: almost certainly a
|
||||
# secondary per-minute throttle that will clear immediately --
|
||||
# not worth tripping the cross-session breaker.
|
||||
from agent.nous_rate_guard import is_genuine_nous_rate_limit
|
||||
|
||||
headers = {
|
||||
"x-ratelimit-limit-requests": "200",
|
||||
"x-ratelimit-remaining-requests": "0",
|
||||
"x-ratelimit-reset-requests": "30",
|
||||
}
|
||||
assert is_genuine_nous_rate_limit(headers=headers) is False
|
||||
|
||||
def test_last_known_state_with_exhausted_bucket_triggers_genuine(self):
|
||||
# Headers on the 429 lack rate-limit info, but the previous
|
||||
# successful response already showed the hourly bucket
|
||||
# exhausted -- the 429 is almost certainly that limit
|
||||
# continuing.
|
||||
from agent.nous_rate_guard import is_genuine_nous_rate_limit
|
||||
from agent.rate_limit_tracker import parse_rate_limit_headers
|
||||
|
||||
prior_headers = {
|
||||
"x-ratelimit-limit-requests-1h": "800",
|
||||
"x-ratelimit-remaining-requests-1h": "0",
|
||||
"x-ratelimit-reset-requests-1h": "2000",
|
||||
"x-ratelimit-limit-requests": "200",
|
||||
"x-ratelimit-remaining-requests": "100",
|
||||
"x-ratelimit-reset-requests": "30",
|
||||
"x-ratelimit-limit-tokens": "800000",
|
||||
"x-ratelimit-remaining-tokens": "700000",
|
||||
"x-ratelimit-reset-tokens": "30",
|
||||
"x-ratelimit-limit-tokens-1h": "8000000",
|
||||
"x-ratelimit-remaining-tokens-1h": "7000000",
|
||||
"x-ratelimit-reset-tokens-1h": "2000",
|
||||
}
|
||||
last_state = parse_rate_limit_headers(prior_headers, provider="nous")
|
||||
assert is_genuine_nous_rate_limit(
|
||||
headers=None, last_known_state=last_state
|
||||
) is True
|
||||
|
||||
def test_last_known_state_all_healthy_stays_upstream(self):
|
||||
# Prior state was healthy; bare 429 arrives; should be treated
|
||||
# as upstream capacity.
|
||||
from agent.nous_rate_guard import is_genuine_nous_rate_limit
|
||||
from agent.rate_limit_tracker import parse_rate_limit_headers
|
||||
|
||||
prior_headers = {
|
||||
"x-ratelimit-limit-requests-1h": "800",
|
||||
"x-ratelimit-remaining-requests-1h": "750",
|
||||
"x-ratelimit-reset-requests-1h": "2000",
|
||||
"x-ratelimit-limit-requests": "200",
|
||||
"x-ratelimit-remaining-requests": "180",
|
||||
"x-ratelimit-reset-requests": "30",
|
||||
"x-ratelimit-limit-tokens": "800000",
|
||||
"x-ratelimit-remaining-tokens": "790000",
|
||||
"x-ratelimit-reset-tokens": "30",
|
||||
"x-ratelimit-limit-tokens-1h": "8000000",
|
||||
"x-ratelimit-remaining-tokens-1h": "7900000",
|
||||
"x-ratelimit-reset-tokens-1h": "2000",
|
||||
}
|
||||
last_state = parse_rate_limit_headers(prior_headers, provider="nous")
|
||||
assert is_genuine_nous_rate_limit(
|
||||
headers=None, last_known_state=last_state
|
||||
) is False
|
||||
|
||||
def test_none_last_state_and_no_headers_is_upstream(self):
|
||||
from agent.nous_rate_guard import is_genuine_nous_rate_limit
|
||||
|
||||
assert is_genuine_nous_rate_limit(
|
||||
headers=None, last_known_state=None
|
||||
) is False
|
||||
|
||||
@@ -1,164 +0,0 @@
|
||||
"""Tests for agent/onboarding.py — contextual first-touch hint helpers."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import yaml
|
||||
import pytest
|
||||
|
||||
from agent.onboarding import (
|
||||
BUSY_INPUT_FLAG,
|
||||
TOOL_PROGRESS_FLAG,
|
||||
busy_input_hint_cli,
|
||||
busy_input_hint_gateway,
|
||||
is_seen,
|
||||
mark_seen,
|
||||
tool_progress_hint_cli,
|
||||
tool_progress_hint_gateway,
|
||||
)
|
||||
|
||||
|
||||
class TestIsSeen:
|
||||
def test_empty_config_unseen(self):
|
||||
assert is_seen({}, BUSY_INPUT_FLAG) is False
|
||||
|
||||
def test_missing_onboarding_unseen(self):
|
||||
assert is_seen({"display": {}}, BUSY_INPUT_FLAG) is False
|
||||
|
||||
def test_onboarding_not_dict_unseen(self):
|
||||
assert is_seen({"onboarding": "nope"}, BUSY_INPUT_FLAG) is False
|
||||
|
||||
def test_seen_dict_missing_flag(self):
|
||||
assert is_seen({"onboarding": {"seen": {}}}, BUSY_INPUT_FLAG) is False
|
||||
|
||||
def test_seen_flag_true(self):
|
||||
cfg = {"onboarding": {"seen": {BUSY_INPUT_FLAG: True}}}
|
||||
assert is_seen(cfg, BUSY_INPUT_FLAG) is True
|
||||
|
||||
def test_seen_flag_falsy(self):
|
||||
cfg = {"onboarding": {"seen": {BUSY_INPUT_FLAG: False}}}
|
||||
assert is_seen(cfg, BUSY_INPUT_FLAG) is False
|
||||
|
||||
def test_other_flags_isolated(self):
|
||||
cfg = {"onboarding": {"seen": {BUSY_INPUT_FLAG: True}}}
|
||||
assert is_seen(cfg, TOOL_PROGRESS_FLAG) is False
|
||||
|
||||
|
||||
class TestMarkSeen:
|
||||
def test_creates_missing_file_and_sets_flag(self, tmp_path):
|
||||
cfg_path = tmp_path / "config.yaml"
|
||||
assert mark_seen(cfg_path, BUSY_INPUT_FLAG) is True
|
||||
|
||||
loaded = yaml.safe_load(cfg_path.read_text())
|
||||
assert loaded["onboarding"]["seen"][BUSY_INPUT_FLAG] is True
|
||||
|
||||
def test_preserves_other_config(self, tmp_path):
|
||||
cfg_path = tmp_path / "config.yaml"
|
||||
cfg_path.write_text(yaml.safe_dump({
|
||||
"model": {"default": "claude-sonnet-4.6"},
|
||||
"display": {"skin": "default"},
|
||||
}))
|
||||
|
||||
assert mark_seen(cfg_path, BUSY_INPUT_FLAG) is True
|
||||
loaded = yaml.safe_load(cfg_path.read_text())
|
||||
|
||||
assert loaded["model"]["default"] == "claude-sonnet-4.6"
|
||||
assert loaded["display"]["skin"] == "default"
|
||||
assert loaded["onboarding"]["seen"][BUSY_INPUT_FLAG] is True
|
||||
|
||||
def test_preserves_other_seen_flags(self, tmp_path):
|
||||
cfg_path = tmp_path / "config.yaml"
|
||||
cfg_path.write_text(yaml.safe_dump({
|
||||
"onboarding": {"seen": {TOOL_PROGRESS_FLAG: True}},
|
||||
}))
|
||||
|
||||
assert mark_seen(cfg_path, BUSY_INPUT_FLAG) is True
|
||||
loaded = yaml.safe_load(cfg_path.read_text())
|
||||
|
||||
assert loaded["onboarding"]["seen"][TOOL_PROGRESS_FLAG] is True
|
||||
assert loaded["onboarding"]["seen"][BUSY_INPUT_FLAG] is True
|
||||
|
||||
def test_idempotent(self, tmp_path):
|
||||
cfg_path = tmp_path / "config.yaml"
|
||||
mark_seen(cfg_path, BUSY_INPUT_FLAG)
|
||||
first = cfg_path.read_text()
|
||||
|
||||
# Second call must be a no-op on-disk content (file may be touched,
|
||||
# but the YAML contents should be identical).
|
||||
mark_seen(cfg_path, BUSY_INPUT_FLAG)
|
||||
second = cfg_path.read_text()
|
||||
|
||||
assert yaml.safe_load(first) == yaml.safe_load(second)
|
||||
|
||||
def test_handles_non_dict_onboarding(self, tmp_path):
|
||||
cfg_path = tmp_path / "config.yaml"
|
||||
cfg_path.write_text(yaml.safe_dump({"onboarding": "corrupted"}))
|
||||
|
||||
assert mark_seen(cfg_path, BUSY_INPUT_FLAG) is True
|
||||
loaded = yaml.safe_load(cfg_path.read_text())
|
||||
assert loaded["onboarding"]["seen"][BUSY_INPUT_FLAG] is True
|
||||
|
||||
def test_handles_non_dict_seen(self, tmp_path):
|
||||
cfg_path = tmp_path / "config.yaml"
|
||||
cfg_path.write_text(yaml.safe_dump({"onboarding": {"seen": "corrupted"}}))
|
||||
|
||||
assert mark_seen(cfg_path, BUSY_INPUT_FLAG) is True
|
||||
loaded = yaml.safe_load(cfg_path.read_text())
|
||||
assert loaded["onboarding"]["seen"][BUSY_INPUT_FLAG] is True
|
||||
|
||||
|
||||
class TestHintMessages:
|
||||
def test_busy_input_hint_gateway_interrupt(self):
|
||||
msg = busy_input_hint_gateway("interrupt")
|
||||
assert "/busy queue" in msg
|
||||
assert "interrupted" in msg.lower()
|
||||
|
||||
def test_busy_input_hint_gateway_queue(self):
|
||||
msg = busy_input_hint_gateway("queue")
|
||||
assert "/busy interrupt" in msg
|
||||
assert "queued" in msg.lower()
|
||||
|
||||
def test_busy_input_hint_cli_interrupt(self):
|
||||
msg = busy_input_hint_cli("interrupt")
|
||||
assert "/busy queue" in msg
|
||||
|
||||
def test_busy_input_hint_cli_queue(self):
|
||||
msg = busy_input_hint_cli("queue")
|
||||
assert "/busy interrupt" in msg
|
||||
|
||||
def test_tool_progress_hints_mention_verbose(self):
|
||||
assert "/verbose" in tool_progress_hint_gateway()
|
||||
assert "/verbose" in tool_progress_hint_cli()
|
||||
|
||||
def test_hints_are_not_empty(self):
|
||||
for hint in (
|
||||
busy_input_hint_gateway("queue"),
|
||||
busy_input_hint_gateway("interrupt"),
|
||||
busy_input_hint_cli("queue"),
|
||||
busy_input_hint_cli("interrupt"),
|
||||
tool_progress_hint_gateway(),
|
||||
tool_progress_hint_cli(),
|
||||
):
|
||||
assert hint.strip()
|
||||
|
||||
|
||||
class TestRoundTrip:
|
||||
"""After mark_seen, is_seen on the re-loaded config must return True."""
|
||||
|
||||
def test_mark_then_is_seen(self, tmp_path):
|
||||
cfg_path = tmp_path / "config.yaml"
|
||||
|
||||
assert mark_seen(cfg_path, BUSY_INPUT_FLAG) is True
|
||||
loaded = yaml.safe_load(cfg_path.read_text())
|
||||
|
||||
assert is_seen(loaded, BUSY_INPUT_FLAG) is True
|
||||
assert is_seen(loaded, TOOL_PROGRESS_FLAG) is False
|
||||
|
||||
def test_mark_both_flags_independently(self, tmp_path):
|
||||
cfg_path = tmp_path / "config.yaml"
|
||||
|
||||
mark_seen(cfg_path, BUSY_INPUT_FLAG)
|
||||
mark_seen(cfg_path, TOOL_PROGRESS_FLAG)
|
||||
loaded = yaml.safe_load(cfg_path.read_text())
|
||||
|
||||
assert is_seen(loaded, BUSY_INPUT_FLAG) is True
|
||||
assert is_seen(loaded, TOOL_PROGRESS_FLAG) is True
|
||||
@@ -349,121 +349,3 @@ class TestBusySessionAck:
|
||||
|
||||
result = await runner._handle_active_session_busy_message(event, sk)
|
||||
assert result is False # not handled, let default path try
|
||||
|
||||
|
||||
class TestBusySessionOnboardingHint:
|
||||
"""First-touch hint appended to the busy-ack the first time it fires."""
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_first_busy_ack_appends_interrupt_hint(self, tmp_path, monkeypatch):
|
||||
"""First busy-while-running message gets an extra hint about /busy."""
|
||||
import gateway.run as _gr
|
||||
|
||||
monkeypatch.setattr(_gr, "_hermes_home", tmp_path)
|
||||
# mark_seen imports utils.atomic_yaml_write; make sure it resolves
|
||||
# against a writable dir by pointing _hermes_home at tmp_path.
|
||||
monkeypatch.setattr(_gr, "_load_gateway_config", lambda: {})
|
||||
|
||||
runner, _sentinel = _make_runner()
|
||||
runner._busy_input_mode = "interrupt"
|
||||
adapter = _make_adapter()
|
||||
|
||||
event = _make_event(text="ping")
|
||||
sk = build_session_key(event.source)
|
||||
|
||||
agent = MagicMock()
|
||||
agent.get_activity_summary.return_value = {
|
||||
"api_call_count": 3, "max_iterations": 60,
|
||||
"current_tool": None, "last_activity_ts": time.time(),
|
||||
"last_activity_desc": "api", "seconds_since_activity": 0.1,
|
||||
}
|
||||
runner._running_agents[sk] = agent
|
||||
runner._running_agents_ts[sk] = time.time() - 5
|
||||
runner.adapters[event.source.platform] = adapter
|
||||
|
||||
await runner._handle_active_session_busy_message(event, sk)
|
||||
|
||||
call_kwargs = adapter._send_with_retry.call_args
|
||||
content = call_kwargs.kwargs.get("content", "")
|
||||
|
||||
# Normal ack body
|
||||
assert "Interrupting" in content
|
||||
# First-touch hint appended
|
||||
assert "First-time tip" in content
|
||||
assert "/busy queue" in content
|
||||
|
||||
# The flag is now persisted to tmp_path/config.yaml
|
||||
import yaml
|
||||
cfg = yaml.safe_load((tmp_path / "config.yaml").read_text())
|
||||
assert cfg["onboarding"]["seen"]["busy_input_prompt"] is True
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_second_busy_ack_omits_hint(self, tmp_path, monkeypatch):
|
||||
"""Once the flag is marked, the hint never appears again."""
|
||||
import gateway.run as _gr
|
||||
import yaml
|
||||
|
||||
monkeypatch.setattr(_gr, "_hermes_home", tmp_path)
|
||||
# Pre-populate the config so is_seen() returns True from the start.
|
||||
(tmp_path / "config.yaml").write_text(yaml.safe_dump({
|
||||
"onboarding": {"seen": {"busy_input_prompt": True}},
|
||||
}))
|
||||
monkeypatch.setattr(
|
||||
_gr, "_load_gateway_config",
|
||||
lambda: yaml.safe_load((tmp_path / "config.yaml").read_text()),
|
||||
)
|
||||
|
||||
runner, _sentinel = _make_runner()
|
||||
runner._busy_input_mode = "interrupt"
|
||||
adapter = _make_adapter()
|
||||
|
||||
event = _make_event(text="ping again")
|
||||
sk = build_session_key(event.source)
|
||||
|
||||
agent = MagicMock()
|
||||
agent.get_activity_summary.return_value = {
|
||||
"api_call_count": 3, "max_iterations": 60,
|
||||
"current_tool": None, "last_activity_ts": time.time(),
|
||||
"last_activity_desc": "api", "seconds_since_activity": 0.1,
|
||||
}
|
||||
runner._running_agents[sk] = agent
|
||||
runner._running_agents_ts[sk] = time.time() - 5
|
||||
runner.adapters[event.source.platform] = adapter
|
||||
|
||||
await runner._handle_active_session_busy_message(event, sk)
|
||||
|
||||
call_kwargs = adapter._send_with_retry.call_args
|
||||
content = call_kwargs.kwargs.get("content", "")
|
||||
|
||||
assert "Interrupting" in content
|
||||
assert "First-time tip" not in content
|
||||
assert "/busy queue" not in content
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_queue_mode_hint_points_to_interrupt(self, tmp_path, monkeypatch):
|
||||
"""In queue mode the hint should suggest /busy interrupt, not /busy queue."""
|
||||
import gateway.run as _gr
|
||||
|
||||
monkeypatch.setattr(_gr, "_hermes_home", tmp_path)
|
||||
monkeypatch.setattr(_gr, "_load_gateway_config", lambda: {})
|
||||
|
||||
runner, _sentinel = _make_runner()
|
||||
runner._busy_input_mode = "queue"
|
||||
adapter = _make_adapter()
|
||||
|
||||
event = _make_event(text="queue me")
|
||||
sk = build_session_key(event.source)
|
||||
runner.adapters[event.source.platform] = adapter
|
||||
|
||||
agent = MagicMock()
|
||||
runner._running_agents[sk] = agent
|
||||
|
||||
with patch("gateway.run.merge_pending_message_event"):
|
||||
await runner._handle_active_session_busy_message(event, sk)
|
||||
|
||||
content = adapter._send_with_retry.call_args.kwargs.get("content", "")
|
||||
assert "Queued for the next turn" in content
|
||||
assert "First-time tip" in content
|
||||
assert "/busy interrupt" in content
|
||||
# Must NOT tell the user to /busy queue when they're already on queue.
|
||||
assert "/busy queue" not in content
|
||||
|
||||
@@ -1,215 +0,0 @@
|
||||
"""Tests for interrupt-aware tool-progress suppression in gateway.
|
||||
|
||||
When a user sends `stop` while the agent is executing a batch of parallel
|
||||
tool calls, the gateway's progress_callback should stop queuing 🔍 bubbles
|
||||
and the drain loop should drop any already-queued events. Without this
|
||||
guard, the stop acknowledgement appears first but is followed by a trail
|
||||
of tool-progress bubbles for calls that were already parsed from the LLM
|
||||
response — making the interrupt feel ignored.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import importlib
|
||||
import sys
|
||||
import time
|
||||
import types
|
||||
from types import SimpleNamespace
|
||||
|
||||
import pytest
|
||||
|
||||
from gateway.config import Platform, PlatformConfig
|
||||
from gateway.platforms.base import BasePlatformAdapter, SendResult
|
||||
from gateway.session import SessionSource
|
||||
|
||||
|
||||
class ProgressCaptureAdapter(BasePlatformAdapter):
|
||||
def __init__(self, platform=Platform.TELEGRAM):
|
||||
super().__init__(PlatformConfig(enabled=True, token="***"), platform)
|
||||
self.sent = []
|
||||
self.edits = []
|
||||
self.typing = []
|
||||
|
||||
async def connect(self) -> bool:
|
||||
return True
|
||||
|
||||
async def disconnect(self) -> None:
|
||||
return None
|
||||
|
||||
async def send(self, chat_id, content, reply_to=None, metadata=None) -> SendResult:
|
||||
self.sent.append({"chat_id": chat_id, "content": content})
|
||||
return SendResult(success=True, message_id="progress-1")
|
||||
|
||||
async def edit_message(self, chat_id, message_id, content) -> SendResult:
|
||||
self.edits.append({"message_id": message_id, "content": content})
|
||||
return SendResult(success=True, message_id=message_id)
|
||||
|
||||
async def send_typing(self, chat_id, metadata=None) -> None:
|
||||
self.typing.append(chat_id)
|
||||
|
||||
async def stop_typing(self, chat_id) -> None:
|
||||
return None
|
||||
|
||||
async def get_chat_info(self, chat_id: str):
|
||||
return {"id": chat_id}
|
||||
|
||||
|
||||
class PreInterruptAgent:
|
||||
"""Fires tool-progress events BEFORE the interrupt lands.
|
||||
|
||||
These should render normally. Baseline for comparison with the
|
||||
interrupted case — proves the harness renders events when no
|
||||
interrupt is active.
|
||||
"""
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
self.tool_progress_callback = kwargs.get("tool_progress_callback")
|
||||
self.tools = []
|
||||
self._interrupt_requested = False
|
||||
|
||||
@property
|
||||
def is_interrupted(self) -> bool:
|
||||
return self._interrupt_requested
|
||||
|
||||
def run_conversation(self, message, conversation_history=None, task_id=None):
|
||||
self.tool_progress_callback("tool.started", "web_search", "first search", {})
|
||||
time.sleep(0.35) # let the drain loop process
|
||||
return {"final_response": "done", "messages": [], "api_calls": 1}
|
||||
|
||||
|
||||
class InterruptedAgent:
|
||||
"""Fires tool.started events AFTER interrupt — all should be suppressed.
|
||||
|
||||
Mirrors the failure mode in the bug report: LLM returned N parallel
|
||||
web_search calls, interrupt flag flipped, remaining events still
|
||||
rendered as bubbles. With the fix, none of these should appear.
|
||||
"""
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
self.tool_progress_callback = kwargs.get("tool_progress_callback")
|
||||
self.tools = []
|
||||
# Start already interrupted — simulates stop having already landed
|
||||
# by the time the agent batch starts firing tool.started events.
|
||||
self._interrupt_requested = True
|
||||
|
||||
@property
|
||||
def is_interrupted(self) -> bool:
|
||||
return self._interrupt_requested
|
||||
|
||||
def run_conversation(self, message, conversation_history=None, task_id=None):
|
||||
# Parallel tool batch — in production these come from one LLM
|
||||
# response with 5 tool_calls. All are post-interrupt.
|
||||
self.tool_progress_callback("tool.started", "web_search", "cognee hermes", {})
|
||||
self.tool_progress_callback("tool.started", "web_search", "McBee deer hunting", {})
|
||||
self.tool_progress_callback("tool.started", "web_search", "kuzu graph db", {})
|
||||
self.tool_progress_callback("tool.started", "web_search", "moonshot kimi api", {})
|
||||
self.tool_progress_callback("tool.started", "web_search", "platform.moonshot.cn", {})
|
||||
time.sleep(0.35) # let the drain loop attempt to process the queue
|
||||
return {"final_response": "interrupted", "messages": [], "api_calls": 1}
|
||||
|
||||
|
||||
def _make_runner(adapter):
|
||||
gateway_run = importlib.import_module("gateway.run")
|
||||
GatewayRunner = gateway_run.GatewayRunner
|
||||
|
||||
runner = object.__new__(GatewayRunner)
|
||||
runner.adapters = {adapter.platform: adapter}
|
||||
runner._voice_mode = {}
|
||||
runner._prefill_messages = []
|
||||
runner._ephemeral_system_prompt = ""
|
||||
runner._reasoning_config = None
|
||||
runner._provider_routing = {}
|
||||
runner._fallback_model = None
|
||||
runner._session_db = None
|
||||
runner._running_agents = {}
|
||||
runner._session_run_generation = {}
|
||||
runner.hooks = SimpleNamespace(loaded_hooks=False)
|
||||
runner.config = SimpleNamespace(
|
||||
thread_sessions_per_user=False,
|
||||
group_sessions_per_user=False,
|
||||
stt_enabled=False,
|
||||
)
|
||||
return runner
|
||||
|
||||
|
||||
async def _run_once(monkeypatch, tmp_path, agent_cls, session_id):
|
||||
monkeypatch.setenv("HERMES_TOOL_PROGRESS_MODE", "all")
|
||||
|
||||
fake_dotenv = types.ModuleType("dotenv")
|
||||
fake_dotenv.load_dotenv = lambda *args, **kwargs: None
|
||||
monkeypatch.setitem(sys.modules, "dotenv", fake_dotenv)
|
||||
|
||||
fake_run_agent = types.ModuleType("run_agent")
|
||||
fake_run_agent.AIAgent = agent_cls
|
||||
monkeypatch.setitem(sys.modules, "run_agent", fake_run_agent)
|
||||
|
||||
adapter = ProgressCaptureAdapter()
|
||||
runner = _make_runner(adapter)
|
||||
gateway_run = importlib.import_module("gateway.run")
|
||||
monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
|
||||
monkeypatch.setattr(
|
||||
gateway_run,
|
||||
"_resolve_runtime_agent_kwargs",
|
||||
lambda: {"api_key": "fake"},
|
||||
)
|
||||
source = SessionSource(
|
||||
platform=Platform.TELEGRAM,
|
||||
chat_id="-1001",
|
||||
chat_type="group",
|
||||
thread_id="17585",
|
||||
)
|
||||
result = await runner._run_agent(
|
||||
message="hi",
|
||||
context_prompt="",
|
||||
history=[],
|
||||
source=source,
|
||||
session_id=session_id,
|
||||
session_key="agent:main:telegram:group:-1001:17585",
|
||||
)
|
||||
return adapter, result
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_baseline_non_interrupted_agent_renders_progress(monkeypatch, tmp_path):
|
||||
"""Sanity check: when is_interrupted is False, tool-progress renders normally."""
|
||||
adapter, result = await _run_once(monkeypatch, tmp_path, PreInterruptAgent, "sess-baseline")
|
||||
assert result["final_response"] == "done"
|
||||
rendered = " ".join(c["content"] for c in adapter.sent) + " " + " ".join(
|
||||
c["content"] for c in adapter.edits
|
||||
)
|
||||
assert "first search" in rendered, (
|
||||
"baseline agent should render its tool-progress event — "
|
||||
"if this fails the test harness is broken, not the fix"
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_progress_suppressed_when_agent_is_interrupted(monkeypatch, tmp_path):
|
||||
"""Post-interrupt tool.started events must not render as bubbles.
|
||||
|
||||
This is Bug B from the screenshot: user sends `stop`, agent acks with
|
||||
⚡ Interrupting, but 5 more 🔍 web_search bubbles still render because
|
||||
their tool.started events were already parsed from the LLM response.
|
||||
With the fix, progress_callback and the drain loop both check
|
||||
is_interrupted and skip these events.
|
||||
"""
|
||||
adapter, result = await _run_once(
|
||||
monkeypatch, tmp_path, InterruptedAgent, "sess-interrupted"
|
||||
)
|
||||
assert result["final_response"] == "interrupted"
|
||||
|
||||
rendered = " ".join(c["content"] for c in adapter.sent) + " " + " ".join(
|
||||
c["content"] for c in adapter.edits
|
||||
)
|
||||
|
||||
# None of the post-interrupt queries should appear.
|
||||
for leaked_query in (
|
||||
"cognee hermes",
|
||||
"McBee deer hunting",
|
||||
"kuzu graph db",
|
||||
"moonshot kimi api",
|
||||
"platform.moonshot.cn",
|
||||
):
|
||||
assert leaked_query not in rendered, (
|
||||
f"event '{leaked_query}' leaked into the UI after interrupt — "
|
||||
f"progress_callback / drain loop is not checking is_interrupted"
|
||||
)
|
||||
@@ -165,26 +165,3 @@ async def test_reasoning_rejected_mid_run():
|
||||
assert result is not None
|
||||
assert "can't run mid-turn" in result
|
||||
assert "/reasoning" in result
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_btw_dispatches_mid_run():
|
||||
"""/btw mid-run must dispatch to /background's handler, not hit the catch-all.
|
||||
|
||||
/btw is an alias of /background (see hermes_cli/commands.py). Typing
|
||||
/btw mid-turn must spawn a parallel background task — that's the whole
|
||||
point of the command. Before the mid-turn bypass was added for
|
||||
/background, /btw fell through to the "Agent is running — wait or
|
||||
/stop first" catch-all, making it useless in exactly the scenario it
|
||||
was designed for. The alias and the bypass together make it work.
|
||||
"""
|
||||
runner = _make_runner()
|
||||
runner._handle_background_command = AsyncMock(
|
||||
return_value='🚀 Background task started: "what module owns titles?"'
|
||||
)
|
||||
|
||||
result = await runner._handle_message(_make_event("/btw what module owns titles?"))
|
||||
|
||||
runner._handle_background_command.assert_awaited_once()
|
||||
assert result is not None
|
||||
assert "can't run mid-turn" not in result
|
||||
|
||||
@@ -177,53 +177,6 @@ class TestHandleVoiceCommand:
|
||||
|
||||
assert adapter._auto_tts_disabled_chats == {"123"}
|
||||
|
||||
def test_sync_populates_enabled_chats_from_voice_modes(self, runner):
|
||||
"""Issue #16007: sync also restores per-chat /voice on|tts opt-ins.
|
||||
|
||||
The adapter's ``_auto_tts_enabled_chats`` must mirror chats whose
|
||||
persisted voice_mode is ``voice_only`` or ``all`` — without this,
|
||||
``/voice on`` was relying on a "not in disabled set" default that
|
||||
silently enabled auto-TTS for every chat.
|
||||
"""
|
||||
from gateway.config import Platform
|
||||
runner._voice_mode = {
|
||||
"telegram:off_chat": "off",
|
||||
"telegram:on_chat": "voice_only",
|
||||
"telegram:tts_chat": "all",
|
||||
"slack:999": "voice_only", # wrong platform, must be ignored
|
||||
}
|
||||
adapter = SimpleNamespace(
|
||||
_auto_tts_default=False,
|
||||
_auto_tts_disabled_chats=set(),
|
||||
_auto_tts_enabled_chats=set(),
|
||||
platform=Platform.TELEGRAM,
|
||||
)
|
||||
|
||||
runner._sync_voice_mode_state_to_adapter(adapter)
|
||||
|
||||
assert adapter._auto_tts_disabled_chats == {"off_chat"}
|
||||
assert adapter._auto_tts_enabled_chats == {"on_chat", "tts_chat"}
|
||||
|
||||
def test_sync_pushes_config_default_onto_adapter(self, runner, monkeypatch):
|
||||
"""Issue #16007: ``voice.auto_tts`` must propagate to ``_auto_tts_default``."""
|
||||
from gateway.config import Platform
|
||||
|
||||
fake_cfg = {"voice": {"auto_tts": True}}
|
||||
monkeypatch.setattr(
|
||||
"hermes_cli.config.load_config",
|
||||
lambda: fake_cfg,
|
||||
)
|
||||
adapter = SimpleNamespace(
|
||||
_auto_tts_default=False,
|
||||
_auto_tts_disabled_chats=set(),
|
||||
_auto_tts_enabled_chats=set(),
|
||||
platform=Platform.TELEGRAM,
|
||||
)
|
||||
|
||||
runner._sync_voice_mode_state_to_adapter(adapter)
|
||||
|
||||
assert adapter._auto_tts_default is True
|
||||
|
||||
def test_restart_restores_voice_off_state(self, runner, tmp_path):
|
||||
from gateway.config import Platform
|
||||
runner._VOICE_MODE_PATH.write_text(json.dumps({"telegram:123": "off"}))
|
||||
@@ -2753,56 +2706,3 @@ class TestUDPKeepalive:
|
||||
mock_conn.send_packet.assert_called_with(b'\xf8\xff\xfe')
|
||||
finally:
|
||||
DiscordAdapter._KEEPALIVE_INTERVAL = original_interval
|
||||
|
||||
|
||||
# =====================================================================
|
||||
# BasePlatformAdapter._should_auto_tts_for_chat — gate for auto-TTS
|
||||
# on voice input. Regression test for Issue #16007.
|
||||
# =====================================================================
|
||||
|
||||
class TestShouldAutoTtsForChat:
    """Precedence of the auto-TTS gate.

    Per-chat enable beats per-chat disable, which beats the config default.
    """

    def _make_adapter(self, *, default: bool, enabled=(), disabled=()):
        """Return ``(gate, stub)`` for calling the gate as ``gate(stub, chat_id)``.

        ``stub`` is a bare namespace carrying only the three attributes set
        below; ``gate`` is the method looked up on the class, so the stub is
        passed explicitly in place of ``self``.
        """
        from gateway.platforms.base import BasePlatformAdapter

        stub = SimpleNamespace(
            _auto_tts_default=default,
            _auto_tts_enabled_chats=set(enabled),
            _auto_tts_disabled_chats=set(disabled),
        )
        gate = BasePlatformAdapter._should_auto_tts_for_chat
        return gate, stub

    def test_default_false_no_override_suppresses(self):
        """Issue #16007: voice.auto_tts=False and no per-chat state → no TTS."""
        gate, stub = self._make_adapter(default=False)
        verdict = gate(stub, "chat1")
        assert verdict is False

    def test_default_true_no_override_fires(self):
        """With a True default and no per-chat state the gate returns True."""
        gate, stub = self._make_adapter(default=True)
        verdict = gate(stub, "chat1")
        assert verdict is True

    def test_explicit_enable_overrides_false_default(self):
        """``/voice on`` with config auto_tts=False still fires."""
        gate, stub = self._make_adapter(default=False, enabled={"chat1"})
        verdict = gate(stub, "chat1")
        assert verdict is True

    def test_explicit_disable_overrides_true_default(self):
        """``/voice off`` with config auto_tts=True still suppresses."""
        gate, stub = self._make_adapter(default=True, disabled={"chat1"})
        verdict = gate(stub, "chat1")
        assert verdict is False

    def test_enabled_wins_over_disabled(self):
        """A chat present in both sets is treated as enabled."""
        gate, stub = self._make_adapter(
            default=False,
            enabled={"chat1"},
            disabled={"chat1"},
        )
        verdict = gate(stub, "chat1")
        assert verdict is True

    def test_per_chat_isolation(self):
        """Enabling chat1 leaves chat2 on the (False) default."""
        gate, stub = self._make_adapter(default=False, enabled={"chat1"})
        enabled_chat = gate(stub, "chat1")
        assert enabled_chat is True
        other_chat = gate(stub, "chat2")
        assert other_chat is False
|
||||
|
||||
@@ -1,152 +0,0 @@
|
||||
"""Regression test for the `/model` picker confirmation display.
|
||||
|
||||
Bug (April 2026): after choosing a model from the interactive `/model` picker,
|
||||
``HermesCLI._apply_model_switch_result()`` printed ``ModelInfo.context_window``
|
||||
straight from models.dev, which always reports the vendor-wide value (e.g.
|
||||
gpt-5.5 = 1,050,000 on ``openai``). That ignored provider-specific caps — in
|
||||
particular, ChatGPT Codex OAuth enforces 272K on the same slug. The sibling
|
||||
``_handle_model_switch()`` (typed ``/model <name>``) was already fixed to use
|
||||
``resolve_display_context_length()``; the picker path was missed, causing
|
||||
"sometimes 1M, sometimes 272K" for the same model across sibling UI paths.
|
||||
|
||||
Fix: both display paths now go through ``resolve_display_context_length()``.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from unittest.mock import patch
|
||||
|
||||
from hermes_cli.model_switch import ModelSwitchResult
|
||||
|
||||
|
||||
class _FakeModelInfo:
|
||||
context_window = 1_050_000
|
||||
max_output = 0
|
||||
|
||||
def has_cost_data(self):
|
||||
return False
|
||||
|
||||
def format_capabilities(self):
|
||||
return ""
|
||||
|
||||
|
||||
class _StubCLI:
|
||||
"""Minimum attrs ``_apply_model_switch_result`` reads on ``self``."""
|
||||
agent = None
|
||||
model = ""
|
||||
provider = ""
|
||||
requested_provider = ""
|
||||
api_key = ""
|
||||
_explicit_api_key = ""
|
||||
base_url = ""
|
||||
_explicit_base_url = ""
|
||||
api_mode = ""
|
||||
_pending_model_switch_note = ""
|
||||
|
||||
|
||||
def _run_display(monkeypatch, result):
    """Run ``HermesCLI._apply_model_switch_result`` and return what it printed.

    ``cli._cprint`` is replaced with a recorder that stores ``str()`` of its
    first argument; the list of recorded strings is returned.
    """
    import cli as cli_mod

    printed: list[str] = []

    def _record(s, *a, **k):
        printed.append(str(s))

    def _skip_save(*a, **k):
        return None

    monkeypatch.setattr(cli_mod, "_cprint", _record)
    # Avoid writing to ~/.hermes/config.yaml during the test.
    monkeypatch.setattr(cli_mod, "save_config_value", _skip_save)

    stub = _StubCLI()
    cli_mod.HermesCLI._apply_model_switch_result(stub, result, False)
    return printed
|
||||
|
||||
|
||||
def test_picker_path_uses_provider_aware_context_on_codex(monkeypatch):
    """The Context line must show the resolver's value, not ``ModelInfo``'s.

    The resolver is patched to return 272,000 for a gpt-5.5 switch to
    ``openai-codex`` while the attached model info reports 1,050,000.
    """
    fields = {
        "success": True,
        "new_model": "gpt-5.5",
        "target_provider": "openai-codex",
        "provider_changed": True,
        "api_key": "",
        "base_url": "https://chatgpt.com/backend-api/codex",
        "api_mode": "codex_responses",
        "warning_message": "",
        "provider_label": "ChatGPT Codex",
        "resolved_via_alias": False,
        "capabilities": None,
        "model_info": _FakeModelInfo(),  # models.dev says 1.05M
        "is_global": False,
    }
    switch = ModelSwitchResult(**fields)

    resolver = patch(
        "agent.model_metadata.get_model_context_length",
        return_value=272_000,
    )
    with resolver:
        printed = _run_display(monkeypatch, switch)

    # First printed line mentioning the context size, or "" when none does.
    ctx_line = ""
    for line in printed:
        if "Context:" in line:
            ctx_line = line
            break

    assert "272,000" in ctx_line, (
        f"picker-path display must show Codex's 272K cap, got: {ctx_line!r}"
    )
    assert "1,050,000" not in ctx_line, (
        f"picker-path display leaked models.dev's 1.05M for Codex: {ctx_line!r}"
    )
|
||||
|
||||
|
||||
def test_picker_path_shows_vendor_value_when_no_provider_cap(monkeypatch):
    """When resolver and model info agree on 1,050,000, that value is shown.

    Exercised with an ``openrouter`` switch to ``openai/gpt-5.5`` and the
    resolver patched to return 1,050,000.
    """
    fields = {
        "success": True,
        "new_model": "openai/gpt-5.5",
        "target_provider": "openrouter",
        "provider_changed": True,
        "api_key": "",
        "base_url": "https://openrouter.ai/api/v1",
        "api_mode": "chat_completions",
        "warning_message": "",
        "provider_label": "OpenRouter",
        "resolved_via_alias": False,
        "capabilities": None,
        "model_info": _FakeModelInfo(),
        "is_global": False,
    }
    switch = ModelSwitchResult(**fields)

    resolver = patch(
        "agent.model_metadata.get_model_context_length",
        return_value=1_050_000,
    )
    with resolver:
        printed = _run_display(monkeypatch, switch)

    # First printed line mentioning the context size, or "" when none does.
    ctx_line = ""
    for line in printed:
        if "Context:" in line:
            ctx_line = line
            break

    assert "1,050,000" in ctx_line, (
        f"OpenRouter gpt-5.5 should show 1.05M context, got: {ctx_line!r}"
    )
|
||||
|
||||
|
||||
def test_picker_path_falls_back_to_model_info_when_resolver_empty(monkeypatch):
    """With the resolver patched to return ``None``, the Context line still
    shows ``ModelInfo.context_window`` (1,050,000) instead of nothing.
    """
    fields = {
        "success": True,
        "new_model": "some-model",
        "target_provider": "some-provider",
        "provider_changed": True,
        "api_key": "",
        "base_url": "",
        "api_mode": "chat_completions",
        "warning_message": "",
        "provider_label": "Some Provider",
        "resolved_via_alias": False,
        "capabilities": None,
        "model_info": _FakeModelInfo(),  # context_window = 1_050_000
        "is_global": False,
    }
    switch = ModelSwitchResult(**fields)

    resolver = patch(
        "agent.model_metadata.get_model_context_length",
        return_value=None,
    )
    with resolver:
        printed = _run_display(monkeypatch, switch)

    # First printed line mentioning the context size, or "" when none does.
    ctx_line = ""
    for line in printed:
        if "Context:" in line:
            ctx_line = line
            break

    assert "1,050,000" in ctx_line, (
        f"resolver-empty path should fall back to ModelInfo, got: {ctx_line!r}"
    )
|
||||
@@ -1,486 +0,0 @@
|
||||
"""Tests for `hermes fallback` — chain reading, add/remove/clear, legacy migration."""
|
||||
from __future__ import annotations
|
||||
|
||||
import io
|
||||
import types
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
import yaml
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Shared fixture — isolate HERMES_HOME so save_config writes to tmp_path
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@pytest.fixture()
def isolated_home(tmp_path, monkeypatch):
    """Redirect ``Path.home()`` and ``HERMES_HOME`` into ``tmp_path``.

    Creates ``tmp_path/.hermes`` and returns ``tmp_path`` (the fake home
    directory, i.e. the parent of ``.hermes``).
    """
    hermes_dir = tmp_path / ".hermes"
    monkeypatch.setattr(Path, "home", lambda: tmp_path)
    hermes_dir.mkdir(exist_ok=True)
    monkeypatch.setenv("HERMES_HOME", str(hermes_dir))
    return tmp_path
|
||||
|
||||
|
||||
def _write_config(home: Path, data: dict) -> None:
    """Dump ``data`` as YAML into ``<home>/.hermes/config.yaml`` (UTF-8)."""
    target = home / ".hermes" / "config.yaml"
    serialized = yaml.safe_dump(data)
    target.write_text(serialized, encoding="utf-8")
|
||||
|
||||
|
||||
def _read_config(home: Path) -> dict:
    """Load ``<home>/.hermes/config.yaml``; a falsy document becomes ``{}``."""
    raw = (home / ".hermes" / "config.yaml").read_text(encoding="utf-8")
    parsed = yaml.safe_load(raw)
    return parsed if parsed else {}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _read_chain / _write_chain
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestReadChain:
    """``_read_chain`` turns a config dict into a list of fallback entries."""

    def test_returns_empty_list_when_unset(self):
        """An empty config produces an empty chain."""
        from hermes_cli.fallback_cmd import _read_chain

        chain = _read_chain({})
        assert chain == []

    def test_reads_new_list_format(self):
        """A ``fallback_providers`` list is returned entry for entry."""
        from hermes_cli.fallback_cmd import _read_chain

        pairs = [
            ("openrouter", "anthropic/claude-sonnet-4.6"),
            ("nous", "Hermes-4-Llama-3.1-405B"),
        ]
        # Input and expectation are built from distinct dict objects.
        config = {
            "fallback_providers": [
                {"provider": prov, "model": mdl} for prov, mdl in pairs
            ]
        }
        expected = [{"provider": prov, "model": mdl} for prov, mdl in pairs]
        assert _read_chain(config) == expected

    def test_migrates_legacy_single_dict(self):
        """A legacy ``fallback_model`` dict is read as a one-entry chain."""
        from hermes_cli.fallback_cmd import _read_chain

        legacy = {"fallback_model": {"provider": "openrouter", "model": "gpt-5.4"}}
        chain = _read_chain(legacy)
        assert chain == [{"provider": "openrouter", "model": "gpt-5.4"}]

    def test_skips_incomplete_entries(self):
        """Entries lacking provider or model, and non-dict items, are dropped."""
        from hermes_cli.fallback_cmd import _read_chain

        noisy = [
            {"provider": "openrouter"},  # missing model
            {"model": "gpt-5.4"},  # missing provider
            {"provider": "nous", "model": "foo"},  # valid
            "not-a-dict",  # noise
        ]
        chain = _read_chain({"fallback_providers": noisy})
        assert chain == [{"provider": "nous", "model": "foo"}]

    def test_returns_copies_not_aliases(self):
        """Mutating a returned entry must not touch the source config."""
        from hermes_cli.fallback_cmd import _read_chain

        config = {"fallback_providers": [{"provider": "nous", "model": "foo"}]}
        chain = _read_chain(config)
        chain[0]["provider"] = "mutated"
        original_entry = config["fallback_providers"][0]
        assert original_entry["provider"] == "nous"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _extract_fallback_from_model_cfg
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestExtractFallback:
    """``_extract_fallback_from_model_cfg`` maps a ``model`` config section to
    a fallback entry, or ``None`` when it cannot.
    """

    def test_extracts_from_default_field(self):
        """The ``default`` field becomes the entry's ``model``."""
        from hermes_cli.fallback_cmd import _extract_fallback_from_model_cfg

        section = {"provider": "openrouter", "default": "anthropic/claude-sonnet-4.6"}
        expected = {
            "provider": "openrouter",
            "model": "anthropic/claude-sonnet-4.6",
        }
        assert _extract_fallback_from_model_cfg(section) == expected

    def test_extracts_optional_base_url_and_api_mode(self):
        """``base_url`` and ``api_mode`` are carried over when present."""
        from hermes_cli.fallback_cmd import _extract_fallback_from_model_cfg

        section = {
            "provider": "custom",
            "default": "local-model",
            "base_url": "http://localhost:11434/v1",
            "api_mode": "chat_completions",
        }
        expected = {
            "provider": "custom",
            "model": "local-model",
            "base_url": "http://localhost:11434/v1",
            "api_mode": "chat_completions",
        }
        assert _extract_fallback_from_model_cfg(section) == expected

    def test_returns_none_without_provider(self):
        """A section without ``provider`` yields ``None``."""
        from hermes_cli.fallback_cmd import _extract_fallback_from_model_cfg

        extracted = _extract_fallback_from_model_cfg({"default": "foo"})
        assert extracted is None

    def test_returns_none_without_model(self):
        """A section without ``default`` yields ``None``."""
        from hermes_cli.fallback_cmd import _extract_fallback_from_model_cfg

        extracted = _extract_fallback_from_model_cfg({"provider": "openrouter"})
        assert extracted is None

    def test_returns_none_for_non_dict(self):
        """A plain string and ``None`` both yield ``None``."""
        from hermes_cli.fallback_cmd import _extract_fallback_from_model_cfg

        from_string = _extract_fallback_from_model_cfg("plain-string")
        assert from_string is None
        from_none = _extract_fallback_from_model_cfg(None)
        assert from_none is None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# cmd_fallback_list
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestListCommand:
    """Printed output of ``cmd_fallback_list`` for several config states."""

    def test_list_empty(self, isolated_home, capsys):
        """An empty config prints the no-fallbacks notice and the add hint."""
        _write_config(isolated_home, {})
        from hermes_cli.fallback_cmd import cmd_fallback_list

        cmd_fallback_list(types.SimpleNamespace())
        printed = capsys.readouterr().out
        assert "No fallback providers configured" in printed
        assert "hermes fallback add" in printed

    def test_list_with_entries(self, isolated_home, capsys):
        """Two configured entries are listed together with the primary model."""
        config = {
            "model": {"provider": "anthropic", "default": "claude-sonnet-4-6"},
            "fallback_providers": [
                {"provider": "openrouter", "model": "anthropic/claude-sonnet-4.6"},
                {"provider": "nous", "model": "Hermes-4"},
            ],
        }
        _write_config(isolated_home, config)
        from hermes_cli.fallback_cmd import cmd_fallback_list

        cmd_fallback_list(types.SimpleNamespace())
        printed = capsys.readouterr().out
        assert "Fallback chain (2 entries)" in printed
        assert "anthropic/claude-sonnet-4.6" in printed
        assert "Hermes-4" in printed
        # Primary should be shown too
        assert "claude-sonnet-4-6" in printed

    def test_list_migrates_legacy_for_display(self, isolated_home, capsys):
        """A legacy ``fallback_model`` dict is displayed as a one-entry chain."""
        legacy = {"fallback_model": {"provider": "openrouter", "model": "gpt-5.4"}}
        _write_config(isolated_home, legacy)
        from hermes_cli.fallback_cmd import cmd_fallback_list

        cmd_fallback_list(types.SimpleNamespace())
        printed = capsys.readouterr().out
        assert "1 entry" in printed
        assert "gpt-5.4" in printed
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# cmd_fallback_add — mock select_provider_and_model
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestAddCommand:
    """``cmd_fallback_add`` with ``select_provider_and_model`` mocked out."""

    @staticmethod
    def _picker_selecting(selection):
        """Build a fake picker that stores ``selection`` as ``config["model"]``."""

        def fake_picker(args=None):
            # Simulate what the real picker does: writes the selection to config["model"]
            from hermes_cli.config import load_config, save_config

            cfg = load_config()
            cfg["model"] = dict(selection)
            save_config(cfg)

        return fake_picker

    @staticmethod
    def _run_add(picker):
        """Invoke ``cmd_fallback_add`` with the picker and TTY check patched."""
        with patch("hermes_cli.main.select_provider_and_model", side_effect=picker):
            with patch("hermes_cli.main._require_tty"):
                from hermes_cli.fallback_cmd import cmd_fallback_add

                cmd_fallback_add(types.SimpleNamespace())

    def test_add_appends_new_entry(self, isolated_home, capsys):
        """A new selection is appended and the primary model is left alone."""
        _write_config(isolated_home, {
            "model": {"provider": "anthropic", "default": "claude-sonnet-4-6"},
        })
        picker = self._picker_selecting({
            "provider": "openrouter",
            "default": "anthropic/claude-sonnet-4.6",
            "base_url": "https://openrouter.ai/api/v1",
            "api_mode": "chat_completions",
        })

        self._run_add(picker)

        saved = _read_config(isolated_home)
        # Primary is preserved
        primary = saved["model"]
        assert primary["provider"] == "anthropic"
        assert primary["default"] == "claude-sonnet-4-6"
        # Fallback was appended
        expected_chain = [
            {
                "provider": "openrouter",
                "model": "anthropic/claude-sonnet-4.6",
                "base_url": "https://openrouter.ai/api/v1",
                "api_mode": "chat_completions",
            }
        ]
        assert saved["fallback_providers"] == expected_chain
        printed = capsys.readouterr().out
        assert "Added fallback" in printed

    def test_add_rejects_duplicate(self, isolated_home, capsys):
        """Picking an entry already in the chain does not add a second copy."""
        _write_config(isolated_home, {
            "model": {"provider": "anthropic", "default": "claude-sonnet-4-6"},
            "fallback_providers": [
                {"provider": "openrouter", "model": "gpt-5.4"},
            ],
        })
        picker = self._picker_selecting({"provider": "openrouter", "default": "gpt-5.4"})

        self._run_add(picker)

        saved = _read_config(isolated_home)
        # Should still have exactly one entry
        assert len(saved["fallback_providers"]) == 1
        printed = capsys.readouterr().out
        assert "already in the fallback chain" in printed

    def test_add_rejects_same_as_primary(self, isolated_home, capsys):
        """Picking the current primary adds nothing."""
        _write_config(isolated_home, {
            "model": {"provider": "openrouter", "default": "gpt-5.4"},
        })
        # User picks the same thing that's already the primary
        picker = self._picker_selecting({"provider": "openrouter", "default": "gpt-5.4"})

        self._run_add(picker)

        saved = _read_config(isolated_home)
        assert "fallback_providers" not in saved or saved["fallback_providers"] == []
        printed = capsys.readouterr().out
        assert "matches the current primary" in printed

    def test_add_preserves_primary_when_picker_changes_it(self, isolated_home):
        """The picker mutates config["model"]; fallback_add must restore the primary."""
        _write_config(isolated_home, {
            "model": {
                "provider": "anthropic",
                "default": "claude-sonnet-4-6",
                "base_url": "https://api.anthropic.com",
                "api_mode": "anthropic_messages",
            },
        })
        picker = self._picker_selecting({
            "provider": "openrouter",
            "default": "anthropic/claude-sonnet-4.6",
            "base_url": "https://openrouter.ai/api/v1",
            "api_mode": "chat_completions",
        })

        self._run_add(picker)

        saved = _read_config(isolated_home)
        # Primary exactly as it was
        primary = saved["model"]
        assert primary["provider"] == "anthropic"
        assert primary["default"] == "claude-sonnet-4-6"
        assert primary["base_url"] == "https://api.anthropic.com"
        assert primary["api_mode"] == "anthropic_messages"
        # Fallback added
        chain = saved["fallback_providers"]
        assert len(chain) == 1
        assert chain[0]["provider"] == "openrouter"

    def test_add_noop_when_picker_cancelled(self, isolated_home, capsys):
        """A picker that changes nothing results in no fallback being added."""
        _write_config(isolated_home, {
            "model": {"provider": "anthropic", "default": "claude-sonnet-4-6"},
        })

        def fake_picker(args=None):
            # User cancelled — no change to config
            return None

        self._run_add(fake_picker)

        saved = _read_config(isolated_home)
        assert "fallback_providers" not in saved or saved["fallback_providers"] == []
        printed = capsys.readouterr().out
        # Either "No fallback added" (picker fully cancelled) or "matches the current primary"
        # (picker left config untouched) — both indicate a non-add outcome.
        assert ("No fallback added" in printed) or ("matches the current primary" in printed)

    def test_add_noop_when_picker_clears_model(self, isolated_home, capsys):
        """Simulate picker explicitly clearing model.default (unusual but possible)."""
        _write_config(isolated_home, {
            "model": {"provider": "anthropic", "default": "claude-sonnet-4-6"},
        })
        picker = self._picker_selecting({"provider": "", "default": ""})

        self._run_add(picker)

        printed = capsys.readouterr().out
        assert "No fallback added" in printed
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# cmd_fallback_remove
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestRemoveCommand:
    """``cmd_fallback_remove`` with the curses chooser mocked out."""

    def test_remove_empty_chain(self, isolated_home, capsys):
        """With no chain configured the command reports nothing to remove."""
        _write_config(isolated_home, {})
        from hermes_cli.fallback_cmd import cmd_fallback_remove

        cmd_fallback_remove(types.SimpleNamespace())
        printed = capsys.readouterr().out
        assert "nothing to remove" in printed

    def test_remove_selected_entry(self, isolated_home, capsys):
        """Choosing index 1 drops the middle entry and keeps the others in order."""
        first = {"provider": "openrouter", "model": "gpt-5.4"}
        middle = {"provider": "nous", "model": "Hermes-4"}
        last = {"provider": "anthropic", "model": "claude-sonnet-4-6"}
        _write_config(isolated_home, {"fallback_providers": [first, middle, last]})

        # Picker returns index 1 (the middle entry, "nous / Hermes-4")
        chooser = patch("hermes_cli.setup._curses_prompt_choice", return_value=1)
        with chooser:
            from hermes_cli.fallback_cmd import cmd_fallback_remove

            cmd_fallback_remove(types.SimpleNamespace())

        saved = _read_config(isolated_home)
        assert saved["fallback_providers"] == [
            {"provider": "openrouter", "model": "gpt-5.4"},
            {"provider": "anthropic", "model": "claude-sonnet-4-6"},
        ]
        printed = capsys.readouterr().out
        assert "Removed fallback" in printed
        assert "Hermes-4" in printed

    def test_remove_cancel_keeps_chain(self, isolated_home):
        """Choosing the trailing Cancel item leaves the chain untouched."""
        _write_config(isolated_home, {
            "fallback_providers": [
                {"provider": "openrouter", "model": "gpt-5.4"},
            ],
        })

        # Cancel = last item (index == len(chain) == 1 in our menu)
        chooser = patch("hermes_cli.setup._curses_prompt_choice", return_value=1)
        with chooser:
            from hermes_cli.fallback_cmd import cmd_fallback_remove

            cmd_fallback_remove(types.SimpleNamespace())

        saved = _read_config(isolated_home)
        assert len(saved["fallback_providers"]) == 1
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# cmd_fallback_clear
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestClearCommand:
    """``cmd_fallback_clear`` with ``builtins.input`` stubbed for the prompt."""

    def test_clear_empty_chain(self, isolated_home, capsys):
        """With no chain configured the command reports nothing to clear."""
        _write_config(isolated_home, {})
        from hermes_cli.fallback_cmd import cmd_fallback_clear

        cmd_fallback_clear(types.SimpleNamespace())
        printed = capsys.readouterr().out
        assert "nothing to clear" in printed

    def test_clear_with_confirmation(self, isolated_home, capsys, monkeypatch):
        """Answering "y" empties the stored chain."""
        chain = [
            {"provider": "openrouter", "model": "gpt-5.4"},
            {"provider": "nous", "model": "Hermes-4"},
        ]
        _write_config(isolated_home, {"fallback_providers": chain})
        monkeypatch.setattr("builtins.input", lambda *a, **kw: "y")
        from hermes_cli.fallback_cmd import cmd_fallback_clear

        cmd_fallback_clear(types.SimpleNamespace())

        saved = _read_config(isolated_home)
        assert saved.get("fallback_providers") == []
        printed = capsys.readouterr().out
        assert "Fallback chain cleared" in printed

    def test_clear_cancelled(self, isolated_home, monkeypatch):
        """Answering "n" keeps the stored chain."""
        chain = [{"provider": "openrouter", "model": "gpt-5.4"}]
        _write_config(isolated_home, {"fallback_providers": chain})
        monkeypatch.setattr("builtins.input", lambda *a, **kw: "n")
        from hermes_cli.fallback_cmd import cmd_fallback_clear

        cmd_fallback_clear(types.SimpleNamespace())

        saved = _read_config(isolated_home)
        assert len(saved["fallback_providers"]) == 1
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# cmd_fallback dispatcher
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestDispatcher:
    """Routing of ``fallback_command`` values by ``cmd_fallback``."""

    def test_no_subcommand_lists(self, isolated_home, capsys):
        """``fallback_command=None`` produces the list output."""
        _write_config(isolated_home, {})
        from hermes_cli.fallback_cmd import cmd_fallback

        args = types.SimpleNamespace(fallback_command=None)
        cmd_fallback(args)
        printed = capsys.readouterr().out
        assert "No fallback providers configured" in printed

    def test_list_alias(self, isolated_home, capsys):
        """``ls`` produces the list output."""
        _write_config(isolated_home, {})
        from hermes_cli.fallback_cmd import cmd_fallback

        args = types.SimpleNamespace(fallback_command="ls")
        cmd_fallback(args)
        printed = capsys.readouterr().out
        assert "No fallback providers configured" in printed

    def test_remove_alias(self, isolated_home, capsys):
        """``rm`` produces the remove output."""
        _write_config(isolated_home, {})
        from hermes_cli.fallback_cmd import cmd_fallback

        args = types.SimpleNamespace(fallback_command="rm")
        cmd_fallback(args)
        printed = capsys.readouterr().out
        assert "nothing to remove" in printed

    def test_unknown_subcommand_exits(self, isolated_home):
        """An unrecognised subcommand raises ``SystemExit``."""
        _write_config(isolated_home, {})
        from hermes_cli.fallback_cmd import cmd_fallback

        args = types.SimpleNamespace(fallback_command="nope")
        with pytest.raises(SystemExit):
            cmd_fallback(args)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# argparse wiring — verify the subparser is registered
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestArgparseWiring:
    """Check that ``hermes fallback`` is registered in the CLI parser.

    The check runs ``python -m hermes_cli.main fallback --help`` in a
    subprocess and inspects the help text it prints.
    """

    def test_fallback_help_lists_subcommands(self):
        """``--help`` exits 0 and mentions list, add, remove and clear."""
        import subprocess
        import sys

        command = [sys.executable, "-m", "hermes_cli.main", "fallback", "--help"]
        completed = subprocess.run(
            command,
            capture_output=True,
            text=True,
            timeout=30,
        )
        # --help exits 0
        assert completed.returncode == 0, f"stderr: {completed.stderr}"
        out = completed.stdout + completed.stderr
        # All four subcommands should appear in help
        for subcommand in ("list", "add", "remove", "clear"):
            assert subcommand in out
|
||||
@@ -1,210 +0,0 @@
|
||||
"""Tests for the kanban CLI surface (hermes_cli.kanban)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from hermes_cli import kanban as kc
|
||||
from hermes_cli import kanban_db as kb
|
||||
|
||||
|
||||
@pytest.fixture
def kanban_home(tmp_path, monkeypatch):
    """Create ``tmp_path/.hermes``, point the environment at it, and init the DB.

    Sets ``HERMES_HOME`` to the new directory, makes ``Path.home()`` return
    ``tmp_path``, calls ``kb.init_db()`` and returns the ``.hermes`` directory.
    """
    hermes_dir = tmp_path / ".hermes"
    hermes_dir.mkdir()
    monkeypatch.setattr(Path, "home", lambda: tmp_path)
    monkeypatch.setenv("HERMES_HOME", str(hermes_dir))
    kb.init_db()
    return hermes_dir
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Workspace flag parsing
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@pytest.mark.parametrize(
    ("value", "expected"),
    [
        ("scratch", ("scratch", None)),
        ("worktree", ("worktree", None)),
        ("dir:/tmp/work", ("dir", "/tmp/work")),
    ],
)
def test_parse_workspace_flag_valid(value, expected):
    """Each accepted flag parses to the expected ``(kind, path)`` tuple."""
    parsed = kc._parse_workspace_flag(value)
    assert parsed == expected
|
||||
|
||||
|
||||
def test_parse_workspace_flag_expands_user():
    """``dir:~/vault`` comes back with the leading ``~`` expanded away."""
    parsed = kc._parse_workspace_flag("dir:~/vault")
    kind, path = parsed
    assert kind == "dir"
    assert path.endswith("/vault")
    assert not path.startswith("~")
|
||||
|
||||
|
||||
@pytest.mark.parametrize("bad", ["cloud", "dir:", "worktree:/x"])
def test_parse_workspace_flag_rejects(bad):
    """Malformed workspace flags raise ``argparse.ArgumentTypeError``."""
    with pytest.raises(argparse.ArgumentTypeError):
        kc._parse_workspace_flag(bad)


def test_parse_workspace_flag_empty_defaults_to_scratch():
    """An empty flag is not an error; it parses to the scratch default.

    Kept separate from the rejection cases so the "rejects" test contains no
    branch that asserts acceptance.
    """
    assert kc._parse_workspace_flag("") == ("scratch", None)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# run_slash smoke tests (end-to-end via the same entry both CLI and gateway use)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_run_slash_no_args_shows_usage(kanban_home):
    """An empty command line returns text that looks like usage help."""
    lowered = kc.run_slash("").lower()
    assert "kanban" in lowered
    assert any(word in lowered for word in ("create", "subcommand", "action"))
|
||||
|
||||
|
||||
def test_run_slash_create_and_list(kanban_home):
    """A created task shows up, with its assignee, in ``list``."""
    created = kc.run_slash("create 'ship feature' --assignee alice")
    assert "Created" in created

    listing = kc.run_slash("list")
    for fragment in ("ship feature", "alice"):
        assert fragment in listing
|
||||
|
||||
|
||||
def test_run_slash_create_with_parent_and_cascade(kanban_home):
    """Completing a parent makes its child appear in the ``ready`` list."""
    import re

    # Parent then child via --parent
    parent_out = kc.run_slash("create 'parent' --assignee alice")
    # Extract the "t_xxxx" id from "Created t_xxxx (ready, ...)"
    found = re.search(r"(t_[a-f0-9]+)", parent_out)
    assert found
    parent_id = found.group(1)

    child_out = kc.run_slash(f"create 'child' --assignee bob --parent {parent_id}")
    assert "todo" in child_out  # child starts as todo

    # Complete parent; list should promote child to ready
    kc.run_slash(f"complete {parent_id}")
    # Explicit filter: child should now be ready (was todo before complete).
    ready_listing = kc.run_slash("list --status ready")
    assert "child" in ready_listing
|
||||
|
||||
|
||||
def test_run_slash_show_includes_comments(kanban_home):
    """A comment added to a task appears in that task's ``show`` output."""
    import re

    out = kc.run_slash("create 'x'")
    # Fail with the actual output if no task id was printed, rather than
    # with an AttributeError from calling .group() on None.
    match = re.search(r"(t_[a-f0-9]+)", out)
    assert match, f"no task id in create output: {out!r}"
    tid = match.group(1)

    kc.run_slash(f"comment {tid} 'source is paywalled'")
    show = kc.run_slash(f"show {tid}")
    assert "source is paywalled" in show
|
||||
|
||||
|
||||
def test_run_slash_block_unblock_cycle(kanban_home):
    """A claimed task can be blocked and then unblocked."""
    import re

    out = kc.run_slash("create 'x' --assignee alice")
    # Fail with the actual output if no task id was printed, rather than
    # with an AttributeError from calling .group() on None.
    match = re.search(r"(t_[a-f0-9]+)", out)
    assert match, f"no task id in create output: {out!r}"
    tid = match.group(1)

    # Claim first so block() finds it running
    kc.run_slash(f"claim {tid}")
    assert "Blocked" in kc.run_slash(f"block {tid} 'need decision'")
    assert "Unblocked" in kc.run_slash(f"unblock {tid}")
|
||||
|
||||
|
||||
def test_run_slash_json_output(kanban_home):
    """``create --json`` returns a JSON document describing the new task."""
    raw = kc.run_slash("create 'jsontask' --assignee alice --json")
    task = json.loads(raw)
    assert task["title"] == "jsontask"
    assert task["assignee"] == "alice"
    assert task["status"] == "ready"
|
||||
|
||||
|
||||
def test_run_slash_dispatch_dry_run_counts(kanban_home):
    """``dispatch --dry-run`` output contains a ``Spawned:`` line."""
    for command in ("create 'a' --assignee alice", "create 'b' --assignee bob"):
        kc.run_slash(command)
    report = kc.run_slash("dispatch --dry-run")
    assert "Spawned:" in report
|
||||
|
||||
|
||||
def test_run_slash_context_output_format(kanban_home):
    """``context`` output contains the task's title, body and comment text."""
    import re

    out = kc.run_slash("create 'tech spec' --assignee alice --body 'write an RFC'")
    # Fail with the actual output if no task id was printed, rather than
    # with an AttributeError from calling .group() on None.
    match = re.search(r"(t_[a-f0-9]+)", out)
    assert match, f"no task id in create output: {out!r}"
    tid = match.group(1)

    kc.run_slash(f"comment {tid} 'remember to include performance section'")
    ctx = kc.run_slash(f"context {tid}")
    assert "tech spec" in ctx
    assert "write an RFC" in ctx
    assert "performance section" in ctx
|
||||
|
||||
|
||||
def test_run_slash_tenant_filter(kanban_home):
    """``list --tenant X`` shows only tasks created for tenant X."""
    kc.run_slash("create 'biz-a task' --tenant biz-a --assignee alice")
    kc.run_slash("create 'biz-b task' --tenant biz-b --assignee alice")

    listing_a = kc.run_slash("list --tenant biz-a")
    listing_b = kc.run_slash("list --tenant biz-b")

    assert "biz-a task" in listing_a
    assert "biz-b task" not in listing_a
    assert "biz-b task" in listing_b
    assert "biz-a task" not in listing_b
|
||||
|
||||
|
||||
def test_run_slash_usage_error_returns_message(kanban_home):
    """A bare ``create`` returns a usage or error message."""
    # Missing required argument for create
    lowered = kc.run_slash("create").lower()
    assert any(word in lowered for word in ("usage", "error"))
|
||||
|
||||
|
||||
def test_run_slash_assign_reassigns(kanban_home):
    """``assign`` changes the assignee reported by ``show``."""
    import re

    out = kc.run_slash("create 'x' --assignee alice")
    # Fail with the actual output if no task id was printed, rather than
    # with an AttributeError from calling .group() on None.
    match = re.search(r"(t_[a-f0-9]+)", out)
    assert match, f"no task id in create output: {out!r}"
    tid = match.group(1)

    assert "Assigned" in kc.run_slash(f"assign {tid} bob")
    show = kc.run_slash(f"show {tid}")
    assert "bob" in show
|
||||
|
||||
|
||||
def test_run_slash_link_unlink(kanban_home):
    """``link`` then ``unlink`` two tasks; the second shows "todo" while linked."""
    import re

    out_a = kc.run_slash("create 'a'")
    out_b = kc.run_slash("create 'b'")
    ta = re.search(r"(t_[a-f0-9]+)", out_a).group(1)
    tb = re.search(r"(t_[a-f0-9]+)", out_b).group(1)

    link_reply = kc.run_slash(f"link {ta} {tb}")
    assert "Linked" in link_reply
    # After link, b is todo
    details = kc.run_slash(f"show {tb}")
    assert "todo" in details
    unlink_reply = kc.run_slash(f"unlink {ta} {tb}")
    assert "Unlinked" in unlink_reply
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Integration with the COMMAND_REGISTRY
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_kanban_is_resolvable():
    """``resolve_command("kanban")`` returns a command named "kanban"."""
    from hermes_cli.commands import resolve_command

    resolved = resolve_command("kanban")
    assert resolved is not None
    assert resolved.name == "kanban"
|
||||
|
||||
|
||||
def test_kanban_bypasses_active_session_guard():
    """``should_bypass_active_session`` is truthy for "kanban"."""
    from hermes_cli.commands import should_bypass_active_session

    bypasses = should_bypass_active_session("kanban")
    assert bypasses
|
||||
|
||||
|
||||
def test_kanban_in_autocomplete_table():
    """``/kanban`` is in COMMANDS and lists ``create``/``dispatch`` subcommands."""
    from hermes_cli.commands import COMMANDS, SUBCOMMANDS

    assert "/kanban" in COMMANDS
    # A missing or empty entry is treated as "no subcommands".
    kanban_subs = SUBCOMMANDS.get("/kanban") or []
    for sub in ("create", "dispatch"):
        assert sub in kanban_subs
|
||||
|
||||
|
||||
def test_kanban_not_gateway_only():
    """The registry entry for "kanban" is neither CLI-only nor gateway-only."""
    # kanban is available in BOTH CLI and gateway surfaces.
    from hermes_cli.commands import COMMAND_REGISTRY

    entry = next(item for item in COMMAND_REGISTRY if item.name == "kanban")
    assert not entry.cli_only
    assert not entry.gateway_only
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,438 +0,0 @@
|
||||
"""Tests for the Kanban DB layer (hermes_cli.kanban_db)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import concurrent.futures
|
||||
import os
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from hermes_cli import kanban_db as kb
|
||||
|
||||
|
||||
@pytest.fixture
def kanban_home(tmp_path, monkeypatch):
    """Provide an isolated HERMES_HOME under ``tmp_path`` with a fresh kanban DB.

    Sets ``HERMES_HOME``, patches ``Path.home`` to return ``tmp_path``, runs
    ``kb.init_db()`` and returns the created ``.hermes`` directory.
    """
    hermes_dir = tmp_path / ".hermes"
    hermes_dir.mkdir()
    monkeypatch.setenv("HERMES_HOME", str(hermes_dir))
    monkeypatch.setattr(Path, "home", lambda: tmp_path)
    kb.init_db()
    return hermes_dir
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Schema / init
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_init_db_is_idempotent(kanban_home):
    """A second ``init_db()`` call keeps the previously created task."""
    # Second call should not error or drop data.
    with kb.connect() as conn:
        kb.create_task(conn, title="persisted")
    kb.init_db()
    with kb.connect() as conn:
        titles = [task.title for task in kb.list_tasks(conn)]
    assert titles == ["persisted"]
|
||||
|
||||
|
||||
def test_init_creates_expected_tables(kanban_home):
    """The initialised DB contains the four kanban tables."""
    required = {"tasks", "task_links", "task_comments", "task_events"}
    query = "SELECT name FROM sqlite_master WHERE type='table' ORDER BY name"
    with kb.connect() as conn:
        found = {row["name"] for row in conn.execute(query).fetchall()}
    # Extra tables are allowed; only the required ones must be present.
    assert required <= found
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Task creation + status inference
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_create_task_no_parents_is_ready(kanban_home):
    """A parentless task is "ready", keeps its assignee, and uses a scratch workspace."""
    with kb.connect() as conn:
        task_id = kb.create_task(conn, title="ship it", assignee="alice")
        task = kb.get_task(conn, task_id)
    assert task is not None
    observed = (task.status, task.assignee, task.workspace_kind)
    assert observed == ("ready", "alice", "scratch")
|
||||
|
||||
|
||||
def test_create_task_with_parent_is_todo_until_parent_done(kanban_home):
    """A child starts as "todo" and becomes "ready" once its parent completes."""
    with kb.connect() as conn:
        parent_id = kb.create_task(conn, title="parent")
        child_id = kb.create_task(conn, title="child", parents=[parent_id])

        status_before = kb.get_task(conn, child_id).status
        assert status_before == "todo"

        kb.complete_task(conn, parent_id, result="ok")
        status_after = kb.get_task(conn, child_id).status
        assert status_after == "ready"
|
||||
|
||||
|
||||
def test_create_task_unknown_parent_errors(kanban_home):
    """Creating a task with a nonexistent parent id raises ``ValueError``."""
    with kb.connect() as conn:
        with pytest.raises(ValueError, match="unknown parent"):
            kb.create_task(conn, title="orphan", parents=["t_ghost"])
|
||||
|
||||
|
||||
def test_workspace_kind_validation(kanban_home):
    """The workspace kind "cloud" is rejected with a ``ValueError``."""
    with kb.connect() as conn:
        with pytest.raises(ValueError, match="workspace_kind"):
            kb.create_task(conn, title="bad ws", workspace_kind="cloud")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Links + dependency resolution
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_link_demotes_ready_child_to_todo_when_parent_not_done(kanban_home):
    """Linking a ready task under an unfinished parent moves it to "todo"."""
    with kb.connect() as conn:
        parent_id = kb.create_task(conn, title="a")
        child_id = kb.create_task(conn, title="b")

        before = kb.get_task(conn, child_id).status
        assert before == "ready"

        kb.link_tasks(conn, parent_id, child_id)
        after = kb.get_task(conn, child_id).status
        assert after == "todo"
|
||||
|
||||
|
||||
def test_link_keeps_ready_child_when_parent_already_done(kanban_home):
    """Linking under an already-completed parent leaves the child "ready"."""
    with kb.connect() as conn:
        parent_id = kb.create_task(conn, title="a")
        kb.complete_task(conn, parent_id)
        child_id = kb.create_task(conn, title="b")

        before = kb.get_task(conn, child_id).status
        assert before == "ready"

        kb.link_tasks(conn, parent_id, child_id)
        after = kb.get_task(conn, child_id).status
        assert after == "ready"
|
||||
|
||||
|
||||
def test_link_rejects_self_loop(kanban_home):
    """Linking a task to itself raises ``ValueError``."""
    with kb.connect() as conn:
        task_id = kb.create_task(conn, title="a")
        with pytest.raises(ValueError, match="itself"):
            kb.link_tasks(conn, task_id, task_id)
|
||||
|
||||
|
||||
def test_link_detects_cycle(kanban_home):
    """In the chain a -> b -> c, linking c or b back above a raises ``ValueError``."""
    with kb.connect() as conn:
        root = kb.create_task(conn, title="a")
        middle = kb.create_task(conn, title="b", parents=[root])
        leaf = kb.create_task(conn, title="c", parents=[middle])
        # Try the leaf first, then the middle task, as the new parent of root.
        for descendant in (leaf, middle):
            with pytest.raises(ValueError, match="cycle"):
                kb.link_tasks(conn, descendant, root)
|
||||
|
||||
|
||||
def test_recompute_ready_cascades_through_chain(kanban_home):
    """Completing each task in a -> b -> c readies the next one in turn."""
    with kb.connect() as conn:
        first = kb.create_task(conn, title="a")
        second = kb.create_task(conn, title="b", parents=[first])
        third = kb.create_task(conn, title="c", parents=[second])

        initial = [kb.get_task(conn, tid).status for tid in (first, second, third)]
        assert initial == ["ready", "todo", "todo"]

        # Each completion should unlock exactly the next link in the chain.
        for finished, unlocked in ((first, second), (second, third)):
            kb.complete_task(conn, finished)
            assert kb.get_task(conn, unlocked).status == "ready"
|
||||
|
||||
|
||||
def test_recompute_ready_fan_in_waits_for_all_parents(kanban_home):
    """A child with two parents stays "todo" until both are completed."""
    with kb.connect() as conn:
        left = kb.create_task(conn, title="a")
        right = kb.create_task(conn, title="b")
        child = kb.create_task(conn, title="c", parents=[left, right])

        kb.complete_task(conn, left)
        after_one = kb.get_task(conn, child).status
        assert after_one == "todo"

        kb.complete_task(conn, right)
        after_both = kb.get_task(conn, child).status
        assert after_both == "ready"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Atomic claim (CAS)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_claim_once_wins_second_loses(kanban_home):
    """The first claim returns a running task; a second claim returns ``None``."""
    with kb.connect() as conn:
        task_id = kb.create_task(conn, title="x", assignee="a")

        winner = kb.claim_task(conn, task_id, claimer="host:1")
        assert winner is not None
        assert winner.status == "running"

        loser = kb.claim_task(conn, task_id, claimer="host:2")
        assert loser is None
|
||||
|
||||
|
||||
def test_claim_fails_on_non_ready(kanban_home):
    """Claiming a task that is in "todo" returns ``None``."""
    with kb.connect() as conn:
        task_id = kb.create_task(conn, title="x")
        # Move to todo by introducing an unsatisfied parent.
        blocker_id = kb.create_task(conn, title="p")
        kb.link_tasks(conn, blocker_id, task_id)

        status = kb.get_task(conn, task_id).status
        assert status == "todo"
        claimed = kb.claim_task(conn, task_id)
        assert claimed is None
|
||||
|
||||
|
||||
def test_stale_claim_reclaimed(kanban_home):
    """An expired claim is released and the task goes back to "ready"."""
    rewind_sql = "UPDATE tasks SET claim_expires = ? WHERE id = ?"
    with kb.connect() as conn:
        task_id = kb.create_task(conn, title="x", assignee="a")
        kb.claim_task(conn, task_id)
        # Rewind claim_expires so it looks stale.
        conn.execute(rewind_sql, (int(time.time()) - 3600, task_id))

        released = kb.release_stale_claims(conn)
        assert released == 1
        assert kb.get_task(conn, task_id).status == "ready"
|
||||
|
||||
|
||||
def test_heartbeat_extends_claim(kanban_home):
    """A heartbeat with a 3600s TTL pushes ``claim_expires`` well into the future."""
    with kb.connect() as conn:
        task_id = kb.create_task(conn, title="x", assignee="a")
        owner = "host:hb"
        kb.claim_task(conn, task_id, claimer=owner, ttl_seconds=60)
        # NOTE(review): this value is read but never compared against below.
        original = kb.get_task(conn, task_id).claim_expires

        # Rewind then heartbeat.
        conn.execute(
            "UPDATE tasks SET claim_expires = ? WHERE id = ?", (0, task_id)
        )
        refreshed = kb.heartbeat_claim(
            conn, task_id, claimer=owner, ttl_seconds=3600
        )
        assert refreshed

        expires = kb.get_task(conn, task_id).claim_expires
        assert expires > int(time.time()) + 3000
|
||||
|
||||
|
||||
def test_concurrent_claims_only_one_wins(kanban_home):
    """Race eight threads for the same task; exactly one claim may succeed."""
    with kb.connect() as conn:
        task_id = kb.create_task(conn, title="race", assignee="a")

    def attempt(i):
        # Each worker opens its own connection via kb.connect().
        with kb.connect() as own_conn:
            return kb.claim_task(own_conn, task_id, claimer=f"host:{i}")

    n_workers = 8
    with concurrent.futures.ThreadPoolExecutor(max_workers=n_workers) as pool:
        outcomes = list(pool.map(attempt, range(n_workers)))

    winners = [outcome for outcome in outcomes if outcome is not None]
    assert len(winners) == 1
    assert winners[0].status == "running"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Complete / block / unblock / archive / assign
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_complete_records_result(kanban_home):
    """Completing a task stores the result, status "done" and a completion time."""
    with kb.connect() as conn:
        task_id = kb.create_task(conn, title="x")
        completed = kb.complete_task(conn, task_id, result="done and dusted")
        assert completed
        task = kb.get_task(conn, task_id)
    assert task.status == "done"
    assert task.result == "done and dusted"
    assert task.completed_at is not None
|
||||
|
||||
|
||||
def test_block_then_unblock(kanban_home):
    """A claimed task can be blocked and then unblocked back to "ready"."""
    with kb.connect() as conn:
        task_id = kb.create_task(conn, title="x", assignee="a")
        kb.claim_task(conn, task_id)

        blocked = kb.block_task(conn, task_id, reason="need input")
        assert blocked
        assert kb.get_task(conn, task_id).status == "blocked"

        unblocked = kb.unblock_task(conn, task_id)
        assert unblocked
        assert kb.get_task(conn, task_id).status == "ready"
|
||||
|
||||
|
||||
def test_assign_refuses_while_running(kanban_home):
    """Reassigning a claimed task raises ``RuntimeError``."""
    with kb.connect() as conn:
        task_id = kb.create_task(conn, title="x", assignee="a")
        kb.claim_task(conn, task_id)
        with pytest.raises(RuntimeError, match="currently running"):
            kb.assign_task(conn, task_id, "b")
|
||||
|
||||
|
||||
def test_assign_reassigns_when_not_running(kanban_home):
    """An unclaimed task can be reassigned and the new assignee is stored."""
    with kb.connect() as conn:
        task_id = kb.create_task(conn, title="x", assignee="a")
        reassigned = kb.assign_task(conn, task_id, "b")
        assert reassigned
        current = kb.get_task(conn, task_id).assignee
        assert current == "b"
|
||||
|
||||
|
||||
def test_archive_hides_from_default_list(kanban_home):
    """Archived tasks are listed only when ``include_archived=True``."""
    with kb.connect() as conn:
        task_id = kb.create_task(conn, title="x")
        kb.complete_task(conn, task_id)
        archived = kb.archive_task(conn, task_id)
        assert archived

        default_listing = kb.list_tasks(conn)
        assert len(default_listing) == 0
        full_listing = kb.list_tasks(conn, include_archived=True)
        assert len(full_listing) == 1
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Comments / events / worker context
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_comments_recorded_in_order(kanban_home):
    """Comments are listed in insertion order with their authors."""
    with kb.connect() as conn:
        task_id = kb.create_task(conn, title="x")
        kb.add_comment(conn, task_id, "user", "first")
        kb.add_comment(conn, task_id, "researcher", "second")
        listed = kb.list_comments(conn, task_id)
    bodies = [comment.body for comment in listed]
    authors = [comment.author for comment in listed]
    assert bodies == ["first", "second"]
    assert authors == ["user", "researcher"]
|
||||
|
||||
|
||||
def test_empty_comment_rejected(kanban_home):
    """An empty comment body raises ``ValueError``."""
    with kb.connect() as conn:
        task_id = kb.create_task(conn, title="x")
        with pytest.raises(ValueError, match="body is required"):
            kb.add_comment(conn, task_id, "user", "")
|
||||
|
||||
|
||||
def test_events_capture_lifecycle(kanban_home):
    """Create, claim and complete each leave an event of the matching kind."""
    with kb.connect() as conn:
        task_id = kb.create_task(conn, title="x", assignee="a")
        kb.claim_task(conn, task_id)
        kb.complete_task(conn, task_id, result="ok")
        recorded = kb.list_events(conn, task_id)
    kinds = [event.kind for event in recorded]
    for wanted in ("created", "claimed", "completed"):
        assert wanted in kinds
|
||||
|
||||
|
||||
def test_worker_context_includes_parent_results_and_comments(kanban_home):
    """Worker context contains the parent result, the comment, the id and the title."""
    with kb.connect() as conn:
        parent_id = kb.create_task(conn, title="p")
        kb.complete_task(conn, parent_id, result="PARENT_RESULT_MARKER")
        child_id = kb.create_task(conn, title="child", parents=[parent_id])
        kb.add_comment(conn, child_id, "user", "CLARIFICATION_MARKER")
        context = kb.build_worker_context(conn, child_id)
    expected_fragments = (
        "PARENT_RESULT_MARKER",
        "CLARIFICATION_MARKER",
        child_id,
        "child",
    )
    for fragment in expected_fragments:
        assert fragment in context
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Dispatcher
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_dispatch_dry_run_does_not_claim(kanban_home):
    """A dry run reports both tasks as spawned but leaves them "ready"."""
    with kb.connect() as conn:
        first = kb.create_task(conn, title="a", assignee="alice")
        second = kb.create_task(conn, title="b", assignee="bob")
        outcome = kb.dispatch_once(conn, dry_run=True)
    spawned_ids = {entry[0] for entry in outcome.spawned}
    assert spawned_ids == {first, second}

    with kb.connect() as conn:
        # Dry run must NOT mutate status.
        for task_id in (first, second):
            assert kb.get_task(conn, task_id).status == "ready"
|
||||
|
||||
|
||||
def test_dispatch_skips_unassigned(kanban_home):
    """A task without an assignee is reported as skipped and nothing is spawned."""
    with kb.connect() as conn:
        task_id = kb.create_task(conn, title="floater")
        outcome = kb.dispatch_once(conn, dry_run=True)
    assert task_id in outcome.skipped_unassigned
    assert not outcome.spawned
|
||||
|
||||
|
||||
def test_dispatch_promotes_ready_and_spawns(kanban_home):
    """After the parent completes, dispatch spawns only the child and marks it running."""
    recorded = []

    def fake_spawn(task, workspace):
        # Capture what the dispatcher hands to the spawn callback.
        recorded.append((task.id, task.assignee, workspace))

    with kb.connect() as conn:
        parent_id = kb.create_task(conn, title="p", assignee="alice")
        child_id = kb.create_task(
            conn, title="c", assignee="bob", parents=[parent_id]
        )
        # Finish parent outside dispatch; promotion happens inside.
        kb.complete_task(conn, parent_id)
        kb.dispatch_once(conn, spawn_fn=fake_spawn)

    # Only the child is spawned; the parent was already done before dispatch.
    assert len(recorded) == 1
    spawned_id, spawned_assignee = recorded[0][0], recorded[0][1]
    assert spawned_id == child_id
    assert spawned_assignee == "bob"

    # The child is now running.
    with kb.connect() as conn:
        assert kb.get_task(conn, child_id).status == "running"
|
||||
|
||||
|
||||
def test_dispatch_spawn_failure_releases_claim(kanban_home):
    """If the spawn callback raises, the task is "ready" again with no claim lock."""

    def boom(task, workspace):
        raise RuntimeError("spawn failed")

    with kb.connect() as conn:
        task_id = kb.create_task(conn, title="boom", assignee="alice")
        kb.dispatch_once(conn, spawn_fn=boom)
        # Must return to ready so the next tick can retry.
        status = kb.get_task(conn, task_id).status
        assert status == "ready"
        lock = kb.get_task(conn, task_id).claim_lock
        assert lock is None
|
||||
|
||||
|
||||
def test_dispatch_reclaims_stale_before_spawning(kanban_home):
    """A dispatch pass reports one reclaimed task when a claim has expired."""
    rewind_sql = "UPDATE tasks SET claim_expires = ? WHERE id = ?"
    with kb.connect() as conn:
        task_id = kb.create_task(conn, title="x", assignee="alice")
        kb.claim_task(conn, task_id)
        # Make the claim look expired by one second.
        conn.execute(rewind_sql, (int(time.time()) - 1, task_id))
        outcome = kb.dispatch_once(conn, dry_run=True)
    assert outcome.reclaimed == 1
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Workspace resolution
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_scratch_workspace_created_under_hermes_home(kanban_home):
    """Resolving a default task's workspace yields an existing "kanban" directory."""
    with kb.connect() as conn:
        task_id = kb.create_task(conn, title="x")
        task = kb.get_task(conn, task_id)
    workspace = kb.resolve_workspace(task)
    assert workspace.exists()
    assert workspace.is_dir()
    assert "kanban" in str(workspace)
|
||||
|
||||
|
||||
def test_dir_workspace_honors_given_path(kanban_home, tmp_path):
    """A "dir" workspace resolves to the given path, which exists afterwards."""
    target = tmp_path / "my-vault"
    task_options = {
        "title": "biz",
        "workspace_kind": "dir",
        "workspace_path": str(target),
    }
    with kb.connect() as conn:
        task_id = kb.create_task(conn, **task_options)
        task = kb.get_task(conn, task_id)
    workspace = kb.resolve_workspace(task)
    assert workspace == target
    assert workspace.exists()
|
||||
|
||||
|
||||
def test_worktree_workspace_returns_intended_path(kanban_home, tmp_path):
    """A "worktree" workspace resolves to exactly the requested path string."""
    target = str(tmp_path / ".worktrees" / "my-task")
    task_options = {
        "title": "ship",
        "workspace_kind": "worktree",
        "workspace_path": target,
    }
    with kb.connect() as conn:
        task_id = kb.create_task(conn, **task_options)
        task = kb.get_task(conn, task_id)
    workspace = kb.resolve_workspace(task)
    # We do NOT auto-create worktrees; the worker's skill handles that.
    assert str(workspace) == target
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tenancy
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_tenant_column_filters_listings(kanban_home):
    """``list_tasks(tenant=...)`` returns only that tenant's tasks."""
    with kb.connect() as conn:
        kb.create_task(conn, title="a1", tenant="biz-a")
        kb.create_task(conn, title="b1", tenant="biz-b")
        kb.create_task(conn, title="shared")  # no tenant
        for_a = kb.list_tasks(conn, tenant="biz-a")
        for_b = kb.list_tasks(conn, tenant="biz-b")
    titles_a = [task.title for task in for_a]
    titles_b = [task.title for task in for_b]
    assert titles_a == ["a1"]
    assert titles_b == ["b1"]
|
||||
|
||||
|
||||
def test_tenant_propagates_to_events(kanban_home):
    """The "created" event payload carries the task's tenant."""
    with kb.connect() as conn:
        task_id = kb.create_task(conn, title="tenant-task", tenant="biz-a")
        recorded = kb.list_events(conn, task_id)
    # The "created" event should have tenant in its payload.
    creation_events = [event for event in recorded if event.kind == "created"]
    assert creation_events
    assert creation_events[0].payload.get("tenant") == "biz-a"
|
||||
@@ -1,284 +0,0 @@
|
||||
"""Tests for hermes_cli.model_catalog — remote manifest fetch + cache + fallback."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import time
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.fixture
def isolated_home(tmp_path, monkeypatch):
    """Yield an isolated HERMES_HOME with a freshly reloaded ``model_catalog``.

    Patches ``Path.home`` and ``HERMES_HOME`` to point under ``tmp_path``,
    reloads ``hermes_cli.model_catalog`` before the test, and calls its
    ``reset_cache()`` after the test.
    """
    import importlib

    hermes_dir = tmp_path / ".hermes"
    hermes_dir.mkdir()
    monkeypatch.setattr(Path, "home", lambda: tmp_path)
    monkeypatch.setenv("HERMES_HOME", str(hermes_dir))

    # Force a fresh catalog module state for each test.
    from hermes_cli import model_catalog

    importlib.reload(model_catalog)
    yield hermes_dir
    model_catalog.reset_cache()
|
||||
|
||||
|
||||
def _valid_manifest() -> dict:
|
||||
return {
|
||||
"version": 1,
|
||||
"updated_at": "2026-04-25T22:00:00Z",
|
||||
"metadata": {"source": "test"},
|
||||
"providers": {
|
||||
"openrouter": {
|
||||
"metadata": {"display_name": "OpenRouter"},
|
||||
"models": [
|
||||
{"id": "anthropic/claude-opus-4.7", "description": "recommended"},
|
||||
{"id": "openai/gpt-5.4", "description": ""},
|
||||
{"id": "openrouter/elephant-alpha", "description": "free"},
|
||||
],
|
||||
},
|
||||
"nous": {
|
||||
"metadata": {"display_name": "Nous Portal"},
|
||||
"models": [
|
||||
{"id": "anthropic/claude-opus-4.7"},
|
||||
{"id": "moonshotai/kimi-k2.6"},
|
||||
],
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
class TestValidation:
    """``_validate_manifest`` accepts the sample manifest and rejects broken ones."""

    def test_accepts_well_formed_manifest(self, isolated_home):
        from hermes_cli.model_catalog import _validate_manifest

        verdict = _validate_manifest(_valid_manifest())
        assert verdict is True

    def test_rejects_non_dict(self, isolated_home):
        from hermes_cli.model_catalog import _validate_manifest

        for bogus in ("string", [], None):
            assert _validate_manifest(bogus) is False

    def test_rejects_missing_version(self, isolated_home):
        from hermes_cli.model_catalog import _validate_manifest

        manifest = _valid_manifest()
        del manifest["version"]
        assert _validate_manifest(manifest) is False

    def test_rejects_future_version(self, isolated_home):
        from hermes_cli.model_catalog import _validate_manifest

        manifest = _valid_manifest()
        manifest["version"] = 999
        assert _validate_manifest(manifest) is False

    def test_rejects_missing_providers(self, isolated_home):
        from hermes_cli.model_catalog import _validate_manifest

        manifest = _valid_manifest()
        del manifest["providers"]
        assert _validate_manifest(manifest) is False

    def test_rejects_malformed_model_entry(self, isolated_home):
        from hermes_cli.model_catalog import _validate_manifest

        manifest = _valid_manifest()
        openrouter_models = manifest["providers"]["openrouter"]["models"]
        openrouter_models[0] = {"id": ""}  # empty id
        assert _validate_manifest(manifest) is False

    def test_rejects_non_string_model_id(self, isolated_home):
        from hermes_cli.model_catalog import _validate_manifest

        manifest = _valid_manifest()
        openrouter_models = manifest["providers"]["openrouter"]["models"]
        openrouter_models[0] = {"id": 42}
        assert _validate_manifest(manifest) is False
|
||||
|
||||
|
||||
class TestFetchSuccess:
    """``get_catalog`` behaviour when ``_fetch_manifest`` returns a manifest."""

    def test_fetch_and_cache_writes_disk(self, isolated_home):
        from hermes_cli import model_catalog

        manifest = _valid_manifest()
        fetch_patch = patch.object(
            model_catalog, "_fetch_manifest", return_value=manifest
        )
        with fetch_patch as fetch:
            catalog = model_catalog.get_catalog(force_refresh=True)

        assert catalog == manifest
        assert fetch.called

        # The fetched manifest must also be persisted to the cache file.
        cache_file = model_catalog._cache_path()
        assert cache_file.exists()
        with open(cache_file) as handle:
            on_disk = json.load(handle)
            assert on_disk == manifest

    def test_second_call_uses_in_process_cache(self, isolated_home):
        from hermes_cli import model_catalog

        manifest = _valid_manifest()
        fetch_patch = patch.object(
            model_catalog, "_fetch_manifest", return_value=manifest
        )
        with fetch_patch as fetch:
            model_catalog.get_catalog(force_refresh=True)
            model_catalog.get_catalog()  # should not hit network again
            assert fetch.call_count == 1

    def test_force_refresh_always_refetches(self, isolated_home):
        from hermes_cli import model_catalog

        manifest = _valid_manifest()
        fetch_patch = patch.object(
            model_catalog, "_fetch_manifest", return_value=manifest
        )
        with fetch_patch as fetch:
            for _ in range(2):
                model_catalog.get_catalog(force_refresh=True)
            assert fetch.call_count == 2
|
||||
|
||||
|
||||
class TestFetchFailure:
    """``get_catalog`` behaviour when ``_fetch_manifest`` returns ``None``."""

    def test_network_failure_returns_empty_when_no_cache(self, isolated_home):
        from hermes_cli import model_catalog

        with patch.object(model_catalog, "_fetch_manifest", return_value=None):
            catalog = model_catalog.get_catalog(force_refresh=True)
        assert catalog == {}

    def test_network_failure_falls_back_to_disk_cache(self, isolated_home):
        from hermes_cli import model_catalog

        # Prime disk cache with a fresh copy.
        manifest = _valid_manifest()
        with patch.object(model_catalog, "_fetch_manifest", return_value=manifest):
            model_catalog.get_catalog(force_refresh=True)

        # Now wipe in-process cache and simulate network failure on refetch.
        model_catalog.reset_cache()
        with patch.object(model_catalog, "_fetch_manifest", return_value=None):
            catalog = model_catalog.get_catalog(force_refresh=True)

        assert catalog == manifest

    def test_fetch_failure_falls_back_to_stale_cache(self, isolated_home):
        import os as _os

        from hermes_cli import model_catalog

        manifest = _valid_manifest()
        # Write stale cache directly (mtime in the past).
        cache_file = model_catalog._cache_path()
        cache_file.parent.mkdir(parents=True, exist_ok=True)
        with open(cache_file, "w") as handle:
            json.dump(manifest, handle)
        thirty_days_ago = time.time() - 30 * 24 * 3600  # 30 days ago
        _os.utime(cache_file, (thirty_days_ago, thirty_days_ago))

        with patch.object(model_catalog, "_fetch_manifest", return_value=None):
            catalog = model_catalog.get_catalog()

        # Stale cache is better than nothing.
        assert catalog == manifest
|
||||
|
||||
|
||||
class TestCuratedAccessors:
    """Curated accessors return the manifest's models, or ``None`` when the fetch yields nothing."""

    def test_openrouter_returns_tuples(self, isolated_home):
        from hermes_cli import model_catalog

        expected = [
            ("anthropic/claude-opus-4.7", "recommended"),
            ("openai/gpt-5.4", ""),
            ("openrouter/elephant-alpha", "free"),
        ]
        fetch_patch = patch.object(
            model_catalog, "_fetch_manifest", return_value=_valid_manifest()
        )
        with fetch_patch:
            curated = model_catalog.get_curated_openrouter_models()
        assert curated == expected

    def test_nous_returns_ids(self, isolated_home):
        from hermes_cli import model_catalog

        fetch_patch = patch.object(
            model_catalog, "_fetch_manifest", return_value=_valid_manifest()
        )
        with fetch_patch:
            curated = model_catalog.get_curated_nous_models()
        assert curated == ["anthropic/claude-opus-4.7", "moonshotai/kimi-k2.6"]

    def test_openrouter_returns_none_when_catalog_empty(self, isolated_home):
        from hermes_cli import model_catalog

        with patch.object(model_catalog, "_fetch_manifest", return_value=None):
            curated = model_catalog.get_curated_openrouter_models()
            assert curated is None

    def test_nous_returns_none_when_catalog_empty(self, isolated_home):
        from hermes_cli import model_catalog

        with patch.object(model_catalog, "_fetch_manifest", return_value=None):
            curated = model_catalog.get_curated_nous_models()
            assert curated is None
|
||||
|
||||
|
||||
class TestDisabled:
    """With ``enabled: False`` in the config, no fetch is attempted."""

    def test_disabled_config_short_circuits(self, isolated_home):
        from hermes_cli import model_catalog

        disabled_config = {
            "enabled": False,
            "url": "http://ignored",
            "ttl_hours": 24.0,
            "providers": {},
        }
        config_patch = patch.object(
            model_catalog, "_load_catalog_config", return_value=disabled_config
        )
        with config_patch:
            with patch.object(model_catalog, "_fetch_manifest") as fetch:
                catalog = model_catalog.get_catalog()
        assert catalog == {}
        fetch.assert_not_called()
|
||||
|
||||
|
||||
class TestProviderOverride:
    """A per-provider ``url`` in the config wins over the master manifest URL."""

    def test_override_url_takes_precedence(self, isolated_home):
        from hermes_cli import model_catalog

        override_payload = {
            "version": 1,
            "providers": {
                "openrouter": {
                    "models": [
                        {"id": "override/model", "description": "custom"},
                    ]
                }
            },
        }
        catalog_config = {
            "enabled": True,
            "url": "http://master",
            "ttl_hours": 24.0,
            "providers": {"openrouter": {"url": "http://override"}},
        }

        def fake_fetch(url, timeout):
            # Serve the override payload only for the override URL.
            return override_payload if "override" in url else _valid_manifest()

        config_patch = patch.object(
            model_catalog, "_load_catalog_config", return_value=catalog_config
        )
        with config_patch:
            with patch.object(model_catalog, "_fetch_manifest", side_effect=fake_fetch):
                curated = model_catalog.get_curated_openrouter_models()

        assert curated == [("override/model", "custom")]
|
||||
|
||||
|
||||
class TestIntegrationWithModelsModule:
    """Exercise the fallback paths via the real callers in hermes_cli.models."""

    def test_curated_nous_ids_falls_back_to_hardcoded_on_empty_catalog(
        self, isolated_home
    ):
        from hermes_cli import model_catalog
        from hermes_cli.models import _PROVIDER_MODELS, get_curated_nous_model_ids

        with patch.object(model_catalog, "_fetch_manifest", return_value=None):
            ids = get_curated_nous_model_ids()

        hardcoded = list(_PROVIDER_MODELS["nous"])
        assert ids == hardcoded

    def test_curated_nous_ids_prefers_manifest(self, isolated_home):
        from hermes_cli import model_catalog
        from hermes_cli.models import get_curated_nous_model_ids

        fetch_patch = patch.object(
            model_catalog, "_fetch_manifest", return_value=_valid_manifest()
        )
        with fetch_patch:
            ids = get_curated_nous_model_ids()

        assert ids == ["anthropic/claude-opus-4.7", "moonshotai/kimi-k2.6"]
|
||||
@@ -1,822 +0,0 @@
|
||||
"""Tests for the Kanban dashboard plugin backend (plugins/kanban/dashboard/plugin_api.py).
|
||||
|
||||
The plugin mounts as /api/plugins/kanban/ inside the dashboard's FastAPI app,
|
||||
but here we attach its router to a bare FastAPI instance so we can test the
|
||||
REST surface without spinning up the whole dashboard.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import importlib.util
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
from fastapi import FastAPI
|
||||
from fastapi.testclient import TestClient
|
||||
|
||||
from hermes_cli import kanban_db as kb
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fixtures
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _load_plugin_router():
    """Import ``plugins/kanban/dashboard/plugin_api.py`` by path and return its ``router``.

    The module is loaded under the name ``hermes_dashboard_plugin_kanban_test``
    and registered in ``sys.modules`` before it is executed.
    """
    repo_root = Path(__file__).resolve().parents[2]
    plugin_file = repo_root.joinpath("plugins", "kanban", "dashboard", "plugin_api.py")
    assert plugin_file.exists(), f"plugin file missing: {plugin_file}"

    module_name = "hermes_dashboard_plugin_kanban_test"
    spec = importlib.util.spec_from_file_location(module_name, plugin_file)
    assert spec is not None and spec.loader is not None

    plugin_module = importlib.util.module_from_spec(spec)
    sys.modules[spec.name] = plugin_module
    spec.loader.exec_module(plugin_module)
    return plugin_module.router
|
||||
|
||||
|
||||
@pytest.fixture
def kanban_home(tmp_path, monkeypatch):
    """Provide an isolated HERMES_HOME under ``tmp_path`` with a fresh kanban DB.

    Sets ``HERMES_HOME``, patches ``Path.home`` to return ``tmp_path``, runs
    ``kb.init_db()`` and returns the created ``.hermes`` directory.
    """
    hermes_dir = tmp_path / ".hermes"
    hermes_dir.mkdir()
    monkeypatch.setenv("HERMES_HOME", str(hermes_dir))
    monkeypatch.setattr(Path, "home", lambda: tmp_path)
    kb.init_db()
    return hermes_dir
|
||||
|
||||
|
||||
@pytest.fixture
def client(kanban_home):
    """Return a ``TestClient`` for a bare FastAPI app with the kanban router mounted."""
    router = _load_plugin_router()
    app = FastAPI()
    app.include_router(router, prefix="/api/plugins/kanban")
    return TestClient(app)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# GET /board on an empty DB
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_board_empty(client):
    """GET /board on an empty DB lists every canonical column with no tasks."""
    response = client.get("/api/plugins/kanban/board")
    assert response.status_code == 200
    data = response.json()
    columns = data["columns"]

    # Every canonical column (triage plus the rest) must be present...
    names = [column["name"] for column in columns]
    for expected in ("triage", "todo", "ready", "running", "blocked", "done"):
        assert expected in names, f"missing column {expected}: {names}"

    # ...and each of them must be empty.
    task_counts = [len(column["tasks"]) for column in columns]
    assert all(count == 0 for count in task_counts)

    # No tenants, assignees or events exist yet.
    assert data["tenants"] == []
    assert data["assignees"] == []
    assert data["latest_event_id"] == 0
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# POST /tasks then GET /board sees it
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_create_task_appears_on_board(client):
    """POST /tasks echoes the new task, and GET /board then lists it under 'ready'."""
    payload = {
        "title": "Research LLM caching",
        "assignee": "researcher",
        "priority": 3,
        "tenant": "acme",
    }
    created = client.post("/api/plugins/kanban/tasks", json=payload)
    assert created.status_code == 200, created.text
    task = created.json()["task"]

    expected_fields = {
        "title": "Research LLM caching",
        "assignee": "researcher",
        "status": "ready",  # no parents -> immediately ready
        "priority": 3,
        "tenant": "acme",
    }
    for field, value in expected_fields.items():
        assert task[field] == value
    task_id = task["id"]

    # Board now lists it under 'ready'.
    board_resp = client.get("/api/plugins/kanban/board")
    assert board_resp.status_code == 200
    data = board_resp.json()
    ready = next(col for col in data["columns"] if col["name"] == "ready")
    ready_tasks = ready["tasks"]
    assert len(ready_tasks) == 1
    assert ready_tasks[0]["id"] == task_id
    assert "acme" in data["tenants"]
    assert "researcher" in data["assignees"]
|
||||
|
||||
|
||||
def test_tenant_filter(client):
    """GET /board?tenant=... returns only the tasks created for that tenant.

    Two tasks are created under different tenants. Each filtered board must
    contain exactly the one task that belongs to the requested tenant.
    """
    for title, tenant in (("A", "t1"), ("B", "t2")):
        created = client.post(
            "/api/plugins/kanban/tasks", json={"title": title, "tenant": tenant},
        )
        # Fail at the setup step rather than on a confusing mismatch below.
        assert created.status_code == 200, created.text

    for tenant, expected_title in (("t1", "A"), ("t2", "B")):
        r = client.get(f"/api/plugins/kanban/board?tenant={tenant}")
        assert r.status_code == 200, r.text
        titles = [t["title"] for col in r.json()["columns"] for t in col["tasks"]]
        # Compare titles, not just the count: a filter that returned the
        # other tenant's task would still yield a count of one.
        assert titles == [expected_title], f"tenant={tenant}: {titles}"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# GET /tasks/:id returns body + comments + events + links
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_task_detail_includes_links_and_events(client):
    """Task detail exposes parent/child links in both directions, plus events."""
    tasks_url = "/api/plugins/kanban/tasks"
    parent = client.post(tasks_url, json={"title": "parent"}).json()["task"]
    child = client.post(
        tasks_url, json={"title": "child", "parents": [parent["id"]]},
    ).json()["task"]
    assert child["status"] == "todo"  # parent not done yet

    # The child's detail names the parent.
    child_resp = client.get(f"{tasks_url}/{child['id']}")
    assert child_resp.status_code == 200
    child_detail = child_resp.json()
    assert child_detail["task"]["id"] == child["id"]
    assert parent["id"] in child_detail["links"]["parents"]

    # The parent's detail names the child.
    parent_detail = client.get(f"{tasks_url}/{parent['id']}").json()
    assert child["id"] in parent_detail["links"]["children"]

    # The child's detail payload already carries at least one event.
    assert len(child_detail["events"]) >= 1
|
||||
|
||||
|
||||
def test_task_detail_404_on_unknown(client):
    """Requesting the detail of a non-existent task id yields HTTP 404."""
    response = client.get("/api/plugins/kanban/tasks/does-not-exist")
    assert response.status_code == 404
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# PATCH /tasks/:id — status transitions
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_patch_status_complete(client):
    """PATCH status=done completes the task and the board shows it under 'done'."""
    task = client.post("/api/plugins/kanban/tasks", json={"title": "x"}).json()["task"]
    task_url = f"/api/plugins/kanban/tasks/{task['id']}"

    patched = client.patch(task_url, json={"status": "done", "result": "shipped"})
    assert patched.status_code == 200
    assert patched.json()["task"]["status"] == "done"

    # Board reflects the move.
    columns = client.get("/api/plugins/kanban/board").json()["columns"]
    done_column = next(col for col in columns if col["name"] == "done")
    assert any(entry["id"] == task["id"] for entry in done_column["tasks"])
|
||||
|
||||
|
||||
def test_patch_block_then_unblock(client):
    """A task can be PATCHed to 'blocked' and then back to 'ready'."""
    task = client.post("/api/plugins/kanban/tasks", json={"title": "x"}).json()["task"]
    task_url = f"/api/plugins/kanban/tasks/{task['id']}"

    # (request body, status expected in the response), applied in order.
    transitions = (
        ({"status": "blocked", "block_reason": "need input"}, "blocked"),
        ({"status": "ready"}, "ready"),
    )
    for body, expected_status in transitions:
        resp = client.patch(task_url, json=body)
        assert resp.status_code == 200
        assert resp.json()["task"]["status"] == expected_status
|
||||
|
||||
|
||||
def test_patch_drag_drop_move_todo_to_ready(client):
    """Direct status write, as used by drag-drop for statuses with no dedicated verb.

    A child of an unfinished parent starts in 'todo'; PATCHing its status
    promotes it to 'ready' by hand.
    """
    tasks_url = "/api/plugins/kanban/tasks"
    parent = client.post(tasks_url, json={"title": "p"}).json()["task"]
    child_payload = {"title": "c", "parents": [parent["id"]]}
    child = client.post(tasks_url, json=child_payload).json()["task"]
    assert child["status"] == "todo"

    promoted = client.patch(f"{tasks_url}/{child['id']}", json={"status": "ready"})
    assert promoted.status_code == 200
    assert promoted.json()["task"]["status"] == "ready"
|
||||
|
||||
|
||||
def test_patch_reassign(client):
    """PATCHing only the assignee changes who the task is assigned to."""
    tasks_url = "/api/plugins/kanban/tasks"
    original = client.post(tasks_url, json={"title": "x", "assignee": "a"}).json()["task"]

    resp = client.patch(f"{tasks_url}/{original['id']}", json={"assignee": "b"})
    assert resp.status_code == 200
    assert resp.json()["task"]["assignee"] == "b"
|
||||
|
||||
|
||||
def test_patch_priority_and_edit(client):
    """A single PATCH can change both the priority and the title."""
    task = client.post("/api/plugins/kanban/tasks", json={"title": "x"}).json()["task"]
    changes = {"priority": 5, "title": "renamed"}

    resp = client.patch(f"/api/plugins/kanban/tasks/{task['id']}", json=changes)
    assert resp.status_code == 200
    updated = resp.json()["task"]
    assert updated["priority"] == 5
    assert updated["title"] == "renamed"
|
||||
|
||||
|
||||
def test_patch_invalid_status(client):
    """PATCHing an unknown status value is rejected with HTTP 400."""
    task = client.post("/api/plugins/kanban/tasks", json={"title": "x"}).json()["task"]
    task_url = f"/api/plugins/kanban/tasks/{task['id']}"
    resp = client.patch(task_url, json={"status": "banana"})
    assert resp.status_code == 400
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Comments + Links
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_add_comment(client):
    """A posted comment shows up, with body and author, in the task detail."""
    task = client.post("/api/plugins/kanban/tasks", json={"title": "x"}).json()["task"]
    task_url = f"/api/plugins/kanban/tasks/{task['id']}"

    posted = client.post(
        f"{task_url}/comments",
        json={"body": "how's progress?", "author": "teknium"},
    )
    assert posted.status_code == 200

    comments = client.get(task_url).json()["comments"]
    assert len(comments) == 1
    only_comment = comments[0]
    assert only_comment["body"] == "how's progress?"
    assert only_comment["author"] == "teknium"
|
||||
|
||||
|
||||
def test_add_comment_empty_rejected(client):
    """A whitespace-only comment body is rejected with HTTP 400."""
    task = client.post("/api/plugins/kanban/tasks", json={"title": "x"}).json()["task"]
    comments_url = f"/api/plugins/kanban/tasks/{task['id']}/comments"
    resp = client.post(comments_url, json={"body": " "})
    assert resp.status_code == 400
|
||||
|
||||
|
||||
def test_add_link_and_delete_link(client):
    """A parent->child link can be created, is visible on the child, and can be deleted."""
    tasks_url = "/api/plugins/kanban/tasks"
    links_url = "/api/plugins/kanban/links"
    a = client.post(tasks_url, json={"title": "a"}).json()["task"]
    b = client.post(tasks_url, json={"title": "b"}).json()["task"]
    link = {"parent_id": a["id"], "child_id": b["id"]}

    created = client.post(links_url, json=link)
    assert created.status_code == 200

    child_detail = client.get(f"{tasks_url}/{b['id']}").json()
    assert a["id"] in child_detail["links"]["parents"]

    # DELETE takes the same pair as query parameters.
    deleted = client.delete(links_url, params=link)
    assert deleted.status_code == 200
    assert deleted.json()["ok"] is True
|
||||
|
||||
|
||||
def test_add_link_cycle_rejected(client):
    """Linking b -> a after a -> b already exists is rejected with HTTP 400."""
    tasks_url = "/api/plugins/kanban/tasks"
    links_url = "/api/plugins/kanban/links"
    a = client.post(tasks_url, json={"title": "a"}).json()["task"]
    b = client.post(tasks_url, json={"title": "b"}).json()["task"]

    # Forward link first; its response is not inspected.
    client.post(links_url, json={"parent_id": a["id"], "child_id": b["id"]})

    # The reverse link would close a cycle.
    reverse = client.post(links_url, json={"parent_id": b["id"], "child_id": a["id"]})
    assert reverse.status_code == 400
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Dispatch nudge
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_dispatch_dry_run(client):
    """POST /dispatch with dry_run=true answers 200 with a JSON object."""
    task_payload = {"title": "work", "assignee": "researcher"}
    client.post("/api/plugins/kanban/tasks", json=task_payload)

    resp = client.post("/api/plugins/kanban/dispatch?dry_run=true&max=4")
    assert resp.status_code == 200
    # DispatchResult is serialized as a dataclass dict.
    payload = resp.json()
    assert isinstance(payload, dict)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Triage column (new v1 status)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_create_triage_lands_in_triage_column(client):
    """A task created with triage=True has status 'triage' and sits in that column."""
    title = "rough idea, spec me"
    created = client.post(
        "/api/plugins/kanban/tasks", json={"title": title, "triage": True},
    )
    assert created.status_code == 200
    assert created.json()["task"]["status"] == "triage"

    columns = client.get("/api/plugins/kanban/board").json()["columns"]
    triage_tasks = next(col for col in columns if col["name"] == "triage")["tasks"]
    assert len(triage_tasks) == 1
    assert triage_tasks[0]["title"] == "rough idea, spec me"
|
||||
|
||||
|
||||
def test_triage_task_not_promoted_to_ready(client):
    """Triage tasks must stay in triage even when they have no parents."""
    client.post(
        "/api/plugins/kanban/tasks",
        json={"title": "must stay put", "triage": True},
    )
    # A real (non-dry-run) dispatch must leave the triage task where it is.
    client.post("/api/plugins/kanban/dispatch?dry_run=false&max=4")

    board = client.get("/api/plugins/kanban/board")

    def column(name):
        # Re-parses the response body on each call, like two separate lookups.
        return next(col for col in board.json()["columns"] if col["name"] == name)

    triage = column("triage")
    ready = column("ready")
    assert len(triage["tasks"]) == 1
    assert len(ready["tasks"]) == 0
|
||||
|
||||
|
||||
def test_patch_status_triage_works(client):
    """A user (or specifier) can push a task back into triage, and out of it."""
    task = client.post("/api/plugins/kanban/tasks", json={"title": "x"}).json()["task"]
    task_url = f"/api/plugins/kanban/tasks/{task['id']}"

    # Normal creation is 'ready': first push into triage, then promote to todo.
    for target in ("triage", "todo"):
        resp = client.patch(task_url, json={"status": target})
        assert resp.status_code == 200
        assert resp.json()["task"]["status"] == target
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Progress rollup (done children / total children)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_board_progress_rollup(client):
    """Board rows carry a done/total rollup over their children; childless rows carry None."""
    tasks_url = "/api/plugins/kanban/tasks"
    board_url = "/api/plugins/kanban/board"

    def row(board, task_id):
        # First board entry (across all columns) with the given id.
        return next(
            entry
            for col in board["columns"]
            for entry in col["tasks"]
            if entry["id"] == task_id
        )

    parent = client.post(tasks_url, json={"title": "parent"}).json()["task"]
    child_a = client.post(
        tasks_url, json={"title": "a", "parents": [parent["id"]]},
    ).json()["task"]
    child_b = client.post(
        tasks_url, json={"title": "b", "parents": [parent["id"]]},
    ).json()["task"]

    # Children start as "todo" because the parent isn't done yet; promote
    # them to "ready" so complete_task will accept the transition.
    for cid in (child_a["id"], child_b["id"]):
        promoted = client.patch(f"{tasks_url}/{cid}", json={"status": "ready"})
        assert promoted.status_code == 200

    # Nothing completed yet: 0/2.
    board = client.get(board_url).json()
    assert row(board, parent["id"])["progress"] == {"done": 0, "total": 2}

    # Complete one child: 1/2.
    completed = client.patch(f"{tasks_url}/{child_a['id']}", json={"status": "done"})
    assert completed.status_code == 200
    board = client.get(board_url).json()
    assert row(board, parent["id"])["progress"] == {"done": 1, "total": 2}

    # Childless tasks report progress=None, not {0/0}.
    assert row(board, child_b["id"])["progress"] is None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Auto-init on first board read
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_board_auto_initializes_missing_db(tmp_path, monkeypatch):
    """If kanban.db doesn't exist yet, GET /board must create it, not 500."""
    hermes_dir = tmp_path / ".hermes"
    hermes_dir.mkdir()
    monkeypatch.setattr(Path, "home", lambda: tmp_path)
    monkeypatch.setenv("HERMES_HOME", str(hermes_dir))
    # kb.init_db() is intentionally NOT called here.

    application = FastAPI()
    application.include_router(_load_plugin_router(), prefix="/api/plugins/kanban")
    http = TestClient(application)

    resp = http.get("/api/plugins/kanban/board")
    assert resp.status_code == 200
    db_file = hermes_dir / "kanban.db"
    assert db_file.exists(), "init_db wasn't invoked by /board"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# WebSocket auth (query-param token)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_ws_events_rejects_when_token_required(tmp_path, monkeypatch):
    """With _SESSION_TOKEN set, the events WebSocket demands a matching ?token=.

    A missing or wrong token must close the socket with code 1008 (policy
    violation); the correct token must complete the handshake.
    """
    import types

    from starlette.websockets import WebSocketDisconnect

    hermes_dir = tmp_path / ".hermes"
    hermes_dir.mkdir()
    monkeypatch.setenv("HERMES_HOME", str(hermes_dir))
    monkeypatch.setattr(Path, "home", lambda: tmp_path)
    kb.init_db()

    # Stub web_server so _check_ws_token has a token to compare against.
    fake_web_server = types.SimpleNamespace(_SESSION_TOKEN="secret-xyz")
    monkeypatch.setitem(sys.modules, "hermes_cli.web_server", fake_web_server)

    application = FastAPI()
    application.include_router(_load_plugin_router(), prefix="/api/plugins/kanban")
    c = TestClient(application)

    # No token, then a wrong token: both must end in a policy-violation close.
    rejected_urls = (
        "/api/plugins/kanban/events",
        "/api/plugins/kanban/events?token=nope",
    )
    for url in rejected_urls:
        with pytest.raises(WebSocketDisconnect) as exc:
            with c.websocket_connect(url):
                pass
        assert exc.value.code == 1008

    # Correct token -> accepted (connect, then close cleanly from our side).
    good_url = "/api/plugins/kanban/events?token=secret-xyz"
    with c.websocket_connect(good_url) as ws:
        assert ws is not None  # handshake succeeded
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Bulk actions
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_bulk_status_ready(client):
    """POST /tasks/bulk with status=ready moves every listed task to 'ready'.

    Parent-less tasks are created directly in "ready", so each one is first
    pushed to "blocked". That setup step is verified, because otherwise the
    bulk call could be a no-op and the final assertions would still pass.
    """
    ids = []
    for title in ("a", "b", "c"):
        created = client.post("/api/plugins/kanban/tasks", json={"title": title})
        assert created.status_code == 200, created.text
        ids.append(created.json()["task"]["id"])

    # Move all three out of "ready" and confirm they really left it.
    for tid in ids:
        blocked = client.patch(
            f"/api/plugins/kanban/tasks/{tid}",
            json={"status": "blocked", "block_reason": "wait"},
        )
        assert blocked.status_code == 200, blocked.text
        assert blocked.json()["task"]["status"] == "blocked"

    r = client.post(
        "/api/plugins/kanban/tasks/bulk", json={"ids": ids, "status": "ready"},
    )
    assert r.status_code == 200, r.text
    results = r.json()["results"]
    assert all(item["ok"] for item in results), results

    # All three are now ready.
    board = client.get("/api/plugins/kanban/board").json()
    ready = next(col for col in board["columns"] if col["name"] == "ready")
    ready_ids = {t["id"] for t in ready["tasks"]}
    assert set(ids).issubset(ready_ids)
|
||||
|
||||
|
||||
def test_bulk_archive(client):
    """Bulk-archived tasks vanish from the default board view."""
    tasks_url = "/api/plugins/kanban/tasks"
    a = client.post(tasks_url, json={"title": "a"}).json()["task"]
    b = client.post(tasks_url, json={"title": "b"}).json()["task"]

    resp = client.post(
        "/api/plugins/kanban/tasks/bulk",
        json={"ids": [a["id"], b["id"]], "archive": True},
    )
    assert resp.status_code == 200
    outcomes = resp.json()["results"]
    assert all(outcome["ok"] for outcome in outcomes)

    # Default board (archived hidden) — both gone.
    board = client.get("/api/plugins/kanban/board").json()
    visible_ids = {entry["id"] for col in board["columns"] for entry in col["tasks"]}
    assert a["id"] not in visible_ids
    assert b["id"] not in visible_ids
|
||||
|
||||
|
||||
def test_bulk_reassign(client):
    """A bulk request with an assignee reassigns every listed task."""
    tasks_url = "/api/plugins/kanban/tasks"
    task_ids = [
        client.post(tasks_url, json={"title": title, "assignee": "old"}).json()["task"]["id"]
        for title in ("a", "b")
    ]

    resp = client.post(f"{tasks_url}/bulk", json={"ids": task_ids, "assignee": "new"})
    assert resp.status_code == 200

    for tid in task_ids:
        task = client.get(f"{tasks_url}/{tid}").json()["task"]
        assert task["assignee"] == "new"
|
||||
|
||||
|
||||
def test_bulk_unassign_via_empty_string(client):
    """Sending assignee="" in a bulk request clears the assignee (reads back as None)."""
    tasks_url = "/api/plugins/kanban/tasks"
    created = client.post(tasks_url, json={"title": "a", "assignee": "x"}).json()["task"]
    task_id = created["id"]

    resp = client.post(f"{tasks_url}/bulk", json={"ids": [task_id], "assignee": ""})
    assert resp.status_code == 200

    reloaded = client.get(f"{tasks_url}/{task_id}").json()["task"]
    assert reloaded["assignee"] is None
|
||||
|
||||
|
||||
def test_bulk_partial_failure_doesnt_abort_siblings(client):
    """One bad id in the middle of a batch must not stop the others from applying."""
    tasks_url = "/api/plugins/kanban/tasks"
    a = client.post(tasks_url, json={"title": "a"}).json()["task"]
    c2 = client.post(tasks_url, json={"title": "c"}).json()["task"]

    batch = {"ids": [a["id"], "bogus-id", c2["id"]], "priority": 7}
    resp = client.post(f"{tasks_url}/bulk", json=batch)
    assert resp.status_code == 200

    results = resp.json()["results"]
    assert len(results) == 3
    ok_ids = {entry["id"] for entry in results if entry["ok"]}
    assert a["id"] in ok_ids
    assert c2["id"] in ok_ids
    assert any(not entry["ok"] and entry["id"] == "bogus-id" for entry in results)

    # Good siblings actually got the priority bump.
    for tid in (a["id"], c2["id"]):
        task = client.get(f"{tasks_url}/{tid}").json()["task"]
        assert task["priority"] == 7
|
||||
|
||||
|
||||
def test_bulk_empty_ids_400(client):
    """A bulk request with an empty ids list is rejected with HTTP 400."""
    empty_batch = {"ids": []}
    resp = client.post("/api/plugins/kanban/tasks/bulk", json=empty_batch)
    assert resp.status_code == 400
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# /config endpoint
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_config_returns_defaults_when_section_missing(client):
    """GET /config falls back to built-in defaults when dashboard.kanban is absent."""
    resp = client.get("/api/plugins/kanban/config")
    assert resp.status_code == 200
    data = resp.json()

    # Defaults when dashboard.kanban is missing.
    assert data["default_tenant"] == ""
    default_flags = (
        ("lane_by_profile", True),
        ("include_archived_by_default", False),
        ("render_markdown", True),
    )
    for key, flag in default_flags:
        assert data[key] is flag
|
||||
|
||||
|
||||
def test_config_reads_dashboard_kanban_section(tmp_path, monkeypatch, client):
    """Values under ``dashboard.kanban`` in config.yaml are reflected by GET /config.

    ``tmp_path`` and ``monkeypatch`` are not used directly; the config file is
    written into the HERMES_HOME directory read from the environment.
    """
    home = Path(os.environ["HERMES_HOME"])
    # Indentation matters: the four settings must nest under `kanban`, which
    # in turn nests under `dashboard`. With every line at the same indent the
    # settings would be siblings of `kanban` and the section would be empty.
    config_yaml = (
        "dashboard:\n"
        "  kanban:\n"
        "    default_tenant: acme\n"
        "    lane_by_profile: false\n"
        "    include_archived_by_default: true\n"
        "    render_markdown: false\n"
    )
    # Explicit encoding keeps the write independent of the platform default.
    (home / "config.yaml").write_text(config_yaml, encoding="utf-8")

    r = client.get("/api/plugins/kanban/config")
    assert r.status_code == 200
    data = r.json()
    assert data["default_tenant"] == "acme"
    assert data["lane_by_profile"] is False
    assert data["include_archived_by_default"] is True
    assert data["render_markdown"] is False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Runs surfacing (vulcan-artivus RFC feedback)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_task_detail_includes_runs(client):
    """GET /tasks/:id carries a runs[] array with the attempt history."""
    import hermes_cli.kanban_db as _kb

    created = client.post(
        "/api/plugins/kanban/tasks",
        json={"title": "port x", "assignee": "worker"},
    ).json()
    tid = created["task"]["id"]

    # Per the original author's note, PATCHing to "running" does not go
    # through claim_task, so the run is created and closed by calling the
    # kanban_db functions directly.
    conn = _kb.connect()
    try:
        _kb.claim_task(conn, tid)
        _kb.complete_task(
            conn,
            tid,
            result="done",
            summary="tested on rate limiter",
            metadata={"changed_files": ["limiter.py"]},
        )
    finally:
        conn.close()

    detail = client.get(f"/api/plugins/kanban/tasks/{tid}").json()
    assert "runs" in detail
    runs = detail["runs"]
    assert len(runs) == 1
    run = runs[0]

    expected = {
        "outcome": "completed",
        "profile": "worker",
        "summary": "tested on rate limiter",
        "metadata": {"changed_files": ["limiter.py"]},
    }
    for field, value in expected.items():
        assert run[field] == value
    assert run["ended_at"] is not None
|
||||
|
||||
|
||||
def test_task_detail_runs_empty_before_claim(client):
    """A never-claimed task reports runs == [] rather than omitting the key."""
    created = client.post("/api/plugins/kanban/tasks", json={"title": "fresh"}).json()
    task_id = created["task"]["id"]
    detail = client.get(f"/api/plugins/kanban/tasks/{task_id}").json()
    assert detail["runs"] == []
|
||||
|
||||
|
||||
def test_patch_status_done_with_summary_and_metadata(client):
    """PATCH /tasks/:id with status=done + summary + metadata must
    reach complete_task, so the dashboard has CLI parity."""
    from hermes_cli import kanban_db as kb

    expected_metadata = {"changed_files": ["a.py", "b.py"], "tests_run": 7}

    # Create the task, then claim it directly through kanban_db.
    created = client.post(
        "/api/plugins/kanban/tasks", json={"title": "x", "assignee": "worker"},
    )
    tid = created.json()["task"]["id"]
    conn = kb.connect()
    try:
        kb.claim_task(conn, tid)
    finally:
        conn.close()

    body = {
        "status": "done",
        "summary": "shipped the thing",
        "metadata": {"changed_files": ["a.py", "b.py"], "tests_run": 7},
    }
    patched = client.patch(f"/api/plugins/kanban/tasks/{tid}", json=body)
    assert patched.status_code == 200, patched.text

    # The run must have the summary + metadata attached.
    conn = kb.connect()
    try:
        run = kb.latest_run(conn, tid)
        assert run.outcome == "completed"
        assert run.summary == "shipped the thing"
        assert run.metadata == expected_metadata
    finally:
        conn.close()
|
||||
|
||||
|
||||
def test_patch_status_done_without_summary_still_works(client):
    """Back-compat: PATCH without the new fields still completes."""
    from hermes_cli import kanban_db as kb

    created = client.post(
        "/api/plugins/kanban/tasks", json={"title": "y", "assignee": "worker"},
    )
    tid = created.json()["task"]["id"]

    # Claim directly through kanban_db so there is an open run to complete.
    conn = kb.connect()
    try:
        kb.claim_task(conn, tid)
    finally:
        conn.close()

    legacy_body = {"status": "done", "result": "legacy shape"}
    patched = client.patch(f"/api/plugins/kanban/tasks/{tid}", json=legacy_body)
    assert patched.status_code == 200, patched.text

    conn = kb.connect()
    try:
        run = kb.latest_run(conn, tid)
        assert run.outcome == "completed"
        assert run.summary == "legacy shape"  # falls back to result
    finally:
        conn.close()
|
||||
|
||||
|
||||
def test_patch_status_archive_closes_running_run(client):
    """PATCH to archived while running must close the in-flight run."""
    from hermes_cli import kanban_db as kb

    created = client.post(
        "/api/plugins/kanban/tasks", json={"title": "z", "assignee": "worker"},
    )
    tid = created.json()["task"]["id"]

    # Claim the task and confirm the resulting run is still open.
    conn = kb.connect()
    try:
        kb.claim_task(conn, tid)
        open_run = kb.latest_run(conn, tid)
        assert open_run.ended_at is None
    finally:
        conn.close()

    archived = client.patch(
        f"/api/plugins/kanban/tasks/{tid}", json={"status": "archived"},
    )
    assert archived.status_code == 200, archived.text

    # The task is archived, detached from its run, and the run is "reclaimed".
    conn = kb.connect()
    try:
        task = kb.get_task(conn, tid)
        assert task.status == "archived"
        assert task.current_run_id is None
        closed_run = kb.latest_run(conn, tid)
        assert closed_run.outcome == "reclaimed"
    finally:
        conn.close()
|
||||
|
||||
|
||||
def test_event_dict_includes_run_id(client):
    """GET /tasks/:id returns events with run_id populated.

    Every event must expose a ``run_id`` key, and the "completed" event must
    carry the id of the run that was claimed and completed here.
    """
    r = client.post("/api/plugins/kanban/tasks", json={"title": "e", "assignee": "worker"})
    tid = r.json()["task"]["id"]
    from hermes_cli import kanban_db as kb
    conn = kb.connect()
    try:
        kb.claim_task(conn, tid)
        # Capture the run id before completing the task.
        run_id = kb.latest_run(conn, tid).id
        kb.complete_task(conn, tid, summary="wss")
    finally:
        conn.close()

    r = client.get(f"/api/plugins/kanban/tasks/{tid}")
    assert r.status_code == 200
    events = r.json()["events"]
    # Every event in the response must have a run_id key (None or int).
    for e in events:
        assert "run_id" in e, f"missing run_id in event: {e}"
    # completed event must have the actual run_id.
    comp = [e for e in events if e["kind"] == "completed"]
    # Guard the index below so a missing event fails with a readable
    # assertion instead of an IndexError.
    assert comp, f"no 'completed' event among kinds: {[e['kind'] for e in events]}"
    assert comp[0]["run_id"] == run_id
|
||||
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Per-task force-loaded skills via REST
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_create_task_with_skills_roundtrips(client):
    """POST /tasks accepts `skills: [...]`, GET /tasks/:id returns it."""
    payload = {
        "title": "translate docs",
        "assignee": "linguist",
        "skills": ["translation", "github-code-review"],
    }
    created = client.post("/api/plugins/kanban/tasks", json=payload)
    assert created.status_code == 200, created.text
    task = created.json()["task"]
    assert task["skills"] == ["translation", "github-code-review"]

    # Fetch via GET /tasks/:id as the drawer does.
    detail = client.get(f"/api/plugins/kanban/tasks/{task['id']}").json()
    assert detail["task"]["skills"] == ["translation", "github-code-review"]
|
||||
|
||||
|
||||
def test_create_task_without_skills_defaults_to_empty_list(client):
    """A task created without `skills` reports skills as either None or [].

    Both shapes are accepted here; per the original author's note the
    drawer guards with `t.skills && t.skills.length > 0`, which handles
    null and an empty list alike.
    """
    payload = {"title": "no skills", "assignee": "x"}
    created = client.post("/api/plugins/kanban/tasks", json=payload)
    assert created.status_code == 200, created.text
    task = created.json()["task"]
    # .get() also tolerates the key being absent altogether.
    skills = task.get("skills")
    assert skills in (None, [])
|
||||
@@ -1,78 +0,0 @@
|
||||
"""Behavior tests for the class-first skill review prompts.
|
||||
|
||||
The skill review / combined review prompts steer the background review agent
|
||||
toward generalizing existing skills rather than accumulating near-duplicates.
|
||||
These tests assert the behavioral *instructions* are present — they do NOT
|
||||
snapshot the full prompt text (change-detector).
|
||||
"""
|
||||
|
||||
from run_agent import AIAgent
|
||||
|
||||
|
||||
def test_skill_review_prompt_instructs_survey_first():
    """The skill review prompt must tell the reviewer to survey existing skills first."""
    prompt = AIAgent._SKILL_REVIEW_PROMPT
    required = (
        ("skills_list", "must instruct the reviewer to call skills_list"),
        ("skill_view", "must instruct the reviewer to skill_view candidates"),
        ("SURVEY", "must name the survey step explicitly"),
    )
    for marker, reason in required:
        assert marker in prompt, reason
|
||||
|
||||
|
||||
def test_skill_review_prompt_is_class_first():
    """The skill review prompt must steer toward the CLASS of task, not one task."""
    prompt = AIAgent._SKILL_REVIEW_PROMPT
    required = (
        ("CLASS", "must tell the reviewer to think about the task class"),
        ("class level", "must anchor naming at the class level"),
    )
    for marker, reason in required:
        assert marker in prompt, reason
|
||||
|
||||
|
||||
def test_skill_review_prompt_prefers_updating_existing():
    """The prompt must favour generalizing an existing skill over creating a new one."""
    prompt = AIAgent._SKILL_REVIEW_PROMPT

    # Either wording of the preference is acceptable.
    preference_phrases = ("PREFER GENERALIZING", "PREFER UPDATING")
    states_preference = any(phrase in prompt for phrase in preference_phrases)
    assert states_preference, "must state the update-over-create preference"

    gates_creation = "ONLY CREATE A NEW SKILL" in prompt
    assert gates_creation, "must gate new-skill creation behind a last-resort clause"
|
||||
|
||||
|
||||
def test_skill_review_prompt_flags_overlap_for_followup():
    """The prompt must mention overlap (case-insensitively) so it can be flagged for later review."""
    lowered = AIAgent._SKILL_REVIEW_PROMPT.lower()
    assert "overlap" in lowered, "must mention the overlap-flagging protocol"
|
||||
|
||||
|
||||
def test_skill_review_prompt_preserves_opt_out_clause():
    """The skill review prompt must still contain the 'Nothing to save.' escape clause."""
    opt_out = "Nothing to save."
    assert opt_out in AIAgent._SKILL_REVIEW_PROMPT
|
||||
|
||||
|
||||
def test_combined_review_prompt_keeps_memory_section():
    """The combined review prompt must still contain its memory-review half."""
    text = AIAgent._COMBINED_REVIEW_PROMPT
    for marker in ("**Memory**", "memory tool"):
        assert marker in text
|
||||
|
||||
|
||||
def test_combined_review_prompt_skills_section_is_class_first():
    """The **Skills** half of the combined prompt must carry the same protocol markers."""
    text = AIAgent._COMBINED_REVIEW_PROMPT
    protocol_markers = (
        "**Skills**",
        "SURVEY",
        "CLASS",
        "skills_list",
        "ONLY CREATE A NEW SKILL",
    )
    for marker in protocol_markers:
        assert marker in text
|
||||
|
||||
|
||||
def test_combined_review_prompt_preserves_opt_out_clause():
    """The combined review prompt must keep its 'Nothing to save.' escape clause."""
    opt_out = "Nothing to save."
    assert opt_out in AIAgent._COMBINED_REVIEW_PROMPT
|
||||
|
||||
|
||||
def test_memory_review_prompt_unchanged_in_structure():
    """The memory-only review prompt stays about the memory tool and omits skill markers."""
    text = AIAgent._MEMORY_REVIEW_PROMPT
    # Guardrails: the memory-only prompt must NOT mention skills/surveys.
    for forbidden in ("skills_list", "SURVEY"):
        assert forbidden not in text
    assert "memory tool" in text
|
||||
@@ -1,41 +0,0 @@
|
||||
# Stress / battle-test suite
|
||||
|
||||
Long-running tests that exercise the Kanban kernel under adversarial
|
||||
conditions. **Not run by `scripts/run_tests.sh`** because they can
|
||||
take 30+ seconds each and spawn real subprocesses.
|
||||
|
||||
Run manually:
|
||||
|
||||
```bash
|
||||
./venv/bin/python -m pytest tests/stress/ -v -s
|
||||
# or individual files:
|
||||
./venv/bin/python tests/stress/test_concurrency.py
|
||||
./venv/bin/python tests/stress/test_subprocess_e2e.py
|
||||
./venv/bin/python tests/stress/test_property_fuzzing.py
|
||||
./venv/bin/python tests/stress/test_benchmarks.py
|
||||
```
|
||||
|
||||
## What's covered
|
||||
|
||||
- **test_concurrency.py** — 5 workers, 100 tasks, race-for-claim. Asserts
|
||||
no double-claims, no orphan runs, no SQLite errors escape retry.
|
||||
- **test_concurrency_mixed.py** — 10 workers + 1 reclaimer, 500 tasks,
|
||||
random ops (claim/complete/block/unblock/archive). Same invariants
|
||||
under adversarial scheduling.
|
||||
- **test_concurrency_reclaim_race.py** — TTL < work duration so the
|
||||
reclaimer intentionally yanks tasks mid-work; verifies the worker's
|
||||
late-complete is refused cleanly (CAS guard works).
|
||||
- **test_subprocess_e2e.py** — dispatcher spawns real Python subprocess
|
||||
workers that heartbeat + complete via the CLI; crash detection
|
||||
against a real dead PID.
|
||||
- **test_property_fuzzing.py** — 500 random operation sequences,
|
||||
~40k operations total, 9 invariant checks after each step.
|
||||
- **test_atypical_scenarios.py** — 28 scenarios covering atypical
|
||||
user inputs: unicode/emoji/RTL, 1 MB strings, SQL injection
|
||||
attempts, cycles, self-parents, wide fan-in/out, clock skew,
|
||||
HERMES_HOME with spaces/unicode/symlinks, 1000 runs on one
|
||||
task, idempotency-key race across processes, terminal-state
|
||||
resurrection attempts, dashboard REST with weird JSON.
|
||||
- **test_benchmarks.py** — latency at 100/1k/10k tasks for dispatch,
|
||||
recompute_ready, list_tasks, build_worker_context, etc. Results saved
|
||||
to JSON for regression diffing.
|
||||
@@ -1,50 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Fake worker process that exercises the real subprocess contract.
|
||||
|
||||
Reads HERMES_KANBAN_TASK from env, heartbeats periodically, does short
|
||||
work, completes via the CLI. Designed to be spawned by the dispatcher
|
||||
exactly the way `hermes chat -q` would be, minus the LLM cost.
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
|
||||
|
||||
def main():
    """Drive one Kanban task through heartbeat -> work -> complete via the CLI.

    The task id comes from ``HERMES_KANBAN_TASK`` (required; ``KeyError`` if
    unset) and the workspace from ``HERMES_KANBAN_WORKSPACE`` (defaults to
    ""). Every CLI call runs with ``check=True``, so a non-zero exit raises
    ``subprocess.CalledProcessError``.
    """
    task_id = os.environ["HERMES_KANBAN_TASK"]
    work_dir = os.environ.get("HERMES_KANBAN_WORKSPACE", "")

    def kanban(*cli_args):
        # Go through the real CLI (argparse + init_db + etc), output captured.
        subprocess.run(
            ["hermes", "kanban", *cli_args],
            check=True, capture_output=True,
        )

    # Announce that the worker has started.
    kanban("heartbeat", task_id, "--note", "started")

    # Simulate work with periodic heartbeats.
    for step in (1, 2, 3):
        time.sleep(0.3)
        kanban("heartbeat", task_id, "--note", f"progress {step}/3")

    # Complete with structured handoff.
    handoff = json.dumps({
        "workspace": work_dir,
        "worker_pid": os.getpid(),
        "iterations": 3,
    })
    kanban(
        "complete", task_id,
        "--summary", f"real-subprocess worker finished {task_id}",
        "--metadata", handoff,
    )
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -1,37 +0,0 @@
|
||||
"""pytest config for the stress/ subdirectory.
|
||||
|
||||
These tests are slow (30s+), spawn subprocesses, and are not run by
|
||||
default. Enable via `pytest --run-stress` or by running the scripts
|
||||
directly.
|
||||
|
||||
The scripts are primarily __main__-executable entry points; pytest
|
||||
isn't expected to collect individual test functions from them.
|
||||
"""
|
||||
import pytest
|
||||
|
||||
|
||||
def pytest_collection_modifyitems(config, items):
    """Mark every collected item under ``tests/stress`` as skipped by default.

    Does nothing when ``--run-stress`` was passed. Otherwise each item whose
    file path contains ``tests/stress`` receives a ``skip`` marker.

    Args:
        config: The pytest config object; only ``getoption`` is used.
        items: The collected test items; matching ones are marked in place.
    """
    if config.getoption("--run-stress", default=False):
        return
    skip_stress = pytest.mark.skip(
        reason="stress test (opt-in via --run-stress or run script directly)"
    )
    for item in items:
        # Normalise backslash separators so the substring match also works
        # on paths rendered as ``tests\stress\...``.
        if "tests/stress" in str(item.fspath).replace("\\", "/"):
            item.add_marker(skip_stress)
|
||||
|
||||
|
||||
def pytest_addoption(parser):
    """Register the ``--run-stress`` opt-in flag (a store-true switch, off by default)."""
    flag_spec = {
        "action": "store_true",
        "default": False,
        "help": "Run the stress/battle-test suite (slow, spawns subprocesses).",
    }
    parser.addoption("--run-stress", **flag_spec)
|
||||
|
||||
|
||||
# Keep pytest from collecting any Python file in this directory: the stress
# scripts have top-level code and hard-coded paths, and are meant to run as
# `python tests/stress/<name>.py`, not as pytest modules.
collect_ignore_glob = ["*.py"]
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,221 +0,0 @@
|
||||
"""Scale benchmarks for the Kanban kernel.
|
||||
|
||||
Measures:
|
||||
- dispatch_once latency at 100, 1000, 10000 tasks
|
||||
- recompute_ready latency at 100, 1000, 10000 todo tasks with wide parent graphs
|
||||
- build_worker_context latency with 1, 10, 50 parent dependencies
|
||||
- board list/stats query latency
|
||||
- task_runs query latency at scale
|
||||
|
||||
Results printed as a table. Saved to JSON for regression-diffing in CI
|
||||
or future reviews. Not a pass/fail test — records numbers so we know
|
||||
when a change regresses latency by 10x and can decide whether to care.
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import random
|
||||
import sys
|
||||
import tempfile
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
WT = str(Path(__file__).resolve().parents[2])
|
||||
|
||||
|
||||
def bench(label, fn, iterations=5):
    """Time ``fn`` over ``iterations`` calls and summarise the wall-clock cost.

    Args:
        label: Name stored in the result under ``"label"``.
        fn: Zero-argument callable to time.
        iterations: Number of timed calls; must be at least 1.

    Returns:
        A dict with keys ``label``, ``iter``, ``min_ms``, ``median_ms`` and
        ``max_ms``; all timings are in milliseconds.

    Raises:
        ValueError: If ``iterations`` is less than 1.
    """
    if iterations < 1:
        raise ValueError("iterations must be >= 1")

    samples = []
    for _ in range(iterations):
        t0 = time.perf_counter()
        fn()
        samples.append((time.perf_counter() - t0) * 1000)
    samples.sort()

    mid = len(samples) // 2
    if len(samples) % 2:
        median = samples[mid]
    else:
        # Even sample count: average the two central values so the figure is
        # a true median rather than the upper-middle sample.
        median = (samples[mid - 1] + samples[mid]) / 2

    return {
        "label": label,
        "iter": iterations,
        "min_ms": samples[0],
        "median_ms": median,
        "max_ms": samples[-1],
    }
|
||||
|
||||
|
||||
def seed_tasks(conn, kb, n, assignee="bench-worker", with_parents=False):
    """Create ``n`` benchmark tasks via ``kb.create_task`` and return their ids.

    When ``with_parents`` is true, every task from the sixth onward is given
    5 parents sampled at random from the tasks created before it; all other
    tasks get an empty parents tuple.
    """
    created = []
    for index in range(n):
        wants_parents = with_parents and index >= 5
        parent_ids = random.sample(created[:index], 5) if wants_parents else ()
        created.append(
            kb.create_task(
                conn, title=f"bench {index}", assignee=assignee,
                tenant="bench", parents=parent_ids,
            )
        )
    return created
|
||||
|
||||
|
||||
def _fresh_connection(kb, home):
    """Recreate an empty directory at ``home``, re-init the DB and return a new connection."""
    import shutil  # local import: not among this script's top-level imports

    shutil.rmtree(home, ignore_errors=True)
    os.makedirs(home)
    # Presumably init_db() remembers which paths it already set up, so the
    # record is cleared before re-initialising — confirm in kanban_db.
    kb._INITIALIZED_PATHS.clear()
    kb.init_db()
    return kb.connect()


def _run_case(results, conn, label, fn, iterations, **extra):
    """Benchmark ``fn``, print its timings, tag the result with ``extra`` and close ``conn``."""
    r = bench(label, fn, iterations=iterations)
    print(f" min={r['min_ms']:.1f} median={r['median_ms']:.1f} max={r['max_ms']:.1f} ms")
    r.update(extra)
    results.append(r)
    conn.close()


def main():
    """Run every Kanban benchmark, print a summary table and save results as JSON.

    Each case starts from a freshly initialised database under a throwaway
    HERMES_HOME so one case does not measure the cumulative effects of
    another. The temp directory is removed when the benchmarks finish or
    fail. Results are written to ``/tmp/kanban_bench_results.json``.
    """
    import shutil

    home = tempfile.mkdtemp(prefix="hermes_bench_")
    os.environ["HERMES_HOME"] = home
    os.environ["HOME"] = home
    sys.path.insert(0, WT)
    from hermes_cli import kanban_db as kb

    kb.init_db()

    results = []
    scales = [100, 1000, 10000]
    try:
        # ============ dispatch_once latency ============
        for n in scales:
            print(f"\n== dispatch_once @ {n} tasks ==")
            conn = _fresh_connection(kb, home)
            seed_tasks(conn, kb, n, assignee=None)  # no assignee → won't spawn
            _run_case(
                results, conn,
                f"dispatch_once (n={n}, no spawn)",
                lambda: kb.dispatch_once(conn, spawn_fn=lambda *_: None),
                5, n=n,
            )

        # ============ recompute_ready at scale with parent graphs ============
        for n in scales:
            print(f"\n== recompute_ready @ {n} tasks (5 parents each) ==")
            conn = _fresh_connection(kb, home)
            ids = seed_tasks(conn, kb, n, assignee=None, with_parents=True)
            # Complete the first min(100, n // 10) tasks so some todo tasks
            # might get promoted.
            for tid in ids[:min(100, n // 10)]:
                kb.complete_task(conn, tid, result="bench")
            _run_case(
                results, conn,
                f"recompute_ready (n={n}, with parents)",
                lambda: kb.recompute_ready(conn),
                5, n=n,
            )

        # ============ build_worker_context with N parents ============
        for parent_count in [1, 10, 50]:
            print(f"\n== build_worker_context with {parent_count} parents ==")
            conn = _fresh_connection(kb, home)
            # Create parents, complete them with summaries+metadata.
            parent_ids = []
            for i in range(parent_count):
                pid = kb.create_task(conn, title=f"parent {i}", assignee="p")
                kb.claim_task(conn, pid)
                kb.complete_task(
                    conn, pid,
                    summary=f"parent {i} result that is longer than a single token "
                            f"so we actually measure the IO",
                    metadata={"files": [f"file_{j}.py" for j in range(5)], "i": i},
                )
                parent_ids.append(pid)
            child_id = kb.create_task(
                conn, title="child", assignee="c", parents=parent_ids,
            )
            _run_case(
                results, conn,
                f"build_worker_context (parents={parent_count})",
                lambda: kb.build_worker_context(conn, child_id),
                10, parent_count=parent_count,
            )

        # ============ list_tasks / board_stats at scale ============
        for name, query in (("list_tasks", kb.list_tasks), ("board_stats", kb.board_stats)):
            for n in scales:
                print(f"\n== {name} @ {n} ==")
                conn = _fresh_connection(kb, home)
                seed_tasks(conn, kb, n)
                _run_case(results, conn, f"{name} (n={n})", lambda: query(conn), 5, n=n)

        # ============ list_runs at scale ============
        for n in [100, 1000]:
            print(f"\n== list_runs for task with {n} attempts ==")
            conn = _fresh_connection(kb, home)
            tid = kb.create_task(conn, title="x", assignee="w")
            # Create N attempts via claim/release.
            for _ in range(n):
                kb.claim_task(conn, tid, ttl_seconds=0)
                kb.release_stale_claims(conn)
            _run_case(
                results, conn,
                f"list_runs (runs={n})",
                lambda: kb.list_runs(conn, tid),
                10, run_count=n,
            )
    finally:
        # Do not leave the throwaway HERMES_HOME behind.
        shutil.rmtree(home, ignore_errors=True)

    # ============ SUMMARY TABLE ============
    print()
    print("=" * 60)
    print("SUMMARY")
    print("=" * 60)
    print(f"{'Benchmark':<50} {'min':>8} {'median':>8} {'max':>8}")
    for r in results:
        print(f"{r['label']:<50} {r['min_ms']:>7.1f}ms {r['median_ms']:>7.1f}ms {r['max_ms']:>7.1f}ms")

    # Save for future diffing.
    out_path = "/tmp/kanban_bench_results.json"
    with open(out_path, "w") as f:
        json.dump(results, f, indent=2)
    print(f"\nResults saved to {out_path}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -1,302 +0,0 @@
|
||||
"""Multi-process concurrency stress test for the Kanban kernel.
|
||||
|
||||
5 worker processes race for claims on a shared DB with 100 tasks. Each
|
||||
worker loops: claim -> simulate work -> complete. Asserts the invariants
|
||||
that make the system worth building:
|
||||
|
||||
- No task claimed by two workers simultaneously
|
||||
- No task completed twice
|
||||
- Every claim produces exactly one run row
|
||||
- Every completion closes exactly one run row
|
||||
- Zero SQLite locking errors that escape the retry layer
|
||||
- Total run count == total claim events == total completed events
|
||||
|
||||
This test is the primary justification for WAL + CAS-based claim. If it
|
||||
passes, the architecture holds. If it fails, we have a real bug to fix
|
||||
before anyone runs this in anger.
|
||||
"""
|
||||
|
||||
import json
|
||||
import multiprocessing as mp
|
||||
import os
|
||||
import random
|
||||
import sqlite3
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
NUM_WORKERS = 5
|
||||
NUM_TASKS = 100
|
||||
WORKER_TIMEOUT_S = 60
|
||||
WT = str(Path(__file__).resolve().parents[2])
|
||||
|
||||
|
||||
def worker_loop(worker_id: int, hermes_home: str, result_file: str) -> None:
    """Claim and complete ready tasks until the pool stays empty or time runs out.

    Runs as the target of one worker process. Each pass opens a new
    connection, picks one ready unclaimed task, tries to claim it, sleeps
    briefly to simulate work, then completes it. Claims, completions, lost
    races and SQLite errors are logged and dumped as JSON to ``result_file``
    when the loop ends.

    Args:
        worker_id: Number used in the claimer name and in logged events.
        hermes_home: Directory exported as both HERMES_HOME and HOME.
        result_file: Path the JSON event log is written to.
    """
    os.environ["HERMES_HOME"] = hermes_home
    os.environ["HOME"] = hermes_home
    sys.path.insert(0, WT)

    from hermes_cli import kanban_db as kb

    log = []
    idle_polls = 0
    t_start = time.monotonic()

    while time.monotonic() - t_start < WORKER_TIMEOUT_S:
        conn = kb.connect()
        try:
            # Any ready task will do (non-deterministic order intentional —
            # workers are meant to race for the same rows).
            candidate = conn.execute(
                "SELECT id FROM tasks WHERE status = 'ready' "
                "AND claim_lock IS NULL LIMIT 1"
            ).fetchone()
            if candidate is None:
                idle_polls += 1
                if idle_polls > 20:
                    break  # queue empty long enough, stop
                time.sleep(0.01)
                continue
            idle_polls = 0
            task_id = candidate["id"]

            try:
                won = kb.claim_task(
                    conn, task_id, claimer=f"worker-{worker_id}",
                )
            except sqlite3.OperationalError as exc:
                log.append({"kind": "sqlite_err_on_claim", "task": task_id, "err": str(exc)})
                continue
            if won is None:
                # Another worker got there first — expected contention, not an error.
                log.append({"kind": "lost_claim_race", "task": task_id})
                continue

            run = kb.latest_run(conn, task_id)
            log.append({
                "kind": "claimed",
                "task": task_id,
                "worker": worker_id,
                "run_id": run.id,
                "t": time.monotonic() - t_start,
            })

            # Simulate short, variable work.
            time.sleep(random.uniform(0.001, 0.05))

            try:
                kb.complete_task(
                    conn, task_id,
                    result=f"done by worker-{worker_id}",
                    summary=f"worker-{worker_id} finished task {task_id}",
                    metadata={"worker_id": worker_id, "run_id": run.id},
                )
            except sqlite3.OperationalError as exc:
                log.append({"kind": "sqlite_err_on_complete", "task": task_id, "err": str(exc)})
                continue
            log.append({
                "kind": "completed",
                "task": task_id,
                "worker": worker_id,
                "run_id": run.id,
                "t": time.monotonic() - t_start,
            })
        finally:
            conn.close()

    with open(result_file, "w") as f:
        json.dump(log, f)
|
||||
|
||||
|
||||
def main():
    """Seed tasks, race NUM_WORKERS processes for them, then verify the invariants.

    Worker event logs are written to a fresh per-run temp directory, so a
    file left by an earlier run can never stand in for a worker that died.
    Prints stats, and exits with status 1 (after listing up to 20 failures)
    when any invariant check fails.
    """
    home = tempfile.mkdtemp(prefix="hermes_concurrency_")
    print(f"HERMES_HOME = {home}")

    # Seed.
    os.environ["HERMES_HOME"] = home
    os.environ["HOME"] = home
    sys.path.insert(0, WT)
    from hermes_cli import kanban_db as kb

    kb.init_db()
    conn = kb.connect()
    for i in range(NUM_TASKS):
        kb.create_task(
            conn, title=f"task #{i}", assignee="shared",
            tenant="concurrency-test",
        )
    conn.close()
    print(f"Seeded {NUM_TASKS} tasks.")

    # Spawn workers.
    ctx = mp.get_context("spawn")
    # Fresh directory per run: a missing file then reliably means the worker
    # produced nothing in THIS run.
    results_dir = tempfile.mkdtemp(prefix="hermes_concurrency_results_")
    result_files = [
        os.path.join(results_dir, f"concurrency_worker_{i}.json")
        for i in range(NUM_WORKERS)
    ]
    procs = []
    start = time.monotonic()
    for i in range(NUM_WORKERS):
        p = ctx.Process(target=worker_loop, args=(i, home, result_files[i]))
        p.start()
        procs.append(p)

    for p in procs:
        p.join(timeout=WORKER_TIMEOUT_S + 30)
        if p.is_alive():
            p.terminate()
            p.join()

    elapsed = time.monotonic() - start
    print(f"All workers done in {elapsed:.1f}s")

    # Aggregate worker events.
    all_events = []
    for i, path in enumerate(result_files):
        if not os.path.isfile(path):
            print(f" WORKER {i} produced no result file — died?")
            continue
        with open(path) as fh:
            all_events.extend(json.load(fh))

    # ============ INVARIANT CHECKS ============
    print()
    print("=" * 60)
    print("INVARIANT CHECKS")
    print("=" * 60)

    failures = []

    # Check 1: no task claimed by two different workers
    claims_by_task = {}
    for e in all_events:
        if e["kind"] == "claimed":
            prev = claims_by_task.get(e["task"])
            if prev is not None and prev["worker"] != e["worker"]:
                failures.append(
                    f"DOUBLE CLAIM: task {e['task']} claimed by "
                    f"worker {prev['worker']} AND worker {e['worker']}"
                )
            claims_by_task[e["task"]] = e

    # Check 2: every completion has a matching claim from the same worker
    for e in all_events:
        if e["kind"] == "completed":
            prev_claim = claims_by_task.get(e["task"])
            if prev_claim is None:
                failures.append(f"COMPLETION WITHOUT CLAIM: task {e['task']}")
            elif prev_claim["worker"] != e["worker"]:
                failures.append(
                    f"WORKER MISMATCH: task {e['task']} claimed by "
                    f"{prev_claim['worker']} but completed by {e['worker']}"
                )

    # Check 3: DB state — every task should be in 'done', no dangling claims
    conn = kb.connect()
    try:
        bad_status = conn.execute(
            "SELECT id, status, claim_lock, current_run_id FROM tasks "
            "WHERE status != 'done' OR claim_lock IS NOT NULL "
            "OR current_run_id IS NOT NULL"
        ).fetchall()
        for row in bad_status:
            failures.append(
                f"BAD FINAL STATE: task {row['id']} status={row['status']} "
                f"claim_lock={row['claim_lock']} current_run_id={row['current_run_id']}"
            )

        # Check 4: exactly one run per task, all closed as completed
        bad_runs = conn.execute(
            "SELECT task_id, COUNT(*) as n FROM task_runs "
            "GROUP BY task_id HAVING n != 1"
        ).fetchall()
        for row in bad_runs:
            failures.append(
                f"WRONG RUN COUNT: task {row['task_id']} has {row['n']} runs (expected 1)"
            )

        open_runs = conn.execute(
            "SELECT id, task_id FROM task_runs WHERE ended_at IS NULL"
        ).fetchall()
        for row in open_runs:
            failures.append(f"OPEN RUN: run {row['id']} on task {row['task_id']}")

        wrong_outcomes = conn.execute(
            "SELECT task_id, outcome FROM task_runs "
            "WHERE outcome IS NULL OR outcome != 'completed'"
        ).fetchall()
        for row in wrong_outcomes:
            failures.append(
                f"WRONG OUTCOME: task {row['task_id']} run outcome={row['outcome']}"
            )

        # Check 5: event counts — exactly NUM_TASKS completed events
        completed_events = conn.execute(
            "SELECT COUNT(*) as n FROM task_events WHERE kind='completed'"
        ).fetchone()["n"]
        if completed_events != NUM_TASKS:
            failures.append(
                f"EVENT COUNT MISMATCH: {completed_events} completed events "
                f"expected {NUM_TASKS}"
            )

        # Check 6: count SQLite errors that escaped retry
        sqlite_errs = sum(
            1 for e in all_events if e["kind"].startswith("sqlite_err")
        )
        if sqlite_errs > 0:
            failures.append(f"UNRETRIED SQLITE ERRORS: {sqlite_errs}")
    finally:
        conn.close()

    # ============ STATS ============
    print()
    total_claims = sum(1 for e in all_events if e["kind"] == "claimed")
    total_completes = sum(1 for e in all_events if e["kind"] == "completed")
    total_lost_races = sum(1 for e in all_events if e["kind"] == "lost_claim_race")

    per_worker = {}
    for e in all_events:
        if e["kind"] == "completed":
            per_worker[e["worker"]] = per_worker.get(e["worker"], 0) + 1

    print(f"Total claims: {total_claims}")
    print(f"Total completes: {total_completes}")
    print(f"Lost claim races: {total_lost_races} (expected contention; not a bug)")
    print(f"Elapsed: {elapsed:.2f}s")
    print(f"Throughput: {NUM_TASKS/elapsed:.1f} tasks/sec")
    print("Per-worker completions:")
    for w in sorted(per_worker):
        print(f" worker-{w}: {per_worker[w]}")

    if failures:
        print()
        print("=" * 60)
        print(f"FAILURES ({len(failures)}):")
        print("=" * 60)
        for msg in failures[:20]:
            print(f" {msg}")
        if len(failures) > 20:
            print(f" ... and {len(failures) - 20} more")
        sys.exit(1)
    else:
        print()
        print("✔ ALL INVARIANTS HELD")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -1,350 +0,0 @@
|
||||
"""Harder concurrency stress: mixed operations + larger scale.
|
||||
|
||||
Scales to 500 tasks, 10 workers, 60s runtime. Each worker randomly:
|
||||
- claims + completes (70%)
|
||||
- claims + blocks with a reason (15%)
|
||||
- unblocks a random blocked task (10%)
|
||||
- archives a random done task (5%)
|
||||
|
||||
Adds a background "dispatcher" process that calls release_stale_claims
|
||||
and detect_crashed_workers every 200ms, racing against the workers to
|
||||
surface TTL + crash detection races.
|
||||
|
||||
Pass criteria: runs invariant holds, no double-completions, no orphan
|
||||
runs, no SQLite errors escape the retry layer.
|
||||
"""
|
||||
|
||||
import json
|
||||
import multiprocessing as mp
|
||||
import os
|
||||
import random
|
||||
import sqlite3
|
||||
import sys
|
||||
import tempfile
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
NUM_WORKERS = 10
|
||||
NUM_TASKS = 500
|
||||
RUN_DURATION_S = 30
|
||||
WT = str(Path(__file__).resolve().parents[2])
|
||||
|
||||
|
||||
def worker_loop(worker_id: int, hermes_home: str, result_file: str) -> None:
    """Run a random mix of Kanban operations for up to RUN_DURATION_S seconds.

    Each pass opens a new connection and draws one random number: below 0.10
    it tries to unblock a random blocked task, below 0.15 it tries to archive
    a random done task, otherwise it claims a ready task (5 s TTL) and then
    blocks it (20% of the time) or completes it. Every outcome, including
    SQLite errors, is logged and dumped as JSON to ``result_file`` at the end.

    Args:
        worker_id: Number used in the claimer name and in logged events.
        hermes_home: Directory exported as both HERMES_HOME and HOME.
        result_file: Path the JSON event log is written to.
    """
    os.environ["HERMES_HOME"] = hermes_home
    os.environ["HOME"] = hermes_home
    sys.path.insert(0, WT)
    from hermes_cli import kanban_db as kb

    log = []
    t_start = time.monotonic()
    idle_rounds = 0

    def sqlite_failure(op_name, task_id, exc):
        # Error text is truncated to 100 characters in the log.
        return {"kind": "sqlite_err", "op": op_name,
                "task": task_id, "err": str(exc)[:100]}

    while time.monotonic() - t_start < RUN_DURATION_S:
        conn = kb.connect()
        try:
            roll = random.random()

            if roll < 0.10:
                # Try to unblock a blocked task.
                picked = conn.execute(
                    "SELECT id FROM tasks WHERE status='blocked' "
                    "ORDER BY RANDOM() LIMIT 1"
                ).fetchone()
                if picked:
                    try:
                        ok = kb.unblock_task(conn, picked["id"])
                        log.append({"kind": "unblocked" if ok else "unblock_noop",
                                    "task": picked["id"], "worker": worker_id})
                    except sqlite3.OperationalError as exc:
                        log.append(sqlite_failure("unblock", picked["id"], exc))
                continue

            if roll < 0.15:
                # Try to archive a done task.
                picked = conn.execute(
                    "SELECT id FROM tasks WHERE status='done' "
                    "ORDER BY RANDOM() LIMIT 1"
                ).fetchone()
                if picked:
                    try:
                        kb.archive_task(conn, picked["id"])
                        log.append({"kind": "archived", "task": picked["id"],
                                    "worker": worker_id})
                    except sqlite3.OperationalError as exc:
                        log.append(sqlite_failure("archive", picked["id"], exc))
                continue

            # Default: claim + complete-or-block.
            picked = conn.execute(
                "SELECT id FROM tasks WHERE status='ready' "
                "AND claim_lock IS NULL LIMIT 1"
            ).fetchone()
            if picked is None:
                idle_rounds += 1
                if idle_rounds > 50:
                    break
                time.sleep(0.02)
                continue
            idle_rounds = 0
            task_id = picked["id"]

            try:
                won = kb.claim_task(
                    conn, task_id, claimer=f"worker-{worker_id}",
                    ttl_seconds=5,  # short TTL so reclaim races in
                )
            except sqlite3.OperationalError as exc:
                log.append(sqlite_failure("claim", task_id, exc))
                continue
            if won is None:
                log.append({"kind": "lost_claim_race", "task": task_id})
                continue

            run = kb.latest_run(conn, task_id)
            log.append({"kind": "claimed", "task": task_id, "worker": worker_id,
                        "run_id": run.id, "t": time.monotonic() - t_start})

            time.sleep(random.uniform(0.005, 0.05))

            # 20% of the time, block instead of complete.
            should_block = random.random() < 0.20
            if should_block:
                try:
                    kb.block_task(conn, task_id,
                                  reason=f"blocked by worker-{worker_id}")
                    log.append({"kind": "blocked", "task": task_id,
                                "worker": worker_id, "run_id": run.id})
                except sqlite3.OperationalError as exc:
                    log.append(sqlite_failure("block", task_id, exc))
            else:
                try:
                    kb.complete_task(
                        conn, task_id,
                        result=f"done by worker-{worker_id}",
                        summary=f"worker-{worker_id} ok",
                        metadata={"worker_id": worker_id},
                    )
                    log.append({"kind": "completed", "task": task_id,
                                "worker": worker_id, "run_id": run.id,
                                "t": time.monotonic() - t_start})
                except sqlite3.OperationalError as exc:
                    log.append(sqlite_failure("complete", task_id, exc))
        finally:
            conn.close()

    with open(result_file, "w") as f:
        json.dump(log, f)
|
||||
|
||||
|
||||
def reclaimer_loop(hermes_home: str, result_file: str) -> None:
    """Call ``release_stale_claims`` every 0.2 s for RUN_DURATION_S + 2 seconds.

    Runs as a background process next to the workers. Each pass opens a new
    connection; a truthy reclaim count is logged with a timestamp, and a
    SQLite operational error is logged instead of raised. The log is dumped
    as JSON to ``result_file`` when the loop ends.

    Args:
        hermes_home: Directory exported as both HERMES_HOME and HOME.
        result_file: Path the JSON event log is written to.
    """
    os.environ["HERMES_HOME"] = hermes_home
    os.environ["HOME"] = hermes_home
    sys.path.insert(0, WT)
    from hermes_cli import kanban_db as kb

    log = []
    began = time.monotonic()
    run_for = RUN_DURATION_S + 2

    while time.monotonic() - began < run_for:
        conn = kb.connect()
        try:
            try:
                n_reclaimed = kb.release_stale_claims(conn)
            except sqlite3.OperationalError as exc:
                log.append({"kind": "sqlite_err", "op": "reclaim",
                            "err": str(exc)[:100]})
            else:
                if n_reclaimed:
                    log.append({"kind": "reclaimed", "count": n_reclaimed,
                                "t": time.monotonic() - began})
        finally:
            conn.close()
        time.sleep(0.2)

    with open(result_file, "w") as f:
        json.dump(log, f)
|
||||
|
||||
|
||||
def main():
    """Run the mixed-stress scenario and verify kernel invariants.

    Seeds ``NUM_TASKS`` tasks in a fresh temporary HERMES_HOME, launches
    ``NUM_WORKERS`` worker processes plus one reclaimer, aggregates the JSON
    event logs they write, and checks invariants against both those logs and
    the final database state. Prints a report and exits with status 1 when
    any invariant failed.
    """
    home = tempfile.mkdtemp(prefix="hermes_mixed_stress_")
    print(f"HERMES_HOME = {home}")

    os.environ["HERMES_HOME"] = home
    os.environ["HOME"] = home
    sys.path.insert(0, WT)
    from hermes_cli import kanban_db as kb

    kb.init_db()
    conn = kb.connect()
    try:
        for i in range(NUM_TASKS):
            kb.create_task(
                conn, title=f"t#{i}", assignee="shared", tenant="mixed-stress",
            )
    finally:
        conn.close()
    print(f"Seeded {NUM_TASKS} tasks, launching {NUM_WORKERS} workers + 1 reclaimer")

    ctx = mp.get_context("spawn")
    # Result files live in this run's private temp dir. With fixed names in
    # the shared /tmp, a file left over from an earlier run would be read as
    # if it were fresh and would hide a worker that died without reporting.
    worker_results = [
        os.path.join(home, f"mixed_worker_{i}.json") for i in range(NUM_WORKERS)
    ]
    reclaim_result = os.path.join(home, "mixed_reclaim.json")

    procs = []
    start = time.monotonic()
    for i in range(NUM_WORKERS):
        p = ctx.Process(target=worker_loop, args=(i, home, worker_results[i]))
        p.start()
        procs.append(p)
    r = ctx.Process(target=reclaimer_loop, args=(home, reclaim_result))
    r.start()
    procs.append(r)

    for p in procs:
        p.join(timeout=RUN_DURATION_S + 30)
        if p.is_alive():
            # Hung past the deadline: kill it so the run can still report.
            p.terminate()
            p.join()

    elapsed = time.monotonic() - start
    print(f"Done in {elapsed:.1f}s")

    # Aggregate.
    all_events = []
    for i, path in enumerate(worker_results):
        if os.path.isfile(path):
            with open(path) as fh:
                all_events.extend(json.load(fh))
        else:
            print(f" WORKER {i} died with no result file!")
    reclaim_events = []
    if os.path.isfile(reclaim_result):
        with open(reclaim_result) as fh:
            reclaim_events = json.load(fh)

    # ============ INVARIANT CHECKS ============
    print()
    print("=" * 60)
    print("INVARIANT CHECKS")
    print("=" * 60)

    failures = []

    # Per-run attribution tracking
    claims = [e for e in all_events if e["kind"] == "claimed"]
    completions = [e for e in all_events if e["kind"] == "completed"]

    # Every completion must have a matching claim on the same run_id AND
    # the same worker (workers don't steal each other's runs).
    claims_by_run = {c["run_id"]: c for c in claims}
    for comp in completions:
        claim = claims_by_run.get(comp["run_id"])
        if claim is None:
            # A worker must never be able to complete a run it did not
            # claim, even if the run was reclaimed from another worker.
            failures.append(
                f"COMPLETION WITHOUT CLAIM: task {comp['task']} run {comp['run_id']} "
                f"by worker {comp['worker']}"
            )
        elif claim["worker"] != comp["worker"]:
            failures.append(
                f"CROSS-WORKER COMPLETION: run {comp['run_id']} claimed by "
                f"worker {claim['worker']} but completed by worker {comp['worker']}"
            )

    # SQLite errors that escaped the retry layer
    sqlite_errs = [e for e in all_events if e["kind"] == "sqlite_err"]
    if sqlite_errs:
        for e in sqlite_errs[:5]:
            failures.append(f"SQLITE ERROR: op={e.get('op')} err={e.get('err')}")
        if len(sqlite_errs) > 5:
            failures.append(f" ... and {len(sqlite_errs) - 5} more sqlite errs")

    # DB final state — check pointer/run consistency and collect counts.
    conn = kb.connect()
    try:
        # Invariant: a task's current_run_id must not point at an ended run
        inconsistent = conn.execute("""
            SELECT t.id, t.status, t.current_run_id
            FROM tasks t
            WHERE t.current_run_id IS NOT NULL
            AND EXISTS (SELECT 1 FROM task_runs r
            WHERE r.id = t.current_run_id AND r.ended_at IS NOT NULL)
        """).fetchall()
        for row in inconsistent:
            failures.append(
                f"INVARIANT VIOLATION: task {row['id']} status={row['status']} "
                f"has current_run_id={row['current_run_id']} but run is ended"
            )

        # Invariant: no orphan open runs
        orphans = conn.execute("""
            SELECT r.id, r.task_id, r.status
            FROM task_runs r
            LEFT JOIN tasks t ON t.current_run_id = r.id
            WHERE r.ended_at IS NULL AND t.id IS NULL
        """).fetchall()
        for row in orphans:
            failures.append(
                f"ORPHAN OPEN RUN: run {row['id']} on task {row['task_id']}"
            )

        # Counts — should roughly balance.
        status_counts = dict(
            conn.execute("SELECT status, COUNT(*) FROM tasks GROUP BY status").fetchall()
        )
        run_outcome_counts = dict(
            conn.execute(
                "SELECT outcome, COUNT(*) FROM task_runs "
                "WHERE ended_at IS NOT NULL GROUP BY outcome"
            ).fetchall()
        )
        active_runs = conn.execute(
            "SELECT COUNT(*) FROM task_runs WHERE ended_at IS NULL"
        ).fetchone()[0]

    finally:
        conn.close()

    # ============ STATS ============
    print()
    print(f"Workers: {NUM_WORKERS}, Tasks: {NUM_TASKS}")
    print(f"Elapsed: {elapsed:.1f}s")
    print(f"Events collected: {len(all_events)} (+{len(reclaim_events)} reclaim)")
    print()
    print("Operations:")
    op_counts = {}
    for e in all_events:
        op_counts[e["kind"]] = op_counts.get(e["kind"], 0) + 1
    for k in sorted(op_counts):
        print(f" {k:<25} {op_counts[k]}")

    print()
    print("Final task status:")
    for s, n in sorted(status_counts.items()):
        print(f" {s:<10} {n}")
    print("Final run outcomes:")
    for o, n in sorted(run_outcome_counts.items(), key=lambda x: (x[0] or '',)):
        # outcome may be NULL; None rejects the '<12' format spec, so
        # stringify it before padding.
        print(f" {str(o):<12} {n}")
    print(f" active {active_runs}")

    if failures:
        print()
        print("=" * 60)
        print(f"FAILURES ({len(failures)}):")
        print("=" * 60)
        for msg in failures[:30]:
            print(f" {msg}")
        if len(failures) > 30:
            print(f" ... and {len(failures) - 30} more")
        sys.exit(1)
    else:
        print()
        print("✔ ALL INVARIANTS HELD UNDER MIXED STRESS")
|
||||
|
||||
|
||||
# Script entry point: run the stress scenario only when executed directly.
if __name__ == "__main__":
    main()
|
||||
@@ -1,241 +0,0 @@
|
||||
"""Target the reclaim race specifically.
|
||||
|
||||
Workers claim tasks with a 1s TTL but sleep 2s before completing. The
|
||||
reclaimer runs every 200ms. Scenario: worker claims, reclaimer expires
|
||||
the claim mid-work, worker tries to complete AFTER its run has been
|
||||
reclaimed.
|
||||
|
||||
Expected behavior (per design): the worker's complete_task should
|
||||
either succeed on the reclaimed-and-re-claimed-by-another-worker case
|
||||
(no, it should refuse — the claim was invalidated), OR succeed by
|
||||
grace (we "forgive" a late complete from the original worker if no
|
||||
one else picked it up).
|
||||
|
||||
Actually looking at complete_task: it doesn't check claim_lock. It just
|
||||
transitions from 'running' -> 'done'. So if the reclaimer moved it back
|
||||
to 'ready', the late worker's complete_task will fail (CAS on
|
||||
status='running' fails). This is the CORRECT behavior.
|
||||
|
||||
Invariant being tested: race between worker.complete and
|
||||
dispatcher.reclaim must not produce a double-run-close or other
|
||||
inconsistency.
|
||||
"""
|
||||
|
||||
import json
|
||||
import multiprocessing as mp
|
||||
import os
|
||||
import random
|
||||
import sqlite3
|
||||
import sys
|
||||
import tempfile
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
NUM_WORKERS = 5  # number of worker processes launched by main()
NUM_TASKS = 50  # number of tasks seeded before the workers start
TTL = 1  # passed to claim_task as ttl_seconds
WORK_DURATION_S = 2.0  # longer than TTL => reclaimer wins
# Prepended to sys.path before importing hermes_cli.
WT = str(Path(__file__).resolve().parents[2])
|
||||
|
||||
|
||||
def worker_loop(worker_id: int, hermes_home: str, result_file: str) -> None:
    """Claim tasks, hold each longer than the TTL, then try to complete it.

    Runs for at most 40 seconds, or until more than 30 consecutive polls
    find no ready, unlocked task. Each claim uses ``TTL`` seconds and is
    followed by a sleep of about ``WORK_DURATION_S`` before ``complete_task``
    is attempted. All observed events are written to ``result_file`` as JSON.
    """
    for var in ("HERMES_HOME", "HOME"):
        os.environ[var] = hermes_home
    sys.path.insert(0, WT)
    from hermes_cli import kanban_db as kb

    log = []
    record = log.append
    began = time.monotonic()
    empty_polls = 0
    me = f"worker-{worker_id}"

    while time.monotonic() - began < 40:
        db = kb.connect()
        try:
            candidate = db.execute(
                "SELECT id FROM tasks WHERE status='ready' AND claim_lock IS NULL LIMIT 1"
            ).fetchone()
            if candidate is None:
                empty_polls += 1
                if empty_polls > 30:
                    break
                time.sleep(0.05)
                continue

            empty_polls = 0
            task_id = candidate["id"]

            try:
                claim = kb.claim_task(db, task_id, claimer=me, ttl_seconds=TTL)
            except sqlite3.OperationalError as exc:
                record({"kind": "sqlite_err", "op": "claim", "err": str(exc)[:100]})
                continue
            if claim is None:
                # Another worker took the task between the SELECT and the claim.
                record({"kind": "lost_claim", "task": task_id})
                continue

            run = kb.latest_run(db, task_id)
            record({"kind": "claimed", "task": task_id, "worker": worker_id,
                    "run_id": run.id})

            # Sleep longer than TTL so reclaimer has a chance to intervene
            time.sleep(WORK_DURATION_S + random.uniform(-0.3, 0.3))

            try:
                accepted = kb.complete_task(
                    db, task_id,
                    result=f"by worker-{worker_id}",
                    summary=f"worker-{worker_id} finished",
                )
            except sqlite3.OperationalError as exc:
                record({"kind": "sqlite_err", "op": "complete", "err": str(exc)[:100]})
            else:
                if accepted:
                    outcome = "complete_ok"
                else:
                    outcome = "complete_refused"
                record({"kind": outcome, "task": task_id,
                        "worker": worker_id, "run_id": run.id})
        finally:
            db.close()

    with open(result_file, "w") as out:
        json.dump(log, out)
|
||||
|
||||
|
||||
def reclaimer_loop(hermes_home: str, result_file: str) -> None:
    """Release stale claims about every 200 ms for 42 seconds.

    Each tick opens a fresh connection and calls ``kb.release_stale_claims``.
    Non-zero reclaim counts and SQLite operational errors are recorded and
    written to ``result_file`` as JSON once the loop ends.
    """
    for var in ("HERMES_HOME", "HOME"):
        os.environ[var] = hermes_home
    sys.path.insert(0, WT)
    from hermes_cli import kanban_db as kb

    log = []
    began = time.monotonic()
    while time.monotonic() - began < 42:
        db = kb.connect()
        try:
            released = kb.release_stale_claims(db)
        except sqlite3.OperationalError as exc:
            log.append({"kind": "sqlite_err", "err": str(exc)[:100]})
        else:
            if released:
                log.append({"kind": "reclaimed", "count": released,
                            "t": time.monotonic() - began})
        finally:
            db.close()
        time.sleep(0.2)

    with open(result_file, "w") as out:
        json.dump(log, out)
|
||||
|
||||
|
||||
def main():
    """Run the reclaim-race scenario and verify the database stays consistent.

    Seeds ``NUM_TASKS`` tasks in a fresh temporary HERMES_HOME, launches
    ``NUM_WORKERS`` workers plus one reclaimer, aggregates their JSON event
    logs, and then checks run/task pointer invariants directly in the
    database. Exits with status 1 on any failure, including the case where
    no run was reclaimed at all.
    """
    home = tempfile.mkdtemp(prefix="hermes_reclaim_race_")
    os.environ["HERMES_HOME"] = home
    os.environ["HOME"] = home
    sys.path.insert(0, WT)
    from hermes_cli import kanban_db as kb

    kb.init_db()
    conn = kb.connect()
    try:
        for i in range(NUM_TASKS):
            kb.create_task(conn, title=f"t{i}", assignee="shared",
                           tenant="reclaim-race")
    finally:
        conn.close()
    print(f"Seeded {NUM_TASKS} tasks. TTL={TTL}s, work_duration={WORK_DURATION_S}s")
    print("(worker work > TTL guarantees reclaims)")

    ctx = mp.get_context("spawn")
    # Keep result files inside this run's private temp dir. Fixed names in
    # the shared /tmp would let a stale file from an earlier run be read as
    # this run's output.
    worker_results = [
        os.path.join(home, f"rc_worker_{i}.json") for i in range(NUM_WORKERS)
    ]
    reclaim_result = os.path.join(home, "rc_reclaim.json")
    procs = []
    for i in range(NUM_WORKERS):
        p = ctx.Process(target=worker_loop, args=(i, home, worker_results[i]))
        p.start()
        procs.append(p)
    r = ctx.Process(target=reclaimer_loop, args=(home, reclaim_result))
    r.start()
    procs.append(r)

    for p in procs:
        p.join(timeout=60)
        if p.is_alive():
            # Hung past the deadline: kill it so the run can still report.
            p.terminate()
            p.join()

    # Aggregate.
    all_events = []
    for i, path in enumerate(worker_results):
        if os.path.isfile(path):
            with open(path) as fh:
                all_events.extend(json.load(fh))
        else:
            # Surface workers that exited without writing their event log.
            print(f" WORKER {i} died with no result file!")
    reclaim_events = []
    if os.path.isfile(reclaim_result):
        with open(reclaim_result) as fh:
            reclaim_events = json.load(fh)

    op_counts = {}
    for e in all_events:
        op_counts[e["kind"]] = op_counts.get(e["kind"], 0) + 1
    total_reclaims = sum(e.get("count", 0) for e in reclaim_events)
    print(f"\nReclaimer fired {len(reclaim_events)} times, total tasks reclaimed: {total_reclaims}")
    print("Worker events:")
    for k in sorted(op_counts):
        print(f" {k:<25} {op_counts[k]}")

    # Invariant checks
    failures = []
    conn = kb.connect()
    try:
        # Any task stuck with current_run_id pointing at a closed run?
        bad = conn.execute("""
            SELECT t.id, t.status, t.current_run_id, r.ended_at, r.outcome
            FROM tasks t
            JOIN task_runs r ON r.id = t.current_run_id
            WHERE r.ended_at IS NOT NULL
        """).fetchall()
        for row in bad:
            failures.append(
                f"INVARIANT VIOLATION: task {row['id']} status={row['status']} "
                f"current_run_id={row['current_run_id']} but run ended "
                f"outcome={row['outcome']}"
            )
        # Every run with NULL ended_at should still have the task pointing at it
        orphans = conn.execute("""
            SELECT r.id, r.task_id
            FROM task_runs r
            LEFT JOIN tasks t ON t.current_run_id = r.id
            WHERE r.ended_at IS NULL AND t.id IS NULL
        """).fetchall()
        for row in orphans:
            failures.append(f"ORPHAN OPEN RUN: run {row['id']} on task {row['task_id']}")
        # Event counts
        claim_evts = conn.execute(
            "SELECT COUNT(*) FROM task_events WHERE kind='claimed'").fetchone()[0]
        reclaim_evts = conn.execute(
            "SELECT COUNT(*) FROM task_events WHERE kind='reclaimed'").fetchone()[0]
        comp_evts = conn.execute(
            "SELECT COUNT(*) FROM task_events WHERE kind='completed'").fetchone()[0]
        print(f"\nDB event counts: claimed={claim_evts} reclaimed={reclaim_evts} completed={comp_evts}")
        # Every reclaimed run must have ended_at set
        unended_reclaims = conn.execute(
            "SELECT COUNT(*) FROM task_runs WHERE outcome='reclaimed' AND ended_at IS NULL"
        ).fetchone()[0]
        if unended_reclaims:
            failures.append(f"UNENDED RECLAIMED RUNS: {unended_reclaims}")
        # Count of completed runs
        comp_runs = conn.execute(
            "SELECT COUNT(*) FROM task_runs WHERE outcome='completed'"
        ).fetchone()[0]
        reclaim_runs = conn.execute(
            "SELECT COUNT(*) FROM task_runs WHERE outcome='reclaimed'"
        ).fetchone()[0]
        print(f"DB run outcomes: completed={comp_runs} reclaimed={reclaim_runs}")
    finally:
        conn.close()

    # A run with zero reclaims never exercised the race under test.
    if reclaim_runs == 0:
        failures.append("NO RECLAIMS HAPPENED — test didn't stress what it was supposed to")

    if failures:
        print(f"\nFAILURES ({len(failures)}):")
        for msg in failures[:20]:
            print(f" {msg}")
        sys.exit(1)
    else:
        print("\n✔ RECLAIM RACE INVARIANTS HELD")
|
||||
|
||||
|
||||
# Script entry point: run the reclaim-race scenario only when executed directly.
if __name__ == "__main__":
    main()
|
||||
@@ -1,283 +0,0 @@
|
||||
"""Randomized property testing for the Kanban kernel.
|
||||
|
||||
Generates NUM_SEQUENCES (500) random operation sequences of up to
OPS_PER_SEQUENCE (100) ops each on small task graphs. After each step,
checks the full invariant set:
|
||||
|
||||
I1. If tasks.current_run_id IS NOT NULL, the run MUST exist AND
|
||||
ended_at MUST be NULL (we never point at a closed run).
|
||||
I2. If a run has ended_at NULL, SOME task MUST have current_run_id
|
||||
pointing at it (no orphan open runs).
|
||||
I3. task.status in the valid set {triage, todo, ready, running,
|
||||
blocked, done, archived}.
|
||||
I4. task.claim_lock NULL iff status not in (running,).
|
||||
I5. Every run has started_at <= ended_at (or ended_at is NULL).
|
||||
I6. If outcome is set, ended_at must also be set.
|
||||
I7. Events are strictly monotonic in (created_at, id).
|
||||
I8. task_events.run_id references a task_runs.id that exists
|
||||
(or is NULL).
|
||||
I9. Parent completion invariant: if all parents are 'done', the
|
||||
child cannot be in 'todo' status (recompute_ready should have
|
||||
promoted it). This is called out in the comment on
|
||||
recompute_ready; verify it holds after every random seq.
|
||||
|
||||
Not using hypothesis the lib; just Python random for simplicity.
|
||||
"""
|
||||
|
||||
import os
|
||||
import random
|
||||
import sys
|
||||
import tempfile
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
# Prepended to sys.path before importing hermes_cli.
WT = str(Path(__file__).resolve().parents[2])
NUM_SEQUENCES = 500  # independent random sequences, each on a fresh database
OPS_PER_SEQUENCE = 100  # op attempts per sequence (skipped ops are not counted)
TASK_POOL = 10  # NOTE(review): not referenced by the visible code — confirm it is still needed

# Operation names that random_op() picks from uniformly.
OPS = [
    "create", "create_child", "claim", "complete", "block", "unblock",
    "archive", "heartbeat", "release_stale", "detect_crashed",
    "recompute_ready", "reassign",
]
|
||||
|
||||
|
||||
def assert_invariants(conn, kb, ops_log):
    """Check invariants I1-I6, I8 and I9 against the current database state.

    Despite the name, nothing is raised. When a check fails, up to ten
    violations and the last ten entries of ``ops_log`` are printed and
    ``False`` is returned; ``True`` is returned when every check passes.
    ``kb`` is not used by the checks. I7 is intentionally not checked.
    """
    problems = []

    # I1: current_run_id → run exists and not ended
    dangling = conn.execute("""
        SELECT t.id, t.current_run_id, r.ended_at, r.outcome
        FROM tasks t
        LEFT JOIN task_runs r ON r.id = t.current_run_id
        WHERE t.current_run_id IS NOT NULL
        AND (r.id IS NULL OR r.ended_at IS NOT NULL)
    """).fetchall()
    for rec in dangling:
        run_absent = rec["ended_at"] is None and rec["outcome"] is None
        state = "missing" if run_absent else f"closed ({rec['outcome']})"
        problems.append(
            f"I1: task {rec['id']} points at run {rec['current_run_id']} "
            f"which is {state}"
        )

    # I2: open run → some task points at it
    unreferenced = conn.execute("""
        SELECT r.id, r.task_id
        FROM task_runs r
        WHERE r.ended_at IS NULL
        AND NOT EXISTS (SELECT 1 FROM tasks t WHERE t.current_run_id = r.id)
    """).fetchall()
    problems.extend(
        f"I2: open run {rec['id']} on task {rec['task_id']} has no pointer"
        for rec in unreferenced
    )

    # I3: valid statuses
    allowed = {"triage", "todo", "ready", "running", "blocked", "done", "archived"}
    every_task = conn.execute("SELECT id, status FROM tasks").fetchall()
    problems.extend(
        f"I3: task {rec['id']} has invalid status {rec['status']!r}"
        for rec in every_task
        if rec["status"] not in allowed
    )

    # I4: claim_lock set only when running
    stray_locks = conn.execute("""
        SELECT id, status, claim_lock FROM tasks
        WHERE (status != 'running' AND claim_lock IS NOT NULL)
    """).fetchall()
    problems.extend(
        f"I4: task {rec['id']} status={rec['status']} but claim_lock={rec['claim_lock']!r}"
        for rec in stray_locks
    )

    # I5: run started_at <= ended_at
    reversed_times = conn.execute("""
        SELECT id, started_at, ended_at FROM task_runs
        WHERE ended_at IS NOT NULL AND started_at > ended_at
    """).fetchall()
    problems.extend(
        f"I5: run {rec['id']} started_at={rec['started_at']} > ended_at={rec['ended_at']}"
        for rec in reversed_times
    )

    # I6: outcome set → ended_at set
    open_with_outcome = conn.execute("""
        SELECT id, outcome, ended_at FROM task_runs
        WHERE outcome IS NOT NULL AND ended_at IS NULL
    """).fetchall()
    problems.extend(
        f"I6: run {rec['id']} outcome={rec['outcome']} but ended_at NULL"
        for rec in open_with_outcome
    )

    # I7: events monotonic in id (always true for autoincrement)
    # Skip — autoincrement guarantees it.

    # I8: event.run_id references existing run
    broken_refs = conn.execute("""
        SELECT e.id, e.run_id FROM task_events e
        LEFT JOIN task_runs r ON r.id = e.run_id
        WHERE e.run_id IS NOT NULL AND r.id IS NULL
    """).fetchall()
    problems.extend(
        f"I8: event {rec['id']} references missing run {rec['run_id']}"
        for rec in broken_refs
    )

    # I9: if all parents done → child not in todo
    # (Only applies to children with at least one parent)
    stuck_children = conn.execute("""
        SELECT c.id AS child_id,
        COUNT(*) AS n_parents,
        SUM(CASE WHEN p.status = 'done' THEN 1 ELSE 0 END) AS done_parents
        FROM tasks c
        JOIN task_links l ON l.child_id = c.id
        JOIN tasks p ON p.id = l.parent_id
        WHERE c.status = 'todo'
        GROUP BY c.id
        HAVING n_parents > 0 AND n_parents = done_parents
    """).fetchall()
    problems.extend(
        f"I9: task {rec['child_id']} is todo but all {rec['n_parents']} parents are done"
        for rec in stuck_children
    )

    if not problems:
        return True

    print(f"\n!!! INVARIANT VIOLATION after {len(ops_log)} ops:")
    for line in problems[:10]:
        print(f" {line}")
    hidden = len(problems) - 10
    if hidden > 0:
        print(f" ... and {hidden} more")
    print("\nLast 10 ops:")
    for entry in ops_log[-10:]:
        print(f" {entry}")
    return False
|
||||
|
||||
|
||||
def random_op(rng, conn, kb, task_pool):
    """Apply one randomly chosen kernel operation and describe the result.

    An operation name is drawn from ``OPS`` using ``rng``. Creation ops add
    the new task id to ``task_pool``; a successful archive removes it, as
    does discovering that the chosen id no longer resolves to a task.

    Returns a dict describing the op that was performed, or ``None`` when
    nothing was done (empty pool, or the chosen task could not be loaded).
    """
    assignees = ["w1", "w2", "w3", None]
    chosen = rng.choice(OPS)

    if chosen == "create":
        title = f"rand {rng.randint(0, 1000)}"
        owner = rng.choice(assignees)
        new_id = kb.create_task(conn, title=title, assignee=owner)
        task_pool.append(new_id)
        return {"op": "create", "tid": new_id}

    if chosen == "create_child" and task_pool:
        parent_id = rng.choice(task_pool)
        owner = rng.choice(assignees)
        new_id = kb.create_task(
            conn, title=f"child of {parent_id}",
            assignee=owner,
            parents=[parent_id],
        )
        task_pool.append(new_id)
        return {"op": "create_child", "tid": new_id, "parent": parent_id}

    # Every remaining op needs an existing task to act on.
    if not task_pool:
        return None

    target = rng.choice(task_pool)
    if kb.get_task(conn, target) is None:
        task_pool.remove(target)
        return None

    if chosen == "claim":
        ttl = rng.choice([1, 3, 10])
        handle = kb.claim_task(conn, target, ttl_seconds=ttl)
        return {"op": "claim", "tid": target, "ok": handle is not None}

    if chosen == "complete":
        # The label is drawn before the choice, even when None is picked.
        label = f"done via op {rng.randint(0, 1000)}"
        note = rng.choice([None, label])
        return {"op": "complete", "tid": target,
                "ok": kb.complete_task(conn, target, summary=note)}

    if chosen == "block":
        why = rng.choice([None, "rand block"])
        return {"op": "block", "tid": target,
                "ok": kb.block_task(conn, target, reason=why)}

    if chosen == "unblock":
        return {"op": "unblock", "tid": target,
                "ok": kb.unblock_task(conn, target)}

    if chosen == "archive":
        archived = kb.archive_task(conn, target)
        if archived:
            task_pool.remove(target)
        return {"op": "archive", "tid": target, "ok": archived}

    if chosen == "heartbeat":
        return {"op": "heartbeat", "tid": target,
                "ok": kb.heartbeat_worker(conn, target)}

    if chosen == "release_stale":
        return {"op": "release_stale", "n": kb.release_stale_claims(conn)}

    if chosen == "detect_crashed":
        # Runs detection against whatever worker state the sequence produced.
        found = kb.detect_crashed_workers(conn)
        return {"op": "detect_crashed", "n": len(found)}

    if chosen == "recompute_ready":
        return {"op": "recompute_ready", "promoted": kb.recompute_ready(conn)}

    if chosen == "reassign":
        # Reassignment isn't a direct API; simulate via assign_task
        new_owner = rng.choice(assignees)
        try:
            kb.assign_task(conn, target, new_owner)
        except Exception as exc:
            return {"op": "reassign", "tid": target, "err": str(exc)[:50]}
        return {"op": "reassign", "tid": target, "to": new_owner}

    return None
|
||||
|
||||
|
||||
def main():
    """Run ``NUM_SEQUENCES`` random op sequences and report invariant violations.

    Each sequence gets its own seed, a fresh temporary HERMES_HOME and a
    freshly imported ``hermes_cli`` package. ``assert_invariants`` is called
    after every op that was actually performed; the first violation ends the
    sequence. Exits with status 1 if any sequence violated an invariant.

    Temp directories of passing sequences are deleted; a failing sequence's
    directory is kept and its path printed so the database can be inspected.
    """
    import shutil  # local import: only needed for temp-dir cleanup

    total_ops = 0
    total_violations = 0

    # One insertion is enough; inserting per sequence would grow sys.path
    # by NUM_SEQUENCES duplicate entries.
    sys.path.insert(0, WT)

    for seq_idx in range(NUM_SEQUENCES):
        seed = random.randint(0, 10**9)
        rng = random.Random(seed)
        home = tempfile.mkdtemp(prefix=f"hermes_fuzz_{seq_idx}_")
        os.environ["HERMES_HOME"] = home
        os.environ["HOME"] = home

        # Fresh module state per sequence to avoid cached init paths.
        for name in [m for m in sys.modules if m.startswith("hermes_cli")]:
            del sys.modules[name]
        from hermes_cli import kanban_db as kb

        kb.init_db()
        conn = kb.connect()
        task_pool = []
        ops_log = []
        violated = False

        try:
            for i in range(OPS_PER_SEQUENCE):
                result = random_op(rng, conn, kb, task_pool)
                if result is None:
                    continue
                ops_log.append(result)
                total_ops += 1
                if not assert_invariants(conn, kb, ops_log):
                    violated = True
                    total_violations += 1
                    print(f" sequence {seq_idx} (seed={seed}) failed at op {i}")
                    break
        finally:
            conn.close()

        if violated:
            print(f" database kept for inspection: {home}")
        else:
            # mkdtemp never cleans up after itself; without this every run
            # would leave NUM_SEQUENCES directories behind.
            shutil.rmtree(home, ignore_errors=True)

        if seq_idx % 10 == 0:
            print(f" seq {seq_idx:3d}: {total_ops} ops so far, {total_violations} violations")

    print()
    print("=" * 60)
    print(f"Total sequences: {NUM_SEQUENCES}")
    print(f"Total operations: {total_ops}")
    print(f"Invariant violations: {total_violations}")
    if total_violations == 0:
        print("\n✔ ALL INVARIANTS HELD ACROSS RANDOMIZED SEQUENCES")
    else:
        print("\n✗ INVARIANT VIOLATIONS FOUND")
        sys.exit(1)
|
||||
|
||||
|
||||
# Script entry point: run the randomized sequences only when executed directly.
if __name__ == "__main__":
    main()
|
||||
@@ -1,228 +0,0 @@
|
||||
"""E2E: dispatcher spawns real Python subprocess workers.
|
||||
|
||||
This validates the IPC + lifecycle story that mocks can't:
|
||||
- spawn_fn returns a real PID
|
||||
- the child process resolves hermes_cli.kanban_db on its own
|
||||
- the child writes heartbeats via the CLI (real argparse, real init_db)
|
||||
- the child completes via the CLI with --summary + --metadata
|
||||
- the dispatcher observes all of this through the DB only
|
||||
- worker logs are captured to HERMES_HOME/kanban/logs/<task>.log
|
||||
- crash detection works against a real dead PID
|
||||
"""
|
||||
|
||||
import json
import os
import subprocess
import sys
import tempfile
import time
from pathlib import Path
|
||||
|
||||
# These constants use pathlib.Path, which must be imported at the top of
# this script; without that import the module fails with a NameError.
# WT is exported to child workers as PYTHONPATH and prepended to sys.path
# before importing hermes_cli.
WT = str(Path(__file__).resolve().parents[2])
# Worker script launched by the spawn function; located next to this file.
FAKE_WORKER = str(Path(__file__).parent / "_fake_worker.py")
# Interpreter used for every child process.
PY = sys.executable
|
||||
|
||||
|
||||
def make_spawn_fn(home: str):
    """Build a spawn function that launches the fake worker as a subprocess.

    The returned callable takes ``(task, workspace)``, starts ``FAKE_WORKER``
    in a new session with stdout and stderr appended to
    ``<home>/worker_<task.id>.log``, and returns the child's PID.
    """

    def _spawn(task, workspace):
        log_path = os.path.join(home, f"worker_{task.id}.log")
        env = {
            **os.environ,
            "HERMES_HOME": home,
            "HOME": home,
            "PYTHONPATH": WT,
            "HERMES_KANBAN_TASK": task.id,
            "HERMES_KANBAN_WORKSPACE": workspace,
            "PATH": f"{os.path.dirname(PY)}:{os.environ.get('PATH','')}",
        }
        # The child gets its own copy of the log descriptor, so the parent's
        # handle can be closed as soon as Popen returns. Leaving it open
        # would leak one file descriptor per spawned worker.
        with open(log_path, "ab") as log_f:
            proc = subprocess.Popen(
                [PY, FAKE_WORKER],
                stdin=subprocess.DEVNULL,
                stdout=log_f,
                stderr=subprocess.STDOUT,
                env=env,
                start_new_session=True,
            )
        return proc.pid

    return _spawn
|
||||
|
||||
|
||||
def main():
    """Run the real-subprocess end-to-end scenarios against a temp HERMES_HOME.

    Scenario A dispatches three tasks to real fake-worker subprocesses and
    checks status, run outcome, summary, metadata and heartbeat events.
    Scenario B spawns an orphaned ``sleep`` process, kills it, and checks
    that crash detection re-queues the task with a ``crashed`` run.
    Scenario C checks that Scenario A's worker log files exist on disk.
    Any failed check prints a message and exits with status 1.
    """
    import glob
    import signal

    home = tempfile.mkdtemp(prefix="hermes_e2e_")
    os.environ["HERMES_HOME"] = home
    os.environ["HOME"] = home
    sys.path.insert(0, WT)
    from hermes_cli import kanban_db as kb

    # Point the `hermes` CLI child processes will run at the worktree
    # hermes_cli.main. We do this by putting a shim on PATH.
    shim_dir = os.path.join(home, "bin")
    os.makedirs(shim_dir, exist_ok=True)
    shim_path = os.path.join(shim_dir, "hermes")
    with open(shim_path, "w") as f:
        f.write(f"""#!/bin/sh
exec {PY} -m hermes_cli.main "$@"
""")
    os.chmod(shim_path, 0o755)
    os.environ["PATH"] = f"{shim_dir}:{os.environ.get('PATH','')}"

    kb.init_db()
    conn = kb.connect()

    # ============ SCENARIO A: happy path, 3 tasks ============
    print("=" * 60)
    print("A. Real-subprocess happy path (3 tasks)")
    print("=" * 60)

    tids = []
    for i in range(3):
        tid = kb.create_task(
            conn, title=f"real-e2e-{i}", assignee="worker",
        )
        tids.append(tid)

    spawn_fn = make_spawn_fn(home)
    result = kb.dispatch_once(conn, spawn_fn=spawn_fn)
    print(f" dispatched: {len(result.spawned)} spawned")
    # The dispatcher sets worker_pid on each claimed task via _set_worker_pid.
    for tid in tids:
        task = kb.get_task(conn, tid)
        print(f" task {tid}: pid={task.worker_pid} status={task.status}")

    # Wait for all workers to complete (up to 10s).
    deadline = time.monotonic() + 10
    while time.monotonic() < deadline:
        statuses = [kb.get_task(conn, tid).status for tid in tids]
        if all(s == "done" for s in statuses):
            break
        time.sleep(0.2)

    print()
    failures = []
    for tid in tids:
        task = kb.get_task(conn, tid)
        runs = kb.list_runs(conn, tid)
        print(f" task {tid}: status={task.status}, current_run_id={task.current_run_id}, "
              f"runs={[(r.id, r.outcome) for r in runs]}")
        if task.status != "done":
            failures.append(f"task {tid} not done: status={task.status}")
        if task.current_run_id is not None:
            failures.append(f"task {tid} has dangling current_run_id={task.current_run_id}")
        if len(runs) != 1:
            failures.append(f"task {tid} has {len(runs)} runs, expected 1")
        else:
            r = runs[0]
            if r.outcome != "completed":
                failures.append(f"task {tid} run outcome={r.outcome}, expected completed")
            if not r.summary or "real-subprocess worker finished" not in r.summary:
                failures.append(f"task {tid} summary missing: {r.summary!r}")
            if not r.metadata or r.metadata.get("iterations") != 3:
                failures.append(f"task {tid} metadata missing iterations: {r.metadata}")
        # Heartbeat events should be present
        events = kb.list_events(conn, tid)
        heartbeats = [e for e in events if e.kind == "heartbeat"]
        if len(heartbeats) < 3:  # require at least three heartbeat events
            failures.append(f"task {tid} heartbeats={len(heartbeats)} expected >=3")

    if failures:
        print("\nFAILURES:")
        for msg in failures:
            print(f" {msg}")
        sys.exit(1)

    print("\n ✔ Scenario A: all 3 real-subprocess workers completed cleanly")

    # ============ SCENARIO B: crashed worker ============
    print()
    print("=" * 60)
    print("B. Crashed worker (kill -9 mid-heartbeat)")
    print("=" * 60)

    crash_tid = kb.create_task(
        conn, title="crash-e2e", assignee="worker",
    )

    # Spawn a worker that sleeps long enough for us to kill it.
    # CRITICAL: spawn through a double-fork so when we kill the child it
    # doesn't zombify under our pid (which would fool kill -0 liveness
    # checks into thinking it's still alive). In production the
    # dispatcher daemon is long-lived but its workers are reaped by init
    # after exit; the test needs to match that orphaning behavior.
    def spawn_sleeper(task, workspace):
        r, w = os.pipe()
        try:
            try:
                middleman = subprocess.Popen(
                    [
                        PY, "-c",
                        "import os,sys,subprocess;"
                        "p=subprocess.Popen(['sleep','30'],"
                        "stdin=subprocess.DEVNULL,"
                        "stdout=subprocess.DEVNULL,stderr=subprocess.DEVNULL,"
                        "start_new_session=True);"
                        "os.write(int(sys.argv[1]), str(p.pid).encode());"
                        "sys.exit(0)",
                        str(w),
                    ],
                    pass_fds=(w,),
                    stdin=subprocess.DEVNULL,
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.DEVNULL,
                )
            finally:
                # Close our write end even if Popen failed, so the read
                # below cannot block on a descriptor we still hold open.
                os.close(w)
            middleman.wait()  # middleman exits immediately, orphaning the sleep
            raw = os.read(r, 16)
        finally:
            os.close(r)
        if not raw:
            # Clearer than the bare ValueError that int(b"") would raise.
            raise RuntimeError("middleman exited without reporting the sleeper pid")
        return int(raw)

    kb.dispatch_once(conn, spawn_fn=spawn_sleeper)
    task = kb.get_task(conn, crash_tid)
    print(f" spawned sleeper pid={task.worker_pid} for {crash_tid}")
    if task.worker_pid is None:
        # os.kill(None, ...) would raise TypeError; report the real problem.
        print(" ✗ dispatcher did not record a worker pid")
        sys.exit(1)
    # Kill the sleeper forcibly
    os.kill(task.worker_pid, signal.SIGKILL)
    # Give the OS a moment to reap
    time.sleep(0.5)

    # Simulate next dispatcher tick — should detect the crashed PID
    crashed = kb.detect_crashed_workers(conn)
    print(f" detect_crashed_workers returned {len(crashed)} crashed (expected 1)")

    task = kb.get_task(conn, crash_tid)
    runs = kb.list_runs(conn, crash_tid)
    print(f" task status={task.status}, runs={[(r.id, r.outcome) for r in runs]}")

    if len(crashed) < 1:
        print(" ✗ crash NOT detected")
        sys.exit(1)
    if task.status != "ready":
        print(f" ✗ task should be back to ready, got {task.status}")
        sys.exit(1)
    if not runs:
        # Guard the index below: an empty run list is itself a failure.
        print(" ✗ task has no recorded runs")
        sys.exit(1)
    if runs[0].outcome != "crashed":
        print(f" ✗ run outcome should be 'crashed', got {runs[0].outcome!r}")
        sys.exit(1)
    print("\n ✔ Scenario B: crash detected, task re-queued, run outcome=crashed")

    # ============ SCENARIO C: worker log was captured ============
    print()
    print("=" * 60)
    print("C. Worker log captured to disk")
    print("=" * 60)
    # Scenario A workers wrote to <home>/worker_<task id>.log
    logs = glob.glob(os.path.join(home, "worker_*.log"))
    print(f" {len(logs)} worker log files")
    for lp in logs[:3]:
        size = os.path.getsize(lp)
        print(f" {os.path.basename(lp)}: {size} bytes")
        # Our fake worker is quiet (no prints); size=0 is fine
    if not logs:
        # Without this the scenario would report success with nothing captured.
        print(" ✗ no worker log files were written")
        sys.exit(1)

    conn.close()
    print("\n✔ ALL E2E SCENARIOS PASS")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -1,494 +0,0 @@
|
||||
"""Tests for the Kanban tool surface (tools/kanban_tools.py).
|
||||
|
||||
Verifies:
|
||||
- Tools are gated on HERMES_KANBAN_TASK: a normal chat session sees
|
||||
zero kanban tools in its schema; a worker session sees all seven.
|
||||
- Each handler's happy path.
|
||||
- Error paths (missing required args, bad metadata type, etc).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Gating
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_kanban_tools_hidden_without_env_var(monkeypatch, tmp_path):
    """With HERMES_KANBAN_TASK unset, the ``hermes-cli`` toolset schema
    must not contain any ``kanban_*`` tool."""
    hermes_home = tmp_path / ".hermes"
    hermes_home.mkdir()
    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
    monkeypatch.delenv("HERMES_KANBAN_TASK", raising=False)

    import tools.kanban_tools  # imported so the kanban tools are registered
    from tools.registry import registry
    from toolsets import resolve_toolset

    toolset = set(resolve_toolset("hermes-cli"))
    definitions = registry.get_definitions(toolset, quiet=True)

    # Collect every function name in the schema that carries the kanban prefix.
    kanban = set()
    for entry in definitions:
        if "function" not in entry:
            continue
        tool_name = entry["function"].get("name")
        if tool_name and tool_name.startswith("kanban_"):
            kanban.add(tool_name)

    assert kanban == set(), (
        f"kanban tools leaked into normal chat schema: {kanban}"
    )
|
||||
|
||||
|
||||
def test_kanban_tools_visible_with_env_var(monkeypatch, tmp_path):
    """With HERMES_KANBAN_TASK set, the ``hermes-cli`` toolset schema must
    contain exactly the seven kanban tools."""
    hermes_home = tmp_path / ".hermes"
    hermes_home.mkdir()
    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
    monkeypatch.setenv("HERMES_KANBAN_TASK", "t_fake")

    import tools.kanban_tools  # imported so the kanban tools are registered
    from tools.registry import registry
    from toolsets import resolve_toolset

    toolset = set(resolve_toolset("hermes-cli"))
    definitions = registry.get_definitions(toolset, quiet=True)

    kanban = set()
    for entry in definitions:
        if "function" not in entry:
            continue
        tool_name = entry["function"].get("name")
        if tool_name and tool_name.startswith("kanban_"):
            kanban.add(tool_name)

    expected = {
        "kanban_show",
        "kanban_complete",
        "kanban_block",
        "kanban_heartbeat",
        "kanban_comment",
        "kanban_create",
        "kanban_link",
    }
    assert kanban == expected, f"expected {expected}, got {kanban}"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Handler happy paths
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@pytest.fixture
def worker_env(monkeypatch, tmp_path):
    """Set up an isolated worker environment and return the claimed task id.

    Points HERMES_HOME and ``Path.home`` at ``tmp_path``, sets
    HERMES_PROFILE to ``test-worker``, initialises a fresh kanban DB,
    creates and claims one task, and only then exports its id as
    HERMES_KANBAN_TASK.
    """
    import pathlib

    hermes_home = tmp_path / ".hermes"
    hermes_home.mkdir()
    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
    monkeypatch.setenv("HERMES_PROFILE", "test-worker")
    monkeypatch.setattr(pathlib.Path, "home", lambda: tmp_path)

    from hermes_cli import kanban_db as kb

    # Clear the module's record of initialised paths before init_db() runs
    # against this test's fresh HERMES_HOME.
    kb._INITIALIZED_PATHS.clear()
    kb.init_db()

    db = kb.connect()
    try:
        task_id = kb.create_task(db, title="worker-test", assignee="test-worker")
        kb.claim_task(db, task_id)
    finally:
        db.close()

    monkeypatch.setenv("HERMES_KANBAN_TASK", task_id)
    return task_id
|
||||
|
||||
|
||||
def test_show_defaults_to_env_task_id(worker_env):
    """``_handle_show({})`` reports on the task named by HERMES_KANBAN_TASK."""
    from tools import kanban_tools as kt

    payload = json.loads(kt._handle_show({}))

    assert "task" in payload
    shown = payload["task"]
    assert shown["id"] == worker_env
    assert shown["status"] == "running"
    for extra_key in ("worker_context", "runs"):
        assert extra_key in payload
|
||||
|
||||
|
||||
def test_show_explicit_task_id(worker_env):
    """An explicit ``task_id`` lets the worker inspect a task other than
    the one in its env."""
    from hermes_cli import kanban_db as kb

    db = kb.connect()
    try:
        peer_task = kb.create_task(db, title="other task", assignee="peer")
    finally:
        db.close()

    from tools import kanban_tools as kt

    payload = json.loads(kt._handle_show({"task_id": peer_task}))
    assert payload["task"]["id"] == peer_task
|
||||
|
||||
|
||||
def test_complete_happy_path(worker_env):
    """Completing with summary + metadata succeeds and is stored on the
    latest run."""
    from tools import kanban_tools as kt

    handoff = {"summary": "got the thing done", "metadata": {"files": 2}}
    reply = json.loads(kt._handle_complete(handoff))
    assert reply["ok"] is True
    assert reply["task_id"] == worker_env

    # Cross-check what the kernel stored for the run.
    from hermes_cli import kanban_db as kb

    db = kb.connect()
    try:
        latest = kb.latest_run(db, worker_env)
        assert latest.outcome == "completed"
        assert latest.summary == "got the thing done"
        assert latest.metadata == {"files": 2}
    finally:
        db.close()
|
||||
|
||||
|
||||
def test_complete_with_result_only(worker_env):
    """A bare ``result`` (no ``summary``) is still accepted, for legacy
    compatibility."""
    from tools import kanban_tools as kt

    reply = json.loads(kt._handle_complete({"result": "legacy result"}))
    assert reply["ok"] is True
|
||||
|
||||
|
||||
def test_complete_rejects_no_handoff(worker_env):
    """Completing with neither ``summary`` nor ``result`` yields an error."""
    from tools import kanban_tools as kt

    reply = json.loads(kt._handle_complete({}))
    assert reply.get("error"), "should have errored"
|
||||
|
||||
|
||||
def test_complete_rejects_non_dict_metadata(worker_env):
    """``metadata`` that is not a dict (here a list) yields an error."""
    from tools import kanban_tools as kt

    bad_args = {"summary": "x", "metadata": [1, 2, 3]}
    reply = json.loads(kt._handle_complete(bad_args))
    assert reply.get("error")
|
||||
|
||||
|
||||
def test_block_happy_path(worker_env):
    """Blocking with a reason succeeds and moves the task to ``blocked``."""
    from tools import kanban_tools as kt

    reply = json.loads(kt._handle_block({"reason": "need clarification"}))
    assert reply["ok"] is True

    from hermes_cli import kanban_db as kb

    db = kb.connect()
    try:
        current = kb.get_task(db, worker_env)
        assert current.status == "blocked"
    finally:
        db.close()
|
||||
|
||||
|
||||
def test_block_rejects_empty_reason(worker_env):
    """Empty, blank and missing reasons are all rejected."""
    from tools import kanban_tools as kt

    rejected_reasons = ("", " ", None)
    for candidate in rejected_reasons:
        reply = json.loads(kt._handle_block({"reason": candidate}))
        assert reply.get("error")
|
||||
|
||||
|
||||
def test_heartbeat_happy_path(worker_env):
    """A heartbeat carrying a note succeeds for the running task."""
    from tools import kanban_tools as kt

    reply = json.loads(kt._handle_heartbeat({"note": "progress"}))
    assert reply["ok"] is True
|
||||
|
||||
|
||||
def test_heartbeat_without_note(worker_env):
    """The ``note`` argument may be omitted."""
    from tools import kanban_tools as kt

    reply = json.loads(kt._handle_heartbeat({}))
    assert reply["ok"] is True
|
||||
|
||||
|
||||
def test_comment_happy_path(worker_env):
    """A comment is stored with the given body and the HERMES_PROFILE
    author."""
    from tools import kanban_tools as kt

    request = {"task_id": worker_env, "body": "hello thread"}
    reply = json.loads(kt._handle_comment(request))
    assert reply["ok"] is True
    assert reply["comment_id"]

    from hermes_cli import kanban_db as kb

    db = kb.connect()
    try:
        thread = kb.list_comments(db, worker_env)
        assert len(thread) == 1
        only = thread[0]
        # The fixture sets HERMES_PROFILE=test-worker, which becomes the
        # default author.
        assert only.author == "test-worker"
        assert only.body == "hello thread"
    finally:
        db.close()
|
||||
|
||||
|
||||
def test_comment_rejects_empty_body(worker_env):
    """A whitespace-only comment body yields an error."""
    from tools import kanban_tools as kt

    request = {"task_id": worker_env, "body": " "}
    reply = json.loads(kt._handle_comment(request))
    assert reply.get("error")
|
||||
|
||||
|
||||
def test_comment_custom_author(worker_env):
    """An explicit ``author`` argument wins over the HERMES_PROFILE default."""
    from tools import kanban_tools as kt

    request = {"task_id": worker_env, "body": "hi", "author": "custom-bot"}
    reply = json.loads(kt._handle_comment(request))
    assert reply["ok"]

    from hermes_cli import kanban_db as kb

    db = kb.connect()
    try:
        thread = kb.list_comments(db, worker_env)
        assert thread[0].author == "custom-bot"
    finally:
        db.close()
|
||||
|
||||
|
||||
def test_create_happy_path(worker_env):
    """Creating a child of the running task succeeds and persists its
    title and assignee."""
    from tools import kanban_tools as kt

    request = {
        "title": "child task",
        "assignee": "peer",
        "parents": [worker_env],
    }
    reply = json.loads(kt._handle_create(request))
    assert reply["ok"] is True
    assert reply["task_id"]
    # The parent has not completed, so the child stays in "todo".
    assert reply["status"] == "todo"

    from hermes_cli import kanban_db as kb

    db = kb.connect()
    try:
        created = kb.get_task(db, reply["task_id"])
        assert created.title == "child task"
        assert created.assignee == "peer"
    finally:
        db.close()
|
||||
|
||||
|
||||
def test_create_rejects_no_title(worker_env):
    """A missing title and a blank title are both rejected."""
    from tools import kanban_tools as kt

    missing_title = json.loads(kt._handle_create({"assignee": "x"}))
    assert missing_title.get("error")

    blank_title = json.loads(kt._handle_create({"title": " ", "assignee": "x"}))
    assert blank_title.get("error")
|
||||
|
||||
|
||||
def test_create_rejects_no_assignee(worker_env):
    """A create request without an assignee yields an error."""
    from tools import kanban_tools as kt

    reply = json.loads(kt._handle_create({"title": "t"}))
    assert reply.get("error")
|
||||
|
||||
|
||||
def test_create_rejects_non_list_parents(worker_env):
    """``parents`` given as an int yields an error."""
    from tools import kanban_tools as kt

    request = {"title": "t", "assignee": "a", "parents": 42}
    reply = json.loads(kt._handle_create(request))
    assert reply.get("error")
|
||||
|
||||
|
||||
def test_create_accepts_string_parent(worker_env):
    """A single parent id passed as a plain string is accepted (coerced to
    a one-element list)."""
    from tools import kanban_tools as kt

    request = {"title": "t", "assignee": "a", "parents": worker_env}
    reply = json.loads(kt._handle_create(request))
    assert reply["ok"]
|
||||
|
||||
|
||||
def test_create_accepts_skills_list(worker_env):
    """The tool writes a list of per-task skills through to the kernel."""
    from tools import kanban_tools as kt
    from hermes_cli import kanban_db as kb

    out = kt._handle_create({
        "title": "skilled",
        "assignee": "linguist",
        "skills": ["translation", "github-code-review"],
    })
    d = json.loads(out)
    assert d["ok"] is True

    # Close the connection explicitly, as the other tests in this module do.
    # NOTE(review): assuming kb.connect() returns a sqlite3.Connection, using
    # it as a context manager only scopes a transaction and leaves the handle
    # open -- confirm against kanban_db.
    conn = kb.connect()
    try:
        task = kb.get_task(conn, d["task_id"])
    finally:
        conn.close()
    assert task.skills == ["translation", "github-code-review"]
|
||||
|
||||
|
||||
def test_create_accepts_skills_string(worker_env):
    """Convenience: a single skill name given as a string is coerced to
    ``[name]``."""
    from tools import kanban_tools as kt
    from hermes_cli import kanban_db as kb

    out = kt._handle_create({
        "title": "one-skill",
        "assignee": "a",
        "skills": "translation",
    })
    d = json.loads(out)
    assert d["ok"] is True

    # Close the connection explicitly, as the other tests in this module do.
    # NOTE(review): assuming kb.connect() returns a sqlite3.Connection, using
    # it as a context manager only scopes a transaction and leaves the handle
    # open -- confirm against kanban_db.
    conn = kb.connect()
    try:
        task = kb.get_task(conn, d["task_id"])
    finally:
        conn.close()
    assert task.skills == ["translation"]
|
||||
|
||||
|
||||
def test_create_rejects_non_list_skills(worker_env):
    """``skills`` given as an int is rejected rather than silently dropped."""
    from tools import kanban_tools as kt

    request = {"title": "t", "assignee": "a", "skills": 42}
    reply = json.loads(kt._handle_create(request))
    assert reply.get("error")
|
||||
|
||||
|
||||
def test_link_happy_path(worker_env):
    """Linking two independent tasks as parent and child succeeds."""
    from hermes_cli import kanban_db as kb

    db = kb.connect()
    try:
        parent = kb.create_task(db, title="A", assignee="x")
        child = kb.create_task(db, title="B", assignee="x")
    finally:
        db.close()

    from tools import kanban_tools as kt

    reply = json.loads(kt._handle_link({"parent_id": parent, "child_id": child}))
    assert reply["ok"] is True
|
||||
|
||||
|
||||
def test_link_rejects_self_reference(worker_env):
    """Linking a task to itself yields an error."""
    from tools import kanban_tools as kt

    request = {"parent_id": worker_env, "child_id": worker_env}
    reply = json.loads(kt._handle_link(request))
    assert reply.get("error")
|
||||
|
||||
|
||||
def test_link_rejects_missing_args(worker_env):
    """Supplying only one of ``parent_id`` / ``child_id`` yields an error."""
    from tools import kanban_tools as kt

    only_parent = json.loads(kt._handle_link({"parent_id": "x"}))
    assert only_parent.get("error")

    only_child = json.loads(kt._handle_link({"child_id": "y"}))
    assert only_child.get("error")
|
||||
|
||||
|
||||
def test_link_rejects_cycle(worker_env):
    """With A already a parent of B, linking B as a parent of A yields an
    error."""
    from hermes_cli import kanban_db as kb

    db = kb.connect()
    try:
        first = kb.create_task(db, title="A", assignee="x")
        second = kb.create_task(db, title="B", assignee="x", parents=[first])
    finally:
        db.close()

    from tools import kanban_tools as kt

    # Reversed edge: B -> A would close the loop.
    reply = json.loads(kt._handle_link({"parent_id": second, "child_id": first}))
    assert reply.get("error")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# End-to-end: simulate a full worker lifecycle through the tools
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_worker_lifecycle_through_tools(worker_env):
    """Run show -> heartbeat -> comment -> create -> complete through the
    tool handlers only, then check the resulting DB state."""
    from tools import kanban_tools as kt

    # Step 1: the worker orients itself on its own task.
    shown = json.loads(kt._handle_show({}))
    assert shown["task"]["id"] == worker_env

    # Step 2: heartbeat while a long operation is in flight.
    beat = json.loads(kt._handle_heartbeat({"note": "warming up"}))
    assert beat["ok"]

    # Step 3: leave a comment for whoever picks the task up later.
    comment_request = {
        "task_id": worker_env,
        "body": "note: using stdlib sqlite3 bindings",
    }
    assert json.loads(kt._handle_comment(comment_request))["ok"]

    # Step 4: fan out a follow-up child task.
    child_request = {
        "title": "write integration test",
        "assignee": "qa",
        "parents": [worker_env],
    }
    child_out = json.loads(kt._handle_create(child_request))
    assert child_out["ok"]

    # Step 5: complete with a structured handoff that points at the child.
    handoff = {
        "summary": "implemented + spawned QA follow-up",
        "metadata": {"child_task": child_out["task_id"]},
    }
    completion = json.loads(kt._handle_complete(handoff))
    assert completion["ok"]

    # Final state, read straight from the kernel.
    from hermes_cli import kanban_db as kb

    db = kb.connect()
    try:
        parent = kb.get_task(db, worker_env)
        assert parent.status == "done"
        assert parent.current_run_id is None

        latest = kb.latest_run(db, worker_env)
        assert latest.outcome == "completed"
        assert latest.metadata == {"child_task": child_out["task_id"]}

        # Completing the parent is expected to promote the child to "ready".
        child = kb.get_task(db, child_out["task_id"])
        assert child.status == "ready", (
            f"child should be ready after parent done, got {child.status}"
        )

        # Exactly one comment on the parent thread.
        assert len(kb.list_comments(db, worker_env)) == 1

        # Exactly one heartbeat event was recorded.
        heartbeats = [
            event
            for event in kb.list_events(db, worker_env)
            if event.kind == "heartbeat"
        ]
        assert len(heartbeats) == 1
    finally:
        db.close()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# System-prompt guidance injection
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_kanban_guidance_not_in_normal_prompt(monkeypatch, tmp_path):
    """With HERMES_KANBAN_TASK unset, the built system prompt contains no
    kanban worker guidance."""
    import pathlib

    monkeypatch.delenv("HERMES_KANBAN_TASK", raising=False)
    hermes_home = tmp_path / ".hermes"
    hermes_home.mkdir()
    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
    monkeypatch.setattr(pathlib.Path, "home", lambda: tmp_path)

    from run_agent import AIAgent

    agent = AIAgent(
        api_key="test",
        base_url="https://openrouter.ai/api/v1",
        quiet_mode=True,
        skip_context_files=True,
        skip_memory=True,
    )
    prompt = agent._build_system_prompt()

    for forbidden in ("You are a Kanban worker", "kanban_show()"):
        assert forbidden not in prompt
|
||||
|
||||
|
||||
def test_kanban_guidance_in_worker_prompt(monkeypatch, tmp_path):
    """With HERMES_KANBAN_TASK set, the built system prompt contains the
    kanban lifecycle guidance."""
    import pathlib

    monkeypatch.setenv("HERMES_KANBAN_TASK", "t_fake")
    hermes_home = tmp_path / ".hermes"
    hermes_home.mkdir()
    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
    monkeypatch.setattr(pathlib.Path, "home", lambda: tmp_path)

    from run_agent import AIAgent

    agent = AIAgent(
        api_key="test",
        base_url="https://openrouter.ai/api/v1",
        quiet_mode=True,
        skip_context_files=True,
        skip_memory=True,
    )
    prompt = agent._build_system_prompt()

    # Header phrase first, then the lifecycle tool names.
    required_markers = (
        "You are a Kanban worker",
        "kanban_show()",
        "kanban_complete",
        "kanban_block",
        "kanban_create",
    )
    for marker in required_markers:
        assert marker in prompt

    # Anti-shell guidance: either wording is accepted.
    assert "Do not shell out" in prompt or "tools — they work" in prompt
|
||||
|
||||
|
||||
def test_kanban_guidance_prompt_size_bounded(monkeypatch, tmp_path):
    """Sanity bound: KANBAN_GUIDANCE is longer than 1,500 and shorter than
    4,096 characters, so it is present but does not bloat the cached prompt."""
    import pathlib

    monkeypatch.setenv("HERMES_KANBAN_TASK", "t_fake")
    hermes_home = tmp_path / ".hermes"
    hermes_home.mkdir()
    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
    monkeypatch.setattr(pathlib.Path, "home", lambda: tmp_path)

    from agent.prompt_builder import KANBAN_GUIDANCE

    assert 1_500 < len(KANBAN_GUIDANCE) < 4_096, (
        f"KANBAN_GUIDANCE is {len(KANBAN_GUIDANCE)} chars — too short (missing?) or too long"
    )
|
||||
@@ -491,36 +491,11 @@ def test_configure_callback_port_uses_explicit_port():
|
||||
assert cfg["_resolved_port"] == 54321
|
||||
|
||||
|
||||
def test_build_oauth_auth_preserves_server_url_path():
    """A ``server_url`` that includes a path reaches OAuthClientProvider
    unchanged.

    Regression for #16015: ``_parse_base_url`` used to strip the path, so
    ``https://mcp.notion.com/mcp`` became ``https://mcp.notion.com``. That
    broke RFC 9728 protected-resource validation against servers whose PRM
    advertises a path-scoped resource (Notion). The MCP SDK strips the path
    itself for authorization-server discovery via
    ``OAuthContext.get_authorization_base_url``; Hermes must not pre-strip.
    """
    from tools import mcp_oauth

    seen_kwargs: dict = {}

    class _RecordingProvider:
        # Stand-in for OAuthClientProvider that records its constructor kwargs.
        def __init__(self, **kwargs):
            seen_kwargs.update(kwargs)

    storage_stub = MagicMock(has_cached_tokens=lambda: True)

    with patch.object(mcp_oauth, "_OAUTH_AVAILABLE", True), \
            patch.object(mcp_oauth, "OAuthClientProvider", _RecordingProvider), \
            patch.object(mcp_oauth, "_is_interactive", return_value=True), \
            patch.object(mcp_oauth, "_maybe_preregister_client"), \
            patch.object(mcp_oauth, "HermesTokenStorage") as storage_cls:
        storage_cls.return_value = storage_stub
        build_oauth_auth(
            server_name="notion",
            server_url="https://mcp.notion.com/mcp",
            oauth_config={},
        )

    assert seen_kwargs["server_url"] == "https://mcp.notion.com/mcp"
|
||||
def test_parse_base_url_strips_path():
    """``_parse_base_url`` keeps scheme, host and port and drops any path."""
    from tools.mcp_oauth import _parse_base_url

    cases = (
        ("https://example.com/mcp/v1", "https://example.com"),
        ("https://example.com", "https://example.com"),
        ("https://host.example.com:8080/api", "https://host.example.com:8080"),
    )
    for url, expected_base in cases:
        assert _parse_base_url(url) == expected_base
|
||||
|
||||
|
||||
@@ -1,726 +0,0 @@
|
||||
"""Kanban tools — structured tool-call surface for worker + orchestrator agents.
|
||||
|
||||
These tools are only registered into the model's schema when the agent is
|
||||
running under the dispatcher (env var ``HERMES_KANBAN_TASK`` set). A
|
||||
normal ``hermes chat`` session sees **zero** kanban tools in its schema.
|
||||
|
||||
Why tools instead of just shelling out to ``hermes kanban``?
|
||||
|
||||
1. **Backend portability.** A worker whose terminal tool points at Docker
|
||||
/ Modal / Singularity / SSH would run ``hermes kanban complete …``
|
||||
inside the container, where ``hermes`` isn't installed and the DB
|
||||
isn't mounted. Tools run in the agent's Python process, so they
|
||||
always reach ``~/.hermes/kanban.db`` regardless of terminal backend.
|
||||
|
||||
2. **No shell-quoting footguns.** Passing ``--metadata '{"x": [...]}'``
|
||||
through shlex+argparse is fragile. Structured tool args skip it.
|
||||
|
||||
3. **Better errors.** Tool-call failures return structured JSON the
|
||||
model can reason about, not stderr strings it has to parse.
|
||||
|
||||
Humans continue to use the CLI (``hermes kanban …``), the dashboard
|
||||
(``hermes dashboard``), and the slash command (``/kanban …``) — all
|
||||
three bypass the agent entirely. The tools are ONLY for the worker
|
||||
agent's handoff back to the kernel.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from typing import Any, Optional
|
||||
|
||||
from tools.registry import registry, tool_error
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Gating
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _check_kanban_mode() -> bool:
    """Report whether this process is running as a kanban worker.

    True iff ``HERMES_KANBAN_TASK`` is set to a non-empty value in the
    environment; the dispatcher sets it when spawning a worker. A plain
    ``hermes chat`` session therefore sees zero kanban tools, while workers
    spawned by ``hermes kanban daemon`` see all seven.
    """
    task_from_env = os.environ.get("HERMES_KANBAN_TASK", "")
    return task_from_env != ""
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Shared helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _default_task_id(arg: Optional[str]) -> Optional[str]:
    """Return ``arg`` when truthy, else the ``HERMES_KANBAN_TASK`` env value.

    Yields ``None`` when neither supplies a non-empty id.
    """
    return arg or os.environ.get("HERMES_KANBAN_TASK") or None
|
||||
|
||||
|
||||
def _connect():
    """Return ``(kanban_db module, open connection)``.

    The import is done at call time so this module still imports cleanly in
    non-kanban contexts (e.g. test rigs that import every tool module).
    """
    from hermes_cli import kanban_db as kb

    connection = kb.connect()
    return kb, connection
|
||||
|
||||
|
||||
def _ok(**fields: Any) -> str:
    """Serialise a success payload: ``{"ok": true}`` plus ``fields`` as JSON."""
    payload = {"ok": True}
    payload.update(fields)
    return json.dumps(payload)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Handlers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _handle_show(args: dict, **kw) -> str:
    """Return a JSON snapshot of one task.

    The payload holds the task row, parent and child ids, comments, run
    history, the last 50 entries of ``kb.list_events`` and the
    ``worker_context`` string from ``kb.build_worker_context``.
    ``task_id`` falls back to HERMES_KANBAN_TASK via ``_default_task_id``.
    Failures are returned as ``tool_error`` payloads, never raised.
    """
    tid = _default_task_id(args.get("task_id"))
    if not tid:
        return tool_error(
            "task_id is required (or set HERMES_KANBAN_TASK in the env)"
        )

    # Attribute names copied verbatim into the payload, in output order.
    task_fields = (
        "id", "title", "body", "assignee", "status", "tenant", "priority",
        "workspace_kind", "workspace_path", "created_by", "created_at",
        "started_at", "completed_at", "result", "current_run_id",
    )
    run_fields = (
        "id", "profile", "status", "outcome", "summary", "error",
        "metadata", "started_at", "ended_at",
    )

    try:
        kb, conn = _connect()
        try:
            task = kb.get_task(conn, tid)
            if task is None:
                return tool_error(f"task {tid} not found")

            comments = kb.list_comments(conn, tid)
            events = kb.list_events(conn, tid)
            runs = kb.list_runs(conn, tid)
            parents = kb.parent_ids(conn, tid)
            children = kb.child_ids(conn, tid)

            comment_rows = []
            for comment in comments:
                comment_rows.append({
                    "author": comment.author,
                    "body": comment.body,
                    "created_at": comment.created_at,
                })

            # Only the last 50 events are returned; the full log is
            # available via the CLI.
            event_rows = []
            for event in events[-50:]:
                event_rows.append({
                    "kind": event.kind,
                    "payload": event.payload,
                    "created_at": event.created_at,
                    "run_id": event.run_id,
                })

            snapshot = {
                "task": {name: getattr(task, name) for name in task_fields},
                "parents": parents,
                "children": children,
                "comments": comment_rows,
                "events": event_rows,
                "runs": [
                    {name: getattr(run, name) for name in run_fields}
                    for run in runs
                ],
                # The worker's own context block, so the agent can include
                # it directly; the same string build_worker_context gives
                # the dispatcher at spawn time.
                "worker_context": kb.build_worker_context(conn, tid),
            }
            return json.dumps(snapshot)
        finally:
            conn.close()
    except Exception as e:
        logger.exception("kanban_show failed")
        return tool_error(f"kanban_show: {e}")
|
||||
|
||||
|
||||
def _handle_complete(args: dict, **kw) -> str:
    """Mark a task done, recording the worker's structured handoff.

    Needs at least one of ``summary`` / ``result``; ``metadata``, when
    given, must be a dict. ``task_id`` falls back to HERMES_KANBAN_TASK.
    Returns an ``_ok`` payload with ``task_id`` and the latest ``run_id``,
    or a ``tool_error`` payload.
    """
    tid = _default_task_id(args.get("task_id"))
    if not tid:
        return tool_error(
            "task_id is required (or set HERMES_KANBAN_TASK in the env)"
        )

    summary = args.get("summary")
    metadata = args.get("metadata")
    result = args.get("result")

    if not summary and not result:
        return tool_error(
            "provide at least one of: summary (preferred), result"
        )
    if not (metadata is None or isinstance(metadata, dict)):
        return tool_error(
            f"metadata must be an object/dict, got {type(metadata).__name__}"
        )

    try:
        kb, conn = _connect()
        try:
            completed = kb.complete_task(
                conn,
                tid,
                result=result,
                summary=summary,
                metadata=metadata,
            )
            if not completed:
                return tool_error(
                    f"could not complete {tid} (unknown id or already terminal)"
                )
            run = kb.latest_run(conn, tid)
            run_id = run.id if run else None
            return _ok(task_id=tid, run_id=run_id)
        finally:
            conn.close()
    except Exception as e:
        logger.exception("kanban_complete failed")
        return tool_error(f"kanban_complete: {e}")
|
||||
|
||||
|
||||
def _handle_block(args: dict, **kw) -> str:
    """Move a task to ``blocked``, recording a human-readable reason.

    ``reason`` is mandatory and must not be blank. ``task_id`` falls back
    to HERMES_KANBAN_TASK. Returns an ``_ok`` payload with ``task_id`` and
    the latest ``run_id``, or a ``tool_error`` payload.
    """
    tid = _default_task_id(args.get("task_id"))
    if not tid:
        return tool_error(
            "task_id is required (or set HERMES_KANBAN_TASK in the env)"
        )

    reason = args.get("reason")
    has_reason = bool(reason) and bool(str(reason).strip())
    if not has_reason:
        return tool_error("reason is required — explain what input you need")

    try:
        kb, conn = _connect()
        try:
            blocked = kb.block_task(conn, tid, reason=reason)
            if not blocked:
                return tool_error(
                    f"could not block {tid} (unknown id or not in "
                    f"running/ready)"
                )
            run = kb.latest_run(conn, tid)
            run_id = run.id if run else None
            return _ok(task_id=tid, run_id=run_id)
        finally:
            conn.close()
    except Exception as e:
        logger.exception("kanban_block failed")
        return tool_error(f"kanban_block: {e}")
|
||||
|
||||
|
||||
def _handle_heartbeat(args: dict, **kw) -> str:
    """Record a liveness signal for a task during a long operation.

    ``note`` is optional and forwarded as-is to ``kb.heartbeat_worker``.
    ``task_id`` falls back to HERMES_KANBAN_TASK. Returns an ``_ok``
    payload with ``task_id``, or a ``tool_error`` payload.
    """
    tid = _default_task_id(args.get("task_id"))
    if not tid:
        return tool_error(
            "task_id is required (or set HERMES_KANBAN_TASK in the env)"
        )

    note = args.get("note")
    try:
        kb, conn = _connect()
        try:
            if kb.heartbeat_worker(conn, tid, note=note):
                return _ok(task_id=tid)
            return tool_error(
                f"could not heartbeat {tid} (unknown id or not running)"
            )
        finally:
            conn.close()
    except Exception as e:
        logger.exception("kanban_heartbeat failed")
        return tool_error(f"kanban_heartbeat: {e}")
|
||||
|
||||
|
||||
def _handle_comment(args: dict, **kw) -> str:
    """Append a comment to a task's thread.

    Unlike the other handlers, ``task_id`` is NOT defaulted from
    HERMES_KANBAN_TASK; the caller must name the thread explicitly.
    ``body`` is mandatory and must not be blank. ``author`` falls back to
    the HERMES_PROFILE env var, then to ``"worker"``.

    Returns an ``_ok`` payload with ``task_id`` and ``comment_id``, or a
    ``tool_error`` payload.
    """
    tid = args.get("task_id")
    if not tid:
        # The previous wording said the id "pulls from env", which this
        # handler never does; state the actual contract instead.
        return tool_error(
            "task_id is required — pass it explicitly (comments do not "
            "default to HERMES_KANBAN_TASK; use your current task id if "
            "that's what you mean)"
        )
    body = args.get("body")
    if not body or not str(body).strip():
        return tool_error("body is required")
    author = args.get("author") or os.environ.get("HERMES_PROFILE") or "worker"
    try:
        kb, conn = _connect()
        try:
            cid = kb.add_comment(conn, tid, author=author, body=str(body))
            return _ok(task_id=tid, comment_id=cid)
        finally:
            conn.close()
    except Exception as e:
        logger.exception("kanban_comment failed")
        return tool_error(f"kanban_comment: {e}")
|
||||
|
||||
|
||||
def _handle_create(args: dict, **kw) -> str:
    """Create a task; orchestrator workers use this to fan out child tasks.

    Required args: ``title`` and ``assignee``, both non-blank (surrounding
    whitespace is stripped). Optional args: ``body``, ``parents``,
    ``tenant`` (falls back to HERMES_TENANT), ``priority`` (default 0),
    ``workspace_kind`` (default ``"scratch"``), ``workspace_path``,
    ``triage``, ``idempotency_key``, ``max_runtime_seconds`` and
    ``skills``. ``parents`` and ``skills`` accept a single string as
    shorthand for a one-element list; dependency-gated promotion of the
    new task works as usual.

    Returns an ``_ok`` payload with the new ``task_id`` and its ``status``,
    or a ``tool_error`` payload describing the problem.
    """
    title = args.get("title")
    if not title or not str(title).strip():
        return tool_error("title is required")
    assignee = args.get("assignee")
    # Reject blank assignees the same way blank titles are rejected.
    if not assignee or not str(assignee).strip():
        return tool_error(
            "assignee is required — name the profile that should execute this "
            "task (the dispatcher will only spawn tasks with an assignee)"
        )
    body = args.get("body")
    parents = args.get("parents") or []
    tenant = args.get("tenant") or os.environ.get("HERMES_TENANT")
    priority = args.get("priority")
    workspace_kind = args.get("workspace_kind") or "scratch"
    workspace_path = args.get("workspace_path")
    triage = bool(args.get("triage"))
    idempotency_key = args.get("idempotency_key")
    max_runtime_seconds = args.get("max_runtime_seconds")
    skills = args.get("skills")
    if isinstance(skills, str):
        # Accept a single skill name as a string for convenience.
        skills = [skills]
    if skills is not None and not isinstance(skills, (list, tuple)):
        return tool_error(
            f"skills must be a list of skill names, got {type(skills).__name__}"
        )
    if isinstance(parents, str):
        parents = [parents]
    if not isinstance(parents, (list, tuple)):
        return tool_error(
            f"parents must be a list of task ids, got {type(parents).__name__}"
        )
    # Validate numeric args up front so a bad value is reported as a plain
    # input error, without opening a DB connection or logging a traceback.
    try:
        priority_value = int(priority) if priority is not None else 0
    except (TypeError, ValueError):
        return tool_error(f"priority must be an integer, got {priority!r}")
    try:
        runtime_limit = (
            int(max_runtime_seconds)
            if max_runtime_seconds is not None else None
        )
    except (TypeError, ValueError):
        return tool_error(
            f"max_runtime_seconds must be an integer, got {max_runtime_seconds!r}"
        )
    try:
        kb, conn = _connect()
        try:
            new_tid = kb.create_task(
                conn,
                title=str(title).strip(),
                body=body,
                assignee=str(assignee).strip(),
                parents=tuple(parents),
                tenant=tenant,
                priority=priority_value,
                workspace_kind=str(workspace_kind),
                workspace_path=workspace_path,
                triage=triage,
                idempotency_key=idempotency_key,
                max_runtime_seconds=runtime_limit,
                skills=skills,
                created_by=os.environ.get("HERMES_PROFILE") or "worker",
            )
            new_task = kb.get_task(conn, new_tid)
            return _ok(
                task_id=new_tid,
                status=new_task.status if new_task else None,
            )
        finally:
            conn.close()
    except Exception as e:
        logger.exception("kanban_create failed")
        return tool_error(f"kanban_create: {e}")
|
||||
|
||||
|
||||
def _handle_link(args: dict, **kw) -> str:
    """Link two existing tasks with a parent→child dependency edge.

    Args:
        args: Tool arguments; ``parent_id`` and ``child_id`` must both be
            present and truthy.
        **kw: Extra keyword arguments; not used here.

    Returns:
        The ``_ok`` payload echoing both ids on success, otherwise a
        ``tool_error`` payload describing the failure.
    """
    parent_id, child_id = args.get("parent_id"), args.get("child_id")
    if not (parent_id and child_id):
        return tool_error("both parent_id and child_id are required")

    try:
        kb, conn = _connect()
        try:
            kb.link_tasks(conn, parent_id=parent_id, child_id=child_id)
            response = _ok(parent_id=parent_id, child_id=child_id)
        finally:
            # Always release the connection, whether or not linking worked.
            conn.close()
    except ValueError as exc:
        # Covers cycle + self-parent rejections; reported without a traceback.
        return tool_error(f"kanban_link: {exc}")
    except Exception as exc:
        # Anything else is unexpected: log the traceback, then report it.
        logger.exception("kanban_link failed")
        return tool_error(f"kanban_link: {exc}")
    return response
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Schemas
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_DESC_TASK_ID_DEFAULT = (
|
||||
"Task id. If omitted, defaults to HERMES_KANBAN_TASK from the env "
|
||||
"(the task the dispatcher spawned you to work on)."
|
||||
)
|
||||
|
||||
# Tool schema for ``kanban_show``: a single optional ``task_id`` parameter.
KANBAN_SHOW_SCHEMA = {
    "name": "kanban_show",
    "description": (
        "Read a task's full state — title, body, assignee, parent task handoffs, "
        "your prior attempts on this task if any, comments, and recent events. "
        "Use this to (re)orient yourself before starting work, especially on retries. "
        "The response includes a pre-formatted ``worker_context`` string suitable "
        "for inclusion verbatim in your reasoning."
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "task_id": {"type": "string", "description": _DESC_TASK_ID_DEFAULT},
        },
        "required": [],
    },
}
|
||||
|
||||
# Tool schema for ``kanban_complete``. No parameter is marked required in the
# schema itself; the description tells the caller that one of ``summary`` or
# ``result`` must be supplied.
KANBAN_COMPLETE_SCHEMA = {
    "name": "kanban_complete",
    "description": (
        "Mark your current task done with a structured handoff for downstream "
        "workers and humans. "
        "Prefer ``summary`` for a human-readable 1-3 sentence description of what "
        "you did; put machine-readable facts in ``metadata`` (changed_files, "
        "tests_run, decisions, findings, etc). "
        "At least one of ``summary`` or ``result`` is required."
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "task_id": {"type": "string", "description": _DESC_TASK_ID_DEFAULT},
            "summary": {
                "type": "string",
                "description": (
                    "Human-readable handoff, 1-3 sentences. "
                    "Appears in Run History on the dashboard and in downstream "
                    "workers' context."
                ),
            },
            "metadata": {
                "type": "object",
                "description": (
                    'Free-form dict of structured facts about this attempt — '
                    '{"changed_files": [...], "tests_run": 12, "findings": [...]}. '
                    'Surfaced to downstream workers alongside ``summary``.'
                ),
            },
            "result": {
                "type": "string",
                "description": (
                    "Short result log line (legacy field, maps to task.result). "
                    "Use ``summary`` instead when possible; this exists for "
                    "compatibility with callers that still set --result on the CLI."
                ),
            },
        },
        "required": [],
    },
}
|
||||
|
||||
# Tool schema for ``kanban_block``: ``reason`` is required, ``task_id`` optional.
KANBAN_BLOCK_SCHEMA = {
    "name": "kanban_block",
    "description": (
        "Transition the task to blocked because you need human input to proceed. "
        "``reason`` will be shown to the human on the board and included in "
        "context when someone unblocks you. "
        "Use for genuine blockers only — don't block on things you can resolve "
        "yourself."
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "task_id": {"type": "string", "description": _DESC_TASK_ID_DEFAULT},
            "reason": {
                "type": "string",
                "description": (
                    "What you need answered, in one or two sentences. "
                    "Don't paste the whole conversation; the human has the board "
                    "and can ask follow-ups via comments."
                ),
            },
        },
        "required": ["reason"],
    },
}
|
||||
|
||||
# Tool schema for ``kanban_heartbeat``: both parameters are optional.
KANBAN_HEARTBEAT_SCHEMA = {
    "name": "kanban_heartbeat",
    "description": (
        "Signal that you're still alive during a long operation (training, "
        "encoding, large crawls). "
        "Call every few minutes so humans see liveness separately from PID checks. "
        "Pure side effect — no work changes."
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "task_id": {"type": "string", "description": _DESC_TASK_ID_DEFAULT},
            "note": {
                "type": "string",
                "description": (
                    "Optional short note describing current progress. Shown in "
                    "the event log."
                ),
            },
        },
        "required": [],
    },
}
|
||||
|
||||
# Tool schema for ``kanban_comment``: unlike the schemas that reuse
# ``_DESC_TASK_ID_DEFAULT``, ``task_id`` is required here, together with ``body``.
KANBAN_COMMENT_SCHEMA = {
    "name": "kanban_comment",
    "description": (
        "Append a comment to a task's thread. "
        "Use for durable notes that should outlive this run (questions for the "
        "next worker, partial findings, rationale). "
        "Ephemeral reasoning doesn't belong here — use your normal response instead."
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "task_id": {
                "type": "string",
                "description": (
                    "Task id. Required (may be your own task or another's — "
                    "comment threads are per-task)."
                ),
            },
            "body": {
                "type": "string",
                "description": "Markdown-supported comment body.",
            },
            "author": {
                "type": "string",
                "description": (
                    "Override author name. "
                    "Defaults to the current profile (HERMES_PROFILE env)."
                ),
            },
        },
        "required": ["task_id", "body"],
    },
}
|
||||
|
||||
# Tool schema for ``kanban_create``: ``title`` and ``assignee`` are required;
# every other property is optional.
KANBAN_CREATE_SCHEMA = {
    "name": "kanban_create",
    "description": (
        "Create a new kanban task, optionally as a child of the current one "
        "(pass the current task id in ``parents``). "
        "Used by orchestrator workers to fan out — decompose work into child "
        "tasks with specific assignees, link them into a pipeline, then complete "
        "your own task. "
        "The dispatcher picks up the new tasks on its next tick and spawns the "
        "assigned profiles."
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "title": {
                "type": "string",
                "description": "Short task title (required).",
            },
            "assignee": {
                "type": "string",
                "description": (
                    "Profile name that should execute this task (e.g. "
                    "'researcher-a', 'reviewer', 'writer'). "
                    "Required — tasks without an assignee are never dispatched."
                ),
            },
            "body": {
                "type": "string",
                "description": (
                    "Opening post: full spec, acceptance criteria, links. "
                    "The assigned worker reads this as part of its context."
                ),
            },
            "parents": {
                "type": "array",
                "items": {"type": "string"},
                "description": (
                    "Parent task ids. "
                    "The new task stays in 'todo' until every parent reaches "
                    "'done'; then it auto-promotes to 'ready'. "
                    "Typical fan-in: list all the researcher task ids when "
                    "creating a synthesizer task."
                ),
            },
            "tenant": {
                "type": "string",
                "description": (
                    "Optional namespace for multi-project isolation. "
                    "Defaults to HERMES_TENANT env if set."
                ),
            },
            "priority": {
                "type": "integer",
                "description": (
                    "Dispatcher tiebreaker. "
                    "Higher = picked sooner when multiple ready tasks share an "
                    "assignee."
                ),
            },
            "workspace_kind": {
                "type": "string",
                "enum": ["scratch", "dir", "worktree"],
                "description": (
                    "Workspace flavor: 'scratch' (fresh tmp dir, default), "
                    "'dir' (shared directory, requires absolute workspace_path), "
                    "'worktree' (git worktree)."
                ),
            },
            "workspace_path": {
                "type": "string",
                "description": (
                    "Absolute path for 'dir' or 'worktree' workspace. "
                    "Relative paths are rejected at dispatch."
                ),
            },
            "triage": {
                "type": "boolean",
                "description": (
                    "If true, task lands in 'triage' instead of 'todo' — a "
                    "specifier profile is expected to flesh out the body before "
                    "work starts."
                ),
            },
            "idempotency_key": {
                "type": "string",
                "description": (
                    "If a non-archived task with this key already exists, return "
                    "that task's id instead of creating a duplicate. "
                    "Useful for retry-safe automation."
                ),
            },
            "max_runtime_seconds": {
                "type": "integer",
                "description": (
                    "Per-task runtime cap. "
                    "When exceeded, the dispatcher SIGTERMs the worker and "
                    "re-queues the task with outcome='timed_out'."
                ),
            },
            "skills": {
                "type": "array",
                "items": {"type": "string"},
                "description": (
                    "Skill names to force-load into the dispatched worker (in "
                    "addition to the built-in kanban-worker skill). "
                    "Use this to pin a task to a specialist context — e.g. "
                    "['translation'] for a translation task, "
                    "['github-code-review'] for a reviewer task. "
                    "The names must match skills installed on the assignee's "
                    "profile."
                ),
            },
        },
        "required": ["title", "assignee"],
    },
}
|
||||
|
||||
# Tool schema for ``kanban_link``: both endpoint ids are required.
KANBAN_LINK_SCHEMA = {
    "name": "kanban_link",
    "description": (
        "Add a parent→child dependency edge after both tasks already exist. "
        "The child won't promote to 'ready' until all parents are 'done'. "
        "Cycles and self-links are rejected."
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "parent_id": {
                "type": "string",
                "description": "Parent task id.",
            },
            "child_id": {
                "type": "string",
                "description": "Child task id.",
            },
        },
        "required": ["parent_id", "child_id"],
    },
}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Registration
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Register each kanban tool in the "kanban" toolset. Every registration passes
# ``_check_kanban_mode`` as its ``check_fn``.
registry.register(
    name="kanban_show", toolset="kanban", emoji="📋",
    schema=KANBAN_SHOW_SCHEMA, handler=_handle_show,
    check_fn=_check_kanban_mode,
)

registry.register(
    name="kanban_complete", toolset="kanban", emoji="✔",
    schema=KANBAN_COMPLETE_SCHEMA, handler=_handle_complete,
    check_fn=_check_kanban_mode,
)

registry.register(
    name="kanban_block", toolset="kanban", emoji="⏸",
    schema=KANBAN_BLOCK_SCHEMA, handler=_handle_block,
    check_fn=_check_kanban_mode,
)

registry.register(
    name="kanban_heartbeat", toolset="kanban", emoji="💓",
    schema=KANBAN_HEARTBEAT_SCHEMA, handler=_handle_heartbeat,
    check_fn=_check_kanban_mode,
)

registry.register(
    name="kanban_comment", toolset="kanban", emoji="💬",
    schema=KANBAN_COMMENT_SCHEMA, handler=_handle_comment,
    check_fn=_check_kanban_mode,
)

registry.register(
    name="kanban_create", toolset="kanban", emoji="➕",
    schema=KANBAN_CREATE_SCHEMA, handler=_handle_create,
    check_fn=_check_kanban_mode,
)

registry.register(
    name="kanban_link", toolset="kanban", emoji="🔗",
    schema=KANBAN_LINK_SCHEMA, handler=_handle_link,
    check_fn=_check_kanban_mode,
)
|
||||
+7
-1
@@ -519,6 +519,12 @@ def _maybe_preregister_client(
|
||||
logger.debug("Pre-registered client_id=%s for '%s'", client_id, storage._server_name)
|
||||
|
||||
|
||||
def _parse_base_url(server_url: str) -> str:
|
||||
"""Strip path component from server URL, returning the base origin."""
|
||||
parsed = urlparse(server_url)
|
||||
return f"{parsed.scheme}://{parsed.netloc}"
|
||||
|
||||
|
||||
def build_oauth_auth(
|
||||
server_name: str,
|
||||
server_url: str,
|
||||
@@ -564,7 +570,7 @@ def build_oauth_auth(
|
||||
_maybe_preregister_client(storage, cfg, client_metadata)
|
||||
|
||||
return OAuthClientProvider(
|
||||
server_url=server_url,
|
||||
server_url=_parse_base_url(server_url),
|
||||
client_metadata=client_metadata,
|
||||
storage=storage,
|
||||
redirect_handler=_redirect_handler,
|
||||
|
||||
@@ -362,6 +362,7 @@ class MCPOAuthManager:
|
||||
_configure_callback_port,
|
||||
_is_interactive,
|
||||
_maybe_preregister_client,
|
||||
_parse_base_url,
|
||||
_redirect_handler,
|
||||
_wait_for_callback,
|
||||
)
|
||||
@@ -386,7 +387,7 @@ class MCPOAuthManager:
|
||||
|
||||
return _HERMES_PROVIDER_CLS(
|
||||
server_name=server_name,
|
||||
server_url=entry.server_url,
|
||||
server_url=_parse_base_url(entry.server_url),
|
||||
client_metadata=client_metadata,
|
||||
storage=storage,
|
||||
redirect_handler=_redirect_handler,
|
||||
|
||||
-21
@@ -60,11 +60,6 @@ _HERMES_CORE_TOOLS = [
|
||||
"send_message",
|
||||
# Home Assistant smart home control (gated on HASS_TOKEN via check_fn)
|
||||
"ha_list_entities", "ha_get_state", "ha_list_services", "ha_call_service",
|
||||
# Kanban multi-agent coordination — only in schema when the agent is
|
||||
# spawned as a kanban worker (HERMES_KANBAN_TASK env set), otherwise
|
||||
# zero schema footprint. Gated via check_fn in tools/kanban_tools.py.
|
||||
"kanban_show", "kanban_complete", "kanban_block", "kanban_heartbeat",
|
||||
"kanban_comment", "kanban_create", "kanban_link",
|
||||
]
|
||||
|
||||
|
||||
@@ -207,22 +202,6 @@ TOOLSETS = {
|
||||
"includes": []
|
||||
},
|
||||
|
||||
"kanban": {
|
||||
"description": (
|
||||
"Kanban multi-agent coordination — only active when the agent "
|
||||
"is spawned by `hermes kanban daemon` (HERMES_KANBAN_TASK env "
|
||||
"set). Lets workers mark tasks done with structured handoffs, "
|
||||
"block for human input, heartbeat during long ops, comment "
|
||||
"on threads, and (for orchestrators) fan out into child tasks."
|
||||
),
|
||||
"tools": [
|
||||
"kanban_show", "kanban_complete", "kanban_block",
|
||||
"kanban_heartbeat", "kanban_comment",
|
||||
"kanban_create", "kanban_link",
|
||||
],
|
||||
"includes": [],
|
||||
},
|
||||
|
||||
"discord": {
|
||||
"description": "Discord read and participate tools (fetch messages, search members, create threads)",
|
||||
"tools": ["discord"],
|
||||
|
||||
@@ -2550,6 +2550,48 @@ def _(rid, params: dict) -> dict:
|
||||
return _ok(rid, {"task_id": task_id})
|
||||
|
||||
|
||||
@method("prompt.btw")
def _(rid, params: dict) -> dict:
    """Start a side ("by the way") prompt on a background thread.

    The session is resolved via ``_sess`` and ``text`` must be non-empty.
    A copy of the session history is taken up front and handed to a fresh
    ``AIAgent`` (no toolsets enabled, at most 8 iterations) on a daemon
    thread. The outcome is delivered through a ``btw.complete`` event
    carrying either the agent's reply or ``"error: ..."``.

    Returns:
        The session error from ``_sess`` if any, an error response with code
        4012 when ``text`` is missing, otherwise an ``_ok`` response with
        ``{"status": "running"}``, sent as soon as the thread is started.
    """
    session, err = _sess(params, rid)
    if err:
        return err

    text = params.get("text", "")
    sid = params.get("session_id", "")
    if not text:
        return _err(rid, 4012, "text required")

    # Copy the history list so the worker thread uses its own list object.
    history_copy = list(session.get("history", []))

    def run():
        ctx_tokens = _set_session_context(session["session_key"])
        try:
            from run_agent import AIAgent

            agent = AIAgent(
                model=_resolve_model(),
                quiet_mode=True,
                platform="tui",
                max_iterations=8,
                enabled_toolsets=[],
            )
            result = agent.run_conversation(text, conversation_history=history_copy)
            if isinstance(result, dict):
                reply = result.get("final_response", str(result))
            else:
                reply = str(result)
            _emit("btw.complete", sid, {"text": reply})
        except Exception as exc:
            # Report failures through the same event so the client is notified.
            _emit("btw.complete", sid, {"text": f"error: {exc}"})
        finally:
            _clear_session_context(ctx_tokens)

    threading.Thread(target=run, daemon=True).start()
    return _ok(rid, {"status": "running"})
|
||||
|
||||
|
||||
# ── Methods: respond ─────────────────────────────────────────────────
|
||||
|
||||
|
||||
|
||||
@@ -252,6 +252,7 @@ Primary event types the client handles today:
|
||||
| `sudo.request` | `{ request_id }` |
|
||||
| `secret.request` | `{ prompt, env_var, request_id }` |
|
||||
| `background.complete` | `{ task_id, text }` |
|
||||
| `btw.complete` | `{ text }` |
|
||||
| `error` | `{ message }` |
|
||||
| `gateway.stderr` | synthesized from child stderr |
|
||||
| `gateway.protocol_error` | synthesized from malformed stdout |
|
||||
|
||||
@@ -9,9 +9,9 @@ import { type FocusMove, type SelectionState, shiftAnchor } from '../selection.j
|
||||
* Returns no-op functions when fullscreen mode is disabled.
|
||||
*/
|
||||
export function useSelection(): {
|
||||
copySelection: () => Promise<string>
|
||||
copySelection: () => string
|
||||
/** Copy without clearing the highlight (for copy-on-select). */
|
||||
copySelectionNoClear: () => Promise<string>
|
||||
copySelectionNoClear: () => string
|
||||
clearSelection: () => void
|
||||
hasSelection: () => boolean
|
||||
/** Read the raw mutable selection state (for drag-to-scroll). */
|
||||
@@ -48,8 +48,8 @@ export function useSelection(): {
|
||||
return useMemo(() => {
|
||||
if (!ink) {
|
||||
return {
|
||||
copySelection: async () => '',
|
||||
copySelectionNoClear: async () => '',
|
||||
copySelection: () => '',
|
||||
copySelectionNoClear: () => '',
|
||||
clearSelection: () => {},
|
||||
hasSelection: () => false,
|
||||
getState: () => null,
|
||||
|
||||
@@ -1297,13 +1297,11 @@ export default class Ink {
|
||||
}
|
||||
|
||||
/**
|
||||
* Copy the current text selection to the system clipboard without clearing the
|
||||
* selection. Returns the copied text when a clipboard path succeeded (native
|
||||
* tool fired, tmux buffer loaded, or OSC 52 emitted), or '' when no path was
|
||||
* taken (e.g. headless Linux without tmux). Matches iTerm2's copy-on-select
|
||||
* behavior where the selected region stays visible after the automatic copy.
|
||||
* Copy the current selection to the clipboard without clearing the
|
||||
* highlight. Matches iTerm2's copy-on-select behavior where the selected
|
||||
* region stays visible after the automatic copy.
|
||||
*/
|
||||
async copySelectionNoClear(): Promise<string> {
|
||||
copySelectionNoClear(): string {
|
||||
if (!hasSelection(this.selection)) {
|
||||
return ''
|
||||
}
|
||||
@@ -1311,41 +1309,28 @@ export default class Ink {
|
||||
const text = getSelectedText(this.selection, this.frontFrame.screen)
|
||||
|
||||
if (text) {
|
||||
try {
|
||||
const { sequence, success } = await setClipboard(text)
|
||||
|
||||
if (sequence) {
|
||||
this.options.stdout.write(sequence)
|
||||
// Raw OSC 52, or DCS-passthrough-wrapped OSC 52 inside tmux (tmux
|
||||
// drops it silently unless allow-passthrough is on — no regression).
|
||||
void setClipboard(text).then(raw => {
|
||||
if (raw) {
|
||||
this.options.stdout.write(raw)
|
||||
}
|
||||
|
||||
if (success) {
|
||||
return text
|
||||
}
|
||||
|
||||
if (process.env.HERMES_TUI_DEBUG_CLIPBOARD) {
|
||||
console.error('[clipboard] no path reached the clipboard (headless + no tmux?) — set HERMES_TUI_FORCE_OSC52=1 to force the escape sequence')
|
||||
}
|
||||
} catch (err) {
|
||||
if (process.env.HERMES_TUI_DEBUG_CLIPBOARD) {
|
||||
console.error('[clipboard] error:', err)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
return ''
|
||||
return text
|
||||
}
|
||||
|
||||
/**
|
||||
* Copy the current text selection to the system clipboard via OSC 52
|
||||
* and clear the selection. Returns the copied text (empty if no selection
|
||||
* or clipboard operation failed).
|
||||
* and clear the selection. Returns the copied text (empty if no selection).
|
||||
*/
|
||||
async copySelection(): Promise<string> {
|
||||
copySelection(): string {
|
||||
if (!hasSelection(this.selection)) {
|
||||
return ''
|
||||
}
|
||||
|
||||
const text = await this.copySelectionNoClear()
|
||||
const text = this.copySelectionNoClear()
|
||||
clearSelection(this.selection)
|
||||
this.notifySelectionChange()
|
||||
|
||||
|
||||
@@ -26,26 +26,4 @@ describe('shouldEmitClipboardSequence', () => {
|
||||
shouldEmitClipboardSequence({ HERMES_TUI_COPY_OSC52: '0', TERM: 'xterm-256color' } as NodeJS.ProcessEnv)
|
||||
).toBe(false)
|
||||
})
|
||||
|
||||
it('HERMES_TUI_FORCE_OSC52 takes precedence over TMUX suppression', () => {
|
||||
// Without the override, local-in-tmux suppresses the OSC 52 sequence
|
||||
// so the terminal multiplexer path wins. FORCE_OSC52=1 flips that
|
||||
// back on for users whose tmux config supports passthrough.
|
||||
expect(shouldEmitClipboardSequence({ TMUX: '/tmp/t,1,0' } as NodeJS.ProcessEnv)).toBe(false)
|
||||
expect(
|
||||
shouldEmitClipboardSequence({
|
||||
HERMES_TUI_FORCE_OSC52: '1',
|
||||
TMUX: '/tmp/t,1,0'
|
||||
} as NodeJS.ProcessEnv)
|
||||
).toBe(true)
|
||||
})
|
||||
|
||||
it('HERMES_TUI_FORCE_OSC52=0 suppresses OSC 52 even for remote or plain terminals', () => {
|
||||
expect(
|
||||
shouldEmitClipboardSequence({
|
||||
HERMES_TUI_FORCE_OSC52: '0',
|
||||
SSH_CONNECTION: '1'
|
||||
} as NodeJS.ProcessEnv)
|
||||
).toBe(false)
|
||||
})
|
||||
})
|
||||
|
||||
@@ -84,11 +84,7 @@ export function getClipboardPath(): ClipboardPath {
|
||||
}
|
||||
|
||||
export function shouldEmitClipboardSequence(env: NodeJS.ProcessEnv = process.env): boolean {
|
||||
const override = (
|
||||
env.HERMES_TUI_FORCE_OSC52 ??
|
||||
env.HERMES_TUI_CLIPBOARD_OSC52 ??
|
||||
env.HERMES_TUI_COPY_OSC52 ?? ''
|
||||
).trim()
|
||||
const override = (env.HERMES_TUI_CLIPBOARD_OSC52 ?? env.HERMES_TUI_COPY_OSC52 ?? '').trim()
|
||||
|
||||
if (ENV_ON_RE.test(override)) {
|
||||
return true
|
||||
@@ -166,23 +162,10 @@ export async function tmuxLoadBuffer(text: string): Promise<boolean> {
|
||||
* utilities (pbcopy/wl-copy/xclip/xsel/clip.exe) always work locally. Over
|
||||
* SSH these would write to the remote clipboard — OSC 52 is the right path there.
|
||||
*
|
||||
* Returns { sequence, success }:
|
||||
* - `sequence` is the bytes to write to stdout (raw OSC 52 outside tmux,
|
||||
* DCS-wrapped inside; empty string when we shouldn't emit).
|
||||
* - `success` is true when we believe SOME path reached the clipboard:
|
||||
* native tool fired (local), tmux buffer loaded, or an OSC 52 sequence
|
||||
* was emitted to the terminal. False only when no path was taken at
|
||||
* all (headless Linux with no tmux + osc52 suppressed, effectively).
|
||||
* This is best-effort — pbcopy/xclip are fire-and-forget, and OSC 52
|
||||
* depends on the outer terminal honoring the sequence — but it lets
|
||||
* callers distinguish "nothing attempted" from "attempted".
|
||||
* Returns the sequence for the caller to write to stdout (raw OSC 52
|
||||
* outside tmux, DCS-wrapped inside).
|
||||
*/
|
||||
export type ClipboardResult = {
|
||||
sequence: string
|
||||
success: boolean
|
||||
}
|
||||
|
||||
export async function setClipboard(text: string): Promise<ClipboardResult> {
|
||||
export async function setClipboard(text: string): Promise<string> {
|
||||
const b64 = Buffer.from(text, 'utf8').toString('base64')
|
||||
const raw = osc(OSC.CLIPBOARD, 'c', b64)
|
||||
const emitSequence = shouldEmitClipboardSequence(process.env)
|
||||
@@ -194,28 +177,20 @@ export async function setClipboard(text: string): Promise<ClipboardResult> {
|
||||
// (https://anthropic.slack.com/archives/C07VBSHV7EV/p1773943921788829).
|
||||
// Gated on SSH_CONNECTION (not SSH_TTY) since tmux panes inherit SSH_TTY
|
||||
// forever but SSH_CONNECTION is in tmux's default update-environment and
|
||||
// clears on local attach. Fire-and-forget, but `copyNativeAttempted`
|
||||
// tells us whether ANY native path will be tried on this platform.
|
||||
const nativeAttempted =
|
||||
!process.env['SSH_CONNECTION'] && copyNative(text)
|
||||
// clears on local attach. Fire-and-forget.
|
||||
if (!process.env['SSH_CONNECTION']) {
|
||||
copyNative(text)
|
||||
}
|
||||
|
||||
const tmuxBufferLoaded = await tmuxLoadBuffer(text)
|
||||
|
||||
// Inner OSC uses BEL directly (not osc()) — ST's ESC would need doubling
|
||||
// too, and BEL works everywhere for OSC 52.
|
||||
const sequence = tmuxBufferLoaded
|
||||
? (emitSequence ? tmuxPassthrough(`${ESC}]52;c;${b64}${BEL}`) : '')
|
||||
: (emitSequence ? raw : '')
|
||||
if (tmuxBufferLoaded) {
|
||||
return emitSequence ? tmuxPassthrough(`${ESC}]52;c;${b64}${BEL}`) : ''
|
||||
}
|
||||
|
||||
// Success if any path was taken. Native and tmux are fire-and-forget,
|
||||
// so we can't truly confirm the clipboard was written — but if native
|
||||
// was attempted OR tmux buffer loaded OR we emitted OSC 52, the user's
|
||||
// paste is likely to work. The only false case is "we did literally
|
||||
// nothing" (e.g. local-in-tmux with osc52 suppressed and tmux buffer
|
||||
// load failed), in which case reporting failure to the user is honest.
|
||||
const success = nativeAttempted || tmuxBufferLoaded || sequence.length > 0
|
||||
|
||||
return { sequence, success }
|
||||
return emitSequence ? raw : ''
|
||||
}
|
||||
|
||||
// Linux clipboard tool: undefined = not yet probed, null = none available.
|
||||
@@ -223,95 +198,65 @@ export async function setClipboard(text: string): Promise<ClipboardResult> {
|
||||
// Cached after first attempt so repeated mouse-ups skip the probe chain.
|
||||
let linuxCopy: 'wl-copy' | 'xclip' | 'xsel' | null | undefined
|
||||
|
||||
/** Internal: probe once and cache — wl-copy first, then xclip, then xsel. */
|
||||
async function probeLinuxCopy(): Promise<'wl-copy' | 'xclip' | 'xsel' | null> {
|
||||
const opts = { useCwd: false, timeout: 500 }
|
||||
|
||||
const r = await execFileNoThrow('wl-copy', [], opts)
|
||||
|
||||
if (r.code === 0) {
|
||||
return 'wl-copy'
|
||||
}
|
||||
|
||||
const r2 = await execFileNoThrow('xclip', ['-selection', 'clipboard'], opts)
|
||||
|
||||
if (r2.code === 0) {
|
||||
return 'xclip'
|
||||
}
|
||||
|
||||
const r3 = await execFileNoThrow('xsel', ['--clipboard', '--input'], opts)
|
||||
|
||||
return r3.code === 0 ? 'xsel' : null
|
||||
}
|
||||
|
||||
/**
|
||||
* Shell out to a native clipboard utility as a safety net for OSC 52.
|
||||
* Only called when not in an SSH session (over SSH, these would write to
|
||||
* the remote machine's clipboard — OSC 52 is the right path there).
|
||||
* Fire-and-forget: failures are silent since OSC 52 may have succeeded.
|
||||
*
|
||||
* Returns true when a native copy path was (or will be) attempted — i.e.
|
||||
* we'll spawn pbcopy on macOS, clip on Windows, or a known-working Linux
|
||||
* tool. Returns false only when we know no native tool is viable (Linux
|
||||
* without DISPLAY/WAYLAND_DISPLAY, or previously-probed-to-null). The
|
||||
* return value is used to decide whether to tell the user the copy
|
||||
* succeeded — spawning is best-effort but good enough to claim success.
|
||||
*
|
||||
* Linux behaviour: if DISPLAY and WAYLAND_DISPLAY are both unset, native
|
||||
* clipboard tools cannot work (they need a display server). In that case
|
||||
* we skip probing entirely and treat linuxCopy as permanently null.
|
||||
*/
|
||||
function copyNative(text: string): boolean {
|
||||
function copyNative(text: string): void {
|
||||
const opts = { input: text, useCwd: false, timeout: 2000 }
|
||||
|
||||
switch (process.platform) {
|
||||
case 'darwin':
|
||||
void execFileNoThrow('pbcopy', [], opts)
|
||||
|
||||
return true
|
||||
return
|
||||
case 'linux': {
|
||||
// If we already probed (success or hard-fail), short-circuit.
|
||||
if (linuxCopy !== undefined) {
|
||||
if (linuxCopy === null) {
|
||||
// No working native tool — skip silently.
|
||||
return false
|
||||
}
|
||||
|
||||
// linuxCopy is a known-working tool; fire-and-forget.
|
||||
void execFileNoThrow(linuxCopy, linuxCopy === 'wl-copy' ? [] : ['-selection', 'clipboard'], opts)
|
||||
|
||||
return true
|
||||
if (linuxCopy === null) {
|
||||
return
|
||||
}
|
||||
|
||||
// No display server → native tools will fail immediately. Cache null.
|
||||
if (!process.env.DISPLAY && !process.env.WAYLAND_DISPLAY) {
|
||||
if (process.env.HERMES_TUI_DEBUG_CLIPBOARD) {
|
||||
console.error('[clipboard] [native] Linux: no DISPLAY or WAYLAND_DISPLAY — native clipboard unavailable')
|
||||
}
|
||||
if (linuxCopy === 'wl-copy') {
|
||||
void execFileNoThrow('wl-copy', [], opts)
|
||||
|
||||
linuxCopy = null
|
||||
|
||||
return false
|
||||
return
|
||||
}
|
||||
// First call: probe in the background and cache the result for future copies.
|
||||
// We don't await — this is fire-and-forget. Treat as an attempt:
|
||||
// the probe will discover a tool and spawn it. If probing finds
|
||||
// nothing, the NEXT copy will short-circuit above.
|
||||
void (async () => {
|
||||
const winner = await probeLinuxCopy()
|
||||
linuxCopy = winner
|
||||
|
||||
if (process.env.HERMES_TUI_DEBUG_CLIPBOARD) {
|
||||
console.error(`[clipboard] [native] Linux: clipboard probe complete → ${winner ?? 'no tool available'}`)
|
||||
if (linuxCopy === 'xclip') {
|
||||
void execFileNoThrow('xclip', ['-selection', 'clipboard'], opts)
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
if (linuxCopy === 'xsel') {
|
||||
void execFileNoThrow('xsel', ['--clipboard', '--input'], opts)
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// First call: probe wl-copy (Wayland) then xclip/xsel (X11), cache winner.
|
||||
void execFileNoThrow('wl-copy', [], opts).then(r => {
|
||||
if (r.code === 0) {
|
||||
linuxCopy = 'wl-copy'
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// Actually perform the copy with the discovered tool.
|
||||
if (winner) {
|
||||
void execFileNoThrow(winner, winner === 'wl-copy' ? [] : ['-selection', 'clipboard'], opts)
|
||||
}
|
||||
})()
|
||||
void execFileNoThrow('xclip', ['-selection', 'clipboard'], opts).then(r2 => {
|
||||
if (r2.code === 0) {
|
||||
linuxCopy = 'xclip'
|
||||
|
||||
return true
|
||||
return
|
||||
}
|
||||
|
||||
void execFileNoThrow('xsel', ['--clipboard', '--input'], opts).then(r3 => {
|
||||
linuxCopy = r3.code === 0 ? 'xsel' : null
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
case 'win32':
|
||||
@@ -319,10 +264,8 @@ function copyNative(text: string): boolean {
|
||||
// imperfect (system locale encoding) but good enough for a fallback.
|
||||
void execFileNoThrow('clip', [], opts)
|
||||
|
||||
return true
|
||||
return
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
/** @internal test-only */
|
||||
|
||||
@@ -363,7 +363,7 @@ const buildComposer = () => ({
|
||||
hasSelection: false,
|
||||
paste: vi.fn(),
|
||||
queueRef: { current: [] as string[] },
|
||||
selection: { copySelection: vi.fn(async () => '') },
|
||||
selection: { copySelection: vi.fn(() => '') },
|
||||
setInput: vi.fn()
|
||||
})
|
||||
|
||||
|
||||
@@ -431,6 +431,12 @@ export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev:
|
||||
|
||||
return
|
||||
|
||||
case 'btw.complete':
|
||||
dropBgTask('btw:x')
|
||||
sys(`[btw] ${ev.payload.text}`)
|
||||
|
||||
return
|
||||
|
||||
case 'subagent.spawn_requested':
|
||||
// Child built but not yet running (waiting on ThreadPoolExecutor slot).
|
||||
// Preserve completed state if a later event races in before this one.
|
||||
|
||||
@@ -32,7 +32,7 @@ export type StatusBarMode = 'bottom' | 'off' | 'top'
|
||||
|
||||
export interface SelectionApi {
|
||||
clearSelection: () => void
|
||||
copySelection: () => Promise<string>
|
||||
copySelection: () => string
|
||||
}
|
||||
|
||||
export interface CompletionItem {
|
||||
|
||||
@@ -251,17 +251,11 @@ export const coreCommands: SlashCommand[] = [
|
||||
{
|
||||
help: 'copy selection or assistant message',
|
||||
name: 'copy',
|
||||
run: async (arg, ctx) => {
|
||||
run: (arg, ctx) => {
|
||||
const { sys } = ctx.transcript
|
||||
|
||||
if (!arg && ctx.composer.hasSelection) {
|
||||
const text = await ctx.composer.selection.copySelection()
|
||||
|
||||
if (text) {
|
||||
return sys(`copied ${text.length} characters`)
|
||||
} else {
|
||||
return sys('clipboard copy failed — try HERMES_TUI_FORCE_OSC52=1 to force the escape sequence; HERMES_TUI_DEBUG_CLIPBOARD=1 for details')
|
||||
}
|
||||
if (!arg && ctx.composer.hasSelection && ctx.composer.selection.copySelection()) {
|
||||
return sys('copied selection')
|
||||
}
|
||||
|
||||
if (arg && Number.isNaN(parseInt(arg, 10))) {
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import { attachedImageNotice, introMsg, toTranscriptMessages } from '../../../domain/messages.js'
|
||||
import type {
|
||||
BackgroundStartResponse,
|
||||
BtwStartResponse,
|
||||
ConfigGetValueResponse,
|
||||
ConfigSetResponse,
|
||||
ImageAttachResponse,
|
||||
@@ -17,7 +18,7 @@ import type { SlashCommand } from '../types.js'
|
||||
|
||||
export const sessionCommands: SlashCommand[] = [
|
||||
{
|
||||
aliases: ['bg', 'btw'],
|
||||
aliases: ['bg'],
|
||||
help: 'launch a background prompt',
|
||||
name: 'background',
|
||||
run: (arg, ctx) => {
|
||||
@@ -38,6 +39,23 @@ export const sessionCommands: SlashCommand[] = [
|
||||
}
|
||||
},
|
||||
|
||||
{
|
||||
help: 'by-the-way follow-up',
|
||||
name: 'btw',
|
||||
run: (arg, ctx) => {
|
||||
if (!arg) {
|
||||
return ctx.transcript.sys('/btw <question>')
|
||||
}
|
||||
|
||||
ctx.gateway.rpc<BtwStartResponse>('prompt.btw', { session_id: ctx.sid, text: arg }).then(
|
||||
ctx.guarded(() => {
|
||||
patchUiState(state => ({ ...state, bgTasks: new Set(state.bgTasks).add('btw:x') }))
|
||||
ctx.transcript.sys('btw running…')
|
||||
})
|
||||
)
|
||||
}
|
||||
},
|
||||
|
||||
{
|
||||
help: 'change or show model',
|
||||
aliases: ['provider'],
|
||||
|
||||
@@ -0,0 +1,197 @@
|
||||
import React from 'react';
|
||||
import { Box, useApp } from 'ink';
|
||||
import { usePerformanceMonitor } from '../hooks/usePerformance';
|
||||
|
||||
/**
|
||||
* A fixed window scroller component for efficient rendering of large lists
|
||||
* This is a lightweight virtualization component that only renders visible items
|
||||
* plus a configurable overscan buffer for smooth scrolling
|
||||
*/
|
||||
export const FixedWindowScroller = React.forwardRef(({
|
||||
items,
|
||||
height,
|
||||
width,
|
||||
itemHeight = 3, // Average height of each item in terminal rows
|
||||
renderItem,
|
||||
overscrollItems = 20, // Number of items to render outside visible area
|
||||
onScroll,
|
||||
initialScrollToEnd = true,
|
||||
}, ref) => {
|
||||
const { stdout } = useApp();
|
||||
const { logEvent } = usePerformanceMonitor('FixedWindowScroller', {
|
||||
logToConsole: false
|
||||
});
|
||||
|
||||
// Container ref for scroll measurements
|
||||
const containerRef = React.useRef(null);
|
||||
|
||||
// Track scroll state
|
||||
const lastScrollTopRef = React.useRef(0);
|
||||
const lastItemsLengthRef = React.useRef(items.length);
|
||||
|
||||
// Calculate visible window based on container dimensions
|
||||
const [visibleWindow, setVisibleWindow] = React.useState({
|
||||
startIndex: Math.max(0, items.length - Math.floor(height / itemHeight) - overscrollItems),
|
||||
endIndex: items.length,
|
||||
scrollTop: 0
|
||||
});
|
||||
|
||||
// Expose scroll methods via ref
|
||||
React.useImperativeHandle(ref, () => ({
|
||||
scrollToItem: (index, align = 'auto') => {
|
||||
if (!containerRef.current) return;
|
||||
|
||||
const container = containerRef.current;
|
||||
const itemOffset = index * itemHeight;
|
||||
|
||||
if (align === 'start') {
|
||||
container.scrollTop = itemOffset;
|
||||
} else if (align === 'end') {
|
||||
container.scrollTop = itemOffset - height + itemHeight;
|
||||
} else if (align === 'center') {
|
||||
container.scrollTop = itemOffset - height / 2 + itemHeight / 2;
|
||||
} else {
|
||||
// Auto alignment - only scroll if item is outside visible area
|
||||
const { scrollTop } = container;
|
||||
const visibleBottom = scrollTop + height;
|
||||
|
||||
if (itemOffset < scrollTop) {
|
||||
container.scrollTop = itemOffset;
|
||||
} else if (itemOffset + itemHeight > visibleBottom) {
|
||||
container.scrollTop = itemOffset - height + itemHeight;
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
scrollToTop: () => {
|
||||
if (containerRef.current) {
|
||||
containerRef.current.scrollTop = 0;
|
||||
}
|
||||
},
|
||||
|
||||
scrollToBottom: () => {
|
||||
if (containerRef.current) {
|
||||
containerRef.current.scrollTop = containerRef.current.scrollHeight;
|
||||
}
|
||||
},
|
||||
|
||||
// Compatibility with ScrollBoxHandle
|
||||
getScrollTop: () => containerRef.current?.scrollTop || 0,
|
||||
getViewportHeight: () => height,
|
||||
getPendingDelta: () => 0,
|
||||
isSticky: () => visibleWindow.startIndex === items.length - visibleItemCount,
|
||||
}), [height, itemHeight, items.length, visibleWindow.startIndex]);
|
||||
|
||||
// Calculate how many items fit in the viewport
|
||||
const visibleItemCount = Math.ceil(height / itemHeight);
|
||||
|
||||
// Handle scroll events
|
||||
const handleScroll = React.useCallback((event) => {
|
||||
if (!containerRef.current) return;
|
||||
|
||||
const { scrollTop, scrollHeight, clientHeight } = containerRef.current;
|
||||
const scrollTopDiff = Math.abs(scrollTop - lastScrollTopRef.current);
|
||||
|
||||
// Only update if we've scrolled a significant amount
|
||||
if (scrollTopDiff > (itemHeight / 2)) {
|
||||
const totalItems = items.length;
|
||||
const visibleItems = Math.floor(clientHeight / itemHeight);
|
||||
|
||||
// Calculate the first visible item index
|
||||
const firstVisibleItemIndex = Math.floor(scrollTop / itemHeight);
|
||||
|
||||
// Calculate start and end indices with overscroll
|
||||
const startIndex = Math.max(0, firstVisibleItemIndex - overscrollItems);
|
||||
const endIndex = Math.min(
|
||||
totalItems,
|
||||
firstVisibleItemIndex + visibleItems + overscrollItems
|
||||
);
|
||||
|
||||
logEvent(`window-update-${startIndex}-${endIndex}`);
|
||||
|
||||
setVisibleWindow({ startIndex, endIndex, scrollTop });
|
||||
lastScrollTopRef.current = scrollTop;
|
||||
|
||||
// Call external scroll handler if provided
|
||||
if (onScroll) {
|
||||
onScroll({
|
||||
scrollTop,
|
||||
scrollHeight,
|
||||
clientHeight,
|
||||
firstVisibleItemIndex,
|
||||
lastVisibleItemIndex: firstVisibleItemIndex + visibleItems,
|
||||
isAtTop: scrollTop < itemHeight,
|
||||
isAtBottom: scrollTop + clientHeight >= scrollHeight - itemHeight
|
||||
});
|
||||
}
|
||||
}
|
||||
}, [items.length, itemHeight, overscrollItems, onScroll, logEvent]);
|
||||
|
||||
// Auto-scroll to bottom when new items are added
|
||||
React.useEffect(() => {
|
||||
if (!containerRef.current) return;
|
||||
|
||||
const isNewMessagesAdded = items.length > lastItemsLengthRef.current;
|
||||
const isNearBottom = containerRef.current.scrollHeight - containerRef.current.clientHeight - containerRef.current.scrollTop < itemHeight * 3;
|
||||
|
||||
if ((isNewMessagesAdded && isNearBottom) || initialScrollToEnd) {
|
||||
containerRef.current.scrollTop = containerRef.current.scrollHeight;
|
||||
|
||||
// Update the visible window to show the end
|
||||
setVisibleWindow({
|
||||
startIndex: Math.max(0, items.length - Math.floor(height / itemHeight) - overscrollItems),
|
||||
endIndex: items.length,
|
||||
scrollTop: containerRef.current.scrollHeight
|
||||
});
|
||||
|
||||
logEvent('auto-scroll');
|
||||
}
|
||||
|
||||
lastItemsLengthRef.current = items.length;
|
||||
}, [items.length, height, itemHeight, overscrollItems, initialScrollToEnd, logEvent]);
|
||||
|
||||
// Get the visible subset of items
|
||||
const visibleItems = items.slice(visibleWindow.startIndex, visibleWindow.endIndex);
|
||||
|
||||
return (
|
||||
<Box
|
||||
ref={containerRef}
|
||||
overflow="auto"
|
||||
width={width}
|
||||
height={height}
|
||||
onScroll={handleScroll}
|
||||
style={{ scrollbarGutter: 'stable' }}
|
||||
>
|
||||
{/* Top spacer */}
|
||||
{visibleWindow.startIndex > 0 && (
|
||||
<Box
|
||||
width="100%"
|
||||
height={visibleWindow.startIndex * itemHeight}
|
||||
padding={0}
|
||||
/>
|
||||
)}
|
||||
|
||||
{/* Visible items */}
|
||||
{visibleItems.map((item, index) =>
|
||||
renderItem({
|
||||
item,
|
||||
index: visibleWindow.startIndex + index,
|
||||
isVisible: true
|
||||
})
|
||||
)}
|
||||
|
||||
{/* Bottom spacer */}
|
||||
{visibleWindow.endIndex < items.length && (
|
||||
<Box
|
||||
width="100%"
|
||||
height={(items.length - visibleWindow.endIndex) * itemHeight}
|
||||
padding={0}
|
||||
/>
|
||||
)}
|
||||
</Box>
|
||||
);
|
||||
});
|
||||
|
||||
FixedWindowScroller.displayName = 'FixedWindowScroller';
|
||||
|
||||
export default FixedWindowScroller;
|
||||
@@ -0,0 +1,76 @@
|
||||
import React from 'react';
|
||||
import { Box } from 'ink';
|
||||
import { FixedWindowScroller } from './FixedWindowScroller';
|
||||
import { usePerformanceMonitor } from '../hooks/usePerformance';
|
||||
|
||||
/**
|
||||
* OptimizedTranscriptPane is a drop-in replacement for the transcript area
|
||||
* that uses virtualization to dramatically improve performance with large
|
||||
* message histories.
|
||||
*/
|
||||
export const OptimizedTranscriptPane = React.memo(({
|
||||
messages,
|
||||
renderMessage,
|
||||
height,
|
||||
width,
|
||||
onScroll,
|
||||
}) => {
|
||||
const { logEvent } = usePerformanceMonitor('OptimizedTranscriptPane', {
|
||||
logToConsole: false
|
||||
});
|
||||
|
||||
// Reference to the scroller component
|
||||
const scrollerRef = React.useRef(null);
|
||||
|
||||
// Keep track of visible window for debugging
|
||||
const [visibleRange, setVisibleRange] = React.useState({ start: 0, end: 0 });
|
||||
|
||||
// Handle scroll events
|
||||
const handleScroll = React.useCallback((scrollInfo) => {
|
||||
setVisibleRange({
|
||||
start: scrollInfo.firstVisibleItemIndex,
|
||||
end: scrollInfo.lastVisibleItemIndex
|
||||
});
|
||||
|
||||
if (onScroll) {
|
||||
onScroll(scrollInfo);
|
||||
}
|
||||
}, [onScroll]);
|
||||
|
||||
// Memoize the render function for better performance
|
||||
const renderItem = React.useCallback(({ item, index, isVisible }) => {
|
||||
if (!isVisible) {
|
||||
return <Box height={3} />; // Placeholder with approximate height
|
||||
}
|
||||
|
||||
return renderMessage(item, index);
|
||||
}, [renderMessage]);
|
||||
|
||||
// Log performance data
|
||||
React.useEffect(() => {
|
||||
logEvent(`render-range-${visibleRange.start}-${visibleRange.end}`);
|
||||
}, [visibleRange, logEvent]);
|
||||
|
||||
return (
|
||||
<Box
|
||||
flexDirection="column"
|
||||
height={height}
|
||||
width={width}
|
||||
style={{ scrollbarGutter: 'stable' }}
|
||||
>
|
||||
<FixedWindowScroller
|
||||
ref={scrollerRef}
|
||||
items={messages}
|
||||
height={height}
|
||||
width={width}
|
||||
itemHeight={3} // Average message height (will be refined)
|
||||
renderItem={renderItem}
|
||||
overscrollItems={25} // Number of off-screen items to keep mounted
|
||||
onScroll={handleScroll}
|
||||
initialScrollToEnd={true}
|
||||
/>
|
||||
</Box>
|
||||
);
|
||||
});
|
||||
|
||||
export default OptimizedTranscriptPane;
|
||||
@@ -1,5 +1,7 @@
|
||||
import { AlternateScreen, Box, NoSelect, ScrollBox, Text } from '@hermes/ink'
|
||||
import { useStore } from '@nanostores/react'
|
||||
import { $uiState } from '../app/uiStore.js'
|
||||
import { OptimizedTranscriptPane } from './OptimizedTranscriptPane.js'
|
||||
import { memo } from 'react'
|
||||
|
||||
import { useGateway } from '../app/gatewayContext.js'
|
||||
@@ -98,21 +100,23 @@ const StreamingAssistant = memo(function StreamingAssistant({
|
||||
})
|
||||
|
||||
const TranscriptPane = memo(function TranscriptPane({
|
||||
actions,
|
||||
composer,
|
||||
progress,
|
||||
transcript
|
||||
const TranscriptPane = memo(function TranscriptPane({
|
||||
actions,
|
||||
composer,
|
||||
progress,
|
||||
transcript
|
||||
}: Pick<AppLayoutProps, 'actions' | 'composer' | 'progress' | 'transcript'>) {
|
||||
const ui = useStore($uiState)
|
||||
|
||||
return (
|
||||
<>
|
||||
<ScrollBox flexDirection="column" flexGrow={1} flexShrink={1} ref={transcript.scrollRef} stickyScroll>
|
||||
<Box flexDirection="column" paddingX={1}>
|
||||
{transcript.virtualHistory.topSpacer > 0 ? <Box height={transcript.virtualHistory.topSpacer} /> : null}
|
||||
|
||||
{transcript.virtualRows.slice(transcript.virtualHistory.start, transcript.virtualHistory.end).map(row => (
|
||||
<Box flexDirection="column" key={row.key} ref={transcript.virtualHistory.measureRef(row.key)}>
|
||||
const ui = useStore($uiState)
|
||||
const usePerfMode = true // Always use performance mode for better scrolling
|
||||
return (
|
||||
<>
|
||||
{usePerfMode ? (
|
||||
<OptimizedTranscriptPane
|
||||
messages={transcript.virtualRows}
|
||||
height={ui.rows - 6} // Reserve space for input/status
|
||||
width={composer.cols}
|
||||
renderMessage={(row) => (
|
||||
<Box flexDirection="column" key={row.key} paddingX={1}>
|
||||
{row.msg.kind === 'intro' ? (
|
||||
<Box flexDirection="column" paddingTop={1}>
|
||||
<Banner t={ui.theme} />
|
||||
@@ -132,18 +136,35 @@ const TranscriptPane = memo(function TranscriptPane({
|
||||
/>
|
||||
)}
|
||||
</Box>
|
||||
))}
|
||||
)}
|
||||
/>
|
||||
) : (
|
||||
<ScrollBox flexDirection="column" flexGrow={1} flexShrink={1} ref={transcript.scrollRef} stickyScroll>
|
||||
<Box flexDirection="column" paddingX={1}>
|
||||
{transcript.virtualHistory.topSpacer > 0 ? <Box height={transcript.virtualHistory.topSpacer} /> : null}
|
||||
|
||||
{transcript.virtualHistory.bottomSpacer > 0 ? <Box height={transcript.virtualHistory.bottomSpacer} /> : null}
|
||||
{transcript.virtualRows.slice(transcript.virtualHistory.start, transcript.virtualHistory.end).map(row => (
|
||||
<Box flexDirection="column" key={row.key} ref={transcript.virtualHistory.measureRef(row.key)}>
|
||||
{row.msg.kind === 'intro' ? (
|
||||
<Box flexDirection="column" paddingTop={1}>
|
||||
<Banner t={ui.theme} />
|
||||
|
||||
<StreamingAssistant
|
||||
busy={ui.busy}
|
||||
cols={composer.cols}
|
||||
compact={ui.compact}
|
||||
detailsMode={ui.detailsMode}
|
||||
progress={progress}
|
||||
sections={ui.sections}
|
||||
t={ui.theme}
|
||||
{row.msg.info?.version && <SessionPanel info={row.msg.info} sid={ui.sid} t={ui.theme} />}
|
||||
</Box>
|
||||
) : row.msg.kind === 'panel' && row.msg.panelData ? (
|
||||
<Panel sections={row.msg.panelData.sections} t={ui.theme} title={row.msg.panelData.title} />
|
||||
) : (
|
||||
<MessageLine
|
||||
cols={composer.cols}
|
||||
compact={ui.compact}
|
||||
detailsMode={ui.detailsMode}
|
||||
msg={row.msg}
|
||||
sections={ui.sections}
|
||||
t={ui.theme}
|
||||
/>
|
||||
)}
|
||||
</Box>
|
||||
))}
|
||||
/>
|
||||
</Box>
|
||||
</ScrollBox>
|
||||
|
||||
@@ -178,6 +178,10 @@ export interface BackgroundStartResponse {
|
||||
task_id?: string
|
||||
}
|
||||
|
||||
export interface BtwStartResponse {
|
||||
ok?: boolean
|
||||
}
|
||||
|
||||
export interface ClarifyRespondResponse {
|
||||
ok?: boolean
|
||||
}
|
||||
@@ -399,6 +403,7 @@ export type GatewayEvent =
|
||||
| { payload: { request_id: string }; session_id?: string; type: 'sudo.request' }
|
||||
| { payload: { env_var: string; prompt: string; request_id: string }; session_id?: string; type: 'secret.request' }
|
||||
| { payload: { task_id: string; text: string }; session_id?: string; type: 'background.complete' }
|
||||
| { payload: { text: string }; session_id?: string; type: 'btw.complete' }
|
||||
| { payload: SubagentEventPayload; session_id?: string; type: 'subagent.spawn_requested' }
|
||||
| { payload: SubagentEventPayload; session_id?: string; type: 'subagent.start' }
|
||||
| { payload: SubagentEventPayload; session_id?: string; type: 'subagent.thinking' }
|
||||
|
||||
@@ -0,0 +1,421 @@
|
||||
import { useRef, useCallback, useState, useEffect, useLayoutEffect, useMemo } from 'react';
|
||||
|
||||
/**
 * Custom hook for performance monitoring.
 *
 * Times each render of the calling component (from the hook call during
 * render until the post-commit effect) and keeps a rolling window of the last
 * 100 samples.
 *
 * @param componentName label used as a prefix in log lines
 * @param options.logToConsole emit `[PERF]` lines via console.log (default false)
 * @param options.thresholdMs duration above which a sample counts as slow
 *                            (default 16, i.e. one 60fps frame)
 * @returns `metrics` (snapshot as of the previous commit), `measureOperation`
 *          (times a synchronous callback and returns its duration in ms) and
 *          `logEvent` (logs a named event when console logging is enabled).
 */
export function usePerformanceMonitor(
  componentName: string,
  options: { logToConsole?: boolean; thresholdMs?: number } = {}
) {
  // Merge with defaults field by field: callers commonly pass only
  // `logToConsole`, which must not wipe out the threshold.
  const { logToConsole = false, thresholdMs = 16 } = options;

  const renderCountRef = useRef(0);
  const renderTimesRef = useRef<number[]>([]);

  // Metrics live in a ref, not state: updating state from an effect that runs
  // after every render would schedule another render each time.
  const metricsRef = useRef({
    averageRenderTime: 0,
    totalRenders: 0,
    slowRenders: 0
  });

  // Captured during render; the effect below closes over this render's value.
  const renderStartedAt = performance.now();

  // Runs after every commit (no dependency array on purpose).
  useEffect(() => {
    const renderTime = performance.now() - renderStartedAt;
    const samples = renderTimesRef.current;

    renderCountRef.current += 1;
    samples.push(renderTime);

    // Keep only the last 100 measurements
    if (samples.length > 100) {
      samples.shift();
    }

    const average = samples.reduce((sum, time) => sum + time, 0) / samples.length;
    const slowRenders = samples.filter(time => time > thresholdMs).length;

    metricsRef.current = {
      averageRenderTime: average,
      totalRenders: renderCountRef.current,
      slowRenders
    };

    if (logToConsole && renderTime > thresholdMs) {
      console.log(
        `[PERF] ${componentName} render: ${renderTime.toFixed(2)}ms ` +
        `(avg: ${average.toFixed(2)}ms, slow: ${slowRenders}/${renderCountRef.current})`
      );
    }
  });

  // Function to measure specific operations
  const measureOperation = useCallback((operationName: string, fn: () => void) => {
    const start = performance.now();
    fn();
    const duration = performance.now() - start;

    if (logToConsole && duration > thresholdMs) {
      console.log(`[PERF] ${componentName}.${operationName}: ${duration.toFixed(2)}ms`);
    }

    return duration;
  }, [componentName, logToConsole, thresholdMs]);

  // Memoized so consumers can list it in dependency arrays safely.
  const logEvent = useCallback((event: string, durationMs?: number) => {
    if (!logToConsole) {
      return;
    }

    // Compare against undefined so a legitimate 0ms duration is still printed.
    const message = durationMs !== undefined
      ? `[PERF] ${componentName}.${event}: ${durationMs.toFixed(2)}ms`
      : `[PERF] ${componentName}.${event}`;

    console.log(message);
  }, [componentName, logToConsole]);

  return {
    metrics: metricsRef.current,
    measureOperation,
    logEvent
  };
}
|
||||
|
||||
/**
 * Virtualization hook for variable-height rows.
 *
 * Caches a measured height per item key, derives cumulative offsets from
 * those heights (falling back to `estimate` for unmeasured rows), and picks
 * the index range to mount from the scroll metrics read off `scrollRef`.
 *
 * @param scrollRef ref whose `current` may expose isSticky / getScrollTop /
 *                  getViewportHeight / getPendingDelta / subscribe
 * @param items     rows to virtualize; `key` identifies a row across renders
 * @param columns   current width; on change, cached heights are rescaled
 *                  instead of being remeasured
 * @param options   estimate (default 4), overscan (40), maxMounted (260),
 *                  coldStartCount (40), logPerformance (false)
 * @returns `{ start, end, topSpacer, bottomSpacer, offsets, measureRef }`
 */
export function useEnhancedVirtualHistory(
  scrollRef: any,
  items: readonly { key: string }[],
  columns: number,
  options: {
    estimate?: number;
    overscan?: number;
    maxMounted?: number;
    coldStartCount?: number;
    logPerformance?: boolean;
  } = {}
) {
  // Core state
  const nodesRef = useRef(new Map<string, unknown>());
  const heightsRef = useRef(new Map<string, number>());
  const refsMap = useRef(new Map<string, (el: unknown) => void>());
  const [version, setVersion] = useState(0);

  // Performance tracking
  const measureTime = useRef({
    offsetCalculation: 0,
    heightUpdate: 0,
    rangeCalculation: 0
  });

  // Default options
  const {
    estimate = 4,
    overscan = 40,
    maxMounted = 260,
    coldStartCount = 40,
    logPerformance = false
  } = options;

  // Width change handling with scaling
  const prevColumns = useRef(columns);
  const skipMeasurement = useRef(false);
  const prevRange = useRef<null | readonly [number, number]>(null);
  const freezeRenders = useRef(0);

  // Handle column width changes - scale heights to avoid full remeasurement
  if (prevColumns.current !== columns && prevColumns.current > 0 && columns > 0) {
    const ratio = prevColumns.current / columns;
    prevColumns.current = columns;

    const scaleStart = performance.now();

    for (const [k, h] of heightsRef.current) {
      heightsRef.current.set(k, Math.max(1, Math.round(h * ratio)));
    }

    if (logPerformance) {
      console.log(`[PERF] Height scaling: ${(performance.now() - scaleStart).toFixed(2)}ms`);
    }

    skipMeasurement.current = true;
    freezeRenders.current = 2; // Freeze for 2 renders to allow memos to stabilize
  }

  // Track scroll position and viewport
  const metricsRef = useRef({
    sticky: true,
    top: 0,
    viewportHeight: 0,
    scrollTop: 0,
    pendingDelta: 0
  });

  // Update scroll metrics whenever the scroll position changes
  useEffect(() => {
    if (!scrollRef.current) return;

    const updateMetrics = () => {
      const s = scrollRef.current;
      if (!s) return;

      const scrollTop = Math.max(0, s.getScrollTop?.() ?? 0);

      metricsRef.current = {
        sticky: s.isSticky?.() ?? true,
        top: scrollTop,
        viewportHeight: Math.max(0, s.getViewportHeight?.() ?? 0),
        scrollTop,
        pendingDelta: s.getPendingDelta?.() ?? 0
      };

      // Bump the version so the memoized range is recalculated.
      setVersion(v => v + 1);
    };

    // Initial update
    updateMetrics();

    // Subscribe to scroll events if supported
    const unsubscribe = scrollRef.current.subscribe?.(updateMetrics) ?? (() => {});

    return unsubscribe;
  }, [scrollRef.current]);

  // Drop cached data for keys that are no longer in `items`.
  useEffect(() => {
    const keep = new Set(items.map(i => i.key));
    let dirty = false;

    for (const k of heightsRef.current.keys()) {
      if (!keep.has(k)) {
        heightsRef.current.delete(k);
        nodesRef.current.delete(k);
        refsMap.current.delete(k);
        dirty = true;
      }
    }

    if (dirty) {
      setVersion(v => v + 1);
    }
  }, [items]);

  // Cumulative offsets: offsets[i] is where item i starts and offsets[n] is
  // the total height. `version` is a dependency because the heights live in a
  // ref and would otherwise never invalidate this memo.
  const offsets = useMemo(() => {
    void version;

    const started = performance.now();
    const out = new Array<number>(items.length + 1).fill(0);

    for (let i = 0; i < items.length; i++) {
      out[i + 1] = out[i]! + Math.max(1, Math.floor(heightsRef.current.get(items[i]!.key) ?? estimate));
    }

    measureTime.current.offsetCalculation = performance.now() - started;
    if (logPerformance && measureTime.current.offsetCalculation > 5) {
      console.log(`[PERF] Offset calculation: ${measureTime.current.offsetCalculation.toFixed(2)}ms`);
    }

    return out;
  }, [estimate, items, version]);

  // Calculate visible range
  const range = useMemo(() => {
    const started = performance.now();

    const n = items.length;
    const { top, viewportHeight, sticky } = metricsRef.current;

    // After a width change, reuse the previous range for a couple of renders.
    const frozenRange =
      freezeRenders.current > 0 && prevRange.current && prevRange.current[0] < n ? prevRange.current : null;

    let startIdx = 0;
    let endIdx = n;

    if (frozenRange) {
      startIdx = frozenRange[0];
      endIdx = Math.min(frozenRange[1], n);
    } else if (n > 0) {
      if (viewportHeight <= 0) {
        // Viewport not measured yet: mount only the tail of the list.
        startIdx = Math.max(0, n - coldStartCount);
      } else {
        // Binary search over the offsets for both ends of the range.
        const upperBound = (from: number, limit: number) => {
          let lo = from;
          let hi = n;

          while (lo < hi) {
            const mid = (lo + hi) >> 1;

            if (offsets[mid]! <= limit) {
              lo = mid + 1;
            } else {
              hi = mid;
            }
          }

          return lo;
        };

        startIdx = Math.max(0, upperBound(0, Math.max(0, top - overscan)) - 1);
        endIdx = upperBound(startIdx, top + viewportHeight + overscan);
      }
    }

    // Limit number of mounted items; when sticky keep the tail, else the head.
    if (endIdx - startIdx > maxMounted) {
      if (sticky) {
        startIdx = Math.max(0, endIdx - maxMounted);
      } else {
        endIdx = Math.min(n, startIdx + maxMounted);
      }
    }

    // Update freeze counter
    if (freezeRenders.current > 0) {
      freezeRenders.current--;
    } else {
      prevRange.current = [startIdx, endIdx];
    }

    measureTime.current.rangeCalculation = performance.now() - started;
    if (logPerformance && measureTime.current.rangeCalculation > 5) {
      console.log(`[PERF] Range calculation: ${measureTime.current.rangeCalculation.toFixed(2)}ms`);
    }

    return { start: startIdx, end: endIdx };
  }, [items.length, offsets, version, overscan, maxMounted, coldStartCount]);

  // Returns a stable ref callback per key that records / forgets the node.
  const measureRef = useCallback((key: string) => {
    let fn = refsMap.current.get(key);

    if (!fn) {
      fn = (el: unknown) => {
        if (el) {
          nodesRef.current.set(key, el);
        } else {
          nodesRef.current.delete(key);
        }
      };
      refsMap.current.set(key, fn);
    }

    return fn;
  }, []);

  // Update height measurements after render
  useLayoutEffect(() => {
    const started = performance.now();
    let dirty = false;

    if (skipMeasurement.current) {
      // Heights were just rescaled for a width change; skip one measurement.
      skipMeasurement.current = false;
    } else {
      for (let i = range.start; i < range.end; i++) {
        const k = items[i]?.key;

        if (!k) {
          continue;
        }

        const node = nodesRef.current.get(k) as any;
        const h = Math.ceil(node?.yogaNode?.getComputedHeight?.() ?? 0);

        if (h > 0 && heightsRef.current.get(k) !== h) {
          heightsRef.current.set(k, h);
          dirty = true;
        }
      }
    }

    if (dirty) {
      setVersion(v => v + 1);
    }

    measureTime.current.heightUpdate = performance.now() - started;
    if (logPerformance && measureTime.current.heightUpdate > 5) {
      console.log(`[PERF] Height update: ${measureTime.current.heightUpdate.toFixed(2)}ms`);
    }
  }, [range.end, range.start, items]);

  // Return the same API as the original hook for compatibility
  return {
    // Parenthesized: `??` binds looser than `-`, so without the parentheses
    // the subtraction would attach to the fallback 0 instead of the total.
    bottomSpacer: Math.max(0, (offsets[items.length] ?? 0) - (offsets[range.end] ?? 0)),
    end: range.end,
    measureRef,
    offsets,
    start: range.start,
    topSpacer: offsets[range.start] ?? 0
  };
}
|
||||
|
||||
/**
 * Hook that tracks scroll bursts and optionally logs their duration.
 *
 * A burst starts on the first `onScroll` call and ends after 150ms without
 * another call. The last 50 burst durations are kept for the running average.
 *
 * @param componentName label used as a prefix in log lines
 * @param options.logToConsole emit `[SCROLL]` lines via console.log (default false)
 * @param options.sampleRate fraction of finished bursts that get logged
 *                           (default 0.1)
 * @param options.thresholdMs accepted for API compatibility; not read here
 * @returns `{ onScroll }` — call it from the component's scroll handler
 */
export function useScrollPerformance(
  componentName: string,
  options: { logToConsole?: boolean; sampleRate?: number; thresholdMs?: number } = {}
) {
  // Merge with defaults field by field so a partial options object does not
  // leave `sampleRate` undefined (which would silence every log line).
  const { logToConsole = false, sampleRate = 0.1 } = options;

  const scrollCountRef = useRef(0);
  const scrollTimesRef = useRef<number[]>([]);
  const isScrollingRef = useRef(false);
  const scrollStartTimeRef = useRef(0);
  const scrollThrottleTimerRef = useRef<ReturnType<typeof setTimeout> | null>(null);

  const onScrollStart = useCallback(() => {
    if (isScrollingRef.current) {
      return;
    }

    isScrollingRef.current = true;
    scrollStartTimeRef.current = performance.now();

    if (logToConsole) {
      console.log(`[SCROLL] ${componentName} scroll started`);
    }
  }, [componentName, logToConsole]);

  const onScrollEnd = useCallback(() => {
    if (!isScrollingRef.current) {
      return;
    }

    const duration = performance.now() - scrollStartTimeRef.current;
    const samples = scrollTimesRef.current;

    samples.push(duration);

    // Keep array at reasonable size
    if (samples.length > 50) {
      samples.shift();
    }

    isScrollingRef.current = false;

    if (logToConsole && Math.random() < sampleRate) {
      const avg = samples.reduce((sum, time) => sum + time, 0) / samples.length;

      console.log(
        `[SCROLL] ${componentName} scroll ended: ${duration.toFixed(2)}ms ` +
        `(avg: ${avg.toFixed(2)}ms)`
      );
    }
  }, [componentName, logToConsole, sampleRate]);

  const onScroll = useCallback(() => {
    scrollCountRef.current += 1;

    // Start scrolling tracking if not already
    onScrollStart();

    // Reset the scroll end timer
    if (scrollThrottleTimerRef.current) {
      clearTimeout(scrollThrottleTimerRef.current);
    }

    // Consider scrolling stopped after 150ms of inactivity
    scrollThrottleTimerRef.current = setTimeout(() => {
      scrollThrottleTimerRef.current = null;
      onScrollEnd();
    }, 150);
  }, [onScrollStart, onScrollEnd]);

  // Cancel a pending end-of-scroll timer on unmount.
  useEffect(() => {
    return () => {
      if (scrollThrottleTimerRef.current) {
        clearTimeout(scrollThrottleTimerRef.current);
      }
    };
  }, []);

  return { onScroll };
}
|
||||
Vendored
+2
-2
@@ -83,8 +83,8 @@ declare module '@hermes/ink' {
|
||||
export function withInkSuspended(run: RunExternalProcess): Promise<void>
|
||||
export function useInput(handler: InputHandler, options?: { readonly isActive?: boolean }): void
|
||||
export function useSelection(): {
|
||||
readonly copySelection: () => Promise<string>
|
||||
readonly copySelectionNoClear: () => Promise<string>
|
||||
readonly copySelection: () => string
|
||||
readonly copySelectionNoClear: () => string
|
||||
readonly clearSelection: () => void
|
||||
readonly hasSelection: () => boolean
|
||||
readonly getState: () => unknown
|
||||
|
||||
@@ -32,6 +32,7 @@ export type GatewayEventName =
|
||||
| "sudo.request"
|
||||
| "secret.request"
|
||||
| "background.complete"
|
||||
| "btw.complete"
|
||||
| "error"
|
||||
| "skin.changed"
|
||||
| (string & {});
|
||||
|
||||
+10
-27
@@ -269,17 +269,17 @@ export default function ChatPage() {
|
||||
const payload = data.slice(semi + 1);
|
||||
if (payload === "?" || payload === "") return false; // read/clear — ignore
|
||||
try {
|
||||
// atob returns a binary string (one byte per char); we need UTF-8
|
||||
// decode so multi-byte codepoints (≥, →, emoji, CJK) round-trip
|
||||
// correctly. Without this step, the three UTF-8 bytes of `≥`
|
||||
// would land in the clipboard as the three separate Latin-1
|
||||
// characters `≥`.
|
||||
const binary = atob(payload);
|
||||
const bytes = Uint8Array.from(binary, (c) => c.charCodeAt(0));
|
||||
const text = new TextDecoder("utf-8").decode(bytes);
|
||||
navigator.clipboard.writeText(text).catch((err) => {
|
||||
// Most common reason: the Clipboard API requires a user gesture.
|
||||
// This can fail when the OSC 52 response arrives outside the
|
||||
// original keydown event's activation. Log to aid debugging.
|
||||
console.warn("[dashboard clipboard] OSC 52 write failed:", err.message);
|
||||
});
|
||||
} catch (e) {
|
||||
console.warn("[dashboard clipboard] malformed OSC 52 payload");
|
||||
navigator.clipboard.writeText(text).catch(() => {});
|
||||
} catch {
|
||||
// Malformed base64 — silently drop.
|
||||
}
|
||||
return true;
|
||||
});
|
||||
@@ -290,31 +290,16 @@ export default function ChatPage() {
|
||||
term.attachCustomKeyEventHandler((ev) => {
|
||||
if (ev.type !== "keydown") return true;
|
||||
|
||||
// Copy: Cmd+C on macOS, Ctrl+Shift+C on other platforms. Bare Ctrl+C
|
||||
// is reserved for SIGINT to the TUI child — matches xterm / gnome-terminal /
|
||||
// konsole / Windows Terminal. Ctrl+Shift+C only copies if a selection exists;
|
||||
// without a selection it passes through to the TUI so agents can still
|
||||
// react to the keypress.
|
||||
// Paste: Cmd+V on macOS, Ctrl+Shift+V on others.
|
||||
const copyModifier = isMac ? ev.metaKey : ev.ctrlKey && ev.shiftKey;
|
||||
const pasteModifier = isMac ? ev.metaKey : ev.ctrlKey && ev.shiftKey;
|
||||
|
||||
if (copyModifier && ev.key.toLowerCase() === "c") {
|
||||
const sel = term.getSelection();
|
||||
if (sel) {
|
||||
// Direct writeText inside the keydown handler preserves the user
|
||||
// gesture — async round-trips through OSC 52 can lose activation
|
||||
// and fail with "Document is not focused".
|
||||
navigator.clipboard.writeText(sel).catch((err) => {
|
||||
console.warn("[dashboard clipboard] direct copy failed:", err.message);
|
||||
});
|
||||
// Clear xterm.js's highlight after copy (matches gnome-terminal).
|
||||
term.clearSelection();
|
||||
navigator.clipboard.writeText(sel).catch(() => {});
|
||||
ev.preventDefault();
|
||||
return false;
|
||||
}
|
||||
// No selection → fall through so the TUI receives Ctrl+Shift+C
|
||||
// (or the bare ev if the user used a different modifier).
|
||||
}
|
||||
|
||||
if (pasteModifier && ev.key.toLowerCase() === "v") {
|
||||
@@ -323,9 +308,7 @@ export default function ChatPage() {
|
||||
.then((text) => {
|
||||
if (text) term.paste(text);
|
||||
})
|
||||
.catch((err) => {
|
||||
console.warn("[dashboard clipboard] paste failed:", err.message);
|
||||
});
|
||||
.catch(() => {});
|
||||
ev.preventDefault();
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -45,7 +45,6 @@ hermes [global-options] <command> [subcommand/options]
|
||||
| `hermes login` / `logout` | **Deprecated** — use `hermes auth` instead. |
|
||||
| `hermes status` | Show agent, auth, and platform status. |
|
||||
| `hermes cron` | Inspect and tick the cron scheduler. |
|
||||
| `hermes kanban` | Multi-profile collaboration board (tasks, links, dispatcher). |
|
||||
| `hermes webhook` | Manage dynamic webhook subscriptions for event-driven activation. |
|
||||
| `hermes doctor` | Diagnose config and dependency issues. |
|
||||
| `hermes dump` | Copy-pasteable setup summary for support/debugging. |
|
||||
@@ -273,38 +272,6 @@ hermes cron <list|create|edit|pause|resume|run|remove|status|tick>
|
||||
| `status` | Check whether the cron scheduler is running. |
|
||||
| `tick` | Run due jobs once and exit. |
|
||||
|
||||
## `hermes kanban`
|
||||
|
||||
```bash
|
||||
hermes kanban <action> [options]
|
||||
```
|
||||
|
||||
Multi-profile collaboration board. Tasks live in `~/.hermes/kanban.db` (WAL-mode SQLite); every profile reads and writes the same board. A `cron`-driven dispatcher (`hermes kanban dispatch`) atomically claims ready tasks and spawns the assigned profile as its own process with an isolated workspace.
|
||||
|
||||
| Action | Purpose |
|
||||
|--------|---------|
|
||||
| `init` | Create `kanban.db` if missing. Idempotent. |
|
||||
| `create "<title>"` | Create a new task. Flags: `--body`, `--assignee`, `--parent` (repeatable), `--workspace scratch\|worktree\|dir:<path>`, `--tenant`, `--priority`. |
|
||||
| `list` / `ls` | List tasks. Filter with `--mine`, `--assignee`, `--status`, `--tenant`, `--archived`, `--json`. |
|
||||
| `show <id>` | Show a task with comments and events. `--json` for machine output. |
|
||||
| `assign <id> <profile>` | Assign or reassign. Use `none` to unassign. Refused while task is running. |
|
||||
| `link <parent> <child>` | Add a dependency. Cycle-detected. |
|
||||
| `unlink <parent> <child>` | Remove a dependency. |
|
||||
| `claim <id>` | Atomically claim a ready task. Prints resolved workspace path. |
|
||||
| `comment <id> "<text>"` | Append a comment. Visible to the next worker that runs the task. |
|
||||
| `complete <id>` | Mark task done. Flag: `--result "<summary>"` (goes into children's parent-result context). |
|
||||
| `block <id> "<reason>"` | Mark task blocked. Also appends the reason as a comment. |
|
||||
| `unblock <id>` | Return a blocked task to ready. |
|
||||
| `archive <id>` | Hide from default list. `gc` will remove scratch workspaces. |
|
||||
| `tail <id>` | Follow a task's event stream. |
|
||||
| `dispatch` | One dispatcher pass. Flags: `--dry-run`, `--max N`, `--json`. |
|
||||
| `context <id>` | Print the full context a worker would see (title + body + parent results + comments). |
|
||||
| `gc` | Remove scratch workspaces for archived tasks. |
|
||||
|
||||
All actions are also available as a slash command in the gateway (`/kanban …`), with the same argument surface.
|
||||
|
||||
For the full design — comparison with Cline Kanban / Paperclip / NanoClaw / Gemini Enterprise, eight collaboration patterns, four user stories, concurrency correctness proof — see `docs/hermes-kanban-v1-spec.pdf` in the repository or the [Kanban user guide](/docs/user-guide/features/kanban).
|
||||
|
||||
## `hermes webhook`
|
||||
|
||||
```bash
|
||||
|
||||
@@ -1,103 +0,0 @@
|
||||
---
|
||||
sidebar_position: 11
|
||||
title: Model Catalog
|
||||
description: Remotely-hosted manifest driving curated model picker lists for OpenRouter and Nous Portal.
|
||||
---
|
||||
|
||||
# Model Catalog
|
||||
|
||||
Hermes fetches curated model lists for **OpenRouter** and **Nous Portal** from a JSON manifest hosted alongside the docs site. This lets maintainers update picker lists without shipping a new `hermes-agent` release.
|
||||
|
||||
When the manifest is unreachable (offline, network blocked, hosting failure), Hermes silently falls back to the in-repo snapshot that ships with the CLI. The manifest never breaks the picker — worst case you see whatever list was bundled with your installed version.
|
||||
|
||||
## Live manifest URL
|
||||
|
||||
```
|
||||
https://hermes-agent.nousresearch.com/docs/api/model-catalog.json
|
||||
```
|
||||
|
||||
Published on every merge to `main` via the existing `deploy-site.yml` GitHub Pages pipeline. The source of truth lives in the repo at `website/static/api/model-catalog.json`.
|
||||
|
||||
## Schema
|
||||
|
||||
```json
|
||||
{
|
||||
"version": 1,
|
||||
"updated_at": "2026-04-25T22:00:00Z",
|
||||
"metadata": {},
|
||||
"providers": {
|
||||
"openrouter": {
|
||||
"metadata": {},
|
||||
"models": [
|
||||
{"id": "moonshotai/kimi-k2.6", "description": "recommended", "metadata": {}},
|
||||
{"id": "openai/gpt-5.4", "description": ""}
|
||||
]
|
||||
},
|
||||
"nous": {
|
||||
"metadata": {},
|
||||
"models": [
|
||||
{"id": "anthropic/claude-opus-4.7"},
|
||||
{"id": "moonshotai/kimi-k2.6"}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Field notes:
|
||||
|
||||
- **`version`** — integer schema version. Future schemas bump this; Hermes refuses manifests with versions it doesn't understand and falls back to the hardcoded snapshot.
|
||||
- **`metadata`** — free-form dict at the manifest, provider, and model level. Any keys. Hermes ignores unknown fields, so you can annotate entries (`"tier": "paid"`, `"tags": [...]`, etc.) without coordinating a schema change.
|
||||
- **`description`** — OpenRouter-only. Drives picker badge text (`"recommended"`, `"free"`, or empty). Nous Portal doesn't use this — free-tier gating is determined live from the Portal's pricing endpoint.
|
||||
- **Pricing and context length** are NOT in the manifest. Those come from live provider APIs (`/v1/models` endpoints, models.dev) at fetch time.
|
||||
|
||||
## Fetch behavior
|
||||
|
||||
| When | What happens |
|
||||
|---|---|
|
||||
| `/model` or `hermes model` | Fetches if disk cache is stale, else uses cache |
|
||||
| Disk cache fresh (< TTL) | No network hit |
|
||||
| Network failure with cache | Silent fallback to cache, one log line |
|
||||
| Network failure, no cache | Silent fallback to in-repo snapshot |
|
||||
| Manifest fails schema validation | Treated as unreachable |
|
||||
|
||||
Cache location: `~/.hermes/cache/model_catalog.json`.
|
||||
|
||||
## Config
|
||||
|
||||
```yaml
|
||||
model_catalog:
|
||||
enabled: true
|
||||
url: https://hermes-agent.nousresearch.com/docs/api/model-catalog.json
|
||||
ttl_hours: 24
|
||||
providers: {}
|
||||
```
|
||||
|
||||
Set `enabled: false` to disable remote fetch entirely and always use the in-repo snapshot.
|
||||
|
||||
### Per-provider override URLs
|
||||
|
||||
Third parties can self-host their own curation list using the same schema. Point a provider at a custom URL:
|
||||
|
||||
```yaml
|
||||
model_catalog:
|
||||
providers:
|
||||
openrouter:
|
||||
url: https://example.com/my-openrouter-curation.json
|
||||
```
|
||||
|
||||
The overriding manifest only needs to populate the provider block(s) it cares about. Other providers continue to resolve against the master URL.
|
||||
|
||||
## Updating the manifest
|
||||
|
||||
Maintainers:
|
||||
|
||||
```bash
|
||||
# Re-generate from the in-repo hardcoded lists (keeps manifest in sync after
|
||||
# editing OPENROUTER_MODELS or _PROVIDER_MODELS["nous"] in hermes_cli/models.py).
|
||||
python scripts/build_model_catalog.py
|
||||
```
|
||||
|
||||
Then PR the resulting change to `website/static/api/model-catalog.json` to `main`. The docs site auto-deploys on merge and the new manifest is live within a few minutes.
|
||||
|
||||
You can also hand-edit the JSON directly for fine-grained metadata changes that don't belong in the in-repo snapshot — the generator script is a convenience, not the single source of truth.
|
||||
@@ -36,7 +36,8 @@ Type `/` in the CLI to open the autocomplete menu. Built-in commands are case-in
|
||||
| `/resume [name]` | Resume a previously-named session |
|
||||
| `/status` | Show session info |
|
||||
| `/agents` (alias: `/tasks`) | Show active agents and running tasks across the current session. |
|
||||
| `/background <prompt>` (alias: `/bg`, `/btw`) | Run a prompt in a separate background session. The agent processes your prompt independently — your current session stays free for other work. Results appear as a panel when the task finishes. See [CLI Background Sessions](/docs/user-guide/cli#background-sessions). |
|
||||
| `/background <prompt>` (alias: `/bg`) | Run a prompt in a separate background session. The agent processes your prompt independently — your current session stays free for other work. Results appear as a panel when the task finishes. See [CLI Background Sessions](/docs/user-guide/cli#background-sessions). |
|
||||
| `/btw <question>` | Ephemeral side question using session context (no tools, not persisted). Useful for quick clarifications without affecting the conversation history. |
|
||||
| `/branch [name]` (alias: `/fork`) | Branch the current session (explore a different path) |
|
||||
|
||||
### Configuration
|
||||
|
||||
@@ -242,10 +242,6 @@ You can also change it inside the CLI:
|
||||
/busy status
|
||||
```
|
||||
|
||||
:::tip First-touch hint
|
||||
The very first time you press Enter while Hermes is working, Hermes prints a one-line reminder explaining the `/busy` knob (`"(tip) Your message interrupted the current run…"`). It only fires once per install — a flag in `config.yaml` under `onboarding.seen.busy_input_prompt` latches it. Delete that key to see the tip again.
|
||||
:::
|
||||
|
||||
### Suspending to Background
|
||||
|
||||
On Unix systems, press **`Ctrl+Z`** to suspend Hermes to the background — just like any terminal process. The shell prints a confirmation:
|
||||
|
||||
@@ -1,263 +0,0 @@
|
||||
# Kanban tutorial
|
||||
|
||||
A walkthrough of the four use-cases the Hermes Kanban system was designed for, with the dashboard open in a browser. If you haven't read the [Kanban overview](./kanban) yet, start there — this assumes you know what a task, run, assignee, and dispatcher are.
|
||||
|
||||
## Setup
|
||||
|
||||
```bash
|
||||
hermes kanban init # optional; first `hermes kanban <anything>` auto-inits
|
||||
hermes dashboard # opens http://127.0.0.1:9119 in your browser
|
||||
# click Kanban in the left nav
|
||||
```
|
||||
|
||||
The dashboard is the most comfortable place to learn the system. Everything you see here is also available via `hermes kanban <verb>` on the CLI — the two surfaces share the same SQLite database at `~/.hermes/kanban.db`.
|
||||
|
||||
## The board at a glance
|
||||
|
||||

|
||||
|
||||
Six columns, left to right:
|
||||
|
||||
- **Triage** — raw ideas; a specifier will flesh out the spec before anyone works on them.
|
||||
- **Todo** — created but waiting on dependencies, or not yet assigned.
|
||||
- **Ready** — assigned and waiting for the dispatcher to claim.
|
||||
- **In progress** — a worker is actively running the task. With "Lanes by profile" on (the default), this column sub-groups by assignee so you can see at a glance what each worker is doing.
|
||||
- **Blocked** — a worker asked for human input, or the circuit breaker tripped.
|
||||
- **Done** — completed.
|
||||
|
||||
The top bar has filters for search, tenant, and assignee, plus a `Lanes by profile` toggle and a `Nudge dispatcher` button that runs one dispatch tick right now instead of waiting for the daemon's next interval. Clicking any card opens its drawer on the right.
|
||||
|
||||
### Flat view
|
||||
|
||||
If the profile lanes are noisy, toggle "Lanes by profile" off and the In Progress column collapses to a single flat list ordered by claim time:
|
||||
|
||||

|
||||
|
||||
## Story 1 — Solo dev shipping a feature
|
||||
|
||||
You're building a feature. Classic flow: design a schema, implement the API, write the tests. Three tasks with parent→child dependencies.
|
||||
|
||||
```bash
|
||||
SCHEMA=$(hermes kanban create "Design auth schema" \
|
||||
--assignee backend-dev --tenant auth-project --priority 2 \
|
||||
--body "Design the user/session/token schema for the auth module." \
|
||||
--json | jq -r .id)
|
||||
|
||||
API=$(hermes kanban create "Implement auth API endpoints" \
|
||||
--assignee backend-dev --tenant auth-project --priority 2 \
|
||||
--parent $SCHEMA \
|
||||
--body "POST /register, POST /login, POST /refresh, POST /logout." \
|
||||
--json | jq -r .id)
|
||||
|
||||
hermes kanban create "Write auth integration tests" \
|
||||
--assignee qa-dev --tenant auth-project --priority 2 \
|
||||
--parent $API \
|
||||
--body "Cover happy path, wrong password, expired token, concurrent refresh."
|
||||
```
|
||||
|
||||
Because `API` has `SCHEMA` as its parent, and `tests` has `API` as its parent, only `SCHEMA` starts in `ready`. The other two sit in `todo` until their parents complete. This is the dependency promotion engine doing its job — no other worker will pick up the test-writing until there's an API to test.
|
||||
|
||||
Claim the schema task, do the work, hand off:
|
||||
|
||||
```bash
|
||||
hermes kanban claim $SCHEMA
|
||||
|
||||
# (you design the schema, commit, etc.)
|
||||
|
||||
hermes kanban complete $SCHEMA \
|
||||
--summary "users(id, email, pw_hash), sessions(id, user_id, jti, expires_at); refresh tokens stored as sessions with type='refresh'" \
|
||||
--metadata '{
|
||||
"changed_files": ["migrations/001_users.sql", "migrations/002_sessions.sql"],
|
||||
"decisions": ["bcrypt for hashing", "JWT for session tokens", "7-day refresh, 15-min access"]
|
||||
}'
|
||||
```
|
||||
|
||||
When `SCHEMA` hits `done`, the dependency engine promotes `API` to `ready` automatically. The API worker, when it picks up, will read `SCHEMA`'s summary and metadata in its context — so it knows the schema decisions without re-reading a long design doc.
|
||||
|
||||
Click the completed schema task on the board and the drawer shows everything:
|
||||
|
||||

|
||||
|
||||
The Run History section at the bottom is the key addition. One attempt: outcome `completed`, worker `@backend-dev`, duration, timestamp, and the handoff summary in full. The metadata blob (`changed_files`, `decisions`) is stored on the run too and surfaced to any downstream worker that reads this parent.
|
||||
|
||||
On the CLI:
|
||||
|
||||
```bash
|
||||
hermes kanban show $SCHEMA
|
||||
hermes kanban runs $SCHEMA
|
||||
# # OUTCOME PROFILE ELAPSED STARTED
|
||||
# 1 completed backend-dev 0s 2026-04-27 19:34
|
||||
# → users(id, email, pw_hash), sessions(id, user_id, jti, expires_at); refresh tokens ...
|
||||
```
|
||||
|
||||
## Story 2 — Fleet farming
|
||||
|
||||
You have three workers (a translator, a transcriber, a copywriter) and a pile of independent tasks. You want all three pulling in parallel and making visible progress. This is the simplest kanban use-case and the one the original design optimized for.
|
||||
|
||||
Create the work:
|
||||
|
||||
```bash
|
||||
for lang in Spanish French German; do
|
||||
hermes kanban create "Translate homepage to $lang" \
|
||||
--assignee translator --tenant content-ops
|
||||
done
|
||||
for i in 1 2 3 4 5; do
|
||||
hermes kanban create "Transcribe Q3 customer call #$i" \
|
||||
--assignee transcriber --tenant content-ops
|
||||
done
|
||||
for sku in 1001 1002 1003 1004; do
|
||||
hermes kanban create "Generate product description: SKU-$sku" \
|
||||
--assignee copywriter --tenant content-ops
|
||||
done
|
||||
```
|
||||
|
||||
Start the daemon and walk away:
|
||||
|
||||
```bash
|
||||
hermes kanban daemon --assignee translator &
|
||||
hermes kanban daemon --assignee transcriber &
|
||||
hermes kanban daemon --assignee copywriter &
|
||||
```
|
||||
|
||||
Now filter the board to `content-ops` (or just search for "Transcribe") and you get this:
|
||||
|
||||

|
||||
|
||||
Two transcription tasks are done, one is running, and two are ready, waiting for the next dispatcher tick. The In Progress column is grouped by profile (the "Lanes by profile" default) so you see each worker's active task without scanning a mixed list. The dispatcher will promote the next ready task to running as soon as the current one completes. With three daemons working on three assignee pools in parallel, the whole content queue drains without further human input.
|
||||
|
||||
**Everything Story 1 said about structured handoff still applies here.** A translator worker completing a task can pass `--summary "translated 4 pages, style matched existing marketing voice"` and `--metadata '{"duration_seconds": 720, "tokens_used": 2100}'` — useful for analytics and for any downstream task that depends on this one.
|
||||
|
||||
## Story 3 — Role pipeline with retry
|
||||
|
||||
This is where Kanban earns its keep over a flat TODO list. A PM writes a spec. An engineer implements it. A reviewer rejects the first attempt. The engineer tries again with changes. The reviewer approves.
|
||||
|
||||
The dashboard view, filtered by `auth-project`:
|
||||
|
||||

|
||||
|
||||
Three-stage chain visible at once: `Spec: password reset flow` (DONE, pm), `Implement password reset flow` (DONE, backend-dev), `Review password reset PR` (READY, reviewer). Each has its parent in green at the bottom and children as dependencies.
|
||||
|
||||
The interesting one is the implementation task, because it was blocked and retried:
|
||||
|
||||
```bash
|
||||
# PM completes the spec with acceptance criteria in metadata
|
||||
hermes kanban complete $SPEC \
|
||||
--summary "spec approved; POST /forgot-password sends email, GET /reset/:token renders form, POST /reset applies new password" \
|
||||
--metadata '{"acceptance": [
|
||||
"expired token returns 410",
|
||||
"reused last-3 password returns 400 with message",
|
||||
"successful reset invalidates all active sessions"
|
||||
]}'
|
||||
|
||||
# Engineer claims + implements, but review blocks it for missing strength check
|
||||
hermes kanban claim $IMPL
|
||||
hermes kanban block $IMPL "Review: password strength check missing, reset link isn't single-use (can be replayed within 30min)"
|
||||
|
||||
# Engineer iterates, resolves, completes
|
||||
hermes kanban unblock $IMPL
|
||||
hermes kanban claim $IMPL
|
||||
hermes kanban complete $IMPL \
|
||||
--summary "added zxcvbn strength check, reset tokens are now single-use (stored + deleted on success)" \
|
||||
--metadata '{
|
||||
"changed_files": ["auth/reset.py", "auth/tests/test_reset.py", "migrations/003_single_use_reset_tokens.sql"],
|
||||
"tests_run": 11,
|
||||
"review_iteration": 2
|
||||
}'
|
||||
```
|
||||
|
||||
Click the implementation task. The drawer shows **two attempts**:
|
||||
|
||||

|
||||
|
||||
- **Run 1** — `blocked` by `@backend-dev`. The review feedback sits right under the outcome: "password strength check missing, reset link isn't single-use (can be replayed within 30min)".
|
||||
- **Run 2** — `completed` by `@backend-dev`. Fresh summary, fresh metadata.
|
||||
|
||||
Each run is a row in `task_runs` with its own outcome, summary, and metadata. Retry history is not a conceptual afterthought layered on top of a "latest state" task — it's the primary representation. When a retrying worker opens the task, `build_worker_context` shows it the prior attempts, so the second-pass worker sees why the first pass was blocked and addresses those specific findings instead of re-running from scratch.
|
||||
|
||||
The reviewer picks up next. When they open `Review password reset PR`, they see:
|
||||
|
||||

|
||||
|
||||
The parent link is the completed implementation. When the reviewer's worker calls `build_worker_context`, it pulls the parent's most-recent-completed-run summary + metadata — so the reviewer reads "added zxcvbn strength check, reset tokens are now single-use" and has the list of changed files in hand before looking at a diff.
|
||||
|
||||
## Story 4 — Circuit breaker and crash recovery
|
||||
|
||||
Real workers fail. Missing credentials, OOM kills, transient network errors. The dispatcher has two lines of defense: a **circuit breaker** that auto-blocks after N consecutive failures so the board doesn't thrash forever, and **crash detection** that reclaims a task whose worker PID went away before its TTL expired.
|
||||
|
||||
### Circuit breaker — permanent-looking failure
|
||||
|
||||
A deploy task that can't spawn its worker because `AWS_ACCESS_KEY_ID` isn't set in the profile's environment:
|
||||
|
||||
```bash
|
||||
hermes kanban create "Deploy to staging (missing creds)" \
|
||||
--assignee deploy-bot --tenant ops
|
||||
```
|
||||
|
||||
The dispatcher tries to spawn the worker. Spawn fails (`RuntimeError: AWS_ACCESS_KEY_ID not set`). The dispatcher releases the claim, increments a failure counter, and tries again next tick. After three consecutive failures (the default `failure_limit`), the circuit trips: the task goes to `blocked` with outcome `gave_up`. No more retries until a human unblocks it.
|
||||
|
||||
Click the blocked task:
|
||||
|
||||

|
||||
|
||||
Three runs, all with the same error on the `error` field. The first two are `spawn_failed` (retryable), the third is `gave_up` (terminal). The event log above shows the full sequence: `created → claimed → spawn_failed → claimed → spawn_failed → claimed → gave_up`.
|
||||
|
||||
On the terminal:
|
||||
|
||||
```bash
|
||||
hermes kanban runs t_ef5d
|
||||
# # OUTCOME PROFILE ELAPSED STARTED
|
||||
# 1 spawn_failed deploy-bot 0s 2026-04-27 19:34
|
||||
# ! AWS_ACCESS_KEY_ID not set in deploy-bot env
|
||||
# 2 spawn_failed deploy-bot 0s 2026-04-27 19:34
|
||||
# ! AWS_ACCESS_KEY_ID not set in deploy-bot env
|
||||
# 3 gave_up deploy-bot 0s 2026-04-27 19:34
|
||||
# ! AWS_ACCESS_KEY_ID not set in deploy-bot env
|
||||
```
|
||||
|
||||
If Telegram / Discord / Slack is wired in, a gateway notification fires on the `gave_up` event so you hear about the outage without having to check the board.
|
||||
|
||||
### Crash recovery — worker dies mid-flight
|
||||
|
||||
Sometimes the spawn succeeds but the worker process dies later — segfault, OOM, `systemctl stop`. The dispatcher polls `kill(pid, 0)` and detects the dead pid; the claim releases, the task goes back to `ready`, and the next tick gives it to a fresh worker.
|
||||
|
||||
The example in the seed data is a migration that was running out of memory:
|
||||
|
||||
```bash
|
||||
# Worker claims, starts scanning 2.4M rows, OOM kills it at ~2.3M
|
||||
# Dispatcher detects dead pid, releases claim, increments attempt counter
|
||||
# Retry with a chunked strategy succeeds
|
||||
```
|
||||
|
||||
The drawer shows the full two-attempt history:
|
||||
|
||||

|
||||
|
||||
Run 1 — `crashed`, with the error `OOM kill at row 2.3M (process 99999 gone)`. Run 2 — `completed`, with `"strategy": "chunked with LIMIT + WHERE id > last_id"` in its metadata. The retrying worker saw the crash of run 1 in its context and picked a safer strategy; the metadata makes it obvious to a future observer (or postmortem writer) what changed.
|
||||
|
||||
## Structured handoff — why `--summary` and `--metadata` matter
|
||||
|
||||
In every story above, workers passed `--summary` and `--metadata` on completion. That's not decoration — it's the primary handoff channel between stages of a workflow.
|
||||
|
||||
When a worker on task B reads its context, it gets:
|
||||
|
||||
- B's **prior attempts** (previous runs: outcome, summary, error, metadata) so a retrying worker doesn't repeat a failed path.
|
||||
- **Parent task results** — for each parent, the most-recent completed run's summary and metadata — so downstream workers see why and how the upstream work was done.
|
||||
|
||||
This replaces the "dig through comments and the work output" dance that plagues flat kanban systems. A PM writes acceptance criteria in the spec's metadata, and the engineer's worker sees them structurally. An engineer records which tests they ran and how many passed, and the reviewer's worker has that list in hand before opening a diff.
|
||||
|
||||
The bulk-close guard exists because this data is per-run. `hermes kanban complete a b c --summary X` is refused — copy-pasting the same summary to three tasks is almost always wrong. Bulk close without the handoff flags still works for the common "I finished a pile of admin tasks" case.
|
||||
|
||||
## Inspecting a task currently running
|
||||
|
||||
For completeness — here's the drawer of a task still in flight (the API implementation from Story 1, claimed by `backend-dev` but not yet complete):
|
||||
|
||||

|
||||
|
||||
Status is `Running`. The active run appears in the Run History section with outcome `active` and no `ended_at`. If this worker dies or times out, the dispatcher closes this run with the appropriate outcome and opens a new one on the next claim — the attempt row never disappears.
|
||||
|
||||
## Next steps
|
||||
|
||||
- [Kanban overview](./kanban) — the full data model, event vocabulary, and CLI reference.
|
||||
- `hermes kanban --help` — every subcommand, every flag.
|
||||
- `hermes kanban watch --kinds completed,gave_up,timed_out` — live stream terminal events across the whole board.
|
||||
- `hermes kanban notify-subscribe <task> --platform telegram --chat-id <id>` — get a gateway ping when a specific task finishes.
|
||||
@@ -1,500 +0,0 @@
|
||||
---
|
||||
sidebar_position: 12
|
||||
title: "Kanban (Multi-Agent Board)"
|
||||
description: "Durable SQLite-backed task board for coordinating multiple Hermes profiles"
|
||||
---
|
||||
|
||||
# Kanban — Multi-Agent Profile Collaboration
|
||||
|
||||
> **Want a walkthrough?** Read the [Kanban tutorial](./kanban-tutorial) — four user stories (solo dev, fleet farming, role pipeline with retry, circuit breaker) with dashboard screenshots of each. This page is the reference; the tutorial is the narrative.
|
||||
|
||||
Hermes Kanban is a durable task board, shared across all your Hermes profiles, that lets multiple named agents collaborate on work without fragile in-process subagent swarms. Every task is a row in `~/.hermes/kanban.db`; every handoff is a row anyone can read and write; every worker is a full OS process with its own identity.
|
||||
|
||||
This is the shape that covers the workloads `delegate_task` can't:
|
||||
|
||||
- **Research triage** — parallel researchers + analyst + writer, human-in-the-loop.
|
||||
- **Scheduled ops** — recurring daily briefs that build a journal over weeks.
|
||||
- **Digital twins** — persistent named assistants (`inbox-triage`, `ops-review`) that accumulate memory over time.
|
||||
- **Engineering pipelines** — decompose → implement in parallel worktrees → review → iterate → PR.
|
||||
- **Fleet work** — one specialist managing N subjects (50 social accounts, 12 monitored services).
|
||||
|
||||
For the full design rationale, comparative analysis against Cline Kanban / Paperclip / NanoClaw / Google Gemini Enterprise, and the eight canonical collaboration patterns, see `docs/hermes-kanban-v1-spec.pdf` in the repository.
|
||||
|
||||
## Kanban vs. `delegate_task`
|
||||
|
||||
They look similar; they are not the same primitive.
|
||||
|
||||
| | `delegate_task` | Kanban |
|
||||
|---|---|---|
|
||||
| Shape | RPC call (fork → join) | Durable message queue + state machine |
|
||||
| Parent | Blocks until child returns | Fire-and-forget after `create` |
|
||||
| Child identity | Anonymous subagent | Named profile with persistent memory |
|
||||
| Resumability | None — failed = failed | Block → unblock → re-run; crash → reclaim |
|
||||
| Human in the loop | Not supported | Comment / unblock at any point |
|
||||
| Agents per task | One call = one subagent | N agents over task's life (retry, review, follow-up) |
|
||||
| Audit trail | Lost on context compression | Durable rows in SQLite forever |
|
||||
| Coordination | Hierarchical (caller → callee) | Peer — any profile reads/writes any task |
|
||||
|
||||
**One-sentence distinction:** `delegate_task` is a function call; Kanban is a work queue where every handoff is a row any profile (or human) can see and edit.
|
||||
|
||||
**Use `delegate_task` when** the parent agent needs a short reasoning answer before continuing, no humans involved, result goes back into the parent's context.
|
||||
|
||||
**Use Kanban when** work crosses agent boundaries, needs to survive restarts, might need human input, might be picked up by a different role, or needs to be discoverable after the fact.
|
||||
|
||||
They coexist: a kanban worker may call `delegate_task` internally during its run.
|
||||
|
||||
## Core concepts
|
||||
|
||||
- **Task** — a row with title, optional body, one assignee (a profile name), status (`triage | todo | ready | running | blocked | done | archived`), optional tenant namespace, optional idempotency key (dedup for retried automation).
|
||||
- **Link** — `task_links` row recording a parent → child dependency. The dispatcher promotes `todo → ready` when all parents are `done`.
|
||||
- **Comment** — the inter-agent protocol. Agents and humans append comments; when a worker is (re-)spawned it reads the full comment thread as part of its context.
|
||||
- **Workspace** — the directory a worker operates in. Three kinds:
|
||||
- `scratch` (default) — fresh tmp dir under `~/.hermes/kanban/workspaces/<id>/`.
|
||||
- `dir:<path>` — an existing shared directory (Obsidian vault, mail ops dir, per-account folder). **Must be an absolute path.** Relative paths like `dir:../tenants/foo/` are rejected at dispatch because they'd resolve against whatever CWD the dispatcher happens to be in, which is ambiguous and a confused-deputy escape vector. The path is otherwise trusted — it's your box, your filesystem, the worker runs with your uid. This is the trusted-local-user threat model; kanban is single-host by design.
|
||||
- `worktree` — a git worktree under `.worktrees/<id>/` for coding tasks. Worker-side `git worktree add` creates it.
|
||||
- **Dispatcher** — a long-lived loop that, every N seconds (default 60): reclaims stale claims, reclaims crashed workers (PID gone but TTL not yet expired), promotes ready tasks, atomically claims, spawns assigned profiles. Runs as `hermes kanban daemon` (foreground) or as a systemd user service. After ~5 consecutive spawn failures on the same task the dispatcher auto-blocks it with the last error as the reason — prevents thrashing on tasks whose profile doesn't exist, workspace can't mount, etc.
|
||||
- **Tenant** — optional string namespace. One specialist fleet can serve multiple businesses (`--tenant business-a`) with data isolation by workspace path and memory key prefix.
|
||||
|
||||
## Quick start
|
||||
|
||||
```bash
|
||||
# 1. Create the board
|
||||
hermes kanban init
|
||||
|
||||
# 2. Start the dispatcher (backgrounded with &; run `fg` then Ctrl-C to stop)
|
||||
hermes kanban daemon &
|
||||
|
||||
# 3. Create a task
|
||||
hermes kanban create "research AI funding landscape" --assignee researcher
|
||||
|
||||
# 4. Watch activity live
|
||||
hermes kanban watch
|
||||
|
||||
# 5. See the board
|
||||
hermes kanban list
|
||||
hermes kanban stats
|
||||
```
|
||||
|
||||
### Running the dispatcher as a service
|
||||
|
||||
For production, install the systemd user unit shipped at
|
||||
`plugins/kanban/systemd/hermes-kanban-dispatcher.service`:
|
||||
|
||||
```bash
|
||||
mkdir -p ~/.config/systemd/user
|
||||
cp plugins/kanban/systemd/hermes-kanban-dispatcher.service \
|
||||
~/.config/systemd/user/
|
||||
systemctl --user daemon-reload
|
||||
systemctl --user enable --now hermes-kanban-dispatcher.service
|
||||
systemctl --user status hermes-kanban-dispatcher
|
||||
journalctl --user -u hermes-kanban-dispatcher -f # follow logs
|
||||
```
|
||||
|
||||
Without a running dispatcher `ready` tasks stay where they are — `hermes kanban init` will remind you of this on first run.
|
||||
|
||||
### Idempotent create (for automation / webhooks)
|
||||
|
||||
```bash
|
||||
# First call creates the task. Any subsequent call with the same key
|
||||
# returns the existing task id instead of duplicating.
|
||||
hermes kanban create "nightly ops review" \
|
||||
--assignee ops \
|
||||
--idempotency-key "nightly-ops-$(date -u +%Y-%m-%d)" \
|
||||
--json
|
||||
```
|
||||
|
||||
### Bulk CLI verbs
|
||||
|
||||
All the lifecycle verbs accept multiple ids so you can clean up a batch
|
||||
in one command:
|
||||
|
||||
```bash
|
||||
hermes kanban complete t_abc t_def t_hij --result "batch wrap"
|
||||
hermes kanban archive t_abc t_def t_hij
|
||||
hermes kanban unblock t_abc t_def
|
||||
hermes kanban block t_abc "need input" --ids t_def t_hij
|
||||
```
|
||||
|
||||
## How workers interact with the board
|
||||
|
||||
When the dispatcher spawns a worker, it sets `HERMES_KANBAN_TASK` in the child's env. That env var is the gate for a dedicated **kanban toolset** — 7 tools that the normal agent schema never sees:
|
||||
|
||||
| Tool | Purpose |
|
||||
|---|---|
|
||||
| `kanban_show` | Read the current task (title, body, prior attempts, parent handoffs, comments, full `worker_context`). Defaults to the env's task id. |
|
||||
| `kanban_complete` | Finish with `summary` + `metadata` structured handoff. |
|
||||
| `kanban_block` | Escalate for human input. |
|
||||
| `kanban_heartbeat` | Signal liveness during long operations. |
|
||||
| `kanban_comment` | Append to the task thread. |
|
||||
| `kanban_create` | (Orchestrators) fan out into child tasks. |
|
||||
| `kanban_link` | (Orchestrators) add dependency edges after the fact. |
|
||||
|
||||
**Why tools and not just shelling to `hermes kanban`?** Three reasons:
|
||||
|
||||
1. **Backend portability.** Workers whose terminal tool points at a remote backend (Docker / Modal / Singularity / SSH) would run `hermes kanban complete` inside the container where `hermes` isn't installed and the DB isn't mounted. The kanban tools run in the agent's own Python process and always reach `~/.hermes/kanban.db` regardless of terminal backend.
|
||||
2. **No shell-quoting fragility.** Passing `--metadata '{"files": [...]}'` through shlex + argparse is a latent footgun. Structured tool args skip it.
|
||||
3. **Better errors.** Tool results are structured JSON the model can reason about, not stderr strings it has to parse.
|
||||
|
||||
**Zero schema footprint on normal sessions.** A regular `hermes chat` session has zero `kanban_*` tools in its schema. The `check_fn` on each tool only returns True when `HERMES_KANBAN_TASK` is set, which only happens when the dispatcher spawned this process. No tool bloat for users who never touch kanban.
|
||||
|
||||
The `kanban-worker` and `kanban-orchestrator` skills teach the model which tool to call when and in what order.
|
||||
|
||||
### The worker skill
|
||||
|
||||
Any profile that should be able to work kanban tasks must load the `kanban-worker` skill. It teaches the worker the full lifecycle:
|
||||
|
||||
1. On spawn, call `kanban_show()` to read title + body + parent handoffs + prior attempts + full comment thread.
|
||||
2. `cd $HERMES_KANBAN_WORKSPACE` and do the work there.
|
||||
3. Call `kanban_heartbeat(note="...")` every few minutes during long operations.
|
||||
4. Complete with `kanban_complete(summary="...", metadata={...})`, or `kanban_block(reason="...")` if stuck.
|
||||
|
||||
Load it with:
|
||||
|
||||
```bash
|
||||
hermes skills install devops/kanban-worker
|
||||
```
|
||||
|
||||
The dispatcher also auto-passes `--skills kanban-worker` when spawning every worker, so the worker always has the pattern library available even if a profile's default skills config doesn't include it.
|
||||
|
||||
### Pinning extra skills to a specific task
|
||||
|
||||
Sometimes a single task needs specialist context the assignee profile doesn't carry by default — a translation job that needs the `translation` skill, a review task that needs `github-code-review`, a security audit that needs `security-pr-audit`. Rather than editing the assignee's profile every time, attach the skills directly to the task:
|
||||
|
||||
```bash
|
||||
# CLI — repeat --skill for each extra skill
|
||||
hermes kanban create "translate README to Japanese" \
|
||||
--assignee linguist \
|
||||
--skill translation
|
||||
|
||||
# Multiple skills
|
||||
hermes kanban create "audit auth flow" \
|
||||
--assignee reviewer \
|
||||
--skill security-pr-audit \
|
||||
--skill github-code-review
|
||||
```
|
||||
|
||||
From the dashboard's inline create form, type the skills comma-separated into the **skills** field. From another agent (orchestrator pattern), use `kanban_create(skills=[...])`:
|
||||
|
||||
```
|
||||
kanban_create(
|
||||
title="translate README to Japanese",
|
||||
assignee="linguist",
|
||||
skills=["translation"],
|
||||
)
|
||||
```
|
||||
|
||||
These skills are **additive** to the built-in `kanban-worker` — the dispatcher emits one `--skills <name>` flag for each (and for the built-in), so the worker spawns with all of them loaded. The skill names must match skills that are actually installed on the assignee's profile (run `hermes skills list` to see what's available); there's no runtime install.
|
||||
|
||||
### The orchestrator skill
|
||||
|
||||
A **well-behaved orchestrator does not do the work itself.** It decomposes the user's goal into tasks, links them, assigns each to a specialist, and steps back. The `kanban-orchestrator` skill encodes this: anti-temptation rules, a standard specialist roster (`researcher`, `writer`, `analyst`, `backend-eng`, `reviewer`, `ops`), and a decomposition playbook.
|
||||
|
||||
Load it into your orchestrator profile:
|
||||
|
||||
```bash
|
||||
hermes skills install devops/kanban-orchestrator
|
||||
```
|
||||
|
||||
For best results, pair it with a profile whose toolsets are restricted to board operations (`kanban`, `gateway`, `memory`) so the orchestrator literally cannot execute implementation tasks even if it tries.
|
||||
|
||||
## Dashboard (GUI)
|
||||
|
||||
The `hermes kanban` CLI and the `/kanban` slash command are enough to run the board headlessly, but a visual board is often the right interface for humans-in-the-loop: triage, cross-profile supervision, reading comment threads, and dragging cards between columns. Hermes ships this as a **bundled dashboard plugin** at `plugins/kanban/` — not a core feature, not a separate service — following the model laid out in [Extending the Dashboard](./extending-the-dashboard).
|
||||
|
||||
Open it with:
|
||||
|
||||
```bash
|
||||
hermes kanban init # one-time: create kanban.db if not already present
|
||||
hermes dashboard # "Kanban" tab appears in the nav, after "Skills"
|
||||
```
|
||||
|
||||
### What the plugin gives you
|
||||
|
||||
- A **Kanban** tab showing one column per status: `triage`, `todo`, `ready`, `running`, `blocked`, `done` (plus `archived` when the toggle is on).
|
||||
- `triage` is the parking column for rough ideas a specifier is expected to flesh out. Tasks created with `hermes kanban create --triage` (or via the Triage column's inline create) land here and the dispatcher leaves them alone until a human or specifier promotes them to `todo` / `ready`.
|
||||
- Cards show the task id, title, priority badge, tenant tag, assigned profile, comment/link counts, a **progress pill** (`N/M` children done when the task has dependents), and "created N ago". A per-card checkbox enables multi-select.
|
||||
- **Per-profile lanes inside Running** — toolbar checkbox toggles sub-grouping of the Running column by assignee.
|
||||
- **Live updates via WebSocket** — the plugin tails the append-only `task_events` table on a short poll interval; the board reflects changes the instant any profile (CLI, gateway, or another dashboard tab) acts. Reloads are debounced so a burst of events triggers a single refetch.
|
||||
- **Drag-drop** cards between columns to change status. The drop sends `PATCH /api/plugins/kanban/tasks/:id` which routes through the same `kanban_db` code the CLI uses — the three surfaces can never drift. Moves into destructive statuses (`done`, `archived`, `blocked`) prompt for confirmation. Touch devices use a pointer-based fallback so the board is usable from a tablet.
|
||||
- **Inline create** — click `+` on any column header to type a title, assignee, priority, and (optionally) a parent task from a dropdown over every existing task. Creating from the Triage column automatically parks the new task in triage.
|
||||
- **Multi-select with bulk actions** — shift/ctrl-click a card or tick its checkbox to add it to the selection. A bulk action bar appears at the top with batch status transitions, archive, and reassign (by profile dropdown, or "(unassign)"). Destructive batches confirm first. Per-id partial failures are reported without aborting the rest.
|
||||
- **Click a card** (without shift/ctrl) to open a side drawer (Escape or click-outside closes) with:
|
||||
- **Editable title** — click the heading to rename.
|
||||
- **Editable assignee / priority** — click the meta row to rewrite.
|
||||
- **Editable description** — markdown-rendered by default (headings, bold, italic, inline code, fenced code, `http(s)` / `mailto:` links, bullet lists), with an "edit" button that swaps in a textarea. Markdown rendering is a tiny, XSS-safe renderer — every substitution runs on HTML-escaped input, only `http(s)` / `mailto:` links pass through, and `target="_blank"` + `rel="noopener noreferrer"` are always set.
|
||||
- **Dependency editor** — chip list of parents and children, each with an `×` to unlink, plus dropdowns over every other task to add a new parent or child. Cycle attempts are rejected server-side with a clear message.
|
||||
- **Status action row** (→ triage / → ready / → running / block / unblock / complete / archive) with confirm prompts for destructive transitions.
|
||||
- Result section (also markdown-rendered), comment thread with Enter-to-submit, the last 20 events.
|
||||
- **Toolbar filters** — free-text search, tenant dropdown (defaults to `dashboard.kanban.default_tenant` from `config.yaml`), assignee dropdown, "show archived" toggle, "lanes by profile" toggle, and a **Nudge dispatcher** button so you don't have to wait for the next 60 s tick.
|
||||
|
||||
Visually the target is the familiar Linear / Fusion layout: dark theme, column headers with counts, coloured status dots, pill chips for priority and tenant. The plugin reads only theme CSS vars (`--color-*`, `--radius`, `--font-mono`, ...), so it reskins automatically with whichever dashboard theme is active.
|
||||
|
||||
### Architecture
|
||||
|
||||
The GUI is strictly a **read-through-the-DB + write-through-kanban_db** layer with no domain logic of its own:
|
||||
|
||||
```
|
||||
┌────────────────────────┐ WebSocket (tails task_events)
|
||||
│ React SPA (plugin) │ ◀──────────────────────────────────┐
|
||||
│ HTML5 drag-and-drop │ │
|
||||
└──────────┬─────────────┘ │
|
||||
│ REST over fetchJSON │
|
||||
▼ │
|
||||
┌────────────────────────┐ writes call kanban_db.* │
|
||||
│ FastAPI router │ directly — same code path │
|
||||
│ plugins/kanban/ │ the CLI /kanban verbs use │
|
||||
│ dashboard/plugin_api.py │
|
||||
└──────────┬─────────────┘ │
|
||||
│ │
|
||||
▼ │
|
||||
┌────────────────────────┐ │
|
||||
│ ~/.hermes/kanban.db │ ───── append task_events ──────────┘
|
||||
│ (WAL, shared) │
|
||||
└────────────────────────┘
|
||||
```
|
||||
|
||||
### REST surface
|
||||
|
||||
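All routes are mounted under `/api/plugins/kanban/`. The REST routes are unauthenticated (the dashboard binds to localhost by default); only the WebSocket requires the dashboard's ephemeral session token — see the Security model section below: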
|
||||
|
||||
| Method | Path | Purpose |
|
||||
|---|---|---|
|
||||
| `GET` | `/board?tenant=<name>&include_archived=…` | Full board grouped by status column, plus tenants + assignees for filter dropdowns |
|
||||
| `GET` | `/tasks/:id` | Task + comments + events + links |
|
||||
| `POST` | `/tasks` | Create (wraps `kanban_db.create_task`, accepts `triage: bool` and `parents: [id, …]`) |
|
||||
| `PATCH` | `/tasks/:id` | Status / assignee / priority / title / body / result |
|
||||
| `POST` | `/tasks/bulk` | Apply the same patch (status / archive / assignee / priority) to every id in `ids`. Per-id failures reported without aborting siblings |
|
||||
| `POST` | `/tasks/:id/comments` | Append a comment |
|
||||
| `POST` | `/links` | Add a dependency (`parent_id` → `child_id`) |
|
||||
| `DELETE` | `/links?parent_id=…&child_id=…` | Remove a dependency |
|
||||
| `POST` | `/dispatch?max=…&dry_run=…` | Nudge the dispatcher — skip the 60 s wait |
|
||||
| `GET` | `/config` | Read `dashboard.kanban` preferences from `config.yaml` — `default_tenant`, `lane_by_profile`, `include_archived_by_default`, `render_markdown` |
|
||||
| `WS` | `/events?since=<event_id>` | Live stream of `task_events` rows |
|
||||
|
||||
Every handler is a thin wrapper — the plugin is ~700 lines of Python (router + WebSocket tail + bulk batcher + config reader) and adds no new business logic. A tiny `_conn()` helper auto-initializes `kanban.db` on every read and write, so a fresh install works whether the user opened the dashboard first, hit the REST API directly, or ran `hermes kanban init`.
|
||||
|
||||
### Dashboard config
|
||||
|
||||
Any of these keys under `dashboard.kanban` in `~/.hermes/config.yaml` changes the tab's defaults — the plugin reads them at load time via `GET /config`:
|
||||
|
||||
```yaml
|
||||
dashboard:
|
||||
kanban:
|
||||
default_tenant: acme # preselects the tenant filter
|
||||
lane_by_profile: true # default for the "lanes by profile" toggle
|
||||
include_archived_by_default: false
|
||||
render_markdown: true # set false for plain <pre> rendering
|
||||
```
|
||||
|
||||
Each key is optional and falls back to the shown default.
|
||||
|
||||
### Security model
|
||||
|
||||
The dashboard's HTTP auth middleware [explicitly skips `/api/plugins/`](./extending-the-dashboard#backend-api-routes) — plugin routes are unauthenticated by design because the dashboard binds to localhost by default. That means the kanban REST surface is reachable from any process on the host.
|
||||
|
||||
The WebSocket takes one additional step: it requires the dashboard's ephemeral session token as a `?token=…` query parameter (browsers can't set `Authorization` on an upgrade request), matching the pattern used by the in-browser PTY bridge.
|
||||
|
||||
If you run `hermes dashboard --host 0.0.0.0`, every plugin route — kanban included — becomes reachable from the network. **Don't do that on a shared host.** The board contains task bodies, comments, and workspace paths; an attacker reaching these routes gets read access to your entire collaboration surface and can also create / reassign / archive tasks.
|
||||
|
||||
Tasks in `~/.hermes/kanban.db` are profile-agnostic on purpose (that's the coordination primitive). If you open the dashboard with `hermes -p <profile> dashboard`, the board still shows tasks created by any other profile on the host. Same user owns all profiles, but this is worth knowing if multiple personas coexist.
|
||||
|
||||
### Live updates
|
||||
|
||||
`task_events` is an append-only SQLite table with a monotonic `id`. The WebSocket endpoint holds each client's last-seen event id and pushes new rows as they land. When a burst of events arrives, the frontend reloads the (very cheap) board endpoint — simpler and more correct than trying to patch local state from every event kind. WAL mode means the read loop never blocks the dispatcher's `BEGIN IMMEDIATE` claim transactions.
|
||||
|
||||
### Extending it
|
||||
|
||||
The plugin uses the standard Hermes dashboard plugin contract — see [Extending the Dashboard](./extending-the-dashboard) for the full manifest reference, shell slots, page-scoped slots, and the Plugin SDK. Extra columns, custom card chrome, tenant-filtered layouts, or full `tab.override` replacements are all expressible without forking this plugin.
|
||||
|
||||
To disable without removing: add `dashboard.plugins.kanban.enabled: false` to `config.yaml` (or delete `plugins/kanban/dashboard/manifest.json`).
|
||||
|
||||
### Scope boundary
|
||||
|
||||
The GUI is deliberately thin. Everything the plugin does is reachable from the CLI; the plugin just makes it comfortable for humans. Auto-assignment, budgets, governance gates, and org-chart views remain user-space — a router profile, another plugin, or a reuse of `tools/approval.py` — exactly as listed in the out-of-scope section of the design spec.
|
||||
|
||||
## CLI command reference
|
||||
|
||||
```
|
||||
hermes kanban init # create kanban.db + print daemon hint
|
||||
hermes kanban create "<title>" [--body ...] [--assignee <profile>]
|
||||
[--parent <id>]... [--tenant <name>]
|
||||
[--workspace scratch|worktree|dir:<path>]
|
||||
[--priority N] [--triage] [--idempotency-key KEY]
|
||||
[--max-runtime 30m|2h|1d|<seconds>]
|
||||
[--skill <name>]...
|
||||
[--json]
|
||||
hermes kanban list [--mine] [--assignee P] [--status S] [--tenant T] [--archived] [--json]
|
||||
hermes kanban show <id> [--json]
|
||||
hermes kanban assign <id> <profile> # or 'none' to unassign
|
||||
hermes kanban link <parent_id> <child_id>
|
||||
hermes kanban unlink <parent_id> <child_id>
|
||||
hermes kanban claim <id> [--ttl SECONDS]
|
||||
hermes kanban comment <id> "<text>" [--author NAME]
|
||||
|
||||
# Bulk verbs — accept multiple ids:
|
||||
hermes kanban complete <id>... [--result "..."]
|
||||
hermes kanban block <id> "<reason>" [--ids <id>...]
|
||||
hermes kanban unblock <id>...
|
||||
hermes kanban archive <id>...
|
||||
|
||||
hermes kanban tail <id> # follow a single task's event stream
|
||||
hermes kanban watch [--assignee P] [--tenant T] # live stream ALL events to the terminal
|
||||
[--kinds completed,blocked,…] [--interval SECS]
|
||||
hermes kanban heartbeat <id> [--note "..."] # worker liveness signal for long ops
|
||||
hermes kanban runs <id> [--json] # attempt history (one row per run)
|
||||
hermes kanban assignees [--json] # profiles on disk + per-assignee task counts
|
||||
hermes kanban dispatch [--dry-run] [--max N] # one-shot pass
|
||||
[--failure-limit N] [--json]
|
||||
hermes kanban daemon [--interval SECS] [--max N] # long-lived loop
|
||||
[--failure-limit N] [--pidfile PATH] [-v]
|
||||
hermes kanban stats [--json] # per-status + per-assignee counts
|
||||
hermes kanban log <id> [--tail BYTES] # worker log from ~/.hermes/kanban/logs/
|
||||
hermes kanban notify-subscribe <id> # gateway bridge hook (used by /kanban in the gateway)
|
||||
--platform <name> --chat-id <id> [--thread-id <id>] [--user-id <id>]
|
||||
hermes kanban notify-list [<id>] [--json]
|
||||
hermes kanban notify-unsubscribe <id>
|
||||
--platform <name> --chat-id <id> [--thread-id <id>]
|
||||
hermes kanban context <id> # what a worker sees
|
||||
hermes kanban gc [--event-retention-days N] # workspaces + old events + old logs
|
||||
[--log-retention-days N]
|
||||
```
|
||||
|
||||
All commands are also available as a slash command in the gateway (`/kanban list`, `/kanban comment t_abc "need docs"`, etc.). The slash command bypasses the running-agent guard, so you can `/kanban unblock` a stuck worker while the main agent is still chatting.
|
||||
|
||||
## Collaboration patterns
|
||||
|
||||
The board supports these nine patterns without any new primitives:
|
||||
|
||||
| Pattern | Shape | Example |
|
||||
|---|---|---|
|
||||
| **P1 Fan-out** | N siblings, same role | "research 5 angles in parallel" |
|
||||
| **P2 Pipeline** | role chain: scout → editor → writer | daily brief assembly |
|
||||
| **P3 Voting / quorum** | N siblings + 1 aggregator | 3 researchers → 1 reviewer picks |
|
||||
| **P4 Long-running journal** | same profile + shared dir + cron | Obsidian vault |
|
||||
| **P5 Human-in-the-loop** | worker blocks → user comments → unblock | ambiguous decisions |
|
||||
| **P6 `@mention`** | inline routing from prose | `@reviewer look at this` |
|
||||
| **P7 Thread-scoped workspace** | `/kanban here` in a thread | per-project gateway threads |
|
||||
| **P8 Fleet farming** | one profile, N subjects | 50 social accounts |
|
||||
| **P9 Triage specifier** | rough idea → `triage` → specifier expands body → `todo` | "turn this one-liner into a spec" task |
|
||||
|
||||
For worked examples of each, see `docs/hermes-kanban-v1-spec.pdf`.
|
||||
|
||||
## Multi-tenant usage
|
||||
|
||||
When one specialist fleet serves multiple businesses, tag each task with a tenant:
|
||||
|
||||
```bash
|
||||
hermes kanban create "monthly report" \
|
||||
--assignee researcher \
|
||||
--tenant business-a \
|
||||
--workspace "dir:$HOME/tenants/business-a/data/"
|
||||
```
|
||||
|
||||
Workers receive `$HERMES_TENANT` and namespace their memory writes by prefix. The board, the dispatcher, and the profile definitions are all shared; only the data is scoped.
|
||||
|
||||
## Gateway notifications
|
||||
|
||||
When you run `/kanban create …` from the gateway (Telegram, Discord, Slack, etc.), the originating chat is automatically subscribed to the new task. The gateway's background notifier polls `task_events` every few seconds and delivers one message per terminal event (`completed`, `blocked`, `gave_up`, `crashed`, `timed_out`) to that chat. Completed tasks also send the first line of the worker's `--result` so you see the outcome without having to `/kanban show`.
|
||||
|
||||
You can manage subscriptions explicitly from the CLI — useful when a script / cron job wants to notify a chat it didn't originate from:
|
||||
|
||||
```bash
|
||||
hermes kanban notify-subscribe t_abcd \
|
||||
--platform telegram --chat-id 12345678 --thread-id 7
|
||||
hermes kanban notify-list
|
||||
hermes kanban notify-unsubscribe t_abcd \
|
||||
--platform telegram --chat-id 12345678 --thread-id 7
|
||||
```
|
||||
|
||||
A subscription removes itself automatically once the task reaches `done` or `archived`; no cleanup needed.
|
||||
|
||||
## Runs — one row per attempt
|
||||
|
||||
A task is a logical unit of work; a **run** is one attempt to execute it. When the dispatcher claims a ready task it creates a row in `task_runs` and points `tasks.current_run_id` at it. When that attempt ends — completed, blocked, crashed, timed out, spawn-failed, reclaimed — the run row closes with an `outcome` and the task's pointer clears. A task that's been attempted three times has three `task_runs` rows.
|
||||
|
||||
Why two tables instead of just mutating the task: you need **full attempt history** for real-world postmortems ("the second reviewer attempt got to approve, the third merged"), and you need a clean place to hang per-attempt metadata — which files changed, which tests ran, which findings a reviewer noted. Those are run facts, not task facts.
|
||||
|
||||
Runs are also where **structured handoff** lives. When a worker completes a task it can pass:
|
||||
|
||||
- `--result "<short log line>"` — goes on the task row as before (for back-compat).
|
||||
- `--summary "<human handoff>"` — goes on the run; downstream children see it in their `build_worker_context`.
|
||||
- `--metadata '{"changed_files": [...], "tests_run": 12}'` — JSON dict on the run; children see it serialized alongside the summary.
|
||||
|
||||
Downstream children read the most recent completed run's summary + metadata for each parent. Retrying workers read the prior attempts on their own task (outcome, summary, error) so they don't repeat a path that already failed.
|
||||
|
||||
```bash
|
||||
# Worker completes with a structured handoff:
|
||||
hermes kanban complete t_abcd \
|
||||
--result "rate limiter shipped" \
|
||||
--summary "implemented token bucket, keys on user_id with IP fallback, all tests pass" \
|
||||
--metadata '{"changed_files": ["limiter.py", "tests/test_limiter.py"], "tests_run": 14}'
|
||||
|
||||
# Review the attempt history on a retried task:
|
||||
hermes kanban runs t_abcd
|
||||
# # OUTCOME PROFILE ELAPSED STARTED
|
||||
# 1 blocked worker 12s 2026-04-27 14:02
|
||||
# → BLOCKED: need decision on rate-limit key
|
||||
# 2 completed worker 8m 2026-04-27 15:18
|
||||
# → implemented token bucket, keys on user_id with IP fallback
|
||||
```
|
||||
|
||||
Runs are exposed on the dashboard (Run History section in the drawer, one coloured row per attempt) and on the REST API (`GET /api/plugins/kanban/tasks/:id` returns a `runs[]` array). `PATCH /api/plugins/kanban/tasks/:id` with `{status: "done", summary, metadata}` forwards both to the kernel, so the dashboard's "mark done" button is CLI-equivalent. `task_events` rows carry the `run_id` they belong to so the UI can group them by attempt, and the `completed` event embeds the first-line summary in its payload (capped at 400 chars) so gateway notifiers can render structured handoffs without a second SQL round-trip.
|
||||
|
||||
**Bulk close caveat.** `hermes kanban complete a b c --summary X` is refused — structured handoff is per-run, so copy-pasting the same summary to N tasks is almost always wrong. Bulk close *without* `--summary` / `--metadata` still works for the common "I finished a pile of admin tasks" case.
|
||||
|
||||
**Reclaimed runs from status changes.** If you drag a running task off `running` in the dashboard (back to `ready`, or straight to `todo`), or archive a task that was still running, the in-flight run closes with `outcome='reclaimed'` rather than being orphaned. The `task_runs` row is always in a terminal state when `tasks.current_run_id` is `NULL`, and vice versa — that invariant holds across CLI, dashboard, dispatcher, and notifier.
|
||||
|
||||
**Synthetic runs for never-claimed completions.** Completing or blocking a task that was never claimed (e.g. a human closes a `ready` task from the dashboard with a summary, or a CLI user runs `hermes kanban complete <ready-task> --summary X`) would otherwise drop the handoff. Instead the kernel inserts a zero-duration run row (`started_at == ended_at`) carrying the summary / metadata / reason so attempt history stays complete. The `completed` / `blocked` event's `run_id` points at that row.
|
||||
|
||||
**Live drawer refresh.** When the dashboard's WebSocket event stream reports new events for the task the user is currently viewing, the drawer reloads itself (via a per-task event counter threaded into its `useEffect` dependency list). Closing and reopening is no longer required to see a run's new row or updated outcome.
|
||||
|
||||
### Forward compatibility
|
||||
|
||||
Two nullable columns on `tasks` are reserved for v2 workflow routing: `workflow_template_id` (which template this task belongs to) and `current_step_key` (which step in that template is active). The v1 kernel ignores them for routing but lets clients write them, so a v2 release can add the routing machinery without another schema migration.
|
||||
|
||||
## Event reference
|
||||
|
||||
Every transition appends a row to `task_events`. Each row carries an optional `run_id` so UIs can group events by attempt. Kinds group into three clusters so filtering is easy (`hermes kanban watch --kinds completed,gave_up,timed_out`):
|
||||
|
||||
**Lifecycle** (what changed about the task as a logical unit):
|
||||
|
||||
| Kind | Payload | When |
|
||||
|---|---|---|
|
||||
| `created` | `{assignee, status, parents, tenant}` | Task inserted. `run_id` is `NULL`. |
|
||||
| `promoted` | — | `todo → ready` because all parents hit `done`. `run_id` is `NULL`. |
|
||||
| `claimed` | `{lock, expires, run_id}` | Dispatcher atomically claimed a `ready` task for spawn. |
|
||||
| `completed` | `{result_len, summary?}` | Worker wrote `--result` / `--summary` and task hit `done`. `summary` is the first-line handoff (400-char cap); full version lives on the run row. If `complete_task` is called on a never-claimed task with handoff fields, a zero-duration run is synthesized so `run_id` still points at something. |
|
||||
| `blocked` | `{reason}` | Worker or human flipped the task to `blocked`. Synthesizes a zero-duration run when called on a never-claimed task with `--reason`. |
|
||||
| `unblocked` | — | `blocked → ready`, either manually or via `/unblock`. `run_id` is `NULL`. |
|
||||
| `archived` | — | Hidden from the default board. If the task was still running, carries the `run_id` of the run that was reclaimed as a side effect. |
|
||||
|
||||
**Edits** (human-driven changes that aren't transitions):
|
||||
|
||||
| Kind | Payload | When |
|
||||
|---|---|---|
|
||||
| `assigned` | `{assignee}` | Assignee changed (including unassignment). |
|
||||
| `edited` | `{fields}` | Title or body updated. |
|
||||
| `reprioritized` | `{priority}` | Priority changed. |
|
||||
| `status` | `{status}` | Dashboard drag-drop wrote a status directly (e.g. `todo → ready`). Carries the `run_id` of the run that was reclaimed when dragging off `running`; otherwise `run_id` is `NULL`. |
|
||||
|
||||
**Worker telemetry** (about the execution process, not the logical task):
|
||||
|
||||
| Kind | Payload | When |
|
||||
|---|---|---|
|
||||
| `spawned` | `{pid}` | Dispatcher successfully started a worker process. |
|
||||
| `heartbeat` | `{note?}` | Worker called `hermes kanban heartbeat $TASK` to signal liveness during long operations. |
|
||||
| `reclaimed` | `{stale_lock}` | Claim TTL expired without a completion; task goes back to `ready`. |
|
||||
| `crashed` | `{pid, claimer}` | Worker PID no longer alive but TTL hadn't expired yet. |
|
||||
| `timed_out` | `{pid, elapsed_seconds, limit_seconds, sigkill}` | `max_runtime_seconds` exceeded; dispatcher SIGTERM'd (then SIGKILL'd after 5 s grace) and re-queued. |
|
||||
| `spawn_failed` | `{error, failures}` | One spawn attempt failed (missing PATH, workspace unmountable, …). Counter increments; task returns to `ready` for retry. |
|
||||
| `gave_up` | `{failures, error}` | Circuit breaker fired after N consecutive `spawn_failed` events. Task auto-blocks with the last error. Default N = 5; override via `--failure-limit`. |
|
||||
|
||||
`hermes kanban tail <id>` shows these for a single task. `hermes kanban watch` streams them board-wide.
|
||||
|
||||
## Out of scope
|
||||
|
||||
Kanban is deliberately single-host. `~/.hermes/kanban.db` is a local SQLite file and the dispatcher spawns workers on the same machine. Running a shared board across two hosts is not supported — there's no coordination primitive for "worker X on host A, worker Y on host B," and the crash-detection path assumes PIDs are host-local. If you need multi-host, run an independent board per host and use `delegate_task` / a message queue to bridge them.
|
||||
|
||||
## Design spec
|
||||
|
||||
The complete design — architecture, concurrency correctness, comparison with other systems, implementation plan, risks, open questions — lives in `docs/hermes-kanban-v1-spec.pdf`. Read that before filing any behavior-change PR.
|
||||
@@ -219,17 +219,6 @@ Send any message while the agent is working to interrupt it. Key behaviors:
|
||||
- **Multiple messages are combined** — messages sent during interruption are joined into one prompt
|
||||
- **`/stop` command** — interrupts without queuing a follow-up message
|
||||
|
||||
### Queue vs interrupt (busy-input mode)
|
||||
|
||||
By default, messaging a busy agent interrupts it. To switch the whole install so follow-ups queue behind the current task instead, set:
|
||||
|
||||
```yaml
|
||||
display:
|
||||
busy_input_mode: queue # default: interrupt
|
||||
```
|
||||
|
||||
The first time you message a busy agent on any platform, Hermes appends a one-line reminder to the busy-ack explaining the knob (`"💡 First-time tip — …"`). The reminder fires once per install — a flag under `onboarding.seen.busy_input_prompt` latches it. Delete that key to see the tip again.
|
||||
|
||||
## Tool Progress Notifications
|
||||
|
||||
Control how much tool activity is displayed in `~/.hermes/config.yaml`:
|
||||
|
||||
+1
@@ -298,6 +298,7 @@ Type these during an interactive chat session.
|
||||
### Utility
|
||||
```
|
||||
/branch (/fork) Branch the current session
|
||||
/btw Ephemeral side question (doesn't interrupt main task)
|
||||
/fast Toggle priority/fast processing
|
||||
/browser Open CDP browser connection
|
||||
/history Show conversation history (CLI)
|
||||
|
||||
@@ -60,8 +60,6 @@ const sidebars: SidebarsConfig = {
|
||||
items: [
|
||||
'user-guide/features/cron',
|
||||
'user-guide/features/delegation',
|
||||
'user-guide/features/kanban',
|
||||
'user-guide/features/kanban-tutorial',
|
||||
'user-guide/features/code-execution',
|
||||
'user-guide/features/hooks',
|
||||
'user-guide/features/batch-processing',
|
||||
@@ -615,7 +613,6 @@ const sidebars: SidebarsConfig = {
|
||||
'reference/tools-reference',
|
||||
'reference/toolsets-reference',
|
||||
'reference/mcp-config-reference',
|
||||
'reference/model-catalog',
|
||||
'reference/skills-catalog',
|
||||
'reference/optional-skills-catalog',
|
||||
'reference/faq',
|
||||
|
||||
@@ -1,259 +0,0 @@
|
||||
{
|
||||
"version": 1,
|
||||
"updated_at": "2026-04-26T12:34:42Z",
|
||||
"metadata": {
|
||||
"source": "hermes-agent repo",
|
||||
"docs": "https://hermes-agent.nousresearch.com/docs/reference/model-catalog"
|
||||
},
|
||||
"providers": {
|
||||
"openrouter": {
|
||||
"metadata": {
|
||||
"display_name": "OpenRouter",
|
||||
"note": "Descriptions drive picker badges. Live /api/v1/models filters curated ids by tool-calling support and free pricing."
|
||||
},
|
||||
"models": [
|
||||
{
|
||||
"id": "moonshotai/kimi-k2.6",
|
||||
"description": "recommended"
|
||||
},
|
||||
{
|
||||
"id": "deepseek/deepseek-v4-pro",
|
||||
"description": ""
|
||||
},
|
||||
{
|
||||
"id": "deepseek/deepseek-v4-flash",
|
||||
"description": ""
|
||||
},
|
||||
{
|
||||
"id": "anthropic/claude-opus-4.7",
|
||||
"description": ""
|
||||
},
|
||||
{
|
||||
"id": "anthropic/claude-opus-4.6",
|
||||
"description": ""
|
||||
},
|
||||
{
|
||||
"id": "anthropic/claude-sonnet-4.6",
|
||||
"description": ""
|
||||
},
|
||||
{
|
||||
"id": "qwen/qwen3.6-plus",
|
||||
"description": ""
|
||||
},
|
||||
{
|
||||
"id": "anthropic/claude-sonnet-4.5",
|
||||
"description": ""
|
||||
},
|
||||
{
|
||||
"id": "anthropic/claude-haiku-4.5",
|
||||
"description": ""
|
||||
},
|
||||
{
|
||||
"id": "openrouter/elephant-alpha",
|
||||
"description": "free"
|
||||
},
|
||||
{
|
||||
"id": "openai/gpt-5.5",
|
||||
"description": ""
|
||||
},
|
||||
{
|
||||
"id": "openai/gpt-5.4-mini",
|
||||
"description": ""
|
||||
},
|
||||
{
|
||||
"id": "xiaomi/mimo-v2.5-pro",
|
||||
"description": ""
|
||||
},
|
||||
{
|
||||
"id": "xiaomi/mimo-v2.5",
|
||||
"description": ""
|
||||
},
|
||||
{
|
||||
"id": "openai/gpt-5.3-codex",
|
||||
"description": ""
|
||||
},
|
||||
{
|
||||
"id": "google/gemini-3-pro-image-preview",
|
||||
"description": ""
|
||||
},
|
||||
{
|
||||
"id": "google/gemini-3-flash-preview",
|
||||
"description": ""
|
||||
},
|
||||
{
|
||||
"id": "google/gemini-3.1-pro-preview",
|
||||
"description": ""
|
||||
},
|
||||
{
|
||||
"id": "google/gemini-3.1-flash-lite-preview",
|
||||
"description": ""
|
||||
},
|
||||
{
|
||||
"id": "qwen/qwen3.5-plus-02-15",
|
||||
"description": ""
|
||||
},
|
||||
{
|
||||
"id": "qwen/qwen3.5-35b-a3b",
|
||||
"description": ""
|
||||
},
|
||||
{
|
||||
"id": "stepfun/step-3.5-flash",
|
||||
"description": ""
|
||||
},
|
||||
{
|
||||
"id": "minimax/minimax-m2.7",
|
||||
"description": ""
|
||||
},
|
||||
{
|
||||
"id": "minimax/minimax-m2.5",
|
||||
"description": ""
|
||||
},
|
||||
{
|
||||
"id": "minimax/minimax-m2.5:free",
|
||||
"description": "free"
|
||||
},
|
||||
{
|
||||
"id": "z-ai/glm-5.1",
|
||||
"description": ""
|
||||
},
|
||||
{
|
||||
"id": "z-ai/glm-5v-turbo",
|
||||
"description": ""
|
||||
},
|
||||
{
|
||||
"id": "z-ai/glm-5-turbo",
|
||||
"description": ""
|
||||
},
|
||||
{
|
||||
"id": "x-ai/grok-4.20",
|
||||
"description": ""
|
||||
},
|
||||
{
|
||||
"id": "nvidia/nemotron-3-super-120b-a12b",
|
||||
"description": ""
|
||||
},
|
||||
{
|
||||
"id": "nvidia/nemotron-3-super-120b-a12b:free",
|
||||
"description": "free"
|
||||
},
|
||||
{
|
||||
"id": "arcee-ai/trinity-large-preview:free",
|
||||
"description": "free"
|
||||
},
|
||||
{
|
||||
"id": "arcee-ai/trinity-large-thinking",
|
||||
"description": ""
|
||||
},
|
||||
{
|
||||
"id": "openai/gpt-5.5-pro",
|
||||
"description": ""
|
||||
},
|
||||
{
|
||||
"id": "openai/gpt-5.4-nano",
|
||||
"description": ""
|
||||
}
|
||||
]
|
||||
},
|
||||
"nous": {
|
||||
"metadata": {
|
||||
"display_name": "Nous Portal",
|
||||
"note": "Free-tier gating is determined live via Portal pricing (partition_nous_models_by_tier), not this manifest."
|
||||
},
|
||||
"models": [
|
||||
{
|
||||
"id": "moonshotai/kimi-k2.6"
|
||||
},
|
||||
{
|
||||
"id": "deepseek/deepseek-v4-pro"
|
||||
},
|
||||
{
|
||||
"id": "deepseek/deepseek-v4-flash"
|
||||
},
|
||||
{
|
||||
"id": "xiaomi/mimo-v2.5-pro"
|
||||
},
|
||||
{
|
||||
"id": "xiaomi/mimo-v2.5"
|
||||
},
|
||||
{
|
||||
"id": "anthropic/claude-opus-4.7"
|
||||
},
|
||||
{
|
||||
"id": "anthropic/claude-opus-4.6"
|
||||
},
|
||||
{
|
||||
"id": "anthropic/claude-sonnet-4.6"
|
||||
},
|
||||
{
|
||||
"id": "anthropic/claude-sonnet-4.5"
|
||||
},
|
||||
{
|
||||
"id": "anthropic/claude-haiku-4.5"
|
||||
},
|
||||
{
|
||||
"id": "openai/gpt-5.5"
|
||||
},
|
||||
{
|
||||
"id": "openai/gpt-5.4-mini"
|
||||
},
|
||||
{
|
||||
"id": "openai/gpt-5.3-codex"
|
||||
},
|
||||
{
|
||||
"id": "google/gemini-3-pro-preview"
|
||||
},
|
||||
{
|
||||
"id": "google/gemini-3-flash-preview"
|
||||
},
|
||||
{
|
||||
"id": "google/gemini-3.1-pro-preview"
|
||||
},
|
||||
{
|
||||
"id": "google/gemini-3.1-flash-lite-preview"
|
||||
},
|
||||
{
|
||||
"id": "qwen/qwen3.5-plus-02-15"
|
||||
},
|
||||
{
|
||||
"id": "qwen/qwen3.5-35b-a3b"
|
||||
},
|
||||
{
|
||||
"id": "stepfun/step-3.5-flash"
|
||||
},
|
||||
{
|
||||
"id": "minimax/minimax-m2.7"
|
||||
},
|
||||
{
|
||||
"id": "minimax/minimax-m2.5"
|
||||
},
|
||||
{
|
||||
"id": "minimax/minimax-m2.5:free"
|
||||
},
|
||||
{
|
||||
"id": "z-ai/glm-5.1"
|
||||
},
|
||||
{
|
||||
"id": "z-ai/glm-5v-turbo"
|
||||
},
|
||||
{
|
||||
"id": "z-ai/glm-5-turbo"
|
||||
},
|
||||
{
|
||||
"id": "x-ai/grok-4.20-beta"
|
||||
},
|
||||
{
|
||||
"id": "nvidia/nemotron-3-super-120b-a12b"
|
||||
},
|
||||
{
|
||||
"id": "arcee-ai/trinity-large-thinking"
|
||||
},
|
||||
{
|
||||
"id": "openai/gpt-5.5-pro"
|
||||
},
|
||||
{
|
||||
"id": "openai/gpt-5.4-nano"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
Binary file not shown.
|
Before Width: | Height: | Size: 748 KiB |
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user