Compare commits

..

9 Commits

Author SHA1 Message Date
Brooklyn Nicholson 036dd14425 feat(tui): add model picker and approval/clarify prompt workflows
Three new static prompt components for showroom capture (no useInput):

- ApprovalPromptStatic: double-bordered warning box with command preview,
  4 options (allow once/session/always/deny), ▸ selection, footer hints
- ClarifyPromptStatic: heading + numbered choices with 'Other' option
- ModelPickerStatic: double-bordered popup with provider/model lists,
  current model header, persist toggle, quick-pick numbers

New workflows:
- interactive-prompts: approval → clarify → deploy result
- model-picker: provider stage → model stage → switch result
2026-04-26 01:17:21 -05:00
Brooklyn Nicholson 1e499a7136 feat(tui): add interactive prompts workflow to showroom
Approval prompt and clarify prompt rendered as static mocks (no useInput)
so they work with the snap() capture pipeline. Shows:
- user asking for npm install
- approval prompt with double-bordered warning box, 4 options
- user asking to deploy
- clarify prompt with region selection
- deployment result panel

Static components match real prompt visuals: same borders, colors,
selection indicators, and footer hints.
2026-04-26 01:15:42 -05:00
Brooklyn Nicholson e58308c680 feat(tui): restore xterm.js via importmap, add frame fade animations
- xterm.js loads via importmap (CDN, cached by browser) instead of
  dynamic import — no async fetch latency after page load
- CSS link tag in head for parallel xterm.css download
- Terminal container fades in on init (300ms ease-out via .is-visible)
- Frame elements targetable by id for fade/highlight/spotlight overlays
- Proper viewport cellWidth (9px) matching real xterm rendering
- Clean CSS: removed dead .showroom-frame styles, added transitions
  for highlights, smooth overlay animations
2026-04-26 01:11:27 -05:00
Brooklyn Nicholson 6147a867cd perf(tui): drop xterm.js from showroom, use lightweight ANSI parser
The ANSI frames from Ink renders only use cursor-forward (ESC[NC) and
control sequences (cursor hide/show, bracketed paste). No color SGR.
Loading a full terminal emulator from CDN was pure overhead — ~200ms
network + heavy DOM reconciliation for a few <150 byte strings.

Replaced with a 30-line ANSI-to-HTML parser. Zero network deps,
instant render.
2026-04-26 01:02:32 -05:00
Brooklyn Nicholson 7603126c86 chore(tui): run formatter on showroom files 2026-04-26 00:44:36 -05:00
Brooklyn Nicholson 3eadf10047 chore(tui): strip showroom chrome and beef up slash demo
- drop title bar, "real ink" tag, meta line — terminal box is the surface
- single bottom controls row: ↻ · scale · speed · picker · progress
- slash workflow now types each command, echoes a slash msg, then renders the panel
- adds a /help scene (real Panel grouped by category)
- README minus the "real ink" marketing
2026-04-25 23:39:06 -05:00
Brooklyn Nicholson 72ca0809c4 feat(tui): showroom now renders real ui-tui frames via xterm.js
- record.tsx imports MessageLine, Panel, Box, Text and snapshots Ink output as ANSI
- frame action writes captured ANSI into xterm.js (jsDelivr CDN)
- captions, spotlights, fades, highlights still layer over frames by id
- dropped CSS-mock workflows; all 4 sample workflows now use real Ink output
- compact 80x16 viewport, 1x–4x scale picker, blink cursor, intro fade
2026-04-25 23:33:40 -05:00
Brooklyn Nicholson 70c43d5da1 feat(tui): showroom MVP with picker, speeds, sample workflows
- multi-workflow listing + browser picker, /api/workflows + /api/workflow/:name
- build emits dist/<name>.html for every workflow + dist/index.html
- player adds 0.5x/1x/2x speed control, blinking cursor, intro fade, progress bar
- new sample workflows: subagent trail, slash commands tour, voice mode
2026-04-25 23:08:01 -05:00
Brooklyn Nicholson 7d79dbc5ad feat(tui): add scripted showroom demos 2026-04-25 22:04:12 -05:00
168 changed files with 3526 additions and 25228 deletions
+1 -10
View File
@@ -390,16 +390,7 @@ def build_anthropic_client(api_key: str, base_url: str = None, timeout: float =
"timeout": Timeout(timeout=float(_read_timeout), connect=10.0),
}
if normalized_base_url:
# Azure Anthropic endpoints require an ``api-version`` query parameter.
# Pass it via default_query so the SDK appends it to every request URL
# without corrupting the base_url (appending it directly produces
# malformed paths like /anthropic?api-version=.../v1/messages).
_is_azure_endpoint = "azure.com" in normalized_base_url.lower()
if _is_azure_endpoint and "api-version" not in normalized_base_url:
kwargs["base_url"] = normalized_base_url.rstrip("/")
kwargs["default_query"] = {"api-version": "2025-04-15"}
else:
kwargs["base_url"] = normalized_base_url
kwargs["base_url"] = normalized_base_url
common_betas = _common_betas_for_base_url(normalized_base_url)
if _is_kimi_coding_endpoint(base_url):
+6 -25
View File
@@ -42,7 +42,6 @@ import time
from pathlib import Path # noqa: F401 — used by test mocks
from types import SimpleNamespace
from typing import Any, Dict, List, Optional, Tuple
from urllib.parse import urlparse, parse_qs, urlunparse
from openai import OpenAI
@@ -53,17 +52,6 @@ from utils import base_url_host_matches, base_url_hostname, normalize_proxy_env_
logger = logging.getLogger(__name__)
def _extract_url_query_params(url: str):
"""Extract query params from URL, return (clean_url, default_query dict or None)."""
parsed = urlparse(url)
if parsed.query:
clean = urlunparse(parsed._replace(query=""))
params = {k: v[0] for k, v in parse_qs(parsed.query).items()}
return clean, params
return url, None
# Module-level flag: only warn once per process about stale OPENAI_BASE_URL.
_stale_base_url_warned = False
@@ -1169,10 +1157,8 @@ def _try_custom_endpoint() -> Tuple[Optional[Any], Optional[str]]:
return None, None
model = _read_main_model() or "gpt-4o-mini"
logger.debug("Auxiliary client: custom endpoint (%s, api_mode=%s)", model, custom_mode or "chat_completions")
_clean_base, _dq = _extract_url_query_params(custom_base)
_extra = {"default_query": _dq} if _dq else {}
if custom_mode == "codex_responses":
real_client = OpenAI(api_key=custom_key, base_url=_clean_base, **_extra)
real_client = OpenAI(api_key=custom_key, base_url=custom_base)
return CodexAuxiliaryClient(real_client, model), model
if custom_mode == "anthropic_messages":
# Third-party Anthropic-compatible gateway (MiniMax, Zhipu GLM,
@@ -1186,12 +1172,12 @@ def _try_custom_endpoint() -> Tuple[Optional[Any], Optional[str]]:
"Custom endpoint declares api_mode=anthropic_messages but the "
"anthropic SDK is not installed — falling back to OpenAI-wire."
)
return OpenAI(api_key=custom_key, base_url=_clean_base, **_extra), model
return OpenAI(api_key=custom_key, base_url=custom_base), model
return (
AnthropicAuxiliaryClient(real_client, model, custom_key, custom_base, is_oauth=False),
model,
)
return OpenAI(api_key=custom_key, base_url=_clean_base, **_extra), model
return OpenAI(api_key=custom_key, base_url=custom_base), model
def _try_codex() -> Tuple[Optional[Any], Optional[str]]:
@@ -1839,15 +1825,12 @@ def resolve_provider_client(
provider,
)
extra = {}
_clean_base, _dq = _extract_url_query_params(custom_base)
if _dq:
extra["default_query"] = _dq
if base_url_host_matches(custom_base, "api.kimi.com"):
extra["default_headers"] = {"User-Agent": "claude-code/0.1.0"}
elif base_url_host_matches(custom_base, "api.githubcopilot.com"):
from hermes_cli.models import copilot_default_headers
extra["default_headers"] = copilot_default_headers()
client = OpenAI(api_key=custom_key, base_url=_clean_base, **extra)
client = OpenAI(api_key=custom_key, base_url=custom_base, **extra)
client = _wrap_if_needed(client, final_model, custom_base)
return (_to_async_client(client, final_model) if async_mode
else (client, final_model))
@@ -1884,8 +1867,6 @@ def resolve_provider_client(
model or custom_entry.get("model") or _read_main_model() or "gpt-4o-mini",
provider,
)
_clean_base2, _dq2 = _extract_url_query_params(custom_base)
_extra2 = {"default_query": _dq2} if _dq2 else {}
logger.debug(
"resolve_provider_client: named custom provider %r (%s, api_mode=%s)",
provider, final_model, entry_api_mode or "chat_completions")
@@ -1903,7 +1884,7 @@ def resolve_provider_client(
"installed — falling back to OpenAI-wire.",
provider,
)
client = OpenAI(api_key=custom_key, base_url=_clean_base2, **_extra2)
client = OpenAI(api_key=custom_key, base_url=custom_base)
return (_to_async_client(client, final_model) if async_mode
else (client, final_model))
sync_anthropic = AnthropicAuxiliaryClient(
@@ -1912,7 +1893,7 @@ def resolve_provider_client(
if async_mode:
return AsyncAnthropicAuxiliaryClient(sync_anthropic), final_model
return sync_anthropic, final_model
client = OpenAI(api_key=custom_key, base_url=_clean_base2, **_extra2)
client = OpenAI(api_key=custom_key, base_url=custom_base)
# codex_responses or inherited auto-detect (via _wrap_if_needed).
# _wrap_if_needed reads the closed-over `api_mode` (the task-level
# override). Named-provider entry api_mode=codex_responses also
+1 -124
View File
@@ -227,23 +227,6 @@ def _responses_tools(tools: Optional[List[Dict[str, Any]]] = None) -> Optional[L
# Message format conversion
# ---------------------------------------------------------------------------
_RESPONSE_MESSAGE_STATUSES = {"completed", "incomplete", "in_progress"}
def _normalize_responses_message_status(value: Any, *, default: str = "completed") -> str:
"""Normalize a Responses assistant message status for replay.
The API accepts completed/incomplete/in_progress on replayed assistant
output messages. Preserve those exactly (modulo case/hyphen spelling) so
incomplete Codex continuation turns don't get falsely marked completed.
"""
if isinstance(value, str):
status = value.strip().lower().replace("-", "_").replace(" ", "_")
if status in _RESPONSE_MESSAGE_STATUSES:
return status
return default
def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
"""Convert internal chat-style messages to Responses input items."""
items: List[Dict[str, Any]] = []
@@ -289,57 +272,7 @@ def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Di
seen_item_ids.add(item_id)
has_codex_reasoning = True
# Replay exact assistant message items (with id/phase) from
# previous turns so the API can maintain prefix-cache hits.
# OpenAI docs: "preserve and resend phase on all assistant
# messages — dropping it can degrade performance."
codex_message_items = msg.get("codex_message_items")
replayed_message_items = 0
if isinstance(codex_message_items, list):
for raw_item in codex_message_items:
if not isinstance(raw_item, dict):
continue
if raw_item.get("type") != "message" or raw_item.get("role") != "assistant":
continue
raw_content_parts = raw_item.get("content")
if not isinstance(raw_content_parts, list):
continue
normalized_content_parts = []
for part in raw_content_parts:
if not isinstance(part, dict):
continue
part_type = str(part.get("type") or "").strip()
if part_type not in {"output_text", "text"}:
continue
text = part.get("text", "")
if text is None:
text = ""
if not isinstance(text, str):
text = str(text)
normalized_content_parts.append({"type": "output_text", "text": text})
if not normalized_content_parts:
continue
replay_item = {
"type": "message",
"role": "assistant",
"status": _normalize_responses_message_status(raw_item.get("status")),
"content": normalized_content_parts,
}
item_id = raw_item.get("id")
if isinstance(item_id, str) and item_id.strip():
replay_item["id"] = item_id.strip()
phase = raw_item.get("phase")
if isinstance(phase, str) and phase.strip():
replay_item["phase"] = phase.strip()
items.append(replay_item)
replayed_message_items += 1
if replayed_message_items > 0:
pass
elif content_parts:
if content_parts:
items.append({"role": "assistant", "content": content_parts})
elif content_text.strip():
items.append({"role": "assistant", "content": content_text})
@@ -499,47 +432,6 @@ def _preflight_codex_input_items(raw_items: Any) -> List[Dict[str, Any]]:
normalized.append(reasoning_item)
continue
if item_type == "message":
role = item.get("role")
if role != "assistant":
raise ValueError(f"Codex Responses input[{idx}] message items must have role='assistant'.")
content = item.get("content")
if not isinstance(content, list):
raise ValueError(f"Codex Responses input[{idx}] message item must have content list.")
normalized_content = []
for part_idx, part in enumerate(content):
if not isinstance(part, dict):
raise ValueError(
f"Codex Responses input[{idx}] message content[{part_idx}] must be an object."
)
part_type = part.get("type")
if part_type not in {"output_text", "text"}:
raise ValueError(
f"Codex Responses input[{idx}] message content[{part_idx}] has unsupported type {part_type!r}."
)
text = part.get("text", "")
if text is None:
text = ""
if not isinstance(text, str):
text = str(text)
normalized_content.append({"type": "output_text", "text": text})
if not normalized_content:
raise ValueError(f"Codex Responses input[{idx}] message item must contain at least one text part.")
normalized_item: Dict[str, Any] = {
"type": "message",
"role": "assistant",
"status": _normalize_responses_message_status(item.get("status")),
"content": normalized_content,
}
item_id = item.get("id")
if isinstance(item_id, str) and item_id.strip():
normalized_item["id"] = item_id.strip()
phase = item.get("phase")
if isinstance(phase, str) and phase.strip():
normalized_item["phase"] = phase.strip()
normalized.append(normalized_item)
continue
role = item.get("role")
if role in {"user", "assistant"}:
content = item.get("content", "")
@@ -824,7 +716,6 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]:
content_parts: List[str] = []
reasoning_parts: List[str] = []
reasoning_items_raw: List[Dict[str, Any]] = []
message_items_raw: List[Dict[str, Any]] = []
tool_calls: List[Any] = []
has_incomplete_items = response_status in {"queued", "in_progress", "incomplete"}
saw_commentary_phase = False
@@ -843,7 +734,6 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]:
if item_type == "message":
item_phase = getattr(item, "phase", None)
normalized_phase = None
if isinstance(item_phase, str):
normalized_phase = item_phase.strip().lower()
if normalized_phase in {"commentary", "analysis"}:
@@ -853,18 +743,6 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]:
message_text = _extract_responses_message_text(item)
if message_text:
content_parts.append(message_text)
raw_message_item: Dict[str, Any] = {
"type": "message",
"role": "assistant",
"status": _normalize_responses_message_status(item_status),
"content": [{"type": "output_text", "text": message_text}],
}
item_id = getattr(item, "id", None)
if isinstance(item_id, str) and item_id:
raw_message_item["id"] = item_id
if normalized_phase:
raw_message_item["phase"] = normalized_phase
message_items_raw.append(raw_message_item)
elif item_type == "reasoning":
reasoning_text = _extract_responses_reasoning_text(item)
if reasoning_text:
@@ -977,7 +855,6 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]:
reasoning_content=None,
reasoning_details=None,
codex_reasoning_items=reasoning_items_raw or None,
codex_message_items=message_items_raw or None,
)
if tool_calls:
+8 -39
View File
@@ -106,11 +106,9 @@ _endpoint_model_metadata_cache_time: Dict[str, float] = {}
_ENDPOINT_MODEL_CACHE_TTL = 300
# Descending tiers for context length probing when the model is unknown.
# We start at 256K (covers GPT-5.x, many current large-context models) and
# step down on context-length errors until one works. Tier[0] is also the
# default fallback when no detection method succeeds.
# We start at 128K (a safe default for most modern models) and step down
# on context-length errors until one works.
CONTEXT_PROBE_TIERS = [
256_000,
128_000,
64_000,
32_000,
@@ -145,11 +143,10 @@ DEFAULT_CONTEXT_LENGTHS = {
"claude": 200000,
# OpenAI — GPT-5 family (most have 400k; specific overrides first)
# Source: https://developers.openai.com/api/docs/models
# GPT-5.5 (launched Apr 23 2026) is 1.05M on the direct OpenAI API and
# ChatGPT Codex OAuth caps it at 272K; both paths resolve via their own
# provider-aware branches (_resolve_codex_oauth_context_length + models.dev).
# This hardcoded value is only reached when every probe misses.
"gpt-5.5": 1050000,
# GPT-5.5 (launched Apr 23 2026). 400k is the fallback for providers we
# can't probe live. ChatGPT Codex OAuth actually caps lower (272k as of
# Apr 2026) and is resolved via _resolve_codex_oauth_context_length().
"gpt-5.5": 400000,
"gpt-5.4-nano": 400000, # 400k (not 1.05M like full 5.4)
"gpt-5.4-mini": 400000, # 400k (not 1.05M like full 5.4)
"gpt-5.4": 1050000, # GPT-5.4, GPT-5.4 Pro (1.05M context)
@@ -165,17 +162,7 @@ DEFAULT_CONTEXT_LENGTHS = {
"gemma-4-31b": 256000,
"gemma-3": 131072,
"gemma": 8192, # fallback for older gemma models
# DeepSeek — V4 family ships with a 1M context window. The legacy
# aliases ``deepseek-chat`` / ``deepseek-reasoner`` are server-side
# mapped to the non-thinking / thinking modes of ``deepseek-v4-flash``
# and inherit the same 1M window. The ``deepseek`` substring entry
# below remains as a 128K fallback for older / unknown DeepSeek model
# ids (e.g. via custom endpoints).
# https://api-docs.deepseek.com/zh-cn/quick_start/pricing
"deepseek-v4-pro": 1_000_000,
"deepseek-v4-flash": 1_000_000,
"deepseek-chat": 1_000_000,
"deepseek-reasoner": 1_000_000,
# DeepSeek
"deepseek": 128000,
# Meta
"llama": 131072,
@@ -1206,7 +1193,6 @@ def get_model_context_length(
api_key: str = "",
config_context_length: int | None = None,
provider: str = "",
custom_providers: list | None = None,
) -> int:
"""Get the context length for a model.
@@ -1227,23 +1213,6 @@ def get_model_context_length(
if config_context_length is not None and isinstance(config_context_length, int) and config_context_length > 0:
return config_context_length
# 0b. custom_providers per-model override — check before any probe.
# This closes the gap where /model switch and display paths used to fall
# back to 128K despite the user having a per-model context_length set.
# See #15779.
if custom_providers and base_url and model:
try:
from hermes_cli.config import get_custom_provider_context_length
cp_ctx = get_custom_provider_context_length(
model=model,
base_url=base_url,
custom_providers=custom_providers,
)
if cp_ctx:
return cp_ctx
except Exception:
pass # fall through to probing
# Normalise provider-prefixed model names (e.g. "local:model-name" →
# "model-name") so cache lookups and server queries use the bare ID that
# local servers actually know about. Ollama "model:tag" colons are preserved.
@@ -1383,7 +1352,7 @@ def get_model_context_length(
# 6. OpenRouter live API metadata (provider-unaware fallback)
metadata = fetch_model_metadata()
if model in metadata:
return metadata[model].get("context_length", DEFAULT_FALLBACK_CONTEXT)
return metadata[model].get("context_length", 128000)
# 8. Hardcoded defaults (fuzzy match — longest key first for specificity)
# Only check `default_model in model` (is the key a substring of the input).
-142
View File
@@ -180,145 +180,3 @@ def format_remaining(seconds: float) -> str:
h, remainder = divmod(s, 3600)
m = remainder // 60
return f"{h}h {m}m" if m else f"{h}h"
# Buckets with reset windows shorter than this are treated as transient
# (upstream jitter, secondary throttling) rather than a genuine quota
# exhaustion worth a cross-session breaker trip.
_MIN_RESET_FOR_BREAKER_SECONDS = 60.0
def is_genuine_nous_rate_limit(
*,
headers: Optional[Mapping[str, str]] = None,
last_known_state: Optional[Any] = None,
) -> bool:
"""Decide whether a 429 from Nous Portal is a real account rate limit.
Nous Portal multiplexes multiple upstream providers (DeepSeek, Kimi,
MiMo, Hermes, ...) behind one endpoint. A 429 can mean either:
(a) The caller's own RPM / RPH / TPM / TPH bucket on Nous is
exhausted a genuine rate limit that will last until the
bucket resets.
(b) The upstream provider is out of capacity for a specific model
transient, clears in seconds, and has nothing to do with
the caller's quota on Nous.
Tripping the cross-session breaker on (b) blocks ALL Nous requests
(and all models, since Nous is one provider key) for minutes even
though the caller's account is healthy and a different model would
have worked. That's the bug users hit when DeepSeek V4 Pro 429s
trigger a breaker that then blocks Kimi 2.6 and MiMo V2.5 Pro.
We tell the two apart by looking at:
1. The 429 response's own ``x-ratelimit-*`` headers. Nous emits
the full suite on every response including 429s. An exhausted
bucket (``remaining == 0`` with a reset window >= 60s) is
proof of (a).
2. The last-known-good rate-limit state captured by
``_capture_rate_limits()`` on the previous successful
response. If any bucket there was already near-exhausted with
a substantial reset window, the current 429 is almost
certainly (a) continuing from that condition.
If neither signal fires, we treat the 429 as (b): fail the single
request, let the retry loop or model-switch proceed, and do NOT
write the cross-session breaker file.
Returns True when the evidence points at (a).
"""
# Signal 1: current 429 response headers.
state = _parse_buckets_from_headers(headers)
if _has_exhausted_bucket(state):
return True
# Signal 2: last-known-good state from a recent successful response.
# Accepts either a RateLimitState (dataclass from rate_limit_tracker)
# or a dict of bucket snapshots.
if last_known_state is not None and _has_exhausted_bucket_in_object(last_known_state):
return True
return False
def _parse_buckets_from_headers(
headers: Optional[Mapping[str, str]],
) -> dict[str, tuple[Optional[int], Optional[float]]]:
"""Extract (remaining, reset_seconds) per bucket from x-ratelimit-* headers.
Returns empty dict when no rate-limit headers are present.
"""
if not headers:
return {}
lowered = {k.lower(): v for k, v in headers.items()}
if not any(k.startswith("x-ratelimit-") for k in lowered):
return {}
def _maybe_int(raw: Optional[str]) -> Optional[int]:
if raw is None:
return None
try:
return int(float(raw))
except (TypeError, ValueError):
return None
def _maybe_float(raw: Optional[str]) -> Optional[float]:
if raw is None:
return None
try:
return float(raw)
except (TypeError, ValueError):
return None
result: dict[str, tuple[Optional[int], Optional[float]]] = {}
for tag in ("requests", "requests-1h", "tokens", "tokens-1h"):
remaining = _maybe_int(lowered.get(f"x-ratelimit-remaining-{tag}"))
reset = _maybe_float(lowered.get(f"x-ratelimit-reset-{tag}"))
if remaining is not None or reset is not None:
result[tag] = (remaining, reset)
return result
def _has_exhausted_bucket(
buckets: Mapping[str, tuple[Optional[int], Optional[float]]],
) -> bool:
"""Return True when any bucket has remaining == 0 AND a meaningful reset window."""
for remaining, reset in buckets.values():
if remaining is None or remaining > 0:
continue
if reset is None:
continue
if reset >= _MIN_RESET_FOR_BREAKER_SECONDS:
return True
return False
def _has_exhausted_bucket_in_object(state: Any) -> bool:
"""Check a RateLimitState-like object for an exhausted bucket.
Accepts the dataclass from ``agent.rate_limit_tracker`` (buckets
exposed as attributes ``requests_min``, ``requests_hour``,
``tokens_min``, ``tokens_hour``) and falls back gracefully for any
object missing those attributes.
"""
for attr in ("requests_min", "requests_hour", "tokens_min", "tokens_hour"):
bucket = getattr(state, attr, None)
if bucket is None:
continue
limit = getattr(bucket, "limit", 0) or 0
remaining = getattr(bucket, "remaining", 0) or 0
# Prefer the adjusted "remaining_seconds_now" property when present;
# fall back to raw reset_seconds.
reset = getattr(bucket, "remaining_seconds_now", None)
if reset is None:
reset = getattr(bucket, "reset_seconds", 0.0) or 0.0
if limit <= 0:
continue
if remaining > 0:
continue
if reset >= _MIN_RESET_FOR_BREAKER_SECONDS:
return True
return False
-144
View File
@@ -1,144 +0,0 @@
"""
Contextual first-touch onboarding hints.
Instead of blocking first-run questionnaires, show a one-time hint the *first*
time a user hits a behavior fork message-while-running, first long-running
tool, etc. Each hint is shown once per install (tracked in ``config.yaml`` under
``onboarding.seen.<flag>``) and then never again.
Keep this module tiny and dependency-free so both the CLI and gateway can import
it without pulling in heavy modules.
"""
from __future__ import annotations
import logging
from pathlib import Path
from typing import Any, Mapping, Optional
logger = logging.getLogger(__name__)
# -------------------------------------------------------------------------
# Flag names (stable — used as config.yaml keys under onboarding.seen)
# -------------------------------------------------------------------------
BUSY_INPUT_FLAG = "busy_input_prompt"
TOOL_PROGRESS_FLAG = "tool_progress_prompt"
# -------------------------------------------------------------------------
# Hint content
# -------------------------------------------------------------------------
def busy_input_hint_gateway(mode: str) -> str:
"""Hint shown the first time a user messages while the agent is busy.
``mode`` is the effective busy_input_mode that was just applied, so the
message matches reality ("I just interrupted…" vs "I just queued…").
"""
if mode == "queue":
return (
"💡 First-time tip — I queued your message instead of interrupting. "
"Send `/busy interrupt` to make new messages stop the current task "
"immediately, or `/busy status` to check. This notice won't appear again."
)
return (
"💡 First-time tip — I just interrupted my current task to answer you. "
"Send `/busy queue` to queue follow-ups for after the current task instead, "
"or `/busy status` to check. This notice won't appear again."
)
def busy_input_hint_cli(mode: str) -> str:
"""CLI version of the busy-input hint (plain text, no markdown)."""
if mode == "queue":
return (
"(tip) Your message was queued for the next turn. "
"Use /busy interrupt to make Enter stop the current run instead. "
"This tip only shows once."
)
return (
"(tip) Your message interrupted the current run. "
"Use /busy queue to queue messages for the next turn instead. "
"This tip only shows once."
)
def tool_progress_hint_gateway() -> str:
return (
"💡 First-time tip — that tool took a while and I'm streaming every step. "
"If the progress messages feel noisy, send `/verbose` to cycle modes "
"(all → new → off). This notice won't appear again."
)
def tool_progress_hint_cli() -> str:
return (
"(tip) That tool ran for a while. Use /verbose to cycle tool-progress "
"display modes (all -> new -> off -> verbose). This tip only shows once."
)
# -------------------------------------------------------------------------
# State read / write
# -------------------------------------------------------------------------
def _get_seen_dict(config: Mapping[str, Any]) -> Mapping[str, Any]:
onboarding = config.get("onboarding") if isinstance(config, Mapping) else None
if not isinstance(onboarding, Mapping):
return {}
seen = onboarding.get("seen")
return seen if isinstance(seen, Mapping) else {}
def is_seen(config: Mapping[str, Any], flag: str) -> bool:
"""Return True if the user has already been shown this first-touch hint."""
return bool(_get_seen_dict(config).get(flag))
def mark_seen(config_path: Path, flag: str) -> bool:
"""Persist ``onboarding.seen.<flag> = True`` to ``config_path``.
Uses the atomic YAML writer so a concurrent process can't observe a
partially-written file. Returns True on success, False on any error
(including the config file being absent onboarding is best-effort).
"""
try:
import yaml
from utils import atomic_yaml_write
except Exception as e: # pragma: no cover — dependency issue
logger.debug("onboarding: failed to import yaml/utils: %s", e)
return False
try:
cfg: dict = {}
if config_path.exists():
with open(config_path, encoding="utf-8") as f:
cfg = yaml.safe_load(f) or {}
if not isinstance(cfg.get("onboarding"), dict):
cfg["onboarding"] = {}
seen = cfg["onboarding"].get("seen")
if not isinstance(seen, dict):
seen = {}
cfg["onboarding"]["seen"] = seen
if seen.get(flag) is True:
return True # already marked — nothing to do
seen[flag] = True
atomic_yaml_write(config_path, cfg)
return True
except Exception as e:
logger.debug("onboarding: failed to mark flag %s: %s", flag, e)
return False
__all__ = [
"BUSY_INPUT_FLAG",
"TOOL_PROGRESS_FLAG",
"busy_input_hint_gateway",
"busy_input_hint_cli",
"tool_progress_hint_gateway",
"tool_progress_hint_cli",
"is_seen",
"mark_seen",
]
-58
View File
@@ -176,64 +176,6 @@ SKILLS_GUIDANCE = (
"Skills that aren't maintained become liabilities."
)
KANBAN_GUIDANCE = (
"# You are a Kanban worker\n"
"You were spawned by the Hermes Kanban dispatcher to execute ONE task from "
"the shared board at `~/.hermes/kanban.db`. Your task id is in "
"`$HERMES_KANBAN_TASK`; your workspace is `$HERMES_KANBAN_WORKSPACE`. "
"The `kanban_*` tools in your schema are your primary coordination surface — "
"they write directly to the shared SQLite DB and work regardless of terminal "
"backend (local/docker/modal/ssh).\n"
"\n"
"## Lifecycle\n"
"\n"
"1. **Orient.** Call `kanban_show()` first (no args — it defaults to your "
"task). The response includes title, body, parent-task handoffs (summary + "
"metadata), any prior attempts on this task if you're a retry, the full "
"comment thread, and a pre-formatted `worker_context` you can treat as "
"ground truth.\n"
"2. **Work inside the workspace.** `cd $HERMES_KANBAN_WORKSPACE` before "
"any file operations. The workspace is yours for this run. Don't modify "
"files outside it unless the task explicitly asks.\n"
"3. **Heartbeat on long operations.** Call `kanban_heartbeat(note=...)` "
"every few minutes during long subprocesses (training, encoding, crawling). "
"Skip heartbeats for short tasks.\n"
"4. **Block on genuine ambiguity.** If you need a human decision you cannot "
"infer (missing credentials, UX choice, paywalled source, peer output you "
"need first), call `kanban_block(reason=\"...\")` and stop. Don't guess. "
"The user will unblock with context and the dispatcher will respawn you.\n"
"5. **Complete with structured handoff.** Call `kanban_complete(summary=..., "
"metadata=...)`. `summary` is 13 human-readable sentences naming concrete "
"artifacts. `metadata` is machine-readable facts "
"(`{changed_files: [...], tests_run: N, decisions: [...]}`). Downstream "
"workers read both via their own `kanban_show`. Never put secrets / "
"tokens / raw PII in either field — run rows are durable forever.\n"
"6. **If follow-up work appears, create it; don't do it.** Use "
"`kanban_create(title=..., assignee=<right-profile>, parents=[your-task-id])` "
"to spawn a child task for the appropriate specialist profile instead of "
"scope-creeping into the next thing.\n"
"\n"
"## Orchestrator mode\n"
"\n"
"If your task is itself a decomposition task (e.g. a planner profile given "
"a high-level goal), use `kanban_create` to fan out into child tasks — one "
"per specialist, each with an explicit `assignee` and `parents=[...]` to "
"express dependencies. Then `kanban_complete` your own task with a summary "
"of the decomposition. Do NOT execute the work yourself; your job is "
"routing, not implementation.\n"
"\n"
"## Do NOT\n"
"\n"
"- Do not shell out to `hermes kanban <verb>` for board operations. Use "
"the `kanban_*` tools — they work across all terminal backends.\n"
"- Do not complete a task you didn't actually finish. Block it.\n"
"- Do not assign follow-up work to yourself. Assign it to the right "
"specialist profile.\n"
"- Do not call `delegate_task` as a board substitute. `delegate_task` is "
"for short reasoning subtasks inside your own run; board tasks are for "
"cross-agent handoffs that outlive one API loop."
)
TOOL_USE_ENFORCEMENT_GUIDANCE = (
"# Tool-use enforcement\n"
"You MUST use your tools to take action — do not describe what you would do "
+2 -7
View File
@@ -23,14 +23,9 @@ def get_transport(api_mode: str):
This allows gradual migration call sites can check for None
and fall back to the legacy code path.
"""
cls = _REGISTRY.get(api_mode)
if cls is None:
# The registry can be partially populated when a specific transport
# module was imported directly (for example chat_completions before
# codex). Discover on misses, not only when the registry is empty, so
# test/order-dependent imports do not make valid api_modes unavailable.
if not _REGISTRY:
_discover_transports()
cls = _REGISTRY.get(api_mode)
cls = _REGISTRY.get(api_mode)
if cls is None:
return None
return cls()
+4 -5
View File
@@ -31,15 +31,15 @@ class ChatCompletionsTransport(ProviderTransport):
def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> List[Dict[str, Any]]:
"""Messages are already in OpenAI format — sanitize Codex leaks only.
Strips Codex Responses API fields (``codex_reasoning_items`` /
``codex_message_items`` on the message, ``call_id``/``response_item_id``
on tool_calls) that strict chat-completions providers reject with 400/422.
Strips Codex Responses API fields (``codex_reasoning_items`` on the
message, ``call_id``/``response_item_id`` on tool_calls) that strict
chat-completions providers reject with 400/422.
"""
needs_sanitize = False
for msg in messages:
if not isinstance(msg, dict):
continue
if "codex_reasoning_items" in msg or "codex_message_items" in msg:
if "codex_reasoning_items" in msg:
needs_sanitize = True
break
tool_calls = msg.get("tool_calls")
@@ -59,7 +59,6 @@ class ChatCompletionsTransport(ProviderTransport):
if not isinstance(msg, dict):
continue
msg.pop("codex_reasoning_items", None)
msg.pop("codex_message_items", None)
tool_calls = msg.get("tool_calls")
if isinstance(tool_calls, list):
for tc in tool_calls:
-20
View File
@@ -120,24 +120,6 @@ class ResponsesApiTransport(ProviderTransport):
if request_overrides:
kwargs.update(request_overrides)
if is_codex_backend:
prompt_cache_key = kwargs.get("prompt_cache_key")
cache_scope_id = str(prompt_cache_key or session_id or "").strip()
if cache_scope_id:
existing_extra_headers = kwargs.get("extra_headers")
merged_extra_headers: Dict[str, str] = {}
if isinstance(existing_extra_headers, dict):
merged_extra_headers.update(
{
str(key): str(value)
for key, value in existing_extra_headers.items()
if key and value is not None
}
)
merged_extra_headers["session_id"] = cache_scope_id
merged_extra_headers["x-client-request-id"] = cache_scope_id
kwargs["extra_headers"] = merged_extra_headers
max_tokens = params.get("max_tokens")
if max_tokens is not None and not is_codex_backend:
kwargs["max_output_tokens"] = max_tokens
@@ -178,8 +160,6 @@ class ResponsesApiTransport(ProviderTransport):
provider_data = {}
if msg and hasattr(msg, "codex_reasoning_items") and msg.codex_reasoning_items:
provider_data["codex_reasoning_items"] = msg.codex_reasoning_items
if msg and hasattr(msg, "codex_message_items") and msg.codex_message_items:
provider_data["codex_message_items"] = msg.codex_message_items
if msg and hasattr(msg, "reasoning_details") and msg.reasoning_details:
provider_data["reasoning_details"] = msg.reasoning_details
+1 -6
View File
@@ -97,7 +97,7 @@ class NormalizedResponse:
Response-level ``provider_data`` examples:
* Anthropic: ``{"reasoning_details": [...]}``
* Codex: ``{"codex_reasoning_items": [...], "codex_message_items": [...]}``
* Codex: ``{"codex_reasoning_items": [...]}``
* Others: ``None``
"""
@@ -126,11 +126,6 @@ class NormalizedResponse:
pd = self.provider_data or {}
return pd.get("codex_reasoning_items")
@property
def codex_message_items(self):
pd = self.provider_data or {}
return pd.get("codex_message_items")
# ---------------------------------------------------------------------------
# Factory helpers
+8 -23
View File
@@ -824,9 +824,7 @@ delegation:
# Display
# =============================================================================
display:
# Use compact banner mode (hides the ASCII-art banner, shows a single line).
# true: Compact single-line banner
# false: Full ASCII banner with tool/skill summary (default)
# Use compact banner mode
compact: false
# Tool progress display level (CLI and gateway)
@@ -840,15 +838,12 @@ display:
# Gateway-only natural mid-turn assistant updates.
# When true, completed assistant status messages are sent as separate chat
# messages. This is independent of tool_progress and gateway streaming.
# true: Send mid-turn assistant updates as separate messages (default)
# false: Only send the final response
interim_assistant_messages: true
# What Enter does when Hermes is already busy (CLI and gateway platforms).
# What Enter does when Hermes is already busy in the CLI.
# interrupt: Interrupt the current run and redirect Hermes (default)
# queue: Queue your message for the next turn
# Ctrl+C (or /stop in gateway) always interrupts regardless of this setting.
# Toggle at runtime with /busy_input_mode <interrupt|queue>.
# Ctrl+C always interrupts regardless of this setting.
busy_input_mode: interrupt
# Background process notifications (gateway/messaging only).
@@ -864,22 +859,17 @@ display:
# Play terminal bell when agent finishes a response.
# Useful for long-running tasks — your terminal will ding when the agent is done.
# Works over SSH. Most terminals can be configured to flash the taskbar or play a sound.
# true: Ring the terminal bell on each response
# false: Silent (default)
bell_on_complete: false
# Show model reasoning/thinking before each response.
# When enabled, a dim box shows the model's thought process above the response.
# Toggle at runtime with /reasoning show or /reasoning hide.
# true: Show the reasoning box
# false: Hide reasoning (default)
show_reasoning: false
# Stream tokens to the terminal as they arrive instead of waiting for the
# full response. The response box opens on first token and text appears
# line-by-line. Tool calls are still captured silently.
# true: Stream tokens as they arrive (default)
# false: Wait for the full response before rendering
# Stream tokens to the terminal in real-time. Disable to wait for full responses.
streaming: true
# ───────────────────────────────────────────────────────────────────────────
@@ -889,15 +879,10 @@ display:
# response box label, and branding text. Change at runtime with /skin <name>.
#
# Built-in skins:
# default — Classic Hermes gold/kawaii
# ares — Crimson/bronze war-god theme with spinner wings
# mono — Clean grayscale monochrome
# slate — Cool blue developer-focused
# daylight — Bright light-mode theme
# warm-lightmode — Warm paper-tone light-mode theme
# poseidon — Sea-green/teal Olympian theme
# sisyphus — Earthy stone-and-moss theme
# charizard — Fiery orange dragon theme
# default — Classic Hermes gold/kawaii
# ares — Crimson/bronze war-god theme with spinner wings
# mono — Clean grayscale monochrome
# slate — Cool blue developer-focused
#
# Custom skins: drop a YAML file in ~/.hermes/skins/<name>.yaml
# Schema (all fields optional, missing values inherit from default):
+146 -104
View File
@@ -22,7 +22,6 @@ import re
import concurrent.futures
import base64
import atexit
import errno
import tempfile
import time
import uuid
@@ -417,11 +416,6 @@ def load_cli_config() -> Dict[str, Any]:
"base_url": "", # Direct OpenAI-compatible endpoint for subagents
"api_key": "", # API key for delegation.base_url (falls back to OPENAI_API_KEY)
},
"onboarding": {
# First-touch hint flags (see agent/onboarding.py). Each hint is
# shown once per install then latched here.
"seen": {},
},
}
# Track whether the config file explicitly set terminal config.
@@ -5158,29 +5152,27 @@ class HermesCLI:
_cprint(f" ✓ Model switched: {result.new_model}")
_cprint(f" Provider: {provider_label}")
# Context: always resolve via the provider-aware chain so Codex OAuth,
# Copilot, and Nous-enforced caps win over the raw models.dev entry
# (e.g. gpt-5.5 is 1.05M on openai but 272K on Codex OAuth).
mi = result.model_info
try:
from hermes_cli.model_switch import resolve_display_context_length
ctx = resolve_display_context_length(
result.new_model,
result.target_provider,
base_url=result.base_url or self.base_url or "",
api_key=result.api_key or self.api_key or "",
model_info=mi,
)
if ctx:
_cprint(f" Context: {ctx:,} tokens")
except Exception:
pass
if mi:
if mi.context_window:
_cprint(f" Context: {mi.context_window:,} tokens")
if mi.max_output:
_cprint(f" Max output: {mi.max_output:,} tokens")
if mi.has_cost_data():
_cprint(f" Cost: {mi.format_cost()}")
_cprint(f" Capabilities: {mi.format_capabilities()}")
else:
try:
from agent.model_metadata import get_model_context_length
ctx = get_model_context_length(
result.new_model,
base_url=result.base_url or self.base_url,
api_key=result.api_key or self.api_key,
provider=result.target_provider,
)
_cprint(f" Context: {ctx:,} tokens")
except Exception:
pass
cache_enabled = (
(base_url_host_matches(result.base_url or "", "openrouter.ai") and "claude" in result.new_model.lower())
@@ -5281,22 +5273,24 @@ class HermesCLI:
# Parse --provider and --global flags
model_input, explicit_provider, persist_global = parse_model_flags(raw_args)
# Load providers for switch_model (picker path needs them below)
user_provs = None
custom_provs = None
try:
from hermes_cli.config import get_compatible_custom_providers, load_config
cfg = load_config()
user_provs = cfg.get("providers")
custom_provs = get_compatible_custom_providers(cfg)
except Exception:
pass
# No args at all: open prompt_toolkit-native picker modal
if not model_input and not explicit_provider:
model_display = self.model or "unknown"
provider_display = get_label(self.provider) if self.provider else "unknown"
user_provs = None
custom_provs = None
try:
from hermes_cli.config import get_compatible_custom_providers, load_config
cfg = load_config()
user_provs = cfg.get("providers")
custom_provs = get_compatible_custom_providers(cfg)
except Exception:
pass
try:
providers = list_authenticated_providers(
current_provider=self.provider or "",
@@ -5818,28 +5812,7 @@ class HermesCLI:
print(f"(._.) Unknown cron command: {subcommand}")
print(" Available: list, add, edit, pause, resume, run, remove")
def _handle_kanban_command(self, cmd: str):
"""Handle the /kanban command — delegate to the shared kanban CLI.
The string form passed here is the user's full ``/kanban ...``
including the leading slash; we strip it and hand the remainder
to ``kanban.run_slash`` which returns a single formatted string.
"""
from hermes_cli.kanban import run_slash
rest = cmd.strip()
if rest.startswith("/"):
rest = rest.lstrip("/")
if rest.startswith("kanban"):
rest = rest[len("kanban"):].lstrip()
try:
output = run_slash(rest)
except Exception as exc: # pragma: no cover - defensive
output = f"(._.) kanban error: {exc}"
if output:
print(output)
def _handle_skills_command(self, cmd: str):
"""Handle /skills slash command — delegates to hermes_cli.skills_hub."""
from hermes_cli.skills_hub import handle_skills_slash
@@ -6076,8 +6049,6 @@ class HermesCLI:
self.save_conversation()
elif canonical == "cron":
self._handle_cron_command(cmd_original)
elif canonical == "kanban":
self._handle_kanban_command(cmd_original)
elif canonical == "skills":
with self._busy_command(self._slow_command_status(cmd_original)):
self._handle_skills_command(cmd_original)
@@ -6152,6 +6123,8 @@ class HermesCLI:
self._handle_agents_command()
elif canonical == "background":
self._handle_background_command(cmd_original)
elif canonical == "btw":
self._handle_btw_command(cmd_original)
elif canonical == "queue":
# Extract prompt after "/queue " or "/q "
parts = cmd_original.split(None, 1)
@@ -6438,6 +6411,122 @@ class HermesCLI:
self._background_tasks[task_id] = thread
thread.start()
def _handle_btw_command(self, cmd: str):
"""Handle /btw <question> — ephemeral side question using session context.
Snapshots the current conversation history, spawns a no-tools agent in
a background thread, and prints the answer without persisting anything
to the main session.
"""
parts = cmd.strip().split(maxsplit=1)
if len(parts) < 2 or not parts[1].strip():
_cprint(" Usage: /btw <question>")
_cprint(" Example: /btw what module owns session title sanitization?")
_cprint(" Answers using session context. No tools, not persisted.")
return
question = parts[1].strip()
task_id = f"btw_{datetime.now().strftime('%H%M%S')}_{uuid.uuid4().hex[:6]}"
if not self._ensure_runtime_credentials():
_cprint(" (>_<) Cannot start /btw: no valid credentials.")
return
turn_route = self._resolve_turn_agent_config(question)
history_snapshot = list(self.conversation_history)
preview = question[:60] + ("..." if len(question) > 60 else "")
_cprint(f' 💬 /btw: "{preview}"')
def run_btw():
try:
btw_agent = AIAgent(
model=turn_route["model"],
api_key=turn_route["runtime"].get("api_key"),
base_url=turn_route["runtime"].get("base_url"),
provider=turn_route["runtime"].get("provider"),
api_mode=turn_route["runtime"].get("api_mode"),
acp_command=turn_route["runtime"].get("command"),
acp_args=turn_route["runtime"].get("args"),
max_iterations=8,
enabled_toolsets=[],
quiet_mode=True,
verbose_logging=False,
session_id=task_id,
platform="cli",
reasoning_config=self.reasoning_config,
service_tier=self.service_tier,
request_overrides=turn_route.get("request_overrides"),
providers_allowed=self._providers_only,
providers_ignored=self._providers_ignore,
providers_order=self._providers_order,
provider_sort=self._provider_sort,
provider_require_parameters=self._provider_require_params,
provider_data_collection=self._provider_data_collection,
fallback_model=self._fallback_model,
session_db=None,
skip_memory=True,
skip_context_files=True,
persist_session=False,
)
btw_prompt = (
"[Ephemeral /btw side question. Answer using the conversation "
"context. No tools available. Be direct and concise.]\n\n"
+ question
)
result = btw_agent.run_conversation(
user_message=btw_prompt,
conversation_history=history_snapshot,
task_id=task_id,
)
response = (result.get("final_response") or "") if result else ""
if not response and result and result.get("error"):
response = f"Error: {result['error']}"
# TUI refresh before printing
if self._app:
self._app.invalidate()
time.sleep(0.05)
print()
if response:
try:
from hermes_cli.skin_engine import get_active_skin
_skin = get_active_skin()
_resp_color = _skin.get_color("response_border", "#4F6D4A")
except Exception:
_resp_color = "#4F6D4A"
ChatConsole().print(Panel(
_render_final_assistant_content(response, mode=self.final_response_markdown),
title=f"[{_resp_color} bold]⚕ /btw[/]",
title_align="left",
border_style=_resp_color,
box=rich_box.HORIZONTALS,
padding=(1, 4),
))
else:
_cprint(" 💬 /btw: (no response)")
if self.bell_on_complete:
sys.stdout.write("\a")
sys.stdout.flush()
except Exception as e:
if self._app:
self._app.invalidate()
time.sleep(0.05)
print()
_cprint(f" ❌ /btw failed: {e}")
finally:
if self._app:
self._invalidate(min_interval=0)
thread = threading.Thread(target=run_btw, daemon=True, name=f"btw-{task_id}")
thread.start()
@staticmethod
def _try_launch_chrome_debug(port: int, system: str) -> bool:
"""Try to launch Chrome/Chromium with remote debugging enabled.
@@ -7322,31 +7411,6 @@ class HermesCLI:
_cprint(f" {line}")
except Exception:
pass
# First-touch onboarding: on the first tool in this process
# that takes longer than the threshold while we're in the
# noisiest progress mode, print a one-time hint about
# /verbose. Latched on self so it fires at most once per
# process; persisted to config.yaml so it never fires again
# across processes either.
try:
if (
not getattr(self, "_long_tool_hint_fired", False)
and self.tool_progress_mode == "all"
and duration >= 30.0
):
from agent.onboarding import (
TOOL_PROGRESS_FLAG,
is_seen,
mark_seen,
tool_progress_hint_cli,
)
if not is_seen(CLI_CONFIG, TOOL_PROGRESS_FLAG):
self._long_tool_hint_fired = True
_cprint(f" {_DIM}{tool_progress_hint_cli()}{_RST}")
mark_seen(_hermes_home / "config.yaml", TOOL_PROGRESS_FLAG)
CLI_CONFIG.setdefault("onboarding", {}).setdefault("seen", {})[TOOL_PROGRESS_FLAG] = True
except Exception:
pass
self._invalidate()
return
if event_type != "tool.started":
@@ -9230,24 +9294,6 @@ class HermesCLI:
f"agent_running={self._agent_running}\n")
except Exception:
pass
# First-touch onboarding: on the very first busy-while-running
# event for this install, print a one-line tip explaining the
# /busy knob. Flag persists to config.yaml and never fires
# again. Guarded for exceptions so onboarding can't break
# the input loop.
try:
from agent.onboarding import (
BUSY_INPUT_FLAG,
busy_input_hint_cli,
is_seen,
mark_seen,
)
if not is_seen(CLI_CONFIG, BUSY_INPUT_FLAG):
_cprint(f" {_DIM}{busy_input_hint_cli(self.busy_input_mode)}{_RST}")
mark_seen(_hermes_home / "config.yaml", BUSY_INPUT_FLAG)
CLI_CONFIG.setdefault("onboarding", {}).setdefault("seen", {})[BUSY_INPUT_FLAG] = True
except Exception:
pass
else:
self._pending_input.put(payload)
event.app.current_buffer.reset(append_to_history=True)
@@ -10694,8 +10740,6 @@ class HermesCLI:
return # silently suppress
if isinstance(exc, KeyError) and "is not registered" in str(exc):
return # suppress selector registration failures (#6393)
if isinstance(exc, OSError) and getattr(exc, "errno", None) == errno.EIO:
return # suppress I/O errors from broken stdout on interrupt (#13710)
# Fall back to default handler for everything else
loop.default_exception_handler(context)
@@ -10728,11 +10772,9 @@ class HermesCLI:
except (EOFError, KeyboardInterrupt, BrokenPipeError):
pass
except (KeyError, OSError) as _stdin_err:
# Catch selector registration failures from broken stdin (#6393)
# and I/O errors from broken stdout during interrupt (#13710).
if isinstance(_stdin_err, OSError) and getattr(_stdin_err, "errno", None) == errno.EIO:
pass # suppress broken-stdout I/O errors on interrupt (#13710)
elif "is not registered" in str(_stdin_err) or "Bad file descriptor" in str(_stdin_err):
# Catch selector registration failures from broken stdin (#6393).
# This is the fallback for cases that slip past the fstat() guard.
if "is not registered" in str(_stdin_err) or "Bad file descriptor" in str(_stdin_err):
print(
f"\nError: stdin is not usable ({_stdin_err}).\n"
"This can happen with certain Python installations (e.g. uv-managed cPython on macOS).\n"
+7 -9
View File
@@ -41,15 +41,6 @@ if [ "$(id -u)" = "0" ]; then
echo "Warning: chown failed (rootless container?) — continuing anyway"
fi
# Ensure config.yaml is readable by the hermes runtime user even if it was
# edited on the host after initial ownership setup. Must run here (as root)
# rather than after the gosu drop, otherwise a non-root caller like
# `docker run -u $(id -u):$(id -g)` hits "Operation not permitted" (#15865).
if [ -f "$HERMES_HOME/config.yaml" ]; then
chown hermes:hermes "$HERMES_HOME/config.yaml" 2>/dev/null || true
chmod 640 "$HERMES_HOME/config.yaml" 2>/dev/null || true
fi
echo "Dropping root privileges"
exec gosu hermes "$0" "$@"
fi
@@ -76,6 +67,13 @@ if [ ! -f "$HERMES_HOME/config.yaml" ]; then
cp "$INSTALL_DIR/cli-config.yaml.example" "$HERMES_HOME/config.yaml"
fi
# Ensure the main config file remains accessible to the hermes runtime user
# even if it was edited on the host after initial ownership setup.
if [ -f "$HERMES_HOME/config.yaml" ]; then
chown hermes:hermes "$HERMES_HOME/config.yaml"
chmod 640 "$HERMES_HOME/config.yaml"
fi
# SOUL.md
if [ ! -f "$HERMES_HOME/SOUL.md" ]; then
cp "$INSTALL_DIR/docker/SOUL.md" "$HERMES_HOME/SOUL.md"
Binary file not shown.
+3 -16
View File
@@ -21,7 +21,6 @@ Errors in hooks are caught and logged but never block the main pipeline.
import asyncio
import importlib.util
import sys
from typing import Any, Callable, Dict, List, Optional
import yaml
@@ -104,28 +103,16 @@ class HookRegistry:
print(f"[hooks] Skipping {hook_name}: no events declared", flush=True)
continue
# Dynamically load the handler module.
# Register in sys.modules BEFORE exec_module so Pydantic /
# dataclasses / typing introspection can resolve forward
# references (triggered by `from __future__ import annotations`
# in the handler). Without this, a handler that declares a
# Pydantic BaseModel for webhook/event payloads fails at first
# dispatch with "TypeAdapter ... is not fully defined".
module_name = f"hermes_hook_{hook_name}"
# Dynamically load the handler module
spec = importlib.util.spec_from_file_location(
module_name, handler_path
f"hermes_hook_{hook_name}", handler_path
)
if spec is None or spec.loader is None:
print(f"[hooks] Skipping {hook_name}: could not load handler.py", flush=True)
continue
module = importlib.util.module_from_spec(spec)
sys.modules[module_name] = module
try:
spec.loader.exec_module(module)
except Exception:
sys.modules.pop(module_name, None)
raise
spec.loader.exec_module(module)
handle_fn = getattr(module, "handle", None)
if handle_fn is None:
-49
View File
@@ -9,7 +9,6 @@ Exposes an HTTP server with endpoints:
- GET /v1/models lists hermes-agent as an available model
- POST /v1/runs start a run, returns run_id immediately (202)
- GET /v1/runs/{run_id}/events SSE stream of structured lifecycle events
- POST /v1/runs/{run_id}/stop interrupt a running agent
- GET /health health check
- GET /health/detailed rich status for cross-container dashboard probing
@@ -587,9 +586,6 @@ class APIServerAdapter(BasePlatformAdapter):
self._run_streams: Dict[str, "asyncio.Queue[Optional[Dict]]"] = {}
# Creation timestamps for orphaned-run TTL sweep
self._run_streams_created: Dict[str, float] = {}
# Active run agent/task references for stop support
self._active_run_agents: Dict[str, Any] = {}
self._active_run_tasks: Dict[str, "asyncio.Task"] = {}
self._session_db: Optional[Any] = None # Lazy-init SessionDB for session continuity
@staticmethod
@@ -2445,7 +2441,6 @@ class APIServerAdapter(BasePlatformAdapter):
stream_delta_callback=_text_cb,
tool_progress_callback=event_cb,
)
self._active_run_agents[run_id] = agent
def _run_sync():
r = agent.run_conversation(
user_message=user_message,
@@ -2485,11 +2480,8 @@ class APIServerAdapter(BasePlatformAdapter):
q.put_nowait(None)
except Exception:
pass
self._active_run_agents.pop(run_id, None)
self._active_run_tasks.pop(run_id, None)
task = asyncio.create_task(_run_and_close())
self._active_run_tasks[run_id] = task
try:
self._background_tasks.add(task)
except TypeError:
@@ -2548,44 +2540,6 @@ class APIServerAdapter(BasePlatformAdapter):
return response
async def _handle_stop_run(self, request: "web.Request") -> "web.Response":
"""POST /v1/runs/{run_id}/stop — interrupt a running agent."""
auth_err = self._check_auth(request)
if auth_err:
return auth_err
run_id = request.match_info["run_id"]
agent = self._active_run_agents.get(run_id)
task = self._active_run_tasks.get(run_id)
if agent is None and task is None:
return web.json_response(_openai_error(f"Run not found: {run_id}", code="run_not_found"), status=404)
if agent is not None:
try:
agent.interrupt("Stop requested via API")
except Exception:
pass
if task is not None and not task.done():
task.cancel()
# Bounded wait: run_conversation() executes in the default
# executor thread which task.cancel() cannot preempt — we rely on
# agent.interrupt() above to break the loop. Cap the wait so a
# slow/unresponsive interrupt can't hang this handler.
try:
await asyncio.wait_for(asyncio.shield(task), timeout=5.0)
except asyncio.TimeoutError:
logger.warning(
"[api_server] stop for run %s timed out after 5s; "
"agent may still be finishing the current step",
run_id,
)
except (asyncio.CancelledError, Exception):
pass
return web.json_response({"run_id": run_id, "status": "stopping"})
async def _sweep_orphaned_runs(self) -> None:
"""Periodically clean up run streams that were never consumed."""
while True:
@@ -2600,8 +2554,6 @@ class APIServerAdapter(BasePlatformAdapter):
logger.debug("[api_server] sweeping orphaned run %s", run_id)
self._run_streams.pop(run_id, None)
self._run_streams_created.pop(run_id, None)
self._active_run_agents.pop(run_id, None)
self._active_run_tasks.pop(run_id, None)
# ------------------------------------------------------------------
# BasePlatformAdapter interface
@@ -2637,7 +2589,6 @@ class APIServerAdapter(BasePlatformAdapter):
# Structured event streaming
self._app.router.add_post("/v1/runs", self._handle_runs)
self._app.router.add_get("/v1/runs/{run_id}/events", self._handle_run_events)
self._app.router.add_post("/v1/runs/{run_id}/stop", self._handle_stop_run)
# Start background sweep to clean up orphaned (unconsumed) run streams
sweep_task = asyncio.create_task(self._sweep_orphaned_runs())
try:
+5 -35
View File
@@ -1025,20 +1025,7 @@ class BasePlatformAdapter(ABC):
self._post_delivery_callbacks: Dict[str, Any] = {}
self._expected_cancelled_tasks: set[asyncio.Task] = set()
self._busy_session_handler: Optional[Callable[[MessageEvent, str], Awaitable[bool]]] = None
# Auto-TTS on voice input: ``_auto_tts_default`` is the global default
# (``voice.auto_tts`` in config.yaml, pushed by GatewayRunner on connect).
# Per-chat overrides live in two sets populated from ``_voice_mode``:
# - ``_auto_tts_enabled_chats``: chat explicitly opted in via ``/voice on``
# or ``/voice tts`` (mode is ``voice_only`` or ``all``). Fires even when
# the global default is False.
# - ``_auto_tts_disabled_chats``: chat explicitly opted out via
# ``/voice off`` (mode is ``off``). Suppresses auto-TTS even when the
# global default is True.
# The gate in _process_message() is:
# fire if chat in _auto_tts_enabled_chats
# OR (_auto_tts_default and chat not in _auto_tts_disabled_chats)
self._auto_tts_default: bool = False
self._auto_tts_enabled_chats: set = set()
# Chats where auto-TTS on voice input is disabled (set by /voice off)
self._auto_tts_disabled_chats: set = set()
# Chats where typing indicator is paused (e.g. during approval waits).
# _keep_typing skips send_typing when the chat_id is in this set.
@@ -1060,21 +1047,6 @@ class BasePlatformAdapter(ABC):
def fatal_error_retryable(self) -> bool:
return self._fatal_error_retryable
def _should_auto_tts_for_chat(self, chat_id: str) -> bool:
"""Whether auto-TTS on voice input should fire for ``chat_id``.
Decision layers (Issue #16007):
1. Explicit ``/voice on`` or ``/voice tts`` always fire (even if
``voice.auto_tts`` is False).
2. Explicit ``/voice off`` never fire.
3. Fall back to the global ``voice.auto_tts`` config default.
"""
if chat_id in self._auto_tts_enabled_chats:
return True
if chat_id in self._auto_tts_disabled_chats:
return False
return bool(self._auto_tts_default)
def set_fatal_error_handler(self, handler: Callable[["BasePlatformAdapter"], Awaitable[None] | None]) -> None:
self._fatal_error_handler = handler
@@ -2242,14 +2214,12 @@ class BasePlatformAdapter(ABC):
logger.info("[%s] extract_local_files found %d file(s) in response", self.name, len(local_files))
# Auto-TTS: if voice message, generate audio FIRST (before sending text)
# Gated via ``_should_auto_tts_for_chat``: fires when the chat has
# an explicit ``/voice on|tts`` opt-in OR when ``voice.auto_tts`` is
# True globally and no ``/voice off`` has been issued.
# Skipped when the chat has voice mode disabled (/voice off)
_tts_path = None
if (self._should_auto_tts_for_chat(event.source.chat_id)
and event.message_type == MessageType.VOICE
if (event.message_type == MessageType.VOICE
and text_content
and not media_files):
and not media_files
and event.source.chat_id not in self._auto_tts_disabled_chats):
try:
from tools.tts_tool import text_to_speech_tool, check_tts_requirements
if check_tts_requirements():
+5
View File
@@ -2315,6 +2315,11 @@ class DiscordAdapter(BasePlatformAdapter):
async def slash_background(interaction: discord.Interaction, prompt: str):
await self._run_simple_slash(interaction, f"/background {prompt}", "Background task started~")
@tree.command(name="btw", description="Ephemeral side question using session context")
@discord.app_commands.describe(question="Your side question (no tools, not persisted)")
async def slash_btw(interaction: discord.Interaction, question: str):
await self._run_simple_slash(interaction, f"/btw {question}")
# ── Auto-register any gateway-available commands not yet on the tree ──
# This ensures new commands added to COMMAND_REGISTRY in
# hermes_cli/commands.py automatically appear as Discord slash
+203 -603
View File
File diff suppressed because it is too large Load Diff
-2
View File
@@ -1232,7 +1232,6 @@ class SessionStore:
reasoning_content=message.get("reasoning_content") if message.get("role") == "assistant" else None,
reasoning_details=message.get("reasoning_details") if message.get("role") == "assistant" else None,
codex_reasoning_items=message.get("codex_reasoning_items") if message.get("role") == "assistant" else None,
codex_message_items=message.get("codex_message_items") if message.get("role") == "assistant" else None,
)
except Exception as e:
logger.debug("Session DB operation failed: %s", e)
@@ -1265,7 +1264,6 @@ class SessionStore:
reasoning_content=msg.get("reasoning_content") if role == "assistant" else None,
reasoning_details=msg.get("reasoning_details") if role == "assistant" else None,
codex_reasoning_items=msg.get("codex_reasoning_items") if role == "assistant" else None,
codex_message_items=msg.get("codex_message_items") if role == "assistant" else None,
)
except Exception as e:
logger.debug("Failed to rewrite transcript in DB: %s", e)
+2 -10
View File
@@ -356,14 +356,6 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
api_key_env_vars=(),
base_url_env_var="BEDROCK_BASE_URL",
),
"azure-foundry": ProviderConfig(
id="azure-foundry",
name="Azure Foundry",
auth_type="api_key",
inference_base_url="", # User-provided endpoint
api_key_env_vars=("AZURE_FOUNDRY_API_KEY",),
base_url_env_var="AZURE_FOUNDRY_BASE_URL",
),
}
@@ -4244,10 +4236,10 @@ def _login_nous(args, pconfig: ProviderConfig) -> None:
)
from hermes_cli.models import (
get_curated_nous_model_ids, get_pricing_for_provider,
_PROVIDER_MODELS, get_pricing_for_provider,
check_nous_free_tier, partition_nous_models_by_tier,
)
model_ids = get_curated_nous_model_ids()
model_ids = _PROVIDER_MODELS.get("nous", [])
print()
unavailable_models: list = []
-300
View File
@@ -1,300 +0,0 @@
"""Azure Foundry endpoint auto-detection.
Inspect an Azure AI Foundry / Azure OpenAI endpoint to determine:
- API transport (OpenAI-style ``chat_completions`` vs
Anthropic-style ``anthropic_messages``)
- Available models (best effort Azure does not expose a deployment
listing via the inference API key, but Azure OpenAI v1 endpoints
return the resource's model catalog via ``GET /models``)
- Context length for each discovered/entered model, via the existing
:func:`agent.model_metadata.get_model_context_length` resolver.
Rationale:
Azure has no pure-API-key deployment-listing endpoint per Microsoft,
deployment enumeration requires ARM management-plane auth. Azure
OpenAI v1 endpoints ``{resource}.openai.azure.com/openai/v1`` do return
a ``/models`` list, but it reflects the resource's *available* models
rather than the user's *deployed* deployment names. In practice it is
still a useful hint the user picks a familiar model name and we look
up its context length from the catalog.
The detector never crashes on errors (every HTTP call is wrapped in a
broad try/except). Callers get a :class:`DetectionResult` with whatever
information could be gathered, and fall back to manual entry for the
rest.
"""
from __future__ import annotations
import json
import logging
import re
from dataclasses import dataclass, field
from typing import Optional
from urllib import request as urllib_request
from urllib.error import HTTPError, URLError
from urllib.parse import urlparse, urlunparse
logger = logging.getLogger(__name__)
# Default Azure OpenAI ``api-version`` to probe with. The v1 GA endpoint
# accepts requests without ``api-version`` entirely, so this is only used
# as a fallback for pre-v1 resources that still require it.
_AZURE_OPENAI_PROBE_API_VERSIONS = (
"2025-04-01-preview",
"2024-10-21", # oldest GA that supports /models
)
# Default Azure Anthropic ``api-version``. Matches the value used by
# ``agent/anthropic_adapter.py`` when building the Anthropic client.
_AZURE_ANTHROPIC_API_VERSION = "2025-04-15"
@dataclass
class DetectionResult:
"""Everything auto-detection could gather from a base URL + API key."""
#: Detected API transport: ``"chat_completions"``,
#: ``"anthropic_messages"``, or ``None`` when detection failed.
api_mode: Optional[str] = None
#: Deployment / model IDs returned by ``/models`` (best effort).
#: Empty when the endpoint doesn't expose the list with an API key.
models: list[str] = field(default_factory=list)
#: Lowercased host from the base URL (used for display messages).
hostname: str = ""
#: Human-readable reason the detector chose ``api_mode``. Useful
#: for explaining auto-detection to the user in the wizard.
reason: str = ""
#: ``True`` when ``/models`` returned a valid OpenAI-shaped payload.
models_probe_ok: bool = False
#: ``True`` when the URL was determined to be an Anthropic-style
#: endpoint (from path suffix or live probe).
is_anthropic: bool = False
def _http_get_json(url: str, api_key: str, timeout: float = 6.0) -> tuple[int, Optional[dict]]:
"""GET a URL with ``api-key`` + ``Authorization`` headers. Return
``(status_code, parsed_json_or_None)``. Never raises."""
req = urllib_request.Request(url, method="GET")
# Azure OpenAI uses ``api-key``. Some Azure deployments (and
# Anthropic-style routes) use ``Authorization: Bearer``. Send both
# so we probe once per URL rather than twice.
req.add_header("api-key", api_key)
req.add_header("Authorization", f"Bearer {api_key}")
req.add_header("User-Agent", "hermes-agent/azure-detect")
try:
with urllib_request.urlopen(req, timeout=timeout) as resp:
body = resp.read()
try:
return resp.status, json.loads(body.decode("utf-8", errors="replace"))
except Exception:
return resp.status, None
except HTTPError as exc:
return exc.code, None
except (URLError, TimeoutError, OSError) as exc:
logger.debug("azure_detect: GET %s failed: %s", url, exc)
return 0, None
except Exception as exc: # pragma: no cover — defensive
logger.debug("azure_detect: GET %s unexpected error: %s", url, exc)
return 0, None
def _strip_trailing_v1(url: str) -> str:
"""Strip trailing ``/v1`` or ``/v1/`` so we can construct sub-paths."""
return re.sub(r"/v1/?$", "", url.rstrip("/"))
def _looks_like_anthropic_path(url: str) -> bool:
"""Return True when the URL's path ends in ``/anthropic`` or
contains a ``/anthropic/`` segment. Used by Azure Foundry
resources that route Claude traffic through a dedicated path."""
try:
parsed = urlparse(url)
path = (parsed.path or "").lower().rstrip("/")
return path.endswith("/anthropic") or "/anthropic/" in path + "/"
except Exception:
return False
def _extract_model_ids(payload: dict) -> list[str]:
"""Extract a list of model IDs from an OpenAI-shaped ``/models``
response. Returns ``[]`` on any shape mismatch."""
data = payload.get("data") if isinstance(payload, dict) else None
if not isinstance(data, list):
return []
ids: list[str] = []
for item in data:
if not isinstance(item, dict):
continue
# OpenAI shape: {"id": "gpt-5.4", "object": "model", ...}
mid = item.get("id") or item.get("model") or item.get("name")
if isinstance(mid, str) and mid:
ids.append(mid)
return ids
def _probe_openai_models(base_url: str, api_key: str) -> tuple[bool, list[str]]:
"""Probe ``<base>/models`` for an OpenAI-shaped response.
Returns ``(ok, models)``. ``ok`` is True iff the endpoint accepted
us as an OpenAI-style caller (200 OK + OpenAI-shaped JSON body).
"""
base_url = base_url.rstrip("/")
# Azure OpenAI v1: {resource}.openai.azure.com/openai/v1 — no
# api-version required for GA paths, so probe without first.
candidates = [f"{base_url}/models"]
# Fallback: explicit api-version for pre-v1 resources
for v in _AZURE_OPENAI_PROBE_API_VERSIONS:
candidates.append(f"{base_url}/models?api-version={v}")
for url in candidates:
status, body = _http_get_json(url, api_key)
if status == 200 and body is not None:
ids = _extract_model_ids(body)
if ids:
logger.info(
"azure_detect: /models probe OK at %s (%d models)",
url, len(ids),
)
return True, ids
# 200 + empty list still counts as "OpenAI shape, no models
# listed" — let the user proceed with manual entry.
if isinstance(body, dict) and "data" in body:
return True, []
return False, []
def _probe_anthropic_messages(base_url: str, api_key: str) -> bool:
"""Send a zero-token request to ``<base>/v1/messages`` and check
whether the endpoint at least *recognises* the Anthropic Messages
shape (any 4xx that mentions ``messages`` or ``model``, or a 400
``invalid_request`` with an Anthropic error shape). Never completes
a real chat.
"""
base = _strip_trailing_v1(base_url)
url = f"{base}/v1/messages?api-version={_AZURE_ANTHROPIC_API_VERSION}"
payload = json.dumps({
"model": "probe",
"max_tokens": 1,
"messages": [{"role": "user", "content": "ping"}],
}).encode("utf-8")
req = urllib_request.Request(url, method="POST", data=payload)
req.add_header("api-key", api_key)
req.add_header("Authorization", f"Bearer {api_key}")
req.add_header("anthropic-version", "2023-06-01")
req.add_header("content-type", "application/json")
req.add_header("User-Agent", "hermes-agent/azure-detect")
try:
with urllib_request.urlopen(req, timeout=6.0) as resp:
# Should never 200 — "probe" isn't a real deployment. But
# if it does, the endpoint definitely speaks Anthropic.
return resp.status < 500
except HTTPError as exc:
# 4xx with an Anthropic-shaped error body = Anthropic endpoint.
try:
body = exc.read().decode("utf-8", errors="replace")
lowered = body.lower()
if "anthropic" in lowered or '"type"' in lowered and '"error"' in lowered:
return True
# Pre-Azure-v1 Azure Foundry returns a plain 404 for
# Anthropic-style calls on non-Anthropic deployments. A
# 400 "model not found" IS Anthropic though.
if exc.code == 400 and ("messages" in lowered or "model" in lowered):
return True
return False
except Exception:
return False
except (URLError, TimeoutError, OSError):
return False
except Exception: # pragma: no cover
return False
def detect(base_url: str, api_key: str) -> DetectionResult:
"""Inspect an Azure endpoint and describe its transport + models.
Call this from the wizard before asking the user to pick an API
mode manually. The caller should treat the returned
:class:`DetectionResult` as *advisory* if ``api_mode`` is None,
fall back to asking the user.
"""
result = DetectionResult()
try:
parsed = urlparse(base_url)
result.hostname = (parsed.hostname or "").lower()
except Exception:
result.hostname = ""
# 1. Path sniff. Azure Foundry exposes Anthropic-style deployments
# under a dedicated ``/anthropic`` path.
if _looks_like_anthropic_path(base_url):
result.is_anthropic = True
result.api_mode = "anthropic_messages"
result.reason = "URL path ends in /anthropic → Anthropic Messages API"
return result
# 2. Try the OpenAI-style /models probe. If this works, the
# endpoint definitely speaks OpenAI wire.
ok, models = _probe_openai_models(base_url, api_key)
if ok:
result.models_probe_ok = True
result.models = models
result.api_mode = "chat_completions"
result.reason = (
f"GET /models returned {len(models)} model(s) — OpenAI-style endpoint"
if models
else "GET /models returned an OpenAI-shaped empty list — OpenAI-style endpoint"
)
return result
# 3. Fallback: probe the Anthropic Messages shape. Slower and more
# intrusive than /models, so only run it when the OpenAI probe
# failed.
if _probe_anthropic_messages(base_url, api_key):
result.is_anthropic = True
result.api_mode = "anthropic_messages"
result.reason = "Endpoint accepts Anthropic Messages shape"
return result
# Nothing matched. Caller falls back to manual selection.
result.reason = (
"Could not probe endpoint (private network, missing model list, or "
"non-standard path) — falling back to manual API-mode selection"
)
return result
def lookup_context_length(model: str, base_url: str, api_key: str) -> Optional[int]:
"""Thin wrapper around :func:`agent.model_metadata.get_model_context_length`
that returns ``None`` when only the fallback default (128k) would
fire, so the wizard can distinguish "we actually know this" from
"we guessed."""
try:
from agent.model_metadata import (
DEFAULT_FALLBACK_CONTEXT,
get_model_context_length,
)
except Exception:
return None
try:
n = get_model_context_length(model, base_url=base_url, api_key=api_key)
except Exception as exc:
logger.debug("azure_detect: context length lookup failed: %s", exc)
return None
if isinstance(n, int) and n > 0 and n != DEFAULT_FALLBACK_CONTEXT:
return n
return None
__all__ = ["DetectionResult", "detect", "lookup_context_length"]
+3 -6
View File
@@ -84,7 +84,9 @@ COMMAND_REGISTRY: list[CommandDef] = [
CommandDef("deny", "Deny a pending dangerous command", "Session",
gateway_only=True),
CommandDef("background", "Run a prompt in the background", "Session",
aliases=("bg", "btw"), args_hint="<prompt>"),
aliases=("bg",), args_hint="<prompt>"),
CommandDef("btw", "Ephemeral side question using session context (no tools, not persisted)", "Session",
args_hint="<question>"),
CommandDef("agents", "Show active agents and running tasks", "Session",
aliases=("tasks",)),
CommandDef("queue", "Queue a prompt for the next turn (doesn't interrupt)", "Session",
@@ -140,11 +142,6 @@ COMMAND_REGISTRY: list[CommandDef] = [
CommandDef("cron", "Manage scheduled tasks", "Tools & Skills",
cli_only=True, args_hint="[subcommand]",
subcommands=("list", "add", "create", "edit", "pause", "resume", "run", "remove")),
CommandDef("kanban", "Multi-profile collaboration board (tasks, links, comments)",
"Tools & Skills", args_hint="[subcommand]",
subcommands=("list", "ls", "show", "create", "assign", "link", "unlink",
"claim", "comment", "complete", "block", "unblock", "archive",
"tail", "dispatch", "context", "init", "gc")),
CommandDef("reload", "Reload .env variables into the running session", "Tools & Skills",
cli_only=True),
CommandDef("reload-mcp", "Reload MCP servers from config", "Tools & Skills",
-108
View File
@@ -959,27 +959,6 @@ DEFAULT_CONFIG = {
"backup_count": 3, # Number of rotated backup files to keep
},
# Remotely-hosted model catalog manifest. When enabled, the CLI fetches
# curated model lists for OpenRouter and Nous Portal from this URL,
# falling back to the in-repo snapshot on network failure. Lets us
# update model picker lists without shipping a hermes-agent release.
# The default URL is served by the docs site GitHub Pages deploy.
"model_catalog": {
"enabled": True,
"url": "https://hermes-agent.nousresearch.com/docs/api/model-catalog.json",
# Disk cache TTL in hours. Beyond this, the CLI refetches on the
# next /model or `hermes model` invocation; network failures
# silently fall back to the stale cache.
"ttl_hours": 24,
# Optional per-provider override URLs for third parties that want
# to self-host their own curation list using the same schema.
# Example:
# providers:
# openrouter:
# url: https://example.com/my-curation.json
"providers": {},
},
# Network settings — workarounds for connectivity issues.
"network": {
# Force IPv4 connections. On servers with broken or unreachable IPv6,
@@ -1016,13 +995,6 @@ DEFAULT_CONFIG = {
"min_interval_hours": 24,
},
# Contextual first-touch onboarding hints (see agent/onboarding.py).
# Each hint is shown once per install and then latched here so it
# never fires again. Users can wipe the section to re-see all hints.
"onboarding": {
"seen": {},
},
# Config schema version - bump this when adding new required fields
"_config_version": 22,
}
@@ -1399,21 +1371,6 @@ OPTIONAL_ENV_VARS = {
"category": "provider",
"advanced": True,
},
"AZURE_FOUNDRY_API_KEY": {
"description": "Azure Foundry API key for custom Azure endpoints",
"prompt": "Azure Foundry API Key",
"url": "https://ai.azure.com/",
"password": True,
"category": "provider",
},
"AZURE_FOUNDRY_BASE_URL": {
"description": "Azure Foundry base URL (set via 'hermes model' for endpoint-specific config)",
"prompt": "Azure Foundry base URL",
"url": None,
"password": False,
"category": "provider",
"advanced": True,
},
# ── Tool API keys ──
"EXA_API_KEY": {
@@ -2249,71 +2206,6 @@ def get_compatible_custom_providers(
return compatible
def get_custom_provider_context_length(
model: str,
base_url: str,
custom_providers: Optional[List[Dict[str, Any]]] = None,
config: Optional[Dict[str, Any]] = None,
) -> Optional[int]:
"""Look up a per-model ``context_length`` override from ``custom_providers``.
Matches any entry whose ``base_url`` equals ``base_url`` (trailing-slash
insensitive) and returns ``custom_providers[i].models.<model>.context_length``
if present and valid. Returns ``None`` when no override applies.
This is the single source of truth for custom-provider context overrides,
used by:
* ``AIAgent.__init__`` (startup resolution)
* ``AIAgent.switch_model`` (mid-session ``/model`` switch)
* ``hermes_cli.model_switch.resolve_display_context_length`` (``/model`` confirmation display)
* ``gateway.run._format_session_info`` (``/info`` display)
* ``agent.model_metadata.get_model_context_length`` (when custom_providers is threaded through)
Before this helper existed, the lookup was duplicated in ``run_agent.py``'s
startup path only; every other path (notably ``/model`` switch) fell back
to the 128K default. See #15779.
"""
if not model or not base_url:
return None
if custom_providers is None:
try:
custom_providers = get_compatible_custom_providers(config)
except Exception:
if config is None:
return None
raw = config.get("custom_providers")
custom_providers = raw if isinstance(raw, list) else []
if not isinstance(custom_providers, list):
return None
target_url = (base_url or "").rstrip("/")
if not target_url:
return None
for entry in custom_providers:
if not isinstance(entry, dict):
continue
entry_url = (entry.get("base_url") or "").rstrip("/")
if not entry_url or entry_url != target_url:
continue
models = entry.get("models")
if not isinstance(models, dict):
continue
model_cfg = models.get(model)
if not isinstance(model_cfg, dict):
continue
raw_ctx = model_cfg.get("context_length")
if raw_ctx is None:
continue
try:
ctx = int(raw_ctx)
except (TypeError, ValueError):
continue
if ctx > 0:
return ctx
return None
def check_config_version() -> Tuple[int, int]:
"""
Check config version.
+1 -5
View File
@@ -320,11 +320,7 @@ def run_doctor(args):
known_providers.add("custom:" + name.lower().replace(" ", "-"))
canonical_provider = provider
if (
provider
and _resolve_provider_full is not None
and provider not in ("auto", "custom")
):
if provider and _resolve_provider_full is not None and provider != "auto":
provider_def = _resolve_provider_full(provider, user_providers, custom_providers)
canonical_provider = provider_def.id if provider_def is not None else None
-361
View File
@@ -1,361 +0,0 @@
"""
hermes fallback manage the fallback provider chain.
Fallback providers are tried in order when the primary model fails with
rate-limit, overload, or connection errors. See:
https://hermes-agent.nousresearch.com/docs/user-guide/features/fallback-providers
Subcommands:
hermes fallback [list] Show the current fallback chain (default when no subcommand)
hermes fallback add Pick provider + model via the same picker as `hermes model`,
then append the selection to the chain
hermes fallback remove Pick an entry to delete from the chain
hermes fallback clear Remove all fallback entries
Storage: ``fallback_providers`` in ``~/.hermes/config.yaml`` (top-level, list of
``{provider, model, base_url?, api_mode?}`` dicts). The legacy single-dict
``fallback_model`` format is migrated to the new list format on first add.
"""
from __future__ import annotations
import copy
from typing import Any, Dict, List, Optional
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _read_chain(config: Dict[str, Any]) -> List[Dict[str, Any]]:
"""Return the normalized fallback chain as a list of dicts.
Accepts both the new list format (``fallback_providers``) and the legacy
single-dict format (``fallback_model``). The returned list is always a
fresh copy callers can mutate without touching the config dict.
"""
chain = config.get("fallback_providers") or []
if isinstance(chain, list):
result = [dict(e) for e in chain if isinstance(e, dict) and e.get("provider") and e.get("model")]
if result:
return result
legacy = config.get("fallback_model")
if isinstance(legacy, dict) and legacy.get("provider") and legacy.get("model"):
return [dict(legacy)]
if isinstance(legacy, list):
return [dict(e) for e in legacy if isinstance(e, dict) and e.get("provider") and e.get("model")]
return []
def _write_chain(config: Dict[str, Any], chain: List[Dict[str, Any]]) -> None:
"""Persist the chain to ``fallback_providers`` and clear legacy key."""
config["fallback_providers"] = chain
# Drop the legacy single-dict key on write so there's only one source of truth.
if "fallback_model" in config:
config.pop("fallback_model", None)
def _format_entry(entry: Dict[str, Any]) -> str:
"""One-line human-readable rendering of a fallback entry."""
provider = entry.get("provider", "?")
model = entry.get("model", "?")
base = entry.get("base_url")
suffix = f" [{base}]" if base else ""
return f"{model} (via {provider}){suffix}"
def _extract_fallback_from_model_cfg(model_cfg: Any) -> Optional[Dict[str, Any]]:
"""Pull the ``{provider, model, base_url?, api_mode?}`` dict from a ``config["model"]`` snapshot."""
if not isinstance(model_cfg, dict):
return None
provider = (model_cfg.get("provider") or "").strip()
# The picker writes the selected model to ``model.default``.
model = (model_cfg.get("default") or model_cfg.get("model") or "").strip()
if not provider or not model:
return None
entry: Dict[str, Any] = {"provider": provider, "model": model}
base_url = (model_cfg.get("base_url") or "").strip()
if base_url:
entry["base_url"] = base_url
api_mode = (model_cfg.get("api_mode") or "").strip()
if api_mode:
entry["api_mode"] = api_mode
return entry
def _snapshot_auth_active_provider() -> Any:
"""Return the current ``active_provider`` in auth.json, or a sentinel if unavailable."""
try:
from hermes_cli.auth import _load_auth_store
store = _load_auth_store()
return store.get("active_provider")
except Exception:
return None
def _restore_auth_active_provider(value: Any) -> None:
"""Write back a previously snapshotted ``active_provider`` value."""
try:
from hermes_cli.auth import _auth_store_lock, _load_auth_store, _save_auth_store
with _auth_store_lock():
store = _load_auth_store()
store["active_provider"] = value
_save_auth_store(store)
except Exception:
# Best-effort — if auth.json can't be restored, the user's primary
# provider may have been deactivated by the picker. They can re-run
# `hermes model` to fix it. Don't fail the fallback add.
pass
# ---------------------------------------------------------------------------
# Subcommand handlers
# ---------------------------------------------------------------------------
def cmd_fallback_list(args) -> None: # noqa: ARG001
"""Print the current fallback chain."""
from hermes_cli.config import load_config
config = load_config()
chain = _read_chain(config)
print()
if not chain:
print(" No fallback providers configured.")
print()
print(" Add one with: hermes fallback add")
print()
return
primary = _describe_primary(config)
if primary:
print(f" Primary: {primary}")
print()
print(f" Fallback chain ({len(chain)} {'entry' if len(chain) == 1 else 'entries'}):")
for i, entry in enumerate(chain, 1):
print(f" {i}. {_format_entry(entry)}")
print()
print(" Tried in order when the primary fails (rate-limit, 5xx, connection errors).")
print(" Docs: https://hermes-agent.nousresearch.com/docs/user-guide/features/fallback-providers")
print()
def _describe_primary(config: Dict[str, Any]) -> Optional[str]:
"""One-line description of the primary model for display purposes."""
model_cfg = config.get("model")
if isinstance(model_cfg, dict):
provider = (model_cfg.get("provider") or "?").strip() or "?"
model = (model_cfg.get("default") or model_cfg.get("model") or "?").strip() or "?"
return f"{model} (via {provider})"
if isinstance(model_cfg, str) and model_cfg.strip():
return model_cfg.strip()
return None
def cmd_fallback_add(args) -> None:
"""Launch the same picker as `hermes model`, then append the selection to the chain."""
from hermes_cli.main import _require_tty, select_provider_and_model
from hermes_cli.config import load_config, save_config
_require_tty("fallback add")
# Snapshot BEFORE the picker runs so we can distinguish "user actually
# picked something" from "user cancelled" by comparing before/after.
before_cfg = load_config()
model_before = copy.deepcopy(before_cfg.get("model"))
active_provider_before = _snapshot_auth_active_provider()
print()
print(" Adding a fallback provider. The picker below is the same one used by")
print(" `hermes model` — select the provider + model you want as a fallback.")
print()
try:
select_provider_and_model(args=args)
except SystemExit:
# Some provider flows exit on auth failure — restore state and re-raise.
_restore_model_cfg(model_before)
_restore_auth_active_provider(active_provider_before)
raise
# Read the post-picker state to see what the user selected.
after_cfg = load_config()
model_after = after_cfg.get("model")
new_entry = _extract_fallback_from_model_cfg(model_after)
if not new_entry:
# Picker didn't complete (user cancelled or flow bailed). Nothing to do.
_restore_model_cfg(model_before)
_restore_auth_active_provider(active_provider_before)
print()
print(" No fallback added.")
return
# Picker picked the same thing that's already the primary → nothing changed,
# and there's nothing useful to add as a fallback to itself.
primary_entry = _extract_fallback_from_model_cfg(model_before)
if primary_entry and primary_entry["provider"] == new_entry["provider"] \
and primary_entry["model"] == new_entry["model"]:
_restore_model_cfg(model_before)
_restore_auth_active_provider(active_provider_before)
print()
print(f" Selected model matches the current primary ({_format_entry(new_entry)}).")
print(" A provider cannot be a fallback for itself — no change.")
return
# Reload the config with the primary restored, then append the new entry
# to ``fallback_providers``. We deliberately re-load (rather than mutating
# ``after_cfg``) because the picker may have touched other top-level keys
# (custom_providers, providers credentials) that we want to keep.
_restore_model_cfg(model_before)
_restore_auth_active_provider(active_provider_before)
final_cfg = load_config()
chain = _read_chain(final_cfg)
# Reject exact-duplicate fallback entries.
for existing in chain:
if existing.get("provider") == new_entry["provider"] \
and existing.get("model") == new_entry["model"]:
print()
print(f" {_format_entry(new_entry)} is already in the fallback chain — skipped.")
return
chain.append(new_entry)
_write_chain(final_cfg, chain)
save_config(final_cfg)
print()
print(f" Added fallback: {_format_entry(new_entry)}")
print(f" Chain is now {len(chain)} {'entry' if len(chain) == 1 else 'entries'} long.")
print()
print(" Run `hermes fallback list` to view, or `hermes fallback remove` to delete.")
def _restore_model_cfg(model_before: Any) -> None:
"""Restore ``config["model"]`` to a previously-captured snapshot."""
from hermes_cli.config import load_config, save_config
cfg = load_config()
if model_before is None:
cfg.pop("model", None)
else:
cfg["model"] = copy.deepcopy(model_before)
save_config(cfg)
def cmd_fallback_remove(args) -> None: # noqa: ARG001
"""Pick an entry from the chain and remove it."""
from hermes_cli.config import load_config, save_config
config = load_config()
chain = _read_chain(config)
if not chain:
print()
print(" No fallback providers configured — nothing to remove.")
print()
return
choices = [_format_entry(e) for e in chain]
choices.append("Cancel")
try:
from hermes_cli.setup import _curses_prompt_choice
idx = _curses_prompt_choice("Select a fallback to remove:", choices, 0)
except Exception:
idx = _numbered_pick("Select a fallback to remove:", choices)
if idx is None or idx < 0 or idx >= len(chain):
print()
print(" Cancelled — no change.")
return
removed = chain.pop(idx)
_write_chain(config, chain)
save_config(config)
print()
print(f" Removed fallback: {_format_entry(removed)}")
if chain:
print(f" Chain is now {len(chain)} {'entry' if len(chain) == 1 else 'entries'} long.")
else:
print(" Fallback chain is now empty.")
print()
def cmd_fallback_clear(args) -> None: # noqa: ARG001
"""Remove all fallback entries (with confirmation)."""
from hermes_cli.config import load_config, save_config
config = load_config()
chain = _read_chain(config)
if not chain:
print()
print(" No fallback providers configured — nothing to clear.")
print()
return
print()
print(f" Current fallback chain ({len(chain)} {'entry' if len(chain) == 1 else 'entries'}):")
for i, entry in enumerate(chain, 1):
print(f" {i}. {_format_entry(entry)}")
print()
try:
resp = input(" Clear all entries? [y/N]: ").strip().lower()
except (KeyboardInterrupt, EOFError):
print()
print(" Cancelled.")
return
if resp not in ("y", "yes"):
print(" Cancelled — no change.")
return
_write_chain(config, [])
save_config(config)
print()
print(" Fallback chain cleared.")
print()
def _numbered_pick(question: str, choices: List[str]) -> Optional[int]:
"""Fallback numbered-list picker when curses is unavailable."""
print(question)
for i, c in enumerate(choices, 1):
print(f" {i}. {c}")
print()
while True:
try:
val = input(f"Choice [1-{len(choices)}]: ").strip()
if not val:
return None
idx = int(val) - 1
if 0 <= idx < len(choices):
return idx
print(f"Please enter 1-{len(choices)}")
except ValueError:
print("Please enter a number")
except (KeyboardInterrupt, EOFError):
print()
return None
# ---------------------------------------------------------------------------
# Dispatch
# ---------------------------------------------------------------------------
def cmd_fallback(args) -> None:
"""Top-level dispatcher for ``hermes fallback [subcommand]``."""
sub = getattr(args, "fallback_command", None)
if sub in (None, "", "list", "ls"):
cmd_fallback_list(args)
elif sub == "add":
cmd_fallback_add(args)
elif sub in ("remove", "rm"):
cmd_fallback_remove(args)
elif sub == "clear":
cmd_fallback_clear(args)
else:
print(f"Unknown fallback subcommand: {sub}")
print("Use one of: list, add, remove, clear")
raise SystemExit(2)
-1
View File
@@ -125,7 +125,6 @@ _DEFAULT_PAYLOADS = {
"task_id": "test-task",
"tool_call_id": "test-call",
"result": '{"output": "hello"}',
"duration_ms": 42,
},
"pre_llm_call": {
"session_id": "test-session",
-1281
View File
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
+7 -424
View File
@@ -1527,83 +1527,6 @@ def select_provider_and_model(args=None):
all_providers = [(p.slug, p.tui_desc) for p in CANONICAL_PROVIDERS]
def _named_custom_provider_map(cfg) -> dict[str, dict[str, str]]:
from hermes_cli.config import read_raw_config
# Build a lookup of raw (un-expanded) api_key templates keyed by a
# stable identity. We intentionally bypass
# ``get_compatible_custom_providers(read_raw_config())`` here because
# its ``_normalize_custom_provider_entry`` step calls ``urlparse()``
# on ``base_url`` and drops any entry whose ``base_url`` is itself an
# env-ref template (e.g. ``${NEURALWATT_API_BASE}``). Dropping those
# entries is exactly how env-ref preservation fails for the user
# config that motivated this fix.
raw_api_key_refs: dict[tuple, str] = {}
raw_cfg = read_raw_config()
def _record_raw(
name: str,
provider_key: str,
model: str,
api_key: str,
) -> None:
template = str(api_key or "").strip()
if "${" not in template:
return
name = str(name or "").strip()
provider_key = str(provider_key or "").strip()
model = str(model or "").strip()
# Index by every plausible identity the loaded (expanded) config
# might present: (name), (name, model), (provider_key), and
# (provider_key, model). Case-insensitive on name/provider_key so
# the loaded entry matches regardless of display casing.
if name:
raw_api_key_refs.setdefault((name.lower(),), template)
raw_api_key_refs.setdefault((name.lower(), model), template)
if provider_key:
raw_api_key_refs.setdefault((provider_key.lower(),), template)
raw_api_key_refs.setdefault(
(provider_key.lower(), model), template
)
raw_list = raw_cfg.get("custom_providers")
if isinstance(raw_list, list):
for raw_entry in raw_list:
if not isinstance(raw_entry, dict):
continue
_record_raw(
raw_entry.get("name", ""),
"",
raw_entry.get("model", "")
or raw_entry.get("default_model", ""),
raw_entry.get("api_key", ""),
)
raw_providers = raw_cfg.get("providers")
if isinstance(raw_providers, dict):
for raw_key, raw_entry in raw_providers.items():
if not isinstance(raw_entry, dict):
continue
_record_raw(
raw_entry.get("name", "") or raw_key,
raw_key,
raw_entry.get("model", "")
or raw_entry.get("default_model", ""),
raw_entry.get("api_key", ""),
)
def _lookup_ref(name: str, provider_key: str, model: str) -> str:
name_lc = str(name or "").strip().lower()
pkey_lc = str(provider_key or "").strip().lower()
model = str(model or "").strip()
for identity in (
(pkey_lc, model),
(pkey_lc,),
(name_lc, model),
(name_lc,),
):
if identity[0] and identity in raw_api_key_refs:
return raw_api_key_refs[identity]
return ""
custom_provider_map = {}
for entry in get_compatible_custom_providers(cfg):
if not isinstance(entry, dict):
@@ -1627,9 +1550,6 @@ def select_provider_and_model(args=None):
"model": entry.get("model", ""),
"api_mode": entry.get("api_mode", ""),
"provider_key": provider_key,
"api_key_ref": _lookup_ref(
name, provider_key, entry.get("model", "")
),
}
return custom_provider_map
@@ -1719,8 +1639,6 @@ def select_provider_and_model(args=None):
_model_flow_stepfun(config, current_model)
elif selected_provider == "bedrock":
_model_flow_bedrock(config, current_model)
elif selected_provider == "azure-foundry":
_model_flow_azure_foundry(config, current_model)
elif selected_provider in (
"gemini",
"deepseek",
@@ -2315,13 +2233,13 @@ def _model_flow_nous(config, current_model="", args=None):
# The live /models endpoint returns hundreds of models; the curated list
# shows only agentic models users recognize from OpenRouter.
from hermes_cli.models import (
get_curated_nous_model_ids,
_PROVIDER_MODELS,
get_pricing_for_provider,
check_nous_free_tier,
partition_nous_models_by_tier,
)
model_ids = get_curated_nous_model_ids()
model_ids = _PROVIDER_MODELS.get("nous", [])
if not model_ids:
print("No curated models available for Nous Portal.")
return
@@ -2864,19 +2782,6 @@ def _auto_provider_name(base_url: str) -> str:
return name
def _custom_provider_api_key_config_value(provider_info, resolved_api_key=""):
"""Return the value that should be persisted for a custom provider key."""
api_key_ref = str(provider_info.get("api_key_ref", "") or "").strip()
if api_key_ref:
return api_key_ref
key_env = str(provider_info.get("key_env", "") or "").strip()
if key_env and not str(provider_info.get("api_key", "") or "").strip():
return f"${{{key_env}}}"
return str(resolved_api_key or "").strip()
def _save_custom_provider(
base_url, api_key="", model="", context_length=None, name=None
):
@@ -2932,203 +2837,6 @@ def _save_custom_provider(
print(f' 💾 Saved to custom providers as "{name}" (edit in config.yaml)')
def _model_flow_azure_foundry(config, current_model=""):
"""Azure Foundry provider: configure endpoint, API mode, API key, and model.
Azure Foundry supports both OpenAI-style (``/v1/chat/completions``) and
Anthropic-style (``/v1/messages``) endpoints. The wizard auto-detects
the transport and available models when possible:
* URLs ending in ``/anthropic`` Anthropic Messages API.
* Successful ``GET <base>/models`` probe OpenAI-style + populates
a picker with the returned deployment / model IDs.
* Anthropic Messages probe fallback when ``/models`` fails.
* Manual entry when every probe fails (private endpoints, etc.).
Context lengths for the chosen model are resolved via the standard
:func:`agent.model_metadata.get_model_context_length` chain
(models.dev, provider metadata, hardcoded family fallbacks).
"""
from hermes_cli.auth import _save_model_choice, deactivate_provider # noqa: F401
from hermes_cli.config import get_env_value, save_env_value, load_config, save_config
from hermes_cli import azure_detect
import getpass
# ── Load current Azure Foundry configuration ─────────────────────
model_cfg = config.get("model", {})
if isinstance(model_cfg, dict) and model_cfg.get("provider") == "azure-foundry":
current_base_url = str(model_cfg.get("base_url", "") or "")
current_api_mode = str(model_cfg.get("api_mode", "") or "")
else:
current_base_url = ""
current_api_mode = ""
current_api_key = get_env_value("AZURE_FOUNDRY_API_KEY") or ""
print()
print("Azure Foundry Configuration")
print("=" * 50)
print()
print("Azure Foundry can host models with either OpenAI-style or")
print("Anthropic-style API endpoints. Hermes will probe your")
print("endpoint to auto-detect the transport and the deployed")
print("models when possible.")
print()
if current_base_url:
print(f" Current endpoint: {current_base_url}")
if current_api_mode:
_lbl = "OpenAI-style" if current_api_mode == "chat_completions" else "Anthropic-style"
print(f" Current API mode: {_lbl}")
if current_api_key:
print(f" Current API key: {current_api_key[:8]}...")
print()
# ── Step 1: endpoint URL ─────────────────────────────────────────
try:
base_url = input(
f"API endpoint URL [{current_base_url or 'e.g. https://your-resource.openai.azure.com/openai/v1'}]: "
).strip()
except (KeyboardInterrupt, EOFError):
print("\nCancelled.")
return
effective_url = (base_url or current_base_url).rstrip("/")
if not effective_url:
print("No endpoint URL provided. Cancelled.")
return
if not effective_url.startswith(("http://", "https://")):
print(f"Invalid URL: {effective_url} (must start with http:// or https://)")
return
# ── Step 2: API key ──────────────────────────────────────────────
print()
try:
api_key = getpass.getpass(
f"API key [{current_api_key[:8] + '...' if current_api_key else 'required'}]: "
).strip()
except (KeyboardInterrupt, EOFError):
print("\nCancelled.")
return
effective_key = api_key or current_api_key
if not effective_key:
print("No API key provided. Cancelled.")
return
# ── Step 3: auto-detect transport + models ───────────────────────
print()
print("◐ Probing endpoint to auto-detect transport and models...")
detection = azure_detect.detect(effective_url, effective_key)
discovered_models: list[str] = list(detection.models)
api_mode: str = detection.api_mode or ""
if api_mode:
mode_label = "OpenAI-style" if api_mode == "chat_completions" else "Anthropic-style"
print(f"✓ Detected API transport: {mode_label}")
if detection.reason:
print(f" ({detection.reason})")
if discovered_models:
print(f"✓ Found {len(discovered_models)} deployed model(s) on this endpoint")
else:
print(f"⚠ Auto-detection incomplete: {detection.reason}")
print()
print("Select the API format your Azure Foundry endpoint uses:")
print(" 1. OpenAI-style (POST /v1/chat/completions)")
print(" For: GPT models, Llama, Mistral, and most open models")
print(" 2. Anthropic-style (POST /v1/messages)")
print(" For: Claude models deployed via Anthropic API format")
try:
default_choice = "2" if current_api_mode == "anthropic_messages" else "1"
mode_choice = input(f"API format [1/2] ({default_choice}): ").strip() or default_choice
except (KeyboardInterrupt, EOFError):
print("\nCancelled.")
return
api_mode = "anthropic_messages" if mode_choice == "2" else "chat_completions"
# ── Step 4: model name ───────────────────────────────────────────
print()
effective_model = ""
if discovered_models:
print("Available models on this endpoint:")
for i, mid in enumerate(discovered_models[:30], start=1):
print(f" {i:>2}. {mid}")
if len(discovered_models) > 30:
print(f" ... and {len(discovered_models) - 30} more (type name manually if not shown)")
print()
try:
pick = input(
f"Pick by number, or type a deployment name [{current_model or discovered_models[0]}]: "
).strip()
except (KeyboardInterrupt, EOFError):
print("\nCancelled.")
return
if not pick:
effective_model = current_model or discovered_models[0]
elif pick.isdigit() and 1 <= int(pick) <= min(len(discovered_models), 30):
effective_model = discovered_models[int(pick) - 1]
else:
effective_model = pick
else:
try:
model_name = input(
f"Model / deployment name [{current_model or 'e.g. gpt-5.4, claude-sonnet-4-6'}]: "
).strip()
except (KeyboardInterrupt, EOFError):
print("\nCancelled.")
return
effective_model = model_name or current_model
if not effective_model:
print("No model name provided. Cancelled.")
return
# ── Step 5: context-length lookup ────────────────────────────────
ctx_len = azure_detect.lookup_context_length(
effective_model, effective_url, effective_key,
)
# ── Step 6: persist ──────────────────────────────────────────────
save_env_value("AZURE_FOUNDRY_API_KEY", effective_key)
cfg = load_config()
model = cfg.get("model")
if not isinstance(model, dict):
model = {"default": model} if model else {}
cfg["model"] = model
model["provider"] = "azure-foundry"
model["base_url"] = effective_url
model["api_mode"] = api_mode
model["default"] = effective_model
if ctx_len:
model["context_length"] = ctx_len
save_config(cfg)
deactivate_provider()
config["model"] = dict(model)
# Clear any conflicting env vars so auxiliary clients don't poison
# themselves with a stale OpenAI base URL / key.
if get_env_value("OPENAI_BASE_URL"):
save_env_value("OPENAI_BASE_URL", "")
if get_env_value("OPENAI_API_KEY"):
save_env_value("OPENAI_API_KEY", "")
mode_label = "OpenAI-style" if api_mode == "chat_completions" else "Anthropic-style"
print()
print("✓ Azure Foundry configured:")
print(f" Endpoint: {effective_url}")
print(f" API mode: {mode_label}")
print(f" Model: {effective_model}")
if ctx_len:
print(f" Context length: {ctx_len:,} tokens")
else:
print(" Context length: not auto-detected (will fall back at runtime)")
print()
def _remove_custom_provider(config):
"""Let the user remove a saved custom provider from config.yaml."""
from hermes_cli.config import load_config, save_config
@@ -3215,7 +2923,6 @@ def _model_flow_named_custom(config, provider_info):
# Resolve key from env var if api_key not set directly
if not api_key and key_env:
api_key = os.environ.get(key_env, "")
config_api_key = _custom_provider_api_key_config_value(provider_info, api_key)
print(f" Provider: {name}")
print(f" URL: {base_url}")
@@ -3312,8 +3019,8 @@ def _model_flow_named_custom(config, provider_info):
else:
model["provider"] = "custom"
model["base_url"] = base_url
if config_api_key:
model["api_key"] = config_api_key
if api_key:
model["api_key"] = api_key
# Apply api_mode from custom_providers entry, or clear stale value
custom_api_mode = provider_info.get("api_mode", "")
if custom_api_mode:
@@ -3331,15 +3038,15 @@ def _model_flow_named_custom(config, provider_info):
provider_entry = providers_cfg.get(provider_key)
if isinstance(provider_entry, dict):
provider_entry["default_model"] = model_name
if config_api_key and not str(provider_entry.get("api_key", "") or "").strip():
provider_entry["api_key"] = config_api_key
if api_key and not str(provider_entry.get("api_key", "") or "").strip():
provider_entry["api_key"] = api_key
if key_env and not str(provider_entry.get("key_env", "") or "").strip():
provider_entry["key_env"] = key_env
cfg["providers"] = providers_cfg
save_config(cfg)
else:
# Save model name to the custom_providers entry for next time
_save_custom_provider(base_url, config_api_key, model_name)
_save_custom_provider(base_url, api_key, model_name)
print(f"\n✅ Model set to: {model_name}")
print(f" Provider: {name} ({base_url})")
@@ -4780,13 +4487,6 @@ def cmd_webhook(args):
webhook_command(args)
def cmd_kanban(args):
"""Multi-profile collaboration board."""
from hermes_cli.kanban import kanban_command
return kanban_command(args)
def cmd_hooks(args):
"""Shell-hook inspection and management."""
from hermes_cli.hooks import hooks_command
@@ -5884,54 +5584,6 @@ def _finalize_update_output(state):
pass
def _cmd_update_check():
"""Implement ``hermes update --check``: fetch and report without installing."""
git_dir = PROJECT_ROOT / ".git"
if not git_dir.exists():
print("✗ Not a git repository — cannot check for updates.")
sys.exit(1)
git_cmd = ["git"]
if sys.platform == "win32":
git_cmd = ["git", "-c", "windows.appendAtomically=false"]
print("→ Fetching from origin...")
fetch_result = subprocess.run(
git_cmd + ["fetch", "origin"],
cwd=PROJECT_ROOT,
capture_output=True,
text=True,
)
if fetch_result.returncode != 0:
stderr = fetch_result.stderr.strip()
if "Could not resolve host" in stderr or "unable to access" in stderr:
print("✗ Network error — cannot reach the remote repository.")
elif "Authentication failed" in stderr or "could not read Username" in stderr:
print("✗ Authentication failed — check your git credentials or SSH key.")
else:
print("✗ Failed to fetch from origin.")
if stderr:
print(f" {stderr.splitlines()[0]}")
sys.exit(1)
rev_result = subprocess.run(
git_cmd + ["rev-list", "HEAD..origin/main", "--count"],
cwd=PROJECT_ROOT,
capture_output=True,
text=True,
check=True,
)
behind = int(rev_result.stdout.strip())
if behind == 0:
print("✓ Already up to date.")
else:
commits_word = "commit" if behind == 1 else "commits"
print(f"⚕ Update available: {behind} {commits_word} behind origin/main.")
from hermes_cli.config import recommended_update_command
print(f" Run '{recommended_update_command()}' to install.")
def cmd_update(args):
"""Update Hermes Agent to the latest version.
@@ -5945,10 +5597,6 @@ def cmd_update(args):
managed_error("update Hermes Agent")
return
if getattr(args, "check", False):
_cmd_update_check()
return
gateway_mode = getattr(args, "gateway", False)
# Protect against mid-update terminal disconnects (SIGHUP) and tolerate
@@ -7230,9 +6878,6 @@ Examples:
hermes auth remove <p> <t> Remove pooled credential by index, id, or label
hermes auth reset <provider> Clear exhaustion status for a provider
hermes model Select default model
hermes fallback [list] Show fallback provider chain
hermes fallback add Add a fallback provider (same picker as `hermes model`)
hermes fallback remove Remove a fallback provider from the chain
hermes config View configuration
hermes config edit Edit config in $EDITOR
hermes config set model gpt-4 Set a config value
@@ -7574,42 +7219,6 @@ For more help on a command:
)
model_parser.set_defaults(func=cmd_model)
# =========================================================================
# fallback command — manage the fallback provider chain
# =========================================================================
from hermes_cli.fallback_cmd import cmd_fallback
fallback_parser = subparsers.add_parser(
"fallback",
help="Manage fallback providers (tried when the primary model fails)",
description=(
"Manage the fallback provider chain. Fallback providers are tried "
"in order when the primary model fails with rate-limit, overload, or "
"connection errors. See: "
"https://hermes-agent.nousresearch.com/docs/user-guide/features/fallback-providers"
),
)
fallback_subparsers = fallback_parser.add_subparsers(dest="fallback_command")
fallback_subparsers.add_parser(
"list",
aliases=["ls"],
help="Show the current fallback chain (default when no subcommand)",
)
fallback_subparsers.add_parser(
"add",
help="Pick a provider + model (same picker as `hermes model`) and append to the chain",
)
fallback_subparsers.add_parser(
"remove",
aliases=["rm"],
help="Pick an entry to delete from the chain",
)
fallback_subparsers.add_parser(
"clear",
help="Remove all fallback entries",
)
fallback_parser.set_defaults(func=cmd_fallback)
# =========================================================================
# gateway command
# =========================================================================
@@ -7780,19 +7389,6 @@ For more help on a command:
setup_parser.add_argument(
"--reset", action="store_true", help="Reset configuration to defaults"
)
setup_parser.add_argument(
"--reconfigure",
action="store_true",
help="(Default on existing installs.) Re-run the full wizard, "
"showing current values as defaults. Kept for backwards "
"compatibility — a bare 'hermes setup' now does this.",
)
setup_parser.add_argument(
"--quick",
action="store_true",
help="On existing installs: only prompt for items that are missing "
"or unset, instead of running the full reconfigure wizard.",
)
setup_parser.set_defaults(func=cmd_setup)
# =========================================================================
@@ -8123,13 +7719,6 @@ For more help on a command:
webhook_parser.set_defaults(func=cmd_webhook)
# =========================================================================
# kanban command — multi-profile collaboration board
# =========================================================================
from hermes_cli.kanban import build_parser as _build_kanban_parser
kanban_parser = _build_kanban_parser(subparsers)
kanban_parser.set_defaults(func=cmd_kanban)
# =========================================================================
# hooks command — shell-hook inspection and management
# =========================================================================
@@ -9282,12 +8871,6 @@ Examples:
default=False,
help="Gateway mode: use file-based IPC for prompts instead of stdin (used internally by /update)",
)
update_parser.add_argument(
"--check",
action="store_true",
default=False,
help="Check whether an update is available without installing anything",
)
update_parser.set_defaults(func=cmd_update)
# =========================================================================
-329
View File
@@ -1,329 +0,0 @@
"""Remote model catalog fetcher.
The Hermes docs site hosts a JSON manifest of curated models for providers
we want to update without shipping a release (currently OpenRouter and
Nous Portal). This module fetches, validates, and caches that manifest,
falling back to the in-repo hardcoded lists when the network is unavailable.
Pipeline
--------
1. ``get_catalog()`` returns a parsed manifest dict.
- Checks in-process cache (invalidated by TTL).
- Reads disk cache at ``~/.hermes/cache/model_catalog.json``.
- Fetches the master URL if disk cache is stale or missing.
- On any fetch failure, keeps using the stale cache (or empty dict).
2. ``get_curated_openrouter_models()`` / ``get_curated_nous_models()``
thin accessors returning the shapes existing callers expect. Each
falls back to the in-repo hardcoded list on any lookup failure.
Schema (version 1)
------------------
::
{
"version": 1,
"updated_at": "2026-04-25T22:00:00Z",
"metadata": {...}, # free-form
"providers": {
"openrouter": {
"metadata": {...}, # free-form
"models": [
{"id": "vendor/model", "description": "recommended",
"metadata": {...}} # free-form, model-level
]
},
"nous": {...}
}
}
Unknown fields are ignored extra metadata can be added at either level
without bumping ``version``. ``version`` bumps are reserved for
breaking changes (renaming ``providers``, changing ``models`` shape).
"""
from __future__ import annotations
import json
import logging
import os
import time
import urllib.error
import urllib.request
from pathlib import Path
from typing import Any
from hermes_cli import __version__ as _HERMES_VERSION
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------
DEFAULT_CATALOG_URL = (
"https://hermes-agent.nousresearch.com/docs/api/model-catalog.json"
)
DEFAULT_TTL_HOURS = 24
DEFAULT_FETCH_TIMEOUT = 8.0
SUPPORTED_SCHEMA_VERSION = 1
_HERMES_USER_AGENT = f"hermes-cli/{_HERMES_VERSION}"
# In-process cache to avoid repeated disk + parse work across multiple
# calls within the same session. Invalidated by TTL against the disk file's
# mtime, so calling code never has to think about this.
_catalog_cache: dict[str, Any] | None = None
_catalog_cache_source_mtime: float = 0.0
# ---------------------------------------------------------------------------
# Config
# ---------------------------------------------------------------------------
def _load_catalog_config() -> dict[str, Any]:
"""Load the ``model_catalog`` config block with defaults filled in."""
try:
from hermes_cli.config import load_config
cfg = load_config() or {}
except Exception:
cfg = {}
raw = cfg.get("model_catalog")
if not isinstance(raw, dict):
raw = {}
return {
"enabled": bool(raw.get("enabled", True)),
"url": str(raw.get("url") or DEFAULT_CATALOG_URL),
"ttl_hours": float(raw.get("ttl_hours") or DEFAULT_TTL_HOURS),
"providers": raw.get("providers") if isinstance(raw.get("providers"), dict) else {},
}
def _cache_path() -> Path:
"""Return the disk cache path. Import lazily so tests can monkeypatch home."""
from hermes_constants import get_hermes_home
return get_hermes_home() / "cache" / "model_catalog.json"
# ---------------------------------------------------------------------------
# Fetch + validate + cache
# ---------------------------------------------------------------------------
def _fetch_manifest(url: str, timeout: float) -> dict[str, Any] | None:
"""HTTP GET the manifest URL and return a parsed dict, or None on failure."""
try:
req = urllib.request.Request(
url,
headers={
"Accept": "application/json",
"User-Agent": _HERMES_USER_AGENT,
},
)
with urllib.request.urlopen(req, timeout=timeout) as resp:
data = json.loads(resp.read().decode())
except (urllib.error.URLError, TimeoutError, json.JSONDecodeError, OSError) as exc:
logger.info("model catalog fetch failed (%s): %s", url, exc)
return None
except Exception as exc: # pragma: no cover — defensive
logger.info("model catalog fetch errored (%s): %s", url, exc)
return None
if not _validate_manifest(data):
logger.info("model catalog at %s failed schema validation", url)
return None
return data
def _validate_manifest(data: Any) -> bool:
"""Return True when ``data`` matches the minimum manifest shape."""
if not isinstance(data, dict):
return False
version = data.get("version")
if not isinstance(version, int) or version > SUPPORTED_SCHEMA_VERSION:
# Future schema version we don't understand — refuse rather than
# guess. Older schemas (version < 1) aren't supported either.
return False
providers = data.get("providers")
if not isinstance(providers, dict):
return False
for pname, pblock in providers.items():
if not isinstance(pname, str) or not isinstance(pblock, dict):
return False
models = pblock.get("models")
if not isinstance(models, list):
return False
for m in models:
if not isinstance(m, dict):
return False
if not isinstance(m.get("id"), str) or not m["id"].strip():
return False
return True
def _read_disk_cache() -> tuple[dict[str, Any] | None, float]:
"""Return ``(data_or_none, mtime)``. mtime is 0 if file is missing."""
path = _cache_path()
try:
mtime = path.stat().st_mtime
except (OSError, FileNotFoundError):
return (None, 0.0)
try:
with open(path) as fh:
data = json.load(fh)
except (OSError, json.JSONDecodeError):
return (None, 0.0)
if not _validate_manifest(data):
return (None, 0.0)
return (data, mtime)
def _write_disk_cache(data: dict[str, Any]) -> None:
path = _cache_path()
try:
path.parent.mkdir(parents=True, exist_ok=True)
tmp = path.with_suffix(path.suffix + ".tmp")
with open(tmp, "w") as fh:
json.dump(data, fh, indent=2)
fh.write("\n")
os.replace(tmp, path)
except OSError as exc:
logger.info("model catalog cache write failed: %s", exc)
# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------
def get_catalog(*, force_refresh: bool = False) -> dict[str, Any]:
"""Return the parsed model catalog manifest, or an empty dict on failure.
Callers should treat a missing provider/model as "use the in-repo fallback"
never raise from this function so the CLI keeps working offline.
"""
global _catalog_cache, _catalog_cache_source_mtime
cfg = _load_catalog_config()
if not cfg["enabled"]:
return {}
ttl_seconds = max(0.0, cfg["ttl_hours"] * 3600.0)
disk_data, disk_mtime = _read_disk_cache()
now = time.time()
disk_fresh = disk_data is not None and (now - disk_mtime) < ttl_seconds
# In-process cache hit: disk hasn't changed since we loaded it and still fresh.
if (
not force_refresh
and _catalog_cache is not None
and disk_data is not None
and disk_mtime == _catalog_cache_source_mtime
and disk_fresh
):
return _catalog_cache
# Disk is fresh enough — use it without a network hit.
if not force_refresh and disk_fresh and disk_data is not None:
_catalog_cache = disk_data
_catalog_cache_source_mtime = disk_mtime
return disk_data
# Need to (re)fetch. If it fails, fall back to any stale disk copy.
fetched = _fetch_manifest(cfg["url"], DEFAULT_FETCH_TIMEOUT)
if fetched is not None:
_write_disk_cache(fetched)
new_disk_data, new_mtime = _read_disk_cache()
if new_disk_data is not None:
_catalog_cache = new_disk_data
_catalog_cache_source_mtime = new_mtime
return new_disk_data
_catalog_cache = fetched
_catalog_cache_source_mtime = now
return fetched
if disk_data is not None:
_catalog_cache = disk_data
_catalog_cache_source_mtime = disk_mtime
return disk_data
return {}
def _fetch_provider_override(provider: str) -> dict[str, Any] | None:
"""If ``model_catalog.providers.<name>.url`` is set, fetch that instead."""
cfg = _load_catalog_config()
if not cfg["enabled"]:
return None
provider_cfg = cfg["providers"].get(provider)
if not isinstance(provider_cfg, dict):
return None
override_url = provider_cfg.get("url")
if not isinstance(override_url, str) or not override_url.strip():
return None
# Override fetches skip the disk cache because they're usually
# third-party self-hosted. Re-request on every call but with a short
# timeout so they don't block the picker.
return _fetch_manifest(override_url.strip(), DEFAULT_FETCH_TIMEOUT)
def _get_provider_block(provider: str) -> dict[str, Any] | None:
"""Return the provider's manifest block, respecting per-provider overrides."""
override = _fetch_provider_override(provider)
if override is not None:
block = override.get("providers", {}).get(provider)
if isinstance(block, dict):
return block
catalog = get_catalog()
if not catalog:
return None
block = catalog.get("providers", {}).get(provider)
return block if isinstance(block, dict) else None
def get_curated_openrouter_models() -> list[tuple[str, str]] | None:
"""Return OpenRouter's curated ``[(id, description), ...]`` from the manifest.
Returns ``None`` when the manifest is unavailable, so callers can fall
back to their hardcoded list.
"""
block = _get_provider_block("openrouter")
if not block:
return None
out: list[tuple[str, str]] = []
for m in block.get("models", []):
mid = str(m.get("id") or "").strip()
if not mid:
continue
desc = str(m.get("description") or "")
out.append((mid, desc))
return out or None
def get_curated_nous_models() -> list[str] | None:
"""Return Nous Portal's curated list of model ids from the manifest.
Returns ``None`` when the manifest is unavailable.
"""
block = _get_provider_block("nous")
if not block:
return None
out: list[str] = []
for m in block.get("models", []):
mid = str(m.get("id") or "").strip()
if mid:
out.append(mid)
return out or None
def reset_cache() -> None:
"""Clear the in-process cache. Used by tests and ``hermes model --refresh``."""
global _catalog_cache, _catalog_cache_source_mtime
_catalog_cache = None
_catalog_cache_source_mtime = 0.0
+12 -39
View File
@@ -533,7 +533,6 @@ def resolve_display_context_length(
base_url: str = "",
api_key: str = "",
model_info: Optional[ModelInfo] = None,
custom_providers: list | None = None,
) -> Optional[int]:
"""Resolve the context length to show in /model output.
@@ -544,11 +543,6 @@ def resolve_display_context_length(
about Codex OAuth, Copilot, Nous, and falls back to models.dev for the
rest.
When ``custom_providers`` is provided, per-model ``context_length``
overrides from ``custom_providers[].models.<id>.context_length`` are
honored this closes #15779 where ``/model`` switch ignored user-set
overrides.
Prefer the provider-aware value; fall back to ``model_info.context_window``
only if the resolver returns nothing.
"""
@@ -559,7 +553,6 @@ def resolve_display_context_length(
base_url=base_url or "",
api_key=api_key or "",
provider=provider or None,
custom_providers=custom_providers,
)
if ctx:
return int(ctx)
@@ -838,14 +831,9 @@ def switch_model(
requested=current_provider,
target_model=new_model,
)
# If resolution fell through to "custom" (e.g. named custom provider like
# "ollama-launch" that resolve_runtime_provider doesn't know), keep existing
# credentials. Otherwise use the resolved values (picks up credential rotation,
# base_url adjustments for OpenCode, etc.).
if runtime.get("provider") != "custom":
api_key = runtime.get("api_key", "")
base_url = runtime.get("base_url", "")
api_mode = runtime.get("api_mode", "")
api_key = runtime.get("api_key", "")
base_url = runtime.get("base_url", "")
api_mode = runtime.get("api_mode", "")
except Exception:
pass
@@ -879,31 +867,16 @@ def switch_model(
"message": f"Could not validate `{new_model}`: {e}",
}
# Override rejection if model is in the user's saved provider config.
# API /v1/models may not list cloud/aliased models even though the server supports them.
if not validation.get("accepted"):
override = False
if user_providers:
for up in user_providers:
if isinstance(up, dict) and up.get("provider") == target_provider:
cfg_models = up.get("models", [])
if new_model in cfg_models or any(
m.get("name") == new_model for m in cfg_models if isinstance(m, dict)
):
override = True
break
if override:
validation = {"accepted": True, "persist": True, "recognized": False, "message": validation.get("message", "")}
else:
msg = validation.get("message", "Invalid model")
return ModelSwitchResult(
success=False,
new_model=new_model,
target_provider=target_provider,
provider_label=provider_label,
is_global=is_global,
error_message=msg,
)
msg = validation.get("message", "Invalid model")
return ModelSwitchResult(
success=False,
new_model=new_model,
target_provider=target_provider,
provider_label=provider_label,
is_global=is_global,
error_message=msg,
)
# Apply auto-correction if validation found a closer match
if validation.get("corrected_model"):
+3 -34
View File
@@ -383,9 +383,6 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
"us.meta.llama4-maverick-17b-instruct-v1:0",
"us.meta.llama4-scout-17b-instruct-v1:0",
],
# Azure Foundry: user-provided endpoint and model.
# Empty list because models depend on the endpoint configuration.
"azure-foundry": [],
}
# Vercel AI Gateway: derive the bare-model-id catalog from the curated
@@ -743,7 +740,6 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
ProviderEntry("opencode-zen", "OpenCode Zen", "OpenCode Zen (35+ curated models, pay-as-you-go)"),
ProviderEntry("opencode-go", "OpenCode Go", "OpenCode Go (open models, $10/month subscription)"),
ProviderEntry("bedrock", "AWS Bedrock", "AWS Bedrock (Claude, Nova, Llama, DeepSeek — IAM or API key)"),
ProviderEntry("azure-foundry", "Azure Foundry", "Azure Foundry (OpenAI-style or Anthropic-style endpoint — your Azure AI deployment)"),
]
# Derived dicts — used throughout the codebase
@@ -876,16 +872,7 @@ def fetch_openrouter_models(
if _openrouter_catalog_cache is not None and not force_refresh:
return list(_openrouter_catalog_cache)
# Prefer the remotely-hosted catalog manifest; fall back to the in-repo
# snapshot when the manifest is unreachable. Both are curated lists that
# drive the picker; the OpenRouter live /v1/models filter (tool support,
# free pricing) is applied on top either way.
try:
from hermes_cli.model_catalog import get_curated_openrouter_models
remote = get_curated_openrouter_models()
except Exception:
remote = None
fallback = list(remote) if remote else list(OPENROUTER_MODELS)
fallback = list(OPENROUTER_MODELS)
preferred_ids = [mid for mid, _ in fallback]
try:
@@ -938,24 +925,6 @@ def model_ids(*, force_refresh: bool = False) -> list[str]:
return [mid for mid, _ in fetch_openrouter_models(force_refresh=force_refresh)]
def get_curated_nous_model_ids() -> list[str]:
"""Return the curated Nous Portal model-id list.
Prefers the remotely-hosted catalog manifest (published under
``website/static/api/model-catalog.json``); falls back to the in-repo
snapshot in ``_PROVIDER_MODELS["nous"]`` when the manifest is
unreachable. Always returns a list (never None).
"""
try:
from hermes_cli.model_catalog import get_curated_nous_models
remote = get_curated_nous_models()
except Exception:
remote = None
if remote:
return list(remote)
return list(_PROVIDER_MODELS.get("nous", []))
def _ai_gateway_model_is_free(pricing: Any) -> bool:
"""Return True if an AI Gateway model has $0 input AND output pricing."""
if not isinstance(pricing, dict):
@@ -2653,8 +2622,8 @@ def validate_requested_model(
)
return {
"accepted": True,
"persist": True,
"accepted": False,
"persist": False,
"recognized": False,
"message": message,
}
-6
View File
@@ -167,12 +167,6 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = {
transport="openai_chat",
base_url_env_var="OLLAMA_BASE_URL",
),
# Azure Foundry: supports both OpenAI-style and Anthropic-style endpoints.
# The transport is determined at runtime from config.yaml model.api_mode.
"azure-foundry": HermesOverlay(
transport="openai_chat", # default; overridden by api_mode in config
base_url_env_var="AZURE_FOUNDRY_BASE_URL",
),
}
+7 -148
View File
@@ -221,19 +221,6 @@ def _resolve_runtime_from_pool_entry(
elif provider == "copilot":
api_mode = _copilot_runtime_api_mode(model_cfg, getattr(entry, "runtime_api_key", ""))
base_url = base_url or PROVIDER_REGISTRY["copilot"].inference_base_url
elif provider == "azure-foundry":
# Azure Foundry: read api_mode and base_url from config
cfg_provider = str(model_cfg.get("provider") or "").strip().lower()
if cfg_provider == "azure-foundry":
cfg_base_url = str(model_cfg.get("base_url") or "").strip().rstrip("/")
if cfg_base_url:
base_url = cfg_base_url
configured_mode = _parse_api_mode(model_cfg.get("api_mode"))
if configured_mode:
api_mode = configured_mode
# For Anthropic-style endpoints, strip /v1 suffix
if api_mode == "anthropic_messages":
base_url = re.sub(r"/v1/?$", "", base_url)
else:
configured_provider = str(model_cfg.get("provider") or "").strip().lower()
# Honour model.base_url from config.yaml when the configured provider
@@ -602,71 +589,6 @@ def _resolve_openrouter_runtime(
}
def _resolve_azure_foundry_runtime(
*,
requested_provider: str,
model_cfg: Dict[str, Any],
explicit_api_key: Optional[str] = None,
explicit_base_url: Optional[str] = None,
) -> Dict[str, Any]:
"""Resolve an Azure Foundry runtime entry.
Reads ``model.base_url`` + ``model.api_mode`` from config.yaml (or
explicit overrides), pulls the API key from ``.env`` / env var, and
strips a trailing ``/v1`` for Anthropic-style endpoints because the
Anthropic SDK appends ``/v1/messages`` internally.
Raises :class:`AuthError` when required values are missing.
"""
explicit_api_key = str(explicit_api_key or "").strip()
explicit_base_url_clean = str(explicit_base_url or "").strip().rstrip("/")
cfg_provider = str(model_cfg.get("provider") or "").strip().lower()
cfg_base_url = ""
cfg_api_mode = "chat_completions"
if cfg_provider == "azure-foundry":
cfg_base_url = str(model_cfg.get("base_url") or "").strip().rstrip("/")
cfg_api_mode = _parse_api_mode(model_cfg.get("api_mode")) or "chat_completions"
env_base_url = os.getenv("AZURE_FOUNDRY_BASE_URL", "").strip().rstrip("/")
base_url = explicit_base_url_clean or cfg_base_url or env_base_url
if not base_url:
raise AuthError(
"Azure Foundry requires a base URL. Set it via 'hermes model' or "
"the AZURE_FOUNDRY_BASE_URL environment variable."
)
api_key = explicit_api_key
if not api_key:
try:
from hermes_cli.config import get_env_value
api_key = get_env_value("AZURE_FOUNDRY_API_KEY") or ""
except Exception:
api_key = ""
if not api_key:
api_key = os.getenv("AZURE_FOUNDRY_API_KEY", "").strip()
if not api_key:
raise AuthError(
"Azure Foundry requires an API key. Set AZURE_FOUNDRY_API_KEY in "
"~/.hermes/.env or run 'hermes model' to configure."
)
# Anthropic SDK appends /v1/messages itself, so strip any trailing /v1
# we inherited from the configured base_url to avoid double-/v1 paths.
if cfg_api_mode == "anthropic_messages":
base_url = re.sub(r"/v1/?$", "", base_url)
source = "explicit" if (explicit_api_key or explicit_base_url) else "config"
return {
"provider": "azure-foundry",
"api_mode": cfg_api_mode,
"base_url": base_url,
"api_key": api_key,
"source": source,
"requested_provider": requested_provider,
}
def _resolve_explicit_runtime(
*,
provider: str,
@@ -756,15 +678,6 @@ def _resolve_explicit_runtime(
"requested_provider": requested_provider,
}
# Azure Foundry: user-configured endpoint with selectable API mode
if provider == "azure-foundry":
return _resolve_azure_foundry_runtime(
requested_provider=requested_provider,
model_cfg=model_cfg,
explicit_api_key=explicit_api_key,
explicit_base_url=explicit_base_url,
)
pconfig = PROVIDER_REGISTRY.get(provider)
if pconfig and pconfig.auth_type == "api_key":
env_url = ""
@@ -833,40 +746,6 @@ def resolve_runtime_provider(
"""
requested_provider = resolve_requested_provider(requested)
# Azure Anthropic short-circuit: when explicitly targeting an Azure endpoint
# with provider="anthropic", bypass _resolve_named_custom_runtime (which would
# return provider="custom" with chat_completions api_mode and no valid key).
# Instead, use the Azure key directly with anthropic_messages api_mode.
_eff_base = (explicit_base_url or "").strip()
if requested_provider == "anthropic" and "azure.com" in _eff_base:
_azure_key = (
(explicit_api_key or "").strip()
or os.getenv("AZURE_ANTHROPIC_KEY", "").strip()
or os.getenv("ANTHROPIC_API_KEY", "").strip()
)
return {
"provider": "anthropic",
"api_mode": "anthropic_messages",
"base_url": _eff_base.rstrip("/"),
"api_key": _azure_key,
"source": "azure-explicit",
"requested_provider": requested_provider,
}
# Azure Foundry: user-configured endpoint with selectable API mode
# (OpenAI-style chat_completions or Anthropic-style anthropic_messages).
# Resolve before the custom-runtime / pool / generic paths so Azure
# config is always picked up from model.base_url + model.api_mode,
# regardless of whether the caller passed explicit_* args.
if requested_provider == "azure-foundry":
azure_runtime = _resolve_azure_foundry_runtime(
requested_provider=requested_provider,
model_cfg=_get_model_config(),
explicit_api_key=explicit_api_key,
explicit_base_url=explicit_base_url,
)
return azure_runtime
custom_runtime = _resolve_named_custom_runtime(
requested_provider=requested_provider,
explicit_api_key=explicit_api_key,
@@ -1045,6 +924,13 @@ def resolve_runtime_provider(
# Anthropic (native Messages API)
if provider == "anthropic":
from agent.anthropic_adapter import resolve_anthropic_token
token = resolve_anthropic_token()
if not token:
raise AuthError(
"No Anthropic credentials found. Set ANTHROPIC_TOKEN or ANTHROPIC_API_KEY, "
"run 'claude setup-token', or authenticate with 'claude /login'."
)
# Allow base URL override from config.yaml model.base_url, but only
# when the configured provider is anthropic — otherwise a non-Anthropic
# base_url (e.g. Codex endpoint) would leak into Anthropic requests.
@@ -1053,33 +939,6 @@ def resolve_runtime_provider(
if cfg_provider == "anthropic":
cfg_base_url = (model_cfg.get("base_url") or "").strip().rstrip("/")
base_url = cfg_base_url or "https://api.anthropic.com"
# For Azure AI Foundry endpoints, use ANTHROPIC_API_KEY directly —
# Claude Code OAuth tokens (sk-ant-oat01) are not accepted by Azure.
# Azure keys don't start with "sk-ant-" so resolve_anthropic_token()
# would find the Claude Code OAuth token first (priority 3) and return
# that instead, causing 401s. Detect Azure endpoints and use the env
# key directly to bypass the OAuth priority chain.
_is_azure_endpoint = "azure.com" in base_url.lower() or (
cfg_base_url and "azure.com" in cfg_base_url.lower()
)
if _is_azure_endpoint:
token = (
os.getenv("AZURE_ANTHROPIC_KEY", "").strip()
or os.getenv("ANTHROPIC_API_KEY", "").strip()
)
if not token:
raise AuthError(
"No Azure Anthropic API key found. Set AZURE_ANTHROPIC_KEY or ANTHROPIC_API_KEY."
)
else:
from agent.anthropic_adapter import resolve_anthropic_token
token = resolve_anthropic_token()
if not token:
raise AuthError(
"No Anthropic credentials found. Set ANTHROPIC_TOKEN or ANTHROPIC_API_KEY, "
"run 'claude setup-token', or authenticate with 'claude /login'."
)
return {
"provider": "anthropic",
"api_mode": "anthropic_messages",
+49 -27
View File
@@ -2863,6 +2863,17 @@ SETUP_SECTIONS = [
("agent", "Agent Settings", setup_agent_settings),
]
# The returning-user menu intentionally omits standalone TTS because model setup
# already includes TTS selection and tools setup covers the rest of the provider
# configuration. Keep this list in the same order as the visible menu entries.
RETURNING_USER_MENU_SECTION_KEYS = [
"model",
"terminal",
"gateway",
"tools",
"agent",
]
def run_setup_wizard(args):
"""Run the interactive setup wizard.
@@ -2887,9 +2898,6 @@ def run_setup_wizard(args):
save_config(copy.deepcopy(DEFAULT_CONFIG))
print_success("Configuration reset to defaults.")
reconfigure_requested = bool(getattr(args, "reconfigure", False))
quick_requested = bool(getattr(args, "quick", False))
config = load_config()
hermes_home = get_hermes_home()
@@ -2981,36 +2989,50 @@ def run_setup_wizard(args):
migration_ran = False
if is_existing:
# Existing install — default is the full-wizard reconfigure flow.
# Every prompt shows the current value as its default, so pressing
# Enter keeps it. Opt into `--quick` for the narrow "just fill in
# missing items" flow (useful after a partial OpenClaw migration
# or when a required API key got cleared).
if quick_requested:
# ── Returning User Menu ──
print()
print_header("Welcome Back!")
print_success("You already have Hermes configured.")
print()
menu_choices = [
"Quick Setup - configure missing items only",
"Full Setup - reconfigure everything",
"Model & Provider",
"Terminal Backend",
"Messaging Platforms (Gateway)",
"Tools",
"Agent Settings",
"Exit",
]
choice = prompt_choice("What would you like to do?", menu_choices, 0)
if choice == 0:
# Quick setup
_run_quick_setup(config, hermes_home)
return
print()
print_header("Reconfigure")
print_success("You already have Hermes configured.")
print_info("Running the full wizard — each prompt shows your current value.")
print_info("Press Enter to keep it, or type a new value to change it.")
print_info("")
print_info("Tip: jump straight to a section with 'hermes setup model|terminal|")
print_info(" gateway|tools|agent', or fill only missing items with --quick.")
# Fall through to the "Full Setup — run all sections" block below.
# --reconfigure is now the default on existing installs; the flag
# is preserved for backwards compatibility but is a no-op here.
elif choice == 1:
# Full setup — fall through to run all sections
pass
elif choice == 7:
print_info("Exiting. Run 'hermes setup' again when ready.")
return
elif 2 <= choice <= 6:
# Individual section — map by key, not by position.
# SETUP_SECTIONS includes TTS but the returning-user menu skips it,
# so positional indexing (choice - 2) would dispatch the wrong section.
section_key = RETURNING_USER_MENU_SECTION_KEYS[choice - 2]
section = next((s for s in SETUP_SECTIONS if s[0] == section_key), None)
if section:
_, label, func = section
func(config)
save_config(config)
_print_setup_summary(config, hermes_home)
return
else:
# ── First-Time Setup ──
print()
# --reconfigure / --quick on a fresh install are meaningless — fall
# through to the normal first-time flow.
if reconfigure_requested or quick_requested:
print_info("No existing configuration found — running first-time setup.")
print()
# Offer OpenClaw migration before configuration begins
migration_ran = _offer_openclaw_migration(hermes_home)
if migration_ran:
+2 -1
View File
@@ -10,7 +10,8 @@ import random
TIPS = [
# --- Slash Commands ---
"/background <prompt> (alias /bg or /btw) runs a task in a separate session while your current one stays free.",
"/btw <question> asks a quick side question without tools or history — great for clarifications.",
"/background <prompt> runs a task in a separate session while your current one stays free.",
"/branch forks the current session so you can explore a different direction without losing progress.",
"/compress manually compresses conversation context when things get long.",
"/rollback lists filesystem checkpoints — restore files the agent modified to any prior state.",
+4 -14
View File
@@ -3103,23 +3103,13 @@ def _mount_plugin_api_routes():
_log.warning("Plugin %s declares api=%s but file not found", plugin["name"], api_file_name)
continue
try:
module_name = f"hermes_dashboard_plugin_{plugin['name']}"
spec = importlib.util.spec_from_file_location(module_name, api_path)
spec = importlib.util.spec_from_file_location(
f"hermes_dashboard_plugin_{plugin['name']}", api_path,
)
if spec is None or spec.loader is None:
continue
mod = importlib.util.module_from_spec(spec)
# Register in sys.modules BEFORE exec_module so pydantic/FastAPI
# can resolve forward references (e.g. models defined in a file
# that uses `from __future__ import annotations`). Without this,
# TypeAdapter lazy-build fails at first request with
# "is not fully defined" because the module namespace isn't
# reachable by name for string-annotation resolution.
sys.modules[module_name] = mod
try:
spec.loader.exec_module(mod)
except Exception:
sys.modules.pop(module_name, None)
raise
spec.loader.exec_module(mod)
router = getattr(mod, "router", None)
if router is None:
_log.warning("Plugin %s api file has no 'router' attribute", plugin["name"])
+5 -29
View File
@@ -31,7 +31,7 @@ T = TypeVar("T")
DEFAULT_DB_PATH = get_hermes_home() / "state.db"
SCHEMA_VERSION = 9
SCHEMA_VERSION = 8
SCHEMA_SQL = """
CREATE TABLE IF NOT EXISTS schema_version (
@@ -83,8 +83,7 @@ CREATE TABLE IF NOT EXISTS messages (
reasoning TEXT,
reasoning_content TEXT,
reasoning_details TEXT,
codex_reasoning_items TEXT,
codex_message_items TEXT
codex_reasoning_items TEXT
);
CREATE TABLE IF NOT EXISTS state_meta (
@@ -357,15 +356,6 @@ class SessionDB:
except sqlite3.OperationalError:
pass # Column already exists
cursor.execute("UPDATE schema_version SET version = 8")
if current_version < 9:
# v9: preserve replayable Codex assistant message ids/phases so
# follow-up turns can rebuild Responses API message items instead
# of flattening everything to plain assistant text.
try:
cursor.execute('ALTER TABLE messages ADD COLUMN "codex_message_items" TEXT')
except sqlite3.OperationalError:
pass # Column already exists
cursor.execute("UPDATE schema_version SET version = 9")
# Unique title index — always ensure it exists (safe to run after migrations
# since the title column is guaranteed to exist at this point)
@@ -966,7 +956,6 @@ class SessionDB:
reasoning_content: str = None,
reasoning_details: Any = None,
codex_reasoning_items: Any = None,
codex_message_items: Any = None,
) -> int:
"""
Append a message to a session. Returns the message row ID.
@@ -983,10 +972,6 @@ class SessionDB:
json.dumps(codex_reasoning_items)
if codex_reasoning_items else None
)
codex_message_items_json = (
json.dumps(codex_message_items)
if codex_message_items else None
)
tool_calls_json = json.dumps(tool_calls) if tool_calls else None
# Pre-compute tool call count
@@ -998,9 +983,8 @@ class SessionDB:
cursor = conn.execute(
"""INSERT INTO messages (session_id, role, content, tool_call_id,
tool_calls, tool_name, timestamp, token_count, finish_reason,
reasoning, reasoning_content, reasoning_details, codex_reasoning_items,
codex_message_items)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
reasoning, reasoning_content, reasoning_details, codex_reasoning_items)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
(
session_id,
role,
@@ -1015,7 +999,6 @@ class SessionDB:
reasoning_content,
reasoning_details_json,
codex_items_json,
codex_message_items_json,
),
)
msg_id = cursor.lastrowid
@@ -1129,8 +1112,7 @@ class SessionDB:
with self._lock:
cursor = self._conn.execute(
"SELECT role, content, tool_call_id, tool_calls, tool_name, "
"reasoning, reasoning_content, reasoning_details, codex_reasoning_items, "
"codex_message_items "
"reasoning, reasoning_content, reasoning_details, codex_reasoning_items "
"FROM messages WHERE session_id = ? ORDER BY timestamp, id",
(session_id,),
)
@@ -1168,12 +1150,6 @@ class SessionDB:
except (json.JSONDecodeError, TypeError):
logger.warning("Failed to deserialize codex_reasoning_items, falling back to None")
msg["codex_reasoning_items"] = None
if row["codex_message_items"]:
try:
msg["codex_message_items"] = json.loads(row["codex_message_items"])
except (json.JSONDecodeError, TypeError):
logger.warning("Failed to deserialize codex_message_items, falling back to None")
msg["codex_message_items"] = None
messages.append(msg)
return messages
-12
View File
@@ -24,7 +24,6 @@ import json
import asyncio
import logging
import threading
import time
from typing import Dict, Any, List, Optional, Tuple
from tools.registry import discover_builtin_tools, registry
@@ -568,14 +567,6 @@ def handle_function_call(
except Exception:
pass # file_tools may not be loaded yet
# Measure tool dispatch latency so post_tool_call and
# transform_tool_result hooks can observe per-tool duration.
# Inspired by Claude Code 2.1.119, which added ``duration_ms`` to
# PostToolUse hook inputs so plugin authors can build latency
# dashboards, budget alerts, and regression canaries without having
# to wrap every tool manually. We use monotonic() so the value is
# unaffected by wall-clock adjustments during the call.
_dispatch_start = time.monotonic()
if function_name == "execute_code":
# Prefer the caller-provided list so subagents can't overwrite
# the parent's tool set via the process-global.
@@ -591,7 +582,6 @@ def handle_function_call(
task_id=task_id,
user_task=user_task,
)
duration_ms = int((time.monotonic() - _dispatch_start) * 1000)
try:
from hermes_cli.plugins import invoke_hook
@@ -603,7 +593,6 @@ def handle_function_call(
task_id=task_id or "",
session_id=session_id or "",
tool_call_id=tool_call_id or "",
duration_ms=duration_ms,
)
except Exception:
pass
@@ -624,7 +613,6 @@ def handle_function_call(
task_id=task_id or "",
session_id=session_id or "",
tool_call_id=tool_call_id or "",
duration_ms=duration_ms,
)
for hook_result in hook_results:
if isinstance(hook_result, str):
File diff suppressed because it is too large Load Diff
-752
View File
@@ -1,752 +0,0 @@
/*
* Hermes Kanban dashboard plugin styles.
*
* All colors reference theme CSS vars so the board reskins with the
* active dashboard theme. No hardcoded palette.
*/
.hermes-kanban {
width: 100%;
}
/* ---- Columns layout -------------------------------------------------- */
.hermes-kanban-columns {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(260px, 1fr));
gap: 0.75rem;
align-items: start;
}
.hermes-kanban-column {
display: flex;
flex-direction: column;
background: color-mix(in srgb, var(--color-card) 85%, transparent);
border: 1px solid var(--color-border);
border-radius: var(--radius);
padding: 0.5rem;
min-height: 200px;
max-height: calc(100vh - 220px);
transition: border-color 120ms ease, background-color 120ms ease;
}
.hermes-kanban-column--drop {
border-color: var(--color-ring);
background: color-mix(in srgb, var(--color-ring) 8%, var(--color-card));
}
.hermes-kanban-column-header {
display: flex;
align-items: center;
gap: 0.5rem;
padding: 0.25rem 0.25rem 0.35rem;
font-weight: 600;
font-size: 0.85rem;
color: var(--color-foreground);
}
.hermes-kanban-column-label {
flex: 1;
letter-spacing: 0.01em;
}
.hermes-kanban-column-count {
font-variant-numeric: tabular-nums;
color: var(--color-muted-foreground);
font-size: 0.75rem;
font-weight: 500;
}
.hermes-kanban-column-add {
appearance: none;
background: transparent;
border: 1px solid var(--color-border);
color: var(--color-foreground);
border-radius: var(--radius-sm, 0.25rem);
width: 22px;
height: 22px;
line-height: 1;
font-size: 1rem;
cursor: pointer;
}
.hermes-kanban-column-add:hover {
background: color-mix(in srgb, var(--color-foreground) 8%, transparent);
}
.hermes-kanban-column-sub {
padding: 0 0.25rem 0.5rem;
font-size: 0.7rem;
color: var(--color-muted-foreground);
border-bottom: 1px solid color-mix(in srgb, var(--color-border) 60%, transparent);
margin-bottom: 0.5rem;
}
.hermes-kanban-column-body {
display: flex;
flex-direction: column;
gap: 0.45rem;
overflow-y: auto;
padding-right: 0.1rem;
}
.hermes-kanban-empty {
padding: 1.5rem 0.5rem;
text-align: center;
font-size: 0.75rem;
color: var(--color-muted-foreground);
border: 1px dashed color-mix(in srgb, var(--color-border) 70%, transparent);
border-radius: var(--radius-sm, 0.25rem);
}
/* ---- Status dots ----------------------------------------------------- */
.hermes-kanban-dot {
display: inline-block;
width: 0.5rem;
height: 0.5rem;
border-radius: 999px;
background: var(--color-muted-foreground);
}
.hermes-kanban-dot-triage { background: #b47dd6; } /* lilac — fresh/unspecified */
.hermes-kanban-dot-todo { background: var(--color-muted-foreground); }
.hermes-kanban-dot-ready { background: #d4b348; } /* amber */
.hermes-kanban-dot-running { background: #3fb97d; } /* green */
.hermes-kanban-dot-blocked { background: var(--color-destructive, #d14a4a); }
.hermes-kanban-dot-done { background: #4a8cd1; } /* blue */
.hermes-kanban-dot-archived { background: var(--color-border); }
/* ---- Progress pill (N/M child tasks done) --------------------------- */
.hermes-kanban-progress {
font-family: var(--font-mono, ui-monospace, monospace);
font-size: 0.62rem;
padding: 0.05rem 0.35rem;
border-radius: 999px;
background: color-mix(in srgb, var(--color-foreground) 8%, transparent);
border: 1px solid color-mix(in srgb, var(--color-border) 80%, transparent);
color: var(--color-muted-foreground);
letter-spacing: 0.02em;
}
.hermes-kanban-progress--full {
background: color-mix(in srgb, #3fb97d 22%, transparent);
border-color: color-mix(in srgb, #3fb97d 45%, transparent);
color: var(--color-foreground);
}
/* ---- Lanes (per-profile sub-grouping inside Running) ---------------- */
.hermes-kanban-lane {
display: flex;
flex-direction: column;
gap: 0.35rem;
padding: 0.25rem 0 0.35rem;
border-top: 1px dashed color-mix(in srgb, var(--color-border) 70%, transparent);
}
.hermes-kanban-lane:first-child {
border-top: 0;
padding-top: 0;
}
.hermes-kanban-lane-head {
display: flex;
align-items: center;
gap: 0.4rem;
font-size: 0.65rem;
text-transform: uppercase;
letter-spacing: 0.08em;
color: var(--color-muted-foreground);
padding: 0 0.1rem;
}
.hermes-kanban-lane-name {
font-weight: 600;
font-family: var(--font-mono, ui-monospace, monospace);
}
.hermes-kanban-lane-count {
margin-left: auto;
font-variant-numeric: tabular-nums;
}
/* ---- Card ------------------------------------------------------------ */
.hermes-kanban-card {
cursor: grab;
transition: transform 100ms ease, box-shadow 100ms ease;
}
.hermes-kanban-card:hover {
box-shadow: 0 1px 0 0 var(--color-ring) inset, 0 0 0 1px var(--color-ring) inset;
}
.hermes-kanban-card:active {
cursor: grabbing;
transform: scale(0.995);
}
.hermes-kanban-card-content {
padding: 0.5rem 0.6rem !important;
display: flex;
flex-direction: column;
gap: 0.3rem;
}
.hermes-kanban-card-row {
display: flex;
align-items: center;
gap: 0.35rem;
flex-wrap: wrap;
}
.hermes-kanban-card-id {
font-family: var(--font-mono, ui-monospace, monospace);
font-size: 0.65rem;
color: var(--color-muted-foreground);
letter-spacing: 0.03em;
}
.hermes-kanban-card-title {
font-size: 0.85rem;
font-weight: 500;
line-height: 1.3;
color: var(--color-foreground);
word-break: break-word;
}
.hermes-kanban-card-meta {
font-size: 0.7rem;
color: var(--color-muted-foreground);
gap: 0.55rem;
}
.hermes-kanban-priority {
font-size: 0.6rem !important;
padding: 0.05rem 0.3rem !important;
background: color-mix(in srgb, var(--color-ring) 18%, transparent);
color: var(--color-foreground);
border: 1px solid color-mix(in srgb, var(--color-ring) 40%, transparent);
}
.hermes-kanban-tag {
font-size: 0.6rem !important;
padding: 0.05rem 0.3rem !important;
}
.hermes-kanban-assignee {
font-weight: 500;
color: color-mix(in srgb, var(--color-foreground) 80%, var(--color-muted-foreground));
}
.hermes-kanban-unassigned {
font-style: italic;
}
.hermes-kanban-ago {
margin-left: auto;
}
/* ---- Inline create --------------------------------------------------- */
.hermes-kanban-inline-create {
display: flex;
flex-direction: column;
gap: 0.35rem;
padding: 0.5rem;
margin-bottom: 0.5rem;
background: color-mix(in srgb, var(--color-card) 70%, transparent);
border: 1px dashed var(--color-border);
border-radius: var(--radius-sm, 0.25rem);
}
/* ---- Drawer (task detail side panel) --------------------------------- */
.hermes-kanban-drawer-shade {
position: fixed;
inset: 0;
background: rgba(0, 0, 0, 0.45);
z-index: 60;
display: flex;
justify-content: flex-end;
}
.hermes-kanban-drawer {
width: min(480px, 92vw);
height: 100vh;
background: var(--color-card);
border-left: 1px solid var(--color-border);
display: flex;
flex-direction: column;
box-shadow: -4px 0 18px rgba(0, 0, 0, 0.35);
animation: hermes-kanban-drawer-in 180ms ease-out;
}
@keyframes hermes-kanban-drawer-in {
from { transform: translateX(100%); opacity: 0.3; }
to { transform: translateX(0); opacity: 1; }
}
.hermes-kanban-drawer-head {
display: flex;
align-items: center;
justify-content: space-between;
padding: 0.6rem 0.8rem;
border-bottom: 1px solid var(--color-border);
font-family: var(--font-mono, ui-monospace, monospace);
}
.hermes-kanban-drawer-close {
appearance: none;
background: transparent;
border: 0;
color: var(--color-muted-foreground);
font-size: 1.25rem;
line-height: 1;
cursor: pointer;
padding: 0 0.25rem;
}
.hermes-kanban-drawer-close:hover { color: var(--color-foreground); }
.hermes-kanban-drawer-body {
flex: 1;
overflow-y: auto;
padding: 0.9rem;
display: flex;
flex-direction: column;
gap: 0.85rem;
}
.hermes-kanban-drawer-title {
display: flex;
align-items: center;
gap: 0.5rem;
font-size: 1rem;
font-weight: 600;
}
.hermes-kanban-drawer-meta {
display: flex;
flex-direction: column;
gap: 0.15rem;
padding: 0.5rem 0.6rem;
background: color-mix(in srgb, var(--color-foreground) 4%, transparent);
border: 1px solid var(--color-border);
border-radius: var(--radius-sm, 0.25rem);
}
.hermes-kanban-meta-row {
display: flex;
gap: 0.5rem;
font-size: 0.72rem;
}
.hermes-kanban-meta-label {
width: 92px;
color: var(--color-muted-foreground);
}
.hermes-kanban-meta-value {
color: var(--color-foreground);
word-break: break-word;
}
.hermes-kanban-actions {
display: flex;
flex-wrap: wrap;
gap: 0.3rem;
}
.hermes-kanban-section {
display: flex;
flex-direction: column;
gap: 0.35rem;
}
.hermes-kanban-section-head {
font-size: 0.72rem;
font-weight: 600;
text-transform: uppercase;
letter-spacing: 0.07em;
color: var(--color-muted-foreground);
}
.hermes-kanban-pre {
margin: 0;
padding: 0.45rem 0.55rem;
white-space: pre-wrap;
word-break: break-word;
background: color-mix(in srgb, var(--color-foreground) 4%, transparent);
border: 1px solid var(--color-border);
border-radius: var(--radius-sm, 0.25rem);
font-family: var(--font-mono, ui-monospace, monospace);
font-size: 0.72rem;
color: var(--color-foreground);
}
.hermes-kanban-comment {
border-left: 2px solid color-mix(in srgb, var(--color-ring) 35%, transparent);
padding-left: 0.5rem;
display: flex;
flex-direction: column;
gap: 0.2rem;
}
.hermes-kanban-comment-head {
display: flex;
gap: 0.5rem;
font-size: 0.7rem;
}
.hermes-kanban-comment-author {
font-weight: 600;
color: var(--color-foreground);
}
.hermes-kanban-comment-ago {
color: var(--color-muted-foreground);
}
.hermes-kanban-event {
display: flex;
gap: 0.5rem;
font-size: 0.7rem;
color: var(--color-muted-foreground);
font-family: var(--font-mono, ui-monospace, monospace);
}
.hermes-kanban-event-kind {
color: var(--color-foreground);
min-width: 6rem;
}
.hermes-kanban-event-payload {
color: var(--color-muted-foreground);
overflow: hidden;
text-overflow: ellipsis;
white-space: nowrap;
max-width: 280px;
}
.hermes-kanban-drawer-comment-row {
display: flex;
gap: 0.4rem;
padding: 0.55rem 0.75rem;
border-top: 1px solid var(--color-border);
background: color-mix(in srgb, var(--color-card) 90%, transparent);
}
.hermes-kanban-count {
display: inline-flex;
gap: 0.2rem;
align-items: center;
}
/* ---- Selection chrome ----------------------------------------------- */
.hermes-kanban-card--selected :where(.hermes-kanban-card-content) {
box-shadow: 0 0 0 2px var(--color-ring) inset,
0 0 0 1px var(--color-ring) inset;
background: color-mix(in srgb, var(--color-ring) 6%, var(--color-card));
}
.hermes-kanban-card-check {
width: 0.85rem;
height: 0.85rem;
margin: 0;
cursor: pointer;
accent-color: var(--color-ring);
}
/* ---- Bulk action bar ------------------------------------------------ */
.hermes-kanban-bulk {
display: flex;
align-items: center;
gap: 0.5rem;
padding: 0.4rem 0.75rem;
background: color-mix(in srgb, var(--color-ring) 10%, var(--color-card));
border: 1px solid color-mix(in srgb, var(--color-ring) 40%, var(--color-border));
border-radius: var(--radius-sm, 0.25rem);
flex-wrap: wrap;
}
.hermes-kanban-bulk-count {
font-weight: 600;
font-size: 0.75rem;
padding-right: 0.25rem;
}
.hermes-kanban-bulk-btn {
height: 1.7rem !important;
padding: 0 0.5rem !important;
font-size: 0.7rem !important;
border: 1px solid var(--color-border);
cursor: pointer;
}
.hermes-kanban-bulk-btn:hover {
background: color-mix(in srgb, var(--color-foreground) 8%, transparent);
}
.hermes-kanban-bulk-reassign {
display: flex;
align-items: center;
gap: 0.25rem;
padding-left: 0.5rem;
border-left: 1px solid color-mix(in srgb, var(--color-border) 70%, transparent);
}
/* ---- Dependency editor chips --------------------------------------- */
.hermes-kanban-deps-row {
display: flex;
align-items: center;
gap: 0.5rem;
margin-bottom: 0.4rem;
}
.hermes-kanban-deps-label {
font-size: 0.68rem;
text-transform: uppercase;
letter-spacing: 0.08em;
color: var(--color-muted-foreground);
min-width: 4rem;
}
.hermes-kanban-deps-chips {
display: flex;
gap: 0.3rem;
flex-wrap: wrap;
flex: 1;
}
.hermes-kanban-deps-empty {
font-size: 0.7rem;
color: var(--color-muted-foreground);
font-style: italic;
}
.hermes-kanban-dep-chip {
display: inline-flex;
align-items: center;
gap: 0.15rem;
padding: 0.1rem 0.35rem;
background: color-mix(in srgb, var(--color-foreground) 6%, transparent);
border: 1px solid var(--color-border);
border-radius: var(--radius-sm, 0.25rem);
font-family: var(--font-mono, ui-monospace, monospace);
font-size: 0.68rem;
color: var(--color-foreground);
}
.hermes-kanban-dep-chip-x {
appearance: none;
background: transparent;
border: 0;
color: var(--color-muted-foreground);
cursor: pointer;
font-size: 0.85rem;
line-height: 1;
padding: 0 0.15rem;
}
.hermes-kanban-dep-chip-x:hover { color: var(--color-destructive, #d14a4a); }
/* ---- Inline edit affordances --------------------------------------- */
.hermes-kanban-editable {
cursor: pointer;
border-bottom: 1px dotted color-mix(in srgb, var(--color-border) 80%, transparent);
}
.hermes-kanban-editable:hover {
color: var(--color-foreground);
border-bottom-color: var(--color-ring);
}
.hermes-kanban-drawer-title-text {
cursor: pointer;
}
.hermes-kanban-drawer-title-text:hover {
text-decoration: underline;
text-decoration-color: var(--color-ring);
text-decoration-style: dotted;
text-underline-offset: 3px;
}
.hermes-kanban-edit-row {
display: flex;
align-items: center;
gap: 0.35rem;
width: 100%;
}
.hermes-kanban-section-head-row {
display: flex;
align-items: center;
justify-content: space-between;
gap: 0.5rem;
}
.hermes-kanban-edit-link {
appearance: none;
background: transparent;
border: 0;
color: var(--color-muted-foreground);
font-size: 0.7rem;
text-transform: uppercase;
letter-spacing: 0.05em;
cursor: pointer;
padding: 0;
}
.hermes-kanban-edit-link:hover { color: var(--color-ring); }
.hermes-kanban-textarea {
width: 100%;
min-height: 8rem;
background: var(--color-card);
color: var(--color-foreground);
border: 1px solid var(--color-border);
border-radius: var(--radius-sm, 0.25rem);
padding: 0.5rem 0.6rem;
font-family: var(--font-mono, ui-monospace, monospace);
font-size: 0.8rem;
line-height: 1.5;
resize: vertical;
}
.hermes-kanban-textarea:focus {
outline: none;
border-color: var(--color-ring);
box-shadow: 0 0 0 2px color-mix(in srgb, var(--color-ring) 30%, transparent);
}
/* ---- Markdown rendering -------------------------------------------- */
.hermes-kanban-md {
font-size: 0.8rem;
line-height: 1.55;
color: var(--color-foreground);
}
.hermes-kanban-md p { margin: 0.25rem 0; }
.hermes-kanban-md h1,
.hermes-kanban-md h2,
.hermes-kanban-md h3,
.hermes-kanban-md h4 {
margin: 0.6rem 0 0.2rem;
line-height: 1.25;
}
.hermes-kanban-md h1 { font-size: 1.05rem; }
.hermes-kanban-md h2 { font-size: 0.95rem; }
.hermes-kanban-md h3 { font-size: 0.88rem; }
.hermes-kanban-md h4 { font-size: 0.82rem; }
.hermes-kanban-md ul {
margin: 0.25rem 0 0.25rem 1.1rem;
padding: 0;
}
.hermes-kanban-md li { margin: 0.1rem 0; }
.hermes-kanban-md a {
color: var(--color-ring);
text-decoration: underline;
}
.hermes-kanban-md code {
font-family: var(--font-mono, ui-monospace, monospace);
font-size: 0.75rem;
padding: 0.05rem 0.3rem;
background: color-mix(in srgb, var(--color-foreground) 8%, transparent);
border-radius: 3px;
}
.hermes-kanban-md-code {
margin: 0.35rem 0;
padding: 0.5rem 0.6rem;
background: color-mix(in srgb, var(--color-foreground) 5%, transparent);
border: 1px solid var(--color-border);
border-radius: var(--radius-sm, 0.25rem);
overflow-x: auto;
}
.hermes-kanban-md-code code {
background: transparent;
padding: 0;
font-size: 0.75rem;
white-space: pre;
}
.hermes-kanban-md strong { font-weight: 600; }
/* ---- Touch-drag proxy ---------------------------------------------- */
.hermes-kanban-touch-proxy {
pointer-events: none;
opacity: 0.85;
box-shadow: 0 8px 20px rgba(0, 0, 0, 0.35);
transform: scale(1.02);
transition: none;
}
/* ---- Staleness tiers ------------------------------------------------ */
.hermes-kanban-card--stale-amber :where(.hermes-kanban-card-content) {
box-shadow: 0 0 0 1px #d4b34888 inset;
}
.hermes-kanban-card--stale-amber:hover :where(.hermes-kanban-card-content) {
box-shadow: 0 0 0 2px #d4b348 inset;
}
.hermes-kanban-card--stale-red :where(.hermes-kanban-card-content) {
box-shadow: 0 0 0 1px var(--color-destructive, #d14a4a) inset,
0 0 8px color-mix(in srgb, var(--color-destructive, #d14a4a) 30%, transparent);
}
.hermes-kanban-card--stale-red:hover :where(.hermes-kanban-card-content) {
box-shadow: 0 0 0 2px var(--color-destructive, #d14a4a) inset,
0 0 10px color-mix(in srgb, var(--color-destructive, #d14a4a) 45%, transparent);
}
/* ---- Worker log pane ------------------------------------------------ */
.hermes-kanban-log {
max-height: 340px;
overflow: auto;
white-space: pre;
font-size: 0.7rem;
line-height: 1.45;
}
/* ---- Run history (per-attempt log in the drawer) ------------------- */
.hermes-kanban-run {
border-left: 2px solid var(--color-border);
padding: 0.35rem 0.5rem;
margin-bottom: 0.4rem;
background: color-mix(in srgb, var(--color-foreground) 3%, transparent);
border-radius: var(--radius-sm, 0.25rem);
}
.hermes-kanban-run--active { border-left-color: #3fb97d; }
.hermes-kanban-run--completed { border-left-color: #4a8cd1; }
.hermes-kanban-run--ended { border-left-color: #6b7280; } /* generic fallback when outcome is unset */
.hermes-kanban-run--blocked { border-left-color: var(--color-destructive, #d14a4a); }
.hermes-kanban-run--crashed,
.hermes-kanban-run--timed_out,
.hermes-kanban-run--gave_up,
.hermes-kanban-run--spawn_failed {
border-left-color: var(--color-destructive, #d14a4a);
background: color-mix(in srgb, var(--color-destructive, #d14a4a) 6%, transparent);
}
.hermes-kanban-run--reclaimed { border-left-color: #d4b348; }
.hermes-kanban-run-head {
display: flex;
align-items: center;
gap: 0.6rem;
font-size: 0.7rem;
}
.hermes-kanban-run-outcome {
font-family: var(--font-mono, ui-monospace, monospace);
font-weight: 600;
text-transform: uppercase;
letter-spacing: 0.05em;
color: var(--color-foreground);
}
.hermes-kanban-run-profile {
color: var(--color-muted-foreground);
}
.hermes-kanban-run-elapsed {
font-variant-numeric: tabular-nums;
color: var(--color-muted-foreground);
}
.hermes-kanban-run-ago {
margin-left: auto;
color: var(--color-muted-foreground);
}
.hermes-kanban-run-summary {
font-size: 0.75rem;
padding: 0.2rem 0 0;
color: var(--color-foreground);
}
.hermes-kanban-run-error {
font-size: 0.7rem;
color: var(--color-destructive, #d14a4a);
padding: 0.15rem 0 0;
font-family: var(--font-mono, ui-monospace, monospace);
}
.hermes-kanban-run-meta {
display: block;
font-size: 0.65rem;
padding: 0.15rem 0 0;
color: var(--color-muted-foreground);
white-space: pre-wrap;
word-break: break-word;
font-family: var(--font-mono, ui-monospace, monospace);
}
-14
View File
@@ -1,14 +0,0 @@
{
"name": "kanban",
"label": "Kanban",
"description": "Multi-agent collaboration board — drag-drop cards across columns, read comment threads, see which profile is running what",
"icon": "Package",
"version": "1.0.0",
"tab": {
"path": "/kanban",
"position": "after:skills"
},
"entry": "dist/index.js",
"css": "dist/style.css",
"api": "plugin_api.py"
}
-830
View File
@@ -1,830 +0,0 @@
"""Kanban dashboard plugin — backend API routes.
Mounted at /api/plugins/kanban/ by the dashboard plugin system.
This layer is intentionally thin: every handler is a small wrapper around
``hermes_cli.kanban_db`` or a direct SQL query. Writes use the same code
paths the CLI and gateway ``/kanban`` command use, so the three surfaces
cannot drift.
Live updates arrive via the ``/events`` WebSocket, which tails the
append-only ``task_events`` table on a short poll interval (WAL mode lets
reads run alongside the dispatcher's IMMEDIATE write transactions).
Security note
-------------
The dashboard's HTTP auth middleware (``web_server.auth_middleware``)
explicitly skips ``/api/plugins/`` plugin routes are unauthenticated by
design because the dashboard binds to localhost by default. For the
WebSocket we still require the session token as a ``?token=`` query
parameter (browsers cannot set the ``Authorization`` header on an upgrade
request), matching the established pattern used by the in-browser PTY
bridge in ``hermes_cli/web_server.py``. If you run the dashboard with
``--host 0.0.0.0``, every plugin route kanban included becomes
reachable from the network. Don't do that on a shared host.
"""
from __future__ import annotations
import asyncio
import hmac
import json
import logging
import sqlite3
import time
from dataclasses import asdict
from typing import Any, Optional
from fastapi import APIRouter, HTTPException, Query, WebSocket, WebSocketDisconnect, status as http_status
from pydantic import BaseModel, Field
from hermes_cli import kanban_db
log = logging.getLogger(__name__)
router = APIRouter()
# ---------------------------------------------------------------------------
# Auth helper — WebSocket only (HTTP routes live behind the dashboard's
# existing plugin-bypass; this is documented above).
# ---------------------------------------------------------------------------
def _check_ws_token(provided: Optional[str]) -> bool:
"""Constant-time compare against the dashboard session token.
Imported lazily so the plugin still loads in test contexts where the
dashboard web_server module isn't importable (e.g. the bare-FastAPI
test harness).
"""
if not provided:
return False
try:
from hermes_cli import web_server as _ws
except Exception:
# No dashboard context (tests). Accept so the tail loop is still
# testable; in production the dashboard module always imports
# cleanly because it's the caller.
return True
expected = getattr(_ws, "_SESSION_TOKEN", None)
if not expected:
return True
return hmac.compare_digest(str(provided), str(expected))
def _conn():
"""Open a kanban_db connection, creating the schema on first use.
Every handler that mutates the DB goes through this so the plugin
self-heals on a fresh install (no user-visible "no such table"
error if somebody hits POST /tasks before GET /board).
``init_db`` is idempotent.
"""
try:
kanban_db.init_db()
except Exception as exc:
log.warning("kanban init_db failed: %s", exc)
return kanban_db.connect()
# ---------------------------------------------------------------------------
# Serialization helpers
# ---------------------------------------------------------------------------
# Columns shown by the dashboard, in left-to-right order. "archived" is
# available via a filter toggle rather than a visible column.
BOARD_COLUMNS: list[str] = [
"triage", "todo", "ready", "running", "blocked", "done",
]
def _task_dict(task: kanban_db.Task) -> dict[str, Any]:
d = asdict(task)
# Add derived age metrics so the UI can colour stale cards without
# computing deltas client-side.
d["age"] = kanban_db.task_age(task)
# Keep body short on list endpoints; full body comes from /tasks/:id.
return d
def _event_dict(event: kanban_db.Event) -> dict[str, Any]:
return {
"id": event.id,
"task_id": event.task_id,
"kind": event.kind,
"payload": event.payload,
"created_at": event.created_at,
"run_id": event.run_id,
}
def _comment_dict(c: kanban_db.Comment) -> dict[str, Any]:
return {
"id": c.id,
"task_id": c.task_id,
"author": c.author,
"body": c.body,
"created_at": c.created_at,
}
def _run_dict(r: kanban_db.Run) -> dict[str, Any]:
"""Serialise a Run for the drawer's Run history section."""
return {
"id": r.id,
"task_id": r.task_id,
"profile": r.profile,
"step_key": r.step_key,
"status": r.status,
"claim_lock": r.claim_lock,
"claim_expires": r.claim_expires,
"worker_pid": r.worker_pid,
"max_runtime_seconds": r.max_runtime_seconds,
"last_heartbeat_at": r.last_heartbeat_at,
"started_at": r.started_at,
"ended_at": r.ended_at,
"outcome": r.outcome,
"summary": r.summary,
"metadata": r.metadata,
"error": r.error,
}
def _links_for(conn: sqlite3.Connection, task_id: str) -> dict[str, list[str]]:
"""Return {'parents': [...], 'children': [...]} for a task."""
parents = [
r["parent_id"]
for r in conn.execute(
"SELECT parent_id FROM task_links WHERE child_id = ? ORDER BY parent_id",
(task_id,),
)
]
children = [
r["child_id"]
for r in conn.execute(
"SELECT child_id FROM task_links WHERE parent_id = ? ORDER BY child_id",
(task_id,),
)
]
return {"parents": parents, "children": children}
# ---------------------------------------------------------------------------
# GET /board
# ---------------------------------------------------------------------------
@router.get("/board")
def get_board(
tenant: Optional[str] = Query(None, description="Filter to a single tenant"),
include_archived: bool = Query(False),
):
"""Return the full board grouped by status column.
``_conn()`` auto-initializes ``kanban.db`` on first call so a fresh
install doesn't surface a "failed to load" error on the plugin tab.
"""
conn = _conn()
try:
tasks = kanban_db.list_tasks(
conn, tenant=tenant, include_archived=include_archived
)
# Pre-fetch link counts per task (cheap: one query).
link_counts: dict[str, dict[str, int]] = {}
for row in conn.execute(
"SELECT parent_id, child_id FROM task_links"
).fetchall():
link_counts.setdefault(row["parent_id"], {"parents": 0, "children": 0})[
"children"
] += 1
link_counts.setdefault(row["child_id"], {"parents": 0, "children": 0})[
"parents"
] += 1
# Comment + event counts (both cheap aggregates).
comment_counts: dict[str, int] = {
r["task_id"]: r["n"]
for r in conn.execute(
"SELECT task_id, COUNT(*) AS n FROM task_comments GROUP BY task_id"
)
}
# Progress rollup: for each parent, how many children are done / total.
# One pass over task_links joined with child status — cheaper than
# N per-task queries and the plugin uses it to render "N/M".
progress: dict[str, dict[str, int]] = {}
for row in conn.execute(
"SELECT l.parent_id AS pid, t.status AS cstatus "
"FROM task_links l JOIN tasks t ON t.id = l.child_id"
).fetchall():
p = progress.setdefault(row["pid"], {"done": 0, "total": 0})
p["total"] += 1
if row["cstatus"] == "done":
p["done"] += 1
latest_event_id = conn.execute(
"SELECT COALESCE(MAX(id), 0) AS m FROM task_events"
).fetchone()["m"]
columns: dict[str, list[dict]] = {c: [] for c in BOARD_COLUMNS}
if include_archived:
columns["archived"] = []
for t in tasks:
d = _task_dict(t)
d["link_counts"] = link_counts.get(t.id, {"parents": 0, "children": 0})
d["comment_count"] = comment_counts.get(t.id, 0)
d["progress"] = progress.get(t.id) # None when the task has no children
col = t.status if t.status in columns else "todo"
columns[col].append(d)
# Stable per-column ordering already applied by list_tasks
# (priority DESC, created_at ASC), keep as-is.
# List of known tenants for the UI filter dropdown.
tenants = [
r["tenant"]
for r in conn.execute(
"SELECT DISTINCT tenant FROM tasks WHERE tenant IS NOT NULL ORDER BY tenant"
)
]
# List of distinct assignees for the lane-by-profile sub-grouping.
assignees = [
r["assignee"]
for r in conn.execute(
"SELECT DISTINCT assignee FROM tasks WHERE assignee IS NOT NULL "
"AND status != 'archived' ORDER BY assignee"
)
]
return {
"columns": [
{"name": name, "tasks": columns[name]} for name in columns.keys()
],
"tenants": tenants,
"assignees": assignees,
"latest_event_id": int(latest_event_id),
"now": int(time.time()),
}
finally:
conn.close()
# ---------------------------------------------------------------------------
# GET /tasks/:id
# ---------------------------------------------------------------------------
@router.get("/tasks/{task_id}")
def get_task(task_id: str):
conn = _conn()
try:
task = kanban_db.get_task(conn, task_id)
if task is None:
raise HTTPException(status_code=404, detail=f"task {task_id} not found")
return {
"task": _task_dict(task),
"comments": [_comment_dict(c) for c in kanban_db.list_comments(conn, task_id)],
"events": [_event_dict(e) for e in kanban_db.list_events(conn, task_id)],
"links": _links_for(conn, task_id),
"runs": [_run_dict(r) for r in kanban_db.list_runs(conn, task_id)],
}
finally:
conn.close()
# ---------------------------------------------------------------------------
# POST /tasks
# ---------------------------------------------------------------------------
class CreateTaskBody(BaseModel):
title: str
body: Optional[str] = None
assignee: Optional[str] = None
tenant: Optional[str] = None
priority: int = 0
workspace_kind: str = "scratch"
workspace_path: Optional[str] = None
parents: list[str] = Field(default_factory=list)
triage: bool = False
idempotency_key: Optional[str] = None
max_runtime_seconds: Optional[int] = None
skills: Optional[list[str]] = None
@router.post("/tasks")
def create_task(payload: CreateTaskBody):
conn = _conn()
try:
task_id = kanban_db.create_task(
conn,
title=payload.title,
body=payload.body,
assignee=payload.assignee,
created_by="dashboard",
workspace_kind=payload.workspace_kind,
workspace_path=payload.workspace_path,
tenant=payload.tenant,
priority=payload.priority,
parents=payload.parents,
triage=payload.triage,
idempotency_key=payload.idempotency_key,
max_runtime_seconds=payload.max_runtime_seconds,
skills=payload.skills,
)
task = kanban_db.get_task(conn, task_id)
return {"task": _task_dict(task) if task else None}
except ValueError as e:
raise HTTPException(status_code=400, detail=str(e))
finally:
conn.close()
# ---------------------------------------------------------------------------
# PATCH /tasks/:id (status / assignee / priority / title / body)
# ---------------------------------------------------------------------------
class UpdateTaskBody(BaseModel):
status: Optional[str] = None
assignee: Optional[str] = None
priority: Optional[int] = None
title: Optional[str] = None
body: Optional[str] = None
result: Optional[str] = None
block_reason: Optional[str] = None
# Structured handoff fields — forwarded to complete_task when status
# transitions to 'done'. Dashboard parity with ``hermes kanban
# complete --summary ... --metadata ...``.
summary: Optional[str] = None
metadata: Optional[dict] = None
@router.patch("/tasks/{task_id}")
def update_task(task_id: str, payload: UpdateTaskBody):
conn = _conn()
try:
task = kanban_db.get_task(conn, task_id)
if task is None:
raise HTTPException(status_code=404, detail=f"task {task_id} not found")
# --- assignee ----------------------------------------------------
if payload.assignee is not None:
try:
ok = kanban_db.assign_task(
conn, task_id, payload.assignee or None,
)
except RuntimeError as e:
raise HTTPException(status_code=409, detail=str(e))
if not ok:
raise HTTPException(status_code=404, detail="task not found")
# --- status -------------------------------------------------------
if payload.status is not None:
s = payload.status
ok = True
if s == "done":
ok = kanban_db.complete_task(
conn, task_id,
result=payload.result,
summary=payload.summary,
metadata=payload.metadata,
)
elif s == "blocked":
ok = kanban_db.block_task(conn, task_id, reason=payload.block_reason)
elif s == "ready":
# Re-open a blocked task, or just an explicit status set.
current = kanban_db.get_task(conn, task_id)
if current and current.status == "blocked":
ok = kanban_db.unblock_task(conn, task_id)
else:
# Direct status write for drag-drop (todo -> ready etc).
ok = _set_status_direct(conn, task_id, "ready")
elif s == "archived":
ok = kanban_db.archive_task(conn, task_id)
elif s in ("todo", "running", "triage"):
ok = _set_status_direct(conn, task_id, s)
else:
raise HTTPException(status_code=400, detail=f"unknown status: {s}")
if not ok:
raise HTTPException(
status_code=409,
detail=f"status transition to {s!r} not valid from current state",
)
# --- priority -----------------------------------------------------
if payload.priority is not None:
with kanban_db.write_txn(conn):
conn.execute(
"UPDATE tasks SET priority = ? WHERE id = ?",
(int(payload.priority), task_id),
)
conn.execute(
"INSERT INTO task_events (task_id, kind, payload, created_at) "
"VALUES (?, 'reprioritized', ?, ?)",
(task_id, json.dumps({"priority": int(payload.priority)}),
int(time.time())),
)
# --- title / body -------------------------------------------------
if payload.title is not None or payload.body is not None:
with kanban_db.write_txn(conn):
sets, vals = [], []
if payload.title is not None:
if not payload.title.strip():
raise HTTPException(status_code=400, detail="title cannot be empty")
sets.append("title = ?")
vals.append(payload.title.strip())
if payload.body is not None:
sets.append("body = ?")
vals.append(payload.body)
vals.append(task_id)
conn.execute(
f"UPDATE tasks SET {', '.join(sets)} WHERE id = ?", vals,
)
conn.execute(
"INSERT INTO task_events (task_id, kind, payload, created_at) "
"VALUES (?, 'edited', NULL, ?)",
(task_id, int(time.time())),
)
updated = kanban_db.get_task(conn, task_id)
return {"task": _task_dict(updated) if updated else None}
finally:
conn.close()
def _set_status_direct(
conn: sqlite3.Connection, task_id: str, new_status: str,
) -> bool:
"""Direct status write for drag-drop moves that aren't covered by the
structured complete/block/unblock/archive verbs (e.g. todo<->ready,
running<->ready). Appends a ``status`` event row for the live feed.
When this transitions OFF ``running`` to anything other than the
terminal verbs above (which own their own run closing), we close the
active run with outcome='reclaimed' so attempt history isn't
orphaned. ``running -> ready`` via drag-drop is the common case
(user yanking a stuck worker back to the queue).
"""
with kanban_db.write_txn(conn):
# Snapshot current state so we know whether to close a run.
prev = conn.execute(
"SELECT status, current_run_id FROM tasks WHERE id = ?",
(task_id,),
).fetchone()
if prev is None:
return False
was_running = prev["status"] == "running"
cur = conn.execute(
"UPDATE tasks SET status = ?, "
" claim_lock = CASE WHEN ? = 'running' THEN claim_lock ELSE NULL END, "
" claim_expires = CASE WHEN ? = 'running' THEN claim_expires ELSE NULL END, "
" worker_pid = CASE WHEN ? = 'running' THEN worker_pid ELSE NULL END "
"WHERE id = ?",
(new_status, new_status, new_status, new_status, task_id),
)
if cur.rowcount != 1:
return False
run_id = None
if was_running and new_status != "running" and prev["current_run_id"]:
run_id = kanban_db._end_run(
conn, task_id,
outcome="reclaimed", status="reclaimed",
summary=f"status changed to {new_status} (dashboard/direct)",
)
conn.execute(
"INSERT INTO task_events (task_id, run_id, kind, payload, created_at) "
"VALUES (?, ?, 'status', ?, ?)",
(task_id, run_id, json.dumps({"status": new_status}), int(time.time())),
)
# If we re-opened something, children may have gone stale.
if new_status in ("done", "ready"):
kanban_db.recompute_ready(conn)
return True
# ---------------------------------------------------------------------------
# Comments
# ---------------------------------------------------------------------------
class CommentBody(BaseModel):
body: str
author: Optional[str] = "dashboard"
@router.post("/tasks/{task_id}/comments")
def add_comment(task_id: str, payload: CommentBody):
if not payload.body.strip():
raise HTTPException(status_code=400, detail="body is required")
conn = _conn()
try:
if kanban_db.get_task(conn, task_id) is None:
raise HTTPException(status_code=404, detail=f"task {task_id} not found")
kanban_db.add_comment(
conn, task_id, author=payload.author or "dashboard", body=payload.body,
)
return {"ok": True}
finally:
conn.close()
# ---------------------------------------------------------------------------
# Links
# ---------------------------------------------------------------------------
class LinkBody(BaseModel):
parent_id: str
child_id: str
@router.post("/links")
def add_link(payload: LinkBody):
conn = _conn()
try:
kanban_db.link_tasks(conn, payload.parent_id, payload.child_id)
return {"ok": True}
except ValueError as e:
raise HTTPException(status_code=400, detail=str(e))
finally:
conn.close()
@router.delete("/links")
def delete_link(parent_id: str = Query(...), child_id: str = Query(...)):
conn = _conn()
try:
ok = kanban_db.unlink_tasks(conn, parent_id, child_id)
return {"ok": bool(ok)}
finally:
conn.close()
# ---------------------------------------------------------------------------
# Bulk actions (multi-select on the board)
# ---------------------------------------------------------------------------
class BulkTaskBody(BaseModel):
ids: list[str]
status: Optional[str] = None
assignee: Optional[str] = None # "" or None = unassign
priority: Optional[int] = None
archive: bool = False
@router.post("/tasks/bulk")
def bulk_update(payload: BulkTaskBody):
"""Apply the same patch to every id in ``payload.ids``.
This is an *independent* iteration per-task failures don't abort
siblings. Returns per-id outcome so the UI can surface partials.
"""
ids = [i for i in (payload.ids or []) if i]
if not ids:
raise HTTPException(status_code=400, detail="ids is required")
results: list[dict] = []
conn = _conn()
try:
for tid in ids:
entry: dict[str, Any] = {"id": tid, "ok": True}
try:
task = kanban_db.get_task(conn, tid)
if task is None:
entry.update(ok=False, error="not found")
results.append(entry)
continue
if payload.archive:
if not kanban_db.archive_task(conn, tid):
entry.update(ok=False, error="archive refused")
if payload.status is not None and not payload.archive:
s = payload.status
if s == "done":
ok = kanban_db.complete_task(conn, tid)
elif s == "blocked":
ok = kanban_db.block_task(conn, tid)
elif s == "ready":
cur = kanban_db.get_task(conn, tid)
if cur and cur.status == "blocked":
ok = kanban_db.unblock_task(conn, tid)
else:
ok = _set_status_direct(conn, tid, "ready")
elif s in ("todo", "running", "triage"):
ok = _set_status_direct(conn, tid, s)
else:
entry.update(ok=False, error=f"unknown status {s!r}")
results.append(entry)
continue
if not ok:
entry.update(ok=False, error=f"transition to {s!r} refused")
if payload.assignee is not None:
try:
if not kanban_db.assign_task(
conn, tid, payload.assignee or None,
):
entry.update(ok=False, error="assign refused")
except RuntimeError as e:
entry.update(ok=False, error=str(e))
if payload.priority is not None:
with kanban_db.write_txn(conn):
conn.execute(
"UPDATE tasks SET priority = ? WHERE id = ?",
(int(payload.priority), tid),
)
conn.execute(
"INSERT INTO task_events (task_id, kind, payload, created_at) "
"VALUES (?, 'reprioritized', ?, ?)",
(tid, json.dumps({"priority": int(payload.priority)}),
int(time.time())),
)
except Exception as e: # defensive — one bad id shouldn't kill the batch
entry.update(ok=False, error=str(e))
results.append(entry)
return {"results": results}
finally:
conn.close()
# ---------------------------------------------------------------------------
# Plugin config (read dashboard.kanban.* defaults from config.yaml)
# ---------------------------------------------------------------------------
@router.get("/config")
def get_config():
"""Return kanban dashboard preferences from ~/.hermes/config.yaml.
Reads the ``dashboard.kanban`` section if present; defaults otherwise.
Used by the UI to pre-select tenant filters, toggle markdown rendering,
or set column-width preferences without a round-trip per page load.
"""
try:
from hermes_cli.config import load_config
cfg = load_config() or {}
except Exception:
cfg = {}
dash_cfg = (cfg.get("dashboard") or {})
# dashboard.kanban may itself be a dict; fall back to {}.
k_cfg = dash_cfg.get("kanban") or {}
return {
"default_tenant": k_cfg.get("default_tenant") or "",
"lane_by_profile": bool(k_cfg.get("lane_by_profile", True)),
"include_archived_by_default": bool(k_cfg.get("include_archived_by_default", False)),
"render_markdown": bool(k_cfg.get("render_markdown", True)),
}
# ---------------------------------------------------------------------------
# Stats (per-profile / per-status counts + oldest-ready age)
# ---------------------------------------------------------------------------
@router.get("/stats")
def get_stats():
"""Per-status + per-assignee counts + oldest-ready age.
Designed for the dashboard HUD and for router profiles that need to
answer "is this specialist overloaded?" without scanning the whole
board themselves.
"""
conn = _conn()
try:
return kanban_db.board_stats(conn)
finally:
conn.close()
@router.get("/assignees")
def get_assignees():
"""Known profiles + per-profile task counts.
Returns the union of ``~/.hermes/profiles/*`` on disk and every
distinct assignee currently used on the board. The dashboard uses
this to populate its assignee dropdown so a freshly-created profile
appears in the picker before it's been given any task.
"""
conn = _conn()
try:
return {"assignees": kanban_db.known_assignees(conn)}
finally:
conn.close()
# ---------------------------------------------------------------------------
# Worker log (read-only; file written by _default_spawn)
# ---------------------------------------------------------------------------
@router.get("/tasks/{task_id}/log")
def get_task_log(task_id: str, tail: Optional[int] = Query(None, ge=1, le=2_000_000)):
"""Return the worker's stdout/stderr log.
``tail`` caps the response size (bytes) so the dashboard drawer
doesn't paginate megabytes into the browser. Returns 404 if the task
has never spawned. The on-disk log is rotated at 2 MiB per
``_rotate_worker_log`` a single ``.log.1`` is kept, no further
generations, so disk usage per task is bounded at ~4 MiB.
"""
conn = _conn()
try:
task = kanban_db.get_task(conn, task_id)
finally:
conn.close()
if task is None:
raise HTTPException(status_code=404, detail=f"task {task_id} not found")
content = kanban_db.read_worker_log(task_id, tail_bytes=tail)
log_path = kanban_db.worker_log_path(task_id)
size = log_path.stat().st_size if log_path.exists() else 0
return {
"task_id": task_id,
"path": str(log_path),
"exists": content is not None,
"size_bytes": size,
"content": content or "",
# Truncated when the on-disk file was larger than the tail cap.
"truncated": bool(tail and size > tail),
}
# ---------------------------------------------------------------------------
# Dispatch nudge (optional quick-path so the UI doesn't wait 60 s)
# ---------------------------------------------------------------------------
@router.post("/dispatch")
def dispatch(dry_run: bool = Query(False), max_n: int = Query(8, alias="max")):
conn = _conn()
try:
result = kanban_db.dispatch_once(
conn, dry_run=dry_run, max_spawn=max_n,
)
# DispatchResult is a dataclass.
try:
return asdict(result)
except TypeError:
return {"result": str(result)}
finally:
conn.close()
# ---------------------------------------------------------------------------
# WebSocket: /events?since=<event_id>
# ---------------------------------------------------------------------------
# Poll interval for the event tail loop. SQLite WAL + 300 ms polling is
# the simplest and most robust approach; it adds a fraction of a percent
# of CPU and has no shared state to synchronize across workers.
_EVENT_POLL_SECONDS = 0.3
@router.websocket("/events")
async def stream_events(ws: WebSocket):
# Enforce the dashboard session token as a query param — browsers can't
# set Authorization on a WS upgrade. This matches how the PTY bridge
# authenticates in hermes_cli/web_server.py.
token = ws.query_params.get("token")
if not _check_ws_token(token):
await ws.close(code=http_status.WS_1008_POLICY_VIOLATION)
return
await ws.accept()
try:
since_raw = ws.query_params.get("since", "0")
try:
cursor = int(since_raw)
except ValueError:
cursor = 0
def _fetch_new(cursor_val: int) -> tuple[int, list[dict]]:
conn = kanban_db.connect()
try:
rows = conn.execute(
"SELECT id, task_id, run_id, kind, payload, created_at "
"FROM task_events WHERE id > ? ORDER BY id ASC LIMIT 200",
(cursor_val,),
).fetchall()
out: list[dict] = []
new_cursor = cursor_val
for r in rows:
try:
payload = json.loads(r["payload"]) if r["payload"] else None
except Exception:
payload = None
out.append({
"id": r["id"],
"task_id": r["task_id"],
"run_id": r["run_id"],
"kind": r["kind"],
"payload": payload,
"created_at": r["created_at"],
})
new_cursor = r["id"]
return new_cursor, out
finally:
conn.close()
while True:
cursor, events = await asyncio.to_thread(_fetch_new, cursor)
if events:
await ws.send_json({"events": events, "cursor": cursor})
await asyncio.sleep(_EVENT_POLL_SECONDS)
except WebSocketDisconnect:
return
except Exception as exc: # defensive: never crash the dashboard worker
log.warning("Kanban event stream error: %s", exc)
try:
await ws.close()
except Exception:
pass
@@ -1,17 +0,0 @@
[Unit]
Description=Hermes Kanban dispatcher (hermes kanban daemon)
Documentation=https://hermes-agent.nousresearch.com/docs/user-guide/features/kanban
After=network.target
[Service]
Type=simple
ExecStart=/usr/bin/env hermes kanban daemon --interval 60 --pidfile %t/hermes-kanban-dispatcher.pid
Restart=on-failure
RestartSec=5
# Log to the journal via stdout/stderr; the dispatcher also writes per-task
# worker output to $HERMES_HOME/kanban/logs/<task>.log.
StandardOutput=journal
StandardError=journal
[Install]
WantedBy=default.target
+79 -252
View File
@@ -40,7 +40,6 @@ from types import SimpleNamespace
import urllib.request
import uuid
from typing import List, Dict, Any, Optional
from urllib.parse import urlparse, parse_qs, urlunparse
from openai import OpenAI
import fire
from datetime import datetime
@@ -86,7 +85,6 @@ from agent.error_classifier import classify_api_error, FailoverReason
from agent.prompt_builder import (
DEFAULT_AGENT_IDENTITY, PLATFORM_HINTS,
MEMORY_GUIDANCE, SESSION_SEARCH_GUIDANCE, SKILLS_GUIDANCE,
KANBAN_GUIDANCE,
build_nous_subscription_prompt,
)
from agent.model_metadata import (
@@ -893,6 +891,7 @@ class AIAgent:
checkpoints_enabled: bool = False,
checkpoint_max_snapshots: int = 50,
pass_session_id: bool = False,
persist_session: bool = True,
):
"""
Initialize the AI Agent.
@@ -964,6 +963,7 @@ class AIAgent:
self.background_review_callback = None # Optional sync callback for gateway delivery
self.skip_context_files = skip_context_files
self.pass_session_id = pass_session_id
self.persist_session = persist_session
self._credential_pool = credential_pool
self.log_prefix_chars = log_prefix_chars
self.log_prefix = f"{log_prefix} " if log_prefix else ""
@@ -1033,16 +1033,12 @@ class AIAgent:
# surface.
# When api_mode was explicitly provided, respect it — the user
# knows what their endpoint supports (#10473).
# Exception: Azure OpenAI serves gpt-5.x on /chat/completions and
# does NOT support the Responses API — skip the upgrade for Azure
# (openai.azure.com), even though it looks OpenAI-compatible.
if (
api_mode is None
and self.api_mode == "chat_completions"
and self.provider != "copilot-acp"
and not str(self.base_url or "").lower().startswith("acp://copilot")
and not str(self.base_url or "").lower().startswith("acp+tcp://")
and not self._is_azure_openai_url()
and (
self._is_direct_openai_url()
or self._provider_model_requires_responses_api(
@@ -1318,22 +1314,7 @@ class AIAgent:
if api_key and base_url:
# Explicit credentials from CLI/gateway — construct directly.
# The runtime provider resolver already handled auth for us.
# Extract query params (e.g. Azure api-version) from base_url
# and pass via default_query to prevent loss during SDK URL
# joining (httpx drops query string when joining paths).
_parsed_url = urlparse(base_url)
if _parsed_url.query:
_clean_url = urlunparse(_parsed_url._replace(query=""))
_query_params = {
k: v[0] for k, v in parse_qs(_parsed_url.query).items()
}
client_kwargs = {
"api_key": api_key,
"base_url": _clean_url,
"default_query": _query_params,
}
else:
client_kwargs = {"api_key": api_key, "base_url": base_url}
client_kwargs = {"api_key": api_key, "base_url": base_url}
if _provider_timeout is not None:
client_kwargs["timeout"] = _provider_timeout
if self.provider == "copilot-acp":
@@ -1784,64 +1765,43 @@ class AIAgent:
# Store for reuse in switch_model (so config override persists across model switches)
self._config_context_length = _config_context_length
# Resolve custom_providers list once for reuse below (startup
# context-length override and plugin context-engine init).
try:
from hermes_cli.config import get_compatible_custom_providers
_custom_providers = get_compatible_custom_providers(_agent_cfg)
except Exception:
_custom_providers = _agent_cfg.get("custom_providers")
if not isinstance(_custom_providers, list):
_custom_providers = []
# Check custom_providers per-model context_length
if _config_context_length is None and _custom_providers:
if _config_context_length is None:
try:
from hermes_cli.config import get_custom_provider_context_length
_cp_ctx_resolved = get_custom_provider_context_length(
model=self.model,
base_url=self.base_url,
custom_providers=_custom_providers,
)
if _cp_ctx_resolved:
_config_context_length = int(_cp_ctx_resolved)
from hermes_cli.config import get_compatible_custom_providers
_custom_providers = get_compatible_custom_providers(_agent_cfg)
except Exception:
_cp_ctx_resolved = None
# Surface a clear warning if the user set a context_length but it
# wasn't a valid positive int — the helper silently skips those.
if _config_context_length is None:
_target = self.base_url.rstrip("/") if self.base_url else ""
for _cp_entry in _custom_providers:
if not isinstance(_cp_entry, dict):
continue
_cp_url = (_cp_entry.get("base_url") or "").rstrip("/")
if _target and _cp_url == _target:
_cp_models = _cp_entry.get("models", {})
if isinstance(_cp_models, dict):
_cp_model_cfg = _cp_models.get(self.model, {})
if isinstance(_cp_model_cfg, dict):
_cp_ctx = _cp_model_cfg.get("context_length")
if _cp_ctx is not None:
try:
_parsed = int(_cp_ctx)
if _parsed <= 0:
raise ValueError
except (TypeError, ValueError):
logger.warning(
"Invalid context_length for model %r in "
"custom_providers: %r — must be a positive "
"integer (e.g. 256000, not '256K'). "
"Falling back to auto-detection.",
self.model, _cp_ctx,
)
print(
f"\n⚠ Invalid context_length for model {self.model!r} in custom_providers: {_cp_ctx!r}\n"
f" Must be a positive integer (e.g. 256000, not '256K').\n"
f" Falling back to auto-detected context window.\n",
file=sys.stderr,
)
break
_custom_providers = _agent_cfg.get("custom_providers")
if not isinstance(_custom_providers, list):
_custom_providers = []
for _cp_entry in _custom_providers:
if not isinstance(_cp_entry, dict):
continue
_cp_url = (_cp_entry.get("base_url") or "").rstrip("/")
if _cp_url and _cp_url == self.base_url.rstrip("/"):
_cp_models = _cp_entry.get("models", {})
if isinstance(_cp_models, dict):
_cp_model_cfg = _cp_models.get(self.model, {})
if isinstance(_cp_model_cfg, dict):
_cp_ctx = _cp_model_cfg.get("context_length")
if _cp_ctx is not None:
try:
_config_context_length = int(_cp_ctx)
except (TypeError, ValueError):
logger.warning(
"Invalid context_length for model %r in "
"custom_providers: %r — must be a plain "
"integer (e.g. 256000, not '256K'). "
"Falling back to auto-detection.",
self.model, _cp_ctx,
)
print(
f"\n⚠ Invalid context_length for model {self.model!r} in custom_providers: {_cp_ctx!r}\n"
f" Must be a plain integer (e.g. 256000, not '256K').\n"
f" Falling back to auto-detected context window.\n",
file=sys.stderr,
)
break
# Select context engine: config-driven (like memory providers).
# 1. Check config.yaml context.engine setting
@@ -1891,7 +1851,6 @@ class AIAgent:
api_key=getattr(self, "api_key", ""),
config_context_length=_config_context_length,
provider=self.provider,
custom_providers=_custom_providers,
)
self.context_compressor.update_model(
model=self.model,
@@ -2182,23 +2141,12 @@ class AIAgent:
# ── Update context compressor ──
if hasattr(self, "context_compressor") and self.context_compressor:
from agent.model_metadata import get_model_context_length
# Re-read custom_providers from live config so per-model
# context_length overrides are honored when switching to a
# custom provider mid-session (closes #15779).
_sm_custom_providers = None
try:
from hermes_cli.config import load_config, get_compatible_custom_providers
_sm_cfg = load_config()
_sm_custom_providers = get_compatible_custom_providers(_sm_cfg)
except Exception:
_sm_custom_providers = None
new_context_length = get_model_context_length(
self.model,
base_url=self.base_url,
api_key=self.api_key,
provider=self.provider,
config_context_length=getattr(self, "_config_context_length", None),
custom_providers=_sm_custom_providers,
)
self.context_compressor.update_model(
model=self.model,
@@ -2556,22 +2504,6 @@ class AIAgent:
)
return hostname == "api.openai.com"
def _is_azure_openai_url(self, base_url: str = None) -> bool:
"""Return True when a base URL targets Azure OpenAI.
Azure OpenAI exposes an OpenAI-compatible endpoint at
``{resource}.openai.azure.com/openai/v1`` that accepts the
standard ``openai`` Python client. Unlike api.openai.com it
does NOT support the Responses API gpt-5.x models are served
on the regular ``/chat/completions`` path so routing decisions
must treat Azure separately from direct OpenAI.
"""
if base_url is not None:
url = str(base_url).lower()
else:
url = getattr(self, "_base_url_lower", "") or ""
return "openai.azure.com" in url
def _resolved_api_call_timeout(self) -> float:
"""Resolve the effective per-call request timeout in seconds.
@@ -2743,14 +2675,12 @@ class AIAgent:
def _max_tokens_param(self, value: int) -> dict:
"""Return the correct max tokens kwarg for the current provider.
OpenAI's newer models (gpt-4o, o-series, gpt-5+) require
'max_completion_tokens'. Azure OpenAI also requires
'max_completion_tokens' for gpt-5.x models served via the
OpenAI-compatible endpoint. OpenRouter, local models, and older
'max_completion_tokens'. OpenRouter, local models, and older
OpenAI models use 'max_tokens'.
"""
if self._is_direct_openai_url() or self._is_azure_openai_url():
if self._is_direct_openai_url():
return {"max_completion_tokens": value}
return {"max_tokens": value}
@@ -3108,28 +3038,13 @@ class AIAgent:
)
_SKILL_REVIEW_PROMPT = (
"Review the conversation above and consider whether a skill should be saved or updated.\n\n"
"Work in this order — do not skip steps:\n\n"
"1. SURVEY the existing skill landscape first. Call skills_list to see what you "
"have. If anything looks potentially relevant, skill_view it before deciding. "
"You are looking for the CLASS of task that just happened, not the exact task. "
"Example: a successful Tauri build is in the class \"desktop app build "
"troubleshooting\", not \"fix my specific Tauri error today\".\n\n"
"2. THINK CLASS-FIRST. What general pattern of task did the user just complete? "
"What conditions will trigger this pattern again? Describe the class in one "
"sentence before looking at what to save.\n\n"
"3. PREFER GENERALIZING AN EXISTING SKILL over creating a new one. If a skill "
"already covers the class — even partially — update it (skill_manage patch) "
"with the new insight. Broaden its \"when to use\" trigger if needed.\n\n"
"4. ONLY CREATE A NEW SKILL when no existing skill reasonably covers the class. "
"When you create one, name and scope it at the class level "
"(\"react-i18n-setup\", not \"add-i18n-to-my-dashboard-app\"). The trigger "
"section must describe the class of situations, not this one session.\n\n"
"5. If you notice two existing skills that overlap, note it in your response "
"so a future review can consolidate them. Do not consolidate now unless the "
"overlap is obvious and low-risk.\n\n"
"Only act when something is genuinely worth saving. "
"If nothing stands out, just say 'Nothing to save.' and stop."
"Review the conversation above and consider saving or updating a skill if appropriate.\n\n"
"Focus on: was a non-trivial approach used to complete a task that required trial "
"and error, or changing course due to experiential findings along the way, or did "
"the user expect or desire a different method or outcome?\n\n"
"If a relevant skill already exists, update it with what you learned. "
"Otherwise, create a new skill if the approach is reusable.\n"
"If nothing is worth saving, just say 'Nothing to save.' and stop."
)
_COMBINED_REVIEW_PROMPT = (
@@ -3139,16 +3054,9 @@ class AIAgent:
"about how you should behave, their work style, or ways they want you to operate? "
"If so, save using the memory tool.\n\n"
"**Skills**: Was a non-trivial approach used to complete a task that required trial "
"and error, changing course due to experiential findings, or a different method "
"or outcome than the user expected? If so, work in this order:\n"
" a. SURVEY existing skills first (skills_list, then skill_view on candidates).\n"
" b. Identify the CLASS of task, not the specific task "
"(\"desktop app build troubleshooting\", not \"fix my Tauri error\").\n"
" c. PREFER UPDATING/GENERALIZING an existing skill that covers the class.\n"
" d. ONLY CREATE A NEW SKILL if no existing one covers the class. Scope at "
"the class level, not this one session.\n"
" e. If you notice overlapping skills during the survey, note it so a future "
"review can consolidate them.\n\n"
"and error, or changing course due to experiential findings along the way, or did "
"the user expect or desire a different method or outcome? If a relevant skill "
"already exists, update it. Otherwise, create a new one if the approach is reusable.\n\n"
"Only act if there's something genuinely worth saving. "
"If nothing stands out, just say 'Nothing to save.' and stop."
)
@@ -3246,25 +3154,12 @@ class AIAgent:
with open(os.devnull, "w") as _devnull, \
contextlib.redirect_stdout(_devnull), \
contextlib.redirect_stderr(_devnull):
# Inherit the parent agent's live runtime (provider, model,
# base_url, api_key, api_mode) so the fork uses the exact
# same credentials the main turn is using. Without this,
# AIAgent.__init__ re-runs auto-resolution from env vars,
# which fails for OAuth-only providers, session-scoped
# creds, or credential-pool setups where the resolver can't
# reconstruct auth from scratch -- producing the spurious
# "No LLM provider configured" warning at end of turn.
_parent_runtime = self._current_main_runtime()
review_agent = AIAgent(
model=self.model,
max_iterations=8,
quiet_mode=True,
platform=self.platform,
provider=self.provider,
api_mode=_parent_runtime.get("api_mode") or None,
base_url=_parent_runtime.get("base_url") or None,
api_key=_parent_runtime.get("api_key") or None,
credential_pool=getattr(self, "_credential_pool", None),
parent_session_id=self.session_id,
)
review_agent._memory_write_origin = "background_review"
@@ -3365,7 +3260,10 @@ class AIAgent:
"""Save session state to both JSON log and SQLite on any exit path.
Ensures conversations are never lost, even on errors or early returns.
Skipped when ``persist_session=False`` (ephemeral helper flows).
"""
if not self.persist_session:
return
self._apply_persist_user_message_override(messages)
self._session_messages = messages
self._save_session_log(messages)
@@ -3415,7 +3313,6 @@ class AIAgent:
reasoning_content=msg.get("reasoning_content") if role == "assistant" else None,
reasoning_details=msg.get("reasoning_details") if role == "assistant" else None,
codex_reasoning_items=msg.get("codex_reasoning_items") if role == "assistant" else None,
codex_message_items=msg.get("codex_message_items") if role == "assistant" else None,
)
self._last_flushed_db_idx = len(messages)
except Exception as e:
@@ -4498,12 +4395,6 @@ class AIAgent:
tool_guidance.append(SESSION_SEARCH_GUIDANCE)
if "skill_manage" in self.valid_tool_names:
tool_guidance.append(SKILLS_GUIDANCE)
# Kanban worker/orchestrator lifecycle — only present when the
# dispatcher spawned this process (kanban_show check_fn gates on
# HERMES_KANBAN_TASK env var). Normal chat sessions never see
# this block.
if "kanban_show" in self.valid_tool_names:
tool_guidance.append(KANBAN_GUIDANCE)
if tool_guidance:
prompt_parts.append(" ".join(tool_guidance))
@@ -5546,11 +5437,6 @@ class AIAgent:
# Other anthropic_messages providers (MiniMax, Alibaba, etc.) use their own keys.
if self.provider != "anthropic":
return False
# Azure endpoints use static API keys — OAuth token rotation doesn't apply.
# Refreshing would pick up ~/.claude/.credentials.json OAuth token and break auth.
_base = getattr(self, "_anthropic_base_url", "") or ""
if "azure.com" in _base:
return False
try:
from agent.anthropic_adapter import resolve_anthropic_token, build_anthropic_client
@@ -6907,15 +6793,10 @@ class AIAgent:
# Determine api_mode from provider / base URL / model
fb_api_mode = "chat_completions"
fb_base_url = str(fb_client.base_url)
_fb_is_azure = self._is_azure_openai_url(fb_base_url)
if fb_provider == "openai-codex":
fb_api_mode = "codex_responses"
elif fb_provider == "anthropic" or fb_base_url.rstrip("/").lower().endswith("/anthropic"):
fb_api_mode = "anthropic_messages"
elif _fb_is_azure:
# Azure OpenAI serves gpt-5.x on /chat/completions — does NOT
# support the Responses API. Stay on chat_completions.
fb_api_mode = "chat_completions"
elif self._is_direct_openai_url(fb_base_url):
fb_api_mode = "codex_responses"
elif self._provider_model_requires_responses_api(
@@ -7788,13 +7669,6 @@ class AIAgent:
if codex_items:
msg["codex_reasoning_items"] = codex_items
# Codex Responses API: preserve exact assistant message items (with
# id/phase) so follow-up turns can replay structured items instead of
# flattening to plain text. This is required for prefix cache hits.
codex_message_items = getattr(assistant_message, "codex_message_items", None)
if codex_message_items:
msg["codex_message_items"] = codex_message_items
if assistant_message.tool_calls:
tool_calls = []
for tool_call in assistant_message.tool_calls:
@@ -7888,17 +7762,7 @@ class AIAgent:
api_msg["reasoning_content"] = existing
return
# 2. Healthy session: promote 'reasoning' field to 'reasoning_content'
# for providers that use the internal 'reasoning' key.
# This must happen BEFORE the DeepSeek/Kimi tool-call check so that
# genuine reasoning content is not overwritten by the empty-string
# fallback (#15812 regression in PR #15478).
normalized_reasoning = source_msg.get("reasoning")
if isinstance(normalized_reasoning, str) and normalized_reasoning:
api_msg["reasoning_content"] = normalized_reasoning
return
# 3. DeepSeek / Kimi thinking mode: tool-call turns that lack
# 2. DeepSeek / Kimi thinking mode: tool-call turns that lack
# reasoning_content are "poisoned history" — a prior provider (MiniMax,
# etc.) left them empty. DeepSeek returns HTTP 400 if reasoning_content
# is absent on replay; inject "" to satisfy the provider's requirement
@@ -7914,6 +7778,13 @@ class AIAgent:
api_msg["reasoning_content"] = ""
return
# 3. Healthy session: promote 'reasoning' field to 'reasoning_content'
# for providers that use the internal 'reasoning' key.
normalized_reasoning = source_msg.get("reasoning")
if isinstance(normalized_reasoning, str) and normalized_reasoning:
api_msg["reasoning_content"] = normalized_reasoning
return
# 4. DeepSeek / Kimi thinking mode: all assistant messages need
# reasoning_content. Inject "" to satisfy the provider's requirement
# when no explicit reasoning content is present.
@@ -11047,69 +10918,36 @@ class AIAgent:
continue
# ── Nous Portal: record rate limit & skip retries ─────
# When Nous returns a 429 that is a genuine account-
# level rate limit, record the reset time to a shared
# file so ALL sessions (cron, gateway, auxiliary) know
# not to pile on, then skip further retries -- each
# one burns another RPH request and deepens the hole.
# The retry loop's top-of-iteration guard will catch
# this on the next pass and try fallback or bail.
#
# IMPORTANT: Nous Portal multiplexes multiple upstream
# providers (DeepSeek, Kimi, MiMo, Hermes). A 429 can
# also mean an UPSTREAM provider is out of capacity
# for one specific model -- transient, clears in
# seconds, nothing to do with the caller's quota.
# Tripping the cross-session breaker on that would
# block every Nous model for minutes. We use
# ``is_genuine_nous_rate_limit`` to tell the two
# apart via the 429's own x-ratelimit-* headers and
# the last-known-good state captured on the previous
# successful response.
# When Nous returns a 429, record the reset time to a
# shared file so ALL sessions (cron, gateway, auxiliary)
# know not to pile on. Then skip further retries —
# each one burns another RPH request and deepens the
# rate limit hole. The retry loop's top-of-iteration
# guard will catch this on the next pass and try
# fallback or bail with a clear message.
if (
is_rate_limited
and self.provider == "nous"
and classified.reason == FailoverReason.rate_limit
and not recovered_with_pool
):
_genuine_nous_rate_limit = False
try:
from agent.nous_rate_guard import (
is_genuine_nous_rate_limit,
record_nous_rate_limit,
)
from agent.nous_rate_guard import record_nous_rate_limit
_err_resp = getattr(api_error, "response", None)
_err_hdrs = (
getattr(_err_resp, "headers", None)
if _err_resp else None
)
_genuine_nous_rate_limit = is_genuine_nous_rate_limit(
record_nous_rate_limit(
headers=_err_hdrs,
last_known_state=self._rate_limit_state,
error_context=error_context,
)
if _genuine_nous_rate_limit:
record_nous_rate_limit(
headers=_err_hdrs,
error_context=error_context,
)
else:
logging.info(
"Nous 429 looks like upstream capacity "
"(no exhausted bucket in headers or "
"last-known state) -- not tripping "
"cross-session breaker."
)
except Exception:
pass
if _genuine_nous_rate_limit:
# Skip straight to max_retries -- the
# top-of-loop guard will handle fallback or
# bail cleanly.
retry_count = max_retries
continue
# Upstream capacity 429: fall through to normal
# retry logic. A different model (or the same
# model a moment later) will typically succeed.
# Skip straight to max_retries — the top-of-loop
# guard will handle fallback or bail cleanly.
retry_count = max_retries
continue
is_payload_too_large = (
classified.reason == FailoverReason.payload_too_large
@@ -11711,26 +11549,16 @@ class AIAgent:
interim_has_content = bool((interim_msg.get("content") or "").strip())
interim_has_reasoning = bool(interim_msg.get("reasoning", "").strip()) if isinstance(interim_msg.get("reasoning"), str) else False
interim_has_codex_reasoning = bool(interim_msg.get("codex_reasoning_items"))
interim_has_codex_message_items = bool(interim_msg.get("codex_message_items"))
if (
interim_has_content
or interim_has_reasoning
or interim_has_codex_reasoning
or interim_has_codex_message_items
):
if interim_has_content or interim_has_reasoning or interim_has_codex_reasoning:
last_msg = messages[-1] if messages else None
# Duplicate detection: two consecutive incomplete assistant
# messages with identical content AND reasoning are collapsed.
# For provider-state-only changes (encrypted reasoning
# items or replayable message ids/phases/statuses differ
# while visible content/reasoning are unchanged), compare
# those opaque payloads too so we don't silently drop the
# newer continuation state.
# For reasoning-only messages (codex_reasoning_items differ but
# visible content/reasoning are both empty), we also compare
# the encrypted items to avoid silently dropping new state.
last_codex_items = last_msg.get("codex_reasoning_items") if isinstance(last_msg, dict) else None
interim_codex_items = interim_msg.get("codex_reasoning_items")
last_codex_message_items = last_msg.get("codex_message_items") if isinstance(last_msg, dict) else None
interim_codex_message_items = interim_msg.get("codex_message_items")
duplicate_interim = (
isinstance(last_msg, dict)
and last_msg.get("role") == "assistant"
@@ -11738,7 +11566,6 @@ class AIAgent:
and (last_msg.get("content") or "") == (interim_msg.get("content") or "")
and (last_msg.get("reasoning") or "") == (interim_msg.get("reasoning") or "")
and last_codex_items == interim_codex_items
and last_codex_message_items == interim_codex_message_items
)
if not duplicate_interim:
messages.append(interim_msg)
-95
View File
@@ -1,95 +0,0 @@
#!/usr/bin/env python3
"""Build the Hermes Model Catalog — a centralized JSON manifest of curated models.
This script reads the in-repo hardcoded curated lists (``OPENROUTER_MODELS``,
``_PROVIDER_MODELS["nous"]``) and writes them to a JSON manifest that the
Hermes CLI fetches at runtime. Publishing the catalog through the docs site
lets maintainers update model lists without shipping a Hermes release.
The runtime fetcher falls back to the same in-repo hardcoded lists if the
manifest is unreachable, so this script is a convenience for keeping the
manifest in sync not a source of truth.
Usage::
python scripts/build_model_catalog.py
Output: ``website/static/api/model-catalog.json``
Live URL (after ``deploy-site.yml`` runs on merge to main):
``https://hermes-agent.nousresearch.com/docs/api/model-catalog.json``
"""
from __future__ import annotations
import json
import os
import sys
from datetime import datetime, timezone
REPO_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.insert(0, REPO_ROOT)
# Ensure HERMES_HOME is set for imports that touch it at module level.
os.environ.setdefault("HERMES_HOME", os.path.join(os.path.expanduser("~"), ".hermes"))
from hermes_cli.models import OPENROUTER_MODELS, _PROVIDER_MODELS # noqa: E402
OUTPUT_PATH = os.path.join(REPO_ROOT, "website", "static", "api", "model-catalog.json")
CATALOG_VERSION = 1
def build_catalog() -> dict:
return {
"version": CATALOG_VERSION,
"updated_at": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
"metadata": {
"source": "hermes-agent repo",
"docs": "https://hermes-agent.nousresearch.com/docs/reference/model-catalog",
},
"providers": {
"openrouter": {
"metadata": {
"display_name": "OpenRouter",
"note": (
"Descriptions drive picker badges. Live /api/v1/models "
"filters curated ids by tool-calling support and free pricing."
),
},
"models": [
{"id": mid, "description": desc}
for mid, desc in OPENROUTER_MODELS
],
},
"nous": {
"metadata": {
"display_name": "Nous Portal",
"note": (
"Free-tier gating is determined live via Portal pricing "
"(partition_nous_models_by_tier), not this manifest."
),
},
"models": [
{"id": mid}
for mid in _PROVIDER_MODELS.get("nous", [])
],
},
},
}
def main() -> int:
catalog = build_catalog()
os.makedirs(os.path.dirname(OUTPUT_PATH), exist_ok=True)
with open(OUTPUT_PATH, "w") as fh:
json.dump(catalog, fh, indent=2)
fh.write("\n")
print(f"Wrote {OUTPUT_PATH}")
for provider, block in catalog["providers"].items():
print(f" {provider}: {len(block['models'])} models")
return 0
if __name__ == "__main__":
sys.exit(main())
-10
View File
@@ -43,7 +43,6 @@ AUTHOR_MAP = {
"teknium1@gmail.com": "teknium1",
"teknium@nousresearch.com": "teknium1",
"127238744+teknium1@users.noreply.github.com": "teknium1",
"focusflow.app.help@gmail.com": "yes999zc",
"343873859@qq.com": "DrStrangerUJN",
"uzmpsk.dilekakbas@gmail.com": "dlkakbs",
"jefferson@heimdallstrategy.com": "Mind-Dragon",
@@ -52,7 +51,6 @@ AUTHOR_MAP = {
"web3blind@users.noreply.github.com": "web3blind",
"julia@alexland.us": "alexg0bot",
"1060770+benjaminsehl@users.noreply.github.com": "benjaminsehl",
"nerijusn76@gmail.com": "Nerijusas",
# contributors (from noreply pattern)
"david.vv@icloud.com": "davidvv",
"wangqiang@wangqiangdeMac-mini.local": "xiaoqiang243",
@@ -69,9 +67,7 @@ AUTHOR_MAP = {
"kshitijk4poor@gmail.com": "kshitijk4poor",
"keira.voss94@gmail.com": "keiravoss94",
"16443023+stablegenius49@users.noreply.github.com": "stablegenius49",
"fqsy1416@gmail.com": "EKKOLearnAI",
"simbamax99@gmail.com": "simbam99",
"iris@growthpillars.co": "irispillars",
"185121704+stablegenius49@users.noreply.github.com": "stablegenius49",
"101283333+batuhankocyigit@users.noreply.github.com": "batuhankocyigit",
"255305877+ismell0992-afk@users.noreply.github.com": "ismell0992-afk",
@@ -96,7 +92,6 @@ AUTHOR_MAP = {
"104278804+Sertug17@users.noreply.github.com": "Sertug17",
"112503481+caentzminger@users.noreply.github.com": "caentzminger",
"258577966+voidborne-d@users.noreply.github.com": "voidborne-d",
"liusway405@gmail.com": "voidborne-d",
"xydarcher@uestc.edu.cn": "Readon",
"sir_even@icloud.com": "sirEven",
"36056348+sirEven@users.noreply.github.com": "sirEven",
@@ -181,10 +176,6 @@ AUTHOR_MAP = {
"jaisehgal11299@gmail.com": "jaisup",
"percydikec@gmail.com": "PercyDikec",
"noonou7@gmail.com": "HenkDz",
# Azure Foundry salvage (PRs #9029, #4599, #10086, #8766)
"tech@smartlogics.net": "TechPrototyper",
"637186+HangGlidersRule@users.noreply.github.com": "HangGlidersRule",
"pein892@gmail.com": "pein892",
"dean.kerr@gmail.com": "deankerr",
"socrates1024@gmail.com": "socrates1024",
"seanalt555@gmail.com": "Salt-555",
@@ -419,7 +410,6 @@ AUTHOR_MAP = {
"105142614+VTRiot@users.noreply.github.com": "VTRiot",
"vivien000812@gmail.com": "iamagenius00",
"89228157+Feranmi10@users.noreply.github.com": "Feranmi10",
"oluwadareferanmi11@gmail.com": "Feranmi10",
"simon@gtcl.us": "simon-gtcl",
"suzukaze.haduki@gmail.com": "houko",
"cliff@cigii.com": "cgarwood82",
@@ -281,6 +281,7 @@ Type these during an interactive chat session.
### Utility
```
/branch (/fork) Branch the current session
/btw Ephemeral side question (doesn't interrupt main task)
/fast Toggle priority/fast processing
/browser Open CDP browser connection
/history Show conversation history (CLI)
-152
View File
@@ -1,152 +0,0 @@
---
name: kanban-orchestrator
description: Decomposition playbook + specialist-roster conventions + anti-temptation rules for an orchestrator profile routing work through Kanban. The "don't do the work yourself" rule and the basic lifecycle are auto-injected into every kanban worker's system prompt; this skill is the deeper playbook when you're specifically playing the orchestrator role.
version: 2.0.0
metadata:
hermes:
tags: [kanban, multi-agent, orchestration, routing]
related_skills: [kanban-worker]
---
# Kanban Orchestrator — Decomposition Playbook
> The **core worker lifecycle** (including the `kanban_create` fan-out pattern and the "decompose, don't execute" rule) is auto-injected into every kanban process via the `KANBAN_GUIDANCE` system-prompt block. This skill is the deeper playbook when you're an orchestrator profile whose whole job is routing.
## When to use the board (vs. just doing the work)
Create Kanban tasks when any of these are true:
1. **Multiple specialists are needed.** Research + analysis + writing is three profiles.
2. **The work should survive a crash or restart.** Long-running, recurring, or important.
3. **The user might want to interject.** Human-in-the-loop at any step.
4. **Multiple subtasks can run in parallel.** Fan-out for speed.
5. **Review / iteration is expected.** A reviewer profile loops on drafter output.
6. **The audit trail matters.** Board rows persist in SQLite forever.
If *none* of those apply — it's a small one-shot reasoning task — use `delegate_task` instead or answer the user directly.
## The anti-temptation rules
Your job description says "route, don't execute." The rules that enforce that:
- **Do not execute the work yourself.** Your restricted toolset usually doesn't even include terminal/file/code/web for implementation. If you find yourself "just fixing this quickly" — stop and create a task for the right specialist.
- **For any concrete task, create a Kanban task and assign it.** Every single time.
- **If no specialist fits, ask the user which profile to create.** Do not default to doing it yourself under "close enough."
- **Decompose, route, and summarize — that's the whole job.**
## The standard specialist roster (convention)
Unless the user's setup has customized profiles, assume these exist. Adjust to whatever the user actually has — ask if you're unsure.
| Profile | Does | Typical workspace |
|---|---|---|
| `researcher` | Reads sources, gathers facts, writes findings | `scratch` |
| `analyst` | Synthesizes, ranks, de-dupes. Consumes multiple `researcher` outputs | `scratch` |
| `writer` | Drafts prose in the user's voice | `scratch` or `dir:` into their Obsidian vault |
| `reviewer` | Reads output, leaves findings, gates approval | `scratch` |
| `backend-eng` | Writes server-side code | `worktree` |
| `frontend-eng` | Writes client-side code | `worktree` |
| `ops` | Runs scripts, manages services, handles deployments | `dir:` into ops scripts repo |
| `pm` | Writes specs, acceptance criteria | `scratch` |
## Decomposition playbook
### Step 1 — Understand the goal
Ask clarifying questions if the goal is ambiguous. Cheap to ask; expensive to spawn the wrong fleet.
### Step 2 — Sketch the task graph
Before creating anything, draft the graph out loud (in your response to the user). Example for "Analyze whether we should migrate to Postgres":
```
T1 researcher research: Postgres cost vs current
T2 researcher research: Postgres performance vs current
T3 analyst synthesize migration recommendation parents: T1, T2
T4 writer draft decision memo parents: T3
```
Show this to the user. Let them correct it before you create anything.
### Step 3 — Create tasks and link
```python
t1 = kanban_create(
title="research: Postgres cost vs current",
assignee="researcher",
body="Compare estimated infrastructure costs, migration costs, and ongoing ops costs over a 3-year window. Sources: AWS/GCP pricing, team time estimates, current Postgres bills from peers.",
tenant=os.environ.get("HERMES_TENANT"),
)["task_id"]
t2 = kanban_create(
title="research: Postgres performance vs current",
assignee="researcher",
body="Compare query latency, throughput, and scaling characteristics at our expected data volume (~500GB, 10k QPS peak). Sources: benchmark papers, public case studies, pgbench results if easy.",
)["task_id"]
t3 = kanban_create(
title="synthesize migration recommendation",
assignee="analyst",
body="Read the findings from T1 (cost) and T2 (performance). Produce a 1-page recommendation with explicit trade-offs and a go/no-go call.",
parents=[t1, t2],
)["task_id"]
t4 = kanban_create(
title="draft decision memo",
assignee="writer",
body="Turn the analyst's recommendation into a 2-page memo for the CTO. Match the tone of previous decision memos in the team's knowledge base.",
parents=[t3],
)["task_id"]
```
`parents=[...]` gates promotion — children stay in `todo` until every parent reaches `done`, then auto-promote to `ready`. No manual coordination needed; the dispatcher and dependency engine handle it.
### Step 4 — Complete your own task
If you were spawned as a task yourself (e.g. `planner` profile was assigned `T0: "investigate Postgres migration"`), mark it done with a summary of what you created:
```python
kanban_complete(
summary="decomposed into T1-T4: 2 researchers parallel, 1 analyst on their outputs, 1 writer on the recommendation",
metadata={
"task_graph": {
"T1": {"assignee": "researcher", "parents": []},
"T2": {"assignee": "researcher", "parents": []},
"T3": {"assignee": "analyst", "parents": ["T1", "T2"]},
"T4": {"assignee": "writer", "parents": ["T3"]},
},
},
)
```
### Step 5 — Report back to the user
Tell them what you created in plain prose:
> I've queued 4 tasks:
> - **T1** (researcher): cost comparison
> - **T2** (researcher): performance comparison, in parallel with T1
> - **T3** (analyst): synthesizes T1 + T2 into a recommendation
> - **T4** (writer): turns T3 into a CTO memo
>
> The dispatcher will pick up T1 and T2 now. T3 starts when both finish. You'll get a gateway ping when T4 completes. Use the dashboard or `hermes kanban tail <id>` to follow along.
## Common patterns
**Fan-out + fan-in (research → synthesize):** N `researcher` tasks with no parents, one `analyst` task with all of them as parents.
**Pipeline with gates:** `pm → backend-eng → reviewer`. Each stage's `parents=[previous_task]`. Reviewer blocks or completes; if reviewer blocks, the operator unblocks with feedback and respawns.
**Same-profile queue:** 50 tasks, all assigned to `translator`, no dependencies between them. Dispatcher serializes — translator processes them in priority order, accumulating experience in their own memory.
**Human-in-the-loop:** Any task can `kanban_block()` to wait for input. Dispatcher respawns after `/unblock`. The comment thread carries the full context.
## Pitfalls
**Reassignment vs. new task.** If a reviewer blocks with "needs changes," create a NEW task linked from the reviewer's task — don't re-run the same task with a stern look. The new task is assigned to the original implementer profile.
**Argument order for links.** `kanban_link(parent_id=..., child_id=...)` — parent first. Mixing them up demotes the wrong task to `todo`.
**Don't pre-create the whole graph if the shape depends on intermediate findings.** If T3's structure depends on what T1 and T2 find, let T3 exist as a "synthesize findings" task whose own first step is to read parent handoffs and plan the rest. Orchestrators can spawn orchestrators.
**Tenant inheritance.** If `HERMES_TENANT` is set in your env, pass `tenant=os.environ.get("HERMES_TENANT")` on every `kanban_create` call so child tasks stay in the same namespace.
-134
View File
@@ -1,134 +0,0 @@
---
name: kanban-worker
description: Pitfalls, examples, and edge cases for Hermes Kanban workers. The lifecycle itself is auto-injected into every worker's system prompt as KANBAN_GUIDANCE (from agent/prompt_builder.py); this skill is what you load when you want deeper detail on specific scenarios.
version: 2.0.0
metadata:
hermes:
tags: [kanban, multi-agent, collaboration, workflow, pitfalls]
related_skills: [kanban-orchestrator]
---
# Kanban Worker — Pitfalls and Examples
> You're seeing this skill because the Hermes Kanban dispatcher spawned you as a worker with `--skills kanban-worker` — it's loaded automatically for every dispatched worker. The **lifecycle** (6 steps: orient → work → heartbeat → block/complete) also lives in the `KANBAN_GUIDANCE` block that's auto-injected into your system prompt. This skill is the deeper detail: good handoff shapes, retry diagnostics, edge cases.
## Workspace handling
Your workspace kind determines how you should behave inside `$HERMES_KANBAN_WORKSPACE`:
| Kind | What it is | How to work |
|---|---|---|
| `scratch` | Fresh tmp dir, yours alone | Read/write freely; it gets GC'd when the task is archived. |
| `dir:<path>` | Shared persistent directory | Other runs will read what you write. Treat it like long-lived state. Path is guaranteed absolute (the kernel rejects relative paths). |
| `worktree` | Git worktree at the resolved path | If `.git` doesn't exist, run `git worktree add <path> <branch>` from the main repo first, then cd and work normally. Commit work here. |
## Tenant isolation
If `$HERMES_TENANT` is set, the task belongs to a tenant namespace. When reading or writing persistent memory, prefix memory entries with the tenant so context doesn't leak across tenants:
- Good: `business-a: Acme is our biggest customer`
- Bad (leaks): `Acme is our biggest customer`
## Good summary + metadata shapes
The `kanban_complete(summary=..., metadata=...)` handoff is how downstream workers read what you did. Patterns that work:
**Coding task:**
```python
kanban_complete(
summary="shipped rate limiter — token bucket, keys on user_id with IP fallback, 14 tests pass",
metadata={
"changed_files": ["rate_limiter.py", "tests/test_rate_limiter.py"],
"tests_run": 14,
"tests_passed": 14,
"decisions": ["user_id primary, IP fallback for unauthenticated requests"],
},
)
```
**Research task:**
```python
kanban_complete(
summary="3 competing libraries reviewed; vLLM wins on throughput, SGLang on latency, Tensorrt-LLM on memory efficiency",
metadata={
"sources_read": 12,
"recommendation": "vLLM",
"benchmarks": {"vllm": 1.0, "sglang": 0.87, "trtllm": 0.72},
},
)
```
**Review task:**
```python
kanban_complete(
summary="reviewed PR #123; 2 blocking issues found (SQL injection in /search, missing CSRF on /settings)",
metadata={
"pr_number": 123,
"findings": [
{"severity": "critical", "file": "api/search.py", "line": 42, "issue": "raw SQL concat"},
{"severity": "high", "file": "api/settings.py", "issue": "missing CSRF middleware"},
],
"approved": False,
},
)
```
Shape `metadata` so downstream parsers (reviewers, aggregators, schedulers) can use it without re-reading your prose.
## Block reasons that get answered fast
Bad: `"stuck"` — the human has no context.
Good: one sentence naming the specific decision you need. Leave longer context as a comment instead.
```python
kanban_comment(
task_id=os.environ["HERMES_KANBAN_TASK"],
body="Full context: I have user IPs from Cloudflare headers but some users are behind NATs with thousands of peers. Keying on IP alone causes false positives.",
)
kanban_block(reason="Rate limit key choice: IP (simple, NAT-unsafe) or user_id (requires auth, skips anonymous endpoints)?")
```
The block message is what appears in the dashboard / gateway notifier. The comment is the deeper context a human reads when they open the task.
## Heartbeats worth sending
Good heartbeats name progress: `"epoch 12/50, loss 0.31"`, `"scanned 1.2M/2.4M rows"`, `"uploaded 47/120 videos"`.
Bad heartbeats: `"still working"`, empty notes, sub-second intervals. Every few minutes max; skip entirely for tasks under ~2 minutes.
## Retry scenarios
If you open the task and `kanban_show` returns `runs: [...]` with one or more closed runs, you're a retry. The prior runs' `outcome` / `summary` / `error` tell you what didn't work. Don't repeat that path. Typical retry diagnostics:
- `outcome: "timed_out"` — the previous attempt hit `max_runtime_seconds`. You may need to chunk the work or shorten it.
- `outcome: "crashed"` — OOM or segfault. Reduce memory footprint.
- `outcome: "spawn_failed"` + `error: "..."` — usually a profile config issue (missing credential, bad PATH). Ask the human via `kanban_block` instead of retrying blindly.
- `outcome: "reclaimed"` + `summary: "task archived..."` — operator archived the task out from under the previous run; you probably shouldn't be running at all, check status carefully.
- `outcome: "blocked"` — a previous attempt blocked; the unblock comment should be in the thread by now.
## Do NOT
- Call `delegate_task` as a substitute for `kanban_create`. `delegate_task` is for short reasoning subtasks inside YOUR run; `kanban_create` is for cross-agent handoffs that outlive one API loop.
- Modify files outside `$HERMES_KANBAN_WORKSPACE` unless the task body says to.
- Create follow-up tasks assigned to yourself — assign to the right specialist.
- Complete a task you didn't actually finish. Block it instead.
## Pitfalls
**Task state can change between dispatch and your startup.** Between when the dispatcher claimed and when your process actually booted, the task may have been blocked, reassigned, or archived. Always `kanban_show` first. If it reports `blocked` or `archived`, stop — you shouldn't be running.
**Workspace may have stale artifacts.** Especially `dir:` and `worktree` workspaces can have files from previous runs. Read the comment thread — it usually explains why you're running again and what state the workspace is in.
**Don't rely on the CLI when the guidance is available.** The `kanban_*` tools work across all terminal backends (Docker, Modal, SSH). `hermes kanban <verb>` from your terminal tool will fail in containerized backends because the CLI isn't installed there. When in doubt, use the tool.
## CLI fallback (for scripting)
Every tool has a CLI equivalent for human operators and scripts:
- `kanban_show``hermes kanban show <id> --json`
- `kanban_complete``hermes kanban complete <id> --summary "..." --metadata '{...}'`
- `kanban_block``hermes kanban block <id> "reason"`
- `kanban_create``hermes kanban create "title" --assignee <profile> [--parent <id>]`
- etc.
Use the tools from inside an agent; the CLI exists for the human at the terminal.
+8 -51
View File
@@ -192,43 +192,6 @@ class TestDefaultContextLengths:
f"{model_id}: expected {expected_ctx}, got {actual}"
)
def test_deepseek_v4_models_1m_context(self):
from agent.model_metadata import get_model_context_length
from unittest.mock import patch as mock_patch
expected_keys = {
"deepseek-v4-pro": 1_000_000,
"deepseek-v4-flash": 1_000_000,
"deepseek-chat": 1_000_000,
"deepseek-reasoner": 1_000_000,
}
for key, value in expected_keys.items():
assert key in DEFAULT_CONTEXT_LENGTHS, f"{key} missing"
assert DEFAULT_CONTEXT_LENGTHS[key] == value, (
f"{key} should be {value}, got {DEFAULT_CONTEXT_LENGTHS[key]}"
)
# Longest-first substring matching must resolve both the bare V4
# ids (native DeepSeek) and the vendor-prefixed forms (OpenRouter
# / Nous Portal) to 1M without probing down to the legacy 128K
# ``deepseek`` substring fallback.
with mock_patch("agent.model_metadata.fetch_model_metadata", return_value={}), \
mock_patch("agent.model_metadata.fetch_endpoint_model_metadata", return_value={}), \
mock_patch("agent.model_metadata.get_cached_context_length", return_value=None):
cases = [
("deepseek-v4-pro", 1_000_000),
("deepseek-v4-flash", 1_000_000),
("deepseek/deepseek-v4-pro", 1_000_000),
("deepseek/deepseek-v4-flash", 1_000_000),
("deepseek-chat", 1_000_000),
("deepseek-reasoner", 1_000_000),
]
for model_id, expected_ctx in cases:
actual = get_model_context_length(model_id)
assert actual == expected_ctx, (
f"{model_id}: expected {expected_ctx}, got {actual}"
)
def test_all_values_positive(self):
for key, value in DEFAULT_CONTEXT_LENGTHS.items():
assert value > 0, f"{key} has non-positive context length"
@@ -340,9 +303,7 @@ class TestCodexOAuthContextLength:
from agent.model_metadata import get_model_context_length
# OpenRouter — should hit its own catalog path first; when mocked
# empty, falls through to hardcoded DEFAULT_CONTEXT_LENGTHS (1.05M,
# matching the real direct-API value — Codex OAuth's 272k cap is
# provider-specific and must not leak here).
# empty, falls through to hardcoded DEFAULT_CONTEXT_LENGTHS (400k).
with patch("agent.model_metadata.fetch_model_metadata", return_value={}), \
patch("agent.model_metadata.fetch_endpoint_model_metadata", return_value={}), \
patch("agent.model_metadata.get_cached_context_length", return_value=None), \
@@ -353,7 +314,7 @@ class TestCodexOAuthContextLength:
api_key="",
provider="openrouter",
)
assert ctx == 1_050_000, (
assert ctx == 400_000, (
f"Non-Codex gpt-5.5 resolved to {ctx}; Codex 272k override "
"leaked outside openai-codex provider"
)
@@ -498,10 +459,9 @@ class TestGetModelContextLength:
@patch("agent.model_metadata.fetch_model_metadata")
def test_api_missing_context_length_key(self, mock_fetch):
"""Model in API but without context_length → defaults to the top
probe tier (currently 256K)."""
"""Model in API but without context_length → defaults to 128000."""
mock_fetch.return_value = {"test/model": {"name": "Test"}}
assert get_model_context_length("test/model") == CONTEXT_PROBE_TIERS[0]
assert get_model_context_length("test/model") == 128000
@patch("agent.model_metadata.fetch_model_metadata")
def test_cache_takes_priority_over_api(self, mock_fetch, tmp_path):
@@ -854,17 +814,14 @@ class TestContextProbeTiers:
for i in range(len(CONTEXT_PROBE_TIERS) - 1):
assert CONTEXT_PROBE_TIERS[i] > CONTEXT_PROBE_TIERS[i + 1]
def test_first_tier_is_256k(self):
assert CONTEXT_PROBE_TIERS[0] == 256_000
def test_first_tier_is_128k(self):
assert CONTEXT_PROBE_TIERS[0] == 128_000
def test_last_tier_is_8k(self):
assert CONTEXT_PROBE_TIERS[-1] == 8_000
class TestGetNextProbeTier:
def test_from_256k(self):
assert get_next_probe_tier(256_000) == 128_000
def test_from_128k(self):
assert get_next_probe_tier(128_000) == 64_000
@@ -884,8 +841,8 @@ class TestGetNextProbeTier:
assert get_next_probe_tier(100_000) == 64_000
def test_above_max_tier(self):
"""Value above 256K should return 256K."""
assert get_next_probe_tier(500_000) == 256_000
"""Value above 128K should return 128K."""
assert get_next_probe_tier(500_000) == 128_000
def test_zero_returns_none(self):
assert get_next_probe_tier(0) is None
-138
View File
@@ -251,141 +251,3 @@ class TestAuxiliaryClientIntegration:
monkeypatch.setattr(aux, "_read_nous_auth", lambda: None)
result = aux._try_nous()
assert result == (None, None)
class TestIsGenuineNousRateLimit:
"""Tell a real account-level 429 apart from an upstream-capacity 429.
Nous Portal multiplexes upstreams (DeepSeek, Kimi, MiMo, Hermes).
A 429 from an upstream out of capacity should NOT trip the
cross-session breaker; a real user-quota 429 should.
"""
def test_exhausted_hourly_bucket_in_429_headers_is_genuine(self):
from agent.nous_rate_guard import is_genuine_nous_rate_limit
headers = {
"x-ratelimit-limit-requests-1h": "800",
"x-ratelimit-remaining-requests-1h": "0",
"x-ratelimit-reset-requests-1h": "3100",
"x-ratelimit-limit-requests": "200",
"x-ratelimit-remaining-requests": "198",
"x-ratelimit-reset-requests": "40",
}
assert is_genuine_nous_rate_limit(headers=headers) is True
def test_exhausted_tokens_bucket_is_genuine(self):
from agent.nous_rate_guard import is_genuine_nous_rate_limit
headers = {
"x-ratelimit-limit-tokens": "800000",
"x-ratelimit-remaining-tokens": "0",
"x-ratelimit-reset-tokens": "45", # < 60s threshold -> not genuine
"x-ratelimit-limit-tokens-1h": "8000000",
"x-ratelimit-remaining-tokens-1h": "0",
"x-ratelimit-reset-tokens-1h": "1800", # >= 60s threshold -> genuine
}
assert is_genuine_nous_rate_limit(headers=headers) is True
def test_healthy_headers_on_429_are_upstream_capacity(self):
# Classic upstream-capacity symptom: Nous edge reports plenty of
# headroom on every bucket, but returns 429 anyway because
# upstream (DeepSeek / Kimi / ...) is out of capacity.
from agent.nous_rate_guard import is_genuine_nous_rate_limit
headers = {
"x-ratelimit-limit-requests": "200",
"x-ratelimit-remaining-requests": "198",
"x-ratelimit-reset-requests": "40",
"x-ratelimit-limit-requests-1h": "800",
"x-ratelimit-remaining-requests-1h": "750",
"x-ratelimit-reset-requests-1h": "3100",
"x-ratelimit-limit-tokens": "800000",
"x-ratelimit-remaining-tokens": "790000",
"x-ratelimit-reset-tokens": "40",
"x-ratelimit-limit-tokens-1h": "8000000",
"x-ratelimit-remaining-tokens-1h": "7800000",
"x-ratelimit-reset-tokens-1h": "3100",
}
assert is_genuine_nous_rate_limit(headers=headers) is False
def test_bare_429_with_no_headers_is_upstream(self):
from agent.nous_rate_guard import is_genuine_nous_rate_limit
assert is_genuine_nous_rate_limit(headers=None) is False
assert is_genuine_nous_rate_limit(headers={}) is False
assert is_genuine_nous_rate_limit(
headers={"content-type": "application/json"}
) is False
def test_exhausted_bucket_with_short_reset_is_not_genuine(self):
# remaining == 0 but reset in < 60s: almost certainly a
# secondary per-minute throttle that will clear immediately --
# not worth tripping the cross-session breaker.
from agent.nous_rate_guard import is_genuine_nous_rate_limit
headers = {
"x-ratelimit-limit-requests": "200",
"x-ratelimit-remaining-requests": "0",
"x-ratelimit-reset-requests": "30",
}
assert is_genuine_nous_rate_limit(headers=headers) is False
def test_last_known_state_with_exhausted_bucket_triggers_genuine(self):
# Headers on the 429 lack rate-limit info, but the previous
# successful response already showed the hourly bucket
# exhausted -- the 429 is almost certainly that limit
# continuing.
from agent.nous_rate_guard import is_genuine_nous_rate_limit
from agent.rate_limit_tracker import parse_rate_limit_headers
prior_headers = {
"x-ratelimit-limit-requests-1h": "800",
"x-ratelimit-remaining-requests-1h": "0",
"x-ratelimit-reset-requests-1h": "2000",
"x-ratelimit-limit-requests": "200",
"x-ratelimit-remaining-requests": "100",
"x-ratelimit-reset-requests": "30",
"x-ratelimit-limit-tokens": "800000",
"x-ratelimit-remaining-tokens": "700000",
"x-ratelimit-reset-tokens": "30",
"x-ratelimit-limit-tokens-1h": "8000000",
"x-ratelimit-remaining-tokens-1h": "7000000",
"x-ratelimit-reset-tokens-1h": "2000",
}
last_state = parse_rate_limit_headers(prior_headers, provider="nous")
assert is_genuine_nous_rate_limit(
headers=None, last_known_state=last_state
) is True
def test_last_known_state_all_healthy_stays_upstream(self):
# Prior state was healthy; bare 429 arrives; should be treated
# as upstream capacity.
from agent.nous_rate_guard import is_genuine_nous_rate_limit
from agent.rate_limit_tracker import parse_rate_limit_headers
prior_headers = {
"x-ratelimit-limit-requests-1h": "800",
"x-ratelimit-remaining-requests-1h": "750",
"x-ratelimit-reset-requests-1h": "2000",
"x-ratelimit-limit-requests": "200",
"x-ratelimit-remaining-requests": "180",
"x-ratelimit-reset-requests": "30",
"x-ratelimit-limit-tokens": "800000",
"x-ratelimit-remaining-tokens": "790000",
"x-ratelimit-reset-tokens": "30",
"x-ratelimit-limit-tokens-1h": "8000000",
"x-ratelimit-remaining-tokens-1h": "7900000",
"x-ratelimit-reset-tokens-1h": "2000",
}
last_state = parse_rate_limit_headers(prior_headers, provider="nous")
assert is_genuine_nous_rate_limit(
headers=None, last_known_state=last_state
) is False
def test_none_last_state_and_no_headers_is_upstream(self):
from agent.nous_rate_guard import is_genuine_nous_rate_limit
assert is_genuine_nous_rate_limit(
headers=None, last_known_state=None
) is False
-164
View File
@@ -1,164 +0,0 @@
"""Tests for agent/onboarding.py — contextual first-touch hint helpers."""
from __future__ import annotations
import yaml
import pytest
from agent.onboarding import (
BUSY_INPUT_FLAG,
TOOL_PROGRESS_FLAG,
busy_input_hint_cli,
busy_input_hint_gateway,
is_seen,
mark_seen,
tool_progress_hint_cli,
tool_progress_hint_gateway,
)
class TestIsSeen:
def test_empty_config_unseen(self):
assert is_seen({}, BUSY_INPUT_FLAG) is False
def test_missing_onboarding_unseen(self):
assert is_seen({"display": {}}, BUSY_INPUT_FLAG) is False
def test_onboarding_not_dict_unseen(self):
assert is_seen({"onboarding": "nope"}, BUSY_INPUT_FLAG) is False
def test_seen_dict_missing_flag(self):
assert is_seen({"onboarding": {"seen": {}}}, BUSY_INPUT_FLAG) is False
def test_seen_flag_true(self):
cfg = {"onboarding": {"seen": {BUSY_INPUT_FLAG: True}}}
assert is_seen(cfg, BUSY_INPUT_FLAG) is True
def test_seen_flag_falsy(self):
cfg = {"onboarding": {"seen": {BUSY_INPUT_FLAG: False}}}
assert is_seen(cfg, BUSY_INPUT_FLAG) is False
def test_other_flags_isolated(self):
cfg = {"onboarding": {"seen": {BUSY_INPUT_FLAG: True}}}
assert is_seen(cfg, TOOL_PROGRESS_FLAG) is False
class TestMarkSeen:
def test_creates_missing_file_and_sets_flag(self, tmp_path):
cfg_path = tmp_path / "config.yaml"
assert mark_seen(cfg_path, BUSY_INPUT_FLAG) is True
loaded = yaml.safe_load(cfg_path.read_text())
assert loaded["onboarding"]["seen"][BUSY_INPUT_FLAG] is True
def test_preserves_other_config(self, tmp_path):
cfg_path = tmp_path / "config.yaml"
cfg_path.write_text(yaml.safe_dump({
"model": {"default": "claude-sonnet-4.6"},
"display": {"skin": "default"},
}))
assert mark_seen(cfg_path, BUSY_INPUT_FLAG) is True
loaded = yaml.safe_load(cfg_path.read_text())
assert loaded["model"]["default"] == "claude-sonnet-4.6"
assert loaded["display"]["skin"] == "default"
assert loaded["onboarding"]["seen"][BUSY_INPUT_FLAG] is True
def test_preserves_other_seen_flags(self, tmp_path):
cfg_path = tmp_path / "config.yaml"
cfg_path.write_text(yaml.safe_dump({
"onboarding": {"seen": {TOOL_PROGRESS_FLAG: True}},
}))
assert mark_seen(cfg_path, BUSY_INPUT_FLAG) is True
loaded = yaml.safe_load(cfg_path.read_text())
assert loaded["onboarding"]["seen"][TOOL_PROGRESS_FLAG] is True
assert loaded["onboarding"]["seen"][BUSY_INPUT_FLAG] is True
def test_idempotent(self, tmp_path):
cfg_path = tmp_path / "config.yaml"
mark_seen(cfg_path, BUSY_INPUT_FLAG)
first = cfg_path.read_text()
# Second call must be a no-op on-disk content (file may be touched,
# but the YAML contents should be identical).
mark_seen(cfg_path, BUSY_INPUT_FLAG)
second = cfg_path.read_text()
assert yaml.safe_load(first) == yaml.safe_load(second)
def test_handles_non_dict_onboarding(self, tmp_path):
cfg_path = tmp_path / "config.yaml"
cfg_path.write_text(yaml.safe_dump({"onboarding": "corrupted"}))
assert mark_seen(cfg_path, BUSY_INPUT_FLAG) is True
loaded = yaml.safe_load(cfg_path.read_text())
assert loaded["onboarding"]["seen"][BUSY_INPUT_FLAG] is True
def test_handles_non_dict_seen(self, tmp_path):
cfg_path = tmp_path / "config.yaml"
cfg_path.write_text(yaml.safe_dump({"onboarding": {"seen": "corrupted"}}))
assert mark_seen(cfg_path, BUSY_INPUT_FLAG) is True
loaded = yaml.safe_load(cfg_path.read_text())
assert loaded["onboarding"]["seen"][BUSY_INPUT_FLAG] is True
class TestHintMessages:
def test_busy_input_hint_gateway_interrupt(self):
msg = busy_input_hint_gateway("interrupt")
assert "/busy queue" in msg
assert "interrupted" in msg.lower()
def test_busy_input_hint_gateway_queue(self):
msg = busy_input_hint_gateway("queue")
assert "/busy interrupt" in msg
assert "queued" in msg.lower()
def test_busy_input_hint_cli_interrupt(self):
msg = busy_input_hint_cli("interrupt")
assert "/busy queue" in msg
def test_busy_input_hint_cli_queue(self):
msg = busy_input_hint_cli("queue")
assert "/busy interrupt" in msg
def test_tool_progress_hints_mention_verbose(self):
assert "/verbose" in tool_progress_hint_gateway()
assert "/verbose" in tool_progress_hint_cli()
def test_hints_are_not_empty(self):
for hint in (
busy_input_hint_gateway("queue"),
busy_input_hint_gateway("interrupt"),
busy_input_hint_cli("queue"),
busy_input_hint_cli("interrupt"),
tool_progress_hint_gateway(),
tool_progress_hint_cli(),
):
assert hint.strip()
class TestRoundTrip:
"""After mark_seen, is_seen on the re-loaded config must return True."""
def test_mark_then_is_seen(self, tmp_path):
cfg_path = tmp_path / "config.yaml"
assert mark_seen(cfg_path, BUSY_INPUT_FLAG) is True
loaded = yaml.safe_load(cfg_path.read_text())
assert is_seen(loaded, BUSY_INPUT_FLAG) is True
assert is_seen(loaded, TOOL_PROGRESS_FLAG) is False
def test_mark_both_flags_independently(self, tmp_path):
cfg_path = tmp_path / "config.yaml"
mark_seen(cfg_path, BUSY_INPUT_FLAG)
mark_seen(cfg_path, TOOL_PROGRESS_FLAG)
loaded = yaml.safe_load(cfg_path.read_text())
assert is_seen(loaded, BUSY_INPUT_FLAG) is True
assert is_seen(loaded, TOOL_PROGRESS_FLAG) is True
@@ -33,18 +33,15 @@ class TestChatCompletionsBasic:
def test_convert_messages_strips_codex_fields(self, transport):
msgs = [
{"role": "assistant", "content": "ok", "codex_reasoning_items": [{"id": "rs_1"}],
"codex_message_items": [{"id": "msg_1", "type": "message"}],
"tool_calls": [{"id": "call_1", "call_id": "call_1", "response_item_id": "fc_1",
"type": "function", "function": {"name": "t", "arguments": "{}"}}]},
]
result = transport.convert_messages(msgs)
assert "codex_reasoning_items" not in result[0]
assert "codex_message_items" not in result[0]
assert "call_id" not in result[0]["tool_calls"][0]
assert "response_item_id" not in result[0]["tool_calls"][0]
# Original list untouched (deepcopy-on-demand)
assert "codex_reasoning_items" in msgs[0]
assert "codex_message_items" in msgs[0]
class TestChatCompletionsBuildKwargs:
@@ -194,36 +194,6 @@ class TestCodexNormalizeResponse:
assert nr.content == "Hello world"
assert nr.finish_reason == "stop"
def test_message_items_preserved_in_provider_data(self, transport):
"""Codex assistant message item ids/phases must survive transport normalization."""
r = SimpleNamespace(
output=[
SimpleNamespace(
type="message",
role="assistant",
id="msg_abc",
phase="final_answer",
content=[SimpleNamespace(type="output_text", text="Hello world")],
status="completed",
),
],
status="completed",
incomplete_details=None,
usage=SimpleNamespace(input_tokens=10, output_tokens=5,
input_tokens_details=None, output_tokens_details=None),
)
nr = transport.normalize_response(r)
assert nr.codex_message_items == [
{
"type": "message",
"role": "assistant",
"status": "completed",
"content": [{"type": "output_text", "text": "Hello world"}],
"id": "msg_abc",
"phase": "final_answer",
}
]
def test_tool_call_response(self, transport):
"""Normalize a Codex response with tool calls."""
r = SimpleNamespace(
-7
View File
@@ -60,13 +60,6 @@ class TestTransportRegistry:
assert t is not None
assert t.api_mode == "anthropic_messages"
def test_discovers_missing_transport_when_registry_partially_populated(self):
"""Importing one transport directly must not hide other valid api_modes."""
import agent.transports.chat_completions # noqa: F401
t = get_transport("codex_responses")
assert t is not None
assert t.api_mode == "codex_responses"
def test_register_and_get(self):
class DummyTransport(ProviderTransport):
@property
-12
View File
@@ -270,15 +270,3 @@ class TestNormalizedResponseBackwardCompat:
def test_codex_reasoning_items_none_when_absent(self):
nr = NormalizedResponse(content="hi", tool_calls=None, finish_reason="stop")
assert nr.codex_reasoning_items is None
def test_codex_message_items_from_provider_data(self):
items = [{"id": "msg_1", "type": "message"}]
nr = NormalizedResponse(
content="hi", tool_calls=None, finish_reason="stop",
provider_data={"codex_message_items": items},
)
assert nr.codex_message_items == items
def test_codex_message_items_none_when_absent(self):
nr = NormalizedResponse(content="hi", tool_calls=None, finish_reason="stop")
assert nr.codex_message_items is None
-1
View File
@@ -346,7 +346,6 @@ def make_discord_message(
return SimpleNamespace(
id=message_id, content=content, author=author, channel=channel,
guild=getattr(channel, "guild", None),
mentions=mentions, attachments=attachments,
type=getattr(discord, "MessageType", SimpleNamespace()).default,
reference=None, created_at=datetime.now(timezone.utc),
-365
View File
@@ -1,365 +0,0 @@
"""Tests for /v1/runs endpoints: start, events, and stop.
Covers:
- POST /v1/runs start a run (202)
- GET /v1/runs/{run_id}/events SSE event stream
- POST /v1/runs/{run_id}/stop interrupt a running agent
- Auth, error handling, and cleanup
"""
import asyncio
import json
import threading
import time as _time
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from aiohttp import web
from aiohttp.test_utils import TestClient, TestServer
from gateway.config import PlatformConfig
from gateway.platforms.api_server import (
APIServerAdapter,
cors_middleware,
security_headers_middleware,
)
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _make_adapter(api_key: str = "") -> APIServerAdapter:
"""Create an adapter with optional API key."""
extra = {}
if api_key:
extra["key"] = api_key
config = PlatformConfig(enabled=True, extra=extra)
adapter = APIServerAdapter(config)
return adapter
def _create_runs_app(adapter: APIServerAdapter) -> web.Application:
"""Create an aiohttp app with /v1/runs routes registered."""
mws = [mw for mw in (cors_middleware, security_headers_middleware) if mw is not None]
app = web.Application(middlewares=mws)
app["api_server_adapter"] = adapter
app.router.add_post("/v1/runs", adapter._handle_runs)
app.router.add_get("/v1/runs/{run_id}/events", adapter._handle_run_events)
app.router.add_post("/v1/runs/{run_id}/stop", adapter._handle_stop_run)
return app
def _make_slow_agent(**kwargs):
"""Create a mock agent that blocks in run_conversation until interrupted.
Returns (mock_agent, agent_ready_event, interrupt_event) where
agent_ready_event is set once run_conversation starts, and
interrupt_event is set when interrupt() is called.
"""
ready = threading.Event()
interrupted = threading.Event()
mock_agent = MagicMock()
def _do_interrupt(message=None):
interrupted.set()
mock_agent.interrupt = MagicMock(side_effect=_do_interrupt)
def _slow_run(user_message=None, conversation_history=None, task_id=None):
ready.set()
# Block until interrupt() is called
interrupted.wait(timeout=10)
return {"final_response": "interrupted"}
mock_agent.run_conversation.side_effect = _slow_run
mock_agent.session_prompt_tokens = 0
mock_agent.session_completion_tokens = 0
mock_agent.session_total_tokens = 0
return mock_agent, ready, interrupted
@pytest.fixture
def adapter():
return _make_adapter()
@pytest.fixture
def auth_adapter():
return _make_adapter(api_key="sk-secret")
# ---------------------------------------------------------------------------
# POST /v1/runs — start a run
# ---------------------------------------------------------------------------
class TestStartRun:
@pytest.mark.asyncio
async def test_start_returns_202(self, adapter):
app = _create_runs_app(adapter)
async with TestClient(TestServer(app)) as cli:
with patch.object(adapter, "_create_agent") as mock_create:
mock_agent = MagicMock()
mock_agent.run_conversation.return_value = {"final_response": "done"}
mock_agent.session_prompt_tokens = 10
mock_agent.session_completion_tokens = 5
mock_agent.session_total_tokens = 15
mock_create.return_value = mock_agent
resp = await cli.post("/v1/runs", json={"input": "hello"})
assert resp.status == 202
data = await resp.json()
assert data["status"] == "started"
assert data["run_id"].startswith("run_")
@pytest.mark.asyncio
async def test_start_invalid_json_returns_400(self, adapter):
app = _create_runs_app(adapter)
async with TestClient(TestServer(app)) as cli:
resp = await cli.post(
"/v1/runs",
data="not json",
headers={"Content-Type": "application/json"},
)
assert resp.status == 400
@pytest.mark.asyncio
async def test_start_missing_input_returns_400(self, adapter):
app = _create_runs_app(adapter)
async with TestClient(TestServer(app)) as cli:
resp = await cli.post("/v1/runs", json={"model": "test"})
assert resp.status == 400
data = await resp.json()
assert "input" in data["error"]["message"]
@pytest.mark.asyncio
async def test_start_empty_input_returns_400(self, adapter):
app = _create_runs_app(adapter)
async with TestClient(TestServer(app)) as cli:
resp = await cli.post("/v1/runs", json={"input": ""})
assert resp.status == 400
@pytest.mark.asyncio
async def test_start_requires_auth(self, auth_adapter):
app = _create_runs_app(auth_adapter)
async with TestClient(TestServer(app)) as cli:
resp = await cli.post("/v1/runs", json={"input": "hello"})
assert resp.status == 401
@pytest.mark.asyncio
async def test_start_with_valid_auth(self, auth_adapter):
app = _create_runs_app(auth_adapter)
async with TestClient(TestServer(app)) as cli:
with patch.object(auth_adapter, "_create_agent") as mock_create:
mock_agent = MagicMock()
mock_agent.run_conversation.return_value = {"final_response": "ok"}
mock_agent.session_prompt_tokens = 0
mock_agent.session_completion_tokens = 0
mock_agent.session_total_tokens = 0
mock_create.return_value = mock_agent
resp = await cli.post(
"/v1/runs",
json={"input": "hello"},
headers={"Authorization": "Bearer sk-secret"},
)
assert resp.status == 202
# ---------------------------------------------------------------------------
# GET /v1/runs/{run_id}/events — SSE event stream
# ---------------------------------------------------------------------------
class TestRunEvents:
@pytest.mark.asyncio
async def test_events_stream_returns_completed(self, adapter):
"""Events stream should receive run.completed when agent finishes."""
app = _create_runs_app(adapter)
async with TestClient(TestServer(app)) as cli:
with patch.object(adapter, "_create_agent") as mock_create:
mock_agent = MagicMock()
mock_agent.run_conversation.return_value = {"final_response": "Hello!"}
mock_agent.session_prompt_tokens = 10
mock_agent.session_completion_tokens = 5
mock_agent.session_total_tokens = 15
mock_create.return_value = mock_agent
# Start run
resp = await cli.post("/v1/runs", json={"input": "hello"})
assert resp.status == 202
data = await resp.json()
run_id = data["run_id"]
# Subscribe to events
events_resp = await cli.get(f"/v1/runs/{run_id}/events")
assert events_resp.status == 200
body = await events_resp.text()
# Should contain run.completed
assert "run.completed" in body
assert "Hello!" in body
@pytest.mark.asyncio
async def test_events_not_found_returns_404(self, adapter):
app = _create_runs_app(adapter)
async with TestClient(TestServer(app)) as cli:
resp = await cli.get("/v1/runs/run_nonexistent/events")
assert resp.status == 404
@pytest.mark.asyncio
async def test_events_requires_auth(self, auth_adapter):
app = _create_runs_app(auth_adapter)
async with TestClient(TestServer(app)) as cli:
resp = await cli.get("/v1/runs/run_any/events")
assert resp.status == 401
# ---------------------------------------------------------------------------
# POST /v1/runs/{run_id}/stop — interrupt a running agent
# ---------------------------------------------------------------------------
class TestStopRun:
@pytest.mark.asyncio
async def test_stop_running_agent(self, adapter):
"""Stop should interrupt the agent and cancel the task."""
app = _create_runs_app(adapter)
async with TestClient(TestServer(app)) as cli:
with patch.object(adapter, "_create_agent") as mock_create:
mock_agent, agent_ready, _ = _make_slow_agent()
mock_create.return_value = mock_agent
# Start run
resp = await cli.post("/v1/runs", json={"input": "hello"})
assert resp.status == 202
data = await resp.json()
run_id = data["run_id"]
# Wait for agent to start running in the thread
agent_ready.wait(timeout=3.0)
await asyncio.sleep(0.1)
# Verify agent ref is stored
assert run_id in adapter._active_run_agents
# Stop the run
stop_resp = await cli.post(f"/v1/runs/{run_id}/stop")
assert stop_resp.status == 200
stop_data = await stop_resp.json()
assert stop_data["run_id"] == run_id
assert stop_data["status"] == "stopping"
# Agent interrupt should have been called
mock_agent.interrupt.assert_called_once_with("Stop requested via API")
# Refs should be cleaned up
await asyncio.sleep(0.5)
assert run_id not in adapter._active_run_agents
assert run_id not in adapter._active_run_tasks
@pytest.mark.asyncio
async def test_stop_nonexistent_run_returns_404(self, adapter):
app = _create_runs_app(adapter)
async with TestClient(TestServer(app)) as cli:
resp = await cli.post("/v1/runs/run_nonexistent/stop")
assert resp.status == 404
@pytest.mark.asyncio
async def test_stop_requires_auth(self, auth_adapter):
app = _create_runs_app(auth_adapter)
async with TestClient(TestServer(app)) as cli:
resp = await cli.post("/v1/runs/run_any/stop")
assert resp.status == 401
@pytest.mark.asyncio
async def test_stop_already_completed_run_returns_404(self, adapter):
"""Stopping a run that already finished should return 404 (refs cleaned up)."""
app = _create_runs_app(adapter)
async with TestClient(TestServer(app)) as cli:
with patch.object(adapter, "_create_agent") as mock_create:
mock_agent = MagicMock()
mock_agent.run_conversation.return_value = {"final_response": "done"}
mock_agent.session_prompt_tokens = 0
mock_agent.session_completion_tokens = 0
mock_agent.session_total_tokens = 0
mock_create.return_value = mock_agent
# Start and wait for completion
resp = await cli.post("/v1/runs", json={"input": "hello"})
assert resp.status == 202
data = await resp.json()
run_id = data["run_id"]
await asyncio.sleep(0.3)
# Run should be done, refs cleaned up
assert run_id not in adapter._active_run_agents
# Stop should return 404
stop_resp = await cli.post(f"/v1/runs/{run_id}/stop")
assert stop_resp.status == 404
@pytest.mark.asyncio
async def test_stop_interrupt_exception_does_not_crash(self, adapter):
"""If agent.interrupt() raises, stop should still succeed."""
app = _create_runs_app(adapter)
async with TestClient(TestServer(app)) as cli:
with patch.object(adapter, "_create_agent") as mock_create:
mock_agent, agent_ready, _ = _make_slow_agent()
# Override the interrupt side_effect to raise
mock_agent.interrupt = MagicMock(side_effect=RuntimeError("interrupt failed"))
mock_create.return_value = mock_agent
resp = await cli.post("/v1/runs", json={"input": "hello"})
assert resp.status == 202
data = await resp.json()
run_id = data["run_id"]
agent_ready.wait(timeout=3.0)
await asyncio.sleep(0.1)
stop_resp = await cli.post(f"/v1/runs/{run_id}/stop")
assert stop_resp.status == 200
stop_data = await stop_resp.json()
assert stop_data["status"] == "stopping"
@pytest.mark.asyncio
async def test_stop_sends_sentinel_to_events_stream(self, adapter):
"""After stop, the events stream should close."""
app = _create_runs_app(adapter)
async with TestClient(TestServer(app)) as cli:
with patch.object(adapter, "_create_agent") as mock_create:
mock_agent, agent_ready, _ = _make_slow_agent()
mock_create.return_value = mock_agent
# Start run
resp = await cli.post("/v1/runs", json={"input": "hello"})
assert resp.status == 202
data = await resp.json()
run_id = data["run_id"]
agent_ready.wait(timeout=3.0)
await asyncio.sleep(0.1)
# Subscribe to events in background
events_task = asyncio.ensure_future(
cli.get(f"/v1/runs/{run_id}/events")
)
await asyncio.sleep(0.1)
# Stop the run
stop_resp = await cli.post(f"/v1/runs/{run_id}/stop")
assert stop_resp.status == 200
# Events stream should close
events_resp = await asyncio.wait_for(events_task, timeout=5.0)
assert events_resp.status == 200
body = await events_resp.text()
# Stream should have received run.failed and closed
assert "run.failed" in body or "stream closed" in body
-118
View File
@@ -349,121 +349,3 @@ class TestBusySessionAck:
result = await runner._handle_active_session_busy_message(event, sk)
assert result is False # not handled, let default path try
class TestBusySessionOnboardingHint:
"""First-touch hint appended to the busy-ack the first time it fires."""
@pytest.mark.asyncio
async def test_first_busy_ack_appends_interrupt_hint(self, tmp_path, monkeypatch):
"""First busy-while-running message gets an extra hint about /busy."""
import gateway.run as _gr
monkeypatch.setattr(_gr, "_hermes_home", tmp_path)
# mark_seen imports utils.atomic_yaml_write; make sure it resolves
# against a writable dir by pointing _hermes_home at tmp_path.
monkeypatch.setattr(_gr, "_load_gateway_config", lambda: {})
runner, _sentinel = _make_runner()
runner._busy_input_mode = "interrupt"
adapter = _make_adapter()
event = _make_event(text="ping")
sk = build_session_key(event.source)
agent = MagicMock()
agent.get_activity_summary.return_value = {
"api_call_count": 3, "max_iterations": 60,
"current_tool": None, "last_activity_ts": time.time(),
"last_activity_desc": "api", "seconds_since_activity": 0.1,
}
runner._running_agents[sk] = agent
runner._running_agents_ts[sk] = time.time() - 5
runner.adapters[event.source.platform] = adapter
await runner._handle_active_session_busy_message(event, sk)
call_kwargs = adapter._send_with_retry.call_args
content = call_kwargs.kwargs.get("content", "")
# Normal ack body
assert "Interrupting" in content
# First-touch hint appended
assert "First-time tip" in content
assert "/busy queue" in content
# The flag is now persisted to tmp_path/config.yaml
import yaml
cfg = yaml.safe_load((tmp_path / "config.yaml").read_text())
assert cfg["onboarding"]["seen"]["busy_input_prompt"] is True
@pytest.mark.asyncio
async def test_second_busy_ack_omits_hint(self, tmp_path, monkeypatch):
"""Once the flag is marked, the hint never appears again."""
import gateway.run as _gr
import yaml
monkeypatch.setattr(_gr, "_hermes_home", tmp_path)
# Pre-populate the config so is_seen() returns True from the start.
(tmp_path / "config.yaml").write_text(yaml.safe_dump({
"onboarding": {"seen": {"busy_input_prompt": True}},
}))
monkeypatch.setattr(
_gr, "_load_gateway_config",
lambda: yaml.safe_load((tmp_path / "config.yaml").read_text()),
)
runner, _sentinel = _make_runner()
runner._busy_input_mode = "interrupt"
adapter = _make_adapter()
event = _make_event(text="ping again")
sk = build_session_key(event.source)
agent = MagicMock()
agent.get_activity_summary.return_value = {
"api_call_count": 3, "max_iterations": 60,
"current_tool": None, "last_activity_ts": time.time(),
"last_activity_desc": "api", "seconds_since_activity": 0.1,
}
runner._running_agents[sk] = agent
runner._running_agents_ts[sk] = time.time() - 5
runner.adapters[event.source.platform] = adapter
await runner._handle_active_session_busy_message(event, sk)
call_kwargs = adapter._send_with_retry.call_args
content = call_kwargs.kwargs.get("content", "")
assert "Interrupting" in content
assert "First-time tip" not in content
assert "/busy queue" not in content
@pytest.mark.asyncio
async def test_queue_mode_hint_points_to_interrupt(self, tmp_path, monkeypatch):
"""In queue mode the hint should suggest /busy interrupt, not /busy queue."""
import gateway.run as _gr
monkeypatch.setattr(_gr, "_hermes_home", tmp_path)
monkeypatch.setattr(_gr, "_load_gateway_config", lambda: {})
runner, _sentinel = _make_runner()
runner._busy_input_mode = "queue"
adapter = _make_adapter()
event = _make_event(text="queue me")
sk = build_session_key(event.source)
runner.adapters[event.source.platform] = adapter
agent = MagicMock()
runner._running_agents[sk] = agent
with patch("gateway.run.merge_pending_message_event"):
await runner._handle_active_session_busy_message(event, sk)
content = adapter._send_with_retry.call_args.kwargs.get("content", "")
assert "Queued for the next turn" in content
assert "First-time tip" in content
assert "/busy interrupt" in content
# Must NOT tell the user to /busy queue when they're already on queue.
assert "/busy queue" not in content
+1 -133
View File
@@ -33,7 +33,6 @@ def _make_runner():
runner._ephemeral_system_prompt = ""
runner._prefill_messages = []
runner._reasoning_config = None
runner._session_reasoning_overrides = {}
runner._show_reasoning = False
runner._provider_routing = {}
runner._fallback_model = None
@@ -77,10 +76,6 @@ class TestReasoningCommand:
source = inspect.getsource(gateway_run.GatewayRunner._handle_message)
assert '"reasoning"' in source
def test_parse_reasoning_command_args_accepts_ascii_and_smart_global_flags(self):
assert gateway_run.GatewayRunner._parse_reasoning_command_args("high --global") == ("high", True)
assert gateway_run.GatewayRunner._parse_reasoning_command_args("—global xhigh") == ("xhigh", True)
@pytest.mark.asyncio
async def test_reasoning_command_reloads_current_state_from_config(self, tmp_path, monkeypatch):
hermes_home = tmp_path / "hermes"
@@ -116,90 +111,13 @@ class TestReasoningCommand:
runner = _make_runner()
runner._reasoning_config = {"enabled": True, "effort": "medium"}
result = await runner._handle_reasoning_command(_make_event("/reasoning low --global"))
result = await runner._handle_reasoning_command(_make_event("/reasoning low"))
saved = yaml.safe_load(config_path.read_text(encoding="utf-8"))
assert saved["agent"]["reasoning_effort"] == "low"
assert runner._reasoning_config == {"enabled": True, "effort": "low"}
assert "takes effect on next message" in result
@pytest.mark.asyncio
async def test_handle_reasoning_command_defaults_to_session_only(self, tmp_path, monkeypatch):
hermes_home = tmp_path / "hermes"
hermes_home.mkdir()
config_path = hermes_home / "config.yaml"
config_path.write_text("agent:\n reasoning_effort: medium\n", encoding="utf-8")
monkeypatch.setattr(gateway_run, "_hermes_home", hermes_home)
runner = _make_runner()
event = _make_event("/reasoning high")
session_key = runner._session_key_for_source(event.source)
result = await runner._handle_reasoning_command(event)
saved = yaml.safe_load(config_path.read_text(encoding="utf-8"))
assert saved["agent"]["reasoning_effort"] == "medium"
assert runner._session_reasoning_overrides[session_key] == {"enabled": True, "effort": "high"}
assert runner._reasoning_config == {"enabled": True, "effort": "high"}
assert "session only" in result
@pytest.mark.asyncio
async def test_reasoning_global_clears_existing_session_override(self, tmp_path, monkeypatch):
hermes_home = tmp_path / "hermes"
hermes_home.mkdir()
config_path = hermes_home / "config.yaml"
config_path.write_text("agent:\n reasoning_effort: medium\n", encoding="utf-8")
monkeypatch.setattr(gateway_run, "_hermes_home", hermes_home)
runner = _make_runner()
event = _make_event("/reasoning low --global")
session_key = runner._session_key_for_source(event.source)
runner._session_reasoning_overrides[session_key] = {"enabled": True, "effort": "xhigh"}
result = await runner._handle_reasoning_command(event)
saved = yaml.safe_load(config_path.read_text(encoding="utf-8"))
assert saved["agent"]["reasoning_effort"] == "low"
assert session_key not in runner._session_reasoning_overrides
assert "saved to config" in result
@pytest.mark.asyncio
async def test_reasoning_reset_clears_session_override_without_config_write(self, tmp_path, monkeypatch):
hermes_home = tmp_path / "hermes"
hermes_home.mkdir()
config_path = hermes_home / "config.yaml"
config_path.write_text("agent:\n reasoning_effort: medium\n", encoding="utf-8")
monkeypatch.setattr(gateway_run, "_hermes_home", hermes_home)
runner = _make_runner()
event = _make_event("/reasoning reset")
session_key = runner._session_key_for_source(event.source)
runner._session_reasoning_overrides[session_key] = {"enabled": True, "effort": "xhigh"}
result = await runner._handle_reasoning_command(event)
saved = yaml.safe_load(config_path.read_text(encoding="utf-8"))
assert saved["agent"]["reasoning_effort"] == "medium"
assert session_key not in runner._session_reasoning_overrides
assert "cleared" in result
def test_resolve_session_reasoning_prefers_session_override(self, tmp_path, monkeypatch):
hermes_home = tmp_path / "hermes"
hermes_home.mkdir()
(hermes_home / "config.yaml").write_text("agent:\n reasoning_effort: low\n", encoding="utf-8")
monkeypatch.setattr(gateway_run, "_hermes_home", hermes_home)
runner = _make_runner()
source = _make_event("/reasoning").source
session_key = runner._session_key_for_source(source)
runner._session_reasoning_overrides[session_key] = {"enabled": True, "effort": "xhigh"}
assert runner._resolve_session_reasoning_config(source=source) == {"enabled": True, "effort": "xhigh"}
def test_run_agent_reloads_reasoning_config_per_message(self, tmp_path, monkeypatch):
hermes_home = tmp_path / "hermes"
hermes_home.mkdir()
@@ -249,56 +167,6 @@ class TestReasoningCommand:
assert _CapturingAgent.last_init is not None
assert _CapturingAgent.last_init["reasoning_config"] == {"enabled": True, "effort": "low"}
def test_run_agent_prefers_session_reasoning_override(self, tmp_path, monkeypatch):
hermes_home = tmp_path / "hermes"
hermes_home.mkdir()
(hermes_home / "config.yaml").write_text("agent:\n reasoning_effort: low\n", encoding="utf-8")
monkeypatch.setattr(gateway_run, "_hermes_home", hermes_home)
monkeypatch.setattr(gateway_run, "_env_path", hermes_home / ".env")
monkeypatch.setattr(gateway_run, "load_dotenv", lambda *args, **kwargs: None)
monkeypatch.setattr(
gateway_run,
"_resolve_runtime_agent_kwargs",
lambda: {
"provider": "openrouter",
"api_mode": "chat_completions",
"base_url": "https://openrouter.ai/api/v1",
"api_key": "***",
},
)
fake_run_agent = types.ModuleType("run_agent")
fake_run_agent.AIAgent = _CapturingAgent
monkeypatch.setitem(sys.modules, "run_agent", fake_run_agent)
_CapturingAgent.last_init = None
runner = _make_runner()
session_key = "agent:main:local:dm"
runner._session_reasoning_overrides[session_key] = {"enabled": True, "effort": "high"}
source = SessionSource(
platform=Platform.LOCAL,
chat_id="cli",
chat_name="CLI",
chat_type="dm",
user_id="user-1",
)
result = asyncio.run(
runner._run_agent(
message="ping",
context_prompt="",
history=[],
source=source,
session_id="session-1",
session_key=session_key,
)
)
assert result["final_response"] == "ok"
assert _CapturingAgent.last_init is not None
assert _CapturingAgent.last_init["reasoning_config"] == {"enabled": True, "effort": "high"}
def test_run_agent_includes_enabled_mcp_servers_in_gateway_toolsets(self, tmp_path, monkeypatch):
hermes_home = tmp_path / "hermes"
hermes_home.mkdir()
@@ -1,215 +0,0 @@
"""Tests for interrupt-aware tool-progress suppression in gateway.
When a user sends `stop` while the agent is executing a batch of parallel
tool calls, the gateway's progress_callback should stop queuing 🔍 bubbles
and the drain loop should drop any already-queued events. Without this
guard, the stop acknowledgement appears first but is followed by a trail
of tool-progress bubbles for calls that were already parsed from the LLM
response making the interrupt feel ignored.
"""
import asyncio
import importlib
import sys
import time
import types
from types import SimpleNamespace
import pytest
from gateway.config import Platform, PlatformConfig
from gateway.platforms.base import BasePlatformAdapter, SendResult
from gateway.session import SessionSource
class ProgressCaptureAdapter(BasePlatformAdapter):
def __init__(self, platform=Platform.TELEGRAM):
super().__init__(PlatformConfig(enabled=True, token="***"), platform)
self.sent = []
self.edits = []
self.typing = []
async def connect(self) -> bool:
return True
async def disconnect(self) -> None:
return None
async def send(self, chat_id, content, reply_to=None, metadata=None) -> SendResult:
self.sent.append({"chat_id": chat_id, "content": content})
return SendResult(success=True, message_id="progress-1")
async def edit_message(self, chat_id, message_id, content) -> SendResult:
self.edits.append({"message_id": message_id, "content": content})
return SendResult(success=True, message_id=message_id)
async def send_typing(self, chat_id, metadata=None) -> None:
self.typing.append(chat_id)
async def stop_typing(self, chat_id) -> None:
return None
async def get_chat_info(self, chat_id: str):
return {"id": chat_id}
class PreInterruptAgent:
"""Fires tool-progress events BEFORE the interrupt lands.
These should render normally. Baseline for comparison with the
interrupted case proves the harness renders events when no
interrupt is active.
"""
def __init__(self, **kwargs):
self.tool_progress_callback = kwargs.get("tool_progress_callback")
self.tools = []
self._interrupt_requested = False
@property
def is_interrupted(self) -> bool:
return self._interrupt_requested
def run_conversation(self, message, conversation_history=None, task_id=None):
self.tool_progress_callback("tool.started", "web_search", "first search", {})
time.sleep(0.35) # let the drain loop process
return {"final_response": "done", "messages": [], "api_calls": 1}
class InterruptedAgent:
"""Fires tool.started events AFTER interrupt — all should be suppressed.
Mirrors the failure mode in the bug report: LLM returned N parallel
web_search calls, interrupt flag flipped, remaining events still
rendered as bubbles. With the fix, none of these should appear.
"""
def __init__(self, **kwargs):
self.tool_progress_callback = kwargs.get("tool_progress_callback")
self.tools = []
# Start already interrupted — simulates stop having already landed
# by the time the agent batch starts firing tool.started events.
self._interrupt_requested = True
@property
def is_interrupted(self) -> bool:
return self._interrupt_requested
def run_conversation(self, message, conversation_history=None, task_id=None):
# Parallel tool batch — in production these come from one LLM
# response with 5 tool_calls. All are post-interrupt.
self.tool_progress_callback("tool.started", "web_search", "cognee hermes", {})
self.tool_progress_callback("tool.started", "web_search", "McBee deer hunting", {})
self.tool_progress_callback("tool.started", "web_search", "kuzu graph db", {})
self.tool_progress_callback("tool.started", "web_search", "moonshot kimi api", {})
self.tool_progress_callback("tool.started", "web_search", "platform.moonshot.cn", {})
time.sleep(0.35) # let the drain loop attempt to process the queue
return {"final_response": "interrupted", "messages": [], "api_calls": 1}
def _make_runner(adapter):
gateway_run = importlib.import_module("gateway.run")
GatewayRunner = gateway_run.GatewayRunner
runner = object.__new__(GatewayRunner)
runner.adapters = {adapter.platform: adapter}
runner._voice_mode = {}
runner._prefill_messages = []
runner._ephemeral_system_prompt = ""
runner._reasoning_config = None
runner._provider_routing = {}
runner._fallback_model = None
runner._session_db = None
runner._running_agents = {}
runner._session_run_generation = {}
runner.hooks = SimpleNamespace(loaded_hooks=False)
runner.config = SimpleNamespace(
thread_sessions_per_user=False,
group_sessions_per_user=False,
stt_enabled=False,
)
return runner
async def _run_once(monkeypatch, tmp_path, agent_cls, session_id):
monkeypatch.setenv("HERMES_TOOL_PROGRESS_MODE", "all")
fake_dotenv = types.ModuleType("dotenv")
fake_dotenv.load_dotenv = lambda *args, **kwargs: None
monkeypatch.setitem(sys.modules, "dotenv", fake_dotenv)
fake_run_agent = types.ModuleType("run_agent")
fake_run_agent.AIAgent = agent_cls
monkeypatch.setitem(sys.modules, "run_agent", fake_run_agent)
adapter = ProgressCaptureAdapter()
runner = _make_runner(adapter)
gateway_run = importlib.import_module("gateway.run")
monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
monkeypatch.setattr(
gateway_run,
"_resolve_runtime_agent_kwargs",
lambda: {"api_key": "fake"},
)
source = SessionSource(
platform=Platform.TELEGRAM,
chat_id="-1001",
chat_type="group",
thread_id="17585",
)
result = await runner._run_agent(
message="hi",
context_prompt="",
history=[],
source=source,
session_id=session_id,
session_key="agent:main:telegram:group:-1001:17585",
)
return adapter, result
@pytest.mark.asyncio
async def test_baseline_non_interrupted_agent_renders_progress(monkeypatch, tmp_path):
"""Sanity check: when is_interrupted is False, tool-progress renders normally."""
adapter, result = await _run_once(monkeypatch, tmp_path, PreInterruptAgent, "sess-baseline")
assert result["final_response"] == "done"
rendered = " ".join(c["content"] for c in adapter.sent) + " " + " ".join(
c["content"] for c in adapter.edits
)
assert "first search" in rendered, (
"baseline agent should render its tool-progress event — "
"if this fails the test harness is broken, not the fix"
)
@pytest.mark.asyncio
async def test_progress_suppressed_when_agent_is_interrupted(monkeypatch, tmp_path):
"""Post-interrupt tool.started events must not render as bubbles.
This is Bug B from the screenshot: user sends `stop`, agent acks with
Interrupting, but 5 more 🔍 web_search bubbles still render because
their tool.started events were already parsed from the LLM response.
With the fix, progress_callback and the drain loop both check
is_interrupted and skip these events.
"""
adapter, result = await _run_once(
monkeypatch, tmp_path, InterruptedAgent, "sess-interrupted"
)
assert result["final_response"] == "interrupted"
rendered = " ".join(c["content"] for c in adapter.sent) + " " + " ".join(
c["content"] for c in adapter.edits
)
# None of the post-interrupt queries should appear.
for leaked_query in (
"cognee hermes",
"McBee deer hunting",
"kuzu graph db",
"moonshot kimi api",
"platform.moonshot.cn",
):
assert leaked_query not in rendered, (
f"event '{leaked_query}' leaked into the UI after interrupt — "
f"progress_callback / drain loop is not checking is_interrupted"
)
@@ -165,26 +165,3 @@ async def test_reasoning_rejected_mid_run():
assert result is not None
assert "can't run mid-turn" in result
assert "/reasoning" in result
@pytest.mark.asyncio
async def test_btw_dispatches_mid_run():
"""/btw mid-run must dispatch to /background's handler, not hit the catch-all.
/btw is an alias of /background (see hermes_cli/commands.py). Typing
/btw mid-turn must spawn a parallel background task that's the whole
point of the command. Before the mid-turn bypass was added for
/background, /btw fell through to the "Agent is running — wait or
/stop first" catch-all, making it useless in exactly the scenario it
was designed for. The alias and the bypass together make it work.
"""
runner = _make_runner()
runner._handle_background_command = AsyncMock(
return_value='🚀 Background task started: "what module owns titles?"'
)
result = await runner._handle_message(_make_event("/btw what module owns titles?"))
runner._handle_background_command.assert_awaited_once()
assert result is not None
assert "can't run mid-turn" not in result
+1 -1
View File
@@ -58,7 +58,7 @@ class TestFormatSessionInfo:
{"provider": "", "base_url": "", "api_key": ""})
with p1, p2, p3:
info = runner._format_session_info()
assert "256K" in info
assert "128K" in info
assert "model.context_length" in info
def test_local_endpoint_shown(self, runner, tmp_path):
@@ -54,7 +54,6 @@ def _make_runner():
runner._background_tasks = set()
runner._session_db = None
runner._session_model_overrides = {}
runner._session_reasoning_overrides = {}
runner._pending_model_notes = {}
runner._pending_approvals = {}
runner._agent_cache = {}
@@ -103,7 +102,6 @@ def test_run_agent_prefers_session_override_over_global_runtime(monkeypatch):
)
session_key = "agent:main:local:dm"
runner._session_model_overrides[session_key] = _codex_override()
runner._session_reasoning_overrides[session_key] = {"enabled": True, "effort": "high"}
result = asyncio.run(
runner._run_agent(
@@ -123,7 +121,6 @@ def test_run_agent_prefers_session_override_over_global_runtime(monkeypatch):
assert _CapturingAgent.last_init["api_mode"] == "codex_responses"
assert _CapturingAgent.last_init["base_url"] == "https://chatgpt.com/backend-api/codex"
assert _CapturingAgent.last_init["api_key"] == "***"
assert _CapturingAgent.last_init["reasoning_config"] == {"enabled": True, "effort": "high"}
@pytest.mark.asyncio
@@ -152,7 +149,6 @@ async def test_background_task_prefers_session_override_over_global_runtime(monk
)
session_key = runner._session_key_for_source(source)
runner._session_model_overrides[session_key] = _codex_override()
runner._session_reasoning_overrides[session_key] = {"enabled": True, "effort": "high"}
await runner._run_background_task("say hello", source, "bg_test")
@@ -162,4 +158,3 @@ async def test_background_task_prefers_session_override_over_global_runtime(monk
assert _CapturingAgent.last_init["api_mode"] == "codex_responses"
assert _CapturingAgent.last_init["base_url"] == "https://chatgpt.com/backend-api/codex"
assert _CapturingAgent.last_init["api_key"] == "***"
assert _CapturingAgent.last_init["reasoning_config"] == {"enabled": True, "effort": "high"}
+3 -12
View File
@@ -1,4 +1,4 @@
"""Tests that /new (and its /reset alias) clears session-scoped overrides."""
"""Tests that /new (and its /reset alias) clears the session-scoped model override."""
from datetime import datetime
from types import SimpleNamespace
from unittest.mock import AsyncMock, MagicMock
@@ -37,7 +37,6 @@ def _make_runner():
runner._voice_mode = {}
runner.hooks = SimpleNamespace(emit=AsyncMock(), loaded_hooks=False)
runner._session_model_overrides = {}
runner._session_reasoning_overrides = {}
runner._pending_model_notes = {}
runner._background_tasks = set()
@@ -76,16 +75,14 @@ async def test_new_command_clears_session_model_override():
runner._session_model_overrides[session_key] = {
"model": "gpt-4o",
"provider": "openai",
"api_key": "***",
"api_key": "sk-test",
"base_url": "",
"api_mode": "openai",
}
runner._session_reasoning_overrides[session_key] = {"enabled": True, "effort": "high"}
await runner._handle_reset_command(_make_event("/new"))
assert session_key not in runner._session_model_overrides
assert session_key not in runner._session_reasoning_overrides
@pytest.mark.asyncio
@@ -95,12 +92,10 @@ async def test_new_command_no_override_is_noop():
session_key = build_session_key(_make_source())
assert session_key not in runner._session_model_overrides
assert session_key not in runner._session_reasoning_overrides
await runner._handle_reset_command(_make_event("/new"))
assert session_key not in runner._session_model_overrides
assert session_key not in runner._session_reasoning_overrides
@pytest.mark.asyncio
@@ -120,16 +115,12 @@ async def test_new_command_only_clears_own_session():
runner._session_model_overrides[other_key] = {
"model": "claude-sonnet-4-6",
"provider": "anthropic",
"api_key": "***",
"api_key": "sk-ant-test",
"base_url": "",
"api_mode": "anthropic",
}
runner._session_reasoning_overrides[session_key] = {"enabled": True, "effort": "high"}
runner._session_reasoning_overrides[other_key] = {"enabled": True, "effort": "low"}
await runner._handle_reset_command(_make_event("/new"))
assert session_key not in runner._session_model_overrides
assert other_key in runner._session_model_overrides
assert session_key not in runner._session_reasoning_overrides
assert other_key in runner._session_reasoning_overrides
-100
View File
@@ -177,53 +177,6 @@ class TestHandleVoiceCommand:
assert adapter._auto_tts_disabled_chats == {"123"}
def test_sync_populates_enabled_chats_from_voice_modes(self, runner):
"""Issue #16007: sync also restores per-chat /voice on|tts opt-ins.
The adapter's ``_auto_tts_enabled_chats`` must mirror chats whose
persisted voice_mode is ``voice_only`` or ``all`` without this,
``/voice on`` was relying on a "not in disabled set" default that
silently enabled auto-TTS for every chat.
"""
from gateway.config import Platform
runner._voice_mode = {
"telegram:off_chat": "off",
"telegram:on_chat": "voice_only",
"telegram:tts_chat": "all",
"slack:999": "voice_only", # wrong platform, must be ignored
}
adapter = SimpleNamespace(
_auto_tts_default=False,
_auto_tts_disabled_chats=set(),
_auto_tts_enabled_chats=set(),
platform=Platform.TELEGRAM,
)
runner._sync_voice_mode_state_to_adapter(adapter)
assert adapter._auto_tts_disabled_chats == {"off_chat"}
assert adapter._auto_tts_enabled_chats == {"on_chat", "tts_chat"}
def test_sync_pushes_config_default_onto_adapter(self, runner, monkeypatch):
"""Issue #16007: ``voice.auto_tts`` must propagate to ``_auto_tts_default``."""
from gateway.config import Platform
fake_cfg = {"voice": {"auto_tts": True}}
monkeypatch.setattr(
"hermes_cli.config.load_config",
lambda: fake_cfg,
)
adapter = SimpleNamespace(
_auto_tts_default=False,
_auto_tts_disabled_chats=set(),
_auto_tts_enabled_chats=set(),
platform=Platform.TELEGRAM,
)
runner._sync_voice_mode_state_to_adapter(adapter)
assert adapter._auto_tts_default is True
def test_restart_restores_voice_off_state(self, runner, tmp_path):
from gateway.config import Platform
runner._VOICE_MODE_PATH.write_text(json.dumps({"telegram:123": "off"}))
@@ -2753,56 +2706,3 @@ class TestUDPKeepalive:
mock_conn.send_packet.assert_called_with(b'\xf8\xff\xfe')
finally:
DiscordAdapter._KEEPALIVE_INTERVAL = original_interval
# =====================================================================
# BasePlatformAdapter._should_auto_tts_for_chat — gate for auto-TTS
# on voice input. Regression test for Issue #16007.
# =====================================================================
class TestShouldAutoTtsForChat:
"""Three-layer gate: per-chat enable > per-chat disable > config default."""
def _make_adapter(self, *, default: bool, enabled=(), disabled=()):
"""Build a bare adapter with only the attrs the gate reads."""
adapter = SimpleNamespace(
_auto_tts_default=default,
_auto_tts_enabled_chats=set(enabled),
_auto_tts_disabled_chats=set(disabled),
)
# Bind the unbound method — _should_auto_tts_for_chat only reads the
# three attrs above via ``self.``, so an unbound call works.
from gateway.platforms.base import BasePlatformAdapter
return BasePlatformAdapter._should_auto_tts_for_chat, adapter
def test_default_false_no_override_suppresses(self):
"""Issue #16007: voice.auto_tts=False and no per-chat state → no TTS."""
fn, adapter = self._make_adapter(default=False)
assert fn(adapter, "chat1") is False
def test_default_true_no_override_fires(self):
fn, adapter = self._make_adapter(default=True)
assert fn(adapter, "chat1") is True
def test_explicit_enable_overrides_false_default(self):
"""``/voice on`` with config auto_tts=False still fires."""
fn, adapter = self._make_adapter(default=False, enabled={"chat1"})
assert fn(adapter, "chat1") is True
def test_explicit_disable_overrides_true_default(self):
"""``/voice off`` with config auto_tts=True still suppresses."""
fn, adapter = self._make_adapter(default=True, disabled={"chat1"})
assert fn(adapter, "chat1") is False
def test_enabled_wins_over_disabled(self):
"""An explicit enable beats an explicit disable (enable takes priority)."""
fn, adapter = self._make_adapter(
default=False, enabled={"chat1"}, disabled={"chat1"}
)
assert fn(adapter, "chat1") is True
def test_per_chat_isolation(self):
"""Enable for chat1 doesn't leak to chat2."""
fn, adapter = self._make_adapter(default=False, enabled={"chat1"})
assert fn(adapter, "chat1") is True
assert fn(adapter, "chat2") is False
@@ -1,152 +0,0 @@
"""Regression test for the `/model` picker confirmation display.
Bug (April 2026): after choosing a model from the interactive `/model` picker,
``HermesCLI._apply_model_switch_result()`` printed ``ModelInfo.context_window``
straight from models.dev, which always reports the vendor-wide value (e.g.
gpt-5.5 = 1,050,000 on ``openai``). That ignored provider-specific caps in
particular, ChatGPT Codex OAuth enforces 272K on the same slug. The sibling
``_handle_model_switch()`` (typed ``/model <name>``) was already fixed to use
``resolve_display_context_length()``; the picker path was missed, causing
"sometimes 1M, sometimes 272K" for the same model across sibling UI paths.
Fix: both display paths now go through ``resolve_display_context_length()``.
"""
from __future__ import annotations
from unittest.mock import patch
from hermes_cli.model_switch import ModelSwitchResult
class _FakeModelInfo:
context_window = 1_050_000
max_output = 0
def has_cost_data(self):
return False
def format_capabilities(self):
return ""
class _StubCLI:
"""Minimum attrs ``_apply_model_switch_result`` reads on ``self``."""
agent = None
model = ""
provider = ""
requested_provider = ""
api_key = ""
_explicit_api_key = ""
base_url = ""
_explicit_base_url = ""
api_mode = ""
_pending_model_switch_note = ""
def _run_display(monkeypatch, result):
import cli as cli_mod
captured: list[str] = []
monkeypatch.setattr(cli_mod, "_cprint", lambda s, *a, **k: captured.append(str(s)))
# Avoid writing to ~/.hermes/config.yaml during the test.
monkeypatch.setattr(cli_mod, "save_config_value", lambda *a, **k: None)
cli_mod.HermesCLI._apply_model_switch_result(_StubCLI(), result, False)
return captured
def test_picker_path_uses_provider_aware_context_on_codex(monkeypatch):
"""``_apply_model_switch_result`` must prefer the provider-aware resolver
(272K on Codex) over the raw models.dev value (1.05M for gpt-5.5).
"""
result = ModelSwitchResult(
success=True,
new_model="gpt-5.5",
target_provider="openai-codex",
provider_changed=True,
api_key="",
base_url="https://chatgpt.com/backend-api/codex",
api_mode="codex_responses",
warning_message="",
provider_label="ChatGPT Codex",
resolved_via_alias=False,
capabilities=None,
model_info=_FakeModelInfo(), # models.dev says 1.05M
is_global=False,
)
with patch(
"agent.model_metadata.get_model_context_length",
return_value=272_000,
):
lines = _run_display(monkeypatch, result)
ctx_line = next((l for l in lines if "Context:" in l), "")
assert "272,000" in ctx_line, (
f"picker-path display must show Codex's 272K cap, got: {ctx_line!r}"
)
assert "1,050,000" not in ctx_line, (
f"picker-path display leaked models.dev's 1.05M for Codex: {ctx_line!r}"
)
def test_picker_path_shows_vendor_value_when_no_provider_cap(monkeypatch):
"""On providers with no enforced cap (e.g. OpenRouter), the picker path
should surface the real 1.05M context for gpt-5.5 resolver and models.dev
agree here.
"""
result = ModelSwitchResult(
success=True,
new_model="openai/gpt-5.5",
target_provider="openrouter",
provider_changed=True,
api_key="",
base_url="https://openrouter.ai/api/v1",
api_mode="chat_completions",
warning_message="",
provider_label="OpenRouter",
resolved_via_alias=False,
capabilities=None,
model_info=_FakeModelInfo(),
is_global=False,
)
with patch(
"agent.model_metadata.get_model_context_length",
return_value=1_050_000,
):
lines = _run_display(monkeypatch, result)
ctx_line = next((l for l in lines if "Context:" in l), "")
assert "1,050,000" in ctx_line, (
f"OpenRouter gpt-5.5 should show 1.05M context, got: {ctx_line!r}"
)
def test_picker_path_falls_back_to_model_info_when_resolver_empty(monkeypatch):
"""If ``get_model_context_length`` returns nothing (rare — truly unknown
endpoint), the display still surfaces ``ModelInfo.context_window`` so the
user sees *something* rather than a silent blank.
"""
result = ModelSwitchResult(
success=True,
new_model="some-model",
target_provider="some-provider",
provider_changed=True,
api_key="",
base_url="",
api_mode="chat_completions",
warning_message="",
provider_label="Some Provider",
resolved_via_alias=False,
capabilities=None,
model_info=_FakeModelInfo(), # context_window = 1_050_000
is_global=False,
)
with patch(
"agent.model_metadata.get_model_context_length",
return_value=None,
):
lines = _run_display(monkeypatch, result)
ctx_line = next((l for l in lines if "Context:" in l), "")
assert "1,050,000" in ctx_line, (
f"resolver-empty path should fall back to ModelInfo, got: {ctx_line!r}"
)
-237
View File
@@ -1,237 +0,0 @@
"""Tests for hermes_cli.azure_detect — transport & model auto-detection."""
from __future__ import annotations
import json
from unittest.mock import MagicMock, patch
import pytest
from hermes_cli import azure_detect
# ----------------------------------------------------------------------
# Helpers
# ----------------------------------------------------------------------
class _FakeHTTPResponse:
"""Minimal stand-in for urllib.request.urlopen's context manager."""
def __init__(self, status: int, body: bytes):
self.status = status
self._body = body
def __enter__(self):
return self
def __exit__(self, exc_type, exc, tb):
return False
def read(self) -> bytes:
return self._body
def _openai_models_body(*ids: str) -> bytes:
return json.dumps({
"object": "list",
"data": [{"id": i, "object": "model"} for i in ids],
}).encode()
def _anthropic_error_body(msg: str = "model not found") -> bytes:
return json.dumps({
"type": "error",
"error": {"type": "invalid_request_error", "message": msg},
}).encode()
# ----------------------------------------------------------------------
# _looks_like_anthropic_path
# ----------------------------------------------------------------------
@pytest.mark.parametrize("url, expected", [
("https://foo.services.ai.azure.com/anthropic", True),
("https://foo.services.ai.azure.com/anthropic/", True),
("https://foo.services.ai.azure.com/anthropic/v1", True),
("https://foo.openai.azure.com/openai/v1", False),
("https://foo.openai.azure.com/", False),
("https://openrouter.ai/api/v1", False),
])
def test_looks_like_anthropic_path(url, expected):
assert azure_detect._looks_like_anthropic_path(url) is expected
# ----------------------------------------------------------------------
# _extract_model_ids
# ----------------------------------------------------------------------
def test_extract_model_ids_openai_shape():
body = {
"object": "list",
"data": [
{"id": "gpt-4.1-mini", "object": "model"},
{"id": "claude-sonnet-4-6", "object": "model"},
],
}
assert azure_detect._extract_model_ids(body) == ["gpt-4.1-mini", "claude-sonnet-4-6"]
def test_extract_model_ids_bad_shape_returns_empty():
assert azure_detect._extract_model_ids({}) == []
assert azure_detect._extract_model_ids({"data": "not-a-list"}) == []
assert azure_detect._extract_model_ids({"data": [{"no-id": True}]}) == []
# ----------------------------------------------------------------------
# detect() integration
# ----------------------------------------------------------------------
def test_detect_anthropic_path_wins_without_http():
"""URL path sniff short-circuits — no HTTP call happens."""
with patch.object(azure_detect, "_http_get_json") as fake_get, \
patch.object(azure_detect, "_probe_anthropic_messages") as fake_probe:
result = azure_detect.detect(
"https://foo.services.ai.azure.com/anthropic", "key-abc",
)
assert result.api_mode == "anthropic_messages"
assert result.is_anthropic is True
assert "path" in result.reason.lower()
fake_get.assert_not_called()
fake_probe.assert_not_called()
def test_detect_openai_models_probe_success():
"""/models probe returning a model list → chat_completions."""
def _fake_get(url, api_key, timeout=6.0):
assert "key-abc" == api_key
return 200, json.loads(_openai_models_body("gpt-5.4", "claude-opus-4-6"))
with patch.object(azure_detect, "_http_get_json", side_effect=_fake_get):
result = azure_detect.detect(
"https://my.openai.azure.com/openai/v1", "key-abc",
)
assert result.api_mode == "chat_completions"
assert result.models_probe_ok is True
assert result.models == ["gpt-5.4", "claude-opus-4-6"]
assert "/models" in result.reason
def test_detect_openai_models_probe_empty_list_still_counts():
"""Endpoint returned OpenAI shape but no models → still chat_completions."""
def _fake_get(url, api_key, timeout=6.0):
return 200, {"object": "list", "data": []}
with patch.object(azure_detect, "_http_get_json", side_effect=_fake_get):
result = azure_detect.detect(
"https://my.openai.azure.com/openai/v1", "key-abc",
)
assert result.api_mode == "chat_completions"
assert result.models == []
assert result.models_probe_ok is True
def test_detect_falls_back_to_anthropic_probe():
"""/models fails but Anthropic Messages probe succeeds."""
def _fake_get(url, api_key, timeout=6.0):
return 401, None # /models forbidden
with patch.object(azure_detect, "_http_get_json", side_effect=_fake_get), \
patch.object(azure_detect, "_probe_anthropic_messages", return_value=True):
result = azure_detect.detect(
"https://my.services.ai.azure.com/v1", "key-abc",
)
assert result.api_mode == "anthropic_messages"
assert result.is_anthropic is True
def test_detect_all_probes_fail_returns_none():
"""Every probe fails → api_mode is None and caller falls back to manual."""
with patch.object(azure_detect, "_http_get_json", return_value=(500, None)), \
patch.object(azure_detect, "_probe_anthropic_messages", return_value=False):
result = azure_detect.detect(
"https://some-private.example.com/", "key-abc",
)
assert result.api_mode is None
assert result.models == []
assert "manual" in result.reason.lower()
# ----------------------------------------------------------------------
# _probe_openai_models URL list (Azure vs v1 api-version)
# ----------------------------------------------------------------------
def test_probe_openai_models_tries_multiple_api_versions():
"""First call (no api-version) fails, api-version fallback succeeds."""
calls = []
def _fake_get(url, api_key, timeout=6.0):
calls.append(url)
if "api-version" not in url:
return 404, None
return 200, json.loads(_openai_models_body("gpt-4.1"))
with patch.object(azure_detect, "_http_get_json", side_effect=_fake_get):
ok, models = azure_detect._probe_openai_models(
"https://my.openai.azure.com/openai/v1", "k",
)
assert ok is True
assert models == ["gpt-4.1"]
# Should have tried without api-version first, then with at least one
assert any("api-version" not in u for u in calls)
assert any("api-version" in u for u in calls)
# ----------------------------------------------------------------------
# _http_get_json error handling
# ----------------------------------------------------------------------
def test_http_get_json_on_urlerror_returns_zero_none():
"""Network failure returns (0, None), never raises."""
import urllib.error
with patch("hermes_cli.azure_detect.urllib_request.urlopen",
side_effect=urllib.error.URLError("dns fail")):
status, body = azure_detect._http_get_json("https://bad.example/", "k")
assert status == 0
assert body is None
def test_http_get_json_on_http_error_returns_code_none():
"""HTTP 4xx/5xx returns (code, None)."""
import urllib.error
err = urllib.error.HTTPError("https://x/", 403, "Forbidden", {}, None)
with patch("hermes_cli.azure_detect.urllib_request.urlopen", side_effect=err):
status, body = azure_detect._http_get_json("https://x/", "k")
assert status == 403
assert body is None
# ----------------------------------------------------------------------
# lookup_context_length
# ----------------------------------------------------------------------
def test_lookup_context_length_returns_known():
"""When model_metadata returns a non-fallback value, we pass it through."""
fake = MagicMock(return_value=400000)
with patch("agent.model_metadata.get_model_context_length", fake), \
patch("agent.model_metadata.DEFAULT_FALLBACK_CONTEXT", 128000):
n = azure_detect.lookup_context_length(
"gpt-5.4", "https://x.openai.azure.com/openai/v1", "k",
)
assert n == 400000
def test_lookup_context_length_returns_none_on_fallback():
"""When resolver falls through to DEFAULT_FALLBACK_CONTEXT, we return None."""
with patch("agent.model_metadata.get_model_context_length", return_value=128000), \
patch("agent.model_metadata.DEFAULT_FALLBACK_CONTEXT", 128000):
n = azure_detect.lookup_context_length(
"totally-unknown-model", "https://x.openai.azure.com/openai/v1", "k",
)
assert n is None
def test_lookup_context_length_swallows_exceptions():
"""Resolver raising must not crash the wizard."""
with patch("agent.model_metadata.get_model_context_length",
side_effect=RuntimeError("boom")):
assert azure_detect.lookup_context_length("m", "https://x/", "k") is None
@@ -1,240 +0,0 @@
"""Regression tests for custom_providers per-model context_length resolution.
Covers the fix for #15779 — mid-session /model switch to a named custom
provider must honor ``custom_providers[].models.<id>.context_length`` the
same way startup already does.
"""
from __future__ import annotations
from unittest.mock import patch
from hermes_cli.config import get_custom_provider_context_length
class TestGetCustomProviderContextLength:
def test_returns_override_for_matching_entry(self):
custom = [
{
"name": "my-endpoint",
"base_url": "https://example.invalid/v1",
"models": {"gpt-5.5": {"context_length": 1_050_000}},
}
]
assert (
get_custom_provider_context_length(
"gpt-5.5", "https://example.invalid/v1", custom
)
== 1_050_000
)
def test_trailing_slash_insensitive(self):
custom = [
{
"base_url": "https://example.invalid/v1/",
"models": {"m": {"context_length": 500_000}},
}
]
# config has trailing slash, runtime doesn't — must match
assert (
get_custom_provider_context_length(
"m", "https://example.invalid/v1", custom
)
== 500_000
)
# and the reverse
custom2 = [
{
"base_url": "https://example.invalid/v1",
"models": {"m": {"context_length": 500_000}},
}
]
assert (
get_custom_provider_context_length(
"m", "https://example.invalid/v1/", custom2
)
== 500_000
)
def test_returns_none_when_url_does_not_match(self):
custom = [
{
"base_url": "https://example.invalid/v1",
"models": {"m": {"context_length": 400_000}},
}
]
assert (
get_custom_provider_context_length(
"m", "https://other.invalid/v1", custom
)
is None
)
def test_returns_none_when_model_does_not_match(self):
custom = [
{
"base_url": "https://example.invalid/v1",
"models": {"gpt-5.5": {"context_length": 400_000}},
}
]
assert (
get_custom_provider_context_length(
"different-model", "https://example.invalid/v1", custom
)
is None
)
def test_returns_none_for_string_value(self):
"""'256K' string is not a valid int — skip silently.
(The inline startup path still emits a user-visible warning; the
helper itself returns None so downstream fallbacks can run.)
"""
custom = [
{
"base_url": "https://example.invalid/v1",
"models": {"m": {"context_length": "256K"}},
}
]
assert (
get_custom_provider_context_length(
"m", "https://example.invalid/v1", custom
)
is None
)
def test_returns_none_for_zero_or_negative(self):
for bad in (0, -1, -100):
custom = [
{
"base_url": "https://example.invalid/v1",
"models": {"m": {"context_length": bad}},
}
]
assert (
get_custom_provider_context_length(
"m", "https://example.invalid/v1", custom
)
is None
), f"value {bad!r} should be rejected"
def test_empty_inputs_return_none(self):
assert get_custom_provider_context_length("", "http://x", [{"base_url": "http://x", "models": {"": {"context_length": 1}}}]) is None
assert get_custom_provider_context_length("m", "", [{"base_url": "", "models": {"m": {"context_length": 1}}}]) is None
assert get_custom_provider_context_length("m", "http://x", None) is None
assert get_custom_provider_context_length("m", "http://x", []) is None
def test_ignores_non_dict_entries(self):
"""Malformed entries must not crash the lookup."""
custom = [
"not a dict",
None,
{"base_url": "https://example.invalid/v1", "models": "not a dict"},
{"base_url": "https://example.invalid/v1", "models": {"m": "not a dict"}},
{
"base_url": "https://example.invalid/v1",
"models": {"m": {"context_length": 400_000}},
},
]
assert (
get_custom_provider_context_length(
"m", "https://example.invalid/v1", custom
)
== 400_000
)
class TestGetModelContextLengthHonorsOverride:
"""agent.model_metadata.get_model_context_length must honor the
custom_providers override at step 0b before any probe, cache hit,
or models.dev lookup can override it.
"""
def _mock_all_probes(self):
"""Context manager that disables every downstream resolution step."""
from agent import model_metadata as _mm
return [
patch.object(_mm, "get_cached_context_length", return_value=None),
patch.object(_mm, "fetch_endpoint_model_metadata", return_value={}),
patch.object(_mm, "fetch_model_metadata", return_value={}),
patch.object(_mm, "is_local_endpoint", return_value=False),
patch.object(_mm, "_is_known_provider_base_url", return_value=False),
]
def test_custom_providers_override_wins_over_default_fallback(self):
from agent.model_metadata import get_model_context_length
custom = [
{
"base_url": "https://example.invalid/v1",
"models": {"gpt-5.5": {"context_length": 1_050_000}},
}
]
patches = self._mock_all_probes()
for p in patches:
p.start()
try:
ctx = get_model_context_length(
"gpt-5.5",
base_url="https://example.invalid/v1",
provider="custom",
custom_providers=custom,
)
finally:
for p in patches:
p.stop()
assert ctx == 1_050_000
def test_explicit_config_context_length_still_wins(self):
"""Top-level model.context_length (step 0) outranks custom_providers (step 0b).
Users who set both should see the top-level value that's the
documented precedence and matches the long-standing step-0 behavior.
"""
from agent.model_metadata import get_model_context_length
custom = [
{
"base_url": "https://example.invalid/v1",
"models": {"m": {"context_length": 1_050_000}},
}
]
ctx = get_model_context_length(
"m",
base_url="https://example.invalid/v1",
provider="custom",
config_context_length=500_000, # explicit top-level wins
custom_providers=custom,
)
assert ctx == 500_000
def test_no_override_falls_through_to_default(self):
"""With custom_providers=None and all probes disabled, resolver
returns DEFAULT_FALLBACK_CONTEXT (256K after the stepdown bump).
"""
from agent.model_metadata import get_model_context_length, DEFAULT_FALLBACK_CONTEXT
patches = self._mock_all_probes()
for p in patches:
p.start()
try:
ctx = get_model_context_length(
"unknown-model",
base_url="https://example.invalid/v1",
provider="custom",
custom_providers=None,
)
finally:
for p in patches:
p.stop()
assert ctx == DEFAULT_FALLBACK_CONTEXT
class TestContextProbeTiers:
def test_256k_is_top_tier_and_default(self):
"""The stepdown probe starts at 256K and 256K is the new default."""
from agent.model_metadata import CONTEXT_PROBE_TIERS, DEFAULT_FALLBACK_CONTEXT
assert CONTEXT_PROBE_TIERS[0] == 256_000
assert DEFAULT_FALLBACK_CONTEXT == 256_000
# Tiers still descend monotonically
for a, b in zip(CONTEXT_PROBE_TIERS, CONTEXT_PROBE_TIERS[1:]):
assert a > b, f"tiers must strictly descend, got {a} then {b}"
# 128K is still a tier (users relying on it probe-down get there)
assert 128_000 in CONTEXT_PROBE_TIERS
@@ -52,12 +52,7 @@ class TestCustomProviderModelSwitch:
_model_flow_named_custom({}, provider_info)
# fetch_api_models MUST be called even though model was saved
mock_fetch.assert_called_once_with(
"sk-test",
"https://vllm.example.com/v1",
timeout=8.0,
api_mode=None,
)
mock_fetch.assert_called_once_with("sk-test", "https://vllm.example.com/v1", timeout=8.0)
def test_can_switch_to_different_model(self, config_home):
"""User selects a different model than the saved one."""
@@ -178,147 +173,3 @@ class TestCustomProviderModelSwitch:
model = config.get("model")
assert isinstance(model, dict)
assert "api_mode" not in model, "Stale api_mode should be removed"
def test_env_template_api_key_is_preserved_in_model_config(self, config_home, monkeypatch):
"""Selecting an env-backed custom provider must not inline the secret."""
import yaml
from hermes_cli.main import _model_flow_named_custom
config_path = config_home / "config.yaml"
config_path.write_text(
"model:\n"
" default: old-model\n"
" provider: openrouter\n"
"custom_providers:\n"
"- name: Example Provider\n"
" base_url: https://api.example-provider.test/v1\n"
" api_key: ${EXAMPLE_PROVIDER_API_KEY}\n"
" model: qwen3.6-35b-fast\n"
)
monkeypatch.setenv("EXAMPLE_PROVIDER_API_KEY", "sk-live-example-provider")
provider_info = {
"name": "Example Provider",
"base_url": "https://api.example-provider.test/v1",
"api_key": "sk-live-example-provider",
"api_key_ref": "${EXAMPLE_PROVIDER_API_KEY}",
"model": "qwen3.6-35b-fast",
}
with patch("hermes_cli.models.fetch_api_models", return_value=["qwen3.6-35b-fast"]) as mock_fetch, \
patch.dict("sys.modules", {"simple_term_menu": None}), \
patch("builtins.input", return_value="1"), \
patch("builtins.print"):
_model_flow_named_custom({}, provider_info)
mock_fetch.assert_called_once_with(
"sk-live-example-provider",
"https://api.example-provider.test/v1",
timeout=8.0,
api_mode=None,
)
config = yaml.safe_load(config_path.read_text()) or {}
assert config["model"]["api_key"] == "${EXAMPLE_PROVIDER_API_KEY}"
assert config["custom_providers"][0]["api_key"] == "${EXAMPLE_PROVIDER_API_KEY}"
assert "sk-live-example-provider" not in config_path.read_text()
def test_key_env_custom_provider_persists_reference_not_secret(self, config_home, monkeypatch):
"""key_env custom providers should also avoid writing plaintext keys."""
import yaml
from hermes_cli.main import _model_flow_named_custom
config_path = config_home / "config.yaml"
config_path.write_text(
"model:\n"
" default: old-model\n"
"custom_providers:\n"
"- name: Example Provider\n"
" base_url: https://api.example-provider.test/v1\n"
" key_env: EXAMPLE_PROVIDER_API_KEY\n"
" model: qwen3.6-35b-fast\n"
)
monkeypatch.setenv("EXAMPLE_PROVIDER_API_KEY", "sk-live-example-provider")
provider_info = {
"name": "Example Provider",
"base_url": "https://api.example-provider.test/v1",
"api_key": "",
"key_env": "EXAMPLE_PROVIDER_API_KEY",
"model": "qwen3.6-35b-fast",
}
with patch("hermes_cli.models.fetch_api_models", return_value=["qwen3.6-35b-fast"]), \
patch.dict("sys.modules", {"simple_term_menu": None}), \
patch("builtins.input", return_value="1"), \
patch("builtins.print"):
_model_flow_named_custom({}, provider_info)
config = yaml.safe_load(config_path.read_text()) or {}
assert config["model"]["api_key"] == "${EXAMPLE_PROVIDER_API_KEY}"
assert config["custom_providers"][0]["key_env"] == "EXAMPLE_PROVIDER_API_KEY"
assert "sk-live-example-provider" not in config_path.read_text()
def test_env_ref_base_url_preserves_api_key_ref_through_picker(
self, config_home, monkeypatch
):
"""Integration regression: when BOTH ``base_url`` and ``api_key`` use
``${VAR}`` templates (the Discord-reported NeuralWatt case), the picker
must still preserve the env reference in ``model.api_key``.
The earlier lookup went through ``get_compatible_custom_providers``
which dropped entries whose ``base_url`` was an env-ref template
(``urlparse("${NEURALWATT_API_BASE}")`` has no scheme/netloc), causing
``api_key_ref`` to stay empty and the resolved secret to be written to
``config.yaml``. This test drives the real picker-callsite code path.
"""
import yaml
from hermes_cli.main import select_provider_and_model
config_path = config_home / "config.yaml"
config_path.write_text(
"model:\n"
" default: old-model\n"
" provider: openrouter\n"
"custom_providers:\n"
"- name: NeuralWatt\n"
" base_url: ${NEURALWATT_API_BASE}\n"
" api_key: ${NEURALWATT_API_KEY}\n"
" model: qwen3.6-35b-fast\n"
" models: []\n"
)
monkeypatch.setenv("NEURALWATT_API_BASE", "https://api.neuralwatt.com/v1")
monkeypatch.setenv("NEURALWATT_API_KEY", "sk-live-neuralwatt-secret")
# Exercise the real picker: select "custom:neuralwatt" from the
# provider menu. ``select_provider_and_model`` prompts for a provider
# choice (returns an index), then hands off to
# ``_model_flow_named_custom`` with the provider_info built by
# ``_named_custom_provider_map``.
def _pick_neuralwatt(labels, default=0):
for i, label in enumerate(labels):
if "NeuralWatt" in label:
return i
raise AssertionError(
f"NeuralWatt entry missing from provider menu: {labels}"
)
with patch("hermes_cli.main._prompt_provider_choice",
side_effect=_pick_neuralwatt), \
patch("hermes_cli.models.fetch_api_models",
return_value=["qwen3.6-35b-fast"]) as mock_fetch, \
patch.dict("sys.modules", {"simple_term_menu": None}), \
patch("builtins.input", return_value="1"), \
patch("builtins.print"):
select_provider_and_model()
# The live probe must still use the resolved secret.
mock_fetch.assert_called_once()
probe_args, probe_kwargs = mock_fetch.call_args
assert probe_args[0] == "sk-live-neuralwatt-secret"
# But config.yaml must keep the env reference, not the plaintext secret.
saved = config_path.read_text()
config = yaml.safe_load(saved) or {}
assert config["model"]["api_key"] == "${NEURALWATT_API_KEY}"
assert config["custom_providers"][0]["api_key"] == "${NEURALWATT_API_KEY}"
assert "sk-live-neuralwatt-secret" not in saved
-37
View File
@@ -308,43 +308,6 @@ def test_run_doctor_accepts_named_provider_from_providers_section(monkeypatch, t
assert "model.provider 'volcengine-plan' is not a recognised provider" not in out
def test_run_doctor_accepts_bare_custom_provider(monkeypatch, tmp_path):
home = tmp_path / ".hermes"
home.mkdir(parents=True, exist_ok=True)
(home / "config.yaml").write_text(
"model:\n"
" provider: custom\n"
" default: local-model\n"
" base_url: http://localhost:8000/v1\n",
encoding="utf-8",
)
monkeypatch.setattr(doctor_mod, "HERMES_HOME", home)
monkeypatch.setattr(doctor_mod, "PROJECT_ROOT", tmp_path / "project")
monkeypatch.setattr(doctor_mod, "_DHH", str(home))
(tmp_path / "project").mkdir(exist_ok=True)
fake_model_tools = types.SimpleNamespace(
check_tool_availability=lambda *a, **kw: ([], []),
TOOLSET_REQUIREMENTS={},
)
monkeypatch.setitem(sys.modules, "model_tools", fake_model_tools)
try:
from hermes_cli import auth as _auth_mod
monkeypatch.setattr(_auth_mod, "get_nous_auth_status", lambda: {})
monkeypatch.setattr(_auth_mod, "get_codex_auth_status", lambda: {})
except Exception:
pass
buf = io.StringIO()
with contextlib.redirect_stdout(buf):
doctor_mod.run_doctor(Namespace(fix=False))
out = buf.getvalue()
assert "model.provider 'custom' is not a recognised provider" not in out
def test_run_doctor_termux_does_not_mark_browser_available_without_agent_browser(monkeypatch, tmp_path):
home = tmp_path / ".hermes"
home.mkdir(parents=True, exist_ok=True)
-486
View File
@@ -1,486 +0,0 @@
"""Tests for `hermes fallback` — chain reading, add/remove/clear, legacy migration."""
from __future__ import annotations
import io
import types
from pathlib import Path
from unittest.mock import patch
import pytest
import yaml
# ---------------------------------------------------------------------------
# Shared fixture — isolate HERMES_HOME so save_config writes to tmp_path
# ---------------------------------------------------------------------------
@pytest.fixture()
def isolated_home(tmp_path, monkeypatch):
monkeypatch.setattr(Path, "home", lambda: tmp_path)
home = tmp_path / ".hermes"
home.mkdir(exist_ok=True)
monkeypatch.setenv("HERMES_HOME", str(home))
return tmp_path
def _write_config(home: Path, data: dict) -> None:
config_path = home / ".hermes" / "config.yaml"
config_path.write_text(yaml.safe_dump(data), encoding="utf-8")
def _read_config(home: Path) -> dict:
config_path = home / ".hermes" / "config.yaml"
return yaml.safe_load(config_path.read_text(encoding="utf-8")) or {}
# ---------------------------------------------------------------------------
# _read_chain / _write_chain
# ---------------------------------------------------------------------------
class TestReadChain:
def test_returns_empty_list_when_unset(self):
from hermes_cli.fallback_cmd import _read_chain
assert _read_chain({}) == []
def test_reads_new_list_format(self):
from hermes_cli.fallback_cmd import _read_chain
cfg = {
"fallback_providers": [
{"provider": "openrouter", "model": "anthropic/claude-sonnet-4.6"},
{"provider": "nous", "model": "Hermes-4-Llama-3.1-405B"},
]
}
assert _read_chain(cfg) == [
{"provider": "openrouter", "model": "anthropic/claude-sonnet-4.6"},
{"provider": "nous", "model": "Hermes-4-Llama-3.1-405B"},
]
def test_migrates_legacy_single_dict(self):
from hermes_cli.fallback_cmd import _read_chain
cfg = {"fallback_model": {"provider": "openrouter", "model": "gpt-5.4"}}
assert _read_chain(cfg) == [{"provider": "openrouter", "model": "gpt-5.4"}]
def test_skips_incomplete_entries(self):
from hermes_cli.fallback_cmd import _read_chain
cfg = {
"fallback_providers": [
{"provider": "openrouter"}, # missing model
{"model": "gpt-5.4"}, # missing provider
{"provider": "nous", "model": "foo"}, # valid
"not-a-dict", # noise
]
}
assert _read_chain(cfg) == [{"provider": "nous", "model": "foo"}]
def test_returns_copies_not_aliases(self):
from hermes_cli.fallback_cmd import _read_chain
cfg = {"fallback_providers": [{"provider": "nous", "model": "foo"}]}
result = _read_chain(cfg)
result[0]["provider"] = "mutated"
assert cfg["fallback_providers"][0]["provider"] == "nous"
# ---------------------------------------------------------------------------
# _extract_fallback_from_model_cfg
# ---------------------------------------------------------------------------
class TestExtractFallback:
def test_extracts_from_default_field(self):
from hermes_cli.fallback_cmd import _extract_fallback_from_model_cfg
model_cfg = {"provider": "openrouter", "default": "anthropic/claude-sonnet-4.6"}
assert _extract_fallback_from_model_cfg(model_cfg) == {
"provider": "openrouter",
"model": "anthropic/claude-sonnet-4.6",
}
def test_extracts_optional_base_url_and_api_mode(self):
from hermes_cli.fallback_cmd import _extract_fallback_from_model_cfg
model_cfg = {
"provider": "custom",
"default": "local-model",
"base_url": "http://localhost:11434/v1",
"api_mode": "chat_completions",
}
assert _extract_fallback_from_model_cfg(model_cfg) == {
"provider": "custom",
"model": "local-model",
"base_url": "http://localhost:11434/v1",
"api_mode": "chat_completions",
}
def test_returns_none_without_provider(self):
from hermes_cli.fallback_cmd import _extract_fallback_from_model_cfg
assert _extract_fallback_from_model_cfg({"default": "foo"}) is None
def test_returns_none_without_model(self):
from hermes_cli.fallback_cmd import _extract_fallback_from_model_cfg
assert _extract_fallback_from_model_cfg({"provider": "openrouter"}) is None
def test_returns_none_for_non_dict(self):
from hermes_cli.fallback_cmd import _extract_fallback_from_model_cfg
assert _extract_fallback_from_model_cfg("plain-string") is None
assert _extract_fallback_from_model_cfg(None) is None
# ---------------------------------------------------------------------------
# cmd_fallback_list
# ---------------------------------------------------------------------------
class TestListCommand:
def test_list_empty(self, isolated_home, capsys):
_write_config(isolated_home, {})
from hermes_cli.fallback_cmd import cmd_fallback_list
cmd_fallback_list(types.SimpleNamespace())
out = capsys.readouterr().out
assert "No fallback providers configured" in out
assert "hermes fallback add" in out
def test_list_with_entries(self, isolated_home, capsys):
_write_config(isolated_home, {
"model": {"provider": "anthropic", "default": "claude-sonnet-4-6"},
"fallback_providers": [
{"provider": "openrouter", "model": "anthropic/claude-sonnet-4.6"},
{"provider": "nous", "model": "Hermes-4"},
],
})
from hermes_cli.fallback_cmd import cmd_fallback_list
cmd_fallback_list(types.SimpleNamespace())
out = capsys.readouterr().out
assert "Fallback chain (2 entries)" in out
assert "anthropic/claude-sonnet-4.6" in out
assert "Hermes-4" in out
# Primary should be shown too
assert "claude-sonnet-4-6" in out
def test_list_migrates_legacy_for_display(self, isolated_home, capsys):
_write_config(isolated_home, {
"fallback_model": {"provider": "openrouter", "model": "gpt-5.4"},
})
from hermes_cli.fallback_cmd import cmd_fallback_list
cmd_fallback_list(types.SimpleNamespace())
out = capsys.readouterr().out
assert "1 entry" in out
assert "gpt-5.4" in out
# ---------------------------------------------------------------------------
# cmd_fallback_add — mock select_provider_and_model
# ---------------------------------------------------------------------------
class TestAddCommand:
def test_add_appends_new_entry(self, isolated_home, capsys):
_write_config(isolated_home, {
"model": {"provider": "anthropic", "default": "claude-sonnet-4-6"},
})
def fake_picker(args=None):
# Simulate what the real picker does: writes the selection to config["model"]
from hermes_cli.config import load_config, save_config
cfg = load_config()
cfg["model"] = {
"provider": "openrouter",
"default": "anthropic/claude-sonnet-4.6",
"base_url": "https://openrouter.ai/api/v1",
"api_mode": "chat_completions",
}
save_config(cfg)
with patch("hermes_cli.main.select_provider_and_model", side_effect=fake_picker), \
patch("hermes_cli.main._require_tty"):
from hermes_cli.fallback_cmd import cmd_fallback_add
cmd_fallback_add(types.SimpleNamespace())
cfg = _read_config(isolated_home)
# Primary is preserved
assert cfg["model"]["provider"] == "anthropic"
assert cfg["model"]["default"] == "claude-sonnet-4-6"
# Fallback was appended
assert cfg["fallback_providers"] == [
{
"provider": "openrouter",
"model": "anthropic/claude-sonnet-4.6",
"base_url": "https://openrouter.ai/api/v1",
"api_mode": "chat_completions",
}
]
out = capsys.readouterr().out
assert "Added fallback" in out
def test_add_rejects_duplicate(self, isolated_home, capsys):
_write_config(isolated_home, {
"model": {"provider": "anthropic", "default": "claude-sonnet-4-6"},
"fallback_providers": [
{"provider": "openrouter", "model": "gpt-5.4"},
],
})
def fake_picker(args=None):
from hermes_cli.config import load_config, save_config
cfg = load_config()
cfg["model"] = {"provider": "openrouter", "default": "gpt-5.4"}
save_config(cfg)
with patch("hermes_cli.main.select_provider_and_model", side_effect=fake_picker), \
patch("hermes_cli.main._require_tty"):
from hermes_cli.fallback_cmd import cmd_fallback_add
cmd_fallback_add(types.SimpleNamespace())
cfg = _read_config(isolated_home)
# Should still have exactly one entry
assert len(cfg["fallback_providers"]) == 1
out = capsys.readouterr().out
assert "already in the fallback chain" in out
def test_add_rejects_same_as_primary(self, isolated_home, capsys):
_write_config(isolated_home, {
"model": {"provider": "openrouter", "default": "gpt-5.4"},
})
def fake_picker(args=None):
# User picks the same thing that's already the primary
from hermes_cli.config import load_config, save_config
cfg = load_config()
cfg["model"] = {"provider": "openrouter", "default": "gpt-5.4"}
save_config(cfg)
with patch("hermes_cli.main.select_provider_and_model", side_effect=fake_picker), \
patch("hermes_cli.main._require_tty"):
from hermes_cli.fallback_cmd import cmd_fallback_add
cmd_fallback_add(types.SimpleNamespace())
cfg = _read_config(isolated_home)
assert "fallback_providers" not in cfg or cfg["fallback_providers"] == []
out = capsys.readouterr().out
assert "matches the current primary" in out
def test_add_preserves_primary_when_picker_changes_it(self, isolated_home):
"""The picker mutates config["model"]; fallback_add must restore the primary."""
_write_config(isolated_home, {
"model": {
"provider": "anthropic",
"default": "claude-sonnet-4-6",
"base_url": "https://api.anthropic.com",
"api_mode": "anthropic_messages",
},
})
def fake_picker(args=None):
from hermes_cli.config import load_config, save_config
cfg = load_config()
cfg["model"] = {
"provider": "openrouter",
"default": "anthropic/claude-sonnet-4.6",
"base_url": "https://openrouter.ai/api/v1",
"api_mode": "chat_completions",
}
save_config(cfg)
with patch("hermes_cli.main.select_provider_and_model", side_effect=fake_picker), \
patch("hermes_cli.main._require_tty"):
from hermes_cli.fallback_cmd import cmd_fallback_add
cmd_fallback_add(types.SimpleNamespace())
cfg = _read_config(isolated_home)
# Primary exactly as it was
assert cfg["model"]["provider"] == "anthropic"
assert cfg["model"]["default"] == "claude-sonnet-4-6"
assert cfg["model"]["base_url"] == "https://api.anthropic.com"
assert cfg["model"]["api_mode"] == "anthropic_messages"
# Fallback added
assert len(cfg["fallback_providers"]) == 1
assert cfg["fallback_providers"][0]["provider"] == "openrouter"
def test_add_noop_when_picker_cancelled(self, isolated_home, capsys):
_write_config(isolated_home, {
"model": {"provider": "anthropic", "default": "claude-sonnet-4-6"},
})
def fake_picker(args=None):
# User cancelled — no change to config
pass
with patch("hermes_cli.main.select_provider_and_model", side_effect=fake_picker), \
patch("hermes_cli.main._require_tty"):
from hermes_cli.fallback_cmd import cmd_fallback_add
cmd_fallback_add(types.SimpleNamespace())
cfg = _read_config(isolated_home)
assert "fallback_providers" not in cfg or cfg["fallback_providers"] == []
out = capsys.readouterr().out
# Either "No fallback added" (picker fully cancelled) or "matches the current primary"
# (picker left config untouched) — both indicate a non-add outcome.
assert ("No fallback added" in out) or ("matches the current primary" in out)
def test_add_noop_when_picker_clears_model(self, isolated_home, capsys):
"""Simulate picker explicitly clearing model.default (unusual but possible)."""
_write_config(isolated_home, {
"model": {"provider": "anthropic", "default": "claude-sonnet-4-6"},
})
def fake_picker(args=None):
from hermes_cli.config import load_config, save_config
cfg = load_config()
cfg["model"] = {"provider": "", "default": ""}
save_config(cfg)
with patch("hermes_cli.main.select_provider_and_model", side_effect=fake_picker), \
patch("hermes_cli.main._require_tty"):
from hermes_cli.fallback_cmd import cmd_fallback_add
cmd_fallback_add(types.SimpleNamespace())
out = capsys.readouterr().out
assert "No fallback added" in out
# ---------------------------------------------------------------------------
# cmd_fallback_remove
# ---------------------------------------------------------------------------
class TestRemoveCommand:
def test_remove_empty_chain(self, isolated_home, capsys):
_write_config(isolated_home, {})
from hermes_cli.fallback_cmd import cmd_fallback_remove
cmd_fallback_remove(types.SimpleNamespace())
out = capsys.readouterr().out
assert "nothing to remove" in out
def test_remove_selected_entry(self, isolated_home, capsys):
_write_config(isolated_home, {
"fallback_providers": [
{"provider": "openrouter", "model": "gpt-5.4"},
{"provider": "nous", "model": "Hermes-4"},
{"provider": "anthropic", "model": "claude-sonnet-4-6"},
],
})
# Picker returns index 1 (the middle entry, "nous / Hermes-4")
with patch("hermes_cli.setup._curses_prompt_choice", return_value=1):
from hermes_cli.fallback_cmd import cmd_fallback_remove
cmd_fallback_remove(types.SimpleNamespace())
cfg = _read_config(isolated_home)
assert cfg["fallback_providers"] == [
{"provider": "openrouter", "model": "gpt-5.4"},
{"provider": "anthropic", "model": "claude-sonnet-4-6"},
]
out = capsys.readouterr().out
assert "Removed fallback" in out
assert "Hermes-4" in out
def test_remove_cancel_keeps_chain(self, isolated_home):
_write_config(isolated_home, {
"fallback_providers": [
{"provider": "openrouter", "model": "gpt-5.4"},
],
})
# Cancel = last item (index == len(chain) == 1 in our menu)
with patch("hermes_cli.setup._curses_prompt_choice", return_value=1):
from hermes_cli.fallback_cmd import cmd_fallback_remove
cmd_fallback_remove(types.SimpleNamespace())
cfg = _read_config(isolated_home)
assert len(cfg["fallback_providers"]) == 1
# ---------------------------------------------------------------------------
# cmd_fallback_clear
# ---------------------------------------------------------------------------
class TestClearCommand:
def test_clear_empty_chain(self, isolated_home, capsys):
_write_config(isolated_home, {})
from hermes_cli.fallback_cmd import cmd_fallback_clear
cmd_fallback_clear(types.SimpleNamespace())
out = capsys.readouterr().out
assert "nothing to clear" in out
def test_clear_with_confirmation(self, isolated_home, capsys, monkeypatch):
_write_config(isolated_home, {
"fallback_providers": [
{"provider": "openrouter", "model": "gpt-5.4"},
{"provider": "nous", "model": "Hermes-4"},
],
})
monkeypatch.setattr("builtins.input", lambda *a, **kw: "y")
from hermes_cli.fallback_cmd import cmd_fallback_clear
cmd_fallback_clear(types.SimpleNamespace())
cfg = _read_config(isolated_home)
assert cfg.get("fallback_providers") == []
out = capsys.readouterr().out
assert "Fallback chain cleared" in out
def test_clear_cancelled(self, isolated_home, monkeypatch):
_write_config(isolated_home, {
"fallback_providers": [{"provider": "openrouter", "model": "gpt-5.4"}],
})
monkeypatch.setattr("builtins.input", lambda *a, **kw: "n")
from hermes_cli.fallback_cmd import cmd_fallback_clear
cmd_fallback_clear(types.SimpleNamespace())
cfg = _read_config(isolated_home)
assert len(cfg["fallback_providers"]) == 1
# ---------------------------------------------------------------------------
# cmd_fallback dispatcher
# ---------------------------------------------------------------------------
class TestDispatcher:
def test_no_subcommand_lists(self, isolated_home, capsys):
_write_config(isolated_home, {})
from hermes_cli.fallback_cmd import cmd_fallback
cmd_fallback(types.SimpleNamespace(fallback_command=None))
out = capsys.readouterr().out
assert "No fallback providers configured" in out
def test_list_alias(self, isolated_home, capsys):
_write_config(isolated_home, {})
from hermes_cli.fallback_cmd import cmd_fallback
cmd_fallback(types.SimpleNamespace(fallback_command="ls"))
out = capsys.readouterr().out
assert "No fallback providers configured" in out
def test_remove_alias(self, isolated_home, capsys):
_write_config(isolated_home, {})
from hermes_cli.fallback_cmd import cmd_fallback
cmd_fallback(types.SimpleNamespace(fallback_command="rm"))
out = capsys.readouterr().out
assert "nothing to remove" in out
def test_unknown_subcommand_exits(self, isolated_home):
_write_config(isolated_home, {})
from hermes_cli.fallback_cmd import cmd_fallback
with pytest.raises(SystemExit):
cmd_fallback(types.SimpleNamespace(fallback_command="nope"))
# ---------------------------------------------------------------------------
# argparse wiring — verify the subparser is registered
# ---------------------------------------------------------------------------
class TestArgparseWiring:
"""Verify `hermes fallback` is wired into main.py's argparse tree.
main() builds the parser inline, so we invoke main([...]) via subprocess
with --help to introspect registered subcommands without side effects.
"""
def test_fallback_help_lists_subcommands(self):
import subprocess
import sys
result = subprocess.run(
[sys.executable, "-m", "hermes_cli.main", "fallback", "--help"],
capture_output=True,
text=True,
timeout=30,
)
# --help exits 0
assert result.returncode == 0, f"stderr: {result.stderr}"
out = result.stdout + result.stderr
# All four subcommands should appear in help
assert "list" in out
assert "add" in out
assert "remove" in out
assert "clear" in out
-210
View File
@@ -1,210 +0,0 @@
"""Tests for the kanban CLI surface (hermes_cli.kanban)."""
from __future__ import annotations
import argparse
import json
import os
from pathlib import Path
import pytest
from hermes_cli import kanban as kc
from hermes_cli import kanban_db as kb
@pytest.fixture
def kanban_home(tmp_path, monkeypatch):
home = tmp_path / ".hermes"
home.mkdir()
monkeypatch.setenv("HERMES_HOME", str(home))
monkeypatch.setattr(Path, "home", lambda: tmp_path)
kb.init_db()
return home
# ---------------------------------------------------------------------------
# Workspace flag parsing
# ---------------------------------------------------------------------------
@pytest.mark.parametrize(
"value,expected",
[
("scratch", ("scratch", None)),
("worktree", ("worktree", None)),
("dir:/tmp/work", ("dir", "/tmp/work")),
],
)
def test_parse_workspace_flag_valid(value, expected):
assert kc._parse_workspace_flag(value) == expected
def test_parse_workspace_flag_expands_user():
kind, path = kc._parse_workspace_flag("dir:~/vault")
assert kind == "dir"
assert path.endswith("/vault")
assert not path.startswith("~")
@pytest.mark.parametrize("bad", ["cloud", "dir:", "", "worktree:/x"])
def test_parse_workspace_flag_rejects(bad):
if not bad:
# Empty -> defaults; not an error.
assert kc._parse_workspace_flag(bad) == ("scratch", None)
return
with pytest.raises(argparse.ArgumentTypeError):
kc._parse_workspace_flag(bad)
# ---------------------------------------------------------------------------
# run_slash smoke tests (end-to-end via the same entry both CLI and gateway use)
# ---------------------------------------------------------------------------
def test_run_slash_no_args_shows_usage(kanban_home):
out = kc.run_slash("")
assert "kanban" in out.lower()
assert "create" in out.lower() or "subcommand" in out.lower() or "action" in out.lower()
def test_run_slash_create_and_list(kanban_home):
out = kc.run_slash("create 'ship feature' --assignee alice")
assert "Created" in out
out = kc.run_slash("list")
assert "ship feature" in out
assert "alice" in out
def test_run_slash_create_with_parent_and_cascade(kanban_home):
# Parent then child via --parent
out1 = kc.run_slash("create 'parent' --assignee alice")
# Extract the "t_xxxx" id from "Created t_xxxx (ready, ...)"
import re
m = re.search(r"(t_[a-f0-9]+)", out1)
assert m
p = m.group(1)
out2 = kc.run_slash(f"create 'child' --assignee bob --parent {p}")
assert "todo" in out2 # child starts as todo
# Complete parent; list should promote child to ready
kc.run_slash(f"complete {p}")
# Explicit filter: child should now be ready (was todo before complete).
ready_list = kc.run_slash("list --status ready")
assert "child" in ready_list
def test_run_slash_show_includes_comments(kanban_home):
out = kc.run_slash("create 'x'")
import re
tid = re.search(r"(t_[a-f0-9]+)", out).group(1)
kc.run_slash(f"comment {tid} 'source is paywalled'")
show = kc.run_slash(f"show {tid}")
assert "source is paywalled" in show
def test_run_slash_block_unblock_cycle(kanban_home):
out = kc.run_slash("create 'x' --assignee alice")
import re
tid = re.search(r"(t_[a-f0-9]+)", out).group(1)
# Claim first so block() finds it running
kc.run_slash(f"claim {tid}")
assert "Blocked" in kc.run_slash(f"block {tid} 'need decision'")
assert "Unblocked" in kc.run_slash(f"unblock {tid}")
def test_run_slash_json_output(kanban_home):
out = kc.run_slash("create 'jsontask' --assignee alice --json")
payload = json.loads(out)
assert payload["title"] == "jsontask"
assert payload["assignee"] == "alice"
assert payload["status"] == "ready"
def test_run_slash_dispatch_dry_run_counts(kanban_home):
kc.run_slash("create 'a' --assignee alice")
kc.run_slash("create 'b' --assignee bob")
out = kc.run_slash("dispatch --dry-run")
assert "Spawned:" in out
def test_run_slash_context_output_format(kanban_home):
out = kc.run_slash("create 'tech spec' --assignee alice --body 'write an RFC'")
import re
tid = re.search(r"(t_[a-f0-9]+)", out).group(1)
kc.run_slash(f"comment {tid} 'remember to include performance section'")
ctx = kc.run_slash(f"context {tid}")
assert "tech spec" in ctx
assert "write an RFC" in ctx
assert "performance section" in ctx
def test_run_slash_tenant_filter(kanban_home):
kc.run_slash("create 'biz-a task' --tenant biz-a --assignee alice")
kc.run_slash("create 'biz-b task' --tenant biz-b --assignee alice")
a = kc.run_slash("list --tenant biz-a")
b = kc.run_slash("list --tenant biz-b")
assert "biz-a task" in a and "biz-b task" not in a
assert "biz-b task" in b and "biz-a task" not in b
def test_run_slash_usage_error_returns_message(kanban_home):
# Missing required argument for create
out = kc.run_slash("create")
assert "usage" in out.lower() or "error" in out.lower()
def test_run_slash_assign_reassigns(kanban_home):
out = kc.run_slash("create 'x' --assignee alice")
import re
tid = re.search(r"(t_[a-f0-9]+)", out).group(1)
assert "Assigned" in kc.run_slash(f"assign {tid} bob")
show = kc.run_slash(f"show {tid}")
assert "bob" in show
def test_run_slash_link_unlink(kanban_home):
a = kc.run_slash("create 'a'")
b = kc.run_slash("create 'b'")
import re
ta = re.search(r"(t_[a-f0-9]+)", a).group(1)
tb = re.search(r"(t_[a-f0-9]+)", b).group(1)
assert "Linked" in kc.run_slash(f"link {ta} {tb}")
# After link, b is todo
show = kc.run_slash(f"show {tb}")
assert "todo" in show
assert "Unlinked" in kc.run_slash(f"unlink {ta} {tb}")
# ---------------------------------------------------------------------------
# Integration with the COMMAND_REGISTRY
# ---------------------------------------------------------------------------
def test_kanban_is_resolvable():
from hermes_cli.commands import resolve_command
cmd = resolve_command("kanban")
assert cmd is not None
assert cmd.name == "kanban"
def test_kanban_bypasses_active_session_guard():
from hermes_cli.commands import should_bypass_active_session
assert should_bypass_active_session("kanban")
def test_kanban_in_autocomplete_table():
from hermes_cli.commands import COMMANDS, SUBCOMMANDS
assert "/kanban" in COMMANDS
subs = SUBCOMMANDS.get("/kanban") or []
assert "create" in subs
assert "dispatch" in subs
def test_kanban_not_gateway_only():
# kanban is available in BOTH CLI and gateway surfaces.
from hermes_cli.commands import COMMAND_REGISTRY
cmd = next(c for c in COMMAND_REGISTRY if c.name == "kanban")
assert not cmd.cli_only
assert not cmd.gateway_only
File diff suppressed because it is too large Load Diff
-438
View File
@@ -1,438 +0,0 @@
"""Tests for the Kanban DB layer (hermes_cli.kanban_db)."""
from __future__ import annotations
import concurrent.futures
import os
import time
from pathlib import Path
import pytest
from hermes_cli import kanban_db as kb
@pytest.fixture
def kanban_home(tmp_path, monkeypatch):
"""Isolated HERMES_HOME with an empty kanban DB."""
home = tmp_path / ".hermes"
home.mkdir()
monkeypatch.setenv("HERMES_HOME", str(home))
monkeypatch.setattr(Path, "home", lambda: tmp_path)
kb.init_db()
return home
# ---------------------------------------------------------------------------
# Schema / init
# ---------------------------------------------------------------------------
def test_init_db_is_idempotent(kanban_home):
# Second call should not error or drop data.
with kb.connect() as conn:
kb.create_task(conn, title="persisted")
kb.init_db()
with kb.connect() as conn:
tasks = kb.list_tasks(conn)
assert len(tasks) == 1
assert tasks[0].title == "persisted"
def test_init_creates_expected_tables(kanban_home):
with kb.connect() as conn:
rows = conn.execute(
"SELECT name FROM sqlite_master WHERE type='table' ORDER BY name"
).fetchall()
names = {r["name"] for r in rows}
assert {"tasks", "task_links", "task_comments", "task_events"} <= names
# ---------------------------------------------------------------------------
# Task creation + status inference
# ---------------------------------------------------------------------------
def test_create_task_no_parents_is_ready(kanban_home):
with kb.connect() as conn:
tid = kb.create_task(conn, title="ship it", assignee="alice")
t = kb.get_task(conn, tid)
assert t is not None
assert t.status == "ready"
assert t.assignee == "alice"
assert t.workspace_kind == "scratch"
def test_create_task_with_parent_is_todo_until_parent_done(kanban_home):
with kb.connect() as conn:
p = kb.create_task(conn, title="parent")
c = kb.create_task(conn, title="child", parents=[p])
assert kb.get_task(conn, c).status == "todo"
kb.complete_task(conn, p, result="ok")
assert kb.get_task(conn, c).status == "ready"
def test_create_task_unknown_parent_errors(kanban_home):
with kb.connect() as conn, pytest.raises(ValueError, match="unknown parent"):
kb.create_task(conn, title="orphan", parents=["t_ghost"])
def test_workspace_kind_validation(kanban_home):
with kb.connect() as conn, pytest.raises(ValueError, match="workspace_kind"):
kb.create_task(conn, title="bad ws", workspace_kind="cloud")
# ---------------------------------------------------------------------------
# Links + dependency resolution
# ---------------------------------------------------------------------------
def test_link_demotes_ready_child_to_todo_when_parent_not_done(kanban_home):
with kb.connect() as conn:
a = kb.create_task(conn, title="a")
b = kb.create_task(conn, title="b")
assert kb.get_task(conn, b).status == "ready"
kb.link_tasks(conn, a, b)
assert kb.get_task(conn, b).status == "todo"
def test_link_keeps_ready_child_when_parent_already_done(kanban_home):
with kb.connect() as conn:
a = kb.create_task(conn, title="a")
kb.complete_task(conn, a)
b = kb.create_task(conn, title="b")
assert kb.get_task(conn, b).status == "ready"
kb.link_tasks(conn, a, b)
assert kb.get_task(conn, b).status == "ready"
def test_link_rejects_self_loop(kanban_home):
with kb.connect() as conn:
a = kb.create_task(conn, title="a")
with pytest.raises(ValueError, match="itself"):
kb.link_tasks(conn, a, a)
def test_link_detects_cycle(kanban_home):
with kb.connect() as conn:
a = kb.create_task(conn, title="a")
b = kb.create_task(conn, title="b", parents=[a])
c = kb.create_task(conn, title="c", parents=[b])
with pytest.raises(ValueError, match="cycle"):
kb.link_tasks(conn, c, a)
with pytest.raises(ValueError, match="cycle"):
kb.link_tasks(conn, b, a)
def test_recompute_ready_cascades_through_chain(kanban_home):
with kb.connect() as conn:
a = kb.create_task(conn, title="a")
b = kb.create_task(conn, title="b", parents=[a])
c = kb.create_task(conn, title="c", parents=[b])
assert [kb.get_task(conn, x).status for x in (a, b, c)] == \
["ready", "todo", "todo"]
kb.complete_task(conn, a)
assert kb.get_task(conn, b).status == "ready"
kb.complete_task(conn, b)
assert kb.get_task(conn, c).status == "ready"
def test_recompute_ready_fan_in_waits_for_all_parents(kanban_home):
with kb.connect() as conn:
a = kb.create_task(conn, title="a")
b = kb.create_task(conn, title="b")
c = kb.create_task(conn, title="c", parents=[a, b])
kb.complete_task(conn, a)
assert kb.get_task(conn, c).status == "todo"
kb.complete_task(conn, b)
assert kb.get_task(conn, c).status == "ready"
# ---------------------------------------------------------------------------
# Atomic claim (CAS)
# ---------------------------------------------------------------------------
def test_claim_once_wins_second_loses(kanban_home):
with kb.connect() as conn:
t = kb.create_task(conn, title="x", assignee="a")
first = kb.claim_task(conn, t, claimer="host:1")
assert first is not None and first.status == "running"
second = kb.claim_task(conn, t, claimer="host:2")
assert second is None
def test_claim_fails_on_non_ready(kanban_home):
with kb.connect() as conn:
t = kb.create_task(conn, title="x")
# Move to todo by introducing an unsatisfied parent.
p = kb.create_task(conn, title="p")
kb.link_tasks(conn, p, t)
assert kb.get_task(conn, t).status == "todo"
assert kb.claim_task(conn, t) is None
def test_stale_claim_reclaimed(kanban_home):
with kb.connect() as conn:
t = kb.create_task(conn, title="x", assignee="a")
kb.claim_task(conn, t)
# Rewind claim_expires so it looks stale.
conn.execute(
"UPDATE tasks SET claim_expires = ? WHERE id = ?",
(int(time.time()) - 3600, t),
)
reclaimed = kb.release_stale_claims(conn)
assert reclaimed == 1
assert kb.get_task(conn, t).status == "ready"
def test_heartbeat_extends_claim(kanban_home):
with kb.connect() as conn:
t = kb.create_task(conn, title="x", assignee="a")
claimer = "host:hb"
kb.claim_task(conn, t, claimer=claimer, ttl_seconds=60)
original = kb.get_task(conn, t).claim_expires
# Rewind then heartbeat.
conn.execute("UPDATE tasks SET claim_expires = ? WHERE id = ?", (0, t))
ok = kb.heartbeat_claim(conn, t, claimer=claimer, ttl_seconds=3600)
assert ok
new = kb.get_task(conn, t).claim_expires
assert new > int(time.time()) + 3000
def test_concurrent_claims_only_one_wins(kanban_home):
"""Fire N threads claiming the same task; exactly one must win."""
with kb.connect() as conn:
t = kb.create_task(conn, title="race", assignee="a")
def attempt(i):
with kb.connect() as c:
return kb.claim_task(c, t, claimer=f"host:{i}")
n_workers = 8
with concurrent.futures.ThreadPoolExecutor(max_workers=n_workers) as ex:
results = list(ex.map(attempt, range(n_workers)))
winners = [r for r in results if r is not None]
assert len(winners) == 1
assert winners[0].status == "running"
# ---------------------------------------------------------------------------
# Complete / block / unblock / archive / assign
# ---------------------------------------------------------------------------
def test_complete_records_result(kanban_home):
with kb.connect() as conn:
t = kb.create_task(conn, title="x")
assert kb.complete_task(conn, t, result="done and dusted")
task = kb.get_task(conn, t)
assert task.status == "done"
assert task.result == "done and dusted"
assert task.completed_at is not None
def test_block_then_unblock(kanban_home):
with kb.connect() as conn:
t = kb.create_task(conn, title="x", assignee="a")
kb.claim_task(conn, t)
assert kb.block_task(conn, t, reason="need input")
assert kb.get_task(conn, t).status == "blocked"
assert kb.unblock_task(conn, t)
assert kb.get_task(conn, t).status == "ready"
def test_assign_refuses_while_running(kanban_home):
with kb.connect() as conn:
t = kb.create_task(conn, title="x", assignee="a")
kb.claim_task(conn, t)
with pytest.raises(RuntimeError, match="currently running"):
kb.assign_task(conn, t, "b")
def test_assign_reassigns_when_not_running(kanban_home):
with kb.connect() as conn:
t = kb.create_task(conn, title="x", assignee="a")
assert kb.assign_task(conn, t, "b")
assert kb.get_task(conn, t).assignee == "b"
def test_archive_hides_from_default_list(kanban_home):
with kb.connect() as conn:
t = kb.create_task(conn, title="x")
kb.complete_task(conn, t)
assert kb.archive_task(conn, t)
assert len(kb.list_tasks(conn)) == 0
assert len(kb.list_tasks(conn, include_archived=True)) == 1
# ---------------------------------------------------------------------------
# Comments / events / worker context
# ---------------------------------------------------------------------------
def test_comments_recorded_in_order(kanban_home):
with kb.connect() as conn:
t = kb.create_task(conn, title="x")
kb.add_comment(conn, t, "user", "first")
kb.add_comment(conn, t, "researcher", "second")
comments = kb.list_comments(conn, t)
assert [c.body for c in comments] == ["first", "second"]
assert [c.author for c in comments] == ["user", "researcher"]
def test_empty_comment_rejected(kanban_home):
with kb.connect() as conn:
t = kb.create_task(conn, title="x")
with pytest.raises(ValueError, match="body is required"):
kb.add_comment(conn, t, "user", "")
def test_events_capture_lifecycle(kanban_home):
with kb.connect() as conn:
t = kb.create_task(conn, title="x", assignee="a")
kb.claim_task(conn, t)
kb.complete_task(conn, t, result="ok")
events = kb.list_events(conn, t)
kinds = [e.kind for e in events]
assert "created" in kinds
assert "claimed" in kinds
assert "completed" in kinds
def test_worker_context_includes_parent_results_and_comments(kanban_home):
with kb.connect() as conn:
p = kb.create_task(conn, title="p")
kb.complete_task(conn, p, result="PARENT_RESULT_MARKER")
c = kb.create_task(conn, title="child", parents=[p])
kb.add_comment(conn, c, "user", "CLARIFICATION_MARKER")
ctx = kb.build_worker_context(conn, c)
assert "PARENT_RESULT_MARKER" in ctx
assert "CLARIFICATION_MARKER" in ctx
assert c in ctx
assert "child" in ctx
# ---------------------------------------------------------------------------
# Dispatcher
# ---------------------------------------------------------------------------
def test_dispatch_dry_run_does_not_claim(kanban_home):
with kb.connect() as conn:
t1 = kb.create_task(conn, title="a", assignee="alice")
t2 = kb.create_task(conn, title="b", assignee="bob")
res = kb.dispatch_once(conn, dry_run=True)
assert {s[0] for s in res.spawned} == {t1, t2}
with kb.connect() as conn:
# Dry run must NOT mutate status.
assert kb.get_task(conn, t1).status == "ready"
assert kb.get_task(conn, t2).status == "ready"
def test_dispatch_skips_unassigned(kanban_home):
with kb.connect() as conn:
t = kb.create_task(conn, title="floater")
res = kb.dispatch_once(conn, dry_run=True)
assert t in res.skipped_unassigned
assert not res.spawned
def test_dispatch_promotes_ready_and_spawns(kanban_home):
spawns = []
def fake_spawn(task, workspace):
spawns.append((task.id, task.assignee, workspace))
with kb.connect() as conn:
p = kb.create_task(conn, title="p", assignee="alice")
c = kb.create_task(conn, title="c", assignee="bob", parents=[p])
# Finish parent outside dispatch; promotion happens inside.
kb.complete_task(conn, p)
res = kb.dispatch_once(conn, spawn_fn=fake_spawn)
# Spawned c (a was already done when dispatch was called).
assert len(spawns) == 1
assert spawns[0][0] == c
assert spawns[0][1] == "bob"
# c is now running
with kb.connect() as conn:
assert kb.get_task(conn, c).status == "running"
def test_dispatch_spawn_failure_releases_claim(kanban_home):
def boom(task, workspace):
raise RuntimeError("spawn failed")
with kb.connect() as conn:
t = kb.create_task(conn, title="boom", assignee="alice")
kb.dispatch_once(conn, spawn_fn=boom)
# Must return to ready so the next tick can retry.
assert kb.get_task(conn, t).status == "ready"
assert kb.get_task(conn, t).claim_lock is None
def test_dispatch_reclaims_stale_before_spawning(kanban_home):
with kb.connect() as conn:
t = kb.create_task(conn, title="x", assignee="alice")
kb.claim_task(conn, t)
conn.execute(
"UPDATE tasks SET claim_expires = ? WHERE id = ?",
(int(time.time()) - 1, t),
)
res = kb.dispatch_once(conn, dry_run=True)
assert res.reclaimed == 1
# ---------------------------------------------------------------------------
# Workspace resolution
# ---------------------------------------------------------------------------
def test_scratch_workspace_created_under_hermes_home(kanban_home):
with kb.connect() as conn:
t = kb.create_task(conn, title="x")
task = kb.get_task(conn, t)
ws = kb.resolve_workspace(task)
assert ws.exists()
assert ws.is_dir()
assert "kanban" in str(ws)
def test_dir_workspace_honors_given_path(kanban_home, tmp_path):
target = tmp_path / "my-vault"
with kb.connect() as conn:
t = kb.create_task(
conn, title="biz", workspace_kind="dir", workspace_path=str(target)
)
task = kb.get_task(conn, t)
ws = kb.resolve_workspace(task)
assert ws == target
assert ws.exists()
def test_worktree_workspace_returns_intended_path(kanban_home, tmp_path):
target = str(tmp_path / ".worktrees" / "my-task")
with kb.connect() as conn:
t = kb.create_task(
conn, title="ship", workspace_kind="worktree", workspace_path=target
)
task = kb.get_task(conn, t)
ws = kb.resolve_workspace(task)
# We do NOT auto-create worktrees; the worker's skill handles that.
assert str(ws) == target
# ---------------------------------------------------------------------------
# Tenancy
# ---------------------------------------------------------------------------
def test_tenant_column_filters_listings(kanban_home):
with kb.connect() as conn:
kb.create_task(conn, title="a1", tenant="biz-a")
kb.create_task(conn, title="b1", tenant="biz-b")
kb.create_task(conn, title="shared") # no tenant
biz_a = kb.list_tasks(conn, tenant="biz-a")
biz_b = kb.list_tasks(conn, tenant="biz-b")
assert [t.title for t in biz_a] == ["a1"]
assert [t.title for t in biz_b] == ["b1"]
def test_tenant_propagates_to_events(kanban_home):
with kb.connect() as conn:
t = kb.create_task(conn, title="tenant-task", tenant="biz-a")
events = kb.list_events(conn, t)
# The "created" event should have tenant in its payload.
created = [e for e in events if e.kind == "created"]
assert created and created[0].payload.get("tenant") == "biz-a"
-284
View File
@@ -1,284 +0,0 @@
"""Tests for hermes_cli.model_catalog — remote manifest fetch + cache + fallback."""
from __future__ import annotations
import json
import time
from pathlib import Path
from unittest.mock import patch
import pytest
@pytest.fixture
def isolated_home(tmp_path, monkeypatch):
"""Isolate HERMES_HOME + reset any module-level catalog cache per test."""
home = tmp_path / ".hermes"
home.mkdir()
monkeypatch.setattr(Path, "home", lambda: tmp_path)
monkeypatch.setenv("HERMES_HOME", str(home))
# Force a fresh catalog module state for each test.
import importlib
from hermes_cli import model_catalog
importlib.reload(model_catalog)
yield home
model_catalog.reset_cache()
def _valid_manifest() -> dict:
return {
"version": 1,
"updated_at": "2026-04-25T22:00:00Z",
"metadata": {"source": "test"},
"providers": {
"openrouter": {
"metadata": {"display_name": "OpenRouter"},
"models": [
{"id": "anthropic/claude-opus-4.7", "description": "recommended"},
{"id": "openai/gpt-5.4", "description": ""},
{"id": "openrouter/elephant-alpha", "description": "free"},
],
},
"nous": {
"metadata": {"display_name": "Nous Portal"},
"models": [
{"id": "anthropic/claude-opus-4.7"},
{"id": "moonshotai/kimi-k2.6"},
],
},
},
}
class TestValidation:
def test_accepts_well_formed_manifest(self, isolated_home):
from hermes_cli.model_catalog import _validate_manifest
assert _validate_manifest(_valid_manifest()) is True
def test_rejects_non_dict(self, isolated_home):
from hermes_cli.model_catalog import _validate_manifest
assert _validate_manifest("string") is False
assert _validate_manifest([]) is False
assert _validate_manifest(None) is False
def test_rejects_missing_version(self, isolated_home):
from hermes_cli.model_catalog import _validate_manifest
m = _valid_manifest()
del m["version"]
assert _validate_manifest(m) is False
def test_rejects_future_version(self, isolated_home):
from hermes_cli.model_catalog import _validate_manifest
m = _valid_manifest()
m["version"] = 999
assert _validate_manifest(m) is False
def test_rejects_missing_providers(self, isolated_home):
from hermes_cli.model_catalog import _validate_manifest
m = _valid_manifest()
del m["providers"]
assert _validate_manifest(m) is False
def test_rejects_malformed_model_entry(self, isolated_home):
from hermes_cli.model_catalog import _validate_manifest
m = _valid_manifest()
m["providers"]["openrouter"]["models"][0] = {"id": ""} # empty id
assert _validate_manifest(m) is False
def test_rejects_non_string_model_id(self, isolated_home):
from hermes_cli.model_catalog import _validate_manifest
m = _valid_manifest()
m["providers"]["openrouter"]["models"][0] = {"id": 42}
assert _validate_manifest(m) is False
class TestFetchSuccess:
def test_fetch_and_cache_writes_disk(self, isolated_home):
from hermes_cli import model_catalog
manifest = _valid_manifest()
with patch.object(
model_catalog, "_fetch_manifest", return_value=manifest
) as fetch:
result = model_catalog.get_catalog(force_refresh=True)
assert result == manifest
assert fetch.called
cache_file = model_catalog._cache_path()
assert cache_file.exists()
with open(cache_file) as fh:
assert json.load(fh) == manifest
def test_second_call_uses_in_process_cache(self, isolated_home):
from hermes_cli import model_catalog
manifest = _valid_manifest()
with patch.object(
model_catalog, "_fetch_manifest", return_value=manifest
) as fetch:
model_catalog.get_catalog(force_refresh=True)
model_catalog.get_catalog() # should not hit network again
assert fetch.call_count == 1
def test_force_refresh_always_refetches(self, isolated_home):
from hermes_cli import model_catalog
manifest = _valid_manifest()
with patch.object(
model_catalog, "_fetch_manifest", return_value=manifest
) as fetch:
model_catalog.get_catalog(force_refresh=True)
model_catalog.get_catalog(force_refresh=True)
assert fetch.call_count == 2
class TestFetchFailure:
def test_network_failure_returns_empty_when_no_cache(self, isolated_home):
from hermes_cli import model_catalog
with patch.object(model_catalog, "_fetch_manifest", return_value=None):
result = model_catalog.get_catalog(force_refresh=True)
assert result == {}
def test_network_failure_falls_back_to_disk_cache(self, isolated_home):
from hermes_cli import model_catalog
# Prime disk cache with a fresh copy.
manifest = _valid_manifest()
with patch.object(model_catalog, "_fetch_manifest", return_value=manifest):
model_catalog.get_catalog(force_refresh=True)
# Now wipe in-process cache and simulate network failure on refetch.
model_catalog.reset_cache()
with patch.object(model_catalog, "_fetch_manifest", return_value=None):
result = model_catalog.get_catalog(force_refresh=True)
assert result == manifest
def test_fetch_failure_falls_back_to_stale_cache(self, isolated_home):
from hermes_cli import model_catalog
manifest = _valid_manifest()
# Write stale cache directly (mtime in the past).
cache = model_catalog._cache_path()
cache.parent.mkdir(parents=True, exist_ok=True)
with open(cache, "w") as fh:
json.dump(manifest, fh)
old = time.time() - 30 * 24 * 3600 # 30 days ago
import os as _os
_os.utime(cache, (old, old))
with patch.object(model_catalog, "_fetch_manifest", return_value=None):
result = model_catalog.get_catalog()
# Stale cache is better than nothing.
assert result == manifest
class TestCuratedAccessors:
def test_openrouter_returns_tuples(self, isolated_home):
from hermes_cli import model_catalog
with patch.object(
model_catalog, "_fetch_manifest", return_value=_valid_manifest()
):
result = model_catalog.get_curated_openrouter_models()
assert result == [
("anthropic/claude-opus-4.7", "recommended"),
("openai/gpt-5.4", ""),
("openrouter/elephant-alpha", "free"),
]
def test_nous_returns_ids(self, isolated_home):
from hermes_cli import model_catalog
with patch.object(
model_catalog, "_fetch_manifest", return_value=_valid_manifest()
):
result = model_catalog.get_curated_nous_models()
assert result == ["anthropic/claude-opus-4.7", "moonshotai/kimi-k2.6"]
def test_openrouter_returns_none_when_catalog_empty(self, isolated_home):
from hermes_cli import model_catalog
with patch.object(model_catalog, "_fetch_manifest", return_value=None):
assert model_catalog.get_curated_openrouter_models() is None
def test_nous_returns_none_when_catalog_empty(self, isolated_home):
from hermes_cli import model_catalog
with patch.object(model_catalog, "_fetch_manifest", return_value=None):
assert model_catalog.get_curated_nous_models() is None
class TestDisabled:
def test_disabled_config_short_circuits(self, isolated_home):
from hermes_cli import model_catalog
with patch.object(
model_catalog,
"_load_catalog_config",
return_value={
"enabled": False,
"url": "http://ignored",
"ttl_hours": 24.0,
"providers": {},
},
):
with patch.object(model_catalog, "_fetch_manifest") as fetch:
result = model_catalog.get_catalog()
assert result == {}
fetch.assert_not_called()
class TestProviderOverride:
def test_override_url_takes_precedence(self, isolated_home):
from hermes_cli import model_catalog
override_payload = {
"version": 1,
"providers": {
"openrouter": {
"models": [
{"id": "override/model", "description": "custom"},
]
}
},
}
def fake_fetch(url, timeout):
if "override" in url:
return override_payload
return _valid_manifest()
with patch.object(
model_catalog,
"_load_catalog_config",
return_value={
"enabled": True,
"url": "http://master",
"ttl_hours": 24.0,
"providers": {"openrouter": {"url": "http://override"}},
},
):
with patch.object(model_catalog, "_fetch_manifest", side_effect=fake_fetch):
result = model_catalog.get_curated_openrouter_models()
assert result == [("override/model", "custom")]
class TestIntegrationWithModelsModule:
"""Exercise the fallback paths via the real callers in hermes_cli.models."""
def test_curated_nous_ids_falls_back_to_hardcoded_on_empty_catalog(
self, isolated_home
):
from hermes_cli import model_catalog
from hermes_cli.models import get_curated_nous_model_ids, _PROVIDER_MODELS
with patch.object(model_catalog, "_fetch_manifest", return_value=None):
result = get_curated_nous_model_ids()
assert result == list(_PROVIDER_MODELS["nous"])
def test_curated_nous_ids_prefers_manifest(self, isolated_home):
from hermes_cli import model_catalog
from hermes_cli.models import get_curated_nous_model_ids
with patch.object(
model_catalog, "_fetch_manifest", return_value=_valid_manifest()
):
result = get_curated_nous_model_ids()
assert result == ["anthropic/claude-opus-4.7", "moonshotai/kimi-k2.6"]
@@ -88,61 +88,3 @@ class TestResolveDisplayContextLength:
model_info=fake_mi,
)
assert ctx == 128_000
def test_custom_providers_override_honored(self):
"""Regression for #15779: /model switch onto a custom provider must
surface the configured per-model context_length, not the 128K/256K
fallback.
"""
custom_provs = [
{
"name": "my-custom-endpoint",
"base_url": "https://example.invalid/v1",
"models": {"gpt-5.5": {"context_length": 1_050_000}},
}
]
# Real resolver call — no mock — so the override path is exercised
# through agent.model_metadata.get_model_context_length.
from unittest.mock import patch as _p
from agent import model_metadata as _mm
with _p.object(_mm, "get_cached_context_length", return_value=None), \
_p.object(_mm, "fetch_endpoint_model_metadata", return_value={}), \
_p.object(_mm, "fetch_model_metadata", return_value={}), \
_p.object(_mm, "is_local_endpoint", return_value=False), \
_p.object(_mm, "_is_known_provider_base_url", return_value=False):
ctx = resolve_display_context_length(
"gpt-5.5",
"custom",
base_url="https://example.invalid/v1",
api_key="k",
custom_providers=custom_provs,
)
assert ctx == 1_050_000, (
"custom_providers[].models.gpt-5.5.context_length=1.05M must win "
"over probe-down fallback"
)
def test_custom_providers_trailing_slash_insensitive(self):
"""Base URL comparison must tolerate trailing-slash differences
between config.yaml and the runtime value.
"""
custom_provs = [
{
"base_url": "https://example.invalid/v1/",
"models": {"m": {"context_length": 400_000}},
}
]
from unittest.mock import patch as _p
from agent import model_metadata as _mm
with _p.object(_mm, "get_cached_context_length", return_value=None), \
_p.object(_mm, "fetch_endpoint_model_metadata", return_value={}), \
_p.object(_mm, "fetch_model_metadata", return_value={}), \
_p.object(_mm, "is_local_endpoint", return_value=False), \
_p.object(_mm, "_is_known_provider_base_url", return_value=False):
ctx = resolve_display_context_length(
"m",
"custom",
base_url="https://example.invalid/v1", # no trailing slash
custom_providers=custom_provs,
)
assert ctx == 400_000
@@ -1,5 +1,3 @@
import pytest
from hermes_cli import runtime_provider as rp
@@ -1567,79 +1565,3 @@ class TestOllamaUrlSubstringLeak:
resolved = rp.resolve_runtime_provider(requested="custom")
assert resolved["api_key"] == "ol-legit-key"
# =============================================================================
# Azure Foundry — both OpenAI-style and Anthropic-style endpoints
# =============================================================================
class TestAzureFoundryResolution:
"""Verify Azure Foundry resolves correctly for both API modes."""
def _make_cfg(self, base_url: str, api_mode: str = "chat_completions"):
return {
"provider": "azure-foundry",
"base_url": base_url,
"api_mode": api_mode,
"default": "gpt-5.4",
}
def test_azure_foundry_openai_style_explicit(self, monkeypatch):
"""OpenAI-style Azure Foundry → chat_completions, keeps base_url as-is."""
monkeypatch.setenv("AZURE_FOUNDRY_API_KEY", "az-key-openai")
monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "azure-foundry")
monkeypatch.setattr(rp, "_get_model_config", lambda: self._make_cfg(
"https://my-resource.openai.azure.com/openai/v1",
"chat_completions",
))
monkeypatch.setattr(rp, "load_pool", lambda provider: None)
resolved = rp.resolve_runtime_provider(requested="azure-foundry")
assert resolved["provider"] == "azure-foundry"
assert resolved["api_mode"] == "chat_completions"
assert resolved["base_url"] == "https://my-resource.openai.azure.com/openai/v1"
assert resolved["api_key"] == "az-key-openai"
def test_azure_foundry_anthropic_style_strips_v1_suffix(self, monkeypatch):
"""Anthropic-style Azure Foundry → anthropic_messages, /v1 stripped
because the Anthropic SDK appends /v1/messages itself."""
monkeypatch.setenv("AZURE_FOUNDRY_API_KEY", "az-key-ant")
monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "azure-foundry")
monkeypatch.setattr(rp, "_get_model_config", lambda: self._make_cfg(
"https://my-resource.services.ai.azure.com/anthropic/v1",
"anthropic_messages",
))
monkeypatch.setattr(rp, "load_pool", lambda provider: None)
resolved = rp.resolve_runtime_provider(requested="azure-foundry")
assert resolved["provider"] == "azure-foundry"
assert resolved["api_mode"] == "anthropic_messages"
# /v1 stripped so SDK can append /v1/messages cleanly
assert resolved["base_url"] == "https://my-resource.services.ai.azure.com/anthropic"
def test_azure_foundry_missing_base_url_raises(self, monkeypatch):
monkeypatch.setenv("AZURE_FOUNDRY_API_KEY", "az-key")
monkeypatch.delenv("AZURE_FOUNDRY_BASE_URL", raising=False)
monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "azure-foundry")
monkeypatch.setattr(rp, "_get_model_config", lambda: {})
monkeypatch.setattr(rp, "load_pool", lambda provider: None)
with pytest.raises(rp.AuthError, match="base URL"):
rp.resolve_runtime_provider(requested="azure-foundry")
def test_azure_foundry_missing_api_key_raises(self, monkeypatch):
monkeypatch.delenv("AZURE_FOUNDRY_API_KEY", raising=False)
# `get_env_value` reads from ~/.hermes/.env — mock it to return None
# so the resolver can't find a key there either.
import hermes_cli.config as cfg_mod
monkeypatch.setattr(cfg_mod, "get_env_value", lambda k: None)
monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "azure-foundry")
monkeypatch.setattr(rp, "_get_model_config", lambda: self._make_cfg(
"https://my-resource.openai.azure.com/openai/v1"
))
monkeypatch.setattr(rp, "load_pool", lambda provider: None)
with pytest.raises(rp.AuthError, match="API key"):
rp.resolve_runtime_provider(requested="azure-foundry")
@@ -144,6 +144,91 @@ class TestNonInteractiveSetup:
out = capsys.readouterr().out
assert "hermes config set model.provider custom" in out
def test_returning_user_terminal_menu_choice_dispatches_terminal_section(self, tmp_path):
"""Returning-user menu should map Terminal Backend to the terminal setup, not TTS."""
from hermes_cli import setup as setup_mod
args = _make_setup_args()
config = {}
model_section = MagicMock()
tts_section = MagicMock()
terminal_section = MagicMock()
gateway_section = MagicMock()
tools_section = MagicMock()
agent_section = MagicMock()
with (
patch.object(setup_mod, "ensure_hermes_home"),
patch.object(setup_mod, "load_config", return_value=config),
patch.object(setup_mod, "get_hermes_home", return_value=tmp_path),
patch.object(setup_mod, "is_interactive_stdin", return_value=True),
patch.object(
setup_mod,
"get_env_value",
side_effect=lambda key: "sk-test" if key == "OPENROUTER_API_KEY" else "",
),
patch("hermes_cli.auth.get_active_provider", return_value=None),
patch.object(setup_mod, "prompt_choice", return_value=3),
patch.object(
setup_mod,
"SETUP_SECTIONS",
[
("model", "Model & Provider", model_section),
("tts", "Text-to-Speech", tts_section),
("terminal", "Terminal Backend", terminal_section),
("gateway", "Messaging Platforms (Gateway)", gateway_section),
("tools", "Tools", tools_section),
("agent", "Agent Settings", agent_section),
],
),
patch.object(setup_mod, "save_config"),
patch.object(setup_mod, "_print_setup_summary"),
):
setup_mod.run_setup_wizard(args)
terminal_section.assert_called_once_with(config)
tts_section.assert_not_called()
def test_returning_user_menu_does_not_show_separator_rows(self, tmp_path):
"""Returning-user menu should only show selectable actions."""
from hermes_cli import setup as setup_mod
args = _make_setup_args()
captured = {}
def fake_prompt_choice(question, choices, default=0):
captured["question"] = question
captured["choices"] = list(choices)
return len(choices) - 1
with (
patch.object(setup_mod, "ensure_hermes_home"),
patch.object(setup_mod, "load_config", return_value={}),
patch.object(setup_mod, "get_hermes_home", return_value=tmp_path),
patch.object(setup_mod, "is_interactive_stdin", return_value=True),
patch.object(
setup_mod,
"get_env_value",
side_effect=lambda key: "sk-test" if key == "OPENROUTER_API_KEY" else "",
),
patch("hermes_cli.auth.get_active_provider", return_value=None),
patch.object(setup_mod, "prompt_choice", side_effect=fake_prompt_choice),
):
setup_mod.run_setup_wizard(args)
assert captured["question"] == "What would you like to do?"
assert "---" not in captured["choices"]
assert captured["choices"] == [
"Quick Setup - configure missing items only",
"Full Setup - reconfigure everything",
"Model & Provider",
"Terminal Backend",
"Messaging Platforms (Gateway)",
"Tools",
"Agent Settings",
"Exit",
]
def test_main_accepts_tts_setup_section(self, monkeypatch):
"""`hermes setup tts` should parse and dispatch like other setup sections."""
from hermes_cli import main as main_mod
-287
View File
@@ -1,287 +0,0 @@
"""Tests for the setup wizard's returning-user behavior.
On an existing install:
- Bare `hermes setup` drops straight into the full reconfigure wizard
(every prompt shows the current value as its default).
- `hermes setup --quick` runs the narrower "fill in missing items" flow.
- `hermes setup --reconfigure` is a backwards-compat alias for the
bare-setup default.
On a fresh install, all three are no-ops fall through to first-time setup.
"""
from argparse import Namespace
from contextlib import ExitStack
from unittest.mock import patch
import pytest
def _make_setup_args(**overrides):
return Namespace(
non_interactive=overrides.get("non_interactive", False),
section=overrides.get("section", None),
reset=overrides.get("reset", False),
reconfigure=overrides.get("reconfigure", False),
quick=overrides.get("quick", False),
)
@pytest.fixture
def existing_install(tmp_path, monkeypatch):
"""Simulate a returning user with an existing configured install."""
home = tmp_path / ".hermes"
home.mkdir()
monkeypatch.setattr("pathlib.Path.home", lambda: tmp_path)
monkeypatch.setenv("HERMES_HOME", str(home))
return home
@pytest.fixture
def fresh_install(tmp_path, monkeypatch):
"""Simulate a first-time user with no existing configuration."""
home = tmp_path / ".hermes"
home.mkdir()
monkeypatch.setattr("pathlib.Path.home", lambda: tmp_path)
monkeypatch.setenv("HERMES_HOME", str(home))
return home
def _enter_existing_install_patches(stack, **extra):
"""Apply standard existing-install mocks via an ExitStack.
Returns a dict of mocks from the `extra` kwargs (which map mock-name to
target path) so callers can assert on them.
"""
# Unconditional mocks (no return values to assert against).
for target, kwargs in [
("hermes_cli.setup.ensure_hermes_home", {}),
("hermes_cli.setup.is_interactive_stdin", {"return_value": True}),
("hermes_cli.config.is_managed", {"return_value": False}),
("hermes_cli.setup.load_config", {"return_value": {}}),
("hermes_cli.setup.save_config", {}),
("hermes_cli.setup.get_env_value", {"return_value": None}),
("hermes_cli.auth.get_active_provider", {"return_value": "openrouter"}),
("hermes_cli.setup._print_setup_summary", {}),
("hermes_cli.setup._offer_launch_chat", {}),
("hermes_cli.setup._offer_openclaw_migration", {"return_value": False}),
]:
stack.enter_context(patch(target, **kwargs))
# Named mocks caller wants to assert on.
named = {}
for name, target in extra.items():
named[name] = stack.enter_context(patch(target))
return named
def _enter_fresh_install_patches(stack, **extra):
for target, kwargs in [
("hermes_cli.setup.ensure_hermes_home", {}),
("hermes_cli.setup.is_interactive_stdin", {"return_value": True}),
("hermes_cli.config.is_managed", {"return_value": False}),
("hermes_cli.setup.load_config", {"return_value": {}}),
("hermes_cli.setup.save_config", {}),
("hermes_cli.auth.get_active_provider", {"return_value": None}),
("hermes_cli.setup.get_env_value", {"return_value": None}),
("hermes_cli.setup._offer_openclaw_migration", {"return_value": False}),
]:
stack.enter_context(patch(target, **kwargs))
named = {}
for name, target_spec in extra.items():
if isinstance(target_spec, tuple):
target, kwargs = target_spec
named[name] = stack.enter_context(patch(target, **kwargs))
else:
named[name] = stack.enter_context(patch(target_spec))
return named
class TestExistingInstallDefault:
"""Bare `hermes setup` on an existing install = full reconfigure wizard."""
def test_bare_setup_runs_full_reconfigure_without_menu(self, existing_install):
"""No menu, no prompt_choice — just run every section in sequence."""
args = _make_setup_args() # no flags
with ExitStack() as stack:
m = _enter_existing_install_patches(
stack,
prompt_choice="hermes_cli.setup.prompt_choice",
quick="hermes_cli.setup._run_quick_setup",
model="hermes_cli.setup.setup_model_provider",
terminal="hermes_cli.setup.setup_terminal_backend",
agent="hermes_cli.setup.setup_agent_settings",
gateway="hermes_cli.setup.setup_gateway",
tools="hermes_cli.setup.setup_tools",
)
from hermes_cli.setup import run_setup_wizard
run_setup_wizard(args)
# No menu shown.
m["prompt_choice"].assert_not_called()
# Quick-setup path NOT taken.
m["quick"].assert_not_called()
# All five sections ran.
m["model"].assert_called_once()
m["terminal"].assert_called_once()
m["agent"].assert_called_once()
m["gateway"].assert_called_once()
m["tools"].assert_called_once()
def test_reconfigure_flag_is_backwards_compat_noop(self, existing_install):
"""`hermes setup --reconfigure` behaves the same as bare `hermes setup`."""
args = _make_setup_args(reconfigure=True)
with ExitStack() as stack:
m = _enter_existing_install_patches(
stack,
prompt_choice="hermes_cli.setup.prompt_choice",
model="hermes_cli.setup.setup_model_provider",
terminal="hermes_cli.setup.setup_terminal_backend",
agent="hermes_cli.setup.setup_agent_settings",
gateway="hermes_cli.setup.setup_gateway",
tools="hermes_cli.setup.setup_tools",
)
from hermes_cli.setup import run_setup_wizard
run_setup_wizard(args)
m["prompt_choice"].assert_not_called()
m["model"].assert_called_once()
m["terminal"].assert_called_once()
m["agent"].assert_called_once()
m["gateway"].assert_called_once()
m["tools"].assert_called_once()
class TestQuickFlag:
"""`--quick` on an existing install runs the fill-missing flow."""
def test_quick_flag_runs_quick_setup_only(self, existing_install):
args = _make_setup_args(quick=True)
with ExitStack() as stack:
m = _enter_existing_install_patches(
stack,
quick="hermes_cli.setup._run_quick_setup",
model="hermes_cli.setup.setup_model_provider",
terminal="hermes_cli.setup.setup_terminal_backend",
agent="hermes_cli.setup.setup_agent_settings",
gateway="hermes_cli.setup.setup_gateway",
tools="hermes_cli.setup.setup_tools",
)
from hermes_cli.setup import run_setup_wizard
run_setup_wizard(args)
m["quick"].assert_called_once()
# Full reconfigure sections must NOT run.
m["model"].assert_not_called()
m["terminal"].assert_not_called()
m["agent"].assert_not_called()
m["gateway"].assert_not_called()
m["tools"].assert_not_called()
class TestFreshInstall:
"""On a fresh install (no active provider), flags are no-ops."""
def test_bare_setup_runs_first_time_flow(self, fresh_install):
args = _make_setup_args()
with ExitStack() as stack:
m = _enter_fresh_install_patches(
stack,
prompt=("hermes_cli.setup.prompt_choice", {"return_value": 0}),
first="hermes_cli.setup._run_first_time_quick_setup",
)
from hermes_cli.setup import run_setup_wizard
run_setup_wizard(args)
m["prompt"].assert_called_once() # quick-vs-full prompt
m["first"].assert_called_once()
def test_reconfigure_on_fresh_install_falls_through(self, fresh_install):
args = _make_setup_args(reconfigure=True)
with ExitStack() as stack:
m = _enter_fresh_install_patches(
stack,
prompt=("hermes_cli.setup.prompt_choice", {"return_value": 0}),
first="hermes_cli.setup._run_first_time_quick_setup",
)
from hermes_cli.setup import run_setup_wizard
run_setup_wizard(args)
m["prompt"].assert_called_once()
m["first"].assert_called_once()
def test_quick_on_fresh_install_falls_through(self, fresh_install):
args = _make_setup_args(quick=True)
with ExitStack() as stack:
m = _enter_fresh_install_patches(
stack,
prompt=("hermes_cli.setup.prompt_choice", {"return_value": 0}),
first="hermes_cli.setup._run_first_time_quick_setup",
)
from hermes_cli.setup import run_setup_wizard
run_setup_wizard(args)
m["prompt"].assert_called_once()
m["first"].assert_called_once()
class TestArgparse:
"""The flags are plumbed through argparse to cmd_setup."""
def test_reconfigure_flag_reaches_cmd_setup(self, monkeypatch):
import sys
from hermes_cli.main import main
captured = {}
monkeypatch.setattr(
"hermes_cli.setup.run_setup_wizard",
lambda args: captured.setdefault("args", args),
)
monkeypatch.setattr(sys, "argv", ["hermes", "setup", "--reconfigure"])
try:
main()
except SystemExit:
pass
assert captured["args"].reconfigure is True
assert captured["args"].quick is False
def test_quick_flag_reaches_cmd_setup(self, monkeypatch):
import sys
from hermes_cli.main import main
captured = {}
monkeypatch.setattr(
"hermes_cli.setup.run_setup_wizard",
lambda args: captured.setdefault("args", args),
)
monkeypatch.setattr(sys, "argv", ["hermes", "setup", "--quick"])
try:
main()
except SystemExit:
pass
assert captured["args"].quick is True
assert captured["args"].reconfigure is False
def test_bare_setup_has_both_flags_false(self, monkeypatch):
import sys
from hermes_cli.main import main
captured = {}
monkeypatch.setattr(
"hermes_cli.setup.run_setup_wizard",
lambda args: captured.setdefault("args", args),
)
monkeypatch.setattr(sys, "argv", ["hermes", "setup"])
try:
main()
except SystemExit:
pass
assert captured["args"].reconfigure is False
assert captured["args"].quick is False
@@ -1,115 +0,0 @@
"""Tests for OSError EIO suppression during interrupt shutdown (#13710).
When the user interrupts a running task, prompt_toolkit tries to flush
stdout during emergency shutdown. If stdout is already in a broken state
(redirected to /dev/null, pipe closed, etc.), the flush raises
``OSError: [Errno 5] Input/output error``.
The ``_suppress_closed_loop_errors`` asyncio exception handler and the
outer ``except (KeyError, OSError)`` block must both suppress this error
to prevent a hard crash.
"""
from __future__ import annotations
import errno
import os
from unittest.mock import MagicMock
import pytest
# ---------------------------------------------------------------------------
# _suppress_closed_loop_errors asyncio exception handler
# ---------------------------------------------------------------------------
def _make_suppress_fn():
"""Build a standalone copy of ``_suppress_closed_loop_errors``.
The real function is defined as a closure inside
``CLI._run_interactive``; we reconstruct an equivalent here so the
unit tests don't need a full CLI instance.
"""
def _suppress_closed_loop_errors(loop, context):
exc = context.get("exception")
if isinstance(exc, RuntimeError) and "Event loop is closed" in str(exc):
return
if isinstance(exc, KeyError) and "is not registered" in str(exc):
return
if isinstance(exc, OSError) and getattr(exc, "errno", None) == errno.EIO:
return
loop.default_exception_handler(context)
return _suppress_closed_loop_errors
class TestSuppressClosedLoopErrors:
"""Verify the asyncio exception handler suppresses expected errors."""
def test_suppresses_event_loop_closed(self):
handler = _make_suppress_fn()
loop = MagicMock()
handler(loop, {"exception": RuntimeError("Event loop is closed")})
loop.default_exception_handler.assert_not_called()
def test_suppresses_key_not_registered(self):
handler = _make_suppress_fn()
loop = MagicMock()
handler(loop, {"exception": KeyError("0 is not registered")})
loop.default_exception_handler.assert_not_called()
def test_suppresses_oserror_eio(self):
"""OSError with errno.EIO must be suppressed (#13710)."""
handler = _make_suppress_fn()
loop = MagicMock()
exc = OSError(errno.EIO, "Input/output error")
handler(loop, {"exception": exc})
loop.default_exception_handler.assert_not_called()
def test_does_not_suppress_oserror_other_errno(self):
"""OSError with a different errno must still propagate."""
handler = _make_suppress_fn()
loop = MagicMock()
exc = OSError(errno.EACCES, "Permission denied")
handler(loop, {"exception": exc})
loop.default_exception_handler.assert_called_once()
def test_does_not_suppress_unrelated_exception(self):
"""Unrelated exceptions must still propagate."""
handler = _make_suppress_fn()
loop = MagicMock()
handler(loop, {"exception": ValueError("something else")})
loop.default_exception_handler.assert_called_once()
def test_no_exception_key(self):
"""Context without 'exception' must propagate to default handler."""
handler = _make_suppress_fn()
loop = MagicMock()
handler(loop, {"message": "some log"})
loop.default_exception_handler.assert_called_once()
# ---------------------------------------------------------------------------
# Outer except block EIO handling
# ---------------------------------------------------------------------------
class TestOuterExceptEIO:
"""Verify the outer ``except (KeyError, OSError)`` block logic."""
def test_eio_does_not_reraise(self):
"""OSError with errno.EIO should be silently suppressed."""
exc = OSError(errno.EIO, "Input/output error")
# Simulate the condition check from the outer except block:
assert isinstance(exc, OSError)
assert getattr(exc, "errno", None) == errno.EIO
def test_bad_file_descriptor_matches(self):
"""'Bad file descriptor' string should be caught."""
exc = OSError(errno.EBADF, "Bad file descriptor")
assert "Bad file descriptor" in str(exc)
def test_other_oserror_reraises(self):
"""Other OSError variants must not match the EIO guard."""
exc = OSError(errno.EACCES, "Permission denied")
assert not (getattr(exc, "errno", None) == errno.EIO)
assert "is not registered" not in str(exc)
assert "Bad file descriptor" not in str(exc)
@@ -1,822 +0,0 @@
"""Tests for the Kanban dashboard plugin backend (plugins/kanban/dashboard/plugin_api.py).
The plugin mounts as /api/plugins/kanban/ inside the dashboard's FastAPI app,
but here we attach its router to a bare FastAPI instance so we can test the
REST surface without spinning up the whole dashboard.
"""
from __future__ import annotations
import importlib.util
import os
import sys
import time
from pathlib import Path
import pytest
from fastapi import FastAPI
from fastapi.testclient import TestClient
from hermes_cli import kanban_db as kb
# ---------------------------------------------------------------------------
# Fixtures
# ---------------------------------------------------------------------------
def _load_plugin_router():
"""Dynamically load plugins/kanban/dashboard/plugin_api.py and return its router."""
repo_root = Path(__file__).resolve().parents[2]
plugin_file = repo_root / "plugins" / "kanban" / "dashboard" / "plugin_api.py"
assert plugin_file.exists(), f"plugin file missing: {plugin_file}"
spec = importlib.util.spec_from_file_location(
"hermes_dashboard_plugin_kanban_test", plugin_file,
)
assert spec is not None and spec.loader is not None
mod = importlib.util.module_from_spec(spec)
sys.modules[spec.name] = mod
spec.loader.exec_module(mod)
return mod.router
@pytest.fixture
def kanban_home(tmp_path, monkeypatch):
"""Isolated HERMES_HOME with an empty kanban DB."""
home = tmp_path / ".hermes"
home.mkdir()
monkeypatch.setenv("HERMES_HOME", str(home))
monkeypatch.setattr(Path, "home", lambda: tmp_path)
kb.init_db()
return home
@pytest.fixture
def client(kanban_home):
app = FastAPI()
app.include_router(_load_plugin_router(), prefix="/api/plugins/kanban")
return TestClient(app)
# ---------------------------------------------------------------------------
# GET /board on an empty DB
# ---------------------------------------------------------------------------
def test_board_empty(client):
r = client.get("/api/plugins/kanban/board")
assert r.status_code == 200
data = r.json()
# All canonical columns present (triage + the rest), each empty.
names = [c["name"] for c in data["columns"]]
for expected in ("triage", "todo", "ready", "running", "blocked", "done"):
assert expected in names, f"missing column {expected}: {names}"
assert all(len(c["tasks"]) == 0 for c in data["columns"])
assert data["tenants"] == []
assert data["assignees"] == []
assert data["latest_event_id"] == 0
# ---------------------------------------------------------------------------
# POST /tasks then GET /board sees it
# ---------------------------------------------------------------------------
def test_create_task_appears_on_board(client):
r = client.post(
"/api/plugins/kanban/tasks",
json={
"title": "Research LLM caching",
"assignee": "researcher",
"priority": 3,
"tenant": "acme",
},
)
assert r.status_code == 200, r.text
task = r.json()["task"]
assert task["title"] == "Research LLM caching"
assert task["assignee"] == "researcher"
assert task["status"] == "ready" # no parents -> immediately ready
assert task["priority"] == 3
assert task["tenant"] == "acme"
task_id = task["id"]
# Board now lists it under 'ready'.
r = client.get("/api/plugins/kanban/board")
assert r.status_code == 200
data = r.json()
ready = next(c for c in data["columns"] if c["name"] == "ready")
assert len(ready["tasks"]) == 1
assert ready["tasks"][0]["id"] == task_id
assert "acme" in data["tenants"]
assert "researcher" in data["assignees"]
def test_tenant_filter(client):
client.post("/api/plugins/kanban/tasks", json={"title": "A", "tenant": "t1"})
client.post("/api/plugins/kanban/tasks", json={"title": "B", "tenant": "t2"})
r = client.get("/api/plugins/kanban/board?tenant=t1")
counts = {c["name"]: len(c["tasks"]) for c in r.json()["columns"]}
total = sum(counts.values())
assert total == 1
r = client.get("/api/plugins/kanban/board?tenant=t2")
total = sum(len(c["tasks"]) for c in r.json()["columns"])
assert total == 1
# ---------------------------------------------------------------------------
# GET /tasks/:id returns body + comments + events + links
# ---------------------------------------------------------------------------
def test_task_detail_includes_links_and_events(client):
parent = client.post(
"/api/plugins/kanban/tasks", json={"title": "parent"},
).json()["task"]
child = client.post(
"/api/plugins/kanban/tasks",
json={"title": "child", "parents": [parent["id"]]},
).json()["task"]
assert child["status"] == "todo" # parent not done yet
# Detail for the child shows the parent link.
r = client.get(f"/api/plugins/kanban/tasks/{child['id']}")
assert r.status_code == 200
data = r.json()
assert data["task"]["id"] == child["id"]
assert parent["id"] in data["links"]["parents"]
# Detail for the parent shows the child.
r = client.get(f"/api/plugins/kanban/tasks/{parent['id']}")
assert child["id"] in r.json()["links"]["children"]
# Events exist from creation.
assert len(data["events"]) >= 1
def test_task_detail_404_on_unknown(client):
r = client.get("/api/plugins/kanban/tasks/does-not-exist")
assert r.status_code == 404
# ---------------------------------------------------------------------------
# PATCH /tasks/:id — status transitions
# ---------------------------------------------------------------------------
def test_patch_status_complete(client):
t = client.post("/api/plugins/kanban/tasks", json={"title": "x"}).json()["task"]
r = client.patch(
f"/api/plugins/kanban/tasks/{t['id']}",
json={"status": "done", "result": "shipped"},
)
assert r.status_code == 200
assert r.json()["task"]["status"] == "done"
# Board reflects the move.
done = next(
c for c in client.get("/api/plugins/kanban/board").json()["columns"]
if c["name"] == "done"
)
assert any(x["id"] == t["id"] for x in done["tasks"])
def test_patch_block_then_unblock(client):
t = client.post("/api/plugins/kanban/tasks", json={"title": "x"}).json()["task"]
r = client.patch(
f"/api/plugins/kanban/tasks/{t['id']}",
json={"status": "blocked", "block_reason": "need input"},
)
assert r.status_code == 200
assert r.json()["task"]["status"] == "blocked"
r = client.patch(
f"/api/plugins/kanban/tasks/{t['id']}",
json={"status": "ready"},
)
assert r.status_code == 200
assert r.json()["task"]["status"] == "ready"
def test_patch_drag_drop_move_todo_to_ready(client):
"""Direct status write: the drag-drop path for statuses without a
dedicated verb (e.g. manually promoting todo -> ready)."""
parent = client.post("/api/plugins/kanban/tasks", json={"title": "p"}).json()["task"]
child = client.post(
"/api/plugins/kanban/tasks",
json={"title": "c", "parents": [parent["id"]]},
).json()["task"]
assert child["status"] == "todo"
r = client.patch(
f"/api/plugins/kanban/tasks/{child['id']}",
json={"status": "ready"},
)
assert r.status_code == 200
assert r.json()["task"]["status"] == "ready"
def test_patch_reassign(client):
t = client.post(
"/api/plugins/kanban/tasks",
json={"title": "x", "assignee": "a"},
).json()["task"]
r = client.patch(
f"/api/plugins/kanban/tasks/{t['id']}",
json={"assignee": "b"},
)
assert r.status_code == 200
assert r.json()["task"]["assignee"] == "b"
def test_patch_priority_and_edit(client):
t = client.post("/api/plugins/kanban/tasks", json={"title": "x"}).json()["task"]
r = client.patch(
f"/api/plugins/kanban/tasks/{t['id']}",
json={"priority": 5, "title": "renamed"},
)
assert r.status_code == 200
data = r.json()["task"]
assert data["priority"] == 5
assert data["title"] == "renamed"
def test_patch_invalid_status(client):
t = client.post("/api/plugins/kanban/tasks", json={"title": "x"}).json()["task"]
r = client.patch(
f"/api/plugins/kanban/tasks/{t['id']}",
json={"status": "banana"},
)
assert r.status_code == 400
# ---------------------------------------------------------------------------
# Comments + Links
# ---------------------------------------------------------------------------
def test_add_comment(client):
t = client.post("/api/plugins/kanban/tasks", json={"title": "x"}).json()["task"]
r = client.post(
f"/api/plugins/kanban/tasks/{t['id']}/comments",
json={"body": "how's progress?", "author": "teknium"},
)
assert r.status_code == 200
r = client.get(f"/api/plugins/kanban/tasks/{t['id']}")
comments = r.json()["comments"]
assert len(comments) == 1
assert comments[0]["body"] == "how's progress?"
assert comments[0]["author"] == "teknium"
def test_add_comment_empty_rejected(client):
t = client.post("/api/plugins/kanban/tasks", json={"title": "x"}).json()["task"]
r = client.post(
f"/api/plugins/kanban/tasks/{t['id']}/comments",
json={"body": " "},
)
assert r.status_code == 400
def test_add_link_and_delete_link(client):
a = client.post("/api/plugins/kanban/tasks", json={"title": "a"}).json()["task"]
b = client.post("/api/plugins/kanban/tasks", json={"title": "b"}).json()["task"]
r = client.post(
"/api/plugins/kanban/links",
json={"parent_id": a["id"], "child_id": b["id"]},
)
assert r.status_code == 200
r = client.get(f"/api/plugins/kanban/tasks/{b['id']}")
assert a["id"] in r.json()["links"]["parents"]
r = client.delete(
"/api/plugins/kanban/links",
params={"parent_id": a["id"], "child_id": b["id"]},
)
assert r.status_code == 200
assert r.json()["ok"] is True
def test_add_link_cycle_rejected(client):
a = client.post("/api/plugins/kanban/tasks", json={"title": "a"}).json()["task"]
b = client.post("/api/plugins/kanban/tasks", json={"title": "b"}).json()["task"]
client.post(
"/api/plugins/kanban/links",
json={"parent_id": a["id"], "child_id": b["id"]},
)
r = client.post(
"/api/plugins/kanban/links",
json={"parent_id": b["id"], "child_id": a["id"]},
)
assert r.status_code == 400
# ---------------------------------------------------------------------------
# Dispatch nudge
# ---------------------------------------------------------------------------
def test_dispatch_dry_run(client):
client.post(
"/api/plugins/kanban/tasks",
json={"title": "work", "assignee": "researcher"},
)
r = client.post("/api/plugins/kanban/dispatch?dry_run=true&max=4")
assert r.status_code == 200
body = r.json()
# DispatchResult is serialized as a dataclass dict.
assert isinstance(body, dict)
# ---------------------------------------------------------------------------
# Triage column (new v1 status)
# ---------------------------------------------------------------------------
def test_create_triage_lands_in_triage_column(client):
r = client.post(
"/api/plugins/kanban/tasks",
json={"title": "rough idea, spec me", "triage": True},
)
assert r.status_code == 200
task = r.json()["task"]
assert task["status"] == "triage"
r = client.get("/api/plugins/kanban/board")
triage = next(c for c in r.json()["columns"] if c["name"] == "triage")
assert len(triage["tasks"]) == 1
assert triage["tasks"][0]["title"] == "rough idea, spec me"
def test_triage_task_not_promoted_to_ready(client):
"""Triage tasks must stay in triage even when they have no parents."""
client.post(
"/api/plugins/kanban/tasks",
json={"title": "must stay put", "triage": True},
)
# Run the dispatcher — it should NOT promote the triage task.
client.post("/api/plugins/kanban/dispatch?dry_run=false&max=4")
r = client.get("/api/plugins/kanban/board")
triage = next(c for c in r.json()["columns"] if c["name"] == "triage")
ready = next(c for c in r.json()["columns"] if c["name"] == "ready")
assert len(triage["tasks"]) == 1
assert len(ready["tasks"]) == 0
def test_patch_status_triage_works(client):
"""A user (or specifier) can push a task back into triage, and out of it."""
t = client.post(
"/api/plugins/kanban/tasks", json={"title": "x"},
).json()["task"]
# Normal creation is 'ready'; push to triage.
r = client.patch(
f"/api/plugins/kanban/tasks/{t['id']}", json={"status": "triage"},
)
assert r.status_code == 200
assert r.json()["task"]["status"] == "triage"
# Now promote to todo.
r = client.patch(
f"/api/plugins/kanban/tasks/{t['id']}", json={"status": "todo"},
)
assert r.status_code == 200
assert r.json()["task"]["status"] == "todo"
# ---------------------------------------------------------------------------
# Progress rollup (done children / total children)
# ---------------------------------------------------------------------------
def test_board_progress_rollup(client):
parent = client.post(
"/api/plugins/kanban/tasks", json={"title": "parent"},
).json()["task"]
child_a = client.post(
"/api/plugins/kanban/tasks",
json={"title": "a", "parents": [parent["id"]]},
).json()["task"]
child_b = client.post(
"/api/plugins/kanban/tasks",
json={"title": "b", "parents": [parent["id"]]},
).json()["task"]
# Children start as "todo" because the parent isn't done yet; promote
# them to "ready" so complete_task will accept the transition.
for cid in (child_a["id"], child_b["id"]):
r = client.patch(
f"/api/plugins/kanban/tasks/{cid}", json={"status": "ready"},
)
assert r.status_code == 200
# 0/2 done.
r = client.get("/api/plugins/kanban/board")
parent_row = next(
t for col in r.json()["columns"] for t in col["tasks"]
if t["id"] == parent["id"]
)
assert parent_row["progress"] == {"done": 0, "total": 2}
# Complete one child. 1/2.
r = client.patch(
f"/api/plugins/kanban/tasks/{child_a['id']}",
json={"status": "done"},
)
assert r.status_code == 200
r = client.get("/api/plugins/kanban/board")
parent_row = next(
t for col in r.json()["columns"] for t in col["tasks"]
if t["id"] == parent["id"]
)
assert parent_row["progress"] == {"done": 1, "total": 2}
# Childless tasks report progress=None, not {0/0}.
assert next(
t for col in r.json()["columns"] for t in col["tasks"]
if t["id"] == child_b["id"]
)["progress"] is None
# ---------------------------------------------------------------------------
# Auto-init on first board read
# ---------------------------------------------------------------------------
def test_board_auto_initializes_missing_db(tmp_path, monkeypatch):
"""If kanban.db doesn't exist yet, GET /board must create it, not 500."""
home = tmp_path / ".hermes"
home.mkdir()
monkeypatch.setenv("HERMES_HOME", str(home))
monkeypatch.setattr(Path, "home", lambda: tmp_path)
# Deliberately DO NOT call kb.init_db().
app = FastAPI()
app.include_router(_load_plugin_router(), prefix="/api/plugins/kanban")
c = TestClient(app)
r = c.get("/api/plugins/kanban/board")
assert r.status_code == 200
assert (home / "kanban.db").exists(), "init_db wasn't invoked by /board"
# ---------------------------------------------------------------------------
# WebSocket auth (query-param token)
# ---------------------------------------------------------------------------
def test_ws_events_rejects_when_token_required(tmp_path, monkeypatch):
"""When _SESSION_TOKEN is set (normal dashboard context), a missing or
wrong ?token= query param must be rejected with policy-violation."""
home = tmp_path / ".hermes"
home.mkdir()
monkeypatch.setenv("HERMES_HOME", str(home))
monkeypatch.setattr(Path, "home", lambda: tmp_path)
kb.init_db()
# Stub web_server so _check_ws_token has a token to compare against.
import types
stub = types.SimpleNamespace(_SESSION_TOKEN="secret-xyz")
monkeypatch.setitem(sys.modules, "hermes_cli.web_server", stub)
app = FastAPI()
app.include_router(_load_plugin_router(), prefix="/api/plugins/kanban")
c = TestClient(app)
# No token → policy violation close.
from starlette.websockets import WebSocketDisconnect
with pytest.raises(WebSocketDisconnect) as exc:
with c.websocket_connect("/api/plugins/kanban/events"):
pass
assert exc.value.code == 1008
# Wrong token → policy violation close.
with pytest.raises(WebSocketDisconnect) as exc:
with c.websocket_connect("/api/plugins/kanban/events?token=nope"):
pass
assert exc.value.code == 1008
# Correct token → accepted (connect then close cleanly from our side).
with c.websocket_connect(
"/api/plugins/kanban/events?token=secret-xyz"
) as ws:
assert ws is not None # handshake succeeded
# ---------------------------------------------------------------------------
# Bulk actions
# ---------------------------------------------------------------------------
def test_bulk_status_ready(client):
a = client.post("/api/plugins/kanban/tasks", json={"title": "a"}).json()["task"]
b = client.post("/api/plugins/kanban/tasks", json={"title": "b"}).json()["task"]
c2 = client.post("/api/plugins/kanban/tasks", json={"title": "c"}).json()["task"]
# Parent-less tasks land in "ready" already; push them to blocked first.
for tid in (a["id"], b["id"], c2["id"]):
client.patch(f"/api/plugins/kanban/tasks/{tid}",
json={"status": "blocked", "block_reason": "wait"})
r = client.post("/api/plugins/kanban/tasks/bulk",
json={"ids": [a["id"], b["id"], c2["id"]], "status": "ready"})
assert r.status_code == 200
results = r.json()["results"]
assert all(r["ok"] for r in results)
# All three are now ready.
board = client.get("/api/plugins/kanban/board").json()
ready = next(col for col in board["columns"] if col["name"] == "ready")
ids = {t["id"] for t in ready["tasks"]}
assert {a["id"], b["id"], c2["id"]}.issubset(ids)
def test_bulk_archive(client):
a = client.post("/api/plugins/kanban/tasks", json={"title": "a"}).json()["task"]
b = client.post("/api/plugins/kanban/tasks", json={"title": "b"}).json()["task"]
r = client.post("/api/plugins/kanban/tasks/bulk",
json={"ids": [a["id"], b["id"]], "archive": True})
assert r.status_code == 200
assert all(r["ok"] for r in r.json()["results"])
# Default board (archived hidden) — both gone.
board = client.get("/api/plugins/kanban/board").json()
ids = {t["id"] for col in board["columns"] for t in col["tasks"]}
assert a["id"] not in ids
assert b["id"] not in ids
def test_bulk_reassign(client):
a = client.post("/api/plugins/kanban/tasks",
json={"title": "a", "assignee": "old"}).json()["task"]
b = client.post("/api/plugins/kanban/tasks",
json={"title": "b", "assignee": "old"}).json()["task"]
r = client.post("/api/plugins/kanban/tasks/bulk",
json={"ids": [a["id"], b["id"]], "assignee": "new"})
assert r.status_code == 200
for tid in (a["id"], b["id"]):
t = client.get(f"/api/plugins/kanban/tasks/{tid}").json()["task"]
assert t["assignee"] == "new"
def test_bulk_unassign_via_empty_string(client):
a = client.post("/api/plugins/kanban/tasks",
json={"title": "a", "assignee": "x"}).json()["task"]
r = client.post("/api/plugins/kanban/tasks/bulk",
json={"ids": [a["id"]], "assignee": ""})
assert r.status_code == 200
t = client.get(f"/api/plugins/kanban/tasks/{a['id']}").json()["task"]
assert t["assignee"] is None
def test_bulk_partial_failure_doesnt_abort_siblings(client):
"""One bad id in the middle of a batch must not prevent others from
applying."""
a = client.post("/api/plugins/kanban/tasks", json={"title": "a"}).json()["task"]
c2 = client.post("/api/plugins/kanban/tasks", json={"title": "c"}).json()["task"]
r = client.post("/api/plugins/kanban/tasks/bulk",
json={"ids": [a["id"], "bogus-id", c2["id"]], "priority": 7})
assert r.status_code == 200
results = r.json()["results"]
assert len(results) == 3
ok_ids = {r["id"] for r in results if r["ok"]}
assert a["id"] in ok_ids
assert c2["id"] in ok_ids
assert any(not r["ok"] and r["id"] == "bogus-id" for r in results)
# Good siblings actually got the priority bump.
for tid in (a["id"], c2["id"]):
t = client.get(f"/api/plugins/kanban/tasks/{tid}").json()["task"]
assert t["priority"] == 7
def test_bulk_empty_ids_400(client):
r = client.post("/api/plugins/kanban/tasks/bulk", json={"ids": []})
assert r.status_code == 400
# ---------------------------------------------------------------------------
# /config endpoint
# ---------------------------------------------------------------------------
def test_config_returns_defaults_when_section_missing(client):
r = client.get("/api/plugins/kanban/config")
assert r.status_code == 200
data = r.json()
# Defaults when dashboard.kanban is missing.
assert data["default_tenant"] == ""
assert data["lane_by_profile"] is True
assert data["include_archived_by_default"] is False
assert data["render_markdown"] is True
def test_config_reads_dashboard_kanban_section(tmp_path, monkeypatch, client):
home = Path(os.environ["HERMES_HOME"])
(home / "config.yaml").write_text(
"dashboard:\n"
" kanban:\n"
" default_tenant: acme\n"
" lane_by_profile: false\n"
" include_archived_by_default: true\n"
" render_markdown: false\n"
)
r = client.get("/api/plugins/kanban/config")
assert r.status_code == 200
data = r.json()
assert data["default_tenant"] == "acme"
assert data["lane_by_profile"] is False
assert data["include_archived_by_default"] is True
assert data["render_markdown"] is False
# ---------------------------------------------------------------------------
# Runs surfacing (vulcan-artivus RFC feedback)
# ---------------------------------------------------------------------------
def test_task_detail_includes_runs(client):
"""GET /tasks/:id carries a runs[] array with the attempt history."""
r = client.post("/api/plugins/kanban/tasks",
json={"title": "port x", "assignee": "worker"}).json()
tid = r["task"]["id"]
# Drive status running to force a run creation: PATCH to running
# doesn't call claim_task (the PATCH path uses _set_status_direct),
# so use the bulk/claim indirection via the kernel.
import hermes_cli.kanban_db as _kb
conn = _kb.connect()
try:
_kb.claim_task(conn, tid)
_kb.complete_task(
conn, tid,
result="done",
summary="tested on rate limiter",
metadata={"changed_files": ["limiter.py"]},
)
finally:
conn.close()
d = client.get(f"/api/plugins/kanban/tasks/{tid}").json()
assert "runs" in d
assert len(d["runs"]) == 1
run = d["runs"][0]
assert run["outcome"] == "completed"
assert run["profile"] == "worker"
assert run["summary"] == "tested on rate limiter"
assert run["metadata"] == {"changed_files": ["limiter.py"]}
assert run["ended_at"] is not None
def test_task_detail_runs_empty_before_claim(client):
"""A task that's never been claimed has an empty runs[] list, not
a missing key."""
r = client.post("/api/plugins/kanban/tasks", json={"title": "fresh"}).json()
d = client.get(f"/api/plugins/kanban/tasks/{r['task']['id']}").json()
assert d["runs"] == []
def test_patch_status_done_with_summary_and_metadata(client):
"""PATCH /tasks/:id with status=done + summary + metadata must
reach complete_task, so the dashboard has CLI parity."""
# Create + claim.
r = client.post("/api/plugins/kanban/tasks", json={"title": "x", "assignee": "worker"})
tid = r.json()["task"]["id"]
from hermes_cli import kanban_db as kb
conn = kb.connect()
try:
kb.claim_task(conn, tid)
finally:
conn.close()
r = client.patch(
f"/api/plugins/kanban/tasks/{tid}",
json={
"status": "done",
"summary": "shipped the thing",
"metadata": {"changed_files": ["a.py", "b.py"], "tests_run": 7},
},
)
assert r.status_code == 200, r.text
# The run must have the summary + metadata attached.
conn = kb.connect()
try:
run = kb.latest_run(conn, tid)
assert run.outcome == "completed"
assert run.summary == "shipped the thing"
assert run.metadata == {"changed_files": ["a.py", "b.py"], "tests_run": 7}
finally:
conn.close()
def test_patch_status_done_without_summary_still_works(client):
"""Back-compat: PATCH without the new fields still completes."""
r = client.post("/api/plugins/kanban/tasks", json={"title": "y", "assignee": "worker"})
tid = r.json()["task"]["id"]
from hermes_cli import kanban_db as kb
conn = kb.connect()
try:
kb.claim_task(conn, tid)
finally:
conn.close()
r = client.patch(
f"/api/plugins/kanban/tasks/{tid}",
json={"status": "done", "result": "legacy shape"},
)
assert r.status_code == 200, r.text
conn = kb.connect()
try:
run = kb.latest_run(conn, tid)
assert run.outcome == "completed"
assert run.summary == "legacy shape" # falls back to result
finally:
conn.close()
def test_patch_status_archive_closes_running_run(client):
"""PATCH to archived while running must close the in-flight run."""
r = client.post("/api/plugins/kanban/tasks", json={"title": "z", "assignee": "worker"})
tid = r.json()["task"]["id"]
from hermes_cli import kanban_db as kb
conn = kb.connect()
try:
kb.claim_task(conn, tid)
open_run = kb.latest_run(conn, tid)
assert open_run.ended_at is None
finally:
conn.close()
r = client.patch(
f"/api/plugins/kanban/tasks/{tid}",
json={"status": "archived"},
)
assert r.status_code == 200, r.text
conn = kb.connect()
try:
task = kb.get_task(conn, tid)
assert task.status == "archived"
assert task.current_run_id is None
assert kb.latest_run(conn, tid).outcome == "reclaimed"
finally:
conn.close()
def test_event_dict_includes_run_id(client):
"""GET /tasks/:id returns events with run_id populated."""
r = client.post("/api/plugins/kanban/tasks", json={"title": "e", "assignee": "worker"})
tid = r.json()["task"]["id"]
from hermes_cli import kanban_db as kb
conn = kb.connect()
try:
kb.claim_task(conn, tid)
run_id = kb.latest_run(conn, tid).id
kb.complete_task(conn, tid, summary="wss")
finally:
conn.close()
r = client.get(f"/api/plugins/kanban/tasks/{tid}")
assert r.status_code == 200
events = r.json()["events"]
# Every event in the response must have a run_id key (None or int).
for e in events:
assert "run_id" in e, f"missing run_id in event: {e}"
# completed event must have the actual run_id.
comp = [e for e in events if e["kind"] == "completed"]
assert comp[0]["run_id"] == run_id
# ---------------------------------------------------------------------------
# Per-task force-loaded skills via REST
# ---------------------------------------------------------------------------
def test_create_task_with_skills_roundtrips(client):
"""POST /tasks accepts `skills: [...]`, GET /tasks/:id returns it."""
r = client.post(
"/api/plugins/kanban/tasks",
json={
"title": "translate docs",
"assignee": "linguist",
"skills": ["translation", "github-code-review"],
},
)
assert r.status_code == 200, r.text
task = r.json()["task"]
assert task["skills"] == ["translation", "github-code-review"]
# Fetch via GET /tasks/:id as the drawer does.
got = client.get(f"/api/plugins/kanban/tasks/{task['id']}").json()
assert got["task"]["skills"] == ["translation", "github-code-review"]
def test_create_task_without_skills_defaults_to_empty_list(client):
"""_task_dict serializes Task.skills=None as [] so the drawer can
always .length check without guarding against null."""
r = client.post(
"/api/plugins/kanban/tasks",
json={"title": "no skills", "assignee": "x"},
)
assert r.status_code == 200, r.text
task = r.json()["task"]
# Task.skills is None in-memory; _task_dict serializes via
# dataclasses.asdict which keeps it None. The drawer's
# `t.skills && t.skills.length > 0` guard handles both null and [].
assert task.get("skills") in (None, [])
-97
View File
@@ -716,103 +716,6 @@ class TestNormalizeCodexResponse:
assert len(msg.tool_calls) == 1
assert msg.tool_calls[0].function.name == "web_search"
def test_message_items_captured_with_id_and_phase(self, monkeypatch):
"""Exact message items (with id/phase) must be captured for cache replay."""
agent = self._make_codex_agent(monkeypatch)
response = SimpleNamespace(
output=[
SimpleNamespace(
type="message", status="completed", id="msg_abc",
phase="commentary",
content=[SimpleNamespace(type="output_text", text="Thinking...")],
),
SimpleNamespace(
type="message", status="completed", id="msg_def",
phase="final_answer",
content=[SimpleNamespace(type="output_text", text="Done!")],
),
],
status="completed",
)
msg, reason = _normalize_codex_response(response)
assert msg.codex_message_items is not None
assert len(msg.codex_message_items) == 2
assert msg.codex_message_items[0]["id"] == "msg_abc"
assert msg.codex_message_items[0]["phase"] == "commentary"
assert msg.codex_message_items[0]["content"][0]["text"] == "Thinking..."
assert msg.codex_message_items[1]["id"] == "msg_def"
assert msg.codex_message_items[1]["phase"] == "final_answer"
assert msg.codex_message_items[1]["content"][0]["text"] == "Done!"
def test_message_items_none_when_no_messages(self, monkeypatch):
"""Only reasoning + tool calls should yield None codex_message_items."""
agent = self._make_codex_agent(monkeypatch)
response = SimpleNamespace(
output=[
SimpleNamespace(type="function_call", status="completed",
call_id="call_1", name="web_search", arguments='{}', id="fc_1"),
],
status="completed",
)
msg, reason = _normalize_codex_response(response)
assert msg.codex_message_items is None
class TestChatMessagesToResponsesInputMessageItems:
"""Verify codex_message_items are replayed verbatim instead of reconstructed."""
def test_replays_exact_message_items(self, monkeypatch):
agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses",
base_url="https://chatgpt.com/backend-api/codex")
messages = [
{
"role": "assistant",
"content": "Hello world",
"codex_message_items": [
{
"type": "message",
"role": "assistant",
"status": "completed",
"id": "msg_123",
"phase": "final_answer",
"content": [{"type": "output_text", "text": "Hello world"}],
},
],
},
{"role": "user", "content": "follow up"},
]
items = _chat_messages_to_responses_input(messages)
msg_items = [i for i in items if i.get("type") == "message"]
assert len(msg_items) == 1
assert msg_items[0]["id"] == "msg_123"
assert msg_items[0]["phase"] == "final_answer"
assert msg_items[0]["content"][0]["text"] == "Hello world"
def test_fallback_to_plain_when_no_message_items(self, monkeypatch):
agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses",
base_url="https://chatgpt.com/backend-api/codex")
messages = [{"role": "assistant", "content": "Hello world"}]
items = _chat_messages_to_responses_input(messages)
assert items == [{"role": "assistant", "content": "Hello world"}]
def test_skips_invalid_message_items(self, monkeypatch):
agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses",
base_url="https://chatgpt.com/backend-api/codex")
messages = [
{
"role": "assistant",
"content": "fallback text",
"codex_message_items": [
{"type": "function_call", "role": "assistant"}, # wrong type
{"type": "message", "role": "user"}, # wrong role
{"type": "message", "role": "assistant", "content": "not a list"},
],
},
]
items = _chat_messages_to_responses_input(messages)
# All invalid — falls back to plain text reconstruction
assert items == [{"role": "assistant", "content": "fallback text"}]
# ── Chat completions response handling (OpenRouter/Nous) ─────────────────────
@@ -1,78 +0,0 @@
"""Behavior tests for the class-first skill review prompts.
The skill review / combined review prompts steer the background review agent
toward generalizing existing skills rather than accumulating near-duplicates.
These tests assert the behavioral *instructions* are present they do NOT
snapshot the full prompt text (change-detector).
"""
from run_agent import AIAgent
def test_skill_review_prompt_instructs_survey_first():
"""Prompt must tell the reviewer to list existing skills before deciding."""
prompt = AIAgent._SKILL_REVIEW_PROMPT
assert "skills_list" in prompt, "must instruct the reviewer to call skills_list"
assert "skill_view" in prompt, "must instruct the reviewer to skill_view candidates"
assert "SURVEY" in prompt, "must name the survey step explicitly"
def test_skill_review_prompt_is_class_first():
"""Prompt must steer toward the CLASS of task, not the specific task."""
prompt = AIAgent._SKILL_REVIEW_PROMPT
assert "CLASS" in prompt, "must tell the reviewer to think about the task class"
assert "class level" in prompt, "must anchor naming at the class level"
def test_skill_review_prompt_prefers_updating_existing():
"""Prompt must prefer generalizing an existing skill over creating a new one."""
prompt = AIAgent._SKILL_REVIEW_PROMPT
assert "PREFER GENERALIZING" in prompt or "PREFER UPDATING" in prompt, (
"must state the update-over-create preference"
)
assert "ONLY CREATE A NEW SKILL" in prompt, (
"must gate new-skill creation behind a last-resort clause"
)
def test_skill_review_prompt_flags_overlap_for_followup():
"""Prompt must ask the reviewer to note overlapping skills for future review."""
prompt = AIAgent._SKILL_REVIEW_PROMPT
assert "overlap" in prompt.lower(), "must mention the overlap-flagging protocol"
def test_skill_review_prompt_preserves_opt_out_clause():
"""The 'Nothing to save.' escape clause must remain."""
prompt = AIAgent._SKILL_REVIEW_PROMPT
assert "Nothing to save." in prompt
def test_combined_review_prompt_keeps_memory_section():
"""Combined prompt must still cover memory review."""
prompt = AIAgent._COMBINED_REVIEW_PROMPT
assert "**Memory**" in prompt
assert "memory tool" in prompt
def test_combined_review_prompt_skills_section_is_class_first():
"""The **Skills** half of the combined prompt must follow the same protocol."""
prompt = AIAgent._COMBINED_REVIEW_PROMPT
assert "**Skills**" in prompt
assert "SURVEY" in prompt
assert "CLASS" in prompt
assert "skills_list" in prompt
assert "ONLY CREATE A NEW SKILL" in prompt
def test_combined_review_prompt_preserves_opt_out_clause():
prompt = AIAgent._COMBINED_REVIEW_PROMPT
assert "Nothing to save." in prompt
def test_memory_review_prompt_unchanged_in_structure():
"""Memory-only review prompt stays focused on user facts — not touched by this change."""
prompt = AIAgent._MEMORY_REVIEW_PROMPT
# Guardrails: the memory-only prompt must NOT mention skills/surveys.
assert "skills_list" not in prompt
assert "SURVEY" not in prompt
assert "memory tool" in prompt
-55
View File
@@ -3386,61 +3386,6 @@ class TestMaxTokensParam:
result = agent._max_tokens_param(4096)
assert result == {"max_tokens": 4096}
def test_returns_max_completion_tokens_for_azure(self, agent):
"""Azure OpenAI requires max_completion_tokens for gpt-5.x models."""
agent.base_url = "https://my-resource.openai.azure.com/openai/v1"
result = agent._max_tokens_param(4096)
assert result == {"max_completion_tokens": 4096}
class TestAzureOpenAIRouting:
"""Verify Azure OpenAI endpoints stay on chat_completions for gpt-5.x."""
def test_azure_gpt5_stays_on_chat_completions(self, agent):
"""Azure serves gpt-5.x on /chat/completions — must not upgrade to codex_responses."""
agent.base_url = "https://my-resource.openai.azure.com/openai/v1"
agent.api_mode = "chat_completions"
agent.model = "gpt-5.4-mini"
# Mirror the routing logic from __init__
if (
agent.api_mode == "chat_completions"
and not agent._is_azure_openai_url()
and (
agent._is_direct_openai_url()
or agent._provider_model_requires_responses_api(
agent.model, provider=agent.provider,
)
)
):
agent.api_mode = "codex_responses"
assert agent.api_mode == "chat_completions"
def test_non_azure_gpt5_upgrades_to_codex_responses(self, agent):
"""On api.openai.com, gpt-5.x must still upgrade to codex_responses."""
agent.base_url = "https://api.openai.com/v1"
agent.api_mode = "chat_completions"
agent.model = "gpt-5.4-mini"
if (
agent.api_mode == "chat_completions"
and not agent._is_azure_openai_url()
and (
agent._is_direct_openai_url()
or agent._provider_model_requires_responses_api(
agent.model, provider=agent.provider,
)
)
):
agent.api_mode = "codex_responses"
assert agent.api_mode == "codex_responses"
def test_is_azure_openai_url_detection(self, agent):
assert agent._is_azure_openai_url("https://foo.openai.azure.com/openai/v1") is True
assert agent._is_azure_openai_url("https://api.openai.com/v1") is False
assert agent._is_azure_openai_url("https://openrouter.ai/api/v1") is False
# Path-embedded azure string should still detect — we're ~substring matching
agent.base_url = "https://my-resource.openai.azure.com/openai/v1"
assert agent._is_azure_openai_url() is True
# ---------------------------------------------------------------------------
# System prompt stability for prompt caching
@@ -943,33 +943,6 @@ def test_normalize_codex_response_marks_commentary_only_message_as_incomplete(mo
assert "inspect the repository" in (assistant_message.content or "")
def test_normalize_codex_response_preserves_message_status_for_replay(monkeypatch):
"""Incomplete Codex output messages must not be replayed as completed."""
agent = _build_agent(monkeypatch)
from agent.codex_responses_adapter import _normalize_codex_response
response = SimpleNamespace(
output=[
SimpleNamespace(
type="message",
id="msg_partial",
phase="commentary",
status="in_progress",
content=[SimpleNamespace(type="output_text", text="Still working...")],
)
],
usage=SimpleNamespace(input_tokens=4, output_tokens=2, total_tokens=6),
status="in_progress",
model="gpt-5-codex",
)
assistant_message, finish_reason = _normalize_codex_response(response)
assert finish_reason == "incomplete"
assert assistant_message.codex_message_items[0]["id"] == "msg_partial"
assert assistant_message.codex_message_items[0]["status"] == "in_progress"
def test_normalize_codex_response_detects_leaked_tool_call_text(monkeypatch):
"""Harmony-style `to=functions.foo` leaked into assistant content with no
structured function_call items must be treated as incomplete so the
@@ -1430,44 +1403,6 @@ def test_chat_messages_to_responses_input_reasoning_only_has_following_item(monk
assert following.get("role") == "assistant"
def test_codex_message_item_status_survives_conversion_and_preflight(monkeypatch):
"""Stored Codex assistant message statuses must survive replay normalization."""
agent = _build_agent(monkeypatch)
from agent.codex_responses_adapter import (
_chat_messages_to_responses_input,
_preflight_codex_input_items,
)
items = _chat_messages_to_responses_input([
{
"role": "assistant",
"content": "partial",
"codex_message_items": [
{
"type": "message",
"role": "assistant",
"status": "incomplete",
"id": "msg_incomplete",
"phase": "commentary",
"content": [{"type": "output_text", "text": "partial"}],
}
],
}
])
replay_item = next(item for item in items if item.get("type") == "message")
assert replay_item["status"] == "incomplete"
normalized = _preflight_codex_input_items([
{
"type": "message",
"role": "assistant",
"status": "in_progress",
"content": [{"type": "output_text", "text": "working"}],
}
])
assert normalized[0]["status"] == "in_progress"
def test_duplicate_detection_distinguishes_different_codex_reasoning(monkeypatch):
"""Two consecutive reasoning-only responses with different encrypted content
must NOT be treated as duplicates."""
@@ -1518,58 +1453,6 @@ def test_duplicate_detection_distinguishes_different_codex_reasoning(monkeypatch
assert "enc_second" in encrypted_contents
def test_duplicate_detection_distinguishes_different_codex_message_items(monkeypatch):
"""Incomplete turns with new message ids/phases/statuses must not be collapsed."""
agent = _build_agent(monkeypatch)
responses = [
SimpleNamespace(
output=[
SimpleNamespace(
type="message",
id="msg_first",
phase="commentary",
status="in_progress",
content=[SimpleNamespace(type="output_text", text="Still working...")],
)
],
usage=SimpleNamespace(input_tokens=50, output_tokens=10, total_tokens=60),
status="in_progress",
model="gpt-5-codex",
),
SimpleNamespace(
output=[
SimpleNamespace(
type="message",
id="msg_second",
phase="commentary",
status="in_progress",
content=[SimpleNamespace(type="output_text", text="Still working...")],
)
],
usage=SimpleNamespace(input_tokens=50, output_tokens=10, total_tokens=60),
status="in_progress",
model="gpt-5-codex",
),
_codex_message_response("Final answer after progress updates."),
]
monkeypatch.setattr(agent, "_interruptible_api_call", lambda api_kwargs: responses.pop(0))
result = agent.run_conversation("keep going")
assert result["completed"] is True
interim_msgs = [
msg for msg in result["messages"]
if msg.get("role") == "assistant"
and msg.get("finish_reason") == "incomplete"
]
assert len(interim_msgs) == 2
assert [msg["codex_message_items"][0]["id"] for msg in interim_msgs] == [
"msg_first",
"msg_second",
]
assert all(msg["codex_message_items"][0]["status"] == "in_progress" for msg in interim_msgs)
def test_chat_messages_to_responses_input_deduplicates_reasoning_ids(monkeypatch):
"""Duplicate reasoning item IDs across multi-turn incomplete responses
must be deduplicated so the Responses API doesn't reject with HTTP 400."""
-41
View File
@@ -1,41 +0,0 @@
# Stress / battle-test suite
Long-running tests that exercise the Kanban kernel under adversarial
conditions. **Not run by `scripts/run_tests.sh`** because they can
take 30+ seconds each and spawn real subprocesses.
Run manually:
```bash
./venv/bin/python -m pytest tests/stress/ -v -s
# or individual files:
./venv/bin/python tests/stress/test_concurrency.py
./venv/bin/python tests/stress/test_subprocess_e2e.py
./venv/bin/python tests/stress/test_property_fuzzing.py
./venv/bin/python tests/stress/test_benchmarks.py
```
## What's covered
- **test_concurrency.py** — 5 workers, 100 tasks, race-for-claim. Asserts
no double-claims, no orphan runs, no SQLite errors escape retry.
- **test_concurrency_mixed.py** — 10 workers + 1 reclaimer, 500 tasks,
random ops (claim/complete/block/unblock/archive). Same invariants
under adversarial scheduling.
- **test_concurrency_reclaim_race.py** — TTL < work duration so the
reclaimer intentionally yanks tasks mid-work; verifies the worker's
late-complete is refused cleanly (CAS guard works).
- **test_subprocess_e2e.py** — dispatcher spawns real Python subprocess
workers that heartbeat + complete via the CLI; crash detection
against a real dead PID.
- **test_property_fuzzing.py** — 500 random operation sequences,
~40k operations total, 9 invariant checks after each step.
- **test_atypical_scenarios.py** — 28 scenarios covering atypical
user inputs: unicode/emoji/RTL, 1 MB strings, SQL injection
attempts, cycles, self-parents, wide fan-in/out, clock skew,
HERMES_HOME with spaces/unicode/symlinks, 1000 runs on one
task, idempotency-key race across processes, terminal-state
resurrection attempts, dashboard REST with weird JSON.
- **test_benchmarks.py** — latency at 100/1k/10k tasks for dispatch,
recompute_ready, list_tasks, build_worker_context, etc. Results saved
to JSON for regression diffing.
-50
View File
@@ -1,50 +0,0 @@
#!/usr/bin/env python3
"""Fake worker process that exercises the real subprocess contract.
Reads HERMES_KANBAN_TASK from env, heartbeats periodically, does short
work, completes via the CLI. Designed to be spawned by the dispatcher
exactly the way `hermes chat -q` would be, minus the LLM cost.
"""
import json
import os
import subprocess
import sys
import time
def main():
tid = os.environ["HERMES_KANBAN_TASK"]
workspace = os.environ.get("HERMES_KANBAN_WORKSPACE", "")
# Announce via CLI (goes through real argparse + init_db + etc)
subprocess.run(
["hermes", "kanban", "heartbeat", tid, "--note", "started"],
check=True, capture_output=True,
)
# Simulate work with periodic heartbeats
for i in range(3):
time.sleep(0.3)
subprocess.run(
["hermes", "kanban", "heartbeat", tid, "--note", f"progress {i+1}/3"],
check=True, capture_output=True,
)
# Complete with structured handoff
subprocess.run(
[
"hermes", "kanban", "complete", tid,
"--summary", f"real-subprocess worker finished {tid}",
"--metadata", json.dumps({
"workspace": workspace,
"worker_pid": os.getpid(),
"iterations": 3,
}),
],
check=True, capture_output=True,
)
if __name__ == "__main__":
main()
-37
View File
@@ -1,37 +0,0 @@
"""pytest config for the stress/ subdirectory.
These tests are slow (30s+), spawn subprocesses, and are not run by
default. Enable via `pytest --run-stress` or by running the scripts
directly.
The scripts are primarily __main__-executable entry points; pytest
isn't expected to collect individual test functions from them.
"""
import pytest
def pytest_collection_modifyitems(config, items):
if config.getoption("--run-stress", default=False):
return
skip_stress = pytest.mark.skip(
reason="stress test (opt-in via --run-stress or run script directly)"
)
for item in items:
if "tests/stress" in str(item.fspath):
item.add_marker(skip_stress)
def pytest_addoption(parser):
parser.addoption(
"--run-stress",
action="store_true",
default=False,
help="Run the stress/battle-test suite (slow, spawns subprocesses).",
)
collect_ignore_glob = [
# The stress scripts have top-level code and hard-coded paths; they're
# meant to run as `python tests/stress/<name>.py`, not as pytest modules.
"*.py",
]
File diff suppressed because it is too large Load Diff
-221
View File
@@ -1,221 +0,0 @@
"""Scale benchmarks for the Kanban kernel.
Measures:
- dispatch_once latency at 100, 1000, 10000 tasks
- recompute_ready latency at 100, 1000, 10000 todo tasks with wide parent graphs
- build_worker_context latency with 1, 10, 50 parent dependencies
- board list/stats query latency
- task_runs query latency at scale
Results printed as a table. Saved to JSON for regression-diffing in CI
or future reviews. Not a pass/fail test records numbers so we know
when a change regresses latency by 10x and can decide whether to care.
"""
import json
import os
import random
import sys
import tempfile
import time
from pathlib import Path
WT = str(Path(__file__).resolve().parents[2])
def bench(label, fn, iterations=5):
"""Time fn over `iterations` runs, return (min, median, max) in ms."""
times = []
for _ in range(iterations):
t0 = time.perf_counter()
fn()
times.append((time.perf_counter() - t0) * 1000)
times.sort()
mn = times[0]
md = times[len(times) // 2]
mx = times[-1]
return {"label": label, "iter": iterations, "min_ms": mn, "median_ms": md, "max_ms": mx}
def seed_tasks(conn, kb, n, assignee="bench-worker", with_parents=False):
"""Seed n tasks. Optionally give each task 5 parents."""
ids = []
for i in range(n):
if with_parents and i >= 5:
parents = random.sample(ids[:i], 5)
else:
parents = ()
tid = kb.create_task(
conn, title=f"bench {i}", assignee=assignee,
tenant="bench", parents=parents,
)
ids.append(tid)
return ids
def main():
home = tempfile.mkdtemp(prefix="hermes_bench_")
os.environ["HERMES_HOME"] = home
os.environ["HOME"] = home
sys.path.insert(0, WT)
from hermes_cli import kanban_db as kb
kb.init_db()
results = []
# ============ dispatch_once latency ============
for n in [100, 1000, 10000]:
print(f"\n== dispatch_once @ {n} tasks ==")
# Fresh DB each time so we're not measuring cumulative effects
import shutil
shutil.rmtree(home, ignore_errors=True)
os.makedirs(home)
kb._INITIALIZED_PATHS.clear()
kb.init_db()
conn = kb.connect()
seed_tasks(conn, kb, n, assignee=None) # no assignee → won't spawn
r = bench(
f"dispatch_once (n={n}, no spawn)",
lambda: kb.dispatch_once(conn, spawn_fn=lambda *_: None),
iterations=5,
)
print(f" min={r['min_ms']:.1f} median={r['median_ms']:.1f} max={r['max_ms']:.1f} ms")
r["n"] = n
results.append(r)
conn.close()
# ============ recompute_ready at scale with parent graphs ============
for n in [100, 1000, 10000]:
print(f"\n== recompute_ready @ {n} tasks (5 parents each) ==")
shutil.rmtree(home, ignore_errors=True)
os.makedirs(home)
kb._INITIALIZED_PATHS.clear()
kb.init_db()
conn = kb.connect()
ids = seed_tasks(conn, kb, n, assignee=None, with_parents=True)
# Complete the first 100 so some todo tasks might get promoted
for tid in ids[:min(100, n // 10)]:
kb.complete_task(conn, tid, result="bench")
r = bench(
f"recompute_ready (n={n}, with parents)",
lambda: kb.recompute_ready(conn),
iterations=5,
)
print(f" min={r['min_ms']:.1f} median={r['median_ms']:.1f} max={r['max_ms']:.1f} ms")
r["n"] = n
results.append(r)
conn.close()
# ============ build_worker_context with N parents ============
for parent_count in [1, 10, 50]:
print(f"\n== build_worker_context with {parent_count} parents ==")
shutil.rmtree(home, ignore_errors=True)
os.makedirs(home)
kb._INITIALIZED_PATHS.clear()
kb.init_db()
conn = kb.connect()
# Create parents, complete them with summaries+metadata
parent_ids = []
for i in range(parent_count):
pid = kb.create_task(conn, title=f"parent {i}", assignee="p")
kb.claim_task(conn, pid)
kb.complete_task(
conn, pid,
summary=f"parent {i} result that is longer than a single token "
f"so we actually measure the IO",
metadata={"files": [f"file_{j}.py" for j in range(5)], "i": i},
)
parent_ids.append(pid)
child_id = kb.create_task(
conn, title="child", assignee="c", parents=parent_ids,
)
r = bench(
f"build_worker_context (parents={parent_count})",
lambda: kb.build_worker_context(conn, child_id),
iterations=10,
)
print(f" min={r['min_ms']:.1f} median={r['median_ms']:.1f} max={r['max_ms']:.1f} ms")
r["parent_count"] = parent_count
results.append(r)
conn.close()
# ============ list_tasks at scale ============
for n in [100, 1000, 10000]:
print(f"\n== list_tasks @ {n} ==")
shutil.rmtree(home, ignore_errors=True)
os.makedirs(home)
kb._INITIALIZED_PATHS.clear()
kb.init_db()
conn = kb.connect()
seed_tasks(conn, kb, n)
r = bench(
f"list_tasks (n={n})",
lambda: kb.list_tasks(conn),
iterations=5,
)
print(f" min={r['min_ms']:.1f} median={r['median_ms']:.1f} max={r['max_ms']:.1f} ms")
r["n"] = n
results.append(r)
conn.close()
# ============ board_stats at scale ============
for n in [100, 1000, 10000]:
print(f"\n== board_stats @ {n} ==")
shutil.rmtree(home, ignore_errors=True)
os.makedirs(home)
kb._INITIALIZED_PATHS.clear()
kb.init_db()
conn = kb.connect()
seed_tasks(conn, kb, n)
r = bench(
f"board_stats (n={n})",
lambda: kb.board_stats(conn),
iterations=5,
)
print(f" min={r['min_ms']:.1f} median={r['median_ms']:.1f} max={r['max_ms']:.1f} ms")
r["n"] = n
results.append(r)
conn.close()
# ============ list_runs at scale ============
for n in [100, 1000]:
print(f"\n== list_runs for task with {n} attempts ==")
shutil.rmtree(home, ignore_errors=True)
os.makedirs(home)
kb._INITIALIZED_PATHS.clear()
kb.init_db()
conn = kb.connect()
tid = kb.create_task(conn, title="x", assignee="w")
# Create N attempts via claim/release
for i in range(n):
kb.claim_task(conn, tid, ttl_seconds=0)
kb.release_stale_claims(conn)
r = bench(
f"list_runs (runs={n})",
lambda: kb.list_runs(conn, tid),
iterations=10,
)
print(f" min={r['min_ms']:.1f} median={r['median_ms']:.1f} max={r['max_ms']:.1f} ms")
r["run_count"] = n
results.append(r)
conn.close()
# ============ SUMMARY TABLE ============
print()
print("=" * 60)
print("SUMMARY")
print("=" * 60)
print(f"{'Benchmark':<50} {'min':>8} {'median':>8} {'max':>8}")
for r in results:
print(f"{r['label']:<50} {r['min_ms']:>7.1f}ms {r['median_ms']:>7.1f}ms {r['max_ms']:>7.1f}ms")
# Save for future diffing.
out_path = "/tmp/kanban_bench_results.json"
with open(out_path, "w") as f:
json.dump(results, f, indent=2)
print(f"\nResults saved to {out_path}")
if __name__ == "__main__":
main()
-302
View File
@@ -1,302 +0,0 @@
"""Multi-process concurrency stress test for the Kanban kernel.
5 worker processes race for claims on a shared DB with 100 tasks. Each
worker loops: claim -> simulate work -> complete. Asserts the invariants
that make the system worth building:
- No task claimed by two workers simultaneously
- No task completed twice
- Every claim produces exactly one run row
- Every completion closes exactly one run row
- Zero SQLite locking errors that escape the retry layer
- Total run count == total claim events == total completed events
This test is the primary justification for WAL + CAS-based claim. If it
passes, the architecture holds. If it fails, we have a real bug to fix
before anyone runs this in anger.
"""
import json
import multiprocessing as mp
import os
import random
import sqlite3
import subprocess
import sys
import tempfile
import time
from pathlib import Path
NUM_WORKERS = 5
NUM_TASKS = 100
WORKER_TIMEOUT_S = 60
WT = str(Path(__file__).resolve().parents[2])
def worker_loop(worker_id: int, hermes_home: str, result_file: str) -> None:
"""One worker's inner loop. Runs in a fresh Python process.
Tries to claim a ready task, marks it done with a per-worker summary,
repeats until the ready pool is empty. Records every claim + complete
into its own JSON result file for later aggregation.
"""
os.environ["HERMES_HOME"] = hermes_home
os.environ["HOME"] = hermes_home
sys.path.insert(0, WT)
from hermes_cli import kanban_db as kb
events = []
empty_polls = 0
start = time.monotonic()
while time.monotonic() - start < WORKER_TIMEOUT_S:
conn = kb.connect()
try:
# Find any ready task (non-deterministic order intentional — we
# want workers to race on popular assignees).
row = conn.execute(
"SELECT id FROM tasks WHERE status = 'ready' "
"AND claim_lock IS NULL LIMIT 1"
).fetchone()
if row is None:
empty_polls += 1
if empty_polls > 20:
break # queue empty long enough, stop
time.sleep(0.01)
continue
empty_polls = 0
tid = row["id"]
try:
claimed = kb.claim_task(
conn, tid, claimer=f"worker-{worker_id}",
)
except sqlite3.OperationalError as e:
events.append({"kind": "sqlite_err_on_claim", "task": tid, "err": str(e)})
continue
if claimed is None:
# Someone else beat us — expected contention, not an error.
events.append({"kind": "lost_claim_race", "task": tid})
continue
run = kb.latest_run(conn, tid)
events.append({
"kind": "claimed",
"task": tid,
"worker": worker_id,
"run_id": run.id,
"t": time.monotonic() - start,
})
# Simulate short, variable work
time.sleep(random.uniform(0.001, 0.05))
try:
kb.complete_task(
conn, tid,
result=f"done by worker-{worker_id}",
summary=f"worker-{worker_id} finished task {tid}",
metadata={"worker_id": worker_id, "run_id": run.id},
)
except sqlite3.OperationalError as e:
events.append({"kind": "sqlite_err_on_complete", "task": tid, "err": str(e)})
continue
events.append({
"kind": "completed",
"task": tid,
"worker": worker_id,
"run_id": run.id,
"t": time.monotonic() - start,
})
finally:
conn.close()
with open(result_file, "w") as f:
json.dump(events, f)
def main():
home = tempfile.mkdtemp(prefix="hermes_concurrency_")
print(f"HERMES_HOME = {home}")
# Seed.
os.environ["HERMES_HOME"] = home
os.environ["HOME"] = home
sys.path.insert(0, WT)
from hermes_cli import kanban_db as kb
kb.init_db()
conn = kb.connect()
tids = []
for i in range(NUM_TASKS):
tid = kb.create_task(
conn, title=f"task #{i}", assignee="shared",
tenant="concurrency-test",
)
tids.append(tid)
conn.close()
print(f"Seeded {NUM_TASKS} tasks.")
# Spawn workers.
ctx = mp.get_context("spawn")
result_files = [f"/tmp/concurrency_worker_{i}.json" for i in range(NUM_WORKERS)]
procs = []
start = time.monotonic()
for i in range(NUM_WORKERS):
p = ctx.Process(target=worker_loop, args=(i, home, result_files[i]))
p.start()
procs.append(p)
for p in procs:
p.join(timeout=WORKER_TIMEOUT_S + 30)
if p.is_alive():
p.terminate()
p.join()
elapsed = time.monotonic() - start
print(f"All workers done in {elapsed:.1f}s")
# Aggregate worker events.
all_events = []
for i, f in enumerate(result_files):
if not os.path.isfile(f):
print(f" WORKER {i} produced no result file — died?")
continue
with open(f) as fh:
events = json.load(fh)
all_events.extend(events)
# ============ INVARIANT CHECKS ============
print()
print("=" * 60)
print("INVARIANT CHECKS")
print("=" * 60)
failures = []
# Check 1: no task claimed by two different workers
claims_by_task = {}
for e in all_events:
if e["kind"] == "claimed":
if e["task"] in claims_by_task:
prev = claims_by_task[e["task"]]
if prev["worker"] != e["worker"]:
failures.append(
f"DOUBLE CLAIM: task {e['task']} claimed by "
f"worker {prev['worker']} AND worker {e['worker']}"
)
claims_by_task[e["task"]] = e
# Check 2: every completion has a matching claim from the same worker
for e in all_events:
if e["kind"] == "completed":
prev_claim = claims_by_task.get(e["task"])
if prev_claim is None:
failures.append(f"COMPLETION WITHOUT CLAIM: task {e['task']}")
elif prev_claim["worker"] != e["worker"]:
failures.append(
f"WORKER MISMATCH: task {e['task']} claimed by "
f"{prev_claim['worker']} but completed by {e['worker']}"
)
# Check 3: DB state — every task should be in 'done', no dangling claims
conn = kb.connect()
try:
bad_status = conn.execute(
"SELECT id, status, claim_lock, current_run_id FROM tasks "
"WHERE status != 'done' OR claim_lock IS NOT NULL "
"OR current_run_id IS NOT NULL"
).fetchall()
if bad_status:
for row in bad_status:
failures.append(
f"BAD FINAL STATE: task {row['id']} status={row['status']} "
f"claim_lock={row['claim_lock']} current_run_id={row['current_run_id']}"
)
# Check 4: exactly one run per task, all closed as completed
bad_runs = conn.execute(
"SELECT task_id, COUNT(*) as n FROM task_runs "
"GROUP BY task_id HAVING n != 1"
).fetchall()
if bad_runs:
for row in bad_runs:
failures.append(
f"WRONG RUN COUNT: task {row['task_id']} has {row['n']} runs (expected 1)"
)
open_runs = conn.execute(
"SELECT id, task_id FROM task_runs WHERE ended_at IS NULL"
).fetchall()
for row in open_runs:
failures.append(f"OPEN RUN: run {row['id']} on task {row['task_id']}")
wrong_outcomes = conn.execute(
"SELECT task_id, outcome FROM task_runs "
"WHERE outcome IS NULL OR outcome != 'completed'"
).fetchall()
for row in wrong_outcomes:
failures.append(
f"WRONG OUTCOME: task {row['task_id']} run outcome={row['outcome']}"
)
# Check 5: event counts — exactly NUM_TASKS completed events
completed_events = conn.execute(
"SELECT COUNT(*) as n FROM task_events WHERE kind='completed'"
).fetchone()["n"]
if completed_events != NUM_TASKS:
failures.append(
f"EVENT COUNT MISMATCH: {completed_events} completed events "
f"expected {NUM_TASKS}"
)
# Check 6: count SQLite errors that escaped retry
sqlite_errs = sum(
1 for e in all_events if e["kind"].startswith("sqlite_err")
)
if sqlite_errs > 0:
failures.append(f"UNRETRIED SQLITE ERRORS: {sqlite_errs}")
finally:
conn.close()
# ============ STATS ============
print()
total_claims = sum(1 for e in all_events if e["kind"] == "claimed")
total_completes = sum(1 for e in all_events if e["kind"] == "completed")
total_lost_races = sum(1 for e in all_events if e["kind"] == "lost_claim_race")
per_worker = {}
for e in all_events:
if e["kind"] == "completed":
per_worker.setdefault(e["worker"], 0)
per_worker[e["worker"]] += 1
print(f"Total claims: {total_claims}")
print(f"Total completes: {total_completes}")
print(f"Lost claim races: {total_lost_races} (expected contention; not a bug)")
print(f"Elapsed: {elapsed:.2f}s")
print(f"Throughput: {NUM_TASKS/elapsed:.1f} tasks/sec")
print(f"Per-worker completions:")
for w in sorted(per_worker.keys()):
print(f" worker-{w}: {per_worker[w]}")
if failures:
print()
print("=" * 60)
print(f"FAILURES ({len(failures)}):")
print("=" * 60)
for f in failures[:20]:
print(f" {f}")
if len(failures) > 20:
print(f" ... and {len(failures) - 20} more")
sys.exit(1)
else:
print()
print("✔ ALL INVARIANTS HELD")
if __name__ == "__main__":
main()
-350
View File
@@ -1,350 +0,0 @@
"""Harder concurrency stress: mixed operations + larger scale.
Scales to 500 tasks, 10 workers, 60s runtime. Each worker randomly:
- claims + completes (70%)
- claims + blocks with a reason (15%)
- unblocks a random blocked task (10%)
- archives a random done task (5%)
Adds a background "dispatcher" process that calls release_stale_claims
and detect_crashed_workers every 200ms, racing against the workers to
surface TTL + crash detection races.
Pass criteria: runs invariant holds, no double-completions, no orphan
runs, no SQLite errors escape the retry layer.
"""
import json
import multiprocessing as mp
import os
import random
import sqlite3
import sys
import tempfile
import time
from pathlib import Path
NUM_WORKERS = 10
NUM_TASKS = 500
RUN_DURATION_S = 30
WT = str(Path(__file__).resolve().parents[2])
def worker_loop(worker_id: int, hermes_home: str, result_file: str) -> None:
os.environ["HERMES_HOME"] = hermes_home
os.environ["HOME"] = hermes_home
sys.path.insert(0, WT)
from hermes_cli import kanban_db as kb
events = []
start = time.monotonic()
idle_rounds = 0
while time.monotonic() - start < RUN_DURATION_S:
conn = kb.connect()
try:
op = random.random()
if op < 0.10:
# Try to unblock a blocked task.
row = conn.execute(
"SELECT id FROM tasks WHERE status='blocked' "
"ORDER BY RANDOM() LIMIT 1"
).fetchone()
if row:
try:
ok = kb.unblock_task(conn, row["id"])
events.append({"kind": "unblocked" if ok else "unblock_noop",
"task": row["id"], "worker": worker_id})
except sqlite3.OperationalError as e:
events.append({"kind": "sqlite_err", "op": "unblock",
"task": row["id"], "err": str(e)[:100]})
continue
if op < 0.15:
# Try to archive a done task.
row = conn.execute(
"SELECT id FROM tasks WHERE status='done' "
"ORDER BY RANDOM() LIMIT 1"
).fetchone()
if row:
try:
kb.archive_task(conn, row["id"])
events.append({"kind": "archived", "task": row["id"],
"worker": worker_id})
except sqlite3.OperationalError as e:
events.append({"kind": "sqlite_err", "op": "archive",
"task": row["id"], "err": str(e)[:100]})
continue
# Default: claim + complete-or-block.
row = conn.execute(
"SELECT id FROM tasks WHERE status='ready' "
"AND claim_lock IS NULL LIMIT 1"
).fetchone()
if row is None:
idle_rounds += 1
if idle_rounds > 50:
break
time.sleep(0.02)
continue
idle_rounds = 0
tid = row["id"]
try:
claimed = kb.claim_task(
conn, tid, claimer=f"worker-{worker_id}",
ttl_seconds=5, # short TTL so reclaim races in
)
except sqlite3.OperationalError as e:
events.append({"kind": "sqlite_err", "op": "claim",
"task": tid, "err": str(e)[:100]})
continue
if claimed is None:
events.append({"kind": "lost_claim_race", "task": tid})
continue
run = kb.latest_run(conn, tid)
events.append({"kind": "claimed", "task": tid, "worker": worker_id,
"run_id": run.id, "t": time.monotonic() - start})
time.sleep(random.uniform(0.005, 0.05))
# 20% of the time, block instead of complete
if random.random() < 0.20:
try:
kb.block_task(conn, tid,
reason=f"blocked by worker-{worker_id}")
events.append({"kind": "blocked", "task": tid,
"worker": worker_id, "run_id": run.id})
except sqlite3.OperationalError as e:
events.append({"kind": "sqlite_err", "op": "block",
"task": tid, "err": str(e)[:100]})
else:
try:
kb.complete_task(
conn, tid,
result=f"done by worker-{worker_id}",
summary=f"worker-{worker_id} ok",
metadata={"worker_id": worker_id},
)
events.append({"kind": "completed", "task": tid,
"worker": worker_id, "run_id": run.id,
"t": time.monotonic() - start})
except sqlite3.OperationalError as e:
events.append({"kind": "sqlite_err", "op": "complete",
"task": tid, "err": str(e)[:100]})
finally:
conn.close()
with open(result_file, "w") as f:
json.dump(events, f)
def reclaimer_loop(hermes_home: str, result_file: str) -> None:
"""Background dispatcher-like loop that reclaims stale tasks."""
os.environ["HERMES_HOME"] = hermes_home
os.environ["HOME"] = hermes_home
sys.path.insert(0, WT)
from hermes_cli import kanban_db as kb
events = []
start = time.monotonic()
while time.monotonic() - start < RUN_DURATION_S + 2:
conn = kb.connect()
try:
try:
reclaimed = kb.release_stale_claims(conn)
if reclaimed:
events.append({"kind": "reclaimed", "count": reclaimed,
"t": time.monotonic() - start})
except sqlite3.OperationalError as e:
events.append({"kind": "sqlite_err", "op": "reclaim",
"err": str(e)[:100]})
finally:
conn.close()
time.sleep(0.2)
with open(result_file, "w") as f:
json.dump(events, f)
def main():
home = tempfile.mkdtemp(prefix="hermes_mixed_stress_")
print(f"HERMES_HOME = {home}")
os.environ["HERMES_HOME"] = home
os.environ["HOME"] = home
sys.path.insert(0, WT)
from hermes_cli import kanban_db as kb
kb.init_db()
conn = kb.connect()
for i in range(NUM_TASKS):
kb.create_task(
conn, title=f"t#{i}", assignee="shared", tenant="mixed-stress",
)
conn.close()
print(f"Seeded {NUM_TASKS} tasks, launching {NUM_WORKERS} workers + 1 reclaimer")
ctx = mp.get_context("spawn")
worker_results = [f"/tmp/mixed_worker_{i}.json" for i in range(NUM_WORKERS)]
reclaim_result = "/tmp/mixed_reclaim.json"
procs = []
start = time.monotonic()
for i in range(NUM_WORKERS):
p = ctx.Process(target=worker_loop, args=(i, home, worker_results[i]))
p.start()
procs.append(p)
r = ctx.Process(target=reclaimer_loop, args=(home, reclaim_result))
r.start()
procs.append(r)
for p in procs:
p.join(timeout=RUN_DURATION_S + 30)
if p.is_alive():
p.terminate()
p.join()
elapsed = time.monotonic() - start
print(f"Done in {elapsed:.1f}s")
# Aggregate.
all_events = []
for i, f in enumerate(worker_results):
if os.path.isfile(f):
with open(f) as fh:
all_events.extend(json.load(fh))
else:
print(f" WORKER {i} died with no result file!")
reclaim_events = []
if os.path.isfile(reclaim_result):
with open(reclaim_result) as fh:
reclaim_events = json.load(fh)
# ============ INVARIANT CHECKS ============
print()
print("=" * 60)
print("INVARIANT CHECKS")
print("=" * 60)
failures = []
# Per-run attribution tracking
claims = [e for e in all_events if e["kind"] == "claimed"]
completions = [e for e in all_events if e["kind"] == "completed"]
blocks = [e for e in all_events if e["kind"] == "blocked"]
# Every completion must have a matching claim on the same run_id AND
# the same worker (workers don't steal each other's runs).
claims_by_run = {c["run_id"]: c for c in claims}
for comp in completions:
claim = claims_by_run.get(comp["run_id"])
if claim is None:
# It's possible this worker saw a reclaimed run from another worker
# — that's still a bug: the worker shouldn't be able to complete
# a run it didn't claim. But let me check if reclaim happened first.
failures.append(
f"COMPLETION WITHOUT CLAIM: task {comp['task']} run {comp['run_id']} "
f"by worker {comp['worker']}"
)
elif claim["worker"] != comp["worker"]:
failures.append(
f"CROSS-WORKER COMPLETION: run {comp['run_id']} claimed by "
f"worker {claim['worker']} but completed by worker {comp['worker']}"
)
# SQLite errors that escaped the retry layer
sqlite_errs = [e for e in all_events if e["kind"] == "sqlite_err"]
if sqlite_errs:
for e in sqlite_errs[:5]:
failures.append(f"SQLITE ERROR: op={e.get('op')} err={e.get('err')}")
if len(sqlite_errs) > 5:
failures.append(f" ... and {len(sqlite_errs) - 5} more sqlite errs")
# DB final state — every task should be in a clean terminal state.
conn = kb.connect()
try:
# Invariant: current_run_id NULL iff latest run is terminal
inconsistent = conn.execute("""
SELECT t.id, t.status, t.current_run_id
FROM tasks t
WHERE t.current_run_id IS NOT NULL
AND EXISTS (SELECT 1 FROM task_runs r
WHERE r.id = t.current_run_id AND r.ended_at IS NOT NULL)
""").fetchall()
for row in inconsistent:
failures.append(
f"INVARIANT VIOLATION: task {row['id']} status={row['status']} "
f"has current_run_id={row['current_run_id']} but run is ended"
)
# Invariant: no orphan open runs
orphans = conn.execute("""
SELECT r.id, r.task_id, r.status
FROM task_runs r
LEFT JOIN tasks t ON t.current_run_id = r.id
WHERE r.ended_at IS NULL AND t.id IS NULL
""").fetchall()
for row in orphans:
failures.append(
f"ORPHAN OPEN RUN: run {row['id']} on task {row['task_id']}"
)
# Counts — should roughly balance.
status_counts = dict(
conn.execute("SELECT status, COUNT(*) FROM tasks GROUP BY status").fetchall()
)
run_outcome_counts = dict(
conn.execute(
"SELECT outcome, COUNT(*) FROM task_runs "
"WHERE ended_at IS NOT NULL GROUP BY outcome"
).fetchall()
)
active_runs = conn.execute(
"SELECT COUNT(*) FROM task_runs WHERE ended_at IS NULL"
).fetchone()[0]
finally:
conn.close()
# ============ STATS ============
print()
print(f"Workers: {NUM_WORKERS}, Tasks: {NUM_TASKS}")
print(f"Elapsed: {elapsed:.1f}s")
print(f"Events collected: {len(all_events)} (+{len(reclaim_events)} reclaim)")
print()
print("Operations:")
op_counts = {}
for e in all_events:
op_counts[e["kind"]] = op_counts.get(e["kind"], 0) + 1
for k in sorted(op_counts.keys()):
print(f" {k:<25} {op_counts[k]}")
print()
print("Final task status:")
for s, n in sorted(status_counts.items()):
print(f" {s:<10} {n}")
print("Final run outcomes:")
for o, n in sorted(run_outcome_counts.items(), key=lambda x: (x[0] or '',)):
print(f" {o:<12} {n}")
print(f" active {active_runs}")
if failures:
print()
print("=" * 60)
print(f"FAILURES ({len(failures)}):")
print("=" * 60)
for f in failures[:30]:
print(f" {f}")
if len(failures) > 30:
print(f" ... and {len(failures) - 30} more")
sys.exit(1)
else:
print()
print("✔ ALL INVARIANTS HELD UNDER MIXED STRESS")
if __name__ == "__main__":
main()
@@ -1,241 +0,0 @@
"""Target the reclaim race specifically.
Workers claim tasks with a 1s TTL but sleep 2s before completing. The
reclaimer runs every 200ms. Scenario: worker claims, reclaimer expires
the claim mid-work, worker tries to complete AFTER its run has been
reclaimed.
Expected behavior (per design): the worker's complete_task should
either succeed on the reclaimed-and-re-claimed-by-another-worker case
(no, it should refuse the claim was invalidated), OR succeed by
grace (we "forgive" a late complete from the original worker if no
one else picked it up).
Actually looking at complete_task: it doesn't check claim_lock. It just
transitions from 'running' -> 'done'. So if the reclaimer moved it back
to 'ready', the late worker's complete_task will fail (CAS on
status='running' fails). This is the CORRECT behavior.
Invariant being tested: race between worker.complete and
dispatcher.reclaim must not produce a double-run-close or other
inconsistency.
"""
import json
import multiprocessing as mp
import os
import random
import sqlite3
import sys
import tempfile
import time
from pathlib import Path
NUM_WORKERS = 5
NUM_TASKS = 50
TTL = 1
WORK_DURATION_S = 2.0 # longer than TTL => reclaimer wins
WT = str(Path(__file__).resolve().parents[2])
def worker_loop(worker_id: int, hermes_home: str, result_file: str) -> None:
os.environ["HERMES_HOME"] = hermes_home
os.environ["HOME"] = hermes_home
sys.path.insert(0, WT)
from hermes_cli import kanban_db as kb
events = []
start = time.monotonic()
idle = 0
while time.monotonic() - start < 40:
conn = kb.connect()
try:
row = conn.execute(
"SELECT id FROM tasks WHERE status='ready' AND claim_lock IS NULL LIMIT 1"
).fetchone()
if row is None:
idle += 1
if idle > 30:
break
time.sleep(0.05)
continue
idle = 0
tid = row["id"]
try:
claimed = kb.claim_task(conn, tid, claimer=f"worker-{worker_id}",
ttl_seconds=TTL)
except sqlite3.OperationalError as e:
events.append({"kind": "sqlite_err", "op": "claim", "err": str(e)[:100]})
continue
if claimed is None:
events.append({"kind": "lost_claim", "task": tid})
continue
run = kb.latest_run(conn, tid)
events.append({"kind": "claimed", "task": tid, "worker": worker_id,
"run_id": run.id})
# Sleep longer than TTL so reclaimer has a chance to intervene
time.sleep(WORK_DURATION_S + random.uniform(-0.3, 0.3))
try:
ok = kb.complete_task(
conn, tid,
result=f"by worker-{worker_id}",
summary=f"worker-{worker_id} finished",
)
events.append({"kind": "complete_ok" if ok else "complete_refused",
"task": tid, "worker": worker_id, "run_id": run.id})
except sqlite3.OperationalError as e:
events.append({"kind": "sqlite_err", "op": "complete", "err": str(e)[:100]})
finally:
conn.close()
with open(result_file, "w") as f:
json.dump(events, f)
def reclaimer_loop(hermes_home: str, result_file: str) -> None:
os.environ["HERMES_HOME"] = hermes_home
os.environ["HOME"] = hermes_home
sys.path.insert(0, WT)
from hermes_cli import kanban_db as kb
events = []
start = time.monotonic()
while time.monotonic() - start < 42:
conn = kb.connect()
try:
try:
n = kb.release_stale_claims(conn)
if n:
events.append({"kind": "reclaimed", "count": n,
"t": time.monotonic() - start})
except sqlite3.OperationalError as e:
events.append({"kind": "sqlite_err", "err": str(e)[:100]})
finally:
conn.close()
time.sleep(0.2)
with open(result_file, "w") as f:
json.dump(events, f)
def main():
home = tempfile.mkdtemp(prefix="hermes_reclaim_race_")
os.environ["HERMES_HOME"] = home
os.environ["HOME"] = home
sys.path.insert(0, WT)
from hermes_cli import kanban_db as kb
kb.init_db()
conn = kb.connect()
for i in range(NUM_TASKS):
kb.create_task(conn, title=f"t{i}", assignee="shared",
tenant="reclaim-race")
conn.close()
print(f"Seeded {NUM_TASKS} tasks. TTL={TTL}s, work_duration={WORK_DURATION_S}s")
print(f"(worker work > TTL guarantees reclaims)")
ctx = mp.get_context("spawn")
worker_results = [f"/tmp/rc_worker_{i}.json" for i in range(NUM_WORKERS)]
reclaim_result = "/tmp/rc_reclaim.json"
procs = []
for i in range(NUM_WORKERS):
p = ctx.Process(target=worker_loop, args=(i, home, worker_results[i]))
p.start()
procs.append(p)
r = ctx.Process(target=reclaimer_loop, args=(home, reclaim_result))
r.start()
procs.append(r)
for p in procs:
p.join(timeout=60)
if p.is_alive():
p.terminate()
p.join()
# Aggregate.
all_events = []
for f in worker_results:
if os.path.isfile(f):
with open(f) as fh:
all_events.extend(json.load(fh))
reclaim_events = []
if os.path.isfile(reclaim_result):
with open(reclaim_result) as fh:
reclaim_events = json.load(fh)
op_counts = {}
for e in all_events:
op_counts[e["kind"]] = op_counts.get(e["kind"], 0) + 1
total_reclaims = sum(e.get("count", 0) for e in reclaim_events)
print(f"\nReclaimer fired {len(reclaim_events)} times, total tasks reclaimed: {total_reclaims}")
print("Worker events:")
for k in sorted(op_counts):
print(f" {k:<25} {op_counts[k]}")
# Invariant checks
failures = []
conn = kb.connect()
try:
# Any task stuck with current_run_id pointing at a closed run?
bad = conn.execute("""
SELECT t.id, t.status, t.current_run_id, r.ended_at, r.outcome
FROM tasks t
JOIN task_runs r ON r.id = t.current_run_id
WHERE r.ended_at IS NOT NULL
""").fetchall()
for row in bad:
failures.append(
f"INVARIANT VIOLATION: task {row['id']} status={row['status']} "
f"current_run_id={row['current_run_id']} but run ended "
f"outcome={row['outcome']}"
)
# Every run with NULL ended_at should still have the task pointing at it
orphans = conn.execute("""
SELECT r.id, r.task_id
FROM task_runs r
LEFT JOIN tasks t ON t.current_run_id = r.id
WHERE r.ended_at IS NULL AND t.id IS NULL
""").fetchall()
for row in orphans:
failures.append(f"ORPHAN OPEN RUN: run {row['id']} on task {row['task_id']}")
# Event counts
claim_evts = conn.execute(
"SELECT COUNT(*) FROM task_events WHERE kind='claimed'").fetchone()[0]
reclaim_evts = conn.execute(
"SELECT COUNT(*) FROM task_events WHERE kind='reclaimed'").fetchone()[0]
comp_evts = conn.execute(
"SELECT COUNT(*) FROM task_events WHERE kind='completed'").fetchone()[0]
print(f"\nDB event counts: claimed={claim_evts} reclaimed={reclaim_evts} completed={comp_evts}")
# Every reclaimed run must have ended_at set
unended_reclaims = conn.execute(
"SELECT COUNT(*) FROM task_runs WHERE outcome='reclaimed' AND ended_at IS NULL"
).fetchone()[0]
if unended_reclaims:
failures.append(f"UNENDED RECLAIMED RUNS: {unended_reclaims}")
# Count of completed runs
comp_runs = conn.execute(
"SELECT COUNT(*) FROM task_runs WHERE outcome='completed'"
).fetchone()[0]
reclaim_runs = conn.execute(
"SELECT COUNT(*) FROM task_runs WHERE outcome='reclaimed'"
).fetchone()[0]
print(f"DB run outcomes: completed={comp_runs} reclaimed={reclaim_runs}")
finally:
conn.close()
if reclaim_runs == 0:
failures.append("NO RECLAIMS HAPPENED — test didn't stress what it was supposed to")
if failures:
print(f"\nFAILURES ({len(failures)}):")
for f in failures[:20]:
print(f" {f}")
sys.exit(1)
else:
print("\n✔ RECLAIM RACE INVARIANTS HELD")
if __name__ == "__main__":
main()
-283
View File
@@ -1,283 +0,0 @@
"""Randomized property testing for the Kanban kernel.
Generates 1000 random operation sequences, each 20-50 ops, on small
task graphs. After each step, checks the full invariant set:
I1. If tasks.current_run_id IS NOT NULL, the run MUST exist AND
ended_at MUST be NULL (we never point at a closed run).
I2. If a run has ended_at NULL, SOME task MUST have current_run_id
pointing at it (no orphan open runs).
I3. task.status in the valid set {triage, todo, ready, running,
blocked, done, archived}.
I4. task.claim_lock NULL iff status not in (running,).
I5. Every run has started_at <= ended_at (or ended_at is NULL).
I6. If outcome is set, ended_at must also be set.
I7. Events are strictly monotonic in (created_at, id).
I8. task_events.run_id references a task_runs.id that exists
(or is NULL).
I9. Parent completion invariant: if all parents are 'done', the
child cannot be in 'todo' status (recompute_ready should have
promoted it). This is called out in the comment on
recompute_ready; verify it holds after every random seq.
Not using hypothesis the lib; just Python random for simplicity.
"""
import os
import random
import sys
import tempfile
import time
from pathlib import Path
WT = str(Path(__file__).resolve().parents[2])
NUM_SEQUENCES = 500
OPS_PER_SEQUENCE = 100
TASK_POOL = 10
OPS = [
"create", "create_child", "claim", "complete", "block", "unblock",
"archive", "heartbeat", "release_stale", "detect_crashed",
"recompute_ready", "reassign",
]
def assert_invariants(conn, kb, ops_log):
"""Run all invariant checks; raise AssertionError with context on any."""
failures = []
# I1: current_run_id → run exists and not ended
bad_ptr = conn.execute("""
SELECT t.id, t.current_run_id, r.ended_at, r.outcome
FROM tasks t
LEFT JOIN task_runs r ON r.id = t.current_run_id
WHERE t.current_run_id IS NOT NULL
AND (r.id IS NULL OR r.ended_at IS NOT NULL)
""").fetchall()
for row in bad_ptr:
if row["ended_at"] is None and row["outcome"] is None:
detail = "missing"
else:
detail = f"closed ({row['outcome']})"
failures.append(
f"I1: task {row['id']} points at run {row['current_run_id']} "
f"which is {detail}"
)
# I2: open run → some task points at it
orphans = conn.execute("""
SELECT r.id, r.task_id
FROM task_runs r
WHERE r.ended_at IS NULL
AND NOT EXISTS (SELECT 1 FROM tasks t WHERE t.current_run_id = r.id)
""").fetchall()
for row in orphans:
failures.append(f"I2: open run {row['id']} on task {row['task_id']} has no pointer")
# I3: valid statuses
valid = {"triage", "todo", "ready", "running", "blocked", "done", "archived"}
bad_status = conn.execute("SELECT id, status FROM tasks").fetchall()
for row in bad_status:
if row["status"] not in valid:
failures.append(f"I3: task {row['id']} has invalid status {row['status']!r}")
# I4: claim_lock set only when running
bad_lock = conn.execute("""
SELECT id, status, claim_lock FROM tasks
WHERE (status != 'running' AND claim_lock IS NOT NULL)
""").fetchall()
for row in bad_lock:
failures.append(
f"I4: task {row['id']} status={row['status']} but claim_lock={row['claim_lock']!r}"
)
# I5: run started_at <= ended_at
bad_times = conn.execute("""
SELECT id, started_at, ended_at FROM task_runs
WHERE ended_at IS NOT NULL AND started_at > ended_at
""").fetchall()
for row in bad_times:
failures.append(
f"I5: run {row['id']} started_at={row['started_at']} > ended_at={row['ended_at']}"
)
# I6: outcome set → ended_at set
bad_outcome = conn.execute("""
SELECT id, outcome, ended_at FROM task_runs
WHERE outcome IS NOT NULL AND ended_at IS NULL
""").fetchall()
for row in bad_outcome:
failures.append(f"I6: run {row['id']} outcome={row['outcome']} but ended_at NULL")
# I7: events monotonic in id (always true for autoincrement)
# Skip — autoincrement guarantees it.
# I8: event.run_id references existing run
bad_ev_fk = conn.execute("""
SELECT e.id, e.run_id FROM task_events e
LEFT JOIN task_runs r ON r.id = e.run_id
WHERE e.run_id IS NOT NULL AND r.id IS NULL
""").fetchall()
for row in bad_ev_fk:
failures.append(f"I8: event {row['id']} references missing run {row['run_id']}")
# I9: if all parents done → child not in todo
# (Only applies to children with at least one parent)
orphaned_todo = conn.execute("""
SELECT c.id AS child_id,
COUNT(*) AS n_parents,
SUM(CASE WHEN p.status = 'done' THEN 1 ELSE 0 END) AS done_parents
FROM tasks c
JOIN task_links l ON l.child_id = c.id
JOIN tasks p ON p.id = l.parent_id
WHERE c.status = 'todo'
GROUP BY c.id
HAVING n_parents > 0 AND n_parents = done_parents
""").fetchall()
for row in orphaned_todo:
failures.append(
f"I9: task {row['child_id']} is todo but all {row['n_parents']} parents are done"
)
if failures:
print(f"\n!!! INVARIANT VIOLATION after {len(ops_log)} ops:")
for f in failures[:10]:
print(f" {f}")
if len(failures) > 10:
print(f" ... and {len(failures) - 10} more")
print("\nLast 10 ops:")
for op in ops_log[-10:]:
print(f" {op}")
return False
return True
def random_op(rng, conn, kb, task_pool):
op = rng.choice(OPS)
if op == "create":
tid = kb.create_task(
conn,
title=f"rand {rng.randint(0, 1000)}",
assignee=rng.choice(["w1", "w2", "w3", None]),
)
task_pool.append(tid)
return {"op": "create", "tid": tid}
if op == "create_child" and task_pool:
parent = rng.choice(task_pool)
tid = kb.create_task(
conn, title=f"child of {parent}",
assignee=rng.choice(["w1", "w2", "w3", None]),
parents=[parent],
)
task_pool.append(tid)
return {"op": "create_child", "tid": tid, "parent": parent}
if not task_pool:
return None
tid = rng.choice(task_pool)
task = kb.get_task(conn, tid)
if task is None:
task_pool.remove(tid)
return None
if op == "claim":
claimed = kb.claim_task(conn, tid, ttl_seconds=rng.choice([1, 3, 10]))
return {"op": "claim", "tid": tid, "ok": claimed is not None}
if op == "complete":
summary = rng.choice([None, f"done via op {rng.randint(0, 1000)}"])
ok = kb.complete_task(conn, tid, summary=summary)
return {"op": "complete", "tid": tid, "ok": ok}
if op == "block":
reason = rng.choice([None, "rand block"])
ok = kb.block_task(conn, tid, reason=reason)
return {"op": "block", "tid": tid, "ok": ok}
if op == "unblock":
ok = kb.unblock_task(conn, tid)
return {"op": "unblock", "tid": tid, "ok": ok}
if op == "archive":
ok = kb.archive_task(conn, tid)
if ok:
task_pool.remove(tid)
return {"op": "archive", "tid": tid, "ok": ok}
if op == "heartbeat":
ok = kb.heartbeat_worker(conn, tid)
return {"op": "heartbeat", "tid": tid, "ok": ok}
if op == "release_stale":
n = kb.release_stale_claims(conn)
return {"op": "release_stale", "n": n}
if op == "detect_crashed":
# Force-kill a fake PID first so there's something to detect
crashed = kb.detect_crashed_workers(conn)
return {"op": "detect_crashed", "n": len(crashed)}
if op == "recompute_ready":
n = kb.recompute_ready(conn)
return {"op": "recompute_ready", "promoted": n}
if op == "reassign":
# Reassignment isn't a direct API; simulate via assign_task
new_a = rng.choice(["w1", "w2", "w3", None])
try:
kb.assign_task(conn, tid, new_a)
return {"op": "reassign", "tid": tid, "to": new_a}
except Exception as e:
return {"op": "reassign", "tid": tid, "err": str(e)[:50]}
return None
def main():
total_ops = 0
total_violations = 0
for seq_idx in range(NUM_SEQUENCES):
seed = random.randint(0, 10**9)
rng = random.Random(seed)
home = tempfile.mkdtemp(prefix=f"hermes_fuzz_{seq_idx}_")
os.environ["HERMES_HOME"] = home
os.environ["HOME"] = home
sys.path.insert(0, WT)
# Fresh module state per sequence to avoid cached init paths.
for m in list(sys.modules.keys()):
if m.startswith("hermes_cli"):
del sys.modules[m]
from hermes_cli import kanban_db as kb
kb.init_db()
conn = kb.connect()
task_pool = []
ops_log = []
try:
for i in range(OPS_PER_SEQUENCE):
result = random_op(rng, conn, kb, task_pool)
if result is None:
continue
ops_log.append(result)
total_ops += 1
if not assert_invariants(conn, kb, ops_log):
total_violations += 1
print(f" sequence {seq_idx} (seed={seed}) failed at op {i}")
break
finally:
conn.close()
if seq_idx % 10 == 0:
print(f" seq {seq_idx:3d}: {total_ops} ops so far, {total_violations} violations")
print()
print("=" * 60)
print(f"Total sequences: {NUM_SEQUENCES}")
print(f"Total operations: {total_ops}")
print(f"Invariant violations: {total_violations}")
if total_violations == 0:
print("\n✔ ALL INVARIANTS HELD ACROSS RANDOMIZED SEQUENCES")
else:
print("\n✗ INVARIANT VIOLATIONS FOUND")
sys.exit(1)
if __name__ == "__main__":
main()
-228
View File
@@ -1,228 +0,0 @@
"""E2E: dispatcher spawns real Python subprocess workers.
This validates the IPC + lifecycle story that mocks can't:
- spawn_fn returns a real PID
- the child process resolves hermes_cli.kanban_db on its own
- the child writes heartbeats via the CLI (real argparse, real init_db)
- the child completes via the CLI with --summary + --metadata
- the dispatcher observes all of this through the DB only
- worker logs are captured to HERMES_HOME/kanban/logs/<task>.log
- crash detection works against a real dead PID
"""
import json
import os
import subprocess
import sys
import tempfile
import time
WT = str(Path(__file__).resolve().parents[2])
FAKE_WORKER = str(Path(__file__).parent / "_fake_worker.py")
PY = sys.executable
def make_spawn_fn(home: str):
"""Return a spawn_fn the dispatcher can call. Launches the fake
worker as a detached subprocess."""
def _spawn(task, workspace):
log_path = os.path.join(home, f"worker_{task.id}.log")
env = {
**os.environ,
"HERMES_HOME": home,
"HOME": home,
"PYTHONPATH": WT,
"HERMES_KANBAN_TASK": task.id,
"HERMES_KANBAN_WORKSPACE": workspace,
"PATH": f"{os.path.dirname(PY)}:{os.environ.get('PATH','')}",
}
log_f = open(log_path, "ab")
proc = subprocess.Popen(
[PY, FAKE_WORKER],
stdin=subprocess.DEVNULL,
stdout=log_f,
stderr=subprocess.STDOUT,
env=env,
start_new_session=True,
)
return proc.pid
return _spawn
def main():
home = tempfile.mkdtemp(prefix="hermes_e2e_")
os.environ["HERMES_HOME"] = home
os.environ["HOME"] = home
sys.path.insert(0, WT)
from hermes_cli import kanban_db as kb
# Point the `hermes` CLI child processes will run at the worktree
# hermes_cli.main. We do this by putting a shim on PATH.
shim_dir = os.path.join(home, "bin")
os.makedirs(shim_dir, exist_ok=True)
shim_path = os.path.join(shim_dir, "hermes")
with open(shim_path, "w") as f:
f.write(f"""#!/bin/sh
exec {PY} -m hermes_cli.main "$@"
""")
os.chmod(shim_path, 0o755)
os.environ["PATH"] = f"{shim_dir}:{os.environ.get('PATH','')}"
kb.init_db()
conn = kb.connect()
# ============ SCENARIO A: happy path, 3 tasks ============
print("=" * 60)
print("A. Real-subprocess happy path (3 tasks)")
print("=" * 60)
tids = []
for i in range(3):
tid = kb.create_task(
conn, title=f"real-e2e-{i}", assignee="worker",
)
tids.append(tid)
spawn_fn = make_spawn_fn(home)
result = kb.dispatch_once(conn, spawn_fn=spawn_fn)
print(f" dispatched: {len(result.spawned)} spawned")
spawned_pids = []
# The dispatcher sets worker_pid on each claimed task via _set_worker_pid.
for tid in tids:
task = kb.get_task(conn, tid)
spawned_pids.append(task.worker_pid)
print(f" task {tid}: pid={task.worker_pid} status={task.status}")
# Wait for all workers to complete (up to 10s).
deadline = time.monotonic() + 10
while time.monotonic() < deadline:
statuses = [kb.get_task(conn, tid).status for tid in tids]
if all(s == "done" for s in statuses):
break
time.sleep(0.2)
print()
failures = []
for tid in tids:
task = kb.get_task(conn, tid)
runs = kb.list_runs(conn, tid)
print(f" task {tid}: status={task.status}, current_run_id={task.current_run_id}, "
f"runs={[(r.id, r.outcome) for r in runs]}")
if task.status != "done":
failures.append(f"task {tid} not done: status={task.status}")
if task.current_run_id is not None:
failures.append(f"task {tid} has dangling current_run_id={task.current_run_id}")
if len(runs) != 1:
failures.append(f"task {tid} has {len(runs)} runs, expected 1")
else:
r = runs[0]
if r.outcome != "completed":
failures.append(f"task {tid} run outcome={r.outcome}, expected completed")
if not r.summary or "real-subprocess worker finished" not in r.summary:
failures.append(f"task {tid} summary missing: {r.summary!r}")
if not r.metadata or r.metadata.get("iterations") != 3:
failures.append(f"task {tid} metadata missing iterations: {r.metadata}")
# Heartbeat events should be present
events = kb.list_events(conn, tid)
heartbeats = [e for e in events if e.kind == "heartbeat"]
if len(heartbeats) < 3: # start + 3 progress
failures.append(f"task {tid} heartbeats={len(heartbeats)} expected >=3")
if failures:
print("\nFAILURES:")
for f in failures:
print(f" {f}")
sys.exit(1)
print("\n ✔ Scenario A: all 3 real-subprocess workers completed cleanly")
# ============ SCENARIO B: crashed worker ============
print()
print("=" * 60)
print("B. Crashed worker (kill -9 mid-heartbeat)")
print("=" * 60)
crash_tid = kb.create_task(
conn, title="crash-e2e", assignee="worker",
)
# Spawn a worker that sleeps long enough for us to kill it.
# CRITICAL: spawn through a double-fork so when we kill the child it
# doesn't zombify under our pid (which would fool kill -0 liveness
# checks into thinking it's still alive). In production the
# dispatcher daemon is long-lived but its workers are reaped by init
# after exit; the test needs to match that orphaning behavior.
def spawn_sleeper(task, workspace):
r, w = os.pipe()
middleman = subprocess.Popen(
[
PY, "-c",
"import os,sys,subprocess;"
"p=subprocess.Popen(['sleep','30'],"
"stdin=subprocess.DEVNULL,"
"stdout=subprocess.DEVNULL,stderr=subprocess.DEVNULL,"
"start_new_session=True);"
"os.write(int(sys.argv[1]), str(p.pid).encode());"
"sys.exit(0)",
str(w),
],
pass_fds=(w,),
stdin=subprocess.DEVNULL,
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
)
os.close(w)
middleman.wait() # middleman exits immediately, orphaning the sleep
grandchild_pid = int(os.read(r, 16))
os.close(r)
return grandchild_pid
result = kb.dispatch_once(conn, spawn_fn=spawn_sleeper)
task = kb.get_task(conn, crash_tid)
print(f" spawned sleeper pid={task.worker_pid} for {crash_tid}")
# Kill the sleeper forcibly
os.kill(task.worker_pid, 9)
# Give the OS a moment to reap
time.sleep(0.5)
# Simulate next dispatcher tick — should detect the crashed PID
crashed = kb.detect_crashed_workers(conn)
print(f" detect_crashed_workers returned {len(crashed)} crashed (expected 1)")
task = kb.get_task(conn, crash_tid)
runs = kb.list_runs(conn, crash_tid)
print(f" task status={task.status}, runs={[(r.id, r.outcome) for r in runs]}")
if len(crashed) < 1:
print(" ✗ crash NOT detected")
sys.exit(1)
if task.status != "ready":
print(f" ✗ task should be back to ready, got {task.status}")
sys.exit(1)
if runs[0].outcome != "crashed":
print(f" ✗ run outcome should be 'crashed', got {runs[0].outcome!r}")
sys.exit(1)
print("\n ✔ Scenario B: crash detected, task re-queued, run outcome=crashed")
# ============ SCENARIO C: worker log was captured ============
print()
print("=" * 60)
print("C. Worker log captured to disk")
print("=" * 60)
# Scenario A workers wrote to /tmp/hermes_e2e_*/worker_*.log
import glob
logs = glob.glob(os.path.join(home, "worker_*.log"))
print(f" {len(logs)} worker log files")
for lp in logs[:3]:
size = os.path.getsize(lp)
print(f" {os.path.basename(lp)}: {size} bytes")
# Our fake worker is quiet (no prints); size=0 is fine
conn.close()
print("\n✔ ALL E2E SCENARIOS PASS")
if __name__ == "__main__":
main()
+3 -30
View File
@@ -308,33 +308,6 @@ class TestMessageStorage:
assert "reasoning_content" in conv[0]
assert conv[0]["reasoning_content"] == ""
def test_codex_message_items_persisted_and_restored(self, db):
"""codex_message_items must round-trip through JSON serialization."""
db.create_session(session_id="s1", source="cli")
items = [
{
"type": "message",
"role": "assistant",
"status": "completed",
"id": "msg_123",
"phase": "commentary",
"content": [{"type": "output_text", "text": "Thinking..."}],
},
{
"type": "message",
"role": "assistant",
"status": "completed",
"id": "msg_456",
"phase": "final_answer",
"content": [{"type": "output_text", "text": "Done!"}],
},
]
db.append_message("s1", role="assistant", content="Done!", codex_message_items=items)
conv = db.get_messages_as_conversation("s1")
assert len(conv) == 1
assert conv[0].get("codex_message_items") == items
def test_reasoning_not_set_for_non_assistant(self, db):
"""reasoning is never leaked onto user or tool messages."""
db.create_session(session_id="s1", source="telegram")
@@ -1200,7 +1173,7 @@ class TestSchemaInit:
def test_schema_version(self, db):
cursor = db._conn.execute("SELECT version FROM schema_version")
version = cursor.fetchone()[0]
assert version == 9
assert version == 8
def test_title_column_exists(self, db):
"""Verify the title column was created in the sessions table."""
@@ -1256,12 +1229,12 @@ class TestSchemaInit:
conn.commit()
conn.close()
# Open with SessionDB — should migrate to v9
# Open with SessionDB — should migrate to v8
migrated_db = SessionDB(db_path=db_path)
# Verify migration
cursor = migrated_db._conn.execute("SELECT version FROM schema_version")
assert cursor.fetchone()[0] == 9
assert cursor.fetchone()[0] == 8
# Verify title column exists and is NULL for existing sessions
session = migrated_db.get_session("existing")

Some files were not shown because too many files have changed in this diff Show More