Compare commits
130 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 854206e59e | |||
| dd83173621 | |||
| c65c1ddf21 | |||
| 1970bcf5a5 | |||
| 832ecde4b0 | |||
| be184aa5fa | |||
| 63b7b6d5bd | |||
| 123f8d0fed | |||
| a24c6e191f | |||
| 7206eed319 | |||
| 1619c0e503 | |||
| e27c819de3 | |||
| 1c78f6627a | |||
| 8ef2ae6502 | |||
| 0146cb2bd2 | |||
| da7d09c3b6 | |||
| af8d43dbbb | |||
| 27fc6c1086 | |||
| 45806629c5 | |||
| 4093201c47 | |||
| 9f610aa8f3 | |||
| e1c5e741ad | |||
| e3901d5b25 | |||
| 06f81752ed | |||
| 9ef1ae138a | |||
| c5196f1fc2 | |||
| 63bf7a29b6 | |||
| 15937a6b46 | |||
| 454d883e69 | |||
| 70f56e7605 | |||
| 7fa70b6c87 | |||
| 9a70260490 | |||
| ffd2621039 | |||
| 1e37ddc929 | |||
| 83c1c201f6 | |||
| 4bda9dcade | |||
| 67dcace412 | |||
| 35c57cc46b | |||
| e8441c4c0f | |||
| 2511207cb0 | |||
| 0f3a6f0fb3 | |||
| a562420383 | |||
| 855366909f | |||
| d09ab8ff13 | |||
| 438db0c7b0 | |||
| 2ccdadcca6 | |||
| 76042f5867 | |||
| 192e7eb21f | |||
| 59b56d445c | |||
| eb28145f36 | |||
| a55de5bcd0 | |||
| cec0af02ad | |||
| 91a7a0acbe | |||
| 7c50ed707c | |||
| 731e1ef8cb | |||
| ac57114284 | |||
| 24b4b24d79 | |||
| c15064fa37 | |||
| 7bfa9442de | |||
| d8e4c7214e | |||
| 6ef3a47ce5 | |||
| 3a7653dd1f | |||
| 125de02056 | |||
| 4c591c2819 | |||
| 01535a4732 | |||
| 0a15dbdc43 | |||
| ce0513dd2e | |||
| dc5e02ea7f | |||
| ff851ba7b9 | |||
| 14dd8e9a72 | |||
| 1d80e92c7e | |||
| edce7522a5 | |||
| 45e1228a8a | |||
| 83129e72de | |||
| 4d170134ef | |||
| 81e01f6ee9 | |||
| 7fd8dc0bfb | |||
| d056b610b7 | |||
| 2536a36f6f | |||
| 1b8ca9254f | |||
| db7c5735f0 | |||
| 8bbeaea6c7 | |||
| 1fdc31b214 | |||
| 5fac6c3440 | |||
| 2c56dce0ed | |||
| 01cf2c65cc | |||
| b2d3308f98 | |||
| 25ba6a4a74 | |||
| 4c797bfae9 | |||
| c58956a9a2 | |||
| 3944b22506 | |||
| 489bed6f96 | |||
| ad0ac89478 | |||
| dc4d92f131 | |||
| 47420a84b9 | |||
| f93d4624bf | |||
| 5ae608152e | |||
| 88b65cc82a | |||
| edc78e258c | |||
| 31d7f1951a | |||
| b1c18e5a41 | |||
| bd66e55a02 | |||
| 1735ced93b | |||
| bba16943f6 | |||
| 132620ba3d | |||
| 876bb60044 | |||
| a68793b6c4 | |||
| bcc5362432 | |||
| 283c8fd6e2 | |||
| 919274b60e | |||
| 6e83d90eb4 | |||
| c6fdf48b79 | |||
| a046483e86 | |||
| fdcbd2257b | |||
| 48bdd2445e | |||
| 5e52011de3 | |||
| e48a497d16 | |||
| 2dfcc8087a | |||
| 4db58d45d4 | |||
| 57b43fdd4b | |||
| e9c47c7042 | |||
| ee0728c6c4 | |||
| 9daa0620a6 | |||
| 648b89911f | |||
| 7c17accb29 | |||
| 5006b2204b | |||
| a9fa73a620 | |||
| 7c8c031f60 | |||
| ea01bdcebe | |||
| 0738b80833 |
@@ -390,7 +390,16 @@ def build_anthropic_client(api_key: str, base_url: str = None, timeout: float =
|
||||
"timeout": Timeout(timeout=float(_read_timeout), connect=10.0),
|
||||
}
|
||||
if normalized_base_url:
|
||||
kwargs["base_url"] = normalized_base_url
|
||||
# Azure Anthropic endpoints require an ``api-version`` query parameter.
|
||||
# Pass it via default_query so the SDK appends it to every request URL
|
||||
# without corrupting the base_url (appending it directly produces
|
||||
# malformed paths like /anthropic?api-version=.../v1/messages).
|
||||
_is_azure_endpoint = "azure.com" in normalized_base_url.lower()
|
||||
if _is_azure_endpoint and "api-version" not in normalized_base_url:
|
||||
kwargs["base_url"] = normalized_base_url.rstrip("/")
|
||||
kwargs["default_query"] = {"api-version": "2025-04-15"}
|
||||
else:
|
||||
kwargs["base_url"] = normalized_base_url
|
||||
common_betas = _common_betas_for_base_url(normalized_base_url)
|
||||
|
||||
if _is_kimi_coding_endpoint(base_url):
|
||||
@@ -1680,9 +1689,9 @@ def build_anthropic_kwargs(
|
||||
|
||||
# ── Strip sampling params on 4.7+ ─────────────────────────────────
|
||||
# Opus 4.7 rejects any non-default temperature/top_p/top_k with a 400.
|
||||
# Callers (auxiliary_client, flush_memories, etc.) may set these for
|
||||
# older models; drop them here as a safety net so upstream 4.6 → 4.7
|
||||
# migrations don't require coordinated edits everywhere.
|
||||
# Callers (auxiliary_client, etc.) may set these for older models;
|
||||
# drop them here as a safety net so upstream 4.6 → 4.7 migrations
|
||||
# don't require coordinated edits everywhere.
|
||||
if _forbids_sampling_params(model):
|
||||
for _sampling_key in ("temperature", "top_p", "top_k"):
|
||||
kwargs.pop(_sampling_key, None)
|
||||
|
||||
+29
-10
@@ -42,6 +42,7 @@ import time
|
||||
from pathlib import Path # noqa: F401 — used by test mocks
|
||||
from types import SimpleNamespace
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
from urllib.parse import urlparse, parse_qs, urlunparse
|
||||
|
||||
from openai import OpenAI
|
||||
|
||||
@@ -52,6 +53,17 @@ from utils import base_url_host_matches, base_url_hostname, normalize_proxy_env_
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _extract_url_query_params(url: str):
|
||||
"""Extract query params from URL, return (clean_url, default_query dict or None)."""
|
||||
parsed = urlparse(url)
|
||||
if parsed.query:
|
||||
clean = urlunparse(parsed._replace(query=""))
|
||||
params = {k: v[0] for k, v in parse_qs(parsed.query).items()}
|
||||
return clean, params
|
||||
return url, None
|
||||
|
||||
|
||||
# Module-level flag: only warn once per process about stale OPENAI_BASE_URL.
|
||||
_stale_base_url_warned = False
|
||||
|
||||
@@ -390,7 +402,7 @@ class _CodexCompletionsAdapter:
|
||||
# Note: the Codex endpoint (chatgpt.com/backend-api/codex) does NOT
|
||||
# support max_output_tokens or temperature — omit to avoid 400 errors.
|
||||
|
||||
# Tools support for flush_memories and similar callers
|
||||
# Tools support for auxiliary callers (e.g. skills_hub) that pass function schemas
|
||||
tools = kwargs.get("tools")
|
||||
if tools:
|
||||
converted = []
|
||||
@@ -1157,8 +1169,10 @@ def _try_custom_endpoint() -> Tuple[Optional[Any], Optional[str]]:
|
||||
return None, None
|
||||
model = _read_main_model() or "gpt-4o-mini"
|
||||
logger.debug("Auxiliary client: custom endpoint (%s, api_mode=%s)", model, custom_mode or "chat_completions")
|
||||
_clean_base, _dq = _extract_url_query_params(custom_base)
|
||||
_extra = {"default_query": _dq} if _dq else {}
|
||||
if custom_mode == "codex_responses":
|
||||
real_client = OpenAI(api_key=custom_key, base_url=custom_base)
|
||||
real_client = OpenAI(api_key=custom_key, base_url=_clean_base, **_extra)
|
||||
return CodexAuxiliaryClient(real_client, model), model
|
||||
if custom_mode == "anthropic_messages":
|
||||
# Third-party Anthropic-compatible gateway (MiniMax, Zhipu GLM,
|
||||
@@ -1172,12 +1186,12 @@ def _try_custom_endpoint() -> Tuple[Optional[Any], Optional[str]]:
|
||||
"Custom endpoint declares api_mode=anthropic_messages but the "
|
||||
"anthropic SDK is not installed — falling back to OpenAI-wire."
|
||||
)
|
||||
return OpenAI(api_key=custom_key, base_url=custom_base), model
|
||||
return OpenAI(api_key=custom_key, base_url=_clean_base, **_extra), model
|
||||
return (
|
||||
AnthropicAuxiliaryClient(real_client, model, custom_key, custom_base, is_oauth=False),
|
||||
model,
|
||||
)
|
||||
return OpenAI(api_key=custom_key, base_url=custom_base), model
|
||||
return OpenAI(api_key=custom_key, base_url=_clean_base, **_extra), model
|
||||
|
||||
|
||||
def _try_codex() -> Tuple[Optional[Any], Optional[str]]:
|
||||
@@ -1825,12 +1839,15 @@ def resolve_provider_client(
|
||||
provider,
|
||||
)
|
||||
extra = {}
|
||||
_clean_base, _dq = _extract_url_query_params(custom_base)
|
||||
if _dq:
|
||||
extra["default_query"] = _dq
|
||||
if base_url_host_matches(custom_base, "api.kimi.com"):
|
||||
extra["default_headers"] = {"User-Agent": "claude-code/0.1.0"}
|
||||
elif base_url_host_matches(custom_base, "api.githubcopilot.com"):
|
||||
from hermes_cli.models import copilot_default_headers
|
||||
extra["default_headers"] = copilot_default_headers()
|
||||
client = OpenAI(api_key=custom_key, base_url=custom_base, **extra)
|
||||
client = OpenAI(api_key=custom_key, base_url=_clean_base, **extra)
|
||||
client = _wrap_if_needed(client, final_model, custom_base)
|
||||
return (_to_async_client(client, final_model) if async_mode
|
||||
else (client, final_model))
|
||||
@@ -1867,6 +1884,8 @@ def resolve_provider_client(
|
||||
model or custom_entry.get("model") or _read_main_model() or "gpt-4o-mini",
|
||||
provider,
|
||||
)
|
||||
_clean_base2, _dq2 = _extract_url_query_params(custom_base)
|
||||
_extra2 = {"default_query": _dq2} if _dq2 else {}
|
||||
logger.debug(
|
||||
"resolve_provider_client: named custom provider %r (%s, api_mode=%s)",
|
||||
provider, final_model, entry_api_mode or "chat_completions")
|
||||
@@ -1884,7 +1903,7 @@ def resolve_provider_client(
|
||||
"installed — falling back to OpenAI-wire.",
|
||||
provider,
|
||||
)
|
||||
client = OpenAI(api_key=custom_key, base_url=custom_base)
|
||||
client = OpenAI(api_key=custom_key, base_url=_clean_base2, **_extra2)
|
||||
return (_to_async_client(client, final_model) if async_mode
|
||||
else (client, final_model))
|
||||
sync_anthropic = AnthropicAuxiliaryClient(
|
||||
@@ -1893,7 +1912,7 @@ def resolve_provider_client(
|
||||
if async_mode:
|
||||
return AsyncAnthropicAuxiliaryClient(sync_anthropic), final_model
|
||||
return sync_anthropic, final_model
|
||||
client = OpenAI(api_key=custom_key, base_url=custom_base)
|
||||
client = OpenAI(api_key=custom_key, base_url=_clean_base2, **_extra2)
|
||||
# codex_responses or inherited auto-detect (via _wrap_if_needed).
|
||||
# _wrap_if_needed reads the closed-over `api_mode` (the task-level
|
||||
# override). Named-provider entry api_mode=codex_responses also
|
||||
@@ -2803,8 +2822,8 @@ def _build_call_kwargs(
|
||||
temperature = fixed_temperature
|
||||
|
||||
# Opus 4.7+ rejects any non-default temperature/top_p/top_k — silently
|
||||
# drop here so auxiliary callers that hardcode temperature (e.g. 0.3 on
|
||||
# flush_memories, 0 on structured-JSON extraction) don't 400 the moment
|
||||
# drop here so auxiliary callers that hardcode temperature (e.g. 0 on
|
||||
# structured-JSON extraction) don't 400 the moment
|
||||
# the aux model is flipped to 4.7.
|
||||
if temperature is not None:
|
||||
from agent.anthropic_adapter import _forbids_sampling_params
|
||||
@@ -2892,7 +2911,7 @@ def call_llm(
|
||||
|
||||
Args:
|
||||
task: Auxiliary task name ("compression", "vision", "web_extract",
|
||||
"session_search", "skills_hub", "mcp", "flush_memories").
|
||||
"session_search", "skills_hub", "mcp", "title_generation").
|
||||
Reads provider:model from config/env. Ignored if provider is set.
|
||||
provider: Explicit provider override.
|
||||
model: Explicit model override.
|
||||
|
||||
@@ -44,22 +44,31 @@ _TOOL_CALL_LEAK_PATTERN = re.compile(
|
||||
# Multimodal content helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _chat_content_to_responses_parts(content: Any) -> List[Dict[str, Any]]:
|
||||
def _chat_content_to_responses_parts(content: Any, *, role: str = "user") -> List[Dict[str, Any]]:
|
||||
"""Convert chat-style multimodal content to Responses API input parts.
|
||||
|
||||
Input: ``[{"type":"text"|"image_url", ...}]`` (native OpenAI Chat format)
|
||||
Output: ``[{"type":"input_text"|"input_image", ...}]`` (Responses format)
|
||||
Output: ``[{"type":"input_text"|"output_text"|"input_image", ...}]`` (Responses format)
|
||||
|
||||
The ``role`` parameter controls the text content type:
|
||||
- ``"user"`` (default) → ``"input_text"``
|
||||
- ``"assistant"`` → ``"output_text"``
|
||||
|
||||
The Responses API rejects ``input_text`` inside assistant messages and
|
||||
``output_text`` inside user messages, so callers MUST pass the correct
|
||||
role for the message being converted.
|
||||
|
||||
Returns an empty list when ``content`` is not a list or contains no
|
||||
recognized parts — callers fall back to the string path.
|
||||
"""
|
||||
text_type = "output_text" if role == "assistant" else "input_text"
|
||||
if not isinstance(content, list):
|
||||
return []
|
||||
converted: List[Dict[str, Any]] = []
|
||||
for part in content:
|
||||
if isinstance(part, str):
|
||||
if part:
|
||||
converted.append({"type": "input_text", "text": part})
|
||||
converted.append({"type": text_type, "text": part})
|
||||
continue
|
||||
if not isinstance(part, dict):
|
||||
continue
|
||||
@@ -67,7 +76,7 @@ def _chat_content_to_responses_parts(content: Any) -> List[Dict[str, Any]]:
|
||||
if ptype in {"text", "input_text", "output_text"}:
|
||||
text = part.get("text")
|
||||
if isinstance(text, str) and text:
|
||||
converted.append({"type": "input_text", "text": text})
|
||||
converted.append({"type": text_type, "text": text})
|
||||
continue
|
||||
if ptype in {"image_url", "input_image"}:
|
||||
image_ref = part.get("image_url")
|
||||
@@ -218,6 +227,23 @@ def _responses_tools(tools: Optional[List[Dict[str, Any]]] = None) -> Optional[L
|
||||
# Message format conversion
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_RESPONSE_MESSAGE_STATUSES = {"completed", "incomplete", "in_progress"}
|
||||
|
||||
|
||||
def _normalize_responses_message_status(value: Any, *, default: str = "completed") -> str:
    """Return a canonical status for a replayed Responses assistant message.

    String inputs are trimmed, lower-cased, and have hyphens and spaces
    turned into underscores; the result is returned when it is one of the
    statuses listed in ``_RESPONSE_MESSAGE_STATUSES``.  Anything else
    (non-strings, unknown spellings) yields ``default``.

    Keeping a recognised status intact means an incomplete turn is not
    relabelled as completed when it is replayed.
    """
    if not isinstance(value, str):
        return default
    canonical = value.strip().lower().replace("-", "_").replace(" ", "_")
    return canonical if canonical in _RESPONSE_MESSAGE_STATUSES else default
|
||||
|
||||
|
||||
def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
||||
"""Convert internal chat-style messages to Responses input items."""
|
||||
items: List[Dict[str, Any]] = []
|
||||
@@ -233,9 +259,10 @@ def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Di
|
||||
if role in {"user", "assistant"}:
|
||||
content = msg.get("content", "")
|
||||
if isinstance(content, list):
|
||||
content_parts = _chat_content_to_responses_parts(content)
|
||||
content_parts = _chat_content_to_responses_parts(content, role=role)
|
||||
text_type = "output_text" if role == "assistant" else "input_text"
|
||||
content_text = "".join(
|
||||
p.get("text", "") for p in content_parts if p.get("type") == "input_text"
|
||||
p.get("text", "") for p in content_parts if p.get("type") == text_type
|
||||
)
|
||||
else:
|
||||
content_parts = []
|
||||
@@ -262,7 +289,57 @@ def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Di
|
||||
seen_item_ids.add(item_id)
|
||||
has_codex_reasoning = True
|
||||
|
||||
if content_parts:
|
||||
# Replay exact assistant message items (with id/phase) from
|
||||
# previous turns so the API can maintain prefix-cache hits.
|
||||
# OpenAI docs: "preserve and resend phase on all assistant
|
||||
# messages — dropping it can degrade performance."
|
||||
codex_message_items = msg.get("codex_message_items")
|
||||
replayed_message_items = 0
|
||||
if isinstance(codex_message_items, list):
|
||||
for raw_item in codex_message_items:
|
||||
if not isinstance(raw_item, dict):
|
||||
continue
|
||||
if raw_item.get("type") != "message" or raw_item.get("role") != "assistant":
|
||||
continue
|
||||
raw_content_parts = raw_item.get("content")
|
||||
if not isinstance(raw_content_parts, list):
|
||||
continue
|
||||
|
||||
normalized_content_parts = []
|
||||
for part in raw_content_parts:
|
||||
if not isinstance(part, dict):
|
||||
continue
|
||||
part_type = str(part.get("type") or "").strip()
|
||||
if part_type not in {"output_text", "text"}:
|
||||
continue
|
||||
text = part.get("text", "")
|
||||
if text is None:
|
||||
text = ""
|
||||
if not isinstance(text, str):
|
||||
text = str(text)
|
||||
normalized_content_parts.append({"type": "output_text", "text": text})
|
||||
|
||||
if not normalized_content_parts:
|
||||
continue
|
||||
|
||||
replay_item = {
|
||||
"type": "message",
|
||||
"role": "assistant",
|
||||
"status": _normalize_responses_message_status(raw_item.get("status")),
|
||||
"content": normalized_content_parts,
|
||||
}
|
||||
item_id = raw_item.get("id")
|
||||
if isinstance(item_id, str) and item_id.strip():
|
||||
replay_item["id"] = item_id.strip()
|
||||
phase = raw_item.get("phase")
|
||||
if isinstance(phase, str) and phase.strip():
|
||||
replay_item["phase"] = phase.strip()
|
||||
items.append(replay_item)
|
||||
replayed_message_items += 1
|
||||
|
||||
if replayed_message_items > 0:
|
||||
pass
|
||||
elif content_parts:
|
||||
items.append({"role": "assistant", "content": content_parts})
|
||||
elif content_text.strip():
|
||||
items.append({"role": "assistant", "content": content_text})
|
||||
@@ -422,6 +499,47 @@ def _preflight_codex_input_items(raw_items: Any) -> List[Dict[str, Any]]:
|
||||
normalized.append(reasoning_item)
|
||||
continue
|
||||
|
||||
if item_type == "message":
|
||||
role = item.get("role")
|
||||
if role != "assistant":
|
||||
raise ValueError(f"Codex Responses input[{idx}] message items must have role='assistant'.")
|
||||
content = item.get("content")
|
||||
if not isinstance(content, list):
|
||||
raise ValueError(f"Codex Responses input[{idx}] message item must have content list.")
|
||||
normalized_content = []
|
||||
for part_idx, part in enumerate(content):
|
||||
if not isinstance(part, dict):
|
||||
raise ValueError(
|
||||
f"Codex Responses input[{idx}] message content[{part_idx}] must be an object."
|
||||
)
|
||||
part_type = part.get("type")
|
||||
if part_type not in {"output_text", "text"}:
|
||||
raise ValueError(
|
||||
f"Codex Responses input[{idx}] message content[{part_idx}] has unsupported type {part_type!r}."
|
||||
)
|
||||
text = part.get("text", "")
|
||||
if text is None:
|
||||
text = ""
|
||||
if not isinstance(text, str):
|
||||
text = str(text)
|
||||
normalized_content.append({"type": "output_text", "text": text})
|
||||
if not normalized_content:
|
||||
raise ValueError(f"Codex Responses input[{idx}] message item must contain at least one text part.")
|
||||
normalized_item: Dict[str, Any] = {
|
||||
"type": "message",
|
||||
"role": "assistant",
|
||||
"status": _normalize_responses_message_status(item.get("status")),
|
||||
"content": normalized_content,
|
||||
}
|
||||
item_id = item.get("id")
|
||||
if isinstance(item_id, str) and item_id.strip():
|
||||
normalized_item["id"] = item_id.strip()
|
||||
phase = item.get("phase")
|
||||
if isinstance(phase, str) and phase.strip():
|
||||
normalized_item["phase"] = phase.strip()
|
||||
normalized.append(normalized_item)
|
||||
continue
|
||||
|
||||
role = item.get("role")
|
||||
if role in {"user", "assistant"}:
|
||||
content = item.get("content", "")
|
||||
@@ -429,13 +547,16 @@ def _preflight_codex_input_items(raw_items: Any) -> List[Dict[str, Any]]:
|
||||
content = ""
|
||||
if isinstance(content, list):
|
||||
# Multimodal content from ``_chat_messages_to_responses_input``
|
||||
# is already in Responses format (``input_text`` / ``input_image``).
|
||||
# Validate each part and pass through.
|
||||
# is already in Responses format (``input_text`` / ``output_text``
|
||||
# / ``input_image``). Validate each part and pass through.
|
||||
# Use the correct text type for the role — ``output_text`` for
|
||||
# assistant messages, ``input_text`` for user messages.
|
||||
text_type = "output_text" if role == "assistant" else "input_text"
|
||||
validated: List[Dict[str, Any]] = []
|
||||
for part_idx, part in enumerate(content):
|
||||
if isinstance(part, str):
|
||||
if part:
|
||||
validated.append({"type": "input_text", "text": part})
|
||||
validated.append({"type": text_type, "text": part})
|
||||
continue
|
||||
if not isinstance(part, dict):
|
||||
raise ValueError(
|
||||
@@ -446,7 +567,7 @@ def _preflight_codex_input_items(raw_items: Any) -> List[Dict[str, Any]]:
|
||||
text = part.get("text", "")
|
||||
if not isinstance(text, str):
|
||||
text = str(text or "")
|
||||
validated.append({"type": "input_text", "text": text})
|
||||
validated.append({"type": text_type, "text": text})
|
||||
elif ptype in {"input_image", "image_url"}:
|
||||
image_ref = part.get("image_url", "")
|
||||
detail = part.get("detail")
|
||||
@@ -703,6 +824,7 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]:
|
||||
content_parts: List[str] = []
|
||||
reasoning_parts: List[str] = []
|
||||
reasoning_items_raw: List[Dict[str, Any]] = []
|
||||
message_items_raw: List[Dict[str, Any]] = []
|
||||
tool_calls: List[Any] = []
|
||||
has_incomplete_items = response_status in {"queued", "in_progress", "incomplete"}
|
||||
saw_commentary_phase = False
|
||||
@@ -721,6 +843,7 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]:
|
||||
|
||||
if item_type == "message":
|
||||
item_phase = getattr(item, "phase", None)
|
||||
normalized_phase = None
|
||||
if isinstance(item_phase, str):
|
||||
normalized_phase = item_phase.strip().lower()
|
||||
if normalized_phase in {"commentary", "analysis"}:
|
||||
@@ -730,6 +853,18 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]:
|
||||
message_text = _extract_responses_message_text(item)
|
||||
if message_text:
|
||||
content_parts.append(message_text)
|
||||
raw_message_item: Dict[str, Any] = {
|
||||
"type": "message",
|
||||
"role": "assistant",
|
||||
"status": _normalize_responses_message_status(item_status),
|
||||
"content": [{"type": "output_text", "text": message_text}],
|
||||
}
|
||||
item_id = getattr(item, "id", None)
|
||||
if isinstance(item_id, str) and item_id:
|
||||
raw_message_item["id"] = item_id
|
||||
if normalized_phase:
|
||||
raw_message_item["phase"] = normalized_phase
|
||||
message_items_raw.append(raw_message_item)
|
||||
elif item_type == "reasoning":
|
||||
reasoning_text = _extract_responses_reasoning_text(item)
|
||||
if reasoning_text:
|
||||
@@ -842,6 +977,7 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]:
|
||||
reasoning_content=None,
|
||||
reasoning_details=None,
|
||||
codex_reasoning_items=reasoning_items_raw or None,
|
||||
codex_message_items=message_items_raw or None,
|
||||
)
|
||||
|
||||
if tool_calls:
|
||||
|
||||
+39
-8
@@ -106,9 +106,11 @@ _endpoint_model_metadata_cache_time: Dict[str, float] = {}
|
||||
_ENDPOINT_MODEL_CACHE_TTL = 300
|
||||
|
||||
# Descending tiers for context length probing when the model is unknown.
|
||||
# We start at 128K (a safe default for most modern models) and step down
|
||||
# on context-length errors until one works.
|
||||
# We start at 256K (covers GPT-5.x, many current large-context models) and
|
||||
# step down on context-length errors until one works. Tier[0] is also the
|
||||
# default fallback when no detection method succeeds.
|
||||
CONTEXT_PROBE_TIERS = [
|
||||
256_000,
|
||||
128_000,
|
||||
64_000,
|
||||
32_000,
|
||||
@@ -143,10 +145,11 @@ DEFAULT_CONTEXT_LENGTHS = {
|
||||
"claude": 200000,
|
||||
# OpenAI — GPT-5 family (most have 400k; specific overrides first)
|
||||
# Source: https://developers.openai.com/api/docs/models
|
||||
# GPT-5.5 (launched Apr 23 2026). 400k is the fallback for providers we
|
||||
# can't probe live. ChatGPT Codex OAuth actually caps lower (272k as of
|
||||
# Apr 2026) and is resolved via _resolve_codex_oauth_context_length().
|
||||
"gpt-5.5": 400000,
|
||||
# GPT-5.5 (launched Apr 23 2026) is 1.05M on the direct OpenAI API and
|
||||
# ChatGPT Codex OAuth caps it at 272K; both paths resolve via their own
|
||||
# provider-aware branches (_resolve_codex_oauth_context_length + models.dev).
|
||||
# This hardcoded value is only reached when every probe misses.
|
||||
"gpt-5.5": 1050000,
|
||||
"gpt-5.4-nano": 400000, # 400k (not 1.05M like full 5.4)
|
||||
"gpt-5.4-mini": 400000, # 400k (not 1.05M like full 5.4)
|
||||
"gpt-5.4": 1050000, # GPT-5.4, GPT-5.4 Pro (1.05M context)
|
||||
@@ -162,7 +165,17 @@ DEFAULT_CONTEXT_LENGTHS = {
|
||||
"gemma-4-31b": 256000,
|
||||
"gemma-3": 131072,
|
||||
"gemma": 8192, # fallback for older gemma models
|
||||
# DeepSeek
|
||||
# DeepSeek — V4 family ships with a 1M context window. The legacy
|
||||
# aliases ``deepseek-chat`` / ``deepseek-reasoner`` are server-side
|
||||
# mapped to the non-thinking / thinking modes of ``deepseek-v4-flash``
|
||||
# and inherit the same 1M window. The ``deepseek`` substring entry
|
||||
# below remains as a 128K fallback for older / unknown DeepSeek model
|
||||
# ids (e.g. via custom endpoints).
|
||||
# https://api-docs.deepseek.com/zh-cn/quick_start/pricing
|
||||
"deepseek-v4-pro": 1_000_000,
|
||||
"deepseek-v4-flash": 1_000_000,
|
||||
"deepseek-chat": 1_000_000,
|
||||
"deepseek-reasoner": 1_000_000,
|
||||
"deepseek": 128000,
|
||||
# Meta
|
||||
"llama": 131072,
|
||||
@@ -1193,6 +1206,7 @@ def get_model_context_length(
|
||||
api_key: str = "",
|
||||
config_context_length: int | None = None,
|
||||
provider: str = "",
|
||||
custom_providers: list | None = None,
|
||||
) -> int:
|
||||
"""Get the context length for a model.
|
||||
|
||||
@@ -1213,6 +1227,23 @@ def get_model_context_length(
|
||||
if config_context_length is not None and isinstance(config_context_length, int) and config_context_length > 0:
|
||||
return config_context_length
|
||||
|
||||
# 0b. custom_providers per-model override — check before any probe.
|
||||
# This closes the gap where /model switch and display paths used to fall
|
||||
# back to 128K despite the user having a per-model context_length set.
|
||||
# See #15779.
|
||||
if custom_providers and base_url and model:
|
||||
try:
|
||||
from hermes_cli.config import get_custom_provider_context_length
|
||||
cp_ctx = get_custom_provider_context_length(
|
||||
model=model,
|
||||
base_url=base_url,
|
||||
custom_providers=custom_providers,
|
||||
)
|
||||
if cp_ctx:
|
||||
return cp_ctx
|
||||
except Exception:
|
||||
pass # fall through to probing
|
||||
|
||||
# Normalise provider-prefixed model names (e.g. "local:model-name" →
|
||||
# "model-name") so cache lookups and server queries use the bare ID that
|
||||
# local servers actually know about. Ollama "model:tag" colons are preserved.
|
||||
@@ -1352,7 +1383,7 @@ def get_model_context_length(
|
||||
# 6. OpenRouter live API metadata (provider-unaware fallback)
|
||||
metadata = fetch_model_metadata()
|
||||
if model in metadata:
|
||||
return metadata[model].get("context_length", 128000)
|
||||
return metadata[model].get("context_length", DEFAULT_FALLBACK_CONTEXT)
|
||||
|
||||
# 8. Hardcoded defaults (fuzzy match — longest key first for specificity)
|
||||
# Only check `default_model in model` (is the key a substring of the input).
|
||||
|
||||
@@ -180,3 +180,145 @@ def format_remaining(seconds: float) -> str:
|
||||
h, remainder = divmod(s, 3600)
|
||||
m = remainder // 60
|
||||
return f"{h}h {m}m" if m else f"{h}h"
|
||||
|
||||
|
||||
# Buckets with reset windows shorter than this are treated as transient
|
||||
# (upstream jitter, secondary throttling) rather than a genuine quota
|
||||
# exhaustion worth a cross-session breaker trip.
|
||||
_MIN_RESET_FOR_BREAKER_SECONDS = 60.0
|
||||
|
||||
|
||||
def is_genuine_nous_rate_limit(
    *,
    headers: Optional[Mapping[str, str]] = None,
    last_known_state: Optional[Any] = None,
) -> bool:
    """Classify a Nous Portal 429 as a real account limit or upstream noise.

    Nous Portal fronts several upstream providers (DeepSeek, Kimi, MiMo,
    Hermes, ...) behind a single endpoint, so a 429 has two possible causes:

      (a) The caller's own RPM / RPH / TPM / TPH bucket on Nous is spent.
          This is a genuine limit that persists until the bucket resets.
      (b) An upstream provider has no capacity for one specific model.
          This is transient, clears within seconds, and says nothing about
          the caller's quota.

    Only (a) should trip the cross-session breaker.  Tripping it on (b)
    blocks every Nous request — all models share the one provider key — for
    minutes, even though the account is healthy and another model would have
    worked (e.g. DeepSeek V4 Pro 429s ending up blocking Kimi 2.6 and
    MiMo V2.5 Pro).

    Two signals are consulted, in order:

      1. The ``x-ratelimit-*`` headers on the 429 itself.  A bucket with
         ``remaining == 0`` and a reset window of at least
         ``_MIN_RESET_FOR_BREAKER_SECONDS`` is taken as proof of (a).
      2. The last-known-good rate-limit state recorded by
         ``_capture_rate_limits()`` on the previous successful response.
         A bucket that was already exhausted there, with a substantial
         reset window, means the 429 is almost certainly (a) continuing.

    When neither signal fires the 429 is treated as (b): the single request
    fails, the retry loop or model switch proceeds, and the breaker file is
    NOT written.

    Args:
        headers: Headers of the 429 response, if available.
        last_known_state: Rate-limit state from an earlier successful
            response, if available.

    Returns:
        True when the evidence points at (a), False otherwise.
    """
    # Signal 1 — the 429 response's own headers.
    header_buckets = _parse_buckets_from_headers(headers)
    if _has_exhausted_bucket(header_buckets):
        return True

    # Signal 2 — state remembered from a recent successful response.
    # NOTE(review): the helper reads buckets via getattr(), so a
    # RateLimitState-style object works; a plain dict would expose no
    # buckets and never fire this signal — confirm what callers pass.
    if last_known_state is None:
        return False
    return _has_exhausted_bucket_in_object(last_known_state)
|
||||
|
||||
|
||||
def _parse_buckets_from_headers(
|
||||
headers: Optional[Mapping[str, str]],
|
||||
) -> dict[str, tuple[Optional[int], Optional[float]]]:
|
||||
"""Extract (remaining, reset_seconds) per bucket from x-ratelimit-* headers.
|
||||
|
||||
Returns empty dict when no rate-limit headers are present.
|
||||
"""
|
||||
if not headers:
|
||||
return {}
|
||||
|
||||
lowered = {k.lower(): v for k, v in headers.items()}
|
||||
if not any(k.startswith("x-ratelimit-") for k in lowered):
|
||||
return {}
|
||||
|
||||
def _maybe_int(raw: Optional[str]) -> Optional[int]:
|
||||
if raw is None:
|
||||
return None
|
||||
try:
|
||||
return int(float(raw))
|
||||
except (TypeError, ValueError):
|
||||
return None
|
||||
|
||||
def _maybe_float(raw: Optional[str]) -> Optional[float]:
|
||||
if raw is None:
|
||||
return None
|
||||
try:
|
||||
return float(raw)
|
||||
except (TypeError, ValueError):
|
||||
return None
|
||||
|
||||
result: dict[str, tuple[Optional[int], Optional[float]]] = {}
|
||||
for tag in ("requests", "requests-1h", "tokens", "tokens-1h"):
|
||||
remaining = _maybe_int(lowered.get(f"x-ratelimit-remaining-{tag}"))
|
||||
reset = _maybe_float(lowered.get(f"x-ratelimit-reset-{tag}"))
|
||||
if remaining is not None or reset is not None:
|
||||
result[tag] = (remaining, reset)
|
||||
return result
|
||||
|
||||
|
||||
def _has_exhausted_bucket(
    buckets: Mapping[str, tuple[Optional[int], Optional[float]]],
) -> bool:
    """Tell whether any parsed bucket is spent with a long enough reset window.

    A bucket counts only when its remaining count is known and not positive
    AND its reset time is known and at least
    ``_MIN_RESET_FOR_BREAKER_SECONDS``.  Buckets with a missing half are
    ignored.
    """
    return any(
        remaining is not None
        and not remaining > 0
        and reset is not None
        and reset >= _MIN_RESET_FOR_BREAKER_SECONDS
        for remaining, reset in buckets.values()
    )
|
||||
|
||||
|
||||
def _has_exhausted_bucket_in_object(state: Any) -> bool:
|
||||
"""Check a RateLimitState-like object for an exhausted bucket.
|
||||
|
||||
Accepts the dataclass from ``agent.rate_limit_tracker`` (buckets
|
||||
exposed as attributes ``requests_min``, ``requests_hour``,
|
||||
``tokens_min``, ``tokens_hour``) and falls back gracefully for any
|
||||
object missing those attributes.
|
||||
"""
|
||||
for attr in ("requests_min", "requests_hour", "tokens_min", "tokens_hour"):
|
||||
bucket = getattr(state, attr, None)
|
||||
if bucket is None:
|
||||
continue
|
||||
limit = getattr(bucket, "limit", 0) or 0
|
||||
remaining = getattr(bucket, "remaining", 0) or 0
|
||||
# Prefer the adjusted "remaining_seconds_now" property when present;
|
||||
# fall back to raw reset_seconds.
|
||||
reset = getattr(bucket, "remaining_seconds_now", None)
|
||||
if reset is None:
|
||||
reset = getattr(bucket, "reset_seconds", 0.0) or 0.0
|
||||
if limit <= 0:
|
||||
continue
|
||||
if remaining > 0:
|
||||
continue
|
||||
if reset >= _MIN_RESET_FOR_BREAKER_SECONDS:
|
||||
return True
|
||||
return False
|
||||
|
||||
@@ -0,0 +1,144 @@
|
||||
"""
|
||||
Contextual first-touch onboarding hints.
|
||||
|
||||
Instead of blocking first-run questionnaires, show a one-time hint the *first*
|
||||
time a user hits a behavior fork — message-while-running, first long-running
|
||||
tool, etc. Each hint is shown once per install (tracked in ``config.yaml`` under
|
||||
``onboarding.seen.<flag>``) and then never again.
|
||||
|
||||
Keep this module tiny and dependency-free so both the CLI and gateway can import
|
||||
it without pulling in heavy modules.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Any, Mapping, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# Flag names (stable — used as config.yaml keys under onboarding.seen)
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
BUSY_INPUT_FLAG = "busy_input_prompt"
|
||||
TOOL_PROGRESS_FLAG = "tool_progress_prompt"
|
||||
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# Hint content
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
def busy_input_hint_gateway(mode: str) -> str:
    """Build the gateway hint for a user's first message sent while the agent is busy.

    ``mode`` is the busy_input_mode that was just applied. ``"queue"`` yields
    the "I queued your message" wording; any other value yields the
    "I just interrupted" wording, so the text matches what actually happened.
    """
    queued_tip = (
        "💡 First-time tip — I queued your message instead of interrupting. "
        "Send `/busy interrupt` to make new messages stop the current task "
        "immediately, or `/busy status` to check. This notice won't appear again."
    )
    interrupted_tip = (
        "💡 First-time tip — I just interrupted my current task to answer you. "
        "Send `/busy queue` to queue follow-ups for after the current task instead, "
        "or `/busy status` to check. This notice won't appear again."
    )
    return queued_tip if mode == "queue" else interrupted_tip
|
||||
|
||||
|
||||
def busy_input_hint_cli(mode: str) -> str:
    """Build the CLI busy-input hint (plain text, no markdown).

    ``"queue"`` yields the "was queued" wording; every other ``mode`` yields
    the "interrupted the current run" wording.
    """
    if mode != "queue":
        return (
            "(tip) Your message interrupted the current run. "
            "Use /busy queue to queue messages for the next turn instead. "
            "This tip only shows once."
        )
    return (
        "(tip) Your message was queued for the next turn. "
        "Use /busy interrupt to make Enter stop the current run instead. "
        "This tip only shows once."
    )
|
||||
|
||||
|
||||
def tool_progress_hint_gateway() -> str:
    """Build the gateway hint that points at ``/verbose`` after a long-running tool."""
    sentences = (
        "💡 First-time tip — that tool took a while and I'm streaming every step. ",
        "If the progress messages feel noisy, send `/verbose` to cycle modes ",
        "(all → new → off). This notice won't appear again.",
    )
    return "".join(sentences)
|
||||
|
||||
|
||||
def tool_progress_hint_cli() -> str:
    """Build the plain-text CLI hint that points at ``/verbose`` after a long-running tool."""
    sentences = (
        "(tip) That tool ran for a while. Use /verbose to cycle tool-progress ",
        "display modes (all -> new -> off -> verbose). This tip only shows once.",
    )
    return "".join(sentences)
|
||||
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# State read / write
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
def _get_seen_dict(config: Mapping[str, Any]) -> Mapping[str, Any]:
|
||||
onboarding = config.get("onboarding") if isinstance(config, Mapping) else None
|
||||
if not isinstance(onboarding, Mapping):
|
||||
return {}
|
||||
seen = onboarding.get("seen")
|
||||
return seen if isinstance(seen, Mapping) else {}
|
||||
|
||||
|
||||
def is_seen(config: Mapping[str, Any], flag: str) -> bool:
    """Tell whether the first-touch hint named ``flag`` was already shown.

    True when ``onboarding.seen.<flag>`` in ``config`` holds a truthy value.
    """
    seen_flags = _get_seen_dict(config)
    return bool(seen_flags.get(flag))
|
||||
|
||||
|
||||
def mark_seen(config_path: Path, flag: str) -> bool:
    """Persist ``onboarding.seen.<flag> = True`` to ``config_path``.

    The existing YAML document is loaded, the flag is set, and the whole
    document is handed to ``utils.atomic_yaml_write`` (used so a concurrent
    process can't observe a partially-written file). A config file that does
    not exist yet is treated as an empty config, so the write is still
    attempted with a document holding only the onboarding flag.

    Onboarding is best-effort: this function never raises.

    Args:
        config_path: Location of the YAML config file.
        flag: Name of the hint flag to latch.

    Returns:
        True when the flag was written or was already ``True`` on disk.
        False on any error: ``yaml``/``utils`` cannot be imported, the file
        cannot be read or parsed, its top level is not a mapping, or the
        write fails.
    """
    try:
        import yaml
        from utils import atomic_yaml_write
    except Exception as e:  # pragma: no cover — dependency issue
        logger.debug("onboarding: failed to import yaml/utils: %s", e)
        return False

    try:
        cfg: dict = {}
        if config_path.exists():
            with open(config_path, encoding="utf-8") as f:
                # An empty file loads as None; normalise to an empty config.
                cfg = yaml.safe_load(f) or {}
        # Replace a missing or non-dict "onboarding" / "seen" entry with a
        # fresh dict so the flag always has somewhere to live.
        if not isinstance(cfg.get("onboarding"), dict):
            cfg["onboarding"] = {}
        seen = cfg["onboarding"].get("seen")
        if not isinstance(seen, dict):
            seen = {}
            cfg["onboarding"]["seen"] = seen
        if seen.get(flag) is True:
            return True  # already marked — nothing to do
        seen[flag] = True
        # NOTE(review): the read-modify-write above is not locked here;
        # confirm whether concurrent writers to config.yaml are a concern.
        atomic_yaml_write(config_path, cfg)
        return True
    except Exception as e:
        logger.debug("onboarding: failed to mark flag %s: %s", flag, e)
        return False
|
||||
|
||||
|
||||
__all__ = [
|
||||
"BUSY_INPUT_FLAG",
|
||||
"TOOL_PROGRESS_FLAG",
|
||||
"busy_input_hint_gateway",
|
||||
"busy_input_hint_cli",
|
||||
"tool_progress_hint_gateway",
|
||||
"tool_progress_hint_cli",
|
||||
"is_seen",
|
||||
"mark_seen",
|
||||
]
|
||||
@@ -176,6 +176,64 @@ SKILLS_GUIDANCE = (
|
||||
"Skills that aren't maintained become liabilities."
|
||||
)
|
||||
|
||||
KANBAN_GUIDANCE = (
|
||||
"# You are a Kanban worker\n"
|
||||
"You were spawned by the Hermes Kanban dispatcher to execute ONE task from "
|
||||
"the shared board at `~/.hermes/kanban.db`. Your task id is in "
|
||||
"`$HERMES_KANBAN_TASK`; your workspace is `$HERMES_KANBAN_WORKSPACE`. "
|
||||
"The `kanban_*` tools in your schema are your primary coordination surface — "
|
||||
"they write directly to the shared SQLite DB and work regardless of terminal "
|
||||
"backend (local/docker/modal/ssh).\n"
|
||||
"\n"
|
||||
"## Lifecycle\n"
|
||||
"\n"
|
||||
"1. **Orient.** Call `kanban_show()` first (no args — it defaults to your "
|
||||
"task). The response includes title, body, parent-task handoffs (summary + "
|
||||
"metadata), any prior attempts on this task if you're a retry, the full "
|
||||
"comment thread, and a pre-formatted `worker_context` you can treat as "
|
||||
"ground truth.\n"
|
||||
"2. **Work inside the workspace.** `cd $HERMES_KANBAN_WORKSPACE` before "
|
||||
"any file operations. The workspace is yours for this run. Don't modify "
|
||||
"files outside it unless the task explicitly asks.\n"
|
||||
"3. **Heartbeat on long operations.** Call `kanban_heartbeat(note=...)` "
|
||||
"every few minutes during long subprocesses (training, encoding, crawling). "
|
||||
"Skip heartbeats for short tasks.\n"
|
||||
"4. **Block on genuine ambiguity.** If you need a human decision you cannot "
|
||||
"infer (missing credentials, UX choice, paywalled source, peer output you "
|
||||
"need first), call `kanban_block(reason=\"...\")` and stop. Don't guess. "
|
||||
"The user will unblock with context and the dispatcher will respawn you.\n"
|
||||
"5. **Complete with structured handoff.** Call `kanban_complete(summary=..., "
|
||||
"metadata=...)`. `summary` is 1–3 human-readable sentences naming concrete "
|
||||
"artifacts. `metadata` is machine-readable facts "
|
||||
"(`{changed_files: [...], tests_run: N, decisions: [...]}`). Downstream "
|
||||
"workers read both via their own `kanban_show`. Never put secrets / "
|
||||
"tokens / raw PII in either field — run rows are durable forever.\n"
|
||||
"6. **If follow-up work appears, create it; don't do it.** Use "
|
||||
"`kanban_create(title=..., assignee=<right-profile>, parents=[your-task-id])` "
|
||||
"to spawn a child task for the appropriate specialist profile instead of "
|
||||
"scope-creeping into the next thing.\n"
|
||||
"\n"
|
||||
"## Orchestrator mode\n"
|
||||
"\n"
|
||||
"If your task is itself a decomposition task (e.g. a planner profile given "
|
||||
"a high-level goal), use `kanban_create` to fan out into child tasks — one "
|
||||
"per specialist, each with an explicit `assignee` and `parents=[...]` to "
|
||||
"express dependencies. Then `kanban_complete` your own task with a summary "
|
||||
"of the decomposition. Do NOT execute the work yourself; your job is "
|
||||
"routing, not implementation.\n"
|
||||
"\n"
|
||||
"## Do NOT\n"
|
||||
"\n"
|
||||
"- Do not shell out to `hermes kanban <verb>` for board operations. Use "
|
||||
"the `kanban_*` tools — they work across all terminal backends.\n"
|
||||
"- Do not complete a task you didn't actually finish. Block it.\n"
|
||||
"- Do not assign follow-up work to yourself. Assign it to the right "
|
||||
"specialist profile.\n"
|
||||
"- Do not call `delegate_task` as a board substitute. `delegate_task` is "
|
||||
"for short reasoning subtasks inside your own run; board tasks are for "
|
||||
"cross-agent handoffs that outlive one API loop."
|
||||
)
|
||||
|
||||
TOOL_USE_ENFORCEMENT_GUIDANCE = (
|
||||
"# Tool-use enforcement\n"
|
||||
"You MUST use your tools to take action — do not describe what you would do "
|
||||
|
||||
@@ -23,9 +23,14 @@ def get_transport(api_mode: str):
|
||||
This allows gradual migration — call sites can check for None
|
||||
and fall back to the legacy code path.
|
||||
"""
|
||||
if not _REGISTRY:
|
||||
_discover_transports()
|
||||
cls = _REGISTRY.get(api_mode)
|
||||
if cls is None:
|
||||
# The registry can be partially populated when a specific transport
|
||||
# module was imported directly (for example chat_completions before
|
||||
# codex). Discover on misses, not only when the registry is empty, so
|
||||
# test/order-dependent imports do not make valid api_modes unavailable.
|
||||
_discover_transports()
|
||||
cls = _REGISTRY.get(api_mode)
|
||||
if cls is None:
|
||||
return None
|
||||
return cls()
|
||||
|
||||
@@ -31,15 +31,15 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||
def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> List[Dict[str, Any]]:
|
||||
"""Messages are already in OpenAI format — sanitize Codex leaks only.
|
||||
|
||||
Strips Codex Responses API fields (``codex_reasoning_items`` on the
|
||||
message, ``call_id``/``response_item_id`` on tool_calls) that strict
|
||||
chat-completions providers reject with 400/422.
|
||||
Strips Codex Responses API fields (``codex_reasoning_items`` /
|
||||
``codex_message_items`` on the message, ``call_id``/``response_item_id``
|
||||
on tool_calls) that strict chat-completions providers reject with 400/422.
|
||||
"""
|
||||
needs_sanitize = False
|
||||
for msg in messages:
|
||||
if not isinstance(msg, dict):
|
||||
continue
|
||||
if "codex_reasoning_items" in msg:
|
||||
if "codex_reasoning_items" in msg or "codex_message_items" in msg:
|
||||
needs_sanitize = True
|
||||
break
|
||||
tool_calls = msg.get("tool_calls")
|
||||
@@ -59,6 +59,7 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||
if not isinstance(msg, dict):
|
||||
continue
|
||||
msg.pop("codex_reasoning_items", None)
|
||||
msg.pop("codex_message_items", None)
|
||||
tool_calls = msg.get("tool_calls")
|
||||
if isinstance(tool_calls, list):
|
||||
for tc in tool_calls:
|
||||
|
||||
@@ -120,6 +120,24 @@ class ResponsesApiTransport(ProviderTransport):
|
||||
if request_overrides:
|
||||
kwargs.update(request_overrides)
|
||||
|
||||
if is_codex_backend:
|
||||
prompt_cache_key = kwargs.get("prompt_cache_key")
|
||||
cache_scope_id = str(prompt_cache_key or session_id or "").strip()
|
||||
if cache_scope_id:
|
||||
existing_extra_headers = kwargs.get("extra_headers")
|
||||
merged_extra_headers: Dict[str, str] = {}
|
||||
if isinstance(existing_extra_headers, dict):
|
||||
merged_extra_headers.update(
|
||||
{
|
||||
str(key): str(value)
|
||||
for key, value in existing_extra_headers.items()
|
||||
if key and value is not None
|
||||
}
|
||||
)
|
||||
merged_extra_headers["session_id"] = cache_scope_id
|
||||
merged_extra_headers["x-client-request-id"] = cache_scope_id
|
||||
kwargs["extra_headers"] = merged_extra_headers
|
||||
|
||||
max_tokens = params.get("max_tokens")
|
||||
if max_tokens is not None and not is_codex_backend:
|
||||
kwargs["max_output_tokens"] = max_tokens
|
||||
@@ -160,6 +178,8 @@ class ResponsesApiTransport(ProviderTransport):
|
||||
provider_data = {}
|
||||
if msg and hasattr(msg, "codex_reasoning_items") and msg.codex_reasoning_items:
|
||||
provider_data["codex_reasoning_items"] = msg.codex_reasoning_items
|
||||
if msg and hasattr(msg, "codex_message_items") and msg.codex_message_items:
|
||||
provider_data["codex_message_items"] = msg.codex_message_items
|
||||
if msg and hasattr(msg, "reasoning_details") and msg.reasoning_details:
|
||||
provider_data["reasoning_details"] = msg.reasoning_details
|
||||
|
||||
|
||||
@@ -97,7 +97,7 @@ class NormalizedResponse:
|
||||
Response-level ``provider_data`` examples:
|
||||
|
||||
* Anthropic: ``{"reasoning_details": [...]}``
|
||||
* Codex: ``{"codex_reasoning_items": [...]}``
|
||||
* Codex: ``{"codex_reasoning_items": [...], "codex_message_items": [...]}``
|
||||
* Others: ``None``
|
||||
"""
|
||||
|
||||
@@ -126,6 +126,11 @@ class NormalizedResponse:
|
||||
pd = self.provider_data or {}
|
||||
return pd.get("codex_reasoning_items")
|
||||
|
||||
@property
def codex_message_items(self):
    """Return the ``codex_message_items`` entry of ``provider_data``, or ``None``.

    ``None`` is returned both when ``provider_data`` is unset/empty and when
    it carries no such key.
    """
    return (self.provider_data or {}).get("codex_message_items")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Factory helpers
|
||||
|
||||
+23
-8
@@ -824,7 +824,9 @@ delegation:
|
||||
# Display
|
||||
# =============================================================================
|
||||
display:
|
||||
# Use compact banner mode
|
||||
# Use compact banner mode (hides the ASCII-art banner, shows a single line).
|
||||
# true: Compact single-line banner
|
||||
# false: Full ASCII banner with tool/skill summary (default)
|
||||
compact: false
|
||||
|
||||
# Tool progress display level (CLI and gateway)
|
||||
@@ -838,12 +840,15 @@ display:
|
||||
# Gateway-only natural mid-turn assistant updates.
|
||||
# When true, completed assistant status messages are sent as separate chat
|
||||
# messages. This is independent of tool_progress and gateway streaming.
|
||||
# true: Send mid-turn assistant updates as separate messages (default)
|
||||
# false: Only send the final response
|
||||
interim_assistant_messages: true
|
||||
|
||||
# What Enter does when Hermes is already busy in the CLI.
|
||||
# What Enter does when Hermes is already busy (CLI and gateway platforms).
|
||||
# interrupt: Interrupt the current run and redirect Hermes (default)
|
||||
# queue: Queue your message for the next turn
|
||||
# Ctrl+C always interrupts regardless of this setting.
|
||||
# Ctrl+C (or /stop in gateway) always interrupts regardless of this setting.
|
||||
# Toggle at runtime with /busy_input_mode <interrupt|queue>.
|
||||
busy_input_mode: interrupt
|
||||
|
||||
# Background process notifications (gateway/messaging only).
|
||||
@@ -859,17 +864,22 @@ display:
|
||||
# Play terminal bell when agent finishes a response.
|
||||
# Useful for long-running tasks — your terminal will ding when the agent is done.
|
||||
# Works over SSH. Most terminals can be configured to flash the taskbar or play a sound.
|
||||
# true: Ring the terminal bell on each response
|
||||
# false: Silent (default)
|
||||
bell_on_complete: false
|
||||
|
||||
# Show model reasoning/thinking before each response.
|
||||
# When enabled, a dim box shows the model's thought process above the response.
|
||||
# Toggle at runtime with /reasoning show or /reasoning hide.
|
||||
# true: Show the reasoning box
|
||||
# false: Hide reasoning (default)
|
||||
show_reasoning: false
|
||||
|
||||
# Stream tokens to the terminal as they arrive instead of waiting for the
|
||||
# full response. The response box opens on first token and text appears
|
||||
# line-by-line. Tool calls are still captured silently.
|
||||
# Stream tokens to the terminal in real-time. Disable to wait for full responses.
|
||||
# true: Stream tokens as they arrive (default)
|
||||
# false: Wait for the full response before rendering
|
||||
streaming: true
|
||||
|
||||
# ───────────────────────────────────────────────────────────────────────────
|
||||
@@ -879,10 +889,15 @@ display:
|
||||
# response box label, and branding text. Change at runtime with /skin <name>.
|
||||
#
|
||||
# Built-in skins:
|
||||
# default — Classic Hermes gold/kawaii
|
||||
# ares — Crimson/bronze war-god theme with spinner wings
|
||||
# mono — Clean grayscale monochrome
|
||||
# slate — Cool blue developer-focused
|
||||
# default — Classic Hermes gold/kawaii
|
||||
# ares — Crimson/bronze war-god theme with spinner wings
|
||||
# mono — Clean grayscale monochrome
|
||||
# slate — Cool blue developer-focused
|
||||
# daylight — Bright light-mode theme
|
||||
# warm-lightmode — Warm paper-tone light-mode theme
|
||||
# poseidon — Sea-green/teal Olympian theme
|
||||
# sisyphus — Earthy stone-and-moss theme
|
||||
# charizard — Fiery orange dragon theme
|
||||
#
|
||||
# Custom skins: drop a YAML file in ~/.hermes/skins/<name>.yaml
|
||||
# Schema (all fields optional, missing values inherit from default):
|
||||
|
||||
@@ -22,6 +22,7 @@ import re
|
||||
import concurrent.futures
|
||||
import base64
|
||||
import atexit
|
||||
import errno
|
||||
import tempfile
|
||||
import time
|
||||
import uuid
|
||||
@@ -416,6 +417,11 @@ def load_cli_config() -> Dict[str, Any]:
|
||||
"base_url": "", # Direct OpenAI-compatible endpoint for subagents
|
||||
"api_key": "", # API key for delegation.base_url (falls back to OPENAI_API_KEY)
|
||||
},
|
||||
"onboarding": {
|
||||
# First-touch hint flags (see agent/onboarding.py). Each hint is
|
||||
# shown once per install then latched here.
|
||||
"seen": {},
|
||||
},
|
||||
}
|
||||
|
||||
# Track whether the config file explicitly set terminal config.
|
||||
@@ -4318,7 +4324,7 @@ class HermesCLI:
|
||||
|
||||
_cprint(f"\n {_DIM}Tip: Just type your message to chat with Hermes!{_RST}")
|
||||
_cprint(f" {_DIM}Multi-line: Alt+Enter for a new line{_RST}")
|
||||
_cprint(f" {_DIM}Draft editor: Ctrl+G{_RST}")
|
||||
_cprint(f" {_DIM}Draft editor: Ctrl+G (Alt+G in VSCode/Cursor){_RST}")
|
||||
if _is_termux_environment():
|
||||
_cprint(f" {_DIM}Attach image: /image {_termux_example_image_path()} or start your prompt with a local image path{_RST}\n")
|
||||
else:
|
||||
@@ -4668,10 +4674,6 @@ class HermesCLI:
|
||||
def new_session(self, silent=False):
|
||||
"""Start a fresh session with a new session ID and cleared agent state."""
|
||||
if self.agent and self.conversation_history:
|
||||
try:
|
||||
self.agent.flush_memories(self.conversation_history)
|
||||
except (Exception, KeyboardInterrupt):
|
||||
pass
|
||||
# Trigger memory extraction on the old session before session_id rotates.
|
||||
self.agent.commit_memory_session(self.conversation_history)
|
||||
self._notify_session_boundary("on_session_finalize")
|
||||
@@ -5156,27 +5158,29 @@ class HermesCLI:
|
||||
_cprint(f" ✓ Model switched: {result.new_model}")
|
||||
_cprint(f" Provider: {provider_label}")
|
||||
|
||||
# Context: always resolve via the provider-aware chain so Codex OAuth,
|
||||
# Copilot, and Nous-enforced caps win over the raw models.dev entry
|
||||
# (e.g. gpt-5.5 is 1.05M on openai but 272K on Codex OAuth).
|
||||
mi = result.model_info
|
||||
try:
|
||||
from hermes_cli.model_switch import resolve_display_context_length
|
||||
ctx = resolve_display_context_length(
|
||||
result.new_model,
|
||||
result.target_provider,
|
||||
base_url=result.base_url or self.base_url or "",
|
||||
api_key=result.api_key or self.api_key or "",
|
||||
model_info=mi,
|
||||
)
|
||||
if ctx:
|
||||
_cprint(f" Context: {ctx:,} tokens")
|
||||
except Exception:
|
||||
pass
|
||||
if mi:
|
||||
if mi.context_window:
|
||||
_cprint(f" Context: {mi.context_window:,} tokens")
|
||||
if mi.max_output:
|
||||
_cprint(f" Max output: {mi.max_output:,} tokens")
|
||||
if mi.has_cost_data():
|
||||
_cprint(f" Cost: {mi.format_cost()}")
|
||||
_cprint(f" Capabilities: {mi.format_capabilities()}")
|
||||
else:
|
||||
try:
|
||||
from agent.model_metadata import get_model_context_length
|
||||
ctx = get_model_context_length(
|
||||
result.new_model,
|
||||
base_url=result.base_url or self.base_url,
|
||||
api_key=result.api_key or self.api_key,
|
||||
provider=result.target_provider,
|
||||
)
|
||||
_cprint(f" Context: {ctx:,} tokens")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
cache_enabled = (
|
||||
(base_url_host_matches(result.base_url or "", "openrouter.ai") and "claude" in result.new_model.lower())
|
||||
@@ -5277,24 +5281,22 @@ class HermesCLI:
|
||||
# Parse --provider and --global flags
|
||||
model_input, explicit_provider, persist_global = parse_model_flags(raw_args)
|
||||
|
||||
# Load providers for switch_model (picker path needs them below)
|
||||
user_provs = None
|
||||
custom_provs = None
|
||||
try:
|
||||
from hermes_cli.config import get_compatible_custom_providers, load_config
|
||||
cfg = load_config()
|
||||
user_provs = cfg.get("providers")
|
||||
custom_provs = get_compatible_custom_providers(cfg)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# No args at all: open prompt_toolkit-native picker modal
|
||||
if not model_input and not explicit_provider:
|
||||
model_display = self.model or "unknown"
|
||||
provider_display = get_label(self.provider) if self.provider else "unknown"
|
||||
|
||||
user_provs = None
|
||||
custom_provs = None
|
||||
try:
|
||||
from hermes_cli.config import get_compatible_custom_providers, load_config
|
||||
cfg = load_config()
|
||||
user_provs = cfg.get("providers")
|
||||
custom_provs = get_compatible_custom_providers(cfg)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
try:
|
||||
providers = list_authenticated_providers(
|
||||
current_provider=self.provider or "",
|
||||
@@ -5816,7 +5818,28 @@ class HermesCLI:
|
||||
|
||||
print(f"(._.) Unknown cron command: {subcommand}")
|
||||
print(" Available: list, add, edit, pause, resume, run, remove")
|
||||
|
||||
|
||||
def _handle_kanban_command(self, cmd: str):
    """Handle the /kanban command by delegating to the shared kanban CLI.

    ``cmd`` is the user's full ``/kanban ...`` input. Surrounding whitespace,
    the leading slash(es) and a leading ``kanban`` word are removed, and the
    remainder is passed to ``hermes_cli.kanban.run_slash``. Whatever
    non-empty value it returns is printed; an exception raised by
    ``run_slash`` is printed as an error line instead of propagating.
    """
    from hermes_cli.kanban import run_slash

    command_word = "kanban"
    args = cmd.strip().lstrip("/")
    if args.startswith(command_word):
        args = args[len(command_word):].lstrip()

    try:
        output = run_slash(args)
    except Exception as exc:  # pragma: no cover - defensive
        output = f"(._.) kanban error: {exc}"

    if output:
        print(output)
|
||||
|
||||
def _handle_skills_command(self, cmd: str):
|
||||
"""Handle /skills slash command — delegates to hermes_cli.skills_hub."""
|
||||
from hermes_cli.skills_hub import handle_skills_slash
|
||||
@@ -6053,6 +6076,8 @@ class HermesCLI:
|
||||
self.save_conversation()
|
||||
elif canonical == "cron":
|
||||
self._handle_cron_command(cmd_original)
|
||||
elif canonical == "kanban":
|
||||
self._handle_kanban_command(cmd_original)
|
||||
elif canonical == "skills":
|
||||
with self._busy_command(self._slow_command_status(cmd_original)):
|
||||
self._handle_skills_command(cmd_original)
|
||||
@@ -6127,8 +6152,6 @@ class HermesCLI:
|
||||
self._handle_agents_command()
|
||||
elif canonical == "background":
|
||||
self._handle_background_command(cmd_original)
|
||||
elif canonical == "btw":
|
||||
self._handle_btw_command(cmd_original)
|
||||
elif canonical == "queue":
|
||||
# Extract prompt after "/queue " or "/q "
|
||||
parts = cmd_original.split(None, 1)
|
||||
@@ -6415,122 +6438,6 @@ class HermesCLI:
|
||||
self._background_tasks[task_id] = thread
|
||||
thread.start()
|
||||
|
||||
def _handle_btw_command(self, cmd: str):
|
||||
"""Handle /btw <question> — ephemeral side question using session context.
|
||||
|
||||
Snapshots the current conversation history, spawns a no-tools agent in
|
||||
a background thread, and prints the answer without persisting anything
|
||||
to the main session.
|
||||
"""
|
||||
parts = cmd.strip().split(maxsplit=1)
|
||||
if len(parts) < 2 or not parts[1].strip():
|
||||
_cprint(" Usage: /btw <question>")
|
||||
_cprint(" Example: /btw what module owns session title sanitization?")
|
||||
_cprint(" Answers using session context. No tools, not persisted.")
|
||||
return
|
||||
|
||||
question = parts[1].strip()
|
||||
task_id = f"btw_{datetime.now().strftime('%H%M%S')}_{uuid.uuid4().hex[:6]}"
|
||||
|
||||
if not self._ensure_runtime_credentials():
|
||||
_cprint(" (>_<) Cannot start /btw: no valid credentials.")
|
||||
return
|
||||
|
||||
turn_route = self._resolve_turn_agent_config(question)
|
||||
history_snapshot = list(self.conversation_history)
|
||||
|
||||
preview = question[:60] + ("..." if len(question) > 60 else "")
|
||||
_cprint(f' 💬 /btw: "{preview}"')
|
||||
|
||||
def run_btw():
|
||||
try:
|
||||
btw_agent = AIAgent(
|
||||
model=turn_route["model"],
|
||||
api_key=turn_route["runtime"].get("api_key"),
|
||||
base_url=turn_route["runtime"].get("base_url"),
|
||||
provider=turn_route["runtime"].get("provider"),
|
||||
api_mode=turn_route["runtime"].get("api_mode"),
|
||||
acp_command=turn_route["runtime"].get("command"),
|
||||
acp_args=turn_route["runtime"].get("args"),
|
||||
max_iterations=8,
|
||||
enabled_toolsets=[],
|
||||
quiet_mode=True,
|
||||
verbose_logging=False,
|
||||
session_id=task_id,
|
||||
platform="cli",
|
||||
reasoning_config=self.reasoning_config,
|
||||
service_tier=self.service_tier,
|
||||
request_overrides=turn_route.get("request_overrides"),
|
||||
providers_allowed=self._providers_only,
|
||||
providers_ignored=self._providers_ignore,
|
||||
providers_order=self._providers_order,
|
||||
provider_sort=self._provider_sort,
|
||||
provider_require_parameters=self._provider_require_params,
|
||||
provider_data_collection=self._provider_data_collection,
|
||||
fallback_model=self._fallback_model,
|
||||
session_db=None,
|
||||
skip_memory=True,
|
||||
skip_context_files=True,
|
||||
persist_session=False,
|
||||
)
|
||||
|
||||
btw_prompt = (
|
||||
"[Ephemeral /btw side question. Answer using the conversation "
|
||||
"context. No tools available. Be direct and concise.]\n\n"
|
||||
+ question
|
||||
)
|
||||
result = btw_agent.run_conversation(
|
||||
user_message=btw_prompt,
|
||||
conversation_history=history_snapshot,
|
||||
task_id=task_id,
|
||||
)
|
||||
|
||||
response = (result.get("final_response") or "") if result else ""
|
||||
if not response and result and result.get("error"):
|
||||
response = f"Error: {result['error']}"
|
||||
|
||||
# TUI refresh before printing
|
||||
if self._app:
|
||||
self._app.invalidate()
|
||||
time.sleep(0.05)
|
||||
print()
|
||||
|
||||
if response:
|
||||
try:
|
||||
from hermes_cli.skin_engine import get_active_skin
|
||||
_skin = get_active_skin()
|
||||
_resp_color = _skin.get_color("response_border", "#4F6D4A")
|
||||
except Exception:
|
||||
_resp_color = "#4F6D4A"
|
||||
|
||||
ChatConsole().print(Panel(
|
||||
_render_final_assistant_content(response, mode=self.final_response_markdown),
|
||||
title=f"[{_resp_color} bold]⚕ /btw[/]",
|
||||
title_align="left",
|
||||
border_style=_resp_color,
|
||||
box=rich_box.HORIZONTALS,
|
||||
padding=(1, 4),
|
||||
))
|
||||
else:
|
||||
_cprint(" 💬 /btw: (no response)")
|
||||
|
||||
if self.bell_on_complete:
|
||||
sys.stdout.write("\a")
|
||||
sys.stdout.flush()
|
||||
|
||||
except Exception as e:
|
||||
if self._app:
|
||||
self._app.invalidate()
|
||||
time.sleep(0.05)
|
||||
print()
|
||||
_cprint(f" ❌ /btw failed: {e}")
|
||||
finally:
|
||||
if self._app:
|
||||
self._invalidate(min_interval=0)
|
||||
|
||||
thread = threading.Thread(target=run_btw, daemon=True, name=f"btw-{task_id}")
|
||||
thread.start()
|
||||
|
||||
@staticmethod
|
||||
def _try_launch_chrome_debug(port: int, system: str) -> bool:
|
||||
"""Try to launch Chrome/Chromium with remote debugging enabled.
|
||||
@@ -7415,6 +7322,31 @@ class HermesCLI:
|
||||
_cprint(f" {line}")
|
||||
except Exception:
|
||||
pass
|
||||
# First-touch onboarding: on the first tool in this process
|
||||
# that takes longer than the threshold while we're in the
|
||||
# noisiest progress mode, print a one-time hint about
|
||||
# /verbose. Latched on self so it fires at most once per
|
||||
# process; persisted to config.yaml so it never fires again
|
||||
# across processes either.
|
||||
try:
|
||||
if (
|
||||
not getattr(self, "_long_tool_hint_fired", False)
|
||||
and self.tool_progress_mode == "all"
|
||||
and duration >= 30.0
|
||||
):
|
||||
from agent.onboarding import (
|
||||
TOOL_PROGRESS_FLAG,
|
||||
is_seen,
|
||||
mark_seen,
|
||||
tool_progress_hint_cli,
|
||||
)
|
||||
if not is_seen(CLI_CONFIG, TOOL_PROGRESS_FLAG):
|
||||
self._long_tool_hint_fired = True
|
||||
_cprint(f" {_DIM}{tool_progress_hint_cli()}{_RST}")
|
||||
mark_seen(_hermes_home / "config.yaml", TOOL_PROGRESS_FLAG)
|
||||
CLI_CONFIG.setdefault("onboarding", {}).setdefault("seen", {})[TOOL_PROGRESS_FLAG] = True
|
||||
except Exception:
|
||||
pass
|
||||
self._invalidate()
|
||||
return
|
||||
if event_type != "tool.started":
|
||||
@@ -9298,6 +9230,24 @@ class HermesCLI:
|
||||
f"agent_running={self._agent_running}\n")
|
||||
except Exception:
|
||||
pass
|
||||
# First-touch onboarding: on the very first busy-while-running
|
||||
# event for this install, print a one-line tip explaining the
|
||||
# /busy knob. Flag persists to config.yaml and never fires
|
||||
# again. Guarded for exceptions so onboarding can't break
|
||||
# the input loop.
|
||||
try:
|
||||
from agent.onboarding import (
|
||||
BUSY_INPUT_FLAG,
|
||||
busy_input_hint_cli,
|
||||
is_seen,
|
||||
mark_seen,
|
||||
)
|
||||
if not is_seen(CLI_CONFIG, BUSY_INPUT_FLAG):
|
||||
_cprint(f" {_DIM}{busy_input_hint_cli(self.busy_input_mode)}{_RST}")
|
||||
mark_seen(_hermes_home / "config.yaml", BUSY_INPUT_FLAG)
|
||||
CLI_CONFIG.setdefault("onboarding", {}).setdefault("seen", {})[BUSY_INPUT_FLAG] = True
|
||||
except Exception:
|
||||
pass
|
||||
else:
|
||||
self._pending_input.put(payload)
|
||||
event.app.current_buffer.reset(append_to_history=True)
|
||||
@@ -9312,14 +9262,18 @@ class HermesCLI:
|
||||
"""Ctrl+Enter (c-j) inserts a newline. Most terminals send c-j for Ctrl+Enter."""
|
||||
event.current_buffer.insert_text('\n')
|
||||
|
||||
@kb.add(
|
||||
'c-g',
|
||||
filter=Condition(
|
||||
lambda: not self._clarify_state and not self._approval_state and not self._sudo_state and not self._secret_state
|
||||
),
|
||||
# VSCode/Cursor bind Ctrl+G to "Find Next" at the editor level, so
|
||||
# the keystroke never reaches the embedded terminal. Alt+G is unbound
|
||||
# in those IDEs and arrives here as ('escape', 'g') — register it as
|
||||
# a fallback so the editor handoff works inside Cursor/VSCode too.
|
||||
_editor_filter = Condition(
|
||||
lambda: not self._clarify_state and not self._approval_state and not self._sudo_state and not self._secret_state
|
||||
)
|
||||
|
||||
@kb.add('c-g', filter=_editor_filter)
|
||||
@kb.add('escape', 'g', filter=_editor_filter)
|
||||
def handle_open_in_editor(event):
|
||||
"""Ctrl+G opens the current draft in an external editor."""
|
||||
"""Ctrl+G (or Alt+G in VSCode/Cursor) opens the current draft in an external editor."""
|
||||
cli_ref._open_external_editor(event.current_buffer)
|
||||
|
||||
@kb.add('tab', eager=True)
|
||||
@@ -9783,6 +9737,11 @@ class HermesCLI:
|
||||
completer=_completer,
|
||||
),
|
||||
)
|
||||
# Keep prompt_toolkit on its simple tempfile path. Setting
|
||||
# buffer.tempfile = "prompt.md" triggers its complex-tempfile branch,
|
||||
# which tries to mkdir() the mkdtemp() directory again and raises
|
||||
# EEXIST. The suffix keeps markdown highlighting without that bug.
|
||||
input_area.buffer.tempfile_suffix = '.md'
|
||||
|
||||
# Dynamic height: accounts for both explicit newlines AND visual
|
||||
# wrapping of long lines so the input area always fits its content.
|
||||
@@ -10735,6 +10694,8 @@ class HermesCLI:
|
||||
return # silently suppress
|
||||
if isinstance(exc, KeyError) and "is not registered" in str(exc):
|
||||
return # suppress selector registration failures (#6393)
|
||||
if isinstance(exc, OSError) and getattr(exc, "errno", None) == errno.EIO:
|
||||
return # suppress I/O errors from broken stdout on interrupt (#13710)
|
||||
# Fall back to default handler for everything else
|
||||
loop.default_exception_handler(context)
|
||||
|
||||
@@ -10767,9 +10728,11 @@ class HermesCLI:
|
||||
except (EOFError, KeyboardInterrupt, BrokenPipeError):
|
||||
pass
|
||||
except (KeyError, OSError) as _stdin_err:
|
||||
# Catch selector registration failures from broken stdin (#6393).
|
||||
# This is the fallback for cases that slip past the fstat() guard.
|
||||
if "is not registered" in str(_stdin_err) or "Bad file descriptor" in str(_stdin_err):
|
||||
# Catch selector registration failures from broken stdin (#6393)
|
||||
# and I/O errors from broken stdout during interrupt (#13710).
|
||||
if isinstance(_stdin_err, OSError) and getattr(_stdin_err, "errno", None) == errno.EIO:
|
||||
pass # suppress broken-stdout I/O errors on interrupt (#13710)
|
||||
elif "is not registered" in str(_stdin_err) or "Bad file descriptor" in str(_stdin_err):
|
||||
print(
|
||||
f"\nError: stdin is not usable ({_stdin_err}).\n"
|
||||
"This can happen with certain Python installations (e.g. uv-managed cPython on macOS).\n"
|
||||
@@ -10788,12 +10751,6 @@ class HermesCLI:
|
||||
self.agent.interrupt()
|
||||
except Exception:
|
||||
pass
|
||||
# Flush memories before exit (only for substantial conversations)
|
||||
if self.agent and self.conversation_history:
|
||||
try:
|
||||
self.agent.flush_memories(self.conversation_history)
|
||||
except (Exception, KeyboardInterrupt):
|
||||
pass
|
||||
# Shut down voice recorder (release persistent audio stream)
|
||||
if hasattr(self, '_voice_recorder') and self._voice_recorder:
|
||||
try:
|
||||
|
||||
@@ -41,6 +41,15 @@ if [ "$(id -u)" = "0" ]; then
|
||||
echo "Warning: chown failed (rootless container?) — continuing anyway"
|
||||
fi
|
||||
|
||||
# Ensure config.yaml is readable by the hermes runtime user even if it was
|
||||
# edited on the host after initial ownership setup. Must run here (as root)
|
||||
# rather than after the gosu drop, otherwise a non-root caller like
|
||||
# `docker run -u $(id -u):$(id -g)` hits "Operation not permitted" (#15865).
|
||||
if [ -f "$HERMES_HOME/config.yaml" ]; then
|
||||
chown hermes:hermes "$HERMES_HOME/config.yaml" 2>/dev/null || true
|
||||
chmod 640 "$HERMES_HOME/config.yaml" 2>/dev/null || true
|
||||
fi
|
||||
|
||||
echo "Dropping root privileges"
|
||||
exec gosu hermes "$0" "$@"
|
||||
fi
|
||||
@@ -67,13 +76,6 @@ if [ ! -f "$HERMES_HOME/config.yaml" ]; then
|
||||
cp "$INSTALL_DIR/cli-config.yaml.example" "$HERMES_HOME/config.yaml"
|
||||
fi
|
||||
|
||||
# Ensure the main config file remains accessible to the hermes runtime user
|
||||
# even if it was edited on the host after initial ownership setup.
|
||||
if [ -f "$HERMES_HOME/config.yaml" ]; then
|
||||
chown hermes:hermes "$HERMES_HOME/config.yaml"
|
||||
chmod 640 "$HERMES_HOME/config.yaml"
|
||||
fi
|
||||
|
||||
# SOUL.md
|
||||
if [ ! -f "$HERMES_HOME/SOUL.md" ]; then
|
||||
cp "$INSTALL_DIR/docker/SOUL.md" "$HERMES_HOME/SOUL.md"
|
||||
|
||||
Binary file not shown.
+16
-3
@@ -21,6 +21,7 @@ Errors in hooks are caught and logged but never block the main pipeline.
|
||||
|
||||
import asyncio
|
||||
import importlib.util
|
||||
import sys
|
||||
from typing import Any, Callable, Dict, List, Optional
|
||||
|
||||
import yaml
|
||||
@@ -103,16 +104,28 @@ class HookRegistry:
|
||||
print(f"[hooks] Skipping {hook_name}: no events declared", flush=True)
|
||||
continue
|
||||
|
||||
# Dynamically load the handler module
|
||||
# Dynamically load the handler module.
|
||||
# Register in sys.modules BEFORE exec_module so Pydantic /
|
||||
# dataclasses / typing introspection can resolve forward
|
||||
# references (triggered by `from __future__ import annotations`
|
||||
# in the handler). Without this, a handler that declares a
|
||||
# Pydantic BaseModel for webhook/event payloads fails at first
|
||||
# dispatch with "TypeAdapter ... is not fully defined".
|
||||
module_name = f"hermes_hook_{hook_name}"
|
||||
spec = importlib.util.spec_from_file_location(
|
||||
f"hermes_hook_{hook_name}", handler_path
|
||||
module_name, handler_path
|
||||
)
|
||||
if spec is None or spec.loader is None:
|
||||
print(f"[hooks] Skipping {hook_name}: could not load handler.py", flush=True)
|
||||
continue
|
||||
|
||||
module = importlib.util.module_from_spec(spec)
|
||||
spec.loader.exec_module(module)
|
||||
sys.modules[module_name] = module
|
||||
try:
|
||||
spec.loader.exec_module(module)
|
||||
except Exception:
|
||||
sys.modules.pop(module_name, None)
|
||||
raise
|
||||
|
||||
handle_fn = getattr(module, "handle", None)
|
||||
if handle_fn is None:
|
||||
|
||||
@@ -9,6 +9,7 @@ Exposes an HTTP server with endpoints:
|
||||
- GET /v1/models — lists hermes-agent as an available model
|
||||
- POST /v1/runs — start a run, returns run_id immediately (202)
|
||||
- GET /v1/runs/{run_id}/events — SSE stream of structured lifecycle events
|
||||
- POST /v1/runs/{run_id}/stop — interrupt a running agent
|
||||
- GET /health — health check
|
||||
- GET /health/detailed — rich status for cross-container dashboard probing
|
||||
|
||||
@@ -586,6 +587,9 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
self._run_streams: Dict[str, "asyncio.Queue[Optional[Dict]]"] = {}
|
||||
# Creation timestamps for orphaned-run TTL sweep
|
||||
self._run_streams_created: Dict[str, float] = {}
|
||||
# Active run agent/task references for stop support
|
||||
self._active_run_agents: Dict[str, Any] = {}
|
||||
self._active_run_tasks: Dict[str, "asyncio.Task"] = {}
|
||||
self._session_db: Optional[Any] = None # Lazy-init SessionDB for session continuity
|
||||
|
||||
@staticmethod
|
||||
@@ -2441,6 +2445,7 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
stream_delta_callback=_text_cb,
|
||||
tool_progress_callback=event_cb,
|
||||
)
|
||||
self._active_run_agents[run_id] = agent
|
||||
def _run_sync():
|
||||
r = agent.run_conversation(
|
||||
user_message=user_message,
|
||||
@@ -2480,8 +2485,11 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
q.put_nowait(None)
|
||||
except Exception:
|
||||
pass
|
||||
self._active_run_agents.pop(run_id, None)
|
||||
self._active_run_tasks.pop(run_id, None)
|
||||
|
||||
task = asyncio.create_task(_run_and_close())
|
||||
self._active_run_tasks[run_id] = task
|
||||
try:
|
||||
self._background_tasks.add(task)
|
||||
except TypeError:
|
||||
@@ -2540,6 +2548,44 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
|
||||
return response
|
||||
|
||||
async def _handle_stop_run(self, request: "web.Request") -> "web.Response":
    """POST /v1/runs/{run_id}/stop — interrupt a running agent.

    Looks up the agent and task registered for ``run_id``, asks the agent
    to interrupt, cancels the task, and waits a bounded time for it to
    finish.

    Returns:
        The auth error response when ``_check_auth`` rejects the request,
        a 404 ``run_not_found`` error when neither an agent nor a task is
        registered for the run, otherwise a JSON body with the run id and
        ``"status": "stopping"``.
    """
    denied = self._check_auth(request)
    if denied:
        return denied

    run_id = request.match_info["run_id"]
    running_agent = self._active_run_agents.get(run_id)
    running_task = self._active_run_tasks.get(run_id)

    if running_agent is None and running_task is None:
        not_found = _openai_error(f"Run not found: {run_id}", code="run_not_found")
        return web.json_response(not_found, status=404)

    if running_agent is not None:
        # Best effort: a failing interrupt must not prevent the task
        # cancellation below.
        try:
            running_agent.interrupt("Stop requested via API")
        except Exception:
            pass

    still_running = running_task is not None and not running_task.done()
    if still_running:
        running_task.cancel()
        # run_conversation() executes in the default executor thread, which
        # task.cancel() cannot preempt; the agent.interrupt() call above is
        # what actually breaks the loop. The wait is capped so a slow or
        # unresponsive interrupt cannot hang this handler.
        try:
            await asyncio.wait_for(asyncio.shield(running_task), timeout=5.0)
        except asyncio.TimeoutError:
            logger.warning(
                "[api_server] stop for run %s timed out after 5s; "
                "agent may still be finishing the current step",
                run_id,
            )
        except (asyncio.CancelledError, Exception):
            pass

    return web.json_response({"run_id": run_id, "status": "stopping"})
|
||||
|
||||
async def _sweep_orphaned_runs(self) -> None:
|
||||
"""Periodically clean up run streams that were never consumed."""
|
||||
while True:
|
||||
@@ -2554,6 +2600,8 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
logger.debug("[api_server] sweeping orphaned run %s", run_id)
|
||||
self._run_streams.pop(run_id, None)
|
||||
self._run_streams_created.pop(run_id, None)
|
||||
self._active_run_agents.pop(run_id, None)
|
||||
self._active_run_tasks.pop(run_id, None)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# BasePlatformAdapter interface
|
||||
@@ -2589,6 +2637,7 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
# Structured event streaming
|
||||
self._app.router.add_post("/v1/runs", self._handle_runs)
|
||||
self._app.router.add_get("/v1/runs/{run_id}/events", self._handle_run_events)
|
||||
self._app.router.add_post("/v1/runs/{run_id}/stop", self._handle_stop_run)
|
||||
# Start background sweep to clean up orphaned (unconsumed) run streams
|
||||
sweep_task = asyncio.create_task(self._sweep_orphaned_runs())
|
||||
try:
|
||||
|
||||
@@ -1025,7 +1025,20 @@ class BasePlatformAdapter(ABC):
|
||||
self._post_delivery_callbacks: Dict[str, Any] = {}
|
||||
self._expected_cancelled_tasks: set[asyncio.Task] = set()
|
||||
self._busy_session_handler: Optional[Callable[[MessageEvent, str], Awaitable[bool]]] = None
|
||||
# Chats where auto-TTS on voice input is disabled (set by /voice off)
|
||||
# Auto-TTS on voice input: ``_auto_tts_default`` is the global default
|
||||
# (``voice.auto_tts`` in config.yaml, pushed by GatewayRunner on connect).
|
||||
# Per-chat overrides live in two sets populated from ``_voice_mode``:
|
||||
# - ``_auto_tts_enabled_chats``: chat explicitly opted in via ``/voice on``
|
||||
# or ``/voice tts`` (mode is ``voice_only`` or ``all``). Fires even when
|
||||
# the global default is False.
|
||||
# - ``_auto_tts_disabled_chats``: chat explicitly opted out via
|
||||
# ``/voice off`` (mode is ``off``). Suppresses auto-TTS even when the
|
||||
# global default is True.
|
||||
# The gate in _process_message() is:
|
||||
# fire if chat in _auto_tts_enabled_chats
|
||||
# OR (_auto_tts_default and chat not in _auto_tts_disabled_chats)
|
||||
self._auto_tts_default: bool = False
|
||||
self._auto_tts_enabled_chats: set = set()
|
||||
self._auto_tts_disabled_chats: set = set()
|
||||
# Chats where typing indicator is paused (e.g. during approval waits).
|
||||
# _keep_typing skips send_typing when the chat_id is in this set.
|
||||
@@ -1047,6 +1060,21 @@ class BasePlatformAdapter(ABC):
|
||||
def fatal_error_retryable(self) -> bool:
|
||||
return self._fatal_error_retryable
|
||||
|
||||
def _should_auto_tts_for_chat(self, chat_id: str) -> bool:
    """Decide whether auto-TTS on voice input applies to ``chat_id``.

    Precedence (Issue #16007):
      1. Chat is in ``_auto_tts_enabled_chats`` (explicit ``/voice on`` or
         ``/voice tts``) → fire, even when ``voice.auto_tts`` is False.
      2. Chat is in ``_auto_tts_disabled_chats`` (explicit ``/voice off``)
         → never fire.
      3. Otherwise use the global ``voice.auto_tts`` default stored in
         ``_auto_tts_default``.
    """
    opted_in = chat_id in self._auto_tts_enabled_chats
    # The opt-out set is only consulted when there is no explicit opt-in.
    if not opted_in and chat_id in self._auto_tts_disabled_chats:
        return False
    return opted_in or bool(self._auto_tts_default)
|
||||
|
||||
def set_fatal_error_handler(self, handler: Callable[["BasePlatformAdapter"], Awaitable[None] | None]) -> None:
|
||||
self._fatal_error_handler = handler
|
||||
|
||||
@@ -2214,12 +2242,14 @@ class BasePlatformAdapter(ABC):
|
||||
logger.info("[%s] extract_local_files found %d file(s) in response", self.name, len(local_files))
|
||||
|
||||
# Auto-TTS: if voice message, generate audio FIRST (before sending text)
|
||||
# Skipped when the chat has voice mode disabled (/voice off)
|
||||
# Gated via ``_should_auto_tts_for_chat``: fires when the chat has
|
||||
# an explicit ``/voice on|tts`` opt-in OR when ``voice.auto_tts`` is
|
||||
# True globally and no ``/voice off`` has been issued.
|
||||
_tts_path = None
|
||||
if (event.message_type == MessageType.VOICE
|
||||
if (self._should_auto_tts_for_chat(event.source.chat_id)
|
||||
and event.message_type == MessageType.VOICE
|
||||
and text_content
|
||||
and not media_files
|
||||
and event.source.chat_id not in self._auto_tts_disabled_chats):
|
||||
and not media_files):
|
||||
try:
|
||||
from tools.tts_tool import text_to_speech_tool, check_tts_requirements
|
||||
if check_tts_requirements():
|
||||
|
||||
@@ -2315,11 +2315,6 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
async def slash_background(interaction: discord.Interaction, prompt: str):
|
||||
await self._run_simple_slash(interaction, f"/background {prompt}", "Background task started~")
|
||||
|
||||
@tree.command(name="btw", description="Ephemeral side question using session context")
|
||||
@discord.app_commands.describe(question="Your side question (no tools, not persisted)")
|
||||
async def slash_btw(interaction: discord.Interaction, question: str):
|
||||
await self._run_simple_slash(interaction, f"/btw {question}")
|
||||
|
||||
# ── Auto-register any gateway-available commands not yet on the tree ──
|
||||
# This ensures new commands added to COMMAND_REGISTRY in
|
||||
# hermes_cli/commands.py automatically appear as Discord slash
|
||||
|
||||
+668
-411
File diff suppressed because it is too large
Load Diff
+9
-7
@@ -439,11 +439,11 @@ class SessionEntry:
|
||||
auto_reset_reason: Optional[str] = None # "idle" or "daily"
|
||||
reset_had_activity: bool = False # whether the expired session had any messages
|
||||
|
||||
# Set by the background expiry watcher after it successfully flushes
|
||||
# memories for this session. Persisted to sessions.json so the flag
|
||||
# survives gateway restarts (the old in-memory _pre_flushed_sessions
|
||||
# set was lost on restart, causing redundant re-flushes).
|
||||
memory_flushed: bool = False
|
||||
# Set by the background expiry watcher after it finalizes an expired
|
||||
# session (invoking on_session_finalize hooks and evicting the cached
|
||||
# agent). Persisted to sessions.json so the flag survives gateway
|
||||
# restarts — prevents redundant finalization runs.
|
||||
expiry_finalized: bool = False
|
||||
|
||||
# When True the next call to get_or_create_session() will auto-reset
|
||||
# this session (create a new session_id) so the user starts fresh.
|
||||
@@ -479,7 +479,7 @@ class SessionEntry:
|
||||
"last_prompt_tokens": self.last_prompt_tokens,
|
||||
"estimated_cost_usd": self.estimated_cost_usd,
|
||||
"cost_status": self.cost_status,
|
||||
"memory_flushed": self.memory_flushed,
|
||||
"expiry_finalized": self.expiry_finalized,
|
||||
"suspended": self.suspended,
|
||||
"resume_pending": self.resume_pending,
|
||||
"resume_reason": self.resume_reason,
|
||||
@@ -531,7 +531,7 @@ class SessionEntry:
|
||||
last_prompt_tokens=data.get("last_prompt_tokens", 0),
|
||||
estimated_cost_usd=data.get("estimated_cost_usd", 0.0),
|
||||
cost_status=data.get("cost_status", "unknown"),
|
||||
memory_flushed=data.get("memory_flushed", False),
|
||||
expiry_finalized=data.get("expiry_finalized", data.get("memory_flushed", False)),
|
||||
suspended=data.get("suspended", False),
|
||||
resume_pending=data.get("resume_pending", False),
|
||||
resume_reason=data.get("resume_reason"),
|
||||
@@ -1232,6 +1232,7 @@ class SessionStore:
|
||||
reasoning_content=message.get("reasoning_content") if message.get("role") == "assistant" else None,
|
||||
reasoning_details=message.get("reasoning_details") if message.get("role") == "assistant" else None,
|
||||
codex_reasoning_items=message.get("codex_reasoning_items") if message.get("role") == "assistant" else None,
|
||||
codex_message_items=message.get("codex_message_items") if message.get("role") == "assistant" else None,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.debug("Session DB operation failed: %s", e)
|
||||
@@ -1264,6 +1265,7 @@ class SessionStore:
|
||||
reasoning_content=msg.get("reasoning_content") if role == "assistant" else None,
|
||||
reasoning_details=msg.get("reasoning_details") if role == "assistant" else None,
|
||||
codex_reasoning_items=msg.get("codex_reasoning_items") if role == "assistant" else None,
|
||||
codex_message_items=msg.get("codex_message_items") if role == "assistant" else None,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.debug("Failed to rewrite transcript in DB: %s", e)
|
||||
|
||||
+10
-2
@@ -356,6 +356,14 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
|
||||
api_key_env_vars=(),
|
||||
base_url_env_var="BEDROCK_BASE_URL",
|
||||
),
|
||||
"azure-foundry": ProviderConfig(
|
||||
id="azure-foundry",
|
||||
name="Azure Foundry",
|
||||
auth_type="api_key",
|
||||
inference_base_url="", # User-provided endpoint
|
||||
api_key_env_vars=("AZURE_FOUNDRY_API_KEY",),
|
||||
base_url_env_var="AZURE_FOUNDRY_BASE_URL",
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
@@ -4236,10 +4244,10 @@ def _login_nous(args, pconfig: ProviderConfig) -> None:
|
||||
)
|
||||
|
||||
from hermes_cli.models import (
|
||||
_PROVIDER_MODELS, get_pricing_for_provider,
|
||||
get_curated_nous_model_ids, get_pricing_for_provider,
|
||||
check_nous_free_tier, partition_nous_models_by_tier,
|
||||
)
|
||||
model_ids = _PROVIDER_MODELS.get("nous", [])
|
||||
model_ids = get_curated_nous_model_ids()
|
||||
|
||||
print()
|
||||
unavailable_models: list = []
|
||||
|
||||
@@ -0,0 +1,300 @@
|
||||
"""Azure Foundry endpoint auto-detection.
|
||||
|
||||
Inspect an Azure AI Foundry / Azure OpenAI endpoint to determine:
|
||||
- API transport (OpenAI-style ``chat_completions`` vs
|
||||
Anthropic-style ``anthropic_messages``)
|
||||
- Available models (best effort — Azure does not expose a deployment
|
||||
listing via the inference API key, but Azure OpenAI v1 endpoints
|
||||
return the resource's model catalog via ``GET /models``)
|
||||
- Context length for each discovered/entered model, via the existing
|
||||
:func:`agent.model_metadata.get_model_context_length` resolver.
|
||||
|
||||
Rationale:
|
||||
|
||||
Azure has no pure-API-key deployment-listing endpoint — per Microsoft,
|
||||
deployment enumeration requires ARM management-plane auth. Azure
|
||||
OpenAI v1 endpoints ``{resource}.openai.azure.com/openai/v1`` do return
|
||||
a ``/models`` list, but it reflects the resource's *available* models
|
||||
rather than the user's *deployed* deployment names. In practice it is
|
||||
still a useful hint — the user picks a familiar model name and we look
|
||||
up its context length from the catalog.
|
||||
|
||||
The detector never crashes on errors (every HTTP call is wrapped in a
|
||||
broad try/except). Callers get a :class:`DetectionResult` with whatever
|
||||
information could be gathered, and fall back to manual entry for the
|
||||
rest.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Optional
|
||||
from urllib import request as urllib_request
|
||||
from urllib.error import HTTPError, URLError
|
||||
from urllib.parse import urlparse, urlunparse
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Default Azure OpenAI ``api-version`` to probe with. The v1 GA endpoint
|
||||
# accepts requests without ``api-version`` entirely, so this is only used
|
||||
# as a fallback for pre-v1 resources that still require it.
|
||||
_AZURE_OPENAI_PROBE_API_VERSIONS = (
|
||||
"2025-04-01-preview",
|
||||
"2024-10-21", # oldest GA that supports /models
|
||||
)
|
||||
|
||||
# Default Azure Anthropic ``api-version``. Matches the value used by
|
||||
# ``agent/anthropic_adapter.py`` when building the Anthropic client.
|
||||
_AZURE_ANTHROPIC_API_VERSION = "2025-04-15"
|
||||
|
||||
|
||||
@dataclass
class DetectionResult:
    """Advisory summary of what auto-detection learned from a base URL + API key.

    Attributes:
        api_mode: Detected API transport — ``"chat_completions"`` or
            ``"anthropic_messages"`` — or ``None`` when detection failed.
        models: Deployment / model IDs returned by ``/models`` (best
            effort). Empty when the endpoint doesn't expose the list with
            an API key.
        hostname: Lowercased host from the base URL, used for display
            messages.
        reason: Human-readable explanation of why ``api_mode`` was chosen,
            suitable for showing to the user in the wizard.
        models_probe_ok: ``True`` when ``/models`` returned a valid
            OpenAI-shaped payload.
        is_anthropic: ``True`` when the URL was determined to be an
            Anthropic-style endpoint (from path suffix or live probe).
    """

    api_mode: Optional[str] = None
    models: list[str] = field(default_factory=list)
    hostname: str = ""
    reason: str = ""
    models_probe_ok: bool = False
    is_anthropic: bool = False
|
||||
|
||||
|
||||
def _http_get_json(url: str, api_key: str, timeout: float = 6.0) -> tuple[int, Optional[dict]]:
    """GET ``url`` and return ``(status_code, parsed_json_or_None)``.

    The request carries both an ``api-key`` header and an
    ``Authorization: Bearer`` header so a single probe covers endpoints
    that expect either style.

    Args:
        url: Absolute URL to fetch.
        api_key: Key sent in both authentication headers.
        timeout: Timeout in seconds passed to ``urlopen``.

    Returns:
        ``(status, body)``. ``body`` is the decoded JSON payload, or
        ``None`` when the response was not valid JSON or the server
        answered with an HTTP error. ``status`` is ``0`` when no HTTP
        response was obtained at all (malformed URL, connection failure,
        timeout).

    Never raises.
    """
    try:
        # Request() validates the URL eagerly and raises ValueError for an
        # empty or scheme-less URL (e.g. "host.example/openai/v1/models"),
        # so it has to be built inside the try block to honour the
        # never-raises contract.
        req = urllib_request.Request(url, method="GET")
        # Azure OpenAI uses ``api-key``. Some Azure deployments (and
        # Anthropic-style routes) use ``Authorization: Bearer``. Send both
        # so we probe once per URL rather than twice.
        req.add_header("api-key", api_key)
        req.add_header("Authorization", f"Bearer {api_key}")
        req.add_header("User-Agent", "hermes-agent/azure-detect")
        with urllib_request.urlopen(req, timeout=timeout) as resp:
            body = resp.read()
            try:
                return resp.status, json.loads(body.decode("utf-8", errors="replace"))
            except Exception:
                # Reachable endpoint, but the body is not JSON.
                return resp.status, None
    except HTTPError as exc:
        return exc.code, None
    except (URLError, TimeoutError, OSError, ValueError) as exc:
        logger.debug("azure_detect: GET %s failed: %s", url, exc)
        return 0, None
    except Exception as exc:  # pragma: no cover — defensive
        logger.debug("azure_detect: GET %s unexpected error: %s", url, exc)
        return 0, None
|
||||
|
||||
|
||||
def _strip_trailing_v1(url: str) -> str:
    """Drop trailing slashes and one final ``/v1`` segment from ``url``.

    The result can be used as a prefix for building sub-paths such as
    ``<base>/v1/messages`` without doubling the version segment.
    """
    without_slashes = url.rstrip("/")
    trailing_v1 = re.compile(r"/v1/?$")
    return trailing_v1.sub("", without_slashes)
|
||||
|
||||
|
||||
def _looks_like_anthropic_path(url: str) -> bool:
    """Report whether the URL path has an ``anthropic`` segment.

    True when the lowercased path ends in ``/anthropic`` or contains
    ``/anthropic/``; the query string and host are not considered. Any
    parsing failure yields ``False``.
    """
    try:
        normalized = (urlparse(url).path or "").lower().rstrip("/")
    except Exception:
        return False
    if normalized.endswith("/anthropic"):
        return True
    # Re-append the slash so a segment is matched as a whole
    # ("/anthropic/v1" matches, "/anthropic-proxy" does not).
    return "/anthropic/" in f"{normalized}/"
|
||||
|
||||
|
||||
def _extract_model_ids(payload: dict) -> list[str]:
    """Collect model IDs from an OpenAI-shaped ``/models`` response.

    Expects ``{"data": [{"id": ...}, ...]}``. For each dict entry the
    first truthy value among ``id``, ``model`` and ``name`` is used, and
    it is kept only when it is a non-empty string. Any shape mismatch
    yields ``[]``.
    """
    if not isinstance(payload, dict):
        return []
    entries = payload.get("data")
    if not isinstance(entries, list):
        return []

    # OpenAI shape: {"id": "gpt-5.4", "object": "model", ...}
    candidates = (
        entry.get("id") or entry.get("model") or entry.get("name")
        for entry in entries
        if isinstance(entry, dict)
    )
    return [candidate for candidate in candidates if isinstance(candidate, str) and candidate]
|
||||
|
||||
|
||||
def _probe_openai_models(base_url: str, api_key: str) -> tuple[bool, list[str]]:
    """Probe ``<base>/models`` for an OpenAI-shaped response.

    Tries the bare ``/models`` URL first, then one URL per entry in
    ``_AZURE_OPENAI_PROBE_API_VERSIONS`` with an explicit ``api-version``.

    Returns:
        ``(ok, models)``. ``ok`` is True iff some candidate answered
        200 with an OpenAI-shaped JSON body; ``models`` holds the IDs it
        listed (possibly empty).
    """
    root = base_url.rstrip("/")

    # Azure OpenAI v1: {resource}.openai.azure.com/openai/v1 — no
    # api-version required for GA paths, so the bare URL goes first and
    # the versioned URLs serve as fallbacks for pre-v1 resources.
    probe_urls = [f"{root}/models"]
    probe_urls.extend(
        f"{root}/models?api-version={version}"
        for version in _AZURE_OPENAI_PROBE_API_VERSIONS
    )

    for probe_url in probe_urls:
        status, body = _http_get_json(probe_url, api_key)
        if status != 200 or body is None:
            continue
        model_ids = _extract_model_ids(body)
        if model_ids:
            logger.info(
                "azure_detect: /models probe OK at %s (%d models)",
                probe_url, len(model_ids),
            )
            return True, model_ids
        # 200 + empty list still counts as "OpenAI shape, no models
        # listed" — the user can proceed with manual entry.
        if isinstance(body, dict) and "data" in body:
            return True, []
    return False, []
|
||||
|
||||
|
||||
def _probe_anthropic_messages(base_url: str, api_key: str) -> bool:
    """Check whether ``<base>/v1/messages`` recognises the Anthropic Messages shape.

    Sends a minimal request (``max_tokens=1``, placeholder model name
    ``"probe"``) and inspects the outcome. The endpoint counts as
    Anthropic-style when it answers with a non-5xx success status, with an
    error body that mentions ``anthropic`` or carries both ``"type"`` and
    ``"error"`` keys, or with a 400 whose body mentions ``messages`` or
    ``model``.

    Args:
        base_url: Endpoint base URL; a trailing ``/v1`` is tolerated.
        api_key: Key sent as both ``api-key`` and ``Authorization: Bearer``.

    Returns:
        ``True`` when the endpoint appears to speak the Anthropic Messages
        API, ``False`` otherwise — including when the URL is malformed or
        the endpoint is unreachable. Never raises.
    """
    payload = json.dumps({
        "model": "probe",
        "max_tokens": 1,
        "messages": [{"role": "user", "content": "ping"}],
    }).encode("utf-8")
    try:
        # URL building and Request() sit inside the try block: Request()
        # raises ValueError for an empty or scheme-less URL, and this probe
        # must report "not Anthropic" rather than crash the detector.
        base = _strip_trailing_v1(base_url)
        url = f"{base}/v1/messages?api-version={_AZURE_ANTHROPIC_API_VERSION}"
        req = urllib_request.Request(url, method="POST", data=payload)
        req.add_header("api-key", api_key)
        req.add_header("Authorization", f"Bearer {api_key}")
        req.add_header("anthropic-version", "2023-06-01")
        req.add_header("content-type", "application/json")
        req.add_header("User-Agent", "hermes-agent/azure-detect")
        with urllib_request.urlopen(req, timeout=6.0) as resp:
            # Should never 200 — "probe" isn't a real deployment. But
            # if it does, the endpoint definitely speaks Anthropic.
            return resp.status < 500
    except HTTPError as exc:
        # An HTTP error with an Anthropic-shaped body = Anthropic endpoint.
        try:
            body = exc.read().decode("utf-8", errors="replace")
            lowered = body.lower()
            has_error_envelope = '"type"' in lowered and '"error"' in lowered
            if "anthropic" in lowered or has_error_envelope:
                return True
            # Pre-Azure-v1 Azure Foundry returns a plain 404 for
            # Anthropic-style calls on non-Anthropic deployments. A
            # 400 "model not found" IS Anthropic though.
            if exc.code == 400 and ("messages" in lowered or "model" in lowered):
                return True
            return False
        except Exception:
            return False
    except (URLError, TimeoutError, OSError, ValueError):
        return False
    except Exception:  # pragma: no cover
        return False
|
||||
|
||||
|
||||
def detect(base_url: str, api_key: str) -> DetectionResult:
    """Inspect an Azure endpoint and describe its transport + models.

    Intended to run in the wizard before the user is asked to pick an API
    mode manually. The returned :class:`DetectionResult` is *advisory*:
    when its ``api_mode`` is ``None`` the caller should fall back to
    asking the user.

    Detection order: URL path sniff, then the OpenAI-style ``/models``
    probe, then the Anthropic Messages probe.
    """
    try:
        hostname = (urlparse(base_url).hostname or "").lower()
    except Exception:
        hostname = ""

    # 1. Path sniff. Azure Foundry exposes Anthropic-style deployments
    #    under a dedicated ``/anthropic`` path.
    if _looks_like_anthropic_path(base_url):
        return DetectionResult(
            api_mode="anthropic_messages",
            hostname=hostname,
            reason="URL path ends in /anthropic → Anthropic Messages API",
            is_anthropic=True,
        )

    # 2. OpenAI-style /models probe. If this works, the endpoint
    #    definitely speaks OpenAI wire.
    probe_ok, model_ids = _probe_openai_models(base_url, api_key)
    if probe_ok:
        if model_ids:
            why = f"GET /models returned {len(model_ids)} model(s) — OpenAI-style endpoint"
        else:
            why = "GET /models returned an OpenAI-shaped empty list — OpenAI-style endpoint"
        return DetectionResult(
            api_mode="chat_completions",
            models=model_ids,
            hostname=hostname,
            reason=why,
            models_probe_ok=True,
        )

    # 3. Fallback: probe the Anthropic Messages shape. Slower and more
    #    intrusive than /models, so it only runs when the OpenAI probe
    #    failed.
    if _probe_anthropic_messages(base_url, api_key):
        return DetectionResult(
            api_mode="anthropic_messages",
            hostname=hostname,
            reason="Endpoint accepts Anthropic Messages shape",
            is_anthropic=True,
        )

    # Nothing matched. Caller falls back to manual selection.
    return DetectionResult(
        hostname=hostname,
        reason=(
            "Could not probe endpoint (private network, missing model list, or "
            "non-standard path) — falling back to manual API-mode selection"
        ),
    )
|
||||
|
||||
|
||||
def lookup_context_length(model: str, base_url: str, api_key: str) -> Optional[int]:
    """Resolve a model's context length, or ``None`` when it is only a guess.

    Thin wrapper around
    :func:`agent.model_metadata.get_model_context_length`. The resolved
    value is returned only when it is a positive integer different from
    ``DEFAULT_FALLBACK_CONTEXT``, so the wizard can tell "we actually know
    this" apart from "we guessed". Import or lookup failures yield
    ``None``.
    """
    try:
        from agent.model_metadata import (
            DEFAULT_FALLBACK_CONTEXT,
            get_model_context_length,
        )
    except Exception:
        return None

    try:
        length = get_model_context_length(model, base_url=base_url, api_key=api_key)
    except Exception as exc:
        logger.debug("azure_detect: context length lookup failed: %s", exc)
        return None

    is_known = (
        isinstance(length, int)
        and length > 0
        and length != DEFAULT_FALLBACK_CONTEXT
    )
    return length if is_known else None
|
||||
|
||||
|
||||
__all__ = ["DetectionResult", "detect", "lookup_context_length"]
|
||||
@@ -84,9 +84,7 @@ COMMAND_REGISTRY: list[CommandDef] = [
|
||||
CommandDef("deny", "Deny a pending dangerous command", "Session",
|
||||
gateway_only=True),
|
||||
CommandDef("background", "Run a prompt in the background", "Session",
|
||||
aliases=("bg",), args_hint="<prompt>"),
|
||||
CommandDef("btw", "Ephemeral side question using session context (no tools, not persisted)", "Session",
|
||||
args_hint="<question>"),
|
||||
aliases=("bg", "btw"), args_hint="<prompt>"),
|
||||
CommandDef("agents", "Show active agents and running tasks", "Session",
|
||||
aliases=("tasks",)),
|
||||
CommandDef("queue", "Queue a prompt for the next turn (doesn't interrupt)", "Session",
|
||||
@@ -103,7 +101,8 @@ COMMAND_REGISTRY: list[CommandDef] = [
|
||||
# Configuration
|
||||
CommandDef("config", "Show current configuration", "Configuration",
|
||||
cli_only=True),
|
||||
CommandDef("model", "Switch model for this session", "Configuration", args_hint="[model] [--provider name] [--global]"),
|
||||
CommandDef("model", "Switch model for this session", "Configuration",
|
||||
aliases=("provider",), args_hint="[model] [--provider name] [--global]"),
|
||||
CommandDef("gquota", "Show Google Gemini Code Assist quota usage", "Info",
|
||||
cli_only=True),
|
||||
|
||||
@@ -141,6 +140,11 @@ COMMAND_REGISTRY: list[CommandDef] = [
|
||||
CommandDef("cron", "Manage scheduled tasks", "Tools & Skills",
|
||||
cli_only=True, args_hint="[subcommand]",
|
||||
subcommands=("list", "add", "create", "edit", "pause", "resume", "run", "remove")),
|
||||
CommandDef("kanban", "Multi-profile collaboration board (tasks, links, comments)",
|
||||
"Tools & Skills", args_hint="[subcommand]",
|
||||
subcommands=("list", "ls", "show", "create", "assign", "link", "unlink",
|
||||
"claim", "comment", "complete", "block", "unblock", "archive",
|
||||
"tail", "dispatch", "context", "init", "gc")),
|
||||
CommandDef("reload", "Reload .env variables into the running session", "Tools & Skills",
|
||||
cli_only=True),
|
||||
CommandDef("reload-mcp", "Reload MCP servers from config", "Tools & Skills",
|
||||
|
||||
+108
-8
@@ -612,14 +612,6 @@ DEFAULT_CONFIG = {
|
||||
"timeout": 30,
|
||||
"extra_body": {},
|
||||
},
|
||||
"flush_memories": {
|
||||
"provider": "auto",
|
||||
"model": "",
|
||||
"base_url": "",
|
||||
"api_key": "",
|
||||
"timeout": 30,
|
||||
"extra_body": {},
|
||||
},
|
||||
"title_generation": {
|
||||
"provider": "auto",
|
||||
"model": "",
|
||||
@@ -967,6 +959,27 @@ DEFAULT_CONFIG = {
|
||||
"backup_count": 3, # Number of rotated backup files to keep
|
||||
},
|
||||
|
||||
# Remotely-hosted model catalog manifest. When enabled, the CLI fetches
|
||||
# curated model lists for OpenRouter and Nous Portal from this URL,
|
||||
# falling back to the in-repo snapshot on network failure. Lets us
|
||||
# update model picker lists without shipping a hermes-agent release.
|
||||
# The default URL is served by the docs site GitHub Pages deploy.
|
||||
"model_catalog": {
|
||||
"enabled": True,
|
||||
"url": "https://hermes-agent.nousresearch.com/docs/api/model-catalog.json",
|
||||
# Disk cache TTL in hours. Beyond this, the CLI refetches on the
|
||||
# next /model or `hermes model` invocation; network failures
|
||||
# silently fall back to the stale cache.
|
||||
"ttl_hours": 24,
|
||||
# Optional per-provider override URLs for third parties that want
|
||||
# to self-host their own curation list using the same schema.
|
||||
# Example:
|
||||
# providers:
|
||||
# openrouter:
|
||||
# url: https://example.com/my-curation.json
|
||||
"providers": {},
|
||||
},
|
||||
|
||||
# Network settings — workarounds for connectivity issues.
|
||||
"network": {
|
||||
# Force IPv4 connections. On servers with broken or unreachable IPv6,
|
||||
@@ -1003,6 +1016,13 @@ DEFAULT_CONFIG = {
|
||||
"min_interval_hours": 24,
|
||||
},
|
||||
|
||||
# Contextual first-touch onboarding hints (see agent/onboarding.py).
|
||||
# Each hint is shown once per install and then latched here so it
|
||||
# never fires again. Users can wipe the section to re-see all hints.
|
||||
"onboarding": {
|
||||
"seen": {},
|
||||
},
|
||||
|
||||
# Config schema version - bump this when adding new required fields
|
||||
"_config_version": 22,
|
||||
}
|
||||
@@ -1379,6 +1399,21 @@ OPTIONAL_ENV_VARS = {
|
||||
"category": "provider",
|
||||
"advanced": True,
|
||||
},
|
||||
"AZURE_FOUNDRY_API_KEY": {
|
||||
"description": "Azure Foundry API key for custom Azure endpoints",
|
||||
"prompt": "Azure Foundry API Key",
|
||||
"url": "https://ai.azure.com/",
|
||||
"password": True,
|
||||
"category": "provider",
|
||||
},
|
||||
"AZURE_FOUNDRY_BASE_URL": {
|
||||
"description": "Azure Foundry base URL (set via 'hermes model' for endpoint-specific config)",
|
||||
"prompt": "Azure Foundry base URL",
|
||||
"url": None,
|
||||
"password": False,
|
||||
"category": "provider",
|
||||
"advanced": True,
|
||||
},
|
||||
|
||||
# ── Tool API keys ──
|
||||
"EXA_API_KEY": {
|
||||
@@ -2214,6 +2249,71 @@ def get_compatible_custom_providers(
|
||||
return compatible
|
||||
|
||||
|
||||
def get_custom_provider_context_length(
    model: str,
    base_url: str,
    custom_providers: Optional[List[Dict[str, Any]]] = None,
    config: Optional[Dict[str, Any]] = None,
) -> Optional[int]:
    """Return the per-model ``context_length`` override from ``custom_providers``.

    An entry applies when its ``base_url`` equals ``base_url`` once trailing
    slashes are stripped from both.  The value read is
    ``custom_providers[i].models.<model>.context_length``; it must convert
    with ``int()`` and be positive.  Entries that do not qualify are skipped
    and the search continues with the next one.  ``None`` means no override
    applies.

    When ``custom_providers`` is ``None`` the list is obtained from
    ``get_compatible_custom_providers(config)``; if that call raises, the raw
    ``config["custom_providers"]`` list is used instead (or ``None`` is
    returned when no ``config`` was supplied).

    This is the single source of truth for custom-provider context overrides,
    used by:
    * ``AIAgent.__init__`` (startup resolution)
    * ``AIAgent.switch_model`` (mid-session ``/model`` switch)
    * ``hermes_cli.model_switch.resolve_display_context_length`` (``/model`` confirmation display)
    * ``gateway.run._format_session_info`` (``/info`` display)
    * ``agent.model_metadata.get_model_context_length`` (when custom_providers is threaded through)

    Before this helper existed, the lookup was duplicated in ``run_agent.py``'s
    startup path only; every other path (notably ``/model`` switch) fell back
    to the 128K default. See #15779.
    """
    if not (model and base_url):
        return None

    providers = custom_providers
    if providers is None:
        try:
            providers = get_compatible_custom_providers(config)
        except Exception:
            if config is None:
                return None
            raw_list = config.get("custom_providers")
            providers = raw_list if isinstance(raw_list, list) else []
    if not isinstance(providers, list):
        return None

    wanted_url = (base_url or "").rstrip("/")
    if not wanted_url:
        return None

    for provider in providers:
        if not isinstance(provider, dict):
            continue
        # ``wanted_url`` is non-empty here, so an entry with an empty URL
        # fails this comparison as well.
        provider_url = (provider.get("base_url") or "").rstrip("/")
        if provider_url != wanted_url:
            continue
        model_table = provider.get("models")
        per_model = model_table.get(model) if isinstance(model_table, dict) else None
        if not isinstance(per_model, dict):
            continue
        override = per_model.get("context_length")
        if override is None:
            continue
        try:
            parsed = int(override)
        except (TypeError, ValueError):
            continue
        if parsed > 0:
            return parsed
    return None
|
||||
|
||||
|
||||
def check_config_version() -> Tuple[int, int]:
|
||||
"""
|
||||
Check config version.
|
||||
|
||||
@@ -320,7 +320,11 @@ def run_doctor(args):
|
||||
known_providers.add("custom:" + name.lower().replace(" ", "-"))
|
||||
|
||||
canonical_provider = provider
|
||||
if provider and _resolve_provider_full is not None and provider != "auto":
|
||||
if (
|
||||
provider
|
||||
and _resolve_provider_full is not None
|
||||
and provider not in ("auto", "custom")
|
||||
):
|
||||
provider_def = _resolve_provider_full(provider, user_providers, custom_providers)
|
||||
canonical_provider = provider_def.id if provider_def is not None else None
|
||||
|
||||
|
||||
@@ -0,0 +1,361 @@
|
||||
"""
|
||||
hermes fallback — manage the fallback provider chain.
|
||||
|
||||
Fallback providers are tried in order when the primary model fails with
|
||||
rate-limit, overload, or connection errors. See:
|
||||
https://hermes-agent.nousresearch.com/docs/user-guide/features/fallback-providers
|
||||
|
||||
Subcommands:
|
||||
hermes fallback [list] Show the current fallback chain (default when no subcommand)
|
||||
hermes fallback add Pick provider + model via the same picker as `hermes model`,
|
||||
then append the selection to the chain
|
||||
hermes fallback remove Pick an entry to delete from the chain
|
||||
hermes fallback clear Remove all fallback entries
|
||||
|
||||
Storage: ``fallback_providers`` in ``~/.hermes/config.yaml`` (top-level, list of
|
||||
``{provider, model, base_url?, api_mode?}`` dicts). The legacy single-dict
|
||||
``fallback_model`` format is migrated to the new list format on the next write (add, remove, or clear).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import copy
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _read_chain(config: Dict[str, Any]) -> List[Dict[str, Any]]:
    """Return the normalized fallback chain as a list of fresh dicts.

    ``fallback_providers`` (the list format) wins whenever it yields at least
    one usable entry.  Otherwise the legacy ``fallback_model`` key is
    consulted, which may hold a single dict or a list of dicts.  An entry is
    usable when it is a dict with truthy ``provider`` and ``model`` values;
    anything else is dropped.  Each returned entry is a shallow copy, so
    callers can mutate the result without touching ``config``.
    """

    def _usable(candidates: Any) -> List[Dict[str, Any]]:
        # Copy every well-formed entry; skip non-dicts and incomplete dicts.
        return [
            dict(item)
            for item in candidates
            if isinstance(item, dict) and item.get("provider") and item.get("model")
        ]

    current = config.get("fallback_providers") or []
    if isinstance(current, list):
        entries = _usable(current)
        if entries:
            return entries

    legacy = config.get("fallback_model")
    if isinstance(legacy, dict):
        return _usable([legacy])
    if isinstance(legacy, list):
        return _usable(legacy)
    return []
|
||||
|
||||
|
||||
def _write_chain(config: Dict[str, Any], chain: List[Dict[str, Any]]) -> None:
    """Store ``chain`` under ``fallback_providers`` and drop the legacy key.

    ``config`` is mutated in place.  The legacy ``fallback_model`` key is
    removed on every write so the list is the only source of truth.
    """
    # pop() with a default is a no-op when the legacy key is absent.
    config.pop("fallback_model", None)
    config["fallback_providers"] = chain
|
||||
|
||||
|
||||
def _format_entry(entry: Dict[str, Any]) -> str:
    """Render a fallback entry as ``<model> (via <provider>)`` on one line.

    A missing ``model`` or ``provider`` key is shown as ``?``.  When the
    entry carries a truthy ``base_url`` it is appended in square brackets.
    """
    model_name = entry.get("model", "?")
    provider_name = entry.get("provider", "?")
    endpoint = entry.get("base_url")

    rendered = f"{model_name} (via {provider_name})"
    if endpoint:
        rendered += f" [{endpoint}]"
    return rendered
|
||||
|
||||
|
||||
def _extract_fallback_from_model_cfg(model_cfg: Any) -> Optional[Dict[str, Any]]:
    """Build a ``{provider, model, base_url?, api_mode?}`` dict from a ``config["model"]`` snapshot.

    Returns ``None`` when ``model_cfg`` is not a dict or when either the
    provider or the model is blank after stripping.  The model name is read
    from ``default`` first (the key the picker writes) and from ``model``
    otherwise.  ``base_url`` and ``api_mode`` are included only when
    non-blank.
    """
    if not isinstance(model_cfg, dict):
        return None

    def _clean(*keys: str) -> str:
        # Stripped text of the first truthy value among ``keys``, else "".
        for key in keys:
            value = model_cfg.get(key)
            if value:
                return value.strip()
        return ""

    provider = _clean("provider")
    model = _clean("default", "model")
    if not (provider and model):
        return None

    entry: Dict[str, Any] = {"provider": provider, "model": model}
    for optional_key in ("base_url", "api_mode"):
        value = _clean(optional_key)
        if value:
            entry[optional_key] = value
    return entry
|
||||
|
||||
|
||||
# Unique marker returned by ``_snapshot_auth_active_provider`` when the auth
# store could not be read.  It is deliberately distinct from ``None``, which
# is a legitimate stored value for ``active_provider``.
_AUTH_SNAPSHOT_UNAVAILABLE: Any = object()


def _snapshot_auth_active_provider() -> Any:
    """Return the current ``active_provider`` from the auth store.

    Returns:
        The stored value (possibly ``None``), or the
        ``_AUTH_SNAPSHOT_UNAVAILABLE`` sentinel when the store could not be
        imported or loaded.  The sentinel lets the matching restore call
        recognise a failed snapshot and skip the write instead of
        overwriting the real value with ``None``.
    """
    try:
        from hermes_cli.auth import _load_auth_store

        return _load_auth_store().get("active_provider")
    except Exception:
        return _AUTH_SNAPSHOT_UNAVAILABLE


def _restore_auth_active_provider(value: Any) -> None:
    """Write back a previously snapshotted ``active_provider`` value.

    Args:
        value: Result of ``_snapshot_auth_active_provider()``.  When it is
            the ``_AUTH_SNAPSHOT_UNAVAILABLE`` sentinel nothing is written,
            because the original value was never captured.
    """
    if value is _AUTH_SNAPSHOT_UNAVAILABLE:
        return
    try:
        from hermes_cli.auth import _auth_store_lock, _load_auth_store, _save_auth_store

        with _auth_store_lock():
            store = _load_auth_store()
            store["active_provider"] = value
            _save_auth_store(store)
    except Exception:
        # Best-effort — if auth.json can't be restored, the user's primary
        # provider may have been deactivated by the picker. They can re-run
        # `hermes model` to fix it. Don't fail the fallback add.
        pass
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Subcommand handlers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def cmd_fallback_list(args) -> None:  # noqa: ARG001
    """Print the configured fallback chain, or a hint when it is empty.

    ``args`` is accepted for dispatcher compatibility and is not read.
    """
    from hermes_cli.config import load_config

    cfg = load_config()
    entries = _read_chain(cfg)

    print()
    if entries:
        primary_desc = _describe_primary(cfg)
        if primary_desc:
            print(f" Primary: {primary_desc}")
            print()

        noun = "entry" if len(entries) == 1 else "entries"
        print(f" Fallback chain ({len(entries)} {noun}):")
        for position, item in enumerate(entries, start=1):
            print(f" {position}. {_format_entry(item)}")
        print()
        print(" Tried in order when the primary fails (rate-limit, 5xx, connection errors).")
        print(" Docs: https://hermes-agent.nousresearch.com/docs/user-guide/features/fallback-providers")
        print()
        return

    # Empty chain: tell the user how to add the first entry.
    print(" No fallback providers configured.")
    print()
    print(" Add one with: hermes fallback add")
    print()
|
||||
|
||||
|
||||
def _describe_primary(config: Dict[str, Any]) -> Optional[str]:
    """Describe the primary model on one line for display.

    ``config["model"]`` may be a dict or a plain string.  A dict is rendered
    as ``<model> (via <provider>)`` with ``?`` standing in for blank parts;
    a non-blank string is returned stripped.  Anything else gives ``None``.
    """
    primary = config.get("model")

    if isinstance(primary, str):
        text = primary.strip()
        return text or None
    if not isinstance(primary, dict):
        return None

    provider_name = (primary.get("provider") or "?").strip() or "?"
    model_name = (primary.get("default") or primary.get("model") or "?").strip() or "?"
    return f"{model_name} (via {provider_name})"
|
||||
|
||||
|
||||
def cmd_fallback_add(args) -> None:
    """Launch the same picker as `hermes model`, then append the selection to the chain.

    The picker persists its choice to ``config["model"]``, so the primary
    model and the auth store's active provider are snapshotted first and
    rolled back afterwards in every outcome.  The picked provider/model is
    appended to ``fallback_providers`` unless nothing was selected, it
    equals the primary, or it is already in the chain.

    Args:
        args: Parsed CLI arguments, forwarded unchanged to the picker.

    Raises:
        BaseException: Whatever the picker raises (``SystemExit`` on auth
            failure, ``KeyboardInterrupt``, unexpected errors) is re-raised
            after the primary model and auth state have been rolled back.
    """
    from hermes_cli.main import _require_tty, select_provider_and_model
    from hermes_cli.config import load_config, save_config

    _require_tty("fallback add")

    # Snapshot BEFORE the picker runs so the primary can be put back afterwards.
    before_cfg = load_config()
    model_before = copy.deepcopy(before_cfg.get("model"))
    active_provider_before = _snapshot_auth_active_provider()

    def _rollback_primary() -> None:
        # Undo whatever the picker wrote to config["model"] and the auth store.
        _restore_model_cfg(model_before)
        _restore_auth_active_provider(active_provider_before)

    print()
    print(" Adding a fallback provider. The picker below is the same one used by")
    print(" `hermes model` — select the provider + model you want as a fallback.")
    print()

    try:
        select_provider_and_model(args=args)
    except BaseException:
        # Some provider flows exit on auth failure, and the user may hit
        # Ctrl-C after the picker has already saved a partial choice.
        # Catching only SystemExit would leave the primary overwritten in
        # the other cases, so restore on anything and re-raise.
        _rollback_primary()
        raise

    # Read the post-picker state to see what the user selected.
    after_cfg = load_config()
    new_entry = _extract_fallback_from_model_cfg(after_cfg.get("model"))

    # Every outcome below needs the primary restored, so do it once here.
    # This runs before the final reload on purpose: the picker may have
    # touched other top-level keys (custom_providers, provider credentials)
    # that should be kept.
    _rollback_primary()

    if not new_entry:
        # Picker didn't complete (user cancelled or flow bailed). Nothing to do.
        print()
        print(" No fallback added.")
        return

    # Picker picked the same thing that's already the primary → there's
    # nothing useful to add as a fallback to itself.
    primary_entry = _extract_fallback_from_model_cfg(model_before)
    if (
        primary_entry
        and primary_entry["provider"] == new_entry["provider"]
        and primary_entry["model"] == new_entry["model"]
    ):
        print()
        print(f" Selected model matches the current primary ({_format_entry(new_entry)}).")
        print(" A provider cannot be a fallback for itself — no change.")
        return

    final_cfg = load_config()
    chain = _read_chain(final_cfg)

    # Reject exact-duplicate fallback entries (same provider and model).
    already_present = any(
        existing.get("provider") == new_entry["provider"]
        and existing.get("model") == new_entry["model"]
        for existing in chain
    )
    if already_present:
        print()
        print(f" {_format_entry(new_entry)} is already in the fallback chain — skipped.")
        return

    chain.append(new_entry)
    _write_chain(final_cfg, chain)
    save_config(final_cfg)

    noun = "entry" if len(chain) == 1 else "entries"
    print()
    print(f" Added fallback: {_format_entry(new_entry)}")
    print(f" Chain is now {len(chain)} {noun} long.")
    print()
    print(" Run `hermes fallback list` to view, or `hermes fallback remove` to delete.")
|
||||
|
||||
|
||||
def _restore_model_cfg(model_before: Any) -> None:
    """Reset ``config["model"]`` to an earlier snapshot and save the config.

    A ``None`` snapshot means the key was absent, so it is removed.  Any
    other value is deep-copied back so the caller's snapshot stays untouched.
    """
    from hermes_cli.config import load_config, save_config

    current = load_config()
    if model_before is not None:
        current["model"] = copy.deepcopy(model_before)
    else:
        current.pop("model", None)
    save_config(current)
|
||||
|
||||
|
||||
def cmd_fallback_remove(args) -> None:  # noqa: ARG001
    """Let the user pick one fallback entry and delete it from the chain.

    The menu lists every entry plus a trailing "Cancel" item.  The curses
    picker is tried first; if importing or running it raises, the plain
    numbered prompt is used instead.  ``args`` is not read.
    """
    from hermes_cli.config import load_config, save_config

    cfg = load_config()
    entries = _read_chain(cfg)

    if not entries:
        print()
        print(" No fallback providers configured — nothing to remove.")
        print()
        return

    prompt = "Select a fallback to remove:"
    menu = [_format_entry(item) for item in entries] + ["Cancel"]

    try:
        from hermes_cli.setup import _curses_prompt_choice

        picked = _curses_prompt_choice(prompt, menu, 0)
    except Exception:
        picked = _numbered_pick(prompt, menu)

    # "Cancel" sits at index len(entries), so it lands outside the valid range.
    if picked is None or not (0 <= picked < len(entries)):
        print()
        print(" Cancelled — no change.")
        return

    removed = entries.pop(picked)
    _write_chain(cfg, entries)
    save_config(cfg)

    print()
    print(f" Removed fallback: {_format_entry(removed)}")
    if not entries:
        print(" Fallback chain is now empty.")
    else:
        noun = "entry" if len(entries) == 1 else "entries"
        print(f" Chain is now {len(entries)} {noun} long.")
    print()
|
||||
|
||||
|
||||
def cmd_fallback_clear(args) -> None:  # noqa: ARG001
    """Empty the fallback chain after an explicit ``y``/``yes`` confirmation.

    The current chain is printed first.  Any other answer, Ctrl-C, or
    end-of-input leaves the configuration untouched.  ``args`` is not read.
    """
    from hermes_cli.config import load_config, save_config

    cfg = load_config()
    entries = _read_chain(cfg)

    if not entries:
        print()
        print(" No fallback providers configured — nothing to clear.")
        print()
        return

    noun = "entry" if len(entries) == 1 else "entries"
    print()
    print(f" Current fallback chain ({len(entries)} {noun}):")
    for position, item in enumerate(entries, start=1):
        print(f" {position}. {_format_entry(item)}")
    print()

    try:
        answer = input(" Clear all entries? [y/N]: ").strip().lower()
    except (KeyboardInterrupt, EOFError):
        print()
        print(" Cancelled.")
        return

    confirmed = answer in ("y", "yes")
    if not confirmed:
        print(" Cancelled — no change.")
        return

    _write_chain(cfg, [])
    save_config(cfg)
    print()
    print(" Fallback chain cleared.")
    print()
|
||||
|
||||
|
||||
def _numbered_pick(question: str, choices: List[str]) -> Optional[int]:
    """Prompt for a 1-based choice on stdin and return its 0-based index.

    Used when the curses picker is unavailable.  The prompt repeats until a
    number in range is entered.  An empty answer, Ctrl-C, or end-of-input
    returns ``None``.
    """
    print(question)
    for number, label in enumerate(choices, start=1):
        print(f" {number}. {label}")
    print()

    total = len(choices)
    while True:
        try:
            raw = input(f"Choice [1-{total}]: ").strip()
            if not raw:
                return None
            picked = int(raw) - 1
            if not (0 <= picked < total):
                print(f"Please enter 1-{total}")
                continue
            return picked
        except ValueError:
            print("Please enter a number")
        except (KeyboardInterrupt, EOFError):
            print()
            return None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Dispatch
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def cmd_fallback(args) -> None:
    """Route ``hermes fallback [subcommand]`` to its handler.

    The subcommand is read from ``args.fallback_command``.  A missing or
    empty value, ``list``, and ``ls`` all show the chain; ``rm`` is an alias
    for ``remove``.  An unrecognised subcommand prints a usage hint and
    exits with status 2.
    """
    sub = getattr(args, "fallback_command", None)

    if sub == "add":
        cmd_fallback_add(args)
        return
    if sub in ("remove", "rm"):
        cmd_fallback_remove(args)
        return
    if sub == "clear":
        cmd_fallback_clear(args)
        return
    if sub in (None, "", "list", "ls"):
        cmd_fallback_list(args)
        return

    print(f"Unknown fallback subcommand: {sub}")
    print("Use one of: list, add, remove, clear")
    raise SystemExit(2)
|
||||
@@ -125,6 +125,7 @@ _DEFAULT_PAYLOADS = {
|
||||
"task_id": "test-task",
|
||||
"tool_call_id": "test-call",
|
||||
"result": '{"output": "hello"}',
|
||||
"duration_ms": 42,
|
||||
},
|
||||
"pre_llm_call": {
|
||||
"session_id": "test-session",
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
+544
-15
@@ -839,6 +839,8 @@ def _find_bundled_tui(tui_dir: Path) -> Optional[Path]:
|
||||
|
||||
|
||||
def _tui_build_needed(tui_dir: Path) -> bool:
|
||||
if _hermes_ink_bundle_stale(tui_dir):
|
||||
return True
|
||||
entry = tui_dir / "dist" / "entry.js"
|
||||
if not entry.exists():
|
||||
return True
|
||||
@@ -1026,7 +1028,12 @@ def _make_tui_argv(tui_dir: Path, tui_dev: bool) -> tuple[list[str], Path]:
|
||||
return [node, str(root / "dist" / "entry.js")], root
|
||||
|
||||
|
||||
def _launch_tui(resume_session_id: Optional[str] = None, tui_dev: bool = False):
|
||||
def _launch_tui(
|
||||
resume_session_id: Optional[str] = None,
|
||||
tui_dev: bool = False,
|
||||
model: Optional[str] = None,
|
||||
provider: Optional[str] = None,
|
||||
):
|
||||
"""Replace current process with the TUI."""
|
||||
tui_dir = PROJECT_ROOT / "ui-tui"
|
||||
|
||||
@@ -1036,6 +1043,12 @@ def _launch_tui(resume_session_id: Optional[str] = None, tui_dev: bool = False):
|
||||
)
|
||||
env.setdefault("HERMES_PYTHON", sys.executable)
|
||||
env.setdefault("HERMES_CWD", os.getcwd())
|
||||
if model:
|
||||
env["HERMES_MODEL"] = model
|
||||
env["HERMES_INFERENCE_MODEL"] = model
|
||||
if provider:
|
||||
env["HERMES_TUI_PROVIDER"] = provider
|
||||
env["HERMES_INFERENCE_PROVIDER"] = provider
|
||||
# Guarantee an 8GB V8 heap + exposed GC for the TUI. Default node cap is
|
||||
# ~1.5–4GB depending on version and can fatal-OOM on long sessions with
|
||||
# large transcripts / reasoning blobs. Token-level merge: respect any
|
||||
@@ -1174,6 +1187,8 @@ def cmd_chat(args):
|
||||
_launch_tui(
|
||||
getattr(args, "resume", None),
|
||||
tui_dev=getattr(args, "tui_dev", False),
|
||||
model=getattr(args, "model", None),
|
||||
provider=getattr(args, "provider", None),
|
||||
)
|
||||
|
||||
# Import and run the CLI
|
||||
@@ -1512,6 +1527,83 @@ def select_provider_and_model(args=None):
|
||||
all_providers = [(p.slug, p.tui_desc) for p in CANONICAL_PROVIDERS]
|
||||
|
||||
def _named_custom_provider_map(cfg) -> dict[str, dict[str, str]]:
|
||||
from hermes_cli.config import read_raw_config
|
||||
|
||||
# Build a lookup of raw (un-expanded) api_key templates keyed by a
|
||||
# stable identity. We intentionally bypass
|
||||
# ``get_compatible_custom_providers(read_raw_config())`` here because
|
||||
# its ``_normalize_custom_provider_entry`` step calls ``urlparse()``
|
||||
# on ``base_url`` and drops any entry whose ``base_url`` is itself an
|
||||
# env-ref template (e.g. ``${NEURALWATT_API_BASE}``). Dropping those
|
||||
# entries is exactly how env-ref preservation fails for the user
|
||||
# config that motivated this fix.
|
||||
raw_api_key_refs: dict[tuple, str] = {}
|
||||
raw_cfg = read_raw_config()
|
||||
|
||||
def _record_raw(
|
||||
name: str,
|
||||
provider_key: str,
|
||||
model: str,
|
||||
api_key: str,
|
||||
) -> None:
|
||||
template = str(api_key or "").strip()
|
||||
if "${" not in template:
|
||||
return
|
||||
name = str(name or "").strip()
|
||||
provider_key = str(provider_key or "").strip()
|
||||
model = str(model or "").strip()
|
||||
# Index by every plausible identity the loaded (expanded) config
|
||||
# might present: (name), (name, model), (provider_key), and
|
||||
# (provider_key, model). Case-insensitive on name/provider_key so
|
||||
# the loaded entry matches regardless of display casing.
|
||||
if name:
|
||||
raw_api_key_refs.setdefault((name.lower(),), template)
|
||||
raw_api_key_refs.setdefault((name.lower(), model), template)
|
||||
if provider_key:
|
||||
raw_api_key_refs.setdefault((provider_key.lower(),), template)
|
||||
raw_api_key_refs.setdefault(
|
||||
(provider_key.lower(), model), template
|
||||
)
|
||||
|
||||
raw_list = raw_cfg.get("custom_providers")
|
||||
if isinstance(raw_list, list):
|
||||
for raw_entry in raw_list:
|
||||
if not isinstance(raw_entry, dict):
|
||||
continue
|
||||
_record_raw(
|
||||
raw_entry.get("name", ""),
|
||||
"",
|
||||
raw_entry.get("model", "")
|
||||
or raw_entry.get("default_model", ""),
|
||||
raw_entry.get("api_key", ""),
|
||||
)
|
||||
raw_providers = raw_cfg.get("providers")
|
||||
if isinstance(raw_providers, dict):
|
||||
for raw_key, raw_entry in raw_providers.items():
|
||||
if not isinstance(raw_entry, dict):
|
||||
continue
|
||||
_record_raw(
|
||||
raw_entry.get("name", "") or raw_key,
|
||||
raw_key,
|
||||
raw_entry.get("model", "")
|
||||
or raw_entry.get("default_model", ""),
|
||||
raw_entry.get("api_key", ""),
|
||||
)
|
||||
|
||||
def _lookup_ref(name: str, provider_key: str, model: str) -> str:
|
||||
name_lc = str(name or "").strip().lower()
|
||||
pkey_lc = str(provider_key or "").strip().lower()
|
||||
model = str(model or "").strip()
|
||||
for identity in (
|
||||
(pkey_lc, model),
|
||||
(pkey_lc,),
|
||||
(name_lc, model),
|
||||
(name_lc,),
|
||||
):
|
||||
if identity[0] and identity in raw_api_key_refs:
|
||||
return raw_api_key_refs[identity]
|
||||
return ""
|
||||
|
||||
custom_provider_map = {}
|
||||
for entry in get_compatible_custom_providers(cfg):
|
||||
if not isinstance(entry, dict):
|
||||
@@ -1535,6 +1627,9 @@ def select_provider_and_model(args=None):
|
||||
"model": entry.get("model", ""),
|
||||
"api_mode": entry.get("api_mode", ""),
|
||||
"provider_key": provider_key,
|
||||
"api_key_ref": _lookup_ref(
|
||||
name, provider_key, entry.get("model", "")
|
||||
),
|
||||
}
|
||||
return custom_provider_map
|
||||
|
||||
@@ -1624,6 +1719,8 @@ def select_provider_and_model(args=None):
|
||||
_model_flow_stepfun(config, current_model)
|
||||
elif selected_provider == "bedrock":
|
||||
_model_flow_bedrock(config, current_model)
|
||||
elif selected_provider == "azure-foundry":
|
||||
_model_flow_azure_foundry(config, current_model)
|
||||
elif selected_provider in (
|
||||
"gemini",
|
||||
"deepseek",
|
||||
@@ -1707,7 +1804,6 @@ _AUX_TASKS: list[tuple[str, str, str]] = [
|
||||
("session_search", "Session search", "past-conversation recall"),
|
||||
("approval", "Approval", "smart command approval"),
|
||||
("mcp", "MCP", "MCP tool reasoning"),
|
||||
("flush_memories", "Flush memories", "memory consolidation"),
|
||||
("title_generation", "Title generation", "session titles"),
|
||||
("skills_hub", "Skills hub", "skills search/install"),
|
||||
]
|
||||
@@ -2219,13 +2315,13 @@ def _model_flow_nous(config, current_model="", args=None):
|
||||
# The live /models endpoint returns hundreds of models; the curated list
|
||||
# shows only agentic models users recognize from OpenRouter.
|
||||
from hermes_cli.models import (
|
||||
_PROVIDER_MODELS,
|
||||
get_curated_nous_model_ids,
|
||||
get_pricing_for_provider,
|
||||
check_nous_free_tier,
|
||||
partition_nous_models_by_tier,
|
||||
)
|
||||
|
||||
model_ids = _PROVIDER_MODELS.get("nous", [])
|
||||
model_ids = get_curated_nous_model_ids()
|
||||
if not model_ids:
|
||||
print("No curated models available for Nous Portal.")
|
||||
return
|
||||
@@ -2768,6 +2864,19 @@ def _auto_provider_name(base_url: str) -> str:
|
||||
return name
|
||||
|
||||
|
||||
def _custom_provider_api_key_config_value(provider_info, resolved_api_key=""):
    """Choose the API-key value to persist for a custom provider.

    Precedence:
    1. ``provider_info["api_key_ref"]`` when non-blank, returned as-is.
    2. A ``${<key_env>}`` reference when ``key_env`` is set and the entry
       has no non-blank ``api_key`` of its own.
    3. ``resolved_api_key``, stripped (``""`` when falsy).
    """

    def _field(key):
        # Stripped string form of a provider_info value; falsy values give "".
        return str(provider_info.get(key, "") or "").strip()

    ref = _field("api_key_ref")
    if ref:
        return ref

    env_name = _field("key_env")
    if env_name and not _field("api_key"):
        return f"${{{env_name}}}"

    return str(resolved_api_key or "").strip()
|
||||
|
||||
|
||||
def _save_custom_provider(
|
||||
base_url, api_key="", model="", context_length=None, name=None
|
||||
):
|
||||
@@ -2823,6 +2932,203 @@ def _save_custom_provider(
|
||||
print(f' 💾 Saved to custom providers as "{name}" (edit in config.yaml)')
|
||||
|
||||
|
||||
def _model_flow_azure_foundry(config, current_model=""):
    """Azure Foundry provider: configure endpoint, API mode, API key, and model.

    Azure Foundry supports both OpenAI-style (``/v1/chat/completions``) and
    Anthropic-style (``/v1/messages``) endpoints. The wizard auto-detects
    the transport and available models when possible:

    * URLs ending in ``/anthropic`` → Anthropic Messages API.
    * Successful ``GET <base>/models`` probe → OpenAI-style + populates
      a picker with the returned deployment / model IDs.
    * Anthropic Messages probe fallback when ``/models`` fails.
    * Manual entry when every probe fails (private endpoints, etc.).

    Context lengths for the chosen model are resolved via the standard
    :func:`agent.model_metadata.get_model_context_length` chain
    (models.dev, provider metadata, hardcoded family fallbacks).

    Args:
        config: The in-memory config mapping. Its ``"model"`` entry is read
            for current defaults and overwritten with the saved model block
            on success.
        current_model: Model name offered as the default in the model prompt.

    Returns:
        None. The function returns early (after printing a message) when the
        user cancels or leaves a required value empty.
    """
    from hermes_cli.auth import _save_model_choice, deactivate_provider  # noqa: F401
    from hermes_cli.config import get_env_value, save_env_value, load_config, save_config
    from hermes_cli import azure_detect
    import getpass

    # ── Load current Azure Foundry configuration ─────────────────────
    model_cfg = config.get("model", {})
    if isinstance(model_cfg, dict) and model_cfg.get("provider") == "azure-foundry":
        current_base_url = str(model_cfg.get("base_url", "") or "")
        current_api_mode = str(model_cfg.get("api_mode", "") or "")
    else:
        current_base_url = ""
        current_api_mode = ""

    current_api_key = get_env_value("AZURE_FOUNDRY_API_KEY") or ""

    print()
    print("Azure Foundry Configuration")
    print("=" * 50)
    print()
    print("Azure Foundry can host models with either OpenAI-style or")
    print("Anthropic-style API endpoints. Hermes will probe your")
    print("endpoint to auto-detect the transport and the deployed")
    print("models when possible.")
    print()

    if current_base_url:
        print(f" Current endpoint: {current_base_url}")
    if current_api_mode:
        _lbl = "OpenAI-style" if current_api_mode == "chat_completions" else "Anthropic-style"
        print(f" Current API mode: {_lbl}")
    if current_api_key:
        print(f" Current API key: {current_api_key[:8]}...")
    print()

    # ── Step 1: endpoint URL ─────────────────────────────────────────
    try:
        base_url = input(
            f"API endpoint URL [{current_base_url or 'e.g. https://your-resource.openai.azure.com/openai/v1'}]: "
        ).strip()
    except (KeyboardInterrupt, EOFError):
        print("\nCancelled.")
        return

    # Empty input keeps the currently configured endpoint.
    effective_url = (base_url or current_base_url).rstrip("/")
    if not effective_url:
        print("No endpoint URL provided. Cancelled.")
        return
    if not effective_url.startswith(("http://", "https://")):
        print(f"Invalid URL: {effective_url} (must start with http:// or https://)")
        return

    # ── Step 2: API key ──────────────────────────────────────────────
    print()
    try:
        api_key = getpass.getpass(
            f"API key [{current_api_key[:8] + '...' if current_api_key else 'required'}]: "
        ).strip()
    except (KeyboardInterrupt, EOFError):
        print("\nCancelled.")
        return

    # Empty input keeps the key already stored in the env file.
    effective_key = api_key or current_api_key
    if not effective_key:
        print("No API key provided. Cancelled.")
        return

    # ── Step 3: auto-detect transport + models ───────────────────────
    print()
    print("◐ Probing endpoint to auto-detect transport and models...")
    detection = azure_detect.detect(effective_url, effective_key)

    discovered_models: list[str] = list(detection.models)
    api_mode: str = detection.api_mode or ""

    if api_mode:
        mode_label = "OpenAI-style" if api_mode == "chat_completions" else "Anthropic-style"
        print(f"✓ Detected API transport: {mode_label}")
        if detection.reason:
            print(f" ({detection.reason})")
        if discovered_models:
            print(f"✓ Found {len(discovered_models)} deployed model(s) on this endpoint")
    else:
        # Detection could not decide on a transport: ask the user.
        print(f"⚠ Auto-detection incomplete: {detection.reason}")
        print()
        print("Select the API format your Azure Foundry endpoint uses:")
        print(" 1. OpenAI-style (POST /v1/chat/completions)")
        print(" For: GPT models, Llama, Mistral, and most open models")
        print(" 2. Anthropic-style (POST /v1/messages)")
        print(" For: Claude models deployed via Anthropic API format")
        try:
            default_choice = "2" if current_api_mode == "anthropic_messages" else "1"
            mode_choice = input(f"API format [1/2] ({default_choice}): ").strip() or default_choice
        except (KeyboardInterrupt, EOFError):
            print("\nCancelled.")
            return
        # Anything other than "2" selects the OpenAI-style transport.
        api_mode = "anthropic_messages" if mode_choice == "2" else "chat_completions"

    # ── Step 4: model name ───────────────────────────────────────────
    print()
    effective_model = ""
    if discovered_models:
        print("Available models on this endpoint:")
        for i, mid in enumerate(discovered_models[:30], start=1):
            print(f" {i:>2}. {mid}")
        if len(discovered_models) > 30:
            print(f" ... and {len(discovered_models) - 30} more (type name manually if not shown)")
        print()
        try:
            pick = input(
                f"Pick by number, or type a deployment name [{current_model or discovered_models[0]}]: "
            ).strip()
        except (KeyboardInterrupt, EOFError):
            print("\nCancelled.")
            return
        if not pick:
            effective_model = current_model or discovered_models[0]
        elif pick.isdigit() and 1 <= int(pick) <= min(len(discovered_models), 30):
            # Only the numbers actually displayed are treated as indices.
            effective_model = discovered_models[int(pick) - 1]
        else:
            effective_model = pick
    else:
        try:
            model_name = input(
                f"Model / deployment name [{current_model or 'e.g. gpt-5.4, claude-sonnet-4-6'}]: "
            ).strip()
        except (KeyboardInterrupt, EOFError):
            print("\nCancelled.")
            return
        effective_model = model_name or current_model

    if not effective_model:
        print("No model name provided. Cancelled.")
        return

    # ── Step 5: context-length lookup ────────────────────────────────
    ctx_len = azure_detect.lookup_context_length(
        effective_model, effective_url, effective_key,
    )

    # ── Step 6: persist ──────────────────────────────────────────────
    save_env_value("AZURE_FOUNDRY_API_KEY", effective_key)

    cfg = load_config()
    model = cfg.get("model")
    if not isinstance(model, dict):
        model = {"default": model} if model else {}
        cfg["model"] = model

    # A context_length already stored in config only describes the
    # deployment it was saved for. Work out, before overwriting the fields,
    # whether the user re-selected that exact same target.
    same_target = (
        model.get("provider") == "azure-foundry"
        and str(model.get("base_url", "") or "").rstrip("/") == effective_url
        and model.get("default") == effective_model
    )

    model["provider"] = "azure-foundry"
    model["base_url"] = effective_url
    model["api_mode"] = api_mode
    model["default"] = effective_model
    if ctx_len:
        model["context_length"] = ctx_len
    elif not same_target:
        # Drop a stale value left over from a different model/endpoint so
        # the runtime fallback announced below is what actually applies.
        model.pop("context_length", None)

    save_config(cfg)
    deactivate_provider()
    config["model"] = dict(model)

    # Clear any conflicting env vars so auxiliary clients don't poison
    # themselves with a stale OpenAI base URL / key.
    if get_env_value("OPENAI_BASE_URL"):
        save_env_value("OPENAI_BASE_URL", "")
    if get_env_value("OPENAI_API_KEY"):
        save_env_value("OPENAI_API_KEY", "")

    mode_label = "OpenAI-style" if api_mode == "chat_completions" else "Anthropic-style"
    print()
    print("✓ Azure Foundry configured:")
    print(f" Endpoint: {effective_url}")
    print(f" API mode: {mode_label}")
    print(f" Model: {effective_model}")
    if ctx_len:
        print(f" Context length: {ctx_len:,} tokens")
    else:
        print(" Context length: not auto-detected (will fall back at runtime)")
    print()
|
||||
|
||||
|
||||
def _remove_custom_provider(config):
|
||||
"""Let the user remove a saved custom provider from config.yaml."""
|
||||
from hermes_cli.config import load_config, save_config
|
||||
@@ -2909,6 +3215,7 @@ def _model_flow_named_custom(config, provider_info):
|
||||
# Resolve key from env var if api_key not set directly
|
||||
if not api_key and key_env:
|
||||
api_key = os.environ.get(key_env, "")
|
||||
config_api_key = _custom_provider_api_key_config_value(provider_info, api_key)
|
||||
|
||||
print(f" Provider: {name}")
|
||||
print(f" URL: {base_url}")
|
||||
@@ -3005,8 +3312,8 @@ def _model_flow_named_custom(config, provider_info):
|
||||
else:
|
||||
model["provider"] = "custom"
|
||||
model["base_url"] = base_url
|
||||
if api_key:
|
||||
model["api_key"] = api_key
|
||||
if config_api_key:
|
||||
model["api_key"] = config_api_key
|
||||
# Apply api_mode from custom_providers entry, or clear stale value
|
||||
custom_api_mode = provider_info.get("api_mode", "")
|
||||
if custom_api_mode:
|
||||
@@ -3024,15 +3331,15 @@ def _model_flow_named_custom(config, provider_info):
|
||||
provider_entry = providers_cfg.get(provider_key)
|
||||
if isinstance(provider_entry, dict):
|
||||
provider_entry["default_model"] = model_name
|
||||
if api_key and not str(provider_entry.get("api_key", "") or "").strip():
|
||||
provider_entry["api_key"] = api_key
|
||||
if config_api_key and not str(provider_entry.get("api_key", "") or "").strip():
|
||||
provider_entry["api_key"] = config_api_key
|
||||
if key_env and not str(provider_entry.get("key_env", "") or "").strip():
|
||||
provider_entry["key_env"] = key_env
|
||||
cfg["providers"] = providers_cfg
|
||||
save_config(cfg)
|
||||
else:
|
||||
# Save model name to the custom_providers entry for next time
|
||||
_save_custom_provider(base_url, api_key, model_name)
|
||||
_save_custom_provider(base_url, config_api_key, model_name)
|
||||
|
||||
print(f"\n✅ Model set to: {model_name}")
|
||||
print(f" Provider: {name} ({base_url})")
|
||||
@@ -4473,6 +4780,13 @@ def cmd_webhook(args):
|
||||
webhook_command(args)
|
||||
|
||||
|
||||
def cmd_kanban(args):
    """Entry point for the kanban (multi-profile collaboration board) command.

    The implementation is imported at call time and its result is returned
    unchanged.
    """
    from hermes_cli.kanban import kanban_command as _run_kanban

    outcome = _run_kanban(args)
    return outcome
|
||||
|
||||
|
||||
def cmd_hooks(args):
|
||||
"""Shell-hook inspection and management."""
|
||||
from hermes_cli.hooks import hooks_command
|
||||
@@ -5570,6 +5884,54 @@ def _finalize_update_output(state):
|
||||
pass
|
||||
|
||||
|
||||
def _cmd_update_check():
    """Implement ``hermes update --check``: fetch and report without installing.

    Runs ``git fetch origin`` in ``PROJECT_ROOT`` and then counts the commits
    in ``HEAD..origin/main``. Nothing is merged or installed.

    Exits the process with status 1 when ``PROJECT_ROOT`` has no ``.git``
    entry, when the git executable cannot be started, when the fetch fails,
    or when the commit count cannot be determined.
    """
    git_dir = PROJECT_ROOT / ".git"
    if not git_dir.exists():
        print("✗ Not a git repository — cannot check for updates.")
        sys.exit(1)

    git_cmd = ["git"]
    if sys.platform == "win32":
        git_cmd = ["git", "-c", "windows.appendAtomically=false"]

    print("→ Fetching from origin...")
    try:
        fetch_result = subprocess.run(
            git_cmd + ["fetch", "origin"],
            cwd=PROJECT_ROOT,
            capture_output=True,
            text=True,
        )
    except FileNotFoundError:
        # No git binary on PATH: report it instead of dumping a traceback.
        print("✗ git executable not found — cannot check for updates.")
        sys.exit(1)

    if fetch_result.returncode != 0:
        stderr = fetch_result.stderr.strip()
        if "Could not resolve host" in stderr or "unable to access" in stderr:
            print("✗ Network error — cannot reach the remote repository.")
        elif "Authentication failed" in stderr or "could not read Username" in stderr:
            print("✗ Authentication failed — check your git credentials or SSH key.")
        else:
            print("✗ Failed to fetch from origin.")
            if stderr:
                print(f" {stderr.splitlines()[0]}")
        sys.exit(1)

    # Inspect the return code ourselves rather than using check=True, so a
    # missing origin/main ref produces a message and exit code, not an
    # unhandled CalledProcessError.
    rev_result = subprocess.run(
        git_cmd + ["rev-list", "HEAD..origin/main", "--count"],
        cwd=PROJECT_ROOT,
        capture_output=True,
        text=True,
    )
    if rev_result.returncode != 0:
        print("✗ Could not compare HEAD with origin/main.")
        rev_stderr = (rev_result.stderr or "").strip()
        if rev_stderr:
            print(f" {rev_stderr.splitlines()[0]}")
        sys.exit(1)

    try:
        behind = int(rev_result.stdout.strip())
    except ValueError:
        print("✗ Unexpected output from git rev-list — cannot determine update status.")
        sys.exit(1)

    if behind == 0:
        print("✓ Already up to date.")
    else:
        commits_word = "commit" if behind == 1 else "commits"
        print(f"⚕ Update available: {behind} {commits_word} behind origin/main.")
        from hermes_cli.config import recommended_update_command

        print(f" Run '{recommended_update_command()}' to install.")
|
||||
|
||||
|
||||
def cmd_update(args):
|
||||
"""Update Hermes Agent to the latest version.
|
||||
|
||||
@@ -5583,6 +5945,10 @@ def cmd_update(args):
|
||||
managed_error("update Hermes Agent")
|
||||
return
|
||||
|
||||
if getattr(args, "check", False):
|
||||
_cmd_update_check()
|
||||
return
|
||||
|
||||
gateway_mode = getattr(args, "gateway", False)
|
||||
|
||||
# Protect against mid-update terminal disconnects (SIGHUP) and tolerate
|
||||
@@ -6071,6 +6437,50 @@ def _cmd_update_impl(args, gateway_mode: bool):
|
||||
return False
|
||||
_time.sleep(0.5)
|
||||
|
||||
def _service_restart_sec(
    scope_cmd_: list, svc_name_: str, default: float = 0.0,
) -> float:
    """Read the unit's ``RestartUSec`` (RestartSec) in seconds.

    After a graceful exit-75, systemd waits ``RestartSec`` before
    respawning the unit. Callers that poll for ``is-active``
    must use a timeout >= ``RestartSec`` + transition slack, or
    they'll give up *during* the cooldown window and wrongly
    conclude the unit didn't relaunch.

    Args:
        scope_cmd_: Command prefix (argument list) to which
            ``show <unit> --property=RestartUSec --value`` is appended.
        svc_name_: Name of the unit to query.
        default: Value returned whenever the property cannot be read or
            cannot be parsed completely.

    Returns:
        The parsed time span in seconds, or ``default``.
    """
    try:
        _show = subprocess.run(
            scope_cmd_ + [
                "show", svc_name_,
                "--property=RestartUSec", "--value",
            ],
            capture_output=True, text=True, timeout=5,
        )
    except (OSError, subprocess.TimeoutExpired):
        # Missing / non-executable binary, or the query hung.
        return default

    raw = (_show.stdout or "").strip()
    # systemd emits values like "30s", "100ms", "1min 30s", "1h", or
    # "infinity". Parse conservatively; on any miss return default.
    if not raw or raw == "infinity":
        return default

    # Order matters for the endswith() scan: "month" must precede "h",
    # and "ms" / "us" must precede "s".
    _units = (
        ("month", 2629800.0),
        ("min", 60.0),
        ("ms", 0.001),
        ("us", 0.000001),
        ("y", 31557600.0),
        ("w", 604800.0),
        ("d", 86400.0),
        ("h", 3600.0),
        ("s", 1.0),
    )

    total = 0.0
    for part in raw.split():
        for _suf, _mult in _units:
            if part.endswith(_suf):
                _number = part[: -len(_suf)]
                break
        else:
            # Unknown unit: a partial sum would under-estimate the wait,
            # so fall back to the caller's default instead.
            return default
        try:
            total += float(_number) * _mult
        except ValueError:
            return default
    return total
|
||||
|
||||
# Drain budget for graceful SIGUSR1 restarts. The gateway drains
|
||||
# for up to ``agent.restart_drain_timeout`` (default 60s) before
|
||||
# exiting with code 75; we wait slightly longer so the drain
|
||||
@@ -6177,13 +6587,22 @@ def _cmd_update_impl(args, gateway_mode: bool):
|
||||
|
||||
if _graceful_ok:
|
||||
# Gateway exited 75; systemd should relaunch
|
||||
# via Restart=on-failure. Poll is-active for
|
||||
# up to ~10s because the unit's Stopped ->
|
||||
# Started transition can take a few seconds
|
||||
# after the old PID exits, and a one-shot
|
||||
# check races that window.
|
||||
# via Restart=on-failure. The unit's
|
||||
# RestartSec (default 30s on ours) gates the
|
||||
# respawn — poll past that + slack so we
|
||||
# don't give up mid-cooldown and falsely
|
||||
# print "drained but didn't relaunch". For
|
||||
# units without RestartSec set we fall back
|
||||
# to the original 10s budget.
|
||||
_restart_sec = _service_restart_sec(
|
||||
scope_cmd, svc_name, default=0.0,
|
||||
)
|
||||
_post_drain_timeout = max(
|
||||
10.0, _restart_sec + 10.0,
|
||||
)
|
||||
if _wait_for_service_active(
|
||||
scope_cmd, svc_name, timeout=10.0,
|
||||
scope_cmd, svc_name,
|
||||
timeout=_post_drain_timeout,
|
||||
):
|
||||
restarted_services.append(svc_name)
|
||||
continue
|
||||
@@ -6811,6 +7230,9 @@ Examples:
|
||||
hermes auth remove <p> <t> Remove pooled credential by index, id, or label
|
||||
hermes auth reset <provider> Clear exhaustion status for a provider
|
||||
hermes model Select default model
|
||||
hermes fallback [list] Show fallback provider chain
|
||||
hermes fallback add Add a fallback provider (same picker as `hermes model`)
|
||||
hermes fallback remove Remove a fallback provider from the chain
|
||||
hermes config View configuration
|
||||
hermes config edit Edit config in $EDITOR
|
||||
hermes config set model gpt-4 Set a config value
|
||||
@@ -6836,6 +7258,40 @@ For more help on a command:
|
||||
parser.add_argument(
|
||||
"--version", "-V", action="store_true", help="Show version and exit"
|
||||
)
|
||||
parser.add_argument(
|
||||
"-z",
|
||||
"--oneshot",
|
||||
metavar="PROMPT",
|
||||
default=None,
|
||||
help=(
|
||||
"One-shot mode: send a single prompt and print ONLY the final "
|
||||
"response text to stdout. No banner, no spinner, no tool "
|
||||
"previews, no session_id line. Tools, memory, rules, and "
|
||||
"AGENTS.md in the CWD are loaded as normal; approvals are "
|
||||
"auto-bypassed. Intended for scripts / pipes."
|
||||
),
|
||||
)
|
||||
# --model / --provider are accepted at the top level so they can pair
|
||||
# with -z without needing the `chat` subcommand. If neither -z nor a
|
||||
# subcommand consumes them, they fall through harmlessly as None.
|
||||
# Mirrors `hermes chat --model ... --provider ...` semantics.
|
||||
parser.add_argument(
|
||||
"-m",
|
||||
"--model",
|
||||
default=None,
|
||||
help=(
|
||||
"Model override for this invocation (e.g. anthropic/claude-sonnet-4.6). "
|
||||
"Applies to -z/--oneshot and --tui. Also settable via HERMES_INFERENCE_MODEL env var."
|
||||
),
|
||||
)
|
||||
parser.add_argument(
|
||||
"--provider",
|
||||
default=None,
|
||||
help=(
|
||||
"Provider override for this invocation (e.g. openrouter, anthropic). "
|
||||
"Applies to -z/--oneshot and --tui. Also settable via HERMES_INFERENCE_PROVIDER env var."
|
||||
),
|
||||
)
|
||||
parser.add_argument(
|
||||
"--resume",
|
||||
"-r",
|
||||
@@ -7118,6 +7574,42 @@ For more help on a command:
|
||||
)
|
||||
model_parser.set_defaults(func=cmd_model)
|
||||
|
||||
# =========================================================================
|
||||
# fallback command — manage the fallback provider chain
|
||||
# =========================================================================
|
||||
from hermes_cli.fallback_cmd import cmd_fallback
|
||||
|
||||
fallback_parser = subparsers.add_parser(
|
||||
"fallback",
|
||||
help="Manage fallback providers (tried when the primary model fails)",
|
||||
description=(
|
||||
"Manage the fallback provider chain. Fallback providers are tried "
|
||||
"in order when the primary model fails with rate-limit, overload, or "
|
||||
"connection errors. See: "
|
||||
"https://hermes-agent.nousresearch.com/docs/user-guide/features/fallback-providers"
|
||||
),
|
||||
)
|
||||
fallback_subparsers = fallback_parser.add_subparsers(dest="fallback_command")
|
||||
fallback_subparsers.add_parser(
|
||||
"list",
|
||||
aliases=["ls"],
|
||||
help="Show the current fallback chain (default when no subcommand)",
|
||||
)
|
||||
fallback_subparsers.add_parser(
|
||||
"add",
|
||||
help="Pick a provider + model (same picker as `hermes model`) and append to the chain",
|
||||
)
|
||||
fallback_subparsers.add_parser(
|
||||
"remove",
|
||||
aliases=["rm"],
|
||||
help="Pick an entry to delete from the chain",
|
||||
)
|
||||
fallback_subparsers.add_parser(
|
||||
"clear",
|
||||
help="Remove all fallback entries",
|
||||
)
|
||||
fallback_parser.set_defaults(func=cmd_fallback)
|
||||
|
||||
# =========================================================================
|
||||
# gateway command
|
||||
# =========================================================================
|
||||
@@ -7288,6 +7780,19 @@ For more help on a command:
|
||||
setup_parser.add_argument(
|
||||
"--reset", action="store_true", help="Reset configuration to defaults"
|
||||
)
|
||||
setup_parser.add_argument(
|
||||
"--reconfigure",
|
||||
action="store_true",
|
||||
help="(Default on existing installs.) Re-run the full wizard, "
|
||||
"showing current values as defaults. Kept for backwards "
|
||||
"compatibility — a bare 'hermes setup' now does this.",
|
||||
)
|
||||
setup_parser.add_argument(
|
||||
"--quick",
|
||||
action="store_true",
|
||||
help="On existing installs: only prompt for items that are missing "
|
||||
"or unset, instead of running the full reconfigure wizard.",
|
||||
)
|
||||
setup_parser.set_defaults(func=cmd_setup)
|
||||
|
||||
# =========================================================================
|
||||
@@ -7618,6 +8123,13 @@ For more help on a command:
|
||||
|
||||
webhook_parser.set_defaults(func=cmd_webhook)
|
||||
|
||||
# =========================================================================
|
||||
# kanban command — multi-profile collaboration board
|
||||
# =========================================================================
|
||||
from hermes_cli.kanban import build_parser as _build_kanban_parser
|
||||
kanban_parser = _build_kanban_parser(subparsers)
|
||||
kanban_parser.set_defaults(func=cmd_kanban)
|
||||
|
||||
# =========================================================================
|
||||
# hooks command — shell-hook inspection and management
|
||||
# =========================================================================
|
||||
@@ -8770,6 +9282,12 @@ Examples:
|
||||
default=False,
|
||||
help="Gateway mode: use file-based IPC for prompts instead of stdin (used internally by /update)",
|
||||
)
|
||||
update_parser.add_argument(
|
||||
"--check",
|
||||
action="store_true",
|
||||
default=False,
|
||||
help="Check whether an update is available without installing anything",
|
||||
)
|
||||
update_parser.set_defaults(func=cmd_update)
|
||||
|
||||
# =========================================================================
|
||||
@@ -9116,6 +9634,17 @@ Examples:
|
||||
exc_info=True,
|
||||
)
|
||||
|
||||
# Handle top-level --oneshot / -z: single-shot mode, stdout = final
|
||||
# response only, nothing else. Bypasses cli.py entirely.
|
||||
if getattr(args, "oneshot", None):
|
||||
from hermes_cli.oneshot import run_oneshot
|
||||
|
||||
sys.exit(run_oneshot(
|
||||
args.oneshot,
|
||||
model=getattr(args, "model", None),
|
||||
provider=getattr(args, "provider", None),
|
||||
))
|
||||
|
||||
# Handle top-level --resume / --continue as shortcut to chat
|
||||
if (args.resume or args.continue_last) and args.command is None:
|
||||
args.command = "chat"
|
||||
|
||||
@@ -0,0 +1,329 @@
|
||||
"""Remote model catalog fetcher.
|
||||
|
||||
The Hermes docs site hosts a JSON manifest of curated models for providers
|
||||
we want to update without shipping a release (currently OpenRouter and
|
||||
Nous Portal). This module fetches, validates, and caches that manifest,
|
||||
falling back to the in-repo hardcoded lists when the network is unavailable.
|
||||
|
||||
Pipeline
|
||||
--------
|
||||
1. ``get_catalog()`` — returns a parsed manifest dict.
|
||||
- Checks in-process cache (invalidated by TTL).
|
||||
- Reads disk cache at ``~/.hermes/cache/model_catalog.json``.
|
||||
- Fetches the master URL if disk cache is stale or missing.
|
||||
- On any fetch failure, keeps using the stale cache (or empty dict).
|
||||
|
||||
2. ``get_curated_openrouter_models()`` / ``get_curated_nous_models()`` —
|
||||
thin accessors returning the shapes existing callers expect. Each
|
||||
falls back to the in-repo hardcoded list on any lookup failure.
|
||||
|
||||
Schema (version 1)
|
||||
------------------
|
||||
::
|
||||
|
||||
{
|
||||
"version": 1,
|
||||
"updated_at": "2026-04-25T22:00:00Z",
|
||||
"metadata": {...}, # free-form
|
||||
"providers": {
|
||||
"openrouter": {
|
||||
"metadata": {...}, # free-form
|
||||
"models": [
|
||||
{"id": "vendor/model", "description": "recommended",
|
||||
"metadata": {...}} # free-form, model-level
|
||||
]
|
||||
},
|
||||
"nous": {...}
|
||||
}
|
||||
}
|
||||
|
||||
Unknown fields are ignored — extra metadata can be added at either level
|
||||
without bumping ``version``. ``version`` bumps are reserved for
|
||||
breaking changes (renaming ``providers``, changing ``models`` shape).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
import urllib.error
|
||||
import urllib.request
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from hermes_cli import __version__ as _HERMES_VERSION
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Constants
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
DEFAULT_CATALOG_URL = (
|
||||
"https://hermes-agent.nousresearch.com/docs/api/model-catalog.json"
|
||||
)
|
||||
DEFAULT_TTL_HOURS = 24
|
||||
DEFAULT_FETCH_TIMEOUT = 8.0
|
||||
SUPPORTED_SCHEMA_VERSION = 1
|
||||
|
||||
_HERMES_USER_AGENT = f"hermes-cli/{_HERMES_VERSION}"
|
||||
|
||||
# In-process cache to avoid repeated disk + parse work across multiple
|
||||
# calls within the same session. Invalidated by TTL against the disk file's
|
||||
# mtime, so calling code never has to think about this.
|
||||
_catalog_cache: dict[str, Any] | None = None
|
||||
_catalog_cache_source_mtime: float = 0.0
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Config
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _load_catalog_config() -> dict[str, Any]:
    """Load the ``model_catalog`` config block with defaults filled in.

    Never raises: a config that cannot be loaded, a non-mapping config or
    ``model_catalog`` section, and an unparsable ``ttl_hours`` all fall back
    to defaults.

    Returns:
        A dict with the keys ``enabled`` (bool), ``url`` (str),
        ``ttl_hours`` (float) and ``providers`` (dict).
    """
    try:
        from hermes_cli.config import load_config

        cfg = load_config() or {}
    except Exception:
        cfg = {}

    raw = cfg.get("model_catalog") if isinstance(cfg, dict) else None
    if not isinstance(raw, dict):
        raw = {}

    # A hand-edited config may hold a non-numeric ttl_hours; treat that like
    # a missing value rather than letting float() raise into callers.
    try:
        ttl_hours = float(raw.get("ttl_hours") or DEFAULT_TTL_HOURS)
    except (TypeError, ValueError):
        ttl_hours = float(DEFAULT_TTL_HOURS)

    providers = raw.get("providers")

    return {
        "enabled": bool(raw.get("enabled", True)),
        "url": str(raw.get("url") or DEFAULT_CATALOG_URL),
        "ttl_hours": ttl_hours,
        "providers": providers if isinstance(providers, dict) else {},
    }
|
||||
|
||||
|
||||
def _cache_path() -> Path:
    """Locate the on-disk catalog cache, ``<hermes home>/cache/model_catalog.json``.

    ``get_hermes_home`` is imported at call time rather than at module
    import so tests can monkeypatch the home directory.
    """
    from hermes_constants import get_hermes_home

    hermes_home = get_hermes_home()
    cache_dir = hermes_home / "cache"
    return cache_dir / "model_catalog.json"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fetch + validate + cache
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _fetch_manifest(url: str, timeout: float) -> dict[str, Any] | None:
    """Download and parse the manifest at ``url``.

    Returns the parsed dict when the request succeeds, the body is valid
    JSON, and it passes ``_validate_manifest``. Every failure is logged at
    INFO level and reported as ``None``.
    """
    request_headers = {
        "Accept": "application/json",
        "User-Agent": _HERMES_USER_AGENT,
    }
    try:
        request = urllib.request.Request(url, headers=request_headers)
        with urllib.request.urlopen(request, timeout=timeout) as response:
            payload = response.read()
        manifest = json.loads(payload.decode())
    except (urllib.error.URLError, TimeoutError, json.JSONDecodeError, OSError) as err:
        logger.info("model catalog fetch failed (%s): %s", url, err)
        return None
    except Exception as err:  # pragma: no cover — defensive
        logger.info("model catalog fetch errored (%s): %s", url, err)
        return None

    if _validate_manifest(manifest):
        return manifest

    logger.info("model catalog at %s failed schema validation", url)
    return None
|
||||
|
||||
|
||||
def _validate_manifest(data: Any) -> bool:
    """Return True when ``data`` matches the minimum manifest shape.

    Required shape:

    * ``data`` is a dict whose ``version`` is an integer between 1 and
      ``SUPPORTED_SCHEMA_VERSION`` inclusive (booleans are rejected);
    * ``providers`` is a dict mapping string names to dicts;
    * each provider dict has a ``models`` list of dicts, each with a
      non-blank string ``id``.

    Any other fields are ignored.
    """
    if not isinstance(data, dict):
        return False

    version = data.get("version")
    # bool is an int subclass; a JSON ``true`` must not pass as version 1.
    if isinstance(version, bool) or not isinstance(version, int):
        return False
    # Refuse future schema versions we don't understand rather than guess,
    # and refuse pre-1 versions, which aren't supported either.
    if not 1 <= version <= SUPPORTED_SCHEMA_VERSION:
        return False

    providers = data.get("providers")
    if not isinstance(providers, dict):
        return False

    for pname, pblock in providers.items():
        if not isinstance(pname, str) or not isinstance(pblock, dict):
            return False
        models = pblock.get("models")
        if not isinstance(models, list):
            return False
        for entry in models:
            if not isinstance(entry, dict):
                return False
            model_id = entry.get("id")
            if not isinstance(model_id, str) or not model_id.strip():
                return False
    return True
|
||||
|
||||
|
||||
def _read_disk_cache() -> tuple[dict[str, Any] | None, float]:
    """Return ``(data_or_none, mtime)``. mtime is 0 if file is missing.

    ``(None, 0.0)`` is also returned when the file cannot be read, is not
    valid UTF-8 JSON, or fails ``_validate_manifest``.
    """
    path = _cache_path()
    try:
        mtime = path.stat().st_mtime
        # Decode explicitly as UTF-8 rather than the locale default.
        with open(path, encoding="utf-8") as fh:
            data = json.load(fh)
    except (OSError, ValueError):
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        return (None, 0.0)
    if not _validate_manifest(data):
        return (None, 0.0)
    return (data, mtime)
|
||||
|
||||
|
||||
def _write_disk_cache(data: dict[str, Any]) -> None:
    """Write ``data`` to the disk cache as indented JSON; best effort.

    The payload is written to a sibling ``.tmp`` file and then moved over
    the cache path with ``os.replace``. Failures are logged at INFO level
    and never raised.
    """
    path = _cache_path()
    tmp = path.with_suffix(path.suffix + ".tmp")
    try:
        path.parent.mkdir(parents=True, exist_ok=True)
        with open(tmp, "w", encoding="utf-8") as fh:
            json.dump(data, fh, indent=2)
            fh.write("\n")
        os.replace(tmp, path)
    except (OSError, TypeError, ValueError) as exc:
        logger.info("model catalog cache write failed: %s", exc)
        # Don't leave a half-written temp file behind; cleanup is itself
        # best-effort because the directory may be what is unwritable.
        try:
            tmp.unlink()
        except OSError:
            pass
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Public API
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def get_catalog(*, force_refresh: bool = False) -> dict[str, Any]:
    """Return the parsed model catalog manifest, or an empty dict on failure.

    Resolution order:

    1. ``{}`` when the catalog is disabled in config.
    2. Unless ``force_refresh`` is set, a disk cache younger than the
       configured TTL is used. The in-process copy is returned when it was
       loaded from the same disk mtime.
    3. Otherwise the manifest is fetched from the configured URL, written
       to disk and returned.
    4. If the fetch fails, a stale disk copy is returned when one exists,
       else ``{}``.

    Callers should treat a missing provider/model as "use the in-repo
    fallback".
    """
    global _catalog_cache, _catalog_cache_source_mtime

    settings = _load_catalog_config()
    if not settings["enabled"]:
        return {}

    max_age = max(0.0, settings["ttl_hours"] * 3600.0)

    cached_on_disk, cached_mtime = _read_disk_cache()
    now = time.time()
    disk_is_fresh = cached_on_disk is not None and (now - cached_mtime) < max_age

    if disk_is_fresh and not force_refresh:
        # In-process hit: the disk file is unchanged since we loaded it.
        if _catalog_cache is not None and cached_mtime == _catalog_cache_source_mtime:
            return _catalog_cache
        # Disk is fresh enough — adopt it without a network hit.
        _catalog_cache = cached_on_disk
        _catalog_cache_source_mtime = cached_mtime
        return cached_on_disk

    # Need to (re)fetch. If it fails, fall back to any stale disk copy.
    fetched = _fetch_manifest(settings["url"], DEFAULT_FETCH_TIMEOUT)
    if fetched is None:
        if cached_on_disk is None:
            return {}
        _catalog_cache = cached_on_disk
        _catalog_cache_source_mtime = cached_mtime
        return cached_on_disk

    _write_disk_cache(fetched)
    result, result_mtime = _read_disk_cache()
    if result is None:
        # The cache write (or re-read) failed: serve the fetched copy.
        result, result_mtime = fetched, now
    _catalog_cache = result
    _catalog_cache_source_mtime = result_mtime
    return result
|
||||
|
||||
|
||||
def _fetch_provider_override(provider: str) -> dict[str, Any] | None:
    """Fetch the manifest at ``model_catalog.providers.<provider>.url``, if set.

    Returns ``None`` when the catalog is disabled, when the provider has no
    non-blank string ``url`` configured, or when the fetch fails.
    """
    settings = _load_catalog_config()
    if not settings["enabled"]:
        return None

    entry = settings["providers"].get(provider)
    url = entry.get("url") if isinstance(entry, dict) else None
    if not isinstance(url, str):
        return None

    url = url.strip()
    if not url:
        return None

    # Override fetches skip the disk cache because they're usually
    # third-party self-hosted. Re-request on every call but with a short
    # timeout so they don't block the picker.
    return _fetch_manifest(url, DEFAULT_FETCH_TIMEOUT)
|
||||
|
||||
|
||||
def _get_provider_block(provider: str) -> dict[str, Any] | None:
    """Look up ``provider``'s block, preferring a per-provider override manifest.

    The main catalog is consulted only when no override manifest is
    available or the override has no dict block for ``provider``.
    """

    def _lookup(manifest):
        entry = manifest.get("providers", {}).get(provider)
        return entry if isinstance(entry, dict) else None

    override = _fetch_provider_override(provider)
    if override is not None:
        from_override = _lookup(override)
        if from_override is not None:
            return from_override

    catalog = get_catalog()
    return _lookup(catalog) if catalog else None
|
||||
|
||||
|
||||
def get_curated_openrouter_models() -> list[tuple[str, str]] | None:
    """Return OpenRouter's curated ``[(id, description), ...]`` pairs.

    The pairs come from the ``openrouter`` provider block of the manifest.
    Entries whose ``id`` is missing or blank are dropped; a missing
    ``description`` becomes ``""``.

    Returns ``None`` when no provider block is available or when it yields
    no usable ids, so callers can fall back to their hardcoded list.
    """
    provider_block = _get_provider_block("openrouter")
    if not provider_block:
        return None

    pairs: list[tuple[str, str]] = []
    for entry in provider_block.get("models", []):
        model_id = str(entry.get("id") or "").strip()
        if model_id:
            pairs.append((model_id, str(entry.get("description") or "")))
    return pairs if pairs else None
|
||||
|
||||
|
||||
def get_curated_nous_models() -> list[str] | None:
    """Return Nous Portal's curated model ids from the manifest.

    Ids are taken from the ``nous`` provider block, stripped, and blank ones
    are discarded.

    Returns ``None`` when no provider block is available or when it contains
    no usable ids.
    """
    provider_block = _get_provider_block("nous")
    if not provider_block:
        return None

    stripped_ids = (
        str(entry.get("id") or "").strip()
        for entry in provider_block.get("models", [])
    )
    model_ids: list[str] = [model_id for model_id in stripped_ids if model_id]
    return model_ids or None
|
||||
|
||||
|
||||
def reset_cache() -> None:
    """Drop the in-process catalog cache.

    Used by tests and ``hermes model --refresh``.  Only the two module-level
    cache variables are reset: the cached manifest becomes ``None`` and its
    recorded source mtime becomes ``0.0``.
    """
    global _catalog_cache, _catalog_cache_source_mtime
    _catalog_cache, _catalog_cache_source_mtime = None, 0.0
|
||||
+39
-12
@@ -533,6 +533,7 @@ def resolve_display_context_length(
|
||||
base_url: str = "",
|
||||
api_key: str = "",
|
||||
model_info: Optional[ModelInfo] = None,
|
||||
custom_providers: list | None = None,
|
||||
) -> Optional[int]:
|
||||
"""Resolve the context length to show in /model output.
|
||||
|
||||
@@ -543,6 +544,11 @@ def resolve_display_context_length(
|
||||
about Codex OAuth, Copilot, Nous, and falls back to models.dev for the
|
||||
rest.
|
||||
|
||||
When ``custom_providers`` is provided, per-model ``context_length``
|
||||
overrides from ``custom_providers[].models.<id>.context_length`` are
|
||||
honored — this closes #15779 where ``/model`` switch ignored user-set
|
||||
overrides.
|
||||
|
||||
Prefer the provider-aware value; fall back to ``model_info.context_window``
|
||||
only if the resolver returns nothing.
|
||||
"""
|
||||
@@ -553,6 +559,7 @@ def resolve_display_context_length(
|
||||
base_url=base_url or "",
|
||||
api_key=api_key or "",
|
||||
provider=provider or None,
|
||||
custom_providers=custom_providers,
|
||||
)
|
||||
if ctx:
|
||||
return int(ctx)
|
||||
@@ -831,9 +838,14 @@ def switch_model(
|
||||
requested=current_provider,
|
||||
target_model=new_model,
|
||||
)
|
||||
api_key = runtime.get("api_key", "")
|
||||
base_url = runtime.get("base_url", "")
|
||||
api_mode = runtime.get("api_mode", "")
|
||||
# If resolution fell through to "custom" (e.g. named custom provider like
|
||||
# "ollama-launch" that resolve_runtime_provider doesn't know), keep existing
|
||||
# credentials. Otherwise use the resolved values (picks up credential rotation,
|
||||
# base_url adjustments for OpenCode, etc.).
|
||||
if runtime.get("provider") != "custom":
|
||||
api_key = runtime.get("api_key", "")
|
||||
base_url = runtime.get("base_url", "")
|
||||
api_mode = runtime.get("api_mode", "")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
@@ -867,16 +879,31 @@ def switch_model(
|
||||
"message": f"Could not validate `{new_model}`: {e}",
|
||||
}
|
||||
|
||||
# Override rejection if model is in the user's saved provider config.
|
||||
# API /v1/models may not list cloud/aliased models even though the server supports them.
|
||||
if not validation.get("accepted"):
|
||||
msg = validation.get("message", "Invalid model")
|
||||
return ModelSwitchResult(
|
||||
success=False,
|
||||
new_model=new_model,
|
||||
target_provider=target_provider,
|
||||
provider_label=provider_label,
|
||||
is_global=is_global,
|
||||
error_message=msg,
|
||||
)
|
||||
override = False
|
||||
if user_providers:
|
||||
for up in user_providers:
|
||||
if isinstance(up, dict) and up.get("provider") == target_provider:
|
||||
cfg_models = up.get("models", [])
|
||||
if new_model in cfg_models or any(
|
||||
m.get("name") == new_model for m in cfg_models if isinstance(m, dict)
|
||||
):
|
||||
override = True
|
||||
break
|
||||
if override:
|
||||
validation = {"accepted": True, "persist": True, "recognized": False, "message": validation.get("message", "")}
|
||||
else:
|
||||
msg = validation.get("message", "Invalid model")
|
||||
return ModelSwitchResult(
|
||||
success=False,
|
||||
new_model=new_model,
|
||||
target_provider=target_provider,
|
||||
provider_label=provider_label,
|
||||
is_global=is_global,
|
||||
error_message=msg,
|
||||
)
|
||||
|
||||
# Apply auto-correction if validation found a closer match
|
||||
if validation.get("corrected_model"):
|
||||
|
||||
+140
-58
@@ -383,6 +383,9 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
|
||||
"us.meta.llama4-maverick-17b-instruct-v1:0",
|
||||
"us.meta.llama4-scout-17b-instruct-v1:0",
|
||||
],
|
||||
# Azure Foundry: user-provided endpoint and model.
|
||||
# Empty list because models depend on the endpoint configuration.
|
||||
"azure-foundry": [],
|
||||
}
|
||||
|
||||
# Vercel AI Gateway: derive the bare-model-id catalog from the curated
|
||||
@@ -740,6 +743,7 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
|
||||
ProviderEntry("opencode-zen", "OpenCode Zen", "OpenCode Zen (35+ curated models, pay-as-you-go)"),
|
||||
ProviderEntry("opencode-go", "OpenCode Go", "OpenCode Go (open models, $10/month subscription)"),
|
||||
ProviderEntry("bedrock", "AWS Bedrock", "AWS Bedrock (Claude, Nova, Llama, DeepSeek — IAM or API key)"),
|
||||
ProviderEntry("azure-foundry", "Azure Foundry", "Azure Foundry (OpenAI-style or Anthropic-style endpoint — your Azure AI deployment)"),
|
||||
]
|
||||
|
||||
# Derived dicts — used throughout the codebase
|
||||
@@ -872,7 +876,16 @@ def fetch_openrouter_models(
|
||||
if _openrouter_catalog_cache is not None and not force_refresh:
|
||||
return list(_openrouter_catalog_cache)
|
||||
|
||||
fallback = list(OPENROUTER_MODELS)
|
||||
# Prefer the remotely-hosted catalog manifest; fall back to the in-repo
|
||||
# snapshot when the manifest is unreachable. Both are curated lists that
|
||||
# drive the picker; the OpenRouter live /v1/models filter (tool support,
|
||||
# free pricing) is applied on top either way.
|
||||
try:
|
||||
from hermes_cli.model_catalog import get_curated_openrouter_models
|
||||
remote = get_curated_openrouter_models()
|
||||
except Exception:
|
||||
remote = None
|
||||
fallback = list(remote) if remote else list(OPENROUTER_MODELS)
|
||||
preferred_ids = [mid for mid, _ in fallback]
|
||||
|
||||
try:
|
||||
@@ -925,6 +938,24 @@ def model_ids(*, force_refresh: bool = False) -> list[str]:
|
||||
return [mid for mid, _ in fetch_openrouter_models(force_refresh=force_refresh)]
|
||||
|
||||
|
||||
def get_curated_nous_model_ids() -> list[str]:
    """Return the curated Nous Portal model-id list.

    The remotely-hosted catalog manifest (published under
    ``website/static/api/model-catalog.json``) is preferred.  When it cannot
    be imported, raises, or yields nothing, the in-repo snapshot in
    ``_PROVIDER_MODELS["nous"]`` is used instead.

    Always returns a fresh list (never ``None``).
    """
    try:
        from hermes_cli.model_catalog import get_curated_nous_models

        manifest_ids = get_curated_nous_models()
    except Exception:
        manifest_ids = None

    # Empty or missing manifest data: use the snapshot bundled with the code.
    chosen = manifest_ids if manifest_ids else _PROVIDER_MODELS.get("nous", [])
    return list(chosen)
|
||||
|
||||
|
||||
def _ai_gateway_model_is_free(pricing: Any) -> bool:
|
||||
"""Return True if an AI Gateway model has $0 input AND output pricing."""
|
||||
if not isinstance(pricing, dict):
|
||||
@@ -1379,27 +1410,93 @@ def curated_models_for_provider(
|
||||
return [(m, "") for m in models]
|
||||
|
||||
|
||||
def detect_provider_for_model(
|
||||
def _provider_keys(provider: str) -> set[str]:
    """Return the non-empty lookup keys for ``provider``.

    The set holds the stripped, lower-cased raw name plus the result of
    ``normalize_provider`` (called with the unmodified argument); falsy
    values are dropped.
    """
    candidates = (
        (provider or "").strip().lower(),
        normalize_provider(provider),
    )
    return {candidate for candidate in candidates if candidate}
|
||||
|
||||
|
||||
def _model_in_provider_catalog(name_lower: str, providers: set[str]) -> bool:
    """Tell whether ``name_lower`` is listed for any of ``providers``.

    Each provider's entries in ``_PROVIDER_MODELS`` are lower-cased before
    being compared with ``name_lower``; providers without an entry are
    treated as having an empty catalog.
    """
    for provider_id in providers:
        for catalog_model in _PROVIDER_MODELS.get(provider_id, []):
            if catalog_model.lower() == name_lower:
                return True
    return False
|
||||
|
||||
|
||||
# Providers treated as aggregators.  Static alias matching uses a
# ``vendor/family`` prefix for these ids, and the static detection loops skip
# them when scanning other providers' catalogs for a direct match.
_AGGREGATOR_PROVIDERS = frozenset(
    {"nous", "openrouter", "ai-gateway", "copilot", "kilocode"}
)
|
||||
|
||||
|
||||
def _resolve_static_model_alias(
|
||||
name_lower: str,
|
||||
current_keys: set[str],
|
||||
) -> Optional[tuple[str, str]]:
|
||||
"""Resolve short aliases (e.g. sonnet/opus) using static catalogs only."""
|
||||
try:
|
||||
from hermes_cli.model_switch import MODEL_ALIASES
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
identity = MODEL_ALIASES.get(name_lower)
|
||||
if identity is None:
|
||||
return None
|
||||
|
||||
vendor = identity.vendor
|
||||
family = identity.family
|
||||
|
||||
def _match(provider: str) -> Optional[str]:
|
||||
models = _PROVIDER_MODELS.get(provider, [])
|
||||
if not models:
|
||||
return None
|
||||
prefix = (
|
||||
f"{vendor}/{family}"
|
||||
if provider in _AGGREGATOR_PROVIDERS
|
||||
else family
|
||||
).lower()
|
||||
for model in models:
|
||||
if model.lower().startswith(prefix):
|
||||
return model
|
||||
return None
|
||||
|
||||
for provider in current_keys:
|
||||
if matched := _match(provider):
|
||||
return provider, matched
|
||||
|
||||
for provider in _PROVIDER_MODELS:
|
||||
if provider in current_keys or provider in _AGGREGATOR_PROVIDERS:
|
||||
continue
|
||||
if matched := _match(provider):
|
||||
return provider, matched
|
||||
|
||||
for provider in _AGGREGATOR_PROVIDERS:
|
||||
if provider in current_keys and (matched := _match(provider)):
|
||||
return provider, matched
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def detect_static_provider_for_model(
|
||||
model_name: str,
|
||||
current_provider: str,
|
||||
) -> Optional[tuple[str, str]]:
|
||||
"""Auto-detect the best provider for a model name.
|
||||
"""Auto-detect a provider from static catalogs only.
|
||||
|
||||
Returns ``(provider_id, model_name)`` — the model name may be remapped
|
||||
(e.g. bare ``deepseek-chat`` → ``deepseek/deepseek-chat`` for OpenRouter).
|
||||
Returns ``(provider_id, model_name)``. The model name may be remapped
|
||||
when a static alias or bare provider name resolves to a catalog default.
|
||||
Returns ``None`` when no confident match is found.
|
||||
|
||||
Priority:
|
||||
0. Bare provider name → switch to that provider's default model
|
||||
1. Direct provider with credentials (highest)
|
||||
2. Direct provider without credentials → remap to OpenRouter slug
|
||||
3. OpenRouter catalog match
|
||||
"""
|
||||
name = (model_name or "").strip()
|
||||
if not name:
|
||||
return None
|
||||
|
||||
name_lower = name.lower()
|
||||
current_keys = _provider_keys(current_provider)
|
||||
|
||||
alias_match = _resolve_static_model_alias(name_lower, current_keys)
|
||||
if alias_match:
|
||||
return alias_match
|
||||
|
||||
# --- Step 0: bare provider name typed as model ---
|
||||
# If someone types `/model nous` or `/model anthropic`, treat it as a
|
||||
@@ -1412,64 +1509,49 @@ def detect_provider_for_model(
|
||||
if (
|
||||
resolved_provider in _PROVIDER_LABELS
|
||||
and default_models
|
||||
and resolved_provider != normalize_provider(current_provider)
|
||||
and resolved_provider not in current_keys
|
||||
):
|
||||
return (resolved_provider, default_models[0])
|
||||
|
||||
# Aggregators list other providers' models — never auto-switch TO them
|
||||
_AGGREGATORS = {"nous", "openrouter", "ai-gateway", "copilot", "kilocode"}
|
||||
|
||||
# If the model belongs to the current provider's catalog, don't suggest switching
|
||||
current_models = _PROVIDER_MODELS.get(current_provider, [])
|
||||
if any(name_lower == m.lower() for m in current_models):
|
||||
if _model_in_provider_catalog(name_lower, current_keys):
|
||||
return None
|
||||
|
||||
# --- Step 1: check static provider catalogs for a direct match ---
|
||||
direct_match: Optional[str] = None
|
||||
for pid, models in _PROVIDER_MODELS.items():
|
||||
if pid == current_provider or pid in _AGGREGATORS:
|
||||
if pid in current_keys or pid in _AGGREGATOR_PROVIDERS:
|
||||
continue
|
||||
if any(name_lower == m.lower() for m in models):
|
||||
direct_match = pid
|
||||
break
|
||||
return (pid, name)
|
||||
|
||||
if direct_match:
|
||||
# Check if we have credentials for this provider — env vars,
|
||||
# credential pool, or auth store entries.
|
||||
has_creds = False
|
||||
try:
|
||||
from hermes_cli.auth import PROVIDER_REGISTRY
|
||||
pconfig = PROVIDER_REGISTRY.get(direct_match)
|
||||
if pconfig:
|
||||
for env_var in pconfig.api_key_env_vars:
|
||||
if os.getenv(env_var, "").strip():
|
||||
has_creds = True
|
||||
break
|
||||
except Exception:
|
||||
pass
|
||||
# Also check credential pool and auth store — covers OAuth,
|
||||
# Claude Code tokens, and other non-env-var credentials (#10300).
|
||||
if not has_creds:
|
||||
try:
|
||||
from agent.credential_pool import load_pool
|
||||
pool = load_pool(direct_match)
|
||||
if pool.has_credentials():
|
||||
has_creds = True
|
||||
except Exception:
|
||||
pass
|
||||
if not has_creds:
|
||||
try:
|
||||
from hermes_cli.auth import _load_auth_store
|
||||
store = _load_auth_store()
|
||||
if direct_match in store.get("providers", {}) or direct_match in store.get("credential_pool", {}):
|
||||
has_creds = True
|
||||
except Exception:
|
||||
pass
|
||||
return None
|
||||
|
||||
# Always return the direct provider match. If credentials are
|
||||
# missing, the client init will give a clear error rather than
|
||||
# silently routing through the wrong provider (#10300).
|
||||
return (direct_match, name)
|
||||
|
||||
def detect_provider_for_model(
|
||||
model_name: str,
|
||||
current_provider: str,
|
||||
) -> Optional[tuple[str, str]]:
|
||||
"""Auto-detect the best provider for a model name.
|
||||
|
||||
Returns ``(provider_id, model_name)`` — the model name may be remapped
|
||||
(e.g. bare ``deepseek-chat`` → ``deepseek/deepseek-chat`` for OpenRouter).
|
||||
Returns ``None`` when no confident match is found.
|
||||
|
||||
Priority:
|
||||
0. Bare provider name → switch to that provider's default model
|
||||
1. Direct provider static catalog match
|
||||
2. OpenRouter catalog match
|
||||
"""
|
||||
name = (model_name or "").strip()
|
||||
if not name:
|
||||
return None
|
||||
|
||||
static_match = detect_static_provider_for_model(name, current_provider)
|
||||
if static_match:
|
||||
return static_match
|
||||
if _model_in_provider_catalog(name.lower(), _provider_keys(current_provider)):
|
||||
return None
|
||||
|
||||
# --- Step 2: check OpenRouter catalog ---
|
||||
# First try exact match (handles provider/model format)
|
||||
@@ -2571,8 +2653,8 @@ def validate_requested_model(
|
||||
)
|
||||
|
||||
return {
|
||||
"accepted": False,
|
||||
"persist": False,
|
||||
"accepted": True,
|
||||
"persist": True,
|
||||
"recognized": False,
|
||||
"message": message,
|
||||
}
|
||||
|
||||
@@ -0,0 +1,202 @@
|
||||
"""Oneshot (-z) mode: send a prompt, get the final content block, exit.
|
||||
|
||||
Bypasses cli.py entirely. No banner, no spinner, no session_id line,
|
||||
no stderr chatter. Just the agent's final text to stdout.
|
||||
|
||||
Toolsets = whatever the user has configured for "cli" in `hermes tools`.
|
||||
Rules / memory / AGENTS.md / preloaded skills = same as a normal chat turn.
|
||||
Approvals = auto-bypassed (HERMES_YOLO_MODE=1 is set for the call).
|
||||
Working directory = the user's CWD (AGENTS.md etc. resolve from there as usual).
|
||||
|
||||
Model / provider selection mirrors `hermes chat`:
|
||||
- Both optional. If omitted, use the user's configured default.
|
||||
- If both given, pair them exactly as given.
|
||||
- If only --model given, auto-detect the provider that serves it.
|
||||
- If only --provider given, error out (ambiguous — caller must pick a model).
|
||||
|
||||
Env var fallbacks (used when the corresponding arg is not passed):
|
||||
- HERMES_INFERENCE_MODEL
|
||||
- HERMES_INFERENCE_PROVIDER (already read by resolve_runtime_provider)
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
from contextlib import redirect_stderr, redirect_stdout
|
||||
from typing import Optional
|
||||
|
||||
|
||||
def run_oneshot(
    prompt: str,
    model: Optional[str] = None,
    provider: Optional[str] = None,
) -> int:
    """Execute a single prompt and print only the final content block.

    Args:
        prompt: The user message to send.
        model: Optional model override.  Falls back to HERMES_INFERENCE_MODEL
            env var, then config.yaml's model.default / model.model.
        provider: Optional provider override.  Falls back to
            HERMES_INFERENCE_PROVIDER env var, then config.yaml's
            model.provider, then "auto".

    Returns:
        The exit code: ``2`` when ``provider`` is given without any model
        (argument or HERMES_INFERENCE_MODEL), ``0`` otherwise.  Caller should
        ``sys.exit()`` with the return.
    """
    # Silence every stdlib logger for the duration so AIAgent, tools, and
    # provider adapters cannot write log output to the terminal.
    # NOTE: logging.disable() is process-wide and is checked before a record
    # reaches any handler, so file handlers are muted as well, not just the
    # stderr stream.
    logging.disable(logging.CRITICAL)

    # --provider without --model is ambiguous: carrying the user's configured
    # model across to a different provider is usually wrong (that provider may
    # not host it), and silently picking the provider's catalog default hides
    # the mismatch.  Require the caller to be explicit.  Validate BEFORE the
    # stderr redirect so the message actually reaches the terminal.
    env_model_early = os.getenv("HERMES_INFERENCE_MODEL", "").strip()
    if provider and not ((model or "").strip() or env_model_early):
        sys.stderr.write(
            "hermes -z: --provider requires --model (or HERMES_INFERENCE_MODEL). "
            "Pass both explicitly, or neither to use your configured defaults.\n"
        )
        return 2

    # Auto-approve any shell / tool approvals.  Non-interactive by
    # definition: a prompt would hang forever.
    os.environ["HERMES_YOLO_MODE"] = "1"
    os.environ["HERMES_ACCEPT_HOOKS"] = "1"

    # Redirect stderr AND stdout to devnull for the entire call tree; the
    # final response is printed to the real stdout afterwards.  The context
    # managers restore both streams and close devnull even if the agent raises.
    real_stdout = sys.stdout
    with open(os.devnull, "w") as devnull, redirect_stdout(devnull), redirect_stderr(devnull):
        response = _run_agent(prompt, model=model, provider=provider)

    if response:
        real_stdout.write(response)
        if not response.endswith("\n"):
            real_stdout.write("\n")
        real_stdout.flush()
    return 0
|
||||
|
||||
|
||||
def _run_agent(
    prompt: str,
    model: Optional[str] = None,
    provider: Optional[str] = None,
) -> str:
    """Build an AIAgent from the resolved runtime and run one conversation.

    Args:
        prompt: The user message handed to ``agent.chat``.
        model: Optional explicit model; otherwise HERMES_INFERENCE_MODEL,
            then the configured model.
        provider: Optional explicit provider; when absent and the model was
            given explicitly (argument or env var), the provider is
            auto-detected from the model name.

    Returns:
        The final response string (``""`` when the agent returns nothing).
    """
    # Local imports keep top-level CLI startup cheap: they only run when
    # oneshot mode is actually invoked.
    from hermes_cli.config import load_config
    from hermes_cli.models import detect_provider_for_model
    from hermes_cli.runtime_provider import resolve_runtime_provider
    from hermes_cli.tools_config import _get_platform_tools
    from run_agent import AIAgent

    cfg = load_config()

    # Effective model: explicit arg -> env var -> config.
    model_cfg = cfg.get("model") or {}
    cfg_model = (
        model_cfg
        if isinstance(model_cfg, str)
        else model_cfg.get("default") or model_cfg.get("model") or ""
    )
    env_model = os.getenv("HERMES_INFERENCE_MODEL", "").strip()
    explicit_model = (model or "").strip() or env_model
    effective_model = explicit_model or cfg_model

    # Effective provider: explicit arg -> auto-detect from an explicitly
    # requested model -> env / config (handled inside
    # resolve_runtime_provider).
    #
    # Auto-detection only runs when the model came from the argument or the
    # env var.  A model taken from config is the "use my defaults" path, where
    # the configured provider is already the right one.  Without detection,
    # resolve_runtime_provider() would fall back to the configured default
    # provider, which may not host the model the caller just asked for.
    effective_provider = (provider or "").strip() or None
    if effective_provider is None and explicit_model:
        cfg_provider = (
            str(model_cfg.get("provider") or "").strip().lower()
            if isinstance(model_cfg, dict)
            else ""
        )
        current_provider = (
            cfg_provider
            or os.getenv("HERMES_INFERENCE_PROVIDER", "").strip().lower()
            or "auto"
        )
        detected = detect_provider_for_model(explicit_model, current_provider)
        if detected:
            effective_provider, effective_model = detected

    runtime = resolve_runtime_provider(
        requested=effective_provider,
        target_model=effective_model or None,
    )

    # Toolsets the user enabled for "cli"; sorted() turns the collection into
    # a list with stable ordering for AIAgent.
    toolsets_list = sorted(_get_platform_tools(cfg, "cli"))

    agent = AIAgent(
        api_key=runtime.get("api_key"),
        base_url=runtime.get("base_url"),
        provider=runtime.get("provider"),
        api_mode=runtime.get("api_mode"),
        model=effective_model,
        enabled_toolsets=toolsets_list,
        quiet_mode=True,
        platform="cli",
        credential_pool=runtime.get("credential_pool"),
        # Only the clarify callback is wired; there is no user at a terminal
        # in oneshot mode:
        #   - clarify -> returns a synthetic "pick a default" instruction so
        #     the agent continues instead of stalling on the tool's built-in
        #     "not available" error
        #   - sudo password prompt -> terminal_tool gates on
        #     HERMES_INTERACTIVE, which is never set here
        #   - shell-hook approval -> auto-approved via HERMES_ACCEPT_HOOKS=1
        #     (set by the caller); also falls back to deny on non-tty
        #   - dangerous-command approval -> bypassed via HERMES_YOLO_MODE=1
        #   - skill secret capture -> returns gracefully when no callback set
        clarify_callback=_oneshot_clarify_callback,
    )

    # Belt-and-braces: make sure AIAgent doesn't invoke any streaming
    # display callbacks that would bypass the stdout capture.
    agent.suppress_status_output = True
    agent.stream_delta_callback = None
    agent.tool_gen_callback = None

    return agent.chat(prompt) or ""
|
||||
|
||||
|
||||
def _oneshot_clarify_callback(question: str, choices=None) -> str:
|
||||
"""Clarify is disabled in oneshot mode — tell the agent to pick a
|
||||
default and proceed instead of stalling or erroring."""
|
||||
if choices:
|
||||
return (
|
||||
f"[oneshot mode: no user available. Pick the best option from "
|
||||
f"{choices} using your own judgment and continue.]"
|
||||
)
|
||||
return (
|
||||
"[oneshot mode: no user available. Make the most reasonable "
|
||||
"assumption you can and continue.]"
|
||||
)
|
||||
@@ -167,6 +167,12 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = {
|
||||
transport="openai_chat",
|
||||
base_url_env_var="OLLAMA_BASE_URL",
|
||||
),
|
||||
# Azure Foundry: supports both OpenAI-style and Anthropic-style endpoints.
|
||||
# The transport is determined at runtime from config.yaml model.api_mode.
|
||||
"azure-foundry": HermesOverlay(
|
||||
transport="openai_chat", # default; overridden by api_mode in config
|
||||
base_url_env_var="AZURE_FOUNDRY_BASE_URL",
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -221,6 +221,19 @@ def _resolve_runtime_from_pool_entry(
|
||||
elif provider == "copilot":
|
||||
api_mode = _copilot_runtime_api_mode(model_cfg, getattr(entry, "runtime_api_key", ""))
|
||||
base_url = base_url or PROVIDER_REGISTRY["copilot"].inference_base_url
|
||||
elif provider == "azure-foundry":
|
||||
# Azure Foundry: read api_mode and base_url from config
|
||||
cfg_provider = str(model_cfg.get("provider") or "").strip().lower()
|
||||
if cfg_provider == "azure-foundry":
|
||||
cfg_base_url = str(model_cfg.get("base_url") or "").strip().rstrip("/")
|
||||
if cfg_base_url:
|
||||
base_url = cfg_base_url
|
||||
configured_mode = _parse_api_mode(model_cfg.get("api_mode"))
|
||||
if configured_mode:
|
||||
api_mode = configured_mode
|
||||
# For Anthropic-style endpoints, strip /v1 suffix
|
||||
if api_mode == "anthropic_messages":
|
||||
base_url = re.sub(r"/v1/?$", "", base_url)
|
||||
else:
|
||||
configured_provider = str(model_cfg.get("provider") or "").strip().lower()
|
||||
# Honour model.base_url from config.yaml when the configured provider
|
||||
@@ -589,6 +602,71 @@ def _resolve_openrouter_runtime(
|
||||
}
|
||||
|
||||
|
||||
def _resolve_azure_foundry_runtime(
|
||||
*,
|
||||
requested_provider: str,
|
||||
model_cfg: Dict[str, Any],
|
||||
explicit_api_key: Optional[str] = None,
|
||||
explicit_base_url: Optional[str] = None,
|
||||
) -> Dict[str, Any]:
|
||||
"""Resolve an Azure Foundry runtime entry.
|
||||
|
||||
Reads ``model.base_url`` + ``model.api_mode`` from config.yaml (or
|
||||
explicit overrides), pulls the API key from ``.env`` / env var, and
|
||||
strips a trailing ``/v1`` for Anthropic-style endpoints because the
|
||||
Anthropic SDK appends ``/v1/messages`` internally.
|
||||
|
||||
Raises :class:`AuthError` when required values are missing.
|
||||
"""
|
||||
explicit_api_key = str(explicit_api_key or "").strip()
|
||||
explicit_base_url_clean = str(explicit_base_url or "").strip().rstrip("/")
|
||||
|
||||
cfg_provider = str(model_cfg.get("provider") or "").strip().lower()
|
||||
cfg_base_url = ""
|
||||
cfg_api_mode = "chat_completions"
|
||||
if cfg_provider == "azure-foundry":
|
||||
cfg_base_url = str(model_cfg.get("base_url") or "").strip().rstrip("/")
|
||||
cfg_api_mode = _parse_api_mode(model_cfg.get("api_mode")) or "chat_completions"
|
||||
|
||||
env_base_url = os.getenv("AZURE_FOUNDRY_BASE_URL", "").strip().rstrip("/")
|
||||
base_url = explicit_base_url_clean or cfg_base_url or env_base_url
|
||||
if not base_url:
|
||||
raise AuthError(
|
||||
"Azure Foundry requires a base URL. Set it via 'hermes model' or "
|
||||
"the AZURE_FOUNDRY_BASE_URL environment variable."
|
||||
)
|
||||
|
||||
api_key = explicit_api_key
|
||||
if not api_key:
|
||||
try:
|
||||
from hermes_cli.config import get_env_value
|
||||
api_key = get_env_value("AZURE_FOUNDRY_API_KEY") or ""
|
||||
except Exception:
|
||||
api_key = ""
|
||||
if not api_key:
|
||||
api_key = os.getenv("AZURE_FOUNDRY_API_KEY", "").strip()
|
||||
if not api_key:
|
||||
raise AuthError(
|
||||
"Azure Foundry requires an API key. Set AZURE_FOUNDRY_API_KEY in "
|
||||
"~/.hermes/.env or run 'hermes model' to configure."
|
||||
)
|
||||
|
||||
# Anthropic SDK appends /v1/messages itself, so strip any trailing /v1
|
||||
# we inherited from the configured base_url to avoid double-/v1 paths.
|
||||
if cfg_api_mode == "anthropic_messages":
|
||||
base_url = re.sub(r"/v1/?$", "", base_url)
|
||||
|
||||
source = "explicit" if (explicit_api_key or explicit_base_url) else "config"
|
||||
return {
|
||||
"provider": "azure-foundry",
|
||||
"api_mode": cfg_api_mode,
|
||||
"base_url": base_url,
|
||||
"api_key": api_key,
|
||||
"source": source,
|
||||
"requested_provider": requested_provider,
|
||||
}
|
||||
|
||||
|
||||
def _resolve_explicit_runtime(
|
||||
*,
|
||||
provider: str,
|
||||
@@ -678,6 +756,15 @@ def _resolve_explicit_runtime(
|
||||
"requested_provider": requested_provider,
|
||||
}
|
||||
|
||||
# Azure Foundry: user-configured endpoint with selectable API mode
|
||||
if provider == "azure-foundry":
|
||||
return _resolve_azure_foundry_runtime(
|
||||
requested_provider=requested_provider,
|
||||
model_cfg=model_cfg,
|
||||
explicit_api_key=explicit_api_key,
|
||||
explicit_base_url=explicit_base_url,
|
||||
)
|
||||
|
||||
pconfig = PROVIDER_REGISTRY.get(provider)
|
||||
if pconfig and pconfig.auth_type == "api_key":
|
||||
env_url = ""
|
||||
@@ -746,6 +833,40 @@ def resolve_runtime_provider(
|
||||
"""
|
||||
requested_provider = resolve_requested_provider(requested)
|
||||
|
||||
# Azure Anthropic short-circuit: when explicitly targeting an Azure endpoint
|
||||
# with provider="anthropic", bypass _resolve_named_custom_runtime (which would
|
||||
# return provider="custom" with chat_completions api_mode and no valid key).
|
||||
# Instead, use the Azure key directly with anthropic_messages api_mode.
|
||||
_eff_base = (explicit_base_url or "").strip()
|
||||
if requested_provider == "anthropic" and "azure.com" in _eff_base:
|
||||
_azure_key = (
|
||||
(explicit_api_key or "").strip()
|
||||
or os.getenv("AZURE_ANTHROPIC_KEY", "").strip()
|
||||
or os.getenv("ANTHROPIC_API_KEY", "").strip()
|
||||
)
|
||||
return {
|
||||
"provider": "anthropic",
|
||||
"api_mode": "anthropic_messages",
|
||||
"base_url": _eff_base.rstrip("/"),
|
||||
"api_key": _azure_key,
|
||||
"source": "azure-explicit",
|
||||
"requested_provider": requested_provider,
|
||||
}
|
||||
|
||||
# Azure Foundry: user-configured endpoint with selectable API mode
|
||||
# (OpenAI-style chat_completions or Anthropic-style anthropic_messages).
|
||||
# Resolve before the custom-runtime / pool / generic paths so Azure
|
||||
# config is always picked up from model.base_url + model.api_mode,
|
||||
# regardless of whether the caller passed explicit_* args.
|
||||
if requested_provider == "azure-foundry":
|
||||
azure_runtime = _resolve_azure_foundry_runtime(
|
||||
requested_provider=requested_provider,
|
||||
model_cfg=_get_model_config(),
|
||||
explicit_api_key=explicit_api_key,
|
||||
explicit_base_url=explicit_base_url,
|
||||
)
|
||||
return azure_runtime
|
||||
|
||||
custom_runtime = _resolve_named_custom_runtime(
|
||||
requested_provider=requested_provider,
|
||||
explicit_api_key=explicit_api_key,
|
||||
@@ -924,13 +1045,6 @@ def resolve_runtime_provider(
|
||||
|
||||
# Anthropic (native Messages API)
|
||||
if provider == "anthropic":
|
||||
from agent.anthropic_adapter import resolve_anthropic_token
|
||||
token = resolve_anthropic_token()
|
||||
if not token:
|
||||
raise AuthError(
|
||||
"No Anthropic credentials found. Set ANTHROPIC_TOKEN or ANTHROPIC_API_KEY, "
|
||||
"run 'claude setup-token', or authenticate with 'claude /login'."
|
||||
)
|
||||
# Allow base URL override from config.yaml model.base_url, but only
|
||||
# when the configured provider is anthropic — otherwise a non-Anthropic
|
||||
# base_url (e.g. Codex endpoint) would leak into Anthropic requests.
|
||||
@@ -939,6 +1053,33 @@ def resolve_runtime_provider(
|
||||
if cfg_provider == "anthropic":
|
||||
cfg_base_url = (model_cfg.get("base_url") or "").strip().rstrip("/")
|
||||
base_url = cfg_base_url or "https://api.anthropic.com"
|
||||
|
||||
# For Azure AI Foundry endpoints, use ANTHROPIC_API_KEY directly —
|
||||
# Claude Code OAuth tokens (sk-ant-oat01) are not accepted by Azure.
|
||||
# Azure keys don't start with "sk-ant-" so resolve_anthropic_token()
|
||||
# would find the Claude Code OAuth token first (priority 3) and return
|
||||
# that instead, causing 401s. Detect Azure endpoints and use the env
|
||||
# key directly to bypass the OAuth priority chain.
|
||||
_is_azure_endpoint = "azure.com" in base_url.lower() or (
|
||||
cfg_base_url and "azure.com" in cfg_base_url.lower()
|
||||
)
|
||||
if _is_azure_endpoint:
|
||||
token = (
|
||||
os.getenv("AZURE_ANTHROPIC_KEY", "").strip()
|
||||
or os.getenv("ANTHROPIC_API_KEY", "").strip()
|
||||
)
|
||||
if not token:
|
||||
raise AuthError(
|
||||
"No Azure Anthropic API key found. Set AZURE_ANTHROPIC_KEY or ANTHROPIC_API_KEY."
|
||||
)
|
||||
else:
|
||||
from agent.anthropic_adapter import resolve_anthropic_token
|
||||
token = resolve_anthropic_token()
|
||||
if not token:
|
||||
raise AuthError(
|
||||
"No Anthropic credentials found. Set ANTHROPIC_TOKEN or ANTHROPIC_API_KEY, "
|
||||
"run 'claude setup-token', or authenticate with 'claude /login'."
|
||||
)
|
||||
return {
|
||||
"provider": "anthropic",
|
||||
"api_mode": "anthropic_messages",
|
||||
|
||||
+27
-49
@@ -2863,17 +2863,6 @@ SETUP_SECTIONS = [
|
||||
("agent", "Agent Settings", setup_agent_settings),
|
||||
]
|
||||
|
||||
# The returning-user menu intentionally omits standalone TTS because model setup
|
||||
# already includes TTS selection and tools setup covers the rest of the provider
|
||||
# configuration. Keep this list in the same order as the visible menu entries.
|
||||
RETURNING_USER_MENU_SECTION_KEYS = [
|
||||
"model",
|
||||
"terminal",
|
||||
"gateway",
|
||||
"tools",
|
||||
"agent",
|
||||
]
|
||||
|
||||
|
||||
def run_setup_wizard(args):
|
||||
"""Run the interactive setup wizard.
|
||||
@@ -2898,6 +2887,9 @@ def run_setup_wizard(args):
|
||||
save_config(copy.deepcopy(DEFAULT_CONFIG))
|
||||
print_success("Configuration reset to defaults.")
|
||||
|
||||
reconfigure_requested = bool(getattr(args, "reconfigure", False))
|
||||
quick_requested = bool(getattr(args, "quick", False))
|
||||
|
||||
config = load_config()
|
||||
hermes_home = get_hermes_home()
|
||||
|
||||
@@ -2989,50 +2981,36 @@ def run_setup_wizard(args):
|
||||
migration_ran = False
|
||||
|
||||
if is_existing:
|
||||
# ── Returning User Menu ──
|
||||
print()
|
||||
print_header("Welcome Back!")
|
||||
print_success("You already have Hermes configured.")
|
||||
print()
|
||||
|
||||
menu_choices = [
|
||||
"Quick Setup - configure missing items only",
|
||||
"Full Setup - reconfigure everything",
|
||||
"Model & Provider",
|
||||
"Terminal Backend",
|
||||
"Messaging Platforms (Gateway)",
|
||||
"Tools",
|
||||
"Agent Settings",
|
||||
"Exit",
|
||||
]
|
||||
choice = prompt_choice("What would you like to do?", menu_choices, 0)
|
||||
|
||||
if choice == 0:
|
||||
# Quick setup
|
||||
# Existing install — default is the full-wizard reconfigure flow.
|
||||
# Every prompt shows the current value as its default, so pressing
|
||||
# Enter keeps it. Opt into `--quick` for the narrow "just fill in
|
||||
# missing items" flow (useful after a partial OpenClaw migration
|
||||
# or when a required API key got cleared).
|
||||
if quick_requested:
|
||||
_run_quick_setup(config, hermes_home)
|
||||
return
|
||||
elif choice == 1:
|
||||
# Full setup — fall through to run all sections
|
||||
pass
|
||||
elif choice == 7:
|
||||
print_info("Exiting. Run 'hermes setup' again when ready.")
|
||||
return
|
||||
elif 2 <= choice <= 6:
|
||||
# Individual section — map by key, not by position.
|
||||
# SETUP_SECTIONS includes TTS but the returning-user menu skips it,
|
||||
# so positional indexing (choice - 2) would dispatch the wrong section.
|
||||
section_key = RETURNING_USER_MENU_SECTION_KEYS[choice - 2]
|
||||
section = next((s for s in SETUP_SECTIONS if s[0] == section_key), None)
|
||||
if section:
|
||||
_, label, func = section
|
||||
func(config)
|
||||
save_config(config)
|
||||
_print_setup_summary(config, hermes_home)
|
||||
return
|
||||
|
||||
print()
|
||||
print_header("Reconfigure")
|
||||
print_success("You already have Hermes configured.")
|
||||
print_info("Running the full wizard — each prompt shows your current value.")
|
||||
print_info("Press Enter to keep it, or type a new value to change it.")
|
||||
print_info("")
|
||||
print_info("Tip: jump straight to a section with 'hermes setup model|terminal|")
|
||||
print_info(" gateway|tools|agent', or fill only missing items with --quick.")
|
||||
# Fall through to the "Full Setup — run all sections" block below.
|
||||
# --reconfigure is now the default on existing installs; the flag
|
||||
# is preserved for backwards compatibility but is a no-op here.
|
||||
else:
|
||||
# ── First-Time Setup ──
|
||||
print()
|
||||
|
||||
# --reconfigure / --quick on a fresh install are meaningless — fall
|
||||
# through to the normal first-time flow.
|
||||
if reconfigure_requested or quick_requested:
|
||||
print_info("No existing configuration found — running first-time setup.")
|
||||
print()
|
||||
|
||||
# Offer OpenClaw migration before configuration begins
|
||||
migration_ran = _offer_openclaw_migration(hermes_home)
|
||||
if migration_ran:
|
||||
|
||||
+1
-2
@@ -10,8 +10,7 @@ import random
|
||||
|
||||
TIPS = [
|
||||
# --- Slash Commands ---
|
||||
"/btw <question> asks a quick side question without tools or history — great for clarifications.",
|
||||
"/background <prompt> runs a task in a separate session while your current one stays free.",
|
||||
"/background <prompt> (alias /bg or /btw) runs a task in a separate session while your current one stays free.",
|
||||
"/branch forks the current session so you can explore a different direction without losing progress.",
|
||||
"/compress manually compresses conversation context when things get long.",
|
||||
"/rollback lists filesystem checkpoints — restore files the agent modified to any prior state.",
|
||||
|
||||
@@ -3103,13 +3103,23 @@ def _mount_plugin_api_routes():
|
||||
_log.warning("Plugin %s declares api=%s but file not found", plugin["name"], api_file_name)
|
||||
continue
|
||||
try:
|
||||
spec = importlib.util.spec_from_file_location(
|
||||
f"hermes_dashboard_plugin_{plugin['name']}", api_path,
|
||||
)
|
||||
module_name = f"hermes_dashboard_plugin_{plugin['name']}"
|
||||
spec = importlib.util.spec_from_file_location(module_name, api_path)
|
||||
if spec is None or spec.loader is None:
|
||||
continue
|
||||
mod = importlib.util.module_from_spec(spec)
|
||||
spec.loader.exec_module(mod)
|
||||
# Register in sys.modules BEFORE exec_module so pydantic/FastAPI
|
||||
# can resolve forward references (e.g. models defined in a file
|
||||
# that uses `from __future__ import annotations`). Without this,
|
||||
# TypeAdapter lazy-build fails at first request with
|
||||
# "is not fully defined" because the module namespace isn't
|
||||
# reachable by name for string-annotation resolution.
|
||||
sys.modules[module_name] = mod
|
||||
try:
|
||||
spec.loader.exec_module(mod)
|
||||
except Exception:
|
||||
sys.modules.pop(module_name, None)
|
||||
raise
|
||||
router = getattr(mod, "router", None)
|
||||
if router is None:
|
||||
_log.warning("Plugin %s api file has no 'router' attribute", plugin["name"])
|
||||
|
||||
+29
-5
@@ -31,7 +31,7 @@ T = TypeVar("T")
|
||||
|
||||
DEFAULT_DB_PATH = get_hermes_home() / "state.db"
|
||||
|
||||
SCHEMA_VERSION = 8
|
||||
SCHEMA_VERSION = 9
|
||||
|
||||
SCHEMA_SQL = """
|
||||
CREATE TABLE IF NOT EXISTS schema_version (
|
||||
@@ -83,7 +83,8 @@ CREATE TABLE IF NOT EXISTS messages (
|
||||
reasoning TEXT,
|
||||
reasoning_content TEXT,
|
||||
reasoning_details TEXT,
|
||||
codex_reasoning_items TEXT
|
||||
codex_reasoning_items TEXT,
|
||||
codex_message_items TEXT
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS state_meta (
|
||||
@@ -356,6 +357,15 @@ class SessionDB:
|
||||
except sqlite3.OperationalError:
|
||||
pass # Column already exists
|
||||
cursor.execute("UPDATE schema_version SET version = 8")
|
||||
if current_version < 9:
|
||||
# v9: preserve replayable Codex assistant message ids/phases so
|
||||
# follow-up turns can rebuild Responses API message items instead
|
||||
# of flattening everything to plain assistant text.
|
||||
try:
|
||||
cursor.execute('ALTER TABLE messages ADD COLUMN "codex_message_items" TEXT')
|
||||
except sqlite3.OperationalError:
|
||||
pass # Column already exists
|
||||
cursor.execute("UPDATE schema_version SET version = 9")
|
||||
|
||||
# Unique title index — always ensure it exists (safe to run after migrations
|
||||
# since the title column is guaranteed to exist at this point)
|
||||
@@ -956,6 +966,7 @@ class SessionDB:
|
||||
reasoning_content: str = None,
|
||||
reasoning_details: Any = None,
|
||||
codex_reasoning_items: Any = None,
|
||||
codex_message_items: Any = None,
|
||||
) -> int:
|
||||
"""
|
||||
Append a message to a session. Returns the message row ID.
|
||||
@@ -972,6 +983,10 @@ class SessionDB:
|
||||
json.dumps(codex_reasoning_items)
|
||||
if codex_reasoning_items else None
|
||||
)
|
||||
codex_message_items_json = (
|
||||
json.dumps(codex_message_items)
|
||||
if codex_message_items else None
|
||||
)
|
||||
tool_calls_json = json.dumps(tool_calls) if tool_calls else None
|
||||
|
||||
# Pre-compute tool call count
|
||||
@@ -983,8 +998,9 @@ class SessionDB:
|
||||
cursor = conn.execute(
|
||||
"""INSERT INTO messages (session_id, role, content, tool_call_id,
|
||||
tool_calls, tool_name, timestamp, token_count, finish_reason,
|
||||
reasoning, reasoning_content, reasoning_details, codex_reasoning_items)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
|
||||
reasoning, reasoning_content, reasoning_details, codex_reasoning_items,
|
||||
codex_message_items)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
|
||||
(
|
||||
session_id,
|
||||
role,
|
||||
@@ -999,6 +1015,7 @@ class SessionDB:
|
||||
reasoning_content,
|
||||
reasoning_details_json,
|
||||
codex_items_json,
|
||||
codex_message_items_json,
|
||||
),
|
||||
)
|
||||
msg_id = cursor.lastrowid
|
||||
@@ -1112,7 +1129,8 @@ class SessionDB:
|
||||
with self._lock:
|
||||
cursor = self._conn.execute(
|
||||
"SELECT role, content, tool_call_id, tool_calls, tool_name, "
|
||||
"reasoning, reasoning_content, reasoning_details, codex_reasoning_items "
|
||||
"reasoning, reasoning_content, reasoning_details, codex_reasoning_items, "
|
||||
"codex_message_items "
|
||||
"FROM messages WHERE session_id = ? ORDER BY timestamp, id",
|
||||
(session_id,),
|
||||
)
|
||||
@@ -1150,6 +1168,12 @@ class SessionDB:
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
logger.warning("Failed to deserialize codex_reasoning_items, falling back to None")
|
||||
msg["codex_reasoning_items"] = None
|
||||
if row["codex_message_items"]:
|
||||
try:
|
||||
msg["codex_message_items"] = json.loads(row["codex_message_items"])
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
logger.warning("Failed to deserialize codex_message_items, falling back to None")
|
||||
msg["codex_message_items"] = None
|
||||
messages.append(msg)
|
||||
return messages
|
||||
|
||||
|
||||
@@ -24,6 +24,7 @@ import json
|
||||
import asyncio
|
||||
import logging
|
||||
import threading
|
||||
import time
|
||||
from typing import Dict, Any, List, Optional, Tuple
|
||||
|
||||
from tools.registry import discover_builtin_tools, registry
|
||||
@@ -567,6 +568,14 @@ def handle_function_call(
|
||||
except Exception:
|
||||
pass # file_tools may not be loaded yet
|
||||
|
||||
# Measure tool dispatch latency so post_tool_call and
|
||||
# transform_tool_result hooks can observe per-tool duration.
|
||||
# Inspired by Claude Code 2.1.119, which added ``duration_ms`` to
|
||||
# PostToolUse hook inputs so plugin authors can build latency
|
||||
# dashboards, budget alerts, and regression canaries without having
|
||||
# to wrap every tool manually. We use monotonic() so the value is
|
||||
# unaffected by wall-clock adjustments during the call.
|
||||
_dispatch_start = time.monotonic()
|
||||
if function_name == "execute_code":
|
||||
# Prefer the caller-provided list so subagents can't overwrite
|
||||
# the parent's tool set via the process-global.
|
||||
@@ -582,6 +591,7 @@ def handle_function_call(
|
||||
task_id=task_id,
|
||||
user_task=user_task,
|
||||
)
|
||||
duration_ms = int((time.monotonic() - _dispatch_start) * 1000)
|
||||
|
||||
try:
|
||||
from hermes_cli.plugins import invoke_hook
|
||||
@@ -593,6 +603,7 @@ def handle_function_call(
|
||||
task_id=task_id or "",
|
||||
session_id=session_id or "",
|
||||
tool_call_id=tool_call_id or "",
|
||||
duration_ms=duration_ms,
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
@@ -613,6 +624,7 @@ def handle_function_call(
|
||||
task_id=task_id or "",
|
||||
session_id=session_id or "",
|
||||
tool_call_id=tool_call_id or "",
|
||||
duration_ms=duration_ms,
|
||||
)
|
||||
for hook_result in hook_results:
|
||||
if isinstance(hook_result, str):
|
||||
|
||||
+1591
File diff suppressed because it is too large
Load Diff
+752
@@ -0,0 +1,752 @@
|
||||
/*
|
||||
* Hermes Kanban — dashboard plugin styles.
|
||||
*
|
||||
* All colors reference theme CSS vars so the board reskins with the
|
||||
* active dashboard theme. No hardcoded palette.
|
||||
*/
|
||||
|
||||
.hermes-kanban {
|
||||
width: 100%;
|
||||
}
|
||||
|
||||
/* ---- Columns layout -------------------------------------------------- */
|
||||
|
||||
.hermes-kanban-columns {
|
||||
display: grid;
|
||||
grid-template-columns: repeat(auto-fit, minmax(260px, 1fr));
|
||||
gap: 0.75rem;
|
||||
align-items: start;
|
||||
}
|
||||
|
||||
.hermes-kanban-column {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
background: color-mix(in srgb, var(--color-card) 85%, transparent);
|
||||
border: 1px solid var(--color-border);
|
||||
border-radius: var(--radius);
|
||||
padding: 0.5rem;
|
||||
min-height: 200px;
|
||||
max-height: calc(100vh - 220px);
|
||||
transition: border-color 120ms ease, background-color 120ms ease;
|
||||
}
|
||||
|
||||
.hermes-kanban-column--drop {
|
||||
border-color: var(--color-ring);
|
||||
background: color-mix(in srgb, var(--color-ring) 8%, var(--color-card));
|
||||
}
|
||||
|
||||
.hermes-kanban-column-header {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.5rem;
|
||||
padding: 0.25rem 0.25rem 0.35rem;
|
||||
font-weight: 600;
|
||||
font-size: 0.85rem;
|
||||
color: var(--color-foreground);
|
||||
}
|
||||
|
||||
.hermes-kanban-column-label {
|
||||
flex: 1;
|
||||
letter-spacing: 0.01em;
|
||||
}
|
||||
|
||||
.hermes-kanban-column-count {
|
||||
font-variant-numeric: tabular-nums;
|
||||
color: var(--color-muted-foreground);
|
||||
font-size: 0.75rem;
|
||||
font-weight: 500;
|
||||
}
|
||||
|
||||
.hermes-kanban-column-add {
|
||||
appearance: none;
|
||||
background: transparent;
|
||||
border: 1px solid var(--color-border);
|
||||
color: var(--color-foreground);
|
||||
border-radius: var(--radius-sm, 0.25rem);
|
||||
width: 22px;
|
||||
height: 22px;
|
||||
line-height: 1;
|
||||
font-size: 1rem;
|
||||
cursor: pointer;
|
||||
}
|
||||
.hermes-kanban-column-add:hover {
|
||||
background: color-mix(in srgb, var(--color-foreground) 8%, transparent);
|
||||
}
|
||||
|
||||
.hermes-kanban-column-sub {
|
||||
padding: 0 0.25rem 0.5rem;
|
||||
font-size: 0.7rem;
|
||||
color: var(--color-muted-foreground);
|
||||
border-bottom: 1px solid color-mix(in srgb, var(--color-border) 60%, transparent);
|
||||
margin-bottom: 0.5rem;
|
||||
}
|
||||
|
||||
.hermes-kanban-column-body {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 0.45rem;
|
||||
overflow-y: auto;
|
||||
padding-right: 0.1rem;
|
||||
}
|
||||
|
||||
.hermes-kanban-empty {
|
||||
padding: 1.5rem 0.5rem;
|
||||
text-align: center;
|
||||
font-size: 0.75rem;
|
||||
color: var(--color-muted-foreground);
|
||||
border: 1px dashed color-mix(in srgb, var(--color-border) 70%, transparent);
|
||||
border-radius: var(--radius-sm, 0.25rem);
|
||||
}
|
||||
|
||||
/* ---- Status dots ----------------------------------------------------- */
|
||||
|
||||
.hermes-kanban-dot {
|
||||
display: inline-block;
|
||||
width: 0.5rem;
|
||||
height: 0.5rem;
|
||||
border-radius: 999px;
|
||||
background: var(--color-muted-foreground);
|
||||
}
|
||||
.hermes-kanban-dot-triage { background: #b47dd6; } /* lilac — fresh/unspecified */
|
||||
.hermes-kanban-dot-todo { background: var(--color-muted-foreground); }
|
||||
.hermes-kanban-dot-ready { background: #d4b348; } /* amber */
|
||||
.hermes-kanban-dot-running { background: #3fb97d; } /* green */
|
||||
.hermes-kanban-dot-blocked { background: var(--color-destructive, #d14a4a); }
|
||||
.hermes-kanban-dot-done { background: #4a8cd1; } /* blue */
|
||||
.hermes-kanban-dot-archived { background: var(--color-border); }
|
||||
|
||||
/* ---- Progress pill (N/M child tasks done) --------------------------- */
|
||||
|
||||
.hermes-kanban-progress {
|
||||
font-family: var(--font-mono, ui-monospace, monospace);
|
||||
font-size: 0.62rem;
|
||||
padding: 0.05rem 0.35rem;
|
||||
border-radius: 999px;
|
||||
background: color-mix(in srgb, var(--color-foreground) 8%, transparent);
|
||||
border: 1px solid color-mix(in srgb, var(--color-border) 80%, transparent);
|
||||
color: var(--color-muted-foreground);
|
||||
letter-spacing: 0.02em;
|
||||
}
|
||||
.hermes-kanban-progress--full {
|
||||
background: color-mix(in srgb, #3fb97d 22%, transparent);
|
||||
border-color: color-mix(in srgb, #3fb97d 45%, transparent);
|
||||
color: var(--color-foreground);
|
||||
}
|
||||
|
||||
/* ---- Lanes (per-profile sub-grouping inside Running) ---------------- */
|
||||
|
||||
.hermes-kanban-lane {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 0.35rem;
|
||||
padding: 0.25rem 0 0.35rem;
|
||||
border-top: 1px dashed color-mix(in srgb, var(--color-border) 70%, transparent);
|
||||
}
|
||||
.hermes-kanban-lane:first-child {
|
||||
border-top: 0;
|
||||
padding-top: 0;
|
||||
}
|
||||
.hermes-kanban-lane-head {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.4rem;
|
||||
font-size: 0.65rem;
|
||||
text-transform: uppercase;
|
||||
letter-spacing: 0.08em;
|
||||
color: var(--color-muted-foreground);
|
||||
padding: 0 0.1rem;
|
||||
}
|
||||
.hermes-kanban-lane-name {
|
||||
font-weight: 600;
|
||||
font-family: var(--font-mono, ui-monospace, monospace);
|
||||
}
|
||||
.hermes-kanban-lane-count {
|
||||
margin-left: auto;
|
||||
font-variant-numeric: tabular-nums;
|
||||
}
|
||||
|
||||
/* ---- Card ------------------------------------------------------------ */
|
||||
|
||||
.hermes-kanban-card {
|
||||
cursor: grab;
|
||||
transition: transform 100ms ease, box-shadow 100ms ease;
|
||||
}
|
||||
.hermes-kanban-card:hover {
|
||||
box-shadow: 0 1px 0 0 var(--color-ring) inset, 0 0 0 1px var(--color-ring) inset;
|
||||
}
|
||||
.hermes-kanban-card:active {
|
||||
cursor: grabbing;
|
||||
transform: scale(0.995);
|
||||
}
|
||||
|
||||
.hermes-kanban-card-content {
|
||||
padding: 0.5rem 0.6rem !important;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 0.3rem;
|
||||
}
|
||||
|
||||
.hermes-kanban-card-row {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.35rem;
|
||||
flex-wrap: wrap;
|
||||
}
|
||||
|
||||
.hermes-kanban-card-id {
|
||||
font-family: var(--font-mono, ui-monospace, monospace);
|
||||
font-size: 0.65rem;
|
||||
color: var(--color-muted-foreground);
|
||||
letter-spacing: 0.03em;
|
||||
}
|
||||
|
||||
.hermes-kanban-card-title {
|
||||
font-size: 0.85rem;
|
||||
font-weight: 500;
|
||||
line-height: 1.3;
|
||||
color: var(--color-foreground);
|
||||
word-break: break-word;
|
||||
}
|
||||
|
||||
.hermes-kanban-card-meta {
|
||||
font-size: 0.7rem;
|
||||
color: var(--color-muted-foreground);
|
||||
gap: 0.55rem;
|
||||
}
|
||||
|
||||
.hermes-kanban-priority {
|
||||
font-size: 0.6rem !important;
|
||||
padding: 0.05rem 0.3rem !important;
|
||||
background: color-mix(in srgb, var(--color-ring) 18%, transparent);
|
||||
color: var(--color-foreground);
|
||||
border: 1px solid color-mix(in srgb, var(--color-ring) 40%, transparent);
|
||||
}
|
||||
|
||||
.hermes-kanban-tag {
|
||||
font-size: 0.6rem !important;
|
||||
padding: 0.05rem 0.3rem !important;
|
||||
}
|
||||
|
||||
.hermes-kanban-assignee {
|
||||
font-weight: 500;
|
||||
color: color-mix(in srgb, var(--color-foreground) 80%, var(--color-muted-foreground));
|
||||
}
|
||||
.hermes-kanban-unassigned {
|
||||
font-style: italic;
|
||||
}
|
||||
.hermes-kanban-ago {
|
||||
margin-left: auto;
|
||||
}
|
||||
|
||||
/* ---- Inline create --------------------------------------------------- */
|
||||
|
||||
.hermes-kanban-inline-create {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 0.35rem;
|
||||
padding: 0.5rem;
|
||||
margin-bottom: 0.5rem;
|
||||
background: color-mix(in srgb, var(--color-card) 70%, transparent);
|
||||
border: 1px dashed var(--color-border);
|
||||
border-radius: var(--radius-sm, 0.25rem);
|
||||
}
|
||||
|
||||
/* ---- Drawer (task detail side panel) --------------------------------- */
|
||||
|
||||
.hermes-kanban-drawer-shade {
|
||||
position: fixed;
|
||||
inset: 0;
|
||||
background: rgba(0, 0, 0, 0.45);
|
||||
z-index: 60;
|
||||
display: flex;
|
||||
justify-content: flex-end;
|
||||
}
|
||||
|
||||
.hermes-kanban-drawer {
|
||||
width: min(480px, 92vw);
|
||||
height: 100vh;
|
||||
background: var(--color-card);
|
||||
border-left: 1px solid var(--color-border);
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
box-shadow: -4px 0 18px rgba(0, 0, 0, 0.35);
|
||||
animation: hermes-kanban-drawer-in 180ms ease-out;
|
||||
}
|
||||
|
||||
@keyframes hermes-kanban-drawer-in {
|
||||
from { transform: translateX(100%); opacity: 0.3; }
|
||||
to { transform: translateX(0); opacity: 1; }
|
||||
}
|
||||
|
||||
.hermes-kanban-drawer-head {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: space-between;
|
||||
padding: 0.6rem 0.8rem;
|
||||
border-bottom: 1px solid var(--color-border);
|
||||
font-family: var(--font-mono, ui-monospace, monospace);
|
||||
}
|
||||
|
||||
.hermes-kanban-drawer-close {
|
||||
appearance: none;
|
||||
background: transparent;
|
||||
border: 0;
|
||||
color: var(--color-muted-foreground);
|
||||
font-size: 1.25rem;
|
||||
line-height: 1;
|
||||
cursor: pointer;
|
||||
padding: 0 0.25rem;
|
||||
}
|
||||
.hermes-kanban-drawer-close:hover { color: var(--color-foreground); }
|
||||
|
||||
.hermes-kanban-drawer-body {
|
||||
flex: 1;
|
||||
overflow-y: auto;
|
||||
padding: 0.9rem;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 0.85rem;
|
||||
}
|
||||
|
||||
.hermes-kanban-drawer-title {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.5rem;
|
||||
font-size: 1rem;
|
||||
font-weight: 600;
|
||||
}
|
||||
|
||||
.hermes-kanban-drawer-meta {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 0.15rem;
|
||||
padding: 0.5rem 0.6rem;
|
||||
background: color-mix(in srgb, var(--color-foreground) 4%, transparent);
|
||||
border: 1px solid var(--color-border);
|
||||
border-radius: var(--radius-sm, 0.25rem);
|
||||
}
|
||||
|
||||
.hermes-kanban-meta-row {
|
||||
display: flex;
|
||||
gap: 0.5rem;
|
||||
font-size: 0.72rem;
|
||||
}
|
||||
.hermes-kanban-meta-label {
|
||||
width: 92px;
|
||||
color: var(--color-muted-foreground);
|
||||
}
|
||||
.hermes-kanban-meta-value {
|
||||
color: var(--color-foreground);
|
||||
word-break: break-word;
|
||||
}
|
||||
|
||||
.hermes-kanban-actions {
|
||||
display: flex;
|
||||
flex-wrap: wrap;
|
||||
gap: 0.3rem;
|
||||
}
|
||||
|
||||
.hermes-kanban-section {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 0.35rem;
|
||||
}
|
||||
|
||||
.hermes-kanban-section-head {
|
||||
font-size: 0.72rem;
|
||||
font-weight: 600;
|
||||
text-transform: uppercase;
|
||||
letter-spacing: 0.07em;
|
||||
color: var(--color-muted-foreground);
|
||||
}
|
||||
|
||||
.hermes-kanban-pre {
|
||||
margin: 0;
|
||||
padding: 0.45rem 0.55rem;
|
||||
white-space: pre-wrap;
|
||||
word-break: break-word;
|
||||
background: color-mix(in srgb, var(--color-foreground) 4%, transparent);
|
||||
border: 1px solid var(--color-border);
|
||||
border-radius: var(--radius-sm, 0.25rem);
|
||||
font-family: var(--font-mono, ui-monospace, monospace);
|
||||
font-size: 0.72rem;
|
||||
color: var(--color-foreground);
|
||||
}
|
||||
|
||||
.hermes-kanban-comment {
|
||||
border-left: 2px solid color-mix(in srgb, var(--color-ring) 35%, transparent);
|
||||
padding-left: 0.5rem;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 0.2rem;
|
||||
}
|
||||
|
||||
.hermes-kanban-comment-head {
|
||||
display: flex;
|
||||
gap: 0.5rem;
|
||||
font-size: 0.7rem;
|
||||
}
|
||||
.hermes-kanban-comment-author {
|
||||
font-weight: 600;
|
||||
color: var(--color-foreground);
|
||||
}
|
||||
.hermes-kanban-comment-ago {
|
||||
color: var(--color-muted-foreground);
|
||||
}
|
||||
|
||||
.hermes-kanban-event {
|
||||
display: flex;
|
||||
gap: 0.5rem;
|
||||
font-size: 0.7rem;
|
||||
color: var(--color-muted-foreground);
|
||||
font-family: var(--font-mono, ui-monospace, monospace);
|
||||
}
|
||||
.hermes-kanban-event-kind {
|
||||
color: var(--color-foreground);
|
||||
min-width: 6rem;
|
||||
}
|
||||
.hermes-kanban-event-payload {
|
||||
color: var(--color-muted-foreground);
|
||||
overflow: hidden;
|
||||
text-overflow: ellipsis;
|
||||
white-space: nowrap;
|
||||
max-width: 280px;
|
||||
}
|
||||
|
||||
.hermes-kanban-drawer-comment-row {
|
||||
display: flex;
|
||||
gap: 0.4rem;
|
||||
padding: 0.55rem 0.75rem;
|
||||
border-top: 1px solid var(--color-border);
|
||||
background: color-mix(in srgb, var(--color-card) 90%, transparent);
|
||||
}
|
||||
|
||||
.hermes-kanban-count {
|
||||
display: inline-flex;
|
||||
gap: 0.2rem;
|
||||
align-items: center;
|
||||
}
|
||||
|
||||
/* ---- Selection chrome ----------------------------------------------- */
|
||||
|
||||
.hermes-kanban-card--selected :where(.hermes-kanban-card-content) {
|
||||
box-shadow: 0 0 0 2px var(--color-ring) inset,
|
||||
0 0 0 1px var(--color-ring) inset;
|
||||
background: color-mix(in srgb, var(--color-ring) 6%, var(--color-card));
|
||||
}
|
||||
|
||||
.hermes-kanban-card-check {
|
||||
width: 0.85rem;
|
||||
height: 0.85rem;
|
||||
margin: 0;
|
||||
cursor: pointer;
|
||||
accent-color: var(--color-ring);
|
||||
}
|
||||
|
||||
/* ---- Bulk action bar ------------------------------------------------ */
|
||||
|
||||
.hermes-kanban-bulk {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.5rem;
|
||||
padding: 0.4rem 0.75rem;
|
||||
background: color-mix(in srgb, var(--color-ring) 10%, var(--color-card));
|
||||
border: 1px solid color-mix(in srgb, var(--color-ring) 40%, var(--color-border));
|
||||
border-radius: var(--radius-sm, 0.25rem);
|
||||
flex-wrap: wrap;
|
||||
}
|
||||
.hermes-kanban-bulk-count {
|
||||
font-weight: 600;
|
||||
font-size: 0.75rem;
|
||||
padding-right: 0.25rem;
|
||||
}
|
||||
.hermes-kanban-bulk-btn {
|
||||
height: 1.7rem !important;
|
||||
padding: 0 0.5rem !important;
|
||||
font-size: 0.7rem !important;
|
||||
border: 1px solid var(--color-border);
|
||||
cursor: pointer;
|
||||
}
|
||||
.hermes-kanban-bulk-btn:hover {
|
||||
background: color-mix(in srgb, var(--color-foreground) 8%, transparent);
|
||||
}
|
||||
.hermes-kanban-bulk-reassign {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.25rem;
|
||||
padding-left: 0.5rem;
|
||||
border-left: 1px solid color-mix(in srgb, var(--color-border) 70%, transparent);
|
||||
}
|
||||
|
||||
/* ---- Dependency editor chips --------------------------------------- */
|
||||
|
||||
.hermes-kanban-deps-row {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.5rem;
|
||||
margin-bottom: 0.4rem;
|
||||
}
|
||||
.hermes-kanban-deps-label {
|
||||
font-size: 0.68rem;
|
||||
text-transform: uppercase;
|
||||
letter-spacing: 0.08em;
|
||||
color: var(--color-muted-foreground);
|
||||
min-width: 4rem;
|
||||
}
|
||||
.hermes-kanban-deps-chips {
|
||||
display: flex;
|
||||
gap: 0.3rem;
|
||||
flex-wrap: wrap;
|
||||
flex: 1;
|
||||
}
|
||||
.hermes-kanban-deps-empty {
|
||||
font-size: 0.7rem;
|
||||
color: var(--color-muted-foreground);
|
||||
font-style: italic;
|
||||
}
|
||||
.hermes-kanban-dep-chip {
|
||||
display: inline-flex;
|
||||
align-items: center;
|
||||
gap: 0.15rem;
|
||||
padding: 0.1rem 0.35rem;
|
||||
background: color-mix(in srgb, var(--color-foreground) 6%, transparent);
|
||||
border: 1px solid var(--color-border);
|
||||
border-radius: var(--radius-sm, 0.25rem);
|
||||
font-family: var(--font-mono, ui-monospace, monospace);
|
||||
font-size: 0.68rem;
|
||||
color: var(--color-foreground);
|
||||
}
|
||||
.hermes-kanban-dep-chip-x {
|
||||
appearance: none;
|
||||
background: transparent;
|
||||
border: 0;
|
||||
color: var(--color-muted-foreground);
|
||||
cursor: pointer;
|
||||
font-size: 0.85rem;
|
||||
line-height: 1;
|
||||
padding: 0 0.15rem;
|
||||
}
|
||||
.hermes-kanban-dep-chip-x:hover { color: var(--color-destructive, #d14a4a); }
|
||||
|
||||
/* ---- Inline edit affordances --------------------------------------- */
|
||||
|
||||
.hermes-kanban-editable {
|
||||
cursor: pointer;
|
||||
border-bottom: 1px dotted color-mix(in srgb, var(--color-border) 80%, transparent);
|
||||
}
|
||||
.hermes-kanban-editable:hover {
|
||||
color: var(--color-foreground);
|
||||
border-bottom-color: var(--color-ring);
|
||||
}
|
||||
|
||||
.hermes-kanban-drawer-title-text {
|
||||
cursor: pointer;
|
||||
}
|
||||
.hermes-kanban-drawer-title-text:hover {
|
||||
text-decoration: underline;
|
||||
text-decoration-color: var(--color-ring);
|
||||
text-decoration-style: dotted;
|
||||
text-underline-offset: 3px;
|
||||
}
|
||||
|
||||
.hermes-kanban-edit-row {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.35rem;
|
||||
width: 100%;
|
||||
}
|
||||
|
||||
.hermes-kanban-section-head-row {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: space-between;
|
||||
gap: 0.5rem;
|
||||
}
|
||||
.hermes-kanban-edit-link {
|
||||
appearance: none;
|
||||
background: transparent;
|
||||
border: 0;
|
||||
color: var(--color-muted-foreground);
|
||||
font-size: 0.7rem;
|
||||
text-transform: uppercase;
|
||||
letter-spacing: 0.05em;
|
||||
cursor: pointer;
|
||||
padding: 0;
|
||||
}
|
||||
.hermes-kanban-edit-link:hover { color: var(--color-ring); }
|
||||
|
||||
.hermes-kanban-textarea {
|
||||
width: 100%;
|
||||
min-height: 8rem;
|
||||
background: var(--color-card);
|
||||
color: var(--color-foreground);
|
||||
border: 1px solid var(--color-border);
|
||||
border-radius: var(--radius-sm, 0.25rem);
|
||||
padding: 0.5rem 0.6rem;
|
||||
font-family: var(--font-mono, ui-monospace, monospace);
|
||||
font-size: 0.8rem;
|
||||
line-height: 1.5;
|
||||
resize: vertical;
|
||||
}
|
||||
.hermes-kanban-textarea:focus {
|
||||
outline: none;
|
||||
border-color: var(--color-ring);
|
||||
box-shadow: 0 0 0 2px color-mix(in srgb, var(--color-ring) 30%, transparent);
|
||||
}
|
||||
|
||||
/* ---- Markdown rendering -------------------------------------------- */
|
||||
|
||||
/* Base typography for rendered markdown. */
.hermes-kanban-md {
  font-size: 0.8rem;
  line-height: 1.55;
  color: var(--color-foreground);
}

.hermes-kanban-md p {
  margin: 0.25rem 0;
}

/* Headings share spacing; sizes step down from h1 to h4. */
.hermes-kanban-md h1,
.hermes-kanban-md h2,
.hermes-kanban-md h3,
.hermes-kanban-md h4 {
  margin: 0.6rem 0 0.2rem;
  line-height: 1.25;
}

.hermes-kanban-md h1 {
  font-size: 1.05rem;
}

.hermes-kanban-md h2 {
  font-size: 0.95rem;
}

.hermes-kanban-md h3 {
  font-size: 0.88rem;
}

.hermes-kanban-md h4 {
  font-size: 0.82rem;
}

.hermes-kanban-md ul {
  margin: 0.25rem 0 0.25rem 1.1rem;
  padding: 0;
}

.hermes-kanban-md li {
  margin: 0.1rem 0;
}

.hermes-kanban-md a {
  color: var(--color-ring);
  text-decoration: underline;
}

/* Inline code: tinted pill. */
.hermes-kanban-md code {
  font-family: var(--font-mono, ui-monospace, monospace);
  font-size: 0.75rem;
  padding: 0.05rem 0.3rem;
  background: color-mix(in srgb, var(--color-foreground) 8%, transparent);
  border-radius: 3px;
}

/* Code block container: scrolls horizontally instead of wrapping. */
.hermes-kanban-md-code {
  margin: 0.35rem 0;
  padding: 0.5rem 0.6rem;
  background: color-mix(in srgb, var(--color-foreground) 5%, transparent);
  border: 1px solid var(--color-border);
  border-radius: var(--radius-sm, 0.25rem);
  overflow-x: auto;
}

/* Code inside a block container drops the inline-pill styling. */
.hermes-kanban-md-code code {
  background: transparent;
  padding: 0;
  font-size: 0.75rem;
  white-space: pre;
}

.hermes-kanban-md strong {
  font-weight: 600;
}
|
||||
|
||||
/* ---- Touch-drag proxy ---------------------------------------------- */
|
||||
|
||||
/* Drag proxy: ignores pointer input, slightly enlarged and translucent,
   with transitions disabled. */
.hermes-kanban-touch-proxy {
  pointer-events: none;
  opacity: 0.85;
  box-shadow: 0 8px 20px rgba(0, 0, 0, 0.35);
  transform: scale(1.02);
  transition: none;
}
|
||||
|
||||
|
||||
/* ---- Staleness tiers ------------------------------------------------ */
|
||||
|
||||
/* Amber tier: thin inset ring that thickens and turns opaque on hover.
   :where() keeps the content selector at zero specificity. */
.hermes-kanban-card--stale-amber :where(.hermes-kanban-card-content) {
  box-shadow: 0 0 0 1px #d4b34888 inset;
}

.hermes-kanban-card--stale-amber:hover :where(.hermes-kanban-card-content) {
  box-shadow: 0 0 0 2px #d4b348 inset;
}

/* Red tier: inset ring in the destructive colour plus an outer glow. */
.hermes-kanban-card--stale-red :where(.hermes-kanban-card-content) {
  box-shadow:
    0 0 0 1px var(--color-destructive, #d14a4a) inset,
    0 0 8px color-mix(in srgb, var(--color-destructive, #d14a4a) 30%, transparent);
}

.hermes-kanban-card--stale-red:hover :where(.hermes-kanban-card-content) {
  box-shadow:
    0 0 0 2px var(--color-destructive, #d14a4a) inset,
    0 0 10px color-mix(in srgb, var(--color-destructive, #d14a4a) 45%, transparent);
}
|
||||
|
||||
/* ---- Worker log pane ------------------------------------------------ */
|
||||
|
||||
/* Log pane: height-capped, scrolls on both axes, whitespace preserved. */
.hermes-kanban-log {
  max-height: 340px;
  overflow: auto;
  white-space: pre;
  font-size: 0.7rem;
  line-height: 1.45;
}
|
||||
|
||||
|
||||
/* ---- Run history (per-attempt log in the drawer) ------------------- */
|
||||
|
||||
/* One run entry: tinted box with a left accent bar. */
.hermes-kanban-run {
  border-left: 2px solid var(--color-border);
  padding: 0.35rem 0.5rem;
  margin-bottom: 0.4rem;
  background: color-mix(in srgb, var(--color-foreground) 3%, transparent);
  border-radius: var(--radius-sm, 0.25rem);
}

/* Accent colour per outcome modifier. */
.hermes-kanban-run--active {
  border-left-color: #3fb97d;
}

.hermes-kanban-run--completed {
  border-left-color: #4a8cd1;
}

/* generic fallback when outcome is unset */
.hermes-kanban-run--ended {
  border-left-color: #6b7280;
}

.hermes-kanban-run--blocked {
  border-left-color: var(--color-destructive, #d14a4a);
}

/* Failure outcomes also tint the background. */
.hermes-kanban-run--crashed,
.hermes-kanban-run--timed_out,
.hermes-kanban-run--gave_up,
.hermes-kanban-run--spawn_failed {
  border-left-color: var(--color-destructive, #d14a4a);
  background: color-mix(in srgb, var(--color-destructive, #d14a4a) 6%, transparent);
}

.hermes-kanban-run--reclaimed {
  border-left-color: #d4b348;
}
|
||||
|
||||
/* Header line of a run entry. */
.hermes-kanban-run-head {
  display: flex;
  align-items: center;
  gap: 0.6rem;
  font-size: 0.7rem;
}

.hermes-kanban-run-outcome {
  font-family: var(--font-mono, ui-monospace, monospace);
  font-weight: 600;
  text-transform: uppercase;
  letter-spacing: 0.05em;
  color: var(--color-foreground);
}

.hermes-kanban-run-profile {
  color: var(--color-muted-foreground);
}

/* Tabular digits keep the width stable as the number changes. */
.hermes-kanban-run-elapsed {
  font-variant-numeric: tabular-nums;
  color: var(--color-muted-foreground);
}

/* Auto left margin pushes this item to the far end of the flex row. */
.hermes-kanban-run-ago {
  margin-left: auto;
  color: var(--color-muted-foreground);
}

.hermes-kanban-run-summary {
  font-size: 0.75rem;
  padding: 0.2rem 0 0;
  color: var(--color-foreground);
}

.hermes-kanban-run-error {
  font-size: 0.7rem;
  color: var(--color-destructive, #d14a4a);
  padding: 0.15rem 0 0;
  font-family: var(--font-mono, ui-monospace, monospace);
}
|
||||
/* Run metadata: small monospace block that keeps whitespace but wraps. */
.hermes-kanban-run-meta {
  display: block;
  font-size: 0.65rem;
  padding: 0.15rem 0 0;
  color: var(--color-muted-foreground);
  white-space: pre-wrap;
  /* `word-break: break-word` is deprecated; it stays as a fallback for
     engines without `overflow-wrap: anywhere`, which is the standard
     way to break long unbroken tokens. */
  word-break: break-word;
  overflow-wrap: anywhere;
  font-family: var(--font-mono, ui-monospace, monospace);
}
|
||||
@@ -0,0 +1,14 @@
|
||||
{
|
||||
"name": "kanban",
|
||||
"label": "Kanban",
|
||||
"description": "Multi-agent collaboration board — drag-drop cards across columns, read comment threads, see which profile is running what",
|
||||
"icon": "Package",
|
||||
"version": "1.0.0",
|
||||
"tab": {
|
||||
"path": "/kanban",
|
||||
"position": "after:skills"
|
||||
},
|
||||
"entry": "dist/index.js",
|
||||
"css": "dist/style.css",
|
||||
"api": "plugin_api.py"
|
||||
}
|
||||
@@ -0,0 +1,830 @@
|
||||
"""Kanban dashboard plugin — backend API routes.
|
||||
|
||||
Mounted at /api/plugins/kanban/ by the dashboard plugin system.
|
||||
|
||||
This layer is intentionally thin: every handler is a small wrapper around
|
||||
``hermes_cli.kanban_db`` or a direct SQL query. Writes use the same code
|
||||
paths the CLI and gateway ``/kanban`` command use, so the three surfaces
|
||||
cannot drift.
|
||||
|
||||
Live updates arrive via the ``/events`` WebSocket, which tails the
|
||||
append-only ``task_events`` table on a short poll interval (WAL mode lets
|
||||
reads run alongside the dispatcher's IMMEDIATE write transactions).
|
||||
|
||||
Security note
|
||||
-------------
|
||||
The dashboard's HTTP auth middleware (``web_server.auth_middleware``)
|
||||
explicitly skips ``/api/plugins/`` — plugin routes are unauthenticated by
|
||||
design because the dashboard binds to localhost by default. For the
|
||||
WebSocket we still require the session token as a ``?token=`` query
|
||||
parameter (browsers cannot set the ``Authorization`` header on an upgrade
|
||||
request), matching the established pattern used by the in-browser PTY
|
||||
bridge in ``hermes_cli/web_server.py``. If you run the dashboard with
|
||||
``--host 0.0.0.0``, every plugin route — kanban included — becomes
|
||||
reachable from the network. Don't do that on a shared host.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import hmac
|
||||
import json
|
||||
import logging
|
||||
import sqlite3
|
||||
import time
|
||||
from dataclasses import asdict
|
||||
from typing import Any, Optional
|
||||
|
||||
from fastapi import APIRouter, HTTPException, Query, WebSocket, WebSocketDisconnect, status as http_status
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from hermes_cli import kanban_db
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Auth helper — WebSocket only (HTTP routes live behind the dashboard's
|
||||
# existing plugin-bypass; this is documented above).
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _check_ws_token(provided: Optional[str]) -> bool:
|
||||
"""Constant-time compare against the dashboard session token.
|
||||
|
||||
Imported lazily so the plugin still loads in test contexts where the
|
||||
dashboard web_server module isn't importable (e.g. the bare-FastAPI
|
||||
test harness).
|
||||
"""
|
||||
if not provided:
|
||||
return False
|
||||
try:
|
||||
from hermes_cli import web_server as _ws
|
||||
except Exception:
|
||||
# No dashboard context (tests). Accept so the tail loop is still
|
||||
# testable; in production the dashboard module always imports
|
||||
# cleanly because it's the caller.
|
||||
return True
|
||||
expected = getattr(_ws, "_SESSION_TOKEN", None)
|
||||
if not expected:
|
||||
return True
|
||||
return hmac.compare_digest(str(provided), str(expected))
|
||||
|
||||
|
||||
def _conn():
    """Return a fresh ``kanban_db`` connection after attempting schema setup.

    ``kanban_db.init_db()`` is called on every invocation so that a fresh
    install does not surface a "no such table" error when a write endpoint
    is hit before the board has ever been loaded. A failure there is
    logged as a warning and does not prevent the connection attempt.
    """
    try:
        kanban_db.init_db()
    except Exception as init_error:
        log.warning("kanban init_db failed: %s", init_error)

    return kanban_db.connect()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Serialization helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Columns shown by the dashboard, in left-to-right order. "archived" is
|
||||
# available via a filter toggle rather than a visible column.
|
||||
BOARD_COLUMNS: list[str] = [
    "triage",
    "todo",
    "ready",
    "running",
    "blocked",
    "done",
]
|
||||
|
||||
|
||||
def _task_dict(task: kanban_db.Task) -> dict[str, Any]:
    """Serialise *task* to a plain dict with a derived ``age`` entry.

    ``age`` comes from ``kanban_db.task_age`` so the UI can colour stale
    cards without computing deltas client-side. Every dataclass field is
    passed through unchanged; nothing here shortens the body.
    """
    return {**asdict(task), "age": kanban_db.task_age(task)}
|
||||
|
||||
|
||||
def _event_dict(event: kanban_db.Event) -> dict[str, Any]:
    """Copy the serialisable attributes of *event* into a plain dict."""
    exported = ("id", "task_id", "kind", "payload", "created_at", "run_id")
    return {name: getattr(event, name) for name in exported}
|
||||
|
||||
|
||||
def _comment_dict(c: kanban_db.Comment) -> dict[str, Any]:
    """Copy the serialisable attributes of comment *c* into a plain dict."""
    exported = ("id", "task_id", "author", "body", "created_at")
    return {name: getattr(c, name) for name in exported}
|
||||
|
||||
|
||||
def _run_dict(r: kanban_db.Run) -> dict[str, Any]:
    """Serialise a Run for the drawer's Run history section."""
    exported = (
        "id",
        "task_id",
        "profile",
        "step_key",
        "status",
        "claim_lock",
        "claim_expires",
        "worker_pid",
        "max_runtime_seconds",
        "last_heartbeat_at",
        "started_at",
        "ended_at",
        "outcome",
        "summary",
        "metadata",
        "error",
    )
    return {name: getattr(r, name) for name in exported}
|
||||
|
||||
|
||||
def _links_for(conn: sqlite3.Connection, task_id: str) -> dict[str, list[str]]:
|
||||
"""Return {'parents': [...], 'children': [...]} for a task."""
|
||||
parents = [
|
||||
r["parent_id"]
|
||||
for r in conn.execute(
|
||||
"SELECT parent_id FROM task_links WHERE child_id = ? ORDER BY parent_id",
|
||||
(task_id,),
|
||||
)
|
||||
]
|
||||
children = [
|
||||
r["child_id"]
|
||||
for r in conn.execute(
|
||||
"SELECT child_id FROM task_links WHERE parent_id = ? ORDER BY child_id",
|
||||
(task_id,),
|
||||
)
|
||||
]
|
||||
return {"parents": parents, "children": children}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# GET /board
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@router.get("/board")
def get_board(
    tenant: Optional[str] = Query(None, description="Filter to a single tenant"),
    include_archived: bool = Query(False),
):
    """Return the whole board grouped into status columns, plus UI metadata.

    The response holds:

    * ``columns`` — one ``{"name", "tasks"}`` entry per ``BOARD_COLUMNS``
      name, plus ``archived`` when *include_archived* is true. A task whose
      status matches no column is placed under ``todo``.
    * ``tenants`` / ``assignees`` — distinct non-NULL values for the filter
      widgets (assignees of archived tasks are excluded).
    * ``latest_event_id`` — highest ``task_events.id``, 0 when empty.
    * ``now`` — server time in whole seconds.

    The connection comes from ``_conn()``, which attempts schema creation
    first so a fresh install can still load the plugin tab.
    """
    conn = _conn()
    try:
        tasks = kanban_db.list_tasks(
            conn, tenant=tenant, include_archived=include_archived,
        )

        # Parent/child tallies for every task, from one scan of the links.
        link_counts: dict[str, dict[str, int]] = {}
        link_rows = conn.execute(
            "SELECT parent_id, child_id FROM task_links"
        ).fetchall()
        for link in link_rows:
            as_parent = link_counts.setdefault(
                link["parent_id"], {"parents": 0, "children": 0}
            )
            as_parent["children"] += 1
            as_child = link_counts.setdefault(
                link["child_id"], {"parents": 0, "children": 0}
            )
            as_child["parents"] += 1

        # Comment count per task, from one aggregate query.
        comment_counts: dict[str, int] = {}
        for counted in conn.execute(
            "SELECT task_id, COUNT(*) AS n FROM task_comments GROUP BY task_id"
        ):
            comment_counts[counted["task_id"]] = counted["n"]

        # Progress rollup: per parent, how many children exist and how many
        # are done. One joined pass instead of a query per task.
        progress: dict[str, dict[str, int]] = {}
        child_rows = conn.execute(
            "SELECT l.parent_id AS pid, t.status AS cstatus "
            "FROM task_links l JOIN tasks t ON t.id = l.child_id"
        ).fetchall()
        for child in child_rows:
            tally = progress.setdefault(child["pid"], {"done": 0, "total": 0})
            tally["total"] += 1
            if child["cstatus"] == "done":
                tally["done"] += 1

        newest_event = conn.execute(
            "SELECT COALESCE(MAX(id), 0) AS m FROM task_events"
        ).fetchone()
        latest_event_id = newest_event["m"]

        columns: dict[str, list[dict]] = {name: [] for name in BOARD_COLUMNS}
        if include_archived:
            columns["archived"] = []

        # Tasks are appended in the order list_tasks returned them.
        for t in tasks:
            card = _task_dict(t)
            card["link_counts"] = link_counts.get(
                t.id, {"parents": 0, "children": 0}
            )
            card["comment_count"] = comment_counts.get(t.id, 0)
            # None when the task has no children.
            card["progress"] = progress.get(t.id)
            columns.get(t.status, columns["todo"]).append(card)

        # Known tenants for the UI filter dropdown.
        tenant_rows = conn.execute(
            "SELECT DISTINCT tenant FROM tasks WHERE tenant IS NOT NULL ORDER BY tenant"
        )
        tenants = [found["tenant"] for found in tenant_rows]

        # Distinct assignees for the lane-by-profile sub-grouping.
        assignee_rows = conn.execute(
            "SELECT DISTINCT assignee FROM tasks WHERE assignee IS NOT NULL "
            "AND status != 'archived' ORDER BY assignee"
        )
        assignees = [found["assignee"] for found in assignee_rows]

        return {
            "columns": [
                {"name": name, "tasks": cards} for name, cards in columns.items()
            ],
            "tenants": tenants,
            "assignees": assignees,
            "latest_event_id": int(latest_event_id),
            "now": int(time.time()),
        }
    finally:
        conn.close()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# GET /tasks/:id
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@router.get("/tasks/{task_id}")
def get_task(task_id: str):
    """Return one task with its comments, events, links and runs.

    Raises:
        HTTPException: 404 when no task has id *task_id*.
    """
    conn = _conn()
    try:
        task = kanban_db.get_task(conn, task_id)
        if task is None:
            raise HTTPException(status_code=404, detail=f"task {task_id} not found")

        detail: dict[str, Any] = {"task": _task_dict(task)}
        detail["comments"] = [
            _comment_dict(comment)
            for comment in kanban_db.list_comments(conn, task_id)
        ]
        detail["events"] = [
            _event_dict(event) for event in kanban_db.list_events(conn, task_id)
        ]
        detail["links"] = _links_for(conn, task_id)
        detail["runs"] = [
            _run_dict(run) for run in kanban_db.list_runs(conn, task_id)
        ]
        return detail
    finally:
        conn.close()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# POST /tasks
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Request body for POST /tasks. Every field is forwarded unchanged as a
# keyword argument to ``kanban_db.create_task``.
class CreateTaskBody(BaseModel):
    # The only required field.
    title: str
    body: Optional[str] = None
    assignee: Optional[str] = None
    tenant: Optional[str] = None
    priority: int = 0
    workspace_kind: str = "scratch"
    workspace_path: Optional[str] = None
    # Ids of parent tasks; defaults to a fresh empty list per request.
    parents: list[str] = Field(default_factory=list)
    triage: bool = False
    idempotency_key: Optional[str] = None
    max_runtime_seconds: Optional[int] = None
    skills: Optional[list[str]] = None
|
||||
|
||||
|
||||
@router.post("/tasks")
def create_task(payload: CreateTaskBody):
    """Create a task through ``kanban_db.create_task`` and return it.

    The task is always recorded with ``created_by="dashboard"``. The
    response is ``{"task": <task dict>}``, or ``{"task": None}`` if the
    new row cannot be read back.

    Raises:
        HTTPException: 400 carrying the message of any ``ValueError``
            raised while creating or reading the task.
    """
    conn = _conn()
    try:
        new_id = kanban_db.create_task(
            conn,
            title=payload.title,
            body=payload.body,
            assignee=payload.assignee,
            created_by="dashboard",
            workspace_kind=payload.workspace_kind,
            workspace_path=payload.workspace_path,
            tenant=payload.tenant,
            priority=payload.priority,
            parents=payload.parents,
            triage=payload.triage,
            idempotency_key=payload.idempotency_key,
            max_runtime_seconds=payload.max_runtime_seconds,
            skills=payload.skills,
        )
        created = kanban_db.get_task(conn, new_id)
        if created:
            return {"task": _task_dict(created)}
        return {"task": None}
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))
    finally:
        conn.close()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# PATCH /tasks/:id (status / assignee / priority / title / body)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Request body for PATCH /tasks/{task_id}. Every field is optional; a
# field left as None is not applied by the handler.
class UpdateTaskBody(BaseModel):
    status: Optional[str] = None
    # "" is converted to None before being handed to assign_task.
    assignee: Optional[str] = None
    priority: Optional[int] = None
    title: Optional[str] = None
    body: Optional[str] = None
    # Forwarded to complete_task when status is 'done'.
    result: Optional[str] = None
    # Forwarded to block_task when status is 'blocked'.
    block_reason: Optional[str] = None
    # Structured handoff fields, also forwarded to complete_task when
    # status transitions to 'done'. Dashboard parity with
    # ``hermes kanban complete --summary ... --metadata ...``.
    summary: Optional[str] = None
    metadata: Optional[dict] = None
|
||||
|
||||
|
||||
@router.patch("/tasks/{task_id}")
def update_task(task_id: str, payload: UpdateTaskBody):
    """Apply a partial update to one task and return its refreshed state.

    Only fields set on *payload* are applied, in this order: assignee,
    status, priority, then title/body. Each section commits on its own,
    so all request validation runs first: a blank title or an unknown
    status is rejected before anything is written, instead of after the
    earlier sections have already been committed.

    Status values map onto the structured verbs (``complete_task``,
    ``block_task``, ``unblock_task``, ``archive_task``) where one exists,
    and fall back to ``_set_status_direct`` otherwise.

    Returns:
        ``{"task": <task dict>}``, or ``{"task": None}`` if the task can
        no longer be read back.

    Raises:
        HTTPException: 404 if the task does not exist; 400 for a blank
            title or unknown status; 409 if ``assign_task`` raises
            ``RuntimeError`` or a status transition is refused.
    """
    conn = _conn()
    try:
        if kanban_db.get_task(conn, task_id) is None:
            raise HTTPException(status_code=404, detail=f"task {task_id} not found")

        # --- validate everything before the first write -------------------
        new_title: Optional[str] = None
        if payload.title is not None:
            new_title = payload.title.strip()
            if not new_title:
                raise HTTPException(status_code=400, detail="title cannot be empty")
        if payload.status is not None and payload.status not in (
            "done", "blocked", "ready", "archived", "todo", "running", "triage",
        ):
            raise HTTPException(
                status_code=400, detail=f"unknown status: {payload.status}",
            )

        # --- assignee ----------------------------------------------------
        if payload.assignee is not None:
            try:
                # "" means unassign; assign_task expects None for that.
                ok = kanban_db.assign_task(
                    conn, task_id, payload.assignee or None,
                )
            except RuntimeError as e:
                raise HTTPException(status_code=409, detail=str(e)) from e
            if not ok:
                raise HTTPException(status_code=404, detail="task not found")

        # --- status -------------------------------------------------------
        if payload.status is not None:
            s = payload.status
            if s == "done":
                ok = kanban_db.complete_task(
                    conn, task_id,
                    result=payload.result,
                    summary=payload.summary,
                    metadata=payload.metadata,
                )
            elif s == "blocked":
                ok = kanban_db.block_task(conn, task_id, reason=payload.block_reason)
            elif s == "archived":
                ok = kanban_db.archive_task(conn, task_id)
            elif s == "ready":
                # Re-open a blocked task, or just an explicit status set.
                current = kanban_db.get_task(conn, task_id)
                if current and current.status == "blocked":
                    ok = kanban_db.unblock_task(conn, task_id)
                else:
                    # Direct status write for drag-drop (todo -> ready etc).
                    ok = _set_status_direct(conn, task_id, "ready")
            else:
                # todo / running / triage — membership was validated above.
                ok = _set_status_direct(conn, task_id, s)
            if not ok:
                raise HTTPException(
                    status_code=409,
                    detail=f"status transition to {s!r} not valid from current state",
                )

        # --- priority -----------------------------------------------------
        if payload.priority is not None:
            priority = int(payload.priority)
            with kanban_db.write_txn(conn):
                conn.execute(
                    "UPDATE tasks SET priority = ? WHERE id = ?",
                    (priority, task_id),
                )
                conn.execute(
                    "INSERT INTO task_events (task_id, kind, payload, created_at) "
                    "VALUES (?, 'reprioritized', ?, ?)",
                    (task_id, json.dumps({"priority": priority}), int(time.time())),
                )

        # --- title / body -------------------------------------------------
        if new_title is not None or payload.body is not None:
            sets: list[str] = []
            vals: list[Any] = []
            if new_title is not None:
                sets.append("title = ?")
                vals.append(new_title)
            if payload.body is not None:
                sets.append("body = ?")
                vals.append(payload.body)
            vals.append(task_id)
            with kanban_db.write_txn(conn):
                # Only fixed column fragments are interpolated; every value
                # is bound as a parameter.
                conn.execute(
                    f"UPDATE tasks SET {', '.join(sets)} WHERE id = ?", vals,
                )
                conn.execute(
                    "INSERT INTO task_events (task_id, kind, payload, created_at) "
                    "VALUES (?, 'edited', NULL, ?)",
                    (task_id, int(time.time())),
                )

        updated = kanban_db.get_task(conn, task_id)
        return {"task": _task_dict(updated) if updated else None}
    finally:
        conn.close()
|
||||
|
||||
|
||||
def _set_status_direct(
    conn: sqlite3.Connection, task_id: str, new_status: str,
) -> bool:
    """Write *new_status* straight onto a task inside one write transaction.

    Used for drag-drop moves that the structured complete / block /
    unblock / archive verbs don't cover (e.g. todo<->ready,
    running<->ready). Unless the new status is ``running``, the task's
    ``claim_lock``, ``claim_expires`` and ``worker_pid`` are cleared. A
    ``status`` event row is appended for the live feed.

    When the task was ``running``, has a ``current_run_id`` and is moved
    to another status, the run is closed through ``kanban_db._end_run``
    with outcome ``reclaimed`` so attempt history isn't orphaned, and the
    event row carries the value ``_end_run`` returned.

    Returns:
        ``False`` when the task row is missing or the UPDATE did not touch
        exactly one row; ``True`` otherwise.
    """
    with kanban_db.write_txn(conn):
        # Snapshot the pre-update state to decide whether a run must close.
        before = conn.execute(
            "SELECT status, current_run_id FROM tasks WHERE id = ?",
            (task_id,),
        ).fetchone()
        if before is None:
            return False
        leaves_running = before["status"] == "running" and new_status != "running"

        update_params = (new_status,) * 4 + (task_id,)
        updated = conn.execute(
            "UPDATE tasks SET status = ?, "
            "  claim_lock = CASE WHEN ? = 'running' THEN claim_lock ELSE NULL END, "
            "  claim_expires = CASE WHEN ? = 'running' THEN claim_expires ELSE NULL END, "
            "  worker_pid = CASE WHEN ? = 'running' THEN worker_pid ELSE NULL END "
            "WHERE id = ?",
            update_params,
        )
        if updated.rowcount != 1:
            return False

        closed_run_id = None
        if leaves_running and before["current_run_id"]:
            closed_run_id = kanban_db._end_run(
                conn, task_id,
                outcome="reclaimed", status="reclaimed",
                summary=f"status changed to {new_status} (dashboard/direct)",
            )

        event_payload = json.dumps({"status": new_status})
        conn.execute(
            "INSERT INTO task_events (task_id, run_id, kind, payload, created_at) "
            "VALUES (?, ?, 'status', ?, ?)",
            (task_id, closed_run_id, event_payload, int(time.time())),
        )

        # If we re-opened something, children may have gone stale.
        if new_status in ("done", "ready"):
            kanban_db.recompute_ready(conn)
        return True
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Comments
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Request body for POST /tasks/{task_id}/comments.
class CommentBody(BaseModel):
    # Comment text; the handler rejects a whitespace-only value.
    body: str
    # The handler substitutes "dashboard" for a null or empty author.
    author: Optional[str] = "dashboard"
|
||||
|
||||
|
||||
@router.post("/tasks/{task_id}/comments")
def add_comment(task_id: str, payload: CommentBody):
    """Append a comment to a task.

    The body is checked before a connection is opened, and is stored as
    sent (not stripped). A falsy author is recorded as ``"dashboard"``.

    Raises:
        HTTPException: 400 when the body is empty or whitespace-only;
            404 when the task does not exist.
    """
    has_text = bool(payload.body.strip())
    if not has_text:
        raise HTTPException(status_code=400, detail="body is required")

    conn = _conn()
    try:
        existing = kanban_db.get_task(conn, task_id)
        if existing is None:
            raise HTTPException(status_code=404, detail=f"task {task_id} not found")
        author = payload.author or "dashboard"
        kanban_db.add_comment(conn, task_id, author=author, body=payload.body)
        return {"ok": True}
    finally:
        conn.close()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Links
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Request body for POST /links: the two task ids to connect.
class LinkBody(BaseModel):
    parent_id: str
    child_id: str
|
||||
|
||||
|
||||
@router.post("/links")
def add_link(payload: LinkBody):
    """Create a parent -> child link through ``kanban_db.link_tasks``.

    Raises:
        HTTPException: 400 carrying the message of any ``ValueError``
            raised by ``link_tasks``.
    """
    conn = _conn()
    try:
        kanban_db.link_tasks(conn, payload.parent_id, payload.child_id)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))
    else:
        return {"ok": True}
    finally:
        conn.close()
|
||||
|
||||
|
||||
@router.delete("/links")
def delete_link(parent_id: str = Query(...), child_id: str = Query(...)):
    """Remove a parent -> child link.

    Both ids are required query parameters. ``ok`` in the response is the
    truthiness of what ``kanban_db.unlink_tasks`` returned.
    """
    conn = _conn()
    try:
        removed = kanban_db.unlink_tasks(conn, parent_id, child_id)
        return {"ok": bool(removed)}
    finally:
        conn.close()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Bulk actions (multi-select on the board)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Request body for POST /tasks/bulk: one patch applied to every id.
class BulkTaskBody(BaseModel):
    ids: list[str]
    status: Optional[str] = None
    # "" = unassign; None = leave the assignee unchanged.
    assignee: Optional[str] = None
    priority: Optional[int] = None
    # When true the tasks are archived and ``status`` is ignored.
    archive: bool = False
|
||||
|
||||
|
||||
@router.post("/tasks/bulk")
def bulk_update(payload: BulkTaskBody):
    """Apply the same patch to every id in ``payload.ids``.

    This is an *independent* iteration — per-task failures don't abort
    siblings. Returns per-id outcome so the UI can surface partials.

    For each task the sections run in this order: archive (when
    ``payload.archive`` is set, in which case ``status`` is ignored),
    status, assignee, priority. ``status="archived"`` is routed to
    ``archive_task``, matching the single-task PATCH endpoint. A refused
    section marks the entry ``ok=False`` but later sections still run, so
    ``error`` holds the most recent failure.

    Returns:
        ``{"results": [{"id", "ok", "error"?}, ...]}`` in request order.

    Raises:
        HTTPException: 400 when no non-empty id is supplied.
    """
    ids = [i for i in (payload.ids or []) if i]
    if not ids:
        raise HTTPException(status_code=400, detail="ids is required")
    results: list[dict] = []
    conn = _conn()
    try:
        for tid in ids:
            entry: dict[str, Any] = {"id": tid, "ok": True}
            try:
                task = kanban_db.get_task(conn, tid)
                if task is None:
                    entry.update(ok=False, error="not found")
                    results.append(entry)
                    continue

                if payload.archive:
                    if not kanban_db.archive_task(conn, tid):
                        entry.update(ok=False, error="archive refused")

                if payload.status is not None and not payload.archive:
                    s = payload.status
                    if s == "done":
                        ok = kanban_db.complete_task(conn, tid)
                    elif s == "blocked":
                        ok = kanban_db.block_task(conn, tid)
                    elif s == "archived":
                        # Same verb the PATCH endpoint uses for this status.
                        ok = kanban_db.archive_task(conn, tid)
                    elif s == "ready":
                        # A blocked task is re-opened through unblock_task;
                        # anything else gets a direct status write.
                        cur = kanban_db.get_task(conn, tid)
                        if cur and cur.status == "blocked":
                            ok = kanban_db.unblock_task(conn, tid)
                        else:
                            ok = _set_status_direct(conn, tid, "ready")
                    elif s in ("todo", "running", "triage"):
                        ok = _set_status_direct(conn, tid, s)
                    else:
                        entry.update(ok=False, error=f"unknown status {s!r}")
                        results.append(entry)
                        continue
                    if not ok:
                        entry.update(ok=False, error=f"transition to {s!r} refused")

                if payload.assignee is not None:
                    try:
                        # "" means unassign; assign_task expects None for that.
                        if not kanban_db.assign_task(
                            conn, tid, payload.assignee or None,
                        ):
                            entry.update(ok=False, error="assign refused")
                    except RuntimeError as e:
                        entry.update(ok=False, error=str(e))

                if payload.priority is not None:
                    priority = int(payload.priority)
                    with kanban_db.write_txn(conn):
                        conn.execute(
                            "UPDATE tasks SET priority = ? WHERE id = ?",
                            (priority, tid),
                        )
                        conn.execute(
                            "INSERT INTO task_events (task_id, kind, payload, created_at) "
                            "VALUES (?, 'reprioritized', ?, ?)",
                            (tid, json.dumps({"priority": priority}),
                             int(time.time())),
                        )
            except Exception as e:  # defensive — one bad id shouldn't kill the batch
                entry.update(ok=False, error=str(e))
            results.append(entry)
        return {"results": results}
    finally:
        conn.close()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Plugin config (read dashboard.kanban.* defaults from config.yaml)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@router.get("/config")
def get_config():
    """Return kanban dashboard preferences from the Hermes config.

    Reads the ``dashboard.kanban`` section via
    ``hermes_cli.config.load_config`` if present; defaults otherwise.
    Used by the UI to pre-select tenant filters, toggle markdown
    rendering, or set column-width preferences without a round-trip per
    page load.

    The endpoint is best-effort: a config that fails to load, or a
    ``dashboard`` / ``dashboard.kanban`` value that is not a mapping (for
    example ``kanban: true``), yields the defaults instead of an error.
    """

    def _section(parent: Any, key: str) -> Any:
        # Return parent[key] when both sides look like mappings (expose a
        # callable ``get``); otherwise an empty dict so lookups fall back
        # to their defaults.
        getter = getattr(parent, "get", None)
        value = getter(key) if callable(getter) else None
        return value if callable(getattr(value, "get", None)) else {}

    try:
        from hermes_cli.config import load_config
        cfg = load_config() or {}
    except Exception:
        # Deliberately non-fatal; keep a trace for debugging.
        log.debug("kanban: could not load config, using defaults", exc_info=True)
        cfg = {}

    k_cfg = _section(_section(cfg, "dashboard"), "kanban")
    return {
        "default_tenant": k_cfg.get("default_tenant") or "",
        "lane_by_profile": bool(k_cfg.get("lane_by_profile", True)),
        "include_archived_by_default": bool(k_cfg.get("include_archived_by_default", False)),
        "render_markdown": bool(k_cfg.get("render_markdown", True)),
    }
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Stats (per-profile / per-status counts + oldest-ready age)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@router.get("/stats")
def get_stats():
    """Return whatever ``kanban_db.board_stats`` reports for the board.

    Intended for the dashboard HUD and for router profiles that need to
    answer "is this specialist overloaded?" without scanning the whole
    board themselves.
    """
    conn = _conn()
    try:
        stats = kanban_db.board_stats(conn)
        return stats
    finally:
        conn.close()
|
||||
|
||||
|
||||
@router.get("/assignees")
def get_assignees():
    """Return ``{"assignees": ...}`` from ``kanban_db.known_assignees``.

    The dashboard uses this to populate its assignee dropdown.
    NOTE(review): the list is presumably the union of on-disk profiles and
    assignees already used on the board — confirm against
    ``known_assignees``.
    """
    conn = _conn()
    try:
        known = kanban_db.known_assignees(conn)
        return {"assignees": known}
    finally:
        conn.close()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Worker log (read-only; file written by _default_spawn)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@router.get("/tasks/{task_id}/log")
def get_task_log(task_id: str, tail: Optional[int] = Query(None, ge=1, le=2_000_000)):
    """Return the worker's stdout/stderr log for a task.

    ``tail`` caps the response size (bytes) so the dashboard drawer
    doesn't pull megabytes into the browser. The on-disk log is rotated
    at 2 MiB per ``_rotate_worker_log`` — a single ``.log.1`` is kept, no
    further generations, so disk usage per task is bounded at ~4 MiB.

    Args:
        task_id: Identifier of the task whose log is requested.
        tail: Optional byte cap (1..2,000,000) forwarded to
            ``kanban_db.read_worker_log`` as ``tail_bytes``.

    Returns:
        A dict with ``task_id``, ``path``, ``exists``, ``size_bytes``,
        ``content`` and ``truncated``. ``exists`` is False and ``content``
        is empty when ``read_worker_log`` returns ``None`` (presumably
        because the task has not produced a log yet).

    Raises:
        HTTPException: 404 when no task with ``task_id`` exists.
    """
    conn = _conn()
    try:
        task = kanban_db.get_task(conn, task_id)
    finally:
        conn.close()
    if task is None:
        raise HTTPException(status_code=404, detail=f"task {task_id} not found")

    content = kanban_db.read_worker_log(task_id, tail_bytes=tail)
    log_path = kanban_db.worker_log_path(task_id)
    # EAFP instead of exists()+stat(): the log can be rotated (renamed)
    # between the two calls, which would otherwise surface as a 500.
    try:
        size = log_path.stat().st_size
    except (FileNotFoundError, NotADirectoryError):
        size = 0

    return {
        "task_id": task_id,
        "path": str(log_path),
        "exists": content is not None,
        "size_bytes": size,
        "content": content or "",
        # Truncated when the on-disk file was larger than the tail cap.
        "truncated": bool(tail and size > tail),
    }
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Dispatch nudge (optional quick-path so the UI doesn't wait 60 s)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@router.post("/dispatch")
def dispatch(dry_run: bool = Query(False), max_n: int = Query(8, alias="max")):
    """Run one dispatcher pass on demand and return its result.

    Args:
        dry_run: Forwarded to ``kanban_db.dispatch_once`` as ``dry_run``.
        max_n: Forwarded as ``max_spawn``; exposed to clients as the
            ``max`` query parameter.

    Returns:
        The dispatch result converted with ``asdict``; if the result is
        not a dataclass instance, ``{"result": str(result)}`` instead.
    """
    db = _conn()
    try:
        outcome = kanban_db.dispatch_once(db, dry_run=dry_run, max_spawn=max_n)
        # DispatchResult is a dataclass; fall back to its string form
        # if asdict() rejects whatever was returned.
        try:
            serialised = asdict(outcome)
        except TypeError:
            serialised = {"result": str(outcome)}
        return serialised
    finally:
        db.close()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# WebSocket: /events?since=<event_id>
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Poll interval (in seconds) for the event tail loop. SQLite WAL + 300 ms
# polling is the simplest and most robust approach; it adds a fraction of a
# percent of CPU and has no shared state to synchronize across workers.
_EVENT_POLL_SECONDS = 0.3
|
||||
|
||||
|
||||
@router.websocket("/events")
async def stream_events(ws: WebSocket):
    """Stream new ``task_events`` rows to a dashboard client over WebSocket.

    The client authenticates with a ``token`` query parameter and may pass
    ``since=<event_id>`` to resume after a known event (defaults to 0, and
    a non-integer value is treated as 0). Every ``_EVENT_POLL_SECONDS`` the
    handler reads up to 200 rows with ``id`` greater than the cursor and,
    when any exist, sends ``{"events": [...], "cursor": <last id>}``.

    A ``WebSocketDisconnect`` ends the handler quietly. Any other exception
    is logged as a warning and the socket is closed on a best-effort basis.
    """
    # Enforce the dashboard session token as a query param — browsers can't
    # set Authorization on a WS upgrade. This matches how the PTY bridge
    # authenticates in hermes_cli/web_server.py.
    if not _check_ws_token(ws.query_params.get("token")):
        await ws.close(code=http_status.WS_1008_POLICY_VIOLATION)
        return
    await ws.accept()

    def _decode_payload(row):
        # A missing or malformed payload becomes None instead of failing
        # the whole batch.
        try:
            return json.loads(row["payload"]) if row["payload"] else None
        except Exception:
            return None

    def _fetch_new(cursor_val: int) -> tuple[int, list[dict]]:
        # Runs in a worker thread; opens and closes its own connection
        # on every poll.
        db = kanban_db.connect()
        try:
            fetched = db.execute(
                "SELECT id, task_id, run_id, kind, payload, created_at "
                "FROM task_events WHERE id > ? ORDER BY id ASC LIMIT 200",
                (cursor_val,),
            ).fetchall()
            batch: list[dict] = []
            for row in fetched:
                decoded = _decode_payload(row)
                event = {key: row[key] for key in ("id", "task_id", "run_id", "kind")}
                event["payload"] = decoded
                event["created_at"] = row["created_at"]
                batch.append(event)
            # Advance to the last row seen; stay put when nothing is new.
            latest = batch[-1]["id"] if batch else cursor_val
            return latest, batch
        finally:
            db.close()

    try:
        since_raw = ws.query_params.get("since", "0")
        try:
            cursor = int(since_raw)
        except ValueError:
            cursor = 0

        while True:
            cursor, events = await asyncio.to_thread(_fetch_new, cursor)
            if events:
                await ws.send_json({"events": events, "cursor": cursor})
            await asyncio.sleep(_EVENT_POLL_SECONDS)
    except WebSocketDisconnect:
        return
    except Exception as exc:  # defensive: never crash the dashboard worker
        log.warning("Kanban event stream error: %s", exc)
        try:
            await ws.close()
        except Exception:
            pass
|
||||
@@ -0,0 +1,17 @@
|
||||
[Unit]
|
||||
Description=Hermes Kanban dispatcher (hermes kanban daemon)
|
||||
Documentation=https://hermes-agent.nousresearch.com/docs/user-guide/features/kanban
|
||||
After=network.target
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
ExecStart=/usr/bin/env hermes kanban daemon --interval 60 --pidfile %t/hermes-kanban-dispatcher.pid
|
||||
Restart=on-failure
|
||||
RestartSec=5
|
||||
# Log to the journal via stdout/stderr; the dispatcher also writes per-task
|
||||
# worker output to $HERMES_HOME/kanban/logs/<task>.log.
|
||||
StandardOutput=journal
|
||||
StandardError=journal
|
||||
|
||||
[Install]
|
||||
WantedBy=default.target
|
||||
@@ -43,7 +43,7 @@ _TIMEOUT = 30.0
|
||||
# ---------------------------------------------------------------------------
|
||||
# Process-level atexit safety net — ensures pending sessions are committed
|
||||
# even if shutdown_memory_provider is never called (e.g. gateway crash,
|
||||
# SIGKILL, or exception in _async_flush_memories preventing shutdown).
|
||||
# SIGKILL, or exception in the session expiry watcher preventing shutdown).
|
||||
# ---------------------------------------------------------------------------
|
||||
_last_active_provider: Optional["OpenVikingMemoryProvider"] = None
|
||||
|
||||
|
||||
+292
-347
@@ -40,6 +40,7 @@ from types import SimpleNamespace
|
||||
import urllib.request
|
||||
import uuid
|
||||
from typing import List, Dict, Any, Optional
|
||||
from urllib.parse import urlparse, parse_qs, urlunparse
|
||||
from openai import OpenAI
|
||||
import fire
|
||||
from datetime import datetime
|
||||
@@ -85,6 +86,7 @@ from agent.error_classifier import classify_api_error, FailoverReason
|
||||
from agent.prompt_builder import (
|
||||
DEFAULT_AGENT_IDENTITY, PLATFORM_HINTS,
|
||||
MEMORY_GUIDANCE, SESSION_SEARCH_GUIDANCE, SKILLS_GUIDANCE,
|
||||
KANBAN_GUIDANCE,
|
||||
build_nous_subscription_prompt,
|
||||
)
|
||||
from agent.model_metadata import (
|
||||
@@ -891,7 +893,6 @@ class AIAgent:
|
||||
checkpoints_enabled: bool = False,
|
||||
checkpoint_max_snapshots: int = 50,
|
||||
pass_session_id: bool = False,
|
||||
persist_session: bool = True,
|
||||
):
|
||||
"""
|
||||
Initialize the AI Agent.
|
||||
@@ -963,7 +964,6 @@ class AIAgent:
|
||||
self.background_review_callback = None # Optional sync callback for gateway delivery
|
||||
self.skip_context_files = skip_context_files
|
||||
self.pass_session_id = pass_session_id
|
||||
self.persist_session = persist_session
|
||||
self._credential_pool = credential_pool
|
||||
self.log_prefix_chars = log_prefix_chars
|
||||
self.log_prefix = f"{log_prefix} " if log_prefix else ""
|
||||
@@ -1033,12 +1033,16 @@ class AIAgent:
|
||||
# surface.
|
||||
# When api_mode was explicitly provided, respect it — the user
|
||||
# knows what their endpoint supports (#10473).
|
||||
# Exception: Azure OpenAI serves gpt-5.x on /chat/completions and
|
||||
# does NOT support the Responses API — skip the upgrade for Azure
|
||||
# (openai.azure.com), even though it looks OpenAI-compatible.
|
||||
if (
|
||||
api_mode is None
|
||||
and self.api_mode == "chat_completions"
|
||||
and self.provider != "copilot-acp"
|
||||
and not str(self.base_url or "").lower().startswith("acp://copilot")
|
||||
and not str(self.base_url or "").lower().startswith("acp+tcp://")
|
||||
and not self._is_azure_openai_url()
|
||||
and (
|
||||
self._is_direct_openai_url()
|
||||
or self._provider_model_requires_responses_api(
|
||||
@@ -1314,7 +1318,22 @@ class AIAgent:
|
||||
if api_key and base_url:
|
||||
# Explicit credentials from CLI/gateway — construct directly.
|
||||
# The runtime provider resolver already handled auth for us.
|
||||
client_kwargs = {"api_key": api_key, "base_url": base_url}
|
||||
# Extract query params (e.g. Azure api-version) from base_url
|
||||
# and pass via default_query to prevent loss during SDK URL
|
||||
# joining (httpx drops query string when joining paths).
|
||||
_parsed_url = urlparse(base_url)
|
||||
if _parsed_url.query:
|
||||
_clean_url = urlunparse(_parsed_url._replace(query=""))
|
||||
_query_params = {
|
||||
k: v[0] for k, v in parse_qs(_parsed_url.query).items()
|
||||
}
|
||||
client_kwargs = {
|
||||
"api_key": api_key,
|
||||
"base_url": _clean_url,
|
||||
"default_query": _query_params,
|
||||
}
|
||||
else:
|
||||
client_kwargs = {"api_key": api_key, "base_url": base_url}
|
||||
if _provider_timeout is not None:
|
||||
client_kwargs["timeout"] = _provider_timeout
|
||||
if self.provider == "copilot-acp":
|
||||
@@ -1578,7 +1597,6 @@ class AIAgent:
|
||||
self._memory_enabled = False
|
||||
self._user_profile_enabled = False
|
||||
self._memory_nudge_interval = 10
|
||||
self._memory_flush_min_turns = 6
|
||||
self._turns_since_memory = 0
|
||||
self._iters_since_skill = 0
|
||||
if not skip_memory:
|
||||
@@ -1587,7 +1605,6 @@ class AIAgent:
|
||||
self._memory_enabled = mem_config.get("memory_enabled", False)
|
||||
self._user_profile_enabled = mem_config.get("user_profile_enabled", False)
|
||||
self._memory_nudge_interval = int(mem_config.get("nudge_interval", 10))
|
||||
self._memory_flush_min_turns = int(mem_config.get("flush_min_turns", 6))
|
||||
if self._memory_enabled or self._user_profile_enabled:
|
||||
from tools.memory_tool import MemoryStore
|
||||
self._memory_store = MemoryStore(
|
||||
@@ -1767,43 +1784,64 @@ class AIAgent:
|
||||
# Store for reuse in switch_model (so config override persists across model switches)
|
||||
self._config_context_length = _config_context_length
|
||||
|
||||
# Resolve custom_providers list once for reuse below (startup
|
||||
# context-length override and plugin context-engine init).
|
||||
try:
|
||||
from hermes_cli.config import get_compatible_custom_providers
|
||||
_custom_providers = get_compatible_custom_providers(_agent_cfg)
|
||||
except Exception:
|
||||
_custom_providers = _agent_cfg.get("custom_providers")
|
||||
if not isinstance(_custom_providers, list):
|
||||
_custom_providers = []
|
||||
|
||||
# Check custom_providers per-model context_length
|
||||
if _config_context_length is None:
|
||||
if _config_context_length is None and _custom_providers:
|
||||
try:
|
||||
from hermes_cli.config import get_compatible_custom_providers
|
||||
_custom_providers = get_compatible_custom_providers(_agent_cfg)
|
||||
from hermes_cli.config import get_custom_provider_context_length
|
||||
_cp_ctx_resolved = get_custom_provider_context_length(
|
||||
model=self.model,
|
||||
base_url=self.base_url,
|
||||
custom_providers=_custom_providers,
|
||||
)
|
||||
if _cp_ctx_resolved:
|
||||
_config_context_length = int(_cp_ctx_resolved)
|
||||
except Exception:
|
||||
_custom_providers = _agent_cfg.get("custom_providers")
|
||||
if not isinstance(_custom_providers, list):
|
||||
_custom_providers = []
|
||||
for _cp_entry in _custom_providers:
|
||||
if not isinstance(_cp_entry, dict):
|
||||
continue
|
||||
_cp_url = (_cp_entry.get("base_url") or "").rstrip("/")
|
||||
if _cp_url and _cp_url == self.base_url.rstrip("/"):
|
||||
_cp_models = _cp_entry.get("models", {})
|
||||
if isinstance(_cp_models, dict):
|
||||
_cp_model_cfg = _cp_models.get(self.model, {})
|
||||
if isinstance(_cp_model_cfg, dict):
|
||||
_cp_ctx = _cp_model_cfg.get("context_length")
|
||||
if _cp_ctx is not None:
|
||||
try:
|
||||
_config_context_length = int(_cp_ctx)
|
||||
except (TypeError, ValueError):
|
||||
logger.warning(
|
||||
"Invalid context_length for model %r in "
|
||||
"custom_providers: %r — must be a plain "
|
||||
"integer (e.g. 256000, not '256K'). "
|
||||
"Falling back to auto-detection.",
|
||||
self.model, _cp_ctx,
|
||||
)
|
||||
print(
|
||||
f"\n⚠ Invalid context_length for model {self.model!r} in custom_providers: {_cp_ctx!r}\n"
|
||||
f" Must be a plain integer (e.g. 256000, not '256K').\n"
|
||||
f" Falling back to auto-detected context window.\n",
|
||||
file=sys.stderr,
|
||||
)
|
||||
break
|
||||
_cp_ctx_resolved = None
|
||||
|
||||
# Surface a clear warning if the user set a context_length but it
|
||||
# wasn't a valid positive int — the helper silently skips those.
|
||||
if _config_context_length is None:
|
||||
_target = self.base_url.rstrip("/") if self.base_url else ""
|
||||
for _cp_entry in _custom_providers:
|
||||
if not isinstance(_cp_entry, dict):
|
||||
continue
|
||||
_cp_url = (_cp_entry.get("base_url") or "").rstrip("/")
|
||||
if _target and _cp_url == _target:
|
||||
_cp_models = _cp_entry.get("models", {})
|
||||
if isinstance(_cp_models, dict):
|
||||
_cp_model_cfg = _cp_models.get(self.model, {})
|
||||
if isinstance(_cp_model_cfg, dict):
|
||||
_cp_ctx = _cp_model_cfg.get("context_length")
|
||||
if _cp_ctx is not None:
|
||||
try:
|
||||
_parsed = int(_cp_ctx)
|
||||
if _parsed <= 0:
|
||||
raise ValueError
|
||||
except (TypeError, ValueError):
|
||||
logger.warning(
|
||||
"Invalid context_length for model %r in "
|
||||
"custom_providers: %r — must be a positive "
|
||||
"integer (e.g. 256000, not '256K'). "
|
||||
"Falling back to auto-detection.",
|
||||
self.model, _cp_ctx,
|
||||
)
|
||||
print(
|
||||
f"\n⚠ Invalid context_length for model {self.model!r} in custom_providers: {_cp_ctx!r}\n"
|
||||
f" Must be a positive integer (e.g. 256000, not '256K').\n"
|
||||
f" Falling back to auto-detected context window.\n",
|
||||
file=sys.stderr,
|
||||
)
|
||||
break
|
||||
|
||||
# Select context engine: config-driven (like memory providers).
|
||||
# 1. Check config.yaml context.engine setting
|
||||
@@ -1853,6 +1891,7 @@ class AIAgent:
|
||||
api_key=getattr(self, "api_key", ""),
|
||||
config_context_length=_config_context_length,
|
||||
provider=self.provider,
|
||||
custom_providers=_custom_providers,
|
||||
)
|
||||
self.context_compressor.update_model(
|
||||
model=self.model,
|
||||
@@ -2143,12 +2182,23 @@ class AIAgent:
|
||||
# ── Update context compressor ──
|
||||
if hasattr(self, "context_compressor") and self.context_compressor:
|
||||
from agent.model_metadata import get_model_context_length
|
||||
# Re-read custom_providers from live config so per-model
|
||||
# context_length overrides are honored when switching to a
|
||||
# custom provider mid-session (closes #15779).
|
||||
_sm_custom_providers = None
|
||||
try:
|
||||
from hermes_cli.config import load_config, get_compatible_custom_providers
|
||||
_sm_cfg = load_config()
|
||||
_sm_custom_providers = get_compatible_custom_providers(_sm_cfg)
|
||||
except Exception:
|
||||
_sm_custom_providers = None
|
||||
new_context_length = get_model_context_length(
|
||||
self.model,
|
||||
base_url=self.base_url,
|
||||
api_key=self.api_key,
|
||||
provider=self.provider,
|
||||
config_context_length=getattr(self, "_config_context_length", None),
|
||||
custom_providers=_sm_custom_providers,
|
||||
)
|
||||
self.context_compressor.update_model(
|
||||
model=self.model,
|
||||
@@ -2427,23 +2477,12 @@ class AIAgent:
|
||||
# above guarantees aux_context >= MINIMUM_CONTEXT_LENGTH,
|
||||
# so the new threshold is always >= 64K.
|
||||
#
|
||||
# Headroom: the threshold budgets RAW MESSAGES only, but the
|
||||
# actual request auxiliary callers send also includes the
|
||||
# system prompt and every tool schema. With 50+ tools that
|
||||
# overhead can be 25-30K tokens; setting new_threshold =
|
||||
# aux_context directly would let messages grow right to the
|
||||
# aux limit and the first compression/flush request would
|
||||
# overflow with HTTP 400. Subtract a dynamic headroom
|
||||
# estimate so the full request still fits.
|
||||
from agent.model_metadata import estimate_request_tokens_rough
|
||||
tool_overhead = estimate_request_tokens_rough([], tools=self.tools)
|
||||
# System prompt is not yet built at __init__ time; allow a
|
||||
# conservative 10K budget (SOUL/AGENTS.md + memory snapshot +
|
||||
# skills guidance) plus 2K for the flush instruction and a
|
||||
# small safety margin.
|
||||
headroom = tool_overhead + 12_000
|
||||
# The compression summariser sends a single user-role
|
||||
# prompt (no system prompt, no tools) to the aux model, so
|
||||
# new_threshold == aux_context is safe: the request is
|
||||
# the raw messages plus a small summarisation instruction.
|
||||
old_threshold = threshold
|
||||
new_threshold = max(aux_context - headroom, MINIMUM_CONTEXT_LENGTH)
|
||||
new_threshold = aux_context
|
||||
self.context_compressor.threshold_tokens = new_threshold
|
||||
# Keep threshold_percent in sync so future main-model
|
||||
# context_length changes (update_model) re-derive from a
|
||||
@@ -2517,6 +2556,22 @@ class AIAgent:
|
||||
)
|
||||
return hostname == "api.openai.com"
|
||||
|
||||
def _is_azure_openai_url(self, base_url: str = None) -> bool:
|
||||
"""Return True when a base URL targets Azure OpenAI.
|
||||
|
||||
Azure OpenAI exposes an OpenAI-compatible endpoint at
|
||||
``{resource}.openai.azure.com/openai/v1`` that accepts the
|
||||
standard ``openai`` Python client. Unlike api.openai.com it
|
||||
does NOT support the Responses API — gpt-5.x models are served
|
||||
on the regular ``/chat/completions`` path — so routing decisions
|
||||
must treat Azure separately from direct OpenAI.
|
||||
"""
|
||||
if base_url is not None:
|
||||
url = str(base_url).lower()
|
||||
else:
|
||||
url = getattr(self, "_base_url_lower", "") or ""
|
||||
return "openai.azure.com" in url
|
||||
|
||||
def _resolved_api_call_timeout(self) -> float:
|
||||
"""Resolve the effective per-call request timeout in seconds.
|
||||
|
||||
@@ -2688,12 +2743,14 @@ class AIAgent:
|
||||
|
||||
def _max_tokens_param(self, value: int) -> dict:
|
||||
"""Return the correct max tokens kwarg for the current provider.
|
||||
|
||||
|
||||
OpenAI's newer models (gpt-4o, o-series, gpt-5+) require
|
||||
'max_completion_tokens'. OpenRouter, local models, and older
|
||||
'max_completion_tokens'. Azure OpenAI also requires
|
||||
'max_completion_tokens' for gpt-5.x models served via the
|
||||
OpenAI-compatible endpoint. OpenRouter, local models, and older
|
||||
OpenAI models use 'max_tokens'.
|
||||
"""
|
||||
if self._is_direct_openai_url():
|
||||
if self._is_direct_openai_url() or self._is_azure_openai_url():
|
||||
return {"max_completion_tokens": value}
|
||||
return {"max_tokens": value}
|
||||
|
||||
@@ -3051,13 +3108,28 @@ class AIAgent:
|
||||
)
|
||||
|
||||
_SKILL_REVIEW_PROMPT = (
|
||||
"Review the conversation above and consider saving or updating a skill if appropriate.\n\n"
|
||||
"Focus on: was a non-trivial approach used to complete a task that required trial "
|
||||
"and error, or changing course due to experiential findings along the way, or did "
|
||||
"the user expect or desire a different method or outcome?\n\n"
|
||||
"If a relevant skill already exists, update it with what you learned. "
|
||||
"Otherwise, create a new skill if the approach is reusable.\n"
|
||||
"If nothing is worth saving, just say 'Nothing to save.' and stop."
|
||||
"Review the conversation above and consider whether a skill should be saved or updated.\n\n"
|
||||
"Work in this order — do not skip steps:\n\n"
|
||||
"1. SURVEY the existing skill landscape first. Call skills_list to see what you "
|
||||
"have. If anything looks potentially relevant, skill_view it before deciding. "
|
||||
"You are looking for the CLASS of task that just happened, not the exact task. "
|
||||
"Example: a successful Tauri build is in the class \"desktop app build "
|
||||
"troubleshooting\", not \"fix my specific Tauri error today\".\n\n"
|
||||
"2. THINK CLASS-FIRST. What general pattern of task did the user just complete? "
|
||||
"What conditions will trigger this pattern again? Describe the class in one "
|
||||
"sentence before looking at what to save.\n\n"
|
||||
"3. PREFER GENERALIZING AN EXISTING SKILL over creating a new one. If a skill "
|
||||
"already covers the class — even partially — update it (skill_manage patch) "
|
||||
"with the new insight. Broaden its \"when to use\" trigger if needed.\n\n"
|
||||
"4. ONLY CREATE A NEW SKILL when no existing skill reasonably covers the class. "
|
||||
"When you create one, name and scope it at the class level "
|
||||
"(\"react-i18n-setup\", not \"add-i18n-to-my-dashboard-app\"). The trigger "
|
||||
"section must describe the class of situations, not this one session.\n\n"
|
||||
"5. If you notice two existing skills that overlap, note it in your response "
|
||||
"so a future review can consolidate them. Do not consolidate now unless the "
|
||||
"overlap is obvious and low-risk.\n\n"
|
||||
"Only act when something is genuinely worth saving. "
|
||||
"If nothing stands out, just say 'Nothing to save.' and stop."
|
||||
)
|
||||
|
||||
_COMBINED_REVIEW_PROMPT = (
|
||||
@@ -3067,9 +3139,16 @@ class AIAgent:
|
||||
"about how you should behave, their work style, or ways they want you to operate? "
|
||||
"If so, save using the memory tool.\n\n"
|
||||
"**Skills**: Was a non-trivial approach used to complete a task that required trial "
|
||||
"and error, or changing course due to experiential findings along the way, or did "
|
||||
"the user expect or desire a different method or outcome? If a relevant skill "
|
||||
"already exists, update it. Otherwise, create a new one if the approach is reusable.\n\n"
|
||||
"and error, changing course due to experiential findings, or a different method "
|
||||
"or outcome than the user expected? If so, work in this order:\n"
|
||||
" a. SURVEY existing skills first (skills_list, then skill_view on candidates).\n"
|
||||
" b. Identify the CLASS of task, not the specific task "
|
||||
"(\"desktop app build troubleshooting\", not \"fix my Tauri error\").\n"
|
||||
" c. PREFER UPDATING/GENERALIZING an existing skill that covers the class.\n"
|
||||
" d. ONLY CREATE A NEW SKILL if no existing one covers the class. Scope at "
|
||||
"the class level, not this one session.\n"
|
||||
" e. If you notice overlapping skills during the survey, note it so a future "
|
||||
"review can consolidate them.\n\n"
|
||||
"Only act if there's something genuinely worth saving. "
|
||||
"If nothing stands out, just say 'Nothing to save.' and stop."
|
||||
)
|
||||
@@ -3167,12 +3246,25 @@ class AIAgent:
|
||||
with open(os.devnull, "w") as _devnull, \
|
||||
contextlib.redirect_stdout(_devnull), \
|
||||
contextlib.redirect_stderr(_devnull):
|
||||
# Inherit the parent agent's live runtime (provider, model,
|
||||
# base_url, api_key, api_mode) so the fork uses the exact
|
||||
# same credentials the main turn is using. Without this,
|
||||
# AIAgent.__init__ re-runs auto-resolution from env vars,
|
||||
# which fails for OAuth-only providers, session-scoped
|
||||
# creds, or credential-pool setups where the resolver can't
|
||||
# reconstruct auth from scratch -- producing the spurious
|
||||
# "No LLM provider configured" warning at end of turn.
|
||||
_parent_runtime = self._current_main_runtime()
|
||||
review_agent = AIAgent(
|
||||
model=self.model,
|
||||
max_iterations=8,
|
||||
quiet_mode=True,
|
||||
platform=self.platform,
|
||||
provider=self.provider,
|
||||
api_mode=_parent_runtime.get("api_mode") or None,
|
||||
base_url=_parent_runtime.get("base_url") or None,
|
||||
api_key=_parent_runtime.get("api_key") or None,
|
||||
credential_pool=getattr(self, "_credential_pool", None),
|
||||
parent_session_id=self.session_id,
|
||||
)
|
||||
review_agent._memory_write_origin = "background_review"
|
||||
@@ -3273,10 +3365,7 @@ class AIAgent:
|
||||
"""Save session state to both JSON log and SQLite on any exit path.
|
||||
|
||||
Ensures conversations are never lost, even on errors or early returns.
|
||||
Skipped when ``persist_session=False`` (ephemeral helper flows).
|
||||
"""
|
||||
if not self.persist_session:
|
||||
return
|
||||
self._apply_persist_user_message_override(messages)
|
||||
self._session_messages = messages
|
||||
self._save_session_log(messages)
|
||||
@@ -3326,6 +3415,7 @@ class AIAgent:
|
||||
reasoning_content=msg.get("reasoning_content") if role == "assistant" else None,
|
||||
reasoning_details=msg.get("reasoning_details") if role == "assistant" else None,
|
||||
codex_reasoning_items=msg.get("codex_reasoning_items") if role == "assistant" else None,
|
||||
codex_message_items=msg.get("codex_message_items") if role == "assistant" else None,
|
||||
)
|
||||
self._last_flushed_db_idx = len(messages)
|
||||
except Exception as e:
|
||||
@@ -4408,6 +4498,12 @@ class AIAgent:
|
||||
tool_guidance.append(SESSION_SEARCH_GUIDANCE)
|
||||
if "skill_manage" in self.valid_tool_names:
|
||||
tool_guidance.append(SKILLS_GUIDANCE)
|
||||
# Kanban worker/orchestrator lifecycle — only present when the
|
||||
# dispatcher spawned this process (kanban_show check_fn gates on
|
||||
# HERMES_KANBAN_TASK env var). Normal chat sessions never see
|
||||
# this block.
|
||||
if "kanban_show" in self.valid_tool_names:
|
||||
tool_guidance.append(KANBAN_GUIDANCE)
|
||||
if tool_guidance:
|
||||
prompt_parts.append(" ".join(tool_guidance))
|
||||
|
||||
@@ -5154,6 +5250,8 @@ class AIAgent:
|
||||
# response.incomplete instead of response.completed).
|
||||
self._codex_streamed_text_parts: list = []
|
||||
for attempt in range(max_stream_retries + 1):
|
||||
if self._interrupt_requested:
|
||||
raise InterruptedError("Agent interrupted before Codex stream retry")
|
||||
collected_output_items: list = []
|
||||
try:
|
||||
with active_client.responses.stream(**api_kwargs) as stream:
|
||||
@@ -5448,6 +5546,11 @@ class AIAgent:
|
||||
# Other anthropic_messages providers (MiniMax, Alibaba, etc.) use their own keys.
|
||||
if self.provider != "anthropic":
|
||||
return False
|
||||
# Azure endpoints use static API keys — OAuth token rotation doesn't apply.
|
||||
# Refreshing would pick up ~/.claude/.credentials.json OAuth token and break auth.
|
||||
_base = getattr(self, "_anthropic_base_url", "") or ""
|
||||
if "azure.com" in _base:
|
||||
return False
|
||||
|
||||
try:
|
||||
from agent.anthropic_adapter import resolve_anthropic_token, build_anthropic_client
|
||||
@@ -6323,6 +6426,14 @@ class AIAgent:
|
||||
|
||||
try:
|
||||
for _stream_attempt in range(_max_stream_retries + 1):
|
||||
# Check for interrupt before each retry attempt. Without
|
||||
# this, /stop closes the HTTP connection (outer poll loop),
|
||||
# but the retry loop opens a FRESH connection — negating the
|
||||
# interrupt entirely. On slow providers (ollama-cloud) each
|
||||
# retry can block for the full stream-read timeout (120s+),
|
||||
# causing multi-minute delays between /stop and response.
|
||||
if self._interrupt_requested:
|
||||
raise InterruptedError("Agent interrupted before stream retry")
|
||||
try:
|
||||
if self.api_mode == "anthropic_messages":
|
||||
self._try_refresh_anthropic_client_credentials()
|
||||
@@ -6796,10 +6907,15 @@ class AIAgent:
|
||||
# Determine api_mode from provider / base URL / model
|
||||
fb_api_mode = "chat_completions"
|
||||
fb_base_url = str(fb_client.base_url)
|
||||
_fb_is_azure = self._is_azure_openai_url(fb_base_url)
|
||||
if fb_provider == "openai-codex":
|
||||
fb_api_mode = "codex_responses"
|
||||
elif fb_provider == "anthropic" or fb_base_url.rstrip("/").lower().endswith("/anthropic"):
|
||||
fb_api_mode = "anthropic_messages"
|
||||
elif _fb_is_azure:
|
||||
# Azure OpenAI serves gpt-5.x on /chat/completions — does NOT
|
||||
# support the Responses API. Stay on chat_completions.
|
||||
fb_api_mode = "chat_completions"
|
||||
elif self._is_direct_openai_url(fb_base_url):
|
||||
fb_api_mode = "codex_responses"
|
||||
elif self._provider_model_requires_responses_api(
|
||||
@@ -7672,6 +7788,13 @@ class AIAgent:
|
||||
if codex_items:
|
||||
msg["codex_reasoning_items"] = codex_items
|
||||
|
||||
# Codex Responses API: preserve exact assistant message items (with
|
||||
# id/phase) so follow-up turns can replay structured items instead of
|
||||
# flattening to plain text. This is required for prefix cache hits.
|
||||
codex_message_items = getattr(assistant_message, "codex_message_items", None)
|
||||
if codex_message_items:
|
||||
msg["codex_message_items"] = codex_message_items
|
||||
|
||||
if assistant_message.tool_calls:
|
||||
tool_calls = []
|
||||
for tool_call in assistant_message.tool_calls:
|
||||
@@ -7757,25 +7880,53 @@ class AIAgent:
|
||||
if source_msg.get("role") != "assistant":
|
||||
return
|
||||
|
||||
explicit_reasoning = source_msg.get("reasoning_content")
|
||||
if isinstance(explicit_reasoning, str):
|
||||
api_msg["reasoning_content"] = explicit_reasoning
|
||||
# 1. Explicit reasoning_content already set — preserve it verbatim
|
||||
# (includes DeepSeek/Kimi's own empty-string placeholder written at
|
||||
# creation time, and any valid reasoning content from the same provider).
|
||||
existing = source_msg.get("reasoning_content")
|
||||
if isinstance(existing, str):
|
||||
api_msg["reasoning_content"] = existing
|
||||
return
|
||||
|
||||
# 2. Healthy session: promote 'reasoning' field to 'reasoning_content'
|
||||
# for providers that use the internal 'reasoning' key.
|
||||
# This must happen BEFORE the DeepSeek/Kimi tool-call check so that
|
||||
# genuine reasoning content is not overwritten by the empty-string
|
||||
# fallback (#15812 regression in PR #15478).
|
||||
normalized_reasoning = source_msg.get("reasoning")
|
||||
if isinstance(normalized_reasoning, str) and normalized_reasoning:
|
||||
api_msg["reasoning_content"] = normalized_reasoning
|
||||
return
|
||||
|
||||
# Providers that require an echoed reasoning_content on every
|
||||
# assistant tool-call turn. Detection logic lives in the per-provider
|
||||
# helpers so both the creation path (_build_assistant_message) and
|
||||
# this replay path stay in sync.
|
||||
if source_msg.get("tool_calls") and (
|
||||
# 3. DeepSeek / Kimi thinking mode: tool-call turns that lack
|
||||
# reasoning_content are "poisoned history" — a prior provider (MiniMax,
|
||||
# etc.) left them empty. DeepSeek returns HTTP 400 if reasoning_content
|
||||
# is absent on replay; inject "" to satisfy the provider's requirement
|
||||
# without forwarding any cross-provider reasoning content.
|
||||
needs_empty_reasoning = (
|
||||
source_msg.get("tool_calls")
|
||||
and (
|
||||
self._needs_kimi_tool_reasoning()
|
||||
or self._needs_deepseek_tool_reasoning()
|
||||
)
|
||||
)
|
||||
if needs_empty_reasoning:
|
||||
api_msg["reasoning_content"] = ""
|
||||
return
|
||||
|
||||
# 4. DeepSeek / Kimi thinking mode: all assistant messages need
|
||||
# reasoning_content. Inject "" to satisfy the provider's requirement
|
||||
# when no explicit reasoning content is present.
|
||||
if (
|
||||
self._needs_kimi_tool_reasoning()
|
||||
or self._needs_deepseek_tool_reasoning()
|
||||
):
|
||||
api_msg["reasoning_content"] = ""
|
||||
return
|
||||
|
||||
# 5. reasoning_content was present but not a string (e.g. None after
|
||||
# context compaction). Don't pass null to the API.
|
||||
api_msg.pop("reasoning_content", None)
|
||||
|
||||
@staticmethod
|
||||
def _sanitize_tool_calls_for_strict_api(api_msg: dict) -> dict:
|
||||
@@ -7927,254 +8078,6 @@ class AIAgent:
|
||||
"""
|
||||
return self.api_mode != "codex_responses"
|
||||
|
||||
def flush_memories(self, messages: list = None, min_turns: int = None):
    """Give the model one turn to persist memories before context is lost.

    Called before compression, session reset, or CLI exit. Injects a flush
    message, makes one API call, executes any memory tool calls, then
    strips all flush artifacts from the message list.

    The ``messages`` list is mutated in place while the flush runs (the
    flush prompt is appended to it) and is restored in the ``finally``
    clause, so on return it holds exactly the entries that preceded the
    flush prompt.

    Failures of the API call or of an individual memory tool call are
    logged and reported through ``self._emit_auxiliary_failure``; they are
    not raised to the caller.

    Args:
        messages: The current conversation messages. If None, uses
                  self._session_messages (last run_conversation state).
        min_turns: Minimum user turns required to trigger the flush.
                   None = use config value (flush_min_turns).
                   0 = always flush (used for compression).

    Returns:
        None.
    """
    # A configured minimum of 0 with no explicit override means "disabled".
    if self._memory_flush_min_turns == 0 and min_turns is None:
        return
    if "memory" not in self.valid_tool_names or not self._memory_store:
        return
    effective_min = min_turns if min_turns is not None else self._memory_flush_min_turns
    if self._user_turn_count < effective_min:
        return

    if messages is None:
        messages = getattr(self, '_session_messages', None)
    if not messages or len(messages) < 3:
        return

    flush_content = (
        "[System: The session is being compressed. "
        "Save anything worth remembering — prioritize user preferences, "
        "corrections, and recurring patterns over task-specific details.]"
    )
    # The sentinel tags the injected message so cleanup can locate it by
    # value rather than by object identity.
    _sentinel = f"__flush_{id(self)}_{time.monotonic()}"
    flush_msg = {"role": "user", "content": flush_content, "_flush_sentinel": _sentinel}
    messages.append(flush_msg)

    try:
        # Build API messages for the flush call
        _needs_sanitize = self._should_sanitize_tool_calls()
        api_messages = []
        for msg in messages:
            api_msg = msg.copy()
            self._copy_reasoning_content_for_api(msg, api_msg)
            # Drop internal bookkeeping keys before sending to the API.
            api_msg.pop("reasoning", None)
            api_msg.pop("finish_reason", None)
            api_msg.pop("_flush_sentinel", None)
            api_msg.pop("_thinking_prefill", None)
            if _needs_sanitize:
                self._sanitize_tool_calls_for_strict_api(api_msg)
            api_messages.append(api_msg)

        if self._cached_system_prompt:
            api_messages = [{"role": "system", "content": self._cached_system_prompt}] + api_messages

        # Make one API call with only the memory tool available
        memory_tool_def = None
        for t in (self.tools or []):
            if t.get("function", {}).get("name") == "memory":
                memory_tool_def = t
                break

        if not memory_tool_def:
            # Do NOT pop the flush message here: the ``finally`` clause
            # below removes it. Popping it early left the cleanup loop
            # without a sentinel to stop at, so it emptied the whole
            # conversation.
            return

        # Use auxiliary client for the flush call when available --
        # it's cheaper and avoids Codex Responses API incompatibility.
        from agent.auxiliary_client import (
            call_llm as _call_llm,
            _fixed_temperature_for_model,
            OMIT_TEMPERATURE,
        )
        _aux_available = True
        # Kimi models manage temperature server-side — omit it entirely.
        # Other models with a fixed contract get that value; everyone else
        # gets the historical 0.3 default.
        _fixed_temp = _fixed_temperature_for_model(self.model, self.base_url)
        _omit_temperature = _fixed_temp is OMIT_TEMPERATURE
        if _omit_temperature:
            _flush_temperature = None
        elif _fixed_temp is not None:
            _flush_temperature = _fixed_temp
        else:
            _flush_temperature = 0.3
        aux_error = None
        try:
            response = _call_llm(
                task="flush_memories",
                messages=api_messages,
                tools=[memory_tool_def],
                temperature=_flush_temperature,
                max_tokens=5120,
                # timeout resolved from auxiliary.flush_memories.timeout config
            )
        except Exception as e:
            # Remember the failure and fall through to the primary-client
            # fallbacks below.
            aux_error = e
            _aux_available = False
            response = None

        if not _aux_available and self.api_mode == "codex_responses":
            # No auxiliary client -- use the Codex Responses path directly.
            # The Responses API does not accept `temperature` on any
            # supported backend (chatgpt.com/backend-api/codex rejects it
            # outright; api.openai.com + gpt-5/o-series reasoning models
            # and Copilot Responses reject it on reasoning models). The
            # transport intentionally never sets it — strip any leftover
            # here so the flush fallback matches the main-loop behavior.
            codex_kwargs = self._build_api_kwargs(api_messages)
            _ct_flush = self._get_transport()
            if _ct_flush is not None:
                codex_kwargs["tools"] = _ct_flush.convert_tools([memory_tool_def])
            elif not codex_kwargs.get("tools"):
                codex_kwargs["tools"] = [memory_tool_def]
            codex_kwargs.pop("temperature", None)
            if "max_output_tokens" in codex_kwargs:
                codex_kwargs["max_output_tokens"] = 5120
            response = self._run_codex_stream(codex_kwargs)
        elif not _aux_available and self.api_mode == "anthropic_messages":
            # Native Anthropic — use the transport for kwargs
            _tflush = self._get_transport()
            ant_kwargs = _tflush.build_kwargs(
                model=self.model, messages=api_messages,
                tools=[memory_tool_def], max_tokens=5120,
                reasoning_config=None,
                preserve_dots=self._anthropic_preserve_dots(),
            )
            response = self._anthropic_messages_create(ant_kwargs)
        elif not _aux_available:
            api_kwargs = {
                "model": self.model,
                "messages": api_messages,
                "tools": [memory_tool_def],
                **self._max_tokens_param(5120),
            }
            if _flush_temperature is not None:
                api_kwargs["temperature"] = _flush_temperature
            from agent.auxiliary_client import _get_task_timeout
            response = self._ensure_primary_openai_client(reason="flush_memories").chat.completions.create(
                **api_kwargs, timeout=_get_task_timeout("flush_memories")
            )

        if aux_error is not None:
            logger.warning("Auxiliary memory flush failed; used fallback path: %s", aux_error)
            self._emit_auxiliary_failure("memory flush", aux_error)

        def _openai_tool_calls(resp):
            # Tool calls of the first choice of an OpenAI-shaped response,
            # or [] when there are none.
            if resp is not None and hasattr(resp, "choices") and resp.choices:
                msg = getattr(resp.choices[0], "message", None)
                calls = getattr(msg, "tool_calls", None)
                if calls:
                    return calls
            return []

        def _codex_output_tool_calls(resp):
            # Convert ``function_call`` items in ``resp.output`` into
            # objects exposing ``.id`` and ``.function.name/.arguments``.
            calls = []
            for item in getattr(resp, "output", []) or []:
                if getattr(item, "type", None) == "function_call":
                    calls.append(SimpleNamespace(
                        id=getattr(item, "call_id", None),
                        type="function",
                        function=SimpleNamespace(
                            name=getattr(item, "name", ""),
                            arguments=getattr(item, "arguments", "{}"),
                        ),
                    ))
            return calls

        # Extract tool calls from the response, handling all API formats
        tool_calls = []
        if self.api_mode == "codex_responses" and not _aux_available:
            _ct_flush = self._get_transport()
            _cnr_flush = _ct_flush.normalize_response(response) if _ct_flush is not None else None
            if _cnr_flush and _cnr_flush.tool_calls:
                tool_calls = [
                    SimpleNamespace(
                        id=tc.id, type="function",
                        function=SimpleNamespace(name=tc.name, arguments=tc.arguments),
                    ) for tc in _cnr_flush.tool_calls
                ]
            else:
                tool_calls = _codex_output_tool_calls(response)
        elif self.api_mode == "anthropic_messages" and not _aux_available:
            _tfn = self._get_transport()
            _flush_result = _tfn.normalize_response(response, strip_tool_prefix=self._is_anthropic_oauth)
            if _flush_result and _flush_result.tool_calls:
                tool_calls = [
                    SimpleNamespace(
                        id=tc.id, type="function",
                        function=SimpleNamespace(name=tc.name, arguments=tc.arguments),
                    ) for tc in _flush_result.tool_calls
                ]
        elif self.api_mode in ("chat_completions", "bedrock_converse"):
            # chat_completions / bedrock — normalize through transport
            _tfn = self._get_transport()
            _flush_result = _tfn.normalize_response(response) if _tfn is not None else None
            if _flush_result and _flush_result.tool_calls:
                tool_calls = _flush_result.tool_calls
            else:
                tool_calls = _openai_tool_calls(response)
        elif _aux_available and hasattr(response, "choices") and response.choices:
            # Auxiliary client returned OpenAI-shaped response while main
            # api_mode is codex/anthropic — extract tool_calls from .choices
            tool_calls = _openai_tool_calls(response)

        for tc in tool_calls:
            if tc.function.name == "memory":
                try:
                    args = json.loads(tc.function.arguments)
                    flush_target = args.get("target", "memory")
                    from tools.memory_tool import memory_tool as _memory_tool
                    _memory_tool(
                        action=args.get("action"),
                        target=flush_target,
                        content=args.get("content"),
                        old_text=args.get("old_text"),
                        store=self._memory_store,
                    )
                    if self._memory_manager and args.get("action") in ("add", "replace"):
                        try:
                            self._memory_manager.on_memory_write(
                                args.get("action", ""),
                                flush_target,
                                args.get("content", ""),
                                metadata=self._build_memory_write_metadata(
                                    write_origin="memory_flush",
                                    execution_context="flush_memories",
                                ),
                            )
                        except Exception as hook_err:
                            # Best-effort notification: the write itself
                            # already succeeded, so only leave a trace.
                            logger.debug(
                                "Memory manager on_memory_write hook failed during flush: %s",
                                hook_err,
                            )
                    if not self.quiet_mode:
                        print(f" 🧠 Memory flush: saved to {args.get('target', 'memory')}")
                except Exception as e:
                    logger.warning("Memory flush tool call failed: %s", e)
                    self._emit_auxiliary_failure("memory flush tool", e)
    except Exception as e:
        logger.warning("Memory flush API call failed: %s", e)
        self._emit_auxiliary_failure("memory flush", e)
    finally:
        # Strip flush artifacts: remove everything from the flush message onward.
        # Search backwards for the sentinel and truncate only if it is
        # found; if it is missing, leave the conversation untouched
        # instead of popping every message.
        for idx in range(len(messages) - 1, -1, -1):
            entry = messages[idx]
            if isinstance(entry, dict) and entry.get("_flush_sentinel") == _sentinel:
                del messages[idx:]
                break
|
||||
|
||||
def _compress_context(self, messages: list, system_message: str, *, approx_tokens: int = None, task_id: str = "default", focus_topic: str = None) -> tuple:
|
||||
"""Compress conversation context and split the session in SQLite.
|
||||
|
||||
@@ -8193,8 +8096,6 @@ class AIAgent:
|
||||
f"{approx_tokens:,}" if approx_tokens else "unknown", self.model,
|
||||
focus_topic,
|
||||
)
|
||||
# Pre-compression memory flush: let the model save memories before they're lost
|
||||
self.flush_memories(messages, min_turns=0)
|
||||
|
||||
# Notify external memory provider before compression discards context
|
||||
if self._memory_manager:
|
||||
@@ -11146,36 +11047,69 @@ class AIAgent:
|
||||
continue
|
||||
|
||||
# ── Nous Portal: record rate limit & skip retries ─────
|
||||
# When Nous returns a 429, record the reset time to a
|
||||
# shared file so ALL sessions (cron, gateway, auxiliary)
|
||||
# know not to pile on. Then skip further retries —
|
||||
# each one burns another RPH request and deepens the
|
||||
# rate limit hole. The retry loop's top-of-iteration
|
||||
# guard will catch this on the next pass and try
|
||||
# fallback or bail with a clear message.
|
||||
# When Nous returns a 429 that is a genuine account-
|
||||
# level rate limit, record the reset time to a shared
|
||||
# file so ALL sessions (cron, gateway, auxiliary) know
|
||||
# not to pile on, then skip further retries -- each
|
||||
# one burns another RPH request and deepens the hole.
|
||||
# The retry loop's top-of-iteration guard will catch
|
||||
# this on the next pass and try fallback or bail.
|
||||
#
|
||||
# IMPORTANT: Nous Portal multiplexes multiple upstream
|
||||
# providers (DeepSeek, Kimi, MiMo, Hermes). A 429 can
|
||||
# also mean an UPSTREAM provider is out of capacity
|
||||
# for one specific model -- transient, clears in
|
||||
# seconds, nothing to do with the caller's quota.
|
||||
# Tripping the cross-session breaker on that would
|
||||
# block every Nous model for minutes. We use
|
||||
# ``is_genuine_nous_rate_limit`` to tell the two
|
||||
# apart via the 429's own x-ratelimit-* headers and
|
||||
# the last-known-good state captured on the previous
|
||||
# successful response.
|
||||
if (
|
||||
is_rate_limited
|
||||
and self.provider == "nous"
|
||||
and classified.reason == FailoverReason.rate_limit
|
||||
and not recovered_with_pool
|
||||
):
|
||||
_genuine_nous_rate_limit = False
|
||||
try:
|
||||
from agent.nous_rate_guard import record_nous_rate_limit
|
||||
from agent.nous_rate_guard import (
|
||||
is_genuine_nous_rate_limit,
|
||||
record_nous_rate_limit,
|
||||
)
|
||||
_err_resp = getattr(api_error, "response", None)
|
||||
_err_hdrs = (
|
||||
getattr(_err_resp, "headers", None)
|
||||
if _err_resp else None
|
||||
)
|
||||
record_nous_rate_limit(
|
||||
_genuine_nous_rate_limit = is_genuine_nous_rate_limit(
|
||||
headers=_err_hdrs,
|
||||
error_context=error_context,
|
||||
last_known_state=self._rate_limit_state,
|
||||
)
|
||||
if _genuine_nous_rate_limit:
|
||||
record_nous_rate_limit(
|
||||
headers=_err_hdrs,
|
||||
error_context=error_context,
|
||||
)
|
||||
else:
|
||||
logging.info(
|
||||
"Nous 429 looks like upstream capacity "
|
||||
"(no exhausted bucket in headers or "
|
||||
"last-known state) -- not tripping "
|
||||
"cross-session breaker."
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
# Skip straight to max_retries — the top-of-loop
|
||||
# guard will handle fallback or bail cleanly.
|
||||
retry_count = max_retries
|
||||
continue
|
||||
if _genuine_nous_rate_limit:
|
||||
# Skip straight to max_retries -- the
|
||||
# top-of-loop guard will handle fallback or
|
||||
# bail cleanly.
|
||||
retry_count = max_retries
|
||||
continue
|
||||
# Upstream capacity 429: fall through to normal
|
||||
# retry logic. A different model (or the same
|
||||
# model a moment later) will typically succeed.
|
||||
|
||||
is_payload_too_large = (
|
||||
classified.reason == FailoverReason.payload_too_large
|
||||
@@ -11777,16 +11711,26 @@ class AIAgent:
|
||||
interim_has_content = bool((interim_msg.get("content") or "").strip())
|
||||
interim_has_reasoning = bool(interim_msg.get("reasoning", "").strip()) if isinstance(interim_msg.get("reasoning"), str) else False
|
||||
interim_has_codex_reasoning = bool(interim_msg.get("codex_reasoning_items"))
|
||||
interim_has_codex_message_items = bool(interim_msg.get("codex_message_items"))
|
||||
|
||||
if interim_has_content or interim_has_reasoning or interim_has_codex_reasoning:
|
||||
if (
|
||||
interim_has_content
|
||||
or interim_has_reasoning
|
||||
or interim_has_codex_reasoning
|
||||
or interim_has_codex_message_items
|
||||
):
|
||||
last_msg = messages[-1] if messages else None
|
||||
# Duplicate detection: two consecutive incomplete assistant
|
||||
# messages with identical content AND reasoning are collapsed.
|
||||
# For reasoning-only messages (codex_reasoning_items differ but
|
||||
# visible content/reasoning are both empty), we also compare
|
||||
# the encrypted items to avoid silently dropping new state.
|
||||
# For provider-state-only changes (encrypted reasoning
|
||||
# items or replayable message ids/phases/statuses differ
|
||||
# while visible content/reasoning are unchanged), compare
|
||||
# those opaque payloads too so we don't silently drop the
|
||||
# newer continuation state.
|
||||
last_codex_items = last_msg.get("codex_reasoning_items") if isinstance(last_msg, dict) else None
|
||||
interim_codex_items = interim_msg.get("codex_reasoning_items")
|
||||
last_codex_message_items = last_msg.get("codex_message_items") if isinstance(last_msg, dict) else None
|
||||
interim_codex_message_items = interim_msg.get("codex_message_items")
|
||||
duplicate_interim = (
|
||||
isinstance(last_msg, dict)
|
||||
and last_msg.get("role") == "assistant"
|
||||
@@ -11794,6 +11738,7 @@ class AIAgent:
|
||||
and (last_msg.get("content") or "") == (interim_msg.get("content") or "")
|
||||
and (last_msg.get("reasoning") or "") == (interim_msg.get("reasoning") or "")
|
||||
and last_codex_items == interim_codex_items
|
||||
and last_codex_message_items == interim_codex_message_items
|
||||
)
|
||||
if not duplicate_interim:
|
||||
messages.append(interim_msg)
|
||||
|
||||
Executable
+95
@@ -0,0 +1,95 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Build the Hermes Model Catalog — a centralized JSON manifest of curated models.
|
||||
|
||||
This script reads the in-repo hardcoded curated lists (``OPENROUTER_MODELS``,
|
||||
``_PROVIDER_MODELS["nous"]``) and writes them to a JSON manifest that the
|
||||
Hermes CLI fetches at runtime. Publishing the catalog through the docs site
|
||||
lets maintainers update model lists without shipping a Hermes release.
|
||||
|
||||
The runtime fetcher falls back to the same in-repo hardcoded lists if the
|
||||
manifest is unreachable, so this script is a convenience for keeping the
|
||||
manifest in sync — not a source of truth.
|
||||
|
||||
Usage::
|
||||
|
||||
python scripts/build_model_catalog.py
|
||||
|
||||
Output: ``website/static/api/model-catalog.json``
|
||||
|
||||
Live URL (after ``deploy-site.yml`` runs on merge to main):
|
||||
``https://hermes-agent.nousresearch.com/docs/api/model-catalog.json``
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
from datetime import datetime, timezone
|
||||
|
||||
REPO_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||
sys.path.insert(0, REPO_ROOT)
|
||||
|
||||
# Ensure HERMES_HOME is set for imports that touch it at module level.
|
||||
os.environ.setdefault("HERMES_HOME", os.path.join(os.path.expanduser("~"), ".hermes"))
|
||||
|
||||
from hermes_cli.models import OPENROUTER_MODELS, _PROVIDER_MODELS # noqa: E402
|
||||
|
||||
OUTPUT_PATH = os.path.join(REPO_ROOT, "website", "static", "api", "model-catalog.json")
|
||||
CATALOG_VERSION = 1
|
||||
|
||||
|
||||
def build_catalog() -> dict:
    """Assemble the model-catalog manifest as a plain dictionary.

    The manifest carries the catalog version, a UTC ``updated_at``
    timestamp (second resolution, ``Z`` suffix), top-level metadata, and
    one entry per provider. OpenRouter models are emitted with their
    descriptions; Nous models are emitted by id only.

    Returns:
        The manifest dictionary, ready to be serialized as JSON.
    """
    generated_at = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    # OPENROUTER_MODELS yields (model id, description) pairs.
    openrouter_entries = []
    for model_id, description in OPENROUTER_MODELS:
        openrouter_entries.append({"id": model_id, "description": description})
    openrouter_block = {
        "metadata": {
            "display_name": "OpenRouter",
            "note": (
                "Descriptions drive picker badges. Live /api/v1/models "
                "filters curated ids by tool-calling support and free pricing."
            ),
        },
        "models": openrouter_entries,
    }

    # A missing "nous" entry simply produces an empty model list.
    nous_entries = []
    for model_id in _PROVIDER_MODELS.get("nous", []):
        nous_entries.append({"id": model_id})
    nous_block = {
        "metadata": {
            "display_name": "Nous Portal",
            "note": (
                "Free-tier gating is determined live via Portal pricing "
                "(partition_nous_models_by_tier), not this manifest."
            ),
        },
        "models": nous_entries,
    }

    catalog = {
        "version": CATALOG_VERSION,
        "updated_at": generated_at,
        "metadata": {
            "source": "hermes-agent repo",
            "docs": "https://hermes-agent.nousresearch.com/docs/reference/model-catalog",
        },
        "providers": {
            "openrouter": openrouter_block,
            "nous": nous_block,
        },
    }
    return catalog
|
||||
|
||||
|
||||
def main() -> int:
    """Build the catalog, write it to ``OUTPUT_PATH``, and print a summary.

    The output directory is created if it does not exist. The manifest is
    written as 2-space-indented JSON followed by a trailing newline, then
    the path and a per-provider model count are printed.

    Returns:
        0, used as the process exit status.
    """
    catalog = build_catalog()
    os.makedirs(os.path.dirname(OUTPUT_PATH), exist_ok=True)
    # Pin the encoding so the published manifest does not depend on the
    # locale of the machine that runs this script.
    with open(OUTPUT_PATH, "w", encoding="utf-8") as fh:
        json.dump(catalog, fh, indent=2)
        fh.write("\n")

    print(f"Wrote {OUTPUT_PATH}")
    for provider, block in catalog["providers"].items():
        print(f" {provider}: {len(block['models'])} models")
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
@@ -43,6 +43,7 @@ AUTHOR_MAP = {
|
||||
"teknium1@gmail.com": "teknium1",
|
||||
"teknium@nousresearch.com": "teknium1",
|
||||
"127238744+teknium1@users.noreply.github.com": "teknium1",
|
||||
"focusflow.app.help@gmail.com": "yes999zc",
|
||||
"343873859@qq.com": "DrStrangerUJN",
|
||||
"uzmpsk.dilekakbas@gmail.com": "dlkakbs",
|
||||
"jefferson@heimdallstrategy.com": "Mind-Dragon",
|
||||
@@ -51,6 +52,7 @@ AUTHOR_MAP = {
|
||||
"web3blind@users.noreply.github.com": "web3blind",
|
||||
"julia@alexland.us": "alexg0bot",
|
||||
"1060770+benjaminsehl@users.noreply.github.com": "benjaminsehl",
|
||||
"nerijusn76@gmail.com": "Nerijusas",
|
||||
# contributors (from noreply pattern)
|
||||
"david.vv@icloud.com": "davidvv",
|
||||
"wangqiang@wangqiangdeMac-mini.local": "xiaoqiang243",
|
||||
@@ -67,7 +69,9 @@ AUTHOR_MAP = {
|
||||
"kshitijk4poor@gmail.com": "kshitijk4poor",
|
||||
"keira.voss94@gmail.com": "keiravoss94",
|
||||
"16443023+stablegenius49@users.noreply.github.com": "stablegenius49",
|
||||
"fqsy1416@gmail.com": "EKKOLearnAI",
|
||||
"simbamax99@gmail.com": "simbam99",
|
||||
"iris@growthpillars.co": "irispillars",
|
||||
"185121704+stablegenius49@users.noreply.github.com": "stablegenius49",
|
||||
"101283333+batuhankocyigit@users.noreply.github.com": "batuhankocyigit",
|
||||
"255305877+ismell0992-afk@users.noreply.github.com": "ismell0992-afk",
|
||||
@@ -92,6 +96,7 @@ AUTHOR_MAP = {
|
||||
"104278804+Sertug17@users.noreply.github.com": "Sertug17",
|
||||
"112503481+caentzminger@users.noreply.github.com": "caentzminger",
|
||||
"258577966+voidborne-d@users.noreply.github.com": "voidborne-d",
|
||||
"liusway405@gmail.com": "voidborne-d",
|
||||
"xydarcher@uestc.edu.cn": "Readon",
|
||||
"sir_even@icloud.com": "sirEven",
|
||||
"36056348+sirEven@users.noreply.github.com": "sirEven",
|
||||
@@ -176,6 +181,10 @@ AUTHOR_MAP = {
|
||||
"jaisehgal11299@gmail.com": "jaisup",
|
||||
"percydikec@gmail.com": "PercyDikec",
|
||||
"noonou7@gmail.com": "HenkDz",
|
||||
# Azure Foundry salvage (PRs #9029, #4599, #10086, #8766)
|
||||
"tech@smartlogics.net": "TechPrototyper",
|
||||
"637186+HangGlidersRule@users.noreply.github.com": "HangGlidersRule",
|
||||
"pein892@gmail.com": "pein892",
|
||||
"dean.kerr@gmail.com": "deankerr",
|
||||
"socrates1024@gmail.com": "socrates1024",
|
||||
"seanalt555@gmail.com": "Salt-555",
|
||||
@@ -410,6 +419,7 @@ AUTHOR_MAP = {
|
||||
"105142614+VTRiot@users.noreply.github.com": "VTRiot",
|
||||
"vivien000812@gmail.com": "iamagenius00",
|
||||
"89228157+Feranmi10@users.noreply.github.com": "Feranmi10",
|
||||
"oluwadareferanmi11@gmail.com": "Feranmi10",
|
||||
"simon@gtcl.us": "simon-gtcl",
|
||||
"suzukaze.haduki@gmail.com": "houko",
|
||||
"cliff@cigii.com": "cgarwood82",
|
||||
|
||||
@@ -281,7 +281,6 @@ Type these during an interactive chat session.
|
||||
### Utility
|
||||
```
|
||||
/branch (/fork) Branch the current session
|
||||
/btw Ephemeral side question (doesn't interrupt main task)
|
||||
/fast Toggle priority/fast processing
|
||||
/browser Open CDP browser connection
|
||||
/history Show conversation history (CLI)
|
||||
|
||||
@@ -0,0 +1,152 @@
|
||||
---
|
||||
name: kanban-orchestrator
|
||||
description: Decomposition playbook + specialist-roster conventions + anti-temptation rules for an orchestrator profile routing work through Kanban. The "don't do the work yourself" rule and the basic lifecycle are auto-injected into every kanban worker's system prompt; this skill is the deeper playbook when you're specifically playing the orchestrator role.
|
||||
version: 2.0.0
|
||||
metadata:
|
||||
hermes:
|
||||
tags: [kanban, multi-agent, orchestration, routing]
|
||||
related_skills: [kanban-worker]
|
||||
---
|
||||
|
||||
# Kanban Orchestrator — Decomposition Playbook
|
||||
|
||||
> The **core worker lifecycle** (including the `kanban_create` fan-out pattern and the "decompose, don't execute" rule) is auto-injected into every kanban process via the `KANBAN_GUIDANCE` system-prompt block. This skill is the deeper playbook when you're an orchestrator profile whose whole job is routing.
|
||||
|
||||
## When to use the board (vs. just doing the work)
|
||||
|
||||
Create Kanban tasks when any of these are true:
|
||||
|
||||
1. **Multiple specialists are needed.** Research + analysis + writing is three profiles.
|
||||
2. **The work should survive a crash or restart.** Long-running, recurring, or important.
|
||||
3. **The user might want to interject.** Human-in-the-loop at any step.
|
||||
4. **Multiple subtasks can run in parallel.** Fan-out for speed.
|
||||
5. **Review / iteration is expected.** A reviewer profile loops on drafter output.
|
||||
6. **The audit trail matters.** Board rows persist in SQLite forever.
|
||||
|
||||
If *none* of those apply — it's a small one-shot reasoning task — use `delegate_task` instead or answer the user directly.
|
||||
|
||||
## The anti-temptation rules
|
||||
|
||||
Your job description says "route, don't execute." The rules that enforce that:
|
||||
|
||||
- **Do not execute the work yourself.** Your restricted toolset usually doesn't even include terminal/file/code/web for implementation. If you find yourself "just fixing this quickly" — stop and create a task for the right specialist.
|
||||
- **For any concrete task, create a Kanban task and assign it.** Every single time.
|
||||
- **If no specialist fits, ask the user which profile to create.** Do not default to doing it yourself under "close enough."
|
||||
- **Decompose, route, and summarize — that's the whole job.**
|
||||
|
||||
## The standard specialist roster (convention)
|
||||
|
||||
Unless the user's setup has customized profiles, assume these exist. Adjust to whatever the user actually has — ask if you're unsure.
|
||||
|
||||
| Profile | Does | Typical workspace |
|
||||
|---|---|---|
|
||||
| `researcher` | Reads sources, gathers facts, writes findings | `scratch` |
|
||||
| `analyst` | Synthesizes, ranks, de-dupes. Consumes multiple `researcher` outputs | `scratch` |
|
||||
| `writer` | Drafts prose in the user's voice | `scratch` or `dir:` into their Obsidian vault |
|
||||
| `reviewer` | Reads output, leaves findings, gates approval | `scratch` |
|
||||
| `backend-eng` | Writes server-side code | `worktree` |
|
||||
| `frontend-eng` | Writes client-side code | `worktree` |
|
||||
| `ops` | Runs scripts, manages services, handles deployments | `dir:` into ops scripts repo |
|
||||
| `pm` | Writes specs, acceptance criteria | `scratch` |
|
||||
|
||||
## Decomposition playbook
|
||||
|
||||
### Step 1 — Understand the goal
|
||||
|
||||
Ask clarifying questions if the goal is ambiguous. Cheap to ask; expensive to spawn the wrong fleet.
|
||||
|
||||
### Step 2 — Sketch the task graph
|
||||
|
||||
Before creating anything, draft the graph out loud (in your response to the user). Example for "Analyze whether we should migrate to Postgres":
|
||||
|
||||
```
|
||||
T1 researcher research: Postgres cost vs current
|
||||
T2 researcher research: Postgres performance vs current
|
||||
T3 analyst synthesize migration recommendation parents: T1, T2
|
||||
T4 writer draft decision memo parents: T3
|
||||
```
|
||||
|
||||
Show this to the user. Let them correct it before you create anything.
|
||||
|
||||
### Step 3 — Create tasks and link
|
||||
|
||||
```python
|
||||
t1 = kanban_create(
|
||||
title="research: Postgres cost vs current",
|
||||
assignee="researcher",
|
||||
body="Compare estimated infrastructure costs, migration costs, and ongoing ops costs over a 3-year window. Sources: AWS/GCP pricing, team time estimates, current Postgres bills from peers.",
|
||||
tenant=os.environ.get("HERMES_TENANT"),
|
||||
)["task_id"]
|
||||
|
||||
t2 = kanban_create(
|
||||
title="research: Postgres performance vs current",
|
||||
assignee="researcher",
|
||||
body="Compare query latency, throughput, and scaling characteristics at our expected data volume (~500GB, 10k QPS peak). Sources: benchmark papers, public case studies, pgbench results if easy.",
|
||||
)["task_id"]
|
||||
|
||||
t3 = kanban_create(
|
||||
title="synthesize migration recommendation",
|
||||
assignee="analyst",
|
||||
body="Read the findings from T1 (cost) and T2 (performance). Produce a 1-page recommendation with explicit trade-offs and a go/no-go call.",
|
||||
parents=[t1, t2],
|
||||
)["task_id"]
|
||||
|
||||
t4 = kanban_create(
|
||||
title="draft decision memo",
|
||||
assignee="writer",
|
||||
body="Turn the analyst's recommendation into a 2-page memo for the CTO. Match the tone of previous decision memos in the team's knowledge base.",
|
||||
parents=[t3],
|
||||
)["task_id"]
|
||||
```
|
||||
|
||||
`parents=[...]` gates promotion — children stay in `todo` until every parent reaches `done`, then auto-promote to `ready`. No manual coordination needed; the dispatcher and dependency engine handle it.
|
||||
|
||||
### Step 4 — Complete your own task
|
||||
|
||||
If you were spawned as a task yourself (e.g. a `planner` profile was assigned `T0: "investigate Postgres migration"`), mark it done with a summary of what you created:
|
||||
|
||||
```python
|
||||
kanban_complete(
|
||||
summary="decomposed into T1-T4: 2 researchers parallel, 1 analyst on their outputs, 1 writer on the recommendation",
|
||||
metadata={
|
||||
"task_graph": {
|
||||
"T1": {"assignee": "researcher", "parents": []},
|
||||
"T2": {"assignee": "researcher", "parents": []},
|
||||
"T3": {"assignee": "analyst", "parents": ["T1", "T2"]},
|
||||
"T4": {"assignee": "writer", "parents": ["T3"]},
|
||||
},
|
||||
},
|
||||
)
|
||||
```
|
||||
|
||||
### Step 5 — Report back to the user
|
||||
|
||||
Tell them what you created in plain prose:
|
||||
|
||||
> I've queued 4 tasks:
|
||||
> - **T1** (researcher): cost comparison
|
||||
> - **T2** (researcher): performance comparison, in parallel with T1
|
||||
> - **T3** (analyst): synthesizes T1 + T2 into a recommendation
|
||||
> - **T4** (writer): turns T3 into a CTO memo
|
||||
>
|
||||
> The dispatcher will pick up T1 and T2 now. T3 starts when both finish. You'll get a gateway ping when T4 completes. Use the dashboard or `hermes kanban tail <id>` to follow along.
|
||||
|
||||
## Common patterns
|
||||
|
||||
**Fan-out + fan-in (research → synthesize):** N `researcher` tasks with no parents, one `analyst` task with all of them as parents.
|
||||
|
||||
**Pipeline with gates:** `pm → backend-eng → reviewer`. Each stage sets `parents=[previous_task]`. The reviewer either blocks or completes; if the reviewer blocks, the operator unblocks with feedback and the dispatcher respawns the task.
|
||||
|
||||
**Same-profile queue:** 50 tasks, all assigned to `translator`, no dependencies between them. Dispatcher serializes — translator processes them in priority order, accumulating experience in their own memory.
|
||||
|
||||
**Human-in-the-loop:** Any task can `kanban_block()` to wait for input. Dispatcher respawns after `/unblock`. The comment thread carries the full context.
|
||||
|
||||
## Pitfalls
|
||||
|
||||
**Reassignment vs. new task.** If a reviewer blocks with "needs changes," create a NEW task linked from the reviewer's task — don't re-run the same task with a stern look. The new task is assigned to the original implementer profile.
|
||||
|
||||
**Argument order for links.** `kanban_link(parent_id=..., child_id=...)` — parent first. Mixing them up demotes the wrong task to `todo`.
|
||||
|
||||
**Don't pre-create the whole graph if the shape depends on intermediate findings.** If T3's structure depends on what T1 and T2 find, let T3 exist as a "synthesize findings" task whose own first step is to read parent handoffs and plan the rest. Orchestrators can spawn orchestrators.
|
||||
|
||||
**Tenant inheritance.** If `HERMES_TENANT` is set in your env, pass `tenant=os.environ.get("HERMES_TENANT")` on every `kanban_create` call so child tasks stay in the same namespace. The Step 3 example passes it only on T1; in a tenant-scoped setup, add it to T2–T4 as well.
|
||||
@@ -0,0 +1,134 @@
|
||||
---
|
||||
name: kanban-worker
|
||||
description: Pitfalls, examples, and edge cases for Hermes Kanban workers. The lifecycle itself is auto-injected into every worker's system prompt as KANBAN_GUIDANCE (from agent/prompt_builder.py); this skill is what you load when you want deeper detail on specific scenarios.
|
||||
version: 2.0.0
|
||||
metadata:
|
||||
hermes:
|
||||
tags: [kanban, multi-agent, collaboration, workflow, pitfalls]
|
||||
related_skills: [kanban-orchestrator]
|
||||
---
|
||||
|
||||
# Kanban Worker — Pitfalls and Examples
|
||||
|
||||
> You're seeing this skill because the Hermes Kanban dispatcher spawned you as a worker with `--skills kanban-worker` — it's loaded automatically for every dispatched worker. The **lifecycle** (6 steps: orient → work → heartbeat → block/complete) also lives in the `KANBAN_GUIDANCE` block that's auto-injected into your system prompt. This skill is the deeper detail: good handoff shapes, retry diagnostics, edge cases.
|
||||
|
||||
## Workspace handling
|
||||
|
||||
Your workspace kind determines how you should behave inside `$HERMES_KANBAN_WORKSPACE`:
|
||||
|
||||
| Kind | What it is | How to work |
|
||||
|---|---|---|
|
||||
| `scratch` | Fresh tmp dir, yours alone | Read/write freely; it gets GC'd when the task is archived. |
|
||||
| `dir:<path>` | Shared persistent directory | Other runs will read what you write. Treat it like long-lived state. Path is guaranteed absolute (the kernel rejects relative paths). |
|
||||
| `worktree` | Git worktree at the resolved path | If `.git` doesn't exist, run `git worktree add <path> <branch>` from the main repo first, then cd and work normally. Commit work here. |
|
||||
|
||||
## Tenant isolation
|
||||
|
||||
If `$HERMES_TENANT` is set, the task belongs to a tenant namespace. When reading or writing persistent memory, prefix memory entries with the tenant so context doesn't leak across tenants:
|
||||
|
||||
- Good: `business-a: Acme is our biggest customer`
|
||||
- Bad (leaks): `Acme is our biggest customer`
|
||||
|
||||
## Good summary + metadata shapes
|
||||
|
||||
The `kanban_complete(summary=..., metadata=...)` handoff is how downstream workers read what you did. Patterns that work:
|
||||
|
||||
**Coding task:**
|
||||
```python
|
||||
kanban_complete(
|
||||
summary="shipped rate limiter — token bucket, keys on user_id with IP fallback, 14 tests pass",
|
||||
metadata={
|
||||
"changed_files": ["rate_limiter.py", "tests/test_rate_limiter.py"],
|
||||
"tests_run": 14,
|
||||
"tests_passed": 14,
|
||||
"decisions": ["user_id primary, IP fallback for unauthenticated requests"],
|
||||
},
|
||||
)
|
||||
```
|
||||
|
||||
**Research task:**
|
||||
```python
|
||||
kanban_complete(
|
||||
summary="3 competing libraries reviewed; vLLM wins on throughput, SGLang on latency, Tensorrt-LLM on memory efficiency",
|
||||
metadata={
|
||||
"sources_read": 12,
|
||||
"recommendation": "vLLM",
|
||||
"benchmarks": {"vllm": 1.0, "sglang": 0.87, "trtllm": 0.72},
|
||||
},
|
||||
)
|
||||
```
|
||||
|
||||
**Review task:**
|
||||
```python
|
||||
kanban_complete(
|
||||
summary="reviewed PR #123; 2 blocking issues found (SQL injection in /search, missing CSRF on /settings)",
|
||||
metadata={
|
||||
"pr_number": 123,
|
||||
"findings": [
|
||||
{"severity": "critical", "file": "api/search.py", "line": 42, "issue": "raw SQL concat"},
|
||||
{"severity": "high", "file": "api/settings.py", "issue": "missing CSRF middleware"},
|
||||
],
|
||||
"approved": False,
|
||||
},
|
||||
)
|
||||
```
|
||||
|
||||
Shape `metadata` so downstream parsers (reviewers, aggregators, schedulers) can use it without re-reading your prose.
|
||||
|
||||
## Block reasons that get answered fast
|
||||
|
||||
Bad: `"stuck"` — the human has no context.
|
||||
|
||||
Good: one sentence naming the specific decision you need. Leave longer context as a comment instead.
|
||||
|
||||
```python
|
||||
kanban_comment(
|
||||
task_id=os.environ["HERMES_KANBAN_TASK"],
|
||||
body="Full context: I have user IPs from Cloudflare headers but some users are behind NATs with thousands of peers. Keying on IP alone causes false positives.",
|
||||
)
|
||||
kanban_block(reason="Rate limit key choice: IP (simple, NAT-unsafe) or user_id (requires auth, skips anonymous endpoints)?")
|
||||
```
|
||||
|
||||
The block message is what appears in the dashboard / gateway notifier. The comment is the deeper context a human reads when they open the task.
|
||||
|
||||
## Heartbeats worth sending
|
||||
|
||||
Good heartbeats name progress: `"epoch 12/50, loss 0.31"`, `"scanned 1.2M/2.4M rows"`, `"uploaded 47/120 videos"`.
|
||||
|
||||
Bad heartbeats: `"still working"`, empty notes, sub-second intervals. Send a heartbeat every few minutes at most; skip them entirely for tasks under ~2 minutes.
|
||||
|
||||
## Retry scenarios
|
||||
|
||||
If you open the task and `kanban_show` returns `runs: [...]` with one or more closed runs, you're a retry. The prior runs' `outcome` / `summary` / `error` tell you what didn't work. Don't repeat that path. Typical retry diagnostics:
|
||||
|
||||
- `outcome: "timed_out"` — the previous attempt hit `max_runtime_seconds`. You may need to chunk the work or shorten it.
|
||||
- `outcome: "crashed"` — OOM or segfault. Reduce memory footprint.
|
||||
- `outcome: "spawn_failed"` + `error: "..."` — usually a profile config issue (missing credential, bad PATH). Ask the human via `kanban_block` instead of retrying blindly.
|
||||
- `outcome: "reclaimed"` + `summary: "task archived..."` — the operator archived the task out from under the previous run; you probably shouldn't be running at all, so check the task status carefully.
|
||||
- `outcome: "blocked"` — a previous attempt blocked; the unblock comment should be in the thread by now.
|
||||
|
||||
## Do NOT
|
||||
|
||||
- Call `delegate_task` as a substitute for `kanban_create`. `delegate_task` is for short reasoning subtasks inside YOUR run; `kanban_create` is for cross-agent handoffs that outlive one API loop.
|
||||
- Modify files outside `$HERMES_KANBAN_WORKSPACE` unless the task body says to.
|
||||
- Create follow-up tasks assigned to yourself — assign to the right specialist.
|
||||
- Complete a task you didn't actually finish. Block it instead.
|
||||
|
||||
## Pitfalls
|
||||
|
||||
**Task state can change between dispatch and your startup.** Between when the dispatcher claimed the task and when your process actually booted, the task may have been blocked, reassigned, or archived. Always call `kanban_show` first. If it reports `blocked` or `archived`, stop — you shouldn't be running.
|
||||
|
||||
**Workspace may have stale artifacts.** Especially `dir:` and `worktree` workspaces can have files from previous runs. Read the comment thread — it usually explains why you're running again and what state the workspace is in.
|
||||
|
||||
**Don't rely on the CLI when the `kanban_*` tools are available.** The `kanban_*` tools work across all terminal backends (Docker, Modal, SSH). `hermes kanban <verb>` from your terminal tool will fail in containerized backends because the CLI isn't installed there. When in doubt, use the tool.
|
||||
|
||||
## CLI fallback (for scripting)
|
||||
|
||||
Every tool has a CLI equivalent for human operators and scripts:
|
||||
- `kanban_show` ↔ `hermes kanban show <id> --json`
|
||||
- `kanban_complete` ↔ `hermes kanban complete <id> --summary "..." --metadata '{...}'`
|
||||
- `kanban_block` ↔ `hermes kanban block <id> "reason"`
|
||||
- `kanban_create` ↔ `hermes kanban create "title" --assignee <profile> [--parent <id>]`
|
||||
- etc.
|
||||
|
||||
Use the tools from inside an agent; the CLI exists for the human at the terminal.
|
||||
@@ -17,6 +17,13 @@ Remove refusal behaviors (guardrails) from open-weight LLMs without retraining o
|
||||
|
||||
**License warning:** OBLITERATUS is AGPL-3.0. NEVER import it as a Python library. Always invoke via CLI (`obliteratus` command) or subprocess. This keeps Hermes Agent's MIT license clean.
|
||||
|
||||
## Video Guide
|
||||
|
||||
Walkthrough of OBLITERATUS used by a Hermes agent to abliterate Gemma:
|
||||
https://www.youtube.com/watch?v=8fG9BrNTeHs ("OBLITERATUS: An AI Agent Removed Gemma 4's Safety Guardrails")
|
||||
|
||||
Useful when the user wants a visual overview of the end-to-end workflow before running it themselves.
|
||||
|
||||
## When to Use This Skill
|
||||
|
||||
Trigger when the user:
|
||||
|
||||
@@ -386,7 +386,7 @@ class TestProvidersDictApiModeAnthropicMessages:
|
||||
},
|
||||
},
|
||||
"auxiliary": {
|
||||
"flush_memories": {
|
||||
"compression": {
|
||||
"provider": "myrelay",
|
||||
"model": "claude-sonnet-4.6",
|
||||
},
|
||||
@@ -399,11 +399,11 @@ class TestProvidersDictApiModeAnthropicMessages:
|
||||
AnthropicAuxiliaryClient,
|
||||
AsyncAnthropicAuxiliaryClient,
|
||||
)
|
||||
async_client, async_model = get_async_text_auxiliary_client("flush_memories")
|
||||
async_client, async_model = get_async_text_auxiliary_client("compression")
|
||||
assert isinstance(async_client, AsyncAnthropicAuxiliaryClient)
|
||||
assert async_model == "claude-sonnet-4.6"
|
||||
|
||||
sync_client, sync_model = get_text_auxiliary_client("flush_memories")
|
||||
sync_client, sync_model = get_text_auxiliary_client("compression")
|
||||
assert isinstance(sync_client, AnthropicAuxiliaryClient)
|
||||
assert sync_model == "claude-sonnet-4.6"
|
||||
|
||||
|
||||
@@ -192,6 +192,43 @@ class TestDefaultContextLengths:
|
||||
f"{model_id}: expected {expected_ctx}, got {actual}"
|
||||
)
|
||||
|
||||
def test_deepseek_v4_models_1m_context(self):
|
||||
from agent.model_metadata import get_model_context_length
|
||||
from unittest.mock import patch as mock_patch
|
||||
|
||||
expected_keys = {
|
||||
"deepseek-v4-pro": 1_000_000,
|
||||
"deepseek-v4-flash": 1_000_000,
|
||||
"deepseek-chat": 1_000_000,
|
||||
"deepseek-reasoner": 1_000_000,
|
||||
}
|
||||
for key, value in expected_keys.items():
|
||||
assert key in DEFAULT_CONTEXT_LENGTHS, f"{key} missing"
|
||||
assert DEFAULT_CONTEXT_LENGTHS[key] == value, (
|
||||
f"{key} should be {value}, got {DEFAULT_CONTEXT_LENGTHS[key]}"
|
||||
)
|
||||
|
||||
# Longest-first substring matching must resolve both the bare V4
|
||||
# ids (native DeepSeek) and the vendor-prefixed forms (OpenRouter
|
||||
# / Nous Portal) to 1M without probing down to the legacy 128K
|
||||
# ``deepseek`` substring fallback.
|
||||
with mock_patch("agent.model_metadata.fetch_model_metadata", return_value={}), \
|
||||
mock_patch("agent.model_metadata.fetch_endpoint_model_metadata", return_value={}), \
|
||||
mock_patch("agent.model_metadata.get_cached_context_length", return_value=None):
|
||||
cases = [
|
||||
("deepseek-v4-pro", 1_000_000),
|
||||
("deepseek-v4-flash", 1_000_000),
|
||||
("deepseek/deepseek-v4-pro", 1_000_000),
|
||||
("deepseek/deepseek-v4-flash", 1_000_000),
|
||||
("deepseek-chat", 1_000_000),
|
||||
("deepseek-reasoner", 1_000_000),
|
||||
]
|
||||
for model_id, expected_ctx in cases:
|
||||
actual = get_model_context_length(model_id)
|
||||
assert actual == expected_ctx, (
|
||||
f"{model_id}: expected {expected_ctx}, got {actual}"
|
||||
)
|
||||
|
||||
def test_all_values_positive(self):
|
||||
for key, value in DEFAULT_CONTEXT_LENGTHS.items():
|
||||
assert value > 0, f"{key} has non-positive context length"
|
||||
@@ -303,7 +340,9 @@ class TestCodexOAuthContextLength:
|
||||
from agent.model_metadata import get_model_context_length
|
||||
|
||||
# OpenRouter — should hit its own catalog path first; when mocked
|
||||
# empty, falls through to hardcoded DEFAULT_CONTEXT_LENGTHS (400k).
|
||||
# empty, falls through to hardcoded DEFAULT_CONTEXT_LENGTHS (1.05M,
|
||||
# matching the real direct-API value — Codex OAuth's 272k cap is
|
||||
# provider-specific and must not leak here).
|
||||
with patch("agent.model_metadata.fetch_model_metadata", return_value={}), \
|
||||
patch("agent.model_metadata.fetch_endpoint_model_metadata", return_value={}), \
|
||||
patch("agent.model_metadata.get_cached_context_length", return_value=None), \
|
||||
@@ -314,7 +353,7 @@ class TestCodexOAuthContextLength:
|
||||
api_key="",
|
||||
provider="openrouter",
|
||||
)
|
||||
assert ctx == 400_000, (
|
||||
assert ctx == 1_050_000, (
|
||||
f"Non-Codex gpt-5.5 resolved to {ctx}; Codex 272k override "
|
||||
"leaked outside openai-codex provider"
|
||||
)
|
||||
@@ -459,9 +498,10 @@ class TestGetModelContextLength:
|
||||
|
||||
@patch("agent.model_metadata.fetch_model_metadata")
|
||||
def test_api_missing_context_length_key(self, mock_fetch):
|
||||
"""Model in API but without context_length → defaults to 128000."""
|
||||
"""Model in API but without context_length → defaults to the top
|
||||
probe tier (currently 256K)."""
|
||||
mock_fetch.return_value = {"test/model": {"name": "Test"}}
|
||||
assert get_model_context_length("test/model") == 128000
|
||||
assert get_model_context_length("test/model") == CONTEXT_PROBE_TIERS[0]
|
||||
|
||||
@patch("agent.model_metadata.fetch_model_metadata")
|
||||
def test_cache_takes_priority_over_api(self, mock_fetch, tmp_path):
|
||||
@@ -814,14 +854,17 @@ class TestContextProbeTiers:
|
||||
for i in range(len(CONTEXT_PROBE_TIERS) - 1):
|
||||
assert CONTEXT_PROBE_TIERS[i] > CONTEXT_PROBE_TIERS[i + 1]
|
||||
|
||||
def test_first_tier_is_128k(self):
|
||||
assert CONTEXT_PROBE_TIERS[0] == 128_000
|
||||
def test_first_tier_is_256k(self):
|
||||
assert CONTEXT_PROBE_TIERS[0] == 256_000
|
||||
|
||||
def test_last_tier_is_8k(self):
|
||||
assert CONTEXT_PROBE_TIERS[-1] == 8_000
|
||||
|
||||
|
||||
class TestGetNextProbeTier:
|
||||
def test_from_256k(self):
|
||||
assert get_next_probe_tier(256_000) == 128_000
|
||||
|
||||
def test_from_128k(self):
|
||||
assert get_next_probe_tier(128_000) == 64_000
|
||||
|
||||
@@ -841,8 +884,8 @@ class TestGetNextProbeTier:
|
||||
assert get_next_probe_tier(100_000) == 64_000
|
||||
|
||||
def test_above_max_tier(self):
|
||||
"""Value above 128K should return 128K."""
|
||||
assert get_next_probe_tier(500_000) == 128_000
|
||||
"""Value above 256K should return 256K."""
|
||||
assert get_next_probe_tier(500_000) == 256_000
|
||||
|
||||
def test_zero_returns_none(self):
|
||||
assert get_next_probe_tier(0) is None
|
||||
|
||||
@@ -251,3 +251,141 @@ class TestAuxiliaryClientIntegration:
|
||||
monkeypatch.setattr(aux, "_read_nous_auth", lambda: None)
|
||||
result = aux._try_nous()
|
||||
assert result == (None, None)
|
||||
|
||||
|
||||
class TestIsGenuineNousRateLimit:
|
||||
"""Tell a real account-level 429 apart from an upstream-capacity 429.
|
||||
|
||||
Nous Portal multiplexes upstreams (DeepSeek, Kimi, MiMo, Hermes).
|
||||
A 429 from an upstream out of capacity should NOT trip the
|
||||
cross-session breaker; a real user-quota 429 should.
|
||||
"""
|
||||
|
||||
def test_exhausted_hourly_bucket_in_429_headers_is_genuine(self):
|
||||
from agent.nous_rate_guard import is_genuine_nous_rate_limit
|
||||
|
||||
headers = {
|
||||
"x-ratelimit-limit-requests-1h": "800",
|
||||
"x-ratelimit-remaining-requests-1h": "0",
|
||||
"x-ratelimit-reset-requests-1h": "3100",
|
||||
"x-ratelimit-limit-requests": "200",
|
||||
"x-ratelimit-remaining-requests": "198",
|
||||
"x-ratelimit-reset-requests": "40",
|
||||
}
|
||||
assert is_genuine_nous_rate_limit(headers=headers) is True
|
||||
|
||||
def test_exhausted_tokens_bucket_is_genuine(self):
|
||||
from agent.nous_rate_guard import is_genuine_nous_rate_limit
|
||||
|
||||
headers = {
|
||||
"x-ratelimit-limit-tokens": "800000",
|
||||
"x-ratelimit-remaining-tokens": "0",
|
||||
"x-ratelimit-reset-tokens": "45", # < 60s threshold -> not genuine
|
||||
"x-ratelimit-limit-tokens-1h": "8000000",
|
||||
"x-ratelimit-remaining-tokens-1h": "0",
|
||||
"x-ratelimit-reset-tokens-1h": "1800", # >= 60s threshold -> genuine
|
||||
}
|
||||
assert is_genuine_nous_rate_limit(headers=headers) is True
|
||||
|
||||
def test_healthy_headers_on_429_are_upstream_capacity(self):
|
||||
# Classic upstream-capacity symptom: Nous edge reports plenty of
|
||||
# headroom on every bucket, but returns 429 anyway because
|
||||
# upstream (DeepSeek / Kimi / ...) is out of capacity.
|
||||
from agent.nous_rate_guard import is_genuine_nous_rate_limit
|
||||
|
||||
headers = {
|
||||
"x-ratelimit-limit-requests": "200",
|
||||
"x-ratelimit-remaining-requests": "198",
|
||||
"x-ratelimit-reset-requests": "40",
|
||||
"x-ratelimit-limit-requests-1h": "800",
|
||||
"x-ratelimit-remaining-requests-1h": "750",
|
||||
"x-ratelimit-reset-requests-1h": "3100",
|
||||
"x-ratelimit-limit-tokens": "800000",
|
||||
"x-ratelimit-remaining-tokens": "790000",
|
||||
"x-ratelimit-reset-tokens": "40",
|
||||
"x-ratelimit-limit-tokens-1h": "8000000",
|
||||
"x-ratelimit-remaining-tokens-1h": "7800000",
|
||||
"x-ratelimit-reset-tokens-1h": "3100",
|
||||
}
|
||||
assert is_genuine_nous_rate_limit(headers=headers) is False
|
||||
|
||||
def test_bare_429_with_no_headers_is_upstream(self):
|
||||
from agent.nous_rate_guard import is_genuine_nous_rate_limit
|
||||
|
||||
assert is_genuine_nous_rate_limit(headers=None) is False
|
||||
assert is_genuine_nous_rate_limit(headers={}) is False
|
||||
assert is_genuine_nous_rate_limit(
|
||||
headers={"content-type": "application/json"}
|
||||
) is False
|
||||
|
||||
def test_exhausted_bucket_with_short_reset_is_not_genuine(self):
|
||||
# remaining == 0 but reset in < 60s: almost certainly a
|
||||
# secondary per-minute throttle that will clear immediately --
|
||||
# not worth tripping the cross-session breaker.
|
||||
from agent.nous_rate_guard import is_genuine_nous_rate_limit
|
||||
|
||||
headers = {
|
||||
"x-ratelimit-limit-requests": "200",
|
||||
"x-ratelimit-remaining-requests": "0",
|
||||
"x-ratelimit-reset-requests": "30",
|
||||
}
|
||||
assert is_genuine_nous_rate_limit(headers=headers) is False
|
||||
|
||||
def test_last_known_state_with_exhausted_bucket_triggers_genuine(self):
|
||||
# Headers on the 429 lack rate-limit info, but the previous
|
||||
# successful response already showed the hourly bucket
|
||||
# exhausted -- the 429 is almost certainly that limit
|
||||
# continuing.
|
||||
from agent.nous_rate_guard import is_genuine_nous_rate_limit
|
||||
from agent.rate_limit_tracker import parse_rate_limit_headers
|
||||
|
||||
prior_headers = {
|
||||
"x-ratelimit-limit-requests-1h": "800",
|
||||
"x-ratelimit-remaining-requests-1h": "0",
|
||||
"x-ratelimit-reset-requests-1h": "2000",
|
||||
"x-ratelimit-limit-requests": "200",
|
||||
"x-ratelimit-remaining-requests": "100",
|
||||
"x-ratelimit-reset-requests": "30",
|
||||
"x-ratelimit-limit-tokens": "800000",
|
||||
"x-ratelimit-remaining-tokens": "700000",
|
||||
"x-ratelimit-reset-tokens": "30",
|
||||
"x-ratelimit-limit-tokens-1h": "8000000",
|
||||
"x-ratelimit-remaining-tokens-1h": "7000000",
|
||||
"x-ratelimit-reset-tokens-1h": "2000",
|
||||
}
|
||||
last_state = parse_rate_limit_headers(prior_headers, provider="nous")
|
||||
assert is_genuine_nous_rate_limit(
|
||||
headers=None, last_known_state=last_state
|
||||
) is True
|
||||
|
||||
def test_last_known_state_all_healthy_stays_upstream(self):
|
||||
# Prior state was healthy; bare 429 arrives; should be treated
|
||||
# as upstream capacity.
|
||||
from agent.nous_rate_guard import is_genuine_nous_rate_limit
|
||||
from agent.rate_limit_tracker import parse_rate_limit_headers
|
||||
|
||||
prior_headers = {
|
||||
"x-ratelimit-limit-requests-1h": "800",
|
||||
"x-ratelimit-remaining-requests-1h": "750",
|
||||
"x-ratelimit-reset-requests-1h": "2000",
|
||||
"x-ratelimit-limit-requests": "200",
|
||||
"x-ratelimit-remaining-requests": "180",
|
||||
"x-ratelimit-reset-requests": "30",
|
||||
"x-ratelimit-limit-tokens": "800000",
|
||||
"x-ratelimit-remaining-tokens": "790000",
|
||||
"x-ratelimit-reset-tokens": "30",
|
||||
"x-ratelimit-limit-tokens-1h": "8000000",
|
||||
"x-ratelimit-remaining-tokens-1h": "7900000",
|
||||
"x-ratelimit-reset-tokens-1h": "2000",
|
||||
}
|
||||
last_state = parse_rate_limit_headers(prior_headers, provider="nous")
|
||||
assert is_genuine_nous_rate_limit(
|
||||
headers=None, last_known_state=last_state
|
||||
) is False
|
||||
|
||||
def test_none_last_state_and_no_headers_is_upstream(self):
|
||||
from agent.nous_rate_guard import is_genuine_nous_rate_limit
|
||||
|
||||
assert is_genuine_nous_rate_limit(
|
||||
headers=None, last_known_state=None
|
||||
) is False
|
||||
|
||||
@@ -0,0 +1,164 @@
|
||||
"""Tests for agent/onboarding.py — contextual first-touch hint helpers."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import yaml
|
||||
import pytest
|
||||
|
||||
from agent.onboarding import (
|
||||
BUSY_INPUT_FLAG,
|
||||
TOOL_PROGRESS_FLAG,
|
||||
busy_input_hint_cli,
|
||||
busy_input_hint_gateway,
|
||||
is_seen,
|
||||
mark_seen,
|
||||
tool_progress_hint_cli,
|
||||
tool_progress_hint_gateway,
|
||||
)
|
||||
|
||||
|
||||
class TestIsSeen:
|
||||
def test_empty_config_unseen(self):
|
||||
assert is_seen({}, BUSY_INPUT_FLAG) is False
|
||||
|
||||
def test_missing_onboarding_unseen(self):
|
||||
assert is_seen({"display": {}}, BUSY_INPUT_FLAG) is False
|
||||
|
||||
def test_onboarding_not_dict_unseen(self):
|
||||
assert is_seen({"onboarding": "nope"}, BUSY_INPUT_FLAG) is False
|
||||
|
||||
def test_seen_dict_missing_flag(self):
|
||||
assert is_seen({"onboarding": {"seen": {}}}, BUSY_INPUT_FLAG) is False
|
||||
|
||||
def test_seen_flag_true(self):
|
||||
cfg = {"onboarding": {"seen": {BUSY_INPUT_FLAG: True}}}
|
||||
assert is_seen(cfg, BUSY_INPUT_FLAG) is True
|
||||
|
||||
def test_seen_flag_falsy(self):
|
||||
cfg = {"onboarding": {"seen": {BUSY_INPUT_FLAG: False}}}
|
||||
assert is_seen(cfg, BUSY_INPUT_FLAG) is False
|
||||
|
||||
def test_other_flags_isolated(self):
|
||||
cfg = {"onboarding": {"seen": {BUSY_INPUT_FLAG: True}}}
|
||||
assert is_seen(cfg, TOOL_PROGRESS_FLAG) is False
|
||||
|
||||
|
||||
class TestMarkSeen:
|
||||
def test_creates_missing_file_and_sets_flag(self, tmp_path):
|
||||
cfg_path = tmp_path / "config.yaml"
|
||||
assert mark_seen(cfg_path, BUSY_INPUT_FLAG) is True
|
||||
|
||||
loaded = yaml.safe_load(cfg_path.read_text())
|
||||
assert loaded["onboarding"]["seen"][BUSY_INPUT_FLAG] is True
|
||||
|
||||
def test_preserves_other_config(self, tmp_path):
|
||||
cfg_path = tmp_path / "config.yaml"
|
||||
cfg_path.write_text(yaml.safe_dump({
|
||||
"model": {"default": "claude-sonnet-4.6"},
|
||||
"display": {"skin": "default"},
|
||||
}))
|
||||
|
||||
assert mark_seen(cfg_path, BUSY_INPUT_FLAG) is True
|
||||
loaded = yaml.safe_load(cfg_path.read_text())
|
||||
|
||||
assert loaded["model"]["default"] == "claude-sonnet-4.6"
|
||||
assert loaded["display"]["skin"] == "default"
|
||||
assert loaded["onboarding"]["seen"][BUSY_INPUT_FLAG] is True
|
||||
|
||||
def test_preserves_other_seen_flags(self, tmp_path):
|
||||
cfg_path = tmp_path / "config.yaml"
|
||||
cfg_path.write_text(yaml.safe_dump({
|
||||
"onboarding": {"seen": {TOOL_PROGRESS_FLAG: True}},
|
||||
}))
|
||||
|
||||
assert mark_seen(cfg_path, BUSY_INPUT_FLAG) is True
|
||||
loaded = yaml.safe_load(cfg_path.read_text())
|
||||
|
||||
assert loaded["onboarding"]["seen"][TOOL_PROGRESS_FLAG] is True
|
||||
assert loaded["onboarding"]["seen"][BUSY_INPUT_FLAG] is True
|
||||
|
||||
def test_idempotent(self, tmp_path):
|
||||
cfg_path = tmp_path / "config.yaml"
|
||||
mark_seen(cfg_path, BUSY_INPUT_FLAG)
|
||||
first = cfg_path.read_text()
|
||||
|
||||
# Second call must be a no-op on-disk content (file may be touched,
|
||||
# but the YAML contents should be identical).
|
||||
mark_seen(cfg_path, BUSY_INPUT_FLAG)
|
||||
second = cfg_path.read_text()
|
||||
|
||||
assert yaml.safe_load(first) == yaml.safe_load(second)
|
||||
|
||||
def test_handles_non_dict_onboarding(self, tmp_path):
|
||||
cfg_path = tmp_path / "config.yaml"
|
||||
cfg_path.write_text(yaml.safe_dump({"onboarding": "corrupted"}))
|
||||
|
||||
assert mark_seen(cfg_path, BUSY_INPUT_FLAG) is True
|
||||
loaded = yaml.safe_load(cfg_path.read_text())
|
||||
assert loaded["onboarding"]["seen"][BUSY_INPUT_FLAG] is True
|
||||
|
||||
def test_handles_non_dict_seen(self, tmp_path):
|
||||
cfg_path = tmp_path / "config.yaml"
|
||||
cfg_path.write_text(yaml.safe_dump({"onboarding": {"seen": "corrupted"}}))
|
||||
|
||||
assert mark_seen(cfg_path, BUSY_INPUT_FLAG) is True
|
||||
loaded = yaml.safe_load(cfg_path.read_text())
|
||||
assert loaded["onboarding"]["seen"][BUSY_INPUT_FLAG] is True
|
||||
|
||||
|
||||
class TestHintMessages:
|
||||
def test_busy_input_hint_gateway_interrupt(self):
|
||||
msg = busy_input_hint_gateway("interrupt")
|
||||
assert "/busy queue" in msg
|
||||
assert "interrupted" in msg.lower()
|
||||
|
||||
def test_busy_input_hint_gateway_queue(self):
|
||||
msg = busy_input_hint_gateway("queue")
|
||||
assert "/busy interrupt" in msg
|
||||
assert "queued" in msg.lower()
|
||||
|
||||
def test_busy_input_hint_cli_interrupt(self):
|
||||
msg = busy_input_hint_cli("interrupt")
|
||||
assert "/busy queue" in msg
|
||||
|
||||
def test_busy_input_hint_cli_queue(self):
|
||||
msg = busy_input_hint_cli("queue")
|
||||
assert "/busy interrupt" in msg
|
||||
|
||||
def test_tool_progress_hints_mention_verbose(self):
|
||||
assert "/verbose" in tool_progress_hint_gateway()
|
||||
assert "/verbose" in tool_progress_hint_cli()
|
||||
|
||||
def test_hints_are_not_empty(self):
|
||||
for hint in (
|
||||
busy_input_hint_gateway("queue"),
|
||||
busy_input_hint_gateway("interrupt"),
|
||||
busy_input_hint_cli("queue"),
|
||||
busy_input_hint_cli("interrupt"),
|
||||
tool_progress_hint_gateway(),
|
||||
tool_progress_hint_cli(),
|
||||
):
|
||||
assert hint.strip()
|
||||
|
||||
|
||||
class TestRoundTrip:
|
||||
"""After mark_seen, is_seen on the re-loaded config must return True."""
|
||||
|
||||
def test_mark_then_is_seen(self, tmp_path):
|
||||
cfg_path = tmp_path / "config.yaml"
|
||||
|
||||
assert mark_seen(cfg_path, BUSY_INPUT_FLAG) is True
|
||||
loaded = yaml.safe_load(cfg_path.read_text())
|
||||
|
||||
assert is_seen(loaded, BUSY_INPUT_FLAG) is True
|
||||
assert is_seen(loaded, TOOL_PROGRESS_FLAG) is False
|
||||
|
||||
def test_mark_both_flags_independently(self, tmp_path):
|
||||
cfg_path = tmp_path / "config.yaml"
|
||||
|
||||
mark_seen(cfg_path, BUSY_INPUT_FLAG)
|
||||
mark_seen(cfg_path, TOOL_PROGRESS_FLAG)
|
||||
loaded = yaml.safe_load(cfg_path.read_text())
|
||||
|
||||
assert is_seen(loaded, BUSY_INPUT_FLAG) is True
|
||||
assert is_seen(loaded, TOOL_PROGRESS_FLAG) is True
|
||||
@@ -1,7 +1,7 @@
|
||||
"""Regression tests for the universal "unsupported temperature" retry in
|
||||
``agent.auxiliary_client``.
|
||||
|
||||
Auxiliary callers (``flush_memories``, context compression, session search,
|
||||
Auxiliary callers (context compression, session search,
|
||||
web extract summarisation, etc.) hardcode ``temperature=0.3`` for historical
|
||||
reasons. Several provider/model combinations reject ``temperature`` with a
|
||||
400:
|
||||
@@ -100,7 +100,7 @@ class TestCallLlmUnsupportedTemperatureRetry:
|
||||
side_effect=lambda resp, _task: resp),
|
||||
):
|
||||
result = call_llm(
|
||||
task="flush_memories",
|
||||
task="compression",
|
||||
messages=[{"role": "user", "content": "remember this"}],
|
||||
temperature=0.3,
|
||||
max_tokens=500,
|
||||
@@ -136,7 +136,7 @@ class TestCallLlmUnsupportedTemperatureRetry:
|
||||
):
|
||||
with pytest.raises(RuntimeError, match="Invalid value"):
|
||||
call_llm(
|
||||
task="flush_memories",
|
||||
task="compression",
|
||||
messages=[{"role": "user", "content": "x"}],
|
||||
temperature=0.3,
|
||||
max_tokens=500,
|
||||
@@ -166,7 +166,7 @@ class TestCallLlmUnsupportedTemperatureRetry:
|
||||
):
|
||||
with pytest.raises(RuntimeError):
|
||||
call_llm(
|
||||
task="flush_memories",
|
||||
task="compression",
|
||||
messages=[{"role": "user", "content": "x"}],
|
||||
temperature=None, # explicit: no temperature sent
|
||||
max_tokens=500,
|
||||
|
||||
@@ -33,15 +33,18 @@ class TestChatCompletionsBasic:
|
||||
def test_convert_messages_strips_codex_fields(self, transport):
|
||||
msgs = [
|
||||
{"role": "assistant", "content": "ok", "codex_reasoning_items": [{"id": "rs_1"}],
|
||||
"codex_message_items": [{"id": "msg_1", "type": "message"}],
|
||||
"tool_calls": [{"id": "call_1", "call_id": "call_1", "response_item_id": "fc_1",
|
||||
"type": "function", "function": {"name": "t", "arguments": "{}"}}]},
|
||||
]
|
||||
result = transport.convert_messages(msgs)
|
||||
assert "codex_reasoning_items" not in result[0]
|
||||
assert "codex_message_items" not in result[0]
|
||||
assert "call_id" not in result[0]["tool_calls"][0]
|
||||
assert "response_item_id" not in result[0]["tool_calls"][0]
|
||||
# Original list untouched (deepcopy-on-demand)
|
||||
assert "codex_reasoning_items" in msgs[0]
|
||||
assert "codex_message_items" in msgs[0]
|
||||
|
||||
|
||||
class TestChatCompletionsBuildKwargs:
|
||||
|
||||
@@ -194,6 +194,36 @@ class TestCodexNormalizeResponse:
|
||||
assert nr.content == "Hello world"
|
||||
assert nr.finish_reason == "stop"
|
||||
|
||||
def test_message_items_preserved_in_provider_data(self, transport):
|
||||
"""Codex assistant message item ids/phases must survive transport normalization."""
|
||||
r = SimpleNamespace(
|
||||
output=[
|
||||
SimpleNamespace(
|
||||
type="message",
|
||||
role="assistant",
|
||||
id="msg_abc",
|
||||
phase="final_answer",
|
||||
content=[SimpleNamespace(type="output_text", text="Hello world")],
|
||||
status="completed",
|
||||
),
|
||||
],
|
||||
status="completed",
|
||||
incomplete_details=None,
|
||||
usage=SimpleNamespace(input_tokens=10, output_tokens=5,
|
||||
input_tokens_details=None, output_tokens_details=None),
|
||||
)
|
||||
nr = transport.normalize_response(r)
|
||||
assert nr.codex_message_items == [
|
||||
{
|
||||
"type": "message",
|
||||
"role": "assistant",
|
||||
"status": "completed",
|
||||
"content": [{"type": "output_text", "text": "Hello world"}],
|
||||
"id": "msg_abc",
|
||||
"phase": "final_answer",
|
||||
}
|
||||
]
|
||||
|
||||
def test_tool_call_response(self, transport):
|
||||
"""Normalize a Codex response with tool calls."""
|
||||
r = SimpleNamespace(
|
||||
|
||||
@@ -60,6 +60,13 @@ class TestTransportRegistry:
|
||||
assert t is not None
|
||||
assert t.api_mode == "anthropic_messages"
|
||||
|
||||
def test_discovers_missing_transport_when_registry_partially_populated(self):
|
||||
"""Importing one transport directly must not hide other valid api_modes."""
|
||||
import agent.transports.chat_completions # noqa: F401
|
||||
t = get_transport("codex_responses")
|
||||
assert t is not None
|
||||
assert t.api_mode == "codex_responses"
|
||||
|
||||
def test_register_and_get(self):
|
||||
class DummyTransport(ProviderTransport):
|
||||
@property
|
||||
|
||||
@@ -270,3 +270,15 @@ class TestNormalizedResponseBackwardCompat:
|
||||
def test_codex_reasoning_items_none_when_absent(self):
|
||||
nr = NormalizedResponse(content="hi", tool_calls=None, finish_reason="stop")
|
||||
assert nr.codex_reasoning_items is None
|
||||
|
||||
def test_codex_message_items_from_provider_data(self):
|
||||
items = [{"id": "msg_1", "type": "message"}]
|
||||
nr = NormalizedResponse(
|
||||
content="hi", tool_calls=None, finish_reason="stop",
|
||||
provider_data={"codex_message_items": items},
|
||||
)
|
||||
assert nr.codex_message_items == items
|
||||
|
||||
def test_codex_message_items_none_when_absent(self):
|
||||
nr = NormalizedResponse(content="hi", tool_calls=None, finish_reason="stop")
|
||||
assert nr.codex_message_items is None
|
||||
|
||||
@@ -33,7 +33,6 @@ class _FakeAgent:
|
||||
self._todo_store.write(
|
||||
[{"id": "t1", "content": "unfinished task", "status": "in_progress"}]
|
||||
)
|
||||
self.flush_memories = MagicMock()
|
||||
self.commit_memory_session = MagicMock()
|
||||
self._invalidate_system_prompt = MagicMock()
|
||||
|
||||
@@ -157,7 +156,6 @@ def test_new_command_creates_real_fresh_session_and_resets_agent_state(tmp_path)
|
||||
assert cli.agent._todo_store.read() == []
|
||||
assert cli.session_start > old_session_start
|
||||
assert cli.agent.session_start == cli.session_start
|
||||
cli.agent.flush_memories.assert_called_once_with([{"role": "user", "content": "hello"}])
|
||||
cli.agent._invalidate_system_prompt.assert_called_once()
|
||||
|
||||
|
||||
|
||||
@@ -346,6 +346,7 @@ def make_discord_message(
|
||||
|
||||
return SimpleNamespace(
|
||||
id=message_id, content=content, author=author, channel=channel,
|
||||
guild=getattr(channel, "guild", None),
|
||||
mentions=mentions, attachments=attachments,
|
||||
type=getattr(discord, "MessageType", SimpleNamespace()).default,
|
||||
reference=None, created_at=datetime.now(timezone.utc),
|
||||
|
||||
@@ -0,0 +1,365 @@
|
||||
"""Tests for /v1/runs endpoints: start, events, and stop.
|
||||
|
||||
Covers:
|
||||
- POST /v1/runs — start a run (202)
|
||||
- GET /v1/runs/{run_id}/events — SSE event stream
|
||||
- POST /v1/runs/{run_id}/stop — interrupt a running agent
|
||||
- Auth, error handling, and cleanup
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import threading
|
||||
import time as _time
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
import pytest
|
||||
from aiohttp import web
|
||||
from aiohttp.test_utils import TestClient, TestServer
|
||||
|
||||
from gateway.config import PlatformConfig
|
||||
from gateway.platforms.api_server import (
|
||||
APIServerAdapter,
|
||||
cors_middleware,
|
||||
security_headers_middleware,
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _make_adapter(api_key: str = "") -> APIServerAdapter:
    """Build an APIServerAdapter; the ``key`` extra is set only for a non-empty key."""
    extra = {"key": api_key} if api_key else {}
    return APIServerAdapter(PlatformConfig(enabled=True, extra=extra))
|
||||
|
||||
|
||||
def _create_runs_app(adapter: APIServerAdapter) -> web.Application:
    """Build an aiohttp application exposing the adapter's /v1/runs handlers.

    Only the start, events, and stop routes are registered.  Either middleware
    is dropped from the stack when its imported value is None.
    """
    middlewares = []
    for candidate in (cors_middleware, security_headers_middleware):
        if candidate is not None:
            middlewares.append(candidate)

    app = web.Application(middlewares=middlewares)
    app["api_server_adapter"] = adapter

    router = app.router
    router.add_post("/v1/runs", adapter._handle_runs)
    router.add_get("/v1/runs/{run_id}/events", adapter._handle_run_events)
    router.add_post("/v1/runs/{run_id}/stop", adapter._handle_stop_run)
    return app
|
||||
|
||||
|
||||
def _make_slow_agent(**kwargs):
|
||||
"""Create a mock agent that blocks in run_conversation until interrupted.
|
||||
|
||||
Returns (mock_agent, agent_ready_event, interrupt_event) where
|
||||
agent_ready_event is set once run_conversation starts, and
|
||||
interrupt_event is set when interrupt() is called.
|
||||
"""
|
||||
ready = threading.Event()
|
||||
interrupted = threading.Event()
|
||||
|
||||
mock_agent = MagicMock()
|
||||
|
||||
def _do_interrupt(message=None):
|
||||
interrupted.set()
|
||||
|
||||
mock_agent.interrupt = MagicMock(side_effect=_do_interrupt)
|
||||
|
||||
def _slow_run(user_message=None, conversation_history=None, task_id=None):
|
||||
ready.set()
|
||||
# Block until interrupt() is called
|
||||
interrupted.wait(timeout=10)
|
||||
return {"final_response": "interrupted"}
|
||||
|
||||
mock_agent.run_conversation.side_effect = _slow_run
|
||||
mock_agent.session_prompt_tokens = 0
|
||||
mock_agent.session_completion_tokens = 0
|
||||
mock_agent.session_total_tokens = 0
|
||||
|
||||
return mock_agent, ready, interrupted
|
||||
|
||||
|
||||
@pytest.fixture
def adapter():
    """Adapter built without an API key."""
    return _make_adapter(api_key="")
|
||||
|
||||
|
||||
@pytest.fixture
def auth_adapter():
    """Adapter built with the API key ``sk-secret``."""
    secret = "sk-secret"
    return _make_adapter(api_key=secret)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# POST /v1/runs — start a run
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestStartRun:
    """POST /v1/runs: request validation, auth, and the 202 acknowledgement."""

    @staticmethod
    def _stub_agent(final_response, prompt=0, completion=0, total=0):
        """Return a MagicMock agent that finishes at once with *final_response*."""
        agent = MagicMock()
        agent.run_conversation.return_value = {"final_response": final_response}
        agent.session_prompt_tokens = prompt
        agent.session_completion_tokens = completion
        agent.session_total_tokens = total
        return agent

    @pytest.mark.asyncio
    async def test_start_returns_202(self, adapter):
        """A valid request is acknowledged with 202 and a ``run_``-prefixed id."""
        app = _create_runs_app(adapter)
        async with TestClient(TestServer(app)) as cli:
            with patch.object(adapter, "_create_agent") as mock_create:
                mock_create.return_value = self._stub_agent("done", 10, 5, 15)

                resp = await cli.post("/v1/runs", json={"input": "hello"})
                assert resp.status == 202
                payload = await resp.json()
                assert payload["status"] == "started"
                assert payload["run_id"].startswith("run_")

    @pytest.mark.asyncio
    async def test_start_invalid_json_returns_400(self, adapter):
        """A body that is not JSON is rejected with 400."""
        app = _create_runs_app(adapter)
        async with TestClient(TestServer(app)) as cli:
            json_headers = {"Content-Type": "application/json"}
            resp = await cli.post("/v1/runs", data="not json", headers=json_headers)
            assert resp.status == 400

    @pytest.mark.asyncio
    async def test_start_missing_input_returns_400(self, adapter):
        """Omitting ``input`` yields 400 with an error message naming the field."""
        app = _create_runs_app(adapter)
        async with TestClient(TestServer(app)) as cli:
            resp = await cli.post("/v1/runs", json={"model": "test"})
            assert resp.status == 400
            payload = await resp.json()
            assert "input" in payload["error"]["message"]

    @pytest.mark.asyncio
    async def test_start_empty_input_returns_400(self, adapter):
        """An empty ``input`` string is rejected with 400."""
        app = _create_runs_app(adapter)
        async with TestClient(TestServer(app)) as cli:
            resp = await cli.post("/v1/runs", json={"input": ""})
            assert resp.status == 400

    @pytest.mark.asyncio
    async def test_start_requires_auth(self, auth_adapter):
        """With an API key configured, a request without credentials gets 401."""
        app = _create_runs_app(auth_adapter)
        async with TestClient(TestServer(app)) as cli:
            resp = await cli.post("/v1/runs", json={"input": "hello"})
            assert resp.status == 401

    @pytest.mark.asyncio
    async def test_start_with_valid_auth(self, auth_adapter):
        """A matching bearer token lets the run start (202)."""
        app = _create_runs_app(auth_adapter)
        async with TestClient(TestServer(app)) as cli:
            with patch.object(auth_adapter, "_create_agent") as mock_create:
                mock_create.return_value = self._stub_agent("ok")

                resp = await cli.post(
                    "/v1/runs",
                    json={"input": "hello"},
                    headers={"Authorization": "Bearer sk-secret"},
                )
                assert resp.status == 202
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# GET /v1/runs/{run_id}/events — SSE event stream
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestRunEvents:
    """GET /v1/runs/{run_id}/events: completion event, unknown runs, and auth."""

    @pytest.mark.asyncio
    async def test_events_stream_returns_completed(self, adapter):
        """The event stream carries run.completed and the agent's final response."""
        app = _create_runs_app(adapter)
        async with TestClient(TestServer(app)) as cli:
            with patch.object(adapter, "_create_agent") as mock_create:
                finished_agent = MagicMock()
                finished_agent.run_conversation.return_value = {"final_response": "Hello!"}
                finished_agent.session_prompt_tokens = 10
                finished_agent.session_completion_tokens = 5
                finished_agent.session_total_tokens = 15
                mock_create.return_value = finished_agent

                # Kick off the run and remember its id.
                start_resp = await cli.post("/v1/runs", json={"input": "hello"})
                assert start_resp.status == 202
                run_id = (await start_resp.json())["run_id"]

                # Read the whole event stream for that run.
                events_resp = await cli.get(f"/v1/runs/{run_id}/events")
                assert events_resp.status == 200
                stream_text = await events_resp.text()

                # The completion event and the final response must both appear.
                assert "run.completed" in stream_text
                assert "Hello!" in stream_text

    @pytest.mark.asyncio
    async def test_events_not_found_returns_404(self, adapter):
        """Requesting events for an unknown run id returns 404."""
        app = _create_runs_app(adapter)
        async with TestClient(TestServer(app)) as cli:
            resp = await cli.get("/v1/runs/run_nonexistent/events")
            assert resp.status == 404

    @pytest.mark.asyncio
    async def test_events_requires_auth(self, auth_adapter):
        """With an API key configured, an unauthenticated events request gets 401."""
        app = _create_runs_app(auth_adapter)
        async with TestClient(TestServer(app)) as cli:
            resp = await cli.get("/v1/runs/run_any/events")
            assert resp.status == 401
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# POST /v1/runs/{run_id}/stop — interrupt a running agent
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestStopRun:
    """POST /v1/runs/{run_id}/stop: interrupting runs, auth, and ref cleanup."""

    @staticmethod
    async def _wait_until_running(agent_ready, timeout=3.0):
        """Wait for the agent thread to enter run_conversation.

        ``threading.Event.wait`` blocks its calling thread, so it is pushed to
        the default executor instead of being called on the event loop, which
        also has to drive the in-process test server.  The wait result is
        asserted so a run that never started fails here with a clear message.
        """
        loop = asyncio.get_running_loop()
        started = await loop.run_in_executor(None, agent_ready.wait, timeout)
        assert started, "agent did not enter run_conversation in time"
        # Short grace period after the agent thread has started.
        await asyncio.sleep(0.1)

    @pytest.mark.asyncio
    async def test_stop_running_agent(self, adapter):
        """Stop should interrupt the agent and cancel the task."""
        app = _create_runs_app(adapter)
        async with TestClient(TestServer(app)) as cli:
            with patch.object(adapter, "_create_agent") as mock_create:
                mock_agent, agent_ready, _ = _make_slow_agent()
                mock_create.return_value = mock_agent

                # Start run
                resp = await cli.post("/v1/runs", json={"input": "hello"})
                assert resp.status == 202
                data = await resp.json()
                run_id = data["run_id"]

                # Wait for agent to start running in the thread
                await self._wait_until_running(agent_ready)

                # Verify agent ref is stored
                assert run_id in adapter._active_run_agents

                # Stop the run
                stop_resp = await cli.post(f"/v1/runs/{run_id}/stop")
                assert stop_resp.status == 200
                stop_data = await stop_resp.json()
                assert stop_data["run_id"] == run_id
                assert stop_data["status"] == "stopping"

                # Agent interrupt should have been called
                mock_agent.interrupt.assert_called_once_with("Stop requested via API")

                # Refs should be cleaned up
                await asyncio.sleep(0.5)
                assert run_id not in adapter._active_run_agents
                assert run_id not in adapter._active_run_tasks

    @pytest.mark.asyncio
    async def test_stop_nonexistent_run_returns_404(self, adapter):
        """Stopping an unknown run id returns 404."""
        app = _create_runs_app(adapter)
        async with TestClient(TestServer(app)) as cli:
            resp = await cli.post("/v1/runs/run_nonexistent/stop")
            assert resp.status == 404

    @pytest.mark.asyncio
    async def test_stop_requires_auth(self, auth_adapter):
        """With an API key configured, an unauthenticated stop request gets 401."""
        app = _create_runs_app(auth_adapter)
        async with TestClient(TestServer(app)) as cli:
            resp = await cli.post("/v1/runs/run_any/stop")
            assert resp.status == 401

    @pytest.mark.asyncio
    async def test_stop_already_completed_run_returns_404(self, adapter):
        """Stopping a run that already finished should return 404 (refs cleaned up)."""
        app = _create_runs_app(adapter)
        async with TestClient(TestServer(app)) as cli:
            with patch.object(adapter, "_create_agent") as mock_create:
                mock_agent = MagicMock()
                mock_agent.run_conversation.return_value = {"final_response": "done"}
                mock_agent.session_prompt_tokens = 0
                mock_agent.session_completion_tokens = 0
                mock_agent.session_total_tokens = 0
                mock_create.return_value = mock_agent

                # Start and wait for completion
                resp = await cli.post("/v1/runs", json={"input": "hello"})
                assert resp.status == 202
                data = await resp.json()
                run_id = data["run_id"]

                await asyncio.sleep(0.3)

                # Run should be done, refs cleaned up
                assert run_id not in adapter._active_run_agents

                # Stop should return 404
                stop_resp = await cli.post(f"/v1/runs/{run_id}/stop")
                assert stop_resp.status == 404

    @pytest.mark.asyncio
    async def test_stop_interrupt_exception_does_not_crash(self, adapter):
        """If agent.interrupt() raises, stop should still succeed."""
        app = _create_runs_app(adapter)
        async with TestClient(TestServer(app)) as cli:
            with patch.object(adapter, "_create_agent") as mock_create:
                mock_agent, agent_ready, interrupted = _make_slow_agent()
                # Override the interrupt side_effect to raise
                mock_agent.interrupt = MagicMock(side_effect=RuntimeError("interrupt failed"))
                mock_create.return_value = mock_agent

                try:
                    resp = await cli.post("/v1/runs", json={"input": "hello"})
                    assert resp.status == 202
                    data = await resp.json()
                    run_id = data["run_id"]

                    await self._wait_until_running(agent_ready)

                    stop_resp = await cli.post(f"/v1/runs/{run_id}/stop")
                    assert stop_resp.status == 200
                    stop_data = await stop_resp.json()
                    assert stop_data["status"] == "stopping"
                finally:
                    # The raising interrupt() never sets the event, so release
                    # the worker thread here instead of leaving it blocked
                    # until its 10s timeout expires.
                    interrupted.set()

    @pytest.mark.asyncio
    async def test_stop_sends_sentinel_to_events_stream(self, adapter):
        """After stop, the events stream should close."""
        app = _create_runs_app(adapter)
        async with TestClient(TestServer(app)) as cli:
            with patch.object(adapter, "_create_agent") as mock_create:
                mock_agent, agent_ready, _ = _make_slow_agent()
                mock_create.return_value = mock_agent

                # Start run
                resp = await cli.post("/v1/runs", json={"input": "hello"})
                assert resp.status == 202
                data = await resp.json()
                run_id = data["run_id"]

                await self._wait_until_running(agent_ready)

                # Subscribe to events in background
                events_task = asyncio.ensure_future(
                    cli.get(f"/v1/runs/{run_id}/events")
                )

                await asyncio.sleep(0.1)

                # Stop the run
                stop_resp = await cli.post(f"/v1/runs/{run_id}/stop")
                assert stop_resp.status == 200

                # Events stream should close
                events_resp = await asyncio.wait_for(events_task, timeout=5.0)
                assert events_resp.status == 200
                body = await events_resp.text()
                # Stream should have received run.failed and closed
                assert "run.failed" in body or "stream closed" in body
|
||||
@@ -1,249 +0,0 @@
|
||||
"""Tests for proactive memory flush on session expiry.
|
||||
|
||||
Verifies that:
|
||||
1. _is_session_expired() works from a SessionEntry alone (no source needed)
|
||||
2. The sync callback is no longer called in get_or_create_session
|
||||
3. memory_flushed flag persists across save/load cycles (prevents restart re-flush)
|
||||
4. The background watcher can detect expired sessions
|
||||
"""
|
||||
|
||||
import pytest
|
||||
from datetime import datetime, timedelta
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch, MagicMock
|
||||
|
||||
from gateway.config import Platform, GatewayConfig, SessionResetPolicy
|
||||
from gateway.session import SessionSource, SessionStore, SessionEntry
|
||||
|
||||
|
||||
@pytest.fixture()
def idle_store(tmp_path):
    """Provide a SessionStore whose default policy is a 60-minute idle reset."""
    policy = SessionResetPolicy(mode="idle", idle_minutes=60)
    gateway_config = GatewayConfig(default_reset_policy=policy)
    # _ensure_loaded is patched out during construction; the store is then
    # marked as loaded by hand with no database attached.
    with patch("gateway.session.SessionStore._ensure_loaded"):
        store = SessionStore(sessions_dir=tmp_path, config=gateway_config)
    store._db = None
    store._loaded = True
    return store
|
||||
|
||||
|
||||
@pytest.fixture()
def no_reset_store(tmp_path):
    """Provide a SessionStore whose default reset policy is ``mode="none"``."""
    policy = SessionResetPolicy(mode="none")
    gateway_config = GatewayConfig(default_reset_policy=policy)
    # _ensure_loaded is patched out during construction; the store is then
    # marked as loaded by hand with no database attached.
    with patch("gateway.session.SessionStore._ensure_loaded"):
        store = SessionStore(sessions_dir=tmp_path, config=gateway_config)
    store._db = None
    store._loaded = True
    return store
|
||||
|
||||
|
||||
class TestIsSessionExpired:
    """_is_session_expired must decide expiry from a SessionEntry alone."""

    @staticmethod
    def _telegram_dm_entry(session_id, created_ago, updated_ago):
        """Build a Telegram DM entry whose timestamps lie the given deltas in the past."""
        return SessionEntry(
            session_key="agent:main:telegram:dm",
            session_id=session_id,
            created_at=datetime.now() - created_ago,
            updated_at=datetime.now() - updated_ago,
            platform=Platform.TELEGRAM,
            chat_type="dm",
        )

    def test_idle_session_expired(self, idle_store):
        """An entry idle for 120 minutes is expired under the 60-minute policy."""
        stale = self._telegram_dm_entry(
            "sid_1", timedelta(hours=3), timedelta(minutes=120)
        )
        assert idle_store._is_session_expired(stale) is True

    def test_active_session_not_expired(self, idle_store):
        """An entry updated 10 minutes ago is not expired."""
        fresh = self._telegram_dm_entry(
            "sid_2", timedelta(hours=1), timedelta(minutes=10)
        )
        assert idle_store._is_session_expired(fresh) is False

    def test_none_mode_never_expires(self, no_reset_store):
        """With mode="none", even a 30-day-old entry is not expired."""
        ancient = self._telegram_dm_entry(
            "sid_3", timedelta(days=30), timedelta(days=30)
        )
        assert no_reset_store._is_session_expired(ancient) is False

    def test_active_processes_prevent_expiry(self, idle_store):
        """Sessions with active background processes should never expire."""
        idle_store._has_active_processes_fn = lambda key: True
        busy = self._telegram_dm_entry(
            "sid_4", timedelta(hours=5), timedelta(hours=5)
        )
        assert idle_store._is_session_expired(busy) is False

    def test_daily_mode_expired(self, tmp_path):
        """Daily mode should expire sessions from before today's reset hour."""
        daily_policy = SessionResetPolicy(mode="daily", at_hour=4)
        gateway_config = GatewayConfig(default_reset_policy=daily_policy)
        with patch("gateway.session.SessionStore._ensure_loaded"):
            daily_store = SessionStore(sessions_dir=tmp_path, config=gateway_config)
        daily_store._db = None
        daily_store._loaded = True

        two_days_old = self._telegram_dm_entry(
            "sid_5", timedelta(days=2), timedelta(days=2)
        )
        assert daily_store._is_session_expired(two_days_old) is True
|
||||
|
||||
|
||||
class TestGetOrCreateSessionNoCallback:
    """get_or_create_session must not invoke a synchronous flush callback."""

    @staticmethod
    def _telegram_dm_source():
        """Return the Telegram DM source (chat 123) both tests use."""
        return SessionSource(
            platform=Platform.TELEGRAM,
            chat_id="123",
            chat_type="dm",
        )

    def test_auto_reset_creates_new_session_after_flush(self, idle_store):
        """When a flushed session auto-resets, a new session_id is created."""
        source = self._telegram_dm_source()

        # First call creates the session.
        first = idle_store.get_or_create_session(source)
        previous_session_id = first.session_id

        # Pretend the watcher already flushed it, then age it past the idle limit.
        first.memory_flushed = True
        first.updated_at = datetime.now() - timedelta(minutes=120)
        idle_store._save()

        # The next lookup is expected to auto-reset into a fresh session.
        second = idle_store.get_or_create_session(source)
        assert second.session_id != previous_session_id
        assert second.was_auto_reset is True
        # The replacement session starts out unflushed.
        assert second.memory_flushed is False

    def test_no_sync_callback_invoked(self, idle_store):
        """No synchronous callback should block during auto-reset."""
        source = self._telegram_dm_source()
        first = idle_store.get_or_create_session(source)
        first.updated_at = datetime.now() - timedelta(minutes=120)
        idle_store._save()

        # The store must not carry an _on_auto_reset attribute.
        assert not hasattr(idle_store, '_on_auto_reset')

        # This should NOT block (no sync LLM call).
        second = idle_store.get_or_create_session(source)
        assert second.was_auto_reset is True
|
||||
|
||||
|
||||
class TestMemoryFlushedFlag:
    """SessionEntry.memory_flushed: default value and persistence."""

    @staticmethod
    def _save_and_reload(store, key, entry):
        """Store *entry* under *key*, save, wipe in-memory state, reload, return the entry."""
        store._entries[key] = entry
        store._save()

        # Simulate restart: clear in-memory state, reload from disk.
        store._entries.clear()
        store._loaded = False
        store._ensure_loaded()
        return store._entries[key]

    def test_defaults_to_false(self):
        """A freshly constructed entry has memory_flushed False."""
        now_entry = SessionEntry(
            session_key="agent:main:telegram:dm:123",
            session_id="sid_new",
            created_at=datetime.now(),
            updated_at=datetime.now(),
            platform=Platform.TELEGRAM,
            chat_type="dm",
        )
        assert now_entry.memory_flushed is False

    def test_persists_through_save_load(self, idle_store):
        """memory_flushed=True must survive a save/load cycle (simulates restart)."""
        key = "agent:main:discord:thread:789"
        flushed = SessionEntry(
            session_key=key,
            session_id="sid_flushed",
            created_at=datetime.now() - timedelta(hours=5),
            updated_at=datetime.now() - timedelta(hours=5),
            platform=Platform.DISCORD,
            chat_type="thread",
            memory_flushed=True,
        )
        reloaded = self._save_and_reload(idle_store, key, flushed)
        assert reloaded.memory_flushed is True

    def test_unflushed_entry_survives_restart_as_unflushed(self, idle_store):
        """An entry without memory_flushed stays False after reload."""
        key = "agent:main:telegram:dm:456"
        unflushed = SessionEntry(
            session_key=key,
            session_id="sid_not_flushed",
            created_at=datetime.now() - timedelta(hours=2),
            updated_at=datetime.now() - timedelta(hours=2),
            platform=Platform.TELEGRAM,
            chat_type="dm",
        )
        reloaded = self._save_and_reload(idle_store, key, unflushed)
        assert reloaded.memory_flushed is False

    def test_roundtrip_to_dict_from_dict(self):
        """to_dict/from_dict must preserve memory_flushed."""
        original = SessionEntry(
            session_key="agent:main:telegram:dm:999",
            session_id="sid_rt",
            created_at=datetime.now(),
            updated_at=datetime.now(),
            platform=Platform.TELEGRAM,
            chat_type="dm",
            memory_flushed=True,
        )
        serialized = original.to_dict()
        assert serialized["memory_flushed"] is True

        restored = SessionEntry.from_dict(serialized)
        assert restored.memory_flushed is True

    def test_legacy_entry_without_field_defaults_false(self):
        """Old sessions.json entries missing memory_flushed should default to False."""
        # Deliberately carries no memory_flushed key.
        legacy = {
            "session_key": "agent:main:telegram:dm:legacy",
            "session_id": "sid_legacy",
            "created_at": datetime.now().isoformat(),
            "updated_at": datetime.now().isoformat(),
            "platform": "telegram",
            "chat_type": "dm",
        }
        restored = SessionEntry.from_dict(legacy)
        assert restored.memory_flushed is False
|
||||
@@ -349,3 +349,121 @@ class TestBusySessionAck:
|
||||
|
||||
result = await runner._handle_active_session_busy_message(event, sk)
|
||||
assert result is False # not handled, let default path try
|
||||
|
||||
|
||||
class TestBusySessionOnboardingHint:
    """One-time onboarding tip attached to the busy acknowledgement."""

    @staticmethod
    def _register_busy_agent(runner, session_key):
        """Attach a mock running agent, with an activity summary, to *runner*."""
        agent = MagicMock()
        agent.get_activity_summary.return_value = {
            "api_call_count": 3, "max_iterations": 60,
            "current_tool": None, "last_activity_ts": time.time(),
            "last_activity_desc": "api", "seconds_since_activity": 0.1,
        }
        runner._running_agents[session_key] = agent
        runner._running_agents_ts[session_key] = time.time() - 5

    @staticmethod
    def _sent_content(adapter):
        """Return the ``content`` keyword of the adapter's last _send_with_retry call."""
        return adapter._send_with_retry.call_args.kwargs.get("content", "")

    @pytest.mark.asyncio
    async def test_first_busy_ack_appends_interrupt_hint(self, tmp_path, monkeypatch):
        """First busy-while-running message gets an extra hint about /busy."""
        import gateway.run as _gr

        # _hermes_home is pointed at tmp_path so the seen-flag write lands in
        # a writable directory (mark_seen imports utils.atomic_yaml_write).
        monkeypatch.setattr(_gr, "_hermes_home", tmp_path)
        monkeypatch.setattr(_gr, "_load_gateway_config", lambda: {})

        runner, _sentinel = _make_runner()
        runner._busy_input_mode = "interrupt"
        adapter = _make_adapter()

        event = _make_event(text="ping")
        sk = build_session_key(event.source)

        self._register_busy_agent(runner, sk)
        runner.adapters[event.source.platform] = adapter

        await runner._handle_active_session_busy_message(event, sk)

        content = self._sent_content(adapter)

        # Regular acknowledgement text.
        assert "Interrupting" in content
        # Plus the one-time tip.
        assert "First-time tip" in content
        assert "/busy queue" in content

        # The seen flag must now be written to tmp_path/config.yaml.
        import yaml
        saved = yaml.safe_load((tmp_path / "config.yaml").read_text())
        assert saved["onboarding"]["seen"]["busy_input_prompt"] is True

    @pytest.mark.asyncio
    async def test_second_busy_ack_omits_hint(self, tmp_path, monkeypatch):
        """Once the flag is marked, the hint never appears again."""
        import gateway.run as _gr
        import yaml

        monkeypatch.setattr(_gr, "_hermes_home", tmp_path)
        # Seed the config so the flag reads as already seen from the start.
        config_path = tmp_path / "config.yaml"
        config_path.write_text(yaml.safe_dump({
            "onboarding": {"seen": {"busy_input_prompt": True}},
        }))
        monkeypatch.setattr(
            _gr, "_load_gateway_config",
            lambda: yaml.safe_load((tmp_path / "config.yaml").read_text()),
        )

        runner, _sentinel = _make_runner()
        runner._busy_input_mode = "interrupt"
        adapter = _make_adapter()

        event = _make_event(text="ping again")
        sk = build_session_key(event.source)

        self._register_busy_agent(runner, sk)
        runner.adapters[event.source.platform] = adapter

        await runner._handle_active_session_busy_message(event, sk)

        content = self._sent_content(adapter)

        assert "Interrupting" in content
        assert "First-time tip" not in content
        assert "/busy queue" not in content

    @pytest.mark.asyncio
    async def test_queue_mode_hint_points_to_interrupt(self, tmp_path, monkeypatch):
        """In queue mode the hint should suggest /busy interrupt, not /busy queue."""
        import gateway.run as _gr

        monkeypatch.setattr(_gr, "_hermes_home", tmp_path)
        monkeypatch.setattr(_gr, "_load_gateway_config", lambda: {})

        runner, _sentinel = _make_runner()
        runner._busy_input_mode = "queue"
        adapter = _make_adapter()

        event = _make_event(text="queue me")
        sk = build_session_key(event.source)
        runner.adapters[event.source.platform] = adapter

        runner._running_agents[sk] = MagicMock()

        with patch("gateway.run.merge_pending_message_event"):
            await runner._handle_active_session_busy_message(event, sk)

        content = self._sent_content(adapter)
        assert "Queued for the next turn" in content
        assert "First-time tip" in content
        assert "/busy interrupt" in content
        # A user already in queue mode must not be pointed at /busy queue.
        assert "/busy queue" not in content
|
||||
|
||||
@@ -1,240 +0,0 @@
|
||||
"""Tests for memory flush stale-overwrite prevention (#2670).
|
||||
|
||||
Verifies that:
|
||||
1. Cron sessions are skipped (no flush for headless cron runs)
|
||||
2. Current memory state is injected into the flush prompt so the
|
||||
flush agent can see what's already saved and avoid overwrites
|
||||
3. The flush still works normally when memory files don't exist
|
||||
"""
|
||||
|
||||
import sys
|
||||
import types
|
||||
import pytest
|
||||
from pathlib import Path
|
||||
from unittest.mock import MagicMock, patch, call
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def _mock_dotenv(monkeypatch):
    """Register a stub ``dotenv`` module for every test.

    gateway.run imports dotenv at module level; the stub lets the tests run
    when the real package is not installed.
    """
    stub = types.ModuleType("dotenv")

    def _noop_load_dotenv(*args, **kwargs):
        return None

    stub.load_dotenv = _noop_load_dotenv
    monkeypatch.setitem(sys.modules, "dotenv", stub)
|
||||
|
||||
|
||||
def _make_runner():
    """Create a GatewayRunner without calling ``__init__``.

    Only the attributes assigned below are set: six empty dicts, plus
    MagicMock stand-ins for ``hooks`` and ``session_store``.
    """
    from gateway.run import GatewayRunner

    runner = object.__new__(GatewayRunner)
    dict_attributes = (
        "_honcho_managers",
        "_honcho_configs",
        "_running_agents",
        "_pending_messages",
        "_pending_approvals",
        "adapters",
    )
    for attribute in dict_attributes:
        # A fresh dict per attribute, as in separate assignments.
        setattr(runner, attribute, {})
    runner.hooks = MagicMock()
    runner.session_store = MagicMock()
    return runner
|
||||
|
||||
|
||||
_TRANSCRIPT_4_MSGS = [
|
||||
{"role": "user", "content": "hello"},
|
||||
{"role": "assistant", "content": "hi there"},
|
||||
{"role": "user", "content": "remember my name is Alice"},
|
||||
{"role": "assistant", "content": "Got it, Alice!"},
|
||||
]
|
||||
|
||||
|
||||
class TestCronSessionBypass:
    """Memory flush must be skipped for cron sessions and attempted otherwise."""

    def test_cron_session_skipped(self):
        """A ``cron_`` session id must not lead to a transcript load."""
        runner = _make_runner()
        store = runner.session_store
        runner._flush_memories_for_session("cron_job123_20260323_120000")
        # The transcript loader must stay untouched.
        store.load_transcript.assert_not_called()

    def test_cron_session_with_prefix_skipped(self):
        """Cron sessions with different prefixes are still skipped."""
        runner = _make_runner()
        store = runner.session_store
        runner._flush_memories_for_session("cron_daily_20260323")
        store.load_transcript.assert_not_called()

    def test_non_cron_session_proceeds(self):
        """Non-cron sessions should still attempt the flush."""
        runner = _make_runner()
        store = runner.session_store
        store.load_transcript.return_value = []
        runner._flush_memories_for_session("session_abc123")
        store.load_transcript.assert_called_once_with("session_abc123")
|
||||
|
||||
|
||||
def _make_flush_context(monkeypatch, memory_dir=None):
    """Prepare a runner whose flush path will build a mocked agent.

    A stand-in ``run_agent`` module is registered in ``sys.modules`` (via
    *monkeypatch*, so it is undone after the test); its ``AIAgent`` is a
    ``MagicMock`` that always returns the same ``tmp_agent``. The runner's
    mocked session store is primed to return ``_TRANSCRIPT_4_MSGS``.

    Args:
        monkeypatch: pytest's ``monkeypatch`` fixture.
        memory_dir: Optional value handed back unchanged as the third
            element of the result; this helper does not otherwise use it.

    Returns:
        ``(runner, tmp_agent, memory_dir)``. Note the third element is the
        *memory_dir* argument, not the fake ``run_agent`` module.
    """
    tmp_agent = MagicMock()
    fake_run_agent = types.ModuleType("run_agent")
    fake_run_agent.AIAgent = MagicMock(return_value=tmp_agent)
    monkeypatch.setitem(sys.modules, "run_agent", fake_run_agent)

    runner = _make_runner()
    runner.session_store.load_transcript.return_value = _TRANSCRIPT_4_MSGS
    return runner, tmp_agent, memory_dir
|
||||
|
||||
|
||||
class TestMemoryInjection:
    """Checks how on-disk memory files influence the flush prompt text."""

    @staticmethod
    def _flush_and_capture(runner, tmp_agent, memory_dir, session_id):
        """Run one flush with runtime lookups stubbed and return the prompt.

        ``tools.memory_tool`` is replaced by a mock whose ``get_memory_dir``
        yields *memory_dir*. The returned string is the ``user_message``
        keyword passed to ``tmp_agent.run_conversation`` (``""`` if absent).
        """
        memory_tool_stub = MagicMock(get_memory_dir=lambda: memory_dir)
        with (
            patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "k"}),
            patch("gateway.run._resolve_gateway_model", return_value="test-model"),
            patch.dict("sys.modules", {"tools.memory_tool": memory_tool_stub}),
        ):
            runner._flush_memories_for_session(session_id)

        tmp_agent.run_conversation.assert_called_once()
        return tmp_agent.run_conversation.call_args.kwargs.get("user_message", "")

    def test_memory_content_injected_into_flush_prompt(self, tmp_path, monkeypatch):
        """Entries from MEMORY.md and USER.md show up in the prompt with the guard text."""
        memory_dir = tmp_path / "memories"
        memory_dir.mkdir()
        (memory_dir / "MEMORY.md").write_text("Agent knows Python\n§\nUser prefers dark mode")
        (memory_dir / "USER.md").write_text("Name: Alice\n§\nTimezone: PST")

        runner, tmp_agent, _ = _make_flush_context(monkeypatch, memory_dir)
        flush_prompt = self._flush_and_capture(runner, tmp_agent, memory_dir, "session_123")

        assert "Agent knows Python" in flush_prompt
        assert "User prefers dark mode" in flush_prompt
        assert "Name: Alice" in flush_prompt
        assert "Timezone: PST" in flush_prompt
        assert "Do NOT overwrite or remove entries" in flush_prompt
        assert "current live state of memory" in flush_prompt

    def test_flush_works_without_memory_files(self, tmp_path, monkeypatch):
        """An empty memory directory still flushes, minus the guard text."""
        empty_dir = tmp_path / "no_memories"
        empty_dir.mkdir()

        runner, tmp_agent, _ = _make_flush_context(monkeypatch)
        flush_prompt = self._flush_and_capture(runner, tmp_agent, empty_dir, "session_456")

        assert "Do NOT overwrite or remove entries" not in flush_prompt
        assert "Review the conversation above" in flush_prompt

    def test_empty_memory_files_no_injection(self, tmp_path, monkeypatch):
        """Blank or whitespace-only memory files do not add the live-state section."""
        memory_dir = tmp_path / "memories"
        memory_dir.mkdir()
        (memory_dir / "MEMORY.md").write_text("")
        (memory_dir / "USER.md").write_text(" \n ")  # whitespace only

        runner, tmp_agent, _ = _make_flush_context(monkeypatch)
        flush_prompt = self._flush_and_capture(runner, tmp_agent, memory_dir, "session_789")

        assert "current live state of memory" not in flush_prompt
|
||||
|
||||
|
||||
class TestFlushAgentSilenced:
    """The flush agent must not produce any terminal output."""

    def test_print_fn_set_to_noop(self, tmp_path, monkeypatch):
        """_print_fn on the flush agent must be a no-op so tool output never leaks."""
        runner = _make_runner()
        runner.session_store.load_transcript.return_value = _TRANSCRIPT_4_MSGS

        captured_agent = {}

        def _fake_ai_agent(*args, **kwargs):
            agent = MagicMock()
            # A MagicMock fabricates any attribute on first access, so without
            # this explicit sentinel the "is not None" check below could never
            # fail. Starting from None makes the flush code prove it assigned
            # a replacement.
            agent._print_fn = None
            captured_agent["instance"] = agent
            return agent

        fake_run_agent = types.ModuleType("run_agent")
        fake_run_agent.AIAgent = _fake_ai_agent
        monkeypatch.setitem(sys.modules, "run_agent", fake_run_agent)

        with (
            patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "k"}),
            patch("gateway.run._resolve_gateway_model", return_value="test-model"),
            patch.dict("sys.modules", {"tools.memory_tool": MagicMock(get_memory_dir=lambda: tmp_path)}),
        ):
            runner._flush_memories_for_session("session_silent")

        agent = captured_agent["instance"]
        assert agent._print_fn is not None, "_print_fn should be overridden to suppress output"
        # Confirm it is callable and produces no output (no exception)
        agent._print_fn("should be silenced")

    def test_kawaii_spinner_respects_print_fn(self):
        """KawaiiSpinner must route all output through print_fn when supplied."""
        import contextlib
        import io

        from agent.display import KawaiiSpinner

        written = []
        spinner = KawaiiSpinner("test", print_fn=lambda *a, **kw: written.append(a))
        spinner._write("hello")
        assert written == [("hello",)], "spinner should route through print_fn"

        # A no-op print_fn must produce no output to stdout. redirect_stdout
        # swaps sys.stdout for the buffer and restores it on exit, even if the
        # spinner raises.
        buf = io.StringIO()
        with contextlib.redirect_stdout(buf):
            silent_spinner = KawaiiSpinner("silent", print_fn=lambda *a, **kw: None)
            silent_spinner._write("should not appear")
            silent_spinner.stop("done")
        assert buf.getvalue() == "", "no-op print_fn spinner must not write to stdout"

    def test_flush_agent_closes_resources_after_run(self, monkeypatch):
        """Memory flush should close temporary agent resources after the turn."""
        runner, tmp_agent, _ = _make_flush_context(monkeypatch)
        tmp_agent.shutdown_memory_provider = MagicMock()
        tmp_agent.close = MagicMock()

        with (
            patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "k"}),
            patch("gateway.run._resolve_gateway_model", return_value="test-model"),
            patch.dict("sys.modules", {"tools.memory_tool": MagicMock(get_memory_dir=lambda: Path("/nonexistent"))}),
        ):
            runner._flush_memories_for_session("session_cleanup")

        tmp_agent.shutdown_memory_provider.assert_called_once()
        tmp_agent.close.assert_called_once()
|
||||
|
||||
|
||||
class TestFlushPromptStructure:
    """Guards the baseline wording of the flush prompt."""

    def test_core_instructions_present(self, monkeypatch):
        """With no memory directory on disk, the core guidance phrases are still sent."""
        runner, tmp_agent, _ = _make_flush_context(monkeypatch)
        memory_tool_stub = MagicMock(get_memory_dir=lambda: Path("/nonexistent"))

        with (
            patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "k"}),
            patch("gateway.run._resolve_gateway_model", return_value="test-model"),
            patch.dict("sys.modules", {"tools.memory_tool": memory_tool_stub}),
        ):
            runner._flush_memories_for_session("session_struct")

        call_kwargs = tmp_agent.run_conversation.call_args.kwargs
        flush_prompt = call_kwargs.get("user_message", "")
        assert "automatically reset" in flush_prompt
        assert "Save any important facts" in flush_prompt
        assert "consider saving it as a skill" in flush_prompt
        assert "Do NOT respond to the user" in flush_prompt
|
||||
@@ -33,6 +33,7 @@ def _make_runner():
|
||||
runner._ephemeral_system_prompt = ""
|
||||
runner._prefill_messages = []
|
||||
runner._reasoning_config = None
|
||||
runner._session_reasoning_overrides = {}
|
||||
runner._show_reasoning = False
|
||||
runner._provider_routing = {}
|
||||
runner._fallback_model = None
|
||||
@@ -76,6 +77,10 @@ class TestReasoningCommand:
|
||||
source = inspect.getsource(gateway_run.GatewayRunner._handle_message)
|
||||
assert '"reasoning"' in source
|
||||
|
||||
def test_parse_reasoning_command_args_accepts_ascii_and_smart_global_flags(self):
    """The global flag is recognised as ASCII ``--global`` and as em-dash ``—global``, in either position."""
    parse = gateway_run.GatewayRunner._parse_reasoning_command_args
    ascii_result = parse("high --global")
    assert ascii_result == ("high", True)
    smart_dash_result = parse("—global xhigh")
    assert smart_dash_result == ("xhigh", True)
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_reasoning_command_reloads_current_state_from_config(self, tmp_path, monkeypatch):
|
||||
hermes_home = tmp_path / "hermes"
|
||||
@@ -111,13 +116,90 @@ class TestReasoningCommand:
|
||||
runner = _make_runner()
|
||||
runner._reasoning_config = {"enabled": True, "effort": "medium"}
|
||||
|
||||
result = await runner._handle_reasoning_command(_make_event("/reasoning low"))
|
||||
result = await runner._handle_reasoning_command(_make_event("/reasoning low --global"))
|
||||
|
||||
saved = yaml.safe_load(config_path.read_text(encoding="utf-8"))
|
||||
assert saved["agent"]["reasoning_effort"] == "low"
|
||||
assert runner._reasoning_config == {"enabled": True, "effort": "low"}
|
||||
assert "takes effect on next message" in result
|
||||
|
||||
@pytest.mark.asyncio
async def test_handle_reasoning_command_defaults_to_session_only(self, tmp_path, monkeypatch):
    """Without the global flag, ``/reasoning high`` leaves config.yaml alone and records a session override."""
    home = tmp_path / "hermes"
    home.mkdir()
    cfg_file = home / "config.yaml"
    cfg_file.write_text("agent:\n reasoning_effort: medium\n", encoding="utf-8")
    monkeypatch.setattr(gateway_run, "_hermes_home", home)

    runner = _make_runner()
    event = _make_event("/reasoning high")
    session_key = runner._session_key_for_source(event.source)

    reply = await runner._handle_reasoning_command(event)

    expected_override = {"enabled": True, "effort": "high"}
    on_disk = yaml.safe_load(cfg_file.read_text(encoding="utf-8"))
    # The persisted effort is untouched; only in-memory state changes.
    assert on_disk["agent"]["reasoning_effort"] == "medium"
    assert runner._session_reasoning_overrides[session_key] == expected_override
    assert runner._reasoning_config == expected_override
    assert "session only" in reply
|
||||
|
||||
@pytest.mark.asyncio
async def test_reasoning_global_clears_existing_session_override(self, tmp_path, monkeypatch):
    """``/reasoning low --global`` writes config.yaml and removes the session's override entry."""
    home = tmp_path / "hermes"
    home.mkdir()
    cfg_file = home / "config.yaml"
    cfg_file.write_text("agent:\n reasoning_effort: medium\n", encoding="utf-8")
    monkeypatch.setattr(gateway_run, "_hermes_home", home)

    runner = _make_runner()
    event = _make_event("/reasoning low --global")
    session_key = runner._session_key_for_source(event.source)
    # Seed an override that the global command is expected to drop.
    runner._session_reasoning_overrides[session_key] = {"enabled": True, "effort": "xhigh"}

    reply = await runner._handle_reasoning_command(event)

    on_disk = yaml.safe_load(cfg_file.read_text(encoding="utf-8"))
    assert on_disk["agent"]["reasoning_effort"] == "low"
    assert session_key not in runner._session_reasoning_overrides
    assert "saved to config" in reply
|
||||
|
||||
@pytest.mark.asyncio
async def test_reasoning_reset_clears_session_override_without_config_write(self, tmp_path, monkeypatch):
    """``/reasoning reset`` removes the session override and leaves config.yaml unchanged."""
    home = tmp_path / "hermes"
    home.mkdir()
    cfg_file = home / "config.yaml"
    cfg_file.write_text("agent:\n reasoning_effort: medium\n", encoding="utf-8")
    monkeypatch.setattr(gateway_run, "_hermes_home", home)

    runner = _make_runner()
    event = _make_event("/reasoning reset")
    session_key = runner._session_key_for_source(event.source)
    # Seed an override for the reset to clear.
    runner._session_reasoning_overrides[session_key] = {"enabled": True, "effort": "xhigh"}

    reply = await runner._handle_reasoning_command(event)

    on_disk = yaml.safe_load(cfg_file.read_text(encoding="utf-8"))
    assert on_disk["agent"]["reasoning_effort"] == "medium"
    assert session_key not in runner._session_reasoning_overrides
    assert "cleared" in reply
|
||||
|
||||
def test_resolve_session_reasoning_prefers_session_override(self, tmp_path, monkeypatch):
    """A stored session override is returned even though config.yaml says ``low``."""
    home = tmp_path / "hermes"
    home.mkdir()
    cfg_file = home / "config.yaml"
    cfg_file.write_text("agent:\n reasoning_effort: low\n", encoding="utf-8")
    monkeypatch.setattr(gateway_run, "_hermes_home", home)

    runner = _make_runner()
    source = _make_event("/reasoning").source
    session_key = runner._session_key_for_source(source)
    runner._session_reasoning_overrides[session_key] = {"enabled": True, "effort": "xhigh"}

    resolved = runner._resolve_session_reasoning_config(source=source)
    assert resolved == {"enabled": True, "effort": "xhigh"}
|
||||
|
||||
def test_run_agent_reloads_reasoning_config_per_message(self, tmp_path, monkeypatch):
|
||||
hermes_home = tmp_path / "hermes"
|
||||
hermes_home.mkdir()
|
||||
@@ -167,6 +249,56 @@ class TestReasoningCommand:
|
||||
assert _CapturingAgent.last_init is not None
|
||||
assert _CapturingAgent.last_init["reasoning_config"] == {"enabled": True, "effort": "low"}
|
||||
|
||||
def test_run_agent_prefers_session_reasoning_override(self, tmp_path, monkeypatch):
    """``_run_agent`` hands the session override, not the config.yaml effort, to the agent constructor."""
    home = tmp_path / "hermes"
    home.mkdir()
    (home / "config.yaml").write_text("agent:\n reasoning_effort: low\n", encoding="utf-8")

    def _runtime_kwargs():
        return {
            "provider": "openrouter",
            "api_mode": "chat_completions",
            "base_url": "https://openrouter.ai/api/v1",
            "api_key": "***",
        }

    # Point the gateway module at the temp home and neutralise env loading.
    for attr_name, replacement in (
        ("_hermes_home", home),
        ("_env_path", home / ".env"),
        ("load_dotenv", lambda *args, **kwargs: None),
        ("_resolve_runtime_agent_kwargs", _runtime_kwargs),
    ):
        monkeypatch.setattr(gateway_run, attr_name, replacement)

    agent_module = types.ModuleType("run_agent")
    agent_module.AIAgent = _CapturingAgent
    monkeypatch.setitem(sys.modules, "run_agent", agent_module)

    _CapturingAgent.last_init = None
    runner = _make_runner()
    session_key = "agent:main:local:dm"
    runner._session_reasoning_overrides[session_key] = {"enabled": True, "effort": "high"}

    source = SessionSource(
        platform=Platform.LOCAL,
        chat_id="cli",
        chat_name="CLI",
        chat_type="dm",
        user_id="user-1",
    )

    pending = runner._run_agent(
        message="ping",
        context_prompt="",
        history=[],
        source=source,
        session_id="session-1",
        session_key=session_key,
    )
    result = asyncio.run(pending)

    assert result["final_response"] == "ok"
    captured = _CapturingAgent.last_init
    assert captured is not None
    assert captured["reasoning_config"] == {"enabled": True, "effort": "high"}
|
||||
|
||||
def test_run_agent_includes_enabled_mcp_servers_in_gateway_toolsets(self, tmp_path, monkeypatch):
|
||||
hermes_home = tmp_path / "hermes"
|
||||
hermes_home.mkdir()
|
||||
|
||||
@@ -4,7 +4,7 @@ Tests the _handle_resume_command handler (switch to a previously-named session)
|
||||
across gateway messenger platforms.
|
||||
"""
|
||||
|
||||
from unittest.mock import MagicMock, AsyncMock
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
@@ -53,9 +53,6 @@ def _make_runner(session_db=None, current_session_id="current_session_001",
|
||||
mock_store.switch_session.return_value = mock_session_entry
|
||||
runner.session_store = mock_store
|
||||
|
||||
# Stub out memory flushing
|
||||
runner._async_flush_memories = AsyncMock()
|
||||
|
||||
return runner
|
||||
|
||||
|
||||
@@ -233,28 +230,3 @@ class TestHandleResumeCommand:
|
||||
|
||||
assert real_key not in runner._running_agents
|
||||
db.close()
|
||||
|
||||
@pytest.mark.asyncio
async def test_resume_flushes_memories(self, tmp_path):
    """``/resume`` calls the memory flush once for the session being left."""
    from hermes_state import SessionDB

    state_db = SessionDB(db_path=tmp_path / "state.db")
    # One titled session to resume into, plus the session currently active.
    state_db.create_session("old_session", "telegram")
    state_db.set_session_title("old_session", "Old Work")
    state_db.create_session("current_session_001", "telegram")

    resume_event = _make_event(text="/resume Old Work")
    runner = _make_runner(
        session_db=state_db,
        current_session_id="current_session_001",
        event=resume_event,
    )

    await runner._handle_resume_command(resume_event)

    flush = runner._async_flush_memories
    flush.assert_called_once_with(
        "current_session_001",
        "agent:main:telegram:dm:67890",
    )
    state_db.close()
|
||||
|
||||
@@ -0,0 +1,215 @@
|
||||
"""Tests for interrupt-aware tool-progress suppression in gateway.
|
||||
|
||||
When a user sends `stop` while the agent is executing a batch of parallel
|
||||
tool calls, the gateway's progress_callback should stop queuing 🔍 bubbles
|
||||
and the drain loop should drop any already-queued events. Without this
|
||||
guard, the stop acknowledgement appears first but is followed by a trail
|
||||
of tool-progress bubbles for calls that were already parsed from the LLM
|
||||
response — making the interrupt feel ignored.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import importlib
|
||||
import sys
|
||||
import time
|
||||
import types
|
||||
from types import SimpleNamespace
|
||||
|
||||
import pytest
|
||||
|
||||
from gateway.config import Platform, PlatformConfig
|
||||
from gateway.platforms.base import BasePlatformAdapter, SendResult
|
||||
from gateway.session import SessionSource
|
||||
|
||||
|
||||
class ProgressCaptureAdapter(BasePlatformAdapter):
    """In-memory adapter double that records outgoing traffic.

    Sends land in ``sent``, edits in ``edits`` and typing notifications in
    ``typing`` so tests can inspect what would have reached the chat.
    """

    def __init__(self, platform=Platform.TELEGRAM):
        config = PlatformConfig(enabled=True, token="***")
        super().__init__(config, platform)
        self.sent = []
        self.edits = []
        self.typing = []

    async def connect(self) -> bool:
        """Always report a successful connection."""
        return True

    async def disconnect(self) -> None:
        """Nothing to tear down."""
        return None

    async def send(self, chat_id, content, reply_to=None, metadata=None) -> SendResult:
        """Record the message and answer with a fixed message id."""
        record = {"chat_id": chat_id, "content": content}
        self.sent.append(record)
        return SendResult(success=True, message_id="progress-1")

    async def edit_message(self, chat_id, message_id, content) -> SendResult:
        """Record the edit and echo the edited message id back."""
        record = {"message_id": message_id, "content": content}
        self.edits.append(record)
        return SendResult(success=True, message_id=message_id)

    async def send_typing(self, chat_id, metadata=None) -> None:
        """Remember which chat received a typing indicator."""
        self.typing.append(chat_id)

    async def stop_typing(self, chat_id) -> None:
        """Typing stops are not recorded."""
        return None

    async def get_chat_info(self, chat_id: str):
        """Return a minimal info dict carrying only the chat id."""
        info = {"id": chat_id}
        return info
|
||||
|
||||
|
||||
class PreInterruptAgent:
    """Agent double that is never interrupted and emits one progress event.

    Serves as the control case: its single ``tool.started`` event is fired
    while ``is_interrupted`` is False, so it is expected to render.
    """

    def __init__(self, **kwargs):
        self._interrupt_requested = False
        self.tools = []
        self.tool_progress_callback = kwargs.get("tool_progress_callback")

    @property
    def is_interrupted(self) -> bool:
        """Expose the interrupt flag (always False for this double)."""
        return self._interrupt_requested

    def run_conversation(self, message, conversation_history=None, task_id=None):
        """Emit one web_search progress event, pause briefly, then finish."""
        emit = self.tool_progress_callback
        emit("tool.started", "web_search", "first search", {})
        time.sleep(0.35)  # let the drain loop process
        outcome = {"final_response": "done", "messages": [], "api_calls": 1}
        return outcome
|
||||
|
||||
|
||||
class InterruptedAgent:
    """Agent double that reports itself interrupted from construction.

    Every ``tool.started`` event it fires therefore happens after the
    interrupt flag is set, imitating a batch of parallel tool calls that
    was parsed before a stop request landed.
    """

    # Queries fired, in order, as one simulated parallel tool batch.
    _BATCH_QUERIES = (
        "cognee hermes",
        "McBee deer hunting",
        "kuzu graph db",
        "moonshot kimi api",
        "platform.moonshot.cn",
    )

    def __init__(self, **kwargs):
        self.tool_progress_callback = kwargs.get("tool_progress_callback")
        self.tools = []
        # The flag is set up front: the stop is treated as already received
        # by the time any progress event is emitted.
        self._interrupt_requested = True

    @property
    def is_interrupted(self) -> bool:
        """Expose the interrupt flag (always True for this double)."""
        return self._interrupt_requested

    def run_conversation(self, message, conversation_history=None, task_id=None):
        """Fire the five post-interrupt events, pause briefly, then finish."""
        for query in self._BATCH_QUERIES:
            self.tool_progress_callback("tool.started", "web_search", query, {})
        time.sleep(0.35)  # let the drain loop attempt to process the queue
        return {"final_response": "interrupted", "messages": [], "api_calls": 1}
|
||||
|
||||
|
||||
def _make_runner(adapter):
    """Build a GatewayRunner around *adapter* without running ``__init__``.

    Only the attributes set here exist on the instance; *adapter* is
    registered under its own ``platform`` key.
    """
    runner_cls = importlib.import_module("gateway.run").GatewayRunner
    runner = object.__new__(runner_cls)

    initial_state = {
        "adapters": {adapter.platform: adapter},
        "_voice_mode": {},
        "_prefill_messages": [],
        "_ephemeral_system_prompt": "",
        "_reasoning_config": None,
        "_provider_routing": {},
        "_fallback_model": None,
        "_session_db": None,
        "_running_agents": {},
        "_session_run_generation": {},
        "hooks": SimpleNamespace(loaded_hooks=False),
        "config": SimpleNamespace(
            thread_sessions_per_user=False,
            group_sessions_per_user=False,
            stt_enabled=False,
        ),
    }
    for attr_name, value in initial_state.items():
        setattr(runner, attr_name, value)
    return runner
|
||||
|
||||
|
||||
async def _run_once(monkeypatch, tmp_path, agent_cls, session_id):
    """Drive one ``_run_agent`` turn with *agent_cls* standing in for AIAgent.

    Tool-progress mode is forced to ``all`` through the environment, and
    ``dotenv`` / ``run_agent`` are replaced in ``sys.modules`` before
    ``gateway.run`` is imported.

    Returns:
        ``(adapter, result)``: the capturing adapter and whatever
        ``_run_agent`` returned.
    """
    monkeypatch.setenv("HERMES_TOOL_PROGRESS_MODE", "all")

    # Stub modules go in first so the gateway import below picks them up.
    dotenv_stub = types.ModuleType("dotenv")
    dotenv_stub.load_dotenv = lambda *args, **kwargs: None
    monkeypatch.setitem(sys.modules, "dotenv", dotenv_stub)

    run_agent_stub = types.ModuleType("run_agent")
    run_agent_stub.AIAgent = agent_cls
    monkeypatch.setitem(sys.modules, "run_agent", run_agent_stub)

    adapter = ProgressCaptureAdapter()
    runner = _make_runner(adapter)

    gateway_run = importlib.import_module("gateway.run")
    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)

    def _fake_runtime_kwargs():
        return {"api_key": "fake"}

    monkeypatch.setattr(gateway_run, "_resolve_runtime_agent_kwargs", _fake_runtime_kwargs)

    group_thread = SessionSource(
        platform=Platform.TELEGRAM,
        chat_id="-1001",
        chat_type="group",
        thread_id="17585",
    )
    outcome = await runner._run_agent(
        message="hi",
        context_prompt="",
        history=[],
        source=group_thread,
        session_id=session_id,
        session_key="agent:main:telegram:group:-1001:17585",
    )
    return adapter, outcome
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_baseline_non_interrupted_agent_renders_progress(monkeypatch, tmp_path):
    """Control case: an agent that is not interrupted gets its progress event rendered."""
    adapter, result = await _run_once(monkeypatch, tmp_path, PreInterruptAgent, "sess-baseline")
    assert result["final_response"] == "done"

    # Collapse everything the adapter sent or edited into one searchable string.
    sent_text = " ".join(entry["content"] for entry in adapter.sent)
    edited_text = " ".join(entry["content"] for entry in adapter.edits)
    rendered = sent_text + " " + edited_text
    assert "first search" in rendered, (
        "baseline agent should render its tool-progress event — "
        "if this fails the test harness is broken, not the fix"
    )
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_progress_suppressed_when_agent_is_interrupted(monkeypatch, tmp_path):
    """Events fired while the agent reports ``is_interrupted`` must not render.

    ``InterruptedAgent`` is interrupted from construction and fires five
    ``tool.started`` web_search events; none of their query strings may
    appear in anything the adapter sent or edited.
    """
    adapter, result = await _run_once(
        monkeypatch, tmp_path, InterruptedAgent, "sess-interrupted"
    )
    assert result["final_response"] == "interrupted"

    # Collapse everything the adapter sent or edited into one searchable string.
    sent_text = " ".join(entry["content"] for entry in adapter.sent)
    edited_text = " ".join(entry["content"] for entry in adapter.edits)
    rendered = sent_text + " " + edited_text

    # Every query below was emitted after the interrupt flag was set.
    post_interrupt_queries = (
        "cognee hermes",
        "McBee deer hunting",
        "kuzu graph db",
        "moonshot kimi api",
        "platform.moonshot.cn",
    )
    for leaked_query in post_interrupt_queries:
        assert leaked_query not in rendered, (
            f"event '{leaked_query}' leaked into the UI after interrupt — "
            f"progress_callback / drain loop is not checking is_interrupted"
        )
|
||||
@@ -165,3 +165,26 @@ async def test_reasoning_rejected_mid_run():
|
||||
assert result is not None
|
||||
assert "can't run mid-turn" in result
|
||||
assert "/reasoning" in result
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_btw_dispatches_mid_run():
    """``/btw`` sent mid-run must reach the background-command handler.

    The handler is replaced with an ``AsyncMock``; the test passes when it
    is awaited exactly once and the reply is not the mid-turn rejection.
    """
    runner = _make_runner()
    background_handler = AsyncMock(
        return_value='🚀 Background task started: "what module owns titles?"'
    )
    runner._handle_background_command = background_handler

    event = _make_event("/btw what module owns titles?")
    result = await runner._handle_message(event)

    background_handler.assert_awaited_once()
    assert result is not None
    assert "can't run mid-turn" not in result
|
||||
|
||||
@@ -177,8 +177,8 @@ async def test_idle_expiry_fires_finalize_hook(mock_invoke_hook):
|
||||
its reset policy (idle timeout, scheduled reset), it must fire
|
||||
``on_session_finalize`` so plugin providers get the same final-pass
|
||||
extraction opportunity they'd get from /new or CLI shutdown. Before
|
||||
the fix, the expiry path flushed memories and evicted the agent but
|
||||
silently skipped the hook.
|
||||
the fix, the expiry path evicted the agent but silently skipped the
|
||||
hook.
|
||||
"""
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
@@ -200,7 +200,7 @@ async def test_idle_expiry_fires_finalize_hook(mock_invoke_hook):
|
||||
platform=Platform.TELEGRAM,
|
||||
chat_type="dm",
|
||||
)
|
||||
expired_entry.memory_flushed = False
|
||||
expired_entry.expiry_finalized = False
|
||||
|
||||
runner.session_store = MagicMock()
|
||||
runner.session_store._ensure_loaded = MagicMock()
|
||||
@@ -211,24 +211,24 @@ async def test_idle_expiry_fires_finalize_hook(mock_invoke_hook):
|
||||
runner.session_store._lock.__exit__ = MagicMock(return_value=None)
|
||||
runner.session_store._save = MagicMock()
|
||||
|
||||
runner._async_flush_memories = AsyncMock()
|
||||
runner._evict_cached_agent = MagicMock()
|
||||
runner._cleanup_agent_resources = MagicMock()
|
||||
runner._sweep_idle_cached_agents = MagicMock(return_value=0)
|
||||
|
||||
# The watcher starts with `await asyncio.sleep(60)` and loops while
|
||||
# `self._running`. Patch sleep so the 60s initial delay is instant, then
|
||||
# flip `_running` false inside the flush call so the loop exits cleanly
|
||||
# after one pass.
|
||||
# `self._running`. Patch sleep so the 60s initial delay is instant, and
|
||||
# make the expiry hook invocation flip `_running` false so the loop
|
||||
# exits cleanly after one pass.
|
||||
_orig_sleep = __import__("asyncio").sleep
|
||||
|
||||
async def _fast_sleep(_):
|
||||
await _orig_sleep(0)
|
||||
|
||||
async def _flush_and_stop(session_id, key):
|
||||
runner._running = False # terminate the loop after this iteration
|
||||
def _hook_and_stop(*a, **kw):
|
||||
runner._running = False
|
||||
return None
|
||||
|
||||
runner._async_flush_memories = AsyncMock(side_effect=_flush_and_stop)
|
||||
mock_invoke_hook.side_effect = _hook_and_stop
|
||||
|
||||
with patch("gateway.run.asyncio.sleep", side_effect=_fast_sleep):
|
||||
await runner._session_expiry_watcher(interval=0)
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
"""Regression tests for approval-state cleanup on session boundaries."""
|
||||
|
||||
from datetime import datetime
|
||||
from unittest.mock import AsyncMock, MagicMock
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
@@ -72,7 +72,6 @@ def _make_resume_runner():
|
||||
runner = object.__new__(GatewayRunner)
|
||||
runner.adapters = {}
|
||||
runner._background_tasks = set()
|
||||
runner._async_flush_memories = AsyncMock()
|
||||
runner._running_agents = {}
|
||||
runner._running_agents_ts = {}
|
||||
runner._busy_ack_ts = {}
|
||||
|
||||
@@ -58,7 +58,7 @@ class TestFormatSessionInfo:
|
||||
{"provider": "", "base_url": "", "api_key": ""})
|
||||
with p1, p2, p3:
|
||||
info = runner._format_session_info()
|
||||
assert "128K" in info
|
||||
assert "256K" in info
|
||||
assert "model.context_length" in info
|
||||
|
||||
def test_local_endpoint_shown(self, runner, tmp_path):
|
||||
|
||||
@@ -54,6 +54,7 @@ def _make_runner():
|
||||
runner._background_tasks = set()
|
||||
runner._session_db = None
|
||||
runner._session_model_overrides = {}
|
||||
runner._session_reasoning_overrides = {}
|
||||
runner._pending_model_notes = {}
|
||||
runner._pending_approvals = {}
|
||||
runner._agent_cache = {}
|
||||
@@ -102,6 +103,7 @@ def test_run_agent_prefers_session_override_over_global_runtime(monkeypatch):
|
||||
)
|
||||
session_key = "agent:main:local:dm"
|
||||
runner._session_model_overrides[session_key] = _codex_override()
|
||||
runner._session_reasoning_overrides[session_key] = {"enabled": True, "effort": "high"}
|
||||
|
||||
result = asyncio.run(
|
||||
runner._run_agent(
|
||||
@@ -121,6 +123,7 @@ def test_run_agent_prefers_session_override_over_global_runtime(monkeypatch):
|
||||
assert _CapturingAgent.last_init["api_mode"] == "codex_responses"
|
||||
assert _CapturingAgent.last_init["base_url"] == "https://chatgpt.com/backend-api/codex"
|
||||
assert _CapturingAgent.last_init["api_key"] == "***"
|
||||
assert _CapturingAgent.last_init["reasoning_config"] == {"enabled": True, "effort": "high"}
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@@ -149,6 +152,7 @@ async def test_background_task_prefers_session_override_over_global_runtime(monk
|
||||
)
|
||||
session_key = runner._session_key_for_source(source)
|
||||
runner._session_model_overrides[session_key] = _codex_override()
|
||||
runner._session_reasoning_overrides[session_key] = {"enabled": True, "effort": "high"}
|
||||
|
||||
await runner._run_background_task("say hello", source, "bg_test")
|
||||
|
||||
@@ -158,3 +162,4 @@ async def test_background_task_prefers_session_override_over_global_runtime(monk
|
||||
assert _CapturingAgent.last_init["api_mode"] == "codex_responses"
|
||||
assert _CapturingAgent.last_init["base_url"] == "https://chatgpt.com/backend-api/codex"
|
||||
assert _CapturingAgent.last_init["api_key"] == "***"
|
||||
assert _CapturingAgent.last_init["reasoning_config"] == {"enabled": True, "effort": "high"}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
"""Tests that /new (and its /reset alias) clears the session-scoped model override."""
|
||||
"""Tests that /new (and its /reset alias) clears session-scoped overrides."""
|
||||
from datetime import datetime
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import AsyncMock, MagicMock
|
||||
@@ -37,6 +37,7 @@ def _make_runner():
|
||||
runner._voice_mode = {}
|
||||
runner.hooks = SimpleNamespace(emit=AsyncMock(), loaded_hooks=False)
|
||||
runner._session_model_overrides = {}
|
||||
runner._session_reasoning_overrides = {}
|
||||
runner._pending_model_notes = {}
|
||||
runner._background_tasks = set()
|
||||
|
||||
@@ -75,14 +76,16 @@ async def test_new_command_clears_session_model_override():
|
||||
runner._session_model_overrides[session_key] = {
|
||||
"model": "gpt-4o",
|
||||
"provider": "openai",
|
||||
"api_key": "sk-test",
|
||||
"api_key": "***",
|
||||
"base_url": "",
|
||||
"api_mode": "openai",
|
||||
}
|
||||
runner._session_reasoning_overrides[session_key] = {"enabled": True, "effort": "high"}
|
||||
|
||||
await runner._handle_reset_command(_make_event("/new"))
|
||||
|
||||
assert session_key not in runner._session_model_overrides
|
||||
assert session_key not in runner._session_reasoning_overrides
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@@ -92,10 +95,12 @@ async def test_new_command_no_override_is_noop():
|
||||
session_key = build_session_key(_make_source())
|
||||
|
||||
assert session_key not in runner._session_model_overrides
|
||||
assert session_key not in runner._session_reasoning_overrides
|
||||
|
||||
await runner._handle_reset_command(_make_event("/new"))
|
||||
|
||||
assert session_key not in runner._session_model_overrides
|
||||
assert session_key not in runner._session_reasoning_overrides
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@@ -115,12 +120,16 @@ async def test_new_command_only_clears_own_session():
|
||||
runner._session_model_overrides[other_key] = {
|
||||
"model": "claude-sonnet-4-6",
|
||||
"provider": "anthropic",
|
||||
"api_key": "sk-ant-test",
|
||||
"api_key": "***",
|
||||
"base_url": "",
|
||||
"api_mode": "anthropic",
|
||||
}
|
||||
runner._session_reasoning_overrides[session_key] = {"enabled": True, "effort": "high"}
|
||||
runner._session_reasoning_overrides[other_key] = {"enabled": True, "effort": "low"}
|
||||
|
||||
await runner._handle_reset_command(_make_event("/new"))
|
||||
|
||||
assert session_key not in runner._session_model_overrides
|
||||
assert other_key in runner._session_model_overrides
|
||||
assert session_key not in runner._session_reasoning_overrides
|
||||
assert other_key in runner._session_reasoning_overrides
|
||||
|
||||
@@ -177,6 +177,53 @@ class TestHandleVoiceCommand:
|
||||
|
||||
assert adapter._auto_tts_disabled_chats == {"123"}
|
||||
|
||||
def test_sync_populates_enabled_chats_from_voice_modes(self, runner):
    """Sync must rebuild the per-chat opt-in set as well (Issue #16007).

    Chats persisted as ``voice_only`` or ``all`` have to end up in the
    adapter's ``_auto_tts_enabled_chats`` and chats persisted as ``off`` in
    ``_auto_tts_disabled_chats``.  Before the fix, ``/voice on`` leaned on a
    "not in disabled set" default that silently enabled auto-TTS for every
    chat.
    """
    from gateway.config import Platform

    telegram_adapter = SimpleNamespace(
        platform=Platform.TELEGRAM,
        _auto_tts_default=False,
        _auto_tts_enabled_chats=set(),
        _auto_tts_disabled_chats=set(),
    )
    persisted_modes = {
        "telegram:off_chat": "off",
        "telegram:on_chat": "voice_only",
        "telegram:tts_chat": "all",
        # Keyed to a different platform, so the Telegram sync must skip it.
        "slack:999": "voice_only",
    }
    runner._voice_mode = persisted_modes

    runner._sync_voice_mode_state_to_adapter(telegram_adapter)

    expected_off = {"off_chat"}
    expected_on = {"on_chat", "tts_chat"}
    assert telegram_adapter._auto_tts_disabled_chats == expected_off
    assert telegram_adapter._auto_tts_enabled_chats == expected_on
|
||||
|
||||
def test_sync_pushes_config_default_onto_adapter(self, runner, monkeypatch):
    """The configured ``voice.auto_tts`` flag must reach ``_auto_tts_default`` (Issue #16007)."""
    from gateway.config import Platform

    stub_config = {"voice": {"auto_tts": True}}

    def _load_stub_config():
        # Stand-in for hermes_cli.config.load_config: always the same dict.
        return stub_config

    monkeypatch.setattr("hermes_cli.config.load_config", _load_stub_config)

    telegram_adapter = SimpleNamespace(
        platform=Platform.TELEGRAM,
        _auto_tts_default=False,
        _auto_tts_enabled_chats=set(),
        _auto_tts_disabled_chats=set(),
    )

    runner._sync_voice_mode_state_to_adapter(telegram_adapter)

    # The adapter started with False, so True can only come from the config.
    assert telegram_adapter._auto_tts_default is True
|
||||
|
||||
def test_restart_restores_voice_off_state(self, runner, tmp_path):
|
||||
from gateway.config import Platform
|
||||
runner._VOICE_MODE_PATH.write_text(json.dumps({"telegram:123": "off"}))
|
||||
@@ -2706,3 +2753,56 @@ class TestUDPKeepalive:
|
||||
mock_conn.send_packet.assert_called_with(b'\xf8\xff\xfe')
|
||||
finally:
|
||||
DiscordAdapter._KEEPALIVE_INTERVAL = original_interval
|
||||
|
||||
|
||||
# =====================================================================
|
||||
# BasePlatformAdapter._should_auto_tts_for_chat — gate for auto-TTS
|
||||
# on voice input. Regression test for Issue #16007.
|
||||
# =====================================================================
|
||||
|
||||
class TestShouldAutoTtsForChat:
    """Checks the auto-TTS gate order: per-chat enable, then per-chat disable, then config default."""

    def _make_adapter(self, *, default: bool, enabled=(), disabled=()):
        """Return ``(gate, stub)`` for driving the gate against a minimal adapter.

        ``stub`` carries only the three ``_auto_tts_*`` attributes.  ``gate`` is
        ``BasePlatformAdapter._should_auto_tts_for_chat`` taken from the class,
        so tests pass ``stub`` explicitly in the ``self`` position.
        """
        from gateway.platforms.base import BasePlatformAdapter

        stub = SimpleNamespace(
            _auto_tts_default=default,
            _auto_tts_enabled_chats=set(enabled),
            _auto_tts_disabled_chats=set(disabled),
        )
        # NOTE(review): this relies on the gate reading nothing beyond the
        # three attributes set above — confirm if the method grows.
        gate = BasePlatformAdapter._should_auto_tts_for_chat
        return gate, stub

    def test_default_false_no_override_suppresses(self):
        """Issue #16007: default off and no per-chat state means no TTS."""
        gate, stub = self._make_adapter(default=False)
        verdict = gate(stub, "chat1")
        assert verdict is False

    def test_default_true_no_override_fires(self):
        """Default on and no per-chat state means TTS fires."""
        gate, stub = self._make_adapter(default=True)
        verdict = gate(stub, "chat1")
        assert verdict is True

    def test_explicit_enable_overrides_false_default(self):
        """A per-chat enable (``/voice on``) fires even when the default is off."""
        gate, stub = self._make_adapter(default=False, enabled={"chat1"})
        verdict = gate(stub, "chat1")
        assert verdict is True

    def test_explicit_disable_overrides_true_default(self):
        """A per-chat disable (``/voice off``) suppresses even when the default is on."""
        gate, stub = self._make_adapter(default=True, disabled={"chat1"})
        verdict = gate(stub, "chat1")
        assert verdict is False

    def test_enabled_wins_over_disabled(self):
        """When a chat sits in both sets, the enable entry takes priority."""
        chat_ids = {"chat1"}
        gate, stub = self._make_adapter(
            default=False, enabled=chat_ids, disabled=chat_ids
        )
        verdict = gate(stub, "chat1")
        assert verdict is True

    def test_per_chat_isolation(self):
        """Enabling chat1 must not affect chat2."""
        gate, stub = self._make_adapter(default=False, enabled={"chat1"})
        enabled_chat = gate(stub, "chat1")
        assert enabled_chat is True
        other_chat = gate(stub, "chat2")
        assert other_chat is False
|
||||
|
||||
@@ -0,0 +1,152 @@
|
||||
"""Regression test for the `/model` picker confirmation display.
|
||||
|
||||
Bug (April 2026): after choosing a model from the interactive `/model` picker,
|
||||
``HermesCLI._apply_model_switch_result()`` printed ``ModelInfo.context_window``
|
||||
straight from models.dev, which always reports the vendor-wide value (e.g.
|
||||
gpt-5.5 = 1,050,000 on ``openai``). That ignored provider-specific caps — in
|
||||
particular, ChatGPT Codex OAuth enforces 272K on the same slug. The sibling
|
||||
``_handle_model_switch()`` (typed ``/model <name>``) was already fixed to use
|
||||
``resolve_display_context_length()``; the picker path was missed, causing
|
||||
"sometimes 1M, sometimes 272K" for the same model across sibling UI paths.
|
||||
|
||||
Fix: both display paths now go through ``resolve_display_context_length()``.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from unittest.mock import patch
|
||||
|
||||
from hermes_cli.model_switch import ModelSwitchResult
|
||||
|
||||
|
||||
class _FakeModelInfo:
|
||||
context_window = 1_050_000
|
||||
max_output = 0
|
||||
|
||||
def has_cost_data(self):
|
||||
return False
|
||||
|
||||
def format_capabilities(self):
|
||||
return ""
|
||||
|
||||
|
||||
class _StubCLI:
|
||||
"""Minimum attrs ``_apply_model_switch_result`` reads on ``self``."""
|
||||
agent = None
|
||||
model = ""
|
||||
provider = ""
|
||||
requested_provider = ""
|
||||
api_key = ""
|
||||
_explicit_api_key = ""
|
||||
base_url = ""
|
||||
_explicit_base_url = ""
|
||||
api_mode = ""
|
||||
_pending_model_switch_note = ""
|
||||
|
||||
|
||||
def _run_display(monkeypatch, result):
    """Run ``HermesCLI._apply_model_switch_result`` on a stub and return what it printed.

    ``cli._cprint`` is replaced by a recorder that stores ``str()`` of its first
    argument, and ``cli.save_config_value`` by a no-op.  The method is then
    invoked with a fresh ``_StubCLI``, *result* and ``False``.
    """
    import cli as cli_mod

    captured: list[str] = []

    def _record(s, *args, **kwargs):
        captured.append(str(s))

    def _discard(*args, **kwargs):
        # Keeps the test from writing to ~/.hermes/config.yaml.
        return None

    monkeypatch.setattr(cli_mod, "_cprint", _record)
    monkeypatch.setattr(cli_mod, "save_config_value", _discard)

    stub_cli = _StubCLI()
    cli_mod.HermesCLI._apply_model_switch_result(stub_cli, result, False)
    return captured
|
||||
|
||||
|
||||
def test_picker_path_uses_provider_aware_context_on_codex(monkeypatch):
    """The picker display must show the resolver's 272K for Codex, not the
    1.05M carried by the model info attached to the switch result.
    """
    switch_fields = dict(
        success=True,
        new_model="gpt-5.5",
        target_provider="openai-codex",
        provider_changed=True,
        api_key="",
        base_url="https://chatgpt.com/backend-api/codex",
        api_mode="codex_responses",
        warning_message="",
        provider_label="ChatGPT Codex",
        resolved_via_alias=False,
        capabilities=None,
        model_info=_FakeModelInfo(),  # reports 1.05M
        is_global=False,
    )
    codex_switch = ModelSwitchResult(**switch_fields)

    # Pin the provider-aware resolver to the Codex cap.
    resolver_patch = patch(
        "agent.model_metadata.get_model_context_length",
        return_value=272_000,
    )
    with resolver_patch:
        printed = _run_display(monkeypatch, codex_switch)

    context_lines = [line for line in printed if "Context:" in line]
    ctx_line = context_lines[0] if context_lines else ""
    assert "272,000" in ctx_line, (
        f"picker-path display must show Codex's 272K cap, got: {ctx_line!r}"
    )
    assert "1,050,000" not in ctx_line, (
        f"picker-path display leaked models.dev's 1.05M for Codex: {ctx_line!r}"
    )
|
||||
|
||||
|
||||
def test_picker_path_shows_vendor_value_when_no_provider_cap(monkeypatch):
    """When the resolver itself returns 1.05M (OpenRouter case), the picker
    display must show that value — resolver and model info agree here.
    """
    switch_fields = dict(
        success=True,
        new_model="openai/gpt-5.5",
        target_provider="openrouter",
        provider_changed=True,
        api_key="",
        base_url="https://openrouter.ai/api/v1",
        api_mode="chat_completions",
        warning_message="",
        provider_label="OpenRouter",
        resolved_via_alias=False,
        capabilities=None,
        model_info=_FakeModelInfo(),
        is_global=False,
    )
    openrouter_switch = ModelSwitchResult(**switch_fields)

    resolver_patch = patch(
        "agent.model_metadata.get_model_context_length",
        return_value=1_050_000,
    )
    with resolver_patch:
        printed = _run_display(monkeypatch, openrouter_switch)

    context_lines = [line for line in printed if "Context:" in line]
    ctx_line = context_lines[0] if context_lines else ""
    assert "1,050,000" in ctx_line, (
        f"OpenRouter gpt-5.5 should show 1.05M context, got: {ctx_line!r}"
    )
|
||||
|
||||
|
||||
def test_picker_path_falls_back_to_model_info_when_resolver_empty(monkeypatch):
    """When the resolver returns ``None``, the display must still show the
    model info's ``context_window`` instead of leaving the line blank.
    """
    switch_fields = dict(
        success=True,
        new_model="some-model",
        target_provider="some-provider",
        provider_changed=True,
        api_key="",
        base_url="",
        api_mode="chat_completions",
        warning_message="",
        provider_label="Some Provider",
        resolved_via_alias=False,
        capabilities=None,
        model_info=_FakeModelInfo(),  # context_window = 1_050_000
        is_global=False,
    )
    unknown_switch = ModelSwitchResult(**switch_fields)

    # Simulate a resolver that has nothing to report.
    resolver_patch = patch(
        "agent.model_metadata.get_model_context_length",
        return_value=None,
    )
    with resolver_patch:
        printed = _run_display(monkeypatch, unknown_switch)

    context_lines = [line for line in printed if "Context:" in line]
    ctx_line = context_lines[0] if context_lines else ""
    assert "1,050,000" in ctx_line, (
        f"resolver-empty path should fall back to ModelInfo, got: {ctx_line!r}"
    )
|
||||
@@ -0,0 +1,237 @@
|
||||
"""Tests for hermes_cli.azure_detect — transport & model auto-detection."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from hermes_cli import azure_detect
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
class _FakeHTTPResponse:
|
||||
"""Minimal stand-in for urllib.request.urlopen's context manager."""
|
||||
|
||||
def __init__(self, status: int, body: bytes):
|
||||
self.status = status
|
||||
self._body = body
|
||||
|
||||
def __enter__(self):
|
||||
return self
|
||||
|
||||
def __exit__(self, exc_type, exc, tb):
|
||||
return False
|
||||
|
||||
def read(self) -> bytes:
|
||||
return self._body
|
||||
|
||||
|
||||
def _openai_models_body(*ids: str) -> bytes:
|
||||
return json.dumps({
|
||||
"object": "list",
|
||||
"data": [{"id": i, "object": "model"} for i in ids],
|
||||
}).encode()
|
||||
|
||||
|
||||
def _anthropic_error_body(msg: str = "model not found") -> bytes:
|
||||
return json.dumps({
|
||||
"type": "error",
|
||||
"error": {"type": "invalid_request_error", "message": msg},
|
||||
}).encode()
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# _looks_like_anthropic_path
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
@pytest.mark.parametrize(
    ("url", "expected"),
    [
        # URLs whose path contains /anthropic
        ("https://foo.services.ai.azure.com/anthropic", True),
        ("https://foo.services.ai.azure.com/anthropic/", True),
        ("https://foo.services.ai.azure.com/anthropic/v1", True),
        # URLs without it
        ("https://foo.openai.azure.com/openai/v1", False),
        ("https://foo.openai.azure.com/", False),
        ("https://openrouter.ai/api/v1", False),
    ],
)
def test_looks_like_anthropic_path(url, expected):
    """``_looks_like_anthropic_path`` must return exactly ``True``/``False`` per URL."""
    verdict = azure_detect._looks_like_anthropic_path(url)
    assert verdict is expected
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# _extract_model_ids
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
def test_extract_model_ids_openai_shape():
    """An OpenAI-style listing yields its model ids in listing order."""
    expected_ids = ["gpt-4.1-mini", "claude-sonnet-4-6"]
    listing = {
        "object": "list",
        "data": [{"id": model_id, "object": "model"} for model_id in expected_ids],
    }
    extracted = azure_detect._extract_model_ids(listing)
    assert extracted == expected_ids
|
||||
|
||||
|
||||
def test_extract_model_ids_bad_shape_returns_empty():
    """Payloads lacking a usable ``data`` list of ``id`` entries yield ``[]``."""
    malformed_payloads = (
        {},                             # no "data" key at all
        {"data": "not-a-list"},         # "data" has the wrong type
        {"data": [{"no-id": True}]},    # entries carry no "id"
    )
    for payload in malformed_payloads:
        assert azure_detect._extract_model_ids(payload) == []
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# detect() integration
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
def test_detect_anthropic_path_wins_without_http():
    """An ``/anthropic`` URL is classified from its path alone; neither probe helper is called."""
    endpoint = "https://foo.services.ai.azure.com/anthropic"
    with patch.object(azure_detect, "_http_get_json") as fake_get:
        with patch.object(azure_detect, "_probe_anthropic_messages") as fake_probe:
            result = azure_detect.detect(endpoint, "key-abc")

    assert result.api_mode == "anthropic_messages"
    assert result.is_anthropic is True
    assert "path" in result.reason.lower()
    # Both network-facing helpers must have stayed untouched.
    fake_get.assert_not_called()
    fake_probe.assert_not_called()
|
||||
|
||||
|
||||
def test_detect_openai_models_probe_success():
    """A ``/models`` probe that returns a listing resolves to ``chat_completions``."""
    listed_models = ["gpt-5.4", "claude-opus-4-6"]

    def _fake_get(url, api_key, timeout=6.0):
        # The key given to detect() must be forwarded unchanged.
        assert api_key == "key-abc"
        parsed_listing = json.loads(_openai_models_body("gpt-5.4", "claude-opus-4-6"))
        return 200, parsed_listing

    with patch.object(azure_detect, "_http_get_json", side_effect=_fake_get):
        result = azure_detect.detect("https://my.openai.azure.com/openai/v1", "key-abc")

    assert result.api_mode == "chat_completions"
    assert result.models_probe_ok is True
    assert result.models == listed_models
    assert "/models" in result.reason
|
||||
|
||||
|
||||
def test_detect_openai_models_probe_empty_list_still_counts():
    """An OpenAI-shaped reply with zero models still resolves to ``chat_completions``."""

    def _fake_get(url, api_key, timeout=6.0):
        empty_listing = {"object": "list", "data": []}
        return 200, empty_listing

    with patch.object(azure_detect, "_http_get_json", side_effect=_fake_get):
        result = azure_detect.detect("https://my.openai.azure.com/openai/v1", "key-abc")

    assert result.api_mode == "chat_completions"
    assert result.models == []
    assert result.models_probe_ok is True
|
||||
|
||||
|
||||
def test_detect_falls_back_to_anthropic_probe():
    """With ``/models`` failing and the Anthropic Messages probe succeeding, the result is ``anthropic_messages``."""

    def _fake_get(url, api_key, timeout=6.0):
        # Every /models attempt comes back 401 with no body.
        return 401, None

    models_patch = patch.object(azure_detect, "_http_get_json", side_effect=_fake_get)
    messages_patch = patch.object(
        azure_detect, "_probe_anthropic_messages", return_value=True
    )
    with models_patch:
        with messages_patch:
            result = azure_detect.detect("https://my.services.ai.azure.com/v1", "key-abc")

    assert result.api_mode == "anthropic_messages"
    assert result.is_anthropic is True
|
||||
|
||||
|
||||
def test_detect_all_probes_fail_returns_none():
    """When every probe fails, ``api_mode`` is ``None`` and the reason mentions manual setup."""
    models_patch = patch.object(
        azure_detect, "_http_get_json", return_value=(500, None)
    )
    messages_patch = patch.object(
        azure_detect, "_probe_anthropic_messages", return_value=False
    )
    with models_patch:
        with messages_patch:
            result = azure_detect.detect("https://some-private.example.com/", "key-abc")

    assert result.api_mode is None
    assert result.models == []
    assert "manual" in result.reason.lower()
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# _probe_openai_models URL list (Azure vs v1 api-version)
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
def test_probe_openai_models_tries_multiple_api_versions():
    """URLs without ``api-version`` get a 404; an ``api-version`` URL succeeds.

    The probe must report success with the listed model and must have
    requested both kinds of URL.
    """
    requested_urls = []

    def _fake_get(url, api_key, timeout=6.0):
        requested_urls.append(url)
        if "api-version" in url:
            return 200, json.loads(_openai_models_body("gpt-4.1"))
        return 404, None

    with patch.object(azure_detect, "_http_get_json", side_effect=_fake_get):
        ok, models = azure_detect._probe_openai_models(
            "https://my.openai.azure.com/openai/v1", "k"
        )

    assert ok is True
    assert models == ["gpt-4.1"]
    # At least one request of each kind must have been issued.
    unversioned = [url for url in requested_urls if "api-version" not in url]
    versioned = [url for url in requested_urls if "api-version" in url]
    assert unversioned
    assert versioned
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# _http_get_json error handling
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
def test_http_get_json_on_urlerror_returns_zero_none():
    """A ``URLError`` from ``urlopen`` is reported as ``(0, None)`` instead of propagating."""
    from urllib.error import URLError

    network_failure = URLError("dns fail")
    urlopen_patch = patch(
        "hermes_cli.azure_detect.urllib_request.urlopen",
        side_effect=network_failure,
    )
    with urlopen_patch:
        outcome = azure_detect._http_get_json("https://bad.example/", "k")

    status, body = outcome
    assert status == 0
    assert body is None
|
||||
|
||||
|
||||
def test_http_get_json_on_http_error_returns_code_none():
    """An ``HTTPError`` from ``urlopen`` is reported as ``(code, None)``."""
    from urllib.error import HTTPError

    forbidden = HTTPError(
        url="https://x/",
        code=403,
        msg="Forbidden",
        hdrs={},
        fp=None,
    )
    urlopen_patch = patch(
        "hermes_cli.azure_detect.urllib_request.urlopen",
        side_effect=forbidden,
    )
    with urlopen_patch:
        outcome = azure_detect._http_get_json("https://x/", "k")

    status, body = outcome
    assert status == 403
    assert body is None
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# lookup_context_length
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
def test_lookup_context_length_returns_known():
    """A resolver value that differs from the fallback constant is passed through unchanged."""
    resolver = MagicMock(return_value=400000)
    resolver_patch = patch("agent.model_metadata.get_model_context_length", resolver)
    # Pin the fallback so 400000 is unambiguously "not the fallback".
    fallback_patch = patch("agent.model_metadata.DEFAULT_FALLBACK_CONTEXT", 128000)

    with resolver_patch:
        with fallback_patch:
            n = azure_detect.lookup_context_length(
                "gpt-5.4", "https://x.openai.azure.com/openai/v1", "k"
            )

    assert n == 400000
|
||||
|
||||
|
||||
def test_lookup_context_length_returns_none_on_fallback():
    """A resolver value equal to ``DEFAULT_FALLBACK_CONTEXT`` is reported as ``None``."""
    fallback_value = 128000
    resolver_patch = patch(
        "agent.model_metadata.get_model_context_length",
        return_value=fallback_value,
    )
    fallback_patch = patch(
        "agent.model_metadata.DEFAULT_FALLBACK_CONTEXT", fallback_value
    )

    with resolver_patch:
        with fallback_patch:
            n = azure_detect.lookup_context_length(
                "totally-unknown-model", "https://x.openai.azure.com/openai/v1", "k"
            )

    assert n is None
|
||||
|
||||
|
||||
def test_lookup_context_length_swallows_exceptions():
    """A resolver that raises must yield ``None`` rather than crash the caller."""
    exploding_resolver = patch(
        "agent.model_metadata.get_model_context_length",
        side_effect=RuntimeError("boom"),
    )
    with exploding_resolver:
        outcome = azure_detect.lookup_context_length("m", "https://x/", "k")
        assert outcome is None
|
||||
@@ -0,0 +1,240 @@
|
||||
"""Regression tests for custom_providers per-model context_length resolution.
|
||||
|
||||
Covers the fix for #15779 — mid-session /model switch to a named custom
|
||||
provider must honor ``custom_providers[].models.<id>.context_length`` the
|
||||
same way startup already does.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from unittest.mock import patch
|
||||
|
||||
from hermes_cli.config import get_custom_provider_context_length
|
||||
|
||||
|
||||
class TestGetCustomProviderContextLength:
    """Tests for ``get_custom_provider_context_length(model, base_url, custom_providers)``."""

    _URL = "https://example.invalid/v1"

    @staticmethod
    def _provider(base_url, models, **extra):
        """Build one ``custom_providers`` entry; *extra* keys are placed first."""
        return {**extra, "base_url": base_url, "models": models}

    @staticmethod
    def _ctx(value):
        """Wrap *value* as a per-model settings dict."""
        return {"context_length": value}

    def test_returns_override_for_matching_entry(self):
        """Matching URL and model id returns the configured length."""
        providers = [
            self._provider(
                self._URL,
                {"gpt-5.5": self._ctx(1_050_000)},
                name="my-endpoint",
            )
        ]
        resolved = get_custom_provider_context_length("gpt-5.5", self._URL, providers)
        assert resolved == 1_050_000

    def test_trailing_slash_insensitive(self):
        """A trailing slash on either side of the URL comparison is ignored."""
        slashed_url = self._URL + "/"

        # Config carries the slash, the runtime URL does not.
        slashed_config = [self._provider(slashed_url, {"m": self._ctx(500_000)})]
        resolved = get_custom_provider_context_length("m", self._URL, slashed_config)
        assert resolved == 500_000

        # Runtime URL carries the slash, the config does not.
        plain_config = [self._provider(self._URL, {"m": self._ctx(500_000)})]
        resolved = get_custom_provider_context_length("m", slashed_url, plain_config)
        assert resolved == 500_000

    def test_returns_none_when_url_does_not_match(self):
        """A different base URL produces no override."""
        providers = [self._provider(self._URL, {"m": self._ctx(400_000)})]
        resolved = get_custom_provider_context_length(
            "m", "https://other.invalid/v1", providers
        )
        assert resolved is None

    def test_returns_none_when_model_does_not_match(self):
        """A model id absent from the entry produces no override."""
        providers = [self._provider(self._URL, {"gpt-5.5": self._ctx(400_000)})]
        resolved = get_custom_provider_context_length(
            "different-model", self._URL, providers
        )
        assert resolved is None

    def test_returns_none_for_string_value(self):
        """A string such as '256K' is not a valid length, so the helper returns None.

        Warning the user about the bad value is the inline startup path's
        job; returning None here lets downstream fallbacks run.
        """
        providers = [self._provider(self._URL, {"m": self._ctx("256K")})]
        resolved = get_custom_provider_context_length("m", self._URL, providers)
        assert resolved is None

    def test_returns_none_for_zero_or_negative(self):
        """Zero and negative lengths are rejected."""
        for bad in (0, -1, -100):
            providers = [self._provider(self._URL, {"m": self._ctx(bad)})]
            resolved = get_custom_provider_context_length("m", self._URL, providers)
            assert resolved is None, f"value {bad!r} should be rejected"

    def test_empty_inputs_return_none(self):
        """An empty model, empty URL, or missing/empty provider list yields None."""
        cases = (
            # Empty model id, even though the config has an "" entry.
            ("", "http://x", [self._provider("http://x", {"": self._ctx(1)})]),
            # Empty base URL, even though the config has a "" URL.
            ("m", "", [self._provider("", {"m": self._ctx(1)})]),
            ("m", "http://x", None),
            ("m", "http://x", []),
        )
        for model, base_url, providers in cases:
            assert get_custom_provider_context_length(model, base_url, providers) is None

    def test_ignores_non_dict_entries(self):
        """Malformed entries are skipped without raising; the valid one still matches."""
        providers = [
            "not a dict",
            None,
            self._provider(self._URL, "not a dict"),
            self._provider(self._URL, {"m": "not a dict"}),
            self._provider(self._URL, {"m": self._ctx(400_000)}),
        ]
        resolved = get_custom_provider_context_length("m", self._URL, providers)
        assert resolved == 400_000
|
||||
|
||||
|
||||
class TestGetModelContextLengthHonorsOverride:
    """agent.model_metadata.get_model_context_length must honor the
    custom_providers override at step 0b — before any probe, cache hit,
    or models.dev lookup can override it.
    """

    def _mock_all_probes(self):
        """Return the not-yet-started patches that stub out downstream resolution.

        The list replaces, on ``agent.model_metadata``, the cache lookup, both
        metadata fetchers and the two endpoint classifiers with fixed
        "nothing found" return values.  The patches are returned unstarted;
        ``_resolve_with_probes_disabled`` applies and removes them safely.
        """
        from agent import model_metadata as _mm
        return [
            patch.object(_mm, "get_cached_context_length", return_value=None),
            patch.object(_mm, "fetch_endpoint_model_metadata", return_value={}),
            patch.object(_mm, "fetch_model_metadata", return_value={}),
            patch.object(_mm, "is_local_endpoint", return_value=False),
            patch.object(_mm, "_is_known_provider_base_url", return_value=False),
        ]

    def _resolve_with_probes_disabled(self, model, **kwargs):
        """Call ``get_model_context_length(model, **kwargs)`` with every probe stubbed.

        The patches are entered through an ``ExitStack`` so they are always
        undone — including the case where a later patch fails to start, which
        a bare ``start()`` loop placed outside ``try`` would leak into
        subsequent tests.
        """
        from contextlib import ExitStack

        from agent.model_metadata import get_model_context_length

        with ExitStack() as stack:
            for probe_patch in self._mock_all_probes():
                stack.enter_context(probe_patch)
            return get_model_context_length(model, **kwargs)

    def test_custom_providers_override_wins_over_default_fallback(self):
        """With all probes stubbed, the per-model custom_providers value is returned."""
        custom = [
            {
                "base_url": "https://example.invalid/v1",
                "models": {"gpt-5.5": {"context_length": 1_050_000}},
            }
        ]
        ctx = self._resolve_with_probes_disabled(
            "gpt-5.5",
            base_url="https://example.invalid/v1",
            provider="custom",
            custom_providers=custom,
        )
        assert ctx == 1_050_000

    def test_explicit_config_context_length_still_wins(self):
        """Top-level model.context_length (step 0) outranks custom_providers (step 0b).

        Users who set both should see the top-level value — that's the
        documented precedence and matches the long-standing step-0 behavior.
        """
        from agent.model_metadata import get_model_context_length
        custom = [
            {
                "base_url": "https://example.invalid/v1",
                "models": {"m": {"context_length": 1_050_000}},
            }
        ]
        # No probe stubbing here, matching the original test.
        ctx = get_model_context_length(
            "m",
            base_url="https://example.invalid/v1",
            provider="custom",
            config_context_length=500_000,  # explicit top-level wins
            custom_providers=custom,
        )
        assert ctx == 500_000

    def test_no_override_falls_through_to_default(self):
        """With custom_providers=None and all probes disabled, resolver
        returns DEFAULT_FALLBACK_CONTEXT (256K after the stepdown bump).
        """
        from agent.model_metadata import DEFAULT_FALLBACK_CONTEXT

        ctx = self._resolve_with_probes_disabled(
            "unknown-model",
            base_url="https://example.invalid/v1",
            provider="custom",
            custom_providers=None,
        )
        assert ctx == DEFAULT_FALLBACK_CONTEXT
|
||||
|
||||
|
||||
class TestContextProbeTiers:
    """Sanity checks on the context-probe tier table and the fallback constant."""

    def test_256k_is_top_tier_and_default(self):
        """256K is both the first probe tier and the default fallback.

        Also checks that the tiers strictly descend and that 128K is still
        one of them.
        """
        from agent import model_metadata

        tiers = model_metadata.CONTEXT_PROBE_TIERS
        fallback = model_metadata.DEFAULT_FALLBACK_CONTEXT

        top_tier = tiers[0]
        assert top_tier == 256_000
        assert fallback == 256_000

        # Each tier must be strictly larger than the one after it.
        neighbours = zip(tiers, tiers[1:])
        for a, b in neighbours:
            assert a > b, f"tiers must strictly descend, got {a} then {b}"

        # 128K must remain reachable when probing downward.
        assert 128_000 in tiers
|
||||
@@ -52,7 +52,12 @@ class TestCustomProviderModelSwitch:
|
||||
_model_flow_named_custom({}, provider_info)
|
||||
|
||||
# fetch_api_models MUST be called even though model was saved
|
||||
mock_fetch.assert_called_once_with("sk-test", "https://vllm.example.com/v1", timeout=8.0)
|
||||
mock_fetch.assert_called_once_with(
|
||||
"sk-test",
|
||||
"https://vllm.example.com/v1",
|
||||
timeout=8.0,
|
||||
api_mode=None,
|
||||
)
|
||||
|
||||
def test_can_switch_to_different_model(self, config_home):
|
||||
"""User selects a different model than the saved one."""
|
||||
@@ -173,3 +178,147 @@ class TestCustomProviderModelSwitch:
|
||||
model = config.get("model")
|
||||
assert isinstance(model, dict)
|
||||
assert "api_mode" not in model, "Stale api_mode should be removed"
|
||||
|
||||
def test_env_template_api_key_is_preserved_in_model_config(self, config_home, monkeypatch):
    """Selecting an env-backed custom provider must not inline the secret.

    Writes a config whose custom provider stores ``api_key`` as a
    ``${EXAMPLE_PROVIDER_API_KEY}`` template, runs ``_model_flow_named_custom``
    with the resolved secret in ``provider_info["api_key"]`` and the template
    in ``provider_info["api_key_ref"]``, then checks that:

    * ``fetch_api_models`` was probed with the resolved secret, and
    * the saved ``config.yaml`` contains only the template, never the secret.
    """
    import yaml
    from hermes_cli.main import _model_flow_named_custom

    secret = "sk-live-example-provider"
    key_ref = "${EXAMPLE_PROVIDER_API_KEY}"
    base_url = "https://api.example-provider.test/v1"

    config_path = config_home / "config.yaml"
    # Nested keys use two-space indentation: the continuation keys of the
    # ``- name:`` list item must line up with ``name`` for the YAML to parse.
    config_path.write_text(
        "model:\n"
        "  default: old-model\n"
        "  provider: openrouter\n"
        "custom_providers:\n"
        "- name: Example Provider\n"
        "  base_url: https://api.example-provider.test/v1\n"
        "  api_key: ${EXAMPLE_PROVIDER_API_KEY}\n"
        "  model: qwen3.6-35b-fast\n",
        encoding="utf-8",
    )
    monkeypatch.setenv("EXAMPLE_PROVIDER_API_KEY", secret)

    provider_info = {
        "name": "Example Provider",
        "base_url": base_url,
        "api_key": secret,
        "api_key_ref": key_ref,
        "model": "qwen3.6-35b-fast",
    }

    # ``simple_term_menu`` is blanked out in ``sys.modules`` so importing it
    # fails; ``input`` answers "1" and ``print`` is silenced.
    with patch("hermes_cli.models.fetch_api_models", return_value=["qwen3.6-35b-fast"]) as mock_fetch, \
            patch.dict("sys.modules", {"simple_term_menu": None}), \
            patch("builtins.input", return_value="1"), \
            patch("builtins.print"):
        _model_flow_named_custom({}, provider_info)

    mock_fetch.assert_called_once_with(
        secret,
        base_url,
        timeout=8.0,
        api_mode=None,
    )

    # Read the file once and run both the structured and raw-text checks on it.
    saved = config_path.read_text(encoding="utf-8")
    config = yaml.safe_load(saved) or {}
    assert config["model"]["api_key"] == key_ref
    assert config["custom_providers"][0]["api_key"] == key_ref
    assert secret not in saved
|
||||
|
||||
def test_key_env_custom_provider_persists_reference_not_secret(self, config_home, monkeypatch):
    """key_env custom providers should also avoid writing plaintext keys.

    The provider entry names its key through ``key_env`` and
    ``provider_info["api_key"]`` is empty. After ``_model_flow_named_custom``
    runs, ``model.api_key`` must be the ``${EXAMPLE_PROVIDER_API_KEY}``
    template, the provider entry must still carry ``key_env``, and the secret
    value must not appear anywhere in the saved file.
    """
    import yaml
    from hermes_cli.main import _model_flow_named_custom

    secret = "sk-live-example-provider"

    config_path = config_home / "config.yaml"
    # Nested keys use two-space indentation: the continuation keys of the
    # ``- name:`` list item must line up with ``name`` for the YAML to parse.
    config_path.write_text(
        "model:\n"
        "  default: old-model\n"
        "custom_providers:\n"
        "- name: Example Provider\n"
        "  base_url: https://api.example-provider.test/v1\n"
        "  key_env: EXAMPLE_PROVIDER_API_KEY\n"
        "  model: qwen3.6-35b-fast\n",
        encoding="utf-8",
    )
    monkeypatch.setenv("EXAMPLE_PROVIDER_API_KEY", secret)

    provider_info = {
        "name": "Example Provider",
        "base_url": "https://api.example-provider.test/v1",
        "api_key": "",
        "key_env": "EXAMPLE_PROVIDER_API_KEY",
        "model": "qwen3.6-35b-fast",
    }

    with patch("hermes_cli.models.fetch_api_models", return_value=["qwen3.6-35b-fast"]), \
            patch.dict("sys.modules", {"simple_term_menu": None}), \
            patch("builtins.input", return_value="1"), \
            patch("builtins.print"):
        _model_flow_named_custom({}, provider_info)

    # Read the file once and run both the structured and raw-text checks on it.
    saved = config_path.read_text(encoding="utf-8")
    config = yaml.safe_load(saved) or {}
    assert config["model"]["api_key"] == "${EXAMPLE_PROVIDER_API_KEY}"
    assert config["custom_providers"][0]["key_env"] == "EXAMPLE_PROVIDER_API_KEY"
    assert secret not in saved
|
||||
|
||||
def test_env_ref_base_url_preserves_api_key_ref_through_picker(
    self, config_home, monkeypatch
):
    """Integration regression: when BOTH ``base_url`` and ``api_key`` use
    ``${VAR}`` templates (the Discord-reported NeuralWatt case), the picker
    must still preserve the env reference in ``model.api_key``.

    The earlier lookup went through ``get_compatible_custom_providers``
    which dropped entries whose ``base_url`` was an env-ref template
    (``urlparse("${NEURALWATT_API_BASE}")`` has no scheme/netloc), causing
    ``api_key_ref`` to stay empty and the resolved secret to be written to
    ``config.yaml``. This test drives the real picker-callsite code path.
    """
    import yaml
    from hermes_cli.main import select_provider_and_model

    secret = "sk-live-neuralwatt-secret"
    key_ref = "${NEURALWATT_API_KEY}"

    config_path = config_home / "config.yaml"
    # Nested keys use two-space indentation: the continuation keys of the
    # ``- name:`` list item must line up with ``name`` for the YAML to parse.
    config_path.write_text(
        "model:\n"
        "  default: old-model\n"
        "  provider: openrouter\n"
        "custom_providers:\n"
        "- name: NeuralWatt\n"
        "  base_url: ${NEURALWATT_API_BASE}\n"
        "  api_key: ${NEURALWATT_API_KEY}\n"
        "  model: qwen3.6-35b-fast\n"
        "  models: []\n",
        encoding="utf-8",
    )
    monkeypatch.setenv("NEURALWATT_API_BASE", "https://api.neuralwatt.com/v1")
    monkeypatch.setenv("NEURALWATT_API_KEY", secret)

    # Exercise the real picker: select "custom:neuralwatt" from the
    # provider menu. ``select_provider_and_model`` prompts for a provider
    # choice (returns an index), then hands off to
    # ``_model_flow_named_custom`` with the provider_info built by
    # ``_named_custom_provider_map``.
    def _pick_neuralwatt(labels, default=0):
        for i, label in enumerate(labels):
            if "NeuralWatt" in label:
                return i
        raise AssertionError(
            f"NeuralWatt entry missing from provider menu: {labels}"
        )

    with patch("hermes_cli.main._prompt_provider_choice",
               side_effect=_pick_neuralwatt), \
            patch("hermes_cli.models.fetch_api_models",
                  return_value=["qwen3.6-35b-fast"]) as mock_fetch, \
            patch.dict("sys.modules", {"simple_term_menu": None}), \
            patch("builtins.input", return_value="1"), \
            patch("builtins.print"):
        select_provider_and_model()

    # The live probe must still use the resolved secret.
    mock_fetch.assert_called_once()
    probe_args, _ = mock_fetch.call_args
    assert probe_args[0] == secret

    # But config.yaml must keep the env reference, not the plaintext secret.
    saved = config_path.read_text(encoding="utf-8")
    config = yaml.safe_load(saved) or {}
    assert config["model"]["api_key"] == key_ref
    assert config["custom_providers"][0]["api_key"] == key_ref
    assert secret not in saved
|
||||
|
||||
@@ -308,6 +308,43 @@ def test_run_doctor_accepts_named_provider_from_providers_section(monkeypatch, t
|
||||
assert "model.provider 'volcengine-plan' is not a recognised provider" not in out
|
||||
|
||||
|
||||
def test_run_doctor_accepts_bare_custom_provider(monkeypatch, tmp_path):
    """``run_doctor`` must not report a bare ``provider: custom`` as unrecognised."""
    hermes_home = tmp_path / ".hermes"
    hermes_home.mkdir(parents=True, exist_ok=True)
    config_text = (
        "model:\n"
        " provider: custom\n"
        " default: local-model\n"
        " base_url: http://localhost:8000/v1\n"
    )
    (hermes_home / "config.yaml").write_text(config_text, encoding="utf-8")

    # Redirect the doctor module's path globals into the temp tree.
    project_root = tmp_path / "project"
    monkeypatch.setattr(doctor_mod, "HERMES_HOME", hermes_home)
    monkeypatch.setattr(doctor_mod, "PROJECT_ROOT", project_root)
    monkeypatch.setattr(doctor_mod, "_DHH", str(hermes_home))
    project_root.mkdir(exist_ok=True)

    # Stand-in ``model_tools`` module: nothing available, nothing required.
    stub_tools = types.SimpleNamespace(
        check_tool_availability=lambda *a, **kw: ([], []),
        TOOLSET_REQUIREMENTS={},
    )
    monkeypatch.setitem(sys.modules, "model_tools", stub_tools)

    try:
        from hermes_cli import auth as _auth_mod

        for status_fn in ("get_nous_auth_status", "get_codex_auth_status"):
            monkeypatch.setattr(_auth_mod, status_fn, lambda: {})
    except Exception:
        # Best-effort stubbing: the test proceeds even if the auth module
        # cannot be imported or patched.
        pass

    captured = io.StringIO()
    with contextlib.redirect_stdout(captured):
        doctor_mod.run_doctor(Namespace(fix=False))

    report = captured.getvalue()
    assert "model.provider 'custom' is not a recognised provider" not in report
|
||||
|
||||
|
||||
def test_run_doctor_termux_does_not_mark_browser_available_without_agent_browser(monkeypatch, tmp_path):
|
||||
home = tmp_path / ".hermes"
|
||||
home.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
@@ -0,0 +1,486 @@
|
||||
"""Tests for `hermes fallback` — chain reading, add/remove/clear, legacy migration."""
|
||||
from __future__ import annotations
|
||||
|
||||
import io
|
||||
import types
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
import yaml
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Shared fixture — isolate HERMES_HOME so save_config writes to tmp_path
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@pytest.fixture()
def isolated_home(tmp_path, monkeypatch):
    """Redirect ``Path.home()`` and ``HERMES_HOME`` into ``tmp_path``.

    Creates ``tmp_path/.hermes`` and returns ``tmp_path`` itself (the fake
    home directory), not the ``.hermes`` directory.
    """
    hermes_dir = tmp_path / ".hermes"
    monkeypatch.setattr(Path, "home", lambda: tmp_path)
    hermes_dir.mkdir(exist_ok=True)
    monkeypatch.setenv("HERMES_HOME", str(hermes_dir))
    return tmp_path
|
||||
|
||||
|
||||
def _write_config(home: Path, data: dict) -> None:
    """Dump *data* as YAML into ``<home>/.hermes/config.yaml`` (UTF-8)."""
    serialised = yaml.safe_dump(data)
    target = home / ".hermes" / "config.yaml"
    target.write_text(serialised, encoding="utf-8")
|
||||
|
||||
|
||||
def _read_config(home: Path) -> dict:
    """Load ``<home>/.hermes/config.yaml``; an empty document yields ``{}``."""
    raw_text = (home / ".hermes" / "config.yaml").read_text(encoding="utf-8")
    parsed = yaml.safe_load(raw_text)
    return parsed or {}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _read_chain / _write_chain
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestReadChain:
    """``_read_chain`` across the config shapes exercised below."""

    def test_returns_empty_list_when_unset(self):
        """An empty config produces an empty chain."""
        from hermes_cli.fallback_cmd import _read_chain

        chain = _read_chain({})
        assert chain == []

    def test_reads_new_list_format(self):
        """A ``fallback_providers`` list is returned entry for entry."""
        from hermes_cli.fallback_cmd import _read_chain

        expected = [
            {"provider": "openrouter", "model": "anthropic/claude-sonnet-4.6"},
            {"provider": "nous", "model": "Hermes-4-Llama-3.1-405B"},
        ]
        cfg = {"fallback_providers": [dict(entry) for entry in expected]}
        assert _read_chain(cfg) == expected

    def test_migrates_legacy_single_dict(self):
        """A legacy ``fallback_model`` dict is surfaced as a one-entry chain."""
        from hermes_cli.fallback_cmd import _read_chain

        legacy_entry = {"provider": "openrouter", "model": "gpt-5.4"}
        chain = _read_chain({"fallback_model": dict(legacy_entry)})
        assert chain == [legacy_entry]

    def test_skips_incomplete_entries(self):
        """Entries lacking a provider or model, and non-dict items, are dropped."""
        from hermes_cli.fallback_cmd import _read_chain

        noisy_entries = [
            {"provider": "openrouter"},  # missing model
            {"model": "gpt-5.4"},  # missing provider
            {"provider": "nous", "model": "foo"},  # valid
            "not-a-dict",  # noise
        ]
        chain = _read_chain({"fallback_providers": noisy_entries})
        assert chain == [{"provider": "nous", "model": "foo"}]

    def test_returns_copies_not_aliases(self):
        """Mutating a returned entry must not touch the source config."""
        from hermes_cli.fallback_cmd import _read_chain

        cfg = {"fallback_providers": [{"provider": "nous", "model": "foo"}]}
        chain = _read_chain(cfg)
        chain[0]["provider"] = "mutated"
        original_entry = cfg["fallback_providers"][0]
        assert original_entry["provider"] == "nous"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _extract_fallback_from_model_cfg
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestExtractFallback:
    """``_extract_fallback_from_model_cfg`` for complete, partial and invalid input."""

    def test_extracts_from_default_field(self):
        """``default`` in the model config becomes ``model`` in the result."""
        from hermes_cli.fallback_cmd import _extract_fallback_from_model_cfg

        extracted = _extract_fallback_from_model_cfg(
            {"provider": "openrouter", "default": "anthropic/claude-sonnet-4.6"}
        )
        expected = {
            "provider": "openrouter",
            "model": "anthropic/claude-sonnet-4.6",
        }
        assert extracted == expected

    def test_extracts_optional_base_url_and_api_mode(self):
        """``base_url`` and ``api_mode`` are carried over when present."""
        from hermes_cli.fallback_cmd import _extract_fallback_from_model_cfg

        extracted = _extract_fallback_from_model_cfg(
            {
                "provider": "custom",
                "default": "local-model",
                "base_url": "http://localhost:11434/v1",
                "api_mode": "chat_completions",
            }
        )
        expected = {
            "provider": "custom",
            "model": "local-model",
            "base_url": "http://localhost:11434/v1",
            "api_mode": "chat_completions",
        }
        assert extracted == expected

    def test_returns_none_without_provider(self):
        """No ``provider`` key means no fallback entry."""
        from hermes_cli.fallback_cmd import _extract_fallback_from_model_cfg

        extracted = _extract_fallback_from_model_cfg({"default": "foo"})
        assert extracted is None

    def test_returns_none_without_model(self):
        """No ``default`` key means no fallback entry."""
        from hermes_cli.fallback_cmd import _extract_fallback_from_model_cfg

        extracted = _extract_fallback_from_model_cfg({"provider": "openrouter"})
        assert extracted is None

    def test_returns_none_for_non_dict(self):
        """A plain string or ``None`` yields ``None``."""
        from hermes_cli.fallback_cmd import _extract_fallback_from_model_cfg

        for not_a_dict in ("plain-string", None):
            assert _extract_fallback_from_model_cfg(not_a_dict) is None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# cmd_fallback_list
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestListCommand:
    """Printed output of ``cmd_fallback_list`` for empty, populated and legacy configs."""

    def test_list_empty(self, isolated_home, capsys):
        """An empty config prints the "nothing configured" hint."""
        _write_config(isolated_home, {})
        from hermes_cli.fallback_cmd import cmd_fallback_list

        cmd_fallback_list(types.SimpleNamespace())
        printed = capsys.readouterr().out
        assert "No fallback providers configured" in printed
        assert "hermes fallback add" in printed

    def test_list_with_entries(self, isolated_home, capsys):
        """Both fallback entries and the primary model appear in the listing."""
        config = {
            "model": {"provider": "anthropic", "default": "claude-sonnet-4-6"},
            "fallback_providers": [
                {"provider": "openrouter", "model": "anthropic/claude-sonnet-4.6"},
                {"provider": "nous", "model": "Hermes-4"},
            ],
        }
        _write_config(isolated_home, config)
        from hermes_cli.fallback_cmd import cmd_fallback_list

        cmd_fallback_list(types.SimpleNamespace())
        printed = capsys.readouterr().out
        assert "Fallback chain (2 entries)" in printed
        assert "anthropic/claude-sonnet-4.6" in printed
        assert "Hermes-4" in printed
        # Primary should be shown too
        assert "claude-sonnet-4-6" in printed

    def test_list_migrates_legacy_for_display(self, isolated_home, capsys):
        """A legacy ``fallback_model`` entry is listed as a single-entry chain."""
        config = {"fallback_model": {"provider": "openrouter", "model": "gpt-5.4"}}
        _write_config(isolated_home, config)
        from hermes_cli.fallback_cmd import cmd_fallback_list

        cmd_fallback_list(types.SimpleNamespace())
        printed = capsys.readouterr().out
        assert "1 entry" in printed
        assert "gpt-5.4" in printed
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# cmd_fallback_add — mock select_provider_and_model
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestAddCommand:
    """``cmd_fallback_add`` with ``select_provider_and_model`` swapped for a fake picker."""

    @staticmethod
    def _picker_saving(model_cfg):
        """Return a fake picker that overwrites ``config["model"]`` with *model_cfg*.

        Mirrors what the tests expect of the real picker: it writes the
        selection into ``config["model"]`` and saves the config.
        """

        def fake_picker(args=None):
            from hermes_cli.config import load_config, save_config

            cfg = load_config()
            cfg["model"] = dict(model_cfg)
            save_config(cfg)

        return fake_picker

    @staticmethod
    def _run_add(picker):
        """Call ``cmd_fallback_add`` with the picker and the TTY check patched out."""
        with patch("hermes_cli.main.select_provider_and_model", side_effect=picker), \
                patch("hermes_cli.main._require_tty"):
            from hermes_cli.fallback_cmd import cmd_fallback_add

            cmd_fallback_add(types.SimpleNamespace())

    def test_add_appends_new_entry(self, isolated_home, capsys):
        """A fresh selection is appended and the primary model is untouched."""
        _write_config(isolated_home, {
            "model": {"provider": "anthropic", "default": "claude-sonnet-4-6"},
        })

        picked = {
            "provider": "openrouter",
            "default": "anthropic/claude-sonnet-4.6",
            "base_url": "https://openrouter.ai/api/v1",
            "api_mode": "chat_completions",
        }
        self._run_add(self._picker_saving(picked))

        saved = _read_config(isolated_home)
        # Primary is preserved
        assert saved["model"]["provider"] == "anthropic"
        assert saved["model"]["default"] == "claude-sonnet-4-6"
        # Fallback was appended
        expected_chain = [
            {
                "provider": "openrouter",
                "model": "anthropic/claude-sonnet-4.6",
                "base_url": "https://openrouter.ai/api/v1",
                "api_mode": "chat_completions",
            }
        ]
        assert saved["fallback_providers"] == expected_chain
        printed = capsys.readouterr().out
        assert "Added fallback" in printed

    def test_add_rejects_duplicate(self, isolated_home, capsys):
        """Picking an entry already in the chain leaves the chain unchanged."""
        _write_config(isolated_home, {
            "model": {"provider": "anthropic", "default": "claude-sonnet-4-6"},
            "fallback_providers": [
                {"provider": "openrouter", "model": "gpt-5.4"},
            ],
        })

        self._run_add(
            self._picker_saving({"provider": "openrouter", "default": "gpt-5.4"})
        )

        saved = _read_config(isolated_home)
        # Should still have exactly one entry
        assert len(saved["fallback_providers"]) == 1
        printed = capsys.readouterr().out
        assert "already in the fallback chain" in printed

    def test_add_rejects_same_as_primary(self, isolated_home, capsys):
        """Picking the current primary again adds nothing."""
        _write_config(isolated_home, {
            "model": {"provider": "openrouter", "default": "gpt-5.4"},
        })

        # User picks the same thing that's already the primary
        self._run_add(
            self._picker_saving({"provider": "openrouter", "default": "gpt-5.4"})
        )

        saved = _read_config(isolated_home)
        assert "fallback_providers" not in saved or saved["fallback_providers"] == []
        printed = capsys.readouterr().out
        assert "matches the current primary" in printed

    def test_add_preserves_primary_when_picker_changes_it(self, isolated_home):
        """The picker mutates config["model"]; fallback_add must restore the primary."""
        _write_config(isolated_home, {
            "model": {
                "provider": "anthropic",
                "default": "claude-sonnet-4-6",
                "base_url": "https://api.anthropic.com",
                "api_mode": "anthropic_messages",
            },
        })

        picked = {
            "provider": "openrouter",
            "default": "anthropic/claude-sonnet-4.6",
            "base_url": "https://openrouter.ai/api/v1",
            "api_mode": "chat_completions",
        }
        self._run_add(self._picker_saving(picked))

        saved = _read_config(isolated_home)
        primary = saved["model"]
        # Primary exactly as it was
        assert primary["provider"] == "anthropic"
        assert primary["default"] == "claude-sonnet-4-6"
        assert primary["base_url"] == "https://api.anthropic.com"
        assert primary["api_mode"] == "anthropic_messages"
        # Fallback added
        assert len(saved["fallback_providers"]) == 1
        assert saved["fallback_providers"][0]["provider"] == "openrouter"

    def test_add_noop_when_picker_cancelled(self, isolated_home, capsys):
        """A picker that changes nothing must not add a fallback."""
        _write_config(isolated_home, {
            "model": {"provider": "anthropic", "default": "claude-sonnet-4-6"},
        })

        def cancelled_picker(args=None):
            # User cancelled — no change to config
            return None

        self._run_add(cancelled_picker)

        saved = _read_config(isolated_home)
        assert "fallback_providers" not in saved or saved["fallback_providers"] == []
        printed = capsys.readouterr().out
        # Either "No fallback added" (picker fully cancelled) or "matches the current primary"
        # (picker left config untouched) — both indicate a non-add outcome.
        assert ("No fallback added" in printed) or ("matches the current primary" in printed)

    def test_add_noop_when_picker_clears_model(self, isolated_home, capsys):
        """Simulate picker explicitly clearing model.default (unusual but possible)."""
        _write_config(isolated_home, {
            "model": {"provider": "anthropic", "default": "claude-sonnet-4-6"},
        })

        self._run_add(self._picker_saving({"provider": "", "default": ""}))

        printed = capsys.readouterr().out
        assert "No fallback added" in printed
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# cmd_fallback_remove
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestRemoveCommand:
    """``cmd_fallback_remove`` with the curses chooser replaced by a fixed index."""

    def test_remove_empty_chain(self, isolated_home, capsys):
        """With no chain configured the command reports there is nothing to remove."""
        _write_config(isolated_home, {})
        from hermes_cli.fallback_cmd import cmd_fallback_remove

        cmd_fallback_remove(types.SimpleNamespace())
        printed = capsys.readouterr().out
        assert "nothing to remove" in printed

    def test_remove_selected_entry(self, isolated_home, capsys):
        """Choosing the middle entry removes exactly that entry."""
        first = {"provider": "openrouter", "model": "gpt-5.4"}
        middle = {"provider": "nous", "model": "Hermes-4"}
        last = {"provider": "anthropic", "model": "claude-sonnet-4-6"}
        _write_config(isolated_home, {"fallback_providers": [first, middle, last]})

        # Picker returns index 1 (the middle entry, "nous / Hermes-4")
        chooser = patch("hermes_cli.setup._curses_prompt_choice", return_value=1)
        with chooser:
            from hermes_cli.fallback_cmd import cmd_fallback_remove

            cmd_fallback_remove(types.SimpleNamespace())

        saved = _read_config(isolated_home)
        assert saved["fallback_providers"] == [
            {"provider": "openrouter", "model": "gpt-5.4"},
            {"provider": "anthropic", "model": "claude-sonnet-4-6"},
        ]
        printed = capsys.readouterr().out
        assert "Removed fallback" in printed
        assert "Hermes-4" in printed

    def test_remove_cancel_keeps_chain(self, isolated_home):
        """Choosing the trailing cancel item leaves the chain as it was."""
        _write_config(isolated_home, {
            "fallback_providers": [
                {"provider": "openrouter", "model": "gpt-5.4"},
            ],
        })

        # Cancel = last item (index == len(chain) == 1 in our menu)
        chooser = patch("hermes_cli.setup._curses_prompt_choice", return_value=1)
        with chooser:
            from hermes_cli.fallback_cmd import cmd_fallback_remove

            cmd_fallback_remove(types.SimpleNamespace())

        remaining = _read_config(isolated_home)["fallback_providers"]
        assert len(remaining) == 1
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# cmd_fallback_clear
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestClearCommand:
    """``cmd_fallback_clear`` for an empty chain, a confirmed clear and a declined clear."""

    def test_clear_empty_chain(self, isolated_home, capsys):
        """With no chain configured the command reports there is nothing to clear."""
        _write_config(isolated_home, {})
        from hermes_cli.fallback_cmd import cmd_fallback_clear

        cmd_fallback_clear(types.SimpleNamespace())
        printed = capsys.readouterr().out
        assert "nothing to clear" in printed

    def test_clear_with_confirmation(self, isolated_home, capsys, monkeypatch):
        """Answering "y" at the prompt empties the chain."""
        chain = [
            {"provider": "openrouter", "model": "gpt-5.4"},
            {"provider": "nous", "model": "Hermes-4"},
        ]
        _write_config(isolated_home, {"fallback_providers": chain})
        monkeypatch.setattr("builtins.input", lambda *a, **kw: "y")
        from hermes_cli.fallback_cmd import cmd_fallback_clear

        cmd_fallback_clear(types.SimpleNamespace())

        saved = _read_config(isolated_home)
        assert saved.get("fallback_providers") == []
        printed = capsys.readouterr().out
        assert "Fallback chain cleared" in printed

    def test_clear_cancelled(self, isolated_home, monkeypatch):
        """Answering "n" at the prompt keeps the chain."""
        chain = [{"provider": "openrouter", "model": "gpt-5.4"}]
        _write_config(isolated_home, {"fallback_providers": chain})
        monkeypatch.setattr("builtins.input", lambda *a, **kw: "n")
        from hermes_cli.fallback_cmd import cmd_fallback_clear

        cmd_fallback_clear(types.SimpleNamespace())

        remaining = _read_config(isolated_home)["fallback_providers"]
        assert len(remaining) == 1
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# cmd_fallback dispatcher
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestDispatcher:
    """Routing of ``cmd_fallback`` by the ``fallback_command`` attribute."""

    @staticmethod
    def _dispatch(subcommand):
        """Run ``cmd_fallback`` with ``fallback_command`` set to *subcommand*."""
        from hermes_cli.fallback_cmd import cmd_fallback

        cmd_fallback(types.SimpleNamespace(fallback_command=subcommand))

    def test_no_subcommand_lists(self, isolated_home, capsys):
        """No subcommand produces the same output as listing an empty chain."""
        _write_config(isolated_home, {})
        self._dispatch(None)
        printed = capsys.readouterr().out
        assert "No fallback providers configured" in printed

    def test_list_alias(self, isolated_home, capsys):
        """``ls`` produces the list output."""
        _write_config(isolated_home, {})
        self._dispatch("ls")
        printed = capsys.readouterr().out
        assert "No fallback providers configured" in printed

    def test_remove_alias(self, isolated_home, capsys):
        """``rm`` produces the remove output."""
        _write_config(isolated_home, {})
        self._dispatch("rm")
        printed = capsys.readouterr().out
        assert "nothing to remove" in printed

    def test_unknown_subcommand_exits(self, isolated_home):
        """An unrecognised subcommand raises ``SystemExit``."""
        _write_config(isolated_home, {})
        from hermes_cli.fallback_cmd import cmd_fallback

        unknown = types.SimpleNamespace(fallback_command="nope")
        with pytest.raises(SystemExit):
            cmd_fallback(unknown)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# argparse wiring — verify the subparser is registered
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestArgparseWiring:
    """Verify `hermes fallback` is wired into main.py's argparse tree.

    main() builds the parser inline, so the test runs
    ``python -m hermes_cli.main fallback --help`` in a subprocess and
    inspects the help text for the registered subcommands.
    """

    def test_fallback_help_lists_subcommands(self):
        """``fallback --help`` exits 0 and mentions every subcommand."""
        import subprocess
        import sys

        command = [sys.executable, "-m", "hermes_cli.main", "fallback", "--help"]
        proc = subprocess.run(command, capture_output=True, text=True, timeout=30)

        # --help exits 0
        assert proc.returncode == 0, f"stderr: {proc.stderr}"

        combined = proc.stdout + proc.stderr
        # All four subcommands should appear in help
        for subcommand in ("list", "add", "remove", "clear"):
            assert subcommand in combined
|
||||
@@ -0,0 +1,210 @@
|
||||
"""Tests for the kanban CLI surface (hermes_cli.kanban)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from hermes_cli import kanban as kc
|
||||
from hermes_cli import kanban_db as kb
|
||||
|
||||
|
||||
@pytest.fixture
def kanban_home(tmp_path, monkeypatch):
    """Create ``tmp_path/.hermes``, point the environment at it and init the kanban DB.

    Sets ``HERMES_HOME`` and ``Path.home()`` before calling ``kb.init_db()``;
    returns the ``.hermes`` directory.
    """
    hermes_dir = tmp_path / ".hermes"
    hermes_dir.mkdir()
    monkeypatch.setenv("HERMES_HOME", str(hermes_dir))
    monkeypatch.setattr(Path, "home", lambda: tmp_path)
    # Initialise only after the environment is redirected.
    kb.init_db()
    return hermes_dir
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Workspace flag parsing
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@pytest.mark.parametrize(
    "value,expected",
    [
        pytest.param("scratch", ("scratch", None)),
        pytest.param("worktree", ("worktree", None)),
        pytest.param("dir:/tmp/work", ("dir", "/tmp/work")),
    ],
)
def test_parse_workspace_flag_valid(value, expected):
    """Each accepted flag value parses to its ``(kind, path)`` pair."""
    parsed = kc._parse_workspace_flag(value)
    assert parsed == expected
|
||||
|
||||
|
||||
def test_parse_workspace_flag_expands_user():
    """A ``dir:~/...`` value comes back with the leading ``~`` expanded."""
    parsed = kc._parse_workspace_flag("dir:~/vault")
    kind, path = parsed
    assert kind == "dir"
    assert path.endswith("/vault")
    still_has_tilde = path.startswith("~")
    assert not still_has_tilde
|
||||
|
||||
|
||||
@pytest.mark.parametrize("bad", ["cloud", "dir:", "", "worktree:/x"])
def test_parse_workspace_flag_rejects(bad):
    """Malformed values raise ``ArgumentTypeError``; the empty string is the exception."""
    if bad:
        with pytest.raises(argparse.ArgumentTypeError):
            kc._parse_workspace_flag(bad)
    else:
        # Empty -> defaults; not an error.
        assert kc._parse_workspace_flag(bad) == ("scratch", None)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# run_slash smoke tests (end-to-end via the same entry both CLI and gateway use)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_run_slash_no_args_shows_usage(kanban_home):
    """An empty slash command returns text that reads like usage help."""
    usage = kc.run_slash("").lower()
    assert "kanban" in usage
    usage_markers = ("create", "subcommand", "action")
    assert any(marker in usage for marker in usage_markers)
|
||||
|
||||
|
||||
def test_run_slash_create_and_list(kanban_home):
    """A created task shows up in ``list`` with its title and assignee."""
    created = kc.run_slash("create 'ship feature' --assignee alice")
    assert "Created" in created

    listing = kc.run_slash("list")
    for fragment in ("ship feature", "alice"):
        assert fragment in listing
|
||||
|
||||
|
||||
def test_run_slash_create_with_parent_and_cascade(kanban_home):
    """A child created with ``--parent`` starts as todo and is ready once the parent completes."""
    import re

    # Parent then child via --parent
    parent_out = kc.run_slash("create 'parent' --assignee alice")
    # Extract the "t_xxxx" id from "Created t_xxxx (ready, ...)"
    found = re.search(r"(t_[a-f0-9]+)", parent_out)
    assert found
    parent_id = found.group(1)

    child_out = kc.run_slash(f"create 'child' --assignee bob --parent {parent_id}")
    assert "todo" in child_out  # child starts as todo

    # Complete parent; list should promote child to ready
    kc.run_slash(f"complete {parent_id}")
    # Explicit filter: child should now be ready (was todo before complete).
    assert "child" in kc.run_slash("list --status ready")
|
||||
|
||||
|
||||
def test_run_slash_show_includes_comments(kanban_home):
    """``show`` output contains comments previously added to the task."""
    import re

    created = kc.run_slash("create 'x'")
    task_id = re.search(r"(t_[a-f0-9]+)", created).group(1)

    kc.run_slash(f"comment {task_id} 'source is paywalled'")
    details = kc.run_slash(f"show {task_id}")
    assert "source is paywalled" in details
|
||||
|
||||
|
||||
def test_run_slash_block_unblock_cycle(kanban_home):
    """A claimed task can be blocked and then unblocked via slash commands."""
    import re

    created = kc.run_slash("create 'x' --assignee alice")
    task_id = re.search(r"(t_[a-f0-9]+)", created).group(1)

    # Claim first so that block finds the task running.
    kc.run_slash(f"claim {task_id}")

    blocked = kc.run_slash(f"block {task_id} 'need decision'")
    assert "Blocked" in blocked

    unblocked = kc.run_slash(f"unblock {task_id}")
    assert "Unblocked" in unblocked
|
||||
|
||||
|
||||
def test_run_slash_json_output(kanban_home):
    """``create --json`` emits a JSON object describing the new task."""
    raw = kc.run_slash("create 'jsontask' --assignee alice --json")
    task = json.loads(raw)

    expected_fields = (
        ("title", "jsontask"),
        ("assignee", "alice"),
        ("status", "ready"),
    )
    for field, value in expected_fields:
        assert task[field] == value
|
||||
|
||||
|
||||
def test_run_slash_dispatch_dry_run_counts(kanban_home):
    """``dispatch --dry-run`` reports a "Spawned:" line."""
    for command in (
        "create 'a' --assignee alice",
        "create 'b' --assignee bob",
    ):
        kc.run_slash(command)

    report = kc.run_slash("dispatch --dry-run")
    assert "Spawned:" in report
|
||||
|
||||
|
||||
def test_run_slash_context_output_format(kanban_home):
    """``context`` output carries the task title, body and comment text."""
    import re

    created = kc.run_slash("create 'tech spec' --assignee alice --body 'write an RFC'")
    task_id = re.search(r"(t_[a-f0-9]+)", created).group(1)
    kc.run_slash(f"comment {task_id} 'remember to include performance section'")

    context = kc.run_slash(f"context {task_id}")
    for fragment in ("tech spec", "write an RFC", "performance section"):
        assert fragment in context
|
||||
|
||||
|
||||
def test_run_slash_tenant_filter(kanban_home):
    """``list --tenant`` shows only the tasks created for that tenant."""
    kc.run_slash("create 'biz-a task' --tenant biz-a --assignee alice")
    kc.run_slash("create 'biz-b task' --tenant biz-b --assignee alice")

    listing_a = kc.run_slash("list --tenant biz-a")
    listing_b = kc.run_slash("list --tenant biz-b")

    assert "biz-a task" in listing_a
    assert "biz-b task" not in listing_a
    assert "biz-b task" in listing_b
    assert "biz-a task" not in listing_b
|
||||
|
||||
|
||||
def test_run_slash_usage_error_returns_message(kanban_home):
    """``create`` without its required argument returns usage/error text."""
    message = kc.run_slash("create").lower()
    assert any(marker in message for marker in ("usage", "error"))
|
||||
|
||||
|
||||
def test_run_slash_assign_reassigns(kanban_home):
    """``assign`` reports success and the new assignee appears in ``show``."""
    import re

    created = kc.run_slash("create 'x' --assignee alice")
    task_id = re.search(r"(t_[a-f0-9]+)", created).group(1)

    reply = kc.run_slash(f"assign {task_id} bob")
    assert "Assigned" in reply

    details = kc.run_slash(f"show {task_id}")
    assert "bob" in details
|
||||
|
||||
|
||||
def test_run_slash_link_unlink(kanban_home):
    """Linking two tasks puts the second in todo; the link can be removed."""
    import re

    first_out = kc.run_slash("create 'a'")
    second_out = kc.run_slash("create 'b'")
    id_pattern = r"(t_[a-f0-9]+)"
    first_id = re.search(id_pattern, first_out).group(1)
    second_id = re.search(id_pattern, second_out).group(1)

    linked = kc.run_slash(f"link {first_id} {second_id}")
    assert "Linked" in linked

    # After link, the second task is todo.
    details = kc.run_slash(f"show {second_id}")
    assert "todo" in details

    unlinked = kc.run_slash(f"unlink {first_id} {second_id}")
    assert "Unlinked" in unlinked
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Integration with the COMMAND_REGISTRY
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_kanban_is_resolvable():
    """``resolve_command("kanban")`` returns a command named ``kanban``."""
    from hermes_cli.commands import resolve_command

    resolved = resolve_command("kanban")
    assert resolved is not None
    assert resolved.name == "kanban"
|
||||
|
||||
|
||||
def test_kanban_bypasses_active_session_guard():
    """``should_bypass_active_session`` is truthy for ``kanban``."""
    from hermes_cli.commands import should_bypass_active_session

    bypass = should_bypass_active_session("kanban")
    assert bypass
|
||||
|
||||
|
||||
def test_kanban_in_autocomplete_table():
    """``/kanban`` is in COMMANDS and lists ``create`` and ``dispatch`` subcommands."""
    from hermes_cli.commands import COMMANDS, SUBCOMMANDS

    assert "/kanban" in COMMANDS

    # A missing or empty entry is treated as "no subcommands".
    kanban_subs = SUBCOMMANDS.get("/kanban") or []
    for name in ("create", "dispatch"):
        assert name in kanban_subs
|
||||
|
||||
|
||||
def test_kanban_not_gateway_only():
    """The kanban registry entry is restricted to neither CLI nor gateway."""
    from hermes_cli.commands import COMMAND_REGISTRY

    # kanban is available in BOTH CLI and gateway surfaces.
    entry = next(filter(lambda c: c.name == "kanban", COMMAND_REGISTRY))
    for restriction in ("cli_only", "gateway_only"):
        assert not getattr(entry, restriction)
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,438 @@
|
||||
"""Tests for the Kanban DB layer (hermes_cli.kanban_db)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import concurrent.futures
|
||||
import os
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from hermes_cli import kanban_db as kb
|
||||
|
||||
|
||||
@pytest.fixture
def kanban_home(tmp_path, monkeypatch):
    """Provide a throwaway HERMES_HOME containing a freshly initialised kanban DB."""
    hermes_dir = tmp_path / ".hermes"
    hermes_dir.mkdir()

    # Redirect both the env var and Path.home() into the temp tree.
    monkeypatch.setattr(Path, "home", lambda: tmp_path)
    monkeypatch.setenv("HERMES_HOME", str(hermes_dir))

    kb.init_db()
    return hermes_dir
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Schema / init
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_init_db_is_idempotent(kanban_home):
    """Calling ``init_db`` again must neither fail nor drop existing rows."""
    with kb.connect() as conn:
        kb.create_task(conn, title="persisted")

    # Second initialisation on an already-populated database.
    kb.init_db()

    with kb.connect() as conn:
        remaining = kb.list_tasks(conn)
    assert len(remaining) == 1
    assert remaining[0].title == "persisted"
|
||||
|
||||
|
||||
def test_init_creates_expected_tables(kanban_home):
    """The initialised schema contains the four kanban tables."""
    required = {"tasks", "task_links", "task_comments", "task_events"}
    with kb.connect() as conn:
        cursor = conn.execute(
            "SELECT name FROM sqlite_master WHERE type='table' ORDER BY name"
        )
        present = {row["name"] for row in cursor.fetchall()}
    assert required <= present
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Task creation + status inference
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_create_task_no_parents_is_ready(kanban_home):
    """A parentless task is created ready, with a scratch workspace kind."""
    with kb.connect() as conn:
        task_id = kb.create_task(conn, title="ship it", assignee="alice")
        task = kb.get_task(conn, task_id)

    assert task is not None
    observed = (task.status, task.assignee, task.workspace_kind)
    assert observed == ("ready", "alice", "scratch")
|
||||
|
||||
|
||||
def test_create_task_with_parent_is_todo_until_parent_done(kanban_home):
    """A child stays todo until its parent is completed, then becomes ready."""
    with kb.connect() as conn:
        parent_id = kb.create_task(conn, title="parent")
        child_id = kb.create_task(conn, title="child", parents=[parent_id])

        before = kb.get_task(conn, child_id).status
        assert before == "todo"

        kb.complete_task(conn, parent_id, result="ok")

        after = kb.get_task(conn, child_id).status
        assert after == "ready"
|
||||
|
||||
|
||||
def test_create_task_unknown_parent_errors(kanban_home):
    """Referencing a parent id that does not exist raises ``ValueError``."""
    with kb.connect() as conn:
        with pytest.raises(ValueError, match="unknown parent"):
            kb.create_task(conn, title="orphan", parents=["t_ghost"])
|
||||
|
||||
|
||||
def test_workspace_kind_validation(kanban_home):
    """An unsupported ``workspace_kind`` is rejected with ``ValueError``."""
    with kb.connect() as conn:
        with pytest.raises(ValueError, match="workspace_kind"):
            kb.create_task(conn, title="bad ws", workspace_kind="cloud")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Links + dependency resolution
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_link_demotes_ready_child_to_todo_when_parent_not_done(kanban_home):
    """Linking a ready task under an unfinished parent moves it to todo."""
    with kb.connect() as conn:
        parent_id = kb.create_task(conn, title="a")
        child_id = kb.create_task(conn, title="b")

        status_before = kb.get_task(conn, child_id).status
        assert status_before == "ready"

        kb.link_tasks(conn, parent_id, child_id)

        status_after = kb.get_task(conn, child_id).status
        assert status_after == "todo"
|
||||
|
||||
|
||||
def test_link_keeps_ready_child_when_parent_already_done(kanban_home):
    """Linking under an already-completed parent leaves the child ready."""
    with kb.connect() as conn:
        parent_id = kb.create_task(conn, title="a")
        kb.complete_task(conn, parent_id)

        child_id = kb.create_task(conn, title="b")
        status_before = kb.get_task(conn, child_id).status
        assert status_before == "ready"

        kb.link_tasks(conn, parent_id, child_id)
        status_after = kb.get_task(conn, child_id).status
        assert status_after == "ready"
|
||||
|
||||
|
||||
def test_link_rejects_self_loop(kanban_home):
    """A task cannot be linked as its own parent."""
    with kb.connect() as conn:
        task_id = kb.create_task(conn, title="a")
        with pytest.raises(ValueError, match="itself"):
            kb.link_tasks(conn, task_id, task_id)
|
||||
|
||||
|
||||
def test_link_detects_cycle(kanban_home):
    """Links that would close a loop in the a -> b -> c chain are refused."""
    with kb.connect() as conn:
        a = kb.create_task(conn, title="a")
        b = kb.create_task(conn, title="b", parents=[a])
        c = kb.create_task(conn, title="c", parents=[b])

        # Both the long loop (c -> a) and the short one (b -> a) must fail.
        for parent, child in ((c, a), (b, a)):
            with pytest.raises(ValueError, match="cycle"):
                kb.link_tasks(conn, parent, child)
|
||||
|
||||
|
||||
def test_recompute_ready_cascades_through_chain(kanban_home):
    """Readiness advances one link at a time along an a -> b -> c chain."""
    with kb.connect() as conn:
        a = kb.create_task(conn, title="a")
        b = kb.create_task(conn, title="b", parents=[a])
        c = kb.create_task(conn, title="c", parents=[b])

        initial = [kb.get_task(conn, task_id).status for task_id in (a, b, c)]
        assert initial == ["ready", "todo", "todo"]

        kb.complete_task(conn, a)
        assert kb.get_task(conn, b).status == "ready"

        kb.complete_task(conn, b)
        assert kb.get_task(conn, c).status == "ready"
|
||||
|
||||
|
||||
def test_recompute_ready_fan_in_waits_for_all_parents(kanban_home):
    """A task with two parents becomes ready only after both are complete."""
    with kb.connect() as conn:
        first = kb.create_task(conn, title="a")
        second = kb.create_task(conn, title="b")
        joined = kb.create_task(conn, title="c", parents=[first, second])

        kb.complete_task(conn, first)
        after_one = kb.get_task(conn, joined).status
        assert after_one == "todo"

        kb.complete_task(conn, second)
        after_both = kb.get_task(conn, joined).status
        assert after_both == "ready"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Atomic claim (CAS)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_claim_once_wins_second_loses(kanban_home):
    """The first claim returns a running task; a second claim returns None."""
    with kb.connect() as conn:
        task_id = kb.create_task(conn, title="x", assignee="a")

        winner = kb.claim_task(conn, task_id, claimer="host:1")
        assert winner is not None
        assert winner.status == "running"

        loser = kb.claim_task(conn, task_id, claimer="host:2")
        assert loser is None
|
||||
|
||||
|
||||
def test_claim_fails_on_non_ready(kanban_home):
    """Claiming a task that is in todo returns None."""
    with kb.connect() as conn:
        task_id = kb.create_task(conn, title="x")

        # An unfinished parent pushes the task to todo.
        parent_id = kb.create_task(conn, title="p")
        kb.link_tasks(conn, parent_id, task_id)
        status = kb.get_task(conn, task_id).status
        assert status == "todo"

        claim = kb.claim_task(conn, task_id)
        assert claim is None
|
||||
|
||||
|
||||
def test_stale_claim_reclaimed(kanban_home):
    """A claim whose expiry lies in the past is released back to ready."""
    with kb.connect() as conn:
        task_id = kb.create_task(conn, title="x", assignee="a")
        kb.claim_task(conn, task_id)

        # Backdate the expiry by an hour so the claim counts as stale.
        an_hour_ago = int(time.time()) - 3600
        conn.execute(
            "UPDATE tasks SET claim_expires = ? WHERE id = ?",
            (an_hour_ago, task_id),
        )

        released = kb.release_stale_claims(conn)
        assert released == 1
        assert kb.get_task(conn, task_id).status == "ready"
|
||||
|
||||
|
||||
def test_heartbeat_extends_claim(kanban_home):
    """A heartbeat from the claim holder pushes ``claim_expires`` forward.

    The expiry is forced to 0 after claiming, so the final assertion can only
    pass if ``heartbeat_claim`` itself wrote a new value.
    """
    claimer = "host:hb"
    with kb.connect() as conn:
        t = kb.create_task(conn, title="x", assignee="a")
        kb.claim_task(conn, t, claimer=claimer, ttl_seconds=60)

        # Rewind then heartbeat.
        conn.execute("UPDATE tasks SET claim_expires = ? WHERE id = ?", (0, t))
        ok = kb.heartbeat_claim(conn, t, claimer=claimer, ttl_seconds=3600)
        assert ok

        new = kb.get_task(conn, t).claim_expires
        # With a 3600 s TTL the expiry must land well past "now"; the 3000 s
        # threshold leaves slack for a slow test run.
        assert new > int(time.time()) + 3000
|
||||
|
||||
|
||||
def test_concurrent_claims_only_one_wins(kanban_home):
    """Several threads race to claim one task; exactly one claim succeeds."""
    with kb.connect() as conn:
        task_id = kb.create_task(conn, title="race", assignee="a")

    def try_claim(index):
        # Each worker opens its own connection.
        with kb.connect() as worker_conn:
            return kb.claim_task(worker_conn, task_id, claimer=f"host:{index}")

    thread_count = 8
    with concurrent.futures.ThreadPoolExecutor(max_workers=thread_count) as pool:
        outcomes = list(pool.map(try_claim, range(thread_count)))

    successful = [outcome for outcome in outcomes if outcome is not None]
    assert len(successful) == 1
    assert successful[0].status == "running"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Complete / block / unblock / archive / assign
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_complete_records_result(kanban_home):
    """Completing a task stores status, result text and a completion time."""
    with kb.connect() as conn:
        task_id = kb.create_task(conn, title="x")
        completed = kb.complete_task(conn, task_id, result="done and dusted")
        assert completed

        stored = kb.get_task(conn, task_id)
        assert stored.status == "done"
        assert stored.result == "done and dusted"
        assert stored.completed_at is not None
|
||||
|
||||
|
||||
def test_block_then_unblock(kanban_home):
    """A claimed task can be blocked, and unblocking returns it to ready."""
    with kb.connect() as conn:
        task_id = kb.create_task(conn, title="x", assignee="a")
        kb.claim_task(conn, task_id)

        blocked = kb.block_task(conn, task_id, reason="need input")
        assert blocked
        assert kb.get_task(conn, task_id).status == "blocked"

        unblocked = kb.unblock_task(conn, task_id)
        assert unblocked
        assert kb.get_task(conn, task_id).status == "ready"
|
||||
|
||||
|
||||
def test_assign_refuses_while_running(kanban_home):
    """Reassigning a claimed (running) task raises ``RuntimeError``."""
    with kb.connect() as conn:
        task_id = kb.create_task(conn, title="x", assignee="a")
        kb.claim_task(conn, task_id)

        with pytest.raises(RuntimeError, match="currently running"):
            kb.assign_task(conn, task_id, "b")
|
||||
|
||||
|
||||
def test_assign_reassigns_when_not_running(kanban_home):
    """An unclaimed task can be handed to a different assignee."""
    with kb.connect() as conn:
        task_id = kb.create_task(conn, title="x", assignee="a")

        changed = kb.assign_task(conn, task_id, "b")
        assert changed

        assert kb.get_task(conn, task_id).assignee == "b"
|
||||
|
||||
|
||||
def test_archive_hides_from_default_list(kanban_home):
    """Archived tasks are listed only when ``include_archived=True``."""
    with kb.connect() as conn:
        task_id = kb.create_task(conn, title="x")
        kb.complete_task(conn, task_id)

        archived = kb.archive_task(conn, task_id)
        assert archived

        default_view = kb.list_tasks(conn)
        assert len(default_view) == 0

        full_view = kb.list_tasks(conn, include_archived=True)
        assert len(full_view) == 1
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Comments / events / worker context
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_comments_recorded_in_order(kanban_home):
    """Comments come back in insertion order with their authors."""
    with kb.connect() as conn:
        task_id = kb.create_task(conn, title="x")
        kb.add_comment(conn, task_id, "user", "first")
        kb.add_comment(conn, task_id, "researcher", "second")
        stored = kb.list_comments(conn, task_id)

    bodies = [comment.body for comment in stored]
    authors = [comment.author for comment in stored]
    assert bodies == ["first", "second"]
    assert authors == ["user", "researcher"]
|
||||
|
||||
|
||||
def test_empty_comment_rejected(kanban_home):
    """An empty comment body raises ``ValueError``."""
    with kb.connect() as conn:
        task_id = kb.create_task(conn, title="x")
        with pytest.raises(ValueError, match="body is required"):
            kb.add_comment(conn, task_id, "user", "")
|
||||
|
||||
|
||||
def test_events_capture_lifecycle(kanban_home):
    """Create, claim and complete each leave an event on the task."""
    with kb.connect() as conn:
        task_id = kb.create_task(conn, title="x", assignee="a")
        kb.claim_task(conn, task_id)
        kb.complete_task(conn, task_id, result="ok")
        recorded = kb.list_events(conn, task_id)

    seen_kinds = [event.kind for event in recorded]
    for expected_kind in ("created", "claimed", "completed"):
        assert expected_kind in seen_kinds
|
||||
|
||||
|
||||
def test_worker_context_includes_parent_results_and_comments(kanban_home):
    """Worker context has the parent result, comments, and the task id and title."""
    with kb.connect() as conn:
        parent_id = kb.create_task(conn, title="p")
        kb.complete_task(conn, parent_id, result="PARENT_RESULT_MARKER")

        child_id = kb.create_task(conn, title="child", parents=[parent_id])
        kb.add_comment(conn, child_id, "user", "CLARIFICATION_MARKER")

        context = kb.build_worker_context(conn, child_id)

    expected_fragments = (
        "PARENT_RESULT_MARKER",
        "CLARIFICATION_MARKER",
        child_id,
        "child",
    )
    for fragment in expected_fragments:
        assert fragment in context
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Dispatcher
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_dispatch_dry_run_does_not_claim(kanban_home):
    """A dry run reports what it would spawn and leaves statuses untouched."""
    with kb.connect() as conn:
        first = kb.create_task(conn, title="a", assignee="alice")
        second = kb.create_task(conn, title="b", assignee="bob")
        outcome = kb.dispatch_once(conn, dry_run=True)

    reported_ids = {entry[0] for entry in outcome.spawned}
    assert reported_ids == {first, second}

    # Dry run must NOT mutate status.
    with kb.connect() as conn:
        for task_id in (first, second):
            assert kb.get_task(conn, task_id).status == "ready"
|
||||
|
||||
|
||||
def test_dispatch_skips_unassigned(kanban_home):
    """A task with no assignee is reported as skipped and is not spawned."""
    with kb.connect() as conn:
        task_id = kb.create_task(conn, title="floater")
        outcome = kb.dispatch_once(conn, dry_run=True)

    assert task_id in outcome.skipped_unassigned
    assert not outcome.spawned
|
||||
|
||||
|
||||
def test_dispatch_promotes_ready_and_spawns(kanban_home):
    """Dispatch spawns the child of a finished parent and marks it running."""
    recorded = []

    def recording_spawn(task, workspace):
        recorded.append((task.id, task.assignee, workspace))

    with kb.connect() as conn:
        parent_id = kb.create_task(conn, title="p", assignee="alice")
        child_id = kb.create_task(
            conn, title="c", assignee="bob", parents=[parent_id]
        )
        # The parent is finished before dispatch runs, so only the child is
        # left to spawn.
        kb.complete_task(conn, parent_id)
        res = kb.dispatch_once(conn, spawn_fn=recording_spawn)

    assert len(recorded) == 1
    spawned_id, spawned_assignee, _workspace = recorded[0]
    assert spawned_id == child_id
    assert spawned_assignee == "bob"

    # The child is now running.
    with kb.connect() as conn:
        assert kb.get_task(conn, child_id).status == "running"
|
||||
|
||||
|
||||
def test_dispatch_spawn_failure_releases_claim(kanban_home):
    """If the spawn callback raises, the task returns to ready with no lock."""

    def failing_spawn(task, workspace):
        raise RuntimeError("spawn failed")

    with kb.connect() as conn:
        task_id = kb.create_task(conn, title="boom", assignee="alice")
        kb.dispatch_once(conn, spawn_fn=failing_spawn)

        # Must return to ready so the next tick can retry.
        status = kb.get_task(conn, task_id).status
        assert status == "ready"
        lock = kb.get_task(conn, task_id).claim_lock
        assert lock is None
|
||||
|
||||
|
||||
def test_dispatch_reclaims_stale_before_spawning(kanban_home):
    """Dispatch reports one reclaimed task when a claim has already expired."""
    with kb.connect() as conn:
        task_id = kb.create_task(conn, title="x", assignee="alice")
        kb.claim_task(conn, task_id)

        # Set the expiry one second into the past.
        just_expired = int(time.time()) - 1
        conn.execute(
            "UPDATE tasks SET claim_expires = ? WHERE id = ?",
            (just_expired, task_id),
        )

        outcome = kb.dispatch_once(conn, dry_run=True)
    assert outcome.reclaimed == 1
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Workspace resolution
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_scratch_workspace_created_under_hermes_home(kanban_home):
    """Resolving a default task yields an existing directory under a kanban path."""
    with kb.connect() as conn:
        task_id = kb.create_task(conn, title="x")
        task = kb.get_task(conn, task_id)

    workspace = kb.resolve_workspace(task)
    assert workspace.exists()
    assert workspace.is_dir()
    assert "kanban" in str(workspace)
|
||||
|
||||
|
||||
def test_dir_workspace_honors_given_path(kanban_home, tmp_path):
    """A ``dir`` workspace resolves to the supplied path, which then exists."""
    wanted = tmp_path / "my-vault"
    with kb.connect() as conn:
        task_id = kb.create_task(
            conn,
            title="biz",
            workspace_kind="dir",
            workspace_path=str(wanted),
        )
        task = kb.get_task(conn, task_id)

    resolved = kb.resolve_workspace(task)
    assert resolved == wanted
    assert resolved.exists()
|
||||
|
||||
|
||||
def test_worktree_workspace_returns_intended_path(kanban_home, tmp_path):
    """A ``worktree`` workspace resolves to exactly the path it was given."""
    wanted = str(tmp_path / ".worktrees" / "my-task")
    with kb.connect() as conn:
        task_id = kb.create_task(
            conn,
            title="ship",
            workspace_kind="worktree",
            workspace_path=wanted,
        )
        task = kb.get_task(conn, task_id)

    resolved = kb.resolve_workspace(task)
    # We do NOT auto-create worktrees; the worker's skill handles that.
    assert str(resolved) == wanted
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tenancy
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_tenant_column_filters_listings(kanban_home):
    """Listing by tenant returns only that tenant's tasks."""
    with kb.connect() as conn:
        kb.create_task(conn, title="a1", tenant="biz-a")
        kb.create_task(conn, title="b1", tenant="biz-b")
        kb.create_task(conn, title="shared")  # no tenant

        titles_by_tenant = {
            tenant: [task.title for task in kb.list_tasks(conn, tenant=tenant)]
            for tenant in ("biz-a", "biz-b")
        }

    assert titles_by_tenant["biz-a"] == ["a1"]
    assert titles_by_tenant["biz-b"] == ["b1"]
|
||||
|
||||
|
||||
def test_tenant_propagates_to_events(kanban_home):
    """The "created" event payload carries the task's tenant."""
    with kb.connect() as conn:
        task_id = kb.create_task(conn, title="tenant-task", tenant="biz-a")
        history = kb.list_events(conn, task_id)

    creation_events = [event for event in history if event.kind == "created"]
    assert creation_events
    assert creation_events[0].payload.get("tenant") == "biz-a"
|
||||
@@ -0,0 +1,284 @@
|
||||
"""Tests for hermes_cli.model_catalog — remote manifest fetch + cache + fallback."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import time
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.fixture
def isolated_home(tmp_path, monkeypatch):
    """Point HERMES_HOME at a temp dir and reload ``model_catalog`` for each test."""
    hermes_home = tmp_path / ".hermes"
    hermes_home.mkdir()
    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
    monkeypatch.setattr(Path, "home", lambda: tmp_path)

    # Reload so every test starts from fresh module state.
    import importlib

    from hermes_cli import model_catalog

    importlib.reload(model_catalog)

    yield hermes_home

    # Teardown: reset the catalog cache.
    model_catalog.reset_cache()
|
||||
|
||||
|
||||
def _valid_manifest() -> dict:
|
||||
return {
|
||||
"version": 1,
|
||||
"updated_at": "2026-04-25T22:00:00Z",
|
||||
"metadata": {"source": "test"},
|
||||
"providers": {
|
||||
"openrouter": {
|
||||
"metadata": {"display_name": "OpenRouter"},
|
||||
"models": [
|
||||
{"id": "anthropic/claude-opus-4.7", "description": "recommended"},
|
||||
{"id": "openai/gpt-5.4", "description": ""},
|
||||
{"id": "openrouter/elephant-alpha", "description": "free"},
|
||||
],
|
||||
},
|
||||
"nous": {
|
||||
"metadata": {"display_name": "Nous Portal"},
|
||||
"models": [
|
||||
{"id": "anthropic/claude-opus-4.7"},
|
||||
{"id": "moonshotai/kimi-k2.6"},
|
||||
],
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
class TestValidation:
    """``_validate_manifest`` accepts the reference manifest and rejects broken ones."""

    def test_accepts_well_formed_manifest(self, isolated_home):
        """The unmodified reference manifest validates."""
        from hermes_cli.model_catalog import _validate_manifest

        verdict = _validate_manifest(_valid_manifest())
        assert verdict is True

    def test_rejects_non_dict(self, isolated_home):
        """A string, a list and ``None`` are all rejected."""
        from hermes_cli.model_catalog import _validate_manifest

        for candidate in ("string", [], None):
            assert _validate_manifest(candidate) is False

    def test_rejects_missing_version(self, isolated_home):
        """A manifest with no ``version`` key is rejected."""
        from hermes_cli.model_catalog import _validate_manifest

        manifest = _valid_manifest()
        manifest.pop("version")
        assert _validate_manifest(manifest) is False

    def test_rejects_future_version(self, isolated_home):
        """Version 999 is rejected."""
        from hermes_cli.model_catalog import _validate_manifest

        manifest = _valid_manifest()
        manifest["version"] = 999
        assert _validate_manifest(manifest) is False

    def test_rejects_missing_providers(self, isolated_home):
        """A manifest with no ``providers`` key is rejected."""
        from hermes_cli.model_catalog import _validate_manifest

        manifest = _valid_manifest()
        manifest.pop("providers")
        assert _validate_manifest(manifest) is False

    def test_rejects_malformed_model_entry(self, isolated_home):
        """A model entry with an empty id is rejected."""
        from hermes_cli.model_catalog import _validate_manifest

        manifest = _valid_manifest()
        openrouter_models = manifest["providers"]["openrouter"]["models"]
        openrouter_models[0] = {"id": ""}  # empty id
        assert _validate_manifest(manifest) is False

    def test_rejects_non_string_model_id(self, isolated_home):
        """A model entry whose id is an integer is rejected."""
        from hermes_cli.model_catalog import _validate_manifest

        manifest = _valid_manifest()
        openrouter_models = manifest["providers"]["openrouter"]["models"]
        openrouter_models[0] = {"id": 42}
        assert _validate_manifest(manifest) is False
|
||||
|
||||
|
||||
class TestFetchSuccess:
    """``get_catalog`` behaviour when the (patched) fetch returns a manifest."""

    def test_fetch_and_cache_writes_disk(self, isolated_home):
        """A forced refresh returns the manifest and writes it to the cache file."""
        from hermes_cli import model_catalog

        expected = _valid_manifest()
        with patch.object(
            model_catalog, "_fetch_manifest", return_value=expected
        ) as fetch_mock:
            fetched = model_catalog.get_catalog(force_refresh=True)

        assert fetched == expected
        assert fetch_mock.called

        cache_path = model_catalog._cache_path()
        assert cache_path.exists()
        with open(cache_path) as handle:
            on_disk = json.load(handle)
        assert on_disk == expected

    def test_second_call_uses_in_process_cache(self, isolated_home):
        """A plain call after a forced refresh does not fetch again."""
        from hermes_cli import model_catalog

        expected = _valid_manifest()
        with patch.object(
            model_catalog, "_fetch_manifest", return_value=expected
        ) as fetch_mock:
            model_catalog.get_catalog(force_refresh=True)
            model_catalog.get_catalog()  # should not hit network again
        assert fetch_mock.call_count == 1

    def test_force_refresh_always_refetches(self, isolated_home):
        """Two forced refreshes call the fetcher twice."""
        from hermes_cli import model_catalog

        expected = _valid_manifest()
        with patch.object(
            model_catalog, "_fetch_manifest", return_value=expected
        ) as fetch_mock:
            for _ in range(2):
                model_catalog.get_catalog(force_refresh=True)
        assert fetch_mock.call_count == 2
|
||||
|
||||
|
||||
class TestFetchFailure:
    """``get_catalog`` behaviour when the (patched) fetch returns ``None``."""

    def test_network_failure_returns_empty_when_no_cache(self, isolated_home):
        """With no cache, a failed fetch gives an empty dict."""
        from hermes_cli import model_catalog

        with patch.object(model_catalog, "_fetch_manifest", return_value=None):
            outcome = model_catalog.get_catalog(force_refresh=True)
        assert outcome == {}

    def test_network_failure_falls_back_to_disk_cache(self, isolated_home):
        """A failed refetch returns what an earlier successful fetch cached."""
        from hermes_cli import model_catalog

        # Prime the disk cache through a successful fetch.
        expected = _valid_manifest()
        with patch.object(model_catalog, "_fetch_manifest", return_value=expected):
            model_catalog.get_catalog(force_refresh=True)

        # Clear the in-process cache, then make the next fetch fail.
        model_catalog.reset_cache()
        with patch.object(model_catalog, "_fetch_manifest", return_value=None):
            outcome = model_catalog.get_catalog(force_refresh=True)

        assert outcome == expected

    def test_fetch_failure_falls_back_to_stale_cache(self, isolated_home):
        """A cache file with a 30-day-old mtime is still returned on failure."""
        from hermes_cli import model_catalog

        import os as _os

        expected = _valid_manifest()

        # Write the cache file directly, then backdate its mtime.
        cache_path = model_catalog._cache_path()
        cache_path.parent.mkdir(parents=True, exist_ok=True)
        with open(cache_path, "w") as handle:
            json.dump(expected, handle)
        thirty_days_ago = time.time() - 30 * 24 * 3600
        _os.utime(cache_path, (thirty_days_ago, thirty_days_ago))

        with patch.object(model_catalog, "_fetch_manifest", return_value=None):
            outcome = model_catalog.get_catalog()

        # Stale cache is better than nothing.
        assert outcome == expected
|
||||
|
||||
|
||||
class TestCuratedAccessors:
    """Per-provider accessors, with the manifest fetch patched."""

    def test_openrouter_returns_tuples(self, isolated_home):
        """OpenRouter models come back as ``(id, description)`` tuples in order."""
        from hermes_cli import model_catalog

        fetch_patch = patch.object(
            model_catalog, "_fetch_manifest", return_value=_valid_manifest()
        )
        with fetch_patch:
            models = model_catalog.get_curated_openrouter_models()

        expected = [
            ("anthropic/claude-opus-4.7", "recommended"),
            ("openai/gpt-5.4", ""),
            ("openrouter/elephant-alpha", "free"),
        ]
        assert models == expected

    def test_nous_returns_ids(self, isolated_home):
        """Nous models come back as a list of ids."""
        from hermes_cli import model_catalog

        fetch_patch = patch.object(
            model_catalog, "_fetch_manifest", return_value=_valid_manifest()
        )
        with fetch_patch:
            models = model_catalog.get_curated_nous_models()

        assert models == ["anthropic/claude-opus-4.7", "moonshotai/kimi-k2.6"]

    def test_openrouter_returns_none_when_catalog_empty(self, isolated_home):
        """With no manifest available the OpenRouter accessor returns ``None``."""
        from hermes_cli import model_catalog

        with patch.object(model_catalog, "_fetch_manifest", return_value=None):
            models = model_catalog.get_curated_openrouter_models()
            assert models is None

    def test_nous_returns_none_when_catalog_empty(self, isolated_home):
        """With no manifest available the Nous accessor returns ``None``."""
        from hermes_cli import model_catalog

        with patch.object(model_catalog, "_fetch_manifest", return_value=None):
            models = model_catalog.get_curated_nous_models()
            assert models is None
|
||||
|
||||
|
||||
class TestDisabled:
    """Behaviour when the loaded catalog config has ``enabled`` set to False."""

    def test_disabled_config_short_circuits(self, isolated_home):
        """A disabled catalog returns ``{}`` and never calls the fetcher."""
        from hermes_cli import model_catalog

        disabled_config = {
            "enabled": False,
            "url": "http://ignored",
            "ttl_hours": 24.0,
            "providers": {},
        }
        config_patch = patch.object(
            model_catalog, "_load_catalog_config", return_value=disabled_config
        )
        with config_patch, patch.object(model_catalog, "_fetch_manifest") as fetch_mock:
            outcome = model_catalog.get_catalog()
            assert outcome == {}
            fetch_mock.assert_not_called()
|
||||
|
||||
|
||||
class TestProviderOverride:
    """A provider-level URL in the config, with config and fetch both patched."""

    def test_override_url_takes_precedence(self, isolated_home):
        """Models come from the provider override URL, not the master URL."""
        from hermes_cli import model_catalog

        override_manifest = {
            "version": 1,
            "providers": {
                "openrouter": {
                    "models": [
                        {"id": "override/model", "description": "custom"},
                    ]
                }
            },
        }

        def url_aware_fetch(url, timeout):
            # Only URLs containing "override" get the custom manifest.
            return override_manifest if "override" in url else _valid_manifest()

        catalog_config = {
            "enabled": True,
            "url": "http://master",
            "ttl_hours": 24.0,
            "providers": {"openrouter": {"url": "http://override"}},
        }
        config_patch = patch.object(
            model_catalog, "_load_catalog_config", return_value=catalog_config
        )
        fetch_patch = patch.object(
            model_catalog, "_fetch_manifest", side_effect=url_aware_fetch
        )
        with config_patch, fetch_patch:
            models = model_catalog.get_curated_openrouter_models()

        assert models == [("override/model", "custom")]
|
||||
|
||||
|
||||
class TestIntegrationWithModelsModule:
    """Drive the catalog through ``hermes_cli.models.get_curated_nous_model_ids``."""

    def test_curated_nous_ids_falls_back_to_hardcoded_on_empty_catalog(
        self, isolated_home
    ):
        """With no manifest, the ids equal the ``_PROVIDER_MODELS["nous"]`` list."""
        from hermes_cli import model_catalog
        from hermes_cli.models import get_curated_nous_model_ids, _PROVIDER_MODELS

        with patch.object(model_catalog, "_fetch_manifest", return_value=None):
            ids = get_curated_nous_model_ids()

        hardcoded = list(_PROVIDER_MODELS["nous"])
        assert ids == hardcoded

    def test_curated_nous_ids_prefers_manifest(self, isolated_home):
        """With a manifest available, the ids are the manifest's Nous models."""
        from hermes_cli import model_catalog
        from hermes_cli.models import get_curated_nous_model_ids

        fetch_patch = patch.object(
            model_catalog, "_fetch_manifest", return_value=_valid_manifest()
        )
        with fetch_patch:
            ids = get_curated_nous_model_ids()

        assert ids == ["anthropic/claude-opus-4.7", "moonshotai/kimi-k2.6"]
|
||||
@@ -88,3 +88,61 @@ class TestResolveDisplayContextLength:
|
||||
model_info=fake_mi,
|
||||
)
|
||||
assert ctx == 128_000
|
||||
|
||||
def test_custom_providers_override_honored(self):
    """Regression for #15779: /model switch onto a custom provider must
    surface the configured per-model context_length, not the 128K/256K
    fallback.
    """
    from unittest.mock import patch as _p
    from agent import model_metadata as _mm

    expected_ctx = 1_050_000
    endpoint = "https://example.invalid/v1"
    providers_cfg = [
        {
            "name": "my-custom-endpoint",
            "base_url": endpoint,
            "models": {"gpt-5.5": {"context_length": expected_ctx}},
        }
    ]

    # The resolver itself is not mocked, so the override path is exercised
    # through agent.model_metadata.get_model_context_length; only the
    # lookups around it are stubbed out.
    no_cache = _p.object(_mm, "get_cached_context_length", return_value=None)
    no_endpoint_meta = _p.object(_mm, "fetch_endpoint_model_metadata", return_value={})
    no_model_meta = _p.object(_mm, "fetch_model_metadata", return_value={})
    not_local = _p.object(_mm, "is_local_endpoint", return_value=False)
    not_known = _p.object(_mm, "_is_known_provider_base_url", return_value=False)

    with no_cache, no_endpoint_meta, no_model_meta, not_local, not_known:
        ctx = resolve_display_context_length(
            "gpt-5.5",
            "custom",
            base_url=endpoint,
            api_key="k",
            custom_providers=providers_cfg,
        )

    assert ctx == expected_ctx, (
        "custom_providers[].models.gpt-5.5.context_length=1.05M must win "
        "over probe-down fallback"
    )
|
||||
|
||||
def test_custom_providers_trailing_slash_insensitive(self):
    """Base URL comparison must tolerate trailing-slash differences
    between config.yaml and the runtime value.
    """
    from unittest.mock import patch as _p
    from agent import model_metadata as _mm

    expected_ctx = 400_000
    # Configured WITH a trailing slash; the runtime URL passed below has none.
    providers_cfg = [
        {
            "base_url": "https://example.invalid/v1/",
            "models": {"m": {"context_length": expected_ctx}},
        }
    ]

    no_cache = _p.object(_mm, "get_cached_context_length", return_value=None)
    no_endpoint_meta = _p.object(_mm, "fetch_endpoint_model_metadata", return_value={})
    no_model_meta = _p.object(_mm, "fetch_model_metadata", return_value={})
    not_local = _p.object(_mm, "is_local_endpoint", return_value=False)
    not_known = _p.object(_mm, "_is_known_provider_base_url", return_value=False)

    with no_cache, no_endpoint_meta, no_model_meta, not_local, not_known:
        ctx = resolve_display_context_length(
            "m",
            "custom",
            base_url="https://example.invalid/v1",
            custom_providers=providers_cfg,
        )

    assert ctx == expected_ctx
|
||||
|
||||
@@ -256,6 +256,17 @@ class TestDetectProviderForModel:
|
||||
"""Models belonging to the current provider should not trigger a switch."""
|
||||
assert detect_provider_for_model("gpt-5.3-codex", "openai-codex") is None
|
||||
|
||||
def test_short_alias_resolves_to_static_model(self):
    """Short aliases (e.g. sonnet) should resolve without network lookups."""
    # Any call to the OpenRouter fetcher fails the test immediately.
    network_guard = patch(
        "hermes_cli.models.fetch_openrouter_models",
        side_effect=AssertionError("network lookup should not run"),
    )
    with network_guard:
        resolved = detect_provider_for_model("sonnet", "auto")

    assert resolved is not None
    provider_id = resolved[0]
    assert provider_id == "anthropic"
    model_id = resolved[1]
    assert model_id.startswith("claude-sonnet")
|
||||
|
||||
def test_openrouter_slug_match(self):
|
||||
"""Models in the OpenRouter catalog should be found."""
|
||||
with patch("hermes_cli.models.fetch_openrouter_models", return_value=LIVE_OPENROUTER_MODELS):
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
import pytest
|
||||
|
||||
from hermes_cli import runtime_provider as rp
|
||||
|
||||
|
||||
@@ -1565,3 +1567,79 @@ class TestOllamaUrlSubstringLeak:
|
||||
resolved = rp.resolve_runtime_provider(requested="custom")
|
||||
|
||||
assert resolved["api_key"] == "ol-legit-key"
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Azure Foundry — both OpenAI-style and Anthropic-style endpoints
|
||||
# =============================================================================
|
||||
|
||||
class TestAzureFoundryResolution:
    """Verify Azure Foundry resolves correctly for both API modes."""

    def _make_cfg(self, base_url: str, api_mode: str = "chat_completions"):
        """Build a model-config dict that selects the azure-foundry provider."""
        cfg = {"provider": "azure-foundry"}
        cfg["base_url"] = base_url
        cfg["api_mode"] = api_mode
        cfg["default"] = "gpt-5.4"
        return cfg

    def _stub_resolver(self, monkeypatch, config_factory):
        """Pin provider resolution, the model config and the pool loader on ``rp``.

        ``config_factory`` is installed as ``rp._get_model_config`` unchanged,
        so it is called lazily by the code under test.
        """
        monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "azure-foundry")
        monkeypatch.setattr(rp, "_get_model_config", config_factory)
        monkeypatch.setattr(rp, "load_pool", lambda provider: None)

    def test_azure_foundry_openai_style_explicit(self, monkeypatch):
        """OpenAI-style Azure Foundry → chat_completions, keeps base_url as-is."""
        endpoint = "https://my-resource.openai.azure.com/openai/v1"
        monkeypatch.setenv("AZURE_FOUNDRY_API_KEY", "az-key-openai")
        self._stub_resolver(
            monkeypatch, lambda: self._make_cfg(endpoint, "chat_completions")
        )

        resolved = rp.resolve_runtime_provider(requested="azure-foundry")

        assert resolved["provider"] == "azure-foundry"
        assert resolved["api_mode"] == "chat_completions"
        assert resolved["base_url"] == endpoint
        assert resolved["api_key"] == "az-key-openai"

    def test_azure_foundry_anthropic_style_strips_v1_suffix(self, monkeypatch):
        """Anthropic-style Azure Foundry → anthropic_messages, /v1 stripped
        because the Anthropic SDK appends /v1/messages itself."""
        monkeypatch.setenv("AZURE_FOUNDRY_API_KEY", "az-key-ant")
        self._stub_resolver(
            monkeypatch,
            lambda: self._make_cfg(
                "https://my-resource.services.ai.azure.com/anthropic/v1",
                "anthropic_messages",
            ),
        )

        resolved = rp.resolve_runtime_provider(requested="azure-foundry")

        assert resolved["provider"] == "azure-foundry"
        assert resolved["api_mode"] == "anthropic_messages"
        # /v1 stripped so SDK can append /v1/messages cleanly
        assert resolved["base_url"] == "https://my-resource.services.ai.azure.com/anthropic"

    def test_azure_foundry_missing_base_url_raises(self, monkeypatch):
        """An API key without any base URL is rejected with an AuthError."""
        monkeypatch.setenv("AZURE_FOUNDRY_API_KEY", "az-key")
        monkeypatch.delenv("AZURE_FOUNDRY_BASE_URL", raising=False)
        # Empty model config: no base_url available from config either.
        self._stub_resolver(monkeypatch, lambda: {})

        with pytest.raises(rp.AuthError, match="base URL"):
            rp.resolve_runtime_provider(requested="azure-foundry")

    def test_azure_foundry_missing_api_key_raises(self, monkeypatch):
        """A base URL without any API key is rejected with an AuthError."""
        import hermes_cli.config as cfg_mod

        monkeypatch.delenv("AZURE_FOUNDRY_API_KEY", raising=False)
        # `get_env_value` reads from ~/.hermes/.env — mock it to return None
        # so the resolver can't find a key there either.
        monkeypatch.setattr(cfg_mod, "get_env_value", lambda k: None)
        self._stub_resolver(
            monkeypatch,
            lambda: self._make_cfg("https://my-resource.openai.azure.com/openai/v1"),
        )

        with pytest.raises(rp.AuthError, match="API key"):
            rp.resolve_runtime_provider(requested="azure-foundry")
|
||||
|
||||
@@ -144,91 +144,6 @@ class TestNonInteractiveSetup:
|
||||
out = capsys.readouterr().out
|
||||
assert "hermes config set model.provider custom" in out
|
||||
|
||||
def test_returning_user_terminal_menu_choice_dispatches_terminal_section(self, tmp_path):
    """Returning-user menu should map Terminal Backend to the terminal setup, not TTS."""
    from hermes_cli import setup as setup_mod

    args = _make_setup_args()
    config = {}

    # One mock handler per section, registered in the same order as the
    # patched SETUP_SECTIONS list below.
    labels = [
        ("model", "Model & Provider"),
        ("tts", "Text-to-Speech"),
        ("terminal", "Terminal Backend"),
        ("gateway", "Messaging Platforms (Gateway)"),
        ("tools", "Tools"),
        ("agent", "Agent Settings"),
    ]
    handlers = {key: MagicMock() for key, _ in labels}
    sections = [(key, label, handlers[key]) for key, label in labels]

    def env_lookup(key):
        # Only the OpenRouter key has a value; every other lookup is empty.
        return "sk-test" if key == "OPENROUTER_API_KEY" else ""

    with (
        patch.object(setup_mod, "ensure_hermes_home"),
        patch.object(setup_mod, "load_config", return_value=config),
        patch.object(setup_mod, "get_hermes_home", return_value=tmp_path),
        patch.object(setup_mod, "is_interactive_stdin", return_value=True),
        patch.object(setup_mod, "get_env_value", side_effect=env_lookup),
        patch("hermes_cli.auth.get_active_provider", return_value=None),
        patch.object(setup_mod, "prompt_choice", return_value=3),
        patch.object(setup_mod, "SETUP_SECTIONS", sections),
        patch.object(setup_mod, "save_config"),
        patch.object(setup_mod, "_print_setup_summary"),
    ):
        setup_mod.run_setup_wizard(args)

    handlers["terminal"].assert_called_once_with(config)
    handlers["tts"].assert_not_called()
|
||||
|
||||
def test_returning_user_menu_does_not_show_separator_rows(self, tmp_path):
    """Returning-user menu should only show selectable actions."""
    from hermes_cli import setup as setup_mod

    args = _make_setup_args()
    captured = {}

    def fake_prompt_choice(question, choices, default=0):
        # Record what the menu showed, then pick the last entry.
        captured.update(question=question, choices=list(choices))
        return len(choices) - 1

    def env_lookup(key):
        # Only the OpenRouter key has a value; every other lookup is empty.
        return "sk-test" if key == "OPENROUTER_API_KEY" else ""

    with (
        patch.object(setup_mod, "ensure_hermes_home"),
        patch.object(setup_mod, "load_config", return_value={}),
        patch.object(setup_mod, "get_hermes_home", return_value=tmp_path),
        patch.object(setup_mod, "is_interactive_stdin", return_value=True),
        patch.object(setup_mod, "get_env_value", side_effect=env_lookup),
        patch("hermes_cli.auth.get_active_provider", return_value=None),
        patch.object(setup_mod, "prompt_choice", side_effect=fake_prompt_choice),
    ):
        setup_mod.run_setup_wizard(args)

    expected_choices = [
        "Quick Setup - configure missing items only",
        "Full Setup - reconfigure everything",
        "Model & Provider",
        "Terminal Backend",
        "Messaging Platforms (Gateway)",
        "Tools",
        "Agent Settings",
        "Exit",
    ]
    assert captured["question"] == "What would you like to do?"
    assert "---" not in captured["choices"]
    assert captured["choices"] == expected_choices
|
||||
|
||||
def test_main_accepts_tts_setup_section(self, monkeypatch):
|
||||
"""`hermes setup tts` should parse and dispatch like other setup sections."""
|
||||
from hermes_cli import main as main_mod
|
||||
|
||||
@@ -0,0 +1,287 @@
|
||||
"""Tests for the setup wizard's returning-user behavior.
|
||||
|
||||
On an existing install:
|
||||
- Bare `hermes setup` drops straight into the full reconfigure wizard
|
||||
(every prompt shows the current value as its default).
|
||||
- `hermes setup --quick` runs the narrower "fill in missing items" flow.
|
||||
- `hermes setup --reconfigure` is a backwards-compat alias for the
|
||||
bare-setup default.
|
||||
|
||||
On a fresh install, all three are no-ops — fall through to first-time setup.
|
||||
"""
|
||||
|
||||
from argparse import Namespace
|
||||
from contextlib import ExitStack
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
def _make_setup_args(**overrides):
    """Build the ``Namespace`` of setup flags passed to ``run_setup_wizard``.

    Only the five known flags are copied from *overrides*; each one that is
    not supplied takes its default. Any other keyword is ignored.
    """
    defaults = {
        "non_interactive": False,
        "section": None,
        "reset": False,
        "reconfigure": False,
        "quick": False,
    }
    resolved = {
        name: overrides.get(name, fallback) for name, fallback in defaults.items()
    }
    return Namespace(**resolved)
|
||||
|
||||
|
||||
@pytest.fixture
def existing_install(tmp_path, monkeypatch):
    """Simulate a returning user with an existing configured install.

    Creates ``<tmp_path>/.hermes``, points ``HERMES_HOME`` at it and makes
    ``pathlib.Path.home`` return ``tmp_path``. Returns the created directory.
    """
    hermes_home = tmp_path / ".hermes"
    hermes_home.mkdir()
    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
    monkeypatch.setattr("pathlib.Path.home", lambda: tmp_path)
    return hermes_home
|
||||
|
||||
|
||||
@pytest.fixture
def fresh_install(tmp_path, monkeypatch):
    """Simulate a first-time user with no existing configuration.

    Creates an empty ``<tmp_path>/.hermes``, points ``HERMES_HOME`` at it and
    makes ``pathlib.Path.home`` return ``tmp_path``. Returns the directory.
    """
    hermes_home = tmp_path / ".hermes"
    hermes_home.mkdir()
    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
    monkeypatch.setattr("pathlib.Path.home", lambda: tmp_path)
    return hermes_home
|
||||
|
||||
|
||||
def _enter_existing_install_patches(stack, **extra):
    """Apply standard existing-install mocks via an ExitStack.

    Every patch is entered on *stack*, so it is undone when the stack closes.
    Each ``extra`` keyword maps a mock name to a patch target path; the
    resulting mocks are returned in a dict keyed by that name so callers can
    assert on them.
    """
    # Unconditional mocks (no return values to assert against).
    fixed_patches = (
        ("hermes_cli.setup.ensure_hermes_home", {}),
        ("hermes_cli.setup.is_interactive_stdin", {"return_value": True}),
        ("hermes_cli.config.is_managed", {"return_value": False}),
        ("hermes_cli.setup.load_config", {"return_value": {}}),
        ("hermes_cli.setup.save_config", {}),
        ("hermes_cli.setup.get_env_value", {"return_value": None}),
        ("hermes_cli.auth.get_active_provider", {"return_value": "openrouter"}),
        ("hermes_cli.setup._print_setup_summary", {}),
        ("hermes_cli.setup._offer_launch_chat", {}),
        ("hermes_cli.setup._offer_openclaw_migration", {"return_value": False}),
    )
    for target, kwargs in fixed_patches:
        stack.enter_context(patch(target, **kwargs))

    # Named mocks caller wants to assert on.
    return {
        name: stack.enter_context(patch(target)) for name, target in extra.items()
    }
|
||||
|
||||
|
||||
def _enter_fresh_install_patches(stack, **extra):
    """Apply standard fresh-install mocks via an ExitStack.

    Each ``extra`` keyword maps a mock name to either a patch target path or
    a ``(target, patch_kwargs)`` tuple. The entered mocks are returned in a
    dict keyed by name so callers can assert on them.
    """
    fixed_patches = (
        ("hermes_cli.setup.ensure_hermes_home", {}),
        ("hermes_cli.setup.is_interactive_stdin", {"return_value": True}),
        ("hermes_cli.config.is_managed", {"return_value": False}),
        ("hermes_cli.setup.load_config", {"return_value": {}}),
        ("hermes_cli.setup.save_config", {}),
        ("hermes_cli.auth.get_active_provider", {"return_value": None}),
        ("hermes_cli.setup.get_env_value", {"return_value": None}),
        ("hermes_cli.setup._offer_openclaw_migration", {"return_value": False}),
    )
    for target, kwargs in fixed_patches:
        stack.enter_context(patch(target, **kwargs))

    named = {}
    for name, spec in extra.items():
        # A bare string is a target with no extra patch() keyword arguments.
        target, kwargs = spec if isinstance(spec, tuple) else (spec, {})
        named[name] = stack.enter_context(patch(target, **kwargs))
    return named
|
||||
|
||||
|
||||
class TestExistingInstallDefault:
    """Bare `hermes setup` on an existing install = full reconfigure wizard."""

    def test_bare_setup_runs_full_reconfigure_without_menu(self, existing_install):
        """No menu, no prompt_choice — just run every section in sequence."""
        args = _make_setup_args()  # no flags
        section_targets = {
            "model": "hermes_cli.setup.setup_model_provider",
            "terminal": "hermes_cli.setup.setup_terminal_backend",
            "agent": "hermes_cli.setup.setup_agent_settings",
            "gateway": "hermes_cli.setup.setup_gateway",
            "tools": "hermes_cli.setup.setup_tools",
        }

        with ExitStack() as stack:
            mocks = _enter_existing_install_patches(
                stack,
                prompt_choice="hermes_cli.setup.prompt_choice",
                quick="hermes_cli.setup._run_quick_setup",
                **section_targets,
            )
            from hermes_cli.setup import run_setup_wizard

            run_setup_wizard(args)

        # No menu shown.
        mocks["prompt_choice"].assert_not_called()
        # Quick-setup path NOT taken.
        mocks["quick"].assert_not_called()
        # All five sections ran.
        for section in section_targets:
            mocks[section].assert_called_once()

    def test_reconfigure_flag_is_backwards_compat_noop(self, existing_install):
        """`hermes setup --reconfigure` behaves the same as bare `hermes setup`."""
        args = _make_setup_args(reconfigure=True)
        section_targets = {
            "model": "hermes_cli.setup.setup_model_provider",
            "terminal": "hermes_cli.setup.setup_terminal_backend",
            "agent": "hermes_cli.setup.setup_agent_settings",
            "gateway": "hermes_cli.setup.setup_gateway",
            "tools": "hermes_cli.setup.setup_tools",
        }

        with ExitStack() as stack:
            mocks = _enter_existing_install_patches(
                stack,
                prompt_choice="hermes_cli.setup.prompt_choice",
                **section_targets,
            )
            from hermes_cli.setup import run_setup_wizard

            run_setup_wizard(args)

        mocks["prompt_choice"].assert_not_called()
        for section in section_targets:
            mocks[section].assert_called_once()
|
||||
|
||||
|
||||
class TestQuickFlag:
    """`--quick` on an existing install runs the fill-missing flow."""

    def test_quick_flag_runs_quick_setup_only(self, existing_install):
        """Only the quick-setup entry point runs; no reconfigure section does."""
        args = _make_setup_args(quick=True)
        section_targets = {
            "model": "hermes_cli.setup.setup_model_provider",
            "terminal": "hermes_cli.setup.setup_terminal_backend",
            "agent": "hermes_cli.setup.setup_agent_settings",
            "gateway": "hermes_cli.setup.setup_gateway",
            "tools": "hermes_cli.setup.setup_tools",
        }

        with ExitStack() as stack:
            mocks = _enter_existing_install_patches(
                stack,
                quick="hermes_cli.setup._run_quick_setup",
                **section_targets,
            )
            from hermes_cli.setup import run_setup_wizard

            run_setup_wizard(args)

        mocks["quick"].assert_called_once()
        # Full reconfigure sections must NOT run.
        for section in section_targets:
            mocks[section].assert_not_called()
|
||||
|
||||
|
||||
class TestFreshInstall:
    """On a fresh install (no active provider), flags are no-ops."""

    @staticmethod
    def _run_wizard(args):
        """Run the wizard under fresh-install patches and return the named mocks.

        ``prompt`` is the patched ``prompt_choice`` (returning 0) and ``first``
        is the patched ``_run_first_time_quick_setup``.
        """
        with ExitStack() as stack:
            mocks = _enter_fresh_install_patches(
                stack,
                prompt=("hermes_cli.setup.prompt_choice", {"return_value": 0}),
                first="hermes_cli.setup._run_first_time_quick_setup",
            )
            from hermes_cli.setup import run_setup_wizard

            run_setup_wizard(args)
        return mocks

    def test_bare_setup_runs_first_time_flow(self, fresh_install):
        """Bare setup asks the quick-vs-full question and runs first-time setup."""
        mocks = self._run_wizard(_make_setup_args())

        mocks["prompt"].assert_called_once()  # quick-vs-full prompt
        mocks["first"].assert_called_once()

    def test_reconfigure_on_fresh_install_falls_through(self, fresh_install):
        """`--reconfigure` takes the same first-time path as bare setup."""
        mocks = self._run_wizard(_make_setup_args(reconfigure=True))

        mocks["prompt"].assert_called_once()
        mocks["first"].assert_called_once()

    def test_quick_on_fresh_install_falls_through(self, fresh_install):
        """`--quick` takes the same first-time path as bare setup."""
        mocks = self._run_wizard(_make_setup_args(quick=True))

        mocks["prompt"].assert_called_once()
        mocks["first"].assert_called_once()
|
||||
|
||||
|
||||
class TestArgparse:
    """The flags are plumbed through argparse to cmd_setup."""

    @staticmethod
    def _parsed_setup_args(monkeypatch, argv):
        """Run ``main()`` with *argv* and return the args the stubbed wizard got.

        ``run_setup_wizard`` is replaced by a recorder, so nothing interactive
        runs. A ``SystemExit`` raised by ``main()`` is swallowed.
        """
        import sys

        from hermes_cli.main import main

        seen = {}
        monkeypatch.setattr(
            "hermes_cli.setup.run_setup_wizard",
            lambda args: seen.setdefault("args", args),
        )
        monkeypatch.setattr(sys, "argv", argv)
        try:
            main()
        except SystemExit:
            pass
        return seen["args"]

    def test_reconfigure_flag_reaches_cmd_setup(self, monkeypatch):
        """`--reconfigure` sets ``reconfigure`` and leaves ``quick`` unset."""
        parsed = self._parsed_setup_args(
            monkeypatch, ["hermes", "setup", "--reconfigure"]
        )
        assert parsed.reconfigure is True
        assert parsed.quick is False

    def test_quick_flag_reaches_cmd_setup(self, monkeypatch):
        """`--quick` sets ``quick`` and leaves ``reconfigure`` unset."""
        parsed = self._parsed_setup_args(monkeypatch, ["hermes", "setup", "--quick"])
        assert parsed.quick is True
        assert parsed.reconfigure is False

    def test_bare_setup_has_both_flags_false(self, monkeypatch):
        """Without flags, both ``reconfigure`` and ``quick`` are False."""
        parsed = self._parsed_setup_args(monkeypatch, ["hermes", "setup"])
        assert parsed.reconfigure is False
        assert parsed.quick is False
|
||||
@@ -0,0 +1,115 @@
|
||||
"""Tests for OSError EIO suppression during interrupt shutdown (#13710).
|
||||
|
||||
When the user interrupts a running task, prompt_toolkit tries to flush
|
||||
stdout during emergency shutdown. If stdout is already in a broken state
|
||||
(redirected to /dev/null, pipe closed, etc.), the flush raises
|
||||
``OSError: [Errno 5] Input/output error``.
|
||||
|
||||
The ``_suppress_closed_loop_errors`` asyncio exception handler and the
|
||||
outer ``except (KeyError, OSError)`` block must both suppress this error
|
||||
to prevent a hard crash.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import errno
|
||||
import os
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _suppress_closed_loop_errors – asyncio exception handler
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _make_suppress_fn():
    """Build a standalone copy of ``_suppress_closed_loop_errors``.

    The real function is defined as a closure inside
    ``CLI._run_interactive``; we reconstruct an equivalent here so the
    unit tests don't need a full CLI instance.

    Returns:
        A ``handler(loop, context)`` callable. It drops three kinds of
        exception and hands every other context to
        ``loop.default_exception_handler``.
    """
    def _suppress_closed_loop_errors(loop, context):
        exc = context.get("exception")
        # Ignored: "Event loop is closed" RuntimeErrors, "is not registered"
        # KeyErrors, and OSErrors whose errno is EIO.
        benign = (
            (isinstance(exc, RuntimeError) and "Event loop is closed" in str(exc))
            or (isinstance(exc, KeyError) and "is not registered" in str(exc))
            or (isinstance(exc, OSError) and getattr(exc, "errno", None) == errno.EIO)
        )
        if not benign:
            loop.default_exception_handler(context)

    return _suppress_closed_loop_errors
|
||||
|
||||
|
||||
class TestSuppressClosedLoopErrors:
    """Verify the asyncio exception handler suppresses expected errors."""

    @staticmethod
    def _dispatch(context):
        """Feed *context* to a fresh handler and return the mock loop it used."""
        handler = _make_suppress_fn()
        loop = MagicMock()
        handler(loop, context)
        return loop

    def test_suppresses_event_loop_closed(self):
        """A "Event loop is closed" RuntimeError is not forwarded."""
        loop = self._dispatch({"exception": RuntimeError("Event loop is closed")})
        loop.default_exception_handler.assert_not_called()

    def test_suppresses_key_not_registered(self):
        """An "is not registered" KeyError is not forwarded."""
        loop = self._dispatch({"exception": KeyError("0 is not registered")})
        loop.default_exception_handler.assert_not_called()

    def test_suppresses_oserror_eio(self):
        """OSError with errno.EIO must be suppressed (#13710)."""
        io_failure = OSError(errno.EIO, "Input/output error")
        loop = self._dispatch({"exception": io_failure})
        loop.default_exception_handler.assert_not_called()

    def test_does_not_suppress_oserror_other_errno(self):
        """OSError with a different errno must still propagate."""
        denied = OSError(errno.EACCES, "Permission denied")
        loop = self._dispatch({"exception": denied})
        loop.default_exception_handler.assert_called_once()

    def test_does_not_suppress_unrelated_exception(self):
        """Unrelated exceptions must still propagate."""
        loop = self._dispatch({"exception": ValueError("something else")})
        loop.default_exception_handler.assert_called_once()

    def test_no_exception_key(self):
        """Context without 'exception' must propagate to default handler."""
        loop = self._dispatch({"message": "some log"})
        loop.default_exception_handler.assert_called_once()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Outer except block – EIO handling
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestOuterExceptEIO:
    """Verify the outer ``except (KeyError, OSError)`` block logic."""

    def test_eio_does_not_reraise(self):
        """OSError with errno.EIO should be silently suppressed."""
        io_failure = OSError(errno.EIO, "Input/output error")
        # Simulate the condition check from the outer except block:
        assert isinstance(io_failure, OSError)
        assert getattr(io_failure, "errno", None) == errno.EIO

    def test_bad_file_descriptor_matches(self):
        """'Bad file descriptor' string should be caught."""
        bad_fd = OSError(errno.EBADF, "Bad file descriptor")
        rendered = str(bad_fd)
        assert "Bad file descriptor" in rendered

    def test_other_oserror_reraises(self):
        """Other OSError variants must not match the EIO guard."""
        denied = OSError(errno.EACCES, "Permission denied")
        rendered = str(denied)
        assert getattr(denied, "errno", None) != errno.EIO
        assert "is not registered" not in rendered
        assert "Bad file descriptor" not in rendered
|
||||
@@ -19,6 +19,18 @@ def _touch_ink(root: Path) -> None:
|
||||
ink.write_text("{}")
|
||||
|
||||
|
||||
def _touch_tui_entry(root: Path) -> None:
    """Write ``dist/entry.js`` under *root*, creating ``dist`` if needed."""
    dist_dir = root / "dist"
    dist_dir.mkdir(parents=True, exist_ok=True)
    (dist_dir / "entry.js").write_text("console.log('tui')")
|
||||
|
||||
|
||||
def _touch_ink_bundle(root: Path) -> None:
    """Write ``packages/hermes-ink/dist/ink-bundle.js`` under *root*.

    Missing parent directories are created first.
    """
    bundle_dir = root / "packages" / "hermes-ink" / "dist"
    bundle_dir.mkdir(parents=True, exist_ok=True)
    (bundle_dir / "ink-bundle.js").write_text("export {}")
|
||||
|
||||
|
||||
def test_need_install_when_ink_missing(tmp_path: Path, main_mod) -> None:
|
||||
(tmp_path / "package-lock.json").write_text("{}")
|
||||
assert main_mod._tui_need_npm_install(tmp_path) is True
|
||||
@@ -51,3 +63,19 @@ def test_need_install_when_marker_missing(tmp_path: Path, main_mod) -> None:
|
||||
def test_no_install_without_lockfile_when_ink_present(tmp_path: Path, main_mod) -> None:
    """With ink present and no package-lock.json written, no install is needed."""
    _touch_ink(tmp_path)
    needs_install = main_mod._tui_need_npm_install(tmp_path)
    assert needs_install is False
|
||||
|
||||
|
||||
def test_build_needed_when_local_ink_bundle_missing(tmp_path: Path, main_mod) -> None:
    """Entry file and ink exist but the ink bundle does not: a build is needed."""
    for prepare in (_touch_tui_entry, _touch_ink):
        prepare(tmp_path)

    needs_install = main_mod._tui_need_npm_install(tmp_path)
    assert needs_install is False
    needs_build = main_mod._tui_build_needed(tmp_path)
    assert needs_build is True
|
||||
|
||||
|
||||
def test_build_not_needed_when_entry_and_ink_bundle_present(tmp_path: Path, main_mod) -> None:
    """Entry file, ink and the ink bundle all exist: no build is needed."""
    for prepare in (_touch_tui_entry, _touch_ink, _touch_ink_bundle):
        prepare(tmp_path)

    needs_build = main_mod._tui_build_needed(tmp_path)
    assert needs_build is False
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
from argparse import Namespace
|
||||
from pathlib import Path
|
||||
import sys
|
||||
import types
|
||||
|
||||
@@ -8,8 +9,11 @@ import pytest
|
||||
def _args(**overrides):
    """Return a ``Namespace`` of chat arguments with TUI mode enabled.

    Keyword *overrides* replace the defaults; keys that have no default are
    added as extra attributes.
    """
    defaults = {
        "continue_last": None,
        "model": None,
        "provider": None,
        "resume": None,
        "tui": True,
        "tui_dev": False,
    }
    return Namespace(**{**defaults, **overrides})
|
||||
@@ -31,7 +35,7 @@ def test_cmd_chat_tui_continue_uses_latest_tui_session(monkeypatch, main_mod):
|
||||
calls.append(source)
|
||||
return "20260408_235959_a1b2c3" if source == "tui" else None
|
||||
|
||||
def fake_launch(resume_session_id=None, tui_dev=False):
|
||||
def fake_launch(resume_session_id=None, tui_dev=False, model=None, provider=None):
|
||||
captured["resume"] = resume_session_id
|
||||
raise SystemExit(0)
|
||||
|
||||
@@ -58,7 +62,7 @@ def test_cmd_chat_tui_continue_falls_back_to_latest_cli_session(monkeypatch, mai
|
||||
return "20260408_235959_d4e5f6"
|
||||
return None
|
||||
|
||||
def fake_launch(resume_session_id=None, tui_dev=False):
|
||||
def fake_launch(resume_session_id=None, tui_dev=False, model=None, provider=None):
|
||||
captured["resume"] = resume_session_id
|
||||
raise SystemExit(0)
|
||||
|
||||
@@ -76,7 +80,7 @@ def test_cmd_chat_tui_continue_falls_back_to_latest_cli_session(monkeypatch, mai
|
||||
def test_cmd_chat_tui_resume_resolves_title_before_launch(monkeypatch, main_mod):
|
||||
captured = {}
|
||||
|
||||
def fake_launch(resume_session_id=None, tui_dev=False):
|
||||
def fake_launch(resume_session_id=None, tui_dev=False, model=None, provider=None):
|
||||
captured["resume"] = resume_session_id
|
||||
raise SystemExit(0)
|
||||
|
||||
@@ -89,6 +93,60 @@ def test_cmd_chat_tui_resume_resolves_title_before_launch(monkeypatch, main_mod)
|
||||
assert captured["resume"] == "20260409_000000_aa11bb"
|
||||
|
||||
|
||||
def test_cmd_chat_tui_passes_model_and_provider(monkeypatch, main_mod):
    """``cmd_chat`` forwards the model and provider arguments to ``_launch_tui``."""
    captured = {}

    def fake_launch(resume_session_id=None, tui_dev=False, model=None, provider=None):
        # Record every argument, then stop the command via SystemExit.
        captured["model"] = model
        captured["provider"] = provider
        captured["resume"] = resume_session_id
        captured["tui_dev"] = tui_dev
        raise SystemExit(0)

    monkeypatch.setattr(main_mod, "_launch_tui", fake_launch)
    chat_args = _args(model="anthropic/claude-sonnet-4.6", provider="anthropic")

    with pytest.raises(SystemExit):
        main_mod.cmd_chat(chat_args)

    expected = {
        "model": "anthropic/claude-sonnet-4.6",
        "provider": "anthropic",
        "resume": None,
        "tui_dev": False,
    }
    assert captured == expected
|
||||
|
||||
|
||||
def test_launch_tui_exports_model_and_provider(monkeypatch, main_mod):
    """``_launch_tui`` passes model and provider to the child via its environment."""
    captured = {}

    def fake_argv(tui_dir, tui_dev):
        # Fixed command line and working directory for the stubbed launcher.
        return ["node", "dist/entry.js"], Path(".")

    def fake_call(argv, cwd=None, env=None):
        captured["argv"] = argv
        captured["cwd"] = cwd
        captured["env"] = env
        return 1

    monkeypatch.setattr(main_mod, "_make_tui_argv", fake_argv)
    monkeypatch.setattr(main_mod.subprocess, "call", fake_call)

    with pytest.raises(SystemExit):
        main_mod._launch_tui(model="nous/hermes-test", provider="nous")

    child_env = captured["env"]
    expected_env = {
        "HERMES_MODEL": "nous/hermes-test",
        "HERMES_INFERENCE_MODEL": "nous/hermes-test",
        "HERMES_TUI_PROVIDER": "nous",
        "HERMES_INFERENCE_PROVIDER": "nous",
    }
    for name, value in expected_env.items():
        assert child_env[name] == value
|
||||
|
||||
|
||||
def test_print_tui_exit_summary_includes_resume_and_token_totals(monkeypatch, capsys):
|
||||
import hermes_cli.main as main_mod
|
||||
|
||||
|
||||
@@ -0,0 +1,822 @@
|
||||
"""Tests for the Kanban dashboard plugin backend (plugins/kanban/dashboard/plugin_api.py).
|
||||
|
||||
The plugin mounts as /api/plugins/kanban/ inside the dashboard's FastAPI app,
|
||||
but here we attach its router to a bare FastAPI instance so we can test the
|
||||
REST surface without spinning up the whole dashboard.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import importlib.util
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
from fastapi import FastAPI
|
||||
from fastapi.testclient import TestClient
|
||||
|
||||
from hermes_cli import kanban_db as kb
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fixtures
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _load_plugin_router():
    """Import plugins/kanban/dashboard/plugin_api.py by path and return its ``router``.

    The module is loaded from its file location under a test-only module
    name and registered in ``sys.modules`` before it is executed.
    """
    # This test file sits two directory levels below the repository root.
    root = Path(__file__).resolve().parents[2]
    plugin_file = root.joinpath("plugins", "kanban", "dashboard", "plugin_api.py")
    assert plugin_file.exists(), f"plugin file missing: {plugin_file}"

    module_spec = importlib.util.spec_from_file_location(
        "hermes_dashboard_plugin_kanban_test", plugin_file,
    )
    assert module_spec is not None
    assert module_spec.loader is not None

    plugin_module = importlib.util.module_from_spec(module_spec)
    sys.modules[module_spec.name] = plugin_module
    module_spec.loader.exec_module(plugin_module)
    return plugin_module.router
|
||||
|
||||
|
||||
@pytest.fixture
def kanban_home(tmp_path, monkeypatch):
    """Provide a throwaway HERMES_HOME directory with a freshly initialised kanban DB."""
    hermes_home = tmp_path / ".hermes"
    hermes_home.mkdir()

    # Redirect both the env-var lookup and Path.home() into the temp tree.
    monkeypatch.setattr(Path, "home", lambda: tmp_path)
    monkeypatch.setenv("HERMES_HOME", str(hermes_home))

    kb.init_db()
    return hermes_home
|
||||
|
||||
|
||||
@pytest.fixture
def client(kanban_home):
    """TestClient for a bare FastAPI app with the kanban router mounted."""
    bare_app = FastAPI()
    router = _load_plugin_router()
    bare_app.include_router(router, prefix="/api/plugins/kanban")
    return TestClient(bare_app)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# GET /board on an empty DB
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_board_empty(client):
    """An empty DB yields every canonical column, all without tasks."""
    resp = client.get("/api/plugins/kanban/board")
    assert resp.status_code == 200

    board = resp.json()
    columns = board["columns"]
    column_names = [col["name"] for col in columns]

    # Triage plus the remaining canonical columns must all be present.
    for expected in ("triage", "todo", "ready", "running", "blocked", "done"):
        assert expected in column_names, f"missing column {expected}: {column_names}"

    # ...and none of them may contain a task.
    for col in columns:
        assert len(col["tasks"]) == 0

    assert board["tenants"] == []
    assert board["assignees"] == []
    assert board["latest_event_id"] == 0
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# POST /tasks then GET /board sees it
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_create_task_appears_on_board(client):
    """A task created via POST /tasks shows up in the board's 'ready' column."""
    payload = {
        "title": "Research LLM caching",
        "assignee": "researcher",
        "priority": 3,
        "tenant": "acme",
    }
    created = client.post("/api/plugins/kanban/tasks", json=payload)
    assert created.status_code == 200, created.text

    task = created.json()["task"]
    assert task["title"] == "Research LLM caching"
    assert task["assignee"] == "researcher"
    # No parents, so the task is ready straight away.
    assert task["status"] == "ready"
    assert task["priority"] == 3
    assert task["tenant"] == "acme"
    new_id = task["id"]

    # The board must now list it under 'ready' and expose the filter values.
    board_resp = client.get("/api/plugins/kanban/board")
    assert board_resp.status_code == 200
    board = board_resp.json()

    ready_col = next(col for col in board["columns"] if col["name"] == "ready")
    assert len(ready_col["tasks"]) == 1
    assert ready_col["tasks"][0]["id"] == new_id
    assert "acme" in board["tenants"]
    assert "researcher" in board["assignees"]
|
||||
|
||||
|
||||
def test_tenant_filter(client):
    """The ?tenant= query parameter restricts the board to that tenant's tasks."""
    client.post("/api/plugins/kanban/tasks", json={"title": "A", "tenant": "t1"})
    client.post("/api/plugins/kanban/tasks", json={"title": "B", "tenant": "t2"})

    # Tenant t1: tally per column name, then add the tallies up.
    t1_board = client.get("/api/plugins/kanban/board?tenant=t1")
    per_column = {col["name"]: len(col["tasks"]) for col in t1_board.json()["columns"]}
    assert sum(per_column.values()) == 1

    # Tenant t2: sum the column sizes directly.
    t2_board = client.get("/api/plugins/kanban/board?tenant=t2")
    t2_total = sum(len(col["tasks"]) for col in t2_board.json()["columns"])
    assert t2_total == 1
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# GET /tasks/:id returns body + comments + events + links
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_task_detail_includes_links_and_events(client):
    """GET /tasks/:id exposes parent/child links and at least one event."""
    parent = client.post(
        "/api/plugins/kanban/tasks", json={"title": "parent"},
    ).json()["task"]
    parent_id = parent["id"]

    child = client.post(
        "/api/plugins/kanban/tasks",
        json={"title": "child", "parents": [parent_id]},
    ).json()["task"]
    child_id = child["id"]
    # The parent is not done yet, so the child waits in 'todo'.
    assert child["status"] == "todo"

    # The child's detail lists the parent among its parents.
    child_resp = client.get(f"/api/plugins/kanban/tasks/{child_id}")
    assert child_resp.status_code == 200
    child_detail = child_resp.json()
    assert child_detail["task"]["id"] == child_id
    assert parent_id in child_detail["links"]["parents"]

    # The parent's detail lists the child among its children.
    parent_resp = client.get(f"/api/plugins/kanban/tasks/{parent_id}")
    assert child_id in parent_resp.json()["links"]["children"]

    # The child carries at least one event from its creation.
    assert len(child_detail["events"]) >= 1
|
||||
|
||||
|
||||
def test_task_detail_404_on_unknown(client):
    """Asking for the detail of an unknown task id returns HTTP 404."""
    missing = client.get("/api/plugins/kanban/tasks/does-not-exist")
    assert missing.status_code == 404
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# PATCH /tasks/:id — status transitions
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_patch_status_complete(client):
    """PATCH status=done completes the task and the board shows it under 'done'."""
    task = client.post("/api/plugins/kanban/tasks", json={"title": "x"}).json()["task"]
    task_id = task["id"]

    patched = client.patch(
        f"/api/plugins/kanban/tasks/{task_id}",
        json={"status": "done", "result": "shipped"},
    )
    assert patched.status_code == 200
    assert patched.json()["task"]["status"] == "done"

    # The board must reflect the move.
    columns = client.get("/api/plugins/kanban/board").json()["columns"]
    done_col = next(col for col in columns if col["name"] == "done")
    assert any(row["id"] == task_id for row in done_col["tasks"])
|
||||
|
||||
|
||||
def test_patch_block_then_unblock(client):
    """A task can be moved to 'blocked' with a reason and then back to 'ready'."""
    task = client.post("/api/plugins/kanban/tasks", json={"title": "x"}).json()["task"]
    task_url = f"/api/plugins/kanban/tasks/{task['id']}"

    blocked = client.patch(
        task_url,
        json={"status": "blocked", "block_reason": "need input"},
    )
    assert blocked.status_code == 200
    assert blocked.json()["task"]["status"] == "blocked"

    unblocked = client.patch(task_url, json={"status": "ready"})
    assert unblocked.status_code == 200
    assert unblocked.json()["task"]["status"] == "ready"
|
||||
|
||||
|
||||
def test_patch_drag_drop_move_todo_to_ready(client):
    """Manually promoting a 'todo' task to 'ready' via a direct status write.

    This is the drag-drop path for statuses that have no dedicated verb.
    """
    parent = client.post("/api/plugins/kanban/tasks", json={"title": "p"}).json()["task"]
    child = client.post(
        "/api/plugins/kanban/tasks",
        json={"title": "c", "parents": [parent["id"]]},
    ).json()["task"]
    assert child["status"] == "todo"

    child_id = child["id"]
    promoted = client.patch(
        f"/api/plugins/kanban/tasks/{child_id}",
        json={"status": "ready"},
    )
    assert promoted.status_code == 200
    assert promoted.json()["task"]["status"] == "ready"
|
||||
|
||||
|
||||
def test_patch_reassign(client):
    """PATCH with a new assignee changes the task's assignee."""
    created = client.post(
        "/api/plugins/kanban/tasks",
        json={"title": "x", "assignee": "a"},
    )
    task_id = created.json()["task"]["id"]

    patched = client.patch(
        f"/api/plugins/kanban/tasks/{task_id}",
        json={"assignee": "b"},
    )
    assert patched.status_code == 200
    assert patched.json()["task"]["assignee"] == "b"
|
||||
|
||||
|
||||
def test_patch_priority_and_edit(client):
    """A single PATCH can change both the priority and the title."""
    task = client.post("/api/plugins/kanban/tasks", json={"title": "x"}).json()["task"]
    task_id = task["id"]

    patched = client.patch(
        f"/api/plugins/kanban/tasks/{task_id}",
        json={"priority": 5, "title": "renamed"},
    )
    assert patched.status_code == 200

    updated = patched.json()["task"]
    assert updated["priority"] == 5
    assert updated["title"] == "renamed"
|
||||
|
||||
|
||||
def test_patch_invalid_status(client):
    """An unknown status value is rejected with HTTP 400."""
    task = client.post("/api/plugins/kanban/tasks", json={"title": "x"}).json()["task"]
    task_id = task["id"]

    rejected = client.patch(
        f"/api/plugins/kanban/tasks/{task_id}",
        json={"status": "banana"},
    )
    assert rejected.status_code == 400
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Comments + Links
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_add_comment(client):
    """A posted comment is returned by the task detail with body and author."""
    task = client.post("/api/plugins/kanban/tasks", json={"title": "x"}).json()["task"]
    task_id = task["id"]

    posted = client.post(
        f"/api/plugins/kanban/tasks/{task_id}/comments",
        json={"body": "how's progress?", "author": "teknium"},
    )
    assert posted.status_code == 200

    detail = client.get(f"/api/plugins/kanban/tasks/{task_id}")
    comments = detail.json()["comments"]
    assert len(comments) == 1

    only_comment = comments[0]
    assert only_comment["body"] == "how's progress?"
    assert only_comment["author"] == "teknium"
|
||||
|
||||
|
||||
def test_add_comment_empty_rejected(client):
    """A whitespace-only comment body is rejected with HTTP 400."""
    task = client.post("/api/plugins/kanban/tasks", json={"title": "x"}).json()["task"]
    task_id = task["id"]

    rejected = client.post(
        f"/api/plugins/kanban/tasks/{task_id}/comments",
        json={"body": " "},
    )
    assert rejected.status_code == 400
|
||||
|
||||
|
||||
def test_add_link_and_delete_link(client):
    """A parent/child link can be created, observed on the child, then deleted."""
    parent = client.post("/api/plugins/kanban/tasks", json={"title": "a"}).json()["task"]
    child = client.post("/api/plugins/kanban/tasks", json={"title": "b"}).json()["task"]
    parent_id = parent["id"]
    child_id = child["id"]

    linked = client.post(
        "/api/plugins/kanban/links",
        json={"parent_id": parent_id, "child_id": child_id},
    )
    assert linked.status_code == 200

    child_detail = client.get(f"/api/plugins/kanban/tasks/{child_id}")
    assert parent_id in child_detail.json()["links"]["parents"]

    # DELETE takes the link endpoints as query parameters.
    unlinked = client.delete(
        "/api/plugins/kanban/links",
        params={"parent_id": parent_id, "child_id": child_id},
    )
    assert unlinked.status_code == 200
    assert unlinked.json()["ok"] is True
|
||||
|
||||
|
||||
def test_add_link_cycle_rejected(client):
    """Adding the reverse of an existing link (a cycle) is rejected with HTTP 400."""
    first = client.post("/api/plugins/kanban/tasks", json={"title": "a"}).json()["task"]
    second = client.post("/api/plugins/kanban/tasks", json={"title": "b"}).json()["task"]

    # Forward link: first -> second.
    client.post(
        "/api/plugins/kanban/links",
        json={"parent_id": first["id"], "child_id": second["id"]},
    )

    # The reverse link would close a loop.
    reverse = client.post(
        "/api/plugins/kanban/links",
        json={"parent_id": second["id"], "child_id": first["id"]},
    )
    assert reverse.status_code == 400
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Dispatch nudge
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_dispatch_dry_run(client):
    """POST /dispatch with dry_run=true succeeds and returns a JSON object."""
    client.post(
        "/api/plugins/kanban/tasks",
        json={"title": "work", "assignee": "researcher"},
    )

    dispatched = client.post("/api/plugins/kanban/dispatch?dry_run=true&max=4")
    assert dispatched.status_code == 200

    # Only the top-level shape of the payload is checked here.
    payload = dispatched.json()
    assert isinstance(payload, dict)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Triage column (new v1 status)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_create_triage_lands_in_triage_column(client):
    """Creating a task with triage=True places it in the 'triage' column."""
    created = client.post(
        "/api/plugins/kanban/tasks",
        json={"title": "rough idea, spec me", "triage": True},
    )
    assert created.status_code == 200
    assert created.json()["task"]["status"] == "triage"

    board = client.get("/api/plugins/kanban/board")
    triage_col = next(col for col in board.json()["columns"] if col["name"] == "triage")
    triage_tasks = triage_col["tasks"]
    assert len(triage_tasks) == 1
    assert triage_tasks[0]["title"] == "rough idea, spec me"
|
||||
|
||||
|
||||
def test_triage_task_not_promoted_to_ready(client):
    """A parentless triage task stays in 'triage' after a dispatcher run."""
    client.post(
        "/api/plugins/kanban/tasks",
        json={"title": "must stay put", "triage": True},
    )

    # A real (non-dry-run) dispatch must leave the triage task where it is.
    client.post("/api/plugins/kanban/dispatch?dry_run=false&max=4")

    board = client.get("/api/plugins/kanban/board")
    triage_col = next(col for col in board.json()["columns"] if col["name"] == "triage")
    ready_col = next(col for col in board.json()["columns"] if col["name"] == "ready")
    assert len(triage_col["tasks"]) == 1
    assert len(ready_col["tasks"]) == 0
|
||||
|
||||
|
||||
def test_patch_status_triage_works(client):
    """A task can be pushed back into triage and then moved out to 'todo'."""
    task = client.post(
        "/api/plugins/kanban/tasks", json={"title": "x"},
    ).json()["task"]
    task_url = f"/api/plugins/kanban/tasks/{task['id']}"

    # A freshly created task starts as 'ready'; send it to triage.
    to_triage = client.patch(task_url, json={"status": "triage"})
    assert to_triage.status_code == 200
    assert to_triage.json()["task"]["status"] == "triage"

    # Then promote it to todo.
    to_todo = client.patch(task_url, json={"status": "todo"})
    assert to_todo.status_code == 200
    assert to_todo.json()["task"]["status"] == "todo"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Progress rollup (done children / total children)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_board_progress_rollup(client):
    """Board rows carry a done/total rollup over their children."""

    def find_row(board, task_id):
        # Locate a task's row anywhere on the board, whatever its column.
        return next(
            row for col in board["columns"] for row in col["tasks"]
            if row["id"] == task_id
        )

    parent = client.post(
        "/api/plugins/kanban/tasks", json={"title": "parent"},
    ).json()["task"]
    child_a = client.post(
        "/api/plugins/kanban/tasks",
        json={"title": "a", "parents": [parent["id"]]},
    ).json()["task"]
    child_b = client.post(
        "/api/plugins/kanban/tasks",
        json={"title": "b", "parents": [parent["id"]]},
    ).json()["task"]

    # The children start in 'todo' because the parent is not done; move them
    # to 'ready' so that completing them is an accepted transition.
    for child_id in (child_a["id"], child_b["id"]):
        promoted = client.patch(
            f"/api/plugins/kanban/tasks/{child_id}", json={"status": "ready"},
        )
        assert promoted.status_code == 200

    # Nothing completed yet: 0/2.
    board_resp = client.get("/api/plugins/kanban/board")
    parent_row = find_row(board_resp.json(), parent["id"])
    assert parent_row["progress"] == {"done": 0, "total": 2}

    # Complete one child: 1/2.
    completed = client.patch(
        f"/api/plugins/kanban/tasks/{child_a['id']}",
        json={"status": "done"},
    )
    assert completed.status_code == 200
    board_resp = client.get("/api/plugins/kanban/board")
    parent_row = find_row(board_resp.json(), parent["id"])
    assert parent_row["progress"] == {"done": 1, "total": 2}

    # A task with no children reports progress=None rather than 0/0.
    assert find_row(board_resp.json(), child_b["id"])["progress"] is None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Auto-init on first board read
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_board_auto_initializes_missing_db(tmp_path, monkeypatch):
    """GET /board on a home without kanban.db must create the DB and return 200."""
    hermes_home = tmp_path / ".hermes"
    hermes_home.mkdir()
    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
    monkeypatch.setattr(Path, "home", lambda: tmp_path)
    # kb.init_db() is intentionally NOT called: the endpoint has to do it.

    bare_app = FastAPI()
    bare_app.include_router(_load_plugin_router(), prefix="/api/plugins/kanban")
    http = TestClient(bare_app)

    board_resp = http.get("/api/plugins/kanban/board")
    assert board_resp.status_code == 200

    db_file = hermes_home / "kanban.db"
    assert db_file.exists(), "init_db wasn't invoked by /board"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# WebSocket auth (query-param token)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_ws_events_rejects_when_token_required(tmp_path, monkeypatch):
    """With a session token configured, the events WebSocket needs ?token=.

    A missing or wrong token must close the socket with code 1008 (policy
    violation); the correct token must be accepted.
    """
    import types

    from starlette.websockets import WebSocketDisconnect

    hermes_home = tmp_path / ".hermes"
    hermes_home.mkdir()
    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
    monkeypatch.setattr(Path, "home", lambda: tmp_path)
    kb.init_db()

    # Replace hermes_cli.web_server with a stub that only carries the token
    # the plugin is expected to compare against.
    fake_web_server = types.SimpleNamespace(_SESSION_TOKEN="secret-xyz")
    monkeypatch.setitem(sys.modules, "hermes_cli.web_server", fake_web_server)

    bare_app = FastAPI()
    bare_app.include_router(_load_plugin_router(), prefix="/api/plugins/kanban")
    http = TestClient(bare_app)

    # No token -> closed with a policy-violation code.
    with pytest.raises(WebSocketDisconnect) as no_token:
        with http.websocket_connect("/api/plugins/kanban/events"):
            pass
    assert no_token.value.code == 1008

    # Wrong token -> same outcome.
    with pytest.raises(WebSocketDisconnect) as bad_token:
        with http.websocket_connect("/api/plugins/kanban/events?token=nope"):
            pass
    assert bad_token.value.code == 1008

    # Correct token -> the handshake succeeds; we close from our side.
    with http.websocket_connect(
        "/api/plugins/kanban/events?token=secret-xyz"
    ) as socket:
        assert socket is not None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Bulk actions
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_bulk_status_ready(client):
    """A bulk status change moves every listed task to 'ready'."""
    titles = ("a", "b", "c")
    task_ids = [
        client.post("/api/plugins/kanban/tasks", json={"title": title}).json()["task"]["id"]
        for title in titles
    ]

    # Parentless tasks are already 'ready', so block them first to make the
    # bulk transition observable.
    for task_id in task_ids:
        client.patch(f"/api/plugins/kanban/tasks/{task_id}",
                     json={"status": "blocked", "block_reason": "wait"})

    bulk = client.post("/api/plugins/kanban/tasks/bulk",
                       json={"ids": task_ids, "status": "ready"})
    assert bulk.status_code == 200
    outcomes = bulk.json()["results"]
    assert all(outcome["ok"] for outcome in outcomes)

    # All three must now sit in the 'ready' column.
    board = client.get("/api/plugins/kanban/board").json()
    ready_col = next(col for col in board["columns"] if col["name"] == "ready")
    ready_ids = {row["id"] for row in ready_col["tasks"]}
    assert set(task_ids).issubset(ready_ids)
|
||||
|
||||
|
||||
def test_bulk_archive(client):
    """Bulk-archived tasks vanish from the default board view."""
    first = client.post("/api/plugins/kanban/tasks", json={"title": "a"}).json()["task"]
    second = client.post("/api/plugins/kanban/tasks", json={"title": "b"}).json()["task"]

    bulk = client.post("/api/plugins/kanban/tasks/bulk",
                       json={"ids": [first["id"], second["id"]], "archive": True})
    assert bulk.status_code == 200
    assert all(outcome["ok"] for outcome in bulk.json()["results"])

    # The default board hides archived tasks, so neither id may appear.
    board = client.get("/api/plugins/kanban/board").json()
    visible_ids = {row["id"] for col in board["columns"] for row in col["tasks"]}
    assert first["id"] not in visible_ids
    assert second["id"] not in visible_ids
|
||||
|
||||
|
||||
def test_bulk_reassign(client):
    """A bulk assignee change is applied to every listed task."""
    first = client.post("/api/plugins/kanban/tasks",
                        json={"title": "a", "assignee": "old"}).json()["task"]
    second = client.post("/api/plugins/kanban/tasks",
                         json={"title": "b", "assignee": "old"}).json()["task"]
    task_ids = [first["id"], second["id"]]

    bulk = client.post("/api/plugins/kanban/tasks/bulk",
                       json={"ids": task_ids, "assignee": "new"})
    assert bulk.status_code == 200

    for task_id in task_ids:
        detail = client.get(f"/api/plugins/kanban/tasks/{task_id}").json()
        assert detail["task"]["assignee"] == "new"
|
||||
|
||||
|
||||
def test_bulk_unassign_via_empty_string(client):
    """Passing assignee="" in a bulk request clears the assignee (None)."""
    created = client.post("/api/plugins/kanban/tasks",
                          json={"title": "a", "assignee": "x"}).json()["task"]
    task_id = created["id"]

    bulk = client.post("/api/plugins/kanban/tasks/bulk",
                       json={"ids": [task_id], "assignee": ""})
    assert bulk.status_code == 200

    refreshed = client.get(f"/api/plugins/kanban/tasks/{task_id}").json()["task"]
    assert refreshed["assignee"] is None
|
||||
|
||||
|
||||
def test_bulk_partial_failure_doesnt_abort_siblings(client):
    """A bad id in the middle of a batch must not stop the valid ids from applying."""
    first = client.post("/api/plugins/kanban/tasks", json={"title": "a"}).json()["task"]
    last = client.post("/api/plugins/kanban/tasks", json={"title": "c"}).json()["task"]
    good_ids = (first["id"], last["id"])

    bulk = client.post("/api/plugins/kanban/tasks/bulk",
                       json={"ids": [first["id"], "bogus-id", last["id"]], "priority": 7})
    assert bulk.status_code == 200

    outcomes = bulk.json()["results"]
    assert len(outcomes) == 3

    succeeded = {outcome["id"] for outcome in outcomes if outcome["ok"]}
    assert first["id"] in succeeded
    assert last["id"] in succeeded
    assert any(not outcome["ok"] and outcome["id"] == "bogus-id" for outcome in outcomes)

    # The valid siblings must actually carry the new priority.
    for task_id in good_ids:
        refreshed = client.get(f"/api/plugins/kanban/tasks/{task_id}").json()["task"]
        assert refreshed["priority"] == 7
|
||||
|
||||
|
||||
def test_bulk_empty_ids_400(client):
    """A bulk request with an empty id list is rejected with HTTP 400."""
    rejected = client.post("/api/plugins/kanban/tasks/bulk", json={"ids": []})
    assert rejected.status_code == 400
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# /config endpoint
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_config_returns_defaults_when_section_missing(client):
    """GET /config falls back to defaults when dashboard.kanban is not configured."""
    resp = client.get("/api/plugins/kanban/config")
    assert resp.status_code == 200

    # Expected defaults when the dashboard.kanban section is absent.
    config = resp.json()
    assert config["default_tenant"] == ""
    assert config["lane_by_profile"] is True
    assert config["include_archived_by_default"] is False
    assert config["render_markdown"] is True
|
||||
|
||||
|
||||
def test_config_reads_dashboard_kanban_section(tmp_path, monkeypatch, client):
    """GET /config reflects the values written under dashboard.kanban.

    ``tmp_path`` and ``monkeypatch`` are kept in the signature for
    compatibility; the home directory is taken from the HERMES_HOME
    environment variable, which is read here.
    """
    home = Path(os.environ["HERMES_HOME"])
    # The four settings must be nested *under* ``kanban`` (deeper indent than
    # the ``kanban:`` key itself); with equal indentation they would become
    # siblings of ``kanban`` and the section would be empty.
    (home / "config.yaml").write_text(
        "dashboard:\n"
        "  kanban:\n"
        "    default_tenant: acme\n"
        "    lane_by_profile: false\n"
        "    include_archived_by_default: true\n"
        "    render_markdown: false\n",
        encoding="utf-8",
    )

    r = client.get("/api/plugins/kanban/config")
    assert r.status_code == 200
    data = r.json()
    assert data["default_tenant"] == "acme"
    assert data["lane_by_profile"] is False
    assert data["include_archived_by_default"] is True
    assert data["render_markdown"] is False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Runs surfacing (vulcan-artivus RFC feedback)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_task_detail_includes_runs(client):
    """GET /tasks/:id returns a runs[] array describing the attempt history."""
    created = client.post("/api/plugins/kanban/tasks",
                          json={"title": "port x", "assignee": "worker"}).json()
    task_id = created["task"]["id"]

    # Claim and complete through the DB layer directly so that a run gets
    # recorded (per the original note, a PATCH to 'running' does not go
    # through claim_task).
    import hermes_cli.kanban_db as _kb
    db = _kb.connect()
    try:
        _kb.claim_task(db, task_id)
        _kb.complete_task(
            db, task_id,
            result="done",
            summary="tested on rate limiter",
            metadata={"changed_files": ["limiter.py"]},
        )
    finally:
        db.close()

    detail = client.get(f"/api/plugins/kanban/tasks/{task_id}").json()
    assert "runs" in detail
    assert len(detail["runs"]) == 1

    only_run = detail["runs"][0]
    assert only_run["outcome"] == "completed"
    assert only_run["profile"] == "worker"
    assert only_run["summary"] == "tested on rate limiter"
    assert only_run["metadata"] == {"changed_files": ["limiter.py"]}
    assert only_run["ended_at"] is not None
|
||||
|
||||
|
||||
def test_task_detail_runs_empty_before_claim(client):
    """A never-claimed task reports ``runs == []`` rather than omitting the key."""
    created = client.post("/api/plugins/kanban/tasks", json={"title": "fresh"}).json()
    task_id = created["task"]["id"]

    detail = client.get(f"/api/plugins/kanban/tasks/{task_id}").json()
    assert detail["runs"] == []
|
||||
|
||||
|
||||
def test_patch_status_done_with_summary_and_metadata(client):
    """PATCH status=done with summary + metadata must be stored on the run."""
    from hermes_cli import kanban_db as kb

    # Create the task, then claim it through the DB layer.
    created = client.post("/api/plugins/kanban/tasks", json={"title": "x", "assignee": "worker"})
    task_id = created.json()["task"]["id"]
    db = kb.connect()
    try:
        kb.claim_task(db, task_id)
    finally:
        db.close()

    completion = {
        "status": "done",
        "summary": "shipped the thing",
        "metadata": {"changed_files": ["a.py", "b.py"], "tests_run": 7},
    }
    patched = client.patch(f"/api/plugins/kanban/tasks/{task_id}", json=completion)
    assert patched.status_code == 200, patched.text

    # The latest run must carry both the summary and the metadata.
    db = kb.connect()
    try:
        latest = kb.latest_run(db, task_id)
        assert latest.outcome == "completed"
        assert latest.summary == "shipped the thing"
        assert latest.metadata == {"changed_files": ["a.py", "b.py"], "tests_run": 7}
    finally:
        db.close()
|
||||
|
||||
|
||||
def test_patch_status_done_without_summary_still_works(client):
    """Back-compat: a PATCH carrying only ``result`` still completes the run."""
    from hermes_cli import kanban_db as kb

    created = client.post("/api/plugins/kanban/tasks", json={"title": "y", "assignee": "worker"})
    task_id = created.json()["task"]["id"]
    db = kb.connect()
    try:
        kb.claim_task(db, task_id)
    finally:
        db.close()

    patched = client.patch(
        f"/api/plugins/kanban/tasks/{task_id}",
        json={"status": "done", "result": "legacy shape"},
    )
    assert patched.status_code == 200, patched.text

    db = kb.connect()
    try:
        latest = kb.latest_run(db, task_id)
        assert latest.outcome == "completed"
        # With no summary supplied, the run's summary equals the result text.
        assert latest.summary == "legacy shape"
    finally:
        db.close()
|
||||
|
||||
|
||||
def test_patch_status_archive_closes_running_run(client):
    """Archiving a task that has an open run must close that run."""
    from hermes_cli import kanban_db as kb

    created = client.post("/api/plugins/kanban/tasks", json={"title": "z", "assignee": "worker"})
    task_id = created.json()["task"]["id"]

    # Claim the task and confirm that its run is still open.
    db = kb.connect()
    try:
        kb.claim_task(db, task_id)
        in_flight = kb.latest_run(db, task_id)
        assert in_flight.ended_at is None
    finally:
        db.close()

    patched = client.patch(
        f"/api/plugins/kanban/tasks/{task_id}",
        json={"status": "archived"},
    )
    assert patched.status_code == 200, patched.text

    # The task is archived, has no current run, and the run is 'reclaimed'.
    db = kb.connect()
    try:
        archived = kb.get_task(db, task_id)
        assert archived.status == "archived"
        assert archived.current_run_id is None
        assert kb.latest_run(db, task_id).outcome == "reclaimed"
    finally:
        db.close()
|
||||
|
||||
|
||||
def test_event_dict_includes_run_id(client):
    """GET /tasks/:id returns events that each carry a ``run_id`` key.

    The "completed" event must reference the id of the run that was open
    when the task was completed.
    """
    r = client.post("/api/plugins/kanban/tasks", json={"title": "e", "assignee": "worker"})
    tid = r.json()["task"]["id"]

    from hermes_cli import kanban_db as kb
    conn = kb.connect()
    try:
        kb.claim_task(conn, tid)
        # Capture the run id before completing, to compare against later.
        run_id = kb.latest_run(conn, tid).id
        kb.complete_task(conn, tid, summary="wss")
    finally:
        conn.close()

    r = client.get(f"/api/plugins/kanban/tasks/{tid}")
    assert r.status_code == 200
    events = r.json()["events"]

    # Every event in the response must have a run_id key (None or int).
    for e in events:
        assert "run_id" in e, f"missing run_id in event: {e}"

    # The completed event must have the actual run_id. Check that one exists
    # first so a missing event fails with a readable message instead of an
    # IndexError.
    comp = [e for e in events if e["kind"] == "completed"]
    assert comp, f"no 'completed' event in response: {events}"
    assert comp[0]["run_id"] == run_id
|
||||
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Per-task force-loaded skills via REST
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_create_task_with_skills_roundtrips(client):
    """``skills`` sent to POST /tasks comes back from POST and from GET /tasks/:id."""
    wanted_skills = ["translation", "github-code-review"]
    created = client.post(
        "/api/plugins/kanban/tasks",
        json={
            "title": "translate docs",
            "assignee": "linguist",
            "skills": ["translation", "github-code-review"],
        },
    )
    assert created.status_code == 200, created.text
    task = created.json()["task"]
    assert task["skills"] == wanted_skills

    # Read it back through the detail endpoint as well.
    task_id = task["id"]
    detail = client.get(f"/api/plugins/kanban/tasks/{task_id}").json()
    assert detail["task"]["skills"] == wanted_skills
|
||||
|
||||
|
||||
def test_create_task_without_skills_defaults_to_empty_list(client):
    """A task created without ``skills`` reports it as either None or []."""
    created = client.post(
        "/api/plugins/kanban/tasks",
        json={"title": "no skills", "assignee": "x"},
    )
    assert created.status_code == 200, created.text

    # Both "no value" spellings are accepted; per the original note the
    # drawer guards with `t.skills && t.skills.length > 0`, which handles
    # null and [] alike.
    skills = created.json()["task"].get("skills")
    assert skills in (None, [])
|
||||
@@ -31,7 +31,6 @@ def _make_agent_with_engine(engine):
|
||||
agent._vprint = lambda *a, **kw: None
|
||||
agent._last_flushed_db_idx = 0
|
||||
# Stub the few AIAgent methods _compress_context uses.
|
||||
agent.flush_memories = lambda *a, **kw: None
|
||||
agent._invalidate_system_prompt = lambda *a, **kw: None
|
||||
agent._build_system_prompt = lambda *a, **kw: "new-system-prompt"
|
||||
agent.commit_memory_session = lambda *a, **kw: None
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user