Compare commits
1 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 97a4018dfc |
@@ -390,16 +390,7 @@ def build_anthropic_client(api_key: str, base_url: str = None, timeout: float =
|
||||
"timeout": Timeout(timeout=float(_read_timeout), connect=10.0),
|
||||
}
|
||||
if normalized_base_url:
|
||||
# Azure Anthropic endpoints require an ``api-version`` query parameter.
|
||||
# Pass it via default_query so the SDK appends it to every request URL
|
||||
# without corrupting the base_url (appending it directly produces
|
||||
# malformed paths like /anthropic?api-version=.../v1/messages).
|
||||
_is_azure_endpoint = "azure.com" in normalized_base_url.lower()
|
||||
if _is_azure_endpoint and "api-version" not in normalized_base_url:
|
||||
kwargs["base_url"] = normalized_base_url.rstrip("/")
|
||||
kwargs["default_query"] = {"api-version": "2025-04-15"}
|
||||
else:
|
||||
kwargs["base_url"] = normalized_base_url
|
||||
kwargs["base_url"] = normalized_base_url
|
||||
common_betas = _common_betas_for_base_url(normalized_base_url)
|
||||
|
||||
if _is_kimi_coding_endpoint(base_url):
|
||||
@@ -1689,9 +1680,9 @@ def build_anthropic_kwargs(
|
||||
|
||||
# ── Strip sampling params on 4.7+ ─────────────────────────────────
|
||||
# Opus 4.7 rejects any non-default temperature/top_p/top_k with a 400.
|
||||
# Callers (auxiliary_client, etc.) may set these for older models;
|
||||
# drop them here as a safety net so upstream 4.6 → 4.7 migrations
|
||||
# don't require coordinated edits everywhere.
|
||||
# Callers (auxiliary_client, flush_memories, etc.) may set these for
|
||||
# older models; drop them here as a safety net so upstream 4.6 → 4.7
|
||||
# migrations don't require coordinated edits everywhere.
|
||||
if _forbids_sampling_params(model):
|
||||
for _sampling_key in ("temperature", "top_p", "top_k"):
|
||||
kwargs.pop(_sampling_key, None)
|
||||
|
||||
+13
-134
@@ -42,7 +42,6 @@ import time
|
||||
from pathlib import Path # noqa: F401 — used by test mocks
|
||||
from types import SimpleNamespace
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
from urllib.parse import urlparse, parse_qs, urlunparse
|
||||
|
||||
from openai import OpenAI
|
||||
|
||||
@@ -53,17 +52,6 @@ from utils import base_url_host_matches, base_url_hostname, normalize_proxy_env_
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _extract_url_query_params(url: str):
|
||||
"""Extract query params from URL, return (clean_url, default_query dict or None)."""
|
||||
parsed = urlparse(url)
|
||||
if parsed.query:
|
||||
clean = urlunparse(parsed._replace(query=""))
|
||||
params = {k: v[0] for k, v in parse_qs(parsed.query).items()}
|
||||
return clean, params
|
||||
return url, None
|
||||
|
||||
|
||||
# Module-level flag: only warn once per process about stale OPENAI_BASE_URL.
|
||||
_stale_base_url_warned = False
|
||||
|
||||
@@ -402,7 +390,7 @@ class _CodexCompletionsAdapter:
|
||||
# Note: the Codex endpoint (chatgpt.com/backend-api/codex) does NOT
|
||||
# support max_output_tokens or temperature — omit to avoid 400 errors.
|
||||
|
||||
# Tools support for auxiliary callers (e.g. skills_hub) that pass function schemas
|
||||
# Tools support for flush_memories and similar callers
|
||||
tools = kwargs.get("tools")
|
||||
if tools:
|
||||
converted = []
|
||||
@@ -1169,10 +1157,8 @@ def _try_custom_endpoint() -> Tuple[Optional[Any], Optional[str]]:
|
||||
return None, None
|
||||
model = _read_main_model() or "gpt-4o-mini"
|
||||
logger.debug("Auxiliary client: custom endpoint (%s, api_mode=%s)", model, custom_mode or "chat_completions")
|
||||
_clean_base, _dq = _extract_url_query_params(custom_base)
|
||||
_extra = {"default_query": _dq} if _dq else {}
|
||||
if custom_mode == "codex_responses":
|
||||
real_client = OpenAI(api_key=custom_key, base_url=_clean_base, **_extra)
|
||||
real_client = OpenAI(api_key=custom_key, base_url=custom_base)
|
||||
return CodexAuxiliaryClient(real_client, model), model
|
||||
if custom_mode == "anthropic_messages":
|
||||
# Third-party Anthropic-compatible gateway (MiniMax, Zhipu GLM,
|
||||
@@ -1186,12 +1172,12 @@ def _try_custom_endpoint() -> Tuple[Optional[Any], Optional[str]]:
|
||||
"Custom endpoint declares api_mode=anthropic_messages but the "
|
||||
"anthropic SDK is not installed — falling back to OpenAI-wire."
|
||||
)
|
||||
return OpenAI(api_key=custom_key, base_url=_clean_base, **_extra), model
|
||||
return OpenAI(api_key=custom_key, base_url=custom_base), model
|
||||
return (
|
||||
AnthropicAuxiliaryClient(real_client, model, custom_key, custom_base, is_oauth=False),
|
||||
model,
|
||||
)
|
||||
return OpenAI(api_key=custom_key, base_url=_clean_base, **_extra), model
|
||||
return OpenAI(api_key=custom_key, base_url=custom_base), model
|
||||
|
||||
|
||||
def _try_codex() -> Tuple[Optional[Any], Optional[str]]:
|
||||
@@ -1363,49 +1349,6 @@ def _is_auth_error(exc: Exception) -> bool:
|
||||
return "error code: 401" in err_lower or "authenticationerror" in type(exc).__name__.lower()
|
||||
|
||||
|
||||
def _is_unsupported_parameter_error(exc: Exception, param: str) -> bool:
|
||||
"""Detect provider 400s for an unsupported request parameter.
|
||||
|
||||
Different OpenAI-compatible endpoints phrase the same class of error a few
|
||||
ways: ``Unsupported parameter: X``, ``unsupported_parameter`` with a
|
||||
``param`` field, ``X is not supported``, ``unknown parameter: X``,
|
||||
``unrecognized request argument: X``. We match on both the parameter
|
||||
name and a generic "unsupported/unknown/unrecognized parameter" marker so
|
||||
call sites can reactively retry without the offending key instead of
|
||||
surfacing a noisy auxiliary failure.
|
||||
|
||||
Generalizes the temperature-specific detector that originally shipped
|
||||
with PR #15621 so the same retry strategy can cover ``max_tokens``,
|
||||
``seed``, ``top_p``, and any future quirk. Credit @nicholasrae (PR #15416)
|
||||
for the generalization pattern.
|
||||
"""
|
||||
param_lower = (param or "").lower()
|
||||
if not param_lower:
|
||||
return False
|
||||
err_lower = str(exc).lower()
|
||||
if param_lower not in err_lower:
|
||||
return False
|
||||
return any(marker in err_lower for marker in (
|
||||
"unsupported parameter",
|
||||
"unsupported_parameter",
|
||||
"not supported",
|
||||
"does not support",
|
||||
"unknown parameter",
|
||||
"unrecognized request argument",
|
||||
"unrecognized parameter",
|
||||
"invalid parameter",
|
||||
))
|
||||
|
||||
|
||||
def _is_unsupported_temperature_error(exc: Exception) -> bool:
    """Report whether ``exc`` is a provider rejection of ``temperature``.

    Back-compat wrapper around :func:`_is_unsupported_parameter_error` with
    the parameter name fixed to ``"temperature"``.  It stays a separate
    symbol because existing tests and call sites import it by name.
    """
    rejected = _is_unsupported_parameter_error(exc, "temperature")
    return rejected
|
||||
|
||||
|
||||
def _evict_cached_clients(provider: str) -> None:
|
||||
"""Drop cached auxiliary clients for a provider so fresh creds are used."""
|
||||
normalized = _normalize_aux_provider(provider)
|
||||
@@ -1839,15 +1782,12 @@ def resolve_provider_client(
|
||||
provider,
|
||||
)
|
||||
extra = {}
|
||||
_clean_base, _dq = _extract_url_query_params(custom_base)
|
||||
if _dq:
|
||||
extra["default_query"] = _dq
|
||||
if base_url_host_matches(custom_base, "api.kimi.com"):
|
||||
extra["default_headers"] = {"User-Agent": "claude-code/0.1.0"}
|
||||
elif base_url_host_matches(custom_base, "api.githubcopilot.com"):
|
||||
from hermes_cli.models import copilot_default_headers
|
||||
extra["default_headers"] = copilot_default_headers()
|
||||
client = OpenAI(api_key=custom_key, base_url=_clean_base, **extra)
|
||||
client = OpenAI(api_key=custom_key, base_url=custom_base, **extra)
|
||||
client = _wrap_if_needed(client, final_model, custom_base)
|
||||
return (_to_async_client(client, final_model) if async_mode
|
||||
else (client, final_model))
|
||||
@@ -1884,8 +1824,6 @@ def resolve_provider_client(
|
||||
model or custom_entry.get("model") or _read_main_model() or "gpt-4o-mini",
|
||||
provider,
|
||||
)
|
||||
_clean_base2, _dq2 = _extract_url_query_params(custom_base)
|
||||
_extra2 = {"default_query": _dq2} if _dq2 else {}
|
||||
logger.debug(
|
||||
"resolve_provider_client: named custom provider %r (%s, api_mode=%s)",
|
||||
provider, final_model, entry_api_mode or "chat_completions")
|
||||
@@ -1903,7 +1841,7 @@ def resolve_provider_client(
|
||||
"installed — falling back to OpenAI-wire.",
|
||||
provider,
|
||||
)
|
||||
client = OpenAI(api_key=custom_key, base_url=_clean_base2, **_extra2)
|
||||
client = OpenAI(api_key=custom_key, base_url=custom_base)
|
||||
return (_to_async_client(client, final_model) if async_mode
|
||||
else (client, final_model))
|
||||
sync_anthropic = AnthropicAuxiliaryClient(
|
||||
@@ -1912,7 +1850,7 @@ def resolve_provider_client(
|
||||
if async_mode:
|
||||
return AsyncAnthropicAuxiliaryClient(sync_anthropic), final_model
|
||||
return sync_anthropic, final_model
|
||||
client = OpenAI(api_key=custom_key, base_url=_clean_base2, **_extra2)
|
||||
client = OpenAI(api_key=custom_key, base_url=custom_base)
|
||||
# codex_responses or inherited auto-detect (via _wrap_if_needed).
|
||||
# _wrap_if_needed reads the closed-over `api_mode` (the task-level
|
||||
# override). Named-provider entry api_mode=codex_responses also
|
||||
@@ -2822,8 +2760,8 @@ def _build_call_kwargs(
|
||||
temperature = fixed_temperature
|
||||
|
||||
# Opus 4.7+ rejects any non-default temperature/top_p/top_k — silently
|
||||
# drop here so auxiliary callers that hardcode temperature (e.g. 0 on
|
||||
# structured-JSON extraction) don't 400 the moment
|
||||
# drop here so auxiliary callers that hardcode temperature (e.g. 0.3 on
|
||||
# flush_memories, 0 on structured-JSON extraction) don't 400 the moment
|
||||
# the aux model is flipped to 4.7.
|
||||
if temperature is not None:
|
||||
from agent.anthropic_adapter import _forbids_sampling_params
|
||||
@@ -2911,7 +2849,7 @@ def call_llm(
|
||||
|
||||
Args:
|
||||
task: Auxiliary task name ("compression", "vision", "web_extract",
|
||||
"session_search", "skills_hub", "mcp", "title_generation").
|
||||
"session_search", "skills_hub", "mcp", "flush_memories").
|
||||
Reads provider:model from config/env. Ignored if provider is set.
|
||||
provider: Explicit provider override.
|
||||
model: Explicit model override.
|
||||
@@ -3014,45 +2952,13 @@ def call_llm(
|
||||
if _is_anthropic_compat_endpoint(resolved_provider, _client_base):
|
||||
kwargs["messages"] = _convert_openai_images_to_anthropic(kwargs["messages"])
|
||||
|
||||
# Handle unsupported temperature, max_tokens vs max_completion_tokens retry,
|
||||
# then payment fallback.
|
||||
# Handle max_tokens vs max_completion_tokens retry, then payment fallback.
|
||||
try:
|
||||
return _validate_llm_response(
|
||||
client.chat.completions.create(**kwargs), task)
|
||||
except Exception as first_err:
|
||||
if "temperature" in kwargs and _is_unsupported_temperature_error(first_err):
|
||||
retry_kwargs = dict(kwargs)
|
||||
retry_kwargs.pop("temperature", None)
|
||||
logger.info(
|
||||
"Auxiliary %s: provider rejected temperature; retrying once without it",
|
||||
task or "call",
|
||||
)
|
||||
try:
|
||||
return _validate_llm_response(
|
||||
client.chat.completions.create(**retry_kwargs), task)
|
||||
except Exception as retry_err:
|
||||
retry_err_str = str(retry_err)
|
||||
# If retry still fails, fall through to the max_tokens /
|
||||
# payment / auth chains below using the temperature-stripped
|
||||
# kwargs. Re-raise only if the retry hit something those
|
||||
# chains won't handle.
|
||||
if not (
|
||||
_is_payment_error(retry_err)
|
||||
or _is_connection_error(retry_err)
|
||||
or _is_auth_error(retry_err)
|
||||
or "max_tokens" in retry_err_str
|
||||
or "unsupported_parameter" in retry_err_str
|
||||
):
|
||||
raise
|
||||
first_err = retry_err
|
||||
kwargs = retry_kwargs
|
||||
|
||||
err_str = str(first_err)
|
||||
if max_tokens is not None and (
|
||||
"max_tokens" in err_str
|
||||
or "unsupported_parameter" in err_str
|
||||
or _is_unsupported_parameter_error(first_err, "max_tokens")
|
||||
):
|
||||
if "max_tokens" in err_str or "unsupported_parameter" in err_str:
|
||||
kwargs.pop("max_tokens", None)
|
||||
kwargs["max_completion_tokens"] = max_tokens
|
||||
try:
|
||||
@@ -3315,35 +3221,8 @@ async def async_call_llm(
|
||||
return _validate_llm_response(
|
||||
await client.chat.completions.create(**kwargs), task)
|
||||
except Exception as first_err:
|
||||
if "temperature" in kwargs and _is_unsupported_temperature_error(first_err):
|
||||
retry_kwargs = dict(kwargs)
|
||||
retry_kwargs.pop("temperature", None)
|
||||
logger.info(
|
||||
"Auxiliary %s (async): provider rejected temperature; retrying once without it",
|
||||
task or "call",
|
||||
)
|
||||
try:
|
||||
return _validate_llm_response(
|
||||
await client.chat.completions.create(**retry_kwargs), task)
|
||||
except Exception as retry_err:
|
||||
retry_err_str = str(retry_err)
|
||||
if not (
|
||||
_is_payment_error(retry_err)
|
||||
or _is_connection_error(retry_err)
|
||||
or _is_auth_error(retry_err)
|
||||
or "max_tokens" in retry_err_str
|
||||
or "unsupported_parameter" in retry_err_str
|
||||
):
|
||||
raise
|
||||
first_err = retry_err
|
||||
kwargs = retry_kwargs
|
||||
|
||||
err_str = str(first_err)
|
||||
if max_tokens is not None and (
|
||||
"max_tokens" in err_str
|
||||
or "unsupported_parameter" in err_str
|
||||
or _is_unsupported_parameter_error(first_err, "max_tokens")
|
||||
):
|
||||
if "max_tokens" in err_str or "unsupported_parameter" in err_str:
|
||||
kwargs.pop("max_tokens", None)
|
||||
kwargs["max_completion_tokens"] = max_tokens
|
||||
try:
|
||||
|
||||
@@ -44,31 +44,22 @@ _TOOL_CALL_LEAK_PATTERN = re.compile(
|
||||
# Multimodal content helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _chat_content_to_responses_parts(content: Any, *, role: str = "user") -> List[Dict[str, Any]]:
|
||||
def _chat_content_to_responses_parts(content: Any) -> List[Dict[str, Any]]:
|
||||
"""Convert chat-style multimodal content to Responses API input parts.
|
||||
|
||||
Input: ``[{"type":"text"|"image_url", ...}]`` (native OpenAI Chat format)
|
||||
Output: ``[{"type":"input_text"|"output_text"|"input_image", ...}]`` (Responses format)
|
||||
|
||||
The ``role`` parameter controls the text content type:
|
||||
- ``"user"`` (default) → ``"input_text"``
|
||||
- ``"assistant"`` → ``"output_text"``
|
||||
|
||||
The Responses API rejects ``input_text`` inside assistant messages and
|
||||
``output_text`` inside user messages, so callers MUST pass the correct
|
||||
role for the message being converted.
|
||||
Output: ``[{"type":"input_text"|"input_image", ...}]`` (Responses format)
|
||||
|
||||
Returns an empty list when ``content`` is not a list or contains no
|
||||
recognized parts — callers fall back to the string path.
|
||||
"""
|
||||
text_type = "output_text" if role == "assistant" else "input_text"
|
||||
if not isinstance(content, list):
|
||||
return []
|
||||
converted: List[Dict[str, Any]] = []
|
||||
for part in content:
|
||||
if isinstance(part, str):
|
||||
if part:
|
||||
converted.append({"type": text_type, "text": part})
|
||||
converted.append({"type": "input_text", "text": part})
|
||||
continue
|
||||
if not isinstance(part, dict):
|
||||
continue
|
||||
@@ -76,7 +67,7 @@ def _chat_content_to_responses_parts(content: Any, *, role: str = "user") -> Lis
|
||||
if ptype in {"text", "input_text", "output_text"}:
|
||||
text = part.get("text")
|
||||
if isinstance(text, str) and text:
|
||||
converted.append({"type": text_type, "text": text})
|
||||
converted.append({"type": "input_text", "text": text})
|
||||
continue
|
||||
if ptype in {"image_url", "input_image"}:
|
||||
image_ref = part.get("image_url")
|
||||
@@ -227,23 +218,6 @@ def _responses_tools(tools: Optional[List[Dict[str, Any]]] = None) -> Optional[L
|
||||
# Message format conversion
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_RESPONSE_MESSAGE_STATUSES = {"completed", "incomplete", "in_progress"}
|
||||
|
||||
|
||||
def _normalize_responses_message_status(value: Any, *, default: str = "completed") -> str:
|
||||
"""Normalize a Responses assistant message status for replay.
|
||||
|
||||
The API accepts completed/incomplete/in_progress on replayed assistant
|
||||
output messages. Preserve those exactly (modulo case/hyphen spelling) so
|
||||
incomplete Codex continuation turns don't get falsely marked completed.
|
||||
"""
|
||||
if isinstance(value, str):
|
||||
status = value.strip().lower().replace("-", "_").replace(" ", "_")
|
||||
if status in _RESPONSE_MESSAGE_STATUSES:
|
||||
return status
|
||||
return default
|
||||
|
||||
|
||||
def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
||||
"""Convert internal chat-style messages to Responses input items."""
|
||||
items: List[Dict[str, Any]] = []
|
||||
@@ -259,10 +233,9 @@ def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Di
|
||||
if role in {"user", "assistant"}:
|
||||
content = msg.get("content", "")
|
||||
if isinstance(content, list):
|
||||
content_parts = _chat_content_to_responses_parts(content, role=role)
|
||||
text_type = "output_text" if role == "assistant" else "input_text"
|
||||
content_parts = _chat_content_to_responses_parts(content)
|
||||
content_text = "".join(
|
||||
p.get("text", "") for p in content_parts if p.get("type") == text_type
|
||||
p.get("text", "") for p in content_parts if p.get("type") == "input_text"
|
||||
)
|
||||
else:
|
||||
content_parts = []
|
||||
@@ -289,57 +262,7 @@ def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Di
|
||||
seen_item_ids.add(item_id)
|
||||
has_codex_reasoning = True
|
||||
|
||||
# Replay exact assistant message items (with id/phase) from
|
||||
# previous turns so the API can maintain prefix-cache hits.
|
||||
# OpenAI docs: "preserve and resend phase on all assistant
|
||||
# messages — dropping it can degrade performance."
|
||||
codex_message_items = msg.get("codex_message_items")
|
||||
replayed_message_items = 0
|
||||
if isinstance(codex_message_items, list):
|
||||
for raw_item in codex_message_items:
|
||||
if not isinstance(raw_item, dict):
|
||||
continue
|
||||
if raw_item.get("type") != "message" or raw_item.get("role") != "assistant":
|
||||
continue
|
||||
raw_content_parts = raw_item.get("content")
|
||||
if not isinstance(raw_content_parts, list):
|
||||
continue
|
||||
|
||||
normalized_content_parts = []
|
||||
for part in raw_content_parts:
|
||||
if not isinstance(part, dict):
|
||||
continue
|
||||
part_type = str(part.get("type") or "").strip()
|
||||
if part_type not in {"output_text", "text"}:
|
||||
continue
|
||||
text = part.get("text", "")
|
||||
if text is None:
|
||||
text = ""
|
||||
if not isinstance(text, str):
|
||||
text = str(text)
|
||||
normalized_content_parts.append({"type": "output_text", "text": text})
|
||||
|
||||
if not normalized_content_parts:
|
||||
continue
|
||||
|
||||
replay_item = {
|
||||
"type": "message",
|
||||
"role": "assistant",
|
||||
"status": _normalize_responses_message_status(raw_item.get("status")),
|
||||
"content": normalized_content_parts,
|
||||
}
|
||||
item_id = raw_item.get("id")
|
||||
if isinstance(item_id, str) and item_id.strip():
|
||||
replay_item["id"] = item_id.strip()
|
||||
phase = raw_item.get("phase")
|
||||
if isinstance(phase, str) and phase.strip():
|
||||
replay_item["phase"] = phase.strip()
|
||||
items.append(replay_item)
|
||||
replayed_message_items += 1
|
||||
|
||||
if replayed_message_items > 0:
|
||||
pass
|
||||
elif content_parts:
|
||||
if content_parts:
|
||||
items.append({"role": "assistant", "content": content_parts})
|
||||
elif content_text.strip():
|
||||
items.append({"role": "assistant", "content": content_text})
|
||||
@@ -499,47 +422,6 @@ def _preflight_codex_input_items(raw_items: Any) -> List[Dict[str, Any]]:
|
||||
normalized.append(reasoning_item)
|
||||
continue
|
||||
|
||||
if item_type == "message":
|
||||
role = item.get("role")
|
||||
if role != "assistant":
|
||||
raise ValueError(f"Codex Responses input[{idx}] message items must have role='assistant'.")
|
||||
content = item.get("content")
|
||||
if not isinstance(content, list):
|
||||
raise ValueError(f"Codex Responses input[{idx}] message item must have content list.")
|
||||
normalized_content = []
|
||||
for part_idx, part in enumerate(content):
|
||||
if not isinstance(part, dict):
|
||||
raise ValueError(
|
||||
f"Codex Responses input[{idx}] message content[{part_idx}] must be an object."
|
||||
)
|
||||
part_type = part.get("type")
|
||||
if part_type not in {"output_text", "text"}:
|
||||
raise ValueError(
|
||||
f"Codex Responses input[{idx}] message content[{part_idx}] has unsupported type {part_type!r}."
|
||||
)
|
||||
text = part.get("text", "")
|
||||
if text is None:
|
||||
text = ""
|
||||
if not isinstance(text, str):
|
||||
text = str(text)
|
||||
normalized_content.append({"type": "output_text", "text": text})
|
||||
if not normalized_content:
|
||||
raise ValueError(f"Codex Responses input[{idx}] message item must contain at least one text part.")
|
||||
normalized_item: Dict[str, Any] = {
|
||||
"type": "message",
|
||||
"role": "assistant",
|
||||
"status": _normalize_responses_message_status(item.get("status")),
|
||||
"content": normalized_content,
|
||||
}
|
||||
item_id = item.get("id")
|
||||
if isinstance(item_id, str) and item_id.strip():
|
||||
normalized_item["id"] = item_id.strip()
|
||||
phase = item.get("phase")
|
||||
if isinstance(phase, str) and phase.strip():
|
||||
normalized_item["phase"] = phase.strip()
|
||||
normalized.append(normalized_item)
|
||||
continue
|
||||
|
||||
role = item.get("role")
|
||||
if role in {"user", "assistant"}:
|
||||
content = item.get("content", "")
|
||||
@@ -547,16 +429,13 @@ def _preflight_codex_input_items(raw_items: Any) -> List[Dict[str, Any]]:
|
||||
content = ""
|
||||
if isinstance(content, list):
|
||||
# Multimodal content from ``_chat_messages_to_responses_input``
|
||||
# is already in Responses format (``input_text`` / ``output_text``
|
||||
# / ``input_image``). Validate each part and pass through.
|
||||
# Use the correct text type for the role — ``output_text`` for
|
||||
# assistant messages, ``input_text`` for user messages.
|
||||
text_type = "output_text" if role == "assistant" else "input_text"
|
||||
# is already in Responses format (``input_text`` / ``input_image``).
|
||||
# Validate each part and pass through.
|
||||
validated: List[Dict[str, Any]] = []
|
||||
for part_idx, part in enumerate(content):
|
||||
if isinstance(part, str):
|
||||
if part:
|
||||
validated.append({"type": text_type, "text": part})
|
||||
validated.append({"type": "input_text", "text": part})
|
||||
continue
|
||||
if not isinstance(part, dict):
|
||||
raise ValueError(
|
||||
@@ -567,7 +446,7 @@ def _preflight_codex_input_items(raw_items: Any) -> List[Dict[str, Any]]:
|
||||
text = part.get("text", "")
|
||||
if not isinstance(text, str):
|
||||
text = str(text or "")
|
||||
validated.append({"type": text_type, "text": text})
|
||||
validated.append({"type": "input_text", "text": text})
|
||||
elif ptype in {"input_image", "image_url"}:
|
||||
image_ref = part.get("image_url", "")
|
||||
detail = part.get("detail")
|
||||
@@ -824,7 +703,6 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]:
|
||||
content_parts: List[str] = []
|
||||
reasoning_parts: List[str] = []
|
||||
reasoning_items_raw: List[Dict[str, Any]] = []
|
||||
message_items_raw: List[Dict[str, Any]] = []
|
||||
tool_calls: List[Any] = []
|
||||
has_incomplete_items = response_status in {"queued", "in_progress", "incomplete"}
|
||||
saw_commentary_phase = False
|
||||
@@ -843,7 +721,6 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]:
|
||||
|
||||
if item_type == "message":
|
||||
item_phase = getattr(item, "phase", None)
|
||||
normalized_phase = None
|
||||
if isinstance(item_phase, str):
|
||||
normalized_phase = item_phase.strip().lower()
|
||||
if normalized_phase in {"commentary", "analysis"}:
|
||||
@@ -853,18 +730,6 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]:
|
||||
message_text = _extract_responses_message_text(item)
|
||||
if message_text:
|
||||
content_parts.append(message_text)
|
||||
raw_message_item: Dict[str, Any] = {
|
||||
"type": "message",
|
||||
"role": "assistant",
|
||||
"status": _normalize_responses_message_status(item_status),
|
||||
"content": [{"type": "output_text", "text": message_text}],
|
||||
}
|
||||
item_id = getattr(item, "id", None)
|
||||
if isinstance(item_id, str) and item_id:
|
||||
raw_message_item["id"] = item_id
|
||||
if normalized_phase:
|
||||
raw_message_item["phase"] = normalized_phase
|
||||
message_items_raw.append(raw_message_item)
|
||||
elif item_type == "reasoning":
|
||||
reasoning_text = _extract_responses_reasoning_text(item)
|
||||
if reasoning_text:
|
||||
@@ -977,7 +842,6 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]:
|
||||
reasoning_content=None,
|
||||
reasoning_details=None,
|
||||
codex_reasoning_items=reasoning_items_raw or None,
|
||||
codex_message_items=message_items_raw or None,
|
||||
)
|
||||
|
||||
if tool_calls:
|
||||
|
||||
@@ -318,13 +318,6 @@ class ContextCompressor(ContextEngine):
|
||||
int(context_length * self.threshold_percent),
|
||||
MINIMUM_CONTEXT_LENGTH,
|
||||
)
|
||||
# Recalculate token budgets for the new context length so the
|
||||
# compressor stays calibrated after a model switch (e.g. 200K → 32K).
|
||||
target_tokens = int(self.threshold_tokens * self.summary_target_ratio)
|
||||
self.tail_token_budget = target_tokens
|
||||
self.max_summary_tokens = min(
|
||||
int(context_length * 0.05), _SUMMARY_TOKENS_CEILING,
|
||||
)
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
|
||||
+8
-39
@@ -106,11 +106,9 @@ _endpoint_model_metadata_cache_time: Dict[str, float] = {}
|
||||
_ENDPOINT_MODEL_CACHE_TTL = 300
|
||||
|
||||
# Descending tiers for context length probing when the model is unknown.
|
||||
# We start at 256K (covers GPT-5.x, many current large-context models) and
|
||||
# step down on context-length errors until one works. Tier[0] is also the
|
||||
# default fallback when no detection method succeeds.
|
||||
# We start at 128K (a safe default for most modern models) and step down
|
||||
# on context-length errors until one works.
|
||||
CONTEXT_PROBE_TIERS = [
|
||||
256_000,
|
||||
128_000,
|
||||
64_000,
|
||||
32_000,
|
||||
@@ -145,11 +143,10 @@ DEFAULT_CONTEXT_LENGTHS = {
|
||||
"claude": 200000,
|
||||
# OpenAI — GPT-5 family (most have 400k; specific overrides first)
|
||||
# Source: https://developers.openai.com/api/docs/models
|
||||
# GPT-5.5 (launched Apr 23 2026) is 1.05M on the direct OpenAI API and
|
||||
# ChatGPT Codex OAuth caps it at 272K; both paths resolve via their own
|
||||
# provider-aware branches (_resolve_codex_oauth_context_length + models.dev).
|
||||
# This hardcoded value is only reached when every probe misses.
|
||||
"gpt-5.5": 1050000,
|
||||
# GPT-5.5 (launched Apr 23 2026). 400k is the fallback for providers we
|
||||
# can't probe live. ChatGPT Codex OAuth actually caps lower (272k as of
|
||||
# Apr 2026) and is resolved via _resolve_codex_oauth_context_length().
|
||||
"gpt-5.5": 400000,
|
||||
"gpt-5.4-nano": 400000, # 400k (not 1.05M like full 5.4)
|
||||
"gpt-5.4-mini": 400000, # 400k (not 1.05M like full 5.4)
|
||||
"gpt-5.4": 1050000, # GPT-5.4, GPT-5.4 Pro (1.05M context)
|
||||
@@ -165,17 +162,7 @@ DEFAULT_CONTEXT_LENGTHS = {
|
||||
"gemma-4-31b": 256000,
|
||||
"gemma-3": 131072,
|
||||
"gemma": 8192, # fallback for older gemma models
|
||||
# DeepSeek — V4 family ships with a 1M context window. The legacy
|
||||
# aliases ``deepseek-chat`` / ``deepseek-reasoner`` are server-side
|
||||
# mapped to the non-thinking / thinking modes of ``deepseek-v4-flash``
|
||||
# and inherit the same 1M window. The ``deepseek`` substring entry
|
||||
# below remains as a 128K fallback for older / unknown DeepSeek model
|
||||
# ids (e.g. via custom endpoints).
|
||||
# https://api-docs.deepseek.com/zh-cn/quick_start/pricing
|
||||
"deepseek-v4-pro": 1_000_000,
|
||||
"deepseek-v4-flash": 1_000_000,
|
||||
"deepseek-chat": 1_000_000,
|
||||
"deepseek-reasoner": 1_000_000,
|
||||
# DeepSeek
|
||||
"deepseek": 128000,
|
||||
# Meta
|
||||
"llama": 131072,
|
||||
@@ -1206,7 +1193,6 @@ def get_model_context_length(
|
||||
api_key: str = "",
|
||||
config_context_length: int | None = None,
|
||||
provider: str = "",
|
||||
custom_providers: list | None = None,
|
||||
) -> int:
|
||||
"""Get the context length for a model.
|
||||
|
||||
@@ -1227,23 +1213,6 @@ def get_model_context_length(
|
||||
if config_context_length is not None and isinstance(config_context_length, int) and config_context_length > 0:
|
||||
return config_context_length
|
||||
|
||||
# 0b. custom_providers per-model override — check before any probe.
|
||||
# This closes the gap where /model switch and display paths used to fall
|
||||
# back to 128K despite the user having a per-model context_length set.
|
||||
# See #15779.
|
||||
if custom_providers and base_url and model:
|
||||
try:
|
||||
from hermes_cli.config import get_custom_provider_context_length
|
||||
cp_ctx = get_custom_provider_context_length(
|
||||
model=model,
|
||||
base_url=base_url,
|
||||
custom_providers=custom_providers,
|
||||
)
|
||||
if cp_ctx:
|
||||
return cp_ctx
|
||||
except Exception:
|
||||
pass # fall through to probing
|
||||
|
||||
# Normalise provider-prefixed model names (e.g. "local:model-name" →
|
||||
# "model-name") so cache lookups and server queries use the bare ID that
|
||||
# local servers actually know about. Ollama "model:tag" colons are preserved.
|
||||
@@ -1383,7 +1352,7 @@ def get_model_context_length(
|
||||
# 6. OpenRouter live API metadata (provider-unaware fallback)
|
||||
metadata = fetch_model_metadata()
|
||||
if model in metadata:
|
||||
return metadata[model].get("context_length", DEFAULT_FALLBACK_CONTEXT)
|
||||
return metadata[model].get("context_length", 128000)
|
||||
|
||||
# 8. Hardcoded defaults (fuzzy match — longest key first for specificity)
|
||||
# Only check `default_model in model` (is the key a substring of the input).
|
||||
|
||||
@@ -180,145 +180,3 @@ def format_remaining(seconds: float) -> str:
|
||||
h, remainder = divmod(s, 3600)
|
||||
m = remainder // 60
|
||||
return f"{h}h {m}m" if m else f"{h}h"
|
||||
|
||||
|
||||
# Buckets with reset windows shorter than this are treated as transient
|
||||
# (upstream jitter, secondary throttling) rather than a genuine quota
|
||||
# exhaustion worth a cross-session breaker trip.
|
||||
_MIN_RESET_FOR_BREAKER_SECONDS = 60.0
|
||||
|
||||
|
||||
def is_genuine_nous_rate_limit(
    *,
    headers: Optional[Mapping[str, str]] = None,
    last_known_state: Optional[Any] = None,
) -> bool:
    """Decide whether a 429 from Nous Portal is a real account rate limit.

    Nous Portal multiplexes multiple upstream providers (DeepSeek, Kimi,
    MiMo, Hermes, ...) behind one endpoint.  A 429 can mean either:

    (a) The caller's own RPM / RPH / TPM / TPH bucket on Nous is
        exhausted — a genuine rate limit that will last until the
        bucket resets.
    (b) The upstream provider is out of capacity for a specific model
        — transient, clears in seconds, and has nothing to do with
        the caller's quota on Nous.

    Tripping the cross-session breaker on (b) blocks ALL Nous requests
    (and all models, since Nous is one provider key) for minutes even
    though the caller's account is healthy and a different model would
    have worked.  That's the bug users hit when DeepSeek V4 Pro 429s
    trigger a breaker that then blocks Kimi 2.6 and MiMo V2.5 Pro.

    We tell the two apart by looking at:

    1. The 429 response's own ``x-ratelimit-*`` headers.  An exhausted
       bucket (``remaining == 0`` with a reset window of at least
       ``_MIN_RESET_FOR_BREAKER_SECONDS``) is proof of (a).
    2. The last-known-good rate-limit state captured on the previous
       successful response.  If any bucket there was already exhausted
       (``remaining == 0``) with a substantial reset window, the current
       429 is almost certainly (a) continuing from that condition.

    If neither signal fires, we treat the 429 as (b): fail the single
    request, let the retry loop or model-switch proceed, and do NOT
    write the cross-session breaker file.

    Args:
        headers: Response headers of the 429, or ``None`` when unavailable.
        last_known_state: A RateLimitState-like object exposing per-bucket
            attributes, or ``None`` when no earlier state was captured.

    Returns:
        True when the evidence points at (a), False otherwise.
    """
    # Signal 1: the 429 response's own headers.
    if _has_exhausted_bucket(_parse_buckets_from_headers(headers)):
        return True

    # Signal 2: last-known-good state from a recent successful response.
    if last_known_state is None:
        return False
    return _has_exhausted_bucket_in_object(last_known_state)
|
||||
|
||||
|
||||
def _parse_buckets_from_headers(
|
||||
headers: Optional[Mapping[str, str]],
|
||||
) -> dict[str, tuple[Optional[int], Optional[float]]]:
|
||||
"""Extract (remaining, reset_seconds) per bucket from x-ratelimit-* headers.
|
||||
|
||||
Returns empty dict when no rate-limit headers are present.
|
||||
"""
|
||||
if not headers:
|
||||
return {}
|
||||
|
||||
lowered = {k.lower(): v for k, v in headers.items()}
|
||||
if not any(k.startswith("x-ratelimit-") for k in lowered):
|
||||
return {}
|
||||
|
||||
def _maybe_int(raw: Optional[str]) -> Optional[int]:
|
||||
if raw is None:
|
||||
return None
|
||||
try:
|
||||
return int(float(raw))
|
||||
except (TypeError, ValueError):
|
||||
return None
|
||||
|
||||
def _maybe_float(raw: Optional[str]) -> Optional[float]:
|
||||
if raw is None:
|
||||
return None
|
||||
try:
|
||||
return float(raw)
|
||||
except (TypeError, ValueError):
|
||||
return None
|
||||
|
||||
result: dict[str, tuple[Optional[int], Optional[float]]] = {}
|
||||
for tag in ("requests", "requests-1h", "tokens", "tokens-1h"):
|
||||
remaining = _maybe_int(lowered.get(f"x-ratelimit-remaining-{tag}"))
|
||||
reset = _maybe_float(lowered.get(f"x-ratelimit-reset-{tag}"))
|
||||
if remaining is not None or reset is not None:
|
||||
result[tag] = (remaining, reset)
|
||||
return result
|
||||
|
||||
|
||||
def _has_exhausted_bucket(
    buckets: Mapping[str, tuple[Optional[int], Optional[float]]],
) -> bool:
    """Return True when any bucket is exhausted with a meaningful reset window.

    A bucket counts only when both values are known, ``remaining`` is not
    positive, and ``reset`` is at least ``_MIN_RESET_FOR_BREAKER_SECONDS``.
    Buckets with an unknown (``None``) remaining count or reset are ignored.

    Args:
        buckets: Mapping of bucket tag to ``(remaining, reset_seconds)``.

    Returns:
        True if at least one bucket satisfies the conditions above.
    """
    return any(
        remaining is not None
        and not remaining > 0
        and reset is not None
        and reset >= _MIN_RESET_FOR_BREAKER_SECONDS
        for remaining, reset in buckets.values()
    )
|
||||
|
||||
|
||||
def _has_exhausted_bucket_in_object(state: Any) -> bool:
    """Check a RateLimitState-like object for an exhausted bucket.

    Looks at the attributes ``requests_min``, ``requests_hour``,
    ``tokens_min`` and ``tokens_hour`` on ``state``.  Attributes that are
    missing or ``None`` are skipped, so any object can be passed safely.

    A bucket counts as exhausted when its ``limit`` is positive, its
    ``remaining`` is not positive, and its reset window is at least
    ``_MIN_RESET_FOR_BREAKER_SECONDS``.  Missing or falsy ``limit`` /
    ``remaining`` values are read as 0.

    Args:
        state: Object exposing the bucket attributes listed above.

    Returns:
        True if at least one bucket is exhausted, False otherwise.
    """
    for attr in ("requests_min", "requests_hour", "tokens_min", "tokens_hour"):
        bucket = getattr(state, attr, None)
        if bucket is None:
            continue

        limit = getattr(bucket, "limit", 0) or 0
        remaining = getattr(bucket, "remaining", 0) or 0

        # Prefer "remaining_seconds_now" when the bucket provides it;
        # otherwise fall back to the raw reset_seconds value.
        reset = getattr(bucket, "remaining_seconds_now", None)
        if reset is None:
            reset = getattr(bucket, "reset_seconds", 0.0) or 0.0

        # A non-positive limit means the bucket carries no usable data.
        if limit <= 0 or remaining > 0:
            continue
        if reset >= _MIN_RESET_FOR_BREAKER_SECONDS:
            return True
    return False
|
||||
|
||||
@@ -1,144 +0,0 @@
|
||||
"""
|
||||
Contextual first-touch onboarding hints.
|
||||
|
||||
Instead of blocking first-run questionnaires, show a one-time hint the *first*
|
||||
time a user hits a behavior fork — message-while-running, first long-running
|
||||
tool, etc. Each hint is shown once per install (tracked in ``config.yaml`` under
|
||||
``onboarding.seen.<flag>``) and then never again.
|
||||
|
||||
Keep this module tiny and dependency-free so both the CLI and gateway can import
|
||||
it without pulling in heavy modules.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Any, Mapping, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# Flag names (stable — used as config.yaml keys under onboarding.seen)
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
BUSY_INPUT_FLAG = "busy_input_prompt"
|
||||
TOOL_PROGRESS_FLAG = "tool_progress_prompt"
|
||||
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# Hint content
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
def busy_input_hint_gateway(mode: str) -> str:
    """Return the one-time gateway hint for a message sent while the agent is busy.

    Args:
        mode: The busy-input mode that was just applied.  ``"queue"`` selects
            the "I queued your message" wording; any other value selects the
            "I just interrupted" wording.

    Returns:
        The hint text (uses backtick-quoted commands).
    """
    queued_text = (
        "💡 First-time tip — I queued your message instead of interrupting. "
        "Send `/busy interrupt` to make new messages stop the current task "
        "immediately, or `/busy status` to check. This notice won't appear again."
    )
    interrupted_text = (
        "💡 First-time tip — I just interrupted my current task to answer you. "
        "Send `/busy queue` to queue follow-ups for after the current task instead, "
        "or `/busy status` to check. This notice won't appear again."
    )
    return queued_text if mode == "queue" else interrupted_text
|
||||
|
||||
|
||||
def busy_input_hint_cli(mode: str) -> str:
    """Return the CLI version of the busy-input hint (plain text, no markdown).

    Args:
        mode: ``"queue"`` selects the "queued" wording; any other value
            selects the "interrupted" wording.

    Returns:
        The hint text.
    """
    if mode != "queue":
        return (
            "(tip) Your message interrupted the current run. "
            "Use /busy queue to queue messages for the next turn instead. "
            "This tip only shows once."
        )
    return (
        "(tip) Your message was queued for the next turn. "
        "Use /busy interrupt to make Enter stop the current run instead. "
        "This tip only shows once."
    )
|
||||
|
||||
|
||||
def tool_progress_hint_gateway() -> str:
    """Return the one-time gateway hint pointing at ``/verbose`` for noisy tool progress."""
    hint = (
        "💡 First-time tip — that tool took a while and I'm streaming every step. "
        "If the progress messages feel noisy, send `/verbose` to cycle modes "
        "(all → new → off). This notice won't appear again."
    )
    return hint
|
||||
|
||||
|
||||
def tool_progress_hint_cli() -> str:
    """Return the one-time CLI hint pointing at ``/verbose`` (plain text, no markdown)."""
    hint = (
        "(tip) That tool ran for a while. Use /verbose to cycle tool-progress "
        "display modes (all -> new -> off -> verbose). This tip only shows once."
    )
    return hint
|
||||
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# State read / write
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
def _get_seen_dict(config: Mapping[str, Any]) -> Mapping[str, Any]:
|
||||
onboarding = config.get("onboarding") if isinstance(config, Mapping) else None
|
||||
if not isinstance(onboarding, Mapping):
|
||||
return {}
|
||||
seen = onboarding.get("seen")
|
||||
return seen if isinstance(seen, Mapping) else {}
|
||||
|
||||
|
||||
def is_seen(config: Mapping[str, Any], flag: str) -> bool:
    """Return True if the user has already been shown this first-touch hint.

    Args:
        config: Loaded configuration mapping.
        flag: Key looked up in the ``onboarding.seen`` mapping.

    Returns:
        The truthiness of the stored value; False when the flag is absent.
    """
    seen = _get_seen_dict(config)
    return bool(seen.get(flag))
|
||||
|
||||
|
||||
def mark_seen(config_path: Path, flag: str) -> bool:
    """Persist ``onboarding.seen.<flag> = True`` to ``config_path``.

    The existing YAML document is loaded (when the file exists), the flag is
    set, and the whole document is written back through
    ``utils.atomic_yaml_write``.  A missing config file is not treated as an
    error: the read is skipped and a mapping holding only the onboarding
    section is handed to the writer.  If the flag is already ``True`` nothing
    is written.

    Onboarding is best-effort, so every failure is logged at debug level and
    reported through the return value rather than raised.

    Args:
        config_path: Path of the YAML config file to update.
        flag: Name of the hint flag to latch.

    Returns:
        True when the flag is (or already was) recorded, False when the
        dependencies could not be imported or reading/writing failed.
    """
    try:
        import yaml
        from utils import atomic_yaml_write
    except Exception as e:  # pragma: no cover — dependency issue
        logger.debug("onboarding: failed to import yaml/utils: %s", e)
        return False

    try:
        cfg: dict = {}
        if config_path.exists():
            with open(config_path, encoding="utf-8") as f:
                # An empty file loads as None; normalise it to a dict.
                cfg = yaml.safe_load(f) or {}

        # Replace a missing or wrongly-typed section with a fresh dict.
        if not isinstance(cfg.get("onboarding"), dict):
            cfg["onboarding"] = {}
        seen = cfg["onboarding"].get("seen")
        if not isinstance(seen, dict):
            seen = {}
            cfg["onboarding"]["seen"] = seen

        if seen.get(flag) is True:
            return True  # already marked — nothing to do

        seen[flag] = True
        atomic_yaml_write(config_path, cfg)
        return True
    except Exception as e:
        logger.debug("onboarding: failed to mark flag %s: %s", flag, e)
        return False
|
||||
|
||||
|
||||
__all__ = [
|
||||
"BUSY_INPUT_FLAG",
|
||||
"TOOL_PROGRESS_FLAG",
|
||||
"busy_input_hint_gateway",
|
||||
"busy_input_hint_cli",
|
||||
"tool_progress_hint_gateway",
|
||||
"tool_progress_hint_cli",
|
||||
"is_seen",
|
||||
"mark_seen",
|
||||
]
|
||||
@@ -176,64 +176,6 @@ SKILLS_GUIDANCE = (
|
||||
"Skills that aren't maintained become liabilities."
|
||||
)
|
||||
|
||||
KANBAN_GUIDANCE = (
|
||||
"# You are a Kanban worker\n"
|
||||
"You were spawned by the Hermes Kanban dispatcher to execute ONE task from "
|
||||
"the shared board at `~/.hermes/kanban.db`. Your task id is in "
|
||||
"`$HERMES_KANBAN_TASK`; your workspace is `$HERMES_KANBAN_WORKSPACE`. "
|
||||
"The `kanban_*` tools in your schema are your primary coordination surface — "
|
||||
"they write directly to the shared SQLite DB and work regardless of terminal "
|
||||
"backend (local/docker/modal/ssh).\n"
|
||||
"\n"
|
||||
"## Lifecycle\n"
|
||||
"\n"
|
||||
"1. **Orient.** Call `kanban_show()` first (no args — it defaults to your "
|
||||
"task). The response includes title, body, parent-task handoffs (summary + "
|
||||
"metadata), any prior attempts on this task if you're a retry, the full "
|
||||
"comment thread, and a pre-formatted `worker_context` you can treat as "
|
||||
"ground truth.\n"
|
||||
"2. **Work inside the workspace.** `cd $HERMES_KANBAN_WORKSPACE` before "
|
||||
"any file operations. The workspace is yours for this run. Don't modify "
|
||||
"files outside it unless the task explicitly asks.\n"
|
||||
"3. **Heartbeat on long operations.** Call `kanban_heartbeat(note=...)` "
|
||||
"every few minutes during long subprocesses (training, encoding, crawling). "
|
||||
"Skip heartbeats for short tasks.\n"
|
||||
"4. **Block on genuine ambiguity.** If you need a human decision you cannot "
|
||||
"infer (missing credentials, UX choice, paywalled source, peer output you "
|
||||
"need first), call `kanban_block(reason=\"...\")` and stop. Don't guess. "
|
||||
"The user will unblock with context and the dispatcher will respawn you.\n"
|
||||
"5. **Complete with structured handoff.** Call `kanban_complete(summary=..., "
|
||||
"metadata=...)`. `summary` is 1–3 human-readable sentences naming concrete "
|
||||
"artifacts. `metadata` is machine-readable facts "
|
||||
"(`{changed_files: [...], tests_run: N, decisions: [...]}`). Downstream "
|
||||
"workers read both via their own `kanban_show`. Never put secrets / "
|
||||
"tokens / raw PII in either field — run rows are durable forever.\n"
|
||||
"6. **If follow-up work appears, create it; don't do it.** Use "
|
||||
"`kanban_create(title=..., assignee=<right-profile>, parents=[your-task-id])` "
|
||||
"to spawn a child task for the appropriate specialist profile instead of "
|
||||
"scope-creeping into the next thing.\n"
|
||||
"\n"
|
||||
"## Orchestrator mode\n"
|
||||
"\n"
|
||||
"If your task is itself a decomposition task (e.g. a planner profile given "
|
||||
"a high-level goal), use `kanban_create` to fan out into child tasks — one "
|
||||
"per specialist, each with an explicit `assignee` and `parents=[...]` to "
|
||||
"express dependencies. Then `kanban_complete` your own task with a summary "
|
||||
"of the decomposition. Do NOT execute the work yourself; your job is "
|
||||
"routing, not implementation.\n"
|
||||
"\n"
|
||||
"## Do NOT\n"
|
||||
"\n"
|
||||
"- Do not shell out to `hermes kanban <verb>` for board operations. Use "
|
||||
"the `kanban_*` tools — they work across all terminal backends.\n"
|
||||
"- Do not complete a task you didn't actually finish. Block it.\n"
|
||||
"- Do not assign follow-up work to yourself. Assign it to the right "
|
||||
"specialist profile.\n"
|
||||
"- Do not call `delegate_task` as a board substitute. `delegate_task` is "
|
||||
"for short reasoning subtasks inside your own run; board tasks are for "
|
||||
"cross-agent handoffs that outlive one API loop."
|
||||
)
|
||||
|
||||
TOOL_USE_ENFORCEMENT_GUIDANCE = (
|
||||
"# Tool-use enforcement\n"
|
||||
"You MUST use your tools to take action — do not describe what you would do "
|
||||
|
||||
@@ -23,14 +23,9 @@ def get_transport(api_mode: str):
|
||||
This allows gradual migration — call sites can check for None
|
||||
and fall back to the legacy code path.
|
||||
"""
|
||||
cls = _REGISTRY.get(api_mode)
|
||||
if cls is None:
|
||||
# The registry can be partially populated when a specific transport
|
||||
# module was imported directly (for example chat_completions before
|
||||
# codex). Discover on misses, not only when the registry is empty, so
|
||||
# test/order-dependent imports do not make valid api_modes unavailable.
|
||||
if not _REGISTRY:
|
||||
_discover_transports()
|
||||
cls = _REGISTRY.get(api_mode)
|
||||
cls = _REGISTRY.get(api_mode)
|
||||
if cls is None:
|
||||
return None
|
||||
return cls()
|
||||
|
||||
@@ -31,15 +31,15 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||
def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> List[Dict[str, Any]]:
|
||||
"""Messages are already in OpenAI format — sanitize Codex leaks only.
|
||||
|
||||
Strips Codex Responses API fields (``codex_reasoning_items`` /
|
||||
``codex_message_items`` on the message, ``call_id``/``response_item_id``
|
||||
on tool_calls) that strict chat-completions providers reject with 400/422.
|
||||
Strips Codex Responses API fields (``codex_reasoning_items`` on the
|
||||
message, ``call_id``/``response_item_id`` on tool_calls) that strict
|
||||
chat-completions providers reject with 400/422.
|
||||
"""
|
||||
needs_sanitize = False
|
||||
for msg in messages:
|
||||
if not isinstance(msg, dict):
|
||||
continue
|
||||
if "codex_reasoning_items" in msg or "codex_message_items" in msg:
|
||||
if "codex_reasoning_items" in msg:
|
||||
needs_sanitize = True
|
||||
break
|
||||
tool_calls = msg.get("tool_calls")
|
||||
@@ -59,7 +59,6 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||
if not isinstance(msg, dict):
|
||||
continue
|
||||
msg.pop("codex_reasoning_items", None)
|
||||
msg.pop("codex_message_items", None)
|
||||
tool_calls = msg.get("tool_calls")
|
||||
if isinstance(tool_calls, list):
|
||||
for tc in tool_calls:
|
||||
|
||||
@@ -120,24 +120,6 @@ class ResponsesApiTransport(ProviderTransport):
|
||||
if request_overrides:
|
||||
kwargs.update(request_overrides)
|
||||
|
||||
if is_codex_backend:
|
||||
prompt_cache_key = kwargs.get("prompt_cache_key")
|
||||
cache_scope_id = str(prompt_cache_key or session_id or "").strip()
|
||||
if cache_scope_id:
|
||||
existing_extra_headers = kwargs.get("extra_headers")
|
||||
merged_extra_headers: Dict[str, str] = {}
|
||||
if isinstance(existing_extra_headers, dict):
|
||||
merged_extra_headers.update(
|
||||
{
|
||||
str(key): str(value)
|
||||
for key, value in existing_extra_headers.items()
|
||||
if key and value is not None
|
||||
}
|
||||
)
|
||||
merged_extra_headers["session_id"] = cache_scope_id
|
||||
merged_extra_headers["x-client-request-id"] = cache_scope_id
|
||||
kwargs["extra_headers"] = merged_extra_headers
|
||||
|
||||
max_tokens = params.get("max_tokens")
|
||||
if max_tokens is not None and not is_codex_backend:
|
||||
kwargs["max_output_tokens"] = max_tokens
|
||||
@@ -178,8 +160,6 @@ class ResponsesApiTransport(ProviderTransport):
|
||||
provider_data = {}
|
||||
if msg and hasattr(msg, "codex_reasoning_items") and msg.codex_reasoning_items:
|
||||
provider_data["codex_reasoning_items"] = msg.codex_reasoning_items
|
||||
if msg and hasattr(msg, "codex_message_items") and msg.codex_message_items:
|
||||
provider_data["codex_message_items"] = msg.codex_message_items
|
||||
if msg and hasattr(msg, "reasoning_details") and msg.reasoning_details:
|
||||
provider_data["reasoning_details"] = msg.reasoning_details
|
||||
|
||||
|
||||
@@ -97,7 +97,7 @@ class NormalizedResponse:
|
||||
Response-level ``provider_data`` examples:
|
||||
|
||||
* Anthropic: ``{"reasoning_details": [...]}``
|
||||
* Codex: ``{"codex_reasoning_items": [...], "codex_message_items": [...]}``
|
||||
* Codex: ``{"codex_reasoning_items": [...]}``
|
||||
* Others: ``None``
|
||||
"""
|
||||
|
||||
@@ -126,11 +126,6 @@ class NormalizedResponse:
|
||||
pd = self.provider_data or {}
|
||||
return pd.get("codex_reasoning_items")
|
||||
|
||||
@property
|
||||
def codex_message_items(self):
|
||||
pd = self.provider_data or {}
|
||||
return pd.get("codex_message_items")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Factory helpers
|
||||
|
||||
+10
-32
@@ -790,16 +790,9 @@ code_execution:
|
||||
# Supports single tasks and batch mode (default 3 parallel, configurable).
|
||||
delegation:
|
||||
max_iterations: 50 # Max tool-calling turns per child (default: 50)
|
||||
# max_concurrent_children: 3 # Max parallel child agents per batch (default: 3, floor: 1, no ceiling).
|
||||
# WARNING: values above 10 multiply API cost linearly.
|
||||
# max_spawn_depth: 1 # Delegation tree depth cap (range: 1-3, default: 1 = flat).
|
||||
# Raise to 2 to allow workers to spawn their own subagents.
|
||||
# Requires role="orchestrator" on intermediate agents.
|
||||
# max_concurrent_children: 3 # Max parallel child agents (default: 3)
|
||||
# max_spawn_depth: 1 # Tree depth cap (1-3, default: 1 = flat). Raise to 2 or 3 to allow orchestrator children to spawn their own workers.
|
||||
# orchestrator_enabled: true # Kill switch for role="orchestrator" children (default: true).
|
||||
# subagent_auto_approve: false # When a subagent hits a dangerous-command approval prompt, auto-deny (default: false)
|
||||
# or auto-approve "once" (true) instead of blocking on stdin.
|
||||
# The parent TUI owns stdin, so blocking would deadlock; non-interactive resolution is required.
|
||||
# Both choices emit a logger.warning audit line. Flip to true only for cron/batch pipelines.
|
||||
# inherit_mcp_toolsets: true # When explicit child toolsets are narrowed, also keep the parent's MCP toolsets (default: true). Set false for strict intersection.
|
||||
# model: "google/gemini-3-flash-preview" # Override model for subagents (empty = inherit parent)
|
||||
# provider: "openrouter" # Override provider for subagents (empty = inherit parent)
|
||||
@@ -824,9 +817,7 @@ delegation:
|
||||
# Display
|
||||
# =============================================================================
|
||||
display:
|
||||
# Use compact banner mode (hides the ASCII-art banner, shows a single line).
|
||||
# true: Compact single-line banner
|
||||
# false: Full ASCII banner with tool/skill summary (default)
|
||||
# Use compact banner mode
|
||||
compact: false
|
||||
|
||||
# Tool progress display level (CLI and gateway)
|
||||
@@ -840,15 +831,12 @@ display:
|
||||
# Gateway-only natural mid-turn assistant updates.
|
||||
# When true, completed assistant status messages are sent as separate chat
|
||||
# messages. This is independent of tool_progress and gateway streaming.
|
||||
# true: Send mid-turn assistant updates as separate messages (default)
|
||||
# false: Only send the final response
|
||||
interim_assistant_messages: true
|
||||
|
||||
# What Enter does when Hermes is already busy (CLI and gateway platforms).
|
||||
# What Enter does when Hermes is already busy in the CLI.
|
||||
# interrupt: Interrupt the current run and redirect Hermes (default)
|
||||
# queue: Queue your message for the next turn
|
||||
# Ctrl+C (or /stop in gateway) always interrupts regardless of this setting.
|
||||
# Toggle at runtime with /busy_input_mode <interrupt|queue>.
|
||||
# Ctrl+C always interrupts regardless of this setting.
|
||||
busy_input_mode: interrupt
|
||||
|
||||
# Background process notifications (gateway/messaging only).
|
||||
@@ -864,22 +852,17 @@ display:
|
||||
# Play terminal bell when agent finishes a response.
|
||||
# Useful for long-running tasks — your terminal will ding when the agent is done.
|
||||
# Works over SSH. Most terminals can be configured to flash the taskbar or play a sound.
|
||||
# true: Ring the terminal bell on each response
|
||||
# false: Silent (default)
|
||||
bell_on_complete: false
|
||||
|
||||
# Show model reasoning/thinking before each response.
|
||||
# When enabled, a dim box shows the model's thought process above the response.
|
||||
# Toggle at runtime with /reasoning show or /reasoning hide.
|
||||
# true: Show the reasoning box
|
||||
# false: Hide reasoning (default)
|
||||
show_reasoning: false
|
||||
|
||||
# Stream tokens to the terminal as they arrive instead of waiting for the
|
||||
# full response. The response box opens on first token and text appears
|
||||
# line-by-line. Tool calls are still captured silently.
|
||||
# true: Stream tokens as they arrive (default)
|
||||
# false: Wait for the full response before rendering
|
||||
# Stream tokens to the terminal in real-time. Disable to wait for full responses.
|
||||
streaming: true
|
||||
|
||||
# ───────────────────────────────────────────────────────────────────────────
|
||||
@@ -889,15 +872,10 @@ display:
|
||||
# response box label, and branding text. Change at runtime with /skin <name>.
|
||||
#
|
||||
# Built-in skins:
|
||||
# default — Classic Hermes gold/kawaii
|
||||
# ares — Crimson/bronze war-god theme with spinner wings
|
||||
# mono — Clean grayscale monochrome
|
||||
# slate — Cool blue developer-focused
|
||||
# daylight — Bright light-mode theme
|
||||
# warm-lightmode — Warm paper-tone light-mode theme
|
||||
# poseidon — Sea-green/teal Olympian theme
|
||||
# sisyphus — Earthy stone-and-moss theme
|
||||
# charizard — Fiery orange dragon theme
|
||||
# default — Classic Hermes gold/kawaii
|
||||
# ares — Crimson/bronze war-god theme with spinner wings
|
||||
# mono — Clean grayscale monochrome
|
||||
# slate — Cool blue developer-focused
|
||||
#
|
||||
# Custom skins: drop a YAML file in ~/.hermes/skins/<name>.yaml
|
||||
# Schema (all fields optional, missing values inherit from default):
|
||||
|
||||
@@ -22,7 +22,6 @@ import re
|
||||
import concurrent.futures
|
||||
import base64
|
||||
import atexit
|
||||
import errno
|
||||
import tempfile
|
||||
import time
|
||||
import uuid
|
||||
@@ -417,11 +416,6 @@ def load_cli_config() -> Dict[str, Any]:
|
||||
"base_url": "", # Direct OpenAI-compatible endpoint for subagents
|
||||
"api_key": "", # API key for delegation.base_url (falls back to OPENAI_API_KEY)
|
||||
},
|
||||
"onboarding": {
|
||||
# First-touch hint flags (see agent/onboarding.py). Each hint is
|
||||
# shown once per install then latched here.
|
||||
"seen": {},
|
||||
},
|
||||
}
|
||||
|
||||
# Track whether the config file explicitly set terminal config.
|
||||
@@ -3182,14 +3176,7 @@ class HermesCLI:
|
||||
# the configured model (e.g. "qwen3.6-plus"), causing 400 errors.
|
||||
runtime_model = runtime.get("model")
|
||||
if runtime_model and isinstance(runtime_model, str):
|
||||
# Only use runtime model if: model is unset, or model equals provider name
|
||||
should_use_runtime_model = (
|
||||
not self.model or # No model configured yet
|
||||
self.model == self.provider or # Model is the provider slug
|
||||
self.model == runtime.get("name") # Model matches provider display name
|
||||
)
|
||||
if should_use_runtime_model:
|
||||
self.model = runtime_model
|
||||
self.model = runtime_model
|
||||
|
||||
# If model is still empty (e.g. user ran `hermes auth add openai-codex`
|
||||
# without `hermes model`), fall back to the provider's first catalog
|
||||
@@ -4324,7 +4311,7 @@ class HermesCLI:
|
||||
|
||||
_cprint(f"\n {_DIM}Tip: Just type your message to chat with Hermes!{_RST}")
|
||||
_cprint(f" {_DIM}Multi-line: Alt+Enter for a new line{_RST}")
|
||||
_cprint(f" {_DIM}Draft editor: Ctrl+G (Alt+G in VSCode/Cursor){_RST}")
|
||||
_cprint(f" {_DIM}Draft editor: Ctrl+G{_RST}")
|
||||
if _is_termux_environment():
|
||||
_cprint(f" {_DIM}Attach image: /image {_termux_example_image_path()} or start your prompt with a local image path{_RST}\n")
|
||||
else:
|
||||
@@ -4674,6 +4661,10 @@ class HermesCLI:
|
||||
def new_session(self, silent=False):
|
||||
"""Start a fresh session with a new session ID and cleared agent state."""
|
||||
if self.agent and self.conversation_history:
|
||||
try:
|
||||
self.agent.flush_memories(self.conversation_history)
|
||||
except (Exception, KeyboardInterrupt):
|
||||
pass
|
||||
# Trigger memory extraction on the old session before session_id rotates.
|
||||
self.agent.commit_memory_session(self.conversation_history)
|
||||
self._notify_session_boundary("on_session_finalize")
|
||||
@@ -5158,29 +5149,27 @@ class HermesCLI:
|
||||
_cprint(f" ✓ Model switched: {result.new_model}")
|
||||
_cprint(f" Provider: {provider_label}")
|
||||
|
||||
# Context: always resolve via the provider-aware chain so Codex OAuth,
|
||||
# Copilot, and Nous-enforced caps win over the raw models.dev entry
|
||||
# (e.g. gpt-5.5 is 1.05M on openai but 272K on Codex OAuth).
|
||||
mi = result.model_info
|
||||
try:
|
||||
from hermes_cli.model_switch import resolve_display_context_length
|
||||
ctx = resolve_display_context_length(
|
||||
result.new_model,
|
||||
result.target_provider,
|
||||
base_url=result.base_url or self.base_url or "",
|
||||
api_key=result.api_key or self.api_key or "",
|
||||
model_info=mi,
|
||||
)
|
||||
if ctx:
|
||||
_cprint(f" Context: {ctx:,} tokens")
|
||||
except Exception:
|
||||
pass
|
||||
if mi:
|
||||
if mi.context_window:
|
||||
_cprint(f" Context: {mi.context_window:,} tokens")
|
||||
if mi.max_output:
|
||||
_cprint(f" Max output: {mi.max_output:,} tokens")
|
||||
if mi.has_cost_data():
|
||||
_cprint(f" Cost: {mi.format_cost()}")
|
||||
_cprint(f" Capabilities: {mi.format_capabilities()}")
|
||||
else:
|
||||
try:
|
||||
from agent.model_metadata import get_model_context_length
|
||||
ctx = get_model_context_length(
|
||||
result.new_model,
|
||||
base_url=result.base_url or self.base_url,
|
||||
api_key=result.api_key or self.api_key,
|
||||
provider=result.target_provider,
|
||||
)
|
||||
_cprint(f" Context: {ctx:,} tokens")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
cache_enabled = (
|
||||
(base_url_host_matches(result.base_url or "", "openrouter.ai") and "claude" in result.new_model.lower())
|
||||
@@ -5281,22 +5270,24 @@ class HermesCLI:
|
||||
# Parse --provider and --global flags
|
||||
model_input, explicit_provider, persist_global = parse_model_flags(raw_args)
|
||||
|
||||
# Load providers for switch_model (picker path needs them below)
|
||||
user_provs = None
|
||||
custom_provs = None
|
||||
try:
|
||||
from hermes_cli.config import get_compatible_custom_providers, load_config
|
||||
cfg = load_config()
|
||||
user_provs = cfg.get("providers")
|
||||
custom_provs = get_compatible_custom_providers(cfg)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# No args at all: open prompt_toolkit-native picker modal
|
||||
if not model_input and not explicit_provider:
|
||||
model_display = self.model or "unknown"
|
||||
provider_display = get_label(self.provider) if self.provider else "unknown"
|
||||
|
||||
user_provs = None
|
||||
custom_provs = None
|
||||
try:
|
||||
from hermes_cli.config import get_compatible_custom_providers, load_config
|
||||
cfg = load_config()
|
||||
user_provs = cfg.get("providers")
|
||||
custom_provs = get_compatible_custom_providers(cfg)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
try:
|
||||
providers = list_authenticated_providers(
|
||||
current_provider=self.provider or "",
|
||||
@@ -5818,28 +5809,7 @@ class HermesCLI:
|
||||
|
||||
print(f"(._.) Unknown cron command: {subcommand}")
|
||||
print(" Available: list, add, edit, pause, resume, run, remove")
|
||||
|
||||
def _handle_kanban_command(self, cmd: str):
    """Handle the /kanban command — delegate to the shared kanban CLI.

    ``cmd`` is the user's full ``/kanban ...`` input.  Surrounding
    whitespace, leading slashes and a leading ``kanban`` word are removed,
    and the remainder is passed to ``hermes_cli.kanban.run_slash``.  Any
    non-empty result is printed; an exception raised by ``run_slash`` is
    printed as an error line instead of propagating.
    """
    from hermes_cli.kanban import run_slash

    # lstrip is a no-op when there is no leading slash, so no guard is needed.
    rest = cmd.strip().lstrip("/")
    if rest.startswith("kanban"):
        rest = rest[len("kanban"):].lstrip()

    try:
        output = run_slash(rest)
    except Exception as exc:  # pragma: no cover - defensive
        output = f"(._.) kanban error: {exc}"

    if output:
        print(output)
|
||||
|
||||
|
||||
def _handle_skills_command(self, cmd: str):
|
||||
"""Handle /skills slash command — delegates to hermes_cli.skills_hub."""
|
||||
from hermes_cli.skills_hub import handle_skills_slash
|
||||
@@ -6076,8 +6046,6 @@ class HermesCLI:
|
||||
self.save_conversation()
|
||||
elif canonical == "cron":
|
||||
self._handle_cron_command(cmd_original)
|
||||
elif canonical == "kanban":
|
||||
self._handle_kanban_command(cmd_original)
|
||||
elif canonical == "skills":
|
||||
with self._busy_command(self._slow_command_status(cmd_original)):
|
||||
self._handle_skills_command(cmd_original)
|
||||
@@ -6152,6 +6120,8 @@ class HermesCLI:
|
||||
self._handle_agents_command()
|
||||
elif canonical == "background":
|
||||
self._handle_background_command(cmd_original)
|
||||
elif canonical == "btw":
|
||||
self._handle_btw_command(cmd_original)
|
||||
elif canonical == "queue":
|
||||
# Extract prompt after "/queue " or "/q "
|
||||
parts = cmd_original.split(None, 1)
|
||||
@@ -6438,6 +6408,122 @@ class HermesCLI:
|
||||
self._background_tasks[task_id] = thread
|
||||
thread.start()
|
||||
|
||||
def _handle_btw_command(self, cmd: str):
    """Answer a one-off ``/btw <question>`` next to the main conversation.

    The current ``conversation_history`` is copied, then a separate
    ``AIAgent`` (no toolsets, no session DB, ``persist_session=False``) is
    run against that copy on a daemon thread.  Its reply is printed in a
    panel.  This method returns as soon as the thread has been started.
    """
    tokens = cmd.strip().split(maxsplit=1)
    question = tokens[1].strip() if len(tokens) > 1 else ""
    if not question:
        _cprint(" Usage: /btw <question>")
        _cprint(" Example: /btw what module owns session title sanitization?")
        _cprint(" Answers using session context. No tools, not persisted.")
        return

    task_id = f"btw_{datetime.now().strftime('%H%M%S')}_{uuid.uuid4().hex[:6]}"

    if not self._ensure_runtime_credentials():
        _cprint(" (>_<) Cannot start /btw: no valid credentials.")
        return

    turn_route = self._resolve_turn_agent_config(question)
    # Copy the list so the worker thread never sees later turns.
    history_snapshot = list(self.conversation_history)

    ellipsis = "..." if len(question) > 60 else ""
    preview = question[:60] + ellipsis
    _cprint(f' 💬 /btw: "{preview}"')

    def run_btw():
        """Thread body: run the side agent and print its answer."""

        def _refresh_then_blank_line():
            # Give the TUI a moment to repaint before writing output.
            if self._app:
                self._app.invalidate()
                time.sleep(0.05)
            print()

        try:
            model_name = turn_route["model"]
            runtime = turn_route["runtime"]
            side_agent = AIAgent(
                model=model_name,
                api_key=runtime.get("api_key"),
                base_url=runtime.get("base_url"),
                provider=runtime.get("provider"),
                api_mode=runtime.get("api_mode"),
                acp_command=runtime.get("command"),
                acp_args=runtime.get("args"),
                max_iterations=8,
                enabled_toolsets=[],
                quiet_mode=True,
                verbose_logging=False,
                session_id=task_id,
                platform="cli",
                reasoning_config=self.reasoning_config,
                service_tier=self.service_tier,
                request_overrides=turn_route.get("request_overrides"),
                providers_allowed=self._providers_only,
                providers_ignored=self._providers_ignore,
                providers_order=self._providers_order,
                provider_sort=self._provider_sort,
                provider_require_parameters=self._provider_require_params,
                provider_data_collection=self._provider_data_collection,
                fallback_model=self._fallback_model,
                session_db=None,
                skip_memory=True,
                skip_context_files=True,
                persist_session=False,
            )

            framing = (
                "[Ephemeral /btw side question. Answer using the conversation "
                "context. No tools available. Be direct and concise.]\n\n"
            )
            result = side_agent.run_conversation(
                user_message=framing + question,
                conversation_history=history_snapshot,
                task_id=task_id,
            )

            # Prefer the final response; fall back to the reported error.
            answer = ""
            if result:
                answer = result.get("final_response") or ""
                if not answer and result.get("error"):
                    answer = f"Error: {result['error']}"

            _refresh_then_blank_line()

            if not answer:
                _cprint(" 💬 /btw: (no response)")
            else:
                try:
                    from hermes_cli.skin_engine import get_active_skin

                    border = get_active_skin().get_color("response_border", "#4F6D4A")
                except Exception:
                    # Skin lookup is cosmetic; use the default colour.
                    border = "#4F6D4A"

                console = ChatConsole()
                panel = Panel(
                    _render_final_assistant_content(answer, mode=self.final_response_markdown),
                    title=f"[{border} bold]⚕ /btw[/]",
                    title_align="left",
                    border_style=border,
                    box=rich_box.HORIZONTALS,
                    padding=(1, 4),
                )
                console.print(panel)

            if self.bell_on_complete:
                sys.stdout.write("\a")
                sys.stdout.flush()

        except Exception as exc:
            _refresh_then_blank_line()
            _cprint(f" ❌ /btw failed: {exc}")
        finally:
            if self._app:
                self._invalidate(min_interval=0)

    worker = threading.Thread(target=run_btw, daemon=True, name=f"btw-{task_id}")
    worker.start()
|
||||
|
||||
@staticmethod
|
||||
def _try_launch_chrome_debug(port: int, system: str) -> bool:
|
||||
"""Try to launch Chrome/Chromium with remote debugging enabled.
|
||||
@@ -7322,31 +7408,6 @@ class HermesCLI:
|
||||
_cprint(f" {line}")
|
||||
except Exception:
|
||||
pass
|
||||
# First-touch onboarding: on the first tool in this process
|
||||
# that takes longer than the threshold while we're in the
|
||||
# noisiest progress mode, print a one-time hint about
|
||||
# /verbose. Latched on self so it fires at most once per
|
||||
# process; persisted to config.yaml so it never fires again
|
||||
# across processes either.
|
||||
try:
|
||||
if (
|
||||
not getattr(self, "_long_tool_hint_fired", False)
|
||||
and self.tool_progress_mode == "all"
|
||||
and duration >= 30.0
|
||||
):
|
||||
from agent.onboarding import (
|
||||
TOOL_PROGRESS_FLAG,
|
||||
is_seen,
|
||||
mark_seen,
|
||||
tool_progress_hint_cli,
|
||||
)
|
||||
if not is_seen(CLI_CONFIG, TOOL_PROGRESS_FLAG):
|
||||
self._long_tool_hint_fired = True
|
||||
_cprint(f" {_DIM}{tool_progress_hint_cli()}{_RST}")
|
||||
mark_seen(_hermes_home / "config.yaml", TOOL_PROGRESS_FLAG)
|
||||
CLI_CONFIG.setdefault("onboarding", {}).setdefault("seen", {})[TOOL_PROGRESS_FLAG] = True
|
||||
except Exception:
|
||||
pass
|
||||
self._invalidate()
|
||||
return
|
||||
if event_type != "tool.started":
|
||||
@@ -9230,24 +9291,6 @@ class HermesCLI:
|
||||
f"agent_running={self._agent_running}\n")
|
||||
except Exception:
|
||||
pass
|
||||
# First-touch onboarding: on the very first busy-while-running
|
||||
# event for this install, print a one-line tip explaining the
|
||||
# /busy knob. Flag persists to config.yaml and never fires
|
||||
# again. Guarded for exceptions so onboarding can't break
|
||||
# the input loop.
|
||||
try:
|
||||
from agent.onboarding import (
|
||||
BUSY_INPUT_FLAG,
|
||||
busy_input_hint_cli,
|
||||
is_seen,
|
||||
mark_seen,
|
||||
)
|
||||
if not is_seen(CLI_CONFIG, BUSY_INPUT_FLAG):
|
||||
_cprint(f" {_DIM}{busy_input_hint_cli(self.busy_input_mode)}{_RST}")
|
||||
mark_seen(_hermes_home / "config.yaml", BUSY_INPUT_FLAG)
|
||||
CLI_CONFIG.setdefault("onboarding", {}).setdefault("seen", {})[BUSY_INPUT_FLAG] = True
|
||||
except Exception:
|
||||
pass
|
||||
else:
|
||||
self._pending_input.put(payload)
|
||||
event.app.current_buffer.reset(append_to_history=True)
|
||||
@@ -9262,18 +9305,14 @@ class HermesCLI:
|
||||
"""Ctrl+Enter (c-j) inserts a newline. Most terminals send c-j for Ctrl+Enter."""
|
||||
event.current_buffer.insert_text('\n')
|
||||
|
||||
# VSCode/Cursor bind Ctrl+G to "Find Next" at the editor level, so
|
||||
# the keystroke never reaches the embedded terminal. Alt+G is unbound
|
||||
# in those IDEs and arrives here as ('escape', 'g') — register it as
|
||||
# a fallback so the editor handoff works inside Cursor/VSCode too.
|
||||
_editor_filter = Condition(
|
||||
lambda: not self._clarify_state and not self._approval_state and not self._sudo_state and not self._secret_state
|
||||
@kb.add(
|
||||
'c-g',
|
||||
filter=Condition(
|
||||
lambda: not self._clarify_state and not self._approval_state and not self._sudo_state and not self._secret_state
|
||||
),
|
||||
)
|
||||
|
||||
@kb.add('c-g', filter=_editor_filter)
|
||||
@kb.add('escape', 'g', filter=_editor_filter)
|
||||
def handle_open_in_editor(event):
|
||||
"""Ctrl+G (or Alt+G in VSCode/Cursor) opens the current draft in an external editor."""
|
||||
"""Ctrl+G opens the current draft in an external editor."""
|
||||
cli_ref._open_external_editor(event.current_buffer)
|
||||
|
||||
@kb.add('tab', eager=True)
|
||||
@@ -9737,11 +9776,6 @@ class HermesCLI:
|
||||
completer=_completer,
|
||||
),
|
||||
)
|
||||
# Keep prompt_toolkit on its simple tempfile path. Setting
|
||||
# buffer.tempfile = "prompt.md" triggers its complex-tempfile branch,
|
||||
# which tries to mkdir() the mkdtemp() directory again and raises
|
||||
# EEXIST. The suffix keeps markdown highlighting without that bug.
|
||||
input_area.buffer.tempfile_suffix = '.md'
|
||||
|
||||
# Dynamic height: accounts for both explicit newlines AND visual
|
||||
# wrapping of long lines so the input area always fits its content.
|
||||
@@ -10694,8 +10728,6 @@ class HermesCLI:
|
||||
return # silently suppress
|
||||
if isinstance(exc, KeyError) and "is not registered" in str(exc):
|
||||
return # suppress selector registration failures (#6393)
|
||||
if isinstance(exc, OSError) and getattr(exc, "errno", None) == errno.EIO:
|
||||
return # suppress I/O errors from broken stdout on interrupt (#13710)
|
||||
# Fall back to default handler for everything else
|
||||
loop.default_exception_handler(context)
|
||||
|
||||
@@ -10728,11 +10760,9 @@ class HermesCLI:
|
||||
except (EOFError, KeyboardInterrupt, BrokenPipeError):
|
||||
pass
|
||||
except (KeyError, OSError) as _stdin_err:
|
||||
# Catch selector registration failures from broken stdin (#6393)
|
||||
# and I/O errors from broken stdout during interrupt (#13710).
|
||||
if isinstance(_stdin_err, OSError) and getattr(_stdin_err, "errno", None) == errno.EIO:
|
||||
pass # suppress broken-stdout I/O errors on interrupt (#13710)
|
||||
elif "is not registered" in str(_stdin_err) or "Bad file descriptor" in str(_stdin_err):
|
||||
# Catch selector registration failures from broken stdin (#6393).
|
||||
# This is the fallback for cases that slip past the fstat() guard.
|
||||
if "is not registered" in str(_stdin_err) or "Bad file descriptor" in str(_stdin_err):
|
||||
print(
|
||||
f"\nError: stdin is not usable ({_stdin_err}).\n"
|
||||
"This can happen with certain Python installations (e.g. uv-managed cPython on macOS).\n"
|
||||
@@ -10751,6 +10781,12 @@ class HermesCLI:
|
||||
self.agent.interrupt()
|
||||
except Exception:
|
||||
pass
|
||||
# Flush memories before exit (only for substantial conversations)
|
||||
if self.agent and self.conversation_history:
|
||||
try:
|
||||
self.agent.flush_memories(self.conversation_history)
|
||||
except (Exception, KeyboardInterrupt):
|
||||
pass
|
||||
# Shut down voice recorder (release persistent audio stream)
|
||||
if hasattr(self, '_voice_recorder') and self._voice_recorder:
|
||||
try:
|
||||
|
||||
+1
-14
@@ -16,7 +16,7 @@ import uuid
|
||||
from datetime import datetime, timedelta
|
||||
from pathlib import Path
|
||||
from hermes_constants import get_hermes_home
|
||||
from typing import Optional, Dict, List, Any, Union
|
||||
from typing import Optional, Dict, List, Any
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -417,7 +417,6 @@ def create_job(
|
||||
provider: Optional[str] = None,
|
||||
base_url: Optional[str] = None,
|
||||
script: Optional[str] = None,
|
||||
context_from: Optional[Union[str, List[str]]] = None,
|
||||
enabled_toolsets: Optional[List[str]] = None,
|
||||
workdir: Optional[str] = None,
|
||||
) -> Dict[str, Any]:
|
||||
@@ -439,9 +438,6 @@ def create_job(
|
||||
script: Optional path to a Python script whose stdout is injected into the
|
||||
prompt each run. The script runs before the agent turn, and its output
|
||||
is prepended as context. Useful for data collection / change detection.
|
||||
context_from: Optional job ID (or list of job IDs) whose most recent output
|
||||
is injected into the prompt as context before each run.
|
||||
Useful for chaining cron jobs: job A finds data, job B processes it.
|
||||
enabled_toolsets: Optional list of toolset names to restrict the agent to.
|
||||
When set, only tools from these toolsets are loaded, reducing
|
||||
token overhead. When omitted, all default tools are loaded.
|
||||
@@ -485,14 +481,6 @@ def create_job(
|
||||
normalized_toolsets = normalized_toolsets or None
|
||||
normalized_workdir = _normalize_workdir(workdir)
|
||||
|
||||
# Normalize context_from: accept str or list of str, store as list or None
|
||||
if isinstance(context_from, str):
|
||||
context_from = [context_from.strip()] if context_from.strip() else None
|
||||
elif isinstance(context_from, list):
|
||||
context_from = [str(j).strip() for j in context_from if str(j).strip()] or None
|
||||
else:
|
||||
context_from = None
|
||||
|
||||
label_source = (prompt or (normalized_skills[0] if normalized_skills else None)) or "cron job"
|
||||
job = {
|
||||
"id": job_id,
|
||||
@@ -504,7 +492,6 @@ def create_job(
|
||||
"provider": normalized_provider,
|
||||
"base_url": normalized_base_url,
|
||||
"script": normalized_script,
|
||||
"context_from": context_from,
|
||||
"schedule": parsed_schedule,
|
||||
"schedule_display": parsed_schedule.get("display", schedule),
|
||||
"repeat": {
|
||||
|
||||
@@ -671,47 +671,6 @@ def _build_job_prompt(job: dict, prerun_script: Optional[tuple] = None) -> str:
|
||||
f"{prompt}"
|
||||
)
|
||||
|
||||
# Inject output from referenced cron jobs as context.
|
||||
context_from = job.get("context_from")
|
||||
if context_from:
|
||||
from cron.jobs import OUTPUT_DIR
|
||||
if isinstance(context_from, str):
|
||||
context_from = [context_from]
|
||||
for source_job_id in context_from:
|
||||
# Guard against path traversal — valid job IDs are 12-char hex strings
|
||||
if not source_job_id or not all(c in "0123456789abcdef" for c in source_job_id):
|
||||
logger.warning("context_from: skipping invalid job_id %r", source_job_id)
|
||||
continue
|
||||
try:
|
||||
job_output_dir = OUTPUT_DIR / source_job_id
|
||||
if not job_output_dir.exists():
|
||||
continue # silent skip — no output yet
|
||||
output_files = sorted(
|
||||
job_output_dir.glob("*.md"),
|
||||
key=lambda f: f.stat().st_mtime,
|
||||
reverse=True,
|
||||
)
|
||||
if not output_files:
|
||||
continue # silent skip — no output yet
|
||||
latest_output = output_files[0].read_text(encoding="utf-8").strip()
|
||||
# Truncate to 8K characters to avoid prompt bloat
|
||||
_MAX_CONTEXT_CHARS = 8000
|
||||
if len(latest_output) > _MAX_CONTEXT_CHARS:
|
||||
latest_output = latest_output[:_MAX_CONTEXT_CHARS] + "\n\n[... output truncated ...]"
|
||||
if latest_output:
|
||||
prompt = (
|
||||
f"## Output from job '{source_job_id}'\n"
|
||||
"The following is the most recent output from a preceding "
|
||||
"cron job. Use it as context for your analysis.\n\n"
|
||||
f"```\n{latest_output}\n```\n\n"
|
||||
f"{prompt}"
|
||||
)
|
||||
else:
|
||||
continue # silent skip — empty output
|
||||
except (OSError, PermissionError) as e:
|
||||
logger.warning("context_from: failed to read output for job %r: %s", source_job_id, e)
|
||||
# silent skip — do not pollute the prompt with error messages
|
||||
|
||||
# Always prepend cron execution guidance so the agent knows how
|
||||
# delivery works and can suppress delivery when appropriate.
|
||||
cron_hint = (
|
||||
|
||||
@@ -41,15 +41,6 @@ if [ "$(id -u)" = "0" ]; then
|
||||
echo "Warning: chown failed (rootless container?) — continuing anyway"
|
||||
fi
|
||||
|
||||
# Ensure config.yaml is readable by the hermes runtime user even if it was
|
||||
# edited on the host after initial ownership setup. Must run here (as root)
|
||||
# rather than after the gosu drop, otherwise a non-root caller like
|
||||
# `docker run -u $(id -u):$(id -g)` hits "Operation not permitted" (#15865).
|
||||
if [ -f "$HERMES_HOME/config.yaml" ]; then
|
||||
chown hermes:hermes "$HERMES_HOME/config.yaml" 2>/dev/null || true
|
||||
chmod 640 "$HERMES_HOME/config.yaml" 2>/dev/null || true
|
||||
fi
|
||||
|
||||
echo "Dropping root privileges"
|
||||
exec gosu hermes "$0" "$@"
|
||||
fi
|
||||
@@ -76,6 +67,13 @@ if [ ! -f "$HERMES_HOME/config.yaml" ]; then
|
||||
cp "$INSTALL_DIR/cli-config.yaml.example" "$HERMES_HOME/config.yaml"
|
||||
fi
|
||||
|
||||
# Ensure the main config file remains accessible to the hermes runtime user
# even if it was edited on the host after initial ownership setup.
# Best-effort only: this part of the script can run as a non-root user
# (e.g. `docker run -u $(id -u):$(id -g)`), where chown/chmod on a file we
# do not own fails with "Operation not permitted". That must not abort
# startup or spam stderr, so errors are discarded.
if [ -f "$HERMES_HOME/config.yaml" ]; then
    chown hermes:hermes "$HERMES_HOME/config.yaml" 2>/dev/null || true
    chmod 640 "$HERMES_HOME/config.yaml" 2>/dev/null || true
fi
|
||||
|
||||
# SOUL.md
|
||||
if [ ! -f "$HERMES_HOME/SOUL.md" ]; then
|
||||
cp "$INSTALL_DIR/docker/SOUL.md" "$HERMES_HOME/SOUL.md"
|
||||
|
||||
Binary file not shown.
+3
-16
@@ -21,7 +21,6 @@ Errors in hooks are caught and logged but never block the main pipeline.
|
||||
|
||||
import asyncio
|
||||
import importlib.util
|
||||
import sys
|
||||
from typing import Any, Callable, Dict, List, Optional
|
||||
|
||||
import yaml
|
||||
@@ -104,28 +103,16 @@ class HookRegistry:
|
||||
print(f"[hooks] Skipping {hook_name}: no events declared", flush=True)
|
||||
continue
|
||||
|
||||
# Dynamically load the handler module.
|
||||
# Register in sys.modules BEFORE exec_module so Pydantic /
|
||||
# dataclasses / typing introspection can resolve forward
|
||||
# references (triggered by `from __future__ import annotations`
|
||||
# in the handler). Without this, a handler that declares a
|
||||
# Pydantic BaseModel for webhook/event payloads fails at first
|
||||
# dispatch with "TypeAdapter ... is not fully defined".
|
||||
module_name = f"hermes_hook_{hook_name}"
|
||||
# Dynamically load the handler module
|
||||
spec = importlib.util.spec_from_file_location(
|
||||
module_name, handler_path
|
||||
f"hermes_hook_{hook_name}", handler_path
|
||||
)
|
||||
if spec is None or spec.loader is None:
|
||||
print(f"[hooks] Skipping {hook_name}: could not load handler.py", flush=True)
|
||||
continue
|
||||
|
||||
module = importlib.util.module_from_spec(spec)
|
||||
sys.modules[module_name] = module
|
||||
try:
|
||||
spec.loader.exec_module(module)
|
||||
except Exception:
|
||||
sys.modules.pop(module_name, None)
|
||||
raise
|
||||
spec.loader.exec_module(module)
|
||||
|
||||
handle_fn = getattr(module, "handle", None)
|
||||
if handle_fn is None:
|
||||
|
||||
@@ -9,7 +9,6 @@ Exposes an HTTP server with endpoints:
|
||||
- GET /v1/models — lists hermes-agent as an available model
|
||||
- POST /v1/runs — start a run, returns run_id immediately (202)
|
||||
- GET /v1/runs/{run_id}/events — SSE stream of structured lifecycle events
|
||||
- POST /v1/runs/{run_id}/stop — interrupt a running agent
|
||||
- GET /health — health check
|
||||
- GET /health/detailed — rich status for cross-container dashboard probing
|
||||
|
||||
@@ -587,9 +586,6 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
self._run_streams: Dict[str, "asyncio.Queue[Optional[Dict]]"] = {}
|
||||
# Creation timestamps for orphaned-run TTL sweep
|
||||
self._run_streams_created: Dict[str, float] = {}
|
||||
# Active run agent/task references for stop support
|
||||
self._active_run_agents: Dict[str, Any] = {}
|
||||
self._active_run_tasks: Dict[str, "asyncio.Task"] = {}
|
||||
self._session_db: Optional[Any] = None # Lazy-init SessionDB for session continuity
|
||||
|
||||
@staticmethod
|
||||
@@ -2445,7 +2441,6 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
stream_delta_callback=_text_cb,
|
||||
tool_progress_callback=event_cb,
|
||||
)
|
||||
self._active_run_agents[run_id] = agent
|
||||
def _run_sync():
|
||||
r = agent.run_conversation(
|
||||
user_message=user_message,
|
||||
@@ -2485,11 +2480,8 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
q.put_nowait(None)
|
||||
except Exception:
|
||||
pass
|
||||
self._active_run_agents.pop(run_id, None)
|
||||
self._active_run_tasks.pop(run_id, None)
|
||||
|
||||
task = asyncio.create_task(_run_and_close())
|
||||
self._active_run_tasks[run_id] = task
|
||||
try:
|
||||
self._background_tasks.add(task)
|
||||
except TypeError:
|
||||
@@ -2548,44 +2540,6 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
|
||||
return response
|
||||
|
||||
async def _handle_stop_run(self, request: "web.Request") -> "web.Response":
    """POST /v1/runs/{run_id}/stop — interrupt a running agent.

    Looks the run up in ``_active_run_agents`` and ``_active_run_tasks``.
    When neither knows the id, a 404 ``run_not_found`` error is returned.
    Otherwise the agent (if any) is asked to interrupt, the asyncio task
    (if still running) is cancelled and awaited for at most 5 seconds, and
    ``{"run_id": ..., "status": "stopping"}`` is returned.

    Args:
        request: Incoming request; ``match_info["run_id"]`` names the run.

    Returns:
        The auth error response from ``_check_auth`` when it yields one,
        the 404 error response, or the "stopping" JSON response.
    """
    auth_err = self._check_auth(request)
    if auth_err:
        return auth_err

    run_id = request.match_info["run_id"]
    agent = self._active_run_agents.get(run_id)
    task = self._active_run_tasks.get(run_id)

    if agent is None and task is None:
        return web.json_response(
            _openai_error(f"Run not found: {run_id}", code="run_not_found"),
            status=404,
        )

    if agent is not None:
        try:
            agent.interrupt("Stop requested via API")
        except Exception:
            # Still fall through to cancelling the task, but leave a trace:
            # otherwise the caller is told "stopping" while nothing records
            # that the interrupt itself failed.
            logger.warning(
                "[api_server] interrupt for run %s failed",
                run_id,
                exc_info=True,
            )

    if task is not None and not task.done():
        task.cancel()
        # Bounded wait: run_conversation() executes in the default
        # executor thread which task.cancel() cannot preempt — we rely on
        # agent.interrupt() above to break the loop. Cap the wait so a
        # slow/unresponsive interrupt can't hang this handler.
        try:
            # shield() keeps wait_for's timeout from cancelling the task a
            # second time; we only want to stop waiting.
            await asyncio.wait_for(asyncio.shield(task), timeout=5.0)
        except asyncio.TimeoutError:
            logger.warning(
                "[api_server] stop for run %s timed out after 5s; "
                "agent may still be finishing the current step",
                run_id,
            )
        except (asyncio.CancelledError, Exception):
            # The task ending as cancelled (or with its own error) is the
            # expected result of the cancel() above.
            pass

    return web.json_response({"run_id": run_id, "status": "stopping"})
|
||||
|
||||
async def _sweep_orphaned_runs(self) -> None:
|
||||
"""Periodically clean up run streams that were never consumed."""
|
||||
while True:
|
||||
@@ -2600,8 +2554,6 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
logger.debug("[api_server] sweeping orphaned run %s", run_id)
|
||||
self._run_streams.pop(run_id, None)
|
||||
self._run_streams_created.pop(run_id, None)
|
||||
self._active_run_agents.pop(run_id, None)
|
||||
self._active_run_tasks.pop(run_id, None)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# BasePlatformAdapter interface
|
||||
@@ -2637,7 +2589,6 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
# Structured event streaming
|
||||
self._app.router.add_post("/v1/runs", self._handle_runs)
|
||||
self._app.router.add_get("/v1/runs/{run_id}/events", self._handle_run_events)
|
||||
self._app.router.add_post("/v1/runs/{run_id}/stop", self._handle_stop_run)
|
||||
# Start background sweep to clean up orphaned (unconsumed) run streams
|
||||
sweep_task = asyncio.create_task(self._sweep_orphaned_runs())
|
||||
try:
|
||||
|
||||
@@ -1025,20 +1025,7 @@ class BasePlatformAdapter(ABC):
|
||||
self._post_delivery_callbacks: Dict[str, Any] = {}
|
||||
self._expected_cancelled_tasks: set[asyncio.Task] = set()
|
||||
self._busy_session_handler: Optional[Callable[[MessageEvent, str], Awaitable[bool]]] = None
|
||||
# Auto-TTS on voice input: ``_auto_tts_default`` is the global default
|
||||
# (``voice.auto_tts`` in config.yaml, pushed by GatewayRunner on connect).
|
||||
# Per-chat overrides live in two sets populated from ``_voice_mode``:
|
||||
# - ``_auto_tts_enabled_chats``: chat explicitly opted in via ``/voice on``
|
||||
# or ``/voice tts`` (mode is ``voice_only`` or ``all``). Fires even when
|
||||
# the global default is False.
|
||||
# - ``_auto_tts_disabled_chats``: chat explicitly opted out via
|
||||
# ``/voice off`` (mode is ``off``). Suppresses auto-TTS even when the
|
||||
# global default is True.
|
||||
# The gate in _process_message() is:
|
||||
# fire if chat in _auto_tts_enabled_chats
|
||||
# OR (_auto_tts_default and chat not in _auto_tts_disabled_chats)
|
||||
self._auto_tts_default: bool = False
|
||||
self._auto_tts_enabled_chats: set = set()
|
||||
# Chats where auto-TTS on voice input is disabled (set by /voice off)
|
||||
self._auto_tts_disabled_chats: set = set()
|
||||
# Chats where typing indicator is paused (e.g. during approval waits).
|
||||
# _keep_typing skips send_typing when the chat_id is in this set.
|
||||
@@ -1060,21 +1047,6 @@ class BasePlatformAdapter(ABC):
|
||||
def fatal_error_retryable(self) -> bool:
|
||||
return self._fatal_error_retryable
|
||||
|
||||
def _should_auto_tts_for_chat(self, chat_id: str) -> bool:
    """Decide whether auto-TTS on voice input applies to ``chat_id``.

    Precedence (Issue #16007):
      1. Chat is in ``_auto_tts_enabled_chats`` → ``True``, regardless of
         the global default.
      2. Chat is in ``_auto_tts_disabled_chats`` → ``False``.
      3. Otherwise the truthiness of ``_auto_tts_default``.
    """
    # An explicit opt-in is checked first and wins outright.
    if chat_id in self._auto_tts_enabled_chats:
        return True
    # No opt-in: the global default applies unless the chat opted out.
    return chat_id not in self._auto_tts_disabled_chats and bool(self._auto_tts_default)
|
||||
|
||||
def set_fatal_error_handler(self, handler: Callable[["BasePlatformAdapter"], Awaitable[None] | None]) -> None:
|
||||
self._fatal_error_handler = handler
|
||||
|
||||
@@ -2242,14 +2214,12 @@ class BasePlatformAdapter(ABC):
|
||||
logger.info("[%s] extract_local_files found %d file(s) in response", self.name, len(local_files))
|
||||
|
||||
# Auto-TTS: if voice message, generate audio FIRST (before sending text)
|
||||
# Gated via ``_should_auto_tts_for_chat``: fires when the chat has
|
||||
# an explicit ``/voice on|tts`` opt-in OR when ``voice.auto_tts`` is
|
||||
# True globally and no ``/voice off`` has been issued.
|
||||
# Skipped when the chat has voice mode disabled (/voice off)
|
||||
_tts_path = None
|
||||
if (self._should_auto_tts_for_chat(event.source.chat_id)
|
||||
and event.message_type == MessageType.VOICE
|
||||
if (event.message_type == MessageType.VOICE
|
||||
and text_content
|
||||
and not media_files):
|
||||
and not media_files
|
||||
and event.source.chat_id not in self._auto_tts_disabled_chats):
|
||||
try:
|
||||
from tools.tts_tool import text_to_speech_tool, check_tts_requirements
|
||||
if check_tts_requirements():
|
||||
@@ -2573,9 +2543,6 @@ class BasePlatformAdapter(ABC):
|
||||
user_id_alt: Optional[str] = None,
|
||||
chat_id_alt: Optional[str] = None,
|
||||
is_bot: bool = False,
|
||||
guild_id: Optional[str] = None,
|
||||
parent_chat_id: Optional[str] = None,
|
||||
message_id: Optional[str] = None,
|
||||
) -> SessionSource:
|
||||
"""Helper to build a SessionSource for this platform."""
|
||||
# Normalize empty topic to None
|
||||
@@ -2593,9 +2560,6 @@ class BasePlatformAdapter(ABC):
|
||||
user_id_alt=user_id_alt,
|
||||
chat_id_alt=chat_id_alt,
|
||||
is_bot=is_bot,
|
||||
guild_id=str(guild_id) if guild_id else None,
|
||||
parent_chat_id=str(parent_chat_id) if parent_chat_id else None,
|
||||
message_id=str(message_id) if message_id else None,
|
||||
)
|
||||
|
||||
@abstractmethod
|
||||
|
||||
@@ -2315,6 +2315,11 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
async def slash_background(interaction: discord.Interaction, prompt: str):
|
||||
await self._run_simple_slash(interaction, f"/background {prompt}", "Background task started~")
|
||||
|
||||
@tree.command(name="btw", description="Ephemeral side question using session context")
@discord.app_commands.describe(question="Your side question (no tools, not persisted)")
async def slash_btw(interaction: discord.Interaction, question: str):
    """Discord ``/btw`` slash command.

    Forwards the question as the text command ``/btw <question>`` through
    ``self._run_simple_slash``.
    """
    await self._run_simple_slash(interaction, f"/btw {question}")
|
||||
|
||||
# ── Auto-register any gateway-available commands not yet on the tree ──
|
||||
# This ensures new commands added to COMMAND_REGISTRY in
|
||||
# hermes_cli/commands.py automatically appear as Discord slash
|
||||
@@ -3256,7 +3261,6 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
if auto_thread and not skip_thread and not is_voice_linked_channel and not is_reply_message:
|
||||
thread = await self._auto_create_thread(message)
|
||||
if thread:
|
||||
parent_channel_id = str(message.channel.id)
|
||||
is_thread = True
|
||||
thread_id = str(thread.id)
|
||||
auto_threaded_channel = thread
|
||||
@@ -3316,9 +3320,6 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
thread_id=thread_id,
|
||||
chat_topic=chat_topic,
|
||||
is_bot=getattr(message.author, "bot", False),
|
||||
guild_id=str(message.guild.id) if message.guild else None,
|
||||
parent_chat_id=parent_channel_id,
|
||||
message_id=str(message.id),
|
||||
)
|
||||
|
||||
# Build media URLs -- download image attachments to local cache so the
|
||||
|
||||
@@ -532,20 +532,6 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
)
|
||||
await crypto_store.open()
|
||||
|
||||
# Bind the store to the runtime device_id before any
|
||||
# put_account() runs. PgCryptoStore defaults _device_id
|
||||
# to "" and its crypto_account UPSERT never updates the
|
||||
# device_id column on conflict — so once put_account
|
||||
# writes blank, it stays blank forever. That breaks
|
||||
# every downstream device-scoped olm operation: peer
|
||||
# to-device ciphertext can't find our identity key and
|
||||
# no megolm sessions ever land. Setting _device_id here
|
||||
# (in-memory; the on-disk row may not exist yet) makes
|
||||
# the first put_account write the correct value.
|
||||
# DeviceID is a NewType(str) so plain str works at runtime.
|
||||
if client.device_id:
|
||||
await crypto_store.put_device_id(client.device_id)
|
||||
|
||||
crypto_state = _CryptoStateStore(state_store, self._joined_rooms)
|
||||
olm = OlmMachine(client, crypto_store, crypto_state)
|
||||
|
||||
|
||||
+411
-668
File diff suppressed because it is too large
Load Diff
+16
-74
@@ -87,9 +87,6 @@ class SessionSource:
|
||||
user_id_alt: Optional[str] = None # Platform-specific stable alt ID (Signal UUID, Feishu union_id)
|
||||
chat_id_alt: Optional[str] = None # Signal group internal ID
|
||||
is_bot: bool = False # True when the message author is a bot/webhook (Discord)
|
||||
guild_id: Optional[str] = None # Discord guild / Slack workspace / Matrix server scope
|
||||
parent_chat_id: Optional[str] = None # Parent channel when chat_id refers to a thread
|
||||
message_id: Optional[str] = None # ID of the triggering message (for pin/reply/react)
|
||||
|
||||
@property
|
||||
def description(self) -> str:
|
||||
@@ -127,14 +124,8 @@ class SessionSource:
|
||||
d["user_id_alt"] = self.user_id_alt
|
||||
if self.chat_id_alt:
|
||||
d["chat_id_alt"] = self.chat_id_alt
|
||||
if self.guild_id:
|
||||
d["guild_id"] = self.guild_id
|
||||
if self.parent_chat_id:
|
||||
d["parent_chat_id"] = self.parent_chat_id
|
||||
if self.message_id:
|
||||
d["message_id"] = self.message_id
|
||||
return d
|
||||
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, data: Dict[str, Any]) -> "SessionSource":
|
||||
return cls(
|
||||
@@ -148,9 +139,6 @@ class SessionSource:
|
||||
chat_topic=data.get("chat_topic"),
|
||||
user_id_alt=data.get("user_id_alt"),
|
||||
chat_id_alt=data.get("chat_id_alt"),
|
||||
guild_id=data.get("guild_id"),
|
||||
parent_chat_id=data.get("parent_chat_id"),
|
||||
message_id=data.get("message_id"),
|
||||
)
|
||||
|
||||
|
||||
@@ -202,31 +190,6 @@ that requires raw IDs). Discord is excluded because mentions use ``<@user_id>``
|
||||
and the LLM needs the real ID to tag users."""
|
||||
|
||||
|
||||
def _discord_tools_loaded() -> bool:
    """Report whether Discord tools will actually be available this session.

    Both of these must be true:
      1. ``DISCORD_BOT_TOKEN`` is set to a non-blank value.
      2. The toolsets resolved for the ``discord`` platform include
         ``discord`` or ``discord_admin``.

    Any exception while loading or inspecting the config yields ``False``,
    so a broken config never advertises tools the agent may not have.
    """
    token = os.environ.get("DISCORD_BOT_TOKEN") or ""
    if not token.strip():
        return False

    try:
        from hermes_cli.config import load_config
        from hermes_cli.tools_config import _get_platform_tools

        toolsets = _get_platform_tools(
            load_config(),
            "discord",
            include_default_mcp_servers=False,
        )
        return any(name in toolsets for name in ("discord", "discord_admin"))
    except Exception:
        # Safe default: claim nothing when the config cannot be read.
        return False
|
||||
|
||||
|
||||
def build_session_context_prompt(
|
||||
context: SessionContext,
|
||||
*,
|
||||
@@ -314,33 +277,14 @@ def build_session_context_prompt(
|
||||
"that you can only read messages sent directly to you and respond."
|
||||
)
|
||||
elif context.source.platform == Platform.DISCORD:
|
||||
# Inject the Discord IDs block only when the agent actually has
|
||||
# Discord tools loaded this session — i.e. the user opted into
|
||||
# `discord` / `discord_admin` via `hermes tools` AND the bot
|
||||
# token is configured. Otherwise keep the stale-API disclaimer
|
||||
# honest so we never promise tools the agent lacks.
|
||||
if _discord_tools_loaded():
|
||||
src = context.source
|
||||
id_lines = ["", "**Discord IDs (for the `discord` / `discord_admin` tools):**"]
|
||||
if src.guild_id:
|
||||
id_lines.append(f" - Guild: `{src.guild_id}`")
|
||||
if src.thread_id and src.parent_chat_id:
|
||||
id_lines.append(f" - Parent channel: `{src.parent_chat_id}`")
|
||||
id_lines.append(f" - Thread: `{src.thread_id}` (use as `channel_id` for fetch_messages etc.)")
|
||||
else:
|
||||
id_lines.append(f" - Channel: `{src.chat_id}`")
|
||||
if src.message_id:
|
||||
id_lines.append(f" - Triggering message: `{src.message_id}`")
|
||||
lines.extend(id_lines)
|
||||
else:
|
||||
lines.append("")
|
||||
lines.append(
|
||||
"**Platform notes:** You are running inside Discord. "
|
||||
"You do NOT have access to Discord-specific APIs — you cannot search "
|
||||
"channel history, pin messages, manage roles, or list server members. "
|
||||
"Do not promise to perform these actions. If the user asks, explain "
|
||||
"that you can only read messages sent directly to you and respond."
|
||||
)
|
||||
lines.append("")
|
||||
lines.append(
|
||||
"**Platform notes:** You are running inside Discord. "
|
||||
"You do NOT have access to Discord-specific APIs — you cannot search "
|
||||
"channel history, pin messages, manage roles, or list server members. "
|
||||
"Do not promise to perform these actions. If the user asks, explain "
|
||||
"that you can only read messages sent directly to you and respond."
|
||||
)
|
||||
elif context.source.platform == Platform.BLUEBUBBLES:
|
||||
lines.append("")
|
||||
lines.append(
|
||||
@@ -439,11 +383,11 @@ class SessionEntry:
|
||||
auto_reset_reason: Optional[str] = None # "idle" or "daily"
|
||||
reset_had_activity: bool = False # whether the expired session had any messages
|
||||
|
||||
# Set by the background expiry watcher after it finalizes an expired
|
||||
# session (invoking on_session_finalize hooks and evicting the cached
|
||||
# agent). Persisted to sessions.json so the flag survives gateway
|
||||
# restarts — prevents redundant finalization runs.
|
||||
expiry_finalized: bool = False
|
||||
# Set by the background expiry watcher after it successfully flushes
|
||||
# memories for this session. Persisted to sessions.json so the flag
|
||||
# survives gateway restarts (the old in-memory _pre_flushed_sessions
|
||||
# set was lost on restart, causing redundant re-flushes).
|
||||
memory_flushed: bool = False
|
||||
|
||||
# When True the next call to get_or_create_session() will auto-reset
|
||||
# this session (create a new session_id) so the user starts fresh.
|
||||
@@ -479,7 +423,7 @@ class SessionEntry:
|
||||
"last_prompt_tokens": self.last_prompt_tokens,
|
||||
"estimated_cost_usd": self.estimated_cost_usd,
|
||||
"cost_status": self.cost_status,
|
||||
"expiry_finalized": self.expiry_finalized,
|
||||
"memory_flushed": self.memory_flushed,
|
||||
"suspended": self.suspended,
|
||||
"resume_pending": self.resume_pending,
|
||||
"resume_reason": self.resume_reason,
|
||||
@@ -531,7 +475,7 @@ class SessionEntry:
|
||||
last_prompt_tokens=data.get("last_prompt_tokens", 0),
|
||||
estimated_cost_usd=data.get("estimated_cost_usd", 0.0),
|
||||
cost_status=data.get("cost_status", "unknown"),
|
||||
expiry_finalized=data.get("expiry_finalized", data.get("memory_flushed", False)),
|
||||
memory_flushed=data.get("memory_flushed", False),
|
||||
suspended=data.get("suspended", False),
|
||||
resume_pending=data.get("resume_pending", False),
|
||||
resume_reason=data.get("resume_reason"),
|
||||
@@ -1232,7 +1176,6 @@ class SessionStore:
|
||||
reasoning_content=message.get("reasoning_content") if message.get("role") == "assistant" else None,
|
||||
reasoning_details=message.get("reasoning_details") if message.get("role") == "assistant" else None,
|
||||
codex_reasoning_items=message.get("codex_reasoning_items") if message.get("role") == "assistant" else None,
|
||||
codex_message_items=message.get("codex_message_items") if message.get("role") == "assistant" else None,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.debug("Session DB operation failed: %s", e)
|
||||
@@ -1265,7 +1208,6 @@ class SessionStore:
|
||||
reasoning_content=msg.get("reasoning_content") if role == "assistant" else None,
|
||||
reasoning_details=msg.get("reasoning_details") if role == "assistant" else None,
|
||||
codex_reasoning_items=msg.get("codex_reasoning_items") if role == "assistant" else None,
|
||||
codex_message_items=msg.get("codex_message_items") if role == "assistant" else None,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.debug("Failed to rewrite transcript in DB: %s", e)
|
||||
|
||||
+2
-10
@@ -356,14 +356,6 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
|
||||
api_key_env_vars=(),
|
||||
base_url_env_var="BEDROCK_BASE_URL",
|
||||
),
|
||||
"azure-foundry": ProviderConfig(
|
||||
id="azure-foundry",
|
||||
name="Azure Foundry",
|
||||
auth_type="api_key",
|
||||
inference_base_url="", # User-provided endpoint
|
||||
api_key_env_vars=("AZURE_FOUNDRY_API_KEY",),
|
||||
base_url_env_var="AZURE_FOUNDRY_BASE_URL",
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
@@ -4244,10 +4236,10 @@ def _login_nous(args, pconfig: ProviderConfig) -> None:
|
||||
)
|
||||
|
||||
from hermes_cli.models import (
|
||||
get_curated_nous_model_ids, get_pricing_for_provider,
|
||||
_PROVIDER_MODELS, get_pricing_for_provider,
|
||||
check_nous_free_tier, partition_nous_models_by_tier,
|
||||
)
|
||||
model_ids = get_curated_nous_model_ids()
|
||||
model_ids = _PROVIDER_MODELS.get("nous", [])
|
||||
|
||||
print()
|
||||
unavailable_models: list = []
|
||||
|
||||
@@ -1,300 +0,0 @@
|
||||
"""Azure Foundry endpoint auto-detection.
|
||||
|
||||
Inspect an Azure AI Foundry / Azure OpenAI endpoint to determine:
|
||||
- API transport (OpenAI-style ``chat_completions`` vs
|
||||
Anthropic-style ``anthropic_messages``)
|
||||
- Available models (best effort — Azure does not expose a deployment
|
||||
listing via the inference API key, but Azure OpenAI v1 endpoints
|
||||
return the resource's model catalog via ``GET /models``)
|
||||
- Context length for each discovered/entered model, via the existing
|
||||
:func:`agent.model_metadata.get_model_context_length` resolver.
|
||||
|
||||
Rationale:
|
||||
|
||||
Azure has no pure-API-key deployment-listing endpoint — per Microsoft,
|
||||
deployment enumeration requires ARM management-plane auth. Azure
|
||||
OpenAI v1 endpoints ``{resource}.openai.azure.com/openai/v1`` do return
|
||||
a ``/models`` list, but it reflects the resource's *available* models
|
||||
rather than the user's *deployed* deployment names. In practice it is
|
||||
still a useful hint — the user picks a familiar model name and we look
|
||||
up its context length from the catalog.
|
||||
|
||||
The detector never crashes on errors (every HTTP call is wrapped in a
|
||||
broad try/except). Callers get a :class:`DetectionResult` with whatever
|
||||
information could be gathered, and fall back to manual entry for the
|
||||
rest.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Optional
|
||||
from urllib import request as urllib_request
|
||||
from urllib.error import HTTPError, URLError
|
||||
from urllib.parse import urlparse, urlunparse
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# ``api-version`` values tried, in order, when probing an Azure OpenAI
# ``/models`` route.  The v1 GA endpoint works without ``api-version``,
# so these only matter as a fallback for pre-v1 resources that still
# insist on one.
_AZURE_OPENAI_PROBE_API_VERSIONS = (
    "2025-04-01-preview",
    "2024-10-21",  # oldest GA that supports /models
)

# ``api-version`` sent with Azure Anthropic probes.  Kept equal to the
# value ``agent/anthropic_adapter.py`` uses when building the Anthropic
# client.
_AZURE_ANTHROPIC_API_VERSION = "2025-04-15"
|
||||
|
||||
|
||||
@dataclass
class DetectionResult:
    """Advisory summary of what was learned from a base URL + API key.

    Every field has a default, so a bare ``DetectionResult()`` means
    "nothing detected".
    """

    #: API transport that was detected: ``"chat_completions"`` or
    #: ``"anthropic_messages"``.  ``None`` means detection failed.
    api_mode: Optional[str] = None

    #: Model / deployment IDs reported by ``/models`` (best effort).
    #: Stays empty when the endpoint does not list them for an API key.
    models: list[str] = field(default_factory=list)

    #: Host part of the base URL, lowercased; used in display messages.
    hostname: str = ""

    #: Human-readable explanation of why ``api_mode`` was chosen, so the
    #: wizard can explain the auto-detection to the user.
    reason: str = ""

    #: ``True`` once ``/models`` answered with a valid OpenAI-shaped payload.
    models_probe_ok: bool = False

    #: ``True`` when the endpoint was classified as Anthropic-style,
    #: either from its path suffix or from a live probe.
    is_anthropic: bool = False
|
||||
|
||||
|
||||
def _http_get_json(url: str, api_key: str, timeout: float = 6.0) -> tuple[int, Optional[dict]]:
    """GET ``url`` with Azure-style auth headers and decode a JSON body.

    Args:
        url: URL to fetch.
        api_key: Sent both as ``api-key`` and as ``Authorization: Bearer``.
        timeout: Timeout in seconds handed to ``urlopen``.

    Returns:
        ``(status_code, parsed_json_or_None)``.  ``status_code`` is the
        HTTP status of the response, or ``0`` when no response was
        obtained (connection failure, timeout, malformed URL).  The
        payload is ``None`` for HTTP error responses and for bodies that
        are not valid JSON.

    Never raises.
    """
    try:
        # Request() raises ValueError for a URL without a scheme (for
        # example a user typing "host/openai/v1"), so it has to be built
        # inside the try block for the "never raises" contract to hold.
        req = urllib_request.Request(url, method="GET")
        # Azure OpenAI uses ``api-key``.  Some Azure deployments (and
        # Anthropic-style routes) use ``Authorization: Bearer``.  Send both
        # so we probe once per URL rather than twice.
        req.add_header("api-key", api_key)
        req.add_header("Authorization", f"Bearer {api_key}")
        req.add_header("User-Agent", "hermes-agent/azure-detect")
        with urllib_request.urlopen(req, timeout=timeout) as resp:
            body = resp.read()
            try:
                return resp.status, json.loads(body.decode("utf-8", errors="replace"))
            except Exception:
                # Response arrived but the body is not JSON: keep the status.
                return resp.status, None
    except HTTPError as exc:
        return exc.code, None
    except (URLError, TimeoutError, OSError) as exc:
        logger.debug("azure_detect: GET %s failed: %s", url, exc)
        return 0, None
    except Exception as exc:  # malformed URL or anything else unexpected
        logger.debug("azure_detect: GET %s unexpected error: %s", url, exc)
        return 0, None
|
||||
|
||||
|
||||
def _strip_trailing_v1(url: str) -> str:
    """Return ``url`` without trailing slashes and without a final ``/v1``.

    Lets callers append sub-paths such as ``/v1/messages`` without
    doubling the version segment.
    """
    without_slashes = url.rstrip("/")
    return re.sub(r"/v1/?$", "", without_slashes)
|
||||
|
||||
|
||||
def _looks_like_anthropic_path(url: str) -> bool:
    """Tell whether the URL path has an ``anthropic`` segment.

    True when the lowercased path (ignoring trailing slashes) ends in
    ``/anthropic`` or contains ``/anthropic/`` anywhere.  Any error
    while parsing the URL results in ``False``.
    """
    try:
        normalized = (urlparse(url).path or "").lower().rstrip("/")
        if normalized.endswith("/anthropic"):
            return True
        # Appending "/" lets a final segment match the same pattern as
        # an interior one.
        return "/anthropic/" in normalized + "/"
    except Exception:
        return False
|
||||
|
||||
|
||||
def _extract_model_ids(payload: dict) -> list[str]:
    """Collect model IDs from an OpenAI-shaped ``/models`` payload.

    Reads ``payload["data"]`` and, for each dict entry, takes the first
    truthy value among ``id``, ``model`` and ``name``.  Only non-empty
    strings are kept.  Any shape mismatch yields ``[]``.
    """
    if not isinstance(payload, dict):
        return []
    entries = payload.get("data")
    if not isinstance(entries, list):
        return []

    # OpenAI shape: {"id": "gpt-5.4", "object": "model", ...}
    candidates = (
        entry.get("id") or entry.get("model") or entry.get("name")
        for entry in entries
        if isinstance(entry, dict)
    )
    return [
        candidate
        for candidate in candidates
        if isinstance(candidate, str) and candidate
    ]
|
||||
|
||||
|
||||
def _probe_openai_models(base_url: str, api_key: str) -> tuple[bool, list[str]]:
    """Probe ``<base>/models`` for an OpenAI-shaped response.

    Returns ``(ok, models)``.  ``ok`` is True only when some candidate
    URL answered 200 with an OpenAI-shaped JSON body; ``models`` holds
    the IDs it listed (possibly none).
    """
    root = base_url.rstrip("/")

    # Azure OpenAI v1 ({resource}.openai.azure.com/openai/v1) needs no
    # api-version on GA paths, so the bare URL goes first; the explicit
    # api-version variants are the fallback for pre-v1 resources.
    probe_urls = [f"{root}/models"] + [
        f"{root}/models?api-version={version}"
        for version in _AZURE_OPENAI_PROBE_API_VERSIONS
    ]

    for probe_url in probe_urls:
        status, payload = _http_get_json(probe_url, api_key)
        if status != 200 or payload is None:
            continue
        model_ids = _extract_model_ids(payload)
        if model_ids:
            logger.info(
                "azure_detect: /models probe OK at %s (%d models)",
                probe_url, len(model_ids),
            )
            return True, model_ids
        # 200 + empty list still counts as "OpenAI shape, no models
        # listed" — let the user proceed with manual entry.
        if isinstance(payload, dict) and "data" in payload:
            return True, []
    return False, []
|
||||
|
||||
|
||||
def _probe_anthropic_messages(base_url: str, api_key: str) -> bool:
    """Check whether ``<base>/v1/messages`` speaks the Anthropic Messages API.

    Sends a minimal request (``max_tokens`` of 1, model name ``"probe"``)
    and classifies the answer:

    * any non-error response with status below 500 counts as Anthropic;
    * an HTTP error counts as Anthropic when its body mentions
      ``anthropic``, or contains both ``"type"`` and ``"error"``, or
      when it is a 400 whose body mentions ``messages`` or ``model``;
    * everything else, including connection failures and malformed
      URLs, returns ``False``.

    Never raises for network or URL problems.
    """
    base = _strip_trailing_v1(base_url)
    url = f"{base}/v1/messages?api-version={_AZURE_ANTHROPIC_API_VERSION}"
    payload = json.dumps({
        "model": "probe",
        "max_tokens": 1,
        "messages": [{"role": "user", "content": "ping"}],
    }).encode("utf-8")
    try:
        # Request() raises ValueError for a URL without a scheme, so it
        # must be built inside the try block; otherwise a user typing
        # "host/anthropic" would crash the whole detection run.
        req = urllib_request.Request(url, method="POST", data=payload)
        req.add_header("api-key", api_key)
        req.add_header("Authorization", f"Bearer {api_key}")
        req.add_header("anthropic-version", "2023-06-01")
        req.add_header("content-type", "application/json")
        req.add_header("User-Agent", "hermes-agent/azure-detect")
        with urllib_request.urlopen(req, timeout=6.0) as resp:
            # Should never 200 — "probe" isn't a real deployment.  But
            # if it does, the endpoint definitely speaks Anthropic.
            return resp.status < 500
    except HTTPError as exc:
        # 4xx with an Anthropic-shaped error body = Anthropic endpoint.
        try:
            body = exc.read().decode("utf-8", errors="replace")
            lowered = body.lower()
            # Parentheses spell out the precedence the original relied on.
            if "anthropic" in lowered or ('"type"' in lowered and '"error"' in lowered):
                return True
            # Pre-Azure-v1 Azure Foundry returns a plain 404 for
            # Anthropic-style calls on non-Anthropic deployments.  A
            # 400 "model not found" IS Anthropic though.
            if exc.code == 400 and ("messages" in lowered or "model" in lowered):
                return True
            return False
        except Exception:
            return False
    except (URLError, TimeoutError, OSError):
        return False
    except Exception:  # malformed URL or anything else unexpected
        return False
|
||||
|
||||
|
||||
def detect(base_url: str, api_key: str) -> DetectionResult:
    """Inspect an Azure endpoint and describe its transport and models.

    Checks run in this order and the first hit wins:

    1. the URL path looks like an ``/anthropic`` route (no network call);
    2. ``GET <base>/models`` answers in OpenAI shape;
    3. ``POST <base>/v1/messages`` is recognised as Anthropic Messages.

    The result is advisory: when ``api_mode`` is ``None`` the caller
    should ask the user to choose the API mode manually.
    """
    outcome = DetectionResult()

    try:
        outcome.hostname = (urlparse(base_url).hostname or "").lower()
    except Exception:
        outcome.hostname = ""

    # 1. Path sniff.  Azure Foundry exposes Anthropic-style deployments
    #    under a dedicated ``/anthropic`` path.
    if _looks_like_anthropic_path(base_url):
        outcome.is_anthropic = True
        outcome.api_mode = "anthropic_messages"
        outcome.reason = "URL path ends in /anthropic → Anthropic Messages API"
        return outcome

    # 2. OpenAI-style /models probe.  A hit means the endpoint
    #    definitely speaks the OpenAI wire format.
    probe_ok, listed_models = _probe_openai_models(base_url, api_key)
    if probe_ok:
        outcome.models_probe_ok = True
        outcome.models = listed_models
        outcome.api_mode = "chat_completions"
        if listed_models:
            outcome.reason = (
                f"GET /models returned {len(listed_models)} model(s) — OpenAI-style endpoint"
            )
        else:
            outcome.reason = (
                "GET /models returned an OpenAI-shaped empty list — OpenAI-style endpoint"
            )
        return outcome

    # 3. Fallback: the Anthropic Messages probe is slower and more
    #    intrusive than /models, so it only runs once the OpenAI probe
    #    has failed.
    if _probe_anthropic_messages(base_url, api_key):
        outcome.is_anthropic = True
        outcome.api_mode = "anthropic_messages"
        outcome.reason = "Endpoint accepts Anthropic Messages shape"
        return outcome

    # Nothing matched.  Caller falls back to manual selection.
    outcome.reason = (
        "Could not probe endpoint (private network, missing model list, or "
        "non-standard path) — falling back to manual API-mode selection"
    )
    return outcome
|
||||
|
||||
|
||||
def lookup_context_length(model: str, base_url: str, api_key: str) -> Optional[int]:
    """Resolve a model's context length, hiding the generic fallback value.

    Wraps :func:`agent.model_metadata.get_model_context_length`.  The
    resolved value is returned only when it is a positive ``int`` other
    than ``DEFAULT_FALLBACK_CONTEXT``; in every other case, including an
    import failure or an exception from the resolver, the result is
    ``None``.  That lets the wizard tell "we actually know this" apart
    from "we guessed".
    """
    try:
        from agent.model_metadata import (
            DEFAULT_FALLBACK_CONTEXT,
            get_model_context_length,
        )
    except Exception:
        return None

    try:
        resolved = get_model_context_length(model, base_url=base_url, api_key=api_key)
    except Exception as exc:
        logger.debug("azure_detect: context length lookup failed: %s", exc)
        return None

    is_known_value = (
        isinstance(resolved, int)
        and resolved > 0
        and resolved != DEFAULT_FALLBACK_CONTEXT
    )
    return resolved if is_known_value else None
|
||||
|
||||
|
||||
__all__ = ["DetectionResult", "detect", "lookup_context_length"]
|
||||
@@ -84,7 +84,9 @@ COMMAND_REGISTRY: list[CommandDef] = [
|
||||
CommandDef("deny", "Deny a pending dangerous command", "Session",
|
||||
gateway_only=True),
|
||||
CommandDef("background", "Run a prompt in the background", "Session",
|
||||
aliases=("bg", "btw"), args_hint="<prompt>"),
|
||||
aliases=("bg",), args_hint="<prompt>"),
|
||||
CommandDef("btw", "Ephemeral side question using session context (no tools, not persisted)", "Session",
|
||||
args_hint="<question>"),
|
||||
CommandDef("agents", "Show active agents and running tasks", "Session",
|
||||
aliases=("tasks",)),
|
||||
CommandDef("queue", "Queue a prompt for the next turn (doesn't interrupt)", "Session",
|
||||
@@ -101,8 +103,7 @@ COMMAND_REGISTRY: list[CommandDef] = [
|
||||
# Configuration
|
||||
CommandDef("config", "Show current configuration", "Configuration",
|
||||
cli_only=True),
|
||||
CommandDef("model", "Switch model for this session", "Configuration",
|
||||
aliases=("provider",), args_hint="[model] [--provider name] [--global]"),
|
||||
CommandDef("model", "Switch model for this session", "Configuration", args_hint="[model] [--provider name] [--global]"),
|
||||
CommandDef("gquota", "Show Google Gemini Code Assist quota usage", "Info",
|
||||
cli_only=True),
|
||||
|
||||
@@ -140,11 +141,6 @@ COMMAND_REGISTRY: list[CommandDef] = [
|
||||
CommandDef("cron", "Manage scheduled tasks", "Tools & Skills",
|
||||
cli_only=True, args_hint="[subcommand]",
|
||||
subcommands=("list", "add", "create", "edit", "pause", "resume", "run", "remove")),
|
||||
CommandDef("kanban", "Multi-profile collaboration board (tasks, links, comments)",
|
||||
"Tools & Skills", args_hint="[subcommand]",
|
||||
subcommands=("list", "ls", "show", "create", "assign", "link", "unlink",
|
||||
"claim", "comment", "complete", "block", "unblock", "archive",
|
||||
"tail", "dispatch", "context", "init", "gc")),
|
||||
CommandDef("reload", "Reload .env variables into the running session", "Tools & Skills",
|
||||
cli_only=True),
|
||||
CommandDef("reload-mcp", "Reload MCP servers from config", "Tools & Skills",
|
||||
|
||||
+9
-118
@@ -612,6 +612,14 @@ DEFAULT_CONFIG = {
|
||||
"timeout": 30,
|
||||
"extra_body": {},
|
||||
},
|
||||
"flush_memories": {
|
||||
"provider": "auto",
|
||||
"model": "",
|
||||
"base_url": "",
|
||||
"api_key": "",
|
||||
"timeout": 30,
|
||||
"extra_body": {},
|
||||
},
|
||||
"title_generation": {
|
||||
"provider": "auto",
|
||||
"model": "",
|
||||
@@ -775,15 +783,6 @@ DEFAULT_CONFIG = {
|
||||
# warning log if out of range.
|
||||
"max_spawn_depth": 1, # depth cap (1 = flat [default], 2 = orchestrator→leaf, 3 = three-level)
|
||||
"orchestrator_enabled": True, # kill switch for role="orchestrator"
|
||||
# When a subagent hits a dangerous-command approval prompt, the parent's
|
||||
# prompt_toolkit TUI owns stdin — a thread-local input() call from the
|
||||
# subagent worker would deadlock the parent UI. To avoid the deadlock,
|
||||
# subagent threads ALWAYS resolve approvals non-interactively:
|
||||
# false (default) → auto-deny with a logger.warning audit line (safe)
|
||||
# true → auto-approve "once" with a logger.warning audit line
|
||||
# Flip to true only if you trust delegated work to run dangerous cmds
|
||||
# without human review (cron pipelines, batch automation, etc.).
|
||||
"subagent_auto_approve": False,
|
||||
},
|
||||
|
||||
# Ephemeral prefill messages file — JSON list of {role, content} dicts
|
||||
@@ -840,7 +839,7 @@ DEFAULT_CONFIG = {
|
||||
"auto_thread": True, # Auto-create threads on @mention in channels (like Slack)
|
||||
"reactions": True, # Add 👀/✅/❌ reactions to messages during processing
|
||||
"channel_prompts": {}, # Per-channel ephemeral system prompts (forum parents apply to child threads)
|
||||
# discord / discord_admin tools: restrict which actions the agent may call.
|
||||
# discord_server tool: restrict which actions the agent may call.
|
||||
# Default (empty) = all actions allowed (subject to bot privileged intents).
|
||||
# Accepts comma-separated string ("list_guilds,list_channels,fetch_messages")
|
||||
# or YAML list. Unknown names are dropped with a warning at load time.
|
||||
@@ -959,27 +958,6 @@ DEFAULT_CONFIG = {
|
||||
"backup_count": 3, # Number of rotated backup files to keep
|
||||
},
|
||||
|
||||
# Remotely-hosted model catalog manifest. When enabled, the CLI fetches
|
||||
# curated model lists for OpenRouter and Nous Portal from this URL,
|
||||
# falling back to the in-repo snapshot on network failure. Lets us
|
||||
# update model picker lists without shipping a hermes-agent release.
|
||||
# The default URL is served by the docs site GitHub Pages deploy.
|
||||
"model_catalog": {
|
||||
"enabled": True,
|
||||
"url": "https://hermes-agent.nousresearch.com/docs/api/model-catalog.json",
|
||||
# Disk cache TTL in hours. Beyond this, the CLI refetches on the
|
||||
# next /model or `hermes model` invocation; network failures
|
||||
# silently fall back to the stale cache.
|
||||
"ttl_hours": 24,
|
||||
# Optional per-provider override URLs for third parties that want
|
||||
# to self-host their own curation list using the same schema.
|
||||
# Example:
|
||||
# providers:
|
||||
# openrouter:
|
||||
# url: https://example.com/my-curation.json
|
||||
"providers": {},
|
||||
},
|
||||
|
||||
# Network settings — workarounds for connectivity issues.
|
||||
"network": {
|
||||
# Force IPv4 connections. On servers with broken or unreachable IPv6,
|
||||
@@ -1016,13 +994,6 @@ DEFAULT_CONFIG = {
|
||||
"min_interval_hours": 24,
|
||||
},
|
||||
|
||||
# Contextual first-touch onboarding hints (see agent/onboarding.py).
|
||||
# Each hint is shown once per install and then latched here so it
|
||||
# never fires again. Users can wipe the section to re-see all hints.
|
||||
"onboarding": {
|
||||
"seen": {},
|
||||
},
|
||||
|
||||
# Config schema version - bump this when adding new required fields
|
||||
"_config_version": 22,
|
||||
}
|
||||
@@ -1399,21 +1370,6 @@ OPTIONAL_ENV_VARS = {
|
||||
"category": "provider",
|
||||
"advanced": True,
|
||||
},
|
||||
"AZURE_FOUNDRY_API_KEY": {
|
||||
"description": "Azure Foundry API key for custom Azure endpoints",
|
||||
"prompt": "Azure Foundry API Key",
|
||||
"url": "https://ai.azure.com/",
|
||||
"password": True,
|
||||
"category": "provider",
|
||||
},
|
||||
"AZURE_FOUNDRY_BASE_URL": {
|
||||
"description": "Azure Foundry base URL (set via 'hermes model' for endpoint-specific config)",
|
||||
"prompt": "Azure Foundry base URL",
|
||||
"url": None,
|
||||
"password": False,
|
||||
"category": "provider",
|
||||
"advanced": True,
|
||||
},
|
||||
|
||||
# ── Tool API keys ──
|
||||
"EXA_API_KEY": {
|
||||
@@ -2249,71 +2205,6 @@ def get_compatible_custom_providers(
|
||||
return compatible
|
||||
|
||||
|
||||
def get_custom_provider_context_length(
    model: str,
    base_url: str,
    custom_providers: Optional[List[Dict[str, Any]]] = None,
    config: Optional[Dict[str, Any]] = None,
) -> Optional[int]:
    """Look up a per-model ``context_length`` override from ``custom_providers``.

    Matches any entry whose ``base_url`` equals ``base_url`` (trailing-slash
    insensitive) and returns ``custom_providers[i].models.<model>.context_length``
    if present and valid.  Returns ``None`` when no override applies.

    Args:
        model: Model name used as the key inside an entry's ``models`` map.
        base_url: Endpoint URL the entry must match.
        custom_providers: Entries to search.  When ``None`` they are
            loaded through ``get_compatible_custom_providers(config)``;
            if that raises, ``config["custom_providers"]`` is used.
        config: Optional config dict used only for that loading step.

    Returns:
        The first positive integer ``context_length`` found, else ``None``.

    Malformed entries are skipped rather than raising, so one bad
    provider block cannot break the lookup for the others.  This covers
    non-dict items, a non-string ``base_url``, a missing ``models`` map
    and a value ``int()`` cannot convert.

    This is the single source of truth for custom-provider context overrides,
    used by:
      * ``AIAgent.__init__`` (startup resolution)
      * ``AIAgent.switch_model`` (mid-session ``/model`` switch)
      * ``hermes_cli.model_switch.resolve_display_context_length`` (``/model`` confirmation display)
      * ``gateway.run._format_session_info`` (``/info`` display)
      * ``agent.model_metadata.get_model_context_length`` (when custom_providers is threaded through)

    Before this helper existed, the lookup was duplicated in ``run_agent.py``'s
    startup path only; every other path (notably ``/model`` switch) fell back
    to the 128K default.  See #15779.
    """
    if not model or not base_url:
        return None
    if custom_providers is None:
        try:
            custom_providers = get_compatible_custom_providers(config)
        except Exception:
            if config is None:
                return None
            raw = config.get("custom_providers")
            custom_providers = raw if isinstance(raw, list) else []
    if not isinstance(custom_providers, list):
        return None

    target_url = (base_url or "").rstrip("/")
    if not target_url:
        return None

    for entry in custom_providers:
        if not isinstance(entry, dict):
            continue
        entry_base = entry.get("base_url")
        # Hand-edited YAML can put a number or mapping here; calling
        # ``.rstrip`` on it would raise, so such entries are skipped.
        if not isinstance(entry_base, str):
            continue
        entry_url = entry_base.rstrip("/")
        if not entry_url or entry_url != target_url:
            continue
        models = entry.get("models")
        if not isinstance(models, dict):
            continue
        model_cfg = models.get(model)
        if not isinstance(model_cfg, dict):
            continue
        raw_ctx = model_cfg.get("context_length")
        if raw_ctx is None:
            continue
        try:
            ctx = int(raw_ctx)
        except (TypeError, ValueError, OverflowError):
            # OverflowError: ``int(float("inf"))``, e.g. YAML ``.inf``.
            continue
        if ctx > 0:
            return ctx
    return None
|
||||
|
||||
|
||||
def check_config_version() -> Tuple[int, int]:
|
||||
"""
|
||||
Check config version.
|
||||
|
||||
@@ -320,11 +320,7 @@ def run_doctor(args):
|
||||
known_providers.add("custom:" + name.lower().replace(" ", "-"))
|
||||
|
||||
canonical_provider = provider
|
||||
if (
|
||||
provider
|
||||
and _resolve_provider_full is not None
|
||||
and provider not in ("auto", "custom")
|
||||
):
|
||||
if provider and _resolve_provider_full is not None and provider != "auto":
|
||||
provider_def = _resolve_provider_full(provider, user_providers, custom_providers)
|
||||
canonical_provider = provider_def.id if provider_def is not None else None
|
||||
|
||||
|
||||
@@ -1,361 +0,0 @@
|
||||
"""
|
||||
hermes fallback — manage the fallback provider chain.
|
||||
|
||||
Fallback providers are tried in order when the primary model fails with
|
||||
rate-limit, overload, or connection errors. See:
|
||||
https://hermes-agent.nousresearch.com/docs/user-guide/features/fallback-providers
|
||||
|
||||
Subcommands:
|
||||
hermes fallback [list] Show the current fallback chain (default when no subcommand)
|
||||
hermes fallback add Pick provider + model via the same picker as `hermes model`,
|
||||
then append the selection to the chain
|
||||
hermes fallback remove Pick an entry to delete from the chain
|
||||
hermes fallback clear Remove all fallback entries
|
||||
|
||||
Storage: ``fallback_providers`` in ``~/.hermes/config.yaml`` (top-level, list of
|
||||
``{provider, model, base_url?, api_mode?}`` dicts). The legacy single-dict
|
||||
``fallback_model`` format is migrated to the new list format on first add.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import copy
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _read_chain(config: Dict[str, Any]) -> List[Dict[str, Any]]:
    """Return the fallback chain from ``config`` as a fresh list of dicts.

    ``fallback_providers`` (the list format) takes precedence whenever it
    holds at least one usable entry.  Otherwise the legacy
    ``fallback_model`` key is consulted, which may be a single dict or a
    list.  An entry is usable when it is a dict with truthy ``provider``
    and ``model`` values.  Every returned dict is a shallow copy, so
    callers can mutate the result without touching ``config``.
    """

    def _usable(candidate: Any) -> bool:
        return bool(
            isinstance(candidate, dict)
            and candidate.get("provider")
            and candidate.get("model")
        )

    current = config.get("fallback_providers") or []
    if isinstance(current, list):
        copied = [dict(item) for item in current if _usable(item)]
        if copied:
            return copied

    legacy = config.get("fallback_model")
    if isinstance(legacy, list):
        return [dict(item) for item in legacy if _usable(item)]
    if _usable(legacy):
        return [dict(legacy)]
    return []
|
||||
|
||||
|
||||
def _write_chain(config: Dict[str, Any], chain: List[Dict[str, Any]]) -> None:
    """Store ``chain`` under ``fallback_providers`` and drop the legacy key.

    ``config`` is mutated in place.  Removing ``fallback_model`` on every
    write keeps a single source of truth for the chain.
    """
    config["fallback_providers"] = chain
    config.pop("fallback_model", None)
|
||||
|
||||
|
||||
def _format_entry(entry: Dict[str, Any]) -> str:
    """Render a fallback entry as a single human-readable line.

    Missing ``provider`` / ``model`` keys are shown as ``?``; a truthy
    ``base_url`` is appended in square brackets.
    """
    label = f"{entry.get('model', '?')} (via {entry.get('provider', '?')})"
    endpoint = entry.get("base_url")
    if endpoint:
        label += f" [{endpoint}]"
    return label
|
||||
|
||||
|
||||
def _extract_fallback_from_model_cfg(model_cfg: Any) -> Optional[Dict[str, Any]]:
    """Build a ``{provider, model, base_url?, api_mode?}`` entry from a ``config["model"]`` snapshot.

    Returns ``None`` when *model_cfg* is not a dict or when either the
    provider or the model name is empty after stripping whitespace.
    """
    if not isinstance(model_cfg, dict):
        return None

    provider = (model_cfg.get("provider") or "").strip()
    # The picker stores its selection under ``default``; ``model`` is only
    # consulted when ``default`` is falsy.
    model = (model_cfg.get("default") or model_cfg.get("model") or "").strip()
    if not (provider and model):
        return None

    entry: Dict[str, Any] = {"provider": provider, "model": model}
    # Optional fields are copied over only when non-empty.
    for optional_key in ("base_url", "api_mode"):
        value = (model_cfg.get(optional_key) or "").strip()
        if value:
            entry[optional_key] = value
    return entry
|
||||
|
||||
|
||||
def _snapshot_auth_active_provider() -> Any:
    """Read ``active_provider`` from the auth store, or ``None`` on any failure.

    ``None`` is returned both when the store has no ``active_provider`` key
    and when importing or loading the store raises.
    """
    try:
        from hermes_cli.auth import _load_auth_store as load_store

        return load_store().get("active_provider")
    except Exception:
        # Import errors and store-loading errors are treated alike.
        return None
|
||||
|
||||
|
||||
def _restore_auth_active_provider(value: Any) -> None:
    """Put a previously snapshotted ``active_provider`` back into the auth store.

    The store is re-loaded, updated and saved while holding the auth-store
    lock.  Any exception is swallowed on purpose.
    """
    try:
        from hermes_cli.auth import (
            _auth_store_lock as store_lock,
            _load_auth_store as load_store,
            _save_auth_store as save_store,
        )

        with store_lock():
            current = load_store()
            current["active_provider"] = value
            save_store(current)
    except Exception:
        # Best-effort — if auth.json can't be restored, the user's primary
        # provider may have been deactivated by the picker.  They can re-run
        # `hermes model` to fix it.  Don't fail the fallback add.
        return
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Subcommand handlers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def cmd_fallback_list(args) -> None:  # noqa: ARG001
    """Print the primary model (when known) followed by the fallback chain.

    *args* is accepted for dispatcher compatibility and is not used.
    """
    from hermes_cli.config import load_config

    cfg = load_config()
    entries = _read_chain(cfg)

    print()
    if not entries:
        print(" No fallback providers configured.")
        print()
        print(" Add one with: hermes fallback add")
        print()
        return

    primary_line = _describe_primary(cfg)
    if primary_line:
        print(f" Primary: {primary_line}")
        print()

    noun = "entry" if len(entries) == 1 else "entries"
    print(f" Fallback chain ({len(entries)} {noun}):")
    for position, item in enumerate(entries, start=1):
        print(f" {position}. {_format_entry(item)}")
    print()
    print(" Tried in order when the primary fails (rate-limit, 5xx, connection errors).")
    print(" Docs: https://hermes-agent.nousresearch.com/docs/user-guide/features/fallback-providers")
    print()
|
||||
|
||||
|
||||
def _describe_primary(config: Dict[str, Any]) -> Optional[str]:
    """Summarise ``config["model"]`` in one line for display.

    A string value is returned stripped (``None`` if blank).  A dict value is
    rendered as ``"<model> (via <provider>)"`` with ``?`` standing in for any
    missing or blank part.  Any other type yields ``None``.
    """
    model_cfg = config.get("model")
    if isinstance(model_cfg, str):
        return model_cfg.strip() or None
    if not isinstance(model_cfg, dict):
        return None

    provider = (model_cfg.get("provider") or "?").strip() or "?"
    model = (model_cfg.get("default") or model_cfg.get("model") or "?").strip() or "?"
    return f"{model} (via {provider})"
|
||||
|
||||
|
||||
def cmd_fallback_add(args) -> None:
    """Launch the same picker as `hermes model`, then append the selection to the chain.

    The selection is read back from ``config["model"]`` after the picker
    returns.  The primary model and the auth store's ``active_provider`` are
    snapshotted first and restored afterwards, so only ``fallback_providers``
    ends up changed.

    Args:
        args: Parsed CLI namespace, forwarded unchanged to the picker.

    Raises:
        BaseException: Whatever the picker raises (``SystemExit``,
            ``KeyboardInterrupt``, errors) is re-raised after the snapshotted
            state has been restored.
    """
    from hermes_cli.main import _require_tty, select_provider_and_model
    from hermes_cli.config import load_config, save_config

    _require_tty("fallback add")

    # Snapshot BEFORE the picker runs so the primary can be put back and so
    # the selection can be compared against it.
    before_cfg = load_config()
    model_before = copy.deepcopy(before_cfg.get("model"))
    active_provider_before = _snapshot_auth_active_provider()

    def _restore_primary() -> None:
        # Undo whatever the picker did to the primary model / active provider.
        _restore_model_cfg(model_before)
        _restore_auth_active_provider(active_provider_before)

    print()
    print(" Adding a fallback provider. The picker below is the same one used by")
    print(" `hermes model` — select the provider + model you want as a fallback.")
    print()

    try:
        select_provider_and_model(args=args)
    except BaseException:
        # Some provider flows exit on auth failure, and the user may hit
        # Ctrl-C mid-flow.  Whatever the cause, never leave the primary
        # clobbered: restore state and re-raise.
        _restore_primary()
        raise

    # Read the post-picker state to see what the user selected, then restore
    # the primary straight away so every path below runs against it.
    model_after = load_config().get("model")
    _restore_primary()

    new_entry = _extract_fallback_from_model_cfg(model_after)
    if not new_entry:
        # Picker didn't complete (user cancelled or flow bailed).  Nothing to do.
        print()
        print(" No fallback added.")
        return

    # Picker picked the same thing that's already the primary → nothing useful
    # to add as a fallback to itself.
    primary_entry = _extract_fallback_from_model_cfg(model_before)
    if (
        primary_entry
        and primary_entry["provider"] == new_entry["provider"]
        and primary_entry["model"] == new_entry["model"]
    ):
        print()
        print(f" Selected model matches the current primary ({_format_entry(new_entry)}).")
        print(" A provider cannot be a fallback for itself — no change.")
        return

    # Re-load (rather than reuse an earlier snapshot) because the picker may
    # have touched other top-level keys (custom_providers, providers
    # credentials) that we want to keep.
    final_cfg = load_config()
    chain = _read_chain(final_cfg)

    # Reject entries whose provider + model are already in the chain.
    for existing in chain:
        if (
            existing.get("provider") == new_entry["provider"]
            and existing.get("model") == new_entry["model"]
        ):
            print()
            print(f" {_format_entry(new_entry)} is already in the fallback chain — skipped.")
            return

    chain.append(new_entry)
    _write_chain(final_cfg, chain)
    save_config(final_cfg)

    print()
    print(f" Added fallback: {_format_entry(new_entry)}")
    print(f" Chain is now {len(chain)} {'entry' if len(chain) == 1 else 'entries'} long.")
    print()
    print(" Run `hermes fallback list` to view, or `hermes fallback remove` to delete.")
|
||||
|
||||
|
||||
def _restore_model_cfg(model_before: Any) -> None:
    """Write a previously captured ``config["model"]`` value back to the config.

    The config is re-loaded first.  A ``None`` snapshot means the key was
    absent, so it is removed; anything else is stored as a deep copy.
    """
    from hermes_cli.config import load_config, save_config

    current = load_config()
    if model_before is not None:
        current["model"] = copy.deepcopy(model_before)
    else:
        current.pop("model", None)
    save_config(current)
|
||||
|
||||
|
||||
def cmd_fallback_remove(args) -> None:  # noqa: ARG001
    """Let the user choose one fallback entry and delete it from the chain.

    The curses picker is tried first; if importing or running it raises, the
    plain numbered prompt is used instead.  *args* is not used.
    """
    from hermes_cli.config import load_config, save_config

    cfg = load_config()
    entries = _read_chain(cfg)
    if not entries:
        print()
        print(" No fallback providers configured — nothing to remove.")
        print()
        return

    prompt = "Select a fallback to remove:"
    menu = [_format_entry(item) for item in entries]
    menu.append("Cancel")

    try:
        from hermes_cli.setup import _curses_prompt_choice

        picked = _curses_prompt_choice(prompt, menu, 0)
    except Exception:
        picked = _numbered_pick(prompt, menu)

    # "Cancel" sits at index len(entries), so it falls outside the valid range.
    if picked is None or not 0 <= picked < len(entries):
        print()
        print(" Cancelled — no change.")
        return

    dropped = entries.pop(picked)
    _write_chain(cfg, entries)
    save_config(cfg)

    print()
    print(f" Removed fallback: {_format_entry(dropped)}")
    if entries:
        noun = "entry" if len(entries) == 1 else "entries"
        print(f" Chain is now {len(entries)} {noun} long.")
    else:
        print(" Fallback chain is now empty.")
    print()
|
||||
|
||||
|
||||
def cmd_fallback_clear(args) -> None:  # noqa: ARG001
    """Empty the fallback chain after showing it and asking for confirmation.

    Only an answer of ``y`` or ``yes`` (case-insensitive) clears the chain.
    *args* is not used.
    """
    from hermes_cli.config import load_config, save_config

    cfg = load_config()
    entries = _read_chain(cfg)
    if not entries:
        print()
        print(" No fallback providers configured — nothing to clear.")
        print()
        return

    noun = "entry" if len(entries) == 1 else "entries"
    print()
    print(f" Current fallback chain ({len(entries)} {noun}):")
    for position, item in enumerate(entries, start=1):
        print(f" {position}. {_format_entry(item)}")
    print()

    try:
        answer = input(" Clear all entries? [y/N]: ").strip().lower()
    except (KeyboardInterrupt, EOFError):
        # Ctrl-C / Ctrl-D at the prompt counts as "no".
        print()
        print(" Cancelled.")
        return

    if answer not in ("y", "yes"):
        print(" Cancelled — no change.")
        return

    _write_chain(cfg, [])
    save_config(cfg)
    print()
    print(" Fallback chain cleared.")
    print()
|
||||
|
||||
|
||||
def _numbered_pick(question: str, choices: List[str]) -> Optional[int]:
    """Prompt for a 1-based choice on stdin and return its 0-based index.

    Used when the curses picker is unavailable.  Returns ``None`` when the
    user submits an empty line or interrupts the prompt (Ctrl-C / EOF).
    Out-of-range or non-numeric input re-prompts.
    """
    print(question)
    for number, label in enumerate(choices, start=1):
        print(f" {number}. {label}")
    print()

    while True:
        try:
            raw = input(f"Choice [1-{len(choices)}]: ").strip()
            if not raw:
                return None
            selected = int(raw) - 1
            if 0 <= selected < len(choices):
                return selected
            print(f"Please enter 1-{len(choices)}")
        except ValueError:
            print("Please enter a number")
        except (KeyboardInterrupt, EOFError):
            print()
            return None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Dispatch
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def cmd_fallback(args) -> None:
    """Route ``hermes fallback [subcommand]`` to the matching handler.

    A missing or empty subcommand lists the chain.  An unrecognised one
    prints usage help and exits with status 2.
    """
    sub = getattr(args, "fallback_command", None)

    if sub in (None, "", "list", "ls"):
        cmd_fallback_list(args)
        return
    if sub == "add":
        cmd_fallback_add(args)
        return
    if sub in ("remove", "rm"):
        cmd_fallback_remove(args)
        return
    if sub == "clear":
        cmd_fallback_clear(args)
        return

    print(f"Unknown fallback subcommand: {sub}")
    print("Use one of: list, add, remove, clear")
    raise SystemExit(2)
|
||||
@@ -125,7 +125,6 @@ _DEFAULT_PAYLOADS = {
|
||||
"task_id": "test-task",
|
||||
"tool_call_id": "test-call",
|
||||
"result": '{"output": "hello"}',
|
||||
"duration_ms": 42,
|
||||
},
|
||||
"pre_llm_call": {
|
||||
"session_id": "test-session",
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
+32
-576
@@ -839,8 +839,6 @@ def _find_bundled_tui(tui_dir: Path) -> Optional[Path]:
|
||||
|
||||
|
||||
def _tui_build_needed(tui_dir: Path) -> bool:
|
||||
if _hermes_ink_bundle_stale(tui_dir):
|
||||
return True
|
||||
entry = tui_dir / "dist" / "entry.js"
|
||||
if not entry.exists():
|
||||
return True
|
||||
@@ -1028,12 +1026,7 @@ def _make_tui_argv(tui_dir: Path, tui_dev: bool) -> tuple[list[str], Path]:
|
||||
return [node, str(root / "dist" / "entry.js")], root
|
||||
|
||||
|
||||
def _launch_tui(
|
||||
resume_session_id: Optional[str] = None,
|
||||
tui_dev: bool = False,
|
||||
model: Optional[str] = None,
|
||||
provider: Optional[str] = None,
|
||||
):
|
||||
def _launch_tui(resume_session_id: Optional[str] = None, tui_dev: bool = False):
|
||||
"""Replace current process with the TUI."""
|
||||
tui_dir = PROJECT_ROOT / "ui-tui"
|
||||
|
||||
@@ -1043,12 +1036,6 @@ def _launch_tui(
|
||||
)
|
||||
env.setdefault("HERMES_PYTHON", sys.executable)
|
||||
env.setdefault("HERMES_CWD", os.getcwd())
|
||||
if model:
|
||||
env["HERMES_MODEL"] = model
|
||||
env["HERMES_INFERENCE_MODEL"] = model
|
||||
if provider:
|
||||
env["HERMES_TUI_PROVIDER"] = provider
|
||||
env["HERMES_INFERENCE_PROVIDER"] = provider
|
||||
# Guarantee an 8GB V8 heap + exposed GC for the TUI. Default node cap is
|
||||
# ~1.5–4GB depending on version and can fatal-OOM on long sessions with
|
||||
# large transcripts / reasoning blobs. Token-level merge: respect any
|
||||
@@ -1187,8 +1174,6 @@ def cmd_chat(args):
|
||||
_launch_tui(
|
||||
getattr(args, "resume", None),
|
||||
tui_dev=getattr(args, "tui_dev", False),
|
||||
model=getattr(args, "model", None),
|
||||
provider=getattr(args, "provider", None),
|
||||
)
|
||||
|
||||
# Import and run the CLI
|
||||
@@ -1527,83 +1512,6 @@ def select_provider_and_model(args=None):
|
||||
all_providers = [(p.slug, p.tui_desc) for p in CANONICAL_PROVIDERS]
|
||||
|
||||
def _named_custom_provider_map(cfg) -> dict[str, dict[str, str]]:
|
||||
from hermes_cli.config import read_raw_config
|
||||
|
||||
# Build a lookup of raw (un-expanded) api_key templates keyed by a
|
||||
# stable identity. We intentionally bypass
|
||||
# ``get_compatible_custom_providers(read_raw_config())`` here because
|
||||
# its ``_normalize_custom_provider_entry`` step calls ``urlparse()``
|
||||
# on ``base_url`` and drops any entry whose ``base_url`` is itself an
|
||||
# env-ref template (e.g. ``${NEURALWATT_API_BASE}``). Dropping those
|
||||
# entries is exactly how env-ref preservation fails for the user
|
||||
# config that motivated this fix.
|
||||
raw_api_key_refs: dict[tuple, str] = {}
|
||||
raw_cfg = read_raw_config()
|
||||
|
||||
def _record_raw(
    name: str,
    provider_key: str,
    model: str,
    api_key: str,
) -> None:
    """Remember an un-expanded ``${...}`` api_key template under each identity.

    Values without ``${`` are ignored.  Existing index entries are never
    overwritten (first writer wins).
    """
    template = str(api_key or "").strip()
    if "${" not in template:
        return

    name = str(name or "").strip()
    provider_key = str(provider_key or "").strip()
    model = str(model or "").strip()

    # Index by every plausible identity the loaded (expanded) config might
    # present: (name), (name, model), (provider_key), (provider_key, model).
    # Lower-cased so the match is case-insensitive on name / provider_key.
    for ident in (name, provider_key):
        if not ident:
            continue
        lowered = ident.lower()
        raw_api_key_refs.setdefault((lowered,), template)
        raw_api_key_refs.setdefault((lowered, model), template)
|
||||
|
||||
raw_list = raw_cfg.get("custom_providers")
|
||||
if isinstance(raw_list, list):
|
||||
for raw_entry in raw_list:
|
||||
if not isinstance(raw_entry, dict):
|
||||
continue
|
||||
_record_raw(
|
||||
raw_entry.get("name", ""),
|
||||
"",
|
||||
raw_entry.get("model", "")
|
||||
or raw_entry.get("default_model", ""),
|
||||
raw_entry.get("api_key", ""),
|
||||
)
|
||||
raw_providers = raw_cfg.get("providers")
|
||||
if isinstance(raw_providers, dict):
|
||||
for raw_key, raw_entry in raw_providers.items():
|
||||
if not isinstance(raw_entry, dict):
|
||||
continue
|
||||
_record_raw(
|
||||
raw_entry.get("name", "") or raw_key,
|
||||
raw_key,
|
||||
raw_entry.get("model", "")
|
||||
or raw_entry.get("default_model", ""),
|
||||
raw_entry.get("api_key", ""),
|
||||
)
|
||||
|
||||
def _lookup_ref(name: str, provider_key: str, model: str) -> str:
    """Return the recorded raw api_key template for an entry, or ``""``.

    Lookup order: provider key with model, provider key alone, name with
    model, name alone.  Blank identifiers are skipped.
    """
    model_id = str(model or "").strip()
    pkey_lc = str(provider_key or "").strip().lower()
    name_lc = str(name or "").strip().lower()

    for ident in (pkey_lc, name_lc):
        if not ident:
            continue
        for candidate in ((ident, model_id), (ident,)):
            if candidate in raw_api_key_refs:
                return raw_api_key_refs[candidate]
    return ""
|
||||
|
||||
custom_provider_map = {}
|
||||
for entry in get_compatible_custom_providers(cfg):
|
||||
if not isinstance(entry, dict):
|
||||
@@ -1627,9 +1535,6 @@ def select_provider_and_model(args=None):
|
||||
"model": entry.get("model", ""),
|
||||
"api_mode": entry.get("api_mode", ""),
|
||||
"provider_key": provider_key,
|
||||
"api_key_ref": _lookup_ref(
|
||||
name, provider_key, entry.get("model", "")
|
||||
),
|
||||
}
|
||||
return custom_provider_map
|
||||
|
||||
@@ -1719,8 +1624,6 @@ def select_provider_and_model(args=None):
|
||||
_model_flow_stepfun(config, current_model)
|
||||
elif selected_provider == "bedrock":
|
||||
_model_flow_bedrock(config, current_model)
|
||||
elif selected_provider == "azure-foundry":
|
||||
_model_flow_azure_foundry(config, current_model)
|
||||
elif selected_provider in (
|
||||
"gemini",
|
||||
"deepseek",
|
||||
@@ -1804,6 +1707,7 @@ _AUX_TASKS: list[tuple[str, str, str]] = [
|
||||
("session_search", "Session search", "past-conversation recall"),
|
||||
("approval", "Approval", "smart command approval"),
|
||||
("mcp", "MCP", "MCP tool reasoning"),
|
||||
("flush_memories", "Flush memories", "memory consolidation"),
|
||||
("title_generation", "Title generation", "session titles"),
|
||||
("skills_hub", "Skills hub", "skills search/install"),
|
||||
]
|
||||
@@ -2315,13 +2219,13 @@ def _model_flow_nous(config, current_model="", args=None):
|
||||
# The live /models endpoint returns hundreds of models; the curated list
|
||||
# shows only agentic models users recognize from OpenRouter.
|
||||
from hermes_cli.models import (
|
||||
get_curated_nous_model_ids,
|
||||
_PROVIDER_MODELS,
|
||||
get_pricing_for_provider,
|
||||
check_nous_free_tier,
|
||||
partition_nous_models_by_tier,
|
||||
)
|
||||
|
||||
model_ids = get_curated_nous_model_ids()
|
||||
model_ids = _PROVIDER_MODELS.get("nous", [])
|
||||
if not model_ids:
|
||||
print("No curated models available for Nous Portal.")
|
||||
return
|
||||
@@ -2864,19 +2768,6 @@ def _auto_provider_name(base_url: str) -> str:
|
||||
return name
|
||||
|
||||
|
||||
def _custom_provider_api_key_config_value(provider_info, resolved_api_key=""):
    """Choose what to write to config as a custom provider's API key.

    Preference order: the stored ``api_key_ref`` template; then a
    ``${KEY_ENV}`` reference when ``key_env`` is set and no literal
    ``api_key`` is present; finally the stripped *resolved_api_key*.
    """

    def _field(key):
        # Normalise a possibly-missing / None value to a stripped string.
        return str(provider_info.get(key, "") or "").strip()

    template = _field("api_key_ref")
    if template:
        return template

    env_name = _field("key_env")
    if env_name and not _field("api_key"):
        return f"${{{env_name}}}"

    return str(resolved_api_key or "").strip()
|
||||
|
||||
|
||||
def _save_custom_provider(
|
||||
base_url, api_key="", model="", context_length=None, name=None
|
||||
):
|
||||
@@ -2932,203 +2823,6 @@ def _save_custom_provider(
|
||||
print(f' 💾 Saved to custom providers as "{name}" (edit in config.yaml)')
|
||||
|
||||
|
||||
def _model_flow_azure_foundry(config, current_model=""):
    """Azure Foundry provider: configure endpoint, API mode, API key, and model.

    Azure Foundry supports both OpenAI-style (``/v1/chat/completions``) and
    Anthropic-style (``/v1/messages``) endpoints. The wizard auto-detects
    the transport and available models when possible:

    * URLs ending in ``/anthropic`` → Anthropic Messages API.
    * Successful ``GET <base>/models`` probe → OpenAI-style + populates
      a picker with the returned deployment / model IDs.
    * Anthropic Messages probe fallback when ``/models`` fails.
    * Manual entry when every probe fails (private endpoints, etc.).

    Context lengths for the chosen model are resolved via the standard
    :func:`agent.model_metadata.get_model_context_length` chain
    (models.dev, provider metadata, hardcoded family fallbacks).

    The probing itself is delegated to ``azure_detect.detect`` and the
    context-length lookup to ``azure_detect.lookup_context_length``; this
    function only drives the prompts and persists the result.

    Args:
        config: In-memory config dict. ``config["model"]`` is read for the
            current settings and overwritten with the new ones on success.
        current_model: Model name offered as the default at the model prompt.

    Returns:
        None. Returns early (without saving) when the user cancels a prompt
        or leaves a required value empty.
    """
    from hermes_cli.auth import _save_model_choice, deactivate_provider  # noqa: F401
    from hermes_cli.config import get_env_value, save_env_value, load_config, save_config
    from hermes_cli import azure_detect
    import getpass

    # ── Load current Azure Foundry configuration ─────────────────────
    # Existing endpoint / API mode are only reused as defaults when the
    # configured provider is already azure-foundry.
    model_cfg = config.get("model", {})
    if isinstance(model_cfg, dict) and model_cfg.get("provider") == "azure-foundry":
        current_base_url = str(model_cfg.get("base_url", "") or "")
        current_api_mode = str(model_cfg.get("api_mode", "") or "")
    else:
        current_base_url = ""
        current_api_mode = ""

    current_api_key = get_env_value("AZURE_FOUNDRY_API_KEY") or ""

    print()
    print("Azure Foundry Configuration")
    print("=" * 50)
    print()
    print("Azure Foundry can host models with either OpenAI-style or")
    print("Anthropic-style API endpoints. Hermes will probe your")
    print("endpoint to auto-detect the transport and the deployed")
    print("models when possible.")
    print()

    if current_base_url:
        print(f" Current endpoint: {current_base_url}")
    if current_api_mode:
        _lbl = "OpenAI-style" if current_api_mode == "chat_completions" else "Anthropic-style"
        print(f" Current API mode: {_lbl}")
    if current_api_key:
        # NOTE(review): the first 8 characters of the stored key are echoed.
        print(f" Current API key: {current_api_key[:8]}...")
    print()

    # ── Step 1: endpoint URL ─────────────────────────────────────────
    try:
        base_url = input(
            f"API endpoint URL [{current_base_url or 'e.g. https://your-resource.openai.azure.com/openai/v1'}]: "
        ).strip()
    except (KeyboardInterrupt, EOFError):
        print("\nCancelled.")
        return

    # Empty input keeps the current endpoint; trailing slashes are dropped.
    effective_url = (base_url or current_base_url).rstrip("/")
    if not effective_url:
        print("No endpoint URL provided. Cancelled.")
        return
    if not effective_url.startswith(("http://", "https://")):
        print(f"Invalid URL: {effective_url} (must start with http:// or https://)")
        return

    # ── Step 2: API key ──────────────────────────────────────────────
    print()
    try:
        # getpass keeps the typed key off the screen.
        api_key = getpass.getpass(
            f"API key [{current_api_key[:8] + '...' if current_api_key else 'required'}]: "
        ).strip()
    except (KeyboardInterrupt, EOFError):
        print("\nCancelled.")
        return

    # Empty input keeps the key already stored in the environment file.
    effective_key = api_key or current_api_key
    if not effective_key:
        print("No API key provided. Cancelled.")
        return

    # ── Step 3: auto-detect transport + models ───────────────────────
    print()
    print("◐ Probing endpoint to auto-detect transport and models...")
    detection = azure_detect.detect(effective_url, effective_key)

    discovered_models: list[str] = list(detection.models)
    api_mode: str = detection.api_mode or ""

    if api_mode:
        mode_label = "OpenAI-style" if api_mode == "chat_completions" else "Anthropic-style"
        print(f"✓ Detected API transport: {mode_label}")
        if detection.reason:
            print(f" ({detection.reason})")
        if discovered_models:
            print(f"✓ Found {len(discovered_models)} deployed model(s) on this endpoint")
    else:
        # Detection gave no API mode: ask the user which format to use.
        print(f"⚠ Auto-detection incomplete: {detection.reason}")
        print()
        print("Select the API format your Azure Foundry endpoint uses:")
        print(" 1. OpenAI-style (POST /v1/chat/completions)")
        print(" For: GPT models, Llama, Mistral, and most open models")
        print(" 2. Anthropic-style (POST /v1/messages)")
        print(" For: Claude models deployed via Anthropic API format")
        try:
            default_choice = "2" if current_api_mode == "anthropic_messages" else "1"
            mode_choice = input(f"API format [1/2] ({default_choice}): ").strip() or default_choice
        except (KeyboardInterrupt, EOFError):
            print("\nCancelled.")
            return
        # Anything other than "2" selects the OpenAI-style transport.
        api_mode = "anthropic_messages" if mode_choice == "2" else "chat_completions"

    # ── Step 4: model name ───────────────────────────────────────────
    print()
    effective_model = ""
    if discovered_models:
        print("Available models on this endpoint:")
        # Only the first 30 discovered models are listed and selectable by number.
        for i, mid in enumerate(discovered_models[:30], start=1):
            print(f" {i:>2}. {mid}")
        if len(discovered_models) > 30:
            print(f" ... and {len(discovered_models) - 30} more (type name manually if not shown)")
        print()
        try:
            pick = input(
                f"Pick by number, or type a deployment name [{current_model or discovered_models[0]}]: "
            ).strip()
        except (KeyboardInterrupt, EOFError):
            print("\nCancelled.")
            return
        if not pick:
            effective_model = current_model or discovered_models[0]
        elif pick.isdigit() and 1 <= int(pick) <= min(len(discovered_models), 30):
            effective_model = discovered_models[int(pick) - 1]
        else:
            # Any other input is taken literally as a deployment name.
            effective_model = pick
    else:
        try:
            model_name = input(
                f"Model / deployment name [{current_model or 'e.g. gpt-5.4, claude-sonnet-4-6'}]: "
            ).strip()
        except (KeyboardInterrupt, EOFError):
            print("\nCancelled.")
            return
        effective_model = model_name or current_model

    if not effective_model:
        print("No model name provided. Cancelled.")
        return

    # ── Step 5: context-length lookup ────────────────────────────────
    ctx_len = azure_detect.lookup_context_length(
        effective_model, effective_url, effective_key,
    )

    # ── Step 6: persist ──────────────────────────────────────────────
    save_env_value("AZURE_FOUNDRY_API_KEY", effective_key)

    # Re-load the config from disk rather than writing the passed-in dict.
    cfg = load_config()
    model = cfg.get("model")
    if not isinstance(model, dict):
        # A bare string model is wrapped as {"default": <value>}.
        model = {"default": model} if model else {}
        cfg["model"] = model

    model["provider"] = "azure-foundry"
    model["base_url"] = effective_url
    model["api_mode"] = api_mode
    model["default"] = effective_model
    if ctx_len:
        model["context_length"] = ctx_len

    save_config(cfg)
    deactivate_provider()
    # Mirror the saved model section into the caller's in-memory config.
    config["model"] = dict(model)

    # Clear any conflicting env vars so auxiliary clients don't poison
    # themselves with a stale OpenAI base URL / key.
    if get_env_value("OPENAI_BASE_URL"):
        save_env_value("OPENAI_BASE_URL", "")
    if get_env_value("OPENAI_API_KEY"):
        save_env_value("OPENAI_API_KEY", "")

    mode_label = "OpenAI-style" if api_mode == "chat_completions" else "Anthropic-style"
    print()
    print("✓ Azure Foundry configured:")
    print(f" Endpoint: {effective_url}")
    print(f" API mode: {mode_label}")
    print(f" Model: {effective_model}")
    if ctx_len:
        print(f" Context length: {ctx_len:,} tokens")
    else:
        print(" Context length: not auto-detected (will fall back at runtime)")
    print()
|
||||
|
||||
|
||||
def _remove_custom_provider(config):
|
||||
"""Let the user remove a saved custom provider from config.yaml."""
|
||||
from hermes_cli.config import load_config, save_config
|
||||
@@ -3215,7 +2909,6 @@ def _model_flow_named_custom(config, provider_info):
|
||||
# Resolve key from env var if api_key not set directly
|
||||
if not api_key and key_env:
|
||||
api_key = os.environ.get(key_env, "")
|
||||
config_api_key = _custom_provider_api_key_config_value(provider_info, api_key)
|
||||
|
||||
print(f" Provider: {name}")
|
||||
print(f" URL: {base_url}")
|
||||
@@ -3312,8 +3005,8 @@ def _model_flow_named_custom(config, provider_info):
|
||||
else:
|
||||
model["provider"] = "custom"
|
||||
model["base_url"] = base_url
|
||||
if config_api_key:
|
||||
model["api_key"] = config_api_key
|
||||
if api_key:
|
||||
model["api_key"] = api_key
|
||||
# Apply api_mode from custom_providers entry, or clear stale value
|
||||
custom_api_mode = provider_info.get("api_mode", "")
|
||||
if custom_api_mode:
|
||||
@@ -3331,15 +3024,15 @@ def _model_flow_named_custom(config, provider_info):
|
||||
provider_entry = providers_cfg.get(provider_key)
|
||||
if isinstance(provider_entry, dict):
|
||||
provider_entry["default_model"] = model_name
|
||||
if config_api_key and not str(provider_entry.get("api_key", "") or "").strip():
|
||||
provider_entry["api_key"] = config_api_key
|
||||
if api_key and not str(provider_entry.get("api_key", "") or "").strip():
|
||||
provider_entry["api_key"] = api_key
|
||||
if key_env and not str(provider_entry.get("key_env", "") or "").strip():
|
||||
provider_entry["key_env"] = key_env
|
||||
cfg["providers"] = providers_cfg
|
||||
save_config(cfg)
|
||||
else:
|
||||
# Save model name to the custom_providers entry for next time
|
||||
_save_custom_provider(base_url, config_api_key, model_name)
|
||||
_save_custom_provider(base_url, api_key, model_name)
|
||||
|
||||
print(f"\n✅ Model set to: {model_name}")
|
||||
print(f" Provider: {name} ({base_url})")
|
||||
@@ -4780,13 +4473,6 @@ def cmd_webhook(args):
|
||||
webhook_command(args)
|
||||
|
||||
|
||||
def cmd_kanban(args):
    """Run the multi-profile collaboration board command.

    The handler is imported at call time and its return value is passed
    straight through.
    """
    from hermes_cli.kanban import kanban_command as run_kanban

    outcome = run_kanban(args)
    return outcome
|
||||
|
||||
|
||||
def cmd_hooks(args):
|
||||
"""Shell-hook inspection and management."""
|
||||
from hermes_cli.hooks import hooks_command
|
||||
@@ -5884,54 +5570,6 @@ def _finalize_update_output(state):
|
||||
pass
|
||||
|
||||
|
||||
def _cmd_update_check():
    """Implement ``hermes update --check``: fetch and report without installing.

    Fetches ``origin`` and counts the commits in ``HEAD..origin/main``, then
    prints either "already up to date" or how far behind the checkout is.

    Exits with status 1 when the project root has no ``.git`` entry, when the
    fetch fails, or when the commit count cannot be determined.
    """
    git_dir = PROJECT_ROOT / ".git"
    if not git_dir.exists():
        print("✗ Not a git repository — cannot check for updates.")
        sys.exit(1)

    git_cmd = ["git"]
    if sys.platform == "win32":
        git_cmd = ["git", "-c", "windows.appendAtomically=false"]

    print("→ Fetching from origin...")
    fetch_result = subprocess.run(
        git_cmd + ["fetch", "origin"],
        cwd=PROJECT_ROOT,
        capture_output=True,
        text=True,
    )
    if fetch_result.returncode != 0:
        stderr = fetch_result.stderr.strip()
        if "Could not resolve host" in stderr or "unable to access" in stderr:
            print("✗ Network error — cannot reach the remote repository.")
        elif "Authentication failed" in stderr or "could not read Username" in stderr:
            print("✗ Authentication failed — check your git credentials or SSH key.")
        else:
            print("✗ Failed to fetch from origin.")
            if stderr:
                print(f" {stderr.splitlines()[0]}")
        sys.exit(1)

    try:
        rev_result = subprocess.run(
            git_cmd + ["rev-list", "HEAD..origin/main", "--count"],
            cwd=PROJECT_ROOT,
            capture_output=True,
            text=True,
            check=True,
        )
        behind = int(rev_result.stdout.strip())
    except (subprocess.CalledProcessError, ValueError) as exc:
        # e.g. the remote has no ``main`` branch, or git printed something
        # that is not a number.  Report it like the fetch failures above
        # instead of surfacing a traceback.
        print("✗ Could not compare HEAD with origin/main.")
        detail = (getattr(exc, "stderr", "") or "").strip()
        if detail:
            print(f" {detail.splitlines()[0]}")
        sys.exit(1)

    if behind == 0:
        print("✓ Already up to date.")
    else:
        commits_word = "commit" if behind == 1 else "commits"
        print(f"⚕ Update available: {behind} {commits_word} behind origin/main.")
        from hermes_cli.config import recommended_update_command
        print(f" Run '{recommended_update_command()}' to install.")
|
||||
|
||||
|
||||
def cmd_update(args):
|
||||
"""Update Hermes Agent to the latest version.
|
||||
|
||||
@@ -5945,10 +5583,6 @@ def cmd_update(args):
|
||||
managed_error("update Hermes Agent")
|
||||
return
|
||||
|
||||
if getattr(args, "check", False):
|
||||
_cmd_update_check()
|
||||
return
|
||||
|
||||
gateway_mode = getattr(args, "gateway", False)
|
||||
|
||||
# Protect against mid-update terminal disconnects (SIGHUP) and tolerate
|
||||
@@ -6412,75 +6046,6 @@ def _cmd_update_impl(args, gateway_mode: bool):
|
||||
)
|
||||
import signal as _signal
|
||||
|
||||
def _wait_for_service_active(
    scope_cmd_: list, svc_name_: str, timeout: float = 10.0,
) -> bool:
    """Poll ``<scope_cmd_> is-active <svc_name_>`` until it prints ``active``.

    A single check can race the unit's stop -> start transition and report
    it as down, so the probe is repeated every 0.5s.  The overall budget is
    ``timeout`` seconds, with a floor of 0.5s.

    Returns True as soon as a probe's stdout is exactly ``active`` (after
    stripping), False once the deadline has passed.  A missing executable or
    a probe that exceeds its own 5s timeout counts as "not active yet".
    """
    give_up_at = _time.monotonic() + max(timeout, 0.5)
    probe_cmd = scope_cmd_ + ["is-active", svc_name_]
    while True:
        try:
            probe = subprocess.run(
                probe_cmd, capture_output=True, text=True, timeout=5,
            )
        except (FileNotFoundError, subprocess.TimeoutExpired):
            probe = None
        if probe is not None and probe.stdout.strip() == "active":
            return True
        if _time.monotonic() >= give_up_at:
            return False
        _time.sleep(0.5)
|
||||
|
||||
def _service_restart_sec(
    scope_cmd_: list, svc_name_: str, default: float = 0.0,
) -> float:
    """Read the unit's ``RestartUSec`` (RestartSec) and return it in seconds.

    Runs ``<scope_cmd_> show <svc_name_> --property=RestartUSec --value`` and
    parses the printed time span.  Callers that poll for ``is-active`` after
    a graceful exit need a timeout of at least this value plus slack, or
    they give up during the restart cooldown.

    Args:
        scope_cmd_: Command prefix (argv list) the ``show`` arguments are
            appended to.
        svc_name_: Unit name to query.
        default: Value returned whenever the property cannot be read or
            cannot be parsed in full.

    Returns:
        The sum of all ``<number><unit>`` parts in seconds, or ``default``
        when the command is missing or times out, the output is empty or
        ``infinity``, or any single part is not understood.
    """
    try:
        _show = subprocess.run(
            scope_cmd_ + [
                "show", svc_name_,
                "--property=RestartUSec", "--value",
            ],
            capture_output=True, text=True, timeout=5,
        )
    except (FileNotFoundError, subprocess.TimeoutExpired):
        return default

    raw = (_show.stdout or "").strip()
    # systemd emits values like "30s", "100ms", "1min 30s", "1h" or
    # "infinity".  Parse conservatively; on any miss return default.
    if not raw or raw == "infinity":
        return default

    # Longer suffixes come first so "ms"/"us" are not mistaken for "s".
    # Units above weeks are not listed: such parts fail to parse and make
    # the whole value fall back to ``default``.
    units = (
        ("min", 60.0),
        ("ms", 0.001),
        ("us", 0.000001),
        ("w", 604800.0),
        ("d", 86400.0),
        ("h", 3600.0),
        ("s", 1.0),
    )
    total = 0.0
    for part in raw.split():
        if part == "0":
            # A zero time span is printed without a unit suffix.
            continue
        for suffix, multiplier in units:
            if part.endswith(suffix):
                try:
                    total += float(part[: -len(suffix)]) * multiplier
                except ValueError:
                    return default
                break
        else:
            # Unknown unit: a partial sum would under-report the delay, so
            # refuse the whole value rather than guess.
            return default
    return total
|
||||
|
||||
# Drain budget for graceful SIGUSR1 restarts. The gateway drains
|
||||
# for up to ``agent.restart_drain_timeout`` (default 60s) before
|
||||
# exiting with code 75; we wait slightly longer so the drain
|
||||
@@ -6587,23 +6152,14 @@ def _cmd_update_impl(args, gateway_mode: bool):
|
||||
|
||||
if _graceful_ok:
|
||||
# Gateway exited 75; systemd should relaunch
|
||||
# via Restart=on-failure. The unit's
|
||||
# RestartSec (default 30s on ours) gates the
|
||||
# respawn — poll past that + slack so we
|
||||
# don't give up mid-cooldown and falsely
|
||||
# print "drained but didn't relaunch". For
|
||||
# units without RestartSec set we fall back
|
||||
# to the original 10s budget.
|
||||
_restart_sec = _service_restart_sec(
|
||||
scope_cmd, svc_name, default=0.0,
|
||||
# via Restart=on-failure. Verify the new
|
||||
# process came up.
|
||||
_time.sleep(3)
|
||||
verify = subprocess.run(
|
||||
scope_cmd + ["is-active", svc_name],
|
||||
capture_output=True, text=True, timeout=5,
|
||||
)
|
||||
_post_drain_timeout = max(
|
||||
10.0, _restart_sec + 10.0,
|
||||
)
|
||||
if _wait_for_service_active(
|
||||
scope_cmd, svc_name,
|
||||
timeout=_post_drain_timeout,
|
||||
):
|
||||
if verify.stdout.strip() == "active":
|
||||
restarted_services.append(svc_name)
|
||||
continue
|
||||
# Process exited but wasn't respawned (older
|
||||
@@ -6629,9 +6185,14 @@ def _cmd_update_impl(args, gateway_mode: bool):
|
||||
# Verify the service actually survived the
|
||||
# restart. systemctl restart returns 0 even
|
||||
# if the new process crashes immediately.
|
||||
if _wait_for_service_active(
|
||||
scope_cmd, svc_name, timeout=10.0,
|
||||
):
|
||||
_time.sleep(3)
|
||||
verify = subprocess.run(
|
||||
scope_cmd + ["is-active", svc_name],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=5,
|
||||
)
|
||||
if verify.stdout.strip() == "active":
|
||||
restarted_services.append(svc_name)
|
||||
else:
|
||||
# Retry once — transient startup failures
|
||||
@@ -6646,9 +6207,14 @@ def _cmd_update_impl(args, gateway_mode: bool):
|
||||
text=True,
|
||||
timeout=15,
|
||||
)
|
||||
if _wait_for_service_active(
|
||||
scope_cmd, svc_name, timeout=10.0,
|
||||
):
|
||||
_time.sleep(3)
|
||||
verify2 = subprocess.run(
|
||||
scope_cmd + ["is-active", svc_name],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=5,
|
||||
)
|
||||
if verify2.stdout.strip() == "active":
|
||||
restarted_services.append(svc_name)
|
||||
print(f" ✓ {svc_name} recovered on retry")
|
||||
else:
|
||||
@@ -7230,9 +6796,6 @@ Examples:
|
||||
hermes auth remove <p> <t> Remove pooled credential by index, id, or label
|
||||
hermes auth reset <provider> Clear exhaustion status for a provider
|
||||
hermes model Select default model
|
||||
hermes fallback [list] Show fallback provider chain
|
||||
hermes fallback add Add a fallback provider (same picker as `hermes model`)
|
||||
hermes fallback remove Remove a fallback provider from the chain
|
||||
hermes config View configuration
|
||||
hermes config edit Edit config in $EDITOR
|
||||
hermes config set model gpt-4 Set a config value
|
||||
@@ -7258,40 +6821,6 @@ For more help on a command:
|
||||
parser.add_argument(
|
||||
"--version", "-V", action="store_true", help="Show version and exit"
|
||||
)
|
||||
parser.add_argument(
|
||||
"-z",
|
||||
"--oneshot",
|
||||
metavar="PROMPT",
|
||||
default=None,
|
||||
help=(
|
||||
"One-shot mode: send a single prompt and print ONLY the final "
|
||||
"response text to stdout. No banner, no spinner, no tool "
|
||||
"previews, no session_id line. Tools, memory, rules, and "
|
||||
"AGENTS.md in the CWD are loaded as normal; approvals are "
|
||||
"auto-bypassed. Intended for scripts / pipes."
|
||||
),
|
||||
)
|
||||
# --model / --provider are accepted at the top level so they can pair
|
||||
# with -z without needing the `chat` subcommand. If neither -z nor a
|
||||
# subcommand consumes them, they fall through harmlessly as None.
|
||||
# Mirrors `hermes chat --model ... --provider ...` semantics.
|
||||
parser.add_argument(
|
||||
"-m",
|
||||
"--model",
|
||||
default=None,
|
||||
help=(
|
||||
"Model override for this invocation (e.g. anthropic/claude-sonnet-4.6). "
|
||||
"Applies to -z/--oneshot and --tui. Also settable via HERMES_INFERENCE_MODEL env var."
|
||||
),
|
||||
)
|
||||
parser.add_argument(
|
||||
"--provider",
|
||||
default=None,
|
||||
help=(
|
||||
"Provider override for this invocation (e.g. openrouter, anthropic). "
|
||||
"Applies to -z/--oneshot and --tui. Also settable via HERMES_INFERENCE_PROVIDER env var."
|
||||
),
|
||||
)
|
||||
parser.add_argument(
|
||||
"--resume",
|
||||
"-r",
|
||||
@@ -7574,42 +7103,6 @@ For more help on a command:
|
||||
)
|
||||
model_parser.set_defaults(func=cmd_model)
|
||||
|
||||
# =========================================================================
|
||||
# fallback command — manage the fallback provider chain
|
||||
# =========================================================================
|
||||
from hermes_cli.fallback_cmd import cmd_fallback
|
||||
|
||||
fallback_parser = subparsers.add_parser(
|
||||
"fallback",
|
||||
help="Manage fallback providers (tried when the primary model fails)",
|
||||
description=(
|
||||
"Manage the fallback provider chain. Fallback providers are tried "
|
||||
"in order when the primary model fails with rate-limit, overload, or "
|
||||
"connection errors. See: "
|
||||
"https://hermes-agent.nousresearch.com/docs/user-guide/features/fallback-providers"
|
||||
),
|
||||
)
|
||||
fallback_subparsers = fallback_parser.add_subparsers(dest="fallback_command")
|
||||
fallback_subparsers.add_parser(
|
||||
"list",
|
||||
aliases=["ls"],
|
||||
help="Show the current fallback chain (default when no subcommand)",
|
||||
)
|
||||
fallback_subparsers.add_parser(
|
||||
"add",
|
||||
help="Pick a provider + model (same picker as `hermes model`) and append to the chain",
|
||||
)
|
||||
fallback_subparsers.add_parser(
|
||||
"remove",
|
||||
aliases=["rm"],
|
||||
help="Pick an entry to delete from the chain",
|
||||
)
|
||||
fallback_subparsers.add_parser(
|
||||
"clear",
|
||||
help="Remove all fallback entries",
|
||||
)
|
||||
fallback_parser.set_defaults(func=cmd_fallback)
|
||||
|
||||
# =========================================================================
|
||||
# gateway command
|
||||
# =========================================================================
|
||||
@@ -7780,19 +7273,6 @@ For more help on a command:
|
||||
setup_parser.add_argument(
|
||||
"--reset", action="store_true", help="Reset configuration to defaults"
|
||||
)
|
||||
setup_parser.add_argument(
|
||||
"--reconfigure",
|
||||
action="store_true",
|
||||
help="(Default on existing installs.) Re-run the full wizard, "
|
||||
"showing current values as defaults. Kept for backwards "
|
||||
"compatibility — a bare 'hermes setup' now does this.",
|
||||
)
|
||||
setup_parser.add_argument(
|
||||
"--quick",
|
||||
action="store_true",
|
||||
help="On existing installs: only prompt for items that are missing "
|
||||
"or unset, instead of running the full reconfigure wizard.",
|
||||
)
|
||||
setup_parser.set_defaults(func=cmd_setup)
|
||||
|
||||
# =========================================================================
|
||||
@@ -8123,13 +7603,6 @@ For more help on a command:
|
||||
|
||||
webhook_parser.set_defaults(func=cmd_webhook)
|
||||
|
||||
# =========================================================================
|
||||
# kanban command — multi-profile collaboration board
|
||||
# =========================================================================
|
||||
from hermes_cli.kanban import build_parser as _build_kanban_parser
|
||||
kanban_parser = _build_kanban_parser(subparsers)
|
||||
kanban_parser.set_defaults(func=cmd_kanban)
|
||||
|
||||
# =========================================================================
|
||||
# hooks command — shell-hook inspection and management
|
||||
# =========================================================================
|
||||
@@ -9282,12 +8755,6 @@ Examples:
|
||||
default=False,
|
||||
help="Gateway mode: use file-based IPC for prompts instead of stdin (used internally by /update)",
|
||||
)
|
||||
update_parser.add_argument(
|
||||
"--check",
|
||||
action="store_true",
|
||||
default=False,
|
||||
help="Check whether an update is available without installing anything",
|
||||
)
|
||||
update_parser.set_defaults(func=cmd_update)
|
||||
|
||||
# =========================================================================
|
||||
@@ -9634,17 +9101,6 @@ Examples:
|
||||
exc_info=True,
|
||||
)
|
||||
|
||||
# Handle top-level --oneshot / -z: single-shot mode, stdout = final
|
||||
# response only, nothing else. Bypasses cli.py entirely.
|
||||
if getattr(args, "oneshot", None):
|
||||
from hermes_cli.oneshot import run_oneshot
|
||||
|
||||
sys.exit(run_oneshot(
|
||||
args.oneshot,
|
||||
model=getattr(args, "model", None),
|
||||
provider=getattr(args, "provider", None),
|
||||
))
|
||||
|
||||
# Handle top-level --resume / --continue as shortcut to chat
|
||||
if (args.resume or args.continue_last) and args.command is None:
|
||||
args.command = "chat"
|
||||
|
||||
@@ -1,329 +0,0 @@
|
||||
"""Remote model catalog fetcher.
|
||||
|
||||
The Hermes docs site hosts a JSON manifest of curated models for providers
|
||||
we want to update without shipping a release (currently OpenRouter and
|
||||
Nous Portal). This module fetches, validates, and caches that manifest,
|
||||
falling back to the in-repo hardcoded lists when the network is unavailable.
|
||||
|
||||
Pipeline
|
||||
--------
|
||||
1. ``get_catalog()`` — returns a parsed manifest dict.
|
||||
- Checks in-process cache (invalidated by TTL).
|
||||
- Reads disk cache at ``~/.hermes/cache/model_catalog.json``.
|
||||
- Fetches the master URL if disk cache is stale or missing.
|
||||
- On any fetch failure, keeps using the stale cache (or empty dict).
|
||||
|
||||
2. ``get_curated_openrouter_models()`` / ``get_curated_nous_models()`` —
|
||||
thin accessors returning the shapes existing callers expect. Each
|
||||
falls back to the in-repo hardcoded list on any lookup failure.
|
||||
|
||||
Schema (version 1)
|
||||
------------------
|
||||
::
|
||||
|
||||
{
|
||||
"version": 1,
|
||||
"updated_at": "2026-04-25T22:00:00Z",
|
||||
"metadata": {...}, # free-form
|
||||
"providers": {
|
||||
"openrouter": {
|
||||
"metadata": {...}, # free-form
|
||||
"models": [
|
||||
{"id": "vendor/model", "description": "recommended",
|
||||
"metadata": {...}} # free-form, model-level
|
||||
]
|
||||
},
|
||||
"nous": {...}
|
||||
}
|
||||
}
|
||||
|
||||
Unknown fields are ignored — extra metadata can be added at either level
|
||||
without bumping ``version``. ``version`` bumps are reserved for
|
||||
breaking changes (renaming ``providers``, changing ``models`` shape).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
import urllib.error
|
||||
import urllib.request
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from hermes_cli import __version__ as _HERMES_VERSION
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Constants
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Where the manifest is fetched from unless ``model_catalog.url`` overrides it.
DEFAULT_CATALOG_URL = "https://hermes-agent.nousresearch.com/docs/api/model-catalog.json"
# How long a disk-cached manifest counts as fresh.
DEFAULT_TTL_HOURS = 24
# Timeout (seconds) passed to every manifest HTTP request.
DEFAULT_FETCH_TIMEOUT = 8.0
# Highest manifest ``version`` this module accepts.
SUPPORTED_SCHEMA_VERSION = 1

_HERMES_USER_AGENT = f"hermes-cli/{_HERMES_VERSION}"

# Process-wide memo of the last manifest handed out, plus the mtime of the
# disk file it corresponds to.  It is only reused while that file is
# unchanged and still within the TTL, so callers never manage it themselves.
_catalog_cache: dict[str, Any] | None = None
_catalog_cache_source_mtime: float = 0.0
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Config
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _load_catalog_config() -> dict[str, Any]:
    """Load the ``model_catalog`` config block with defaults filled in.

    Never raises: a failing or missing config loader, a non-dict config, a
    non-dict ``model_catalog`` block and an unparseable ``ttl_hours`` all
    degrade to the defaults, so ``get_catalog`` keeps its never-raise
    contract.

    Returns:
        A dict with exactly these keys: ``enabled`` (bool), ``url`` (str),
        ``ttl_hours`` (float) and ``providers`` (dict).
    """
    try:
        from hermes_cli.config import load_config

        cfg = load_config() or {}
    except Exception:
        cfg = {}
    if not isinstance(cfg, dict):
        cfg = {}

    raw = cfg.get("model_catalog")
    if not isinstance(raw, dict):
        raw = {}

    # A falsy value (missing, 0, "") selects the default TTL; a value that
    # cannot be converted to float (e.g. "daily") does too, instead of
    # raising out of every catalog lookup.
    try:
        ttl_hours = float(raw.get("ttl_hours") or DEFAULT_TTL_HOURS)
    except (TypeError, ValueError):
        ttl_hours = float(DEFAULT_TTL_HOURS)

    providers = raw.get("providers")
    return {
        "enabled": bool(raw.get("enabled", True)),
        "url": str(raw.get("url") or DEFAULT_CATALOG_URL),
        "ttl_hours": ttl_hours,
        "providers": providers if isinstance(providers, dict) else {},
    }
|
||||
|
||||
|
||||
def _cache_path() -> Path:
    """Return the location of the on-disk manifest cache.

    The home-directory helper is imported at call time, not at module
    import, so tests can monkeypatch the Hermes home.
    """
    from hermes_constants import get_hermes_home

    hermes_home = get_hermes_home()
    return hermes_home / "cache" / "model_catalog.json"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fetch + validate + cache
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _fetch_manifest(url: str, timeout: float) -> dict[str, Any] | None:
    """GET ``url`` and return the parsed manifest, or None on any failure.

    Network errors, timeouts, undecodable or non-JSON bodies and manifests
    that fail ``_validate_manifest`` are logged at INFO level and reported
    as None; nothing is raised to the caller.
    """
    request_headers = {
        "Accept": "application/json",
        "User-Agent": _HERMES_USER_AGENT,
    }
    try:
        request = urllib.request.Request(url, headers=request_headers)
        with urllib.request.urlopen(request, timeout=timeout) as response:
            body = response.read()
        manifest = json.loads(body.decode())
    except (urllib.error.URLError, TimeoutError, json.JSONDecodeError, OSError) as exc:
        logger.info("model catalog fetch failed (%s): %s", url, exc)
        return None
    except Exception as exc:  # pragma: no cover — defensive
        logger.info("model catalog fetch errored (%s): %s", url, exc)
        return None

    if _validate_manifest(manifest):
        return manifest

    logger.info("model catalog at %s failed schema validation", url)
    return None
|
||||
|
||||
|
||||
def _validate_manifest(data: Any) -> bool:
    """Return True when ``data`` matches the minimum manifest shape.

    Required shape: a dict whose ``version`` is an integer between 1 and
    ``SUPPORTED_SCHEMA_VERSION`` inclusive, and whose ``providers`` is a
    dict mapping string names to dicts.  Each provider dict must carry a
    ``models`` list of dicts, each with a non-blank string ``id``.  Any
    other fields are ignored.
    """
    if not isinstance(data, dict):
        return False

    version = data.get("version")
    # ``bool`` is a subclass of ``int``; ``"version": true`` is not a version.
    if isinstance(version, bool) or not isinstance(version, int):
        return False
    # Future schema versions we don't understand are refused rather than
    # guessed at.  Older schemas (version < 1) aren't supported either.
    if version < 1 or version > SUPPORTED_SCHEMA_VERSION:
        return False

    providers = data.get("providers")
    if not isinstance(providers, dict):
        return False
    for pname, pblock in providers.items():
        if not isinstance(pname, str) or not isinstance(pblock, dict):
            return False
        models = pblock.get("models")
        if not isinstance(models, list):
            return False
        for m in models:
            if not isinstance(m, dict):
                return False
            model_id = m.get("id")
            if not isinstance(model_id, str) or not model_id.strip():
                return False
    return True
|
||||
|
||||
|
||||
def _read_disk_cache() -> tuple[dict[str, Any] | None, float]:
    """Return ``(data_or_none, mtime)`` for the on-disk manifest cache.

    ``data`` is None and ``mtime`` is 0.0 when the file is missing,
    unreadable, not valid UTF-8 JSON, or fails ``_validate_manifest``.
    Never raises for a corrupt cache file.
    """
    path = _cache_path()
    try:
        mtime = path.stat().st_mtime
    except OSError:  # includes FileNotFoundError
        return (None, 0.0)
    try:
        with open(path, encoding="utf-8") as fh:
            data = json.load(fh)
    except (OSError, ValueError):
        # ValueError covers json.JSONDecodeError and also the
        # UnicodeDecodeError a truncated or binary-garbage file produces.
        return (None, 0.0)
    if not _validate_manifest(data):
        return (None, 0.0)
    return (data, mtime)
|
||||
|
||||
|
||||
def _write_disk_cache(data: dict[str, Any]) -> None:
    """Persist ``data`` to the disk cache, logging (not raising) on OSError.

    The manifest is written to a sibling ``.tmp`` file first and then moved
    over the real cache path with ``os.replace``.
    """
    target = _cache_path()
    try:
        target.parent.mkdir(parents=True, exist_ok=True)
        scratch = target.with_suffix(target.suffix + ".tmp")
        with open(scratch, "w") as out:
            json.dump(data, out, indent=2)
            out.write("\n")
        os.replace(scratch, target)
    except OSError as exc:
        logger.info("model catalog cache write failed: %s", exc)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Public API
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def get_catalog(*, force_refresh: bool = False) -> dict[str, Any]:
    """Return the parsed model catalog manifest, or an empty dict on failure.

    Resolution order:

    1. Catalog disabled in config -> ``{}``.
    2. Disk cache younger than the configured TTL (and ``force_refresh`` not
       set) -> the in-process copy if it was loaded from that same file
       version, otherwise the disk copy.
    3. Fresh fetch from the configured URL; on success the result is written
       to disk and returned.
    4. Fetch failed -> any disk copy, even a stale one, else ``{}``.

    Callers should treat a missing provider/model as "use the in-repo fallback"
    — never raise from this function so the CLI keeps working offline.

    Args:
        force_refresh: Skip both caches and go straight to the network.
    """
    global _catalog_cache, _catalog_cache_source_mtime

    cfg = _load_catalog_config()
    if not cfg["enabled"]:
        return {}

    ttl_seconds = max(0.0, cfg["ttl_hours"] * 3600.0)

    disk_data, disk_mtime = _read_disk_cache()
    now = time.time()
    disk_fresh = disk_data is not None and (now - disk_mtime) < ttl_seconds

    if not force_refresh and disk_fresh:
        # In-process cache hit: disk hasn't changed since we loaded it.
        if _catalog_cache is not None and disk_mtime == _catalog_cache_source_mtime:
            return _catalog_cache
        # Disk is fresh enough — use it without a network hit.
        _catalog_cache = disk_data
        _catalog_cache_source_mtime = disk_mtime
        return disk_data

    # Need to (re)fetch. If it fails, fall back to any stale disk copy.
    fetched = _fetch_manifest(cfg["url"], DEFAULT_FETCH_TIMEOUT)
    if fetched is not None:
        _write_disk_cache(fetched)
        # The write is best-effort.  Only adopt the file's mtime when the
        # file really holds what we just fetched; if the write failed, the
        # read-back would return the old stale manifest, which must not be
        # returned in place of the fresh one.
        new_disk_data, new_mtime = _read_disk_cache()
        persisted = new_disk_data is not None and new_disk_data == fetched
        _catalog_cache = fetched
        _catalog_cache_source_mtime = new_mtime if persisted else now
        return fetched

    if disk_data is not None:
        _catalog_cache = disk_data
        _catalog_cache_source_mtime = disk_mtime
        return disk_data

    return {}
|
||||
|
||||
|
||||
def _fetch_provider_override(provider: str) -> dict[str, Any] | None:
    """Fetch ``model_catalog.providers.<provider>.url`` when one is configured.

    Returns None when the catalog is disabled, the provider has no usable
    override URL, or the fetch itself fails.
    """
    cfg = _load_catalog_config()
    if not cfg["enabled"]:
        return None

    provider_cfg = cfg["providers"].get(provider)
    override_url = provider_cfg.get("url") if isinstance(provider_cfg, dict) else None
    if not isinstance(override_url, str):
        return None
    target = override_url.strip()
    if not target:
        return None

    # Overrides bypass the disk cache (they are usually third-party,
    # self-hosted manifests): every call re-requests the URL, with the same
    # short timeout, so a slow host doesn't block the picker.
    return _fetch_manifest(target, DEFAULT_FETCH_TIMEOUT)
|
||||
|
||||
|
||||
def _get_provider_block(provider: str) -> dict[str, Any] | None:
    """Return ``provider``'s manifest block, preferring a per-provider override.

    The override manifest is consulted first.  The shared catalog is only
    loaded when the override is absent or carries no dict for this
    provider.  Returns None when neither source has one.
    """
    override = _fetch_provider_override(provider)
    if override is not None:
        candidate = override.get("providers", {}).get(provider)
        if isinstance(candidate, dict):
            return candidate

    catalog = get_catalog()
    if catalog:
        candidate = catalog.get("providers", {}).get(provider)
        if isinstance(candidate, dict):
            return candidate
    return None
|
||||
|
||||
|
||||
def get_curated_openrouter_models() -> list[tuple[str, str]] | None:
    """Return OpenRouter's curated ``[(id, description), ...]`` from the manifest.

    Entries whose ``id`` is blank are dropped; a missing description becomes
    ``""``.  Returns ``None`` when the manifest has no OpenRouter block or
    no usable entries, so callers can fall back to their hardcoded list.
    """
    block = _get_provider_block("openrouter")
    if not block:
        return None

    pairs: list[tuple[str, str]] = []
    for entry in block.get("models", []):
        model_id = str(entry.get("id") or "").strip()
        if model_id:
            pairs.append((model_id, str(entry.get("description") or "")))
    return pairs if pairs else None
|
||||
|
||||
|
||||
def get_curated_nous_models() -> list[str] | None:
    """Return Nous Portal's curated model ids from the manifest.

    Ids are stripped and blank ones dropped.  Returns ``None`` when the
    manifest has no Nous block or no usable ids.
    """
    block = _get_provider_block("nous")
    if not block:
        return None

    stripped_ids = [str(entry.get("id") or "").strip() for entry in block.get("models", [])]
    model_ids: list[str] = [model_id for model_id in stripped_ids if model_id]
    return model_ids if model_ids else None
|
||||
|
||||
|
||||
def reset_cache() -> None:
    """Drop the in-process manifest copy (tests, ``hermes model --refresh``).

    Only the module-level memo is cleared; the disk cache file is left alone.
    """
    global _catalog_cache, _catalog_cache_source_mtime
    _catalog_cache, _catalog_cache_source_mtime = None, 0.0
|
||||
+12
-39
@@ -533,7 +533,6 @@ def resolve_display_context_length(
|
||||
base_url: str = "",
|
||||
api_key: str = "",
|
||||
model_info: Optional[ModelInfo] = None,
|
||||
custom_providers: list | None = None,
|
||||
) -> Optional[int]:
|
||||
"""Resolve the context length to show in /model output.
|
||||
|
||||
@@ -544,11 +543,6 @@ def resolve_display_context_length(
|
||||
about Codex OAuth, Copilot, Nous, and falls back to models.dev for the
|
||||
rest.
|
||||
|
||||
When ``custom_providers`` is provided, per-model ``context_length``
|
||||
overrides from ``custom_providers[].models.<id>.context_length`` are
|
||||
honored — this closes #15779 where ``/model`` switch ignored user-set
|
||||
overrides.
|
||||
|
||||
Prefer the provider-aware value; fall back to ``model_info.context_window``
|
||||
only if the resolver returns nothing.
|
||||
"""
|
||||
@@ -559,7 +553,6 @@ def resolve_display_context_length(
|
||||
base_url=base_url or "",
|
||||
api_key=api_key or "",
|
||||
provider=provider or None,
|
||||
custom_providers=custom_providers,
|
||||
)
|
||||
if ctx:
|
||||
return int(ctx)
|
||||
@@ -838,14 +831,9 @@ def switch_model(
|
||||
requested=current_provider,
|
||||
target_model=new_model,
|
||||
)
|
||||
# If resolution fell through to "custom" (e.g. named custom provider like
|
||||
# "ollama-launch" that resolve_runtime_provider doesn't know), keep existing
|
||||
# credentials. Otherwise use the resolved values (picks up credential rotation,
|
||||
# base_url adjustments for OpenCode, etc.).
|
||||
if runtime.get("provider") != "custom":
|
||||
api_key = runtime.get("api_key", "")
|
||||
base_url = runtime.get("base_url", "")
|
||||
api_mode = runtime.get("api_mode", "")
|
||||
api_key = runtime.get("api_key", "")
|
||||
base_url = runtime.get("base_url", "")
|
||||
api_mode = runtime.get("api_mode", "")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
@@ -879,31 +867,16 @@ def switch_model(
|
||||
"message": f"Could not validate `{new_model}`: {e}",
|
||||
}
|
||||
|
||||
# Override rejection if model is in the user's saved provider config.
|
||||
# API /v1/models may not list cloud/aliased models even though the server supports them.
|
||||
if not validation.get("accepted"):
|
||||
override = False
|
||||
if user_providers:
|
||||
for up in user_providers:
|
||||
if isinstance(up, dict) and up.get("provider") == target_provider:
|
||||
cfg_models = up.get("models", [])
|
||||
if new_model in cfg_models or any(
|
||||
m.get("name") == new_model for m in cfg_models if isinstance(m, dict)
|
||||
):
|
||||
override = True
|
||||
break
|
||||
if override:
|
||||
validation = {"accepted": True, "persist": True, "recognized": False, "message": validation.get("message", "")}
|
||||
else:
|
||||
msg = validation.get("message", "Invalid model")
|
||||
return ModelSwitchResult(
|
||||
success=False,
|
||||
new_model=new_model,
|
||||
target_provider=target_provider,
|
||||
provider_label=provider_label,
|
||||
is_global=is_global,
|
||||
error_message=msg,
|
||||
)
|
||||
msg = validation.get("message", "Invalid model")
|
||||
return ModelSwitchResult(
|
||||
success=False,
|
||||
new_model=new_model,
|
||||
target_provider=target_provider,
|
||||
provider_label=provider_label,
|
||||
is_global=is_global,
|
||||
error_message=msg,
|
||||
)
|
||||
|
||||
# Apply auto-correction if validation found a closer match
|
||||
if validation.get("corrected_model"):
|
||||
|
||||
+76
-158
@@ -383,9 +383,6 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
|
||||
"us.meta.llama4-maverick-17b-instruct-v1:0",
|
||||
"us.meta.llama4-scout-17b-instruct-v1:0",
|
||||
],
|
||||
# Azure Foundry: user-provided endpoint and model.
|
||||
# Empty list because models depend on the endpoint configuration.
|
||||
"azure-foundry": [],
|
||||
}
|
||||
|
||||
# Vercel AI Gateway: derive the bare-model-id catalog from the curated
|
||||
@@ -743,7 +740,6 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
|
||||
ProviderEntry("opencode-zen", "OpenCode Zen", "OpenCode Zen (35+ curated models, pay-as-you-go)"),
|
||||
ProviderEntry("opencode-go", "OpenCode Go", "OpenCode Go (open models, $10/month subscription)"),
|
||||
ProviderEntry("bedrock", "AWS Bedrock", "AWS Bedrock (Claude, Nova, Llama, DeepSeek — IAM or API key)"),
|
||||
ProviderEntry("azure-foundry", "Azure Foundry", "Azure Foundry (OpenAI-style or Anthropic-style endpoint — your Azure AI deployment)"),
|
||||
]
|
||||
|
||||
# Derived dicts — used throughout the codebase
|
||||
@@ -876,16 +872,7 @@ def fetch_openrouter_models(
|
||||
if _openrouter_catalog_cache is not None and not force_refresh:
|
||||
return list(_openrouter_catalog_cache)
|
||||
|
||||
# Prefer the remotely-hosted catalog manifest; fall back to the in-repo
|
||||
# snapshot when the manifest is unreachable. Both are curated lists that
|
||||
# drive the picker; the OpenRouter live /v1/models filter (tool support,
|
||||
# free pricing) is applied on top either way.
|
||||
try:
|
||||
from hermes_cli.model_catalog import get_curated_openrouter_models
|
||||
remote = get_curated_openrouter_models()
|
||||
except Exception:
|
||||
remote = None
|
||||
fallback = list(remote) if remote else list(OPENROUTER_MODELS)
|
||||
fallback = list(OPENROUTER_MODELS)
|
||||
preferred_ids = [mid for mid, _ in fallback]
|
||||
|
||||
try:
|
||||
@@ -938,24 +925,6 @@ def model_ids(*, force_refresh: bool = False) -> list[str]:
|
||||
return [mid for mid, _ in fetch_openrouter_models(force_refresh=force_refresh)]
|
||||
|
||||
|
||||
def get_curated_nous_model_ids() -> list[str]:
    """Return the curated Nous Portal model-id list.

    The remotely hosted catalog manifest is preferred.  If looking it up
    raises, or it yields nothing, the in-repo snapshot in
    ``_PROVIDER_MODELS["nous"]`` is used.  The result is always a fresh
    list, never None.
    """
    try:
        from hermes_cli.model_catalog import get_curated_nous_models

        remote = get_curated_nous_models()
    except Exception:
        remote = None

    source = remote if remote else _PROVIDER_MODELS.get("nous", [])
    return list(source)
|
||||
|
||||
|
||||
def _ai_gateway_model_is_free(pricing: Any) -> bool:
|
||||
"""Return True if an AI Gateway model has $0 input AND output pricing."""
|
||||
if not isinstance(pricing, dict):
|
||||
@@ -1410,124 +1379,6 @@ def curated_models_for_provider(
|
||||
return [(m, "") for m in models]
|
||||
|
||||
|
||||
def _provider_keys(provider: str) -> set[str]:
    """Return the non-empty lookup keys for ``provider``.

    Two candidates are considered: the stripped, lower-cased input and the
    result of ``normalize_provider``.  Falsy candidates are left out.
    """
    lowered = (provider or "").strip().lower()
    candidates = (lowered, normalize_provider(provider))
    return {candidate for candidate in candidates if candidate}
|
||||
|
||||
|
||||
def _model_in_provider_catalog(name_lower: str, providers: set[str]) -> bool:
    """Report whether ``name_lower`` is in any listed provider's static catalog.

    Catalog entries are lower-cased before comparison; ``name_lower`` is
    compared as given.  Providers without a ``_PROVIDER_MODELS`` entry
    contribute nothing.
    """
    for provider in providers:
        for model in _PROVIDER_MODELS.get(provider, []):
            if model.lower() == name_lower:
                return True
    return False
|
||||
|
||||
|
||||
# Providers whose static catalog ids are matched as ``vendor/family...``
# rather than bare ``family...`` when short aliases are resolved.
_AGGREGATOR_PROVIDERS = frozenset(
    ("nous", "openrouter", "ai-gateway", "copilot", "kilocode")
)
|
||||
|
||||
|
||||
def _resolve_static_model_alias(
    name_lower: str,
    current_keys: set[str],
) -> Optional[tuple[str, str]]:
    """Resolve short aliases (e.g. sonnet/opus) using static catalogs only.

    Looks ``name_lower`` up in ``MODEL_ALIASES`` and then searches the
    static ``_PROVIDER_MODELS`` catalogs for the first model id that starts
    with the alias's family (``vendor/family`` for aggregator providers).

    Search order:

    1. The providers in ``current_keys``.
    2. Every other non-aggregator provider, in ``_PROVIDER_MODELS`` order.

    Args:
        name_lower: Lower-cased alias typed by the user.
        current_keys: Lookup keys of the currently selected provider.

    Returns:
        ``(provider_id, model_id)`` for the first match, or ``None`` when
        the alias table cannot be imported, the alias is unknown, or no
        catalog has a matching model.
    """
    try:
        from hermes_cli.model_switch import MODEL_ALIASES
    except Exception:
        return None

    identity = MODEL_ALIASES.get(name_lower)
    if identity is None:
        return None

    vendor = identity.vendor
    family = identity.family

    def _match(provider: str) -> Optional[str]:
        """Return the first catalog model of ``provider`` matching the alias."""
        models = _PROVIDER_MODELS.get(provider, [])
        if not models:
            return None
        prefix = (
            f"{vendor}/{family}"
            if provider in _AGGREGATOR_PROVIDERS
            else family
        ).lower()
        for model in models:
            if model.lower().startswith(prefix):
                return model
        return None

    # The user's current provider wins whenever it carries the family.
    for provider in current_keys:
        if matched := _match(provider):
            return provider, matched

    for provider in _PROVIDER_MODELS:
        if provider in current_keys or provider in _AGGREGATOR_PROVIDERS:
            continue
        if matched := _match(provider):
            return provider, matched

    # NOTE(review): a trailing loop used to retry aggregators that are in
    # ``current_keys``.  It could never return anything, because every
    # provider in ``current_keys`` was already tried in the first loop, so
    # it is removed.  If aggregators *outside* ``current_keys`` were meant
    # as a last resort, that is a behaviour change to make separately.
    return None
|
||||
|
||||
|
||||
def detect_static_provider_for_model(
    model_name: str,
    current_provider: str,
) -> Optional[tuple[str, str]]:
    """Pick a provider for ``model_name`` by consulting static catalogs only.

    Checks run in this order: static alias resolution, a bare provider name
    typed as the model, membership in the current provider's catalog (which
    means "no switch"), and finally an exact match in another direct
    provider's catalog.

    Returns:
        ``(provider_id, model_name)``; the model name may be remapped when an
        alias or a bare provider name resolves to a catalog entry. ``None``
        when the name is blank or no confident match exists.
    """
    name = (model_name or "").strip()
    if not name:
        return None

    name_lower = name.lower()
    current_keys = _provider_keys(current_provider)

    via_alias = _resolve_static_model_alias(name_lower, current_keys)
    if via_alias:
        return via_alias

    # --- Step 0: bare provider name typed as model ---
    # `/model nous` or `/model anthropic` is treated as a provider switch and
    # resolves to the first model in that provider's catalog. "custom" and
    # "openrouter" are skipped — custom has no model catalog, and openrouter
    # requires an explicit model name to be useful.
    candidate = _PROVIDER_ALIASES.get(name_lower, name_lower)
    if candidate not in ("custom", "openrouter"):
        catalog = _PROVIDER_MODELS.get(candidate, [])
        is_labelled = candidate in _PROVIDER_LABELS
        if is_labelled and catalog and candidate not in current_keys:
            return candidate, catalog[0]

    # A model already served by the current provider needs no switch.
    if _model_in_provider_catalog(name_lower, current_keys):
        return None

    # --- Step 1: exact match in another provider's static catalog ---
    # Aggregators list other providers' models — never auto-switch TO them.
    for provider_id, catalog in _PROVIDER_MODELS.items():
        if provider_id in current_keys:
            continue
        if provider_id in _AGGREGATOR_PROVIDERS:
            continue
        for entry in catalog:
            if entry.lower() == name_lower:
                return provider_id, name

    return None
|
||||
|
||||
|
||||
def detect_provider_for_model(
|
||||
model_name: str,
|
||||
current_provider: str,
|
||||
@@ -1540,19 +1391,86 @@ def detect_provider_for_model(
|
||||
|
||||
Priority:
|
||||
0. Bare provider name → switch to that provider's default model
|
||||
1. Direct provider static catalog match
|
||||
2. OpenRouter catalog match
|
||||
1. Direct provider with credentials (highest)
|
||||
2. Direct provider without credentials → remap to OpenRouter slug
|
||||
3. OpenRouter catalog match
|
||||
"""
|
||||
name = (model_name or "").strip()
|
||||
if not name:
|
||||
return None
|
||||
|
||||
static_match = detect_static_provider_for_model(name, current_provider)
|
||||
if static_match:
|
||||
return static_match
|
||||
if _model_in_provider_catalog(name.lower(), _provider_keys(current_provider)):
|
||||
name_lower = name.lower()
|
||||
|
||||
# --- Step 0: bare provider name typed as model ---
|
||||
# If someone types `/model nous` or `/model anthropic`, treat it as a
|
||||
# provider switch and pick the first model from that provider's catalog.
|
||||
# Skip "custom" and "openrouter" — custom has no model catalog, and
|
||||
# openrouter requires an explicit model name to be useful.
|
||||
resolved_provider = _PROVIDER_ALIASES.get(name_lower, name_lower)
|
||||
if resolved_provider not in {"custom", "openrouter"}:
|
||||
default_models = _PROVIDER_MODELS.get(resolved_provider, [])
|
||||
if (
|
||||
resolved_provider in _PROVIDER_LABELS
|
||||
and default_models
|
||||
and resolved_provider != normalize_provider(current_provider)
|
||||
):
|
||||
return (resolved_provider, default_models[0])
|
||||
|
||||
# Aggregators list other providers' models — never auto-switch TO them
|
||||
_AGGREGATORS = {"nous", "openrouter", "ai-gateway", "copilot", "kilocode"}
|
||||
|
||||
# If the model belongs to the current provider's catalog, don't suggest switching
|
||||
current_models = _PROVIDER_MODELS.get(current_provider, [])
|
||||
if any(name_lower == m.lower() for m in current_models):
|
||||
return None
|
||||
|
||||
# --- Step 1: check static provider catalogs for a direct match ---
|
||||
direct_match: Optional[str] = None
|
||||
for pid, models in _PROVIDER_MODELS.items():
|
||||
if pid == current_provider or pid in _AGGREGATORS:
|
||||
continue
|
||||
if any(name_lower == m.lower() for m in models):
|
||||
direct_match = pid
|
||||
break
|
||||
|
||||
if direct_match:
|
||||
# Check if we have credentials for this provider — env vars,
|
||||
# credential pool, or auth store entries.
|
||||
has_creds = False
|
||||
try:
|
||||
from hermes_cli.auth import PROVIDER_REGISTRY
|
||||
pconfig = PROVIDER_REGISTRY.get(direct_match)
|
||||
if pconfig:
|
||||
for env_var in pconfig.api_key_env_vars:
|
||||
if os.getenv(env_var, "").strip():
|
||||
has_creds = True
|
||||
break
|
||||
except Exception:
|
||||
pass
|
||||
# Also check credential pool and auth store — covers OAuth,
|
||||
# Claude Code tokens, and other non-env-var credentials (#10300).
|
||||
if not has_creds:
|
||||
try:
|
||||
from agent.credential_pool import load_pool
|
||||
pool = load_pool(direct_match)
|
||||
if pool.has_credentials():
|
||||
has_creds = True
|
||||
except Exception:
|
||||
pass
|
||||
if not has_creds:
|
||||
try:
|
||||
from hermes_cli.auth import _load_auth_store
|
||||
store = _load_auth_store()
|
||||
if direct_match in store.get("providers", {}) or direct_match in store.get("credential_pool", {}):
|
||||
has_creds = True
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Always return the direct provider match. If credentials are
|
||||
# missing, the client init will give a clear error rather than
|
||||
# silently routing through the wrong provider (#10300).
|
||||
return (direct_match, name)
|
||||
|
||||
# --- Step 2: check OpenRouter catalog ---
|
||||
# First try exact match (handles provider/model format)
|
||||
or_slug = _find_openrouter_slug(name)
|
||||
@@ -2653,8 +2571,8 @@ def validate_requested_model(
|
||||
)
|
||||
|
||||
return {
|
||||
"accepted": True,
|
||||
"persist": True,
|
||||
"accepted": False,
|
||||
"persist": False,
|
||||
"recognized": False,
|
||||
"message": message,
|
||||
}
|
||||
|
||||
@@ -1,202 +0,0 @@
|
||||
"""Oneshot (-z) mode: send a prompt, get the final content block, exit.
|
||||
|
||||
Bypasses cli.py entirely. No banner, no spinner, no session_id line,
|
||||
no stderr chatter. Just the agent's final text to stdout.
|
||||
|
||||
Toolsets = whatever the user has configured for "cli" in `hermes tools`.
|
||||
Rules / memory / AGENTS.md / preloaded skills = same as a normal chat turn.
|
||||
Approvals = auto-bypassed (HERMES_YOLO_MODE=1 is set for the call).
|
||||
Working directory = the user's CWD (AGENTS.md etc. resolve from there as usual).
|
||||
|
||||
Model / provider selection mirrors `hermes chat`:
|
||||
- Both optional. If omitted, use the user's configured default.
|
||||
- If both given, pair them exactly as given.
|
||||
- If only --model given, auto-detect the provider that serves it.
|
||||
- If only --provider given, error out (ambiguous — caller must pick a model).
|
||||
|
||||
Env var fallbacks (used when the corresponding arg is not passed):
|
||||
- HERMES_INFERENCE_MODEL
|
||||
- HERMES_INFERENCE_PROVIDER (already read by resolve_runtime_provider)
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
from contextlib import redirect_stderr, redirect_stdout
|
||||
from typing import Optional
|
||||
|
||||
|
||||
def run_oneshot(
    prompt: str,
    model: Optional[str] = None,
    provider: Optional[str] = None,
) -> int:
    """Execute a single prompt and print only the final content block.

    Args:
        prompt: The user message to send.
        model: Optional model override. Falls back to HERMES_INFERENCE_MODEL
            env var, then config.yaml's model.default / model.model.
        provider: Optional provider override. Falls back to
            HERMES_INFERENCE_PROVIDER env var, then config.yaml's
            model.provider, then "auto". A blank/whitespace-only value is
            treated as "not given", matching ``_run_agent``.

    Returns:
        The exit code: ``2`` when a provider is requested without any model
        (argument or HERMES_INFERENCE_MODEL), ``0`` otherwise. Caller should
        sys.exit() with the return.
    """
    # Silence every stdlib logger for the duration so AIAgent, tools, and
    # provider adapters emit nothing on the terminal.
    # NOTE: logging.disable() is process-wide and drops records before any
    # handler sees them, so file handlers receive nothing either while it is
    # in effect.
    logging.disable(logging.CRITICAL)

    # --provider without --model is ambiguous: carrying the user's configured
    # model across to a different provider is usually wrong (that provider may
    # not host it), and silently picking the provider's catalog default hides
    # the mismatch. Require the caller to be explicit. Validate BEFORE the
    # stderr redirect so the message actually reaches the terminal.
    requested_provider = (provider or "").strip()
    requested_model = (model or "").strip()
    env_model_early = os.getenv("HERMES_INFERENCE_MODEL", "").strip()
    if requested_provider and not (requested_model or env_model_early):
        sys.stderr.write(
            "hermes -z: --provider requires --model (or HERMES_INFERENCE_MODEL). "
            "Pass both explicitly, or neither to use your configured defaults.\n"
        )
        return 2

    # Auto-approve any shell / tool approvals. Non-interactive by
    # definition — a prompt would hang forever.
    os.environ["HERMES_YOLO_MODE"] = "1"
    os.environ["HERMES_ACCEPT_HOOKS"] = "1"

    # Redirect stderr AND stdout to devnull for the entire call tree; the
    # final response is written to the real stdout afterwards. The context
    # manager closes devnull even when the agent raises.
    real_stdout = sys.stdout
    with open(os.devnull, "w") as devnull:
        with redirect_stdout(devnull), redirect_stderr(devnull):
            response = _run_agent(prompt, model=model, provider=provider)

    if response:
        real_stdout.write(response)
        if not response.endswith("\n"):
            real_stdout.write("\n")
        real_stdout.flush()
    return 0
|
||||
|
||||
|
||||
def _run_agent(
    prompt: str,
    model: Optional[str] = None,
    provider: Optional[str] = None,
) -> str:
    """Run one conversation through an ``AIAgent`` and return its reply.

    The agent is assembled from ``load_config()``, the runtime returned by
    ``resolve_runtime_provider`` and the toolsets enabled for the "cli"
    platform.

    Args:
        prompt: The user message handed to ``agent.chat``.
        model: Optional model override; when blank, HERMES_INFERENCE_MODEL
            and then the configured model are used.
        provider: Optional provider override; when blank and a model was
            requested explicitly, ``detect_provider_for_model`` is consulted.

    Returns:
        Whatever ``agent.chat`` returns, or ``""`` when that is falsy.
    """
    # Deferred imports: they must not run when hermes is invoked for other
    # commands (keeps top-level CLI startup cheap).
    from hermes_cli.config import load_config
    from hermes_cli.models import detect_provider_for_model
    from hermes_cli.runtime_provider import resolve_runtime_provider
    from hermes_cli.tools_config import _get_platform_tools
    from run_agent import AIAgent

    config = load_config()

    # Effective model precedence: explicit arg → env var → config.
    model_section = config.get("model") or {}
    if isinstance(model_section, str):
        configured_model = model_section
    else:
        configured_model = (
            model_section.get("default") or model_section.get("model") or ""
        )

    requested_model = (model or "").strip()
    env_model = os.getenv("HERMES_INFERENCE_MODEL", "").strip()
    effective_model = requested_model or env_model or configured_model

    # Effective provider precedence: explicit arg → auto-detect from an
    # explicitly requested model → env / config (handled inside
    # resolve_runtime_provider).
    #
    # Auto-detection only applies when the model came from the argument or
    # the env var. A model taken from config is the "use my defaults" path,
    # where the configured provider is already correct. Without detection,
    # resolve_runtime_provider() would fall back to the user's configured
    # default provider, which may not host the requested model.
    effective_provider = (provider or "").strip() or None
    explicit_model = requested_model or env_model
    if effective_provider is None and explicit_model:
        configured_provider = ""
        if isinstance(model_section, dict):
            configured_provider = (
                str(model_section.get("provider") or "").strip().lower()
            )
        current_provider = (
            configured_provider
            or os.getenv("HERMES_INFERENCE_PROVIDER", "").strip().lower()
            or "auto"
        )
        detected = detect_provider_for_model(explicit_model, current_provider)
        if detected:
            effective_provider, effective_model = detected

    runtime = resolve_runtime_provider(
        requested=effective_provider,
        target_model=effective_model or None,
    )

    # Whatever toolsets the user has enabled for "cli"; sorted() gives a
    # stable order and turns the set into the list AIAgent expects.
    enabled_toolsets = sorted(_get_platform_tools(config, "cli"))

    agent = AIAgent(
        api_key=runtime.get("api_key"),
        base_url=runtime.get("base_url"),
        provider=runtime.get("provider"),
        api_mode=runtime.get("api_mode"),
        model=effective_model,
        enabled_toolsets=enabled_toolsets,
        quiet_mode=True,
        platform="cli",
        credential_pool=runtime.get("credential_pool"),
        # Interactive callbacks are intentionally NOT wired beyond this
        # one. In oneshot mode there's no user sitting at a terminal:
        #   - clarify → returns a synthetic "pick a default" instruction
        #     so the agent continues instead of stalling on the tool's
        #     built-in "not available" error
        #   - sudo password prompt → terminal_tool gates on
        #     HERMES_INTERACTIVE which we never set
        #   - shell-hook approval → auto-approved via HERMES_ACCEPT_HOOKS=1
        #     (set by the caller); also falls back to deny on non-tty
        #   - dangerous-command approval → bypassed via HERMES_YOLO_MODE=1
        #   - skill secret capture → returns gracefully when no callback set
        clarify_callback=_oneshot_clarify_callback,
    )

    # Belt-and-braces: make sure AIAgent doesn't invoke any streaming
    # display callbacks that would bypass the stdout capture.
    agent.suppress_status_output = True
    agent.stream_delta_callback = None
    agent.tool_gen_callback = None

    reply = agent.chat(prompt)
    return reply or ""
|
||||
|
||||
|
||||
def _oneshot_clarify_callback(question: str, choices=None) -> str:
|
||||
"""Clarify is disabled in oneshot mode — tell the agent to pick a
|
||||
default and proceed instead of stalling or erroring."""
|
||||
if choices:
|
||||
return (
|
||||
f"[oneshot mode: no user available. Pick the best option from "
|
||||
f"{choices} using your own judgment and continue.]"
|
||||
)
|
||||
return (
|
||||
"[oneshot mode: no user available. Make the most reasonable "
|
||||
"assumption you can and continue.]"
|
||||
)
|
||||
@@ -167,12 +167,6 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = {
|
||||
transport="openai_chat",
|
||||
base_url_env_var="OLLAMA_BASE_URL",
|
||||
),
|
||||
# Azure Foundry: supports both OpenAI-style and Anthropic-style endpoints.
|
||||
# The transport is determined at runtime from config.yaml model.api_mode.
|
||||
"azure-foundry": HermesOverlay(
|
||||
transport="openai_chat", # default; overridden by api_mode in config
|
||||
base_url_env_var="AZURE_FOUNDRY_BASE_URL",
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -221,19 +221,6 @@ def _resolve_runtime_from_pool_entry(
|
||||
elif provider == "copilot":
|
||||
api_mode = _copilot_runtime_api_mode(model_cfg, getattr(entry, "runtime_api_key", ""))
|
||||
base_url = base_url or PROVIDER_REGISTRY["copilot"].inference_base_url
|
||||
elif provider == "azure-foundry":
|
||||
# Azure Foundry: read api_mode and base_url from config
|
||||
cfg_provider = str(model_cfg.get("provider") or "").strip().lower()
|
||||
if cfg_provider == "azure-foundry":
|
||||
cfg_base_url = str(model_cfg.get("base_url") or "").strip().rstrip("/")
|
||||
if cfg_base_url:
|
||||
base_url = cfg_base_url
|
||||
configured_mode = _parse_api_mode(model_cfg.get("api_mode"))
|
||||
if configured_mode:
|
||||
api_mode = configured_mode
|
||||
# For Anthropic-style endpoints, strip /v1 suffix
|
||||
if api_mode == "anthropic_messages":
|
||||
base_url = re.sub(r"/v1/?$", "", base_url)
|
||||
else:
|
||||
configured_provider = str(model_cfg.get("provider") or "").strip().lower()
|
||||
# Honour model.base_url from config.yaml when the configured provider
|
||||
@@ -602,71 +589,6 @@ def _resolve_openrouter_runtime(
|
||||
}
|
||||
|
||||
|
||||
def _resolve_azure_foundry_runtime(
|
||||
*,
|
||||
requested_provider: str,
|
||||
model_cfg: Dict[str, Any],
|
||||
explicit_api_key: Optional[str] = None,
|
||||
explicit_base_url: Optional[str] = None,
|
||||
) -> Dict[str, Any]:
|
||||
"""Resolve an Azure Foundry runtime entry.
|
||||
|
||||
Reads ``model.base_url`` + ``model.api_mode`` from config.yaml (or
|
||||
explicit overrides), pulls the API key from ``.env`` / env var, and
|
||||
strips a trailing ``/v1`` for Anthropic-style endpoints because the
|
||||
Anthropic SDK appends ``/v1/messages`` internally.
|
||||
|
||||
Raises :class:`AuthError` when required values are missing.
|
||||
"""
|
||||
explicit_api_key = str(explicit_api_key or "").strip()
|
||||
explicit_base_url_clean = str(explicit_base_url or "").strip().rstrip("/")
|
||||
|
||||
cfg_provider = str(model_cfg.get("provider") or "").strip().lower()
|
||||
cfg_base_url = ""
|
||||
cfg_api_mode = "chat_completions"
|
||||
if cfg_provider == "azure-foundry":
|
||||
cfg_base_url = str(model_cfg.get("base_url") or "").strip().rstrip("/")
|
||||
cfg_api_mode = _parse_api_mode(model_cfg.get("api_mode")) or "chat_completions"
|
||||
|
||||
env_base_url = os.getenv("AZURE_FOUNDRY_BASE_URL", "").strip().rstrip("/")
|
||||
base_url = explicit_base_url_clean or cfg_base_url or env_base_url
|
||||
if not base_url:
|
||||
raise AuthError(
|
||||
"Azure Foundry requires a base URL. Set it via 'hermes model' or "
|
||||
"the AZURE_FOUNDRY_BASE_URL environment variable."
|
||||
)
|
||||
|
||||
api_key = explicit_api_key
|
||||
if not api_key:
|
||||
try:
|
||||
from hermes_cli.config import get_env_value
|
||||
api_key = get_env_value("AZURE_FOUNDRY_API_KEY") or ""
|
||||
except Exception:
|
||||
api_key = ""
|
||||
if not api_key:
|
||||
api_key = os.getenv("AZURE_FOUNDRY_API_KEY", "").strip()
|
||||
if not api_key:
|
||||
raise AuthError(
|
||||
"Azure Foundry requires an API key. Set AZURE_FOUNDRY_API_KEY in "
|
||||
"~/.hermes/.env or run 'hermes model' to configure."
|
||||
)
|
||||
|
||||
# Anthropic SDK appends /v1/messages itself, so strip any trailing /v1
|
||||
# we inherited from the configured base_url to avoid double-/v1 paths.
|
||||
if cfg_api_mode == "anthropic_messages":
|
||||
base_url = re.sub(r"/v1/?$", "", base_url)
|
||||
|
||||
source = "explicit" if (explicit_api_key or explicit_base_url) else "config"
|
||||
return {
|
||||
"provider": "azure-foundry",
|
||||
"api_mode": cfg_api_mode,
|
||||
"base_url": base_url,
|
||||
"api_key": api_key,
|
||||
"source": source,
|
||||
"requested_provider": requested_provider,
|
||||
}
|
||||
|
||||
|
||||
def _resolve_explicit_runtime(
|
||||
*,
|
||||
provider: str,
|
||||
@@ -756,15 +678,6 @@ def _resolve_explicit_runtime(
|
||||
"requested_provider": requested_provider,
|
||||
}
|
||||
|
||||
# Azure Foundry: user-configured endpoint with selectable API mode
|
||||
if provider == "azure-foundry":
|
||||
return _resolve_azure_foundry_runtime(
|
||||
requested_provider=requested_provider,
|
||||
model_cfg=model_cfg,
|
||||
explicit_api_key=explicit_api_key,
|
||||
explicit_base_url=explicit_base_url,
|
||||
)
|
||||
|
||||
pconfig = PROVIDER_REGISTRY.get(provider)
|
||||
if pconfig and pconfig.auth_type == "api_key":
|
||||
env_url = ""
|
||||
@@ -833,40 +746,6 @@ def resolve_runtime_provider(
|
||||
"""
|
||||
requested_provider = resolve_requested_provider(requested)
|
||||
|
||||
# Azure Anthropic short-circuit: when explicitly targeting an Azure endpoint
|
||||
# with provider="anthropic", bypass _resolve_named_custom_runtime (which would
|
||||
# return provider="custom" with chat_completions api_mode and no valid key).
|
||||
# Instead, use the Azure key directly with anthropic_messages api_mode.
|
||||
_eff_base = (explicit_base_url or "").strip()
|
||||
if requested_provider == "anthropic" and "azure.com" in _eff_base:
|
||||
_azure_key = (
|
||||
(explicit_api_key or "").strip()
|
||||
or os.getenv("AZURE_ANTHROPIC_KEY", "").strip()
|
||||
or os.getenv("ANTHROPIC_API_KEY", "").strip()
|
||||
)
|
||||
return {
|
||||
"provider": "anthropic",
|
||||
"api_mode": "anthropic_messages",
|
||||
"base_url": _eff_base.rstrip("/"),
|
||||
"api_key": _azure_key,
|
||||
"source": "azure-explicit",
|
||||
"requested_provider": requested_provider,
|
||||
}
|
||||
|
||||
# Azure Foundry: user-configured endpoint with selectable API mode
|
||||
# (OpenAI-style chat_completions or Anthropic-style anthropic_messages).
|
||||
# Resolve before the custom-runtime / pool / generic paths so Azure
|
||||
# config is always picked up from model.base_url + model.api_mode,
|
||||
# regardless of whether the caller passed explicit_* args.
|
||||
if requested_provider == "azure-foundry":
|
||||
azure_runtime = _resolve_azure_foundry_runtime(
|
||||
requested_provider=requested_provider,
|
||||
model_cfg=_get_model_config(),
|
||||
explicit_api_key=explicit_api_key,
|
||||
explicit_base_url=explicit_base_url,
|
||||
)
|
||||
return azure_runtime
|
||||
|
||||
custom_runtime = _resolve_named_custom_runtime(
|
||||
requested_provider=requested_provider,
|
||||
explicit_api_key=explicit_api_key,
|
||||
@@ -1045,6 +924,13 @@ def resolve_runtime_provider(
|
||||
|
||||
# Anthropic (native Messages API)
|
||||
if provider == "anthropic":
|
||||
from agent.anthropic_adapter import resolve_anthropic_token
|
||||
token = resolve_anthropic_token()
|
||||
if not token:
|
||||
raise AuthError(
|
||||
"No Anthropic credentials found. Set ANTHROPIC_TOKEN or ANTHROPIC_API_KEY, "
|
||||
"run 'claude setup-token', or authenticate with 'claude /login'."
|
||||
)
|
||||
# Allow base URL override from config.yaml model.base_url, but only
|
||||
# when the configured provider is anthropic — otherwise a non-Anthropic
|
||||
# base_url (e.g. Codex endpoint) would leak into Anthropic requests.
|
||||
@@ -1053,33 +939,6 @@ def resolve_runtime_provider(
|
||||
if cfg_provider == "anthropic":
|
||||
cfg_base_url = (model_cfg.get("base_url") or "").strip().rstrip("/")
|
||||
base_url = cfg_base_url or "https://api.anthropic.com"
|
||||
|
||||
# For Azure AI Foundry endpoints, use ANTHROPIC_API_KEY directly —
|
||||
# Claude Code OAuth tokens (sk-ant-oat01) are not accepted by Azure.
|
||||
# Azure keys don't start with "sk-ant-" so resolve_anthropic_token()
|
||||
# would find the Claude Code OAuth token first (priority 3) and return
|
||||
# that instead, causing 401s. Detect Azure endpoints and use the env
|
||||
# key directly to bypass the OAuth priority chain.
|
||||
_is_azure_endpoint = "azure.com" in base_url.lower() or (
|
||||
cfg_base_url and "azure.com" in cfg_base_url.lower()
|
||||
)
|
||||
if _is_azure_endpoint:
|
||||
token = (
|
||||
os.getenv("AZURE_ANTHROPIC_KEY", "").strip()
|
||||
or os.getenv("ANTHROPIC_API_KEY", "").strip()
|
||||
)
|
||||
if not token:
|
||||
raise AuthError(
|
||||
"No Azure Anthropic API key found. Set AZURE_ANTHROPIC_KEY or ANTHROPIC_API_KEY."
|
||||
)
|
||||
else:
|
||||
from agent.anthropic_adapter import resolve_anthropic_token
|
||||
token = resolve_anthropic_token()
|
||||
if not token:
|
||||
raise AuthError(
|
||||
"No Anthropic credentials found. Set ANTHROPIC_TOKEN or ANTHROPIC_API_KEY, "
|
||||
"run 'claude setup-token', or authenticate with 'claude /login'."
|
||||
)
|
||||
return {
|
||||
"provider": "anthropic",
|
||||
"api_mode": "anthropic_messages",
|
||||
|
||||
+49
-27
@@ -2863,6 +2863,17 @@ SETUP_SECTIONS = [
|
||||
("agent", "Agent Settings", setup_agent_settings),
|
||||
]
|
||||
|
||||
# The returning-user menu intentionally omits standalone TTS because model setup
|
||||
# already includes TTS selection and tools setup covers the rest of the provider
|
||||
# configuration. Keep this list in the same order as the visible menu entries.
|
||||
RETURNING_USER_MENU_SECTION_KEYS = [
|
||||
"model",
|
||||
"terminal",
|
||||
"gateway",
|
||||
"tools",
|
||||
"agent",
|
||||
]
|
||||
|
||||
|
||||
def run_setup_wizard(args):
|
||||
"""Run the interactive setup wizard.
|
||||
@@ -2887,9 +2898,6 @@ def run_setup_wizard(args):
|
||||
save_config(copy.deepcopy(DEFAULT_CONFIG))
|
||||
print_success("Configuration reset to defaults.")
|
||||
|
||||
reconfigure_requested = bool(getattr(args, "reconfigure", False))
|
||||
quick_requested = bool(getattr(args, "quick", False))
|
||||
|
||||
config = load_config()
|
||||
hermes_home = get_hermes_home()
|
||||
|
||||
@@ -2981,36 +2989,50 @@ def run_setup_wizard(args):
|
||||
migration_ran = False
|
||||
|
||||
if is_existing:
|
||||
# Existing install — default is the full-wizard reconfigure flow.
|
||||
# Every prompt shows the current value as its default, so pressing
|
||||
# Enter keeps it. Opt into `--quick` for the narrow "just fill in
|
||||
# missing items" flow (useful after a partial OpenClaw migration
|
||||
# or when a required API key got cleared).
|
||||
if quick_requested:
|
||||
# ── Returning User Menu ──
|
||||
print()
|
||||
print_header("Welcome Back!")
|
||||
print_success("You already have Hermes configured.")
|
||||
print()
|
||||
|
||||
menu_choices = [
|
||||
"Quick Setup - configure missing items only",
|
||||
"Full Setup - reconfigure everything",
|
||||
"Model & Provider",
|
||||
"Terminal Backend",
|
||||
"Messaging Platforms (Gateway)",
|
||||
"Tools",
|
||||
"Agent Settings",
|
||||
"Exit",
|
||||
]
|
||||
choice = prompt_choice("What would you like to do?", menu_choices, 0)
|
||||
|
||||
if choice == 0:
|
||||
# Quick setup
|
||||
_run_quick_setup(config, hermes_home)
|
||||
return
|
||||
|
||||
print()
|
||||
print_header("Reconfigure")
|
||||
print_success("You already have Hermes configured.")
|
||||
print_info("Running the full wizard — each prompt shows your current value.")
|
||||
print_info("Press Enter to keep it, or type a new value to change it.")
|
||||
print_info("")
|
||||
print_info("Tip: jump straight to a section with 'hermes setup model|terminal|")
|
||||
print_info(" gateway|tools|agent', or fill only missing items with --quick.")
|
||||
# Fall through to the "Full Setup — run all sections" block below.
|
||||
# --reconfigure is now the default on existing installs; the flag
|
||||
# is preserved for backwards compatibility but is a no-op here.
|
||||
elif choice == 1:
|
||||
# Full setup — fall through to run all sections
|
||||
pass
|
||||
elif choice == 7:
|
||||
print_info("Exiting. Run 'hermes setup' again when ready.")
|
||||
return
|
||||
elif 2 <= choice <= 6:
|
||||
# Individual section — map by key, not by position.
|
||||
# SETUP_SECTIONS includes TTS but the returning-user menu skips it,
|
||||
# so positional indexing (choice - 2) would dispatch the wrong section.
|
||||
section_key = RETURNING_USER_MENU_SECTION_KEYS[choice - 2]
|
||||
section = next((s for s in SETUP_SECTIONS if s[0] == section_key), None)
|
||||
if section:
|
||||
_, label, func = section
|
||||
func(config)
|
||||
save_config(config)
|
||||
_print_setup_summary(config, hermes_home)
|
||||
return
|
||||
else:
|
||||
# ── First-Time Setup ──
|
||||
print()
|
||||
|
||||
# --reconfigure / --quick on a fresh install are meaningless — fall
|
||||
# through to the normal first-time flow.
|
||||
if reconfigure_requested or quick_requested:
|
||||
print_info("No existing configuration found — running first-time setup.")
|
||||
print()
|
||||
|
||||
# Offer OpenClaw migration before configuration begins
|
||||
migration_ran = _offer_openclaw_migration(hermes_home)
|
||||
if migration_ran:
|
||||
|
||||
+2
-1
@@ -10,7 +10,8 @@ import random
|
||||
|
||||
TIPS = [
|
||||
# --- Slash Commands ---
|
||||
"/background <prompt> (alias /bg or /btw) runs a task in a separate session while your current one stays free.",
|
||||
"/btw <question> asks a quick side question without tools or history — great for clarifications.",
|
||||
"/background <prompt> runs a task in a separate session while your current one stays free.",
|
||||
"/branch forks the current session so you can explore a different direction without losing progress.",
|
||||
"/compress manually compresses conversation context when things get long.",
|
||||
"/rollback lists filesystem checkpoints — restore files the agent modified to any prior state.",
|
||||
|
||||
+14
-122
@@ -68,58 +68,25 @@ CONFIGURABLE_TOOLSETS = [
|
||||
("rl", "🧪 RL Training", "Tinker-Atropos training tools"),
|
||||
("homeassistant", "🏠 Home Assistant", "smart home device control"),
|
||||
("spotify", "🎵 Spotify", "playback, search, playlists, library"),
|
||||
("discord", "💬 Discord (read/participate)", "fetch messages, search members, create thread"),
|
||||
("discord_admin", "🛡️ Discord Server Admin", "list channels/roles, pin, assign roles"),
|
||||
]
|
||||
|
||||
# Toolsets that are OFF by default for new installs.
|
||||
# They're still in _HERMES_CORE_TOOLS (available at runtime if enabled),
|
||||
# but the setup checklist won't pre-select them for first-time users.
|
||||
_DEFAULT_OFF_TOOLSETS = {"moa", "homeassistant", "rl", "spotify", "discord", "discord_admin"}
|
||||
|
||||
# Platform-scoped toolsets: only appear in the `hermes tools` checklist for
|
||||
# these platforms, and only resolve/save for these platforms. A toolset
|
||||
# absent from this map is available on every platform (current behaviour).
|
||||
#
|
||||
# Use this for tools whose APIs only make sense on one platform (Discord
|
||||
# server admin, Slack workspace admin, etc.). Keeps every other platform's
|
||||
# checklist from filling up with irrelevant toggles.
|
||||
_TOOLSET_PLATFORM_RESTRICTIONS: Dict[str, Set[str]] = {
|
||||
"discord": {"discord"},
|
||||
"discord_admin": {"discord"},
|
||||
}
|
||||
|
||||
|
||||
def _toolset_allowed_for_platform(ts_key: str, platform: str) -> bool:
|
||||
"""Return True if ``ts_key`` is configurable on ``platform``.
|
||||
|
||||
Toolsets without a restriction entry are allowed everywhere (the default).
|
||||
"""
|
||||
allowed = _TOOLSET_PLATFORM_RESTRICTIONS.get(ts_key)
|
||||
return allowed is None or platform in allowed
|
||||
_DEFAULT_OFF_TOOLSETS = {"moa", "homeassistant", "rl", "spotify"}
|
||||
|
||||
|
||||
def _get_effective_configurable_toolsets():
|
||||
"""Return CONFIGURABLE_TOOLSETS + any plugin-provided toolsets.
|
||||
|
||||
Plugin toolsets are appended at the end so they appear after the
|
||||
built-in toolsets in the TUI checklist. A plugin whose toolset key
|
||||
already appears in ``CONFIGURABLE_TOOLSETS`` is skipped — bundled
|
||||
plugins (e.g. ``plugins/spotify``) share their toolset key with the
|
||||
built-in entry, and we want the built-in label/description to win.
|
||||
Without the dedupe, ``hermes tools`` → "reconfigure existing" would
|
||||
list the same toolset twice.
|
||||
built-in toolsets in the TUI checklist.
|
||||
"""
|
||||
result = list(CONFIGURABLE_TOOLSETS)
|
||||
seen = {ts_key for ts_key, _, _ in result}
|
||||
try:
|
||||
from hermes_cli.plugins import discover_plugins, get_plugin_toolsets
|
||||
discover_plugins() # idempotent — ensures plugins are loaded
|
||||
for entry in get_plugin_toolsets():
|
||||
if entry[0] in seen:
|
||||
continue
|
||||
seen.add(entry[0])
|
||||
result.append(entry)
|
||||
result.extend(get_plugin_toolsets())
|
||||
except Exception:
|
||||
pass
|
||||
return result
|
||||
@@ -624,7 +591,7 @@ def _get_platform_tools(
|
||||
include_default_mcp_servers: bool = True,
|
||||
) -> Set[str]:
|
||||
"""Resolve which individual toolset names are enabled for a platform."""
|
||||
from toolsets import resolve_toolset, TOOLSETS
|
||||
from toolsets import resolve_toolset
|
||||
|
||||
platform_toolsets = config.get("platform_toolsets") or {}
|
||||
toolset_names = platform_toolsets.get(platform)
|
||||
@@ -638,8 +605,6 @@ def _get_platform_tools(
|
||||
toolset_names = [str(ts) for ts in toolset_names]
|
||||
|
||||
configurable_keys = {ts_key for ts_key, _, _ in CONFIGURABLE_TOOLSETS}
|
||||
plugin_ts_keys = _get_plugin_toolset_keys()
|
||||
platform_default_keys = {p["default_toolset"] for p in PLATFORMS.values()}
|
||||
|
||||
# If the saved list contains any configurable keys directly, the user
|
||||
# has explicitly configured this platform — use direct membership.
|
||||
@@ -649,10 +614,7 @@ def _get_platform_tools(
|
||||
has_explicit_config = any(ts in configurable_keys for ts in toolset_names)
|
||||
|
||||
if has_explicit_config:
|
||||
enabled_toolsets = {
|
||||
ts for ts in toolset_names
|
||||
if ts in configurable_keys and _toolset_allowed_for_platform(ts, platform)
|
||||
}
|
||||
enabled_toolsets = {ts for ts in toolset_names if ts in configurable_keys}
|
||||
else:
|
||||
# No explicit config — fall back to resolving composite toolset names
|
||||
# (e.g. "hermes-cli") to individual tool names and reverse-mapping.
|
||||
@@ -662,52 +624,14 @@ def _get_platform_tools(
|
||||
|
||||
enabled_toolsets = set()
|
||||
for ts_key, _, _ in CONFIGURABLE_TOOLSETS:
|
||||
if not _toolset_allowed_for_platform(ts_key, platform):
|
||||
continue
|
||||
ts_tools = set(resolve_toolset(ts_key))
|
||||
if ts_tools and ts_tools.issubset(all_tool_names):
|
||||
enabled_toolsets.add(ts_key)
|
||||
|
||||
default_off = set(_DEFAULT_OFF_TOOLSETS)
|
||||
# Legacy safety: if the platform's own name matches a default-off
|
||||
# toolset (e.g. `homeassistant` platform + `homeassistant` toolset),
|
||||
# keep that toolset enabled on first install. Skip this dodge for
|
||||
# platform-restricted toolsets — those are always opt-in even on
|
||||
# their own platform (e.g. `discord` + `discord` should stay OFF).
|
||||
if platform in default_off and platform not in _TOOLSET_PLATFORM_RESTRICTIONS:
|
||||
if platform in default_off:
|
||||
default_off.remove(platform)
|
||||
enabled_toolsets -= default_off
|
||||
|
||||
# Recover non-configurable platform toolsets (e.g. discord, feishu_doc,
|
||||
# feishu_drive). These are part of the platform's default composite but
|
||||
# absent from CONFIGURABLE_TOOLSETS, so they can't appear in the TUI
|
||||
# checklist or in a user-saved config. Must run in BOTH branches —
|
||||
# otherwise saving via `hermes tools` (which flips has_explicit_config
|
||||
# to True) silently drops them.
|
||||
platform_tool_universe = set(resolve_toolset(PLATFORMS[platform]["default_toolset"]))
|
||||
configurable_tool_universe = set()
|
||||
for ck in configurable_keys:
|
||||
configurable_tool_universe.update(resolve_toolset(ck))
|
||||
claimed = set()
|
||||
for ts_key in enabled_toolsets:
|
||||
claimed.update(resolve_toolset(ts_key))
|
||||
skip = configurable_keys | plugin_ts_keys | platform_default_keys
|
||||
skip |= {k for k in TOOLSETS if k.startswith("hermes-")}
|
||||
skip |= set(_DEFAULT_OFF_TOOLSETS) - {platform}
|
||||
for ts_key, ts_def in TOOLSETS.items():
|
||||
if ts_key in skip:
|
||||
continue
|
||||
if ts_def.get("includes"):
|
||||
continue
|
||||
ts_tools = set(resolve_toolset(ts_key))
|
||||
if not ts_tools or not ts_tools.issubset(platform_tool_universe):
|
||||
continue
|
||||
if ts_tools.issubset(configurable_tool_universe):
|
||||
continue
|
||||
if not ts_tools.issubset(claimed):
|
||||
enabled_toolsets.add(ts_key)
|
||||
claimed.update(ts_tools)
|
||||
|
||||
# Plugin toolsets: enabled by default unless explicitly disabled, or
|
||||
# unless the toolset is in _DEFAULT_OFF_TOOLSETS (e.g. spotify —
|
||||
# shipped as a bundled plugin but user must opt in via `hermes tools`
|
||||
@@ -715,6 +639,7 @@ def _get_platform_tools(
|
||||
# A plugin toolset is "known" for a platform once `hermes tools`
|
||||
# has been saved for that platform (tracked via known_plugin_toolsets).
|
||||
# Unknown plugins default to enabled; known-but-absent = disabled.
|
||||
plugin_ts_keys = _get_plugin_toolset_keys()
|
||||
if plugin_ts_keys:
|
||||
known_map = config.get("known_plugin_toolsets", {})
|
||||
known_for_platform = set(known_map.get(platform, []))
|
||||
@@ -732,6 +657,7 @@ def _get_platform_tools(
|
||||
|
||||
# Preserve any explicit non-configurable toolset entries (for example,
|
||||
# custom toolsets or MCP server names saved in platform_toolsets).
|
||||
platform_default_keys = {p["default_toolset"] for p in PLATFORMS.values()}
|
||||
explicit_passthrough = {
|
||||
ts
|
||||
for ts in toolset_names
|
||||
@@ -777,14 +703,6 @@ def _save_platform_tools(config: dict, platform: str, enabled_toolset_keys: Set[
|
||||
"""
|
||||
config.setdefault("platform_toolsets", {})
|
||||
|
||||
# Drop platform-scoped toolsets that don't apply here. Prevents the
|
||||
# "Configure all platforms" checklist (or a hand-edited config.yaml)
|
||||
# from turning on, say, the `discord` toolset for Telegram.
|
||||
enabled_toolset_keys = {
|
||||
ts for ts in enabled_toolset_keys
|
||||
if _toolset_allowed_for_platform(ts, platform)
|
||||
}
|
||||
|
||||
# Get the set of all configurable toolset keys (built-in + plugin)
|
||||
configurable_keys = {ts_key for ts_key, _, _ in CONFIGURABLE_TOOLSETS}
|
||||
plugin_keys = _get_plugin_toolset_keys()
|
||||
@@ -807,11 +725,8 @@ def _save_platform_tools(config: dict, platform: str, enabled_toolset_keys: Set[
|
||||
entry for entry in existing_toolsets
|
||||
if entry not in configurable_keys and entry not in platform_default_keys
|
||||
}
|
||||
# Opening `hermes tools` is the user's opt-in to reconfigure tools, so treat
|
||||
# saving from the picker as consent to clear the "no_mcp" sentinel. The
|
||||
# picker has no checkbox for no_mcp, so without this users who once set it
|
||||
# by hand could never re-enable MCP servers through the UI.
|
||||
preserved_entries.discard("no_mcp")
|
||||
if "no_mcp" not in enabled_toolset_keys:
|
||||
preserved_entries.discard("no_mcp")
|
||||
|
||||
# Merge preserved entries with new enabled toolsets
|
||||
config["platform_toolsets"][platform] = sorted(enabled_toolset_keys | preserved_entries)
|
||||
@@ -919,7 +834,7 @@ def _estimate_tool_tokens() -> Dict[str, int]:
|
||||
return _tool_token_cache
|
||||
|
||||
|
||||
def _prompt_toolset_checklist(platform_label: str, enabled: Set[str], platform: str = "cli") -> Set[str]:
|
||||
def _prompt_toolset_checklist(platform_label: str, enabled: Set[str]) -> Set[str]:
|
||||
"""Multi-select checklist of toolsets. Returns set of selected toolset keys."""
|
||||
from hermes_cli.curses_ui import curses_checklist
|
||||
from toolsets import resolve_toolset
|
||||
@@ -927,12 +842,7 @@ def _prompt_toolset_checklist(platform_label: str, enabled: Set[str], platform:
|
||||
# Pre-compute per-tool token counts (cached after first call).
|
||||
tool_tokens = _estimate_tool_tokens()
|
||||
|
||||
effective_all = _get_effective_configurable_toolsets()
|
||||
# Drop platform-scoped toolsets that don't apply to this platform.
|
||||
effective = [
|
||||
(k, l, d) for (k, l, d) in effective_all
|
||||
if _toolset_allowed_for_platform(k, platform)
|
||||
]
|
||||
effective = _get_effective_configurable_toolsets()
|
||||
|
||||
labels = []
|
||||
for ts_key, ts_label, ts_desc in effective:
|
||||
@@ -1846,7 +1756,7 @@ def tools_command(args=None, first_install: bool = False, config: dict = None):
|
||||
checklist_preselected = current_enabled - _DEFAULT_OFF_TOOLSETS
|
||||
|
||||
# Show checklist
|
||||
new_enabled = _prompt_toolset_checklist(pinfo["label"], checklist_preselected, pkey)
|
||||
new_enabled = _prompt_toolset_checklist(pinfo["label"], checklist_preselected)
|
||||
|
||||
added = new_enabled - current_enabled
|
||||
removed = current_enabled - new_enabled
|
||||
@@ -2202,11 +2112,7 @@ def _apply_mcp_change(config: dict, targets: List[str], action: str) -> Set[str]
|
||||
|
||||
def _print_tools_list(enabled_toolsets: set, mcp_servers: dict, platform: str = "cli"):
|
||||
"""Print a summary of enabled/disabled toolsets and MCP tool filters."""
|
||||
effective_all = _get_effective_configurable_toolsets()
|
||||
effective = [
|
||||
(k, l, d) for (k, l, d) in effective_all
|
||||
if _toolset_allowed_for_platform(k, platform)
|
||||
]
|
||||
effective = _get_effective_configurable_toolsets()
|
||||
builtin_keys = {ts_key for ts_key, _, _ in CONFIGURABLE_TOOLSETS}
|
||||
|
||||
print(f"Built-in toolsets ({platform}):")
|
||||
@@ -2272,20 +2178,6 @@ def tools_disable_enable_command(args):
|
||||
_print_error(f"Unknown toolset '{name}'")
|
||||
toolset_targets = [t for t in toolset_targets if t in valid_toolsets]
|
||||
|
||||
# Reject platform-scoped toolsets on platforms that don't allow them.
|
||||
restricted_targets = [
|
||||
t for t in toolset_targets
|
||||
if not _toolset_allowed_for_platform(t, platform)
|
||||
]
|
||||
if restricted_targets:
|
||||
for name in restricted_targets:
|
||||
allowed = sorted(_TOOLSET_PLATFORM_RESTRICTIONS.get(name) or set())
|
||||
_print_error(
|
||||
f"Toolset '{name}' is not available on platform '{platform}' "
|
||||
f"(only: {', '.join(allowed)})"
|
||||
)
|
||||
toolset_targets = [t for t in toolset_targets if t not in restricted_targets]
|
||||
|
||||
if toolset_targets:
|
||||
_apply_toolset_change(config, platform, toolset_targets, action)
|
||||
|
||||
|
||||
@@ -3103,23 +3103,13 @@ def _mount_plugin_api_routes():
|
||||
_log.warning("Plugin %s declares api=%s but file not found", plugin["name"], api_file_name)
|
||||
continue
|
||||
try:
|
||||
module_name = f"hermes_dashboard_plugin_{plugin['name']}"
|
||||
spec = importlib.util.spec_from_file_location(module_name, api_path)
|
||||
spec = importlib.util.spec_from_file_location(
|
||||
f"hermes_dashboard_plugin_{plugin['name']}", api_path,
|
||||
)
|
||||
if spec is None or spec.loader is None:
|
||||
continue
|
||||
mod = importlib.util.module_from_spec(spec)
|
||||
# Register in sys.modules BEFORE exec_module so pydantic/FastAPI
|
||||
# can resolve forward references (e.g. models defined in a file
|
||||
# that uses `from __future__ import annotations`). Without this,
|
||||
# TypeAdapter lazy-build fails at first request with
|
||||
# "is not fully defined" because the module namespace isn't
|
||||
# reachable by name for string-annotation resolution.
|
||||
sys.modules[module_name] = mod
|
||||
try:
|
||||
spec.loader.exec_module(mod)
|
||||
except Exception:
|
||||
sys.modules.pop(module_name, None)
|
||||
raise
|
||||
spec.loader.exec_module(mod)
|
||||
router = getattr(mod, "router", None)
|
||||
if router is None:
|
||||
_log.warning("Plugin %s api file has no 'router' attribute", plugin["name"])
|
||||
|
||||
+5
-29
@@ -31,7 +31,7 @@ T = TypeVar("T")
|
||||
|
||||
DEFAULT_DB_PATH = get_hermes_home() / "state.db"
|
||||
|
||||
SCHEMA_VERSION = 9
|
||||
SCHEMA_VERSION = 8
|
||||
|
||||
SCHEMA_SQL = """
|
||||
CREATE TABLE IF NOT EXISTS schema_version (
|
||||
@@ -83,8 +83,7 @@ CREATE TABLE IF NOT EXISTS messages (
|
||||
reasoning TEXT,
|
||||
reasoning_content TEXT,
|
||||
reasoning_details TEXT,
|
||||
codex_reasoning_items TEXT,
|
||||
codex_message_items TEXT
|
||||
codex_reasoning_items TEXT
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS state_meta (
|
||||
@@ -357,15 +356,6 @@ class SessionDB:
|
||||
except sqlite3.OperationalError:
|
||||
pass # Column already exists
|
||||
cursor.execute("UPDATE schema_version SET version = 8")
|
||||
if current_version < 9:
|
||||
# v9: preserve replayable Codex assistant message ids/phases so
|
||||
# follow-up turns can rebuild Responses API message items instead
|
||||
# of flattening everything to plain assistant text.
|
||||
try:
|
||||
cursor.execute('ALTER TABLE messages ADD COLUMN "codex_message_items" TEXT')
|
||||
except sqlite3.OperationalError:
|
||||
pass # Column already exists
|
||||
cursor.execute("UPDATE schema_version SET version = 9")
|
||||
|
||||
# Unique title index — always ensure it exists (safe to run after migrations
|
||||
# since the title column is guaranteed to exist at this point)
|
||||
@@ -966,7 +956,6 @@ class SessionDB:
|
||||
reasoning_content: str = None,
|
||||
reasoning_details: Any = None,
|
||||
codex_reasoning_items: Any = None,
|
||||
codex_message_items: Any = None,
|
||||
) -> int:
|
||||
"""
|
||||
Append a message to a session. Returns the message row ID.
|
||||
@@ -983,10 +972,6 @@ class SessionDB:
|
||||
json.dumps(codex_reasoning_items)
|
||||
if codex_reasoning_items else None
|
||||
)
|
||||
codex_message_items_json = (
|
||||
json.dumps(codex_message_items)
|
||||
if codex_message_items else None
|
||||
)
|
||||
tool_calls_json = json.dumps(tool_calls) if tool_calls else None
|
||||
|
||||
# Pre-compute tool call count
|
||||
@@ -998,9 +983,8 @@ class SessionDB:
|
||||
cursor = conn.execute(
|
||||
"""INSERT INTO messages (session_id, role, content, tool_call_id,
|
||||
tool_calls, tool_name, timestamp, token_count, finish_reason,
|
||||
reasoning, reasoning_content, reasoning_details, codex_reasoning_items,
|
||||
codex_message_items)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
|
||||
reasoning, reasoning_content, reasoning_details, codex_reasoning_items)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
|
||||
(
|
||||
session_id,
|
||||
role,
|
||||
@@ -1015,7 +999,6 @@ class SessionDB:
|
||||
reasoning_content,
|
||||
reasoning_details_json,
|
||||
codex_items_json,
|
||||
codex_message_items_json,
|
||||
),
|
||||
)
|
||||
msg_id = cursor.lastrowid
|
||||
@@ -1129,8 +1112,7 @@ class SessionDB:
|
||||
with self._lock:
|
||||
cursor = self._conn.execute(
|
||||
"SELECT role, content, tool_call_id, tool_calls, tool_name, "
|
||||
"reasoning, reasoning_content, reasoning_details, codex_reasoning_items, "
|
||||
"codex_message_items "
|
||||
"reasoning, reasoning_content, reasoning_details, codex_reasoning_items "
|
||||
"FROM messages WHERE session_id = ? ORDER BY timestamp, id",
|
||||
(session_id,),
|
||||
)
|
||||
@@ -1168,12 +1150,6 @@ class SessionDB:
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
logger.warning("Failed to deserialize codex_reasoning_items, falling back to None")
|
||||
msg["codex_reasoning_items"] = None
|
||||
if row["codex_message_items"]:
|
||||
try:
|
||||
msg["codex_message_items"] = json.loads(row["codex_message_items"])
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
logger.warning("Failed to deserialize codex_message_items, falling back to None")
|
||||
msg["codex_message_items"] = None
|
||||
messages.append(msg)
|
||||
return messages
|
||||
|
||||
|
||||
+23
-39
@@ -24,7 +24,6 @@ import json
|
||||
import asyncio
|
||||
import logging
|
||||
import threading
|
||||
import time
|
||||
from typing import Dict, Any, List, Optional, Tuple
|
||||
|
||||
from tools.registry import discover_builtin_tools, registry
|
||||
@@ -289,34 +288,30 @@ def get_tool_definitions(
|
||||
filtered_tools[i] = {"type": "function", "function": dynamic_schema}
|
||||
break
|
||||
|
||||
# Rebuild discord / discord_admin schemas based on the bot's privileged
|
||||
# intents (detected from GET /applications/@me) and the user's action
|
||||
# allowlist in config. Hides actions the bot's intents don't support so
|
||||
# the model never attempts them, and annotates fetch_messages when the
|
||||
# Rebuild discord_server schema based on the bot's privileged intents
|
||||
# (detected from GET /applications/@me) and the user's action allowlist
|
||||
# in config. Hides actions the bot's intents don't support so the
|
||||
# model never attempts them, and annotates fetch_messages when the
|
||||
# MESSAGE_CONTENT intent is missing.
|
||||
_discord_schema_fns = {
|
||||
"discord": "get_dynamic_schema_core",
|
||||
"discord_admin": "get_dynamic_schema_admin",
|
||||
}
|
||||
for discord_tool_name in _discord_schema_fns:
|
||||
if discord_tool_name in available_tool_names:
|
||||
try:
|
||||
from tools import discord_tool as _dt
|
||||
schema_fn = getattr(_dt, _discord_schema_fns[discord_tool_name])
|
||||
dynamic = schema_fn()
|
||||
except Exception:
|
||||
dynamic = None
|
||||
if dynamic is None:
|
||||
filtered_tools = [
|
||||
t for t in filtered_tools
|
||||
if t.get("function", {}).get("name") != discord_tool_name
|
||||
]
|
||||
available_tool_names.discard(discord_tool_name)
|
||||
else:
|
||||
for i, td in enumerate(filtered_tools):
|
||||
if td.get("function", {}).get("name") == discord_tool_name:
|
||||
filtered_tools[i] = {"type": "function", "function": dynamic}
|
||||
break
|
||||
if "discord_server" in available_tool_names:
|
||||
try:
|
||||
from tools.discord_tool import get_dynamic_schema
|
||||
dynamic = get_dynamic_schema()
|
||||
except Exception: # pragma: no cover — defensive, fall back to static
|
||||
dynamic = None
|
||||
if dynamic is None:
|
||||
# Tool filtered out entirely (empty allowlist or detection disabled
|
||||
# the only remaining actions). Drop it from the schema list.
|
||||
filtered_tools = [
|
||||
t for t in filtered_tools
|
||||
if t.get("function", {}).get("name") != "discord_server"
|
||||
]
|
||||
available_tool_names.discard("discord_server")
|
||||
else:
|
||||
for i, td in enumerate(filtered_tools):
|
||||
if td.get("function", {}).get("name") == "discord_server":
|
||||
filtered_tools[i] = {"type": "function", "function": dynamic}
|
||||
break
|
||||
|
||||
# Strip web tool cross-references from browser_navigate description when
|
||||
# web_search / web_extract are not available. The static schema says
|
||||
@@ -568,14 +563,6 @@ def handle_function_call(
|
||||
except Exception:
|
||||
pass # file_tools may not be loaded yet
|
||||
|
||||
# Measure tool dispatch latency so post_tool_call and
|
||||
# transform_tool_result hooks can observe per-tool duration.
|
||||
# Inspired by Claude Code 2.1.119, which added ``duration_ms`` to
|
||||
# PostToolUse hook inputs so plugin authors can build latency
|
||||
# dashboards, budget alerts, and regression canaries without having
|
||||
# to wrap every tool manually. We use monotonic() so the value is
|
||||
# unaffected by wall-clock adjustments during the call.
|
||||
_dispatch_start = time.monotonic()
|
||||
if function_name == "execute_code":
|
||||
# Prefer the caller-provided list so subagents can't overwrite
|
||||
# the parent's tool set via the process-global.
|
||||
@@ -591,7 +578,6 @@ def handle_function_call(
|
||||
task_id=task_id,
|
||||
user_task=user_task,
|
||||
)
|
||||
duration_ms = int((time.monotonic() - _dispatch_start) * 1000)
|
||||
|
||||
try:
|
||||
from hermes_cli.plugins import invoke_hook
|
||||
@@ -603,7 +589,6 @@ def handle_function_call(
|
||||
task_id=task_id or "",
|
||||
session_id=session_id or "",
|
||||
tool_call_id=tool_call_id or "",
|
||||
duration_ms=duration_ms,
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
@@ -624,7 +609,6 @@ def handle_function_call(
|
||||
task_id=task_id or "",
|
||||
session_id=session_id or "",
|
||||
tool_call_id=tool_call_id or "",
|
||||
duration_ms=duration_ms,
|
||||
)
|
||||
for hook_result in hook_results:
|
||||
if isinstance(hook_result, str):
|
||||
|
||||
@@ -91,29 +91,4 @@
|
||||
|
||||
// Register this plugin — the dashboard picks it up automatically.
|
||||
window.__HERMES_PLUGINS__.register("example", ExamplePage);
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────
|
||||
// Page-scoped slot demo: inject a small banner at the top of /sessions.
|
||||
//
|
||||
// Built-in pages expose named slots (<page>:top, <page>:bottom) that
|
||||
// plugins can populate without overriding the whole route. The
|
||||
// manifest lists the slots we use in its `slots` array so the shell
|
||||
// knows to render <PluginSlot name="sessions:top" /> there.
|
||||
// ─────────────────────────────────────────────────────────────────────
|
||||
function SessionsTopBanner() {
|
||||
return React.createElement(Card, {
|
||||
className: "border-dashed",
|
||||
},
|
||||
React.createElement(CardContent, { className: "flex items-center gap-3 py-2" },
|
||||
React.createElement(Badge, { variant: "outline" }, "Example"),
|
||||
React.createElement("span", {
|
||||
className: "text-xs text-muted-foreground",
|
||||
}, "This banner was injected into the Sessions page by the example plugin via the ",
|
||||
React.createElement("code", { className: "font-courier" }, "sessions:top"),
|
||||
" slot."),
|
||||
),
|
||||
);
|
||||
}
|
||||
|
||||
window.__HERMES_PLUGINS__.registerSlot("example", "sessions:top", SessionsTopBanner);
|
||||
})();
|
||||
|
||||
@@ -8,7 +8,6 @@
|
||||
"path": "/example",
|
||||
"position": "after:skills"
|
||||
},
|
||||
"slots": ["sessions:top"],
|
||||
"entry": "dist/index.js",
|
||||
"api": "plugin_api.py"
|
||||
}
|
||||
|
||||
-1591
File diff suppressed because it is too large
Load Diff
-752
@@ -1,752 +0,0 @@
|
||||
/*
|
||||
* Hermes Kanban — dashboard plugin styles.
|
||||
*
|
||||
* All colors reference theme CSS vars so the board reskins with the
|
||||
* active dashboard theme. No hardcoded palette.
|
||||
*/
|
||||
|
||||
.hermes-kanban {
|
||||
width: 100%;
|
||||
}
|
||||
|
||||
/* ---- Columns layout -------------------------------------------------- */
|
||||
|
||||
.hermes-kanban-columns {
|
||||
display: grid;
|
||||
grid-template-columns: repeat(auto-fit, minmax(260px, 1fr));
|
||||
gap: 0.75rem;
|
||||
align-items: start;
|
||||
}
|
||||
|
||||
.hermes-kanban-column {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
background: color-mix(in srgb, var(--color-card) 85%, transparent);
|
||||
border: 1px solid var(--color-border);
|
||||
border-radius: var(--radius);
|
||||
padding: 0.5rem;
|
||||
min-height: 200px;
|
||||
max-height: calc(100vh - 220px);
|
||||
transition: border-color 120ms ease, background-color 120ms ease;
|
||||
}
|
||||
|
||||
.hermes-kanban-column--drop {
|
||||
border-color: var(--color-ring);
|
||||
background: color-mix(in srgb, var(--color-ring) 8%, var(--color-card));
|
||||
}
|
||||
|
||||
.hermes-kanban-column-header {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.5rem;
|
||||
padding: 0.25rem 0.25rem 0.35rem;
|
||||
font-weight: 600;
|
||||
font-size: 0.85rem;
|
||||
color: var(--color-foreground);
|
||||
}
|
||||
|
||||
.hermes-kanban-column-label {
|
||||
flex: 1;
|
||||
letter-spacing: 0.01em;
|
||||
}
|
||||
|
||||
.hermes-kanban-column-count {
|
||||
font-variant-numeric: tabular-nums;
|
||||
color: var(--color-muted-foreground);
|
||||
font-size: 0.75rem;
|
||||
font-weight: 500;
|
||||
}
|
||||
|
||||
.hermes-kanban-column-add {
|
||||
appearance: none;
|
||||
background: transparent;
|
||||
border: 1px solid var(--color-border);
|
||||
color: var(--color-foreground);
|
||||
border-radius: var(--radius-sm, 0.25rem);
|
||||
width: 22px;
|
||||
height: 22px;
|
||||
line-height: 1;
|
||||
font-size: 1rem;
|
||||
cursor: pointer;
|
||||
}
|
||||
.hermes-kanban-column-add:hover {
|
||||
background: color-mix(in srgb, var(--color-foreground) 8%, transparent);
|
||||
}
|
||||
|
||||
.hermes-kanban-column-sub {
|
||||
padding: 0 0.25rem 0.5rem;
|
||||
font-size: 0.7rem;
|
||||
color: var(--color-muted-foreground);
|
||||
border-bottom: 1px solid color-mix(in srgb, var(--color-border) 60%, transparent);
|
||||
margin-bottom: 0.5rem;
|
||||
}
|
||||
|
||||
.hermes-kanban-column-body {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 0.45rem;
|
||||
overflow-y: auto;
|
||||
padding-right: 0.1rem;
|
||||
}
|
||||
|
||||
.hermes-kanban-empty {
|
||||
padding: 1.5rem 0.5rem;
|
||||
text-align: center;
|
||||
font-size: 0.75rem;
|
||||
color: var(--color-muted-foreground);
|
||||
border: 1px dashed color-mix(in srgb, var(--color-border) 70%, transparent);
|
||||
border-radius: var(--radius-sm, 0.25rem);
|
||||
}
|
||||
|
||||
/* ---- Status dots ----------------------------------------------------- */
|
||||
|
||||
.hermes-kanban-dot {
|
||||
display: inline-block;
|
||||
width: 0.5rem;
|
||||
height: 0.5rem;
|
||||
border-radius: 999px;
|
||||
background: var(--color-muted-foreground);
|
||||
}
|
||||
.hermes-kanban-dot-triage { background: #b47dd6; } /* lilac — fresh/unspecified */
|
||||
.hermes-kanban-dot-todo { background: var(--color-muted-foreground); }
|
||||
.hermes-kanban-dot-ready { background: #d4b348; } /* amber */
|
||||
.hermes-kanban-dot-running { background: #3fb97d; } /* green */
|
||||
.hermes-kanban-dot-blocked { background: var(--color-destructive, #d14a4a); }
|
||||
.hermes-kanban-dot-done { background: #4a8cd1; } /* blue */
|
||||
.hermes-kanban-dot-archived { background: var(--color-border); }
|
||||
|
||||
/* ---- Progress pill (N/M child tasks done) --------------------------- */
|
||||
|
||||
.hermes-kanban-progress {
|
||||
font-family: var(--font-mono, ui-monospace, monospace);
|
||||
font-size: 0.62rem;
|
||||
padding: 0.05rem 0.35rem;
|
||||
border-radius: 999px;
|
||||
background: color-mix(in srgb, var(--color-foreground) 8%, transparent);
|
||||
border: 1px solid color-mix(in srgb, var(--color-border) 80%, transparent);
|
||||
color: var(--color-muted-foreground);
|
||||
letter-spacing: 0.02em;
|
||||
}
|
||||
.hermes-kanban-progress--full {
|
||||
background: color-mix(in srgb, #3fb97d 22%, transparent);
|
||||
border-color: color-mix(in srgb, #3fb97d 45%, transparent);
|
||||
color: var(--color-foreground);
|
||||
}
|
||||
|
||||
/* ---- Lanes (per-profile sub-grouping inside Running) ---------------- */
|
||||
|
||||
.hermes-kanban-lane {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 0.35rem;
|
||||
padding: 0.25rem 0 0.35rem;
|
||||
border-top: 1px dashed color-mix(in srgb, var(--color-border) 70%, transparent);
|
||||
}
|
||||
.hermes-kanban-lane:first-child {
|
||||
border-top: 0;
|
||||
padding-top: 0;
|
||||
}
|
||||
.hermes-kanban-lane-head {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.4rem;
|
||||
font-size: 0.65rem;
|
||||
text-transform: uppercase;
|
||||
letter-spacing: 0.08em;
|
||||
color: var(--color-muted-foreground);
|
||||
padding: 0 0.1rem;
|
||||
}
|
||||
.hermes-kanban-lane-name {
|
||||
font-weight: 600;
|
||||
font-family: var(--font-mono, ui-monospace, monospace);
|
||||
}
|
||||
.hermes-kanban-lane-count {
|
||||
margin-left: auto;
|
||||
font-variant-numeric: tabular-nums;
|
||||
}
|
||||
|
||||
/* ---- Card ------------------------------------------------------------ */
|
||||
|
||||
.hermes-kanban-card {
|
||||
cursor: grab;
|
||||
transition: transform 100ms ease, box-shadow 100ms ease;
|
||||
}
|
||||
.hermes-kanban-card:hover {
|
||||
box-shadow: 0 1px 0 0 var(--color-ring) inset, 0 0 0 1px var(--color-ring) inset;
|
||||
}
|
||||
.hermes-kanban-card:active {
|
||||
cursor: grabbing;
|
||||
transform: scale(0.995);
|
||||
}
|
||||
|
||||
.hermes-kanban-card-content {
|
||||
padding: 0.5rem 0.6rem !important;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 0.3rem;
|
||||
}
|
||||
|
||||
.hermes-kanban-card-row {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.35rem;
|
||||
flex-wrap: wrap;
|
||||
}
|
||||
|
||||
.hermes-kanban-card-id {
|
||||
font-family: var(--font-mono, ui-monospace, monospace);
|
||||
font-size: 0.65rem;
|
||||
color: var(--color-muted-foreground);
|
||||
letter-spacing: 0.03em;
|
||||
}
|
||||
|
||||
.hermes-kanban-card-title {
|
||||
font-size: 0.85rem;
|
||||
font-weight: 500;
|
||||
line-height: 1.3;
|
||||
color: var(--color-foreground);
|
||||
word-break: break-word;
|
||||
}
|
||||
|
||||
.hermes-kanban-card-meta {
|
||||
font-size: 0.7rem;
|
||||
color: var(--color-muted-foreground);
|
||||
gap: 0.55rem;
|
||||
}
|
||||
|
||||
.hermes-kanban-priority {
|
||||
font-size: 0.6rem !important;
|
||||
padding: 0.05rem 0.3rem !important;
|
||||
background: color-mix(in srgb, var(--color-ring) 18%, transparent);
|
||||
color: var(--color-foreground);
|
||||
border: 1px solid color-mix(in srgb, var(--color-ring) 40%, transparent);
|
||||
}
|
||||
|
||||
.hermes-kanban-tag {
|
||||
font-size: 0.6rem !important;
|
||||
padding: 0.05rem 0.3rem !important;
|
||||
}
|
||||
|
||||
.hermes-kanban-assignee {
|
||||
font-weight: 500;
|
||||
color: color-mix(in srgb, var(--color-foreground) 80%, var(--color-muted-foreground));
|
||||
}
|
||||
.hermes-kanban-unassigned {
|
||||
font-style: italic;
|
||||
}
|
||||
.hermes-kanban-ago {
|
||||
margin-left: auto;
|
||||
}
|
||||
|
||||
/* ---- Inline create --------------------------------------------------- */
|
||||
|
||||
.hermes-kanban-inline-create {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 0.35rem;
|
||||
padding: 0.5rem;
|
||||
margin-bottom: 0.5rem;
|
||||
background: color-mix(in srgb, var(--color-card) 70%, transparent);
|
||||
border: 1px dashed var(--color-border);
|
||||
border-radius: var(--radius-sm, 0.25rem);
|
||||
}
|
||||
|
||||
/* ---- Drawer (task detail side panel) --------------------------------- */

/* Full-viewport dimmed backdrop; flex-end pins the drawer to the right. */
.hermes-kanban-drawer-shade {
  position: fixed;
  inset: 0;
  background: rgba(0, 0, 0, 0.45);
  z-index: 60;
  display: flex;
  justify-content: flex-end;
}

/* The panel itself: a vertical flex column (head, scrolling body, ...). */
.hermes-kanban-drawer {
  width: min(480px, 92vw);
  height: 100vh;
  /* 100vh is measured against the largest viewport on mobile browsers, so
     the bottom of the drawer can sit under the browser chrome. Browsers
     that understand dynamic viewport units use this value instead; older
     ones ignore it and keep the 100vh fallback above. */
  height: 100dvh;
  background: var(--color-card);
  border-left: 1px solid var(--color-border);
  display: flex;
  flex-direction: column;
  box-shadow: -4px 0 18px rgba(0, 0, 0, 0.35);
  animation: hermes-kanban-drawer-in 180ms ease-out;
}

/* Slide in from the right edge while fading up. */
@keyframes hermes-kanban-drawer-in {
  from { transform: translateX(100%); opacity: 0.3; }
  to   { transform: translateX(0);    opacity: 1; }
}

/* Honour the OS-level "reduce motion" preference: show the drawer in its
   final position immediately instead of sliding it in. */
@media (prefers-reduced-motion: reduce) {
  .hermes-kanban-drawer { animation: none; }
}

/* Header strip: content on the left, close button on the right. */
.hermes-kanban-drawer-head {
  display: flex;
  align-items: center;
  justify-content: space-between;
  padding: 0.6rem 0.8rem;
  border-bottom: 1px solid var(--color-border);
  font-family: var(--font-mono, ui-monospace, monospace);
}

/* Close button with native button chrome stripped. */
.hermes-kanban-drawer-close {
  appearance: none;
  background: transparent;
  border: 0;
  color: var(--color-muted-foreground);
  font-size: 1.25rem;
  line-height: 1;
  cursor: pointer;
  padding: 0 0.25rem;
}
.hermes-kanban-drawer-close:hover { color: var(--color-foreground); }

/* Scrollable content area; takes all height the header leaves over. */
.hermes-kanban-drawer-body {
  flex: 1;
  overflow-y: auto;
  padding: 0.9rem;
  display: flex;
  flex-direction: column;
  gap: 0.85rem;
}

.hermes-kanban-drawer-title {
  display: flex;
  align-items: center;
  gap: 0.5rem;
  font-size: 1rem;
  font-weight: 600;
}
|
||||
|
||||
/* Boxed label/value list in the drawer. */
.hermes-kanban-drawer-meta {
  display: flex;
  flex-direction: column;
  gap: 0.15rem;
  padding: 0.5rem 0.6rem;
  background: color-mix(in srgb, var(--color-foreground) 4%, transparent);
  border: 1px solid var(--color-border);
  border-radius: var(--radius-sm, 0.25rem);
}

.hermes-kanban-meta-row {
  display: flex;
  gap: 0.5rem;
  font-size: 0.72rem;
}
/* Fixed-width label column so the values line up. */
.hermes-kanban-meta-label {
  width: 92px;
  color: var(--color-muted-foreground);
}
.hermes-kanban-meta-value {
  color: var(--color-foreground);
  /* `word-break: break-word` is a deprecated alias; `overflow-wrap:
     anywhere` is the standard spelling of the same behaviour. Both are
     kept so older engines still wrap long unbroken values. */
  word-break: break-word;
  overflow-wrap: anywhere;
}

.hermes-kanban-actions {
  display: flex;
  flex-wrap: wrap;
  gap: 0.3rem;
}

.hermes-kanban-section {
  display: flex;
  flex-direction: column;
  gap: 0.35rem;
}

/* Small uppercase section caption. */
.hermes-kanban-section-head {
  font-size: 0.72rem;
  font-weight: 600;
  text-transform: uppercase;
  letter-spacing: 0.07em;
  color: var(--color-muted-foreground);
}

/* Monospace block that keeps line breaks but wraps long lines. */
.hermes-kanban-pre {
  margin: 0;
  padding: 0.45rem 0.55rem;
  white-space: pre-wrap;
  /* Deprecated alias kept for older engines; see overflow-wrap below. */
  word-break: break-word;
  overflow-wrap: anywhere;
  background: color-mix(in srgb, var(--color-foreground) 4%, transparent);
  border: 1px solid var(--color-border);
  border-radius: var(--radius-sm, 0.25rem);
  font-family: var(--font-mono, ui-monospace, monospace);
  font-size: 0.72rem;
  color: var(--color-foreground);
}
|
||||
|
||||
/* Comment entry: ring-tinted rule on the left, children stacked. */
.hermes-kanban-comment {
  display: flex;
  flex-direction: column;
  gap: 0.2rem;
  padding-left: 0.5rem;
  border-left: 2px solid color-mix(in srgb, var(--color-ring) 35%, transparent);
}

.hermes-kanban-comment-head {
  display: flex;
  gap: 0.5rem;
  font-size: 0.7rem;
}

.hermes-kanban-comment-author {
  color: var(--color-foreground);
  font-weight: 600;
}

.hermes-kanban-comment-ago { color: var(--color-muted-foreground); }

/* Event row: muted monospace line. */
.hermes-kanban-event {
  display: flex;
  gap: 0.5rem;
  font-family: var(--font-mono, ui-monospace, monospace);
  font-size: 0.7rem;
  color: var(--color-muted-foreground);
}

/* Minimum width keeps the kind column aligned across rows. */
.hermes-kanban-event-kind {
  min-width: 6rem;
  color: var(--color-foreground);
}

/* Single-line payload preview, cut off with an ellipsis past 280px. */
.hermes-kanban-event-payload {
  max-width: 280px;
  overflow: hidden;
  white-space: nowrap;
  text-overflow: ellipsis;
  color: var(--color-muted-foreground);
}

/* Comment input row, separated from the content above by a top border. */
.hermes-kanban-drawer-comment-row {
  display: flex;
  gap: 0.4rem;
  padding: 0.55rem 0.75rem;
  border-top: 1px solid var(--color-border);
  background: color-mix(in srgb, var(--color-card) 90%, transparent);
}

.hermes-kanban-count {
  display: inline-flex;
  align-items: center;
  gap: 0.2rem;
}
|
||||
|
||||
/* ---- Selection chrome ----------------------------------------------- */

/* Selected card: inset ring plus a faint ring-tinted fill. The :where()
   wrapper keeps the inner selector at zero specificity. */
.hermes-kanban-card--selected :where(.hermes-kanban-card-content) {
  background: color-mix(in srgb, var(--color-ring) 6%, var(--color-card));
  box-shadow: 0 0 0 2px var(--color-ring) inset,
              0 0 0 1px var(--color-ring) inset;
}

/* Per-card selection checkbox, tinted with the ring colour. */
.hermes-kanban-card-check {
  width: 0.85rem;
  height: 0.85rem;
  margin: 0;
  accent-color: var(--color-ring);
  cursor: pointer;
}
|
||||
|
||||
/* ---- Bulk action bar ------------------------------------------------ */

/* Ring-tinted toolbar; wraps onto several lines when space runs out. */
.hermes-kanban-bulk {
  display: flex;
  flex-wrap: wrap;
  align-items: center;
  gap: 0.5rem;
  padding: 0.4rem 0.75rem;
  border: 1px solid color-mix(in srgb, var(--color-ring) 40%, var(--color-border));
  border-radius: var(--radius-sm, 0.25rem);
  background: color-mix(in srgb, var(--color-ring) 10%, var(--color-card));
}

.hermes-kanban-bulk-count {
  padding-right: 0.25rem;
  font-size: 0.75rem;
  font-weight: 600;
}

/* Compact button sizing; the !important flags are preserved as-is. */
.hermes-kanban-bulk-btn {
  height: 1.7rem !important;
  padding: 0 0.5rem !important;
  font-size: 0.7rem !important;
  border: 1px solid var(--color-border);
  cursor: pointer;
}

.hermes-kanban-bulk-btn:hover {
  background: color-mix(in srgb, var(--color-foreground) 8%, transparent);
}

/* Reassign controls, set off from the buttons by a faint divider. */
.hermes-kanban-bulk-reassign {
  display: flex;
  align-items: center;
  gap: 0.25rem;
  padding-left: 0.5rem;
  border-left: 1px solid color-mix(in srgb, var(--color-border) 70%, transparent);
}
|
||||
|
||||
/* ---- Dependency editor chips --------------------------------------- */

.hermes-kanban-deps-row {
  display: flex;
  align-items: center;
  gap: 0.5rem;
  margin-bottom: 0.4rem;
}

/* Small uppercase caption with a minimum width so rows align. */
.hermes-kanban-deps-label {
  min-width: 4rem;
  font-size: 0.68rem;
  letter-spacing: 0.08em;
  text-transform: uppercase;
  color: var(--color-muted-foreground);
}

/* Chip container: fills the rest of the row and wraps. */
.hermes-kanban-deps-chips {
  display: flex;
  flex: 1;
  flex-wrap: wrap;
  gap: 0.3rem;
}

.hermes-kanban-deps-empty {
  font-size: 0.7rem;
  font-style: italic;
  color: var(--color-muted-foreground);
}

/* One dependency chip: monospace text in a bordered pill. */
.hermes-kanban-dep-chip {
  display: inline-flex;
  align-items: center;
  gap: 0.15rem;
  padding: 0.1rem 0.35rem;
  border: 1px solid var(--color-border);
  border-radius: var(--radius-sm, 0.25rem);
  background: color-mix(in srgb, var(--color-foreground) 6%, transparent);
  font-family: var(--font-mono, ui-monospace, monospace);
  font-size: 0.68rem;
  color: var(--color-foreground);
}

/* Remove button inside a chip; native button chrome stripped. */
.hermes-kanban-dep-chip-x {
  appearance: none;
  padding: 0 0.15rem;
  border: 0;
  background: transparent;
  font-size: 0.85rem;
  line-height: 1;
  color: var(--color-muted-foreground);
  cursor: pointer;
}

.hermes-kanban-dep-chip-x:hover { color: var(--color-destructive, #d14a4a); }
|
||||
|
||||
/* ---- Inline edit affordances --------------------------------------- */

/* Dotted underline marks a value as click-to-edit. */
.hermes-kanban-editable {
  border-bottom: 1px dotted color-mix(in srgb, var(--color-border) 80%, transparent);
  cursor: pointer;
}

.hermes-kanban-editable:hover {
  border-bottom-color: var(--color-ring);
  color: var(--color-foreground);
}

.hermes-kanban-drawer-title-text { cursor: pointer; }

/* The shorthand must stay first: the longhands below refine it. */
.hermes-kanban-drawer-title-text:hover {
  text-decoration: underline;
  text-decoration-color: var(--color-ring);
  text-decoration-style: dotted;
  text-underline-offset: 3px;
}

.hermes-kanban-edit-row {
  display: flex;
  align-items: center;
  gap: 0.35rem;
  width: 100%;
}

/* Section caption on the left, edit link on the right. */
.hermes-kanban-section-head-row {
  display: flex;
  align-items: center;
  justify-content: space-between;
  gap: 0.5rem;
}

/* Text-only button styled as a small uppercase link. */
.hermes-kanban-edit-link {
  appearance: none;
  padding: 0;
  border: 0;
  background: transparent;
  font-size: 0.7rem;
  letter-spacing: 0.05em;
  text-transform: uppercase;
  color: var(--color-muted-foreground);
  cursor: pointer;
}

.hermes-kanban-edit-link:hover { color: var(--color-ring); }

/* Monospace editor textarea; resizable vertically only. */
.hermes-kanban-textarea {
  width: 100%;
  min-height: 8rem;
  padding: 0.5rem 0.6rem;
  border: 1px solid var(--color-border);
  border-radius: var(--radius-sm, 0.25rem);
  background: var(--color-card);
  color: var(--color-foreground);
  font-family: var(--font-mono, ui-monospace, monospace);
  font-size: 0.8rem;
  line-height: 1.5;
  resize: vertical;
}

/* Focus ring drawn with border + box-shadow in place of the outline. */
.hermes-kanban-textarea:focus {
  outline: none;
  border-color: var(--color-ring);
  box-shadow: 0 0 0 2px color-mix(in srgb, var(--color-ring) 30%, transparent);
}
|
||||
|
||||
/* ---- Markdown rendering -------------------------------------------- */

/* Base typography for rendered markdown. */
.hermes-kanban-md {
  color: var(--color-foreground);
  font-size: 0.8rem;
  line-height: 1.55;
}

.hermes-kanban-md p { margin: 0.25rem 0; }

/* Headings share spacing; sizes step down from h1 to h4. */
.hermes-kanban-md h1,
.hermes-kanban-md h2,
.hermes-kanban-md h3,
.hermes-kanban-md h4 {
  line-height: 1.25;
  margin: 0.6rem 0 0.2rem;
}

.hermes-kanban-md h1 { font-size: 1.05rem; }
.hermes-kanban-md h2 { font-size: 0.95rem; }
.hermes-kanban-md h3 { font-size: 0.88rem; }
.hermes-kanban-md h4 { font-size: 0.82rem; }

/* Indent lists via margin rather than padding. */
.hermes-kanban-md ul {
  padding: 0;
  margin: 0.25rem 0 0.25rem 1.1rem;
}

.hermes-kanban-md li { margin: 0.1rem 0; }

.hermes-kanban-md a {
  text-decoration: underline;
  color: var(--color-ring);
}

/* Inline code: tinted pill. */
.hermes-kanban-md code {
  padding: 0.05rem 0.3rem;
  border-radius: 3px;
  background: color-mix(in srgb, var(--color-foreground) 8%, transparent);
  font-family: var(--font-mono, ui-monospace, monospace);
  font-size: 0.75rem;
}

/* Fenced code block container; scrolls horizontally instead of wrapping. */
.hermes-kanban-md-code {
  margin: 0.35rem 0;
  padding: 0.5rem 0.6rem;
  overflow-x: auto;
  border: 1px solid var(--color-border);
  border-radius: var(--radius-sm, 0.25rem);
  background: color-mix(in srgb, var(--color-foreground) 5%, transparent);
}

/* Resets the inline-code pill inside a block. Must stay after the
   `.hermes-kanban-md code` rule: equal specificity, later rule wins. */
.hermes-kanban-md-code code {
  padding: 0;
  background: transparent;
  font-size: 0.75rem;
  white-space: pre;
}

.hermes-kanban-md strong { font-weight: 600; }
|
||||
|
||||
/* ---- Touch-drag proxy ---------------------------------------------- */

/* Slightly enlarged, translucent, shadowed element that ignores pointer
   events; transitions are disabled on it. */
.hermes-kanban-touch-proxy {
  opacity: 0.85;
  transform: scale(1.02);
  box-shadow: 0 8px 20px rgba(0, 0, 0, 0.35);
  transition: none;
  pointer-events: none;
}
|
||||
|
||||
|
||||
/* ---- Staleness tiers ------------------------------------------------ */

/* Amber tier: thin translucent inset ring, solid and thicker on hover. */
.hermes-kanban-card--stale-amber :where(.hermes-kanban-card-content) {
  box-shadow: 0 0 0 1px #d4b34888 inset;
}

.hermes-kanban-card--stale-amber:hover :where(.hermes-kanban-card-content) {
  box-shadow: 0 0 0 2px #d4b348 inset;
}

/* Red tier: destructive-coloured inset ring plus an outer glow; both
   intensify on hover. */
.hermes-kanban-card--stale-red :where(.hermes-kanban-card-content) {
  box-shadow:
    0 0 0 1px var(--color-destructive, #d14a4a) inset,
    0 0 8px color-mix(in srgb, var(--color-destructive, #d14a4a) 30%, transparent);
}

.hermes-kanban-card--stale-red:hover :where(.hermes-kanban-card-content) {
  box-shadow:
    0 0 0 2px var(--color-destructive, #d14a4a) inset,
    0 0 10px color-mix(in srgb, var(--color-destructive, #d14a4a) 45%, transparent);
}
|
||||
|
||||
/* ---- Worker log pane ------------------------------------------------ */

/* Fixed-height scroll box; lines are never wrapped (white-space: pre). */
.hermes-kanban-log {
  max-height: 340px;
  overflow: auto;
  font-size: 0.7rem;
  line-height: 1.45;
  white-space: pre;
}
|
||||
|
||||
|
||||
/* ---- Run history (per-attempt log in the drawer) ------------------- */

/* One run entry. The left border colour is overridden per outcome by the
   modifier rules below, which must stay after this rule. */
.hermes-kanban-run {
  border-left: 2px solid var(--color-border);
  padding: 0.35rem 0.5rem;
  margin-bottom: 0.4rem;
  background: color-mix(in srgb, var(--color-foreground) 3%, transparent);
  border-radius: var(--radius-sm, 0.25rem);
}
.hermes-kanban-run--active    { border-left-color: #3fb97d; }
.hermes-kanban-run--completed { border-left-color: #4a8cd1; }
.hermes-kanban-run--ended     { border-left-color: #6b7280; } /* generic fallback when outcome is unset */
.hermes-kanban-run--blocked   { border-left-color: var(--color-destructive, #d14a4a); }
/* Failure outcomes also get a faint destructive-tinted fill. */
.hermes-kanban-run--crashed,
.hermes-kanban-run--timed_out,
.hermes-kanban-run--gave_up,
.hermes-kanban-run--spawn_failed {
  border-left-color: var(--color-destructive, #d14a4a);
  background: color-mix(in srgb, var(--color-destructive, #d14a4a) 6%, transparent);
}
.hermes-kanban-run--reclaimed { border-left-color: #d4b348; }

.hermes-kanban-run-head {
  display: flex;
  align-items: center;
  gap: 0.6rem;
  font-size: 0.7rem;
}
.hermes-kanban-run-outcome {
  font-family: var(--font-mono, ui-monospace, monospace);
  font-weight: 600;
  text-transform: uppercase;
  letter-spacing: 0.05em;
  color: var(--color-foreground);
}
.hermes-kanban-run-profile {
  color: var(--color-muted-foreground);
}
/* Tabular figures keep digit widths constant. */
.hermes-kanban-run-elapsed {
  font-variant-numeric: tabular-nums;
  color: var(--color-muted-foreground);
}
/* Auto margin pushes the timestamp to the end of the head row. */
.hermes-kanban-run-ago {
  margin-left: auto;
  color: var(--color-muted-foreground);
}
.hermes-kanban-run-summary {
  font-size: 0.75rem;
  padding: 0.2rem 0 0;
  color: var(--color-foreground);
}
.hermes-kanban-run-error {
  font-size: 0.7rem;
  color: var(--color-destructive, #d14a4a);
  padding: 0.15rem 0 0;
  font-family: var(--font-mono, ui-monospace, monospace);
}
.hermes-kanban-run-meta {
  display: block;
  font-size: 0.65rem;
  padding: 0.15rem 0 0;
  color: var(--color-muted-foreground);
  white-space: pre-wrap;
  /* `word-break: break-word` is a deprecated alias; `overflow-wrap:
     anywhere` is the standard spelling of the same behaviour. Both are
     kept so older engines still wrap long unbroken values. */
  word-break: break-word;
  overflow-wrap: anywhere;
  font-family: var(--font-mono, ui-monospace, monospace);
}
|
||||
@@ -1,14 +0,0 @@
|
||||
{
|
||||
"name": "kanban",
|
||||
"label": "Kanban",
|
||||
"description": "Multi-agent collaboration board — drag-drop cards across columns, read comment threads, see which profile is running what",
|
||||
"icon": "Package",
|
||||
"version": "1.0.0",
|
||||
"tab": {
|
||||
"path": "/kanban",
|
||||
"position": "after:skills"
|
||||
},
|
||||
"entry": "dist/index.js",
|
||||
"css": "dist/style.css",
|
||||
"api": "plugin_api.py"
|
||||
}
|
||||
@@ -1,830 +0,0 @@
|
||||
"""Kanban dashboard plugin — backend API routes.
|
||||
|
||||
Mounted at /api/plugins/kanban/ by the dashboard plugin system.
|
||||
|
||||
This layer is intentionally thin: every handler is a small wrapper around
|
||||
``hermes_cli.kanban_db`` or a direct SQL query. Writes use the same code
|
||||
paths the CLI and gateway ``/kanban`` command use, so the three surfaces
|
||||
cannot drift.
|
||||
|
||||
Live updates arrive via the ``/events`` WebSocket, which tails the
|
||||
append-only ``task_events`` table on a short poll interval (WAL mode lets
|
||||
reads run alongside the dispatcher's IMMEDIATE write transactions).
|
||||
|
||||
Security note
|
||||
-------------
|
||||
The dashboard's HTTP auth middleware (``web_server.auth_middleware``)
|
||||
explicitly skips ``/api/plugins/`` — plugin routes are unauthenticated by
|
||||
design because the dashboard binds to localhost by default. For the
|
||||
WebSocket we still require the session token as a ``?token=`` query
|
||||
parameter (browsers cannot set the ``Authorization`` header on an upgrade
|
||||
request), matching the established pattern used by the in-browser PTY
|
||||
bridge in ``hermes_cli/web_server.py``. If you run the dashboard with
|
||||
``--host 0.0.0.0``, every plugin route — kanban included — becomes
|
||||
reachable from the network. Don't do that on a shared host.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import hmac
|
||||
import json
|
||||
import logging
|
||||
import sqlite3
|
||||
import time
|
||||
from dataclasses import asdict
|
||||
from typing import Any, Optional
|
||||
|
||||
from fastapi import APIRouter, HTTPException, Query, WebSocket, WebSocketDisconnect, status as http_status
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from hermes_cli import kanban_db
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Auth helper — WebSocket only (HTTP routes live behind the dashboard's
|
||||
# existing plugin-bypass; this is documented above).
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _check_ws_token(provided: Optional[str]) -> bool:
    """Constant-time compare of *provided* against the dashboard session token.

    ``web_server`` is imported lazily so the plugin still loads in test
    contexts where the dashboard module isn't importable (e.g. the
    bare-FastAPI test harness).

    Both tokens are encoded to UTF-8 bytes before the comparison:
    ``hmac.compare_digest`` raises ``TypeError`` for ``str`` arguments that
    contain non-ASCII characters, and *provided* comes straight from the
    client's query string.

    Args:
        provided: Token supplied by the client, or ``None`` when absent.

    Returns:
        ``False`` when *provided* is missing/empty or does not match.
        ``True`` on a match, and also (fail-open) when the dashboard module
        cannot be imported or exposes no ``_SESSION_TOKEN``.
    """
    if not provided:
        return False
    try:
        from hermes_cli import web_server as _ws
    except Exception:
        # No dashboard context (tests). Accept so the tail loop is still
        # testable; in production the dashboard module always imports
        # cleanly because it's the caller.
        return True
    expected = getattr(_ws, "_SESSION_TOKEN", None)
    if not expected:
        return True
    # "surrogatepass" keeps encode() itself from raising on odd input, so a
    # malformed token is simply a mismatch rather than an exception.
    return hmac.compare_digest(
        str(provided).encode("utf-8", "surrogatepass"),
        str(expected).encode("utf-8", "surrogatepass"),
    )
|
||||
|
||||
|
||||
def _conn():
    """Return a fresh ``kanban_db`` connection, ensuring the schema first.

    ``kanban_db.init_db()`` is attempted before every connect so that a
    fresh install self-heals (no user-visible "no such table" error if
    somebody hits POST /tasks before GET /board). ``init_db`` is
    idempotent. A failure there is logged as a warning and the connection
    is opened regardless.
    """
    try:
        kanban_db.init_db()
    except Exception as init_error:
        log.warning("kanban init_db failed: %s", init_error)
    connection = kanban_db.connect()
    return connection
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Serialization helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Status columns rendered by the dashboard, listed left to right.
# "archived" is deliberately absent: it is reached through a filter toggle
# rather than shown as a visible column.
BOARD_COLUMNS: list[str] = [
    "triage",
    "todo",
    "ready",
    "running",
    "blocked",
    "done",
]
|
||||
|
||||
|
||||
def _task_dict(task: kanban_db.Task) -> dict[str, Any]:
    """Serialise a Task dataclass to a dict, plus a derived ``age`` entry.

    ``age`` comes from ``kanban_db.task_age`` so the UI can colour stale
    cards without computing deltas client-side. Every dataclass field is
    passed through unchanged.
    """
    return {**asdict(task), "age": kanban_db.task_age(task)}
|
||||
|
||||
|
||||
def _event_dict(event: kanban_db.Event) -> dict[str, Any]:
    """Serialise an Event into a plain dict of its exported attributes."""
    exported = ("id", "task_id", "kind", "payload", "created_at", "run_id")
    return {name: getattr(event, name) for name in exported}
|
||||
|
||||
|
||||
def _comment_dict(c: kanban_db.Comment) -> dict[str, Any]:
    """Serialise a Comment into a plain dict of its exported attributes."""
    exported = ("id", "task_id", "author", "body", "created_at")
    return {name: getattr(c, name) for name in exported}
|
||||
|
||||
|
||||
def _run_dict(r: kanban_db.Run) -> dict[str, Any]:
    """Serialise a Run for the drawer's Run history section."""
    # Attribute names copied onto the result, in output order.
    exported = (
        "id",
        "task_id",
        "profile",
        "step_key",
        "status",
        "claim_lock",
        "claim_expires",
        "worker_pid",
        "max_runtime_seconds",
        "last_heartbeat_at",
        "started_at",
        "ended_at",
        "outcome",
        "summary",
        "metadata",
        "error",
    )
    return {name: getattr(r, name) for name in exported}
|
||||
|
||||
|
||||
def _links_for(conn: sqlite3.Connection, task_id: str) -> dict[str, list[str]]:
    """Return {'parents': [...], 'children': [...]} for a task.

    Both lists hold task ids read from ``task_links``, sorted ascending.
    Rows are indexed by column name, so *conn* needs a row factory that
    supports that (e.g. ``sqlite3.Row``).
    """
    parent_rows = conn.execute(
        "SELECT parent_id FROM task_links WHERE child_id = ? ORDER BY parent_id",
        (task_id,),
    ).fetchall()
    child_rows = conn.execute(
        "SELECT child_id FROM task_links WHERE parent_id = ? ORDER BY child_id",
        (task_id,),
    ).fetchall()
    return {
        "parents": [row["parent_id"] for row in parent_rows],
        "children": [row["child_id"] for row in child_rows],
    }
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# GET /board
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@router.get("/board")
def get_board(
    tenant: Optional[str] = Query(None, description="Filter to a single tenant"),
    include_archived: bool = Query(False),
):
    """Return the whole board, grouped into status columns.

    The response carries the columns (each a list of task dicts decorated
    with ``link_counts``, ``comment_count`` and ``progress``), the known
    tenants and assignees for the UI filters, the newest event id and the
    server time.

    ``_conn()`` auto-initializes ``kanban.db`` on first call so a fresh
    install doesn't surface a "failed to load" error on the plugin tab.
    """
    conn = _conn()
    try:
        tasks = kanban_db.list_tasks(
            conn, tenant=tenant, include_archived=include_archived
        )

        # Parent/child link counts for every task, from a single query.
        link_counts: dict[str, dict[str, int]] = {}
        link_rows = conn.execute(
            "SELECT parent_id, child_id FROM task_links"
        ).fetchall()
        for link in link_rows:
            parent_entry = link_counts.setdefault(
                link["parent_id"], {"parents": 0, "children": 0}
            )
            parent_entry["children"] += 1
            child_entry = link_counts.setdefault(
                link["child_id"], {"parents": 0, "children": 0}
            )
            child_entry["parents"] += 1

        # Comment count per task (one aggregate query).
        comment_counts: dict[str, int] = {}
        for counted in conn.execute(
            "SELECT task_id, COUNT(*) AS n FROM task_comments GROUP BY task_id"
        ):
            comment_counts[counted["task_id"]] = counted["n"]

        # Progress rollup: per parent, how many children are done / total.
        # One pass over task_links joined with child status, which the
        # plugin uses to render "N/M".
        progress: dict[str, dict[str, int]] = {}
        rollup_rows = conn.execute(
            "SELECT l.parent_id AS pid, t.status AS cstatus "
            "FROM task_links l JOIN tasks t ON t.id = l.child_id"
        ).fetchall()
        for child in rollup_rows:
            tally = progress.setdefault(child["pid"], {"done": 0, "total": 0})
            tally["total"] += 1
            if child["cstatus"] == "done":
                tally["done"] += 1

        newest_event = conn.execute(
            "SELECT COALESCE(MAX(id), 0) AS m FROM task_events"
        ).fetchone()
        latest_event_id = newest_event["m"]

        columns: dict[str, list[dict]] = {name: [] for name in BOARD_COLUMNS}
        if include_archived:
            columns["archived"] = []

        # list_tasks already orders tasks (priority DESC, created_at ASC),
        # so appending in iteration order keeps each column stable.
        for t in tasks:
            card = _task_dict(t)
            card["link_counts"] = link_counts.get(
                t.id, {"parents": 0, "children": 0}
            )
            card["comment_count"] = comment_counts.get(t.id, 0)
            card["progress"] = progress.get(t.id)  # None when the task has no children
            # A status without a column of its own lands in "todo".
            target = t.status if t.status in columns else "todo"
            columns[target].append(card)

        # Known tenants for the UI filter dropdown.
        tenant_rows = conn.execute(
            "SELECT DISTINCT tenant FROM tasks WHERE tenant IS NOT NULL ORDER BY tenant"
        )
        tenants = [row["tenant"] for row in tenant_rows]

        # Distinct assignees for the lane-by-profile sub-grouping.
        assignee_rows = conn.execute(
            "SELECT DISTINCT assignee FROM tasks WHERE assignee IS NOT NULL "
            "AND status != 'archived' ORDER BY assignee"
        )
        assignees = [row["assignee"] for row in assignee_rows]

        return {
            "columns": [
                {"name": name, "tasks": cards} for name, cards in columns.items()
            ],
            "tenants": tenants,
            "assignees": assignees,
            "latest_event_id": int(latest_event_id),
            "now": int(time.time()),
        }
    finally:
        conn.close()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# GET /tasks/:id
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@router.get("/tasks/{task_id}")
def get_task(task_id: str):
    """Return one task together with its comments, events, links and runs.

    Raises:
        HTTPException: 404 when no task with *task_id* exists.
    """
    conn = _conn()
    try:
        task = kanban_db.get_task(conn, task_id)
        if task is None:
            raise HTTPException(status_code=404, detail=f"task {task_id} not found")

        # Built key by key so the lookups run in the same order as the
        # keys appear in the response.
        detail: dict[str, Any] = {"task": _task_dict(task)}
        detail["comments"] = [
            _comment_dict(comment)
            for comment in kanban_db.list_comments(conn, task_id)
        ]
        detail["events"] = [
            _event_dict(event) for event in kanban_db.list_events(conn, task_id)
        ]
        detail["links"] = _links_for(conn, task_id)
        detail["runs"] = [
            _run_dict(run) for run in kanban_db.list_runs(conn, task_id)
        ]
        return detail
    finally:
        conn.close()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# POST /tasks
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class CreateTaskBody(BaseModel):
    # Request body for POST /tasks. The create_task route forwards every
    # field below to kanban_db.create_task under the same keyword name.
    # ``title`` is the only required field; everything else has a default.
    title: str
    body: Optional[str] = None
    assignee: Optional[str] = None
    tenant: Optional[str] = None
    priority: int = 0
    workspace_kind: str = "scratch"
    workspace_path: Optional[str] = None
    # default_factory gives each request its own fresh list.
    parents: list[str] = Field(default_factory=list)
    triage: bool = False
    idempotency_key: Optional[str] = None
    max_runtime_seconds: Optional[int] = None
    skills: Optional[list[str]] = None
|
||||
|
||||
|
||||
@router.post("/tasks")
def create_task(payload: CreateTaskBody):
    """Create a task from *payload* and return it as ``{"task": ...}``.

    The new task is attributed to ``"dashboard"`` as its creator. The
    ``task`` value is ``None`` if the freshly created task cannot be read
    back.

    Raises:
        HTTPException: 400 carrying the message of any ``ValueError``
            raised while creating or serialising the task.
    """
    conn = _conn()
    try:
        new_id = kanban_db.create_task(
            conn,
            created_by="dashboard",
            title=payload.title,
            body=payload.body,
            assignee=payload.assignee,
            tenant=payload.tenant,
            priority=payload.priority,
            workspace_kind=payload.workspace_kind,
            workspace_path=payload.workspace_path,
            parents=payload.parents,
            triage=payload.triage,
            idempotency_key=payload.idempotency_key,
            max_runtime_seconds=payload.max_runtime_seconds,
            skills=payload.skills,
        )
        created = kanban_db.get_task(conn, new_id)
        if not created:
            return {"task": None}
        return {"task": _task_dict(created)}
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))
    finally:
        conn.close()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# PATCH /tasks/:id (status / assignee / priority / title / body)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class UpdateTaskBody(BaseModel):
    # Request body for PATCH /tasks/{task_id}. Every field is optional; the
    # update_task route skips any field left as None.
    status: Optional[str] = None
    # An empty string is passed to assign_task as None by update_task.
    assignee: Optional[str] = None
    priority: Optional[int] = None
    title: Optional[str] = None
    body: Optional[str] = None
    # Forwarded to complete_task when status is 'done'.
    result: Optional[str] = None
    # Forwarded to block_task as ``reason`` when status is 'blocked'.
    block_reason: Optional[str] = None
    # Structured handoff fields — forwarded to complete_task when status
    # transitions to 'done'. Dashboard parity with ``hermes kanban
    # complete --summary ... --metadata ...``.
    summary: Optional[str] = None
    metadata: Optional[dict] = None
|
||||
|
||||
|
||||
@router.patch("/tasks/{task_id}")
def update_task(task_id: str, payload: UpdateTaskBody):
    """Apply a partial update to a task and return the refreshed task.

    Fields left as ``None`` in *payload* are not touched. The remaining
    ones are applied in a fixed order: assignee, status, priority, then
    title/body.

    Request validation (unknown ``status``, blank ``title``) runs before
    the first write, so a malformed request is rejected with 400 without
    leaving an earlier field (e.g. the assignee or status) already changed.

    Returns:
        ``{"task": <task dict>}``, or ``{"task": None}`` if the task cannot
        be read back after the update.

    Raises:
        HTTPException: 404 when the task does not exist; 400 for an
            unknown status or an empty title; 409 when ``assign_task``
            raises ``RuntimeError`` or the status transition is refused.
    """
    # Statuses written directly; the others go through a dedicated
    # kanban_db verb (complete / block / unblock / archive).
    direct_statuses = ("todo", "running", "triage")
    known_statuses = ("done", "blocked", "ready", "archived") + direct_statuses

    conn = _conn()
    try:
        task = kanban_db.get_task(conn, task_id)
        if task is None:
            raise HTTPException(status_code=404, detail=f"task {task_id} not found")

        # --- validate the whole request before mutating anything ----------
        if payload.status is not None and payload.status not in known_statuses:
            raise HTTPException(
                status_code=400, detail=f"unknown status: {payload.status}",
            )
        new_title: Optional[str] = None
        if payload.title is not None:
            new_title = payload.title.strip()
            if not new_title:
                raise HTTPException(status_code=400, detail="title cannot be empty")

        # --- assignee ----------------------------------------------------
        if payload.assignee is not None:
            try:
                # An empty string means "unassign" and is passed as None.
                ok = kanban_db.assign_task(
                    conn, task_id, payload.assignee or None,
                )
            except RuntimeError as e:
                raise HTTPException(status_code=409, detail=str(e)) from e
            if not ok:
                raise HTTPException(status_code=404, detail="task not found")

        # --- status -------------------------------------------------------
        if payload.status is not None:
            s = payload.status
            if s == "done":
                ok = kanban_db.complete_task(
                    conn, task_id,
                    result=payload.result,
                    summary=payload.summary,
                    metadata=payload.metadata,
                )
            elif s == "blocked":
                ok = kanban_db.block_task(conn, task_id, reason=payload.block_reason)
            elif s == "ready":
                # Re-open a blocked task, or just an explicit status set.
                current = kanban_db.get_task(conn, task_id)
                if current and current.status == "blocked":
                    ok = kanban_db.unblock_task(conn, task_id)
                else:
                    # Direct status write for drag-drop (todo -> ready etc).
                    ok = _set_status_direct(conn, task_id, "ready")
            elif s == "archived":
                ok = kanban_db.archive_task(conn, task_id)
            else:
                # Validation above guarantees s is one of direct_statuses.
                ok = _set_status_direct(conn, task_id, s)
            if not ok:
                raise HTTPException(
                    status_code=409,
                    detail=f"status transition to {s!r} not valid from current state",
                )

        # --- priority -----------------------------------------------------
        if payload.priority is not None:
            with kanban_db.write_txn(conn):
                conn.execute(
                    "UPDATE tasks SET priority = ? WHERE id = ?",
                    (int(payload.priority), task_id),
                )
                conn.execute(
                    "INSERT INTO task_events (task_id, kind, payload, created_at) "
                    "VALUES (?, 'reprioritized', ?, ?)",
                    (task_id, json.dumps({"priority": int(payload.priority)}),
                     int(time.time())),
                )

        # --- title / body -------------------------------------------------
        if new_title is not None or payload.body is not None:
            # Column names are fixed literals; only the values are bound.
            sets, vals = [], []
            if new_title is not None:
                sets.append("title = ?")
                vals.append(new_title)
            if payload.body is not None:
                sets.append("body = ?")
                vals.append(payload.body)
            vals.append(task_id)
            with kanban_db.write_txn(conn):
                conn.execute(
                    f"UPDATE tasks SET {', '.join(sets)} WHERE id = ?", vals,
                )
                conn.execute(
                    "INSERT INTO task_events (task_id, kind, payload, created_at) "
                    "VALUES (?, 'edited', NULL, ?)",
                    (task_id, int(time.time())),
                )

        updated = kanban_db.get_task(conn, task_id)
        return {"task": _task_dict(updated) if updated else None}
    finally:
        conn.close()
|
||||
|
||||
|
||||
def _set_status_direct(
    conn: sqlite3.Connection, task_id: str, new_status: str,
) -> bool:
    """Write ``new_status`` straight onto a task and record a ``status`` event.

    Used for drag-drop moves that the structured complete / block /
    unblock / archive verbs do not cover (e.g. todo<->ready,
    running<->ready).

    All work happens inside a single ``kanban_db.write_txn``:

    * the task's current ``status`` and ``current_run_id`` are read;
    * the row is updated; ``claim_lock``, ``claim_expires`` and
      ``worker_pid`` are set to NULL unless the new status is ``running``;
    * when the task was ``running``, no longer is, and had a current run,
      that run is ended through ``kanban_db._end_run`` with
      outcome/status ``reclaimed`` so attempt history is not orphaned;
    * a ``status`` row is appended to ``task_events`` for the live feed;
    * ``kanban_db.recompute_ready`` runs when the new status is ``done``
      or ``ready``.

    Returns:
        ``False`` when the task does not exist or the UPDATE did not touch
        exactly one row, ``True`` otherwise.
    """
    update_sql = (
        "UPDATE tasks SET status = ?, "
        " claim_lock = CASE WHEN ? = 'running' THEN claim_lock ELSE NULL END, "
        " claim_expires = CASE WHEN ? = 'running' THEN claim_expires ELSE NULL END, "
        " worker_pid = CASE WHEN ? = 'running' THEN worker_pid ELSE NULL END "
        "WHERE id = ?"
    )
    event_sql = (
        "INSERT INTO task_events (task_id, run_id, kind, payload, created_at) "
        "VALUES (?, ?, 'status', ?, ?)"
    )

    with kanban_db.write_txn(conn):
        # Snapshot the pre-update state; it decides whether a run gets closed.
        before = conn.execute(
            "SELECT status, current_run_id FROM tasks WHERE id = ?",
            (task_id,),
        ).fetchone()
        if before is None:
            return False
        was_running = before["status"] == "running"

        updated = conn.execute(update_sql, (new_status,) * 4 + (task_id,))
        if updated.rowcount != 1:
            return False

        closed_run_id = None
        if was_running and new_status != "running" and before["current_run_id"]:
            closed_run_id = kanban_db._end_run(
                conn,
                task_id,
                outcome="reclaimed",
                status="reclaimed",
                summary=f"status changed to {new_status} (dashboard/direct)",
            )

        event_args = (
            task_id,
            closed_run_id,
            json.dumps({"status": new_status}),
            int(time.time()),
        )
        conn.execute(event_sql, event_args)

        # A re-opened or finished task can change which children are ready.
        if new_status == "done" or new_status == "ready":
            kanban_db.recompute_ready(conn)
        return True
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Comments
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class CommentBody(BaseModel):
    # Request body for POST /tasks/{task_id}/comments.

    # Comment text; the endpoint rejects values that are blank after strip().
    body: str
    # Comment author; the endpoint substitutes "dashboard" when this is falsy.
    author: Optional[str] = "dashboard"
|
||||
|
||||
|
||||
@router.post("/tasks/{task_id}/comments")
def add_comment(task_id: str, payload: CommentBody):
    """Attach a comment to an existing task.

    Raises:
        HTTPException: 400 when the comment body is blank after stripping
            whitespace; 404 when ``task_id`` does not resolve to a task.

    Returns:
        ``{"ok": True}`` once ``kanban_db.add_comment`` has run.
    """
    if not payload.body.strip():
        raise HTTPException(status_code=400, detail="body is required")

    db = _conn()
    try:
        existing = kanban_db.get_task(db, task_id)
        if existing is None:
            raise HTTPException(status_code=404, detail=f"task {task_id} not found")
        # A falsy author (None or "") is attributed to the dashboard itself.
        who = payload.author or "dashboard"
        kanban_db.add_comment(db, task_id, author=who, body=payload.body)
    finally:
        db.close()
    return {"ok": True}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Links
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class LinkBody(BaseModel):
    # Request body for POST /links; both ids are handed to
    # kanban_db.link_tasks(conn, parent_id, child_id) in that order.

    parent_id: str
    child_id: str
|
||||
|
||||
|
||||
@router.post("/links")
def add_link(payload: LinkBody):
    """Link two tasks as parent and child via ``kanban_db.link_tasks``.

    Returns:
        ``{"ok": True}`` when the link call completes.

    Raises:
        HTTPException: 400 carrying the message of any ``ValueError``
            raised by ``kanban_db.link_tasks``.
    """
    conn = _conn()
    try:
        kanban_db.link_tasks(conn, payload.parent_id, payload.child_id)
        return {"ok": True}
    except ValueError as e:
        # Chain explicitly so the original error survives as __cause__
        # instead of showing up as "during handling ... another exception".
        raise HTTPException(status_code=400, detail=str(e)) from e
    finally:
        conn.close()
|
||||
|
||||
|
||||
@router.delete("/links")
def delete_link(parent_id: str = Query(...), child_id: str = Query(...)):
    """Remove the link between ``parent_id`` and ``child_id``.

    Both ids are required query parameters. The response is
    ``{"ok": <bool>}``, where the flag is the truthiness of what
    ``kanban_db.unlink_tasks`` returned.
    """
    db = _conn()
    try:
        removed = kanban_db.unlink_tasks(db, parent_id, child_id)
        response = {"ok": bool(removed)}
    finally:
        db.close()
    return response
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Bulk actions (multi-select on the board)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class BulkTaskBody(BaseModel):
    # Request body for POST /tasks/bulk: one patch applied to every listed id.

    # Task ids to patch; falsy entries are dropped by the endpoint.
    ids: list[str]
    # Target status; ignored for a request that sets ``archive``.
    status: Optional[str] = None
    # None = leave the assignee unchanged; "" = unassign.
    assignee: Optional[str] = None
    # None = leave the priority unchanged.
    priority: Optional[int] = None
    # When true, archive each task (takes precedence over ``status``).
    archive: bool = False
|
||||
|
||||
|
||||
@router.post("/tasks/bulk")
def bulk_update(payload: BulkTaskBody):
    """Apply one patch to each task id in ``payload.ids``.

    Ids are processed independently: a failure on one id is recorded in
    that id's result and the loop moves on to the next one. The response
    is ``{"results": [...]}`` with one ``{"id", "ok"[, "error"]}`` entry
    per id so the UI can surface partial success.

    Raises:
        HTTPException: 400 when no non-empty id was supplied.
    """
    wanted = [tid for tid in (payload.ids or []) if tid]
    if not wanted:
        raise HTTPException(status_code=400, detail="ids is required")

    def _fail(outcome: dict, reason: str) -> None:
        # Mark one id's result as failed (the latest reason wins).
        outcome["ok"] = False
        outcome["error"] = reason

    def _patch_one(db, tid: str, outcome: dict) -> None:
        # Apply archive/status, then assignee, then priority to one task.
        if kanban_db.get_task(db, tid) is None:
            _fail(outcome, "not found")
            return

        if payload.archive:
            if not kanban_db.archive_task(db, tid):
                _fail(outcome, "archive refused")
        elif payload.status is not None:
            target = payload.status
            if target == "done":
                moved = kanban_db.complete_task(db, tid)
            elif target == "blocked":
                moved = kanban_db.block_task(db, tid)
            elif target == "ready":
                # blocked -> ready goes through the unblock verb; any other
                # origin is a direct status write.
                current = kanban_db.get_task(db, tid)
                if current and current.status == "blocked":
                    moved = kanban_db.unblock_task(db, tid)
                else:
                    moved = _set_status_direct(db, tid, "ready")
            elif target in ("todo", "running", "triage"):
                moved = _set_status_direct(db, tid, target)
            else:
                # Unknown status: nothing else is applied to this id.
                _fail(outcome, f"unknown status {target!r}")
                return
            if not moved:
                _fail(outcome, f"transition to {target!r} refused")

        if payload.assignee is not None:
            try:
                assigned = kanban_db.assign_task(db, tid, payload.assignee or None)
                if not assigned:
                    _fail(outcome, "assign refused")
            except RuntimeError as exc:
                _fail(outcome, str(exc))

        if payload.priority is not None:
            with kanban_db.write_txn(db):
                db.execute(
                    "UPDATE tasks SET priority = ? WHERE id = ?",
                    (int(payload.priority), tid),
                )
                db.execute(
                    "INSERT INTO task_events (task_id, kind, payload, created_at) "
                    "VALUES (?, 'reprioritized', ?, ?)",
                    (
                        tid,
                        json.dumps({"priority": int(payload.priority)}),
                        int(time.time()),
                    ),
                )

    results: list[dict] = []
    db = _conn()
    try:
        for tid in wanted:
            outcome: dict[str, Any] = {"id": tid, "ok": True}
            try:
                _patch_one(db, tid, outcome)
            except Exception as exc:  # defensive — one bad id shouldn't kill the batch
                _fail(outcome, str(exc))
            results.append(outcome)
        return {"results": results}
    finally:
        db.close()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Plugin config (read dashboard.kanban.* defaults from config.yaml)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@router.get("/config")
def get_config():
    """Return kanban dashboard preferences from the Hermes config.

    Reads the ``dashboard.kanban`` section of whatever
    ``hermes_cli.config.load_config`` returns and falls back to defaults
    when the config cannot be loaded, or when ``dashboard`` /
    ``dashboard.kanban`` is missing or is not a mapping. Used by the UI to
    pre-select tenant filters, toggle markdown rendering, or set
    column-width preferences without a round-trip per page load.

    Returns:
        A dict with ``default_tenant`` (str), ``lane_by_profile`` (bool),
        ``include_archived_by_default`` (bool) and ``render_markdown``
        (bool).
    """
    try:
        from hermes_cli.config import load_config
        cfg = load_config() or {}
    except Exception as exc:
        # Best-effort: a missing or broken config must not break the board.
        log.debug("Kanban config unavailable, using defaults: %s", exc)
        cfg = {}

    # Each level may be absent or a scalar (e.g. ``kanban: true``); only a
    # real mapping is descended into, anything else means "use defaults".
    dash_cfg = cfg.get("dashboard") if isinstance(cfg, dict) else None
    k_cfg = dash_cfg.get("kanban") if isinstance(dash_cfg, dict) else None
    if not isinstance(k_cfg, dict):
        k_cfg = {}

    return {
        "default_tenant": k_cfg.get("default_tenant") or "",
        "lane_by_profile": bool(k_cfg.get("lane_by_profile", True)),
        "include_archived_by_default": bool(k_cfg.get("include_archived_by_default", False)),
        "render_markdown": bool(k_cfg.get("render_markdown", True)),
    }
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Stats (per-profile / per-status counts + oldest-ready age)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@router.get("/stats")
def get_stats():
    """Return the board statistics computed by ``kanban_db.board_stats``.

    Intended for the dashboard HUD and for router profiles that want to
    answer "is this specialist overloaded?" (per-status and per-assignee
    counts plus oldest-ready age) without scanning the whole board.
    """
    db = _conn()
    try:
        stats = kanban_db.board_stats(db)
    finally:
        db.close()
    return stats
|
||||
|
||||
|
||||
@router.get("/assignees")
def get_assignees():
    """Return ``{"assignees": ...}`` as reported by ``kanban_db.known_assignees``.

    The dashboard feeds its assignee dropdown from this list. Per the
    endpoint's original notes, it is the union of the profiles found under
    ``~/.hermes/profiles/*`` and every assignee already used on the board,
    so a freshly created profile shows up before it owns any task.
    """
    db = _conn()
    try:
        known = kanban_db.known_assignees(db)
    finally:
        db.close()
    return {"assignees": known}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Worker log (read-only; file written by _default_spawn)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@router.get("/tasks/{task_id}/log")
def get_task_log(task_id: str, tail: Optional[int] = Query(None, ge=1, le=2_000_000)):
    """Return the worker's stdout/stderr log for a task.

    ``tail`` caps the response size (bytes) so the dashboard drawer
    doesn't paginate megabytes into the browser; it is forwarded to
    ``kanban_db.read_worker_log`` as ``tail_bytes``.

    A 404 is raised only when the task itself does not exist. When
    ``read_worker_log`` returns ``None`` the response is still a 200, with
    ``exists`` false and empty ``content``.

    The on-disk log is rotated at 2 MiB per ``_rotate_worker_log`` — a
    single ``.log.1`` is kept, no further generations, so disk usage per
    task is bounded at ~4 MiB.

    Returns:
        A dict with ``task_id``, ``path``, ``exists``, ``size_bytes``,
        ``content`` and ``truncated``.

    Raises:
        HTTPException: 404 when ``task_id`` does not resolve to a task.
    """
    conn = _conn()
    try:
        task = kanban_db.get_task(conn, task_id)
    finally:
        conn.close()
    if task is None:
        raise HTTPException(status_code=404, detail=f"task {task_id} not found")

    content = kanban_db.read_worker_log(task_id, tail_bytes=tail)
    log_path = kanban_db.worker_log_path(task_id)
    # stat() directly rather than exists()+stat(): the file can be rotated
    # or removed between the two calls, which used to surface as a 500.
    try:
        size = log_path.stat().st_size
    except (FileNotFoundError, NotADirectoryError):
        size = 0

    return {
        "task_id": task_id,
        "path": str(log_path),
        "exists": content is not None,
        "size_bytes": size,
        "content": content or "",
        # Truncated when the on-disk file was larger than the tail cap.
        "truncated": bool(tail and size > tail),
    }
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Dispatch nudge (optional quick-path so the UI doesn't wait 60 s)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@router.post("/dispatch")
def dispatch(dry_run: bool = Query(False), max_n: int = Query(8, alias="max")):
    """Run a single ``kanban_db.dispatch_once`` pass on demand.

    ``dry_run`` is passed through unchanged; ``max_n`` (query name
    ``max``) is passed as ``max_spawn``. The result is returned as a dict
    via ``dataclasses.asdict``; if it is not a dataclass instance the
    response is ``{"result": str(result)}`` instead.
    """
    db = _conn()
    try:
        outcome = kanban_db.dispatch_once(db, dry_run=dry_run, max_spawn=max_n)
        try:
            body = asdict(outcome)
        except TypeError:
            # asdict() rejects anything that is not a dataclass instance.
            body = {"result": str(outcome)}
        return body
    finally:
        db.close()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# WebSocket: /events?since=<event_id>
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Poll interval, in seconds, for the event tail loop (the value is passed to
# ``asyncio.sleep`` between polls). SQLite WAL + 300 ms polling is
# the simplest and most robust approach; it adds a fraction of a percent
# of CPU and has no shared state to synchronize across workers.
_EVENT_POLL_SECONDS: float = 0.3
|
||||
|
||||
|
||||
@router.websocket("/events")
async def stream_events(ws: WebSocket):
    """Stream new ``task_events`` rows to a dashboard client over WebSocket.

    The session token is taken from the ``token`` query parameter, since
    browsers can't set Authorization on a WS upgrade (this matches how the
    PTY bridge authenticates in hermes_cli/web_server.py). A failed check
    closes the socket with the policy-violation code before accepting.

    ``since`` (default ``0``, also used when the value is not an integer)
    is the id of the last event the client has seen. The loop then polls
    every ``_EVENT_POLL_SECONDS`` for rows with a larger id, at most 200
    per poll, and sends ``{"events": [...], "cursor": <last id>}``
    whenever a poll finds something.
    """
    if not _check_ws_token(ws.query_params.get("token")):
        await ws.close(code=http_status.WS_1008_POLICY_VIOLATION)
        return
    await ws.accept()

    def _decode_payload(row):
        # Payloads are stored as JSON text; empty or undecodable -> None.
        try:
            return json.loads(row["payload"]) if row["payload"] else None
        except Exception:
            return None

    def _fetch_new(cursor_val: int) -> tuple[int, list[dict]]:
        # Blocking read on a fresh connection; run off the event loop.
        db = kanban_db.connect()
        try:
            rows = db.execute(
                "SELECT id, task_id, run_id, kind, payload, created_at "
                "FROM task_events WHERE id > ? ORDER BY id ASC LIMIT 200",
                (cursor_val,),
            ).fetchall()
            batch: list[dict] = [
                {
                    "id": row["id"],
                    "task_id": row["task_id"],
                    "run_id": row["run_id"],
                    "kind": row["kind"],
                    "payload": _decode_payload(row),
                    "created_at": row["created_at"],
                }
                for row in rows
            ]
            # Rows arrive in ascending id order, so the last one is the cursor.
            latest = rows[-1]["id"] if rows else cursor_val
            return latest, batch
        finally:
            db.close()

    try:
        try:
            cursor = int(ws.query_params.get("since", "0"))
        except ValueError:
            cursor = 0

        while True:
            cursor, events = await asyncio.to_thread(_fetch_new, cursor)
            if events:
                await ws.send_json({"events": events, "cursor": cursor})
            await asyncio.sleep(_EVENT_POLL_SECONDS)
    except WebSocketDisconnect:
        return
    except Exception as exc:  # defensive: never crash the dashboard worker
        log.warning("Kanban event stream error: %s", exc)
        try:
            await ws.close()
        except Exception:
            pass
|
||||
@@ -1,17 +0,0 @@
|
||||
[Unit]
|
||||
Description=Hermes Kanban dispatcher (hermes kanban daemon)
|
||||
Documentation=https://hermes-agent.nousresearch.com/docs/user-guide/features/kanban
|
||||
After=network.target
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
ExecStart=/usr/bin/env hermes kanban daemon --interval 60 --pidfile %t/hermes-kanban-dispatcher.pid
|
||||
Restart=on-failure
|
||||
RestartSec=5
|
||||
# Log to the journal via stdout/stderr; the dispatcher also writes per-task
|
||||
# worker output to $HERMES_HOME/kanban/logs/<task>.log.
|
||||
StandardOutput=journal
|
||||
StandardError=journal
|
||||
|
||||
[Install]
|
||||
WantedBy=default.target
|
||||
@@ -43,7 +43,7 @@ _TIMEOUT = 30.0
|
||||
# ---------------------------------------------------------------------------
|
||||
# Process-level atexit safety net — ensures pending sessions are committed
|
||||
# even if shutdown_memory_provider is never called (e.g. gateway crash,
|
||||
# SIGKILL, or exception in the session expiry watcher preventing shutdown).
|
||||
# SIGKILL, or exception in _async_flush_memories preventing shutdown).
|
||||
# ---------------------------------------------------------------------------
|
||||
_last_active_provider: Optional["OpenVikingMemoryProvider"] = None
|
||||
|
||||
|
||||
+328
-293
@@ -40,7 +40,6 @@ from types import SimpleNamespace
|
||||
import urllib.request
|
||||
import uuid
|
||||
from typing import List, Dict, Any, Optional
|
||||
from urllib.parse import urlparse, parse_qs, urlunparse
|
||||
from openai import OpenAI
|
||||
import fire
|
||||
from datetime import datetime
|
||||
@@ -86,7 +85,6 @@ from agent.error_classifier import classify_api_error, FailoverReason
|
||||
from agent.prompt_builder import (
|
||||
DEFAULT_AGENT_IDENTITY, PLATFORM_HINTS,
|
||||
MEMORY_GUIDANCE, SESSION_SEARCH_GUIDANCE, SKILLS_GUIDANCE,
|
||||
KANBAN_GUIDANCE,
|
||||
build_nous_subscription_prompt,
|
||||
)
|
||||
from agent.model_metadata import (
|
||||
@@ -893,6 +891,7 @@ class AIAgent:
|
||||
checkpoints_enabled: bool = False,
|
||||
checkpoint_max_snapshots: int = 50,
|
||||
pass_session_id: bool = False,
|
||||
persist_session: bool = True,
|
||||
):
|
||||
"""
|
||||
Initialize the AI Agent.
|
||||
@@ -964,6 +963,7 @@ class AIAgent:
|
||||
self.background_review_callback = None # Optional sync callback for gateway delivery
|
||||
self.skip_context_files = skip_context_files
|
||||
self.pass_session_id = pass_session_id
|
||||
self.persist_session = persist_session
|
||||
self._credential_pool = credential_pool
|
||||
self.log_prefix_chars = log_prefix_chars
|
||||
self.log_prefix = f"{log_prefix} " if log_prefix else ""
|
||||
@@ -1033,16 +1033,12 @@ class AIAgent:
|
||||
# surface.
|
||||
# When api_mode was explicitly provided, respect it — the user
|
||||
# knows what their endpoint supports (#10473).
|
||||
# Exception: Azure OpenAI serves gpt-5.x on /chat/completions and
|
||||
# does NOT support the Responses API — skip the upgrade for Azure
|
||||
# (openai.azure.com), even though it looks OpenAI-compatible.
|
||||
if (
|
||||
api_mode is None
|
||||
and self.api_mode == "chat_completions"
|
||||
and self.provider != "copilot-acp"
|
||||
and not str(self.base_url or "").lower().startswith("acp://copilot")
|
||||
and not str(self.base_url or "").lower().startswith("acp+tcp://")
|
||||
and not self._is_azure_openai_url()
|
||||
and (
|
||||
self._is_direct_openai_url()
|
||||
or self._provider_model_requires_responses_api(
|
||||
@@ -1318,22 +1314,7 @@ class AIAgent:
|
||||
if api_key and base_url:
|
||||
# Explicit credentials from CLI/gateway — construct directly.
|
||||
# The runtime provider resolver already handled auth for us.
|
||||
# Extract query params (e.g. Azure api-version) from base_url
|
||||
# and pass via default_query to prevent loss during SDK URL
|
||||
# joining (httpx drops query string when joining paths).
|
||||
_parsed_url = urlparse(base_url)
|
||||
if _parsed_url.query:
|
||||
_clean_url = urlunparse(_parsed_url._replace(query=""))
|
||||
_query_params = {
|
||||
k: v[0] for k, v in parse_qs(_parsed_url.query).items()
|
||||
}
|
||||
client_kwargs = {
|
||||
"api_key": api_key,
|
||||
"base_url": _clean_url,
|
||||
"default_query": _query_params,
|
||||
}
|
||||
else:
|
||||
client_kwargs = {"api_key": api_key, "base_url": base_url}
|
||||
client_kwargs = {"api_key": api_key, "base_url": base_url}
|
||||
if _provider_timeout is not None:
|
||||
client_kwargs["timeout"] = _provider_timeout
|
||||
if self.provider == "copilot-acp":
|
||||
@@ -1597,6 +1578,7 @@ class AIAgent:
|
||||
self._memory_enabled = False
|
||||
self._user_profile_enabled = False
|
||||
self._memory_nudge_interval = 10
|
||||
self._memory_flush_min_turns = 6
|
||||
self._turns_since_memory = 0
|
||||
self._iters_since_skill = 0
|
||||
if not skip_memory:
|
||||
@@ -1605,6 +1587,7 @@ class AIAgent:
|
||||
self._memory_enabled = mem_config.get("memory_enabled", False)
|
||||
self._user_profile_enabled = mem_config.get("user_profile_enabled", False)
|
||||
self._memory_nudge_interval = int(mem_config.get("nudge_interval", 10))
|
||||
self._memory_flush_min_turns = int(mem_config.get("flush_min_turns", 6))
|
||||
if self._memory_enabled or self._user_profile_enabled:
|
||||
from tools.memory_tool import MemoryStore
|
||||
self._memory_store = MemoryStore(
|
||||
@@ -1784,64 +1767,43 @@ class AIAgent:
|
||||
# Store for reuse in switch_model (so config override persists across model switches)
|
||||
self._config_context_length = _config_context_length
|
||||
|
||||
# Resolve custom_providers list once for reuse below (startup
|
||||
# context-length override and plugin context-engine init).
|
||||
try:
|
||||
from hermes_cli.config import get_compatible_custom_providers
|
||||
_custom_providers = get_compatible_custom_providers(_agent_cfg)
|
||||
except Exception:
|
||||
_custom_providers = _agent_cfg.get("custom_providers")
|
||||
if not isinstance(_custom_providers, list):
|
||||
_custom_providers = []
|
||||
|
||||
# Check custom_providers per-model context_length
|
||||
if _config_context_length is None and _custom_providers:
|
||||
if _config_context_length is None:
|
||||
try:
|
||||
from hermes_cli.config import get_custom_provider_context_length
|
||||
_cp_ctx_resolved = get_custom_provider_context_length(
|
||||
model=self.model,
|
||||
base_url=self.base_url,
|
||||
custom_providers=_custom_providers,
|
||||
)
|
||||
if _cp_ctx_resolved:
|
||||
_config_context_length = int(_cp_ctx_resolved)
|
||||
from hermes_cli.config import get_compatible_custom_providers
|
||||
_custom_providers = get_compatible_custom_providers(_agent_cfg)
|
||||
except Exception:
|
||||
_cp_ctx_resolved = None
|
||||
|
||||
# Surface a clear warning if the user set a context_length but it
|
||||
# wasn't a valid positive int — the helper silently skips those.
|
||||
if _config_context_length is None:
|
||||
_target = self.base_url.rstrip("/") if self.base_url else ""
|
||||
for _cp_entry in _custom_providers:
|
||||
if not isinstance(_cp_entry, dict):
|
||||
continue
|
||||
_cp_url = (_cp_entry.get("base_url") or "").rstrip("/")
|
||||
if _target and _cp_url == _target:
|
||||
_cp_models = _cp_entry.get("models", {})
|
||||
if isinstance(_cp_models, dict):
|
||||
_cp_model_cfg = _cp_models.get(self.model, {})
|
||||
if isinstance(_cp_model_cfg, dict):
|
||||
_cp_ctx = _cp_model_cfg.get("context_length")
|
||||
if _cp_ctx is not None:
|
||||
try:
|
||||
_parsed = int(_cp_ctx)
|
||||
if _parsed <= 0:
|
||||
raise ValueError
|
||||
except (TypeError, ValueError):
|
||||
logger.warning(
|
||||
"Invalid context_length for model %r in "
|
||||
"custom_providers: %r — must be a positive "
|
||||
"integer (e.g. 256000, not '256K'). "
|
||||
"Falling back to auto-detection.",
|
||||
self.model, _cp_ctx,
|
||||
)
|
||||
print(
|
||||
f"\n⚠ Invalid context_length for model {self.model!r} in custom_providers: {_cp_ctx!r}\n"
|
||||
f" Must be a positive integer (e.g. 256000, not '256K').\n"
|
||||
f" Falling back to auto-detected context window.\n",
|
||||
file=sys.stderr,
|
||||
)
|
||||
break
|
||||
_custom_providers = _agent_cfg.get("custom_providers")
|
||||
if not isinstance(_custom_providers, list):
|
||||
_custom_providers = []
|
||||
for _cp_entry in _custom_providers:
|
||||
if not isinstance(_cp_entry, dict):
|
||||
continue
|
||||
_cp_url = (_cp_entry.get("base_url") or "").rstrip("/")
|
||||
if _cp_url and _cp_url == self.base_url.rstrip("/"):
|
||||
_cp_models = _cp_entry.get("models", {})
|
||||
if isinstance(_cp_models, dict):
|
||||
_cp_model_cfg = _cp_models.get(self.model, {})
|
||||
if isinstance(_cp_model_cfg, dict):
|
||||
_cp_ctx = _cp_model_cfg.get("context_length")
|
||||
if _cp_ctx is not None:
|
||||
try:
|
||||
_config_context_length = int(_cp_ctx)
|
||||
except (TypeError, ValueError):
|
||||
logger.warning(
|
||||
"Invalid context_length for model %r in "
|
||||
"custom_providers: %r — must be a plain "
|
||||
"integer (e.g. 256000, not '256K'). "
|
||||
"Falling back to auto-detection.",
|
||||
self.model, _cp_ctx,
|
||||
)
|
||||
print(
|
||||
f"\n⚠ Invalid context_length for model {self.model!r} in custom_providers: {_cp_ctx!r}\n"
|
||||
f" Must be a plain integer (e.g. 256000, not '256K').\n"
|
||||
f" Falling back to auto-detected context window.\n",
|
||||
file=sys.stderr,
|
||||
)
|
||||
break
|
||||
|
||||
# Select context engine: config-driven (like memory providers).
|
||||
# 1. Check config.yaml context.engine setting
|
||||
@@ -1891,7 +1853,6 @@ class AIAgent:
|
||||
api_key=getattr(self, "api_key", ""),
|
||||
config_context_length=_config_context_length,
|
||||
provider=self.provider,
|
||||
custom_providers=_custom_providers,
|
||||
)
|
||||
self.context_compressor.update_model(
|
||||
model=self.model,
|
||||
@@ -2182,23 +2143,12 @@ class AIAgent:
|
||||
# ── Update context compressor ──
|
||||
if hasattr(self, "context_compressor") and self.context_compressor:
|
||||
from agent.model_metadata import get_model_context_length
|
||||
# Re-read custom_providers from live config so per-model
|
||||
# context_length overrides are honored when switching to a
|
||||
# custom provider mid-session (closes #15779).
|
||||
_sm_custom_providers = None
|
||||
try:
|
||||
from hermes_cli.config import load_config, get_compatible_custom_providers
|
||||
_sm_cfg = load_config()
|
||||
_sm_custom_providers = get_compatible_custom_providers(_sm_cfg)
|
||||
except Exception:
|
||||
_sm_custom_providers = None
|
||||
new_context_length = get_model_context_length(
|
||||
self.model,
|
||||
base_url=self.base_url,
|
||||
api_key=self.api_key,
|
||||
provider=self.provider,
|
||||
config_context_length=getattr(self, "_config_context_length", None),
|
||||
custom_providers=_sm_custom_providers,
|
||||
)
|
||||
self.context_compressor.update_model(
|
||||
model=self.model,
|
||||
@@ -2449,7 +2399,6 @@ class AIAgent:
|
||||
base_url=aux_base_url,
|
||||
api_key=aux_api_key,
|
||||
config_context_length=getattr(self, "_aux_compression_context_length_config", None),
|
||||
provider=getattr(self, "provider", ""),
|
||||
)
|
||||
|
||||
# Hard floor: the auxiliary compression model must have at least
|
||||
@@ -2476,11 +2425,6 @@ class AIAgent:
|
||||
# compression actually works this session. The hard floor
|
||||
# above guarantees aux_context >= MINIMUM_CONTEXT_LENGTH,
|
||||
# so the new threshold is always >= 64K.
|
||||
#
|
||||
# The compression summariser sends a single user-role
|
||||
# prompt (no system prompt, no tools) to the aux model, so
|
||||
# new_threshold == aux_context is safe: the request is
|
||||
# the raw messages plus a small summarisation instruction.
|
||||
old_threshold = threshold
|
||||
new_threshold = aux_context
|
||||
self.context_compressor.threshold_tokens = new_threshold
|
||||
@@ -2556,22 +2500,6 @@ class AIAgent:
|
||||
)
|
||||
return hostname == "api.openai.com"
|
||||
|
||||
def _is_azure_openai_url(self, base_url: str = None) -> bool:
|
||||
"""Return True when a base URL targets Azure OpenAI.
|
||||
|
||||
Azure OpenAI exposes an OpenAI-compatible endpoint at
|
||||
``{resource}.openai.azure.com/openai/v1`` that accepts the
|
||||
standard ``openai`` Python client. Unlike api.openai.com it
|
||||
does NOT support the Responses API — gpt-5.x models are served
|
||||
on the regular ``/chat/completions`` path — so routing decisions
|
||||
must treat Azure separately from direct OpenAI.
|
||||
"""
|
||||
if base_url is not None:
|
||||
url = str(base_url).lower()
|
||||
else:
|
||||
url = getattr(self, "_base_url_lower", "") or ""
|
||||
return "openai.azure.com" in url
|
||||
|
||||
def _resolved_api_call_timeout(self) -> float:
|
||||
"""Resolve the effective per-call request timeout in seconds.
|
||||
|
||||
@@ -2743,14 +2671,12 @@ class AIAgent:
|
||||
|
||||
def _max_tokens_param(self, value: int) -> dict:
|
||||
"""Return the correct max tokens kwarg for the current provider.
|
||||
|
||||
|
||||
OpenAI's newer models (gpt-4o, o-series, gpt-5+) require
|
||||
'max_completion_tokens'. Azure OpenAI also requires
|
||||
'max_completion_tokens' for gpt-5.x models served via the
|
||||
OpenAI-compatible endpoint. OpenRouter, local models, and older
|
||||
'max_completion_tokens'. OpenRouter, local models, and older
|
||||
OpenAI models use 'max_tokens'.
|
||||
"""
|
||||
if self._is_direct_openai_url() or self._is_azure_openai_url():
|
||||
if self._is_direct_openai_url():
|
||||
return {"max_completion_tokens": value}
|
||||
return {"max_tokens": value}
|
||||
|
||||
@@ -3108,28 +3034,13 @@ class AIAgent:
|
||||
)
|
||||
|
||||
_SKILL_REVIEW_PROMPT = (
|
||||
"Review the conversation above and consider whether a skill should be saved or updated.\n\n"
|
||||
"Work in this order — do not skip steps:\n\n"
|
||||
"1. SURVEY the existing skill landscape first. Call skills_list to see what you "
|
||||
"have. If anything looks potentially relevant, skill_view it before deciding. "
|
||||
"You are looking for the CLASS of task that just happened, not the exact task. "
|
||||
"Example: a successful Tauri build is in the class \"desktop app build "
|
||||
"troubleshooting\", not \"fix my specific Tauri error today\".\n\n"
|
||||
"2. THINK CLASS-FIRST. What general pattern of task did the user just complete? "
|
||||
"What conditions will trigger this pattern again? Describe the class in one "
|
||||
"sentence before looking at what to save.\n\n"
|
||||
"3. PREFER GENERALIZING AN EXISTING SKILL over creating a new one. If a skill "
|
||||
"already covers the class — even partially — update it (skill_manage patch) "
|
||||
"with the new insight. Broaden its \"when to use\" trigger if needed.\n\n"
|
||||
"4. ONLY CREATE A NEW SKILL when no existing skill reasonably covers the class. "
|
||||
"When you create one, name and scope it at the class level "
|
||||
"(\"react-i18n-setup\", not \"add-i18n-to-my-dashboard-app\"). The trigger "
|
||||
"section must describe the class of situations, not this one session.\n\n"
|
||||
"5. If you notice two existing skills that overlap, note it in your response "
|
||||
"so a future review can consolidate them. Do not consolidate now unless the "
|
||||
"overlap is obvious and low-risk.\n\n"
|
||||
"Only act when something is genuinely worth saving. "
|
||||
"If nothing stands out, just say 'Nothing to save.' and stop."
|
||||
"Review the conversation above and consider saving or updating a skill if appropriate.\n\n"
|
||||
"Focus on: was a non-trivial approach used to complete a task that required trial "
|
||||
"and error, or changing course due to experiential findings along the way, or did "
|
||||
"the user expect or desire a different method or outcome?\n\n"
|
||||
"If a relevant skill already exists, update it with what you learned. "
|
||||
"Otherwise, create a new skill if the approach is reusable.\n"
|
||||
"If nothing is worth saving, just say 'Nothing to save.' and stop."
|
||||
)
|
||||
|
||||
_COMBINED_REVIEW_PROMPT = (
|
||||
@@ -3139,16 +3050,9 @@ class AIAgent:
|
||||
"about how you should behave, their work style, or ways they want you to operate? "
|
||||
"If so, save using the memory tool.\n\n"
|
||||
"**Skills**: Was a non-trivial approach used to complete a task that required trial "
|
||||
"and error, changing course due to experiential findings, or a different method "
|
||||
"or outcome than the user expected? If so, work in this order:\n"
|
||||
" a. SURVEY existing skills first (skills_list, then skill_view on candidates).\n"
|
||||
" b. Identify the CLASS of task, not the specific task "
|
||||
"(\"desktop app build troubleshooting\", not \"fix my Tauri error\").\n"
|
||||
" c. PREFER UPDATING/GENERALIZING an existing skill that covers the class.\n"
|
||||
" d. ONLY CREATE A NEW SKILL if no existing one covers the class. Scope at "
|
||||
"the class level, not this one session.\n"
|
||||
" e. If you notice overlapping skills during the survey, note it so a future "
|
||||
"review can consolidate them.\n\n"
|
||||
"and error, or changing course due to experiential findings along the way, or did "
|
||||
"the user expect or desire a different method or outcome? If a relevant skill "
|
||||
"already exists, update it. Otherwise, create a new one if the approach is reusable.\n\n"
|
||||
"Only act if there's something genuinely worth saving. "
|
||||
"If nothing stands out, just say 'Nothing to save.' and stop."
|
||||
)
|
||||
@@ -3246,25 +3150,12 @@ class AIAgent:
|
||||
with open(os.devnull, "w") as _devnull, \
|
||||
contextlib.redirect_stdout(_devnull), \
|
||||
contextlib.redirect_stderr(_devnull):
|
||||
# Inherit the parent agent's live runtime (provider, model,
|
||||
# base_url, api_key, api_mode) so the fork uses the exact
|
||||
# same credentials the main turn is using. Without this,
|
||||
# AIAgent.__init__ re-runs auto-resolution from env vars,
|
||||
# which fails for OAuth-only providers, session-scoped
|
||||
# creds, or credential-pool setups where the resolver can't
|
||||
# reconstruct auth from scratch -- producing the spurious
|
||||
# "No LLM provider configured" warning at end of turn.
|
||||
_parent_runtime = self._current_main_runtime()
|
||||
review_agent = AIAgent(
|
||||
model=self.model,
|
||||
max_iterations=8,
|
||||
quiet_mode=True,
|
||||
platform=self.platform,
|
||||
provider=self.provider,
|
||||
api_mode=_parent_runtime.get("api_mode") or None,
|
||||
base_url=_parent_runtime.get("base_url") or None,
|
||||
api_key=_parent_runtime.get("api_key") or None,
|
||||
credential_pool=getattr(self, "_credential_pool", None),
|
||||
parent_session_id=self.session_id,
|
||||
)
|
||||
review_agent._memory_write_origin = "background_review"
|
||||
@@ -3365,7 +3256,10 @@ class AIAgent:
|
||||
"""Save session state to both JSON log and SQLite on any exit path.
|
||||
|
||||
Ensures conversations are never lost, even on errors or early returns.
|
||||
Skipped when ``persist_session=False`` (ephemeral helper flows).
|
||||
"""
|
||||
if not self.persist_session:
|
||||
return
|
||||
self._apply_persist_user_message_override(messages)
|
||||
self._session_messages = messages
|
||||
self._save_session_log(messages)
|
||||
@@ -3415,7 +3309,6 @@ class AIAgent:
|
||||
reasoning_content=msg.get("reasoning_content") if role == "assistant" else None,
|
||||
reasoning_details=msg.get("reasoning_details") if role == "assistant" else None,
|
||||
codex_reasoning_items=msg.get("codex_reasoning_items") if role == "assistant" else None,
|
||||
codex_message_items=msg.get("codex_message_items") if role == "assistant" else None,
|
||||
)
|
||||
self._last_flushed_db_idx = len(messages)
|
||||
except Exception as e:
|
||||
@@ -4498,12 +4391,6 @@ class AIAgent:
|
||||
tool_guidance.append(SESSION_SEARCH_GUIDANCE)
|
||||
if "skill_manage" in self.valid_tool_names:
|
||||
tool_guidance.append(SKILLS_GUIDANCE)
|
||||
# Kanban worker/orchestrator lifecycle — only present when the
|
||||
# dispatcher spawned this process (kanban_show check_fn gates on
|
||||
# HERMES_KANBAN_TASK env var). Normal chat sessions never see
|
||||
# this block.
|
||||
if "kanban_show" in self.valid_tool_names:
|
||||
tool_guidance.append(KANBAN_GUIDANCE)
|
||||
if tool_guidance:
|
||||
prompt_parts.append(" ".join(tool_guidance))
|
||||
|
||||
@@ -5250,8 +5137,6 @@ class AIAgent:
|
||||
# response.incomplete instead of response.completed).
|
||||
self._codex_streamed_text_parts: list = []
|
||||
for attempt in range(max_stream_retries + 1):
|
||||
if self._interrupt_requested:
|
||||
raise InterruptedError("Agent interrupted before Codex stream retry")
|
||||
collected_output_items: list = []
|
||||
try:
|
||||
with active_client.responses.stream(**api_kwargs) as stream:
|
||||
@@ -5546,11 +5431,6 @@ class AIAgent:
|
||||
# Other anthropic_messages providers (MiniMax, Alibaba, etc.) use their own keys.
|
||||
if self.provider != "anthropic":
|
||||
return False
|
||||
# Azure endpoints use static API keys — OAuth token rotation doesn't apply.
|
||||
# Refreshing would pick up ~/.claude/.credentials.json OAuth token and break auth.
|
||||
_base = getattr(self, "_anthropic_base_url", "") or ""
|
||||
if "azure.com" in _base:
|
||||
return False
|
||||
|
||||
try:
|
||||
from agent.anthropic_adapter import resolve_anthropic_token, build_anthropic_client
|
||||
@@ -6426,14 +6306,6 @@ class AIAgent:
|
||||
|
||||
try:
|
||||
for _stream_attempt in range(_max_stream_retries + 1):
|
||||
# Check for interrupt before each retry attempt. Without
|
||||
# this, /stop closes the HTTP connection (outer poll loop),
|
||||
# but the retry loop opens a FRESH connection — negating the
|
||||
# interrupt entirely. On slow providers (ollama-cloud) each
|
||||
# retry can block for the full stream-read timeout (120s+),
|
||||
# causing multi-minute delays between /stop and response.
|
||||
if self._interrupt_requested:
|
||||
raise InterruptedError("Agent interrupted before stream retry")
|
||||
try:
|
||||
if self.api_mode == "anthropic_messages":
|
||||
self._try_refresh_anthropic_client_credentials()
|
||||
@@ -6907,15 +6779,10 @@ class AIAgent:
|
||||
# Determine api_mode from provider / base URL / model
|
||||
fb_api_mode = "chat_completions"
|
||||
fb_base_url = str(fb_client.base_url)
|
||||
_fb_is_azure = self._is_azure_openai_url(fb_base_url)
|
||||
if fb_provider == "openai-codex":
|
||||
fb_api_mode = "codex_responses"
|
||||
elif fb_provider == "anthropic" or fb_base_url.rstrip("/").lower().endswith("/anthropic"):
|
||||
fb_api_mode = "anthropic_messages"
|
||||
elif _fb_is_azure:
|
||||
# Azure OpenAI serves gpt-5.x on /chat/completions — does NOT
|
||||
# support the Responses API. Stay on chat_completions.
|
||||
fb_api_mode = "chat_completions"
|
||||
elif self._is_direct_openai_url(fb_base_url):
|
||||
fb_api_mode = "codex_responses"
|
||||
elif self._provider_model_requires_responses_api(
|
||||
@@ -7788,13 +7655,6 @@ class AIAgent:
|
||||
if codex_items:
|
||||
msg["codex_reasoning_items"] = codex_items
|
||||
|
||||
# Codex Responses API: preserve exact assistant message items (with
|
||||
# id/phase) so follow-up turns can replay structured items instead of
|
||||
# flattening to plain text. This is required for prefix cache hits.
|
||||
codex_message_items = getattr(assistant_message, "codex_message_items", None)
|
||||
if codex_message_items:
|
||||
msg["codex_message_items"] = codex_message_items
|
||||
|
||||
if assistant_message.tool_calls:
|
||||
tool_calls = []
|
||||
for tool_call in assistant_message.tool_calls:
|
||||
@@ -7880,53 +7740,25 @@ class AIAgent:
|
||||
if source_msg.get("role") != "assistant":
|
||||
return
|
||||
|
||||
# 1. Explicit reasoning_content already set — preserve it verbatim
|
||||
# (includes DeepSeek/Kimi's own empty-string placeholder written at
|
||||
# creation time, and any valid reasoning content from the same provider).
|
||||
existing = source_msg.get("reasoning_content")
|
||||
if isinstance(existing, str):
|
||||
api_msg["reasoning_content"] = existing
|
||||
explicit_reasoning = source_msg.get("reasoning_content")
|
||||
if isinstance(explicit_reasoning, str):
|
||||
api_msg["reasoning_content"] = explicit_reasoning
|
||||
return
|
||||
|
||||
# 2. Healthy session: promote 'reasoning' field to 'reasoning_content'
|
||||
# for providers that use the internal 'reasoning' key.
|
||||
# This must happen BEFORE the DeepSeek/Kimi tool-call check so that
|
||||
# genuine reasoning content is not overwritten by the empty-string
|
||||
# fallback (#15812 regression in PR #15478).
|
||||
normalized_reasoning = source_msg.get("reasoning")
|
||||
if isinstance(normalized_reasoning, str) and normalized_reasoning:
|
||||
api_msg["reasoning_content"] = normalized_reasoning
|
||||
return
|
||||
|
||||
# 3. DeepSeek / Kimi thinking mode: tool-call turns that lack
|
||||
# reasoning_content are "poisoned history" — a prior provider (MiniMax,
|
||||
# etc.) left them empty. DeepSeek returns HTTP 400 if reasoning_content
|
||||
# is absent on replay; inject "" to satisfy the provider's requirement
|
||||
# without forwarding any cross-provider reasoning content.
|
||||
needs_empty_reasoning = (
|
||||
source_msg.get("tool_calls")
|
||||
and (
|
||||
self._needs_kimi_tool_reasoning()
|
||||
or self._needs_deepseek_tool_reasoning()
|
||||
)
|
||||
)
|
||||
if needs_empty_reasoning:
|
||||
api_msg["reasoning_content"] = ""
|
||||
return
|
||||
|
||||
# 4. DeepSeek / Kimi thinking mode: all assistant messages need
|
||||
# reasoning_content. Inject "" to satisfy the provider's requirement
|
||||
# when no explicit reasoning content is present.
|
||||
if (
|
||||
# Providers that require an echoed reasoning_content on every
|
||||
# assistant tool-call turn. Detection logic lives in the per-provider
|
||||
# helpers so both the creation path (_build_assistant_message) and
|
||||
# this replay path stay in sync.
|
||||
if source_msg.get("tool_calls") and (
|
||||
self._needs_kimi_tool_reasoning()
|
||||
or self._needs_deepseek_tool_reasoning()
|
||||
):
|
||||
api_msg["reasoning_content"] = ""
|
||||
return
|
||||
|
||||
# 5. reasoning_content was present but not a string (e.g. None after
|
||||
# context compaction). Don't pass null to the API.
|
||||
api_msg.pop("reasoning_content", None)
|
||||
|
||||
@staticmethod
|
||||
def _sanitize_tool_calls_for_strict_api(api_msg: dict) -> dict:
|
||||
@@ -8078,6 +7910,251 @@ class AIAgent:
|
||||
"""
|
||||
return self.api_mode != "codex_responses"
|
||||
|
||||
def flush_memories(self, messages: list = None, min_turns: int = None):
    """Give the model one turn to persist memories before context is lost.

    Called before compression, session reset, or CLI exit. Injects a flush
    message, makes one API call, executes any memory tool calls, then
    strips all flush artifacts from the message list.

    The call is attempted through the auxiliary client first; if that
    raises, the agent's own client is used, selected by ``self.api_mode``.
    Failures are logged and reported via ``_emit_auxiliary_failure`` and
    never propagate to the caller.

    Args:
        messages: The current conversation messages. If None, uses
            self._session_messages (last run_conversation state). The list
            is mutated in place while the flush runs and is restored to its
            original contents before returning.
        min_turns: Minimum user turns required to trigger the flush.
            None = use config value (flush_min_turns).
            0 = always flush (used for compression).

    Returns:
        None.
    """
    # A configured minimum of 0 disables the implicit flush entirely; an
    # explicit ``min_turns`` (e.g. 0 from compression) overrides that.
    if self._memory_flush_min_turns == 0 and min_turns is None:
        return
    if "memory" not in self.valid_tool_names or not self._memory_store:
        return
    effective_min = min_turns if min_turns is not None else self._memory_flush_min_turns
    if self._user_turn_count < effective_min:
        return

    if messages is None:
        messages = getattr(self, '_session_messages', None)
    if not messages or len(messages) < 3:
        return

    flush_content = (
        "[System: The session is being compressed. "
        "Save anything worth remembering — prioritize user preferences, "
        "corrections, and recurring patterns over task-specific details.]"
    )
    # Unique marker so the cleanup below can find exactly the message this
    # call injected.
    _sentinel = f"__flush_{id(self)}_{time.monotonic()}"
    flush_msg = {"role": "user", "content": flush_content, "_flush_sentinel": _sentinel}
    messages.append(flush_msg)

    try:
        # Build API messages for the flush call
        _needs_sanitize = self._should_sanitize_tool_calls()
        api_messages = []
        for msg in messages:
            api_msg = msg.copy()
            self._copy_reasoning_content_for_api(msg, api_msg)
            # Internal bookkeeping keys must not reach the provider.
            api_msg.pop("reasoning", None)
            api_msg.pop("finish_reason", None)
            api_msg.pop("_flush_sentinel", None)
            api_msg.pop("_thinking_prefill", None)
            if _needs_sanitize:
                self._sanitize_tool_calls_for_strict_api(api_msg)
            api_messages.append(api_msg)

        if self._cached_system_prompt:
            api_messages = [{"role": "system", "content": self._cached_system_prompt}] + api_messages

        # Make one API call with only the memory tool available
        memory_tool_def = None
        for t in (self.tools or []):
            if t.get("function", {}).get("name") == "memory":
                memory_tool_def = t
                break

        if not memory_tool_def:
            # Nothing to call. The ``finally`` block removes the flush
            # message; popping it here as well would leave the cleanup
            # without its sentinel to stop at.
            return

        # Use auxiliary client for the flush call when available --
        # it's cheaper and avoids Codex Responses API incompatibility.
        from agent.auxiliary_client import (
            call_llm as _call_llm,
            _fixed_temperature_for_model,
            OMIT_TEMPERATURE,
        )
        _aux_available = True
        # Kimi models manage temperature server-side — omit it entirely.
        # Other models with a fixed contract get that value; everyone else
        # gets the historical 0.3 default.
        _fixed_temp = _fixed_temperature_for_model(self.model, self.base_url)
        if _fixed_temp is OMIT_TEMPERATURE:
            _flush_temperature = None
        elif _fixed_temp is not None:
            _flush_temperature = _fixed_temp
        else:
            _flush_temperature = 0.3
        aux_error = None
        try:
            response = _call_llm(
                task="flush_memories",
                messages=api_messages,
                tools=[memory_tool_def],
                temperature=_flush_temperature,
                max_tokens=5120,
                # timeout resolved from auxiliary.flush_memories.timeout config
            )
        except Exception as e:
            # Remember the failure and fall through to the primary client.
            aux_error = e
            _aux_available = False
            response = None

        if not _aux_available and self.api_mode == "codex_responses":
            # No auxiliary client -- use the Codex Responses path directly
            codex_kwargs = self._build_api_kwargs(api_messages)
            _ct_flush = self._get_transport()
            if _ct_flush is not None:
                codex_kwargs["tools"] = _ct_flush.convert_tools([memory_tool_def])
            elif not codex_kwargs.get("tools"):
                codex_kwargs["tools"] = [memory_tool_def]
            if _flush_temperature is not None:
                codex_kwargs["temperature"] = _flush_temperature
            else:
                codex_kwargs.pop("temperature", None)
            if "max_output_tokens" in codex_kwargs:
                codex_kwargs["max_output_tokens"] = 5120
            response = self._run_codex_stream(codex_kwargs)
        elif not _aux_available and self.api_mode == "anthropic_messages":
            # Native Anthropic — use the transport for kwargs
            _tflush = self._get_transport()
            ant_kwargs = _tflush.build_kwargs(
                model=self.model, messages=api_messages,
                tools=[memory_tool_def], max_tokens=5120,
                reasoning_config=None,
                preserve_dots=self._anthropic_preserve_dots(),
            )
            response = self._anthropic_messages_create(ant_kwargs)
        elif not _aux_available:
            api_kwargs = {
                "model": self.model,
                "messages": api_messages,
                "tools": [memory_tool_def],
                **self._max_tokens_param(5120),
            }
            if _flush_temperature is not None:
                api_kwargs["temperature"] = _flush_temperature
            from agent.auxiliary_client import _get_task_timeout
            response = self._ensure_primary_openai_client(reason="flush_memories").chat.completions.create(
                **api_kwargs, timeout=_get_task_timeout("flush_memories")
            )

        if aux_error is not None:
            logger.warning("Auxiliary memory flush failed; used fallback path: %s", aux_error)
            self._emit_auxiliary_failure("memory flush", aux_error)

        def _openai_tool_calls(resp):
            """Return tool calls from an OpenAI-shaped ``.choices[0].message``."""
            if resp is not None and hasattr(resp, "choices") and resp.choices:
                msg = getattr(resp.choices[0], "message", None)
                calls = getattr(msg, "tool_calls", None)
                if calls:
                    return calls
            return []

        def _codex_output_tool_calls(resp):
            """Wrap ``function_call`` items of a Responses-style ``.output``."""
            calls = []
            for item in getattr(resp, "output", []) or []:
                if getattr(item, "type", None) == "function_call":
                    calls.append(SimpleNamespace(
                        id=getattr(item, "call_id", None),
                        type="function",
                        function=SimpleNamespace(
                            name=getattr(item, "name", ""),
                            arguments=getattr(item, "arguments", "{}"),
                        ),
                    ))
            return calls

        # Extract tool calls from the response, handling all API formats
        tool_calls = []
        if self.api_mode == "codex_responses" and not _aux_available:
            _ct_flush = self._get_transport()
            _cnr_flush = _ct_flush.normalize_response(response) if _ct_flush is not None else None
            if _cnr_flush and _cnr_flush.tool_calls:
                tool_calls = [
                    SimpleNamespace(
                        id=tc.id, type="function",
                        function=SimpleNamespace(name=tc.name, arguments=tc.arguments),
                    ) for tc in _cnr_flush.tool_calls
                ]
            else:
                tool_calls = _codex_output_tool_calls(response)
        elif self.api_mode == "anthropic_messages" and not _aux_available:
            _tfn = self._get_transport()
            _flush_result = _tfn.normalize_response(response, strip_tool_prefix=self._is_anthropic_oauth)
            if _flush_result and _flush_result.tool_calls:
                tool_calls = [
                    SimpleNamespace(
                        id=tc.id, type="function",
                        function=SimpleNamespace(name=tc.name, arguments=tc.arguments),
                    ) for tc in _flush_result.tool_calls
                ]
        elif self.api_mode in ("chat_completions", "bedrock_converse"):
            # chat_completions / bedrock — normalize through transport
            # NOTE(review): unlike the branches above, the normalized tool
            # calls are used unwrapped here; presumably they expose
            # ``.function.name`` / ``.function.arguments`` — confirm.
            _tfn = self._get_transport()
            _flush_result = _tfn.normalize_response(response) if _tfn is not None else None
            if _flush_result and _flush_result.tool_calls:
                tool_calls = _flush_result.tool_calls
            else:
                tool_calls = _openai_tool_calls(response)
        elif _aux_available and hasattr(response, "choices") and response.choices:
            # Auxiliary client returned OpenAI-shaped response while main
            # api_mode is codex/anthropic — extract tool_calls from .choices
            tool_calls = _openai_tool_calls(response)

        for tc in tool_calls:
            if tc.function.name == "memory":
                try:
                    args = json.loads(tc.function.arguments)
                    flush_target = args.get("target", "memory")
                    from tools.memory_tool import memory_tool as _memory_tool
                    _memory_tool(
                        action=args.get("action"),
                        target=flush_target,
                        content=args.get("content"),
                        old_text=args.get("old_text"),
                        store=self._memory_store,
                    )
                    if self._memory_manager and args.get("action") in ("add", "replace"):
                        try:
                            self._memory_manager.on_memory_write(
                                args.get("action", ""),
                                flush_target,
                                args.get("content", ""),
                                metadata=self._build_memory_write_metadata(
                                    write_origin="memory_flush",
                                    execution_context="flush_memories",
                                ),
                            )
                        except Exception:
                            # Best-effort notification: the write itself
                            # already succeeded, so only leave a trace.
                            logger.debug(
                                "Memory manager on_memory_write hook failed during flush",
                                exc_info=True,
                            )
                    if not self.quiet_mode:
                        print(f" 🧠 Memory flush: saved to {args.get('target', 'memory')}")
                except Exception as e:
                    logger.warning("Memory flush tool call failed: %s", e)
                    self._emit_auxiliary_failure("memory flush tool", e)
    except Exception as e:
        logger.warning("Memory flush API call failed: %s", e)
        self._emit_auxiliary_failure("memory flush", e)
    finally:
        # Strip flush artifacts: remove everything from the flush message
        # onward. Locate the sentinel first and truncate only if it is
        # present, so a missing sentinel can never drain the caller's
        # conversation.
        for _idx in range(len(messages) - 1, -1, -1):
            _entry = messages[_idx]
            if isinstance(_entry, dict) and _entry.get("_flush_sentinel") == _sentinel:
                del messages[_idx:]
                break
|
||||
|
||||
def _compress_context(self, messages: list, system_message: str, *, approx_tokens: int = None, task_id: str = "default", focus_topic: str = None) -> tuple:
|
||||
"""Compress conversation context and split the session in SQLite.
|
||||
|
||||
@@ -8096,6 +8173,8 @@ class AIAgent:
|
||||
f"{approx_tokens:,}" if approx_tokens else "unknown", self.model,
|
||||
focus_topic,
|
||||
)
|
||||
# Pre-compression memory flush: let the model save memories before they're lost
|
||||
self.flush_memories(messages, min_turns=0)
|
||||
|
||||
# Notify external memory provider before compression discards context
|
||||
if self._memory_manager:
|
||||
@@ -11047,69 +11126,36 @@ class AIAgent:
|
||||
continue
|
||||
|
||||
# ── Nous Portal: record rate limit & skip retries ─────
|
||||
# When Nous returns a 429 that is a genuine account-
|
||||
# level rate limit, record the reset time to a shared
|
||||
# file so ALL sessions (cron, gateway, auxiliary) know
|
||||
# not to pile on, then skip further retries -- each
|
||||
# one burns another RPH request and deepens the hole.
|
||||
# The retry loop's top-of-iteration guard will catch
|
||||
# this on the next pass and try fallback or bail.
|
||||
#
|
||||
# IMPORTANT: Nous Portal multiplexes multiple upstream
|
||||
# providers (DeepSeek, Kimi, MiMo, Hermes). A 429 can
|
||||
# also mean an UPSTREAM provider is out of capacity
|
||||
# for one specific model -- transient, clears in
|
||||
# seconds, nothing to do with the caller's quota.
|
||||
# Tripping the cross-session breaker on that would
|
||||
# block every Nous model for minutes. We use
|
||||
# ``is_genuine_nous_rate_limit`` to tell the two
|
||||
# apart via the 429's own x-ratelimit-* headers and
|
||||
# the last-known-good state captured on the previous
|
||||
# successful response.
|
||||
# When Nous returns a 429, record the reset time to a
|
||||
# shared file so ALL sessions (cron, gateway, auxiliary)
|
||||
# know not to pile on. Then skip further retries —
|
||||
# each one burns another RPH request and deepens the
|
||||
# rate limit hole. The retry loop's top-of-iteration
|
||||
# guard will catch this on the next pass and try
|
||||
# fallback or bail with a clear message.
|
||||
if (
|
||||
is_rate_limited
|
||||
and self.provider == "nous"
|
||||
and classified.reason == FailoverReason.rate_limit
|
||||
and not recovered_with_pool
|
||||
):
|
||||
_genuine_nous_rate_limit = False
|
||||
try:
|
||||
from agent.nous_rate_guard import (
|
||||
is_genuine_nous_rate_limit,
|
||||
record_nous_rate_limit,
|
||||
)
|
||||
from agent.nous_rate_guard import record_nous_rate_limit
|
||||
_err_resp = getattr(api_error, "response", None)
|
||||
_err_hdrs = (
|
||||
getattr(_err_resp, "headers", None)
|
||||
if _err_resp else None
|
||||
)
|
||||
_genuine_nous_rate_limit = is_genuine_nous_rate_limit(
|
||||
record_nous_rate_limit(
|
||||
headers=_err_hdrs,
|
||||
last_known_state=self._rate_limit_state,
|
||||
error_context=error_context,
|
||||
)
|
||||
if _genuine_nous_rate_limit:
|
||||
record_nous_rate_limit(
|
||||
headers=_err_hdrs,
|
||||
error_context=error_context,
|
||||
)
|
||||
else:
|
||||
logging.info(
|
||||
"Nous 429 looks like upstream capacity "
|
||||
"(no exhausted bucket in headers or "
|
||||
"last-known state) -- not tripping "
|
||||
"cross-session breaker."
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
if _genuine_nous_rate_limit:
|
||||
# Skip straight to max_retries -- the
|
||||
# top-of-loop guard will handle fallback or
|
||||
# bail cleanly.
|
||||
retry_count = max_retries
|
||||
continue
|
||||
# Upstream capacity 429: fall through to normal
|
||||
# retry logic. A different model (or the same
|
||||
# model a moment later) will typically succeed.
|
||||
# Skip straight to max_retries — the top-of-loop
|
||||
# guard will handle fallback or bail cleanly.
|
||||
retry_count = max_retries
|
||||
continue
|
||||
|
||||
is_payload_too_large = (
|
||||
classified.reason == FailoverReason.payload_too_large
|
||||
@@ -11711,26 +11757,16 @@ class AIAgent:
|
||||
interim_has_content = bool((interim_msg.get("content") or "").strip())
|
||||
interim_has_reasoning = bool(interim_msg.get("reasoning", "").strip()) if isinstance(interim_msg.get("reasoning"), str) else False
|
||||
interim_has_codex_reasoning = bool(interim_msg.get("codex_reasoning_items"))
|
||||
interim_has_codex_message_items = bool(interim_msg.get("codex_message_items"))
|
||||
|
||||
if (
|
||||
interim_has_content
|
||||
or interim_has_reasoning
|
||||
or interim_has_codex_reasoning
|
||||
or interim_has_codex_message_items
|
||||
):
|
||||
if interim_has_content or interim_has_reasoning or interim_has_codex_reasoning:
|
||||
last_msg = messages[-1] if messages else None
|
||||
# Duplicate detection: two consecutive incomplete assistant
|
||||
# messages with identical content AND reasoning are collapsed.
|
||||
# For provider-state-only changes (encrypted reasoning
|
||||
# items or replayable message ids/phases/statuses differ
|
||||
# while visible content/reasoning are unchanged), compare
|
||||
# those opaque payloads too so we don't silently drop the
|
||||
# newer continuation state.
|
||||
# For reasoning-only messages (codex_reasoning_items differ but
|
||||
# visible content/reasoning are both empty), we also compare
|
||||
# the encrypted items to avoid silently dropping new state.
|
||||
last_codex_items = last_msg.get("codex_reasoning_items") if isinstance(last_msg, dict) else None
|
||||
interim_codex_items = interim_msg.get("codex_reasoning_items")
|
||||
last_codex_message_items = last_msg.get("codex_message_items") if isinstance(last_msg, dict) else None
|
||||
interim_codex_message_items = interim_msg.get("codex_message_items")
|
||||
duplicate_interim = (
|
||||
isinstance(last_msg, dict)
|
||||
and last_msg.get("role") == "assistant"
|
||||
@@ -11738,7 +11774,6 @@ class AIAgent:
|
||||
and (last_msg.get("content") or "") == (interim_msg.get("content") or "")
|
||||
and (last_msg.get("reasoning") or "") == (interim_msg.get("reasoning") or "")
|
||||
and last_codex_items == interim_codex_items
|
||||
and last_codex_message_items == interim_codex_message_items
|
||||
)
|
||||
if not duplicate_interim:
|
||||
messages.append(interim_msg)
|
||||
|
||||
@@ -1,95 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Build the Hermes Model Catalog — a centralized JSON manifest of curated models.
|
||||
|
||||
This script reads the in-repo hardcoded curated lists (``OPENROUTER_MODELS``,
|
||||
``_PROVIDER_MODELS["nous"]``) and writes them to a JSON manifest that the
|
||||
Hermes CLI fetches at runtime. Publishing the catalog through the docs site
|
||||
lets maintainers update model lists without shipping a Hermes release.
|
||||
|
||||
The runtime fetcher falls back to the same in-repo hardcoded lists if the
|
||||
manifest is unreachable, so this script is a convenience for keeping the
|
||||
manifest in sync — not a source of truth.
|
||||
|
||||
Usage::
|
||||
|
||||
python scripts/build_model_catalog.py
|
||||
|
||||
Output: ``website/static/api/model-catalog.json``
|
||||
|
||||
Live URL (after ``deploy-site.yml`` runs on merge to main):
|
||||
``https://hermes-agent.nousresearch.com/docs/api/model-catalog.json``
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
from datetime import datetime, timezone
|
||||
|
||||
REPO_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||
sys.path.insert(0, REPO_ROOT)
|
||||
|
||||
# Ensure HERMES_HOME is set for imports that touch it at module level.
|
||||
os.environ.setdefault("HERMES_HOME", os.path.join(os.path.expanduser("~"), ".hermes"))
|
||||
|
||||
from hermes_cli.models import OPENROUTER_MODELS, _PROVIDER_MODELS # noqa: E402
|
||||
|
||||
OUTPUT_PATH = os.path.join(REPO_ROOT, "website", "static", "api", "model-catalog.json")
|
||||
CATALOG_VERSION = 1
|
||||
|
||||
|
||||
def build_catalog() -> dict:
    """Assemble the model-catalog manifest from the in-repo curated lists.

    Returns:
        A dict with the catalog ``version``, a UTC ``updated_at`` timestamp,
        top-level ``metadata``, and a ``providers`` mapping holding the
        ``openrouter`` and ``nous`` entries (each with ``metadata`` and
        ``models``).
    """
    generated_at = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    # OpenRouter entries carry a description alongside the model id.
    openrouter_models = []
    for model_id, blurb in OPENROUTER_MODELS:
        openrouter_models.append({"id": model_id, "description": blurb})

    # Nous entries are listed by id only.
    nous_models = []
    for model_id in _PROVIDER_MODELS.get("nous", []):
        nous_models.append({"id": model_id})

    openrouter_block = {
        "metadata": {
            "display_name": "OpenRouter",
            "note": (
                "Descriptions drive picker badges. Live /api/v1/models "
                "filters curated ids by tool-calling support and free pricing."
            ),
        },
        "models": openrouter_models,
    }
    nous_block = {
        "metadata": {
            "display_name": "Nous Portal",
            "note": (
                "Free-tier gating is determined live via Portal pricing "
                "(partition_nous_models_by_tier), not this manifest."
            ),
        },
        "models": nous_models,
    }

    catalog = {
        "version": CATALOG_VERSION,
        "updated_at": generated_at,
        "metadata": {
            "source": "hermes-agent repo",
            "docs": "https://hermes-agent.nousresearch.com/docs/reference/model-catalog",
        },
        "providers": {
            "openrouter": openrouter_block,
            "nous": nous_block,
        },
    }
    return catalog
|
||||
|
||||
|
||||
def main() -> int:
    """Write the catalog to ``OUTPUT_PATH`` and print a per-provider summary.

    Returns:
        0, which the ``__main__`` guard passes to ``sys.exit``.
    """
    manifest = build_catalog()

    # Make sure the destination directory exists before writing.
    target_dir = os.path.dirname(OUTPUT_PATH)
    os.makedirs(target_dir, exist_ok=True)

    with open(OUTPUT_PATH, "w") as out:
        json.dump(manifest, out, indent=2)
        out.write("\n")  # end the file with a newline

    print(f"Wrote {OUTPUT_PATH}")
    for provider_name, provider_block in manifest["providers"].items():
        model_count = len(provider_block["models"])
        print(f" {provider_name}: {model_count} models")
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
+7
-99
@@ -29,25 +29,10 @@ BOLD='\033[1m'
|
||||
REPO_URL_SSH="git@github.com:NousResearch/hermes-agent.git"
|
||||
REPO_URL_HTTPS="https://github.com/NousResearch/hermes-agent.git"
|
||||
HERMES_HOME="${HERMES_HOME:-$HOME/.hermes}"
|
||||
# INSTALL_DIR is resolved AFTER arg parsing and OS detection so we can pick an
|
||||
# FHS-style layout for root installs. Track whether the user gave us an
|
||||
# explicit directory — if so we never override it.
|
||||
if [ -n "${HERMES_INSTALL_DIR:-}" ]; then
|
||||
INSTALL_DIR="$HERMES_INSTALL_DIR"
|
||||
INSTALL_DIR_EXPLICIT=true
|
||||
else
|
||||
INSTALL_DIR=""
|
||||
INSTALL_DIR_EXPLICIT=false
|
||||
fi
|
||||
INSTALL_DIR="${HERMES_INSTALL_DIR:-$HERMES_HOME/hermes-agent}"
|
||||
PYTHON_VERSION="3.11"
|
||||
NODE_VERSION="22"
|
||||
|
||||
# FHS-style root install layout (set by resolve_install_layout when applicable):
|
||||
# code at /usr/local/lib/hermes-agent, command at /usr/local/bin/hermes,
|
||||
# data still at /root/.hermes (HERMES_HOME). Matches Claude Code / Codex CLI
|
||||
# and keeps Docker bind-mounted /root/ volumes lean.
|
||||
ROOT_FHS_LAYOUT=false
|
||||
|
||||
# Options
|
||||
USE_VENV=true
|
||||
RUN_SETUP=true
|
||||
@@ -79,7 +64,6 @@ while [[ $# -gt 0 ]]; do
|
||||
;;
|
||||
--dir)
|
||||
INSTALL_DIR="$2"
|
||||
INSTALL_DIR_EXPLICIT=true
|
||||
shift 2
|
||||
;;
|
||||
--hermes-home)
|
||||
@@ -95,20 +79,9 @@ while [[ $# -gt 0 ]]; do
|
||||
echo " --no-venv Don't create virtual environment"
|
||||
echo " --skip-setup Skip interactive setup wizard"
|
||||
echo " --branch NAME Git branch to install (default: main)"
|
||||
echo " --dir PATH Installation directory"
|
||||
echo " default (non-root): ~/.hermes/hermes-agent"
|
||||
echo " default (root, Linux): /usr/local/lib/hermes-agent"
|
||||
echo " --dir PATH Installation directory (default: ~/.hermes/hermes-agent)"
|
||||
echo " --hermes-home PATH Data directory (default: ~/.hermes, or \$HERMES_HOME)"
|
||||
echo " -h, --help Show this help"
|
||||
echo ""
|
||||
echo "Notes:"
|
||||
echo " When running as root on Linux, Hermes installs the code under"
|
||||
echo " /usr/local/lib/hermes-agent and links the command into"
|
||||
echo " /usr/local/bin/hermes (FHS layout — matches Claude Code / Codex CLI)."
|
||||
echo " Data, config, sessions, and logs still live in \$HERMES_HOME"
|
||||
echo " (default /root/.hermes). This keeps Docker bind-mounted volumes"
|
||||
echo " small and ensures the command is on PATH for all shells."
|
||||
echo " Existing installs at \$HERMES_HOME/hermes-agent are preserved in-place."
|
||||
exit 0
|
||||
;;
|
||||
*)
|
||||
@@ -190,60 +163,9 @@ is_termux() {
|
||||
[ -n "${TERMUX_VERSION:-}" ] || [[ "${PREFIX:-}" == *"com.termux/files/usr"* ]]
|
||||
}
|
||||
|
||||
# Decide where the repo checkout + venv live, and where the `hermes` command
|
||||
# symlink goes. Called after detect_os so $OS/$DISTRO are known.
|
||||
#
|
||||
# Defaults:
|
||||
# - Non-root, any OS: INSTALL_DIR = $HERMES_HOME/hermes-agent
|
||||
# command link in $HOME/.local/bin
|
||||
# - Termux (any uid): INSTALL_DIR = $HERMES_HOME/hermes-agent
|
||||
# command link in $PREFIX/bin (already on PATH)
|
||||
# - Root on Linux (new): INSTALL_DIR = /usr/local/lib/hermes-agent
|
||||
# command link in /usr/local/bin
|
||||
# (unless a legacy install already exists at
|
||||
# $HERMES_HOME/hermes-agent — then preserve it)
|
||||
#
|
||||
# Always no-op when the user set --dir or $HERMES_INSTALL_DIR.
|
||||
resolve_install_layout() {
|
||||
if [ "$INSTALL_DIR_EXPLICIT" = true ]; then
|
||||
log_info "Install directory: $INSTALL_DIR (explicit)"
|
||||
return 0
|
||||
fi
|
||||
|
||||
# Termux: package manager manages /data/data/..., keep code in HERMES_HOME.
|
||||
if is_termux; then
|
||||
INSTALL_DIR="$HERMES_HOME/hermes-agent"
|
||||
return 0
|
||||
fi
|
||||
|
||||
# Root on Linux: prefer FHS layout unless a legacy install already exists.
|
||||
# macOS root installs keep the legacy layout because /usr/local/ on macOS
|
||||
# is Homebrew territory and we don't want to fight that.
|
||||
if [ "$OS" = "linux" ] && [ "$(id -u)" -eq 0 ]; then
|
||||
if [ -d "$HERMES_HOME/hermes-agent/.git" ]; then
|
||||
INSTALL_DIR="$HERMES_HOME/hermes-agent"
|
||||
log_info "Existing install detected at $INSTALL_DIR — keeping legacy layout"
|
||||
log_info " (new root installs use /usr/local/lib/hermes-agent)"
|
||||
return 0
|
||||
fi
|
||||
INSTALL_DIR="/usr/local/lib/hermes-agent"
|
||||
ROOT_FHS_LAYOUT=true
|
||||
log_info "Root install on Linux — using FHS layout"
|
||||
log_info " Code: $INSTALL_DIR"
|
||||
log_info " Command: /usr/local/bin/hermes"
|
||||
log_info " Data: $HERMES_HOME (unchanged)"
|
||||
return 0
|
||||
fi
|
||||
|
||||
# Default: non-root, non-Termux → legacy user-scoped layout.
|
||||
INSTALL_DIR="$HERMES_HOME/hermes-agent"
|
||||
}
|
||||
|
||||
get_command_link_dir() {
|
||||
if is_termux && [ -n "${PREFIX:-}" ]; then
|
||||
echo "$PREFIX/bin"
|
||||
elif [ "$ROOT_FHS_LAYOUT" = true ]; then
|
||||
echo "/usr/local/bin"
|
||||
else
|
||||
echo "$HOME/.local/bin"
|
||||
fi
|
||||
@@ -252,8 +174,6 @@ get_command_link_dir() {
|
||||
get_command_link_display_dir() {
|
||||
if is_termux && [ -n "${PREFIX:-}" ]; then
|
||||
echo '$PREFIX/bin'
|
||||
elif [ "$ROOT_FHS_LAYOUT" = true ]; then
|
||||
echo '/usr/local/bin'
|
||||
else
|
||||
echo '~/.local/bin'
|
||||
fi
|
||||
@@ -1055,14 +975,6 @@ setup_path() {
|
||||
return 0
|
||||
fi
|
||||
|
||||
# FHS layout: /usr/local/bin is on PATH for every standard shell, nothing to inject.
|
||||
if [ "$ROOT_FHS_LAYOUT" = true ]; then
|
||||
export PATH="$command_link_dir:$PATH"
|
||||
log_info "/usr/local/bin is already on PATH for all shells"
|
||||
log_success "hermes command ready"
|
||||
return 0
|
||||
fi
|
||||
|
||||
# Check if ~/.local/bin is on PATH; if not, add it to shell config.
|
||||
# Detect the user's actual login shell (not the shell running this script,
|
||||
# which is always bash when piped from curl).
|
||||
@@ -1427,12 +1339,12 @@ print_success() {
|
||||
echo ""
|
||||
|
||||
# Show file locations
|
||||
echo -e "${CYAN}${BOLD}📁 Your files:${NC}"
|
||||
echo -e "${CYAN}${BOLD}📁 Your files (all in ~/.hermes/):${NC}"
|
||||
echo ""
|
||||
echo -e " ${YELLOW}Config:${NC} $HERMES_HOME/config.yaml"
|
||||
echo -e " ${YELLOW}API Keys:${NC} $HERMES_HOME/.env"
|
||||
echo -e " ${YELLOW}Data:${NC} $HERMES_HOME/cron/, sessions/, logs/"
|
||||
echo -e " ${YELLOW}Code:${NC} $INSTALL_DIR"
|
||||
echo -e " ${YELLOW}Config:${NC} ~/.hermes/config.yaml"
|
||||
echo -e " ${YELLOW}API Keys:${NC} ~/.hermes/.env"
|
||||
echo -e " ${YELLOW}Data:${NC} ~/.hermes/cron/, sessions/, logs/"
|
||||
echo -e " ${YELLOW}Code:${NC} ~/.hermes/hermes-agent/"
|
||||
echo ""
|
||||
|
||||
echo -e "${CYAN}─────────────────────────────────────────────────────────${NC}"
|
||||
@@ -1452,9 +1364,6 @@ print_success() {
|
||||
if [ "$DISTRO" = "termux" ]; then
|
||||
echo -e "${YELLOW}⚡ 'hermes' was linked into $(get_command_link_display_dir), which is already on PATH in Termux.${NC}"
|
||||
echo ""
|
||||
elif [ "$ROOT_FHS_LAYOUT" = true ]; then
|
||||
echo -e "${YELLOW}⚡ 'hermes' was linked into /usr/local/bin and is ready to use — no shell reload needed.${NC}"
|
||||
echo ""
|
||||
else
|
||||
echo -e "${YELLOW}⚡ Reload your shell to use 'hermes' command:${NC}"
|
||||
echo ""
|
||||
@@ -1506,7 +1415,6 @@ main() {
|
||||
print_banner
|
||||
|
||||
detect_os
|
||||
resolve_install_layout
|
||||
install_uv
|
||||
check_python
|
||||
check_git
|
||||
|
||||
@@ -43,7 +43,6 @@ AUTHOR_MAP = {
|
||||
"teknium1@gmail.com": "teknium1",
|
||||
"teknium@nousresearch.com": "teknium1",
|
||||
"127238744+teknium1@users.noreply.github.com": "teknium1",
|
||||
"focusflow.app.help@gmail.com": "yes999zc",
|
||||
"343873859@qq.com": "DrStrangerUJN",
|
||||
"uzmpsk.dilekakbas@gmail.com": "dlkakbs",
|
||||
"jefferson@heimdallstrategy.com": "Mind-Dragon",
|
||||
@@ -52,7 +51,6 @@ AUTHOR_MAP = {
|
||||
"web3blind@users.noreply.github.com": "web3blind",
|
||||
"julia@alexland.us": "alexg0bot",
|
||||
"1060770+benjaminsehl@users.noreply.github.com": "benjaminsehl",
|
||||
"nerijusn76@gmail.com": "Nerijusas",
|
||||
# contributors (from noreply pattern)
|
||||
"david.vv@icloud.com": "davidvv",
|
||||
"wangqiang@wangqiangdeMac-mini.local": "xiaoqiang243",
|
||||
@@ -69,9 +67,7 @@ AUTHOR_MAP = {
|
||||
"kshitijk4poor@gmail.com": "kshitijk4poor",
|
||||
"keira.voss94@gmail.com": "keiravoss94",
|
||||
"16443023+stablegenius49@users.noreply.github.com": "stablegenius49",
|
||||
"fqsy1416@gmail.com": "EKKOLearnAI",
|
||||
"simbamax99@gmail.com": "simbam99",
|
||||
"iris@growthpillars.co": "irispillars",
|
||||
"185121704+stablegenius49@users.noreply.github.com": "stablegenius49",
|
||||
"101283333+batuhankocyigit@users.noreply.github.com": "batuhankocyigit",
|
||||
"255305877+ismell0992-afk@users.noreply.github.com": "ismell0992-afk",
|
||||
@@ -96,8 +92,6 @@ AUTHOR_MAP = {
|
||||
"104278804+Sertug17@users.noreply.github.com": "Sertug17",
|
||||
"112503481+caentzminger@users.noreply.github.com": "caentzminger",
|
||||
"258577966+voidborne-d@users.noreply.github.com": "voidborne-d",
|
||||
"liusway405@gmail.com": "voidborne-d",
|
||||
"xydarcher@uestc.edu.cn": "Readon",
|
||||
"sir_even@icloud.com": "sirEven",
|
||||
"36056348+sirEven@users.noreply.github.com": "sirEven",
|
||||
"70424851+insecurejezza@users.noreply.github.com": "insecurejezza",
|
||||
@@ -181,10 +175,6 @@ AUTHOR_MAP = {
|
||||
"jaisehgal11299@gmail.com": "jaisup",
|
||||
"percydikec@gmail.com": "PercyDikec",
|
||||
"noonou7@gmail.com": "HenkDz",
|
||||
# Azure Foundry salvage (PRs #9029, #4599, #10086, #8766)
|
||||
"tech@smartlogics.net": "TechPrototyper",
|
||||
"637186+HangGlidersRule@users.noreply.github.com": "HangGlidersRule",
|
||||
"pein892@gmail.com": "pein892",
|
||||
"dean.kerr@gmail.com": "deankerr",
|
||||
"socrates1024@gmail.com": "socrates1024",
|
||||
"seanalt555@gmail.com": "Salt-555",
|
||||
@@ -419,7 +409,6 @@ AUTHOR_MAP = {
|
||||
"105142614+VTRiot@users.noreply.github.com": "VTRiot",
|
||||
"vivien000812@gmail.com": "iamagenius00",
|
||||
"89228157+Feranmi10@users.noreply.github.com": "Feranmi10",
|
||||
"oluwadareferanmi11@gmail.com": "Feranmi10",
|
||||
"simon@gtcl.us": "simon-gtcl",
|
||||
"suzukaze.haduki@gmail.com": "houko",
|
||||
"cliff@cigii.com": "cgarwood82",
|
||||
@@ -514,8 +503,6 @@ AUTHOR_MAP = {
|
||||
"codex@openai.invalid": "teknium1",
|
||||
"screenmachine@gmail.com": "teknium1",
|
||||
"chenzeshi@live.com": "chen1749144759",
|
||||
"mor.aleksandr@yahoo.com": "MorAlekss",
|
||||
"ash@users.noreply.github.com": "ash",
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -281,6 +281,7 @@ Type these during an interactive chat session.
|
||||
### Utility
|
||||
```
|
||||
/branch (/fork) Branch the current session
|
||||
/btw Ephemeral side question (doesn't interrupt main task)
|
||||
/fast Toggle priority/fast processing
|
||||
/browser Open CDP browser connection
|
||||
/history Show conversation history (CLI)
|
||||
|
||||
@@ -1,152 +0,0 @@
|
||||
---
|
||||
name: kanban-orchestrator
|
||||
description: Decomposition playbook + specialist-roster conventions + anti-temptation rules for an orchestrator profile routing work through Kanban. The "don't do the work yourself" rule and the basic lifecycle are auto-injected into every kanban worker's system prompt; this skill is the deeper playbook when you're specifically playing the orchestrator role.
|
||||
version: 2.0.0
|
||||
metadata:
|
||||
hermes:
|
||||
tags: [kanban, multi-agent, orchestration, routing]
|
||||
related_skills: [kanban-worker]
|
||||
---
|
||||
|
||||
# Kanban Orchestrator — Decomposition Playbook
|
||||
|
||||
> The **core worker lifecycle** (including the `kanban_create` fan-out pattern and the "decompose, don't execute" rule) is auto-injected into every kanban process via the `KANBAN_GUIDANCE` system-prompt block. This skill is the deeper playbook when you're an orchestrator profile whose whole job is routing.
|
||||
|
||||
## When to use the board (vs. just doing the work)
|
||||
|
||||
Create Kanban tasks when any of these are true:
|
||||
|
||||
1. **Multiple specialists are needed.** Research + analysis + writing is three profiles.
|
||||
2. **The work should survive a crash or restart.** Long-running, recurring, or important.
|
||||
3. **The user might want to interject.** Human-in-the-loop at any step.
|
||||
4. **Multiple subtasks can run in parallel.** Fan-out for speed.
|
||||
5. **Review / iteration is expected.** A reviewer profile loops on drafter output.
|
||||
6. **The audit trail matters.** Board rows persist in SQLite forever.
|
||||
|
||||
If *none* of those apply — it's a small one-shot reasoning task — use `delegate_task` instead or answer the user directly.
|
||||
|
||||
## The anti-temptation rules
|
||||
|
||||
Your job description says "route, don't execute." The rules that enforce that:
|
||||
|
||||
- **Do not execute the work yourself.** Your restricted toolset usually doesn't even include terminal/file/code/web for implementation. If you find yourself "just fixing this quickly" — stop and create a task for the right specialist.
|
||||
- **For any concrete task, create a Kanban task and assign it.** Every single time.
|
||||
- **If no specialist fits, ask the user which profile to create.** Do not default to doing it yourself under "close enough."
|
||||
- **Decompose, route, and summarize — that's the whole job.**
|
||||
|
||||
## The standard specialist roster (convention)
|
||||
|
||||
Unless the user's setup has customized profiles, assume these exist. Adjust to whatever the user actually has — ask if you're unsure.
|
||||
|
||||
| Profile | Does | Typical workspace |
|
||||
|---|---|---|
|
||||
| `researcher` | Reads sources, gathers facts, writes findings | `scratch` |
|
||||
| `analyst` | Synthesizes, ranks, de-dupes. Consumes multiple `researcher` outputs | `scratch` |
|
||||
| `writer` | Drafts prose in the user's voice | `scratch` or `dir:` into their Obsidian vault |
|
||||
| `reviewer` | Reads output, leaves findings, gates approval | `scratch` |
|
||||
| `backend-eng` | Writes server-side code | `worktree` |
|
||||
| `frontend-eng` | Writes client-side code | `worktree` |
|
||||
| `ops` | Runs scripts, manages services, handles deployments | `dir:` into ops scripts repo |
|
||||
| `pm` | Writes specs, acceptance criteria | `scratch` |
|
||||
|
||||
## Decomposition playbook
|
||||
|
||||
### Step 1 — Understand the goal
|
||||
|
||||
Ask clarifying questions if the goal is ambiguous. Cheap to ask; expensive to spawn the wrong fleet.
|
||||
|
||||
### Step 2 — Sketch the task graph
|
||||
|
||||
Before creating anything, draft the graph out loud (in your response to the user). Example for "Analyze whether we should migrate to Postgres":
|
||||
|
||||
```
|
||||
T1 researcher research: Postgres cost vs current
|
||||
T2 researcher research: Postgres performance vs current
|
||||
T3 analyst synthesize migration recommendation parents: T1, T2
|
||||
T4 writer draft decision memo parents: T3
|
||||
```
|
||||
|
||||
Show this to the user. Let them correct it before you create anything.
|
||||
|
||||
### Step 3 — Create tasks and link
|
||||
|
||||
```python
|
||||
t1 = kanban_create(
|
||||
title="research: Postgres cost vs current",
|
||||
assignee="researcher",
|
||||
body="Compare estimated infrastructure costs, migration costs, and ongoing ops costs over a 3-year window. Sources: AWS/GCP pricing, team time estimates, current Postgres bills from peers.",
|
||||
tenant=os.environ.get("HERMES_TENANT"),
|
||||
)["task_id"]
|
||||
|
||||
t2 = kanban_create(
|
||||
title="research: Postgres performance vs current",
|
||||
assignee="researcher",
|
||||
body="Compare query latency, throughput, and scaling characteristics at our expected data volume (~500GB, 10k QPS peak). Sources: benchmark papers, public case studies, pgbench results if easy.",
|
||||
)["task_id"]
|
||||
|
||||
t3 = kanban_create(
|
||||
title="synthesize migration recommendation",
|
||||
assignee="analyst",
|
||||
body="Read the findings from T1 (cost) and T2 (performance). Produce a 1-page recommendation with explicit trade-offs and a go/no-go call.",
|
||||
parents=[t1, t2],
|
||||
)["task_id"]
|
||||
|
||||
t4 = kanban_create(
|
||||
title="draft decision memo",
|
||||
assignee="writer",
|
||||
body="Turn the analyst's recommendation into a 2-page memo for the CTO. Match the tone of previous decision memos in the team's knowledge base.",
|
||||
parents=[t3],
|
||||
)["task_id"]
|
||||
```
|
||||
|
||||
`parents=[...]` gates promotion — children stay in `todo` until every parent reaches `done`, then auto-promote to `ready`. No manual coordination needed; the dispatcher and dependency engine handle it.
|
||||
|
||||
### Step 4 — Complete your own task
|
||||
|
||||
If you were spawned as a task yourself (e.g. a `planner` profile was assigned `T0: "investigate Postgres migration"`), mark it done with a summary of what you created:
|
||||
|
||||
```python
|
||||
kanban_complete(
|
||||
summary="decomposed into T1-T4: 2 researchers parallel, 1 analyst on their outputs, 1 writer on the recommendation",
|
||||
metadata={
|
||||
"task_graph": {
|
||||
"T1": {"assignee": "researcher", "parents": []},
|
||||
"T2": {"assignee": "researcher", "parents": []},
|
||||
"T3": {"assignee": "analyst", "parents": ["T1", "T2"]},
|
||||
"T4": {"assignee": "writer", "parents": ["T3"]},
|
||||
},
|
||||
},
|
||||
)
|
||||
```
|
||||
|
||||
### Step 5 — Report back to the user
|
||||
|
||||
Tell them what you created in plain prose:
|
||||
|
||||
> I've queued 4 tasks:
|
||||
> - **T1** (researcher): cost comparison
|
||||
> - **T2** (researcher): performance comparison, in parallel with T1
|
||||
> - **T3** (analyst): synthesizes T1 + T2 into a recommendation
|
||||
> - **T4** (writer): turns T3 into a CTO memo
|
||||
>
|
||||
> The dispatcher will pick up T1 and T2 now. T3 starts when both finish. You'll get a gateway ping when T4 completes. Use the dashboard or `hermes kanban tail <id>` to follow along.
|
||||
|
||||
## Common patterns
|
||||
|
||||
**Fan-out + fan-in (research → synthesize):** N `researcher` tasks with no parents, one `analyst` task with all of them as parents.
|
||||
|
||||
**Pipeline with gates:** `pm → backend-eng → reviewer`. Each stage sets `parents=[previous_task]`. The reviewer either blocks or completes; if the reviewer blocks, the operator unblocks with feedback and the dispatcher respawns the task.
|
||||
|
||||
**Same-profile queue:** 50 tasks, all assigned to `translator`, no dependencies between them. Dispatcher serializes — translator processes them in priority order, accumulating experience in their own memory.
|
||||
|
||||
**Human-in-the-loop:** Any task can `kanban_block()` to wait for input. Dispatcher respawns after `/unblock`. The comment thread carries the full context.
|
||||
|
||||
## Pitfalls
|
||||
|
||||
**Reassignment vs. new task.** If a reviewer blocks with "needs changes," create a NEW task linked from the reviewer's task — don't re-run the same task with a stern look. The new task is assigned to the original implementer profile.
|
||||
|
||||
**Argument order for links.** `kanban_link(parent_id=..., child_id=...)` — parent first. Mixing them up demotes the wrong task to `todo`.
|
||||
|
||||
**Don't pre-create the whole graph if the shape depends on intermediate findings.** If T3's structure depends on what T1 and T2 find, let T3 exist as a "synthesize findings" task whose own first step is to read parent handoffs and plan the rest. Orchestrators can spawn orchestrators.
|
||||
|
||||
**Tenant inheritance.** If `HERMES_TENANT` is set in your env, pass `tenant=os.environ.get("HERMES_TENANT")` on every `kanban_create` call so child tasks stay in the same namespace.
|
||||
@@ -1,134 +0,0 @@
|
||||
---
|
||||
name: kanban-worker
|
||||
description: Pitfalls, examples, and edge cases for Hermes Kanban workers. The lifecycle itself is auto-injected into every worker's system prompt as KANBAN_GUIDANCE (from agent/prompt_builder.py); this skill is what you load when you want deeper detail on specific scenarios.
|
||||
version: 2.0.0
|
||||
metadata:
|
||||
hermes:
|
||||
tags: [kanban, multi-agent, collaboration, workflow, pitfalls]
|
||||
related_skills: [kanban-orchestrator]
|
||||
---
|
||||
|
||||
# Kanban Worker — Pitfalls and Examples
|
||||
|
||||
> You're seeing this skill because the Hermes Kanban dispatcher spawned you as a worker with `--skills kanban-worker` — it's loaded automatically for every dispatched worker. The **lifecycle** (6 steps: orient → work → heartbeat → block/complete) also lives in the `KANBAN_GUIDANCE` block that's auto-injected into your system prompt. This skill is the deeper detail: good handoff shapes, retry diagnostics, edge cases.
|
||||
|
||||
## Workspace handling
|
||||
|
||||
Your workspace kind determines how you should behave inside `$HERMES_KANBAN_WORKSPACE`:
|
||||
|
||||
| Kind | What it is | How to work |
|
||||
|---|---|---|
|
||||
| `scratch` | Fresh tmp dir, yours alone | Read/write freely; it gets GC'd when the task is archived. |
|
||||
| `dir:<path>` | Shared persistent directory | Other runs will read what you write. Treat it like long-lived state. Path is guaranteed absolute (the kernel rejects relative paths). |
|
||||
| `worktree` | Git worktree at the resolved path | If `.git` doesn't exist, run `git worktree add <path> <branch>` from the main repo first, then cd and work normally. Commit work here. |
|
||||
|
||||
## Tenant isolation
|
||||
|
||||
If `$HERMES_TENANT` is set, the task belongs to a tenant namespace. When reading or writing persistent memory, prefix memory entries with the tenant so context doesn't leak across tenants:
|
||||
|
||||
- Good: `business-a: Acme is our biggest customer`
|
||||
- Bad (leaks): `Acme is our biggest customer`
|
||||
|
||||
## Good summary + metadata shapes
|
||||
|
||||
The `kanban_complete(summary=..., metadata=...)` handoff is how downstream workers read what you did. Patterns that work:
|
||||
|
||||
**Coding task:**
|
||||
```python
|
||||
kanban_complete(
|
||||
summary="shipped rate limiter — token bucket, keys on user_id with IP fallback, 14 tests pass",
|
||||
metadata={
|
||||
"changed_files": ["rate_limiter.py", "tests/test_rate_limiter.py"],
|
||||
"tests_run": 14,
|
||||
"tests_passed": 14,
|
||||
"decisions": ["user_id primary, IP fallback for unauthenticated requests"],
|
||||
},
|
||||
)
|
||||
```
|
||||
|
||||
**Research task:**
|
||||
```python
|
||||
kanban_complete(
|
||||
summary="3 competing libraries reviewed; vLLM wins on throughput, SGLang on latency, Tensorrt-LLM on memory efficiency",
|
||||
metadata={
|
||||
"sources_read": 12,
|
||||
"recommendation": "vLLM",
|
||||
"benchmarks": {"vllm": 1.0, "sglang": 0.87, "trtllm": 0.72},
|
||||
},
|
||||
)
|
||||
```
|
||||
|
||||
**Review task:**
|
||||
```python
|
||||
kanban_complete(
|
||||
summary="reviewed PR #123; 2 blocking issues found (SQL injection in /search, missing CSRF on /settings)",
|
||||
metadata={
|
||||
"pr_number": 123,
|
||||
"findings": [
|
||||
{"severity": "critical", "file": "api/search.py", "line": 42, "issue": "raw SQL concat"},
|
||||
{"severity": "high", "file": "api/settings.py", "issue": "missing CSRF middleware"},
|
||||
],
|
||||
"approved": False,
|
||||
},
|
||||
)
|
||||
```
|
||||
|
||||
Shape `metadata` so downstream parsers (reviewers, aggregators, schedulers) can use it without re-reading your prose.
|
||||
|
||||
## Block reasons that get answered fast
|
||||
|
||||
Bad: `"stuck"` — the human has no context.
|
||||
|
||||
Good: one sentence naming the specific decision you need. Leave longer context as a comment instead.
|
||||
|
||||
```python
|
||||
kanban_comment(
|
||||
task_id=os.environ["HERMES_KANBAN_TASK"],
|
||||
body="Full context: I have user IPs from Cloudflare headers but some users are behind NATs with thousands of peers. Keying on IP alone causes false positives.",
|
||||
)
|
||||
kanban_block(reason="Rate limit key choice: IP (simple, NAT-unsafe) or user_id (requires auth, skips anonymous endpoints)?")
|
||||
```
|
||||
|
||||
The block message is what appears in the dashboard / gateway notifier. The comment is the deeper context a human reads when they open the task.
|
||||
|
||||
## Heartbeats worth sending
|
||||
|
||||
Good heartbeats name progress: `"epoch 12/50, loss 0.31"`, `"scanned 1.2M/2.4M rows"`, `"uploaded 47/120 videos"`.
|
||||
|
||||
Bad heartbeats: `"still working"`, empty notes, sub-second intervals. Send at most one heartbeat every few minutes, and skip them entirely for tasks under ~2 minutes.
|
||||
|
||||
## Retry scenarios
|
||||
|
||||
If you open the task and `kanban_show` returns `runs: [...]` with one or more closed runs, you're a retry. The prior runs' `outcome` / `summary` / `error` tell you what didn't work. Don't repeat that path. Typical retry diagnostics:
|
||||
|
||||
- `outcome: "timed_out"` — the previous attempt hit `max_runtime_seconds`. You may need to chunk the work or shorten it.
|
||||
- `outcome: "crashed"` — OOM or segfault. Reduce memory footprint.
|
||||
- `outcome: "spawn_failed"` + `error: "..."` — usually a profile config issue (missing credential, bad PATH). Ask the human via `kanban_block` instead of retrying blindly.
|
||||
- `outcome: "reclaimed"` + `summary: "task archived..."` — operator archived the task out from under the previous run; you probably shouldn't be running at all, check status carefully.
|
||||
- `outcome: "blocked"` — a previous attempt blocked; the unblock comment should be in the thread by now.
|
||||
|
||||
## Do NOT
|
||||
|
||||
- Call `delegate_task` as a substitute for `kanban_create`. `delegate_task` is for short reasoning subtasks inside YOUR run; `kanban_create` is for cross-agent handoffs that outlive one API loop.
|
||||
- Modify files outside `$HERMES_KANBAN_WORKSPACE` unless the task body says to.
|
||||
- Create follow-up tasks assigned to yourself — assign to the right specialist.
|
||||
- Complete a task you didn't actually finish. Block it instead.
|
||||
|
||||
## Pitfalls
|
||||
|
||||
**Task state can change between dispatch and your startup.** Between when the dispatcher claimed the task and when your process actually booted, the task may have been blocked, reassigned, or archived. Always `kanban_show` first. If it reports `blocked` or `archived`, stop — you shouldn't be running.
|
||||
|
||||
**Workspace may have stale artifacts.** Especially `dir:` and `worktree` workspaces can have files from previous runs. Read the comment thread — it usually explains why you're running again and what state the workspace is in.
|
||||
|
||||
**Don't rely on the CLI when the tools are available.** The `kanban_*` tools work across all terminal backends (Docker, Modal, SSH). Running `hermes kanban <verb>` from your terminal tool will fail in containerized backends because the CLI isn't installed there. When in doubt, use the tool.
|
||||
|
||||
## CLI fallback (for scripting)
|
||||
|
||||
Every tool has a CLI equivalent for human operators and scripts:
|
||||
- `kanban_show` ↔ `hermes kanban show <id> --json`
|
||||
- `kanban_complete` ↔ `hermes kanban complete <id> --summary "..." --metadata '{...}'`
|
||||
- `kanban_block` ↔ `hermes kanban block <id> "reason"`
|
||||
- `kanban_create` ↔ `hermes kanban create "title" --assignee <profile> [--parent <id>]`
|
||||
- etc.
|
||||
|
||||
Use the tools from inside an agent; the CLI exists for the human at the terminal.
|
||||
@@ -17,13 +17,6 @@ Remove refusal behaviors (guardrails) from open-weight LLMs without retraining o
|
||||
|
||||
**License warning:** OBLITERATUS is AGPL-3.0. NEVER import it as a Python library. Always invoke via CLI (`obliteratus` command) or subprocess. This keeps Hermes Agent's MIT license clean.
|
||||
|
||||
## Video Guide
|
||||
|
||||
Walkthrough of OBLITERATUS used by a Hermes agent to abliterate Gemma:
|
||||
https://www.youtube.com/watch?v=8fG9BrNTeHs ("OBLITERATUS: An AI Agent Removed Gemma 4's Safety Guardrails")
|
||||
|
||||
Useful when the user wants a visual overview of the end-to-end workflow before running it themselves.
|
||||
|
||||
## When to Use This Skill
|
||||
|
||||
Trigger when the user:
|
||||
|
||||
@@ -386,7 +386,7 @@ class TestProvidersDictApiModeAnthropicMessages:
|
||||
},
|
||||
},
|
||||
"auxiliary": {
|
||||
"compression": {
|
||||
"flush_memories": {
|
||||
"provider": "myrelay",
|
||||
"model": "claude-sonnet-4.6",
|
||||
},
|
||||
@@ -399,11 +399,11 @@ class TestProvidersDictApiModeAnthropicMessages:
|
||||
AnthropicAuxiliaryClient,
|
||||
AsyncAnthropicAuxiliaryClient,
|
||||
)
|
||||
async_client, async_model = get_async_text_auxiliary_client("compression")
|
||||
async_client, async_model = get_async_text_auxiliary_client("flush_memories")
|
||||
assert isinstance(async_client, AsyncAnthropicAuxiliaryClient)
|
||||
assert async_model == "claude-sonnet-4.6"
|
||||
|
||||
sync_client, sync_model = get_text_auxiliary_client("compression")
|
||||
sync_client, sync_model = get_text_auxiliary_client("flush_memories")
|
||||
assert isinstance(sync_client, AnthropicAuxiliaryClient)
|
||||
assert sync_model == "claude-sonnet-4.6"
|
||||
|
||||
|
||||
@@ -847,32 +847,6 @@ class TestTokenBudgetTailProtection:
|
||||
assert isinstance(pruned, int)
|
||||
|
||||
|
||||
class TestUpdateModelBudgets:
|
||||
"""Regression: update_model() must recalculate token budgets."""
|
||||
|
||||
def test_tail_budget_recalculated(self):
|
||||
"""tail_token_budget must change after switching to a different context length."""
|
||||
from unittest.mock import patch
|
||||
with patch("agent.context_compressor.get_model_context_length", return_value=200_000):
|
||||
comp = ContextCompressor("model-a", threshold_percent=0.50, quiet_mode=True)
|
||||
old_tail = comp.tail_token_budget
|
||||
old_max_summary = comp.max_summary_tokens
|
||||
|
||||
comp.update_model("model-b", context_length=32_000)
|
||||
assert comp.tail_token_budget != old_tail, "tail_token_budget should change"
|
||||
assert comp.tail_token_budget < old_tail, "smaller context → smaller budget"
|
||||
assert comp.max_summary_tokens != old_max_summary, "max_summary_tokens should change"
|
||||
|
||||
def test_budgets_proportional(self):
|
||||
"""Budgets should be proportional to context_length after update."""
|
||||
from unittest.mock import patch
|
||||
with patch("agent.context_compressor.get_model_context_length", return_value=100_000):
|
||||
comp = ContextCompressor("model-a", threshold_percent=0.50, quiet_mode=True)
|
||||
comp.update_model("model-b", context_length=10_000)
|
||||
assert comp.tail_token_budget == int(comp.threshold_tokens * comp.summary_target_ratio)
|
||||
assert comp.max_summary_tokens == min(int(10_000 * 0.05), 4000)
|
||||
|
||||
|
||||
class TestTruncateToolCallArgsJson:
|
||||
"""Regression tests for #11762.
|
||||
|
||||
|
||||
@@ -192,43 +192,6 @@ class TestDefaultContextLengths:
|
||||
f"{model_id}: expected {expected_ctx}, got {actual}"
|
||||
)
|
||||
|
||||
def test_deepseek_v4_models_1m_context(self):
|
||||
from agent.model_metadata import get_model_context_length
|
||||
from unittest.mock import patch as mock_patch
|
||||
|
||||
expected_keys = {
|
||||
"deepseek-v4-pro": 1_000_000,
|
||||
"deepseek-v4-flash": 1_000_000,
|
||||
"deepseek-chat": 1_000_000,
|
||||
"deepseek-reasoner": 1_000_000,
|
||||
}
|
||||
for key, value in expected_keys.items():
|
||||
assert key in DEFAULT_CONTEXT_LENGTHS, f"{key} missing"
|
||||
assert DEFAULT_CONTEXT_LENGTHS[key] == value, (
|
||||
f"{key} should be {value}, got {DEFAULT_CONTEXT_LENGTHS[key]}"
|
||||
)
|
||||
|
||||
# Longest-first substring matching must resolve both the bare V4
|
||||
# ids (native DeepSeek) and the vendor-prefixed forms (OpenRouter
|
||||
# / Nous Portal) to 1M without probing down to the legacy 128K
|
||||
# ``deepseek`` substring fallback.
|
||||
with mock_patch("agent.model_metadata.fetch_model_metadata", return_value={}), \
|
||||
mock_patch("agent.model_metadata.fetch_endpoint_model_metadata", return_value={}), \
|
||||
mock_patch("agent.model_metadata.get_cached_context_length", return_value=None):
|
||||
cases = [
|
||||
("deepseek-v4-pro", 1_000_000),
|
||||
("deepseek-v4-flash", 1_000_000),
|
||||
("deepseek/deepseek-v4-pro", 1_000_000),
|
||||
("deepseek/deepseek-v4-flash", 1_000_000),
|
||||
("deepseek-chat", 1_000_000),
|
||||
("deepseek-reasoner", 1_000_000),
|
||||
]
|
||||
for model_id, expected_ctx in cases:
|
||||
actual = get_model_context_length(model_id)
|
||||
assert actual == expected_ctx, (
|
||||
f"{model_id}: expected {expected_ctx}, got {actual}"
|
||||
)
|
||||
|
||||
def test_all_values_positive(self):
|
||||
for key, value in DEFAULT_CONTEXT_LENGTHS.items():
|
||||
assert value > 0, f"{key} has non-positive context length"
|
||||
@@ -340,9 +303,7 @@ class TestCodexOAuthContextLength:
|
||||
from agent.model_metadata import get_model_context_length
|
||||
|
||||
# OpenRouter — should hit its own catalog path first; when mocked
|
||||
# empty, falls through to hardcoded DEFAULT_CONTEXT_LENGTHS (1.05M,
|
||||
# matching the real direct-API value — Codex OAuth's 272k cap is
|
||||
# provider-specific and must not leak here).
|
||||
# empty, falls through to hardcoded DEFAULT_CONTEXT_LENGTHS (400k).
|
||||
with patch("agent.model_metadata.fetch_model_metadata", return_value={}), \
|
||||
patch("agent.model_metadata.fetch_endpoint_model_metadata", return_value={}), \
|
||||
patch("agent.model_metadata.get_cached_context_length", return_value=None), \
|
||||
@@ -353,7 +314,7 @@ class TestCodexOAuthContextLength:
|
||||
api_key="",
|
||||
provider="openrouter",
|
||||
)
|
||||
assert ctx == 1_050_000, (
|
||||
assert ctx == 400_000, (
|
||||
f"Non-Codex gpt-5.5 resolved to {ctx}; Codex 272k override "
|
||||
"leaked outside openai-codex provider"
|
||||
)
|
||||
@@ -498,10 +459,9 @@ class TestGetModelContextLength:
|
||||
|
||||
@patch("agent.model_metadata.fetch_model_metadata")
|
||||
def test_api_missing_context_length_key(self, mock_fetch):
|
||||
"""Model in API but without context_length → defaults to the top
|
||||
probe tier (currently 256K)."""
|
||||
"""Model in API but without context_length → defaults to 128000."""
|
||||
mock_fetch.return_value = {"test/model": {"name": "Test"}}
|
||||
assert get_model_context_length("test/model") == CONTEXT_PROBE_TIERS[0]
|
||||
assert get_model_context_length("test/model") == 128000
|
||||
|
||||
@patch("agent.model_metadata.fetch_model_metadata")
|
||||
def test_cache_takes_priority_over_api(self, mock_fetch, tmp_path):
|
||||
@@ -854,17 +814,14 @@ class TestContextProbeTiers:
|
||||
for i in range(len(CONTEXT_PROBE_TIERS) - 1):
|
||||
assert CONTEXT_PROBE_TIERS[i] > CONTEXT_PROBE_TIERS[i + 1]
|
||||
|
||||
def test_first_tier_is_256k(self):
|
||||
assert CONTEXT_PROBE_TIERS[0] == 256_000
|
||||
def test_first_tier_is_128k(self):
|
||||
assert CONTEXT_PROBE_TIERS[0] == 128_000
|
||||
|
||||
def test_last_tier_is_8k(self):
|
||||
assert CONTEXT_PROBE_TIERS[-1] == 8_000
|
||||
|
||||
|
||||
class TestGetNextProbeTier:
|
||||
def test_from_256k(self):
|
||||
assert get_next_probe_tier(256_000) == 128_000
|
||||
|
||||
def test_from_128k(self):
|
||||
assert get_next_probe_tier(128_000) == 64_000
|
||||
|
||||
@@ -884,8 +841,8 @@ class TestGetNextProbeTier:
|
||||
assert get_next_probe_tier(100_000) == 64_000
|
||||
|
||||
def test_above_max_tier(self):
|
||||
"""Value above 256K should return 256K."""
|
||||
assert get_next_probe_tier(500_000) == 256_000
|
||||
"""Value above 128K should return 128K."""
|
||||
assert get_next_probe_tier(500_000) == 128_000
|
||||
|
||||
def test_zero_returns_none(self):
|
||||
assert get_next_probe_tier(0) is None
|
||||
|
||||
@@ -251,141 +251,3 @@ class TestAuxiliaryClientIntegration:
|
||||
monkeypatch.setattr(aux, "_read_nous_auth", lambda: None)
|
||||
result = aux._try_nous()
|
||||
assert result == (None, None)
|
||||
|
||||
|
||||
class TestIsGenuineNousRateLimit:
|
||||
"""Tell a real account-level 429 apart from an upstream-capacity 429.
|
||||
|
||||
Nous Portal multiplexes upstreams (DeepSeek, Kimi, MiMo, Hermes).
|
||||
A 429 from an upstream out of capacity should NOT trip the
|
||||
cross-session breaker; a real user-quota 429 should.
|
||||
"""
|
||||
|
||||
def test_exhausted_hourly_bucket_in_429_headers_is_genuine(self):
|
||||
from agent.nous_rate_guard import is_genuine_nous_rate_limit
|
||||
|
||||
headers = {
|
||||
"x-ratelimit-limit-requests-1h": "800",
|
||||
"x-ratelimit-remaining-requests-1h": "0",
|
||||
"x-ratelimit-reset-requests-1h": "3100",
|
||||
"x-ratelimit-limit-requests": "200",
|
||||
"x-ratelimit-remaining-requests": "198",
|
||||
"x-ratelimit-reset-requests": "40",
|
||||
}
|
||||
assert is_genuine_nous_rate_limit(headers=headers) is True
|
||||
|
||||
def test_exhausted_tokens_bucket_is_genuine(self):
|
||||
from agent.nous_rate_guard import is_genuine_nous_rate_limit
|
||||
|
||||
headers = {
|
||||
"x-ratelimit-limit-tokens": "800000",
|
||||
"x-ratelimit-remaining-tokens": "0",
|
||||
"x-ratelimit-reset-tokens": "45", # < 60s threshold -> not genuine
|
||||
"x-ratelimit-limit-tokens-1h": "8000000",
|
||||
"x-ratelimit-remaining-tokens-1h": "0",
|
||||
"x-ratelimit-reset-tokens-1h": "1800", # >= 60s threshold -> genuine
|
||||
}
|
||||
assert is_genuine_nous_rate_limit(headers=headers) is True
|
||||
|
||||
def test_healthy_headers_on_429_are_upstream_capacity(self):
|
||||
# Classic upstream-capacity symptom: Nous edge reports plenty of
|
||||
# headroom on every bucket, but returns 429 anyway because
|
||||
# upstream (DeepSeek / Kimi / ...) is out of capacity.
|
||||
from agent.nous_rate_guard import is_genuine_nous_rate_limit
|
||||
|
||||
headers = {
|
||||
"x-ratelimit-limit-requests": "200",
|
||||
"x-ratelimit-remaining-requests": "198",
|
||||
"x-ratelimit-reset-requests": "40",
|
||||
"x-ratelimit-limit-requests-1h": "800",
|
||||
"x-ratelimit-remaining-requests-1h": "750",
|
||||
"x-ratelimit-reset-requests-1h": "3100",
|
||||
"x-ratelimit-limit-tokens": "800000",
|
||||
"x-ratelimit-remaining-tokens": "790000",
|
||||
"x-ratelimit-reset-tokens": "40",
|
||||
"x-ratelimit-limit-tokens-1h": "8000000",
|
||||
"x-ratelimit-remaining-tokens-1h": "7800000",
|
||||
"x-ratelimit-reset-tokens-1h": "3100",
|
||||
}
|
||||
assert is_genuine_nous_rate_limit(headers=headers) is False
|
||||
|
||||
def test_bare_429_with_no_headers_is_upstream(self):
|
||||
from agent.nous_rate_guard import is_genuine_nous_rate_limit
|
||||
|
||||
assert is_genuine_nous_rate_limit(headers=None) is False
|
||||
assert is_genuine_nous_rate_limit(headers={}) is False
|
||||
assert is_genuine_nous_rate_limit(
|
||||
headers={"content-type": "application/json"}
|
||||
) is False
|
||||
|
||||
def test_exhausted_bucket_with_short_reset_is_not_genuine(self):
|
||||
# remaining == 0 but reset in < 60s: almost certainly a
|
||||
# secondary per-minute throttle that will clear immediately --
|
||||
# not worth tripping the cross-session breaker.
|
||||
from agent.nous_rate_guard import is_genuine_nous_rate_limit
|
||||
|
||||
headers = {
|
||||
"x-ratelimit-limit-requests": "200",
|
||||
"x-ratelimit-remaining-requests": "0",
|
||||
"x-ratelimit-reset-requests": "30",
|
||||
}
|
||||
assert is_genuine_nous_rate_limit(headers=headers) is False
|
||||
|
||||
def test_last_known_state_with_exhausted_bucket_triggers_genuine(self):
|
||||
# Headers on the 429 lack rate-limit info, but the previous
|
||||
# successful response already showed the hourly bucket
|
||||
# exhausted -- the 429 is almost certainly that limit
|
||||
# continuing.
|
||||
from agent.nous_rate_guard import is_genuine_nous_rate_limit
|
||||
from agent.rate_limit_tracker import parse_rate_limit_headers
|
||||
|
||||
prior_headers = {
|
||||
"x-ratelimit-limit-requests-1h": "800",
|
||||
"x-ratelimit-remaining-requests-1h": "0",
|
||||
"x-ratelimit-reset-requests-1h": "2000",
|
||||
"x-ratelimit-limit-requests": "200",
|
||||
"x-ratelimit-remaining-requests": "100",
|
||||
"x-ratelimit-reset-requests": "30",
|
||||
"x-ratelimit-limit-tokens": "800000",
|
||||
"x-ratelimit-remaining-tokens": "700000",
|
||||
"x-ratelimit-reset-tokens": "30",
|
||||
"x-ratelimit-limit-tokens-1h": "8000000",
|
||||
"x-ratelimit-remaining-tokens-1h": "7000000",
|
||||
"x-ratelimit-reset-tokens-1h": "2000",
|
||||
}
|
||||
last_state = parse_rate_limit_headers(prior_headers, provider="nous")
|
||||
assert is_genuine_nous_rate_limit(
|
||||
headers=None, last_known_state=last_state
|
||||
) is True
|
||||
|
||||
def test_last_known_state_all_healthy_stays_upstream(self):
|
||||
# Prior state was healthy; bare 429 arrives; should be treated
|
||||
# as upstream capacity.
|
||||
from agent.nous_rate_guard import is_genuine_nous_rate_limit
|
||||
from agent.rate_limit_tracker import parse_rate_limit_headers
|
||||
|
||||
prior_headers = {
|
||||
"x-ratelimit-limit-requests-1h": "800",
|
||||
"x-ratelimit-remaining-requests-1h": "750",
|
||||
"x-ratelimit-reset-requests-1h": "2000",
|
||||
"x-ratelimit-limit-requests": "200",
|
||||
"x-ratelimit-remaining-requests": "180",
|
||||
"x-ratelimit-reset-requests": "30",
|
||||
"x-ratelimit-limit-tokens": "800000",
|
||||
"x-ratelimit-remaining-tokens": "790000",
|
||||
"x-ratelimit-reset-tokens": "30",
|
||||
"x-ratelimit-limit-tokens-1h": "8000000",
|
||||
"x-ratelimit-remaining-tokens-1h": "7900000",
|
||||
"x-ratelimit-reset-tokens-1h": "2000",
|
||||
}
|
||||
last_state = parse_rate_limit_headers(prior_headers, provider="nous")
|
||||
assert is_genuine_nous_rate_limit(
|
||||
headers=None, last_known_state=last_state
|
||||
) is False
|
||||
|
||||
def test_none_last_state_and_no_headers_is_upstream(self):
|
||||
from agent.nous_rate_guard import is_genuine_nous_rate_limit
|
||||
|
||||
assert is_genuine_nous_rate_limit(
|
||||
headers=None, last_known_state=None
|
||||
) is False
|
||||
|
||||
@@ -1,164 +0,0 @@
|
||||
"""Tests for agent/onboarding.py — contextual first-touch hint helpers."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import yaml
|
||||
import pytest
|
||||
|
||||
from agent.onboarding import (
|
||||
BUSY_INPUT_FLAG,
|
||||
TOOL_PROGRESS_FLAG,
|
||||
busy_input_hint_cli,
|
||||
busy_input_hint_gateway,
|
||||
is_seen,
|
||||
mark_seen,
|
||||
tool_progress_hint_cli,
|
||||
tool_progress_hint_gateway,
|
||||
)
|
||||
|
||||
|
||||
class TestIsSeen:
|
||||
def test_empty_config_unseen(self):
|
||||
assert is_seen({}, BUSY_INPUT_FLAG) is False
|
||||
|
||||
def test_missing_onboarding_unseen(self):
|
||||
assert is_seen({"display": {}}, BUSY_INPUT_FLAG) is False
|
||||
|
||||
def test_onboarding_not_dict_unseen(self):
|
||||
assert is_seen({"onboarding": "nope"}, BUSY_INPUT_FLAG) is False
|
||||
|
||||
def test_seen_dict_missing_flag(self):
|
||||
assert is_seen({"onboarding": {"seen": {}}}, BUSY_INPUT_FLAG) is False
|
||||
|
||||
def test_seen_flag_true(self):
|
||||
cfg = {"onboarding": {"seen": {BUSY_INPUT_FLAG: True}}}
|
||||
assert is_seen(cfg, BUSY_INPUT_FLAG) is True
|
||||
|
||||
def test_seen_flag_falsy(self):
|
||||
cfg = {"onboarding": {"seen": {BUSY_INPUT_FLAG: False}}}
|
||||
assert is_seen(cfg, BUSY_INPUT_FLAG) is False
|
||||
|
||||
def test_other_flags_isolated(self):
|
||||
cfg = {"onboarding": {"seen": {BUSY_INPUT_FLAG: True}}}
|
||||
assert is_seen(cfg, TOOL_PROGRESS_FLAG) is False
|
||||
|
||||
|
||||
class TestMarkSeen:
|
||||
def test_creates_missing_file_and_sets_flag(self, tmp_path):
|
||||
cfg_path = tmp_path / "config.yaml"
|
||||
assert mark_seen(cfg_path, BUSY_INPUT_FLAG) is True
|
||||
|
||||
loaded = yaml.safe_load(cfg_path.read_text())
|
||||
assert loaded["onboarding"]["seen"][BUSY_INPUT_FLAG] is True
|
||||
|
||||
def test_preserves_other_config(self, tmp_path):
|
||||
cfg_path = tmp_path / "config.yaml"
|
||||
cfg_path.write_text(yaml.safe_dump({
|
||||
"model": {"default": "claude-sonnet-4.6"},
|
||||
"display": {"skin": "default"},
|
||||
}))
|
||||
|
||||
assert mark_seen(cfg_path, BUSY_INPUT_FLAG) is True
|
||||
loaded = yaml.safe_load(cfg_path.read_text())
|
||||
|
||||
assert loaded["model"]["default"] == "claude-sonnet-4.6"
|
||||
assert loaded["display"]["skin"] == "default"
|
||||
assert loaded["onboarding"]["seen"][BUSY_INPUT_FLAG] is True
|
||||
|
||||
def test_preserves_other_seen_flags(self, tmp_path):
|
||||
cfg_path = tmp_path / "config.yaml"
|
||||
cfg_path.write_text(yaml.safe_dump({
|
||||
"onboarding": {"seen": {TOOL_PROGRESS_FLAG: True}},
|
||||
}))
|
||||
|
||||
assert mark_seen(cfg_path, BUSY_INPUT_FLAG) is True
|
||||
loaded = yaml.safe_load(cfg_path.read_text())
|
||||
|
||||
assert loaded["onboarding"]["seen"][TOOL_PROGRESS_FLAG] is True
|
||||
assert loaded["onboarding"]["seen"][BUSY_INPUT_FLAG] is True
|
||||
|
||||
def test_idempotent(self, tmp_path):
|
||||
cfg_path = tmp_path / "config.yaml"
|
||||
mark_seen(cfg_path, BUSY_INPUT_FLAG)
|
||||
first = cfg_path.read_text()
|
||||
|
||||
# Second call must be a no-op on-disk content (file may be touched,
|
||||
# but the YAML contents should be identical).
|
||||
mark_seen(cfg_path, BUSY_INPUT_FLAG)
|
||||
second = cfg_path.read_text()
|
||||
|
||||
assert yaml.safe_load(first) == yaml.safe_load(second)
|
||||
|
||||
def test_handles_non_dict_onboarding(self, tmp_path):
|
||||
cfg_path = tmp_path / "config.yaml"
|
||||
cfg_path.write_text(yaml.safe_dump({"onboarding": "corrupted"}))
|
||||
|
||||
assert mark_seen(cfg_path, BUSY_INPUT_FLAG) is True
|
||||
loaded = yaml.safe_load(cfg_path.read_text())
|
||||
assert loaded["onboarding"]["seen"][BUSY_INPUT_FLAG] is True
|
||||
|
||||
def test_handles_non_dict_seen(self, tmp_path):
|
||||
cfg_path = tmp_path / "config.yaml"
|
||||
cfg_path.write_text(yaml.safe_dump({"onboarding": {"seen": "corrupted"}}))
|
||||
|
||||
assert mark_seen(cfg_path, BUSY_INPUT_FLAG) is True
|
||||
loaded = yaml.safe_load(cfg_path.read_text())
|
||||
assert loaded["onboarding"]["seen"][BUSY_INPUT_FLAG] is True
|
||||
|
||||
|
||||
class TestHintMessages:
|
||||
def test_busy_input_hint_gateway_interrupt(self):
|
||||
msg = busy_input_hint_gateway("interrupt")
|
||||
assert "/busy queue" in msg
|
||||
assert "interrupted" in msg.lower()
|
||||
|
||||
def test_busy_input_hint_gateway_queue(self):
|
||||
msg = busy_input_hint_gateway("queue")
|
||||
assert "/busy interrupt" in msg
|
||||
assert "queued" in msg.lower()
|
||||
|
||||
def test_busy_input_hint_cli_interrupt(self):
|
||||
msg = busy_input_hint_cli("interrupt")
|
||||
assert "/busy queue" in msg
|
||||
|
||||
def test_busy_input_hint_cli_queue(self):
|
||||
msg = busy_input_hint_cli("queue")
|
||||
assert "/busy interrupt" in msg
|
||||
|
||||
def test_tool_progress_hints_mention_verbose(self):
|
||||
assert "/verbose" in tool_progress_hint_gateway()
|
||||
assert "/verbose" in tool_progress_hint_cli()
|
||||
|
||||
def test_hints_are_not_empty(self):
|
||||
for hint in (
|
||||
busy_input_hint_gateway("queue"),
|
||||
busy_input_hint_gateway("interrupt"),
|
||||
busy_input_hint_cli("queue"),
|
||||
busy_input_hint_cli("interrupt"),
|
||||
tool_progress_hint_gateway(),
|
||||
tool_progress_hint_cli(),
|
||||
):
|
||||
assert hint.strip()
|
||||
|
||||
|
||||
class TestRoundTrip:
|
||||
"""After mark_seen, is_seen on the re-loaded config must return True."""
|
||||
|
||||
def test_mark_then_is_seen(self, tmp_path):
|
||||
cfg_path = tmp_path / "config.yaml"
|
||||
|
||||
assert mark_seen(cfg_path, BUSY_INPUT_FLAG) is True
|
||||
loaded = yaml.safe_load(cfg_path.read_text())
|
||||
|
||||
assert is_seen(loaded, BUSY_INPUT_FLAG) is True
|
||||
assert is_seen(loaded, TOOL_PROGRESS_FLAG) is False
|
||||
|
||||
def test_mark_both_flags_independently(self, tmp_path):
|
||||
cfg_path = tmp_path / "config.yaml"
|
||||
|
||||
mark_seen(cfg_path, BUSY_INPUT_FLAG)
|
||||
mark_seen(cfg_path, TOOL_PROGRESS_FLAG)
|
||||
loaded = yaml.safe_load(cfg_path.read_text())
|
||||
|
||||
assert is_seen(loaded, BUSY_INPUT_FLAG) is True
|
||||
assert is_seen(loaded, TOOL_PROGRESS_FLAG) is True
|
||||
@@ -1,201 +0,0 @@
|
||||
"""Regression tests for the generic unsupported-parameter detector in
|
||||
``agent.auxiliary_client``.
|
||||
|
||||
The original temperature-specific detector (PR #15621) was generalized so the
|
||||
same reactive-retry strategy covers any provider that rejects an arbitrary
|
||||
request parameter — ``max_tokens``, ``seed``, ``top_p``, future quirks — not
|
||||
just ``temperature``. Credit @nicholasrae (PR #15416) for the generalization
|
||||
pattern.
|
||||
|
||||
These tests lock in:
|
||||
* ``_is_unsupported_parameter_error(exc, param)`` across common phrasings
|
||||
* the back-compat wrapper ``_is_unsupported_temperature_error`` still works
|
||||
* the max_tokens retry branch no longer pops a key that was never set
|
||||
(``max_tokens is None`` gate)
|
||||
* the max_tokens retry branch matches via the generic helper on top of the
|
||||
legacy ``"max_tokens"`` / ``"unsupported_parameter"`` substring checks
|
||||
"""
|
||||
|
||||
from unittest.mock import patch, MagicMock, AsyncMock
|
||||
|
||||
import pytest
|
||||
|
||||
from agent.auxiliary_client import (
|
||||
call_llm,
|
||||
async_call_llm,
|
||||
_is_unsupported_parameter_error,
|
||||
_is_unsupported_temperature_error,
|
||||
)
|
||||
|
||||
|
||||
class TestIsUnsupportedParameterError:
|
||||
"""The generic detector must match real provider phrasings for any param."""
|
||||
|
||||
@pytest.mark.parametrize("param,message", [
|
||||
# temperature phrasings (regression coverage via the generic API)
|
||||
("temperature", "HTTP 400: Unsupported parameter: temperature"),
|
||||
("temperature", "Error code: 400 - {'error': {'code': 'unsupported_parameter', 'param': 'temperature'}}"),
|
||||
("temperature", "this model does not support temperature"),
|
||||
# max_tokens phrasings
|
||||
("max_tokens", "HTTP 400: Unsupported parameter: max_tokens"),
|
||||
("max_tokens", "Unknown parameter: max_tokens — use max_completion_tokens"),
|
||||
("max_tokens", "Invalid parameter: max_tokens is not supported"),
|
||||
# arbitrary future params
|
||||
("seed", "HTTP 400: unrecognized parameter: seed"),
|
||||
("top_p", "Error: top_p is not supported for this model"),
|
||||
])
|
||||
def test_matches_real_provider_messages(self, param, message):
|
||||
assert _is_unsupported_parameter_error(RuntimeError(message), param) is True
|
||||
|
||||
@pytest.mark.parametrize("param,message", [
|
||||
# Param not mentioned at all
|
||||
("temperature", "HTTP 400: max_tokens is too large"),
|
||||
# Param mentioned but not flagged as unsupported
|
||||
("temperature", "temperature must be between 0 and 2"),
|
||||
# Totally unrelated 400
|
||||
("max_tokens", "Rate limit exceeded"),
|
||||
# Connection-level errors
|
||||
("temperature", "Connection reset by peer"),
|
||||
])
|
||||
def test_does_not_match_unrelated_errors(self, param, message):
|
||||
assert _is_unsupported_parameter_error(RuntimeError(message), param) is False
|
||||
|
||||
def test_empty_param_returns_false(self):
|
||||
assert _is_unsupported_parameter_error(
|
||||
RuntimeError("HTTP 400: Unsupported parameter: temperature"), ""
|
||||
) is False
|
||||
|
||||
def test_temperature_wrapper_delegates_to_generic(self):
|
||||
"""Back-compat: ``_is_unsupported_temperature_error`` still routes through."""
|
||||
msg = "HTTP 400: Unsupported parameter: temperature"
|
||||
assert _is_unsupported_temperature_error(RuntimeError(msg)) is True
|
||||
# And the unrelated-case still holds
|
||||
assert _is_unsupported_temperature_error(
|
||||
RuntimeError("max_tokens is too large")) is False
|
||||
|
||||
|
||||
def _dummy_response():
|
||||
"""Sentinel — real code calls ``_validate_llm_response`` which we patch out."""
|
||||
return {"ok": True}
|
||||
|
||||
|
||||
class TestMaxTokensRetryHardening:
|
||||
"""The max_tokens retry branch now (a) gates on ``max_tokens is not None``
|
||||
and (b) also matches the generic phrasings via the helper.
|
||||
"""
|
||||
|
||||
def test_sync_max_tokens_retry_skipped_when_max_tokens_is_none(self):
|
||||
"""No max_tokens kwarg → must not pop/retry even if the error mentions it.
|
||||
|
||||
Before the hardening, ``kwargs.pop("max_tokens", None)`` was safe but
|
||||
``kwargs["max_completion_tokens"] = max_tokens`` would set a None
|
||||
value and hit the provider again. The gate skips the whole branch.
|
||||
"""
|
||||
client = MagicMock()
|
||||
client.base_url = "https://api.openai.com/v1"
|
||||
err = RuntimeError("HTTP 400: Unsupported parameter: max_tokens")
|
||||
client.chat.completions.create.side_effect = err
|
||||
|
||||
with (
|
||||
patch("agent.auxiliary_client._resolve_task_provider_model",
|
||||
return_value=("openai-codex", "gpt-5.5", None, None, None)),
|
||||
patch("agent.auxiliary_client._get_cached_client",
|
||||
return_value=(client, "gpt-5.5")),
|
||||
patch("agent.auxiliary_client._validate_llm_response",
|
||||
side_effect=lambda resp, _task: resp),
|
||||
):
|
||||
with pytest.raises(RuntimeError):
|
||||
call_llm(
|
||||
task="session_search",
|
||||
messages=[{"role": "user", "content": "hi"}],
|
||||
temperature=0.3,
|
||||
# max_tokens omitted on purpose
|
||||
)
|
||||
|
||||
# Only the initial attempt — no retry because the gate blocked it
|
||||
assert client.chat.completions.create.call_count == 1
|
||||
|
||||
def test_sync_max_tokens_retry_matches_generic_phrasing(self):
|
||||
"""A 400 saying "Unknown parameter: max_tokens" (not the legacy
|
||||
substring ``"max_tokens"`` bare + no ``unsupported_parameter`` token)
|
||||
now triggers the retry via the generic helper.
|
||||
"""
|
||||
client = MagicMock()
|
||||
client.base_url = "https://api.openai.com/v1"
|
||||
err = RuntimeError("Unknown parameter: max_tokens")
|
||||
response = _dummy_response()
|
||||
client.chat.completions.create.side_effect = [err, response]
|
||||
|
||||
with (
|
||||
patch("agent.auxiliary_client._resolve_task_provider_model",
|
||||
return_value=("openai-codex", "gpt-5.5", None, None, None)),
|
||||
patch("agent.auxiliary_client._get_cached_client",
|
||||
return_value=(client, "gpt-5.5")),
|
||||
patch("agent.auxiliary_client._validate_llm_response",
|
||||
side_effect=lambda resp, _task: resp),
|
||||
):
|
||||
result = call_llm(
|
||||
task="session_search",
|
||||
messages=[{"role": "user", "content": "hi"}],
|
||||
temperature=0.3,
|
||||
max_tokens=512,
|
||||
)
|
||||
|
||||
assert result is response
|
||||
assert client.chat.completions.create.call_count == 2
|
||||
second_call = client.chat.completions.create.call_args_list[1]
|
||||
assert "max_tokens" not in second_call.kwargs
|
||||
assert second_call.kwargs["max_completion_tokens"] == 512
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_async_max_tokens_retry_skipped_when_max_tokens_is_none(self):
|
||||
client = MagicMock()
|
||||
client.base_url = "https://api.openai.com/v1"
|
||||
err = RuntimeError("HTTP 400: Unsupported parameter: max_tokens")
|
||||
client.chat.completions.create = AsyncMock(side_effect=err)
|
||||
|
||||
with (
|
||||
patch("agent.auxiliary_client._resolve_task_provider_model",
|
||||
return_value=("openai-codex", "gpt-5.5", None, None, None)),
|
||||
patch("agent.auxiliary_client._get_cached_client",
|
||||
return_value=(client, "gpt-5.5")),
|
||||
patch("agent.auxiliary_client._validate_llm_response",
|
||||
side_effect=lambda resp, _task: resp),
|
||||
):
|
||||
with pytest.raises(RuntimeError):
|
||||
await async_call_llm(
|
||||
task="session_search",
|
||||
messages=[{"role": "user", "content": "hi"}],
|
||||
temperature=0.3,
|
||||
)
|
||||
|
||||
assert client.chat.completions.create.call_count == 1
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_async_max_tokens_retry_matches_generic_phrasing(self):
|
||||
client = MagicMock()
|
||||
client.base_url = "https://api.openai.com/v1"
|
||||
err = RuntimeError("Unknown parameter: max_tokens")
|
||||
response = _dummy_response()
|
||||
client.chat.completions.create = AsyncMock(side_effect=[err, response])
|
||||
|
||||
with (
|
||||
patch("agent.auxiliary_client._resolve_task_provider_model",
|
||||
return_value=("openai-codex", "gpt-5.5", None, None, None)),
|
||||
patch("agent.auxiliary_client._get_cached_client",
|
||||
return_value=(client, "gpt-5.5")),
|
||||
patch("agent.auxiliary_client._validate_llm_response",
|
||||
side_effect=lambda resp, _task: resp),
|
||||
):
|
||||
result = await async_call_llm(
|
||||
task="session_search",
|
||||
messages=[{"role": "user", "content": "hi"}],
|
||||
temperature=0.3,
|
||||
max_tokens=512,
|
||||
)
|
||||
|
||||
assert result is response
|
||||
assert client.chat.completions.create.await_count == 2
|
||||
second_call = client.chat.completions.create.call_args_list[1]
|
||||
assert "max_tokens" not in second_call.kwargs
|
||||
assert second_call.kwargs["max_completion_tokens"] == 512
|
||||
@@ -1,237 +0,0 @@
|
||||
"""Regression tests for the universal "unsupported temperature" retry in
|
||||
``agent.auxiliary_client``.
|
||||
|
||||
Auxiliary callers (context compression, session search,
|
||||
web extract summarisation, etc.) hardcode ``temperature=0.3`` for historical
|
||||
reasons. Several provider/model combinations reject ``temperature`` with a
|
||||
400:
|
||||
|
||||
* OpenAI Responses (gpt-5/o-series reasoning models)
|
||||
* Copilot Responses (reasoning models)
|
||||
* OpenRouter reasoning models (gpt-5.5, some anthropic via OAI-compat)
|
||||
* Anthropic Opus 4.7+ via OpenAI-compat endpoints
|
||||
* Kimi/Moonshot (server-managed)
|
||||
|
||||
``_fixed_temperature_for_model`` catches Kimi up front, and
|
||||
``build_chat_completion_kwargs`` drops temperature for Anthropic Opus 4.7+,
|
||||
but the same backend can accept ``temperature`` for some models and reject
|
||||
it for others (for example gpt-5.4 accepts but gpt-5.5 rejects on the same
|
||||
endpoint). An allow/deny-list is not maintainable across providers.
|
||||
|
||||
The universal fix is reactive: when a call returns an
|
||||
``Unsupported parameter: temperature`` 400, retry once without temperature.
|
||||
These tests lock in that behaviour for both sync and async paths.
|
||||
"""
|
||||
|
||||
from unittest.mock import patch, MagicMock, AsyncMock
|
||||
|
||||
import pytest
|
||||
|
||||
from agent.auxiliary_client import (
|
||||
call_llm,
|
||||
async_call_llm,
|
||||
_is_unsupported_temperature_error,
|
||||
)
|
||||
|
||||
|
||||
class TestIsUnsupportedTemperatureError:
|
||||
"""The detector must match the phrasings providers actually return."""
|
||||
|
||||
@pytest.mark.parametrize("message", [
|
||||
# OpenAI / Codex Responses
|
||||
"HTTP 400: Unsupported parameter: temperature",
|
||||
"Error code: 400 - {'error': {'message': \"Unsupported parameter: 'temperature'\"}}",
|
||||
# Copilot / OpenAI error-code form
|
||||
"Error code: 400 - {'error': {'code': 'unsupported_parameter', 'param': 'temperature'}}",
|
||||
# OpenRouter-style
|
||||
"Provider returned error: temperature is not supported for this model",
|
||||
"this model does not support temperature",
|
||||
# Anthropic-style via OAI-compat
|
||||
"temperature: unknown parameter",
|
||||
# Some gateways
|
||||
"unrecognized request argument supplied: temperature",
|
||||
])
|
||||
def test_matches_real_provider_messages(self, message):
|
||||
assert _is_unsupported_temperature_error(RuntimeError(message)) is True
|
||||
|
||||
@pytest.mark.parametrize("message", [
|
||||
# Unrelated 400s must NOT trigger a silent-retry
|
||||
"HTTP 400: Invalid value: 'tool'. Supported values are: 'assistant'...",
|
||||
"max_tokens is too large for this model",
|
||||
"Rate limit exceeded",
|
||||
"Connection reset by peer",
|
||||
# Temperature value error is a different class of problem
|
||||
"temperature must be between 0 and 2",
|
||||
])
|
||||
def test_does_not_match_unrelated_errors(self, message):
|
||||
assert _is_unsupported_temperature_error(RuntimeError(message)) is False
|
||||
|
||||
|
||||
def _dummy_response():
|
||||
# The real code calls _validate_llm_response which inspects
|
||||
# response.choices[0].message. The tests here patch that out, so
|
||||
# any sentinel object is fine.
|
||||
return {"ok": True}
|
||||
|
||||
|
||||
class TestCallLlmUnsupportedTemperatureRetry:
|
||||
"""``call_llm`` retries once without temperature and returns on success."""
|
||||
|
||||
def _setup(self, first_exc):
|
||||
client = MagicMock()
|
||||
client.base_url = "https://api.openai.com/v1"
|
||||
client.chat.completions.create.side_effect = [first_exc, _dummy_response()]
|
||||
return client
|
||||
|
||||
@pytest.mark.parametrize("error_message", [
|
||||
"HTTP 400: Unsupported parameter: temperature",
|
||||
"Error code: 400 - {'error': {'code': 'unsupported_parameter', 'param': 'temperature'}}",
|
||||
"Provider error: this model does not support temperature",
|
||||
])
|
||||
def test_retries_once_without_temperature(self, error_message):
|
||||
client = self._setup(RuntimeError(error_message))
|
||||
|
||||
with (
|
||||
patch("agent.auxiliary_client._resolve_task_provider_model",
|
||||
return_value=("openai-codex", "gpt-5.5", None, None, None)),
|
||||
patch("agent.auxiliary_client._get_cached_client",
|
||||
return_value=(client, "gpt-5.5")),
|
||||
patch("agent.auxiliary_client._validate_llm_response",
|
||||
side_effect=lambda resp, _task: resp),
|
||||
):
|
||||
result = call_llm(
|
||||
task="compression",
|
||||
messages=[{"role": "user", "content": "remember this"}],
|
||||
temperature=0.3,
|
||||
max_tokens=500,
|
||||
)
|
||||
|
||||
assert result == {"ok": True}
|
||||
assert client.chat.completions.create.call_count == 2
|
||||
first_kwargs = client.chat.completions.create.call_args_list[0].kwargs
|
||||
retry_kwargs = client.chat.completions.create.call_args_list[1].kwargs
|
||||
assert first_kwargs["temperature"] == 0.3
|
||||
assert "temperature" not in retry_kwargs
|
||||
# other kwargs preserved
|
||||
assert retry_kwargs["max_tokens"] == 500
|
||||
|
||||
def test_non_temperature_400_does_not_retry_as_temperature(self):
|
||||
"""Unrelated 400s (e.g. bad tool role) must not silently drop temp."""
|
||||
client = MagicMock()
|
||||
client.base_url = "https://api.openai.com/v1"
|
||||
non_temp_err = RuntimeError(
|
||||
"HTTP 400: Invalid value: 'tool'. Supported values are: 'assistant'..."
|
||||
)
|
||||
client.chat.completions.create.side_effect = non_temp_err
|
||||
|
||||
with (
|
||||
patch("agent.auxiliary_client._resolve_task_provider_model",
|
||||
return_value=("openai-codex", "gpt-5.5", None, None, None)),
|
||||
patch("agent.auxiliary_client._get_cached_client",
|
||||
return_value=(client, "gpt-5.5")),
|
||||
patch("agent.auxiliary_client._validate_llm_response",
|
||||
side_effect=lambda resp, _task: resp),
|
||||
patch("agent.auxiliary_client._try_payment_fallback",
|
||||
return_value=None),
|
||||
):
|
||||
with pytest.raises(RuntimeError, match="Invalid value"):
|
||||
call_llm(
|
||||
task="compression",
|
||||
messages=[{"role": "user", "content": "x"}],
|
||||
temperature=0.3,
|
||||
max_tokens=500,
|
||||
)
|
||||
# Should NOT have retried (non-temperature 400 doesn't match)
|
||||
assert client.chat.completions.create.call_count == 1
|
||||
|
||||
def test_no_retry_when_temperature_not_in_kwargs(self):
|
||||
"""If caller didn't send temperature, don't invent a temperature-retry."""
|
||||
client = MagicMock()
|
||||
client.base_url = "https://api.openai.com/v1"
|
||||
# Provider complains about temperature even though we didn't send it.
|
||||
# (Pathological but possible with misleading error text.) The guard
|
||||
# ``"temperature" in kwargs`` must prevent an unnecessary retry.
|
||||
err = RuntimeError("HTTP 400: Unsupported parameter: temperature")
|
||||
client.chat.completions.create.side_effect = err
|
||||
|
||||
with (
|
||||
patch("agent.auxiliary_client._resolve_task_provider_model",
|
||||
return_value=("openai-codex", "gpt-5.5", None, None, None)),
|
||||
patch("agent.auxiliary_client._get_cached_client",
|
||||
return_value=(client, "gpt-5.5")),
|
||||
patch("agent.auxiliary_client._validate_llm_response",
|
||||
side_effect=lambda resp, _task: resp),
|
||||
patch("agent.auxiliary_client._try_payment_fallback",
|
||||
return_value=None),
|
||||
):
|
||||
with pytest.raises(RuntimeError):
|
||||
call_llm(
|
||||
task="compression",
|
||||
messages=[{"role": "user", "content": "x"}],
|
||||
temperature=None, # explicit: no temperature sent
|
||||
max_tokens=500,
|
||||
)
|
||||
assert client.chat.completions.create.call_count == 1
|
||||
|
||||
|
||||
class TestAsyncCallLlmUnsupportedTemperatureRetry:
|
||||
"""``async_call_llm`` mirror of the sync retry semantics."""
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_async_retries_once_without_temperature(self):
|
||||
client = MagicMock()
|
||||
client.base_url = "https://api.openai.com/v1"
|
||||
client.chat.completions.create = AsyncMock(side_effect=[
|
||||
RuntimeError("HTTP 400: Unsupported parameter: temperature"),
|
||||
_dummy_response(),
|
||||
])
|
||||
|
||||
with (
|
||||
patch("agent.auxiliary_client._resolve_task_provider_model",
|
||||
return_value=("openai-codex", "gpt-5.5", None, None, None)),
|
||||
patch("agent.auxiliary_client._get_cached_client",
|
||||
return_value=(client, "gpt-5.5")),
|
||||
patch("agent.auxiliary_client._validate_llm_response",
|
||||
side_effect=lambda resp, _task: resp),
|
||||
):
|
||||
result = await async_call_llm(
|
||||
task="session_search",
|
||||
messages=[{"role": "user", "content": "query"}],
|
||||
temperature=0.3,
|
||||
max_tokens=500,
|
||||
)
|
||||
|
||||
assert result == {"ok": True}
|
||||
assert client.chat.completions.create.await_count == 2
|
||||
first_kwargs = client.chat.completions.create.call_args_list[0].kwargs
|
||||
retry_kwargs = client.chat.completions.create.call_args_list[1].kwargs
|
||||
assert first_kwargs["temperature"] == 0.3
|
||||
assert "temperature" not in retry_kwargs
|
||||
assert retry_kwargs["max_tokens"] == 500
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_async_non_temperature_400_does_not_retry(self):
|
||||
client = MagicMock()
|
||||
client.base_url = "https://api.openai.com/v1"
|
||||
client.chat.completions.create = AsyncMock(
|
||||
side_effect=RuntimeError("HTTP 400: Invalid value: 'tool'"),
|
||||
)
|
||||
|
||||
with (
|
||||
patch("agent.auxiliary_client._resolve_task_provider_model",
|
||||
return_value=("openai-codex", "gpt-5.5", None, None, None)),
|
||||
patch("agent.auxiliary_client._get_cached_client",
|
||||
return_value=(client, "gpt-5.5")),
|
||||
patch("agent.auxiliary_client._validate_llm_response",
|
||||
side_effect=lambda resp, _task: resp),
|
||||
patch("agent.auxiliary_client._try_payment_fallback",
|
||||
return_value=None),
|
||||
):
|
||||
with pytest.raises(RuntimeError, match="Invalid value"):
|
||||
await async_call_llm(
|
||||
task="session_search",
|
||||
messages=[{"role": "user", "content": "x"}],
|
||||
temperature=0.3,
|
||||
max_tokens=500,
|
||||
)
|
||||
assert client.chat.completions.create.await_count == 1
|
||||
@@ -33,18 +33,15 @@ class TestChatCompletionsBasic:
|
||||
def test_convert_messages_strips_codex_fields(self, transport):
|
||||
msgs = [
|
||||
{"role": "assistant", "content": "ok", "codex_reasoning_items": [{"id": "rs_1"}],
|
||||
"codex_message_items": [{"id": "msg_1", "type": "message"}],
|
||||
"tool_calls": [{"id": "call_1", "call_id": "call_1", "response_item_id": "fc_1",
|
||||
"type": "function", "function": {"name": "t", "arguments": "{}"}}]},
|
||||
]
|
||||
result = transport.convert_messages(msgs)
|
||||
assert "codex_reasoning_items" not in result[0]
|
||||
assert "codex_message_items" not in result[0]
|
||||
assert "call_id" not in result[0]["tool_calls"][0]
|
||||
assert "response_item_id" not in result[0]["tool_calls"][0]
|
||||
# Original list untouched (deepcopy-on-demand)
|
||||
assert "codex_reasoning_items" in msgs[0]
|
||||
assert "codex_message_items" in msgs[0]
|
||||
|
||||
|
||||
class TestChatCompletionsBuildKwargs:
|
||||
|
||||
@@ -194,36 +194,6 @@ class TestCodexNormalizeResponse:
|
||||
assert nr.content == "Hello world"
|
||||
assert nr.finish_reason == "stop"
|
||||
|
||||
def test_message_items_preserved_in_provider_data(self, transport):
|
||||
"""Codex assistant message item ids/phases must survive transport normalization."""
|
||||
r = SimpleNamespace(
|
||||
output=[
|
||||
SimpleNamespace(
|
||||
type="message",
|
||||
role="assistant",
|
||||
id="msg_abc",
|
||||
phase="final_answer",
|
||||
content=[SimpleNamespace(type="output_text", text="Hello world")],
|
||||
status="completed",
|
||||
),
|
||||
],
|
||||
status="completed",
|
||||
incomplete_details=None,
|
||||
usage=SimpleNamespace(input_tokens=10, output_tokens=5,
|
||||
input_tokens_details=None, output_tokens_details=None),
|
||||
)
|
||||
nr = transport.normalize_response(r)
|
||||
assert nr.codex_message_items == [
|
||||
{
|
||||
"type": "message",
|
||||
"role": "assistant",
|
||||
"status": "completed",
|
||||
"content": [{"type": "output_text", "text": "Hello world"}],
|
||||
"id": "msg_abc",
|
||||
"phase": "final_answer",
|
||||
}
|
||||
]
|
||||
|
||||
def test_tool_call_response(self, transport):
|
||||
"""Normalize a Codex response with tool calls."""
|
||||
r = SimpleNamespace(
|
||||
|
||||
@@ -60,13 +60,6 @@ class TestTransportRegistry:
|
||||
assert t is not None
|
||||
assert t.api_mode == "anthropic_messages"
|
||||
|
||||
def test_discovers_missing_transport_when_registry_partially_populated(self):
|
||||
"""Importing one transport directly must not hide other valid api_modes."""
|
||||
import agent.transports.chat_completions # noqa: F401
|
||||
t = get_transport("codex_responses")
|
||||
assert t is not None
|
||||
assert t.api_mode == "codex_responses"
|
||||
|
||||
def test_register_and_get(self):
|
||||
class DummyTransport(ProviderTransport):
|
||||
@property
|
||||
|
||||
@@ -270,15 +270,3 @@ class TestNormalizedResponseBackwardCompat:
|
||||
def test_codex_reasoning_items_none_when_absent(self):
|
||||
nr = NormalizedResponse(content="hi", tool_calls=None, finish_reason="stop")
|
||||
assert nr.codex_reasoning_items is None
|
||||
|
||||
def test_codex_message_items_from_provider_data(self):
|
||||
items = [{"id": "msg_1", "type": "message"}]
|
||||
nr = NormalizedResponse(
|
||||
content="hi", tool_calls=None, finish_reason="stop",
|
||||
provider_data={"codex_message_items": items},
|
||||
)
|
||||
assert nr.codex_message_items == items
|
||||
|
||||
def test_codex_message_items_none_when_absent(self):
|
||||
nr = NormalizedResponse(content="hi", tool_calls=None, finish_reason="stop")
|
||||
assert nr.codex_message_items is None
|
||||
|
||||
@@ -33,6 +33,7 @@ class _FakeAgent:
|
||||
self._todo_store.write(
|
||||
[{"id": "t1", "content": "unfinished task", "status": "in_progress"}]
|
||||
)
|
||||
self.flush_memories = MagicMock()
|
||||
self.commit_memory_session = MagicMock()
|
||||
self._invalidate_system_prompt = MagicMock()
|
||||
|
||||
@@ -156,6 +157,7 @@ def test_new_command_creates_real_fresh_session_and_resets_agent_state(tmp_path)
|
||||
assert cli.agent._todo_store.read() == []
|
||||
assert cli.session_start > old_session_start
|
||||
assert cli.agent.session_start == cli.session_start
|
||||
cli.agent.flush_memories.assert_called_once_with([{"role": "user", "content": "hello"}])
|
||||
cli.agent._invalidate_system_prompt.assert_called_once()
|
||||
|
||||
|
||||
|
||||
@@ -1,390 +0,0 @@
|
||||
"""Tests for cron job context_from feature (issue #5439 Option C)."""
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent.parent))
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def cron_env(tmp_path, monkeypatch):
|
||||
"""Isolated cron environment with temp HERMES_HOME."""
|
||||
hermes_home = tmp_path / ".hermes"
|
||||
hermes_home.mkdir()
|
||||
(hermes_home / "cron").mkdir()
|
||||
(hermes_home / "cron" / "output").mkdir()
|
||||
monkeypatch.setenv("HERMES_HOME", str(hermes_home))
|
||||
|
||||
import cron.jobs as jobs_mod
|
||||
monkeypatch.setattr(jobs_mod, "HERMES_DIR", hermes_home)
|
||||
monkeypatch.setattr(jobs_mod, "CRON_DIR", hermes_home / "cron")
|
||||
monkeypatch.setattr(jobs_mod, "JOBS_FILE", hermes_home / "cron" / "jobs.json")
|
||||
monkeypatch.setattr(jobs_mod, "OUTPUT_DIR", hermes_home / "cron" / "output")
|
||||
|
||||
return hermes_home
|
||||
|
||||
|
||||
class TestJobContextFromField:
|
||||
"""Test that context_from is stored and retrieved correctly."""
|
||||
|
||||
def test_create_job_with_context_from_string(self, cron_env):
|
||||
from cron.jobs import create_job, get_job
|
||||
|
||||
job_a = create_job(prompt="Find news", schedule="every 1h")
|
||||
job_b = create_job(
|
||||
prompt="Summarize findings",
|
||||
schedule="every 2h",
|
||||
context_from=job_a["id"],
|
||||
)
|
||||
|
||||
assert job_b["context_from"] == [job_a["id"]]
|
||||
loaded = get_job(job_b["id"])
|
||||
assert loaded["context_from"] == [job_a["id"]]
|
||||
|
||||
def test_create_job_with_context_from_list(self, cron_env):
|
||||
from cron.jobs import create_job, get_job
|
||||
|
||||
job_a = create_job(prompt="Find news", schedule="every 1h")
|
||||
job_b = create_job(prompt="Find weather", schedule="every 1h")
|
||||
job_c = create_job(
|
||||
prompt="Summarize everything",
|
||||
schedule="every 2h",
|
||||
context_from=[job_a["id"], job_b["id"]],
|
||||
)
|
||||
|
||||
assert job_c["context_from"] == [job_a["id"], job_b["id"]]
|
||||
|
||||
def test_create_job_without_context_from(self, cron_env):
|
||||
from cron.jobs import create_job
|
||||
|
||||
job = create_job(prompt="Hello", schedule="every 1h")
|
||||
assert job.get("context_from") is None
|
||||
|
||||
def test_context_from_empty_string_normalized_to_none(self, cron_env):
|
||||
from cron.jobs import create_job
|
||||
|
||||
job = create_job(prompt="Hello", schedule="every 1h", context_from="")
|
||||
assert job.get("context_from") is None
|
||||
|
||||
def test_context_from_empty_list_normalized_to_none(self, cron_env):
|
||||
from cron.jobs import create_job
|
||||
|
||||
job = create_job(prompt="Hello", schedule="every 1h", context_from=[])
|
||||
assert job.get("context_from") is None
|
||||
|
||||
|
||||
class TestBuildJobPromptContextFrom:
|
||||
"""Test that _build_job_prompt() injects context from referenced jobs."""
|
||||
|
||||
def test_injects_latest_output(self, cron_env):
|
||||
from cron.jobs import create_job, OUTPUT_DIR
|
||||
from cron.scheduler import _build_job_prompt
|
||||
|
||||
job_a = create_job(prompt="Find news", schedule="every 1h")
|
||||
|
||||
# Записываем output для job_a
|
||||
output_dir = OUTPUT_DIR / job_a["id"]
|
||||
output_dir.mkdir(parents=True, exist_ok=True)
|
||||
(output_dir / "2026-04-22_10-00-00.md").write_text(
|
||||
"Today's top story: AI is everywhere.", encoding="utf-8"
|
||||
)
|
||||
|
||||
job_b = create_job(
|
||||
prompt="Summarize the news",
|
||||
schedule="every 2h",
|
||||
context_from=job_a["id"],
|
||||
)
|
||||
|
||||
prompt = _build_job_prompt(job_b)
|
||||
assert "Today's top story: AI is everywhere." in prompt
|
||||
assert f"Output from job '{job_a['id']}'" in prompt
|
||||
|
||||
def test_uses_most_recent_output(self, cron_env):
|
||||
from cron.jobs import create_job, OUTPUT_DIR
|
||||
from cron.scheduler import _build_job_prompt
|
||||
import time
|
||||
|
||||
job_a = create_job(prompt="Find news", schedule="every 1h")
|
||||
output_dir = OUTPUT_DIR / job_a["id"]
|
||||
output_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
old_file = output_dir / "2026-04-22_08-00-00.md"
|
||||
old_file.write_text("Old output", encoding="utf-8")
|
||||
time.sleep(0.01)
|
||||
new_file = output_dir / "2026-04-22_10-00-00.md"
|
||||
new_file.write_text("New output", encoding="utf-8")
|
||||
|
||||
job_b = create_job(
|
||||
prompt="Summarize", schedule="every 2h", context_from=job_a["id"]
|
||||
)
|
||||
prompt = _build_job_prompt(job_b)
|
||||
assert "New output" in prompt
|
||||
assert "Old output" not in prompt
|
||||
|
||||
def test_graceful_when_no_output_yet(self, cron_env):
|
||||
from cron.jobs import create_job
|
||||
from cron.scheduler import _build_job_prompt
|
||||
|
||||
job_a = create_job(prompt="Find news", schedule="every 1h")
|
||||
job_b = create_job(
|
||||
prompt="Summarize", schedule="every 2h", context_from=job_a["id"]
|
||||
)
|
||||
|
||||
# job_a never ran — output dir does not exist
|
||||
# expect silent skip: no placeholder injected, base prompt intact
|
||||
prompt = _build_job_prompt(job_b)
|
||||
assert "no output" not in prompt.lower()
|
||||
assert "not found" not in prompt.lower()
|
||||
assert "Summarize" in prompt
|
||||
|
||||
def test_injects_multiple_context_jobs(self, cron_env):
|
||||
from cron.jobs import create_job, OUTPUT_DIR
|
||||
from cron.scheduler import _build_job_prompt
|
||||
|
||||
job_a = create_job(prompt="Find news", schedule="every 1h")
|
||||
job_b = create_job(prompt="Find weather", schedule="every 1h")
|
||||
|
||||
for job, content in [(job_a, "News: AI boom"), (job_b, "Weather: Sunny")]:
|
||||
out_dir = OUTPUT_DIR / job["id"]
|
||||
out_dir.mkdir(parents=True, exist_ok=True)
|
||||
(out_dir / "2026-04-22_10-00-00.md").write_text(content, encoding="utf-8")
|
||||
|
||||
job_c = create_job(
|
||||
prompt="Daily briefing",
|
||||
schedule="every 2h",
|
||||
context_from=[job_a["id"], job_b["id"]],
|
||||
)
|
||||
prompt = _build_job_prompt(job_c)
|
||||
assert "News: AI boom" in prompt
|
||||
assert "Weather: Sunny" in prompt
|
||||
|
||||
def test_context_injected_before_prompt(self, cron_env):
|
||||
"""Context should appear before the job's own prompt."""
|
||||
from cron.jobs import create_job, OUTPUT_DIR
|
||||
from cron.scheduler import _build_job_prompt
|
||||
|
||||
job_a = create_job(prompt="Find data", schedule="every 1h")
|
||||
out_dir = OUTPUT_DIR / job_a["id"]
|
||||
out_dir.mkdir(parents=True, exist_ok=True)
|
||||
(out_dir / "2026-04-22_10-00-00.md").write_text("Context data", encoding="utf-8")
|
||||
|
||||
job_b = create_job(
|
||||
prompt="Process the data above",
|
||||
schedule="every 2h",
|
||||
context_from=job_a["id"],
|
||||
)
|
||||
prompt = _build_job_prompt(job_b)
|
||||
context_pos = prompt.find("Context data")
|
||||
prompt_pos = prompt.find("Process the data above")
|
||||
assert context_pos < prompt_pos
|
||||
|
||||
def test_output_truncated_at_8k_chars(self, cron_env):
|
||||
"""Output longer than 8000 chars should be truncated."""
|
||||
from cron.jobs import create_job, OUTPUT_DIR
|
||||
from cron.scheduler import _build_job_prompt
|
||||
|
||||
job_a = create_job(prompt="Find data", schedule="every 1h")
|
||||
out_dir = OUTPUT_DIR / job_a["id"]
|
||||
out_dir.mkdir(parents=True, exist_ok=True)
|
||||
big_output = "x" * 10000
|
||||
(out_dir / "2026-04-22_10-00-00.md").write_text(big_output, encoding="utf-8")
|
||||
|
||||
job_b = create_job(
|
||||
prompt="Process", schedule="every 2h", context_from=job_a["id"]
|
||||
)
|
||||
prompt = _build_job_prompt(job_b)
|
||||
assert "truncated" in prompt
|
||||
assert "x" * 10000 not in prompt
|
||||
|
||||
def test_graceful_when_file_deleted_between_listing_and_reading(self, cron_env):
|
||||
"""Job should not crash if output file is deleted mid-read."""
|
||||
from cron.jobs import create_job, OUTPUT_DIR
|
||||
from cron.scheduler import _build_job_prompt
|
||||
from unittest.mock import patch
|
||||
|
||||
job_a = create_job(prompt="Find data", schedule="every 1h")
|
||||
out_dir = OUTPUT_DIR / job_a["id"]
|
||||
out_dir.mkdir(parents=True, exist_ok=True)
|
||||
(out_dir / "2026-04-22_10-00-00.md").write_text("Some output", encoding="utf-8")
|
||||
|
||||
job_b = create_job(
|
||||
prompt="Process", schedule="every 2h", context_from=job_a["id"]
|
||||
)
|
||||
|
||||
# Simulate file deleted between glob() and read_text()
|
||||
original_read = Path.read_text
|
||||
def mock_read_text(self, *args, **kwargs):
|
||||
if self.suffix == ".md":
|
||||
raise FileNotFoundError("file deleted mid-read")
|
||||
return original_read(self, *args, **kwargs)
|
||||
|
||||
with patch.object(Path, "read_text", mock_read_text):
|
||||
prompt = _build_job_prompt(job_b)
|
||||
|
||||
# Job should not crash, prompt should still contain the base prompt
|
||||
assert "Process" in prompt
|
||||
|
||||
def test_graceful_when_permission_error(self, cron_env):
|
||||
"""Job should not crash if output directory is not readable."""
|
||||
from cron.jobs import create_job, OUTPUT_DIR
|
||||
from cron.scheduler import _build_job_prompt
|
||||
from unittest.mock import patch
|
||||
|
||||
job_a = create_job(prompt="Find data", schedule="every 1h")
|
||||
out_dir = OUTPUT_DIR / job_a["id"]
|
||||
out_dir.mkdir(parents=True, exist_ok=True)
|
||||
(out_dir / "2026-04-22_10-00-00.md").write_text("Some output", encoding="utf-8")
|
||||
|
||||
job_b = create_job(
|
||||
prompt="Process", schedule="every 2h", context_from=job_a["id"]
|
||||
)
|
||||
|
||||
# Simulate permission error on read
|
||||
original_read = Path.read_text
|
||||
def mock_read_text(self, *args, **kwargs):
|
||||
if self.suffix == ".md":
|
||||
raise PermissionError("permission denied")
|
||||
return original_read(self, *args, **kwargs)
|
||||
|
||||
with patch.object(Path, "read_text", mock_read_text):
|
||||
prompt = _build_job_prompt(job_b)
|
||||
|
||||
# Job should not crash, prompt should still contain the base prompt
|
||||
assert "Process" in prompt
|
||||
|
||||
def test_invalid_job_id_skipped(self, cron_env):
|
||||
"""context_from with path traversal job_id should be skipped."""
|
||||
from cron.jobs import create_job
|
||||
from cron.scheduler import _build_job_prompt
|
||||
|
||||
job = create_job(prompt="Process", schedule="every 2h")
|
||||
# Manually inject invalid context_from (simulating tampered jobs.json)
|
||||
job["context_from"] = ["../../../etc/passwd"]
|
||||
prompt = _build_job_prompt(job)
|
||||
# Should not crash and should not inject anything malicious
|
||||
assert "Process" in prompt
|
||||
assert "etc/passwd" not in prompt
|
||||
|
||||
|
||||
|
||||
class TestUpdateContextFrom:
|
||||
"""Verify the cronjob tool's `update` action wires context_from through.
|
||||
|
||||
Without this, the create-path stores the field but users can never modify
|
||||
or clear it via the tool (schema promises "pass an empty array to clear").
|
||||
"""
|
||||
|
||||
def test_update_adds_context_from_to_existing_job(self, cron_env):
|
||||
from cron.jobs import create_job, get_job
|
||||
from tools.cronjob_tools import cronjob
|
||||
import json
|
||||
|
||||
job_a = create_job(prompt="Find news", schedule="every 1h")
|
||||
job_b = create_job(prompt="Summarize", schedule="every 2h")
|
||||
assert job_b.get("context_from") is None
|
||||
|
||||
result = json.loads(cronjob(
|
||||
action="update",
|
||||
job_id=job_b["id"],
|
||||
context_from=job_a["id"],
|
||||
))
|
||||
assert result["success"] is True
|
||||
|
||||
reloaded = get_job(job_b["id"])
|
||||
assert reloaded["context_from"] == [job_a["id"]]
|
||||
|
||||
def test_update_changes_context_from_reference(self, cron_env):
|
||||
from cron.jobs import create_job, get_job
|
||||
from tools.cronjob_tools import cronjob
|
||||
import json
|
||||
|
||||
job_a = create_job(prompt="Find news", schedule="every 1h")
|
||||
job_a2 = create_job(prompt="Find weather", schedule="every 1h")
|
||||
job_b = create_job(
|
||||
prompt="Summarize", schedule="every 2h", context_from=job_a["id"],
|
||||
)
|
||||
assert job_b["context_from"] == [job_a["id"]]
|
||||
|
||||
result = json.loads(cronjob(
|
||||
action="update",
|
||||
job_id=job_b["id"],
|
||||
context_from=[job_a2["id"]],
|
||||
))
|
||||
assert result["success"] is True
|
||||
assert get_job(job_b["id"])["context_from"] == [job_a2["id"]]
|
||||
|
||||
def test_update_clears_context_from_with_empty_list(self, cron_env):
|
||||
from cron.jobs import create_job, get_job
|
||||
from tools.cronjob_tools import cronjob
|
||||
import json
|
||||
|
||||
job_a = create_job(prompt="Find news", schedule="every 1h")
|
||||
job_b = create_job(
|
||||
prompt="Summarize", schedule="every 2h", context_from=job_a["id"],
|
||||
)
|
||||
assert get_job(job_b["id"])["context_from"] == [job_a["id"]]
|
||||
|
||||
result = json.loads(cronjob(
|
||||
action="update",
|
||||
job_id=job_b["id"],
|
||||
context_from=[],
|
||||
))
|
||||
assert result["success"] is True
|
||||
assert get_job(job_b["id"])["context_from"] is None
|
||||
|
||||
def test_update_clears_context_from_with_empty_string(self, cron_env):
|
||||
from cron.jobs import create_job, get_job
|
||||
from tools.cronjob_tools import cronjob
|
||||
import json
|
||||
|
||||
job_a = create_job(prompt="Find news", schedule="every 1h")
|
||||
job_b = create_job(
|
||||
prompt="Summarize", schedule="every 2h", context_from=job_a["id"],
|
||||
)
|
||||
|
||||
result = json.loads(cronjob(
|
||||
action="update",
|
||||
job_id=job_b["id"],
|
||||
context_from="",
|
||||
))
|
||||
assert result["success"] is True
|
||||
assert get_job(job_b["id"])["context_from"] is None
|
||||
|
||||
def test_update_rejects_unknown_job_reference(self, cron_env):
|
||||
from cron.jobs import create_job
|
||||
from tools.cronjob_tools import cronjob
|
||||
import json
|
||||
|
||||
job_b = create_job(prompt="Summarize", schedule="every 2h")
|
||||
|
||||
result = json.loads(cronjob(
|
||||
action="update",
|
||||
job_id=job_b["id"],
|
||||
context_from=["deadbeef0000"],
|
||||
))
|
||||
assert result["success"] is False
|
||||
assert "not found" in result["error"]
|
||||
|
||||
def test_update_preserves_context_from_when_not_passed(self, cron_env):
|
||||
"""Updating other fields must not clobber context_from."""
|
||||
from cron.jobs import create_job, get_job
|
||||
from tools.cronjob_tools import cronjob
|
||||
import json
|
||||
|
||||
job_a = create_job(prompt="Find news", schedule="every 1h")
|
||||
job_b = create_job(
|
||||
prompt="Summarize", schedule="every 2h", context_from=job_a["id"],
|
||||
)
|
||||
|
||||
# Update an unrelated field
|
||||
result = json.loads(cronjob(
|
||||
action="update",
|
||||
job_id=job_b["id"],
|
||||
prompt="Summarize v2",
|
||||
))
|
||||
assert result["success"] is True
|
||||
reloaded = get_job(job_b["id"])
|
||||
assert reloaded["prompt"] == "Summarize v2"
|
||||
assert reloaded["context_from"] == [job_a["id"]]
|
||||
@@ -346,7 +346,6 @@ def make_discord_message(
|
||||
|
||||
return SimpleNamespace(
|
||||
id=message_id, content=content, author=author, channel=channel,
|
||||
guild=getattr(channel, "guild", None),
|
||||
mentions=mentions, attachments=attachments,
|
||||
type=getattr(discord, "MessageType", SimpleNamespace()).default,
|
||||
reference=None, created_at=datetime.now(timezone.utc),
|
||||
|
||||
@@ -1,365 +0,0 @@
|
||||
"""Tests for /v1/runs endpoints: start, events, and stop.
|
||||
|
||||
Covers:
|
||||
- POST /v1/runs — start a run (202)
|
||||
- GET /v1/runs/{run_id}/events — SSE event stream
|
||||
- POST /v1/runs/{run_id}/stop — interrupt a running agent
|
||||
- Auth, error handling, and cleanup
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import threading
|
||||
import time as _time
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
import pytest
|
||||
from aiohttp import web
|
||||
from aiohttp.test_utils import TestClient, TestServer
|
||||
|
||||
from gateway.config import PlatformConfig
|
||||
from gateway.platforms.api_server import (
|
||||
APIServerAdapter,
|
||||
cors_middleware,
|
||||
security_headers_middleware,
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _make_adapter(api_key: str = "") -> APIServerAdapter:
|
||||
"""Create an adapter with optional API key."""
|
||||
extra = {}
|
||||
if api_key:
|
||||
extra["key"] = api_key
|
||||
config = PlatformConfig(enabled=True, extra=extra)
|
||||
adapter = APIServerAdapter(config)
|
||||
return adapter
|
||||
|
||||
|
||||
def _create_runs_app(adapter: APIServerAdapter) -> web.Application:
|
||||
"""Create an aiohttp app with /v1/runs routes registered."""
|
||||
mws = [mw for mw in (cors_middleware, security_headers_middleware) if mw is not None]
|
||||
app = web.Application(middlewares=mws)
|
||||
app["api_server_adapter"] = adapter
|
||||
app.router.add_post("/v1/runs", adapter._handle_runs)
|
||||
app.router.add_get("/v1/runs/{run_id}/events", adapter._handle_run_events)
|
||||
app.router.add_post("/v1/runs/{run_id}/stop", adapter._handle_stop_run)
|
||||
return app
|
||||
|
||||
|
||||
def _make_slow_agent(**kwargs):
|
||||
"""Create a mock agent that blocks in run_conversation until interrupted.
|
||||
|
||||
Returns (mock_agent, agent_ready_event, interrupt_event) where
|
||||
agent_ready_event is set once run_conversation starts, and
|
||||
interrupt_event is set when interrupt() is called.
|
||||
"""
|
||||
ready = threading.Event()
|
||||
interrupted = threading.Event()
|
||||
|
||||
mock_agent = MagicMock()
|
||||
|
||||
def _do_interrupt(message=None):
|
||||
interrupted.set()
|
||||
|
||||
mock_agent.interrupt = MagicMock(side_effect=_do_interrupt)
|
||||
|
||||
def _slow_run(user_message=None, conversation_history=None, task_id=None):
|
||||
ready.set()
|
||||
# Block until interrupt() is called
|
||||
interrupted.wait(timeout=10)
|
||||
return {"final_response": "interrupted"}
|
||||
|
||||
mock_agent.run_conversation.side_effect = _slow_run
|
||||
mock_agent.session_prompt_tokens = 0
|
||||
mock_agent.session_completion_tokens = 0
|
||||
mock_agent.session_total_tokens = 0
|
||||
|
||||
return mock_agent, ready, interrupted
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def adapter():
|
||||
return _make_adapter()
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def auth_adapter():
|
||||
return _make_adapter(api_key="sk-secret")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# POST /v1/runs — start a run
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestStartRun:
|
||||
@pytest.mark.asyncio
|
||||
async def test_start_returns_202(self, adapter):
|
||||
app = _create_runs_app(adapter)
|
||||
async with TestClient(TestServer(app)) as cli:
|
||||
with patch.object(adapter, "_create_agent") as mock_create:
|
||||
mock_agent = MagicMock()
|
||||
mock_agent.run_conversation.return_value = {"final_response": "done"}
|
||||
mock_agent.session_prompt_tokens = 10
|
||||
mock_agent.session_completion_tokens = 5
|
||||
mock_agent.session_total_tokens = 15
|
||||
mock_create.return_value = mock_agent
|
||||
|
||||
resp = await cli.post("/v1/runs", json={"input": "hello"})
|
||||
assert resp.status == 202
|
||||
data = await resp.json()
|
||||
assert data["status"] == "started"
|
||||
assert data["run_id"].startswith("run_")
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_start_invalid_json_returns_400(self, adapter):
|
||||
app = _create_runs_app(adapter)
|
||||
async with TestClient(TestServer(app)) as cli:
|
||||
resp = await cli.post(
|
||||
"/v1/runs",
|
||||
data="not json",
|
||||
headers={"Content-Type": "application/json"},
|
||||
)
|
||||
assert resp.status == 400
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_start_missing_input_returns_400(self, adapter):
|
||||
app = _create_runs_app(adapter)
|
||||
async with TestClient(TestServer(app)) as cli:
|
||||
resp = await cli.post("/v1/runs", json={"model": "test"})
|
||||
assert resp.status == 400
|
||||
data = await resp.json()
|
||||
assert "input" in data["error"]["message"]
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_start_empty_input_returns_400(self, adapter):
|
||||
app = _create_runs_app(adapter)
|
||||
async with TestClient(TestServer(app)) as cli:
|
||||
resp = await cli.post("/v1/runs", json={"input": ""})
|
||||
assert resp.status == 400
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_start_requires_auth(self, auth_adapter):
|
||||
app = _create_runs_app(auth_adapter)
|
||||
async with TestClient(TestServer(app)) as cli:
|
||||
resp = await cli.post("/v1/runs", json={"input": "hello"})
|
||||
assert resp.status == 401
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_start_with_valid_auth(self, auth_adapter):
|
||||
app = _create_runs_app(auth_adapter)
|
||||
async with TestClient(TestServer(app)) as cli:
|
||||
with patch.object(auth_adapter, "_create_agent") as mock_create:
|
||||
mock_agent = MagicMock()
|
||||
mock_agent.run_conversation.return_value = {"final_response": "ok"}
|
||||
mock_agent.session_prompt_tokens = 0
|
||||
mock_agent.session_completion_tokens = 0
|
||||
mock_agent.session_total_tokens = 0
|
||||
mock_create.return_value = mock_agent
|
||||
|
||||
resp = await cli.post(
|
||||
"/v1/runs",
|
||||
json={"input": "hello"},
|
||||
headers={"Authorization": "Bearer sk-secret"},
|
||||
)
|
||||
assert resp.status == 202
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# GET /v1/runs/{run_id}/events — SSE event stream
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestRunEvents:
|
||||
@pytest.mark.asyncio
|
||||
async def test_events_stream_returns_completed(self, adapter):
|
||||
"""Events stream should receive run.completed when agent finishes."""
|
||||
app = _create_runs_app(adapter)
|
||||
async with TestClient(TestServer(app)) as cli:
|
||||
with patch.object(adapter, "_create_agent") as mock_create:
|
||||
mock_agent = MagicMock()
|
||||
mock_agent.run_conversation.return_value = {"final_response": "Hello!"}
|
||||
mock_agent.session_prompt_tokens = 10
|
||||
mock_agent.session_completion_tokens = 5
|
||||
mock_agent.session_total_tokens = 15
|
||||
mock_create.return_value = mock_agent
|
||||
|
||||
# Start run
|
||||
resp = await cli.post("/v1/runs", json={"input": "hello"})
|
||||
assert resp.status == 202
|
||||
data = await resp.json()
|
||||
run_id = data["run_id"]
|
||||
|
||||
# Subscribe to events
|
||||
events_resp = await cli.get(f"/v1/runs/{run_id}/events")
|
||||
assert events_resp.status == 200
|
||||
body = await events_resp.text()
|
||||
|
||||
# Should contain run.completed
|
||||
assert "run.completed" in body
|
||||
assert "Hello!" in body
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_events_not_found_returns_404(self, adapter):
|
||||
app = _create_runs_app(adapter)
|
||||
async with TestClient(TestServer(app)) as cli:
|
||||
resp = await cli.get("/v1/runs/run_nonexistent/events")
|
||||
assert resp.status == 404
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_events_requires_auth(self, auth_adapter):
|
||||
app = _create_runs_app(auth_adapter)
|
||||
async with TestClient(TestServer(app)) as cli:
|
||||
resp = await cli.get("/v1/runs/run_any/events")
|
||||
assert resp.status == 401
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# POST /v1/runs/{run_id}/stop — interrupt a running agent
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestStopRun:
|
||||
@pytest.mark.asyncio
|
||||
async def test_stop_running_agent(self, adapter):
|
||||
"""Stop should interrupt the agent and cancel the task."""
|
||||
app = _create_runs_app(adapter)
|
||||
async with TestClient(TestServer(app)) as cli:
|
||||
with patch.object(adapter, "_create_agent") as mock_create:
|
||||
mock_agent, agent_ready, _ = _make_slow_agent()
|
||||
mock_create.return_value = mock_agent
|
||||
|
||||
# Start run
|
||||
resp = await cli.post("/v1/runs", json={"input": "hello"})
|
||||
assert resp.status == 202
|
||||
data = await resp.json()
|
||||
run_id = data["run_id"]
|
||||
|
||||
# Wait for agent to start running in the thread
|
||||
agent_ready.wait(timeout=3.0)
|
||||
await asyncio.sleep(0.1)
|
||||
|
||||
# Verify agent ref is stored
|
||||
assert run_id in adapter._active_run_agents
|
||||
|
||||
# Stop the run
|
||||
stop_resp = await cli.post(f"/v1/runs/{run_id}/stop")
|
||||
assert stop_resp.status == 200
|
||||
stop_data = await stop_resp.json()
|
||||
assert stop_data["run_id"] == run_id
|
||||
assert stop_data["status"] == "stopping"
|
||||
|
||||
# Agent interrupt should have been called
|
||||
mock_agent.interrupt.assert_called_once_with("Stop requested via API")
|
||||
|
||||
# Refs should be cleaned up
|
||||
await asyncio.sleep(0.5)
|
||||
assert run_id not in adapter._active_run_agents
|
||||
assert run_id not in adapter._active_run_tasks
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_stop_nonexistent_run_returns_404(self, adapter):
|
||||
app = _create_runs_app(adapter)
|
||||
async with TestClient(TestServer(app)) as cli:
|
||||
resp = await cli.post("/v1/runs/run_nonexistent/stop")
|
||||
assert resp.status == 404
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_stop_requires_auth(self, auth_adapter):
|
||||
app = _create_runs_app(auth_adapter)
|
||||
async with TestClient(TestServer(app)) as cli:
|
||||
resp = await cli.post("/v1/runs/run_any/stop")
|
||||
assert resp.status == 401
|
||||
|
||||
@pytest.mark.asyncio
async def test_stop_already_completed_run_returns_404(self, adapter):
    """A run that already finished has had its refs dropped, so stop answers 404."""
    runs_app = _create_runs_app(adapter)
    async with TestClient(TestServer(runs_app)) as client:
        with patch.object(adapter, "_create_agent") as create_agent:
            # Agent whose conversation returns immediately, with zeroed token counters.
            quick_agent = MagicMock()
            quick_agent.run_conversation.return_value = {"final_response": "done"}
            for counter in (
                "session_prompt_tokens",
                "session_completion_tokens",
                "session_total_tokens",
            ):
                setattr(quick_agent, counter, 0)
            create_agent.return_value = quick_agent

            # Kick off the run, then give it time to complete.
            started = await client.post("/v1/runs", json={"input": "hello"})
            assert started.status == 202
            run_id = (await started.json())["run_id"]

            await asyncio.sleep(0.3)

            # By now the adapter should no longer track an agent for the run ...
            assert run_id not in adapter._active_run_agents

            # ... so a stop request has nothing left to act on.
            stopped = await client.post(f"/v1/runs/{run_id}/stop")
            assert stopped.status == 404
|
||||
|
||||
@pytest.mark.asyncio
async def test_stop_interrupt_exception_does_not_crash(self, adapter):
    """The stop endpoint must still answer 200 when agent.interrupt() raises."""
    runs_app = _create_runs_app(adapter)
    async with TestClient(TestServer(runs_app)) as client:
        with patch.object(adapter, "_create_agent") as create_agent:
            slow_agent, started, _ = _make_slow_agent()
            # Replace interrupt() with one that always fails.
            slow_agent.interrupt = MagicMock(side_effect=RuntimeError("interrupt failed"))
            create_agent.return_value = slow_agent

            launch = await client.post("/v1/runs", json={"input": "hello"})
            assert launch.status == 202
            run_id = (await launch.json())["run_id"]

            # NOTE(review): this is a synchronous wait inside a coroutine and
            # its result is ignored; if `started` is a threading-style event it
            # can hold the event loop for up to 3 s -- confirm against
            # _make_slow_agent.
            started.wait(timeout=3.0)
            await asyncio.sleep(0.1)

            stopped = await client.post(f"/v1/runs/{run_id}/stop")
            assert stopped.status == 200
            payload = await stopped.json()
            assert payload["status"] == "stopping"
|
||||
|
||||
@pytest.mark.asyncio
async def test_stop_sends_sentinel_to_events_stream(self, adapter):
    """Stopping a run must let an open events request for that run finish."""
    runs_app = _create_runs_app(adapter)
    async with TestClient(TestServer(runs_app)) as client:
        with patch.object(adapter, "_create_agent") as create_agent:
            slow_agent, started, _ = _make_slow_agent()
            create_agent.return_value = slow_agent

            # Launch the run.
            launch = await client.post("/v1/runs", json={"input": "hello"})
            assert launch.status == 202
            run_id = (await launch.json())["run_id"]

            # NOTE(review): synchronous wait inside a coroutine, result ignored;
            # presumably a threading-style event -- confirm against
            # _make_slow_agent.
            started.wait(timeout=3.0)
            await asyncio.sleep(0.1)

            # Issue the events GET in the background so the stop can be sent
            # while that request is still pending.
            stream_request = asyncio.ensure_future(
                client.get(f"/v1/runs/{run_id}/events")
            )
            await asyncio.sleep(0.1)

            # Request the stop.
            stopped = await client.post(f"/v1/runs/{run_id}/stop")
            assert stopped.status == 200

            # Wait (at most 5 s) for the GET to return, then read its full body.
            stream = await asyncio.wait_for(stream_request, timeout=5.0)
            assert stream.status == 200
            text = await stream.text()
            # Either a run.failed event or the "stream closed" marker is accepted.
            assert "run.failed" in text or "stream closed" in text
|
||||
@@ -0,0 +1,249 @@
|
||||
"""Tests for proactive memory flush on session expiry.
|
||||
|
||||
Verifies that:
|
||||
1. _is_session_expired() works from a SessionEntry alone (no source needed)
|
||||
2. The sync callback is no longer called in get_or_create_session
|
||||
3. memory_flushed flag persists across save/load cycles (prevents restart re-flush)
|
||||
4. The background watcher can detect expired sessions
|
||||
"""
|
||||
|
||||
import pytest
|
||||
from datetime import datetime, timedelta
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch, MagicMock
|
||||
|
||||
from gateway.config import Platform, GatewayConfig, SessionResetPolicy
|
||||
from gateway.session import SessionSource, SessionStore, SessionEntry
|
||||
|
||||
|
||||
@pytest.fixture()
def idle_store(tmp_path):
    """Provide a SessionStore whose default policy resets after 60 idle minutes."""
    policy = SessionResetPolicy(mode="idle", idle_minutes=60)
    gateway_config = GatewayConfig(default_reset_policy=policy)
    # _ensure_loaded is patched out while the store is being constructed.
    with patch("gateway.session.SessionStore._ensure_loaded"):
        store = SessionStore(sessions_dir=tmp_path, config=gateway_config)
    # Mark the store as loaded and detach it from any database handle.
    store._db = None
    store._loaded = True
    return store
|
||||
|
||||
|
||||
@pytest.fixture()
def no_reset_store(tmp_path):
    """Provide a SessionStore whose default reset policy is mode="none"."""
    policy = SessionResetPolicy(mode="none")
    gateway_config = GatewayConfig(default_reset_policy=policy)
    # _ensure_loaded is patched out while the store is being constructed.
    with patch("gateway.session.SessionStore._ensure_loaded"):
        store = SessionStore(sessions_dir=tmp_path, config=gateway_config)
    # Mark the store as loaded and detach it from any database handle.
    store._db = None
    store._loaded = True
    return store
|
||||
|
||||
|
||||
class TestIsSessionExpired:
    """_is_session_expired must decide expiry from a SessionEntry alone."""

    @staticmethod
    def _telegram_dm_entry(session_id, created_ago, updated_ago):
        """Build a Telegram DM entry whose timestamps lie the given timedeltas in the past."""
        return SessionEntry(
            session_key="agent:main:telegram:dm",
            session_id=session_id,
            created_at=datetime.now() - created_ago,
            updated_at=datetime.now() - updated_ago,
            platform=Platform.TELEGRAM,
            chat_type="dm",
        )

    def test_idle_session_expired(self, idle_store):
        """Last update 120 minutes ago exceeds the 60-minute idle window."""
        stale = self._telegram_dm_entry(
            "sid_1", timedelta(hours=3), timedelta(minutes=120)
        )
        assert idle_store._is_session_expired(stale) is True

    def test_active_session_not_expired(self, idle_store):
        """Last update 10 minutes ago is inside the 60-minute idle window."""
        fresh = self._telegram_dm_entry(
            "sid_2", timedelta(hours=1), timedelta(minutes=10)
        )
        assert idle_store._is_session_expired(fresh) is False

    def test_none_mode_never_expires(self, no_reset_store):
        """With mode="none" even a 30-day-old entry is not expired."""
        ancient = self._telegram_dm_entry(
            "sid_3", timedelta(days=30), timedelta(days=30)
        )
        assert no_reset_store._is_session_expired(ancient) is False

    def test_active_processes_prevent_expiry(self, idle_store):
        """Sessions with active background processes should never expire."""
        # Report every session key as having active processes.
        idle_store._has_active_processes_fn = lambda key: True
        busy = self._telegram_dm_entry(
            "sid_4", timedelta(hours=5), timedelta(hours=5)
        )
        assert idle_store._is_session_expired(busy) is False

    def test_daily_mode_expired(self, tmp_path):
        """Daily mode should expire sessions from before today's reset hour."""
        daily_config = GatewayConfig(
            default_reset_policy=SessionResetPolicy(mode="daily", at_hour=4),
        )
        with patch("gateway.session.SessionStore._ensure_loaded"):
            daily_store = SessionStore(sessions_dir=tmp_path, config=daily_config)
        daily_store._db = None
        daily_store._loaded = True

        old = self._telegram_dm_entry(
            "sid_5", timedelta(days=2), timedelta(days=2)
        )
        assert daily_store._is_session_expired(old) is True
|
||||
|
||||
|
||||
class TestGetOrCreateSessionNoCallback:
    """get_or_create_session must auto-reset without a synchronous flush callback."""

    @staticmethod
    def _dm_source():
        """Return the Telegram DM source (chat 123) shared by both tests."""
        return SessionSource(
            platform=Platform.TELEGRAM,
            chat_id="123",
            chat_type="dm",
        )

    def test_auto_reset_creates_new_session_after_flush(self, idle_store):
        """When a flushed session auto-resets, a new session_id is created."""
        source = self._dm_source()

        # First call creates the session.
        first = idle_store.get_or_create_session(source)
        previous_id = first.session_id

        # Pretend the background watcher already flushed it ...
        first.memory_flushed = True
        # ... and that it has since been idle for two hours.
        first.updated_at = datetime.now() - timedelta(minutes=120)
        idle_store._save()

        # The next lookup should auto-reset into a fresh session.
        second = idle_store.get_or_create_session(source)
        assert second.session_id != previous_id
        assert second.was_auto_reset is True
        # The replacement session starts un-flushed.
        assert second.memory_flushed is False

    def test_no_sync_callback_invoked(self, idle_store):
        """No synchronous callback should block during auto-reset."""
        source = self._dm_source()
        first = idle_store.get_or_create_session(source)
        first.updated_at = datetime.now() - timedelta(minutes=120)
        idle_store._save()

        # The store must not expose an _on_auto_reset attribute.
        assert not hasattr(idle_store, "_on_auto_reset")

        # The reset itself is expected to return without a sync LLM call.
        second = idle_store.get_or_create_session(source)
        assert second.was_auto_reset is True
|
||||
|
||||
|
||||
class TestMemoryFlushedFlag:
    """SessionEntry.memory_flushed guards against flushing the same session twice."""

    @staticmethod
    def _simulate_restart(store):
        """Clear the in-memory entries and have the store load itself again."""
        store._entries.clear()
        store._loaded = False
        store._ensure_loaded()

    def test_defaults_to_false(self):
        """A freshly built entry has memory_flushed set to False."""
        fresh = SessionEntry(
            session_key="agent:main:telegram:dm:123",
            session_id="sid_new",
            created_at=datetime.now(),
            updated_at=datetime.now(),
            platform=Platform.TELEGRAM,
            chat_type="dm",
        )
        assert fresh.memory_flushed is False

    def test_persists_through_save_load(self, idle_store):
        """memory_flushed=True must survive a save/load cycle (simulates restart)."""
        session_key = "agent:main:discord:thread:789"
        idle_store._entries[session_key] = SessionEntry(
            session_key=session_key,
            session_id="sid_flushed",
            created_at=datetime.now() - timedelta(hours=5),
            updated_at=datetime.now() - timedelta(hours=5),
            platform=Platform.DISCORD,
            chat_type="thread",
            memory_flushed=True,
        )
        idle_store._save()

        self._simulate_restart(idle_store)

        assert idle_store._entries[session_key].memory_flushed is True

    def test_unflushed_entry_survives_restart_as_unflushed(self, idle_store):
        """An entry without memory_flushed stays False after reload."""
        session_key = "agent:main:telegram:dm:456"
        idle_store._entries[session_key] = SessionEntry(
            session_key=session_key,
            session_id="sid_not_flushed",
            created_at=datetime.now() - timedelta(hours=2),
            updated_at=datetime.now() - timedelta(hours=2),
            platform=Platform.TELEGRAM,
            chat_type="dm",
        )
        idle_store._save()

        self._simulate_restart(idle_store)

        assert idle_store._entries[session_key].memory_flushed is False

    def test_roundtrip_to_dict_from_dict(self):
        """to_dict/from_dict must preserve memory_flushed."""
        flushed = SessionEntry(
            session_key="agent:main:telegram:dm:999",
            session_id="sid_rt",
            created_at=datetime.now(),
            updated_at=datetime.now(),
            platform=Platform.TELEGRAM,
            chat_type="dm",
            memory_flushed=True,
        )
        serialized = flushed.to_dict()
        assert serialized["memory_flushed"] is True

        assert SessionEntry.from_dict(serialized).memory_flushed is True

    def test_legacy_entry_without_field_defaults_false(self):
        """Old sessions.json entries missing memory_flushed should default to False."""
        # Deliberately contains no "memory_flushed" key.
        legacy_payload = {
            "session_key": "agent:main:telegram:dm:legacy",
            "session_id": "sid_legacy",
            "created_at": datetime.now().isoformat(),
            "updated_at": datetime.now().isoformat(),
            "platform": "telegram",
            "chat_type": "dm",
        }
        assert SessionEntry.from_dict(legacy_payload).memory_flushed is False
|
||||
@@ -349,121 +349,3 @@ class TestBusySessionAck:
|
||||
|
||||
result = await runner._handle_active_session_busy_message(event, sk)
|
||||
assert result is False # not handled, let default path try
|
||||
|
||||
|
||||
class TestBusySessionOnboardingHint:
    """The busy-ack carries a one-time onboarding hint the first time it fires."""

    @staticmethod
    def _register_active_agent(runner, adapter, event, session_key):
        """Register a mock agent with a canned activity summary as running for the session."""
        agent = MagicMock()
        agent.get_activity_summary.return_value = {
            "api_call_count": 3, "max_iterations": 60,
            "current_tool": None, "last_activity_ts": time.time(),
            "last_activity_desc": "api", "seconds_since_activity": 0.1,
        }
        runner._running_agents[session_key] = agent
        runner._running_agents_ts[session_key] = time.time() - 5
        runner.adapters[event.source.platform] = adapter

    @pytest.mark.asyncio
    async def test_first_busy_ack_appends_interrupt_hint(self, tmp_path, monkeypatch):
        """First busy-while-running message gets an extra hint about /busy."""
        import gateway.run as _gr

        # _hermes_home points at tmp_path so the onboarding flag is written to
        # a writable directory (per the original note, mark_seen goes through
        # utils.atomic_yaml_write).
        monkeypatch.setattr(_gr, "_hermes_home", tmp_path)
        monkeypatch.setattr(_gr, "_load_gateway_config", lambda: {})

        runner, _ = _make_runner()
        runner._busy_input_mode = "interrupt"
        adapter = _make_adapter()

        event = _make_event(text="ping")
        session_key = build_session_key(event.source)
        self._register_active_agent(runner, adapter, event, session_key)

        await runner._handle_active_session_busy_message(event, session_key)

        sent = adapter._send_with_retry.call_args.kwargs.get("content", "")

        # The regular acknowledgement text ...
        assert "Interrupting" in sent
        # ... followed by the first-touch hint.
        assert "First-time tip" in sent
        assert "/busy queue" in sent

        # The seen-flag must now be stored in tmp_path/config.yaml.
        import yaml

        stored = yaml.safe_load((tmp_path / "config.yaml").read_text())
        assert stored["onboarding"]["seen"]["busy_input_prompt"] is True

    @pytest.mark.asyncio
    async def test_second_busy_ack_omits_hint(self, tmp_path, monkeypatch):
        """Once the flag is marked, the hint never appears again."""
        import gateway.run as _gr
        import yaml

        monkeypatch.setattr(_gr, "_hermes_home", tmp_path)
        # Seed the config file with the flag already set, and load the gateway
        # config from that file.
        config_file = tmp_path / "config.yaml"
        config_file.write_text(yaml.safe_dump({
            "onboarding": {"seen": {"busy_input_prompt": True}},
        }))
        monkeypatch.setattr(
            _gr, "_load_gateway_config",
            lambda: yaml.safe_load((tmp_path / "config.yaml").read_text()),
        )

        runner, _ = _make_runner()
        runner._busy_input_mode = "interrupt"
        adapter = _make_adapter()

        event = _make_event(text="ping again")
        session_key = build_session_key(event.source)
        self._register_active_agent(runner, adapter, event, session_key)

        await runner._handle_active_session_busy_message(event, session_key)

        sent = adapter._send_with_retry.call_args.kwargs.get("content", "")

        assert "Interrupting" in sent
        assert "First-time tip" not in sent
        assert "/busy queue" not in sent

    @pytest.mark.asyncio
    async def test_queue_mode_hint_points_to_interrupt(self, tmp_path, monkeypatch):
        """In queue mode the hint should suggest /busy interrupt, not /busy queue."""
        import gateway.run as _gr

        monkeypatch.setattr(_gr, "_hermes_home", tmp_path)
        monkeypatch.setattr(_gr, "_load_gateway_config", lambda: {})

        runner, _ = _make_runner()
        runner._busy_input_mode = "queue"
        adapter = _make_adapter()

        event = _make_event(text="queue me")
        session_key = build_session_key(event.source)
        runner.adapters[event.source.platform] = adapter

        runner._running_agents[session_key] = MagicMock()

        with patch("gateway.run.merge_pending_message_event"):
            await runner._handle_active_session_busy_message(event, session_key)

        sent = adapter._send_with_retry.call_args.kwargs.get("content", "")
        assert "Queued for the next turn" in sent
        assert "First-time tip" in sent
        assert "/busy interrupt" in sent
        # A user already in queue mode must not be told to run /busy queue.
        assert "/busy queue" not in sent
|
||||
|
||||
@@ -0,0 +1,240 @@
|
||||
"""Tests for memory flush stale-overwrite prevention (#2670).
|
||||
|
||||
Verifies that:
|
||||
1. Cron sessions are skipped (no flush for headless cron runs)
|
||||
2. Current memory state is injected into the flush prompt so the
|
||||
flush agent can see what's already saved and avoid overwrites
|
||||
3. The flush still works normally when memory files don't exist
|
||||
"""
|
||||
|
||||
import sys
|
||||
import types
|
||||
import pytest
|
||||
from pathlib import Path
|
||||
from unittest.mock import MagicMock, patch, call
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def _mock_dotenv(monkeypatch):
    """Install a stub ``dotenv`` module so gateway.run imports without the real package."""
    stub = types.ModuleType("dotenv")
    # load_dotenv accepts anything and does nothing.
    setattr(stub, "load_dotenv", lambda *a, **kw: None)
    monkeypatch.setitem(sys.modules, "dotenv", stub)
|
||||
|
||||
|
||||
def _make_runner():
    """Create a GatewayRunner without running __init__, with just the state these tests use."""
    from gateway.run import GatewayRunner

    # object.__new__ bypasses GatewayRunner.__init__ entirely.
    runner = object.__new__(GatewayRunner)
    # Each of these attributes starts out as its own empty dict.
    for attr in (
        "_honcho_managers",
        "_honcho_configs",
        "_running_agents",
        "_pending_messages",
        "_pending_approvals",
        "adapters",
    ):
        setattr(runner, attr, {})
    runner.hooks = MagicMock()
    runner.session_store = MagicMock()
    return runner
|
||||
|
||||
|
||||
_TRANSCRIPT_4_MSGS = [
|
||||
{"role": "user", "content": "hello"},
|
||||
{"role": "assistant", "content": "hi there"},
|
||||
{"role": "user", "content": "remember my name is Alice"},
|
||||
{"role": "assistant", "content": "Got it, Alice!"},
|
||||
]
|
||||
|
||||
|
||||
class TestCronSessionBypass:
    """Memory flush is skipped for cron session IDs and attempted for others."""

    def test_cron_session_skipped(self):
        """A cron_* session ID returns before the transcript is ever loaded."""
        gateway = _make_runner()
        gateway._flush_memories_for_session("cron_job123_20260323_120000")
        load_transcript = gateway.session_store.load_transcript
        load_transcript.assert_not_called()

    def test_cron_session_with_prefix_skipped(self):
        """Cron sessions with different prefixes are still skipped."""
        gateway = _make_runner()
        gateway._flush_memories_for_session("cron_daily_20260323")
        load_transcript = gateway.session_store.load_transcript
        load_transcript.assert_not_called()

    def test_non_cron_session_proceeds(self):
        """Non-cron sessions should still attempt the flush."""
        gateway = _make_runner()
        load_transcript = gateway.session_store.load_transcript
        load_transcript.return_value = []
        gateway._flush_memories_for_session("session_abc123")
        load_transcript.assert_called_once_with("session_abc123")
|
||||
|
||||
|
||||
def _make_flush_context(monkeypatch, memory_dir=None):
    """Build a runner ready for a flush, with ``run_agent`` stubbed in sys.modules.

    A fake ``run_agent`` module is installed through ``monkeypatch``; its
    ``AIAgent`` is a MagicMock that returns ``tmp_agent``. The runner's mocked
    session store is primed to return the four-message transcript.

    Args:
        monkeypatch: pytest's monkeypatch fixture, used to install the fake module.
        memory_dir: Not used by this helper; returned unchanged as the third
            element so callers can unpack it alongside the other objects.

    Returns:
        The tuple ``(runner, tmp_agent, memory_dir)``.
    """
    tmp_agent = MagicMock()
    fake_run_agent = types.ModuleType("run_agent")
    fake_run_agent.AIAgent = MagicMock(return_value=tmp_agent)
    monkeypatch.setitem(sys.modules, "run_agent", fake_run_agent)

    runner = _make_runner()
    runner.session_store.load_transcript.return_value = _TRANSCRIPT_4_MSGS
    return runner, tmp_agent, memory_dir
|
||||
|
||||
|
||||
class TestMemoryInjection:
    """The flush prompt embeds on-disk memory content when there is any."""

    @staticmethod
    def _flush(runner, session_id, memory_dir):
        """Run the flush with model/kwargs resolution patched and tools.memory_tool stubbed."""
        stub_memory_tool = MagicMock(get_memory_dir=lambda: memory_dir)
        with (
            patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "k"}),
            patch("gateway.run._resolve_gateway_model", return_value="test-model"),
            patch.dict("sys.modules", {"tools.memory_tool": stub_memory_tool}),
        ):
            runner._flush_memories_for_session(session_id)

    @staticmethod
    def _prompt_sent_to(agent):
        """Return the user_message keyword passed to the agent's run_conversation."""
        return agent.run_conversation.call_args.kwargs.get("user_message", "")

    def test_memory_content_injected_into_flush_prompt(self, tmp_path, monkeypatch):
        """When memory files exist, their content appears in the flush prompt."""
        memory_dir = tmp_path / "memories"
        memory_dir.mkdir()
        (memory_dir / "MEMORY.md").write_text("Agent knows Python\n§\nUser prefers dark mode")
        (memory_dir / "USER.md").write_text("Name: Alice\n§\nTimezone: PST")

        runner, tmp_agent, _ = _make_flush_context(monkeypatch, memory_dir)
        self._flush(runner, "session_123", memory_dir)

        tmp_agent.run_conversation.assert_called_once()
        prompt = self._prompt_sent_to(tmp_agent)

        # Every stored entry plus both guard phrases must be present.
        for expected in (
            "Agent knows Python",
            "User prefers dark mode",
            "Name: Alice",
            "Timezone: PST",
            "Do NOT overwrite or remove entries",
            "current live state of memory",
        ):
            assert expected in prompt

    def test_flush_works_without_memory_files(self, tmp_path, monkeypatch):
        """When no memory files exist, flush still runs without the guard."""
        empty_dir = tmp_path / "no_memories"
        empty_dir.mkdir()

        runner, tmp_agent, _ = _make_flush_context(monkeypatch)
        self._flush(runner, "session_456", empty_dir)

        tmp_agent.run_conversation.assert_called_once()
        prompt = self._prompt_sent_to(tmp_agent)
        assert "Do NOT overwrite or remove entries" not in prompt
        assert "Review the conversation above" in prompt

    def test_empty_memory_files_no_injection(self, tmp_path, monkeypatch):
        """Empty memory files should not trigger the guard section."""
        memory_dir = tmp_path / "memories"
        memory_dir.mkdir()
        (memory_dir / "MEMORY.md").write_text("")
        (memory_dir / "USER.md").write_text(" \n ")  # whitespace only

        runner, tmp_agent, _ = _make_flush_context(monkeypatch)
        self._flush(runner, "session_789", memory_dir)

        tmp_agent.run_conversation.assert_called_once()
        prompt = self._prompt_sent_to(tmp_agent)
        assert "current live state of memory" not in prompt
|
||||
|
||||
|
||||
class TestFlushAgentSilenced:
    """The flush agent must not produce any terminal output."""

    def test_print_fn_set_to_noop(self, tmp_path, monkeypatch):
        """_print_fn on the flush agent must be a no-op so tool output never leaks."""
        runner = _make_runner()
        runner.session_store.load_transcript.return_value = _TRANSCRIPT_4_MSGS

        # Filled in by the fake AIAgent factory so the instance can be inspected.
        captured_agent = {}

        def _fake_ai_agent(*args, **kwargs):
            agent = MagicMock()
            captured_agent["instance"] = agent
            return agent

        fake_run_agent = types.ModuleType("run_agent")
        fake_run_agent.AIAgent = _fake_ai_agent
        monkeypatch.setitem(sys.modules, "run_agent", fake_run_agent)

        with (
            patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "k"}),
            patch("gateway.run._resolve_gateway_model", return_value="test-model"),
            patch.dict("sys.modules", {"tools.memory_tool": MagicMock(get_memory_dir=lambda: tmp_path)}),
        ):
            runner._flush_memories_for_session("session_silent")

        agent = captured_agent["instance"]
        # NOTE(review): `agent` is a MagicMock, whose attributes are created on
        # first access and are never None, so this check may pass even if the
        # code under test never assigns _print_fn -- confirm and tighten.
        assert agent._print_fn is not None, "_print_fn should be overridden to suppress output"
        # Calling it must not raise.
        agent._print_fn("should be silenced")

    def test_kawaii_spinner_respects_print_fn(self):
        """KawaiiSpinner must route all output through print_fn when supplied."""
        import contextlib
        import io

        from agent.display import KawaiiSpinner

        written = []
        spinner = KawaiiSpinner("test", print_fn=lambda *a, **kw: written.append(a))
        spinner._write("hello")
        assert written == [("hello",)], "spinner should route through print_fn"

        # With a no-op print_fn nothing may reach stdout. redirect_stdout swaps
        # sys.stdout for the buffer and restores it on exit, even on error.
        buf = io.StringIO()
        with contextlib.redirect_stdout(buf):
            silent_spinner = KawaiiSpinner("silent", print_fn=lambda *a, **kw: None)
            silent_spinner._write("should not appear")
            silent_spinner.stop("done")
        assert buf.getvalue() == "", "no-op print_fn spinner must not write to stdout"

    def test_flush_agent_closes_resources_after_run(self, monkeypatch):
        """Memory flush should close temporary agent resources after the turn."""
        runner, tmp_agent, _ = _make_flush_context(monkeypatch)
        tmp_agent.shutdown_memory_provider = MagicMock()
        tmp_agent.close = MagicMock()

        with (
            patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "k"}),
            patch("gateway.run._resolve_gateway_model", return_value="test-model"),
            patch.dict("sys.modules", {"tools.memory_tool": MagicMock(get_memory_dir=lambda: Path("/nonexistent"))}),
        ):
            runner._flush_memories_for_session("session_cleanup")

        tmp_agent.shutdown_memory_provider.assert_called_once()
        tmp_agent.close.assert_called_once()
|
||||
|
||||
|
||||
class TestFlushPromptStructure:
    """The flush prompt must keep its core instructions."""

    def test_core_instructions_present(self, monkeypatch):
        """The flush prompt should still contain the original guidance."""
        runner, tmp_agent, _ = _make_flush_context(monkeypatch)

        # get_memory_dir points at a path that does not exist.
        stub_memory_tool = MagicMock(get_memory_dir=lambda: Path("/nonexistent"))
        with (
            patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "k"}),
            patch("gateway.run._resolve_gateway_model", return_value="test-model"),
            patch.dict("sys.modules", {"tools.memory_tool": stub_memory_tool}),
        ):
            runner._flush_memories_for_session("session_struct")

        prompt = tmp_agent.run_conversation.call_args.kwargs.get("user_message", "")
        for phrase in (
            "automatically reset",
            "Save any important facts",
            "consider saving it as a skill",
            "Do NOT respond to the user",
        ):
            assert phrase in prompt
|
||||
@@ -197,14 +197,10 @@ def _make_fake_mautrix():
|
||||
self.account_id = account_id
|
||||
self.pickle_key = pickle_key
|
||||
self.db = db
|
||||
self._device_id = ""
|
||||
|
||||
async def open(self):
|
||||
pass
|
||||
|
||||
async def put_device_id(self, device_id):
|
||||
self._device_id = device_id
|
||||
|
||||
mautrix_crypto_store_asyncpg.PgCryptoStore = PgCryptoStore
|
||||
|
||||
# --- mautrix.util ---
|
||||
|
||||
@@ -33,7 +33,6 @@ def _make_runner():
|
||||
runner._ephemeral_system_prompt = ""
|
||||
runner._prefill_messages = []
|
||||
runner._reasoning_config = None
|
||||
runner._session_reasoning_overrides = {}
|
||||
runner._show_reasoning = False
|
||||
runner._provider_routing = {}
|
||||
runner._fallback_model = None
|
||||
@@ -77,10 +76,6 @@ class TestReasoningCommand:
|
||||
source = inspect.getsource(gateway_run.GatewayRunner._handle_message)
|
||||
assert '"reasoning"' in source
|
||||
|
||||
def test_parse_reasoning_command_args_accepts_ascii_and_smart_global_flags(self):
    """Both "--global" and the em-dash form "—global" set the global flag, in either position."""
    parse = gateway_run.GatewayRunner._parse_reasoning_command_args
    expectations = (
        ("high --global", ("high", True)),
        ("—global xhigh", ("xhigh", True)),
    )
    for raw_args, expected in expectations:
        assert parse(raw_args) == expected
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_reasoning_command_reloads_current_state_from_config(self, tmp_path, monkeypatch):
|
||||
hermes_home = tmp_path / "hermes"
|
||||
@@ -116,90 +111,13 @@ class TestReasoningCommand:
|
||||
runner = _make_runner()
|
||||
runner._reasoning_config = {"enabled": True, "effort": "medium"}
|
||||
|
||||
result = await runner._handle_reasoning_command(_make_event("/reasoning low --global"))
|
||||
result = await runner._handle_reasoning_command(_make_event("/reasoning low"))
|
||||
|
||||
saved = yaml.safe_load(config_path.read_text(encoding="utf-8"))
|
||||
assert saved["agent"]["reasoning_effort"] == "low"
|
||||
assert runner._reasoning_config == {"enabled": True, "effort": "low"}
|
||||
assert "takes effect on next message" in result
|
||||
|
||||
@pytest.mark.asyncio
async def test_handle_reasoning_command_defaults_to_session_only(self, tmp_path, monkeypatch):
    """Without a global flag, /reasoning changes only the session override, not config.yaml."""
    home_dir = tmp_path / "hermes"
    home_dir.mkdir()
    cfg_file = home_dir / "config.yaml"
    cfg_file.write_text("agent:\n reasoning_effort: medium\n", encoding="utf-8")

    monkeypatch.setattr(gateway_run, "_hermes_home", home_dir)

    runner = _make_runner()
    command = _make_event("/reasoning high")
    session_key = runner._session_key_for_source(command.source)

    reply = await runner._handle_reasoning_command(command)

    # The file on disk keeps its original effort level.
    on_disk = yaml.safe_load(cfg_file.read_text(encoding="utf-8"))
    assert on_disk["agent"]["reasoning_effort"] == "medium"
    # The override is recorded for this session and is the runner's live config.
    expected = {"enabled": True, "effort": "high"}
    assert runner._session_reasoning_overrides[session_key] == expected
    assert runner._reasoning_config == expected
    assert "session only" in reply
|
||||
|
||||
@pytest.mark.asyncio
async def test_reasoning_global_clears_existing_session_override(self, tmp_path, monkeypatch):
    """A --global change is written to config.yaml and removes the session's override."""
    home_dir = tmp_path / "hermes"
    home_dir.mkdir()
    cfg_file = home_dir / "config.yaml"
    cfg_file.write_text("agent:\n reasoning_effort: medium\n", encoding="utf-8")

    monkeypatch.setattr(gateway_run, "_hermes_home", home_dir)

    runner = _make_runner()
    command = _make_event("/reasoning low --global")
    session_key = runner._session_key_for_source(command.source)
    # Start with an override already in place for this session.
    runner._session_reasoning_overrides[session_key] = {"enabled": True, "effort": "xhigh"}

    reply = await runner._handle_reasoning_command(command)

    on_disk = yaml.safe_load(cfg_file.read_text(encoding="utf-8"))
    assert on_disk["agent"]["reasoning_effort"] == "low"
    assert session_key not in runner._session_reasoning_overrides
    assert "saved to config" in reply
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_reasoning_reset_clears_session_override_without_config_write(self, tmp_path, monkeypatch):
|
||||
hermes_home = tmp_path / "hermes"
|
||||
hermes_home.mkdir()
|
||||
config_path = hermes_home / "config.yaml"
|
||||
config_path.write_text("agent:\n reasoning_effort: medium\n", encoding="utf-8")
|
||||
|
||||
monkeypatch.setattr(gateway_run, "_hermes_home", hermes_home)
|
||||
|
||||
runner = _make_runner()
|
||||
event = _make_event("/reasoning reset")
|
||||
session_key = runner._session_key_for_source(event.source)
|
||||
runner._session_reasoning_overrides[session_key] = {"enabled": True, "effort": "xhigh"}
|
||||
|
||||
result = await runner._handle_reasoning_command(event)
|
||||
|
||||
saved = yaml.safe_load(config_path.read_text(encoding="utf-8"))
|
||||
assert saved["agent"]["reasoning_effort"] == "medium"
|
||||
assert session_key not in runner._session_reasoning_overrides
|
||||
assert "cleared" in result
|
||||
|
||||
def test_resolve_session_reasoning_prefers_session_override(self, tmp_path, monkeypatch):
|
||||
hermes_home = tmp_path / "hermes"
|
||||
hermes_home.mkdir()
|
||||
(hermes_home / "config.yaml").write_text("agent:\n reasoning_effort: low\n", encoding="utf-8")
|
||||
|
||||
monkeypatch.setattr(gateway_run, "_hermes_home", hermes_home)
|
||||
|
||||
runner = _make_runner()
|
||||
source = _make_event("/reasoning").source
|
||||
session_key = runner._session_key_for_source(source)
|
||||
runner._session_reasoning_overrides[session_key] = {"enabled": True, "effort": "xhigh"}
|
||||
|
||||
assert runner._resolve_session_reasoning_config(source=source) == {"enabled": True, "effort": "xhigh"}
|
||||
|
||||
def test_run_agent_reloads_reasoning_config_per_message(self, tmp_path, monkeypatch):
|
||||
hermes_home = tmp_path / "hermes"
|
||||
hermes_home.mkdir()
|
||||
@@ -249,56 +167,6 @@ class TestReasoningCommand:
|
||||
assert _CapturingAgent.last_init is not None
|
||||
assert _CapturingAgent.last_init["reasoning_config"] == {"enabled": True, "effort": "low"}
|
||||
|
||||
def test_run_agent_prefers_session_reasoning_override(self, tmp_path, monkeypatch):
|
||||
hermes_home = tmp_path / "hermes"
|
||||
hermes_home.mkdir()
|
||||
(hermes_home / "config.yaml").write_text("agent:\n reasoning_effort: low\n", encoding="utf-8")
|
||||
|
||||
monkeypatch.setattr(gateway_run, "_hermes_home", hermes_home)
|
||||
monkeypatch.setattr(gateway_run, "_env_path", hermes_home / ".env")
|
||||
monkeypatch.setattr(gateway_run, "load_dotenv", lambda *args, **kwargs: None)
|
||||
monkeypatch.setattr(
|
||||
gateway_run,
|
||||
"_resolve_runtime_agent_kwargs",
|
||||
lambda: {
|
||||
"provider": "openrouter",
|
||||
"api_mode": "chat_completions",
|
||||
"base_url": "https://openrouter.ai/api/v1",
|
||||
"api_key": "***",
|
||||
},
|
||||
)
|
||||
fake_run_agent = types.ModuleType("run_agent")
|
||||
fake_run_agent.AIAgent = _CapturingAgent
|
||||
monkeypatch.setitem(sys.modules, "run_agent", fake_run_agent)
|
||||
|
||||
_CapturingAgent.last_init = None
|
||||
runner = _make_runner()
|
||||
session_key = "agent:main:local:dm"
|
||||
runner._session_reasoning_overrides[session_key] = {"enabled": True, "effort": "high"}
|
||||
|
||||
source = SessionSource(
|
||||
platform=Platform.LOCAL,
|
||||
chat_id="cli",
|
||||
chat_name="CLI",
|
||||
chat_type="dm",
|
||||
user_id="user-1",
|
||||
)
|
||||
|
||||
result = asyncio.run(
|
||||
runner._run_agent(
|
||||
message="ping",
|
||||
context_prompt="",
|
||||
history=[],
|
||||
source=source,
|
||||
session_id="session-1",
|
||||
session_key=session_key,
|
||||
)
|
||||
)
|
||||
|
||||
assert result["final_response"] == "ok"
|
||||
assert _CapturingAgent.last_init is not None
|
||||
assert _CapturingAgent.last_init["reasoning_config"] == {"enabled": True, "effort": "high"}
|
||||
|
||||
def test_run_agent_includes_enabled_mcp_servers_in_gateway_toolsets(self, tmp_path, monkeypatch):
|
||||
hermes_home = tmp_path / "hermes"
|
||||
hermes_home.mkdir()
|
||||
|
||||
@@ -4,7 +4,7 @@ Tests the _handle_resume_command handler (switch to a previously-named session)
|
||||
across gateway messenger platforms.
|
||||
"""
|
||||
|
||||
from unittest.mock import MagicMock
|
||||
from unittest.mock import MagicMock, AsyncMock
|
||||
|
||||
import pytest
|
||||
|
||||
@@ -53,6 +53,9 @@ def _make_runner(session_db=None, current_session_id="current_session_001",
|
||||
mock_store.switch_session.return_value = mock_session_entry
|
||||
runner.session_store = mock_store
|
||||
|
||||
# Stub out memory flushing
|
||||
runner._async_flush_memories = AsyncMock()
|
||||
|
||||
return runner
|
||||
|
||||
|
||||
@@ -230,3 +233,28 @@ class TestHandleResumeCommand:
|
||||
|
||||
assert real_key not in runner._running_agents
|
||||
db.close()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_resume_flushes_memories(self, tmp_path):
|
||||
"""Resume should flush memories from the current session before switching."""
|
||||
from hermes_state import SessionDB
|
||||
|
||||
db = SessionDB(db_path=tmp_path / "state.db")
|
||||
db.create_session("old_session", "telegram")
|
||||
db.set_session_title("old_session", "Old Work")
|
||||
db.create_session("current_session_001", "telegram")
|
||||
|
||||
event = _make_event(text="/resume Old Work")
|
||||
runner = _make_runner(
|
||||
session_db=db,
|
||||
current_session_id="current_session_001",
|
||||
event=event,
|
||||
)
|
||||
|
||||
await runner._handle_resume_command(event)
|
||||
|
||||
runner._async_flush_memories.assert_called_once_with(
|
||||
"current_session_001",
|
||||
"agent:main:telegram:dm:67890",
|
||||
)
|
||||
db.close()
|
||||
|
||||
@@ -1,215 +0,0 @@
|
||||
"""Tests for interrupt-aware tool-progress suppression in gateway.
|
||||
|
||||
When a user sends `stop` while the agent is executing a batch of parallel
|
||||
tool calls, the gateway's progress_callback should stop queuing 🔍 bubbles
|
||||
and the drain loop should drop any already-queued events. Without this
|
||||
guard, the stop acknowledgement appears first but is followed by a trail
|
||||
of tool-progress bubbles for calls that were already parsed from the LLM
|
||||
response — making the interrupt feel ignored.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import importlib
|
||||
import sys
|
||||
import time
|
||||
import types
|
||||
from types import SimpleNamespace
|
||||
|
||||
import pytest
|
||||
|
||||
from gateway.config import Platform, PlatformConfig
|
||||
from gateway.platforms.base import BasePlatformAdapter, SendResult
|
||||
from gateway.session import SessionSource
|
||||
|
||||
|
||||
class ProgressCaptureAdapter(BasePlatformAdapter):
|
||||
def __init__(self, platform=Platform.TELEGRAM):
|
||||
super().__init__(PlatformConfig(enabled=True, token="***"), platform)
|
||||
self.sent = []
|
||||
self.edits = []
|
||||
self.typing = []
|
||||
|
||||
async def connect(self) -> bool:
|
||||
return True
|
||||
|
||||
async def disconnect(self) -> None:
|
||||
return None
|
||||
|
||||
async def send(self, chat_id, content, reply_to=None, metadata=None) -> SendResult:
|
||||
self.sent.append({"chat_id": chat_id, "content": content})
|
||||
return SendResult(success=True, message_id="progress-1")
|
||||
|
||||
async def edit_message(self, chat_id, message_id, content) -> SendResult:
|
||||
self.edits.append({"message_id": message_id, "content": content})
|
||||
return SendResult(success=True, message_id=message_id)
|
||||
|
||||
async def send_typing(self, chat_id, metadata=None) -> None:
|
||||
self.typing.append(chat_id)
|
||||
|
||||
async def stop_typing(self, chat_id) -> None:
|
||||
return None
|
||||
|
||||
async def get_chat_info(self, chat_id: str):
|
||||
return {"id": chat_id}
|
||||
|
||||
|
||||
class PreInterruptAgent:
|
||||
"""Fires tool-progress events BEFORE the interrupt lands.
|
||||
|
||||
These should render normally. Baseline for comparison with the
|
||||
interrupted case — proves the harness renders events when no
|
||||
interrupt is active.
|
||||
"""
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
self.tool_progress_callback = kwargs.get("tool_progress_callback")
|
||||
self.tools = []
|
||||
self._interrupt_requested = False
|
||||
|
||||
@property
|
||||
def is_interrupted(self) -> bool:
|
||||
return self._interrupt_requested
|
||||
|
||||
def run_conversation(self, message, conversation_history=None, task_id=None):
|
||||
self.tool_progress_callback("tool.started", "web_search", "first search", {})
|
||||
time.sleep(0.35) # let the drain loop process
|
||||
return {"final_response": "done", "messages": [], "api_calls": 1}
|
||||
|
||||
|
||||
class InterruptedAgent:
|
||||
"""Fires tool.started events AFTER interrupt — all should be suppressed.
|
||||
|
||||
Mirrors the failure mode in the bug report: LLM returned N parallel
|
||||
web_search calls, interrupt flag flipped, remaining events still
|
||||
rendered as bubbles. With the fix, none of these should appear.
|
||||
"""
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
self.tool_progress_callback = kwargs.get("tool_progress_callback")
|
||||
self.tools = []
|
||||
# Start already interrupted — simulates stop having already landed
|
||||
# by the time the agent batch starts firing tool.started events.
|
||||
self._interrupt_requested = True
|
||||
|
||||
@property
|
||||
def is_interrupted(self) -> bool:
|
||||
return self._interrupt_requested
|
||||
|
||||
def run_conversation(self, message, conversation_history=None, task_id=None):
|
||||
# Parallel tool batch — in production these come from one LLM
|
||||
# response with 5 tool_calls. All are post-interrupt.
|
||||
self.tool_progress_callback("tool.started", "web_search", "cognee hermes", {})
|
||||
self.tool_progress_callback("tool.started", "web_search", "McBee deer hunting", {})
|
||||
self.tool_progress_callback("tool.started", "web_search", "kuzu graph db", {})
|
||||
self.tool_progress_callback("tool.started", "web_search", "moonshot kimi api", {})
|
||||
self.tool_progress_callback("tool.started", "web_search", "platform.moonshot.cn", {})
|
||||
time.sleep(0.35) # let the drain loop attempt to process the queue
|
||||
return {"final_response": "interrupted", "messages": [], "api_calls": 1}
|
||||
|
||||
|
||||
def _make_runner(adapter):
|
||||
gateway_run = importlib.import_module("gateway.run")
|
||||
GatewayRunner = gateway_run.GatewayRunner
|
||||
|
||||
runner = object.__new__(GatewayRunner)
|
||||
runner.adapters = {adapter.platform: adapter}
|
||||
runner._voice_mode = {}
|
||||
runner._prefill_messages = []
|
||||
runner._ephemeral_system_prompt = ""
|
||||
runner._reasoning_config = None
|
||||
runner._provider_routing = {}
|
||||
runner._fallback_model = None
|
||||
runner._session_db = None
|
||||
runner._running_agents = {}
|
||||
runner._session_run_generation = {}
|
||||
runner.hooks = SimpleNamespace(loaded_hooks=False)
|
||||
runner.config = SimpleNamespace(
|
||||
thread_sessions_per_user=False,
|
||||
group_sessions_per_user=False,
|
||||
stt_enabled=False,
|
||||
)
|
||||
return runner
|
||||
|
||||
|
||||
async def _run_once(monkeypatch, tmp_path, agent_cls, session_id):
|
||||
monkeypatch.setenv("HERMES_TOOL_PROGRESS_MODE", "all")
|
||||
|
||||
fake_dotenv = types.ModuleType("dotenv")
|
||||
fake_dotenv.load_dotenv = lambda *args, **kwargs: None
|
||||
monkeypatch.setitem(sys.modules, "dotenv", fake_dotenv)
|
||||
|
||||
fake_run_agent = types.ModuleType("run_agent")
|
||||
fake_run_agent.AIAgent = agent_cls
|
||||
monkeypatch.setitem(sys.modules, "run_agent", fake_run_agent)
|
||||
|
||||
adapter = ProgressCaptureAdapter()
|
||||
runner = _make_runner(adapter)
|
||||
gateway_run = importlib.import_module("gateway.run")
|
||||
monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
|
||||
monkeypatch.setattr(
|
||||
gateway_run,
|
||||
"_resolve_runtime_agent_kwargs",
|
||||
lambda: {"api_key": "fake"},
|
||||
)
|
||||
source = SessionSource(
|
||||
platform=Platform.TELEGRAM,
|
||||
chat_id="-1001",
|
||||
chat_type="group",
|
||||
thread_id="17585",
|
||||
)
|
||||
result = await runner._run_agent(
|
||||
message="hi",
|
||||
context_prompt="",
|
||||
history=[],
|
||||
source=source,
|
||||
session_id=session_id,
|
||||
session_key="agent:main:telegram:group:-1001:17585",
|
||||
)
|
||||
return adapter, result
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_baseline_non_interrupted_agent_renders_progress(monkeypatch, tmp_path):
|
||||
"""Sanity check: when is_interrupted is False, tool-progress renders normally."""
|
||||
adapter, result = await _run_once(monkeypatch, tmp_path, PreInterruptAgent, "sess-baseline")
|
||||
assert result["final_response"] == "done"
|
||||
rendered = " ".join(c["content"] for c in adapter.sent) + " " + " ".join(
|
||||
c["content"] for c in adapter.edits
|
||||
)
|
||||
assert "first search" in rendered, (
|
||||
"baseline agent should render its tool-progress event — "
|
||||
"if this fails the test harness is broken, not the fix"
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_progress_suppressed_when_agent_is_interrupted(monkeypatch, tmp_path):
|
||||
"""Post-interrupt tool.started events must not render as bubbles.
|
||||
|
||||
This is Bug B from the screenshot: user sends `stop`, agent acks with
|
||||
⚡ Interrupting, but 5 more 🔍 web_search bubbles still render because
|
||||
their tool.started events were already parsed from the LLM response.
|
||||
With the fix, progress_callback and the drain loop both check
|
||||
is_interrupted and skip these events.
|
||||
"""
|
||||
adapter, result = await _run_once(
|
||||
monkeypatch, tmp_path, InterruptedAgent, "sess-interrupted"
|
||||
)
|
||||
assert result["final_response"] == "interrupted"
|
||||
|
||||
rendered = " ".join(c["content"] for c in adapter.sent) + " " + " ".join(
|
||||
c["content"] for c in adapter.edits
|
||||
)
|
||||
|
||||
# None of the post-interrupt queries should appear.
|
||||
for leaked_query in (
|
||||
"cognee hermes",
|
||||
"McBee deer hunting",
|
||||
"kuzu graph db",
|
||||
"moonshot kimi api",
|
||||
"platform.moonshot.cn",
|
||||
):
|
||||
assert leaked_query not in rendered, (
|
||||
f"event '{leaked_query}' leaked into the UI after interrupt — "
|
||||
f"progress_callback / drain loop is not checking is_interrupted"
|
||||
)
|
||||
@@ -165,26 +165,3 @@ async def test_reasoning_rejected_mid_run():
|
||||
assert result is not None
|
||||
assert "can't run mid-turn" in result
|
||||
assert "/reasoning" in result
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_btw_dispatches_mid_run():
|
||||
"""/btw mid-run must dispatch to /background's handler, not hit the catch-all.
|
||||
|
||||
/btw is an alias of /background (see hermes_cli/commands.py). Typing
|
||||
/btw mid-turn must spawn a parallel background task — that's the whole
|
||||
point of the command. Before the mid-turn bypass was added for
|
||||
/background, /btw fell through to the "Agent is running — wait or
|
||||
/stop first" catch-all, making it useless in exactly the scenario it
|
||||
was designed for. The alias and the bypass together make it work.
|
||||
"""
|
||||
runner = _make_runner()
|
||||
runner._handle_background_command = AsyncMock(
|
||||
return_value='🚀 Background task started: "what module owns titles?"'
|
||||
)
|
||||
|
||||
result = await runner._handle_message(_make_event("/btw what module owns titles?"))
|
||||
|
||||
runner._handle_background_command.assert_awaited_once()
|
||||
assert result is not None
|
||||
assert "can't run mid-turn" not in result
|
||||
|
||||
@@ -177,8 +177,8 @@ async def test_idle_expiry_fires_finalize_hook(mock_invoke_hook):
|
||||
its reset policy (idle timeout, scheduled reset), it must fire
|
||||
``on_session_finalize`` so plugin providers get the same final-pass
|
||||
extraction opportunity they'd get from /new or CLI shutdown. Before
|
||||
the fix, the expiry path evicted the agent but silently skipped the
|
||||
hook.
|
||||
the fix, the expiry path flushed memories and evicted the agent but
|
||||
silently skipped the hook.
|
||||
"""
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
@@ -200,7 +200,7 @@ async def test_idle_expiry_fires_finalize_hook(mock_invoke_hook):
|
||||
platform=Platform.TELEGRAM,
|
||||
chat_type="dm",
|
||||
)
|
||||
expired_entry.expiry_finalized = False
|
||||
expired_entry.memory_flushed = False
|
||||
|
||||
runner.session_store = MagicMock()
|
||||
runner.session_store._ensure_loaded = MagicMock()
|
||||
@@ -211,24 +211,24 @@ async def test_idle_expiry_fires_finalize_hook(mock_invoke_hook):
|
||||
runner.session_store._lock.__exit__ = MagicMock(return_value=None)
|
||||
runner.session_store._save = MagicMock()
|
||||
|
||||
runner._async_flush_memories = AsyncMock()
|
||||
runner._evict_cached_agent = MagicMock()
|
||||
runner._cleanup_agent_resources = MagicMock()
|
||||
runner._sweep_idle_cached_agents = MagicMock(return_value=0)
|
||||
|
||||
# The watcher starts with `await asyncio.sleep(60)` and loops while
|
||||
# `self._running`. Patch sleep so the 60s initial delay is instant, and
|
||||
# make the expiry hook invocation flip `_running` false so the loop
|
||||
# exits cleanly after one pass.
|
||||
# `self._running`. Patch sleep so the 60s initial delay is instant, then
|
||||
# flip `_running` false inside the flush call so the loop exits cleanly
|
||||
# after one pass.
|
||||
_orig_sleep = __import__("asyncio").sleep
|
||||
|
||||
async def _fast_sleep(_):
|
||||
await _orig_sleep(0)
|
||||
|
||||
def _hook_and_stop(*a, **kw):
|
||||
runner._running = False
|
||||
return None
|
||||
async def _flush_and_stop(session_id, key):
|
||||
runner._running = False # terminate the loop after this iteration
|
||||
|
||||
mock_invoke_hook.side_effect = _hook_and_stop
|
||||
runner._async_flush_memories = AsyncMock(side_effect=_flush_and_stop)
|
||||
|
||||
with patch("gateway.run.asyncio.sleep", side_effect=_fast_sleep):
|
||||
await runner._session_expiry_watcher(interval=0)
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
"""Regression tests for approval-state cleanup on session boundaries."""
|
||||
|
||||
from datetime import datetime
|
||||
from unittest.mock import MagicMock
|
||||
from unittest.mock import AsyncMock, MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
@@ -72,6 +72,7 @@ def _make_resume_runner():
|
||||
runner = object.__new__(GatewayRunner)
|
||||
runner.adapters = {}
|
||||
runner._background_tasks = set()
|
||||
runner._async_flush_memories = AsyncMock()
|
||||
runner._running_agents = {}
|
||||
runner._running_agents_ts = {}
|
||||
runner._busy_ack_ts = {}
|
||||
|
||||
@@ -58,7 +58,7 @@ class TestFormatSessionInfo:
|
||||
{"provider": "", "base_url": "", "api_key": ""})
|
||||
with p1, p2, p3:
|
||||
info = runner._format_session_info()
|
||||
assert "256K" in info
|
||||
assert "128K" in info
|
||||
assert "model.context_length" in info
|
||||
|
||||
def test_local_endpoint_shown(self, runner, tmp_path):
|
||||
|
||||
@@ -54,7 +54,6 @@ def _make_runner():
|
||||
runner._background_tasks = set()
|
||||
runner._session_db = None
|
||||
runner._session_model_overrides = {}
|
||||
runner._session_reasoning_overrides = {}
|
||||
runner._pending_model_notes = {}
|
||||
runner._pending_approvals = {}
|
||||
runner._agent_cache = {}
|
||||
@@ -103,7 +102,6 @@ def test_run_agent_prefers_session_override_over_global_runtime(monkeypatch):
|
||||
)
|
||||
session_key = "agent:main:local:dm"
|
||||
runner._session_model_overrides[session_key] = _codex_override()
|
||||
runner._session_reasoning_overrides[session_key] = {"enabled": True, "effort": "high"}
|
||||
|
||||
result = asyncio.run(
|
||||
runner._run_agent(
|
||||
@@ -123,7 +121,6 @@ def test_run_agent_prefers_session_override_over_global_runtime(monkeypatch):
|
||||
assert _CapturingAgent.last_init["api_mode"] == "codex_responses"
|
||||
assert _CapturingAgent.last_init["base_url"] == "https://chatgpt.com/backend-api/codex"
|
||||
assert _CapturingAgent.last_init["api_key"] == "***"
|
||||
assert _CapturingAgent.last_init["reasoning_config"] == {"enabled": True, "effort": "high"}
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@@ -152,7 +149,6 @@ async def test_background_task_prefers_session_override_over_global_runtime(monk
|
||||
)
|
||||
session_key = runner._session_key_for_source(source)
|
||||
runner._session_model_overrides[session_key] = _codex_override()
|
||||
runner._session_reasoning_overrides[session_key] = {"enabled": True, "effort": "high"}
|
||||
|
||||
await runner._run_background_task("say hello", source, "bg_test")
|
||||
|
||||
@@ -162,4 +158,3 @@ async def test_background_task_prefers_session_override_over_global_runtime(monk
|
||||
assert _CapturingAgent.last_init["api_mode"] == "codex_responses"
|
||||
assert _CapturingAgent.last_init["base_url"] == "https://chatgpt.com/backend-api/codex"
|
||||
assert _CapturingAgent.last_init["api_key"] == "***"
|
||||
assert _CapturingAgent.last_init["reasoning_config"] == {"enabled": True, "effort": "high"}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
"""Tests that /new (and its /reset alias) clears session-scoped overrides."""
|
||||
"""Tests that /new (and its /reset alias) clears the session-scoped model override."""
|
||||
from datetime import datetime
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import AsyncMock, MagicMock
|
||||
@@ -37,7 +37,6 @@ def _make_runner():
|
||||
runner._voice_mode = {}
|
||||
runner.hooks = SimpleNamespace(emit=AsyncMock(), loaded_hooks=False)
|
||||
runner._session_model_overrides = {}
|
||||
runner._session_reasoning_overrides = {}
|
||||
runner._pending_model_notes = {}
|
||||
runner._background_tasks = set()
|
||||
|
||||
@@ -76,16 +75,14 @@ async def test_new_command_clears_session_model_override():
|
||||
runner._session_model_overrides[session_key] = {
|
||||
"model": "gpt-4o",
|
||||
"provider": "openai",
|
||||
"api_key": "***",
|
||||
"api_key": "sk-test",
|
||||
"base_url": "",
|
||||
"api_mode": "openai",
|
||||
}
|
||||
runner._session_reasoning_overrides[session_key] = {"enabled": True, "effort": "high"}
|
||||
|
||||
await runner._handle_reset_command(_make_event("/new"))
|
||||
|
||||
assert session_key not in runner._session_model_overrides
|
||||
assert session_key not in runner._session_reasoning_overrides
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@@ -95,12 +92,10 @@ async def test_new_command_no_override_is_noop():
|
||||
session_key = build_session_key(_make_source())
|
||||
|
||||
assert session_key not in runner._session_model_overrides
|
||||
assert session_key not in runner._session_reasoning_overrides
|
||||
|
||||
await runner._handle_reset_command(_make_event("/new"))
|
||||
|
||||
assert session_key not in runner._session_model_overrides
|
||||
assert session_key not in runner._session_reasoning_overrides
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@@ -120,16 +115,12 @@ async def test_new_command_only_clears_own_session():
|
||||
runner._session_model_overrides[other_key] = {
|
||||
"model": "claude-sonnet-4-6",
|
||||
"provider": "anthropic",
|
||||
"api_key": "***",
|
||||
"api_key": "sk-ant-test",
|
||||
"base_url": "",
|
||||
"api_mode": "anthropic",
|
||||
}
|
||||
runner._session_reasoning_overrides[session_key] = {"enabled": True, "effort": "high"}
|
||||
runner._session_reasoning_overrides[other_key] = {"enabled": True, "effort": "low"}
|
||||
|
||||
await runner._handle_reset_command(_make_event("/new"))
|
||||
|
||||
assert session_key not in runner._session_model_overrides
|
||||
assert other_key in runner._session_model_overrides
|
||||
assert session_key not in runner._session_reasoning_overrides
|
||||
assert other_key in runner._session_reasoning_overrides
|
||||
|
||||
@@ -177,53 +177,6 @@ class TestHandleVoiceCommand:
|
||||
|
||||
assert adapter._auto_tts_disabled_chats == {"123"}
|
||||
|
||||
def test_sync_populates_enabled_chats_from_voice_modes(self, runner):
|
||||
"""Issue #16007: sync also restores per-chat /voice on|tts opt-ins.
|
||||
|
||||
The adapter's ``_auto_tts_enabled_chats`` must mirror chats whose
|
||||
persisted voice_mode is ``voice_only`` or ``all`` — without this,
|
||||
``/voice on`` was relying on a "not in disabled set" default that
|
||||
silently enabled auto-TTS for every chat.
|
||||
"""
|
||||
from gateway.config import Platform
|
||||
runner._voice_mode = {
|
||||
"telegram:off_chat": "off",
|
||||
"telegram:on_chat": "voice_only",
|
||||
"telegram:tts_chat": "all",
|
||||
"slack:999": "voice_only", # wrong platform, must be ignored
|
||||
}
|
||||
adapter = SimpleNamespace(
|
||||
_auto_tts_default=False,
|
||||
_auto_tts_disabled_chats=set(),
|
||||
_auto_tts_enabled_chats=set(),
|
||||
platform=Platform.TELEGRAM,
|
||||
)
|
||||
|
||||
runner._sync_voice_mode_state_to_adapter(adapter)
|
||||
|
||||
assert adapter._auto_tts_disabled_chats == {"off_chat"}
|
||||
assert adapter._auto_tts_enabled_chats == {"on_chat", "tts_chat"}
|
||||
|
||||
def test_sync_pushes_config_default_onto_adapter(self, runner, monkeypatch):
|
||||
"""Issue #16007: ``voice.auto_tts`` must propagate to ``_auto_tts_default``."""
|
||||
from gateway.config import Platform
|
||||
|
||||
fake_cfg = {"voice": {"auto_tts": True}}
|
||||
monkeypatch.setattr(
|
||||
"hermes_cli.config.load_config",
|
||||
lambda: fake_cfg,
|
||||
)
|
||||
adapter = SimpleNamespace(
|
||||
_auto_tts_default=False,
|
||||
_auto_tts_disabled_chats=set(),
|
||||
_auto_tts_enabled_chats=set(),
|
||||
platform=Platform.TELEGRAM,
|
||||
)
|
||||
|
||||
runner._sync_voice_mode_state_to_adapter(adapter)
|
||||
|
||||
assert adapter._auto_tts_default is True
|
||||
|
||||
def test_restart_restores_voice_off_state(self, runner, tmp_path):
|
||||
from gateway.config import Platform
|
||||
runner._VOICE_MODE_PATH.write_text(json.dumps({"telegram:123": "off"}))
|
||||
@@ -2753,56 +2706,3 @@ class TestUDPKeepalive:
|
||||
mock_conn.send_packet.assert_called_with(b'\xf8\xff\xfe')
|
||||
finally:
|
||||
DiscordAdapter._KEEPALIVE_INTERVAL = original_interval
|
||||
|
||||
|
||||
# =====================================================================
|
||||
# BasePlatformAdapter._should_auto_tts_for_chat — gate for auto-TTS
|
||||
# on voice input. Regression test for Issue #16007.
|
||||
# =====================================================================
|
||||
|
||||
class TestShouldAutoTtsForChat:
|
||||
"""Three-layer gate: per-chat enable > per-chat disable > config default."""
|
||||
|
||||
def _make_adapter(self, *, default: bool, enabled=(), disabled=()):
|
||||
"""Build a bare adapter with only the attrs the gate reads."""
|
||||
adapter = SimpleNamespace(
|
||||
_auto_tts_default=default,
|
||||
_auto_tts_enabled_chats=set(enabled),
|
||||
_auto_tts_disabled_chats=set(disabled),
|
||||
)
|
||||
# Bind the unbound method — _should_auto_tts_for_chat only reads the
|
||||
# three attrs above via ``self.``, so an unbound call works.
|
||||
from gateway.platforms.base import BasePlatformAdapter
|
||||
return BasePlatformAdapter._should_auto_tts_for_chat, adapter
|
||||
|
||||
def test_default_false_no_override_suppresses(self):
|
||||
"""Issue #16007: voice.auto_tts=False and no per-chat state → no TTS."""
|
||||
fn, adapter = self._make_adapter(default=False)
|
||||
assert fn(adapter, "chat1") is False
|
||||
|
||||
def test_default_true_no_override_fires(self):
|
||||
fn, adapter = self._make_adapter(default=True)
|
||||
assert fn(adapter, "chat1") is True
|
||||
|
||||
def test_explicit_enable_overrides_false_default(self):
|
||||
"""``/voice on`` with config auto_tts=False still fires."""
|
||||
fn, adapter = self._make_adapter(default=False, enabled={"chat1"})
|
||||
assert fn(adapter, "chat1") is True
|
||||
|
||||
def test_explicit_disable_overrides_true_default(self):
|
||||
"""``/voice off`` with config auto_tts=True still suppresses."""
|
||||
fn, adapter = self._make_adapter(default=True, disabled={"chat1"})
|
||||
assert fn(adapter, "chat1") is False
|
||||
|
||||
def test_enabled_wins_over_disabled(self):
|
||||
"""An explicit enable beats an explicit disable (enable takes priority)."""
|
||||
fn, adapter = self._make_adapter(
|
||||
default=False, enabled={"chat1"}, disabled={"chat1"}
|
||||
)
|
||||
assert fn(adapter, "chat1") is True
|
||||
|
||||
def test_per_chat_isolation(self):
|
||||
"""Enable for chat1 doesn't leak to chat2."""
|
||||
fn, adapter = self._make_adapter(default=False, enabled={"chat1"})
|
||||
assert fn(adapter, "chat1") is True
|
||||
assert fn(adapter, "chat2") is False
|
||||
|
||||
@@ -1,152 +0,0 @@
|
||||
"""Regression test for the `/model` picker confirmation display.
|
||||
|
||||
Bug (April 2026): after choosing a model from the interactive `/model` picker,
|
||||
``HermesCLI._apply_model_switch_result()`` printed ``ModelInfo.context_window``
|
||||
straight from models.dev, which always reports the vendor-wide value (e.g.
|
||||
gpt-5.5 = 1,050,000 on ``openai``). That ignored provider-specific caps — in
|
||||
particular, ChatGPT Codex OAuth enforces 272K on the same slug. The sibling
|
||||
``_handle_model_switch()`` (typed ``/model <name>``) was already fixed to use
|
||||
``resolve_display_context_length()``; the picker path was missed, causing
|
||||
"sometimes 1M, sometimes 272K" for the same model across sibling UI paths.
|
||||
|
||||
Fix: both display paths now go through ``resolve_display_context_length()``.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from unittest.mock import patch
|
||||
|
||||
from hermes_cli.model_switch import ModelSwitchResult
|
||||
|
||||
|
||||
class _FakeModelInfo:
|
||||
context_window = 1_050_000
|
||||
max_output = 0
|
||||
|
||||
def has_cost_data(self):
|
||||
return False
|
||||
|
||||
def format_capabilities(self):
|
||||
return ""
|
||||
|
||||
|
||||
class _StubCLI:
|
||||
"""Minimum attrs ``_apply_model_switch_result`` reads on ``self``."""
|
||||
agent = None
|
||||
model = ""
|
||||
provider = ""
|
||||
requested_provider = ""
|
||||
api_key = ""
|
||||
_explicit_api_key = ""
|
||||
base_url = ""
|
||||
_explicit_base_url = ""
|
||||
api_mode = ""
|
||||
_pending_model_switch_note = ""
|
||||
|
||||
|
||||
def _run_display(monkeypatch, result):
|
||||
import cli as cli_mod
|
||||
|
||||
captured: list[str] = []
|
||||
monkeypatch.setattr(cli_mod, "_cprint", lambda s, *a, **k: captured.append(str(s)))
|
||||
# Avoid writing to ~/.hermes/config.yaml during the test.
|
||||
monkeypatch.setattr(cli_mod, "save_config_value", lambda *a, **k: None)
|
||||
cli_mod.HermesCLI._apply_model_switch_result(_StubCLI(), result, False)
|
||||
return captured
|
||||
|
||||
|
||||
def test_picker_path_uses_provider_aware_context_on_codex(monkeypatch):
|
||||
"""``_apply_model_switch_result`` must prefer the provider-aware resolver
|
||||
(272K on Codex) over the raw models.dev value (1.05M for gpt-5.5).
|
||||
"""
|
||||
result = ModelSwitchResult(
|
||||
success=True,
|
||||
new_model="gpt-5.5",
|
||||
target_provider="openai-codex",
|
||||
provider_changed=True,
|
||||
api_key="",
|
||||
base_url="https://chatgpt.com/backend-api/codex",
|
||||
api_mode="codex_responses",
|
||||
warning_message="",
|
||||
provider_label="ChatGPT Codex",
|
||||
resolved_via_alias=False,
|
||||
capabilities=None,
|
||||
model_info=_FakeModelInfo(), # models.dev says 1.05M
|
||||
is_global=False,
|
||||
)
|
||||
with patch(
|
||||
"agent.model_metadata.get_model_context_length",
|
||||
return_value=272_000,
|
||||
):
|
||||
lines = _run_display(monkeypatch, result)
|
||||
|
||||
ctx_line = next((l for l in lines if "Context:" in l), "")
|
||||
assert "272,000" in ctx_line, (
|
||||
f"picker-path display must show Codex's 272K cap, got: {ctx_line!r}"
|
||||
)
|
||||
assert "1,050,000" not in ctx_line, (
|
||||
f"picker-path display leaked models.dev's 1.05M for Codex: {ctx_line!r}"
|
||||
)
|
||||
|
||||
|
||||
def test_picker_path_shows_vendor_value_when_no_provider_cap(monkeypatch):
    """With no provider-enforced cap (e.g. OpenRouter) the display shows 1.05M.

    Here the patched resolver and the fake models.dev info agree, so the
    "Context:" line must carry the full 1,050,000 figure for gpt-5.5.
    """
    switch_fields = {
        "success": True,
        "new_model": "openai/gpt-5.5",
        "target_provider": "openrouter",
        "provider_changed": True,
        "api_key": "",
        "base_url": "https://openrouter.ai/api/v1",
        "api_mode": "chat_completions",
        "warning_message": "",
        "provider_label": "OpenRouter",
        "resolved_via_alias": False,
        "capabilities": None,
        "model_info": _FakeModelInfo(),
        "is_global": False,
    }
    result = ModelSwitchResult(**switch_fields)

    resolver_patch = patch(
        "agent.model_metadata.get_model_context_length",
        return_value=1_050_000,
    )
    with resolver_patch:
        lines = _run_display(monkeypatch, result)

    matches = [entry for entry in lines if "Context:" in entry]
    ctx_line = matches[0] if matches else ""

    assert "1,050,000" in ctx_line, (
        f"OpenRouter gpt-5.5 should show 1.05M context, got: {ctx_line!r}"
    )
|
||||
|
||||
|
||||
def test_picker_path_falls_back_to_model_info_when_resolver_empty(monkeypatch):
    """A resolver returning ``None`` must not blank out the context display.

    When ``get_model_context_length`` yields nothing (rare — a truly unknown
    endpoint), the "Context:" line should still surface
    ``ModelInfo.context_window`` so the user sees *something*.
    """
    switch_fields = {
        "success": True,
        "new_model": "some-model",
        "target_provider": "some-provider",
        "provider_changed": True,
        "api_key": "",
        "base_url": "",
        "api_mode": "chat_completions",
        "warning_message": "",
        "provider_label": "Some Provider",
        "resolved_via_alias": False,
        "capabilities": None,
        "model_info": _FakeModelInfo(),  # context_window = 1_050_000
        "is_global": False,
    }
    result = ModelSwitchResult(**switch_fields)

    resolver_patch = patch(
        "agent.model_metadata.get_model_context_length",
        return_value=None,
    )
    with resolver_patch:
        lines = _run_display(monkeypatch, result)

    matches = [entry for entry in lines if "Context:" in entry]
    ctx_line = matches[0] if matches else ""

    assert "1,050,000" in ctx_line, (
        f"resolver-empty path should fall back to ModelInfo, got: {ctx_line!r}"
    )
|
||||
@@ -1,237 +0,0 @@
|
||||
"""Tests for hermes_cli.azure_detect — transport & model auto-detection."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from hermes_cli import azure_detect
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
class _FakeHTTPResponse:
|
||||
"""Minimal stand-in for urllib.request.urlopen's context manager."""
|
||||
|
||||
def __init__(self, status: int, body: bytes):
|
||||
self.status = status
|
||||
self._body = body
|
||||
|
||||
def __enter__(self):
|
||||
return self
|
||||
|
||||
def __exit__(self, exc_type, exc, tb):
|
||||
return False
|
||||
|
||||
def read(self) -> bytes:
|
||||
return self._body
|
||||
|
||||
|
||||
def _openai_models_body(*ids: str) -> bytes:
|
||||
return json.dumps({
|
||||
"object": "list",
|
||||
"data": [{"id": i, "object": "model"} for i in ids],
|
||||
}).encode()
|
||||
|
||||
|
||||
def _anthropic_error_body(msg: str = "model not found") -> bytes:
|
||||
return json.dumps({
|
||||
"type": "error",
|
||||
"error": {"type": "invalid_request_error", "message": msg},
|
||||
}).encode()
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# _looks_like_anthropic_path
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
@pytest.mark.parametrize(
    ("url", "expected"),
    [
        ("https://foo.services.ai.azure.com/anthropic", True),
        ("https://foo.services.ai.azure.com/anthropic/", True),
        ("https://foo.services.ai.azure.com/anthropic/v1", True),
        ("https://foo.openai.azure.com/openai/v1", False),
        ("https://foo.openai.azure.com/", False),
        ("https://openrouter.ai/api/v1", False),
    ],
)
def test_looks_like_anthropic_path(url, expected):
    """``_looks_like_anthropic_path`` returns the exact bool expected per URL."""
    verdict = azure_detect._looks_like_anthropic_path(url)
    assert verdict is expected
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# _extract_model_ids
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
def test_extract_model_ids_openai_shape():
    """Ids are pulled from an OpenAI-style ``data`` list, preserving order."""
    first = {"id": "gpt-4.1-mini", "object": "model"}
    second = {"id": "claude-sonnet-4-6", "object": "model"}
    body = {"object": "list", "data": [first, second]}

    extracted = azure_detect._extract_model_ids(body)

    assert extracted == ["gpt-4.1-mini", "claude-sonnet-4-6"]
|
||||
|
||||
|
||||
def test_extract_model_ids_bad_shape_returns_empty():
    """Malformed bodies (no data, non-list data, id-less items) yield ``[]``."""
    malformed_bodies = (
        {},
        {"data": "not-a-list"},
        {"data": [{"no-id": True}]},
    )
    for malformed in malformed_bodies:
        assert azure_detect._extract_model_ids(malformed) == []
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# detect() integration
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
def test_detect_anthropic_path_wins_without_http():
    """An ``/anthropic`` URL path decides the mode — neither probe is called."""
    with patch.object(azure_detect, "_http_get_json") as fake_get:
        with patch.object(azure_detect, "_probe_anthropic_messages") as fake_probe:
            result = azure_detect.detect(
                "https://foo.services.ai.azure.com/anthropic",
                "key-abc",
            )

    assert result.api_mode == "anthropic_messages"
    assert result.is_anthropic is True
    assert "path" in result.reason.lower()
    # The mocks keep their call records after the patches are undone.
    fake_get.assert_not_called()
    fake_probe.assert_not_called()
|
||||
|
||||
|
||||
def test_detect_openai_models_probe_success():
    """A ``/models`` probe answering with a model list selects chat_completions."""

    def _fake_get(url, api_key, timeout=6.0):
        # The key handed to ``detect`` must reach the HTTP helper unchanged.
        assert "key-abc" == api_key
        payload = _openai_models_body("gpt-5.4", "claude-opus-4-6")
        return 200, json.loads(payload)

    http_patch = patch.object(azure_detect, "_http_get_json", side_effect=_fake_get)
    with http_patch:
        result = azure_detect.detect(
            "https://my.openai.azure.com/openai/v1",
            "key-abc",
        )

    assert result.api_mode == "chat_completions"
    assert result.models_probe_ok is True
    assert result.models == ["gpt-5.4", "claude-opus-4-6"]
    assert "/models" in result.reason
|
||||
|
||||
|
||||
def test_detect_openai_models_probe_empty_list_still_counts():
    """An OpenAI-shaped response with zero models is still chat_completions."""

    def _fake_get(url, api_key, timeout=6.0):
        empty_listing = {"object": "list", "data": []}
        return 200, empty_listing

    http_patch = patch.object(azure_detect, "_http_get_json", side_effect=_fake_get)
    with http_patch:
        result = azure_detect.detect(
            "https://my.openai.azure.com/openai/v1",
            "key-abc",
        )

    assert result.api_mode == "chat_completions"
    assert result.models == []
    assert result.models_probe_ok is True
|
||||
|
||||
|
||||
def test_detect_falls_back_to_anthropic_probe():
    """When ``/models`` is refused but the Messages probe passes, pick Anthropic."""

    def _fake_get(url, api_key, timeout=6.0):
        # /models forbidden
        return 401, None

    with patch.object(azure_detect, "_http_get_json", side_effect=_fake_get):
        with patch.object(azure_detect, "_probe_anthropic_messages", return_value=True):
            result = azure_detect.detect(
                "https://my.services.ai.azure.com/v1",
                "key-abc",
            )

    assert result.api_mode == "anthropic_messages"
    assert result.is_anthropic is True
|
||||
|
||||
|
||||
def test_detect_all_probes_fail_returns_none():
    """If no probe succeeds, ``api_mode`` is ``None`` and the reason says manual."""
    with patch.object(azure_detect, "_http_get_json", return_value=(500, None)):
        with patch.object(azure_detect, "_probe_anthropic_messages", return_value=False):
            result = azure_detect.detect(
                "https://some-private.example.com/",
                "key-abc",
            )

    assert result.api_mode is None
    assert result.models == []
    assert "manual" in result.reason.lower()
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# _probe_openai_models URL list (Azure vs v1 api-version)
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
def test_probe_openai_models_tries_multiple_api_versions():
    """A 404 on the plain URL is followed by a successful ``api-version`` retry."""
    calls = []

    def _fake_get(url, api_key, timeout=6.0):
        calls.append(url)
        if "api-version" in url:
            return 200, json.loads(_openai_models_body("gpt-4.1"))
        return 404, None

    http_patch = patch.object(azure_detect, "_http_get_json", side_effect=_fake_get)
    with http_patch:
        ok, models = azure_detect._probe_openai_models(
            "https://my.openai.azure.com/openai/v1",
            "k",
        )

    assert ok is True
    assert models == ["gpt-4.1"]
    # Should have tried without api-version first, then with at least one
    assert any("api-version" not in u for u in calls)
    assert any("api-version" in u for u in calls)
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# _http_get_json error handling
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
def test_http_get_json_on_urlerror_returns_zero_none():
    """A ``URLError`` from ``urlopen`` is reported as ``(0, None)``, not raised."""
    import urllib.error

    dns_failure = urllib.error.URLError("dns fail")
    urlopen_patch = patch(
        "hermes_cli.azure_detect.urllib_request.urlopen",
        side_effect=dns_failure,
    )
    with urlopen_patch:
        status, body = azure_detect._http_get_json("https://bad.example/", "k")

    assert status == 0
    assert body is None
|
||||
|
||||
|
||||
def test_http_get_json_on_http_error_returns_code_none():
    """An ``HTTPError`` from ``urlopen`` is reported as ``(code, None)``."""
    import urllib.error

    err = urllib.error.HTTPError("https://x/", 403, "Forbidden", {}, None)
    urlopen_patch = patch(
        "hermes_cli.azure_detect.urllib_request.urlopen",
        side_effect=err,
    )
    with urlopen_patch:
        status, body = azure_detect._http_get_json("https://x/", "k")

    assert status == 403
    assert body is None
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# lookup_context_length
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
def test_lookup_context_length_returns_known():
    """A resolver value that differs from the fallback constant is passed through."""
    fake = MagicMock(return_value=400000)
    with patch("agent.model_metadata.get_model_context_length", fake):
        with patch("agent.model_metadata.DEFAULT_FALLBACK_CONTEXT", 128000):
            n = azure_detect.lookup_context_length(
                "gpt-5.4",
                "https://x.openai.azure.com/openai/v1",
                "k",
            )

    assert n == 400000
|
||||
|
||||
|
||||
def test_lookup_context_length_returns_none_on_fallback():
    """A resolver value equal to ``DEFAULT_FALLBACK_CONTEXT`` is reported as ``None``."""
    with patch("agent.model_metadata.get_model_context_length", return_value=128000):
        with patch("agent.model_metadata.DEFAULT_FALLBACK_CONTEXT", 128000):
            n = azure_detect.lookup_context_length(
                "totally-unknown-model",
                "https://x.openai.azure.com/openai/v1",
                "k",
            )

    assert n is None
|
||||
|
||||
|
||||
def test_lookup_context_length_swallows_exceptions():
    """An exception raised by the resolver yields ``None`` rather than propagating."""
    exploding_resolver = patch(
        "agent.model_metadata.get_model_context_length",
        side_effect=RuntimeError("boom"),
    )
    with exploding_resolver:
        outcome = azure_detect.lookup_context_length("m", "https://x/", "k")
        assert outcome is None
|
||||
@@ -1,240 +0,0 @@
|
||||
"""Regression tests for custom_providers per-model context_length resolution.
|
||||
|
||||
Covers the fix for #15779 — mid-session /model switch to a named custom
|
||||
provider must honor ``custom_providers[].models.<id>.context_length`` the
|
||||
same way startup already does.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from unittest.mock import patch
|
||||
|
||||
from hermes_cli.config import get_custom_provider_context_length
|
||||
|
||||
|
||||
class TestGetCustomProviderContextLength:
    """Unit tests for ``get_custom_provider_context_length(model, base_url, entries)``."""

    def test_returns_override_for_matching_entry(self):
        """A matching base_url + model id returns the configured context length."""
        entry = {
            "name": "my-endpoint",
            "base_url": "https://example.invalid/v1",
            "models": {"gpt-5.5": {"context_length": 1_050_000}},
        }
        resolved = get_custom_provider_context_length(
            "gpt-5.5", "https://example.invalid/v1", [entry]
        )
        assert resolved == 1_050_000

    def test_trailing_slash_insensitive(self):
        """A trailing slash on either the config URL or the runtime URL is ignored."""
        url_pairs = (
            # config has trailing slash, runtime doesn't — must match
            ("https://example.invalid/v1/", "https://example.invalid/v1"),
            # and the reverse
            ("https://example.invalid/v1", "https://example.invalid/v1/"),
        )
        for configured_url, runtime_url in url_pairs:
            custom = [
                {
                    "base_url": configured_url,
                    "models": {"m": {"context_length": 500_000}},
                }
            ]
            resolved = get_custom_provider_context_length("m", runtime_url, custom)
            assert resolved == 500_000

    def test_returns_none_when_url_does_not_match(self):
        """An entry for a different base_url contributes nothing."""
        entry = {
            "base_url": "https://example.invalid/v1",
            "models": {"m": {"context_length": 400_000}},
        }
        resolved = get_custom_provider_context_length(
            "m", "https://other.invalid/v1", [entry]
        )
        assert resolved is None

    def test_returns_none_when_model_does_not_match(self):
        """A matching base_url without the requested model id yields ``None``."""
        entry = {
            "base_url": "https://example.invalid/v1",
            "models": {"gpt-5.5": {"context_length": 400_000}},
        }
        resolved = get_custom_provider_context_length(
            "different-model", "https://example.invalid/v1", [entry]
        )
        assert resolved is None

    def test_returns_none_for_string_value(self):
        """'256K' string is not a valid int — skip silently.

        (The inline startup path still emits a user-visible warning; the
        helper itself returns None so downstream fallbacks can run.)
        """
        entry = {
            "base_url": "https://example.invalid/v1",
            "models": {"m": {"context_length": "256K"}},
        }
        resolved = get_custom_provider_context_length(
            "m", "https://example.invalid/v1", [entry]
        )
        assert resolved is None

    def test_returns_none_for_zero_or_negative(self):
        """Non-positive context lengths are rejected."""
        for bad in (0, -1, -100):
            entry = {
                "base_url": "https://example.invalid/v1",
                "models": {"m": {"context_length": bad}},
            }
            resolved = get_custom_provider_context_length(
                "m", "https://example.invalid/v1", [entry]
            )
            assert resolved is None, f"value {bad!r} should be rejected"

    def test_empty_inputs_return_none(self):
        """Empty model, empty base_url, ``None`` and ``[]`` all return ``None``."""
        empty_model_entries = [
            {"base_url": "http://x", "models": {"": {"context_length": 1}}}
        ]
        empty_url_entries = [
            {"base_url": "", "models": {"m": {"context_length": 1}}}
        ]
        assert get_custom_provider_context_length("", "http://x", empty_model_entries) is None
        assert get_custom_provider_context_length("m", "", empty_url_entries) is None
        assert get_custom_provider_context_length("m", "http://x", None) is None
        assert get_custom_provider_context_length("m", "http://x", []) is None

    def test_ignores_non_dict_entries(self):
        """Malformed entries are skipped; a later valid entry still wins."""
        url = "https://example.invalid/v1"
        custom = [
            "not a dict",
            None,
            {"base_url": url, "models": "not a dict"},
            {"base_url": url, "models": {"m": "not a dict"}},
            {"base_url": url, "models": {"m": {"context_length": 400_000}}},
        ]
        resolved = get_custom_provider_context_length(
            "m", "https://example.invalid/v1", custom
        )
        assert resolved == 400_000
|
||||
|
||||
|
||||
class TestGetModelContextLengthHonorsOverride:
    """agent.model_metadata.get_model_context_length must honor the
    custom_providers override at step 0b — before any probe, cache hit,
    or models.dev lookup can override it.
    """

    def _mock_all_probes(self):
        """Return un-started patchers that disable every downstream resolution step.

        The result is a plain list of ``patch.object`` patchers (not a single
        context manager); callers are expected to enter each one, e.g. via
        ``contextlib.ExitStack``, so they are all undone together.
        """
        from agent import model_metadata as _mm

        return [
            patch.object(_mm, "get_cached_context_length", return_value=None),
            patch.object(_mm, "fetch_endpoint_model_metadata", return_value={}),
            patch.object(_mm, "fetch_model_metadata", return_value={}),
            patch.object(_mm, "is_local_endpoint", return_value=False),
            patch.object(_mm, "_is_known_provider_base_url", return_value=False),
        ]

    def test_custom_providers_override_wins_over_default_fallback(self):
        """With every probe disabled, the per-model override is what comes back."""
        from contextlib import ExitStack

        from agent.model_metadata import get_model_context_length

        custom = [
            {
                "base_url": "https://example.invalid/v1",
                "models": {"gpt-5.5": {"context_length": 1_050_000}},
            }
        ]
        # ExitStack unwinds whatever was already entered even if a later
        # patcher fails to start, so no patch can leak into other tests.
        with ExitStack() as stack:
            for patcher in self._mock_all_probes():
                stack.enter_context(patcher)
            ctx = get_model_context_length(
                "gpt-5.5",
                base_url="https://example.invalid/v1",
                provider="custom",
                custom_providers=custom,
            )
        assert ctx == 1_050_000

    def test_explicit_config_context_length_still_wins(self):
        """Top-level model.context_length (step 0) outranks custom_providers (step 0b).

        Users who set both should see the top-level value — that's the
        documented precedence and matches the long-standing step-0 behavior.
        """
        from agent.model_metadata import get_model_context_length

        custom = [
            {
                "base_url": "https://example.invalid/v1",
                "models": {"m": {"context_length": 1_050_000}},
            }
        ]
        ctx = get_model_context_length(
            "m",
            base_url="https://example.invalid/v1",
            provider="custom",
            config_context_length=500_000,  # explicit top-level wins
            custom_providers=custom,
        )
        assert ctx == 500_000

    def test_no_override_falls_through_to_default(self):
        """With custom_providers=None and all probes disabled, resolver
        returns DEFAULT_FALLBACK_CONTEXT (256K after the stepdown bump).
        """
        from contextlib import ExitStack

        from agent.model_metadata import get_model_context_length, DEFAULT_FALLBACK_CONTEXT

        with ExitStack() as stack:
            for patcher in self._mock_all_probes():
                stack.enter_context(patcher)
            ctx = get_model_context_length(
                "unknown-model",
                base_url="https://example.invalid/v1",
                provider="custom",
                custom_providers=None,
            )
        assert ctx == DEFAULT_FALLBACK_CONTEXT
|
||||
|
||||
|
||||
class TestContextProbeTiers:
    """Invariants of ``CONTEXT_PROBE_TIERS`` and ``DEFAULT_FALLBACK_CONTEXT``."""

    def test_256k_is_top_tier_and_default(self):
        """256K heads the probe tiers and is also the default fallback."""
        from agent.model_metadata import CONTEXT_PROBE_TIERS, DEFAULT_FALLBACK_CONTEXT

        top_tier = CONTEXT_PROBE_TIERS[0]
        assert top_tier == 256_000
        assert DEFAULT_FALLBACK_CONTEXT == 256_000

        # Tiers still descend monotonically
        neighbours = zip(CONTEXT_PROBE_TIERS, CONTEXT_PROBE_TIERS[1:])
        for a, b in neighbours:
            assert a > b, f"tiers must strictly descend, got {a} then {b}"

        # 128K is still a tier (users relying on it probe-down get there)
        assert 128_000 in CONTEXT_PROBE_TIERS
|
||||
@@ -52,12 +52,7 @@ class TestCustomProviderModelSwitch:
|
||||
_model_flow_named_custom({}, provider_info)
|
||||
|
||||
# fetch_api_models MUST be called even though model was saved
|
||||
mock_fetch.assert_called_once_with(
|
||||
"sk-test",
|
||||
"https://vllm.example.com/v1",
|
||||
timeout=8.0,
|
||||
api_mode=None,
|
||||
)
|
||||
mock_fetch.assert_called_once_with("sk-test", "https://vllm.example.com/v1", timeout=8.0)
|
||||
|
||||
def test_can_switch_to_different_model(self, config_home):
|
||||
"""User selects a different model than the saved one."""
|
||||
@@ -178,147 +173,3 @@ class TestCustomProviderModelSwitch:
|
||||
model = config.get("model")
|
||||
assert isinstance(model, dict)
|
||||
assert "api_mode" not in model, "Stale api_mode should be removed"
|
||||
|
||||
def test_env_template_api_key_is_preserved_in_model_config(self, config_home, monkeypatch):
    """Selecting an env-backed custom provider must not inline the secret.

    The provider's ``api_key`` is the template ``${EXAMPLE_PROVIDER_API_KEY}``.
    The model probe must receive the resolved secret, while the saved
    ``config.yaml`` must keep the template in both ``model.api_key`` and the
    ``custom_providers`` entry.
    """
    import yaml
    from hermes_cli.main import _model_flow_named_custom

    config_path = config_home / "config.yaml"
    # Two-space indentation keeps the keys after "- name:" aligned with
    # "name"; with a single space the document is not valid YAML.
    config_path.write_text(
        "model:\n"
        "  default: old-model\n"
        "  provider: openrouter\n"
        "custom_providers:\n"
        "- name: Example Provider\n"
        "  base_url: https://api.example-provider.test/v1\n"
        "  api_key: ${EXAMPLE_PROVIDER_API_KEY}\n"
        "  model: qwen3.6-35b-fast\n"
    )
    monkeypatch.setenv("EXAMPLE_PROVIDER_API_KEY", "sk-live-example-provider")

    provider_info = {
        "name": "Example Provider",
        "base_url": "https://api.example-provider.test/v1",
        "api_key": "sk-live-example-provider",
        "api_key_ref": "${EXAMPLE_PROVIDER_API_KEY}",
        "model": "qwen3.6-35b-fast",
    }

    # Hiding simple_term_menu presumably forces the plain input() prompt,
    # which is answered with "1" — TODO confirm against the picker code.
    with patch("hermes_cli.models.fetch_api_models", return_value=["qwen3.6-35b-fast"]) as mock_fetch, \
            patch.dict("sys.modules", {"simple_term_menu": None}), \
            patch("builtins.input", return_value="1"), \
            patch("builtins.print"):
        _model_flow_named_custom({}, provider_info)

    mock_fetch.assert_called_once_with(
        "sk-live-example-provider",
        "https://api.example-provider.test/v1",
        timeout=8.0,
        api_mode=None,
    )

    # Read the file once; both the parsed and the raw form are checked.
    saved = config_path.read_text()
    config = yaml.safe_load(saved) or {}
    assert config["model"]["api_key"] == "${EXAMPLE_PROVIDER_API_KEY}"
    assert config["custom_providers"][0]["api_key"] == "${EXAMPLE_PROVIDER_API_KEY}"
    assert "sk-live-example-provider" not in saved
|
||||
|
||||
def test_key_env_custom_provider_persists_reference_not_secret(self, config_home, monkeypatch):
    """key_env custom providers should also avoid writing plaintext keys.

    The provider names its key through ``key_env``; after the flow runs,
    ``model.api_key`` must hold the ``${EXAMPLE_PROVIDER_API_KEY}`` template,
    the ``custom_providers`` entry must keep ``key_env``, and the resolved
    secret must not appear anywhere in ``config.yaml``.
    """
    import yaml
    from hermes_cli.main import _model_flow_named_custom

    config_path = config_home / "config.yaml"
    # Two-space indentation keeps the keys after "- name:" aligned with
    # "name"; with a single space the document is not valid YAML.
    config_path.write_text(
        "model:\n"
        "  default: old-model\n"
        "custom_providers:\n"
        "- name: Example Provider\n"
        "  base_url: https://api.example-provider.test/v1\n"
        "  key_env: EXAMPLE_PROVIDER_API_KEY\n"
        "  model: qwen3.6-35b-fast\n"
    )
    monkeypatch.setenv("EXAMPLE_PROVIDER_API_KEY", "sk-live-example-provider")

    provider_info = {
        "name": "Example Provider",
        "base_url": "https://api.example-provider.test/v1",
        "api_key": "",
        "key_env": "EXAMPLE_PROVIDER_API_KEY",
        "model": "qwen3.6-35b-fast",
    }

    with patch("hermes_cli.models.fetch_api_models", return_value=["qwen3.6-35b-fast"]), \
            patch.dict("sys.modules", {"simple_term_menu": None}), \
            patch("builtins.input", return_value="1"), \
            patch("builtins.print"):
        _model_flow_named_custom({}, provider_info)

    # Read the file once; both the parsed and the raw form are checked.
    saved = config_path.read_text()
    config = yaml.safe_load(saved) or {}
    assert config["model"]["api_key"] == "${EXAMPLE_PROVIDER_API_KEY}"
    assert config["custom_providers"][0]["key_env"] == "EXAMPLE_PROVIDER_API_KEY"
    assert "sk-live-example-provider" not in saved
|
||||
|
||||
def test_env_ref_base_url_preserves_api_key_ref_through_picker(
    self, config_home, monkeypatch
):
    """Integration regression: when BOTH ``base_url`` and ``api_key`` use
    ``${VAR}`` templates (the Discord-reported NeuralWatt case), the picker
    must still preserve the env reference in ``model.api_key``.

    The earlier lookup went through ``get_compatible_custom_providers``
    which dropped entries whose ``base_url`` was an env-ref template
    (``urlparse("${NEURALWATT_API_BASE}")`` has no scheme/netloc), causing
    ``api_key_ref`` to stay empty and the resolved secret to be written to
    ``config.yaml``. This test drives the real picker-callsite code path.
    """
    import yaml
    from hermes_cli.main import select_provider_and_model

    config_path = config_home / "config.yaml"
    # Two-space indentation keeps the keys after "- name:" aligned with
    # "name"; with a single space the document is not valid YAML.
    config_path.write_text(
        "model:\n"
        "  default: old-model\n"
        "  provider: openrouter\n"
        "custom_providers:\n"
        "- name: NeuralWatt\n"
        "  base_url: ${NEURALWATT_API_BASE}\n"
        "  api_key: ${NEURALWATT_API_KEY}\n"
        "  model: qwen3.6-35b-fast\n"
        "  models: []\n"
    )
    monkeypatch.setenv("NEURALWATT_API_BASE", "https://api.neuralwatt.com/v1")
    monkeypatch.setenv("NEURALWATT_API_KEY", "sk-live-neuralwatt-secret")

    # Exercise the real picker: select "custom:neuralwatt" from the
    # provider menu. ``select_provider_and_model`` prompts for a provider
    # choice (returns an index), then hands off to
    # ``_model_flow_named_custom`` with the provider_info built by
    # ``_named_custom_provider_map``.
    def _pick_neuralwatt(labels, default=0):
        for i, label in enumerate(labels):
            if "NeuralWatt" in label:
                return i
        raise AssertionError(
            f"NeuralWatt entry missing from provider menu: {labels}"
        )

    with patch("hermes_cli.main._prompt_provider_choice",
               side_effect=_pick_neuralwatt), \
            patch("hermes_cli.models.fetch_api_models",
                  return_value=["qwen3.6-35b-fast"]) as mock_fetch, \
            patch.dict("sys.modules", {"simple_term_menu": None}), \
            patch("builtins.input", return_value="1"), \
            patch("builtins.print"):
        select_provider_and_model()

    # The live probe must still use the resolved secret.
    mock_fetch.assert_called_once()
    probe_args, _ = mock_fetch.call_args
    assert probe_args[0] == "sk-live-neuralwatt-secret"

    # But config.yaml must keep the env reference, not the plaintext secret.
    saved = config_path.read_text()
    config = yaml.safe_load(saved) or {}
    assert config["model"]["api_key"] == "${NEURALWATT_API_KEY}"
    assert config["custom_providers"][0]["api_key"] == "${NEURALWATT_API_KEY}"
    assert "sk-live-neuralwatt-secret" not in saved
|
||||
|
||||
@@ -308,43 +308,6 @@ def test_run_doctor_accepts_named_provider_from_providers_section(monkeypatch, t
|
||||
assert "model.provider 'volcengine-plan' is not a recognised provider" not in out
|
||||
|
||||
|
||||
def test_run_doctor_accepts_bare_custom_provider(monkeypatch, tmp_path):
    """``run_doctor`` must not report bare ``provider: custom`` as unrecognised.

    A minimal config with ``model.provider: custom`` and a local base_url is
    written under a temporary home, the doctor module is pointed at it, and
    the captured stdout is checked for the "not a recognised provider" line.
    """
    project_root = tmp_path / "project"
    home = tmp_path / ".hermes"
    home.mkdir(parents=True, exist_ok=True)

    config_text = (
        "model:\n"
        " provider: custom\n"
        " default: local-model\n"
        " base_url: http://localhost:8000/v1\n"
    )
    (home / "config.yaml").write_text(config_text, encoding="utf-8")

    monkeypatch.setattr(doctor_mod, "HERMES_HOME", home)
    monkeypatch.setattr(doctor_mod, "PROJECT_ROOT", project_root)
    monkeypatch.setattr(doctor_mod, "_DHH", str(home))
    project_root.mkdir(exist_ok=True)

    def _no_tools(*a, **kw):
        return ([], [])

    # Stand-in ``model_tools`` module exposing only the two names set below.
    fake_model_tools = types.SimpleNamespace(
        check_tool_availability=_no_tools,
        TOOLSET_REQUIREMENTS={},
    )
    monkeypatch.setitem(sys.modules, "model_tools", fake_model_tools)

    # Best-effort stubbing of the auth status helpers: any failure to import
    # or patch them is deliberately ignored.
    try:
        from hermes_cli import auth as _auth_mod

        monkeypatch.setattr(_auth_mod, "get_nous_auth_status", lambda: {})
        monkeypatch.setattr(_auth_mod, "get_codex_auth_status", lambda: {})
    except Exception:
        pass

    buf = io.StringIO()
    with contextlib.redirect_stdout(buf):
        doctor_mod.run_doctor(Namespace(fix=False))
    out = buf.getvalue()

    assert "model.provider 'custom' is not a recognised provider" not in out
|
||||
|
||||
|
||||
def test_run_doctor_termux_does_not_mark_browser_available_without_agent_browser(monkeypatch, tmp_path):
|
||||
home = tmp_path / ".hermes"
|
||||
home.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
@@ -1,486 +0,0 @@
|
||||
"""Tests for `hermes fallback` — chain reading, add/remove/clear, legacy migration."""
|
||||
from __future__ import annotations
|
||||
|
||||
import io
|
||||
import types
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
import yaml
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Shared fixture — isolate HERMES_HOME so save_config writes to tmp_path
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@pytest.fixture()
def isolated_home(tmp_path, monkeypatch):
    """Redirect the home directory and ``HERMES_HOME`` into ``tmp_path``.

    ``Path.home`` is patched to return ``tmp_path``, ``tmp_path/.hermes`` is
    created, and ``HERMES_HOME`` is set to it.  Returns ``tmp_path`` (the
    fake home, not the ``.hermes`` directory).
    """
    hermes_dir = tmp_path / ".hermes"
    monkeypatch.setattr(Path, "home", lambda: tmp_path)
    hermes_dir.mkdir(exist_ok=True)
    monkeypatch.setenv("HERMES_HOME", str(hermes_dir))
    return tmp_path
|
||||
|
||||
|
||||
def _write_config(home: Path, data: dict) -> None:
    """Dump *data* as YAML into ``<home>/.hermes/config.yaml`` (UTF-8)."""
    target = home / ".hermes" / "config.yaml"
    serialized = yaml.safe_dump(data)
    target.write_text(serialized, encoding="utf-8")
|
||||
|
||||
|
||||
def _read_config(home: Path) -> dict:
    """Parse ``<home>/.hermes/config.yaml``; a falsy parse result becomes ``{}``."""
    raw_text = (home / ".hermes" / "config.yaml").read_text(encoding="utf-8")
    parsed = yaml.safe_load(raw_text)
    return parsed or {}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _read_chain / _write_chain
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestReadChain:
    """Behaviour of ``hermes_cli.fallback_cmd._read_chain`` on config dicts."""

    def test_returns_empty_list_when_unset(self):
        """An empty config has no fallback chain."""
        from hermes_cli.fallback_cmd import _read_chain

        chain = _read_chain({})
        assert chain == []

    def test_reads_new_list_format(self):
        """``fallback_providers`` entries come back unchanged and in order."""
        from hermes_cli.fallback_cmd import _read_chain

        first = {"provider": "openrouter", "model": "anthropic/claude-sonnet-4.6"}
        second = {"provider": "nous", "model": "Hermes-4-Llama-3.1-405B"}
        cfg = {"fallback_providers": [first, second]}

        chain = _read_chain(cfg)

        assert chain == [
            {"provider": "openrouter", "model": "anthropic/claude-sonnet-4.6"},
            {"provider": "nous", "model": "Hermes-4-Llama-3.1-405B"},
        ]

    def test_migrates_legacy_single_dict(self):
        """A legacy ``fallback_model`` dict is read as a one-entry chain."""
        from hermes_cli.fallback_cmd import _read_chain

        legacy_entry = {"provider": "openrouter", "model": "gpt-5.4"}
        cfg = {"fallback_model": legacy_entry}

        chain = _read_chain(cfg)

        assert chain == [{"provider": "openrouter", "model": "gpt-5.4"}]

    def test_skips_incomplete_entries(self):
        """Entries lacking provider or model, and non-dict items, are dropped."""
        from hermes_cli.fallback_cmd import _read_chain

        raw_entries = [
            {"provider": "openrouter"},  # missing model
            {"model": "gpt-5.4"},  # missing provider
            {"provider": "nous", "model": "foo"},  # valid
            "not-a-dict",  # noise
        ]
        cfg = {"fallback_providers": raw_entries}

        chain = _read_chain(cfg)

        assert chain == [{"provider": "nous", "model": "foo"}]

    def test_returns_copies_not_aliases(self):
        """Mutating a returned entry must leave the source config untouched."""
        from hermes_cli.fallback_cmd import _read_chain

        cfg = {"fallback_providers": [{"provider": "nous", "model": "foo"}]}

        chain = _read_chain(cfg)
        chain[0]["provider"] = "mutated"

        assert cfg["fallback_providers"][0]["provider"] == "nous"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _extract_fallback_from_model_cfg
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestExtractFallback:
|
||||
def test_extracts_from_default_field(self):
|
||||
from hermes_cli.fallback_cmd import _extract_fallback_from_model_cfg
|
||||
model_cfg = {"provider": "openrouter", "default": "anthropic/claude-sonnet-4.6"}
|
||||
assert _extract_fallback_from_model_cfg(model_cfg) == {
|
||||
"provider": "openrouter",
|
||||
"model": "anthropic/claude-sonnet-4.6",
|
||||
}
|
||||
|
||||
def test_extracts_optional_base_url_and_api_mode(self):
|
||||
from hermes_cli.fallback_cmd import _extract_fallback_from_model_cfg
|
||||
model_cfg = {
|
||||
"provider": "custom",
|
||||
"default": "local-model",
|
||||
"base_url": "http://localhost:11434/v1",
|
||||
"api_mode": "chat_completions",
|
||||
}
|
||||
assert _extract_fallback_from_model_cfg(model_cfg) == {
|
||||
"provider": "custom",
|
||||
"model": "local-model",
|
||||
"base_url": "http://localhost:11434/v1",
|
||||
"api_mode": "chat_completions",
|
||||
}
|
||||
|
||||
def test_returns_none_without_provider(self):
|
||||
from hermes_cli.fallback_cmd import _extract_fallback_from_model_cfg
|
||||
assert _extract_fallback_from_model_cfg({"default": "foo"}) is None
|
||||
|
||||
def test_returns_none_without_model(self):
|
||||
from hermes_cli.fallback_cmd import _extract_fallback_from_model_cfg
|
||||
assert _extract_fallback_from_model_cfg({"provider": "openrouter"}) is None
|
||||
|
||||
def test_returns_none_for_non_dict(self):
|
||||
from hermes_cli.fallback_cmd import _extract_fallback_from_model_cfg
|
||||
assert _extract_fallback_from_model_cfg("plain-string") is None
|
||||
assert _extract_fallback_from_model_cfg(None) is None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# cmd_fallback_list
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestListCommand:
|
||||
def test_list_empty(self, isolated_home, capsys):
|
||||
_write_config(isolated_home, {})
|
||||
from hermes_cli.fallback_cmd import cmd_fallback_list
|
||||
cmd_fallback_list(types.SimpleNamespace())
|
||||
out = capsys.readouterr().out
|
||||
assert "No fallback providers configured" in out
|
||||
assert "hermes fallback add" in out
|
||||
|
||||
def test_list_with_entries(self, isolated_home, capsys):
|
||||
_write_config(isolated_home, {
|
||||
"model": {"provider": "anthropic", "default": "claude-sonnet-4-6"},
|
||||
"fallback_providers": [
|
||||
{"provider": "openrouter", "model": "anthropic/claude-sonnet-4.6"},
|
||||
{"provider": "nous", "model": "Hermes-4"},
|
||||
],
|
||||
})
|
||||
from hermes_cli.fallback_cmd import cmd_fallback_list
|
||||
cmd_fallback_list(types.SimpleNamespace())
|
||||
out = capsys.readouterr().out
|
||||
assert "Fallback chain (2 entries)" in out
|
||||
assert "anthropic/claude-sonnet-4.6" in out
|
||||
assert "Hermes-4" in out
|
||||
# Primary should be shown too
|
||||
assert "claude-sonnet-4-6" in out
|
||||
|
||||
def test_list_migrates_legacy_for_display(self, isolated_home, capsys):
|
||||
_write_config(isolated_home, {
|
||||
"fallback_model": {"provider": "openrouter", "model": "gpt-5.4"},
|
||||
})
|
||||
from hermes_cli.fallback_cmd import cmd_fallback_list
|
||||
cmd_fallback_list(types.SimpleNamespace())
|
||||
out = capsys.readouterr().out
|
||||
assert "1 entry" in out
|
||||
assert "gpt-5.4" in out
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# cmd_fallback_add — mock select_provider_and_model
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestAddCommand:
|
||||
def test_add_appends_new_entry(self, isolated_home, capsys):
|
||||
_write_config(isolated_home, {
|
||||
"model": {"provider": "anthropic", "default": "claude-sonnet-4-6"},
|
||||
})
|
||||
|
||||
def fake_picker(args=None):
|
||||
# Simulate what the real picker does: writes the selection to config["model"]
|
||||
from hermes_cli.config import load_config, save_config
|
||||
cfg = load_config()
|
||||
cfg["model"] = {
|
||||
"provider": "openrouter",
|
||||
"default": "anthropic/claude-sonnet-4.6",
|
||||
"base_url": "https://openrouter.ai/api/v1",
|
||||
"api_mode": "chat_completions",
|
||||
}
|
||||
save_config(cfg)
|
||||
|
||||
with patch("hermes_cli.main.select_provider_and_model", side_effect=fake_picker), \
|
||||
patch("hermes_cli.main._require_tty"):
|
||||
from hermes_cli.fallback_cmd import cmd_fallback_add
|
||||
cmd_fallback_add(types.SimpleNamespace())
|
||||
|
||||
cfg = _read_config(isolated_home)
|
||||
# Primary is preserved
|
||||
assert cfg["model"]["provider"] == "anthropic"
|
||||
assert cfg["model"]["default"] == "claude-sonnet-4-6"
|
||||
# Fallback was appended
|
||||
assert cfg["fallback_providers"] == [
|
||||
{
|
||||
"provider": "openrouter",
|
||||
"model": "anthropic/claude-sonnet-4.6",
|
||||
"base_url": "https://openrouter.ai/api/v1",
|
||||
"api_mode": "chat_completions",
|
||||
}
|
||||
]
|
||||
out = capsys.readouterr().out
|
||||
assert "Added fallback" in out
|
||||
|
||||
def test_add_rejects_duplicate(self, isolated_home, capsys):
|
||||
_write_config(isolated_home, {
|
||||
"model": {"provider": "anthropic", "default": "claude-sonnet-4-6"},
|
||||
"fallback_providers": [
|
||||
{"provider": "openrouter", "model": "gpt-5.4"},
|
||||
],
|
||||
})
|
||||
|
||||
def fake_picker(args=None):
|
||||
from hermes_cli.config import load_config, save_config
|
||||
cfg = load_config()
|
||||
cfg["model"] = {"provider": "openrouter", "default": "gpt-5.4"}
|
||||
save_config(cfg)
|
||||
|
||||
with patch("hermes_cli.main.select_provider_and_model", side_effect=fake_picker), \
|
||||
patch("hermes_cli.main._require_tty"):
|
||||
from hermes_cli.fallback_cmd import cmd_fallback_add
|
||||
cmd_fallback_add(types.SimpleNamespace())
|
||||
|
||||
cfg = _read_config(isolated_home)
|
||||
# Should still have exactly one entry
|
||||
assert len(cfg["fallback_providers"]) == 1
|
||||
out = capsys.readouterr().out
|
||||
assert "already in the fallback chain" in out
|
||||
|
||||
def test_add_rejects_same_as_primary(self, isolated_home, capsys):
|
||||
_write_config(isolated_home, {
|
||||
"model": {"provider": "openrouter", "default": "gpt-5.4"},
|
||||
})
|
||||
|
||||
def fake_picker(args=None):
|
||||
# User picks the same thing that's already the primary
|
||||
from hermes_cli.config import load_config, save_config
|
||||
cfg = load_config()
|
||||
cfg["model"] = {"provider": "openrouter", "default": "gpt-5.4"}
|
||||
save_config(cfg)
|
||||
|
||||
with patch("hermes_cli.main.select_provider_and_model", side_effect=fake_picker), \
|
||||
patch("hermes_cli.main._require_tty"):
|
||||
from hermes_cli.fallback_cmd import cmd_fallback_add
|
||||
cmd_fallback_add(types.SimpleNamespace())
|
||||
|
||||
cfg = _read_config(isolated_home)
|
||||
assert "fallback_providers" not in cfg or cfg["fallback_providers"] == []
|
||||
out = capsys.readouterr().out
|
||||
assert "matches the current primary" in out
|
||||
|
||||
def test_add_preserves_primary_when_picker_changes_it(self, isolated_home):
|
||||
"""The picker mutates config["model"]; fallback_add must restore the primary."""
|
||||
_write_config(isolated_home, {
|
||||
"model": {
|
||||
"provider": "anthropic",
|
||||
"default": "claude-sonnet-4-6",
|
||||
"base_url": "https://api.anthropic.com",
|
||||
"api_mode": "anthropic_messages",
|
||||
},
|
||||
})
|
||||
|
||||
def fake_picker(args=None):
|
||||
from hermes_cli.config import load_config, save_config
|
||||
cfg = load_config()
|
||||
cfg["model"] = {
|
||||
"provider": "openrouter",
|
||||
"default": "anthropic/claude-sonnet-4.6",
|
||||
"base_url": "https://openrouter.ai/api/v1",
|
||||
"api_mode": "chat_completions",
|
||||
}
|
||||
save_config(cfg)
|
||||
|
||||
with patch("hermes_cli.main.select_provider_and_model", side_effect=fake_picker), \
|
||||
patch("hermes_cli.main._require_tty"):
|
||||
from hermes_cli.fallback_cmd import cmd_fallback_add
|
||||
cmd_fallback_add(types.SimpleNamespace())
|
||||
|
||||
cfg = _read_config(isolated_home)
|
||||
# Primary exactly as it was
|
||||
assert cfg["model"]["provider"] == "anthropic"
|
||||
assert cfg["model"]["default"] == "claude-sonnet-4-6"
|
||||
assert cfg["model"]["base_url"] == "https://api.anthropic.com"
|
||||
assert cfg["model"]["api_mode"] == "anthropic_messages"
|
||||
# Fallback added
|
||||
assert len(cfg["fallback_providers"]) == 1
|
||||
assert cfg["fallback_providers"][0]["provider"] == "openrouter"
|
||||
|
||||
def test_add_noop_when_picker_cancelled(self, isolated_home, capsys):
|
||||
_write_config(isolated_home, {
|
||||
"model": {"provider": "anthropic", "default": "claude-sonnet-4-6"},
|
||||
})
|
||||
|
||||
def fake_picker(args=None):
|
||||
# User cancelled — no change to config
|
||||
pass
|
||||
|
||||
with patch("hermes_cli.main.select_provider_and_model", side_effect=fake_picker), \
|
||||
patch("hermes_cli.main._require_tty"):
|
||||
from hermes_cli.fallback_cmd import cmd_fallback_add
|
||||
cmd_fallback_add(types.SimpleNamespace())
|
||||
|
||||
cfg = _read_config(isolated_home)
|
||||
assert "fallback_providers" not in cfg or cfg["fallback_providers"] == []
|
||||
out = capsys.readouterr().out
|
||||
# Either "No fallback added" (picker fully cancelled) or "matches the current primary"
|
||||
# (picker left config untouched) — both indicate a non-add outcome.
|
||||
assert ("No fallback added" in out) or ("matches the current primary" in out)
|
||||
|
||||
def test_add_noop_when_picker_clears_model(self, isolated_home, capsys):
|
||||
"""Simulate picker explicitly clearing model.default (unusual but possible)."""
|
||||
_write_config(isolated_home, {
|
||||
"model": {"provider": "anthropic", "default": "claude-sonnet-4-6"},
|
||||
})
|
||||
|
||||
def fake_picker(args=None):
|
||||
from hermes_cli.config import load_config, save_config
|
||||
cfg = load_config()
|
||||
cfg["model"] = {"provider": "", "default": ""}
|
||||
save_config(cfg)
|
||||
|
||||
with patch("hermes_cli.main.select_provider_and_model", side_effect=fake_picker), \
|
||||
patch("hermes_cli.main._require_tty"):
|
||||
from hermes_cli.fallback_cmd import cmd_fallback_add
|
||||
cmd_fallback_add(types.SimpleNamespace())
|
||||
|
||||
out = capsys.readouterr().out
|
||||
assert "No fallback added" in out
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# cmd_fallback_remove
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestRemoveCommand:
|
||||
def test_remove_empty_chain(self, isolated_home, capsys):
|
||||
_write_config(isolated_home, {})
|
||||
from hermes_cli.fallback_cmd import cmd_fallback_remove
|
||||
cmd_fallback_remove(types.SimpleNamespace())
|
||||
out = capsys.readouterr().out
|
||||
assert "nothing to remove" in out
|
||||
|
||||
def test_remove_selected_entry(self, isolated_home, capsys):
|
||||
_write_config(isolated_home, {
|
||||
"fallback_providers": [
|
||||
{"provider": "openrouter", "model": "gpt-5.4"},
|
||||
{"provider": "nous", "model": "Hermes-4"},
|
||||
{"provider": "anthropic", "model": "claude-sonnet-4-6"},
|
||||
],
|
||||
})
|
||||
|
||||
# Picker returns index 1 (the middle entry, "nous / Hermes-4")
|
||||
with patch("hermes_cli.setup._curses_prompt_choice", return_value=1):
|
||||
from hermes_cli.fallback_cmd import cmd_fallback_remove
|
||||
cmd_fallback_remove(types.SimpleNamespace())
|
||||
|
||||
cfg = _read_config(isolated_home)
|
||||
assert cfg["fallback_providers"] == [
|
||||
{"provider": "openrouter", "model": "gpt-5.4"},
|
||||
{"provider": "anthropic", "model": "claude-sonnet-4-6"},
|
||||
]
|
||||
out = capsys.readouterr().out
|
||||
assert "Removed fallback" in out
|
||||
assert "Hermes-4" in out
|
||||
|
||||
def test_remove_cancel_keeps_chain(self, isolated_home):
|
||||
_write_config(isolated_home, {
|
||||
"fallback_providers": [
|
||||
{"provider": "openrouter", "model": "gpt-5.4"},
|
||||
],
|
||||
})
|
||||
|
||||
# Cancel = last item (index == len(chain) == 1 in our menu)
|
||||
with patch("hermes_cli.setup._curses_prompt_choice", return_value=1):
|
||||
from hermes_cli.fallback_cmd import cmd_fallback_remove
|
||||
cmd_fallback_remove(types.SimpleNamespace())
|
||||
|
||||
cfg = _read_config(isolated_home)
|
||||
assert len(cfg["fallback_providers"]) == 1
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# cmd_fallback_clear
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestClearCommand:
|
||||
def test_clear_empty_chain(self, isolated_home, capsys):
|
||||
_write_config(isolated_home, {})
|
||||
from hermes_cli.fallback_cmd import cmd_fallback_clear
|
||||
cmd_fallback_clear(types.SimpleNamespace())
|
||||
out = capsys.readouterr().out
|
||||
assert "nothing to clear" in out
|
||||
|
||||
def test_clear_with_confirmation(self, isolated_home, capsys, monkeypatch):
|
||||
_write_config(isolated_home, {
|
||||
"fallback_providers": [
|
||||
{"provider": "openrouter", "model": "gpt-5.4"},
|
||||
{"provider": "nous", "model": "Hermes-4"},
|
||||
],
|
||||
})
|
||||
monkeypatch.setattr("builtins.input", lambda *a, **kw: "y")
|
||||
from hermes_cli.fallback_cmd import cmd_fallback_clear
|
||||
cmd_fallback_clear(types.SimpleNamespace())
|
||||
|
||||
cfg = _read_config(isolated_home)
|
||||
assert cfg.get("fallback_providers") == []
|
||||
out = capsys.readouterr().out
|
||||
assert "Fallback chain cleared" in out
|
||||
|
||||
def test_clear_cancelled(self, isolated_home, monkeypatch):
|
||||
_write_config(isolated_home, {
|
||||
"fallback_providers": [{"provider": "openrouter", "model": "gpt-5.4"}],
|
||||
})
|
||||
monkeypatch.setattr("builtins.input", lambda *a, **kw: "n")
|
||||
from hermes_cli.fallback_cmd import cmd_fallback_clear
|
||||
cmd_fallback_clear(types.SimpleNamespace())
|
||||
|
||||
cfg = _read_config(isolated_home)
|
||||
assert len(cfg["fallback_providers"]) == 1
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# cmd_fallback dispatcher
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestDispatcher:
|
||||
def test_no_subcommand_lists(self, isolated_home, capsys):
|
||||
_write_config(isolated_home, {})
|
||||
from hermes_cli.fallback_cmd import cmd_fallback
|
||||
cmd_fallback(types.SimpleNamespace(fallback_command=None))
|
||||
out = capsys.readouterr().out
|
||||
assert "No fallback providers configured" in out
|
||||
|
||||
def test_list_alias(self, isolated_home, capsys):
|
||||
_write_config(isolated_home, {})
|
||||
from hermes_cli.fallback_cmd import cmd_fallback
|
||||
cmd_fallback(types.SimpleNamespace(fallback_command="ls"))
|
||||
out = capsys.readouterr().out
|
||||
assert "No fallback providers configured" in out
|
||||
|
||||
def test_remove_alias(self, isolated_home, capsys):
|
||||
_write_config(isolated_home, {})
|
||||
from hermes_cli.fallback_cmd import cmd_fallback
|
||||
cmd_fallback(types.SimpleNamespace(fallback_command="rm"))
|
||||
out = capsys.readouterr().out
|
||||
assert "nothing to remove" in out
|
||||
|
||||
def test_unknown_subcommand_exits(self, isolated_home):
|
||||
_write_config(isolated_home, {})
|
||||
from hermes_cli.fallback_cmd import cmd_fallback
|
||||
with pytest.raises(SystemExit):
|
||||
cmd_fallback(types.SimpleNamespace(fallback_command="nope"))
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# argparse wiring — verify the subparser is registered
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestArgparseWiring:
|
||||
"""Verify `hermes fallback` is wired into main.py's argparse tree.
|
||||
|
||||
main() builds the parser inline, so we invoke main([...]) via subprocess
|
||||
with --help to introspect registered subcommands without side effects.
|
||||
"""
|
||||
|
||||
def test_fallback_help_lists_subcommands(self):
|
||||
import subprocess
|
||||
import sys
|
||||
result = subprocess.run(
|
||||
[sys.executable, "-m", "hermes_cli.main", "fallback", "--help"],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=30,
|
||||
)
|
||||
# --help exits 0
|
||||
assert result.returncode == 0, f"stderr: {result.stderr}"
|
||||
out = result.stdout + result.stderr
|
||||
# All four subcommands should appear in help
|
||||
assert "list" in out
|
||||
assert "add" in out
|
||||
assert "remove" in out
|
||||
assert "clear" in out
|
||||
@@ -1,210 +0,0 @@
|
||||
"""Tests for the kanban CLI surface (hermes_cli.kanban)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from hermes_cli import kanban as kc
|
||||
from hermes_cli import kanban_db as kb
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def kanban_home(tmp_path, monkeypatch):
|
||||
home = tmp_path / ".hermes"
|
||||
home.mkdir()
|
||||
monkeypatch.setenv("HERMES_HOME", str(home))
|
||||
monkeypatch.setattr(Path, "home", lambda: tmp_path)
|
||||
kb.init_db()
|
||||
return home
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Workspace flag parsing
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"value,expected",
|
||||
[
|
||||
("scratch", ("scratch", None)),
|
||||
("worktree", ("worktree", None)),
|
||||
("dir:/tmp/work", ("dir", "/tmp/work")),
|
||||
],
|
||||
)
|
||||
def test_parse_workspace_flag_valid(value, expected):
|
||||
assert kc._parse_workspace_flag(value) == expected
|
||||
|
||||
|
||||
def test_parse_workspace_flag_expands_user():
|
||||
kind, path = kc._parse_workspace_flag("dir:~/vault")
|
||||
assert kind == "dir"
|
||||
assert path.endswith("/vault")
|
||||
assert not path.startswith("~")
|
||||
|
||||
|
||||
@pytest.mark.parametrize("bad", ["cloud", "dir:", "", "worktree:/x"])
|
||||
def test_parse_workspace_flag_rejects(bad):
|
||||
if not bad:
|
||||
# Empty -> defaults; not an error.
|
||||
assert kc._parse_workspace_flag(bad) == ("scratch", None)
|
||||
return
|
||||
with pytest.raises(argparse.ArgumentTypeError):
|
||||
kc._parse_workspace_flag(bad)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# run_slash smoke tests (end-to-end via the same entry both CLI and gateway use)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_run_slash_no_args_shows_usage(kanban_home):
|
||||
out = kc.run_slash("")
|
||||
assert "kanban" in out.lower()
|
||||
assert "create" in out.lower() or "subcommand" in out.lower() or "action" in out.lower()
|
||||
|
||||
|
||||
def test_run_slash_create_and_list(kanban_home):
|
||||
out = kc.run_slash("create 'ship feature' --assignee alice")
|
||||
assert "Created" in out
|
||||
out = kc.run_slash("list")
|
||||
assert "ship feature" in out
|
||||
assert "alice" in out
|
||||
|
||||
|
||||
def test_run_slash_create_with_parent_and_cascade(kanban_home):
|
||||
# Parent then child via --parent
|
||||
out1 = kc.run_slash("create 'parent' --assignee alice")
|
||||
# Extract the "t_xxxx" id from "Created t_xxxx (ready, ...)"
|
||||
import re
|
||||
m = re.search(r"(t_[a-f0-9]+)", out1)
|
||||
assert m
|
||||
p = m.group(1)
|
||||
out2 = kc.run_slash(f"create 'child' --assignee bob --parent {p}")
|
||||
assert "todo" in out2 # child starts as todo
|
||||
|
||||
# Complete parent; list should promote child to ready
|
||||
kc.run_slash(f"complete {p}")
|
||||
# Explicit filter: child should now be ready (was todo before complete).
|
||||
ready_list = kc.run_slash("list --status ready")
|
||||
assert "child" in ready_list
|
||||
|
||||
|
||||
def test_run_slash_show_includes_comments(kanban_home):
|
||||
out = kc.run_slash("create 'x'")
|
||||
import re
|
||||
tid = re.search(r"(t_[a-f0-9]+)", out).group(1)
|
||||
kc.run_slash(f"comment {tid} 'source is paywalled'")
|
||||
show = kc.run_slash(f"show {tid}")
|
||||
assert "source is paywalled" in show
|
||||
|
||||
|
||||
def test_run_slash_block_unblock_cycle(kanban_home):
|
||||
out = kc.run_slash("create 'x' --assignee alice")
|
||||
import re
|
||||
tid = re.search(r"(t_[a-f0-9]+)", out).group(1)
|
||||
# Claim first so block() finds it running
|
||||
kc.run_slash(f"claim {tid}")
|
||||
assert "Blocked" in kc.run_slash(f"block {tid} 'need decision'")
|
||||
assert "Unblocked" in kc.run_slash(f"unblock {tid}")
|
||||
|
||||
|
||||
def test_run_slash_json_output(kanban_home):
|
||||
out = kc.run_slash("create 'jsontask' --assignee alice --json")
|
||||
payload = json.loads(out)
|
||||
assert payload["title"] == "jsontask"
|
||||
assert payload["assignee"] == "alice"
|
||||
assert payload["status"] == "ready"
|
||||
|
||||
|
||||
def test_run_slash_dispatch_dry_run_counts(kanban_home):
|
||||
kc.run_slash("create 'a' --assignee alice")
|
||||
kc.run_slash("create 'b' --assignee bob")
|
||||
out = kc.run_slash("dispatch --dry-run")
|
||||
assert "Spawned:" in out
|
||||
|
||||
|
||||
def test_run_slash_context_output_format(kanban_home):
|
||||
out = kc.run_slash("create 'tech spec' --assignee alice --body 'write an RFC'")
|
||||
import re
|
||||
tid = re.search(r"(t_[a-f0-9]+)", out).group(1)
|
||||
kc.run_slash(f"comment {tid} 'remember to include performance section'")
|
||||
ctx = kc.run_slash(f"context {tid}")
|
||||
assert "tech spec" in ctx
|
||||
assert "write an RFC" in ctx
|
||||
assert "performance section" in ctx
|
||||
|
||||
|
||||
def test_run_slash_tenant_filter(kanban_home):
|
||||
kc.run_slash("create 'biz-a task' --tenant biz-a --assignee alice")
|
||||
kc.run_slash("create 'biz-b task' --tenant biz-b --assignee alice")
|
||||
a = kc.run_slash("list --tenant biz-a")
|
||||
b = kc.run_slash("list --tenant biz-b")
|
||||
assert "biz-a task" in a and "biz-b task" not in a
|
||||
assert "biz-b task" in b and "biz-a task" not in b
|
||||
|
||||
|
||||
def test_run_slash_usage_error_returns_message(kanban_home):
|
||||
# Missing required argument for create
|
||||
out = kc.run_slash("create")
|
||||
assert "usage" in out.lower() or "error" in out.lower()
|
||||
|
||||
|
||||
def test_run_slash_assign_reassigns(kanban_home):
|
||||
out = kc.run_slash("create 'x' --assignee alice")
|
||||
import re
|
||||
tid = re.search(r"(t_[a-f0-9]+)", out).group(1)
|
||||
assert "Assigned" in kc.run_slash(f"assign {tid} bob")
|
||||
show = kc.run_slash(f"show {tid}")
|
||||
assert "bob" in show
|
||||
|
||||
|
||||
def test_run_slash_link_unlink(kanban_home):
|
||||
a = kc.run_slash("create 'a'")
|
||||
b = kc.run_slash("create 'b'")
|
||||
import re
|
||||
ta = re.search(r"(t_[a-f0-9]+)", a).group(1)
|
||||
tb = re.search(r"(t_[a-f0-9]+)", b).group(1)
|
||||
assert "Linked" in kc.run_slash(f"link {ta} {tb}")
|
||||
# After link, b is todo
|
||||
show = kc.run_slash(f"show {tb}")
|
||||
assert "todo" in show
|
||||
assert "Unlinked" in kc.run_slash(f"unlink {ta} {tb}")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Integration with the COMMAND_REGISTRY
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_kanban_is_resolvable():
|
||||
from hermes_cli.commands import resolve_command
|
||||
|
||||
cmd = resolve_command("kanban")
|
||||
assert cmd is not None
|
||||
assert cmd.name == "kanban"
|
||||
|
||||
|
||||
def test_kanban_bypasses_active_session_guard():
|
||||
from hermes_cli.commands import should_bypass_active_session
|
||||
|
||||
assert should_bypass_active_session("kanban")
|
||||
|
||||
|
||||
def test_kanban_in_autocomplete_table():
|
||||
from hermes_cli.commands import COMMANDS, SUBCOMMANDS
|
||||
|
||||
assert "/kanban" in COMMANDS
|
||||
subs = SUBCOMMANDS.get("/kanban") or []
|
||||
assert "create" in subs
|
||||
assert "dispatch" in subs
|
||||
|
||||
|
||||
def test_kanban_not_gateway_only():
|
||||
# kanban is available in BOTH CLI and gateway surfaces.
|
||||
from hermes_cli.commands import COMMAND_REGISTRY
|
||||
|
||||
cmd = next(c for c in COMMAND_REGISTRY if c.name == "kanban")
|
||||
assert not cmd.cli_only
|
||||
assert not cmd.gateway_only
|
||||
File diff suppressed because it is too large
Load Diff
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user