Compare commits

..

2 Commits

Author SHA1 Message Date
kshitijk4poor
8aaefec231 fix: follow-up for salvaged PR #8952
- Rename provider_contracts.py -> volcengine_byteplus.py for explicitness
- Consolidate duplicate host-to-provider mappings: provider_for_base_url()
  now uses the canonical _URL_TO_PROVIDER from model_metadata.py instead of
  maintaining a separate 20-entry dict
- Add volcengine/byteplus to runtime_provider.py model-dependent base URL
  resolution (kimi-style special case) so manually-edited configs resolve
  the coding-plan base URL correctly
- Remove volcengine/byteplus from _API_KEY_PROVIDER_AUX_MODELS — the
  main-model-first design in _resolve_auto() handles these providers
  already; entries were dead code in the normal flow
- Add VOLCENGINE_API_KEY and BYTEPLUS_API_KEY to OPTIONAL_ENV_VARS in
  config.py so they appear in hermes setup
- Update docs: environment-variables.md, fallback-providers.md,
  configuration.md
2026-04-22 22:42:39 +05:30
gaoyiman
ccde71a6ab feat(providers): add Volcengine and BytePlus support
Based on PR #8952 by @Maaannnn.

Adds Volcengine and BytePlus as first-class providers, each with standard
and Coding Plan model catalogs. The model prefix (volcengine/ vs
volcengine-coding-plan/) determines the runtime base URL automatically.

- New hermes_cli/provider_contracts.py centralises all constants
- ProviderConfig entries in auth.py with api_key auth
- Model catalogs, aliases, and provider ordering in models.py/providers.py
- Auxiliary client entries and context window resolution
- gateway /provider command detects known Volcengine/BytePlus endpoints
- Comprehensive tests and docs update
2026-04-22 22:33:06 +05:30
45 changed files with 1163 additions and 4823 deletions

View File

@@ -13,7 +13,7 @@
**The self-improving AI agent built by [Nous Research](https://nousresearch.com).** It's the only agent with a built-in learning loop — it creates skills from experience, improves them during use, nudges itself to persist knowledge, searches its own past conversations, and builds a deepening model of who you are across sessions. Run it on a $5 VPS, a GPU cluster, or serverless infrastructure that costs nearly nothing when idle. It's not tied to your laptop — talk to it from Telegram while it works on a cloud VM.
Use any model you want — [Nous Portal](https://portal.nousresearch.com), [OpenRouter](https://openrouter.ai) (200+ models), [NVIDIA NIM](https://build.nvidia.com) (Nemotron), [Xiaomi MiMo](https://platform.xiaomimimo.com), [z.ai/GLM](https://z.ai), [Kimi/Moonshot](https://platform.moonshot.ai), [MiniMax](https://www.minimax.io), [Hugging Face](https://huggingface.co), OpenAI, or your own endpoint. Switch with `hermes model` — no code changes, no lock-in.
Use any model you want — [Nous Portal](https://portal.nousresearch.com), [OpenRouter](https://openrouter.ai) (200+ models), [Volcengine](https://www.volcengine.com/product/ark), [BytePlus](https://www.byteplus.com/en/product/modelark), [NVIDIA NIM](https://build.nvidia.com) (Nemotron), [Xiaomi MiMo](https://platform.xiaomimimo.com), [z.ai/GLM](https://z.ai), [Kimi/Moonshot](https://platform.moonshot.ai), [MiniMax](https://www.minimax.io), [Hugging Face](https://huggingface.co), OpenAI, or your own endpoint. Switch with `hermes model` — no code changes, no lock-in.
<table>
<tr><td><b>A real terminal interface</b></td><td>Full TUI with multiline editing, slash-command autocomplete, conversation history, interrupt-and-redirect, and streaming tool output.</td></tr>

View File

@@ -74,6 +74,10 @@ _PROVIDER_ALIASES = {
"minimax_cn": "minimax-cn",
"claude": "anthropic",
"claude-code": "anthropic",
"volcengine-coding-plan": "volcengine",
"volcengine_coding_plan": "volcengine",
"byteplus-coding-plan": "byteplus",
"byteplus_coding_plan": "byteplus",
}

View File

@@ -14,8 +14,8 @@ from urllib.parse import urlparse
import requests
import yaml
from hermes_cli.volcengine_byteplus import model_context_window
from utils import base_url_host_matches, base_url_hostname
from hermes_constants import OPENROUTER_MODELS_URL
logger = logging.getLogger(__name__)
@@ -30,6 +30,10 @@ _PROVIDER_PREFIXES: frozenset[str] = frozenset({
"qwen-oauth",
"xiaomi",
"arcee",
"volcengine",
"volcengine-coding-plan",
"byteplus",
"byteplus-coding-plan",
"custom", "local",
# Common aliases
"google", "google-gemini", "google-ai-studio",
@@ -257,6 +261,8 @@ _URL_TO_PROVIDER: Dict[str, str] = {
"api.xiaomimimo.com": "xiaomi",
"xiaomimimo.com": "xiaomi",
"ollama.com": "ollama-cloud",
"ark.cn-beijing.volces.com": "volcengine",
"ark.ap-southeast.bytepluses.com": "byteplus",
}
@@ -1119,12 +1125,20 @@ def get_model_context_length(
ctx = _resolve_nous_context_length(model)
if ctx:
return ctx
if effective_provider in {"volcengine", "byteplus"}:
ctx = model_context_window(model)
if ctx:
return ctx
if effective_provider:
from agent.models_dev import lookup_models_dev_context
ctx = lookup_models_dev_context(effective_provider, model)
if ctx:
return ctx
ctx = model_context_window(model)
if ctx:
return ctx
# 6. OpenRouter live API metadata (provider-unaware fallback)
metadata = fetch_model_metadata()
if model in metadata:

View File

@@ -5690,6 +5690,7 @@ class GatewayRunner:
from hermes_cli.models import (
list_available_providers,
normalize_provider,
provider_for_base_url,
_PROVIDER_LABELS,
)
@@ -5718,7 +5719,10 @@ class GatewayRunner:
# Detect custom endpoint from config base_url
if current_provider == "openrouter":
_cfg_base = model_cfg.get("base_url", "") if isinstance(model_cfg, dict) else ""
if _cfg_base and "openrouter.ai" not in _cfg_base:
inferred_provider = provider_for_base_url(_cfg_base)
if inferred_provider:
current_provider = inferred_provider
elif _cfg_base and "openrouter.ai" not in _cfg_base:
current_provider = "custom"
current_label = _PROVIDER_LABELS.get(current_provider, current_provider)

View File

@@ -39,6 +39,13 @@ import httpx
import yaml
from hermes_cli.config import get_hermes_home, get_config_path, read_raw_config
from hermes_cli.volcengine_byteplus import (
VOLCENGINE_PROVIDER,
BYTEPLUS_PROVIDER,
VOLCENGINE_STANDARD_BASE_URL,
BYTEPLUS_STANDARD_BASE_URL,
base_url_for_provider_model,
)
from hermes_constants import OPENROUTER_BASE_URL
logger = logging.getLogger(__name__)
@@ -307,6 +314,20 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
api_key_env_vars=("XIAOMI_API_KEY",),
base_url_env_var="XIAOMI_BASE_URL",
),
"volcengine": ProviderConfig(
id=VOLCENGINE_PROVIDER,
name="Volcengine",
auth_type="api_key",
inference_base_url=VOLCENGINE_STANDARD_BASE_URL,
api_key_env_vars=("VOLCENGINE_API_KEY",),
),
"byteplus": ProviderConfig(
id=BYTEPLUS_PROVIDER,
name="BytePlus",
auth_type="api_key",
inference_base_url=BYTEPLUS_STANDARD_BASE_URL,
api_key_env_vars=("BYTEPLUS_API_KEY",),
),
"ollama-cloud": ProviderConfig(
id="ollama-cloud",
name="Ollama Cloud",
@@ -1015,6 +1036,10 @@ def resolve_provider(
"hf": "huggingface", "hugging-face": "huggingface", "huggingface-hub": "huggingface",
"mimo": "xiaomi", "xiaomi-mimo": "xiaomi",
"aws": "bedrock", "aws-bedrock": "bedrock", "amazon-bedrock": "bedrock", "amazon": "bedrock",
"volcengine-coding-plan": "volcengine",
"volcengine_coding_plan": "volcengine",
"byteplus-coding-plan": "byteplus",
"byteplus_coding_plan": "byteplus",
"go": "opencode-go", "opencode-go-sub": "opencode-go",
"kilo": "kilocode", "kilo-code": "kilocode", "kilo-gateway": "kilocode",
# Local server aliases — route through the generic custom provider
@@ -1157,6 +1182,21 @@ def _qwen_cli_auth_path() -> Path:
return Path.home() / ".qwen" / "oauth_creds.json"
def _current_model_for_provider(provider_id: str) -> str:
"""Return the currently configured model when it belongs to the provider."""
try:
config = read_raw_config()
except Exception:
return ""
model_cfg = config.get("model")
if isinstance(model_cfg, dict):
configured_provider = str(model_cfg.get("provider") or "").strip().lower()
if configured_provider == provider_id:
return str(model_cfg.get("default") or model_cfg.get("model") or "").strip()
return ""
def _read_qwen_cli_tokens() -> Dict[str, Any]:
auth_path = _qwen_cli_auth_path()
if not auth_path.exists():
@@ -2555,7 +2595,11 @@ def get_api_key_provider_status(provider_id: str) -> Dict[str, Any]:
if pconfig.base_url_env_var:
env_url = os.getenv(pconfig.base_url_env_var, "").strip()
if provider_id in ("kimi-coding", "kimi-coding-cn"):
active_model = _current_model_for_provider(provider_id)
if provider_id in {VOLCENGINE_PROVIDER, BYTEPLUS_PROVIDER}:
base_url = base_url_for_provider_model(provider_id, active_model) or pconfig.inference_base_url
elif provider_id in ("kimi-coding", "kimi-coding-cn"):
base_url = _resolve_kimi_base_url(api_key, pconfig.inference_base_url, env_url)
elif env_url:
base_url = env_url
@@ -2650,7 +2694,11 @@ def resolve_api_key_provider_credentials(provider_id: str) -> Dict[str, Any]:
if pconfig.base_url_env_var:
env_url = os.getenv(pconfig.base_url_env_var, "").strip()
if provider_id in ("kimi-coding", "kimi-coding-cn"):
active_model = _current_model_for_provider(provider_id)
if provider_id in {VOLCENGINE_PROVIDER, BYTEPLUS_PROVIDER}:
base_url = base_url_for_provider_model(provider_id, active_model) or pconfig.inference_base_url
elif provider_id in ("kimi-coding", "kimi-coding-cn"):
base_url = _resolve_kimi_base_url(api_key, pconfig.inference_base_url, env_url)
elif provider_id == "zai":
base_url = _resolve_zai_base_url(api_key, pconfig.inference_base_url, env_url)

View File

@@ -1281,6 +1281,20 @@ OPTIONAL_ENV_VARS = {
"category": "provider",
"advanced": True,
},
"VOLCENGINE_API_KEY": {
"description": "Volcengine API key for Doubao / Seed models (standard + Coding Plan catalogs)",
"prompt": "Volcengine API Key",
"url": "https://www.volcengine.com/product/ark",
"password": True,
"category": "provider",
},
"BYTEPLUS_API_KEY": {
"description": "BytePlus API key for Seed / Dola models (standard + Coding Plan catalogs)",
"prompt": "BytePlus API Key",
"url": "https://www.byteplus.com/en/product/modelark",
"password": True,
"category": "provider",
},
"AWS_REGION": {
"description": "AWS region for Bedrock API calls (e.g. us-east-1, eu-central-1)",
"prompt": "AWS Region",

View File

@@ -1570,6 +1570,8 @@ def select_provider_and_model(args=None):
_model_flow_stepfun(config, current_model)
elif selected_provider == "bedrock":
_model_flow_bedrock(config, current_model)
elif selected_provider in ("volcengine", "byteplus"):
_model_flow_contract_provider(config, selected_provider, current_model)
elif selected_provider in (
"gemini",
"deepseek",
@@ -1954,7 +1956,7 @@ def _aux_flow_custom_endpoint(task: str, task_cfg: dict) -> None:
print(f"{display_name}: custom ({short_url})" + (f" · {model}" if model else ""))
def _prompt_provider_choice(choices, *, default=0):
def _prompt_provider_choice(choices, *, default=0, title="Select provider:"):
"""Show provider selection menu with curses arrow-key navigation.
Falls back to a numbered list when curses is unavailable (e.g. piped
@@ -1963,8 +1965,7 @@ def _prompt_provider_choice(choices, *, default=0):
"""
try:
from hermes_cli.setup import _curses_prompt_choice
idx = _curses_prompt_choice("Select provider:", choices, default)
idx = _curses_prompt_choice(title, choices, default)
if idx >= 0:
print()
return idx
@@ -1972,7 +1973,7 @@ def _prompt_provider_choice(choices, *, default=0):
pass
# Fallback: numbered list
print("Select provider:")
print(title)
for i, c in enumerate(choices, 1):
marker = "" if i - 1 == default else " "
print(f" {marker} {i}. {c}")
@@ -2944,6 +2945,10 @@ def _model_flow_named_custom(config, provider_info):
# Curated model lists for direct API-key providers — single source in models.py
from hermes_cli.models import _PROVIDER_MODELS
from hermes_cli.volcengine_byteplus import (
base_url_for_provider_model,
provider_models,
)
def _current_reasoning_effort(config) -> str:
@@ -4033,6 +4038,70 @@ def _model_flow_api_key_provider(config, provider_id, current_model=""):
print("No change.")
def _model_flow_contract_provider(config, provider_id, current_model=""):
"""Provider flow for Volcengine / BytePlus contract-backed catalogs."""
from hermes_cli.auth import (
PROVIDER_REGISTRY,
_prompt_model_selection,
_save_model_choice,
deactivate_provider,
)
from hermes_cli.config import get_env_value, load_config, save_config, save_env_value
pconfig = PROVIDER_REGISTRY[provider_id]
key_env = pconfig.api_key_env_vars[0] if pconfig.api_key_env_vars else ""
existing_key = ""
for env_var in pconfig.api_key_env_vars:
existing_key = get_env_value(env_var) or os.getenv(env_var, "")
if existing_key:
break
if not existing_key:
print(f"No {pconfig.name} API key configured.")
if key_env:
try:
import getpass
new_key = getpass.getpass(f"{key_env} (or Enter to cancel): ").strip()
except (KeyboardInterrupt, EOFError):
print()
return
if not new_key:
print("Cancelled.")
return
save_env_value(key_env, new_key)
print("API key saved.")
print()
else:
print(f" {pconfig.name} API key: {existing_key[:8]}... ✓")
print()
model_list = provider_models(provider_id)
if not model_list:
print(f"No curated model catalog found for {pconfig.name}.")
return
selected = _prompt_model_selection(model_list, current_model=current_model)
if not selected:
print("No change.")
return
_save_model_choice(selected)
cfg = load_config()
model = cfg.get("model")
if not isinstance(model, dict):
model = {"default": model} if model else {}
cfg["model"] = model
model["provider"] = provider_id
model["base_url"] = base_url_for_provider_model(provider_id, selected)
model.pop("api_mode", None)
save_config(cfg)
deactivate_provider()
print(f"Default model set to: {selected} (via {pconfig.name})")
def _run_anthropic_oauth_flow(save_env_value):
"""Run the Claude OAuth setup-token flow. Returns True if credentials were saved."""
from agent.anthropic_adapter import (

View File

@@ -97,6 +97,8 @@ _MATCHING_PREFIX_STRIP_PROVIDERS: frozenset[str] = frozenset({
"xiaomi",
"arcee",
"ollama-cloud",
"volcengine",
"byteplus",
"custom",
})
@@ -423,4 +425,3 @@ def normalize_model_for_provider(model_input: str, target_provider: str) -> str:
# ---------------------------------------------------------------------------
# Batch / convenience helpers
# ---------------------------------------------------------------------------

View File

@@ -22,6 +22,12 @@ from hermes_cli import __version__ as _HERMES_VERSION
# Check (error 1010) don't reject the default ``Python-urllib/*`` signature.
_HERMES_USER_AGENT = f"hermes-cli/{_HERMES_VERSION}"
from hermes_cli.volcengine_byteplus import (
BYTEPLUS_PROVIDER,
VOLCENGINE_PROVIDER,
provider_models,
)
COPILOT_BASE_URL = "https://api.githubcopilot.com"
COPILOT_MODELS_URL = f"{COPILOT_BASE_URL}/models"
COPILOT_EDITOR_VERSION = "vscode/1.104.1"
@@ -356,6 +362,8 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
"us.meta.llama4-maverick-17b-instruct-v1:0",
"us.meta.llama4-scout-17b-instruct-v1:0",
],
VOLCENGINE_PROVIDER: provider_models(VOLCENGINE_PROVIDER),
BYTEPLUS_PROVIDER: provider_models(BYTEPLUS_PROVIDER),
}
# Vercel AI Gateway: derive the bare-model-id catalog from the curated
@@ -690,6 +698,8 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
ProviderEntry("ai-gateway", "Vercel AI Gateway", "Vercel AI Gateway (200+ models, $5 free credit, no markup)"),
ProviderEntry("anthropic", "Anthropic", "Anthropic (Claude models — API key or Claude Code)"),
ProviderEntry("openai-codex", "OpenAI Codex", "OpenAI Codex"),
ProviderEntry(VOLCENGINE_PROVIDER, "Volcengine", "Volcengine (standard + Coding Plan catalogs)"),
ProviderEntry(BYTEPLUS_PROVIDER, "BytePlus", "BytePlus (standard + Coding Plan catalogs)"),
ProviderEntry("xiaomi", "Xiaomi MiMo", "Xiaomi MiMo (MiMo-V2 models — pro, omni, flash)"),
ProviderEntry("nvidia", "NVIDIA NIM", "NVIDIA NIM (Nemotron models — build.nvidia.com or local NIM)"),
ProviderEntry("qwen-oauth", "Qwen OAuth (Portal)", "Qwen OAuth (reuses local Qwen CLI login)"),
@@ -719,7 +729,6 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
_PROVIDER_LABELS = {p.slug: p.label for p in CANONICAL_PROVIDERS}
_PROVIDER_LABELS["custom"] = "Custom endpoint" # special case: not a named provider
_PROVIDER_ALIASES = {
"glm": "zai",
"z-ai": "zai",
@@ -782,6 +791,10 @@ _PROVIDER_ALIASES = {
"nemotron": "nvidia",
"ollama": "custom", # bare "ollama" = local; use "ollama-cloud" for cloud
"ollama_cloud": "ollama-cloud",
"volcengine-coding-plan": VOLCENGINE_PROVIDER,
"volcengine_coding_plan": VOLCENGINE_PROVIDER,
"byteplus-coding-plan": BYTEPLUS_PROVIDER,
"byteplus_coding_plan": BYTEPLUS_PROVIDER,
}
@@ -1242,7 +1255,6 @@ def list_available_providers() -> list[dict[str, str]]:
"""
# Derive display order from canonical list + custom
provider_order = [p.slug for p in CANONICAL_PROVIDERS] + ["custom"]
# Build reverse alias map
aliases_for: dict[str, list[str]] = {}
for alias, canonical in _PROVIDER_ALIASES.items():
@@ -1258,7 +1270,7 @@ def list_available_providers() -> list[dict[str, str]]:
from hermes_cli.auth import get_auth_status, has_usable_secret
if pid == "custom":
custom_base_url = _get_custom_base_url() or ""
has_creds = bool(custom_base_url.strip())
has_creds = bool(custom_base_url.strip()) and provider_for_base_url(custom_base_url) is None
elif pid == "openrouter":
has_creds = has_usable_secret(os.getenv("OPENROUTER_API_KEY", ""))
else:
@@ -1324,6 +1336,29 @@ def _get_custom_base_url() -> str:
return ""
def provider_for_base_url(base_url: str) -> Optional[str]:
"""Return a known built-in provider for a configured base URL, if any.
Uses the canonical _URL_TO_PROVIDER mapping from model_metadata plus
additional entries for providers not in that dict.
"""
normalized = str(base_url or "").strip().rstrip("/")
if not normalized or "openrouter.ai" in normalized.lower():
return None
url_lower = normalized.lower()
# Primary source — shared with context-length resolution
from agent.model_metadata import _URL_TO_PROVIDER
for host, provider_id in _URL_TO_PROVIDER.items():
if host in url_lower:
canonical = normalize_provider(provider_id)
if canonical in _PROVIDER_LABELS and canonical != "custom":
return canonical
return None
def curated_models_for_provider(
provider: Optional[str],
*,

View File

@@ -23,6 +23,12 @@ import logging
from dataclasses import dataclass
from typing import Any, Dict, List, Optional, Tuple
from hermes_cli.volcengine_byteplus import (
BYTEPLUS_PROVIDER,
BYTEPLUS_STANDARD_BASE_URL,
VOLCENGINE_PROVIDER,
VOLCENGINE_STANDARD_BASE_URL,
)
from utils import base_url_host_matches, base_url_hostname
logger = logging.getLogger(__name__)
@@ -163,6 +169,16 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = {
transport="openai_chat",
base_url_env_var="OLLAMA_BASE_URL",
),
VOLCENGINE_PROVIDER: HermesOverlay(
transport="openai_chat",
extra_env_vars=("VOLCENGINE_API_KEY",),
base_url_override=VOLCENGINE_STANDARD_BASE_URL,
),
BYTEPLUS_PROVIDER: HermesOverlay(
transport="openai_chat",
extra_env_vars=("BYTEPLUS_API_KEY",),
base_url_override=BYTEPLUS_STANDARD_BASE_URL,
),
}
@@ -273,6 +289,10 @@ ALIASES: Dict[str, str] = {
# xiaomi
"mimo": "xiaomi",
"xiaomi-mimo": "xiaomi",
"volcengine-coding-plan": VOLCENGINE_PROVIDER,
"volcengine_coding_plan": VOLCENGINE_PROVIDER,
"byteplus-coding-plan": BYTEPLUS_PROVIDER,
"byteplus_coding_plan": BYTEPLUS_PROVIDER,
# bedrock
"aws": "bedrock",
@@ -306,6 +326,8 @@ _LABEL_OVERRIDES: Dict[str, str] = {
"copilot-acp": "GitHub Copilot ACP",
"stepfun": "StepFun Step Plan",
"xiaomi": "Xiaomi MiMo",
VOLCENGINE_PROVIDER: "Volcengine",
BYTEPLUS_PROVIDER: "BytePlus",
"local": "Local endpoint",
"bedrock": "AWS Bedrock",
"ollama-cloud": "Ollama Cloud",

View File

@@ -643,7 +643,7 @@ def _resolve_explicit_runtime(
base_url = explicit_base_url
if not base_url:
if provider in ("kimi-coding", "kimi-coding-cn"):
if provider in ("kimi-coding", "kimi-coding-cn", "volcengine", "byteplus"):
creds = resolve_api_key_provider_credentials(provider)
base_url = creds.get("base_url", "").rstrip("/")
else:

View File

@@ -0,0 +1,134 @@
"""Source-of-truth contracts for built-in providers without models.dev catalogs."""
from __future__ import annotations
from typing import Dict, List, Tuple
VOLCENGINE_PROVIDER = "volcengine"
BYTEPLUS_PROVIDER = "byteplus"
VOLCENGINE_STANDARD_BASE_URL = "https://ark.cn-beijing.volces.com/api/v3"
VOLCENGINE_CODING_PLAN_BASE_URL = "https://ark.cn-beijing.volces.com/api/coding/v3"
BYTEPLUS_STANDARD_BASE_URL = "https://ark.ap-southeast.bytepluses.com/api/v3"
BYTEPLUS_CODING_PLAN_BASE_URL = "https://ark.ap-southeast.bytepluses.com/api/coding/v3"
VOLCENGINE_STANDARD_MODELS: Tuple[str, ...] = (
"doubao-seed-2-0-pro-260215",
"doubao-seed-2-0-lite-260215",
"doubao-seed-2-0-mini-260215",
"doubao-seed-2-0-code-preview-260215",
"kimi-k2-5-260127",
"glm-4-7-251222",
"deepseek-v3-2-251201",
)
VOLCENGINE_CODING_PLAN_MODELS: Tuple[str, ...] = (
"doubao-seed-2.0-code",
"doubao-seed-2.0-pro",
"doubao-seed-2.0-lite",
"doubao-seed-code",
"minimax-m2.5",
"glm-4.7",
"deepseek-v3.2",
"kimi-k2.5",
)
BYTEPLUS_STANDARD_MODELS: Tuple[str, ...] = (
"seed-2-0-pro-260328",
"seed-2-0-lite-260228",
"seed-2-0-mini-260215",
"kimi-k2-5-260127",
"glm-4-7-251222",
)
BYTEPLUS_CODING_PLAN_MODELS: Tuple[str, ...] = (
"dola-seed-2.0-pro",
"dola-seed-2.0-lite",
"bytedance-seed-code",
"glm-4.7",
"kimi-k2.5",
"gpt-oss-120b",
)
VOLCENGINE_STANDARD_MODEL_REFS: Tuple[str, ...] = tuple(
f"{VOLCENGINE_PROVIDER}/{model_id}" for model_id in VOLCENGINE_STANDARD_MODELS
)
VOLCENGINE_CODING_PLAN_MODEL_REFS: Tuple[str, ...] = tuple(
f"{VOLCENGINE_PROVIDER}-coding-plan/{model_id}" for model_id in VOLCENGINE_CODING_PLAN_MODELS
)
BYTEPLUS_STANDARD_MODEL_REFS: Tuple[str, ...] = tuple(
f"{BYTEPLUS_PROVIDER}/{model_id}" for model_id in BYTEPLUS_STANDARD_MODELS
)
BYTEPLUS_CODING_PLAN_MODEL_REFS: Tuple[str, ...] = tuple(
f"{BYTEPLUS_PROVIDER}-coding-plan/{model_id}" for model_id in BYTEPLUS_CODING_PLAN_MODELS
)
PROVIDER_MODEL_CATALOGS: Dict[str, Tuple[str, ...]] = {
VOLCENGINE_PROVIDER: VOLCENGINE_STANDARD_MODEL_REFS + VOLCENGINE_CODING_PLAN_MODEL_REFS,
BYTEPLUS_PROVIDER: BYTEPLUS_STANDARD_MODEL_REFS + BYTEPLUS_CODING_PLAN_MODEL_REFS,
}
MODEL_CONTEXT_WINDOWS: Dict[str, int] = {
"doubao-seed-2-0-pro-260215": 256000,
"doubao-seed-2-0-lite-260215": 256000,
"doubao-seed-2-0-mini-260215": 256000,
"doubao-seed-2-0-code-preview-260215": 256000,
"kimi-k2-5-260127": 256000,
"glm-4-7-251222": 200000,
"deepseek-v3-2-251201": 128000,
"doubao-seed-2.0-code": 256000,
"doubao-seed-2.0-pro": 256000,
"doubao-seed-2.0-lite": 256000,
"doubao-seed-code": 256000,
"minimax-m2.5": 200000,
"glm-4.7": 200000,
"deepseek-v3.2": 128000,
"kimi-k2.5": 256000,
"seed-2-0-pro-260328": 256000,
"seed-2-0-lite-260228": 256000,
"seed-2-0-mini-260215": 256000,
}
def provider_models(provider_id: str) -> List[str]:
"""Return the full user-facing model catalog for a provider."""
return list(PROVIDER_MODEL_CATALOGS.get(provider_id, ()))
def _bare_model_name(model_name: str) -> str:
value = (model_name or "").strip()
if not value:
return ""
if "/" in value:
return value.split("/", 1)[1].strip()
return value
def is_coding_plan_model(provider_id: str, model_name: str) -> bool:
"""Return True when a model belongs to the coding-plan catalog."""
raw = (model_name or "").strip()
bare = _bare_model_name(raw)
if provider_id == VOLCENGINE_PROVIDER:
return raw in VOLCENGINE_CODING_PLAN_MODEL_REFS or bare in VOLCENGINE_CODING_PLAN_MODELS
if provider_id == BYTEPLUS_PROVIDER:
return raw in BYTEPLUS_CODING_PLAN_MODEL_REFS or bare in BYTEPLUS_CODING_PLAN_MODELS
return False
def base_url_for_provider_model(provider_id: str, model_name: str) -> str:
"""Resolve the source-of-truth base URL for a provider+model pair."""
if provider_id == VOLCENGINE_PROVIDER:
if is_coding_plan_model(provider_id, model_name):
return VOLCENGINE_CODING_PLAN_BASE_URL
return VOLCENGINE_STANDARD_BASE_URL
if provider_id == BYTEPLUS_PROVIDER:
if is_coding_plan_model(provider_id, model_name):
return BYTEPLUS_CODING_PLAN_BASE_URL
return BYTEPLUS_STANDARD_BASE_URL
return ""
def model_context_window(model_name: str) -> int | None:
"""Return a known context window for a model, if specified by the contract."""
bare = _bare_model_name(model_name)
return MODEL_CONTEXT_WINDOWS.get(bare)

View File

@@ -256,136 +256,109 @@ class SessionDB:
self._conn.close()
self._conn = None
@staticmethod
def _parse_schema_columns(schema_sql: str) -> Dict[str, Dict[str, str]]:
"""Extract expected columns per table from SCHEMA_SQL.
Uses an in-memory SQLite database to parse the SQL — SQLite itself
handles all syntax (DEFAULT expressions with commas, inline
REFERENCES, CHECK constraints, etc.) so there are zero regex
edge cases. The in-memory DB is opened, the schema DDL is
executed, and PRAGMA table_info extracts the column metadata.
Adding a column to SCHEMA_SQL is all that's needed; the
reconciliation loop picks it up automatically.
"""
ref = sqlite3.connect(":memory:")
try:
ref.executescript(schema_sql)
table_columns: Dict[str, Dict[str, str]] = {}
for (tbl,) in ref.execute(
"SELECT name FROM sqlite_master "
"WHERE type='table' AND name NOT LIKE 'sqlite_%'"
).fetchall():
cols: Dict[str, str] = {}
for row in ref.execute(
f'PRAGMA table_info("{tbl}")'
).fetchall():
# row: (cid, name, type, notnull, dflt_value, pk)
col_name = row[1]
col_type = row[2] or ""
notnull = row[3]
default = row[4]
pk = row[5]
# Reconstruct the type expression for ALTER TABLE ADD COLUMN
parts = [col_type] if col_type else []
if notnull and not pk:
parts.append("NOT NULL")
if default is not None:
parts.append(f"DEFAULT {default}")
cols[col_name] = " ".join(parts)
table_columns[tbl] = cols
return table_columns
finally:
ref.close()
def _reconcile_columns(self, cursor: sqlite3.Cursor) -> None:
"""Ensure live tables have every column declared in SCHEMA_SQL.
Follows the Beets/sqlite-utils pattern: the CREATE TABLE definition
in SCHEMA_SQL is the single source of truth for the desired schema.
On every startup this method diffs the live columns (via PRAGMA
table_info) against the declared columns, and ADDs any that are
missing.
This makes column additions a declarative operation — just add
the column to SCHEMA_SQL and it appears on the next startup.
Version-gated migration blocks are no longer needed for ADD COLUMN.
"""
expected = self._parse_schema_columns(SCHEMA_SQL)
for table_name, declared_cols in expected.items():
# Get current columns from the live table
try:
rows = cursor.execute(
f'PRAGMA table_info("{table_name}")'
).fetchall()
except sqlite3.OperationalError:
continue # Table doesn't exist yet (shouldn't happen after executescript)
live_cols = set()
for row in rows:
# PRAGMA table_info returns (cid, name, type, notnull, dflt_value, pk)
name = row[1] if isinstance(row, (tuple, list)) else row["name"]
live_cols.add(name)
for col_name, col_type in declared_cols.items():
if col_name not in live_cols:
safe_name = col_name.replace('"', '""')
try:
cursor.execute(
f'ALTER TABLE "{table_name}" ADD COLUMN "{safe_name}" {col_type}'
)
except sqlite3.OperationalError as exc:
# Expected: "duplicate column name" from a race or
# re-run. Unexpected: "Cannot add a NOT NULL column
# with default value NULL" from a schema mistake.
# Log at DEBUG so it's visible in agent.log.
logger.debug(
"reconcile %s.%s: %s", table_name, col_name, exc,
)
def _init_schema(self):
"""Create tables and FTS if they don't exist, reconcile columns.
Schema management follows the declarative reconciliation pattern
(Beets, sqlite-utils): SCHEMA_SQL is the single source of truth.
On existing databases, _reconcile_columns() diffs live columns
against SCHEMA_SQL and ADDs any missing ones. This eliminates
the version-gated migration chain for column additions, making
it impossible for reordered or inserted migrations to skip columns.
The schema_version table is retained for future data migrations
(transforming existing rows) which cannot be handled declaratively.
"""
"""Create tables and FTS if they don't exist, run migrations."""
cursor = self._conn.cursor()
cursor.executescript(SCHEMA_SQL)
# ── Declarative column reconciliation ──────────────────────────
# Diff live tables against SCHEMA_SQL and ADD any missing columns.
# This is idempotent and self-healing: even if a version-gated
# migration was skipped (e.g. due to version renumbering), the
# column gets created here.
self._reconcile_columns(cursor)
# ── Schema version bookkeeping ─────────────────────────────────
# Bump to current so future data migrations (if any) can gate on
# version. No version-gated column additions remain.
# Check schema version and run migrations
cursor.execute("SELECT version FROM schema_version LIMIT 1")
row = cursor.fetchone()
if row is None:
cursor.execute(
"INSERT INTO schema_version (version) VALUES (?)",
(SCHEMA_VERSION,),
)
cursor.execute("INSERT INTO schema_version (version) VALUES (?)", (SCHEMA_VERSION,))
else:
current_version = row["version"] if isinstance(row, sqlite3.Row) else row[0]
if current_version < SCHEMA_VERSION:
cursor.execute(
"UPDATE schema_version SET version = ?",
(SCHEMA_VERSION,),
)
if current_version < 2:
# v2: add finish_reason column to messages
try:
cursor.execute("ALTER TABLE messages ADD COLUMN finish_reason TEXT")
except sqlite3.OperationalError:
pass # Column already exists
cursor.execute("UPDATE schema_version SET version = 2")
if current_version < 3:
# v3: add title column to sessions
try:
cursor.execute("ALTER TABLE sessions ADD COLUMN title TEXT")
except sqlite3.OperationalError:
pass # Column already exists
cursor.execute("UPDATE schema_version SET version = 3")
if current_version < 4:
# v4: add unique index on title (NULLs allowed, only non-NULL must be unique)
try:
cursor.execute(
"CREATE UNIQUE INDEX IF NOT EXISTS idx_sessions_title_unique "
"ON sessions(title) WHERE title IS NOT NULL"
)
except sqlite3.OperationalError:
pass # Index already exists
cursor.execute("UPDATE schema_version SET version = 4")
if current_version < 5:
new_columns = [
("cache_read_tokens", "INTEGER DEFAULT 0"),
("cache_write_tokens", "INTEGER DEFAULT 0"),
("reasoning_tokens", "INTEGER DEFAULT 0"),
("billing_provider", "TEXT"),
("billing_base_url", "TEXT"),
("billing_mode", "TEXT"),
("estimated_cost_usd", "REAL"),
("actual_cost_usd", "REAL"),
("cost_status", "TEXT"),
("cost_source", "TEXT"),
("pricing_version", "TEXT"),
]
for name, column_type in new_columns:
try:
# name and column_type come from the hardcoded tuple above,
# not user input. Double-quote identifier escaping is applied
# as defense-in-depth; SQLite DDL cannot be parameterized.
safe_name = name.replace('"', '""')
cursor.execute(f'ALTER TABLE sessions ADD COLUMN "{safe_name}" {column_type}')
except sqlite3.OperationalError:
pass
cursor.execute("UPDATE schema_version SET version = 5")
if current_version < 6:
# v6: add reasoning columns to messages table — preserves assistant
# reasoning text and structured reasoning_details across gateway
# session turns. Without these, reasoning chains are lost on
# session reload, breaking multi-turn reasoning continuity for
# providers that replay reasoning (OpenRouter, OpenAI, Nous).
for col_name, col_type in [
("reasoning", "TEXT"),
("reasoning_details", "TEXT"),
("codex_reasoning_items", "TEXT"),
]:
try:
safe = col_name.replace('"', '""')
cursor.execute(
f'ALTER TABLE messages ADD COLUMN "{safe}" {col_type}'
)
except sqlite3.OperationalError:
pass # Column already exists
cursor.execute("UPDATE schema_version SET version = 6")
if current_version < 7:
# v7: preserve provider-native reasoning_content separately from
# normalized reasoning text. Kimi/Moonshot replay can require
# this field on assistant tool-call messages when thinking is on.
try:
cursor.execute('ALTER TABLE messages ADD COLUMN "reasoning_content" TEXT')
except sqlite3.OperationalError:
pass # Column already exists
cursor.execute("UPDATE schema_version SET version = 7")
if current_version < 8:
# v8: add api_call_count column to sessions — tracks the number
# of individual LLM API calls made within a session (as opposed
# to the session count itself).
try:
cursor.execute(
'ALTER TABLE sessions ADD COLUMN "api_call_count" INTEGER DEFAULT 0'
)
except sqlite3.OperationalError:
pass # Column already exists
cursor.execute("UPDATE schema_version SET version = 8")
# Unique title index — always ensure it exists
# Unique title index — always ensure it exists (safe to run after migrations
# since the title column is guaranteed to exist at this point)
try:
cursor.execute(
"CREATE UNIQUE INDEX IF NOT EXISTS idx_sessions_title_unique "

View File

@@ -105,6 +105,7 @@ AUTHOR_MAP = {
"134848055+UNLINEARITY@users.noreply.github.com": "UNLINEARITY",
"ben.burtenshaw@gmail.com": "burtenshaw",
"roopaknijhara@gmail.com": "rnijhara",
"Maaannnn@users.noreply.github.com": "Maaannnn",
# contributors (manual mapping from git names)
"ahmedsherif95@gmail.com": "asheriif",
"liujinkun@bytedance.com": "liujinkun2025",

View File

@@ -782,6 +782,45 @@ def test_resolve_api_key_provider_skips_unconfigured_anthropic(monkeypatch):
# ---------------------------------------------------------------------------
class TestModelDefaultElimination:
"""_resolve_api_key_provider must skip providers without known aux models."""
def test_unknown_provider_skipped(self, monkeypatch):
"""Providers not in _API_KEY_PROVIDER_AUX_MODELS are skipped, not sent model='default'."""
from agent.auxiliary_client import _API_KEY_PROVIDER_AUX_MODELS
# Verify our known providers have entries
assert "gemini" in _API_KEY_PROVIDER_AUX_MODELS
assert "kimi-coding" in _API_KEY_PROVIDER_AUX_MODELS
# A random provider_id not in the dict should return None
assert _API_KEY_PROVIDER_AUX_MODELS.get("totally-unknown-provider") is None
def test_known_provider_gets_real_model(self):
"""Known providers get a real model name, not 'default'."""
from agent.auxiliary_client import _API_KEY_PROVIDER_AUX_MODELS
for provider_id, model in _API_KEY_PROVIDER_AUX_MODELS.items():
assert model != "default", f"{provider_id} should not map to 'default'"
assert isinstance(model, str) and model.strip(), \
f"{provider_id} should have a non-empty model string"
def test_volcengine_byteplus_use_main_model_first(self):
"""Volcengine/BytePlus use main-model-first — no entry in _API_KEY_PROVIDER_AUX_MODELS."""
from agent.auxiliary_client import _API_KEY_PROVIDER_AUX_MODELS
assert "volcengine" not in _API_KEY_PROVIDER_AUX_MODELS
assert "byteplus" not in _API_KEY_PROVIDER_AUX_MODELS
class TestContractProviderAliases:
def test_coding_plan_aliases_normalize_to_canonical_provider(self):
from agent.auxiliary_client import _normalize_aux_provider
assert _normalize_aux_provider("volcengine-coding-plan") == "volcengine"
assert _normalize_aux_provider("byteplus-coding-plan") == "byteplus"
# ---------------------------------------------------------------------------
# _try_payment_fallback reason parameter (#7512 bug 3)
# ---------------------------------------------------------------------------

View File

@@ -222,6 +222,22 @@ class TestGetModelContextLength:
mock_fetch.return_value = {}
assert get_model_context_length("unknown/never-heard-of-this") == CONTEXT_PROBE_TIERS[0]
@patch("agent.model_metadata.fetch_model_metadata")
def test_volcengine_contract_model_uses_contract_context_length(self, mock_fetch):
mock_fetch.return_value = {}
assert get_model_context_length(
"volcengine/doubao-seed-2-0-pro-260215",
provider="volcengine",
) == 256000
@patch("agent.model_metadata.fetch_model_metadata")
def test_byteplus_contract_model_infers_provider_from_url(self, mock_fetch):
mock_fetch.return_value = {}
assert get_model_context_length(
"byteplus-coding-plan/kimi-k2.5",
base_url="https://ark.ap-southeast.bytepluses.com/api/coding/v3",
) == 256000
@patch("agent.model_metadata.fetch_model_metadata")
def test_partial_match_in_defaults(self, mock_fetch):
mock_fetch.return_value = {}

View File

@@ -42,6 +42,8 @@ class TestProviderRegistry:
("minimax-cn", "MiniMax (China)", "api_key"),
("ai-gateway", "Vercel AI Gateway", "api_key"),
("kilocode", "Kilo Code", "api_key"),
("volcengine", "Volcengine", "api_key"),
("byteplus", "BytePlus", "api_key"),
])
def test_provider_registered(self, provider_id, name, auth_type):
assert provider_id in PROVIDER_REGISTRY
@@ -111,6 +113,16 @@ class TestProviderRegistry:
assert pconfig.api_key_env_vars == ("HF_TOKEN",)
assert pconfig.base_url_env_var == "HF_BASE_URL"
def test_volcengine_env_vars(self):
pconfig = PROVIDER_REGISTRY["volcengine"]
assert pconfig.api_key_env_vars == ("VOLCENGINE_API_KEY",)
assert pconfig.base_url_env_var == ""
def test_byteplus_env_vars(self):
pconfig = PROVIDER_REGISTRY["byteplus"]
assert pconfig.api_key_env_vars == ("BYTEPLUS_API_KEY",)
assert pconfig.base_url_env_var == ""
def test_base_urls(self):
assert PROVIDER_REGISTRY["copilot"].inference_base_url == "https://api.githubcopilot.com"
assert PROVIDER_REGISTRY["copilot-acp"].inference_base_url == "acp://copilot"
@@ -122,6 +134,8 @@ class TestProviderRegistry:
assert PROVIDER_REGISTRY["ai-gateway"].inference_base_url == "https://ai-gateway.vercel.sh/v1"
assert PROVIDER_REGISTRY["kilocode"].inference_base_url == "https://api.kilo.ai/api/gateway"
assert PROVIDER_REGISTRY["huggingface"].inference_base_url == "https://router.huggingface.co/v1"
assert PROVIDER_REGISTRY["volcengine"].inference_base_url == "https://ark.cn-beijing.volces.com/api/v3"
assert PROVIDER_REGISTRY["byteplus"].inference_base_url == "https://ark.ap-southeast.bytepluses.com/api/v3"
def test_oauth_providers_unchanged(self):
"""Ensure we didn't break the existing OAuth providers."""
@@ -147,6 +161,7 @@ PROVIDER_ENV_VARS = (
"NOUS_API_KEY", "GITHUB_TOKEN", "GH_TOKEN",
"OPENAI_BASE_URL", "HERMES_COPILOT_ACP_COMMAND", "COPILOT_CLI_PATH",
"HERMES_COPILOT_ACP_ARGS", "COPILOT_ACP_BASE_URL",
"VOLCENGINE_API_KEY", "BYTEPLUS_API_KEY",
)
@@ -232,6 +247,14 @@ class TestResolveProvider:
assert resolve_provider("github-copilot-acp") == "copilot-acp"
assert resolve_provider("copilot-acp-agent") == "copilot-acp"
def test_alias_volcengine_coding_plan(self):
assert resolve_provider("volcengine-coding-plan") == "volcengine"
assert resolve_provider("volcengine_coding_plan") == "volcengine"
def test_alias_byteplus_coding_plan(self):
assert resolve_provider("byteplus-coding-plan") == "byteplus"
assert resolve_provider("byteplus_coding_plan") == "byteplus"
def test_explicit_huggingface(self):
assert resolve_provider("huggingface") == "huggingface"
@@ -339,6 +362,23 @@ class TestApiKeyProviderStatus:
assert status["configured"] is True
assert status["base_url"] == STEPFUN_STEP_PLAN_CN_BASE_URL
def test_volcengine_status_uses_coding_plan_base_url(self, monkeypatch):
monkeypatch.setenv("VOLCENGINE_API_KEY", "volc-test-key")
monkeypatch.setattr(
"hermes_cli.auth.read_raw_config",
lambda: {
"model": {
"provider": "volcengine",
"default": "volcengine-coding-plan/doubao-seed-2.0-code",
}
},
)
status = get_api_key_provider_status("volcengine")
assert status["configured"] is True
assert status["base_url"] == "https://ark.cn-beijing.volces.com/api/coding/v3"
def test_copilot_status_uses_gh_cli_token(self, monkeypatch):
monkeypatch.setattr("hermes_cli.copilot_auth._try_gh_cli_token", lambda: "gho_gh_cli_token")
status = get_api_key_provider_status("copilot")
@@ -394,6 +434,25 @@ class TestResolveApiKeyProviderCredentials:
assert creds["base_url"] == "https://api.z.ai/api/paas/v4"
assert creds["source"] == "GLM_API_KEY"
def test_resolve_byteplus_with_coding_plan_model_uses_coding_base_url(self, monkeypatch):
monkeypatch.setenv("BYTEPLUS_API_KEY", "byteplus-secret-key")
monkeypatch.setattr(
"hermes_cli.auth.read_raw_config",
lambda: {
"model": {
"provider": "byteplus",
"default": "byteplus-coding-plan/dola-seed-2.0-pro",
}
},
)
creds = resolve_api_key_provider_credentials("byteplus")
assert creds["provider"] == "byteplus"
assert creds["api_key"] == "byteplus-secret-key"
assert creds["base_url"] == "https://ark.ap-southeast.bytepluses.com/api/coding/v3"
assert creds["source"] == "BYTEPLUS_API_KEY"
def test_resolve_copilot_with_github_token(self, monkeypatch):
monkeypatch.setenv("GITHUB_TOKEN", "gh-env-secret")
creds = resolve_api_key_provider_credentials("copilot")

View File

@@ -179,6 +179,19 @@ class TestIssue6211NativeProviderPrefixNormalization:
assert normalize_model_for_provider(model, target_provider) == expected
class TestContractProviderPrefixNormalization:
@pytest.mark.parametrize("model,target_provider,expected", [
("volcengine/doubao-seed-2-0-pro-260215", "volcengine", "doubao-seed-2-0-pro-260215"),
("volcengine-coding-plan/doubao-seed-2.0-code", "volcengine", "doubao-seed-2.0-code"),
("byteplus/seed-2-0-pro-260328", "byteplus", "seed-2-0-pro-260328"),
("byteplus-coding-plan/dola-seed-2.0-pro", "byteplus", "dola-seed-2.0-pro"),
])
def test_contract_provider_prefixes_strip_to_native_model(
self, model, target_provider, expected
):
assert normalize_model_for_provider(model, target_provider) == expected
# ── detect_vendor ──────────────────────────────────────────────────────
class TestDetectVendor:

View File

@@ -102,6 +102,31 @@ class TestProviderPersistsAfterModelSave:
)
assert model.get("default") == "kimi-k2.5"
def test_volcengine_contract_provider_persists_coding_plan_model(self, config_home, monkeypatch):
"""Volcengine should persist a prefixed coding-plan model and matching base URL."""
monkeypatch.setenv("VOLCENGINE_API_KEY", "volc-test-key")
from hermes_cli.main import _model_flow_contract_provider
from hermes_cli.config import load_config
with patch(
"hermes_cli.auth._prompt_model_selection",
return_value="volcengine-coding-plan/doubao-seed-2.0-code",
), patch(
"hermes_cli.auth.deactivate_provider",
):
_model_flow_contract_provider(load_config(), "volcengine", "old-model")
import yaml
config = yaml.safe_load((config_home / "config.yaml").read_text()) or {}
model = config.get("model")
assert isinstance(model, dict), f"model should be dict, got {type(model)}"
assert model.get("provider") == "volcengine"
assert model.get("default") == "volcengine-coding-plan/doubao-seed-2.0-code"
assert model.get("base_url") == "https://ark.cn-beijing.volces.com/api/coding/v3"
assert "api_mode" not in model
def test_copilot_provider_saved_when_selected(self, config_home):
"""_model_flow_copilot should persist provider/base_url/model together."""
from hermes_cli.main import _model_flow_copilot

View File

@@ -6,6 +6,7 @@ from hermes_cli.models import (
OPENROUTER_MODELS, fetch_openrouter_models, model_ids, detect_provider_for_model,
is_nous_free_tier, partition_nous_models_by_tier,
check_nous_free_tier, _FREE_TIER_CACHE_TTL,
list_available_providers, provider_for_base_url,
)
import hermes_cli.models as _models_mod
@@ -291,6 +292,41 @@ class TestDetectProviderForModel:
assert result is not None
assert result[0] not in ("nous",) # nous has claude models but shouldn't be suggested
def test_volcengine_coding_plan_model_detected(self):
result = detect_provider_for_model(
"volcengine-coding-plan/doubao-seed-2.0-code",
"openrouter",
)
assert result == ("volcengine", "volcengine-coding-plan/doubao-seed-2.0-code")
def test_byteplus_standard_model_detected(self):
result = detect_provider_for_model(
"byteplus/seed-2-0-pro-260328",
"openrouter",
)
assert result == ("byteplus", "byteplus/seed-2-0-pro-260328")
class TestConfiguredBaseUrlProviderDetection:
def test_provider_for_base_url_detects_volcengine(self):
assert provider_for_base_url("https://ark.cn-beijing.volces.com/api/v3") == "volcengine"
def test_provider_for_base_url_detects_byteplus_coding(self):
assert provider_for_base_url("https://ark.ap-southeast.bytepluses.com/api/coding/v3") == "byteplus"
def test_known_builtin_endpoint_is_not_listed_as_custom(self, monkeypatch):
monkeypatch.setattr("hermes_cli.models._get_custom_base_url", lambda: "https://ark.cn-beijing.volces.com/api/v3")
monkeypatch.setattr(
"hermes_cli.auth.get_auth_status",
lambda pid: {"configured": pid == "volcengine", "logged_in": pid == "volcengine"},
)
monkeypatch.setattr("hermes_cli.auth.has_usable_secret", lambda value: False)
providers = {p["id"]: p for p in list_available_providers()}
assert providers["volcengine"]["authenticated"] is True
assert providers["custom"]["authenticated"] is False
class TestIsNousFreeTier:
"""Tests for is_nous_free_tier — account tier detection."""

View File

@@ -1254,144 +1254,6 @@ class TestSchemaInit:
migrated_db.close()
def test_reconciliation_adds_missing_columns(self, tmp_path):
"""Columns present in SCHEMA_SQL but missing from the live table
are added by _reconcile_columns regardless of schema_version.
Regression test: commit a7d78d3b inserted a new v7 migration
(reasoning_content) and renumbered the old v7 (api_call_count)
to v8. Users already at the old v7 had schema_version >= 7,
so the new v7 block was skipped and reasoning_content was never
created — causing 'no such column' on /continue.
"""
import sqlite3
db_path = tmp_path / "gap_test.db"
conn = sqlite3.connect(str(db_path))
# Simulate the old v7 state: api_call_count exists, reasoning_content does NOT
conn.executescript("""
CREATE TABLE schema_version (version INTEGER NOT NULL);
INSERT INTO schema_version (version) VALUES (7);
CREATE TABLE sessions (
id TEXT PRIMARY KEY,
source TEXT NOT NULL,
user_id TEXT,
model TEXT,
model_config TEXT,
system_prompt TEXT,
parent_session_id TEXT,
started_at REAL NOT NULL,
ended_at REAL,
end_reason TEXT,
message_count INTEGER DEFAULT 0,
tool_call_count INTEGER DEFAULT 0,
input_tokens INTEGER DEFAULT 0,
output_tokens INTEGER DEFAULT 0,
cache_read_tokens INTEGER DEFAULT 0,
cache_write_tokens INTEGER DEFAULT 0,
reasoning_tokens INTEGER DEFAULT 0,
billing_provider TEXT,
billing_base_url TEXT,
billing_mode TEXT,
estimated_cost_usd REAL,
actual_cost_usd REAL,
cost_status TEXT,
cost_source TEXT,
pricing_version TEXT,
title TEXT,
api_call_count INTEGER DEFAULT 0
);
CREATE TABLE messages (
id INTEGER PRIMARY KEY AUTOINCREMENT,
session_id TEXT NOT NULL,
role TEXT NOT NULL,
content TEXT,
tool_call_id TEXT,
tool_calls TEXT,
tool_name TEXT,
timestamp REAL NOT NULL,
token_count INTEGER,
finish_reason TEXT,
reasoning TEXT,
reasoning_details TEXT,
codex_reasoning_items TEXT
);
""")
conn.execute(
"INSERT INTO sessions (id, source, started_at) VALUES (?, ?, ?)",
("s1", "cli", 1000.0),
)
conn.execute(
"INSERT INTO messages (session_id, role, content, timestamp) "
"VALUES (?, ?, ?, ?)",
("s1", "assistant", "hello", 1001.0),
)
conn.commit()
# Verify reasoning_content is absent
cols = {r[1] for r in conn.execute("PRAGMA table_info(messages)").fetchall()}
assert "reasoning_content" not in cols
conn.close()
# Open with SessionDB — reconciliation should add the missing column
migrated_db = SessionDB(db_path=db_path)
msg_cols = {
r[1]
for r in migrated_db._conn.execute("PRAGMA table_info(messages)").fetchall()
}
assert "reasoning_content" in msg_cols
# The query that used to crash must now work
cursor = migrated_db._conn.execute(
"SELECT role, content, reasoning, reasoning_content, "
"reasoning_details, codex_reasoning_items "
"FROM messages WHERE session_id = ?",
("s1",),
)
row = cursor.fetchone()
assert row is not None
assert row[0] == "assistant"
assert row[3] is None # reasoning_content NULL for old rows
migrated_db.close()
def test_reconciliation_is_idempotent(self, tmp_path):
"""Opening the same database twice doesn't error or duplicate columns."""
db_path = tmp_path / "idempotent.db"
db1 = SessionDB(db_path=db_path)
cols1 = {r[1] for r in db1._conn.execute("PRAGMA table_info(messages)").fetchall()}
db1.close()
db2 = SessionDB(db_path=db_path)
cols2 = {r[1] for r in db2._conn.execute("PRAGMA table_info(messages)").fetchall()}
db2.close()
assert cols1 == cols2
def test_schema_sql_is_source_of_truth(self, db):
"""Every column in SCHEMA_SQL exists in the live database.
This is the architectural invariant: SCHEMA_SQL declares the
desired schema, _reconcile_columns ensures it matches reality.
"""
from hermes_state import SCHEMA_SQL
expected = SessionDB._parse_schema_columns(SCHEMA_SQL)
for table_name, declared_cols in expected.items():
live_cols = {
r[1]
for r in db._conn.execute(
f'PRAGMA table_info("{table_name}")'
).fetchall()
}
for col_name in declared_cols:
assert col_name in live_cols, (
f"Column {col_name} declared in SCHEMA_SQL for {table_name} "
f"but missing from live DB. Live columns: {live_cols}"
)
class TestTitleUniqueness:
"""Tests for unique title enforcement and title-based lookups."""

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,410 +0,0 @@
import { describe, expect, it } from 'vitest'
import {
buildSubagentTree,
descendantIds,
flattenTree,
fmtCost,
fmtDuration,
fmtTokens,
formatSummary,
hotnessBucket,
peakHotness,
sparkline,
topLevelSubagents,
treeTotals,
widthByDepth
} from '../lib/subagentTree.js'
import type { SubagentProgress } from '../types.js'
const makeItem = (overrides: Partial<SubagentProgress> & Pick<SubagentProgress, 'id' | 'index'>): SubagentProgress => ({
depth: 0,
goal: overrides.id,
notes: [],
parentId: null,
status: 'running',
taskCount: 1,
thinking: [],
toolCount: 0,
tools: [],
...overrides
})
describe('aggregate: tokens, cost, files, hotness', () => {
it('sums tokens and cost across subtree', () => {
const items = [
makeItem({ costUsd: 0.01, id: 'p', index: 0, inputTokens: 1000, outputTokens: 500 }),
makeItem({
costUsd: 0.005,
depth: 1,
id: 'c1',
index: 0,
inputTokens: 500,
outputTokens: 100,
parentId: 'p'
}),
makeItem({
costUsd: 0.008,
depth: 1,
id: 'c2',
index: 1,
inputTokens: 300,
outputTokens: 200,
parentId: 'p'
})
]
const tree = buildSubagentTree(items)
expect(tree[0]!.aggregate).toMatchObject({
costUsd: 0.023,
inputTokens: 1800,
outputTokens: 800
})
})
it('counts files read + written across subtree', () => {
const items = [
makeItem({ filesRead: ['a.ts', 'b.ts'], id: 'p', index: 0 }),
makeItem({ depth: 1, filesWritten: ['c.ts'], id: 'c', index: 0, parentId: 'p' })
]
const tree = buildSubagentTree(items)
expect(tree[0]!.aggregate.filesTouched).toBe(3)
})
it('hotness = totalTools / totalDuration', () => {
const items = [
makeItem({
durationSeconds: 10,
id: 'p',
index: 0,
status: 'completed',
toolCount: 20
})
]
const tree = buildSubagentTree(items)
expect(tree[0]!.aggregate.hotness).toBeCloseTo(2)
})
it('hotness is zero when duration is zero', () => {
const items = [makeItem({ id: 'p', index: 0, toolCount: 10 })]
const tree = buildSubagentTree(items)
expect(tree[0]!.aggregate.hotness).toBe(0)
})
})
describe('hotnessBucket + peakHotness', () => {
it('peakHotness walks subtree', () => {
const items = [
makeItem({ durationSeconds: 100, id: 'p', index: 0, status: 'completed', toolCount: 1 }),
makeItem({
depth: 1,
durationSeconds: 1,
id: 'c',
index: 0,
parentId: 'p',
status: 'completed',
toolCount: 5
})
]
const tree = buildSubagentTree(items)
expect(peakHotness(tree)).toBeGreaterThan(2)
})
it('hotnessBucket clamps and normalizes', () => {
expect(hotnessBucket(0, 10, 4)).toBe(0)
expect(hotnessBucket(10, 10, 4)).toBe(3)
expect(hotnessBucket(5, 10, 4)).toBe(2)
expect(hotnessBucket(100, 10, 4)).toBe(3) // clamped
expect(hotnessBucket(5, 0, 4)).toBe(0) // guard against divide-by-zero
})
})
describe('fmtCost + fmtTokens', () => {
it('fmtCost handles ranges', () => {
expect(fmtCost(0)).toBe('')
expect(fmtCost(0.001)).toBe('<$0.01')
expect(fmtCost(0.42)).toBe('$0.42')
expect(fmtCost(1.23)).toBe('$1.23')
expect(fmtCost(12.5)).toBe('$12.5')
})
it('fmtTokens handles ranges', () => {
expect(fmtTokens(0)).toBe('0')
expect(fmtTokens(542)).toBe('542')
expect(fmtTokens(1234)).toBe('1.2k')
expect(fmtTokens(45678)).toBe('46k')
})
})
describe('formatSummary with tokens + cost', () => {
it('includes token + cost when present', () => {
expect(
formatSummary({
activeCount: 0,
costUsd: 0.42,
descendantCount: 3,
filesTouched: 0,
hotness: 0,
inputTokens: 8000,
maxDepthFromHere: 2,
outputTokens: 2000,
totalDuration: 30,
totalTools: 14
})
).toBe('d2 · 3 agents · 14 tools · 30s · 10k tok · $0.42')
})
})
describe('buildSubagentTree', () => {
it('returns empty list for empty input', () => {
expect(buildSubagentTree([])).toEqual([])
})
it('treats flat list as top-level when no parentId is given', () => {
const items = [makeItem({ id: 'a', index: 0 }), makeItem({ id: 'b', index: 1 }), makeItem({ id: 'c', index: 2 })]
const tree = buildSubagentTree(items)
expect(tree).toHaveLength(3)
expect(tree.map(n => n.item.id)).toEqual(['a', 'b', 'c'])
expect(tree.every(n => n.children.length === 0)).toBe(true)
})
it('nests children under their parent by subagent_id', () => {
const items = [
makeItem({ id: 'parent', index: 0 }),
makeItem({ depth: 1, id: 'child-1', index: 0, parentId: 'parent' }),
makeItem({ depth: 1, id: 'child-2', index: 1, parentId: 'parent' })
]
const tree = buildSubagentTree(items)
expect(tree).toHaveLength(1)
expect(tree[0]!.children).toHaveLength(2)
expect(tree[0]!.children.map(n => n.item.id)).toEqual(['child-1', 'child-2'])
})
it('builds multi-level nesting', () => {
const items = [
makeItem({ id: 'p', index: 0 }),
makeItem({ depth: 1, id: 'c', index: 0, parentId: 'p' }),
makeItem({ depth: 2, id: 'gc', index: 0, parentId: 'c' })
]
const tree = buildSubagentTree(items)
expect(tree[0]!.children[0]!.children[0]!.item.id).toBe('gc')
expect(tree[0]!.aggregate.maxDepthFromHere).toBe(2)
expect(tree[0]!.aggregate.descendantCount).toBe(2)
})
it('promotes orphaned children (missing parent) to top level', () => {
const items = [makeItem({ id: 'a', index: 0 }), makeItem({ depth: 1, id: 'orphan', index: 1, parentId: 'ghost' })]
const tree = buildSubagentTree(items)
expect(tree).toHaveLength(2)
expect(tree.map(n => n.item.id)).toEqual(['a', 'orphan'])
})
it('stable sort: children ordered by (depth, index) not insert order', () => {
const items = [
makeItem({ id: 'p', index: 0 }),
makeItem({ depth: 1, id: 'c3', index: 2, parentId: 'p' }),
makeItem({ depth: 1, id: 'c1', index: 0, parentId: 'p' }),
makeItem({ depth: 1, id: 'c2', index: 1, parentId: 'p' })
]
const tree = buildSubagentTree(items)
expect(tree[0]!.children.map(n => n.item.id)).toEqual(['c1', 'c2', 'c3'])
})
})
describe('aggregate', () => {
it('sums tool counts and durations across subtree', () => {
const items = [
makeItem({ durationSeconds: 10, id: 'p', index: 0, status: 'completed', toolCount: 5 }),
makeItem({ depth: 1, durationSeconds: 4, id: 'c1', index: 0, parentId: 'p', status: 'completed', toolCount: 3 }),
makeItem({ depth: 1, durationSeconds: 2, id: 'c2', index: 1, parentId: 'p', status: 'completed', toolCount: 1 })
]
const tree = buildSubagentTree(items)
expect(tree[0]!.aggregate).toMatchObject({
activeCount: 0,
descendantCount: 2,
totalDuration: 16,
totalTools: 9
})
})
it('counts queued + running as active', () => {
const items = [
makeItem({ id: 'p', index: 0, status: 'running' }),
makeItem({ depth: 1, id: 'c1', index: 0, parentId: 'p', status: 'queued' }),
makeItem({ depth: 1, id: 'c2', index: 1, parentId: 'p', status: 'completed' })
]
const tree = buildSubagentTree(items)
expect(tree[0]!.aggregate.activeCount).toBe(2)
})
})
describe('widthByDepth', () => {
it('returns empty array for empty tree', () => {
expect(widthByDepth([])).toEqual([])
})
it('tallies nodes at each depth', () => {
const items = [
makeItem({ id: 'p1', index: 0 }),
makeItem({ id: 'p2', index: 1 }),
makeItem({ depth: 1, id: 'c1', index: 0, parentId: 'p1' }),
makeItem({ depth: 1, id: 'c2', index: 1, parentId: 'p1' }),
makeItem({ depth: 1, id: 'c3', index: 0, parentId: 'p2' }),
makeItem({ depth: 2, id: 'gc1', index: 0, parentId: 'c1' })
]
expect(widthByDepth(buildSubagentTree(items))).toEqual([2, 3, 1])
})
})
describe('treeTotals', () => {
it('folds a full tree into a single rollup', () => {
const items = [
makeItem({ id: 'p1', index: 0, toolCount: 5 }),
makeItem({ id: 'p2', index: 1, toolCount: 2 }),
makeItem({ depth: 1, id: 'c', index: 0, parentId: 'p1', toolCount: 3 })
]
const totals = treeTotals(buildSubagentTree(items))
expect(totals.descendantCount).toBe(3)
expect(totals.totalTools).toBe(10)
expect(totals.maxDepthFromHere).toBe(2)
})
it('returns zeros for empty tree', () => {
expect(treeTotals([])).toEqual({
activeCount: 0,
costUsd: 0,
descendantCount: 0,
filesTouched: 0,
hotness: 0,
inputTokens: 0,
maxDepthFromHere: 0,
outputTokens: 0,
totalDuration: 0,
totalTools: 0
})
})
})
describe('flattenTree + descendantIds', () => {
const items = [
makeItem({ id: 'p', index: 0 }),
makeItem({ depth: 1, id: 'c1', index: 0, parentId: 'p' }),
makeItem({ depth: 2, id: 'gc', index: 0, parentId: 'c1' }),
makeItem({ depth: 1, id: 'c2', index: 1, parentId: 'p' })
]
it('flattens in visit order (depth-first, pre-order)', () => {
const tree = buildSubagentTree(items)
expect(flattenTree(tree).map(n => n.item.id)).toEqual(['p', 'c1', 'gc', 'c2'])
})
it('collects descendant ids excluding the node itself', () => {
const tree = buildSubagentTree(items)
expect(descendantIds(tree[0]!)).toEqual(['c1', 'gc', 'c2'])
})
})
describe('sparkline', () => {
it('returns empty string for empty input', () => {
expect(sparkline([])).toBe('')
})
it('renders zeroes as spaces (not bottom glyph)', () => {
expect(sparkline([0, 0])).toBe(' ')
})
it('scales to the max value', () => {
const out = sparkline([1, 8])
expect(out).toHaveLength(2)
expect(out[1]).toBe('█')
})
it('sparse widths render as expected', () => {
const out = sparkline([2, 3, 7, 4])
expect(out).toHaveLength(4)
expect([...out].every(ch => /[\s▁-█]/.test(ch))).toBe(true)
})
})
describe('formatSummary', () => {
const emptyTotals = {
activeCount: 0,
costUsd: 0,
descendantCount: 0,
filesTouched: 0,
hotness: 0,
inputTokens: 0,
maxDepthFromHere: 0,
outputTokens: 0,
totalDuration: 0,
totalTools: 0
}
it('collapses zero-valued components', () => {
expect(formatSummary({ ...emptyTotals, descendantCount: 1 })).toBe('d0 · 1 agent')
})
it('emits rich summary with all pieces', () => {
expect(
formatSummary({
...emptyTotals,
activeCount: 2,
descendantCount: 7,
maxDepthFromHere: 3,
totalDuration: 134,
totalTools: 124
})
).toBe('d3 · 7 agents · 124 tools · 2m 14s · ⚡2')
})
})
describe('fmtDuration', () => {
it('formats under a minute as plain seconds', () => {
expect(fmtDuration(0)).toBe('0s')
expect(fmtDuration(42)).toBe('42s')
expect(fmtDuration(59.4)).toBe('59s')
})
it('formats whole minutes without trailing seconds', () => {
expect(fmtDuration(60)).toBe('1m')
expect(fmtDuration(180)).toBe('3m')
})
it('mixes minutes and seconds', () => {
expect(fmtDuration(134)).toBe('2m 14s')
expect(fmtDuration(605)).toBe('10m 5s')
})
})
describe('topLevelSubagents', () => {
it('returns items with no parent', () => {
const items = [makeItem({ id: 'a', index: 0 }), makeItem({ id: 'b', index: 1 })]
expect(topLevelSubagents(items).map(s => s.id)).toEqual(['a', 'b'])
})
it('excludes children whose parent is present', () => {
const items = [
makeItem({ id: 'p', index: 0 }),
makeItem({ depth: 1, id: 'c', index: 0, parentId: 'p' })
]
expect(topLevelSubagents(items).map(s => s.id)).toEqual(['p'])
})
it('promotes orphans whose parent is missing', () => {
const items = [makeItem({ id: 'a', index: 0 }), makeItem({ depth: 1, id: 'orphan', index: 1, parentId: 'ghost' })]
expect(topLevelSubagents(items).map(s => s.id)).toEqual(['a', 'orphan'])
})
})

View File

@@ -1,13 +1,11 @@
import { STREAM_BATCH_MS } from '../config/timing.js'
import { buildSetupRequiredSections, SETUP_REQUIRED_TITLE } from '../content/setup.js'
import type { CommandsCatalogResponse, DelegationStatusResponse, GatewayEvent, GatewaySkin } from '../gatewayTypes.js'
import type { CommandsCatalogResponse, GatewayEvent, GatewaySkin } from '../gatewayTypes.js'
import { rpcErrorMessage } from '../lib/rpc.js'
import { topLevelSubagents } from '../lib/subagentTree.js'
import { formatToolCall, stripAnsi } from '../lib/text.js'
import { fromSkin } from '../theme.js'
import type { Msg, SubagentProgress } from '../types.js'
import { applyDelegationStatus, getDelegationState } from './delegationStore.js'
import type { GatewayEventHandlerContext } from './interfaces.js'
import { patchOverlayState } from './overlayStore.js'
import { turnController } from './turnController.js'
@@ -55,55 +53,6 @@ export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev:
let pendingThinkingStatus = ''
let thinkingStatusTimer: null | ReturnType<typeof setTimeout> = null
// Inject the disk-save callback into turnController so recordMessageComplete
// can fire-and-forget a persist without having to plumb a gateway ref around.
turnController.persistSpawnTree = async (subagents, sessionId) => {
try {
const startedAt = subagents.reduce<number>((min, s) => {
if (!s.startedAt) {
return min
}
return min === 0 ? s.startedAt : Math.min(min, s.startedAt)
}, 0)
const top = topLevelSubagents(subagents)
.map(s => s.goal)
.filter(Boolean)
.slice(0, 2)
const label = top.length ? top.join(' · ') : `${subagents.length} subagents`
await rpc('spawn_tree.save', {
finished_at: Date.now() / 1000,
label: label.slice(0, 120),
session_id: sessionId ?? 'default',
started_at: startedAt ? startedAt / 1000 : null,
subagents
})
} catch {
// Persistence is best-effort; in-memory history is the authoritative
// same-session source. A write failure doesn't block the turn.
}
}
// Refresh delegation caps at most every 5s so the status bar HUD can
// render a /warning close to the configured cap without spamming the RPC.
let lastDelegationFetchAt = 0
const refreshDelegationStatus = (force = false) => {
const now = Date.now()
if (!force && now - lastDelegationFetchAt < 5000) {
return
}
lastDelegationFetchAt = now
rpc<DelegationStatusResponse>('delegation.status', {})
.then(r => applyDelegationStatus(r))
.catch(() => {})
}
const setStatus = (status: string) => {
pendingThinkingStatus = ''
@@ -136,12 +85,7 @@ export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev:
}, ms)
}
// Terminal statuses are never overwritten by late-arriving live events —
// otherwise a stale `subagent.start` / `spawn_requested` can clobber a
// `failed` or `interrupted` terminal state (Copilot review #14045).
const isTerminalStatus = (s: SubagentProgress['status']) => s === 'completed' || s === 'failed' || s === 'interrupted'
const keepTerminalElseRunning = (s: SubagentProgress['status']) => (isTerminalStatus(s) ? s : 'running')
const keepCompletedElseRunning = (s: SubagentProgress['status']) => (s === 'completed' ? s : 'running')
const handleReady = (skin?: GatewaySkin) => {
if (skin) {
@@ -316,28 +260,32 @@ export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev:
turnController.recordToolStart(ev.payload.tool_id, ev.payload.name ?? 'tool', ev.payload.context ?? '')
return
case 'tool.complete': {
const inlineDiffText =
ev.payload.inline_diff && getUiState().inlineDiffs ? stripAnsi(String(ev.payload.inline_diff)).trim() : ''
turnController.recordToolComplete(
ev.payload.tool_id,
ev.payload.name,
ev.payload.error,
inlineDiffText ? '' : ev.payload.summary
)
case 'tool.complete':
{
const inlineDiffText =
ev.payload.inline_diff && getUiState().inlineDiffs ? stripAnsi(String(ev.payload.inline_diff)).trim() : ''
turnController.recordToolComplete(
ev.payload.tool_id,
ev.payload.name,
ev.payload.error,
inlineDiffText ? '' : ev.payload.summary
)
if (!inlineDiffText) {
return
}
// Keep inline diffs attached to the assistant completion body so
// they render in the same message flow, not as a standalone system
// artifact that can look out-of-place around tool rows.
turnController.queueInlineDiff(inlineDiffText)
if (!inlineDiffText) {
return
}
// Keep inline diffs attached to the assistant completion body so
// they render in the same message flow, not as a standalone system
// artifact that can look out-of-place around tool rows.
turnController.queueInlineDiff(inlineDiffText)
return
}
case 'clarify.request':
patchOverlayState({
@@ -381,23 +329,8 @@ export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev:
return
case 'subagent.spawn_requested':
// Child built but not yet running (waiting on ThreadPoolExecutor slot).
// Preserve completed state if a later event races in before this one.
turnController.upsertSubagent(ev.payload, c => (isTerminalStatus(c.status) ? {} : { status: 'queued' }))
// Prime the status-bar HUD: fetch caps (once every 5s) so we can
// warn as depth/concurrency approaches the configured ceiling.
if (getDelegationState().maxSpawnDepth === null) {
refreshDelegationStatus(true)
} else {
refreshDelegationStatus()
}
return
case 'subagent.start':
turnController.upsertSubagent(ev.payload, c => (isTerminalStatus(c.status) ? {} : { status: 'running' }))
turnController.upsertSubagent(ev.payload, () => ({ status: 'running' }))
return
case 'subagent.thinking': {
@@ -407,16 +340,10 @@ export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev:
return
}
// Update-only: never resurrect subagents whose spawn_requested/start
// we missed or that already flushed via message.complete.
turnController.upsertSubagent(
ev.payload,
c => ({
status: keepTerminalElseRunning(c.status),
thinking: pushThinking(c.thinking, text)
}),
{ createIfMissing: false }
)
turnController.upsertSubagent(ev.payload, c => ({
status: keepCompletedElseRunning(c.status),
thinking: pushThinking(c.thinking, text)
}))
return
}
@@ -427,14 +354,10 @@ export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev:
ev.payload.tool_preview ?? ev.payload.text ?? ''
)
turnController.upsertSubagent(
ev.payload,
c => ({
status: keepTerminalElseRunning(c.status),
tools: pushTool(c.tools, line)
}),
{ createIfMissing: false }
)
turnController.upsertSubagent(ev.payload, c => ({
status: keepCompletedElseRunning(c.status),
tools: pushTool(c.tools, line)
}))
return
}
@@ -446,28 +369,20 @@ export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev:
return
}
turnController.upsertSubagent(
ev.payload,
c => ({
notes: pushNote(c.notes, text),
status: keepTerminalElseRunning(c.status)
}),
{ createIfMissing: false }
)
turnController.upsertSubagent(ev.payload, c => ({
notes: pushNote(c.notes, text),
status: keepCompletedElseRunning(c.status)
}))
return
}
case 'subagent.complete':
turnController.upsertSubagent(
ev.payload,
c => ({
durationSeconds: ev.payload.duration_seconds ?? c.durationSeconds,
status: ev.payload.status ?? 'completed',
summary: ev.payload.summary || ev.payload.text || c.summary
}),
{ createIfMissing: false }
)
turnController.upsertSubagent(ev.payload, c => ({
durationSeconds: ev.payload.duration_seconds ?? c.durationSeconds,
status: ev.payload.status ?? 'completed',
summary: ev.payload.summary || ev.payload.text || c.summary
}))
return

View File

@@ -1,77 +0,0 @@
import { atom } from 'nanostores'
import type { DelegationStatusResponse } from '../gatewayTypes.js'
export interface DelegationState {
// Last known caps from `delegation.status` RPC. null until fetched.
maxConcurrentChildren: null | number
maxSpawnDepth: null | number
// True when spawning is globally paused (see tools/delegate_tool.py).
paused: boolean
// Monotonic clock of the last successful status fetch.
updatedAt: null | number
}
const buildState = (): DelegationState => ({
maxConcurrentChildren: null,
maxSpawnDepth: null,
paused: false,
updatedAt: null
})
export const $delegationState = atom<DelegationState>(buildState())
export const getDelegationState = () => $delegationState.get()
export const patchDelegationState = (next: Partial<DelegationState>) =>
$delegationState.set({ ...$delegationState.get(), ...next })
export const resetDelegationState = () => $delegationState.set(buildState())
// ── Overlay accordion open-state ──────────────────────────────────────
//
// Lifted out of OverlaySection's local useState so collapse choices
// survive:
// - navigating to a different subagent (Detail remounts)
// - switching list ↔ detail mode (Detail unmounts in list mode)
// - walking history (←/→)
// Keyed by section title; missing entries fall back to the section's
// `defaultOpen` prop.
export const $overlaySectionsOpen = atom<Record<string, boolean>>({})
export const toggleOverlaySection = (title: string, defaultOpen: boolean) => {
const state = $overlaySectionsOpen.get()
const current = title in state ? state[title]! : defaultOpen
$overlaySectionsOpen.set({ ...state, [title]: !current })
}
export const getOverlaySectionOpen = (title: string, defaultOpen: boolean): boolean => {
const state = $overlaySectionsOpen.get()
return title in state ? state[title]! : defaultOpen
}
/** Merge a raw RPC response into the store. Tolerant of partial/omitted fields. */
export const applyDelegationStatus = (r: DelegationStatusResponse | null | undefined) => {
if (!r) {
return
}
const patch: Partial<DelegationState> = { updatedAt: Date.now() }
if (typeof r.max_spawn_depth === 'number') {
patch.maxSpawnDepth = r.max_spawn_depth
}
if (typeof r.max_concurrent_children === 'number') {
patch.maxConcurrentChildren = r.max_concurrent_children
}
if (typeof r.paused === 'boolean') {
patch.paused = r.paused
}
patchDelegationState(patch)
}

View File

@@ -53,8 +53,6 @@ export interface GatewayProviderProps {
}
export interface OverlayState {
agents: boolean
agentsInitialHistoryIndex: number
approval: ApprovalReq | null
clarify: ClarifyReq | null
confirm: ConfirmReq | null

View File

@@ -3,8 +3,6 @@ import { atom, computed } from 'nanostores'
import type { OverlayState } from './interfaces.js'
const buildOverlayState = (): OverlayState => ({
agents: false,
agentsInitialHistoryIndex: 0,
approval: null,
clarify: null,
confirm: null,
@@ -20,8 +18,8 @@ export const $overlayState = atom<OverlayState>(buildOverlayState())
export const $isBlocked = computed(
$overlayState,
({ agents, approval, clarify, confirm, modelPicker, pager, picker, secret, skillsHub, sudo }) =>
Boolean(agents || approval || clarify || confirm || modelPicker || pager || picker || secret || skillsHub || sudo)
({ approval, clarify, confirm, modelPicker, pager, picker, secret, skillsHub, sudo }) =>
Boolean(approval || clarify || confirm || modelPicker || pager || picker || secret || skillsHub || sudo)
)
export const getOverlayState = () => $overlayState.get()
@@ -29,23 +27,4 @@ export const getOverlayState = () => $overlayState.get()
export const patchOverlayState = (next: Partial<OverlayState> | ((state: OverlayState) => OverlayState)) =>
$overlayState.set(typeof next === 'function' ? next($overlayState.get()) : { ...$overlayState.get(), ...next })
/** Full reset — used by session/turn teardown and tests. */
export const resetOverlayState = () => $overlayState.set(buildOverlayState())
/**
* Soft reset: drop FLOW-scoped overlays (approval / clarify / confirm / sudo
* / secret / pager) but PRESERVE user-toggled ones — agents dashboard, model
* picker, skills hub, session picker. Those are opened deliberately and
* shouldn't vanish when a turn ends. Called from turnController.idle() on
* every turn completion / interrupt; the old "reset everything" behaviour
* silently closed /agents the moment delegation finished.
*/
export const resetFlowOverlays = () =>
$overlayState.set({
...buildOverlayState(),
agents: $overlayState.get().agents,
agentsInitialHistoryIndex: $overlayState.get().agentsInitialHistoryIndex,
modelPicker: $overlayState.get().modelPicker,
picker: $overlayState.get().picker,
skillsHub: $overlayState.get().skillsHub
})

View File

@@ -1,14 +1,6 @@
import type {
DelegationPauseResponse,
SlashExecResponse,
SpawnTreeListResponse,
SpawnTreeLoadResponse,
ToolsConfigureResponse
} from '../../../gatewayTypes.js'
import type { SlashExecResponse, ToolsConfigureResponse } from '../../../gatewayTypes.js'
import type { PanelSection } from '../../../types.js'
import { applyDelegationStatus, getDelegationState } from '../../delegationStore.js'
import { patchOverlayState } from '../../overlayStore.js'
import { getSpawnHistory, pushDiskSnapshot, setDiffPair, type SpawnSnapshot } from '../../spawnHistoryStore.js'
import type { SlashCommand } from '../types.js'
interface SkillInfo {
@@ -50,163 +42,6 @@ interface SkillsBrowseResponse {
}
export const opsCommands: SlashCommand[] = [
{
aliases: ['tasks'],
help: 'open the spawn-tree dashboard (live audit + kill/pause controls)',
name: 'agents',
run: (arg, ctx) => {
const sub = arg.trim().toLowerCase()
// Stay compatible with the gateway `/agents [pause|resume|status]` CLI —
// explicit subcommands skip the overlay and act directly so scripts and
// multi-step flows can drive it without entering interactive mode.
if (sub === 'pause' || sub === 'resume' || sub === 'unpause') {
const paused = sub === 'pause'
ctx.gateway.gw
.request<DelegationPauseResponse>('delegation.pause', { paused })
.then(r => {
applyDelegationStatus({ paused: r?.paused })
ctx.transcript.sys(`delegation · ${r?.paused ? 'paused' : 'resumed'}`)
})
.catch(ctx.guardedErr)
return
}
if (sub === 'status') {
const d = getDelegationState()
ctx.transcript.sys(
`delegation · ${d.paused ? 'paused' : 'active'} · caps d${d.maxSpawnDepth ?? '?'}/${d.maxConcurrentChildren ?? '?'}`
)
return
}
patchOverlayState({ agents: true, agentsInitialHistoryIndex: 0 })
}
},
{
help: 'replay a completed spawn tree · `/replay [N|last|list|load <path>]`',
name: 'replay',
run: (arg, ctx) => {
const history = getSpawnHistory()
const raw = arg.trim()
const lower = raw.toLowerCase()
// ── Disk-backed listing ─────────────────────────────────────
if (lower === 'list' || lower === 'ls') {
ctx.gateway
.rpc<SpawnTreeListResponse>('spawn_tree.list', {
limit: 30,
session_id: ctx.sid ?? 'default'
})
.then(
ctx.guarded<SpawnTreeListResponse>(r => {
const entries = r.entries ?? []
if (!entries.length) {
return ctx.transcript.sys('no archived spawn trees on disk for this session')
}
const rows: [string, string][] = entries.map(e => {
const ts = e.finished_at ? new Date(e.finished_at * 1000).toLocaleString() : '?'
const label = e.label || `${e.count} subagents`
return [`${ts} · ${e.count}×`, `${label}\n ${e.path}`]
})
ctx.transcript.panel('Archived spawn trees', [{ rows }])
})
)
.catch(ctx.guardedErr)
return
}
// ── Disk-backed load by path ─────────────────────────────────
if (lower.startsWith('load ')) {
const path = raw.slice(5).trim()
if (!path) {
return ctx.transcript.sys('usage: /replay load <path>')
}
ctx.gateway
.rpc<SpawnTreeLoadResponse>('spawn_tree.load', { path })
.then(
ctx.guarded<SpawnTreeLoadResponse>(r => {
if (!r.subagents?.length) {
return ctx.transcript.sys('snapshot empty or unreadable')
}
// Push onto the in-memory history so the overlay picks it up
// by index 1 just like any other snapshot.
pushDiskSnapshot(r, path)
patchOverlayState({ agents: true, agentsInitialHistoryIndex: 1 })
})
)
.catch(ctx.guardedErr)
return
}
// ── In-memory nav (same-session) ─────────────────────────────
if (!history.length) {
return ctx.transcript.sys('no completed spawn trees this session · try /replay list')
}
let index = 1
if (raw && lower !== 'last') {
const parsed = parseInt(raw, 10)
if (Number.isNaN(parsed) || parsed < 1 || parsed > history.length) {
return ctx.transcript.sys(`replay: index out of range 1..${history.length} · use /replay list for disk`)
}
index = parsed
}
patchOverlayState({ agents: true, agentsInitialHistoryIndex: index })
}
},
{
help: 'diff two completed spawn trees · `/replay-diff <baseline> <candidate>` (indexes from /replay list or history N)',
name: 'replay-diff',
run: (arg, ctx) => {
const parts = arg.trim().split(/\s+/).filter(Boolean)
if (parts.length !== 2) {
return ctx.transcript.sys('usage: /replay-diff <a> <b> (e.g. /replay-diff 1 2 for last two)')
}
const [a, b] = parts
const history = getSpawnHistory()
const resolve = (token: string): null | SpawnSnapshot => {
const n = parseInt(token!, 10)
if (Number.isFinite(n) && n >= 1 && n <= history.length) {
return history[n - 1] ?? null
}
return null
}
const baseline = resolve(a!)
const candidate = resolve(b!)
if (!baseline || !candidate) {
return ctx.transcript.sys(`replay-diff: could not resolve indices · history has ${history.length} entries`)
}
setDiffPair({ baseline, candidate })
patchOverlayState({ agents: true, agentsInitialHistoryIndex: 0 })
}
},
{
help: 'browse, inspect, install skills',
name: 'skills',

View File

@@ -1,139 +0,0 @@
import { atom } from 'nanostores'
import type { SpawnTreeLoadResponse } from '../gatewayTypes.js'
import type { SubagentProgress } from '../types.js'
export interface SpawnSnapshot {
finishedAt: number
fromDisk?: boolean
id: string
label: string
path?: string
sessionId: null | string
startedAt: number
subagents: SubagentProgress[]
}
export interface SpawnDiffPair {
baseline: SpawnSnapshot
candidate: SpawnSnapshot
}
const HISTORY_LIMIT = 10
export const $spawnHistory = atom<SpawnSnapshot[]>([])
export const $spawnDiff = atom<null | SpawnDiffPair>(null)
export const getSpawnHistory = () => $spawnHistory.get()
export const getSpawnDiff = () => $spawnDiff.get()
export const clearSpawnHistory = () => $spawnHistory.set([])
export const clearDiffPair = () => $spawnDiff.set(null)
export const setDiffPair = (pair: SpawnDiffPair) => $spawnDiff.set(pair)
/**
* Commit a finished turn's spawn tree to history. Keeps the last 10
* non-empty snapshots — empty turns (no subagents) are dropped.
*
* Why in-memory? The primary investigation loop is "I just ran a fan-out,
* it misbehaved, let me look at what happened" — same-session debugging.
* Disk persistence across process restarts is a natural extension but
* adds RPC surface for a less-common path.
*/
export const pushSnapshot = (
subagents: readonly SubagentProgress[],
meta: { sessionId?: null | string; startedAt?: null | number }
) => {
if (!subagents.length) {
return
}
const now = Date.now()
const started = meta.startedAt ?? Math.min(...subagents.map(s => s.startedAt ?? now))
const snap: SpawnSnapshot = {
finishedAt: now,
id: `snap-${now.toString(36)}`,
label: summarizeLabel(subagents),
sessionId: meta.sessionId ?? null,
startedAt: Number.isFinite(started) ? started : now,
subagents: subagents.map(item => ({ ...item }))
}
const next = [snap, ...$spawnHistory.get()].slice(0, HISTORY_LIMIT)
$spawnHistory.set(next)
}
function summarizeLabel(subagents: readonly SubagentProgress[]): string {
const top = subagents
.filter(s => s.parentId == null || subagents.every(o => o.id !== s.parentId))
.slice(0, 2)
.map(s => s.goal || 'subagent')
.join(' · ')
return top || `${subagents.length} agent${subagents.length === 1 ? '' : 's'}`
}
/**
* Push a disk-loaded snapshot onto the front of the history stack so the
* overlay can pick it up at index 1 via /replay load. Normalises the
* server payload (arbitrary list) into the same SubagentProgress shape
* used for live data — defensive against cross-version reads.
*/
export const pushDiskSnapshot = (r: SpawnTreeLoadResponse, path: string) => {
const raw = Array.isArray(r.subagents) ? r.subagents : []
const normalised = raw.map(normaliseSubagent)
if (!normalised.length) {
return
}
const snap: SpawnSnapshot = {
finishedAt: (r.finished_at ?? Date.now() / 1000) * 1000,
fromDisk: true,
id: `disk-${path}`,
label: r.label || `${normalised.length} subagents`,
path,
sessionId: r.session_id ?? null,
startedAt: (r.started_at ?? r.finished_at ?? Date.now() / 1000) * 1000,
subagents: normalised
}
const next = [snap, ...$spawnHistory.get()].slice(0, HISTORY_LIMIT)
$spawnHistory.set(next)
}
function normaliseSubagent(raw: unknown): SubagentProgress {
const o = raw as Record<string, unknown>
const s = (v: unknown) => (typeof v === 'string' ? v : undefined)
const n = (v: unknown) => (typeof v === 'number' ? v : undefined)
const arr = <T>(v: unknown): T[] | undefined => (Array.isArray(v) ? (v as T[]) : undefined)
return {
apiCalls: n(o.apiCalls),
costUsd: n(o.costUsd),
depth: typeof o.depth === 'number' ? o.depth : 0,
durationSeconds: n(o.durationSeconds),
filesRead: arr<string>(o.filesRead),
filesWritten: arr<string>(o.filesWritten),
goal: s(o.goal) ?? 'subagent',
id: s(o.id) ?? `sa-${Math.random().toString(36).slice(2, 8)}`,
index: typeof o.index === 'number' ? o.index : 0,
inputTokens: n(o.inputTokens),
iteration: n(o.iteration),
model: s(o.model),
notes: (arr<string>(o.notes) ?? []).filter(x => typeof x === 'string'),
outputTail: arr(o.outputTail) as SubagentProgress['outputTail'],
outputTokens: n(o.outputTokens),
parentId: s(o.parentId) ?? null,
reasoningTokens: n(o.reasoningTokens),
startedAt: n(o.startedAt),
status: (s(o.status) as SubagentProgress['status']) ?? 'completed',
summary: s(o.summary),
taskCount: typeof o.taskCount === 'number' ? o.taskCount : 1,
thinking: (arr<string>(o.thinking) ?? []).filter(x => typeof x === 'string'),
toolCount: typeof o.toolCount === 'number' ? o.toolCount : 0,
tools: (arr<string>(o.tools) ?? []).filter(x => typeof x === 'string'),
toolsets: arr<string>(o.toolsets)
}
}

View File

@@ -10,9 +10,8 @@ import {
} from '../lib/text.js'
import type { ActiveTool, ActivityItem, Msg, SubagentProgress } from '../types.js'
import { resetFlowOverlays } from './overlayStore.js'
import { pushSnapshot } from './spawnHistoryStore.js'
import { getTurnState, patchTurnState, resetTurnState } from './turnStore.js'
import { resetOverlayState } from './overlayStore.js'
import { patchTurnState, resetTurnState } from './turnStore.js'
import { getUiState, patchUiState } from './uiStore.js'
const INTERRUPT_COOLDOWN_MS = 1500
@@ -42,7 +41,6 @@ class TurnController {
lastStatusNote = ''
pendingInlineDiffs: string[] = []
persistedToolLabels = new Set<string>()
persistSpawnTree?: (subagents: SubagentProgress[], sessionId: null | string) => Promise<void>
protocolWarned = false
reasoningText = ''
segmentMessages: Msg[] = []
@@ -92,7 +90,7 @@ class TurnController {
turnTrail: []
})
patchUiState({ busy: false })
resetFlowOverlays()
resetOverlayState()
}
interruptTurn({ appendMessage, gw, sid, sys }: InterruptDeps) {
@@ -191,7 +189,9 @@ class TurnController {
// leading "┊ review diff" header written by `_emit_inline_diff` for the
// terminal printer). That header only makes sense as stdout dressing,
// not inside a markdown ```diff block.
const text = diffText.replace(/^\s*┊[^\n]*\n?/, '').trim()
const text = diffText
.replace(/^\s*┊[^\n]*\n?/, '')
.trim()
if (!text || this.pendingInlineDiffs.includes(text)) {
return
@@ -249,15 +249,12 @@ class TurnController {
// markdown fence of its own — otherwise we render two stacked diff
// blocks for the same edit.
const assistantAlreadyHasDiff = /```(?:diff|patch)\b/i.test(finalText)
const remainingInlineDiffs = assistantAlreadyHasDiff
? []
: this.pendingInlineDiffs.filter(diff => !finalText.includes(diff))
const inlineDiffBlock = remainingInlineDiffs.length
? `\`\`\`diff\n${remainingInlineDiffs.join('\n\n')}\n\`\`\``
: ''
const mergedText = [finalText, inlineDiffBlock].filter(Boolean).join('\n\n')
const existingReasoning = this.reasoningText.trim() || String(payload.reasoning ?? '').trim()
const savedReasoning = [existingReasoning, existingReasoning ? '' : split.reasoning].filter(Boolean).join('\n\n')
@@ -279,20 +276,6 @@ class TurnController {
const wasInterrupted = this.interrupted
// Archive the turn's spawn tree to history BEFORE idle() drops subagents
// from turnState. Lets /replay and the overlay's history nav pull up
// finished fan-outs without a round-trip to disk.
const finishedSubagents = getTurnState().subagents
const sessionId = getUiState().sid
if (finishedSubagents.length > 0) {
pushSnapshot(finishedSubagents, { sessionId, startedAt: null })
// Fire-and-forget disk persistence so /replay survives process restarts.
// The same snapshot lives in memory via spawnHistoryStore for immediate
// recall — disk is the long-term archive.
void this.persistSpawnTree?.(finishedSubagents, sessionId)
}
this.idle()
this.clearReasoning()
this.turnTools = []
@@ -460,82 +443,33 @@ class TurnController {
patchTurnState({ activity: [], outcome: '', subagents: [], toolTokens: 0, tools: [], turnTrail: [] })
}
upsertSubagent(
p: SubagentEventPayload,
patch: (current: SubagentProgress) => Partial<SubagentProgress>,
opts: { createIfMissing?: boolean } = { createIfMissing: true }
) {
// Stable id: prefer the server-issued subagent_id (survives nested
// grandchildren + cross-tree joins). Fall back to the composite key
// for older gateways that omit the field — those produce a flat list.
const id = p.subagent_id || `sa:${p.task_index}:${p.goal || 'subagent'}`
upsertSubagent(p: SubagentEventPayload, patch: (current: SubagentProgress) => Partial<SubagentProgress>) {
const id = `sa:${p.task_index}:${p.goal || 'subagent'}`
patchTurnState(state => {
const existing = state.subagents.find(item => item.id === id)
// Late events (subagent.complete/tool/progress arriving after message.complete
// has already fired idle()) would otherwise resurrect a finished
// subagent into turn.subagents and block the "finished" title on the
// /agents overlay. When `createIfMissing` is false we drop silently.
if (!existing && !opts.createIfMissing) {
return state
}
const base: SubagentProgress = existing ?? {
depth: p.depth ?? 0,
goal: p.goal,
id,
index: p.task_index,
model: p.model,
notes: [],
parentId: p.parent_id ?? null,
startedAt: Date.now(),
status: 'running',
taskCount: p.task_count ?? 1,
thinking: [],
toolCount: p.tool_count ?? 0,
tools: [],
toolsets: p.toolsets
tools: []
}
// Map snake_case payload keys onto camelCase state. Only overwrite
// when the event actually carries the field; `??` preserves prior
// values across streaming events that emit partial payloads.
const outputTail = p.output_tail
? p.output_tail.map(e => ({
isError: Boolean(e.is_error),
preview: String(e.preview ?? ''),
tool: String(e.tool ?? 'tool')
}))
: base.outputTail
const next: SubagentProgress = {
...base,
apiCalls: p.api_calls ?? base.apiCalls,
costUsd: p.cost_usd ?? base.costUsd,
depth: p.depth ?? base.depth,
filesRead: p.files_read ?? base.filesRead,
filesWritten: p.files_written ?? base.filesWritten,
goal: p.goal || base.goal,
inputTokens: p.input_tokens ?? base.inputTokens,
iteration: p.iteration ?? base.iteration,
model: p.model ?? base.model,
outputTail,
outputTokens: p.output_tokens ?? base.outputTokens,
parentId: p.parent_id ?? base.parentId,
reasoningTokens: p.reasoning_tokens ?? base.reasoningTokens,
taskCount: p.task_count ?? base.taskCount,
toolCount: p.tool_count ?? base.toolCount,
toolsets: p.toolsets ?? base.toolsets,
...patch(base)
}
// Stable order: by spawn (depth, parent, index) rather than insert time.
// Without it, grandchildren can shuffle relative to siblings when
// events arrive out of order under high concurrency.
const subagents = existing
? state.subagents.map(item => (item.id === id ? next : item))
: [...state.subagents, next].sort((a, b) => a.depth - b.depth || a.index - b.index)
: [...state.subagents, next].sort((a, b) => a.index - b.index)
return { ...state, subagents }
})

View File

@@ -74,10 +74,6 @@ export function useInputHandlers(ctx: InputHandlerContext): InputHandlerResult {
if (overlay.picker) {
return patchOverlayState({ picker: false })
}
if (overlay.agents) {
return patchOverlayState({ agents: false })
}
}
const cycleQueue = (dir: 1 | -1) => {
@@ -184,7 +180,6 @@ export function useInputHandlers(ctx: InputHandlerContext): InputHandlerResult {
if (isCtrl(key, ch, 'c')) {
cancelOverlayFromCtrlC()
}
return
}
@@ -295,7 +290,6 @@ export function useInputHandlers(ctx: InputHandlerContext): InputHandlerResult {
if (key.upArrow && !cState.inputBuf.length) {
const inputSel = getInputSelection()
const cursor = inputSel && inputSel.start === inputSel.end ? inputSel.start : null
const noLineAbove =
!cState.input || (cursor !== null && cState.input.lastIndexOf('\n', Math.max(0, cursor - 1)) < 0)

File diff suppressed because it is too large Load Diff

View File

@@ -1,14 +1,10 @@
import { Box, type ScrollBoxHandle, Text } from '@hermes/ink'
import { useStore } from '@nanostores/react'
import { type ReactNode, type RefObject, useCallback, useEffect, useMemo, useState, useSyncExternalStore } from 'react'
import { type ReactNode, type RefObject, useCallback, useEffect, useState, useSyncExternalStore } from 'react'
import { $delegationState } from '../app/delegationStore.js'
import { $turnState } from '../app/turnStore.js'
import { FACES } from '../content/faces.js'
import { VERBS } from '../content/verbs.js'
import { fmtDuration } from '../domain/messages.js'
import { stickyPromptFromViewport } from '../domain/viewport.js'
import { buildSubagentTree, treeTotals, widthByDepth } from '../lib/subagentTree.js'
import { fmtK } from '../lib/text.js'
import type { Theme } from '../theme.js'
import type { Msg, Usage } from '../types.js'
@@ -64,67 +60,6 @@ function ctxBar(pct: number | undefined, w = 10) {
return '█'.repeat(filled) + '░'.repeat(w - filled)
}
function SpawnHud({ t }: { t: Theme }) {
// Tight HUD that only appears when the session is actually fanning out.
// Colour escalates to warn/error as depth or concurrency approaches the cap.
const delegation = useStore($delegationState)
const turn = useStore($turnState)
const tree = useMemo(() => buildSubagentTree(turn.subagents), [turn.subagents])
const totals = useMemo(() => treeTotals(tree), [tree])
if (!totals.descendantCount && !delegation.paused) {
return null
}
const maxDepth = delegation.maxSpawnDepth
const maxConc = delegation.maxConcurrentChildren
const depth = Math.max(0, totals.maxDepthFromHere)
const active = totals.activeCount
// `max_concurrent_children` is a per-parent cap, not a global one.
// `activeCount` sums every running agent across the tree and would
// over-warn for multi-orchestrator runs. The widest level of the tree
// is a closer proxy to "most concurrent spawns that could be hitting a
// single parent's slot budget".
const widestLevel = widthByDepth(tree).reduce((a, b) => Math.max(a, b), 0)
const depthRatio = maxDepth ? depth / maxDepth : 0
const concRatio = maxConc ? widestLevel / maxConc : 0
const ratio = Math.max(depthRatio, concRatio)
const color = delegation.paused || ratio >= 1 ? t.color.error : ratio >= 0.66 ? t.color.warn : t.color.dim
const pieces: string[] = []
if (delegation.paused) {
pieces.push('⏸ paused')
}
if (totals.descendantCount > 0) {
const depthLabel = maxDepth ? `${depth}/${maxDepth}` : `${depth}`
pieces.push(`d${depthLabel}`)
if (active > 0) {
// Label pairs the widest-level count (drives concRatio above) with
// the total active count for context. `W/cap` triggers the warn,
// `+N` is everything else currently running across the tree.
const extra = Math.max(0, active - widestLevel)
const widthLabel = maxConc ? `${widestLevel}/${maxConc}` : `${widestLevel}`
const suffix = extra > 0 ? `+${extra}` : ''
pieces.push(`${widthLabel}${suffix}`)
}
}
const atCap = depthRatio >= 1 || concRatio >= 1
return (
<Text color={color}>
{atCap ? ' │ ⚠ ' : ' │ '}
{pieces.join(' ')}
</Text>
)
}
function SessionDuration({ startedAt }: { startedAt: number }) {
const [now, setNow] = useState(() => Date.now())
@@ -210,7 +145,6 @@ export function StatusRule({
<SessionDuration startedAt={sessionStartedAt} />
</Text>
) : null}
<SpawnHud t={t} />
{voiceLabel ? <Text color={t.color.dim}> {voiceLabel}</Text> : null}
{bgCount > 0 ? <Text color={t.color.dim}> {bgCount} bg</Text> : null}
{showCost && typeof usage.cost_usd === 'number' ? (

View File

@@ -2,15 +2,13 @@ import { AlternateScreen, Box, NoSelect, ScrollBox, Text } from '@hermes/ink'
import { useStore } from '@nanostores/react'
import { memo } from 'react'
import { useGateway } from '../app/gatewayContext.js'
import type { AppLayoutProgressProps, AppLayoutProps } from '../app/interfaces.js'
import { $isBlocked, $overlayState, patchOverlayState } from '../app/overlayStore.js'
import { $isBlocked } from '../app/overlayStore.js'
import { $uiState } from '../app/uiStore.js'
import { PLACEHOLDER } from '../content/placeholders.js'
import type { Theme } from '../theme.js'
import type { DetailsMode } from '../types.js'
import { AgentsOverlay } from './agentsOverlay.js'
import { GoodVibesHeart, StatusRule, StickyPromptTracker, TranscriptScrollbar } from './appChrome.js'
import { FloatingOverlays, PromptZone } from './appOverlays.js'
import { Banner, Panel, SessionPanel } from './branding.js'
@@ -258,21 +256,6 @@ const ComposerPane = memo(function ComposerPane({
)
})
const AgentsOverlayPane = memo(function AgentsOverlayPane() {
const { gw } = useGateway()
const ui = useStore($uiState)
const overlay = useStore($overlayState)
return (
<AgentsOverlay
gw={gw}
initialHistoryIndex={overlay.agentsInitialHistoryIndex}
onClose={() => patchOverlayState({ agents: false, agentsInitialHistoryIndex: 0 })}
t={ui.theme}
/>
)
})
export const AppLayout = memo(function AppLayout({
actions,
composer,
@@ -281,30 +264,22 @@ export const AppLayout = memo(function AppLayout({
status,
transcript
}: AppLayoutProps) {
const overlay = useStore($overlayState)
return (
<AlternateScreen mouseTracking={mouseTracking}>
<Box flexDirection="column" flexGrow={1}>
<Box flexDirection="row" flexGrow={1}>
{overlay.agents ? (
<AgentsOverlayPane />
) : (
<TranscriptPane actions={actions} composer={composer} progress={progress} transcript={transcript} />
)}
<TranscriptPane actions={actions} composer={composer} progress={progress} transcript={transcript} />
</Box>
{!overlay.agents && (
<PromptZone
cols={composer.cols}
onApprovalChoice={actions.answerApproval}
onClarifyAnswer={actions.answerClarify}
onSecretSubmit={actions.answerSecret}
onSudoSubmit={actions.answerSudo}
/>
)}
<PromptZone
cols={composer.cols}
onApprovalChoice={actions.answerApproval}
onClarifyAnswer={actions.answerClarify}
onSecretSubmit={actions.answerSecret}
onSudoSubmit={actions.answerSudo}
/>
{!overlay.agents && <ComposerPane actions={actions} composer={composer} status={status} />}
<ComposerPane actions={actions} composer={composer} status={status} />
</Box>
</AlternateScreen>
)

View File

@@ -615,7 +615,14 @@ export function TextInput({
return
}
if ((k.ctrl && inp === 'c') || k.tab || (k.shift && k.tab) || k.pageUp || k.pageDown || k.escape) {
if (
(k.ctrl && inp === 'c') ||
k.tab ||
(k.shift && k.tab) ||
k.pageUp ||
k.pageDown ||
k.escape
) {
return
}

View File

@@ -1,19 +1,8 @@
import { Box, NoSelect, Text } from '@hermes/ink'
import { memo, type ReactNode, useEffect, useMemo, useState } from 'react'
import { memo, useEffect, useMemo, useState, type ReactNode } from 'react'
import spinners, { type BrailleSpinnerName } from 'unicode-animations'
import { THINKING_COT_MAX } from '../config/limits.js'
import {
buildSubagentTree,
fmtCost,
fmtTokens,
formatSummary as formatSpawnSummary,
hotnessBucket,
peakHotness,
sparkline,
treeTotals,
widthByDepth
} from '../lib/subagentTree.js'
import {
compactPreview,
estimateTokensRough,
@@ -25,7 +14,7 @@ import {
toolTrailLabel
} from '../lib/text.js'
import type { Theme } from '../theme.js'
import type { ActiveTool, ActivityItem, DetailsMode, SubagentNode, SubagentProgress, ThinkingMode } from '../types.js'
import type { ActiveTool, ActivityItem, DetailsMode, SubagentProgress, ThinkingMode } from '../types.js'
const THINK: BrailleSpinnerName[] = ['helix', 'breathe', 'orbit', 'dna', 'waverows', 'snake', 'pulse']
const TOOL: BrailleSpinnerName[] = ['cascade', 'scan', 'diagswipe', 'fillsweep', 'rain', 'columns', 'sparkle']
@@ -117,8 +106,6 @@ function TreeNode({
header,
open,
rails = [],
stemColor,
stemDim,
t
}: {
branch: TreeBranch
@@ -126,13 +113,11 @@ function TreeNode({
header: ReactNode
open: boolean
rails?: TreeRails
stemColor?: string
stemDim?: boolean
t: Theme
}) {
return (
<Box flexDirection="column">
<TreeRow branch={branch} rails={rails} stemColor={stemColor} stemDim={stemDim} t={t}>
<TreeRow branch={branch} rails={rails} t={t}>
{header}
</TreeRow>
{open ? children?.(nextTreeRails(rails, branch)) : null}
@@ -254,31 +239,16 @@ function Chevron({
)
}
function heatColor(node: SubagentNode, peak: number, theme: Theme): string | undefined {
const palette = [theme.color.bronze, theme.color.amber, theme.color.gold, theme.color.warn, theme.color.error]
const idx = hotnessBucket(node.aggregate.hotness, peak, palette.length)
// Below the median bucket we keep the default dim stem so cool branches
// fade into the chrome — only "hot" branches draw the eye.
if (idx < 2) {
return undefined
}
return palette[idx]
}
function SubagentAccordion({
branch,
expanded,
node,
peak,
item,
rails = [],
t
}: {
branch: TreeBranch
expanded: boolean
node: SubagentNode
peak: number
item: SubagentProgress
rails?: TreeRails
t: Theme
}) {
@@ -287,7 +257,6 @@ function SubagentAccordion({
const [openThinking, setOpenThinking] = useState(expanded)
const [openTools, setOpenTools] = useState(expanded)
const [openNotes, setOpenNotes] = useState(expanded)
const [openKids, setOpenKids] = useState(expanded)
useEffect(() => {
if (!expanded) {
@@ -299,7 +268,6 @@ function SubagentAccordion({
setOpenThinking(true)
setOpenTools(true)
setOpenNotes(true)
setOpenKids(true)
}, [expanded])
const expandAll = () => {
@@ -308,13 +276,8 @@ function SubagentAccordion({
setOpenThinking(true)
setOpenTools(true)
setOpenNotes(true)
setOpenKids(true)
}
const item = node.item
const children = node.children
const aggregate = node.aggregate
const statusTone: 'dim' | 'error' | 'warn' =
item.status === 'failed' ? 'error' : item.status === 'interrupted' ? 'warn' : 'dim'
@@ -323,60 +286,10 @@ function SubagentAccordion({
const title = `${prefix}${open ? goalLabel : compactPreview(goalLabel, 60)}`
const summary = compactPreview((item.summary || '').replace(/\s+/g, ' ').trim(), 72)
// Suffix packs branch rollup: status · elapsed · per-branch tool/agent/token/cost.
// Emphasises the numbers the user can't easily eyeball from a flat list.
const statusLabel = item.status === 'queued' ? 'queued' : item.status === 'running' ? 'running' : String(item.status)
const rollupBits: string[] = [statusLabel]
if (item.durationSeconds) {
rollupBits.push(fmtElapsed(item.durationSeconds * 1000))
}
const localTools = item.toolCount ?? 0
const subtreeTools = aggregate.totalTools - localTools
if (localTools > 0) {
rollupBits.push(`${localTools} tool${localTools === 1 ? '' : 's'}`)
}
const localTokens = (item.inputTokens ?? 0) + (item.outputTokens ?? 0)
if (localTokens > 0) {
rollupBits.push(`${fmtTokens(localTokens)} tok`)
}
const localCost = item.costUsd ?? 0
if (localCost > 0) {
rollupBits.push(fmtCost(localCost))
}
const filesLocal = (item.filesWritten?.length ?? 0) + (item.filesRead?.length ?? 0)
if (filesLocal > 0) {
rollupBits.push(`${filesLocal}`)
}
if (children.length > 0) {
rollupBits.push(`${aggregate.descendantCount}`)
if (subtreeTools > 0) {
rollupBits.push(`+${subtreeTools}t sub`)
}
const subCost = aggregate.costUsd - localCost
if (subCost >= 0.01) {
rollupBits.push(`+${fmtCost(subCost)} sub`)
}
if (aggregate.activeCount > 0 && item.status !== 'running') {
rollupBits.push(`${aggregate.activeCount}`)
}
}
const suffix = rollupBits.join(' · ')
const suffix =
item.status === 'running'
? 'running'
: `${item.status}${item.durationSeconds ? ` · ${fmtElapsed(item.durationSeconds * 1000)}` : ''}`
const thinkingText = item.thinking.join('\n')
const hasThinking = Boolean(thinkingText)
@@ -505,50 +418,6 @@ function SubagentAccordion({
})
}
if (children.length > 0) {
// Nested grandchildren — rendered recursively via SubagentAccordion,
// sharing the same keybindings / expand semantics as top-level nodes.
sections.push({
header: (
<Chevron
count={children.length}
onClick={shift => {
if (shift) {
expandAll()
} else {
setOpenKids(v => !v)
}
}}
open={showChildren || openKids}
suffix={`d${item.depth + 1} · ${aggregate.descendantCount} total`}
t={t}
title="Spawned"
/>
),
key: 'subagents',
open: showChildren || openKids,
render: childRails => (
<Box flexDirection="column">
{children.map((child, i) => (
<SubagentAccordion
branch={i === children.length - 1 ? 'last' : 'mid'}
expanded={expanded || deep}
key={child.item.id}
node={child}
peak={peak}
rails={childRails}
t={t}
/>
))}
</Box>
)
})
}
// Heatmap: amber→error gradient on the stem when this branch is "hot"
// (high tools/sec) relative to the whole tree's peak.
const stem = heatColor(node, peak, t)
return (
<TreeNode
branch={branch}
@@ -578,8 +447,6 @@ function SubagentAccordion({
}
open={open}
rails={rails}
stemColor={stem}
stemDim={stem == null}
t={t}
>
{childRails => (
@@ -731,16 +598,6 @@ export const ToolTrail = memo(function ToolTrail({
const cot = useMemo(() => thinkingPreview(reasoning, 'full', THINKING_COT_MAX), [reasoning])
// Spawn-tree derivations must live above any early return so React's
// rules-of-hooks sees a stable call order. Cheap O(N) builds memoised
// by subagent-list identity.
const spawnTree = useMemo(() => buildSubagentTree(subagents), [subagents])
const spawnPeak = useMemo(() => peakHotness(spawnTree), [spawnTree])
const spawnTotals = useMemo(() => treeTotals(spawnTree), [spawnTree])
const spawnWidths = useMemo(() => widthByDepth(spawnTree), [spawnTree])
const spawnSpark = useMemo(() => sparkline(spawnWidths), [spawnWidths])
const spawnSummaryLabel = useMemo(() => formatSpawnSummary(spawnTotals), [spawnTotals])
if (
!busy &&
!trail.length &&
@@ -896,13 +753,12 @@ export const ToolTrail = memo(function ToolTrail({
const renderSubagentList = (rails: boolean[]) => (
<Box flexDirection="column">
{spawnTree.map((node, index) => (
{subagents.map((item, index) => (
<SubagentAccordion
branch={index === spawnTree.length - 1 ? 'last' : 'mid'}
branch={index === subagents.length - 1 ? 'last' : 'mid'}
expanded={detailsMode === 'expanded' || deepSubagents}
key={node.item.id}
node={node}
peak={spawnPeak}
item={item}
key={item.id}
rails={rails}
t={t}
/>
@@ -1025,14 +881,10 @@ export const ToolTrail = memo(function ToolTrail({
}
if (hasSubagents && !inlineDelegateKey) {
// Spark + summary give a one-line read on the branch shape before
// opening the subtree. `/agents` opens the full-screen audit overlay.
const suffix = spawnSpark ? `${spawnSummaryLabel} ${spawnSpark} (/agents)` : `${spawnSummaryLabel} (/agents)`
sections.push({
header: (
<Chevron
count={spawnTotals.descendantCount}
count={subagents.length}
onClick={shift => {
if (shift) {
expandAll()
@@ -1043,9 +895,8 @@ export const ToolTrail = memo(function ToolTrail({
}
}}
open={detailsMode === 'expanded' || openSubagents}
suffix={suffix}
t={t}
title="Spawn tree"
title="Subagents"
/>
),
key: 'subagents',

View File

@@ -280,85 +280,15 @@ export interface ReloadMcpResponse {
// ── Subagent events ──────────────────────────────────────────────────
export interface SubagentEventPayload {
api_calls?: number
cost_usd?: number
depth?: number
duration_seconds?: number
files_read?: string[]
files_written?: string[]
goal: string
input_tokens?: number
iteration?: number
model?: string
output_tail?: { is_error?: boolean; preview?: string; tool?: string }[]
output_tokens?: number
parent_id?: null | string
reasoning_tokens?: number
status?: 'completed' | 'failed' | 'interrupted' | 'queued' | 'running'
subagent_id?: string
status?: 'completed' | 'failed' | 'interrupted' | 'running'
summary?: string
task_count?: number
task_index: number
text?: string
tool_count?: number
tool_name?: string
tool_preview?: string
toolsets?: string[]
}
// ── Delegation control RPCs ──────────────────────────────────────────
export interface DelegationStatusResponse {
active?: {
depth?: number
goal?: string
model?: null | string
parent_id?: null | string
started_at?: number
status?: string
subagent_id?: string
tool_count?: number
}[]
max_concurrent_children?: number
max_spawn_depth?: number
paused?: boolean
}
export interface DelegationPauseResponse {
paused?: boolean
}
export interface SubagentInterruptResponse {
found?: boolean
subagent_id?: string
}
// ── Spawn-tree snapshots ─────────────────────────────────────────────
export interface SpawnTreeListEntry {
count: number
finished_at?: number
label?: string
path: string
session_id?: string
started_at?: number | null
}
export interface SpawnTreeListResponse {
entries?: SpawnTreeListEntry[]
}
export interface SpawnTreeLoadResponse {
finished_at?: number
label?: string
session_id?: string
started_at?: null | number
subagents?: unknown[]
}
export interface SpawnTreeSaveResponse {
path?: string
session_id?: string
}
export type GatewayEvent =
@@ -390,7 +320,6 @@ export type GatewayEvent =
| { payload: { env_var: string; prompt: string; request_id: string }; session_id?: string; type: 'secret.request' }
| { payload: { task_id: string; text: string }; session_id?: string; type: 'background.complete' }
| { payload: { text: string }; session_id?: string; type: 'btw.complete' }
| { payload: SubagentEventPayload; session_id?: string; type: 'subagent.spawn_requested' }
| { payload: SubagentEventPayload; session_id?: string; type: 'subagent.start' }
| { payload: SubagentEventPayload; session_id?: string; type: 'subagent.thinking' }
| { payload: SubagentEventPayload; session_id?: string; type: 'subagent.tool' }

View File

@@ -1,355 +0,0 @@
import type { SubagentAggregate, SubagentNode, SubagentProgress } from '../types.js'
const ROOT_KEY = '__root__'
/**
* Reconstruct the subagent spawn tree from a flat event-ordered list.
*
* Grouping is by `parentId`; a missing `parentId` (or one pointing at an
* unknown subagent) is treated as a top-level spawn of the current turn.
* Children within a parent are sorted by `depth` then `index` — same key
* used in `turnController.upsertSubagent`, so render order matches spawn
* order regardless of network reordering of gateway events.
*
* Older gateways omit `parentId`; every subagent is then a top-level node
* and the tree renders flat — matching pre-observability behaviour.
*/
export function buildSubagentTree(items: readonly SubagentProgress[]): SubagentNode[] {
if (!items.length) {
return []
}
const byParent = new Map<string, SubagentProgress[]>()
const known = new Set<string>()
for (const item of items) {
known.add(item.id)
}
for (const item of items) {
const parentKey = item.parentId && known.has(item.parentId) ? item.parentId : ROOT_KEY
const bucket = byParent.get(parentKey) ?? []
bucket.push(item)
byParent.set(parentKey, bucket)
}
for (const bucket of byParent.values()) {
bucket.sort((a, b) => a.depth - b.depth || a.index - b.index)
}
const build = (item: SubagentProgress): SubagentNode => {
const kids = byParent.get(item.id) ?? []
const children = kids.map(build)
return { aggregate: aggregate(item, children), children, item }
}
return (byParent.get(ROOT_KEY) ?? []).map(build)
}
/**
* Roll up counts for a node's whole subtree. Kept pure so the live view
* and the post-hoc replay can share the same renderer unchanged.
*
* `hotness` = tools per second across the subtree — a crude proxy for
* "how much work is happening in this branch". Used to colour tree rails
* in the overlay / inline view so the eye spots the expensive branch.
*/
export function aggregate(item: SubagentProgress, children: readonly SubagentNode[]): SubagentAggregate {
let totalTools = item.toolCount ?? 0
let totalDuration = item.durationSeconds ?? 0
let descendantCount = 0
let activeCount = isRunning(item) ? 1 : 0
let maxDepthFromHere = 0
let inputTokens = item.inputTokens ?? 0
let outputTokens = item.outputTokens ?? 0
let costUsd = item.costUsd ?? 0
let filesTouched = (item.filesRead?.length ?? 0) + (item.filesWritten?.length ?? 0)
for (const child of children) {
totalTools += child.aggregate.totalTools
totalDuration += child.aggregate.totalDuration
descendantCount += child.aggregate.descendantCount + 1
activeCount += child.aggregate.activeCount
maxDepthFromHere = Math.max(maxDepthFromHere, child.aggregate.maxDepthFromHere + 1)
inputTokens += child.aggregate.inputTokens
outputTokens += child.aggregate.outputTokens
costUsd += child.aggregate.costUsd
filesTouched += child.aggregate.filesTouched
}
const hotness = totalDuration > 0 ? totalTools / totalDuration : 0
return {
activeCount,
costUsd,
descendantCount,
filesTouched,
hotness,
inputTokens,
maxDepthFromHere,
outputTokens,
totalDuration,
totalTools
}
}
/**
* Count of subagents at each depth level, indexed by depth (0 = top level).
* Drives the inline sparkline (`▁▃▇▅`) and the status-bar HUD.
*/
export function widthByDepth(tree: readonly SubagentNode[]): number[] {
const widths: number[] = []
const walk = (nodes: readonly SubagentNode[], depth: number) => {
if (!nodes.length) {
return
}
widths[depth] = (widths[depth] ?? 0) + nodes.length
for (const node of nodes) {
walk(node.children, depth + 1)
}
}
walk(tree, 0)
return widths
}
/**
* Flat totals across the full tree — feeds the summary chip header.
*/
export function treeTotals(tree: readonly SubagentNode[]): SubagentAggregate {
let totalTools = 0
let totalDuration = 0
let descendantCount = 0
let activeCount = 0
let maxDepthFromHere = 0
let inputTokens = 0
let outputTokens = 0
let costUsd = 0
let filesTouched = 0
for (const node of tree) {
totalTools += node.aggregate.totalTools
totalDuration += node.aggregate.totalDuration
descendantCount += node.aggregate.descendantCount + 1
activeCount += node.aggregate.activeCount
maxDepthFromHere = Math.max(maxDepthFromHere, node.aggregate.maxDepthFromHere + 1)
inputTokens += node.aggregate.inputTokens
outputTokens += node.aggregate.outputTokens
costUsd += node.aggregate.costUsd
filesTouched += node.aggregate.filesTouched
}
const hotness = totalDuration > 0 ? totalTools / totalDuration : 0
return {
activeCount,
costUsd,
descendantCount,
filesTouched,
hotness,
inputTokens,
maxDepthFromHere,
outputTokens,
totalDuration,
totalTools
}
}
/**
* Flatten the tree into visit order — useful for keyboard navigation and
* for "kill subtree" walks that fire one RPC per descendant.
*/
export function flattenTree(tree: readonly SubagentNode[]): SubagentNode[] {
const out: SubagentNode[] = []
const walk = (nodes: readonly SubagentNode[]) => {
for (const node of nodes) {
out.push(node)
walk(node.children)
}
}
walk(tree)
return out
}
/**
* Collect every descendant's id for a given node (excluding the node itself).
*/
export function descendantIds(node: SubagentNode): string[] {
const ids: string[] = []
const walk = (children: readonly SubagentNode[]) => {
for (const child of children) {
ids.push(child.item.id)
walk(child.children)
}
}
walk(node.children)
return ids
}
export function isRunning(item: Pick<SubagentProgress, 'status'>): boolean {
return item.status === 'running' || item.status === 'queued'
}
const SPARK_RAMP = ['▁', '▂', '▃', '▄', '▅', '▆', '▇', '█'] as const
/**
* 8-step unicode bar sparkline from a positive-integer array. Zeroes render
* as spaces so a sparse tree doesn't read as equal activity at every depth.
*/
export function sparkline(values: readonly number[]): string {
if (!values.length) {
return ''
}
const max = Math.max(...values)
if (max <= 0) {
return ' '.repeat(values.length)
}
return values
.map(v => {
if (v <= 0) {
return ' '
}
const idx = Math.min(SPARK_RAMP.length - 1, Math.max(0, Math.ceil((v / max) * (SPARK_RAMP.length - 1))))
return SPARK_RAMP[idx]
})
.join('')
}
/**
* Format totals into a compact one-line summary: `d2 · 7 agents · 124 tools · 2m 14s`
*/
export function formatSummary(totals: SubagentAggregate): string {
const pieces = [`d${Math.max(0, totals.maxDepthFromHere)}`]
pieces.push(`${totals.descendantCount} agent${totals.descendantCount === 1 ? '' : 's'}`)
if (totals.totalTools > 0) {
pieces.push(`${totals.totalTools} tool${totals.totalTools === 1 ? '' : 's'}`)
}
if (totals.totalDuration > 0) {
pieces.push(fmtDuration(totals.totalDuration))
}
const tokens = totals.inputTokens + totals.outputTokens
if (tokens > 0) {
pieces.push(`${fmtTokens(tokens)} tok`)
}
if (totals.costUsd > 0) {
pieces.push(fmtCost(totals.costUsd))
}
if (totals.activeCount > 0) {
pieces.push(`${totals.activeCount}`)
}
return pieces.join(' · ')
}
/** Compact dollar amount: `$0.02`, `$1.34`, `$12.4` — never > 5 chars beyond the `$`. */
export function fmtCost(usd: number): string {
if (!Number.isFinite(usd) || usd <= 0) {
return ''
}
if (usd < 0.01) {
return '<$0.01'
}
if (usd < 10) {
return `$${usd.toFixed(2)}`
}
return `$${usd.toFixed(1)}`
}
/** Compact token count: `12k`, `1.2k`, `542`. */
export function fmtTokens(n: number): string {
if (!Number.isFinite(n) || n <= 0) {
return '0'
}
if (n < 1000) {
return String(Math.round(n))
}
if (n < 10_000) {
return `${(n / 1000).toFixed(1)}k`
}
return `${Math.round(n / 1000)}k`
}
/**
* `Ns` / `Nm` / `Nm Ss` formatter for seconds. Shared with the agents
* overlay so the timeline + list + summary all speak the same dialect.
*/
export function fmtDuration(seconds: number): string {
if (seconds < 60) {
return `${Math.max(0, Math.round(seconds))}s`
}
const m = Math.floor(seconds / 60)
const s = Math.round(seconds - m * 60)
return s === 0 ? `${m}m` : `${m}m ${s}s`
}
/**
* A subagent is top-level if it has no `parentId`, or its parent isn't in
* the same snapshot (orphaned by a pruned mid-flight root). Same rule
* `buildSubagentTree` uses — keep call sites consistent across the live
* view, disk label, and diff pane.
*/
export function topLevelSubagents(items: readonly SubagentProgress[]): SubagentProgress[] {
const ids = new Set(items.map(s => s.id))
return items.filter(s => !s.parentId || !ids.has(s.parentId))
}
/**
* Normalize a node's hotness into a palette index 0..N-1 where N = buckets.
* Higher hotness = "hotter" colour. Normalized against the tree's peak hotness
* so a uniformly slow tree still shows gradient across its busiest branches.
*/
export function hotnessBucket(hotness: number, peakHotness: number, buckets: number): number {
if (!Number.isFinite(hotness) || hotness <= 0 || peakHotness <= 0 || buckets <= 1) {
return 0
}
const ratio = Math.min(1, hotness / peakHotness)
return Math.min(buckets - 1, Math.max(0, Math.round(ratio * (buckets - 1))))
}
export function peakHotness(tree: readonly SubagentNode[]): number {
let peak = 0
const walk = (nodes: readonly SubagentNode[]) => {
for (const node of nodes) {
peak = Math.max(peak, node.aggregate.hotness)
walk(node.children)
}
}
walk(tree)
return peak
}

View File

@@ -94,12 +94,7 @@ export const DARK_THEME: Theme = {
amber: '#FFBF00',
bronze: '#CD7F32',
cornsilk: '#FFF8DC',
// Bumped from the old `#B8860B` darkgoldenrod (~53% luminance) which
// read as barely-visible on dark terminals for long body text. The
// new value sits ~60% luminance — readable without losing the "muted /
// secondary" semantic. Field labels still use `label` (65%) which
// stays brighter so hierarchy holds.
dim: '#CC9B1F',
dim: '#B8860B',
completionBg: '#FFFFFF',
completionCurrentBg: mix('#FFFFFF', '#FFBF00', 0.25),
@@ -109,11 +104,8 @@ export const DARK_THEME: Theme = {
warn: '#ffa726',
prompt: '#FFF8DC',
// sessionLabel/sessionBorder intentionally track the `dim` value — they
// are "same role, same colour" by design. fromSkin's banner_dim fallback
// relies on this pairing (#11300).
sessionLabel: '#CC9B1F',
sessionBorder: '#CC9B1F',
sessionLabel: '#B8860B',
sessionBorder: '#B8860B',
statusBg: '#1a1a2e',
statusFg: '#C0C0C0',

View File

@@ -12,72 +12,16 @@ export interface ActivityItem {
}
export interface SubagentProgress {
apiCalls?: number
costUsd?: number
depth: number
durationSeconds?: number
filesRead?: string[]
filesWritten?: string[]
goal: string
id: string
index: number
inputTokens?: number
iteration?: number
model?: string
notes: string[]
outputTail?: SubagentOutputEntry[]
outputTokens?: number
parentId: null | string
reasoningTokens?: number
startedAt?: number
status: 'completed' | 'failed' | 'interrupted' | 'queued' | 'running'
status: 'completed' | 'failed' | 'interrupted' | 'running'
summary?: string
taskCount: number
thinking: string[]
toolCount: number
tools: string[]
toolsets?: string[]
}
export interface SubagentOutputEntry {
isError: boolean
preview: string
tool: string
}
export interface SubagentNode {
aggregate: SubagentAggregate
children: SubagentNode[]
item: SubagentProgress
}
export interface SubagentAggregate {
activeCount: number
costUsd: number
descendantCount: number
filesTouched: number
hotness: number
inputTokens: number
maxDepthFromHere: number
outputTokens: number
totalDuration: number
totalTools: number
}
export interface DelegationStatus {
active: {
depth?: number
goal?: string
model?: null | string
parent_id?: null | string
started_at?: number
status?: string
subagent_id?: string
tool_count?: number
}[]
max_concurrent_children?: number
max_spawn_depth?: number
paused: boolean
}
export interface ApprovalReq {

View File

@@ -30,6 +30,8 @@ You need at least one way to connect to an LLM. Use `hermes model` to switch pro
| **Alibaba Cloud** | `DASHSCOPE_API_KEY` in `~/.hermes/.env` (provider: `alibaba`, aliases: `dashscope`, `qwen`) |
| **Kilo Code** | `KILOCODE_API_KEY` in `~/.hermes/.env` (provider: `kilocode`) |
| **Xiaomi MiMo** | `XIAOMI_API_KEY` in `~/.hermes/.env` (provider: `xiaomi`, aliases: `mimo`, `xiaomi-mimo`) |
| **Volcengine** | `hermes model` or `VOLCENGINE_API_KEY` in `~/.hermes/.env` (provider: `volcengine`) |
| **BytePlus** | `hermes model` or `BYTEPLUS_API_KEY` in `~/.hermes/.env` (provider: `byteplus`) |
| **OpenCode Zen** | `OPENCODE_ZEN_API_KEY` in `~/.hermes/.env` (provider: `opencode-zen`) |
| **OpenCode Go** | `OPENCODE_GO_API_KEY` in `~/.hermes/.env` (provider: `opencode-go`) |
| **DeepSeek** | `DEEPSEEK_API_KEY` in `~/.hermes/.env` (provider: `deepseek`) |
@@ -274,17 +276,59 @@ hermes chat --provider xiaomi --model mimo-v2-pro
# Arcee AI (Trinity models)
hermes chat --provider arcee --model trinity-large-thinking
# Requires: ARCEEAI_API_KEY in ~/.hermes/.env
# Volcengine
hermes chat --provider volcengine --model volcengine/doubao-seed-2-0-pro-260215
# Requires: VOLCENGINE_API_KEY in ~/.hermes/.env
# Volcengine Coding Plan catalog (same provider, same API key)
hermes chat --provider volcengine --model volcengine-coding-plan/doubao-seed-2.0-code
# BytePlus
hermes chat --provider byteplus --model byteplus/seed-2-0-pro-260328
# Requires: BYTEPLUS_API_KEY in ~/.hermes/.env
# BytePlus Coding Plan catalog (same provider, same API key)
hermes chat --provider byteplus --model byteplus-coding-plan/dola-seed-2.0-pro
```
Or set the provider permanently in `config.yaml`:
```yaml
model:
provider: "zai" # or: kimi-coding, kimi-coding-cn, minimax, minimax-cn, alibaba, xiaomi, arcee
provider: "zai" # or: kimi-coding, kimi-coding-cn, minimax, minimax-cn, alibaba, xiaomi, arcee, volcengine, byteplus
default: "glm-5"
```
Base URLs can be overridden with `GLM_BASE_URL`, `KIMI_BASE_URL`, `MINIMAX_BASE_URL`, `MINIMAX_CN_BASE_URL`, `DASHSCOPE_BASE_URL`, or `XIAOMI_BASE_URL` environment variables.
### Volcengine and BytePlus Contract Catalogs
Hermes exposes **two** built-in providers for these integrations:
- `volcengine`
- `byteplus`
Each provider includes both its standard catalog and its Coding Plan catalog. The selected model ID determines the runtime base URL automatically:
- `volcengine/...` -> `https://ark.cn-beijing.volces.com/api/v3`
- `volcengine-coding-plan/...` -> `https://ark.cn-beijing.volces.com/api/coding/v3`
- `byteplus/...` -> `https://ark.ap-southeast.bytepluses.com/api/v3`
- `byteplus-coding-plan/...` -> `https://ark.ap-southeast.bytepluses.com/api/coding/v3`
In `hermes model`, the setup flow is:
1. Enter API key
2. Select a model
If you pick a `volcengine-coding-plan/...` or `byteplus-coding-plan/...` model, Hermes automatically uses the corresponding coding-plan base URL.
The API key is shared per provider:
- `VOLCENGINE_API_KEY` works for both `volcengine/...` and `volcengine-coding-plan/...`
- `BYTEPLUS_API_KEY` works for both `byteplus/...` and `byteplus-coding-plan/...`
Use `hermes model` to pick from the built-in curated catalogs. Hermes saves the canonical prefixed model ID in `config.yaml`, so standard and Coding Plan variants remain unambiguous.
:::note Z.AI Endpoint Auto-Detection
When using the Z.AI / GLM provider, Hermes automatically probes multiple endpoints (global, China, coding variants) to find one that accepts your API key. You don't need to set `GLM_BASE_URL` manually — the working endpoint is detected and cached automatically.
:::

View File

@@ -44,6 +44,8 @@ All variables go in `~/.hermes/.env`. You can also set them with `hermes config
| `KILOCODE_BASE_URL` | Override Kilo Code base URL (default: `https://api.kilo.ai/api/gateway`) |
| `XIAOMI_API_KEY` | Xiaomi MiMo API key ([platform.xiaomimimo.com](https://platform.xiaomimimo.com)) |
| `XIAOMI_BASE_URL` | Override Xiaomi MiMo base URL (default: `https://api.xiaomimimo.com/v1`) |
| `VOLCENGINE_API_KEY` | Volcengine API key for Doubao / Seed models ([volcengine.com/product/ark](https://www.volcengine.com/product/ark)) |
| `BYTEPLUS_API_KEY` | BytePlus API key for Seed / Dola models ([byteplus.com/en/product/modelark](https://www.byteplus.com/en/product/modelark)) |
| `HF_TOKEN` | Hugging Face token for Inference Providers ([huggingface.co/settings/tokens](https://huggingface.co/settings/tokens)) |
| `HF_BASE_URL` | Override Hugging Face base URL (default: `https://router.huggingface.co/v1`) |
| `GOOGLE_API_KEY` | Google AI Studio API key ([aistudio.google.com/app/apikey](https://aistudio.google.com/app/apikey)) |

View File

@@ -628,7 +628,7 @@ Every model slot in Hermes — auxiliary tasks, compression, fallback — uses t
When `base_url` is set, Hermes ignores the provider and calls that endpoint directly (using `api_key` or `OPENAI_API_KEY` for auth). When only `provider` is set, Hermes uses that provider's built-in auth and base URL.
Available providers for auxiliary tasks: `auto`, `main`, plus any provider in the [provider registry](/docs/reference/environment-variables) — `openrouter`, `nous`, `openai-codex`, `copilot`, `copilot-acp`, `anthropic`, `gemini`, `google-gemini-cli`, `qwen-oauth`, `zai`, `kimi-coding`, `kimi-coding-cn`, `minimax`, `minimax-cn`, `deepseek`, `nvidia`, `xai`, `ollama-cloud`, `alibaba`, `bedrock`, `huggingface`, `arcee`, `xiaomi`, `kilocode`, `opencode-zen`, `opencode-go`, `ai-gateway` — or any named custom provider from your `custom_providers` list (e.g. `provider: "beans"`).
Available providers for auxiliary tasks: `auto`, `main`, plus any provider in the [provider registry](/docs/reference/environment-variables) — `openrouter`, `nous`, `openai-codex`, `copilot`, `copilot-acp`, `anthropic`, `gemini`, `google-gemini-cli`, `qwen-oauth`, `zai`, `kimi-coding`, `kimi-coding-cn`, `minimax`, `minimax-cn`, `deepseek`, `nvidia`, `xai`, `ollama-cloud`, `alibaba`, `bedrock`, `huggingface`, `arcee`, `xiaomi`, `volcengine`, `byteplus`, `kilocode`, `opencode-zen`, `opencode-go`, `ai-gateway` — or any named custom provider from your `custom_providers` list (e.g. `provider: "beans"`).
:::warning `"main"` is for auxiliary tasks only
The `"main"` provider option means "use whatever provider my main agent uses" — it's only valid inside `auxiliary:`, `compression:`, and `fallback_model:` configs. It is **not** a valid value for your top-level `model.provider` setting. If you use a custom OpenAI-compatible endpoint, set `provider: custom` in your `model:` section. See [AI Providers](/docs/integrations/providers) for all main model provider options.

View File

@@ -58,6 +58,8 @@ Both `provider` and `model` are **required**. If either is missing, the fallback
| OpenCode Go | `opencode-go` | `OPENCODE_GO_API_KEY` |
| Kilo Code | `kilocode` | `KILOCODE_API_KEY` |
| Xiaomi MiMo | `xiaomi` | `XIAOMI_API_KEY` |
| Volcengine | `volcengine` | `VOLCENGINE_API_KEY` |
| BytePlus | `byteplus` | `BYTEPLUS_API_KEY` |
| Arcee AI | `arcee` | `ARCEEAI_API_KEY` |
| Alibaba / DashScope | `alibaba` | `DASHSCOPE_API_KEY` |
| Hugging Face | `huggingface` | `HF_TOKEN` |