Compare commits
372 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 60e75674e7 | |||
| dd83173621 | |||
| c65c1ddf21 | |||
| 1970bcf5a5 | |||
| 832ecde4b0 | |||
| be184aa5fa | |||
| 63b7b6d5bd | |||
| 123f8d0fed | |||
| a24c6e191f | |||
| 7206eed319 | |||
| 1619c0e503 | |||
| e27c819de3 | |||
| 1c78f6627a | |||
| 8ef2ae6502 | |||
| 0146cb2bd2 | |||
| da7d09c3b6 | |||
| af8d43dbbb | |||
| 27fc6c1086 | |||
| 45806629c5 | |||
| 4093201c47 | |||
| 9f610aa8f3 | |||
| e1c5e741ad | |||
| e3901d5b25 | |||
| 06f81752ed | |||
| 9ef1ae138a | |||
| c5196f1fc2 | |||
| 63bf7a29b6 | |||
| 15937a6b46 | |||
| 454d883e69 | |||
| 70f56e7605 | |||
| 7fa70b6c87 | |||
| 9a70260490 | |||
| ffd2621039 | |||
| 1e37ddc929 | |||
| 83c1c201f6 | |||
| 4bda9dcade | |||
| 67dcace412 | |||
| 35c57cc46b | |||
| e8441c4c0f | |||
| 2511207cb0 | |||
| 0f3a6f0fb3 | |||
| a562420383 | |||
| 855366909f | |||
| d09ab8ff13 | |||
| 438db0c7b0 | |||
| 2ccdadcca6 | |||
| 76042f5867 | |||
| 192e7eb21f | |||
| 59b56d445c | |||
| eb28145f36 | |||
| a55de5bcd0 | |||
| cec0af02ad | |||
| 91a7a0acbe | |||
| 7c50ed707c | |||
| 731e1ef8cb | |||
| ac57114284 | |||
| 24b4b24d79 | |||
| c15064fa37 | |||
| 7bfa9442de | |||
| d8e4c7214e | |||
| 6ef3a47ce5 | |||
| 3a7653dd1f | |||
| 125de02056 | |||
| 4c591c2819 | |||
| 01535a4732 | |||
| 0a15dbdc43 | |||
| ce0513dd2e | |||
| dc5e02ea7f | |||
| ff851ba7b9 | |||
| 14dd8e9a72 | |||
| 1d80e92c7e | |||
| edce7522a5 | |||
| 45e1228a8a | |||
| 83129e72de | |||
| 4d170134ef | |||
| 81e01f6ee9 | |||
| 7fd8dc0bfb | |||
| d056b610b7 | |||
| 2536a36f6f | |||
| 1b8ca9254f | |||
| db7c5735f0 | |||
| 8bbeaea6c7 | |||
| 1fdc31b214 | |||
| 5fac6c3440 | |||
| 2c56dce0ed | |||
| 01cf2c65cc | |||
| b2d3308f98 | |||
| 25ba6a4a74 | |||
| 4c797bfae9 | |||
| c58956a9a2 | |||
| 3944b22506 | |||
| 489bed6f96 | |||
| ad0ac89478 | |||
| dc4d92f131 | |||
| 47420a84b9 | |||
| f93d4624bf | |||
| 5ae608152e | |||
| 88b65cc82a | |||
| edc78e258c | |||
| 31d7f1951a | |||
| b1c18e5a41 | |||
| bd66e55a02 | |||
| 1735ced93b | |||
| bba16943f6 | |||
| 132620ba3d | |||
| 876bb60044 | |||
| a68793b6c4 | |||
| bcc5362432 | |||
| 283c8fd6e2 | |||
| 919274b60e | |||
| 6e83d90eb4 | |||
| c6fdf48b79 | |||
| a046483e86 | |||
| fdcbd2257b | |||
| 48bdd2445e | |||
| 5e52011de3 | |||
| e48a497d16 | |||
| 2dfcc8087a | |||
| 4db58d45d4 | |||
| 57b43fdd4b | |||
| e9c47c7042 | |||
| ee0728c6c4 | |||
| 9daa0620a6 | |||
| 648b89911f | |||
| 7c17accb29 | |||
| 5006b2204b | |||
| a9fa73a620 | |||
| 7c8c031f60 | |||
| ea01bdcebe | |||
| d635e2df3f | |||
| cf2fabc40f | |||
| af22421e87 | |||
| 97d54f0e4d | |||
| 6e561ffa6d | |||
| ac05daa189 | |||
| 3c1c65e754 | |||
| f92006ce1c | |||
| b35d692f45 | |||
| facea84559 | |||
| f67a61dc93 | |||
| 6ed37e0f42 | |||
| 591deeb928 | |||
| 5ae07e7b5c | |||
| 47b02e961c | |||
| 0702231dd8 | |||
| db09477b77 | |||
| 81987f0350 | |||
| 9830905dab | |||
| 0d548d1db9 | |||
| eb92222811 | |||
| e4a91ccb76 | |||
| 5ac5365923 | |||
| f433197f23 | |||
| df485628ce | |||
| 9fde22d233 | |||
| 9d7b64b5dd | |||
| 5401a0080d | |||
| e5647d7863 | |||
| 023b1bff11 | |||
| 6407b3d5b3 | |||
| 0a59994030 | |||
| 0ed37c0ca4 | |||
| 1c8ce33d51 | |||
| 2182de55bb | |||
| 3cf13747b7 | |||
| 3e61703b08 | |||
| 05d8f11085 | |||
| 13038dc747 | |||
| 629e108ee2 | |||
| c34d3f4807 | |||
| f14264c438 | |||
| 19a3e2ce8e | |||
| d58b305adf | |||
| e93cc934c7 | |||
| 93a2d6b307 | |||
| 4fade39c90 | |||
| f731c2c2bd | |||
| 00c3d848d8 | |||
| fd10463069 | |||
| c599a41b84 | |||
| c7d62b3fe3 | |||
| 36d68bcb82 | |||
| a29bad2a3c | |||
| 7957da7a1d | |||
| fd3864d8bd | |||
| 8ea389a7f8 | |||
| 3e6c108565 | |||
| e3a1a9c24d | |||
| e3697e20a6 | |||
| ed91b79b7e | |||
| 08d5c9c539 | |||
| 1dcf79a864 | |||
| 2de8a7a229 | |||
| ead66f0c92 | |||
| 0bcbc9e316 | |||
| 2d444fc84d | |||
| bb53d79d26 | |||
| 17fc84c256 | |||
| b7c1d77e55 | |||
| 7a192b124e | |||
| 0738b80833 | |||
| 4093ee9c62 | |||
| 6a957a74bc | |||
| 14b27bb68c | |||
| ef9355455b | |||
| dbdefa43c8 | |||
| db9d6375fb | |||
| 8a2506af43 | |||
| e7590f92a2 | |||
| a5129c72ef | |||
| 53fc10fc9a | |||
| 93ddff53e3 | |||
| de596aca1c | |||
| 6f1eed3968 | |||
| e3940f9807 | |||
| bfa60234c8 | |||
| fd9b692d33 | |||
| c61547c067 | |||
| 7f0f67d5f7 | |||
| f5e2a77a80 | |||
| 850fac14e3 | |||
| 5500b51800 | |||
| 63975aa75b | |||
| 62c14d5513 | |||
| 10deb1b87d | |||
| f49afd3122 | |||
| 1143f234e3 | |||
| c4627f4933 | |||
| 7c3e5706d8 | |||
| a9ccb03ccc | |||
| 7dc6eb9fbf | |||
| b290297d66 | |||
| f2fba4f9a1 | |||
| fcc05284fc | |||
| 1840c6a57d | |||
| 591aa159aa | |||
| d3e56b9f39 | |||
| c6b734e24d | |||
| 54146ae07c | |||
| be6b83562d | |||
| e1106772d9 | |||
| 5383615db5 | |||
| 56086e3fd7 | |||
| 8d12fb1e6b | |||
| e5d41f05d4 | |||
| 0fdbfad2b0 | |||
| 9d1b277e1d | |||
| 4a51ab61eb | |||
| 7f26cea390 | |||
| 2303dd8686 | |||
| 647900e813 | |||
| 25465fd8d7 | |||
| 260ae62134 | |||
| 9be17bb84f | |||
| fe9d9a26d8 | |||
| ee83a710f0 | |||
| f7f7588893 | |||
| a9fd8d7c88 | |||
| 46451528a5 | |||
| 4e27e498f1 | |||
| ba44a3d256 | |||
| a1caec1088 | |||
| 05394f2f28 | |||
| 0d32411310 | |||
| e87a2100f6 | |||
| 8c2732a9f9 | |||
| 15050fd965 | |||
| 5fa2f4258a | |||
| 4ac731c841 | |||
| 4f5669a569 | |||
| acd78a457e | |||
| 4ff7950f7f | |||
| 7e9dd9ca45 | |||
| 3392d1e422 | |||
| 785d168d50 | |||
| cd221080ec | |||
| 1fc77f995b | |||
| 1af44a13c0 | |||
| fff7ee31ae | |||
| 6fcaf5ebc2 | |||
| 461899894e | |||
| b3aed6cfd8 | |||
| 76329196c1 | |||
| d7ad07d6fe | |||
| 2cab8129d1 | |||
| 7d2f93a97f | |||
| 78450c4bd6 | |||
| 852c7f3be3 | |||
| 0e235947b9 | |||
| c2b3db48f5 | |||
| 1eb29e6452 | |||
| 7634c1386f | |||
| 3cb43df2cd | |||
| 1dca2e0a28 | |||
| 2f39dbe471 | |||
| 271f0e6eb0 | |||
| 813dbd9b40 | |||
| f76df30e08 | |||
| 227afcd80f | |||
| 06b60b76cd | |||
| 14c9f7272c | |||
| ccc8fccf77 | |||
| 3aa1a41e88 | |||
| 346601ca8d | |||
| 18f3fc8a6f | |||
| 1f9c368622 | |||
| edff2fbe7e | |||
| f9c6c5ab84 | |||
| 3a86f70969 | |||
| f1ba2f0c0b | |||
| 403c82b6b6 | |||
| 93a74f74bf | |||
| b4c030025f | |||
| 42d6ab5082 | |||
| fe34741f32 | |||
| 2e2de124af | |||
| df55660e3c | |||
| 7897f65a94 | |||
| 3e994e38f7 | |||
| 127048e643 | |||
| d6b65bbc47 | |||
| a5c7422f23 | |||
| 3c0a728607 | |||
| 339123481e | |||
| 9e6f34a76e | |||
| 7626f3702e | |||
| 9de555f3e3 | |||
| ac25e6c99a | |||
| b2e124d082 | |||
| b29287258a | |||
| bc15f526fb | |||
| ba3284f34a | |||
| f24956ba12 | |||
| 166b960fe4 | |||
| cbc39a8672 | |||
| dfc5563641 | |||
| 8a1e247c6c | |||
| 8598746e86 | |||
| f58a16f520 | |||
| 621fd348dc | |||
| 3e10f339fd | |||
| 5fdba79eb4 | |||
| 2ba9b29f37 | |||
| 1ef1e4c669 | |||
| 8aa37a0cf9 | |||
| b0cb81a089 | |||
| 727d1088c4 | |||
| a9a4416c7c | |||
| 4350668ae4 | |||
| 34c3e67109 | |||
| 5dda4cab41 | |||
| 6604e94c75 | |||
| 67bfd4b828 | |||
| 70925363b6 | |||
| 005cc29e98 | |||
| 728767e910 | |||
| 78481ac124 | |||
| 6051fba9dc | |||
| 2acc8783d1 | |||
| acdcb167fb | |||
| 51f4c9827f | |||
| 2e78a2b6b2 | |||
| 5a1c599412 | |||
| 0f6eabb890 | |||
| 809868e628 | |||
| eb93f88e1d | |||
| 3ccda2aa05 | |||
| e5d2815b41 | |||
| 983bbe2d40 | |||
| 379b2273d9 | |||
| 7db2703b33 | |||
| 7c59e1a871 |
@@ -53,6 +53,9 @@ jobs:
|
||||
- name: Extract skill metadata for dashboard
|
||||
run: python3 website/scripts/extract-skills.py
|
||||
|
||||
- name: Regenerate per-skill docs pages + catalogs
|
||||
run: python3 website/scripts/generate-skill-docs.py
|
||||
|
||||
- name: Build skills index (if not already present)
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
@@ -36,6 +36,9 @@ jobs:
|
||||
- name: Extract skill metadata for dashboard
|
||||
run: python3 website/scripts/extract-skills.py
|
||||
|
||||
- name: Regenerate per-skill docs pages + catalogs
|
||||
run: python3 website/scripts/generate-skill-docs.py
|
||||
|
||||
- name: Lint docs diagrams
|
||||
run: npm run lint:diagrams
|
||||
working-directory: website
|
||||
|
||||
@@ -240,6 +240,19 @@ npm run fmt # prettier
|
||||
npm test # vitest
|
||||
```
|
||||
|
||||
### TUI in the Dashboard (`hermes dashboard` → `/chat`)
|
||||
|
||||
The dashboard embeds the real `hermes --tui` — **not** a rewrite. See `hermes_cli/pty_bridge.py` + the `@app.websocket("/api/pty")` endpoint in `hermes_cli/web_server.py`.
|
||||
|
||||
- Browser loads `web/src/pages/ChatPage.tsx`, which mounts xterm.js's `Terminal` with the WebGL renderer, `@xterm/addon-fit` for container-driven resize, and `@xterm/addon-unicode11` for modern wide-character widths.
|
||||
- `/api/pty?token=…` upgrades to a WebSocket; auth uses the same ephemeral `_SESSION_TOKEN` as REST, via query param (browsers can't set `Authorization` on WS upgrade).
|
||||
- The server spawns whatever `hermes --tui` would spawn, through `ptyprocess` (POSIX PTY — WSL works, native Windows does not).
|
||||
- Frames: raw PTY bytes each direction; resize via `\x1b[RESIZE:<cols>;<rows>]` intercepted on the server and applied with `TIOCSWINSZ`.
|
||||
|
||||
**Do not re-implement the primary chat experience in React.** The main transcript, composer/input flow (including slash-command behavior), and PTY-backed terminal belong to the embedded `hermes --tui` — anything new you add to Ink shows up in the dashboard automatically. If you find yourself rebuilding the transcript or composer for the dashboard, stop and extend Ink instead.
|
||||
|
||||
**Structured React UI around the TUI is allowed when it is not a second chat surface.** Sidebar widgets, inspectors, summaries, status panels, and similar supporting views (e.g. `ChatSidebar`, `ModelPickerDialog`, `ToolCall`) are fine when they complement the embedded TUI rather than replacing the transcript / composer / terminal. Keep their state independent of the PTY child's session and surface their failures non-destructively so the terminal pane keeps working unimpaired.
|
||||
|
||||
---
|
||||
|
||||
## Adding New Tools
|
||||
|
||||
+12
-4
@@ -10,9 +10,11 @@ ENV PYTHONUNBUFFERED=1
|
||||
ENV PLAYWRIGHT_BROWSERS_PATH=/opt/hermes/.playwright
|
||||
|
||||
# Install system dependencies in one layer, clear APT cache
|
||||
# tini reaps orphaned zombie processes (MCP stdio subprocesses, git, bun, etc.)
|
||||
# that would otherwise accumulate when hermes runs as PID 1. See #15012.
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
build-essential nodejs npm python3 ripgrep ffmpeg gcc python3-dev libffi-dev procps git openssh-client docker-cli && \
|
||||
build-essential nodejs npm python3 ripgrep ffmpeg gcc python3-dev libffi-dev procps git openssh-client docker-cli tini && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Non-root user for runtime; UID can be overridden via HERMES_UID at runtime
|
||||
@@ -41,9 +43,15 @@ COPY --chown=hermes:hermes . .
|
||||
# Build web dashboard (Vite outputs to hermes_cli/web_dist/)
|
||||
RUN cd web && npm run build
|
||||
|
||||
# ---------- Permissions ----------
|
||||
# Make install dir world-readable so any HERMES_UID can read it at runtime.
|
||||
# The venv needs to be traversable too.
|
||||
USER root
|
||||
RUN chmod -R a+rX /opt/hermes
|
||||
# Start as root so the entrypoint can usermod/groupmod + gosu.
|
||||
# If HERMES_UID is unset, the entrypoint drops to the default hermes user (10000).
|
||||
|
||||
# ---------- Python virtualenv ----------
|
||||
RUN chown hermes:hermes /opt/hermes
|
||||
USER hermes
|
||||
RUN uv venv && \
|
||||
uv pip install --no-cache-dir -e ".[all]"
|
||||
|
||||
@@ -52,4 +60,4 @@ ENV HERMES_WEB_DIST=/opt/hermes/hermes_cli/web_dist
|
||||
ENV HERMES_HOME=/opt/data
|
||||
ENV PATH="/opt/data/.local/bin:${PATH}"
|
||||
VOLUME [ "/opt/data" ]
|
||||
ENTRYPOINT [ "/opt/hermes/docker/entrypoint.sh" ]
|
||||
ENTRYPOINT [ "/usr/bin/tini", "-g", "--", "/opt/hermes/docker/entrypoint.sh" ]
|
||||
|
||||
@@ -60,7 +60,7 @@ from acp_adapter.events import (
|
||||
make_tool_progress_cb,
|
||||
)
|
||||
from acp_adapter.permissions import make_approval_callback
|
||||
from acp_adapter.session import SessionManager, SessionState
|
||||
from acp_adapter.session import SessionManager, SessionState, _expand_acp_enabled_toolsets
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -287,7 +287,11 @@ class HermesACPAgent(acp.Agent):
|
||||
try:
|
||||
from model_tools import get_tool_definitions
|
||||
|
||||
enabled_toolsets = getattr(state.agent, "enabled_toolsets", None) or ["hermes-acp"]
|
||||
enabled_toolsets = _expand_acp_enabled_toolsets(
|
||||
getattr(state.agent, "enabled_toolsets", None) or ["hermes-acp"],
|
||||
mcp_server_names=[server.name for server in mcp_servers],
|
||||
)
|
||||
state.agent.enabled_toolsets = enabled_toolsets
|
||||
disabled_toolsets = getattr(state.agent, "disabled_toolsets", None)
|
||||
state.agent.tools = get_tool_definitions(
|
||||
enabled_toolsets=enabled_toolsets,
|
||||
@@ -754,7 +758,9 @@ class HermesACPAgent(acp.Agent):
|
||||
def _cmd_tools(self, args: str, state: SessionState) -> str:
|
||||
try:
|
||||
from model_tools import get_tool_definitions
|
||||
toolsets = getattr(state.agent, "enabled_toolsets", None) or ["hermes-acp"]
|
||||
toolsets = _expand_acp_enabled_toolsets(
|
||||
getattr(state.agent, "enabled_toolsets", None) or ["hermes-acp"]
|
||||
)
|
||||
tools = get_tool_definitions(enabled_toolsets=toolsets, quiet_mode=True)
|
||||
if not tools:
|
||||
return "No tools available."
|
||||
|
||||
+28
-1
@@ -106,6 +106,24 @@ def _register_task_cwd(task_id: str, cwd: str) -> None:
|
||||
logger.debug("Failed to register ACP task cwd override", exc_info=True)
|
||||
|
||||
|
||||
def _expand_acp_enabled_toolsets(
|
||||
toolsets: List[str] | None = None,
|
||||
mcp_server_names: List[str] | None = None,
|
||||
) -> List[str]:
|
||||
"""Return ACP toolsets plus explicit MCP server toolsets for this session."""
|
||||
expanded: List[str] = []
|
||||
for name in list(toolsets or ["hermes-acp"]):
|
||||
if name and name not in expanded:
|
||||
expanded.append(name)
|
||||
|
||||
for server_name in list(mcp_server_names or []):
|
||||
toolset_name = f"mcp-{server_name}"
|
||||
if server_name and toolset_name not in expanded:
|
||||
expanded.append(toolset_name)
|
||||
|
||||
return expanded
|
||||
|
||||
|
||||
def _clear_task_cwd(task_id: str) -> None:
|
||||
"""Remove task-specific cwd overrides for an ACP session."""
|
||||
if not task_id:
|
||||
@@ -537,9 +555,18 @@ class SessionManager:
|
||||
elif isinstance(model_cfg, str) and model_cfg.strip():
|
||||
default_model = model_cfg.strip()
|
||||
|
||||
configured_mcp_servers = [
|
||||
name
|
||||
for name, cfg in (config.get("mcp_servers") or {}).items()
|
||||
if not isinstance(cfg, dict) or cfg.get("enabled", True) is not False
|
||||
]
|
||||
|
||||
kwargs = {
|
||||
"platform": "acp",
|
||||
"enabled_toolsets": ["hermes-acp"],
|
||||
"enabled_toolsets": _expand_acp_enabled_toolsets(
|
||||
["hermes-acp"],
|
||||
mcp_server_names=configured_mcp_servers,
|
||||
),
|
||||
"quiet_mode": True,
|
||||
"session_id": session_id,
|
||||
"model": model or default_model,
|
||||
|
||||
+122
-8
@@ -14,6 +14,8 @@ import copy
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import platform
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
from hermes_constants import get_hermes_home
|
||||
@@ -277,8 +279,9 @@ def _is_oauth_token(key: str) -> bool:
|
||||
Positively identifies Anthropic OAuth tokens by their key format:
|
||||
- ``sk-ant-`` prefix (but NOT ``sk-ant-api``) → setup tokens, managed keys
|
||||
- ``eyJ`` prefix → JWTs from the Anthropic OAuth flow
|
||||
- ``cc-`` prefix → Claude Code OAuth access tokens (from CLAUDE_CODE_OAUTH_TOKEN)
|
||||
|
||||
Non-Anthropic keys (MiniMax, Alibaba, etc.) don't match either pattern
|
||||
Non-Anthropic keys (MiniMax, Alibaba, etc.) don't match any pattern
|
||||
and correctly return False.
|
||||
"""
|
||||
if not key:
|
||||
@@ -292,6 +295,9 @@ def _is_oauth_token(key: str) -> bool:
|
||||
# JWTs from Anthropic OAuth flow
|
||||
if key.startswith("eyJ"):
|
||||
return True
|
||||
# Claude Code OAuth access tokens (opaque, from CLAUDE_CODE_OAUTH_TOKEN)
|
||||
if key.startswith("cc-"):
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
@@ -384,7 +390,16 @@ def build_anthropic_client(api_key: str, base_url: str = None, timeout: float =
|
||||
"timeout": Timeout(timeout=float(_read_timeout), connect=10.0),
|
||||
}
|
||||
if normalized_base_url:
|
||||
kwargs["base_url"] = normalized_base_url
|
||||
# Azure Anthropic endpoints require an ``api-version`` query parameter.
|
||||
# Pass it via default_query so the SDK appends it to every request URL
|
||||
# without corrupting the base_url (appending it directly produces
|
||||
# malformed paths like /anthropic?api-version=.../v1/messages).
|
||||
_is_azure_endpoint = "azure.com" in normalized_base_url.lower()
|
||||
if _is_azure_endpoint and "api-version" not in normalized_base_url:
|
||||
kwargs["base_url"] = normalized_base_url.rstrip("/")
|
||||
kwargs["default_query"] = {"api-version": "2025-04-15"}
|
||||
else:
|
||||
kwargs["base_url"] = normalized_base_url
|
||||
common_betas = _common_betas_for_base_url(normalized_base_url)
|
||||
|
||||
if _is_kimi_coding_endpoint(base_url):
|
||||
@@ -461,8 +476,72 @@ def build_anthropic_bedrock_client(region: str):
|
||||
)
|
||||
|
||||
|
||||
def _read_claude_code_credentials_from_keychain() -> Optional[Dict[str, Any]]:
|
||||
"""Read Claude Code OAuth credentials from the macOS Keychain.
|
||||
|
||||
Claude Code >=2.1.114 stores credentials in the macOS Keychain under the
|
||||
service name "Claude Code-credentials" rather than (or in addition to)
|
||||
the JSON file at ~/.claude/.credentials.json.
|
||||
|
||||
The password field contains a JSON string with the same claudeAiOauth
|
||||
structure as the JSON file.
|
||||
|
||||
Returns dict with {accessToken, refreshToken?, expiresAt?} or None.
|
||||
"""
|
||||
import platform
|
||||
import subprocess
|
||||
|
||||
if platform.system() != "Darwin":
|
||||
return None
|
||||
|
||||
try:
|
||||
# Read the "Claude Code-credentials" generic password entry
|
||||
result = subprocess.run(
|
||||
["security", "find-generic-password",
|
||||
"-s", "Claude Code-credentials",
|
||||
"-w"],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=5,
|
||||
)
|
||||
except (OSError, subprocess.TimeoutExpired):
|
||||
logger.debug("Keychain: security command not available or timed out")
|
||||
return None
|
||||
|
||||
if result.returncode != 0:
|
||||
logger.debug("Keychain: no entry found for 'Claude Code-credentials'")
|
||||
return None
|
||||
|
||||
raw = result.stdout.strip()
|
||||
if not raw:
|
||||
return None
|
||||
|
||||
try:
|
||||
data = json.loads(raw)
|
||||
except json.JSONDecodeError:
|
||||
logger.debug("Keychain: credentials payload is not valid JSON")
|
||||
return None
|
||||
|
||||
oauth_data = data.get("claudeAiOauth")
|
||||
if oauth_data and isinstance(oauth_data, dict):
|
||||
access_token = oauth_data.get("accessToken", "")
|
||||
if access_token:
|
||||
return {
|
||||
"accessToken": access_token,
|
||||
"refreshToken": oauth_data.get("refreshToken", ""),
|
||||
"expiresAt": oauth_data.get("expiresAt", 0),
|
||||
"source": "macos_keychain",
|
||||
}
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def read_claude_code_credentials() -> Optional[Dict[str, Any]]:
|
||||
"""Read refreshable Claude Code OAuth credentials from ~/.claude/.credentials.json.
|
||||
"""Read refreshable Claude Code OAuth credentials.
|
||||
|
||||
Checks two sources in order:
|
||||
1. macOS Keychain (Darwin only) — "Claude Code-credentials" entry
|
||||
2. ~/.claude/.credentials.json file
|
||||
|
||||
This intentionally excludes ~/.claude.json primaryApiKey. Opencode's
|
||||
subscription flow is OAuth/setup-token based with refreshable credentials,
|
||||
@@ -471,6 +550,12 @@ def read_claude_code_credentials() -> Optional[Dict[str, Any]]:
|
||||
|
||||
Returns dict with {accessToken, refreshToken?, expiresAt?} or None.
|
||||
"""
|
||||
# Try macOS Keychain first (covers Claude Code >=2.1.114)
|
||||
kc_creds = _read_claude_code_credentials_from_keychain()
|
||||
if kc_creds:
|
||||
return kc_creds
|
||||
|
||||
# Fall back to JSON file
|
||||
cred_path = Path.home() / ".claude" / ".credentials.json"
|
||||
if cred_path.exists():
|
||||
try:
|
||||
@@ -641,7 +726,9 @@ def _write_claude_code_credentials(
|
||||
existing["claudeAiOauth"] = oauth_data
|
||||
|
||||
cred_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
cred_path.write_text(json.dumps(existing, indent=2), encoding="utf-8")
|
||||
_tmp_cred = cred_path.with_suffix(".tmp")
|
||||
_tmp_cred.write_text(json.dumps(existing, indent=2), encoding="utf-8")
|
||||
_tmp_cred.replace(cred_path)
|
||||
# Restrict permissions (credentials file)
|
||||
cred_path.chmod(0o600)
|
||||
except (OSError, IOError) as e:
|
||||
@@ -908,6 +995,26 @@ def read_hermes_oauth_credentials() -> Optional[Dict[str, Any]]:
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _is_bedrock_model_id(model: str) -> bool:
|
||||
"""Detect AWS Bedrock model IDs that use dots as namespace separators.
|
||||
|
||||
Bedrock model IDs come in two forms:
|
||||
- Bare: ``anthropic.claude-opus-4-7``
|
||||
- Regional (inference profiles): ``us.anthropic.claude-sonnet-4-5-v1:0``
|
||||
|
||||
In both cases the dots separate namespace components, not version
|
||||
numbers, and must be preserved verbatim for the Bedrock API.
|
||||
"""
|
||||
lower = model.lower()
|
||||
# Regional inference-profile prefixes
|
||||
if any(lower.startswith(p) for p in ("global.", "us.", "eu.", "ap.", "jp.")):
|
||||
return True
|
||||
# Bare Bedrock model IDs: provider.model-family
|
||||
if lower.startswith("anthropic."):
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def normalize_model_name(model: str, preserve_dots: bool = False) -> str:
|
||||
"""Normalize a model name for the Anthropic API.
|
||||
|
||||
@@ -915,11 +1022,19 @@ def normalize_model_name(model: str, preserve_dots: bool = False) -> str:
|
||||
- Converts dots to hyphens in version numbers (OpenRouter uses dots,
|
||||
Anthropic uses hyphens: claude-opus-4.6 → claude-opus-4-6), unless
|
||||
preserve_dots is True (e.g. for Alibaba/DashScope: qwen3.5-plus).
|
||||
- Preserves Bedrock model IDs (``anthropic.claude-opus-4-7``) and
|
||||
regional inference profiles (``us.anthropic.claude-*``) whose dots
|
||||
are namespace separators, not version separators.
|
||||
"""
|
||||
lower = model.lower()
|
||||
if lower.startswith("anthropic/"):
|
||||
model = model[len("anthropic/"):]
|
||||
if not preserve_dots:
|
||||
# Bedrock model IDs use dots as namespace separators
|
||||
# (e.g. "anthropic.claude-opus-4-7", "us.anthropic.claude-*").
|
||||
# These must not be converted to hyphens. See issue #12295.
|
||||
if _is_bedrock_model_id(model):
|
||||
return model
|
||||
# OpenRouter uses dots for version separators (claude-opus-4.6),
|
||||
# Anthropic uses hyphens (claude-opus-4-6). Convert dots to hyphens.
|
||||
model = model.replace(".", "-")
|
||||
@@ -1574,9 +1689,9 @@ def build_anthropic_kwargs(
|
||||
|
||||
# ── Strip sampling params on 4.7+ ─────────────────────────────────
|
||||
# Opus 4.7 rejects any non-default temperature/top_p/top_k with a 400.
|
||||
# Callers (auxiliary_client, flush_memories, etc.) may set these for
|
||||
# older models; drop them here as a safety net so upstream 4.6 → 4.7
|
||||
# migrations don't require coordinated edits everywhere.
|
||||
# Callers (auxiliary_client, etc.) may set these for older models;
|
||||
# drop them here as a safety net so upstream 4.6 → 4.7 migrations
|
||||
# don't require coordinated edits everywhere.
|
||||
if _forbids_sampling_params(model):
|
||||
for _sampling_key in ("temperature", "top_p", "top_k"):
|
||||
kwargs.pop(_sampling_key, None)
|
||||
@@ -1598,4 +1713,3 @@ def build_anthropic_kwargs(
|
||||
return kwargs
|
||||
|
||||
|
||||
|
||||
|
||||
+362
-19
@@ -42,6 +42,7 @@ import time
|
||||
from pathlib import Path # noqa: F401 — used by test mocks
|
||||
from types import SimpleNamespace
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
from urllib.parse import urlparse, parse_qs, urlunparse
|
||||
|
||||
from openai import OpenAI
|
||||
|
||||
@@ -52,6 +53,17 @@ from utils import base_url_host_matches, base_url_hostname, normalize_proxy_env_
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _extract_url_query_params(url: str):
|
||||
"""Extract query params from URL, return (clean_url, default_query dict or None)."""
|
||||
parsed = urlparse(url)
|
||||
if parsed.query:
|
||||
clean = urlunparse(parsed._replace(query=""))
|
||||
params = {k: v[0] for k, v in parse_qs(parsed.query).items()}
|
||||
return clean, params
|
||||
return url, None
|
||||
|
||||
|
||||
# Module-level flag: only warn once per process about stale OPENAI_BASE_URL.
|
||||
_stale_base_url_warned = False
|
||||
|
||||
@@ -74,6 +86,12 @@ _PROVIDER_ALIASES = {
|
||||
"minimax_cn": "minimax-cn",
|
||||
"claude": "anthropic",
|
||||
"claude-code": "anthropic",
|
||||
"github": "copilot",
|
||||
"github-copilot": "copilot",
|
||||
"github-model": "copilot",
|
||||
"github-models": "copilot",
|
||||
"github-copilot-acp": "copilot-acp",
|
||||
"copilot-acp-agent": "copilot-acp",
|
||||
}
|
||||
|
||||
|
||||
@@ -89,10 +107,11 @@ def _normalize_aux_provider(provider: Optional[str]) -> str:
|
||||
if normalized == "main":
|
||||
# Resolve to the user's actual main provider so named custom providers
|
||||
# and non-aggregator providers (DeepSeek, Alibaba, etc.) work correctly.
|
||||
main_prov = _read_main_provider()
|
||||
main_prov = (_read_main_provider() or "").strip().lower()
|
||||
if main_prov and main_prov not in ("auto", "main", ""):
|
||||
return main_prov
|
||||
return "custom"
|
||||
normalized = main_prov
|
||||
else:
|
||||
return "custom"
|
||||
return _PROVIDER_ALIASES.get(normalized, normalized)
|
||||
|
||||
|
||||
@@ -383,7 +402,7 @@ class _CodexCompletionsAdapter:
|
||||
# Note: the Codex endpoint (chatgpt.com/backend-api/codex) does NOT
|
||||
# support max_output_tokens or temperature — omit to avoid 400 errors.
|
||||
|
||||
# Tools support for flush_memories and similar callers
|
||||
# Tools support for auxiliary callers (e.g. skills_hub) that pass function schemas
|
||||
tools = kwargs.get("tools")
|
||||
if tools:
|
||||
converted = []
|
||||
@@ -1150,8 +1169,10 @@ def _try_custom_endpoint() -> Tuple[Optional[Any], Optional[str]]:
|
||||
return None, None
|
||||
model = _read_main_model() or "gpt-4o-mini"
|
||||
logger.debug("Auxiliary client: custom endpoint (%s, api_mode=%s)", model, custom_mode or "chat_completions")
|
||||
_clean_base, _dq = _extract_url_query_params(custom_base)
|
||||
_extra = {"default_query": _dq} if _dq else {}
|
||||
if custom_mode == "codex_responses":
|
||||
real_client = OpenAI(api_key=custom_key, base_url=custom_base)
|
||||
real_client = OpenAI(api_key=custom_key, base_url=_clean_base, **_extra)
|
||||
return CodexAuxiliaryClient(real_client, model), model
|
||||
if custom_mode == "anthropic_messages":
|
||||
# Third-party Anthropic-compatible gateway (MiniMax, Zhipu GLM,
|
||||
@@ -1165,12 +1186,12 @@ def _try_custom_endpoint() -> Tuple[Optional[Any], Optional[str]]:
|
||||
"Custom endpoint declares api_mode=anthropic_messages but the "
|
||||
"anthropic SDK is not installed — falling back to OpenAI-wire."
|
||||
)
|
||||
return OpenAI(api_key=custom_key, base_url=custom_base), model
|
||||
return OpenAI(api_key=custom_key, base_url=_clean_base, **_extra), model
|
||||
return (
|
||||
AnthropicAuxiliaryClient(real_client, model, custom_key, custom_base, is_oauth=False),
|
||||
model,
|
||||
)
|
||||
return OpenAI(api_key=custom_key, base_url=custom_base), model
|
||||
return OpenAI(api_key=custom_key, base_url=_clean_base, **_extra), model
|
||||
|
||||
|
||||
def _try_codex() -> Tuple[Optional[Any], Optional[str]]:
|
||||
@@ -1342,6 +1363,111 @@ def _is_auth_error(exc: Exception) -> bool:
|
||||
return "error code: 401" in err_lower or "authenticationerror" in type(exc).__name__.lower()
|
||||
|
||||
|
||||
def _is_unsupported_parameter_error(exc: Exception, param: str) -> bool:
|
||||
"""Detect provider 400s for an unsupported request parameter.
|
||||
|
||||
Different OpenAI-compatible endpoints phrase the same class of error a few
|
||||
ways: ``Unsupported parameter: X``, ``unsupported_parameter`` with a
|
||||
``param`` field, ``X is not supported``, ``unknown parameter: X``,
|
||||
``unrecognized request argument: X``. We match on both the parameter
|
||||
name and a generic "unsupported/unknown/unrecognized parameter" marker so
|
||||
call sites can reactively retry without the offending key instead of
|
||||
surfacing a noisy auxiliary failure.
|
||||
|
||||
Generalizes the temperature-specific detector that originally shipped
|
||||
with PR #15621 so the same retry strategy can cover ``max_tokens``,
|
||||
``seed``, ``top_p``, and any future quirk. Credit @nicholasrae (PR #15416)
|
||||
for the generalization pattern.
|
||||
"""
|
||||
param_lower = (param or "").lower()
|
||||
if not param_lower:
|
||||
return False
|
||||
err_lower = str(exc).lower()
|
||||
if param_lower not in err_lower:
|
||||
return False
|
||||
return any(marker in err_lower for marker in (
|
||||
"unsupported parameter",
|
||||
"unsupported_parameter",
|
||||
"not supported",
|
||||
"does not support",
|
||||
"unknown parameter",
|
||||
"unrecognized request argument",
|
||||
"unrecognized parameter",
|
||||
"invalid parameter",
|
||||
))
|
||||
|
||||
|
||||
def _is_unsupported_temperature_error(exc: Exception) -> bool:
    """Report whether ``exc`` is a provider error rejecting ``temperature``.

    Back-compat shim kept as its own symbol for callers that use it by name;
    all of the detection logic lives in
    :func:`_is_unsupported_parameter_error`.
    """
    rejected_param = "temperature"
    return _is_unsupported_parameter_error(exc, rejected_param)
|
||||
|
||||
|
||||
def _evict_cached_clients(provider: str) -> None:
    """Drop cached auxiliary clients for a provider so fresh creds are used.

    Every ``_client_cache`` entry whose key's first element normalizes to the
    same provider as ``provider`` is removed. The cached client (first element
    of the entry) is closed on a best-effort basis before being discarded.

    Args:
        provider: Provider name or alias; normalized before comparison.
    """
    normalized = _normalize_aux_provider(provider)
    with _client_cache_lock:
        stale_keys = [
            key for key in _client_cache
            if _normalize_aux_provider(str(key[0])) == normalized
        ]
        for key in stale_keys:
            # Remove the entry and fetch its client in one step.
            client = _client_cache.pop(key, (None, None, None))[0]
            if client is None:
                continue
            _force_close_async_httpx(client)
            try:
                close_fn = getattr(client, "close", None)
                if callable(close_fn):
                    close_fn()
            except Exception as exc:
                # Closing is best-effort: the entry is already evicted, so a
                # failure here must not abort the loop. Record it instead of
                # swallowing it silently.
                logger.debug(
                    "Auxiliary client close failed during eviction for %s: %s",
                    normalized, exc,
                )
|
||||
|
||||
|
||||
def _refresh_provider_credentials(provider: str) -> bool:
    """Refresh short-lived credentials for OAuth-backed auxiliary providers.

    Handles the ``openai-codex``, ``nous`` and ``anthropic`` providers (after
    alias normalization). On success the provider's cached clients are evicted
    so later calls build clients with the new secret.

    Args:
        provider: Provider name or alias.

    Returns:
        True when a non-blank credential was obtained and the cache was
        evicted. False for any other provider, when the refreshed credential
        is blank, or when any step raises (the error is logged at debug level).
    """
    normalized = _normalize_aux_provider(provider)
    try:
        if normalized == "openai-codex":
            from hermes_cli.auth import resolve_codex_runtime_credentials

            refreshed = resolve_codex_runtime_credentials(force_refresh=True)
            secret = refreshed.get("api_key", "")
        elif normalized == "nous":
            from hermes_cli.auth import resolve_nous_runtime_credentials

            refreshed = resolve_nous_runtime_credentials(
                min_key_ttl_seconds=max(60, int(os.getenv("HERMES_NOUS_MIN_KEY_TTL_SECONDS", "1800"))),
                timeout_seconds=float(os.getenv("HERMES_NOUS_TIMEOUT_SECONDS", "15")),
                force_mint=True,
            )
            secret = refreshed.get("api_key", "")
        elif normalized == "anthropic":
            from agent.anthropic_adapter import read_claude_code_credentials, _refresh_oauth_token, resolve_anthropic_token

            stored = read_claude_code_credentials()
            # Only attempt an OAuth refresh when a refresh token is on hand.
            if isinstance(stored, dict) and stored.get("refreshToken"):
                secret = _refresh_oauth_token(stored)
            else:
                secret = None
            # Otherwise (or if the refresh produced nothing) use the general
            # token resolver.
            if not str(secret or "").strip():
                secret = resolve_anthropic_token()
        else:
            return False

        # Shared tail: a blank credential means the refresh did not succeed.
        if not str(secret or "").strip():
            return False
        _evict_cached_clients(normalized)
        return True
    except Exception as exc:
        logger.debug("Auxiliary provider credential refresh failed for %s: %s", normalized, exc)
        return False
|
||||
|
||||
|
||||
def _try_payment_fallback(
|
||||
failed_provider: str,
|
||||
task: str = None,
|
||||
@@ -1713,12 +1839,15 @@ def resolve_provider_client(
|
||||
provider,
|
||||
)
|
||||
extra = {}
|
||||
_clean_base, _dq = _extract_url_query_params(custom_base)
|
||||
if _dq:
|
||||
extra["default_query"] = _dq
|
||||
if base_url_host_matches(custom_base, "api.kimi.com"):
|
||||
extra["default_headers"] = {"User-Agent": "claude-code/0.1.0"}
|
||||
elif base_url_host_matches(custom_base, "api.githubcopilot.com"):
|
||||
from hermes_cli.models import copilot_default_headers
|
||||
extra["default_headers"] = copilot_default_headers()
|
||||
client = OpenAI(api_key=custom_key, base_url=custom_base, **extra)
|
||||
client = OpenAI(api_key=custom_key, base_url=_clean_base, **extra)
|
||||
client = _wrap_if_needed(client, final_model, custom_base)
|
||||
return (_to_async_client(client, final_model) if async_mode
|
||||
else (client, final_model))
|
||||
@@ -1736,7 +1865,7 @@ def resolve_provider_client(
|
||||
"but no endpoint credentials found")
|
||||
return None, None
|
||||
|
||||
# ── Named custom providers (config.yaml custom_providers list) ───
|
||||
# ── Named custom providers (config.yaml providers dict / custom_providers list) ───
|
||||
try:
|
||||
from hermes_cli.runtime_provider import _get_named_custom_provider
|
||||
custom_entry = _get_named_custom_provider(provider)
|
||||
@@ -1747,16 +1876,53 @@ def resolve_provider_client(
|
||||
if not custom_key and custom_key_env:
|
||||
custom_key = os.getenv(custom_key_env, "").strip()
|
||||
custom_key = custom_key or "no-key-required"
|
||||
# An explicit per-task api_mode override (from _resolve_task_provider_model)
|
||||
# wins; otherwise fall back to what the provider entry declared.
|
||||
entry_api_mode = (api_mode or custom_entry.get("api_mode") or "").strip()
|
||||
if custom_base:
|
||||
final_model = _normalize_resolved_model(
|
||||
model or custom_entry.get("model") or _read_main_model() or "gpt-4o-mini",
|
||||
provider,
|
||||
)
|
||||
client = OpenAI(api_key=custom_key, base_url=custom_base)
|
||||
client = _wrap_if_needed(client, final_model, custom_base)
|
||||
_clean_base2, _dq2 = _extract_url_query_params(custom_base)
|
||||
_extra2 = {"default_query": _dq2} if _dq2 else {}
|
||||
logger.debug(
|
||||
"resolve_provider_client: named custom provider %r (%s)",
|
||||
provider, final_model)
|
||||
"resolve_provider_client: named custom provider %r (%s, api_mode=%s)",
|
||||
provider, final_model, entry_api_mode or "chat_completions")
|
||||
# anthropic_messages: route through the Anthropic Messages API
|
||||
# via AnthropicAuxiliaryClient. Mirrors the anonymous-custom
|
||||
# branch in _try_custom_endpoint(). See #15033.
|
||||
if entry_api_mode == "anthropic_messages":
|
||||
try:
|
||||
from agent.anthropic_adapter import build_anthropic_client
|
||||
real_client = build_anthropic_client(custom_key, custom_base)
|
||||
except ImportError:
|
||||
logger.warning(
|
||||
"Named custom provider %r declares api_mode="
|
||||
"anthropic_messages but the anthropic SDK is not "
|
||||
"installed — falling back to OpenAI-wire.",
|
||||
provider,
|
||||
)
|
||||
client = OpenAI(api_key=custom_key, base_url=_clean_base2, **_extra2)
|
||||
return (_to_async_client(client, final_model) if async_mode
|
||||
else (client, final_model))
|
||||
sync_anthropic = AnthropicAuxiliaryClient(
|
||||
real_client, final_model, custom_key, custom_base, is_oauth=False,
|
||||
)
|
||||
if async_mode:
|
||||
return AsyncAnthropicAuxiliaryClient(sync_anthropic), final_model
|
||||
return sync_anthropic, final_model
|
||||
client = OpenAI(api_key=custom_key, base_url=_clean_base2, **_extra2)
|
||||
# codex_responses or inherited auto-detect (via _wrap_if_needed).
|
||||
# _wrap_if_needed reads the closed-over `api_mode` (the task-level
|
||||
# override). Named-provider entry api_mode=codex_responses also
|
||||
# flows through here.
|
||||
if entry_api_mode == "codex_responses" and not isinstance(
|
||||
client, CodexAuxiliaryClient
|
||||
):
|
||||
client = CodexAuxiliaryClient(client, final_model)
|
||||
else:
|
||||
client = _wrap_if_needed(client, final_model, custom_base)
|
||||
return (_to_async_client(client, final_model) if async_mode
|
||||
else (client, final_model))
|
||||
logger.warning(
|
||||
@@ -1889,6 +2055,39 @@ def resolve_provider_client(
|
||||
"directly supported", provider)
|
||||
return None, None
|
||||
|
||||
elif pconfig.auth_type == "aws_sdk":
|
||||
# AWS SDK providers (Bedrock) — use the Anthropic Bedrock client via
|
||||
# boto3's credential chain (IAM roles, SSO, env vars, instance metadata).
|
||||
try:
|
||||
from agent.bedrock_adapter import has_aws_credentials, resolve_bedrock_region
|
||||
from agent.anthropic_adapter import build_anthropic_bedrock_client
|
||||
except ImportError:
|
||||
logger.warning("resolve_provider_client: bedrock requested but "
|
||||
"boto3 or anthropic SDK not installed")
|
||||
return None, None
|
||||
|
||||
if not has_aws_credentials():
|
||||
logger.debug("resolve_provider_client: bedrock requested but "
|
||||
"no AWS credentials found")
|
||||
return None, None
|
||||
|
||||
region = resolve_bedrock_region()
|
||||
default_model = "anthropic.claude-haiku-4-5-20251001-v1:0"
|
||||
final_model = _normalize_resolved_model(model or default_model, provider)
|
||||
try:
|
||||
real_client = build_anthropic_bedrock_client(region)
|
||||
except ImportError as exc:
|
||||
logger.warning("resolve_provider_client: cannot create Bedrock "
|
||||
"client: %s", exc)
|
||||
return None, None
|
||||
client = AnthropicAuxiliaryClient(
|
||||
real_client, final_model, api_key="aws-sdk",
|
||||
base_url=f"https://bedrock-runtime.{region}.amazonaws.com",
|
||||
)
|
||||
logger.debug("resolve_provider_client: bedrock (%s, %s)", final_model, region)
|
||||
return (_to_async_client(client, final_model) if async_mode
|
||||
else (client, final_model))
|
||||
|
||||
elif pconfig.auth_type in ("oauth_device_code", "oauth_external"):
|
||||
# OAuth providers — route through their specific try functions
|
||||
if provider == "nous":
|
||||
@@ -2623,8 +2822,8 @@ def _build_call_kwargs(
|
||||
temperature = fixed_temperature
|
||||
|
||||
# Opus 4.7+ rejects any non-default temperature/top_p/top_k — silently
|
||||
# drop here so auxiliary callers that hardcode temperature (e.g. 0.3 on
|
||||
# flush_memories, 0 on structured-JSON extraction) don't 400 the moment
|
||||
# drop here so auxiliary callers that hardcode temperature (e.g. 0 on
|
||||
# structured-JSON extraction) don't 400 the moment
|
||||
# the aux model is flipped to 4.7.
|
||||
if temperature is not None:
|
||||
from agent.anthropic_adapter import _forbids_sampling_params
|
||||
@@ -2712,7 +2911,7 @@ def call_llm(
|
||||
|
||||
Args:
|
||||
task: Auxiliary task name ("compression", "vision", "web_extract",
|
||||
"session_search", "skills_hub", "mcp", "flush_memories").
|
||||
"session_search", "skills_hub", "mcp", "title_generation").
|
||||
Reads provider:model from config/env. Ignored if provider is set.
|
||||
provider: Explicit provider override.
|
||||
model: Explicit model override.
|
||||
@@ -2815,13 +3014,45 @@ def call_llm(
|
||||
if _is_anthropic_compat_endpoint(resolved_provider, _client_base):
|
||||
kwargs["messages"] = _convert_openai_images_to_anthropic(kwargs["messages"])
|
||||
|
||||
# Handle max_tokens vs max_completion_tokens retry, then payment fallback.
|
||||
# Handle unsupported temperature, max_tokens vs max_completion_tokens retry,
|
||||
# then payment fallback.
|
||||
try:
|
||||
return _validate_llm_response(
|
||||
client.chat.completions.create(**kwargs), task)
|
||||
except Exception as first_err:
|
||||
if "temperature" in kwargs and _is_unsupported_temperature_error(first_err):
|
||||
retry_kwargs = dict(kwargs)
|
||||
retry_kwargs.pop("temperature", None)
|
||||
logger.info(
|
||||
"Auxiliary %s: provider rejected temperature; retrying once without it",
|
||||
task or "call",
|
||||
)
|
||||
try:
|
||||
return _validate_llm_response(
|
||||
client.chat.completions.create(**retry_kwargs), task)
|
||||
except Exception as retry_err:
|
||||
retry_err_str = str(retry_err)
|
||||
# If retry still fails, fall through to the max_tokens /
|
||||
# payment / auth chains below using the temperature-stripped
|
||||
# kwargs. Re-raise only if the retry hit something those
|
||||
# chains won't handle.
|
||||
if not (
|
||||
_is_payment_error(retry_err)
|
||||
or _is_connection_error(retry_err)
|
||||
or _is_auth_error(retry_err)
|
||||
or "max_tokens" in retry_err_str
|
||||
or "unsupported_parameter" in retry_err_str
|
||||
):
|
||||
raise
|
||||
first_err = retry_err
|
||||
kwargs = retry_kwargs
|
||||
|
||||
err_str = str(first_err)
|
||||
if "max_tokens" in err_str or "unsupported_parameter" in err_str:
|
||||
if max_tokens is not None and (
|
||||
"max_tokens" in err_str
|
||||
or "unsupported_parameter" in err_str
|
||||
or _is_unsupported_parameter_error(first_err, "max_tokens")
|
||||
):
|
||||
kwargs.pop("max_tokens", None)
|
||||
kwargs["max_completion_tokens"] = max_tokens
|
||||
try:
|
||||
@@ -2857,6 +3088,49 @@ def call_llm(
|
||||
return _validate_llm_response(
|
||||
refreshed_client.chat.completions.create(**kwargs), task)
|
||||
|
||||
# ── Auth refresh retry ───────────────────────────────────────
|
||||
if (_is_auth_error(first_err)
|
||||
and resolved_provider not in ("auto", "", None)
|
||||
and not client_is_nous):
|
||||
if _refresh_provider_credentials(resolved_provider):
|
||||
logger.info(
|
||||
"Auxiliary %s: refreshed %s credentials after auth error, retrying",
|
||||
task or "call", resolved_provider,
|
||||
)
|
||||
retry_client, retry_model = (
|
||||
resolve_vision_provider_client(
|
||||
provider=resolved_provider,
|
||||
model=final_model,
|
||||
async_mode=False,
|
||||
)[1:]
|
||||
if task == "vision"
|
||||
else _get_cached_client(
|
||||
resolved_provider,
|
||||
resolved_model,
|
||||
base_url=resolved_base_url,
|
||||
api_key=resolved_api_key,
|
||||
api_mode=resolved_api_mode,
|
||||
main_runtime=main_runtime,
|
||||
)
|
||||
)
|
||||
if retry_client is not None:
|
||||
retry_kwargs = _build_call_kwargs(
|
||||
resolved_provider,
|
||||
retry_model or final_model,
|
||||
messages,
|
||||
temperature=temperature,
|
||||
max_tokens=max_tokens,
|
||||
tools=tools,
|
||||
timeout=effective_timeout,
|
||||
extra_body=effective_extra_body,
|
||||
base_url=resolved_base_url,
|
||||
)
|
||||
_retry_base = str(getattr(retry_client, "base_url", "") or "")
|
||||
if _is_anthropic_compat_endpoint(resolved_provider, _retry_base):
|
||||
retry_kwargs["messages"] = _convert_openai_images_to_anthropic(retry_kwargs["messages"])
|
||||
return _validate_llm_response(
|
||||
retry_client.chat.completions.create(**retry_kwargs), task)
|
||||
|
||||
# ── Payment / credit exhaustion fallback ──────────────────────
|
||||
# When the resolved provider returns 402 or a credit-related error,
|
||||
# try alternative providers instead of giving up. This handles the
|
||||
@@ -3041,8 +3315,35 @@ async def async_call_llm(
|
||||
return _validate_llm_response(
|
||||
await client.chat.completions.create(**kwargs), task)
|
||||
except Exception as first_err:
|
||||
if "temperature" in kwargs and _is_unsupported_temperature_error(first_err):
|
||||
retry_kwargs = dict(kwargs)
|
||||
retry_kwargs.pop("temperature", None)
|
||||
logger.info(
|
||||
"Auxiliary %s (async): provider rejected temperature; retrying once without it",
|
||||
task or "call",
|
||||
)
|
||||
try:
|
||||
return _validate_llm_response(
|
||||
await client.chat.completions.create(**retry_kwargs), task)
|
||||
except Exception as retry_err:
|
||||
retry_err_str = str(retry_err)
|
||||
if not (
|
||||
_is_payment_error(retry_err)
|
||||
or _is_connection_error(retry_err)
|
||||
or _is_auth_error(retry_err)
|
||||
or "max_tokens" in retry_err_str
|
||||
or "unsupported_parameter" in retry_err_str
|
||||
):
|
||||
raise
|
||||
first_err = retry_err
|
||||
kwargs = retry_kwargs
|
||||
|
||||
err_str = str(first_err)
|
||||
if "max_tokens" in err_str or "unsupported_parameter" in err_str:
|
||||
if max_tokens is not None and (
|
||||
"max_tokens" in err_str
|
||||
or "unsupported_parameter" in err_str
|
||||
or _is_unsupported_parameter_error(first_err, "max_tokens")
|
||||
):
|
||||
kwargs.pop("max_tokens", None)
|
||||
kwargs["max_completion_tokens"] = max_tokens
|
||||
try:
|
||||
@@ -3077,6 +3378,48 @@ async def async_call_llm(
|
||||
return _validate_llm_response(
|
||||
await refreshed_client.chat.completions.create(**kwargs), task)
|
||||
|
||||
# ── Auth refresh retry (mirrors sync call_llm) ───────────────
|
||||
if (_is_auth_error(first_err)
|
||||
and resolved_provider not in ("auto", "", None)
|
||||
and not client_is_nous):
|
||||
if _refresh_provider_credentials(resolved_provider):
|
||||
logger.info(
|
||||
"Auxiliary %s (async): refreshed %s credentials after auth error, retrying",
|
||||
task or "call", resolved_provider,
|
||||
)
|
||||
if task == "vision":
|
||||
_, retry_client, retry_model = resolve_vision_provider_client(
|
||||
provider=resolved_provider,
|
||||
model=final_model,
|
||||
async_mode=True,
|
||||
)
|
||||
else:
|
||||
retry_client, retry_model = _get_cached_client(
|
||||
resolved_provider,
|
||||
resolved_model,
|
||||
async_mode=True,
|
||||
base_url=resolved_base_url,
|
||||
api_key=resolved_api_key,
|
||||
api_mode=resolved_api_mode,
|
||||
)
|
||||
if retry_client is not None:
|
||||
retry_kwargs = _build_call_kwargs(
|
||||
resolved_provider,
|
||||
retry_model or final_model,
|
||||
messages,
|
||||
temperature=temperature,
|
||||
max_tokens=max_tokens,
|
||||
tools=tools,
|
||||
timeout=effective_timeout,
|
||||
extra_body=effective_extra_body,
|
||||
base_url=resolved_base_url,
|
||||
)
|
||||
_retry_base = str(getattr(retry_client, "base_url", "") or "")
|
||||
if _is_anthropic_compat_endpoint(resolved_provider, _retry_base):
|
||||
retry_kwargs["messages"] = _convert_openai_images_to_anthropic(retry_kwargs["messages"])
|
||||
return _validate_llm_response(
|
||||
await retry_client.chat.completions.create(**retry_kwargs), task)
|
||||
|
||||
# ── Payment / connection fallback (mirrors sync call_llm) ─────
|
||||
should_fallback = _is_payment_error(first_err) or _is_connection_error(first_err)
|
||||
is_auto = resolved_provider in ("auto", "", None)
|
||||
|
||||
+130
-2
@@ -87,6 +87,114 @@ def reset_client_cache():
|
||||
_bedrock_control_client_cache.clear()
|
||||
|
||||
|
||||
def invalidate_runtime_client(region: str) -> bool:
    """Drop the cached ``bedrock-runtime`` client for one region.

    Removes the ``region`` entry from ``_bedrock_runtime_client_cache`` if
    it is present, so a later lookup has to build a new client.

    Args:
        region: Cache key of the client to discard.

    Returns:
        True if an entry for ``region`` was present and removed, False if
        the region was not cached.
    """
    # A unique sentinel distinguishes "key absent" from any stored value.
    absent = object()
    return _bedrock_runtime_client_cache.pop(region, absent) is not absent
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Stale-connection detection
|
||||
# ---------------------------------------------------------------------------
|
||||
#
|
||||
# boto3 caches its HTTPS connection pool inside the client object. When a
|
||||
# pooled connection is killed out from under us (NAT timeout, VPN flap,
|
||||
# server-side TCP RST, proxy idle cull, etc.), the next use surfaces as
|
||||
# one of a handful of low-level exceptions — most commonly
|
||||
# ``botocore.exceptions.ConnectionClosedError`` or
|
||||
# ``urllib3.exceptions.ProtocolError``. urllib3 also trips an internal
|
||||
# ``assert`` in a couple of paths (connection pool state checks, chunked
|
||||
# response readers) which bubbles up as a bare ``AssertionError`` with an
|
||||
# empty ``str(exc)``.
|
||||
#
|
||||
# In all of these cases the client is the problem, not the request: retrying
|
||||
# with the same cached client reproduces the failure until the process
|
||||
# restarts. The fix is to evict the region's cached client so the next
|
||||
# attempt builds a new one.
|
||||
|
||||
_STALE_LIB_MODULE_PREFIXES = (
|
||||
"urllib3.",
|
||||
"botocore.",
|
||||
"boto3.",
|
||||
)
|
||||
|
||||
|
||||
def _traceback_frames_modules(exc: BaseException):
|
||||
"""Yield ``__name__``-style module strings for each frame in exc's traceback."""
|
||||
tb = getattr(exc, "__traceback__", None)
|
||||
while tb is not None:
|
||||
frame = tb.tb_frame
|
||||
module = frame.f_globals.get("__name__", "")
|
||||
yield module or ""
|
||||
tb = tb.tb_next
|
||||
|
||||
|
||||
def is_stale_connection_error(exc: BaseException) -> bool:
    """Decide whether ``exc`` looks like a dead/stale Bedrock HTTP connection.

    Three checks are made, in this order:

    1. ``exc`` is an instance of ``botocore.exceptions.ConnectionError`` or
       ``botocore.exceptions.HTTPClientError`` (subclasses included).
    2. ``exc`` is an instance of ``urllib3.exceptions.ProtocolError``,
       ``NewConnectionError`` or ``ConnectionError``.
    3. ``exc`` is an ``AssertionError`` and at least one frame of its
       traceback belongs to a module whose name starts with one of
       ``_STALE_LIB_MODULE_PREFIXES``.

    Both library imports are best effort: if a library cannot be imported,
    its check is skipped. An ``AssertionError`` whose traceback contains no
    frame from those libraries is deliberately not matched.

    Args:
        exc: The exception to classify.

    Returns:
        True if any of the checks above matches, otherwise False.
    """
    # botocore connection-level failures.
    try:
        from botocore.exceptions import (
            ConnectionError as BotoConnectionError,
            HTTPClientError,
        )
    except ImportError:  # pragma: no cover — botocore always present with boto3
        boto_family: tuple = ()
    else:
        boto_family = (BotoConnectionError, HTTPClientError)
    if boto_family and isinstance(exc, boto_family):
        return True

    # urllib3 low-level transport failures.
    try:
        from urllib3.exceptions import (
            ProtocolError,
            NewConnectionError,
            ConnectionError as Urllib3ConnectionError,
        )
    except ImportError:  # pragma: no cover
        transport_family = ()
    else:
        transport_family = (ProtocolError, NewConnectionError, Urllib3ConnectionError)
    if transport_family and isinstance(exc, transport_family):
        return True

    # Only an AssertionError raised through library code counts from here on.
    if not isinstance(exc, AssertionError):
        return False
    return any(
        frame_module.startswith(_STALE_LIB_MODULE_PREFIXES)
        for frame_module in _traceback_frames_modules(exc)
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# AWS credential detection
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -787,7 +895,17 @@ def call_converse(
|
||||
guardrail_config=guardrail_config,
|
||||
)
|
||||
|
||||
response = client.converse(**kwargs)
|
||||
try:
|
||||
response = client.converse(**kwargs)
|
||||
except Exception as exc:
|
||||
if is_stale_connection_error(exc):
|
||||
logger.warning(
|
||||
"bedrock: stale-connection error on converse(region=%s, model=%s): "
|
||||
"%s — evicting cached client so the next call reconnects.",
|
||||
region, model, type(exc).__name__,
|
||||
)
|
||||
invalidate_runtime_client(region)
|
||||
raise
|
||||
return normalize_converse_response(response)
|
||||
|
||||
|
||||
@@ -819,7 +937,17 @@ def call_converse_stream(
|
||||
guardrail_config=guardrail_config,
|
||||
)
|
||||
|
||||
response = client.converse_stream(**kwargs)
|
||||
try:
|
||||
response = client.converse_stream(**kwargs)
|
||||
except Exception as exc:
|
||||
if is_stale_connection_error(exc):
|
||||
logger.warning(
|
||||
"bedrock: stale-connection error on converse_stream(region=%s, "
|
||||
"model=%s): %s — evicting cached client so the next call reconnects.",
|
||||
region, model, type(exc).__name__,
|
||||
)
|
||||
invalidate_runtime_client(region)
|
||||
raise
|
||||
return normalize_converse_stream_events(response)
|
||||
|
||||
|
||||
|
||||
@@ -23,26 +23,52 @@ from agent.prompt_builder import DEFAULT_AGENT_IDENTITY
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Matches Codex/Harmony tool-call serialization that occasionally leaks into
|
||||
# assistant-message content when the model fails to emit a structured
|
||||
# ``function_call`` item. Accepts the common forms:
|
||||
#
|
||||
# to=functions.exec_command
|
||||
# assistant to=functions.exec_command
|
||||
# <|channel|>commentary to=functions.exec_command
|
||||
#
|
||||
# ``to=functions.<name>`` is the stable marker — the optional ``assistant`` or
|
||||
# Harmony channel prefix varies by degeneration mode. Case-insensitive to
|
||||
# cover lowercase/uppercase ``assistant`` variants.
|
||||
_TOOL_CALL_LEAK_PATTERN = re.compile(
|
||||
r"(?:^|[\s>|])to=functions\.[A-Za-z_][\w.]*",
|
||||
re.IGNORECASE,
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Multimodal content helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _chat_content_to_responses_parts(content: Any) -> List[Dict[str, Any]]:
|
||||
def _chat_content_to_responses_parts(content: Any, *, role: str = "user") -> List[Dict[str, Any]]:
|
||||
"""Convert chat-style multimodal content to Responses API input parts.
|
||||
|
||||
Input: ``[{"type":"text"|"image_url", ...}]`` (native OpenAI Chat format)
|
||||
Output: ``[{"type":"input_text"|"input_image", ...}]`` (Responses format)
|
||||
Output: ``[{"type":"input_text"|"output_text"|"input_image", ...}]`` (Responses format)
|
||||
|
||||
The ``role`` parameter controls the text content type:
|
||||
- ``"user"`` (default) → ``"input_text"``
|
||||
- ``"assistant"`` → ``"output_text"``
|
||||
|
||||
The Responses API rejects ``input_text`` inside assistant messages and
|
||||
``output_text`` inside user messages, so callers MUST pass the correct
|
||||
role for the message being converted.
|
||||
|
||||
Returns an empty list when ``content`` is not a list or contains no
|
||||
recognized parts — callers fall back to the string path.
|
||||
"""
|
||||
text_type = "output_text" if role == "assistant" else "input_text"
|
||||
if not isinstance(content, list):
|
||||
return []
|
||||
converted: List[Dict[str, Any]] = []
|
||||
for part in content:
|
||||
if isinstance(part, str):
|
||||
if part:
|
||||
converted.append({"type": "input_text", "text": part})
|
||||
converted.append({"type": text_type, "text": part})
|
||||
continue
|
||||
if not isinstance(part, dict):
|
||||
continue
|
||||
@@ -50,7 +76,7 @@ def _chat_content_to_responses_parts(content: Any) -> List[Dict[str, Any]]:
|
||||
if ptype in {"text", "input_text", "output_text"}:
|
||||
text = part.get("text")
|
||||
if isinstance(text, str) and text:
|
||||
converted.append({"type": "input_text", "text": text})
|
||||
converted.append({"type": text_type, "text": text})
|
||||
continue
|
||||
if ptype in {"image_url", "input_image"}:
|
||||
image_ref = part.get("image_url")
|
||||
@@ -201,6 +227,23 @@ def _responses_tools(tools: Optional[List[Dict[str, Any]]] = None) -> Optional[L
|
||||
# Message format conversion
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_RESPONSE_MESSAGE_STATUSES = {"completed", "incomplete", "in_progress"}
|
||||
|
||||
|
||||
def _normalize_responses_message_status(value: Any, *, default: str = "completed") -> str:
|
||||
"""Normalize a Responses assistant message status for replay.
|
||||
|
||||
The API accepts completed/incomplete/in_progress on replayed assistant
|
||||
output messages. Preserve those exactly (modulo case/hyphen spelling) so
|
||||
incomplete Codex continuation turns don't get falsely marked completed.
|
||||
"""
|
||||
if isinstance(value, str):
|
||||
status = value.strip().lower().replace("-", "_").replace(" ", "_")
|
||||
if status in _RESPONSE_MESSAGE_STATUSES:
|
||||
return status
|
||||
return default
|
||||
|
||||
|
||||
def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
||||
"""Convert internal chat-style messages to Responses input items."""
|
||||
items: List[Dict[str, Any]] = []
|
||||
@@ -216,9 +259,10 @@ def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Di
|
||||
if role in {"user", "assistant"}:
|
||||
content = msg.get("content", "")
|
||||
if isinstance(content, list):
|
||||
content_parts = _chat_content_to_responses_parts(content)
|
||||
content_parts = _chat_content_to_responses_parts(content, role=role)
|
||||
text_type = "output_text" if role == "assistant" else "input_text"
|
||||
content_text = "".join(
|
||||
p.get("text", "") for p in content_parts if p.get("type") == "input_text"
|
||||
p.get("text", "") for p in content_parts if p.get("type") == text_type
|
||||
)
|
||||
else:
|
||||
content_parts = []
|
||||
@@ -245,7 +289,57 @@ def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Di
|
||||
seen_item_ids.add(item_id)
|
||||
has_codex_reasoning = True
|
||||
|
||||
if content_parts:
|
||||
# Replay exact assistant message items (with id/phase) from
|
||||
# previous turns so the API can maintain prefix-cache hits.
|
||||
# OpenAI docs: "preserve and resend phase on all assistant
|
||||
# messages — dropping it can degrade performance."
|
||||
codex_message_items = msg.get("codex_message_items")
|
||||
replayed_message_items = 0
|
||||
if isinstance(codex_message_items, list):
|
||||
for raw_item in codex_message_items:
|
||||
if not isinstance(raw_item, dict):
|
||||
continue
|
||||
if raw_item.get("type") != "message" or raw_item.get("role") != "assistant":
|
||||
continue
|
||||
raw_content_parts = raw_item.get("content")
|
||||
if not isinstance(raw_content_parts, list):
|
||||
continue
|
||||
|
||||
normalized_content_parts = []
|
||||
for part in raw_content_parts:
|
||||
if not isinstance(part, dict):
|
||||
continue
|
||||
part_type = str(part.get("type") or "").strip()
|
||||
if part_type not in {"output_text", "text"}:
|
||||
continue
|
||||
text = part.get("text", "")
|
||||
if text is None:
|
||||
text = ""
|
||||
if not isinstance(text, str):
|
||||
text = str(text)
|
||||
normalized_content_parts.append({"type": "output_text", "text": text})
|
||||
|
||||
if not normalized_content_parts:
|
||||
continue
|
||||
|
||||
replay_item = {
|
||||
"type": "message",
|
||||
"role": "assistant",
|
||||
"status": _normalize_responses_message_status(raw_item.get("status")),
|
||||
"content": normalized_content_parts,
|
||||
}
|
||||
item_id = raw_item.get("id")
|
||||
if isinstance(item_id, str) and item_id.strip():
|
||||
replay_item["id"] = item_id.strip()
|
||||
phase = raw_item.get("phase")
|
||||
if isinstance(phase, str) and phase.strip():
|
||||
replay_item["phase"] = phase.strip()
|
||||
items.append(replay_item)
|
||||
replayed_message_items += 1
|
||||
|
||||
if replayed_message_items > 0:
|
||||
pass
|
||||
elif content_parts:
|
||||
items.append({"role": "assistant", "content": content_parts})
|
||||
elif content_text.strip():
|
||||
items.append({"role": "assistant", "content": content_text})
|
||||
@@ -405,6 +499,47 @@ def _preflight_codex_input_items(raw_items: Any) -> List[Dict[str, Any]]:
|
||||
normalized.append(reasoning_item)
|
||||
continue
|
||||
|
||||
if item_type == "message":
|
||||
role = item.get("role")
|
||||
if role != "assistant":
|
||||
raise ValueError(f"Codex Responses input[{idx}] message items must have role='assistant'.")
|
||||
content = item.get("content")
|
||||
if not isinstance(content, list):
|
||||
raise ValueError(f"Codex Responses input[{idx}] message item must have content list.")
|
||||
normalized_content = []
|
||||
for part_idx, part in enumerate(content):
|
||||
if not isinstance(part, dict):
|
||||
raise ValueError(
|
||||
f"Codex Responses input[{idx}] message content[{part_idx}] must be an object."
|
||||
)
|
||||
part_type = part.get("type")
|
||||
if part_type not in {"output_text", "text"}:
|
||||
raise ValueError(
|
||||
f"Codex Responses input[{idx}] message content[{part_idx}] has unsupported type {part_type!r}."
|
||||
)
|
||||
text = part.get("text", "")
|
||||
if text is None:
|
||||
text = ""
|
||||
if not isinstance(text, str):
|
||||
text = str(text)
|
||||
normalized_content.append({"type": "output_text", "text": text})
|
||||
if not normalized_content:
|
||||
raise ValueError(f"Codex Responses input[{idx}] message item must contain at least one text part.")
|
||||
normalized_item: Dict[str, Any] = {
|
||||
"type": "message",
|
||||
"role": "assistant",
|
||||
"status": _normalize_responses_message_status(item.get("status")),
|
||||
"content": normalized_content,
|
||||
}
|
||||
item_id = item.get("id")
|
||||
if isinstance(item_id, str) and item_id.strip():
|
||||
normalized_item["id"] = item_id.strip()
|
||||
phase = item.get("phase")
|
||||
if isinstance(phase, str) and phase.strip():
|
||||
normalized_item["phase"] = phase.strip()
|
||||
normalized.append(normalized_item)
|
||||
continue
|
||||
|
||||
role = item.get("role")
|
||||
if role in {"user", "assistant"}:
|
||||
content = item.get("content", "")
|
||||
@@ -412,13 +547,16 @@ def _preflight_codex_input_items(raw_items: Any) -> List[Dict[str, Any]]:
|
||||
content = ""
|
||||
if isinstance(content, list):
|
||||
# Multimodal content from ``_chat_messages_to_responses_input``
|
||||
# is already in Responses format (``input_text`` / ``input_image``).
|
||||
# Validate each part and pass through.
|
||||
# is already in Responses format (``input_text`` / ``output_text``
|
||||
# / ``input_image``). Validate each part and pass through.
|
||||
# Use the correct text type for the role — ``output_text`` for
|
||||
# assistant messages, ``input_text`` for user messages.
|
||||
text_type = "output_text" if role == "assistant" else "input_text"
|
||||
validated: List[Dict[str, Any]] = []
|
||||
for part_idx, part in enumerate(content):
|
||||
if isinstance(part, str):
|
||||
if part:
|
||||
validated.append({"type": "input_text", "text": part})
|
||||
validated.append({"type": text_type, "text": part})
|
||||
continue
|
||||
if not isinstance(part, dict):
|
||||
raise ValueError(
|
||||
@@ -429,7 +567,7 @@ def _preflight_codex_input_items(raw_items: Any) -> List[Dict[str, Any]]:
|
||||
text = part.get("text", "")
|
||||
if not isinstance(text, str):
|
||||
text = str(text or "")
|
||||
validated.append({"type": "input_text", "text": text})
|
||||
validated.append({"type": text_type, "text": text})
|
||||
elif ptype in {"input_image", "image_url"}:
|
||||
image_ref = part.get("image_url", "")
|
||||
detail = part.get("detail")
|
||||
@@ -686,6 +824,7 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]:
|
||||
content_parts: List[str] = []
|
||||
reasoning_parts: List[str] = []
|
||||
reasoning_items_raw: List[Dict[str, Any]] = []
|
||||
message_items_raw: List[Dict[str, Any]] = []
|
||||
tool_calls: List[Any] = []
|
||||
has_incomplete_items = response_status in {"queued", "in_progress", "incomplete"}
|
||||
saw_commentary_phase = False
|
||||
@@ -704,6 +843,7 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]:
|
||||
|
||||
if item_type == "message":
|
||||
item_phase = getattr(item, "phase", None)
|
||||
normalized_phase = None
|
||||
if isinstance(item_phase, str):
|
||||
normalized_phase = item_phase.strip().lower()
|
||||
if normalized_phase in {"commentary", "analysis"}:
|
||||
@@ -713,6 +853,18 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]:
|
||||
message_text = _extract_responses_message_text(item)
|
||||
if message_text:
|
||||
content_parts.append(message_text)
|
||||
raw_message_item: Dict[str, Any] = {
|
||||
"type": "message",
|
||||
"role": "assistant",
|
||||
"status": _normalize_responses_message_status(item_status),
|
||||
"content": [{"type": "output_text", "text": message_text}],
|
||||
}
|
||||
item_id = getattr(item, "id", None)
|
||||
if isinstance(item_id, str) and item_id:
|
||||
raw_message_item["id"] = item_id
|
||||
if normalized_phase:
|
||||
raw_message_item["phase"] = normalized_phase
|
||||
message_items_raw.append(raw_message_item)
|
||||
elif item_type == "reasoning":
|
||||
reasoning_text = _extract_responses_reasoning_text(item)
|
||||
if reasoning_text:
|
||||
@@ -787,6 +939,37 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]:
|
||||
if isinstance(out_text, str):
|
||||
final_text = out_text.strip()
|
||||
|
||||
# ── Tool-call leak recovery ──────────────────────────────────
|
||||
# gpt-5.x on the Codex Responses API sometimes degenerates and emits
|
||||
# what should be a structured `function_call` item as plain assistant
|
||||
# text using the Harmony/Codex serialization (``to=functions.foo
|
||||
# {json}`` or ``assistant to=functions.foo {json}``). The model
|
||||
# intended to call a tool, but the intent never made it into
|
||||
# ``response.output`` as a ``function_call`` item, so ``tool_calls``
|
||||
# is empty here. If we pass this through, the parent sees a
|
||||
# confident-looking summary with no audit trail (empty ``tool_trace``)
|
||||
# and no tools actually ran — the Taiwan-embassy-email incident.
|
||||
#
|
||||
# Detection: leaked tokens always contain ``to=functions.<name>`` and
|
||||
# the assistant message has no real tool calls. Treat it as incomplete
|
||||
# so the existing Codex-incomplete continuation path (3 retries,
|
||||
# handled in run_agent.py) gets a chance to re-elicit a proper
|
||||
# ``function_call`` item. The existing loop already handles message
|
||||
# append, dedup, and retry budget.
|
||||
leaked_tool_call_text = False
|
||||
if final_text and not tool_calls and _TOOL_CALL_LEAK_PATTERN.search(final_text):
|
||||
leaked_tool_call_text = True
|
||||
logger.warning(
|
||||
"Codex response contains leaked tool-call text in assistant content "
|
||||
"(no structured function_call items). Treating as incomplete so the "
|
||||
"continuation path can re-elicit a proper tool call. Leaked snippet: %r",
|
||||
final_text[:300],
|
||||
)
|
||||
# Clear the text so downstream code doesn't surface the garbage as
|
||||
# a summary. The encrypted reasoning items (if any) are preserved
|
||||
# so the model keeps its chain-of-thought on the retry.
|
||||
final_text = ""
|
||||
|
||||
assistant_message = SimpleNamespace(
|
||||
content=final_text,
|
||||
tool_calls=tool_calls,
|
||||
@@ -794,10 +977,13 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]:
|
||||
reasoning_content=None,
|
||||
reasoning_details=None,
|
||||
codex_reasoning_items=reasoning_items_raw or None,
|
||||
codex_message_items=message_items_raw or None,
|
||||
)
|
||||
|
||||
if tool_calls:
|
||||
finish_reason = "tool_calls"
|
||||
elif leaked_tool_call_text:
|
||||
finish_reason = "incomplete"
|
||||
elif has_incomplete_items or (saw_commentary_phase and not saw_final_answer_phase):
|
||||
finish_reason = "incomplete"
|
||||
elif reasoning_items_raw and not final_text:
|
||||
|
||||
@@ -294,6 +294,7 @@ class ContextCompressor(ContextEngine):
|
||||
self._context_probed = False
|
||||
self._context_probe_persistable = False
|
||||
self._previous_summary = None
|
||||
self._last_summary_error = None
|
||||
self._last_compression_savings_pct = 100.0
|
||||
self._ineffective_compression_count = 0
|
||||
|
||||
@@ -317,6 +318,13 @@ class ContextCompressor(ContextEngine):
|
||||
int(context_length * self.threshold_percent),
|
||||
MINIMUM_CONTEXT_LENGTH,
|
||||
)
|
||||
# Recalculate token budgets for the new context length so the
|
||||
# compressor stays calibrated after a model switch (e.g. 200K → 32K).
|
||||
target_tokens = int(self.threshold_tokens * self.summary_target_ratio)
|
||||
self.tail_token_budget = target_tokens
|
||||
self.max_summary_tokens = min(
|
||||
int(context_length * 0.05), _SUMMARY_TOKENS_CEILING,
|
||||
)
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
@@ -389,6 +397,7 @@ class ContextCompressor(ContextEngine):
|
||||
self._last_compression_savings_pct: float = 100.0
|
||||
self._ineffective_compression_count: int = 0
|
||||
self._summary_failure_cooldown_until: float = 0.0
|
||||
self._last_summary_error: Optional[str] = None
|
||||
|
||||
def update_from_response(self, usage: Dict[str, Any]):
|
||||
"""Update tracked token usage from API response."""
|
||||
@@ -812,10 +821,12 @@ The user has requested that this compaction PRIORITISE preserving all informatio
|
||||
self._previous_summary = summary
|
||||
self._summary_failure_cooldown_until = 0.0
|
||||
self._summary_model_fallen_back = False
|
||||
self._last_summary_error = None
|
||||
return self._with_summary_prefix(summary)
|
||||
except RuntimeError:
|
||||
# No provider configured — long cooldown, unlikely to self-resolve
|
||||
self._summary_failure_cooldown_until = time.monotonic() + _SUMMARY_FAILURE_COOLDOWN_SECONDS
|
||||
self._last_summary_error = "no auxiliary LLM provider configured"
|
||||
logging.warning("Context compression: no provider available for "
|
||||
"summary. Middle turns will be dropped without summary "
|
||||
"for %d seconds.",
|
||||
@@ -853,6 +864,10 @@ The user has requested that this compaction PRIORITISE preserving all informatio
|
||||
# Transient errors (timeout, rate limit, network) — shorter cooldown
|
||||
_transient_cooldown = 60
|
||||
self._summary_failure_cooldown_until = time.monotonic() + _transient_cooldown
|
||||
err_text = str(e).strip() or e.__class__.__name__
|
||||
if len(err_text) > 220:
|
||||
err_text = err_text[:217].rstrip() + "..."
|
||||
self._last_summary_error = err_text
|
||||
logging.warning(
|
||||
"Failed to generate context summary: %s. "
|
||||
"Further summary attempts paused for %d seconds.",
|
||||
@@ -1099,6 +1114,21 @@ The user has requested that this compaction PRIORITISE preserving all informatio
|
||||
|
||||
return max(cut_idx, head_end + 1)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# ContextEngine: manual /compress preflight
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def has_content_to_compress(self, messages: List[Dict[str, Any]]) -> bool:
|
||||
"""Return True if there is a non-empty middle region to compact.
|
||||
|
||||
Overrides the ABC default so the gateway ``/compress`` guard can
|
||||
skip the LLM call when the transcript is still entirely inside
|
||||
the protected head/tail.
|
||||
"""
|
||||
compress_start = self._align_boundary_forward(messages, self.protect_first_n)
|
||||
compress_end = self._find_tail_cut_by_tokens(messages, compress_start)
|
||||
return compress_start < compress_end
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Main compression entry point
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
@@ -78,6 +78,7 @@ class ContextEngine(ABC):
|
||||
self,
|
||||
messages: List[Dict[str, Any]],
|
||||
current_tokens: int = None,
|
||||
focus_topic: str = None,
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""Compact the message list and return the new message list.
|
||||
|
||||
@@ -86,6 +87,12 @@ class ContextEngine(ABC):
|
||||
context budget. The implementation is free to summarize, build a
|
||||
DAG, or do anything else — as long as the returned list is a valid
|
||||
OpenAI-format message sequence.
|
||||
|
||||
Args:
|
||||
focus_topic: Optional topic string from manual ``/compress <focus>``.
|
||||
Engines that support guided compression should prioritise
|
||||
preserving information related to this topic. Engines that
|
||||
don't support it may simply ignore this argument.
|
||||
"""
|
||||
|
||||
# -- Optional: pre-flight check ----------------------------------------
|
||||
@@ -98,6 +105,21 @@ class ContextEngine(ABC):
|
||||
"""
|
||||
return False
|
||||
|
||||
# -- Optional: manual /compress preflight ------------------------------
|
||||
|
||||
def has_content_to_compress(self, messages: List[Dict[str, Any]]) -> bool:
|
||||
"""Quick check: is there anything in ``messages`` that can be compacted?
|
||||
|
||||
Used by the gateway ``/compress`` command as a preflight guard —
|
||||
returning False lets the gateway report "nothing to compress yet"
|
||||
without making an LLM call.
|
||||
|
||||
Default returns True (always attempt). Engines with a cheap way
|
||||
to introspect their own head/tail boundaries should override this
|
||||
to return False when the transcript is still entirely protected.
|
||||
"""
|
||||
return True
|
||||
|
||||
# -- Optional: session lifecycle ---------------------------------------
|
||||
|
||||
def on_session_start(self, session_id: str, **kwargs) -> None:
|
||||
|
||||
@@ -46,6 +46,47 @@ def _resolve_args() -> list[str]:
|
||||
return shlex.split(raw)
|
||||
|
||||
|
||||
def _resolve_home_dir() -> str:
|
||||
"""Return a stable HOME for child ACP processes."""
|
||||
|
||||
try:
|
||||
from hermes_constants import get_subprocess_home
|
||||
|
||||
profile_home = get_subprocess_home()
|
||||
if profile_home:
|
||||
return profile_home
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
home = os.environ.get("HOME", "").strip()
|
||||
if home:
|
||||
return home
|
||||
|
||||
expanded = os.path.expanduser("~")
|
||||
if expanded and expanded != "~":
|
||||
return expanded
|
||||
|
||||
try:
|
||||
import pwd
|
||||
|
||||
resolved = pwd.getpwuid(os.getuid()).pw_dir.strip()
|
||||
if resolved:
|
||||
return resolved
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Last resort: /tmp (writable on any POSIX system). Avoids crashing the
|
||||
# subprocess with no HOME; callers can set HERMES_HOME explicitly if they
|
||||
# need a different writable dir.
|
||||
return "/tmp"
|
||||
|
||||
|
||||
def _build_subprocess_env() -> dict[str, str]:
|
||||
env = os.environ.copy()
|
||||
env["HOME"] = _resolve_home_dir()
|
||||
return env
|
||||
|
||||
|
||||
def _jsonrpc_error(message_id: Any, code: int, message: str) -> dict[str, Any]:
|
||||
return {
|
||||
"jsonrpc": "2.0",
|
||||
@@ -382,6 +423,7 @@ class CopilotACPClient:
|
||||
text=True,
|
||||
bufsize=1,
|
||||
cwd=self._acp_cwd,
|
||||
env=_build_subprocess_env(),
|
||||
)
|
||||
except FileNotFoundError as exc:
|
||||
raise RuntimeError(
|
||||
|
||||
+108
-3
@@ -455,6 +455,61 @@ class CredentialPool:
|
||||
logger.debug("Failed to sync from credentials file: %s", exc)
|
||||
return entry
|
||||
|
||||
def _sync_nous_entry_from_auth_store(self, entry: PooledCredential) -> PooledCredential:
|
||||
"""Sync a Nous pool entry from auth.json if tokens differ.
|
||||
|
||||
Nous OAuth refresh tokens are single-use. When another process
|
||||
(e.g. a concurrent cron) refreshes the token via
|
||||
``resolve_nous_runtime_credentials``, it writes fresh tokens to
|
||||
auth.json under ``_auth_store_lock``. The pool entry's tokens
|
||||
become stale. This method detects that and adopts the newer pair,
|
||||
avoiding a "refresh token reuse" revocation on the Nous Portal.
|
||||
"""
|
||||
if self.provider != "nous" or entry.source != "device_code":
|
||||
return entry
|
||||
try:
|
||||
with _auth_store_lock():
|
||||
auth_store = _load_auth_store()
|
||||
state = _load_provider_state(auth_store, "nous")
|
||||
if not state:
|
||||
return entry
|
||||
store_refresh = state.get("refresh_token", "")
|
||||
store_access = state.get("access_token", "")
|
||||
if store_refresh and store_refresh != entry.refresh_token:
|
||||
logger.debug(
|
||||
"Pool entry %s: syncing tokens from auth.json (Nous refresh token changed)",
|
||||
entry.id,
|
||||
)
|
||||
field_updates: Dict[str, Any] = {
|
||||
"access_token": store_access,
|
||||
"refresh_token": store_refresh,
|
||||
"last_status": None,
|
||||
"last_status_at": None,
|
||||
"last_error_code": None,
|
||||
}
|
||||
if state.get("expires_at"):
|
||||
field_updates["expires_at"] = state["expires_at"]
|
||||
if state.get("agent_key"):
|
||||
field_updates["agent_key"] = state["agent_key"]
|
||||
if state.get("agent_key_expires_at"):
|
||||
field_updates["agent_key_expires_at"] = state["agent_key_expires_at"]
|
||||
if state.get("inference_base_url"):
|
||||
field_updates["inference_base_url"] = state["inference_base_url"]
|
||||
extra_updates = dict(entry.extra)
|
||||
for extra_key in ("obtained_at", "expires_in", "agent_key_id",
|
||||
"agent_key_expires_in", "agent_key_reused",
|
||||
"agent_key_obtained_at"):
|
||||
val = state.get(extra_key)
|
||||
if val is not None:
|
||||
extra_updates[extra_key] = val
|
||||
updated = replace(entry, extra=extra_updates, **field_updates)
|
||||
self._replace_entry(entry, updated)
|
||||
self._persist()
|
||||
return updated
|
||||
except Exception as exc:
|
||||
logger.debug("Failed to sync Nous entry from auth.json: %s", exc)
|
||||
return entry
|
||||
|
||||
def _sync_device_code_entry_to_auth_store(self, entry: PooledCredential) -> None:
|
||||
"""Write refreshed pool entry tokens back to auth.json providers.
|
||||
|
||||
@@ -561,6 +616,9 @@ class CredentialPool:
|
||||
last_refresh=refreshed.get("last_refresh"),
|
||||
)
|
||||
elif self.provider == "nous":
|
||||
synced = self._sync_nous_entry_from_auth_store(entry)
|
||||
if synced is not entry:
|
||||
entry = synced
|
||||
nous_state = {
|
||||
"access_token": entry.access_token,
|
||||
"refresh_token": entry.refresh_token,
|
||||
@@ -635,6 +693,26 @@ class CredentialPool:
|
||||
# Credentials file had a valid (non-expired) token — use it directly
|
||||
logger.debug("Credentials file has valid token, using without refresh")
|
||||
return synced
|
||||
# For nous: another process may have consumed the refresh token
|
||||
# between our proactive sync and the HTTP call. Re-sync from
|
||||
# auth.json and adopt the fresh tokens if available.
|
||||
if self.provider == "nous":
|
||||
synced = self._sync_nous_entry_from_auth_store(entry)
|
||||
if synced.refresh_token != entry.refresh_token:
|
||||
logger.debug("Nous refresh failed but auth.json has newer tokens — adopting")
|
||||
updated = replace(
|
||||
synced,
|
||||
last_status=STATUS_OK,
|
||||
last_status_at=None,
|
||||
last_error_code=None,
|
||||
last_error_reason=None,
|
||||
last_error_message=None,
|
||||
last_error_reset_at=None,
|
||||
)
|
||||
self._replace_entry(synced, updated)
|
||||
self._persist()
|
||||
self._sync_device_code_entry_to_auth_store(updated)
|
||||
return updated
|
||||
self._mark_exhausted(entry, None)
|
||||
return None
|
||||
|
||||
@@ -698,6 +776,17 @@ class CredentialPool:
|
||||
if synced is not entry:
|
||||
entry = synced
|
||||
cleared_any = True
|
||||
# For nous entries, sync from auth.json before status checks.
|
||||
# Another process may have successfully refreshed via
|
||||
# resolve_nous_runtime_credentials(), making this entry's
|
||||
# exhausted status stale.
|
||||
if (self.provider == "nous"
|
||||
and entry.source == "device_code"
|
||||
and entry.last_status == STATUS_EXHAUSTED):
|
||||
synced = self._sync_nous_entry_from_auth_store(entry)
|
||||
if synced is not entry:
|
||||
entry = synced
|
||||
cleared_any = True
|
||||
if entry.last_status == STATUS_EXHAUSTED:
|
||||
exhausted_until = _exhausted_until(entry)
|
||||
if exhausted_until is not None and now < exhausted_until:
|
||||
@@ -739,8 +828,11 @@ class CredentialPool:
|
||||
|
||||
if self._strategy == STRATEGY_LEAST_USED and len(available) > 1:
|
||||
entry = min(available, key=lambda e: e.request_count)
|
||||
# Increment usage counter so subsequent selections distribute load
|
||||
updated = replace(entry, request_count=entry.request_count + 1)
|
||||
self._replace_entry(entry, updated)
|
||||
self._current_id = entry.id
|
||||
return entry
|
||||
return updated
|
||||
|
||||
if self._strategy == STRATEGY_ROUND_ROBIN and len(available) > 1:
|
||||
entry = available[0]
|
||||
@@ -1056,6 +1148,18 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
|
||||
"inference_base_url": state.get("inference_base_url"),
|
||||
"agent_key": state.get("agent_key"),
|
||||
"agent_key_expires_at": state.get("agent_key_expires_at"),
|
||||
# Carry the mint/refresh timestamps into the pool so
|
||||
# freshness-sensitive consumers (self-heal hooks, pool
|
||||
# pruning by age) can distinguish just-minted credentials
|
||||
# from stale ones. Without these, fresh device_code
|
||||
# entries get obtained_at=None and look older than they
|
||||
# are (#15099).
|
||||
"obtained_at": state.get("obtained_at"),
|
||||
"expires_in": state.get("expires_in"),
|
||||
"agent_key_id": state.get("agent_key_id"),
|
||||
"agent_key_expires_in": state.get("agent_key_expires_in"),
|
||||
"agent_key_reused": state.get("agent_key_reused"),
|
||||
"agent_key_obtained_at": state.get("agent_key_obtained_at"),
|
||||
"tls": state.get("tls") if isinstance(state.get("tls"), dict) else None,
|
||||
"label": seeded_label,
|
||||
},
|
||||
@@ -1066,9 +1170,10 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
|
||||
# env vars (COPILOT_GITHUB_TOKEN / GH_TOKEN). They don't live in
|
||||
# the auth store or credential pool, so we resolve them here.
|
||||
try:
|
||||
from hermes_cli.copilot_auth import resolve_copilot_token
|
||||
from hermes_cli.copilot_auth import resolve_copilot_token, get_copilot_api_token
|
||||
token, source = resolve_copilot_token()
|
||||
if token:
|
||||
api_token = get_copilot_api_token(token)
|
||||
source_name = "gh_cli" if "gh" in source.lower() else f"env:{source}"
|
||||
if not _is_suppressed(provider, source_name):
|
||||
active_sources.add(source_name)
|
||||
@@ -1080,7 +1185,7 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
|
||||
{
|
||||
"source": source_name,
|
||||
"auth_type": AUTH_TYPE_API_KEY,
|
||||
"access_token": token,
|
||||
"access_token": api_token,
|
||||
"base_url": pconfig.inference_base_url if pconfig else "",
|
||||
"label": source,
|
||||
},
|
||||
|
||||
@@ -45,6 +45,7 @@ class FailoverReason(enum.Enum):
|
||||
|
||||
# Model
|
||||
model_not_found = "model_not_found" # 404 or invalid model — fallback to different model
|
||||
provider_policy_blocked = "provider_policy_blocked" # Aggregator (e.g. OpenRouter) blocked the only endpoint due to account data/privacy policy
|
||||
|
||||
# Request format
|
||||
format_error = "format_error" # 400 bad request — abort or strip + retry
|
||||
@@ -194,6 +195,29 @@ _MODEL_NOT_FOUND_PATTERNS = [
|
||||
"unsupported model",
|
||||
]
|
||||
|
||||
# OpenRouter aggregator policy-block patterns.
|
||||
#
|
||||
# When a user's OpenRouter account privacy setting (or a per-request
|
||||
# `provider.data_collection: deny` preference) excludes the only endpoint
|
||||
# serving a model, OpenRouter returns 404 with a *specific* message that is
|
||||
# distinct from "model not found":
|
||||
#
|
||||
# "No endpoints available matching your guardrail restrictions and
|
||||
# data policy. Configure: https://openrouter.ai/settings/privacy"
|
||||
#
|
||||
# We classify this as `provider_policy_blocked` rather than
|
||||
# `model_not_found` because:
|
||||
# - The model *exists* — model_not_found is misleading in logs
|
||||
# - Provider fallback won't help: the account-level setting applies to
|
||||
# every call on the same OpenRouter account
|
||||
# - The error body already contains the fix URL, so the user gets
|
||||
# actionable guidance without us rewriting the message
|
||||
_PROVIDER_POLICY_BLOCKED_PATTERNS = [
|
||||
"no endpoints available matching your guardrail",
|
||||
"no endpoints available matching your data policy",
|
||||
"no endpoints found matching your data policy",
|
||||
]
|
||||
|
||||
# Auth patterns (non-status-code signals)
|
||||
_AUTH_PATTERNS = [
|
||||
"invalid api key",
|
||||
@@ -319,6 +343,11 @@ def classify_api_error(
|
||||
"""
|
||||
status_code = _extract_status_code(error)
|
||||
error_type = type(error).__name__
|
||||
# Copilot/GitHub Models RateLimitError may not set .status_code; force 429
|
||||
# so downstream rate-limit handling (classifier reason, pool rotation,
|
||||
# fallback gating) fires correctly instead of misclassifying as generic.
|
||||
if status_code is None and error_type == "RateLimitError":
|
||||
status_code = 429
|
||||
body = _extract_error_body(error)
|
||||
error_code = _extract_error_code(body)
|
||||
|
||||
@@ -523,6 +552,17 @@ def _classify_by_status(
|
||||
return _classify_402(error_msg, result_fn)
|
||||
|
||||
if status_code == 404:
|
||||
# OpenRouter policy-block 404 — distinct from "model not found".
|
||||
# The model exists; the user's account privacy setting excludes the
|
||||
# only endpoint serving it. Falling back to another provider won't
|
||||
# help (same account setting applies). The error body already
|
||||
# contains the fix URL, so just surface it.
|
||||
if any(p in error_msg for p in _PROVIDER_POLICY_BLOCKED_PATTERNS):
|
||||
return result_fn(
|
||||
FailoverReason.provider_policy_blocked,
|
||||
retryable=False,
|
||||
should_fallback=False,
|
||||
)
|
||||
if any(p in error_msg for p in _MODEL_NOT_FOUND_PATTERNS):
|
||||
return result_fn(
|
||||
FailoverReason.model_not_found,
|
||||
@@ -640,6 +680,12 @@ def _classify_400(
|
||||
)
|
||||
|
||||
# Some providers return model-not-found as 400 instead of 404 (e.g. OpenRouter).
|
||||
if any(p in error_msg for p in _PROVIDER_POLICY_BLOCKED_PATTERNS):
|
||||
return result_fn(
|
||||
FailoverReason.provider_policy_blocked,
|
||||
retryable=False,
|
||||
should_fallback=False,
|
||||
)
|
||||
if any(p in error_msg for p in _MODEL_NOT_FOUND_PATTERNS):
|
||||
return result_fn(
|
||||
FailoverReason.model_not_found,
|
||||
@@ -812,6 +858,15 @@ def _classify_by_message(
|
||||
should_fallback=True,
|
||||
)
|
||||
|
||||
# Provider policy-block (aggregator-side guardrail) — check before
|
||||
# model_not_found so we don't mis-label as a missing model.
|
||||
if any(p in error_msg for p in _PROVIDER_POLICY_BLOCKED_PATTERNS):
|
||||
return result_fn(
|
||||
FailoverReason.provider_policy_blocked,
|
||||
retryable=False,
|
||||
should_fallback=False,
|
||||
)
|
||||
|
||||
# Model not found patterns
|
||||
if any(p in error_msg for p in _MODEL_NOT_FOUND_PATTERNS):
|
||||
return result_fn(
|
||||
|
||||
@@ -44,6 +44,97 @@ def is_native_gemini_base_url(base_url: str) -> bool:
|
||||
return not normalized.endswith("/openai")
|
||||
|
||||
|
||||
def probe_gemini_tier(
|
||||
api_key: str,
|
||||
base_url: str = DEFAULT_GEMINI_BASE_URL,
|
||||
*,
|
||||
model: str = "gemini-2.5-flash",
|
||||
timeout: float = 10.0,
|
||||
) -> str:
|
||||
"""Probe a Google AI Studio API key and return its tier.
|
||||
|
||||
Returns one of:
|
||||
|
||||
- ``"free"`` -- key is on the free tier (unusable with Hermes)
|
||||
- ``"paid"`` -- key is on a paid tier
|
||||
- ``"unknown"`` -- probe failed; callers should proceed without blocking.
|
||||
"""
|
||||
key = (api_key or "").strip()
|
||||
if not key:
|
||||
return "unknown"
|
||||
|
||||
normalized_base = str(base_url or DEFAULT_GEMINI_BASE_URL).strip().rstrip("/")
|
||||
if not normalized_base:
|
||||
normalized_base = DEFAULT_GEMINI_BASE_URL
|
||||
if normalized_base.lower().endswith("/openai"):
|
||||
normalized_base = normalized_base[: -len("/openai")]
|
||||
|
||||
url = f"{normalized_base}/models/{model}:generateContent"
|
||||
payload = {
|
||||
"contents": [{"role": "user", "parts": [{"text": "hi"}]}],
|
||||
"generationConfig": {"maxOutputTokens": 1},
|
||||
}
|
||||
|
||||
try:
|
||||
with httpx.Client(timeout=timeout) as client:
|
||||
resp = client.post(
|
||||
url,
|
||||
params={"key": key},
|
||||
json=payload,
|
||||
headers={"Content-Type": "application/json"},
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.debug("probe_gemini_tier: network error: %s", exc)
|
||||
return "unknown"
|
||||
|
||||
headers_lower = {k.lower(): v for k, v in resp.headers.items()}
|
||||
rpd_header = headers_lower.get("x-ratelimit-limit-requests-per-day")
|
||||
if rpd_header:
|
||||
try:
|
||||
rpd_val = int(rpd_header)
|
||||
except (TypeError, ValueError):
|
||||
rpd_val = None
|
||||
# Published free-tier daily caps (Dec 2025):
|
||||
# gemini-2.5-pro: 100, gemini-2.5-flash: 250, flash-lite: 1000
|
||||
# Tier 1 starts at ~1500+ for Flash. We treat <= 1000 as free.
|
||||
if rpd_val is not None and rpd_val <= 1000:
|
||||
return "free"
|
||||
if rpd_val is not None and rpd_val > 1000:
|
||||
return "paid"
|
||||
|
||||
if resp.status_code == 429:
|
||||
body_text = ""
|
||||
try:
|
||||
body_text = resp.text or ""
|
||||
except Exception:
|
||||
body_text = ""
|
||||
if "free_tier" in body_text.lower():
|
||||
return "free"
|
||||
return "paid"
|
||||
|
||||
if 200 <= resp.status_code < 300:
|
||||
return "paid"
|
||||
|
||||
return "unknown"
|
||||
|
||||
|
||||
def is_free_tier_quota_error(error_message: str) -> bool:
|
||||
"""Return True when a Gemini 429 message indicates free-tier exhaustion."""
|
||||
if not error_message:
|
||||
return False
|
||||
return "free_tier" in error_message.lower()
|
||||
|
||||
|
||||
_FREE_TIER_GUIDANCE = (
|
||||
"\n\nYour Google API key is on the free tier (<= 250 requests/day for "
|
||||
"gemini-2.5-flash). Hermes typically makes 3-10 API calls per user turn, "
|
||||
"so the free tier is exhausted in a handful of messages and cannot sustain "
|
||||
"an agent session. Enable billing on your Google Cloud project and "
|
||||
"regenerate the key in a billing-enabled project: "
|
||||
"https://aistudio.google.com/apikey"
|
||||
)
|
||||
|
||||
|
||||
class GeminiAPIError(Exception):
|
||||
"""Error shape compatible with Hermes retry/error classification."""
|
||||
|
||||
@@ -650,6 +741,12 @@ def gemini_http_error(response: httpx.Response) -> GeminiAPIError:
|
||||
else:
|
||||
message = f"Gemini returned HTTP {status}: {body_text[:500]}"
|
||||
|
||||
# Free-tier quota exhaustion -> append actionable guidance so users who
|
||||
# bypassed the setup wizard (direct GOOGLE_API_KEY in .env) still learn
|
||||
# that the free tier cannot sustain an agent session.
|
||||
if status == 429 and is_free_tier_quota_error(err_message or body_text):
|
||||
message = message + _FREE_TIER_GUIDANCE
|
||||
|
||||
return GeminiAPIError(
|
||||
message,
|
||||
code=code,
|
||||
@@ -704,6 +801,13 @@ class GeminiNativeClient:
|
||||
http_client: Optional[httpx.Client] = None,
|
||||
**_: Any,
|
||||
) -> None:
|
||||
if not (api_key or "").strip():
|
||||
raise RuntimeError(
|
||||
"Gemini native client requires an API key, but none was provided. "
|
||||
"Set GOOGLE_API_KEY or GEMINI_API_KEY in your environment / ~/.hermes/.env "
|
||||
"(get one at https://aistudio.google.com/app/apikey), or run `hermes setup` "
|
||||
"to configure the Google provider."
|
||||
)
|
||||
self.api_key = api_key
|
||||
normalized_base = (base_url or DEFAULT_GEMINI_BASE_URL).rstrip("/")
|
||||
if normalized_base.endswith("/openai"):
|
||||
|
||||
@@ -73,6 +73,20 @@ def sanitize_gemini_schema(schema: Any) -> Dict[str, Any]:
|
||||
]
|
||||
continue
|
||||
cleaned[key] = value
|
||||
|
||||
# Gemini's Schema validator requires every ``enum`` entry to be a string,
|
||||
# even when the parent ``type`` is ``integer`` / ``number`` / ``boolean``.
|
||||
# OpenAI / OpenRouter / Anthropic accept typed enums (e.g. Discord's
|
||||
# ``auto_archive_duration: {type: integer, enum: [60, 1440, 4320, 10080]}``),
|
||||
# so we only drop the ``enum`` when it would collide with Gemini's rule.
|
||||
# Keeping ``type: integer`` plus the human-readable description gives the
|
||||
# model enough guidance; the tool handler still validates the value.
|
||||
enum_val = cleaned.get("enum")
|
||||
type_val = cleaned.get("type")
|
||||
if isinstance(enum_val, list) and type_val in {"integer", "number", "boolean"}:
|
||||
if any(not isinstance(item, str) for item in enum_val):
|
||||
cleaned.pop("enum", None)
|
||||
|
||||
return cleaned
|
||||
|
||||
|
||||
|
||||
+43
-2
@@ -31,6 +31,7 @@ from __future__ import annotations
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
import inspect
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from agent.memory_provider import MemoryProvider
|
||||
@@ -312,7 +313,39 @@ class MemoryManager:
|
||||
)
|
||||
return "\n\n".join(parts)
|
||||
|
||||
def on_memory_write(self, action: str, target: str, content: str) -> None:
|
||||
@staticmethod
|
||||
def _provider_memory_write_metadata_mode(provider: MemoryProvider) -> str:
|
||||
"""Return how to pass metadata to a provider's memory-write hook."""
|
||||
try:
|
||||
signature = inspect.signature(provider.on_memory_write)
|
||||
except (TypeError, ValueError):
|
||||
return "keyword"
|
||||
|
||||
params = list(signature.parameters.values())
|
||||
if any(p.kind == inspect.Parameter.VAR_KEYWORD for p in params):
|
||||
return "keyword"
|
||||
if "metadata" in signature.parameters:
|
||||
return "keyword"
|
||||
|
||||
accepted = [
|
||||
p for p in params
|
||||
if p.kind in (
|
||||
inspect.Parameter.POSITIONAL_ONLY,
|
||||
inspect.Parameter.POSITIONAL_OR_KEYWORD,
|
||||
inspect.Parameter.KEYWORD_ONLY,
|
||||
)
|
||||
]
|
||||
if len(accepted) >= 4:
|
||||
return "positional"
|
||||
return "legacy"
|
||||
|
||||
def on_memory_write(
|
||||
self,
|
||||
action: str,
|
||||
target: str,
|
||||
content: str,
|
||||
metadata: Optional[Dict[str, Any]] = None,
|
||||
) -> None:
|
||||
"""Notify external providers when the built-in memory tool writes.
|
||||
|
||||
Skips the builtin provider itself (it's the source of the write).
|
||||
@@ -321,7 +354,15 @@ class MemoryManager:
|
||||
if provider.name == "builtin":
|
||||
continue
|
||||
try:
|
||||
provider.on_memory_write(action, target, content)
|
||||
metadata_mode = self._provider_memory_write_metadata_mode(provider)
|
||||
if metadata_mode == "keyword":
|
||||
provider.on_memory_write(
|
||||
action, target, content, metadata=dict(metadata or {})
|
||||
)
|
||||
elif metadata_mode == "positional":
|
||||
provider.on_memory_write(action, target, content, dict(metadata or {}))
|
||||
else:
|
||||
provider.on_memory_write(action, target, content)
|
||||
except Exception as e:
|
||||
logger.debug(
|
||||
"Memory provider '%s' on_memory_write failed: %s",
|
||||
|
||||
@@ -26,7 +26,7 @@ Optional hooks (override to opt in):
|
||||
on_turn_start(turn, message, **kwargs) — per-turn tick with runtime context
|
||||
on_session_end(messages) — end-of-session extraction
|
||||
on_pre_compress(messages) -> str — extract before context compression
|
||||
on_memory_write(action, target, content) — mirror built-in memory writes
|
||||
on_memory_write(action, target, content, metadata=None) — mirror built-in memory writes
|
||||
on_delegation(task, result, **kwargs) — parent-side observation of subagent work
|
||||
"""
|
||||
|
||||
@@ -34,7 +34,7 @@ from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Any, Dict, List
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -220,12 +220,21 @@ class MemoryProvider(ABC):
|
||||
should all have ``env_var`` set and this method stays no-op).
|
||||
"""
|
||||
|
||||
def on_memory_write(self, action: str, target: str, content: str) -> None:
|
||||
def on_memory_write(
|
||||
self,
|
||||
action: str,
|
||||
target: str,
|
||||
content: str,
|
||||
metadata: Optional[Dict[str, Any]] = None,
|
||||
) -> None:
|
||||
"""Called when the built-in memory tool writes an entry.
|
||||
|
||||
action: 'add', 'replace', or 'remove'
|
||||
target: 'memory' or 'user'
|
||||
content: the entry content
|
||||
metadata: structured provenance for the write, when available. Common
|
||||
keys include ``write_origin``, ``execution_context``, ``session_id``,
|
||||
``parent_session_id``, ``platform``, and ``tool_name``.
|
||||
|
||||
Use to mirror built-in memory writes to your backend.
|
||||
"""
|
||||
|
||||
+251
-26
@@ -6,6 +6,7 @@ and run_agent.py for pre-flight context checks.
|
||||
|
||||
import ipaddress
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
from pathlib import Path
|
||||
@@ -21,6 +22,25 @@ from hermes_constants import OPENROUTER_MODELS_URL
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _resolve_requests_verify() -> bool | str:
|
||||
"""Resolve SSL verify setting for `requests` calls from env vars.
|
||||
|
||||
The `requests` library only honours REQUESTS_CA_BUNDLE / CURL_CA_BUNDLE
|
||||
by default. Hermes also honours HERMES_CA_BUNDLE (its own convention)
|
||||
and SSL_CERT_FILE (used by the stdlib `ssl` module and by httpx), so
|
||||
that a single env var can cover both `requests` and `httpx` callsites
|
||||
inside the same process.
|
||||
|
||||
Returns either a filesystem path to a CA bundle, or True to defer to
|
||||
the requests default (certifi).
|
||||
"""
|
||||
for env_var in ("HERMES_CA_BUNDLE", "REQUESTS_CA_BUNDLE", "SSL_CERT_FILE"):
|
||||
val = os.getenv(env_var)
|
||||
if val and os.path.isfile(val):
|
||||
return val
|
||||
return True
|
||||
|
||||
# Provider names that can appear as a "provider:" prefix before a model ID.
|
||||
# Only these are stripped — Ollama-style "model:tag" colons (e.g. "qwen3.5:27b")
|
||||
# are preserved so the full model name reaches cache lookups and server queries.
|
||||
@@ -86,9 +106,11 @@ _endpoint_model_metadata_cache_time: Dict[str, float] = {}
|
||||
_ENDPOINT_MODEL_CACHE_TTL = 300
|
||||
|
||||
# Descending tiers for context length probing when the model is unknown.
|
||||
# We start at 128K (a safe default for most modern models) and step down
|
||||
# on context-length errors until one works.
|
||||
# We start at 256K (covers GPT-5.x, many current large-context models) and
|
||||
# step down on context-length errors until one works. Tier[0] is also the
|
||||
# default fallback when no detection method succeeds.
|
||||
CONTEXT_PROBE_TIERS = [
|
||||
256_000,
|
||||
128_000,
|
||||
64_000,
|
||||
32_000,
|
||||
@@ -123,9 +145,11 @@ DEFAULT_CONTEXT_LENGTHS = {
|
||||
"claude": 200000,
|
||||
# OpenAI — GPT-5 family (most have 400k; specific overrides first)
|
||||
# Source: https://developers.openai.com/api/docs/models
|
||||
# GPT-5.5 (launched Apr 23 2026). Verified via live ChatGPT codex/models
|
||||
# endpoint: bare slug `gpt-5.5`, no -pro/-mini variants. 400k context on Codex.
|
||||
"gpt-5.5": 400000,
|
||||
# GPT-5.5 (launched Apr 23 2026) is 1.05M on the direct OpenAI API and
|
||||
# ChatGPT Codex OAuth caps it at 272K; both paths resolve via their own
|
||||
# provider-aware branches (_resolve_codex_oauth_context_length + models.dev).
|
||||
# This hardcoded value is only reached when every probe misses.
|
||||
"gpt-5.5": 1050000,
|
||||
"gpt-5.4-nano": 400000, # 400k (not 1.05M like full 5.4)
|
||||
"gpt-5.4-mini": 400000, # 400k (not 1.05M like full 5.4)
|
||||
"gpt-5.4": 1050000, # GPT-5.4, GPT-5.4 Pro (1.05M context)
|
||||
@@ -141,7 +165,17 @@ DEFAULT_CONTEXT_LENGTHS = {
|
||||
"gemma-4-31b": 256000,
|
||||
"gemma-3": 131072,
|
||||
"gemma": 8192, # fallback for older gemma models
|
||||
# DeepSeek
|
||||
# DeepSeek — V4 family ships with a 1M context window. The legacy
|
||||
# aliases ``deepseek-chat`` / ``deepseek-reasoner`` are server-side
|
||||
# mapped to the non-thinking / thinking modes of ``deepseek-v4-flash``
|
||||
# and inherit the same 1M window. The ``deepseek`` substring entry
|
||||
# below remains as a 128K fallback for older / unknown DeepSeek model
|
||||
# ids (e.g. via custom endpoints).
|
||||
# https://api-docs.deepseek.com/zh-cn/quick_start/pricing
|
||||
"deepseek-v4-pro": 1_000_000,
|
||||
"deepseek-v4-flash": 1_000_000,
|
||||
"deepseek-chat": 1_000_000,
|
||||
"deepseek-reasoner": 1_000_000,
|
||||
"deepseek": 128000,
|
||||
# Meta
|
||||
"llama": 131072,
|
||||
@@ -494,7 +528,7 @@ def fetch_model_metadata(force_refresh: bool = False) -> Dict[str, Dict[str, Any
|
||||
return _model_metadata_cache
|
||||
|
||||
try:
|
||||
response = requests.get(OPENROUTER_MODELS_URL, timeout=10)
|
||||
response = requests.get(OPENROUTER_MODELS_URL, timeout=10, verify=_resolve_requests_verify())
|
||||
response.raise_for_status()
|
||||
data = response.json()
|
||||
|
||||
@@ -561,6 +595,7 @@ def fetch_endpoint_model_metadata(
|
||||
server_url.rstrip("/") + "/api/v1/models",
|
||||
headers=headers,
|
||||
timeout=10,
|
||||
verify=_resolve_requests_verify(),
|
||||
)
|
||||
response.raise_for_status()
|
||||
payload = response.json()
|
||||
@@ -609,7 +644,7 @@ def fetch_endpoint_model_metadata(
|
||||
for candidate in candidates:
|
||||
url = candidate.rstrip("/") + "/models"
|
||||
try:
|
||||
response = requests.get(url, headers=headers, timeout=10)
|
||||
response = requests.get(url, headers=headers, timeout=10, verify=_resolve_requests_verify())
|
||||
response.raise_for_status()
|
||||
payload = response.json()
|
||||
cache: Dict[str, Dict[str, Any]] = {}
|
||||
@@ -640,9 +675,10 @@ def fetch_endpoint_model_metadata(
|
||||
try:
|
||||
# Try /v1/props first (current llama.cpp); fall back to /props for older builds
|
||||
base = candidate.rstrip("/").replace("/v1", "")
|
||||
props_resp = requests.get(base + "/v1/props", headers=headers, timeout=5)
|
||||
_verify = _resolve_requests_verify()
|
||||
props_resp = requests.get(base + "/v1/props", headers=headers, timeout=5, verify=_verify)
|
||||
if not props_resp.ok:
|
||||
props_resp = requests.get(base + "/props", headers=headers, timeout=5)
|
||||
props_resp = requests.get(base + "/props", headers=headers, timeout=5, verify=_verify)
|
||||
if props_resp.ok:
|
||||
props = props_resp.json()
|
||||
gen_settings = props.get("default_generation_settings", {})
|
||||
@@ -714,6 +750,22 @@ def get_cached_context_length(model: str, base_url: str) -> Optional[int]:
|
||||
return cache.get(key)
|
||||
|
||||
|
||||
def _invalidate_cached_context_length(model: str, base_url: str) -> None:
    """Remove the persisted context length for ``model`` at ``base_url``.

    The entry is looked up under the key ``"<model>@<base_url>"``. When it is
    present it is deleted and the remaining entries are written back to the
    cache file under the ``context_lengths`` key, so the value is re-resolved
    on the next lookup. A missing entry is a no-op. Failures while writing
    are logged at debug level and otherwise ignored.
    """
    entries = _load_context_cache()
    cache_key = f"{model}@{base_url}"
    if cache_key not in entries:
        return

    entries.pop(cache_key)
    cache_file = _get_context_cache_path()
    try:
        cache_file.parent.mkdir(parents=True, exist_ok=True)
        with open(cache_file, "w") as handle:
            yaml.dump({"context_lengths": entries}, handle, default_flow_style=False)
    except Exception as exc:
        logger.debug("Failed to invalidate context length cache entry %s: %s", cache_key, exc)
|
||||
|
||||
|
||||
def get_next_probe_tier(current_length: int) -> Optional[int]:
|
||||
"""Return the next lower probe tier, or None if already at minimum."""
|
||||
for tier in CONTEXT_PROBE_TIERS:
|
||||
@@ -991,7 +1043,7 @@ def _query_anthropic_context_length(model: str, base_url: str, api_key: str) ->
|
||||
"x-api-key": api_key,
|
||||
"anthropic-version": "2023-06-01",
|
||||
}
|
||||
resp = requests.get(url, headers=headers, timeout=10)
|
||||
resp = requests.get(url, headers=headers, timeout=10, verify=_resolve_requests_verify())
|
||||
if resp.status_code != 200:
|
||||
return None
|
||||
data = resp.json()
|
||||
@@ -1005,6 +1057,116 @@ def _query_anthropic_context_length(model: str, base_url: str, api_key: str) ->
|
||||
return None
|
||||
|
||||
|
||||
# Known ChatGPT Codex OAuth context windows (observed via live
|
||||
# chatgpt.com/backend-api/codex/models probe, Apr 2026). These are the
|
||||
# `context_window` values, which are what Codex actually enforces — the
|
||||
# direct OpenAI API has larger limits for the same slugs, but Codex OAuth
|
||||
# caps lower (e.g. gpt-5.5 is 1.05M on the API, 272K on Codex).
|
||||
#
|
||||
# Used as a fallback when the live probe fails (no token, network error).
|
||||
# Longest keys first so substring match picks the most specific entry.
|
||||
_CODEX_OAUTH_CONTEXT_FALLBACK: Dict[str, int] = {
|
||||
"gpt-5.1-codex-max": 272_000,
|
||||
"gpt-5.1-codex-mini": 272_000,
|
||||
"gpt-5.3-codex": 272_000,
|
||||
"gpt-5.2-codex": 272_000,
|
||||
"gpt-5.4-mini": 272_000,
|
||||
"gpt-5.5": 272_000,
|
||||
"gpt-5.4": 272_000,
|
||||
"gpt-5.2": 272_000,
|
||||
"gpt-5": 272_000,
|
||||
}
|
||||
|
||||
|
||||
_codex_oauth_context_cache: Dict[str, int] = {}
|
||||
_codex_oauth_context_cache_time: float = 0.0
|
||||
_CODEX_OAUTH_CONTEXT_CACHE_TTL = 3600 # 1 hour
|
||||
|
||||
|
||||
def _fetch_codex_oauth_context_lengths(access_token: str) -> Dict[str, int]:
    """Ask the ChatGPT Codex /models endpoint for per-slug context windows.

    Codex OAuth applies its own context limits, which differ from the direct
    OpenAI API (e.g. gpt-5.5 is 1.05M on the API, 272K on Codex); each model
    entry's ``context_window`` field is taken as the authoritative value.

    A non-empty result is memoised module-wide and reused for
    ``_CODEX_OAUTH_CONTEXT_CACHE_TTL`` seconds. Empty results are never
    cached, so a failed probe is retried on the next call.

    Args:
        access_token: bearer token sent in the ``Authorization`` header.

    Returns:
        A ``{slug: context_window}`` dict; empty when the request fails, the
        endpoint answers with a non-200 status, or no usable entry is found.
    """
    global _codex_oauth_context_cache, _codex_oauth_context_cache_time

    started = time.time()
    cache_age = started - _codex_oauth_context_cache_time
    if _codex_oauth_context_cache and cache_age < _CODEX_OAUTH_CONTEXT_CACHE_TTL:
        return _codex_oauth_context_cache

    try:
        response = requests.get(
            "https://chatgpt.com/backend-api/codex/models?client_version=1.0.0",
            headers={"Authorization": f"Bearer {access_token}"},
            timeout=10,
            verify=_resolve_requests_verify(),
        )
        if response.status_code != 200:
            logger.debug(
                "Codex /models probe returned HTTP %s; falling back to hardcoded defaults",
                response.status_code,
            )
            return {}
        payload = response.json()
    except Exception as exc:
        logger.debug("Codex /models probe failed: %s", exc)
        return {}

    models = payload.get("models", []) if isinstance(payload, dict) else []
    windows: Dict[str, int] = {}
    for entry in models:
        if not isinstance(entry, dict):
            continue
        name = entry.get("slug")
        window = entry.get("context_window")
        # Keep only well-formed entries: string slug, positive integer window.
        if not isinstance(name, str) or not isinstance(window, int) or window <= 0:
            continue
        windows[name.strip()] = window

    if windows:
        _codex_oauth_context_cache = windows
        _codex_oauth_context_cache_time = started
    return windows
|
||||
|
||||
|
||||
def _resolve_codex_oauth_context_length(
    model: str, access_token: str = ""
) -> Optional[int]:
    """Work out the context window Codex OAuth enforces for ``model``.

    Resolution order:

    1. When ``access_token`` is non-empty, the live probe result from
       ``_fetch_codex_oauth_context_lengths`` — exact slug match first, then
       a case-insensitive match.
    2. ``_CODEX_OAUTH_CONTEXT_FALLBACK``, trying the longest keys first and
       accepting the first key that is a substring of the lower-cased model.

    Returns:
        The context window, or ``None`` when the model name is empty after
        stripping the provider prefix or nothing matches.
    """
    bare = _strip_provider_prefix(model).strip()
    if not bare:
        return None

    needle = bare.lower()

    if access_token:
        probed = _fetch_codex_oauth_context_lengths(access_token)
        if bare in probed:
            return probed[bare]
        # Tolerate casing drift between the configured name and the live slug.
        for live_slug in probed:
            if live_slug.lower() == needle:
                return probed[live_slug]

    # Hardcoded defaults: most specific (longest) key wins.
    by_specificity = sorted(_CODEX_OAUTH_CONTEXT_FALLBACK, key=len, reverse=True)
    for known_slug in by_specificity:
        if known_slug in needle:
            return _CODEX_OAUTH_CONTEXT_FALLBACK[known_slug]

    return None
|
||||
|
||||
|
||||
def _resolve_nous_context_length(model: str) -> Optional[int]:
|
||||
"""Resolve Nous Portal model context length via OpenRouter metadata.
|
||||
|
||||
@@ -1044,12 +1206,14 @@ def get_model_context_length(
|
||||
api_key: str = "",
|
||||
config_context_length: int | None = None,
|
||||
provider: str = "",
|
||||
custom_providers: list | None = None,
|
||||
) -> int:
|
||||
"""Get the context length for a model.
|
||||
|
||||
Resolution order:
|
||||
0. Explicit config override (model.context_length or custom_providers per-model)
|
||||
1. Persistent cache (previously discovered via probing)
|
||||
1b. AWS Bedrock static table (must precede custom-endpoint probe)
|
||||
2. Active endpoint metadata (/models for explicit custom endpoints)
|
||||
3. Local server query (for local endpoints)
|
||||
4. Anthropic /v1/models API (API-key users only, not OAuth)
|
||||
@@ -1063,6 +1227,23 @@ def get_model_context_length(
|
||||
if config_context_length is not None and isinstance(config_context_length, int) and config_context_length > 0:
|
||||
return config_context_length
|
||||
|
||||
# 0b. custom_providers per-model override — check before any probe.
|
||||
# This closes the gap where /model switch and display paths used to fall
|
||||
# back to 128K despite the user having a per-model context_length set.
|
||||
# See #15779.
|
||||
if custom_providers and base_url and model:
|
||||
try:
|
||||
from hermes_cli.config import get_custom_provider_context_length
|
||||
cp_ctx = get_custom_provider_context_length(
|
||||
model=model,
|
||||
base_url=base_url,
|
||||
custom_providers=custom_providers,
|
||||
)
|
||||
if cp_ctx:
|
||||
return cp_ctx
|
||||
except Exception:
|
||||
pass # fall through to probing
|
||||
|
||||
# Normalise provider-prefixed model names (e.g. "local:model-name" →
|
||||
# "model-name") so cache lookups and server queries use the bare ID that
|
||||
# local servers actually know about. Ollama "model:tag" colons are preserved.
|
||||
@@ -1072,7 +1253,41 @@ def get_model_context_length(
|
||||
if base_url:
|
||||
cached = get_cached_context_length(model, base_url)
|
||||
if cached is not None:
|
||||
return cached
|
||||
# Invalidate stale Codex OAuth cache entries: pre-PR #14935 builds
|
||||
# resolved gpt-5.x to the direct-API value (e.g. 1.05M) via
|
||||
# models.dev and persisted it. Codex OAuth caps at 272K for every
|
||||
# slug, so any cached Codex entry at or above 400K is a leftover
|
||||
# from the old resolution path. Drop it and fall through to the
|
||||
# live /models probe in step 5 below.
|
||||
if provider == "openai-codex" and cached >= 400_000:
|
||||
logger.info(
|
||||
"Dropping stale Codex cache entry %s@%s -> %s (pre-fix value); "
|
||||
"re-resolving via live /models probe",
|
||||
model, base_url, f"{cached:,}",
|
||||
)
|
||||
_invalidate_cached_context_length(model, base_url)
|
||||
else:
|
||||
return cached
|
||||
|
||||
# 1b. AWS Bedrock — use static context length table.
|
||||
# Bedrock's ListFoundationModels API doesn't expose context window sizes,
|
||||
# so we maintain a curated table in bedrock_adapter.py that reflects
|
||||
# AWS-imposed limits (e.g. 200K for Claude models vs 1M on the native
|
||||
# Anthropic API). This must run BEFORE the custom-endpoint probe at
|
||||
# step 2 — bedrock-runtime.<region>.amazonaws.com is not in
|
||||
# _URL_TO_PROVIDER, so it would otherwise be treated as a custom endpoint,
|
||||
# fail the /models probe (Bedrock doesn't expose that shape), and fall
|
||||
# back to the 128K default before reaching the original step 4b branch.
|
||||
if provider == "bedrock" or (
|
||||
base_url
|
||||
and base_url_hostname(base_url).startswith("bedrock-runtime.")
|
||||
and base_url_host_matches(base_url, "amazonaws.com")
|
||||
):
|
||||
try:
|
||||
from agent.bedrock_adapter import get_bedrock_context_length
|
||||
return get_bedrock_context_length(model)
|
||||
except ImportError:
|
||||
pass # boto3 not installed — fall through to generic resolution
|
||||
|
||||
# 2. Active endpoint metadata for truly custom/unknown endpoints.
|
||||
# Known providers (Copilot, OpenAI, Anthropic, etc.) skip this — their
|
||||
@@ -1119,19 +1334,7 @@ def get_model_context_length(
|
||||
if ctx:
|
||||
return ctx
|
||||
|
||||
# 4b. AWS Bedrock — use static context length table.
|
||||
# Bedrock's ListFoundationModels doesn't expose context window sizes,
|
||||
# so we maintain a curated table in bedrock_adapter.py.
|
||||
if provider == "bedrock" or (
|
||||
base_url
|
||||
and base_url_hostname(base_url).startswith("bedrock-runtime.")
|
||||
and base_url_host_matches(base_url, "amazonaws.com")
|
||||
):
|
||||
try:
|
||||
from agent.bedrock_adapter import get_bedrock_context_length
|
||||
return get_bedrock_context_length(model)
|
||||
except ImportError:
|
||||
pass # boto3 not installed — fall through to generic resolution
|
||||
# 4b. (Bedrock handled earlier at step 1b — before custom-endpoint probe.)
|
||||
|
||||
# 5. Provider-aware lookups (before generic OpenRouter cache)
|
||||
# These are provider-specific and take priority over the generic OR cache,
|
||||
@@ -1145,10 +1348,32 @@ def get_model_context_length(
|
||||
if inferred:
|
||||
effective_provider = inferred
|
||||
|
||||
# 5a. Copilot live /models API — max_prompt_tokens from the user's account.
|
||||
# This catches account-specific models (e.g. claude-opus-4.6-1m) that
|
||||
# don't exist in models.dev. For models that ARE in models.dev, this
|
||||
# returns the provider-enforced limit which is what users can actually use.
|
||||
if effective_provider in ("copilot", "copilot-acp", "github-copilot"):
|
||||
try:
|
||||
from hermes_cli.models import get_copilot_model_context
|
||||
ctx = get_copilot_model_context(model, api_key=api_key)
|
||||
if ctx:
|
||||
return ctx
|
||||
except Exception:
|
||||
pass # Fall through to models.dev
|
||||
|
||||
if effective_provider == "nous":
|
||||
ctx = _resolve_nous_context_length(model)
|
||||
if ctx:
|
||||
return ctx
|
||||
if effective_provider == "openai-codex":
|
||||
# Codex OAuth enforces lower context limits than the direct OpenAI
|
||||
# API for the same slug (e.g. gpt-5.5 is 1.05M on the API but 272K
|
||||
# on Codex). Authoritative source is Codex's own /models endpoint.
|
||||
codex_ctx = _resolve_codex_oauth_context_length(model, access_token=api_key or "")
|
||||
if codex_ctx:
|
||||
if base_url:
|
||||
save_context_length(model, base_url, codex_ctx)
|
||||
return codex_ctx
|
||||
if effective_provider:
|
||||
from agent.models_dev import lookup_models_dev_context
|
||||
ctx = lookup_models_dev_context(effective_provider, model)
|
||||
@@ -1158,7 +1383,7 @@ def get_model_context_length(
|
||||
# 6. OpenRouter live API metadata (provider-unaware fallback)
|
||||
metadata = fetch_model_metadata()
|
||||
if model in metadata:
|
||||
return metadata[model].get("context_length", 128000)
|
||||
return metadata[model].get("context_length", DEFAULT_FALLBACK_CONTEXT)
|
||||
|
||||
# 8. Hardcoded defaults (fuzzy match — longest key first for specificity)
|
||||
# Only check `default_model in model` (is the key a substring of the input).
|
||||
|
||||
@@ -180,3 +180,145 @@ def format_remaining(seconds: float) -> str:
|
||||
h, remainder = divmod(s, 3600)
|
||||
m = remainder // 60
|
||||
return f"{h}h {m}m" if m else f"{h}h"
|
||||
|
||||
|
||||
# Buckets with reset windows shorter than this are treated as transient
|
||||
# (upstream jitter, secondary throttling) rather than a genuine quota
|
||||
# exhaustion worth a cross-session breaker trip.
|
||||
_MIN_RESET_FOR_BREAKER_SECONDS = 60.0
|
||||
|
||||
|
||||
def is_genuine_nous_rate_limit(
|
||||
*,
|
||||
headers: Optional[Mapping[str, str]] = None,
|
||||
last_known_state: Optional[Any] = None,
|
||||
) -> bool:
|
||||
"""Decide whether a 429 from Nous Portal is a real account rate limit.
|
||||
|
||||
Nous Portal multiplexes multiple upstream providers (DeepSeek, Kimi,
|
||||
MiMo, Hermes, ...) behind one endpoint. A 429 can mean either:
|
||||
|
||||
(a) The caller's own RPM / RPH / TPM / TPH bucket on Nous is
|
||||
exhausted — a genuine rate limit that will last until the
|
||||
bucket resets.
|
||||
(b) The upstream provider is out of capacity for a specific model
|
||||
— transient, clears in seconds, and has nothing to do with
|
||||
the caller's quota on Nous.
|
||||
|
||||
Tripping the cross-session breaker on (b) blocks ALL Nous requests
|
||||
(and all models, since Nous is one provider key) for minutes even
|
||||
though the caller's account is healthy and a different model would
|
||||
have worked. That's the bug users hit when DeepSeek V4 Pro 429s
|
||||
trigger a breaker that then blocks Kimi 2.6 and MiMo V2.5 Pro.
|
||||
|
||||
We tell the two apart by looking at:
|
||||
|
||||
1. The 429 response's own ``x-ratelimit-*`` headers. Nous emits
|
||||
the full suite on every response including 429s. An exhausted
|
||||
bucket (``remaining == 0`` with a reset window >= 60s) is
|
||||
proof of (a).
|
||||
2. The last-known-good rate-limit state captured by
|
||||
``_capture_rate_limits()`` on the previous successful
|
||||
response. If any bucket there was already near-exhausted with
|
||||
a substantial reset window, the current 429 is almost
|
||||
certainly (a) continuing from that condition.
|
||||
|
||||
If neither signal fires, we treat the 429 as (b): fail the single
|
||||
request, let the retry loop or model-switch proceed, and do NOT
|
||||
write the cross-session breaker file.
|
||||
|
||||
Returns True when the evidence points at (a).
|
||||
"""
|
||||
# Signal 1: current 429 response headers.
|
||||
state = _parse_buckets_from_headers(headers)
|
||||
if _has_exhausted_bucket(state):
|
||||
return True
|
||||
|
||||
# Signal 2: last-known-good state from a recent successful response.
|
||||
# Accepts either a RateLimitState (dataclass from rate_limit_tracker)
|
||||
# or a dict of bucket snapshots.
|
||||
if last_known_state is not None and _has_exhausted_bucket_in_object(last_known_state):
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
|
||||
def _parse_buckets_from_headers(
|
||||
headers: Optional[Mapping[str, str]],
|
||||
) -> dict[str, tuple[Optional[int], Optional[float]]]:
|
||||
"""Extract (remaining, reset_seconds) per bucket from x-ratelimit-* headers.
|
||||
|
||||
Returns empty dict when no rate-limit headers are present.
|
||||
"""
|
||||
if not headers:
|
||||
return {}
|
||||
|
||||
lowered = {k.lower(): v for k, v in headers.items()}
|
||||
if not any(k.startswith("x-ratelimit-") for k in lowered):
|
||||
return {}
|
||||
|
||||
def _maybe_int(raw: Optional[str]) -> Optional[int]:
|
||||
if raw is None:
|
||||
return None
|
||||
try:
|
||||
return int(float(raw))
|
||||
except (TypeError, ValueError):
|
||||
return None
|
||||
|
||||
def _maybe_float(raw: Optional[str]) -> Optional[float]:
|
||||
if raw is None:
|
||||
return None
|
||||
try:
|
||||
return float(raw)
|
||||
except (TypeError, ValueError):
|
||||
return None
|
||||
|
||||
result: dict[str, tuple[Optional[int], Optional[float]]] = {}
|
||||
for tag in ("requests", "requests-1h", "tokens", "tokens-1h"):
|
||||
remaining = _maybe_int(lowered.get(f"x-ratelimit-remaining-{tag}"))
|
||||
reset = _maybe_float(lowered.get(f"x-ratelimit-reset-{tag}"))
|
||||
if remaining is not None or reset is not None:
|
||||
result[tag] = (remaining, reset)
|
||||
return result
|
||||
|
||||
|
||||
def _has_exhausted_bucket(
|
||||
buckets: Mapping[str, tuple[Optional[int], Optional[float]]],
|
||||
) -> bool:
|
||||
"""Return True when any bucket has remaining == 0 AND a meaningful reset window."""
|
||||
for remaining, reset in buckets.values():
|
||||
if remaining is None or remaining > 0:
|
||||
continue
|
||||
if reset is None:
|
||||
continue
|
||||
if reset >= _MIN_RESET_FOR_BREAKER_SECONDS:
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def _has_exhausted_bucket_in_object(state: Any) -> bool:
|
||||
"""Check a RateLimitState-like object for an exhausted bucket.
|
||||
|
||||
Accepts the dataclass from ``agent.rate_limit_tracker`` (buckets
|
||||
exposed as attributes ``requests_min``, ``requests_hour``,
|
||||
``tokens_min``, ``tokens_hour``) and falls back gracefully for any
|
||||
object missing those attributes.
|
||||
"""
|
||||
for attr in ("requests_min", "requests_hour", "tokens_min", "tokens_hour"):
|
||||
bucket = getattr(state, attr, None)
|
||||
if bucket is None:
|
||||
continue
|
||||
limit = getattr(bucket, "limit", 0) or 0
|
||||
remaining = getattr(bucket, "remaining", 0) or 0
|
||||
# Prefer the adjusted "remaining_seconds_now" property when present;
|
||||
# fall back to raw reset_seconds.
|
||||
reset = getattr(bucket, "remaining_seconds_now", None)
|
||||
if reset is None:
|
||||
reset = getattr(bucket, "reset_seconds", 0.0) or 0.0
|
||||
if limit <= 0:
|
||||
continue
|
||||
if remaining > 0:
|
||||
continue
|
||||
if reset >= _MIN_RESET_FOR_BREAKER_SECONDS:
|
||||
return True
|
||||
return False
|
||||
|
||||
@@ -0,0 +1,144 @@
|
||||
"""
|
||||
Contextual first-touch onboarding hints.
|
||||
|
||||
Instead of blocking first-run questionnaires, show a one-time hint the *first*
|
||||
time a user hits a behavior fork — message-while-running, first long-running
|
||||
tool, etc. Each hint is shown once per install (tracked in ``config.yaml`` under
|
||||
``onboarding.seen.<flag>``) and then never again.
|
||||
|
||||
Keep this module tiny and dependency-free so both the CLI and gateway can import
|
||||
it without pulling in heavy modules.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Any, Mapping, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# Flag names (stable — used as config.yaml keys under onboarding.seen)
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
BUSY_INPUT_FLAG = "busy_input_prompt"
|
||||
TOOL_PROGRESS_FLAG = "tool_progress_prompt"
|
||||
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# Hint content
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
def busy_input_hint_gateway(mode: str) -> str:
    """Build the one-time gateway tip for a message sent while the agent is busy.

    ``mode`` is the effective busy_input_mode that was just applied, so the
    wording matches what actually happened: ``"queue"`` yields the
    "I queued your message" variant, any other value yields the
    "I just interrupted" variant.
    """
    queued_tip = (
        "💡 First-time tip — I queued your message instead of interrupting. "
        "Send `/busy interrupt` to make new messages stop the current task "
        "immediately, or `/busy status` to check. This notice won't appear again."
    )
    interrupted_tip = (
        "💡 First-time tip — I just interrupted my current task to answer you. "
        "Send `/busy queue` to queue follow-ups for after the current task instead, "
        "or `/busy status` to check. This notice won't appear again."
    )
    return queued_tip if mode == "queue" else interrupted_tip
|
||||
|
||||
|
||||
def busy_input_hint_cli(mode: str) -> str:
    """CLI wording of the busy-input tip (plain text, no markdown).

    ``"queue"`` selects the "was queued" message; every other ``mode``
    selects the "interrupted" message.
    """
    if mode != "queue":
        return (
            "(tip) Your message interrupted the current run. "
            "Use /busy queue to queue messages for the next turn instead. "
            "This tip only shows once."
        )
    return (
        "(tip) Your message was queued for the next turn. "
        "Use /busy interrupt to make Enter stop the current run instead. "
        "This tip only shows once."
    )
|
||||
|
||||
|
||||
def tool_progress_hint_gateway() -> str:
    """Return the one-time gateway tip about noisy tool-progress messages."""
    sentences = (
        "💡 First-time tip — that tool took a while and I'm streaming every step. ",
        "If the progress messages feel noisy, send `/verbose` to cycle modes ",
        "(all → new → off). This notice won't appear again.",
    )
    return "".join(sentences)
|
||||
|
||||
|
||||
def tool_progress_hint_cli() -> str:
    """Return the one-time CLI tip about the /verbose tool-progress modes."""
    sentences = (
        "(tip) That tool ran for a while. Use /verbose to cycle tool-progress ",
        "display modes (all -> new -> off -> verbose). This tip only shows once.",
    )
    return "".join(sentences)
|
||||
|
||||
|
||||
# -------------------------------------------------------------------------
|
||||
# State read / write
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
def _get_seen_dict(config: Mapping[str, Any]) -> Mapping[str, Any]:
|
||||
onboarding = config.get("onboarding") if isinstance(config, Mapping) else None
|
||||
if not isinstance(onboarding, Mapping):
|
||||
return {}
|
||||
seen = onboarding.get("seen")
|
||||
return seen if isinstance(seen, Mapping) else {}
|
||||
|
||||
|
||||
def is_seen(config: Mapping[str, Any], flag: str) -> bool:
|
||||
"""Return True if the user has already been shown this first-touch hint."""
|
||||
return bool(_get_seen_dict(config).get(flag))
|
||||
|
||||
|
||||
def mark_seen(config_path: Path, flag: str) -> bool:
    """Persist ``onboarding.seen.<flag> = True`` to ``config_path``.

    The existing YAML document is loaded (an absent file is treated as an
    empty document, so a new file is written in that case), the
    ``onboarding`` / ``seen`` mappings are created or replaced when they are
    missing or not dicts, and the result is handed to
    ``utils.atomic_yaml_write`` — presumably an atomic replace, so that a
    concurrent process cannot observe a partially written file. Nothing is
    written when the flag is already exactly ``True``.

    Onboarding is best-effort: every failure is logged at debug level.

    Returns:
        True when the flag is (now or already) recorded, False when the
        dependencies cannot be imported or reading/writing the file fails.
    """
    try:
        import yaml
        from utils import atomic_yaml_write
    except Exception as e:  # pragma: no cover — dependency issue
        logger.debug("onboarding: failed to import yaml/utils: %s", e)
        return False

    try:
        if config_path.exists():
            with open(config_path, encoding="utf-8") as handle:
                document = yaml.safe_load(handle) or {}
        else:
            document = {}

        section = document.get("onboarding")
        if not isinstance(section, dict):
            section = document["onboarding"] = {}

        flags = section.get("seen")
        if not isinstance(flags, dict):
            flags = section["seen"] = {}

        if flags.get(flag) is True:
            return True  # already marked — nothing to do

        flags[flag] = True
        atomic_yaml_write(config_path, document)
        return True
    except Exception as exc:
        logger.debug("onboarding: failed to mark flag %s: %s", flag, exc)
        return False
|
||||
|
||||
|
||||
__all__ = [
|
||||
"BUSY_INPUT_FLAG",
|
||||
"TOOL_PROGRESS_FLAG",
|
||||
"busy_input_hint_gateway",
|
||||
"busy_input_hint_cli",
|
||||
"tool_progress_hint_gateway",
|
||||
"tool_progress_hint_cli",
|
||||
"is_seen",
|
||||
"mark_seen",
|
||||
]
|
||||
@@ -176,6 +176,64 @@ SKILLS_GUIDANCE = (
|
||||
"Skills that aren't maintained become liabilities."
|
||||
)
|
||||
|
||||
KANBAN_GUIDANCE = (
|
||||
"# You are a Kanban worker\n"
|
||||
"You were spawned by the Hermes Kanban dispatcher to execute ONE task from "
|
||||
"the shared board at `~/.hermes/kanban.db`. Your task id is in "
|
||||
"`$HERMES_KANBAN_TASK`; your workspace is `$HERMES_KANBAN_WORKSPACE`. "
|
||||
"The `kanban_*` tools in your schema are your primary coordination surface — "
|
||||
"they write directly to the shared SQLite DB and work regardless of terminal "
|
||||
"backend (local/docker/modal/ssh).\n"
|
||||
"\n"
|
||||
"## Lifecycle\n"
|
||||
"\n"
|
||||
"1. **Orient.** Call `kanban_show()` first (no args — it defaults to your "
|
||||
"task). The response includes title, body, parent-task handoffs (summary + "
|
||||
"metadata), any prior attempts on this task if you're a retry, the full "
|
||||
"comment thread, and a pre-formatted `worker_context` you can treat as "
|
||||
"ground truth.\n"
|
||||
"2. **Work inside the workspace.** `cd $HERMES_KANBAN_WORKSPACE` before "
|
||||
"any file operations. The workspace is yours for this run. Don't modify "
|
||||
"files outside it unless the task explicitly asks.\n"
|
||||
"3. **Heartbeat on long operations.** Call `kanban_heartbeat(note=...)` "
|
||||
"every few minutes during long subprocesses (training, encoding, crawling). "
|
||||
"Skip heartbeats for short tasks.\n"
|
||||
"4. **Block on genuine ambiguity.** If you need a human decision you cannot "
|
||||
"infer (missing credentials, UX choice, paywalled source, peer output you "
|
||||
"need first), call `kanban_block(reason=\"...\")` and stop. Don't guess. "
|
||||
"The user will unblock with context and the dispatcher will respawn you.\n"
|
||||
"5. **Complete with structured handoff.** Call `kanban_complete(summary=..., "
|
||||
"metadata=...)`. `summary` is 1–3 human-readable sentences naming concrete "
|
||||
"artifacts. `metadata` is machine-readable facts "
|
||||
"(`{changed_files: [...], tests_run: N, decisions: [...]}`). Downstream "
|
||||
"workers read both via their own `kanban_show`. Never put secrets / "
|
||||
"tokens / raw PII in either field — run rows are durable forever.\n"
|
||||
"6. **If follow-up work appears, create it; don't do it.** Use "
|
||||
"`kanban_create(title=..., assignee=<right-profile>, parents=[your-task-id])` "
|
||||
"to spawn a child task for the appropriate specialist profile instead of "
|
||||
"scope-creeping into the next thing.\n"
|
||||
"\n"
|
||||
"## Orchestrator mode\n"
|
||||
"\n"
|
||||
"If your task is itself a decomposition task (e.g. a planner profile given "
|
||||
"a high-level goal), use `kanban_create` to fan out into child tasks — one "
|
||||
"per specialist, each with an explicit `assignee` and `parents=[...]` to "
|
||||
"express dependencies. Then `kanban_complete` your own task with a summary "
|
||||
"of the decomposition. Do NOT execute the work yourself; your job is "
|
||||
"routing, not implementation.\n"
|
||||
"\n"
|
||||
"## Do NOT\n"
|
||||
"\n"
|
||||
"- Do not shell out to `hermes kanban <verb>` for board operations. Use "
|
||||
"the `kanban_*` tools — they work across all terminal backends.\n"
|
||||
"- Do not complete a task you didn't actually finish. Block it.\n"
|
||||
"- Do not assign follow-up work to yourself. Assign it to the right "
|
||||
"specialist profile.\n"
|
||||
"- Do not call `delegate_task` as a board substitute. `delegate_task` is "
|
||||
"for short reasoning subtasks inside your own run; board tasks are for "
|
||||
"cross-agent handoffs that outlive one API loop."
|
||||
)
|
||||
|
||||
TOOL_USE_ENFORCEMENT_GUIDANCE = (
|
||||
"# Tool-use enforcement\n"
|
||||
"You MUST use your tools to take action — do not describe what you would do "
|
||||
|
||||
+10
-133
@@ -1,154 +1,29 @@
|
||||
"""Shared slash command helpers for skills and built-in prompt-style modes.
|
||||
"""Shared slash command helpers for skills.
|
||||
|
||||
Shared between CLI (cli.py) and gateway (gateway/run.py) so both surfaces
|
||||
can invoke skills via /skill-name commands and prompt-only built-ins like
|
||||
/plan.
|
||||
can invoke skills via /skill-name commands.
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
import subprocess
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
from hermes_constants import display_hermes_home
|
||||
from agent.skill_preprocessing import (
|
||||
expand_inline_shell as _expand_inline_shell,
|
||||
load_skills_config as _load_skills_config,
|
||||
substitute_template_vars as _substitute_template_vars,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_skill_commands: Dict[str, Dict[str, Any]] = {}
|
||||
_PLAN_SLUG_RE = re.compile(r"[^a-z0-9]+")
|
||||
# Patterns for sanitizing skill names into clean hyphen-separated slugs.
|
||||
_SKILL_INVALID_CHARS = re.compile(r"[^a-z0-9-]")
|
||||
_SKILL_MULTI_HYPHEN = re.compile(r"-{2,}")
|
||||
|
||||
# Matches ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} tokens in SKILL.md.
|
||||
# Tokens that don't resolve (e.g. ${HERMES_SESSION_ID} with no session) are
|
||||
# left as-is so the user can debug them.
|
||||
_SKILL_TEMPLATE_RE = re.compile(r"\$\{(HERMES_SKILL_DIR|HERMES_SESSION_ID)\}")
|
||||
|
||||
# Matches inline shell snippets like: !`date +%Y-%m-%d`
|
||||
# Non-greedy, single-line only — no newlines inside the backticks.
|
||||
_INLINE_SHELL_RE = re.compile(r"!`([^`\n]+)`")
|
||||
|
||||
# Cap inline-shell output so a runaway command can't blow out the context.
|
||||
_INLINE_SHELL_MAX_OUTPUT = 4000
|
||||
|
||||
|
||||
def _load_skills_config() -> dict:
    """Return the ``skills`` mapping from config.yaml, or ``{}`` if unavailable.

    Best-effort: any failure while importing or calling the config loader is
    logged at debug level and an empty dict is returned. A ``skills`` entry
    that is missing or not a dict also yields an empty dict.
    """
    try:
        from hermes_cli.config import load_config

        section = (load_config() or {}).get("skills")
    except Exception:
        logger.debug("Could not read skills config", exc_info=True)
        return {}
    return section if isinstance(section, dict) else {}
|
||||
|
||||
|
||||
def _substitute_template_vars(
    content: str,
    skill_dir: Path | None,
    session_id: str | None,
) -> str:
    """Expand ``${HERMES_SKILL_DIR}`` / ``${HERMES_SESSION_ID}`` in ``content``.

    A token is replaced only when a truthy value is available for it; any
    other token is copied through unchanged so the skill author can see that
    it did not resolve. Empty ``content`` is returned as-is.
    """
    if not content:
        return content

    # Token name -> replacement value (None / falsy means "leave the token").
    values = {
        "HERMES_SKILL_DIR": str(skill_dir) if skill_dir else None,
        "HERMES_SESSION_ID": session_id,
    }

    def _swap(found: re.Match) -> str:
        value = values.get(found.group(1))
        return str(value) if value else found.group(0)

    return _SKILL_TEMPLATE_RE.sub(_swap, content)
|
||||
|
||||
|
||||
def _run_inline_shell(command: str, cwd: Path | None, timeout: int) -> str:
    """Run one inline-shell snippet through ``bash -c`` and return its output.

    NOTE: ``command`` is executed by a shell exactly as given; callers must
    only pass content they are willing to execute.

    Args:
        command: Shell text handed verbatim to ``bash -c``.
        cwd: Working directory for the subprocess; ``None`` inherits the
            current process's working directory.
        timeout: Time limit in seconds. Values below 1 are raised to 1.

    Returns:
        stdout with trailing newlines removed. If stdout is empty and stderr
        is not, stderr (trailing newlines removed) is returned instead. The
        exit status is not inspected. Output longer than
        ``_INLINE_SHELL_MAX_OUTPUT`` characters is cut and suffixed with
        ``…[truncated]``. Failures never raise: a short
        ``[inline-shell ...]`` marker is returned instead, so one bad snippet
        can't wreck the whole skill message.
    """
    try:
        # Clamp once so the limit we enforce is also the one we report.
        effective_timeout = max(1, int(timeout))
        completed = subprocess.run(
            ["bash", "-c", command],
            cwd=str(cwd) if cwd else None,
            capture_output=True,
            text=True,
            timeout=effective_timeout,
            check=False,
        )
    except subprocess.TimeoutExpired:
        return f"[inline-shell timeout after {effective_timeout}s: {command}]"
    except FileNotFoundError:
        return "[inline-shell error: bash not found]"
    except Exception as exc:
        # Includes a non-numeric ``timeout`` failing the int() conversion.
        return f"[inline-shell error: {exc}]"

    output = (completed.stdout or "").rstrip("\n")
    if not output and completed.stderr:
        output = completed.stderr.rstrip("\n")
    if len(output) > _INLINE_SHELL_MAX_OUTPUT:
        output = output[:_INLINE_SHELL_MAX_OUTPUT] + "…[truncated]"
    return output
|
||||
|
||||
|
||||
def _expand_inline_shell(
    content: str,
    skill_dir: Path | None,
    timeout: int,
) -> str:
    """Substitute each !`cmd` snippet in ``content`` with the command's output.

    Every snippet is executed via ``_run_inline_shell`` with ``skill_dir`` as
    the working directory, so relative paths inside a snippet resolve against
    the skill's own directory. Snippets that are blank after stripping are
    replaced with an empty string without running anything.
    """
    # Cheap pre-check: skip the regex pass entirely when no marker is present.
    if "!`" not in content:
        return content

    def _run_snippet(hit: re.Match) -> str:
        snippet = hit.group(1).strip()
        return _run_inline_shell(snippet, skill_dir, timeout) if snippet else ""

    return _INLINE_SHELL_RE.sub(_run_snippet, content)
|
||||
|
||||
|
||||
def build_plan_path(
|
||||
user_instruction: str = "",
|
||||
*,
|
||||
now: datetime | None = None,
|
||||
) -> Path:
|
||||
"""Return the default workspace-relative markdown path for a /plan invocation.
|
||||
|
||||
Relative paths are intentional: file tools are task/backend-aware and resolve
|
||||
them against the active working directory for local, docker, ssh, modal,
|
||||
daytona, and similar terminal backends. That keeps the plan with the active
|
||||
workspace instead of the Hermes host's global home directory.
|
||||
"""
|
||||
slug_source = (user_instruction or "").strip().splitlines()[0] if user_instruction else ""
|
||||
slug = _PLAN_SLUG_RE.sub("-", slug_source.lower()).strip("-")
|
||||
if slug:
|
||||
slug = "-".join(part for part in slug.split("-")[:8] if part)[:48].strip("-")
|
||||
slug = slug or "conversation-plan"
|
||||
timestamp = (now or datetime.now()).strftime("%Y-%m-%d_%H%M%S")
|
||||
return Path(".hermes") / "plans" / f"{timestamp}-{slug}.md"
|
||||
|
||||
|
||||
def _load_skill_payload(skill_identifier: str, task_id: str | None = None) -> tuple[dict[str, Any], Path | None, str] | None:
|
||||
"""Load a skill by name/path and return (loaded_payload, skill_dir, display_name)."""
|
||||
raw_identifier = (skill_identifier or "").strip()
|
||||
@@ -167,7 +42,9 @@ def _load_skill_payload(skill_identifier: str, task_id: str | None = None) -> tu
|
||||
else:
|
||||
normalized = raw_identifier.lstrip("/")
|
||||
|
||||
loaded_skill = json.loads(skill_view(normalized, task_id=task_id))
|
||||
loaded_skill = json.loads(
|
||||
skill_view(normalized, task_id=task_id, preprocess=False)
|
||||
)
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
@@ -0,0 +1,131 @@
|
||||
"""Shared SKILL.md preprocessing helpers."""
|
||||
|
||||
import logging
|
||||
import re
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Matches ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} tokens in SKILL.md.
|
||||
# Tokens that don't resolve (e.g. ${HERMES_SESSION_ID} with no session) are
|
||||
# left as-is so the user can debug them.
|
||||
_SKILL_TEMPLATE_RE = re.compile(r"\$\{(HERMES_SKILL_DIR|HERMES_SESSION_ID)\}")
|
||||
|
||||
# Matches inline shell snippets like: !`date +%Y-%m-%d`
|
||||
# Non-greedy, single-line only -- no newlines inside the backticks.
|
||||
_INLINE_SHELL_RE = re.compile(r"!`([^`\n]+)`")
|
||||
|
||||
# Cap inline-shell output so a runaway command can't blow out the context.
|
||||
_INLINE_SHELL_MAX_OUTPUT = 4000
|
||||
|
||||
|
||||
def load_skills_config() -> dict:
    """Fetch the ``skills`` section of config.yaml without ever raising.

    The config loader is imported lazily inside the function. If importing or
    loading fails for any reason the failure is logged at debug level and
    ``{}`` is returned; the same empty dict is returned when the ``skills``
    entry is absent or is not a dict.
    """
    empty: dict = {}
    try:
        from hermes_cli.config import load_config

        loaded = load_config() or {}
        skills_section = loaded.get("skills")
    except Exception:
        logger.debug("Could not read skills config", exc_info=True)
        return empty
    if not isinstance(skills_section, dict):
        return empty
    return skills_section
|
||||
|
||||
|
||||
def substitute_template_vars(
|
||||
content: str,
|
||||
skill_dir: Path | None,
|
||||
session_id: str | None,
|
||||
) -> str:
|
||||
"""Replace ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} in skill content.
|
||||
|
||||
Only substitutes tokens for which a concrete value is available --
|
||||
unresolved tokens are left in place so the author can spot them.
|
||||
"""
|
||||
if not content:
|
||||
return content
|
||||
|
||||
skill_dir_str = str(skill_dir) if skill_dir else None
|
||||
|
||||
def _replace(match: re.Match) -> str:
|
||||
token = match.group(1)
|
||||
if token == "HERMES_SKILL_DIR" and skill_dir_str:
|
||||
return skill_dir_str
|
||||
if token == "HERMES_SESSION_ID" and session_id:
|
||||
return str(session_id)
|
||||
return match.group(0)
|
||||
|
||||
return _SKILL_TEMPLATE_RE.sub(_replace, content)
|
||||
|
||||
|
||||
def run_inline_shell(command: str, cwd: Path | None, timeout: int) -> str:
|
||||
"""Execute a single inline-shell snippet and return its stdout (trimmed).
|
||||
|
||||
Failures return a short ``[inline-shell error: ...]`` marker instead of
|
||||
raising, so one bad snippet can't wreck the whole skill message.
|
||||
"""
|
||||
try:
|
||||
completed = subprocess.run(
|
||||
["bash", "-c", command],
|
||||
cwd=str(cwd) if cwd else None,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=max(1, int(timeout)),
|
||||
check=False,
|
||||
)
|
||||
except subprocess.TimeoutExpired:
|
||||
return f"[inline-shell timeout after {timeout}s: {command}]"
|
||||
except FileNotFoundError:
|
||||
return "[inline-shell error: bash not found]"
|
||||
except Exception as exc:
|
||||
return f"[inline-shell error: {exc}]"
|
||||
|
||||
output = (completed.stdout or "").rstrip("\n")
|
||||
if not output and completed.stderr:
|
||||
output = completed.stderr.rstrip("\n")
|
||||
if len(output) > _INLINE_SHELL_MAX_OUTPUT:
|
||||
output = output[:_INLINE_SHELL_MAX_OUTPUT] + "...[truncated]"
|
||||
return output
|
||||
|
||||
|
||||
def expand_inline_shell(
|
||||
content: str,
|
||||
skill_dir: Path | None,
|
||||
timeout: int,
|
||||
) -> str:
|
||||
"""Replace every !`cmd` snippet in ``content`` with its stdout.
|
||||
|
||||
Runs each snippet with the skill directory as CWD so relative paths in
|
||||
the snippet work the way the author expects.
|
||||
"""
|
||||
if "!`" not in content:
|
||||
return content
|
||||
|
||||
def _replace(match: re.Match) -> str:
|
||||
cmd = match.group(1).strip()
|
||||
if not cmd:
|
||||
return ""
|
||||
return run_inline_shell(cmd, skill_dir, timeout)
|
||||
|
||||
return _INLINE_SHELL_RE.sub(_replace, content)
|
||||
|
||||
|
||||
def preprocess_skill_content(
|
||||
content: str,
|
||||
skill_dir: Path | None,
|
||||
session_id: str | None = None,
|
||||
skills_cfg: dict | None = None,
|
||||
) -> str:
|
||||
"""Apply configured SKILL.md template and inline-shell preprocessing."""
|
||||
if not content:
|
||||
return content
|
||||
|
||||
cfg = skills_cfg if isinstance(skills_cfg, dict) else load_skills_config()
|
||||
if cfg.get("template_vars", True):
|
||||
content = substitute_template_vars(content, skill_dir, session_id)
|
||||
if cfg.get("inline_shell", False):
|
||||
timeout = int(cfg.get("inline_shell_timeout", 10) or 10)
|
||||
content = expand_inline_shell(content, skill_dir, timeout)
|
||||
return content
|
||||
@@ -23,9 +23,14 @@ def get_transport(api_mode: str):
|
||||
This allows gradual migration — call sites can check for None
|
||||
and fall back to the legacy code path.
|
||||
"""
|
||||
if not _REGISTRY:
|
||||
_discover_transports()
|
||||
cls = _REGISTRY.get(api_mode)
|
||||
if cls is None:
|
||||
# The registry can be partially populated when a specific transport
|
||||
# module was imported directly (for example chat_completions before
|
||||
# codex). Discover on misses, not only when the registry is empty, so
|
||||
# test/order-dependent imports do not make valid api_modes unavailable.
|
||||
_discover_transports()
|
||||
cls = _REGISTRY.get(api_mode)
|
||||
if cls is None:
|
||||
return None
|
||||
return cls()
|
||||
|
||||
@@ -31,15 +31,15 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||
def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> List[Dict[str, Any]]:
|
||||
"""Messages are already in OpenAI format — sanitize Codex leaks only.
|
||||
|
||||
Strips Codex Responses API fields (``codex_reasoning_items`` on the
|
||||
message, ``call_id``/``response_item_id`` on tool_calls) that strict
|
||||
chat-completions providers reject with 400/422.
|
||||
Strips Codex Responses API fields (``codex_reasoning_items`` /
|
||||
``codex_message_items`` on the message, ``call_id``/``response_item_id``
|
||||
on tool_calls) that strict chat-completions providers reject with 400/422.
|
||||
"""
|
||||
needs_sanitize = False
|
||||
for msg in messages:
|
||||
if not isinstance(msg, dict):
|
||||
continue
|
||||
if "codex_reasoning_items" in msg:
|
||||
if "codex_reasoning_items" in msg or "codex_message_items" in msg:
|
||||
needs_sanitize = True
|
||||
break
|
||||
tool_calls = msg.get("tool_calls")
|
||||
@@ -59,6 +59,7 @@ class ChatCompletionsTransport(ProviderTransport):
|
||||
if not isinstance(msg, dict):
|
||||
continue
|
||||
msg.pop("codex_reasoning_items", None)
|
||||
msg.pop("codex_message_items", None)
|
||||
tool_calls = msg.get("tool_calls")
|
||||
if isinstance(tool_calls, list):
|
||||
for tc in tool_calls:
|
||||
|
||||
@@ -120,6 +120,24 @@ class ResponsesApiTransport(ProviderTransport):
|
||||
if request_overrides:
|
||||
kwargs.update(request_overrides)
|
||||
|
||||
if is_codex_backend:
|
||||
prompt_cache_key = kwargs.get("prompt_cache_key")
|
||||
cache_scope_id = str(prompt_cache_key or session_id or "").strip()
|
||||
if cache_scope_id:
|
||||
existing_extra_headers = kwargs.get("extra_headers")
|
||||
merged_extra_headers: Dict[str, str] = {}
|
||||
if isinstance(existing_extra_headers, dict):
|
||||
merged_extra_headers.update(
|
||||
{
|
||||
str(key): str(value)
|
||||
for key, value in existing_extra_headers.items()
|
||||
if key and value is not None
|
||||
}
|
||||
)
|
||||
merged_extra_headers["session_id"] = cache_scope_id
|
||||
merged_extra_headers["x-client-request-id"] = cache_scope_id
|
||||
kwargs["extra_headers"] = merged_extra_headers
|
||||
|
||||
max_tokens = params.get("max_tokens")
|
||||
if max_tokens is not None and not is_codex_backend:
|
||||
kwargs["max_output_tokens"] = max_tokens
|
||||
@@ -160,6 +178,8 @@ class ResponsesApiTransport(ProviderTransport):
|
||||
provider_data = {}
|
||||
if msg and hasattr(msg, "codex_reasoning_items") and msg.codex_reasoning_items:
|
||||
provider_data["codex_reasoning_items"] = msg.codex_reasoning_items
|
||||
if msg and hasattr(msg, "codex_message_items") and msg.codex_message_items:
|
||||
provider_data["codex_message_items"] = msg.codex_message_items
|
||||
if msg and hasattr(msg, "reasoning_details") and msg.reasoning_details:
|
||||
provider_data["reasoning_details"] = msg.reasoning_details
|
||||
|
||||
|
||||
@@ -97,7 +97,7 @@ class NormalizedResponse:
|
||||
Response-level ``provider_data`` examples:
|
||||
|
||||
* Anthropic: ``{"reasoning_details": [...]}``
|
||||
* Codex: ``{"codex_reasoning_items": [...]}``
|
||||
* Codex: ``{"codex_reasoning_items": [...], "codex_message_items": [...]}``
|
||||
* Others: ``None``
|
||||
"""
|
||||
|
||||
@@ -126,6 +126,11 @@ class NormalizedResponse:
|
||||
pd = self.provider_data or {}
|
||||
return pd.get("codex_reasoning_items")
|
||||
|
||||
@property
|
||||
def codex_message_items(self):
|
||||
pd = self.provider_data or {}
|
||||
return pd.get("codex_message_items")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Factory helpers
|
||||
|
||||
+2
-6
@@ -951,13 +951,9 @@ class BatchRunner:
|
||||
root_logger.setLevel(original_level)
|
||||
|
||||
# Aggregate all batch statistics and update checkpoint
|
||||
all_completed_prompts = list(completed_prompts_set)
|
||||
total_reasoning_stats = {"total_assistant_turns": 0, "turns_with_reasoning": 0, "turns_without_reasoning": 0}
|
||||
|
||||
|
||||
for batch_result in results:
|
||||
# Add newly completed prompts
|
||||
all_completed_prompts.extend(batch_result.get("completed_prompts", []))
|
||||
|
||||
# Aggregate tool stats
|
||||
for tool_name, stats in batch_result.get("tool_stats", {}).items():
|
||||
if tool_name not in total_tool_stats:
|
||||
@@ -977,7 +973,7 @@ class BatchRunner:
|
||||
|
||||
# Save final checkpoint (best-effort; incremental writes already happened)
|
||||
try:
|
||||
checkpoint_data["completed_prompts"] = all_completed_prompts
|
||||
checkpoint_data["completed_prompts"] = sorted(completed_prompts_set)
|
||||
self._save_checkpoint(checkpoint_data, lock=checkpoint_lock)
|
||||
except Exception as ckpt_err:
|
||||
print(f"⚠️ Warning: Failed to save final checkpoint: {ckpt_err}")
|
||||
|
||||
+42
-10
@@ -326,6 +326,16 @@ compression:
|
||||
# To pin a specific model/provider for compression summaries, use the
|
||||
# auxiliary section below (auxiliary.compression.provider / model).
|
||||
|
||||
# =============================================================================
|
||||
# Anthropic prompt caching TTL
|
||||
# =============================================================================
|
||||
# When prompt caching is active (Claude via OpenRouter or native Anthropic),
|
||||
# Anthropic supports two TTL tiers for cached prefixes: "5m" (default) and
|
||||
# "1h". Other values are ignored and "5m" is used.
|
||||
#
|
||||
prompt_caching:
|
||||
cache_ttl: "5m" # use "1h" for long sessions with pauses between turns
|
||||
|
||||
# =============================================================================
|
||||
# Auxiliary Models (Advanced — Experimental)
|
||||
# =============================================================================
|
||||
@@ -780,9 +790,16 @@ code_execution:
|
||||
# Supports single tasks and batch mode (default 3 parallel, configurable).
|
||||
delegation:
|
||||
max_iterations: 50 # Max tool-calling turns per child (default: 50)
|
||||
# max_concurrent_children: 3 # Max parallel child agents (default: 3)
|
||||
# max_spawn_depth: 1 # Tree depth cap (1-3, default: 1 = flat). Raise to 2 or 3 to allow orchestrator children to spawn their own workers.
|
||||
# max_concurrent_children: 3 # Max parallel child agents per batch (default: 3, floor: 1, no ceiling).
|
||||
# WARNING: values above 10 multiply API cost linearly.
|
||||
# max_spawn_depth: 1 # Delegation tree depth cap (range: 1-3, default: 1 = flat).
|
||||
# Raise to 2 to allow workers to spawn their own subagents.
|
||||
# Requires role="orchestrator" on intermediate agents.
|
||||
# orchestrator_enabled: true # Kill switch for role="orchestrator" children (default: true).
|
||||
# subagent_auto_approve: false # When a subagent hits a dangerous-command approval prompt, auto-deny (default: false)
|
||||
# or auto-approve "once" (true) instead of blocking on stdin.
|
||||
# The parent TUI owns stdin, so blocking would deadlock; non-interactive resolution is required.
|
||||
# Both choices emit a logger.warning audit line. Flip to true only for cron/batch pipelines.
|
||||
# inherit_mcp_toolsets: true # When explicit child toolsets are narrowed, also keep the parent's MCP toolsets (default: true). Set false for strict intersection.
|
||||
# model: "google/gemini-3-flash-preview" # Override model for subagents (empty = inherit parent)
|
||||
# provider: "openrouter" # Override provider for subagents (empty = inherit parent)
|
||||
@@ -807,7 +824,9 @@ delegation:
|
||||
# Display
|
||||
# =============================================================================
|
||||
display:
|
||||
# Use compact banner mode
|
||||
# Use compact banner mode (hides the ASCII-art banner, shows a single line).
|
||||
# true: Compact single-line banner
|
||||
# false: Full ASCII banner with tool/skill summary (default)
|
||||
compact: false
|
||||
|
||||
# Tool progress display level (CLI and gateway)
|
||||
@@ -821,12 +840,15 @@ display:
|
||||
# Gateway-only natural mid-turn assistant updates.
|
||||
# When true, completed assistant status messages are sent as separate chat
|
||||
# messages. This is independent of tool_progress and gateway streaming.
|
||||
# true: Send mid-turn assistant updates as separate messages (default)
|
||||
# false: Only send the final response
|
||||
interim_assistant_messages: true
|
||||
|
||||
# What Enter does when Hermes is already busy in the CLI.
|
||||
# What Enter does when Hermes is already busy (CLI and gateway platforms).
|
||||
# interrupt: Interrupt the current run and redirect Hermes (default)
|
||||
# queue: Queue your message for the next turn
|
||||
# Ctrl+C always interrupts regardless of this setting.
|
||||
# Ctrl+C (or /stop in gateway) always interrupts regardless of this setting.
|
||||
# Toggle at runtime with /busy_input_mode <interrupt|queue>.
|
||||
busy_input_mode: interrupt
|
||||
|
||||
# Background process notifications (gateway/messaging only).
|
||||
@@ -842,17 +864,22 @@ display:
|
||||
# Play terminal bell when agent finishes a response.
|
||||
# Useful for long-running tasks — your terminal will ding when the agent is done.
|
||||
# Works over SSH. Most terminals can be configured to flash the taskbar or play a sound.
|
||||
# true: Ring the terminal bell on each response
|
||||
# false: Silent (default)
|
||||
bell_on_complete: false
|
||||
|
||||
# Show model reasoning/thinking before each response.
|
||||
# When enabled, a dim box shows the model's thought process above the response.
|
||||
# Toggle at runtime with /reasoning show or /reasoning hide.
|
||||
# true: Show the reasoning box
|
||||
# false: Hide reasoning (default)
|
||||
show_reasoning: false
|
||||
|
||||
# Stream tokens to the terminal as they arrive instead of waiting for the
|
||||
# full response. The response box opens on first token and text appears
|
||||
# line-by-line. Tool calls are still captured silently.
|
||||
# Stream tokens to the terminal in real-time. Disable to wait for full responses.
|
||||
# true: Stream tokens as they arrive (default)
|
||||
# false: Wait for the full response before rendering
|
||||
streaming: true
|
||||
|
||||
# ───────────────────────────────────────────────────────────────────────────
|
||||
@@ -862,10 +889,15 @@ display:
|
||||
# response box label, and branding text. Change at runtime with /skin <name>.
|
||||
#
|
||||
# Built-in skins:
|
||||
# default — Classic Hermes gold/kawaii
|
||||
# ares — Crimson/bronze war-god theme with spinner wings
|
||||
# mono — Clean grayscale monochrome
|
||||
# slate — Cool blue developer-focused
|
||||
# default — Classic Hermes gold/kawaii
|
||||
# ares — Crimson/bronze war-god theme with spinner wings
|
||||
# mono — Clean grayscale monochrome
|
||||
# slate — Cool blue developer-focused
|
||||
# daylight — Bright light-mode theme
|
||||
# warm-lightmode — Warm paper-tone light-mode theme
|
||||
# poseidon — Sea-green/teal Olympian theme
|
||||
# sisyphus — Earthy stone-and-moss theme
|
||||
# charizard — Fiery orange dragon theme
|
||||
#
|
||||
# Custom skins: drop a YAML file in ~/.hermes/skins/<name>.yaml
|
||||
# Schema (all fields optional, missing values inherit from default):
|
||||
|
||||
@@ -22,6 +22,7 @@ import re
|
||||
import concurrent.futures
|
||||
import base64
|
||||
import atexit
|
||||
import errno
|
||||
import tempfile
|
||||
import time
|
||||
import uuid
|
||||
@@ -416,6 +417,11 @@ def load_cli_config() -> Dict[str, Any]:
|
||||
"base_url": "", # Direct OpenAI-compatible endpoint for subagents
|
||||
"api_key": "", # API key for delegation.base_url (falls back to OPENAI_API_KEY)
|
||||
},
|
||||
"onboarding": {
|
||||
# First-touch hint flags (see agent/onboarding.py). Each hint is
|
||||
# shown once per install then latched here.
|
||||
"seen": {},
|
||||
},
|
||||
}
|
||||
|
||||
# Track whether the config file explicitly set terminal config.
|
||||
@@ -1688,7 +1694,6 @@ def _looks_like_slash_command(text: str) -> bool:
|
||||
from agent.skill_commands import (
|
||||
scan_skill_commands,
|
||||
build_skill_invocation_message,
|
||||
build_plan_path,
|
||||
build_preloaded_skills_prompt,
|
||||
)
|
||||
|
||||
@@ -3084,6 +3089,8 @@ class HermesCLI:
|
||||
format_runtime_provider_error,
|
||||
)
|
||||
|
||||
_primary_exc = None
|
||||
runtime = None
|
||||
try:
|
||||
runtime = resolve_runtime_provider(
|
||||
requested=self.requested_provider,
|
||||
@@ -3091,7 +3098,34 @@ class HermesCLI:
|
||||
explicit_base_url=self._explicit_base_url,
|
||||
)
|
||||
except Exception as exc:
|
||||
message = format_runtime_provider_error(exc)
|
||||
_primary_exc = exc
|
||||
|
||||
# Primary provider auth failed — try fallback providers before giving up.
|
||||
if runtime is None and _primary_exc is not None:
|
||||
from hermes_cli.auth import AuthError
|
||||
if isinstance(_primary_exc, AuthError):
|
||||
_fb_chain = self._fallback_model if isinstance(self._fallback_model, list) else []
|
||||
for _fb in _fb_chain:
|
||||
_fb_provider = (_fb.get("provider") or "").strip().lower()
|
||||
_fb_model = (_fb.get("model") or "").strip()
|
||||
if not _fb_provider or not _fb_model:
|
||||
continue
|
||||
try:
|
||||
runtime = resolve_runtime_provider(requested=_fb_provider)
|
||||
logger.warning(
|
||||
"Primary provider auth failed (%s). Falling through to fallback: %s/%s",
|
||||
_primary_exc, _fb_provider, _fb_model,
|
||||
)
|
||||
_cprint(f"⚠️ Primary auth failed — switching to fallback: {_fb_provider} / {_fb_model}")
|
||||
self.requested_provider = _fb_provider
|
||||
self.model = _fb_model
|
||||
_primary_exc = None
|
||||
break
|
||||
except Exception:
|
||||
continue
|
||||
|
||||
if runtime is None:
|
||||
message = format_runtime_provider_error(_primary_exc) if _primary_exc else "Provider resolution failed."
|
||||
ChatConsole().print(f"[bold red]{message}[/]")
|
||||
return False
|
||||
|
||||
@@ -3148,7 +3182,14 @@ class HermesCLI:
|
||||
# the configured model (e.g. "qwen3.6-plus"), causing 400 errors.
|
||||
runtime_model = runtime.get("model")
|
||||
if runtime_model and isinstance(runtime_model, str):
|
||||
self.model = runtime_model
|
||||
# Only use runtime model if: model is unset, or model equals provider name
|
||||
should_use_runtime_model = (
|
||||
not self.model or # No model configured yet
|
||||
self.model == self.provider or # Model is the provider slug
|
||||
self.model == runtime.get("name") # Model matches provider display name
|
||||
)
|
||||
if should_use_runtime_model:
|
||||
self.model = runtime_model
|
||||
|
||||
# If model is still empty (e.g. user ran `hermes auth add openai-codex`
|
||||
# without `hermes model`), fall back to the provider's first catalog
|
||||
@@ -3254,6 +3295,23 @@ class HermesCLI:
|
||||
_cprint(f"\033[1;31mSession not found: {self.session_id}{_RST}")
|
||||
_cprint(f"{_DIM}Use a session ID from a previous CLI run (hermes sessions list).{_RST}")
|
||||
return False
|
||||
# If the requested session is the (empty) head of a compression
|
||||
# chain, walk to the descendant that actually holds the messages.
|
||||
# See #15000 and SessionDB.resolve_resume_session_id.
|
||||
try:
|
||||
resolved_id = self._session_db.resolve_resume_session_id(self.session_id)
|
||||
except Exception:
|
||||
resolved_id = self.session_id
|
||||
if resolved_id and resolved_id != self.session_id:
|
||||
ChatConsole().print(
|
||||
f"[{_DIM}]Session {_escape(self.session_id)} was compressed into "
|
||||
f"{_escape(resolved_id)}; resuming the descendant with your "
|
||||
f"transcript.[/]"
|
||||
)
|
||||
self.session_id = resolved_id
|
||||
resolved_meta = self._session_db.get_session(self.session_id)
|
||||
if resolved_meta:
|
||||
session_meta = resolved_meta
|
||||
restored = self._session_db.get_messages_as_conversation(self.session_id)
|
||||
if restored:
|
||||
restored = [m for m in restored if m.get("role") != "session_meta"]
|
||||
@@ -3472,6 +3530,22 @@ class HermesCLI:
|
||||
)
|
||||
return False
|
||||
|
||||
# If the requested session is the (empty) head of a compression chain,
|
||||
# walk to the descendant that actually holds the messages. See #15000.
|
||||
try:
|
||||
resolved_id = self._session_db.resolve_resume_session_id(self.session_id)
|
||||
except Exception:
|
||||
resolved_id = self.session_id
|
||||
if resolved_id and resolved_id != self.session_id:
|
||||
self._console_print(
|
||||
f"[dim]Session {self.session_id} was compressed into "
|
||||
f"{resolved_id}; resuming the descendant with your transcript.[/]"
|
||||
)
|
||||
self.session_id = resolved_id
|
||||
resolved_meta = self._session_db.get_session(self.session_id)
|
||||
if resolved_meta:
|
||||
session_meta = resolved_meta
|
||||
|
||||
restored = self._session_db.get_messages_as_conversation(self.session_id)
|
||||
if restored:
|
||||
restored = [m for m in restored if m.get("role") != "session_meta"]
|
||||
@@ -4250,7 +4324,7 @@ class HermesCLI:
|
||||
|
||||
_cprint(f"\n {_DIM}Tip: Just type your message to chat with Hermes!{_RST}")
|
||||
_cprint(f" {_DIM}Multi-line: Alt+Enter for a new line{_RST}")
|
||||
_cprint(f" {_DIM}Draft editor: Ctrl+G{_RST}")
|
||||
_cprint(f" {_DIM}Draft editor: Ctrl+G (Alt+G in VSCode/Cursor){_RST}")
|
||||
if _is_termux_environment():
|
||||
_cprint(f" {_DIM}Attach image: /image {_termux_example_image_path()} or start your prompt with a local image path{_RST}\n")
|
||||
else:
|
||||
@@ -4600,10 +4674,6 @@ class HermesCLI:
|
||||
def new_session(self, silent=False):
|
||||
"""Start a fresh session with a new session ID and cleared agent state."""
|
||||
if self.agent and self.conversation_history:
|
||||
try:
|
||||
self.agent.flush_memories(self.conversation_history)
|
||||
except (Exception, KeyboardInterrupt):
|
||||
pass
|
||||
# Trigger memory extraction on the old session before session_id rotates.
|
||||
self.agent.commit_memory_session(self.conversation_history)
|
||||
self._notify_session_boundary("on_session_finalize")
|
||||
@@ -4686,6 +4756,22 @@ class HermesCLI:
|
||||
_cprint(" Use /history or `hermes sessions list` to see available sessions.")
|
||||
return
|
||||
|
||||
# If the target is the empty head of a compression chain, redirect to
|
||||
# the descendant that actually holds the transcript. See #15000.
|
||||
try:
|
||||
resolved_id = self._session_db.resolve_resume_session_id(target_id)
|
||||
except Exception:
|
||||
resolved_id = target_id
|
||||
if resolved_id and resolved_id != target_id:
|
||||
_cprint(
|
||||
f" Session {target_id} was compressed into {resolved_id}; "
|
||||
f"resuming the descendant with your transcript."
|
||||
)
|
||||
target_id = resolved_id
|
||||
resolved_meta = self._session_db.get_session(target_id)
|
||||
if resolved_meta:
|
||||
session_meta = resolved_meta
|
||||
|
||||
if target_id == self.session_id:
|
||||
_cprint(" Already on that session.")
|
||||
return
|
||||
@@ -5072,27 +5158,29 @@ class HermesCLI:
|
||||
_cprint(f" ✓ Model switched: {result.new_model}")
|
||||
_cprint(f" Provider: {provider_label}")
|
||||
|
||||
# Context: always resolve via the provider-aware chain so Codex OAuth,
|
||||
# Copilot, and Nous-enforced caps win over the raw models.dev entry
|
||||
# (e.g. gpt-5.5 is 1.05M on openai but 272K on Codex OAuth).
|
||||
mi = result.model_info
|
||||
try:
|
||||
from hermes_cli.model_switch import resolve_display_context_length
|
||||
ctx = resolve_display_context_length(
|
||||
result.new_model,
|
||||
result.target_provider,
|
||||
base_url=result.base_url or self.base_url or "",
|
||||
api_key=result.api_key or self.api_key or "",
|
||||
model_info=mi,
|
||||
)
|
||||
if ctx:
|
||||
_cprint(f" Context: {ctx:,} tokens")
|
||||
except Exception:
|
||||
pass
|
||||
if mi:
|
||||
if mi.context_window:
|
||||
_cprint(f" Context: {mi.context_window:,} tokens")
|
||||
if mi.max_output:
|
||||
_cprint(f" Max output: {mi.max_output:,} tokens")
|
||||
if mi.has_cost_data():
|
||||
_cprint(f" Cost: {mi.format_cost()}")
|
||||
_cprint(f" Capabilities: {mi.format_capabilities()}")
|
||||
else:
|
||||
try:
|
||||
from agent.model_metadata import get_model_context_length
|
||||
ctx = get_model_context_length(
|
||||
result.new_model,
|
||||
base_url=result.base_url or self.base_url,
|
||||
api_key=result.api_key or self.api_key,
|
||||
provider=result.target_provider,
|
||||
)
|
||||
_cprint(f" Context: {ctx:,} tokens")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
cache_enabled = (
|
||||
(base_url_host_matches(result.base_url or "", "openrouter.ai") and "claude" in result.new_model.lower())
|
||||
@@ -5193,24 +5281,22 @@ class HermesCLI:
|
||||
# Parse --provider and --global flags
|
||||
model_input, explicit_provider, persist_global = parse_model_flags(raw_args)
|
||||
|
||||
# Load providers for switch_model (picker path needs them below)
|
||||
user_provs = None
|
||||
custom_provs = None
|
||||
try:
|
||||
from hermes_cli.config import get_compatible_custom_providers, load_config
|
||||
cfg = load_config()
|
||||
user_provs = cfg.get("providers")
|
||||
custom_provs = get_compatible_custom_providers(cfg)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# No args at all: open prompt_toolkit-native picker modal
|
||||
if not model_input and not explicit_provider:
|
||||
model_display = self.model or "unknown"
|
||||
provider_display = get_label(self.provider) if self.provider else "unknown"
|
||||
|
||||
user_provs = None
|
||||
custom_provs = None
|
||||
try:
|
||||
from hermes_cli.config import get_compatible_custom_providers, load_config
|
||||
cfg = load_config()
|
||||
user_provs = cfg.get("providers")
|
||||
custom_provs = get_compatible_custom_providers(cfg)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
try:
|
||||
providers = list_authenticated_providers(
|
||||
current_provider=self.provider or "",
|
||||
@@ -5297,29 +5383,26 @@ class HermesCLI:
|
||||
_cprint(f" ✓ Model switched: {result.new_model}")
|
||||
_cprint(f" Provider: {provider_label}")
|
||||
|
||||
# Rich metadata from models.dev
|
||||
# Context: always resolve via the provider-aware chain so Codex OAuth,
|
||||
# Copilot, and Nous-enforced caps win over the raw models.dev entry
|
||||
# (e.g. gpt-5.5 is 1.05M on openai but 272K on Codex OAuth).
|
||||
mi = result.model_info
|
||||
from hermes_cli.model_switch import resolve_display_context_length
|
||||
ctx = resolve_display_context_length(
|
||||
result.new_model,
|
||||
result.target_provider,
|
||||
base_url=result.base_url or self.base_url or "",
|
||||
api_key=result.api_key or self.api_key or "",
|
||||
model_info=mi,
|
||||
)
|
||||
if ctx:
|
||||
_cprint(f" Context: {ctx:,} tokens")
|
||||
if mi:
|
||||
if mi.context_window:
|
||||
_cprint(f" Context: {mi.context_window:,} tokens")
|
||||
if mi.max_output:
|
||||
_cprint(f" Max output: {mi.max_output:,} tokens")
|
||||
if mi.has_cost_data():
|
||||
_cprint(f" Cost: {mi.format_cost()}")
|
||||
_cprint(f" Capabilities: {mi.format_capabilities()}")
|
||||
else:
|
||||
# Fallback to old context length lookup
|
||||
try:
|
||||
from agent.model_metadata import get_model_context_length
|
||||
ctx = get_model_context_length(
|
||||
result.new_model,
|
||||
base_url=result.base_url or self.base_url,
|
||||
api_key=result.api_key or self.api_key,
|
||||
provider=result.target_provider,
|
||||
)
|
||||
_cprint(f" Context: {ctx:,} tokens")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Cache notice
|
||||
cache_enabled = (
|
||||
@@ -5378,79 +5461,6 @@ class HermesCLI:
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
def _show_model_and_providers(self):
|
||||
"""Show current model + provider and list all authenticated providers.
|
||||
|
||||
Shows current model + provider, then lists all authenticated
|
||||
providers with their available models.
|
||||
"""
|
||||
from hermes_cli.models import (
|
||||
curated_models_for_provider, list_available_providers,
|
||||
normalize_provider, _PROVIDER_LABELS,
|
||||
get_pricing_for_provider, format_model_pricing_table,
|
||||
)
|
||||
from hermes_cli.auth import resolve_provider as _resolve_provider
|
||||
|
||||
# Resolve current provider
|
||||
raw_provider = normalize_provider(self.provider)
|
||||
if raw_provider == "auto":
|
||||
try:
|
||||
current = _resolve_provider(
|
||||
self.requested_provider,
|
||||
explicit_api_key=self._explicit_api_key,
|
||||
explicit_base_url=self._explicit_base_url,
|
||||
)
|
||||
except Exception:
|
||||
current = "openrouter"
|
||||
else:
|
||||
current = raw_provider
|
||||
current_label = _PROVIDER_LABELS.get(current, current)
|
||||
|
||||
print(f"\n Current: {self.model} via {current_label}")
|
||||
print()
|
||||
|
||||
# Show all authenticated providers with their models
|
||||
providers = list_available_providers()
|
||||
authed = [p for p in providers if p["authenticated"]]
|
||||
unauthed = [p for p in providers if not p["authenticated"]]
|
||||
|
||||
if authed:
|
||||
print(" Authenticated providers & models:")
|
||||
for p in authed:
|
||||
is_active = p["id"] == current
|
||||
marker = " ← active" if is_active else ""
|
||||
print(f" [{p['id']}]{marker}")
|
||||
curated = curated_models_for_provider(p["id"])
|
||||
# Fetch pricing for providers that support it (openrouter, nous)
|
||||
pricing_map = get_pricing_for_provider(p["id"]) if p["id"] in ("openrouter", "nous") else {}
|
||||
if curated and pricing_map:
|
||||
cur_model = self.model if is_active else ""
|
||||
for line in format_model_pricing_table(curated, pricing_map, current_model=cur_model):
|
||||
print(line)
|
||||
elif curated:
|
||||
for mid, desc in curated:
|
||||
current_marker = " ← current" if (is_active and mid == self.model) else ""
|
||||
print(f" {mid}{current_marker}")
|
||||
elif p["id"] == "custom":
|
||||
from hermes_cli.models import _get_custom_base_url
|
||||
custom_url = _get_custom_base_url()
|
||||
if custom_url:
|
||||
print(f" endpoint: {custom_url}")
|
||||
if is_active:
|
||||
print(f" model: {self.model} ← current")
|
||||
print(" (use hermes model to change)")
|
||||
else:
|
||||
print(" (use hermes model to change)")
|
||||
print()
|
||||
|
||||
if unauthed:
|
||||
names = ", ".join(p["label"] for p in unauthed)
|
||||
print(f" Not configured: {names}")
|
||||
print(" Run: hermes setup")
|
||||
print()
|
||||
|
||||
print(" To change model or provider, use: hermes model")
|
||||
|
||||
def _output_console(self):
|
||||
"""Use prompt_toolkit-safe Rich rendering once the TUI is live."""
|
||||
if getattr(self, "_app", None):
|
||||
@@ -5808,7 +5818,28 @@ class HermesCLI:
|
||||
|
||||
print(f"(._.) Unknown cron command: {subcommand}")
|
||||
print(" Available: list, add, edit, pause, resume, run, remove")
|
||||
|
||||
|
||||
def _handle_kanban_command(self, cmd: str):
|
||||
"""Handle the /kanban command — delegate to the shared kanban CLI.
|
||||
|
||||
The string form passed here is the user's full ``/kanban ...``
|
||||
including the leading slash; we strip it and hand the remainder
|
||||
to ``kanban.run_slash`` which returns a single formatted string.
|
||||
"""
|
||||
from hermes_cli.kanban import run_slash
|
||||
|
||||
rest = cmd.strip()
|
||||
if rest.startswith("/"):
|
||||
rest = rest.lstrip("/")
|
||||
if rest.startswith("kanban"):
|
||||
rest = rest[len("kanban"):].lstrip()
|
||||
try:
|
||||
output = run_slash(rest)
|
||||
except Exception as exc: # pragma: no cover - defensive
|
||||
output = f"(._.) kanban error: {exc}"
|
||||
if output:
|
||||
print(output)
|
||||
|
||||
def _handle_skills_command(self, cmd: str):
|
||||
"""Handle /skills slash command — delegates to hermes_cli.skills_hub."""
|
||||
from hermes_cli.skills_hub import handle_skills_slash
|
||||
@@ -6026,16 +6057,12 @@ class HermesCLI:
|
||||
self._handle_resume_command(cmd_original)
|
||||
elif canonical == "model":
|
||||
self._handle_model_switch(cmd_original)
|
||||
elif canonical == "provider":
|
||||
self._show_model_and_providers()
|
||||
elif canonical == "gquota":
|
||||
self._handle_gquota_command(cmd_original)
|
||||
|
||||
elif canonical == "personality":
|
||||
# Use original case (handler lowercases the personality name itself)
|
||||
self._handle_personality_command(cmd_original)
|
||||
elif canonical == "plan":
|
||||
self._handle_plan_command(cmd_original)
|
||||
elif canonical == "retry":
|
||||
retry_msg = self.retry_last()
|
||||
if retry_msg and hasattr(self, '_pending_input'):
|
||||
@@ -6049,6 +6076,8 @@ class HermesCLI:
|
||||
self.save_conversation()
|
||||
elif canonical == "cron":
|
||||
self._handle_cron_command(cmd_original)
|
||||
elif canonical == "kanban":
|
||||
self._handle_kanban_command(cmd_original)
|
||||
elif canonical == "skills":
|
||||
with self._busy_command(self._slow_command_status(cmd_original)):
|
||||
self._handle_skills_command(cmd_original)
|
||||
@@ -6123,8 +6152,6 @@ class HermesCLI:
|
||||
self._handle_agents_command()
|
||||
elif canonical == "background":
|
||||
self._handle_background_command(cmd_original)
|
||||
elif canonical == "btw":
|
||||
self._handle_btw_command(cmd_original)
|
||||
elif canonical == "queue":
|
||||
# Extract prompt after "/queue " or "/q "
|
||||
parts = cmd_original.split(None, 1)
|
||||
@@ -6165,6 +6192,8 @@ class HermesCLI:
|
||||
self._handle_skin_command(cmd_original)
|
||||
elif canonical == "voice":
|
||||
self._handle_voice_command(cmd_original)
|
||||
elif canonical == "busy":
|
||||
self._handle_busy_command(cmd_original)
|
||||
else:
|
||||
# Check for user-defined quick commands (bypass agent loop, no LLM call)
|
||||
base_cmd = cmd_lower.split()[0]
|
||||
@@ -6270,32 +6299,6 @@ class HermesCLI:
|
||||
|
||||
return True
|
||||
|
||||
def _handle_plan_command(self, cmd: str):
|
||||
"""Handle /plan [request] — load the bundled plan skill."""
|
||||
parts = cmd.strip().split(maxsplit=1)
|
||||
user_instruction = parts[1].strip() if len(parts) > 1 else ""
|
||||
|
||||
plan_path = build_plan_path(user_instruction)
|
||||
msg = build_skill_invocation_message(
|
||||
"/plan",
|
||||
user_instruction,
|
||||
task_id=self.session_id,
|
||||
runtime_note=(
|
||||
"Save the markdown plan with write_file to this exact relative path "
|
||||
f"inside the active workspace/backend cwd: {plan_path}"
|
||||
),
|
||||
)
|
||||
|
||||
if not msg:
|
||||
ChatConsole().print("[bold red]Failed to load the bundled /plan skill[/]")
|
||||
return
|
||||
|
||||
_cprint(f" 📝 Plan mode queued via skill. Markdown plan target: {plan_path}")
|
||||
if hasattr(self, '_pending_input'):
|
||||
self._pending_input.put(msg)
|
||||
else:
|
||||
ChatConsole().print("[bold red]Plan mode unavailable: input queue not initialized[/]")
|
||||
|
||||
def _handle_background_command(self, cmd: str):
|
||||
"""Handle /background <prompt> — run a prompt in a separate background session.
|
||||
|
||||
@@ -6435,122 +6438,6 @@ class HermesCLI:
|
||||
self._background_tasks[task_id] = thread
|
||||
thread.start()
|
||||
|
||||
def _handle_btw_command(self, cmd: str):
|
||||
"""Handle /btw <question> — ephemeral side question using session context.
|
||||
|
||||
Snapshots the current conversation history, spawns a no-tools agent in
|
||||
a background thread, and prints the answer without persisting anything
|
||||
to the main session.
|
||||
"""
|
||||
parts = cmd.strip().split(maxsplit=1)
|
||||
if len(parts) < 2 or not parts[1].strip():
|
||||
_cprint(" Usage: /btw <question>")
|
||||
_cprint(" Example: /btw what module owns session title sanitization?")
|
||||
_cprint(" Answers using session context. No tools, not persisted.")
|
||||
return
|
||||
|
||||
question = parts[1].strip()
|
||||
task_id = f"btw_{datetime.now().strftime('%H%M%S')}_{uuid.uuid4().hex[:6]}"
|
||||
|
||||
if not self._ensure_runtime_credentials():
|
||||
_cprint(" (>_<) Cannot start /btw: no valid credentials.")
|
||||
return
|
||||
|
||||
turn_route = self._resolve_turn_agent_config(question)
|
||||
history_snapshot = list(self.conversation_history)
|
||||
|
||||
preview = question[:60] + ("..." if len(question) > 60 else "")
|
||||
_cprint(f' 💬 /btw: "{preview}"')
|
||||
|
||||
def run_btw():
|
||||
try:
|
||||
btw_agent = AIAgent(
|
||||
model=turn_route["model"],
|
||||
api_key=turn_route["runtime"].get("api_key"),
|
||||
base_url=turn_route["runtime"].get("base_url"),
|
||||
provider=turn_route["runtime"].get("provider"),
|
||||
api_mode=turn_route["runtime"].get("api_mode"),
|
||||
acp_command=turn_route["runtime"].get("command"),
|
||||
acp_args=turn_route["runtime"].get("args"),
|
||||
max_iterations=8,
|
||||
enabled_toolsets=[],
|
||||
quiet_mode=True,
|
||||
verbose_logging=False,
|
||||
session_id=task_id,
|
||||
platform="cli",
|
||||
reasoning_config=self.reasoning_config,
|
||||
service_tier=self.service_tier,
|
||||
request_overrides=turn_route.get("request_overrides"),
|
||||
providers_allowed=self._providers_only,
|
||||
providers_ignored=self._providers_ignore,
|
||||
providers_order=self._providers_order,
|
||||
provider_sort=self._provider_sort,
|
||||
provider_require_parameters=self._provider_require_params,
|
||||
provider_data_collection=self._provider_data_collection,
|
||||
fallback_model=self._fallback_model,
|
||||
session_db=None,
|
||||
skip_memory=True,
|
||||
skip_context_files=True,
|
||||
persist_session=False,
|
||||
)
|
||||
|
||||
btw_prompt = (
|
||||
"[Ephemeral /btw side question. Answer using the conversation "
|
||||
"context. No tools available. Be direct and concise.]\n\n"
|
||||
+ question
|
||||
)
|
||||
result = btw_agent.run_conversation(
|
||||
user_message=btw_prompt,
|
||||
conversation_history=history_snapshot,
|
||||
task_id=task_id,
|
||||
)
|
||||
|
||||
response = (result.get("final_response") or "") if result else ""
|
||||
if not response and result and result.get("error"):
|
||||
response = f"Error: {result['error']}"
|
||||
|
||||
# TUI refresh before printing
|
||||
if self._app:
|
||||
self._app.invalidate()
|
||||
time.sleep(0.05)
|
||||
print()
|
||||
|
||||
if response:
|
||||
try:
|
||||
from hermes_cli.skin_engine import get_active_skin
|
||||
_skin = get_active_skin()
|
||||
_resp_color = _skin.get_color("response_border", "#4F6D4A")
|
||||
except Exception:
|
||||
_resp_color = "#4F6D4A"
|
||||
|
||||
ChatConsole().print(Panel(
|
||||
_render_final_assistant_content(response, mode=self.final_response_markdown),
|
||||
title=f"[{_resp_color} bold]⚕ /btw[/]",
|
||||
title_align="left",
|
||||
border_style=_resp_color,
|
||||
box=rich_box.HORIZONTALS,
|
||||
padding=(1, 4),
|
||||
))
|
||||
else:
|
||||
_cprint(" 💬 /btw: (no response)")
|
||||
|
||||
if self.bell_on_complete:
|
||||
sys.stdout.write("\a")
|
||||
sys.stdout.flush()
|
||||
|
||||
except Exception as e:
|
||||
if self._app:
|
||||
self._app.invalidate()
|
||||
time.sleep(0.05)
|
||||
print()
|
||||
_cprint(f" ❌ /btw failed: {e}")
|
||||
finally:
|
||||
if self._app:
|
||||
self._invalidate(min_interval=0)
|
||||
|
||||
thread = threading.Thread(target=run_btw, daemon=True, name=f"btw-{task_id}")
|
||||
thread.start()
|
||||
|
||||
@staticmethod
|
||||
def _try_launch_chrome_debug(port: int, system: str) -> bool:
|
||||
"""Try to launch Chrome/Chromium with remote debugging enabled.
|
||||
@@ -6685,6 +6572,13 @@ class HermesCLI:
|
||||
print(f" ⚠ Port {_port} is not reachable at {cdp_url}")
|
||||
|
||||
os.environ["BROWSER_CDP_URL"] = cdp_url
|
||||
# Eagerly start the CDP supervisor so pending_dialogs + frame_tree
|
||||
# show up in the next browser_snapshot. No-op if already started.
|
||||
try:
|
||||
from tools.browser_tool import _ensure_cdp_supervisor # type: ignore[import-not-found]
|
||||
_ensure_cdp_supervisor("default")
|
||||
except Exception:
|
||||
pass
|
||||
print()
|
||||
print("🌐 Browser connected to live Chrome via CDP")
|
||||
print(f" Endpoint: {cdp_url}")
|
||||
@@ -6706,7 +6600,8 @@ class HermesCLI:
|
||||
if current:
|
||||
os.environ.pop("BROWSER_CDP_URL", None)
|
||||
try:
|
||||
from tools.browser_tool import cleanup_all_browsers
|
||||
from tools.browser_tool import cleanup_all_browsers, _stop_cdp_supervisor
|
||||
_stop_cdp_supervisor("default")
|
||||
cleanup_all_browsers()
|
||||
except Exception:
|
||||
pass
|
||||
@@ -6919,6 +6814,36 @@ class HermesCLI:
|
||||
else:
|
||||
_cprint(f" {_ACCENT}✓ Reasoning effort set to '{arg}' (session only){_RST}")
|
||||
|
||||
def _handle_busy_command(self, cmd: str):
|
||||
"""Handle /busy — control what Enter does while Hermes is working.
|
||||
|
||||
Usage:
|
||||
/busy Show current busy input mode
|
||||
/busy status Show current busy input mode
|
||||
/busy queue Queue input for the next turn instead of interrupting
|
||||
/busy interrupt Interrupt the current run on Enter (default)
|
||||
"""
|
||||
parts = cmd.strip().split(maxsplit=1)
|
||||
if len(parts) < 2 or parts[1].strip().lower() == "status":
|
||||
_cprint(f" {_ACCENT}Busy input mode: {self.busy_input_mode}{_RST}")
|
||||
_cprint(f" {_DIM}Enter while busy: {'queues for next turn' if self.busy_input_mode == 'queue' else 'interrupts current run'}{_RST}")
|
||||
_cprint(f" {_DIM}Usage: /busy [queue|interrupt|status]{_RST}")
|
||||
return
|
||||
|
||||
arg = parts[1].strip().lower()
|
||||
if arg not in {"queue", "interrupt"}:
|
||||
_cprint(f" {_DIM}(._.) Unknown argument: {arg}{_RST}")
|
||||
_cprint(f" {_DIM}Usage: /busy [queue|interrupt|status]{_RST}")
|
||||
return
|
||||
|
||||
self.busy_input_mode = arg
|
||||
if save_config_value("display.busy_input_mode", arg):
|
||||
behavior = "Enter will queue follow-up input while Hermes is busy." if arg == "queue" else "Enter will interrupt the current run while Hermes is busy."
|
||||
_cprint(f" {_ACCENT}✓ Busy input mode set to '{arg}' (saved to config){_RST}")
|
||||
_cprint(f" {_DIM}{behavior}{_RST}")
|
||||
else:
|
||||
_cprint(f" {_ACCENT}✓ Busy input mode set to '{arg}' (session only){_RST}")
|
||||
|
||||
def _handle_fast_command(self, cmd: str):
|
||||
"""Handle /fast — toggle fast mode (OpenAI Priority Processing / Anthropic Fast Mode)."""
|
||||
if not self._fast_command_available():
|
||||
@@ -6997,51 +6922,52 @@ class HermesCLI:
|
||||
focus_topic = parts[1].strip()
|
||||
|
||||
original_count = len(self.conversation_history)
|
||||
try:
|
||||
from agent.model_metadata import estimate_messages_tokens_rough
|
||||
from agent.manual_compression_feedback import summarize_manual_compression
|
||||
original_history = list(self.conversation_history)
|
||||
approx_tokens = estimate_messages_tokens_rough(original_history)
|
||||
if focus_topic:
|
||||
print(f"🗜️ Compressing {original_count} messages (~{approx_tokens:,} tokens), "
|
||||
f"focus: \"{focus_topic}\"...")
|
||||
else:
|
||||
print(f"🗜️ Compressing {original_count} messages (~{approx_tokens:,} tokens)...")
|
||||
with self._busy_command("Compressing context..."):
|
||||
try:
|
||||
from agent.model_metadata import estimate_messages_tokens_rough
|
||||
from agent.manual_compression_feedback import summarize_manual_compression
|
||||
original_history = list(self.conversation_history)
|
||||
approx_tokens = estimate_messages_tokens_rough(original_history)
|
||||
if focus_topic:
|
||||
print(f"🗜️ Compressing {original_count} messages (~{approx_tokens:,} tokens), "
|
||||
f"focus: \"{focus_topic}\"...")
|
||||
else:
|
||||
print(f"🗜️ Compressing {original_count} messages (~{approx_tokens:,} tokens)...")
|
||||
|
||||
compressed, _ = self.agent._compress_context(
|
||||
original_history,
|
||||
self.agent._cached_system_prompt or "",
|
||||
approx_tokens=approx_tokens,
|
||||
focus_topic=focus_topic or None,
|
||||
)
|
||||
self.conversation_history = compressed
|
||||
# _compress_context ends the old session and creates a new child
|
||||
# session on the agent (run_agent.py::_compress_context). Sync the
|
||||
# CLI's session_id so /status, /resume, exit summary, and title
|
||||
# generation all point at the live continuation session, not the
|
||||
# ended parent. Without this, subsequent end_session() calls target
|
||||
# the already-closed parent and the child is orphaned.
|
||||
if (
|
||||
getattr(self.agent, "session_id", None)
|
||||
and self.agent.session_id != self.session_id
|
||||
):
|
||||
self.session_id = self.agent.session_id
|
||||
self._pending_title = None
|
||||
new_tokens = estimate_messages_tokens_rough(self.conversation_history)
|
||||
summary = summarize_manual_compression(
|
||||
original_history,
|
||||
self.conversation_history,
|
||||
approx_tokens,
|
||||
new_tokens,
|
||||
)
|
||||
icon = "🗜️" if summary["noop"] else "✅"
|
||||
print(f" {icon} {summary['headline']}")
|
||||
print(f" {summary['token_line']}")
|
||||
if summary["note"]:
|
||||
print(f" {summary['note']}")
|
||||
compressed, _ = self.agent._compress_context(
|
||||
original_history,
|
||||
self.agent._cached_system_prompt or "",
|
||||
approx_tokens=approx_tokens,
|
||||
focus_topic=focus_topic or None,
|
||||
)
|
||||
self.conversation_history = compressed
|
||||
# _compress_context ends the old session and creates a new child
|
||||
# session on the agent (run_agent.py::_compress_context). Sync the
|
||||
# CLI's session_id so /status, /resume, exit summary, and title
|
||||
# generation all point at the live continuation session, not the
|
||||
# ended parent. Without this, subsequent end_session() calls target
|
||||
# the already-closed parent and the child is orphaned.
|
||||
if (
|
||||
getattr(self.agent, "session_id", None)
|
||||
and self.agent.session_id != self.session_id
|
||||
):
|
||||
self.session_id = self.agent.session_id
|
||||
self._pending_title = None
|
||||
new_tokens = estimate_messages_tokens_rough(self.conversation_history)
|
||||
summary = summarize_manual_compression(
|
||||
original_history,
|
||||
self.conversation_history,
|
||||
approx_tokens,
|
||||
new_tokens,
|
||||
)
|
||||
icon = "🗜️" if summary["noop"] else "✅"
|
||||
print(f" {icon} {summary['headline']}")
|
||||
print(f" {summary['token_line']}")
|
||||
if summary["note"]:
|
||||
print(f" {summary['note']}")
|
||||
|
||||
except Exception as e:
|
||||
print(f" ❌ Compression failed: {e}")
|
||||
except Exception as e:
|
||||
print(f" ❌ Compression failed: {e}")
|
||||
|
||||
def _handle_debug_command(self):
|
||||
"""Handle /debug — upload debug report + logs and print paste URLs."""
|
||||
@@ -7396,6 +7322,31 @@ class HermesCLI:
|
||||
_cprint(f" {line}")
|
||||
except Exception:
|
||||
pass
|
||||
# First-touch onboarding: on the first tool in this process
|
||||
# that takes longer than the threshold while we're in the
|
||||
# noisiest progress mode, print a one-time hint about
|
||||
# /verbose. Latched on self so it fires at most once per
|
||||
# process; persisted to config.yaml so it never fires again
|
||||
# across processes either.
|
||||
try:
|
||||
if (
|
||||
not getattr(self, "_long_tool_hint_fired", False)
|
||||
and self.tool_progress_mode == "all"
|
||||
and duration >= 30.0
|
||||
):
|
||||
from agent.onboarding import (
|
||||
TOOL_PROGRESS_FLAG,
|
||||
is_seen,
|
||||
mark_seen,
|
||||
tool_progress_hint_cli,
|
||||
)
|
||||
if not is_seen(CLI_CONFIG, TOOL_PROGRESS_FLAG):
|
||||
self._long_tool_hint_fired = True
|
||||
_cprint(f" {_DIM}{tool_progress_hint_cli()}{_RST}")
|
||||
mark_seen(_hermes_home / "config.yaml", TOOL_PROGRESS_FLAG)
|
||||
CLI_CONFIG.setdefault("onboarding", {}).setdefault("seen", {})[TOOL_PROGRESS_FLAG] = True
|
||||
except Exception:
|
||||
pass
|
||||
self._invalidate()
|
||||
return
|
||||
if event_type != "tool.started":
|
||||
@@ -9279,6 +9230,24 @@ class HermesCLI:
|
||||
f"agent_running={self._agent_running}\n")
|
||||
except Exception:
|
||||
pass
|
||||
# First-touch onboarding: on the very first busy-while-running
|
||||
# event for this install, print a one-line tip explaining the
|
||||
# /busy knob. Flag persists to config.yaml and never fires
|
||||
# again. Guarded for exceptions so onboarding can't break
|
||||
# the input loop.
|
||||
try:
|
||||
from agent.onboarding import (
|
||||
BUSY_INPUT_FLAG,
|
||||
busy_input_hint_cli,
|
||||
is_seen,
|
||||
mark_seen,
|
||||
)
|
||||
if not is_seen(CLI_CONFIG, BUSY_INPUT_FLAG):
|
||||
_cprint(f" {_DIM}{busy_input_hint_cli(self.busy_input_mode)}{_RST}")
|
||||
mark_seen(_hermes_home / "config.yaml", BUSY_INPUT_FLAG)
|
||||
CLI_CONFIG.setdefault("onboarding", {}).setdefault("seen", {})[BUSY_INPUT_FLAG] = True
|
||||
except Exception:
|
||||
pass
|
||||
else:
|
||||
self._pending_input.put(payload)
|
||||
event.app.current_buffer.reset(append_to_history=True)
|
||||
@@ -9293,14 +9262,18 @@ class HermesCLI:
|
||||
"""Ctrl+Enter (c-j) inserts a newline. Most terminals send c-j for Ctrl+Enter."""
|
||||
event.current_buffer.insert_text('\n')
|
||||
|
||||
@kb.add(
|
||||
'c-g',
|
||||
filter=Condition(
|
||||
lambda: not self._clarify_state and not self._approval_state and not self._sudo_state and not self._secret_state
|
||||
),
|
||||
# VSCode/Cursor bind Ctrl+G to "Find Next" at the editor level, so
|
||||
# the keystroke never reaches the embedded terminal. Alt+G is unbound
|
||||
# in those IDEs and arrives here as ('escape', 'g') — register it as
|
||||
# a fallback so the editor handoff works inside Cursor/VSCode too.
|
||||
_editor_filter = Condition(
|
||||
lambda: not self._clarify_state and not self._approval_state and not self._sudo_state and not self._secret_state
|
||||
)
|
||||
|
||||
@kb.add('c-g', filter=_editor_filter)
|
||||
@kb.add('escape', 'g', filter=_editor_filter)
|
||||
def handle_open_in_editor(event):
|
||||
"""Ctrl+G opens the current draft in an external editor."""
|
||||
"""Ctrl+G (or Alt+G in VSCode/Cursor) opens the current draft in an external editor."""
|
||||
cli_ref._open_external_editor(event.current_buffer)
|
||||
|
||||
@kb.add('tab', eager=True)
|
||||
@@ -9543,9 +9516,20 @@ class HermesCLI:
|
||||
|
||||
@kb.add('c-d')
|
||||
def handle_ctrl_d(event):
|
||||
"""Handle Ctrl+D - exit."""
|
||||
self._should_exit = True
|
||||
event.app.exit()
|
||||
"""Ctrl+D: delete char under cursor (standard readline behaviour).
|
||||
Only exit when the input is empty — same as bash/zsh. Pending
|
||||
attached images count as input and block the EOF-exit so the
|
||||
user doesn't lose them silently.
|
||||
"""
|
||||
buf = event.app.current_buffer
|
||||
if buf.text:
|
||||
buf.delete()
|
||||
elif self._attached_images:
|
||||
# Empty text but pending attachments — no-op, don't exit.
|
||||
return
|
||||
else:
|
||||
self._should_exit = True
|
||||
event.app.exit()
|
||||
|
||||
_modal_prompt_active = Condition(
|
||||
lambda: bool(self._secret_state or self._sudo_state)
|
||||
@@ -9753,6 +9737,11 @@ class HermesCLI:
|
||||
completer=_completer,
|
||||
),
|
||||
)
|
||||
# Keep prompt_toolkit on its simple tempfile path. Setting
|
||||
# buffer.tempfile = "prompt.md" triggers its complex-tempfile branch,
|
||||
# which tries to mkdir() the mkdtemp() directory again and raises
|
||||
# EEXIST. The suffix keeps markdown highlighting without that bug.
|
||||
input_area.buffer.tempfile_suffix = '.md'
|
||||
|
||||
# Dynamic height: accounts for both explicit newlines AND visual
|
||||
# wrapping of long lines so the input area always fits its content.
|
||||
@@ -10705,6 +10694,8 @@ class HermesCLI:
|
||||
return # silently suppress
|
||||
if isinstance(exc, KeyError) and "is not registered" in str(exc):
|
||||
return # suppress selector registration failures (#6393)
|
||||
if isinstance(exc, OSError) and getattr(exc, "errno", None) == errno.EIO:
|
||||
return # suppress I/O errors from broken stdout on interrupt (#13710)
|
||||
# Fall back to default handler for everything else
|
||||
loop.default_exception_handler(context)
|
||||
|
||||
@@ -10737,9 +10728,11 @@ class HermesCLI:
|
||||
except (EOFError, KeyboardInterrupt, BrokenPipeError):
|
||||
pass
|
||||
except (KeyError, OSError) as _stdin_err:
|
||||
# Catch selector registration failures from broken stdin (#6393).
|
||||
# This is the fallback for cases that slip past the fstat() guard.
|
||||
if "is not registered" in str(_stdin_err) or "Bad file descriptor" in str(_stdin_err):
|
||||
# Catch selector registration failures from broken stdin (#6393)
|
||||
# and I/O errors from broken stdout during interrupt (#13710).
|
||||
if isinstance(_stdin_err, OSError) and getattr(_stdin_err, "errno", None) == errno.EIO:
|
||||
pass # suppress broken-stdout I/O errors on interrupt (#13710)
|
||||
elif "is not registered" in str(_stdin_err) or "Bad file descriptor" in str(_stdin_err):
|
||||
print(
|
||||
f"\nError: stdin is not usable ({_stdin_err}).\n"
|
||||
"This can happen with certain Python installations (e.g. uv-managed cPython on macOS).\n"
|
||||
@@ -10758,12 +10751,6 @@ class HermesCLI:
|
||||
self.agent.interrupt()
|
||||
except Exception:
|
||||
pass
|
||||
# Flush memories before exit (only for substantial conversations)
|
||||
if self.agent and self.conversation_history:
|
||||
try:
|
||||
self.agent.flush_memories(self.conversation_history)
|
||||
except (Exception, KeyboardInterrupt):
|
||||
pass
|
||||
# Shut down voice recorder (release persistent audio stream)
|
||||
if hasattr(self, '_voice_recorder') and self._voice_recorder:
|
||||
try:
|
||||
|
||||
+65
-1
@@ -16,7 +16,7 @@ import uuid
|
||||
from datetime import datetime, timedelta
|
||||
from pathlib import Path
|
||||
from hermes_constants import get_hermes_home
|
||||
from typing import Optional, Dict, List, Any
|
||||
from typing import Optional, Dict, List, Any, Union
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -371,6 +371,39 @@ def save_jobs(jobs: List[Dict[str, Any]]):
|
||||
raise
|
||||
|
||||
|
||||
def _normalize_workdir(workdir: Optional[str]) -> Optional[str]:
|
||||
"""Normalize and validate a cron job workdir.
|
||||
|
||||
Rules:
|
||||
- Empty / None → None (feature off, preserves old behaviour).
|
||||
- ``~`` is expanded. Relative paths are rejected — cron jobs run detached
|
||||
from any shell cwd, so relative paths have no stable meaning.
|
||||
- The path must exist and be a directory at create/update time. We do
|
||||
NOT re-check at run time (a user might briefly unmount the dir; the
|
||||
scheduler will just fall back to old behaviour with a logged warning).
|
||||
|
||||
Returns the absolute path string, or None when disabled.
|
||||
Raises ValueError on invalid input.
|
||||
"""
|
||||
if workdir is None:
|
||||
return None
|
||||
raw = str(workdir).strip()
|
||||
if not raw:
|
||||
return None
|
||||
expanded = Path(raw).expanduser()
|
||||
if not expanded.is_absolute():
|
||||
raise ValueError(
|
||||
f"Cron workdir must be an absolute path (got {raw!r}). "
|
||||
f"Cron jobs run detached from any shell cwd, so relative paths are ambiguous."
|
||||
)
|
||||
resolved = expanded.resolve()
|
||||
if not resolved.exists():
|
||||
raise ValueError(f"Cron workdir does not exist: {resolved}")
|
||||
if not resolved.is_dir():
|
||||
raise ValueError(f"Cron workdir is not a directory: {resolved}")
|
||||
return str(resolved)
|
||||
|
||||
|
||||
def create_job(
|
||||
prompt: str,
|
||||
schedule: str,
|
||||
@@ -384,7 +417,9 @@ def create_job(
|
||||
provider: Optional[str] = None,
|
||||
base_url: Optional[str] = None,
|
||||
script: Optional[str] = None,
|
||||
context_from: Optional[Union[str, List[str]]] = None,
|
||||
enabled_toolsets: Optional[List[str]] = None,
|
||||
workdir: Optional[str] = None,
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Create a new cron job.
|
||||
@@ -404,9 +439,18 @@ def create_job(
|
||||
script: Optional path to a Python script whose stdout is injected into the
|
||||
prompt each run. The script runs before the agent turn, and its output
|
||||
is prepended as context. Useful for data collection / change detection.
|
||||
context_from: Optional job ID (or list of job IDs) whose most recent output
|
||||
is injected into the prompt as context before each run.
|
||||
Useful for chaining cron jobs: job A finds data, job B processes it.
|
||||
enabled_toolsets: Optional list of toolset names to restrict the agent to.
|
||||
When set, only tools from these toolsets are loaded, reducing
|
||||
token overhead. When omitted, all default tools are loaded.
|
||||
workdir: Optional absolute path. When set, the job runs as if launched
|
||||
from that directory: AGENTS.md / CLAUDE.md / .cursorrules from
|
||||
that directory are injected into the system prompt, and the
|
||||
terminal/file/code_exec tools use it as their working directory
|
||||
(via TERMINAL_CWD). When unset, the old behaviour is preserved
|
||||
(no context files injected, tools use the scheduler's cwd).
|
||||
|
||||
Returns:
|
||||
The created job dict
|
||||
@@ -439,6 +483,15 @@ def create_job(
|
||||
normalized_script = normalized_script or None
|
||||
normalized_toolsets = [str(t).strip() for t in enabled_toolsets if str(t).strip()] if enabled_toolsets else None
|
||||
normalized_toolsets = normalized_toolsets or None
|
||||
normalized_workdir = _normalize_workdir(workdir)
|
||||
|
||||
# Normalize context_from: accept str or list of str, store as list or None
|
||||
if isinstance(context_from, str):
|
||||
context_from = [context_from.strip()] if context_from.strip() else None
|
||||
elif isinstance(context_from, list):
|
||||
context_from = [str(j).strip() for j in context_from if str(j).strip()] or None
|
||||
else:
|
||||
context_from = None
|
||||
|
||||
label_source = (prompt or (normalized_skills[0] if normalized_skills else None)) or "cron job"
|
||||
job = {
|
||||
@@ -451,6 +504,7 @@ def create_job(
|
||||
"provider": normalized_provider,
|
||||
"base_url": normalized_base_url,
|
||||
"script": normalized_script,
|
||||
"context_from": context_from,
|
||||
"schedule": parsed_schedule,
|
||||
"schedule_display": parsed_schedule.get("display", schedule),
|
||||
"repeat": {
|
||||
@@ -471,6 +525,7 @@ def create_job(
|
||||
"deliver": deliver,
|
||||
"origin": origin, # Tracks where job was created for "origin" delivery
|
||||
"enabled_toolsets": normalized_toolsets,
|
||||
"workdir": normalized_workdir,
|
||||
}
|
||||
|
||||
jobs = load_jobs()
|
||||
@@ -504,6 +559,15 @@ def update_job(job_id: str, updates: Dict[str, Any]) -> Optional[Dict[str, Any]]
|
||||
if job["id"] != job_id:
|
||||
continue
|
||||
|
||||
# Validate / normalize workdir if present in updates. Empty string or
|
||||
# None both mean "clear the field" (restore old behaviour).
|
||||
if "workdir" in updates:
|
||||
_wd = updates["workdir"]
|
||||
if _wd in (None, "", False):
|
||||
updates["workdir"] = None
|
||||
else:
|
||||
updates["workdir"] = _normalize_workdir(_wd)
|
||||
|
||||
updated = _apply_skill_fields({**job, **updates})
|
||||
schedule_changed = "schedule" in updates
|
||||
|
||||
|
||||
+122
-9
@@ -671,6 +671,47 @@ def _build_job_prompt(job: dict, prerun_script: Optional[tuple] = None) -> str:
|
||||
f"{prompt}"
|
||||
)
|
||||
|
||||
# Inject output from referenced cron jobs as context.
|
||||
context_from = job.get("context_from")
|
||||
if context_from:
|
||||
from cron.jobs import OUTPUT_DIR
|
||||
if isinstance(context_from, str):
|
||||
context_from = [context_from]
|
||||
for source_job_id in context_from:
|
||||
# Guard against path traversal — valid job IDs are 12-char hex strings
|
||||
if not source_job_id or not all(c in "0123456789abcdef" for c in source_job_id):
|
||||
logger.warning("context_from: skipping invalid job_id %r", source_job_id)
|
||||
continue
|
||||
try:
|
||||
job_output_dir = OUTPUT_DIR / source_job_id
|
||||
if not job_output_dir.exists():
|
||||
continue # silent skip — no output yet
|
||||
output_files = sorted(
|
||||
job_output_dir.glob("*.md"),
|
||||
key=lambda f: f.stat().st_mtime,
|
||||
reverse=True,
|
||||
)
|
||||
if not output_files:
|
||||
continue # silent skip — no output yet
|
||||
latest_output = output_files[0].read_text(encoding="utf-8").strip()
|
||||
# Truncate to 8K characters to avoid prompt bloat
|
||||
_MAX_CONTEXT_CHARS = 8000
|
||||
if len(latest_output) > _MAX_CONTEXT_CHARS:
|
||||
latest_output = latest_output[:_MAX_CONTEXT_CHARS] + "\n\n[... output truncated ...]"
|
||||
if latest_output:
|
||||
prompt = (
|
||||
f"## Output from job '{source_job_id}'\n"
|
||||
"The following is the most recent output from a preceding "
|
||||
"cron job. Use it as context for your analysis.\n\n"
|
||||
f"```\n{latest_output}\n```\n\n"
|
||||
f"{prompt}"
|
||||
)
|
||||
else:
|
||||
continue # silent skip — empty output
|
||||
except (OSError, PermissionError) as e:
|
||||
logger.warning("context_from: failed to read output for job %r: %s", source_job_id, e)
|
||||
# silent skip — do not pollute the prompt with error messages
|
||||
|
||||
# Always prepend cron execution guidance so the agent knows how
|
||||
# delivery works and can suppress delivery when appropriate.
|
||||
cron_hint = (
|
||||
@@ -795,6 +836,30 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
|
||||
chat_name=origin.get("chat_name", "") if origin else "",
|
||||
)
|
||||
|
||||
# Per-job working directory. When set (and validated at create/update
|
||||
# time), we point TERMINAL_CWD at it so:
|
||||
# - build_context_files_prompt() picks up AGENTS.md / CLAUDE.md /
|
||||
# .cursorrules from the job's project dir, AND
|
||||
# - the terminal, file, and code-exec tools run commands from there.
|
||||
#
|
||||
# tick() serializes workdir-jobs outside the parallel pool, so mutating
|
||||
# os.environ["TERMINAL_CWD"] here is safe for those jobs. For workdir-less
|
||||
# jobs we leave TERMINAL_CWD untouched — preserves the original behaviour
|
||||
# (skip_context_files=True, tools use whatever cwd the scheduler has).
|
||||
_job_workdir = (job.get("workdir") or "").strip() or None
|
||||
if _job_workdir and not Path(_job_workdir).is_dir():
|
||||
# Directory was removed between create-time validation and now. Log
|
||||
# and drop back to old behaviour rather than crashing the job.
|
||||
logger.warning(
|
||||
"Job '%s': configured workdir %r no longer exists — running without it",
|
||||
job_id, _job_workdir,
|
||||
)
|
||||
_job_workdir = None
|
||||
_prior_terminal_cwd = os.environ.get("TERMINAL_CWD", "_UNSET_")
|
||||
if _job_workdir:
|
||||
os.environ["TERMINAL_CWD"] = _job_workdir
|
||||
logger.info("Job '%s': using workdir %s", job_id, _job_workdir)
|
||||
|
||||
try:
|
||||
# Re-read .env and config.yaml fresh every run so provider/key
|
||||
# changes take effect without a gateway restart.
|
||||
@@ -871,6 +936,7 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
|
||||
resolve_runtime_provider,
|
||||
format_runtime_provider_error,
|
||||
)
|
||||
from hermes_cli.auth import AuthError
|
||||
try:
|
||||
runtime_kwargs = {
|
||||
"requested": job.get("provider") or os.getenv("HERMES_INFERENCE_PROVIDER"),
|
||||
@@ -878,6 +944,28 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
|
||||
if job.get("base_url"):
|
||||
runtime_kwargs["explicit_base_url"] = job.get("base_url")
|
||||
runtime = resolve_runtime_provider(**runtime_kwargs)
|
||||
except AuthError as auth_exc:
|
||||
# Primary provider auth failed — try fallback chain before giving up.
|
||||
logger.warning("Job '%s': primary auth failed (%s), trying fallback", job_id, auth_exc)
|
||||
fb = _cfg.get("fallback_providers") or _cfg.get("fallback_model")
|
||||
fb_list = (fb if isinstance(fb, list) else [fb]) if fb else []
|
||||
runtime = None
|
||||
for entry in fb_list:
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
try:
|
||||
fb_kwargs = {"requested": entry.get("provider")}
|
||||
if entry.get("base_url"):
|
||||
fb_kwargs["explicit_base_url"] = entry["base_url"]
|
||||
if entry.get("api_key"):
|
||||
fb_kwargs["explicit_api_key"] = entry["api_key"]
|
||||
runtime = resolve_runtime_provider(**fb_kwargs)
|
||||
logger.info("Job '%s': fallback resolved to %s", job_id, runtime.get("provider"))
|
||||
break
|
||||
except Exception as fb_exc:
|
||||
logger.debug("Job '%s': fallback %s failed: %s", job_id, entry.get("provider"), fb_exc)
|
||||
if runtime is None:
|
||||
raise RuntimeError(format_runtime_provider_error(auth_exc)) from auth_exc
|
||||
except Exception as exc:
|
||||
message = format_runtime_provider_error(exc)
|
||||
raise RuntimeError(message) from exc
|
||||
@@ -920,7 +1008,10 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
|
||||
enabled_toolsets=_resolve_cron_enabled_toolsets(job, _cfg),
|
||||
disabled_toolsets=["cronjob", "messaging", "clarify"],
|
||||
quiet_mode=True,
|
||||
skip_context_files=True, # Don't inject SOUL.md/AGENTS.md from scheduler cwd
|
||||
# When a workdir is configured, inject AGENTS.md / CLAUDE.md /
|
||||
# .cursorrules from that directory; otherwise preserve the old
|
||||
# behaviour (don't inject SOUL.md/AGENTS.md from the scheduler cwd).
|
||||
skip_context_files=not bool(_job_workdir),
|
||||
skip_memory=True, # Cron system prompts would corrupt user representations
|
||||
platform="cron",
|
||||
session_id=_cron_session_id,
|
||||
@@ -1059,6 +1150,14 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
|
||||
return False, output, "", error_msg
|
||||
|
||||
finally:
|
||||
# Restore TERMINAL_CWD to whatever it was before this job ran. We
|
||||
# only ever mutate it when the job has a workdir; see the setup block
|
||||
# at the top of run_job for the serialization guarantee.
|
||||
if _job_workdir:
|
||||
if _prior_terminal_cwd == "_UNSET_":
|
||||
os.environ.pop("TERMINAL_CWD", None)
|
||||
else:
|
||||
os.environ["TERMINAL_CWD"] = _prior_terminal_cwd
|
||||
# Clean up ContextVar session/delivery state for this job.
|
||||
clear_session_vars(_ctx_tokens)
|
||||
if _session_db:
|
||||
@@ -1186,14 +1285,28 @@ def tick(verbose: bool = True, adapters=None, loop=None) -> int:
|
||||
mark_job_run(job["id"], False, str(e))
|
||||
return False
|
||||
|
||||
# Run all due jobs concurrently, each in its own ContextVar copy
|
||||
# so session/delivery state stays isolated per-thread.
|
||||
with concurrent.futures.ThreadPoolExecutor(max_workers=_max_workers) as _tick_pool:
|
||||
_futures = []
|
||||
for job in due_jobs:
|
||||
_ctx = contextvars.copy_context()
|
||||
_futures.append(_tick_pool.submit(_ctx.run, _process_job, job))
|
||||
_results = [f.result() for f in _futures]
|
||||
# Partition due jobs: those with a per-job workdir mutate
|
||||
# os.environ["TERMINAL_CWD"] inside run_job, which is process-global —
|
||||
# so they MUST run sequentially to avoid corrupting each other. Jobs
|
||||
# without a workdir leave env untouched and stay parallel-safe.
|
||||
workdir_jobs = [j for j in due_jobs if (j.get("workdir") or "").strip()]
|
||||
parallel_jobs = [j for j in due_jobs if not (j.get("workdir") or "").strip()]
|
||||
|
||||
_results: list = []
|
||||
|
||||
# Sequential pass for workdir jobs.
|
||||
for job in workdir_jobs:
|
||||
_ctx = contextvars.copy_context()
|
||||
_results.append(_ctx.run(_process_job, job))
|
||||
|
||||
# Parallel pass for the rest — same behaviour as before.
|
||||
if parallel_jobs:
|
||||
with concurrent.futures.ThreadPoolExecutor(max_workers=_max_workers) as _tick_pool:
|
||||
_futures = []
|
||||
for job in parallel_jobs:
|
||||
_ctx = contextvars.copy_context()
|
||||
_futures.append(_tick_pool.submit(_ctx.run, _process_job, job))
|
||||
_results.extend(f.result() for f in _futures)
|
||||
|
||||
return sum(_results)
|
||||
finally:
|
||||
|
||||
@@ -0,0 +1,52 @@
|
||||
#
|
||||
# docker-compose.yml for Hermes Agent
|
||||
#
|
||||
# Usage:
|
||||
# HERMES_UID=$(id -u) HERMES_GID=$(id -g) docker compose up -d
|
||||
#
|
||||
# Set HERMES_UID / HERMES_GID to the host user that owns ~/.hermes so
|
||||
# files created inside the container stay readable/writable on the host.
|
||||
# The entrypoint remaps the internal `hermes` user to these values via
|
||||
# usermod/groupmod + gosu.
|
||||
#
|
||||
# Security notes:
|
||||
# - The dashboard service binds to 127.0.0.1 by default. It stores API
|
||||
# keys; exposing it on LAN without auth is unsafe. If you want remote
|
||||
# access, use an SSH tunnel or put it behind a reverse proxy that
|
||||
# adds authentication — do NOT pass --insecure --host 0.0.0.0.
|
||||
# - The gateway's API server is off unless you uncomment API_SERVER_KEY
|
||||
# and API_SERVER_HOST. See docs/user-guide/api-server.md before doing
|
||||
# this on an internet-facing host.
|
||||
#
|
||||
services:
|
||||
gateway:
|
||||
build: .
|
||||
image: hermes-agent
|
||||
container_name: hermes
|
||||
restart: unless-stopped
|
||||
network_mode: host
|
||||
volumes:
|
||||
- ~/.hermes:/opt/data
|
||||
environment:
|
||||
- HERMES_UID=${HERMES_UID:-10000}
|
||||
- HERMES_GID=${HERMES_GID:-10000}
|
||||
# To expose the OpenAI-compatible API server beyond localhost,
|
||||
# uncomment BOTH lines (API_SERVER_KEY is mandatory for auth):
|
||||
# - API_SERVER_HOST=0.0.0.0
|
||||
# - API_SERVER_KEY=${API_SERVER_KEY}
|
||||
command: ["gateway", "run"]
|
||||
|
||||
dashboard:
|
||||
image: hermes-agent
|
||||
container_name: hermes-dashboard
|
||||
restart: unless-stopped
|
||||
network_mode: host
|
||||
depends_on:
|
||||
- gateway
|
||||
volumes:
|
||||
- ~/.hermes:/opt/data
|
||||
environment:
|
||||
- HERMES_UID=${HERMES_UID:-10000}
|
||||
- HERMES_GID=${HERMES_GID:-10000}
|
||||
# Localhost-only. For remote access, tunnel via `ssh -L 9119:localhost:9119`.
|
||||
command: ["dashboard", "--host", "127.0.0.1", "--no-open"]
|
||||
+20
-9
@@ -22,9 +22,18 @@ if [ "$(id -u)" = "0" ]; then
|
||||
groupmod -o -g "$HERMES_GID" hermes 2>/dev/null || true
|
||||
fi
|
||||
|
||||
# Fix ownership of the data volume. When HERMES_UID remaps the hermes user,
|
||||
# files created by previous runs (under the old UID) become inaccessible.
|
||||
# Always chown -R when UID was remapped; otherwise only if top-level is wrong.
|
||||
actual_hermes_uid=$(id -u hermes)
|
||||
if [ "$(stat -c %u "$HERMES_HOME" 2>/dev/null)" != "$actual_hermes_uid" ]; then
|
||||
echo "$HERMES_HOME is not owned by $actual_hermes_uid, fixing"
|
||||
needs_chown=false
|
||||
if [ -n "$HERMES_UID" ] && [ "$HERMES_UID" != "10000" ]; then
|
||||
needs_chown=true
|
||||
elif [ "$(stat -c %u "$HERMES_HOME" 2>/dev/null)" != "$actual_hermes_uid" ]; then
|
||||
needs_chown=true
|
||||
fi
|
||||
if [ "$needs_chown" = true ]; then
|
||||
echo "Fixing ownership of $HERMES_HOME to hermes ($actual_hermes_uid)"
|
||||
# In rootless Podman the container's "root" is mapped to an unprivileged
|
||||
# host UID — chown will fail. That's fine: the volume is already owned
|
||||
# by the mapped user on the host side.
|
||||
@@ -32,6 +41,15 @@ if [ "$(id -u)" = "0" ]; then
|
||||
echo "Warning: chown failed (rootless container?) — continuing anyway"
|
||||
fi
|
||||
|
||||
# Ensure config.yaml is readable by the hermes runtime user even if it was
|
||||
# edited on the host after initial ownership setup. Must run here (as root)
|
||||
# rather than after the gosu drop, otherwise a non-root caller like
|
||||
# `docker run -u $(id -u):$(id -g)` hits "Operation not permitted" (#15865).
|
||||
if [ -f "$HERMES_HOME/config.yaml" ]; then
|
||||
chown hermes:hermes "$HERMES_HOME/config.yaml" 2>/dev/null || true
|
||||
chmod 640 "$HERMES_HOME/config.yaml" 2>/dev/null || true
|
||||
fi
|
||||
|
||||
echo "Dropping root privileges"
|
||||
exec gosu hermes "$0" "$@"
|
||||
fi
|
||||
@@ -58,13 +76,6 @@ if [ ! -f "$HERMES_HOME/config.yaml" ]; then
|
||||
cp "$INSTALL_DIR/cli-config.yaml.example" "$HERMES_HOME/config.yaml"
|
||||
fi
|
||||
|
||||
# Ensure the main config file remains accessible to the hermes runtime user
|
||||
# even if it was edited on the host after initial ownership setup.
|
||||
if [ -f "$HERMES_HOME/config.yaml" ]; then
|
||||
chown hermes:hermes "$HERMES_HOME/config.yaml"
|
||||
chmod 640 "$HERMES_HOME/config.yaml"
|
||||
fi
|
||||
|
||||
# SOUL.md
|
||||
if [ ! -f "$HERMES_HOME/SOUL.md" ]; then
|
||||
cp "$INSTALL_DIR/docker/SOUL.md" "$HERMES_HOME/SOUL.md"
|
||||
|
||||
Binary file not shown.
+8
-3
@@ -135,7 +135,7 @@ class SessionResetPolicy:
|
||||
mode=mode if mode is not None else "both",
|
||||
at_hour=at_hour if at_hour is not None else 4,
|
||||
idle_minutes=idle_minutes if idle_minutes is not None else 1440,
|
||||
notify=notify if notify is not None else True,
|
||||
notify=_coerce_bool(notify, True),
|
||||
notify_exclude_platforms=tuple(exclude) if exclude is not None else ("api_server", "webhook"),
|
||||
)
|
||||
|
||||
@@ -178,7 +178,7 @@ class PlatformConfig:
|
||||
home_channel = HomeChannel.from_dict(data["home_channel"])
|
||||
|
||||
return cls(
|
||||
enabled=data.get("enabled", False),
|
||||
enabled=_coerce_bool(data.get("enabled"), False),
|
||||
token=data.get("token"),
|
||||
api_key=data.get("api_key"),
|
||||
home_channel=home_channel,
|
||||
@@ -435,7 +435,7 @@ class GatewayConfig:
|
||||
reset_triggers=data.get("reset_triggers", ["/new", "/reset"]),
|
||||
quick_commands=quick_commands,
|
||||
sessions_dir=sessions_dir,
|
||||
always_log_local=data.get("always_log_local", True),
|
||||
always_log_local=_coerce_bool(data.get("always_log_local"), True),
|
||||
stt_enabled=_coerce_bool(stt_enabled, True),
|
||||
group_sessions_per_user=_coerce_bool(group_sessions_per_user, True),
|
||||
thread_sessions_per_user=_coerce_bool(thread_sessions_per_user, False),
|
||||
@@ -687,6 +687,11 @@ def load_gateway_config() -> GatewayConfig:
|
||||
os.environ["TELEGRAM_REACTIONS"] = str(telegram_cfg["reactions"]).lower()
|
||||
if "proxy_url" in telegram_cfg and not os.getenv("TELEGRAM_PROXY"):
|
||||
os.environ["TELEGRAM_PROXY"] = str(telegram_cfg["proxy_url"]).strip()
|
||||
if "group_allowed_chats" in telegram_cfg and not os.getenv("TELEGRAM_GROUP_ALLOWED_USERS"):
|
||||
gac = telegram_cfg["group_allowed_chats"]
|
||||
if isinstance(gac, list):
|
||||
gac = ",".join(str(v) for v in gac)
|
||||
os.environ["TELEGRAM_GROUP_ALLOWED_USERS"] = str(gac)
|
||||
if "disable_link_previews" in telegram_cfg:
|
||||
plat_data = platforms_data.setdefault(Platform.TELEGRAM.value, {})
|
||||
if not isinstance(plat_data, dict):
|
||||
|
||||
+150
-22
@@ -9,6 +9,7 @@ Exposes an HTTP server with endpoints:
|
||||
- GET /v1/models — lists hermes-agent as an available model
|
||||
- POST /v1/runs — start a run, returns run_id immediately (202)
|
||||
- GET /v1/runs/{run_id}/events — SSE stream of structured lifecycle events
|
||||
- POST /v1/runs/{run_id}/stop — interrupt a running agent
|
||||
- GET /health — health check
|
||||
- GET /health/detailed — rich status for cross-container dashboard probing
|
||||
|
||||
@@ -586,6 +587,9 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
self._run_streams: Dict[str, "asyncio.Queue[Optional[Dict]]"] = {}
|
||||
# Creation timestamps for orphaned-run TTL sweep
|
||||
self._run_streams_created: Dict[str, float] = {}
|
||||
# Active run agent/task references for stop support
|
||||
self._active_run_agents: Dict[str, Any] = {}
|
||||
self._active_run_tasks: Dict[str, "asyncio.Task"] = {}
|
||||
self._session_db: Optional[Any] = None # Lazy-init SessionDB for session continuity
|
||||
|
||||
@staticmethod
|
||||
@@ -1204,10 +1208,12 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
|
||||
If the client disconnects mid-stream, ``agent.interrupt()`` is
|
||||
called so the agent stops issuing upstream LLM calls, then the
|
||||
asyncio task is cancelled. When ``store=True`` the full response
|
||||
is persisted to the ResponseStore in a ``finally`` block so GET
|
||||
/v1/responses/{id} and ``previous_response_id`` chaining work the
|
||||
same as the batch path.
|
||||
asyncio task is cancelled. When ``store=True`` an initial
|
||||
``in_progress`` snapshot is persisted immediately after
|
||||
``response.created`` and disconnects update it to an
|
||||
``incomplete`` snapshot so GET /v1/responses/{id} and
|
||||
``previous_response_id`` chaining still have something to
|
||||
recover from.
|
||||
"""
|
||||
import queue as _q
|
||||
|
||||
@@ -1269,6 +1275,60 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
final_response_text = ""
|
||||
agent_error: Optional[str] = None
|
||||
usage: Dict[str, int] = {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}
|
||||
terminal_snapshot_persisted = False
|
||||
|
||||
def _persist_response_snapshot(
|
||||
response_env: Dict[str, Any],
|
||||
*,
|
||||
conversation_history_snapshot: Optional[List[Dict[str, Any]]] = None,
|
||||
) -> None:
|
||||
if not store:
|
||||
return
|
||||
if conversation_history_snapshot is None:
|
||||
conversation_history_snapshot = list(conversation_history)
|
||||
conversation_history_snapshot.append({"role": "user", "content": user_message})
|
||||
self._response_store.put(response_id, {
|
||||
"response": response_env,
|
||||
"conversation_history": conversation_history_snapshot,
|
||||
"instructions": instructions,
|
||||
"session_id": session_id,
|
||||
})
|
||||
if conversation:
|
||||
self._response_store.set_conversation(conversation, response_id)
|
||||
|
||||
def _persist_incomplete_if_needed() -> None:
|
||||
"""Persist an ``incomplete`` snapshot if no terminal one was written.
|
||||
|
||||
Called from both the client-disconnect (``ConnectionResetError``)
|
||||
and server-cancellation (``asyncio.CancelledError``) paths so
|
||||
GET /v1/responses/{id} and ``previous_response_id`` chaining keep
|
||||
working after abrupt stream termination.
|
||||
"""
|
||||
if not store or terminal_snapshot_persisted:
|
||||
return
|
||||
incomplete_text = "".join(final_text_parts) or final_response_text
|
||||
incomplete_items: List[Dict[str, Any]] = list(emitted_items)
|
||||
if incomplete_text:
|
||||
incomplete_items.append({
|
||||
"type": "message",
|
||||
"role": "assistant",
|
||||
"content": [{"type": "output_text", "text": incomplete_text}],
|
||||
})
|
||||
incomplete_env = _envelope("incomplete")
|
||||
incomplete_env["output"] = incomplete_items
|
||||
incomplete_env["usage"] = {
|
||||
"input_tokens": usage.get("input_tokens", 0),
|
||||
"output_tokens": usage.get("output_tokens", 0),
|
||||
"total_tokens": usage.get("total_tokens", 0),
|
||||
}
|
||||
incomplete_history = list(conversation_history)
|
||||
incomplete_history.append({"role": "user", "content": user_message})
|
||||
if incomplete_text:
|
||||
incomplete_history.append({"role": "assistant", "content": incomplete_text})
|
||||
_persist_response_snapshot(
|
||||
incomplete_env,
|
||||
conversation_history_snapshot=incomplete_history,
|
||||
)
|
||||
|
||||
try:
|
||||
# response.created — initial envelope, status=in_progress
|
||||
@@ -1278,6 +1338,7 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
"type": "response.created",
|
||||
"response": created_env,
|
||||
})
|
||||
_persist_response_snapshot(created_env)
|
||||
last_activity = time.monotonic()
|
||||
|
||||
async def _open_message_item() -> None:
|
||||
@@ -1534,6 +1595,18 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
"output_tokens": usage.get("output_tokens", 0),
|
||||
"total_tokens": usage.get("total_tokens", 0),
|
||||
}
|
||||
_failed_history = list(conversation_history)
|
||||
_failed_history.append({"role": "user", "content": user_message})
|
||||
if final_response_text or agent_error:
|
||||
_failed_history.append({
|
||||
"role": "assistant",
|
||||
"content": final_response_text or agent_error,
|
||||
})
|
||||
_persist_response_snapshot(
|
||||
failed_env,
|
||||
conversation_history_snapshot=_failed_history,
|
||||
)
|
||||
terminal_snapshot_persisted = True
|
||||
await _write_event("response.failed", {
|
||||
"type": "response.failed",
|
||||
"response": failed_env,
|
||||
@@ -1546,30 +1619,24 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
"output_tokens": usage.get("output_tokens", 0),
|
||||
"total_tokens": usage.get("total_tokens", 0),
|
||||
}
|
||||
full_history = list(conversation_history)
|
||||
full_history.append({"role": "user", "content": user_message})
|
||||
if isinstance(result, dict) and result.get("messages"):
|
||||
full_history.extend(result["messages"])
|
||||
else:
|
||||
full_history.append({"role": "assistant", "content": final_response_text})
|
||||
_persist_response_snapshot(
|
||||
completed_env,
|
||||
conversation_history_snapshot=full_history,
|
||||
)
|
||||
terminal_snapshot_persisted = True
|
||||
await _write_event("response.completed", {
|
||||
"type": "response.completed",
|
||||
"response": completed_env,
|
||||
})
|
||||
|
||||
# Persist for future chaining / GET retrieval, mirroring
|
||||
# the batch path behavior.
|
||||
if store:
|
||||
full_history = list(conversation_history)
|
||||
full_history.append({"role": "user", "content": user_message})
|
||||
if isinstance(result, dict) and result.get("messages"):
|
||||
full_history.extend(result["messages"])
|
||||
else:
|
||||
full_history.append({"role": "assistant", "content": final_response_text})
|
||||
self._response_store.put(response_id, {
|
||||
"response": completed_env,
|
||||
"conversation_history": full_history,
|
||||
"instructions": instructions,
|
||||
"session_id": session_id,
|
||||
})
|
||||
if conversation:
|
||||
self._response_store.set_conversation(conversation, response_id)
|
||||
|
||||
except (ConnectionResetError, ConnectionAbortedError, BrokenPipeError, OSError):
|
||||
_persist_incomplete_if_needed()
|
||||
# Client disconnected — interrupt the agent so it stops
|
||||
# making upstream LLM calls, then cancel the task.
|
||||
agent = agent_ref[0] if agent_ref else None
|
||||
@@ -1585,6 +1652,22 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
except (asyncio.CancelledError, Exception):
|
||||
pass
|
||||
logger.info("SSE client disconnected; interrupted agent task %s", response_id)
|
||||
except asyncio.CancelledError:
|
||||
# Server-side cancellation (e.g. shutdown, request timeout) —
|
||||
# persist an incomplete snapshot so GET /v1/responses/{id} and
|
||||
# previous_response_id chaining still work, then re-raise so the
|
||||
# runtime's cancellation semantics are respected.
|
||||
_persist_incomplete_if_needed()
|
||||
agent = agent_ref[0] if agent_ref else None
|
||||
if agent is not None:
|
||||
try:
|
||||
agent.interrupt("SSE task cancelled")
|
||||
except Exception:
|
||||
pass
|
||||
if not agent_task.done():
|
||||
agent_task.cancel()
|
||||
logger.info("SSE task cancelled; persisted incomplete snapshot for %s", response_id)
|
||||
raise
|
||||
|
||||
return response
|
||||
|
||||
@@ -2362,6 +2445,7 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
stream_delta_callback=_text_cb,
|
||||
tool_progress_callback=event_cb,
|
||||
)
|
||||
self._active_run_agents[run_id] = agent
|
||||
def _run_sync():
|
||||
r = agent.run_conversation(
|
||||
user_message=user_message,
|
||||
@@ -2401,8 +2485,11 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
q.put_nowait(None)
|
||||
except Exception:
|
||||
pass
|
||||
self._active_run_agents.pop(run_id, None)
|
||||
self._active_run_tasks.pop(run_id, None)
|
||||
|
||||
task = asyncio.create_task(_run_and_close())
|
||||
self._active_run_tasks[run_id] = task
|
||||
try:
|
||||
self._background_tasks.add(task)
|
||||
except TypeError:
|
||||
@@ -2461,6 +2548,44 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
|
||||
return response
|
||||
|
||||
async def _handle_stop_run(self, request: "web.Request") -> "web.Response":
    """POST /v1/runs/{run_id}/stop — interrupt a running agent.

    Looks up the agent and the asyncio task registered under ``run_id`` in
    ``self._active_run_agents`` / ``self._active_run_tasks``, asks the agent
    to interrupt, cancels the task, and waits at most five seconds for the
    task to finish before answering.

    Returns:
        - whatever ``self._check_auth`` returned, when it is truthy;
        - a 404 JSON error (code ``run_not_found``) when neither an agent
          nor a task is tracked for ``run_id``;
        - otherwise ``{"run_id": ..., "status": "stopping"}``.
    """
    auth_err = self._check_auth(request)
    if auth_err:
        return auth_err

    run_id = request.match_info["run_id"]
    agent = self._active_run_agents.get(run_id)
    task = self._active_run_tasks.get(run_id)

    # Unknown only when *both* registries miss; one of the two is enough to
    # attempt a stop.
    if agent is None and task is None:
        return web.json_response(_openai_error(f"Run not found: {run_id}", code="run_not_found"), status=404)

    if agent is not None:
        # Best-effort: a failing interrupt() must not prevent the task
        # cancellation below, so any exception is deliberately ignored.
        try:
            agent.interrupt("Stop requested via API")
        except Exception:
            pass

    if task is not None and not task.done():
        task.cancel()
        # Bounded wait: run_conversation() executes in the default
        # executor thread which task.cancel() cannot preempt — we rely on
        # agent.interrupt() above to break the loop. Cap the wait so a
        # slow/unresponsive interrupt can't hang this handler.
        # NOTE(review): the nesting of this try-block under the ``if`` is
        # reconstructed from a flattened diff view — confirm against the repo.
        try:
            # shield() keeps the wait_for timeout from cancelling the task a
            # second time; only the wait itself is abandoned on timeout.
            await asyncio.wait_for(asyncio.shield(task), timeout=5.0)
        except asyncio.TimeoutError:
            logger.warning(
                "[api_server] stop for run %s timed out after 5s; "
                "agent may still be finishing the current step",
                run_id,
            )
        except (asyncio.CancelledError, Exception):
            # The task ending in CancelledError (or any error) is the expected
            # outcome of the cancel() above.
            # NOTE(review): this also catches a CancelledError aimed at this
            # handler itself — confirm that swallowing it is intended.
            pass

    return web.json_response({"run_id": run_id, "status": "stopping"})
|
||||
|
||||
async def _sweep_orphaned_runs(self) -> None:
|
||||
"""Periodically clean up run streams that were never consumed."""
|
||||
while True:
|
||||
@@ -2475,6 +2600,8 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
logger.debug("[api_server] sweeping orphaned run %s", run_id)
|
||||
self._run_streams.pop(run_id, None)
|
||||
self._run_streams_created.pop(run_id, None)
|
||||
self._active_run_agents.pop(run_id, None)
|
||||
self._active_run_tasks.pop(run_id, None)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# BasePlatformAdapter interface
|
||||
@@ -2510,6 +2637,7 @@ class APIServerAdapter(BasePlatformAdapter):
|
||||
# Structured event streaming
|
||||
self._app.router.add_post("/v1/runs", self._handle_runs)
|
||||
self._app.router.add_get("/v1/runs/{run_id}/events", self._handle_run_events)
|
||||
self._app.router.add_post("/v1/runs/{run_id}/stop", self._handle_stop_run)
|
||||
# Start background sweep to clean up orphaned (unconsumed) run streams
|
||||
sweep_task = asyncio.create_task(self._sweep_orphaned_runs())
|
||||
try:
|
||||
|
||||
+147
-8
@@ -148,7 +148,102 @@ def _detect_macos_system_proxy() -> str | None:
|
||||
return None
|
||||
|
||||
|
||||
def resolve_proxy_url(platform_env_var: str | None = None) -> str | None:
|
||||
def _split_host_port(value: str) -> tuple[str, int | None]:
|
||||
raw = str(value or "").strip()
|
||||
if not raw:
|
||||
return "", None
|
||||
if "://" in raw:
|
||||
parsed = urlsplit(raw)
|
||||
return (parsed.hostname or "").lower().rstrip("."), parsed.port
|
||||
if raw.startswith("[") and "]" in raw:
|
||||
host, _, rest = raw[1:].partition("]")
|
||||
port = None
|
||||
if rest.startswith(":") and rest[1:].isdigit():
|
||||
port = int(rest[1:])
|
||||
return host.lower().rstrip("."), port
|
||||
if raw.count(":") == 1:
|
||||
host, _, maybe_port = raw.rpartition(":")
|
||||
if maybe_port.isdigit():
|
||||
return host.lower().rstrip("."), int(maybe_port)
|
||||
return raw.lower().strip("[]").rstrip("."), None
|
||||
|
||||
|
||||
def _no_proxy_entries() -> list[str]:
|
||||
entries: list[str] = []
|
||||
for key in ("NO_PROXY", "no_proxy"):
|
||||
raw = os.environ.get(key, "")
|
||||
entries.extend(part.strip() for part in raw.split(",") if part.strip())
|
||||
return entries
|
||||
|
||||
|
||||
def _no_proxy_entry_matches(entry: str, host: str, port: int | None = None) -> bool:
    """Decide whether one NO_PROXY token covers ``host`` (and ``port``).

    Matching rules, in order:

    * blank token → no match; ``*`` → always a match;
    * a token carrying a port only matches when ``port`` is given and equal;
    * a token that parses as an IP network (CIDR or single address) matches
      when ``host`` is an IP address inside it;
    * ``*.suffix`` matches hosts ending in ``.suffix``;
    * ``.suffix`` matches ``suffix`` itself and any subdomain;
    * a plain name matches itself and any subdomain.
    """
    pattern = str(entry or "").strip().lower()
    if not pattern:
        return False
    if pattern == "*":
        return True

    pattern_host, pattern_port = _split_host_port(pattern)
    # A port-qualified token needs a known, identical target port.
    if pattern_port is not None and (port is None or pattern_port != port):
        return False
    if not pattern_host:
        return False

    # CIDR / network form (a lone address parses as a single-host network).
    try:
        cidr = ipaddress.ip_network(pattern_host, strict=False)
    except ValueError:
        cidr = None
    if cidr is not None:
        try:
            return ipaddress.ip_address(host) in cidr
        except ValueError:
            # Token is numeric but the target is a hostname.
            return False

    # Literal address form, for anything ip_network() did not accept.
    try:
        literal = ipaddress.ip_address(pattern_host)
    except ValueError:
        literal = None
    if literal is not None:
        try:
            return ipaddress.ip_address(host) == literal
        except ValueError:
            return False

    if pattern_host.startswith("*."):
        # Drop only the "*", keeping the leading dot as the suffix.
        return host.endswith(pattern_host[1:])
    if pattern_host.startswith("."):
        bare = pattern_host[1:]
        return host == bare or host.endswith(pattern_host)
    return host == pattern_host or host.endswith(f".{pattern_host}")
|
||||
|
||||
|
||||
def should_bypass_proxy(target_hosts: str | list[str] | tuple[str, ...] | set[str] | None) -> bool:
    """Tell whether NO_PROXY/no_proxy exempts any of ``target_hosts``.

    ``target_hosts`` may be a single string or a collection of strings; each
    one is split into host and optional port before being compared against
    every NO_PROXY token. Understood token forms are exact hosts, domain
    suffixes, wildcard suffixes, IP literals, CIDR ranges, optional
    ``host:port`` entries, and ``*``.

    Returns False when no tokens are configured or no targets are given.
    """
    patterns = _no_proxy_entries()
    if not patterns or not target_hosts:
        return False

    hosts = [target_hosts] if isinstance(target_hosts, str) else list(target_hosts)

    for item in hosts:
        name, port = _split_host_port(str(item))
        if not name:
            # Nothing usable in this candidate; try the next one.
            continue
        for pattern in patterns:
            if _no_proxy_entry_matches(pattern, name, port):
                return True
    return False
|
||||
|
||||
|
||||
def resolve_proxy_url(
|
||||
platform_env_var: str | None = None,
|
||||
*,
|
||||
target_hosts: str | list[str] | tuple[str, ...] | set[str] | None = None,
|
||||
) -> str | None:
|
||||
"""Return a proxy URL from env vars, or macOS system proxy.
|
||||
|
||||
Check order:
|
||||
@@ -156,18 +251,26 @@ def resolve_proxy_url(platform_env_var: str | None = None) -> str | None:
|
||||
1. HTTPS_PROXY / HTTP_PROXY / ALL_PROXY (and lowercase variants)
|
||||
2. macOS system proxy via ``scutil --proxy`` (auto-detect)
|
||||
|
||||
Returns *None* if no proxy is found.
|
||||
Returns *None* if no proxy is found, or if NO_PROXY/no_proxy matches one
|
||||
of ``target_hosts``.
|
||||
"""
|
||||
if platform_env_var:
|
||||
value = (os.environ.get(platform_env_var) or "").strip()
|
||||
if value:
|
||||
if should_bypass_proxy(target_hosts):
|
||||
return None
|
||||
return normalize_proxy_url(value)
|
||||
for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY",
|
||||
"https_proxy", "http_proxy", "all_proxy"):
|
||||
value = (os.environ.get(key) or "").strip()
|
||||
if value:
|
||||
if should_bypass_proxy(target_hosts):
|
||||
return None
|
||||
return normalize_proxy_url(value)
|
||||
return normalize_proxy_url(_detect_macos_system_proxy())
|
||||
detected = normalize_proxy_url(_detect_macos_system_proxy())
|
||||
if detected and should_bypass_proxy(target_hosts):
|
||||
return None
|
||||
return detected
|
||||
|
||||
|
||||
def proxy_kwargs_for_bot(proxy_url: str | None) -> dict:
|
||||
@@ -922,7 +1025,20 @@ class BasePlatformAdapter(ABC):
|
||||
self._post_delivery_callbacks: Dict[str, Any] = {}
|
||||
self._expected_cancelled_tasks: set[asyncio.Task] = set()
|
||||
self._busy_session_handler: Optional[Callable[[MessageEvent, str], Awaitable[bool]]] = None
|
||||
# Chats where auto-TTS on voice input is disabled (set by /voice off)
|
||||
# Auto-TTS on voice input: ``_auto_tts_default`` is the global default
|
||||
# (``voice.auto_tts`` in config.yaml, pushed by GatewayRunner on connect).
|
||||
# Per-chat overrides live in two sets populated from ``_voice_mode``:
|
||||
# - ``_auto_tts_enabled_chats``: chat explicitly opted in via ``/voice on``
|
||||
# or ``/voice tts`` (mode is ``voice_only`` or ``all``). Fires even when
|
||||
# the global default is False.
|
||||
# - ``_auto_tts_disabled_chats``: chat explicitly opted out via
|
||||
# ``/voice off`` (mode is ``off``). Suppresses auto-TTS even when the
|
||||
# global default is True.
|
||||
# The gate in _process_message() is:
|
||||
# fire if chat in _auto_tts_enabled_chats
|
||||
# OR (_auto_tts_default and chat not in _auto_tts_disabled_chats)
|
||||
self._auto_tts_default: bool = False
|
||||
self._auto_tts_enabled_chats: set = set()
|
||||
self._auto_tts_disabled_chats: set = set()
|
||||
# Chats where typing indicator is paused (e.g. during approval waits).
|
||||
# _keep_typing skips send_typing when the chat_id is in this set.
|
||||
@@ -944,6 +1060,21 @@ class BasePlatformAdapter(ABC):
|
||||
def fatal_error_retryable(self) -> bool:
|
||||
return self._fatal_error_retryable
|
||||
|
||||
def _should_auto_tts_for_chat(self, chat_id: str) -> bool:
|
||||
"""Whether auto-TTS on voice input should fire for ``chat_id``.
|
||||
|
||||
Decision layers (Issue #16007):
|
||||
1. Explicit ``/voice on`` or ``/voice tts`` → always fire (even if
|
||||
``voice.auto_tts`` is False).
|
||||
2. Explicit ``/voice off`` → never fire.
|
||||
3. Fall back to the global ``voice.auto_tts`` config default.
|
||||
"""
|
||||
if chat_id in self._auto_tts_enabled_chats:
|
||||
return True
|
||||
if chat_id in self._auto_tts_disabled_chats:
|
||||
return False
|
||||
return bool(self._auto_tts_default)
|
||||
|
||||
def set_fatal_error_handler(self, handler: Callable[["BasePlatformAdapter"], Awaitable[None] | None]) -> None:
|
||||
self._fatal_error_handler = handler
|
||||
|
||||
@@ -2111,12 +2242,14 @@ class BasePlatformAdapter(ABC):
|
||||
logger.info("[%s] extract_local_files found %d file(s) in response", self.name, len(local_files))
|
||||
|
||||
# Auto-TTS: if voice message, generate audio FIRST (before sending text)
|
||||
# Skipped when the chat has voice mode disabled (/voice off)
|
||||
# Gated via ``_should_auto_tts_for_chat``: fires when the chat has
|
||||
# an explicit ``/voice on|tts`` opt-in OR when ``voice.auto_tts`` is
|
||||
# True globally and no ``/voice off`` has been issued.
|
||||
_tts_path = None
|
||||
if (event.message_type == MessageType.VOICE
|
||||
if (self._should_auto_tts_for_chat(event.source.chat_id)
|
||||
and event.message_type == MessageType.VOICE
|
||||
and text_content
|
||||
and not media_files
|
||||
and event.source.chat_id not in self._auto_tts_disabled_chats):
|
||||
and not media_files):
|
||||
try:
|
||||
from tools.tts_tool import text_to_speech_tool, check_tts_requirements
|
||||
if check_tts_requirements():
|
||||
@@ -2440,6 +2573,9 @@ class BasePlatformAdapter(ABC):
|
||||
user_id_alt: Optional[str] = None,
|
||||
chat_id_alt: Optional[str] = None,
|
||||
is_bot: bool = False,
|
||||
guild_id: Optional[str] = None,
|
||||
parent_chat_id: Optional[str] = None,
|
||||
message_id: Optional[str] = None,
|
||||
) -> SessionSource:
|
||||
"""Helper to build a SessionSource for this platform."""
|
||||
# Normalize empty topic to None
|
||||
@@ -2457,6 +2593,9 @@ class BasePlatformAdapter(ABC):
|
||||
user_id_alt=user_id_alt,
|
||||
chat_id_alt=chat_id_alt,
|
||||
is_bot=is_bot,
|
||||
guild_id=str(guild_id) if guild_id else None,
|
||||
parent_chat_id=str(parent_chat_id) if parent_chat_id else None,
|
||||
message_id=str(message_id) if message_id else None,
|
||||
)
|
||||
|
||||
@abstractmethod
|
||||
|
||||
@@ -99,6 +99,7 @@ def _normalize_server_url(raw: str) -> str:
|
||||
|
||||
class BlueBubblesAdapter(BasePlatformAdapter):
|
||||
platform = Platform.BLUEBUBBLES
|
||||
SUPPORTS_MESSAGE_EDITING = False
|
||||
MAX_MESSAGE_LENGTH = MAX_TEXT_LENGTH
|
||||
|
||||
def __init__(self, config: PlatformConfig):
|
||||
@@ -391,6 +392,13 @@ class BlueBubblesAdapter(BasePlatformAdapter):
|
||||
# Text sending
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
@staticmethod
def truncate_message(content: str, max_length: int = MAX_TEXT_LENGTH) -> List[str]:
    """Split ``content`` with the base splitter and drop ``(n/m)`` suffixes.

    iMessage bubbles flow naturally, so the trailing pagination marker
    (optional whitespace followed by ``(digits/digits)`` at the very end of
    a chunk) is removed from every chunk the base implementation returns.
    """
    pieces = BasePlatformAdapter.truncate_message(content, max_length)
    cleaned: List[str] = []
    for piece in pieces:
        cleaned.append(re.sub(r"\s*\(\d+/\d+\)$", "", piece))
    return cleaned
|
||||
|
||||
async def send(
|
||||
self,
|
||||
chat_id: str,
|
||||
@@ -398,10 +406,19 @@ class BlueBubblesAdapter(BasePlatformAdapter):
|
||||
reply_to: Optional[str] = None,
|
||||
metadata: Optional[Dict[str, Any]] = None,
|
||||
) -> SendResult:
|
||||
text = strip_markdown(content or "")
|
||||
text = self.format_message(content)
|
||||
if not text:
|
||||
return SendResult(success=False, error="BlueBubbles send requires text")
|
||||
chunks = self.truncate_message(text, max_length=self.MAX_MESSAGE_LENGTH)
|
||||
# Split on paragraph breaks first (double newlines) so each thought
|
||||
# becomes its own iMessage bubble, then truncate any that are still
|
||||
# too long.
|
||||
paragraphs = [p.strip() for p in re.split(r'\n\s*\n', text) if p.strip()]
|
||||
chunks: List[str] = []
|
||||
for para in (paragraphs or [text]):
|
||||
if len(para) <= self.MAX_MESSAGE_LENGTH:
|
||||
chunks.append(para)
|
||||
else:
|
||||
chunks.extend(self.truncate_message(para, max_length=self.MAX_MESSAGE_LENGTH))
|
||||
last = SendResult(success=True)
|
||||
for chunk in chunks:
|
||||
guid = await self._resolve_chat_guid(chat_id)
|
||||
|
||||
@@ -2246,10 +2246,6 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
async def slash_usage(interaction: discord.Interaction):
|
||||
await self._run_simple_slash(interaction, "/usage")
|
||||
|
||||
@tree.command(name="provider", description="Show available providers")
|
||||
async def slash_provider(interaction: discord.Interaction):
|
||||
await self._run_simple_slash(interaction, "/provider")
|
||||
|
||||
@tree.command(name="help", description="Show available commands")
|
||||
async def slash_help(interaction: discord.Interaction):
|
||||
await self._run_simple_slash(interaction, "/help")
|
||||
@@ -2319,11 +2315,6 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
async def slash_background(interaction: discord.Interaction, prompt: str):
|
||||
await self._run_simple_slash(interaction, f"/background {prompt}", "Background task started~")
|
||||
|
||||
@tree.command(name="btw", description="Ephemeral side question using session context")
|
||||
@discord.app_commands.describe(question="Your side question (no tools, not persisted)")
|
||||
async def slash_btw(interaction: discord.Interaction, question: str):
|
||||
await self._run_simple_slash(interaction, f"/btw {question}")
|
||||
|
||||
# ── Auto-register any gateway-available commands not yet on the tree ──
|
||||
# This ensures new commands added to COMMAND_REGISTRY in
|
||||
# hermes_cli/commands.py automatically appear as Discord slash
|
||||
@@ -2719,7 +2710,12 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
return os.getenv("DISCORD_REQUIRE_MENTION", "true").lower() not in ("false", "0", "no", "off")
|
||||
|
||||
def _discord_free_response_channels(self) -> set:
|
||||
"""Return Discord channel IDs where no bot mention is required."""
|
||||
"""Return Discord channel IDs where no bot mention is required.
|
||||
|
||||
A single ``"*"`` entry (either from a list or a comma-separated
|
||||
string) is preserved in the returned set so callers can short-circuit
|
||||
on wildcard membership, consistent with ``allowed_channels``.
|
||||
"""
|
||||
raw = self.config.extra.get("free_response_channels")
|
||||
if raw is None:
|
||||
raw = os.getenv("DISCORD_FREE_RESPONSE_CHANNELS", "")
|
||||
@@ -3212,14 +3208,14 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
allowed_channels_raw = os.getenv("DISCORD_ALLOWED_CHANNELS", "")
|
||||
if allowed_channels_raw:
|
||||
allowed_channels = {ch.strip() for ch in allowed_channels_raw.split(",") if ch.strip()}
|
||||
if not (channel_ids & allowed_channels):
|
||||
if "*" not in allowed_channels and not (channel_ids & allowed_channels):
|
||||
logger.debug("[%s] Ignoring message in non-allowed channel: %s", self.name, channel_ids)
|
||||
return
|
||||
|
||||
# Check ignored channels - never respond even when mentioned
|
||||
ignored_channels_raw = os.getenv("DISCORD_IGNORED_CHANNELS", "")
|
||||
ignored_channels = {ch.strip() for ch in ignored_channels_raw.split(",") if ch.strip()}
|
||||
if channel_ids & ignored_channels:
|
||||
if "*" in ignored_channels or (channel_ids & ignored_channels):
|
||||
logger.debug("[%s] Ignoring message in ignored channel: %s", self.name, channel_ids)
|
||||
return
|
||||
|
||||
@@ -3233,7 +3229,11 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
voice_linked_ids = {str(ch_id) for ch_id in self._voice_text_channels.values()}
|
||||
current_channel_id = str(message.channel.id)
|
||||
is_voice_linked_channel = current_channel_id in voice_linked_ids
|
||||
is_free_channel = bool(channel_ids & free_channels) or is_voice_linked_channel
|
||||
is_free_channel = (
|
||||
"*" in free_channels
|
||||
or bool(channel_ids & free_channels)
|
||||
or is_voice_linked_channel
|
||||
)
|
||||
|
||||
# Skip the mention check if the message is in a thread where
|
||||
# the bot has previously participated (auto-created or replied in).
|
||||
@@ -3256,6 +3256,7 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
if auto_thread and not skip_thread and not is_voice_linked_channel and not is_reply_message:
|
||||
thread = await self._auto_create_thread(message)
|
||||
if thread:
|
||||
parent_channel_id = str(message.channel.id)
|
||||
is_thread = True
|
||||
thread_id = str(thread.id)
|
||||
auto_threaded_channel = thread
|
||||
@@ -3315,6 +3316,9 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
thread_id=thread_id,
|
||||
chat_topic=chat_topic,
|
||||
is_bot=getattr(message.author, "bot", False),
|
||||
guild_id=str(message.guild.id) if message.guild else None,
|
||||
parent_chat_id=parent_channel_id,
|
||||
message_id=str(message.id),
|
||||
)
|
||||
|
||||
# Build media URLs -- download image attachments to local cache so the
|
||||
@@ -3866,6 +3870,15 @@ if DISCORD_AVAILABLE:
|
||||
|
||||
self.resolved = True
|
||||
model_id = interaction.data["values"][0]
|
||||
self.clear_items()
|
||||
await interaction.response.edit_message(
|
||||
embed=discord.Embed(
|
||||
title="⚙ Switching Model",
|
||||
description=f"Switching to `{model_id}`...",
|
||||
color=discord.Color.blue(),
|
||||
),
|
||||
view=None,
|
||||
)
|
||||
|
||||
try:
|
||||
result_text = await self.on_model_selected(
|
||||
@@ -3876,14 +3889,13 @@ if DISCORD_AVAILABLE:
|
||||
except Exception as exc:
|
||||
result_text = f"Error switching model: {exc}"
|
||||
|
||||
self.clear_items()
|
||||
await interaction.response.edit_message(
|
||||
await interaction.edit_original_response(
|
||||
embed=discord.Embed(
|
||||
title="⚙ Model Switched",
|
||||
description=result_text,
|
||||
color=discord.Color.green(),
|
||||
),
|
||||
view=self,
|
||||
view=None,
|
||||
)
|
||||
|
||||
async def _on_back(self, interaction: discord.Interaction):
|
||||
|
||||
@@ -532,6 +532,20 @@ class MatrixAdapter(BasePlatformAdapter):
|
||||
)
|
||||
await crypto_store.open()
|
||||
|
||||
# Bind the store to the runtime device_id before any
|
||||
# put_account() runs. PgCryptoStore defaults _device_id
|
||||
# to "" and its crypto_account UPSERT never updates the
|
||||
# device_id column on conflict — so once put_account
|
||||
# writes blank, it stays blank forever. That breaks
|
||||
# every downstream device-scoped olm operation: peer
|
||||
# to-device ciphertext can't find our identity key and
|
||||
# no megolm sessions ever land. Setting _device_id here
|
||||
# (in-memory; the on-disk row may not exist yet) makes
|
||||
# the first put_account write the correct value.
|
||||
# DeviceID is a NewType(str) so plain str works at runtime.
|
||||
if client.device_id:
|
||||
await crypto_store.put_device_id(client.device_id)
|
||||
|
||||
crypto_state = _CryptoStateStore(state_store, self._joined_rooms)
|
||||
olm = OlmMachine(client, crypto_store, crypto_state)
|
||||
|
||||
|
||||
@@ -703,7 +703,6 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
"write_timeout": _env_float("HERMES_TELEGRAM_HTTP_WRITE_TIMEOUT", 20.0),
|
||||
}
|
||||
|
||||
proxy_url = resolve_proxy_url("TELEGRAM_PROXY")
|
||||
disable_fallback = (os.getenv("HERMES_TELEGRAM_DISABLE_FALLBACK_IPS", "").strip().lower() in ("1", "true", "yes", "on"))
|
||||
fallback_ips = self._fallback_ips()
|
||||
if not fallback_ips:
|
||||
@@ -714,6 +713,8 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
", ".join(fallback_ips),
|
||||
)
|
||||
|
||||
proxy_targets = ["api.telegram.org", *fallback_ips]
|
||||
proxy_url = resolve_proxy_url("TELEGRAM_PROXY", target_hosts=proxy_targets)
|
||||
if fallback_ips and not proxy_url and not disable_fallback:
|
||||
logger.info(
|
||||
"[%s] Telegram fallback IPs active: %s",
|
||||
|
||||
@@ -43,10 +43,10 @@ _DOH_PROVIDERS: list[dict] = [
|
||||
_SEED_FALLBACK_IPS: list[str] = ["149.154.167.220"]
|
||||
|
||||
|
||||
def _resolve_proxy_url() -> str | None:
|
||||
def _resolve_proxy_url(target_hosts=None) -> str | None:
|
||||
# Delegate to shared implementation (env vars + macOS system proxy detection)
|
||||
from gateway.platforms.base import resolve_proxy_url
|
||||
return resolve_proxy_url("TELEGRAM_PROXY")
|
||||
return resolve_proxy_url("TELEGRAM_PROXY", target_hosts=target_hosts)
|
||||
|
||||
|
||||
class TelegramFallbackTransport(httpx.AsyncBaseTransport):
|
||||
@@ -60,7 +60,7 @@ class TelegramFallbackTransport(httpx.AsyncBaseTransport):
|
||||
|
||||
def __init__(self, fallback_ips: Iterable[str], **transport_kwargs):
|
||||
self._fallback_ips = [ip for ip in dict.fromkeys(_normalize_fallback_ips(fallback_ips))]
|
||||
proxy_url = _resolve_proxy_url()
|
||||
proxy_url = _resolve_proxy_url(target_hosts=[_TELEGRAM_API_HOST, *self._fallback_ips])
|
||||
if proxy_url and "proxy" not in transport_kwargs:
|
||||
transport_kwargs["proxy"] = proxy_url
|
||||
self._primary = httpx.AsyncHTTPTransport(**transport_kwargs)
|
||||
|
||||
+987
-558
File diff suppressed because it is too large
Load Diff
+99
-16
@@ -60,6 +60,10 @@ from .config import (
|
||||
SessionResetPolicy, # noqa: F401 — re-exported via gateway/__init__.py
|
||||
HomeChannel,
|
||||
)
|
||||
from .whatsapp_identity import (
|
||||
canonical_whatsapp_identifier,
|
||||
normalize_whatsapp_identifier,
|
||||
)
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -83,6 +87,9 @@ class SessionSource:
|
||||
user_id_alt: Optional[str] = None # Platform-specific stable alt ID (Signal UUID, Feishu union_id)
|
||||
chat_id_alt: Optional[str] = None # Signal group internal ID
|
||||
is_bot: bool = False # True when the message author is a bot/webhook (Discord)
|
||||
guild_id: Optional[str] = None # Discord guild / Slack workspace / Matrix server scope
|
||||
parent_chat_id: Optional[str] = None # Parent channel when chat_id refers to a thread
|
||||
message_id: Optional[str] = None # ID of the triggering message (for pin/reply/react)
|
||||
|
||||
@property
|
||||
def description(self) -> str:
|
||||
@@ -120,8 +127,14 @@ class SessionSource:
|
||||
d["user_id_alt"] = self.user_id_alt
|
||||
if self.chat_id_alt:
|
||||
d["chat_id_alt"] = self.chat_id_alt
|
||||
if self.guild_id:
|
||||
d["guild_id"] = self.guild_id
|
||||
if self.parent_chat_id:
|
||||
d["parent_chat_id"] = self.parent_chat_id
|
||||
if self.message_id:
|
||||
d["message_id"] = self.message_id
|
||||
return d
|
||||
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, data: Dict[str, Any]) -> "SessionSource":
|
||||
return cls(
|
||||
@@ -135,6 +148,9 @@ class SessionSource:
|
||||
chat_topic=data.get("chat_topic"),
|
||||
user_id_alt=data.get("user_id_alt"),
|
||||
chat_id_alt=data.get("chat_id_alt"),
|
||||
guild_id=data.get("guild_id"),
|
||||
parent_chat_id=data.get("parent_chat_id"),
|
||||
message_id=data.get("message_id"),
|
||||
)
|
||||
|
||||
|
||||
@@ -186,6 +202,31 @@ that requires raw IDs). Discord is excluded because mentions use ``<@user_id>``
|
||||
and the LLM needs the real ID to tag users."""
|
||||
|
||||
|
||||
def _discord_tools_loaded() -> bool:
|
||||
"""True iff the agent will actually have Discord tools this session.
|
||||
|
||||
Two conditions must hold:
|
||||
1. The `discord` or `discord_admin` toolset is enabled for the
|
||||
Discord platform via `hermes tools` (opt-in, default OFF).
|
||||
2. `DISCORD_BOT_TOKEN` is set — the tool's `check_fn` gates on it
|
||||
at registry time, so the toolset being enabled in config is not
|
||||
enough if the token isn't configured.
|
||||
|
||||
Returns False (safe default — keeps the stale-API disclaimer) on any
|
||||
error so a bad config can't silently promise tools the agent lacks.
|
||||
"""
|
||||
if not (os.environ.get("DISCORD_BOT_TOKEN") or "").strip():
|
||||
return False
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
from hermes_cli.tools_config import _get_platform_tools
|
||||
cfg = load_config()
|
||||
enabled = _get_platform_tools(cfg, "discord", include_default_mcp_servers=False)
|
||||
return "discord" in enabled or "discord_admin" in enabled
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
|
||||
def build_session_context_prompt(
|
||||
context: SessionContext,
|
||||
*,
|
||||
@@ -273,13 +314,44 @@ def build_session_context_prompt(
|
||||
"that you can only read messages sent directly to you and respond."
|
||||
)
|
||||
elif context.source.platform == Platform.DISCORD:
|
||||
# Inject the Discord IDs block only when the agent actually has
|
||||
# Discord tools loaded this session — i.e. the user opted into
|
||||
# `discord` / `discord_admin` via `hermes tools` AND the bot
|
||||
# token is configured. Otherwise keep the stale-API disclaimer
|
||||
# honest so we never promise tools the agent lacks.
|
||||
if _discord_tools_loaded():
|
||||
src = context.source
|
||||
id_lines = ["", "**Discord IDs (for the `discord` / `discord_admin` tools):**"]
|
||||
if src.guild_id:
|
||||
id_lines.append(f" - Guild: `{src.guild_id}`")
|
||||
if src.thread_id and src.parent_chat_id:
|
||||
id_lines.append(f" - Parent channel: `{src.parent_chat_id}`")
|
||||
id_lines.append(f" - Thread: `{src.thread_id}` (use as `channel_id` for fetch_messages etc.)")
|
||||
else:
|
||||
id_lines.append(f" - Channel: `{src.chat_id}`")
|
||||
if src.message_id:
|
||||
id_lines.append(f" - Triggering message: `{src.message_id}`")
|
||||
lines.extend(id_lines)
|
||||
else:
|
||||
lines.append("")
|
||||
lines.append(
|
||||
"**Platform notes:** You are running inside Discord. "
|
||||
"You do NOT have access to Discord-specific APIs — you cannot search "
|
||||
"channel history, pin messages, manage roles, or list server members. "
|
||||
"Do not promise to perform these actions. If the user asks, explain "
|
||||
"that you can only read messages sent directly to you and respond."
|
||||
)
|
||||
elif context.source.platform == Platform.BLUEBUBBLES:
|
||||
lines.append("")
|
||||
lines.append(
|
||||
"**Platform notes:** You are running inside Discord. "
|
||||
"You do NOT have access to Discord-specific APIs — you cannot search "
|
||||
"channel history, pin messages, manage roles, or list server members. "
|
||||
"Do not promise to perform these actions. If the user asks, explain "
|
||||
"that you can only read messages sent directly to you and respond."
|
||||
"**Platform notes:** You are responding via iMessage. "
|
||||
"Keep responses short and conversational — think texts, not essays. "
|
||||
"Structure longer replies as separate short thoughts, each separated "
|
||||
"by a blank line (double newline). Each block between blank lines "
|
||||
"will be delivered as its own iMessage bubble, so write accordingly: "
|
||||
"one idea per bubble, 1–3 sentences each. "
|
||||
"If the user needs a detailed answer, give the short version first "
|
||||
"and offer to elaborate."
|
||||
)
|
||||
|
||||
# Connected platforms
|
||||
@@ -367,11 +439,11 @@ class SessionEntry:
|
||||
auto_reset_reason: Optional[str] = None # "idle" or "daily"
|
||||
reset_had_activity: bool = False # whether the expired session had any messages
|
||||
|
||||
# Set by the background expiry watcher after it successfully flushes
|
||||
# memories for this session. Persisted to sessions.json so the flag
|
||||
# survives gateway restarts (the old in-memory _pre_flushed_sessions
|
||||
# set was lost on restart, causing redundant re-flushes).
|
||||
memory_flushed: bool = False
|
||||
# Set by the background expiry watcher after it finalizes an expired
|
||||
# session (invoking on_session_finalize hooks and evicting the cached
|
||||
# agent). Persisted to sessions.json so the flag survives gateway
|
||||
# restarts — prevents redundant finalization runs.
|
||||
expiry_finalized: bool = False
|
||||
|
||||
# When True the next call to get_or_create_session() will auto-reset
|
||||
# this session (create a new session_id) so the user starts fresh.
|
||||
@@ -407,7 +479,7 @@ class SessionEntry:
|
||||
"last_prompt_tokens": self.last_prompt_tokens,
|
||||
"estimated_cost_usd": self.estimated_cost_usd,
|
||||
"cost_status": self.cost_status,
|
||||
"memory_flushed": self.memory_flushed,
|
||||
"expiry_finalized": self.expiry_finalized,
|
||||
"suspended": self.suspended,
|
||||
"resume_pending": self.resume_pending,
|
||||
"resume_reason": self.resume_reason,
|
||||
@@ -459,7 +531,7 @@ class SessionEntry:
|
||||
last_prompt_tokens=data.get("last_prompt_tokens", 0),
|
||||
estimated_cost_usd=data.get("estimated_cost_usd", 0.0),
|
||||
cost_status=data.get("cost_status", "unknown"),
|
||||
memory_flushed=data.get("memory_flushed", False),
|
||||
expiry_finalized=data.get("expiry_finalized", data.get("memory_flushed", False)),
|
||||
suspended=data.get("suspended", False),
|
||||
resume_pending=data.get("resume_pending", False),
|
||||
resume_reason=data.get("resume_reason"),
|
||||
@@ -518,15 +590,24 @@ def build_session_key(
|
||||
"""
|
||||
platform = source.platform.value
|
||||
if source.chat_type == "dm":
|
||||
if source.chat_id:
|
||||
dm_chat_id = source.chat_id
|
||||
if source.platform == Platform.WHATSAPP:
|
||||
dm_chat_id = canonical_whatsapp_identifier(source.chat_id)
|
||||
|
||||
if dm_chat_id:
|
||||
if source.thread_id:
|
||||
return f"agent:main:{platform}:dm:{source.chat_id}:{source.thread_id}"
|
||||
return f"agent:main:{platform}:dm:{source.chat_id}"
|
||||
return f"agent:main:{platform}:dm:{dm_chat_id}:{source.thread_id}"
|
||||
return f"agent:main:{platform}:dm:{dm_chat_id}"
|
||||
if source.thread_id:
|
||||
return f"agent:main:{platform}:dm:{source.thread_id}"
|
||||
return f"agent:main:{platform}:dm"
|
||||
|
||||
participant_id = source.user_id_alt or source.user_id
|
||||
if participant_id and source.platform == Platform.WHATSAPP:
|
||||
# Same JID/LID-flip bug as the DM case: without canonicalisation, a
|
||||
# single group member gets two isolated per-user sessions when the
|
||||
# bridge reshuffles alias forms.
|
||||
participant_id = canonical_whatsapp_identifier(str(participant_id)) or participant_id
|
||||
key_parts = ["agent:main", platform, source.chat_type]
|
||||
|
||||
if source.chat_id:
|
||||
@@ -1151,6 +1232,7 @@ class SessionStore:
|
||||
reasoning_content=message.get("reasoning_content") if message.get("role") == "assistant" else None,
|
||||
reasoning_details=message.get("reasoning_details") if message.get("role") == "assistant" else None,
|
||||
codex_reasoning_items=message.get("codex_reasoning_items") if message.get("role") == "assistant" else None,
|
||||
codex_message_items=message.get("codex_message_items") if message.get("role") == "assistant" else None,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.debug("Session DB operation failed: %s", e)
|
||||
@@ -1183,6 +1265,7 @@ class SessionStore:
|
||||
reasoning_content=msg.get("reasoning_content") if role == "assistant" else None,
|
||||
reasoning_details=msg.get("reasoning_details") if role == "assistant" else None,
|
||||
codex_reasoning_items=msg.get("codex_reasoning_items") if role == "assistant" else None,
|
||||
codex_message_items=msg.get("codex_message_items") if role == "assistant" else None,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.debug("Failed to rewrite transcript in DB: %s", e)
|
||||
|
||||
@@ -0,0 +1,135 @@
|
||||
"""Shared helpers for canonicalising WhatsApp sender identity.
|
||||
|
||||
WhatsApp's bridge can surface the same human under two different JID shapes
|
||||
within a single conversation:
|
||||
|
||||
- LID form: ``999999999999999@lid``
|
||||
- Phone form: ``15551234567@s.whatsapp.net``
|
||||
|
||||
Both the authorisation path (:mod:`gateway.run`) and the session-key path
|
||||
(:mod:`gateway.session`) need to collapse these aliases to a single stable
|
||||
identity. This module is the single source of truth for that resolution so
|
||||
the two paths can never drift apart.
|
||||
|
||||
Public helpers:
|
||||
|
||||
- :func:`normalize_whatsapp_identifier` — strip JID/LID/device/plus syntax
|
||||
down to the bare numeric identifier.
|
||||
- :func:`canonical_whatsapp_identifier` — walk the bridge's
|
||||
``lid-mapping-*.json`` files and return a stable canonical identity
|
||||
across phone/LID variants.
|
||||
- :func:`expand_whatsapp_aliases` — return the full alias set for an
|
||||
identifier. Used by authorisation code that needs to match any known
|
||||
form of a sender against an allow-list.
|
||||
|
||||
Plugins that need per-sender behaviour on WhatsApp (role-based routing,
|
||||
per-contact authorisation, policy gating in a gateway hook) should use
|
||||
``canonical_whatsapp_identifier`` so their bookkeeping lines up with
|
||||
Hermes' own session keys.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from typing import Set
|
||||
|
||||
from hermes_constants import get_hermes_home
|
||||
|
||||
|
||||
def normalize_whatsapp_identifier(value: str) -> str:
    """Reduce a WhatsApp identifier to the bare token in front of its suffixes.

    The value is coerced to ``str`` (any falsy value becomes ``""``), trimmed
    of surrounding whitespace, stripped of its first ``+``, and then cut at
    the first ``:`` and at the first ``@``.

    Examples::

        "60123456789@s.whatsapp.net"     -> "60123456789"
        "60123456789:47@s.whatsapp.net"  -> "60123456789"
        "60123456789@lid"                -> "60123456789"
        "+60123456789"                   -> "60123456789"

    The result is intended for equality comparisons, e.g. matching a sender
    against phone numbers a user typed into ``config.yaml``.
    """
    text = str(value or "").strip()
    # Only the first "+" is removed, mirroring str.replace(..., 1).
    without_plus = text.replace("+", "", 1)
    # Drop everything from the first ":" onwards (e.g. the ":47" part).
    before_colon, _, _ = without_plus.partition(":")
    # Drop everything from the first "@" onwards (e.g. "@lid").
    bare, _, _ = before_colon.partition("@")
    return bare
|
||||
|
||||
|
||||
def expand_whatsapp_aliases(identifier: str) -> Set[str]:
    """Resolve WhatsApp phone/LID aliases via bridge session mapping files.

    Starting from the normalized ``identifier``, follows
    ``<hermes home>/whatsapp/session/lid-mapping-<id>.json`` and
    ``lid-mapping-<id>_reverse.json`` files breadth-first and collects every
    identifier reached.  Each mapping file is expected to hold a single JSON
    scalar (string or integer) naming the alias.

    Args:
        identifier: Any identifier shape accepted by
            :func:`normalize_whatsapp_identifier`.

    Returns:
        The set of reachable identifiers.  It always contains the normalized
        input itself, so callers can ``in``-check without a fallback branch.
        An empty set is returned when ``identifier`` normalizes to empty.

    Never raises for unreadable, malformed or unexpected mapping files; such
    files are simply ignored.
    """
    from collections import deque

    normalized = normalize_whatsapp_identifier(identifier)
    if not normalized:
        return set()

    session_dir = get_hermes_home() / "whatsapp" / "session"
    resolved: Set[str] = set()
    pending = deque([normalized])

    while pending:
        current = pending.popleft()
        if not current or current in resolved:
            continue
        resolved.add(current)

        # The identifier originates from the bridge / remote sender.  Never
        # let it steer the lookup outside ``session_dir``: an id containing a
        # path separator is kept as an alias of itself but not used to build
        # a filename.
        if "/" in current or "\\" in current:
            continue

        for suffix in ("", "_reverse"):
            mapping_path = session_dir / f"lid-mapping-{current}{suffix}.json"
            try:
                # exists() can raise OSError (e.g. permission denied), so it
                # lives inside the best-effort guard together with the read.
                if not mapping_path.exists():
                    continue
                payload = json.loads(mapping_path.read_text(encoding="utf-8"))
            except Exception:
                continue

            # Only a JSON string / integer is a meaningful identifier.
            # Stringifying an object, list, float or boolean would inject a
            # garbage alias (e.g. "{'lid'") into the result set.
            if isinstance(payload, bool) or not isinstance(payload, (str, int)):
                continue

            mapped = normalize_whatsapp_identifier(payload)
            if mapped and mapped not in resolved:
                pending.append(mapped)

    return resolved
|
||||
|
||||
|
||||
def canonical_whatsapp_identifier(identifier: str) -> str:
    """Return a stable WhatsApp sender identity across phone-JID/LID variants.

    WhatsApp may surface the same person under either a phone-format JID
    (``60123456789@s.whatsapp.net``) or a LID (``1234567890@lid``).  This
    applies to a DM ``chat_id`` and to the ``participant_id`` of a member
    inside a group chat.

    The identifier is normalized, its alias set is obtained from
    :func:`expand_whatsapp_aliases`, and the canonical identity is the
    shortest alias, with ties broken by plain string ordering.

    Args:
        identifier: Any identifier shape accepted by
            :func:`normalize_whatsapp_identifier`.

    Returns:
        The canonical alias.  When no mapping files apply, the alias set is
        just the normalized input, so that value is returned unchanged.  An
        empty string is returned when the identifier normalizes to empty, or
        when no alias can be derived from it at all.
    """
    normalized = normalize_whatsapp_identifier(identifier)
    if not normalized:
        return ""

    aliases = expand_whatsapp_aliases(normalized)
    # expand_whatsapp_aliases normalizes its argument again.  For degenerate
    # input such as "++" the first pass yields "+" and the second yields "",
    # so the alias set can be empty; ``default`` keeps min() from raising
    # ValueError in that case.
    return min(aliases, key=lambda candidate: (len(candidate), candidate), default="")
|
||||
+937
-106
File diff suppressed because it is too large
Load Diff
@@ -110,18 +110,40 @@ def _display_source(source: str) -> str:
|
||||
return source.split(":", 1)[1] if source.startswith("manual:") else source
|
||||
|
||||
|
||||
def _classify_exhausted_status(entry) -> tuple[str, bool]:
|
||||
code = getattr(entry, "last_error_code", None)
|
||||
reason = str(getattr(entry, "last_error_reason", "") or "").strip().lower()
|
||||
message = str(getattr(entry, "last_error_message", "") or "").strip().lower()
|
||||
|
||||
if code == 429 or any(token in reason for token in ("rate_limit", "usage_limit", "quota", "exhausted")) or any(
|
||||
token in message for token in ("rate limit", "usage limit", "quota", "too many requests")
|
||||
):
|
||||
return "rate-limited", True
|
||||
|
||||
if code in {401, 403} or any(token in reason for token in ("invalid_token", "invalid_grant", "unauthorized", "forbidden", "auth")) or any(
|
||||
token in message for token in ("unauthorized", "forbidden", "expired", "revoked", "invalid token", "authentication")
|
||||
):
|
||||
return "auth failed", False
|
||||
|
||||
return "exhausted", True
|
||||
|
||||
|
||||
|
||||
def _format_exhausted_status(entry) -> str:
|
||||
if entry.last_status != STATUS_EXHAUSTED:
|
||||
return ""
|
||||
label, show_retry_window = _classify_exhausted_status(entry)
|
||||
reason = getattr(entry, "last_error_reason", None)
|
||||
reason_text = f" {reason}" if isinstance(reason, str) and reason.strip() else ""
|
||||
code = f" ({entry.last_error_code})" if entry.last_error_code else ""
|
||||
if not show_retry_window:
|
||||
return f" {label}{reason_text}{code} (re-auth may be required)"
|
||||
exhausted_until = _exhausted_until(entry)
|
||||
if exhausted_until is None:
|
||||
return f" exhausted{reason_text}{code}"
|
||||
return f" {label}{reason_text}{code}"
|
||||
remaining = max(0, int(math.ceil(exhausted_until - time.time())))
|
||||
if remaining <= 0:
|
||||
return f" exhausted{reason_text}{code} (ready to retry)"
|
||||
return f" {label}{reason_text}{code} (ready to retry)"
|
||||
minutes, seconds = divmod(remaining, 60)
|
||||
hours, minutes = divmod(minutes, 60)
|
||||
days, hours = divmod(hours, 24)
|
||||
@@ -133,7 +155,7 @@ def _format_exhausted_status(entry) -> str:
|
||||
wait = f"{minutes}m {seconds}s"
|
||||
else:
|
||||
wait = f"{seconds}s"
|
||||
return f" exhausted{reason_text}{code} ({wait} left)"
|
||||
return f" {label}{reason_text}{code} ({wait} left)"
|
||||
|
||||
|
||||
def auth_add_command(args) -> None:
|
||||
@@ -386,6 +408,44 @@ def auth_reset_command(args) -> None:
|
||||
print(f"Reset status on {count} {provider} credentials")
|
||||
|
||||
|
||||
def auth_status_command(args) -> None:
    """Print the login state of one provider.

    ``args.provider`` is passed through ``_normalize_provider``; an empty
    result aborts with ``SystemExit``.  The status dict comes from
    ``auth_mod.get_auth_status``.  When logged out, a single line is printed
    (including the ``error`` value when present).  When logged in, a header
    line is followed by one line per non-empty detail field.
    """
    provider = _normalize_provider(getattr(args, "provider", "") or "")
    if not provider:
        raise SystemExit("Provider is required. Example: `hermes auth status spotify`.")

    status = auth_mod.get_auth_status(provider)

    if status.get("logged_in"):
        print(f"{provider}: logged in")
        detail_keys = ("auth_type", "client_id", "redirect_uri", "scope", "expires_at", "api_base_url")
        for key in detail_keys:
            value = status.get(key)
            if value:
                print(f" {key}: {value}")
        return

    reason = status.get("error")
    message = f"{provider}: logged out ({reason})" if reason else f"{provider}: logged out"
    print(message)
|
||||
|
||||
|
||||
def auth_logout_command(args) -> None:
    """Forward a logout request to ``auth_mod.logout_command``.

    Only the ``provider`` attribute of ``args`` (``None`` when absent) is
    passed on, wrapped in a fresh ``SimpleNamespace``.
    """
    provider = getattr(args, "provider", None)
    logout_args = SimpleNamespace(provider=provider)
    auth_mod.logout_command(logout_args)
|
||||
|
||||
|
||||
def auth_spotify_command(args) -> None:
    """Dispatch ``hermes auth spotify [login|status|logout]``.

    ``args.spotify_action`` is trimmed and lower-cased; a missing or falsy
    value means ``login``, and a value that is blank after trimming is also
    treated as ``login``.  Any other unknown action aborts with
    ``SystemExit``.
    """
    action = str(getattr(args, "spotify_action", "") or "login").strip().lower()

    def _login():
        auth_mod.login_spotify_command(args)

    def _status():
        auth_status_command(SimpleNamespace(provider="spotify"))

    def _logout():
        auth_logout_command(SimpleNamespace(provider="spotify"))

    handlers = {
        "": _login,
        "login": _login,
        "status": _status,
        "logout": _logout,
    }
    handler = handlers.get(action)
    if handler is None:
        raise SystemExit(f"Unknown Spotify auth action: {action}")
    handler()
|
||||
|
||||
|
||||
def _interactive_auth() -> None:
|
||||
"""Interactive credential pool management when `hermes auth` is called bare."""
|
||||
# Show current pool status first
|
||||
@@ -583,5 +643,14 @@ def auth_command(args) -> None:
|
||||
if action == "reset":
|
||||
auth_reset_command(args)
|
||||
return
|
||||
if action == "status":
|
||||
auth_status_command(args)
|
||||
return
|
||||
if action == "logout":
|
||||
auth_logout_command(args)
|
||||
return
|
||||
if action == "spotify":
|
||||
auth_spotify_command(args)
|
||||
return
|
||||
# No subcommand — launch interactive mode
|
||||
_interactive_auth()
|
||||
|
||||
@@ -0,0 +1,300 @@
|
||||
"""Azure Foundry endpoint auto-detection.
|
||||
|
||||
Inspect an Azure AI Foundry / Azure OpenAI endpoint to determine:
|
||||
- API transport (OpenAI-style ``chat_completions`` vs
|
||||
Anthropic-style ``anthropic_messages``)
|
||||
- Available models (best effort — Azure does not expose a deployment
|
||||
listing via the inference API key, but Azure OpenAI v1 endpoints
|
||||
return the resource's model catalog via ``GET /models``)
|
||||
- Context length for each discovered/entered model, via the existing
|
||||
:func:`agent.model_metadata.get_model_context_length` resolver.
|
||||
|
||||
Rationale:
|
||||
|
||||
Azure has no pure-API-key deployment-listing endpoint — per Microsoft,
|
||||
deployment enumeration requires ARM management-plane auth. Azure
|
||||
OpenAI v1 endpoints ``{resource}.openai.azure.com/openai/v1`` do return
|
||||
a ``/models`` list, but it reflects the resource's *available* models
|
||||
rather than the user's *deployed* deployment names. In practice it is
|
||||
still a useful hint — the user picks a familiar model name and we look
|
||||
up its context length from the catalog.
|
||||
|
||||
The detector never crashes on errors (every HTTP call is wrapped in a
|
||||
broad try/except). Callers get a :class:`DetectionResult` with whatever
|
||||
information could be gathered, and fall back to manual entry for the
|
||||
rest.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Optional
|
||||
from urllib import request as urllib_request
|
||||
from urllib.error import HTTPError, URLError
|
||||
from urllib.parse import urlparse, urlunparse
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Azure OpenAI ``api-version`` values to probe with, tried in order. The v1 GA endpoint
|
||||
# accepts requests without ``api-version`` entirely, so this is only used
|
||||
# as a fallback for pre-v1 resources that still require it.
|
||||
_AZURE_OPENAI_PROBE_API_VERSIONS = (
|
||||
"2025-04-01-preview",
|
||||
"2024-10-21", # oldest GA that supports /models
|
||||
)
|
||||
|
||||
# Default Azure Anthropic ``api-version``. Matches the value used by
|
||||
# ``agent/anthropic_adapter.py`` when building the Anthropic client.
|
||||
_AZURE_ANTHROPIC_API_VERSION = "2025-04-15"
|
||||
|
||||
|
||||
@dataclass
class DetectionResult:
    """Everything auto-detection could gather from a base URL + API key.

    Attributes:
        api_mode: Detected API transport, either ``"chat_completions"`` or
            ``"anthropic_messages"``; ``None`` when detection failed.
        models: Deployment / model IDs returned by ``/models`` (best
            effort).  Empty when the endpoint does not expose the list with
            an API key.
        hostname: Lowercased host from the base URL, used for display
            messages.
        reason: Human-readable explanation of why ``api_mode`` was chosen,
            for showing to the user in the wizard.
        models_probe_ok: ``True`` when ``/models`` returned a valid
            OpenAI-shaped payload.
        is_anthropic: ``True`` when the URL was determined to be an
            Anthropic-style endpoint (from its path or from a live probe).
    """

    api_mode: Optional[str] = None
    models: list[str] = field(default_factory=list)
    hostname: str = ""
    reason: str = ""
    models_probe_ok: bool = False
    is_anthropic: bool = False
|
||||
|
||||
|
||||
def _http_get_json(url: str, api_key: str, timeout: float = 6.0) -> tuple[int, Optional[dict]]:
|
||||
"""GET a URL with ``api-key`` + ``Authorization`` headers. Return
|
||||
``(status_code, parsed_json_or_None)``. Never raises."""
|
||||
req = urllib_request.Request(url, method="GET")
|
||||
# Azure OpenAI uses ``api-key``. Some Azure deployments (and
|
||||
# Anthropic-style routes) use ``Authorization: Bearer``. Send both
|
||||
# so we probe once per URL rather than twice.
|
||||
req.add_header("api-key", api_key)
|
||||
req.add_header("Authorization", f"Bearer {api_key}")
|
||||
req.add_header("User-Agent", "hermes-agent/azure-detect")
|
||||
try:
|
||||
with urllib_request.urlopen(req, timeout=timeout) as resp:
|
||||
body = resp.read()
|
||||
try:
|
||||
return resp.status, json.loads(body.decode("utf-8", errors="replace"))
|
||||
except Exception:
|
||||
return resp.status, None
|
||||
except HTTPError as exc:
|
||||
return exc.code, None
|
||||
except (URLError, TimeoutError, OSError) as exc:
|
||||
logger.debug("azure_detect: GET %s failed: %s", url, exc)
|
||||
return 0, None
|
||||
except Exception as exc: # pragma: no cover — defensive
|
||||
logger.debug("azure_detect: GET %s unexpected error: %s", url, exc)
|
||||
return 0, None
|
||||
|
||||
|
||||
def _strip_trailing_v1(url: str) -> str:
|
||||
"""Strip trailing ``/v1`` or ``/v1/`` so we can construct sub-paths."""
|
||||
return re.sub(r"/v1/?$", "", url.rstrip("/"))
|
||||
|
||||
|
||||
def _looks_like_anthropic_path(url: str) -> bool:
|
||||
"""Return True when the URL's path ends in ``/anthropic`` or
|
||||
contains a ``/anthropic/`` segment. Used by Azure Foundry
|
||||
resources that route Claude traffic through a dedicated path."""
|
||||
try:
|
||||
parsed = urlparse(url)
|
||||
path = (parsed.path or "").lower().rstrip("/")
|
||||
return path.endswith("/anthropic") or "/anthropic/" in path + "/"
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
|
||||
def _extract_model_ids(payload: dict) -> list[str]:
|
||||
"""Extract a list of model IDs from an OpenAI-shaped ``/models``
|
||||
response. Returns ``[]`` on any shape mismatch."""
|
||||
data = payload.get("data") if isinstance(payload, dict) else None
|
||||
if not isinstance(data, list):
|
||||
return []
|
||||
ids: list[str] = []
|
||||
for item in data:
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
# OpenAI shape: {"id": "gpt-5.4", "object": "model", ...}
|
||||
mid = item.get("id") or item.get("model") or item.get("name")
|
||||
if isinstance(mid, str) and mid:
|
||||
ids.append(mid)
|
||||
return ids
|
||||
|
||||
|
||||
def _probe_openai_models(base_url: str, api_key: str) -> tuple[bool, list[str]]:
    """Probe ``<base>/models`` for an OpenAI-shaped response.

    The bare ``/models`` URL is tried first (Azure OpenAI v1 GA paths need no
    ``api-version``), followed by one attempt per entry in
    ``_AZURE_OPENAI_PROBE_API_VERSIONS`` for pre-v1 resources.

    Returns:
        ``(ok, models)``.  ``ok`` is True as soon as one candidate answers
        200 with either a non-empty model list or a JSON object containing a
        ``data`` key (then ``models`` is empty and the user proceeds with
        manual entry).  ``(False, [])`` when no candidate qualifies.
    """
    root = base_url.rstrip("/")
    probe_urls = [
        f"{root}/models",
        *(f"{root}/models?api-version={version}" for version in _AZURE_OPENAI_PROBE_API_VERSIONS),
    ]

    for probe_url in probe_urls:
        status, body = _http_get_json(probe_url, api_key)
        if status != 200 or body is None:
            continue

        model_ids = _extract_model_ids(body)
        if model_ids:
            logger.info(
                "azure_detect: /models probe OK at %s (%d models)",
                probe_url, len(model_ids),
            )
            return True, model_ids

        # 200 + empty list still counts as "OpenAI shape, no models listed".
        if isinstance(body, dict) and "data" in body:
            return True, []

    return False, []
|
||||
|
||||
|
||||
def _probe_anthropic_messages(base_url: str, api_key: str) -> bool:
|
||||
"""Send a zero-token request to ``<base>/v1/messages`` and check
|
||||
whether the endpoint at least *recognises* the Anthropic Messages
|
||||
shape (any 4xx that mentions ``messages`` or ``model``, or a 400
|
||||
``invalid_request`` with an Anthropic error shape). Never completes
|
||||
a real chat.
|
||||
"""
|
||||
base = _strip_trailing_v1(base_url)
|
||||
url = f"{base}/v1/messages?api-version={_AZURE_ANTHROPIC_API_VERSION}"
|
||||
payload = json.dumps({
|
||||
"model": "probe",
|
||||
"max_tokens": 1,
|
||||
"messages": [{"role": "user", "content": "ping"}],
|
||||
}).encode("utf-8")
|
||||
req = urllib_request.Request(url, method="POST", data=payload)
|
||||
req.add_header("api-key", api_key)
|
||||
req.add_header("Authorization", f"Bearer {api_key}")
|
||||
req.add_header("anthropic-version", "2023-06-01")
|
||||
req.add_header("content-type", "application/json")
|
||||
req.add_header("User-Agent", "hermes-agent/azure-detect")
|
||||
try:
|
||||
with urllib_request.urlopen(req, timeout=6.0) as resp:
|
||||
# Should never 200 — "probe" isn't a real deployment. But
|
||||
# if it does, the endpoint definitely speaks Anthropic.
|
||||
return resp.status < 500
|
||||
except HTTPError as exc:
|
||||
# 4xx with an Anthropic-shaped error body = Anthropic endpoint.
|
||||
try:
|
||||
body = exc.read().decode("utf-8", errors="replace")
|
||||
lowered = body.lower()
|
||||
if "anthropic" in lowered or '"type"' in lowered and '"error"' in lowered:
|
||||
return True
|
||||
# Pre-Azure-v1 Azure Foundry returns a plain 404 for
|
||||
# Anthropic-style calls on non-Anthropic deployments. A
|
||||
# 400 "model not found" IS Anthropic though.
|
||||
if exc.code == 400 and ("messages" in lowered or "model" in lowered):
|
||||
return True
|
||||
return False
|
||||
except Exception:
|
||||
return False
|
||||
except (URLError, TimeoutError, OSError):
|
||||
return False
|
||||
except Exception: # pragma: no cover
|
||||
return False
|
||||
|
||||
|
||||
def detect(base_url: str, api_key: str) -> DetectionResult:
    """Inspect an Azure endpoint and describe its transport + models.

    Three checks run in order, and the first that succeeds decides the
    result:

    1. The URL path contains an ``anthropic`` segment (no network access).
    2. ``GET <base>/models`` answers with an OpenAI-shaped payload.
    3. A probe of the Anthropic Messages route is recognised.

    The returned :class:`DetectionResult` is advisory.  When ``api_mode`` is
    ``None`` nothing matched and the caller should ask the user; ``reason``
    always carries a human-readable explanation.
    """
    outcome = DetectionResult()

    try:
        outcome.hostname = (urlparse(base_url).hostname or "").lower()
    except Exception:
        outcome.hostname = ""

    # 1. Path sniff: Azure Foundry exposes Anthropic-style deployments under
    #    a dedicated ``/anthropic`` path.
    if _looks_like_anthropic_path(base_url):
        outcome.is_anthropic = True
        outcome.api_mode = "anthropic_messages"
        outcome.reason = "URL path ends in /anthropic → Anthropic Messages API"
        return outcome

    # 2. OpenAI-style /models probe.
    openai_ok, model_ids = _probe_openai_models(base_url, api_key)
    if openai_ok:
        outcome.models_probe_ok = True
        outcome.models = model_ids
        outcome.api_mode = "chat_completions"
        if model_ids:
            outcome.reason = f"GET /models returned {len(model_ids)} model(s) — OpenAI-style endpoint"
        else:
            outcome.reason = "GET /models returned an OpenAI-shaped empty list — OpenAI-style endpoint"
        return outcome

    # 3. Anthropic Messages probe; only attempted after the /models probe
    #    has failed.
    if _probe_anthropic_messages(base_url, api_key):
        outcome.is_anthropic = True
        outcome.api_mode = "anthropic_messages"
        outcome.reason = "Endpoint accepts Anthropic Messages shape"
        return outcome

    # Nothing matched; the caller falls back to manual selection.
    outcome.reason = (
        "Could not probe endpoint (private network, missing model list, or "
        "non-standard path) — falling back to manual API-mode selection"
    )
    return outcome
|
||||
|
||||
|
||||
def lookup_context_length(model: str, base_url: str, api_key: str) -> Optional[int]:
    """Resolve a model's context length, or ``None`` when it is not known.

    Delegates to :func:`agent.model_metadata.get_model_context_length` and
    suppresses the result when it equals ``DEFAULT_FALLBACK_CONTEXT``, so the
    wizard can tell a real answer from the resolver's generic default.

    Returns ``None`` as well when the resolver cannot be imported, when it
    raises, or when it returns anything other than a positive ``int``.
    """
    try:
        from agent.model_metadata import (
            DEFAULT_FALLBACK_CONTEXT,
            get_model_context_length,
        )
    except Exception:
        return None

    try:
        length = get_model_context_length(model, base_url=base_url, api_key=api_key)
    except Exception as exc:
        logger.debug("azure_detect: context length lookup failed: %s", exc)
        return None

    is_known = isinstance(length, int) and length > 0 and length != DEFAULT_FALLBACK_CONTEXT
    return length if is_known else None
|
||||
|
||||
|
||||
__all__ = ["DetectionResult", "detect", "lookup_context_length"]
|
||||
+54
-1
@@ -238,6 +238,52 @@ def get_git_banner_state(repo_dir: Optional[Path] = None) -> Optional[dict]:
|
||||
return {"upstream": upstream, "local": local, "ahead": max(ahead, 0)}
|
||||
|
||||
|
||||
_RELEASE_URL_BASE = "https://github.com/NousResearch/hermes-agent/releases/tag"
# Per-process memo: None = not looked up yet, () = looked up and nothing
# found, (tag, url) = resolved.
_latest_release_cache: Optional[tuple] = None


def get_latest_release_tag(repo_dir: Optional[Path] = None) -> Optional[tuple]:
    """Return ``(tag, release_url)`` for the latest git tag, or ``None``.

    Runs ``git describe --tags --abbrev=0`` (3 second timeout) in
    ``repo_dir``, or in the directory from ``_resolve_repo_dir()`` when
    ``repo_dir`` is not given.  The URL is always built from
    ``_RELEASE_URL_BASE``.

    The outcome of the first call, success or failure, is cached for the
    lifetime of the process; later calls return it without running git
    again, whatever ``repo_dir`` they pass.
    """
    global _latest_release_cache
    if _latest_release_cache is not None:
        # The empty-tuple sentinel is falsy and maps back to None.
        return _latest_release_cache or None

    tag = ""
    checkout = repo_dir or _resolve_repo_dir()
    if checkout is not None:
        try:
            completed = subprocess.run(
                ["git", "describe", "--tags", "--abbrev=0"],
                capture_output=True,
                text=True,
                timeout=3,
                cwd=str(checkout),
            )
        except Exception:
            completed = None
        if completed is not None and completed.returncode == 0:
            tag = (completed.stdout or "").strip()

    if not tag:
        _latest_release_cache = ()  # falsy sentinel — skip future lookups
        return None

    _latest_release_cache = (tag, f"{_RELEASE_URL_BASE}/{tag}")
    return _latest_release_cache
|
||||
|
||||
|
||||
def format_banner_version_label() -> str:
|
||||
"""Return the version label shown in the startup banner title."""
|
||||
base = f"Hermes Agent v{VERSION} ({RELEASE_DATE})"
|
||||
@@ -519,9 +565,16 @@ def build_welcome_banner(console: Console, model: str, cwd: str,
|
||||
agent_name = _skin_branding("agent_name", "Hermes Agent")
|
||||
title_color = _skin_color("banner_title", "#FFD700")
|
||||
border_color = _skin_color("banner_border", "#CD7F32")
|
||||
version_label = format_banner_version_label()
|
||||
release_info = get_latest_release_tag()
|
||||
if release_info:
|
||||
_tag, _url = release_info
|
||||
title_markup = f"[bold {title_color}][link={_url}]{version_label}[/link][/]"
|
||||
else:
|
||||
title_markup = f"[bold {title_color}]{version_label}[/]"
|
||||
outer_panel = Panel(
|
||||
layout_table,
|
||||
title=f"[bold {title_color}]{format_banner_version_label()}[/]",
|
||||
title=title_markup,
|
||||
border_style=border_color,
|
||||
padding=(0, 2),
|
||||
)
|
||||
|
||||
+18
-11
@@ -77,16 +77,14 @@ COMMAND_REGISTRY: list[CommandDef] = [
|
||||
CommandDef("rollback", "List or restore filesystem checkpoints", "Session",
|
||||
args_hint="[number]"),
|
||||
CommandDef("snapshot", "Create or restore state snapshots of Hermes config/state", "Session",
|
||||
aliases=("snap",), args_hint="[create|restore <id>|prune]"),
|
||||
cli_only=True, aliases=("snap",), args_hint="[create|restore <id>|prune]"),
|
||||
CommandDef("stop", "Kill all running background processes", "Session"),
|
||||
CommandDef("approve", "Approve a pending dangerous command", "Session",
|
||||
gateway_only=True, args_hint="[session|always]"),
|
||||
CommandDef("deny", "Deny a pending dangerous command", "Session",
|
||||
gateway_only=True),
|
||||
CommandDef("background", "Run a prompt in the background", "Session",
|
||||
aliases=("bg",), args_hint="<prompt>"),
|
||||
CommandDef("btw", "Ephemeral side question using session context (no tools, not persisted)", "Session",
|
||||
args_hint="<question>"),
|
||||
aliases=("bg", "btw"), args_hint="<prompt>"),
|
||||
CommandDef("agents", "Show active agents and running tasks", "Session",
|
||||
aliases=("tasks",)),
|
||||
CommandDef("queue", "Queue a prompt for the next turn (doesn't interrupt)", "Session",
|
||||
@@ -103,10 +101,10 @@ COMMAND_REGISTRY: list[CommandDef] = [
|
||||
# Configuration
|
||||
CommandDef("config", "Show current configuration", "Configuration",
|
||||
cli_only=True),
|
||||
CommandDef("model", "Switch model for this session", "Configuration", args_hint="[model] [--provider name] [--global]"),
|
||||
CommandDef("provider", "Show available providers and current provider",
|
||||
"Configuration"),
|
||||
CommandDef("gquota", "Show Google Gemini Code Assist quota usage", "Info"),
|
||||
CommandDef("model", "Switch model for this session", "Configuration",
|
||||
aliases=("provider",), args_hint="[model] [--provider name] [--global]"),
|
||||
CommandDef("gquota", "Show Google Gemini Code Assist quota usage", "Info",
|
||||
cli_only=True),
|
||||
|
||||
CommandDef("personality", "Set a predefined personality", "Configuration",
|
||||
args_hint="[name]"),
|
||||
@@ -124,9 +122,12 @@ COMMAND_REGISTRY: list[CommandDef] = [
|
||||
args_hint="[normal|fast|status]",
|
||||
subcommands=("normal", "fast", "status", "on", "off")),
|
||||
CommandDef("skin", "Show or change the display skin/theme", "Configuration",
|
||||
args_hint="[name]"),
|
||||
cli_only=True, args_hint="[name]"),
|
||||
CommandDef("voice", "Toggle voice mode", "Configuration",
|
||||
args_hint="[on|off|tts|status]", subcommands=("on", "off", "tts", "status")),
|
||||
CommandDef("busy", "Control what Enter does while Hermes is working", "Configuration",
|
||||
cli_only=True, args_hint="[queue|interrupt|status]",
|
||||
subcommands=("queue", "interrupt", "status")),
|
||||
|
||||
# Tools & Skills
|
||||
CommandDef("tools", "Manage tools: /tools [list|disable|enable] [name...]", "Tools & Skills",
|
||||
@@ -139,7 +140,13 @@ COMMAND_REGISTRY: list[CommandDef] = [
|
||||
CommandDef("cron", "Manage scheduled tasks", "Tools & Skills",
|
||||
cli_only=True, args_hint="[subcommand]",
|
||||
subcommands=("list", "add", "create", "edit", "pause", "resume", "run", "remove")),
|
||||
CommandDef("reload", "Reload .env variables into the running session", "Tools & Skills"),
|
||||
CommandDef("kanban", "Multi-profile collaboration board (tasks, links, comments)",
|
||||
"Tools & Skills", args_hint="[subcommand]",
|
||||
subcommands=("list", "ls", "show", "create", "assign", "link", "unlink",
|
||||
"claim", "comment", "complete", "block", "unblock", "archive",
|
||||
"tail", "dispatch", "context", "init", "gc")),
|
||||
CommandDef("reload", "Reload .env variables into the running session", "Tools & Skills",
|
||||
cli_only=True),
|
||||
CommandDef("reload-mcp", "Reload MCP servers from config", "Tools & Skills",
|
||||
aliases=("reload_mcp",)),
|
||||
CommandDef("browser", "Connect browser tools to your live Chrome via CDP", "Tools & Skills",
|
||||
@@ -317,7 +324,7 @@ def should_bypass_active_session(command_name: str | None) -> bool:
|
||||
safety net in gateway.run discards any command text that reaches
|
||||
the pending queue — which meant a mid-run /model (or /reasoning,
|
||||
/voice, /insights, /title, /resume, /retry, /undo, /compress,
|
||||
/usage, /provider, /reload-mcp, /sethome, /reset) would silently
|
||||
/usage, /reload-mcp, /sethome, /reset) would silently
|
||||
interrupt the agent AND get discarded, producing a zero-char
|
||||
response. See issue #5057 / PRs #6252, #10370, #4665.
|
||||
|
||||
|
||||
+169
-10
@@ -466,6 +466,12 @@ DEFAULT_CONFIG = {
|
||||
"record_sessions": False, # Auto-record browser sessions as WebM videos
|
||||
"allow_private_urls": False, # Allow navigating to private/internal IPs (localhost, 192.168.x.x, etc.)
|
||||
"cdp_url": "", # Optional persistent CDP endpoint for attaching to an existing Chromium/Chrome
|
||||
# CDP supervisor — dialog + frame detection via a persistent WebSocket.
|
||||
# Active only when a CDP-capable backend is attached (Browserbase or
|
||||
# local Chrome via /browser connect). See
|
||||
# website/docs/developer-guide/browser-supervisor.md.
|
||||
"dialog_policy": "must_respond", # must_respond | auto_dismiss | auto_accept
|
||||
"dialog_timeout_s": 300, # Safety auto-dismiss after N seconds under must_respond
|
||||
"camofox": {
|
||||
# When true, Hermes sends a stable profile-scoped userId to Camofox
|
||||
# so the server maps it to a persistent Firefox profile automatically.
|
||||
@@ -486,7 +492,27 @@ DEFAULT_CONFIG = {
|
||||
# exceed this are rejected with guidance to use offset+limit.
|
||||
# 100K chars ≈ 25–35K tokens across typical tokenisers.
|
||||
"file_read_max_chars": 100_000,
|
||||
|
||||
|
||||
# Tool-output truncation thresholds. When terminal output or a
|
||||
# single read_file page exceeds these limits, Hermes truncates the
|
||||
# payload sent to the model (keeping head + tail for terminal,
|
||||
# enforcing pagination for read_file). Tuning these trades context
|
||||
# footprint against how much raw output the model can see in one
|
||||
# shot. Ported from anomalyco/opencode PR #23770.
|
||||
#
|
||||
# - max_bytes: terminal_tool output cap, in chars
|
||||
# (default 50_000 ≈ 12-15K tokens).
|
||||
# - max_lines: read_file pagination cap — the maximum `limit`
|
||||
# a single read_file call can request before
|
||||
# being clamped (default 2000).
|
||||
# - max_line_length: per-line cap applied when read_file emits a
|
||||
# line-numbered view (default 2000 chars).
|
||||
"tool_output": {
|
||||
"max_bytes": 50_000,
|
||||
"max_lines": 2000,
|
||||
"max_line_length": 2000,
|
||||
},
|
||||
|
||||
"compression": {
|
||||
"enabled": True,
|
||||
"threshold": 0.50, # compress when context usage exceeds this ratio
|
||||
@@ -495,6 +521,12 @@ DEFAULT_CONFIG = {
|
||||
|
||||
},
|
||||
|
||||
# Anthropic prompt caching (Claude via OpenRouter or native Anthropic API).
|
||||
# cache_ttl must be "5m" or "1h" (Anthropic-supported tiers); other values are ignored.
|
||||
"prompt_caching": {
|
||||
"cache_ttl": "5m",
|
||||
},
|
||||
|
||||
# AWS Bedrock provider configuration.
|
||||
# Only used when model.provider is "bedrock".
|
||||
"bedrock": {
|
||||
@@ -580,14 +612,6 @@ DEFAULT_CONFIG = {
|
||||
"timeout": 30,
|
||||
"extra_body": {},
|
||||
},
|
||||
"flush_memories": {
|
||||
"provider": "auto",
|
||||
"model": "",
|
||||
"base_url": "",
|
||||
"api_key": "",
|
||||
"timeout": 30,
|
||||
"extra_body": {},
|
||||
},
|
||||
"title_generation": {
|
||||
"provider": "auto",
|
||||
"model": "",
|
||||
@@ -751,6 +775,15 @@ DEFAULT_CONFIG = {
|
||||
# warning log if out of range.
|
||||
"max_spawn_depth": 1, # depth cap (1 = flat [default], 2 = orchestrator→leaf, 3 = three-level)
|
||||
"orchestrator_enabled": True, # kill switch for role="orchestrator"
|
||||
# When a subagent hits a dangerous-command approval prompt, the parent's
|
||||
# prompt_toolkit TUI owns stdin — a thread-local input() call from the
|
||||
# subagent worker would deadlock the parent UI. To avoid the deadlock,
|
||||
# subagent threads ALWAYS resolve approvals non-interactively:
|
||||
# false (default) → auto-deny with a logger.warning audit line (safe)
|
||||
# true → auto-approve "once" with a logger.warning audit line
|
||||
# Flip to true only if you trust delegated work to run dangerous cmds
|
||||
# without human review (cron pipelines, batch automation, etc.).
|
||||
"subagent_auto_approve": False,
|
||||
},
|
||||
|
||||
# Ephemeral prefill messages file — JSON list of {role, content} dicts
|
||||
@@ -807,7 +840,7 @@ DEFAULT_CONFIG = {
|
||||
"auto_thread": True, # Auto-create threads on @mention in channels (like Slack)
|
||||
"reactions": True, # Add 👀/✅/❌ reactions to messages during processing
|
||||
"channel_prompts": {}, # Per-channel ephemeral system prompts (forum parents apply to child threads)
|
||||
# discord_server tool: restrict which actions the agent may call.
|
||||
# discord / discord_admin tools: restrict which actions the agent may call.
|
||||
# Default (empty) = all actions allowed (subject to bot privileged intents).
|
||||
# Accepts comma-separated string ("list_guilds,list_channels,fetch_messages")
|
||||
# or YAML list. Unknown names are dropped with a warning at load time.
|
||||
@@ -904,6 +937,24 @@ DEFAULT_CONFIG = {
|
||||
"max_parallel_jobs": None,
|
||||
},
|
||||
|
||||
# Kanban multi-agent coordination — controls the dispatcher loop that
|
||||
# spawns workers for ready tasks. The dispatcher ticks every N seconds
|
||||
# (default 60), reclaims stale claims, promotes dependency-satisfied
|
||||
# todos to ready, and fires `hermes -p <assignee> chat -q ...` for
|
||||
# each claimable ready task. One dispatcher per profile is sufficient;
|
||||
# running more than one on the same kanban.db will race for claims.
|
||||
"kanban": {
|
||||
# Run the dispatcher inside the gateway process. On by default —
|
||||
# the cost is ~300µs every `dispatch_interval_seconds` when idle,
|
||||
# and gateway is the supervisor users already have. Set to false
|
||||
# only if you run the dispatcher as a separate systemd unit or
|
||||
# don't want the gateway to spawn workers.
|
||||
"dispatch_in_gateway": True,
|
||||
# Seconds between dispatcher ticks (idle or not). Lower = snappier
|
||||
# pickup of newly-ready tasks; higher = less SQL pressure.
|
||||
"dispatch_interval_seconds": 60,
|
||||
},
|
||||
|
||||
# execute_code settings — controls the tool used for programmatic tool calls.
|
||||
"code_execution": {
|
||||
# Execution mode:
|
||||
@@ -926,6 +977,27 @@ DEFAULT_CONFIG = {
|
||||
"backup_count": 3, # Number of rotated backup files to keep
|
||||
},
|
||||
|
||||
# Remotely-hosted model catalog manifest. When enabled, the CLI fetches
|
||||
# curated model lists for OpenRouter and Nous Portal from this URL,
|
||||
# falling back to the in-repo snapshot on network failure. Lets us
|
||||
# update model picker lists without shipping a hermes-agent release.
|
||||
# The default URL is served by the docs site GitHub Pages deploy.
|
||||
"model_catalog": {
|
||||
"enabled": True,
|
||||
"url": "https://hermes-agent.nousresearch.com/docs/api/model-catalog.json",
|
||||
# Disk cache TTL in hours. Beyond this, the CLI refetches on the
|
||||
# next /model or `hermes model` invocation; network failures
|
||||
# silently fall back to the stale cache.
|
||||
"ttl_hours": 24,
|
||||
# Optional per-provider override URLs for third parties that want
|
||||
# to self-host their own curation list using the same schema.
|
||||
# Example:
|
||||
# providers:
|
||||
# openrouter:
|
||||
# url: https://example.com/my-curation.json
|
||||
"providers": {},
|
||||
},
|
||||
|
||||
# Network settings — workarounds for connectivity issues.
|
||||
"network": {
|
||||
# Force IPv4 connections. On servers with broken or unreachable IPv6,
|
||||
@@ -962,6 +1034,13 @@ DEFAULT_CONFIG = {
|
||||
"min_interval_hours": 24,
|
||||
},
|
||||
|
||||
# Contextual first-touch onboarding hints (see agent/onboarding.py).
|
||||
# Each hint is shown once per install and then latched here so it
|
||||
# never fires again. Users can wipe the section to re-see all hints.
|
||||
"onboarding": {
|
||||
"seen": {},
|
||||
},
|
||||
|
||||
# Config schema version - bump this when adding new required fields
|
||||
"_config_version": 22,
|
||||
}
|
||||
@@ -1338,6 +1417,21 @@ OPTIONAL_ENV_VARS = {
|
||||
"category": "provider",
|
||||
"advanced": True,
|
||||
},
|
||||
"AZURE_FOUNDRY_API_KEY": {
|
||||
"description": "Azure Foundry API key for custom Azure endpoints",
|
||||
"prompt": "Azure Foundry API Key",
|
||||
"url": "https://ai.azure.com/",
|
||||
"password": True,
|
||||
"category": "provider",
|
||||
},
|
||||
"AZURE_FOUNDRY_BASE_URL": {
|
||||
"description": "Azure Foundry base URL (set via 'hermes model' for endpoint-specific config)",
|
||||
"prompt": "Azure Foundry base URL",
|
||||
"url": None,
|
||||
"password": False,
|
||||
"category": "provider",
|
||||
"advanced": True,
|
||||
},
|
||||
|
||||
# ── Tool API keys ──
|
||||
"EXA_API_KEY": {
|
||||
@@ -2173,6 +2267,71 @@ def get_compatible_custom_providers(
|
||||
return compatible
|
||||
|
||||
|
||||
def get_custom_provider_context_length(
    model: str,
    base_url: str,
    custom_providers: Optional[List[Dict[str, Any]]] = None,
    config: Optional[Dict[str, Any]] = None,
) -> Optional[int]:
    """Look up a per-model ``context_length`` override from ``custom_providers``.

    Matches any entry whose ``base_url`` equals ``base_url`` (trailing-slash
    insensitive) and returns ``custom_providers[i].models.<model>.context_length``
    if present and valid. Returns ``None`` when no override applies.

    This is the single source of truth for custom-provider context overrides,
    used by:
    * ``AIAgent.__init__`` (startup resolution)
    * ``AIAgent.switch_model`` (mid-session ``/model`` switch)
    * ``hermes_cli.model_switch.resolve_display_context_length`` (``/model`` confirmation display)
    * ``gateway.run._format_session_info`` (``/info`` display)
    * ``agent.model_metadata.get_model_context_length`` (when custom_providers is threaded through)

    Before this helper existed, the lookup was duplicated in ``run_agent.py``'s
    startup path only; every other path (notably ``/model`` switch) fell back
    to the 128K default. See #15779.

    Args:
        model: Model name used as the key under an entry's ``models`` mapping.
        base_url: Endpoint URL to match against each entry's ``base_url``.
        custom_providers: Pre-resolved provider entries. When ``None`` they are
            resolved from ``config`` via ``get_compatible_custom_providers``.
        config: Config dict used only when ``custom_providers`` is ``None``.

    Returns:
        The first positive integer ``context_length`` found for a matching
        entry, or ``None``. Malformed entries (non-dict entries, non-string
        URLs, non-numeric or non-finite lengths) are skipped, never raised on.
    """
    if not model or not base_url:
        return None
    if custom_providers is None:
        try:
            custom_providers = get_compatible_custom_providers(config)
        except Exception:
            # Resolution failed: fall back to the raw list in the config, if any.
            if config is None:
                return None
            raw = config.get("custom_providers")
            custom_providers = raw if isinstance(raw, list) else []
    if not isinstance(custom_providers, list):
        return None

    target_url = (base_url or "").rstrip("/")
    if not target_url:
        return None

    for entry in custom_providers:
        if not isinstance(entry, dict):
            continue
        raw_url = entry.get("base_url")
        # Hand-edited configs can hold non-string values here; skip them
        # instead of crashing on ``.rstrip``.
        if not isinstance(raw_url, str):
            continue
        entry_url = raw_url.rstrip("/")
        if not entry_url or entry_url != target_url:
            continue
        models = entry.get("models")
        if not isinstance(models, dict):
            continue
        model_cfg = models.get(model)
        if not isinstance(model_cfg, dict):
            continue
        raw_ctx = model_cfg.get("context_length")
        if raw_ctx is None:
            continue
        try:
            ctx = int(raw_ctx)
        except (TypeError, ValueError, OverflowError):
            # OverflowError: int(float("inf")), e.g. YAML ``.inf``.
            continue
        if ctx > 0:
            return ctx
    return None
|
||||
|
||||
|
||||
def check_config_version() -> Tuple[int, int]:
|
||||
"""
|
||||
Check config version.
|
||||
|
||||
@@ -275,6 +275,99 @@ def copilot_device_code_login(
|
||||
return None
|
||||
|
||||
|
||||
# ─── Copilot Token Exchange ────────────────────────────────────────────────
|
||||
|
||||
# Module-level cache for exchanged Copilot API tokens.
|
||||
# Maps raw_token_fingerprint -> (api_token, expires_at_epoch).
|
||||
_jwt_cache: dict[str, tuple[str, float]] = {}
|
||||
_JWT_REFRESH_MARGIN_SECONDS = 120 # refresh 2 min before expiry
|
||||
|
||||
# Token exchange endpoint and headers (matching VS Code / Copilot CLI)
|
||||
_TOKEN_EXCHANGE_URL = "https://api.github.com/copilot_internal/v2/token"
|
||||
_EDITOR_VERSION = "vscode/1.104.1"
|
||||
_EXCHANGE_USER_AGENT = "GitHubCopilotChat/0.26.7"
|
||||
|
||||
|
||||
def _token_fingerprint(raw_token: str) -> str:
|
||||
"""Short fingerprint of a raw token for cache keying (avoids storing full token)."""
|
||||
import hashlib
|
||||
return hashlib.sha256(raw_token.encode()).hexdigest()[:16]
|
||||
|
||||
|
||||
def exchange_copilot_token(raw_token: str, *, timeout: float = 10.0) -> tuple[str, float]:
    """Exchange a raw GitHub token for a short-lived Copilot API token.

    Calls ``GET https://api.github.com/copilot_internal/v2/token`` with
    the raw GitHub token and returns ``(api_token, expires_at)``.

    The returned token is a semicolon-separated string (not a standard JWT)
    used as ``Authorization: Bearer <token>`` for Copilot API requests.

    Results are cached in-process and reused until close to expiry.

    Args:
        raw_token: The raw GitHub token sent in the ``Authorization`` header.
        timeout: Timeout in seconds for the HTTP request.

    Returns:
        ``(api_token, expires_at)`` where ``expires_at`` is an epoch timestamp.
        When the response carries no usable ``expires_at``, a 30-minute
        lifetime from now is assumed.

    Raises:
        ValueError: On any failure — request/decoding error, a response that
            is not a JSON object, or an empty token.
    """
    import urllib.request

    fp = _token_fingerprint(raw_token)

    # Check cache first; entries are refreshed slightly before they expire.
    cached = _jwt_cache.get(fp)
    if cached:
        api_token, expires_at = cached
        if time.time() < expires_at - _JWT_REFRESH_MARGIN_SECONDS:
            return api_token, expires_at

    req = urllib.request.Request(
        _TOKEN_EXCHANGE_URL,
        method="GET",
        headers={
            "Authorization": f"token {raw_token}",
            "User-Agent": _EXCHANGE_USER_AGENT,
            "Accept": "application/json",
            "Editor-Version": _EDITOR_VERSION,
        },
    )

    try:
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            data = json.loads(resp.read().decode())
    except Exception as exc:
        raise ValueError(f"Copilot token exchange failed: {exc}") from exc

    # A JSON array/string/number would otherwise surface as AttributeError on
    # ``.get`` and break the documented ValueError contract.
    if not isinstance(data, dict):
        raise ValueError("Copilot token exchange returned an unexpected payload")

    api_token = data.get("token", "")
    if not api_token:
        raise ValueError("Copilot token exchange returned empty token")

    # Convert expires_at to float; a missing or malformed value falls back to
    # a 30-minute lifetime rather than discarding a valid token.
    raw_expiry = data.get("expires_at", 0)
    try:
        expires_at = float(raw_expiry) if raw_expiry else 0.0
    except (TypeError, ValueError):
        expires_at = 0.0
    if not expires_at:
        expires_at = time.time() + 1800

    _jwt_cache[fp] = (api_token, expires_at)
    logger.debug(
        "Copilot token exchanged, expires_at=%s",
        expires_at,
    )
    return api_token, expires_at
|
||||
|
||||
|
||||
def get_copilot_api_token(raw_token: str) -> str:
    """Return a Copilot API token for ``raw_token``, falling back to the raw token.

    Thin wrapper over ``exchange_copilot_token``: a successful exchange yields
    the exchanged token; any exception during the exchange is logged at debug
    level and the raw token is returned unchanged. A falsy ``raw_token`` is
    returned as-is without attempting an exchange.
    """
    if not raw_token:
        return raw_token
    try:
        exchanged_token, _expiry = exchange_copilot_token(raw_token)
    except Exception as exc:
        logger.debug("Copilot token exchange failed, using raw token: %s", exc)
        return raw_token
    return exchanged_token
|
||||
|
||||
|
||||
# ─── Copilot API Headers ───────────────────────────────────────────────────
|
||||
|
||||
def copilot_request_headers(
|
||||
|
||||
@@ -93,6 +93,9 @@ def cron_list(show_all: bool = False):
|
||||
script = job.get("script")
|
||||
if script:
|
||||
print(f" Script: {script}")
|
||||
workdir = job.get("workdir")
|
||||
if workdir:
|
||||
print(f" Workdir: {workdir}")
|
||||
|
||||
# Execution history
|
||||
last_status = job.get("last_status")
|
||||
@@ -168,6 +171,7 @@ def cron_create(args):
|
||||
skill=getattr(args, "skill", None),
|
||||
skills=_normalize_skills(getattr(args, "skill", None), getattr(args, "skills", None)),
|
||||
script=getattr(args, "script", None),
|
||||
workdir=getattr(args, "workdir", None),
|
||||
)
|
||||
if not result.get("success"):
|
||||
print(color(f"Failed to create job: {result.get('error', 'unknown error')}", Colors.RED))
|
||||
@@ -180,6 +184,8 @@ def cron_create(args):
|
||||
job_data = result.get("job", {})
|
||||
if job_data.get("script"):
|
||||
print(f" Script: {job_data['script']}")
|
||||
if job_data.get("workdir"):
|
||||
print(f" Workdir: {job_data['workdir']}")
|
||||
print(f" Next run: {result['next_run_at']}")
|
||||
return 0
|
||||
|
||||
@@ -218,6 +224,7 @@ def cron_edit(args):
|
||||
repeat=getattr(args, "repeat", None),
|
||||
skills=final_skills,
|
||||
script=getattr(args, "script", None),
|
||||
workdir=getattr(args, "workdir", None),
|
||||
)
|
||||
if not result.get("success"):
|
||||
print(color(f"Failed to update job: {result.get('error', 'unknown error')}", Colors.RED))
|
||||
@@ -233,6 +240,8 @@ def cron_edit(args):
|
||||
print(" Skills: none")
|
||||
if updated.get("script"):
|
||||
print(f" Script: {updated['script']}")
|
||||
if updated.get("workdir"):
|
||||
print(f" Workdir: {updated['workdir']}")
|
||||
return 0
|
||||
|
||||
|
||||
|
||||
+33
-8
@@ -29,6 +29,7 @@ if _env_path.exists():
|
||||
load_dotenv(PROJECT_ROOT / ".env", override=False, encoding="utf-8")
|
||||
|
||||
from hermes_cli.colors import Colors, color
|
||||
from hermes_cli.models import _HERMES_USER_AGENT
|
||||
from hermes_constants import OPENROUTER_MODELS_URL
|
||||
from utils import base_url_host_matches
|
||||
|
||||
@@ -295,16 +296,37 @@ def run_doctor(args):
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
from hermes_cli.auth import resolve_provider as _resolve_provider
|
||||
from hermes_cli.config import get_compatible_custom_providers as _compatible_custom_providers
|
||||
from hermes_cli.providers import resolve_provider_full as _resolve_provider_full
|
||||
except Exception:
|
||||
_resolve_provider = None
|
||||
_compatible_custom_providers = None
|
||||
_resolve_provider_full = None
|
||||
|
||||
custom_providers = []
|
||||
if _compatible_custom_providers is not None:
|
||||
try:
|
||||
custom_providers = _compatible_custom_providers(cfg)
|
||||
except Exception:
|
||||
custom_providers = []
|
||||
|
||||
user_providers = cfg.get("providers")
|
||||
if isinstance(user_providers, dict):
|
||||
known_providers.update(str(name).strip().lower() for name in user_providers if str(name).strip())
|
||||
for entry in custom_providers:
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
name = str(entry.get("name") or "").strip()
|
||||
if name:
|
||||
known_providers.add("custom:" + name.lower().replace(" ", "-"))
|
||||
|
||||
canonical_provider = provider
|
||||
if provider and _resolve_provider is not None and provider != "auto":
|
||||
try:
|
||||
canonical_provider = _resolve_provider(provider)
|
||||
except Exception:
|
||||
canonical_provider = None
|
||||
if (
|
||||
provider
|
||||
and _resolve_provider_full is not None
|
||||
and provider not in ("auto", "custom")
|
||||
):
|
||||
provider_def = _resolve_provider_full(provider, user_providers, custom_providers)
|
||||
canonical_provider = provider_def.id if provider_def is not None else None
|
||||
|
||||
if provider and provider != "auto":
|
||||
if canonical_provider is None or (known_providers and canonical_provider not in known_providers):
|
||||
@@ -957,7 +979,10 @@ def run_doctor(args):
|
||||
if base_url_host_matches(_base, "api.kimi.com") and _base.rstrip("/").endswith("/coding"):
|
||||
_base = _base.rstrip("/") + "/v1"
|
||||
_url = (_base.rstrip("/") + "/models") if _base else _default_url
|
||||
_headers = {"Authorization": f"Bearer {_key}"}
|
||||
_headers = {
|
||||
"Authorization": f"Bearer {_key}",
|
||||
"User-Agent": _HERMES_USER_AGENT,
|
||||
}
|
||||
if base_url_host_matches(_base, "api.kimi.com"):
|
||||
_headers["User-Agent"] = "claude-code/0.1.0"
|
||||
_resp = httpx.get(
|
||||
|
||||
@@ -267,6 +267,8 @@ def run_dump(args):
|
||||
("ANTHROPIC_API_KEY", "anthropic"),
|
||||
("ANTHROPIC_TOKEN", "anthropic_token"),
|
||||
("NOUS_API_KEY", "nous"),
|
||||
("GOOGLE_API_KEY", "google/gemini"),
|
||||
("GEMINI_API_KEY", "gemini"),
|
||||
("GLM_API_KEY", "glm/zai"),
|
||||
("ZAI_API_KEY", "zai"),
|
||||
("KIMI_API_KEY", "kimi"),
|
||||
|
||||
@@ -0,0 +1,361 @@
|
||||
"""
|
||||
hermes fallback — manage the fallback provider chain.
|
||||
|
||||
Fallback providers are tried in order when the primary model fails with
|
||||
rate-limit, overload, or connection errors. See:
|
||||
https://hermes-agent.nousresearch.com/docs/user-guide/features/fallback-providers
|
||||
|
||||
Subcommands:
|
||||
hermes fallback [list] Show the current fallback chain (default when no subcommand)
|
||||
hermes fallback add Pick provider + model via the same picker as `hermes model`,
|
||||
then append the selection to the chain
|
||||
hermes fallback remove Pick an entry to delete from the chain
|
||||
hermes fallback clear Remove all fallback entries
|
||||
|
||||
Storage: ``fallback_providers`` in ``~/.hermes/config.yaml`` (top-level, list of
|
||||
``{provider, model, base_url?, api_mode?}`` dicts). The legacy single-dict
|
||||
``fallback_model`` format is migrated to the new list format on first add.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import copy
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _read_chain(config: Dict[str, Any]) -> List[Dict[str, Any]]:
|
||||
"""Return the normalized fallback chain as a list of dicts.
|
||||
|
||||
Accepts both the new list format (``fallback_providers``) and the legacy
|
||||
single-dict format (``fallback_model``). The returned list is always a
|
||||
fresh copy — callers can mutate without touching the config dict.
|
||||
"""
|
||||
chain = config.get("fallback_providers") or []
|
||||
if isinstance(chain, list):
|
||||
result = [dict(e) for e in chain if isinstance(e, dict) and e.get("provider") and e.get("model")]
|
||||
if result:
|
||||
return result
|
||||
legacy = config.get("fallback_model")
|
||||
if isinstance(legacy, dict) and legacy.get("provider") and legacy.get("model"):
|
||||
return [dict(legacy)]
|
||||
if isinstance(legacy, list):
|
||||
return [dict(e) for e in legacy if isinstance(e, dict) and e.get("provider") and e.get("model")]
|
||||
return []
|
||||
|
||||
|
||||
def _write_chain(config: Dict[str, Any], chain: List[Dict[str, Any]]) -> None:
|
||||
"""Persist the chain to ``fallback_providers`` and clear legacy key."""
|
||||
config["fallback_providers"] = chain
|
||||
# Drop the legacy single-dict key on write so there's only one source of truth.
|
||||
if "fallback_model" in config:
|
||||
config.pop("fallback_model", None)
|
||||
|
||||
|
||||
def _format_entry(entry: Dict[str, Any]) -> str:
|
||||
"""One-line human-readable rendering of a fallback entry."""
|
||||
provider = entry.get("provider", "?")
|
||||
model = entry.get("model", "?")
|
||||
base = entry.get("base_url")
|
||||
suffix = f" [{base}]" if base else ""
|
||||
return f"{model} (via {provider}){suffix}"
|
||||
|
||||
|
||||
def _extract_fallback_from_model_cfg(model_cfg: Any) -> Optional[Dict[str, Any]]:
|
||||
"""Pull the ``{provider, model, base_url?, api_mode?}`` dict from a ``config["model"]`` snapshot."""
|
||||
if not isinstance(model_cfg, dict):
|
||||
return None
|
||||
provider = (model_cfg.get("provider") or "").strip()
|
||||
# The picker writes the selected model to ``model.default``.
|
||||
model = (model_cfg.get("default") or model_cfg.get("model") or "").strip()
|
||||
if not provider or not model:
|
||||
return None
|
||||
entry: Dict[str, Any] = {"provider": provider, "model": model}
|
||||
base_url = (model_cfg.get("base_url") or "").strip()
|
||||
if base_url:
|
||||
entry["base_url"] = base_url
|
||||
api_mode = (model_cfg.get("api_mode") or "").strip()
|
||||
if api_mode:
|
||||
entry["api_mode"] = api_mode
|
||||
return entry
|
||||
|
||||
|
||||
def _snapshot_auth_active_provider() -> Any:
|
||||
"""Return the current ``active_provider`` in auth.json, or a sentinel if unavailable."""
|
||||
try:
|
||||
from hermes_cli.auth import _load_auth_store
|
||||
store = _load_auth_store()
|
||||
return store.get("active_provider")
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
def _restore_auth_active_provider(value: Any) -> None:
|
||||
"""Write back a previously snapshotted ``active_provider`` value."""
|
||||
try:
|
||||
from hermes_cli.auth import _auth_store_lock, _load_auth_store, _save_auth_store
|
||||
with _auth_store_lock():
|
||||
store = _load_auth_store()
|
||||
store["active_provider"] = value
|
||||
_save_auth_store(store)
|
||||
except Exception:
|
||||
# Best-effort — if auth.json can't be restored, the user's primary
|
||||
# provider may have been deactivated by the picker. They can re-run
|
||||
# `hermes model` to fix it. Don't fail the fallback add.
|
||||
pass
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Subcommand handlers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def cmd_fallback_list(args) -> None:  # noqa: ARG001
    """Print the primary model (when known) and the numbered fallback chain.

    ``args`` is accepted for dispatcher compatibility and is not used.
    """
    from hermes_cli.config import load_config

    config = load_config()
    chain = _read_chain(config)

    print()
    if not chain:
        for line in (
            " No fallback providers configured.",
            "",
            " Add one with: hermes fallback add",
            "",
        ):
            print(line)
        return

    primary = _describe_primary(config)
    if primary:
        print(f" Primary: {primary}")
        print()

    count = len(chain)
    print(f" Fallback chain ({count} {'entry' if count == 1 else 'entries'}):")
    for position, entry in enumerate(chain, 1):
        print(f" {position}. {_format_entry(entry)}")
    for line in (
        "",
        " Tried in order when the primary fails (rate-limit, 5xx, connection errors).",
        " Docs: https://hermes-agent.nousresearch.com/docs/user-guide/features/fallback-providers",
        "",
    ):
        print(line)
|
||||
|
||||
|
||||
def _describe_primary(config: Dict[str, Any]) -> Optional[str]:
|
||||
"""One-line description of the primary model for display purposes."""
|
||||
model_cfg = config.get("model")
|
||||
if isinstance(model_cfg, dict):
|
||||
provider = (model_cfg.get("provider") or "?").strip() or "?"
|
||||
model = (model_cfg.get("default") or model_cfg.get("model") or "?").strip() or "?"
|
||||
return f"{model} (via {provider})"
|
||||
if isinstance(model_cfg, str) and model_cfg.strip():
|
||||
return model_cfg.strip()
|
||||
return None
|
||||
|
||||
|
||||
def cmd_fallback_add(args) -> None:
    """Launch the same picker as `hermes model`, then append the selection to the chain.

    The picker reports its result by overwriting ``config["model"]`` (and may
    change the auth store's ``active_provider``). This command snapshots both
    beforehand, reads the picker's selection from the post-picker config, and
    always puts the primary model and active provider back — whether the
    picker succeeds, exits, is interrupted with Ctrl-C, or raises.

    Nothing is added when the picker yields no usable provider/model, when the
    selection equals the current primary, or when the same provider/model pair
    is already in the chain.

    Args:
        args: Parsed CLI namespace, forwarded to the picker unchanged.
    """
    from hermes_cli.main import _require_tty, select_provider_and_model
    from hermes_cli.config import load_config, save_config

    _require_tty("fallback add")

    # Snapshot BEFORE the picker runs so we can distinguish "user actually
    # picked something" from "user cancelled" by comparing before/after.
    before_cfg = load_config()
    model_before = copy.deepcopy(before_cfg.get("model"))
    active_provider_before = _snapshot_auth_active_provider()

    def _rollback_primary() -> None:
        # Undo whatever the picker wrote to the primary model / auth store.
        _restore_model_cfg(model_before)
        _restore_auth_active_provider(active_provider_before)

    print()
    print(" Adding a fallback provider. The picker below is the same one used by")
    print(" `hermes model` — select the provider + model you want as a fallback.")
    print()

    try:
        select_provider_and_model(args=args)
    except BaseException:
        # Some provider flows exit on auth failure (SystemExit); Ctrl-C raises
        # KeyboardInterrupt; others may raise ordinary exceptions. In every
        # case the picker may already have overwritten the primary model, so
        # restore state and re-raise.
        _rollback_primary()
        raise

    # Read the post-picker state to see what the user selected.
    after_cfg = load_config()
    model_after = after_cfg.get("model")

    new_entry = _extract_fallback_from_model_cfg(model_after)
    if not new_entry:
        # Picker didn't complete (user cancelled or flow bailed). Nothing to do.
        _rollback_primary()
        print()
        print(" No fallback added.")
        return

    # Picker picked the same thing that's already the primary → nothing changed,
    # and there's nothing useful to add as a fallback to itself.
    primary_entry = _extract_fallback_from_model_cfg(model_before)
    if (
        primary_entry
        and primary_entry["provider"] == new_entry["provider"]
        and primary_entry["model"] == new_entry["model"]
    ):
        _rollback_primary()
        print()
        print(f" Selected model matches the current primary ({_format_entry(new_entry)}).")
        print(" A provider cannot be a fallback for itself — no change.")
        return

    # Reload the config with the primary restored, then append the new entry
    # to ``fallback_providers``. We deliberately re-load (rather than mutating
    # ``after_cfg``) because the picker may have touched other top-level keys
    # (custom_providers, providers credentials) that we want to keep.
    _rollback_primary()

    final_cfg = load_config()
    chain = _read_chain(final_cfg)

    # Reject exact-duplicate fallback entries.
    for existing in chain:
        if (
            existing.get("provider") == new_entry["provider"]
            and existing.get("model") == new_entry["model"]
        ):
            print()
            print(f" {_format_entry(new_entry)} is already in the fallback chain — skipped.")
            return

    chain.append(new_entry)
    _write_chain(final_cfg, chain)
    save_config(final_cfg)

    print()
    print(f" Added fallback: {_format_entry(new_entry)}")
    print(f" Chain is now {len(chain)} {'entry' if len(chain) == 1 else 'entries'} long.")
    print()
    print(" Run `hermes fallback list` to view, or `hermes fallback remove` to delete.")
|
||||
|
||||
|
||||
def _restore_model_cfg(model_before: Any) -> None:
    """Write a captured ``config["model"]`` snapshot back to the saved config.

    The config is reloaded first so only the ``model`` key is affected. A
    ``None`` snapshot means the key was absent, so it is removed.
    """
    from hermes_cli.config import load_config, save_config

    current = load_config()
    if model_before is not None:
        # Deep-copy so the saved config never aliases the caller's snapshot.
        current["model"] = copy.deepcopy(model_before)
    else:
        current.pop("model", None)
    save_config(current)
|
||||
|
||||
|
||||
def cmd_fallback_remove(args) -> None:  # noqa: ARG001
    """Let the user choose one fallback entry and delete it from the chain.

    Uses the curses chooser when it is usable and the plain numbered prompt
    otherwise. Choosing "Cancel" (or any out-of-range result) leaves the
    config untouched. ``args`` is unused.
    """
    from hermes_cli.config import load_config, save_config

    config = load_config()
    chain = _read_chain(config)

    if not chain:
        for line in ("", " No fallback providers configured — nothing to remove.", ""):
            print(line)
        return

    # The trailing "Cancel" item sits at index len(chain), i.e. out of range.
    choices = [_format_entry(entry) for entry in chain] + ["Cancel"]

    try:
        from hermes_cli.setup import _curses_prompt_choice

        idx = _curses_prompt_choice("Select a fallback to remove:", choices, 0)
    except Exception:
        idx = _numbered_pick("Select a fallback to remove:", choices)

    if idx is None or idx < 0 or idx >= len(chain):
        print()
        print(" Cancelled — no change.")
        return

    removed = chain.pop(idx)
    _write_chain(config, chain)
    save_config(config)

    print()
    print(f" Removed fallback: {_format_entry(removed)}")
    if not chain:
        print(" Fallback chain is now empty.")
    else:
        print(f" Chain is now {len(chain)} {'entry' if len(chain) == 1 else 'entries'} long.")
    print()
|
||||
|
||||
|
||||
def cmd_fallback_clear(args) -> None:  # noqa: ARG001
    """Empty the fallback chain after an explicit yes/no confirmation.

    Shows the current chain, then asks for confirmation; only ``y``/``yes``
    (case-insensitive) proceeds. Ctrl-C or EOF at the prompt cancels.
    ``args`` is unused.
    """
    from hermes_cli.config import load_config, save_config

    config = load_config()
    chain = _read_chain(config)

    if not chain:
        for line in ("", " No fallback providers configured — nothing to clear.", ""):
            print(line)
        return

    count = len(chain)
    print()
    print(f" Current fallback chain ({count} {'entry' if count == 1 else 'entries'}):")
    for position, entry in enumerate(chain, 1):
        print(f" {position}. {_format_entry(entry)}")
    print()

    try:
        resp = input(" Clear all entries? [y/N]: ").strip().lower()
    except (KeyboardInterrupt, EOFError):
        print()
        print(" Cancelled.")
        return

    if resp not in ("y", "yes"):
        print(" Cancelled — no change.")
        return

    _write_chain(config, [])
    save_config(config)
    for line in ("", " Fallback chain cleared.", ""):
        print(line)
|
||||
|
||||
|
||||
def _numbered_pick(question: str, choices: List[str]) -> Optional[int]:
|
||||
"""Fallback numbered-list picker when curses is unavailable."""
|
||||
print(question)
|
||||
for i, c in enumerate(choices, 1):
|
||||
print(f" {i}. {c}")
|
||||
print()
|
||||
while True:
|
||||
try:
|
||||
val = input(f"Choice [1-{len(choices)}]: ").strip()
|
||||
if not val:
|
||||
return None
|
||||
idx = int(val) - 1
|
||||
if 0 <= idx < len(choices):
|
||||
return idx
|
||||
print(f"Please enter 1-{len(choices)}")
|
||||
except ValueError:
|
||||
print("Please enter a number")
|
||||
except (KeyboardInterrupt, EOFError):
|
||||
print()
|
||||
return None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Dispatch
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def cmd_fallback(args) -> None:
    """Route ``hermes fallback [subcommand]`` to the matching handler.

    A missing or empty subcommand behaves like ``list``. An unrecognised
    subcommand prints a usage hint and exits with status 2.
    """
    sub = getattr(args, "fallback_command", None)

    if sub == "add":
        cmd_fallback_add(args)
        return
    if sub == "clear":
        cmd_fallback_clear(args)
        return
    if sub in ("remove", "rm"):
        cmd_fallback_remove(args)
        return
    if sub in (None, "", "list", "ls"):
        cmd_fallback_list(args)
        return

    print(f"Unknown fallback subcommand: {sub}")
    print("Use one of: list, add, remove, clear")
    raise SystemExit(2)
|
||||
@@ -125,6 +125,7 @@ _DEFAULT_PAYLOADS = {
|
||||
"task_id": "test-task",
|
||||
"tool_call_id": "test-call",
|
||||
"result": '{"output": "hello"}',
|
||||
"duration_ms": 42,
|
||||
},
|
||||
"pre_llm_call": {
|
||||
"session_id": "test-session",
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
+772
-45
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,329 @@
|
||||
"""Remote model catalog fetcher.
|
||||
|
||||
The Hermes docs site hosts a JSON manifest of curated models for providers
|
||||
we want to update without shipping a release (currently OpenRouter and
|
||||
Nous Portal). This module fetches, validates, and caches that manifest,
|
||||
falling back to the in-repo hardcoded lists when the network is unavailable.
|
||||
|
||||
Pipeline
|
||||
--------
|
||||
1. ``get_catalog()`` — returns a parsed manifest dict.
|
||||
- Checks in-process cache (invalidated by TTL).
|
||||
- Reads disk cache at ``~/.hermes/cache/model_catalog.json``.
|
||||
- Fetches the master URL if disk cache is stale or missing.
|
||||
- On any fetch failure, keeps using the stale cache (or empty dict).
|
||||
|
||||
2. ``get_curated_openrouter_models()`` / ``get_curated_nous_models()`` —
|
||||
thin accessors returning the shapes existing callers expect. Each
|
||||
falls back to the in-repo hardcoded list on any lookup failure.
|
||||
|
||||
Schema (version 1)
|
||||
------------------
|
||||
::
|
||||
|
||||
{
|
||||
"version": 1,
|
||||
"updated_at": "2026-04-25T22:00:00Z",
|
||||
"metadata": {...}, # free-form
|
||||
"providers": {
|
||||
"openrouter": {
|
||||
"metadata": {...}, # free-form
|
||||
"models": [
|
||||
{"id": "vendor/model", "description": "recommended",
|
||||
"metadata": {...}} # free-form, model-level
|
||||
]
|
||||
},
|
||||
"nous": {...}
|
||||
}
|
||||
}
|
||||
|
||||
Unknown fields are ignored — extra metadata can be added at either level
|
||||
without bumping ``version``. ``version`` bumps are reserved for
|
||||
breaking changes (renaming ``providers``, changing ``models`` shape).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
import urllib.error
|
||||
import urllib.request
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from hermes_cli import __version__ as _HERMES_VERSION
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Constants
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
DEFAULT_CATALOG_URL = (
|
||||
"https://hermes-agent.nousresearch.com/docs/api/model-catalog.json"
|
||||
)
|
||||
DEFAULT_TTL_HOURS = 24
|
||||
DEFAULT_FETCH_TIMEOUT = 8.0
|
||||
SUPPORTED_SCHEMA_VERSION = 1
|
||||
|
||||
_HERMES_USER_AGENT = f"hermes-cli/{_HERMES_VERSION}"
|
||||
|
||||
# In-process cache to avoid repeated disk + parse work across multiple
|
||||
# calls within the same session. Invalidated by TTL against the disk file's
|
||||
# mtime, so calling code never has to think about this.
|
||||
_catalog_cache: dict[str, Any] | None = None
|
||||
_catalog_cache_source_mtime: float = 0.0
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Config
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _load_catalog_config() -> dict[str, Any]:
    """Load the ``model_catalog`` config block with defaults filled in.

    Never raises: a config that cannot be loaded, is not a mapping, or holds
    a malformed ``ttl_hours`` value degrades to the built-in defaults so that
    ``get_catalog()`` keeps its "never raise" contract.

    Returns:
        A dict with keys ``enabled`` (bool), ``url`` (str), ``ttl_hours``
        (float) and ``providers`` (dict).
    """
    try:
        from hermes_cli.config import load_config
        cfg = load_config() or {}
    except Exception:
        cfg = {}

    raw = cfg.get("model_catalog") if isinstance(cfg, dict) else None
    if not isinstance(raw, dict):
        raw = {}

    # ``float()`` raises on values such as "abc" or a list; a hand-edited
    # config must not take the whole CLI down, so fall back to the default.
    try:
        ttl_hours = float(raw.get("ttl_hours") or DEFAULT_TTL_HOURS)
    except (TypeError, ValueError):
        logger.info(
            "model catalog: ignoring invalid ttl_hours %r", raw.get("ttl_hours")
        )
        ttl_hours = float(DEFAULT_TTL_HOURS)
    if ttl_hours != ttl_hours:  # NaN compares unequal to itself
        ttl_hours = float(DEFAULT_TTL_HOURS)

    providers = raw.get("providers")
    return {
        "enabled": bool(raw.get("enabled", True)),
        "url": str(raw.get("url") or DEFAULT_CATALOG_URL),
        "ttl_hours": ttl_hours,
        "providers": providers if isinstance(providers, dict) else {},
    }
|
||||
|
||||
|
||||
def _cache_path() -> Path:
    """Location of the on-disk manifest cache under the Hermes home directory.

    ``get_hermes_home`` is imported at call time so tests can monkeypatch the
    home directory before the path is resolved.
    """
    from hermes_constants import get_hermes_home

    hermes_home = get_hermes_home()
    return hermes_home / "cache" / "model_catalog.json"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fetch + validate + cache
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _fetch_manifest(url: str, timeout: float) -> dict[str, Any] | None:
    """Download and parse the manifest at ``url``.

    Args:
        url: Manifest location to GET.
        timeout: Timeout in seconds passed to ``urlopen``.

    Returns:
        The parsed manifest dict when the download succeeds and the payload
        passes ``_validate_manifest``; ``None`` on any failure (each failure
        is logged at INFO level).
    """
    try:
        request = urllib.request.Request(
            url,
            headers={
                "Accept": "application/json",
                "User-Agent": _HERMES_USER_AGENT,
            },
        )
        with urllib.request.urlopen(request, timeout=timeout) as response:
            payload = response.read()
        manifest = json.loads(payload.decode())
    except (urllib.error.URLError, TimeoutError, json.JSONDecodeError, OSError) as exc:
        # Expected network / parse failures.
        logger.info("model catalog fetch failed (%s): %s", url, exc)
        return None
    except Exception as exc:  # pragma: no cover — defensive
        logger.info("model catalog fetch errored (%s): %s", url, exc)
        return None

    if _validate_manifest(manifest):
        return manifest

    logger.info("model catalog at %s failed schema validation", url)
    return None
|
||||
|
||||
|
||||
def _validate_manifest(data: Any) -> bool:
|
||||
"""Return True when ``data`` matches the minimum manifest shape."""
|
||||
if not isinstance(data, dict):
|
||||
return False
|
||||
version = data.get("version")
|
||||
if not isinstance(version, int) or version > SUPPORTED_SCHEMA_VERSION:
|
||||
# Future schema version we don't understand — refuse rather than
|
||||
# guess. Older schemas (version < 1) aren't supported either.
|
||||
return False
|
||||
providers = data.get("providers")
|
||||
if not isinstance(providers, dict):
|
||||
return False
|
||||
for pname, pblock in providers.items():
|
||||
if not isinstance(pname, str) or not isinstance(pblock, dict):
|
||||
return False
|
||||
models = pblock.get("models")
|
||||
if not isinstance(models, list):
|
||||
return False
|
||||
for m in models:
|
||||
if not isinstance(m, dict):
|
||||
return False
|
||||
if not isinstance(m.get("id"), str) or not m["id"].strip():
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def _read_disk_cache() -> tuple[dict[str, Any] | None, float]:
    """Return ``(data_or_none, mtime)`` for the on-disk manifest cache.

    ``(None, 0.0)`` is returned when the file is missing, unreadable, not
    valid UTF-8 JSON, or fails ``_validate_manifest``. This function never
    raises for a damaged cache file.
    """
    path = _cache_path()
    try:
        mtime = path.stat().st_mtime
    except OSError:  # includes FileNotFoundError
        return (None, 0.0)

    try:
        # JSON is UTF-8 by specification; don't depend on the locale default.
        with open(path, encoding="utf-8") as fh:
            data = json.load(fh)
    except (OSError, ValueError):
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError
        # (a cache file containing invalid bytes).
        return (None, 0.0)

    if not _validate_manifest(data):
        return (None, 0.0)
    return (data, mtime)
|
||||
|
||||
|
||||
def _write_disk_cache(data: dict[str, Any]) -> None:
    """Persist ``data`` to the disk cache, replacing any previous copy.

    The manifest is written to a sibling ``.tmp`` file and then moved over
    the real path with ``os.replace``. Failures are logged at INFO level and
    swallowed — the cache is best-effort — and a partially written temp file
    is removed instead of being left behind.
    """
    path = _cache_path()
    tmp = path.with_suffix(path.suffix + ".tmp")
    try:
        path.parent.mkdir(parents=True, exist_ok=True)
        # Explicit UTF-8 so the file does not depend on the locale default.
        with open(tmp, "w", encoding="utf-8") as fh:
            json.dump(data, fh, indent=2)
            fh.write("\n")
        os.replace(tmp, path)
    except OSError as exc:
        logger.info("model catalog cache write failed: %s", exc)
        # Don't leave a truncated temp file next to the cache.
        try:
            os.unlink(tmp)
        except OSError:
            pass
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Public API
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def get_catalog(*, force_refresh: bool = False) -> dict[str, Any]:
    """Return the parsed model catalog manifest, or an empty dict on failure.

    Resolution order (skipped entirely when the catalog is disabled in
    config, in which case ``{}`` is returned):

    1. In-process copy, when the disk cache is fresh and its mtime matches
       the one the in-process copy was loaded from.
    2. Fresh disk cache.
    3. In-process copy alone, when there is no usable disk cache (for
       example the cache directory is not writable) and the copy is still
       within the TTL.
    4. Network fetch; the result is written to disk and cached in-process.
    5. Stale disk cache, when the fetch fails.
    6. ``{}``.

    Args:
        force_refresh: Skip steps 1-3 and go straight to the network.

    Callers should treat a missing provider/model as "use the in-repo fallback"
    — never raise from this function so the CLI keeps working offline.
    """
    global _catalog_cache, _catalog_cache_source_mtime

    cfg = _load_catalog_config()
    if not cfg["enabled"]:
        return {}

    ttl_seconds = max(0.0, cfg["ttl_hours"] * 3600.0)

    disk_data, disk_mtime = _read_disk_cache()
    now = time.time()
    disk_fresh = disk_data is not None and (now - disk_mtime) < ttl_seconds

    if not force_refresh:
        if disk_fresh:
            # In-process hit: disk hasn't changed since we loaded it.
            if (
                _catalog_cache is not None
                and disk_mtime == _catalog_cache_source_mtime
            ):
                return _catalog_cache
            # Disk is fresh enough — use it without a network hit.
            _catalog_cache = disk_data
            _catalog_cache_source_mtime = disk_mtime
            return disk_data

        # No usable disk copy. When an earlier fetch could not be persisted,
        # the in-process copy is stamped with its fetch time below; honour
        # it for the TTL instead of hitting the network on every call.
        if (
            disk_data is None
            and _catalog_cache is not None
            and (now - _catalog_cache_source_mtime) < ttl_seconds
        ):
            return _catalog_cache

    # Need to (re)fetch. If it fails, fall back to any stale disk copy.
    fetched = _fetch_manifest(cfg["url"], DEFAULT_FETCH_TIMEOUT)
    if fetched is not None:
        _write_disk_cache(fetched)
        new_disk_data, new_mtime = _read_disk_cache()
        if new_disk_data is not None:
            _catalog_cache = new_disk_data
            _catalog_cache_source_mtime = new_mtime
            return new_disk_data
        # Disk write (or read-back) failed: keep the manifest in memory only,
        # stamped with the fetch time so the TTL check above applies to it.
        _catalog_cache = fetched
        _catalog_cache_source_mtime = now
        return fetched

    if disk_data is not None:
        _catalog_cache = disk_data
        _catalog_cache_source_mtime = disk_mtime
        return disk_data

    return {}
|
||||
|
||||
|
||||
def _fetch_provider_override(provider: str) -> dict[str, Any] | None:
    """Fetch the manifest named by ``model_catalog.providers.<name>.url``.

    Returns ``None`` when the catalog is disabled, when ``provider`` has no
    dict entry in the config, when that entry has no non-blank string
    ``url``, or when the fetch itself fails.
    """
    cfg = _load_catalog_config()
    if not cfg["enabled"]:
        return None

    entry = cfg["providers"].get(provider)
    candidate = entry.get("url") if isinstance(entry, dict) else None
    if not isinstance(candidate, str):
        return None

    target = candidate.strip()
    if not target:
        return None

    # Override manifests bypass the disk cache and are requested again on
    # every call, using the same DEFAULT_FETCH_TIMEOUT as the main fetch.
    return _fetch_manifest(target, DEFAULT_FETCH_TIMEOUT)
|
||||
|
||||
|
||||
def _get_provider_block(provider: str) -> dict[str, Any] | None:
    """Look up ``provider``'s manifest block, preferring a per-provider override.

    The override manifest is consulted first; the shared catalog is only
    loaded when there is no override or the override has no dict block for
    this provider. Returns ``None`` when neither source has one.
    """

    def _block_from(manifest: dict[str, Any]) -> dict[str, Any] | None:
        found = manifest.get("providers", {}).get(provider)
        return found if isinstance(found, dict) else None

    override_manifest = _fetch_provider_override(provider)
    if override_manifest is not None:
        from_override = _block_from(override_manifest)
        if from_override is not None:
            return from_override

    shared = get_catalog()
    return _block_from(shared) if shared else None
|
||||
|
||||
|
||||
def get_curated_openrouter_models() -> list[tuple[str, str]] | None:
    """Curated OpenRouter models from the manifest as ``(id, description)`` pairs.

    Entries whose ``id`` is empty after stripping are dropped; a missing
    description becomes ``""``.

    Returns ``None`` when the provider block is unavailable or yields no
    models, so callers can fall back to their hardcoded list.
    """
    block = _get_provider_block("openrouter")
    if not block:
        return None

    pairs: list[tuple[str, str]] = [
        (model_id, str(entry.get("description") or ""))
        for entry in block.get("models", [])
        for model_id in (str(entry.get("id") or "").strip(),)
        if model_id
    ]
    return pairs if pairs else None
|
||||
|
||||
|
||||
def get_curated_nous_models() -> list[str] | None:
    """Curated Nous Portal model ids taken from the manifest.

    Ids are stripped and empty ones are skipped.

    Returns ``None`` when the provider block is unavailable or contains no
    usable ids.
    """
    block = _get_provider_block("nous")
    if not block:
        return None

    stripped_ids = (
        str(entry.get("id") or "").strip() for entry in block.get("models", [])
    )
    model_ids: list[str] = [model_id for model_id in stripped_ids if model_id]
    return model_ids if model_ids else None
|
||||
|
||||
|
||||
def reset_cache() -> None:
    """Drop the in-process manifest copy and its recorded source mtime.

    Used by tests and ``hermes model --refresh``.
    """
    global _catalog_cache, _catalog_cache_source_mtime
    _catalog_cache, _catalog_cache_source_mtime = None, 0.0
|
||||
@@ -12,8 +12,12 @@ Different LLM providers expect model identifiers in different formats:
|
||||
model IDs, but Claude still uses hyphenated native names like
|
||||
``claude-sonnet-4-6``.
|
||||
- **OpenCode Go** preserves dots in model names: ``minimax-m2.7``.
|
||||
- **DeepSeek** only accepts two model identifiers:
|
||||
``deepseek-chat`` and ``deepseek-reasoner``.
|
||||
- **DeepSeek** accepts ``deepseek-chat`` (V3), ``deepseek-reasoner``
|
||||
(R1-family), and the first-class V-series IDs (``deepseek-v4-pro``,
|
||||
``deepseek-v4-flash``, and any future ``deepseek-v<N>-*``). Older
|
||||
Hermes revisions folded every non-reasoner input into
|
||||
``deepseek-chat``, which on aggregators routes to V3 — so a user
|
||||
picking V4 Pro was silently downgraded.
|
||||
- **Custom** and remaining providers pass the name through as-is.
|
||||
|
||||
This module centralises that translation so callers can simply write::
|
||||
@@ -25,6 +29,7 @@ Inspired by Clawdbot's ``normalizeAnthropicModelId`` pattern.
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from typing import Optional
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -100,6 +105,15 @@ _MATCHING_PREFIX_STRIP_PROVIDERS: frozenset[str] = frozenset({
|
||||
"custom",
|
||||
})
|
||||
|
||||
# Providers whose APIs require lowercase model IDs. Xiaomi's
|
||||
# ``api.xiaomimimo.com`` rejects mixed-case names like ``MiMo-V2.5-Pro``
|
||||
# that users might copy from marketing docs — it only accepts
|
||||
# ``mimo-v2.5-pro``. After stripping a matching provider prefix, these
|
||||
# providers also get ``.lower()`` applied.
|
||||
_LOWERCASE_MODEL_PROVIDERS: frozenset[str] = frozenset({
|
||||
"xiaomi",
|
||||
})
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# DeepSeek special handling
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -115,17 +129,30 @@ _DEEPSEEK_REASONER_KEYWORDS: frozenset[str] = frozenset({
|
||||
})
|
||||
|
||||
_DEEPSEEK_CANONICAL_MODELS: frozenset[str] = frozenset({
|
||||
"deepseek-chat",
|
||||
"deepseek-reasoner",
|
||||
"deepseek-chat", # V3 on DeepSeek direct and most aggregators
|
||||
"deepseek-reasoner", # R1-family reasoning model
|
||||
"deepseek-v4-pro", # V4 Pro — first-class model ID
|
||||
"deepseek-v4-flash", # V4 Flash — first-class model ID
|
||||
})
|
||||
|
||||
# First-class V-series IDs (``deepseek-v4-pro``, ``deepseek-v4-flash``,
|
||||
# future ``deepseek-v5-*``, dated variants like ``deepseek-v4-flash-20260423``).
|
||||
# Verified empirically 2026-04-24: DeepSeek's Chat Completions API returns
|
||||
# ``provider: DeepSeek`` / ``model: deepseek-v4-flash-20260423`` when called
|
||||
# with ``model=deepseek/deepseek-v4-flash``, so these names are not aliases
|
||||
# of ``deepseek-chat`` and must not be folded into it.
|
||||
_DEEPSEEK_V_SERIES_RE = re.compile(r"^deepseek-v\d+([-.].+)?$")
|
||||
|
||||
|
||||
def _normalize_for_deepseek(model_name: str) -> str:
|
||||
"""Map any model input to one of DeepSeek's two accepted identifiers.
|
||||
"""Map a model input to a DeepSeek-accepted identifier.
|
||||
|
||||
Rules:
|
||||
- Already ``deepseek-chat`` or ``deepseek-reasoner`` -> pass through.
|
||||
- Contains any reasoner keyword (r1, think, reasoning, cot, reasoner)
|
||||
- Already a known canonical (``deepseek-chat``/``deepseek-reasoner``/
|
||||
``deepseek-v4-pro``/``deepseek-v4-flash``) -> pass through.
|
||||
- Matches the V-series pattern ``deepseek-v<digit>...`` -> pass through
|
||||
(covers future ``deepseek-v5-*`` and dated variants without a release).
|
||||
- Contains a reasoner keyword (r1, think, reasoning, cot, reasoner)
|
||||
-> ``deepseek-reasoner``.
|
||||
- Everything else -> ``deepseek-chat``.
|
||||
|
||||
@@ -133,13 +160,17 @@ def _normalize_for_deepseek(model_name: str) -> str:
|
||||
model_name: The bare model name (vendor prefix already stripped).
|
||||
|
||||
Returns:
|
||||
One of ``"deepseek-chat"`` or ``"deepseek-reasoner"``.
|
||||
A DeepSeek-accepted model identifier.
|
||||
"""
|
||||
bare = _strip_vendor_prefix(model_name).lower()
|
||||
|
||||
if bare in _DEEPSEEK_CANONICAL_MODELS:
|
||||
return bare
|
||||
|
||||
# V-series first-class IDs (v4-pro, v4-flash, future v5-*, dated variants)
|
||||
if _DEEPSEEK_V_SERIES_RE.match(bare):
|
||||
return bare
|
||||
|
||||
# Check for reasoner-like keywords anywhere in the name
|
||||
for keyword in _DEEPSEEK_REASONER_KEYWORDS:
|
||||
if keyword in bare:
|
||||
@@ -347,6 +378,9 @@ def normalize_model_for_provider(model_input: str, target_provider: str) -> str:
|
||||
|
||||
>>> normalize_model_for_provider("claude-sonnet-4.6", "zai")
|
||||
'claude-sonnet-4.6'
|
||||
|
||||
>>> normalize_model_for_provider("MiMo-V2.5-Pro", "xiaomi")
|
||||
'mimo-v2.5-pro'
|
||||
"""
|
||||
name = (model_input or "").strip()
|
||||
if not name:
|
||||
@@ -410,7 +444,12 @@ def normalize_model_for_provider(model_input: str, target_provider: str) -> str:
|
||||
|
||||
# --- Direct providers: repair matching provider prefixes only ---
|
||||
if provider in _MATCHING_PREFIX_STRIP_PROVIDERS:
|
||||
return _strip_matching_provider_prefix(name, provider)
|
||||
result = _strip_matching_provider_prefix(name, provider)
|
||||
# Some providers require lowercase model IDs (e.g. Xiaomi's API
|
||||
# rejects "MiMo-V2.5-Pro" but accepts "mimo-v2.5-pro").
|
||||
if provider in _LOWERCASE_MODEL_PROVIDERS:
|
||||
result = result.lower()
|
||||
return result
|
||||
|
||||
# --- Authoritative native providers: preserve user-facing slugs as-is ---
|
||||
if provider in _AUTHORITATIVE_NATIVE_PROVIDERS:
|
||||
|
||||
+110
-20
@@ -527,6 +527,49 @@ def _resolve_alias_fallback(
|
||||
return None
|
||||
|
||||
|
||||
def resolve_display_context_length(
    model: str,
    provider: str,
    base_url: str = "",
    api_key: str = "",
    model_info: Optional[ModelInfo] = None,
    custom_providers: list | None = None,
) -> Optional[int]:
    """Pick the context length that ``/model`` output should display.

    The provider-aware resolver
    ``agent.model_metadata.get_model_context_length`` is asked first, with
    ``custom_providers`` passed through to it. Any exception from importing
    or calling that resolver is swallowed.

    Returns:
        The resolver's value as an ``int`` when it is truthy; otherwise
        ``model_info.context_window`` as an ``int`` when ``model_info`` is
        given and that attribute is truthy; otherwise ``None``.
    """
    try:
        from agent.model_metadata import get_model_context_length

        resolved = get_model_context_length(
            model,
            base_url=base_url or "",
            api_key=api_key or "",
            provider=provider or None,
            custom_providers=custom_providers,
        )
        if resolved:
            return int(resolved)
    except Exception:
        # Best effort: fall through to the models.dev-style metadata below.
        pass

    if model_info is None:
        return None
    window = model_info.context_window
    return int(window) if window else None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Core model-switching pipeline
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -771,7 +814,10 @@ def switch_model(
|
||||
|
||||
if provider_changed or explicit_provider:
|
||||
try:
|
||||
runtime = resolve_runtime_provider(requested=target_provider)
|
||||
runtime = resolve_runtime_provider(
|
||||
requested=target_provider,
|
||||
target_model=new_model,
|
||||
)
|
||||
api_key = runtime.get("api_key", "")
|
||||
base_url = runtime.get("base_url", "")
|
||||
api_mode = runtime.get("api_mode", "")
|
||||
@@ -788,10 +834,18 @@ def switch_model(
|
||||
)
|
||||
else:
|
||||
try:
|
||||
runtime = resolve_runtime_provider(requested=current_provider)
|
||||
api_key = runtime.get("api_key", "")
|
||||
base_url = runtime.get("base_url", "")
|
||||
api_mode = runtime.get("api_mode", "")
|
||||
runtime = resolve_runtime_provider(
|
||||
requested=current_provider,
|
||||
target_model=new_model,
|
||||
)
|
||||
# If resolution fell through to "custom" (e.g. named custom provider like
|
||||
# "ollama-launch" that resolve_runtime_provider doesn't know), keep existing
|
||||
# credentials. Otherwise use the resolved values (picks up credential rotation,
|
||||
# base_url adjustments for OpenCode, etc.).
|
||||
if runtime.get("provider") != "custom":
|
||||
api_key = runtime.get("api_key", "")
|
||||
base_url = runtime.get("base_url", "")
|
||||
api_mode = runtime.get("api_mode", "")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
@@ -815,6 +869,7 @@ def switch_model(
|
||||
target_provider,
|
||||
api_key=api_key,
|
||||
base_url=base_url,
|
||||
api_mode=api_mode or None,
|
||||
)
|
||||
except Exception as e:
|
||||
validation = {
|
||||
@@ -824,16 +879,31 @@ def switch_model(
|
||||
"message": f"Could not validate `{new_model}`: {e}",
|
||||
}
|
||||
|
||||
# Override rejection if model is in the user's saved provider config.
|
||||
# API /v1/models may not list cloud/aliased models even though the server supports them.
|
||||
if not validation.get("accepted"):
|
||||
msg = validation.get("message", "Invalid model")
|
||||
return ModelSwitchResult(
|
||||
success=False,
|
||||
new_model=new_model,
|
||||
target_provider=target_provider,
|
||||
provider_label=provider_label,
|
||||
is_global=is_global,
|
||||
error_message=msg,
|
||||
)
|
||||
override = False
|
||||
if user_providers:
|
||||
for up in user_providers:
|
||||
if isinstance(up, dict) and up.get("provider") == target_provider:
|
||||
cfg_models = up.get("models", [])
|
||||
if new_model in cfg_models or any(
|
||||
m.get("name") == new_model for m in cfg_models if isinstance(m, dict)
|
||||
):
|
||||
override = True
|
||||
break
|
||||
if override:
|
||||
validation = {"accepted": True, "persist": True, "recognized": False, "message": validation.get("message", "")}
|
||||
else:
|
||||
msg = validation.get("message", "Invalid model")
|
||||
return ModelSwitchResult(
|
||||
success=False,
|
||||
new_model=new_model,
|
||||
target_provider=target_provider,
|
||||
provider_label=provider_label,
|
||||
is_global=is_global,
|
||||
error_message=msg,
|
||||
)
|
||||
|
||||
# Apply auto-correction if validation found a closer match
|
||||
if validation.get("corrected_model"):
|
||||
@@ -936,7 +1006,7 @@ def list_authenticated_providers(
|
||||
from hermes_cli.auth import PROVIDER_REGISTRY
|
||||
from hermes_cli.models import (
|
||||
OPENROUTER_MODELS, _PROVIDER_MODELS,
|
||||
_MODELS_DEV_PREFERRED, _merge_with_models_dev,
|
||||
_MODELS_DEV_PREFERRED, _merge_with_models_dev, provider_model_ids,
|
||||
)
|
||||
|
||||
results: List[dict] = []
|
||||
@@ -984,6 +1054,14 @@ def list_authenticated_providers(
|
||||
|
||||
# Check if any env var is set
|
||||
has_creds = any(os.environ.get(ev) for ev in env_vars)
|
||||
if not has_creds:
|
||||
try:
|
||||
from hermes_cli.auth import _load_auth_store
|
||||
store = _load_auth_store()
|
||||
if store and hermes_id in store.get("credential_pool", {}):
|
||||
has_creds = True
|
||||
except Exception:
|
||||
pass
|
||||
if not has_creds:
|
||||
continue
|
||||
|
||||
@@ -1095,11 +1173,14 @@ def list_authenticated_providers(
|
||||
if not has_creds:
|
||||
continue
|
||||
|
||||
# Use curated list — look up by Hermes slug, fall back to overlay key
|
||||
model_ids = curated.get(hermes_slug, []) or curated.get(pid, [])
|
||||
# Merge with models.dev for preferred providers (same rationale as above).
|
||||
if hermes_slug in _MODELS_DEV_PREFERRED:
|
||||
model_ids = _merge_with_models_dev(hermes_slug, model_ids)
|
||||
if hermes_slug in {"copilot", "copilot-acp"}:
|
||||
model_ids = provider_model_ids(hermes_slug)
|
||||
else:
|
||||
# Use curated list — look up by Hermes slug, fall back to overlay key
|
||||
model_ids = curated.get(hermes_slug, []) or curated.get(pid, [])
|
||||
# Merge with models.dev for preferred providers (same rationale as above).
|
||||
if hermes_slug in _MODELS_DEV_PREFERRED:
|
||||
model_ids = _merge_with_models_dev(hermes_slug, model_ids)
|
||||
total = len(model_ids)
|
||||
top = model_ids[:max_models]
|
||||
|
||||
@@ -1222,6 +1303,15 @@ def list_authenticated_providers(
|
||||
if m and m not in models_list:
|
||||
models_list.append(m)
|
||||
|
||||
# Official OpenAI API rows in providers: often have base_url but no
|
||||
# explicit models: dict — avoid a misleading zero count in /model.
|
||||
if not models_list:
|
||||
url_lower = str(api_url).strip().lower()
|
||||
if "api.openai.com" in url_lower:
|
||||
fb = curated.get("openai") or []
|
||||
if fb:
|
||||
models_list = list(fb)
|
||||
|
||||
# Try to probe /v1/models if URL is set (but don't block on it)
|
||||
# For now just show what we know from config
|
||||
results.append({
|
||||
|
||||
+351
-72
@@ -33,6 +33,8 @@ COPILOT_REASONING_EFFORTS_O_SERIES = ["low", "medium", "high"]
|
||||
# (model_id, display description shown in menus)
|
||||
OPENROUTER_MODELS: list[tuple[str, str]] = [
|
||||
("moonshotai/kimi-k2.6", "recommended"),
|
||||
("deepseek/deepseek-v4-pro", ""),
|
||||
("deepseek/deepseek-v4-flash", ""),
|
||||
("anthropic/claude-opus-4.7", ""),
|
||||
("anthropic/claude-opus-4.6", ""),
|
||||
("anthropic/claude-sonnet-4.6", ""),
|
||||
@@ -40,7 +42,7 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [
|
||||
("anthropic/claude-sonnet-4.5", ""),
|
||||
("anthropic/claude-haiku-4.5", ""),
|
||||
("openrouter/elephant-alpha", "free"),
|
||||
("openai/gpt-5.4", ""),
|
||||
("openai/gpt-5.5", ""),
|
||||
("openai/gpt-5.4-mini", ""),
|
||||
("xiaomi/mimo-v2.5-pro", ""),
|
||||
("xiaomi/mimo-v2.5", ""),
|
||||
@@ -63,7 +65,7 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [
|
||||
("nvidia/nemotron-3-super-120b-a12b:free", "free"),
|
||||
("arcee-ai/trinity-large-preview:free", "free"),
|
||||
("arcee-ai/trinity-large-thinking", ""),
|
||||
("openai/gpt-5.4-pro", ""),
|
||||
("openai/gpt-5.5-pro", ""),
|
||||
("openai/gpt-5.4-nano", ""),
|
||||
]
|
||||
|
||||
@@ -109,6 +111,8 @@ def _codex_curated_models() -> list[str]:
|
||||
_PROVIDER_MODELS: dict[str, list[str]] = {
|
||||
"nous": [
|
||||
"moonshotai/kimi-k2.6",
|
||||
"deepseek/deepseek-v4-pro",
|
||||
"deepseek/deepseek-v4-flash",
|
||||
"xiaomi/mimo-v2.5-pro",
|
||||
"xiaomi/mimo-v2.5",
|
||||
"anthropic/claude-opus-4.7",
|
||||
@@ -116,7 +120,7 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
|
||||
"anthropic/claude-sonnet-4.6",
|
||||
"anthropic/claude-sonnet-4.5",
|
||||
"anthropic/claude-haiku-4.5",
|
||||
"openai/gpt-5.4",
|
||||
"openai/gpt-5.5",
|
||||
"openai/gpt-5.4-mini",
|
||||
"openai/gpt-5.3-codex",
|
||||
"google/gemini-3-pro-preview",
|
||||
@@ -135,9 +139,21 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
|
||||
"x-ai/grok-4.20-beta",
|
||||
"nvidia/nemotron-3-super-120b-a12b",
|
||||
"arcee-ai/trinity-large-thinking",
|
||||
"openai/gpt-5.4-pro",
|
||||
"openai/gpt-5.5-pro",
|
||||
"openai/gpt-5.4-nano",
|
||||
],
|
||||
# Native OpenAI Chat Completions (api.openai.com). Used by /model counts and
|
||||
# provider_model_ids fallback when /v1/models is unavailable.
|
||||
"openai": [
|
||||
"gpt-5.4",
|
||||
"gpt-5.4-mini",
|
||||
"gpt-5-mini",
|
||||
"gpt-5.3-codex",
|
||||
"gpt-5.2-codex",
|
||||
"gpt-4.1",
|
||||
"gpt-4o",
|
||||
"gpt-4o-mini",
|
||||
],
|
||||
"openai-codex": _codex_curated_models(),
|
||||
"copilot-acp": [
|
||||
"copilot-acp",
|
||||
@@ -151,10 +167,13 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
|
||||
"gpt-4.1",
|
||||
"gpt-4o",
|
||||
"gpt-4o-mini",
|
||||
"claude-opus-4.6",
|
||||
"claude-sonnet-4.6",
|
||||
"claude-sonnet-4",
|
||||
"claude-sonnet-4.5",
|
||||
"claude-haiku-4.5",
|
||||
"gemini-3.1-pro-preview",
|
||||
"gemini-3-pro-preview",
|
||||
"gemini-3-flash-preview",
|
||||
"gemini-2.5-pro",
|
||||
"grok-code-fast-1",
|
||||
],
|
||||
@@ -246,6 +265,8 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
|
||||
"claude-haiku-4-5-20251001",
|
||||
],
|
||||
"deepseek": [
|
||||
"deepseek-v4-pro",
|
||||
"deepseek-v4-flash",
|
||||
"deepseek-chat",
|
||||
"deepseek-reasoner",
|
||||
],
|
||||
@@ -362,6 +383,9 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
|
||||
"us.meta.llama4-maverick-17b-instruct-v1:0",
|
||||
"us.meta.llama4-scout-17b-instruct-v1:0",
|
||||
],
|
||||
# Azure Foundry: user-provided endpoint and model.
|
||||
# Empty list because models depend on the endpoint configuration.
|
||||
"azure-foundry": [],
|
||||
}
|
||||
|
||||
# Vercel AI Gateway: derive the bare-model-id catalog from the curated
|
||||
@@ -676,7 +700,7 @@ def get_nous_recommended_aux_model(
|
||||
# ---------------------------------------------------------------------------
|
||||
# Canonical provider list — single source of truth for provider identity.
|
||||
# Every code path that lists, displays, or iterates providers derives from
|
||||
# this list: hermes model, /model, /provider, list_authenticated_providers.
|
||||
# this list: hermes model, /model, list_authenticated_providers.
|
||||
#
|
||||
# Fields:
|
||||
# slug — internal provider ID (used in config.yaml, --provider flag)
|
||||
@@ -719,6 +743,7 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
|
||||
ProviderEntry("opencode-zen", "OpenCode Zen", "OpenCode Zen (35+ curated models, pay-as-you-go)"),
|
||||
ProviderEntry("opencode-go", "OpenCode Go", "OpenCode Go (open models, $10/month subscription)"),
|
||||
ProviderEntry("bedrock", "AWS Bedrock", "AWS Bedrock (Claude, Nova, Llama, DeepSeek — IAM or API key)"),
|
||||
ProviderEntry("azure-foundry", "Azure Foundry", "Azure Foundry (OpenAI-style or Anthropic-style endpoint — your Azure AI deployment)"),
|
||||
]
|
||||
|
||||
# Derived dicts — used throughout the codebase
|
||||
@@ -851,7 +876,16 @@ def fetch_openrouter_models(
|
||||
if _openrouter_catalog_cache is not None and not force_refresh:
|
||||
return list(_openrouter_catalog_cache)
|
||||
|
||||
fallback = list(OPENROUTER_MODELS)
|
||||
# Prefer the remotely-hosted catalog manifest; fall back to the in-repo
|
||||
# snapshot when the manifest is unreachable. Both are curated lists that
|
||||
# drive the picker; the OpenRouter live /v1/models filter (tool support,
|
||||
# free pricing) is applied on top either way.
|
||||
try:
|
||||
from hermes_cli.model_catalog import get_curated_openrouter_models
|
||||
remote = get_curated_openrouter_models()
|
||||
except Exception:
|
||||
remote = None
|
||||
fallback = list(remote) if remote else list(OPENROUTER_MODELS)
|
||||
preferred_ids = [mid for mid, _ in fallback]
|
||||
|
||||
try:
|
||||
@@ -904,6 +938,24 @@ def model_ids(*, force_refresh: bool = False) -> list[str]:
|
||||
return [mid for mid, _ in fetch_openrouter_models(force_refresh=force_refresh)]
|
||||
|
||||
|
||||
def get_curated_nous_model_ids() -> list[str]:
    """Return the curated list of Nous Portal model IDs.

    The remotely-hosted catalog manifest (published under
    ``website/static/api/model-catalog.json``) is consulted first through
    ``hermes_cli.model_catalog.get_curated_nous_models``.  When that lookup
    raises, or yields an empty/falsy result, the in-repo snapshot stored in
    ``_PROVIDER_MODELS["nous"]`` is used instead.

    Returns:
        A fresh list of model IDs; possibly empty, never ``None``.
    """
    manifest_ids = None
    try:
        from hermes_cli.model_catalog import get_curated_nous_models

        manifest_ids = get_curated_nous_models()
    except Exception:
        # Best-effort: any import or fetch failure falls back to the snapshot.
        manifest_ids = None

    chosen = manifest_ids if manifest_ids else _PROVIDER_MODELS.get("nous", [])
    return list(chosen)
|
||||
|
||||
|
||||
def _ai_gateway_model_is_free(pricing: Any) -> bool:
|
||||
"""Return True if an AI Gateway model has $0 input AND output pricing."""
|
||||
if not isinstance(pricing, dict):
|
||||
@@ -1104,7 +1156,10 @@ def fetch_models_with_pricing(
|
||||
return _pricing_cache[cache_key]
|
||||
|
||||
url = cache_key.rstrip("/") + "/v1/models"
|
||||
headers: dict[str, str] = {"Accept": "application/json"}
|
||||
headers: dict[str, str] = {
|
||||
"Accept": "application/json",
|
||||
"User-Agent": _HERMES_USER_AGENT,
|
||||
}
|
||||
if api_key:
|
||||
headers["Authorization"] = f"Bearer {api_key}"
|
||||
|
||||
@@ -1355,27 +1410,93 @@ def curated_models_for_provider(
|
||||
return [(m, "") for m in models]
|
||||
|
||||
|
||||
def detect_provider_for_model(
|
||||
def _provider_keys(provider: str) -> set[str]:
|
||||
key = (provider or "").strip().lower()
|
||||
normalized = normalize_provider(provider)
|
||||
return {k for k in (key, normalized) if k}
|
||||
|
||||
|
||||
def _model_in_provider_catalog(name_lower: str, providers: set[str]) -> bool:
|
||||
return any(
|
||||
name_lower == model.lower()
|
||||
for provider in providers
|
||||
for model in _PROVIDER_MODELS.get(provider, [])
|
||||
)
|
||||
|
||||
|
||||
_AGGREGATOR_PROVIDERS = frozenset(
|
||||
{"nous", "openrouter", "ai-gateway", "copilot", "kilocode"}
|
||||
)
|
||||
|
||||
|
||||
def _resolve_static_model_alias(
|
||||
name_lower: str,
|
||||
current_keys: set[str],
|
||||
) -> Optional[tuple[str, str]]:
|
||||
"""Resolve short aliases (e.g. sonnet/opus) using static catalogs only."""
|
||||
try:
|
||||
from hermes_cli.model_switch import MODEL_ALIASES
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
identity = MODEL_ALIASES.get(name_lower)
|
||||
if identity is None:
|
||||
return None
|
||||
|
||||
vendor = identity.vendor
|
||||
family = identity.family
|
||||
|
||||
def _match(provider: str) -> Optional[str]:
|
||||
models = _PROVIDER_MODELS.get(provider, [])
|
||||
if not models:
|
||||
return None
|
||||
prefix = (
|
||||
f"{vendor}/{family}"
|
||||
if provider in _AGGREGATOR_PROVIDERS
|
||||
else family
|
||||
).lower()
|
||||
for model in models:
|
||||
if model.lower().startswith(prefix):
|
||||
return model
|
||||
return None
|
||||
|
||||
for provider in current_keys:
|
||||
if matched := _match(provider):
|
||||
return provider, matched
|
||||
|
||||
for provider in _PROVIDER_MODELS:
|
||||
if provider in current_keys or provider in _AGGREGATOR_PROVIDERS:
|
||||
continue
|
||||
if matched := _match(provider):
|
||||
return provider, matched
|
||||
|
||||
for provider in _AGGREGATOR_PROVIDERS:
|
||||
if provider in current_keys and (matched := _match(provider)):
|
||||
return provider, matched
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def detect_static_provider_for_model(
|
||||
model_name: str,
|
||||
current_provider: str,
|
||||
) -> Optional[tuple[str, str]]:
|
||||
"""Auto-detect the best provider for a model name.
|
||||
"""Auto-detect a provider from static catalogs only.
|
||||
|
||||
Returns ``(provider_id, model_name)`` — the model name may be remapped
|
||||
(e.g. bare ``deepseek-chat`` → ``deepseek/deepseek-chat`` for OpenRouter).
|
||||
Returns ``(provider_id, model_name)``. The model name may be remapped
|
||||
when a static alias or bare provider name resolves to a catalog default.
|
||||
Returns ``None`` when no confident match is found.
|
||||
|
||||
Priority:
|
||||
0. Bare provider name → switch to that provider's default model
|
||||
1. Direct provider with credentials (highest)
|
||||
2. Direct provider without credentials → remap to OpenRouter slug
|
||||
3. OpenRouter catalog match
|
||||
"""
|
||||
name = (model_name or "").strip()
|
||||
if not name:
|
||||
return None
|
||||
|
||||
name_lower = name.lower()
|
||||
current_keys = _provider_keys(current_provider)
|
||||
|
||||
alias_match = _resolve_static_model_alias(name_lower, current_keys)
|
||||
if alias_match:
|
||||
return alias_match
|
||||
|
||||
# --- Step 0: bare provider name typed as model ---
|
||||
# If someone types `/model nous` or `/model anthropic`, treat it as a
|
||||
@@ -1388,64 +1509,49 @@ def detect_provider_for_model(
|
||||
if (
|
||||
resolved_provider in _PROVIDER_LABELS
|
||||
and default_models
|
||||
and resolved_provider != normalize_provider(current_provider)
|
||||
and resolved_provider not in current_keys
|
||||
):
|
||||
return (resolved_provider, default_models[0])
|
||||
|
||||
# Aggregators list other providers' models — never auto-switch TO them
|
||||
_AGGREGATORS = {"nous", "openrouter", "ai-gateway", "copilot", "kilocode"}
|
||||
|
||||
# If the model belongs to the current provider's catalog, don't suggest switching
|
||||
current_models = _PROVIDER_MODELS.get(current_provider, [])
|
||||
if any(name_lower == m.lower() for m in current_models):
|
||||
if _model_in_provider_catalog(name_lower, current_keys):
|
||||
return None
|
||||
|
||||
# --- Step 1: check static provider catalogs for a direct match ---
|
||||
direct_match: Optional[str] = None
|
||||
for pid, models in _PROVIDER_MODELS.items():
|
||||
if pid == current_provider or pid in _AGGREGATORS:
|
||||
if pid in current_keys or pid in _AGGREGATOR_PROVIDERS:
|
||||
continue
|
||||
if any(name_lower == m.lower() for m in models):
|
||||
direct_match = pid
|
||||
break
|
||||
return (pid, name)
|
||||
|
||||
if direct_match:
|
||||
# Check if we have credentials for this provider — env vars,
|
||||
# credential pool, or auth store entries.
|
||||
has_creds = False
|
||||
try:
|
||||
from hermes_cli.auth import PROVIDER_REGISTRY
|
||||
pconfig = PROVIDER_REGISTRY.get(direct_match)
|
||||
if pconfig:
|
||||
for env_var in pconfig.api_key_env_vars:
|
||||
if os.getenv(env_var, "").strip():
|
||||
has_creds = True
|
||||
break
|
||||
except Exception:
|
||||
pass
|
||||
# Also check credential pool and auth store — covers OAuth,
|
||||
# Claude Code tokens, and other non-env-var credentials (#10300).
|
||||
if not has_creds:
|
||||
try:
|
||||
from agent.credential_pool import load_pool
|
||||
pool = load_pool(direct_match)
|
||||
if pool.has_credentials():
|
||||
has_creds = True
|
||||
except Exception:
|
||||
pass
|
||||
if not has_creds:
|
||||
try:
|
||||
from hermes_cli.auth import _load_auth_store
|
||||
store = _load_auth_store()
|
||||
if direct_match in store.get("providers", {}) or direct_match in store.get("credential_pool", {}):
|
||||
has_creds = True
|
||||
except Exception:
|
||||
pass
|
||||
return None
|
||||
|
||||
# Always return the direct provider match. If credentials are
|
||||
# missing, the client init will give a clear error rather than
|
||||
# silently routing through the wrong provider (#10300).
|
||||
return (direct_match, name)
|
||||
|
||||
def detect_provider_for_model(
|
||||
model_name: str,
|
||||
current_provider: str,
|
||||
) -> Optional[tuple[str, str]]:
|
||||
"""Auto-detect the best provider for a model name.
|
||||
|
||||
Returns ``(provider_id, model_name)`` — the model name may be remapped
|
||||
(e.g. bare ``deepseek-chat`` → ``deepseek/deepseek-chat`` for OpenRouter).
|
||||
Returns ``None`` when no confident match is found.
|
||||
|
||||
Priority:
|
||||
0. Bare provider name → switch to that provider's default model
|
||||
1. Direct provider static catalog match
|
||||
2. OpenRouter catalog match
|
||||
"""
|
||||
name = (model_name or "").strip()
|
||||
if not name:
|
||||
return None
|
||||
|
||||
static_match = detect_static_provider_for_model(name, current_provider)
|
||||
if static_match:
|
||||
return static_match
|
||||
if _model_in_provider_catalog(name.lower(), _provider_keys(current_provider)):
|
||||
return None
|
||||
|
||||
# --- Step 2: check OpenRouter catalog ---
|
||||
# First try exact match (handles provider/model format)
|
||||
@@ -1736,6 +1842,17 @@ def provider_model_ids(provider: Optional[str], *, force_refresh: bool = False)
|
||||
live = fetch_ollama_cloud_models(force_refresh=force_refresh)
|
||||
if live:
|
||||
return live
|
||||
if normalized == "openai":
|
||||
api_key = os.getenv("OPENAI_API_KEY", "").strip()
|
||||
if api_key:
|
||||
base_raw = os.getenv("OPENAI_BASE_URL", "").strip().rstrip("/")
|
||||
base = base_raw or "https://api.openai.com/v1"
|
||||
try:
|
||||
live = fetch_api_models(api_key, base)
|
||||
if live:
|
||||
return live
|
||||
except Exception:
|
||||
pass
|
||||
if normalized == "custom":
|
||||
base_url = _get_custom_base_url()
|
||||
if base_url:
|
||||
@@ -1890,6 +2007,51 @@ def fetch_github_model_catalog(
|
||||
return None
|
||||
|
||||
|
||||
# ─── Copilot catalog context-window helpers ─────────────────────────────────
|
||||
|
||||
# Module-level cache: {model_id: max_prompt_tokens}
|
||||
_copilot_context_cache: dict[str, int] = {}
|
||||
_copilot_context_cache_time: float = 0.0
|
||||
_COPILOT_CONTEXT_CACHE_TTL = 3600 # 1 hour
|
||||
|
||||
|
||||
def get_copilot_model_context(model_id: str, api_key: Optional[str] = None) -> Optional[int]:
    """Look up max_prompt_tokens for a Copilot model from the live /models API.

    Results are cached in-process for ``_COPILOT_CONTEXT_CACHE_TTL`` seconds
    to avoid repeated API calls.

    Args:
        model_id: Catalog ID of the model to look up.
        api_key: Optional credential forwarded to
            ``fetch_github_model_catalog``.

    Returns:
        The model's ``capabilities.limits.max_prompt_tokens`` value, or
        ``None`` when the catalog could not be fetched or does not list a
        positive integer limit for ``model_id``.
    """
    global _copilot_context_cache, _copilot_context_cache_time

    # Serve from cache if fresh.  A fresh cache that lacks the model answers
    # None rather than triggering another fetch.
    if _copilot_context_cache and (time.time() - _copilot_context_cache_time < _COPILOT_CONTEXT_CACHE_TTL):
        return _copilot_context_cache.get(model_id)

    # Fetch and populate cache
    catalog = fetch_github_model_catalog(api_key=api_key)
    if not catalog:
        return None

    cache: dict[str, int] = {}
    for item in catalog:
        # Skip malformed entries instead of letting one bad record abort the
        # whole lookup with AttributeError.
        if not isinstance(item, dict):
            continue
        mid = str(item.get("id") or "").strip()
        if not mid:
            continue
        caps = item.get("capabilities")
        limits = caps.get("limits") if isinstance(caps, dict) else None
        max_prompt = limits.get("max_prompt_tokens") if isinstance(limits, dict) else None
        # bool is a subclass of int; a stray ``True`` is not a token limit.
        if (
            isinstance(max_prompt, int)
            and not isinstance(max_prompt, bool)
            and max_prompt > 0
        ):
            cache[mid] = max_prompt

    _copilot_context_cache = cache
    _copilot_context_cache_time = time.time()

    return cache.get(model_id)
|
||||
|
||||
|
||||
def _is_github_models_base_url(base_url: Optional[str]) -> bool:
|
||||
normalized = (base_url or "").strip().rstrip("/").lower()
|
||||
return (
|
||||
@@ -1923,6 +2085,7 @@ _COPILOT_MODEL_ALIASES = {
|
||||
"openai/o4-mini": "gpt-5-mini",
|
||||
"anthropic/claude-opus-4.6": "claude-opus-4.6",
|
||||
"anthropic/claude-sonnet-4.6": "claude-sonnet-4.6",
|
||||
"anthropic/claude-sonnet-4": "claude-sonnet-4",
|
||||
"anthropic/claude-sonnet-4.5": "claude-sonnet-4.5",
|
||||
"anthropic/claude-haiku-4.5": "claude-haiku-4.5",
|
||||
# Dash-notation fallbacks: Hermes' default Claude IDs elsewhere use
|
||||
@@ -1932,10 +2095,12 @@ _COPILOT_MODEL_ALIASES = {
|
||||
# "model_not_supported". See issue #6879.
|
||||
"claude-opus-4-6": "claude-opus-4.6",
|
||||
"claude-sonnet-4-6": "claude-sonnet-4.6",
|
||||
"claude-sonnet-4-0": "claude-sonnet-4",
|
||||
"claude-sonnet-4-5": "claude-sonnet-4.5",
|
||||
"claude-haiku-4-5": "claude-haiku-4.5",
|
||||
"anthropic/claude-opus-4-6": "claude-opus-4.6",
|
||||
"anthropic/claude-sonnet-4-6": "claude-sonnet-4.6",
|
||||
"anthropic/claude-sonnet-4-0": "claude-sonnet-4",
|
||||
"anthropic/claude-sonnet-4-5": "claude-sonnet-4.5",
|
||||
"anthropic/claude-haiku-4-5": "claude-haiku-4.5",
|
||||
}
|
||||
@@ -2160,8 +2325,15 @@ def probe_api_models(
|
||||
api_key: Optional[str],
|
||||
base_url: Optional[str],
|
||||
timeout: float = 5.0,
|
||||
api_mode: Optional[str] = None,
|
||||
) -> dict[str, Any]:
|
||||
"""Probe an OpenAI-compatible ``/models`` endpoint with light URL heuristics."""
|
||||
"""Probe a ``/models`` endpoint with light URL heuristics.
|
||||
|
||||
For ``anthropic_messages`` mode, uses ``x-api-key`` and
|
||||
``anthropic-version`` headers (Anthropic's native auth) instead of
|
||||
``Authorization: Bearer``. The response shape (``data[].id``) is
|
||||
identical, so the same parser works for both.
|
||||
"""
|
||||
normalized = (base_url or "").strip().rstrip("/")
|
||||
if not normalized:
|
||||
return {
|
||||
@@ -2193,7 +2365,10 @@ def probe_api_models(
|
||||
|
||||
tried: list[str] = []
|
||||
headers: dict[str, str] = {"User-Agent": _HERMES_USER_AGENT}
|
||||
if api_key:
|
||||
if api_key and api_mode == "anthropic_messages":
|
||||
headers["x-api-key"] = api_key
|
||||
headers["anthropic-version"] = "2023-06-01"
|
||||
elif api_key:
|
||||
headers["Authorization"] = f"Bearer {api_key}"
|
||||
if normalized.startswith(COPILOT_BASE_URL):
|
||||
headers.update(copilot_default_headers())
|
||||
@@ -2235,7 +2410,10 @@ def _fetch_ai_gateway_models(timeout: float = 5.0) -> Optional[list[str]]:
|
||||
base_url = AI_GATEWAY_BASE_URL
|
||||
|
||||
url = base_url.rstrip("/") + "/models"
|
||||
headers: dict[str, str] = {"Authorization": f"Bearer {api_key}"}
|
||||
headers: dict[str, str] = {
|
||||
"Authorization": f"Bearer {api_key}",
|
||||
"User-Agent": _HERMES_USER_AGENT,
|
||||
}
|
||||
req = urllib.request.Request(url, headers=headers)
|
||||
try:
|
||||
with urllib.request.urlopen(req, timeout=timeout) as resp:
|
||||
@@ -2255,13 +2433,14 @@ def fetch_api_models(
|
||||
api_key: Optional[str],
|
||||
base_url: Optional[str],
|
||||
timeout: float = 5.0,
|
||||
api_mode: Optional[str] = None,
|
||||
) -> Optional[list[str]]:
|
||||
"""Fetch the list of available model IDs from the provider's ``/models`` endpoint.
|
||||
|
||||
Returns a list of model ID strings, or ``None`` if the endpoint could not
|
||||
be reached (network error, timeout, auth failure, etc.).
|
||||
"""
|
||||
return probe_api_models(api_key, base_url, timeout=timeout).get("models")
|
||||
return probe_api_models(api_key, base_url, timeout=timeout, api_mode=api_mode).get("models")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -2389,6 +2568,7 @@ def validate_requested_model(
|
||||
*,
|
||||
api_key: Optional[str] = None,
|
||||
base_url: Optional[str] = None,
|
||||
api_mode: Optional[str] = None,
|
||||
) -> dict[str, Any]:
|
||||
"""
|
||||
Validate a ``/model`` value for the active provider.
|
||||
@@ -2430,7 +2610,11 @@ def validate_requested_model(
|
||||
}
|
||||
|
||||
if normalized == "custom":
|
||||
probe = probe_api_models(api_key, base_url)
|
||||
# Try probing with correct auth for the api_mode.
|
||||
if api_mode == "anthropic_messages":
|
||||
probe = probe_api_models(api_key, base_url, api_mode=api_mode)
|
||||
else:
|
||||
probe = probe_api_models(api_key, base_url)
|
||||
api_models = probe.get("models")
|
||||
if api_models is not None:
|
||||
if requested_for_lookup in set(api_models):
|
||||
@@ -2469,8 +2653,8 @@ def validate_requested_model(
|
||||
)
|
||||
|
||||
return {
|
||||
"accepted": False,
|
||||
"persist": False,
|
||||
"accepted": True,
|
||||
"persist": True,
|
||||
"recognized": False,
|
||||
"message": message,
|
||||
}
|
||||
@@ -2479,12 +2663,17 @@ def validate_requested_model(
|
||||
f"Note: could not reach this custom endpoint's model listing at `{probe.get('probed_url')}`. "
|
||||
f"Hermes will still save `{requested}`, but the endpoint should expose `/models` for verification."
|
||||
)
|
||||
if api_mode == "anthropic_messages":
|
||||
message += (
|
||||
"\n Many Anthropic-compatible proxies do not implement the Models API "
|
||||
"(GET /v1/models). The model name has been accepted without verification."
|
||||
)
|
||||
if probe.get("suggested_base_url"):
|
||||
message += f"\n If this server expects `/v1`, try base URL: `{probe.get('suggested_base_url')}`"
|
||||
|
||||
return {
|
||||
"accepted": False,
|
||||
"persist": False,
|
||||
"accepted": api_mode == "anthropic_messages",
|
||||
"persist": True,
|
||||
"recognized": False,
|
||||
"message": message,
|
||||
}
|
||||
@@ -2572,10 +2761,100 @@ def validate_requested_model(
|
||||
),
|
||||
}
|
||||
|
||||
# Native Anthropic provider: /v1/models requires x-api-key (or Bearer for
|
||||
# OAuth) plus anthropic-version headers. The generic OpenAI-style probe
|
||||
# below uses plain Bearer auth and 401s against Anthropic, so dispatch to
|
||||
# the native fetcher which handles both API keys and Claude-Code OAuth
|
||||
# tokens. (The api_mode=="anthropic_messages" branch below handles the
|
||||
# Messages-API transport case separately.)
|
||||
if normalized == "anthropic":
|
||||
anthropic_models = _fetch_anthropic_models()
|
||||
if anthropic_models is not None:
|
||||
if requested_for_lookup in set(anthropic_models):
|
||||
return {
|
||||
"accepted": True,
|
||||
"persist": True,
|
||||
"recognized": True,
|
||||
"message": None,
|
||||
}
|
||||
auto = get_close_matches(requested_for_lookup, anthropic_models, n=1, cutoff=0.9)
|
||||
if auto:
|
||||
return {
|
||||
"accepted": True,
|
||||
"persist": True,
|
||||
"recognized": True,
|
||||
"corrected_model": auto[0],
|
||||
"message": f"Auto-corrected `{requested}` → `{auto[0]}`",
|
||||
}
|
||||
suggestions = get_close_matches(requested, anthropic_models, n=3, cutoff=0.5)
|
||||
suggestion_text = ""
|
||||
if suggestions:
|
||||
suggestion_text = "\n Similar models: " + ", ".join(f"`{s}`" for s in suggestions)
|
||||
# Accept anyway — Anthropic sometimes gates newer/preview models
|
||||
# (e.g. snapshot IDs, early-access releases) behind accounts
|
||||
# even though they aren't listed on /v1/models.
|
||||
return {
|
||||
"accepted": True,
|
||||
"persist": True,
|
||||
"recognized": False,
|
||||
"message": (
|
||||
f"Note: `{requested}` was not found in Anthropic's /v1/models listing. "
|
||||
f"It may still work if you have early-access or snapshot IDs."
|
||||
f"{suggestion_text}"
|
||||
),
|
||||
}
|
||||
# _fetch_anthropic_models returned None — no token resolvable or
|
||||
# network failure. Fall through to the generic warning below.
|
||||
|
||||
# Anthropic Messages API: many proxies don't implement /v1/models.
|
||||
# Try probing with correct auth; if it fails, accept with a warning.
|
||||
if api_mode == "anthropic_messages":
|
||||
api_models = fetch_api_models(api_key, base_url, api_mode=api_mode)
|
||||
if api_models is not None:
|
||||
if requested_for_lookup in set(api_models):
|
||||
return {
|
||||
"accepted": True,
|
||||
"persist": True,
|
||||
"recognized": True,
|
||||
"message": None,
|
||||
}
|
||||
auto = get_close_matches(requested_for_lookup, api_models, n=1, cutoff=0.9)
|
||||
if auto:
|
||||
return {
|
||||
"accepted": True,
|
||||
"persist": True,
|
||||
"recognized": True,
|
||||
"corrected_model": auto[0],
|
||||
"message": f"Auto-corrected `{requested}` → `{auto[0]}`",
|
||||
}
|
||||
# Probe failed or model not found — accept anyway (proxy likely
|
||||
# doesn't implement the Anthropic Models API).
|
||||
return {
|
||||
"accepted": True,
|
||||
"persist": True,
|
||||
"recognized": False,
|
||||
"message": (
|
||||
f"Note: could not verify `{requested}` against this endpoint's "
|
||||
f"model listing. Many Anthropic-compatible proxies do not "
|
||||
f"implement GET /v1/models. The model name has been accepted "
|
||||
f"without verification."
|
||||
),
|
||||
}
|
||||
|
||||
# Probe the live API to check if the model actually exists
|
||||
api_models = fetch_api_models(api_key, base_url)
|
||||
|
||||
if api_models is not None:
|
||||
# Gemini's OpenAI-compat /v1beta/openai/models endpoint returns IDs
|
||||
# prefixed with "models/" (e.g. "models/gemini-2.5-flash") — native
|
||||
# Gemini-API convention. Our curated list and user input both use
|
||||
# the bare ID, so a direct set-membership check drops every known
|
||||
# Gemini model. Strip the prefix before comparison. See #12532.
|
||||
if normalized == "gemini":
|
||||
api_models = [
|
||||
m[len("models/"):] if isinstance(m, str) and m.startswith("models/") else m
|
||||
for m in api_models
|
||||
]
|
||||
if requested_for_lookup in set(api_models):
|
||||
# API confirmed the model exists
|
||||
return {
|
||||
|
||||
@@ -0,0 +1,202 @@
|
||||
"""Oneshot (-z) mode: send a prompt, get the final content block, exit.
|
||||
|
||||
Bypasses cli.py entirely. No banner, no spinner, no session_id line,
|
||||
no stderr chatter. Just the agent's final text to stdout.
|
||||
|
||||
Toolsets = whatever the user has configured for "cli" in `hermes tools`.
|
||||
Rules / memory / AGENTS.md / preloaded skills = same as a normal chat turn.
|
||||
Approvals = auto-bypassed (HERMES_YOLO_MODE=1 is set for the call).
|
||||
Working directory = the user's CWD (AGENTS.md etc. resolve from there as usual).
|
||||
|
||||
Model / provider selection mirrors `hermes chat`:
|
||||
- Both optional. If omitted, use the user's configured default.
|
||||
- If both given, pair them exactly as given.
|
||||
- If only --model given, auto-detect the provider that serves it.
|
||||
- If only --provider given, error out (ambiguous — caller must pick a model).
|
||||
|
||||
Env var fallbacks (used when the corresponding arg is not passed):
|
||||
- HERMES_INFERENCE_MODEL
|
||||
- HERMES_INFERENCE_PROVIDER (already read by resolve_runtime_provider)
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
from contextlib import redirect_stderr, redirect_stdout
|
||||
from typing import Optional
|
||||
|
||||
|
||||
def run_oneshot(
    prompt: str,
    model: Optional[str] = None,
    provider: Optional[str] = None,
) -> int:
    """Run one prompt through the agent and emit only its final text.

    Args:
        prompt: The user message to send.
        model: Optional model override, forwarded to ``_run_agent``.
        provider: Optional provider override, forwarded to ``_run_agent``.
            Supplying it without a model (argument or the
            ``HERMES_INFERENCE_MODEL`` env var) is rejected.

    Returns:
        ``2`` when ``provider`` is given without any model, otherwise ``0``.
        The caller should ``sys.exit()`` with the returned value.

    Side effects:
        Disables stdlib logging up to CRITICAL, sets ``HERMES_YOLO_MODE`` and
        ``HERMES_ACCEPT_HOOKS`` to ``"1"`` in ``os.environ``, and writes the
        response (newline-terminated) to the stdout that was active on entry.
    """
    # Mute every stdlib logging call at CRITICAL or below, process-wide, so
    # nothing from the agent/tool/provider stack reaches the terminal.
    logging.disable(logging.CRITICAL)

    # A provider without a model is ambiguous: the configured model may not
    # exist on that provider, and silently picking a catalog default would
    # hide the mismatch.  Check this before stderr is redirected so the
    # message is actually visible.
    model_supplied = bool(
        (model or "").strip() or os.getenv("HERMES_INFERENCE_MODEL", "").strip()
    )
    if provider and not model_supplied:
        sys.stderr.write(
            "hermes -z: --provider requires --model (or HERMES_INFERENCE_MODEL). "
            "Pass both explicitly, or neither to use your configured defaults.\n"
        )
        return 2

    # Non-interactive by definition: an approval prompt would hang forever,
    # so shell/tool approvals and hook approvals are auto-accepted.
    os.environ.update({"HERMES_YOLO_MODE": "1", "HERMES_ACCEPT_HOOKS": "1"})

    # Everything printed while the agent runs goes to the null device; the
    # final response is written to the original stdout afterwards.
    terminal_out = sys.stdout
    sink = open(os.devnull, "w")
    try:
        with redirect_stdout(sink):
            with redirect_stderr(sink):
                response = _run_agent(prompt, model=model, provider=provider)
    finally:
        try:
            sink.close()
        except Exception:
            pass

    if response:
        pieces = [response] if response.endswith("\n") else [response, "\n"]
        for piece in pieces:
            terminal_out.write(piece)
        terminal_out.flush()
    return 0
|
||||
|
||||
|
||||
def _run_agent(
    prompt: str,
    model: Optional[str] = None,
    provider: Optional[str] = None,
) -> str:
    """Build an ``AIAgent`` the way a normal CLI chat turn would and run one
    conversation turn.

    Args:
        prompt: The user message handed to ``agent.chat``.
        model: Optional model override; falls back to the
            ``HERMES_INFERENCE_MODEL`` env var, then to the config's
            ``model.default`` / ``model.model`` (or the bare string when
            ``model`` is configured as a string).
        provider: Optional provider override.  When absent but a model was
            requested explicitly (argument or env var), the provider is
            auto-detected with ``detect_provider_for_model``.

    Returns:
        The final response string, or ``""`` when the agent returns a falsy
        value.
    """
    # Deferred imports keep top-level CLI startup cheap for other commands.
    from hermes_cli.config import load_config
    from hermes_cli.models import detect_provider_for_model
    from hermes_cli.runtime_provider import resolve_runtime_provider
    from hermes_cli.tools_config import _get_platform_tools
    from run_agent import AIAgent

    cfg = load_config()

    # --- Effective model: explicit arg -> env var -> config ---------------
    model_cfg = cfg.get("model") or {}
    if isinstance(model_cfg, str):
        cfg_model = model_cfg
    else:
        cfg_model = model_cfg.get("default") or model_cfg.get("model") or ""

    env_model = os.getenv("HERMES_INFERENCE_MODEL", "").strip()
    # "Explicit" means requested via argument or env var, not via config.
    explicit_model = (model or "").strip() or env_model
    effective_model = explicit_model or cfg_model

    # --- Effective provider -----------------------------------------------
    # With an explicit model but no provider, detect the provider serving
    # that model (same semantic as `/model <name>` in an interactive
    # session).  A model coming from config is the "use my defaults" path,
    # so the provider is left for resolve_runtime_provider() to resolve.
    effective_provider = (provider or "").strip() or None
    if effective_provider is None and explicit_model:
        if isinstance(model_cfg, dict):
            cfg_provider = str(model_cfg.get("provider") or "").strip().lower()
        else:
            cfg_provider = ""
        env_provider = os.getenv("HERMES_INFERENCE_PROVIDER", "").strip().lower()
        current_provider = cfg_provider or env_provider or "auto"

        detected = detect_provider_for_model(explicit_model, current_provider)
        if detected:
            effective_provider, effective_model = detected

    runtime = resolve_runtime_provider(
        requested=effective_provider,
        target_model=effective_model or None,
    )

    # Toolsets the user enabled for "cli"; sorted() gives a stable list.
    toolsets_list = sorted(_get_platform_tools(cfg, "cli"))

    agent_kwargs = dict(
        api_key=runtime.get("api_key"),
        base_url=runtime.get("base_url"),
        provider=runtime.get("provider"),
        api_mode=runtime.get("api_mode"),
        model=effective_model,
        enabled_toolsets=toolsets_list,
        quiet_mode=True,
        platform="cli",
        credential_pool=runtime.get("credential_pool"),
        # The clarify callback is the only interactive callback wired up:
        # with no user at a terminal it answers with a synthetic "pick a
        # default" instruction so the agent keeps going.  Dangerous-command
        # and shell-hook approvals are handled through the HERMES_YOLO_MODE /
        # HERMES_ACCEPT_HOOKS env vars set by the caller.
        clarify_callback=_oneshot_clarify_callback,
    )
    agent = AIAgent(**agent_kwargs)

    # Belt-and-braces: no streaming/status callbacks that could bypass the
    # caller's stdout capture.
    for attr_name, attr_value in (
        ("suppress_status_output", True),
        ("stream_delta_callback", None),
        ("tool_gen_callback", None),
    ):
        setattr(agent, attr_name, attr_value)

    reply = agent.chat(prompt)
    return reply if reply else ""
|
||||
|
||||
|
||||
def _oneshot_clarify_callback(question: str, choices=None) -> str:
|
||||
"""Clarify is disabled in oneshot mode — tell the agent to pick a
|
||||
default and proceed instead of stalling or erroring."""
|
||||
if choices:
|
||||
return (
|
||||
f"[oneshot mode: no user available. Pick the best option from "
|
||||
f"{choices} using your own judgment and continue.]"
|
||||
)
|
||||
return (
|
||||
"[oneshot mode: no user available. Make the most reasonable "
|
||||
"assumption you can and continue.]"
|
||||
)
|
||||
@@ -71,6 +71,14 @@ VALID_HOOKS: Set[str] = {
|
||||
"on_session_finalize",
|
||||
"on_session_reset",
|
||||
"subagent_stop",
|
||||
# Gateway pre-dispatch hook. Fired once per incoming MessageEvent
|
||||
# after the internal-event guard but BEFORE auth/pairing and agent
|
||||
# dispatch. Plugins may return a dict to influence flow:
|
||||
# {"action": "skip", "reason": "..."} -> drop message (no reply)
|
||||
# {"action": "rewrite", "text": "..."} -> replace event.text, continue
|
||||
# {"action": "allow"} / None -> normal dispatch
|
||||
# Kwargs: event: MessageEvent, gateway: GatewayRunner, session_store.
|
||||
"pre_gateway_dispatch",
|
||||
}
|
||||
|
||||
ENTRY_POINTS_GROUP = "hermes_agent.plugins"
|
||||
|
||||
@@ -116,6 +116,10 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = {
|
||||
transport="openai_chat",
|
||||
base_url_env_var="DASHSCOPE_BASE_URL",
|
||||
),
|
||||
"alibaba-coding-plan": HermesOverlay(
|
||||
transport="openai_chat",
|
||||
base_url_env_var="ALIBABA_CODING_PLAN_BASE_URL",
|
||||
),
|
||||
"vercel": HermesOverlay(
|
||||
transport="openai_chat",
|
||||
is_aggregator=True,
|
||||
@@ -163,6 +167,12 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = {
|
||||
transport="openai_chat",
|
||||
base_url_env_var="OLLAMA_BASE_URL",
|
||||
),
|
||||
# Azure Foundry: supports both OpenAI-style and Anthropic-style endpoints.
|
||||
# The transport is determined at runtime from config.yaml model.api_mode.
|
||||
"azure-foundry": HermesOverlay(
|
||||
transport="openai_chat", # default; overridden by api_mode in config
|
||||
base_url_env_var="AZURE_FOUNDRY_BASE_URL",
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
@@ -259,6 +269,9 @@ ALIASES: Dict[str, str] = {
|
||||
"aliyun": "alibaba",
|
||||
"qwen": "alibaba",
|
||||
"alibaba-cloud": "alibaba",
|
||||
"alibaba_coding": "alibaba-coding-plan",
|
||||
"alibaba-coding": "alibaba-coding-plan",
|
||||
"alibaba_coding_plan": "alibaba-coding-plan",
|
||||
|
||||
# google-gemini-cli (OAuth + Code Assist)
|
||||
"gemini-cli": "google-gemini-cli",
|
||||
|
||||
@@ -0,0 +1,229 @@
|
||||
"""PTY bridge for `hermes dashboard` chat tab.
|
||||
|
||||
Wraps a child process behind a pseudo-terminal so its ANSI output can be
|
||||
streamed to a browser-side terminal emulator (xterm.js) and typed
|
||||
keystrokes can be fed back in. The only caller today is the
|
||||
``/api/pty`` WebSocket endpoint in ``hermes_cli.web_server``.
|
||||
|
||||
Design constraints:
|
||||
|
||||
* **POSIX-only.** Hermes Agent supports Windows exclusively via WSL, which
|
||||
exposes a native POSIX PTY via ``openpty(3)``. Native Windows Python
|
||||
has no PTY; :class:`PtyUnavailableError` is raised with a user-readable
|
||||
install/platform message so the dashboard can render a banner instead of
|
||||
crashing.
|
||||
* **Zero Node dependency on the server side.** We use :mod:`ptyprocess`,
|
||||
which is a pure-Python wrapper around the OS calls. The browser talks
|
||||
to the same ``hermes --tui`` binary it would launch from the CLI, so
|
||||
every TUI feature (slash popover, model picker, tool rows, markdown,
|
||||
skin engine, clarify/sudo/approval prompts) ships automatically.
|
||||
* **Byte-safe I/O.** Reads and writes go through the PTY master fd
|
||||
directly — we avoid :class:`ptyprocess.PtyProcessUnicode` because
|
||||
streaming ANSI is inherently byte-oriented and UTF-8 boundaries may land
|
||||
mid-read.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import errno
|
||||
import fcntl
|
||||
import os
|
||||
import select
|
||||
import signal
|
||||
import struct
|
||||
import sys
|
||||
import termios
|
||||
import time
|
||||
from typing import Optional, Sequence
|
||||
|
||||
try:
|
||||
import ptyprocess # type: ignore
|
||||
_PTY_AVAILABLE = not sys.platform.startswith("win")
|
||||
except ImportError: # pragma: no cover - dev env without ptyprocess
|
||||
ptyprocess = None # type: ignore
|
||||
_PTY_AVAILABLE = False
|
||||
|
||||
|
||||
__all__ = ["PtyBridge", "PtyUnavailableError"]
|
||||
|
||||
|
||||
class PtyUnavailableError(RuntimeError):
    """Signals that no pseudo-terminal can be created in this environment.

    In practice that is either native Windows (no ConPTY bindings) or a
    development setup where the ``ptyprocess`` package is not installed.
    The message is meant to be shown to the user: the dashboard renders it
    as a banner on the chat tab.
    """
|
||||
|
||||
|
||||
class PtyBridge:
    """Thin wrapper around ``ptyprocess.PtyProcess`` for byte streaming.

    Not thread-safe.  A single bridge is owned by the WebSocket handler
    that spawned it; the reader runs in an executor thread while writes
    happen on the event-loop thread.  Both sides are OK because the
    kernel PTY is the actual synchronization point — we never call
    :mod:`ptyprocess` methods concurrently, we only call ``os.read`` and
    ``os.write`` on the master fd, which is safe.
    """

    # ``struct winsize`` fields are unsigned shorts; anything larger makes
    # ``struct.pack("H", ...)`` raise ``struct.error``.
    _MAX_WINSIZE_DIM = 0xFFFF

    def __init__(self, proc: "ptyprocess.PtyProcess"):  # type: ignore[name-defined]
        """Wrap an already-spawned PTY process.

        Args:
            proc: Object exposing ``fd``, ``pid``, ``isalive()``,
                ``kill(sig)`` and ``close(force=...)``.
        """
        self._proc = proc
        self._fd: int = proc.fd
        self._closed = False

    # -- lifecycle --------------------------------------------------------

    @classmethod
    def is_available(cls) -> bool:
        """True if a PTY can be spawned on this platform."""
        return bool(_PTY_AVAILABLE)

    @classmethod
    def spawn(
        cls,
        argv: Sequence[str],
        *,
        cwd: Optional[str] = None,
        env: Optional[dict] = None,
        cols: int = 80,
        rows: int = 24,
    ) -> "PtyBridge":
        """Spawn ``argv`` behind a new PTY and return a bridge.

        Args:
            argv: Command and arguments to execute.
            cwd: Working directory for the child, or ``None`` to inherit.
            env: Complete environment for the child.  ``None`` means
                "inherit a copy of the server's environment".
            cols: Initial terminal width in columns.
            rows: Initial terminal height in rows.

        Raises :class:`PtyUnavailableError` if the platform can't host a
        PTY.  Raises :class:`FileNotFoundError` or :class:`OSError` for
        ordinary exec failures (missing binary, bad cwd, etc.).
        """
        if not _PTY_AVAILABLE:
            if sys.platform.startswith("win"):
                raise PtyUnavailableError(
                    "Pseudo-terminals are unavailable on this platform. "
                    "Hermes Agent supports Windows only via WSL."
                )
            if ptyprocess is None:
                raise PtyUnavailableError(
                    "The `ptyprocess` package is missing. "
                    "Install with: pip install ptyprocess "
                    "(or pip install -e '.[pty]')."
                )
            raise PtyUnavailableError("Pseudo-terminals are unavailable.")
        # Let caller-supplied env fully override inheritance; if they pass
        # None we inherit the server's env (same semantics as subprocess).
        spawn_env = os.environ.copy() if env is None else env
        proc = ptyprocess.PtyProcess.spawn(  # type: ignore[union-attr]
            list(argv),
            cwd=cwd,
            env=spawn_env,
            dimensions=(rows, cols),
        )
        return cls(proc)

    @property
    def pid(self) -> int:
        """Process id of the wrapped child."""
        return int(self._proc.pid)

    def _proc_alive(self) -> bool:
        """Best-effort liveness probe that never raises.

        ``isalive()`` on the wrapped process may raise; any such failure
        is treated as "not alive".
        """
        try:
            return bool(self._proc.isalive())
        except Exception:
            return False

    def is_alive(self) -> bool:
        """True while the bridge is open and the child is still running."""
        if self._closed:
            return False
        return self._proc_alive()

    # -- I/O --------------------------------------------------------------

    def read(self, timeout: float = 0.2) -> Optional[bytes]:
        """Read up to 64 KiB of raw bytes from the PTY master.

        Returns:
          * non-empty bytes — child output
          * empty bytes (``b""``) — no data available within ``timeout``
          * None — child has exited and the master fd is at EOF

        Never blocks longer than ``timeout`` seconds.  Safe to call after
        :meth:`close`; returns ``None`` in that case.
        """
        if self._closed:
            return None
        try:
            readable, _, _ = select.select([self._fd], [], [], timeout)
        except (OSError, ValueError):
            # The fd is no longer selectable (closed / invalid): report EOF.
            return None
        if not readable:
            return b""
        try:
            data = os.read(self._fd, 65536)
        except OSError as exc:
            # EIO on Linux = slave side closed.  EBADF = already closed.
            if exc.errno in (errno.EIO, errno.EBADF):
                return None
            raise
        if not data:
            return None
        return data

    def write(self, data: bytes) -> None:
        """Write raw bytes to the PTY master (i.e. the child's stdin).

        A no-op after :meth:`close` or for empty ``data``.  Errors that
        mean the other side is gone (EIO/EBADF/EPIPE) are swallowed; any
        other ``OSError`` propagates.
        """
        if self._closed or not data:
            return
        # os.write can return a short write under load; loop until drained.
        view = memoryview(data)
        while view:
            try:
                n = os.write(self._fd, view)
            except OSError as exc:
                if exc.errno in (errno.EIO, errno.EBADF, errno.EPIPE):
                    return
                raise
            if n <= 0:
                return
            view = view[n:]

    def resize(self, cols: int, rows: int) -> None:
        """Forward a terminal resize to the child via ``TIOCSWINSZ``.

        Both dimensions are coerced to ``int`` and clamped to ``1..65535``
        (the range of the unsigned-short ``struct winsize`` fields), so an
        oversized or fractional size cannot raise ``struct.error``.  Values
        that cannot be converted to an integer are ignored.  A failing
        ioctl is ignored as well.
        """
        if self._closed:
            return
        limit = self._MAX_WINSIZE_DIM
        try:
            safe_rows = min(limit, max(1, int(rows)))
            safe_cols = min(limit, max(1, int(cols)))
        except (TypeError, ValueError, OverflowError):
            # Not a usable number (None, NaN, inf, junk string): skip.
            return
        # struct winsize: rows, cols, xpixel, ypixel (all unsigned short)
        winsize = struct.pack("HHHH", safe_rows, safe_cols, 0, 0)
        try:
            fcntl.ioctl(self._fd, termios.TIOCSWINSZ, winsize)
        except OSError:
            pass

    # -- teardown ---------------------------------------------------------

    def close(self) -> None:
        """Terminate the child and close fds.

        Escalates SIGHUP → SIGTERM → SIGKILL, allowing up to 0.5s after
        each signal for the child to exit.  Idempotent.  Reaping the child
        is important so we don't leak zombies across the lifetime of the
        dashboard process.
        """
        if self._closed:
            return
        self._closed = True

        # SIGHUP is the conventional "your terminal went away" signal.
        # We escalate if the child ignores it.
        for sig in (signal.SIGHUP, signal.SIGTERM, signal.SIGKILL):
            # Use the non-raising probe so a failing isalive() cannot skip
            # the final proc.close() below.
            if not self._proc_alive():
                break
            try:
                self._proc.kill(sig)
            except Exception:
                pass
            deadline = time.monotonic() + 0.5
            while self._proc_alive() and time.monotonic() < deadline:
                time.sleep(0.02)

        try:
            self._proc.close(force=True)
        except Exception:
            pass

    # Context-manager sugar — handy in tests and ad-hoc scripts.
    def __enter__(self) -> "PtyBridge":
        return self

    def __exit__(self, *_exc) -> None:
        self.close()
|
||||
+212
-13
@@ -36,6 +36,29 @@ def _normalize_custom_provider_name(value: str) -> str:
|
||||
return value.strip().lower().replace(" ", "-")
|
||||
|
||||
|
||||
def _loopback_hostname(host: str) -> bool:
|
||||
h = (host or "").lower().rstrip(".")
|
||||
return h in {"localhost", "127.0.0.1", "::1", "0.0.0.0"}
|
||||
|
||||
|
||||
def _config_base_url_trustworthy_for_bare_custom(cfg_base_url: str, cfg_provider: str) -> bool:
|
||||
"""Decide whether ``model.base_url`` may back bare ``custom`` runtime resolution.
|
||||
|
||||
GitHub #14676: the model picker can select Custom while ``model.provider`` still reflects a
|
||||
previous provider. Reject non-loopback URLs unless the YAML provider is already ``custom``,
|
||||
so a stale OpenRouter/Z.ai base_url cannot hijack local ``custom`` sessions.
|
||||
"""
|
||||
cfg_provider_norm = (cfg_provider or "").strip().lower()
|
||||
bu = (cfg_base_url or "").strip()
|
||||
if not bu:
|
||||
return False
|
||||
if cfg_provider_norm == "custom":
|
||||
return True
|
||||
if base_url_host_matches(bu, "openrouter.ai"):
|
||||
return False
|
||||
return _loopback_hostname(base_url_hostname(bu))
|
||||
|
||||
|
||||
def _detect_api_mode_for_url(base_url: str) -> Optional[str]:
|
||||
"""Auto-detect api_mode from the resolved base URL.
|
||||
|
||||
@@ -160,8 +183,16 @@ def _resolve_runtime_from_pool_entry(
|
||||
requested_provider: str,
|
||||
model_cfg: Optional[Dict[str, Any]] = None,
|
||||
pool: Optional[CredentialPool] = None,
|
||||
target_model: Optional[str] = None,
|
||||
) -> Dict[str, Any]:
|
||||
model_cfg = model_cfg or _get_model_config()
|
||||
# When the caller is resolving for a specific target model (e.g. a /model
|
||||
# mid-session switch), prefer that over the persisted model.default. This
|
||||
# prevents api_mode being computed from a stale config default that no
|
||||
# longer matches the model actually being used — the bug that caused
|
||||
# opencode-zen /v1 to be stripped for chat_completions requests when
|
||||
# config.default was still a Claude model.
|
||||
effective_model = (target_model or model_cfg.get("default") or "")
|
||||
base_url = (getattr(entry, "runtime_base_url", None) or getattr(entry, "base_url", None) or "").rstrip("/")
|
||||
api_key = getattr(entry, "runtime_api_key", None) or getattr(entry, "access_token", "")
|
||||
api_mode = "chat_completions"
|
||||
@@ -190,6 +221,19 @@ def _resolve_runtime_from_pool_entry(
|
||||
elif provider == "copilot":
|
||||
api_mode = _copilot_runtime_api_mode(model_cfg, getattr(entry, "runtime_api_key", ""))
|
||||
base_url = base_url or PROVIDER_REGISTRY["copilot"].inference_base_url
|
||||
elif provider == "azure-foundry":
|
||||
# Azure Foundry: read api_mode and base_url from config
|
||||
cfg_provider = str(model_cfg.get("provider") or "").strip().lower()
|
||||
if cfg_provider == "azure-foundry":
|
||||
cfg_base_url = str(model_cfg.get("base_url") or "").strip().rstrip("/")
|
||||
if cfg_base_url:
|
||||
base_url = cfg_base_url
|
||||
configured_mode = _parse_api_mode(model_cfg.get("api_mode"))
|
||||
if configured_mode:
|
||||
api_mode = configured_mode
|
||||
# For Anthropic-style endpoints, strip /v1 suffix
|
||||
if api_mode == "anthropic_messages":
|
||||
base_url = re.sub(r"/v1/?$", "", base_url)
|
||||
else:
|
||||
configured_provider = str(model_cfg.get("provider") or "").strip().lower()
|
||||
# Honour model.base_url from config.yaml when the configured provider
|
||||
@@ -207,7 +251,7 @@ def _resolve_runtime_from_pool_entry(
|
||||
api_mode = configured_mode
|
||||
elif provider in ("opencode-zen", "opencode-go"):
|
||||
from hermes_cli.models import opencode_model_api_mode
|
||||
api_mode = opencode_model_api_mode(provider, model_cfg.get("default", ""))
|
||||
api_mode = opencode_model_api_mode(provider, effective_model)
|
||||
else:
|
||||
# Auto-detect Anthropic-compatible endpoints (/anthropic suffix,
|
||||
# Kimi /coding, api.openai.com → codex_responses, api.x.ai →
|
||||
@@ -323,12 +367,16 @@ def _get_named_custom_provider(requested_provider: str) -> Optional[Dict[str, An
|
||||
# Found match by provider key
|
||||
base_url = entry.get("api") or entry.get("url") or entry.get("base_url") or ""
|
||||
if base_url:
|
||||
return {
|
||||
result = {
|
||||
"name": entry.get("name", ep_name),
|
||||
"base_url": base_url.strip(),
|
||||
"api_key": resolved_api_key,
|
||||
"model": entry.get("default_model", ""),
|
||||
}
|
||||
api_mode = _parse_api_mode(entry.get("api_mode"))
|
||||
if api_mode:
|
||||
result["api_mode"] = api_mode
|
||||
return result
|
||||
# Also check the 'name' field if present
|
||||
display_name = entry.get("name", "")
|
||||
if display_name:
|
||||
@@ -337,12 +385,16 @@ def _get_named_custom_provider(requested_provider: str) -> Optional[Dict[str, An
|
||||
# Found match by display name
|
||||
base_url = entry.get("api") or entry.get("url") or entry.get("base_url") or ""
|
||||
if base_url:
|
||||
return {
|
||||
result = {
|
||||
"name": display_name,
|
||||
"base_url": base_url.strip(),
|
||||
"api_key": resolved_api_key,
|
||||
"model": entry.get("default_model", ""),
|
||||
}
|
||||
api_mode = _parse_api_mode(entry.get("api_mode"))
|
||||
if api_mode:
|
||||
result["api_mode"] = api_mode
|
||||
return result
|
||||
|
||||
# Fall back to custom_providers: list (legacy format)
|
||||
custom_providers = config.get("custom_providers")
|
||||
@@ -464,6 +516,7 @@ def _resolve_openrouter_runtime(
|
||||
cfg_provider = cfg_provider.strip().lower()
|
||||
|
||||
env_openrouter_base_url = os.getenv("OPENROUTER_BASE_URL", "").strip()
|
||||
env_custom_base_url = os.getenv("CUSTOM_BASE_URL", "").strip()
|
||||
|
||||
# Use config base_url when available and the provider context matches.
|
||||
# OPENAI_BASE_URL env var is no longer consulted — config.yaml is
|
||||
@@ -473,11 +526,14 @@ def _resolve_openrouter_runtime(
|
||||
if requested_norm == "auto":
|
||||
if not cfg_provider or cfg_provider == "auto":
|
||||
use_config_base_url = True
|
||||
elif requested_norm == "custom" and cfg_provider == "custom":
|
||||
elif requested_norm == "custom" and _config_base_url_trustworthy_for_bare_custom(
|
||||
cfg_base_url, cfg_provider
|
||||
):
|
||||
use_config_base_url = True
|
||||
|
||||
base_url = (
|
||||
(explicit_base_url or "").strip()
|
||||
or env_custom_base_url
|
||||
or (cfg_base_url.strip() if use_config_base_url else "")
|
||||
or env_openrouter_base_url
|
||||
or OPENROUTER_BASE_URL
|
||||
@@ -546,6 +602,71 @@ def _resolve_openrouter_runtime(
|
||||
}
|
||||
|
||||
|
||||
def _resolve_azure_foundry_runtime(
|
||||
*,
|
||||
requested_provider: str,
|
||||
model_cfg: Dict[str, Any],
|
||||
explicit_api_key: Optional[str] = None,
|
||||
explicit_base_url: Optional[str] = None,
|
||||
) -> Dict[str, Any]:
|
||||
"""Resolve an Azure Foundry runtime entry.
|
||||
|
||||
Reads ``model.base_url`` + ``model.api_mode`` from config.yaml (or
|
||||
explicit overrides), pulls the API key from ``.env`` / env var, and
|
||||
strips a trailing ``/v1`` for Anthropic-style endpoints because the
|
||||
Anthropic SDK appends ``/v1/messages`` internally.
|
||||
|
||||
Raises :class:`AuthError` when required values are missing.
|
||||
"""
|
||||
explicit_api_key = str(explicit_api_key or "").strip()
|
||||
explicit_base_url_clean = str(explicit_base_url or "").strip().rstrip("/")
|
||||
|
||||
cfg_provider = str(model_cfg.get("provider") or "").strip().lower()
|
||||
cfg_base_url = ""
|
||||
cfg_api_mode = "chat_completions"
|
||||
if cfg_provider == "azure-foundry":
|
||||
cfg_base_url = str(model_cfg.get("base_url") or "").strip().rstrip("/")
|
||||
cfg_api_mode = _parse_api_mode(model_cfg.get("api_mode")) or "chat_completions"
|
||||
|
||||
env_base_url = os.getenv("AZURE_FOUNDRY_BASE_URL", "").strip().rstrip("/")
|
||||
base_url = explicit_base_url_clean or cfg_base_url or env_base_url
|
||||
if not base_url:
|
||||
raise AuthError(
|
||||
"Azure Foundry requires a base URL. Set it via 'hermes model' or "
|
||||
"the AZURE_FOUNDRY_BASE_URL environment variable."
|
||||
)
|
||||
|
||||
api_key = explicit_api_key
|
||||
if not api_key:
|
||||
try:
|
||||
from hermes_cli.config import get_env_value
|
||||
api_key = get_env_value("AZURE_FOUNDRY_API_KEY") or ""
|
||||
except Exception:
|
||||
api_key = ""
|
||||
if not api_key:
|
||||
api_key = os.getenv("AZURE_FOUNDRY_API_KEY", "").strip()
|
||||
if not api_key:
|
||||
raise AuthError(
|
||||
"Azure Foundry requires an API key. Set AZURE_FOUNDRY_API_KEY in "
|
||||
"~/.hermes/.env or run 'hermes model' to configure."
|
||||
)
|
||||
|
||||
# Anthropic SDK appends /v1/messages itself, so strip any trailing /v1
|
||||
# we inherited from the configured base_url to avoid double-/v1 paths.
|
||||
if cfg_api_mode == "anthropic_messages":
|
||||
base_url = re.sub(r"/v1/?$", "", base_url)
|
||||
|
||||
source = "explicit" if (explicit_api_key or explicit_base_url) else "config"
|
||||
return {
|
||||
"provider": "azure-foundry",
|
||||
"api_mode": cfg_api_mode,
|
||||
"base_url": base_url,
|
||||
"api_key": api_key,
|
||||
"source": source,
|
||||
"requested_provider": requested_provider,
|
||||
}
|
||||
|
||||
|
||||
def _resolve_explicit_runtime(
|
||||
*,
|
||||
provider: str,
|
||||
@@ -635,6 +756,15 @@ def _resolve_explicit_runtime(
|
||||
"requested_provider": requested_provider,
|
||||
}
|
||||
|
||||
# Azure Foundry: user-configured endpoint with selectable API mode
|
||||
if provider == "azure-foundry":
|
||||
return _resolve_azure_foundry_runtime(
|
||||
requested_provider=requested_provider,
|
||||
model_cfg=model_cfg,
|
||||
explicit_api_key=explicit_api_key,
|
||||
explicit_base_url=explicit_base_url,
|
||||
)
|
||||
|
||||
pconfig = PROVIDER_REGISTRY.get(provider)
|
||||
if pconfig and pconfig.auth_type == "api_key":
|
||||
env_url = ""
|
||||
@@ -689,10 +819,54 @@ def resolve_runtime_provider(
|
||||
requested: Optional[str] = None,
|
||||
explicit_api_key: Optional[str] = None,
|
||||
explicit_base_url: Optional[str] = None,
|
||||
target_model: Optional[str] = None,
|
||||
) -> Dict[str, Any]:
|
||||
"""Resolve runtime provider credentials for agent execution."""
|
||||
"""Resolve runtime provider credentials for agent execution.
|
||||
|
||||
target_model: Optional override for model_cfg.get("default") when
|
||||
computing provider-specific api_mode (e.g. OpenCode Zen/Go where different
|
||||
models route through different API surfaces). Callers performing an
|
||||
explicit mid-session model switch should pass the new model here so
|
||||
api_mode is derived from the model they are switching TO, not the stale
|
||||
persisted default. Other callers can leave it None to preserve existing
|
||||
behavior (api_mode derived from config).
|
||||
"""
|
||||
requested_provider = resolve_requested_provider(requested)
|
||||
|
||||
# Azure Anthropic short-circuit: when explicitly targeting an Azure endpoint
|
||||
# with provider="anthropic", bypass _resolve_named_custom_runtime (which would
|
||||
# return provider="custom" with chat_completions api_mode and no valid key).
|
||||
# Instead, use the Azure key directly with anthropic_messages api_mode.
|
||||
_eff_base = (explicit_base_url or "").strip()
|
||||
if requested_provider == "anthropic" and "azure.com" in _eff_base:
|
||||
_azure_key = (
|
||||
(explicit_api_key or "").strip()
|
||||
or os.getenv("AZURE_ANTHROPIC_KEY", "").strip()
|
||||
or os.getenv("ANTHROPIC_API_KEY", "").strip()
|
||||
)
|
||||
return {
|
||||
"provider": "anthropic",
|
||||
"api_mode": "anthropic_messages",
|
||||
"base_url": _eff_base.rstrip("/"),
|
||||
"api_key": _azure_key,
|
||||
"source": "azure-explicit",
|
||||
"requested_provider": requested_provider,
|
||||
}
|
||||
|
||||
# Azure Foundry: user-configured endpoint with selectable API mode
|
||||
# (OpenAI-style chat_completions or Anthropic-style anthropic_messages).
|
||||
# Resolve before the custom-runtime / pool / generic paths so Azure
|
||||
# config is always picked up from model.base_url + model.api_mode,
|
||||
# regardless of whether the caller passed explicit_* args.
|
||||
if requested_provider == "azure-foundry":
|
||||
azure_runtime = _resolve_azure_foundry_runtime(
|
||||
requested_provider=requested_provider,
|
||||
model_cfg=_get_model_config(),
|
||||
explicit_api_key=explicit_api_key,
|
||||
explicit_base_url=explicit_base_url,
|
||||
)
|
||||
return azure_runtime
|
||||
|
||||
custom_runtime = _resolve_named_custom_runtime(
|
||||
requested_provider=requested_provider,
|
||||
explicit_api_key=explicit_api_key,
|
||||
@@ -772,6 +946,7 @@ def resolve_runtime_provider(
|
||||
requested_provider=requested_provider,
|
||||
model_cfg=model_cfg,
|
||||
pool=pool,
|
||||
target_model=target_model,
|
||||
)
|
||||
|
||||
if provider == "nous":
|
||||
@@ -870,13 +1045,6 @@ def resolve_runtime_provider(
|
||||
|
||||
# Anthropic (native Messages API)
|
||||
if provider == "anthropic":
|
||||
from agent.anthropic_adapter import resolve_anthropic_token
|
||||
token = resolve_anthropic_token()
|
||||
if not token:
|
||||
raise AuthError(
|
||||
"No Anthropic credentials found. Set ANTHROPIC_TOKEN or ANTHROPIC_API_KEY, "
|
||||
"run 'claude setup-token', or authenticate with 'claude /login'."
|
||||
)
|
||||
# Allow base URL override from config.yaml model.base_url, but only
|
||||
# when the configured provider is anthropic — otherwise a non-Anthropic
|
||||
# base_url (e.g. Codex endpoint) would leak into Anthropic requests.
|
||||
@@ -885,6 +1053,33 @@ def resolve_runtime_provider(
|
||||
if cfg_provider == "anthropic":
|
||||
cfg_base_url = (model_cfg.get("base_url") or "").strip().rstrip("/")
|
||||
base_url = cfg_base_url or "https://api.anthropic.com"
|
||||
|
||||
# For Azure AI Foundry endpoints, use ANTHROPIC_API_KEY directly —
|
||||
# Claude Code OAuth tokens (sk-ant-oat01) are not accepted by Azure.
|
||||
# Azure keys don't start with "sk-ant-" so resolve_anthropic_token()
|
||||
# would find the Claude Code OAuth token first (priority 3) and return
|
||||
# that instead, causing 401s. Detect Azure endpoints and use the env
|
||||
# key directly to bypass the OAuth priority chain.
|
||||
_is_azure_endpoint = "azure.com" in base_url.lower() or (
|
||||
cfg_base_url and "azure.com" in cfg_base_url.lower()
|
||||
)
|
||||
if _is_azure_endpoint:
|
||||
token = (
|
||||
os.getenv("AZURE_ANTHROPIC_KEY", "").strip()
|
||||
or os.getenv("ANTHROPIC_API_KEY", "").strip()
|
||||
)
|
||||
if not token:
|
||||
raise AuthError(
|
||||
"No Azure Anthropic API key found. Set AZURE_ANTHROPIC_KEY or ANTHROPIC_API_KEY."
|
||||
)
|
||||
else:
|
||||
from agent.anthropic_adapter import resolve_anthropic_token
|
||||
token = resolve_anthropic_token()
|
||||
if not token:
|
||||
raise AuthError(
|
||||
"No Anthropic credentials found. Set ANTHROPIC_TOKEN or ANTHROPIC_API_KEY, "
|
||||
"run 'claude setup-token', or authenticate with 'claude /login'."
|
||||
)
|
||||
return {
|
||||
"provider": "anthropic",
|
||||
"api_mode": "anthropic_messages",
|
||||
@@ -990,7 +1185,11 @@ def resolve_runtime_provider(
|
||||
api_mode = configured_mode
|
||||
elif provider in ("opencode-zen", "opencode-go"):
|
||||
from hermes_cli.models import opencode_model_api_mode
|
||||
api_mode = opencode_model_api_mode(provider, model_cfg.get("default", ""))
|
||||
# Prefer the target_model from the caller (explicit mid-session
|
||||
# switch) over the stale model.default; see _resolve_runtime_from_pool_entry
|
||||
# for the same rationale.
|
||||
_effective = target_model or model_cfg.get("default", "")
|
||||
api_mode = opencode_model_api_mode(provider, _effective)
|
||||
else:
|
||||
# Auto-detect Anthropic-compatible endpoints by URL convention
|
||||
# (e.g. https://api.minimax.io/anthropic, https://dashscope.../anthropic)
|
||||
|
||||
+36
-49
@@ -500,6 +500,15 @@ def _print_setup_summary(config: dict, hermes_home):
|
||||
if get_env_value("HASS_TOKEN"):
|
||||
tool_status.append(("Smart Home (Home Assistant)", True, None))
|
||||
|
||||
# Spotify (OAuth via hermes auth spotify — check auth.json, not env vars)
|
||||
try:
|
||||
from hermes_cli.auth import get_provider_auth_state
|
||||
_spotify_state = get_provider_auth_state("spotify") or {}
|
||||
if _spotify_state.get("access_token") or _spotify_state.get("refresh_token"):
|
||||
tool_status.append(("Spotify (PKCE OAuth)", True, None))
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Skills Hub
|
||||
if get_env_value("GITHUB_TOKEN"):
|
||||
tool_status.append(("Skills Hub (GitHub)", True, None))
|
||||
@@ -2854,17 +2863,6 @@ SETUP_SECTIONS = [
|
||||
("agent", "Agent Settings", setup_agent_settings),
|
||||
]
|
||||
|
||||
# The returning-user menu intentionally omits standalone TTS because model setup
|
||||
# already includes TTS selection and tools setup covers the rest of the provider
|
||||
# configuration. Keep this list in the same order as the visible menu entries.
|
||||
RETURNING_USER_MENU_SECTION_KEYS = [
|
||||
"model",
|
||||
"terminal",
|
||||
"gateway",
|
||||
"tools",
|
||||
"agent",
|
||||
]
|
||||
|
||||
|
||||
def run_setup_wizard(args):
|
||||
"""Run the interactive setup wizard.
|
||||
@@ -2889,6 +2887,9 @@ def run_setup_wizard(args):
|
||||
save_config(copy.deepcopy(DEFAULT_CONFIG))
|
||||
print_success("Configuration reset to defaults.")
|
||||
|
||||
reconfigure_requested = bool(getattr(args, "reconfigure", False))
|
||||
quick_requested = bool(getattr(args, "quick", False))
|
||||
|
||||
config = load_config()
|
||||
hermes_home = get_hermes_home()
|
||||
|
||||
@@ -2980,50 +2981,36 @@ def run_setup_wizard(args):
|
||||
migration_ran = False
|
||||
|
||||
if is_existing:
|
||||
# ── Returning User Menu ──
|
||||
print()
|
||||
print_header("Welcome Back!")
|
||||
print_success("You already have Hermes configured.")
|
||||
print()
|
||||
|
||||
menu_choices = [
|
||||
"Quick Setup - configure missing items only",
|
||||
"Full Setup - reconfigure everything",
|
||||
"Model & Provider",
|
||||
"Terminal Backend",
|
||||
"Messaging Platforms (Gateway)",
|
||||
"Tools",
|
||||
"Agent Settings",
|
||||
"Exit",
|
||||
]
|
||||
choice = prompt_choice("What would you like to do?", menu_choices, 0)
|
||||
|
||||
if choice == 0:
|
||||
# Quick setup
|
||||
# Existing install — default is the full-wizard reconfigure flow.
|
||||
# Every prompt shows the current value as its default, so pressing
|
||||
# Enter keeps it. Opt into `--quick` for the narrow "just fill in
|
||||
# missing items" flow (useful after a partial OpenClaw migration
|
||||
# or when a required API key got cleared).
|
||||
if quick_requested:
|
||||
_run_quick_setup(config, hermes_home)
|
||||
return
|
||||
elif choice == 1:
|
||||
# Full setup — fall through to run all sections
|
||||
pass
|
||||
elif choice == 7:
|
||||
print_info("Exiting. Run 'hermes setup' again when ready.")
|
||||
return
|
||||
elif 2 <= choice <= 6:
|
||||
# Individual section — map by key, not by position.
|
||||
# SETUP_SECTIONS includes TTS but the returning-user menu skips it,
|
||||
# so positional indexing (choice - 2) would dispatch the wrong section.
|
||||
section_key = RETURNING_USER_MENU_SECTION_KEYS[choice - 2]
|
||||
section = next((s for s in SETUP_SECTIONS if s[0] == section_key), None)
|
||||
if section:
|
||||
_, label, func = section
|
||||
func(config)
|
||||
save_config(config)
|
||||
_print_setup_summary(config, hermes_home)
|
||||
return
|
||||
|
||||
print()
|
||||
print_header("Reconfigure")
|
||||
print_success("You already have Hermes configured.")
|
||||
print_info("Running the full wizard — each prompt shows your current value.")
|
||||
print_info("Press Enter to keep it, or type a new value to change it.")
|
||||
print_info("")
|
||||
print_info("Tip: jump straight to a section with 'hermes setup model|terminal|")
|
||||
print_info(" gateway|tools|agent', or fill only missing items with --quick.")
|
||||
# Fall through to the "Full Setup — run all sections" block below.
|
||||
# --reconfigure is now the default on existing installs; the flag
|
||||
# is preserved for backwards compatibility but is a no-op here.
|
||||
else:
|
||||
# ── First-Time Setup ──
|
||||
print()
|
||||
|
||||
# --reconfigure / --quick on a fresh install are meaningless — fall
|
||||
# through to the normal first-time flow.
|
||||
if reconfigure_requested or quick_requested:
|
||||
print_info("No existing configuration found — running first-time setup.")
|
||||
print()
|
||||
|
||||
# Offer OpenClaw migration before configuration begins
|
||||
migration_ran = _offer_openclaw_migration(hermes_home)
|
||||
if migration_ran:
|
||||
|
||||
+13
-6
@@ -164,19 +164,26 @@ def show_status(args):
|
||||
qwen_status = {}
|
||||
|
||||
nous_logged_in = bool(nous_status.get("logged_in"))
|
||||
nous_error = nous_status.get("error")
|
||||
nous_label = "logged in" if nous_logged_in else "not logged in (run: hermes auth add nous --type oauth)"
|
||||
print(
|
||||
f" {'Nous Portal':<12} {check_mark(nous_logged_in)} "
|
||||
f"{'logged in' if nous_logged_in else 'not logged in (run: hermes model)'}"
|
||||
f"{nous_label}"
|
||||
)
|
||||
if nous_logged_in:
|
||||
portal_url = nous_status.get("portal_base_url") or "(unknown)"
|
||||
access_exp = _format_iso_timestamp(nous_status.get("access_expires_at"))
|
||||
key_exp = _format_iso_timestamp(nous_status.get("agent_key_expires_at"))
|
||||
refresh_label = "yes" if nous_status.get("has_refresh_token") else "no"
|
||||
portal_url = nous_status.get("portal_base_url") or "(unknown)"
|
||||
access_exp = _format_iso_timestamp(nous_status.get("access_expires_at"))
|
||||
key_exp = _format_iso_timestamp(nous_status.get("agent_key_expires_at"))
|
||||
refresh_label = "yes" if nous_status.get("has_refresh_token") else "no"
|
||||
if nous_logged_in or portal_url != "(unknown)" or nous_error:
|
||||
print(f" Portal URL: {portal_url}")
|
||||
if nous_logged_in or nous_status.get("access_expires_at"):
|
||||
print(f" Access exp: {access_exp}")
|
||||
if nous_logged_in or nous_status.get("agent_key_expires_at"):
|
||||
print(f" Key exp: {key_exp}")
|
||||
if nous_logged_in or nous_status.get("has_refresh_token"):
|
||||
print(f" Refresh: {refresh_label}")
|
||||
if nous_error and not nous_logged_in:
|
||||
print(f" Error: {nous_error}")
|
||||
|
||||
codex_logged_in = bool(codex_status.get("logged_in"))
|
||||
print(
|
||||
|
||||
+2
-3
@@ -10,8 +10,7 @@ import random
|
||||
|
||||
TIPS = [
|
||||
# --- Slash Commands ---
|
||||
"/btw <question> asks a quick side question without tools or history — great for clarifications.",
|
||||
"/background <prompt> runs a task in a separate session while your current one stays free.",
|
||||
"/background <prompt> (alias /bg or /btw) runs a task in a separate session while your current one stays free.",
|
||||
"/branch forks the current session so you can explore a different direction without losing progress.",
|
||||
"/compress manually compresses conversation context when things get long.",
|
||||
"/rollback lists filesystem checkpoints — restore files the agent modified to any prior state.",
|
||||
@@ -127,7 +126,7 @@ TIPS = [
|
||||
|
||||
# --- Tools & Capabilities ---
|
||||
"execute_code runs Python scripts that call Hermes tools programmatically — results stay out of context.",
|
||||
"delegate_task spawns up to 3 concurrent sub-agents by default (configurable via delegation.max_concurrent_children) with isolated contexts for parallel work.",
|
||||
"delegate_task spawns up to 3 concurrent sub-agents by default (delegation.max_concurrent_children) with isolated contexts for parallel work.",
|
||||
"web_extract works on PDF URLs — pass any PDF link and it converts to markdown.",
|
||||
"search_files is ripgrep-backed and faster than grep — use it instead of terminal grep.",
|
||||
"patch uses 9 fuzzy matching strategies so minor whitespace differences won't break edits.",
|
||||
|
||||
+172
-13
@@ -67,25 +67,59 @@ CONFIGURABLE_TOOLSETS = [
|
||||
("messaging", "📨 Cross-Platform Messaging", "send_message"),
|
||||
("rl", "🧪 RL Training", "Tinker-Atropos training tools"),
|
||||
("homeassistant", "🏠 Home Assistant", "smart home device control"),
|
||||
("spotify", "🎵 Spotify", "playback, search, playlists, library"),
|
||||
("discord", "💬 Discord (read/participate)", "fetch messages, search members, create thread"),
|
||||
("discord_admin", "🛡️ Discord Server Admin", "list channels/roles, pin, assign roles"),
|
||||
]
|
||||
|
||||
# Toolsets that are OFF by default for new installs.
|
||||
# They're still in _HERMES_CORE_TOOLS (available at runtime if enabled),
|
||||
# but the setup checklist won't pre-select them for first-time users.
|
||||
_DEFAULT_OFF_TOOLSETS = {"moa", "homeassistant", "rl"}
|
||||
_DEFAULT_OFF_TOOLSETS = {"moa", "homeassistant", "rl", "spotify", "discord", "discord_admin"}
|
||||
|
||||
# Platform-scoped toolsets: only appear in the `hermes tools` checklist for
|
||||
# these platforms, and only resolve/save for these platforms. A toolset
|
||||
# absent from this map is available on every platform (current behaviour).
|
||||
#
|
||||
# Use this for tools whose APIs only make sense on one platform (Discord
|
||||
# server admin, Slack workspace admin, etc.). Keeps every other platform's
|
||||
# checklist from filling up with irrelevant toggles.
|
||||
_TOOLSET_PLATFORM_RESTRICTIONS: Dict[str, Set[str]] = {
|
||||
"discord": {"discord"},
|
||||
"discord_admin": {"discord"},
|
||||
}
|
||||
|
||||
|
||||
def _toolset_allowed_for_platform(ts_key: str, platform: str) -> bool:
    """Tell whether the toolset ``ts_key`` may be configured on ``platform``.

    The lookup is driven by ``_TOOLSET_PLATFORM_RESTRICTIONS``: a toolset
    that has no entry there is unrestricted and is allowed on every
    platform; a toolset with an entry is allowed only on the platforms
    listed for it.
    """
    restricted_to = _TOOLSET_PLATFORM_RESTRICTIONS.get(ts_key)
    if restricted_to is None:
        # No restriction recorded for this toolset: available everywhere.
        return True
    return platform in restricted_to
|
||||
|
||||
|
||||
def _get_effective_configurable_toolsets():
|
||||
"""Return CONFIGURABLE_TOOLSETS + any plugin-provided toolsets.
|
||||
|
||||
Plugin toolsets are appended at the end so they appear after the
|
||||
built-in toolsets in the TUI checklist.
|
||||
built-in toolsets in the TUI checklist. A plugin whose toolset key
|
||||
already appears in ``CONFIGURABLE_TOOLSETS`` is skipped — bundled
|
||||
plugins (e.g. ``plugins/spotify``) share their toolset key with the
|
||||
built-in entry, and we want the built-in label/description to win.
|
||||
Without the dedupe, ``hermes tools`` → "reconfigure existing" would
|
||||
list the same toolset twice.
|
||||
"""
|
||||
result = list(CONFIGURABLE_TOOLSETS)
|
||||
seen = {ts_key for ts_key, _, _ in result}
|
||||
try:
|
||||
from hermes_cli.plugins import discover_plugins, get_plugin_toolsets
|
||||
discover_plugins() # idempotent — ensures plugins are loaded
|
||||
result.extend(get_plugin_toolsets())
|
||||
for entry in get_plugin_toolsets():
|
||||
if entry[0] in seen:
|
||||
continue
|
||||
seen.add(entry[0])
|
||||
result.append(entry)
|
||||
except Exception:
|
||||
pass
|
||||
return result
|
||||
@@ -361,6 +395,18 @@ TOOL_CATEGORIES = {
|
||||
},
|
||||
],
|
||||
},
|
||||
"spotify": {
|
||||
"name": "Spotify",
|
||||
"icon": "🎵",
|
||||
"providers": [
|
||||
{
|
||||
"name": "Spotify Web API",
|
||||
"tag": "PKCE OAuth — opens the setup wizard",
|
||||
"env_vars": [],
|
||||
"post_setup": "spotify",
|
||||
},
|
||||
],
|
||||
},
|
||||
"rl": {
|
||||
"name": "RL Training",
|
||||
"icon": "🧪",
|
||||
@@ -461,6 +507,35 @@ def _run_post_setup(post_setup_key: str):
|
||||
_print_warning(" kittentts install timed out (>5min)")
|
||||
_print_info(f" Run manually: python -m pip install -U '{wheel_url}' soundfile")
|
||||
|
||||
elif post_setup_key == "spotify":
|
||||
# Run the full `hermes auth spotify` flow — if the user has no
|
||||
# client_id yet, this drops them into the interactive wizard
|
||||
# (opens the Spotify dashboard, prompts for client_id, persists
|
||||
# to ~/.hermes/.env), then continues straight into PKCE. If they
|
||||
# already have an app, it skips the wizard and just does OAuth.
|
||||
from types import SimpleNamespace
|
||||
try:
|
||||
from hermes_cli.auth import login_spotify_command
|
||||
except Exception as exc:
|
||||
_print_warning(f" Could not load Spotify auth: {exc}")
|
||||
_print_info(" Run manually: hermes auth spotify")
|
||||
return
|
||||
_print_info(" Starting Spotify login...")
|
||||
try:
|
||||
login_spotify_command(SimpleNamespace(
|
||||
client_id=None, redirect_uri=None, scope=None,
|
||||
no_browser=False, timeout=None,
|
||||
))
|
||||
_print_success(" Spotify authenticated")
|
||||
except SystemExit as exc:
|
||||
# User aborted the wizard, or OAuth failed — don't fail the
|
||||
# toolset enable; they can retry with `hermes auth spotify`.
|
||||
_print_warning(f" Spotify login did not complete: {exc}")
|
||||
_print_info(" Run later: hermes auth spotify")
|
||||
except Exception as exc:
|
||||
_print_warning(f" Spotify login failed: {exc}")
|
||||
_print_info(" Run manually: hermes auth spotify")
|
||||
|
||||
elif post_setup_key == "rl_training":
|
||||
try:
|
||||
__import__("tinker_atropos")
|
||||
@@ -549,7 +624,7 @@ def _get_platform_tools(
|
||||
include_default_mcp_servers: bool = True,
|
||||
) -> Set[str]:
|
||||
"""Resolve which individual toolset names are enabled for a platform."""
|
||||
from toolsets import resolve_toolset
|
||||
from toolsets import resolve_toolset, TOOLSETS
|
||||
|
||||
platform_toolsets = config.get("platform_toolsets") or {}
|
||||
toolset_names = platform_toolsets.get(platform)
|
||||
@@ -563,6 +638,8 @@ def _get_platform_tools(
|
||||
toolset_names = [str(ts) for ts in toolset_names]
|
||||
|
||||
configurable_keys = {ts_key for ts_key, _, _ in CONFIGURABLE_TOOLSETS}
|
||||
plugin_ts_keys = _get_plugin_toolset_keys()
|
||||
platform_default_keys = {p["default_toolset"] for p in PLATFORMS.values()}
|
||||
|
||||
# If the saved list contains any configurable keys directly, the user
|
||||
# has explicitly configured this platform — use direct membership.
|
||||
@@ -572,7 +649,10 @@ def _get_platform_tools(
|
||||
has_explicit_config = any(ts in configurable_keys for ts in toolset_names)
|
||||
|
||||
if has_explicit_config:
|
||||
enabled_toolsets = {ts for ts in toolset_names if ts in configurable_keys}
|
||||
enabled_toolsets = {
|
||||
ts for ts in toolset_names
|
||||
if ts in configurable_keys and _toolset_allowed_for_platform(ts, platform)
|
||||
}
|
||||
else:
|
||||
# No explicit config — fall back to resolving composite toolset names
|
||||
# (e.g. "hermes-cli") to individual tool names and reverse-mapping.
|
||||
@@ -582,19 +662,59 @@ def _get_platform_tools(
|
||||
|
||||
enabled_toolsets = set()
|
||||
for ts_key, _, _ in CONFIGURABLE_TOOLSETS:
|
||||
if not _toolset_allowed_for_platform(ts_key, platform):
|
||||
continue
|
||||
ts_tools = set(resolve_toolset(ts_key))
|
||||
if ts_tools and ts_tools.issubset(all_tool_names):
|
||||
enabled_toolsets.add(ts_key)
|
||||
|
||||
default_off = set(_DEFAULT_OFF_TOOLSETS)
|
||||
if platform in default_off:
|
||||
# Legacy safety: if the platform's own name matches a default-off
|
||||
# toolset (e.g. `homeassistant` platform + `homeassistant` toolset),
|
||||
# keep that toolset enabled on first install. Skip this dodge for
|
||||
# platform-restricted toolsets — those are always opt-in even on
|
||||
# their own platform (e.g. `discord` + `discord` should stay OFF).
|
||||
if platform in default_off and platform not in _TOOLSET_PLATFORM_RESTRICTIONS:
|
||||
default_off.remove(platform)
|
||||
enabled_toolsets -= default_off
|
||||
|
||||
# Plugin toolsets: enabled by default unless explicitly disabled.
|
||||
# Recover non-configurable platform toolsets (e.g. discord, feishu_doc,
|
||||
# feishu_drive). These are part of the platform's default composite but
|
||||
# absent from CONFIGURABLE_TOOLSETS, so they can't appear in the TUI
|
||||
# checklist or in a user-saved config. Must run in BOTH branches —
|
||||
# otherwise saving via `hermes tools` (which flips has_explicit_config
|
||||
# to True) silently drops them.
|
||||
platform_tool_universe = set(resolve_toolset(PLATFORMS[platform]["default_toolset"]))
|
||||
configurable_tool_universe = set()
|
||||
for ck in configurable_keys:
|
||||
configurable_tool_universe.update(resolve_toolset(ck))
|
||||
claimed = set()
|
||||
for ts_key in enabled_toolsets:
|
||||
claimed.update(resolve_toolset(ts_key))
|
||||
skip = configurable_keys | plugin_ts_keys | platform_default_keys
|
||||
skip |= {k for k in TOOLSETS if k.startswith("hermes-")}
|
||||
skip |= set(_DEFAULT_OFF_TOOLSETS) - {platform}
|
||||
for ts_key, ts_def in TOOLSETS.items():
|
||||
if ts_key in skip:
|
||||
continue
|
||||
if ts_def.get("includes"):
|
||||
continue
|
||||
ts_tools = set(resolve_toolset(ts_key))
|
||||
if not ts_tools or not ts_tools.issubset(platform_tool_universe):
|
||||
continue
|
||||
if ts_tools.issubset(configurable_tool_universe):
|
||||
continue
|
||||
if not ts_tools.issubset(claimed):
|
||||
enabled_toolsets.add(ts_key)
|
||||
claimed.update(ts_tools)
|
||||
|
||||
# Plugin toolsets: enabled by default unless explicitly disabled, or
|
||||
# unless the toolset is in _DEFAULT_OFF_TOOLSETS (e.g. spotify —
|
||||
# shipped as a bundled plugin but user must opt in via `hermes tools`
|
||||
# so we don't ship 7 Spotify tool schemas to users who don't use it).
|
||||
# A plugin toolset is "known" for a platform once `hermes tools`
|
||||
# has been saved for that platform (tracked via known_plugin_toolsets).
|
||||
# Unknown plugins default to enabled; known-but-absent = disabled.
|
||||
plugin_ts_keys = _get_plugin_toolset_keys()
|
||||
if plugin_ts_keys:
|
||||
known_map = config.get("known_plugin_toolsets", {})
|
||||
known_for_platform = set(known_map.get(platform, []))
|
||||
@@ -602,6 +722,9 @@ def _get_platform_tools(
|
||||
if pts in toolset_names:
|
||||
# Explicitly listed in config — enabled
|
||||
enabled_toolsets.add(pts)
|
||||
elif pts in _DEFAULT_OFF_TOOLSETS:
|
||||
# Opt-in plugin toolset — stay off until user picks it
|
||||
continue
|
||||
elif pts not in known_for_platform:
|
||||
# New plugin not yet seen by hermes tools — default enabled
|
||||
enabled_toolsets.add(pts)
|
||||
@@ -609,7 +732,6 @@ def _get_platform_tools(
|
||||
|
||||
# Preserve any explicit non-configurable toolset entries (for example,
|
||||
# custom toolsets or MCP server names saved in platform_toolsets).
|
||||
platform_default_keys = {p["default_toolset"] for p in PLATFORMS.values()}
|
||||
explicit_passthrough = {
|
||||
ts
|
||||
for ts in toolset_names
|
||||
@@ -655,6 +777,14 @@ def _save_platform_tools(config: dict, platform: str, enabled_toolset_keys: Set[
|
||||
"""
|
||||
config.setdefault("platform_toolsets", {})
|
||||
|
||||
# Drop platform-scoped toolsets that don't apply here. Prevents the
|
||||
# "Configure all platforms" checklist (or a hand-edited config.yaml)
|
||||
# from turning on, say, the `discord` toolset for Telegram.
|
||||
enabled_toolset_keys = {
|
||||
ts for ts in enabled_toolset_keys
|
||||
if _toolset_allowed_for_platform(ts, platform)
|
||||
}
|
||||
|
||||
# Get the set of all configurable toolset keys (built-in + plugin)
|
||||
configurable_keys = {ts_key for ts_key, _, _ in CONFIGURABLE_TOOLSETS}
|
||||
plugin_keys = _get_plugin_toolset_keys()
|
||||
@@ -669,6 +799,7 @@ def _save_platform_tools(config: dict, platform: str, enabled_toolset_keys: Set[
|
||||
existing_toolsets = config.get("platform_toolsets", {}).get(platform, [])
|
||||
if not isinstance(existing_toolsets, list):
|
||||
existing_toolsets = []
|
||||
existing_toolsets = [str(ts) for ts in existing_toolsets]
|
||||
|
||||
# Preserve any entries that are NOT configurable toolsets and NOT platform
|
||||
# defaults (i.e. only MCP server names should be preserved)
|
||||
@@ -676,6 +807,11 @@ def _save_platform_tools(config: dict, platform: str, enabled_toolset_keys: Set[
|
||||
entry for entry in existing_toolsets
|
||||
if entry not in configurable_keys and entry not in platform_default_keys
|
||||
}
|
||||
# Opening `hermes tools` is the user's opt-in to reconfigure tools, so treat
|
||||
# saving from the picker as consent to clear the "no_mcp" sentinel. The
|
||||
# picker has no checkbox for no_mcp, so without this users who once set it
|
||||
# by hand could never re-enable MCP servers through the UI.
|
||||
preserved_entries.discard("no_mcp")
|
||||
|
||||
# Merge preserved entries with new enabled toolsets
|
||||
config["platform_toolsets"][platform] = sorted(enabled_toolset_keys | preserved_entries)
|
||||
@@ -783,7 +919,7 @@ def _estimate_tool_tokens() -> Dict[str, int]:
|
||||
return _tool_token_cache
|
||||
|
||||
|
||||
def _prompt_toolset_checklist(platform_label: str, enabled: Set[str]) -> Set[str]:
|
||||
def _prompt_toolset_checklist(platform_label: str, enabled: Set[str], platform: str = "cli") -> Set[str]:
|
||||
"""Multi-select checklist of toolsets. Returns set of selected toolset keys."""
|
||||
from hermes_cli.curses_ui import curses_checklist
|
||||
from toolsets import resolve_toolset
|
||||
@@ -791,7 +927,12 @@ def _prompt_toolset_checklist(platform_label: str, enabled: Set[str]) -> Set[str
|
||||
# Pre-compute per-tool token counts (cached after first call).
|
||||
tool_tokens = _estimate_tool_tokens()
|
||||
|
||||
effective = _get_effective_configurable_toolsets()
|
||||
effective_all = _get_effective_configurable_toolsets()
|
||||
# Drop platform-scoped toolsets that don't apply to this platform.
|
||||
effective = [
|
||||
(k, l, d) for (k, l, d) in effective_all
|
||||
if _toolset_allowed_for_platform(k, platform)
|
||||
]
|
||||
|
||||
labels = []
|
||||
for ts_key, ts_label, ts_desc in effective:
|
||||
@@ -1705,7 +1846,7 @@ def tools_command(args=None, first_install: bool = False, config: dict = None):
|
||||
checklist_preselected = current_enabled - _DEFAULT_OFF_TOOLSETS
|
||||
|
||||
# Show checklist
|
||||
new_enabled = _prompt_toolset_checklist(pinfo["label"], checklist_preselected)
|
||||
new_enabled = _prompt_toolset_checklist(pinfo["label"], checklist_preselected, pkey)
|
||||
|
||||
added = new_enabled - current_enabled
|
||||
removed = current_enabled - new_enabled
|
||||
@@ -2061,7 +2202,11 @@ def _apply_mcp_change(config: dict, targets: List[str], action: str) -> Set[str]
|
||||
|
||||
def _print_tools_list(enabled_toolsets: set, mcp_servers: dict, platform: str = "cli"):
|
||||
"""Print a summary of enabled/disabled toolsets and MCP tool filters."""
|
||||
effective = _get_effective_configurable_toolsets()
|
||||
effective_all = _get_effective_configurable_toolsets()
|
||||
effective = [
|
||||
(k, l, d) for (k, l, d) in effective_all
|
||||
if _toolset_allowed_for_platform(k, platform)
|
||||
]
|
||||
builtin_keys = {ts_key for ts_key, _, _ in CONFIGURABLE_TOOLSETS}
|
||||
|
||||
print(f"Built-in toolsets ({platform}):")
|
||||
@@ -2127,6 +2272,20 @@ def tools_disable_enable_command(args):
|
||||
_print_error(f"Unknown toolset '{name}'")
|
||||
toolset_targets = [t for t in toolset_targets if t in valid_toolsets]
|
||||
|
||||
# Reject platform-scoped toolsets on platforms that don't allow them.
|
||||
restricted_targets = [
|
||||
t for t in toolset_targets
|
||||
if not _toolset_allowed_for_platform(t, platform)
|
||||
]
|
||||
if restricted_targets:
|
||||
for name in restricted_targets:
|
||||
allowed = sorted(_TOOLSET_PLATFORM_RESTRICTIONS.get(name) or set())
|
||||
_print_error(
|
||||
f"Toolset '{name}' is not available on platform '{platform}' "
|
||||
f"(only: {', '.join(allowed)})"
|
||||
)
|
||||
toolset_targets = [t for t in toolset_targets if t not in restricted_targets]
|
||||
|
||||
if toolset_targets:
|
||||
_apply_toolset_change(config, platform, toolset_targets, action)
|
||||
|
||||
|
||||
+351
-10
@@ -49,7 +49,7 @@ from hermes_cli.config import (
|
||||
from gateway.status import get_running_pid, read_runtime_status
|
||||
|
||||
try:
|
||||
from fastapi import FastAPI, HTTPException, Request
|
||||
from fastapi import FastAPI, HTTPException, Request, WebSocket, WebSocketDisconnect
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.responses import FileResponse, HTMLResponse, JSONResponse
|
||||
from fastapi.staticfiles import StaticFiles
|
||||
@@ -73,6 +73,10 @@ app = FastAPI(title="Hermes Agent", version=__version__)
|
||||
_SESSION_TOKEN = secrets.token_urlsafe(32)
|
||||
_SESSION_HEADER_NAME = "X-Hermes-Session-Token"
|
||||
|
||||
# In-browser Chat tab (/chat, /api/pty, …). Off unless ``hermes dashboard --tui``
|
||||
# or HERMES_DASHBOARD_TUI=1. Set from :func:`start_server`.
|
||||
_DASHBOARD_EMBEDDED_CHAT_ENABLED = False
|
||||
|
||||
# Simple rate limiter for the reveal endpoint
|
||||
_reveal_timestamps: List[float] = []
|
||||
_REVEAL_MAX_PER_WINDOW = 5
|
||||
@@ -283,7 +287,7 @@ _SCHEMA_OVERRIDES: Dict[str, Dict[str, Any]] = {
|
||||
"display.busy_input_mode": {
|
||||
"type": "select",
|
||||
"description": "Input behavior while agent is running",
|
||||
"options": ["queue", "interrupt", "block"],
|
||||
"options": ["interrupt", "queue"],
|
||||
},
|
||||
"memory.provider": {
|
||||
"type": "select",
|
||||
@@ -1529,26 +1533,30 @@ def _submit_anthropic_pkce(session_id: str, code_input: str) -> Dict[str, Any]:
|
||||
with urllib.request.urlopen(req, timeout=20) as resp:
|
||||
result = json.loads(resp.read().decode())
|
||||
except Exception as e:
|
||||
sess["status"] = "error"
|
||||
sess["error_message"] = f"Token exchange failed: {e}"
|
||||
with _oauth_sessions_lock:
|
||||
sess["status"] = "error"
|
||||
sess["error_message"] = f"Token exchange failed: {e}"
|
||||
return {"ok": False, "status": "error", "message": sess["error_message"]}
|
||||
|
||||
access_token = result.get("access_token", "")
|
||||
refresh_token = result.get("refresh_token", "")
|
||||
expires_in = int(result.get("expires_in") or 3600)
|
||||
if not access_token:
|
||||
sess["status"] = "error"
|
||||
sess["error_message"] = "No access token returned"
|
||||
with _oauth_sessions_lock:
|
||||
sess["status"] = "error"
|
||||
sess["error_message"] = "No access token returned"
|
||||
return {"ok": False, "status": "error", "message": sess["error_message"]}
|
||||
|
||||
expires_at_ms = int(time.time() * 1000) + (expires_in * 1000)
|
||||
try:
|
||||
_save_anthropic_oauth_creds(access_token, refresh_token, expires_at_ms)
|
||||
except Exception as e:
|
||||
sess["status"] = "error"
|
||||
sess["error_message"] = f"Save failed: {e}"
|
||||
with _oauth_sessions_lock:
|
||||
sess["status"] = "error"
|
||||
sess["error_message"] = f"Save failed: {e}"
|
||||
return {"ok": False, "status": "error", "message": sess["error_message"]}
|
||||
sess["status"] = "approved"
|
||||
with _oauth_sessions_lock:
|
||||
sess["status"] = "approved"
|
||||
_log.info("oauth/pkce: anthropic login completed (session=%s)", session_id)
|
||||
return {"ok": True, "status": "approved"}
|
||||
|
||||
@@ -2263,6 +2271,329 @@ async def get_usage_analytics(days: int = 30):
|
||||
db.close()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# /api/pty — PTY-over-WebSocket bridge for the dashboard "Chat" tab.
|
||||
#
|
||||
# The endpoint spawns the same ``hermes --tui`` binary the CLI uses, behind
|
||||
# a POSIX pseudo-terminal, and forwards bytes + resize escapes across a
|
||||
# WebSocket. The browser renders the ANSI through xterm.js (see
|
||||
# web/src/pages/ChatPage.tsx).
|
||||
#
|
||||
# Auth: ``?token=<session_token>`` query param (browsers can't set
|
||||
# Authorization on the WS upgrade). Same ephemeral ``_SESSION_TOKEN`` as
|
||||
# REST. Localhost-only — we defensively reject non-loopback clients even
|
||||
# though uvicorn binds to 127.0.0.1.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
import re
|
||||
import asyncio
|
||||
|
||||
from hermes_cli.pty_bridge import PtyBridge, PtyUnavailableError
|
||||
|
||||
_RESIZE_RE = re.compile(rb"\x1b\[RESIZE:(\d+);(\d+)\]")
|
||||
_PTY_READ_CHUNK_TIMEOUT = 0.2
|
||||
_VALID_CHANNEL_RE = re.compile(r"^[A-Za-z0-9._-]{1,128}$")
|
||||
# Starlette's TestClient reports the peer as "testclient"; treat it as
|
||||
# loopback so tests don't need to rewrite request scope.
|
||||
_LOOPBACK_HOSTS = frozenset({"127.0.0.1", "::1", "localhost", "testclient"})
|
||||
|
||||
# Per-channel subscriber registry used by /api/pub (PTY-side gateway → dashboard)
|
||||
# and /api/events (dashboard → browser sidebar). Keyed by an opaque channel id
|
||||
# the chat tab generates on mount; entries auto-evict when the last subscriber
|
||||
# drops AND the publisher has disconnected.
|
||||
_event_channels: dict[str, set] = {}
|
||||
_event_lock = asyncio.Lock()
|
||||
|
||||
|
||||
def _resolve_chat_argv(
    resume: Optional[str] = None,
    sidecar_url: Optional[str] = None,
) -> tuple[list[str], Optional[str], Optional[dict]]:
    """Work out the argv, working directory and environment for the chat PTY.

    The command comes from ``hermes_cli.main._make_tui_argv`` for the
    ``ui-tui`` directory under ``PROJECT_ROOT``, i.e. whatever
    ``hermes --tui`` would run. Tests monkeypatch this function to inject a
    tiny fake command (``cat``, ``sh -c 'printf …'``) so nothing has to
    build Node or the TUI bundle.

    Args:
        resume: Session id to resume. Passed through the
            ``HERMES_TUI_RESUME`` env var (matching what
            ``hermes_cli.main._launch_tui`` does for the CLI path) because
            ``ui-tui`` does not parse its argv, so ``--resume <id>`` would
            not work.
        sidecar_url: When set, exported as ``HERMES_TUI_SIDECAR_URL`` so the
            spawned ``tui_gateway.entry`` can mirror dispatcher emits to the
            dashboard's ``/api/pub`` endpoint (see :func:`pub_ws`).

    Returns:
        ``(argv, cwd, env)``. ``cwd`` is a string or None. ``env`` is a copy
        of ``os.environ`` with the overrides applied, or None when neither
        ``resume`` nor ``sidecar_url`` was given.
    """
    from hermes_cli.main import PROJECT_ROOT, _make_tui_argv

    argv, cwd = _make_tui_argv(PROJECT_ROOT / "ui-tui", tui_dev=False)

    # Gather the overrides first; the environment is only copied when there
    # is something to add to it.
    overrides = {}
    if resume:
        overrides["HERMES_TUI_RESUME"] = resume
    if sidecar_url:
        overrides["HERMES_TUI_SIDECAR_URL"] = sidecar_url

    env: Optional[dict] = None
    if overrides:
        env = os.environ.copy()
        env.update(overrides)

    working_dir = str(cwd) if cwd else None
    return list(argv), working_dir, env
|
||||
|
||||
|
||||
def _build_sidecar_url(channel: str) -> Optional[str]:
    """Build the ``ws://`` URL the PTY child should publish events to.

    The host and port are read from ``app.state.bound_host`` and
    ``app.state.bound_port``. The session token and ``channel`` are carried
    in the query string of the ``/api/pub`` URL.

    Returns:
        The URL, or None when the host or port is not recorded on
        ``app.state`` (or is falsy).
    """
    bound_host = getattr(app.state, "bound_host", None)
    bound_port = getattr(app.state, "bound_port", None)
    if not (bound_host and bound_port):
        return None

    # A host containing ":" that is not already bracketed gets wrapped in
    # brackets before the port is appended.
    needs_brackets = ":" in bound_host and not bound_host.startswith("[")
    if needs_brackets:
        authority = f"[{bound_host}]:{bound_port}"
    else:
        authority = f"{bound_host}:{bound_port}"

    query = urllib.parse.urlencode({"token": _SESSION_TOKEN, "channel": channel})
    return f"ws://{authority}/api/pub?{query}"
|
||||
|
||||
|
||||
async def _broadcast_event(channel: str, payload: str) -> None:
    """Send one publisher frame to every subscriber registered on ``channel``.

    The subscriber set is copied while holding ``_event_lock``; the sends
    themselves happen after the lock is released. A channel with no
    subscribers is a no-op.
    """
    async with _event_lock:
        recipients = tuple(_event_channels.get(channel, ()))

    for recipient in recipients:
        try:
            await recipient.send_text(payload)
        except Exception:
            # Subscriber went away mid-send; the /api/events finally clause
            # is what removes it from the registry.
            continue
|
||||
|
||||
|
||||
def _channel_or_close_code(ws: WebSocket) -> Optional[str]:
    """Return the ``channel`` query parameter if it is a valid channel id.

    Args:
        ws: The WebSocket whose query string is inspected.

    Returns:
        The channel string when the whole value matches
        ``_VALID_CHANNEL_RE``; None when the parameter is missing, empty or
        does not match.
    """
    channel = ws.query_params.get("channel", "")

    # fullmatch() rather than match(): the pattern ends in ``$``, which with
    # match() also succeeds just before a trailing newline, so a value such
    # as "abc" followed by a line feed would have been accepted.
    return channel if _VALID_CHANNEL_RE.fullmatch(channel) else None
|
||||
|
||||
|
||||
@app.websocket("/api/pty")
async def pty_ws(ws: WebSocket) -> None:
    """WebSocket endpoint that bridges the browser to a chat PTY.

    Flow:
      1. Refuse (close code 4403) when the embedded chat is disabled, 4401
         when the ``token`` query parameter does not equal the session
         token, and 4403 when the peer host is known and not in
         ``_LOOPBACK_HOSTS``. These checks run before ``accept()``.
      2. Resolve the command via :func:`_resolve_chat_argv` and spawn it
         with ``PtyBridge.spawn``. On failure a red error line is sent and
         the socket is closed with code 1011.
      3. A background task forwards PTY output to the socket while the main
         loop forwards socket input to the PTY. A message that consists
         solely of a resize escape is applied with ``bridge.resize`` and is
         not written to the PTY.
      4. On exit the reader task is cancelled and the bridge is closed.
    """
    if not _DASHBOARD_EMBEDDED_CHAT_ENABLED:
        await ws.close(code=4403)
        return

    # --- auth + loopback check (before accept so we can close cleanly) ---
    supplied_token = ws.query_params.get("token", "")
    if not hmac.compare_digest(supplied_token.encode(), _SESSION_TOKEN.encode()):
        await ws.close(code=4401)
        return

    peer_host = ws.client.host if ws.client else ""
    if peer_host and peer_host not in _LOOPBACK_HOSTS:
        await ws.close(code=4403)
        return

    await ws.accept()

    async def refuse(message: str) -> None:
        # Show the failure in red inside the terminal, then close.
        await ws.send_text(f"\r\n\x1b[31m{message}\x1b[0m\r\n")
        await ws.close(code=1011)

    # --- spawn PTY ------------------------------------------------------
    resume = ws.query_params.get("resume") or None
    channel = _channel_or_close_code(ws)
    sidecar_url = _build_sidecar_url(channel) if channel else None

    try:
        argv, cwd, env = _resolve_chat_argv(resume=resume, sidecar_url=sidecar_url)
    except SystemExit as exc:
        # _make_tui_argv calls sys.exit(1) when node/npm is missing.
        await refuse(f"Chat unavailable: {exc}")
        return

    try:
        bridge = PtyBridge.spawn(argv, cwd=cwd, env=env)
    except PtyUnavailableError as exc:
        await refuse(f"Chat unavailable: {exc}")
        return
    except (FileNotFoundError, OSError) as exc:
        await refuse(f"Chat failed to start: {exc}")
        return

    loop = asyncio.get_running_loop()

    # --- reader task: PTY master → WebSocket ----------------------------
    async def pump_pty_to_ws() -> None:
        while True:
            chunk = await loop.run_in_executor(
                None, bridge.read, _PTY_READ_CHUNK_TIMEOUT
            )
            if chunk is None:  # EOF
                break
            if chunk:
                try:
                    await ws.send_bytes(chunk)
                except Exception:
                    break
            else:
                # no data this tick; yield control and retry
                await asyncio.sleep(0)

    reader_task = asyncio.create_task(pump_pty_to_ws())

    # --- writer loop: WebSocket → PTY master ----------------------------
    try:
        while True:
            message = await ws.receive()
            if message.get("type") == "websocket.disconnect":
                break

            data = message.get("bytes")
            if data is None:
                text = message.get("text")
                data = text.encode("utf-8") if isinstance(text, str) else b""
            if not data:
                continue

            # Resize escape is consumed locally, never written to the PTY.
            resize = _RESIZE_RE.match(data)
            if resize is not None and resize.end() == len(data):
                cols = int(resize.group(1))
                rows = int(resize.group(2))
                bridge.resize(cols=cols, rows=rows)
            else:
                bridge.write(data)
    except WebSocketDisconnect:
        pass
    finally:
        reader_task.cancel()
        try:
            await reader_task
        except (asyncio.CancelledError, Exception):
            pass
        bridge.close()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# /api/ws — JSON-RPC WebSocket sidecar for the dashboard "Chat" tab.
|
||||
#
|
||||
# Drives the same `tui_gateway.dispatch` surface Ink uses over stdio, so the
|
||||
# dashboard can render structured metadata (model badge, tool-call sidebar,
|
||||
# slash launcher, session info) alongside the xterm.js terminal that PTY
|
||||
# already paints. Both transports bind to the same session id when one is
|
||||
# active, so a tool.start emitted by the agent fans out to both sinks.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@app.websocket("/api/ws")
async def gateway_ws(ws: WebSocket) -> None:
    """WebSocket endpoint that hands the connection to ``tui_gateway.ws.handle_ws``.

    Before delegating, the socket is closed with code 4403 when the embedded
    chat is disabled, 4401 when the ``token`` query parameter does not equal
    the session token, and 4403 when the peer host is known and not in
    ``_LOOPBACK_HOSTS``.
    """
    if not _DASHBOARD_EMBEDDED_CHAT_ENABLED:
        await ws.close(code=4403)
        return

    supplied_token = ws.query_params.get("token", "")
    token_ok = hmac.compare_digest(supplied_token.encode(), _SESSION_TOKEN.encode())
    if not token_ok:
        await ws.close(code=4401)
        return

    peer_host = ws.client.host if ws.client else ""
    if peer_host and peer_host not in _LOOPBACK_HOSTS:
        await ws.close(code=4403)
        return

    # Imported here, after the gate checks, as in the original.
    from tui_gateway.ws import handle_ws

    await handle_ws(ws)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# /api/pub + /api/events — chat-tab event broadcast.
|
||||
#
|
||||
# The PTY-side ``tui_gateway.entry`` opens /api/pub at startup (driven by
|
||||
# HERMES_TUI_SIDECAR_URL set in /api/pty's PTY env) and writes every
|
||||
# dispatcher emit through it. The dashboard fans those frames out to any
|
||||
# subscriber that opened /api/events on the same channel id. This is what
|
||||
# gives the React sidebar its tool-call feed without breaking the PTY
|
||||
# child's stdio handshake with Ink.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@app.websocket("/api/pub")
async def pub_ws(ws: WebSocket) -> None:
    """Publisher side of the chat-tab event broadcast.

    After the gate checks (4403 when the embedded chat is disabled, 4401 on
    a wrong ``token``, 4403 for a known non-loopback peer, 4400 when the
    ``channel`` query parameter is missing or invalid) the socket is
    accepted and every text frame received is passed to
    :func:`_broadcast_event` for that channel until the publisher
    disconnects.
    """
    if not _DASHBOARD_EMBEDDED_CHAT_ENABLED:
        await ws.close(code=4403)
        return

    supplied_token = ws.query_params.get("token", "")
    token_ok = hmac.compare_digest(supplied_token.encode(), _SESSION_TOKEN.encode())
    if not token_ok:
        await ws.close(code=4401)
        return

    peer_host = ws.client.host if ws.client else ""
    if peer_host and peer_host not in _LOOPBACK_HOSTS:
        await ws.close(code=4403)
        return

    channel = _channel_or_close_code(ws)
    if not channel:
        await ws.close(code=4400)
        return

    await ws.accept()

    try:
        while True:
            frame = await ws.receive_text()
            await _broadcast_event(channel, frame)
    except WebSocketDisconnect:
        # Publisher went away; nothing to clean up on this side.
        pass
|
||||
|
||||
|
||||
@app.websocket("/api/events")
async def events_ws(ws: WebSocket) -> None:
    """Subscriber side of the chat-tab event broadcast.

    After the gate checks (4403 when the embedded chat is disabled, 4401 on
    a wrong ``token``, 4403 for a known non-loopback peer, 4400 when the
    ``channel`` query parameter is missing or invalid) the socket is
    accepted and added to ``_event_channels[channel]``. It stays registered
    until the receive loop ends, at which point it is removed and the
    channel entry is dropped if no subscribers remain.
    """
    if not _DASHBOARD_EMBEDDED_CHAT_ENABLED:
        await ws.close(code=4403)
        return

    supplied_token = ws.query_params.get("token", "")
    token_ok = hmac.compare_digest(supplied_token.encode(), _SESSION_TOKEN.encode())
    if not token_ok:
        await ws.close(code=4401)
        return

    peer_host = ws.client.host if ws.client else ""
    if peer_host and peer_host not in _LOOPBACK_HOSTS:
        await ws.close(code=4403)
        return

    channel = _channel_or_close_code(ws)
    if not channel:
        await ws.close(code=4400)
        return

    await ws.accept()

    async with _event_lock:
        _event_channels.setdefault(channel, set()).add(ws)

    try:
        # Subscribers don't speak — the receive just blocks until
        # disconnect so the connection stays open as long as the
        # browser holds it.
        while True:
            await ws.receive_text()
    except WebSocketDisconnect:
        pass
    finally:
        async with _event_lock:
            subscribers = _event_channels.get(channel)
            if subscribers is not None:
                subscribers.discard(ws)
                if not subscribers:
                    # Last subscriber gone: drop the channel entry.
                    _event_channels.pop(channel, None)
|
||||
|
||||
|
||||
def mount_spa(application: FastAPI):
|
||||
"""Mount the built SPA. Falls back to index.html for client-side routing.
|
||||
|
||||
@@ -2284,8 +2615,10 @@ def mount_spa(application: FastAPI):
|
||||
def _serve_index():
|
||||
"""Return index.html with the session token injected."""
|
||||
html = _index_path.read_text()
|
||||
chat_js = "true" if _DASHBOARD_EMBEDDED_CHAT_ENABLED else "false"
|
||||
token_script = (
|
||||
f'<script>window.__HERMES_SESSION_TOKEN__="{_SESSION_TOKEN}";</script>'
|
||||
f'<script>window.__HERMES_SESSION_TOKEN__="{_SESSION_TOKEN}";'
|
||||
f"window.__HERMES_DASHBOARD_EMBEDDED_CHAT__={chat_js};</script>"
|
||||
)
|
||||
html = html.replace("</head>", f"{token_script}</head>", 1)
|
||||
return HTMLResponse(
|
||||
@@ -2798,10 +3131,15 @@ def start_server(
|
||||
port: int = 9119,
|
||||
open_browser: bool = True,
|
||||
allow_public: bool = False,
|
||||
*,
|
||||
embedded_chat: bool = False,
|
||||
):
|
||||
"""Start the web UI server."""
|
||||
import uvicorn
|
||||
|
||||
global _DASHBOARD_EMBEDDED_CHAT_ENABLED
|
||||
_DASHBOARD_EMBEDDED_CHAT_ENABLED = embedded_chat
|
||||
|
||||
_LOCALHOST = ("127.0.0.1", "localhost", "::1")
|
||||
if host not in _LOCALHOST and not allow_public:
|
||||
raise SystemExit(
|
||||
@@ -2817,7 +3155,10 @@ def start_server(
|
||||
|
||||
# Record the bound host so host_header_middleware can validate incoming
|
||||
# Host headers against it. Defends against DNS rebinding (GHSA-ppp5-vxwm-4cf7).
|
||||
# bound_port is also stashed so /api/pty can build the back-WS URL the
|
||||
# PTY child uses to publish events to the dashboard sidebar.
|
||||
app.state.bound_host = host
|
||||
app.state.bound_port = port
|
||||
|
||||
if open_browser:
|
||||
import webbrowser
|
||||
|
||||
+94
-5
@@ -31,7 +31,7 @@ T = TypeVar("T")
|
||||
|
||||
DEFAULT_DB_PATH = get_hermes_home() / "state.db"
|
||||
|
||||
SCHEMA_VERSION = 8
|
||||
SCHEMA_VERSION = 9
|
||||
|
||||
SCHEMA_SQL = """
|
||||
CREATE TABLE IF NOT EXISTS schema_version (
|
||||
@@ -83,7 +83,8 @@ CREATE TABLE IF NOT EXISTS messages (
|
||||
reasoning TEXT,
|
||||
reasoning_content TEXT,
|
||||
reasoning_details TEXT,
|
||||
codex_reasoning_items TEXT
|
||||
codex_reasoning_items TEXT,
|
||||
codex_message_items TEXT
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS state_meta (
|
||||
@@ -356,6 +357,15 @@ class SessionDB:
|
||||
except sqlite3.OperationalError:
|
||||
pass # Column already exists
|
||||
cursor.execute("UPDATE schema_version SET version = 8")
|
||||
if current_version < 9:
|
||||
# v9: preserve replayable Codex assistant message ids/phases so
|
||||
# follow-up turns can rebuild Responses API message items instead
|
||||
# of flattening everything to plain assistant text.
|
||||
try:
|
||||
cursor.execute('ALTER TABLE messages ADD COLUMN "codex_message_items" TEXT')
|
||||
except sqlite3.OperationalError:
|
||||
pass # Column already exists
|
||||
cursor.execute("UPDATE schema_version SET version = 9")
|
||||
|
||||
# Unique title index — always ensure it exists (safe to run after migrations
|
||||
# since the title column is guaranteed to exist at this point)
|
||||
@@ -956,6 +966,7 @@ class SessionDB:
|
||||
reasoning_content: str = None,
|
||||
reasoning_details: Any = None,
|
||||
codex_reasoning_items: Any = None,
|
||||
codex_message_items: Any = None,
|
||||
) -> int:
|
||||
"""
|
||||
Append a message to a session. Returns the message row ID.
|
||||
@@ -972,6 +983,10 @@ class SessionDB:
|
||||
json.dumps(codex_reasoning_items)
|
||||
if codex_reasoning_items else None
|
||||
)
|
||||
codex_message_items_json = (
|
||||
json.dumps(codex_message_items)
|
||||
if codex_message_items else None
|
||||
)
|
||||
tool_calls_json = json.dumps(tool_calls) if tool_calls else None
|
||||
|
||||
# Pre-compute tool call count
|
||||
@@ -983,8 +998,9 @@ class SessionDB:
|
||||
cursor = conn.execute(
|
||||
"""INSERT INTO messages (session_id, role, content, tool_call_id,
|
||||
tool_calls, tool_name, timestamp, token_count, finish_reason,
|
||||
reasoning, reasoning_content, reasoning_details, codex_reasoning_items)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
|
||||
reasoning, reasoning_content, reasoning_details, codex_reasoning_items,
|
||||
codex_message_items)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
|
||||
(
|
||||
session_id,
|
||||
role,
|
||||
@@ -999,6 +1015,7 @@ class SessionDB:
|
||||
reasoning_content,
|
||||
reasoning_details_json,
|
||||
codex_items_json,
|
||||
codex_message_items_json,
|
||||
),
|
||||
)
|
||||
msg_id = cursor.lastrowid
|
||||
@@ -1039,6 +1056,71 @@ class SessionDB:
|
||||
result.append(msg)
|
||||
return result
|
||||
|
||||
def resolve_resume_session_id(self, session_id: str) -> str:
|
||||
"""Redirect a resume target to the descendant session that holds the messages.
|
||||
|
||||
Context compression ends the current session and forks a new child session
|
||||
(linked via ``parent_session_id``). The flush cursor is reset, so the
|
||||
child is where new messages actually land — the parent ends up with
|
||||
``message_count = 0`` rows unless messages had already been flushed to
|
||||
it before compression. See #15000.
|
||||
|
||||
This helper walks ``parent_session_id`` forward from ``session_id`` and
|
||||
returns the first descendant in the chain that has at least one message
|
||||
row. If the original session already has messages, or no descendant
|
||||
has any, the original ``session_id`` is returned unchanged.
|
||||
|
||||
The chain is always walked via the child whose ``started_at`` is
|
||||
latest; that matches the single-chain shape that compression creates.
|
||||
A depth cap (32) guards against accidental loops in malformed data.
|
||||
"""
|
||||
if not session_id:
|
||||
return session_id
|
||||
|
||||
with self._lock:
|
||||
# If this session already has messages, nothing to redirect.
|
||||
try:
|
||||
row = self._conn.execute(
|
||||
"SELECT 1 FROM messages WHERE session_id = ? LIMIT 1",
|
||||
(session_id,),
|
||||
).fetchone()
|
||||
except Exception:
|
||||
return session_id
|
||||
if row is not None:
|
||||
return session_id
|
||||
|
||||
# Walk descendants: at each step, pick the most-recently-started
|
||||
# child session; stop once we find one with messages.
|
||||
current = session_id
|
||||
seen = {current}
|
||||
for _ in range(32):
|
||||
try:
|
||||
child_row = self._conn.execute(
|
||||
"SELECT id FROM sessions "
|
||||
"WHERE parent_session_id = ? "
|
||||
"ORDER BY started_at DESC, id DESC LIMIT 1",
|
||||
(current,),
|
||||
).fetchone()
|
||||
except Exception:
|
||||
return session_id
|
||||
if child_row is None:
|
||||
return session_id
|
||||
child_id = child_row["id"] if hasattr(child_row, "keys") else child_row[0]
|
||||
if not child_id or child_id in seen:
|
||||
return session_id
|
||||
seen.add(child_id)
|
||||
try:
|
||||
msg_row = self._conn.execute(
|
||||
"SELECT 1 FROM messages WHERE session_id = ? LIMIT 1",
|
||||
(child_id,),
|
||||
).fetchone()
|
||||
except Exception:
|
||||
return session_id
|
||||
if msg_row is not None:
|
||||
return child_id
|
||||
current = child_id
|
||||
return session_id
|
||||
|
||||
def get_messages_as_conversation(self, session_id: str) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Load messages in the OpenAI conversation format (role + content dicts).
|
||||
@@ -1047,7 +1129,8 @@ class SessionDB:
|
||||
with self._lock:
|
||||
cursor = self._conn.execute(
|
||||
"SELECT role, content, tool_call_id, tool_calls, tool_name, "
|
||||
"reasoning, reasoning_content, reasoning_details, codex_reasoning_items "
|
||||
"reasoning, reasoning_content, reasoning_details, codex_reasoning_items, "
|
||||
"codex_message_items "
|
||||
"FROM messages WHERE session_id = ? ORDER BY timestamp, id",
|
||||
(session_id,),
|
||||
)
|
||||
@@ -1085,6 +1168,12 @@ class SessionDB:
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
logger.warning("Failed to deserialize codex_reasoning_items, falling back to None")
|
||||
msg["codex_reasoning_items"] = None
|
||||
if row["codex_message_items"]:
|
||||
try:
|
||||
msg["codex_message_items"] = json.loads(row["codex_message_items"])
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
logger.warning("Failed to deserialize codex_message_items, falling back to None")
|
||||
msg["codex_message_items"] = None
|
||||
messages.append(msg)
|
||||
return messages
|
||||
|
||||
|
||||
+53
-25
@@ -24,6 +24,7 @@ import json
|
||||
import asyncio
|
||||
import logging
|
||||
import threading
|
||||
import time
|
||||
from typing import Dict, Any, List, Optional, Tuple
|
||||
|
||||
from tools.registry import discover_builtin_tools, registry
|
||||
@@ -288,30 +289,34 @@ def get_tool_definitions(
|
||||
filtered_tools[i] = {"type": "function", "function": dynamic_schema}
|
||||
break
|
||||
|
||||
# Rebuild discord_server schema based on the bot's privileged intents
|
||||
# (detected from GET /applications/@me) and the user's action allowlist
|
||||
# in config. Hides actions the bot's intents don't support so the
|
||||
# model never attempts them, and annotates fetch_messages when the
|
||||
# Rebuild discord / discord_admin schemas based on the bot's privileged
|
||||
# intents (detected from GET /applications/@me) and the user's action
|
||||
# allowlist in config. Hides actions the bot's intents don't support so
|
||||
# the model never attempts them, and annotates fetch_messages when the
|
||||
# MESSAGE_CONTENT intent is missing.
|
||||
if "discord_server" in available_tool_names:
|
||||
try:
|
||||
from tools.discord_tool import get_dynamic_schema
|
||||
dynamic = get_dynamic_schema()
|
||||
except Exception: # pragma: no cover — defensive, fall back to static
|
||||
dynamic = None
|
||||
if dynamic is None:
|
||||
# Tool filtered out entirely (empty allowlist or detection disabled
|
||||
# the only remaining actions). Drop it from the schema list.
|
||||
filtered_tools = [
|
||||
t for t in filtered_tools
|
||||
if t.get("function", {}).get("name") != "discord_server"
|
||||
]
|
||||
available_tool_names.discard("discord_server")
|
||||
else:
|
||||
for i, td in enumerate(filtered_tools):
|
||||
if td.get("function", {}).get("name") == "discord_server":
|
||||
filtered_tools[i] = {"type": "function", "function": dynamic}
|
||||
break
|
||||
_discord_schema_fns = {
|
||||
"discord": "get_dynamic_schema_core",
|
||||
"discord_admin": "get_dynamic_schema_admin",
|
||||
}
|
||||
for discord_tool_name in _discord_schema_fns:
|
||||
if discord_tool_name in available_tool_names:
|
||||
try:
|
||||
from tools import discord_tool as _dt
|
||||
schema_fn = getattr(_dt, _discord_schema_fns[discord_tool_name])
|
||||
dynamic = schema_fn()
|
||||
except Exception:
|
||||
dynamic = None
|
||||
if dynamic is None:
|
||||
filtered_tools = [
|
||||
t for t in filtered_tools
|
||||
if t.get("function", {}).get("name") != discord_tool_name
|
||||
]
|
||||
available_tool_names.discard(discord_tool_name)
|
||||
else:
|
||||
for i, td in enumerate(filtered_tools):
|
||||
if td.get("function", {}).get("name") == discord_tool_name:
|
||||
filtered_tools[i] = {"type": "function", "function": dynamic}
|
||||
break
|
||||
|
||||
# Strip web tool cross-references from browser_navigate description when
|
||||
# web_search / web_extract are not available. The static schema says
|
||||
@@ -343,6 +348,18 @@ def get_tool_definitions(
|
||||
global _last_resolved_tool_names
|
||||
_last_resolved_tool_names = [t["function"]["name"] for t in filtered_tools]
|
||||
|
||||
# Sanitize schemas for broad backend compatibility. llama.cpp's
|
||||
# json-schema-to-grammar converter (used by its OAI server to build
|
||||
# GBNF tool-call parsers) rejects some shapes that cloud providers
|
||||
# silently accept — bare "type": "object" with no properties,
|
||||
# string-valued schema nodes from malformed MCP servers, etc. This
|
||||
# is a no-op for schemas that are already well-formed.
|
||||
try:
|
||||
from tools.schema_sanitizer import sanitize_tool_schemas
|
||||
filtered_tools = sanitize_tool_schemas(filtered_tools)
|
||||
except Exception as e: # pragma: no cover — defensive
|
||||
logger.warning("Schema sanitization skipped: %s", e)
|
||||
|
||||
return filtered_tools
|
||||
|
||||
|
||||
@@ -452,9 +469,9 @@ def _coerce_number(value: str, integer_only: bool = False):
|
||||
f = float(value)
|
||||
except (ValueError, OverflowError):
|
||||
return value
|
||||
# Guard against inf/nan before int() conversion
|
||||
# Guard against inf/nan — not JSON-serializable, keep original string
|
||||
if f != f or f == float("inf") or f == float("-inf"):
|
||||
return f
|
||||
return value
|
||||
# If it looks like an integer (no fractional part), return int
|
||||
if f == int(f):
|
||||
return int(f)
|
||||
@@ -551,6 +568,14 @@ def handle_function_call(
|
||||
except Exception:
|
||||
pass # file_tools may not be loaded yet
|
||||
|
||||
# Measure tool dispatch latency so post_tool_call and
|
||||
# transform_tool_result hooks can observe per-tool duration.
|
||||
# Inspired by Claude Code 2.1.119, which added ``duration_ms`` to
|
||||
# PostToolUse hook inputs so plugin authors can build latency
|
||||
# dashboards, budget alerts, and regression canaries without having
|
||||
# to wrap every tool manually. We use monotonic() so the value is
|
||||
# unaffected by wall-clock adjustments during the call.
|
||||
_dispatch_start = time.monotonic()
|
||||
if function_name == "execute_code":
|
||||
# Prefer the caller-provided list so subagents can't overwrite
|
||||
# the parent's tool set via the process-global.
|
||||
@@ -566,6 +591,7 @@ def handle_function_call(
|
||||
task_id=task_id,
|
||||
user_task=user_task,
|
||||
)
|
||||
duration_ms = int((time.monotonic() - _dispatch_start) * 1000)
|
||||
|
||||
try:
|
||||
from hermes_cli.plugins import invoke_hook
|
||||
@@ -577,6 +603,7 @@ def handle_function_call(
|
||||
task_id=task_id or "",
|
||||
session_id=session_id or "",
|
||||
tool_call_id=tool_call_id or "",
|
||||
duration_ms=duration_ms,
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
@@ -597,6 +624,7 @@ def handle_function_call(
|
||||
task_id=task_id or "",
|
||||
session_id=session_id or "",
|
||||
tool_call_id=tool_call_id or "",
|
||||
duration_ms=duration_ms,
|
||||
)
|
||||
for hook_result in hook_results:
|
||||
if isinstance(hook_result, str):
|
||||
|
||||
+1
-1
@@ -156,7 +156,7 @@
|
||||
for entry in "''${ENTRIES[@]}"; do
|
||||
IFS=":" read -r ATTR FOLDER NIX_FILE <<< "$entry"
|
||||
echo "==> .#$ATTR ($FOLDER -> $NIX_FILE)"
|
||||
OUTPUT=$(nix build ".#$ATTR.npmDeps" --no-link --print-build-logs 2>&1)
|
||||
OUTPUT=$(nix build ".#$ATTR.npmDeps" --no-link --rebuild --print-build-logs 2>&1)
|
||||
STATUS=$?
|
||||
if [ "$STATUS" -eq 0 ]; then
|
||||
echo " ok"
|
||||
|
||||
+1
-1
@@ -4,7 +4,7 @@ let
|
||||
src = ../web;
|
||||
npmDeps = pkgs.fetchNpmDeps {
|
||||
inherit src;
|
||||
hash = "sha256-TS/vrCHbdvXkPcAPxImKzAd2pdDCrKlgYZkXBMQ+TEg=";
|
||||
hash = "sha256-4Z8KQ69QhO83X6zff+5urWBv6MME686MhTTMdwSl65o=";
|
||||
};
|
||||
|
||||
npm = hermesNpmLib.mkNpmPassthru { folder = "web"; attr = "web"; pname = "hermes-web"; };
|
||||
|
||||
@@ -91,4 +91,29 @@
|
||||
|
||||
// Register this plugin — the dashboard picks it up automatically.
|
||||
window.__HERMES_PLUGINS__.register("example", ExamplePage);
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────
|
||||
// Page-scoped slot demo: inject a small banner at the top of /sessions.
|
||||
//
|
||||
// Built-in pages expose named slots (<page>:top, <page>:bottom) that
|
||||
// plugins can populate without overriding the whole route. The
|
||||
// manifest lists the slots we use in its `slots` array so the shell
|
||||
// knows to render <PluginSlot name="sessions:top" /> there.
|
||||
// ─────────────────────────────────────────────────────────────────────
|
||||
function SessionsTopBanner() {
|
||||
return React.createElement(Card, {
|
||||
className: "border-dashed",
|
||||
},
|
||||
React.createElement(CardContent, { className: "flex items-center gap-3 py-2" },
|
||||
React.createElement(Badge, { variant: "outline" }, "Example"),
|
||||
React.createElement("span", {
|
||||
className: "text-xs text-muted-foreground",
|
||||
}, "This banner was injected into the Sessions page by the example plugin via the ",
|
||||
React.createElement("code", { className: "font-courier" }, "sessions:top"),
|
||||
" slot."),
|
||||
),
|
||||
);
|
||||
}
|
||||
|
||||
window.__HERMES_PLUGINS__.registerSlot("example", "sessions:top", SessionsTopBanner);
|
||||
})();
|
||||
|
||||
@@ -8,6 +8,7 @@
|
||||
"path": "/example",
|
||||
"position": "after:skills"
|
||||
},
|
||||
"slots": ["sessions:top"],
|
||||
"entry": "dist/index.js",
|
||||
"api": "plugin_api.py"
|
||||
}
|
||||
|
||||
+1601
File diff suppressed because it is too large
Load Diff
+752
@@ -0,0 +1,752 @@
|
||||
/*
|
||||
* Hermes Kanban — dashboard plugin styles.
|
||||
*
|
||||
* All colors reference theme CSS vars so the board reskins with the
|
||||
* active dashboard theme. No hardcoded palette.
|
||||
*/
|
||||
|
||||
.hermes-kanban {
|
||||
width: 100%;
|
||||
}
|
||||
|
||||
/* ---- Columns layout -------------------------------------------------- */
|
||||
|
||||
.hermes-kanban-columns {
|
||||
display: grid;
|
||||
grid-template-columns: repeat(auto-fit, minmax(260px, 1fr));
|
||||
gap: 0.75rem;
|
||||
align-items: start;
|
||||
}
|
||||
|
||||
.hermes-kanban-column {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
background: color-mix(in srgb, var(--color-card) 85%, transparent);
|
||||
border: 1px solid var(--color-border);
|
||||
border-radius: var(--radius);
|
||||
padding: 0.5rem;
|
||||
min-height: 200px;
|
||||
max-height: calc(100vh - 220px);
|
||||
transition: border-color 120ms ease, background-color 120ms ease;
|
||||
}
|
||||
|
||||
.hermes-kanban-column--drop {
|
||||
border-color: var(--color-ring);
|
||||
background: color-mix(in srgb, var(--color-ring) 8%, var(--color-card));
|
||||
}
|
||||
|
||||
.hermes-kanban-column-header {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.5rem;
|
||||
padding: 0.25rem 0.25rem 0.35rem;
|
||||
font-weight: 600;
|
||||
font-size: 0.85rem;
|
||||
color: var(--color-foreground);
|
||||
}
|
||||
|
||||
.hermes-kanban-column-label {
|
||||
flex: 1;
|
||||
letter-spacing: 0.01em;
|
||||
}
|
||||
|
||||
.hermes-kanban-column-count {
|
||||
font-variant-numeric: tabular-nums;
|
||||
color: var(--color-muted-foreground);
|
||||
font-size: 0.75rem;
|
||||
font-weight: 500;
|
||||
}
|
||||
|
||||
.hermes-kanban-column-add {
|
||||
appearance: none;
|
||||
background: transparent;
|
||||
border: 1px solid var(--color-border);
|
||||
color: var(--color-foreground);
|
||||
border-radius: var(--radius-sm, 0.25rem);
|
||||
width: 22px;
|
||||
height: 22px;
|
||||
line-height: 1;
|
||||
font-size: 1rem;
|
||||
cursor: pointer;
|
||||
}
|
||||
.hermes-kanban-column-add:hover {
|
||||
background: color-mix(in srgb, var(--color-foreground) 8%, transparent);
|
||||
}
|
||||
|
||||
.hermes-kanban-column-sub {
|
||||
padding: 0 0.25rem 0.5rem;
|
||||
font-size: 0.7rem;
|
||||
color: var(--color-muted-foreground);
|
||||
border-bottom: 1px solid color-mix(in srgb, var(--color-border) 60%, transparent);
|
||||
margin-bottom: 0.5rem;
|
||||
}
|
||||
|
||||
.hermes-kanban-column-body {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 0.45rem;
|
||||
overflow-y: auto;
|
||||
padding-right: 0.1rem;
|
||||
}
|
||||
|
||||
.hermes-kanban-empty {
|
||||
padding: 1.5rem 0.5rem;
|
||||
text-align: center;
|
||||
font-size: 0.75rem;
|
||||
color: var(--color-muted-foreground);
|
||||
border: 1px dashed color-mix(in srgb, var(--color-border) 70%, transparent);
|
||||
border-radius: var(--radius-sm, 0.25rem);
|
||||
}
|
||||
|
||||
/* ---- Status dots ----------------------------------------------------- */
|
||||
|
||||
.hermes-kanban-dot {
|
||||
display: inline-block;
|
||||
width: 0.5rem;
|
||||
height: 0.5rem;
|
||||
border-radius: 999px;
|
||||
background: var(--color-muted-foreground);
|
||||
}
|
||||
.hermes-kanban-dot-triage { background: #b47dd6; } /* lilac — fresh/unspecified */
|
||||
.hermes-kanban-dot-todo { background: var(--color-muted-foreground); }
|
||||
.hermes-kanban-dot-ready { background: #d4b348; } /* amber */
|
||||
.hermes-kanban-dot-running { background: #3fb97d; } /* green */
|
||||
.hermes-kanban-dot-blocked { background: var(--color-destructive, #d14a4a); }
|
||||
.hermes-kanban-dot-done { background: #4a8cd1; } /* blue */
|
||||
.hermes-kanban-dot-archived { background: var(--color-border); }
|
||||
|
||||
/* ---- Progress pill (N/M child tasks done) --------------------------- */
|
||||
|
||||
.hermes-kanban-progress {
|
||||
font-family: var(--font-mono, ui-monospace, monospace);
|
||||
font-size: 0.62rem;
|
||||
padding: 0.05rem 0.35rem;
|
||||
border-radius: 999px;
|
||||
background: color-mix(in srgb, var(--color-foreground) 8%, transparent);
|
||||
border: 1px solid color-mix(in srgb, var(--color-border) 80%, transparent);
|
||||
color: var(--color-muted-foreground);
|
||||
letter-spacing: 0.02em;
|
||||
}
|
||||
.hermes-kanban-progress--full {
|
||||
background: color-mix(in srgb, #3fb97d 22%, transparent);
|
||||
border-color: color-mix(in srgb, #3fb97d 45%, transparent);
|
||||
color: var(--color-foreground);
|
||||
}
|
||||
|
||||
/* ---- Lanes (per-profile sub-grouping inside Running) ---------------- */
|
||||
|
||||
.hermes-kanban-lane {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 0.35rem;
|
||||
padding: 0.25rem 0 0.35rem;
|
||||
border-top: 1px dashed color-mix(in srgb, var(--color-border) 70%, transparent);
|
||||
}
|
||||
.hermes-kanban-lane:first-child {
|
||||
border-top: 0;
|
||||
padding-top: 0;
|
||||
}
|
||||
.hermes-kanban-lane-head {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.4rem;
|
||||
font-size: 0.65rem;
|
||||
text-transform: uppercase;
|
||||
letter-spacing: 0.08em;
|
||||
color: var(--color-muted-foreground);
|
||||
padding: 0 0.1rem;
|
||||
}
|
||||
.hermes-kanban-lane-name {
|
||||
font-weight: 600;
|
||||
font-family: var(--font-mono, ui-monospace, monospace);
|
||||
}
|
||||
.hermes-kanban-lane-count {
|
||||
margin-left: auto;
|
||||
font-variant-numeric: tabular-nums;
|
||||
}
|
||||
|
||||
/* ---- Card ------------------------------------------------------------ */
|
||||
|
||||
.hermes-kanban-card {
|
||||
cursor: grab;
|
||||
transition: transform 100ms ease, box-shadow 100ms ease;
|
||||
}
|
||||
.hermes-kanban-card:hover {
|
||||
box-shadow: 0 1px 0 0 var(--color-ring) inset, 0 0 0 1px var(--color-ring) inset;
|
||||
}
|
||||
.hermes-kanban-card:active {
|
||||
cursor: grabbing;
|
||||
transform: scale(0.995);
|
||||
}
|
||||
|
||||
.hermes-kanban-card-content {
|
||||
padding: 0.5rem 0.6rem !important;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 0.3rem;
|
||||
}
|
||||
|
||||
.hermes-kanban-card-row {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.35rem;
|
||||
flex-wrap: wrap;
|
||||
}
|
||||
|
||||
.hermes-kanban-card-id {
|
||||
font-family: var(--font-mono, ui-monospace, monospace);
|
||||
font-size: 0.65rem;
|
||||
color: var(--color-muted-foreground);
|
||||
letter-spacing: 0.03em;
|
||||
}
|
||||
|
||||
.hermes-kanban-card-title {
|
||||
font-size: 0.85rem;
|
||||
font-weight: 500;
|
||||
line-height: 1.3;
|
||||
color: var(--color-foreground);
|
||||
word-break: break-word;
|
||||
}
|
||||
|
||||
.hermes-kanban-card-meta {
|
||||
font-size: 0.7rem;
|
||||
color: var(--color-muted-foreground);
|
||||
gap: 0.55rem;
|
||||
}
|
||||
|
||||
.hermes-kanban-priority {
|
||||
font-size: 0.6rem !important;
|
||||
padding: 0.05rem 0.3rem !important;
|
||||
background: color-mix(in srgb, var(--color-ring) 18%, transparent);
|
||||
color: var(--color-foreground);
|
||||
border: 1px solid color-mix(in srgb, var(--color-ring) 40%, transparent);
|
||||
}
|
||||
|
||||
.hermes-kanban-tag {
|
||||
font-size: 0.6rem !important;
|
||||
padding: 0.05rem 0.3rem !important;
|
||||
}
|
||||
|
||||
.hermes-kanban-assignee {
|
||||
font-weight: 500;
|
||||
color: color-mix(in srgb, var(--color-foreground) 80%, var(--color-muted-foreground));
|
||||
}
|
||||
.hermes-kanban-unassigned {
|
||||
font-style: italic;
|
||||
}
|
||||
.hermes-kanban-ago {
|
||||
margin-left: auto;
|
||||
}
|
||||
|
||||
/* ---- Inline create --------------------------------------------------- */
|
||||
|
||||
.hermes-kanban-inline-create {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 0.35rem;
|
||||
padding: 0.5rem;
|
||||
margin-bottom: 0.5rem;
|
||||
background: color-mix(in srgb, var(--color-card) 70%, transparent);
|
||||
border: 1px dashed var(--color-border);
|
||||
border-radius: var(--radius-sm, 0.25rem);
|
||||
}
|
||||
|
||||
/* ---- Drawer (task detail side panel) --------------------------------- */
|
||||
|
||||
.hermes-kanban-drawer-shade {
|
||||
position: fixed;
|
||||
inset: 0;
|
||||
background: rgba(0, 0, 0, 0.45);
|
||||
z-index: 60;
|
||||
display: flex;
|
||||
justify-content: flex-end;
|
||||
}
|
||||
|
||||
.hermes-kanban-drawer {
|
||||
width: min(480px, 92vw);
|
||||
height: 100vh;
|
||||
background: var(--color-card);
|
||||
border-left: 1px solid var(--color-border);
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
box-shadow: -4px 0 18px rgba(0, 0, 0, 0.35);
|
||||
animation: hermes-kanban-drawer-in 180ms ease-out;
|
||||
}
|
||||
|
||||
@keyframes hermes-kanban-drawer-in {
|
||||
from { transform: translateX(100%); opacity: 0.3; }
|
||||
to { transform: translateX(0); opacity: 1; }
|
||||
}
|
||||
|
||||
.hermes-kanban-drawer-head {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: space-between;
|
||||
padding: 0.6rem 0.8rem;
|
||||
border-bottom: 1px solid var(--color-border);
|
||||
font-family: var(--font-mono, ui-monospace, monospace);
|
||||
}
|
||||
|
||||
.hermes-kanban-drawer-close {
|
||||
appearance: none;
|
||||
background: transparent;
|
||||
border: 0;
|
||||
color: var(--color-muted-foreground);
|
||||
font-size: 1.25rem;
|
||||
line-height: 1;
|
||||
cursor: pointer;
|
||||
padding: 0 0.25rem;
|
||||
}
|
||||
.hermes-kanban-drawer-close:hover { color: var(--color-foreground); }
|
||||
|
||||
.hermes-kanban-drawer-body {
|
||||
flex: 1;
|
||||
overflow-y: auto;
|
||||
padding: 0.9rem;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 0.85rem;
|
||||
}
|
||||
|
||||
.hermes-kanban-drawer-title {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.5rem;
|
||||
font-size: 1rem;
|
||||
font-weight: 600;
|
||||
}
|
||||
|
||||
.hermes-kanban-drawer-meta {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 0.15rem;
|
||||
padding: 0.5rem 0.6rem;
|
||||
background: color-mix(in srgb, var(--color-foreground) 4%, transparent);
|
||||
border: 1px solid var(--color-border);
|
||||
border-radius: var(--radius-sm, 0.25rem);
|
||||
}
|
||||
|
||||
.hermes-kanban-meta-row {
|
||||
display: flex;
|
||||
gap: 0.5rem;
|
||||
font-size: 0.72rem;
|
||||
}
|
||||
.hermes-kanban-meta-label {
|
||||
width: 92px;
|
||||
color: var(--color-muted-foreground);
|
||||
}
|
||||
.hermes-kanban-meta-value {
|
||||
color: var(--color-foreground);
|
||||
word-break: break-word;
|
||||
}
|
||||
|
||||
.hermes-kanban-actions {
|
||||
display: flex;
|
||||
flex-wrap: wrap;
|
||||
gap: 0.3rem;
|
||||
}
|
||||
|
||||
.hermes-kanban-section {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 0.35rem;
|
||||
}
|
||||
|
||||
.hermes-kanban-section-head {
|
||||
font-size: 0.72rem;
|
||||
font-weight: 600;
|
||||
text-transform: uppercase;
|
||||
letter-spacing: 0.07em;
|
||||
color: var(--color-muted-foreground);
|
||||
}
|
||||
|
||||
.hermes-kanban-pre {
|
||||
margin: 0;
|
||||
padding: 0.45rem 0.55rem;
|
||||
white-space: pre-wrap;
|
||||
word-break: break-word;
|
||||
background: color-mix(in srgb, var(--color-foreground) 4%, transparent);
|
||||
border: 1px solid var(--color-border);
|
||||
border-radius: var(--radius-sm, 0.25rem);
|
||||
font-family: var(--font-mono, ui-monospace, monospace);
|
||||
font-size: 0.72rem;
|
||||
color: var(--color-foreground);
|
||||
}
|
||||
|
||||
.hermes-kanban-comment {
|
||||
border-left: 2px solid color-mix(in srgb, var(--color-ring) 35%, transparent);
|
||||
padding-left: 0.5rem;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 0.2rem;
|
||||
}
|
||||
|
||||
.hermes-kanban-comment-head {
|
||||
display: flex;
|
||||
gap: 0.5rem;
|
||||
font-size: 0.7rem;
|
||||
}
|
||||
.hermes-kanban-comment-author {
|
||||
font-weight: 600;
|
||||
color: var(--color-foreground);
|
||||
}
|
||||
.hermes-kanban-comment-ago {
|
||||
color: var(--color-muted-foreground);
|
||||
}
|
||||
|
||||
.hermes-kanban-event {
|
||||
display: flex;
|
||||
gap: 0.5rem;
|
||||
font-size: 0.7rem;
|
||||
color: var(--color-muted-foreground);
|
||||
font-family: var(--font-mono, ui-monospace, monospace);
|
||||
}
|
||||
.hermes-kanban-event-kind {
|
||||
color: var(--color-foreground);
|
||||
min-width: 6rem;
|
||||
}
|
||||
.hermes-kanban-event-payload {
|
||||
color: var(--color-muted-foreground);
|
||||
overflow: hidden;
|
||||
text-overflow: ellipsis;
|
||||
white-space: nowrap;
|
||||
max-width: 280px;
|
||||
}
|
||||
|
||||
.hermes-kanban-drawer-comment-row {
|
||||
display: flex;
|
||||
gap: 0.4rem;
|
||||
padding: 0.55rem 0.75rem;
|
||||
border-top: 1px solid var(--color-border);
|
||||
background: color-mix(in srgb, var(--color-card) 90%, transparent);
|
||||
}
|
||||
|
||||
.hermes-kanban-count {
|
||||
display: inline-flex;
|
||||
gap: 0.2rem;
|
||||
align-items: center;
|
||||
}
|
||||
|
||||
/* ---- Selection chrome ----------------------------------------------- */
|
||||
|
||||
.hermes-kanban-card--selected :where(.hermes-kanban-card-content) {
|
||||
box-shadow: 0 0 0 2px var(--color-ring) inset,
|
||||
0 0 0 1px var(--color-ring) inset;
|
||||
background: color-mix(in srgb, var(--color-ring) 6%, var(--color-card));
|
||||
}
|
||||
|
||||
.hermes-kanban-card-check {
|
||||
width: 0.85rem;
|
||||
height: 0.85rem;
|
||||
margin: 0;
|
||||
cursor: pointer;
|
||||
accent-color: var(--color-ring);
|
||||
}
|
||||
|
||||
/* ---- Bulk action bar ------------------------------------------------ */
|
||||
|
||||
.hermes-kanban-bulk {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.5rem;
|
||||
padding: 0.4rem 0.75rem;
|
||||
background: color-mix(in srgb, var(--color-ring) 10%, var(--color-card));
|
||||
border: 1px solid color-mix(in srgb, var(--color-ring) 40%, var(--color-border));
|
||||
border-radius: var(--radius-sm, 0.25rem);
|
||||
flex-wrap: wrap;
|
||||
}
|
||||
.hermes-kanban-bulk-count {
|
||||
font-weight: 600;
|
||||
font-size: 0.75rem;
|
||||
padding-right: 0.25rem;
|
||||
}
|
||||
.hermes-kanban-bulk-btn {
|
||||
height: 1.7rem !important;
|
||||
padding: 0 0.5rem !important;
|
||||
font-size: 0.7rem !important;
|
||||
border: 1px solid var(--color-border);
|
||||
cursor: pointer;
|
||||
}
|
||||
.hermes-kanban-bulk-btn:hover {
|
||||
background: color-mix(in srgb, var(--color-foreground) 8%, transparent);
|
||||
}
|
||||
.hermes-kanban-bulk-reassign {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.25rem;
|
||||
padding-left: 0.5rem;
|
||||
border-left: 1px solid color-mix(in srgb, var(--color-border) 70%, transparent);
|
||||
}
|
||||
|
||||
/* ---- Dependency editor chips --------------------------------------- */
|
||||
|
||||
.hermes-kanban-deps-row {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.5rem;
|
||||
margin-bottom: 0.4rem;
|
||||
}
|
||||
.hermes-kanban-deps-label {
|
||||
font-size: 0.68rem;
|
||||
text-transform: uppercase;
|
||||
letter-spacing: 0.08em;
|
||||
color: var(--color-muted-foreground);
|
||||
min-width: 4rem;
|
||||
}
|
||||
.hermes-kanban-deps-chips {
|
||||
display: flex;
|
||||
gap: 0.3rem;
|
||||
flex-wrap: wrap;
|
||||
flex: 1;
|
||||
}
|
||||
.hermes-kanban-deps-empty {
|
||||
font-size: 0.7rem;
|
||||
color: var(--color-muted-foreground);
|
||||
font-style: italic;
|
||||
}
|
||||
.hermes-kanban-dep-chip {
|
||||
display: inline-flex;
|
||||
align-items: center;
|
||||
gap: 0.15rem;
|
||||
padding: 0.1rem 0.35rem;
|
||||
background: color-mix(in srgb, var(--color-foreground) 6%, transparent);
|
||||
border: 1px solid var(--color-border);
|
||||
border-radius: var(--radius-sm, 0.25rem);
|
||||
font-family: var(--font-mono, ui-monospace, monospace);
|
||||
font-size: 0.68rem;
|
||||
color: var(--color-foreground);
|
||||
}
|
||||
.hermes-kanban-dep-chip-x {
|
||||
appearance: none;
|
||||
background: transparent;
|
||||
border: 0;
|
||||
color: var(--color-muted-foreground);
|
||||
cursor: pointer;
|
||||
font-size: 0.85rem;
|
||||
line-height: 1;
|
||||
padding: 0 0.15rem;
|
||||
}
|
||||
.hermes-kanban-dep-chip-x:hover { color: var(--color-destructive, #d14a4a); }
|
||||
|
||||
/* ---- Inline edit affordances --------------------------------------- */
|
||||
|
||||
.hermes-kanban-editable {
|
||||
cursor: pointer;
|
||||
border-bottom: 1px dotted color-mix(in srgb, var(--color-border) 80%, transparent);
|
||||
}
|
||||
.hermes-kanban-editable:hover {
|
||||
color: var(--color-foreground);
|
||||
border-bottom-color: var(--color-ring);
|
||||
}
|
||||
|
||||
.hermes-kanban-drawer-title-text {
|
||||
cursor: pointer;
|
||||
}
|
||||
.hermes-kanban-drawer-title-text:hover {
|
||||
text-decoration: underline;
|
||||
text-decoration-color: var(--color-ring);
|
||||
text-decoration-style: dotted;
|
||||
text-underline-offset: 3px;
|
||||
}
|
||||
|
||||
.hermes-kanban-edit-row {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.35rem;
|
||||
width: 100%;
|
||||
}
|
||||
|
||||
.hermes-kanban-section-head-row {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: space-between;
|
||||
gap: 0.5rem;
|
||||
}
|
||||
.hermes-kanban-edit-link {
|
||||
appearance: none;
|
||||
background: transparent;
|
||||
border: 0;
|
||||
color: var(--color-muted-foreground);
|
||||
font-size: 0.7rem;
|
||||
text-transform: uppercase;
|
||||
letter-spacing: 0.05em;
|
||||
cursor: pointer;
|
||||
padding: 0;
|
||||
}
|
||||
.hermes-kanban-edit-link:hover { color: var(--color-ring); }
|
||||
|
||||
.hermes-kanban-textarea {
|
||||
width: 100%;
|
||||
min-height: 8rem;
|
||||
background: var(--color-card);
|
||||
color: var(--color-foreground);
|
||||
border: 1px solid var(--color-border);
|
||||
border-radius: var(--radius-sm, 0.25rem);
|
||||
padding: 0.5rem 0.6rem;
|
||||
font-family: var(--font-mono, ui-monospace, monospace);
|
||||
font-size: 0.8rem;
|
||||
line-height: 1.5;
|
||||
resize: vertical;
|
||||
}
|
||||
.hermes-kanban-textarea:focus {
  /* Use a transparent outline rather than `outline: none`. In forced-colors
     (high contrast) mode box-shadow is not painted, while a transparent
     outline is redrawn in a system colour, so keyboard focus stays visible.
     In normal rendering the outline is invisible and takes no layout space,
     so the ring below is still the only visible indicator. */
  outline: 2px solid transparent;
  border-color: var(--color-ring);
  box-shadow: 0 0 0 2px color-mix(in srgb, var(--color-ring) 30%, transparent);
}
|
||||
|
||||
/* ---- Markdown rendering -------------------------------------------- */
|
||||
|
||||
.hermes-kanban-md {
|
||||
font-size: 0.8rem;
|
||||
line-height: 1.55;
|
||||
color: var(--color-foreground);
|
||||
}
|
||||
.hermes-kanban-md p { margin: 0.25rem 0; }
|
||||
.hermes-kanban-md h1,
|
||||
.hermes-kanban-md h2,
|
||||
.hermes-kanban-md h3,
|
||||
.hermes-kanban-md h4 {
|
||||
margin: 0.6rem 0 0.2rem;
|
||||
line-height: 1.25;
|
||||
}
|
||||
.hermes-kanban-md h1 { font-size: 1.05rem; }
|
||||
.hermes-kanban-md h2 { font-size: 0.95rem; }
|
||||
.hermes-kanban-md h3 { font-size: 0.88rem; }
|
||||
.hermes-kanban-md h4 { font-size: 0.82rem; }
|
||||
.hermes-kanban-md ul {
|
||||
margin: 0.25rem 0 0.25rem 1.1rem;
|
||||
padding: 0;
|
||||
}
|
||||
.hermes-kanban-md li { margin: 0.1rem 0; }
|
||||
.hermes-kanban-md a {
|
||||
color: var(--color-ring);
|
||||
text-decoration: underline;
|
||||
}
|
||||
.hermes-kanban-md code {
|
||||
font-family: var(--font-mono, ui-monospace, monospace);
|
||||
font-size: 0.75rem;
|
||||
padding: 0.05rem 0.3rem;
|
||||
background: color-mix(in srgb, var(--color-foreground) 8%, transparent);
|
||||
border-radius: 3px;
|
||||
}
|
||||
.hermes-kanban-md-code {
|
||||
margin: 0.35rem 0;
|
||||
padding: 0.5rem 0.6rem;
|
||||
background: color-mix(in srgb, var(--color-foreground) 5%, transparent);
|
||||
border: 1px solid var(--color-border);
|
||||
border-radius: var(--radius-sm, 0.25rem);
|
||||
overflow-x: auto;
|
||||
}
|
||||
.hermes-kanban-md-code code {
|
||||
background: transparent;
|
||||
padding: 0;
|
||||
font-size: 0.75rem;
|
||||
white-space: pre;
|
||||
}
|
||||
.hermes-kanban-md strong { font-weight: 600; }
|
||||
|
||||
/* ---- Touch-drag proxy ---------------------------------------------- */
|
||||
|
||||
.hermes-kanban-touch-proxy {
|
||||
pointer-events: none;
|
||||
opacity: 0.85;
|
||||
box-shadow: 0 8px 20px rgba(0, 0, 0, 0.35);
|
||||
transform: scale(1.02);
|
||||
transition: none;
|
||||
}
|
||||
|
||||
|
||||
/* ---- Staleness tiers ------------------------------------------------ */
|
||||
|
||||
.hermes-kanban-card--stale-amber :where(.hermes-kanban-card-content) {
|
||||
box-shadow: 0 0 0 1px #d4b34888 inset;
|
||||
}
|
||||
.hermes-kanban-card--stale-amber:hover :where(.hermes-kanban-card-content) {
|
||||
box-shadow: 0 0 0 2px #d4b348 inset;
|
||||
}
|
||||
.hermes-kanban-card--stale-red :where(.hermes-kanban-card-content) {
|
||||
box-shadow: 0 0 0 1px var(--color-destructive, #d14a4a) inset,
|
||||
0 0 8px color-mix(in srgb, var(--color-destructive, #d14a4a) 30%, transparent);
|
||||
}
|
||||
.hermes-kanban-card--stale-red:hover :where(.hermes-kanban-card-content) {
|
||||
box-shadow: 0 0 0 2px var(--color-destructive, #d14a4a) inset,
|
||||
0 0 10px color-mix(in srgb, var(--color-destructive, #d14a4a) 45%, transparent);
|
||||
}
|
||||
|
||||
/* ---- Worker log pane ------------------------------------------------ */
|
||||
|
||||
.hermes-kanban-log {
|
||||
max-height: 340px;
|
||||
overflow: auto;
|
||||
white-space: pre;
|
||||
font-size: 0.7rem;
|
||||
line-height: 1.45;
|
||||
}
|
||||
|
||||
|
||||
/* ---- Run history (per-attempt log in the drawer) ------------------- */
|
||||
|
||||
.hermes-kanban-run {
|
||||
border-left: 2px solid var(--color-border);
|
||||
padding: 0.35rem 0.5rem;
|
||||
margin-bottom: 0.4rem;
|
||||
background: color-mix(in srgb, var(--color-foreground) 3%, transparent);
|
||||
border-radius: var(--radius-sm, 0.25rem);
|
||||
}
|
||||
.hermes-kanban-run--active { border-left-color: #3fb97d; }
|
||||
.hermes-kanban-run--completed { border-left-color: #4a8cd1; }
|
||||
.hermes-kanban-run--ended { border-left-color: #6b7280; } /* generic fallback when outcome is unset */
|
||||
.hermes-kanban-run--blocked { border-left-color: var(--color-destructive, #d14a4a); }
|
||||
.hermes-kanban-run--crashed,
|
||||
.hermes-kanban-run--timed_out,
|
||||
.hermes-kanban-run--gave_up,
|
||||
.hermes-kanban-run--spawn_failed {
|
||||
border-left-color: var(--color-destructive, #d14a4a);
|
||||
background: color-mix(in srgb, var(--color-destructive, #d14a4a) 6%, transparent);
|
||||
}
|
||||
.hermes-kanban-run--reclaimed { border-left-color: #d4b348; }
|
||||
|
||||
.hermes-kanban-run-head {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.6rem;
|
||||
font-size: 0.7rem;
|
||||
}
|
||||
.hermes-kanban-run-outcome {
|
||||
font-family: var(--font-mono, ui-monospace, monospace);
|
||||
font-weight: 600;
|
||||
text-transform: uppercase;
|
||||
letter-spacing: 0.05em;
|
||||
color: var(--color-foreground);
|
||||
}
|
||||
.hermes-kanban-run-profile {
|
||||
color: var(--color-muted-foreground);
|
||||
}
|
||||
.hermes-kanban-run-elapsed {
|
||||
font-variant-numeric: tabular-nums;
|
||||
color: var(--color-muted-foreground);
|
||||
}
|
||||
.hermes-kanban-run-ago {
|
||||
margin-left: auto;
|
||||
color: var(--color-muted-foreground);
|
||||
}
|
||||
.hermes-kanban-run-summary {
|
||||
font-size: 0.75rem;
|
||||
padding: 0.2rem 0 0;
|
||||
color: var(--color-foreground);
|
||||
}
|
||||
.hermes-kanban-run-error {
|
||||
font-size: 0.7rem;
|
||||
color: var(--color-destructive, #d14a4a);
|
||||
padding: 0.15rem 0 0;
|
||||
font-family: var(--font-mono, ui-monospace, monospace);
|
||||
}
|
||||
/* Per-run metadata blob: monospace, wraps, never overflows the drawer. */
.hermes-kanban-run-meta {
  display: block;
  font-size: 0.65rem;
  padding: 0.15rem 0 0;
  color: var(--color-muted-foreground);
  white-space: pre-wrap;
  /* `word-break: break-word` is a deprecated legacy value; it is kept only as
     a fallback for engines without `overflow-wrap: anywhere`, which is the
     standard way to let long unbroken tokens wrap. */
  word-break: break-word;
  overflow-wrap: anywhere;
  font-family: var(--font-mono, ui-monospace, monospace);
}
|
||||
@@ -0,0 +1,14 @@
|
||||
{
|
||||
"name": "kanban",
|
||||
"label": "Kanban",
|
||||
"description": "Multi-agent collaboration board — drag-drop cards across columns, read comment threads, see which profile is running what",
|
||||
"icon": "Package",
|
||||
"version": "1.0.0",
|
||||
"tab": {
|
||||
"path": "/kanban",
|
||||
"position": "after:skills"
|
||||
},
|
||||
"entry": "dist/index.js",
|
||||
"css": "dist/style.css",
|
||||
"api": "plugin_api.py"
|
||||
}
|
||||
@@ -0,0 +1,845 @@
|
||||
"""Kanban dashboard plugin — backend API routes.
|
||||
|
||||
Mounted at /api/plugins/kanban/ by the dashboard plugin system.
|
||||
|
||||
This layer is intentionally thin: every handler is a small wrapper around
|
||||
``hermes_cli.kanban_db`` or a direct SQL query. Writes use the same code
|
||||
paths the CLI and gateway ``/kanban`` command use, so the three surfaces
|
||||
cannot drift.
|
||||
|
||||
Live updates arrive via the ``/events`` WebSocket, which tails the
|
||||
append-only ``task_events`` table on a short poll interval (WAL mode lets
|
||||
reads run alongside the dispatcher's IMMEDIATE write transactions).
|
||||
|
||||
Security note
|
||||
-------------
|
||||
The dashboard's HTTP auth middleware (``web_server.auth_middleware``)
|
||||
explicitly skips ``/api/plugins/`` — plugin routes are unauthenticated by
|
||||
design because the dashboard binds to localhost by default. For the
|
||||
WebSocket we still require the session token as a ``?token=`` query
|
||||
parameter (browsers cannot set the ``Authorization`` header on an upgrade
|
||||
request), matching the established pattern used by the in-browser PTY
|
||||
bridge in ``hermes_cli/web_server.py``. If you run the dashboard with
|
||||
``--host 0.0.0.0``, every plugin route — kanban included — becomes
|
||||
reachable from the network. Don't do that on a shared host.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import hmac
|
||||
import json
|
||||
import logging
|
||||
import sqlite3
|
||||
import time
|
||||
from dataclasses import asdict
|
||||
from typing import Any, Optional
|
||||
|
||||
from fastapi import APIRouter, HTTPException, Query, WebSocket, WebSocketDisconnect, status as http_status
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from hermes_cli import kanban_db
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Auth helper — WebSocket only (HTTP routes live behind the dashboard's
|
||||
# existing plugin-bypass; this is documented above).
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _check_ws_token(provided: Optional[str]) -> bool:
|
||||
"""Constant-time compare against the dashboard session token.
|
||||
|
||||
Imported lazily so the plugin still loads in test contexts where the
|
||||
dashboard web_server module isn't importable (e.g. the bare-FastAPI
|
||||
test harness).
|
||||
"""
|
||||
if not provided:
|
||||
return False
|
||||
try:
|
||||
from hermes_cli import web_server as _ws
|
||||
except Exception:
|
||||
# No dashboard context (tests). Accept so the tail loop is still
|
||||
# testable; in production the dashboard module always imports
|
||||
# cleanly because it's the caller.
|
||||
return True
|
||||
expected = getattr(_ws, "_SESSION_TOKEN", None)
|
||||
if not expected:
|
||||
return True
|
||||
return hmac.compare_digest(str(provided), str(expected))
|
||||
|
||||
|
||||
def _conn():
    """Return a fresh ``kanban_db`` connection, ensuring the schema first.

    ``kanban_db.init_db()`` is attempted on every call (it is idempotent) so
    a fresh install never surfaces a "no such table" error, e.g. when
    POST /tasks is hit before GET /board. An ``init_db`` failure is logged as
    a warning and does not prevent the connection attempt.
    """
    try:
        kanban_db.init_db()
    except Exception as init_error:
        log.warning("kanban init_db failed: %s", init_error)
    connection = kanban_db.connect()
    return connection
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Serialization helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Columns shown by the dashboard, in left-to-right order. "archived" is
|
||||
# available via a filter toggle rather than a visible column.
|
||||
BOARD_COLUMNS: list[str] = [
    "triage",
    "todo",
    "ready",
    "running",
    "blocked",
    "done",
]
|
||||
|
||||
|
||||
def _task_dict(task: kanban_db.Task) -> dict[str, Any]:
    """Serialise a task for JSON responses.

    Returns every dataclass field of ``task`` (via ``asdict``) plus an
    ``age`` entry computed by ``kanban_db.task_age`` so the UI can colour
    stale cards without computing deltas client-side. No field is trimmed
    here: list and detail endpoints receive the same payload.
    """
    return {**asdict(task), "age": kanban_db.task_age(task)}
|
||||
|
||||
|
||||
def _event_dict(event: kanban_db.Event) -> dict[str, Any]:
|
||||
return {
|
||||
"id": event.id,
|
||||
"task_id": event.task_id,
|
||||
"kind": event.kind,
|
||||
"payload": event.payload,
|
||||
"created_at": event.created_at,
|
||||
"run_id": event.run_id,
|
||||
}
|
||||
|
||||
|
||||
def _comment_dict(c: kanban_db.Comment) -> dict[str, Any]:
|
||||
return {
|
||||
"id": c.id,
|
||||
"task_id": c.task_id,
|
||||
"author": c.author,
|
||||
"body": c.body,
|
||||
"created_at": c.created_at,
|
||||
}
|
||||
|
||||
|
||||
def _run_dict(r: kanban_db.Run) -> dict[str, Any]:
|
||||
"""Serialise a Run for the drawer's Run history section."""
|
||||
return {
|
||||
"id": r.id,
|
||||
"task_id": r.task_id,
|
||||
"profile": r.profile,
|
||||
"step_key": r.step_key,
|
||||
"status": r.status,
|
||||
"claim_lock": r.claim_lock,
|
||||
"claim_expires": r.claim_expires,
|
||||
"worker_pid": r.worker_pid,
|
||||
"max_runtime_seconds": r.max_runtime_seconds,
|
||||
"last_heartbeat_at": r.last_heartbeat_at,
|
||||
"started_at": r.started_at,
|
||||
"ended_at": r.ended_at,
|
||||
"outcome": r.outcome,
|
||||
"summary": r.summary,
|
||||
"metadata": r.metadata,
|
||||
"error": r.error,
|
||||
}
|
||||
|
||||
|
||||
def _links_for(conn: sqlite3.Connection, task_id: str) -> dict[str, list[str]]:
|
||||
"""Return {'parents': [...], 'children': [...]} for a task."""
|
||||
parents = [
|
||||
r["parent_id"]
|
||||
for r in conn.execute(
|
||||
"SELECT parent_id FROM task_links WHERE child_id = ? ORDER BY parent_id",
|
||||
(task_id,),
|
||||
)
|
||||
]
|
||||
children = [
|
||||
r["child_id"]
|
||||
for r in conn.execute(
|
||||
"SELECT child_id FROM task_links WHERE parent_id = ? ORDER BY child_id",
|
||||
(task_id,),
|
||||
)
|
||||
]
|
||||
return {"parents": parents, "children": children}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# GET /board
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@router.get("/board")
def get_board(
    tenant: Optional[str] = Query(None, description="Filter to a single tenant"),
    include_archived: bool = Query(False),
):
    """Return the whole board, with tasks grouped by status column.

    The connection comes from ``_conn()``, which initialises ``kanban.db``
    on first use, so a fresh install doesn't surface a "failed to load"
    error on the plugin tab.

    The response carries ``columns`` (ordered list of ``{name, tasks}``),
    ``tenants``, ``assignees``, ``latest_event_id`` and ``now``.
    """
    conn = _conn()
    try:
        tasks = kanban_db.list_tasks(
            conn, tenant=tenant, include_archived=include_archived
        )

        # Link counts per task, built from a single scan of task_links.
        link_counts: dict[str, dict[str, int]] = {}
        link_rows = conn.execute(
            "SELECT parent_id, child_id FROM task_links"
        ).fetchall()
        for link in link_rows:
            as_parent = link_counts.setdefault(
                link["parent_id"], {"parents": 0, "children": 0}
            )
            as_parent["children"] += 1
            as_child = link_counts.setdefault(
                link["child_id"], {"parents": 0, "children": 0}
            )
            as_child["parents"] += 1

        # Comment count per task (one aggregate query).
        comment_counts: dict[str, int] = {}
        for counted in conn.execute(
            "SELECT task_id, COUNT(*) AS n FROM task_comments GROUP BY task_id"
        ):
            comment_counts[counted["task_id"]] = counted["n"]

        # Progress rollup: per parent, how many children are done out of the
        # total. A single join is cheaper than N per-task queries; the UI
        # renders it as "N/M".
        progress: dict[str, dict[str, int]] = {}
        child_status_rows = conn.execute(
            "SELECT l.parent_id AS pid, t.status AS cstatus "
            "FROM task_links l JOIN tasks t ON t.id = l.child_id"
        ).fetchall()
        for child in child_status_rows:
            tally = progress.setdefault(child["pid"], {"done": 0, "total": 0})
            tally["total"] += 1
            if child["cstatus"] == "done":
                tally["done"] += 1

        latest_event_id = conn.execute(
            "SELECT COALESCE(MAX(id), 0) AS m FROM task_events"
        ).fetchone()["m"]

        columns: dict[str, list[dict]] = {}
        for column_name in BOARD_COLUMNS:
            columns[column_name] = []
        if include_archived:
            columns["archived"] = []

        # list_tasks already orders by (priority DESC, created_at ASC);
        # appending in iteration order keeps that ordering per column.
        for t in tasks:
            card = _task_dict(t)
            card["link_counts"] = link_counts.get(t.id, {"parents": 0, "children": 0})
            card["comment_count"] = comment_counts.get(t.id, 0)
            card["progress"] = progress.get(t.id)  # None when the task has no children
            # Statuses without a column of their own are shown under "todo".
            target = t.status if t.status in columns else "todo"
            columns[target].append(card)

        # Known tenants for the UI filter dropdown.
        tenant_rows = conn.execute(
            "SELECT DISTINCT tenant FROM tasks WHERE tenant IS NOT NULL ORDER BY tenant"
        )
        tenants = [row["tenant"] for row in tenant_rows]

        # Distinct assignees for the lane-by-profile sub-grouping.
        assignee_rows = conn.execute(
            "SELECT DISTINCT assignee FROM tasks WHERE assignee IS NOT NULL "
            "AND status != 'archived' ORDER BY assignee"
        )
        assignees = [row["assignee"] for row in assignee_rows]

        return {
            "columns": [
                {"name": name, "tasks": cards} for name, cards in columns.items()
            ],
            "tenants": tenants,
            "assignees": assignees,
            "latest_event_id": int(latest_event_id),
            "now": int(time.time()),
        }
    finally:
        conn.close()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# GET /tasks/:id
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@router.get("/tasks/{task_id}")
def get_task(task_id: str):
    """Return one task with its comments, events, links and runs.

    Raises:
        HTTPException: 404 when no task with ``task_id`` exists.
    """
    conn = _conn()
    try:
        task = kanban_db.get_task(conn, task_id)
        if task is None:
            raise HTTPException(status_code=404, detail=f"task {task_id} not found")

        task_payload = _task_dict(task)
        comments = [_comment_dict(c) for c in kanban_db.list_comments(conn, task_id)]
        events = [_event_dict(e) for e in kanban_db.list_events(conn, task_id)]
        links = _links_for(conn, task_id)
        runs = [_run_dict(r) for r in kanban_db.list_runs(conn, task_id)]

        return {
            "task": task_payload,
            "comments": comments,
            "events": events,
            "links": links,
            "runs": runs,
        }
    finally:
        conn.close()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# POST /tasks
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class CreateTaskBody(BaseModel):
    """Request body for ``POST /tasks``.

    ``create_task`` forwards every field below, under the same name, to
    ``kanban_db.create_task``. ``title`` is the only required field.
    """

    title: str
    body: Optional[str] = None
    assignee: Optional[str] = None
    tenant: Optional[str] = None
    priority: int = 0
    workspace_kind: str = "scratch"
    workspace_path: Optional[str] = None
    # Ids of parent tasks; a fresh empty list per request.
    parents: list[str] = Field(default_factory=list)
    triage: bool = False
    idempotency_key: Optional[str] = None
    max_runtime_seconds: Optional[int] = None
    skills: Optional[list[str]] = None
|
||||
|
||||
|
||||
@router.post("/tasks")
def create_task(payload: CreateTaskBody):
    """Create a task and return it, optionally with a dispatcher warning.

    Returns:
        ``{"task": <task dict or None>}``, plus a ``"warning"`` string when
        the new task is ``ready`` and assigned but the dispatcher-presence
        probe reports that nothing is running to pick it up.

    Raises:
        HTTPException: 400 when ``kanban_db.create_task`` raises
            ``ValueError``.
    """
    conn = _conn()
    try:
        task_id = kanban_db.create_task(
            conn,
            title=payload.title,
            body=payload.body,
            assignee=payload.assignee,
            created_by="dashboard",
            workspace_kind=payload.workspace_kind,
            workspace_path=payload.workspace_path,
            tenant=payload.tenant,
            priority=payload.priority,
            parents=payload.parents,
            triage=payload.triage,
            idempotency_key=payload.idempotency_key,
            max_runtime_seconds=payload.max_runtime_seconds,
            skills=payload.skills,
        )
        task = kanban_db.get_task(conn, task_id)
        body: dict[str, Any] = {"task": _task_dict(task) if task else None}
        # Surface a dispatcher-presence warning so the UI can show a
        # banner when a `ready` task would otherwise sit idle because no
        # gateway is running (or dispatch_in_gateway=false). Only emit
        # for ready+assigned tasks; triage/todo are expected to wait,
        # and unassigned tasks can't be dispatched regardless.
        if task and task.status == "ready" and task.assignee:
            try:
                from hermes_cli.kanban import _check_dispatcher_presence

                running, message = _check_dispatcher_presence()
                if not running and message:
                    body["warning"] = message
            except Exception:
                # Probe failure must never block the create itself, but leave
                # a trace so a permanently broken probe can be diagnosed.
                log.debug("kanban dispatcher-presence probe failed", exc_info=True)
        return body
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
    finally:
        conn.close()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# PATCH /tasks/:id (status / assignee / priority / title / body)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class UpdateTaskBody(BaseModel):
    """Request body for ``PATCH /tasks/{task_id}``.

    Every field is optional; a field left as ``None`` is not touched by
    ``update_task``.
    """

    status: Optional[str] = None
    # An empty string unassigns (the handler passes ``assignee or None`` on).
    assignee: Optional[str] = None
    priority: Optional[int] = None
    title: Optional[str] = None
    body: Optional[str] = None
    # Passed to complete_task when status is 'done'.
    result: Optional[str] = None
    # Passed to block_task as ``reason`` when status is 'blocked'.
    block_reason: Optional[str] = None
    # Structured handoff fields, forwarded to complete_task when status
    # transitions to 'done'. Dashboard parity with ``hermes kanban
    # complete --summary ... --metadata ...``.
    summary: Optional[str] = None
    metadata: Optional[dict] = None
|
||||
|
||||
|
||||
@router.patch("/tasks/{task_id}")
def update_task(task_id: str, payload: UpdateTaskBody):
    """Apply a partial update (assignee / status / priority / title / body).

    Payload-only validation (unknown status, empty title) runs *before* any
    write. Previously those 400s fired after the assignee, status and
    priority changes had already been committed, so the client got an error
    response for a request that had been partially applied.

    Returns:
        ``{"task": <updated task dict or None>}``.

    Raises:
        HTTPException: 404 if the task does not exist; 400 for an unknown
            status or an empty title; 409 when ``assign_task`` raises
            ``RuntimeError`` or a status transition is refused.
    """
    conn = _conn()
    try:
        task = kanban_db.get_task(conn, task_id)
        if task is None:
            raise HTTPException(status_code=404, detail=f"task {task_id} not found")

        # --- up-front validation (no DB writes yet) -----------------------
        if payload.status is not None and payload.status not in (
            "done", "blocked", "ready", "archived", "todo", "running", "triage",
        ):
            raise HTTPException(
                status_code=400, detail=f"unknown status: {payload.status}",
            )
        new_title: Optional[str] = None
        if payload.title is not None:
            new_title = payload.title.strip()
            if not new_title:
                raise HTTPException(status_code=400, detail="title cannot be empty")

        # --- assignee ----------------------------------------------------
        if payload.assignee is not None:
            try:
                # "" means unassign, hence ``or None``.
                ok = kanban_db.assign_task(
                    conn, task_id, payload.assignee or None,
                )
            except RuntimeError as e:
                raise HTTPException(status_code=409, detail=str(e)) from e
            if not ok:
                raise HTTPException(status_code=404, detail="task not found")

        # --- status -------------------------------------------------------
        if payload.status is not None:
            s = payload.status
            ok = True
            if s == "done":
                ok = kanban_db.complete_task(
                    conn, task_id,
                    result=payload.result,
                    summary=payload.summary,
                    metadata=payload.metadata,
                )
            elif s == "blocked":
                ok = kanban_db.block_task(conn, task_id, reason=payload.block_reason)
            elif s == "ready":
                # Re-open a blocked task, or just an explicit status set.
                current = kanban_db.get_task(conn, task_id)
                if current and current.status == "blocked":
                    ok = kanban_db.unblock_task(conn, task_id)
                else:
                    # Direct status write for drag-drop (todo -> ready etc).
                    ok = _set_status_direct(conn, task_id, "ready")
            elif s == "archived":
                ok = kanban_db.archive_task(conn, task_id)
            else:
                # Only "todo", "running" and "triage" can reach this branch;
                # anything else was rejected by the validation above.
                ok = _set_status_direct(conn, task_id, s)
            if not ok:
                raise HTTPException(
                    status_code=409,
                    detail=f"status transition to {s!r} not valid from current state",
                )

        # --- priority -----------------------------------------------------
        if payload.priority is not None:
            with kanban_db.write_txn(conn):
                conn.execute(
                    "UPDATE tasks SET priority = ? WHERE id = ?",
                    (int(payload.priority), task_id),
                )
                conn.execute(
                    "INSERT INTO task_events (task_id, kind, payload, created_at) "
                    "VALUES (?, 'reprioritized', ?, ?)",
                    (task_id, json.dumps({"priority": int(payload.priority)}),
                     int(time.time())),
                )

        # --- title / body -------------------------------------------------
        if new_title is not None or payload.body is not None:
            with kanban_db.write_txn(conn):
                sets, vals = [], []
                if new_title is not None:
                    sets.append("title = ?")
                    vals.append(new_title)
                if payload.body is not None:
                    sets.append("body = ?")
                    vals.append(payload.body)
                vals.append(task_id)
                # Only fixed column fragments are interpolated; all values
                # are bound as parameters.
                conn.execute(
                    f"UPDATE tasks SET {', '.join(sets)} WHERE id = ?", vals,
                )
                conn.execute(
                    "INSERT INTO task_events (task_id, kind, payload, created_at) "
                    "VALUES (?, 'edited', NULL, ?)",
                    (task_id, int(time.time())),
                )

        updated = kanban_db.get_task(conn, task_id)
        return {"task": _task_dict(updated) if updated else None}
    finally:
        conn.close()
|
||||
|
||||
|
||||
def _set_status_direct(
    conn: sqlite3.Connection, task_id: str, new_status: str,
) -> bool:
    """Write ``new_status`` straight onto a task and log a ``status`` event.

    Used for drag-drop moves that aren't covered by the structured
    complete/block/unblock/archive verbs (e.g. todo<->ready,
    running<->ready).

    Unless the new status is ``running``, the task's claim fields
    (``claim_lock``, ``claim_expires``, ``worker_pid``) are cleared. When
    the task was ``running`` and is moved elsewhere while it still has a
    ``current_run_id``, the run is closed with outcome ``reclaimed`` so
    attempt history isn't orphaned (typical case: a user yanking a stuck
    worker back to the queue).

    Returns:
        ``False`` if the task does not exist or the UPDATE did not touch
        exactly one row, ``True`` otherwise.
    """
    with kanban_db.write_txn(conn):
        # Read the pre-update state first: it decides whether a run must be
        # closed after the write.
        before = conn.execute(
            "SELECT status, current_run_id FROM tasks WHERE id = ?",
            (task_id,),
        ).fetchone()
        if before is None:
            return False
        leaving_running = before["status"] == "running" and new_status != "running"

        update_params = (new_status,) * 4 + (task_id,)
        updated = conn.execute(
            "UPDATE tasks SET status = ?, "
            " claim_lock = CASE WHEN ? = 'running' THEN claim_lock ELSE NULL END, "
            " claim_expires = CASE WHEN ? = 'running' THEN claim_expires ELSE NULL END, "
            " worker_pid = CASE WHEN ? = 'running' THEN worker_pid ELSE NULL END "
            "WHERE id = ?",
            update_params,
        )
        if updated.rowcount != 1:
            return False

        closed_run_id = None
        if leaving_running and before["current_run_id"]:
            closed_run_id = kanban_db._end_run(
                conn, task_id,
                outcome="reclaimed", status="reclaimed",
                summary=f"status changed to {new_status} (dashboard/direct)",
            )

        event_payload = json.dumps({"status": new_status})
        conn.execute(
            "INSERT INTO task_events (task_id, run_id, kind, payload, created_at) "
            "VALUES (?, ?, 'status', ?, ?)",
            (task_id, closed_run_id, event_payload, int(time.time())),
        )

        # If we re-opened something, children may have gone stale.
        if new_status in ("done", "ready"):
            kanban_db.recompute_ready(conn)
        return True
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Comments
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class CommentBody(BaseModel):
    """Request body for ``POST /tasks/{task_id}/comments``."""

    # Comment text; the handler rejects a body that is blank after stripping.
    body: str
    # A falsy author is replaced with "dashboard" by the handler.
    author: Optional[str] = "dashboard"
|
||||
|
||||
|
||||
@router.post("/tasks/{task_id}/comments")
def add_comment(task_id: str, payload: CommentBody):
    """Attach a comment to an existing task.

    Returns:
        ``{"ok": True}`` on success.

    Raises:
        HTTPException: 400 when the body is blank (checked before the DB is
            opened); 404 when the task does not exist.
    """
    text = payload.body
    if not text.strip():
        raise HTTPException(status_code=400, detail="body is required")

    conn = _conn()
    try:
        existing = kanban_db.get_task(conn, task_id)
        if existing is None:
            raise HTTPException(status_code=404, detail=f"task {task_id} not found")
        # The body is stored as sent; stripping is used for validation only.
        author = payload.author or "dashboard"
        kanban_db.add_comment(conn, task_id, author=author, body=text)
        return {"ok": True}
    finally:
        conn.close()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Links
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class LinkBody(BaseModel):
    """Request body for ``POST /links``: one parent -> child dependency edge."""

    parent_id: str
    child_id: str
|
||||
|
||||
|
||||
@router.post("/links")
def add_link(payload: LinkBody):
    """Create a parent -> child link through ``kanban_db.link_tasks``.

    Returns:
        ``{"ok": True}`` on success.

    Raises:
        HTTPException: 400 carrying the message of any ``ValueError`` raised
            by ``link_tasks``.
    """
    conn = _conn()
    try:
        parent, child = payload.parent_id, payload.child_id
        kanban_db.link_tasks(conn, parent, child)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))
    else:
        return {"ok": True}
    finally:
        conn.close()
|
||||
|
||||
|
||||
@router.delete("/links")
def delete_link(parent_id: str = Query(...), child_id: str = Query(...)):
    """Remove a parent -> child link through ``kanban_db.unlink_tasks``.

    Both ids are required query parameters.

    Returns:
        ``{"ok": <bool>}``, the truthiness of ``unlink_tasks``'s result.
    """
    conn = _conn()
    try:
        removed = kanban_db.unlink_tasks(conn, parent_id, child_id)
        return {"ok": bool(removed)}
    finally:
        conn.close()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Bulk actions (multi-select on the board)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class BulkTaskBody(BaseModel):
    """Request body for ``POST /tasks/bulk``: one patch applied to many ids.

    A field left as ``None`` is not applied by ``bulk_update``.
    """

    # Task ids to patch; falsy entries are dropped by the handler.
    ids: list[str]
    # Ignored when ``archive`` is true.
    status: Optional[str] = None
    # "" = unassign; None = leave the assignee unchanged. (The handler only
    # acts when this is not None and passes ``assignee or None`` on.)
    assignee: Optional[str] = None
    priority: Optional[int] = None
    # When true, each task is archived and ``status`` is not applied.
    archive: bool = False
|
||||
|
||||
|
||||
@router.post("/tasks/bulk")
def bulk_update(payload: BulkTaskBody):
    """Run one shared patch against each task id in ``payload.ids``.

    Ids are processed one at a time and independently: a failure on one id
    is recorded on that id's entry and the loop carries on with the next.
    The response holds one ``{"id", "ok"[, "error"]}`` entry per id, in
    request order, so the UI can report partial success.

    Raises a 400 when no non-empty id was supplied.
    """
    task_ids = [tid for tid in (payload.ids or []) if tid]
    if not task_ids:
        raise HTTPException(status_code=400, detail="ids is required")

    outcomes: list[dict] = []
    db = _conn()
    try:
        for tid in task_ids:
            outcome: dict[str, Any] = {"id": tid, "ok": True}
            # Registered up front and mutated in place, so every exit path
            # below reports exactly one entry for this id.
            outcomes.append(outcome)
            try:
                if kanban_db.get_task(db, tid) is None:
                    outcome.update(ok=False, error="not found")
                    continue

                if payload.archive and not kanban_db.archive_task(db, tid):
                    outcome.update(ok=False, error="archive refused")

                # A status change is skipped entirely when archiving.
                target = payload.status
                if target is not None and not payload.archive:
                    if target == "done":
                        moved = kanban_db.complete_task(db, tid)
                    elif target == "blocked":
                        moved = kanban_db.block_task(db, tid)
                    elif target == "ready":
                        # Blocked tasks go through unblock_task; everything
                        # else gets its status written directly.
                        current = kanban_db.get_task(db, tid)
                        if current and current.status == "blocked":
                            moved = kanban_db.unblock_task(db, tid)
                        else:
                            moved = _set_status_direct(db, tid, "ready")
                    elif target in ("todo", "running", "triage"):
                        moved = _set_status_direct(db, tid, target)
                    else:
                        outcome.update(ok=False, error=f"unknown status {target!r}")
                        continue
                    if not moved:
                        outcome.update(ok=False, error=f"transition to {target!r} refused")

                if payload.assignee is not None:
                    try:
                        # "" is normalised to None, i.e. unassign.
                        assigned = kanban_db.assign_task(db, tid, payload.assignee or None)
                    except RuntimeError as err:
                        outcome.update(ok=False, error=str(err))
                    else:
                        if not assigned:
                            outcome.update(ok=False, error="assign refused")

                if payload.priority is not None:
                    # Priority change and its audit event share one write txn.
                    with kanban_db.write_txn(db):
                        db.execute(
                            "UPDATE tasks SET priority = ? WHERE id = ?",
                            (int(payload.priority), tid),
                        )
                        db.execute(
                            "INSERT INTO task_events (task_id, kind, payload, created_at) "
                            "VALUES (?, 'reprioritized', ?, ?)",
                            (
                                tid,
                                json.dumps({"priority": int(payload.priority)}),
                                int(time.time()),
                            ),
                        )
            except Exception as err:  # defensive — one bad id shouldn't kill the batch
                outcome.update(ok=False, error=str(err))
        return {"results": outcomes}
    finally:
        db.close()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Plugin config (read dashboard.kanban.* defaults from config.yaml)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@router.get("/config")
def get_config():
    """Return kanban dashboard preferences from ~/.hermes/config.yaml.

    Reads the ``dashboard.kanban`` section if present; defaults otherwise.
    Used by the UI to pre-select tenant filters, toggle markdown rendering,
    or set column-width preferences without a round-trip per page load.

    Any failure to import or load the config yields the defaults. A
    ``dashboard`` or ``dashboard.kanban`` value that is missing or is not a
    mapping (for example ``kanban: true``) is treated as an empty section,
    so a malformed config degrades to defaults instead of a 500.
    """
    from collections.abc import Mapping

    def _as_mapping(value):
        # Anything that is not a mapping is treated as "section absent".
        return value if isinstance(value, Mapping) else {}

    try:
        from hermes_cli.config import load_config
        cfg = load_config() or {}
    except Exception:
        cfg = {}

    dash_cfg = _as_mapping(_as_mapping(cfg).get("dashboard"))
    k_cfg = _as_mapping(dash_cfg.get("kanban"))
    return {
        "default_tenant": k_cfg.get("default_tenant") or "",
        "lane_by_profile": bool(k_cfg.get("lane_by_profile", True)),
        "include_archived_by_default": bool(k_cfg.get("include_archived_by_default", False)),
        "render_markdown": bool(k_cfg.get("render_markdown", True)),
    }
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Stats (per-profile / per-status counts + oldest-ready age)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@router.get("/stats")
def get_stats():
    """Return board statistics as computed by ``kanban_db.board_stats``.

    Intended for the dashboard HUD and for router profiles that want to
    answer "is this specialist overloaded?" without walking the whole
    board themselves.
    """
    db = _conn()
    try:
        stats = kanban_db.board_stats(db)
    finally:
        db.close()
    return stats
|
||||
|
||||
|
||||
@router.get("/assignees")
def get_assignees():
    """List the assignees known to the board.

    The payload is whatever ``kanban_db.known_assignees`` returns, wrapped
    under the ``"assignees"`` key. The dashboard uses it to fill the
    assignee dropdown, so that a freshly created profile can be picked
    before it has been given any task.
    """
    db = _conn()
    try:
        known = kanban_db.known_assignees(db)
    finally:
        db.close()
    return {"assignees": known}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Worker log (read-only; file written by _default_spawn)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@router.get("/tasks/{task_id}/log")
def get_task_log(task_id: str, tail: Optional[int] = Query(None, ge=1, le=2_000_000)):
    """Return the worker's stdout/stderr log for a task.

    ``tail`` caps the response size (bytes) so the dashboard drawer
    doesn't pull megabytes into the browser.

    Returns 404 only when ``task_id`` does not resolve to a task. A task
    whose log cannot be read still gets a 200: ``exists`` is False (the
    reader returned ``None``) and ``content`` is the empty string.

    ``size_bytes`` is the current on-disk size of the log file, or 0 when
    the file cannot be stat'ed. ``truncated`` is True when ``tail`` was
    given and the file is larger than that cap.
    """
    conn = _conn()
    try:
        task = kanban_db.get_task(conn, task_id)
    finally:
        conn.close()
    if task is None:
        raise HTTPException(status_code=404, detail=f"task {task_id} not found")

    content = kanban_db.read_worker_log(task_id, tail_bytes=tail)
    log_path = kanban_db.worker_log_path(task_id)
    # stat() directly instead of exists()+stat(): the file can be rotated or
    # removed between the two calls, which would otherwise surface as a 500.
    try:
        size = log_path.stat().st_size
    except OSError:
        size = 0
    return {
        "task_id": task_id,
        "path": str(log_path),
        "exists": content is not None,
        "size_bytes": size,
        "content": content or "",
        # Truncated when the on-disk file was larger than the tail cap.
        "truncated": bool(tail and size > tail),
    }
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Dispatch nudge (optional quick-path so the UI doesn't wait 60 s)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@router.post("/dispatch")
def dispatch(dry_run: bool = Query(False), max_n: int = Query(8, alias="max")):
    """Run a single dispatcher pass on demand.

    ``dry_run`` and ``max`` (bound to ``max_n``) are forwarded to
    ``kanban_db.dispatch_once``. The result is serialised with
    ``dataclasses.asdict``; when that raises ``TypeError`` the response
    falls back to ``{"result": str(result)}``.
    """
    db = _conn()
    try:
        outcome = kanban_db.dispatch_once(db, dry_run=dry_run, max_spawn=max_n)
        try:
            body = asdict(outcome)
        except TypeError:
            # Not a dataclass instance: return its string form instead.
            body = {"result": str(outcome)}
        return body
    finally:
        db.close()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# WebSocket: /events?since=<event_id>
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Poll interval for the event tail loop. SQLite WAL + 300 ms polling is
# the simplest and most robust approach; it adds a fraction of a percent
# of CPU and has no shared state to synchronize across workers.
_EVENT_POLL_SECONDS = 0.3


@router.websocket("/events")
async def stream_events(ws: WebSocket):
    """Stream new ``task_events`` rows to a dashboard client.

    Query parameters:
      * ``token`` — dashboard session token; the socket is closed with a
        policy-violation code when ``_check_ws_token`` rejects it.
      * ``since`` — id of the last event the client has seen; a missing or
        non-integer value is treated as 0.

    Every ``_EVENT_POLL_SECONDS`` the handler reads up to 200 rows with an
    id above the cursor and, when there are any, sends
    ``{"events": [...], "cursor": <last id>}``.

    The handler only ever sends, so a background task reads from the socket
    purely to observe the client's disconnect. Without it a client that
    leaves while the board is idle is never noticed and the poll loop keeps
    querying the database for it indefinitely.
    """
    # Enforce the dashboard session token as a query param — browsers can't
    # set Authorization on a WS upgrade. This matches how the PTY bridge
    # authenticates in hermes_cli/web_server.py.
    token = ws.query_params.get("token")
    if not _check_ws_token(token):
        await ws.close(code=http_status.WS_1008_POLICY_VIOLATION)
        return
    await ws.accept()

    async def _watch_disconnect() -> None:
        # Drain inbound frames (their content is ignored) and finish as soon
        # as the disconnect message arrives or the socket stops being readable.
        try:
            while True:
                message = await ws.receive()
                if message.get("type") == "websocket.disconnect":
                    return
        except Exception:
            return

    watcher = asyncio.create_task(_watch_disconnect())
    try:
        since_raw = ws.query_params.get("since", "0")
        try:
            cursor = int(since_raw)
        except ValueError:
            cursor = 0

        def _fetch_new(cursor_val: int) -> tuple[int, list[dict]]:
            # Runs in a worker thread with its own short-lived connection.
            conn = kanban_db.connect()
            try:
                rows = conn.execute(
                    "SELECT id, task_id, run_id, kind, payload, created_at "
                    "FROM task_events WHERE id > ? ORDER BY id ASC LIMIT 200",
                    (cursor_val,),
                ).fetchall()
                out: list[dict] = []
                new_cursor = cursor_val
                for r in rows:
                    try:
                        payload = json.loads(r["payload"]) if r["payload"] else None
                    except Exception:
                        # Undecodable payloads are delivered as null.
                        payload = None
                    out.append({
                        "id": r["id"],
                        "task_id": r["task_id"],
                        "run_id": r["run_id"],
                        "kind": r["kind"],
                        "payload": payload,
                        "created_at": r["created_at"],
                    })
                    new_cursor = r["id"]
                return new_cursor, out
            finally:
                conn.close()

        # Stop polling once the watcher has seen the client go away.
        while not watcher.done():
            cursor, events = await asyncio.to_thread(_fetch_new, cursor)
            if events:
                await ws.send_json({"events": events, "cursor": cursor})
            await asyncio.sleep(_EVENT_POLL_SECONDS)
    except WebSocketDisconnect:
        return
    except Exception as exc:  # defensive: never crash the dashboard worker
        log.warning("Kanban event stream error: %s", exc)
        try:
            await ws.close()
        except Exception:
            pass
    finally:
        # No-op when the watcher already finished.
        watcher.cancel()
|
||||
@@ -0,0 +1,32 @@
|
||||
# DEPRECATED — the kanban dispatcher now runs inside the gateway by
|
||||
# default (config key: kanban.dispatch_in_gateway, default true). To
|
||||
# migrate:
|
||||
#
|
||||
# systemctl --user disable --now hermes-kanban-dispatcher.service
|
||||
# # then make sure a gateway is running; e.g. a systemd user unit
|
||||
# # for `hermes gateway start`. The gateway hosts the dispatcher.
|
||||
#
|
||||
# This unit is kept for users who truly cannot run the gateway (host
|
||||
# policy forbids long-lived services, etc.). It now invokes the
|
||||
# standalone dispatcher via the explicit --force flag, so nobody
|
||||
# accidentally keeps two dispatchers racing against the same
|
||||
# kanban.db. Running this unit AND a gateway with
|
||||
# dispatch_in_gateway=true is NOT supported.
|
||||
|
||||
[Unit]
|
||||
Description=Hermes Kanban dispatcher (DEPRECATED standalone daemon — prefer gateway-embedded dispatch)
|
||||
Documentation=https://hermes-agent.nousresearch.com/docs/user-guide/features/kanban
|
||||
After=network.target
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
ExecStart=/usr/bin/env hermes kanban daemon --force --interval 60 --pidfile %t/hermes-kanban-dispatcher.pid
|
||||
Restart=on-failure
|
||||
RestartSec=5
|
||||
# Log to the journal via stdout/stderr; the dispatcher also writes per-task
|
||||
# worker output to $HERMES_HOME/kanban/logs/<task>.log.
|
||||
StandardOutput=journal
|
||||
StandardError=journal
|
||||
|
||||
[Install]
|
||||
WantedBy=default.target
|
||||
@@ -59,7 +59,8 @@ Config file: `~/.hermes/hindsight/config.json`
|
||||
|
||||
| Key | Default | Description |
|
||||
|-----|---------|-------------|
|
||||
| `bank_id` | `hermes` | Memory bank name |
|
||||
| `bank_id` | `hermes` | Memory bank name (static fallback used when `bank_id_template` is unset or resolves empty) |
|
||||
| `bank_id_template` | — | Optional template to derive the bank name dynamically. Placeholders: `{profile}`, `{workspace}`, `{platform}`, `{user}`, `{session}`. Example: `hermes-{profile}` isolates memory per active Hermes profile. Empty placeholders collapse cleanly (e.g. `hermes-{user}` with no user becomes `hermes`). |
|
||||
| `bank_mission` | — | Reflect mission (identity/framing for reflect reasoning). Applied via Banks API. |
|
||||
| `bank_retain_mission` | — | Retain mission (steers what gets extracted). Applied via Banks API. |
|
||||
|
||||
|
||||
@@ -3,6 +3,8 @@
|
||||
Long-term memory with knowledge graph, entity resolution, and multi-strategy
|
||||
retrieval. Supports cloud (API key) and local modes.
|
||||
|
||||
Configurable timeout via HINDSIGHT_TIMEOUT env var or config.json.
|
||||
|
||||
Original PR #1811 by benfrank241, adapted to MemoryProvider ABC.
|
||||
|
||||
Config via environment variables:
|
||||
@@ -11,6 +13,7 @@ Config via environment variables:
|
||||
HINDSIGHT_BUDGET — recall budget: low/mid/high (default: mid)
|
||||
HINDSIGHT_API_URL — API endpoint
|
||||
HINDSIGHT_MODE — cloud or local (default: cloud)
|
||||
HINDSIGHT_TIMEOUT — API request timeout in seconds (default: 120)
|
||||
HINDSIGHT_RETAIN_TAGS — comma-separated tags attached to retained memories
|
||||
HINDSIGHT_RETAIN_SOURCE — metadata source value attached to retained memories
|
||||
HINDSIGHT_RETAIN_USER_PREFIX — label used before user turns in retained transcripts
|
||||
@@ -23,6 +26,7 @@ Or via $HERMES_HOME/hindsight/config.json (profile-scoped), falling back to
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import importlib
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
@@ -40,6 +44,7 @@ logger = logging.getLogger(__name__)
|
||||
_DEFAULT_API_URL = "https://api.hindsight.vectorize.io"
|
||||
_DEFAULT_LOCAL_URL = "http://localhost:8888"
|
||||
_MIN_CLIENT_VERSION = "0.4.22"
|
||||
_DEFAULT_TIMEOUT = 120 # seconds — cloud API can take 30-40s per request
|
||||
_VALID_BUDGETS = {"low", "mid", "high"}
|
||||
_PROVIDER_DEFAULT_MODELS = {
|
||||
"openai": "gpt-4o-mini",
|
||||
@@ -54,6 +59,22 @@ _PROVIDER_DEFAULT_MODELS = {
|
||||
}
|
||||
|
||||
|
||||
def _check_local_runtime() -> tuple[bool, str | None]:
|
||||
"""Return whether local embedded Hindsight imports cleanly.
|
||||
|
||||
On older CPUs, importing the local Hindsight stack can raise a runtime
|
||||
error from NumPy before the daemon starts. Treat that as "unavailable"
|
||||
so Hermes can degrade gracefully instead of repeatedly trying to start
|
||||
a broken local memory backend.
|
||||
"""
|
||||
try:
|
||||
importlib.import_module("hindsight")
|
||||
importlib.import_module("hindsight_embed.daemon_embed_manager")
|
||||
return True, None
|
||||
except Exception as exc:
|
||||
return False, str(exc)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Dedicated event loop for Hindsight async calls (one per process, reused).
|
||||
# Avoids creating ephemeral loops that leak aiohttp sessions.
|
||||
@@ -81,13 +102,18 @@ def _get_loop() -> asyncio.AbstractEventLoop:
|
||||
return _loop
|
||||
|
||||
|
||||
def _run_sync(coro, timeout: float = 120.0):
|
||||
def _run_sync(coro, timeout: float = _DEFAULT_TIMEOUT):
    """Schedule *coro* on the shared loop and block until done.

    Args:
        coro: Coroutine object to run on the loop returned by ``_get_loop``.
        timeout: Seconds to wait for the result.

    Returns:
        Whatever the coroutine returns.

    Raises:
        concurrent.futures.TimeoutError: The coroutine did not finish in
            time. It is cancelled before the error is re-raised, so it does
            not keep running on the shared loop after the caller has given
            up on it.
        Exception: Anything the coroutine itself raised.
    """
    import concurrent.futures

    loop = _get_loop()
    future = asyncio.run_coroutine_threadsafe(coro, loop)
    try:
        return future.result(timeout=timeout)
    except concurrent.futures.TimeoutError:
        future.cancel()
        raise
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Backward-compatible alias — instances use self._run_sync() instead.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tool schemas
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -233,6 +259,126 @@ def _utc_timestamp() -> str:
|
||||
return datetime.now(timezone.utc).isoformat(timespec="milliseconds").replace("+00:00", "Z")
|
||||
|
||||
|
||||
def _embedded_profile_name(config: dict[str, Any]) -> str:
|
||||
"""Return the Hindsight embedded profile name for this Hermes config."""
|
||||
profile = config.get("profile", "hermes")
|
||||
return str(profile or "hermes")
|
||||
|
||||
|
||||
def _load_simple_env(path) -> dict[str, str]:
|
||||
"""Parse a simple KEY=VALUE env file, ignoring comments and blank lines."""
|
||||
if not path.exists():
|
||||
return {}
|
||||
|
||||
values: dict[str, str] = {}
|
||||
for line in path.read_text(encoding="utf-8").splitlines():
|
||||
if not line or line.startswith("#") or "=" not in line:
|
||||
continue
|
||||
key, value = line.split("=", 1)
|
||||
values[key.strip()] = value.strip()
|
||||
return values
|
||||
|
||||
|
||||
def _build_embedded_profile_env(config: dict[str, Any], *, llm_api_key: str | None = None) -> dict[str, str]:
|
||||
"""Build the profile-scoped env file that standalone hindsight-embed consumes."""
|
||||
current_key = llm_api_key
|
||||
if current_key is None:
|
||||
current_key = (
|
||||
config.get("llmApiKey")
|
||||
or config.get("llm_api_key")
|
||||
or os.environ.get("HINDSIGHT_LLM_API_KEY", "")
|
||||
)
|
||||
|
||||
current_provider = config.get("llm_provider", "")
|
||||
current_model = config.get("llm_model", "")
|
||||
current_base_url = config.get("llm_base_url") or os.environ.get("HINDSIGHT_API_LLM_BASE_URL", "")
|
||||
|
||||
# The embedded daemon expects OpenAI wire format for these providers.
|
||||
daemon_provider = "openai" if current_provider in ("openai_compatible", "openrouter") else current_provider
|
||||
|
||||
env_values = {
|
||||
"HINDSIGHT_API_LLM_PROVIDER": str(daemon_provider),
|
||||
"HINDSIGHT_API_LLM_API_KEY": str(current_key or ""),
|
||||
"HINDSIGHT_API_LLM_MODEL": str(current_model),
|
||||
"HINDSIGHT_API_LOG_LEVEL": "info",
|
||||
}
|
||||
if current_base_url:
|
||||
env_values["HINDSIGHT_API_LLM_BASE_URL"] = str(current_base_url)
|
||||
return env_values
|
||||
|
||||
|
||||
def _embedded_profile_env_path(config: dict[str, Any]):
    """Return ``~/.hindsight/profiles/<profile>.env`` for this config.

    ``<profile>`` is whatever ``_embedded_profile_name`` derives from
    *config*. The path is only computed here, never created.
    """
    from pathlib import Path

    filename = f"{_embedded_profile_name(config)}.env"
    return Path.home().joinpath(".hindsight", "profiles", filename)
|
||||
|
||||
|
||||
def _materialize_embedded_profile_env(config: dict[str, Any], *, llm_api_key: str | None = None):
    """Write the profile-scoped env file that standalone hindsight-embed uses.

    Args:
        config: Provider config used to derive both the file path and the
            values written.
        llm_api_key: Optional explicit LLM key, forwarded to
            ``_build_embedded_profile_env``.

    Returns:
        Path of the written env file.

    The file contains the LLM API key, so it is restricted to owner
    read/write (0600) before the secret is written. Failure to change the
    mode is logged at debug level and does not prevent the write.
    """
    profile_env = _embedded_profile_env_path(config)
    profile_env.parent.mkdir(parents=True, exist_ok=True)
    env_values = _build_embedded_profile_env(config, llm_api_key=llm_api_key)

    try:
        # touch() creates a missing file with a private mode; chmod() tightens
        # a file left behind by an earlier run with looser permissions.
        profile_env.touch(mode=0o600, exist_ok=True)
        profile_env.chmod(0o600)
    except OSError as exc:
        logger.debug("Could not restrict permissions on %s: %s", profile_env, exc)

    profile_env.write_text(
        "".join(f"{key}={value}\n" for key, value in env_values.items()),
        encoding="utf-8",
    )
    return profile_env
|
||||
|
||||
def _sanitize_bank_segment(value: str) -> str:
|
||||
"""Sanitize a bank_id_template placeholder value.
|
||||
|
||||
Bank IDs should be safe for URL paths and filesystem use. Replaces any
|
||||
character that isn't alphanumeric, dash, or underscore with a dash, and
|
||||
collapses runs of dashes.
|
||||
"""
|
||||
if not value:
|
||||
return ""
|
||||
out = []
|
||||
prev_dash = False
|
||||
for ch in str(value):
|
||||
if ch.isalnum() or ch == "-" or ch == "_":
|
||||
out.append(ch)
|
||||
prev_dash = False
|
||||
else:
|
||||
if not prev_dash:
|
||||
out.append("-")
|
||||
prev_dash = True
|
||||
return "".join(out).strip("-_")
|
||||
|
||||
|
||||
def _resolve_bank_id_template(template: str, fallback: str, **placeholders: str) -> str:
|
||||
"""Resolve a bank_id template string with the given placeholders.
|
||||
|
||||
Supported placeholders (each is sanitized before substitution):
|
||||
{profile} — active Hermes profile name (from agent_identity)
|
||||
{workspace} — Hermes workspace name (from agent_workspace)
|
||||
{platform} — "cli", "telegram", "discord", etc.
|
||||
{user} — platform user id (gateway sessions)
|
||||
{session} — current session id
|
||||
|
||||
Missing/empty placeholders are rendered as the empty string and then
|
||||
collapsed — e.g. ``hermes-{user}`` with no user becomes ``hermes``.
|
||||
|
||||
If the template is empty, resolution falls back to *fallback*.
|
||||
Returns the sanitized bank id.
|
||||
"""
|
||||
if not template:
|
||||
return fallback
|
||||
sanitized = {k: _sanitize_bank_segment(v) for k, v in placeholders.items()}
|
||||
try:
|
||||
rendered = template.format(**sanitized)
|
||||
except (KeyError, IndexError) as exc:
|
||||
logger.warning("Invalid bank_id_template %r: %s — using fallback %r",
|
||||
template, exc, fallback)
|
||||
return fallback
|
||||
while "--" in rendered:
|
||||
rendered = rendered.replace("--", "-")
|
||||
while "__" in rendered:
|
||||
rendered = rendered.replace("__", "_")
|
||||
rendered = rendered.strip("-_")
|
||||
return rendered or fallback
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# MemoryProvider implementation
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -262,13 +408,17 @@ class HindsightMemoryProvider(MemoryProvider):
|
||||
self._chat_type = ""
|
||||
self._thread_id = ""
|
||||
self._agent_identity = ""
|
||||
self._agent_workspace = ""
|
||||
self._turn_index = 0
|
||||
self._client = None
|
||||
self._timeout = _DEFAULT_TIMEOUT
|
||||
self._prefetch_result = ""
|
||||
self._prefetch_lock = threading.Lock()
|
||||
self._prefetch_thread = None
|
||||
self._sync_thread = None
|
||||
self._session_id = ""
|
||||
self._parent_session_id = ""
|
||||
self._document_id = ""
|
||||
|
||||
# Tags
|
||||
self._tags: list[str] | None = None
|
||||
@@ -293,6 +443,7 @@ class HindsightMemoryProvider(MemoryProvider):
|
||||
# Bank
|
||||
self._bank_mission = ""
|
||||
self._bank_retain_mission: str | None = None
|
||||
self._bank_id_template = ""
|
||||
|
||||
@property
|
||||
def name(self) -> str:
|
||||
@@ -302,9 +453,16 @@ class HindsightMemoryProvider(MemoryProvider):
|
||||
try:
|
||||
cfg = _load_config()
|
||||
mode = cfg.get("mode", "cloud")
|
||||
if mode in ("local", "local_embedded", "local_external"):
|
||||
if mode in ("local", "local_embedded"):
|
||||
available, _ = _check_local_runtime()
|
||||
return available
|
||||
if mode == "local_external":
|
||||
return True
|
||||
has_key = bool(cfg.get("apiKey") or os.environ.get("HINDSIGHT_API_KEY", ""))
|
||||
has_key = bool(
|
||||
cfg.get("apiKey")
|
||||
or cfg.get("api_key")
|
||||
or os.environ.get("HINDSIGHT_API_KEY", "")
|
||||
)
|
||||
has_url = bool(cfg.get("api_url") or os.environ.get("HINDSIGHT_API_URL", ""))
|
||||
return has_key or has_url
|
||||
except Exception:
|
||||
@@ -363,7 +521,7 @@ class HindsightMemoryProvider(MemoryProvider):
|
||||
else:
|
||||
deps_to_install = [cloud_dep]
|
||||
|
||||
print(f"\n Checking dependencies...")
|
||||
print("\n Checking dependencies...")
|
||||
uv_path = shutil.which("uv")
|
||||
if not uv_path:
|
||||
print(" ⚠ uv not found — install it: curl -LsSf https://astral.sh/uv/install.sh | sh")
|
||||
@@ -374,14 +532,14 @@ class HindsightMemoryProvider(MemoryProvider):
|
||||
[uv_path, "pip", "install", "--python", sys.executable, "--quiet", "--upgrade"] + deps_to_install,
|
||||
check=True, timeout=120, capture_output=True,
|
||||
)
|
||||
print(f" ✓ Dependencies up to date")
|
||||
print(" ✓ Dependencies up to date")
|
||||
except Exception as e:
|
||||
print(f" ⚠ Install failed: {e}")
|
||||
print(f" Run manually: uv pip install --python {sys.executable} {' '.join(deps_to_install)}")
|
||||
|
||||
# Step 3: Mode-specific config
|
||||
if mode == "cloud":
|
||||
print(f"\n Get your API key at https://ui.hindsight.vectorize.io\n")
|
||||
print("\n Get your API key at https://ui.hindsight.vectorize.io\n")
|
||||
existing_key = os.environ.get("HINDSIGHT_API_KEY", "")
|
||||
if existing_key:
|
||||
masked = f"...{existing_key[-4:]}" if len(existing_key) > 4 else "set"
|
||||
@@ -434,13 +592,19 @@ class HindsightMemoryProvider(MemoryProvider):
|
||||
sys.stdout.write(" LLM API key: ")
|
||||
sys.stdout.flush()
|
||||
llm_key = getpass.getpass(prompt="") if sys.stdin.isatty() else sys.stdin.readline().strip()
|
||||
if llm_key:
|
||||
env_writes["HINDSIGHT_LLM_API_KEY"] = llm_key
|
||||
# Always write explicitly (including empty) so the provider sees ""
|
||||
# rather than a missing variable. The daemon reads from .env at
|
||||
# startup and fails when HINDSIGHT_LLM_API_KEY is unset.
|
||||
env_writes["HINDSIGHT_LLM_API_KEY"] = llm_key
|
||||
|
||||
# Step 4: Save everything
|
||||
provider_config["bank_id"] = "hermes"
|
||||
provider_config["recall_budget"] = "mid"
|
||||
bank_id = "hermes"
|
||||
# Read existing timeout from config if present, otherwise use default
|
||||
existing_timeout = self._config.get("timeout") if self._config else None
|
||||
timeout_val = existing_timeout if existing_timeout else _DEFAULT_TIMEOUT
|
||||
provider_config["timeout"] = timeout_val
|
||||
env_writes["HINDSIGHT_TIMEOUT"] = str(timeout_val)
|
||||
config["memory"]["provider"] = "hindsight"
|
||||
save_config(config)
|
||||
|
||||
@@ -466,10 +630,32 @@ class HindsightMemoryProvider(MemoryProvider):
|
||||
new_lines.append(f"{k}={v}")
|
||||
env_path.write_text("\n".join(new_lines) + "\n")
|
||||
|
||||
if mode == "local_embedded":
|
||||
materialized_config = dict(provider_config)
|
||||
config_path = Path(hermes_home) / "hindsight" / "config.json"
|
||||
try:
|
||||
materialized_config = json.loads(config_path.read_text(encoding="utf-8"))
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
llm_api_key = env_writes.get("HINDSIGHT_LLM_API_KEY", "")
|
||||
if not llm_api_key:
|
||||
llm_api_key = _load_simple_env(Path(hermes_home) / ".env").get("HINDSIGHT_LLM_API_KEY", "")
|
||||
if not llm_api_key:
|
||||
llm_api_key = _load_simple_env(_embedded_profile_env_path(materialized_config)).get(
|
||||
"HINDSIGHT_API_LLM_API_KEY",
|
||||
"",
|
||||
)
|
||||
|
||||
_materialize_embedded_profile_env(
|
||||
materialized_config,
|
||||
llm_api_key=llm_api_key or None,
|
||||
)
|
||||
|
||||
print(f"\n ✓ Hindsight memory configured ({mode} mode)")
|
||||
if env_writes:
|
||||
print(f" API keys saved to .env")
|
||||
print(f"\n Start a new session to activate.\n")
|
||||
print(" API keys saved to .env")
|
||||
print("\n Start a new session to activate.\n")
|
||||
|
||||
def get_config_schema(self):
|
||||
return [
|
||||
@@ -485,7 +671,8 @@ class HindsightMemoryProvider(MemoryProvider):
|
||||
{"key": "llm_base_url", "description": "Endpoint URL (e.g. http://192.168.1.10:8080/v1)", "default": "", "when": {"mode": "local_embedded", "llm_provider": "openai_compatible"}},
|
||||
{"key": "llm_api_key", "description": "LLM API key (optional for openai_compatible)", "secret": True, "env_var": "HINDSIGHT_LLM_API_KEY", "when": {"mode": "local_embedded"}},
|
||||
{"key": "llm_model", "description": "LLM model", "default": "gpt-4o-mini", "default_from": {"field": "llm_provider", "map": _PROVIDER_DEFAULT_MODELS}, "when": {"mode": "local_embedded"}},
|
||||
{"key": "bank_id", "description": "Memory bank name", "default": "hermes"},
|
||||
{"key": "bank_id", "description": "Memory bank name (static fallback when bank_id_template is unset)", "default": "hermes"},
|
||||
{"key": "bank_id_template", "description": "Optional template to derive bank_id dynamically. Placeholders: {profile}, {workspace}, {platform}, {user}, {session}. Example: hermes-{profile}", "default": ""},
|
||||
{"key": "bank_mission", "description": "Mission/purpose description for the memory bank"},
|
||||
{"key": "bank_retain_mission", "description": "Custom extraction prompt for memory retention"},
|
||||
{"key": "recall_budget", "description": "Recall thoroughness", "default": "mid", "choices": ["low", "mid", "high"]},
|
||||
@@ -505,12 +692,19 @@ class HindsightMemoryProvider(MemoryProvider):
|
||||
{"key": "recall_max_tokens", "description": "Maximum tokens for recall results", "default": 4096},
|
||||
{"key": "recall_max_input_chars", "description": "Maximum input query length for auto-recall", "default": 800},
|
||||
{"key": "recall_prompt_preamble", "description": "Custom preamble for recalled memories in context"},
|
||||
{"key": "timeout", "description": "API request timeout in seconds", "default": _DEFAULT_TIMEOUT},
|
||||
]
|
||||
|
||||
def _get_client(self):
|
||||
"""Return the cached Hindsight client (created once, reused)."""
|
||||
if self._client is None:
|
||||
if self._mode == "local_embedded":
|
||||
available, reason = _check_local_runtime()
|
||||
if not available:
|
||||
raise RuntimeError(
|
||||
"Hindsight local runtime is unavailable"
|
||||
+ (f": {reason}" if reason else "")
|
||||
)
|
||||
from hindsight import HindsightEmbedded
|
||||
HindsightEmbedded.__del__ = lambda self: None
|
||||
llm_provider = self._config.get("llm_provider", "")
|
||||
@@ -529,16 +723,30 @@ class HindsightMemoryProvider(MemoryProvider):
|
||||
self._client = HindsightEmbedded(**kwargs)
|
||||
else:
|
||||
from hindsight_client import Hindsight
|
||||
kwargs = {"base_url": self._api_url, "timeout": 30.0}
|
||||
timeout = self._timeout or _DEFAULT_TIMEOUT
|
||||
kwargs = {"base_url": self._api_url, "timeout": float(timeout)}
|
||||
if self._api_key:
|
||||
kwargs["api_key"] = self._api_key
|
||||
logger.debug("Creating Hindsight cloud client (url=%s, has_key=%s)",
|
||||
self._api_url, bool(self._api_key))
|
||||
logger.debug("Creating Hindsight cloud client (url=%s, has_key=%s, timeout=%s)",
|
||||
self._api_url, bool(self._api_key), kwargs["timeout"])
|
||||
self._client = Hindsight(**kwargs)
|
||||
return self._client
|
||||
|
||||
def _run_sync(self, coro):
    """Run *coro* through the module-level ``_run_sync`` with this provider's timeout."""
    configured_timeout = self._timeout
    return _run_sync(coro, timeout=configured_timeout)
|
||||
|
||||
def initialize(self, session_id: str, **kwargs) -> None:
|
||||
self._session_id = str(session_id or "").strip()
|
||||
self._parent_session_id = str(kwargs.get("parent_session_id", "") or "").strip()
|
||||
|
||||
# Each process lifecycle gets its own document_id. Reusing session_id
|
||||
# alone caused overwrites on /resume — the reloaded session starts
|
||||
# with an empty _session_turns, so the next retain would replace the
|
||||
# previously stored content. session_id stays in tags so processes
|
||||
# for the same session remain filterable together.
|
||||
start_ts = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
|
||||
self._document_id = f"{self._session_id}-{start_ts}"
|
||||
|
||||
# Check client version and auto-upgrade if needed
|
||||
try:
|
||||
@@ -548,7 +756,9 @@ class HindsightMemoryProvider(MemoryProvider):
|
||||
if Version(installed) < Version(_MIN_CLIENT_VERSION):
|
||||
logger.warning("hindsight-client %s is outdated (need >=%s), attempting upgrade...",
|
||||
installed, _MIN_CLIENT_VERSION)
|
||||
import shutil, subprocess, sys
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
uv_path = shutil.which("uv")
|
||||
if uv_path:
|
||||
try:
|
||||
@@ -575,19 +785,41 @@ class HindsightMemoryProvider(MemoryProvider):
|
||||
self._chat_type = str(kwargs.get("chat_type") or "").strip()
|
||||
self._thread_id = str(kwargs.get("thread_id") or "").strip()
|
||||
self._agent_identity = str(kwargs.get("agent_identity") or "").strip()
|
||||
self._agent_workspace = str(kwargs.get("agent_workspace") or "").strip()
|
||||
self._turn_index = 0
|
||||
self._session_turns = []
|
||||
self._mode = self._config.get("mode", "cloud")
|
||||
# Read timeout from config or env var, fall back to default
|
||||
self._timeout = self._config.get("timeout") or int(os.environ.get("HINDSIGHT_TIMEOUT", str(_DEFAULT_TIMEOUT)))
|
||||
# "local" is a legacy alias for "local_embedded"
|
||||
if self._mode == "local":
|
||||
self._mode = "local_embedded"
|
||||
if self._mode == "local_embedded":
|
||||
available, reason = _check_local_runtime()
|
||||
if not available:
|
||||
logger.warning(
|
||||
"Hindsight local mode disabled because its runtime could not be imported: %s",
|
||||
reason,
|
||||
)
|
||||
self._mode = "disabled"
|
||||
return
|
||||
self._api_key = self._config.get("apiKey") or self._config.get("api_key") or os.environ.get("HINDSIGHT_API_KEY", "")
|
||||
default_url = _DEFAULT_LOCAL_URL if self._mode in ("local_embedded", "local_external") else _DEFAULT_API_URL
|
||||
self._api_url = self._config.get("api_url") or os.environ.get("HINDSIGHT_API_URL", default_url)
|
||||
self._llm_base_url = self._config.get("llm_base_url", "")
|
||||
|
||||
banks = self._config.get("banks", {}).get("hermes", {})
|
||||
self._bank_id = self._config.get("bank_id") or banks.get("bankId", "hermes")
|
||||
static_bank_id = self._config.get("bank_id") or banks.get("bankId", "hermes")
|
||||
self._bank_id_template = self._config.get("bank_id_template", "") or ""
|
||||
self._bank_id = _resolve_bank_id_template(
|
||||
self._bank_id_template,
|
||||
fallback=static_bank_id,
|
||||
profile=self._agent_identity,
|
||||
workspace=self._agent_workspace,
|
||||
platform=self._platform,
|
||||
user=self._user_id,
|
||||
session=self._session_id,
|
||||
)
|
||||
budget = self._config.get("recall_budget") or self._config.get("budget") or banks.get("budget", "mid")
|
||||
self._budget = budget if budget in _VALID_BUDGETS else "mid"
|
||||
|
||||
@@ -640,6 +872,10 @@ class HindsightMemoryProvider(MemoryProvider):
|
||||
pass
|
||||
logger.info("Hindsight initialized: mode=%s, api_url=%s, bank=%s, budget=%s, memory_mode=%s, prefetch_method=%s, client=%s",
|
||||
self._mode, self._api_url, self._bank_id, self._budget, self._memory_mode, self._prefetch_method, _client_version)
|
||||
if self._bank_id_template:
|
||||
logger.debug("Hindsight bank resolved from template %r: profile=%s workspace=%s platform=%s user=%s -> bank=%s",
|
||||
self._bank_id_template, self._agent_identity, self._agent_workspace,
|
||||
self._platform, self._user_id, self._bank_id)
|
||||
logger.debug("Hindsight config: auto_retain=%s, auto_recall=%s, retain_every_n=%d, "
|
||||
"retain_async=%s, retain_context=%s, recall_max_tokens=%d, recall_max_input_chars=%d, tags=%s, recall_tags=%s",
|
||||
self._auto_retain, self._auto_recall, self._retain_every_n_turns,
|
||||
@@ -669,42 +905,13 @@ class HindsightMemoryProvider(MemoryProvider):
|
||||
# Update the profile .env to match our current config so
|
||||
# the daemon always starts with the right settings.
|
||||
# If the config changed and the daemon is running, stop it.
|
||||
from pathlib import Path as _Path
|
||||
profile_env = _Path.home() / ".hindsight" / "profiles" / f"{profile}.env"
|
||||
current_key = self._config.get("llm_api_key") or os.environ.get("HINDSIGHT_LLM_API_KEY", "")
|
||||
current_provider = self._config.get("llm_provider", "")
|
||||
current_model = self._config.get("llm_model", "")
|
||||
current_base_url = self._config.get("llm_base_url") or os.environ.get("HINDSIGHT_API_LLM_BASE_URL", "")
|
||||
# Map openai_compatible/openrouter → openai for the daemon (OpenAI wire format)
|
||||
daemon_provider = "openai" if current_provider in ("openai_compatible", "openrouter") else current_provider
|
||||
|
||||
# Read saved profile config
|
||||
saved = {}
|
||||
if profile_env.exists():
|
||||
for line in profile_env.read_text().splitlines():
|
||||
if "=" in line and not line.startswith("#"):
|
||||
k, v = line.split("=", 1)
|
||||
saved[k.strip()] = v.strip()
|
||||
|
||||
config_changed = (
|
||||
saved.get("HINDSIGHT_API_LLM_PROVIDER") != daemon_provider or
|
||||
saved.get("HINDSIGHT_API_LLM_MODEL") != current_model or
|
||||
saved.get("HINDSIGHT_API_LLM_API_KEY") != current_key or
|
||||
saved.get("HINDSIGHT_API_LLM_BASE_URL", "") != current_base_url
|
||||
)
|
||||
profile_env = _embedded_profile_env_path(self._config)
|
||||
expected_env = _build_embedded_profile_env(self._config)
|
||||
saved = _load_simple_env(profile_env)
|
||||
config_changed = saved != expected_env
|
||||
|
||||
if config_changed:
|
||||
# Write updated profile .env
|
||||
profile_env.parent.mkdir(parents=True, exist_ok=True)
|
||||
env_lines = (
|
||||
f"HINDSIGHT_API_LLM_PROVIDER={daemon_provider}\n"
|
||||
f"HINDSIGHT_API_LLM_API_KEY={current_key}\n"
|
||||
f"HINDSIGHT_API_LLM_MODEL={current_model}\n"
|
||||
f"HINDSIGHT_API_LOG_LEVEL=info\n"
|
||||
)
|
||||
if current_base_url:
|
||||
env_lines += f"HINDSIGHT_API_LLM_BASE_URL={current_base_url}\n"
|
||||
profile_env.write_text(env_lines)
|
||||
profile_env = _materialize_embedded_profile_env(self._config)
|
||||
if client._manager.is_running(profile):
|
||||
with open(log_path, "a") as f:
|
||||
f.write("\n=== Config changed, restarting daemon ===\n")
|
||||
@@ -777,7 +984,7 @@ class HindsightMemoryProvider(MemoryProvider):
|
||||
client = self._get_client()
|
||||
if self._prefetch_method == "reflect":
|
||||
logger.debug("Prefetch: calling reflect (bank=%s, query_len=%d)", self._bank_id, len(query))
|
||||
resp = _run_sync(client.areflect(bank_id=self._bank_id, query=query, budget=self._budget))
|
||||
resp = self._run_sync(client.areflect(bank_id=self._bank_id, query=query, budget=self._budget))
|
||||
text = resp.text or ""
|
||||
else:
|
||||
recall_kwargs: dict = {
|
||||
@@ -791,7 +998,7 @@ class HindsightMemoryProvider(MemoryProvider):
|
||||
recall_kwargs["types"] = self._recall_types
|
||||
logger.debug("Prefetch: calling recall (bank=%s, query_len=%d, budget=%s)",
|
||||
self._bank_id, len(query), self._budget)
|
||||
resp = _run_sync(client.arecall(**recall_kwargs))
|
||||
resp = self._run_sync(client.arecall(**recall_kwargs))
|
||||
num_results = len(resp.results) if resp.results else 0
|
||||
logger.debug("Prefetch: recall returned %d results", num_results)
|
||||
text = "\n".join(f"- {r.text}" for r in resp.results if r.text) if resp.results else ""
|
||||
@@ -888,7 +1095,7 @@ class HindsightMemoryProvider(MemoryProvider):
|
||||
if session_id:
|
||||
self._session_id = str(session_id).strip()
|
||||
|
||||
turn = json.dumps(self._build_turn_messages(user_content, assistant_content))
|
||||
turn = json.dumps(self._build_turn_messages(user_content, assistant_content), ensure_ascii=False)
|
||||
self._session_turns.append(turn)
|
||||
self._turn_counter += 1
|
||||
self._turn_index = self._turn_counter
|
||||
@@ -902,6 +1109,12 @@ class HindsightMemoryProvider(MemoryProvider):
|
||||
len(self._session_turns), sum(len(t) for t in self._session_turns))
|
||||
content = "[" + ",".join(self._session_turns) + "]"
|
||||
|
||||
lineage_tags: list[str] = []
|
||||
if self._session_id:
|
||||
lineage_tags.append(f"session:{self._session_id}")
|
||||
if self._parent_session_id:
|
||||
lineage_tags.append(f"parent:{self._parent_session_id}")
|
||||
|
||||
def _sync():
|
||||
try:
|
||||
client = self._get_client()
|
||||
@@ -912,15 +1125,16 @@ class HindsightMemoryProvider(MemoryProvider):
|
||||
message_count=len(self._session_turns) * 2,
|
||||
turn_index=self._turn_index,
|
||||
),
|
||||
tags=lineage_tags or None,
|
||||
)
|
||||
item.pop("bank_id", None)
|
||||
item.pop("retain_async", None)
|
||||
logger.debug("Hindsight retain: bank=%s, doc=%s, async=%s, content_len=%d, num_turns=%d",
|
||||
self._bank_id, self._session_id, self._retain_async, len(content), len(self._session_turns))
|
||||
_run_sync(client.aretain_batch(
|
||||
self._bank_id, self._document_id, self._retain_async, len(content), len(self._session_turns))
|
||||
self._run_sync(client.aretain_batch(
|
||||
bank_id=self._bank_id,
|
||||
items=[item],
|
||||
document_id=self._session_id,
|
||||
document_id=self._document_id,
|
||||
retain_async=self._retain_async,
|
||||
))
|
||||
logger.debug("Hindsight retain succeeded")
|
||||
@@ -957,7 +1171,7 @@ class HindsightMemoryProvider(MemoryProvider):
|
||||
)
|
||||
logger.debug("Tool hindsight_retain: bank=%s, content_len=%d, context=%s",
|
||||
self._bank_id, len(content), context)
|
||||
_run_sync(client.aretain(**retain_kwargs))
|
||||
self._run_sync(client.aretain(**retain_kwargs))
|
||||
logger.debug("Tool hindsight_retain: success")
|
||||
return json.dumps({"result": "Memory stored successfully."})
|
||||
except Exception as e:
|
||||
@@ -980,7 +1194,7 @@ class HindsightMemoryProvider(MemoryProvider):
|
||||
recall_kwargs["types"] = self._recall_types
|
||||
logger.debug("Tool hindsight_recall: bank=%s, query_len=%d, budget=%s",
|
||||
self._bank_id, len(query), self._budget)
|
||||
resp = _run_sync(client.arecall(**recall_kwargs))
|
||||
resp = self._run_sync(client.arecall(**recall_kwargs))
|
||||
num_results = len(resp.results) if resp.results else 0
|
||||
logger.debug("Tool hindsight_recall: %d results", num_results)
|
||||
if not resp.results:
|
||||
@@ -998,7 +1212,7 @@ class HindsightMemoryProvider(MemoryProvider):
|
||||
try:
|
||||
logger.debug("Tool hindsight_reflect: bank=%s, query_len=%d, budget=%s",
|
||||
self._bank_id, len(query), self._budget)
|
||||
resp = _run_sync(client.areflect(
|
||||
resp = self._run_sync(client.areflect(
|
||||
bank_id=self._bank_id, query=query, budget=self._budget
|
||||
))
|
||||
logger.debug("Tool hindsight_reflect: response_len=%d", len(resp.text or ""))
|
||||
@@ -1011,7 +1225,6 @@ class HindsightMemoryProvider(MemoryProvider):
|
||||
|
||||
def shutdown(self) -> None:
|
||||
logger.debug("Hindsight shutdown: waiting for background threads")
|
||||
global _loop, _loop_thread
|
||||
for t in (self._prefetch_thread, self._sync_thread):
|
||||
if t and t.is_alive():
|
||||
t.join(timeout=5.0)
|
||||
@@ -1026,17 +1239,21 @@ class HindsightMemoryProvider(MemoryProvider):
|
||||
except RuntimeError:
|
||||
pass
|
||||
else:
|
||||
_run_sync(self._client.aclose())
|
||||
self._run_sync(self._client.aclose())
|
||||
except Exception:
|
||||
pass
|
||||
self._client = None
|
||||
# Stop the background event loop so no tasks are pending at exit
|
||||
if _loop is not None and _loop.is_running():
|
||||
_loop.call_soon_threadsafe(_loop.stop)
|
||||
if _loop_thread is not None:
|
||||
_loop_thread.join(timeout=5.0)
|
||||
_loop = None
|
||||
_loop_thread = None
|
||||
# The module-global background event loop (_loop / _loop_thread)
|
||||
# is intentionally NOT stopped here. It is shared across every
|
||||
# HindsightMemoryProvider instance in the process — the plugin
|
||||
# loader creates a new provider per AIAgent, and the gateway
|
||||
# creates one AIAgent per concurrent chat session. Stopping the
|
||||
# loop from one provider's shutdown() strands the aiohttp
|
||||
# ClientSession + TCPConnector owned by every sibling provider
|
||||
# on a dead loop, which surfaces as the "Unclosed client session"
|
||||
# / "Unclosed connector" warnings reported in #11923. The loop
|
||||
# runs on a daemon thread and is reclaimed on process exit;
|
||||
# per-session cleanup happens via self._client.aclose() above.
|
||||
|
||||
|
||||
def register(ctx) -> None:
|
||||
|
||||
@@ -43,7 +43,7 @@ _TIMEOUT = 30.0
|
||||
# ---------------------------------------------------------------------------
|
||||
# Process-level atexit safety net — ensures pending sessions are committed
|
||||
# even if shutdown_memory_provider is never called (e.g. gateway crash,
|
||||
# SIGKILL, or exception in _async_flush_memories preventing shutdown).
|
||||
# SIGKILL, or exception in the session expiry watcher preventing shutdown).
|
||||
# ---------------------------------------------------------------------------
|
||||
_last_active_provider: Optional["OpenVikingMemoryProvider"] = None
|
||||
|
||||
|
||||
@@ -0,0 +1,66 @@
|
||||
"""Spotify integration plugin — bundled, auto-loaded.
|
||||
|
||||
Registers 7 tools (playback, devices, queue, search, playlists, albums,
|
||||
library) into the ``spotify`` toolset. Each tool's handler is gated by
|
||||
``_check_spotify_available()`` — when the user has not run ``hermes auth
|
||||
spotify``, the tools remain registered (so they appear in ``hermes
|
||||
tools``) but the runtime check prevents dispatch.
|
||||
|
||||
Why a plugin instead of a top-level ``tools/`` file?
|
||||
|
||||
- ``plugins/`` is where third-party service integrations live (see
|
||||
``plugins/image_gen/`` for the backend-provider pattern, ``plugins/
|
||||
disk-cleanup/`` for the standalone pattern). ``tools/`` is reserved
|
||||
for foundational capabilities (terminal, read_file, web_search, etc.).
|
||||
- Mirroring the image_gen plugin layout (``plugins/<category>/<backend>/``
|
||||
for categories, flat ``plugins/<name>/`` for standalones) makes new
|
||||
service integrations a pattern contributors can copy.
|
||||
- Bundled + ``kind: backend`` auto-loads on startup just like image_gen
|
||||
backends — no user opt-in needed, no ``plugins.enabled`` config.
|
||||
|
||||
The Spotify auth flow (``hermes auth spotify``), CLI plumbing, and docs
|
||||
are unchanged. This move is purely structural.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from plugins.spotify.tools import (
|
||||
SPOTIFY_ALBUMS_SCHEMA,
|
||||
SPOTIFY_DEVICES_SCHEMA,
|
||||
SPOTIFY_LIBRARY_SCHEMA,
|
||||
SPOTIFY_PLAYBACK_SCHEMA,
|
||||
SPOTIFY_PLAYLISTS_SCHEMA,
|
||||
SPOTIFY_QUEUE_SCHEMA,
|
||||
SPOTIFY_SEARCH_SCHEMA,
|
||||
_check_spotify_available,
|
||||
_handle_spotify_albums,
|
||||
_handle_spotify_devices,
|
||||
_handle_spotify_library,
|
||||
_handle_spotify_playback,
|
||||
_handle_spotify_playlists,
|
||||
_handle_spotify_queue,
|
||||
_handle_spotify_search,
|
||||
)
|
||||
|
||||
# Registration table consumed by ``register``. Each row is
# (tool name, schema object, handler callable, emoji shown for the tool).
_TOOLS = (
    ("spotify_playback", SPOTIFY_PLAYBACK_SCHEMA, _handle_spotify_playback, "🎵"),
    ("spotify_devices", SPOTIFY_DEVICES_SCHEMA, _handle_spotify_devices, "🔈"),
    ("spotify_queue", SPOTIFY_QUEUE_SCHEMA, _handle_spotify_queue, "📻"),
    ("spotify_search", SPOTIFY_SEARCH_SCHEMA, _handle_spotify_search, "🔎"),
    ("spotify_playlists", SPOTIFY_PLAYLISTS_SCHEMA, _handle_spotify_playlists, "📚"),
    ("spotify_albums", SPOTIFY_ALBUMS_SCHEMA, _handle_spotify_albums, "💿"),
    ("spotify_library", SPOTIFY_LIBRARY_SCHEMA, _handle_spotify_library, "❤️"),
)
|
||||
|
||||
|
||||
def register(ctx) -> None:
    """Register every entry of ``_TOOLS`` with the plugin context.

    Each tool is placed in the ``spotify`` toolset and gets
    ``_check_spotify_available`` attached as its availability check.
    """
    for entry in _TOOLS:
        tool_name, tool_schema, tool_handler, tool_emoji = entry
        ctx.register_tool(
            name=tool_name,
            toolset="spotify",
            schema=tool_schema,
            handler=tool_handler,
            check_fn=_check_spotify_available,
            emoji=tool_emoji,
        )
|
||||
@@ -0,0 +1,435 @@
|
||||
"""Thin Spotify Web API helper used by Hermes native tools."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from typing import Any, Dict, Iterable, Optional
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import httpx
|
||||
|
||||
from hermes_cli.auth import (
|
||||
AuthError,
|
||||
resolve_spotify_runtime_credentials,
|
||||
)
|
||||
|
||||
|
||||
class SpotifyError(RuntimeError):
    """Root of the Spotify tool exception hierarchy."""
|
||||
|
||||
|
||||
class SpotifyAuthRequiredError(SpotifyError):
    """Signals that Spotify credentials are missing and the user must authenticate."""
|
||||
|
||||
|
||||
class SpotifyAPIError(SpotifyError):
    """Spotify API failure carrying the HTTP status and raw response body.

    Attributes:
        status_code: HTTP status of the failed response, if known.
        response_body: Raw response text, if known.
        path: Request path; starts as ``None`` and is filled in by the raiser.
    """

    def __init__(
        self,
        message: str,
        *,
        status_code: Optional[int] = None,
        response_body: Optional[str] = None,
    ) -> None:
        super().__init__(message)
        self.status_code, self.response_body = status_code, response_body
        self.path = None
|
||||
|
||||
|
||||
class SpotifyClient:
    """Synchronous helper around the Spotify Web API endpoints used by the tools.

    Credentials are resolved once at construction time through
    ``resolve_spotify_runtime_credentials``. If a request comes back with
    HTTP 401 the credentials are re-resolved with a forced refresh and the
    request is retried exactly once.
    """

    def __init__(self) -> None:
        """Resolve the runtime credentials, refreshing them if they are expiring."""
        self._runtime = self._resolve_runtime(refresh_if_expiring=True)

    def _resolve_runtime(self, *, force_refresh: bool = False, refresh_if_expiring: bool = True) -> Dict[str, Any]:
        """Fetch the credential mapping, translating ``AuthError``.

        Raises:
            SpotifyAuthRequiredError: if credential resolution raises ``AuthError``.
        """
        try:
            return resolve_spotify_runtime_credentials(
                force_refresh=force_refresh,
                refresh_if_expiring=refresh_if_expiring,
            )
        except AuthError as exc:
            raise SpotifyAuthRequiredError(str(exc)) from exc

    @property
    def base_url(self) -> str:
        """API base URL from the runtime credentials, without a trailing slash."""
        return str(self._runtime.get("base_url") or "").rstrip("/")

    def _headers(self) -> Dict[str, str]:
        """Build the bearer-token and JSON content-type request headers."""
        return {
            "Authorization": f"Bearer {self._runtime['access_token']}",
            "Content-Type": "application/json",
        }

    def request(
        self,
        method: str,
        path: str,
        *,
        params: Optional[Dict[str, Any]] = None,
        json_body: Optional[Dict[str, Any]] = None,
        allow_retry_on_401: bool = True,
        empty_response: Optional[Dict[str, Any]] = None,
    ) -> Any:
        """Send one request to the Spotify API and decode the response.

        Args:
            method: HTTP method name.
            path: Path appended to ``base_url`` (expected to start with ``/``).
            params: Query parameters; ``None`` values are dropped.
            json_body: JSON payload; ``None`` values are dropped. No body is
                sent when this is ``None``.
            allow_retry_on_401: When true, a 401 triggers a forced credential
                refresh followed by a single retry.
            empty_response: Value returned instead of the generic placeholder
                when the response is 204 or has no content.

        Returns:
            The decoded JSON payload, ``empty_response`` (or a generic
            ``{"success": True, ...}`` placeholder) for empty responses, or a
            ``{"success": True, "text": ...}`` wrapper for non-JSON bodies.

        Raises:
            SpotifyAPIError: for any response with status >= 400 (after the
                optional 401 retry).
            SpotifyAuthRequiredError: if the forced refresh fails.
        """
        url = f"{self.base_url}{path}"
        response = httpx.request(
            method,
            url,
            headers=self._headers(),
            params=_strip_none(params),
            json=_strip_none(json_body) if json_body is not None else None,
            timeout=30.0,
        )
        if response.status_code == 401 and allow_retry_on_401:
            self._runtime = self._resolve_runtime(force_refresh=True, refresh_if_expiring=True)
            # Forward empty_response too: otherwise a 204 on the retried call
            # loses the caller's tailored "nothing playing" payload.
            return self.request(
                method,
                path,
                params=params,
                json_body=json_body,
                allow_retry_on_401=False,
                empty_response=empty_response,
            )
        if response.status_code >= 400:
            self._raise_api_error(response, method=method, path=path)
        if response.status_code == 204 or not response.content:
            return empty_response or {"success": True, "status_code": response.status_code, "empty": True}
        if "application/json" in response.headers.get("content-type", ""):
            return response.json()
        return {"success": True, "text": response.text}

    def _raise_api_error(self, response: httpx.Response, *, method: str, path: str) -> None:
        """Convert a failed response into a ``SpotifyAPIError`` and raise it."""
        detail = response.text.strip()
        message = _friendly_spotify_error_message(
            status_code=response.status_code,
            detail=_extract_spotify_error_detail(response, fallback=detail),
            method=method,
            path=path,
            retry_after=response.headers.get("Retry-After"),
        )
        error = SpotifyAPIError(message, status_code=response.status_code, response_body=detail)
        error.path = path
        raise error

    def get_devices(self) -> Any:
        """GET ``/me/player/devices``."""
        return self.request("GET", "/me/player/devices")

    def transfer_playback(self, *, device_id: str, play: bool = False) -> Any:
        """PUT ``/me/player`` with the target device id and ``play`` flag."""
        return self.request("PUT", "/me/player", json_body={
            "device_ids": [device_id],
            "play": play,
        })

    def get_playback_state(self, *, market: Optional[str] = None) -> Any:
        """GET ``/me/player``; an empty response yields a descriptive payload."""
        return self.request(
            "GET",
            "/me/player",
            params={"market": market},
            empty_response={
                "status_code": 204,
                "empty": True,
                "message": "No active Spotify playback session was found. Open Spotify on a device and start playback, or transfer playback to an available device.",
            },
        )

    def get_currently_playing(self, *, market: Optional[str] = None) -> Any:
        """GET ``/me/player/currently-playing``; an empty response yields a descriptive payload."""
        return self.request(
            "GET",
            "/me/player/currently-playing",
            params={"market": market},
            empty_response={
                "status_code": 204,
                "empty": True,
                "message": "Spotify is not currently playing anything. Start playback in Spotify and try again.",
            },
        )

    def start_playback(
        self,
        *,
        device_id: Optional[str] = None,
        context_uri: Optional[str] = None,
        uris: Optional[list[str]] = None,
        offset: Optional[Dict[str, Any]] = None,
        position_ms: Optional[int] = None,
    ) -> Any:
        """PUT ``/me/player/play``; unset body fields are omitted from the payload."""
        return self.request(
            "PUT",
            "/me/player/play",
            params={"device_id": device_id},
            json_body={
                "context_uri": context_uri,
                "uris": uris,
                "offset": offset,
                "position_ms": position_ms,
            },
        )

    def pause_playback(self, *, device_id: Optional[str] = None) -> Any:
        """PUT ``/me/player/pause``."""
        return self.request("PUT", "/me/player/pause", params={"device_id": device_id})

    def skip_next(self, *, device_id: Optional[str] = None) -> Any:
        """POST ``/me/player/next``."""
        return self.request("POST", "/me/player/next", params={"device_id": device_id})

    def skip_previous(self, *, device_id: Optional[str] = None) -> Any:
        """POST ``/me/player/previous``."""
        return self.request("POST", "/me/player/previous", params={"device_id": device_id})

    def seek(self, *, position_ms: int, device_id: Optional[str] = None) -> Any:
        """PUT ``/me/player/seek`` with ``position_ms``."""
        return self.request("PUT", "/me/player/seek", params={
            "position_ms": position_ms,
            "device_id": device_id,
        })

    def set_repeat(self, *, state: str, device_id: Optional[str] = None) -> Any:
        """PUT ``/me/player/repeat`` with the given ``state``."""
        return self.request("PUT", "/me/player/repeat", params={"state": state, "device_id": device_id})

    def set_shuffle(self, *, state: bool, device_id: Optional[str] = None) -> Any:
        """PUT ``/me/player/shuffle``; ``state`` is sent as ``"true"``/``"false"``."""
        return self.request("PUT", "/me/player/shuffle", params={"state": str(bool(state)).lower(), "device_id": device_id})

    def set_volume(self, *, volume_percent: int, device_id: Optional[str] = None) -> Any:
        """PUT ``/me/player/volume`` with ``volume_percent``."""
        return self.request("PUT", "/me/player/volume", params={
            "volume_percent": volume_percent,
            "device_id": device_id,
        })

    def get_queue(self) -> Any:
        """GET ``/me/player/queue``."""
        return self.request("GET", "/me/player/queue")

    def add_to_queue(self, *, uri: str, device_id: Optional[str] = None) -> Any:
        """POST ``/me/player/queue`` with the item ``uri``."""
        return self.request("POST", "/me/player/queue", params={"uri": uri, "device_id": device_id})

    def search(
        self,
        *,
        query: str,
        search_types: list[str],
        limit: int = 10,
        offset: int = 0,
        market: Optional[str] = None,
        include_external: Optional[str] = None,
    ) -> Any:
        """GET ``/search``; ``search_types`` is sent comma-joined as ``type``."""
        return self.request("GET", "/search", params={
            "q": query,
            "type": ",".join(search_types),
            "limit": limit,
            "offset": offset,
            "market": market,
            "include_external": include_external,
        })

    def get_my_playlists(self, *, limit: int = 20, offset: int = 0) -> Any:
        """GET ``/me/playlists``."""
        return self.request("GET", "/me/playlists", params={"limit": limit, "offset": offset})

    def get_playlist(self, *, playlist_id: str, market: Optional[str] = None) -> Any:
        """GET ``/playlists/{playlist_id}``."""
        return self.request("GET", f"/playlists/{playlist_id}", params={"market": market})

    def create_playlist(
        self,
        *,
        name: str,
        public: bool = False,
        collaborative: bool = False,
        description: Optional[str] = None,
    ) -> Any:
        """POST ``/me/playlists`` with the playlist attributes."""
        return self.request("POST", "/me/playlists", json_body={
            "name": name,
            "public": public,
            "collaborative": collaborative,
            "description": description,
        })

    def add_playlist_items(
        self,
        *,
        playlist_id: str,
        uris: list[str],
        position: Optional[int] = None,
    ) -> Any:
        """POST ``/playlists/{playlist_id}/items`` with ``uris`` and optional ``position``."""
        return self.request("POST", f"/playlists/{playlist_id}/items", json_body={
            "uris": uris,
            "position": position,
        })

    def remove_playlist_items(
        self,
        *,
        playlist_id: str,
        uris: list[str],
        snapshot_id: Optional[str] = None,
    ) -> Any:
        """DELETE ``/playlists/{playlist_id}/items``; each uri is wrapped as ``{"uri": ...}``."""
        return self.request("DELETE", f"/playlists/{playlist_id}/items", json_body={
            "items": [{"uri": uri} for uri in uris],
            "snapshot_id": snapshot_id,
        })

    def update_playlist_details(
        self,
        *,
        playlist_id: str,
        name: Optional[str] = None,
        public: Optional[bool] = None,
        collaborative: Optional[bool] = None,
        description: Optional[str] = None,
    ) -> Any:
        """PUT ``/playlists/{playlist_id}``; unset fields are omitted from the payload."""
        return self.request("PUT", f"/playlists/{playlist_id}", json_body={
            "name": name,
            "public": public,
            "collaborative": collaborative,
            "description": description,
        })

    def get_album(self, *, album_id: str, market: Optional[str] = None) -> Any:
        """GET ``/albums/{album_id}``."""
        return self.request("GET", f"/albums/{album_id}", params={"market": market})

    def get_album_tracks(self, *, album_id: str, limit: int = 20, offset: int = 0, market: Optional[str] = None) -> Any:
        """GET ``/albums/{album_id}/tracks``."""
        return self.request("GET", f"/albums/{album_id}/tracks", params={
            "limit": limit,
            "offset": offset,
            "market": market,
        })

    def get_saved_tracks(self, *, limit: int = 20, offset: int = 0, market: Optional[str] = None) -> Any:
        """GET ``/me/tracks``."""
        return self.request("GET", "/me/tracks", params={"limit": limit, "offset": offset, "market": market})

    def save_library_items(self, *, uris: list[str]) -> Any:
        """PUT ``/me/library`` with the comma-joined ``uris``."""
        return self.request("PUT", "/me/library", params={"uris": ",".join(uris)})

    def library_contains(self, *, uris: list[str]) -> Any:
        """GET ``/me/library/contains`` with the comma-joined ``uris``."""
        return self.request("GET", "/me/library/contains", params={"uris": ",".join(uris)})

    def get_saved_albums(self, *, limit: int = 20, offset: int = 0, market: Optional[str] = None) -> Any:
        """GET ``/me/albums``."""
        return self.request("GET", "/me/albums", params={"limit": limit, "offset": offset, "market": market})

    def remove_saved_tracks(self, *, track_ids: list[str]) -> Any:
        """DELETE ``/me/library`` for the ids, sent as ``spotify:track:`` URIs."""
        uris = [f"spotify:track:{track_id}" for track_id in track_ids]
        return self.request("DELETE", "/me/library", params={"uris": ",".join(uris)})

    def remove_saved_albums(self, *, album_ids: list[str]) -> Any:
        """DELETE ``/me/library`` for the ids, sent as ``spotify:album:`` URIs."""
        uris = [f"spotify:album:{album_id}" for album_id in album_ids]
        return self.request("DELETE", "/me/library", params={"uris": ",".join(uris)})

    def get_recently_played(
        self,
        *,
        limit: int = 20,
        after: Optional[int] = None,
        before: Optional[int] = None,
    ) -> Any:
        """GET ``/me/player/recently-played`` with optional ``after``/``before`` cursors."""
        return self.request("GET", "/me/player/recently-played", params={
            "limit": limit,
            "after": after,
            "before": before,
        })
|
||||
|
||||
|
||||
def _extract_spotify_error_detail(response: httpx.Response, *, fallback: str) -> str:
    """Pull the error text out of a Spotify error response.

    Reads the JSON body's ``error`` entry: a string is used directly, a dict
    contributes its ``message`` (when truthy). Anything else, including a
    body that fails to decode, leaves ``fallback`` in place. The result is
    returned stripped of surrounding whitespace.
    """
    chosen = fallback
    try:
        body = response.json()
        err = body.get("error") if isinstance(body, dict) else None
        if isinstance(err, str):
            chosen = err
        elif isinstance(err, dict):
            chosen = str(err.get("message") or fallback)
    except Exception:
        # Best effort only: an unparsable body keeps the fallback text.
        pass
    return chosen.strip()
|
||||
|
||||
|
||||
def _friendly_spotify_error_message(
|
||||
*,
|
||||
status_code: int,
|
||||
detail: str,
|
||||
method: str,
|
||||
path: str,
|
||||
retry_after: Optional[str],
|
||||
) -> str:
|
||||
normalized_detail = detail.lower()
|
||||
is_playback_path = path.startswith("/me/player")
|
||||
|
||||
if status_code == 401:
|
||||
return "Spotify authentication failed or expired. Run `hermes auth spotify` again."
|
||||
|
||||
if status_code == 403:
|
||||
if is_playback_path:
|
||||
return (
|
||||
"Spotify rejected this playback request. Playback control usually requires a Spotify Premium account "
|
||||
"and an active Spotify Connect device."
|
||||
)
|
||||
if "scope" in normalized_detail or "permission" in normalized_detail:
|
||||
return "Spotify rejected the request because the current auth scope is insufficient. Re-run `hermes auth spotify` to refresh permissions."
|
||||
return "Spotify rejected the request. The account may not have permission for this action."
|
||||
|
||||
if status_code == 404:
|
||||
if is_playback_path:
|
||||
return "Spotify could not find an active playback device or player session for this request."
|
||||
return "Spotify resource not found."
|
||||
|
||||
if status_code == 429:
|
||||
message = "Spotify rate limit exceeded."
|
||||
if retry_after:
|
||||
message += f" Retry after {retry_after} seconds."
|
||||
return message
|
||||
|
||||
if detail:
|
||||
return detail
|
||||
return f"Spotify API request failed with status {status_code}."
|
||||
|
||||
|
||||
def _strip_none(payload: Optional[Dict[str, Any]]) -> Dict[str, Any]:
|
||||
if not payload:
|
||||
return {}
|
||||
return {key: value for key, value in payload.items() if value is not None}
|
||||
|
||||
|
||||
def normalize_spotify_id(value: str, expected_type: Optional[str] = None) -> str:
    """Reduce a Spotify id, ``spotify:`` URI, or open.spotify.com URL to the bare id.

    Args:
        value: Bare id, ``spotify:<type>:<id>`` URI, or an open.spotify.com
            link. Links may omit the scheme and may carry a leading
            ``intl-xx`` locale segment or an ``embed`` segment.
        expected_type: When given, the item type found in a URI/URL must match.

    Returns:
        The id portion, or the stripped input when it is neither a
        recognisable URI nor URL.

    Raises:
        SpotifyError: if ``value`` is empty or the item type does not match
            ``expected_type``.
    """
    cleaned = (value or "").strip()
    if not cleaned:
        raise SpotifyError("Spotify id/uri/url is required.")
    if cleaned.startswith("spotify:"):
        parts = cleaned.split(":")
        if len(parts) >= 3:
            item_type = parts[1]
            if expected_type and item_type != expected_type:
                raise SpotifyError(f"Expected a Spotify {expected_type}, got {item_type}.")
            return parts[2]
    if "open.spotify.com" in cleaned:
        parsed = urlparse(cleaned)
        if not parsed.netloc:
            # Scheme-less link: without this the host would be parsed as the
            # first path segment and mistaken for the item type.
            parsed = urlparse("https://" + cleaned.lstrip("/"))
        path_parts = [part for part in parsed.path.split("/") if part]
        # Share links may be prefixed with a locale ("intl-de") and embed
        # links with "embed"; neither is the item type.
        while path_parts and (
            path_parts[0].lower() == "embed" or path_parts[0].lower().startswith("intl-")
        ):
            path_parts = path_parts[1:]
        if len(path_parts) >= 2:
            item_type, item_id = path_parts[0], path_parts[1]
            if expected_type and item_type != expected_type:
                raise SpotifyError(f"Expected a Spotify {expected_type}, got {item_type}.")
            return item_id
    return cleaned
|
||||
|
||||
|
||||
def normalize_spotify_uri(value: str, expected_type: Optional[str] = None) -> str:
    """Turn a Spotify id, URI, or open.spotify.com URL into a ``spotify:`` URI.

    Args:
        value: ``spotify:<type>:<id>`` URI, open.spotify.com link, or bare id.
        expected_type: Item type to enforce (and to use when building the URI
            from a bare id or link).

    Returns:
        * an existing ``spotify:`` URI unchanged (after type validation);
        * ``spotify:<expected_type>:<id>`` when ``expected_type`` is given;
        * ``spotify:<type>:<id>`` inferred from an open.spotify.com link when
          no ``expected_type`` is given;
        * otherwise the stripped input (a bare id cannot be typed).

    Raises:
        SpotifyError: if ``value`` is empty or the item type does not match
            ``expected_type``.
    """
    cleaned = (value or "").strip()
    if not cleaned:
        raise SpotifyError("Spotify URI/url/id is required.")
    if cleaned.startswith("spotify:"):
        if expected_type:
            parts = cleaned.split(":")
            if len(parts) >= 3 and parts[1] != expected_type:
                raise SpotifyError(f"Expected a Spotify {expected_type}, got {parts[1]}.")
        return cleaned
    if expected_type:
        item_id = normalize_spotify_id(cleaned, expected_type)
        return f"spotify:{expected_type}:{item_id}"
    if "open.spotify.com" in cleaned:
        # No type was requested, but a link names its own type in the path;
        # use it so callers get a real URI rather than the https URL back.
        parsed = urlparse(cleaned)
        if not parsed.netloc:
            parsed = urlparse("https://" + cleaned.lstrip("/"))
        segments = [part for part in parsed.path.split("/") if part]
        while segments and (
            segments[0].lower() == "embed" or segments[0].lower().startswith("intl-")
        ):
            segments = segments[1:]
        if len(segments) >= 2:
            return f"spotify:{segments[0]}:{segments[1]}"
    return cleaned
|
||||
|
||||
|
||||
def normalize_spotify_uris(values: Iterable[str], expected_type: Optional[str] = None) -> list[str]:
    """Normalize each value with ``normalize_spotify_uri``, dropping duplicates.

    First-seen order is preserved. Each value is passed through ``str()``
    before normalization.

    Raises:
        SpotifyError: if nothing remains after normalization (empty input).
    """
    collected: list[str] = []
    for raw in values:
        candidate = normalize_spotify_uri(str(raw), expected_type)
        if candidate in collected:
            continue
        collected.append(candidate)
    if collected:
        return collected
    raise SpotifyError("At least one Spotify item is required.")
|
||||
|
||||
|
||||
def compact_json(data: Any) -> str:
    """Serialize ``data`` to JSON text, leaving non-ASCII characters unescaped."""
    encoded = json.dumps(data, ensure_ascii=False)
    return encoded
|
||||
@@ -0,0 +1,13 @@
|
||||
name: spotify
|
||||
version: 1.0.0
|
||||
description: "Native Spotify integration — 7 tools (playback, devices, queue, search, playlists, albums, library) using Spotify Web API + PKCE OAuth. Auth via `hermes auth spotify`. Tools gate on `providers.spotify` in ~/.hermes/auth.json."
|
||||
author: NousResearch
|
||||
kind: backend
|
||||
provides_tools:
|
||||
- spotify_playback
|
||||
- spotify_devices
|
||||
- spotify_queue
|
||||
- spotify_search
|
||||
- spotify_playlists
|
||||
- spotify_albums
|
||||
- spotify_library
|
||||
@@ -0,0 +1,454 @@
|
||||
"""Native Spotify tools for Hermes (registered via plugins/spotify)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict, List
|
||||
|
||||
from hermes_cli.auth import get_auth_status
|
||||
from plugins.spotify.client import (
|
||||
SpotifyAPIError,
|
||||
SpotifyAuthRequiredError,
|
||||
SpotifyClient,
|
||||
SpotifyError,
|
||||
normalize_spotify_id,
|
||||
normalize_spotify_uri,
|
||||
normalize_spotify_uris,
|
||||
)
|
||||
from tools.registry import tool_error, tool_result
|
||||
|
||||
|
||||
def _check_spotify_available() -> bool:
    """Report whether the ``spotify`` auth status says the user is logged in.

    Any exception raised while reading the status is treated as "not
    available" and yields ``False``.
    """
    try:
        status = get_auth_status("spotify")
        return bool(status.get("logged_in"))
    except Exception:
        return False
|
||||
|
||||
|
||||
def _spotify_client() -> SpotifyClient:
    """Construct and return a new ``SpotifyClient``."""
    client = SpotifyClient()
    return client
|
||||
|
||||
|
||||
def _spotify_tool_error(exc: Exception) -> str:
    """Render an exception as a tool error payload via ``tool_error``.

    * ``SpotifyAPIError``: message plus the HTTP ``status_code``.
    * Any other ``SpotifyError`` (including ``SpotifyAuthRequiredError``):
      message only.
    * Anything else: a generic message naming the exception type.
    """
    # SpotifyAPIError subclasses SpotifyError, so it must be tested first;
    # checking the base class first made this branch unreachable and the
    # status code was never reported.
    if isinstance(exc, SpotifyAPIError):
        return tool_error(str(exc), status_code=exc.status_code)
    if isinstance(exc, (SpotifyError, SpotifyAuthRequiredError)):
        return tool_error(str(exc))
    return tool_error(f"Spotify tool failed: {type(exc).__name__}: {exc}")
|
||||
|
||||
|
||||
def _coerce_limit(raw: Any, *, default: int = 20, minimum: int = 1, maximum: int = 50) -> int:
|
||||
try:
|
||||
value = int(raw)
|
||||
except Exception:
|
||||
value = default
|
||||
return max(minimum, min(maximum, value))
|
||||
|
||||
|
||||
def _coerce_bool(raw: Any, default: bool = False) -> bool:
|
||||
if isinstance(raw, bool):
|
||||
return raw
|
||||
if isinstance(raw, str):
|
||||
cleaned = raw.strip().lower()
|
||||
if cleaned in {"1", "true", "yes", "on"}:
|
||||
return True
|
||||
if cleaned in {"0", "false", "no", "off"}:
|
||||
return False
|
||||
return default
|
||||
|
||||
|
||||
def _as_list(raw: Any) -> List[str]:
|
||||
if raw is None:
|
||||
return []
|
||||
if isinstance(raw, list):
|
||||
return [str(item).strip() for item in raw if str(item).strip()]
|
||||
return [str(raw).strip()] if str(raw).strip() else []
|
||||
|
||||
|
||||
def _describe_empty_playback(payload: Any, *, action: str) -> dict | None:
|
||||
if not isinstance(payload, dict) or not payload.get("empty"):
|
||||
return None
|
||||
if action == "get_currently_playing":
|
||||
return {
|
||||
"success": True,
|
||||
"action": action,
|
||||
"is_playing": False,
|
||||
"status_code": payload.get("status_code", 204),
|
||||
"message": payload.get("message") or "Spotify is not currently playing anything.",
|
||||
}
|
||||
if action == "get_state":
|
||||
return {
|
||||
"success": True,
|
||||
"action": action,
|
||||
"has_active_device": False,
|
||||
"status_code": payload.get("status_code", 204),
|
||||
"message": payload.get("message") or "No active Spotify playback session was found.",
|
||||
}
|
||||
return None
|
||||
|
||||
|
||||
def _handle_spotify_playback(args: dict, **kw) -> str:
    """Handle a ``spotify_playback`` tool call.

    ``args["action"]`` (default ``"get_state"``) selects the operation:
    ``get_state``, ``get_currently_playing``, ``play``, ``pause``, ``next``,
    ``previous``, ``seek``, ``set_repeat``, ``set_shuffle``, ``set_volume`` or
    ``recently_played``. The remaining keys are forwarded to the client call
    for that action.

    Args:
        args: Tool arguments (see ``SPOTIFY_PLAYBACK_SCHEMA``).
        **kw: Accepted for handler-signature compatibility; unused here.

    Returns:
        The string produced by ``tool_result`` on success, or by
        ``tool_error`` / ``_spotify_tool_error`` on failure. Exceptions raised
        while building the client or dispatching are converted, not propagated.
    """
    action = str(args.get("action") or "get_state").strip().lower()
    try:
        # Built inside the ``try`` so that a failure while creating the client
        # is reported as a tool error instead of escaping the handler.
        client = _spotify_client()

        if action == "get_state":
            payload = client.get_playback_state(market=args.get("market"))
            empty_result = _describe_empty_playback(payload, action=action)
            return tool_result(empty_result or payload)

        if action == "get_currently_playing":
            payload = client.get_currently_playing(market=args.get("market"))
            empty_result = _describe_empty_playback(payload, action=action)
            return tool_result(empty_result or payload)

        if action == "play":
            offset = args.get("offset")
            if isinstance(offset, dict):
                # Drop unset keys so only the provided offset fields are sent.
                payload_offset = {k: v for k, v in offset.items() if v is not None}
            else:
                payload_offset = None
            uris = normalize_spotify_uris(_as_list(args.get("uris")), "track") if args.get("uris") else None
            context_uri = None
            if args.get("context_uri"):
                raw_context = str(args.get("context_uri"))
                # Infer the context type from either URI or URL form; None is
                # passed through when it cannot be inferred.
                context_type = None
                if raw_context.startswith("spotify:album:") or "/album/" in raw_context:
                    context_type = "album"
                elif raw_context.startswith("spotify:playlist:") or "/playlist/" in raw_context:
                    context_type = "playlist"
                elif raw_context.startswith("spotify:artist:") or "/artist/" in raw_context:
                    context_type = "artist"
                context_uri = normalize_spotify_uri(raw_context, context_type)
            result = client.start_playback(
                device_id=args.get("device_id"),
                context_uri=context_uri,
                uris=uris,
                offset=payload_offset,
                position_ms=args.get("position_ms"),
            )
            return tool_result({"success": True, "action": action, "result": result})

        if action == "pause":
            result = client.pause_playback(device_id=args.get("device_id"))
            return tool_result({"success": True, "action": action, "result": result})

        if action == "next":
            result = client.skip_next(device_id=args.get("device_id"))
            return tool_result({"success": True, "action": action, "result": result})

        if action == "previous":
            result = client.skip_previous(device_id=args.get("device_id"))
            return tool_result({"success": True, "action": action, "result": result})

        if action == "seek":
            if args.get("position_ms") is None:
                return tool_error("position_ms is required for action='seek'")
            result = client.seek(position_ms=int(args["position_ms"]), device_id=args.get("device_id"))
            return tool_result({"success": True, "action": action, "result": result})

        if action == "set_repeat":
            state = str(args.get("state") or "").strip().lower()
            if state not in {"track", "context", "off"}:
                return tool_error("state must be one of: track, context, off")
            result = client.set_repeat(state=state, device_id=args.get("device_id"))
            return tool_result({"success": True, "action": action, "result": result})

        if action == "set_shuffle":
            result = client.set_shuffle(state=_coerce_bool(args.get("state")), device_id=args.get("device_id"))
            return tool_result({"success": True, "action": action, "result": result})

        if action == "set_volume":
            if args.get("volume_percent") is None:
                return tool_error("volume_percent is required for action='set_volume'")
            # Clamp to the 0-100 range before sending.
            result = client.set_volume(volume_percent=max(0, min(100, int(args["volume_percent"]))), device_id=args.get("device_id"))
            return tool_result({"success": True, "action": action, "result": result})

        if action == "recently_played":
            after = args.get("after")
            before = args.get("before")
            # Compare against None (not truthiness) so a cursor of 0 still
            # counts as "provided", matching the conversions below.
            if after is not None and before is not None:
                return tool_error("Provide only one of 'after' or 'before'")
            return tool_result(client.get_recently_played(
                limit=_coerce_limit(args.get("limit"), default=20),
                after=int(after) if after is not None else None,
                before=int(before) if before is not None else None,
            ))

        return tool_error(f"Unknown spotify_playback action: {action}")
    except Exception as exc:
        return _spotify_tool_error(exc)
|
||||
|
||||
|
||||
def _handle_spotify_devices(args: dict, **kw) -> str:
    """Handle a ``spotify_devices`` tool call.

    Supported actions (``args["action"]``, default ``"list"``):
      * ``list`` — return the client's device listing;
      * ``transfer`` — move playback to ``args["device_id"]`` (required),
        optionally starting playback when ``args["play"]`` is truthy.

    Args:
        args: Tool arguments.
        **kw: Accepted for handler-signature compatibility; unused here.

    Returns:
        A ``tool_result`` string on success, otherwise a tool-error string.
    """
    action = str(args.get("action") or "list").strip().lower()
    try:
        # Built inside the ``try`` so that a failure while creating the client
        # is reported as a tool error instead of escaping the handler.
        client = _spotify_client()
        if action == "list":
            return tool_result(client.get_devices())
        if action == "transfer":
            device_id = str(args.get("device_id") or "").strip()
            if not device_id:
                return tool_error("device_id is required for action='transfer'")
            result = client.transfer_playback(device_id=device_id, play=_coerce_bool(args.get("play")))
            return tool_result({"success": True, "action": action, "result": result})
        return tool_error(f"Unknown spotify_devices action: {action}")
    except Exception as exc:
        return _spotify_tool_error(exc)
|
||||
|
||||
|
||||
def _handle_spotify_queue(args: dict, **kw) -> str:
    """Handle a ``spotify_queue`` tool call.

    Supported actions (``args["action"]``, default ``"get"``):
      * ``get`` — return the client's queue listing;
      * ``add`` — normalise ``args["uri"]`` (required) and add it to the
        queue, optionally on ``args["device_id"]``.

    Args:
        args: Tool arguments.
        **kw: Accepted for handler-signature compatibility; unused here.

    Returns:
        A ``tool_result`` string on success, otherwise a tool-error string.
    """
    action = str(args.get("action") or "get").strip().lower()
    try:
        # Built inside the ``try`` so that a failure while creating the client
        # is reported as a tool error instead of escaping the handler.
        client = _spotify_client()
        if action == "get":
            return tool_result(client.get_queue())
        if action == "add":
            raw_uri = str(args.get("uri") or "")
            # Fail with a clear message rather than handing an empty string to
            # the normaliser (mirrors the device_id check in spotify_devices).
            if not raw_uri.strip():
                return tool_error("uri is required for action='add'")
            uri = normalize_spotify_uri(raw_uri, None)
            result = client.add_to_queue(uri=uri, device_id=args.get("device_id"))
            return tool_result({"success": True, "action": action, "uri": uri, "result": result})
        return tool_error(f"Unknown spotify_queue action: {action}")
    except Exception as exc:
        return _spotify_tool_error(exc)
|
||||
|
||||
|
||||
def _handle_spotify_search(args: dict, **kw) -> str:
    """Handle a ``spotify_search`` tool call.

    Args:
        args: Tool arguments. ``query`` is required. ``types`` (list) or
            ``type`` (single value) selects what to search for and defaults to
            ``["track"]``; unsupported values are dropped. ``limit`` is
            clamped by ``_coerce_limit`` (default 10) and ``offset`` is floored
            at 0. ``market`` and ``include_external`` are passed through.
        **kw: Accepted for handler-signature compatibility; unused here.

    Returns:
        A ``tool_result`` string with the search response, or a tool-error
        string when validation or the client call fails.
    """
    query = str(args.get("query") or "").strip()
    if not query:
        return tool_error("query is required")

    allowed_types = {"album", "artist", "playlist", "track", "show", "episode", "audiobook"}
    raw_types = _as_list(args.get("types") or args.get("type") or ["track"])
    search_types = [value.lower() for value in raw_types if value.lower() in allowed_types]
    if not search_types:
        return tool_error("types must contain one or more of: album, artist, playlist, track, show, episode, audiobook")

    try:
        # Built inside the ``try`` (and only after the arguments validated) so
        # that a failure while creating the client is reported as a tool error.
        client = _spotify_client()
        return tool_result(client.search(
            query=query,
            search_types=search_types,
            limit=_coerce_limit(args.get("limit"), default=10),
            offset=max(0, int(args.get("offset") or 0)),
            market=args.get("market"),
            include_external=args.get("include_external"),
        ))
    except Exception as exc:
        return _spotify_tool_error(exc)
|
||||
|
||||
|
||||
def _handle_spotify_playlists(args: dict, **kw) -> str:
    """Handle a ``spotify_playlists`` tool call.

    Supported actions (``args["action"]``, default ``"list"``): ``list``,
    ``get``, ``create``, ``add_items``, ``remove_items`` and
    ``update_details``. Playlist identifiers are normalised with
    ``normalize_spotify_id`` and item URIs with ``normalize_spotify_uris``.

    Args:
        args: Tool arguments (see ``SPOTIFY_PLAYLISTS_SCHEMA``).
        **kw: Accepted for handler-signature compatibility; unused here.

    Returns:
        A ``tool_result`` string on success, otherwise a tool-error string.
    """
    action = str(args.get("action") or "list").strip().lower()
    try:
        # Built inside the ``try`` so that a failure while creating the client
        # is reported as a tool error instead of escaping the handler.
        client = _spotify_client()

        if action == "list":
            return tool_result(client.get_my_playlists(
                limit=_coerce_limit(args.get("limit"), default=20),
                offset=max(0, int(args.get("offset") or 0)),
            ))

        if action == "get":
            playlist_id = normalize_spotify_id(str(args.get("playlist_id") or ""), "playlist")
            return tool_result(client.get_playlist(playlist_id=playlist_id, market=args.get("market")))

        if action == "create":
            name = str(args.get("name") or "").strip()
            if not name:
                return tool_error("name is required for action='create'")
            return tool_result(client.create_playlist(
                name=name,
                public=_coerce_bool(args.get("public")),
                collaborative=_coerce_bool(args.get("collaborative")),
                description=args.get("description"),
            ))

        if action == "add_items":
            playlist_id = normalize_spotify_id(str(args.get("playlist_id") or ""), "playlist")
            uris = normalize_spotify_uris(_as_list(args.get("uris")))
            return tool_result(client.add_playlist_items(
                playlist_id=playlist_id,
                uris=uris,
                position=args.get("position"),
            ))

        if action == "remove_items":
            playlist_id = normalize_spotify_id(str(args.get("playlist_id") or ""), "playlist")
            uris = normalize_spotify_uris(_as_list(args.get("uris")))
            return tool_result(client.remove_playlist_items(
                playlist_id=playlist_id,
                uris=uris,
                snapshot_id=args.get("snapshot_id"),
            ))

        if action == "update_details":
            playlist_id = normalize_spotify_id(str(args.get("playlist_id") or ""), "playlist")
            # Values are forwarded as given (not coerced), so an omitted field
            # reaches the client as None.
            return tool_result(client.update_playlist_details(
                playlist_id=playlist_id,
                name=args.get("name"),
                public=args.get("public"),
                collaborative=args.get("collaborative"),
                description=args.get("description"),
            ))

        return tool_error(f"Unknown spotify_playlists action: {action}")
    except Exception as exc:
        return _spotify_tool_error(exc)
|
||||
|
||||
|
||||
def _handle_spotify_albums(args: dict, **kw) -> str:
    """Handle a ``spotify_albums`` tool call.

    Supported actions (``args["action"]``, default ``"get"``):
      * ``get`` — fetch album metadata;
      * ``tracks`` — fetch the album's tracks with ``limit``/``offset`` paging.

    The album is taken from ``args["album_id"]`` or, failing that,
    ``args["id"]`` and normalised with ``normalize_spotify_id``.

    Args:
        args: Tool arguments.
        **kw: Accepted for handler-signature compatibility; unused here.

    Returns:
        A ``tool_result`` string on success, otherwise a tool-error string.
    """
    action = str(args.get("action") or "get").strip().lower()
    # Reject unknown actions first so the caller gets the "unknown action"
    # message rather than whatever the id normaliser reports for a missing id.
    if action not in {"get", "tracks"}:
        return tool_error(f"Unknown spotify_albums action: {action}")
    try:
        # Built inside the ``try`` so that a failure while creating the client
        # is reported as a tool error instead of escaping the handler.
        client = _spotify_client()
        album_id = normalize_spotify_id(str(args.get("album_id") or args.get("id") or ""), "album")
        if action == "get":
            return tool_result(client.get_album(album_id=album_id, market=args.get("market")))
        return tool_result(client.get_album_tracks(
            album_id=album_id,
            limit=_coerce_limit(args.get("limit"), default=20),
            offset=max(0, int(args.get("offset") or 0)),
            market=args.get("market"),
        ))
    except Exception as exc:
        return _spotify_tool_error(exc)
|
||||
|
||||
|
||||
def _handle_spotify_library(args: dict, **kw) -> str:
    """Unified handler for saved tracks + saved albums (formerly two tools).

    ``args["kind"]`` must be ``"tracks"`` or ``"albums"``. Supported actions
    (``args["action"]``, default ``"list"``):
      * ``list`` — page through the saved items (``limit``, ``offset``, ``market``);
      * ``save`` — save the items given in ``uris`` (or ``items``);
      * ``remove`` — remove the items given in ``ids`` (or ``items``).

    Args:
        args: Tool arguments (see ``SPOTIFY_LIBRARY_SCHEMA``).
        **kw: Accepted for handler-signature compatibility; unused here.

    Returns:
        A ``tool_result`` string on success, otherwise a tool-error string.
    """
    kind = str(args.get("kind") or "").strip().lower()
    if kind not in {"tracks", "albums"}:
        return tool_error("kind must be one of: tracks, albums")
    action = str(args.get("action") or "list").strip().lower()
    item_type = "track" if kind == "tracks" else "album"
    try:
        # Built inside the ``try`` so that a failure while creating the client
        # is reported as a tool error instead of escaping the handler.
        client = _spotify_client()

        if action == "list":
            limit = _coerce_limit(args.get("limit"), default=20)
            offset = max(0, int(args.get("offset") or 0))
            market = args.get("market")
            if kind == "tracks":
                return tool_result(client.get_saved_tracks(limit=limit, offset=offset, market=market))
            return tool_result(client.get_saved_albums(limit=limit, offset=offset, market=market))

        if action == "save":
            raw_items = _as_list(args.get("uris") or args.get("items"))
            # Same empty-input guard as ``remove`` below, so both mutating
            # actions reject a call that names nothing to act on.
            if not raw_items:
                return tool_error("uris/items is required for action='save'")
            uris = normalize_spotify_uris(raw_items, item_type)
            return tool_result(client.save_library_items(uris=uris))

        if action == "remove":
            ids = [normalize_spotify_id(item, item_type) for item in _as_list(args.get("ids") or args.get("items"))]
            if not ids:
                return tool_error("ids/items is required for action='remove'")
            if kind == "tracks":
                return tool_result(client.remove_saved_tracks(track_ids=ids))
            return tool_result(client.remove_saved_albums(album_ids=ids))

        return tool_error(f"Unknown spotify_library action: {action}")
    except Exception as exc:
        return _spotify_tool_error(exc)
|
||||
|
||||
|
||||
# Shared JSON-schema fragment for plain string parameters. The same dict object
# is reused by every schema below, so it should be treated as read-only.
COMMON_STRING = {"type": "string"}
|
||||
|
||||
# Tool definition for ``spotify_playback``. The ``action`` enum lists the
# branches implemented by ``_handle_spotify_playback``.
# NOTE(review): presumably consumed as a function-calling tool schema — the
# registration code is outside this section.
SPOTIFY_PLAYBACK_SCHEMA = {
    "name": "spotify_playback",
    "description": "Control Spotify playback, inspect the active playback state, or fetch recently played tracks.",
    "parameters": {
        "type": "object",
        "properties": {
            "action": {"type": "string", "enum": ["get_state", "get_currently_playing", "play", "pause", "next", "previous", "seek", "set_repeat", "set_shuffle", "set_volume", "recently_played"]},
            "device_id": COMMON_STRING,
            "market": COMMON_STRING,
            # play: context_uri is an album/playlist/artist reference; uris are track references.
            "context_uri": COMMON_STRING,
            "uris": {"type": "array", "items": COMMON_STRING},
            # play: None-valued keys are stripped by the handler before sending.
            "offset": {"type": "object"},
            # Required for seek; optional for play.
            "position_ms": {"type": "integer"},
            "state": {"description": "For set_repeat use track/context/off. For set_shuffle use boolean-like true/false.", "oneOf": [{"type": "string"}, {"type": "boolean"}]},
            # Required for set_volume; the handler clamps it to 0-100.
            "volume_percent": {"type": "integer"},
            "limit": {"type": "integer", "description": "For recently_played: number of tracks (max 50)"},
            # The handler rejects calls that supply both cursors.
            "after": {"type": "integer", "description": "For recently_played: Unix ms cursor (after this timestamp)"},
            "before": {"type": "integer", "description": "For recently_played: Unix ms cursor (before this timestamp)"},
        },
        "required": ["action"],
    },
}
|
||||
|
||||
# Tool definition for ``spotify_devices``; actions match the branches in
# ``_handle_spotify_devices``.
SPOTIFY_DEVICES_SCHEMA = {
    "name": "spotify_devices",
    "description": "List Spotify Connect devices or transfer playback to a different device.",
    "parameters": {
        "type": "object",
        "properties": {
            "action": {"type": "string", "enum": ["list", "transfer"]},
            # Required by the handler when action is "transfer".
            "device_id": COMMON_STRING,
            # transfer: forwarded to the client's ``play`` argument after bool coercion.
            "play": {"type": "boolean"},
        },
        "required": ["action"],
    },
}
|
||||
|
||||
# Tool definition for ``spotify_queue``; actions match the branches in
# ``_handle_spotify_queue``.
SPOTIFY_QUEUE_SCHEMA = {
    "name": "spotify_queue",
    "description": "Inspect the user's Spotify queue or add an item to it.",
    "parameters": {
        "type": "object",
        "properties": {
            "action": {"type": "string", "enum": ["get", "add"]},
            # add: the item to enqueue; normalised by the handler before use.
            "uri": COMMON_STRING,
            "device_id": COMMON_STRING,
        },
        "required": ["action"],
    },
}
|
||||
|
||||
# Tool definition for ``spotify_search``. The description lists every type
# accepted by ``_handle_spotify_search`` (including audiobooks, which the
# handler accepts but the description previously omitted).
SPOTIFY_SEARCH_SCHEMA = {
    "name": "spotify_search",
    "description": "Search the Spotify catalog for tracks, albums, artists, playlists, shows, episodes, or audiobooks.",
    "parameters": {
        "type": "object",
        "properties": {
            "query": COMMON_STRING,
            # ``types`` takes precedence over the single-value ``type`` alias;
            # the handler defaults to ["track"] when neither is given.
            "types": {"type": "array", "items": COMMON_STRING},
            "type": COMMON_STRING,
            "limit": {"type": "integer"},
            "offset": {"type": "integer"},
            "market": COMMON_STRING,
            "include_external": COMMON_STRING,
        },
        "required": ["query"],
    },
}
|
||||
|
||||
# Tool definition for ``spotify_playlists``; actions match the branches in
# ``_handle_spotify_playlists``.
SPOTIFY_PLAYLISTS_SCHEMA = {
    "name": "spotify_playlists",
    "description": "List, inspect, create, update, and modify Spotify playlists.",
    "parameters": {
        "type": "object",
        "properties": {
            "action": {"type": "string", "enum": ["list", "get", "create", "add_items", "remove_items", "update_details"]},
            # Used by get, add_items, remove_items and update_details.
            "playlist_id": COMMON_STRING,
            "market": COMMON_STRING,
            # list: paging controls.
            "limit": {"type": "integer"},
            "offset": {"type": "integer"},
            # create (name required) / update_details.
            "name": COMMON_STRING,
            "description": COMMON_STRING,
            "public": {"type": "boolean"},
            "collaborative": {"type": "boolean"},
            # add_items / remove_items.
            "uris": {"type": "array", "items": COMMON_STRING},
            # add_items only.
            "position": {"type": "integer"},
            # remove_items only.
            "snapshot_id": COMMON_STRING,
        },
        "required": ["action"],
    },
}
|
||||
|
||||
# Tool definition for ``spotify_albums``; actions match the branches in
# ``_handle_spotify_albums``.
SPOTIFY_ALBUMS_SCHEMA = {
    "name": "spotify_albums",
    "description": "Fetch Spotify album metadata or album tracks.",
    "parameters": {
        "type": "object",
        "properties": {
            "action": {"type": "string", "enum": ["get", "tracks"]},
            # The handler reads ``album_id`` first and falls back to ``id``.
            "album_id": COMMON_STRING,
            "id": COMMON_STRING,
            "market": COMMON_STRING,
            # tracks: paging controls.
            "limit": {"type": "integer"},
            "offset": {"type": "integer"},
        },
        "required": ["action"],
    },
}
|
||||
|
||||
# Tool definition for ``spotify_library``; ``kind`` and ``action`` values match
# what ``_handle_spotify_library`` accepts.
SPOTIFY_LIBRARY_SCHEMA = {
    "name": "spotify_library",
    "description": "List, save, or remove the user's saved Spotify tracks or albums. Use `kind` to select which.",
    "parameters": {
        "type": "object",
        "properties": {
            "kind": {"type": "string", "enum": ["tracks", "albums"], "description": "Which library to operate on"},
            "action": {"type": "string", "enum": ["list", "save", "remove"]},
            # list: paging controls and market filter.
            "limit": {"type": "integer"},
            "offset": {"type": "integer"},
            "market": COMMON_STRING,
            # save reads ``uris``; remove reads ``ids``; both fall back to ``items``.
            "uris": {"type": "array", "items": COMMON_STRING},
            "ids": {"type": "array", "items": COMMON_STRING},
            "items": {"type": "array", "items": COMMON_STRING},
        },
        "required": ["kind", "action"],
    },
}
|
||||
@@ -78,6 +78,16 @@ termux = [
|
||||
]
|
||||
dingtalk = ["dingtalk-stream>=0.20,<1", "alibabacloud-dingtalk>=2.0.0", "qrcode>=7.0,<8"]
|
||||
feishu = ["lark-oapi>=1.5.3,<2", "qrcode>=7.0,<8"]
|
||||
google = [
|
||||
# Required by the google-workspace skill (Gmail, Calendar, Drive, Contacts,
|
||||
# Sheets, Docs). Declared here so packagers (Nix, Homebrew) ship them with
|
||||
# the [all] extra and users don't hit runtime `pip install` paths that fail
|
||||
# in environments without pip (e.g. Nix-managed Python).
|
||||
"google-api-python-client>=2.100,<3",
|
||||
"google-auth-oauthlib>=1.0,<2",
|
||||
"google-auth-httplib2>=0.2,<1",
|
||||
]
|
||||
# `hermes dashboard` (localhost SPA + API). Not in core to keep the default install lean.
|
||||
web = ["fastapi>=0.104.0,<1", "uvicorn[standard]>=0.24.0,<1"]
|
||||
rl = [
|
||||
"atroposlib @ git+https://github.com/NousResearch/atropos.git@c20c85256e5a45ad31edf8b7276e9c5ee1995a30",
|
||||
@@ -109,6 +119,7 @@ all = [
|
||||
"hermes-agent[voice]",
|
||||
"hermes-agent[dingtalk]",
|
||||
"hermes-agent[feishu]",
|
||||
"hermes-agent[google]",
|
||||
"hermes-agent[mistral]",
|
||||
"hermes-agent[bedrock]",
|
||||
"hermes-agent[web]",
|
||||
|
||||
+1063
-392
File diff suppressed because it is too large
Load Diff
Executable
+95
@@ -0,0 +1,95 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Build the Hermes Model Catalog — a centralized JSON manifest of curated models.
|
||||
|
||||
This script reads the in-repo hardcoded curated lists (``OPENROUTER_MODELS``,
|
||||
``_PROVIDER_MODELS["nous"]``) and writes them to a JSON manifest that the
|
||||
Hermes CLI fetches at runtime. Publishing the catalog through the docs site
|
||||
lets maintainers update model lists without shipping a Hermes release.
|
||||
|
||||
The runtime fetcher falls back to the same in-repo hardcoded lists if the
|
||||
manifest is unreachable, so this script is a convenience for keeping the
|
||||
manifest in sync — not a source of truth.
|
||||
|
||||
Usage::
|
||||
|
||||
python scripts/build_model_catalog.py
|
||||
|
||||
Output: ``website/static/api/model-catalog.json``
|
||||
|
||||
Live URL (after ``deploy-site.yml`` runs on merge to main):
|
||||
``https://hermes-agent.nousresearch.com/docs/api/model-catalog.json``
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
from datetime import datetime, timezone
|
||||
|
||||
REPO_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||
sys.path.insert(0, REPO_ROOT)
|
||||
|
||||
# Ensure HERMES_HOME is set for imports that touch it at module level.
|
||||
os.environ.setdefault("HERMES_HOME", os.path.join(os.path.expanduser("~"), ".hermes"))
|
||||
|
||||
from hermes_cli.models import OPENROUTER_MODELS, _PROVIDER_MODELS # noqa: E402
|
||||
|
||||
# Destination of the generated manifest, inside the website's static assets.
OUTPUT_PATH = os.path.join(REPO_ROOT, "website", "static", "api", "model-catalog.json")
# Emitted as the manifest's top-level "version" field by build_catalog().
CATALOG_VERSION = 1
|
||||
|
||||
|
||||
def build_catalog() -> dict:
    """Assemble the model-catalog manifest from the in-repo curated lists.

    Returns:
        A dict with ``version``, a UTC ``updated_at`` timestamp, top-level
        ``metadata`` and one ``providers`` entry each for OpenRouter (id +
        description pairs from ``OPENROUTER_MODELS``) and Nous (ids from
        ``_PROVIDER_MODELS["nous"]``, empty when that key is absent).
    """
    generated_at = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    openrouter_entries = []
    for model_id, blurb in OPENROUTER_MODELS:
        openrouter_entries.append({"id": model_id, "description": blurb})
    openrouter_block = {
        "metadata": {
            "display_name": "OpenRouter",
            "note": (
                "Descriptions drive picker badges. Live /api/v1/models "
                "filters curated ids by tool-calling support and free pricing."
            ),
        },
        "models": openrouter_entries,
    }

    nous_entries = []
    for model_id in _PROVIDER_MODELS.get("nous", []):
        nous_entries.append({"id": model_id})
    nous_block = {
        "metadata": {
            "display_name": "Nous Portal",
            "note": (
                "Free-tier gating is determined live via Portal pricing "
                "(partition_nous_models_by_tier), not this manifest."
            ),
        },
        "models": nous_entries,
    }

    return {
        "version": CATALOG_VERSION,
        "updated_at": generated_at,
        "metadata": {
            "source": "hermes-agent repo",
            "docs": "https://hermes-agent.nousresearch.com/docs/reference/model-catalog",
        },
        "providers": {
            "openrouter": openrouter_block,
            "nous": nous_block,
        },
    }
|
||||
|
||||
|
||||
def main() -> int:
    """Build the catalog, write it to ``OUTPUT_PATH`` and print a summary.

    The output directory is created if missing. The file is written as
    indented JSON followed by a trailing newline.

    Returns:
        ``0``, used as the process exit status.
    """
    catalog = build_catalog()
    os.makedirs(os.path.dirname(OUTPUT_PATH), exist_ok=True)
    # Explicit encoding keeps the generated file identical regardless of the
    # locale of the machine that runs the script.
    with open(OUTPUT_PATH, "w", encoding="utf-8") as fh:
        json.dump(catalog, fh, indent=2)
        fh.write("\n")

    print(f"Wrote {OUTPUT_PATH}")
    for provider, block in catalog["providers"].items():
        print(f" {provider}: {len(block['models'])} models")
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
+99
-7
@@ -29,10 +29,25 @@ BOLD='\033[1m'
|
||||
REPO_URL_SSH="git@github.com:NousResearch/hermes-agent.git"
|
||||
REPO_URL_HTTPS="https://github.com/NousResearch/hermes-agent.git"
|
||||
HERMES_HOME="${HERMES_HOME:-$HOME/.hermes}"
|
||||
INSTALL_DIR="${HERMES_INSTALL_DIR:-$HERMES_HOME/hermes-agent}"
|
||||
# INSTALL_DIR is resolved AFTER arg parsing and OS detection so we can pick an
|
||||
# FHS-style layout for root installs. Track whether the user gave us an
|
||||
# explicit directory — if so we never override it.
|
||||
if [ -n "${HERMES_INSTALL_DIR:-}" ]; then
|
||||
INSTALL_DIR="$HERMES_INSTALL_DIR"
|
||||
INSTALL_DIR_EXPLICIT=true
|
||||
else
|
||||
INSTALL_DIR=""
|
||||
INSTALL_DIR_EXPLICIT=false
|
||||
fi
|
||||
PYTHON_VERSION="3.11"
|
||||
NODE_VERSION="22"
|
||||
|
||||
# FHS-style root install layout (set by resolve_install_layout when applicable):
|
||||
# code at /usr/local/lib/hermes-agent, command at /usr/local/bin/hermes,
|
||||
# data still at /root/.hermes (HERMES_HOME). Matches Claude Code / Codex CLI
|
||||
# and keeps Docker bind-mounted /root/ volumes lean.
|
||||
ROOT_FHS_LAYOUT=false
|
||||
|
||||
# Options
|
||||
USE_VENV=true
|
||||
RUN_SETUP=true
|
||||
@@ -64,6 +79,7 @@ while [[ $# -gt 0 ]]; do
|
||||
;;
|
||||
--dir)
|
||||
INSTALL_DIR="$2"
|
||||
INSTALL_DIR_EXPLICIT=true
|
||||
shift 2
|
||||
;;
|
||||
--hermes-home)
|
||||
@@ -79,9 +95,20 @@ while [[ $# -gt 0 ]]; do
|
||||
echo " --no-venv Don't create virtual environment"
|
||||
echo " --skip-setup Skip interactive setup wizard"
|
||||
echo " --branch NAME Git branch to install (default: main)"
|
||||
echo " --dir PATH Installation directory (default: ~/.hermes/hermes-agent)"
|
||||
echo " --dir PATH Installation directory"
|
||||
echo " default (non-root): ~/.hermes/hermes-agent"
|
||||
echo " default (root, Linux): /usr/local/lib/hermes-agent"
|
||||
echo " --hermes-home PATH Data directory (default: ~/.hermes, or \$HERMES_HOME)"
|
||||
echo " -h, --help Show this help"
|
||||
echo ""
|
||||
echo "Notes:"
|
||||
echo " When running as root on Linux, Hermes installs the code under"
|
||||
echo " /usr/local/lib/hermes-agent and links the command into"
|
||||
echo " /usr/local/bin/hermes (FHS layout — matches Claude Code / Codex CLI)."
|
||||
echo " Data, config, sessions, and logs still live in \$HERMES_HOME"
|
||||
echo " (default /root/.hermes). This keeps Docker bind-mounted volumes"
|
||||
echo " small and ensures the command is on PATH for all shells."
|
||||
echo " Existing installs at \$HERMES_HOME/hermes-agent are preserved in-place."
|
||||
exit 0
|
||||
;;
|
||||
*)
|
||||
@@ -163,9 +190,60 @@ is_termux() {
|
||||
[ -n "${TERMUX_VERSION:-}" ] || [[ "${PREFIX:-}" == *"com.termux/files/usr"* ]]
|
||||
}
|
||||
|
||||
# Decide where the repo checkout + venv live, and where the `hermes` command
|
||||
# symlink goes. Called after detect_os so $OS/$DISTRO are known.
|
||||
#
|
||||
# Defaults:
|
||||
# - Non-root, any OS: INSTALL_DIR = $HERMES_HOME/hermes-agent
|
||||
# command link in $HOME/.local/bin
|
||||
# - Termux (any uid): INSTALL_DIR = $HERMES_HOME/hermes-agent
|
||||
# command link in $PREFIX/bin (already on PATH)
|
||||
# - Root on Linux (new): INSTALL_DIR = /usr/local/lib/hermes-agent
|
||||
# command link in /usr/local/bin
|
||||
# (unless a legacy install already exists at
|
||||
# $HERMES_HOME/hermes-agent — then preserve it)
|
||||
#
|
||||
# Always no-op when the user set --dir or $HERMES_INSTALL_DIR.
|
||||
resolve_install_layout() {
    # An explicit --dir / $HERMES_INSTALL_DIR always wins; just report it.
    if [ "$INSTALL_DIR_EXPLICIT" = true ]; then
        log_info "Install directory: $INSTALL_DIR (explicit)"
        return 0
    fi

    # User-scoped checkout location shared by every non-FHS case below.
    local legacy_dir="$HERMES_HOME/hermes-agent"

    # Termux: package manager manages /data/data/..., keep code in HERMES_HOME.
    if is_termux; then
        INSTALL_DIR="$legacy_dir"
        return 0
    fi

    # Anything that is not "root on Linux" keeps the user-scoped layout.
    # macOS root installs land here too because /usr/local/ on macOS is
    # Homebrew territory and we don't want to fight that.
    if [ "$OS" != "linux" ] || [ "$(id -u)" -ne 0 ]; then
        INSTALL_DIR="$legacy_dir"
        return 0
    fi

    # Root on Linux with an existing checkout: leave it where it is.
    if [ -d "$legacy_dir/.git" ]; then
        INSTALL_DIR="$legacy_dir"
        log_info "Existing install detected at $INSTALL_DIR — keeping legacy layout"
        log_info " (new root installs use /usr/local/lib/hermes-agent)"
        return 0
    fi

    # Fresh root install on Linux: switch to the FHS layout.
    INSTALL_DIR="/usr/local/lib/hermes-agent"
    ROOT_FHS_LAYOUT=true
    log_info "Root install on Linux — using FHS layout"
    log_info " Code: $INSTALL_DIR"
    log_info " Command: /usr/local/bin/hermes"
    log_info " Data: $HERMES_HOME (unchanged)"
    return 0
}
|
||||
|
||||
get_command_link_dir() {
|
||||
if is_termux && [ -n "${PREFIX:-}" ]; then
|
||||
echo "$PREFIX/bin"
|
||||
elif [ "$ROOT_FHS_LAYOUT" = true ]; then
|
||||
echo "/usr/local/bin"
|
||||
else
|
||||
echo "$HOME/.local/bin"
|
||||
fi
|
||||
@@ -174,6 +252,8 @@ get_command_link_dir() {
|
||||
get_command_link_display_dir() {
|
||||
if is_termux && [ -n "${PREFIX:-}" ]; then
|
||||
echo '$PREFIX/bin'
|
||||
elif [ "$ROOT_FHS_LAYOUT" = true ]; then
|
||||
echo '/usr/local/bin'
|
||||
else
|
||||
echo '~/.local/bin'
|
||||
fi
|
||||
@@ -975,6 +1055,14 @@ setup_path() {
|
||||
return 0
|
||||
fi
|
||||
|
||||
# FHS layout: /usr/local/bin is on PATH for every standard shell, nothing to inject.
|
||||
if [ "$ROOT_FHS_LAYOUT" = true ]; then
|
||||
export PATH="$command_link_dir:$PATH"
|
||||
log_info "/usr/local/bin is already on PATH for all shells"
|
||||
log_success "hermes command ready"
|
||||
return 0
|
||||
fi
|
||||
|
||||
# Check if ~/.local/bin is on PATH; if not, add it to shell config.
|
||||
# Detect the user's actual login shell (not the shell running this script,
|
||||
# which is always bash when piped from curl).
|
||||
@@ -1339,12 +1427,12 @@ print_success() {
|
||||
echo ""
|
||||
|
||||
# Show file locations
|
||||
echo -e "${CYAN}${BOLD}📁 Your files (all in ~/.hermes/):${NC}"
|
||||
echo -e "${CYAN}${BOLD}📁 Your files:${NC}"
|
||||
echo ""
|
||||
echo -e " ${YELLOW}Config:${NC} ~/.hermes/config.yaml"
|
||||
echo -e " ${YELLOW}API Keys:${NC} ~/.hermes/.env"
|
||||
echo -e " ${YELLOW}Data:${NC} ~/.hermes/cron/, sessions/, logs/"
|
||||
echo -e " ${YELLOW}Code:${NC} ~/.hermes/hermes-agent/"
|
||||
echo -e " ${YELLOW}Config:${NC} $HERMES_HOME/config.yaml"
|
||||
echo -e " ${YELLOW}API Keys:${NC} $HERMES_HOME/.env"
|
||||
echo -e " ${YELLOW}Data:${NC} $HERMES_HOME/cron/, sessions/, logs/"
|
||||
echo -e " ${YELLOW}Code:${NC} $INSTALL_DIR"
|
||||
echo ""
|
||||
|
||||
echo -e "${CYAN}─────────────────────────────────────────────────────────${NC}"
|
||||
@@ -1364,6 +1452,9 @@ print_success() {
|
||||
if [ "$DISTRO" = "termux" ]; then
|
||||
echo -e "${YELLOW}⚡ 'hermes' was linked into $(get_command_link_display_dir), which is already on PATH in Termux.${NC}"
|
||||
echo ""
|
||||
elif [ "$ROOT_FHS_LAYOUT" = true ]; then
|
||||
echo -e "${YELLOW}⚡ 'hermes' was linked into /usr/local/bin and is ready to use — no shell reload needed.${NC}"
|
||||
echo ""
|
||||
else
|
||||
echo -e "${YELLOW}⚡ Reload your shell to use 'hermes' command:${NC}"
|
||||
echo ""
|
||||
@@ -1415,6 +1506,7 @@ main() {
|
||||
print_banner
|
||||
|
||||
detect_os
|
||||
resolve_install_layout
|
||||
install_uv
|
||||
check_python
|
||||
check_git
|
||||
|
||||
@@ -43,10 +43,16 @@ AUTHOR_MAP = {
|
||||
"teknium1@gmail.com": "teknium1",
|
||||
"teknium@nousresearch.com": "teknium1",
|
||||
"127238744+teknium1@users.noreply.github.com": "teknium1",
|
||||
"focusflow.app.help@gmail.com": "yes999zc",
|
||||
"343873859@qq.com": "DrStrangerUJN",
|
||||
"uzmpsk.dilekakbas@gmail.com": "dlkakbs",
|
||||
"jefferson@heimdallstrategy.com": "Mind-Dragon",
|
||||
"130918800+devorun@users.noreply.github.com": "devorun",
|
||||
"maks.mir@yahoo.com": "say8hi",
|
||||
"web3blind@users.noreply.github.com": "web3blind",
|
||||
"julia@alexland.us": "alexg0bot",
|
||||
"1060770+benjaminsehl@users.noreply.github.com": "benjaminsehl",
|
||||
"nerijusn76@gmail.com": "Nerijusas",
|
||||
# contributors (from noreply pattern)
|
||||
"david.vv@icloud.com": "davidvv",
|
||||
"wangqiang@wangqiangdeMac-mini.local": "xiaoqiang243",
|
||||
@@ -58,13 +64,21 @@ AUTHOR_MAP = {
|
||||
"keifergu@tencent.com": "keifergu",
|
||||
"kshitijk4poor@users.noreply.github.com": "kshitijk4poor",
|
||||
"abner.the.foreman@agentmail.to": "Abnertheforeman",
|
||||
"thomasgeorgevii09@gmail.com": "tochukwuada",
|
||||
"harryykyle1@gmail.com": "hharry11",
|
||||
"kshitijk4poor@gmail.com": "kshitijk4poor",
|
||||
"keira.voss94@gmail.com": "keiravoss94",
|
||||
"16443023+stablegenius49@users.noreply.github.com": "stablegenius49",
|
||||
"fqsy1416@gmail.com": "EKKOLearnAI",
|
||||
"simbamax99@gmail.com": "simbam99",
|
||||
"iris@growthpillars.co": "irispillars",
|
||||
"185121704+stablegenius49@users.noreply.github.com": "stablegenius49",
|
||||
"101283333+batuhankocyigit@users.noreply.github.com": "batuhankocyigit",
|
||||
"255305877+ismell0992-afk@users.noreply.github.com": "ismell0992-afk",
|
||||
"cyprian@ironin.pl": "iRonin",
|
||||
"valdi.jorge@gmail.com": "jvcl",
|
||||
"q19dcp@gmail.com": "aj-nt",
|
||||
"ebukau84@gmail.com": "UgwujaGeorge",
|
||||
"francip@gmail.com": "francip",
|
||||
"omni@comelse.com": "omnissiah-comelse",
|
||||
"oussama.redcode@gmail.com": "mavrickdeveloper",
|
||||
@@ -77,10 +91,13 @@ AUTHOR_MAP = {
|
||||
"77628552+raulvidis@users.noreply.github.com": "raulvidis",
|
||||
"145567217+Aum08Desai@users.noreply.github.com": "Aum08Desai",
|
||||
"256820943+kshitij-eliza@users.noreply.github.com": "kshitij-eliza",
|
||||
"jiechengwu@pony.ai": "Jason2031",
|
||||
"44278268+shitcoinsherpa@users.noreply.github.com": "shitcoinsherpa",
|
||||
"104278804+Sertug17@users.noreply.github.com": "Sertug17",
|
||||
"112503481+caentzminger@users.noreply.github.com": "caentzminger",
|
||||
"258577966+voidborne-d@users.noreply.github.com": "voidborne-d",
|
||||
"liusway405@gmail.com": "voidborne-d",
|
||||
"xydarcher@uestc.edu.cn": "Readon",
|
||||
"sir_even@icloud.com": "sirEven",
|
||||
"36056348+sirEven@users.noreply.github.com": "sirEven",
|
||||
"70424851+insecurejezza@users.noreply.github.com": "insecurejezza",
|
||||
@@ -103,6 +120,7 @@ AUTHOR_MAP = {
|
||||
"30841158+n-WN@users.noreply.github.com": "n-WN",
|
||||
"tsuijinglei@gmail.com": "hiddenpuppy",
|
||||
"jerome@clawwork.ai": "HiddenPuppy",
|
||||
"jerome.benoit@sap.com": "jerome-benoit",
|
||||
"wysie@users.noreply.github.com": "Wysie",
|
||||
"leoyuan0099@gmail.com": "keyuyuan",
|
||||
"bxzt2006@163.com": "Only-Code-A",
|
||||
@@ -163,11 +181,47 @@ AUTHOR_MAP = {
|
||||
"jaisehgal11299@gmail.com": "jaisup",
|
||||
"percydikec@gmail.com": "PercyDikec",
|
||||
"noonou7@gmail.com": "HenkDz",
|
||||
# Azure Foundry salvage (PRs #9029, #4599, #10086, #8766)
|
||||
"tech@smartlogics.net": "TechPrototyper",
|
||||
"637186+HangGlidersRule@users.noreply.github.com": "HangGlidersRule",
|
||||
"pein892@gmail.com": "pein892",
|
||||
"dean.kerr@gmail.com": "deankerr",
|
||||
"socrates1024@gmail.com": "socrates1024",
|
||||
"seanalt555@gmail.com": "Salt-555",
|
||||
"satelerd@gmail.com": "satelerd",
|
||||
"dan@danlynn.com": "danklynn",
|
||||
"mattmaximo@hotmail.com": "MattMaximo",
|
||||
"149063006+j3ffffff@users.noreply.github.com": "j3ffffff",
|
||||
"A-FdL-Prog@users.noreply.github.com": "A-FdL-Prog",
|
||||
"l0hde@users.noreply.github.com": "l0hde",
|
||||
"difujia@users.noreply.github.com": "difujia",
|
||||
"vominh1919@gmail.com": "vominh1919",
|
||||
"yue.gu2023@gmail.com": "YueLich",
|
||||
"51783311+andyylin@users.noreply.github.com": "andyylin",
|
||||
"me@jakubkrcmar.cz": "jakubkrcmar",
|
||||
"prasadus92@gmail.com": "prasadus92",
|
||||
"michael@make.software": "mssteuer",
|
||||
"der@konsi.org": "konsisumer",
|
||||
"abogale2@gmail.com": "amanuel2",
|
||||
"alexazzjjtt@163.com": "alexzhu0",
|
||||
"pub_forgreatagent@antgroup.com": "AntAISecurityLab",
|
||||
"252620095+briandevans@users.noreply.github.com": "briandevans",
|
||||
"danielrpike9@gmail.com": "Bartok9",
|
||||
"skozyuk@cruxexperts.com": "CruxExperts",
|
||||
"154585401+LeonSGP43@users.noreply.github.com": "LeonSGP43",
|
||||
"mgparkprint@gmail.com": "vlwkaos",
|
||||
"tranquil_flow@protonmail.com": "Tranquil-Flow",
|
||||
"wangshengyang2004@163.com": "Wangshengyang2004",
|
||||
"hasan.ali13381@gmail.com": "H-Ali13381",
|
||||
"xienb@proton.me": "XieNBi",
|
||||
"139681654+maymuneth@users.noreply.github.com": "maymuneth",
|
||||
"zengwei@nightq.cn": "nightq",
|
||||
"1434494126@qq.com": "5park1e",
|
||||
"158153005+5park1e@users.noreply.github.com": "5park1e",
|
||||
"innocarpe@gmail.com": "innocarpe",
|
||||
"noreply@ked.com": "qike-ms",
|
||||
"andrekurait@gmail.com": "AndreKurait",
|
||||
"bsgdigital@users.noreply.github.com": "bsgdigital",
|
||||
"numman.ali@gmail.com": "nummanali",
|
||||
"rohithsaimidigudla@gmail.com": "whitehatjr1001",
|
||||
"0xNyk@users.noreply.github.com": "0xNyk",
|
||||
@@ -186,6 +240,11 @@ AUTHOR_MAP = {
|
||||
"bryan@intertwinesys.com": "bryanyoung",
|
||||
"christo.mitov@gmail.com": "christomitov",
|
||||
"hermes@nousresearch.com": "NousResearch",
|
||||
"reginaldasr@gmail.com": "ReginaldasR",
|
||||
"ntconguit@gmail.com": "0xharryriddle",
|
||||
"agent@wildcat.local": "ericnicolaides",
|
||||
"georgex8001@gmail.com": "georgex8001",
|
||||
"stefan@dimagents.ai": "dimitrovi",
|
||||
"hermes@noushq.ai": "benbarclay",
|
||||
"chinmingcock@gmail.com": "ChimingLiu",
|
||||
"openclaw@sparklab.ai": "openclaw",
|
||||
@@ -334,6 +393,9 @@ AUTHOR_MAP = {
|
||||
"brian@bde.io": "briandevans",
|
||||
"hubin_ll@qq.com": "LLQWQ",
|
||||
"memosr_email@gmail.com": "memosr",
|
||||
"jperlow@gmail.com": "perlowja",
|
||||
"tangyuanjc@JCdeAIfenshendeMac-mini.local": "tangyuanjc",
|
||||
"harryplusplus@gmail.com": "harryplusplus",
|
||||
"anthhub@163.com": "anthhub",
|
||||
"shenuu@gmail.com": "shenuu",
|
||||
"xiayh17@gmail.com": "xiayh0107",
|
||||
@@ -357,6 +419,7 @@ AUTHOR_MAP = {
|
||||
"105142614+VTRiot@users.noreply.github.com": "VTRiot",
|
||||
"vivien000812@gmail.com": "iamagenius00",
|
||||
"89228157+Feranmi10@users.noreply.github.com": "Feranmi10",
|
||||
"oluwadareferanmi11@gmail.com": "Feranmi10",
|
||||
"simon@gtcl.us": "simon-gtcl",
|
||||
"suzukaze.haduki@gmail.com": "houko",
|
||||
"cliff@cigii.com": "cgarwood82",
|
||||
@@ -437,6 +500,12 @@ AUTHOR_MAP = {
|
||||
"topcheer@me.com": "topcheer",
|
||||
"walli@tencent.com": "walli",
|
||||
"zhuofengwang@tencent.com": "Zhuofeng-Wang",
|
||||
# April 2026 salvage-PR batch (#14920, #14986, #14966)
|
||||
"mrunmayeerane17@gmail.com": "mrunmayee17",
|
||||
"69489633+camaragon@users.noreply.github.com": "camaragon",
|
||||
"shamork@outlook.com": "shamork",
|
||||
# April 2026 Discord Copilot /model salvage (#15030)
|
||||
"cshong2017@outlook.com": "Nicecsh",
|
||||
# no-github-match — keep as display names
|
||||
"clio-agent@sisyphuslabs.ai": "Sisyphus",
|
||||
"marco@rutimka.de": "Marco Rutsch",
|
||||
@@ -444,6 +513,9 @@ AUTHOR_MAP = {
|
||||
"zhangxicen@example.com": "zhangxicen",
|
||||
"codex@openai.invalid": "teknium1",
|
||||
"screenmachine@gmail.com": "teknium1",
|
||||
"chenzeshi@live.com": "chen1749144759",
|
||||
"mor.aleksandr@yahoo.com": "MorAlekss",
|
||||
"ash@users.noreply.github.com": "ash",
|
||||
}
|
||||
|
||||
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user